pic_compress.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383
  1. /*
  2. * Copyright (c) 2020 Actions Technology Co., Ltd
  3. *
  4. * SPDX-License-Identifier: Apache-2.0
  5. */
  6. #include <errno.h>
  7. #include <stdio.h>
  8. #include <stdlib.h>
  9. #include <stdbool.h>
  10. #include <string.h>
  11. #include <os_common_api.h>
  12. #include <ui_region.h>
  13. #include <compress_api.h>
  14. #include "tile_cache.h"
  15. #include "lz4/lz4.h"
  16. #include "lz4/lz4hc.h"
  17. #include "rle/rle.h"
  18. #ifndef CONFIG_SIMULATOR
  19. #include "brom_interface.h"
  20. #endif
  21. #include <display/display_hal.h>
  22. #ifdef CONFIG_DMA2D_HAL
  23. #include <dma2d_hal.h>
  24. #endif
  25. #include <memory/mem_cache.h>
  26. #if defined(__GNUC__) && !defined(__clang__)
  27. # define LZ4_FORCE_O3 __attribute__((optimize("O3")))
  28. #else
  29. # define LZ4_FORCE_O3
  30. #endif
  31. #ifdef CONFIG_PIC_COMPRESS
  32. __aligned(4) __in_section_unique(decompress.bss.cache)
  33. static uint8_t tile_temp[TILE_MAX_H * TILE_MAX_W * PIC_BYTES_PER_PIXEL];
  34. #endif
  35. #ifdef CONFIG_DMA2D_HAL
  36. static bool dma2d_inited = false;
  37. static hal_dma2d_handle_t dma2d;
  38. #endif
  39. __ramfunc int hardware_copy(char *dest, int16_t d_stride, const char *src,
  40. int16_t s_width, int16_t s_height, int16_t s_stride, uint8_t bytes_per_pixel)
  41. {
  42. int copy_line_len = bytes_per_pixel * s_width;
  43. int ret = -1;
  44. #ifdef CONFIG_DMA2D_HAL
  45. static const uint32_t hal_formats[] = {
  46. 0, HAL_PIXEL_FORMAT_A8, HAL_PIXEL_FORMAT_RGB_565,
  47. HAL_PIXEL_FORMAT_BGR_888, HAL_PIXEL_FORMAT_ARGB_8888,
  48. };
  49. if (!dma2d_inited) {
  50. if (hal_dma2d_init(&dma2d, HAL_DMA2D_FULL_MODES)) {
  51. printk("hal_dma2d_init failed\n");
  52. goto out_sw_copy;
  53. }
  54. dma2d_inited = true;
  55. dma2d.output_cfg.mode = HAL_DMA2D_M2M;
  56. dma2d.layer_cfg[1].alpha_mode = HAL_DMA2D_NO_MODIF_ALPHA;
  57. }
  58. #ifndef CONFIG_NO_PSRAM
  59. mem_dcache_clean(src, s_stride * s_height);
  60. mem_dcache_sync();
  61. #endif
  62. dma2d.output_cfg.output_pitch = d_stride;
  63. dma2d.output_cfg.color_format = hal_formats[bytes_per_pixel];
  64. hal_dma2d_config_output(&dma2d);
  65. dma2d.layer_cfg[1].input_pitch = s_stride;
  66. dma2d.layer_cfg[1].input_width = s_width;
  67. dma2d.layer_cfg[1].input_height = s_height;
  68. dma2d.layer_cfg[1].color_format = hal_formats[bytes_per_pixel];
  69. hal_dma2d_config_layer(&dma2d, 1);
  70. ret = hal_dma2d_start(&dma2d, (uint32_t)src, (uint32_t)dest, s_width, s_height);
  71. out_sw_copy:
  72. #endif /* CONFIG_DMA2D_HAL */
  73. if (ret < 0) {
  74. if (copy_line_len == s_stride && s_stride == d_stride) {
  75. memcpy(dest, src, copy_line_len * s_height);
  76. } else {
  77. for (int j = s_height; j > 0; j--) {
  78. memcpy(dest, src, copy_line_len);
  79. dest += d_stride;
  80. src += s_stride;
  81. }
  82. }
  83. }
  84. return copy_line_len * s_height;
  85. }
  /**
   * Wait for the transfer started by hardware_copy().
   *
   * Does nothing when CONFIG_DMA2D_HAL is disabled or the DMA2D engine has
   * not been initialised yet.
   */
  static inline void hardware_wait_finish(void)
  {
  #ifdef CONFIG_DMA2D_HAL
      if (!dma2d_inited) {
          return;
      }

      hal_dma2d_poll_transfer(&dma2d, -1);
  #endif
  }
  93. #ifdef CONFIG_PIC_COMPRESS
  94. int pic_compress(const char* picSrc, char* picDst, int srcWidth, int srcHight,
  95. int tileWidth, int tileHight, int maxOutputSize, uint8_t format, uint8_t compress_format)
  96. {
  97. const char *tile_start;
  98. int tile_width = 0, tile_height = 0, tile_cnt = 0;
  99. int total_size = 0;
  100. int data_size = 0;
  101. compress_pic_head_t *pic_head = (compress_pic_head_t *)picDst;
  102. uint8_t bytes_per_pixel = 0;
  103. int tile_x_num = 0, tile_y_num = 0;
  104. tile_head_t *tile_head_info = NULL;
  105. char* base_picDst = picDst;
  106. int src_length = 0;
  107. switch (format) {
  108. case COMPRESSED_PIC_CF_RGB_565:
  109. case COMPRESSED_PIC_CF_ARGB_1555:
  110. bytes_per_pixel = 2;
  111. break;
  112. case COMPRESSED_PIC_CF_ARGB_8565:
  113. case COMPRESSED_PIC_CF_ARGB_6666:
  114. bytes_per_pixel = 3;
  115. break;
  116. case COMPRESSED_PIC_CF_ARGB_8888:
  117. bytes_per_pixel = 4;
  118. break;
  119. case COMPRESSED_PIC_CF_A8:
  120. bytes_per_pixel = 1;
  121. break;
  122. default:
  123. return -1;
  124. }
  125. src_length = srcWidth * srcHight * bytes_per_pixel;
  126. // pic_head
  127. if (compress_format == COMPRESSED_PIC_FORMAT_LZ4) {
  128. pic_head->magic = LZ4_PIC_MAGIC;
  129. } else if (compress_format == COMPRESSED_PIC_FORMAT_RLE) {
  130. pic_head->magic = RLE_PIC_MAGIC;
  131. } else if (compress_format == COMPRESSED_PIC_FORMAT_RAW) {
  132. pic_head->magic = RAW_PIC_MAGIC;
  133. }
  134. pic_head->width = srcWidth;
  135. pic_head->height = srcHight;
  136. pic_head->tile_width = tileWidth;
  137. pic_head->tile_height = tileHight;
  138. pic_head->format = format;
  139. pic_head->bytes_per_pixel = bytes_per_pixel;
  140. picDst += sizeof(compress_pic_head_t);
  141. total_size += sizeof(compress_pic_head_t);
  142. //tile_info
  143. tile_x_num = (srcWidth + tileWidth - 1) / tileWidth;
  144. tile_y_num = (srcHight + tileHight - 1) / tileHight;
  145. tile_head_info = (tile_head_t *)picDst;
  146. picDst += sizeof(tile_head_t) * tile_x_num * tile_y_num;
  147. total_size += sizeof(tile_head_t) * tile_x_num * tile_y_num;
  148. // comprassed_data
  149. for(uint16_t j = 0; j < tile_y_num;j ++) {
  150. for (uint16_t i = 0; i < tile_x_num; i++) {
  151. tile_head_t* new_tile = &tile_head_info[j * tile_x_num + i];
  152. if((i + 1) * tileWidth > srcWidth) {
  153. tile_width = srcWidth - i * tileWidth;
  154. } else {
  155. tile_width = tileWidth;
  156. }
  157. if((j + 1) * tileHight > srcHight) {
  158. tile_height = srcHight - j * tileHight;
  159. } else {
  160. tile_height = tileHight;
  161. }
  162. new_tile->tile_addr = picDst - base_picDst;
  163. tile_start = picSrc + i * tileWidth * bytes_per_pixel
  164. + j * tileHight * srcWidth * bytes_per_pixel;
  165. for(int k = 0; k < tile_height; k++) {
  166. memcpy(&tile_temp[k * bytes_per_pixel * tile_width],
  167. tile_start + k * bytes_per_pixel * srcWidth,
  168. bytes_per_pixel * tile_width);
  169. }
  170. if (compress_format == COMPRESSED_PIC_FORMAT_LZ4) {
  171. new_tile->tile_size = LZ4_compress_HC(tile_temp,
  172. new_tile->tile_addr + base_picDst,
  173. tile_height * tile_width * bytes_per_pixel,
  174. maxOutputSize, 12);
  175. } else if (compress_format == COMPRESSED_PIC_FORMAT_RLE) {
  176. new_tile->tile_size = rle_compress(tile_temp,
  177. new_tile->tile_addr + base_picDst,
  178. tile_height * tile_width,
  179. maxOutputSize, bytes_per_pixel);
  180. } else if (compress_format == COMPRESSED_PIC_FORMAT_RAW) {
  181. new_tile->tile_size = tile_height * tile_width * bytes_per_pixel;
  182. memcpy(new_tile->tile_addr + base_picDst,tile_temp, new_tile->tile_size);
  183. }
  184. if (new_tile->tile_size <= 0) {
  185. printf("Failed to compress the data\n");
  186. return -1;
  187. }
  188. tile_cnt++;
  189. maxOutputSize -= new_tile->tile_size;
  190. picDst += new_tile->tile_size;
  191. total_size += new_tile->tile_size;
  192. data_size += new_tile->tile_size;
  193. }
  194. }
  195. pic_head->tile_num = tile_cnt;
  196. // compressed size maxed source size
  197. if (data_size > src_length) {
  198. pic_head->tile_width = srcWidth;
  199. pic_head->tile_height = srcHight;
  200. pic_head->tile_num = 1;
  201. pic_head->magic = RAW_PIC_MAGIC;
  202. total_size = bytes_per_pixel * srcWidth * srcHight + sizeof(compress_pic_head_t);
  203. memcpy((void*)(base_picDst + sizeof(compress_pic_head_t)), (void *)picSrc, src_length);
  204. }
  205. return total_size;
  206. }
  207. int pic_compress_size(const char* picSource)
  208. {
  209. compress_pic_head_t* pic_head = (compress_pic_head_t*)picSource;
  210. return pic_head->width * pic_head->height * pic_head->bytes_per_pixel;
  211. }
  212. int pic_compress_format(const char* picSource)
  213. {
  214. compress_pic_head_t* pic_head = (compress_pic_head_t*)picSource;
  215. return pic_head->format;
  216. }
  217. #endif
  218. LZ4_FORCE_O3
  219. __ramfunc int pic_decompress(const char* picSource, char* picDst, int compressedSize,
  220. int maxDecompressedSize, int out_stride, int x, int y, int w, int h)
  221. {
  222. compress_pic_head_t* pic_head = (compress_pic_head_t*)picSource;
  223. os_strace_u32x6(SYS_TRACE_ID_PIC_DECOMPRESS, pic_head->magic, pic_head->format, x, y, w, h);
  224. tile_head_t* tile_head_info = (tile_head_t*)(picSource + sizeof(compress_pic_head_t));
  225. int tile_x_num = (pic_head->width + pic_head->tile_width - 1) / pic_head->tile_width;
  226. int x_start_tile = x / pic_head->tile_width;
  227. int x_end_tile = (x + w - 1) / pic_head->tile_width;
  228. int y_start_tile = y / pic_head->tile_height;
  229. int y_end_tile = (y + h - 1) / pic_head->tile_height;
  230. int out_size = 0;
  231. //bool dec = false;
  232. //int get_cache_time = 0;
  233. //int copy_time = 0;
  234. //int decompress_time = 0;
  235. if (!out_stride) {
  236. out_stride = w * pic_head->bytes_per_pixel;
  237. }
  238. ui_region_t copy_region;
  239. ui_region_t crop_region = {
  240. .x1 = x,
  241. .y1 = y,
  242. .x2 = x + w - 1,
  243. .y2 = y + h - 1,
  244. };
  245. if (pic_head->magic == RAW_PIC_MAGIC) {
  246. char* temp_picSource = (char *)picSource + sizeof(compress_pic_head_t)
  247. + y * pic_head->bytes_per_pixel * pic_head->width
  248. + x * pic_head->bytes_per_pixel;
  249. out_size = hardware_copy(picDst, out_stride, temp_picSource, w,
  250. h, pic_head->bytes_per_pixel * pic_head->width, pic_head->bytes_per_pixel);
  251. hardware_wait_finish();
  252. os_strace_end_call_u32(SYS_TRACE_ID_PIC_DECOMPRESS, 1);
  253. return out_size;
  254. }
  255. if (y_start_tile < 0)
  256. y_start_tile = 0;
  257. if (x_start_tile < 0)
  258. x_start_tile = 0;
  259. for (int j = y_start_tile; j <= y_end_tile; j++) {
  260. for (int i = x_start_tile; i <= x_end_tile; i++) {
  261. int tile_index = i + j * tile_x_num;
  262. //uint32_t get_cache_start = k_cycle_get_32();
  263. tile_cache_item_t * cache_item = tile_cache_get(picSource, tile_index);
  264. //get_cache_time += (k_cycle_get_32() - get_cache_start);
  265. //if (!tile_cache_is_valid(cache_item)) {
  266. //dec = true;
  267. if (pic_head->magic == LZ4_PIC_MAGIC) {
  268. #ifndef CONFIG_SIMULATOR
  269. p_brom_misc_api->p_decompress(picSource + tile_head_info[tile_index].tile_addr,
  270. cache_item->tile_data,
  271. tile_head_info[tile_index].tile_size,
  272. sizeof(cache_item->tile_data));
  273. #else
  274. LZ4_decompress_safe(picSource + tile_head_info[tile_index].tile_addr,
  275. cache_item->tile_data,
  276. tile_head_info[tile_index].tile_size,
  277. sizeof(cache_item->tile_data));
  278. #endif
  279. } else if (pic_head->magic == RLE_PIC_MAGIC) {
  280. rle_decompress(picSource + tile_head_info[tile_index].tile_addr,
  281. cache_item->tile_data,
  282. tile_head_info[tile_index].tile_size,
  283. sizeof(cache_item->tile_data), pic_head->bytes_per_pixel);
  284. } else {
  285. return -ENOEXEC;
  286. }
  287. //tile_cache_set_valid(cache_item, picSource, tile_index,
  288. //tile_head_info[tile_index].tile_size);
  289. //}
  290. ui_region_t tile_region = {
  291. .x1 = i * pic_head->tile_width,
  292. .y1 = j * pic_head->tile_height,
  293. .x2 = (i + 1) * pic_head->tile_width - 1,
  294. .y2 = (j + 1) * pic_head->tile_height - 1,
  295. };
  296. if (tile_region.x2 >= pic_head->width) {
  297. tile_region.x2 = pic_head->width - 1;
  298. }
  299. if (tile_region.y2 >= pic_head->height) {
  300. tile_region.y2 = pic_head->height - 1;
  301. }
  302. if (ui_region_intersect(&copy_region, &crop_region, &tile_region) == false) {
  303. continue;
  304. }
  305. int src_stride = pic_head->bytes_per_pixel * ui_region_get_width(&tile_region);
  306. char* tile_dest_addr = picDst + (copy_region.x1 - x) * pic_head->bytes_per_pixel
  307. + (copy_region.y1 - y) * out_stride;
  308. char* tile_src_addr = cache_item->tile_data + (copy_region.x1 - tile_region.x1) * pic_head->bytes_per_pixel
  309. + (copy_region.y1 - tile_region.y1) * src_stride;
  310. out_size += hardware_copy(tile_dest_addr, out_stride, tile_src_addr,
  311. ui_region_get_width(&copy_region), ui_region_get_height(&copy_region),
  312. src_stride, pic_head->bytes_per_pixel);
  313. #if CONFIG_TILE_CACHE_NUM == 1
  314. hardware_wait_finish();
  315. #endif
  316. //copy_time += (k_cycle_get_32() - copy_start);
  317. }
  318. }
  319. #if CONFIG_TILE_CACHE_NUM > 1
  320. hardware_wait_finish();
  321. #endif
  322. os_strace_end_call_u32(SYS_TRACE_ID_PIC_DECOMPRESS, (x_end_tile - x_start_tile + 1) * (y_end_tile - y_start_tile + 1));
  323. //printk("decompress:src %p (%d %d %d %d) dec %d cost (%d = %d + %d + %d)\n",picSource, x, y, w, h, dec, k_cyc_to_us_floor32(k_cycle_get_32() - timestamp),k_cyc_to_us_floor32(get_cache_time), k_cyc_to_us_floor32(decompress_time),k_cyc_to_us_floor32(copy_time));
  324. return out_size;
  325. }