/* yuv_downscaler.c */

  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2009-2020 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  4. * Copyright (C) 2010 Mehdi Juhoor
  5. *
  6. * StarPU is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU Lesser General Public License as published by
  8. * the Free Software Foundation; either version 2.1 of the License, or (at
  9. * your option) any later version.
  10. *
  11. * StarPU is distributed in the hope that it will be useful, but
  12. * WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  14. *
  15. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  16. */
/*
 * This uses a dummy algorithm (each FACTOR x FACTOR tile of samples is
 * replaced by its average) to downscale a 1920x1080 YUV film.
 * Each frame is split into horizontal stripes which are processed in parallel.
 */
#include <starpu.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "yuv_downscaler.h"
  28. static double start;
  29. static double end;
  30. static const char *filename_in_default = "hugefile.2s.yuv";
  31. static const char *filename_out_default = "hugefile.2s.out.yuv";
  32. static char filename_in[1024];
  33. static char filename_out[1024];
  34. void parse_args(int argc, char **argv)
  35. {
  36. if (argc == 3)
  37. {
  38. strncpy(filename_in, argv[1], 1023);
  39. strncpy(filename_out, argv[2], 1023);
  40. }
  41. else
  42. {
  43. strncpy(filename_in, filename_in_default, 1023);
  44. strncpy(filename_out, filename_out_default, 1023);
  45. }
  46. }
  47. #define FRAMESIZE sizeof(struct yuv_frame)
  48. #define NEW_FRAMESIZE sizeof(struct yuv_new_frame)
  49. void ds_kernel_cpu(void *descr[], void *arg)
  50. {
  51. (void)arg;
  52. uint8_t *input = (uint8_t *)STARPU_MATRIX_GET_PTR(descr[0]);
  53. const unsigned input_ld = STARPU_MATRIX_GET_LD(descr[0]);
  54. uint8_t *output = (uint8_t *)STARPU_MATRIX_GET_PTR(descr[1]);
  55. const unsigned output_ld = STARPU_MATRIX_GET_LD(descr[1]);
  56. const unsigned ncols = STARPU_MATRIX_GET_NX(descr[0]);
  57. const unsigned nlines = STARPU_MATRIX_GET_NY(descr[0]);
  58. unsigned line, col;
  59. for (line = 0; line < nlines; line+=FACTOR)
  60. for (col = 0; col < ncols; col+=FACTOR)
  61. {
  62. unsigned sum = 0;
  63. unsigned lline, lcol;
  64. for (lline = 0; lline < FACTOR; lline++)
  65. for (lcol = 0; lcol < FACTOR; lcol++)
  66. {
  67. unsigned in_index = (lcol + col) + (lline + line)*input_ld;
  68. sum += input[in_index];
  69. }
  70. unsigned out_index = (col / FACTOR) + (line / FACTOR)*output_ld;
  71. output[out_index] = (uint8_t)(sum/(FACTOR*FACTOR));
  72. }
  73. }
  74. static struct starpu_codelet ds_codelet =
  75. {
  76. .cpu_funcs = {ds_kernel_cpu},
  77. .cpu_funcs_name = {"ds_kernel_cpu"},
  78. .nbuffers = 2, /* input -> output */
  79. .modes = {STARPU_R, STARPU_W},
  80. .model = NULL
  81. };
  82. /* each block contains BLOCK_HEIGHT consecutive lines */
  83. static struct starpu_data_filter filter_y =
  84. {
  85. .filter_func = starpu_matrix_filter_block,
  86. .nchildren= HEIGHT/BLOCK_HEIGHT
  87. };
  88. static struct starpu_data_filter filter_uv =
  89. {
  90. .filter_func = starpu_matrix_filter_block,
  91. .nchildren = (HEIGHT/2)/BLOCK_HEIGHT
  92. };
  93. int main(int argc, char **argv)
  94. {
  95. int ret;
  96. size_t sret;
  97. assert(HEIGHT % (2*BLOCK_HEIGHT) == 0);
  98. assert(HEIGHT % FACTOR == 0);
  99. parse_args(argc, argv);
  100. /* fprintf(stderr, "Reading input file ...\n"); */
  101. /* how many frames ? */
  102. struct stat stbuf;
  103. ret = stat(filename_in, &stbuf);
  104. assert(ret);
  105. size_t filesize = stbuf.st_size;
  106. unsigned nframes = filesize/FRAMESIZE;
  107. /* fprintf(stderr, "filesize %lx (FRAME SIZE %lx NEW SIZE %lx); nframes %d\n", filesize, FRAMESIZE, NEW_FRAMESIZE, nframes); */
  108. assert((filesize % sizeof(struct yuv_frame)) == 0);
  109. struct yuv_frame *yuv_in_buffer = (struct yuv_frame *) malloc(nframes*FRAMESIZE);
  110. assert(yuv_in_buffer);
  111. /* fprintf(stderr, "Alloc output file ...\n"); */
  112. struct yuv_new_frame *yuv_out_buffer = (struct yuv_new_frame *) calloc(nframes, NEW_FRAMESIZE);
  113. assert(yuv_out_buffer);
  114. /* fetch input data */
  115. FILE *f_in = fopen(filename_in, "r");
  116. if (!f_in)
  117. {
  118. fprintf(stderr, "couldn't open input file %s\n", filename_in);
  119. exit(EXIT_FAILURE);
  120. }
  121. /* allocate room for an output buffer */
  122. FILE *f_out = fopen(filename_out, "w+");
  123. if (!f_out)
  124. {
  125. fprintf(stderr, "couldn't open output file %s\n", filename_out);
  126. exit(EXIT_FAILURE);
  127. }
  128. sret = fread(yuv_in_buffer, FRAMESIZE, nframes, f_in);
  129. assert(sret == nframes);
  130. starpu_data_handle_t *frame_y_handle = (starpu_data_handle_t *) calloc(nframes, sizeof(starpu_data_handle_t));
  131. starpu_data_handle_t *frame_u_handle = (starpu_data_handle_t *) calloc(nframes, sizeof(starpu_data_handle_t));
  132. starpu_data_handle_t *frame_v_handle = (starpu_data_handle_t *) calloc(nframes, sizeof(starpu_data_handle_t));
  133. starpu_data_handle_t *new_frame_y_handle = (starpu_data_handle_t *) calloc(nframes, sizeof(starpu_data_handle_t));
  134. starpu_data_handle_t *new_frame_u_handle = (starpu_data_handle_t *) calloc(nframes, sizeof(starpu_data_handle_t));
  135. starpu_data_handle_t *new_frame_v_handle = (starpu_data_handle_t *) calloc(nframes, sizeof(starpu_data_handle_t));
  136. ret = starpu_init(NULL);
  137. STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
  138. /* register and partition all layers */
  139. unsigned frame;
  140. for (frame = 0; frame < nframes; frame++)
  141. {
  142. /* register Y layer */
  143. starpu_matrix_data_register(&frame_y_handle[frame], STARPU_MAIN_RAM,
  144. (uintptr_t)&yuv_in_buffer[frame].y,
  145. WIDTH, WIDTH, HEIGHT, sizeof(uint8_t));
  146. starpu_data_partition(frame_y_handle[frame], &filter_y);
  147. starpu_matrix_data_register(&new_frame_y_handle[frame], STARPU_MAIN_RAM,
  148. (uintptr_t)&yuv_out_buffer[frame].y,
  149. NEW_WIDTH, NEW_WIDTH, NEW_HEIGHT, sizeof(uint8_t));
  150. starpu_data_partition(new_frame_y_handle[frame], &filter_y);
  151. /* register U layer */
  152. starpu_matrix_data_register(&frame_u_handle[frame], STARPU_MAIN_RAM,
  153. (uintptr_t)&yuv_in_buffer[frame].u,
  154. WIDTH/2, WIDTH/2, HEIGHT/2, sizeof(uint8_t));
  155. starpu_data_partition(frame_u_handle[frame], &filter_uv);
  156. starpu_matrix_data_register(&new_frame_u_handle[frame], STARPU_MAIN_RAM,
  157. (uintptr_t)&yuv_out_buffer[frame].u,
  158. NEW_WIDTH/2, NEW_WIDTH/2, NEW_HEIGHT/2, sizeof(uint8_t));
  159. starpu_data_partition(new_frame_u_handle[frame], &filter_uv);
  160. /* register V layer */
  161. starpu_matrix_data_register(&frame_v_handle[frame], STARPU_MAIN_RAM,
  162. (uintptr_t)&yuv_in_buffer[frame].v,
  163. WIDTH/2, WIDTH/2, HEIGHT/2, sizeof(uint8_t));
  164. starpu_data_partition(frame_v_handle[frame], &filter_uv);
  165. starpu_matrix_data_register(&new_frame_v_handle[frame], STARPU_MAIN_RAM,
  166. (uintptr_t)&yuv_out_buffer[frame].v,
  167. NEW_WIDTH/2, NEW_WIDTH/2, NEW_HEIGHT/2, sizeof(uint8_t));
  168. starpu_data_partition(new_frame_v_handle[frame], &filter_uv);
  169. }
  170. /* how many tasks are there ? */
  171. unsigned nblocks_y = filter_y.nchildren;
  172. unsigned nblocks_uv = filter_uv.nchildren;
  173. unsigned ntasks = (nblocks_y + 2*nblocks_uv)*nframes;
  174. fprintf(stderr, "Start computation: there will be %u tasks for %u frames\n", ntasks, nframes);
  175. start = starpu_timing_now();
  176. /* do the computation */
  177. for (frame = 0; frame < nframes; frame++)
  178. {
  179. starpu_iteration_push(frame);
  180. unsigned blocky;
  181. for (blocky = 0; blocky < nblocks_y; blocky++)
  182. {
  183. struct starpu_task *task = starpu_task_create();
  184. task->cl = &ds_codelet;
  185. /* input */
  186. task->handles[0] = starpu_data_get_sub_data(frame_y_handle[frame], 1, blocky);
  187. /* output */
  188. task->handles[1] = starpu_data_get_sub_data(new_frame_y_handle[frame], 1, blocky);
  189. ret = starpu_task_submit(task);
  190. STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
  191. }
  192. unsigned blocku;
  193. for (blocku = 0; blocku < nblocks_uv; blocku++)
  194. {
  195. struct starpu_task *task = starpu_task_create();
  196. task->cl = &ds_codelet;
  197. /* input */
  198. task->handles[0] = starpu_data_get_sub_data(frame_u_handle[frame], 1, blocku);
  199. /* output */
  200. task->handles[1] = starpu_data_get_sub_data(new_frame_u_handle[frame], 1, blocku);
  201. ret = starpu_task_submit(task);
  202. STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
  203. }
  204. unsigned blockv;
  205. for (blockv = 0; blockv < nblocks_uv; blockv++)
  206. {
  207. struct starpu_task *task = starpu_task_create();
  208. task->cl = &ds_codelet;
  209. /* input */
  210. task->handles[0] = starpu_data_get_sub_data(frame_v_handle[frame], 1, blockv);
  211. /* output */
  212. task->handles[1] = starpu_data_get_sub_data(new_frame_v_handle[frame], 1, blockv);
  213. ret = starpu_task_submit(task);
  214. STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
  215. }
  216. starpu_iteration_pop();
  217. }
  218. /* make sure all output buffers are sync'ed */
  219. for (frame = 0; frame < nframes; frame++)
  220. {
  221. starpu_data_unregister(frame_y_handle[frame]);
  222. starpu_data_unregister(frame_u_handle[frame]);
  223. starpu_data_unregister(frame_v_handle[frame]);
  224. starpu_data_unregister(new_frame_y_handle[frame]);
  225. starpu_data_unregister(new_frame_u_handle[frame]);
  226. starpu_data_unregister(new_frame_v_handle[frame]);
  227. }
  228. free(frame_y_handle);
  229. free(frame_u_handle);
  230. free(frame_v_handle);
  231. free(new_frame_y_handle);
  232. free(new_frame_u_handle);
  233. free(new_frame_v_handle);
  234. /* There is an implicit barrier: the unregister methods will block
  235. * until the computation is done and that the result was put back into
  236. * memory. */
  237. end = starpu_timing_now();
  238. double timing = end - start;
  239. printf("# s\tFPS\n");
  240. printf("%f\t%f\n", timing/1000000, (1000000*nframes)/timing);
  241. fwrite(yuv_out_buffer, NEW_FRAMESIZE, nframes, f_out);
  242. /* partition the layers into smaller parts */
  243. starpu_shutdown();
  244. if (fclose(f_in) != 0)
  245. fprintf(stderr, "Could not close %s properly\n", filename_in);
  246. if (fclose(f_out) != 0)
  247. fprintf(stderr, "Could not close %s properly\n", filename_out);
  248. return 0;
  249. }