yuv_downscaler.c 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2010-2011 Université de Bordeaux 1
  4. * Copyright (C) 2010 Mehdi Juhoor <mjuhoor@gmail.com>
  5. * Copyright (C) 2010 Centre National de la Recherche Scientifique
  6. *
  7. * StarPU is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation; either version 2.1 of the License, or (at
  10. * your option) any later version.
  11. *
  12. * StarPU is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. *
  16. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  17. */
#include <starpu.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <unistd.h>
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include "yuv_downscaler.h"
  27. struct timeval start;
  28. struct timeval end;
  29. const char *filename_in_default = "hugefile.2s.yuv";
  30. const char *filename_out_default = "hugefile.2s.out.yuv";
  31. char filename_in[1024];
  32. char filename_out[1024];
  33. void parse_args(int argc, char **argv)
  34. {
  35. if (argc == 3) {
  36. strcpy(filename_in, argv[1]);
  37. strcpy(filename_out, argv[2]);
  38. }
  39. else {
  40. sprintf(filename_in, "%s/examples/ppm_downscaler/%s", STARPU_BUILD_DIR, filename_in_default);
  41. sprintf(filename_out, "%s/examples/ppm_downscaler/%s", STARPU_BUILD_DIR, filename_out_default);
  42. }
  43. }
  44. #define FRAMESIZE sizeof(struct yuv_frame)
  45. #define NEW_FRAMESIZE sizeof(struct yuv_new_frame)
  46. static void ds_kernel_cpu(void *descr[], __attribute__((unused)) void *arg)
  47. {
  48. uint8_t *input = (uint8_t *)STARPU_MATRIX_GET_PTR(descr[0]);
  49. const unsigned input_ld = STARPU_MATRIX_GET_LD(descr[0]);
  50. uint8_t *output = (uint8_t *)STARPU_MATRIX_GET_PTR(descr[1]);
  51. const unsigned output_ld = STARPU_MATRIX_GET_LD(descr[1]);
  52. const unsigned ncols = STARPU_MATRIX_GET_NX(descr[0]);
  53. const unsigned nlines = STARPU_MATRIX_GET_NY(descr[0]);
  54. unsigned line, col;
  55. for (line = 0; line < nlines; line+=FACTOR)
  56. for (col = 0; col < ncols; col+=FACTOR)
  57. {
  58. unsigned sum = 0;
  59. unsigned lline, lcol;
  60. for (lline = 0; lline < FACTOR; lline++)
  61. for (lcol = 0; lcol < FACTOR; lcol++)
  62. {
  63. unsigned in_index = (lcol + col) + (lline + line)*input_ld;
  64. sum += input[in_index];
  65. }
  66. unsigned out_index = (col / FACTOR) + (line / FACTOR)*output_ld;
  67. output[out_index] = (uint8_t)(sum/(FACTOR*FACTOR));
  68. }
  69. }
  70. static struct starpu_codelet_t ds_codelet = {
  71. .where = STARPU_CPU,
  72. .cpu_func = ds_kernel_cpu,
  73. .nbuffers = 2, /* input -> output */
  74. .model = NULL
  75. };
  76. /* each block contains BLOCK_HEIGHT consecutive lines */
  77. static struct starpu_data_filter filter_y = {
  78. .filter_func = starpu_block_filter_func,
  79. .nchildren= HEIGHT/BLOCK_HEIGHT
  80. };
  81. static struct starpu_data_filter filter_uv = {
  82. .filter_func = starpu_block_filter_func,
  83. .nchildren = (HEIGHT/2)/BLOCK_HEIGHT
  84. };
  85. int main(int argc, char **argv)
  86. {
  87. assert(HEIGHT % (2*BLOCK_HEIGHT) == 0);
  88. assert(HEIGHT % FACTOR == 0);
  89. parse_args(argc, argv);
  90. /* fprintf(stderr, "Reading input file ...\n"); */
  91. /* how many frames ? */
  92. struct stat stbuf;
  93. stat(filename_in, &stbuf);
  94. size_t filesize = stbuf.st_size;
  95. unsigned nframes = filesize/FRAMESIZE;
  96. /* fprintf(stderr, "filesize %lx (FRAME SIZE %lx NEW SIZE %lx); nframes %d\n", filesize, FRAMESIZE, NEW_FRAMESIZE, nframes); */
  97. assert((filesize % sizeof(struct yuv_frame)) == 0);
  98. /* fetch input data */
  99. FILE *f_in = fopen(filename_in, "r");
  100. assert(f_in);
  101. struct yuv_frame *yuv_in_buffer = (struct yuv_frame *) malloc(nframes*FRAMESIZE);
  102. fread(yuv_in_buffer, FRAMESIZE, nframes, f_in);
  103. /* allocate room for an output buffer */
  104. FILE *f_out = fopen(filename_out, "w+");
  105. assert(f_out);
  106. /* fprintf(stderr, "Alloc output file ...\n"); */
  107. struct yuv_new_frame *yuv_out_buffer = (struct yuv_new_frame *) calloc(nframes, NEW_FRAMESIZE);
  108. assert(yuv_out_buffer);
  109. starpu_data_handle *frame_y_handle = (starpu_data_handle *) calloc(nframes, sizeof(starpu_data_handle));
  110. starpu_data_handle *frame_u_handle = (starpu_data_handle *) calloc(nframes, sizeof(starpu_data_handle));
  111. starpu_data_handle *frame_v_handle = (starpu_data_handle *) calloc(nframes, sizeof(starpu_data_handle));
  112. starpu_data_handle *new_frame_y_handle = (starpu_data_handle *) calloc(nframes, sizeof(starpu_data_handle));
  113. starpu_data_handle *new_frame_u_handle = (starpu_data_handle *) calloc(nframes, sizeof(starpu_data_handle));
  114. starpu_data_handle *new_frame_v_handle = (starpu_data_handle *) calloc(nframes, sizeof(starpu_data_handle));
  115. starpu_init(NULL);
  116. /* register and partition all layers */
  117. unsigned frame;
  118. for (frame = 0; frame < nframes; frame++)
  119. {
  120. /* register Y layer */
  121. starpu_matrix_data_register(&frame_y_handle[frame], 0,
  122. (uintptr_t)&yuv_in_buffer[frame].y,
  123. WIDTH, WIDTH, HEIGHT, sizeof(uint8_t));
  124. starpu_data_partition(frame_y_handle[frame], &filter_y);
  125. starpu_matrix_data_register(&new_frame_y_handle[frame], 0,
  126. (uintptr_t)&yuv_out_buffer[frame].y,
  127. NEW_WIDTH, NEW_WIDTH, NEW_HEIGHT, sizeof(uint8_t));
  128. starpu_data_partition(new_frame_y_handle[frame], &filter_y);
  129. /* register U layer */
  130. starpu_matrix_data_register(&frame_u_handle[frame], 0,
  131. (uintptr_t)&yuv_in_buffer[frame].u,
  132. WIDTH/2, WIDTH/2, HEIGHT/2, sizeof(uint8_t));
  133. starpu_data_partition(frame_u_handle[frame], &filter_uv);
  134. starpu_matrix_data_register(&new_frame_u_handle[frame], 0,
  135. (uintptr_t)&yuv_out_buffer[frame].u,
  136. NEW_WIDTH/2, NEW_WIDTH/2, NEW_HEIGHT/2, sizeof(uint8_t));
  137. starpu_data_partition(new_frame_u_handle[frame], &filter_uv);
  138. /* register V layer */
  139. starpu_matrix_data_register(&frame_v_handle[frame], 0,
  140. (uintptr_t)&yuv_in_buffer[frame].v,
  141. WIDTH/2, WIDTH/2, HEIGHT/2, sizeof(uint8_t));
  142. starpu_data_partition(frame_v_handle[frame], &filter_uv);
  143. starpu_matrix_data_register(&new_frame_v_handle[frame], 0,
  144. (uintptr_t)&yuv_out_buffer[frame].v,
  145. NEW_WIDTH/2, NEW_WIDTH/2, NEW_HEIGHT/2, sizeof(uint8_t));
  146. starpu_data_partition(new_frame_v_handle[frame], &filter_uv);
  147. }
  148. /* how many tasks are there ? */
  149. unsigned nblocks_y = filter_y.nchildren;
  150. unsigned nblocks_uv = filter_uv.nchildren;
  151. unsigned ntasks = (nblocks_y + 2*nblocks_uv)*nframes;
  152. fprintf(stderr, "Start computation: there will be %u tasks for %u frames\n", ntasks, nframes);
  153. gettimeofday(&start, NULL);
  154. /* do the computation */
  155. for (frame = 0; frame < nframes; frame++)
  156. {
  157. unsigned blocky;
  158. for (blocky = 0; blocky < nblocks_y; blocky++)
  159. {
  160. struct starpu_task *task = starpu_task_create();
  161. task->cl = &ds_codelet;
  162. /* input */
  163. task->buffers[0].handle = starpu_data_get_sub_data(frame_y_handle[frame], 1, blocky);
  164. task->buffers[0].mode = STARPU_R;
  165. /* output */
  166. task->buffers[1].handle = starpu_data_get_sub_data(new_frame_y_handle[frame], 1, blocky);
  167. task->buffers[1].mode = STARPU_W;
  168. starpu_task_submit(task);
  169. }
  170. unsigned blocku;
  171. for (blocku = 0; blocku < nblocks_uv; blocku++)
  172. {
  173. struct starpu_task *task = starpu_task_create();
  174. task->cl = &ds_codelet;
  175. /* input */
  176. task->buffers[0].handle = starpu_data_get_sub_data(frame_u_handle[frame], 1, blocku);
  177. task->buffers[0].mode = STARPU_R;
  178. /* output */
  179. task->buffers[1].handle = starpu_data_get_sub_data(new_frame_u_handle[frame], 1, blocku);
  180. task->buffers[1].mode = STARPU_W;
  181. starpu_task_submit(task);
  182. }
  183. unsigned blockv;
  184. for (blockv = 0; blockv < nblocks_uv; blockv++)
  185. {
  186. struct starpu_task *task = starpu_task_create();
  187. task->cl = &ds_codelet;
  188. /* input */
  189. task->buffers[0].handle = starpu_data_get_sub_data(frame_v_handle[frame], 1, blockv);
  190. task->buffers[0].mode = STARPU_R;
  191. /* output */
  192. task->buffers[1].handle = starpu_data_get_sub_data(new_frame_v_handle[frame], 1, blockv);
  193. task->buffers[1].mode = STARPU_W;
  194. starpu_task_submit(task);
  195. }
  196. }
  197. /* make sure all output buffers are sync'ed */
  198. for (frame = 0; frame < nframes; frame++)
  199. {
  200. starpu_data_unregister(frame_y_handle[frame]);
  201. starpu_data_unregister(frame_u_handle[frame]);
  202. starpu_data_unregister(frame_v_handle[frame]);
  203. starpu_data_unregister(new_frame_y_handle[frame]);
  204. starpu_data_unregister(new_frame_u_handle[frame]);
  205. starpu_data_unregister(new_frame_v_handle[frame]);
  206. }
  207. /* There is an implicit barrier: the unregister methods will block
  208. * until the computation is done and that the result was put back into
  209. * memory. */
  210. gettimeofday(&end, NULL);
  211. double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
  212. fprintf(stderr, "Computation took %f seconds\n", timing/1000000);
  213. fprintf(stderr, "FPS %f\n", (1000000*nframes)/timing);
  214. fwrite(yuv_out_buffer, NEW_FRAMESIZE, nframes, f_out);
  215. /* partition the layers into smaller parts */
  216. starpu_shutdown();
  217. return 0;
  218. }