/* yuv_downscaler.c */
/*
 * StarPU
 * Copyright (C) Université Bordeaux 1, CNRS 2008-2010 (see AUTHORS file)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 */
#include <starpu.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <unistd.h>
#include <assert.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "yuv_downscaler.h"
  25. struct timeval start;
  26. struct timeval end;
  27. const char *filename_in_default = "hugefile.2s.yuv";
  28. const char *filename_out_default = "hugefile.2s.out.yuv";
  29. char filename_in[1024];
  30. char filename_out[1024];
  31. void parse_args(int argc, char **argv)
  32. {
  33. if (argc == 3) {
  34. strcpy(filename_in, argv[1]);
  35. strcpy(filename_out, argv[2]);
  36. }
  37. else {
  38. sprintf(filename_in, "%s/examples/ppm_downscaler/%s", STARPU_BUILD_DIR, filename_in_default);
  39. sprintf(filename_out, "%s/examples/ppm_downscaler/%s", STARPU_BUILD_DIR, filename_out_default);
  40. }
  41. }
  42. #define FRAMESIZE sizeof(struct yuv_frame)
  43. #define NEW_FRAMESIZE sizeof(struct yuv_new_frame)
  44. static pthread_cond_t ds_callback_cond = PTHREAD_COND_INITIALIZER;
  45. static pthread_mutex_t ds_callback_mutex = PTHREAD_MUTEX_INITIALIZER;
  46. static unsigned ds_callback_terminated = 0;
  47. static unsigned ds_callback_cnt = 0;
  48. static void ds_callback(void *arg)
  49. {
  50. unsigned val = STARPU_ATOMIC_ADD(&ds_callback_cnt, -1);
  51. if (val == 0)
  52. {
  53. fprintf(stderr, "Downscaling terminated...\n");
  54. pthread_mutex_lock(&ds_callback_mutex);
  55. ds_callback_terminated = 1;
  56. pthread_cond_signal(&ds_callback_cond);
  57. pthread_mutex_unlock(&ds_callback_mutex);
  58. }
  59. }
  60. static void ds_kernel_cpu(void *descr[], __attribute__((unused)) void *arg)
  61. {
  62. uint8_t *input = (uint8_t *)STARPU_MATRIX_GET_PTR(descr[0]);
  63. unsigned input_ld = STARPU_MATRIX_GET_LD(descr[0]);
  64. uint8_t *output = (uint8_t *)STARPU_MATRIX_GET_PTR(descr[1]);
  65. unsigned output_ld = STARPU_MATRIX_GET_LD(descr[1]);
  66. unsigned ncols = STARPU_MATRIX_GET_NX(descr[0]);
  67. unsigned nlines = STARPU_MATRIX_GET_NY(descr[0]);
  68. unsigned line, col;
  69. for (line = 0; line < nlines; line+=FACTOR)
  70. for (col = 0; col < ncols; col+=FACTOR)
  71. {
  72. unsigned sum = 0;
  73. unsigned lline, lcol;
  74. for (lline = 0; lline < FACTOR; lline++)
  75. for (lcol = 0; lcol < FACTOR; lcol++)
  76. {
  77. unsigned in_index = (lcol + col) + (lline + line)*input_ld;
  78. sum += input[in_index];
  79. }
  80. unsigned out_index = (col / FACTOR) + (line / FACTOR)*output_ld;
  81. output[out_index] = (uint8_t)(sum/(FACTOR*FACTOR));
  82. }
  83. }
  84. static struct starpu_codelet_t ds_codelet = {
  85. .where = STARPU_CPU,
  86. .cpu_func = ds_kernel_cpu,
  87. .nbuffers = 2, /* input -> output */
  88. .model = NULL
  89. };
  90. /* each block contains BLOCK_HEIGHT consecutive lines */
  91. static struct starpu_data_filter filter_y = {
  92. .filter_func = starpu_block_filter_func,
  93. .nchildren= HEIGHT/BLOCK_HEIGHT,
  94. .get_nchildren = NULL,
  95. .get_child_ops = NULL
  96. };
  97. static struct starpu_data_filter filter_u = {
  98. .filter_func = starpu_block_filter_func,
  99. .nchildren = (HEIGHT/2)/BLOCK_HEIGHT,
  100. .get_nchildren = NULL,
  101. .get_child_ops = NULL
  102. };
  103. static struct starpu_data_filter filter_v = {
  104. .filter_func = starpu_block_filter_func,
  105. .nchildren = (HEIGHT/2)/BLOCK_HEIGHT,
  106. .get_nchildren = NULL,
  107. .get_child_ops = NULL
  108. };
  109. int main(int argc, char **argv)
  110. {
  111. assert(HEIGHT % (2*BLOCK_HEIGHT) == 0);
  112. assert(HEIGHT % FACTOR == 0);
  113. parse_args(argc, argv);
  114. // fprintf(stderr, "Reading input file ...\n");
  115. /* how many frames ? */
  116. struct stat stbuf;
  117. stat(filename_in, &stbuf);
  118. size_t filesize = stbuf.st_size;
  119. unsigned nframes = filesize/FRAMESIZE;
  120. // fprintf(stderr, "filesize %lx (FRAME SIZE %lx NEW SIZE %lx); nframes %d\n", filesize, FRAMESIZE, NEW_FRAMESIZE, nframes);
  121. assert((filesize % sizeof(struct yuv_frame)) == 0);
  122. /* fetch input data */
  123. FILE *f_in = fopen(filename_in, "r");
  124. assert(f_in);
  125. struct yuv_frame *yuv_in_buffer = malloc(nframes*FRAMESIZE);
  126. fread(yuv_in_buffer, FRAMESIZE, nframes, f_in);
  127. /* allocate room for an output buffer */
  128. FILE *f_out = fopen(filename_out, "w+");
  129. assert(f_out);
  130. // fprintf(stderr, "Alloc output file ...\n");
  131. struct yuv_new_frame *yuv_out_buffer = calloc(nframes, NEW_FRAMESIZE);
  132. assert(yuv_out_buffer);
  133. starpu_data_handle *frame_y_handle = calloc(nframes, sizeof(starpu_data_handle));
  134. starpu_data_handle *frame_u_handle = calloc(nframes, sizeof(starpu_data_handle));
  135. starpu_data_handle *frame_v_handle = calloc(nframes, sizeof(starpu_data_handle));
  136. starpu_data_handle *new_frame_y_handle = calloc(nframes, sizeof(starpu_data_handle));
  137. starpu_data_handle *new_frame_u_handle = calloc(nframes, sizeof(starpu_data_handle));
  138. starpu_data_handle *new_frame_v_handle = calloc(nframes, sizeof(starpu_data_handle));
  139. starpu_init(NULL);
  140. /* register and partition all layers */
  141. unsigned frame;
  142. for (frame = 0; frame < nframes; frame++)
  143. {
  144. /* register Y layer */
  145. starpu_matrix_data_register(&frame_y_handle[frame], 0,
  146. (uintptr_t)&yuv_in_buffer[frame].y,
  147. WIDTH, WIDTH, HEIGHT, sizeof(uint8_t));
  148. starpu_data_partition(frame_y_handle[frame], &filter_y);
  149. starpu_matrix_data_register(&new_frame_y_handle[frame], 0,
  150. (uintptr_t)&yuv_out_buffer[frame].y,
  151. NEW_WIDTH, NEW_WIDTH, NEW_HEIGHT, sizeof(uint8_t));
  152. starpu_data_partition(new_frame_y_handle[frame], &filter_y);
  153. /* register U layer */
  154. starpu_matrix_data_register(&frame_u_handle[frame], 0,
  155. (uintptr_t)&yuv_in_buffer[frame].u,
  156. WIDTH/2, WIDTH/2, HEIGHT/2, sizeof(uint8_t));
  157. starpu_data_partition(frame_u_handle[frame], &filter_u);
  158. starpu_matrix_data_register(&new_frame_u_handle[frame], 0,
  159. (uintptr_t)&yuv_out_buffer[frame].u,
  160. NEW_WIDTH/2, NEW_WIDTH/2, NEW_HEIGHT/2, sizeof(uint8_t));
  161. starpu_data_partition(new_frame_u_handle[frame], &filter_u);
  162. /* register V layer */
  163. starpu_matrix_data_register(&frame_v_handle[frame], 0,
  164. (uintptr_t)&yuv_in_buffer[frame].v,
  165. WIDTH/2, WIDTH/2, HEIGHT/2, sizeof(uint8_t));
  166. starpu_data_partition(frame_v_handle[frame], &filter_v);
  167. starpu_matrix_data_register(&new_frame_v_handle[frame], 0,
  168. (uintptr_t)&yuv_out_buffer[frame].v,
  169. NEW_WIDTH/2, NEW_WIDTH/2, NEW_HEIGHT/2, sizeof(uint8_t));
  170. starpu_data_partition(new_frame_v_handle[frame], &filter_v);
  171. }
  172. /* how many tasks are there ? */
  173. unsigned nblocks_y = filter_y.filter_arg;
  174. unsigned nblocks_uv = filter_u.filter_arg;
  175. ds_callback_cnt = (nblocks_y + 2*nblocks_uv)*nframes;
  176. fprintf(stderr, "Start computation: there will be %d tasks for %d frames\n", ds_callback_cnt, nframes);
  177. gettimeofday(&start, NULL);
  178. /* do the computation */
  179. for (frame = 0; frame < nframes; frame++)
  180. {
  181. unsigned blocky;
  182. for (blocky = 0; blocky < nblocks_y; blocky++)
  183. {
  184. struct starpu_task *task = starpu_task_create();
  185. task->cl = &ds_codelet;
  186. task->callback_func = ds_callback;
  187. /* input */
  188. task->buffers[0].handle = starpu_data_get_sub_data(frame_y_handle[frame], 1, blocky);
  189. task->buffers[0].mode = STARPU_R;
  190. /* output */
  191. task->buffers[1].handle = starpu_data_get_sub_data(new_frame_y_handle[frame], 1, blocky);
  192. task->buffers[1].mode = STARPU_W;
  193. starpu_task_submit(task);
  194. }
  195. unsigned blocku;
  196. for (blocku = 0; blocku < nblocks_uv; blocku++)
  197. {
  198. struct starpu_task *task = starpu_task_create();
  199. task->cl = &ds_codelet;
  200. task->callback_func = ds_callback;
  201. /* input */
  202. task->buffers[0].handle = starpu_data_get_sub_data(frame_u_handle[frame], 1, blocku);
  203. task->buffers[0].mode = STARPU_R;
  204. /* output */
  205. task->buffers[1].handle = starpu_data_get_sub_data(new_frame_u_handle[frame], 1, blocku);
  206. task->buffers[1].mode = STARPU_W;
  207. starpu_task_submit(task);
  208. }
  209. unsigned blockv;
  210. for (blockv = 0; blockv < nblocks_uv; blockv++)
  211. {
  212. struct starpu_task *task = starpu_task_create();
  213. task->cl = &ds_codelet;
  214. task->callback_func = ds_callback;
  215. /* input */
  216. task->buffers[0].handle = starpu_data_get_sub_data(frame_v_handle[frame], 1, blockv);
  217. task->buffers[0].mode = STARPU_R;
  218. /* output */
  219. task->buffers[1].handle = starpu_data_get_sub_data(new_frame_v_handle[frame], 1, blockv);
  220. task->buffers[1].mode = STARPU_W;
  221. starpu_task_submit(task);
  222. }
  223. }
  224. pthread_mutex_lock(&ds_callback_mutex);
  225. if (!ds_callback_terminated)
  226. pthread_cond_wait(&ds_callback_cond, &ds_callback_mutex);
  227. pthread_mutex_unlock(&ds_callback_mutex);
  228. gettimeofday(&end, NULL);
  229. double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
  230. fprintf(stderr, "Computation took %f seconds\n", timing/1000000);
  231. fprintf(stderr, "FPS %f\n", (1000000*nframes)/timing);
  232. /* make sure all output buffers are sync'ed */
  233. for (frame = 0; frame < nframes; frame++)
  234. {
  235. starpu_data_acquire(new_frame_y_handle[frame], STARPU_R);
  236. starpu_data_acquire(new_frame_u_handle[frame], STARPU_R);
  237. starpu_data_acquire(new_frame_v_handle[frame], STARPU_R);
  238. }
  239. /* partition the layers into smaller parts */
  240. starpu_shutdown();
  241. fwrite(yuv_out_buffer, NEW_FRAMESIZE, nframes, f_out);
  242. return 0;
  243. }