mandelbrot.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2010-2020 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. /*
  17. * This computes the Mandelbrot set: the output image is split in horizontal
  18. * stripes, which are computed in parallel. We also make the same computation
  19. * several times, so that OpenGL interaction allows to browse through the set.
  20. */
  21. #include <starpu.h>
  22. #include <math.h>
  23. #include <limits.h>
  24. #ifdef STARPU_HAVE_X11
  25. #include <X11/Xlib.h>
  26. #include <X11/Xutil.h>
  27. int use_x11_p = 1;
  28. #endif
  29. #ifdef STARPU_HAVE_HELGRIND_H
  30. #include <valgrind/helgrind.h>
  31. #endif
  32. #ifndef ANNOTATE_HAPPENS_BEFORE
  33. #define ANNOTATE_HAPPENS_BEFORE(obj) ((void)0)
  34. #endif
  35. #ifndef ANNOTATE_HAPPENS_AFTER
  36. #define ANNOTATE_HAPPENS_AFTER(obj) ((void)0)
  37. #endif
  38. int demo_p = 0;
  39. static double demozoom_p = 0.05;
  40. /* NB: The X11 code is inspired from the http://locklessinc.com/articles/mandelbrot/ article */
  41. static int nblocks_p = 20;
  42. static int height_p = 400;
  43. static int width_p = 640;
  44. static int maxIt_p = 20000; /* max number of iteration in the Mandelbrot function */
  45. static int niter_p = -1; /* number of loops in case we don't use X11, -1 means infinite */
  46. static int use_spmd_p = 0;
  47. static double leftX_p = -0.745;
  48. static double rightX_p = -0.74375;
  49. static double topY_p = .15;
  50. static double bottomY_p = .14875;
  51. /*
  52. * X11 window management
  53. */
  54. #ifdef STARPU_HAVE_X11
  55. /* X11 data */
  56. static Display *dpy_p;
  57. static Window win_p;
  58. static XImage *bitmap_p;
  59. static GC gc_p;
  60. static KeySym Left_p=-1, Right_p, Down_p, Up_p, Alt_p;
  61. static void exit_x11(void)
  62. {
  63. XDestroyImage(bitmap_p);
  64. XDestroyWindow(dpy_p, win_p);
  65. XCloseDisplay(dpy_p);
  66. }
  67. static void init_x11(int width, int height, unsigned *buffer)
  68. {
  69. /* Attempt to open the display */
  70. dpy_p = XOpenDisplay(NULL);
  71. /* Failure */
  72. if (!dpy_p)
  73. exit(0);
  74. unsigned long white = WhitePixel(dpy_p, DefaultScreen(dpy_p));
  75. unsigned long black = BlackPixel(dpy_p, DefaultScreen(dpy_p));
  76. win_p = XCreateSimpleWindow(dpy_p, DefaultRootWindow(dpy_p), 0, 0,
  77. width, height, 0, black, white);
  78. /* We want to be notified when the window appears */
  79. XSelectInput(dpy_p, win_p, StructureNotifyMask);
  80. /* Make it appear */
  81. XMapWindow(dpy_p, win_p);
  82. XTextProperty tp;
  83. char name[128] = "Mandelbrot - StarPU";
  84. char *n = name;
  85. Status st = XStringListToTextProperty(&n, 1, &tp);
  86. if (st)
  87. XSetWMName(dpy_p, win_p, &tp);
  88. /* Wait for the MapNotify event */
  89. XFlush(dpy_p);
  90. int depth = DefaultDepth(dpy_p, DefaultScreen(dpy_p));
  91. Visual *visual = DefaultVisual(dpy_p, DefaultScreen(dpy_p));
  92. /* Make bitmap */
  93. bitmap_p = XCreateImage(dpy_p, visual, depth,
  94. ZPixmap, 0, (char *)buffer,
  95. width, height, 32, 0);
  96. /* Init GC */
  97. gc_p = XCreateGC(dpy_p, win_p, 0, NULL);
  98. XSetForeground(dpy_p, gc_p, black);
  99. XSelectInput(dpy_p, win_p, ExposureMask | KeyPressMask | StructureNotifyMask);
  100. Atom wmDeleteMessage;
  101. wmDeleteMessage = XInternAtom(dpy_p, "WM_DELETE_WINDOW", False);
  102. XSetWMProtocols(dpy_p, win_p, &wmDeleteMessage, 1);
  103. Left_p = XStringToKeysym ("Left");
  104. Right_p = XStringToKeysym ("Right");
  105. Up_p = XStringToKeysym ("Up");
  106. Down_p = XStringToKeysym ("Down");
  107. Alt_p = XStringToKeysym ("Alt");
  108. }
  109. static int handle_events(void)
  110. {
  111. XEvent event;
  112. XNextEvent(dpy_p, &event);
  113. if (event.type == KeyPress)
  114. {
  115. KeySym key;
  116. char text[255];
  117. XLookupString(&event.xkey,text,255,&key,0);
  118. if (key == Left_p)
  119. {
  120. double widthX = rightX_p - leftX_p;
  121. leftX_p -= 0.25*widthX;
  122. rightX_p -= 0.25*widthX;
  123. }
  124. else if (key == Right_p)
  125. {
  126. double widthX = rightX_p - leftX_p;
  127. leftX_p += 0.25*widthX;
  128. rightX_p += 0.25*widthX;
  129. }
  130. else if (key == Up_p)
  131. {
  132. double heightY = topY_p - bottomY_p;
  133. topY_p += 0.25*heightY;
  134. bottomY_p += 0.25*heightY;
  135. }
  136. else if (key == Down_p)
  137. {
  138. double heightY = topY_p - bottomY_p;
  139. topY_p -= 0.25*heightY;
  140. bottomY_p -= 0.25*heightY;
  141. }
  142. else
  143. {
  144. double widthX = rightX_p - leftX_p;
  145. double heightY = topY_p - bottomY_p;
  146. if (text[0] == '-')
  147. {
  148. /* Zoom out */
  149. leftX_p -= 0.125*widthX;
  150. rightX_p += 0.125*widthX;
  151. topY_p += 0.125*heightY;
  152. bottomY_p -= 0.125*heightY;
  153. }
  154. else if (text[0] == '+')
  155. {
  156. /* Zoom in */
  157. leftX_p += 0.125*widthX;
  158. rightX_p -= 0.125*widthX;
  159. topY_p -= 0.125*heightY;
  160. bottomY_p += 0.125*heightY;
  161. }
  162. }
  163. if (text[0]=='q')
  164. {
  165. return -1;
  166. }
  167. }
  168. if (event.type==ButtonPress)
  169. {
  170. /* tell where the mouse Button was Pressed */
  171. printf("You pressed a button at (%i,%i)\n",
  172. event.xbutton.x,event.xbutton.y);
  173. }
  174. return 0;
  175. }
  176. #endif
  177. /*
  178. * OpenCL kernel
  179. */
  180. #ifdef STARPU_USE_OPENCL
  181. char *mandelbrot_opencl_src = "\
  182. #pragma OPENCL EXTENSION cl_khr_fp64 : enable\n\
  183. #define MIN(a,b) (((a)<(b))? (a) : (b)) \n\
  184. __kernel void mandelbrot_kernel(__global unsigned* a, \n\
  185. double leftX, double topY, \n\
  186. double stepX, double stepY, \n\
  187. int maxIt, int iby, int block_size, int width) \n\
  188. { \n\
  189. size_t id_x = get_global_id(0); \n\
  190. size_t id_y = get_global_id(1); \n\
  191. if ((id_x < width) && (id_y < block_size)) \n\
  192. { \n\
  193. double xc = leftX + id_x * stepX; \n\
  194. double yc = topY - (id_y + iby*block_size) * stepY; \n\
  195. int it; \n\
  196. double x,y; \n\
  197. x = y = (double)0.0; \n\
  198. for (it=0;it<maxIt;it++) \n\
  199. { \n\
  200. double x2 = x*x; \n\
  201. double y2 = y*y; \n\
  202. if (x2+y2 > 4.0) break; \n\
  203. double twoxy = (double)2.0*x*y; \n\
  204. x = x2 - y2 + xc; \n\
  205. y = twoxy + yc; \n\
  206. } \n\
  207. unsigned int v = MIN((1024*((float)(it)/(2000))), 256); \n\
  208. a[id_x + width * id_y] = (v<<16|(255-v)<<8); \n\
  209. } \n\
  210. }";
  211. static struct starpu_opencl_program opencl_programs;
  212. static void compute_block_opencl(void *descr[], void *cl_arg)
  213. {
  214. int iby, block_size;
  215. double stepX, stepY;
  216. int *pcnt; /* unused for CUDA tasks */
  217. starpu_codelet_unpack_args(cl_arg, &iby, &block_size, &stepX, &stepY, &pcnt);
  218. cl_mem data = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(descr[0]);
  219. cl_kernel kernel;
  220. cl_command_queue queue;
  221. cl_int err;
  222. int id = starpu_worker_get_id_check();
  223. int devid = starpu_worker_get_devid(id);
  224. err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_programs, "mandelbrot_kernel", devid);
  225. if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
  226. clSetKernelArg(kernel, 0, sizeof(data), &data);
  227. clSetKernelArg(kernel, 1, sizeof(leftX_p), &leftX_p);
  228. clSetKernelArg(kernel, 2, sizeof(topY_p), &topY_p);
  229. clSetKernelArg(kernel, 3, sizeof(stepX), &stepX);
  230. clSetKernelArg(kernel, 4, sizeof(stepY), &stepY);
  231. clSetKernelArg(kernel, 5, sizeof(maxIt_p), &maxIt_p);
  232. clSetKernelArg(kernel, 6, sizeof(iby), &iby);
  233. clSetKernelArg(kernel, 7, sizeof(block_size), &block_size);
  234. clSetKernelArg(kernel, 8, sizeof(width_p), &width_p);
  235. unsigned dim = 16;
  236. size_t local[2] = {dim, 1};
  237. size_t global[2] = {width_p, block_size};
  238. err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, global, local, 0, NULL, NULL);
  239. if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
  240. starpu_opencl_release_kernel(kernel);
  241. }
  242. #endif
  243. /*
  244. * CPU kernel
  245. */
  246. static void compute_block(void *descr[], void *cl_arg)
  247. {
  248. int iby, block_size;
  249. double stepX, stepY;
  250. int *pcnt; /* unused for sequential tasks */
  251. starpu_codelet_unpack_args(cl_arg, &iby, &block_size, &stepX, &stepY, &pcnt);
  252. unsigned *data = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]);
  253. int local_iy;
  254. for (local_iy = 0; local_iy < block_size; local_iy++)
  255. {
  256. int ix, iy;
  257. iy = iby*block_size + local_iy;
  258. for (ix = 0; ix < width_p; ix++)
  259. {
  260. double cx = leftX_p + ix * stepX;
  261. double cy = topY_p - iy * stepY;
  262. /* Z = X+I*Y */
  263. double x = 0;
  264. double y = 0;
  265. int it;
  266. for (it = 0; it < maxIt_p; it++)
  267. {
  268. double x2 = x*x;
  269. double y2 = y*y;
  270. /* Stop iterations when |Z| > 2 */
  271. if (x2 + y2 > 4.0)
  272. break;
  273. double twoxy = 2.0*x*y;
  274. /* Z = Z^2 + C */
  275. x = x2 - y2 + cx;
  276. y = twoxy + cy;
  277. }
  278. unsigned int v = STARPU_MIN((1024*((float)(it)/(2000))), 256);
  279. data[ix + local_iy*width_p] = (v<<16|(255-v)<<8);
  280. }
  281. }
  282. }
  283. static void compute_block_spmd(void *descr[], void *cl_arg)
  284. {
  285. int iby, block_size;
  286. double stepX, stepY;
  287. int *pcnt;
  288. starpu_codelet_unpack_args(cl_arg, &iby, &block_size, &stepX, &stepY, &pcnt);
  289. unsigned *data = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]);
  290. while (1)
  291. {
  292. int ix, iy; /* global coordinates */
  293. int local_iy; /* current line */
  294. local_iy = STARPU_ATOMIC_ADD((unsigned int *)pcnt, 1) - 1;
  295. ANNOTATE_HAPPENS_BEFORE(pcnt);
  296. if (local_iy >= block_size)
  297. {
  298. ANNOTATE_HAPPENS_AFTER(pcnt);
  299. break;
  300. }
  301. iy = iby*block_size + local_iy;
  302. for (ix = 0; ix < width_p; ix++)
  303. {
  304. double cx = leftX_p + ix * stepX;
  305. double cy = topY_p - iy * stepY;
  306. /* Z = X+I*Y */
  307. double x = 0;
  308. double y = 0;
  309. int it;
  310. for (it = 0; it < maxIt_p; it++)
  311. {
  312. double x2 = x*x;
  313. double y2 = y*y;
  314. /* Stop iterations when |Z| > 2 */
  315. if (x2 + y2 > 4.0)
  316. break;
  317. double twoxy = 2.0*x*y;
  318. /* Z = Z^2 + C */
  319. x = x2 - y2 + cx;
  320. y = twoxy + cy;
  321. }
  322. unsigned int v = STARPU_MIN((1024*((float)(it)/(2000))), 256);
  323. data[ix + local_iy*width_p] = (v<<16|(255-v)<<8);
  324. }
  325. }
  326. }
  327. static struct starpu_codelet spmd_mandelbrot_cl =
  328. {
  329. .type = STARPU_SPMD,
  330. .max_parallelism = INT_MAX,
  331. .cpu_funcs = {compute_block_spmd},
  332. #ifdef STARPU_USE_OPENCL
  333. .opencl_funcs = {compute_block_opencl},
  334. .opencl_flags = {STARPU_OPENCL_ASYNC},
  335. #endif
  336. .nbuffers = 1
  337. };
  338. static struct starpu_codelet mandelbrot_cl =
  339. {
  340. .type = STARPU_SEQ,
  341. .cpu_funcs = {compute_block},
  342. #ifdef STARPU_USE_OPENCL
  343. .opencl_funcs = {compute_block_opencl},
  344. .opencl_flags = {STARPU_OPENCL_ASYNC},
  345. #endif
  346. .nbuffers = 1
  347. };
  348. static void parse_args(int argc, char **argv)
  349. {
  350. int i;
  351. for (i = 1; i < argc; i++)
  352. {
  353. if (strcmp(argv[i], "-h") == 0)
  354. {
  355. fprintf(stderr, "Usage: %s [-h] [ -width 800] [-height 600] [-nblocks 16] [-no-x11] [-pos leftx:rightx:bottomy:topy] [-niter 1000] [-spmd] [-demo] [-demozoom 0.2]\n", argv[0]);
  356. exit(-1);
  357. }
  358. if (strcmp(argv[i], "-width") == 0)
  359. {
  360. char *argptr;
  361. width_p = strtol(argv[++i], &argptr, 10);
  362. }
  363. if (strcmp(argv[i], "-height") == 0)
  364. {
  365. char *argptr;
  366. height_p = strtol(argv[++i], &argptr, 10);
  367. }
  368. if (strcmp(argv[i], "-nblocks") == 0)
  369. {
  370. char *argptr;
  371. nblocks_p = strtol(argv[++i], &argptr, 10);
  372. }
  373. if (strcmp(argv[i], "-niter") == 0)
  374. {
  375. char *argptr;
  376. niter_p = strtol(argv[++i], &argptr, 10);
  377. }
  378. if (strcmp(argv[i], "-pos") == 0)
  379. {
  380. int ret = sscanf(argv[++i], "%lf:%lf:%lf:%lf", &leftX_p, &rightX_p,
  381. &bottomY_p, &topY_p);
  382. assert(ret == 4);
  383. }
  384. if (strcmp(argv[i], "-demo") == 0)
  385. {
  386. demo_p = 1;
  387. leftX_p = -50.22749575062760;
  388. rightX_p = 48.73874621262927;
  389. topY_p = -49.35016705749115;
  390. bottomY_p = 49.64891691946615;
  391. }
  392. if (strcmp(argv[i], "-demozoom") == 0)
  393. {
  394. char *argptr;
  395. demozoom_p = strtof(argv[++i], &argptr);
  396. }
  397. if (strcmp(argv[i], "-no-x11") == 0)
  398. {
  399. #ifdef STARPU_HAVE_X11
  400. use_x11_p = 0;
  401. #endif
  402. }
  403. if (strcmp(argv[i], "-spmd") == 0)
  404. {
  405. use_spmd_p = 1;
  406. }
  407. }
  408. }
  409. int main(int argc, char **argv)
  410. {
  411. int ret;
  412. parse_args(argc, argv);
  413. /* We don't use CUDA in that example */
  414. struct starpu_conf conf;
  415. starpu_conf_init(&conf);
  416. conf.ncuda = 0;
  417. if (use_spmd_p)
  418. {
  419. conf.sched_policy_name = "peager";
  420. }
  421. ret = starpu_init(&conf);
  422. STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
  423. unsigned *buffer;
  424. starpu_malloc((void **)&buffer, height_p*width_p*sizeof(unsigned));
  425. #ifdef STARPU_HAVE_X11
  426. if (use_x11_p)
  427. init_x11(width_p, height_p, buffer);
  428. #endif
  429. int block_size = height_p/nblocks_p;
  430. STARPU_ASSERT((height_p % nblocks_p) == 0);
  431. #ifdef STARPU_USE_OPENCL
  432. starpu_opencl_load_opencl_from_string(mandelbrot_opencl_src, &opencl_programs, NULL);
  433. #endif
  434. starpu_data_handle_t block_handles[nblocks_p];
  435. int iby;
  436. for (iby = 0; iby < nblocks_p; iby++)
  437. {
  438. unsigned *data = &buffer[iby*block_size*width_p];
  439. starpu_vector_data_register(&block_handles[iby], STARPU_MAIN_RAM,
  440. (uintptr_t)data, block_size*width_p, sizeof(unsigned));
  441. }
  442. unsigned iter = 0;
  443. double start, end;
  444. start = starpu_timing_now();
  445. while (niter_p-- != 0)
  446. {
  447. double stepX = (rightX_p - leftX_p)/width_p;
  448. double stepY = (topY_p - bottomY_p)/height_p;
  449. /* In case we have a SPMD task, each worker will grab tasks in
  450. * a greedy and select which piece of image to compute by
  451. * incrementing a counter shared by all the workers within the
  452. * parallel task. */
  453. int per_block_cnt[nblocks_p];
  454. starpu_iteration_push(niter_p);
  455. for (iby = 0; iby < nblocks_p; iby++)
  456. {
  457. per_block_cnt[iby] = 0;
  458. int *pcnt = &per_block_cnt[iby];
  459. ret = starpu_task_insert(use_spmd_p?&spmd_mandelbrot_cl:&mandelbrot_cl,
  460. STARPU_VALUE, &iby, sizeof(iby),
  461. STARPU_VALUE, &block_size, sizeof(block_size),
  462. STARPU_VALUE, &stepX, sizeof(stepX),
  463. STARPU_VALUE, &stepY, sizeof(stepY),
  464. STARPU_W, block_handles[iby],
  465. STARPU_VALUE, &pcnt, sizeof(int *),
  466. STARPU_TAG_ONLY, ((starpu_tag_t)niter_p)*nblocks_p + iby,
  467. 0);
  468. STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
  469. }
  470. for (iby = 0; iby < nblocks_p; iby++)
  471. {
  472. #ifdef STARPU_HAVE_X11
  473. if (use_x11_p)
  474. {
  475. starpu_data_acquire(block_handles[iby], STARPU_R);
  476. XPutImage(dpy_p, win_p, gc_p, bitmap_p,
  477. 0, iby*block_size,
  478. 0, iby*block_size,
  479. width_p, block_size);
  480. starpu_data_release(block_handles[iby]);
  481. }
  482. #endif
  483. }
  484. starpu_iteration_pop();
  485. if (demo_p)
  486. {
  487. /* Zoom in */
  488. double zoom_factor = demozoom_p;
  489. double widthX = rightX_p - leftX_p;
  490. double heightY = topY_p - bottomY_p;
  491. iter++;
  492. /* If the window is too small, we reset the demo and display some statistics */
  493. if ((fabs(widthX) < 1e-12) || (fabs(heightY) < 1e-12))
  494. {
  495. leftX_p = -50.22749575062760;
  496. rightX_p = 48.73874621262927;
  497. topY_p = -49.35016705749115;
  498. bottomY_p = 49.64891691946615;
  499. end = starpu_timing_now();
  500. double timing = end - start;
  501. fprintf(stderr, "Time to generate %u frames : %f s\n", iter, timing/1000000.0);
  502. fprintf(stderr, "Average FPS: %f\n", ((double)iter*1e+6)/timing);
  503. /* Reset counters */
  504. iter = 0;
  505. start = starpu_timing_now();
  506. }
  507. else
  508. {
  509. leftX_p += (zoom_factor/2)*widthX;
  510. rightX_p -= (zoom_factor/2)*widthX;
  511. topY_p -= (zoom_factor/2)*heightY;
  512. bottomY_p += (zoom_factor/2)*heightY;
  513. }
  514. }
  515. #ifdef STARPU_HAVE_X11
  516. else if (use_x11_p && handle_events())
  517. break;
  518. #endif
  519. }
  520. #ifdef STARPU_HAVE_X11
  521. if (use_x11_p)
  522. exit_x11();
  523. #endif
  524. for (iby = 0; iby < nblocks_p; iby++)
  525. starpu_data_unregister(block_handles[iby]);
  526. /* starpu_data_free_pinned_if_possible(buffer); */
  527. starpu_shutdown();
  528. return 0;
  529. }