mandelbrot.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2010, 2011, 2014-2015 Université de Bordeaux
  4. * Copyright (C) 2010, 2011, 2012, 2013, 2016 CNRS
  5. *
  6. * StarPU is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU Lesser General Public License as published by
  8. * the Free Software Foundation; either version 2.1 of the License, or (at
  9. * your option) any later version.
  10. *
  11. * StarPU is distributed in the hope that it will be useful, but
  12. * WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  14. *
  15. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  16. */
  17. /*
  18. * This computes the Mandelbrot set: the output image is split in horizontal
  19. * stripes, which are computed in parallel. We also make the same computation
  20. * several times, so that OpenGL interaction allows to browse through the set.
  21. */
  22. #include <starpu.h>
  23. #include <math.h>
  24. #include <limits.h>
  25. #ifdef STARPU_HAVE_X11
  26. #include <X11/Xlib.h>
  27. #include <X11/Xutil.h>
  28. int use_x11 = 1;
  29. #endif
  30. #ifdef STARPU_HAVE_HELGRIND_H
  31. #include <valgrind/helgrind.h>
  32. #endif
  33. #ifndef ANNOTATE_HAPPENS_BEFORE
  34. #define ANNOTATE_HAPPENS_BEFORE(obj) ((void)0)
  35. #endif
  36. #ifndef ANNOTATE_HAPPENS_AFTER
  37. #define ANNOTATE_HAPPENS_AFTER(obj) ((void)0)
  38. #endif
  39. int demo = 0;
  40. static double demozoom = 0.05;
  41. /* NB: The X11 code is inspired from the http://locklessinc.com/articles/mandelbrot/ article */
  42. static int nblocks = 20;
  43. static int height = 400;
  44. static int width = 640;
  45. static int maxIt = 20000; /* max number of iteration in the Mandelbrot function */
  46. static int niter = -1; /* number of loops in case we don't use X11, -1 means infinite */
  47. static int use_spmd = 0;
  48. static double leftX = -0.745;
  49. static double rightX = -0.74375;
  50. static double topY = .15;
  51. static double bottomY = .14875;
  52. /*
  53. * X11 window management
  54. */
  55. #ifdef STARPU_HAVE_X11
  56. /* X11 data */
  57. static Display *dpy;
  58. static Window win;
  59. static XImage *bitmap;
  60. static GC gc;
  61. static KeySym Left=-1, Right, Down, Up, Alt ;
  62. static void exit_x11(void)
  63. {
  64. XDestroyImage(bitmap);
  65. XDestroyWindow(dpy, win);
  66. XCloseDisplay(dpy);
  67. }
  68. static void init_x11(int width, int height, unsigned *buffer)
  69. {
  70. /* Attempt to open the display */
  71. dpy = XOpenDisplay(NULL);
  72. /* Failure */
  73. if (!dpy)
  74. exit(0);
  75. unsigned long white = WhitePixel(dpy,DefaultScreen(dpy));
  76. unsigned long black = BlackPixel(dpy,DefaultScreen(dpy));
  77. win = XCreateSimpleWindow(dpy, DefaultRootWindow(dpy), 0, 0,
  78. width, height, 0, black, white);
  79. /* We want to be notified when the window appears */
  80. XSelectInput(dpy, win, StructureNotifyMask);
  81. /* Make it appear */
  82. XMapWindow(dpy, win);
  83. XTextProperty tp;
  84. char name[128] = "Mandelbrot - StarPU";
  85. char *n = name;
  86. Status st = XStringListToTextProperty(&n, 1, &tp);
  87. if (st)
  88. XSetWMName(dpy, win, &tp);
  89. /* Wait for the MapNotify event */
  90. XFlush(dpy);
  91. int depth = DefaultDepth(dpy, DefaultScreen(dpy));
  92. Visual *visual = DefaultVisual(dpy, DefaultScreen(dpy));
  93. /* Make bitmap */
  94. bitmap = XCreateImage(dpy, visual, depth,
  95. ZPixmap, 0, (char *)buffer,
  96. width, height, 32, 0);
  97. /* Init GC */
  98. gc = XCreateGC(dpy, win, 0, NULL);
  99. XSetForeground(dpy, gc, black);
  100. XSelectInput(dpy, win, ExposureMask | KeyPressMask | StructureNotifyMask);
  101. Atom wmDeleteMessage;
  102. wmDeleteMessage = XInternAtom(dpy, "WM_DELETE_WINDOW", False);
  103. XSetWMProtocols(dpy, win, &wmDeleteMessage, 1);
  104. Left = XStringToKeysym ("Left");
  105. Right = XStringToKeysym ("Right");
  106. Up = XStringToKeysym ("Up");
  107. Down = XStringToKeysym ("Down");
  108. Alt = XStringToKeysym ("Alt");
  109. }
  110. static int handle_events(void)
  111. {
  112. XEvent event;
  113. XNextEvent(dpy, &event);
  114. if (event.type == KeyPress)
  115. {
  116. KeySym key;
  117. char text[255];
  118. XLookupString(&event.xkey,text,255,&key,0);
  119. if (key == Left)
  120. {
  121. double widthX = rightX - leftX;
  122. leftX -= 0.25*widthX;
  123. rightX -= 0.25*widthX;
  124. }
  125. else if (key == Right)
  126. {
  127. double widthX = rightX - leftX;
  128. leftX += 0.25*widthX;
  129. rightX += 0.25*widthX;
  130. }
  131. else if (key == Up)
  132. {
  133. double heightY = topY - bottomY;
  134. topY += 0.25*heightY;
  135. bottomY += 0.25*heightY;
  136. }
  137. else if (key == Down)
  138. {
  139. double heightY = topY - bottomY;
  140. topY -= 0.25*heightY;
  141. bottomY -= 0.25*heightY;
  142. }
  143. else
  144. {
  145. double widthX = rightX - leftX;
  146. double heightY = topY - bottomY;
  147. if (text[0] == '-')
  148. {
  149. /* Zoom out */
  150. leftX -= 0.125*widthX;
  151. rightX += 0.125*widthX;
  152. topY += 0.125*heightY;
  153. bottomY -= 0.125*heightY;
  154. }
  155. else if (text[0] == '+')
  156. {
  157. /* Zoom in */
  158. leftX += 0.125*widthX;
  159. rightX -= 0.125*widthX;
  160. topY -= 0.125*heightY;
  161. bottomY += 0.125*heightY;
  162. }
  163. }
  164. if (text[0]=='q')
  165. {
  166. return -1;
  167. }
  168. }
  169. if (event.type==ButtonPress)
  170. {
  171. /* tell where the mouse Button was Pressed */
  172. printf("You pressed a button at (%i,%i)\n",
  173. event.xbutton.x,event.xbutton.y);
  174. }
  175. return 0;
  176. }
  177. #endif
  178. /*
  179. * OpenCL kernel
  180. */
  181. #ifdef STARPU_USE_OPENCL
  182. char *mandelbrot_opencl_src = "\
  183. #pragma OPENCL EXTENSION cl_khr_fp64 : enable\n\
  184. #define MIN(a,b) (((a)<(b))? (a) : (b)) \n\
  185. __kernel void mandelbrot_kernel(__global unsigned* a, \n\
  186. double leftX, double topY, \n\
  187. double stepX, double stepY, \n\
  188. int maxIt, int iby, int block_size, int width) \n\
  189. { \n\
  190. size_t id_x = get_global_id(0); \n\
  191. size_t id_y = get_global_id(1); \n\
  192. if ((id_x < width) && (id_y < block_size)) \n\
  193. { \n\
  194. double xc = leftX + id_x * stepX; \n\
  195. double yc = topY - (id_y + iby*block_size) * stepY; \n\
  196. int it; \n\
  197. double x,y; \n\
  198. x = y = (double)0.0; \n\
  199. for (it=0;it<maxIt;it++) \n\
  200. { \n\
  201. double x2 = x*x; \n\
  202. double y2 = y*y; \n\
  203. if (x2+y2 > 4.0) break; \n\
  204. double twoxy = (double)2.0*x*y; \n\
  205. x = x2 - y2 + xc; \n\
  206. y = twoxy + yc; \n\
  207. } \n\
  208. unsigned int v = MIN((1024*((float)(it)/(2000))), 256); \n\
  209. a[id_x + width * id_y] = (v<<16|(255-v)<<8); \n\
  210. } \n\
  211. }";
  212. static struct starpu_opencl_program opencl_programs;
  213. static void compute_block_opencl(void *descr[], void *cl_arg)
  214. {
  215. int iby, block_size;
  216. double stepX, stepY;
  217. int *pcnt; /* unused for CUDA tasks */
  218. starpu_codelet_unpack_args(cl_arg, &iby, &block_size, &stepX, &stepY, &pcnt);
  219. cl_mem data = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(descr[0]);
  220. cl_kernel kernel;
  221. cl_command_queue queue;
  222. cl_int err;
  223. int id = starpu_worker_get_id_check();
  224. int devid = starpu_worker_get_devid(id);
  225. err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_programs, "mandelbrot_kernel", devid);
  226. if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
  227. clSetKernelArg(kernel, 0, sizeof(data), &data);
  228. clSetKernelArg(kernel, 1, sizeof(leftX), &leftX);
  229. clSetKernelArg(kernel, 2, sizeof(topY), &topY);
  230. clSetKernelArg(kernel, 3, sizeof(stepX), &stepX);
  231. clSetKernelArg(kernel, 4, sizeof(stepY), &stepY);
  232. clSetKernelArg(kernel, 5, sizeof(maxIt), &maxIt);
  233. clSetKernelArg(kernel, 6, sizeof(iby), &iby);
  234. clSetKernelArg(kernel, 7, sizeof(block_size), &block_size);
  235. clSetKernelArg(kernel, 8, sizeof(width), &width);
  236. unsigned dim = 16;
  237. size_t local[2] = {dim, 1};
  238. size_t global[2] = {width, block_size};
  239. err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, global, local, 0, NULL, NULL);
  240. if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
  241. starpu_opencl_release_kernel(kernel);
  242. }
  243. #endif
  244. /*
  245. * CPU kernel
  246. */
  247. static void compute_block(void *descr[], void *cl_arg)
  248. {
  249. int iby, block_size;
  250. double stepX, stepY;
  251. int *pcnt; /* unused for sequential tasks */
  252. starpu_codelet_unpack_args(cl_arg, &iby, &block_size, &stepX, &stepY, &pcnt);
  253. unsigned *data = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]);
  254. int local_iy;
  255. for (local_iy = 0; local_iy < block_size; local_iy++)
  256. {
  257. int ix, iy;
  258. iy = iby*block_size + local_iy;
  259. for (ix = 0; ix < width; ix++)
  260. {
  261. double cx = leftX + ix * stepX;
  262. double cy = topY - iy * stepY;
  263. /* Z = X+I*Y */
  264. double x = 0;
  265. double y = 0;
  266. int it;
  267. for (it = 0; it < maxIt; it++)
  268. {
  269. double x2 = x*x;
  270. double y2 = y*y;
  271. /* Stop iterations when |Z| > 2 */
  272. if (x2 + y2 > 4.0)
  273. break;
  274. double twoxy = 2.0*x*y;
  275. /* Z = Z^2 + C */
  276. x = x2 - y2 + cx;
  277. y = twoxy + cy;
  278. }
  279. unsigned int v = STARPU_MIN((1024*((float)(it)/(2000))), 256);
  280. data[ix + local_iy*width] = (v<<16|(255-v)<<8);
  281. }
  282. }
  283. }
  284. static void compute_block_spmd(void *descr[], void *cl_arg)
  285. {
  286. int iby, block_size;
  287. double stepX, stepY;
  288. int *pcnt;
  289. starpu_codelet_unpack_args(cl_arg, &iby, &block_size, &stepX, &stepY, &pcnt);
  290. unsigned *data = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]);
  291. while (1)
  292. {
  293. int ix, iy; /* global coordinates */
  294. int local_iy; /* current line */
  295. local_iy = STARPU_ATOMIC_ADD((unsigned int *)pcnt, 1) - 1;
  296. ANNOTATE_HAPPENS_BEFORE(pcnt);
  297. if (local_iy >= block_size)
  298. {
  299. ANNOTATE_HAPPENS_AFTER(pcnt);
  300. break;
  301. }
  302. iy = iby*block_size + local_iy;
  303. for (ix = 0; ix < width; ix++)
  304. {
  305. double cx = leftX + ix * stepX;
  306. double cy = topY - iy * stepY;
  307. /* Z = X+I*Y */
  308. double x = 0;
  309. double y = 0;
  310. int it;
  311. for (it = 0; it < maxIt; it++)
  312. {
  313. double x2 = x*x;
  314. double y2 = y*y;
  315. /* Stop iterations when |Z| > 2 */
  316. if (x2 + y2 > 4.0)
  317. break;
  318. double twoxy = 2.0*x*y;
  319. /* Z = Z^2 + C */
  320. x = x2 - y2 + cx;
  321. y = twoxy + cy;
  322. }
  323. unsigned int v = STARPU_MIN((1024*((float)(it)/(2000))), 256);
  324. data[ix + local_iy*width] = (v<<16|(255-v)<<8);
  325. }
  326. }
  327. }
  328. static struct starpu_codelet spmd_mandelbrot_cl =
  329. {
  330. .type = STARPU_SPMD,
  331. .max_parallelism = INT_MAX,
  332. .cpu_funcs = {compute_block_spmd},
  333. #ifdef STARPU_USE_OPENCL
  334. .opencl_funcs = {compute_block_opencl},
  335. .opencl_flags = {STARPU_OPENCL_ASYNC},
  336. #endif
  337. .nbuffers = 1
  338. };
  339. static struct starpu_codelet mandelbrot_cl =
  340. {
  341. .type = STARPU_SEQ,
  342. .cpu_funcs = {compute_block},
  343. #ifdef STARPU_USE_OPENCL
  344. .opencl_funcs = {compute_block_opencl},
  345. .opencl_flags = {STARPU_OPENCL_ASYNC},
  346. #endif
  347. .nbuffers = 1
  348. };
  349. static void parse_args(int argc, char **argv)
  350. {
  351. int i;
  352. for (i = 1; i < argc; i++)
  353. {
  354. if (strcmp(argv[i], "-h") == 0)
  355. {
  356. fprintf(stderr, "Usage: %s [-h] [ -width 800] [-height 600] [-nblocks 16] [-no-x11] [-pos leftx:rightx:bottomy:topy] [-niter 1000] [-spmd] [-demo] [-demozoom 0.2]\n", argv[0]);
  357. exit(-1);
  358. }
  359. if (strcmp(argv[i], "-width") == 0)
  360. {
  361. char *argptr;
  362. width = strtol(argv[++i], &argptr, 10);
  363. }
  364. if (strcmp(argv[i], "-height") == 0)
  365. {
  366. char *argptr;
  367. height = strtol(argv[++i], &argptr, 10);
  368. }
  369. if (strcmp(argv[i], "-nblocks") == 0)
  370. {
  371. char *argptr;
  372. nblocks = strtol(argv[++i], &argptr, 10);
  373. }
  374. if (strcmp(argv[i], "-niter") == 0)
  375. {
  376. char *argptr;
  377. niter = strtol(argv[++i], &argptr, 10);
  378. }
  379. if (strcmp(argv[i], "-pos") == 0)
  380. {
  381. int ret = sscanf(argv[++i], "%lf:%lf:%lf:%lf", &leftX, &rightX, &bottomY, &topY);
  382. assert(ret == 4);
  383. }
  384. if (strcmp(argv[i], "-demo") == 0)
  385. {
  386. demo = 1;
  387. leftX = -50.22749575062760;
  388. rightX = 48.73874621262927;
  389. topY = -49.35016705749115;
  390. bottomY = 49.64891691946615;
  391. }
  392. if (strcmp(argv[i], "-demozoom") == 0)
  393. {
  394. char *argptr;
  395. demozoom = strtof(argv[++i], &argptr);
  396. }
  397. if (strcmp(argv[i], "-no-x11") == 0)
  398. {
  399. #ifdef STARPU_HAVE_X11
  400. use_x11 = 0;
  401. #endif
  402. }
  403. if (strcmp(argv[i], "-spmd") == 0)
  404. {
  405. use_spmd = 1;
  406. }
  407. }
  408. }
  409. int main(int argc, char **argv)
  410. {
  411. int ret;
  412. parse_args(argc, argv);
  413. /* We don't use CUDA in that example */
  414. struct starpu_conf conf;
  415. starpu_conf_init(&conf);
  416. conf.ncuda = 0;
  417. if (use_spmd)
  418. conf.sched_policy_name = "peager";
  419. ret = starpu_init(&conf);
  420. STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
  421. unsigned *buffer;
  422. starpu_malloc((void **)&buffer, height*width*sizeof(unsigned));
  423. #ifdef STARPU_HAVE_X11
  424. if (use_x11)
  425. init_x11(width, height, buffer);
  426. #endif
  427. int block_size = height/nblocks;
  428. STARPU_ASSERT((height % nblocks) == 0);
  429. #ifdef STARPU_USE_OPENCL
  430. starpu_opencl_load_opencl_from_string(mandelbrot_opencl_src, &opencl_programs, NULL);
  431. #endif
  432. starpu_data_handle_t block_handles[nblocks];
  433. int iby;
  434. for (iby = 0; iby < nblocks; iby++)
  435. {
  436. unsigned *data = &buffer[iby*block_size*width];
  437. starpu_vector_data_register(&block_handles[iby], STARPU_MAIN_RAM,
  438. (uintptr_t)data, block_size*width, sizeof(unsigned));
  439. }
  440. unsigned iter = 0;
  441. double start, end;
  442. start = starpu_timing_now();
  443. while (niter-- != 0)
  444. {
  445. double stepX = (rightX - leftX)/width;
  446. double stepY = (topY - bottomY)/height;
  447. /* In case we have a SPMD task, each worker will grab tasks in
  448. * a greedy and select which piece of image to compute by
  449. * incrementing a counter shared by all the workers within the
  450. * parallel task. */
  451. int per_block_cnt[nblocks];
  452. for (iby = 0; iby < nblocks; iby++)
  453. {
  454. per_block_cnt[iby] = 0;
  455. int *pcnt = &per_block_cnt[iby];
  456. ret = starpu_task_insert(use_spmd?&spmd_mandelbrot_cl:&mandelbrot_cl,
  457. STARPU_VALUE, &iby, sizeof(iby),
  458. STARPU_VALUE, &block_size, sizeof(block_size),
  459. STARPU_VALUE, &stepX, sizeof(stepX),
  460. STARPU_VALUE, &stepY, sizeof(stepY),
  461. STARPU_W, block_handles[iby],
  462. STARPU_VALUE, &pcnt, sizeof(int *),
  463. STARPU_TAG_ONLY, ((starpu_tag_t)niter)*nblocks + iby,
  464. 0);
  465. STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
  466. }
  467. for (iby = 0; iby < nblocks; iby++)
  468. {
  469. #ifdef STARPU_HAVE_X11
  470. if (use_x11)
  471. {
  472. starpu_data_acquire(block_handles[iby], STARPU_R);
  473. XPutImage(dpy, win, gc, bitmap,
  474. 0, iby*block_size,
  475. 0, iby*block_size,
  476. width, block_size);
  477. starpu_data_release(block_handles[iby]);
  478. }
  479. #endif
  480. }
  481. if (demo)
  482. {
  483. /* Zoom in */
  484. double zoom_factor = demozoom;
  485. double widthX = rightX - leftX;
  486. double heightY = topY - bottomY;
  487. iter++;
  488. /* If the window is too small, we reset the demo and display some statistics */
  489. if ((fabs(widthX) < 1e-12) || (fabs(heightY) < 1e-12))
  490. {
  491. leftX = -50.22749575062760;
  492. rightX = 48.73874621262927;
  493. topY = -49.35016705749115;
  494. bottomY = 49.64891691946615;
  495. end = starpu_timing_now();
  496. double timing = end - start;
  497. fprintf(stderr, "Time to generate %u frames : %f s\n", iter, timing/1000000.0);
  498. fprintf(stderr, "Average FPS: %f\n", ((double)iter*1e+6)/timing);
  499. /* Reset counters */
  500. iter = 0;
  501. start = starpu_timing_now();
  502. }
  503. else
  504. {
  505. leftX += (zoom_factor/2)*widthX;
  506. rightX -= (zoom_factor/2)*widthX;
  507. topY -= (zoom_factor/2)*heightY;
  508. bottomY += (zoom_factor/2)*heightY;
  509. }
  510. }
  511. #ifdef STARPU_HAVE_X11
  512. else if (use_x11 && handle_events())
  513. break;
  514. #endif
  515. }
  516. #ifdef STARPU_HAVE_X11
  517. if (use_x11)
  518. exit_x11();
  519. #endif
  520. for (iby = 0; iby < nblocks; iby++)
  521. starpu_data_unregister(block_handles[iby]);
  522. /* starpu_data_free_pinned_if_possible(buffer); */
  523. starpu_shutdown();
  524. return 0;
  525. }