mandelbrot.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2010, 2011 Université de Bordeaux 1
  4. * Copyright (C) 2010, 2011, 2012 Centre National de la Recherche Scientifique
  5. *
  6. * StarPU is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU Lesser General Public License as published by
  8. * the Free Software Foundation; either version 2.1 of the License, or (at
  9. * your option) any later version.
  10. *
  11. * StarPU is distributed in the hope that it will be useful, but
  12. * WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  14. *
  15. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  16. */
  17. #include <starpu.h>
  18. #include <sys/time.h>
  19. #include <math.h>
  20. #include <limits.h>
  21. #ifdef STARPU_HAVE_X11
  22. #include <X11/Xlib.h>
  23. #include <X11/Xutil.h>
  24. int use_x11 = 1;
  25. #endif
  26. int demo = 0;
  27. static double demozoom = 0.05;
  28. /* NB: The X11 code is inspired from the http://locklessinc.com/articles/mandelbrot/ article */
  29. static int nblocks = 20;
  30. static int height = 400;
  31. static int width = 640;
  32. static int maxIt = 20000; /* max number of iteration in the Mandelbrot function */
  33. static int niter = -1; /* number of loops in case we don't use X11, -1 means infinite */
  34. static int use_spmd = 0;
  35. static double leftX = -0.745;
  36. static double rightX = -0.74375;
  37. static double topY = .15;
  38. static double bottomY = .14875;
  39. /*
  40. * X11 window management
  41. */
  42. #ifdef STARPU_HAVE_X11
  43. /* X11 data */
  44. static Display *dpy;
  45. static Window win;
  46. static XImage *bitmap;
  47. static GC gc;
  48. static KeySym Left=-1, Right, Down, Up, Alt ;
  49. static void exit_x11(void)
  50. {
  51. XDestroyImage(bitmap);
  52. XDestroyWindow(dpy, win);
  53. XCloseDisplay(dpy);
  54. }
  55. static void init_x11(int width, int height, unsigned *buffer)
  56. {
  57. /* Attempt to open the display */
  58. dpy = XOpenDisplay(NULL);
  59. /* Failure */
  60. if (!dpy)
  61. exit(0);
  62. unsigned long white = WhitePixel(dpy,DefaultScreen(dpy));
  63. unsigned long black = BlackPixel(dpy,DefaultScreen(dpy));
  64. win = XCreateSimpleWindow(dpy, DefaultRootWindow(dpy), 0, 0,
  65. width, height, 0, black, white);
  66. /* We want to be notified when the window appears */
  67. XSelectInput(dpy, win, StructureNotifyMask);
  68. /* Make it appear */
  69. XMapWindow(dpy, win);
  70. XTextProperty tp;
  71. char name[128] = "Mandelbrot - StarPU";
  72. char *n = name;
  73. Status st = XStringListToTextProperty(&n, 1, &tp);
  74. if (st)
  75. XSetWMName(dpy, win, &tp);
  76. /* Wait for the MapNotify event */
  77. XFlush(dpy);
  78. int depth = DefaultDepth(dpy, DefaultScreen(dpy));
  79. Visual *visual = DefaultVisual(dpy, DefaultScreen(dpy));
  80. /* Make bitmap */
  81. bitmap = XCreateImage(dpy, visual, depth,
  82. ZPixmap, 0, (char *)buffer,
  83. width, height, 32, 0);
  84. /* Init GC */
  85. gc = XCreateGC(dpy, win, 0, NULL);
  86. XSetForeground(dpy, gc, black);
  87. XSelectInput(dpy, win, ExposureMask | KeyPressMask | StructureNotifyMask);
  88. Atom wmDeleteMessage;
  89. wmDeleteMessage = XInternAtom(dpy, "WM_DELETE_WINDOW", False);
  90. XSetWMProtocols(dpy, win, &wmDeleteMessage, 1);
  91. Left = XStringToKeysym ("Left");
  92. Right = XStringToKeysym ("Right");
  93. Up = XStringToKeysym ("Up");
  94. Down = XStringToKeysym ("Down");
  95. Alt = XStringToKeysym ("Alt");
  96. }
  97. static int handle_events(void)
  98. {
  99. XEvent event;
  100. XNextEvent(dpy, &event);
  101. KeySym key;
  102. char text[255];
  103. if (event.type == KeyPress)
  104. {
  105. XLookupString(&event.xkey,text,255,&key,0);
  106. if (key == Left)
  107. {
  108. double widthX = rightX - leftX;
  109. leftX -= 0.25*widthX;
  110. rightX -= 0.25*widthX;
  111. }
  112. else if (key == Right)
  113. {
  114. double widthX = rightX - leftX;
  115. leftX += 0.25*widthX;
  116. rightX += 0.25*widthX;
  117. }
  118. else if (key == Up)
  119. {
  120. double heightY = topY - bottomY;
  121. topY += 0.25*heightY;
  122. bottomY += 0.25*heightY;
  123. }
  124. else if (key == Down)
  125. {
  126. double heightY = topY - bottomY;
  127. topY -= 0.25*heightY;
  128. bottomY -= 0.25*heightY;
  129. }
  130. else
  131. {
  132. double widthX = rightX - leftX;
  133. double heightY = topY - bottomY;
  134. if (text[0] == '-')
  135. {
  136. /* Zoom out */
  137. leftX -= 0.125*widthX;
  138. rightX += 0.125*widthX;
  139. topY += 0.125*heightY;
  140. bottomY -= 0.125*heightY;
  141. }
  142. else if (text[0] == '+')
  143. {
  144. /* Zoom in */
  145. leftX += 0.125*widthX;
  146. rightX -= 0.125*widthX;
  147. topY -= 0.125*heightY;
  148. bottomY += 0.125*heightY;
  149. }
  150. }
  151. if (text[0]=='q')
  152. {
  153. return -1;
  154. }
  155. }
  156. if (event.type==ButtonPress)
  157. {
  158. /* tell where the mouse Button was Pressed */
  159. printf("You pressed a button at (%i,%i)\n",
  160. event.xbutton.x,event.xbutton.y);
  161. }
  162. return 0;
  163. }
  164. #endif
  165. /*
  166. * OpenCL kernel
  167. */
  168. #ifdef STARPU_USE_OPENCL
  169. char *mandelbrot_opencl_src = "\
  170. #pragma OPENCL EXTENSION cl_khr_fp64 : enable\n\
  171. #define MIN(a,b) (((a)<(b))? (a) : (b)) \n\
  172. __kernel void mandelbrot_kernel(__global unsigned* a, \n\
  173. double leftX, double topY, \n\
  174. double stepX, double stepY, \n\
  175. int maxIt, int iby, int block_size, int width) \n\
  176. { \n\
  177. size_t id_x = get_global_id(0); \n\
  178. size_t id_y = get_global_id(1); \n\
  179. if ((id_x < width) && (id_y < block_size)) \n\
  180. { \n\
  181. double xc = leftX + id_x * stepX; \n\
  182. double yc = topY - (id_y + iby*block_size) * stepY; \n\
  183. int it; \n\
  184. double x,y; \n\
  185. x = y = (double)0.0; \n\
  186. for (it=0;it<maxIt;it++) \n\
  187. { \n\
  188. double x2 = x*x; \n\
  189. double y2 = y*y; \n\
  190. if (x2+y2 > 4.0) break; \n\
  191. double twoxy = (double)2.0*x*y; \n\
  192. x = x2 - y2 + xc; \n\
  193. y = twoxy + yc; \n\
  194. } \n\
  195. unsigned int v = MIN((1024*((float)(it)/(2000))), 256); \n\
  196. a[id_x + width * id_y] = (v<<16|(255-v)<<8); \n\
  197. } \n\
  198. }";
  199. static struct starpu_opencl_program opencl_programs;
  200. static void compute_block_opencl(void *descr[], void *cl_arg)
  201. {
  202. int iby, block_size;
  203. double stepX, stepY;
  204. int *pcnt; /* unused for CUDA tasks */
  205. starpu_codelet_unpack_args(cl_arg, &iby, &block_size, &stepX, &stepY, &pcnt);
  206. cl_mem data = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(descr[0]);
  207. cl_kernel kernel;
  208. cl_command_queue queue;
  209. cl_event event;
  210. int id = starpu_worker_get_id();
  211. int devid = starpu_worker_get_devid(id);
  212. starpu_opencl_load_kernel(&kernel, &queue, &opencl_programs, "mandelbrot_kernel", devid);
  213. clSetKernelArg(kernel, 0, sizeof(data), &data);
  214. clSetKernelArg(kernel, 1, sizeof(leftX), &leftX);
  215. clSetKernelArg(kernel, 2, sizeof(topY), &topY);
  216. clSetKernelArg(kernel, 3, sizeof(stepX), &stepX);
  217. clSetKernelArg(kernel, 4, sizeof(stepY), &stepY);
  218. clSetKernelArg(kernel, 5, sizeof(maxIt), &maxIt);
  219. clSetKernelArg(kernel, 6, sizeof(iby), &iby);
  220. clSetKernelArg(kernel, 7, sizeof(block_size), &block_size);
  221. clSetKernelArg(kernel, 8, sizeof(width), &width);
  222. unsigned dim = 16;
  223. size_t local[2] = {dim, 1};
  224. size_t global[2] = {width, block_size};
  225. clEnqueueNDRangeKernel(queue, kernel, 2, NULL, global, local, 0, NULL, &event);
  226. clFinish(queue);
  227. starpu_opencl_collect_stats(event);
  228. clReleaseEvent(event);
  229. starpu_opencl_release_kernel(kernel);
  230. }
  231. #endif
  232. /*
  233. * CPU kernel
  234. */
  235. static void compute_block(void *descr[], void *cl_arg)
  236. {
  237. int ix, iy;
  238. int iby, block_size;
  239. double stepX, stepY;
  240. int *pcnt; /* unused for sequential tasks */
  241. starpu_codelet_unpack_args(cl_arg, &iby, &block_size, &stepX, &stepY, &pcnt);
  242. unsigned *data = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]);
  243. int local_iy;
  244. for (local_iy = 0; local_iy < block_size; local_iy++)
  245. {
  246. iy = iby*block_size + local_iy;
  247. for (ix = 0; ix < width; ix++)
  248. {
  249. double cx = leftX + ix * stepX;
  250. double cy = topY - iy * stepY;
  251. /* Z = X+I*Y */
  252. double x = 0;
  253. double y = 0;
  254. int it;
  255. for (it = 0; it < maxIt; it++)
  256. {
  257. double x2 = x*x;
  258. double y2 = y*y;
  259. /* Stop iterations when |Z| > 2 */
  260. if (x2 + y2 > 4.0)
  261. break;
  262. double twoxy = 2.0*x*y;
  263. /* Z = Z^2 + C */
  264. x = x2 - y2 + cx;
  265. y = twoxy + cy;
  266. }
  267. unsigned int v = STARPU_MIN((1024*((float)(it)/(2000))), 256);
  268. data[ix + local_iy*width] = (v<<16|(255-v)<<8);
  269. }
  270. }
  271. }
  272. static void compute_block_spmd(void *descr[], void *cl_arg)
  273. {
  274. int iby, block_size;
  275. double stepX, stepY;
  276. int *pcnt;
  277. starpu_codelet_unpack_args(cl_arg, &iby, &block_size, &stepX, &stepY, &pcnt);
  278. unsigned *data = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]);
  279. int ix, iy; /* global coordinates */
  280. int local_iy; /* current line */
  281. while (1)
  282. {
  283. local_iy = STARPU_ATOMIC_ADD((unsigned int *)pcnt, 1) - 1;
  284. if (local_iy >= block_size)
  285. break;
  286. iy = iby*block_size + local_iy;
  287. for (ix = 0; ix < width; ix++)
  288. {
  289. double cx = leftX + ix * stepX;
  290. double cy = topY - iy * stepY;
  291. /* Z = X+I*Y */
  292. double x = 0;
  293. double y = 0;
  294. int it;
  295. for (it = 0; it < maxIt; it++)
  296. {
  297. double x2 = x*x;
  298. double y2 = y*y;
  299. /* Stop iterations when |Z| > 2 */
  300. if (x2 + y2 > 4.0)
  301. break;
  302. double twoxy = 2.0*x*y;
  303. /* Z = Z^2 + C */
  304. x = x2 - y2 + cx;
  305. y = twoxy + cy;
  306. }
  307. unsigned int v = STARPU_MIN((1024*((float)(it)/(2000))), 256);
  308. data[ix + local_iy*width] = (v<<16|(255-v)<<8);
  309. }
  310. }
  311. }
  312. static struct starpu_codelet spmd_mandelbrot_cl =
  313. {
  314. .where = STARPU_CPU|STARPU_OPENCL,
  315. .type = STARPU_SPMD,
  316. .max_parallelism = INT_MAX,
  317. .cpu_funcs = {compute_block_spmd, NULL},
  318. #ifdef STARPU_USE_OPENCL
  319. .opencl_funcs = {compute_block_opencl, NULL},
  320. #endif
  321. .nbuffers = 1
  322. };
  323. static struct starpu_codelet mandelbrot_cl =
  324. {
  325. .where = STARPU_CPU|STARPU_OPENCL,
  326. .type = STARPU_SEQ,
  327. .cpu_funcs = {compute_block, NULL},
  328. #ifdef STARPU_USE_OPENCL
  329. .opencl_funcs = {compute_block_opencl, NULL},
  330. #endif
  331. .nbuffers = 1
  332. };
  333. static void parse_args(int argc, char **argv)
  334. {
  335. int i;
  336. for (i = 1; i < argc; i++)
  337. {
  338. if (strcmp(argv[i], "-h") == 0)
  339. {
  340. fprintf(stderr, "Usage: %s [-h] [ -width 800] [-height 600] [-nblocks 16] [-no-x11] [-pos leftx:rightx:bottomy:topy] [-niter 1000] [-spmd] [-demo] [-demozoom 0.2]\n", argv[0]);
  341. exit(-1);
  342. }
  343. if (strcmp(argv[i], "-width") == 0)
  344. {
  345. char *argptr;
  346. width = strtol(argv[++i], &argptr, 10);
  347. }
  348. if (strcmp(argv[i], "-height") == 0)
  349. {
  350. char *argptr;
  351. height = strtol(argv[++i], &argptr, 10);
  352. }
  353. if (strcmp(argv[i], "-nblocks") == 0)
  354. {
  355. char *argptr;
  356. nblocks = strtol(argv[++i], &argptr, 10);
  357. }
  358. if (strcmp(argv[i], "-niter") == 0)
  359. {
  360. char *argptr;
  361. niter = strtol(argv[++i], &argptr, 10);
  362. }
  363. if (strcmp(argv[i], "-pos") == 0)
  364. {
  365. int ret = sscanf(argv[++i], "%lf:%lf:%lf:%lf", &leftX, &rightX, &bottomY, &topY);
  366. assert(ret == 4);
  367. }
  368. if (strcmp(argv[i], "-demo") == 0)
  369. {
  370. demo = 1;
  371. leftX = -50.22749575062760;
  372. rightX = 48.73874621262927;
  373. topY = -49.35016705749115;
  374. bottomY = 49.64891691946615;
  375. }
  376. if (strcmp(argv[i], "-demozoom") == 0)
  377. {
  378. char *argptr;
  379. demozoom = strtof(argv[++i], &argptr);
  380. }
  381. if (strcmp(argv[i], "-no-x11") == 0)
  382. {
  383. #ifdef STARPU_HAVE_X11
  384. use_x11 = 0;
  385. #endif
  386. }
  387. if (strcmp(argv[i], "-spmd") == 0)
  388. {
  389. use_spmd = 1;
  390. }
  391. }
  392. }
  393. int main(int argc, char **argv)
  394. {
  395. int ret;
  396. parse_args(argc, argv);
  397. /* We don't use CUDA in that example */
  398. struct starpu_conf conf;
  399. starpu_conf_init(&conf);
  400. conf.ncuda = 0;
  401. if (use_spmd)
  402. conf.sched_policy_name = "peager";
  403. ret = starpu_init(&conf);
  404. STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
  405. unsigned *buffer;
  406. starpu_malloc((void **)&buffer, height*width*sizeof(unsigned));
  407. #ifdef STARPU_HAVE_X11
  408. if (use_x11)
  409. init_x11(width, height, buffer);
  410. #endif
  411. int block_size = height/nblocks;
  412. STARPU_ASSERT((height % nblocks) == 0);
  413. #ifdef STARPU_USE_OPENCL
  414. starpu_opencl_load_opencl_from_string(mandelbrot_opencl_src, &opencl_programs, NULL);
  415. #endif
  416. starpu_data_handle_t block_handles[nblocks];
  417. int iby;
  418. for (iby = 0; iby < nblocks; iby++)
  419. {
  420. unsigned *data = &buffer[iby*block_size*width];
  421. starpu_vector_data_register(&block_handles[iby], 0,
  422. (uintptr_t)data, block_size*width, sizeof(unsigned));
  423. }
  424. unsigned iter = 0;
  425. struct timeval start, end;
  426. if (demo)
  427. gettimeofday(&start, NULL);
  428. while (niter-- != 0)
  429. {
  430. int ret;
  431. double stepX = (rightX - leftX)/width;
  432. double stepY = (topY - bottomY)/height;
  433. /* In case we have a SPMD task, each worker will grab tasks in
  434. * a greedy and select which piece of image to compute by
  435. * incrementing a counter shared by all the workers within the
  436. * parallel task. */
  437. int per_block_cnt[nblocks];
  438. for (iby = 0; iby < nblocks; iby++)
  439. {
  440. per_block_cnt[iby] = 0;
  441. int *pcnt = &per_block_cnt[iby];
  442. ret = starpu_insert_task(use_spmd?&spmd_mandelbrot_cl:&mandelbrot_cl,
  443. STARPU_VALUE, &iby, sizeof(iby),
  444. STARPU_VALUE, &block_size, sizeof(block_size),
  445. STARPU_VALUE, &stepX, sizeof(stepX),
  446. STARPU_VALUE, &stepY, sizeof(stepY),
  447. STARPU_W, block_handles[iby],
  448. STARPU_VALUE, &pcnt, sizeof(int *),
  449. 0);
  450. STARPU_CHECK_RETURN_VALUE(ret, "starpu_insert_task");
  451. }
  452. for (iby = 0; iby < nblocks; iby++)
  453. {
  454. #ifdef STARPU_HAVE_X11
  455. if (use_x11)
  456. {
  457. starpu_data_acquire(block_handles[iby], STARPU_R);
  458. XPutImage(dpy, win, gc, bitmap,
  459. 0, iby*block_size,
  460. 0, iby*block_size,
  461. width, block_size);
  462. starpu_data_release(block_handles[iby]);
  463. }
  464. #endif
  465. }
  466. if (demo)
  467. {
  468. /* Zoom in */
  469. double zoom_factor = demozoom;
  470. double widthX = rightX - leftX;
  471. double heightY = topY - bottomY;
  472. iter++;
  473. /* If the window is too small, we reset the demo and display some statistics */
  474. if ((fabs(widthX) < 1e-12) || (fabs(heightY) < 1e-12))
  475. {
  476. leftX = -50.22749575062760;
  477. rightX = 48.73874621262927;
  478. topY = -49.35016705749115;
  479. bottomY = 49.64891691946615;
  480. gettimeofday(&end, NULL);
  481. double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
  482. fprintf(stderr, "Time to generate %u frames : %f s\n", iter, timing/1000000.0);
  483. fprintf(stderr, "Average FPS: %f\n", ((double)iter*1e+6)/timing);
  484. /* Reset counters */
  485. iter = 0;
  486. gettimeofday(&start, NULL);
  487. }
  488. else
  489. {
  490. leftX += (zoom_factor/2)*widthX;
  491. rightX -= (zoom_factor/2)*widthX;
  492. topY -= (zoom_factor/2)*heightY;
  493. bottomY += (zoom_factor/2)*heightY;
  494. }
  495. }
  496. #ifdef STARPU_HAVE_X11
  497. else if (use_x11 && handle_events())
  498. break;
  499. #endif
  500. }
  501. #ifdef STARPU_HAVE_X11
  502. if (use_x11)
  503. exit_x11();
  504. #endif
  505. for (iby = 0; iby < nblocks; iby++)
  506. starpu_data_unregister(block_handles[iby]);
  507. /* starpu_data_free_pinned_if_possible(buffer); */
  508. starpu_shutdown();
  509. return 0;
  510. }