mandelbrot.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2012,2015,2017,2019 CNRS
  4. * Copyright (C) 2012 Inria
  5. * Copyright (C) 2010,2011 Université de Bordeaux
  6. *
  7. * StarPU is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation; either version 2.1 of the License, or (at
  10. * your option) any later version.
  11. *
  12. * StarPU is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. *
  16. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  17. */
  18. #include <stdio.h>
  19. #include <stdlib.h>
  20. #include <string.h>
  21. #include <unistd.h>
  22. /* Uncomment this to activate X11 display */
  23. //#define USE_X11
  24. #define SHORT_LOG 1
  25. #define ROUND_ROBIN
  26. #ifdef USE_X11
  27. #include <X11/Xlib.h>
  28. #include <X11/Xutil.h>
  29. int use_x11 = 1;
  30. #else
  31. int use_x11 = 0;
  32. #endif
  33. int demo = 0;
  34. int frames = -1;
  35. #include <pthread.h>
  36. #include <assert.h>
  37. #include <sys/time.h>
  38. #ifdef __APPLE_CC__
  39. #include <OpenCL/opencl.h>
  40. #else
  41. #include <CL/cl.h>
  42. #endif
  43. #define error(...) do { fprintf(stderr, "Error: " __VA_ARGS__); exit(EXIT_FAILURE); } while(0)
  44. #define check(err, str) do { if(err != CL_SUCCESS) { fprintf(stderr, "OpenCL Error (%d): %s\n",err, str); exit(EXIT_FAILURE); }} while(0)
  45. #ifdef UNUSED
  46. #elif defined(__GNUC__)
  47. # define UNUSED(x) UNUSED_ ## x __attribute__((unused))
  48. #else
  49. # define UNUSED(x) x
  50. #endif
  51. const char * kernel_src = "\
  52. #pragma OPENCL EXTENSION cl_khr_fp64 : enable\n\
  53. #define TYPE double \n\
  54. #define MIN(a,b) (((a)<(b))? (a) : (b))\n\
  55. __kernel void mandelbrot_kernel(__global uint * a,\n\
  56. TYPE leftX, TYPE topY,\n\
  57. TYPE stepX, TYPE stepY,\n\
  58. uint maxIt, uint iby, uint block_size)\n\
  59. {\n\
  60. TYPE xc = leftX + get_global_id(0) * stepX;\n\
  61. TYPE yc = iby*block_size*stepY + topY + get_global_id(1) * stepY;\n\
  62. int it;\n\
  63. TYPE x,y;\n\
  64. x = y = (TYPE)0.0;\n\
  65. for (it=0;it<maxIt;it++)\n\
  66. {\n\
  67. TYPE x2 = x*x;\n\
  68. TYPE y2 = y*y;\n\
  69. if (x2+y2 > (TYPE)4) break; \n\
  70. TYPE twoxy = (TYPE)2*x*y;\n\
  71. x = x2 - y2 + xc;\n\
  72. y = twoxy + yc;\n\
  73. }\n\
  74. uint v = MIN((1024*((float)(it)/(2000))), 256);\n\
  75. a[get_global_id(0) + get_global_id(1)*get_global_size(0)] = (v<<16|(255-v)<<8); \n\
  76. }";
  77. static cl_uint nblocks = 8;
  78. static cl_uint height = 768;
  79. static cl_uint width = 1024;
  80. static cl_uint maxIt = 20000;
  81. static cl_uint group_size = 64;
  82. static double leftX = -0.745;
  83. static double rightX = -0.74375;
  84. static double topY = .15;
  85. static double bottomY = .14875;
  86. #ifdef USE_X11
  87. /* X11 data */
  88. static Display *dpy;
  89. static Window win;
  90. static XImage *bitmap;
  91. static GC gc;
  92. static KeySym Left=-1, Right, Down, Up, Alt ;
  93. static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
  94. static void exit_x11(void)
  95. {
  96. XDestroyImage(bitmap);
  97. XDestroyWindow(dpy, win);
  98. XCloseDisplay(dpy);
  99. }
  100. static void init_x11(int width, int height, cl_uint *buffer)
  101. {
  102. /* Attempt to open the display */
  103. dpy = XOpenDisplay(NULL);
  104. /* Failure */
  105. if (!dpy)
  106. exit(0);
  107. unsigned long white = WhitePixel(dpy,DefaultScreen(dpy));
  108. unsigned long black = BlackPixel(dpy,DefaultScreen(dpy));
  109. win = XCreateSimpleWindow(dpy, DefaultRootWindow(dpy), 0, 0,
  110. width, height, 0, black, white);
  111. /* We want to be notified when the window appears */
  112. XSelectInput(dpy, win, StructureNotifyMask);
  113. /* Make it appear */
  114. XMapWindow(dpy, win);
  115. XTextProperty tp;
  116. char name[128] = "Mandelbrot";
  117. char *n = name;
  118. Status st = XStringListToTextProperty(&n, 1, &tp);
  119. if (st)
  120. XSetWMName(dpy, win, &tp);
  121. /* Wait for the MapNotify event */
  122. XFlush(dpy);
  123. int depth = DefaultDepth(dpy, DefaultScreen(dpy));
  124. Visual *visual = DefaultVisual(dpy, DefaultScreen(dpy));
  125. /* Make bitmap */
  126. bitmap = XCreateImage(dpy, visual, depth,
  127. ZPixmap, 0, (char *)buffer,
  128. width, height, 32, 0);
  129. /* Init GC */
  130. gc = XCreateGC(dpy, win, 0, NULL);
  131. XSetForeground(dpy, gc, black);
  132. XSelectInput(dpy, win, ExposureMask | KeyPressMask | StructureNotifyMask);
  133. Atom wmDeleteMessage;
  134. wmDeleteMessage = XInternAtom(dpy, "WM_DELETE_WINDOW", False);
  135. XSetWMProtocols(dpy, win, &wmDeleteMessage, 1);
  136. Left = XStringToKeysym ("Left");
  137. Right = XStringToKeysym ("Right");
  138. Up = XStringToKeysym ("Up");
  139. Down = XStringToKeysym ("Down");
  140. Alt = XStringToKeysym ("Alt");
  141. }
  142. static int handle_events(void)
  143. {
  144. XEvent event;
  145. XNextEvent(dpy, &event);
  146. KeySym key;
  147. char text[255];
  148. double coef = 0.05;
  149. if (event.type == KeyPress)
  150. {
  151. XLookupString(&event.xkey,text,255,&key,0);
  152. if (key == Left)
  153. {
  154. double widthX = rightX - leftX;
  155. leftX -= coef*widthX;
  156. rightX -= coef*widthX;
  157. }
  158. else if (key == Right)
  159. {
  160. double widthX = rightX - leftX;
  161. leftX += coef*widthX;
  162. rightX += coef*widthX;
  163. }
  164. else if (key == Down)
  165. {
  166. double heightY = topY - bottomY;
  167. topY += coef*heightY;
  168. bottomY += coef*heightY;
  169. }
  170. else if (key == Up)
  171. {
  172. double heightY = topY - bottomY;
  173. topY -= coef*heightY;
  174. bottomY -= coef*heightY;
  175. }
  176. else {
  177. double widthX = rightX - leftX;
  178. double heightY = topY - bottomY;
  179. if (text[0] == '-')
  180. {
  181. /* Zoom out */
  182. leftX -= (coef/2)*widthX;
  183. rightX += (coef/2)*widthX;
  184. topY += (coef/2)*heightY;
  185. bottomY -= (coef/2)*heightY;
  186. }
  187. else if (text[0] == '+')
  188. {
  189. /* Zoom in */
  190. leftX += (coef/2)*widthX;
  191. rightX -= (coef/2)*widthX;
  192. topY -= (coef/2)*heightY;
  193. bottomY += (coef/2)*heightY;
  194. }
  195. }
  196. if (text[0]=='q') {
  197. return -1;
  198. }
  199. }
  200. if (event.type==ButtonPress) {
  201. /* tell where the mouse Button was Pressed */
  202. printf("You pressed a button at (%i,%i)\n",
  203. event.xbutton.x,event.xbutton.y);
  204. }
  205. return 0;
  206. }
  207. #endif //USE_X11
  208. static void parse_args(int argc, char **argv)
  209. {
  210. int i;
  211. for (i = 1; i < argc; i++) {
  212. if (strcmp(argv[i], "-h") == 0) {
  213. fprintf(stderr, "Usage: %s [-h] [ -width 1024] [-height 768] [-nblocks 16] [-group_size 64] [-no-x11] [-demo] [-frames N] [-pos leftx:rightx:bottomy:topy]\n", argv[0]);
  214. exit(-1);
  215. }
  216. if (strcmp(argv[i], "-width") == 0) {
  217. char *argptr;
  218. width = strtol(argv[++i], &argptr, 10);
  219. }
  220. if (strcmp(argv[i], "-frames") == 0) {
  221. char *argptr;
  222. frames = strtol(argv[++i], &argptr, 10);
  223. }
  224. if (strcmp(argv[i], "-height") == 0) {
  225. char *argptr;
  226. height = strtol(argv[++i], &argptr, 10);
  227. }
  228. if (strcmp(argv[i], "-group_size") == 0) {
  229. char *argptr;
  230. group_size = strtol(argv[++i], &argptr, 10);
  231. }
  232. if (strcmp(argv[i], "-nblocks") == 0) {
  233. char *argptr;
  234. nblocks = strtol(argv[++i], &argptr, 10);
  235. }
  236. if (strcmp(argv[i], "-pos") == 0) {
  237. int ret = sscanf(argv[++i], "%lf:%lf:%lf:%lf", &leftX, &rightX, &bottomY, &topY);
  238. assert(ret == 4);
  239. }
  240. if (strcmp(argv[i], "-demo") == 0) {
  241. demo = 1;
  242. leftX = -50.22749575062760;
  243. rightX = 48.73874621262927;
  244. topY = -49.35016705749115;
  245. bottomY = 49.64891691946615;
  246. }
  247. if (strcmp(argv[i], "-no-x11") == 0) {
  248. #ifdef USE_X11
  249. use_x11 = 0;
  250. #endif
  251. }
  252. }
  253. }
  254. int main(int argc, char **argv) {
  255. #define MAX_DEVICES 20
  256. cl_platform_id platforms[15];
  257. cl_uint num_platforms;
  258. cl_device_id devices[15];
  259. cl_uint num_devices;
  260. cl_context context;
  261. cl_program program;
  262. cl_kernel kernel;
  263. cl_command_queue cq[MAX_DEVICES];
  264. cl_int err;
  265. cl_uint i;
  266. parse_args(argc, argv);
  267. cl_uint block_size = height/nblocks;
  268. assert((height % nblocks) == 0);
  269. assert((width % group_size) == 0);
  270. clGetPlatformIDs(0, NULL, &num_platforms);
  271. if (num_platforms == 0) {
  272. printf("No OpenCL platform found\n");
  273. exit(0);
  274. }
  275. err = clGetPlatformIDs(sizeof(platforms)/sizeof(cl_platform_id), platforms, NULL);
  276. check(err, "clGetPlatformIDs");
  277. unsigned int platform_idx;
  278. for (platform_idx=0; platform_idx<num_platforms; platform_idx++) {
  279. err = clGetDeviceIDs(platforms[platform_idx], CL_DEVICE_TYPE_GPU, sizeof(devices)/sizeof(cl_device_id), devices, &num_devices);
  280. check(err, "clGetDeviceIDs");
  281. if (num_devices != 0)
  282. break;
  283. }
  284. if (num_devices == 0)
  285. error("No OpenCL device found\n");
  286. cl_context_properties properties[] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[platform_idx], 0};
  287. context = clCreateContext(properties, num_devices, devices, NULL, NULL, &err);
  288. check(err, "clCreateContext");
  289. program = clCreateProgramWithSource(context, 1, &kernel_src, NULL, &err);
  290. check(err, "clCreateProgram");
  291. err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
  292. check(err, "clBuildProgram");
  293. kernel = clCreateKernel(program, "mandelbrot_kernel", &err);
  294. check(err, "clCreateKernel");
  295. for (i=0; i<num_devices; i++)
  296. cq[i] = clCreateCommandQueue(context, devices[i], CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
  297. check(err, "clCreateCommandQueue");
  298. cl_uint *buffer;
  299. buffer = malloc(height*width*sizeof(cl_uint));
  300. #ifdef USE_X11
  301. if (use_x11)
  302. init_x11(width, height, buffer);
  303. #endif // USE_X11
  304. cl_mem block_handles[nblocks];
  305. cl_uint iby;
  306. for (iby = 0; iby < nblocks; iby++) {
  307. cl_uint *data = &buffer[iby*block_size*width];
  308. block_handles[iby] = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, block_size*width*sizeof(cl_uint), data, &err);
  309. }
  310. int stop = 0;
  311. int frame = 0;
  312. while (!stop) {
  313. struct timeval start, end;
  314. gettimeofday(&start, NULL);
  315. if (frames != -1) {
  316. frame++;
  317. stop = (frame == frames);
  318. }
  319. double stepX = (rightX - leftX)/width;
  320. double stepY = (topY - bottomY)/height;
  321. cl_event ker_events[nblocks];
  322. void * ptrs[nblocks];
  323. for (iby = 0; iby < nblocks; iby++) {
  324. err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &block_handles[iby]);
  325. check(err, "clSetKernelArg out");
  326. err = clSetKernelArg(kernel, 1, sizeof(cl_double), &leftX);
  327. check(err, "clSetKernelArg leftX");
  328. err = clSetKernelArg(kernel, 2, sizeof(cl_double), &topY);
  329. check(err, "clSetKernelArg topY");
  330. err = clSetKernelArg(kernel, 3, sizeof(cl_double), &stepX);
  331. check(err, "clSetKernelArg leftX");
  332. err = clSetKernelArg(kernel, 4, sizeof(cl_double), &stepY);
  333. check(err, "clSetKernelArg topY");
  334. err = clSetKernelArg(kernel, 5, sizeof(cl_uint), &maxIt);
  335. check(err, "clSetKernelArg maxIt");
  336. err = clSetKernelArg(kernel, 6, sizeof(cl_uint), &iby);
  337. check(err, "clSetKernelArg iby");
  338. err = clSetKernelArg(kernel, 7, sizeof(cl_uint), &block_size);
  339. check(err, "clSetKernelArg block_size");
  340. size_t local[3] = {group_size, 1, 1};
  341. size_t global[3] = {width, block_size, 1};
  342. #ifdef ROUND_ROBIN
  343. int dev = iby % num_devices;
  344. #else
  345. int dev = 0;
  346. #endif
  347. err = clEnqueueNDRangeKernel(cq[dev], kernel, 3, NULL, global, local, 0, NULL, &ker_events[iby]);
  348. check(err, "clEnqueueNDRangeKernel");
  349. }
  350. for (iby = 0; iby < nblocks; iby++) {
  351. #ifdef ROUND_ROBIN
  352. int dev = iby % num_devices;
  353. #else
  354. int dev = 0;
  355. #endif
  356. ptrs[iby] = clEnqueueMapBuffer(cq[dev], block_handles[iby], CL_FALSE,CL_MAP_READ, 0, block_size*width*sizeof(cl_uint), 1, &ker_events[iby], NULL, NULL);
  357. }
  358. #ifdef ROUND_ROBIN
  359. for (i = 0; i < num_devices; i++)
  360. clFinish(cq[i]);
  361. #else
  362. clFinish(cq[0]);
  363. #endif
  364. gettimeofday(&end, NULL);
  365. double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
  366. #ifdef SHORT_LOG
  367. fprintf(stderr, "%f\n", timing/1000.0);
  368. #else
  369. fprintf(stderr, "Time to generate frame : %f ms\n", timing/1000.0);
  370. fprintf(stderr, "%14.14f:%14.14f:%14.14f:%14.14f\n", leftX, rightX, bottomY, topY);
  371. #endif
  372. #ifdef USE_X11
  373. if (use_x11) {
  374. for (iby = 0; iby < nblocks; iby++) {
  375. pthread_mutex_lock(&mutex);
  376. XPutImage(dpy, win, gc, bitmap,
  377. 0, iby*block_size,
  378. 0, iby*block_size,
  379. width, block_size);
  380. pthread_mutex_unlock(&mutex);
  381. }
  382. }
  383. #endif
  384. for (iby = 0; iby < nblocks; iby++) {
  385. #ifdef ROUND_ROBIN
  386. int dev = iby % num_devices;
  387. #else
  388. int dev = 0;
  389. #endif
  390. clEnqueueUnmapMemObject(cq[dev], block_handles[iby], ptrs[iby], 0, NULL, NULL);
  391. clReleaseEvent(ker_events[iby]);
  392. }
  393. if (demo) {
  394. /* Zoom in */
  395. double zoom_factor = 0.05;
  396. double widthX = rightX - leftX;
  397. double heightY = topY - bottomY;
  398. leftX += (zoom_factor/2)*widthX;
  399. rightX -= (zoom_factor/2)*widthX;
  400. topY -= (zoom_factor/2)*heightY;
  401. bottomY += (zoom_factor/2)*heightY;
  402. }
  403. else {
  404. #ifdef USE_X11
  405. if (use_x11) {
  406. handle_events();
  407. }
  408. #else
  409. stop = 1;
  410. #endif
  411. }
  412. }
  413. #ifdef USE_X11
  414. if (use_x11)
  415. exit_x11();
  416. #endif
  417. for (iby = 0; iby < nblocks; iby++) {
  418. clReleaseMemObject(block_handles[iby]);
  419. }
  420. for (i=0; i<num_devices; i++)
  421. clReleaseCommandQueue(cq[i]);
  422. clReleaseKernel(kernel);
  423. clReleaseProgram(program);
  424. clReleaseContext(context);
  425. return 0;
  426. }