| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512 | /* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010,2011 University of Bordeaux * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */#include <stdio.h>#include <stdlib.h>#include <string.h>#include <unistd.h>/* Uncomment this to activate X11 display *///#define USE_X11#define SHORT_LOG 1#define ROUND_ROBIN#ifdef USE_X11#include <X11/Xlib.h>#include <X11/Xutil.h>int use_x11 = 1;#elseint use_x11 = 0;#endifint demo = 0;int frames = -1;#include <pthread.h>#include <assert.h>#include <sys/time.h>#ifdef __APPLE_CC__#include <OpenCL/opencl.h>#else#include <CL/cl.h>#endif#define error(...) do { fprintf(stderr, "Error: " __VA_ARGS__); exit(EXIT_FAILURE); } while(0)#define check(err, str) do { if(err != CL_SUCCESS) { fprintf(stderr, "OpenCL Error (%d): %s\n",err, str); exit(EXIT_FAILURE); }} while(0)#ifdef UNUSED#elif defined(__GNUC__)# define UNUSED(x) UNUSED_ ## x __attribute__((unused))#else# define UNUSED(x) x#endifconst char * kernel_src = "\#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n\#define TYPE double \n\#define MIN(a,b) (((a)<(b))? (a) : (b))\n\      __kernel void mandelbrot_kernel(__global uint * a,\n\          TYPE leftX, TYPE topY,\n\          TYPE stepX, TYPE stepY,\n\          uint maxIt, uint iby, uint block_size)\n\{\n\  TYPE xc = leftX + get_global_id(0) * stepX;\n\  TYPE yc = iby*block_size*stepY + topY  + get_global_id(1) * stepY;\n\  int it;\n\  TYPE x,y;\n\  x = y = (TYPE)0.0;\n\  for (it=0;it<maxIt;it++)\n\  {\n\    TYPE x2 = x*x;\n\    TYPE y2 = y*y;\n\    if (x2+y2 > (TYPE)4) break; \n\    TYPE twoxy = (TYPE)2*x*y;\n\    x = x2 - y2 + xc;\n\    y = twoxy + yc;\n\  }\n\  uint v = MIN((1024*((float)(it)/(2000))), 256);\n\  a[get_global_id(0) + get_global_id(1)*get_global_size(0)] = (v<<16|(255-v)<<8); \n\}";static cl_uint nblocks = 8;static cl_uint height = 768;static cl_uint width = 1024;static cl_uint maxIt = 20000;static cl_uint group_size = 64;static double leftX = -0.745;static double rightX = -0.74375;static double topY = .15;static double bottomY = .14875;#ifdef USE_X11      /* X11 data */      static Display *dpy;      static Window win;      static XImage *bitmap;      static GC gc;      static KeySym Left=-1, Right, Down, Up, Alt ;      static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;static void exit_x11(void){  XDestroyImage(bitmap);  XDestroyWindow(dpy, win);  XCloseDisplay(dpy);}static void init_x11(int width, int height, cl_uint *buffer){  /* Attempt to open the display */  dpy = XOpenDisplay(NULL);  /* Failure */  if (!dpy)    exit(0);  unsigned long white = WhitePixel(dpy,DefaultScreen(dpy));  unsigned long black = BlackPixel(dpy,DefaultScreen(dpy));  win = XCreateSimpleWindow(dpy, DefaultRootWindow(dpy), 0, 0,      width, height, 0, black, white);  /* We want to be notified when the window appears */  XSelectInput(dpy, win, StructureNotifyMask);  /* Make it appear */  XMapWindow(dpy, win);  XTextProperty tp;  char name[128] = "Mandelbrot";  char *n = name;  Status st = XStringListToTextProperty(&n, 1, &tp);  if (st)    XSetWMName(dpy, win, &tp);  /* Wait for the MapNotify event */  XFlush(dpy);  int depth = DefaultDepth(dpy, DefaultScreen(dpy));  Visual *visual = DefaultVisual(dpy, DefaultScreen(dpy));  /* Make bitmap */  bitmap = XCreateImage(dpy, visual, depth,      ZPixmap, 0, (char *)buffer,      width, height, 32, 0);  /* Init GC */  gc = XCreateGC(dpy, win, 0, NULL);  XSetForeground(dpy, gc, black);  XSelectInput(dpy, win, ExposureMask | KeyPressMask | StructureNotifyMask);  Atom wmDeleteMessage;  wmDeleteMessage = XInternAtom(dpy, "WM_DELETE_WINDOW", False);  XSetWMProtocols(dpy, win, &wmDeleteMessage, 1);  Left = XStringToKeysym ("Left");  Right = XStringToKeysym ("Right");  Up = XStringToKeysym ("Up");  Down = XStringToKeysym ("Down");  Alt = XStringToKeysym ("Alt");}static int handle_events(void){  XEvent event;  XNextEvent(dpy, &event);  KeySym key;  char text[255];  double coef = 0.05;  if (event.type == KeyPress)  {    XLookupString(&event.xkey,text,255,&key,0);    if (key == Left)    {      double widthX = rightX - leftX;      leftX -= coef*widthX;      rightX -= coef*widthX;    }    else if (key == Right)    {      double widthX = rightX - leftX;      leftX += coef*widthX;      rightX += coef*widthX;    }    else if (key == Down)    {      double heightY = topY - bottomY;      topY += coef*heightY;      bottomY += coef*heightY;    }    else if (key == Up)    {      double heightY = topY - bottomY;      topY -= coef*heightY;      bottomY -= coef*heightY;    }    else {      double widthX = rightX - leftX;      double heightY = topY - bottomY;      if (text[0] == '-')      {        /* Zoom out */        leftX -= (coef/2)*widthX;        rightX += (coef/2)*widthX;        topY += (coef/2)*heightY;        bottomY -= (coef/2)*heightY;      }      else if (text[0] == '+')      {        /* Zoom in */        leftX += (coef/2)*widthX;        rightX -= (coef/2)*widthX;        topY -= (coef/2)*heightY;        bottomY += (coef/2)*heightY;      }    }    if (text[0]=='q') {      return -1;    }  }  if (event.type==ButtonPress) {    /* tell where the mouse Button was Pressed */    printf("You pressed a button at (%i,%i)\n",        event.xbutton.x,event.xbutton.y);  }  return 0;}#endif //USE_X11static void parse_args(int argc, char **argv){	int i;	for (i = 1; i < argc; i++) {		if (strcmp(argv[i], "-h") == 0) {			fprintf(stderr, "Usage: %s [-h] [ -width 1024] [-height 768] [-nblocks 16] [-group_size 64] [-no-x11] [-demo] [-frames N] [-pos leftx:rightx:bottomy:topy]\n", argv[0]);			exit(-1);		}		if (strcmp(argv[i], "-width") == 0) {			char *argptr;			width = strtol(argv[++i], &argptr, 10);		}		if (strcmp(argv[i], "-frames") == 0) {			char *argptr;			frames = strtol(argv[++i], &argptr, 10);		}		if (strcmp(argv[i], "-height") == 0) {			char *argptr;			height = strtol(argv[++i], &argptr, 10);		}		if (strcmp(argv[i], "-group_size") == 0) {			char *argptr;			group_size = strtol(argv[++i], &argptr, 10);		}		if (strcmp(argv[i], "-nblocks") == 0) {			char *argptr;			nblocks = strtol(argv[++i], &argptr, 10);		}		if (strcmp(argv[i], "-pos") == 0) {			int ret = sscanf(argv[++i], "%lf:%lf:%lf:%lf", &leftX, &rightX, &bottomY, &topY);			assert(ret == 4);		}		if (strcmp(argv[i], "-demo") == 0) {			demo = 1;			leftX = -50.22749575062760;			rightX = 48.73874621262927;			topY = -49.35016705749115;			bottomY = 49.64891691946615;		}		if (strcmp(argv[i], "-no-x11") == 0) {#ifdef USE_X11			use_x11 = 0;#endif		}	}}int main(int argc, char **argv) {#define MAX_DEVICES 20  cl_platform_id platforms[15];  cl_uint num_platforms;  cl_device_id devices[15];  cl_uint num_devices;  cl_context context;  cl_program program;  cl_kernel kernel;  cl_command_queue cq[MAX_DEVICES];  cl_int err;  cl_uint i;  parse_args(argc, argv);  cl_uint block_size = height/nblocks;  assert((height % nblocks) == 0);  assert((width % group_size) == 0);  err = clGetPlatformIDs(0, NULL, &num_platforms);  if (num_platforms == 0) {    printf("No OpenCL platform found\n");    exit(0);  }  err = clGetPlatformIDs(sizeof(platforms)/sizeof(cl_platform_id), platforms, NULL);  check(err, "clGetPlatformIDs");  unsigned int platform_idx;  for (platform_idx=0; platform_idx<num_platforms; platform_idx++) {    err = clGetDeviceIDs(platforms[platform_idx], CL_DEVICE_TYPE_GPU, sizeof(devices)/sizeof(cl_device_id), devices, &num_devices);    check(err, "clGetDeviceIDs");    if (num_devices != 0)      break;  }  if (num_devices == 0)    error("No OpenCL device found\n");  cl_context_properties properties[] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[platform_idx], 0};  context = clCreateContext(properties, num_devices, devices, NULL, NULL, &err);  check(err, "clCreateContext");  program = clCreateProgramWithSource(context, 1, &kernel_src, NULL, &err);  check(err, "clCreateProgram");  err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);  check(err, "clBuildProgram");  kernel = clCreateKernel(program, "mandelbrot_kernel", &err);  check(err, "clCreateKernel");  for (i=0; i<num_devices; i++)    cq[i] = clCreateCommandQueue(context, devices[i],  CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);  check(err, "clCreateCommandQueue");  cl_uint *buffer;  buffer = malloc(height*width*sizeof(cl_uint));#ifdef USE_X11  if (use_x11)    init_x11(width, height, buffer);#endif // USE_X11  cl_mem block_handles[nblocks];  cl_uint iby;  for (iby = 0; iby < nblocks; iby++) {    cl_uint *data = &buffer[iby*block_size*width];    block_handles[iby] = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, block_size*width*sizeof(cl_uint), data, &err);  }  int stop = 0;  int frame = 0;  while (!stop) {    struct timeval start, end;    gettimeofday(&start, NULL);    if (frames != -1) {      frame++;      stop = (frame == frames);    }    double stepX = (rightX - leftX)/width;    double stepY = (topY - bottomY)/height;    cl_event ker_events[nblocks];    void * ptrs[nblocks];    for (iby = 0; iby < nblocks; iby++) {      err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &block_handles[iby]);      check(err, "clSetKernelArg out");      err = clSetKernelArg(kernel, 1, sizeof(cl_double), &leftX);      check(err, "clSetKernelArg leftX");      err = clSetKernelArg(kernel, 2, sizeof(cl_double), &topY);      check(err, "clSetKernelArg topY");      err = clSetKernelArg(kernel, 3, sizeof(cl_double), &stepX);      check(err, "clSetKernelArg leftX");      err = clSetKernelArg(kernel, 4, sizeof(cl_double), &stepY);      check(err, "clSetKernelArg topY");      err = clSetKernelArg(kernel, 5, sizeof(cl_uint), &maxIt);      check(err, "clSetKernelArg maxIt");      err = clSetKernelArg(kernel, 6, sizeof(cl_uint), &iby);      check(err, "clSetKernelArg iby");      err = clSetKernelArg(kernel, 7, sizeof(cl_uint), &block_size);      check(err, "clSetKernelArg block_size");      size_t local[3] = {group_size, 1, 1};      size_t global[3] = {width, block_size, 1};#ifdef ROUND_ROBIN      int dev = iby % num_devices;#else      int dev = 0;#endif      err = clEnqueueNDRangeKernel(cq[dev], kernel, 3, NULL, global, local, 0, NULL, &ker_events[iby]);      check(err, "clEnqueueNDRangeKernel");    }    for (iby = 0; iby < nblocks; iby++) {#ifdef ROUND_ROBIN      int dev = iby % num_devices;#else      int dev = 0;#endif      ptrs[iby] = clEnqueueMapBuffer(cq[dev], block_handles[iby], CL_FALSE,CL_MAP_READ, 0, block_size*width*sizeof(cl_uint), 1, &ker_events[iby], NULL, NULL);    }#ifdef ROUND_ROBIN    for (i = 0; i < num_devices; i++)      clFinish(cq[i]);#else    clFinish(cq[0]);#endif    gettimeofday(&end, NULL);    double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));#ifdef SHORT_LOG    fprintf(stderr, "%f\n", timing/1000.0);#else    fprintf(stderr, "Time to generate frame : %f ms\n", timing/1000.0);    fprintf(stderr, "%14.14f:%14.14f:%14.14f:%14.14f\n", leftX, rightX, bottomY, topY);#endif#ifdef USE_X11    if (use_x11) {      for (iby = 0; iby < nblocks; iby++) {        pthread_mutex_lock(&mutex);        XPutImage(dpy, win, gc, bitmap,            0, iby*block_size,            0, iby*block_size,            width, block_size);        pthread_mutex_unlock(&mutex);      }    }#endif    for (iby = 0; iby < nblocks; iby++) {#ifdef ROUND_ROBIN      int dev = iby % num_devices;#else      int dev = 0;#endif      clEnqueueUnmapMemObject(cq[dev], block_handles[iby], ptrs[iby], 0, NULL, NULL);      clReleaseEvent(ker_events[iby]);    }    if (demo) {      /* Zoom in */      double zoom_factor = 0.05;      double widthX = rightX - leftX;      double heightY = topY - bottomY;      leftX += (zoom_factor/2)*widthX;      rightX -= (zoom_factor/2)*widthX;      topY -= (zoom_factor/2)*heightY;      bottomY += (zoom_factor/2)*heightY;    }    else {#ifdef USE_X11      if (use_x11) {        handle_events();      }#else      stop = 1;#endif    }  }#ifdef USE_X11  if (use_x11)    exit_x11();#endif  for (iby = 0; iby < nblocks; iby++) {    clReleaseMemObject(block_handles[iby]);  }  for (i=0; i<num_devices; i++)    clReleaseCommandQueue(cq[i]);  clReleaseKernel(kernel);  clReleaseProgram(program);  clReleaseContext(context);  return 0;}
 |