Browse Source

Add some SOCL examples

Sylvain Henry 13 years ago
parent
commit
6ce4934cd3

+ 6 - 3
Makefile.am

@@ -21,14 +21,17 @@ SUBDIRS = src
 if USE_MPI
 SUBDIRS += mpi
 endif
+
+if BUILD_SOCL
+SUBDIRS += socl
+endif
+
 SUBDIRS += tools examples tests doc
+
 if COND_OPT
 SUBDIRS += tests/opt examples/opt
 endif
 
-if BUILD_SOCL
-SUBDIRS += socl
-endif
 
 if BUILD_GCC_PLUGIN
 SUBDIRS += gcc-plugin

+ 2 - 0
configure.ac

@@ -1131,6 +1131,7 @@ else
 fi
 
 AM_CONDITIONAL([BUILD_SOCL], [test "x$build_socl" = "xyes"])
+AM_CONDITIONAL([STARPU_USE_SOCL], [test "x$build_socl" = "xyes"])
 
 ###############################################################################
 #                                                                             #
@@ -1377,6 +1378,7 @@ AC_OUTPUT([
         examples/opt/Makefile
 	examples/starpufft/Makefile
 	examples/stencil/Makefile
+	examples/socl/Makefile
 	tests/Makefile
         tests/opt/Makefile
 	doc/Makefile

+ 4 - 0
examples/Makefile.am

@@ -21,6 +21,10 @@ AM_LDFLAGS = $(STARPU_CUDA_LDFLAGS) $(STARPU_OPENCL_LDFLAGS)
 
 SUBDIRS = stencil
 
+if STARPU_USE_SOCL
+SUBDIRS += socl
+endif
+
 if STARPU_HAVE_FFTW
 if STARPU_HAVE_FFTWF
 SUBDIRS += starpufft

+ 51 - 0
examples/socl/Makefile.am

@@ -0,0 +1,51 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
+# Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+
+# Flags shared by every SOCL example; the examples link against the in-tree
+# libsocl and use its OpenCL headers.
+AM_CFLAGS = $(MAGMA_CFLAGS) $(HWLOC_CFLAGS) -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS)
+LIBS = $(top_builddir)/socl/src/libsocl.la
+AM_CPPFLAGS = -I$(top_srcdir)/socl/include/
+AM_LDFLAGS = $(STARPU_CUDA_LDFLAGS) $(STARPU_OPENCL_LDFLAGS)
+
+
+# Programs executed by "make check"; the list is filled in below.
+SOCL_EXAMPLES	=
+TESTS		=	$(SOCL_EXAMPLES)
+
+# "make check" must build exactly the programs that TESTS runs.
+check_PROGRAMS	=	$(SOCL_EXAMPLES)
+
+# Installation directory of the example binaries.
+examplebindir = $(libdir)/starpu/examples/socl/
+examplebin_PROGRAMS =
+
+
+examplebin_PROGRAMS +=				\
+	basic/basic		\
+	mandelbrot/mandelbrot		\
+	clinfo/clinfo
+
+
+SOCL_EXAMPLES +=				\
+	basic/basic		\
+	mandelbrot/mandelbrot		\
+	clinfo/clinfo
+
+basic_basic_SOURCES = basic/basic.c
+clinfo_clinfo_SOURCES = clinfo/clinfo.c
+mandelbrot_mandelbrot_SOURCES = mandelbrot/mandelbrot.c
+
+# Per-target flags: mandelbrot additionally picks up the X11 flags and
+# libraries when configure found X11.
+mandelbrot_mandelbrot_CPPFLAGS = $(AM_CFLAGS) $(AM_CPPFLAGS)
+if HAVE_X11
+mandelbrot_mandelbrot_CPPFLAGS += $(X_CFLAGS)
+mandelbrot_mandelbrot_LDADD = $(X_PRE_LIBS) -lX11 $(X_LIBS) $(X_EXTRA_LIBS)
+endif

+ 211 - 0
examples/socl/basic/basic.c

@@ -0,0 +1,211 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010,2011 University of Bordeaux
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <CL/cl.h>
+
+/* Print "Error: " followed by a printf-style message on stderr and exit with
+ * EXIT_FAILURE. The first argument must be a string literal because it is
+ * concatenated with the "Error: " prefix. */
+#define error(...) do { fprintf(stderr, "Error: " __VA_ARGS__); exit(EXIT_FAILURE); } while(0)
+/* Exit with EXIT_FAILURE, reporting the error code and the label `str`,
+ * when `err` is not CL_SUCCESS. */
+#define check(err, str) do { if(err != CL_SUCCESS) { fprintf(stderr, "OpenCL Error (%d): %s\n",err, str); exit(EXIT_FAILURE); }} while(0)
+
+/* UNUSED(x) marks a deliberately unused parameter. With GCC the parameter is
+ * renamed to UNUSED_x and tagged with the "unused" attribute; elsewhere the
+ * name is left as is. An existing UNUSED definition is kept. */
+#ifdef UNUSED
+#elif defined(__GNUC__)
+# define UNUSED(x) UNUSED_ ## x __attribute__((unused))
+#else
+# define UNUSED(x) x
+#endif
+
+/* Number of elements in each vector, their type, and the resulting size of
+ * one vector in bytes. */
+#define SIZE 1024
+#define TYPE float
+#define REALSIZE (SIZE * sizeof(TYPE))
+
+/* OpenCL C source of the "add" kernel: each work-item computes a linear index
+ * from its global ids in dimensions 0 and 1 and stores s1[idx] + s2[idx]
+ * into d[idx]. */
+const char * kernel_src = "__kernel void add(__global float*s1, __global float*s2, __global float*d) { \
+   size_t x = get_global_id(0);\
+   size_t y = get_global_id(1);\
+   size_t w = get_global_size(0); \
+   int idx = y*w+x; \
+   d[idx] = s1[idx] + s2[idx];\
+}";
+
+
+
+/*
+ * OpenCL smoke test: adds two SIZE-element vectors with the "add" kernel on
+ * the first platform that exposes at least one GPU device, then prints the
+ * result vector.
+ *
+ * Returns 0 on success. Exits with status 0 when no OpenCL platform is
+ * available, and with EXIT_FAILURE on any other OpenCL error.
+ */
+int main(int UNUSED(argc), char** UNUSED(argv)) {
+   cl_platform_id platforms[15];
+   cl_uint num_platforms = 0;
+   cl_device_id devices[15];
+   cl_uint num_devices = 0;
+   /* Capacity of the two fixed-size arrays above */
+   const cl_uint max_platforms = sizeof(platforms)/sizeof(platforms[0]);
+   const cl_uint max_devices = sizeof(devices)/sizeof(devices[0]);
+   cl_context context;
+   cl_program program;
+   cl_kernel kernel;
+   cl_mem s1m, s2m, dm;
+   cl_command_queue cq;
+   cl_int err;
+
+   TYPE s1[SIZE],s2[SIZE],d[SIZE];
+
+   {
+      int i;
+      for (i=0; i<SIZE; i++) {
+         s1[i] = 2.0;
+         s2[i] = 7.0;
+         d[i] = 98.0; /* placeholder, overwritten when the result is read back */
+      }
+   }
+
+   printf("Querying platform...\n");
+   err = clGetPlatformIDs(0, NULL, &num_platforms);
+   /* A failing query is handled like "no platform" instead of reading an
+    * unset counter. */
+   if (err != CL_SUCCESS || num_platforms == 0) {
+      printf("No OpenCL platform found. If you use SOCL, this could mean StarPU wasn't configured for OpenCL. Try disabling CUDA support in StarPU (export STARPU_NCUDA=0).\n");
+      exit(0);
+   }
+   /* Never iterate past the end of platforms[] */
+   if (num_platforms > max_platforms)
+      num_platforms = max_platforms;
+   err = clGetPlatformIDs(num_platforms, platforms, NULL);
+   check(err, "clGetPlatformIDs");
+
+   printf("Querying devices...\n");
+   unsigned int platform_idx;
+   for (platform_idx=0; platform_idx<num_platforms; platform_idx++) {
+      num_devices = 0;
+      err = clGetDeviceIDs(platforms[platform_idx], CL_DEVICE_TYPE_GPU, max_devices, devices, &num_devices);
+      /* A platform without GPU is not an error: try the next one */
+      if (err == CL_DEVICE_NOT_FOUND) {
+         num_devices = 0;
+         continue;
+      }
+      check(err, "clGetDeviceIDs");
+      if (num_devices != 0)
+         break;
+   }
+   if (num_devices == 0)
+      error("No OpenCL device found\n");
+   /* clGetDeviceIDs reports how many devices exist, which may exceed what
+    * fits in devices[] */
+   if (num_devices > max_devices)
+      num_devices = max_devices;
+
+   printf("Creating context...\n");
+   cl_context_properties properties[] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[platform_idx], 0};
+   context = clCreateContext(properties, num_devices, devices, NULL, NULL, &err);
+   check(err, "clCreateContext");
+
+   printf("Creating program...\n");
+   program = clCreateProgramWithSource(context, 1, &kernel_src, NULL, &err);
+   check(err, "clCreateProgram");
+
+   printf("Building program...\n");
+   err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
+   check(err, "clBuildProgram");
+
+   printf("Creating kernel...\n");
+   kernel = clCreateKernel(program, "add", &err);
+   check(err, "clCreateKernel");
+
+   printf("Creating buffers...\n");
+   s1m = clCreateBuffer(context, CL_MEM_READ_WRITE, REALSIZE, NULL, &err);
+   check(err, "clCreateBuffer s1");
+   s2m = clCreateBuffer(context, CL_MEM_READ_ONLY, REALSIZE, NULL, &err);
+   check(err, "clCreateBuffer s2");
+   dm = clCreateBuffer(context, CL_MEM_WRITE_ONLY, REALSIZE, NULL, &err);
+   check(err, "clCreateBuffer d");
+
+   printf("Creating command queue...\n");
+   cl_event eventW1, eventW2, eventK, eventR;
+
+   /* All commands go to the first selected device */
+#ifdef PROFILING
+   cq = clCreateCommandQueue(context, devices[0], CL_QUEUE_PROFILING_ENABLE, &err);
+#else
+   cq = clCreateCommandQueue(context, devices[0], 0, &err);
+#endif
+   check(err, "clCreateCommandQueue");
+
+   printf("Enqueueing WriteBuffers...\n");
+   err = clEnqueueWriteBuffer(cq, s1m, CL_FALSE, 0, REALSIZE, s1, 0, NULL, &eventW1);
+   check(err, "clEnqueueWriteBuffer s1");
+   err = clEnqueueWriteBuffer(cq, s2m, CL_FALSE, 0, REALSIZE, s2, 0, NULL, &eventW2);
+   check(err, "clEnqueueWriteBuffer s2");
+
+   printf("Setting kernel arguments...\n");
+   err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &s1m);
+   check(err, "clSetKernelArg 0");
+   err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &s2m);
+   check(err, "clSetKernelArg 1");
+   err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &dm);
+   check(err, "clSetKernelArg 2");
+
+   printf("Enqueueing NDRangeKernel...\n");
+   size_t local[3] = {16, 1, 1};
+   size_t global[3] = {1024, 1, 1};
+   /* The kernel waits explicitly for both input transfers */
+   cl_event deps[] = {eventW1,eventW2};
+   err = clEnqueueNDRangeKernel(cq, kernel, 3, NULL, global, local, 2, deps, &eventK);
+   check(err, "clEnqueueNDRangeKernel");
+
+   printf("Enqueueing ReadBuffer...\n");
+   err = clEnqueueReadBuffer(cq, dm, CL_FALSE, 0, REALSIZE, d, 0, NULL, &eventR);
+   check(err, "clEnqueueReadBuffer");
+
+   /* The read above is non-blocking: d is only valid once the queue drained */
+   err = clFinish(cq);
+   check(err, "clFinish");
+
+   {
+      int i;
+      for (i=0; i<SIZE; i++) {
+        printf("%f ", d[i]);
+      }
+      printf("\n");
+   }
+
+#ifdef PROFILING
+   /* Print the device-side duration of `event`, in nanoseconds */
+   #define DURATION(event,label) do { \
+      cl_ulong t0,t1; \
+      err = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &t0, NULL);\
+      check(err, "clGetEventProfilingInfo");\
+      err = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &t1, NULL);\
+      check(err, "clGetEventProfilingInfo");\
+      printf("Profiling %s: %llu nanoseconds\n", label, (unsigned long long)(t1-t0));\
+   } while (0)
+
+   DURATION(eventW1, "first buffer writing");
+   DURATION(eventW2, "second buffer writing");
+   DURATION(eventK, "kernel execution");
+   DURATION(eventR, "result buffer reading");
+#endif
+
+
+   printf("Releasing events...\n");
+   err = clReleaseEvent(eventW1);
+   check(err, "clReleaseEvent W1");
+   err = clReleaseEvent(eventW2);
+   check(err, "clReleaseEvent W2");
+   err = clReleaseEvent(eventK);
+   check(err, "clReleaseEvent K");
+   err = clReleaseEvent(eventR);
+   check(err, "clReleaseEvent R");
+
+   printf("Releasing command queue...\n");
+   err = clReleaseCommandQueue(cq);
+   check(err, "clReleaseCommandQueue");
+
+   printf("Releasing buffers...\n");
+   err = clReleaseMemObject(s1m);
+   check(err, "clReleaseMemObject s1");
+   err = clReleaseMemObject(s2m);
+   check(err, "clReleaseMemObject s2");
+   err = clReleaseMemObject(dm);
+   check(err, "clReleaseMemObject d");
+
+   printf("Releasing kernel...\n");
+   err = clReleaseKernel(kernel);
+   check(err, "clReleaseKernel");
+
+   printf("Releasing program...\n");
+   err = clReleaseProgram(program);
+   check(err, "clReleaseProgram");
+
+   printf("Releasing context...\n");
+   err = clReleaseContext(context);
+   check(err, "clReleaseContext");
+
+   return 0;
+}

+ 299 - 0
examples/socl/clinfo/clinfo.c

@@ -0,0 +1,299 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010,2011 University of Bordeaux
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <CL/cl.h>
+
+/*
+ * Abort the program when an OpenCL call failed.
+ *
+ * err:  status returned by the OpenCL call.
+ * name: label printed with the error code on stderr.
+ *
+ * Returns normally when err is CL_SUCCESS; otherwise exits with status 1.
+ * Declared static so that a definition is always available to the linker,
+ * even when the compiler chooses not to inline the call.
+ */
+static inline
+void
+checkErr(cl_int err, const char * name) {
+    if (err != CL_SUCCESS) {
+        fprintf(stderr, "ERROR: %s (%d)\n", name, err);
+        exit(1);
+    }
+}
+
+/*
+ * Print the properties of every OpenCL platform, then of every device each
+ * platform exposes.
+ *
+ * Returns 0 on success. Any OpenCL error other than "platform has no device"
+ * terminates the program with status 1 through checkErr().
+ */
+int
+main(void) {
+   cl_int err;
+   cl_uint num_platforms;
+   cl_platform_id *platforms;
+
+   // Platform info
+   err = clGetPlatformIDs(0, NULL, &num_platforms);
+   checkErr(err, "Unable to get platform count");
+
+   platforms = malloc(sizeof(*platforms)*num_platforms);
+   if (platforms == NULL && num_platforms != 0) {
+      fprintf(stderr, "ERROR: unable to allocate the platform list\n");
+      exit(1);
+   }
+   err = clGetPlatformIDs(num_platforms, platforms, NULL);
+   checkErr(err, "Unable to get platform list");
+
+
+   // Iterate over platforms
+   printf("Number of platforms:\t\t\t\t %u\n", num_platforms);
+
+   {
+      unsigned int i;
+      for (i=0; i<num_platforms; i++) {
+         char str[1024];
+         err = clGetPlatformInfo(platforms[i], CL_PLATFORM_PROFILE, sizeof(str), str, NULL);
+         checkErr(err, "clGetPlatformInfo(CL_PLATFORM_PROFILE)");
+         printf("  Plaform Profile:\t\t\t\t %s\n", str);
+
+         err= clGetPlatformInfo(platforms[i], CL_PLATFORM_VERSION, sizeof(str), str, NULL);
+         checkErr(err, "clGetPlatformInfo(CL_PLATFORM_VERSION)");
+         printf("  Plaform Version:\t\t\t\t %s\n", str);
+
+         err = clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, sizeof(str), str, NULL);
+         checkErr(err, "clGetPlatformInfo(CL_PLATFORM_NAME)");
+         printf("  Plaform Name:\t\t\t\t\t %s\n", str);
+
+         err = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, sizeof(str), str, NULL);
+         checkErr(err, "clGetPlatformInfo(CL_PLATFORM_VENDOR)");
+         printf("  Plaform Vendor:\t\t\t\t %s\n", str);
+
+         err = clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, sizeof(str), str, NULL);
+         checkErr(err, "clGetPlatformInfo(CL_PLATFORM_EXTENSIONS)");
+         printf("  Plaform Extensions:\t\t\t %s\n", str);
+      }
+   }
+
+   printf("\n\n");
+
+   // Now iterate over each platform and its devices
+   {
+      unsigned int i;
+      for (i=0; i<num_platforms; i++) {
+         char str[1024];
+         cl_device_id * devices;
+         cl_uint num_devices = 0;
+
+         err = clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, sizeof(str), str, NULL);
+         checkErr(err, "clGetPlatformInfo(CL_PLATFORM_NAME)");
+         printf("  Plaform Name:\t\t\t\t\t %s\n", str);
+
+         err = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices);
+         /* A platform without any device is reported, not treated as fatal */
+         if (err == CL_DEVICE_NOT_FOUND)
+            num_devices = 0;
+         else
+            checkErr(err, "clGetDeviceIds(CL_DEVICE_TYPE_ALL)");
+
+         if (num_devices == 0) {
+            printf("  Number of devices:\t\t\t\t %u\n", num_devices);
+            continue;
+         }
+
+         devices = malloc(sizeof(*devices)*num_devices);
+         if (devices == NULL) {
+            fprintf(stderr, "ERROR: unable to allocate the device list\n");
+            exit(1);
+         }
+
+         err = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, num_devices, devices, NULL);
+         checkErr(err, "clGetDeviceIds(CL_DEVICE_TYPE_ALL)");
+
+         printf("  Number of devices:\t\t\t\t %u\n", num_devices);
+         {
+            unsigned int j;
+            for (j=0; j<num_devices; j++) {
+               cl_device_type dev_type;
+               printf("\n  DEVICE %u\n", j);
+
+               err = clGetDeviceInfo(devices[j], CL_DEVICE_TYPE, sizeof(dev_type), &dev_type, NULL);
+               checkErr(err, "clGetDeviceInfo(CL_DEVICE_TYPE)");
+
+               /* cl_device_type is a bitfield: print every flag that is set */
+               printf("  Device Type:\t\t\t\t\t ");
+               if (dev_type & CL_DEVICE_TYPE_ACCELERATOR)
+                  printf("CL_DEVICE_TYPE_ACCELERATOR ");
+               if (dev_type & CL_DEVICE_TYPE_CPU)
+                  printf("CL_DEVICE_TYPE_CPU ");
+               if (dev_type & CL_DEVICE_TYPE_GPU)
+                  printf("CL_DEVICE_TYPE_GPU ");
+               if (dev_type & CL_DEVICE_TYPE_DEFAULT)
+                  printf("CL_DEVICE_TYPE_DEFAULT ");
+
+               printf("\n");
+
+               {
+                  cl_uint vendor_id;
+                  err = clGetDeviceInfo(devices[j], CL_DEVICE_VENDOR_ID, sizeof(vendor_id), &vendor_id, NULL);
+                  checkErr(err, "clGetDeviceInfo(CL_DEVICE_VENDOR_ID)");
+                  printf("  Device ID:\t\t\t\t\t %u\n", vendor_id);
+               }
+               {
+                  cl_uint units;
+                  err = clGetDeviceInfo(devices[j], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(units), &units, NULL);
+                  checkErr(err, "clGetDeviceInfo(CL_DEVICE_MAX_COMPUTE_UNITS)");
+                  printf("  Max compute units:\t\t\t\t %u\n", units);
+               }
+
+               {
+                  cl_uint dims;
+                  size_t *sizes;
+                  err = clGetDeviceInfo(devices[j], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(dims), &dims, NULL);
+                  checkErr(err, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS)");
+                  printf("  Max work item dimensions:\t\t\t %u\n", dims);
+
+                  /* One maximum per work-item dimension */
+                  sizes = malloc(dims * sizeof(*sizes));
+                  if (sizes == NULL && dims != 0) {
+                     fprintf(stderr, "ERROR: unable to allocate the work item sizes\n");
+                     exit(1);
+                  }
+                  err = clGetDeviceInfo(devices[j], CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t)*dims, sizes, NULL);
+                  checkErr(err, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_ITEM_SIZES)");
+
+                  {
+                     unsigned int k;
+                     printf("    Max work items:\t\t\t\t (");
+                     for (k=0; k<dims; k++) {
+                        printf("%u", (unsigned int)sizes[k]);
+                        if (k != dims-1)
+                           printf(",");
+                     }
+                     printf(")\n");
+                  }
+                  free(sizes);
+               }
+
+/* The helpers below query one property of devices[j] and print it with the
+ * given printf format; they rely on `err`, `devices` and `j` being in scope. */
+
+#define GET_SIZET(CL_D,str) do { \
+   size_t val; \
+   err = clGetDeviceInfo(devices[j], CL_D, sizeof(val), &val, NULL); \
+   checkErr(err, "clGetDeviceInfo(" #CL_D ")"); \
+   printf(str, (unsigned int)val); \
+} while (0)
+
+#define GET_STRING(CL_D,str,size) do { \
+   char val[size]; \
+   err = clGetDeviceInfo(devices[j], CL_D, sizeof(val), val, NULL); \
+   checkErr(err, "clGetDeviceInfo(" #CL_D ")"); \
+   printf(str, val); \
+} while (0)
+
+#define GET_UINT(CL_D,str) do { \
+   cl_uint val; \
+   err = clGetDeviceInfo(devices[j], CL_D, sizeof(val), &val, NULL); \
+   checkErr(err, "clGetDeviceInfo(" #CL_D ")"); \
+   printf(str, val); \
+} while (0)
+
+/* cl_ulong is 64-bit wide: print it through unsigned long long / %llu */
+#define GET_ULONG(CL_D,str) do { \
+   cl_ulong val; \
+   err = clGetDeviceInfo(devices[j], CL_D, sizeof(val), &val, NULL); \
+   checkErr(err, "clGetDeviceInfo(" #CL_D ")"); \
+   printf(str, (unsigned long long)val); \
+} while (0)
+
+#define GET_BOOL(CL_D,str) do { \
+   cl_bool val; \
+   err = clGetDeviceInfo(devices[j], CL_D, sizeof(val), &val, NULL); \
+   checkErr(err, "clGetDeviceInfo(" #CL_D ")"); \
+   printf(str, (val == CL_TRUE ? "Yes" : "No")); \
+} while (0)
+
+#define GET_BOOL_CUSTOM(CL_D,str,t,f) do { \
+   cl_bool val; \
+   err = clGetDeviceInfo(devices[j], CL_D, sizeof(val), &val, NULL); \
+   checkErr(err, "clGetDeviceInfo(" #CL_D ")"); \
+   printf(str, (val == CL_TRUE ? t : f)); \
+} while (0)
+
+/* Tests one flag of a bitfield property; the flag may be any bit, so the
+ * masked value is compared against zero rather than against CL_TRUE. */
+#define GET_BITSET_AND(TYPE,CL_D,test,str) do { \
+   TYPE val; \
+   err = clGetDeviceInfo(devices[j], CL_D, sizeof(val), &val, NULL); \
+   checkErr(err, "clGetDeviceInfo(" #CL_D ")"); \
+   printf(str, ((val & (test)) != 0 ? "Yes" : "No")); \
+} while (0)
+
+               GET_SIZET(CL_DEVICE_MAX_WORK_GROUP_SIZE, "  Max work group size:\t\t\t\t %u\n");
+
+               GET_UINT(CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, "  Preferred vector width char:\t\t\t %u\n");
+               GET_UINT(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, "  Preferred vector width short:\t\t\t %u\n");
+               GET_UINT(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, "  Preferred vector width int:\t\t\t %u\n");
+               GET_UINT(CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, "  Preferred vector width long:\t\t\t %u\n");
+               GET_UINT(CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, "  Preferred vector width float:\t\t\t %u\n");
+               GET_UINT(CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, "  Preferred vector width double:\t\t %u\n");
+               GET_UINT(CL_DEVICE_MAX_CLOCK_FREQUENCY, "  Max clock frequency:\t\t\t\t %uMHz\n");
+               GET_UINT(CL_DEVICE_ADDRESS_BITS, "  Address bits:\t\t\t\t\t %ubits\n");
+               GET_ULONG(CL_DEVICE_MAX_MEM_ALLOC_SIZE, "  Max memory allocation:\t\t\t %llu bytes\n");
+
+               GET_BOOL(CL_DEVICE_IMAGE_SUPPORT, "  Image support:\t\t\t\t %s\n");
+
+               GET_SIZET(CL_DEVICE_MAX_PARAMETER_SIZE, "  Max size of kernel argument:\t\t\t %u\n");
+               GET_UINT(CL_DEVICE_MEM_BASE_ADDR_ALIGN, "  Alignment of base addres:\t\t\t %u bits\n");
+               GET_UINT(CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, "  Minimum alignment for any datatype:\t\t %u bytes\n");
+
+               printf("  Single precision floating point capability\n");
+               GET_BITSET_AND(cl_device_fp_config,CL_DEVICE_SINGLE_FP_CONFIG, CL_FP_DENORM, "    Denorms:\t\t\t\t\t %s\n");
+               GET_BITSET_AND(cl_device_fp_config,CL_DEVICE_SINGLE_FP_CONFIG, CL_FP_INF_NAN, "    Quiet NaNs:\t\t\t\t\t %s\n");
+               GET_BITSET_AND(cl_device_fp_config,CL_DEVICE_SINGLE_FP_CONFIG, CL_FP_ROUND_TO_NEAREST, "    Round to nearest even:\t\t\t %s\n");
+               GET_BITSET_AND(cl_device_fp_config,CL_DEVICE_SINGLE_FP_CONFIG, CL_FP_ROUND_TO_ZERO, "    Round to zero:\t\t\t\t %s\n");
+               GET_BITSET_AND(cl_device_fp_config,CL_DEVICE_SINGLE_FP_CONFIG, CL_FP_ROUND_TO_INF, "    Round to +ve and infinity:\t\t\t %s\n");
+               GET_BITSET_AND(cl_device_fp_config,CL_DEVICE_SINGLE_FP_CONFIG, CL_FP_FMA, "    IEEE754-2008 fused multiply-add:\t\t %s\n");
+
+               {
+                  cl_device_mem_cache_type cache;
+                  err = clGetDeviceInfo(devices[j], CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, sizeof(cache), &cache, NULL);
+                  checkErr(err, "clGetDeviceInfo(CL_DEVICE_GLOBAL_MEM_CACHE_TYPE)");
+                  printf("  Cache type:\t\t\t\t\t ");
+                  switch (cache) {
+                     case CL_NONE:
+                        printf("None\n");
+                        break;
+                     case CL_READ_ONLY_CACHE:
+                        printf("Read only\n");
+                        break;
+                     case CL_READ_WRITE_CACHE:
+                        printf("Read/Write\n");
+                        break;
+                  }
+               }
+
+               GET_UINT(CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, "  Cache line size:\t\t\t\t %u bytes\n");
+               GET_ULONG(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, "  Cache size:\t\t\t\t\t %llu bytes\n");
+               GET_ULONG(CL_DEVICE_GLOBAL_MEM_SIZE, "  Global memory size:\t\t\t\t %llu bytes\n");
+               GET_ULONG(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, "  Constant buffer size:\t\t\t\t %llu bytes\n");
+               GET_UINT(CL_DEVICE_MAX_CONSTANT_ARGS, "  Max number of constant args:\t\t\t %u\n");
+
+               {
+                  cl_device_local_mem_type cache;
+                  err = clGetDeviceInfo(devices[j], CL_DEVICE_LOCAL_MEM_TYPE, sizeof(cache), &cache, NULL);
+                  checkErr(err, "clGetDeviceInfo(CL_DEVICE_LOCAL_MEM_TYPE)");
+                  printf("  Local memory type:\t\t\t\t ");
+                  switch (cache) {
+                     case CL_LOCAL:
+                        printf("Local\n");
+                        break;
+                     case CL_GLOBAL:
+                        printf("Global\n");
+                        break;
+                  }
+               }
+
+               GET_ULONG(CL_DEVICE_LOCAL_MEM_SIZE, "  Local memory size:\t\t\t\t %llu bytes\n");
+               GET_SIZET(CL_DEVICE_PROFILING_TIMER_RESOLUTION, "  Profiling timer resolution:\t\t\t %u\n");
+               GET_BOOL_CUSTOM(CL_DEVICE_ENDIAN_LITTLE, "  Device endianess:\t\t\t\t %s\n", "Little", "Big");
+               GET_BOOL(CL_DEVICE_AVAILABLE, "  Available:\t\t\t\t\t %s\n");
+               GET_BOOL(CL_DEVICE_COMPILER_AVAILABLE, "  Compiler available:\t\t\t\t %s\n");
+
+               printf("  Execution capabilities:\t\t\t\t \n");
+               GET_BITSET_AND(cl_device_exec_capabilities, CL_DEVICE_EXECUTION_CAPABILITIES, CL_EXEC_KERNEL, "  Execute OpenCL kernels:\t\t\t %s\n");
+               GET_BITSET_AND(cl_device_exec_capabilities, CL_DEVICE_EXECUTION_CAPABILITIES, CL_EXEC_NATIVE_KERNEL, "  Execute native kernels:\t\t\t %s\n");
+
+               printf("  Queue properties:\t\t\t\t\n ");
+               GET_BITSET_AND(cl_command_queue_properties, CL_DEVICE_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, "   Out-of-Order:\t\t\t\t %s\n");
+               GET_BITSET_AND(cl_command_queue_properties, CL_DEVICE_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, "    Profiling:\t\t\t\t\t %s\n");
+
+
+               /* The query fails when the buffer is too small for the string,
+                * so short strings get a generous buffer as well. */
+               GET_STRING(CL_DEVICE_NAME, "  Name:\t\t\t\t\t\t %s\n", 256);
+               GET_STRING(CL_DEVICE_VENDOR, "  Vendor:\t\t\t\t\t %s\n", 256);
+               GET_STRING(CL_DRIVER_VERSION, "  Driver version:\t\t\t\t %s\n", 256);
+               GET_STRING(CL_DEVICE_PROFILE, "  Profile:\t\t\t\t\t %s\n", 256);
+               GET_STRING(CL_DEVICE_VERSION, "  Version:\t\t\t\t\t %s\n", 256);
+               GET_STRING(CL_DEVICE_EXTENSIONS, "  Extensions:\t\t\t\t\t %s\n", 4096);
+
+               printf("\n");
+            }
+         }
+
+         free(devices);
+      }
+   }
+
+   free(platforms);
+
+   return 0;
+}

+ 507 - 0
examples/socl/mandelbrot/mandelbrot.c

@@ -0,0 +1,507 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010,2011 University of Bordeaux
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+/* X11 display support is compiled in while this macro is defined; comment
+ * the line out to build without X11. */
+#define USE_X11
+
+/* NOTE(review): SHORT_LOG and ROUND_ROBIN are not referenced in the visible
+ * part of this file; presumably they select behaviour further down. */
+#define SHORT_LOG 1
+#define ROUND_ROBIN
+
+/* use_x11 is the run-time switch for the X11 window: it defaults to 1 when
+ * X11 support is compiled in (the -no-x11 option clears it) and is always 0
+ * otherwise. */
+#ifdef USE_X11
+#include <X11/Xlib.h>
+#include <X11/Xutil.h>
+int use_x11 = 1;
+#else
+int use_x11 = 0;
+#endif
+
+/* Set to 1 by the -demo option. */
+int demo = 0;
+/* Value of the -frames option; -1 when the option was not given. */
+int frames = -1;
+
+
+#include <pthread.h>
+#include <assert.h>
+#include <sys/time.h>
+
+#include <CL/cl.h>
+
+/* Print "Error: " followed by a printf-style message on stderr and exit with
+ * EXIT_FAILURE. The first argument must be a string literal because it is
+ * concatenated with the "Error: " prefix. */
+#define error(...) do { fprintf(stderr, "Error: " __VA_ARGS__); exit(EXIT_FAILURE); } while(0)
+/* Exit with EXIT_FAILURE, reporting the error code and the label `str`,
+ * when `err` is not CL_SUCCESS. */
+#define check(err, str) do { if(err != CL_SUCCESS) { fprintf(stderr, "OpenCL Error (%d): %s\n",err, str); exit(EXIT_FAILURE); }} while(0)
+
+/* UNUSED(x) marks a deliberately unused parameter. With GCC the parameter is
+ * renamed to UNUSED_x and tagged with the "unused" attribute; elsewhere the
+ * name is left as is. An existing UNUSED definition is kept. */
+#ifdef UNUSED
+#elif defined(__GNUC__)
+# define UNUSED(x) UNUSED_ ## x __attribute__((unused))
+#else
+# define UNUSED(x) x
+#endif
+
+/*
+ * OpenCL C source of mandelbrot_kernel. Each work-item handles one pixel of
+ * block number `iby`: it iterates z = z*z + c in double precision (the
+ * cl_khr_fp64 extension is enabled) until |z|^2 exceeds 4 or maxIt iterations
+ * were done, then stores a colour derived from the iteration count into `a`
+ * at the position given by its global ids.
+ *
+ * NOTE(review): v is capped at 256, not 255, so (255-v) wraps around as an
+ * unsigned value for v == 256 -- confirm whether that is intended.
+ */
+const char * kernel_src = "\
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n\
+#define TYPE double \n\
+#define MIN(a,b) (((a)<(b))? (a) : (b))\n\
+      __kernel void mandelbrot_kernel(__global uint * a,\n\
+          TYPE leftX, TYPE topY,\n\
+          TYPE stepX, TYPE stepY,\n\
+          uint maxIt, uint iby, uint block_size)\n\
+{\n\
+  TYPE xc = leftX + get_global_id(0) * stepX;\n\
+  TYPE yc = iby*block_size*stepY + topY  + get_global_id(1) * stepY;\n\
+  int it;\n\
+  TYPE x,y;\n\
+  x = y = (TYPE)0.0;\n\
+  for (it=0;it<maxIt;it++)\n\
+  {\n\
+    TYPE x2 = x*x;\n\
+    TYPE y2 = y*y;\n\
+    if (x2+y2 > (TYPE)4) break; \n\
+    TYPE twoxy = (TYPE)2*x*y;\n\
+    x = x2 - y2 + xc;\n\
+    y = twoxy + yc;\n\
+  }\n\
+  uint v = MIN((1024*((float)(it)/(2000))), 256);\n\
+  a[get_global_id(0) + get_global_id(1)*get_global_size(0)] = (v<<16|(255-v)<<8); \n\
+}";
+
+/* Run parameters; each default below can be overridden on the command line
+ * (see parse_args). */
+
+/* Number of blocks the image is split into along its height; main() requires
+ * height to be a multiple of nblocks. */
+static cl_uint nblocks = 8;
+/* Image dimensions in pixels. */
+static cl_uint height = 768;
+static cl_uint width = 1024;
+/* NOTE(review): presumably the iteration limit passed to the kernel as
+ * maxIt -- the call site is outside the visible part of the file. */
+static cl_uint maxIt = 20000;
+
+/* main() requires width to be a multiple of group_size. */
+static cl_uint group_size = 64;
+
+/* Bounds of the displayed region of the complex plane; moved and scaled by
+ * the keyboard handling in handle_events(). */
+static double leftX = -0.745;
+static double rightX = -0.74375;
+static double topY = .15;
+static double bottomY = .14875;
+
+#ifdef USE_X11
+      /* X11 data */
+      /* Connection, window, image and graphics context created by init_x11() */
+      static Display *dpy;
+      static Window win;
+      static XImage *bitmap;
+      static GC gc;
+      /* Key symbols resolved by init_x11(); Left starts at -1 until then */
+      static KeySym Left=-1, Right, Down, Up, Alt ;
+      /* NOTE(review): not used in the visible part of this file */
+      static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/* Release the X11 objects created by init_x11(): the image first, then the
+ * window, and finally the connection to the display. */
+static void exit_x11(void)
+{
+  Display *display = dpy;
+
+  XDestroyImage(bitmap);
+  XDestroyWindow(display, win);
+  XCloseDisplay(display);
+}
+
+/*
+ * Open the default X display, create and map a window titled "Mandelbrot",
+ * and wrap `buffer` in an XImage (stored in `bitmap`) of the same size.
+ *
+ * width, height: size of the window and of the image, in pixels.
+ * buffer:        pixel storage handed to XCreateImage.
+ *
+ * Also sets up the graphics context, the event mask, the WM_DELETE_WINDOW
+ * protocol and the key symbols used by handle_events(). Exits with status 0
+ * after printing a message when the display cannot be opened.
+ */
+static void init_x11(int width, int height, cl_uint *buffer)
+{
+  /* Attempt to open the display */
+  dpy = XOpenDisplay(NULL);
+
+  /* Failure: say why before leaving instead of exiting silently */
+  if (!dpy) {
+    fprintf(stderr, "Unable to open the X11 display\n");
+    exit(0);
+  }
+
+  unsigned long white = WhitePixel(dpy,DefaultScreen(dpy));
+  unsigned long black = BlackPixel(dpy,DefaultScreen(dpy));
+
+  win = XCreateSimpleWindow(dpy, DefaultRootWindow(dpy), 0, 0,
+      width, height, 0, black, white);
+
+  /* We want to be notified when the window appears */
+  XSelectInput(dpy, win, StructureNotifyMask);
+
+  /* Make it appear */
+  XMapWindow(dpy, win);
+
+  XTextProperty tp;
+  char name[128] = "Mandelbrot";
+  char *n = name;
+  Status st = XStringListToTextProperty(&n, 1, &tp);
+  if (st) {
+    XSetWMName(dpy, win, &tp);
+    /* The text property value was allocated by Xlib and is ours to free */
+    XFree(tp.value);
+  }
+
+  /* Send the pending requests to the server */
+  XFlush(dpy);
+
+  int depth = DefaultDepth(dpy, DefaultScreen(dpy));
+  Visual *visual = DefaultVisual(dpy, DefaultScreen(dpy));
+
+  /* Make bitmap */
+  bitmap = XCreateImage(dpy, visual, depth,
+      ZPixmap, 0, (char *)buffer,
+      width, height, 32, 0);
+
+  /* Init GC */
+  gc = XCreateGC(dpy, win, 0, NULL);
+  XSetForeground(dpy, gc, black);
+
+  XSelectInput(dpy, win, ExposureMask | KeyPressMask | StructureNotifyMask);
+
+  /* Ask the window manager to notify us instead of killing the connection
+   * when the window is closed */
+  Atom wmDeleteMessage;
+  wmDeleteMessage = XInternAtom(dpy, "WM_DELETE_WINDOW", False);
+  XSetWMProtocols(dpy, win, &wmDeleteMessage, 1);
+
+  /* Key symbols compared against in handle_events() */
+  Left = XStringToKeysym ("Left");
+  Right = XStringToKeysym ("Right");
+  Up = XStringToKeysym ("Up");
+  Down = XStringToKeysym ("Down");
+  Alt = XStringToKeysym ("Alt");
+}
+
+/*
+ * Wait for the next X event and apply it to the displayed region.
+ *
+ * The arrow keys shift the region by 5% of its current extent, '+' and '-'
+ * shrink or grow it by 2.5% on each side.
+ *
+ * Returns -1 when 'q' was typed, 0 otherwise.
+ */
+static int handle_events(void)
+{
+  XEvent event;
+  XNextEvent(dpy, &event);
+
+  const double coef = 0.05;
+
+  if (event.type == KeyPress)
+  {
+    KeySym key;
+    char text[255];
+
+    /* Keys that produce no character (arrows, modifiers) leave `text`
+     * untouched, so its first byte is only meaningful when len > 0. */
+    int len = XLookupString(&event.xkey, text, sizeof(text), &key, NULL);
+    char typed = (len > 0) ? text[0] : '\0';
+
+    double widthX = rightX - leftX;
+    double heightY = topY - bottomY;
+
+    if (key == Left)
+    {
+      leftX -= coef*widthX;
+      rightX -= coef*widthX;
+    }
+    else if (key == Right)
+    {
+      leftX += coef*widthX;
+      rightX += coef*widthX;
+    }
+    else if (key == Down)
+    {
+      topY += coef*heightY;
+      bottomY += coef*heightY;
+    }
+    else if (key == Up)
+    {
+      topY -= coef*heightY;
+      bottomY -= coef*heightY;
+    }
+    else if (typed == '-')
+    {
+      /* Zoom out */
+      leftX -= (coef/2)*widthX;
+      rightX += (coef/2)*widthX;
+      topY += (coef/2)*heightY;
+      bottomY -= (coef/2)*heightY;
+    }
+    else if (typed == '+')
+    {
+      /* Zoom in */
+      leftX += (coef/2)*widthX;
+      rightX -= (coef/2)*widthX;
+      topY -= (coef/2)*heightY;
+      bottomY += (coef/2)*heightY;
+    }
+
+    if (typed == 'q') {
+      return -1;
+    }
+  }
+
+  if (event.type==ButtonPress) {
+    /* tell where the mouse Button was Pressed */
+    printf("You pressed a button at (%i,%i)\n",
+        event.xbutton.x,event.xbutton.y);
+  }
+
+  return 0;
+}
+#endif //USE_X11
+
+/* Return the value that follows option argv[*i] and advance *i onto it;
+ * exit with an error message when the option is the last argument. */
+static const char *option_value(int argc, char **argv, int *i)
+{
+	if (*i + 1 >= argc) {
+		fprintf(stderr, "Error: option %s requires a value\n", argv[*i]);
+		exit(EXIT_FAILURE);
+	}
+	return argv[++(*i)];
+}
+
+/* Like option_value(), but parse the value as a decimal integer; exit with
+ * an error message when it is not a number. */
+static long option_long(int argc, char **argv, int *i)
+{
+	const char *name = argv[*i];
+	const char *value = option_value(argc, argv, i);
+	char *end;
+	long parsed = strtol(value, &end, 10);
+
+	if (end == value || *end != '\0') {
+		fprintf(stderr, "Error: invalid value '%s' for option %s\n", value, name);
+		exit(EXIT_FAILURE);
+	}
+	return parsed;
+}
+
+/*
+ * Parse the command line and update the corresponding global settings
+ * (width, height, nblocks, group_size, frames, demo, use_x11 and the bounds
+ * of the displayed region).
+ *
+ * -h prints the usage on stderr and exits. Unknown arguments are ignored.
+ * A missing or malformed option value terminates the program with
+ * EXIT_FAILURE. A consumed value is never re-examined as an option.
+ */
+static void parse_args(int argc, char **argv)
+{
+	int i;
+	for (i = 1; i < argc; i++) {
+		const char *opt = argv[i];
+
+		if (strcmp(opt, "-h") == 0) {
+			fprintf(stderr, "Usage: %s [-h] [ -width 1024] [-height 768] [-nblocks 16] [-group_size 64] [-no-x11] [-demo] [-frames N] [-pos leftx:rightx:bottomy:topy]\n", argv[0]);
+			exit(-1);
+		}
+		else if (strcmp(opt, "-width") == 0) {
+			width = option_long(argc, argv, &i);
+		}
+		else if (strcmp(opt, "-frames") == 0) {
+			frames = option_long(argc, argv, &i);
+		}
+		else if (strcmp(opt, "-height") == 0) {
+			height = option_long(argc, argv, &i);
+		}
+		else if (strcmp(opt, "-group_size") == 0) {
+			group_size = option_long(argc, argv, &i);
+		}
+		else if (strcmp(opt, "-nblocks") == 0) {
+			nblocks = option_long(argc, argv, &i);
+		}
+		else if (strcmp(opt, "-pos") == 0) {
+			const char *value = option_value(argc, argv, &i);
+			int ret = sscanf(value, "%lf:%lf:%lf:%lf", &leftX, &rightX, &bottomY, &topY);
+			/* Checked explicitly so that the validation survives NDEBUG builds */
+			if (ret != 4) {
+				fprintf(stderr, "Error: -pos expects leftx:rightx:bottomy:topy, got '%s'\n", value);
+				exit(EXIT_FAILURE);
+			}
+		}
+		else if (strcmp(opt, "-demo") == 0) {
+			demo = 1;
+			leftX = -50.22749575062760;
+			rightX = 48.73874621262927;
+			topY = -49.35016705749115;
+			bottomY = 49.64891691946615;
+		}
+		else if (strcmp(opt, "-no-x11") == 0) {
+#ifdef USE_X11
+			use_x11 = 0;
+#endif
+		}
+	}
+}
+
+/*
+ * Program entry point.
+ *
+ * Parses the command line, picks the first OpenCL platform that exposes GPU
+ * devices, builds the "mandelbrot_kernel" kernel and creates one
+ * out-of-order command queue per device. The image is split into nblocks
+ * horizontal bands, each backed by an OpenCL buffer that wraps a slice of
+ * the host image buffer (CL_MEM_USE_HOST_PTR). For every frame one kernel
+ * is enqueued per band, the bands are mapped for reading, the frame time is
+ * printed on stderr and, when X11 output is enabled, the bands are drawn.
+ *
+ * Returns 0 on normal termination. Exits with status 0 when no OpenCL
+ * platform is available.
+ */
+int main(int argc, char **argv) {
+#define MAX_DEVICES 20
+  cl_platform_id platforms[15];
+  const cl_uint max_platforms = sizeof(platforms)/sizeof(cl_platform_id);
+  /* Initialised so that a failing query is seen as "no platform". */
+  cl_uint num_platforms = 0;
+  /* Same capacity as cq[] so every listed device can get a queue. */
+  cl_device_id devices[MAX_DEVICES];
+  cl_uint num_devices = 0;
+  cl_context context;
+  cl_program program;
+  cl_kernel kernel;
+  cl_command_queue cq[MAX_DEVICES];
+  cl_int err;
+  cl_uint i;
+
+  parse_args(argc, argv);
+
+  cl_uint block_size = height/nblocks;
+  assert((height % nblocks) == 0);
+  assert((width % group_size) == 0);
+
+  err = clGetPlatformIDs(0, NULL, &num_platforms);
+  if (num_platforms == 0) {
+    printf("No OpenCL platform found. If you use SOCL, this could mean StarPU wasn't configured for OpenCL. Try disabling CUDA support in StarPU (export STARPU_NCUDA=0).\n");
+    exit(0);
+  }
+  /* Only max_platforms entries can be stored: never index past them. */
+  if (num_platforms > max_platforms)
+    num_platforms = max_platforms;
+  err = clGetPlatformIDs(max_platforms, platforms, NULL);
+  check(err, "clGetPlatformIDs");
+
+  /* Use the first platform that provides at least one GPU device. */
+  unsigned int platform_idx;
+  for (platform_idx=0; platform_idx<num_platforms; platform_idx++) {
+    err = clGetDeviceIDs(platforms[platform_idx], CL_DEVICE_TYPE_GPU, MAX_DEVICES, devices, &num_devices);
+    if (err == CL_DEVICE_NOT_FOUND) {
+      /* No GPU on this platform is not fatal: try the next platform. */
+      num_devices = 0;
+      continue;
+    }
+    check(err, "clGetDeviceIDs");
+    if (num_devices != 0)
+      break;
+  }
+  if (num_devices == 0)
+    error("No OpenCL device found\n");
+  /* clGetDeviceIDs reports the number of available devices, which may be
+   * larger than the number of entries actually written into devices[]. */
+  if (num_devices > MAX_DEVICES)
+    num_devices = MAX_DEVICES;
+
+  cl_context_properties properties[] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[platform_idx], 0};
+  context = clCreateContext(properties, num_devices, devices, NULL, NULL, &err);
+  check(err, "clCreateContext");
+
+  program = clCreateProgramWithSource(context, 1, &kernel_src, NULL, &err);
+  check(err, "clCreateProgram");
+
+  err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
+  check(err, "clBuildProgram");
+
+  kernel = clCreateKernel(program, "mandelbrot_kernel", &err);
+  check(err, "clCreateKernel");
+
+  /* One out-of-order queue per device; every creation is checked. */
+  for (i=0; i<num_devices; i++) {
+    cq[i] = clCreateCommandQueue(context, devices[i],  CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
+    check(err, "clCreateCommandQueue");
+  }
+
+  /* Host-side image, one cl_uint per pixel. */
+  cl_uint *buffer;
+  buffer = malloc(height*width*sizeof(cl_uint));
+  if (buffer == NULL) {
+    fprintf(stderr, "Cannot allocate the image buffer\n");
+    exit(EXIT_FAILURE);
+  }
+
+#ifdef USE_X11
+  if (use_x11)
+    init_x11(width, height, buffer);
+#endif // USE_X11
+
+  /* One OpenCL buffer per band, each wrapping its slice of the host image. */
+  cl_mem block_handles[nblocks];
+
+  cl_uint iby;
+
+  for (iby = 0; iby < nblocks; iby++) {
+    cl_uint *data = &buffer[iby*block_size*width];
+    block_handles[iby] = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, block_size*width*sizeof(cl_uint), data, &err);
+    check(err, "clCreateBuffer");
+  }
+
+  int stop = 0;
+  int frame = 0;
+
+  while (!stop) {
+    struct timeval start, end;
+    gettimeofday(&start, NULL);
+
+    /* A frame limit was requested: stop once it is reached. */
+    if (frames != -1) {
+      frame++;
+      stop = (frame == frames);
+    }
+
+    double stepX = (rightX - leftX)/width;
+    double stepY = (topY - bottomY)/height;
+    cl_event ker_events[nblocks];
+    void * ptrs[nblocks];
+
+    /* Enqueue one kernel per band. */
+    for (iby = 0; iby < nblocks; iby++) {
+      err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &block_handles[iby]);
+      check(err, "clSetKernelArg out");
+      err = clSetKernelArg(kernel, 1, sizeof(cl_double), &leftX);
+      check(err, "clSetKernelArg leftX");
+      err = clSetKernelArg(kernel, 2, sizeof(cl_double), &topY);
+      check(err, "clSetKernelArg topY");
+      err = clSetKernelArg(kernel, 3, sizeof(cl_double), &stepX);
+      check(err, "clSetKernelArg stepX");
+      err = clSetKernelArg(kernel, 4, sizeof(cl_double), &stepY);
+      check(err, "clSetKernelArg stepY");
+      err = clSetKernelArg(kernel, 5, sizeof(cl_uint), &maxIt);
+      check(err, "clSetKernelArg maxIt");
+      err = clSetKernelArg(kernel, 6, sizeof(cl_uint), &iby);
+      check(err, "clSetKernelArg iby");
+      err = clSetKernelArg(kernel, 7, sizeof(cl_uint), &block_size);
+      check(err, "clSetKernelArg block_size");
+
+      size_t local[3] = {group_size, 1, 1};
+      size_t global[3] = {width, block_size, 1};
+#ifdef ROUND_ROBIN
+      int dev = iby % num_devices;
+#else
+      int dev = 0;
+#endif
+      err = clEnqueueNDRangeKernel(cq[dev], kernel, 3, NULL, global, local, 0, NULL, &ker_events[iby]);
+      check(err, "clEnqueueNDRangeKernel");
+    }
+
+    /* Map every band for reading; each map waits for that band's kernel. */
+    for (iby = 0; iby < nblocks; iby++) {
+#ifdef ROUND_ROBIN
+      int dev = iby % num_devices;
+#else
+      int dev = 0;
+#endif
+      ptrs[iby] = clEnqueueMapBuffer(cq[dev], block_handles[iby], CL_FALSE,CL_MAP_READ, 0, block_size*width*sizeof(cl_uint), 1, &ker_events[iby], NULL, &err);
+      check(err, "clEnqueueMapBuffer");
+    }
+
+    /* Wait for all the work enqueued for this frame. */
+#ifdef ROUND_ROBIN
+    for (i = 0; i < num_devices; i++)
+      clFinish(cq[i]);
+#else
+    clFinish(cq[0]);
+#endif
+
+    gettimeofday(&end, NULL);
+    /* Elapsed time in microseconds; printed below in milliseconds. */
+    double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
+
+#ifdef SHORT_LOG
+    fprintf(stderr, "%f\n", timing/1000.0);
+#else
+    fprintf(stderr, "Time to generate frame : %f ms\n", timing/1000.0);
+    fprintf(stderr, "%14.14f:%14.14f:%14.14f:%14.14f\n", leftX, rightX, bottomY, topY);
+#endif
+
+#ifdef USE_X11
+    if (use_x11) {
+      /* Draw the frame band by band. */
+      for (iby = 0; iby < nblocks; iby++) {
+        pthread_mutex_lock(&mutex);
+        XPutImage(dpy, win, gc, bitmap,
+            0, iby*block_size,
+            0, iby*block_size,
+            width, block_size);
+        pthread_mutex_unlock(&mutex);
+      }
+    }
+#endif
+
+    /* Unmap the bands and drop the kernel events of this frame. */
+    for (iby = 0; iby < nblocks; iby++) {
+#ifdef ROUND_ROBIN
+      int dev = iby % num_devices;
+#else
+      int dev = 0;
+#endif
+      clEnqueueUnmapMemObject(cq[dev], block_handles[iby], ptrs[iby], 0, NULL, NULL);
+      clReleaseEvent(ker_events[iby]);
+    }
+
+    if (demo) {
+      /* Zoom in */
+      double zoom_factor = 0.05;
+      double widthX = rightX - leftX;
+      double heightY = topY - bottomY;
+      leftX += (zoom_factor/2)*widthX;
+      rightX -= (zoom_factor/2)*widthX;
+      topY -= (zoom_factor/2)*heightY;
+      bottomY += (zoom_factor/2)*heightY;
+    }
+    else {
+#ifdef USE_X11
+      if (use_x11) {
+        /* NOTE(review): the value returned by handle_events() is ignored
+         * here; confirm whether a quit request should set `stop`. */
+        handle_events();
+      }
+#else
+      stop = 1;
+#endif
+    }
+  }
+
+#ifdef USE_X11
+  if (use_x11)
+    exit_x11();
+#endif
+
+  for (iby = 0; iby < nblocks; iby++) {
+    clReleaseMemObject(block_handles[iby]);
+  }
+
+  for (i=0; i<num_devices; i++)
+    clReleaseCommandQueue(cq[i]);
+
+  clReleaseKernel(kernel);
+  clReleaseProgram(program);
+  clReleaseContext(context);
+
+  return 0;
+}