Преглед изворни кода

SOCL: add support for manual scheduling and prefetching

Commands submitted in a command queue attached to a device other than the virtual one will be executed by the specified device.
  * clEnqueueWriteBuffer may be used to initialize and prefetch a buffer on a specific device
  * clEnqueueNDRangeKernel may be used to schedule a kernel on a specific device
  * clEnqueueReadBuffer may be used to ensure a buffer is read from a specific device (is that of any use?)
Sylvain Henry пре 13 година
родитељ
комит
f8a139ce57

+ 5 - 5
socl/examples/Makefile.am

@@ -51,21 +51,21 @@ examplebin_PROGRAMS =
 examplebin_PROGRAMS +=		\
 	basic/basic		\
 	clinfo/clinfo \
-  matmul/matmul
+  matmul/matmul \
+  mansched/mansched
 
-#	mandelbrot/mandelbrot
 
 SOCL_EXAMPLES +=		\
 	basic/basic		\
 	clinfo/clinfo\
-  matmul/matmul
+  matmul/matmul \
+  mansched/mansched
 
-#	mandelbrot/mandelbrot
 
 basic_basic_SOURCES = basic/basic.c
 clinfo_clinfo_SOURCES = clinfo/clinfo.c
 matmul_matmul_SOURCES = matmul/matmul.c
-#mandelbrot_mandelbrot_SOURCES = mandelbrot/mandelbrot.c
+mansched_mansched_SOURCES = mansched/mansched.c
 
 #mandelbrot_mandelbrot_CPPFLAGS = $(AM_CPPFLAGS) $(AM_CFLAGS)
 #if HAVE_X11

+ 1 - 1
socl/examples/basic/basic.c

@@ -81,7 +81,7 @@ int main(int UNUSED(argc), char** UNUSED(argv)) {
    printf("Querying devices...\n");
    unsigned int platform_idx;
    for (platform_idx=0; platform_idx<num_platforms; platform_idx++) {
-      err = clGetDeviceIDs(platforms[platform_idx], CL_DEVICE_TYPE_GPU, sizeof(devices)/sizeof(cl_device_id), devices, &num_devices);
+      err = clGetDeviceIDs(platforms[platform_idx], CL_DEVICE_TYPE_ACCELERATOR, sizeof(devices)/sizeof(cl_device_id), devices, &num_devices);
       check(err, "clGetDeviceIDs");
       if (num_devices != 0)
          break;

+ 213 - 0
socl/examples/mansched/mansched.c

@@ -0,0 +1,213 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010,2011 University of Bordeaux
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <CL/cl.h>
+
+#define error(...) do { fprintf(stderr, "Error: " __VA_ARGS__); exit(EXIT_FAILURE); } while(0)
+#define check(err, str) do { if(err != CL_SUCCESS) { fprintf(stderr, "OpenCL Error (%d): %s\n",err, str); exit(EXIT_FAILURE); }} while(0)
+
+#ifdef UNUSED
+#elif defined(__GNUC__)
+# define UNUSED(x) UNUSED_ ## x __attribute__((unused))
+#else
+# define UNUSED(x) x
+#endif
+
+#define SIZE 1024
+#define TYPE float
+#define REALSIZE (SIZE * sizeof(TYPE))
+
+const char * kernel_src = "__kernel void add(__global float*s1, __global float*s2, __global float*d) { \
+   size_t x = get_global_id(0);\
+   size_t y = get_global_id(1);\
+   size_t w = get_global_size(0); \
+   int idx = y*w+x; \
+   d[idx] = s1[idx] + s2[idx];\
+}";
+
+
+
+int main(int UNUSED(argc), char** UNUSED(argv)) {
+   cl_platform_id platforms[15];
+   cl_uint num_platforms;
+   cl_device_id devices[15];
+   cl_uint num_devices;
+   cl_context context;
+   cl_program program;
+   cl_kernel kernel;
+   cl_mem s1m, s2m, dm;
+   cl_command_queue cq;
+   int d;
+   cl_int err;
+
+   TYPE s1[SIZE],s2[SIZE],dst[SIZE];
+
+   {
+      int i;
+      for (i=0; i<SIZE; i++) {
+         s1[i] = 2.0;
+         s2[i] = 7.0;
+         dst[i] = 98.0;
+      }
+   }
+
+   printf("Querying platform...\n");
+   err = clGetPlatformIDs(0, NULL, &num_platforms);
+   if (num_platforms == 0) {
+      printf("No OpenCL platform found.\n");
+      exit(77);
+   }
+   err = clGetPlatformIDs(sizeof(platforms)/sizeof(cl_platform_id), platforms, NULL);
+   check(err, "clGetPlatformIDs");
+
+   printf("Querying devices...\n");
+   unsigned int platform_idx;
+   for (platform_idx=0; platform_idx<num_platforms; platform_idx++) {
+      err = clGetDeviceIDs(platforms[platform_idx], CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_CPU, sizeof(devices)/sizeof(cl_device_id), devices, &num_devices);
+      check(err, "clGetDeviceIDs");
+
+      for (d=0; d<num_devices; d++) {
+
+         printf("Creating context...\n");
+         cl_context_properties properties[] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[platform_idx], 0};
+         context = clCreateContext(properties, 1, &devices[d], NULL, NULL, &err);
+         check(err, "clCreateContext");
+
+         printf("Creating program...\n");
+         program = clCreateProgramWithSource(context, 1, &kernel_src, NULL, &err);
+         check(err, "clCreateProgram");
+
+         printf("Building program...\n");
+         err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
+         check(err, "clBuildProgram");
+
+         printf("Creating kernel...\n");
+         kernel = clCreateKernel(program, "add", &err);
+         check(err, "clCreateKernel");
+
+         printf("Creating buffers...\n");
+         s1m = clCreateBuffer(context, CL_MEM_READ_WRITE, REALSIZE, NULL, &err);
+         check(err, "clCreateBuffer s1");
+         s2m = clCreateBuffer(context, CL_MEM_READ_ONLY, REALSIZE, NULL, &err);
+         check(err, "clCreateBuffer s2");
+         dm = clCreateBuffer(context, CL_MEM_WRITE_ONLY, REALSIZE, NULL, &err);
+         check(err, "clCreateBuffer dst");
+
+         printf("Creating command queue...\n");
+         cl_event eventW1, eventW2, eventK, eventR;
+
+#ifdef PROFILING
+         cq = clCreateCommandQueue(context, devices[d], CL_QUEUE_PROFILING_ENABLE, &err);
+#else
+         cq = clCreateCommandQueue(context, devices[d], 0, &err);
+#endif
+         check(err, "clCreateCommandQueue");
+
+         printf("Enqueueing WriteBuffers...\n");
+         err = clEnqueueWriteBuffer(cq, s1m, CL_FALSE, 0, REALSIZE, s1, 0, NULL, &eventW1);
+         check(err, "clEnqueueWriteBuffer s1");
+         err = clEnqueueWriteBuffer(cq, s2m, CL_FALSE, 0, REALSIZE, s2, 0, NULL, &eventW2);
+         check(err, "clEnqueueWriteBuffer s2");
+
+         printf("Setting kernel arguments...\n");
+         err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &s1m);
+         check(err, "clSetKernelArg 0");
+         err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &s2m);
+         check(err, "clSetKernelArg 1");
+         err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &dm);
+         check(err, "clSetKernelArg 2");
+
+         printf("Enqueueing NDRangeKernel...\n");
+         size_t local[3] = {16, 1, 1};
+         size_t global[3] = {1024, 1, 1};
+         cl_event deps[] = {eventW1,eventW2};
+         err = clEnqueueNDRangeKernel(cq, kernel, 3, NULL, global, local, 2, deps, &eventK);
+         check(err, "clEnqueueNDRangeKernel");
+
+         printf("Enqueueing ReadBuffer...\n");
+         err = clEnqueueReadBuffer(cq, dm, CL_FALSE, 0, REALSIZE, dst, 0, NULL, &eventR);
+         check(err, "clEnqueueReadBuffer");
+
+         printf("Finishing queue...\n");
+         clFinish(cq);
+
+         printf("Data...\n");
+         {
+            int i;
+            for (i=0; i<SIZE; i++) {
+               printf("%f ", dst[i]);
+            }
+            printf("\n");
+         }
+
+#ifdef PROFILING
+         #define DURATION(event,label) do { \
+            cl_ulong t0,t1; \
+            err = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &t0, NULL);\
+            check(err, "clGetEventProfilingInfo");\
+            err = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &t1, NULL);\
+            check(err, "clGetEventProfilingInfo");\
+            printf("Profiling %s: %lu nanoseconds\n", label, t1-t0);\
+         } while (0);
+
+         DURATION(eventW1, "first buffer writing");
+         DURATION(eventW2, "second buffer writing");
+         DURATION(eventK, "kernel execution");
+         DURATION(eventR, "result buffer reading");
+#endif
+
+
+         printf("Releasing events...\n");
+         err = clReleaseEvent(eventW1);
+         err |= clReleaseEvent(eventW2);
+         err |= clReleaseEvent(eventK);
+         err |= clReleaseEvent(eventR);
+         check(err, "clReleaseCommandQueue");
+
+         printf("Releasing command queue...\n");
+         err = clReleaseCommandQueue(cq);
+         check(err, "clReleaseCommandQueue");
+
+         printf("Releasing buffers...\n");
+         err = clReleaseMemObject(s1m);
+         check(err, "clReleaseMemObject s1");
+         err = clReleaseMemObject(s2m);
+         check(err, "clReleaseMemObject s2");
+         err = clReleaseMemObject(dm);
+         check(err, "clReleaseMemObject d");
+
+         printf("Releasing kernel...\n");
+         err = clReleaseKernel(kernel);
+         check(err, "clReleaseKernel");
+
+         printf("Releasing program...\n");
+         err = clReleaseProgram(program);
+         check(err, "clReleaseProgram");
+
+         printf("Releasing context...\n");
+         err = clReleaseContext(context);
+         check(err, "clReleaseContext");
+      }
+   }
+
+   return 0;
+}

+ 6 - 0
socl/src/cl_enqueuecopybuffer.c

@@ -84,6 +84,12 @@ cl_int command_copy_buffer_submit(command_copy_buffer cmd) {
 	task->handles[1] = dst_buffer->handle;
 	task->cl = &codelet_copybuffer;
 
+	/* Execute the task on a specific worker? */
+	if (cmd->_command.cq->device != &socl_virtual_device) {
+	  task->execute_on_a_specific_worker = 1;
+	  task->workerid = (int)(intptr_t)cmd->_command.cq->device;
+	}
+
 	arg = (struct arg_copybuffer*)malloc(sizeof(struct arg_copybuffer));
 	arg->src_offset = src_offset;
 	arg->dst_offset = dst_offset;

+ 6 - 0
socl/src/cl_enqueuendrangekernel.c

@@ -127,6 +127,12 @@ cl_int command_ndrange_kernel_submit(command_ndrange_kernel cmd) {
 	task->cl_arg = cmd;
 	task->cl_arg_size = sizeof(cmd);
 
+	/* Execute the task on a specific worker? */
+	if (cmd->_command.cq->device != &socl_virtual_device) {
+	  task->execute_on_a_specific_worker = 1;
+	  task->workerid = (int)(intptr_t)cmd->_command.cq->device;
+	}
+
 	struct starpu_codelet * codelet = cmd->codelet;
 
 	/* We need to detect which parameters are OpenCL's memory objects and

+ 6 - 0
socl/src/cl_enqueuereadbuffer.c

@@ -82,6 +82,12 @@ cl_int command_read_buffer_submit(command_read_buffer cmd) {
 	task->handles[0] = buffer->handle;
 	task->cl = &codelet_readbuffer;
 
+	/* Execute the task on a specific worker? */
+	if (cmd->_command.cq->device != &socl_virtual_device) {
+	  task->execute_on_a_specific_worker = 1;
+	  task->workerid = (int)(intptr_t)cmd->_command.cq->device;
+	}
+
 	arg = (struct arg_readbuffer*)malloc(sizeof(struct arg_readbuffer));
 	arg->offset = offset;
 	arg->cb = cb;

+ 6 - 0
socl/src/cl_enqueuewritebuffer.c

@@ -104,6 +104,12 @@ cl_int command_write_buffer_submit(command_write_buffer cmd) {
 	task->cl_arg = arg;
 	task->cl_arg_size = sizeof(struct arg_writebuffer);
 
+	/* Execute the task on a specific worker? */
+	if (cmd->_command.cq->device != &socl_virtual_device) {
+	  task->execute_on_a_specific_worker = 1;
+	  task->workerid = (int)(intptr_t)cmd->_command.cq->device;
+	}
+
 	gc_entity_store(&arg->buffer, buffer);
 
 	//The buffer now contains meaningful data

+ 26 - 13
socl/src/cl_getdeviceids.c

@@ -35,8 +35,8 @@ soclGetDeviceIDs(cl_platform_id   platform,
       socl_init_starpu();
 
    if (_starpu_init_failed) {
-	*num_devices = 0;
-	return CL_SUCCESS;
+      *num_devices = 0;
+      return CL_SUCCESS;
    }
 
    if (platform != NULL && platform != &socl_platform)
@@ -50,19 +50,32 @@ soclGetDeviceIDs(cl_platform_id   platform,
       && (device_type != CL_DEVICE_TYPE_ALL))
       return CL_INVALID_DEVICE_TYPE;
 
-   {
-      int i;
-      unsigned int num = 0;
-      for (i=0; i<socl_device_count; i++) {
-         if (socl_devices[i].type & device_type) {
-            if (devices != NULL && num < num_entries)
-               devices[num] = (cl_device_id)&socl_devices[i];
-            num++;
-         }
+   unsigned int num = 0;
+   if (socl_virtual_device.type & device_type) {
+      if (devices != NULL && num < num_entries) devices[num] = (cl_device_id)&socl_virtual_device;
+      num++;
+   }
+
+   int ndevs = starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER);
+
+   int workers[ndevs];
+   starpu_worker_get_ids_by_type(STARPU_OPENCL_WORKER, workers, ndevs);
+
+   int i;
+   for (i=0; i < ndevs; i++) {
+      int devid = starpu_worker_get_devid(workers[i]);
+      cl_device_id dev;
+      starpu_opencl_get_device(devid, &dev);
+      cl_device_type typ;
+      clGetDeviceInfo(dev, CL_DEVICE_TYPE, sizeof(typ), &typ, NULL);
+      if (typ & device_type) {
+         if (devices != NULL && num < num_entries) devices[num] = (cl_device_id)(intptr_t)workers[i];
+         num++;
       }
-      if (num_devices != NULL)
-         *num_devices = num;
    }
 
+   if (num_devices != NULL)
+      *num_devices = num;
+
    return CL_SUCCESS;
 }

+ 58 - 59
socl/src/cl_getdeviceinfo.c

@@ -18,9 +18,6 @@
 #include "getinfo.h"
 
 
-/**
- * \brief Return dummy infos
- */
 CL_API_ENTRY cl_int CL_API_CALL
 soclGetDeviceInfo(cl_device_id    device,
                 cl_device_info  param_name, 
@@ -28,65 +25,67 @@ soclGetDeviceInfo(cl_device_id    device,
                 void *          param_value,
                 size_t *        param_value_size_ret) CL_API_SUFFIX__VERSION_1_0 
 {
-   int i, found=0;
-   for (i=0; i<socl_device_count; i++) {
-      if (device == &socl_devices[i]) {
-        found = 1;
-        break;
-      }
-   }
 
-   if (!found)
-      return CL_INVALID_DEVICE;
+   //FIXME: we do not check if the device is valid
+   /* if (device != &socl_virtual_device && device is not a valid StarPU worker identifier)
+      return CL_INVALID_DEVICE;*/
 
-   switch (param_name) {
-      INFO_CASE(CL_DEVICE_TYPE, device->type)
-      INFO_CASE(CL_DEVICE_VENDOR_ID, SOCL_DEVICE_VENDOR_ID)
-      INFO_CASE(CL_DEVICE_MAX_COMPUTE_UNITS, device->max_compute_units)
-      INFO_CASE(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, device->max_work_item_dimensions)
-      INFO_CASE(CL_DEVICE_MAX_WORK_ITEM_SIZES, device->max_work_item_sizes)
-      INFO_CASE(CL_DEVICE_MAX_WORK_GROUP_SIZE, device->max_work_group_size)
-      INFO_CASE(CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, device->preferred_vector_widths[0])
-      INFO_CASE(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, device->preferred_vector_widths[1])
-      INFO_CASE(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, device->preferred_vector_widths[2])
-      INFO_CASE(CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, device->preferred_vector_widths[3])
-      INFO_CASE(CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, device->preferred_vector_widths[4])
-      INFO_CASE(CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, device->preferred_vector_widths[5])
-      INFO_CASE(CL_DEVICE_MAX_CLOCK_FREQUENCY, device->max_clock_frequency)
-      INFO_CASE(CL_DEVICE_ADDRESS_BITS, device->address_bits)
-      INFO_CASE(CL_DEVICE_MAX_MEM_ALLOC_SIZE, device->max_mem_alloc_size)
-      INFO_CASE(CL_DEVICE_IMAGE_SUPPORT, device->image_support)
-      INFO_CASE(CL_DEVICE_MAX_PARAMETER_SIZE, device->max_parameter_size)
-      INFO_CASE(CL_DEVICE_MEM_BASE_ADDR_ALIGN, device->mem_base_addr_align)
-      INFO_CASE(CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, device->min_data_type_align_size)
-      INFO_CASE(CL_DEVICE_SINGLE_FP_CONFIG, device->single_fp_config)
-      INFO_CASE(CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, device->global_mem_cache_type)
-      INFO_CASE(CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, device->global_mem_cacheline_size)
-      INFO_CASE(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, device->global_mem_cache_size)
-      INFO_CASE(CL_DEVICE_GLOBAL_MEM_SIZE, device->global_mem_size)
-      INFO_CASE(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, device->max_constant_buffer_size)
-      INFO_CASE(CL_DEVICE_MAX_CONSTANT_ARGS, device->max_constant_args)
-      INFO_CASE(CL_DEVICE_LOCAL_MEM_TYPE, device->local_mem_type)
-      INFO_CASE(CL_DEVICE_LOCAL_MEM_SIZE, device->local_mem_size)
-      INFO_CASE(CL_DEVICE_ERROR_CORRECTION_SUPPORT, device->error_correction_support)
-      INFO_CASE(CL_DEVICE_PROFILING_TIMER_RESOLUTION, device->profiling_timer_resolution)
-      INFO_CASE(CL_DEVICE_ENDIAN_LITTLE, device->endian_little)
-      INFO_CASE(CL_DEVICE_AVAILABLE, device->available)
-      INFO_CASE(CL_DEVICE_COMPILER_AVAILABLE, device->compiler_available)
-      INFO_CASE(CL_DEVICE_EXECUTION_CAPABILITIES, device->execution_capabilities)
-      INFO_CASE(CL_DEVICE_QUEUE_PROPERTIES, device->queue_properties)
-      case CL_DEVICE_PLATFORM: {
-         cl_platform_id p = &socl_platform;
-         INFO_CASE_EX2(p);
+   if (device == &socl_virtual_device) {
+      switch (param_name) {
+         INFO_CASE(CL_DEVICE_TYPE, device->type)
+         INFO_CASE(CL_DEVICE_VENDOR_ID, SOCL_DEVICE_VENDOR_ID)
+         INFO_CASE(CL_DEVICE_MAX_COMPUTE_UNITS, device->max_compute_units)
+         INFO_CASE(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, device->max_work_item_dimensions)
+         INFO_CASE(CL_DEVICE_MAX_WORK_ITEM_SIZES, device->max_work_item_sizes)
+         INFO_CASE(CL_DEVICE_MAX_WORK_GROUP_SIZE, device->max_work_group_size)
+         INFO_CASE(CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, device->preferred_vector_widths[0])
+         INFO_CASE(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, device->preferred_vector_widths[1])
+         INFO_CASE(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, device->preferred_vector_widths[2])
+         INFO_CASE(CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, device->preferred_vector_widths[3])
+         INFO_CASE(CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, device->preferred_vector_widths[4])
+         INFO_CASE(CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, device->preferred_vector_widths[5])
+         INFO_CASE(CL_DEVICE_MAX_CLOCK_FREQUENCY, device->max_clock_frequency)
+         INFO_CASE(CL_DEVICE_ADDRESS_BITS, device->address_bits)
+         INFO_CASE(CL_DEVICE_MAX_MEM_ALLOC_SIZE, device->max_mem_alloc_size)
+         INFO_CASE(CL_DEVICE_IMAGE_SUPPORT, device->image_support)
+         INFO_CASE(CL_DEVICE_MAX_PARAMETER_SIZE, device->max_parameter_size)
+         INFO_CASE(CL_DEVICE_MEM_BASE_ADDR_ALIGN, device->mem_base_addr_align)
+         INFO_CASE(CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, device->min_data_type_align_size)
+         INFO_CASE(CL_DEVICE_SINGLE_FP_CONFIG, device->single_fp_config)
+         INFO_CASE(CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, device->global_mem_cache_type)
+         INFO_CASE(CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, device->global_mem_cacheline_size)
+         INFO_CASE(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, device->global_mem_cache_size)
+         INFO_CASE(CL_DEVICE_GLOBAL_MEM_SIZE, device->global_mem_size)
+         INFO_CASE(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, device->max_constant_buffer_size)
+         INFO_CASE(CL_DEVICE_MAX_CONSTANT_ARGS, device->max_constant_args)
+         INFO_CASE(CL_DEVICE_LOCAL_MEM_TYPE, device->local_mem_type)
+         INFO_CASE(CL_DEVICE_LOCAL_MEM_SIZE, device->local_mem_size)
+         INFO_CASE(CL_DEVICE_ERROR_CORRECTION_SUPPORT, device->error_correction_support)
+         INFO_CASE(CL_DEVICE_PROFILING_TIMER_RESOLUTION, device->profiling_timer_resolution)
+         INFO_CASE(CL_DEVICE_ENDIAN_LITTLE, device->endian_little)
+         INFO_CASE(CL_DEVICE_AVAILABLE, device->available)
+         INFO_CASE(CL_DEVICE_COMPILER_AVAILABLE, device->compiler_available)
+         INFO_CASE(CL_DEVICE_EXECUTION_CAPABILITIES, device->execution_capabilities)
+         INFO_CASE(CL_DEVICE_QUEUE_PROPERTIES, device->queue_properties)
+         case CL_DEVICE_PLATFORM: {
+                                  cl_platform_id p = &socl_platform;
+                                  INFO_CASE_EX2(p);
+                               }
+         INFO_CASE_STRING(CL_DEVICE_NAME, device->name)
+         INFO_CASE_STRING(CL_DEVICE_VENDOR, SOCL_VENDOR)
+         INFO_CASE_STRING(CL_DRIVER_VERSION, SOCL_DRIVER_VERSION)
+         INFO_CASE_STRING(CL_DEVICE_PROFILE, SOCL_PROFILE)
+         INFO_CASE_STRING(CL_DEVICE_VERSION, SOCL_VERSION)
+         INFO_CASE_STRING(CL_DEVICE_EXTENSIONS, device->extensions)
+         default:
+            return CL_INVALID_VALUE;
       }
-      INFO_CASE_STRING(CL_DEVICE_NAME, device->name)
-      INFO_CASE_STRING(CL_DEVICE_VENDOR, SOCL_VENDOR)
-      INFO_CASE_STRING(CL_DRIVER_VERSION, SOCL_DRIVER_VERSION)
-      INFO_CASE_STRING(CL_DEVICE_PROFILE, SOCL_PROFILE)
-      INFO_CASE_STRING(CL_DEVICE_VERSION, SOCL_VERSION)
-      INFO_CASE_STRING(CL_DEVICE_EXTENSIONS, device->extensions)
-      default:
-         return CL_INVALID_VALUE;
+   }
+   else {
+      int devid = starpu_worker_get_devid((int)(intptr_t)device);
+      cl_device_id dev;
+      starpu_opencl_get_device(devid, &dev);
+      return clGetDeviceInfo(dev, param_name, param_value_size, param_value, param_value_size_ret);
    }
 
    return CL_SUCCESS;

+ 32 - 271
socl/src/devices.c

@@ -24,275 +24,36 @@ const char * __attribute__ ((aligned (16))) SOCL_DRIVER_VERSION = "0.1";
 
 const cl_uint __attribute__ ((aligned (16))) SOCL_DEVICE_VENDOR_ID = 666;
 
-const struct _cl_device_id socl_devices[] = {
-   { 
-      .dispatch = &socl_master_dispatch,
-      .type = CL_DEVICE_TYPE_CPU,
-      .max_compute_units = 1,
-      .max_work_item_dimensions = 3,
-      .max_work_item_sizes = {1,1,1},
-      .max_work_group_size = 1,
-      .preferred_vector_widths = {16,8,4,2,4,2},
-      .max_clock_frequency = 3000,
-      .address_bits = 64,
-      .max_mem_alloc_size = 1024*1024*1024,
-      .image_support = CL_FALSE,
-      .max_parameter_size = 256,
-      .mem_base_addr_align = 0,
-      .min_data_type_align_size = 0,
-      .single_fp_config = CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN,
-      .global_mem_cache_type = CL_READ_WRITE_CACHE,
-      .global_mem_cacheline_size = 128,
-      .global_mem_cache_size = 16*1024,
-      .global_mem_size = (cl_ulong)4*1024*1024*1024,
-      .max_constant_args = 8,
-      .local_mem_type = CL_GLOBAL,
-      .local_mem_size = 16*1024,
-      .error_correction_support = CL_FALSE,
-      .profiling_timer_resolution = 100,
-      .endian_little = CL_TRUE,
-      .available = CL_TRUE,
-      .compiler_available = CL_TRUE,
-      .execution_capabilities = CL_EXEC_KERNEL,
-      .queue_properties = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE,
-      .name = "StarPU virtual CPU 1",
-      .extensions = ""
-   },
-   {
-      .dispatch = &socl_master_dispatch,
-      .type = CL_DEVICE_TYPE_GPU,
-      .max_compute_units = 12,
-      .max_work_item_dimensions = 3,
-      .max_work_item_sizes = {512,512,64},
-      .max_work_group_size = 512,
-      .preferred_vector_widths = {16,8,4,2,4,2},
-      .max_clock_frequency = 1600,
-      .address_bits = 32,
-      .max_mem_alloc_size = 100*1024*1024,
-      .image_support = CL_FALSE,
-      .max_parameter_size = 256,
-      .mem_base_addr_align = 0,
-      .min_data_type_align_size = 0,
-      .single_fp_config = CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN,
-      .global_mem_cache_type = CL_NONE,
-      .global_mem_cacheline_size = 0,
-      .global_mem_cache_size = 0,
-      .global_mem_size = (cl_ulong)500*1024*1024,
-      .max_constant_args = 8,
-      .local_mem_type = CL_LOCAL,
-      .local_mem_size = 16*1024,
-      .error_correction_support = CL_FALSE,
-      .profiling_timer_resolution = 10,
-      .endian_little = CL_TRUE,
-      .available = CL_TRUE,
-      .compiler_available = CL_TRUE,
-      .execution_capabilities = CL_EXEC_KERNEL,
-      .queue_properties = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE,
-      .name = "StarPU virtual GPU 1",
-      .extensions = ""
-   },
-   {
-      .dispatch = &socl_master_dispatch,
-      .type = CL_DEVICE_TYPE_GPU,
-      .max_compute_units = 12,
-      .max_work_item_dimensions = 3,
-      .max_work_item_sizes = {512,512,64},
-      .max_work_group_size = 512,
-      .preferred_vector_widths = {16,8,4,2,4,2},
-      .max_clock_frequency = 1600,
-      .address_bits = 32,
-      .max_mem_alloc_size = 100*1024*1024,
-      .image_support = CL_FALSE,
-      .max_parameter_size = 256,
-      .mem_base_addr_align = 0,
-      .min_data_type_align_size = 0,
-      .single_fp_config = CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN,
-      .global_mem_cache_type = CL_NONE,
-      .global_mem_cacheline_size = 0,
-      .global_mem_cache_size = 0,
-      .global_mem_size = (cl_ulong)500*1024*1024,
-      .max_constant_args = 8,
-      .local_mem_type = CL_LOCAL,
-      .local_mem_size = 16*1024,
-      .error_correction_support = CL_FALSE,
-      .profiling_timer_resolution = 10,
-      .endian_little = CL_TRUE,
-      .available = CL_TRUE,
-      .compiler_available = CL_TRUE,
-      .execution_capabilities = CL_EXEC_KERNEL,
-      .queue_properties = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE,
-      .name = "StarPU virtual GPU 2",
-      .extensions = ""
-   },
-   {
-      .dispatch = &socl_master_dispatch,
-      .type = CL_DEVICE_TYPE_GPU,
-      .max_compute_units = 12,
-      .max_work_item_dimensions = 3,
-      .max_work_item_sizes = {512,512,64},
-      .max_work_group_size = 512,
-      .preferred_vector_widths = {16,8,4,2,4,2},
-      .max_clock_frequency = 1600,
-      .address_bits = 32,
-      .max_mem_alloc_size = 100*1024*1024,
-      .image_support = CL_FALSE,
-      .max_parameter_size = 256,
-      .mem_base_addr_align = 0,
-      .min_data_type_align_size = 0,
-      .single_fp_config = CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN,
-      .global_mem_cache_type = CL_NONE,
-      .global_mem_cacheline_size = 0,
-      .global_mem_cache_size = 0,
-      .global_mem_size = (cl_ulong)500*1024*1024,
-      .max_constant_args = 8,
-      .local_mem_type = CL_LOCAL,
-      .local_mem_size = 16*1024,
-      .error_correction_support = CL_FALSE,
-      .profiling_timer_resolution = 10,
-      .endian_little = CL_TRUE,
-      .available = CL_TRUE,
-      .compiler_available = CL_TRUE,
-      .execution_capabilities = CL_EXEC_KERNEL,
-      .queue_properties = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE,
-      .name = "StarPU virtual GPU 3",
-      .extensions = ""
-   },
-   {
-      .dispatch = &socl_master_dispatch,
-      .type = CL_DEVICE_TYPE_GPU,
-      .max_compute_units = 12,
-      .max_work_item_dimensions = 3,
-      .max_work_item_sizes = {512,512,64},
-      .max_work_group_size = 512,
-      .preferred_vector_widths = {16,8,4,2,4,2},
-      .max_clock_frequency = 1600,
-      .address_bits = 32,
-      .max_mem_alloc_size = 100*1024*1024,
-      .image_support = CL_FALSE,
-      .max_parameter_size = 256,
-      .mem_base_addr_align = 0,
-      .min_data_type_align_size = 0,
-      .single_fp_config = CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN,
-      .global_mem_cache_type = CL_NONE,
-      .global_mem_cacheline_size = 0,
-      .global_mem_cache_size = 0,
-      .global_mem_size = (cl_ulong)500*1024*1024,
-      .max_constant_args = 8,
-      .local_mem_type = CL_LOCAL,
-      .local_mem_size = 16*1024,
-      .error_correction_support = CL_FALSE,
-      .profiling_timer_resolution = 10,
-      .endian_little = CL_TRUE,
-      .available = CL_TRUE,
-      .compiler_available = CL_TRUE,
-      .execution_capabilities = CL_EXEC_KERNEL,
-      .queue_properties = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE,
-      .name = "StarPU virtual GPU 4",
-      .extensions = ""
-   },
-   {
-      .dispatch = &socl_master_dispatch,
-      .type = CL_DEVICE_TYPE_GPU,
-      .max_compute_units = 12,
-      .max_work_item_dimensions = 3,
-      .max_work_item_sizes = {512,512,64},
-      .max_work_group_size = 512,
-      .preferred_vector_widths = {16,8,4,2,4,2},
-      .max_clock_frequency = 1600,
-      .address_bits = 32,
-      .max_mem_alloc_size = 100*1024*1024,
-      .image_support = CL_FALSE,
-      .max_parameter_size = 256,
-      .mem_base_addr_align = 0,
-      .min_data_type_align_size = 0,
-      .single_fp_config = CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN,
-      .global_mem_cache_type = CL_NONE,
-      .global_mem_cacheline_size = 0,
-      .global_mem_cache_size = 0,
-      .global_mem_size = (cl_ulong)500*1024*1024,
-      .max_constant_args = 8,
-      .local_mem_type = CL_LOCAL,
-      .local_mem_size = 16*1024,
-      .error_correction_support = CL_FALSE,
-      .profiling_timer_resolution = 10,
-      .endian_little = CL_TRUE,
-      .available = CL_TRUE,
-      .compiler_available = CL_TRUE,
-      .execution_capabilities = CL_EXEC_KERNEL,
-      .queue_properties = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE,
-      .name = "StarPU virtual GPU 5",
-      .extensions = ""
-   },
-   {
-      .dispatch = &socl_master_dispatch,
-      .type = CL_DEVICE_TYPE_GPU,
-      .max_compute_units = 12,
-      .max_work_item_dimensions = 3,
-      .max_work_item_sizes = {512,512,64},
-      .max_work_group_size = 512,
-      .preferred_vector_widths = {16,8,4,2,4,2},
-      .max_clock_frequency = 1600,
-      .address_bits = 32,
-      .max_mem_alloc_size = 100*1024*1024,
-      .image_support = CL_FALSE,
-      .max_parameter_size = 256,
-      .mem_base_addr_align = 0,
-      .min_data_type_align_size = 0,
-      .single_fp_config = CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN,
-      .global_mem_cache_type = CL_NONE,
-      .global_mem_cacheline_size = 0,
-      .global_mem_cache_size = 0,
-      .global_mem_size = (cl_ulong)500*1024*1024,
-      .max_constant_args = 8,
-      .local_mem_type = CL_LOCAL,
-      .local_mem_size = 16*1024,
-      .error_correction_support = CL_FALSE,
-      .profiling_timer_resolution = 10,
-      .endian_little = CL_TRUE,
-      .available = CL_TRUE,
-      .compiler_available = CL_TRUE,
-      .execution_capabilities = CL_EXEC_KERNEL,
-      .queue_properties = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE,
-      .name = "StarPU virtual GPU 6",
-      .extensions = ""
-   },
-   {
-      .dispatch = &socl_master_dispatch,
-      .type = CL_DEVICE_TYPE_GPU,
-      .max_compute_units = 12,
-      .max_work_item_dimensions = 3,
-      .max_work_item_sizes = {512,512,64},
-      .max_work_group_size = 512,
-      .preferred_vector_widths = {16,8,4,2,4,2},
-      .max_clock_frequency = 1600,
-      .address_bits = 32,
-      .max_mem_alloc_size = 100*1024*1024,
-      .image_support = CL_FALSE,
-      .max_parameter_size = 256,
-      .mem_base_addr_align = 0,
-      .min_data_type_align_size = 0,
-      .single_fp_config = CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN,
-      .global_mem_cache_type = CL_NONE,
-      .global_mem_cacheline_size = 0,
-      .global_mem_cache_size = 0,
-      .global_mem_size = (cl_ulong)500*1024*1024,
-      .max_constant_args = 8,
-      .local_mem_type = CL_LOCAL,
-      .local_mem_size = 16*1024,
-      .error_correction_support = CL_FALSE,
-      .profiling_timer_resolution = 10,
-      .endian_little = CL_TRUE,
-      .available = CL_TRUE,
-      .compiler_available = CL_TRUE,
-      .execution_capabilities = CL_EXEC_KERNEL,
-      .queue_properties = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE,
-      .name = "StarPU virtual GPU 7",
-      .extensions = ""
-   }
-
+const struct _cl_device_id socl_virtual_device = {
+   .dispatch = &socl_master_dispatch,
+   .type = CL_DEVICE_TYPE_ACCELERATOR,
+   .max_compute_units = 1,
+   .max_work_item_dimensions = 3,
+   .max_work_item_sizes = {1,1,1},
+   .max_work_group_size = 1,
+   .preferred_vector_widths = {16,8,4,2,4,2},
+   .max_clock_frequency = 3000,
+   .address_bits = 64,
+   .max_mem_alloc_size = 1024*1024*1024,
+   .image_support = CL_FALSE,
+   .max_parameter_size = 256,
+   .mem_base_addr_align = 0,
+   .min_data_type_align_size = 0,
+   .single_fp_config = CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN,
+   .global_mem_cache_type = CL_READ_WRITE_CACHE,
+   .global_mem_cacheline_size = 128,
+   .global_mem_cache_size = 16*1024,
+   .global_mem_size = (cl_ulong)4*1024*1024*1024,
+   .max_constant_args = 8,
+   .local_mem_type = CL_GLOBAL,
+   .local_mem_size = 16*1024,
+   .error_correction_support = CL_FALSE,
+   .profiling_timer_resolution = 100,
+   .endian_little = CL_TRUE,
+   .available = CL_TRUE,
+   .compiler_available = CL_TRUE,
+   .execution_capabilities = CL_EXEC_KERNEL,
+   .queue_properties = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE,
+   .name = "SOCL Virtual Device",
+   .extensions = ""
 };
-
-const int socl_device_count = sizeof(socl_devices) / sizeof(struct _cl_device_id);
-
-
-

+ 1 - 3
socl/src/devices.h

@@ -60,8 +60,6 @@ struct _cl_device_id {
    char              extensions[100];          //Array size has been arbitrarily defined
 };
 
-const struct _cl_device_id socl_devices[100];
-
-const int socl_device_count;
+const struct _cl_device_id socl_virtual_device;
 
 #endif /* SOCL_DEVICES_H */

+ 2 - 2
socl/src/socl.c

@@ -147,8 +147,8 @@ struct _cl_icd_dispatch socl_master_dispatch = {
 struct _cl_platform_id socl_platform = {&socl_master_dispatch};
 
 const char * __attribute__ ((aligned (16))) SOCL_PROFILE = "FULL_PROFILE";
-const char * __attribute__ ((aligned (16))) SOCL_VERSION = "OpenCL 1.0 StarPU Edition (0.0.1)";
-const char * __attribute__ ((aligned (16))) SOCL_PLATFORM_NAME    = "StarPU Platform";
+const char * __attribute__ ((aligned (16))) SOCL_VERSION = "OpenCL 1.0 SOCL Edition (0.1.0)";
+const char * __attribute__ ((aligned (16))) SOCL_PLATFORM_NAME    = "SOCL Platform";
 const char * __attribute__ ((aligned (16))) SOCL_VENDOR  = "INRIA";
 const char * __attribute__ ((aligned (16))) SOCL_PLATFORM_EXTENSIONS = "cl_khr_icd";
 const char * __attribute__ ((aligned (16))) SOCL_PLATFORM_ICD_SUFFIX_KHR ="SOCL";

+ 2 - 2
src/drivers/opencl/driver_opencl.c

@@ -351,9 +351,9 @@ void _starpu_opencl_init(void)
 						platform_valid = 0;
 					}
 				}
-				if(strcmp(name, "StarPU Platform") == 0) {
+				if(strcmp(name, "SOCL Platform") == 0) {
 					platform_valid = 0;
-					_STARPU_DEBUG("Skipping StarPU's SOCL Platform\n");
+					_STARPU_DEBUG("Skipping SOCL Platform\n");
 				}
 #ifdef STARPU_VERBOSE
 				if (platform_valid)