瀏覽代碼

SOCL: fix new SOCL model issues

Now that SOCl exposes real devices to applications:
  - clGetKernelWorkgroupInfo has to return correct values (instead of the min/max value);
  - clCreateProgramWithSource must only create programs for devices in the specified context;
  - clCreateProgramWithSource is allowed to fail on some devices of the context as valid devices can be specified for clBuildProgram and clEnqueueNDRangeKernel;
  - clCreateKernel must only create kernels for devices for which programs have been built.
Sylvain Henry 12 年之前
父節點
當前提交
6b57bccbd8

+ 31 - 28
socl/src/cl_createkernel.c

@@ -22,11 +22,17 @@ static void soclCreateKernel_task(void *data) {
    int range = starpu_worker_get_range();
    cl_int err;
 
+   if (k->program->cl_programs[range] == NULL) {
+      k->errcodes[range] = CL_SUCCESS;
+      DEBUG_MSG("[Device %d] Kernel creation skipped: program has not been built for this device.\n", starpu_worker_get_id());
+      return;
+   }
+
    DEBUG_MSG("[Device %d] Creating kernel...\n", starpu_worker_get_id());
    k->cl_kernels[range] = clCreateKernel(k->program->cl_programs[range], k->kernel_name, &err);
    if (err != CL_SUCCESS) {
       k->errcodes[range] = err;
-      ERROR_STOP("[Device %d] Unable to create kernel. Aborting.\n", starpu_worker_get_id());
+      ERROR_STOP("[Device %d] Unable to create kernel. Error %d. Aborting.\n", starpu_worker_get_id(), err);
       return;
    }
 
@@ -54,32 +60,20 @@ static void soclCreateKernel_task(void *data) {
    }
 }
 
-static void rk_task(void *data) {
-   cl_kernel k = (cl_kernel)data;
-
-   int range = starpu_worker_get_range();
-
-   if (k->cl_kernels[range] != NULL) {
-      cl_int err = clReleaseKernel(k->cl_kernels[range]);
-      if (err != CL_SUCCESS)
-         DEBUG_CL("clReleaseKernel", err);
-   }
-}
-
 static void release_callback_kernel(void * e) {
   cl_kernel kernel = (cl_kernel)e;
 
   //Free args
-  unsigned int j;
-  for (j=0; j<kernel->num_args; j++) {
-    switch (kernel->arg_type[j]) {
+  unsigned int i;
+  for (i=0; i<kernel->num_args; i++) {
+    switch (kernel->arg_type[i]) {
       case Null:
         break;
       case Buffer:
-        gc_entity_unstore((cl_mem*)&kernel->arg_value[j]);
+        gc_entity_unstore((cl_mem*)&kernel->arg_value[i]);
         break;
       case Immediate:
-        free(kernel->arg_value[j]);
+        free(kernel->arg_value[i]);
         break;
     }
   }
@@ -91,7 +85,13 @@ static void release_callback_kernel(void * e) {
     free(kernel->arg_type);
 
   //Release real kernels...
-  starpu_execute_on_each_worker_ex(rk_task, kernel, STARPU_OPENCL, "SOCL_RELEASE_KERNEL");
+  for (i=0; i<socl_device_count; i++) {
+     if (kernel->cl_kernels[i] != NULL) {
+        cl_int err = clReleaseKernel(kernel->cl_kernels[i]);
+        if (err != CL_SUCCESS)
+           DEBUG_CL("clReleaseKernel", err);
+     }
+  }
 
   //Release perfmodel
   free(kernel->perfmodel);
@@ -109,7 +109,6 @@ soclCreateKernel(cl_program    program,
                cl_int *        errcode_ret) CL_API_SUFFIX__VERSION_1_0
 {
    cl_kernel k;
-   int device_count;
 
    if (program == NULL) {
       if (errcode_ret != NULL)
@@ -144,26 +143,25 @@ soclCreateKernel(cl_program    program,
    k->id = id++;
    #endif
    
-   device_count = starpu_opencl_worker_get_count();
-   k->cl_kernels = (cl_kernel*)malloc(device_count * sizeof(cl_kernel));
-   k->errcodes = (cl_int*)malloc(device_count * sizeof(cl_int));
+   k->cl_kernels = (cl_kernel*)malloc(socl_device_count * sizeof(cl_kernel));
+   k->errcodes = (cl_int*)malloc(socl_device_count * sizeof(cl_int));
 
    {
-      int i;
-      for (i=0; i<device_count; i++) {
+      unsigned int i;
+      for (i=0; i<socl_device_count; i++) {
          k->cl_kernels[i] = NULL;
          k->errcodes[i] = -9999;
       }
    }
 
    /* Create kernel on each device */
-   DEBUG_MSG("[Kernel %d] Create %d kernels (name \"%s\")\n", k->id, starpu_opencl_worker_get_count(), kernel_name);
+   DEBUG_MSG("[Kernel %d] Create %d kernels (name \"%s\")\n", k->id, socl_device_count, kernel_name);
    starpu_execute_on_each_worker_ex(soclCreateKernel_task, k, STARPU_OPENCL, "SOCL_CREATE_KERNEL");
 
    if (errcode_ret != NULL) {
-      int i;
+      unsigned int i;
       *errcode_ret = CL_SUCCESS;
-      for (i=0; i<device_count; i++) {
+      for (i=0; i<socl_device_count; i++) {
          switch (k->errcodes[i]) {
             #define CASE_RET(e) case e: *errcode_ret = e; return k;
             CASE_RET(CL_INVALID_PROGRAM)
@@ -176,6 +174,11 @@ soclCreateKernel(cl_program    program,
             #undef CASE_RET
          }
       }
+
+      if (k->num_args == 666) {
+         *errcode_ret = CL_INVALID_PROGRAM_EXECUTABLE;
+         return k;
+      }
    }
 
    return k;

+ 22 - 24
socl/src/cl_createprogramwithsource.c

@@ -39,21 +39,17 @@ static void soclCreateProgramWithSource_task(void *data) {
 
 }
 
-static void rp_task(void *data) {
-   cl_program program = (cl_program)data;
-
-   int range = starpu_worker_get_range();
-
-   cl_int err = clReleaseProgram(program->cl_programs[range]);
-   if (err != CL_SUCCESS)
-      DEBUG_CL("clReleaseProgram", err);
-}
-
 static void release_callback_program(void * e) {
   cl_program program = (cl_program)e;
 
-  /* Destruct object */
-  starpu_execute_on_each_worker_ex(rp_task, program, STARPU_OPENCL, "SOCL_RELEASE_PROGRAM");
+  unsigned int i;
+  for (i=0; i<socl_device_count; i++) {
+     if (program->cl_programs[i] != NULL) {
+        cl_int err = clReleaseProgram(program->cl_programs[i]);
+        if (err != CL_SUCCESS)
+           DEBUG_CL("clReleaseProgram", err);
+     }
+  }
 
   /* Release references */
   gc_entity_unstore(&program->context);
@@ -74,15 +70,11 @@ soclCreateProgramWithSource(cl_context      context,
 {
    cl_program p;
    struct cpws_data *data;
-   int device_count;
+   unsigned int i;
 
    if (errcode_ret != NULL)
       *errcode_ret = CL_SUCCESS;
 
-   device_count = starpu_opencl_worker_get_count();
-   assert(device_count > 0);
-   DEBUG_MSG("Worker count: %d\n", device_count);
-
    /* Check arguments */
    if (count == 0 || strings == NULL) {
       if (errcode_ret != NULL)
@@ -107,7 +99,7 @@ soclCreateProgramWithSource(cl_context      context,
    #endif
 
 
-   p->cl_programs = (cl_program*)malloc(sizeof(cl_program) * device_count);
+   p->cl_programs = (cl_program*)malloc(sizeof(cl_program) * socl_device_count);
    if (p->cl_programs == NULL) {
       if (errcode_ret != NULL)
          *errcode_ret = CL_OUT_OF_HOST_MEMORY;
@@ -115,8 +107,7 @@ soclCreateProgramWithSource(cl_context      context,
    }
 
    {
-      int i;
-      for (i=0; i<device_count; i++)
+      for (i=0; i<socl_device_count; i++)
          p->cl_programs[i] = NULL;
    }
 
@@ -132,15 +123,22 @@ soclCreateProgramWithSource(cl_context      context,
    data->strings = (char**)strings;
    data->lengths = (size_t*)lengths;
 
-   data->errcodes = (cl_int*)malloc(sizeof(cl_int) * device_count);
+   data->errcodes = (cl_int*)malloc(sizeof(cl_int) * socl_device_count);
+   for (i=0; i<socl_device_count; i++) {
+      data->errcodes[i] = CL_SUCCESS;
+   }
+
 
    /* Init real cl_program for each OpenCL device */
-   starpu_execute_on_each_worker_ex(soclCreateProgramWithSource_task, data, STARPU_OPENCL, "SOCL_CREATE_PROGRAM");
+   unsigned workers[context->num_devices];
+   for (i=0; i<context->num_devices; i++) {
+      workers[i] = context->devices[i]->worker_id;
+   }
+   starpu_execute_on_specific_workers(soclCreateProgramWithSource_task, data, context->num_devices, workers, "SOCL_CREATE_PROGRAM");
 
    if (errcode_ret != NULL) {
-      int i;
       *errcode_ret = CL_SUCCESS;
-      for (i=0; i<device_count; i++) {
+      for (i=0; i<socl_device_count; i++) {
          if (data->errcodes[i] != CL_SUCCESS) {
             DEBUG_MSG("Worker [%d] failed\n", i);
             DEBUG_CL("clCreateProgramWithSource", data->errcodes[i]);

+ 1 - 0
socl/src/cl_getdeviceids.c

@@ -56,6 +56,7 @@ soclGetDeviceIDs(cl_platform_id   platform,
    starpu_worker_get_ids_by_type(STARPU_OPENCL_WORKER, workers, ndevs);
 
    if (socl_devices == NULL) {
+      socl_device_count = ndevs;
       socl_devices = malloc(sizeof(struct _cl_device_id) * ndevs);
       int i;
       for (i=0; i < ndevs; i++) {

+ 6 - 108
socl/src/cl_getkernelworkgroupinfo.c

@@ -15,123 +15,21 @@
  */
 
 #include "socl.h"
-#include "getinfo.h"
-
-struct gkwgi_data {
-   cl_kernel_work_group_info param_name;
-   cl_kernel kernel;
-   union {
-      size_t work_group_size;
-      size_t compile_work_group_size[3];
-      cl_ulong local_mem_size;
-   };
-};
-
-static void gkwgi_task(void *data) {
-   cl_int err;
-   struct gkwgi_data *d = (struct gkwgi_data*)data;
-
-   int wid = starpu_worker_get_id();
-   int range = starpu_worker_get_range();
-
-   cl_device_id device;
-   starpu_opencl_get_device(wid, &device);
-
-   size_t value;
-   size_t oldval;
-   err = clGetKernelWorkGroupInfo(d->kernel->cl_kernels[range], device, d->param_name, sizeof(value), &value, NULL);
-   if (err != CL_SUCCESS) {
-      DEBUG_MSG("Worker [%d] failed\n", wid);
-      DEBUG_CL("clGetKernelWorkGroupInfo", err);
-   }
-
-   switch (d->param_name) {
-      case CL_KERNEL_WORK_GROUP_SIZE: {
-         //Get the smallest work group size
-         do {
-            oldval = d->work_group_size;
-         } while (value < oldval && !(__sync_bool_compare_and_swap(&d->work_group_size, oldval, value)));
-      }
-      break;
-      case CL_KERNEL_LOCAL_MEM_SIZE: {
-         //Get the biggest local mem size
-         do {
-            oldval = d->local_mem_size;
-         } while (value > oldval && !(__sync_bool_compare_and_swap(&d->local_mem_size, oldval, value)));
-      }
-      break;
-   }
-
-}
-
-static void gkwgi_task2(void **UNUSED(desc), void *data) {
-   cl_int err;
-   struct gkwgi_data *d = (struct gkwgi_data*)data;
-
-   int wid = starpu_worker_get_id();
-   int range = starpu_worker_get_range();
-
-   cl_device_id device;
-   starpu_opencl_get_device(wid, &device);
-
-   err = clGetKernelWorkGroupInfo(d->kernel->cl_kernels[range], device, d->param_name, sizeof(d->compile_work_group_size), &d->compile_work_group_size, NULL);
-   if (err != CL_SUCCESS) {
-      DEBUG_MSG("Worker [%d] failed\n", wid);
-      DEBUG_CL("clGetKernelWorkGroupInfo", err);
-   }
-}
-
-static struct starpu_codelet gkwgi_codelet = {
-   .where = STARPU_OPENCL,
-   .opencl_funcs = { gkwgi_task2, NULL },
-   .nbuffers = 0,
-   .model = NULL
-};
 
 CL_API_ENTRY cl_int CL_API_CALL
 soclGetKernelWorkGroupInfo(cl_kernel                kernel,
-                         cl_device_id               UNUSED(device),
+                         cl_device_id               device,
                          cl_kernel_work_group_info  param_name,
                          size_t                     param_value_size,
                          void *                     param_value,
                          size_t *                   param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
 {
-   if (kernel == NULL)
-      return CL_INVALID_KERNEL;
-
-
-   struct gkwgi_data data;
-   data.param_name = param_name;
-   data.kernel = kernel;
-
-   switch (param_name) {
-      case CL_KERNEL_WORK_GROUP_SIZE:
-         /* We take the smallest value to be sure the kernel can be executed on any available device */
-         data.work_group_size = SIZE_MAX;
-         starpu_execute_on_each_worker_ex(gkwgi_task, &data, STARPU_OPENCL, "SOCL_GET_KERNEL_WORKGROUP_INFO");
-         INFO_CASE_EX2(data.work_group_size);
-      case CL_KERNEL_COMPILE_WORK_GROUP_SIZE: {
-         struct starpu_task *task;
-         task = starpu_task_create();
-         task->cl = &gkwgi_codelet;
-         task->cl_arg = &data;
-         task->cl_arg_size = sizeof(data);
-         task->synchronous = 1;
-         int ret = starpu_task_submit(task);
-	if (ret != 0)
-		DEBUG_ERROR("Unable to submit a task. Error %d\n", ret);
-         INFO_CASE_EX2(data.compile_work_group_size);
-         }
-      case CL_KERNEL_LOCAL_MEM_SIZE:
-         /* We take the biggest value to be sure the kernel can be executed on any available device */
-         data.local_mem_size = 0;
-         starpu_execute_on_each_worker_ex(gkwgi_task, &data, STARPU_OPENCL, "SOCL_GET_KERNEL_WORKGROUP_INFO");
-         INFO_CASE_EX2(data.local_mem_size);
-      default:
-         return CL_INVALID_OPERATION;
-   }
+   int range = starpu_worker_get_range_by_id(device->worker_id);
+   cl_device_id dev;
+   starpu_opencl_get_device(device->device_id, &dev);
 
-   return CL_SUCCESS;
+   return clGetKernelWorkGroupInfo(kernel->cl_kernels[range], dev, 
+      param_name, param_value_size, param_value, param_value_size_ret);
 }
 
 

+ 1 - 0
socl/src/socl.c

@@ -161,3 +161,4 @@ const char * __attribute__ ((aligned (16))) SOCL_PLATFORM_ICD_SUFFIX_KHR ="SOCL"
 int __attribute__ ((aligned (16))) profiling_queue_count = 0;
 
 struct _cl_device_id * socl_devices = NULL;
+unsigned int socl_device_count = 0;

+ 1 - 0
socl/src/socl.h

@@ -757,5 +757,6 @@ soclIcdGetPlatformIDsKHR(cl_uint          /* num_entries */,
 struct _cl_icd_dispatch socl_master_dispatch;
 struct _cl_platform_id socl_platform;
 struct _cl_device_id * socl_devices;
+extern unsigned int socl_device_count;
 
 #endif /* SOCL_H */

+ 6 - 2
socl/src/util.c

@@ -16,8 +16,7 @@
 
 #include "socl.h"
 
-int starpu_worker_get_range() {
-   int id = starpu_worker_get_id();
+int starpu_worker_get_range_by_id(int id) {
    int i, oid = 0;
    for (i=0; i<id; i++)
       if (starpu_worker_get_type(i) == STARPU_OPENCL_WORKER) oid++;
@@ -25,6 +24,11 @@ int starpu_worker_get_range() {
    return oid;
 }
 
+int starpu_worker_get_range() {
+   int id = starpu_worker_get_id();
+   return starpu_worker_get_range_by_id(id);
+}
+
 void * memdupa(const void *p, size_t size) {
 	void * s = malloc(size);
 	memcpy(s,p,size);

+ 1 - 0
socl/src/util.h

@@ -17,6 +17,7 @@
 #ifndef SOCL_UTIL_H
 #define SOCL_UTIL_H
 
+int starpu_worker_get_range_by_id(int id);
 int starpu_worker_get_range();
 
 /**