Browse Source

SOCL: workaround for Intel OpenCL destructor bug

Intel's OpenCL implementation uses a destructor to release OpenCL entities that the application has not released explicitly. If this destructor runs before SOCL's own destructor, the entities that SOCL has not yet released are released by Intel's destructor first. In particular, SOCL's destructor calls starpu_shutdown, which releases OpenCL contexts; if these have already been released, StarPU crashes.

This patch introduces the environment variable SOCL_SKIP_DESTRUCTOR that disables SOCL destructor when it is set. Using this, SOCL will never call starpu_shutdown. Among other things, it means that calibration data are not saved.

To cleanly shut SOCL and StarPU down, applications can now retrieve an OpenCL extension function (using clGetExtensionFunctionAddress or clGetExtensionFunctionAddressForPlatform) called "clShutdown", which has the following prototype: void clShutdown(void); This function must be called only once, at the end of the application.
Sylvain Henry 12 years ago
parent
commit
ce1868ac44
5 changed files with 56 additions and 22 deletions
  1. 16 2
      socl/src/cl_getextensionfunctionaddress.c
  2. 30 19
      socl/src/init.c
  3. 6 0
      socl/src/init.h
  4. 1 1
      socl/src/socl.c
  5. 3 0
      socl/src/socl.h

+ 16 - 2
socl/src/cl_getextensionfunctionaddress.c

@@ -18,14 +18,28 @@
 
 #include <string.h>
 #include "socl.h"
+#include "init.h"
 
 CL_API_ENTRY void * CL_API_CALL
-soclGetExtensionFunctionAddress(const char * UNUSED(func_name)) CL_API_SUFFIX__VERSION_1_0
+soclGetExtensionFunctionAddress(const char * func_name) CL_API_SUFFIX__VERSION_1_0
 {
-   //TODO
+   if (func_name != NULL && strcmp(func_name, "clShutdown") == 0) {
+      return (void*)soclShutdown; /* the only name resolved here; NULL func_name never reaches strcmp */
+   }
+   /* Every other name (and a NULL name) falls through to the NULL return below. */
    return NULL;
 }
 
+CL_API_ENTRY void * CL_API_CALL
+soclGetExtensionFunctionAddressForPlatform(cl_platform_id p, const char * func_name) CL_API_SUFFIX__VERSION_1_2
+{
+   /* Only the address of socl_platform is accepted; any other platform handle yields NULL. */
+   if (p != &socl_platform) 
+      return NULL;
+   /* For the SOCL platform, defer to the platform-less lookup so both entry points resolve the same names. */
+   return soclGetExtensionFunctionAddress(func_name);
+}
+
 CL_API_ENTRY void * CL_API_CALL clGetExtensionFunctionAddress(
              const char *   func_name) CL_API_SUFFIX__VERSION_1_0 {
   if( func_name != NULL &&  strcmp("clIcdGetPlatformIDsKHR", func_name) == 0 )

+ 30 - 19
socl/src/init.c

@@ -17,6 +17,7 @@
  */
 
 #include <pthread.h>
+#include <stdlib.h>
 #include "socl.h"
 #include "gc.h"
 #include "mem_objects.h"
@@ -69,30 +70,40 @@ __attribute__((constructor)) static void socl_init() {
   gc_start();
 }
 
+/* Shut SOCL down: wait for StarPU tasks, stop the GC, shut StarPU down (only if _starpu_init is set) and free socl_devices. No guard against a second call is visible here. */
+void soclShutdown() {
+   pthread_mutex_lock(&_socl_mutex);
+   if( _starpu_init )
+      starpu_task_wait_for_all();
+
+   gc_stop();
+
+   if( _starpu_init )
+      starpu_task_wait_for_all(); /* NOTE(review): presumably covers tasks submitted while the GC was stopping -- confirm */
+
+   int active_entities = gc_active_entity_count();
+
+   if (active_entities != 0)
+      DEBUG_MSG("Unreleased entities: %d\n", active_entities); /* reported only; shutdown continues regardless */
+
+   if( _starpu_init )
+      starpu_shutdown();
+   pthread_mutex_unlock(&_socl_mutex);
+
+   if (socl_devices != NULL) { /* freed after _socl_mutex has been released */
+      free(socl_devices);
+      socl_devices = NULL; /* cleared so a later NULL check sees the array as gone */
+   }
+}
+
 /**
  * Shutdown SOCL
  */
 __attribute__((destructor)) static void socl_shutdown() {
-  pthread_mutex_lock(&_socl_mutex);
-  if( _starpu_init )
-    starpu_task_wait_for_all();
-
-  gc_stop();
-
-  if( _starpu_init )
-    starpu_task_wait_for_all();
 
-  int active_entities = gc_active_entity_count();
+  char * skip_str = getenv("SOCL_SKIP_DESTRUCTOR"); /* NULL when the variable is unset */
+  int skip = (skip_str != NULL); /* set (any value) => skip; the value is never parsed, so atoi(NULL) cannot happen */
 
-  if (active_entities != 0)
-    DEBUG_MSG("Unreleased entities: %d\n", active_entities);
+  if (!skip) soclShutdown(); /* unset => normal shutdown at library unload */
 
-  if( _starpu_init )
-    starpu_shutdown();
-  pthread_mutex_unlock(&_socl_mutex);
-
-  if (socl_devices != NULL) {
-    free(socl_devices);
-    socl_devices = NULL;
-  }
 }

+ 6 - 0
socl/src/init.h

@@ -20,6 +20,9 @@
 #include "gc.h"
 #include "mem_objects.h"
 
+#ifndef SOCL_INIT_H
+#define SOCL_INIT_H
+
 extern int _starpu_init_failed;
 extern volatile int _starpu_init;
 /**
@@ -27,3 +30,6 @@ extern volatile int _starpu_init;
  */
 
 void socl_init_starpu(void);
+void soclShutdown(void);
+
+#endif /* SOCL_INIT_H */

+ 1 - 1
socl/src/socl.c

@@ -126,7 +126,7 @@ struct _cl_icd_dispatch socl_master_dispatch = {
   (void *) NULL, //  clEnqueueMigrateMemObjects,
   (void *) NULL, //  clEnqueueMarkerWithWaitList,
   (void *) NULL, //  clEnqueueBarrierWithWaitList,
-  (void *) NULL, //  clGetExtensionFunctionAddressForPlatform,
+  (void *) soclGetExtensionFunctionAddressForPlatform, //  clGetExtensionFunctionAddressForPlatform,
   (void *) NULL, //  clCreateFromGLTexture,
   (void *) NULL,
   (void *) NULL,

+ 3 - 0
socl/src/socl.h

@@ -741,6 +741,9 @@ soclEnqueueBarrier(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_
 extern CL_API_ENTRY void * CL_API_CALL
 soclGetExtensionFunctionAddress(const char * /* func_name */) CL_API_SUFFIX__VERSION_1_0;
 
+extern CL_API_ENTRY void * CL_API_CALL
+soclGetExtensionFunctionAddressForPlatform(cl_platform_id p, const char * func_name) CL_API_SUFFIX__VERSION_1_2; /* NULL unless p is SOCL's platform */
+
 extern CL_API_ENTRY cl_int CL_API_CALL
 soclIcdGetPlatformIDsKHR(cl_uint          /* num_entries */,
                  cl_platform_id * /* platforms */,