浏览代码

merge with trunk

Andra Hugo 13 年之前
父节点
当前提交
8e06afcf33

+ 54 - 45
ChangeLog

@@ -1,6 +1,6 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2009, 2010, 2011  Université de Bordeaux 1
+# Copyright (C) 2009-2012  Université de Bordeaux 1
 # Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
 #
 # StarPU is free software; you can redistribute it and/or modify
@@ -18,6 +18,50 @@ StarPU 1.0 (svn revision xxxx)
 ==============================================
 The extensions-again release
 
+New features:
+  * Add SOCL, an OpenCL interface on top of StarPU.
+  * Add a gcc plugin to extend the C interface with pragmas which make it
+	easy to define codelets and issue tasks.
+  * Add reduction mode to starpu_mpi_insert_task.
+  * A new multi-format interface permits the use of different binary formats
+	on CPUs & GPUs, the conversion functions being provided by the
+	application and called by StarPU as needed (and as rarely as
+	possible).
+  * Deprecate cost_model, and introduce cost_function, which is provided
+	with the whole task structure, the target arch and implementation
+	number.
+  * Permit the application to provide its own size base for performance
+	models.
+  * Applications can provide several implementations of a codelet for the
+	same architecture.
+  * Add a StarPU-Top feedback and steering interface.
+  * Permit to specify MPI tags for more efficient starpu_mpi_insert_task
+
+Changes:
+  * Make environment variables take precedence over the configuration
+        passed to starpu_init()
+  * Libtool interface versioning has been included in libraries names
+        (libstarpu-1.0.so, libstarpumpi-1.0.so,
+        libstarpufft-1.0.so, libsocl-1.0.so)
+  * Install headers under $includedir/starpu/1.0.
+  * Make where field for struct starpu_codelet optional. When unset, its
+	value will be automatically set based on the availability of the
+	different XXX_funcs fields of the codelet.
+  * Define access modes for data handles into starpu_codelet and no longer
+	in starpu_task. Hence mark (struct starpu_task).buffers as
+	deprecated, and add (struct starpu_task).handles and (struct
+	starpu_codelet).modes
+  * Fields xxx_func of struct starpu_codelet are made deprecated. One
+	should use fields xxx_funcs instead.
+  * Some types were renamed for consistency. When using pkg-config libstarpu,
+        starpu_deprecated_api.h is automatically included (after starpu.h) to
+        keep compatibility with existing software. Other changes are mentioned
+        below, compatibility is also preserved for them.
+        To port code to use new names (this is not mandatory), the
+        tools/dev/rename.sh script can be used, and pkg-config starpu-1.0 should
+        be used.
+
+Small features:
   * Allow users to disable asynchronous data transfers between CPUs and
 	GPUs.
   * Update OpenCL driver to enable CPU devices (the environment variable
@@ -27,65 +71,30 @@ The extensions-again release
         interface --- define a new function pointer allocate_new_data
         which creates a new data interface of the given type based on
         an existing handle
-  * Make environment variables take precedence over the configuration
-        passed to starpu_init()
-  * Add man pages for some of the tools
-  * Add reduction mode to starpu_mpi_insert_task
-  * Add C++ application example in examples/cpp/
-  * Increase default value for STARPU_MAXCPUS -- Maximum number of
-        CPUs supported -- to 64.
-  * Libtool interface versioning has been included in libraries names
-        (libstarpu-1.0.so, libstarpumpi-1.0.so,
-        libstarpufft-1.0.so, libsocl-1.0.so)
-  * Enable by default the SOCL extension.
-  * Enable by default the GCC plug-in extension.
   * Add a field named magic to struct starpu_task which is set when
         initialising the task. starpu_task_submit will fail if the
         field does not have the right value. This will hence avoid
         submitting tasks which have not been properly initialised.
-  * Make where field for struct starpu_codelet optional. When unset, its
-	value will be automatically set based on the availability of the
-	different XXX_funcs fields of the codelet.
   * Add a hook function pre_exec_hook in struct starpu_sched_policy.
         The function is meant to be called in drivers. Schedulers
         can use it to be notified when a task is about to be computed.
-  * Define access modes for data handles into starpu_codelet and no longer
-	in starpu_task. Hence mark (struct starpu_task).buffers as
-	deprecated, and add (struct starpu_task).handles and (struct
-	starpu_codelet).modes
-  * Install headers under $includedir/starpu/1.0.
-  * Deprecate cost_model, and introduce cost_function, which is provided
-	with the whole task structure, the target arch and implementation
-	number
-  * Permit the application to provide its own size base for performance
-	models
-  * Fields xxx_func of struct starpu_codelet are made deprecated. One
-	should use instead fields xxx_funcs.
-  * Applications can provide several implementations of a codelet for the
-	same architecture.
-  * A new multi-format interface permits to use different binary formats
-	on CPUs & GPUs, the conversion functions being provided by the
-	application and called by StarPU as needed (and as less as
-	possible).
-  * Add a gcc plugin to extend the C interface with pragmas which allows to
-	easily define codelets and issue tasks.
   * Add codelet execution time statistics plot.
   * Add bus speed in starpu_machine_display.
-  * Add a StarPU-Top feedback and steering interface.
-  * Documentation improvement.
   * Add a STARPU_DATA_ACQUIRE_CB which permits to inline the code to be
 	done.
-  * Permit to specify MPI tags for more efficient starpu_mpi_insert_task
-  * Add SOCL, an OpenCL interface on top of StarPU.
   * Add gdb functions.
   * Add complex support to LU example.
-  * Add an OpenMP fork-join example.
   * Permit to use the same data several times in write mode in the
 	parameters of the same task.
-  * Some types were renamed for consistency. The tools/dev/rename.sh
-	script can be used to port code using former names. You can also
-	choose to include starpu_deprecated_api.h (after starpu.h) to keep
-	using the old types.
+
+Small changes:
+  * Increase default value for STARPU_MAXCPUS -- Maximum number of
+        CPUs supported -- to 64.
+  * Add man pages for some of the tools
+  * Add C++ application example in examples/cpp/
+  * Add an OpenMP fork-join example.
+  * Documentation improvement.
+
 
 StarPU 0.9 (svn revision 3721)
 ==============================================

+ 3 - 0
examples/spmd/vector_scal_spmd.c

@@ -110,6 +110,7 @@ int main(int argc, char **argv)
 	conf.sched_policy_name = "pheft";
 
 	ret = starpu_init(&conf);
+	if (ret == -ENODEV) return 77;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 	starpu_data_handle_t vector_handle;
@@ -142,5 +143,7 @@ int main(int argc, char **argv)
 	FPRINTF(stderr, "AFTER: First element is %f\n", vector[0]);
 	FPRINTF(stderr, "AFTER: Last element is %f\n", vector[NX-1]);
 
+	free(vector);
+
 	return ret;
 }

+ 6 - 0
include/starpu_util.h

@@ -96,6 +96,12 @@ extern "C"
 #define STARPU_DEPRECATED
 #endif /* __GNUC__ */
 
+#if STARPU_GNUC_PREREQ(3,3)
+#define STARPU_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__))
+#else
+#define STARPU_WARN_UNUSED_RESULT
+#endif /* STARPU_GNUC_PREREQ(3,3) */
+
 #if defined(__i386__) || defined(__x86_64__)
 
 static __inline unsigned starpu_cmpxchg(unsigned *ptr, unsigned old, unsigned next)

+ 1 - 1
socl/examples/basic/basic.c

@@ -72,7 +72,7 @@ int main(int UNUSED(argc), char** UNUSED(argv)) {
    printf("Querying platform...\n");
    err = clGetPlatformIDs(0, NULL, &num_platforms);
    if (num_platforms == 0) {
-      printf("No OpenCL platform found. If you use SOCL, this could mean StarPU wasn't configured for OpenCL. Try disabling CUDA support in StarPU (export STARPU_NCUDA=0).\n");
+      printf("No OpenCL platform found.\n");
       exit(77);
    }
    err = clGetPlatformIDs(sizeof(platforms)/sizeof(cl_platform_id), platforms, NULL);

+ 1 - 1
socl/examples/clinfo/clinfo.c

@@ -37,7 +37,7 @@ main(void) {
    // Plaform info
    err = clGetPlatformIDs(0, NULL, &num_platforms);
    if (num_platforms == 0) {
-      printf("No OpenCL platform found. If you use SOCL, this could mean StarPU wasn't configured for OpenCL. Try disabling CUDA support in StarPU (export STARPU_NCUDA=0).\n");
+      printf("No OpenCL platform found.\n");
       exit(77);
    }
    checkErr(err, "Unable to get platform count");

+ 1 - 1
socl/examples/mandelbrot/mandelbrot.c

@@ -316,7 +316,7 @@ int main(int argc, char **argv) {
 
   err = clGetPlatformIDs(0, NULL, &num_platforms);
   if (num_platforms == 0) {
-    printf("No OpenCL platform found. If you use SOCL, this could mean StarPU wasn't configured for OpenCL. Try disabling CUDA support in StarPU (export STARPU_NCUDA=0).\n");
+    printf("No OpenCL platform found\n");
     exit(0);
   }
   err = clGetPlatformIDs(sizeof(platforms)/sizeof(cl_platform_id), platforms, NULL);

+ 17 - 15
socl/src/cl_getplatformids.c

@@ -16,6 +16,8 @@
 
 #include "socl.h"
 
+extern int _starpu_init_failed;
+
 /**
  * \brief Get StarPU platform ID
  */
@@ -24,24 +26,24 @@ soclGetPlatformIDs(cl_uint          num_entries,
                  cl_platform_id * platforms,
                  cl_uint *        num_platforms) CL_API_SUFFIX__VERSION_1_0
 {
-   if ((num_entries == 0 && platforms != NULL)
-       || (num_platforms == NULL && platforms == NULL))
-	   return CL_INVALID_VALUE;
-
-   if (starpu_opencl_worker_get_count() == 0) {
-      DEBUG_MSG("StarPU didn't find any OpenCL device. Try disabling CUDA support in StarPU (export STARPU_NCUDA=0).\n")
+     if (_starpu_init_failed)
+     {
+	  if (num_platforms != NULL)
+	       *num_platforms = 0;
+	  return CL_SUCCESS;
+     }
 
-      if (num_platforms != NULL)
-         *num_platforms = 0;
-   }
-   else {
+     if ((num_entries == 0 && platforms != NULL)
+	 || (num_platforms == NULL && platforms == NULL))
+	  return CL_INVALID_VALUE;
 
-      if (platforms != NULL)
-         platforms[0] = &socl_platform;
+     else {
+	  if (platforms != NULL)
+	       platforms[0] = &socl_platform;
 
-      if (num_platforms != NULL)
-         *num_platforms = 1;
-   }
+	  if (num_platforms != NULL)
+	       *num_platforms = 1;
+     }
 
    return CL_SUCCESS;
 }

+ 19 - 1
socl/src/init.c

@@ -18,6 +18,8 @@
 #include "gc.h"
 #include "mem_objects.h"
 
+int _starpu_init_failed;
+
 /**
  * Initialize SOCL
  */
@@ -32,7 +34,23 @@ __attribute__((constructor)) static void socl_init() {
 
   mem_object_init();
 
-  starpu_init(&conf);
+  _starpu_init_failed = starpu_init(&conf);
+  if (_starpu_init_failed != 0)
+  {
+       DEBUG_MSG("Error when calling starpu_init: %d\n", _starpu_init_failed);
+  }
+  else {
+       if (starpu_cpu_worker_get_count() == 0)
+       {
+	    DEBUG_MSG("StarPU did not find any CPU device. SOCL needs at least 1 CPU.\n");
+	    _starpu_init_failed = -ENODEV;
+       }
+       if (starpu_opencl_worker_get_count() == 0)
+       {
+	    DEBUG_MSG("StarPU didn't find any OpenCL device. Try disabling CUDA support in StarPU (export STARPU_NCUDA=0).\n");
+	    _starpu_init_failed = -ENODEV;
+       }
+  }
 
   /* Disable dataflow implicit dependencies */
   starpu_data_set_default_sequential_consistency_flag(0);

+ 1 - 2
src/core/dependencies/cg.c

@@ -71,7 +71,7 @@ int _starpu_add_successor_to_cg_list(struct _starpu_cg_list *successors, struct
 	ret = successors->terminated;
 
 	/* where should that cg be put in the array? */
-	unsigned index = STARPU_ATOMIC_ADD(&successors->nsuccs, 1) - 1;
+	unsigned index = successors->nsuccs++;
 
 #ifdef STARPU_DYNAMIC_DEPS_SIZE
 	if (index >= successors->succ_list_size)
@@ -82,7 +82,6 @@ int _starpu_add_successor_to_cg_list(struct _starpu_cg_list *successors, struct
 		else
 			successors->succ_list_size = 4;
 
-		/* NB: this is thread safe as the tag->lock is taken */
 		successors->succ = (struct _starpu_cg **) realloc(successors->succ,
 			successors->succ_list_size*sizeof(struct _starpu_cg *));
 	}

+ 1 - 1
src/core/workers.c

@@ -458,7 +458,7 @@ int starpu_init(struct starpu_conf *user_conf)
 
 	if (user_conf)
 	{
-	     int asynchronous_copy_disabled = starpu_get_env_number("DISABLE_STARPU_ASYNCHRONOUS_COPY");
+	     int asynchronous_copy_disabled = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_COPY");
 	     if (asynchronous_copy_disabled == 1)
 		  config.disable_asynchronous_copy = 1;
 	     else

+ 21 - 2
src/drivers/opencl/driver_opencl.c

@@ -87,10 +87,13 @@ static void unlimit_gpu_mem_if_needed(int devid)
 
 size_t starpu_opencl_get_global_mem_size(int devid)
 {
+	cl_int err;
 	cl_ulong totalGlobalMem;
 
 	/* Request the size of the current device's memory */
-	clGetDeviceInfo(devices[devid], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(totalGlobalMem), &totalGlobalMem, NULL);
+	err = clGetDeviceInfo(devices[devid], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(totalGlobalMem), &totalGlobalMem, NULL);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
 
 	return (size_t)totalGlobalMem;
 }
@@ -143,7 +146,9 @@ cl_int _starpu_opencl_init_context(int devid)
 
         // Create transfer queue for the given device
         cl_command_queue_properties props;
-        clGetDeviceInfo(devices[devid], CL_DEVICE_QUEUE_PROPERTIES, sizeof(props), &props, NULL);
+        err = clGetDeviceInfo(devices[devid], CL_DEVICE_QUEUE_PROPERTIES, sizeof(props), &props, NULL);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
         props &= CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
         transfer_queues[devid] = clCreateCommandQueue(contexts[devid], devices[devid], props, &err);
         if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
@@ -191,6 +196,20 @@ cl_int starpu_opencl_allocate_memory(cl_mem *mem, size_t size, cl_mem_flags flag
 	if (err == CL_OUT_OF_HOST_MEMORY) return err;
         if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
 
+	/*
+	 * OpenCL uses lazy memory allocation: we will only know if the
+	 * allocation failed when trying to copy data onto the device. But we
+	 * want to know this __now__, so we just perform a dummy copy.
+	 */
+	char dummy = 0;
+	err = clEnqueueWriteBuffer(queues[worker->devid], memory, CL_TRUE,
+				0, sizeof(dummy), &dummy,
+				0, NULL, NULL);
+	if (err == CL_MEM_OBJECT_ALLOCATION_FAILURE)
+		return err;
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
         *mem = memory;
         return CL_SUCCESS;
 }

+ 1 - 1
tests/datawizard/reclaim.c

@@ -54,9 +54,9 @@ static void dummy_func(void *descr[], __attribute__ ((unused)) void *_args)
 
 static struct starpu_codelet dummy_cl =
 {
-        .where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {dummy_func, NULL},
 	.cuda_funcs = {dummy_func, NULL},
+	.opencl_funcs = {dummy_func, NULL},
 	.nbuffers = 3,
 	.modes = {STARPU_RW, STARPU_R, STARPU_R}
 };