Andra Hugo 13 yıl önce
ebeveyn
işleme
5b3f428b00
46 değiştirilmiş dosya ile 606 ekleme ve 228 silme
  1. 1 0
      ChangeLog
  2. 25 0
      README.dev
  3. 1 1
      configure.ac
  4. 50 0
      doc/chapters/basic-api.texi
  5. 6 1
      doc/chapters/c-extensions.texi
  6. 51 9
      doc/chapters/mpi-support.texi
  7. 44 0
      doc/devel/handle_refcnt
  8. 62 0
      doc/devel/replicate_refcnt
  9. 4 1
      examples/Makefile.am
  10. 6 6
      examples/axpy/axpy.c
  11. 5 1
      gcc-plugin/src/Makefile.am
  12. 16 2
      gcc-plugin/src/starpu.c
  13. 44 0
      gcc-plugin/tests/opencl-types.c
  14. 1 1
      include/starpu_util.h
  15. 95 4
      m4/gcc.m4
  16. 19 1
      src/common/starpu_spinlock.c
  17. 2 1
      src/common/starpu_spinlock.h
  18. 3 1
      src/core/dependencies/cg.c
  19. 3 1
      src/core/dependencies/cg.h
  20. 1 0
      src/core/dependencies/data_concurrency.c
  21. 7 1
      src/core/dependencies/tags.c
  22. 5 3
      src/core/dependencies/task_deps.c
  23. 6 2
      src/core/jobs.c
  24. 3 1
      src/core/jobs.h
  25. 9 6
      src/core/perfmodel/perfmodel.c
  26. 6 9
      src/core/sched_policy.c
  27. 4 1
      src/core/task.c
  28. 15 41
      src/core/topology.c
  29. 45 11
      src/core/workers.c
  30. 6 6
      src/core/workers.h
  31. 5 1
      src/datawizard/coherency.c
  32. 4 0
      src/datawizard/data_request.c
  33. 4 4
      src/datawizard/interfaces/bcsr_interface.c
  34. 2 2
      src/datawizard/interfaces/block_interface.c
  35. 4 4
      src/datawizard/interfaces/csr_interface.c
  36. 2 2
      src/datawizard/interfaces/matrix_interface.c
  37. 1 1
      src/datawizard/interfaces/multiformat_interface.c
  38. 2 2
      src/datawizard/interfaces/variable_interface.c
  39. 2 2
      src/datawizard/interfaces/vector_interface.c
  40. 10 6
      src/datawizard/memalloc.c
  41. 5 0
      src/datawizard/reduction.c
  42. 7 1
      src/datawizard/user_interactions.c
  43. 1 1
      src/sched_policies/detect_combined_workers.c
  44. 0 86
      starpu-top/StarPU-Top-qwt-embed.pri
  45. 6 4
      tests/datawizard/mpi_like_async.c
  46. 6 1
      tests/main/starpu_init.c

+ 1 - 0
ChangeLog

@@ -38,6 +38,7 @@ New features:
   * Permit to specify MPI tags for more efficient starpu_mpi_insert_task
 
 Changes:
+  * Fix several memory leaks and race conditions
   * Make environment variables take precedence over the configuration
         passed to starpu_init()
   * Libtool interface versioning has been included in libraries names

+ 25 - 0
README.dev

@@ -20,6 +20,7 @@ Contents
 - Developer Warnings
 - Naming Conventions
 - Coding Style
+- Error handling
 
 Developer Warnings
 ------------------
@@ -51,3 +52,27 @@ Coding Style
 ------------
 
 * Curly braces always go on a new line
+
+
+
+Error handling
+--------------
+* Use STARPU_ABORT() for catastrophic errors, from which StarPU will never
+  recover.
+
+	switch (node_kind)
+	{
+		case STARPU_CPU_RAM:
+			do_stg();
+			break;
+		...
+		default:
+			/* We cannot be here */
+			STARPU_ABORT();
+	}
+
+* Use STARPU_ASSERT() to run checks that are very likely to succeed, but are
+  still useful for debugging purposes. It should be OK to disable them with
+  --enable-fast.
+
+	STARPU_ASSERT(j->terminated != 0)

+ 1 - 1
configure.ac

@@ -1676,6 +1676,6 @@ AC_MSG_NOTICE([
 	       MPI test suite: $running_mpi_check
 	       FFT Support: $fft_support
 	       GCC plug-in: $build_gcc_plugin
-	       GCC plug-in test suite: $run_gcc_plugin_test_suite
+	       GCC plug-in test suite (requires GNU Guile): $run_gcc_plugin_test_suite
 	       SOCL enabled:  $build_socl
 ])

+ 50 - 0
doc/chapters/basic-api.texi

@@ -143,6 +143,11 @@ application: statistics and other post-mortem debugging information are not
 guaranteed to be available until this method has been called.
 @end deftypefun
 
+@deftypefun int starpu_asynchronous_copy_disabled ()
+Return 1 if asynchronous data transfers between CPU and accelerators
+are disabled.
+@end deftypefun
+
 @node Workers' Properties
 @section Workers' Properties
 
@@ -241,6 +246,24 @@ This function returns the identifier of the memory node associated to the
 worker identified by @var{workerid}.
 @end deftypefun
 
+@deftp {Data Type} {enum starpu_node_kind}
+todo
+@table @asis
+@item @code{STARPU_UNUSED}
+@item @code{STARPU_CPU_RAM}
+@item @code{STARPU_CUDA_RAM}
+@item @code{STARPU_OPENCL_RAM}
+@item @code{STARPU_SPU_LS}
+@end table
+@end deftp
+
+@deftypefun {enum starpu_node_kind} starpu_node_get_kind (uint32_t @var{node})
+Returns the type of the given node as defined by @code{enum
+starpu_node_kind}. For example, when defining a new data interface,
+this function should be used in the allocation function to determine
+on which device the memory needs to be allocated.
+@end deftypefun
+
 @node Data Library
 @section Data Library
 
@@ -1312,6 +1335,16 @@ State of a task, can be either of
 @end table
 @end deftp
 
+@deftp {Data Type} {struct starpu_buffer_descr}
+This type is used to describe a data handle along with an
+access mode.
+@table @asis
+@item @code{starpu_data_handle_t handle} describes a data,
+@item @code{enum starpu_access_mode mode} describes its access mode
+@end table
+@end deftp
+
+
 @deftp {Data Type} {struct starpu_task}
 The @code{starpu_task} structure describes a task that can be offloaded on the various
 processing units managed by StarPU. It instantiates a codelet. It can either be
@@ -1760,6 +1793,23 @@ Lock to protect concurrency between loading from disk (W), updating the values
 @end table
 @end deftp
 
+@deftp {Data Type} {struct starpu_regression_model}
+@table @asis
+@item @code{double sumlny} sum of ln(measured)
+@item @code{double sumlnx} sum of ln(size)
+@item @code{double sumlnx2} sum of ln(size)^2
+@item @code{unsigned long minx} minimum size
+@item @code{unsigned long maxx} maximum size
+@item @code{double sumlnxlny} sum of ln(size)*ln(measured)
+@item @code{double alpha} 	 estimated = alpha * size ^ beta
+@item @code{double beta}
+@item @code{unsigned valid} whether the linear regression model is valid (i.e. enough measures)
+@item @code{double a, b, c} estimated = a * size ^ b + c
+@item @code{unsigned nl_valid} whether the non-linear regression model is valid (i.e. enough measures)
+@item @code{unsigned nsample} number of sample values for non-linear regression
+@end table
+@end deftp
+
 @deftp {Data Type} {struct starpu_per_arch_perfmodel}
 contains information about the performance model of a given arch.
 

+ 6 - 1
doc/chapters/c-extensions.texi

@@ -10,7 +10,12 @@
 When GCC plug-in support is available, StarPU builds a plug-in for the
 GNU Compiler Collection (GCC), which defines extensions to languages of
 the C family (C, C++, Objective-C) that make it easier to write StarPU
-code@footnote{This feature is only available for GCC 4.5 and later.  It
+code@footnote{This feature is only available for GCC 4.5 and later.  You
+may need to install a specific @code{-dev} package of your distro, such
+as @code{gcc-4.6-plugin-dev} on Debian and derivatives.  In addition,
+the plug-in's test suite is only run when
+@url{http://www.gnu.org/software/guile/, GNU@tie{}Guile} is found at
+@code{configure}-time.  Building the GCC plug-in
 can be disabled by configuring with @code{--disable-gcc-extensions}.}.
 
 Those extensions include syntactic sugar for defining

+ 51 - 9
doc/chapters/mpi-support.texi

@@ -20,10 +20,10 @@ distributed application, by automatically issuing all required data transfers
 according to the task graph and an application-provided distribution.
 
 @menu
-* The API::                     
-* Simple Example::              
-* MPI Insert Task Utility::         
-* MPI Collective Operations::         
+* The API::
+* Simple Example::
+* MPI Insert Task Utility::
+* MPI Collective Operations::
 @end menu
 
 @node The API
@@ -66,49 +66,91 @@ by calling @code{starpu_mpi_initialize_extended}.
 
 @subsection Communication
 
-TODO
+The standard point-to-point communications of MPI have been
+implemented. The semantics are similar to those of MPI, but adapted to
+the DSM provided by StarPU. An MPI request will only be submitted when
+the data is available in the main memory of the node submitting the
+request.
 
 @deftypefun int starpu_mpi_send (starpu_data_handle_t @var{data_handle}, int @var{dest}, int @var{mpi_tag}, MPI_Comm @var{comm})
+Performs a standard-mode, blocking send of @var{data_handle} to the
+node @var{dest} using the message tag @code{mpi_tag} within the
+communicator @var{comm}.
 @end deftypefun
 
 @deftypefun int starpu_mpi_recv (starpu_data_handle_t @var{data_handle}, int @var{source}, int @var{mpi_tag}, MPI_Comm @var{comm}, MPI_Status *@var{status})
+Performs a standard-mode, blocking receive in @var{data_handle} from the
+node @var{source} using the message tag @code{mpi_tag} within the
+communicator @var{comm}.
 @end deftypefun
 
 @deftypefun int starpu_mpi_isend (starpu_data_handle_t @var{data_handle}, starpu_mpi_req *@var{req}, int @var{dest}, int @var{mpi_tag}, MPI_Comm @var{comm})
-
+Posts a standard-mode, non blocking send of @var{data_handle} to the
+node @var{dest} using the message tag @code{mpi_tag} within the
+communicator @var{comm}. After the call, the pointer to the request
+@var{req} can be used to test the completion of the communication.
 @end deftypefun
 
 @deftypefun int starpu_mpi_irecv (starpu_data_handle_t @var{data_handle}, starpu_mpi_req *@var{req}, int @var{source}, int @var{mpi_tag}, MPI_Comm @var{comm})
+Posts a nonblocking receive in @var{data_handle} from the
+node @var{source} using the message tag @code{mpi_tag} within the
+communicator @var{comm}. After the call, the pointer to the request
+@var{req} can be used to test the completion of the communication.
 @end deftypefun
 
 @deftypefun int starpu_mpi_isend_detached (starpu_data_handle_t @var{data_handle}, int @var{dest}, int @var{mpi_tag}, MPI_Comm @var{comm}, void (*@var{callback})(void *), void *@var{arg})
+Posts a standard-mode, non blocking send of @var{data_handle} to the
+node @var{dest} using the message tag @code{mpi_tag} within the
+communicator @var{comm}. On completion, the @var{callback} function is
+called with the argument @var{arg}.
 @end deftypefun
 
 @deftypefun int starpu_mpi_irecv_detached (starpu_data_handle_t @var{data_handle}, int @var{source}, int @var{mpi_tag}, MPI_Comm @var{comm}, void (*@var{callback})(void *), void *@var{arg})
+Posts a nonblocking receive in @var{data_handle} from the
+node @var{source} using the message tag @code{mpi_tag} within the
+communicator @var{comm}. On completion, the @var{callback} function is
+called with the argument @var{arg}.
 @end deftypefun
 
 @deftypefun int starpu_mpi_wait (starpu_mpi_req *@var{req}, MPI_Status *@var{status})
+Returns when the operation identified by request @var{req} is complete.
 @end deftypefun
 
 @deftypefun int starpu_mpi_test (starpu_mpi_req *@var{req}, int *@var{flag}, MPI_Status *@var{status})
+If the operation identified by @var{req} is complete, set @var{flag}
+to 1. The @var{status} object is set to contain information on the
+completed operation.
 @end deftypefun
 
 @deftypefun int starpu_mpi_barrier (MPI_Comm @var{comm})
+Blocks the caller until all group members of the communicator
+@var{comm} have called it.
 @end deftypefun
 
 @deftypefun int starpu_mpi_isend_detached_unlock_tag (starpu_data_handle_t @var{data_handle}, int @var{dest}, int @var{mpi_tag}, MPI_Comm @var{comm}, starpu_tag_t @var{tag})
-When the transfer is completed, the tag is unlocked
+Posts a standard-mode, non blocking send of @var{data_handle} to the
+node @var{dest} using the message tag @code{mpi_tag} within the
+communicator @var{comm}. On completion, @var{tag} is unlocked.
 @end deftypefun
 
 @deftypefun int starpu_mpi_irecv_detached_unlock_tag (starpu_data_handle_t @var{data_handle}, int @var{source}, int @var{mpi_tag}, MPI_Comm @var{comm}, starpu_tag_t @var{tag})
+Posts a nonblocking receive in @var{data_handle} from the
+node @var{source} using the message tag @code{mpi_tag} within the
+communicator @var{comm}. On completion, @var{tag} is unlocked.
 @end deftypefun
 
 @deftypefun int starpu_mpi_isend_array_detached_unlock_tag (unsigned @var{array_size}, starpu_data_handle_t *@var{data_handle}, int *@var{dest}, int *@var{mpi_tag}, MPI_Comm *@var{comm}, starpu_tag_t @var{tag})
-Asynchronously send an array of buffers, and unlocks the tag once all
-of them are transmitted.
+Posts @var{array_size} standard-mode, non blocking sends of
+@var{data_handle[x]} to the node @var{dest[x]} using the message
+tag @code{mpi_tag[x]} within the communicator @var{comm[x]}. On
+completion of all the requests, @var{tag} is unlocked.
 @end deftypefun
 
 @deftypefun int starpu_mpi_irecv_array_detached_unlock_tag (unsigned @var{array_size}, starpu_data_handle_t *@var{data_handle}, int *@var{source}, int *@var{mpi_tag}, MPI_Comm *@var{comm}, starpu_tag_t @var{tag})
+Posts @var{array_size} nonblocking receives in @var{data_handle[x]} from the
+node @var{source[x]} using the message tag @code{mpi_tag[x]} within the
+communicator @var{comm[x]}. On completion of all the requests,
+@var{tag} is unlocked.
 @end deftypefun
 
 @page

+ 44 - 0
doc/devel/handle_refcnt

@@ -0,0 +1,44 @@
+handle.refcnt usage
+===================
+
+It is the counter for a software rwlock, accounting the number of readers
+(mode == STARPU_R) or writers (mode == STARPU_W)
+
+It is thus used
+
+- during task duration for each data
+- right after completion of a data request
+- during application data acquisition
+
+* Creates a reference:
+  * Released by _starpu_notify_data_dependencies():
+    - _starpu_attempt_to_submit_data_request() immediately when returning 0, but otherwise (1) when the pending request is given control
+  * Released by _starpu_notify_data_dependencies():
+    - _starpu_notify_data_dependencies() when may_unlock_data_req_list_head returned a request which can proceed
+
+* Indirectly creates a reference:
+  * Released by starpu_data_release():
+    - _starpu_attempt_to_submit_data_request_from_apps() when returning 0
+    - starpu_data_acquire_cb()
+    - starpu_data_acquire()
+  * Released by _starpu_notify_data_dependencies():
+    - attempt_to_submit_data_request_from_job() when returning 0
+  * Released by _starpu_push_task_output():
+    - _submit_job_enforce_data_deps() for each data.
+
+* Releases a reference:
+  - _starpu_notify_data_dependencies()
+
+* Indirectly releases a reference:
+  - _starpu_release_data_on_node()
+  - _starpu_push_task_output() for each data
+  - starpu_data_release()
+
+* Temporarily creates a reference, released immediately:
+  - _starpu_write_through_data() for needed write-throughs, released by wt_callback
+  - _starpu_benchmark_ping_pong()
+  - _starpu_data_unregister()
+  - _starpu_prefetch_data_on_node_with_mode()
+  - starpu_data_invalidate()
+
+TODO: unclear conversion use

+ 62 - 0
doc/devel/replicate_refcnt

@@ -0,0 +1,62 @@
+replicate.refcnt usage
+======================
+
+It is used to make sure the replicate still exists on some memory node.
+
+It is thus used
+
+- during task duration for each data
+- during data requests, to make sure that the source and target still exist
+- during data reduction, to keep the per-worker data available for reduction
+- during application data acquisition
+
+Function detail
+---------------
+
+* makes sure there is at least a reference:
+  - copy_data_1_to_1_generic()
+  - _starpu_driver_copy_data_1_to_1()
+  - _starpu_post_data_request()
+  - starpu_handle_data_request()
+
+* creates a reference:
+  * Released by starpu_handle_data_request_completion:
+    - _starpu_create_data_request():
+      - 1 on dst_replicate
+      - 1 on src_replicate if (mode & STARPU_R)
+    - _starpu_search_existing_data_request() on src_replicate when turning a request without STARPU_R into one that does.
+
+  * Released by _starpu_data_end_reduction_mode_terminate:
+    - _starpu_data_end_reduction_mode() for each initialized per-worker buffer, creates a reduction_tmp_handles for them
+
+  * Released by _starpu_release_data_on_node or inside _starpu_prefetch_data_on_node_with_mode:
+    - _starpu_fetch_data_on_node() when !detached
+
+* indirectly creates a reference:
+  * Released by _starpu_push_task_output():
+    - fetch_data()
+    - _starpu_fetch_task_input() through fetch_data() for each task data (one per unique data)
+  * Released by starpu_data_release():
+    - starpu_data_acquire_cb()
+
+* releases a reference:
+  - starpu_handle_data_request_completion()
+    - 1 on dst_replicate
+    - 1 on src_replicate if (mode & STARPU_R)
+  - _starpu_data_end_reduction_mode_terminate() for each per-worker buffer which has a reduction_tmp_handles
+  - _starpu_release_data_on_node()
+
+* indirectly releases a reference:
+  - starpu_handle_data_request() through starpu_handle_data_request_completion() when returning 0.
+  - _starpu_push_task_output() for each task data (one per unique data)
+  - starpu_data_release()
+  - _starpu_handle_node_data_requests() through _starpu_handle_node_data_requests() for each completed request, which is not put back on any list.
+
+* temporarily increases, and decreases after:
+  - transfer_subtree_to_node()
+  - _starpu_allocate_interface()
+  - _starpu_prefetch_data_on_node_with_mode(), when produced by the call to _starpu_fetch_data_on_node (!detached)
+  - starpu_data_prefetch_on_node()
+  - starpu_data_invalidate()
+  - _starpu_data_unregister()
+  - _starpu_benchmark_ping_pong()

+ 4 - 1
examples/Makefile.am

@@ -59,7 +59,8 @@ EXTRA_DIST = 					\
 	filters/custom_mf/conversion_opencl.cl  \
 	filters/custom_mf/custom_opencl.cl \
 	interface/complex_kernels.cl \
-	reductions/dot_product_opencl_kernels.cl
+	reductions/dot_product_opencl_kernels.cl	\
+	scheduler/schedulers.sh
 
 CLEANFILES = 					\
 	gordon/null_kernel_gordon.spuelf
@@ -152,6 +153,8 @@ noinst_HEADERS = 				\
 
 STARPU_EXAMPLES	=
 TESTS		=	$(STARPU_EXAMPLES)
+TESTS		+=	scheduler/schedulers.sh
+
 
 if STARPU_HAVE_WINDOWS
 check_PROGRAMS	=	$(STARPU_EXAMPLES)

+ 6 - 6
examples/axpy/axpy.c

@@ -117,7 +117,7 @@ struct starpu_opencl_program opencl_program;
 
 int main(int argc, char **argv)
 {
-	int ret;
+	int ret, exit_value;
 
 	/* Initialize StarPU */
 	ret = starpu_init(NULL);
@@ -187,8 +187,8 @@ int main(int argc, char **argv)
 		ret = starpu_task_submit(task);
 		if (ret == -ENODEV)
 		{
-		     ret = 77;
-		     goto enodev;
+			exit_value = 77;
+			goto enodev;
 		}
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	}
@@ -209,8 +209,8 @@ enodev:
 
 	FPRINTF(stderr, "AFTER y[0] = %2.2f (ALPHA = %2.2f)\n", vec_y[0], alpha);
 
-	if (ret != 77)
-		ret = check();
+	if (exit_value != 77)
+		exit_value = check();
 
 	starpu_free((void *)vec_x);
 	starpu_free((void *)vec_y);
@@ -222,5 +222,5 @@ enodev:
 	/* Stop StarPU */
 	starpu_shutdown();
 
-	return ret;
+	return exit_value;
 }

+ 5 - 1
gcc-plugin/src/Makefile.am

@@ -27,7 +27,11 @@ AM_CPPFLAGS =						\
   -I$(top_srcdir)/include				\
   -I$(GCC_PLUGIN_INCLUDE_DIR) -Wall -DYYERROR_VERBOSE=1
 
-AM_LDFLAGS = -module
+AM_LDFLAGS = -module --tag="$(GCC_FOR_PLUGIN_LIBTOOL_TAG)"
+
+# Use either `gcc' or `g++', whichever is appropriate to build
+# plug-ins for this version of GCC.
+CC = $(GCC_FOR_PLUGIN)
 
 showcheck:
 	-cat $(TEST_LOGS) /dev/null

+ 16 - 2
gcc-plugin/src/starpu.c

@@ -19,8 +19,6 @@
 
 #include <starpu-gcc-config.h>
 
-int plugin_is_GPL_compatible;
-
 /* #define ENABLE_TREE_CHECKING 1 */
 
 #include <gcc-plugin.h>
@@ -62,6 +60,13 @@ int plugin_is_GPL_compatible;
 #include <starpu.h>  /* for `STARPU_CPU' & co.  */
 
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Declared with `C' linkage in <gcc-plugin.h>.  */
+int plugin_is_GPL_compatible;
+
 /* The name of this plug-in.  */
 static const char plugin_name[] = "starpu";
 
@@ -1027,14 +1032,19 @@ validate_opencl_argument_type (location_t loc, const_tree type)
 	  static const struct { const char *c; const char *cl; }
 	  type_map[] =
 	    {
+	      /* Scalar types defined in OpenCL 1.2.  See
+		 <http://www.khronos.org/files/opencl-1-2-quick-reference-card.pdf>.  */
 	      { "char", "cl_char" },
 	      { "unsigned char", "cl_uchar" },
+	      { "uchar", "cl_uchar" },
 	      { "short int", "cl_short" },
 	      { "unsigned short", "cl_ushort" },
 	      { "int", "cl_int" },
 	      { "unsigned int", "cl_uint" },
+	      { "uint", "cl_uint" },
 	      { "long int", "cl_long" },
 	      { "long unsigned int", "cl_ulong" },
+	      { "ulong", "cl_ulong" },
 	      { "float", "cl_float" },
 	      { "double", "cl_double" },
 	      { NULL, NULL }
@@ -2454,3 +2464,7 @@ plugin_init (struct plugin_name_args *plugin_info,
 
   return 0;
 }
+
+#ifdef __cplusplus
+}
+#endif

+ 44 - 0
gcc-plugin/tests/opencl-types.c

@@ -25,6 +25,7 @@
 
 #include <mocks.h>
 #include <unistd.h>
+#include <sys/types.h>				  /* for `uint' & co. */
 
 
 /* Make sure `size_t' is flagged.  */
@@ -113,6 +114,49 @@ my_uchar_task_opencl (char c[])
 }
 
 
+/* "unsigned int" is aka. "uint".  */
+
+static void my_uint_task (const uint *c)
+  __attribute__ ((task));
+static void my_uint_task_cpu (const uint *c)
+  __attribute__ ((task_implementation ("cpu", my_uint_task)));
+static void my_uint_task_opencl (const uint *c)	  /* no warning */
+  __attribute__ ((task_implementation ("opencl", my_uint_task)));
+
+static void
+my_uint_task_cpu (const uint *c)
+{
+}
+
+static void
+my_uint_task_opencl (const uint *c)
+{
+}
+
+
+/* "unsigned char" is aka. "uchar".  */
+
+typedef float uchar;				  /* not a real `uchar'! */
+
+static void my_fake_uchar_task (const uchar *c)
+  __attribute__ ((task));
+static void my_fake_uchar_task_cpu (const uchar *c)
+  __attribute__ ((task_implementation ("cpu", my_fake_uchar_task)));
+static void my_fake_uchar_task_opencl (const uchar *c) /* (warning "differs from the same-named OpenCL type") */
+  __attribute__ ((task_implementation ("opencl", my_fake_uchar_task)));
+
+static void
+my_fake_uchar_task_cpu (const uchar *c)
+{
+}
+
+static void
+my_fake_uchar_task_opencl (const uchar *c)
+{
+}
+
+
+
 /* No OpenCL, no problems.  */
 
 static void my_cool_task (size_t size, long long x[size])

+ 1 - 1
include/starpu_util.h

@@ -49,7 +49,7 @@ extern "C"
 #  endif
 #endif
 
-#define STARPU_ABORT()		abort()
+#define STARPU_ABORT()		assert(0)
 
 #if defined(STARPU_HAVE_STRERROR_R)
 #  define STARPU_CHECK_RETURN_VALUE(err, message) {if (err < 0) { \

+ 95 - 4
m4/gcc.m4

@@ -25,9 +25,87 @@ AC_DEFUN([_STARPU_WITH_GCC_PLUGIN_API], [
   CPPFLAGS="$save_CPPFLAGS"
 ])
 
+dnl Set $ac_cv_starpu_gcc_for_plugin to the compiler to use to compile
+dnl GCC plug-ins.  It's `gcc' for GCC 4.5/4.6, probably `g++' for 4.7,
+dnl and definitely `g++' for 4.8, because the last two build
+dnl themselves with `g++', leading to mangled names.
+dnl See <http://thread.gmane.org/gmane.comp.gcc.devel/125210> for details.
+AC_DEFUN([_STARPU_GCC_PLUGIN_LANGUAGE], [
+  AC_CACHE_CHECK([which compiler to use to build GCC plug-ins],
+    [ac_cv_starpu_gcc_for_plugin], [
+     for GCC_FOR_PLUGIN in "$CC" "$CXX" ""
+     do
+       if test "x$GCC_FOR_PLUGIN" = "x"; then
+	 break;
+       fi
+
+       cat > conftest.c <<END_OF_CONFTEST
+	 #include <gcc-plugin.h>
+         #include <plugin-version.h>
+	 #include <cpplib.h>
+
+	 int plugin_is_GPL_compatible;
+
+         extern struct cpp_reader *parse_in; /* C-family front-ends */
+
+	 static void
+	 define_something (void *gcc_data, void *user_data)
+	 {
+	   cpp_define (parse_in, "CONFTEST_GCC_PLUGIN=1");
+	 }
+
+	 int
+	 plugin_init (struct plugin_name_args *plugin_info,
+		      struct plugin_gcc_version *version)
+	 {
+	   if (!plugin_default_version_check (version, &gcc_version))
+	     return 1;
+	   register_callback ("conftest", PLUGIN_START_UNIT,
+			      define_something, NULL);
+	   return 0;
+	 }
+END_OF_CONFTEST
+
+       # Build the plug-in.
+       rm -f conftest.so
+       _STARPU_WITH_GCC_PLUGIN_API([
+	 _AC_DO(["$GCC_FOR_PLUGIN" "$CPPFLAGS" -fPIC -shared conftest.c -o conftest.so]) || {
+	   AC_MSG_ERROR([failed to build a GCC plug-in with `$GCC_FOR_PLUGIN'])
+	 }
+       ])
+
+       # Attempt to use it.
+       save_CFLAGS="$CFLAGS"
+       CFLAGS="-fplugin=$PWD/conftest.so"
+       AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
+			   #ifndef CONFTEST_GCC_PLUGIN
+			   # error plug-in not loaded
+			   #endif]], [])],
+	 [ac_cv_starpu_gcc_for_plugin="$GCC_FOR_PLUGIN"], [:])
+       CFLAGS="$save_CFLAGS"
+
+       rm -f conftest.so conftest.c
+
+       if test "x$ac_cv_starpu_gcc_for_plugin" != "x"; then
+	 # We're done.
+	 break
+       fi
+     done
+
+     if test "x$ac_cv_starpu_gcc_for_plugin" = "x"; then
+       AC_MSG_RESULT([none])
+       AC_MSG_ERROR([could not find a suitable compiler for GCC plug-ins])
+     fi
+  ])
+
+  $1="$ac_cv_starpu_gcc_for_plugin"
+])
+
+
 dnl Check whether GCC plug-in support is available (GCC 4.5+).
 AC_DEFUN([STARPU_GCC_PLUGIN_SUPPORT], [
   AC_REQUIRE([AC_PROG_CC])
+  AC_REQUIRE([AC_PROG_CXX]) dnl for GCC 4.7+
   AC_CACHE_CHECK([whether GCC supports plug-ins], [ac_cv_have_gcc_plugins], [
     if test "x$GCC" = xyes; then
       # ICC 12.1.0 and Clang 3.1 (among others) support `--version',
@@ -83,11 +161,24 @@ AC_DEFUN([STARPU_GCC_PLUGIN_SUPPORT], [
       AC_CHECK_HEADERS([c-common.h c-pragma.h c-family/c-common.h c-family/c-pragma.h],
         [], [], [#include <gcc-plugin.h>
 	         #include <tree.h>])
-
-      AC_DEFINE_UNQUOTED([STARPU_INCLUDE_DIR],
-        ["`eval "echo $includedir"`/starpu/$STARPU_EFFECTIVE_VERSION"],
-        [Define to the directory where StarPU's headers are installed.])
     ])
+
+
+    AC_DEFINE_UNQUOTED([STARPU_INCLUDE_DIR],
+      ["`eval "echo $includedir"`/starpu/$STARPU_EFFECTIVE_VERSION"],
+      [Define to the directory where StarPU's headers are installed.])
+
+    dnl Now, `gcc' or `g++'?
+    _STARPU_GCC_PLUGIN_LANGUAGE([GCC_FOR_PLUGIN])
+    AC_SUBST([GCC_FOR_PLUGIN])
+
+    dnl Determine the corresponding Libtool tag.
+    if test "$GCC_FOR_PLUGIN" = "$CXX"; then
+      GCC_FOR_PLUGIN_LIBTOOL_TAG="CXX"
+    else
+      GCC_FOR_PLUGIN_LIBTOOL_TAG="CC"
+    fi
+    AC_SUBST([GCC_FOR_PLUGIN_LIBTOOL_TAG])
   fi
 
   AC_SUBST([GCC_PLUGIN_INCLUDE_DIR])

+ 19 - 1
src/common/starpu_spinlock.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010  Université de Bordeaux 1
+ * Copyright (C) 2010, 2012  Université de Bordeaux 1
  * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -85,6 +85,24 @@ int _starpu_spin_lock(struct _starpu_spinlock *lock)
 #endif
 }
 
+int _starpu_spin_checklocked(struct _starpu_spinlock *lock)
+{
+#ifdef STARPU_SPINLOCK_CHECK
+	int ret = pthread_mutex_trylock(&lock->errcheck_lock);
+	STARPU_ASSERT(ret != 0);
+	return ret == 0;
+#else
+#ifdef HAVE_PTHREAD_SPIN_LOCK
+	int ret = pthread_spin_trylock(&lock->lock);
+	STARPU_ASSERT(ret != 0);
+	return ret == 0;
+#else
+	STARPU_ASSERT(lock->taken);
+	return !lock->taken;
+#endif
+#endif
+}
+
 int _starpu_spin_trylock(struct _starpu_spinlock *lock)
 {
 #ifdef STARPU_SPINLOCK_CHECK

+ 2 - 1
src/common/starpu_spinlock.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2011  Université de Bordeaux 1
+ * Copyright (C) 2010-2012  Université de Bordeaux 1
  * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -43,6 +43,7 @@ int _starpu_spin_destroy(struct _starpu_spinlock *lock);
 
 int _starpu_spin_lock(struct _starpu_spinlock *lock);
 int _starpu_spin_trylock(struct _starpu_spinlock *lock);
+int _starpu_spin_checklocked(struct _starpu_spinlock *lock);
 int _starpu_spin_unlock(struct _starpu_spinlock *lock);
 
 #endif // __STARPU_SPINLOCK_H__

+ 3 - 1
src/core/dependencies/cg.c

@@ -152,6 +152,8 @@ void _starpu_notify_cg(struct _starpu_cg *cg)
 				unsigned ndeps_completed =
 					STARPU_ATOMIC_ADD(&job_successors->ndeps_completed, 1);
 
+				STARPU_ASSERT(job_successors->ndeps >= ndeps_completed);
+
 				/* Need to atomically test submitted and check
 				 * dependencies, since this is concurrent with
 				 * _starpu_submit_job */
@@ -183,7 +185,6 @@ void _starpu_notify_cg_list(struct _starpu_cg_list *successors)
 	unsigned succ;
 
 	_starpu_spin_lock(&successors->lock);
-	successors->terminated = 1;
 	/* Note: some thread might be concurrently adding other items */
 	for (succ = 0; succ < successors->nsuccs; succ++)
 	{
@@ -216,5 +217,6 @@ void _starpu_notify_cg_list(struct _starpu_cg_list *successors)
 
 		_starpu_spin_lock(&successors->lock);
 	}
+	successors->terminated = 1;
 	_starpu_spin_unlock(&successors->lock);
 }

+ 3 - 1
src/core/dependencies/cg.h

@@ -45,7 +45,9 @@ struct _starpu_cg_list
 	unsigned ndeps; /* how many deps ? */
 	unsigned ndeps_completed; /* how many deps are done ? */
 
-	/* Whether the completion is finished. */
+	/* Whether the completion is finished.
+	 * For restartable/restarted tasks, only the first iteration is taken into account here.
+	 */
 	unsigned terminated;
 
 	/* List of successors */

+ 1 - 0
src/core/dependencies/data_concurrency.c

@@ -263,6 +263,7 @@ static unsigned unlock_one_requester(struct _starpu_data_requester *r)
 /* The header lock must already be taken by the caller */
 void _starpu_notify_data_dependencies(starpu_data_handle_t handle)
 {
+	_starpu_spin_checklocked(&handle->header_lock);
 	/* A data access has finished so we remove a reference. */
 	STARPU_ASSERT(handle->refcnt > 0);
 	handle->refcnt--;

+ 7 - 1
src/core/dependencies/tags.c

@@ -233,7 +233,13 @@ void _starpu_tag_declare(starpu_tag_t id, struct _starpu_job *job)
 
 	/* the tag is now associated to a job */
 	_starpu_spin_lock(&tag->lock);
-	tag->state = STARPU_ASSOCIATED;
+	/* When the same tag may be signaled several times by different tasks,
+	 * and it's already done, we should not reset the "done" state.
+	 * When the tag is simply used by the same task several times, we have
+	 * to do so. */
+	if (job->task->regenerate || job->submitted == 2 ||
+			tag->state != STARPU_DONE)
+		tag->state = STARPU_ASSOCIATED;
 	_starpu_spin_unlock(&tag->lock);
 }
 

+ 5 - 3
src/core/dependencies/task_deps.c

@@ -81,9 +81,11 @@ void _starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, s
 		dep_job = _starpu_get_job_associated_to_task(dep_task);
 
 		STARPU_ASSERT_MSG(dep_job != job, "A task must not depend on itself.");
-		if (check)
-			STARPU_ASSERT_MSG(!dep_job->submitted || !dep_job->task->destroy || dep_job->task->detach, "Task dependencies have to be set before submission");
-		else
+		if (check) {
+			STARPU_ASSERT_MSG(!dep_job->submitted || !dep_job->task->destroy || dep_job->task->detach, "Unless it is not to be destroyed automatically, a task dependencies have to be set before submission");
+			STARPU_ASSERT_MSG(dep_job->submitted != 2, "For resubmited tasks, dependencies have to be set before first re-submission");
+			STARPU_ASSERT_MSG(!dep_job->submitted || !dep_job->task->regenerate, "For regenerated tasks, dependencies have to be set before first submission");
+		} else
 			STARPU_ASSERT_MSG(dep_job->terminated <= 1, "Task dependencies have to be set before termination");
 
 		_STARPU_TRACE_TASK_DEPS(dep_job, job);

+ 6 - 2
src/core/jobs.c

@@ -247,7 +247,7 @@ void _starpu_handle_job_termination(struct _starpu_job *j, int workerid)
 
 	if (regenerate)
 	{
-		STARPU_ASSERT(detach && !destroy && !task->synchronous);
+		STARPU_ASSERT_MSG(detach && !destroy && !task->synchronous, "Regenerated task must be detached, and not have detroy=1 or synchronous=1");
 
 		/* We reuse the same job structure */
 		int ret = _starpu_submit_job(j);
@@ -286,7 +286,11 @@ static unsigned _starpu_not_all_tag_deps_are_fulfilled(struct _starpu_job *j)
 	else
 	{
 		/* existing deps (if any) are fulfilled */
-		tag->state = STARPU_READY;
+		/* If the same tag is being signaled by several tasks, do not
+		 * clear a DONE state. If it's the same job submitted several
+		 * times with the same tag, we have to do it */
+		if (j->submitted == 2 || tag->state != STARPU_DONE)
+			tag->state = STARPU_READY;
 		/* already prepare for next run */
 		tag_successors->ndeps_completed = 0;
 		ret = 0;

+ 3 - 1
src/core/jobs.h

@@ -92,7 +92,9 @@ LIST_TYPE(_starpu_job,
 	uint32_t footprint;
 
 	/* Indicates whether the task associated to that job has already been
-	 * submitted to StarPU or not (using starpu_task_submit). */
+	 * submitted to StarPU (1) or not (0) (using starpu_task_submit).
+	 * Becomes and stays 2 when the task is submitted several times.
+	 */
 	unsigned submitted;
 
 	/* Indicates whether the task associated to this job is terminated or

+ 9 - 6
src/core/perfmodel/perfmodel.c

@@ -225,7 +225,7 @@ double starpu_task_expected_conversion_time(struct starpu_task *task,
 	unsigned i;
 	int err;
 	double sum = 0.0;
-	unsigned int node, cpu_node;
+	int node, cpu_node;
 
 	/* We need to get one node per archtype. This is kinda ugly,
 	 * but it does the job.
@@ -267,28 +267,31 @@ double starpu_task_expected_conversion_time(struct starpu_task *task,
 		if (!_starpu_data_is_multiformat_handle(handle))
 			continue;
 
+		node = -EINVAL;
+#ifdef STARPU_USE_CPU
 		if (arch < STARPU_CUDA_DEFAULT)
 			node = cpu_node;
+#endif
 #ifdef STARPU_USE_CUDA
-		else if (arch >= STARPU_CUDA_DEFAULT && arch < STARPU_OPENCL_DEFAULT)
+		if (arch >= STARPU_CUDA_DEFAULT && arch < STARPU_OPENCL_DEFAULT)
 			node = cuda_node;
 #endif
 #ifdef STARPU_USE_OPENCL
-		else if (arch >= STARPU_OPENCL_DEFAULT && arch < STARPU_GORDON_DEFAULT)
+		if (arch >= STARPU_OPENCL_DEFAULT && arch < STARPU_GORDON_DEFAULT)
 			node = opencl_node;
 #endif
-		else {
-			node = -EINVAL;
+		if (node == -EINVAL)
 			STARPU_ASSERT(0);
-		}
 
 		if (!_starpu_handle_needs_conversion_task(handle, node))
 			continue;
 
 		conversion_task = _starpu_create_conversion_task(handle, node);
 		sum += starpu_task_expected_length(conversion_task, arch, nimpl);
+		_starpu_spin_lock(&handle->header_lock);
 		handle->refcnt--;
 		handle->busy_count--;
+		_starpu_spin_unlock(&handle->header_lock);
 		starpu_task_deinit(conversion_task);
 		free(conversion_task);
 	}

+ 6 - 9
src/core/sched_policy.c

@@ -184,15 +184,8 @@ void _starpu_init_sched_policy(struct _starpu_machine_config *config, struct _st
 	if (use_prefetch == -1)
 		use_prefetch = 1;
 
-	/* By default, we don't calibrate */
-	unsigned do_calibrate = 0;
-	int res = starpu_get_env_number("STARPU_CALIBRATE");
-	if (res == -1 && config->user_conf)
-		res = config->user_conf->calibrate;
-
-	do_calibrate = (res < 0)?0:(unsigned)res;
-
-	_starpu_set_calibrate_flag(do_calibrate);
+	/* Set calibrate flag */
+	_starpu_set_calibrate_flag(config->conf->calibrate);
 
 	struct starpu_sched_policy *selected_policy;
 	selected_policy = select_sched_policy(config, required_policy);
@@ -289,6 +282,7 @@ static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int wo
 		int *combined_workerid = combined_worker->combined_workerid;
 
 		int ret = 0;
+		int i;
 
 		struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
 		j->task_size = worker_size;
@@ -421,8 +415,10 @@ struct starpu_task *_starpu_create_conversion_task(starpu_data_handle_t handle,
 	format_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, 0);
 	node_kind = starpu_node_get_kind(node);
 
+	_starpu_spin_lock(&handle->header_lock);
 	handle->refcnt++;
 	handle->busy_count++;
+	_starpu_spin_unlock(&handle->header_lock);
 
 	struct starpu_multiformat_data_interface_ops *mf_ops;
 	mf_ops = (struct starpu_multiformat_data_interface_ops *) handle->ops->get_mf_ops(format_interface);
@@ -479,6 +475,7 @@ struct starpu_task *_starpu_pop_task(struct _starpu_worker *worker)
 	struct timespec pop_start_time;
 	if (profiling)
 		_starpu_clock_gettime(&pop_start_time);
+
 pick:
 	_STARPU_PTHREAD_MUTEX_LOCK(&worker->sched_mutex);
 	/* perhaps there is some local task to be executed first */

+ 4 - 1
src/core/task.c

@@ -224,7 +224,10 @@ int _starpu_submit_job(struct _starpu_job *j)
 	/* Need to atomically set submitted to 1 and check dependencies, since
 	 * this is concucrent with _starpu_notify_cg */
 	j->terminated = 0;
-	j->submitted = 1;
+	if (!j->submitted)
+		j->submitted = 1;
+	else
+		j->submitted = 2;
 
 	int ret = _starpu_enforce_deps_and_schedule(j);
 

+ 15 - 41
src/core/topology.c

@@ -55,7 +55,7 @@ static struct starpu_htbl32_node *devices_using_cuda = NULL;
 #  ifdef STARPU_USE_OPENCL
 static void _starpu_initialize_workers_opencl_gpuid(struct _starpu_machine_config *config);
 #  endif
-static void _starpu_initialize_workers_gpuid(int use_explicit_workers_gpuid, int *explicit_workers_gpuid,
+static void _starpu_initialize_workers_gpuid(int *explicit_workers_gpuid,
                                              int *current, int *workers_gpuid, const char *varname, unsigned nhwgpus);
 static unsigned may_bind_automatically = 0;
 #endif
@@ -69,8 +69,7 @@ static void _starpu_initialize_workers_cuda_gpuid(struct _starpu_machine_config
 {
 	struct starpu_machine_topology *topology = &config->topology;
 
-        _starpu_initialize_workers_gpuid(config->user_conf==NULL?0:config->user_conf->use_explicit_workers_cuda_gpuid,
-                                         config->user_conf==NULL?NULL:(int *)config->user_conf->workers_cuda_gpuid,
+        _starpu_initialize_workers_gpuid(config->conf->use_explicit_workers_cuda_gpuid==0?NULL:(int *)config->conf->workers_cuda_gpuid,
                                          &(config->current_cuda_gpuid), (int *)topology->workers_cuda_gpuid, "STARPU_WORKERS_CUDAID",
                                          topology->nhwcudagpus);
 }
@@ -81,8 +80,7 @@ static void _starpu_initialize_workers_opencl_gpuid(struct _starpu_machine_confi
 {
 	struct starpu_machine_topology *topology = &config->topology;
 
-        _starpu_initialize_workers_gpuid(config->user_conf==NULL?0:config->user_conf->use_explicit_workers_opencl_gpuid,
-                                         config->user_conf==NULL?NULL:(int *)config->user_conf->workers_opencl_gpuid,
+        _starpu_initialize_workers_gpuid(config->conf->use_explicit_workers_opencl_gpuid==0?NULL:(int *)config->conf->workers_opencl_gpuid,
                                          &(config->current_opencl_gpuid), (int *)topology->workers_opencl_gpuid, "STARPU_WORKERS_OPENCLID",
                                          topology->nhwopenclgpus);
 
@@ -130,7 +128,7 @@ static void _starpu_initialize_workers_opencl_gpuid(struct _starpu_machine_confi
 
 
 #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
-static void _starpu_initialize_workers_gpuid(int use_explicit_workers_gpuid, int *explicit_workers_gpuid,
+static void _starpu_initialize_workers_gpuid(int *explicit_workers_gpuid,
                                              int *current, int *workers_gpuid, const char *varname, unsigned nhwgpus)
 {
 	char *strval;
@@ -185,7 +183,7 @@ static void _starpu_initialize_workers_gpuid(int use_explicit_workers_gpuid, int
 			}
 		}
 	}
-	else if (use_explicit_workers_gpuid)
+	else if (explicit_workers_gpuid)
 	{
 		/* we use the explicit value from the user */
 		memcpy(workers_gpuid,
@@ -196,8 +194,8 @@ static void _starpu_initialize_workers_gpuid(int use_explicit_workers_gpuid, int
 	{
 		/* by default, we take a round robin policy */
 		if (nhwgpus > 0)
-		for (i = 0; i < STARPU_NMAXWORKERS; i++)
-			workers_gpuid[i] = (unsigned)(i % nhwgpus);
+		     for (i = 0; i < STARPU_NMAXWORKERS; i++)
+			  workers_gpuid[i] = (unsigned)(i % nhwgpus);
 
 		/* StarPU can use sampling techniques to bind threads correctly */
 		may_bind_automatically = 1;
@@ -273,8 +271,7 @@ unsigned _starpu_topology_get_nhwcpu(struct _starpu_machine_config *config)
 	return config->topology.nhwcpus;
 }
 
-static int _starpu_init_machine_config(struct _starpu_machine_config *config,
-				struct starpu_conf *user_conf)
+static int _starpu_init_machine_config(struct _starpu_machine_config *config)
 {
 	int i;
 	for (i = 0; i < STARPU_NMAXWORKERS; i++)
@@ -290,14 +287,8 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config,
 	_starpu_initialize_workers_bindid(config);
 
 #ifdef STARPU_USE_CUDA
-	int ncuda;
-	ncuda = starpu_get_env_number("STARPU_NCUDA");
-
-	/* STARPU_NCUDA is not set. Did the user specify anything ? */
-	if (ncuda == -1 && user_conf)
-		ncuda = user_conf->ncuda;
+	int ncuda = config->conf->ncuda;
 
-	
 	if (ncuda != 0)
 	{
 		/* The user did not disable CUDA. We need to initialize CUDA
@@ -364,12 +355,7 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config,
 #endif
 
 #ifdef STARPU_USE_OPENCL
-	int nopencl;
-	nopencl = starpu_get_env_number("STARPU_NOPENCL");
-
-	/* STARPU_NOPENCL is not set. Did the user specify anything ? */
-	if (nopencl == -1 && user_conf)
-		nopencl = user_conf->nopencl;
+	int nopencl = config->conf->nopencl;
 
 	if (nopencl != 0)
 	{
@@ -440,12 +426,7 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config,
 #endif
 
 #ifdef STARPU_USE_GORDON
-	int ngordon;
-	ngordon = starpu_get_env_number("STARPU_NGORDON");
-
-	/* STARPU_NGORDON is not set. Did the user specify anything ? */
-	if (ngordon == -1 && user_conf)
-		ngordon = user_conf->ngordon;
+	int ngordon = config->conf->ngordon;
 
 	if (ngordon != 0)
 	{
@@ -488,12 +469,7 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config,
 /* we put the CPU section after the accelerator : in case there was an
  * accelerator found, we devote one cpu */
 #ifdef STARPU_USE_CPU
-	int ncpu;
-	ncpu = starpu_get_env_number("STARPU_NCPUS");
-
-	/* STARPU_NCPUS is not set. Did the user specify anything ? */
-	if (ncpu == -1 && user_conf)
-		ncpu = user_conf->ncpus;
+	int ncpu = config->conf->ncpus;
 
 	if (ncpu != 0)
 	{
@@ -603,11 +579,11 @@ static void _starpu_initialize_workers_bindid(struct _starpu_machine_config *con
 			}
 		}
 	}
-	else if (config->user_conf && config->user_conf->use_explicit_workers_bindid)
+	else if (config->conf->use_explicit_workers_bindid)
 	{
 		/* we use the explicit value from the user */
 		memcpy(topology->workers_bindid,
-			config->user_conf->workers_bindid,
+			config->conf->workers_bindid,
 			STARPU_NMAXWORKERS*sizeof(unsigned));
 	}
 	else
@@ -879,9 +855,7 @@ int _starpu_build_topology(struct _starpu_machine_config *config)
 {
 	int ret;
 
-	struct starpu_conf *user_conf = config->user_conf;
-
-	ret = _starpu_init_machine_config(config, user_conf);
+	ret = _starpu_init_machine_config(config);
 	if (ret)
 		return ret;
 

+ 45 - 11
src/core/workers.c

@@ -372,17 +372,49 @@ int starpu_conf_init(struct starpu_conf *conf)
 	conf->nspus = starpu_get_env_number("STARPU_NGORDON");
 	conf->calibrate = starpu_get_env_number("STARPU_CALIBRATE");
 
+	if (conf->calibrate == -1)
+	     conf->calibrate = 0;
+
 	conf->use_explicit_workers_bindid = 0; /* TODO */
 	conf->use_explicit_workers_cuda_gpuid = 0; /* TODO */
 	conf->use_explicit_workers_opencl_gpuid = 0; /* TODO */
 
 	conf->single_combined_worker = starpu_get_env_number("STARPU_SINGLE_COMBINED_WORKER");
+	if (conf->single_combined_worker == -1)
+	     conf->single_combined_worker = 0;
 
 	conf->disable_asynchronous_copy = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_COPY");
 
 	return 0;
 }
 
+/* Override *value with the integer held by the environment variable `name`.
+ * When starpu_get_env_number() returns -1 (the value the surrounding code
+ * treats as "variable not set"), *value is left untouched.
+ * `name` is only read, hence the const qualifier: callers pass string
+ * literals. */
+static void _starpu_conf_set_value_against_environment(const char *name, int *value)
+{
+	int number = starpu_get_env_number(name);
+
+	/* Only a usable environment setting may replace the current value */
+	if (number != -1)
+		*value = number;
+}
+
+/* Give the environment the last word over the values stored in conf:
+ * STARPU_SCHED, when defined, replaces the scheduling policy name, and each
+ * integer variable of the table below replaces its field whenever
+ * _starpu_conf_set_value_against_environment() finds a value for it. */
+static void _starpu_conf_check_environment(struct starpu_conf *conf)
+{
+	/* Environment variable -> configuration field it overrides */
+	struct
+	{
+		char *variable;
+		int *field;
+	} overrides[] =
+	{
+		{ "STARPU_NCPUS", &conf->ncpus },
+		{ "STARPU_NCUDA", &conf->ncuda },
+		{ "STARPU_NOPENCL", &conf->nopencl },
+		{ "STARPU_NGORDON", &conf->nspus },
+		{ "STARPU_CALIBRATE", &conf->calibrate },
+		{ "STARPU_SINGLE_COMBINED_WORKER", &conf->single_combined_worker },
+		{ "STARPU_DISABLE_ASYNCHRONOUS_COPY", &conf->disable_asynchronous_copy },
+	};
+	unsigned i;
+	char *policy = getenv("STARPU_SCHED");
+
+	if (policy != NULL)
+		conf->sched_policy_name = policy;
+
+	for (i = 0; i < sizeof(overrides) / sizeof(overrides[0]); i++)
+		_starpu_conf_set_value_against_environment(overrides[i].variable, overrides[i].field);
+}
+
 int starpu_init(struct starpu_conf *user_conf)
 {
 	int ret;
@@ -454,21 +486,19 @@ int starpu_init(struct starpu_conf *user_conf)
 
 	/* store the pointer to the user explicit configuration during the
 	 * initialization */
-	config.user_conf = user_conf;
-
-	if (user_conf)
+	if (user_conf == NULL)
 	{
-	     int asynchronous_copy_disabled = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_COPY");
-	     if (asynchronous_copy_disabled == 1)
-		  config.disable_asynchronous_copy = 1;
-	     else
-		  config.disable_asynchronous_copy = (user_conf->disable_asynchronous_copy == 1);
+	     struct starpu_conf *conf = malloc(sizeof(struct starpu_conf));
+	     starpu_conf_init(conf);
+	     config.conf = conf;
+	     config.default_conf = 1;
 	}
 	else
 	{
-	     int asynchronous_copy_disabled = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_COPY");
-	     config.disable_asynchronous_copy = (asynchronous_copy_disabled == 1);
+	     config.conf = user_conf;
+	     config.default_conf = 0;
 	}
+	_starpu_conf_check_environment(config.conf);
 
 	_starpu_init_all_sched_ctxs(&config);
 	ret = _starpu_build_topology(&config);
@@ -667,6 +697,10 @@ void starpu_shutdown(void)
 	_STARPU_PTHREAD_COND_SIGNAL(&init_cond);
 	_STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
 
+	/* Free the default configuration if it was allocated by StarPU */
+	if (config.default_conf)
+	     free(config.conf);
+
 	_STARPU_DEBUG("Shutdown finished\n");
 }
 
@@ -723,7 +757,7 @@ unsigned starpu_spu_worker_get_count(void)
 
 int starpu_asynchronous_copy_disabled()
 {
-	return config.disable_asynchronous_copy;
+	return config.conf->disable_asynchronous_copy;
 }
 
 /* When analyzing performance, it is useful to see what is the processing unit

+ 6 - 6
src/core/workers.h

@@ -158,16 +158,16 @@ struct _starpu_machine_config
 	 * the result of (worker_mask & STARPU_CUDA). */
 	uint32_t worker_mask;
 
-	/* in case the user gives an explicit configuration, this is only valid
-	 * during starpu_init. */
-	struct starpu_conf *user_conf;
+        /* Either the configuration the user passed to starpu_init, or a default configuration */
+	struct starpu_conf *conf;
+	/* Set to 1 when the user gave no conf to starpu_init; it then
+	 * indicates that the memory allocated for the default
+	 * configuration must be freed on shutdown */
+	int default_conf;
 
 	/* this flag is set until the runtime is stopped */
 	unsigned running;
 
-        /* indicate if the asynchronous copies should be disabled */
-        int disable_asynchronous_copy;
-
 	/* all the sched ctx of the current instance of starpu */
 	struct _starpu_sched_ctx sched_ctxs[STARPU_NMAX_SCHED_CTXS];
 };

+ 5 - 1
src/datawizard/coherency.c

@@ -282,6 +282,8 @@ static struct _starpu_data_request *_starpu_search_existing_data_request(struct
 
 	if (r)
 	{
+		_starpu_spin_checklocked(&r->handle->header_lock);
+
 		_starpu_spin_lock(&r->lock);
 
                 /* perhaps we need to "upgrade" the request */
@@ -333,13 +335,15 @@ static struct _starpu_data_request *_starpu_search_existing_data_request(struct
  * 		    else (invalid,owner->shared)
  */
 
-/* This function is called with handle's header lock taken */
 struct _starpu_data_request *_starpu_create_request_to_fetch_data(starpu_data_handle_t handle,
 								  struct _starpu_data_replicate *dst_replicate,
 								  enum starpu_access_mode mode, unsigned is_prefetch,
 								  unsigned async,
 								  void (*callback_func)(void *), void *callback_arg)
 {
+	/* This function is called with handle's header lock taken */
+	_starpu_spin_checklocked(&handle->header_lock);
+
 	unsigned requesting_node = dst_replicate->memory_node;
 
 	if (dst_replicate->state != STARPU_INVALID)

+ 4 - 0
src/datawizard/data_request.c

@@ -90,6 +90,8 @@ struct _starpu_data_request *_starpu_create_data_request(starpu_data_handle_t ha
 {
 	struct _starpu_data_request *r = _starpu_data_request_new();
 
+	_starpu_spin_checklocked(&handle->header_lock);
+
 	_starpu_spin_init(&r->lock);
 
 	r->handle = handle;
@@ -235,6 +237,8 @@ static void starpu_handle_data_request_completion(struct _starpu_data_request *r
 #ifdef STARPU_MEMORY_STATUS
 	enum _starpu_cache_state old_src_replicate_state = src_replicate->state;
 #endif
+
+	_starpu_spin_checklocked(&handle->header_lock);
 	_starpu_update_data_state(handle, r->dst_replicate, mode);
 
 #ifdef STARPU_MEMORY_STATUS

+ 4 - 4
src/datawizard/interfaces/bcsr_interface.c

@@ -331,7 +331,7 @@ static ssize_t allocate_bcsr_buffer_on_node(void *data_interface_, uint32_t dst_
 		}
 #endif
 		default:
-			STARPU_ASSERT(0);
+			STARPU_ABORT();
 	}
 
 	/* allocation succeeded */
@@ -361,7 +361,7 @@ fail_rowptr:
 			break;
 #endif
 		default:
-			STARPU_ASSERT(0);
+			STARPU_ABORT();
 	}
 
 fail_colind:
@@ -380,7 +380,7 @@ fail_colind:
 			break;
 #endif
 		default:
-			STARPU_ASSERT(0);
+			STARPU_ABORT();
 	}
 
 fail_nzval:
@@ -416,7 +416,7 @@ static void free_bcsr_buffer_on_node(void *data_interface, uint32_t node)
 			break;
 #endif
 		default:
-			STARPU_ASSERT(0);
+			STARPU_ABORT();
 	}
 }
 

+ 2 - 2
src/datawizard/interfaces/block_interface.c

@@ -349,7 +349,7 @@ static ssize_t allocate_block_buffer_on_node(void *data_interface_, uint32_t dst
 			}
 #endif
 		default:
-			STARPU_ASSERT(0);
+			STARPU_ABORT();
 	}
 
 	if (!fail)
@@ -401,7 +401,7 @@ static void free_block_buffer_on_node(void *data_interface, uint32_t node)
                         break;
 #endif
 		default:
-			STARPU_ASSERT(0);
+			STARPU_ABORT();
 	}
 }
 

+ 4 - 4
src/datawizard/interfaces/csr_interface.c

@@ -305,7 +305,7 @@ static ssize_t allocate_csr_buffer_on_node(void *data_interface_, uint32_t dst_n
 			}
 #endif
 		default:
-			STARPU_ASSERT(0);
+			STARPU_ABORT();
 	}
 
 	/* allocation succeeded */
@@ -340,7 +340,7 @@ fail_rowptr:
 			break;
 #endif
 		default:
-			STARPU_ASSERT(0);
+			STARPU_ABORT();
 	}
 
 fail_colind:
@@ -364,7 +364,7 @@ fail_colind:
 			break;
 #endif
 		default:
-			STARPU_ASSERT(0);
+			STARPU_ABORT();
 	}
 
 fail_nzval:
@@ -409,7 +409,7 @@ static void free_csr_buffer_on_node(void *data_interface, uint32_t node)
 			break;
 #endif
 		default:
-			STARPU_ASSERT(0);
+			STARPU_ABORT();
 	}
 }
 

+ 2 - 2
src/datawizard/interfaces/matrix_interface.c

@@ -328,7 +328,7 @@ static ssize_t allocate_matrix_buffer_on_node(void *data_interface_, uint32_t ds
 			}
 #endif
 		default:
-			STARPU_ASSERT(0);
+			STARPU_ABORT();
 	}
 
 	if (!fail)
@@ -379,7 +379,7 @@ static void free_matrix_buffer_on_node(void *data_interface, uint32_t node)
                         break;
 #endif
 		default:
-			STARPU_ASSERT(0);
+			STARPU_ABORT();
 	}
 }
 

+ 1 - 1
src/datawizard/interfaces/multiformat_interface.c

@@ -363,7 +363,7 @@ static ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, uint32
 			}
 #endif
 		default:
-			STARPU_ASSERT(0);
+			STARPU_ABORT();
 	}
 
 	if (fail)

+ 2 - 2
src/datawizard/interfaces/variable_interface.c

@@ -247,7 +247,7 @@ static ssize_t allocate_variable_buffer_on_node(void *data_interface_, uint32_t
 			}
 #endif
 		default:
-			STARPU_ASSERT(0);
+			STARPU_ABORT();
 	}
 
 	if (fail)
@@ -281,7 +281,7 @@ static void free_variable_buffer_on_node(void *data_interface, uint32_t node)
                         break;
 #endif
 		default:
-			STARPU_ASSERT(0);
+			STARPU_ABORT();
 	}
 }
 

+ 2 - 2
src/datawizard/interfaces/vector_interface.c

@@ -283,7 +283,7 @@ static ssize_t allocate_vector_buffer_on_node(void *data_interface_, uint32_t ds
 			}
 #endif
 		default:
-			STARPU_ASSERT(0);
+			STARPU_ABORT();
 	}
 
 	if (fail)
@@ -326,7 +326,7 @@ static void free_vector_buffer_on_node(void *data_interface, uint32_t node)
                         break;
 #endif
 		default:
-			STARPU_ASSERT(0);
+			STARPU_ABORT();
 	}
 }
 

+ 10 - 6
src/datawizard/memalloc.c

@@ -158,6 +158,7 @@ static void transfer_subtree_to_node(starpu_data_handle_t handle, unsigned src_n
 #endif
 			/* TODO use request !! */
 			/* Take temporary references on the replicates */
+			_starpu_spin_checklocked(&handle->header_lock);
 			src_replicate->refcnt++;
 			dst_replicate->refcnt++;
 			handle->busy_count+=2;
@@ -708,11 +709,12 @@ static size_t _starpu_get_global_mem_size(int dst_node)
 	switch(kind)
 	{
 		case STARPU_CPU_RAM:
-#ifdef STARPU_DEVEL
-#warning to be fixed
-#endif
-			global_mem_size = 64*1024*1024;
-			break;
+		{
+			/* We should probably never get here: if there is no
+			 * space left in RAM, the operating system should swap
+			 * to disk for us. */
+			STARPU_ABORT();
+		}
 #ifdef STARPU_USE_CUDA
 		case STARPU_CUDA_RAM:
 		{
@@ -730,7 +732,7 @@ static size_t _starpu_get_global_mem_size(int dst_node)
 		}
 #endif
 		default:
-			STARPU_ASSERT(0);
+			STARPU_ABORT();
 	}
 	return global_mem_size;
 }
@@ -752,6 +754,8 @@ static ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, struct _s
 	unsigned attempts = 0;
 	ssize_t allocated_memory;
 
+	_starpu_spin_checklocked(&handle->header_lock);
+
 	_starpu_data_allocation_inc_stats(dst_node);
 
 #ifdef STARPU_USE_ALLOCATION_CACHE

+ 5 - 0
src/datawizard/reduction.c

@@ -113,6 +113,8 @@ void _starpu_data_end_reduction_mode(starpu_data_handle_t handle)
 	unsigned replicate_count = 0;
 	starpu_data_handle_t replicate_array[1 + STARPU_NMAXWORKERS];
 
+	_starpu_spin_checklocked(&handle->header_lock);
+
 	for (node = 0; node < STARPU_MAXNODES; node++)
 	{
 		if (handle->per_node[node].state != STARPU_INVALID)
@@ -328,6 +330,9 @@ void _starpu_data_end_reduction_mode_terminate(starpu_data_handle_t handle)
 
 //	fprintf(stderr, "_starpu_data_end_reduction_mode_terminate\n");
 	unsigned worker;
+
+	_starpu_spin_checklocked(&handle->header_lock);
+
 	for (worker = 0; worker < nworkers; worker++)
 	{
 		struct _starpu_data_replicate *replicate;

+ 7 - 1
src/datawizard/user_interactions.c

@@ -32,13 +32,17 @@ int starpu_data_request_allocation(starpu_data_handle_t handle, uint32_t node)
 
 	STARPU_ASSERT(handle);
 
-	r = _starpu_create_data_request(handle, NULL, &handle->per_node[node], node, STARPU_NONE, 0, 0);
+	_starpu_spin_lock(&handle->header_lock);
+
+	r = _starpu_create_data_request(handle, NULL, &handle->per_node[node], node, STARPU_NONE, 0, 1);
 
 	/* we do not increase the refcnt associated to the request since we are
 	 * not waiting for its termination */
 
 	_starpu_post_data_request(r, node);
 
+	_starpu_spin_unlock(&handle->header_lock);
+
 	return 0;
 }
 
@@ -212,9 +216,11 @@ int starpu_data_acquire(starpu_data_handle_t handle, enum starpu_access_mode mod
 	{
 		struct starpu_task *task = _starpu_create_conversion_task(handle, 0);
 		int ret;
+		_starpu_spin_lock(&handle->header_lock);
 		handle->refcnt--;
 		handle->busy_count--;
 		handle->mf_node = 0;
+		_starpu_spin_unlock(&handle->header_lock);
 		task->synchronous = 1;
 		ret = _starpu_task_submit_internally(task);
 		STARPU_ASSERT(!ret);

+ 1 - 1
src/sched_policies/detect_combined_workers.c

@@ -478,7 +478,7 @@ void _starpu_sched_find_worker_combinations(struct starpu_machine_topology *topo
 {
     struct _starpu_machine_config *config = _starpu_get_machine_config();
 
-    if ((config->user_conf && config->user_conf->single_combined_worker > 0) || starpu_get_env_number("STARPU_SINGLE_COMBINED_WORKER") > 0)
+    if (config->conf->single_combined_worker > 0)
 	combine_all_cpu_workers(topology);
     else
     {

+ 0 - 86
starpu-top/StarPU-Top-qwt-embed.pri

@@ -1,86 +0,0 @@
-SRCDIR=.
-SOURCES += \
-    $$SRCDIR/qwt/qwt_thermo.cpp \
-    $$SRCDIR/qwt/qwt_math.cpp \
-    $$SRCDIR/qwt/qwt_scale_engine.cpp \
-    $$SRCDIR/qwt/qwt_scale_draw.cpp \
-    $$SRCDIR/qwt/qwt_scale_map.cpp \
-    $$SRCDIR/qwt/qwt_scale_div.cpp \
-    $$SRCDIR/qwt/qwt_painter.cpp \
-    $$SRCDIR/qwt/qwt_abstract_scale.cpp \
-    $$SRCDIR/qwt/qwt_abstract_scale_draw.cpp \
-    $$SRCDIR/qwt/qwt_interval.cpp \
-    $$SRCDIR/qwt/qwt_text.cpp \
-    $$SRCDIR/qwt/qwt_text_engine.cpp \
-    $$SRCDIR/qwt/qwt_clipper.cpp \
-    $$SRCDIR/qwt/qwt_color_map.cpp \
-    $$SRCDIR/qwt/qwt_plot.cpp \
-    $$SRCDIR/qwt/qwt_plot_dict.cpp \
-    $$SRCDIR/qwt/qwt_plot_layout.cpp \
-    $$SRCDIR/qwt/qwt_scale_widget.cpp \
-    $$SRCDIR/qwt/qwt_text_label.cpp \
-    $$SRCDIR/qwt/qwt_legend.cpp \
-    $$SRCDIR/qwt/qwt_dyngrid_layout.cpp \
-    $$SRCDIR/qwt/qwt_plot_canvas.cpp \
-    $$SRCDIR/qwt/qwt_plot_item.cpp \
-    $$SRCDIR/qwt/qwt_legend_item.cpp \
-    $$SRCDIR/qwt/qwt_symbol.cpp \
-    $$SRCDIR/qwt/qwt_plot_axis.cpp \
-    $$SRCDIR/qwt/qwt_plot_xml.cpp \
-    $$SRCDIR/qwt/qwt_knob.cpp \
-    $$SRCDIR/qwt/qwt_abstract_slider.cpp \
-    $$SRCDIR/qwt/qwt_round_scale_draw.cpp \
-    $$SRCDIR/qwt/qwt_double_range.cpp \
-    $$SRCDIR/qwt/qwt_wheel.cpp \
-    $$SRCDIR/qwt/qwt_slider.cpp \
-    $$SRCDIR/qwt/qwt_plot_curve.cpp \
-    $$SRCDIR/qwt/qwt_plot_seriesitem.cpp \
-    $$SRCDIR/qwt/qwt_series_data.cpp \
-    $$SRCDIR/qwt/qwt_point_polar.cpp \
-    $$SRCDIR/qwt/qwt_curve_fitter.cpp \
-    $$SRCDIR/qwt/qwt_spline.cpp \
-    $$SRCDIR/qwt/qwt_dial.cpp \
-    $$SRCDIR/qwt/qwt_dial_needle.cpp
-HEADERS += \
-    $$SRCDIR/qwt/qwt_thermo.h \
-    $$SRCDIR/qwt/qwt_math.h \
-    $$SRCDIR/qwt/qwt_scale_engine.h \
-    $$SRCDIR/qwt/qwt_scale_draw.h \
-    $$SRCDIR/qwt/qwt_scale_map.h \
-    $$SRCDIR/qwt/qwt_scale_div.h \
-    $$SRCDIR/qwt/qwt_painter.h \
-    $$SRCDIR/qwt/qwt_global.h \
-    $$SRCDIR/qwt/qwt_abstract_scale.h \
-    $$SRCDIR/qwt/qwt_abstract_scale_draw.h \
-    $$SRCDIR/qwt/qwt_interval.h \
-    $$SRCDIR/qwt/qwt_text.h \
-    $$SRCDIR/qwt/qwt_text_engine.h \
-    $$SRCDIR/qwt/qwt_clipper.h \
-    $$SRCDIR/qwt/qwt_color_map.h \
-    $$SRCDIR/qwt/qwt_plot.h \
-    $$SRCDIR/qwt/qwt_plot_dict.h \
-    $$SRCDIR/qwt/qwt_plot_layout.h \
-    $$SRCDIR/qwt/qwt_scale_widget.h \
-    $$SRCDIR/qwt/qwt_text_label.h \
-    $$SRCDIR/qwt/qwt_legend.h \
-    $$SRCDIR/qwt/qwt_dyngrid_layout.h \
-    $$SRCDIR/qwt/qwt_plot_canvas.h \
-    $$SRCDIR/qwt/qwt_plot_item.h \
-    $$SRCDIR/qwt/qwt_legend_itemmanager.h \
-    $$SRCDIR/qwt/qwt_legend_item.h \
-    $$SRCDIR/qwt/qwt_symbol.h \
-    $$SRCDIR/qwt/qwt_knob.h \
-    $$SRCDIR/qwt/qwt_abstract_slider.h \
-    $$SRCDIR/qwt/qwt_round_scale_draw.h \
-    $$SRCDIR/qwt/qwt_double_range.h \
-    $$SRCDIR/qwt/qwt_wheel.h \
-    $$SRCDIR/qwt/qwt_slider.h \
-    $$SRCDIR/qwt/qwt_plot_curve.h \
-    $$SRCDIR/qwt/qwt_plot_seriesitem.h \
-    $$SRCDIR/qwt/qwt_series_data.h \
-    $$SRCDIR/qwt/qwt_point_polar.h \
-    $$SRCDIR/qwt/qwt_curve_fitter.h \
-    $$SRCDIR/qwt/qwt_spline.h \
-    $$SRCDIR/qwt/qwt_dial.h \
-    $$SRCDIR/qwt/qwt_dial_needle.h
-INCLUDEPATH += $$SRCDIR/qwt

+ 6 - 4
tests/datawizard/mpi_like_async.c

@@ -376,6 +376,12 @@ int main(int argc, char **argv)
 	/* We check that the value in the "last" thread is valid */
 	starpu_data_handle_t last_handle = problem_data[nthreads - 1].handle;
 	starpu_data_acquire(last_handle, STARPU_R);
+
+#ifdef STARPU_USE_OPENCL
+        ret = starpu_opencl_unload_opencl(&opencl_program);
+        STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
+#endif
+
 	ret = EXIT_SUCCESS;
 	if (problem_data[nthreads - 1].val != (nthreads * niter))
 	{
@@ -389,10 +395,6 @@ int main(int argc, char **argv)
 		starpu_data_unregister(problem_data[t].handle);
 	}
 
-#ifdef STARPU_USE_OPENCL
-        ret = starpu_opencl_unload_opencl(&opencl_program);
-        STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
-#endif
 	starpu_shutdown();
 
 	STARPU_RETURN(ret);

+ 6 - 1
tests/main/starpu_init.c

@@ -31,6 +31,8 @@ static int check_cpu(int env_cpu, int conf_cpu, int expected_cpu, int *cpu)
 {
 	int ret;
 
+	FPRINTF(stderr, "Testing with env=%d - conf=%d\n", env_cpu, conf_cpu);
+
 	if (env_cpu != -1)
 	{
 		char string[50];
@@ -40,7 +42,10 @@ static int check_cpu(int env_cpu, int conf_cpu, int expected_cpu, int *cpu)
 
 	struct starpu_conf user_conf;
 	starpu_conf_init(&user_conf);
-	user_conf.ncpus = conf_cpu;
+	if (conf_cpu != -1)
+	{
+	     user_conf.ncpus = conf_cpu;
+	}
 	ret = starpu_init(&user_conf);
 
 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;