13 yıl önce · 5b3f428b00
--- a/ChangeLog
+++ b/ChangeLog
@@ -38,6 +38,7 @@ New features:
 
				   * Permit to specify MPI tags for more efficient starpu_mpi_insert_task
			
 
				 
			
 
				 Changes:
			
 
				+  * Fix several memory leaks and race conditions
			
 
				   * Make environment variables take precedence over the configuration
			
 
				         passed to starpu_init()
			
 
				   * Libtool interface versioning has been included in libraries names
			
--- a/README.dev
+++ b/README.dev
@@ -20,6 +20,7 @@ Contents
 
				 - Developer Warnings
			
 
				 - Naming Conventions
			
 
				 - Coding Style
			
 
				+- Error handling
			
 
				 
			
 
				 Developer Warnings
			
 
				 ------------------
			
@@ -51,3 +52,27 @@ Coding Style
 
				 ------------
			
 
				 
			
 
				 * Curly braces always go on a new line
			
 
				+
			
 
				+
			
 
				+
			
 
				+Error handling
			
 
				+--------------
			
 
				+* Use STARPU_ABORT() for catastrophic errors, from which StarPU will never
			
 
				+  recover.
			
 
				+
			
 
				+	switch (node_kind)
			
 
				+	{
			
 
				+		case STARPU_CPU_RAM:
			
 
				+			do_stg();
			
 
				+			break;
			
 
				+		...
			
 
				+		default:
			
 
				+			/* We cannot be here */
			
 
				+			STARPU_ABORT();
			
 
				+	}
			
 
				+
			
 
				+* Use STARPU_ASSERT() to run checks that are very likely to succeed, but still
			
 
				+  are useful for debugging purposes. It should be OK to disable them with
			
 
				+  --enable-fast.
			
 
				+
			
 
				+	STARPU_ASSERT(j->terminated != 0);
			
--- a/configure.ac
+++ b/configure.ac
@@ -1676,6 +1676,6 @@ AC_MSG_NOTICE([
 
				 	       MPI test suite: $running_mpi_check
			
 
				 	       FFT Support: $fft_support
			
 
				 	       GCC plug-in: $build_gcc_plugin
			
 
				-	       GCC plug-in test suite: $run_gcc_plugin_test_suite
			
 
				+	       GCC plug-in test suite (requires GNU Guile): $run_gcc_plugin_test_suite
			
 
				 	       SOCL enabled:  $build_socl
			
 
				 ])
			
--- a/doc/chapters/basic-api.texi
+++ b/doc/chapters/basic-api.texi
@@ -143,6 +143,11 @@ application: statistics and other post-mortem debugging information are not
 
				 guaranteed to be available until this method has been called.
			
 
				 @end deftypefun
			
 
				 
			
 
				+@deftypefun int starpu_asynchronous_copy_disabled ()
			
 
				+Return 1 if asynchronous data transfers between CPU and accelerators
			
 
				+are disabled.
			
 
				+@end deftypefun
			
 
				+
			
 
				 @node Workers' Properties
			
 
				 @section Workers' Properties
			
 
				 
			
@@ -241,6 +246,24 @@ This function returns the identifier of the memory node associated to the
 
				 worker identified by @var{workerid}.
			
 
				 @end deftypefun
			
 
				 
			
 
				+@deftp {Data Type} {enum starpu_node_kind}
			
 
				+Kind of a memory node, as returned by @code{starpu_node_get_kind}.
			
 
				+@table @asis
			
 
				+@item @code{STARPU_UNUSED}
			
 
				+@item @code{STARPU_CPU_RAM}
			
 
				+@item @code{STARPU_CUDA_RAM}
			
 
				+@item @code{STARPU_OPENCL_RAM}
			
 
				+@item @code{STARPU_SPU_LS}
			
 
				+@end table
			
 
				+@end deftp
			
 
				+
			
 
				+@deftypefun {enum starpu_node_kind} starpu_node_get_kind (uint32_t @var{node})
			
 
				+Returns the type of the given node as defined by @code{enum
			
 
				+starpu_node_kind}. For example, when defining a new data interface,
			
 
				+this function should be used in the allocation function to determine
			
 
				+on which device the memory needs to be allocated.
			
 
				+@end deftypefun
			
 
				+
			
 
				 @node Data Library
			
 
				 @section Data Library
			
 
				 
			
@@ -1312,6 +1335,16 @@ State of a task, can be either of
 
				 @end table
			
 
				 @end deftp
			
 
				 
			
 
				+@deftp {Data Type} {struct starpu_buffer_descr}
			
 
				+This type is used to describe a data handle along with an
			
 
				+access mode.
			
 
				+@table @asis
			
 
				+@item @code{starpu_data_handle_t handle} describes a data,
			
 
				+@item @code{enum starpu_access_mode mode} describes its access mode
			
 
				+@end table
			
 
				+@end deftp
			
 
				+
			
 
				+
			
 
				 @deftp {Data Type} {struct starpu_task}
			
 
				 The @code{starpu_task} structure describes a task that can be offloaded on the various
			
 
				 processing units managed by StarPU. It instantiates a codelet. It can either be
			
@@ -1760,6 +1793,23 @@ Lock to protect concurrency between loading from disk (W), updating the values
 
				 @end table
			
 
				 @end deftp
			
 
				 
			
 
				+@deftp {Data Type} {struct starpu_regression_model}
			
 
				+@table @asis
			
 
				+@item @code{double sumlny} sum of ln(measured)
			
 
				+@item @code{double sumlnx} sum of ln(size)
			
 
				+@item @code{double sumlnx2} sum of ln(size)^2
			
 
				+@item @code{unsigned long minx} minimum size
			
 
				+@item @code{unsigned long maxx} maximum size
			
 
				+@item @code{double sumlnxlny} sum of ln(size)*ln(measured)
			
 
				+@item @code{double alpha} 	 estimated = alpha * size ^ beta
			
 
				+@item @code{double beta}
			
 
				+@item @code{unsigned valid} whether the linear regression model is valid (i.e. enough measures)
			
 
				+@item @code{double a, b, c} estimated = a * size ^ b + c
			
 
				+@item @code{unsigned nl_valid} whether the non-linear regression model is valid (i.e. enough measures)
			
 
				+@item @code{unsigned nsample} number of sample values for non-linear regression
			
 
				+@end table
			
 
				+@end deftp
			
 
				+
			
 
				 @deftp {Data Type} {struct starpu_per_arch_perfmodel}
			
 
				 contains information about the performance model of a given arch.
			
 
				 
			
--- a/doc/chapters/c-extensions.texi
+++ b/doc/chapters/c-extensions.texi
@@ -10,7 +10,12 @@
 
				 When GCC plug-in support is available, StarPU builds a plug-in for the
			
 
				 GNU Compiler Collection (GCC), which defines extensions to languages of
			
 
				 the C family (C, C++, Objective-C) that make it easier to write StarPU
			
 
				-code@footnote{This feature is only available for GCC 4.5 and later.  It
			
 
				+code@footnote{This feature is only available for GCC 4.5 and later.  You
			
 
				+may need to install a specific @code{-dev} package of your distro, such
			
 
				+as @code{gcc-4.6-plugin-dev} on Debian and derivatives.  In addition,
			
 
				+the plug-in's test suite is only run when
			
 
				+@url{http://www.gnu.org/software/guile/, GNU@tie{}Guile} is found at
			
 
				+@code{configure}-time.  Building the GCC plug-in
			
 
				 can be disabled by configuring with @code{--disable-gcc-extensions}.}.
			
 
				 
			
 
				 Those extensions include syntactic sugar for defining
			
--- a/doc/chapters/mpi-support.texi
+++ b/doc/chapters/mpi-support.texi
@@ -20,10 +20,10 @@ distributed application, by automatically issuing all required data transfers
 
				 according to the task graph and an application-provided distribution.
			
 
				 
			
 
				 @menu
			
 
				-* The API::                     
			
 
				-* Simple Example::              
			
 
				-* MPI Insert Task Utility::         
			
 
				-* MPI Collective Operations::         
			
 
				+* The API::
			
 
				+* Simple Example::
			
 
				+* MPI Insert Task Utility::
			
 
				+* MPI Collective Operations::
			
 
				 @end menu
			
 
				 
			
 
				 @node The API
			
@@ -66,49 +66,91 @@ by calling @code{starpu_mpi_initialize_extended}.
 
				 
			
 
				 @subsection Communication
			
 
				 
			
 
				-TODO
			
 
				+The standard point to point communications of MPI have been
			
 
				+implemented. The semantics are similar to MPI's, but adapted to
			
 
				+the DSM provided by StarPU. An MPI request will only be submitted when
			
 
				+the data is available in the main memory of the node submitting the
			
 
				+request.
			
 
				 
			
 
				 @deftypefun int starpu_mpi_send (starpu_data_handle_t @var{data_handle}, int @var{dest}, int @var{mpi_tag}, MPI_Comm @var{comm})
			
 
				+Performs a standard-mode, blocking send of @var{data_handle} to the
			
 
				+node @var{dest} using the message tag @code{mpi_tag} within the
			
 
				+communicator @var{comm}.
			
 
				 @end deftypefun
			
 
				 
			
 
				 @deftypefun int starpu_mpi_recv (starpu_data_handle_t @var{data_handle}, int @var{source}, int @var{mpi_tag}, MPI_Comm @var{comm}, MPI_Status *@var{status})
			
 
				+Performs a standard-mode, blocking receive in @var{data_handle} from the
			
 
				+node @var{source} using the message tag @code{mpi_tag} within the
			
 
				+communicator @var{comm}.
			
 
				 @end deftypefun
			
 
				 
			
 
				 @deftypefun int starpu_mpi_isend (starpu_data_handle_t @var{data_handle}, starpu_mpi_req *@var{req}, int @var{dest}, int @var{mpi_tag}, MPI_Comm @var{comm})
			
 
				-
			
 
				+Posts a standard-mode, non blocking send of @var{data_handle} to the
			
 
				+node @var{dest} using the message tag @code{mpi_tag} within the
			
 
				+communicator @var{comm}. After the call, the pointer to the request
			
 
				+@var{req} can be used to test the completion of the communication.
			
 
				 @end deftypefun
			
 
				 
			
 
				 @deftypefun int starpu_mpi_irecv (starpu_data_handle_t @var{data_handle}, starpu_mpi_req *@var{req}, int @var{source}, int @var{mpi_tag}, MPI_Comm @var{comm})
			
 
				+Posts a nonblocking receive in @var{data_handle} from the
			
 
				+node @var{source} using the message tag @code{mpi_tag} within the
			
 
				+communicator @var{comm}. After the call, the pointer to the request
			
 
				+@var{req} can be used to test the completion of the communication.
			
 
				 @end deftypefun
			
 
				 
			
 
				 @deftypefun int starpu_mpi_isend_detached (starpu_data_handle_t @var{data_handle}, int @var{dest}, int @var{mpi_tag}, MPI_Comm @var{comm}, void (*@var{callback})(void *), void *@var{arg})
			
 
				+Posts a standard-mode, non blocking send of @var{data_handle} to the
			
 
				+node @var{dest} using the message tag @code{mpi_tag} within the
			
 
				+communicator @var{comm}. On completion, the @var{callback} function is
			
 
				+called with the argument @var{arg}.
			
 
				 @end deftypefun
			
 
				 
			
 
				 @deftypefun int starpu_mpi_irecv_detached (starpu_data_handle_t @var{data_handle}, int @var{source}, int @var{mpi_tag}, MPI_Comm @var{comm}, void (*@var{callback})(void *), void *@var{arg})
			
 
				+Posts a nonblocking receive in @var{data_handle} from the
			
 
				+node @var{source} using the message tag @code{mpi_tag} within the
			
 
				+communicator @var{comm}. On completion, the @var{callback} function is
			
 
				+called with the argument @var{arg}.
			
 
				 @end deftypefun
			
 
				 
			
 
				 @deftypefun int starpu_mpi_wait (starpu_mpi_req *@var{req}, MPI_Status *@var{status})
			
 
				+Returns when the operation identified by request @var{req} is complete.
			
 
				 @end deftypefun
			
 
				 
			
 
				 @deftypefun int starpu_mpi_test (starpu_mpi_req *@var{req}, int *@var{flag}, MPI_Status *@var{status})
			
 
				+If the operation identified by @var{req} is complete, set @var{flag}
			
 
				+to 1. The @var{status} object is set to contain information on the
			
 
				+completed operation.
			
 
				 @end deftypefun
			
 
				 
			
 
				 @deftypefun int starpu_mpi_barrier (MPI_Comm @var{comm})
			
 
				+Blocks the caller until all group members of the communicator
			
 
				+@var{comm} have called it.
			
 
				 @end deftypefun
			
 
				 
			
 
				 @deftypefun int starpu_mpi_isend_detached_unlock_tag (starpu_data_handle_t @var{data_handle}, int @var{dest}, int @var{mpi_tag}, MPI_Comm @var{comm}, starpu_tag_t @var{tag})
			
 
				-When the transfer is completed, the tag is unlocked
			
 
				+Posts a standard-mode, non blocking send of @var{data_handle} to the
			
 
				+node @var{dest} using the message tag @code{mpi_tag} within the
			
 
				+communicator @var{comm}. On completion, @var{tag} is unlocked.
			
 
				 @end deftypefun
			
 
				 
			
 
				 @deftypefun int starpu_mpi_irecv_detached_unlock_tag (starpu_data_handle_t @var{data_handle}, int @var{source}, int @var{mpi_tag}, MPI_Comm @var{comm}, starpu_tag_t @var{tag})
			
 
				+Posts a nonblocking receive in @var{data_handle} from the
			
 
				+node @var{source} using the message tag @code{mpi_tag} within the
			
 
				+communicator @var{comm}. On completion, @var{tag} is unlocked.
			
 
				 @end deftypefun
			
 
				 
			
 
				 @deftypefun int starpu_mpi_isend_array_detached_unlock_tag (unsigned @var{array_size}, starpu_data_handle_t *@var{data_handle}, int *@var{dest}, int *@var{mpi_tag}, MPI_Comm *@var{comm}, starpu_tag_t @var{tag})
			
 
				-Asynchronously send an array of buffers, and unlocks the tag once all
			
 
				-of them are transmitted.
			
 
				+Posts @var{array_size} standard-mode, non blocking sends of the data of
			
 
				+@var{data_handle[x]} to the node @var{dest[x]} using the message
			
 
				+tag @code{mpi_tag[x]} within the communicator @var{comm[x]}. On
			
 
				+completion of all the requests, @var{tag} is unlocked.
			
 
				 @end deftypefun
			
 
				 
			
 
				 @deftypefun int starpu_mpi_irecv_array_detached_unlock_tag (unsigned @var{array_size}, starpu_data_handle_t *@var{data_handle}, int *@var{source}, int *@var{mpi_tag}, MPI_Comm *@var{comm}, starpu_tag_t @var{tag})
			
 
				+Posts @var{array_size} nonblocking receives in @var{data_handle[x]} from the
			
 
				+node @var{source[x]} using the message tag @code{mpi_tag[x]} within the
			
 
				+communicator @var{comm[x]}. On completion of all the requests,
			
 
				+@var{tag} is unlocked.
			
 
				 @end deftypefun
			
 
				 
			
 
				 @page
			
--- a/doc/devel/handle_refcnt
+++ b/doc/devel/handle_refcnt
@@ -0,0 +1,44 @@
 
				+handle.refcnt usage
			
 
				+===================
			
 
				+
			
 
				+It is the counter for a software rwlock, counting the number of readers
			
 
				+(mode == STARPU_R) or writers (mode == STARPU_W)
			
 
				+
			
 
				+It is thus used
			
 
				+
			
 
				+- during task duration for each data
			
 
				+- right after completion of a data request
			
 
				+- during application data acquisition
			
 
				+
			
 
				+* Creates a reference:
			
 
				+  * Released by _starpu_notify_data_dependencies():
			
 
				+    - _starpu_attempt_to_submit_data_request() immediately when returning 0, but otherwise (1) when the pending request is given control
			
 
				+  * Released by _starpu_notify_data_dependencies():
			
 
				+    - _starpu_notify_data_dependencies() when may_unlock_data_req_list_head returned a request which can proceed
			
 
				+
			
 
				+* Indirectly creates a reference:
			
 
				+  * Released by starpu_data_release():
			
 
				+    - _starpu_attempt_to_submit_data_request_from_apps() when returning 0
			
 
				+    - starpu_data_acquire_cb()
			
 
				+    - starpu_data_acquire()
			
 
				+  * Released by _starpu_notify_data_dependencies():
			
 
				+    - attempt_to_submit_data_request_from_job() when returning 0
			
 
				+  * Released by _starpu_push_task_output():
			
 
				+    - _submit_job_enforce_data_deps() for each data.
			
 
				+
			
 
				+* Releases a reference:
			
 
				+  - _starpu_notify_data_dependencies()
			
 
				+
			
 
				+* Indirectly releases a reference:
			
 
				+  - _starpu_release_data_on_node()
			
 
				+  - _starpu_push_task_output() for each data
			
 
				+  - starpu_data_release()
			
 
				+
			
 
				+* Temporarily creates a reference, released immediately:
			
 
				+  - _starpu_write_through_data() for needed write-throughs, released by wt_callback
			
 
				+  - _starpu_benchmark_ping_pong()
			
 
				+  - _starpu_data_unregister()
			
 
				+  - _starpu_prefetch_data_on_node_with_mode()
			
 
				+  - starpu_data_invalidate()
			
 
				+
			
 
				+TODO: unclear conversion use
			
--- a/doc/devel/replicate_refcnt
+++ b/doc/devel/replicate_refcnt
@@ -0,0 +1,62 @@
 
				+replicate.refcnt usage
			
 
				+======================
			
 
				+
			
 
				+It is used to make sure the replicate still exists on some memory node.
			
 
				+
			
 
				+It is thus used
			
 
				+
			
 
				+- during task duration for each data
			
 
				+- during data requests, to make sure that the source and target still exist
			
 
				+- during data reduction, to keep the per-worker data available for reduction
			
 
				+- during application data acquisition
			
 
				+
			
 
				+Function detail
			
 
				+---------------
			
 
				+
			
 
				+* makes sure there is at least a reference:
			
 
				+  - copy_data_1_to_1_generic()
			
 
				+  - _starpu_driver_copy_data_1_to_1()
			
 
				+  - _starpu_post_data_request()
			
 
				+  - starpu_handle_data_request()
			
 
				+
			
 
				+* creates a reference:
			
 
				+  * Released by starpu_handle_data_request_completion:
			
 
				+    - _starpu_create_data_request():
			
 
				+      - 1 on dst_replicate
			
 
				+      - 1 on src_replicate if (mode & STARPU_R)
			
 
				+    - _starpu_search_existing_data_request() on src_replicate when turning a request without STARPU_R into one that does.
			
 
				+
			
 
				+  * Released by _starpu_data_end_reduction_mode_terminate:
			
 
				+    - _starpu_data_end_reduction_mode() for each initialized per-worker buffer, creates a reduction_tmp_handles for them
			
 
				+
			
 
				+  * Released by _starpu_release_data_on_node or inside _starpu_prefetch_data_on_node_with_mode:
			
 
				+    - _starpu_fetch_data_on_node() when !detached
			
 
				+
			
 
				+* indirectly creates a reference:
			
 
				+  * Released by _starpu_push_task_output():
			
 
				+    - fetch_data()
			
 
				+    - _starpu_fetch_task_input() through fetch_data() for each task data (one per unique data)
			
 
				+  * Released by starpu_data_release():
			
 
				+    - starpu_data_acquire_cb()
			
 
				+
			
 
				+* releases a reference:
			
 
				+  - starpu_handle_data_request_completion()
			
 
				+    - 1 on dst_replicate
			
 
				+    - 1 on src_replicate if (mode & STARPU_R)
			
 
				+  - _starpu_data_end_reduction_mode_terminate() for each per-worker buffer which has a reduction_tmp_handles
			
 
				+  - _starpu_release_data_on_node()
			
 
				+
			
 
				+* indirectly releases a reference:
			
 
				+  - starpu_handle_data_request() through starpu_handle_data_request_completion() when returning 0.
			
 
				+  - _starpu_push_task_output() for each task data (one per unique data)
			
 
				+  - starpu_data_release()
			
 
				+  - _starpu_handle_node_data_requests() through _starpu_handle_node_data_requests() for each completed request, which is not put back on any list.
			
 
				+
			
 
				+* temporarily increases, and decreases after:
			
 
				+  - transfer_subtree_to_node()
			
 
				+  - _starpu_allocate_interface()
			
 
				+  - _starpu_prefetch_data_on_node_with_mode(), when produced by the call to _starpu_fetch_data_on_node (!detached)
			
 
				+  - starpu_data_prefetch_on_node()
			
 
				+  - starpu_data_invalidate()
			
 
				+  - _starpu_data_unregister()
			
 
				+  - _starpu_benchmark_ping_pong()
			
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -59,7 +59,8 @@ EXTRA_DIST = 					\
 
				 	filters/custom_mf/conversion_opencl.cl  \
			
 
				 	filters/custom_mf/custom_opencl.cl \
			
 
				 	interface/complex_kernels.cl \
			
 
				-	reductions/dot_product_opencl_kernels.cl
			
 
				+	reductions/dot_product_opencl_kernels.cl	\
			
 
				+	scheduler/schedulers.sh
			
 
				 
			
 
				 CLEANFILES = 					\
			
 
				 	gordon/null_kernel_gordon.spuelf
			
@@ -152,6 +153,8 @@ noinst_HEADERS = 				\
 
				 
			
 
				 STARPU_EXAMPLES	=
			
 
				 TESTS		=	$(STARPU_EXAMPLES)
			
 
				+TESTS		+=	scheduler/schedulers.sh
			
 
				+
			
 
				 
			
 
				 if STARPU_HAVE_WINDOWS
			
 
				 check_PROGRAMS	=	$(STARPU_EXAMPLES)
			
--- a/examples/axpy/axpy.c
+++ b/examples/axpy/axpy.c
@@ -117,7 +117,7 @@ struct starpu_opencl_program opencl_program;
 
				 
			
 
				 int main(int argc, char **argv)
			
 
				 {
			
 
				-	int ret;
			
 
				+	int ret, exit_value;
			
 
				 
			
 
				 	/* Initialize StarPU */
			
 
				 	ret = starpu_init(NULL);
			
@@ -187,8 +187,8 @@ int main(int argc, char **argv)
 
				 		ret = starpu_task_submit(task);
			
 
				 		if (ret == -ENODEV)
			
 
				 		{
			
 
				-		     ret = 77;
			
 
				-		     goto enodev;
			
 
				+			exit_value = 77;
			
 
				+			goto enodev;
			
 
				 		}
			
 
				 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
			
 
				 	}
			
@@ -209,8 +209,8 @@ enodev:
 
				 
			
 
				 	FPRINTF(stderr, "AFTER y[0] = %2.2f (ALPHA = %2.2f)\n", vec_y[0], alpha);
			
 
				 
			
 
				-	if (ret != 77)
			
 
				-		ret = check();
			
 
				+	if (exit_value != 77)
			
 
				+		exit_value = check();
			
 
				 
			
 
				 	starpu_free((void *)vec_x);
			
 
				 	starpu_free((void *)vec_y);
			
@@ -222,5 +222,5 @@ enodev:
 
				 	/* Stop StarPU */
			
 
				 	starpu_shutdown();
			
 
				 
			
 
				-	return ret;
			
 
				+	return exit_value;
			
 
				 }
			
--- a/gcc-plugin/src/Makefile.am
+++ b/gcc-plugin/src/Makefile.am
@@ -27,7 +27,11 @@ AM_CPPFLAGS =						\
 
				   -I$(top_srcdir)/include				\
			
 
				   -I$(GCC_PLUGIN_INCLUDE_DIR) -Wall -DYYERROR_VERBOSE=1
			
 
				 
			
 
				-AM_LDFLAGS = -module
			
 
				+AM_LDFLAGS = -module --tag="$(GCC_FOR_PLUGIN_LIBTOOL_TAG)"
			
 
				+
			
 
				+# Use either `gcc' or `g++', whichever is appropriate to build
			
 
				+# plug-ins for this version of GCC.
			
 
				+CC = $(GCC_FOR_PLUGIN)
			
 
				 
			
 
				 showcheck:
			
 
				 	-cat $(TEST_LOGS) /dev/null
			
--- a/gcc-plugin/src/starpu.c
+++ b/gcc-plugin/src/starpu.c
@@ -19,8 +19,6 @@
 
				 
			
 
				 #include <starpu-gcc-config.h>
			
 
				 
			
 
				-int plugin_is_GPL_compatible;
			
 
				-
			
 
				 /* #define ENABLE_TREE_CHECKING 1 */
			
 
				 
			
 
				 #include <gcc-plugin.h>
			
@@ -62,6 +60,13 @@ int plugin_is_GPL_compatible;
 
				 #include <starpu.h>  /* for `STARPU_CPU' & co.  */
			
 
				 
			
 
				 
			
 
				+#ifdef __cplusplus
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+/* Declared with `C' linkage in <gcc-plugin.h>.  */
			
 
				+int plugin_is_GPL_compatible;
			
 
				+
			
 
				 /* The name of this plug-in.  */
			
 
				 static const char plugin_name[] = "starpu";
			
 
				 
			
@@ -1027,14 +1032,19 @@ validate_opencl_argument_type (location_t loc, const_tree type)
 
				 	  static const struct { const char *c; const char *cl; }
			
 
				 	  type_map[] =
			
 
				 	    {
			
 
				+	      /* Scalar types defined in OpenCL 1.2.  See
			
 
				+		 <http://www.khronos.org/files/opencl-1-2-quick-reference-card.pdf>.  */
			
 
				 	      { "char", "cl_char" },
			
 
				 	      { "unsigned char", "cl_uchar" },
			
 
				+	      { "uchar", "cl_uchar" },
			
 
				 	      { "short int", "cl_short" },
			
 
				 	      { "unsigned short", "cl_ushort" },
			
 
				 	      { "int", "cl_int" },
			
 
				 	      { "unsigned int", "cl_uint" },
			
 
				+	      { "uint", "cl_uint" },
			
 
				 	      { "long int", "cl_long" },
			
 
				 	      { "long unsigned int", "cl_ulong" },
			
 
				+	      { "ulong", "cl_ulong" },
			
 
				 	      { "float", "cl_float" },
			
 
				 	      { "double", "cl_double" },
			
 
				 	      { NULL, NULL }
			
@@ -2454,3 +2464,7 @@ plugin_init (struct plugin_name_args *plugin_info,
 
				 
			
 
				   return 0;
			
 
				 }
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
--- a/gcc-plugin/tests/opencl-types.c
+++ b/gcc-plugin/tests/opencl-types.c
@@ -25,6 +25,7 @@
 
				 
			
 
				 #include <mocks.h>
			
 
				 #include <unistd.h>
			
 
				+#include <sys/types.h>				  /* for `uint' & co. */
			
 
				 
			
 
				 
			
 
				 /* Make sure `size_t' is flagged.  */
			
@@ -113,6 +114,49 @@ my_uchar_task_opencl (char c[])
 
				 }
			
 
				 
			
 
				 
			
 
				+/* "unsigned int" is aka. "uint".  */
			
 
				+
			
 
				+static void my_uint_task (const uint *c)
			
 
				+  __attribute__ ((task));
			
 
				+static void my_uint_task_cpu (const uint *c)
			
 
				+  __attribute__ ((task_implementation ("cpu", my_uint_task)));
			
 
				+static void my_uint_task_opencl (const uint *c)	  /* no warning */
			
 
				+  __attribute__ ((task_implementation ("opencl", my_uint_task)));
			
 
				+
			
 
				+static void
			
 
				+my_uint_task_cpu (const uint *c)
			
 
				+{
			
 
				+}
			
 
				+
			
 
				+static void
			
 
				+my_uint_task_opencl (const uint *c)
			
 
				+{
			
 
				+}
			
 
				+
			
 
				+
			
 
				+/* "unsigned char" is aka. "uchar".  */
			
 
				+
			
 
				+typedef float uchar;				  /* not a real `uchar'! */
			
 
				+
			
 
				+static void my_fake_uchar_task (const uchar *c)
			
 
				+  __attribute__ ((task));
			
 
				+static void my_fake_uchar_task_cpu (const uchar *c)
			
 
				+  __attribute__ ((task_implementation ("cpu", my_fake_uchar_task)));
			
 
				+static void my_fake_uchar_task_opencl (const uchar *c) /* (warning "differs from the same-named OpenCL type") */
			
 
				+  __attribute__ ((task_implementation ("opencl", my_fake_uchar_task)));
			
 
				+
			
 
				+static void
			
 
				+my_fake_uchar_task_cpu (const uchar *c)
			
 
				+{
			
 
				+}
			
 
				+
			
 
				+static void
			
 
				+my_fake_uchar_task_opencl (const uchar *c)
			
 
				+{
			
 
				+}
			
 
				+
			
 
				+
			
 
				+
			
 
				 /* No OpenCL, no problems.  */
			
 
				 
			
 
				 static void my_cool_task (size_t size, long long x[size])
			
--- a/include/starpu_util.h
+++ b/include/starpu_util.h
@@ -49,7 +49,7 @@ extern "C"
 
				 #  endif
			
 
				 #endif
			
 
				 
			
 
				-#define STARPU_ABORT()		abort()
			
 
				+#define STARPU_ABORT()		assert(0)
			
 
				 
			
 
				 #if defined(STARPU_HAVE_STRERROR_R)
			
 
				 #  define STARPU_CHECK_RETURN_VALUE(err, message) {if (err < 0) { \
			
--- a/m4/gcc.m4
+++ b/m4/gcc.m4
@@ -25,9 +25,87 @@ AC_DEFUN([_STARPU_WITH_GCC_PLUGIN_API], [
 
				   CPPFLAGS="$save_CPPFLAGS"
			
 
				 ])
			
 
				 
			
 
				+dnl Set $ac_cv_starpu_gcc_for_plugin to the compiler to use to compile
			
 
				+dnl GCC plug-ins.  It's `gcc' for GCC 4.5/4.6, probably `g++' for 4.7,
			
 
				+dnl and definitely `g++' for 4.8, because the last two build
			
 
				+dnl themselves with `g++', leading to mangled names.
			
 
				+dnl See <http://thread.gmane.org/gmane.comp.gcc.devel/125210> for details.
			
 
				+AC_DEFUN([_STARPU_GCC_PLUGIN_LANGUAGE], [
			
 
				+  AC_CACHE_CHECK([which compiler to use to build GCC plug-ins],
			
 
				+    [ac_cv_starpu_gcc_for_plugin], [
			
 
				+     for GCC_FOR_PLUGIN in "$CC" "$CXX" ""
			
 
				+     do
			
 
				+       if test "x$GCC_FOR_PLUGIN" = "x"; then
			
 
				+	 break;
			
 
				+       fi
			
 
				+
			
 
				+       cat > conftest.c <<END_OF_CONFTEST
			
 
				+	 #include <gcc-plugin.h>
			
 
				+         #include <plugin-version.h>
			
 
				+	 #include <cpplib.h>
			
 
				+
			
 
				+	 int plugin_is_GPL_compatible;
			
 
				+
			
 
				+         extern struct cpp_reader *parse_in; /* C-family front-ends */
			
 
				+
			
 
				+	 static void
			
 
				+	 define_something (void *gcc_data, void *user_data)
			
 
				+	 {
			
 
				+	   cpp_define (parse_in, "CONFTEST_GCC_PLUGIN=1");
			
 
				+	 }
			
 
				+
			
 
				+	 int
			
 
				+	 plugin_init (struct plugin_name_args *plugin_info,
			
 
				+		      struct plugin_gcc_version *version)
			
 
				+	 {
			
 
				+	   if (!plugin_default_version_check (version, &gcc_version))
			
 
				+	     return 1;
			
 
				+	   register_callback ("conftest", PLUGIN_START_UNIT,
			
 
				+			      define_something, NULL);
			
 
				+	   return 0;
			
 
				+	 }
			
 
				+END_OF_CONFTEST
			
 
				+
			
 
				+       # Build the plug-in.
			
 
				+       rm -f conftest.so
			
 
				+       _STARPU_WITH_GCC_PLUGIN_API([
			
 
				+	 _AC_DO(["$GCC_FOR_PLUGIN" "$CPPFLAGS" -fPIC -shared conftest.c -o conftest.so]) || {
			
 
				+	   AC_MSG_ERROR([failed to build a GCC plug-in with `$GCC_FOR_PLUGIN'])
			
 
				+	 }
			
 
				+       ])
			
 
				+
			
 
				+       # Attempt to use it.
			
 
				+       save_CFLAGS="$CFLAGS"
			
 
				+       CFLAGS="-fplugin=$PWD/conftest.so"
			
 
				+       AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
			
 
				+			   #ifndef CONFTEST_GCC_PLUGIN
			
 
				+			   # error plug-in not loaded
			
 
				+			   #endif]], [])],
			
 
				+	 [ac_cv_starpu_gcc_for_plugin="$GCC_FOR_PLUGIN"], [:])
			
 
				+       CFLAGS="$save_CFLAGS"
			
 
				+
			
 
				+       rm -f conftest.so conftest.c
			
 
				+
			
 
				+       if test "x$ac_cv_starpu_gcc_for_plugin" != "x"; then
			
 
				+	 # We're done.
			
 
				+	 break
			
 
				+       fi
			
 
				+     done
			
 
				+
			
 
				+     if test "x$ac_cv_starpu_gcc_for_plugin" = "x"; then
			
 
				+       AC_MSG_RESULT([none])
			
 
				+       AC_MSG_ERROR([could not find a suitable compiler for GCC plug-ins])
			
 
				+     fi
			
 
				+  ])
			
 
				+
			
 
				+  $1="$ac_cv_starpu_gcc_for_plugin"
			
 
				+])
			
 
				+
			
 
				+
			
 
				 dnl Check whether GCC plug-in support is available (GCC 4.5+).
			
 
				 AC_DEFUN([STARPU_GCC_PLUGIN_SUPPORT], [
			
 
				   AC_REQUIRE([AC_PROG_CC])
			
 
				+  AC_REQUIRE([AC_PROG_CXX]) dnl for GCC 4.7+
			
 
				   AC_CACHE_CHECK([whether GCC supports plug-ins], [ac_cv_have_gcc_plugins], [
			
 
				     if test "x$GCC" = xyes; then
			
 
				       # ICC 12.1.0 and Clang 3.1 (among others) support `--version',
			
@@ -83,11 +161,24 @@ AC_DEFUN([STARPU_GCC_PLUGIN_SUPPORT], [
 
				       AC_CHECK_HEADERS([c-common.h c-pragma.h c-family/c-common.h c-family/c-pragma.h],
			
 
				         [], [], [#include <gcc-plugin.h>
			
 
				 	         #include <tree.h>])
			
 
				-
			
 
				-      AC_DEFINE_UNQUOTED([STARPU_INCLUDE_DIR],
			
 
				-        ["`eval "echo $includedir"`/starpu/$STARPU_EFFECTIVE_VERSION"],
			
 
				-        [Define to the directory where StarPU's headers are installed.])
			
 
				     ])
			
 
				+
			
 
				+
			
 
				+    AC_DEFINE_UNQUOTED([STARPU_INCLUDE_DIR],
			
 
				+      ["`eval "echo $includedir"`/starpu/$STARPU_EFFECTIVE_VERSION"],
			
 
				+      [Define to the directory where StarPU's headers are installed.])
			
 
				+
			
 
				+    dnl Now, `gcc' or `g++'?
			
 
				+    _STARPU_GCC_PLUGIN_LANGUAGE([GCC_FOR_PLUGIN])
			
 
				+    AC_SUBST([GCC_FOR_PLUGIN])
			
 
				+
			
 
				+    dnl Determine the corresponding Libtool tag.
			
 
				+    if test "$GCC_FOR_PLUGIN" = "$CXX"; then
			
 
				+      GCC_FOR_PLUGIN_LIBTOOL_TAG="CXX"
			
 
				+    else
			
 
				+      GCC_FOR_PLUGIN_LIBTOOL_TAG="CC"
			
 
				+    fi
			
 
				+    AC_SUBST([GCC_FOR_PLUGIN_LIBTOOL_TAG])
			
 
				   fi
			
 
				 
			
 
				   AC_SUBST([GCC_PLUGIN_INCLUDE_DIR])
			
--- a/src/common/starpu_spinlock.c
+++ b/src/common/starpu_spinlock.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010  Université de Bordeaux 1
			
 
				+ * Copyright (C) 2010, 2012  Université de Bordeaux 1
			
 
				  * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -85,6 +85,24 @@ int _starpu_spin_lock(struct _starpu_spinlock *lock)
 
				 #endif
			
 
				 }
			
 
				 
			
 
				+int _starpu_spin_checklocked(struct _starpu_spinlock *lock)
			
 
				+{
			
 
				+#ifdef STARPU_SPINLOCK_CHECK
			
 
				+	int ret = pthread_mutex_trylock(&lock->errcheck_lock);
			
 
				+	STARPU_ASSERT(ret != 0);
			
 
				+	return ret == 0;
			
 
				+#else
			
 
				+#ifdef HAVE_PTHREAD_SPIN_LOCK
			
 
				+	int ret = pthread_spin_trylock(&lock->lock);
			
 
				+	STARPU_ASSERT(ret != 0);
			
 
				+	return ret == 0;
			
 
				+#else
			
 
				+	STARPU_ASSERT(lock->taken);
			
 
				+	return !lock->taken;
			
 
				+#endif
			
 
				+#endif
			
 
				+}
			
 
				+
			
 
				 int _starpu_spin_trylock(struct _starpu_spinlock *lock)
			
 
				 {
			
 
				 #ifdef STARPU_SPINLOCK_CHECK
			
--- a/src/common/starpu_spinlock.h
+++ b/src/common/starpu_spinlock.h
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010-2011  Université de Bordeaux 1
			
 
				+ * Copyright (C) 2010-2012  Université de Bordeaux 1
			
 
				  * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -43,6 +43,7 @@ int _starpu_spin_destroy(struct _starpu_spinlock *lock);
 
				 
			
 
				 int _starpu_spin_lock(struct _starpu_spinlock *lock);
			
 
				 int _starpu_spin_trylock(struct _starpu_spinlock *lock);
			
 
				+int _starpu_spin_checklocked(struct _starpu_spinlock *lock);
			
 
				 int _starpu_spin_unlock(struct _starpu_spinlock *lock);
			
 
				 
			
 
				 #endif // __STARPU_SPINLOCK_H__
			
--- a/src/core/dependencies/cg.c
+++ b/src/core/dependencies/cg.c
@@ -152,6 +152,8 @@ void _starpu_notify_cg(struct _starpu_cg *cg)
 
				 				unsigned ndeps_completed =
			
 
				 					STARPU_ATOMIC_ADD(&job_successors->ndeps_completed, 1);
			
 
				 
			
 
				+				STARPU_ASSERT(job_successors->ndeps >= ndeps_completed);
			
 
				+
			
 
				 				/* Need to atomically test submitted and check
			
 
				 				 * dependencies, since this is concurrent with
			
 
				 				 * _starpu_submit_job */
			
@@ -183,7 +185,6 @@ void _starpu_notify_cg_list(struct _starpu_cg_list *successors)
 
				 	unsigned succ;
			
 
				 
			
 
				 	_starpu_spin_lock(&successors->lock);
			
 
				-	successors->terminated = 1;
			
 
				 	/* Note: some thread might be concurrently adding other items */
			
 
				 	for (succ = 0; succ < successors->nsuccs; succ++)
			
 
				 	{
			
@@ -216,5 +217,6 @@ void _starpu_notify_cg_list(struct _starpu_cg_list *successors)
 
				 
			
 
				 		_starpu_spin_lock(&successors->lock);
			
 
				 	}
			
 
				+	successors->terminated = 1;
			
 
				 	_starpu_spin_unlock(&successors->lock);
			
 
				 }
			
--- a/src/core/dependencies/cg.h
+++ b/src/core/dependencies/cg.h
@@ -45,7 +45,9 @@ struct _starpu_cg_list
 
				 	unsigned ndeps; /* how many deps ? */
			
 
				 	unsigned ndeps_completed; /* how many deps are done ? */
			
 
				 
			
 
				-	/* Whether the completion is finished. */
			
 
				+	/* Whether the completion is finished.
			
 
				+	 * For restartable/restarted tasks, only the first iteration is taken into account here.
			
 
				+	 */
			
 
				 	unsigned terminated;
			
 
				 
			
 
				 	/* List of successors */
			
--- a/src/core/dependencies/data_concurrency.c
+++ b/src/core/dependencies/data_concurrency.c
@@ -263,6 +263,7 @@ static unsigned unlock_one_requester(struct _starpu_data_requester *r)
 
				 /* The header lock must already be taken by the caller */
			
 
				 void _starpu_notify_data_dependencies(starpu_data_handle_t handle)
			
 
				 {
			
 
				+	_starpu_spin_checklocked(&handle->header_lock);
			
 
				 	/* A data access has finished so we remove a reference. */
			
 
				 	STARPU_ASSERT(handle->refcnt > 0);
			
 
				 	handle->refcnt--;
			
--- a/src/core/dependencies/tags.c
+++ b/src/core/dependencies/tags.c
@@ -233,7 +233,13 @@ void _starpu_tag_declare(starpu_tag_t id, struct _starpu_job *job)
 
				 
			
 
				 	/* the tag is now associated to a job */
			
 
				 	_starpu_spin_lock(&tag->lock);
			
 
				-	tag->state = STARPU_ASSOCIATED;
			
 
				+	/* When the same tag may be signaled several times by different tasks,
			
 
				+	 * and it's already done, we should not reset the "done" state.
			
 
				+	 * When the tag is simply used by the same task several times, we have
			
 
				+	 * to do so. */
			
 
				+	if (job->task->regenerate || job->submitted == 2 ||
			
 
				+			tag->state != STARPU_DONE)
			
 
				+		tag->state = STARPU_ASSOCIATED;
			
 
				 	_starpu_spin_unlock(&tag->lock);
			
 
				 }
			
 
				 
			
--- a/src/core/dependencies/task_deps.c
+++ b/src/core/dependencies/task_deps.c
@@ -81,9 +81,11 @@ void _starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, s
 
				 		dep_job = _starpu_get_job_associated_to_task(dep_task);
			
 
				 
			
 
				 		STARPU_ASSERT_MSG(dep_job != job, "A task must not depend on itself.");
			
 
				-		if (check)
			
 
				-			STARPU_ASSERT_MSG(!dep_job->submitted || !dep_job->task->destroy || dep_job->task->detach, "Task dependencies have to be set before submission");
			
 
				-		else
			
 
				+		if (check) {
			
 
				+			STARPU_ASSERT_MSG(!dep_job->submitted || !dep_job->task->destroy || dep_job->task->detach, "Unless it is not to be destroyed automatically, a task dependencies have to be set before submission");
			
 
				+			STARPU_ASSERT_MSG(dep_job->submitted != 2, "For resubmited tasks, dependencies have to be set before first re-submission");
			
 
				+			STARPU_ASSERT_MSG(!dep_job->submitted || !dep_job->task->regenerate, "For regenerated tasks, dependencies have to be set before first submission");
			
 
				+		} else
			
 
				 			STARPU_ASSERT_MSG(dep_job->terminated <= 1, "Task dependencies have to be set before termination");
			
 
				 
			
 
				 		_STARPU_TRACE_TASK_DEPS(dep_job, job);
			
--- a/src/core/jobs.c
+++ b/src/core/jobs.c
@@ -247,7 +247,7 @@ void _starpu_handle_job_termination(struct _starpu_job *j, int workerid)
 
				 
			
 
				 	if (regenerate)
			
 
				 	{
			
 
				-		STARPU_ASSERT(detach && !destroy && !task->synchronous);
			
 
				+		STARPU_ASSERT_MSG(detach && !destroy && !task->synchronous, "Regenerated task must be detached, and not have detroy=1 or synchronous=1");
			
 
				 
			
 
				 		/* We reuse the same job structure */
			
 
				 		int ret = _starpu_submit_job(j);
			
@@ -286,7 +286,11 @@ static unsigned _starpu_not_all_tag_deps_are_fulfilled(struct _starpu_job *j)
 
				 	else
			
 
				 	{
			
 
				 		/* existing deps (if any) are fulfilled */
			
 
				-		tag->state = STARPU_READY;
			
 
				+		/* If the same tag is being signaled by several tasks, do not
			
 
				+		 * clear a DONE state. If it's the same job submitted several
			
 
				+		 * times with the same tag, we have to do it */
			
 
				+		if (j->submitted == 2 || tag->state != STARPU_DONE)
			
 
				+			tag->state = STARPU_READY;
			
 
				 		/* already prepare for next run */
			
 
				 		tag_successors->ndeps_completed = 0;
			
 
				 		ret = 0;
			
--- a/src/core/jobs.h
+++ b/src/core/jobs.h
@@ -92,7 +92,9 @@ LIST_TYPE(_starpu_job,
 
				 	uint32_t footprint;
			
 
				 
			
 
				 	/* Indicates whether the task associated to that job has already been
			
 
				-	 * submitted to StarPU or not (using starpu_task_submit). */
			
 
				+	 * submitted to StarPU (1) or not (0) (using starpu_task_submit).
			
 
				+	 * Becomes and stays 2 when the task is submitted several times.
			
 
				+	 */
			
 
				 	unsigned submitted;
			
 
				 
			
 
				 	/* Indicates whether the task associated to this job is terminated or
			
--- a/src/core/perfmodel/perfmodel.c
+++ b/src/core/perfmodel/perfmodel.c
@@ -225,7 +225,7 @@ double starpu_task_expected_conversion_time(struct starpu_task *task,
 
				 	unsigned i;
			
 
				 	int err;
			
 
				 	double sum = 0.0;
			
 
				-	unsigned int node, cpu_node;
			
 
				+	int node, cpu_node;
			
 
				 
			
 
				 	/* We need to get one node per archtype. This is kinda ugly,
			
 
				 	 * but it does the job.
			
@@ -267,28 +267,31 @@ double starpu_task_expected_conversion_time(struct starpu_task *task,
 
				 		if (!_starpu_data_is_multiformat_handle(handle))
			
 
				 			continue;
			
 
				 
			
 
				+		node = -EINVAL;
			
 
				+#ifdef STARPU_USE_CPU
			
 
				 		if (arch < STARPU_CUDA_DEFAULT)
			
 
				 			node = cpu_node;
			
 
				+#endif
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-		else if (arch >= STARPU_CUDA_DEFAULT && arch < STARPU_OPENCL_DEFAULT)
			
 
				+		if (arch >= STARPU_CUDA_DEFAULT && arch < STARPU_OPENCL_DEFAULT)
			
 
				 			node = cuda_node;
			
 
				 #endif
			
 
				 #ifdef STARPU_USE_OPENCL
			
 
				-		else if (arch >= STARPU_OPENCL_DEFAULT && arch < STARPU_GORDON_DEFAULT)
			
 
				+		if (arch >= STARPU_OPENCL_DEFAULT && arch < STARPU_GORDON_DEFAULT)
			
 
				 			node = opencl_node;
			
 
				 #endif
			
 
				-		else {
			
 
				-			node = -EINVAL;
			
 
				+		if (node == -EINVAL)
			
 
				 			STARPU_ASSERT(0);
			
 
				-		}
			
 
				 
			
 
				 		if (!_starpu_handle_needs_conversion_task(handle, node))
			
 
				 			continue;
			
 
				 
			
 
				 		conversion_task = _starpu_create_conversion_task(handle, node);
			
 
				 		sum += starpu_task_expected_length(conversion_task, arch, nimpl);
			
 
				+		_starpu_spin_lock(&handle->header_lock);
			
 
				 		handle->refcnt--;
			
 
				 		handle->busy_count--;
			
 
				+		_starpu_spin_unlock(&handle->header_lock);
			
 
				 		starpu_task_deinit(conversion_task);
			
 
				 		free(conversion_task);
			
 
				 	}
			
--- a/src/core/sched_policy.c
+++ b/src/core/sched_policy.c
@@ -184,15 +184,8 @@ void _starpu_init_sched_policy(struct _starpu_machine_config *config, struct _st
 
				 	if (use_prefetch == -1)
			
 
				 		use_prefetch = 1;
			
 
				 
			
 
				-	/* By default, we don't calibrate */
			
 
				-	unsigned do_calibrate = 0;
			
 
				-	int res = starpu_get_env_number("STARPU_CALIBRATE");
			
 
				-	if (res == -1 && config->user_conf)
			
 
				-		res = config->user_conf->calibrate;
			
 
				-
			
 
				-	do_calibrate = (res < 0)?0:(unsigned)res;
			
 
				-
			
 
				-	_starpu_set_calibrate_flag(do_calibrate);
			
 
				+	/* Set calibrate flag */
			
 
				+	_starpu_set_calibrate_flag(config->conf->calibrate);
			
 
				 
			
 
				 	struct starpu_sched_policy *selected_policy;
			
 
				 	selected_policy = select_sched_policy(config, required_policy);
			
@@ -289,6 +282,7 @@ static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int wo
 
				 		int *combined_workerid = combined_worker->combined_workerid;
			
 
				 
			
 
				 		int ret = 0;
			
 
				+		int i;
			
 
				 
			
 
				 		struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
			
 
				 		j->task_size = worker_size;
			
@@ -421,8 +415,10 @@ struct starpu_task *_starpu_create_conversion_task(starpu_data_handle_t handle,
 
				 	format_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, 0);
			
 
				 	node_kind = starpu_node_get_kind(node);
			
 
				 
			
 
				+	_starpu_spin_lock(&handle->header_lock);
			
 
				 	handle->refcnt++;
			
 
				 	handle->busy_count++;
			
 
				+	_starpu_spin_unlock(&handle->header_lock);
			
 
				 
			
 
				 	struct starpu_multiformat_data_interface_ops *mf_ops;
			
 
				 	mf_ops = (struct starpu_multiformat_data_interface_ops *) handle->ops->get_mf_ops(format_interface);
			
@@ -479,6 +475,7 @@ struct starpu_task *_starpu_pop_task(struct _starpu_worker *worker)
 
				 	struct timespec pop_start_time;
			
 
				 	if (profiling)
			
 
				 		_starpu_clock_gettime(&pop_start_time);
			
 
				+
			
 
				 pick:
			
 
				 	_STARPU_PTHREAD_MUTEX_LOCK(&worker->sched_mutex);
			
 
				 	/* perhaps there is some local task to be executed first */
			
--- a/src/core/task.c
+++ b/src/core/task.c
@@ -224,7 +224,10 @@ int _starpu_submit_job(struct _starpu_job *j)
 
				 	/* Need to atomically set submitted to 1 and check dependencies, since
			
 
				 	 * this is concucrent with _starpu_notify_cg */
			
 
				 	j->terminated = 0;
			
 
				-	j->submitted = 1;
			
 
				+	if (!j->submitted)
			
 
				+		j->submitted = 1;
			
 
				+	else
			
 
				+		j->submitted = 2;
			
 
				 
			
 
				 	int ret = _starpu_enforce_deps_and_schedule(j);
			
 
				 
			
--- a/src/core/topology.c
+++ b/src/core/topology.c
@@ -55,7 +55,7 @@ static struct starpu_htbl32_node *devices_using_cuda = NULL;
 
				 #  ifdef STARPU_USE_OPENCL
			
 
				 static void _starpu_initialize_workers_opencl_gpuid(struct _starpu_machine_config *config);
			
 
				 #  endif
			
 
				-static void _starpu_initialize_workers_gpuid(int use_explicit_workers_gpuid, int *explicit_workers_gpuid,
			
 
				+static void _starpu_initialize_workers_gpuid(int *explicit_workers_gpuid,
			
 
				                                              int *current, int *workers_gpuid, const char *varname, unsigned nhwgpus);
			
 
				 static unsigned may_bind_automatically = 0;
			
 
				 #endif
			
@@ -69,8 +69,7 @@ static void _starpu_initialize_workers_cuda_gpuid(struct _starpu_machine_config
 
				 {
			
 
				 	struct starpu_machine_topology *topology = &config->topology;
			
 
				 
			
 
				-        _starpu_initialize_workers_gpuid(config->user_conf==NULL?0:config->user_conf->use_explicit_workers_cuda_gpuid,
			
 
				-                                         config->user_conf==NULL?NULL:(int *)config->user_conf->workers_cuda_gpuid,
			
 
				+        _starpu_initialize_workers_gpuid(config->conf->use_explicit_workers_cuda_gpuid==0?NULL:(int *)config->conf->workers_cuda_gpuid,
			
 
				                                          &(config->current_cuda_gpuid), (int *)topology->workers_cuda_gpuid, "STARPU_WORKERS_CUDAID",
			
 
				                                          topology->nhwcudagpus);
			
 
				 }
			
@@ -81,8 +80,7 @@ static void _starpu_initialize_workers_opencl_gpuid(struct _starpu_machine_confi
 
				 {
			
 
				 	struct starpu_machine_topology *topology = &config->topology;
			
 
				 
			
 
				-        _starpu_initialize_workers_gpuid(config->user_conf==NULL?0:config->user_conf->use_explicit_workers_opencl_gpuid,
			
 
				-                                         config->user_conf==NULL?NULL:(int *)config->user_conf->workers_opencl_gpuid,
			
 
				+        _starpu_initialize_workers_gpuid(config->conf->use_explicit_workers_opencl_gpuid==0?NULL:(int *)config->conf->workers_opencl_gpuid,
			
 
				                                          &(config->current_opencl_gpuid), (int *)topology->workers_opencl_gpuid, "STARPU_WORKERS_OPENCLID",
			
 
				                                          topology->nhwopenclgpus);
			
 
				 
			
@@ -130,7 +128,7 @@ static void _starpu_initialize_workers_opencl_gpuid(struct _starpu_machine_confi
 
				 
			
 
				 
			
 
				 #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
			
 
				-static void _starpu_initialize_workers_gpuid(int use_explicit_workers_gpuid, int *explicit_workers_gpuid,
			
 
				+static void _starpu_initialize_workers_gpuid(int *explicit_workers_gpuid,
			
 
				                                              int *current, int *workers_gpuid, const char *varname, unsigned nhwgpus)
			
 
				 {
			
 
				 	char *strval;
			
@@ -185,7 +183,7 @@ static void _starpu_initialize_workers_gpuid(int use_explicit_workers_gpuid, int
 
				 			}
			
 
				 		}
			
 
				 	}
			
 
				-	else if (use_explicit_workers_gpuid)
			
 
				+	else if (explicit_workers_gpuid)
			
 
				 	{
			
 
				 		/* we use the explicit value from the user */
			
 
				 		memcpy(workers_gpuid,
			
@@ -196,8 +194,8 @@ static void _starpu_initialize_workers_gpuid(int use_explicit_workers_gpuid, int
 
				 	{
			
 
				 		/* by default, we take a round robin policy */
			
 
				 		if (nhwgpus > 0)
			
 
				-		for (i = 0; i < STARPU_NMAXWORKERS; i++)
			
 
				-			workers_gpuid[i] = (unsigned)(i % nhwgpus);
			
 
				+		     for (i = 0; i < STARPU_NMAXWORKERS; i++)
			
 
				+			  workers_gpuid[i] = (unsigned)(i % nhwgpus);
			
 
				 
			
 
				 		/* StarPU can use sampling techniques to bind threads correctly */
			
 
				 		may_bind_automatically = 1;
			
@@ -273,8 +271,7 @@ unsigned _starpu_topology_get_nhwcpu(struct _starpu_machine_config *config)
 
				 	return config->topology.nhwcpus;
			
 
				 }
			
 
				 
			
 
				-static int _starpu_init_machine_config(struct _starpu_machine_config *config,
			
 
				-				struct starpu_conf *user_conf)
			
 
				+static int _starpu_init_machine_config(struct _starpu_machine_config *config)
			
 
				 {
			
 
				 	int i;
			
 
				 	for (i = 0; i < STARPU_NMAXWORKERS; i++)
			
@@ -290,14 +287,8 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config,
 
				 	_starpu_initialize_workers_bindid(config);
			
 
				 
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-	int ncuda;
			
 
				-	ncuda = starpu_get_env_number("STARPU_NCUDA");
			
 
				-
			
 
				-	/* STARPU_NCUDA is not set. Did the user specify anything ? */
			
 
				-	if (ncuda == -1 && user_conf)
			
 
				-		ncuda = user_conf->ncuda;
			
 
				+	int ncuda = config->conf->ncuda;
			
 
				 
			
 
				-	
			
 
				 	if (ncuda != 0)
			
 
				 	{
			
 
				 		/* The user did not disable CUDA. We need to initialize CUDA
			
@@ -364,12 +355,7 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config,
 
				 #endif
			
 
				 
			
 
				 #ifdef STARPU_USE_OPENCL
			
 
				-	int nopencl;
			
 
				-	nopencl = starpu_get_env_number("STARPU_NOPENCL");
			
 
				-
			
 
				-	/* STARPU_NOPENCL is not set. Did the user specify anything ? */
			
 
				-	if (nopencl == -1 && user_conf)
			
 
				-		nopencl = user_conf->nopencl;
			
 
				+	int nopencl = config->conf->nopencl;
			
 
				 
			
 
				 	if (nopencl != 0)
			
 
				 	{
			
@@ -440,12 +426,7 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config,
 
				 #endif
			
 
				 
			
 
				 #ifdef STARPU_USE_GORDON
			
 
				-	int ngordon;
			
 
				-	ngordon = starpu_get_env_number("STARPU_NGORDON");
			
 
				-
			
 
				-	/* STARPU_NGORDON is not set. Did the user specify anything ? */
			
 
				-	if (ngordon == -1 && user_conf)
			
 
				-		ngordon = user_conf->ngordon;
			
 
				+	int ngordon = config->conf->ngordon;
			
 
				 
			
 
				 	if (ngordon != 0)
			
 
				 	{
			
@@ -488,12 +469,7 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config,
 
				 /* we put the CPU section after the accelerator : in case there was an
			
 
				  * accelerator found, we devote one cpu */
			
 
				 #ifdef STARPU_USE_CPU
			
 
				-	int ncpu;
			
 
				-	ncpu = starpu_get_env_number("STARPU_NCPUS");
			
 
				-
			
 
				-	/* STARPU_NCPUS is not set. Did the user specify anything ? */
			
 
				-	if (ncpu == -1 && user_conf)
			
 
				-		ncpu = user_conf->ncpus;
			
 
				+	int ncpu = config->conf->ncpus;
			
 
				 
			
 
				 	if (ncpu != 0)
			
 
				 	{
			
@@ -603,11 +579,11 @@ static void _starpu_initialize_workers_bindid(struct _starpu_machine_config *con
 
				 			}
			
 
				 		}
			
 
				 	}
			
 
				-	else if (config->user_conf && config->user_conf->use_explicit_workers_bindid)
			
 
				+	else if (config->conf->use_explicit_workers_bindid)
			
 
				 	{
			
 
				 		/* we use the explicit value from the user */
			
 
				 		memcpy(topology->workers_bindid,
			
 
				-			config->user_conf->workers_bindid,
			
 
				+			config->conf->workers_bindid,
			
 
				 			STARPU_NMAXWORKERS*sizeof(unsigned));
			
 
				 	}
			
 
				 	else
			
@@ -879,9 +855,7 @@ int _starpu_build_topology(struct _starpu_machine_config *config)
 
				 {
			
 
				 	int ret;
			
 
				 
			
 
				-	struct starpu_conf *user_conf = config->user_conf;
			
 
				-
			
 
				-	ret = _starpu_init_machine_config(config, user_conf);
			
 
				+	ret = _starpu_init_machine_config(config);
			
 
				 	if (ret)
			
 
				 		return ret;
			
 
				 
			
--- a/src/core/workers.c
+++ b/src/core/workers.c
@@ -372,17 +372,49 @@ int starpu_conf_init(struct starpu_conf *conf)
 
				 	conf->nspus = starpu_get_env_number("STARPU_NGORDON");
			
 
				 	conf->calibrate = starpu_get_env_number("STARPU_CALIBRATE");
			
 
				 
			
 
				+	if (conf->calibrate == -1)
			
 
				+	     conf->calibrate = 0;
			
 
				+
			
 
				 	conf->use_explicit_workers_bindid = 0; /* TODO */
			
 
				 	conf->use_explicit_workers_cuda_gpuid = 0; /* TODO */
			
 
				 	conf->use_explicit_workers_opencl_gpuid = 0; /* TODO */
			
 
				 
			
 
				 	conf->single_combined_worker = starpu_get_env_number("STARPU_SINGLE_COMBINED_WORKER");
			
 
				+	if (conf->single_combined_worker == -1)
			
 
				+	     conf->single_combined_worker = 0;
			
 
				 
			
 
				 	conf->disable_asynchronous_copy = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_COPY");
			
 
				 
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+/* Let environment variable `name' override *value; -1 leaves it unchanged */
			
 
				+static void _starpu_conf_set_value_against_environment(char *name, int *value)
			
 
				+{
			
 
				+	int from_env = starpu_get_env_number(name);
			
 
				+	/* -1: nothing to apply, keep the value already stored by the caller */
			
 
				+	if (from_env == -1)
			
 
				+		return;
			
 
				+	*value = from_env;
			
 
				+}
			
 
				+
			
 
				+/* Apply the STARPU_* environment variables on top of `conf': a variable
			
 
				+ * that is set takes precedence over the value already stored in `conf'. */
			
 
				+static void _starpu_conf_check_environment(struct starpu_conf *conf)
			
 
				+{
			
 
				+	char *sched_name = getenv("STARPU_SCHED");
			
 
				+	if (sched_name != NULL)
			
 
				+		conf->sched_policy_name = sched_name;
			
 
				+
			
 
				+	_starpu_conf_set_value_against_environment("STARPU_NCPUS", &conf->ncpus);
			
 
				+	_starpu_conf_set_value_against_environment("STARPU_NCUDA", &conf->ncuda);
			
 
				+	_starpu_conf_set_value_against_environment("STARPU_NOPENCL", &conf->nopencl);
			
 
				+	_starpu_conf_set_value_against_environment("STARPU_NGORDON", &conf->nspus);
			
 
				+	_starpu_conf_set_value_against_environment("STARPU_CALIBRATE", &conf->calibrate);
			
 
				+	_starpu_conf_set_value_against_environment("STARPU_SINGLE_COMBINED_WORKER", &conf->single_combined_worker);
			
 
				+	_starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_COPY", &conf->disable_asynchronous_copy);
			
 
				+}
			
 
				+
			
 
				 int starpu_init(struct starpu_conf *user_conf)
			
 
				 {
			
 
				 	int ret;
			
@@ -454,21 +486,19 @@ int starpu_init(struct starpu_conf *user_conf)
 
				 
			
 
				 	/* store the pointer to the user explicit configuration during the
			
 
				 	 * initialization */
			
 
				-	config.user_conf = user_conf;
			
 
				-
			
 
				-	if (user_conf)
			
 
				+	if (user_conf == NULL)
			
 
				 	{
			
 
				-	     int asynchronous_copy_disabled = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_COPY");
			
 
				-	     if (asynchronous_copy_disabled == 1)
			
 
				-		  config.disable_asynchronous_copy = 1;
			
 
				-	     else
			
 
				-		  config.disable_asynchronous_copy = (user_conf->disable_asynchronous_copy == 1);
			
 
				+	     struct starpu_conf *conf = malloc(sizeof(struct starpu_conf));
			
 
				+	     starpu_conf_init(conf);
			
 
				+	     config.conf = conf;
			
 
				+	     config.default_conf = 1;
			
 
				 	}
			
 
				 	else
			
 
				 	{
			
 
				-	     int asynchronous_copy_disabled = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_COPY");
			
 
				-	     config.disable_asynchronous_copy = (asynchronous_copy_disabled == 1);
			
 
				+	     config.conf = user_conf;
			
 
				+	     config.default_conf = 0;
			
 
				 	}
			
 
				+	_starpu_conf_check_environment(config.conf);
			
 
				 
			
 
				 	_starpu_init_all_sched_ctxs(&config);
			
 
				 	ret = _starpu_build_topology(&config);
			
@@ -667,6 +697,10 @@ void starpu_shutdown(void)
 
				 	_STARPU_PTHREAD_COND_SIGNAL(&init_cond);
			
 
				 	_STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
			
 
				 
			
 
				+	/* Clear memory if it was allocated by StarPU */
			
 
				+	if (config.default_conf)
			
 
				+	     free(config.conf);
			
 
				+
			
 
				 	_STARPU_DEBUG("Shutdown finished\n");
			
 
				 }
			
 
				 
			
@@ -723,7 +757,7 @@ unsigned starpu_spu_worker_get_count(void)
 
				 
			
 
				 int starpu_asynchronous_copy_disabled()
			
 
				 {
			
 
				-	return config.disable_asynchronous_copy;
			
 
				+	return config.conf->disable_asynchronous_copy;
			
 
				 }
			
 
				 
			
 
				 /* When analyzing performance, it is useful to see what is the processing unit
			
--- a/src/core/workers.h
+++ b/src/core/workers.h
@@ -158,16 +158,16 @@ struct _starpu_machine_config
 
				 	 * the result of (worker_mask & STARPU_CUDA). */
			
 
				 	uint32_t worker_mask;
			
 
				 
			
 
				-	/* in case the user gives an explicit configuration, this is only valid
			
 
				-	 * during starpu_init. */
			
 
				-	struct starpu_conf *user_conf;
			
 
				+        /* either the user given configuration passed to starpu_init or a default configuration */
			
 
				+	struct starpu_conf *conf;
			
 
				+	/* set to 1 if no conf has been given by the user, it
			
 
				+	 * indicates the memory allocated for the default
			
 
				+	 * configuration should be freed on shutdown */
			
 
				+	int default_conf;
			
 
				 
			
 
				 	/* this flag is set until the runtime is stopped */
			
 
				 	unsigned running;
			
 
				 
			
 
				-        /* indicate if the asynchronous copies should be disabled */
			
 
				-        int disable_asynchronous_copy;
			
 
				-
			
 
				 	/* all the sched ctx of the current instance of starpu */
			
 
				 	struct _starpu_sched_ctx sched_ctxs[STARPU_NMAX_SCHED_CTXS];
			
 
				 };
			
--- a/src/datawizard/coherency.c
+++ b/src/datawizard/coherency.c
@@ -282,6 +282,8 @@ static struct _starpu_data_request *_starpu_search_existing_data_request(struct
 
				 
			
 
				 	if (r)
			
 
				 	{
			
 
				+		_starpu_spin_checklocked(&r->handle->header_lock);
			
 
				+
			
 
				 		_starpu_spin_lock(&r->lock);
			
 
				 
			
 
				                 /* perhaps we need to "upgrade" the request */
			
@@ -333,13 +335,15 @@ static struct _starpu_data_request *_starpu_search_existing_data_request(struct
 
				  * 		    else (invalid,owner->shared)
			
 
				  */
			
 
				 
			
 
				-/* This function is called with handle's header lock taken */
			
 
				 struct _starpu_data_request *_starpu_create_request_to_fetch_data(starpu_data_handle_t handle,
			
 
				 								  struct _starpu_data_replicate *dst_replicate,
			
 
				 								  enum starpu_access_mode mode, unsigned is_prefetch,
			
 
				 								  unsigned async,
			
 
				 								  void (*callback_func)(void *), void *callback_arg)
			
 
				 {
			
 
				+	/* This function is called with handle's header lock taken */
			
 
				+	_starpu_spin_checklocked(&handle->header_lock);
			
 
				+
			
 
				 	unsigned requesting_node = dst_replicate->memory_node;
			
 
				 
			
 
				 	if (dst_replicate->state != STARPU_INVALID)
			
--- a/src/datawizard/data_request.c
+++ b/src/datawizard/data_request.c
@@ -90,6 +90,8 @@ struct _starpu_data_request *_starpu_create_data_request(starpu_data_handle_t ha
 
				 {
			
 
				 	struct _starpu_data_request *r = _starpu_data_request_new();
			
 
				 
			
 
				+	_starpu_spin_checklocked(&handle->header_lock);
			
 
				+
			
 
				 	_starpu_spin_init(&r->lock);
			
 
				 
			
 
				 	r->handle = handle;
			
@@ -235,6 +237,8 @@ static void starpu_handle_data_request_completion(struct _starpu_data_request *r
 
				 #ifdef STARPU_MEMORY_STATUS
			
 
				 	enum _starpu_cache_state old_src_replicate_state = src_replicate->state;
			
 
				 #endif
			
 
				+
			
 
				+	_starpu_spin_checklocked(&handle->header_lock);
			
 
				 	_starpu_update_data_state(handle, r->dst_replicate, mode);
			
 
				 
			
 
				 #ifdef STARPU_MEMORY_STATUS
			
--- a/src/datawizard/interfaces/bcsr_interface.c
+++ b/src/datawizard/interfaces/bcsr_interface.c
@@ -331,7 +331,7 @@ static ssize_t allocate_bcsr_buffer_on_node(void *data_interface_, uint32_t dst_
 
				 		}
			
 
				 #endif
			
 
				 		default:
			
 
				-			STARPU_ASSERT(0);
			
 
				+			STARPU_ABORT();
			
 
				 	}
			
 
				 
			
 
				 	/* allocation succeeded */
			
@@ -361,7 +361,7 @@ fail_rowptr:
 
				 			break;
			
 
				 #endif
			
 
				 		default:
			
 
				-			STARPU_ASSERT(0);
			
 
				+			STARPU_ABORT();
			
 
				 	}
			
 
				 
			
 
				 fail_colind:
			
@@ -380,7 +380,7 @@ fail_colind:
 
				 			break;
			
 
				 #endif
			
 
				 		default:
			
 
				-			STARPU_ASSERT(0);
			
 
				+			STARPU_ABORT();
			
 
				 	}
			
 
				 
			
 
				 fail_nzval:
			
@@ -416,7 +416,7 @@ static void free_bcsr_buffer_on_node(void *data_interface, uint32_t node)
 
				 			break;
			
 
				 #endif
			
 
				 		default:
			
 
				-			STARPU_ASSERT(0);
			
 
				+			STARPU_ABORT();
			
 
				 	}
			
 
				 }
			
 
				 
			
--- a/src/datawizard/interfaces/block_interface.c
+++ b/src/datawizard/interfaces/block_interface.c
@@ -349,7 +349,7 @@ static ssize_t allocate_block_buffer_on_node(void *data_interface_, uint32_t dst
 
				 			}
			
 
				 #endif
			
 
				 		default:
			
 
				-			STARPU_ASSERT(0);
			
 
				+			STARPU_ABORT();
			
 
				 	}
			
 
				 
			
 
				 	if (!fail)
			
@@ -401,7 +401,7 @@ static void free_block_buffer_on_node(void *data_interface, uint32_t node)
 
				                         break;
			
 
				 #endif
			
 
				 		default:
			
 
				-			STARPU_ASSERT(0);
			
 
				+			STARPU_ABORT();
			
 
				 	}
			
 
				 }
			
 
				 
			
--- a/src/datawizard/interfaces/csr_interface.c
+++ b/src/datawizard/interfaces/csr_interface.c
@@ -305,7 +305,7 @@ static ssize_t allocate_csr_buffer_on_node(void *data_interface_, uint32_t dst_n
 
				 			}
			
 
				 #endif
			
 
				 		default:
			
 
				-			STARPU_ASSERT(0);
			
 
				+			STARPU_ABORT();
			
 
				 	}
			
 
				 
			
 
				 	/* allocation succeeded */
			
@@ -340,7 +340,7 @@ fail_rowptr:
 
				 			break;
			
 
				 #endif
			
 
				 		default:
			
 
				-			STARPU_ASSERT(0);
			
 
				+			STARPU_ABORT();
			
 
				 	}
			
 
				 
			
 
				 fail_colind:
			
@@ -364,7 +364,7 @@ fail_colind:
 
				 			break;
			
 
				 #endif
			
 
				 		default:
			
 
				-			STARPU_ASSERT(0);
			
 
				+			STARPU_ABORT();
			
 
				 	}
			
 
				 
			
 
				 fail_nzval:
			
@@ -409,7 +409,7 @@ static void free_csr_buffer_on_node(void *data_interface, uint32_t node)
 
				 			break;
			
 
				 #endif
			
 
				 		default:
			
 
				-			STARPU_ASSERT(0);
			
 
				+			STARPU_ABORT();
			
 
				 	}
			
 
				 }
			
 
				 
			
--- a/src/datawizard/interfaces/matrix_interface.c
+++ b/src/datawizard/interfaces/matrix_interface.c
@@ -328,7 +328,7 @@ static ssize_t allocate_matrix_buffer_on_node(void *data_interface_, uint32_t ds
 
				 			}
			
 
				 #endif
			
 
				 		default:
			
 
				-			STARPU_ASSERT(0);
			
 
				+			STARPU_ABORT();
			
 
				 	}
			
 
				 
			
 
				 	if (!fail)
			
@@ -379,7 +379,7 @@ static void free_matrix_buffer_on_node(void *data_interface, uint32_t node)
 
				                         break;
			
 
				 #endif
			
 
				 		default:
			
 
				-			STARPU_ASSERT(0);
			
 
				+			STARPU_ABORT();
			
 
				 	}
			
 
				 }
			
 
				 
			
--- a/src/datawizard/interfaces/multiformat_interface.c
+++ b/src/datawizard/interfaces/multiformat_interface.c
@@ -363,7 +363,7 @@ static ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, uint32
 
				 			}
			
 
				 #endif
			
 
				 		default:
			
 
				-			STARPU_ASSERT(0);
			
 
				+			STARPU_ABORT();
			
 
				 	}
			
 
				 
			
 
				 	if (fail)
			
--- a/src/datawizard/interfaces/variable_interface.c
+++ b/src/datawizard/interfaces/variable_interface.c
@@ -247,7 +247,7 @@ static ssize_t allocate_variable_buffer_on_node(void *data_interface_, uint32_t
 
				 			}
			
 
				 #endif
			
 
				 		default:
			
 
				-			STARPU_ASSERT(0);
			
 
				+			STARPU_ABORT();
			
 
				 	}
			
 
				 
			
 
				 	if (fail)
			
@@ -281,7 +281,7 @@ static void free_variable_buffer_on_node(void *data_interface, uint32_t node)
 
				                         break;
			
 
				 #endif
			
 
				 		default:
			
 
				-			STARPU_ASSERT(0);
			
 
				+			STARPU_ABORT();
			
 
				 	}
			
 
				 }
			
 
				 
			
--- a/src/datawizard/interfaces/vector_interface.c
+++ b/src/datawizard/interfaces/vector_interface.c
@@ -283,7 +283,7 @@ static ssize_t allocate_vector_buffer_on_node(void *data_interface_, uint32_t ds
 
				 			}
			
 
				 #endif
			
 
				 		default:
			
 
				-			STARPU_ASSERT(0);
			
 
				+			STARPU_ABORT();
			
 
				 	}
			
 
				 
			
 
				 	if (fail)
			
@@ -326,7 +326,7 @@ static void free_vector_buffer_on_node(void *data_interface, uint32_t node)
 
				                         break;
			
 
				 #endif
			
 
				 		default:
			
 
				-			STARPU_ASSERT(0);
			
 
				+			STARPU_ABORT();
			
 
				 	}
			
 
				 }
			
 
				 
			
--- a/src/datawizard/memalloc.c
+++ b/src/datawizard/memalloc.c
@@ -158,6 +158,7 @@ static void transfer_subtree_to_node(starpu_data_handle_t handle, unsigned src_n
 
				 #endif
			
 
				 			/* TODO use request !! */
			
 
				 			/* Take temporary references on the replicates */
			
 
				+			_starpu_spin_checklocked(&handle->header_lock);
			
 
				 			src_replicate->refcnt++;
			
 
				 			dst_replicate->refcnt++;
			
 
				 			handle->busy_count+=2;
			
@@ -708,11 +709,12 @@ static size_t _starpu_get_global_mem_size(int dst_node)
 
				 	switch(kind)
			
 
				 	{
			
 
				 		case STARPU_CPU_RAM:
			
 
				-#ifdef STARPU_DEVEL
			
 
				-#warning to be fixed
			
 
				-#endif
			
 
				-			global_mem_size = 64*1024*1024;
			
 
				-			break;
			
 
				+		{
			
 
				+			/* We should probably never get here: if there is no
			
 
				+			 * space left in RAM, the operating system should swap
			
 
				+			 * to disk for us. */
			
 
				+			STARPU_ABORT();
			
 
				+		}
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 		case STARPU_CUDA_RAM:
			
 
				 		{
			
@@ -730,7 +732,7 @@ static size_t _starpu_get_global_mem_size(int dst_node)
 
				 		}
			
 
				 #endif
			
 
				 		default:
			
 
				-			STARPU_ASSERT(0);
			
 
				+			STARPU_ABORT();
			
 
				 	}
			
 
				 	return global_mem_size;
			
 
				 }
			
@@ -752,6 +754,8 @@ static ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, struct _s
 
				 	unsigned attempts = 0;
			
 
				 	ssize_t allocated_memory;
			
 
				 
			
 
				+	_starpu_spin_checklocked(&handle->header_lock);
			
 
				+
			
 
				 	_starpu_data_allocation_inc_stats(dst_node);
			
 
				 
			
 
				 #ifdef STARPU_USE_ALLOCATION_CACHE
			
--- a/src/datawizard/reduction.c
+++ b/src/datawizard/reduction.c
@@ -113,6 +113,8 @@ void _starpu_data_end_reduction_mode(starpu_data_handle_t handle)
 
				 	unsigned replicate_count = 0;
			
 
				 	starpu_data_handle_t replicate_array[1 + STARPU_NMAXWORKERS];
			
 
				 
			
 
				+	_starpu_spin_checklocked(&handle->header_lock);
			
 
				+
			
 
				 	for (node = 0; node < STARPU_MAXNODES; node++)
			
 
				 	{
			
 
				 		if (handle->per_node[node].state != STARPU_INVALID)
			
@@ -328,6 +330,9 @@ void _starpu_data_end_reduction_mode_terminate(starpu_data_handle_t handle)
 
				 
			
 
				 //	fprintf(stderr, "_starpu_data_end_reduction_mode_terminate\n");
			
 
				 	unsigned worker;
			
 
				+
			
 
				+	_starpu_spin_checklocked(&handle->header_lock);
			
 
				+
			
 
				 	for (worker = 0; worker < nworkers; worker++)
			
 
				 	{
			
 
				 		struct _starpu_data_replicate *replicate;
			
--- a/src/datawizard/user_interactions.c
+++ b/src/datawizard/user_interactions.c
@@ -32,13 +32,17 @@ int starpu_data_request_allocation(starpu_data_handle_t handle, uint32_t node)
 
				 
			
 
				 	STARPU_ASSERT(handle);
			
 
				 
			
 
				-	r = _starpu_create_data_request(handle, NULL, &handle->per_node[node], node, STARPU_NONE, 0, 0);
			
 
				+	_starpu_spin_lock(&handle->header_lock);
			
 
				+
			
 
				+	r = _starpu_create_data_request(handle, NULL, &handle->per_node[node], node, STARPU_NONE, 0, 1);
			
 
				 
			
 
				 	/* we do not increase the refcnt associated to the request since we are
			
 
				 	 * not waiting for its termination */
			
 
				 
			
 
				 	_starpu_post_data_request(r, node);
			
 
				 
			
 
				+	_starpu_spin_unlock(&handle->header_lock);
			
 
				+
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -212,9 +216,11 @@ int starpu_data_acquire(starpu_data_handle_t handle, enum starpu_access_mode mod
 
				 	{
			
 
				 		struct starpu_task *task = _starpu_create_conversion_task(handle, 0);
			
 
				 		int ret;
			
 
				+		_starpu_spin_lock(&handle->header_lock);
			
 
				 		handle->refcnt--;
			
 
				 		handle->busy_count--;
			
 
				 		handle->mf_node = 0;
			
 
				+		_starpu_spin_unlock(&handle->header_lock);
			
 
				 		task->synchronous = 1;
			
 
				 		ret = _starpu_task_submit_internally(task);
			
 
				 		STARPU_ASSERT(!ret);
			
--- a/src/sched_policies/detect_combined_workers.c
+++ b/src/sched_policies/detect_combined_workers.c
@@ -478,7 +478,7 @@ void _starpu_sched_find_worker_combinations(struct starpu_machine_topology *topo
 
				 {
			
 
				     struct _starpu_machine_config *config = _starpu_get_machine_config();
			
 
				 
			
 
				-    if ((config->user_conf && config->user_conf->single_combined_worker > 0) || starpu_get_env_number("STARPU_SINGLE_COMBINED_WORKER") > 0)
			
 
				+    if (config->conf->single_combined_worker > 0)
			
 
				 	combine_all_cpu_workers(topology);
			
 
				     else
			
 
				     {
			
--- a/starpu-top/StarPU-Top-qwt-embed.pri
+++ b/starpu-top/StarPU-Top-qwt-embed.pri
@@ -1,86 +0,0 @@
 
				-SRCDIR=.

			
 
				-SOURCES += \

			
 
				-    $$SRCDIR/qwt/qwt_thermo.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_math.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_scale_engine.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_scale_draw.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_scale_map.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_scale_div.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_painter.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_abstract_scale.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_abstract_scale_draw.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_interval.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_text.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_text_engine.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_clipper.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_color_map.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_plot.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_plot_dict.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_plot_layout.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_scale_widget.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_text_label.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_legend.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_dyngrid_layout.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_plot_canvas.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_plot_item.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_legend_item.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_symbol.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_plot_axis.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_plot_xml.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_knob.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_abstract_slider.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_round_scale_draw.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_double_range.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_wheel.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_slider.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_plot_curve.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_plot_seriesitem.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_series_data.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_point_polar.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_curve_fitter.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_spline.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_dial.cpp \

			
 
				-    $$SRCDIR/qwt/qwt_dial_needle.cpp

			
 
				-HEADERS += \

			
 
				-    $$SRCDIR/qwt/qwt_thermo.h \

			
 
				-    $$SRCDIR/qwt/qwt_math.h \

			
 
				-    $$SRCDIR/qwt/qwt_scale_engine.h \

			
 
				-    $$SRCDIR/qwt/qwt_scale_draw.h \

			
 
				-    $$SRCDIR/qwt/qwt_scale_map.h \

			
 
				-    $$SRCDIR/qwt/qwt_scale_div.h \

			
 
				-    $$SRCDIR/qwt/qwt_painter.h \

			
 
				-    $$SRCDIR/qwt/qwt_global.h \

			
 
				-    $$SRCDIR/qwt/qwt_abstract_scale.h \

			
 
				-    $$SRCDIR/qwt/qwt_abstract_scale_draw.h \

			
 
				-    $$SRCDIR/qwt/qwt_interval.h \

			
 
				-    $$SRCDIR/qwt/qwt_text.h \

			
 
				-    $$SRCDIR/qwt/qwt_text_engine.h \

			
 
				-    $$SRCDIR/qwt/qwt_clipper.h \

			
 
				-    $$SRCDIR/qwt/qwt_color_map.h \

			
 
				-    $$SRCDIR/qwt/qwt_plot.h \

			
 
				-    $$SRCDIR/qwt/qwt_plot_dict.h \

			
 
				-    $$SRCDIR/qwt/qwt_plot_layout.h \

			
 
				-    $$SRCDIR/qwt/qwt_scale_widget.h \

			
 
				-    $$SRCDIR/qwt/qwt_text_label.h \

			
 
				-    $$SRCDIR/qwt/qwt_legend.h \

			
 
				-    $$SRCDIR/qwt/qwt_dyngrid_layout.h \

			
 
				-    $$SRCDIR/qwt/qwt_plot_canvas.h \

			
 
				-    $$SRCDIR/qwt/qwt_plot_item.h \

			
 
				-    $$SRCDIR/qwt/qwt_legend_itemmanager.h \

			
 
				-    $$SRCDIR/qwt/qwt_legend_item.h \

			
 
				-    $$SRCDIR/qwt/qwt_symbol.h \

			
 
				-    $$SRCDIR/qwt/qwt_knob.h \

			
 
				-    $$SRCDIR/qwt/qwt_abstract_slider.h \

			
 
				-    $$SRCDIR/qwt/qwt_round_scale_draw.h \

			
 
				-    $$SRCDIR/qwt/qwt_double_range.h \

			
 
				-    $$SRCDIR/qwt/qwt_wheel.h \

			
 
				-    $$SRCDIR/qwt/qwt_slider.h \

			
 
				-    $$SRCDIR/qwt/qwt_plot_curve.h \

			
 
				-    $$SRCDIR/qwt/qwt_plot_seriesitem.h \

			
 
				-    $$SRCDIR/qwt/qwt_series_data.h \

			
 
				-    $$SRCDIR/qwt/qwt_point_polar.h \

			
 
				-    $$SRCDIR/qwt/qwt_curve_fitter.h \

			
 
				-    $$SRCDIR/qwt/qwt_spline.h \

			
 
				-    $$SRCDIR/qwt/qwt_dial.h \

			
 
				-    $$SRCDIR/qwt/qwt_dial_needle.h

			
 
				-INCLUDEPATH += $$SRCDIR/qwt

			
--- a/tests/datawizard/mpi_like_async.c
+++ b/tests/datawizard/mpi_like_async.c
@@ -376,6 +376,12 @@ int main(int argc, char **argv)
 
				 	/* We check that the value in the "last" thread is valid */
			
 
				 	starpu_data_handle_t last_handle = problem_data[nthreads - 1].handle;
			
 
				 	starpu_data_acquire(last_handle, STARPU_R);
			
 
				+
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+        ret = starpu_opencl_unload_opencl(&opencl_program);
			
 
				+        STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
			
 
				+#endif
			
 
				+
			
 
				 	ret = EXIT_SUCCESS;
			
 
				 	if (problem_data[nthreads - 1].val != (nthreads * niter))
			
 
				 	{
			
@@ -389,10 +395,6 @@ int main(int argc, char **argv)
 
				 		starpu_data_unregister(problem_data[t].handle);
			
 
				 	}
			
 
				 
			
 
				-#ifdef STARPU_USE_OPENCL
			
 
				-        ret = starpu_opencl_unload_opencl(&opencl_program);
			
 
				-        STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
			
 
				-#endif
			
 
				 	starpu_shutdown();
			
 
				 
			
 
				 	STARPU_RETURN(ret);
			
--- a/tests/main/starpu_init.c
+++ b/tests/main/starpu_init.c
@@ -31,6 +31,8 @@ static int check_cpu(int env_cpu, int conf_cpu, int expected_cpu, int *cpu)
 
				 {
			
 
				 	int ret;
			
 
				 
			
 
				+	FPRINTF(stderr, "Testing with env=%d - conf=%d\n", env_cpu, conf_cpu);
			
 
				+
			
 
				 	if (env_cpu != -1)
			
 
				 	{
			
 
				 		char string[50];
			
@@ -40,7 +42,10 @@ static int check_cpu(int env_cpu, int conf_cpu, int expected_cpu, int *cpu)
 
				 
			
 
				 	struct starpu_conf user_conf;
			
 
				 	starpu_conf_init(&user_conf);
			
 
				-	user_conf.ncpus = conf_cpu;
			
 
				+	if (conf_cpu != -1)
			
 
				+	{
			
 
				+	     user_conf.ncpus = conf_cpu;
			
 
				+	}
			
 
				 	ret = starpu_init(&user_conf);
			
 
				 
			
 
				 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;