13 years ago · 058743e72a
--- a/.gitignore
+++ b/.gitignore
@@ -248,3 +248,40 @@ starpu.log
 
																 /tools/starpu_perfmodel_plot.1
															
 
																 /starpu-1.0.pc
															
 
																 /gcc-plugin/examples/cholesky/cholesky
															
 
																+/gcc-plugin/tests/*.log
															
 
																+/test/*.log
															
 
																+/examples/*.log
															
 
																+/tests/main/declare_deps_after_submission
															
 
																+/tests/main/declare_deps_after_submission_synchronous
															
 
																+/tests/main/declare_deps_in_callback
															
 
																+/tests/main/deprecated
															
 
																+/tests/main/deprecated_buffer
															
 
																+/tests/main/deprecated_func
															
 
																+/tests/main/empty_task
															
 
																+/tests/main/empty_task_chain
															
 
																+/tests/main/empty_task_sync_point
															
 
																+/tests/main/empty_task_sync_point_tasks
															
 
																+/tests/main/execute_on_a_specific_worker
															
 
																+/tests/main/get_current_task
															
 
																+/tests/main/insert_task
															
 
																+/tests/main/multiformat_data_release
															
 
																+/tests/main/multiformat_handle_conversion
															
 
																+/tests/main/multithreaded
															
 
																+/tests/main/multithreaded_init
															
 
																+/tests/main/regenerate
															
 
																+/tests/main/restart
															
 
																+/tests/main/starpu_init
															
 
																+/tests/main/starpu_task_bundle
															
 
																+/tests/main/starpu_task_wait
															
 
																+/tests/main/starpu_task_wait_for_all
															
 
																+/tests/main/starpu_worker_exists
															
 
																+/tests/main/static_restartable
															
 
																+/tests/main/static_restartable_tag
															
 
																+/tests/main/static_restartable_using_initializer
															
 
																+/tests/main/subgraph_repeat
															
 
																+/tests/main/subgraph_repeat_regenerate
															
 
																+/tests/main/tag_wait_api
															
 
																+/tests/main/task_wait_api
															
 
																+/tests/main/wait_all_regenerable_tasks
															
 
																+/tools/starpu_workers_activity
															
 
																+/tests/datawizard/interfaces/copy_interfaces
															
--- a/doc/chapters/advanced-examples.texi
+++ b/doc/chapters/advanced-examples.texi
@@ -597,7 +597,8 @@ parallel CPU implementation of the computation to be achieved. This can also be
 
																 useful to improve the load balance between slow CPUs and fast GPUs: since CPUs
															
 
																 work collectively on a single task, the completion time of tasks on CPUs become
															
 
																 comparable to the completion time on GPUs, thus relieving from granularity
															
 
																-discrepancy concerns.
															
 
																+discrepancy concerns. Hwloc support needs to be enabled to get good performance,
															
 
																+otherwise StarPU will not know how to better group cores.
															
 
																 Two modes of execution exist to accomodate with existing usages.
															
--- a/examples/basic_examples/block.c
+++ b/examples/basic_examples/block.c
@@ -60,6 +60,8 @@ int execute_on(uint32_t where, device_func func, float *block, int pnx, int pny,
 
																         if (STARPU_UNLIKELY(ret == -ENODEV))
															
 
																 	{
															
 
																                 FPRINTF(stderr, "No worker may execute this task\n");
															
 
																+		task->destroy = 0;
															
 
																+                starpu_task_destroy(task);
															
 
																                 return 1;
															
 
																 	}
															
--- a/examples/basic_examples/vector_scal_c.c
+++ b/examples/basic_examples/vector_scal_c.c
@@ -53,13 +53,14 @@ static struct starpu_codelet cl =
 
																 	.model = &vector_scal_model
															
 
																 };
															
 
																-void compute_(int *F_NX, float *vector)
															
 
																+int compute_(int *F_NX, float *vector)
															
 
																 {
															
 
																         int NX = *F_NX;
															
 
																 	int ret;
															
 
																 	/* Initialize StarPU with default configuration */
															
 
																 	ret = starpu_init(NULL);
															
 
																+	if (ret == -ENODEV) return 77;
															
 
																 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
															
 
																 	/* Tell StaPU to associate the "vector" vector with the "vector_handle"
															
@@ -98,7 +99,7 @@ void compute_(int *F_NX, float *vector)
 
																 	/* execute the task on any eligible computational ressource */
															
 
																 	ret = starpu_task_submit(task);
															
 
																-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
															
 
																+	if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
															
 
																 	/* StarPU does not need to manipulate the array anymore so we can stop
															
 
																  	 * monitoring it */
															
@@ -106,4 +107,6 @@ void compute_(int *F_NX, float *vector)
 
																 	/* terminate StarPU, no task can be submitted after */
															
 
																 	starpu_shutdown();
															
 
																+
															
 
																+	return ret;
															
 
																 }
															
--- a/examples/lu/lu_example.c
+++ b/examples/lu/lu_example.c
@@ -297,6 +297,11 @@ int main(int argc, char **argv)
 
																 	parse_args(argc, argv);
															
 
																+#ifdef STARPU_SLOW_MACHINE
															
 
																+	size /= 4;
															
 
																+	nblocks /= 4;
															
 
																+#endif
															
 
																+
															
 
																 	ret = starpu_init(NULL);
															
 
																 	if (ret == -ENODEV)
															
 
																 		return 77;
															
--- a/examples/mult/xgemm.c
+++ b/examples/mult/xgemm.c
@@ -275,6 +275,10 @@ int main(int argc, char **argv)
 
																 	parse_args(argc, argv);
															
 
																+#ifdef STARPU_SLOW_MACHINE
															
 
																+	niter /= 10;
															
 
																+#endif
															
 
																+
															
 
																 	ret = starpu_init(NULL);
															
 
																 	if (ret == -ENODEV)
															
 
																 		return 77;
															
--- a/examples/opt/pi/pi.c
+++ b/examples/opt/pi/pi.c
@@ -88,6 +88,7 @@ int main(int argc, char **argv)
 
																 	parse_args(argc, argv);
															
 
																 	ret = starpu_init(NULL);
															
 
																+	if (ret == -ENODEV) return 77;
															
 
																 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
															
 
																 	/* Initialize the random number generator */
															
--- a/examples/opt/pi/pi_redux.c
+++ b/examples/opt/pi/pi_redux.c
@@ -189,19 +189,6 @@ static void pi_func_cuda(void *descr[], void *cl_arg __attribute__ ((unused)))
 
																 }
															
 
																 #endif
															
 
																-/* The amount of work does not depend on the data size at all :) */
															
 
																-static size_t size_base(struct starpu_task *task, unsigned nimpl)
															
 
																-{
															
 
																-	return NSHOT_PER_TASK;
															
 
																-}
															
 
																-
															
 
																-static struct starpu_perfmodel model =
															
 
																-{
															
 
																-	.type = STARPU_HISTORY_BASED,
															
 
																-	.size_base = size_base,
															
 
																-	.symbol = "monte_carlo_pi_redux"
															
 
																-};
															
 
																-
															
 
																 static struct starpu_codelet pi_cl =
															
 
																 {
															
 
																 	.where =
															
@@ -319,6 +306,7 @@ int main(int argc, char **argv)
 
																 	parse_args(argc, argv);
															
 
																 	ret = starpu_init(NULL);
															
 
																+	if (ret == -ENODEV) return 77;
															
 
																 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
															
 
																 	/* Launch a Random Number Generator (RNG) on each worker */
															
--- a/gcc-plugin/src/starpu.c
+++ b/gcc-plugin/src/starpu.c
@@ -1735,6 +1735,12 @@ build_codelet_initializer (tree task_decl)
 
																     return init;
															
 
																   }
															
 
																+  tree codelet_name ()
															
 
																+  {
															
 
																+    const char *name = IDENTIFIER_POINTER (DECL_NAME (task_decl));
															
 
																+    return build_string_literal (strlen (name) + 1, name);
															
 
																+  }
															
 
																+
															
 
																   tree where_init (tree impls)
															
 
																   {
															
 
																     tree impl;
															
@@ -1841,7 +1847,8 @@ build_codelet_initializer (tree task_decl)
 
																   impls = task_implementation_list (task_decl);
															
 
																   inits =
															
 
																-    chain_trees (field_initializer ("where", where_init (impls)),
															
 
																+    chain_trees (field_initializer ("name", codelet_name ()),
															
 
																+		 field_initializer ("where", where_init (impls)),
															
 
																 		 field_initializer ("nbuffers", pointer_arg_count ()),
															
 
																 		 field_initializer ("modes", access_mode_array ()),
															
 
																 		 field_initializer ("cpu_funcs",
															
--- a/gcc-plugin/tests/Makefile.am
+++ b/gcc-plugin/tests/Makefile.am
@@ -74,4 +74,4 @@ check-hook:
 
																 endif !HAVE_GUILE
															
 
																 showcheck:
															
 
																-	-cat $(TEST_LOGS) /dev/null
															
 
																+	-cat $(TEST_LOGS) /dev/null
															
--- a/gcc-plugin/tests/mocks.h
+++ b/gcc-plugin/tests/mocks.h
@@ -57,6 +57,7 @@ const struct insert_task_argument *expected_insert_task_arguments;
 
																 int
															
 
																 starpu_insert_task (struct starpu_codelet *cl, ...)
															
 
																 {
															
 
																+  assert (cl->name != NULL && strlen (cl->name) > 0);
															
 
																   assert (cl->where == (STARPU_CPU | STARPU_OPENCL));
															
 
																   /* TODO: Call `cpu_func' & co. and check whether they do the right
															
--- a/src/core/dependencies/data_concurrency.c
+++ b/src/core/dependencies/data_concurrency.c
@@ -211,9 +211,9 @@ static unsigned _submit_job_enforce_data_deps(struct _starpu_job *j, unsigned st
 
																 			 * _starpu_compar_handles.  */
															
 
																 			continue;
															
 
																+                j->task->status = STARPU_TASK_BLOCKED_ON_DATA;
															
 
																                 if (attempt_to_submit_data_request_from_job(j, buf))
															
 
																 		{
															
 
																-                        j->task->status = STARPU_TASK_BLOCKED_ON_DATA;
															
 
																 			return 1;
															
 
																                 }
															
 
																 	}
															
--- a/src/core/dependencies/tags.c
+++ b/src/core/dependencies/tags.c
@@ -106,6 +106,7 @@ static void _starpu_tag_free(void *_tag)
 
																 #endif
															
 
																 		_starpu_spin_unlock(&tag->lock);
															
 
																+		_starpu_spin_destroy(&tag->lock);
															
 
																 		free(tag);
															
 
																 	}
															
@@ -128,6 +129,10 @@ void _starpu_tag_clear(void)
 
																 {
															
 
																 	_STARPU_PTHREAD_RWLOCK_WRLOCK(&tag_global_rwlock);
															
 
																+	/* XXX: _starpu_tag_free takes the tag spinlocks while we are holding
															
 
																+	 * the global rwlock. This contradicts the lock order of
															
 
																+	 * starpu_tag_wait_array. Should not be a problem in practice since
															
 
																+	 * _starpu_tag_clear is called at shutdown only. */
															
 
																 	_starpu_htbl_clear_tags(&tag_htbl, 0, _starpu_tag_free);
															
 
																 	_STARPU_PTHREAD_RWLOCK_UNLOCK(&tag_global_rwlock);
															
--- a/src/core/perfmodel/perfmodel_bus.c
+++ b/src/core/perfmodel/perfmodel_bus.c
@@ -1,6 +1,6 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2009-2012  Université de Bordeaux 1
															
 
																  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
@@ -498,8 +498,10 @@ static void benchmark_all_gpu_devices(void)
 
																 	hwloc_topology_load(hwtopology);
															
 
																 #endif
															
 
																-	/* TODO: use hwloc */
															
 
																-#ifdef __linux__
															
 
																+#ifdef STARPU_HAVE_HWLOC
															
 
																+	hwloc_cpuset_t former_cpuset = hwloc_bitmap_alloc();
															
 
																+	hwloc_get_cpubind(hwtopology, former_cpuset, HWLOC_CPUBIND_THREAD);
															
 
																+#elif __linux__
															
 
																 	/* Save the current cpu binding */
															
 
																 	cpu_set_t former_process_affinity;
															
 
																 	int ret;
															
@@ -545,8 +547,9 @@ static void benchmark_all_gpu_devices(void)
 
																 	}
															
 
																 #endif
															
 
																-	/* FIXME: use hwloc */
															
 
																-#ifdef __linux__
															
 
																+#ifdef STARPU_HAVE_HWLOC
															
 
																+	hwloc_set_cpubind(hwtopology, former_cpuset, HWLOC_CPUBIND_THREAD);
															
 
																+#elif __linux__
															
 
																 	/* Restore the former affinity */
															
 
																 	ret = sched_setaffinity(0, sizeof(former_process_affinity), &former_process_affinity);
															
 
																 	if (ret)
															
--- a/src/core/perfmodel/perfmodel_history.c
+++ b/src/core/perfmodel/perfmodel_history.c
@@ -966,10 +966,11 @@ double _starpu_non_linear_regression_based_job_expected_perf(struct starpu_perfm
 
																 	{
															
 
																 		uint32_t key = _starpu_compute_buffers_footprint(model, arch, nimpl, j);
															
 
																 		struct starpu_per_arch_perfmodel *per_arch_model = &model->per_arch[arch][nimpl];
															
 
																-		struct starpu_htbl32_node *history = per_arch_model->history;
															
 
																+		struct starpu_htbl32_node *history;
															
 
																 		struct starpu_history_entry *entry;
															
 
																 		_STARPU_PTHREAD_RWLOCK_RDLOCK(&model->model_rwlock);
															
 
																+		history = per_arch_model->history;
															
 
																 		entry = (struct starpu_history_entry *) _starpu_htbl_search_32(history, key);
															
 
																 		_STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);
															
@@ -997,11 +998,13 @@ double _starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, e
 
																 	per_arch_model = &model->per_arch[arch][nimpl];
															
 
																+	_STARPU_PTHREAD_RWLOCK_RDLOCK(&model->model_rwlock);
															
 
																 	history = per_arch_model->history;
															
 
																-	if (!history)
															
 
																+	if (!history) {
															
 
																+		_STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);
															
 
																 		return NAN;
															
 
																+	}
															
 
																-	_STARPU_PTHREAD_RWLOCK_RDLOCK(&model->model_rwlock);
															
 
																 	entry = (struct starpu_history_entry *) _starpu_htbl_search_32(history, key);
															
 
																 	_STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);
															
--- a/src/core/topology.c
+++ b/src/core/topology.c
@@ -290,7 +290,7 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config,
 
																 	_starpu_initialize_workers_bindid(config);
															
 
																 #ifdef STARPU_USE_CUDA
															
 
																-	int ncuda = -1;
															
 
																+	int ncuda;
															
 
																 	ncuda = starpu_get_env_number("STARPU_NCUDA");
															
 
																 	/* STARPU_NCUDA is not set. Did the user specify anything ? */
															
@@ -303,14 +303,30 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config,
 
																 		/* The user did not disable CUDA. We need to initialize CUDA
															
 
																  		 * early to count the number of devices */
															
 
																 		_starpu_init_cuda();
															
 
																+		int nb_devices = _starpu_get_cuda_device_count();
															
 
																 		if (ncuda == -1)
															
 
																 		{
															
 
																 			/* Nothing was specified, so let's choose ! */
															
 
																-			ncuda = STARPU_MIN(_starpu_get_cuda_device_count(), STARPU_MAXCUDADEVS);
															
 
																+			ncuda = nb_devices;
															
 
																+			if (ncuda > STARPU_MAXCUDADEVS)
															
 
																+			{
															
 
																+				fprintf(stderr,
															
 
																+					"# Warning: %d CUDA devices available. Only %d enabled. Use configure option --enable-maxcudadev=xxx to update the maximum value of supported CUDA devices.\n",
															
 
																+					nb_devices, STARPU_MAXCUDADEVS);
															
 
																+				ncuda = STARPU_MAXCUDADEVS;
															
 
																+			}
															
 
																 		}
															
 
																 		else
															
 
																 		{
															
 
																+			if (ncuda > nb_devices)
															
 
																+			{
															
 
																+				/* The user requests more CUDA devices than are available */
															
 
																+				fprintf(stderr,
															
 
																+					"# Warning: %d CUDA devices requested. Only %d available.\n",
															
 
																+					ncuda, nb_devices);
															
 
																+				ncuda = nb_devices;
															
 
																+			}
															
 
																 			/* Let's make sure this value is OK. */
															
 
																 			if (ncuda > STARPU_MAXCUDADEVS)
															
 
																 			{
															
@@ -319,14 +335,6 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config,
 
																 					ncuda, STARPU_MAXCUDADEVS);
															
 
																 				ncuda = STARPU_MAXCUDADEVS;
															
 
																 			}
															
 
																-
															
 
																-			if ((unsigned) ncuda > _starpu_get_cuda_device_count())
															
 
																-			{
															
 
																-				fprintf(stderr,
															
 
																-					"# Warning: %d CUDA devices requested. Only %d available.\n",
															
 
																-					ncuda, _starpu_get_cuda_device_count());
															
 
																-				ncuda = _starpu_get_cuda_device_count();
															
 
																-			}
															
 
																 		}
															
 
																 	}
															
@@ -369,12 +377,19 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config,
 
																  		 * early to count the number of devices */
															
 
																 		_starpu_opencl_init();
															
 
																 		int nb_devices;
															
 
																-		nb_devices = STARPU_MIN(_starpu_opencl_get_device_count(), STARPU_MAXOPENCLDEVS);
															
 
																+		nb_devices = _starpu_opencl_get_device_count();
															
 
																 		if (nopencl == -1)
															
 
																 		{
															
 
																 			/* Nothing was specified, so let's choose ! */
															
 
																 			nopencl = nb_devices;
															
 
																+			if (nopencl > STARPU_MAXOPENCLDEVS)
															
 
																+			{
															
 
																+				fprintf(stderr,
															
 
																+					"# Warning: %d OpenCL devices available. Only %d enabled. Use configure option --enable-maxopencldadev=xxx to update the maximum value of supported OpenCL devices.\n",
															
 
																+					nb_devices, STARPU_MAXOPENCLDEVS);
															
 
																+				nopencl = STARPU_MAXOPENCLDEVS;
															
 
																+			}
															
 
																 		}
															
 
																 		else
															
 
																 		{
															
@@ -385,8 +400,9 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config,
 
																 				fprintf(stderr,
															
 
																 					"# Warning: %d OpenCL devices requested. Only %d available.\n",
															
 
																 					nopencl, nb_devices);
															
 
																-					topology->nopenclgpus = nb_devices;
															
 
																+				nopencl = nb_devices;
															
 
																 			}
															
 
																+			/* Let's make sure this value is OK. */
															
 
																 			if (nopencl > STARPU_MAXOPENCLDEVS)
															
 
																 			{
															
 
																 				fprintf(stderr,
															
--- a/src/core/workers.c
+++ b/src/core/workers.c
@@ -169,7 +169,11 @@ int starpu_combined_worker_can_execute_task(unsigned workerid, struct starpu_tas
 
																 	}
															
 
																 	else
															
 
																 	{
															
 
																-		if ((cl->type == STARPU_SPMD) || (cl->type == STARPU_FORKJOIN))
															
 
																+		if ((cl->type == STARPU_SPMD)
															
 
																+#ifdef STARPU_HAVE_HWLOC
															
 
																+				|| (cl->type == STARPU_FORKJOIN)
															
 
																+#endif
															
 
																+				)
															
 
																 		{
															
 
																 			/* TODO we should add other types of constraints */
															
--- a/src/datawizard/coherency.c
+++ b/src/datawizard/coherency.c
@@ -443,8 +443,10 @@ struct _starpu_data_request *_starpu_create_request_to_fetch_data(starpu_data_ha
 
																 		if (hop != nhops - 1)
															
 
																 		{
															
 
																-			if (!reused_requests[hop + 1])
															
 
																+			if (!reused_requests[hop + 1]) {
															
 
																 				r->next_req[r->next_req_count++] = requests[hop + 1];
															
 
																+				STARPU_ASSERT(r->next_req_count <= STARPU_MAXNODES);
															
 
																+			}
															
 
																 		}
															
 
																 		else
															
 
																 			_starpu_data_request_append_callback(r, callback_func, callback_arg);
															
--- a/src/datawizard/filters.c
+++ b/src/datawizard/filters.c
@@ -382,6 +382,12 @@ void starpu_data_unpartition(starpu_data_handle_t root_handle, uint32_t gatherin
 
																 			still_valid[node]?newstate:STARPU_INVALID;
															
 
																 	}
															
 
																+	for (child = 0; child < root_handle->nchildren; child++)
															
 
																+	{
															
 
																+		struct _starpu_data_state *child_handle = &root_handle->children[child];
															
 
																+		_starpu_spin_unlock(&child_handle->header_lock);
															
 
																+	}
															
 
																+
															
 
																 	/* there is no child anymore */
															
 
																 	free(root_handle->children);
															
 
																 	root_handle->children = NULL;
															
--- a/src/datawizard/interfaces/block_interface.c
+++ b/src/datawizard/interfaces/block_interface.c
@@ -1,6 +1,6 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2009-2011  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2009-2012  Université de Bordeaux 1
															
 
																  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
@@ -630,8 +630,7 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARP
 
																 		else
															
 
																 		{
															
 
																 			/* Are all plans contiguous */
															
 
																-                        /* XXX non contiguous buffers are not properly supported yet. (TODO) */
															
 
																-                        STARPU_ASSERT(0);
															
 
																+                        STARPU_ASSERT_MSG(0, "XXX non contiguous buffers are not properly supported in OpenCL yet. (TODO)");
															
 
																                 }
															
 
																         }
															
 
																 	else
															
@@ -697,8 +696,7 @@ static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARP
 
																                 else
															
 
																 		{
															
 
																 			/* Are all plans contiguous */
															
 
																-                        /* XXX non contiguous buffers are not properly supported yet. (TODO) */
															
 
																-                        STARPU_ASSERT(0);
															
 
																+                        STARPU_ASSERT_MSG(0, "XXX non contiguous buffers are not properly supported in OpenCL yet. (TODO)");
															
 
																                 }
															
 
																         }
															
 
																 	else
															
--- a/src/datawizard/interfaces/matrix_interface.c
+++ b/src/datawizard/interfaces/matrix_interface.c
@@ -1,6 +1,6 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2010-2011  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2010-2012  Université de Bordeaux 1
															
 
																  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
@@ -605,8 +605,7 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARP
 
																 	struct starpu_matrix_interface *dst_matrix = dst_interface;
															
 
																         int err,ret;
															
 
																-	/* XXX non contiguous matrices are not supported with OpenCL yet ! (TODO) */
															
 
																-	STARPU_ASSERT((src_matrix->ld == src_matrix->nx) && (dst_matrix->ld == dst_matrix->nx));
															
 
																+	STARPU_ASSERT_MSG((src_matrix->ld == src_matrix->nx) && (dst_matrix->ld == dst_matrix->nx), "XXX non contiguous buffers are not properly supported in OpenCL yet. (TODO)");
															
 
																 	err = starpu_opencl_copy_ram_to_opencl_async_sync((void*)src_matrix->ptr, src_node, (cl_mem)dst_matrix->dev_handle, dst_node,
															
 
																                                                            src_matrix->nx*src_matrix->ny*src_matrix->elemsize,
															
@@ -625,8 +624,7 @@ static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARP
 
																 	struct starpu_matrix_interface *dst_matrix = dst_interface;
															
 
																         int err, ret;
															
 
																-	/* XXX non contiguous matrices are not supported with OpenCL yet ! (TODO) */
															
 
																-	STARPU_ASSERT((src_matrix->ld == src_matrix->nx) && (dst_matrix->ld == dst_matrix->nx));
															
 
																+	STARPU_ASSERT_MSG((src_matrix->ld == src_matrix->nx) && (dst_matrix->ld == dst_matrix->nx), "XXX non contiguous buffers are not properly supported in OpenCL yet. (TODO)");
															
 
																         err = starpu_opencl_copy_opencl_to_ram_async_sync((cl_mem)src_matrix->dev_handle, src_node, (void*)dst_matrix->ptr, dst_node,
															
 
																                                                            src_matrix->nx*src_matrix->ny*src_matrix->elemsize,
															
--- a/src/datawizard/interfaces/multiformat_interface.c
+++ b/src/datawizard/interfaces/multiformat_interface.c
@@ -267,7 +267,7 @@ static void free_multiformat_buffer_on_node(void *data_interface, uint32_t node)
 
																 #endif
															
 
																 #ifdef STARPU_USE_OPENCL
															
 
																 		case STARPU_OPENCL_RAM:
															
 
																-			/* TODO */
															
 
																+			STARPU_ASSERT_MSG(0, "XXX multiformat not supported on OpenCL yet (TODO)");
															
 
																 			break;
															
 
																 #endif
															
 
																 		default:
															
@@ -711,7 +711,8 @@ static int copy_opencl_to_opencl(void *src_interface, unsigned src_node,
 
																 	(void) dst_interface;
															
 
																 	(void) src_node;
															
 
																 	(void) dst_node;
															
 
																-/* TODO */
															
 
																+
															
 
																+	STARPU_ASSERT_MSG(0, "XXX multiformat copy OpenCL-OpenCL not supported yet (TODO)");
															
 
																 	return 0;
															
 
																 }
															
 
																 #endif
															
--- a/src/datawizard/user_interactions.c
+++ b/src/datawizard/user_interactions.c
@@ -282,6 +282,7 @@ int starpu_data_acquire(starpu_data_handle_t handle, enum starpu_access_mode mod
 
																 		while (!wrapper.finished)
															
 
																 			_STARPU_PTHREAD_COND_WAIT(&wrapper.cond, &wrapper.lock);
															
 
																 		_STARPU_PTHREAD_MUTEX_UNLOCK(&wrapper.lock);
															
 
																+		_STARPU_PTHREAD_MUTEX_DESTROY(&wrapper.lock);
															
 
																 	}
															
 
																 	/* At that moment, the caller holds a reference to the piece of data.
															
--- a/src/drivers/cpu/driver_cpu.c
+++ b/src/drivers/cpu/driver_cpu.c
@@ -20,6 +20,7 @@
 
																 #include <math.h>
															
 
																 #include <starpu.h>
															
 
																+#include <starpu_profiling.h>
															
 
																 #include <drivers/driver_common/driver_common.h>
															
 
																 #include <common/utils.h>
															
 
																 #include <core/debug.h>
															
@@ -30,6 +31,7 @@
 
																 static int execute_job_on_cpu(struct _starpu_job *j, struct _starpu_worker *cpu_args, int is_parallel_task, int rank, enum starpu_perf_archtype perf_arch)
															
 
																 {
															
 
																 	int ret;
															
 
																+	int profiling = starpu_profiling_status_get();
															
 
																 	struct timespec codelet_start, codelet_end;
															
 
																 	struct starpu_task *task = j->task;
															
@@ -51,7 +53,8 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct _starpu_worker *cpu_
 
																 	if (is_parallel_task)
															
 
																 		_STARPU_PTHREAD_BARRIER_WAIT(&j->before_work_barrier);
															
 
																-	_starpu_driver_start_job(cpu_args, j, &codelet_start, rank);
															
 
																+	/* Pass the profiling status sampled once at function entry, so the start, end and feedback steps all use the same value */
															
 
																+	_starpu_driver_start_job(cpu_args, j, &codelet_start, rank, profiling);
															
 
																 	/* In case this is a Fork-join parallel task, the worker does not
															
 
																 	 * execute the kernel at all. */
															
@@ -68,7 +71,7 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct _starpu_worker *cpu_
 
																 			_starpu_bind_thread_on_cpu(cpu_args->config, cpu_args->bindid);
															
 
																 	}
															
 
																-	_starpu_driver_end_job(cpu_args, j, perf_arch, &codelet_end, rank);
															
 
																+	_starpu_driver_end_job(cpu_args, j, perf_arch, &codelet_end, rank, profiling);
															
 
																 	if (is_parallel_task)
															
 
																 		_STARPU_PTHREAD_BARRIER_WAIT(&j->after_work_barrier);
															
@@ -76,7 +79,7 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct _starpu_worker *cpu_
 
																 	if (rank == 0)
															
 
																 	{
															
 
																 		_starpu_driver_update_job_feedback(j, cpu_args,
															
 
																-				perf_arch, &codelet_start, &codelet_end);
															
 
																+				perf_arch, &codelet_start, &codelet_end, profiling);
															
 
																 		_starpu_push_task_output(j, 0);
															
 
																 	}
															
--- a/src/drivers/cuda/driver_cuda.c
+++ b/src/drivers/cuda/driver_cuda.c
@@ -20,6 +20,7 @@
 
																 #include <starpu.h>
															
 
																 #include <starpu_cuda.h>
															
 
																+#include <starpu_profiling.h>
															
 
																 #include <common/utils.h>
															
 
																 #include <common/config.h>
															
 
																 #include <core/debug.h>
															
@@ -198,6 +199,7 @@ static int execute_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *arg
 
																 	struct timespec codelet_start, codelet_end;
															
 
																+	int profiling = starpu_profiling_status_get();
															
 
																 	unsigned calibrate_model = 0;
															
 
																 	STARPU_ASSERT(task);
															
@@ -223,7 +225,7 @@ static int execute_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *arg
 
																 			STARPU_CUDA_REPORT_ERROR(cures);
															
 
																 	}
															
 
																-	_starpu_driver_start_job(args, j, &codelet_start, 0);
															
 
																+	_starpu_driver_start_job(args, j, &codelet_start, 0, profiling);
															
 
																 #ifdef HAVE_CUDA_MEMCPY_PEER
															
 
																 	/* We make sure we do manipulate the proper device */
															
@@ -236,9 +238,9 @@ static int execute_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *arg
 
																 	STARPU_ASSERT(func);
															
 
																 	func(task->interfaces, task->cl_arg);
															
 
																-	_starpu_driver_end_job(args, j, args->perf_arch, &codelet_end, 0);
															
 
																+	_starpu_driver_end_job(args, j, args->perf_arch, &codelet_end, 0, profiling);
															
 
																-	_starpu_driver_update_job_feedback(j, args, args->perf_arch, &codelet_start, &codelet_end);
															
 
																+	_starpu_driver_update_job_feedback(j, args, args->perf_arch, &codelet_start, &codelet_end, profiling);
															
 
																 	_starpu_push_task_output(j, mask);
															
--- a/src/drivers/driver_common/driver_common.c
+++ b/src/drivers/driver_common/driver_common.c
@@ -1,6 +1,6 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2010, 2011  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2010-2012  Université de Bordeaux 1
															
 
																  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
															
 
																  * Copyright (C) 2011  Télécom-SudParis
															
 
																  *
															
@@ -27,12 +27,11 @@
 
																 #include <core/sched_policy.h>
															
 
																 #include <top/starpu_top_core.h>
															
 
																-void _starpu_driver_start_job(struct _starpu_worker *args, struct _starpu_job *j, struct timespec *codelet_start, int rank)
															
 
																+void _starpu_driver_start_job(struct _starpu_worker *args, struct _starpu_job *j, struct timespec *codelet_start, int rank, int profiling)
															
 
																 {
															
 
																 	struct starpu_task *task = j->task;
															
 
																 	struct starpu_codelet *cl = task->cl;
															
 
																 	struct starpu_task_profiling_info *profiling_info;
															
 
																-	int profiling = starpu_profiling_status_get();
															
 
																 	int starpu_top=_starpu_top_status_get();
															
 
																 	int workerid = args->workerid;
															
 
																 	unsigned calibrate_model = 0;
															
@@ -65,12 +64,11 @@ void _starpu_driver_start_job(struct _starpu_worker *args, struct _starpu_job *j
 
																 	_STARPU_TRACE_START_CODELET_BODY(j);
															
 
																 }
															
 
																-void _starpu_driver_end_job(struct _starpu_worker *args, struct _starpu_job *j, enum starpu_perf_archtype perf_arch STARPU_ATTRIBUTE_UNUSED, struct timespec *codelet_end, int rank)
															
 
																+void _starpu_driver_end_job(struct _starpu_worker *args, struct _starpu_job *j, enum starpu_perf_archtype perf_arch STARPU_ATTRIBUTE_UNUSED, struct timespec *codelet_end, int rank, int profiling)
															
 
																 {
															
 
																 	struct starpu_task *task = j->task;
															
 
																 	struct starpu_codelet *cl = task->cl;
															
 
																 	struct starpu_task_profiling_info *profiling_info = task->profiling_info;
															
 
																-	int profiling = starpu_profiling_status_get();
															
 
																 	int starpu_top=_starpu_top_status_get();
															
 
																 	int workerid = args->workerid;
															
 
																 	unsigned calibrate_model = 0;
															
@@ -93,7 +91,7 @@ void _starpu_driver_end_job(struct _starpu_worker *args, struct _starpu_job *j,
 
																 }
															
 
																 void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_worker *worker_args,
															
 
																 					enum starpu_perf_archtype perf_arch,
															
 
																-					struct timespec *codelet_start, struct timespec *codelet_end)
															
 
																+					struct timespec *codelet_start, struct timespec *codelet_end, int profiling)
															
 
																 {
															
 
																 	struct starpu_task_profiling_info *profiling_info = j->task->profiling_info;
															
 
																 	struct timespec measured_ts;
															
@@ -101,13 +99,12 @@ void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_wo
 
																 	int workerid = worker_args->workerid;
															
 
																 	struct starpu_codelet *cl = j->task->cl;
															
 
																 	int calibrate_model = 0;
															
 
																-	int profiling = starpu_profiling_status_get();
															
 
																 	int updated = 0;
															
 
																-	if (cl->model && _starpu_get_calibrate_flag())
															
 
																+	if (cl->model && cl->model->benchmarking)
															
 
																 		calibrate_model = 1;
															
 
																-	if (profiling_info || calibrate_model)
															
 
																+	if ((profiling && profiling_info) || calibrate_model)
															
 
																 	{
															
 
																 		starpu_timespec_sub(codelet_end, codelet_start, &measured_ts);
															
 
																 		measured = starpu_timing_timespec_to_us(&measured_ts);
															
--- a/src/drivers/driver_common/driver_common.h
+++ b/src/drivers/driver_common/driver_common.h
@@ -1,6 +1,6 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2010, 2011  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2010-2012  Université de Bordeaux 1
															
 
																  * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
@@ -24,12 +24,12 @@
 
																 #include <common/utils.h>
															
 
																 void _starpu_driver_start_job(struct _starpu_worker *args, struct _starpu_job *j,
															
 
																-			      struct timespec *codelet_start, int rank);
															
 
																+			      struct timespec *codelet_start, int rank, int profiling);
															
 
																 void _starpu_driver_end_job(struct _starpu_worker *args, struct _starpu_job *j, enum starpu_perf_archtype perf_arch,
															
 
																-			    struct timespec *codelet_end, int rank);
															
 
																+			    struct timespec *codelet_end, int rank, int profiling);
															
 
																 void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_worker *worker_args,
															
 
																 					enum starpu_perf_archtype perf_arch,
															
 
																-					struct timespec *codelet_start, struct timespec *codelet_end);
															
 
																+					struct timespec *codelet_start, struct timespec *codelet_end, int profiling);
															
 
																 void _starpu_block_worker(int workerid, pthread_cond_t *cond, pthread_mutex_t *mutex);
															
--- a/src/drivers/opencl/driver_opencl.c
+++ b/src/drivers/opencl/driver_opencl.c
@@ -20,6 +20,7 @@
 
																 #include <math.h>
															
 
																 #include <starpu.h>
															
 
																+#include <starpu_profiling.h>
															
 
																 #include <common/config.h>
															
 
																 #include <common/utils.h>
															
 
																 #include <core/debug.h>
															
@@ -579,6 +580,7 @@ static int _starpu_opencl_execute_job(struct _starpu_job *j, struct _starpu_work
 
																 	STARPU_ASSERT(j);
															
 
																 	struct starpu_task *task = j->task;
															
 
																+	int profiling = starpu_profiling_status_get();
															
 
																 	struct timespec codelet_start, codelet_end;
															
 
																 	STARPU_ASSERT(task);
															
@@ -594,16 +596,16 @@ static int _starpu_opencl_execute_job(struct _starpu_job *j, struct _starpu_work
 
																 		return -EAGAIN;
															
 
																 	}
															
 
																-	_starpu_driver_start_job(args, j, &codelet_start, 0);
															
 
																+	_starpu_driver_start_job(args, j, &codelet_start, 0, profiling);
															
 
																 	starpu_opencl_func_t func = _starpu_task_get_opencl_nth_implementation(cl, j->nimpl);
															
 
																 	STARPU_ASSERT(func);
															
 
																 	func(task->interfaces, task->cl_arg);
															
 
																-	_starpu_driver_end_job(args, j, args->perf_arch, &codelet_end, 0);
															
 
																+	_starpu_driver_end_job(args, j, args->perf_arch, &codelet_end, 0, profiling);
															
 
																 	_starpu_driver_update_job_feedback(j, args, args->perf_arch,
															
 
																-							&codelet_start, &codelet_end);
															
 
																+					   &codelet_start, &codelet_end, profiling);
															
 
																 	_starpu_push_task_output(j, mask);
															
--- a/src/sched_policies/heft.c
+++ b/src/sched_policies/heft.c
@@ -307,11 +307,15 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 
																 		for (nimpl = 0; nimpl <STARPU_MAXIMPLEMENTATIONS; nimpl++) 
															
 
																 		{
															
 
																 			/* Sometimes workers didn't take the tasks as early as we expected */
															
 
																+			pthread_mutex_t *sched_mutex;
															
 
																+			pthread_cond_t *sched_cond;
															
 
																+			starpu_worker_get_sched_condition(sched_ctx_id, workerid, &sched_mutex, &sched_cond);
															
 
																+			_STARPU_PTHREAD_MUTEX_LOCK(&sched_mutex[worker]);
															
 
																 			exp_start[worker] = STARPU_MAX(exp_start[worker], starpu_timing_now());
															
 
																 			exp_end[worker_ctx][nimpl] = exp_start[worker] + exp_len[worker];
															
 
																 			if (exp_end[worker_ctx][nimpl] > max_exp_end)
															
 
																  				max_exp_end = exp_end[worker_ctx][nimpl];
															
 
																-			
															
 
																+			_STARPU_PTHREAD_MUTEX_UNLOCK(&sched_mutex[worker]);
															
 
																 			if (!starpu_worker_can_execute_task(worker, task, nimpl))
															
 
																 			{
															
 
																 				/* no one on that queue may execute this task */
															
--- a/src/sched_policies/parallel_heft.c
+++ b/src/sched_policies/parallel_heft.c
@@ -34,8 +34,6 @@
 
																 #define DBL_MAX __DBL_MAX__
															
 
																 #endif
															
 
																-static pthread_mutex_t big_lock;
															
 
																-
															
 
																 static unsigned nworkers, ncombinedworkers;
															
 
																 //static enum starpu_perf_archtype applicable_perf_archtypes[STARPU_NARCH_VARIATIONS];
															
 
																 //static unsigned napplicable_perf_archtypes = 0;
															
@@ -93,18 +91,18 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 
																 	int ret = 0;
															
 
																-	_STARPU_PTHREAD_MUTEX_LOCK(&big_lock);
															
 
																-
															
 
																 	if (is_basic_worker)
															
 
																 	{
															
 
																 		task->predicted = exp_end_predicted - worker_exp_end[best_workerid];
															
 
																 		/* TODO */
															
 
																 		task->predicted_transfer = 0;
															
 
																+		_STARPU_PTHREAD_MUTEX_LOCK(sched_ctx->sched_mutex[best_workerid]);
															
 
																 		worker_exp_len[best_workerid] += task->predicted;
															
 
																 		worker_exp_end[best_workerid] = exp_end_predicted;
															
 
																 		worker_exp_start[best_workerid] = exp_end_predicted - worker_exp_len[best_workerid];
															
 
																 		ntasks[best_workerid]++;
															
 
																+		_STARPU_PTHREAD_MUTEX_UNLOCK(sched_ctx->sched_mutex[best_workerid]);
															
 
																 		ret = starpu_push_local_task(best_workerid, task, prio);
															
 
																 	}
															
@@ -135,19 +133,19 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 
																 			/* TODO */
															
 
																 			alias->predicted_transfer = 0;
															
 
																+			_STARPU_PTHREAD_MUTEX_LOCK(sched_ctx->sched_mutex[local_worker]);
															
 
																 			worker_exp_len[local_worker] += alias->predicted;
															
 
																 			worker_exp_end[local_worker] = exp_end_predicted;
															
 
																 			worker_exp_start[local_worker] = exp_end_predicted - worker_exp_len[local_worker];
															
 
																 			ntasks[local_worker]++;
															
 
																+			_STARPU_PTHREAD_MUTEX_UNLOCK(sched_ctx->sched_mutex[local_worker]);
															
 
																 			ret |= starpu_push_local_task(local_worker, alias, prio);
															
 
																 		}
															
 
																 	}
															
 
																-	_STARPU_PTHREAD_MUTEX_UNLOCK(&big_lock);
															
 
																-
															
 
																 	return ret;
															
 
																 }
															
@@ -245,10 +243,12 @@ static int _parallel_heft_push_task(struct starpu_task *task, unsigned prio, uns
 
																 	{
															
 
																 		worker = sched_ctx->workerids[worker_ctx];
															
 
																 		/* Sometimes workers didn't take the tasks as early as we expected */
															
 
																+		_STARPU_PTHREAD_MUTEX_LOCK(sched_ctx->sched_mutex[worker]);
															
 
																 		worker_exp_start[worker] = STARPU_MAX(worker_exp_start[worker], starpu_timing_now());
															
 
																 		worker_exp_end[worker] = worker_exp_start[worker] + worker_exp_len[worker];
															
 
																 		if (worker_exp_end[worker] > max_exp_end)
															
 
																 			max_exp_end = worker_exp_end[worker];
															
 
																+		_STARPU_PTHREAD_MUTEX_UNLOCK(sched_ctx->sched_mutex[worker]);
															
 
																 	}
															
 
																 	unsigned nimpl;
															
@@ -325,8 +325,7 @@ static int _parallel_heft_push_task(struct starpu_task *task, unsigned prio, uns
 
																 		} //end for
															
 
																 	}
															
 
																-	if (unknown)
															
 
																-	{
															
 
																+	if (unknown) {
															
 
																 		forced_best = ntasks_best;
															
 
																 		forced_best_ctx = ntasks_best_ctx;
															
 
																 		forced_nimpl = nimpl_best;
															
@@ -483,7 +482,6 @@ static void initialize_parallel_heft_policy(unsigned sched_ctx_id)
 
																 		_STARPU_PTHREAD_COND_INIT(sched_ctx->sched_cond[workerid], NULL);
															
 
																 	}
															
 
																-	_STARPU_PTHREAD_MUTEX_INIT(&big_lock, NULL);
															
 
																 	/* We pre-compute an array of all the perfmodel archs that are applicable */
															
 
																 	unsigned total_worker_count = nworkers_ctx + ncombinedworkers;
															
--- a/src/util/malloc.c
+++ b/src/util/malloc.c
@@ -24,7 +24,7 @@
 
																 #include <starpu_cuda.h>
															
 
																 #include <drivers/opencl/driver_opencl.h>
															
 
																-#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
															
 
																+#if (defined(STARPU_USE_CUDA) && !defined(HAVE_CUDA_MEMCPY_PEER))// || defined(STARPU_USE_OPENCL)
															
 
																 struct malloc_pinned_codelet_struct
															
 
																 {
															
 
																 	void **ptr;
															
@@ -41,7 +41,7 @@ struct malloc_pinned_codelet_struct
 
																 //}
															
 
																 //#endif
															
 
																-#ifdef STARPU_USE_CUDA
															
 
																+#if defined(STARPU_USE_CUDA) && !defined(HAVE_CUDA_MEMCPY_PEER)
															
 
																 static void malloc_pinned_cuda_codelet(void *buffers[] STARPU_ATTRIBUTE_UNUSED, void *arg)
															
 
																 {
															
 
																 	struct malloc_pinned_codelet_struct *s = arg;
															
@@ -53,7 +53,7 @@ static void malloc_pinned_cuda_codelet(void *buffers[] STARPU_ATTRIBUTE_UNUSED,
 
																 }
															
 
																 #endif
															
 
																-#if defined(STARPU_USE_CUDA)// || defined(STARPU_USE_OPENCL)
															
 
																+#if (defined(STARPU_USE_CUDA) && !defined(HAVE_CUDA_MEMCPY_PEER))// || defined(STARPU_USE_OPENCL)
															
 
																 static struct starpu_perfmodel malloc_pinned_model =
															
 
																 {
															
 
																 	.type = STARPU_HISTORY_BASED,
															
@@ -81,12 +81,14 @@ int starpu_malloc(void **A, size_t dim)
 
																 	if (_starpu_can_submit_cuda_task())
															
 
																 	{
															
 
																 #ifdef STARPU_USE_CUDA
															
 
																+#ifdef HAVE_CUDA_MEMCPY_PEER
															
 
																+		cudaError_t cures;
															
 
																+		cures = cudaHostAlloc(A, dim, cudaHostAllocPortable);
															
 
																+		if (STARPU_UNLIKELY(cures))
															
 
																+			STARPU_CUDA_REPORT_ERROR(cures);
															
 
																+#else
															
 
																 		int push_res;
															
 
																-#ifdef STARPU_DEVEL
															
 
																-#warning TODO: CUDA4 is able to directly allocate from any thread without having to launch a task
															
 
																-#endif
															
 
																-
															
 
																 		struct malloc_pinned_codelet_struct s =
															
 
																 		{
															
 
																 			.ptr = A,
															
@@ -106,6 +108,7 @@ int starpu_malloc(void **A, size_t dim)
 
																 		push_res = starpu_task_submit(task);
															
 
																 		STARPU_ASSERT(push_res != -ENODEV);
															
 
																 #endif
															
 
																+#endif
															
 
																 	}
															
 
																 //	else if (_starpu_can_submit_opencl_task())
															
 
																 //	{
															
@@ -142,7 +145,7 @@ int starpu_malloc(void **A, size_t dim)
 
																 	return 0;
															
 
																 }
															
 
																-#ifdef STARPU_USE_CUDA
															
 
																+#if defined(STARPU_USE_CUDA) && !defined(HAVE_CUDA_MEMCPY_PEER)
															
 
																 static void free_pinned_cuda_codelet(void *buffers[] STARPU_ATTRIBUTE_UNUSED, void *arg)
															
 
																 {
															
 
																 	cudaError_t cures;
															
@@ -161,7 +164,7 @@ static void free_pinned_cuda_codelet(void *buffers[] STARPU_ATTRIBUTE_UNUSED, vo
 
																 //}
															
 
																 //#endif
															
 
																-#if defined(STARPU_USE_CUDA) // || defined(STARPU_USE_OPENCL)
															
 
																+#if (defined(STARPU_USE_CUDA) && !defined(HAVE_CUDA_MEMCPY_PEER)) // || defined(STARPU_USE_OPENCL)
															
 
																 static struct starpu_perfmodel free_pinned_model =
															
 
																 {
															
 
																 	.type = STARPU_HISTORY_BASED,
															
@@ -185,16 +188,21 @@ int starpu_free(void *A)
 
																 		return -EDEADLK;
															
 
																 #ifdef STARPU_USE_CUDA
															
 
																+	if (_starpu_can_submit_cuda_task())
															
 
																+	{
															
 
																+#ifndef HAVE_CUDA_MEMCPY_PEER
															
 
																 	if (!_starpu_is_initialized())
															
 
																 	{
															
 
																+#endif
															
 
																 		/* This is especially useful when starpu_free is called from
															
 
																  		 * the GCC-plugin. starpu_shutdown will probably have already
															
 
																 		 * been called, so we will not be able to submit a task. */
															
 
																 		cudaError_t err = cudaFreeHost(A);
															
 
																 		if (STARPU_UNLIKELY(err))
															
 
																 			STARPU_CUDA_REPORT_ERROR(err);
															
 
																+#ifndef HAVE_CUDA_MEMCPY_PEER
															
 
																 	}
															
 
																-	else if (_starpu_can_submit_cuda_task())
															
 
																+	else
															
 
																 	{
															
 
																 		int push_res;
															
@@ -211,6 +219,7 @@ int starpu_free(void *A)
 
																 		push_res = starpu_task_submit(task);
															
 
																 		STARPU_ASSERT(push_res != -ENODEV);
															
 
																 	}
															
 
																+#endif
															
 
																 //	else if (_starpu_can_submit_opencl_task())
															
 
																 //	{
															
 
																 //#ifdef STARPU_USE_OPENCL
															
@@ -230,7 +239,7 @@ int starpu_free(void *A)
 
																 //		STARPU_ASSERT(push_res != -ENODEV);
															
 
																 //#endif
															
 
																 //	}
															
 
																-	else
															
 
																+	} else
															
 
																 #endif
															
 
																 	{
															
 
																 		free(A);
															
--- a/src/util/starpu_insert_task.c
+++ b/src/util/starpu_insert_task.c
@@ -78,5 +78,12 @@ int starpu_insert_task(struct starpu_codelet *cl, ...)
 
																 	va_start(varg_list, cl);
															
 
																         struct starpu_task *task = starpu_task_create();
															
 
																-        return _starpu_insert_task_create_and_submit(arg_buffer, cl, &task, varg_list);
															
 
																+	int ret = _starpu_insert_task_create_and_submit(arg_buffer, cl, &task, varg_list);
															
 
																+
															
 
																+	if (ret == -ENODEV)
															
 
																+	{
															
 
																+		task->destroy = 0;
															
 
																+		starpu_task_destroy(task);
															
 
																+	}
															
 
																+        return ret;
															
 
																 }
															
--- a/tests/main/subgraph_repeat.c
+++ b/tests/main/subgraph_repeat.c
@@ -62,19 +62,19 @@ static struct starpu_codelet dummy_codelet =
 
																 static void callback_task_D(void *arg __attribute__((unused)))
															
 
																 {
															
 
																+	_STARPU_PTHREAD_MUTEX_LOCK(&mutex);
															
 
																 	loop_cnt++;
															
 
																 	if (loop_cnt == niter)
															
 
																 	{
															
 
																 		/* We are done */
															
 
																-		_STARPU_PTHREAD_MUTEX_LOCK(&mutex);
															
 
																 		_STARPU_PTHREAD_COND_SIGNAL(&cond);
															
 
																 		_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
															
 
																 	}
															
 
																 	else
															
 
																 	{
															
 
																 		int ret;
															
 
																-
															
 
																+		_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
															
 
																 		/* Let's go for another iteration */
															
 
																 		ret = starpu_task_submit(&taskA); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
															
 
																 		ret = starpu_task_submit(&taskB); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
															
--- a/tests/overlap/overlap.c
+++ b/tests/overlap/overlap.c
@@ -25,7 +25,11 @@
 
																 #include <pthread.h>
															
 
																 #include "../helper.h"
															
 
																+#ifdef STARPU_SLOW_MACHINE
															
 
																+#define NTASKS	1000
															
 
																+#else
															
 
																 #define NTASKS	10000
															
 
																+#endif
															
 
																 #define VECTORSIZE	1024
															
 
																 #define TASKDURATION	24U
															
--- a/tools/dev/check_register.sh
+++ b/tools/dev/check_register.sh
@@ -1,37 +0,0 @@
 
																-#!/bin/bash
															
 
																-
															
 
																-# StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																-#
															
 
																-# Copyright (C) 2011  Centre National de la Recherche Scientifique
															
 
																-#
															
 
																-# StarPU is free software; you can redistribute it and/or modify
															
 
																-# it under the terms of the GNU Lesser General Public License as published by
															
 
																-# the Free Software Foundation; either version 2.1 of the License, or (at
															
 
																-# your option) any later version.
															
 
																-#
															
 
																-# StarPU is distributed in the hope that it will be useful, but
															
 
																-# WITHOUT ANY WARRANTY; without even the implied warranty of
															
 
																-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
															
 
																-#
															
 
																-# See the GNU Lesser General Public License in COPYING.LGPL for more details.
															
 
																-
															
 
																-stcolor=$(tput sgr0)
															
 
																-datacolor=$(tput setaf 2)
															
 
																-filecolor=$(tput setaf 1)
															
 
																-
															
 
																-process_file()
															
 
																-{
															
 
																-    datas=$(grep "data_register(" $f| awk -F',' '{print $1}' | awk -F'(' '{print $2}' | tr -d '&' | sed 's/\[/\\\[/g' | sed 's/\]/\\\]/g' | sed 's/\*/\\\*/g')
															
 
																-    for data in $datas ; do
															
 
																-	x=$(grep "data_unregister($data" $1)
															
 
																-	if test "$x" == "" ; then
															
 
																-	    x=$(grep "data_unregister_no_coherency($data" $1)
															
 
																-	    if test "$x" == "" ; then
															
 
																-		echo "Error. File <${filecolor}$1${stcolor}>. Handle <${datacolor}$data${stcolor}> is not unregistered"
															
 
																-	    fi
															
 
																-	fi
															
 
																-    done
															
 
																-}
															
 
																-
															
 
																-for f in $(find tests -type f -not -path "*svn*") ; do process_file $f ; done
															
 
																-for f in $(find examples -type f -not -path "*svn*") ; do process_file $f ; done
															
--- a/tools/dev/experimental/cuda_check_return_values.cocci
+++ b/tools/dev/experimental/cuda_check_return_values.cocci
@@ -51,7 +51,9 @@ E@p = cuda_func(...);
 
																 @initialize:python depends on report || org@
															
 
																+from re import sub
															
 
																 msg = "Ignoring the return value of %s."
															
 
																+orgmsg = sub(r'(%[a-z])', r'=\1=', msg)
															
 
																 @no_assignment@
															
 
																 identifier cuda_func =~ "^cuda";
															
@@ -70,7 +72,7 @@ position no_assignment.p;
 
																 p << no_assignment.p;
															
 
																 func << no_assignment.cuda_func;
															
 
																 @@
															
 
																-coccilib.org.print_todo(p[0], msg % func)
															
 
																+coccilib.org.print_todo(p[0], orgmsg % func)
															
 
																 @depends on no_assignment && patch@
															
 
																 identifier no_assignment.cuda_func;
															
--- a/tools/dev/experimental/destroy_task_on_error.cocci
+++ b/tools/dev/experimental/destroy_task_on_error.cocci
@@ -0,0 +1,123 @@
 
																+/*
															
 
																+ * StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																+ *
															
 
																+ * Copyright (C) 2012 inria
															
 
																+ *
															
 
																+ * StarPU is free software; you can redistribute it and/or modify
															
 
																+ * it under the terms of the GNU Lesser General Public License as published by
															
 
																+ * the Free Software Foundation; either version 2.1 of the License, or (at
															
 
																+ * your option) any later version.
															
 
																+ *
															
 
																+ * StarPU is distributed in the hope that it will be useful, but
															
 
																+ * WITHOUT ANY WARRANTY; without even the implied warranty of
															
 
																+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
															
 
																+ *
															
 
																+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
															
 
																+ */
															
 
																+
															
 
																+/*
															
 
																+ * When the submission of a task fails, StarPU cannot destroy the task, even if
															
 
																+ * the destroy flag is set. So we have to destroy it ourselves while handling
															
 
																+ * the error.
															
 
																+ *
															
 
																+ * TODO: match if statements without braces.
															
 
																+ */
															
 
																+
															
 
																+virtual context
															
 
																+virtual org
															
 
																+virtual patch
															
 
																+virtual report
															
 
																+
															
 
																+@initialize:python depends on org || report@
															
 
																+msg = "Warning: in %s(): "
															
 
																+msg+= "\"%s\" should probably be destroyed in the body of the if statement"
															
 
																+from re import sub
															
 
																+orgmsg = sub(r'(%[a-z])', r'=\1=', msg)
															
 
																+
															
 
																+@r@
															
 
																+local idexpression t;
															
 
																+identifier err;
															
 
																+identifier f;
															
 
																+position p;
															
 
																+@@
															
 
																+f(...)
															
 
																+{
															
 
																+<+...
															
 
																+(
															
 
																+err = starpu_task_submit(t);
															
 
																+|
															
 
																+int err = starpu_task_submit(t);
															
 
																+)
															
 
																+if@p(
															
 
																+(
															
 
																+err == -ENODEV
															
 
																+|
															
 
																+err != 0
															
 
																+|
															
 
																+STARPU_UNLIKELY(err == -ENODEV)
															
 
																+|
															
 
																+STARPU_UNLIKELY(err != 0)
															
 
																+)
															
 
																+ )
															
 
																+{
															
 
																+... when != starpu_task_destroy(t);
															
 
																+    when != exit(...);
															
 
																+    when != STARPU_ASSERT(...);
															
 
																+    when != return 77;
															
 
																+}
															
 
																+...+>
															
 
																+}
															
 
																+
															
 
																+// Context mode.
															
 
																+@depends on r && context@
															
 
																+position r.p;
															
 
																+@@
															
 
																+*if@p(...) { ... }
															
 
																+
															
 
																+// Org mode.
															
 
																+@script:python depends on r && org@
															
 
																+p << r.p;
															
 
																+t << r.t;
															
 
																+f << r.f;
															
 
																+@@
															
 
																+coccilib.org.print_todo(p[0], orgmsg % (f,t))
															
 
																+
															
 
																+// Patch mode.
															
 
																+// XXX: Instead of "..." we could use a statement list (statement list SS). But
															
 
																+// it does not seem to work if there is a "return" statement in the body
															
 
																+// of the if statement.
															
 
																+// Using "..." makes the patch ugly, but this may be fixed in a future version
															
 
																+// of spatch.
															
 
																+@depends on r && patch@
															
 
																+local idexpression r.t;
															
 
																+position r.p;
															
 
																+identifier r.f;
															
 
																+@@
															
 
																+f(...)
															
 
																+{
															
 
																+<+...
															
 
																+if@p (...)
															
 
																+(
															
 
																+{
															
 
																+...
															
 
																++ t->destroy = 0;
															
 
																++ starpu_task_destroy(t);
															
 
																+return ...;
															
 
																+}
															
 
																+|
															
 
																+{
															
 
																+...
															
 
																++ t->destroy = 0;
															
 
																++ starpu_task_destroy(t);
															
 
																+}
															
 
																+)
															
 
																+...+>
															
 
																+}
															
 
																+
															
 
																+// Report mode.
															
 
																+@script:python depends on r && report@
															
 
																+p << r.p;
															
 
																+t << r.t;
															
 
																+f << r.f;
															
 
																+@@
															
 
																+coccilib.report.print_report(p[0], msg % (f,t))
															
--- a/tools/dev/experimental/destroy_task_on_error_test.c
+++ b/tools/dev/experimental/destroy_task_on_error_test.c
@@ -0,0 +1,65 @@
 
																+/*
															
 
																+ * StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																+ *
															
 
																+ * Copyright (C) 2012 inria
															
 
																+ *
															
 
																+ * StarPU is free software; you can redistribute it and/or modify
															
 
																+ * it under the terms of the GNU Lesser General Public License as published by
															
 
																+ * the Free Software Foundation; either version 2.1 of the License, or (at
															
 
																+ * your option) any later version.
															
 
																+ *
															
 
																+ * StarPU is distributed in the hope that it will be useful, but
															
 
																+ * WITHOUT ANY WARRANTY; without even the implied warranty of
															
 
																+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
															
 
																+ *
															
 
																+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
															
 
																+ */
															
 
																+static void
															
 
																+good_0(void)
															
 
																+{
															
 
																+	struct starpu_task *task;
															
 
																+	task = starpu_task_create();
															
 
																+	int ret = starpu_task_submit(task);
															
 
																+	if (ret == -ENODEV)
															
 
																+	{
															
 
																+		fprintf(stderr, "fail\n");
															
 
																+		starpu_task_destroy(task);
															
 
																+	}
															
 
																+}
															
 
																+
															
 
																+static void
															
 
																+bad_0(void)
															
 
																+{
															
 
																+	struct starpu_task *task1, *task2;
															
 
																+
															
 
																+	task1 = starpu_task_create();
															
 
																+	int ret = starpu_task_submit(task1);
															
 
																+	if (ret == -ENODEV)
															
 
																+	{
															
 
																+		fprintf(stderr, "Fail\n");
															
 
																+	}
															
 
																+
															
 
																+	task2 = starpu_task_create();
															
 
																+	ret = starpu_task_submit(task2);
															
 
																+	if (ret == -ENODEV)
															
 
																+	{
															
 
																+		fprintf(stderr, "Fail\n");
															
 
																+	}
															
 
																+}
															
 
																+
															
 
																+static void
															
 
																+bad_unlikely(void)
															
 
																+{
															
 
																+	struct starpu_task *task;
															
 
																+
															
 
																+	task = starpu_task_create();
															
 
																+
															
 
																+	int ret = starpu_task_submit(task);
															
 
																+	if (STARPU_UNLIKELY(ret == -ENODEV))
															
 
																+	{
															
 
																+		error();
															
 
																+		return 1;
															
 
																+	}
															
 
																+
															
 
																+	starpu_task_destroy(task);
															
 
																+}
															
--- a/tools/dev/experimental/function_call_termination_condition.cocci
+++ b/tools/dev/experimental/function_call_termination_condition.cocci
@@ -34,6 +34,8 @@ virtual report
 
																 @initialize:python depends on report || org@
															
 
																 msg="Function call in the termination condition of a for loop"
															
 
																+from re import sub
															
 
																+orgmsg = sub(r'(%[a-z])', r'=\1=', msg)
															
 
																 @r@
															
 
																 type t;
															
@@ -62,17 +64,16 @@ expression r.E1;
 
																 @script:python depends on r && org@
															
 
																 p << r.p;
															
 
																 @@
															
 
																-coccilib.org.print_todo(p[0], msg)
															
 
																+coccilib.org.print_todo(p[0], orgmsg)
															
 
																 @depends on r && patch@
															
 
																-type r.t;
															
 
																 expression r.E1, E2, E3;
															
 
																 identifier r.it;
															
 
																 position r.p;
															
 
																 @@
															
 
																 -for@p(it = E1; it < E3; E2) 
															
 
																-+t max = E3;
															
 
																-+for(it = E1; i < max; E2) 
															
 
																++max = E3;
															
 
																++for(it = E1; it < max; E2)
															
 
																 {
															
 
																 ...
															
 
																 }
															
--- a/tools/dev/experimental/name_codelets.cocci
+++ b/tools/dev/experimental/name_codelets.cocci
@@ -31,6 +31,8 @@ virtual report
 
																 @initialize:python depends on org || report@
															
 
																 msg = "Warning: %s has no attribute name"
															
 
																+from re import sub
															
 
																+orgmsg = sub(r'(%[a-z])', r'=\1=', msg)
															
 
																 @found@
															
 
																 identifier cl;
															
@@ -61,7 +63,7 @@ position found.p;
 
																 cl << found.cl;
															
 
																 p << found.p;
															
 
																 @@
															
 
																-coccilib.org.print_todo(p[0], msg % cl)
															
 
																+coccilib.org.print_todo(p[0], orgmsg % cl)
															
 
																 // Patch mode.
															
 
																 @script:python stringify depends on found && !named && patch@
															
--- a/tools/dev/experimental/not_unlocked_mutex.cocci
+++ b/tools/dev/experimental/not_unlocked_mutex.cocci
@@ -21,6 +21,8 @@ virtual report
 
																 @initialize:python depends on report || org@
															
 
																 msg="The mutex \"%s\" is not unlocked when leaving \"%s\""
															
 
																+from re import sub
															
 
																+orgmsg = sub(r'(%[a-z])', r'=\1=', msg)
															
 
																 @r@
															
 
																 expression E;
															
@@ -53,7 +55,7 @@ f << r.func;
 
																 E << r.E;
															
 
																 @@
															
 
																 for p in ps:
															
 
																-	coccilib.org.print_todo(p, msg % (E, f))
															
 
																+	coccilib.org.print_todo(p, orgmsg % (E, f))
															
 
																 @depends on r && patch@
															
--- a/tools/dev/experimental/opencl_check_return_values.cocci
+++ b/tools/dev/experimental/opencl_check_return_values.cocci
@@ -51,5 +51,5 @@ coccilib.report.print_report(p[0], msg)
 
																 p << ignored_return_value.p;
															
 
																 func << ignored_return_value.opencl_func;
															
 
																 @@
															
 
																-msg = "Ignoring the return value of %s." % func
															
 
																+msg = "Ignoring the return value of =%s=." % func
															
 
																 coccilib.org.print_todo(p[0], msg)
															
--- a/tools/dev/experimental/skip_valgrind.cocci
+++ b/tools/dev/experimental/skip_valgrind.cocci
@@ -21,6 +21,8 @@ virtual report
 
																 @initialize:python depends on report || org@
															
 
																 msg="Should you add STARPU_SKIP_IF_VALGRIND; at the beginning of this function ?"
															
 
																+from re import sub
															
 
																+orgmsg = sub(r'(%[a-z])', r'=\1=', msg)
															
 
																 @find_codelet@
															
 
																 identifier a, b;
															
@@ -66,7 +68,7 @@ position find_codelet.p;
 
																 @script:python depends on find_codelet && !is_empty_codelet && !is_already_ok && org@
															
 
																 p << find_codelet.p;
															
 
																 @@
															
 
																-coccilib.org.print_todo(p[0], msg)
															
 
																+coccilib.org.print_todo(p[0], orgmsg)
															
 
																 @depends on find_codelet && !is_empty_codelet && !is_already_ok && patch@
															
 
																 identifier find_codelet.a, find_codelet.b;
															
--- a/tools/dev/experimental/unchecked_starpu_function_calls.cocci
+++ b/tools/dev/experimental/unchecked_starpu_function_calls.cocci
@@ -27,6 +27,8 @@ virtual report
 
																 @initialize:python depends on report || org@
															
 
																 msg = "Unchecked call to %s"
															
 
																+from re import sub
															
 
																+orgmsg = sub(r'(%[a-z])', r'=\1=', msg)
															
 
																 @unchecked_starpu_func_call@
															
 
																 identifier f;
															
@@ -59,7 +61,7 @@ f(...)
 
																 f << unchecked_starpu_func_call.starpu_function;
															
 
																 p << unchecked_starpu_func_call.p;
															
 
																 @@
															
 
																-coccilib.org.print_todo(p[0], msg % f)
															
 
																+coccilib.org.print_todo(p[0], orgmsg % f)
															
 
																 // Patch mode.
															
 
																 @has_ret depends on unchecked_starpu_func_call@
															
--- a/tools/dev/experimental/use_starpu_macros.cocci
+++ b/tools/dev/experimental/use_starpu_macros.cocci
@@ -24,6 +24,8 @@ virtual report
 
																 @initialize:python depends on report || org@
															
 
																 d = { 'abort':'STARPU_ABORT', 'assert':'STARPU_ASSERT'}
															
 
																 msg = "Please use %s rather than %s."
															
 
																+from re import sub
															
 
																+orgmsg = sub(r'(%[a-z])', r'=\1=', msg)
															
 
																 @r@
															
 
																 identifier f =~ "abort|assert";
															
@@ -88,7 +90,7 @@ expression E1, E2;
 
																 p << r.p;
															
 
																 f << r.f;
															
 
																 @@
															
 
																-coccilib.org.print_todo(p[0], msg % (d[str(f)], f))
															
 
																+coccilib.org.print_todo(p[0], orgmsg % (d[str(f)], f))
															
 
																 @script:python depends on min && org@
															
 
																 p << min.p;
															
--- a/tools/dev/experimental/use_starpu_pthread_macros.cocci
+++ b/tools/dev/experimental/use_starpu_pthread_macros.cocci
@@ -43,6 +43,8 @@ d = {
 
																 'pthread_spin_unlock'     : '_STARPU_PTHREAD_SPIN_UNLOCK'
															
 
																 }
															
 
																 msg = "Use %s instead of %s."
															
 
																+from re import sub
															
 
																+orgmsg = sub(r'(%[a-z])', r'=\1=', msg)
															
 
																 @r@
															
 
																 identifier f =~ "^pthread_";
															
@@ -66,9 +68,9 @@ p << r.p;
 
																 f << r.f;
															
 
																 @@
															
 
																 if str(f) in d.keys():
															
 
																-	coccilib.org.print_todo(p[0], msg % (d[str(f)], f))
															
 
																+	coccilib.org.print_todo(p[0], orgmsg % (d[str(f)], f))
															
 
																 else:
															
 
																-	coccilib.org.print_todo(p[0], "Shouldn't %s be wrapped in a macro ?" % str(f))
															
 
																+	coccilib.org.print_todo(p[0], "Shouldn't =%s= be wrapped in a macro ?" % str(f))
															
 
																 //
															
--- a/tools/dev/internal/check_unrenamed_list_types.sh
+++ b/tools/dev/internal/check_unrenamed_list_types.sh
--- a/tools/dev/internal/rename_internal.sed
+++ b/tools/dev/internal/rename_internal.sed
--- a/tools/dev/internal/rename_internal.sh
+++ b/tools/dev/internal/rename_internal.sh
--- a/tools/dev/mycocci.sh
+++ b/tools/dev/mycocci.sh
@@ -121,7 +121,7 @@ do
 
																 		scripts_dir=$OPTARG;
															
 
																 		;;
															
 
																 	t)
															
 
																-		target=$OPTARG;
															
 
																+		target="$target $OPTARG";
															
 
																 		;;
															
 
																 	\?)
															
 
																 		echo "Invalid option -$OPTARG"
															
--- a/tools/dev/starpu_use_macro.sed
+++ b/tools/dev/starpu_use_macro.sed
@@ -1,15 +0,0 @@
 
																-# StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																-#
															
 
																-# Copyright (C) 2012 INRIA
															
 
																-#
															
 
																-# StarPU is free software; you can redistribute it and/or modify
															
 
																-# it under the terms of the GNU Lesser General Public License as published by
															
 
																-# the Free Software Foundation; either version 2.1 of the License, or (at
															
 
																-# your option) any later version.
															
 
																-#
															
 
																-# StarPU is distributed in the hope that it will be useful, but
															
 
																-# WITHOUT ANY WARRANTY; without even the implied warranty of
															
 
																-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
															
 
																-#
															
 
																-# See the GNU Lesser General Public License in COPYING.LGPL for more details.
															
 
																-s/^#if STARPU_USE_/#ifdef STARPU_USE_/
															
--- a/tools/dev/starpu_use_macro.sh
+++ b/tools/dev/starpu_use_macro.sh
@@ -15,4 +15,4 @@
 
																 #
															
 
																 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
															
 
																-find . -type f -name "*.c" -not -path "*svn*" -exec sed -i -f $(dirname $0)/starpu_use_macro.sed {} \;
															
 
																+find . -type f -name "*.c" -not -path "*svn*" -exec sed -i 's/^#if STARPU_USE_/#ifdef STARPU_USE_/' {} \;
															
--- a/tools/valgrind/starpu.suppr
+++ b/tools/valgrind/starpu.suppr
@@ -2,7 +2,7 @@
 
																    config.running is not racy from starpu_shutdown
															
 
																    Helgrind:Race
															
 
																    fun:starpu_shutdown
															
 
																-   fun:main
															
 
																+   ...
															
 
																 }
															
 
																 {
															
@@ -25,3 +25,25 @@
 
																    fun:_starpu_msi_cache_miss
															
 
																    ...
															
 
																 }
															
 
																+
															
 
																+{
															
 
																+   known race, but not problematic in practice, see comment in _starpu_tag_clear
															
 
																+   Helgrind:LockOrder
															
 
																+   ...
															
 
																+   fun:_starpu_tag_free
															
 
																+   fun:_starpu_htbl_clear_tags
															
 
																+   ...
															
 
																+   fun:_starpu_tag_clear
															
 
																+   fun:starpu_shutdown
															
 
																+   ...
															
 
																+}
															
 
																+
															
 
																+
															
 
																+{
															
 
																+   There is actually no race on current_mode, because the mode can not change unexpectedly, until _starpu_notify_data_dependencies() is called further down. Valgrind can not know about such software rwlock.
															
 
																+   Helgrind:Race
															
 
																+   fun:_starpu_release_data_on_node
															
 
																+   fun:_starpu_push_task_output
															
 
																+   ...
															
 
																+}
															
 
																+