Selaa lähdekoodia

another merge

Andra Hugo 13 vuotta sitten
vanhempi
commit
058743e72a
53 muutettua tiedostoa jossa 452 lisäystä ja 167 poistoa
  1. 37 0
      .gitignore
  2. 2 1
      doc/chapters/advanced-examples.texi
  3. 2 0
      examples/basic_examples/block.c
  4. 5 2
      examples/basic_examples/vector_scal_c.c
  5. 5 0
      examples/lu/lu_example.c
  6. 4 0
      examples/mult/xgemm.c
  7. 1 0
      examples/opt/pi/pi.c
  8. 1 13
      examples/opt/pi/pi_redux.c
  9. 8 1
      gcc-plugin/src/starpu.c
  10. 1 1
      gcc-plugin/tests/Makefile.am
  11. 1 0
      gcc-plugin/tests/mocks.h
  12. 1 1
      src/core/dependencies/data_concurrency.c
  13. 5 0
      src/core/dependencies/tags.c
  14. 8 5
      src/core/perfmodel/perfmodel_bus.c
  15. 6 3
      src/core/perfmodel/perfmodel_history.c
  16. 28 12
      src/core/topology.c
  17. 5 1
      src/core/workers.c
  18. 3 1
      src/datawizard/coherency.c
  19. 6 0
      src/datawizard/filters.c
  20. 3 5
      src/datawizard/interfaces/block_interface.c
  21. 3 5
      src/datawizard/interfaces/matrix_interface.c
  22. 3 2
      src/datawizard/interfaces/multiformat_interface.c
  23. 1 0
      src/datawizard/user_interactions.c
  24. 6 3
      src/drivers/cpu/driver_cpu.c
  25. 5 3
      src/drivers/cuda/driver_cuda.c
  26. 6 9
      src/drivers/driver_common/driver_common.c
  27. 4 4
      src/drivers/driver_common/driver_common.h
  28. 5 3
      src/drivers/opencl/driver_opencl.c
  29. 5 1
      src/sched_policies/heft.c
  30. 7 9
      src/sched_policies/parallel_heft.c
  31. 20 11
      src/util/malloc.c
  32. 8 1
      src/util/starpu_insert_task.c
  33. 2 2
      tests/main/subgraph_repeat.c
  34. 4 0
      tests/overlap/overlap.c
  35. 0 37
      tools/dev/check_register.sh
  36. 3 1
      tools/dev/experimental/cuda_check_return_values.cocci
  37. 123 0
      tools/dev/experimental/destroy_task_on_error.cocci
  38. 65 0
      tools/dev/experimental/destroy_task_on_error_test.c
  39. 5 4
      tools/dev/experimental/function_call_termination_condition.cocci
  40. 3 1
      tools/dev/experimental/name_codelets.cocci
  41. 3 1
      tools/dev/experimental/not_unlocked_mutex.cocci
  42. 1 1
      tools/dev/experimental/opencl_check_return_values.cocci
  43. 3 1
      tools/dev/experimental/skip_valgrind.cocci
  44. 3 1
      tools/dev/experimental/unchecked_starpu_function_calls.cocci
  45. 3 1
      tools/dev/experimental/use_starpu_macros.cocci
  46. 4 2
      tools/dev/experimental/use_starpu_pthread_macros.cocci
  47. 0 0
      tools/dev/internal/check_unrenamed_list_types.sh
  48. 0 0
      tools/dev/internal/rename_internal.sed
  49. 0 0
      tools/dev/internal/rename_internal.sh
  50. 1 1
      tools/dev/mycocci.sh
  51. 0 15
      tools/dev/starpu_use_macro.sed
  52. 1 1
      tools/dev/starpu_use_macro.sh
  53. 23 1
      tools/valgrind/starpu.suppr

+ 37 - 0
.gitignore

@@ -248,3 +248,40 @@ starpu.log
 /tools/starpu_perfmodel_plot.1
 /tools/starpu_perfmodel_plot.1
 /starpu-1.0.pc
 /starpu-1.0.pc
 /gcc-plugin/examples/cholesky/cholesky
 /gcc-plugin/examples/cholesky/cholesky
+/gcc-plugin/tests/*.log
+/test/*.log
+/examples/*.log
+/tests/main/declare_deps_after_submission
+/tests/main/declare_deps_after_submission_synchronous
+/tests/main/declare_deps_in_callback
+/tests/main/deprecated
+/tests/main/deprecated_buffer
+/tests/main/deprecated_func
+/tests/main/empty_task
+/tests/main/empty_task_chain
+/tests/main/empty_task_sync_point
+/tests/main/empty_task_sync_point_tasks
+/tests/main/execute_on_a_specific_worker
+/tests/main/get_current_task
+/tests/main/insert_task
+/tests/main/multiformat_data_release
+/tests/main/multiformat_handle_conversion
+/tests/main/multithreaded
+/tests/main/multithreaded_init
+/tests/main/regenerate
+/tests/main/restart
+/tests/main/starpu_init
+/tests/main/starpu_task_bundle
+/tests/main/starpu_task_wait
+/tests/main/starpu_task_wait_for_all
+/tests/main/starpu_worker_exists
+/tests/main/static_restartable
+/tests/main/static_restartable_tag
+/tests/main/static_restartable_using_initializer
+/tests/main/subgraph_repeat
+/tests/main/subgraph_repeat_regenerate
+/tests/main/tag_wait_api
+/tests/main/task_wait_api
+/tests/main/wait_all_regenerable_tasks
+/tools/starpu_workers_activity
+/tests/datawizard/interfaces/copy_interfaces

+ 2 - 1
doc/chapters/advanced-examples.texi

@@ -597,7 +597,8 @@ parallel CPU implementation of the computation to be achieved. This can also be
 useful to improve the load balance between slow CPUs and fast GPUs: since CPUs
 useful to improve the load balance between slow CPUs and fast GPUs: since CPUs
 work collectively on a single task, the completion time of tasks on CPUs becomes
 work collectively on a single task, the completion time of tasks on CPUs becomes
 comparable to the completion time on GPUs, thus relieving from granularity
 comparable to the completion time on GPUs, thus relieving from granularity
-discrepancy concerns.
+discrepancy concerns. Hwloc support needs to be enabled to get good performance,
+otherwise StarPU will not know how to better group cores.
 
 
 Two modes of execution exist to accommodate existing usages.
 Two modes of execution exist to accommodate existing usages.
 
 

+ 2 - 0
examples/basic_examples/block.c

@@ -60,6 +60,8 @@ int execute_on(uint32_t where, device_func func, float *block, int pnx, int pny,
         if (STARPU_UNLIKELY(ret == -ENODEV))
         if (STARPU_UNLIKELY(ret == -ENODEV))
 	{
 	{
                 FPRINTF(stderr, "No worker may execute this task\n");
                 FPRINTF(stderr, "No worker may execute this task\n");
+		task->destroy = 0;
+                starpu_task_destroy(task);
                 return 1;
                 return 1;
 	}
 	}
 
 

+ 5 - 2
examples/basic_examples/vector_scal_c.c

@@ -53,13 +53,14 @@ static struct starpu_codelet cl =
 	.model = &vector_scal_model
 	.model = &vector_scal_model
 };
 };
 
 
-void compute_(int *F_NX, float *vector)
+int compute_(int *F_NX, float *vector)
 {
 {
         int NX = *F_NX;
         int NX = *F_NX;
 	int ret;
 	int ret;
 
 
 	/* Initialize StarPU with default configuration */
 	/* Initialize StarPU with default configuration */
 	ret = starpu_init(NULL);
 	ret = starpu_init(NULL);
+	if (ret == -ENODEV) return 77;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 
 	/* Tell StarPU to associate the "vector" vector with the "vector_handle"
 	/* Tell StarPU to associate the "vector" vector with the "vector_handle"
@@ -98,7 +99,7 @@ void compute_(int *F_NX, float *vector)
 
 
 	/* execute the task on any eligible computational resource */
 	/* execute the task on any eligible computational resource */
 	ret = starpu_task_submit(task);
 	ret = starpu_task_submit(task);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 
 
 	/* StarPU does not need to manipulate the array anymore so we can stop
 	/* StarPU does not need to manipulate the array anymore so we can stop
  	 * monitoring it */
  	 * monitoring it */
@@ -106,4 +107,6 @@ void compute_(int *F_NX, float *vector)
 
 
 	/* terminate StarPU, no task can be submitted after */
 	/* terminate StarPU, no task can be submitted after */
 	starpu_shutdown();
 	starpu_shutdown();
+
+	return ret;
 }
 }

+ 5 - 0
examples/lu/lu_example.c

@@ -297,6 +297,11 @@ int main(int argc, char **argv)
 
 
 	parse_args(argc, argv);
 	parse_args(argc, argv);
 
 
+#ifdef STARPU_SLOW_MACHINE
+	size /= 4;
+	nblocks /= 4;
+#endif
+
 	ret = starpu_init(NULL);
 	ret = starpu_init(NULL);
 	if (ret == -ENODEV)
 	if (ret == -ENODEV)
 		return 77;
 		return 77;

+ 4 - 0
examples/mult/xgemm.c

@@ -275,6 +275,10 @@ int main(int argc, char **argv)
 
 
 	parse_args(argc, argv);
 	parse_args(argc, argv);
 
 
+#ifdef STARPU_SLOW_MACHINE
+	niter /= 10;
+#endif
+
 	ret = starpu_init(NULL);
 	ret = starpu_init(NULL);
 	if (ret == -ENODEV)
 	if (ret == -ENODEV)
 		return 77;
 		return 77;

+ 1 - 0
examples/opt/pi/pi.c

@@ -88,6 +88,7 @@ int main(int argc, char **argv)
 	parse_args(argc, argv);
 	parse_args(argc, argv);
 
 
 	ret = starpu_init(NULL);
 	ret = starpu_init(NULL);
+	if (ret == -ENODEV) return 77;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 
 	/* Initialize the random number generator */
 	/* Initialize the random number generator */

+ 1 - 13
examples/opt/pi/pi_redux.c

@@ -189,19 +189,6 @@ static void pi_func_cuda(void *descr[], void *cl_arg __attribute__ ((unused)))
 }
 }
 #endif
 #endif
 
 
-/* The amount of work does not depend on the data size at all :) */
-static size_t size_base(struct starpu_task *task, unsigned nimpl)
-{
-	return NSHOT_PER_TASK;
-}
-
-static struct starpu_perfmodel model =
-{
-	.type = STARPU_HISTORY_BASED,
-	.size_base = size_base,
-	.symbol = "monte_carlo_pi_redux"
-};
-
 static struct starpu_codelet pi_cl =
 static struct starpu_codelet pi_cl =
 {
 {
 	.where =
 	.where =
@@ -319,6 +306,7 @@ int main(int argc, char **argv)
 	parse_args(argc, argv);
 	parse_args(argc, argv);
 
 
 	ret = starpu_init(NULL);
 	ret = starpu_init(NULL);
+	if (ret == -ENODEV) return 77;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 
 	/* Launch a Random Number Generator (RNG) on each worker */
 	/* Launch a Random Number Generator (RNG) on each worker */

+ 8 - 1
gcc-plugin/src/starpu.c

@@ -1735,6 +1735,12 @@ build_codelet_initializer (tree task_decl)
     return init;
     return init;
   }
   }
 
 
+  tree codelet_name ()
+  {
+    const char *name = IDENTIFIER_POINTER (DECL_NAME (task_decl));
+    return build_string_literal (strlen (name) + 1, name);
+  }
+
   tree where_init (tree impls)
   tree where_init (tree impls)
   {
   {
     tree impl;
     tree impl;
@@ -1841,7 +1847,8 @@ build_codelet_initializer (tree task_decl)
   impls = task_implementation_list (task_decl);
   impls = task_implementation_list (task_decl);
 
 
   inits =
   inits =
-    chain_trees (field_initializer ("where", where_init (impls)),
+    chain_trees (field_initializer ("name", codelet_name ()),
+		 field_initializer ("where", where_init (impls)),
 		 field_initializer ("nbuffers", pointer_arg_count ()),
 		 field_initializer ("nbuffers", pointer_arg_count ()),
 		 field_initializer ("modes", access_mode_array ()),
 		 field_initializer ("modes", access_mode_array ()),
 		 field_initializer ("cpu_funcs",
 		 field_initializer ("cpu_funcs",

+ 1 - 1
gcc-plugin/tests/Makefile.am

@@ -74,4 +74,4 @@ check-hook:
 endif !HAVE_GUILE
 endif !HAVE_GUILE
 
 
 showcheck:
 showcheck:
-	-cat $(TEST_LOGS) /dev/null
+	-cat $(TEST_LOGS) /dev/null

+ 1 - 0
gcc-plugin/tests/mocks.h

@@ -57,6 +57,7 @@ const struct insert_task_argument *expected_insert_task_arguments;
 int
 int
 starpu_insert_task (struct starpu_codelet *cl, ...)
 starpu_insert_task (struct starpu_codelet *cl, ...)
 {
 {
+  assert (cl->name != NULL && strlen (cl->name) > 0);
   assert (cl->where == (STARPU_CPU | STARPU_OPENCL));
   assert (cl->where == (STARPU_CPU | STARPU_OPENCL));
 
 
   /* TODO: Call `cpu_func' & co. and check whether they do the right
   /* TODO: Call `cpu_func' & co. and check whether they do the right

+ 1 - 1
src/core/dependencies/data_concurrency.c

@@ -211,9 +211,9 @@ static unsigned _submit_job_enforce_data_deps(struct _starpu_job *j, unsigned st
 			 * _starpu_compar_handles.  */
 			 * _starpu_compar_handles.  */
 			continue;
 			continue;
 
 
+                j->task->status = STARPU_TASK_BLOCKED_ON_DATA;
                 if (attempt_to_submit_data_request_from_job(j, buf))
                 if (attempt_to_submit_data_request_from_job(j, buf))
 		{
 		{
-                        j->task->status = STARPU_TASK_BLOCKED_ON_DATA;
 			return 1;
 			return 1;
                 }
                 }
 	}
 	}

+ 5 - 0
src/core/dependencies/tags.c

@@ -106,6 +106,7 @@ static void _starpu_tag_free(void *_tag)
 #endif
 #endif
 
 
 		_starpu_spin_unlock(&tag->lock);
 		_starpu_spin_unlock(&tag->lock);
+		_starpu_spin_destroy(&tag->lock);
 
 
 		free(tag);
 		free(tag);
 	}
 	}
@@ -128,6 +129,10 @@ void _starpu_tag_clear(void)
 {
 {
 	_STARPU_PTHREAD_RWLOCK_WRLOCK(&tag_global_rwlock);
 	_STARPU_PTHREAD_RWLOCK_WRLOCK(&tag_global_rwlock);
 
 
+	/* XXX: _starpu_tag_free takes the tag spinlocks while we are keeping
+	 * the global rwlock. This contradicts the lock order of
+	 * starpu_tag_wait_array. Should not be a problem in practice since
+	 * _starpu_tag_clear is called at shutdown only. */
 	_starpu_htbl_clear_tags(&tag_htbl, 0, _starpu_tag_free);
 	_starpu_htbl_clear_tags(&tag_htbl, 0, _starpu_tag_free);
 
 
 	_STARPU_PTHREAD_RWLOCK_UNLOCK(&tag_global_rwlock);
 	_STARPU_PTHREAD_RWLOCK_UNLOCK(&tag_global_rwlock);

+ 8 - 5
src/core/perfmodel/perfmodel_bus.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
+ * Copyright (C) 2009-2012  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -498,8 +498,10 @@ static void benchmark_all_gpu_devices(void)
 	hwloc_topology_load(hwtopology);
 	hwloc_topology_load(hwtopology);
 #endif
 #endif
 
 
-	/* TODO: use hwloc */
-#ifdef __linux__
+#ifdef STARPU_HAVE_HWLOC
+	hwloc_cpuset_t former_cpuset = hwloc_bitmap_alloc();
+	hwloc_get_cpubind(hwtopology, former_cpuset, HWLOC_CPUBIND_THREAD);
+#elif __linux__
 	/* Save the current cpu binding */
 	/* Save the current cpu binding */
 	cpu_set_t former_process_affinity;
 	cpu_set_t former_process_affinity;
 	int ret;
 	int ret;
@@ -545,8 +547,9 @@ static void benchmark_all_gpu_devices(void)
 	}
 	}
 #endif
 #endif
 
 
-	/* FIXME: use hwloc */
-#ifdef __linux__
+#ifdef STARPU_HAVE_HWLOC
+	hwloc_set_cpubind(hwtopology, former_cpuset, HWLOC_CPUBIND_THREAD);
+#elif __linux__
 	/* Restore the former affinity */
 	/* Restore the former affinity */
 	ret = sched_setaffinity(0, sizeof(former_process_affinity), &former_process_affinity);
 	ret = sched_setaffinity(0, sizeof(former_process_affinity), &former_process_affinity);
 	if (ret)
 	if (ret)

+ 6 - 3
src/core/perfmodel/perfmodel_history.c

@@ -966,10 +966,11 @@ double _starpu_non_linear_regression_based_job_expected_perf(struct starpu_perfm
 	{
 	{
 		uint32_t key = _starpu_compute_buffers_footprint(model, arch, nimpl, j);
 		uint32_t key = _starpu_compute_buffers_footprint(model, arch, nimpl, j);
 		struct starpu_per_arch_perfmodel *per_arch_model = &model->per_arch[arch][nimpl];
 		struct starpu_per_arch_perfmodel *per_arch_model = &model->per_arch[arch][nimpl];
-		struct starpu_htbl32_node *history = per_arch_model->history;
+		struct starpu_htbl32_node *history;
 		struct starpu_history_entry *entry;
 		struct starpu_history_entry *entry;
 
 
 		_STARPU_PTHREAD_RWLOCK_RDLOCK(&model->model_rwlock);
 		_STARPU_PTHREAD_RWLOCK_RDLOCK(&model->model_rwlock);
+		history = per_arch_model->history;
 		entry = (struct starpu_history_entry *) _starpu_htbl_search_32(history, key);
 		entry = (struct starpu_history_entry *) _starpu_htbl_search_32(history, key);
 		_STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);
 		_STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);
 
 
@@ -997,11 +998,13 @@ double _starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, e
 
 
 	per_arch_model = &model->per_arch[arch][nimpl];
 	per_arch_model = &model->per_arch[arch][nimpl];
 
 
+	_STARPU_PTHREAD_RWLOCK_RDLOCK(&model->model_rwlock);
 	history = per_arch_model->history;
 	history = per_arch_model->history;
-	if (!history)
+	if (!history) {
+		_STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);
 		return NAN;
 		return NAN;
+	}
 
 
-	_STARPU_PTHREAD_RWLOCK_RDLOCK(&model->model_rwlock);
 	entry = (struct starpu_history_entry *) _starpu_htbl_search_32(history, key);
 	entry = (struct starpu_history_entry *) _starpu_htbl_search_32(history, key);
 	_STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);
 	_STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);
 
 

+ 28 - 12
src/core/topology.c

@@ -290,7 +290,7 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config,
 	_starpu_initialize_workers_bindid(config);
 	_starpu_initialize_workers_bindid(config);
 
 
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
-	int ncuda = -1;
+	int ncuda;
 	ncuda = starpu_get_env_number("STARPU_NCUDA");
 	ncuda = starpu_get_env_number("STARPU_NCUDA");
 
 
 	/* STARPU_NCUDA is not set. Did the user specify anything ? */
 	/* STARPU_NCUDA is not set. Did the user specify anything ? */
@@ -303,14 +303,30 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config,
 		/* The user did not disable CUDA. We need to initialize CUDA
 		/* The user did not disable CUDA. We need to initialize CUDA
  		 * early to count the number of devices */
  		 * early to count the number of devices */
 		_starpu_init_cuda();
 		_starpu_init_cuda();
+		int nb_devices = _starpu_get_cuda_device_count();
 
 
 		if (ncuda == -1)
 		if (ncuda == -1)
 		{
 		{
 			/* Nothing was specified, so let's choose ! */
 			/* Nothing was specified, so let's choose ! */
-			ncuda = STARPU_MIN(_starpu_get_cuda_device_count(), STARPU_MAXCUDADEVS);
+			ncuda = nb_devices;
+			if (ncuda > STARPU_MAXCUDADEVS)
+			{
+				fprintf(stderr,
+					"# Warning: %d CUDA devices available. Only %d enabled. Use configure option --enable-maxcudadev=xxx to update the maximum value of supported CUDA devices.\n",
+					nb_devices, STARPU_MAXCUDADEVS);
+				ncuda = STARPU_MAXCUDADEVS;
+			}
 		}
 		}
 		else
 		else
 		{
 		{
+			if (ncuda > nb_devices)
+			{
+				/* The user requires more CUDA devices than there are available */
+				fprintf(stderr,
+					"# Warning: %d CUDA devices requested. Only %d available.\n",
+					ncuda, nb_devices);
+				ncuda = nb_devices;
+			}
 			/* Let's make sure this value is OK. */
 			/* Let's make sure this value is OK. */
 			if (ncuda > STARPU_MAXCUDADEVS)
 			if (ncuda > STARPU_MAXCUDADEVS)
 			{
 			{
@@ -319,14 +335,6 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config,
 					ncuda, STARPU_MAXCUDADEVS);
 					ncuda, STARPU_MAXCUDADEVS);
 				ncuda = STARPU_MAXCUDADEVS;
 				ncuda = STARPU_MAXCUDADEVS;
 			}
 			}
-
-			if ((unsigned) ncuda > _starpu_get_cuda_device_count())
-			{
-				fprintf(stderr,
-					"# Warning: %d CUDA devices requested. Only %d available.\n",
-					ncuda, _starpu_get_cuda_device_count());
-				ncuda = _starpu_get_cuda_device_count();
-			}
 		}
 		}
 	}
 	}
 
 
@@ -369,12 +377,19 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config,
  		 * early to count the number of devices */
  		 * early to count the number of devices */
 		_starpu_opencl_init();
 		_starpu_opencl_init();
 		int nb_devices;
 		int nb_devices;
-		nb_devices = STARPU_MIN(_starpu_opencl_get_device_count(), STARPU_MAXOPENCLDEVS);
+		nb_devices = _starpu_opencl_get_device_count();
 
 
 		if (nopencl == -1)
 		if (nopencl == -1)
 		{
 		{
 			/* Nothing was specified, so let's choose ! */
 			/* Nothing was specified, so let's choose ! */
 			nopencl = nb_devices;
 			nopencl = nb_devices;
+			if (nopencl > STARPU_MAXOPENCLDEVS)
+			{
+				fprintf(stderr,
+					"# Warning: %d OpenCL devices available. Only %d enabled. Use configure option --enable-maxopencldadev=xxx to update the maximum value of supported OpenCL devices.\n",
+					nb_devices, STARPU_MAXOPENCLDEVS);
+				nopencl = STARPU_MAXOPENCLDEVS;
+			}
 		}
 		}
 		else
 		else
 		{
 		{
@@ -385,8 +400,9 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config,
 				fprintf(stderr,
 				fprintf(stderr,
 					"# Warning: %d OpenCL devices requested. Only %d available.\n",
 					"# Warning: %d OpenCL devices requested. Only %d available.\n",
 					nopencl, nb_devices);
 					nopencl, nb_devices);
-					topology->nopenclgpus = nb_devices;
+				nopencl = nb_devices;
 			}
 			}
+			/* Let's make sure this value is OK. */
 			if (nopencl > STARPU_MAXOPENCLDEVS)
 			if (nopencl > STARPU_MAXOPENCLDEVS)
 			{
 			{
 				fprintf(stderr,
 				fprintf(stderr,

+ 5 - 1
src/core/workers.c

@@ -169,7 +169,11 @@ int starpu_combined_worker_can_execute_task(unsigned workerid, struct starpu_tas
 	}
 	}
 	else
 	else
 	{
 	{
-		if ((cl->type == STARPU_SPMD) || (cl->type == STARPU_FORKJOIN))
+		if ((cl->type == STARPU_SPMD)
+#ifdef STARPU_HAVE_HWLOC
+				|| (cl->type == STARPU_FORKJOIN)
+#endif
+				)
 		{
 		{
 			/* TODO we should add other types of constraints */
 			/* TODO we should add other types of constraints */
 
 

+ 3 - 1
src/datawizard/coherency.c

@@ -443,8 +443,10 @@ struct _starpu_data_request *_starpu_create_request_to_fetch_data(starpu_data_ha
 
 
 		if (hop != nhops - 1)
 		if (hop != nhops - 1)
 		{
 		{
-			if (!reused_requests[hop + 1])
+			if (!reused_requests[hop + 1]) {
 				r->next_req[r->next_req_count++] = requests[hop + 1];
 				r->next_req[r->next_req_count++] = requests[hop + 1];
+				STARPU_ASSERT(r->next_req_count <= STARPU_MAXNODES);
+			}
 		}
 		}
 		else
 		else
 			_starpu_data_request_append_callback(r, callback_func, callback_arg);
 			_starpu_data_request_append_callback(r, callback_func, callback_arg);

+ 6 - 0
src/datawizard/filters.c

@@ -382,6 +382,12 @@ void starpu_data_unpartition(starpu_data_handle_t root_handle, uint32_t gatherin
 			still_valid[node]?newstate:STARPU_INVALID;
 			still_valid[node]?newstate:STARPU_INVALID;
 	}
 	}
 
 
+	for (child = 0; child < root_handle->nchildren; child++)
+	{
+		struct _starpu_data_state *child_handle = &root_handle->children[child];
+		_starpu_spin_unlock(&child_handle->header_lock);
+	}
+
 	/* there is no child anymore */
 	/* there is no child anymore */
 	free(root_handle->children);
 	free(root_handle->children);
 	root_handle->children = NULL;
 	root_handle->children = NULL;

+ 3 - 5
src/datawizard/interfaces/block_interface.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009-2011  Université de Bordeaux 1
+ * Copyright (C) 2009-2012  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -630,8 +630,7 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARP
 		else
 		else
 		{
 		{
 			/* Are all plans contiguous */
 			/* Are all plans contiguous */
-                        /* XXX non contiguous buffers are not properly supported yet. (TODO) */
-                        STARPU_ASSERT(0);
+                        STARPU_ASSERT_MSG(0, "XXX non contiguous buffers are not properly supported in OpenCL yet. (TODO)");
                 }
                 }
         }
         }
 	else
 	else
@@ -697,8 +696,7 @@ static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARP
                 else
                 else
 		{
 		{
 			/* Are all plans contiguous */
 			/* Are all plans contiguous */
-                        /* XXX non contiguous buffers are not properly supported yet. (TODO) */
-                        STARPU_ASSERT(0);
+                        STARPU_ASSERT_MSG(0, "XXX non contiguous buffers are not properly supported in OpenCL yet. (TODO)");
                 }
                 }
         }
         }
 	else
 	else

+ 3 - 5
src/datawizard/interfaces/matrix_interface.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010-2011  Université de Bordeaux 1
+ * Copyright (C) 2010-2012  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -605,8 +605,7 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARP
 	struct starpu_matrix_interface *dst_matrix = dst_interface;
 	struct starpu_matrix_interface *dst_matrix = dst_interface;
         int err,ret;
         int err,ret;
 
 
-	/* XXX non contiguous matrices are not supported with OpenCL yet ! (TODO) */
-	STARPU_ASSERT((src_matrix->ld == src_matrix->nx) && (dst_matrix->ld == dst_matrix->nx));
+	STARPU_ASSERT_MSG((src_matrix->ld == src_matrix->nx) && (dst_matrix->ld == dst_matrix->nx), "XXX non contiguous buffers are not properly supported in OpenCL yet. (TODO)");
 
 
 	err = starpu_opencl_copy_ram_to_opencl_async_sync((void*)src_matrix->ptr, src_node, (cl_mem)dst_matrix->dev_handle, dst_node,
 	err = starpu_opencl_copy_ram_to_opencl_async_sync((void*)src_matrix->ptr, src_node, (cl_mem)dst_matrix->dev_handle, dst_node,
                                                            src_matrix->nx*src_matrix->ny*src_matrix->elemsize,
                                                            src_matrix->nx*src_matrix->ny*src_matrix->elemsize,
@@ -625,8 +624,7 @@ static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARP
 	struct starpu_matrix_interface *dst_matrix = dst_interface;
 	struct starpu_matrix_interface *dst_matrix = dst_interface;
         int err, ret;
         int err, ret;
 
 
-	/* XXX non contiguous matrices are not supported with OpenCL yet ! (TODO) */
-	STARPU_ASSERT((src_matrix->ld == src_matrix->nx) && (dst_matrix->ld == dst_matrix->nx));
+	STARPU_ASSERT_MSG((src_matrix->ld == src_matrix->nx) && (dst_matrix->ld == dst_matrix->nx), "XXX non contiguous buffers are not properly supported in OpenCL yet. (TODO)");
 
 
         err = starpu_opencl_copy_opencl_to_ram_async_sync((cl_mem)src_matrix->dev_handle, src_node, (void*)dst_matrix->ptr, dst_node,
         err = starpu_opencl_copy_opencl_to_ram_async_sync((cl_mem)src_matrix->dev_handle, src_node, (void*)dst_matrix->ptr, dst_node,
                                                            src_matrix->nx*src_matrix->ny*src_matrix->elemsize,
                                                            src_matrix->nx*src_matrix->ny*src_matrix->elemsize,

+ 3 - 2
src/datawizard/interfaces/multiformat_interface.c

@@ -267,7 +267,7 @@ static void free_multiformat_buffer_on_node(void *data_interface, uint32_t node)
 #endif
 #endif
 #ifdef STARPU_USE_OPENCL
 #ifdef STARPU_USE_OPENCL
 		case STARPU_OPENCL_RAM:
 		case STARPU_OPENCL_RAM:
-			/* TODO */
+			STARPU_ASSERT_MSG(0, "XXX multiformat not supported on OpenCL yet (TODO)");
 			break;
 			break;
 #endif
 #endif
 		default:
 		default:
@@ -711,7 +711,8 @@ static int copy_opencl_to_opencl(void *src_interface, unsigned src_node,
 	(void) dst_interface;
 	(void) dst_interface;
 	(void) src_node;
 	(void) src_node;
 	(void) dst_node;
 	(void) dst_node;
-/* TODO */
+
+	STARPU_ASSERT_MSG(0, "XXX multiformat copy OpenCL-OpenCL not supported yet (TODO)");
 	return 0;
 	return 0;
 }
 }
 #endif
 #endif

+ 1 - 0
src/datawizard/user_interactions.c

@@ -282,6 +282,7 @@ int starpu_data_acquire(starpu_data_handle_t handle, enum starpu_access_mode mod
 		while (!wrapper.finished)
 		while (!wrapper.finished)
 			_STARPU_PTHREAD_COND_WAIT(&wrapper.cond, &wrapper.lock);
 			_STARPU_PTHREAD_COND_WAIT(&wrapper.cond, &wrapper.lock);
 		_STARPU_PTHREAD_MUTEX_UNLOCK(&wrapper.lock);
 		_STARPU_PTHREAD_MUTEX_UNLOCK(&wrapper.lock);
+		_STARPU_PTHREAD_MUTEX_DESTROY(&wrapper.lock);
 	}
 	}
 
 
 	/* At that moment, the caller holds a reference to the piece of data.
 	/* At that moment, the caller holds a reference to the piece of data.

+ 6 - 3
src/drivers/cpu/driver_cpu.c

@@ -20,6 +20,7 @@
 
 
 #include <math.h>
 #include <math.h>
 #include <starpu.h>
 #include <starpu.h>
+#include <starpu_profiling.h>
 #include <drivers/driver_common/driver_common.h>
 #include <drivers/driver_common/driver_common.h>
 #include <common/utils.h>
 #include <common/utils.h>
 #include <core/debug.h>
 #include <core/debug.h>
@@ -30,6 +31,7 @@
 static int execute_job_on_cpu(struct _starpu_job *j, struct _starpu_worker *cpu_args, int is_parallel_task, int rank, enum starpu_perf_archtype perf_arch)
 static int execute_job_on_cpu(struct _starpu_job *j, struct _starpu_worker *cpu_args, int is_parallel_task, int rank, enum starpu_perf_archtype perf_arch)
 {
 {
 	int ret;
 	int ret;
+	int profiling = starpu_profiling_status_get();
 	struct timespec codelet_start, codelet_end;
 	struct timespec codelet_start, codelet_end;
 
 
 	struct starpu_task *task = j->task;
 	struct starpu_task *task = j->task;
@@ -51,7 +53,8 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct _starpu_worker *cpu_
 	if (is_parallel_task)
 	if (is_parallel_task)
 		_STARPU_PTHREAD_BARRIER_WAIT(&j->before_work_barrier);
 		_STARPU_PTHREAD_BARRIER_WAIT(&j->before_work_barrier);
 
 
-	_starpu_driver_start_job(cpu_args, j, &codelet_start, rank);
+	/* Give profiling variable */
+	_starpu_driver_start_job(cpu_args, j, &codelet_start, rank, profiling);
 
 
 	/* In case this is a Fork-join parallel task, the worker does not
 	/* In case this is a Fork-join parallel task, the worker does not
 	 * execute the kernel at all. */
 	 * execute the kernel at all. */
@@ -68,7 +71,7 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct _starpu_worker *cpu_
 			_starpu_bind_thread_on_cpu(cpu_args->config, cpu_args->bindid);
 			_starpu_bind_thread_on_cpu(cpu_args->config, cpu_args->bindid);
 	}
 	}
 
 
-	_starpu_driver_end_job(cpu_args, j, perf_arch, &codelet_end, rank);
+	_starpu_driver_end_job(cpu_args, j, perf_arch, &codelet_end, rank, profiling);
 
 
 	if (is_parallel_task)
 	if (is_parallel_task)
 		_STARPU_PTHREAD_BARRIER_WAIT(&j->after_work_barrier);
 		_STARPU_PTHREAD_BARRIER_WAIT(&j->after_work_barrier);
@@ -76,7 +79,7 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct _starpu_worker *cpu_
 	if (rank == 0)
 	if (rank == 0)
 	{
 	{
 		_starpu_driver_update_job_feedback(j, cpu_args,
 		_starpu_driver_update_job_feedback(j, cpu_args,
-				perf_arch, &codelet_start, &codelet_end);
+				perf_arch, &codelet_start, &codelet_end, profiling);
 		_starpu_push_task_output(j, 0);
 		_starpu_push_task_output(j, 0);
 	}
 	}
 
 

+ 5 - 3
src/drivers/cuda/driver_cuda.c

@@ -20,6 +20,7 @@
 
 
 #include <starpu.h>
 #include <starpu.h>
 #include <starpu_cuda.h>
 #include <starpu_cuda.h>
+#include <starpu_profiling.h>
 #include <common/utils.h>
 #include <common/utils.h>
 #include <common/config.h>
 #include <common/config.h>
 #include <core/debug.h>
 #include <core/debug.h>
@@ -198,6 +199,7 @@ static int execute_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *arg
 
 
 	struct timespec codelet_start, codelet_end;
 	struct timespec codelet_start, codelet_end;
 
 
+	int profiling = starpu_profiling_status_get();
 	unsigned calibrate_model = 0;
 	unsigned calibrate_model = 0;
 
 
 	STARPU_ASSERT(task);
 	STARPU_ASSERT(task);
@@ -223,7 +225,7 @@ static int execute_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *arg
 			STARPU_CUDA_REPORT_ERROR(cures);
 			STARPU_CUDA_REPORT_ERROR(cures);
 	}
 	}
 
 
-	_starpu_driver_start_job(args, j, &codelet_start, 0);
+	_starpu_driver_start_job(args, j, &codelet_start, 0, profiling);
 
 
 #ifdef HAVE_CUDA_MEMCPY_PEER
 #ifdef HAVE_CUDA_MEMCPY_PEER
 	/* We make sure we do manipulate the proper device */
 	/* We make sure we do manipulate the proper device */
@@ -236,9 +238,9 @@ static int execute_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *arg
 	STARPU_ASSERT(func);
 	STARPU_ASSERT(func);
 	func(task->interfaces, task->cl_arg);
 	func(task->interfaces, task->cl_arg);
 
 
-	_starpu_driver_end_job(args, j, args->perf_arch, &codelet_end, 0);
+	_starpu_driver_end_job(args, j, args->perf_arch, &codelet_end, 0, profiling);
 
 
-	_starpu_driver_update_job_feedback(j, args, args->perf_arch, &codelet_start, &codelet_end);
+	_starpu_driver_update_job_feedback(j, args, args->perf_arch, &codelet_start, &codelet_end, profiling);
 
 
 	_starpu_push_task_output(j, mask);
 	_starpu_push_task_output(j, mask);
 
 

+ 6 - 9
src/drivers/driver_common/driver_common.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010, 2011  Université de Bordeaux 1
+ * Copyright (C) 2010-2012  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  * Copyright (C) 2011  Télécom-SudParis
  * Copyright (C) 2011  Télécom-SudParis
  *
  *
@@ -27,12 +27,11 @@
 #include <core/sched_policy.h>
 #include <core/sched_policy.h>
 #include <top/starpu_top_core.h>
 #include <top/starpu_top_core.h>
 
 
-void _starpu_driver_start_job(struct _starpu_worker *args, struct _starpu_job *j, struct timespec *codelet_start, int rank)
+void _starpu_driver_start_job(struct _starpu_worker *args, struct _starpu_job *j, struct timespec *codelet_start, int rank, int profiling)
 {
 {
 	struct starpu_task *task = j->task;
 	struct starpu_task *task = j->task;
 	struct starpu_codelet *cl = task->cl;
 	struct starpu_codelet *cl = task->cl;
 	struct starpu_task_profiling_info *profiling_info;
 	struct starpu_task_profiling_info *profiling_info;
-	int profiling = starpu_profiling_status_get();
 	int starpu_top=_starpu_top_status_get();
 	int starpu_top=_starpu_top_status_get();
 	int workerid = args->workerid;
 	int workerid = args->workerid;
 	unsigned calibrate_model = 0;
 	unsigned calibrate_model = 0;
@@ -65,12 +64,11 @@ void _starpu_driver_start_job(struct _starpu_worker *args, struct _starpu_job *j
 	_STARPU_TRACE_START_CODELET_BODY(j);
 	_STARPU_TRACE_START_CODELET_BODY(j);
 }
 }
 
 
-void _starpu_driver_end_job(struct _starpu_worker *args, struct _starpu_job *j, enum starpu_perf_archtype perf_arch STARPU_ATTRIBUTE_UNUSED, struct timespec *codelet_end, int rank)
+void _starpu_driver_end_job(struct _starpu_worker *args, struct _starpu_job *j, enum starpu_perf_archtype perf_arch STARPU_ATTRIBUTE_UNUSED, struct timespec *codelet_end, int rank, int profiling)
 {
 {
 	struct starpu_task *task = j->task;
 	struct starpu_task *task = j->task;
 	struct starpu_codelet *cl = task->cl;
 	struct starpu_codelet *cl = task->cl;
 	struct starpu_task_profiling_info *profiling_info = task->profiling_info;
 	struct starpu_task_profiling_info *profiling_info = task->profiling_info;
-	int profiling = starpu_profiling_status_get();
 	int starpu_top=_starpu_top_status_get();
 	int starpu_top=_starpu_top_status_get();
 	int workerid = args->workerid;
 	int workerid = args->workerid;
 	unsigned calibrate_model = 0;
 	unsigned calibrate_model = 0;
@@ -93,7 +91,7 @@ void _starpu_driver_end_job(struct _starpu_worker *args, struct _starpu_job *j,
 }
 }
 void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_worker *worker_args,
 void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_worker *worker_args,
 					enum starpu_perf_archtype perf_arch,
 					enum starpu_perf_archtype perf_arch,
-					struct timespec *codelet_start, struct timespec *codelet_end)
+					struct timespec *codelet_start, struct timespec *codelet_end, int profiling)
 {
 {
 	struct starpu_task_profiling_info *profiling_info = j->task->profiling_info;
 	struct starpu_task_profiling_info *profiling_info = j->task->profiling_info;
 	struct timespec measured_ts;
 	struct timespec measured_ts;
@@ -101,13 +99,12 @@ void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_wo
 	int workerid = worker_args->workerid;
 	int workerid = worker_args->workerid;
 	struct starpu_codelet *cl = j->task->cl;
 	struct starpu_codelet *cl = j->task->cl;
 	int calibrate_model = 0;
 	int calibrate_model = 0;
-	int profiling = starpu_profiling_status_get();
 	int updated = 0;
 	int updated = 0;
 
 
-	if (cl->model && _starpu_get_calibrate_flag())
+	if (cl->model && cl->model->benchmarking)
 		calibrate_model = 1;
 		calibrate_model = 1;
 
 
-	if (profiling_info || calibrate_model)
+	if ((profiling && profiling_info) || calibrate_model)
 	{
 	{
 		starpu_timespec_sub(codelet_end, codelet_start, &measured_ts);
 		starpu_timespec_sub(codelet_end, codelet_start, &measured_ts);
 		measured = starpu_timing_timespec_to_us(&measured_ts);
 		measured = starpu_timing_timespec_to_us(&measured_ts);

+ 4 - 4
src/drivers/driver_common/driver_common.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010, 2011  Université de Bordeaux 1
+ * Copyright (C) 2010-2012  Université de Bordeaux 1
  * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -24,12 +24,12 @@
 #include <common/utils.h>
 #include <common/utils.h>
 
 
 void _starpu_driver_start_job(struct _starpu_worker *args, struct _starpu_job *j,
 void _starpu_driver_start_job(struct _starpu_worker *args, struct _starpu_job *j,
-			      struct timespec *codelet_start, int rank);
+			      struct timespec *codelet_start, int rank, int profiling);
 void _starpu_driver_end_job(struct _starpu_worker *args, struct _starpu_job *j, enum starpu_perf_archtype perf_arch,
 void _starpu_driver_end_job(struct _starpu_worker *args, struct _starpu_job *j, enum starpu_perf_archtype perf_arch,
-			    struct timespec *codelet_end, int rank);
+			    struct timespec *codelet_end, int rank, int profiling);
 void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_worker *worker_args,
 void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_worker *worker_args,
 					enum starpu_perf_archtype perf_arch,
 					enum starpu_perf_archtype perf_arch,
-					struct timespec *codelet_start, struct timespec *codelet_end);
+					struct timespec *codelet_start, struct timespec *codelet_end, int profiling);
 
 
 void _starpu_block_worker(int workerid, pthread_cond_t *cond, pthread_mutex_t *mutex);
 void _starpu_block_worker(int workerid, pthread_cond_t *cond, pthread_mutex_t *mutex);
 
 

+ 5 - 3
src/drivers/opencl/driver_opencl.c

@@ -20,6 +20,7 @@
 
 
 #include <math.h>
 #include <math.h>
 #include <starpu.h>
 #include <starpu.h>
+#include <starpu_profiling.h>
 #include <common/config.h>
 #include <common/config.h>
 #include <common/utils.h>
 #include <common/utils.h>
 #include <core/debug.h>
 #include <core/debug.h>
@@ -579,6 +580,7 @@ static int _starpu_opencl_execute_job(struct _starpu_job *j, struct _starpu_work
 	STARPU_ASSERT(j);
 	STARPU_ASSERT(j);
 	struct starpu_task *task = j->task;
 	struct starpu_task *task = j->task;
 
 
+	int profiling = starpu_profiling_status_get();
 	struct timespec codelet_start, codelet_end;
 	struct timespec codelet_start, codelet_end;
 
 
 	STARPU_ASSERT(task);
 	STARPU_ASSERT(task);
@@ -594,16 +596,16 @@ static int _starpu_opencl_execute_job(struct _starpu_job *j, struct _starpu_work
 		return -EAGAIN;
 		return -EAGAIN;
 	}
 	}
 
 
-	_starpu_driver_start_job(args, j, &codelet_start, 0);
+	_starpu_driver_start_job(args, j, &codelet_start, 0, profiling);
 
 
 	starpu_opencl_func_t func = _starpu_task_get_opencl_nth_implementation(cl, j->nimpl);
 	starpu_opencl_func_t func = _starpu_task_get_opencl_nth_implementation(cl, j->nimpl);
 	STARPU_ASSERT(func);
 	STARPU_ASSERT(func);
 	func(task->interfaces, task->cl_arg);
 	func(task->interfaces, task->cl_arg);
 
 
-	_starpu_driver_end_job(args, j, args->perf_arch, &codelet_end, 0);
+	_starpu_driver_end_job(args, j, args->perf_arch, &codelet_end, 0, profiling);
 
 
 	_starpu_driver_update_job_feedback(j, args, args->perf_arch,
 	_starpu_driver_update_job_feedback(j, args, args->perf_arch,
-							&codelet_start, &codelet_end);
+					   &codelet_start, &codelet_end, profiling);
 
 
 	_starpu_push_task_output(j, mask);
 	_starpu_push_task_output(j, mask);
 
 

+ 5 - 1
src/sched_policies/heft.c

@@ -307,11 +307,15 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 		for (nimpl = 0; nimpl <STARPU_MAXIMPLEMENTATIONS; nimpl++) 
 		for (nimpl = 0; nimpl <STARPU_MAXIMPLEMENTATIONS; nimpl++) 
 		{
 		{
 			/* Sometimes workers didn't take the tasks as early as we expected */
 			/* Sometimes workers didn't take the tasks as early as we expected */
+			pthread_mutex_t *sched_mutex;
+			pthread_cond_t *sched_cond;
+			starpu_worker_get_sched_condition(sched_ctx_id, workerid, &sched_mutex, &sched_cond);
+			_STARPU_PTHREAD_MUTEX_LOCK(&sched_mutex[worker]);
 			exp_start[worker] = STARPU_MAX(exp_start[worker], starpu_timing_now());
 			exp_start[worker] = STARPU_MAX(exp_start[worker], starpu_timing_now());
 			exp_end[worker_ctx][nimpl] = exp_start[worker] + exp_len[worker];
 			exp_end[worker_ctx][nimpl] = exp_start[worker] + exp_len[worker];
 			if (exp_end[worker_ctx][nimpl] > max_exp_end)
 			if (exp_end[worker_ctx][nimpl] > max_exp_end)
  				max_exp_end = exp_end[worker_ctx][nimpl];
  				max_exp_end = exp_end[worker_ctx][nimpl];
-			
+			_STARPU_PTHREAD_MUTEX_UNLOCK(&sched_mutex[worker]);
 			if (!starpu_worker_can_execute_task(worker, task, nimpl))
 			if (!starpu_worker_can_execute_task(worker, task, nimpl))
 			{
 			{
 				/* no one on that queue may execute this task */
 				/* no one on that queue may execute this task */

+ 7 - 9
src/sched_policies/parallel_heft.c

@@ -34,8 +34,6 @@
 #define DBL_MAX __DBL_MAX__
 #define DBL_MAX __DBL_MAX__
 #endif
 #endif
 
 
-static pthread_mutex_t big_lock;
-
 static unsigned nworkers, ncombinedworkers;
 static unsigned nworkers, ncombinedworkers;
 //static enum starpu_perf_archtype applicable_perf_archtypes[STARPU_NARCH_VARIATIONS];
 //static enum starpu_perf_archtype applicable_perf_archtypes[STARPU_NARCH_VARIATIONS];
 //static unsigned napplicable_perf_archtypes = 0;
 //static unsigned napplicable_perf_archtypes = 0;
@@ -93,18 +91,18 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 
 
 	int ret = 0;
 	int ret = 0;
 
 
-	_STARPU_PTHREAD_MUTEX_LOCK(&big_lock);
-
 	if (is_basic_worker)
 	if (is_basic_worker)
 	{
 	{
 		task->predicted = exp_end_predicted - worker_exp_end[best_workerid];
 		task->predicted = exp_end_predicted - worker_exp_end[best_workerid];
 		/* TODO */
 		/* TODO */
 		task->predicted_transfer = 0;
 		task->predicted_transfer = 0;
+		_STARPU_PTHREAD_MUTEX_LOCK(sched_ctx->sched_mutex[best_workerid]);
 		worker_exp_len[best_workerid] += task->predicted;
 		worker_exp_len[best_workerid] += task->predicted;
 		worker_exp_end[best_workerid] = exp_end_predicted;
 		worker_exp_end[best_workerid] = exp_end_predicted;
 		worker_exp_start[best_workerid] = exp_end_predicted - worker_exp_len[best_workerid];
 		worker_exp_start[best_workerid] = exp_end_predicted - worker_exp_len[best_workerid];
 
 
 		ntasks[best_workerid]++;
 		ntasks[best_workerid]++;
+		_STARPU_PTHREAD_MUTEX_UNLOCK(sched_ctx->sched_mutex[best_workerid]);
 
 
 		ret = starpu_push_local_task(best_workerid, task, prio);
 		ret = starpu_push_local_task(best_workerid, task, prio);
 	}
 	}
@@ -135,19 +133,19 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 			/* TODO */
 			/* TODO */
 			alias->predicted_transfer = 0;
 			alias->predicted_transfer = 0;
 
 
+			_STARPU_PTHREAD_MUTEX_LOCK(sched_ctx->sched_mutex[local_worker]);
 			worker_exp_len[local_worker] += alias->predicted;
 			worker_exp_len[local_worker] += alias->predicted;
 			worker_exp_end[local_worker] = exp_end_predicted;
 			worker_exp_end[local_worker] = exp_end_predicted;
 			worker_exp_start[local_worker] = exp_end_predicted - worker_exp_len[local_worker];
 			worker_exp_start[local_worker] = exp_end_predicted - worker_exp_len[local_worker];
 
 
 			ntasks[local_worker]++;
 			ntasks[local_worker]++;
+			_STARPU_PTHREAD_MUTEX_UNLOCK(sched_ctx->sched_mutex[local_worker]);
 
 
 			ret |= starpu_push_local_task(local_worker, alias, prio);
 			ret |= starpu_push_local_task(local_worker, alias, prio);
 		}
 		}
 
 
 	}
 	}
 
 
-	_STARPU_PTHREAD_MUTEX_UNLOCK(&big_lock);
-
 	return ret;
 	return ret;
 }
 }
 
 
@@ -245,10 +243,12 @@ static int _parallel_heft_push_task(struct starpu_task *task, unsigned prio, uns
 	{
 	{
 		worker = sched_ctx->workerids[worker_ctx];
 		worker = sched_ctx->workerids[worker_ctx];
 		/* Sometimes workers didn't take the tasks as early as we expected */
 		/* Sometimes workers didn't take the tasks as early as we expected */
+		_STARPU_PTHREAD_MUTEX_LOCK(sched_ctx->sched_mutex[worker]);
 		worker_exp_start[worker] = STARPU_MAX(worker_exp_start[worker], starpu_timing_now());
 		worker_exp_start[worker] = STARPU_MAX(worker_exp_start[worker], starpu_timing_now());
 		worker_exp_end[worker] = worker_exp_start[worker] + worker_exp_len[worker];
 		worker_exp_end[worker] = worker_exp_start[worker] + worker_exp_len[worker];
 		if (worker_exp_end[worker] > max_exp_end)
 		if (worker_exp_end[worker] > max_exp_end)
 			max_exp_end = worker_exp_end[worker];
 			max_exp_end = worker_exp_end[worker];
+		_STARPU_PTHREAD_MUTEX_UNLOCK(sched_ctx->sched_mutex[worker]);
 	}
 	}
 
 
 	unsigned nimpl;
 	unsigned nimpl;
@@ -325,8 +325,7 @@ static int _parallel_heft_push_task(struct starpu_task *task, unsigned prio, uns
 		} //end for
 		} //end for
 	}
 	}
 
 
-	if (unknown)
-	{
+	if (unknown) {
 		forced_best = ntasks_best;
 		forced_best = ntasks_best;
 		forced_best_ctx = ntasks_best_ctx;
 		forced_best_ctx = ntasks_best_ctx;
 		forced_nimpl = nimpl_best;
 		forced_nimpl = nimpl_best;
@@ -483,7 +482,6 @@ static void initialize_parallel_heft_policy(unsigned sched_ctx_id)
 		_STARPU_PTHREAD_COND_INIT(sched_ctx->sched_cond[workerid], NULL);
 		_STARPU_PTHREAD_COND_INIT(sched_ctx->sched_cond[workerid], NULL);
 	}
 	}
 
 
-	_STARPU_PTHREAD_MUTEX_INIT(&big_lock, NULL);
 
 
 	/* We pre-compute an array of all the perfmodel archs that are applicable */
 	/* We pre-compute an array of all the perfmodel archs that are applicable */
 	unsigned total_worker_count = nworkers_ctx + ncombinedworkers;
 	unsigned total_worker_count = nworkers_ctx + ncombinedworkers;

+ 20 - 11
src/util/malloc.c

@@ -24,7 +24,7 @@
 #include <starpu_cuda.h>
 #include <starpu_cuda.h>
 #include <drivers/opencl/driver_opencl.h>
 #include <drivers/opencl/driver_opencl.h>
 
 
-#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
+#if (defined(STARPU_USE_CUDA) && !defined(HAVE_CUDA_MEMCPY_PEER))// || defined(STARPU_USE_OPENCL)
 struct malloc_pinned_codelet_struct
 struct malloc_pinned_codelet_struct
 {
 {
 	void **ptr;
 	void **ptr;
@@ -41,7 +41,7 @@ struct malloc_pinned_codelet_struct
 //}
 //}
 //#endif
 //#endif
 
 
-#ifdef STARPU_USE_CUDA
+#if defined(STARPU_USE_CUDA) && !defined(HAVE_CUDA_MEMCPY_PEER)
 static void malloc_pinned_cuda_codelet(void *buffers[] STARPU_ATTRIBUTE_UNUSED, void *arg)
 static void malloc_pinned_cuda_codelet(void *buffers[] STARPU_ATTRIBUTE_UNUSED, void *arg)
 {
 {
 	struct malloc_pinned_codelet_struct *s = arg;
 	struct malloc_pinned_codelet_struct *s = arg;
@@ -53,7 +53,7 @@ static void malloc_pinned_cuda_codelet(void *buffers[] STARPU_ATTRIBUTE_UNUSED,
 }
 }
 #endif
 #endif
 
 
-#if defined(STARPU_USE_CUDA)// || defined(STARPU_USE_OPENCL)
+#if (defined(STARPU_USE_CUDA) && !defined(HAVE_CUDA_MEMCPY_PEER))// || defined(STARPU_USE_OPENCL)
 static struct starpu_perfmodel malloc_pinned_model =
 static struct starpu_perfmodel malloc_pinned_model =
 {
 {
 	.type = STARPU_HISTORY_BASED,
 	.type = STARPU_HISTORY_BASED,
@@ -81,12 +81,14 @@ int starpu_malloc(void **A, size_t dim)
 	if (_starpu_can_submit_cuda_task())
 	if (_starpu_can_submit_cuda_task())
 	{
 	{
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
+#ifdef HAVE_CUDA_MEMCPY_PEER
+		cudaError_t cures;
+		cures = cudaHostAlloc(A, dim, cudaHostAllocPortable);
+		if (STARPU_UNLIKELY(cures))
+			STARPU_CUDA_REPORT_ERROR(cures);
+#else
 		int push_res;
 		int push_res;
 
 
-#ifdef STARPU_DEVEL
-#warning TODO: CUDA4 is able to directly allocate from any thread without having to launch a task
-#endif
-
 		struct malloc_pinned_codelet_struct s =
 		struct malloc_pinned_codelet_struct s =
 		{
 		{
 			.ptr = A,
 			.ptr = A,
@@ -106,6 +108,7 @@ int starpu_malloc(void **A, size_t dim)
 		push_res = starpu_task_submit(task);
 		push_res = starpu_task_submit(task);
 		STARPU_ASSERT(push_res != -ENODEV);
 		STARPU_ASSERT(push_res != -ENODEV);
 #endif
 #endif
+#endif
 	}
 	}
 //	else if (_starpu_can_submit_opencl_task())
 //	else if (_starpu_can_submit_opencl_task())
 //	{
 //	{
@@ -142,7 +145,7 @@ int starpu_malloc(void **A, size_t dim)
 	return 0;
 	return 0;
 }
 }
 
 
-#ifdef STARPU_USE_CUDA
+#if defined(STARPU_USE_CUDA) && !defined(HAVE_CUDA_MEMCPY_PEER)
 static void free_pinned_cuda_codelet(void *buffers[] STARPU_ATTRIBUTE_UNUSED, void *arg)
 static void free_pinned_cuda_codelet(void *buffers[] STARPU_ATTRIBUTE_UNUSED, void *arg)
 {
 {
 	cudaError_t cures;
 	cudaError_t cures;
@@ -161,7 +164,7 @@ static void free_pinned_cuda_codelet(void *buffers[] STARPU_ATTRIBUTE_UNUSED, vo
 //}
 //}
 //#endif
 //#endif
 
 
-#if defined(STARPU_USE_CUDA) // || defined(STARPU_USE_OPENCL)
+#if (defined(STARPU_USE_CUDA) && !defined(HAVE_CUDA_MEMCPY_PEER)) // || defined(STARPU_USE_OPENCL)
 static struct starpu_perfmodel free_pinned_model =
 static struct starpu_perfmodel free_pinned_model =
 {
 {
 	.type = STARPU_HISTORY_BASED,
 	.type = STARPU_HISTORY_BASED,
@@ -185,16 +188,21 @@ int starpu_free(void *A)
 		return -EDEADLK;
 		return -EDEADLK;
 
 
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
+	if (_starpu_can_submit_cuda_task())
+	{
+#ifndef HAVE_CUDA_MEMCPY_PEER
 	if (!_starpu_is_initialized())
 	if (!_starpu_is_initialized())
 	{
 	{
+#endif
 		/* This is especially useful when starpu_free is called from
 		/* This is especially useful when starpu_free is called from
  		 * the GCC-plugin. starpu_shutdown will probably have already
  		 * the GCC-plugin. starpu_shutdown will probably have already
 		 * been called, so we will not be able to submit a task. */
 		 * been called, so we will not be able to submit a task. */
 		cudaError_t err = cudaFreeHost(A);
 		cudaError_t err = cudaFreeHost(A);
 		if (STARPU_UNLIKELY(err))
 		if (STARPU_UNLIKELY(err))
 			STARPU_CUDA_REPORT_ERROR(err);
 			STARPU_CUDA_REPORT_ERROR(err);
+#ifndef HAVE_CUDA_MEMCPY_PEER
 	}
 	}
-	else if (_starpu_can_submit_cuda_task())
+	else
 	{
 	{
 		int push_res;
 		int push_res;
 
 
@@ -211,6 +219,7 @@ int starpu_free(void *A)
 		push_res = starpu_task_submit(task);
 		push_res = starpu_task_submit(task);
 		STARPU_ASSERT(push_res != -ENODEV);
 		STARPU_ASSERT(push_res != -ENODEV);
 	}
 	}
+#endif
 //	else if (_starpu_can_submit_opencl_task())
 //	else if (_starpu_can_submit_opencl_task())
 //	{
 //	{
 //#ifdef STARPU_USE_OPENCL
 //#ifdef STARPU_USE_OPENCL
@@ -230,7 +239,7 @@ int starpu_free(void *A)
 //		STARPU_ASSERT(push_res != -ENODEV);
 //		STARPU_ASSERT(push_res != -ENODEV);
 //#endif
 //#endif
 //	}
 //	}
-	else
+	} else
 #endif
 #endif
 	{
 	{
 		free(A);
 		free(A);

+ 8 - 1
src/util/starpu_insert_task.c

@@ -78,5 +78,12 @@ int starpu_insert_task(struct starpu_codelet *cl, ...)
 
 
 	va_start(varg_list, cl);
 	va_start(varg_list, cl);
         struct starpu_task *task = starpu_task_create();
         struct starpu_task *task = starpu_task_create();
-        return _starpu_insert_task_create_and_submit(arg_buffer, cl, &task, varg_list);
+	int ret = _starpu_insert_task_create_and_submit(arg_buffer, cl, &task, varg_list);
+
+	if (ret == -ENODEV)
+	{
+		task->destroy = 0;
+		starpu_task_destroy(task);
+	}
+        return ret;
 }
 }

+ 2 - 2
tests/main/subgraph_repeat.c

@@ -62,19 +62,19 @@ static struct starpu_codelet dummy_codelet =
 
 
 static void callback_task_D(void *arg __attribute__((unused)))
 static void callback_task_D(void *arg __attribute__((unused)))
 {
 {
+	_STARPU_PTHREAD_MUTEX_LOCK(&mutex);
 	loop_cnt++;
 	loop_cnt++;
 
 
 	if (loop_cnt == niter)
 	if (loop_cnt == niter)
 	{
 	{
 		/* We are done */
 		/* We are done */
-		_STARPU_PTHREAD_MUTEX_LOCK(&mutex);
 		_STARPU_PTHREAD_COND_SIGNAL(&cond);
 		_STARPU_PTHREAD_COND_SIGNAL(&cond);
 		_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
 		_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
 	}
 	}
 	else
 	else
 	{
 	{
 		int ret;
 		int ret;
-
+		_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
 		/* Let's go for another iteration */
 		/* Let's go for another iteration */
 		ret = starpu_task_submit(&taskA); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 		ret = starpu_task_submit(&taskA); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 		ret = starpu_task_submit(&taskB); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 		ret = starpu_task_submit(&taskB); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");

+ 4 - 0
tests/overlap/overlap.c

@@ -25,7 +25,11 @@
 #include <pthread.h>
 #include <pthread.h>
 #include "../helper.h"
 #include "../helper.h"
 
 
+#ifdef STARPU_SLOW_MACHINE
+#define NTASKS	1000
+#else
 #define NTASKS	10000
 #define NTASKS	10000
+#endif
 #define VECTORSIZE	1024
 #define VECTORSIZE	1024
 #define TASKDURATION	24U
 #define TASKDURATION	24U
 
 

+ 0 - 37
tools/dev/check_register.sh

@@ -1,37 +0,0 @@
-#!/bin/bash
-
-# StarPU --- Runtime system for heterogeneous multicore architectures.
-#
-# Copyright (C) 2011  Centre National de la Recherche Scientifique
-#
-# StarPU is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at
-# your option) any later version.
-#
-# StarPU is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#
-# See the GNU Lesser General Public License in COPYING.LGPL for more details.
-
-stcolor=$(tput sgr0)
-datacolor=$(tput setaf 2)
-filecolor=$(tput setaf 1)
-
-process_file()
-{
-    datas=$(grep "data_register(" $f| awk -F',' '{print $1}' | awk -F'(' '{print $2}' | tr -d '&' | sed 's/\[/\\\[/g' | sed 's/\]/\\\]/g' | sed 's/\*/\\\*/g')
-    for data in $datas ; do
-	x=$(grep "data_unregister($data" $1)
-	if test "$x" == "" ; then
-	    x=$(grep "data_unregister_no_coherency($data" $1)
-	    if test "$x" == "" ; then
-		echo "Error. File <${filecolor}$1${stcolor}>. Handle <${datacolor}$data${stcolor}> is not unregistered"
-	    fi
-	fi
-    done
-}
-
-for f in $(find tests -type f -not -path "*svn*") ; do process_file $f ; done
-for f in $(find examples -type f -not -path "*svn*") ; do process_file $f ; done

+ 3 - 1
tools/dev/experimental/cuda_check_return_values.cocci

@@ -51,7 +51,9 @@ E@p = cuda_func(...);
 
 
 
 
 @initialize:python depends on report || org@
 @initialize:python depends on report || org@
+from re import sub
 msg = "Ignoring the return value of %s."
 msg = "Ignoring the return value of %s."
+orgmsg = sub(r'(%[a-z])', r'=\1=', msg)
 
 
 @no_assignment@
 @no_assignment@
 identifier cuda_func =~ "^cuda";
 identifier cuda_func =~ "^cuda";
@@ -70,7 +72,7 @@ position no_assignment.p;
 p << no_assignment.p;
 p << no_assignment.p;
 func << no_assignment.cuda_func;
 func << no_assignment.cuda_func;
 @@
 @@
-coccilib.org.print_todo(p[0], msg % func)
+coccilib.org.print_todo(p[0], orgmsg % func)
 
 
 @depends on no_assignment && patch@
 @depends on no_assignment && patch@
 identifier no_assignment.cuda_func;
 identifier no_assignment.cuda_func;

+ 123 - 0
tools/dev/experimental/destroy_task_on_error.cocci

@@ -0,0 +1,123 @@
+/*
+ * StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/*
+ * When the submission of a task fails, StarPU cannot destroy the task, even if
+ * the destroy flag is set. So we have to destroy it ourselves while handling
+ * the error.
+ *
+ * TODO: match if statements without braces.
+ */
+
+virtual context
+virtual org
+virtual patch
+virtual report
+
+@initialize:python depends on org || report@
+msg = "Warning: in %s(): "
+msg+= "\"%s\" should probably be destroyed in the body of the if statement"
+from re import sub
+orgmsg = sub(r'(%[a-z])', r'=\1=', msg)
+
+@r@
+local idexpression t;
+identifier err;
+identifier f;
+position p;
+@@
+f(...)
+{
+<+...
+(
+err = starpu_task_submit(t);
+|
+int err = starpu_task_submit(t);
+)
+if@p(
+(
+err == -ENODEV
+|
+err != 0
+|
+STARPU_UNLIKELY(err == -ENODEV)
+|
+STARPU_UNLIKELY(err != 0)
+)
+ )
+{
+... when != starpu_task_destroy(t);
+    when != exit(...);
+    when != STARPU_ASSERT(...);
+    when != return 77;
+}
+...+>
+}
+
+// Context mode.
+@depends on r && context@
+position r.p;
+@@
+*if@p(...) { ... }
+
+// Org mode.
+@script:python depends on r && org@
+p << r.p;
+t << r.t;
+f << r.f;
+@@
+coccilib.org.print_todo(p[0], orgmsg % (f,t))
+
+// Patch mode.
+// XXX: Instead of "..." we could use a statement list (statement list SS). But
+// it does not seem to work if there is a "return" statement in the body
+// of the if statement.
+// Using "..." makes the patch ugly, but this may be fixed in a future version
+// of spatch.
+@depends on r && patch@
+local idexpression r.t;
+position r.p;
+identifier r.f;
+@@
+f(...)
+{
+<+...
+if@p (...)
+(
+{
+...
++ t->destroy = 0;
++ starpu_task_destroy(t);
+return ...;
+}
+|
+{
+...
++ t->destroy = 0;
++ starpu_task_destroy(t);
+}
+)
+...+>
+}
+
+// Report mode.
+@script:python depends on r && report@
+p << r.p;
+t << r.t;
+f << r.f;
+@@
+coccilib.report.print_report(p[0], msg % (f,t))

+ 65 - 0
tools/dev/experimental/destroy_task_on_error_test.c

@@ -0,0 +1,65 @@
+/*
+ * StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+static void
+good_0(void)
+{
+	struct starpu_task *task;
+	task = starpu_task_create();
+	int ret = starpu_task_submit(task);
+	if (ret == -ENODEV)
+	{
+		fprintf(stderr, "fail\n");
+		starpu_task_destroy(task);
+	}
+}
+
+static void
+bad_0(void)
+{
+	struct starpu_task *task1, *task2;
+
+	task1 = starpu_task_create();
+	int ret = starpu_task_submit(task1);
+	if (ret == -ENODEV)
+	{
+		fprintf(stderr, "Fail\n");
+	}
+
+	task2 = starpu_task_create();
+	ret = starpu_task_submit(task2);
+	if (ret == -ENODEV)
+	{
+		fprintf(stderr, "Fail\n");
+	}
+}
+
+static void
+bad_unlikely(void)
+{
+	struct starpu_task *task;
+
+	task = starpu_task_create();
+
+	int ret = starpu_task_submit(task);
+	if (STARPU_UNLIKELY(ret == -ENODEV))
+	{
+		error();
+		return 1;
+	}
+
+	starpu_task_destroy(task);
+}

+ 5 - 4
tools/dev/experimental/function_call_termination_condition.cocci

@@ -34,6 +34,8 @@ virtual report
 
 
 @initialize:python depends on report || org@
 @initialize:python depends on report || org@
 msg="Function call in the termination condition of a for loop"
 msg="Function call in the termination condition of a for loop"
+from re import sub
+orgmsg = sub(r'(%[a-z])', r'=\1=', msg)
 
 
 @r@
 @r@
 type t;
 type t;
@@ -62,17 +64,16 @@ expression r.E1;
 @script:python depends on r && org@
 @script:python depends on r && org@
 p << r.p;
 p << r.p;
 @@
 @@
-coccilib.org.print_todo(p[0], msg)
+coccilib.org.print_todo(p[0], orgmsg)
 
 
 @depends on r && patch@
 @depends on r && patch@
-type r.t;
 expression r.E1, E2, E3;
 expression r.E1, E2, E3;
 identifier r.it;
 identifier r.it;
 position r.p;
 position r.p;
 @@
 @@
 -for@p(it = E1; it < E3; E2) 
 -for@p(it = E1; it < E3; E2) 
-+t max = E3;
-+for(it = E1; i < max; E2) 
++max = E3;
++for(it = E1; it < max; E2)
 {
 {
 ...
 ...
 }
 }

+ 3 - 1
tools/dev/experimental/name_codelets.cocci

@@ -31,6 +31,8 @@ virtual report
 
 
 @initialize:python depends on org || report@
 @initialize:python depends on org || report@
 msg = "Warning: %s has no attribute name"
 msg = "Warning: %s has no attribute name"
+from re import sub
+orgmsg = sub(r'(%[a-z])', r'=\1=', msg)
 
 
 @found@
 @found@
 identifier cl;
 identifier cl;
@@ -61,7 +63,7 @@ position found.p;
 cl << found.cl;
 cl << found.cl;
 p << found.p;
 p << found.p;
 @@
 @@
-coccilib.org.print_todo(p[0], msg % cl)
+coccilib.org.print_todo(p[0], orgmsg % cl)
 
 
 // Patch mode.
 // Patch mode.
 @script:python stringify depends on found && !named && patch@
 @script:python stringify depends on found && !named && patch@

+ 3 - 1
tools/dev/experimental/not_unlocked_mutex.cocci

@@ -21,6 +21,8 @@ virtual report
 
 
 @initialize:python depends on report || org@
 @initialize:python depends on report || org@
 msg="The mutex \"%s\" is not unlocked when leaving \"%s\""
 msg="The mutex \"%s\" is not unlocked when leaving \"%s\""
+from re import sub
+orgmsg = sub(r'(%[a-z])', r'=\1=', msg)
 
 
 @r@
 @r@
 expression E;
 expression E;
@@ -53,7 +55,7 @@ f << r.func;
 E << r.E;
 E << r.E;
 @@
 @@
 for p in ps:
 for p in ps:
-	coccilib.org.print_todo(p, msg % (E, f))
+	coccilib.org.print_todo(p, orgmsg % (E, f))
 
 
 
 
 @depends on r && patch@
 @depends on r && patch@

+ 1 - 1
tools/dev/experimental/opencl_check_return_values.cocci

@@ -51,5 +51,5 @@ coccilib.report.print_report(p[0], msg)
 p << ignored_return_value.p;
 p << ignored_return_value.p;
 func << ignored_return_value.opencl_func;
 func << ignored_return_value.opencl_func;
 @@
 @@
-msg = "Ignoring the return value of %s." % func
+msg = "Ignoring the return value of =%s=." % func
 coccilib.org.print_todo(p[0], msg)
 coccilib.org.print_todo(p[0], msg)

+ 3 - 1
tools/dev/experimental/skip_valgrind.cocci

@@ -21,6 +21,8 @@ virtual report
 
 
 @initialize:python depends on report || org@
 @initialize:python depends on report || org@
 msg="Should you add STARPU_SKIP_IF_VALGRIND; at the beginning of this function ?"
 msg="Should you add STARPU_SKIP_IF_VALGRIND; at the beginning of this function ?"
+from re import sub
+orgmsg = sub(r'(%[a-z])', r'=\1=', msg)
 
 
 @find_codelet@
 @find_codelet@
 identifier a, b;
 identifier a, b;
@@ -66,7 +68,7 @@ position find_codelet.p;
 @script:python depends on find_codelet && !is_empty_codelet && !is_already_ok && org@
 @script:python depends on find_codelet && !is_empty_codelet && !is_already_ok && org@
 p << find_codelet.p;
 p << find_codelet.p;
 @@
 @@
-coccilib.org.print_todo(p[0], msg)
+coccilib.org.print_todo(p[0], orgmsg)
 
 
 @depends on find_codelet && !is_empty_codelet && !is_already_ok && patch@
 @depends on find_codelet && !is_empty_codelet && !is_already_ok && patch@
 identifier find_codelet.a, find_codelet.b;
 identifier find_codelet.a, find_codelet.b;

+ 3 - 1
tools/dev/experimental/unchecked_starpu_function_calls.cocci

@@ -27,6 +27,8 @@ virtual report
 
 
 @initialize:python depends on report || org@
 @initialize:python depends on report || org@
 msg = "Unchecked call to %s"
 msg = "Unchecked call to %s"
+from re import sub
+orgmsg = sub(r'(%[a-z])', r'=\1=', msg)
 
 
 @unchecked_starpu_func_call@
 @unchecked_starpu_func_call@
 identifier f;
 identifier f;
@@ -59,7 +61,7 @@ f(...)
 f << unchecked_starpu_func_call.starpu_function;
 f << unchecked_starpu_func_call.starpu_function;
 p << unchecked_starpu_func_call.p;
 p << unchecked_starpu_func_call.p;
 @@
 @@
-coccilib.org.print_todo(p[0], msg % f)
+coccilib.org.print_todo(p[0], orgmsg % f)
 
 
 // Patch mode.
 // Patch mode.
 @has_ret depends on unchecked_starpu_func_call@
 @has_ret depends on unchecked_starpu_func_call@

+ 3 - 1
tools/dev/experimental/use_starpu_macros.cocci

@@ -24,6 +24,8 @@ virtual report
 @initialize:python depends on report || org@
 @initialize:python depends on report || org@
 d = { 'abort':'STARPU_ABORT', 'assert':'STARPU_ASSERT'}
 d = { 'abort':'STARPU_ABORT', 'assert':'STARPU_ASSERT'}
 msg = "Please use %s rather than %s."
 msg = "Please use %s rather than %s."
+from re import sub
+orgmsg = sub(r'(%[a-z])', r'=\1=', msg)
 
 
 @r@
 @r@
 identifier f =~ "abort|assert";
 identifier f =~ "abort|assert";
@@ -88,7 +90,7 @@ expression E1, E2;
 p << r.p;
 p << r.p;
 f << r.f;
 f << r.f;
 @@
 @@
-coccilib.org.print_todo(p[0], msg % (d[str(f)], f))
+coccilib.org.print_todo(p[0], orgmsg % (d[str(f)], f))
 
 
 @script:python depends on min && org@
 @script:python depends on min && org@
 p << min.p;
 p << min.p;

+ 4 - 2
tools/dev/experimental/use_starpu_pthread_macros.cocci

@@ -43,6 +43,8 @@ d = {
 'pthread_spin_unlock'     : '_STARPU_PTHREAD_SPIN_UNLOCK'
 'pthread_spin_unlock'     : '_STARPU_PTHREAD_SPIN_UNLOCK'
 }
 }
 msg = "Use %s instead of %s."
 msg = "Use %s instead of %s."
+from re import sub
+orgmsg = sub(r'(%[a-z])', r'=\1=', msg)
 
 
 @r@
 @r@
 identifier f =~ "^pthread_";
 identifier f =~ "^pthread_";
@@ -66,9 +68,9 @@ p << r.p;
 f << r.f;
 f << r.f;
 @@
 @@
 if str(f) in d.keys():
 if str(f) in d.keys():
-	coccilib.org.print_todo(p[0], msg % (d[str(f)], f))
+	coccilib.org.print_todo(p[0], orgmsg % (d[str(f)], f))
 else:
 else:
-	coccilib.org.print_todo(p[0], "Shouldn't %s be wrapped in a macro ?" % str(f))
+	coccilib.org.print_todo(p[0], "Shouldn't =%s= be wrapped in a macro ?" % str(f))
 
 
 
 
 //
 //

tools/dev/check_unrenamed_list_types.sh → tools/dev/internal/check_unrenamed_list_types.sh


tools/dev/rename_internal.sed → tools/dev/internal/rename_internal.sed


tools/dev/rename_internal.sh → tools/dev/internal/rename_internal.sh


+ 1 - 1
tools/dev/mycocci.sh

@@ -121,7 +121,7 @@ do
 		scripts_dir=$OPTARG;
 		scripts_dir=$OPTARG;
 		;;
 		;;
 	t)
 	t)
-		target=$OPTARG;
+		target="$target $OPTARG";
 		;;
 		;;
 	\?)
 	\?)
 		echo "Invalid option -$OPTARG"
 		echo "Invalid option -$OPTARG"

+ 0 - 15
tools/dev/starpu_use_macro.sed

@@ -1,15 +0,0 @@
-# StarPU --- Runtime system for heterogeneous multicore architectures.
-#
-# Copyright (C) 2012 INRIA
-#
-# StarPU is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at
-# your option) any later version.
-#
-# StarPU is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#
-# See the GNU Lesser General Public License in COPYING.LGPL for more details.
-s/^#if STARPU_USE_/#ifdef STARPU_USE_/

+ 1 - 1
tools/dev/starpu_use_macro.sh

@@ -15,4 +15,4 @@
 #
 #
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 
 
-find . -type f -name "*.c" -not -path "*svn*" -exec sed -i -f $(dirname $0)/starpu_use_macro.sed {} \;
+find . -type f -name "*.c" -not -path "*svn*" -exec sed -i 's/^#if STARPU_USE_/#ifdef STARPU_USE_/' {} \;

+ 23 - 1
tools/valgrind/starpu.suppr

@@ -2,7 +2,7 @@
    config.running is not racy from starpu_shutdown
    config.running is not racy from starpu_shutdown
    Helgrind:Race
    Helgrind:Race
    fun:starpu_shutdown
    fun:starpu_shutdown
-   fun:main
+   ...
 }
 }
 
 
 {
 {
@@ -25,3 +25,25 @@
    fun:_starpu_msi_cache_miss
    fun:_starpu_msi_cache_miss
    ...
    ...
 }
 }
+
+{
+   known race, but not problematic in practice, see comment in _starpu_tag_clear
+   Helgrind:LockOrder
+   ...
+   fun:_starpu_tag_free
+   fun:_starpu_htbl_clear_tags
+   ...
+   fun:_starpu_tag_clear
+   fun:starpu_shutdown
+   ...
+}
+
+
+{
+   There is actually no race on current_mode, because the mode can not change unexpectedly, until _starpu_notify_data_dependencies() is called further down. Valgrind can not know about such software rwlock.
+   Helgrind:Race
+   fun:_starpu_release_data_on_node
+   fun:_starpu_push_task_output
+   ...
+}
+