浏览代码

Merge remote-tracking branch 'origin/master'

HE Kun 4 年之前
父节点
当前提交
015357bdd6

+ 1 - 1
contrib/ci.inria.fr/job-0-tarball.sh

@@ -22,7 +22,7 @@ export LD_LIBRARY_PATH=/home/ci/usr/local/lib:$LD_LIBRARY_PATH
 if test -d build ; then chmod -R 777 build && rm -rf build ; fi
 mkdir build && cd build
 ../configure --enable-build-doc-pdf
-make V=1
+make -j4
 make dist
 cp *gz ..
 cp doc/doxygen/starpu.pdf ..

+ 1 - 2
contrib/ci.inria.fr/job-1-check.sh

@@ -91,8 +91,7 @@ fi
 export STARPU_TIMEOUT_ENV=1800
 export MPIEXEC_TIMEOUT=1800
 
-make
-#make check
+make -j4
 (make -k check || true) 2>&1 | tee  ../check_$$
 make showsuite
 

+ 0 - 2
examples/cg/cg.h

@@ -32,7 +32,6 @@
 #define TYPE	double
 #define GEMV	STARPU_DGEMV
 #define DOT	STARPU_DDOT
-#define GEMV	STARPU_DGEMV
 #define AXPY	STARPU_DAXPY
 #define SCAL	STARPU_DSCAL
 #define cublasdot	cublasDdot
@@ -44,7 +43,6 @@
 #define TYPE	float
 #define GEMV	STARPU_SGEMV
 #define DOT	STARPU_SDOT
-#define GEMV	STARPU_SGEMV
 #define AXPY	STARPU_SAXPY
 #define SCAL	STARPU_SSCAL
 #define cublasdot	cublasSdot

+ 19 - 10
examples/cg/cg_kernels.c

@@ -34,7 +34,7 @@ static const TYPE gm1 = -1.0;
 static int nblocks = 8;
 
 #ifdef STARPU_QUICK_CHECK
-static int i_max = 2;
+static int i_max = 5;
 static int long long n = 2048;
 #elif !defined(STARPU_LONG_CHECK)
 static int long long n = 4096;
@@ -154,7 +154,8 @@ struct starpu_codelet accumulate_variable_cl =
 #endif
 	.modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R},
 	.nbuffers = 2,
-	.model = &accumulate_variable_model
+	.model = &accumulate_variable_model,
+	.name = "accumulate_variable"
 };
 
 #ifdef STARPU_USE_CUDA
@@ -198,7 +199,8 @@ struct starpu_codelet accumulate_vector_cl =
 #endif
 	.modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R},
 	.nbuffers = 2,
-	.model = &accumulate_vector_model
+	.model = &accumulate_vector_model,
+	.name = "accumulate_vector"
 };
 
 /*
@@ -242,7 +244,8 @@ struct starpu_codelet bzero_variable_cl =
 #endif
 	.modes = {STARPU_W},
 	.nbuffers = 1,
-	.model = &bzero_variable_model
+	.model = &bzero_variable_model,
+	.name = "bzero_variable"
 };
 
 #ifdef STARPU_USE_CUDA
@@ -283,7 +286,8 @@ struct starpu_codelet bzero_vector_cl =
 #endif
 	.modes = {STARPU_W},
 	.nbuffers = 1,
-	.model = &bzero_vector_model
+	.model = &bzero_vector_model,
+	.name = "bzero_vector"
 };
 
 /*
@@ -343,7 +347,8 @@ static struct starpu_codelet dot_kernel_cl =
 #endif
 	.cuda_flags = {STARPU_CUDA_ASYNC},
 	.nbuffers = 3,
-	.model = &dot_kernel_model
+	.model = &dot_kernel_model,
+	.name = "dot_kernel"
 };
 
 int dot_kernel(HANDLE_TYPE_VECTOR v1,
@@ -427,7 +432,8 @@ static struct starpu_codelet scal_kernel_cl =
 	.cuda_flags = {STARPU_CUDA_ASYNC},
 #endif
 	.nbuffers = 1,
-	.model = &scal_kernel_model
+	.model = &scal_kernel_model,
+	.name = "scal_kernel"
 };
 
 /*
@@ -506,7 +512,8 @@ static struct starpu_codelet gemv_kernel_cl =
 	.cuda_flags = {STARPU_CUDA_ASYNC},
 #endif
 	.nbuffers = 3,
-	.model = &gemv_kernel_model
+	.model = &gemv_kernel_model,
+	.name = "gemv_kernel"
 };
 
 int gemv_kernel(HANDLE_TYPE_VECTOR v1,
@@ -611,7 +618,8 @@ static struct starpu_codelet scal_axpy_kernel_cl =
 	.cuda_flags = {STARPU_CUDA_ASYNC},
 #endif
 	.nbuffers = 2,
-	.model = &scal_axpy_kernel_model
+	.model = &scal_axpy_kernel_model,
+	.name = "scal_axpy_kernel"
 };
 
 int scal_axpy_kernel(HANDLE_TYPE_VECTOR v1, TYPE p1,
@@ -690,7 +698,8 @@ static struct starpu_codelet axpy_kernel_cl =
 	.cuda_flags = {STARPU_CUDA_ASYNC},
 #endif
 	.nbuffers = 2,
-	.model = &axpy_kernel_model
+	.model = &axpy_kernel_model,
+	.name = "axpy_kernel"
 };
 
 int axpy_kernel(HANDLE_TYPE_VECTOR v1,

+ 25 - 26
include/starpu_task.h

@@ -789,11 +789,34 @@ struct starpu_task
 
 	/**
 	   Optional field, the default value is <c>NULL</c>. This is a
+	   function pointer of prototype <c>void (*f)(void *)</c> which
+	   specifies a possible callback. If this pointer is non-<c>NULL</c>,
+	   the callback function is executed on the host after the execution of
+	   the task. Contrary to starpu_task::callback_func, it is called
+	   before releasing tasks which depend on this task, so those cannot be
+	   already executing. The callback is passed
+	   the value contained in the starpu_task::epilogue_callback_arg field.
+	   No callback is executed if the field is set to <c>NULL</c>.
+	*/
+	void (*epilogue_callback_func)(void *);
+
+	/**
+	   Optional field, the default value is <c>NULL</c>. This is
+	   the pointer passed to the epilogue callback function. This field is
+	   ignored if the field starpu_task::epilogue_callback_func is set to
+	   <c>NULL</c>.
+	*/
+	void *epilogue_callback_arg;
+
+	/**
+	   Optional field, the default value is <c>NULL</c>. This is a
 	   function pointer of prototype <c>void (*f)(void *)</c>
 	   which specifies a possible callback. If this pointer is
 	   non-<c>NULL</c>, the callback function is executed on the
-	   host after the execution of the task. Tasks which depend on
-	   it might already be executing. The callback is passed the
+	   host after the execution of the task. Contrary to
+	   starpu_task::epilogue_callback_func, it is called after releasing
+	   tasks which depend on this task, so those
+	   might already be executing. The callback is passed the
 	   value contained in the starpu_task::callback_arg field. No
 	   callback is executed if the field is set to <c>NULL</c>.
 
@@ -821,30 +844,6 @@ struct starpu_task
 
 	/**
 	   Optional field, the default value is <c>NULL</c>. This is a
-	   function pointer of prototype <c>void (*f)(void *)</c> which specifies
-	   a possible callback just as starpu_task::callback_func. But this function
-	   is executed before task dependencies release. The callback is passed
-	   the value contained in the starpu_task::epilogue_callback_arg field.
-	   No callback is executed if the field is set to <c>NULL</c>.
-
-	   With starpu_task_insert() and alike this can be specified thanks to
-	   ::STARPU_EPILOGUE_CALLBACK followed by the function pointer.
-	*/
-	void (*epilogue_callback_func)(void *);
-
-	/**
-	   Optional field, the default value is <c>NULL</c>. This is
-	   the pointer passed to the epilogue callback function. This field is
-	   ignored if the field starpu_task::epilogue_callback_func is set to
-	   <c>NULL</c>.
-
-	   With starpu_task_insert() and alike this can be specified thanks to
-	   ::STARPU_EPILOGUE_CALLBACK_ARG followed by the argument.
-	*/
-	void *epilogue_callback_arg;
-
-	/**
-	   Optional field, the default value is <c>NULL</c>. This is a
 	   function pointer of prototype <c>void (*f)(void *)</c>
 	   which specifies a possible callback. If this pointer is
 	   non-<c>NULL</c>, the callback function is executed on the

+ 0 - 1
mpi/examples/Makefile.am

@@ -279,7 +279,6 @@ endif
 if !STARPU_SIMGRID
 if !STARPU_NO_BLAS_LIB
 examplebin_PROGRAMS += cg/cg
-starpu_mpi_EXAMPLES += cg/cg
 
 cg_cg_SOURCES =					\
 	cg/cg.c						\

+ 3 - 0
src/common/utils.h

@@ -179,6 +179,9 @@ struct starpu_codelet;
 /** Returns the symbol associated to that job if any. */
 const char *_starpu_codelet_get_model_name(struct starpu_codelet *cl);
 
+/** Returns the name of a codelet, or falls back to the symbol of its performance model when the codelet has no name. */
+const char *_starpu_codelet_get_name(struct starpu_codelet *cl);
+
 int _starpu_check_mutex_deadlock(starpu_pthread_mutex_t *mutex);
 
 void _starpu_util_init(void);

+ 59 - 60
src/core/perfmodel/perfmodel_history.c

@@ -76,6 +76,62 @@ LIST_TYPE(_starpu_perfmodel,
 )
 static struct _starpu_perfmodel_list registered_models;
 
+static char _starpu_perfmodel_hostname[STR_LONG_LENGTH];
+
+/**
+ * Set up the global state of the performance-model layer: make sure the
+ * sampling directory exists, initialise the list of registered models,
+ * initialise the registered_models_rwlock and arch_combs_mutex rwlocks,
+ * and cache the host name in _starpu_perfmodel_hostname.
+ */
+void starpu_perfmodel_initialize(void)
+{
+	/* make sure the performance model directory exists (or create it) */
+	_starpu_create_sampling_directory_if_needed();
+
+	_starpu_perfmodel_list_init(&registered_models);
+
+	STARPU_PTHREAD_RWLOCK_INIT(&registered_models_rwlock, NULL);
+	STARPU_PTHREAD_RWLOCK_INIT(&arch_combs_mutex, NULL);
+
+	/* Fetched once here; get_model_debug_path() and
+	 * starpu_perfmodel_get_model_path() read this cached value instead of
+	 * querying the host name on every call. */
+	_starpu_gethostname(_starpu_perfmodel_hostname, sizeof(_starpu_perfmodel_hostname));
+}
+
+/**
+ * Initialise the performance-model layer (through
+ * starpu_perfmodel_initialize()), size the arch_combs table from the machine
+ * topology, and read the tuning values STARPU_HISTORY_MAX_ERROR,
+ * STARPU_CALIBRATE_MINIMUM and STARPU_PERF_MODEL_HOMOGENEOUS_<arch> from the
+ * environment.
+ */
+void _starpu_initialize_registered_performance_models(void)
+{
+	starpu_perfmodel_initialize();
+
+	struct _starpu_machine_config *conf = _starpu_get_machine_config();
+	unsigned ncores = conf->topology.nhwworker[STARPU_CPU_WORKER][0];
+	unsigned ncuda =  conf->topology.nhwdevices[STARPU_CUDA_WORKER];
+	unsigned nopencl = conf->topology.nhwdevices[STARPU_OPENCL_WORKER];
+	unsigned nmic = 0;
+	enum starpu_worker_archtype archtype;
+#if STARPU_MAXMICDEVS > 0 || STARPU_MAXMPIDEVS > 0
+	unsigned i;
+#endif
+#if STARPU_MAXMICDEVS > 0
+	for(i = 0; i < conf->topology.nhwdevices[STARPU_MIC_WORKER]; i++)
+		nmic += conf->topology.nhwworker[STARPU_MIC_WORKER][i];
+#endif
+	unsigned nmpi = 0;
+#if STARPU_MAXMPIDEVS > 0
+	for(i = 0; i < conf->topology.nhwdevices[STARPU_MPI_MS_WORKER]; i++)
+		nmpi += conf->topology.nhwworker[STARPU_MPI_MS_WORKER][i];
+#endif
+
+	// We used to allocate 2**(ncores + ncuda + nopencl + nmic + nmpi), this is too big
+	// We now allocate only 2*(ncores + ncuda + nopencl + nmic + nmpi), and reallocate when necessary in starpu_perfmodel_arch_comb_add
+	nb_arch_combs = 2 * (ncores + ncuda + nopencl + nmic + nmpi);
+	_STARPU_MALLOC(arch_combs, nb_arch_combs*sizeof(struct starpu_perfmodel_arch*));
+	current_arch_comb = 0;
+	historymaxerror = starpu_get_env_number_default("STARPU_HISTORY_MAX_ERROR", STARPU_HISTORYMAXERROR);
+	_starpu_calibration_minimum = starpu_get_env_number_default("STARPU_CALIBRATE_MINIMUM", 10);
+
+	for (archtype = 0; archtype < STARPU_NARCH; archtype++)
+	{
+		char name[128];
+		const char *arch = starpu_worker_get_type_as_env_var(archtype);
+		/* Only the CPU architecture defaults to 1; all others default to 0. */
+		int def = archtype == STARPU_CPU_WORKER ? 1 : 0;
+		snprintf(name, sizeof(name), "STARPU_PERF_MODEL_HOMOGENEOUS_%s", arch);
+		/* Query the per-architecture variable built just above, not the
+		 * CPU one, so that each architecture gets its own setting. */
+		ignore_devid[archtype] = starpu_get_env_number_default(name, def);
+	}
+}
+
 void _starpu_perfmodel_malloc_per_arch(struct starpu_perfmodel *model, int comb, int nb_impl)
 {
 	int i;
@@ -1145,19 +1201,14 @@ static void get_model_debug_path(struct starpu_perfmodel *model, const char *arc
 {
 	STARPU_ASSERT(path);
 
-	char hostname[STR_LONG_LENGTH];
-	_starpu_gethostname(hostname, sizeof(hostname));
-
-	snprintf(path, maxlen, "%s/%s.%s.%s.debug", _starpu_get_perf_model_dir_debug(), model->symbol, hostname, arch);
+	snprintf(path, maxlen, "%s/%s.%s.%s.debug", _starpu_get_perf_model_dir_debug(), model->symbol, _starpu_perfmodel_hostname, arch);
 }
 
 void starpu_perfmodel_get_model_path(const char *symbol, char *path, size_t maxlen)
 {
-	char hostname[STR_LONG_LENGTH];
-	_starpu_gethostname(hostname, sizeof(hostname));
 	const char *dot = strrchr(symbol, '.');
 
-	snprintf(path, maxlen, "%s/%s%s%s", _starpu_get_perf_model_dir_codelet(), symbol, dot?"":".", dot?"":hostname);
+	snprintf(path, maxlen, "%s/%s%s%s", _starpu_get_perf_model_dir_codelet(), symbol, dot?"":".", dot?"":_starpu_perfmodel_hostname);
 }
 
 #ifndef STARPU_SIMGRID
@@ -1205,7 +1256,7 @@ static void _starpu_dump_registered_models(void)
 	     node != _starpu_perfmodel_list_end(&registered_models);
 	     node  = _starpu_perfmodel_list_next(node))
 	{
-		if (node->model->is_init)
+		if (node->model->is_init && (node->model->type != STARPU_PER_WORKER && node->model->type != STARPU_PER_ARCH && node->model->type != STARPU_COMMON))
 			starpu_save_history_based_model(node->model);
 	}
 
@@ -1213,58 +1264,6 @@ static void _starpu_dump_registered_models(void)
 #endif
 }
 
-void starpu_perfmodel_initialize(void)
-{
-	/* make sure the performance model directory exists (or create it) */
-	_starpu_create_sampling_directory_if_needed();
-
-	_starpu_perfmodel_list_init(&registered_models);
-
-	STARPU_PTHREAD_RWLOCK_INIT(&registered_models_rwlock, NULL);
-	STARPU_PTHREAD_RWLOCK_INIT(&arch_combs_mutex, NULL);
-}
-
-void _starpu_initialize_registered_performance_models(void)
-{
-	starpu_perfmodel_initialize();
-
-	struct _starpu_machine_config *conf = _starpu_get_machine_config();
-	unsigned ncores = conf->topology.nhwworker[STARPU_CPU_WORKER][0];
-	unsigned ncuda =  conf->topology.nhwdevices[STARPU_CUDA_WORKER];
-	unsigned nopencl = conf->topology.nhwdevices[STARPU_OPENCL_WORKER];
-	unsigned nmic = 0;
-	enum starpu_worker_archtype archtype;
-#if STARPU_MAXMICDEVS > 0 || STARPU_MAXMPIDEVS > 0
-	unsigned i;
-#endif
-#if STARPU_MAXMICDEVS > 0
-	for(i = 0; i < conf->topology.nhwdevices[STARPU_MIC_WORKER]; i++)
-		nmic += conf->topology.nhwworker[STARPU_MIC_WORKER][i];
-#endif
-	unsigned nmpi = 0;
-#if STARPU_MAXMPIDEVS > 0
-	for(i = 0; i < conf->topology.nhwdevices[STARPU_MPI_MS_WORKER]; i++)
-		nmpi += conf->topology.nhwworker[STARPU_MPI_MS_WORKER][i];
-#endif
-
-	// We used to allocate 2**(ncores + ncuda + nopencl + nmic + nmpi), this is too big
-	// We now allocate only 2*(ncores + ncuda + nopencl + nmic + nmpi), and reallocate when necessary in starpu_perfmodel_arch_comb_add
-	nb_arch_combs = 2 * (ncores + ncuda + nopencl + nmic + nmpi);
-	_STARPU_MALLOC(arch_combs, nb_arch_combs*sizeof(struct starpu_perfmodel_arch*));
-	current_arch_comb = 0;
-	historymaxerror = starpu_get_env_number_default("STARPU_HISTORY_MAX_ERROR", STARPU_HISTORYMAXERROR);
-	_starpu_calibration_minimum = starpu_get_env_number_default("STARPU_CALIBRATE_MINIMUM", 10);
-
-	for (archtype = 0; archtype < STARPU_NARCH; archtype++)
-	{
-		char name[128];
-		const char *arch = starpu_worker_get_type_as_env_var(archtype);
-		int def = archtype == STARPU_CPU_WORKER ? 1 : 0;
-		snprintf(name, sizeof(name), "STARPU_PERF_MODEL_HOMOGENEOUS_%s", arch);
-		ignore_devid[archtype] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_CPU", def);
-	}
-}
-
 void _starpu_deinitialize_performance_model(struct starpu_perfmodel *model)
 {
 	if(model->is_init && model->state && model->state->per_arch != NULL)

+ 1 - 8
src/datawizard/coherency.c

@@ -774,14 +774,7 @@ int _starpu_fetch_data_on_node(starpu_data_handle_t handle, int node, struct _st
 {
         _STARPU_LOG_IN();
 
-	int cpt = 0;
-	while (cpt < STARPU_SPIN_MAXTRY && _starpu_spin_trylock(&handle->header_lock))
-	{
-		cpt++;
-		_starpu_datawizard_progress(STARPU_DATAWIZARD_DO_ALLOC);
-	}
-	if (cpt == STARPU_SPIN_MAXTRY)
-		_starpu_spin_lock(&handle->header_lock);
+	_starpu_spin_lock(&handle->header_lock);
 
 	if (mode & STARPU_R && is_prefetch > STARPU_FETCH)
 	{

+ 1 - 0
src/datawizard/copy_driver.c

@@ -274,6 +274,7 @@ void starpu_interface_end_driver_copy_async(unsigned src_node, unsigned dst_node
 	if (elapsed > 300)
 	{
 		static int warned = 0;
+		STARPU_HG_DISABLE_CHECKING(warned);
 		if (!warned)
 		{
 			char src_name[16], dst_name[16];

+ 5 - 0
src/datawizard/datawizard.c

@@ -124,6 +124,11 @@ int __starpu_datawizard_progress(enum _starpu_may_alloc may_alloc, unsigned push
 
 		return ret;
 	}
+
+	/* Processing requests may release some tasks, so this must not run
+	 * while a scheduling operation is already pending on this worker. */
+	STARPU_ASSERT(!worker->state_sched_op_pending);
+
 	if (worker->set)
 		/* Runing one of the workers of a worker set. The reference for
 		 * driving memory is its worker 0 (see registrations in topology.c) */

+ 2 - 0
src/datawizard/memalloc.c

@@ -1068,6 +1068,7 @@ size_t _starpu_memory_reclaim_generic(unsigned node, unsigned force, size_t recl
 	if (reclaim && !force)
 	{
 		static unsigned warned;
+		STARPU_HG_DISABLE_CHECKING(warned);
 		if (!warned)
 		{
 			if (STARPU_ATOMIC_ADD(&warned, 1) == 1)
@@ -1290,6 +1291,7 @@ void starpu_memchunk_tidy(unsigned node)
 		goto out;
 
 	static unsigned warned;
+	STARPU_HG_DISABLE_CHECKING(warned);
 	if (!warned)
 	{
 		if (STARPU_ATOMIC_ADD(&warned, 1) == 1)

+ 2 - 1
src/datawizard/reduction.c

@@ -54,7 +54,7 @@ void _starpu_redux_init_data_replicate(starpu_data_handle_t handle, struct _star
 	STARPU_ASSERT(replicate->allocated);
 
 	struct starpu_codelet *init_cl = handle->init_cl;
-	STARPU_ASSERT(init_cl);
+	STARPU_ASSERT_MSG(init_cl, "There is no initialisation codelet for the reduction of the handle %p. Maybe you forget to call starpu_data_set_reduction_methods() ?", handle->root_handle);
 
 	_starpu_cl_func_t init_func = NULL;
 
@@ -289,6 +289,7 @@ void _starpu_data_end_reduction_mode(starpu_data_handle_t handle)
 					if (!(STARPU_CODELET_GET_MODE(redux_task->cl, 0) & STARPU_COMMUTE))
 					{
 						static int warned;
+						STARPU_HG_DISABLE_CHECKING(warned);
 						if (!warned)
 						{
 							warned = 1;

+ 1 - 0
src/sched_policies/component_fifo.c

@@ -108,6 +108,7 @@ static int fifo_push_local_task(struct starpu_sched_component * component, struc
 		if (!is_pushback && data->exp_len_threshold != 0.0 && exp_len >= data->exp_len_threshold)
 		{
 			static int warned;
+			STARPU_HG_DISABLE_CHECKING(warned);
 			if(data->exp_len_threshold != 0.0 && task->predicted > data->exp_len_threshold && !warned)
 			{
 				_STARPU_DISP("Warning : a predicted task length (%lf) exceeds the expected length threshold (%lf) of a prio component queue, you should reconsider the value of this threshold. This message will not be printed again for further thresholds exceeding.\n",task->predicted,data->exp_len_threshold);

+ 1 - 0
src/sched_policies/component_perfmodel_select.c

@@ -46,6 +46,7 @@ static int perfmodel_select_push_task(struct starpu_sched_component * component,
 		if(isnan(length))
 		{
 			static int warned;
+			STARPU_HG_DISABLE_CHECKING(warned);
 			if (!warned)
 			{
 				warned = 1;

+ 1 - 0
src/sched_policies/component_prio.c

@@ -129,6 +129,7 @@ static int prio_push_local_task(struct starpu_sched_component * component, struc
 		if (!is_pushback && data->exp_len_threshold != 0.0 && exp_len >= data->exp_len_threshold)
 		{
 			static int warned;
+			STARPU_HG_DISABLE_CHECKING(warned);
 			if(data->exp_len_threshold != 0.0 && task->predicted > data->exp_len_threshold && !warned)
 			{
 				_STARPU_DISP("Warning : a predicted task length (%lf) exceeds the expected length threshold (%lf) of a prio component queue, you should reconsider the value of this threshold. This message will not be printed again for further thresholds exceeding.\n",task->predicted,data->exp_len_threshold);

+ 2 - 0
src/sched_policies/component_worker.c

@@ -178,6 +178,8 @@ static struct _starpu_worker_task_list * _starpu_worker_task_list_create(void)
 	/* These are only for statistics */
 	STARPU_HG_DISABLE_CHECKING(l->exp_end);
 	STARPU_HG_DISABLE_CHECKING(l->exp_start);
+	STARPU_HG_DISABLE_CHECKING(l->exp_len);
+	STARPU_HG_DISABLE_CHECKING(l->pipeline_len);
 	STARPU_PTHREAD_MUTEX_INIT(&l->mutex,NULL);
 	return l;
 }

+ 3 - 0
src/sched_policies/fifo_queues.c

@@ -60,6 +60,9 @@ void _starpu_init_fifo(struct _starpu_fifo_taskq *fifo)
 	fifo->exp_end = fifo->exp_start;
 	fifo->exp_len_per_priority = NULL;
 	fifo->pipeline_len = 0.0;
+	STARPU_HG_DISABLE_CHECKING(fifo->exp_start);
+	STARPU_HG_DISABLE_CHECKING(fifo->exp_len);
+	STARPU_HG_DISABLE_CHECKING(fifo->exp_end);
 }
 
 struct _starpu_fifo_taskq *_starpu_create_fifo(void)

+ 1 - 0
src/sched_policies/graph_test_policy.c

@@ -118,6 +118,7 @@ static struct _starpu_prio_deque *select_prio(unsigned sched_ctx_id, struct _sta
 			if (!task->cl || task->cl->model == NULL)
 			{
 				static unsigned _warned;
+				STARPU_HG_DISABLE_CHECKING(_warned);
 				if (STARPU_ATOMIC_ADD(&_warned, 1) == 1)
 				{
 					_STARPU_DISP("Warning: graph_test needs performance models for all tasks, including %s\n",

+ 1 - 0
src/sched_policies/parallel_heft.c

@@ -365,6 +365,7 @@ static int _parallel_heft_push_task(struct starpu_task *task, unsigned prio, uns
 			if (isnan(local_task_length[worker_ctx][nimpl]))
 			{
 				static int warned;
+				STARPU_HG_DISABLE_CHECKING(warned);
 				if (!warned)
 				{
 					warned = 1;

+ 3 - 0
src/sched_policies/prio_deque.h

@@ -39,6 +39,9 @@ static inline void _starpu_prio_deque_init(struct _starpu_prio_deque *pdeque)
 {
 	memset(pdeque,0,sizeof(*pdeque));
 	starpu_task_prio_list_init(&pdeque->list);
+	STARPU_HG_DISABLE_CHECKING(pdeque->exp_start);
+	STARPU_HG_DISABLE_CHECKING(pdeque->exp_end);
+	STARPU_HG_DISABLE_CHECKING(pdeque->exp_len);
 }
 
 static inline void _starpu_prio_deque_destroy(struct _starpu_prio_deque *pdeque)

+ 13 - 0
src/util/misc.c

@@ -18,6 +18,19 @@
 #include <common/utils.h>
 #include <core/jobs.h>
 
+/* Pick a printable name for a codelet: its own name when set, otherwise the
+ * non-empty symbol of its performance model. Returns NULL when the codelet
+ * is NULL or when neither name is available. */
+const char *_starpu_codelet_get_name(struct starpu_codelet *cl)
+{
+	if (cl == NULL)
+		return NULL;
+
+	if (cl->name != NULL)
+		return cl->name;
+
+	/* Fall back to the perfmodel symbol, but only if it is a non-empty string. */
+	int has_symbol = cl->model != NULL
+		&& cl->model->symbol != NULL
+		&& cl->model->symbol[0] != '\0';
+
+	return has_symbol ? cl->model->symbol : NULL;
+}
+
 const char *_starpu_codelet_get_model_name(struct starpu_codelet *cl)
 {
 	if (!cl)

+ 1 - 0
src/util/starpu_clusters_create.c

@@ -736,6 +736,7 @@ void _starpu_cluster(struct _starpu_cluster_group *group)
 
 		if (size > 1)
 		{
+			STARPU_HG_DISABLE_CHECKING(starpu_cluster_warned);
 			if (!starpu_cluster_warned)
 			{
 				_STARPU_DISP("STARPU CLUSTERS: Caution! It seems that you have"

+ 2 - 2
src/util/starpu_task_insert_utils.c

@@ -393,7 +393,7 @@ void starpu_task_insert_data_process_arg(struct starpu_codelet *cl, struct starp
 	{
 		STARPU_ASSERT_MSG(STARPU_CODELET_GET_MODE(cl, *current_buffer) == arg_mode,
 				  "The codelet <%s> defines the access mode %d for the buffer %d which is different from the mode %d given to starpu_task_insert\n",
-				  cl->name, STARPU_CODELET_GET_MODE(cl, *current_buffer),
+				  _starpu_codelet_get_name(cl), STARPU_CODELET_GET_MODE(cl, *current_buffer),
 				  *current_buffer, arg_mode);
 	}
 	else
@@ -443,7 +443,7 @@ void starpu_task_insert_data_process_mode_array_arg(struct starpu_codelet *cl, s
 		{
 			STARPU_ASSERT_MSG(STARPU_CODELET_GET_MODE(cl, *current_buffer) == descrs[i].mode,
 					"The codelet <%s> defines the access mode %d for the buffer %d which is different from the mode %d given to starpu_task_insert\n",
-					cl->name, STARPU_CODELET_GET_MODE(cl, *current_buffer),
+					_starpu_codelet_get_name(cl), STARPU_CODELET_GET_MODE(cl, *current_buffer),
 					*current_buffer, descrs[i].mode);
 		}
 		else

+ 9 - 3
starpupy/src/starpu_task_wrapper.c

@@ -248,8 +248,8 @@ void starpupy_codelet_func(void *buffers[], void *cl_arg)
 	PyGILState_Release(state);
 }
 
-/*function passed to starpu_task.callback_func*/
-void cb_func(void *v)
+/*function passed to starpu_task.epilogue_callback_func*/
+void epilogue_cb_func(void *v)
 {
 	PyObject *fut; /*asyncio.Future*/
 	PyObject *loop; /*asyncio.Eventloop*/
@@ -316,6 +316,11 @@ void cb_func(void *v)
 
 	/*restore previous GIL state*/
 	PyGILState_Release(state);
+}
+
+void cb_func(void *v)
+{
+	struct starpu_task *task = starpu_task_get_current();
 
 	/*deallocate task*/
 	free(task->cl);
@@ -576,7 +581,8 @@ static PyObject* starpu_task_submit_wrapper(PyObject *self, PyObject *args)
 	starpu_codelet_pack_arg_fini(&data, &task->cl_arg, &task->cl_arg_size);
 
 	task->prologue_callback_func=&prologue_cb_func;
-	task->epilogue_callback_func=&cb_func;
+	task->epilogue_callback_func=&epilogue_cb_func;
+	task->callback_func=&cb_func;
 
 	/*call starpu_task_submit method*/
 	int ret;

+ 1 - 1
tools/dev/valgrind/libc.suppr

@@ -59,7 +59,7 @@
    Ignore libc printf races
    Helgrind:Race
    ...
-   fun:vfprintf
+   fun:__vfprintf_internal
    ...
 }
 

+ 18 - 0
tools/dev/valgrind/starpu.suppr

@@ -132,6 +132,15 @@
 }
 
 {
+   mc / handle locking order1
+   Helgrind:LockOrder
+   ...
+   fun:__starpu_spin_lock
+   fun:try_to_throw_mem_chunk
+   ...
+}
+
+{
    mc / handle locking order2
    Helgrind:LockOrder
    ...
@@ -141,6 +150,15 @@
 }
 
 {
+   mc / handle locking order2
+   Helgrind:LockOrder
+   ...
+   fun:__starpu_spin_lock
+   fun:try_to_find_reusable_mc
+   ...
+}
+
+{
    mc / handle locking order3
    Helgrind:LockOrder
    ...