4 年之前 · 015357bdd6
--- a/contrib/ci.inria.fr/job-0-tarball.sh
+++ b/contrib/ci.inria.fr/job-0-tarball.sh
@@ -22,7 +22,7 @@ export LD_LIBRARY_PATH=/home/ci/usr/local/lib:$LD_LIBRARY_PATH
 
				 if test -d build ; then chmod -R 777 build && rm -rf build ; fi
			
 
				 mkdir build && cd build
			
 
				 ../configure --enable-build-doc-pdf
			
 
				-make V=1
			
 
				+make -j4
			
 
				 make dist
			
 
				 cp *gz ..
			
 
				 cp doc/doxygen/starpu.pdf ..
			
--- a/contrib/ci.inria.fr/job-1-check.sh
+++ b/contrib/ci.inria.fr/job-1-check.sh
@@ -91,8 +91,7 @@ fi
 
				 export STARPU_TIMEOUT_ENV=1800
			
 
				 export MPIEXEC_TIMEOUT=1800
			
 
				 
			
 
				-make
			
 
				-#make check
			
 
				+make -j4
			
 
				 (make -k check || true) 2>&1 | tee  ../check_$$
			
 
				 make showsuite
			
 
				 
			
--- a/examples/cg/cg.h
+++ b/examples/cg/cg.h
@@ -32,7 +32,6 @@
 
				 #define TYPE	double
			
 
				 #define GEMV	STARPU_DGEMV
			
 
				 #define DOT	STARPU_DDOT
			
 
				-#define GEMV	STARPU_DGEMV
			
 
				 #define AXPY	STARPU_DAXPY
			
 
				 #define SCAL	STARPU_DSCAL
			
 
				 #define cublasdot	cublasDdot
			
@@ -44,7 +43,6 @@
 
				 #define TYPE	float
			
 
				 #define GEMV	STARPU_SGEMV
			
 
				 #define DOT	STARPU_SDOT
			
 
				-#define GEMV	STARPU_SGEMV
			
 
				 #define AXPY	STARPU_SAXPY
			
 
				 #define SCAL	STARPU_SSCAL
			
 
				 #define cublasdot	cublasSdot
			
--- a/examples/cg/cg_kernels.c
+++ b/examples/cg/cg_kernels.c
@@ -34,7 +34,7 @@ static const TYPE gm1 = -1.0;
 
				 static int nblocks = 8;
			
 
				 
			
 
				 #ifdef STARPU_QUICK_CHECK
			
 
				-static int i_max = 2;
			
 
				+static int i_max = 5;
			
 
				 static int long long n = 2048;
			
 
				 #elif !defined(STARPU_LONG_CHECK)
			
 
				 static int long long n = 4096;
			
@@ -154,7 +154,8 @@ struct starpu_codelet accumulate_variable_cl =
 
				 #endif
			
 
				 	.modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R},
			
 
				 	.nbuffers = 2,
			
 
				-	.model = &accumulate_variable_model
			
 
				+	.model = &accumulate_variable_model,
			
 
				+	.name = "accumulate_variable"
			
 
				 };
			
 
				 
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -198,7 +199,8 @@ struct starpu_codelet accumulate_vector_cl =
 
				 #endif
			
 
				 	.modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R},
			
 
				 	.nbuffers = 2,
			
 
				-	.model = &accumulate_vector_model
			
 
				+	.model = &accumulate_vector_model,
			
 
				+	.name = "accumulate_vector"
			
 
				 };
			
 
				 
			
 
				 /*
			
@@ -242,7 +244,8 @@ struct starpu_codelet bzero_variable_cl =
 
				 #endif
			
 
				 	.modes = {STARPU_W},
			
 
				 	.nbuffers = 1,
			
 
				-	.model = &bzero_variable_model
			
 
				+	.model = &bzero_variable_model,
			
 
				+	.name = "bzero_variable"
			
 
				 };
			
 
				 
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -283,7 +286,8 @@ struct starpu_codelet bzero_vector_cl =
 
				 #endif
			
 
				 	.modes = {STARPU_W},
			
 
				 	.nbuffers = 1,
			
 
				-	.model = &bzero_vector_model
			
 
				+	.model = &bzero_vector_model,
			
 
				+	.name = "bzero_vector"
			
 
				 };
			
 
				 
			
 
				 /*
			
@@ -343,7 +347,8 @@ static struct starpu_codelet dot_kernel_cl =
 
				 #endif
			
 
				 	.cuda_flags = {STARPU_CUDA_ASYNC},
			
 
				 	.nbuffers = 3,
			
 
				-	.model = &dot_kernel_model
			
 
				+	.model = &dot_kernel_model,
			
 
				+	.name = "dot_kernel"
			
 
				 };
			
 
				 
			
 
				 int dot_kernel(HANDLE_TYPE_VECTOR v1,
			
@@ -427,7 +432,8 @@ static struct starpu_codelet scal_kernel_cl =
 
				 	.cuda_flags = {STARPU_CUDA_ASYNC},
			
 
				 #endif
			
 
				 	.nbuffers = 1,
			
 
				-	.model = &scal_kernel_model
			
 
				+	.model = &scal_kernel_model,
			
 
				+	.name = "scal_kernel"
			
 
				 };
			
 
				 
			
 
				 /*
			
@@ -506,7 +512,8 @@ static struct starpu_codelet gemv_kernel_cl =
 
				 	.cuda_flags = {STARPU_CUDA_ASYNC},
			
 
				 #endif
			
 
				 	.nbuffers = 3,
			
 
				-	.model = &gemv_kernel_model
			
 
				+	.model = &gemv_kernel_model,
			
 
				+	.name = "gemv_kernel"
			
 
				 };
			
 
				 
			
 
				 int gemv_kernel(HANDLE_TYPE_VECTOR v1,
			
@@ -611,7 +618,8 @@ static struct starpu_codelet scal_axpy_kernel_cl =
 
				 	.cuda_flags = {STARPU_CUDA_ASYNC},
			
 
				 #endif
			
 
				 	.nbuffers = 2,
			
 
				-	.model = &scal_axpy_kernel_model
			
 
				+	.model = &scal_axpy_kernel_model,
			
 
				+	.name = "scal_axpy_kernel"
			
 
				 };
			
 
				 
			
 
				 int scal_axpy_kernel(HANDLE_TYPE_VECTOR v1, TYPE p1,
			
@@ -690,7 +698,8 @@ static struct starpu_codelet axpy_kernel_cl =
 
				 	.cuda_flags = {STARPU_CUDA_ASYNC},
			
 
				 #endif
			
 
				 	.nbuffers = 2,
			
 
				-	.model = &axpy_kernel_model
			
 
				+	.model = &axpy_kernel_model,
			
 
				+	.name = "axpy_kernel"
			
 
				 };
			
 
				 
			
 
				 int axpy_kernel(HANDLE_TYPE_VECTOR v1,
			
--- a/include/starpu_task.h
+++ b/include/starpu_task.h
@@ -789,11 +789,34 @@ struct starpu_task
 
				 
			
 
				 	/**
			
 
				 	   Optional field, the default value is <c>NULL</c>. This is a
			
 
				+	   function pointer of prototype <c>void (*f)(void *)</c> which
			
 
				+	   specifies a possible callback. If this pointer is non-<c>NULL</c>,
			
 
				+	   the callback function is executed on the host after the execution of
			
 
				+	   the task. Contrary to starpu_task::callback_func, it is called
			
 
				+	   before releasing tasks which depend on this task, so those cannot be
			
 
				+	   already executing. The callback is passed
			
 
				+	   the value contained in the starpu_task::epilogue_callback_arg field.
			
 
				+	   No callback is executed if the field is set to <c>NULL</c>.
			
 
				+	*/
			
 
				+	void (*epilogue_callback_func)(void *);
			
 
				+
			
 
				+	/**
			
 
				+	   Optional field, the default value is <c>NULL</c>. This is
			
 
				+	   the pointer passed to the epilogue callback function. This field is
			
 
				+	   ignored if the field starpu_task::epilogue_callback_func is set to
			
 
				+	   <c>NULL</c>.
			
 
				+	*/
			
 
				+	void *epilogue_callback_arg;
			
 
				+
			
 
				+	/**
			
 
				+	   Optional field, the default value is <c>NULL</c>. This is a
			
 
				 	   function pointer of prototype <c>void (*f)(void *)</c>
			
 
				 	   which specifies a possible callback. If this pointer is
			
 
				 	   non-<c>NULL</c>, the callback function is executed on the
			
 
				-	   host after the execution of the task. Tasks which depend on
			
 
				-	   it might already be executing. The callback is passed the
			
 
				+	   host after the execution of the task. Contrary to
			
 
				+	   starpu_task::epilogue_callback_func, it is called after releasing
			
 
				+	   tasks which depend on this task, so those
			
 
				+	   might already be executing. The callback is passed the
			
 
				 	   value contained in the starpu_task::callback_arg field. No
			
 
				 	   callback is executed if the field is set to <c>NULL</c>.
			
 
				 
			
@@ -821,30 +844,6 @@ struct starpu_task
 
				 
			
 
				 	/**
			
 
				 	   Optional field, the default value is <c>NULL</c>. This is a
			
 
				-	   function pointer of prototype <c>void (*f)(void *)</c> which specifies
			
 
				-	   a possible callback just as starpu_task::callback_func. But this function
			
 
				-	   is executed before task dependencies release. The callback is passed
			
 
				-	   the value contained in the starpu_task::epilogue_callback_arg field.
			
 
				-	   No callback is executed if the field is set to <c>NULL</c>.
			
 
				-
			
 
				-	   With starpu_task_insert() and alike this can be specified thanks to
			
 
				-	   ::STARPU_EPILOGUE_CALLBACK followed by the function pointer.
			
 
				-	*/
			
 
				-	void (*epilogue_callback_func)(void *);
			
 
				-
			
 
				-	/**
			
 
				-	   Optional field, the default value is <c>NULL</c>. This is
			
 
				-	   the pointer passed to the epilogue callback function. This field is
			
 
				-	   ignored if the field starpu_task::epilogue_callback_func is set to
			
 
				-	   <c>NULL</c>.
			
 
				-
			
 
				-	   With starpu_task_insert() and alike this can be specified thanks to
			
 
				-	   ::STARPU_EPILOGUE_CALLBACK_ARG followed by the argument.
			
 
				-	*/
			
 
				-	void *epilogue_callback_arg;
			
 
				-
			
 
				-	/**
			
 
				-	   Optional field, the default value is <c>NULL</c>. This is a
			
 
				 	   function pointer of prototype <c>void (*f)(void *)</c>
			
 
				 	   which specifies a possible callback. If this pointer is
			
 
				 	   non-<c>NULL</c>, the callback function is executed on the
			
--- a/mpi/examples/Makefile.am
+++ b/mpi/examples/Makefile.am
@@ -279,7 +279,6 @@ endif
 
				 if !STARPU_SIMGRID
			
 
				 if !STARPU_NO_BLAS_LIB
			
 
				 examplebin_PROGRAMS += cg/cg
			
 
				-starpu_mpi_EXAMPLES += cg/cg
			
 
				 
			
 
				 cg_cg_SOURCES =					\
			
 
				 	cg/cg.c						\
			
--- a/src/common/utils.h
+++ b/src/common/utils.h
@@ -179,6 +179,9 @@ struct starpu_codelet;
 
				 /** Returns the symbol associated to that job if any. */
			
 
				 const char *_starpu_codelet_get_model_name(struct starpu_codelet *cl);
			
 
				 
			
 
				+/** Returns the name of a codelet, falling back to the symbol of its perfmodel, or NULL if neither is set. */
			
 
				+const char *_starpu_codelet_get_name(struct starpu_codelet *cl);
			
 
				+
			
 
				 int _starpu_check_mutex_deadlock(starpu_pthread_mutex_t *mutex);
			
 
				 
			
 
				 void _starpu_util_init(void);
			
--- a/src/core/perfmodel/perfmodel_history.c
+++ b/src/core/perfmodel/perfmodel_history.c
@@ -76,6 +76,62 @@ LIST_TYPE(_starpu_perfmodel,
 
				 )
			
 
				 static struct _starpu_perfmodel_list registered_models;
			
 
				 
			
 
				+static char _starpu_perfmodel_hostname[STR_LONG_LENGTH];
			
 
				+
			
 
				+void starpu_perfmodel_initialize(void)
			
 
				+{
			
 
				+	/* make sure the performance model directory exists (or create it) */
			
 
				+	_starpu_create_sampling_directory_if_needed();
			
 
				+
			
 
				+	_starpu_perfmodel_list_init(&registered_models);
			
 
				+
			
 
				+	STARPU_PTHREAD_RWLOCK_INIT(&registered_models_rwlock, NULL);
			
 
				+	STARPU_PTHREAD_RWLOCK_INIT(&arch_combs_mutex, NULL);
			
 
				+
			
 
				+	_starpu_gethostname(_starpu_perfmodel_hostname, sizeof(_starpu_perfmodel_hostname));
			
 
				+}
			
 
				+
			
 
				+void _starpu_initialize_registered_performance_models(void)
			
 
				+{
			
 
				+	starpu_perfmodel_initialize();
			
 
				+
			
 
				+	struct _starpu_machine_config *conf = _starpu_get_machine_config();
			
 
				+	unsigned ncores = conf->topology.nhwworker[STARPU_CPU_WORKER][0];
			
 
				+	unsigned ncuda =  conf->topology.nhwdevices[STARPU_CUDA_WORKER];
			
 
				+	unsigned nopencl = conf->topology.nhwdevices[STARPU_OPENCL_WORKER];
			
 
				+	unsigned nmic = 0;
			
 
				+	enum starpu_worker_archtype archtype;
			
 
				+#if STARPU_MAXMICDEVS > 0 || STARPU_MAXMPIDEVS > 0
			
 
				+	unsigned i;
			
 
				+#endif
			
 
				+#if STARPU_MAXMICDEVS > 0
			
 
				+	for(i = 0; i < conf->topology.nhwdevices[STARPU_MIC_WORKER]; i++)
			
 
				+		nmic += conf->topology.nhwworker[STARPU_MIC_WORKER][i];
			
 
				+#endif
			
 
				+	unsigned nmpi = 0;
			
 
				+#if STARPU_MAXMPIDEVS > 0
			
 
				+	for(i = 0; i < conf->topology.nhwdevices[STARPU_MPI_MS_WORKER]; i++)
			
 
				+		nmpi += conf->topology.nhwworker[STARPU_MPI_MS_WORKER][i];
			
 
				+#endif
			
 
				+
			
 
				+	// We used to allocate 2**(ncores + ncuda + nopencl + nmic + nmpi), this is too big
			
 
				+	// We now allocate only 2*(ncores + ncuda + nopencl + nmic + nmpi), and reallocate when necessary in starpu_perfmodel_arch_comb_add
			
 
				+	nb_arch_combs = 2 * (ncores + ncuda + nopencl + nmic + nmpi);
			
 
				+	_STARPU_MALLOC(arch_combs, nb_arch_combs*sizeof(struct starpu_perfmodel_arch*));
			
 
				+	current_arch_comb = 0;
			
 
				+	historymaxerror = starpu_get_env_number_default("STARPU_HISTORY_MAX_ERROR", STARPU_HISTORYMAXERROR);
			
 
				+	_starpu_calibration_minimum = starpu_get_env_number_default("STARPU_CALIBRATE_MINIMUM", 10);
			
 
				+
			
 
				+	for (archtype = 0; archtype < STARPU_NARCH; archtype++)
			
 
				+	{
			
 
				+		char name[128];
			
 
				+		const char *arch = starpu_worker_get_type_as_env_var(archtype);
			
 
				+		int def = archtype == STARPU_CPU_WORKER ? 1 : 0;
			
 
				+		snprintf(name, sizeof(name), "STARPU_PERF_MODEL_HOMOGENEOUS_%s", arch);
			
 
				+		ignore_devid[archtype] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_CPU", def);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				 void _starpu_perfmodel_malloc_per_arch(struct starpu_perfmodel *model, int comb, int nb_impl)
			
 
				 {
			
 
				 	int i;
			
@@ -1145,19 +1201,14 @@ static void get_model_debug_path(struct starpu_perfmodel *model, const char *arc
 
				 {
			
 
				 	STARPU_ASSERT(path);
			
 
				 
			
 
				-	char hostname[STR_LONG_LENGTH];
			
 
				-	_starpu_gethostname(hostname, sizeof(hostname));
			
 
				-
			
 
				-	snprintf(path, maxlen, "%s/%s.%s.%s.debug", _starpu_get_perf_model_dir_debug(), model->symbol, hostname, arch);
			
 
				+	snprintf(path, maxlen, "%s/%s.%s.%s.debug", _starpu_get_perf_model_dir_debug(), model->symbol, _starpu_perfmodel_hostname, arch);
			
 
				 }
			
 
				 
			
 
				 void starpu_perfmodel_get_model_path(const char *symbol, char *path, size_t maxlen)
			
 
				 {
			
 
				-	char hostname[STR_LONG_LENGTH];
			
 
				-	_starpu_gethostname(hostname, sizeof(hostname));
			
 
				 	const char *dot = strrchr(symbol, '.');
			
 
				 
			
 
				-	snprintf(path, maxlen, "%s/%s%s%s", _starpu_get_perf_model_dir_codelet(), symbol, dot?"":".", dot?"":hostname);
			
 
				+	snprintf(path, maxlen, "%s/%s%s%s", _starpu_get_perf_model_dir_codelet(), symbol, dot?"":".", dot?"":_starpu_perfmodel_hostname);
			
 
				 }
			
 
				 
			
 
				 #ifndef STARPU_SIMGRID
			
@@ -1205,7 +1256,7 @@ static void _starpu_dump_registered_models(void)
 
				 	     node != _starpu_perfmodel_list_end(&registered_models);
			
 
				 	     node  = _starpu_perfmodel_list_next(node))
			
 
				 	{
			
 
				-		if (node->model->is_init)
			
 
				+		if (node->model->is_init && (node->model->type != STARPU_PER_WORKER && node->model->type != STARPU_PER_ARCH && node->model->type != STARPU_COMMON))
			
 
				 			starpu_save_history_based_model(node->model);
			
 
				 	}
			
 
				 
			
@@ -1213,58 +1264,6 @@ static void _starpu_dump_registered_models(void)
 
				 #endif
			
 
				 }
			
 
				 
			
 
				-void starpu_perfmodel_initialize(void)
			
 
				-{
			
 
				-	/* make sure the performance model directory exists (or create it) */
			
 
				-	_starpu_create_sampling_directory_if_needed();
			
 
				-
			
 
				-	_starpu_perfmodel_list_init(&registered_models);
			
 
				-
			
 
				-	STARPU_PTHREAD_RWLOCK_INIT(&registered_models_rwlock, NULL);
			
 
				-	STARPU_PTHREAD_RWLOCK_INIT(&arch_combs_mutex, NULL);
			
 
				-}
			
 
				-
			
 
				-void _starpu_initialize_registered_performance_models(void)
			
 
				-{
			
 
				-	starpu_perfmodel_initialize();
			
 
				-
			
 
				-	struct _starpu_machine_config *conf = _starpu_get_machine_config();
			
 
				-	unsigned ncores = conf->topology.nhwworker[STARPU_CPU_WORKER][0];
			
 
				-	unsigned ncuda =  conf->topology.nhwdevices[STARPU_CUDA_WORKER];
			
 
				-	unsigned nopencl = conf->topology.nhwdevices[STARPU_OPENCL_WORKER];
			
 
				-	unsigned nmic = 0;
			
 
				-	enum starpu_worker_archtype archtype;
			
 
				-#if STARPU_MAXMICDEVS > 0 || STARPU_MAXMPIDEVS > 0
			
 
				-	unsigned i;
			
 
				-#endif
			
 
				-#if STARPU_MAXMICDEVS > 0
			
 
				-	for(i = 0; i < conf->topology.nhwdevices[STARPU_MIC_WORKER]; i++)
			
 
				-		nmic += conf->topology.nhwworker[STARPU_MIC_WORKER][i];
			
 
				-#endif
			
 
				-	unsigned nmpi = 0;
			
 
				-#if STARPU_MAXMPIDEVS > 0
			
 
				-	for(i = 0; i < conf->topology.nhwdevices[STARPU_MPI_MS_WORKER]; i++)
			
 
				-		nmpi += conf->topology.nhwworker[STARPU_MPI_MS_WORKER][i];
			
 
				-#endif
			
 
				-
			
 
				-	// We used to allocate 2**(ncores + ncuda + nopencl + nmic + nmpi), this is too big
			
 
				-	// We now allocate only 2*(ncores + ncuda + nopencl + nmic + nmpi), and reallocate when necessary in starpu_perfmodel_arch_comb_add
			
 
				-	nb_arch_combs = 2 * (ncores + ncuda + nopencl + nmic + nmpi);
			
 
				-	_STARPU_MALLOC(arch_combs, nb_arch_combs*sizeof(struct starpu_perfmodel_arch*));
			
 
				-	current_arch_comb = 0;
			
 
				-	historymaxerror = starpu_get_env_number_default("STARPU_HISTORY_MAX_ERROR", STARPU_HISTORYMAXERROR);
			
 
				-	_starpu_calibration_minimum = starpu_get_env_number_default("STARPU_CALIBRATE_MINIMUM", 10);
			
 
				-
			
 
				-	for (archtype = 0; archtype < STARPU_NARCH; archtype++)
			
 
				-	{
			
 
				-		char name[128];
			
 
				-		const char *arch = starpu_worker_get_type_as_env_var(archtype);
			
 
				-		int def = archtype == STARPU_CPU_WORKER ? 1 : 0;
			
 
				-		snprintf(name, sizeof(name), "STARPU_PERF_MODEL_HOMOGENEOUS_%s", arch);
			
 
				-		ignore_devid[archtype] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_CPU", def);
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				 void _starpu_deinitialize_performance_model(struct starpu_perfmodel *model)
			
 
				 {
			
 
				 	if(model->is_init && model->state && model->state->per_arch != NULL)
			
--- a/src/datawizard/coherency.c
+++ b/src/datawizard/coherency.c
@@ -774,14 +774,7 @@ int _starpu_fetch_data_on_node(starpu_data_handle_t handle, int node, struct _st
 
				 {
			
 
				         _STARPU_LOG_IN();
			
 
				 
			
 
				-	int cpt = 0;
			
 
				-	while (cpt < STARPU_SPIN_MAXTRY && _starpu_spin_trylock(&handle->header_lock))
			
 
				-	{
			
 
				-		cpt++;
			
 
				-		_starpu_datawizard_progress(STARPU_DATAWIZARD_DO_ALLOC);
			
 
				-	}
			
 
				-	if (cpt == STARPU_SPIN_MAXTRY)
			
 
				-		_starpu_spin_lock(&handle->header_lock);
			
 
				+	_starpu_spin_lock(&handle->header_lock);
			
 
				 
			
 
				 	if (mode & STARPU_R && is_prefetch > STARPU_FETCH)
			
 
				 	{
			
--- a/src/datawizard/copy_driver.c
+++ b/src/datawizard/copy_driver.c
@@ -274,6 +274,7 @@ void starpu_interface_end_driver_copy_async(unsigned src_node, unsigned dst_node
 
				 	if (elapsed > 300)
			
 
				 	{
			
 
				 		static int warned = 0;
			
 
				+		STARPU_HG_DISABLE_CHECKING(warned);
			
 
				 		if (!warned)
			
 
				 		{
			
 
				 			char src_name[16], dst_name[16];
			
--- a/src/datawizard/datawizard.c
+++ b/src/datawizard/datawizard.c
@@ -124,6 +124,11 @@ int __starpu_datawizard_progress(enum _starpu_may_alloc may_alloc, unsigned push
 
				 
			
 
				 		return ret;
			
 
				 	}
			
 
				+
			
 
				+	/* processing requests may release some tasks, we cannot be already
			
 
				+	 * scheduling a task. */
			
 
				+	STARPU_ASSERT(!worker->state_sched_op_pending);
			
 
				+
			
 
				 	if (worker->set)
			
 
				 		/* Runing one of the workers of a worker set. The reference for
			
 
				 		 * driving memory is its worker 0 (see registrations in topology.c) */
			
--- a/src/datawizard/memalloc.c
+++ b/src/datawizard/memalloc.c
@@ -1068,6 +1068,7 @@ size_t _starpu_memory_reclaim_generic(unsigned node, unsigned force, size_t recl
 
				 	if (reclaim && !force)
			
 
				 	{
			
 
				 		static unsigned warned;
			
 
				+		STARPU_HG_DISABLE_CHECKING(warned);
			
 
				 		if (!warned)
			
 
				 		{
			
 
				 			if (STARPU_ATOMIC_ADD(&warned, 1) == 1)
			
@@ -1290,6 +1291,7 @@ void starpu_memchunk_tidy(unsigned node)
 
				 		goto out;
			
 
				 
			
 
				 	static unsigned warned;
			
 
				+	STARPU_HG_DISABLE_CHECKING(warned);
			
 
				 	if (!warned)
			
 
				 	{
			
 
				 		if (STARPU_ATOMIC_ADD(&warned, 1) == 1)
			
--- a/src/datawizard/reduction.c
+++ b/src/datawizard/reduction.c
@@ -54,7 +54,7 @@ void _starpu_redux_init_data_replicate(starpu_data_handle_t handle, struct _star
 
				 	STARPU_ASSERT(replicate->allocated);
			
 
				 
			
 
				 	struct starpu_codelet *init_cl = handle->init_cl;
			
 
				-	STARPU_ASSERT(init_cl);
			
 
				+	STARPU_ASSERT_MSG(init_cl, "There is no initialisation codelet for the reduction of the handle %p. Maybe you forget to call starpu_data_set_reduction_methods() ?", handle->root_handle);
			
 
				 
			
 
				 	_starpu_cl_func_t init_func = NULL;
			
 
				 
			
@@ -289,6 +289,7 @@ void _starpu_data_end_reduction_mode(starpu_data_handle_t handle)
 
				 					if (!(STARPU_CODELET_GET_MODE(redux_task->cl, 0) & STARPU_COMMUTE))
			
 
				 					{
			
 
				 						static int warned;
			
 
				+						STARPU_HG_DISABLE_CHECKING(warned);
			
 
				 						if (!warned)
			
 
				 						{
			
 
				 							warned = 1;
			
--- a/src/sched_policies/component_fifo.c
+++ b/src/sched_policies/component_fifo.c
@@ -108,6 +108,7 @@ static int fifo_push_local_task(struct starpu_sched_component * component, struc
 
				 		if (!is_pushback && data->exp_len_threshold != 0.0 && exp_len >= data->exp_len_threshold)
			
 
				 		{
			
 
				 			static int warned;
			
 
				+			STARPU_HG_DISABLE_CHECKING(warned);
			
 
				 			if(data->exp_len_threshold != 0.0 && task->predicted > data->exp_len_threshold && !warned)
			
 
				 			{
			
 
				 				_STARPU_DISP("Warning : a predicted task length (%lf) exceeds the expected length threshold (%lf) of a prio component queue, you should reconsider the value of this threshold. This message will not be printed again for further thresholds exceeding.\n",task->predicted,data->exp_len_threshold);
			
--- a/src/sched_policies/component_perfmodel_select.c
+++ b/src/sched_policies/component_perfmodel_select.c
@@ -46,6 +46,7 @@ static int perfmodel_select_push_task(struct starpu_sched_component * component,
 
				 		if(isnan(length))
			
 
				 		{
			
 
				 			static int warned;
			
 
				+			STARPU_HG_DISABLE_CHECKING(warned);
			
 
				 			if (!warned)
			
 
				 			{
			
 
				 				warned = 1;
			
--- a/src/sched_policies/component_prio.c
+++ b/src/sched_policies/component_prio.c
@@ -129,6 +129,7 @@ static int prio_push_local_task(struct starpu_sched_component * component, struc
 
				 		if (!is_pushback && data->exp_len_threshold != 0.0 && exp_len >= data->exp_len_threshold)
			
 
				 		{
			
 
				 			static int warned;
			
 
				+			STARPU_HG_DISABLE_CHECKING(warned);
			
 
				 			if(data->exp_len_threshold != 0.0 && task->predicted > data->exp_len_threshold && !warned)
			
 
				 			{
			
 
				 				_STARPU_DISP("Warning : a predicted task length (%lf) exceeds the expected length threshold (%lf) of a prio component queue, you should reconsider the value of this threshold. This message will not be printed again for further thresholds exceeding.\n",task->predicted,data->exp_len_threshold);
			
--- a/src/sched_policies/component_worker.c
+++ b/src/sched_policies/component_worker.c
@@ -178,6 +178,8 @@ static struct _starpu_worker_task_list * _starpu_worker_task_list_create(void)
 
				 	/* These are only for statistics */
			
 
				 	STARPU_HG_DISABLE_CHECKING(l->exp_end);
			
 
				 	STARPU_HG_DISABLE_CHECKING(l->exp_start);
			
 
				+	STARPU_HG_DISABLE_CHECKING(l->exp_len);
			
 
				+	STARPU_HG_DISABLE_CHECKING(l->pipeline_len);
			
 
				 	STARPU_PTHREAD_MUTEX_INIT(&l->mutex,NULL);
			
 
				 	return l;
			
 
				 }
			
--- a/src/sched_policies/fifo_queues.c
+++ b/src/sched_policies/fifo_queues.c
@@ -60,6 +60,9 @@ void _starpu_init_fifo(struct _starpu_fifo_taskq *fifo)
 
				 	fifo->exp_end = fifo->exp_start;
			
 
				 	fifo->exp_len_per_priority = NULL;
			
 
				 	fifo->pipeline_len = 0.0;
			
 
				+	STARPU_HG_DISABLE_CHECKING(fifo->exp_start);
			
 
				+	STARPU_HG_DISABLE_CHECKING(fifo->exp_len);
			
 
				+	STARPU_HG_DISABLE_CHECKING(fifo->exp_end);
			
 
				 }
			
 
				 
			
 
				 struct _starpu_fifo_taskq *_starpu_create_fifo(void)
			
--- a/src/sched_policies/graph_test_policy.c
+++ b/src/sched_policies/graph_test_policy.c
@@ -118,6 +118,7 @@ static struct _starpu_prio_deque *select_prio(unsigned sched_ctx_id, struct _sta
 
				 			if (!task->cl || task->cl->model == NULL)
			
 
				 			{
			
 
				 				static unsigned _warned;
			
 
				+				STARPU_HG_DISABLE_CHECKING(_warned);
			
 
				 				if (STARPU_ATOMIC_ADD(&_warned, 1) == 1)
			
 
				 				{
			
 
				 					_STARPU_DISP("Warning: graph_test needs performance models for all tasks, including %s\n",
			
--- a/src/sched_policies/parallel_heft.c
+++ b/src/sched_policies/parallel_heft.c
@@ -365,6 +365,7 @@ static int _parallel_heft_push_task(struct starpu_task *task, unsigned prio, uns
 
				 			if (isnan(local_task_length[worker_ctx][nimpl]))
			
 
				 			{
			
 
				 				static int warned;
			
 
				+				STARPU_HG_DISABLE_CHECKING(warned);
			
 
				 				if (!warned)
			
 
				 				{
			
 
				 					warned = 1;
			
--- a/src/sched_policies/prio_deque.h
+++ b/src/sched_policies/prio_deque.h
@@ -39,6 +39,9 @@ static inline void _starpu_prio_deque_init(struct _starpu_prio_deque *pdeque)
 
				 {
			
 
				 	memset(pdeque,0,sizeof(*pdeque));
			
 
				 	starpu_task_prio_list_init(&pdeque->list);
			
 
				+	STARPU_HG_DISABLE_CHECKING(pdeque->exp_start);
			
 
				+	STARPU_HG_DISABLE_CHECKING(pdeque->exp_end);
			
 
				+	STARPU_HG_DISABLE_CHECKING(pdeque->exp_len);
			
 
				 }
			
 
				 
			
 
				 static inline void _starpu_prio_deque_destroy(struct _starpu_prio_deque *pdeque)
			
--- a/src/util/misc.c
+++ b/src/util/misc.c
@@ -18,6 +18,19 @@
 
				 #include <common/utils.h>
			
 
				 #include <core/jobs.h>
			
 
				 
			
 
				+const char *_starpu_codelet_get_name(struct starpu_codelet *cl)
			
 
				+{
			
 
				+	if (!cl)
			
 
				+		return NULL;
			
 
				+
			
 
				+	if (cl->name)
			
 
				+		return cl->name;
			
 
				+	else if (cl->model && cl->model->symbol && cl->model->symbol[0])
			
 
				+		return cl->model->symbol;
			
 
				+	else
			
 
				+		return NULL;
			
 
				+}
			
 
				+
			
 
				 const char *_starpu_codelet_get_model_name(struct starpu_codelet *cl)
			
 
				 {
			
 
				 	if (!cl)
			
--- a/src/util/starpu_clusters_create.c
+++ b/src/util/starpu_clusters_create.c
@@ -736,6 +736,7 @@ void _starpu_cluster(struct _starpu_cluster_group *group)
 
				 
			
 
				 		if (size > 1)
			
 
				 		{
			
 
				+			STARPU_HG_DISABLE_CHECKING(starpu_cluster_warned);
			
 
				 			if (!starpu_cluster_warned)
			
 
				 			{
			
 
				 				_STARPU_DISP("STARPU CLUSTERS: Caution! It seems that you have"
			
--- a/src/util/starpu_task_insert_utils.c
+++ b/src/util/starpu_task_insert_utils.c
@@ -393,7 +393,7 @@ void starpu_task_insert_data_process_arg(struct starpu_codelet *cl, struct starp
 
				 	{
			
 
				 		STARPU_ASSERT_MSG(STARPU_CODELET_GET_MODE(cl, *current_buffer) == arg_mode,
			
 
				 				  "The codelet <%s> defines the access mode %d for the buffer %d which is different from the mode %d given to starpu_task_insert\n",
			
 
				-				  cl->name, STARPU_CODELET_GET_MODE(cl, *current_buffer),
			
 
				+				  _starpu_codelet_get_name(cl), STARPU_CODELET_GET_MODE(cl, *current_buffer),
			
 
				 				  *current_buffer, arg_mode);
			
 
				 	}
			
 
				 	else
			
@@ -443,7 +443,7 @@ void starpu_task_insert_data_process_mode_array_arg(struct starpu_codelet *cl, s
 
				 		{
			
 
				 			STARPU_ASSERT_MSG(STARPU_CODELET_GET_MODE(cl, *current_buffer) == descrs[i].mode,
			
 
				 					"The codelet <%s> defines the access mode %d for the buffer %d which is different from the mode %d given to starpu_task_insert\n",
			
 
				-					cl->name, STARPU_CODELET_GET_MODE(cl, *current_buffer),
			
 
				+					_starpu_codelet_get_name(cl), STARPU_CODELET_GET_MODE(cl, *current_buffer),
			
 
				 					*current_buffer, descrs[i].mode);
			
 
				 		}
			
 
				 		else
			
--- a/starpupy/src/starpu_task_wrapper.c
+++ b/starpupy/src/starpu_task_wrapper.c
@@ -248,8 +248,8 @@ void starpupy_codelet_func(void *buffers[], void *cl_arg)
 
				 	PyGILState_Release(state);
			
 
				 }
			
 
				 
			
 
				-/*function passed to starpu_task.callback_func*/
			
 
				-void cb_func(void *v)
			
 
				+/*function passed to starpu_task.epilogue_callback_func*/
			
 
				+void epilogue_cb_func(void *v)
			
 
				 {
			
 
				 	PyObject *fut; /*asyncio.Future*/
			
 
				 	PyObject *loop; /*asyncio.Eventloop*/
			
@@ -316,6 +316,11 @@ void cb_func(void *v)
 
				 
			
 
				 	/*restore previous GIL state*/
			
 
				 	PyGILState_Release(state);
			
 
				+}
			
 
				+
			
 
				+void cb_func(void *v)
			
 
				+{
			
 
				+	struct starpu_task *task = starpu_task_get_current();
			
 
				 
			
 
				 	/*deallocate task*/
			
 
				 	free(task->cl);
			
@@ -576,7 +581,8 @@ static PyObject* starpu_task_submit_wrapper(PyObject *self, PyObject *args)
 
				 	starpu_codelet_pack_arg_fini(&data, &task->cl_arg, &task->cl_arg_size);
			
 
				 
			
 
				 	task->prologue_callback_func=&prologue_cb_func;
			
 
				-	task->epilogue_callback_func=&cb_func;
			
 
				+	task->epilogue_callback_func=&epilogue_cb_func;
			
 
				+	task->callback_func=&cb_func;
			
 
				 
			
 
				 	/*call starpu_task_submit method*/
			
 
				 	int ret;
			
--- a/tools/dev/valgrind/libc.suppr
+++ b/tools/dev/valgrind/libc.suppr
@@ -59,7 +59,7 @@
 
				    Ignore libc printf races
			
 
				    Helgrind:Race
			
 
				    ...
			
 
				-   fun:vfprintf
			
 
				+   fun:__vfprintf_internal
			
 
				    ...
			
 
				 }
			
 
				 
			
--- a/tools/dev/valgrind/starpu.suppr
+++ b/tools/dev/valgrind/starpu.suppr
@@ -132,6 +132,15 @@
 
				 }
			
 
				 
			
 
				 {
			
 
				+   mc / handle locking order1
			
 
				+   Helgrind:LockOrder
			
 
				+   ...
			
 
				+   fun:__starpu_spin_lock
			
 
				+   fun:try_to_throw_mem_chunk
			
 
				+   ...
			
 
				+}
			
 
				+
			
 
				+{
			
 
				    mc / handle locking order2
			
 
				    Helgrind:LockOrder
			
 
				    ...
			
@@ -141,6 +150,15 @@
 
				 }
			
 
				 
			
 
				 {
			
 
				+   mc / handle locking order2
			
 
				+   Helgrind:LockOrder
			
 
				+   ...
			
 
				+   fun:__starpu_spin_lock
			
 
				+   fun:try_to_find_reusable_mc
			
 
				+   ...
			
 
				+}
			
 
				+
			
 
				+{
			
 
				    mc / handle locking order3
			
 
				    Helgrind:LockOrder
			
 
				    ...