4 jaren geleden · eadeb66929
--- a/examples/cg/cg.h
+++ b/examples/cg/cg.h
@@ -32,7 +32,6 @@
 
				 #define TYPE	double
			
 
				 #define GEMV	STARPU_DGEMV
			
 
				 #define DOT	STARPU_DDOT
			
 
				-#define GEMV	STARPU_DGEMV
			
 
				 #define AXPY	STARPU_DAXPY
			
 
				 #define SCAL	STARPU_DSCAL
			
 
				 #define cublasdot	cublasDdot
			
@@ -44,7 +43,6 @@
 
				 #define TYPE	float
			
 
				 #define GEMV	STARPU_SGEMV
			
 
				 #define DOT	STARPU_SDOT
			
 
				-#define GEMV	STARPU_SGEMV
			
 
				 #define AXPY	STARPU_SAXPY
			
 
				 #define SCAL	STARPU_SSCAL
			
 
				 #define cublasdot	cublasSdot
			
--- a/examples/cg/cg_kernels.c
+++ b/examples/cg/cg_kernels.c
@@ -34,7 +34,7 @@ static const TYPE gm1 = -1.0;
 
				 static int nblocks = 8;
			
 
				 
			
 
				 #ifdef STARPU_QUICK_CHECK
			
 
				-static int i_max = 2;
			
 
				+static int i_max = 5;
			
 
				 static int long long n = 2048;
			
 
				 #elif !defined(STARPU_LONG_CHECK)
			
 
				 static int long long n = 4096;
			
@@ -154,7 +154,8 @@ struct starpu_codelet accumulate_variable_cl =
 
				 #endif
			
 
				 	.modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R},
			
 
				 	.nbuffers = 2,
			
 
				-	.model = &accumulate_variable_model
			
 
				+	.model = &accumulate_variable_model,
			
 
				+	.name = "accumulate_variable"
			
 
				 };
			
 
				 
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -198,7 +199,8 @@ struct starpu_codelet accumulate_vector_cl =
 
				 #endif
			
 
				 	.modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R},
			
 
				 	.nbuffers = 2,
			
 
				-	.model = &accumulate_vector_model
			
 
				+	.model = &accumulate_vector_model,
			
 
				+	.name = "accumulate_vector"
			
 
				 };
			
 
				 
			
 
				 /*
			
@@ -242,7 +244,8 @@ struct starpu_codelet bzero_variable_cl =
 
				 #endif
			
 
				 	.modes = {STARPU_W},
			
 
				 	.nbuffers = 1,
			
 
				-	.model = &bzero_variable_model
			
 
				+	.model = &bzero_variable_model,
			
 
				+	.name = "bzero_variable"
			
 
				 };
			
 
				 
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -283,7 +286,8 @@ struct starpu_codelet bzero_vector_cl =
 
				 #endif
			
 
				 	.modes = {STARPU_W},
			
 
				 	.nbuffers = 1,
			
 
				-	.model = &bzero_vector_model
			
 
				+	.model = &bzero_vector_model,
			
 
				+	.name = "bzero_vector"
			
 
				 };
			
 
				 
			
 
				 /*
			
@@ -343,7 +347,8 @@ static struct starpu_codelet dot_kernel_cl =
 
				 #endif
			
 
				 	.cuda_flags = {STARPU_CUDA_ASYNC},
			
 
				 	.nbuffers = 3,
			
 
				-	.model = &dot_kernel_model
			
 
				+	.model = &dot_kernel_model,
			
 
				+	.name = "dot_kernel"
			
 
				 };
			
 
				 
			
 
				 int dot_kernel(HANDLE_TYPE_VECTOR v1,
			
@@ -427,7 +432,8 @@ static struct starpu_codelet scal_kernel_cl =
 
				 	.cuda_flags = {STARPU_CUDA_ASYNC},
			
 
				 #endif
			
 
				 	.nbuffers = 1,
			
 
				-	.model = &scal_kernel_model
			
 
				+	.model = &scal_kernel_model,
			
 
				+	.name = "scal_kernel"
			
 
				 };
			
 
				 
			
 
				 /*
			
@@ -506,7 +512,8 @@ static struct starpu_codelet gemv_kernel_cl =
 
				 	.cuda_flags = {STARPU_CUDA_ASYNC},
			
 
				 #endif
			
 
				 	.nbuffers = 3,
			
 
				-	.model = &gemv_kernel_model
			
 
				+	.model = &gemv_kernel_model,
			
 
				+	.name = "gemv_kernel"
			
 
				 };
			
 
				 
			
 
				 int gemv_kernel(HANDLE_TYPE_VECTOR v1,
			
@@ -611,7 +618,8 @@ static struct starpu_codelet scal_axpy_kernel_cl =
 
				 	.cuda_flags = {STARPU_CUDA_ASYNC},
			
 
				 #endif
			
 
				 	.nbuffers = 2,
			
 
				-	.model = &scal_axpy_kernel_model
			
 
				+	.model = &scal_axpy_kernel_model,
			
 
				+	.name = "scal_axpy_kernel"
			
 
				 };
			
 
				 
			
 
				 int scal_axpy_kernel(HANDLE_TYPE_VECTOR v1, TYPE p1,
			
@@ -690,7 +698,8 @@ static struct starpu_codelet axpy_kernel_cl =
 
				 	.cuda_flags = {STARPU_CUDA_ASYNC},
			
 
				 #endif
			
 
				 	.nbuffers = 2,
			
 
				-	.model = &axpy_kernel_model
			
 
				+	.model = &axpy_kernel_model,
			
 
				+	.name = "axpy_kernel"
			
 
				 };
			
 
				 
			
 
				 int axpy_kernel(HANDLE_TYPE_VECTOR v1,
			
--- a/mpi/examples/Makefile.am
+++ b/mpi/examples/Makefile.am
@@ -279,7 +279,6 @@ endif
 
				 if !STARPU_SIMGRID
			
 
				 if !STARPU_NO_BLAS_LIB
			
 
				 examplebin_PROGRAMS += cg/cg
			
 
				-starpu_mpi_EXAMPLES += cg/cg
			
 
				 
			
 
				 cg_cg_SOURCES =					\
			
 
				 	cg/cg.c						\
			
--- a/src/common/utils.h
+++ b/src/common/utils.h
@@ -179,6 +179,9 @@ struct starpu_codelet;
 
				 /** Returns the symbol associated to that job if any. */
			
 
				 const char *_starpu_codelet_get_model_name(struct starpu_codelet *cl);
			
 
				 
			
 
				+/** Returns the name of a codelet, or falls back to the symbol of its perfmodel; returns NULL if neither is available. */
			
 
				+const char *_starpu_codelet_get_name(struct starpu_codelet *cl);
			
 
				+
			
 
				 int _starpu_check_mutex_deadlock(starpu_pthread_mutex_t *mutex);
			
 
				 
			
 
				 void _starpu_util_init(void);
			
--- a/src/core/perfmodel/perfmodel_history.c
+++ b/src/core/perfmodel/perfmodel_history.c
@@ -76,6 +76,62 @@ LIST_TYPE(_starpu_perfmodel,
 
				 )
			
 
				 static struct _starpu_perfmodel_list registered_models;
			
 
				 
			
 
				+static char _starpu_perfmodel_hostname[STR_LONG_LENGTH];
			
 
				+
			
 
				+void starpu_perfmodel_initialize(void)
			
 
				+{
			
 
				+	/* Set up this file's global perfmodel state. First, make sure the performance model directory exists (or create it). */
			
 
				+	_starpu_create_sampling_directory_if_needed();
			
 
				+
			
 
				+	_starpu_perfmodel_list_init(&registered_models);
			
 
				+
			
 
				+	STARPU_PTHREAD_RWLOCK_INIT(&registered_models_rwlock, NULL);
			
 
				+	STARPU_PTHREAD_RWLOCK_INIT(&arch_combs_mutex, NULL);
			
 
				+	/* Cache the hostname once: get_model_debug_path() and starpu_perfmodel_get_model_path() read this buffer instead of querying it on each call. */
			
 
				+	_starpu_gethostname(_starpu_perfmodel_hostname, sizeof(_starpu_perfmodel_hostname));
			
 
				+}
			
 
				+
			
 
				+void _starpu_initialize_registered_performance_models(void)
			
 
				+{
			
 
				+	starpu_perfmodel_initialize();
			
 
				+	/* Size the arch-combination table from the hardware topology, then read the perfmodel tuning environment variables. */
			
 
				+	struct _starpu_machine_config *conf = _starpu_get_machine_config();
			
 
				+	unsigned ncores = conf->topology.nhwworker[STARPU_CPU_WORKER][0];
			
 
				+	unsigned ncuda =  conf->topology.nhwdevices[STARPU_CUDA_WORKER];
			
 
				+	unsigned nopencl = conf->topology.nhwdevices[STARPU_OPENCL_WORKER];
			
 
				+	unsigned nmic = 0;
			
 
				+	enum starpu_worker_archtype archtype;
			
 
				+#if STARPU_MAXMICDEVS > 0 || STARPU_MAXMPIDEVS > 0
			
 
				+	unsigned i;
			
 
				+#endif
			
 
				+#if STARPU_MAXMICDEVS > 0
			
 
				+	for(i = 0; i < conf->topology.nhwdevices[STARPU_MIC_WORKER]; i++)
			
 
				+		nmic += conf->topology.nhwworker[STARPU_MIC_WORKER][i];
			
 
				+#endif
			
 
				+	unsigned nmpi = 0;
			
 
				+#if STARPU_MAXMPIDEVS > 0
			
 
				+	for(i = 0; i < conf->topology.nhwdevices[STARPU_MPI_MS_WORKER]; i++)
			
 
				+		nmpi += conf->topology.nhwworker[STARPU_MPI_MS_WORKER][i];
			
 
				+#endif
			
 
				+
			
 
				+	// We used to allocate 2**(ncores + ncuda + nopencl + nmic + nmpi), this is too big
			
 
				+	// We now allocate only 2*(ncores + ncuda + nopencl + nmic + nmpi), and reallocate when necessary in starpu_perfmodel_arch_comb_add
			
 
				+	nb_arch_combs = 2 * (ncores + ncuda + nopencl + nmic + nmpi);
			
 
				+	_STARPU_MALLOC(arch_combs, nb_arch_combs*sizeof(struct starpu_perfmodel_arch*));
			
 
				+	current_arch_comb = 0;
			
 
				+	historymaxerror = starpu_get_env_number_default("STARPU_HISTORY_MAX_ERROR", STARPU_HISTORYMAXERROR);
			
 
				+	_starpu_calibration_minimum = starpu_get_env_number_default("STARPU_CALIBRATE_MINIMUM", 10);
			
 
				+	/* Each architecture reads its own STARPU_PERF_MODEL_HOMOGENEOUS_<ARCH> variable; only the CPU one defaults to 1. */
			
 
				+	for (archtype = 0; archtype < STARPU_NARCH; archtype++)
			
 
				+	{
			
 
				+		char name[128];
			
 
				+		const char *arch = starpu_worker_get_type_as_env_var(archtype);
			
 
				+		int def = archtype == STARPU_CPU_WORKER ? 1 : 0;
			
 
				+		snprintf(name, sizeof(name), "STARPU_PERF_MODEL_HOMOGENEOUS_%s", arch);
			
 
				+		ignore_devid[archtype] = starpu_get_env_number_default(name, def);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				 void _starpu_perfmodel_malloc_per_arch(struct starpu_perfmodel *model, int comb, int nb_impl)
			
 
				 {
			
 
				 	int i;
			
@@ -1145,19 +1201,14 @@ static void get_model_debug_path(struct starpu_perfmodel *model, const char *arc
 
				 {
			
 
				 	STARPU_ASSERT(path);
			
 
				 
			
 
				-	char hostname[STR_LONG_LENGTH];
			
 
				-	_starpu_gethostname(hostname, sizeof(hostname));
			
 
				-
			
 
				-	snprintf(path, maxlen, "%s/%s.%s.%s.debug", _starpu_get_perf_model_dir_debug(), model->symbol, hostname, arch);
			
 
				+	snprintf(path, maxlen, "%s/%s.%s.%s.debug", _starpu_get_perf_model_dir_debug(), model->symbol, _starpu_perfmodel_hostname, arch);
			
 
				 }
			
 
				 
			
 
				 void starpu_perfmodel_get_model_path(const char *symbol, char *path, size_t maxlen)
			
 
				 {
			
 
				-	char hostname[STR_LONG_LENGTH];
			
 
				-	_starpu_gethostname(hostname, sizeof(hostname));
			
 
				 	const char *dot = strrchr(symbol, '.');
			
 
				 
			
 
				-	snprintf(path, maxlen, "%s/%s%s%s", _starpu_get_perf_model_dir_codelet(), symbol, dot?"":".", dot?"":hostname);
			
 
				+	snprintf(path, maxlen, "%s/%s%s%s", _starpu_get_perf_model_dir_codelet(), symbol, dot?"":".", dot?"":_starpu_perfmodel_hostname);
			
 
				 }
			
 
				 
			
 
				 #ifndef STARPU_SIMGRID
			
@@ -1213,58 +1264,6 @@ static void _starpu_dump_registered_models(void)
 
				 #endif
			
 
				 }
			
 
				 
			
 
				-void starpu_perfmodel_initialize(void)
			
 
				-{
			
 
				-	/* make sure the performance model directory exists (or create it) */
			
 
				-	_starpu_create_sampling_directory_if_needed();
			
 
				-
			
 
				-	_starpu_perfmodel_list_init(&registered_models);
			
 
				-
			
 
				-	STARPU_PTHREAD_RWLOCK_INIT(&registered_models_rwlock, NULL);
			
 
				-	STARPU_PTHREAD_RWLOCK_INIT(&arch_combs_mutex, NULL);
			
 
				-}
			
 
				-
			
 
				-void _starpu_initialize_registered_performance_models(void)
			
 
				-{
			
 
				-	starpu_perfmodel_initialize();
			
 
				-
			
 
				-	struct _starpu_machine_config *conf = _starpu_get_machine_config();
			
 
				-	unsigned ncores = conf->topology.nhwworker[STARPU_CPU_WORKER][0];
			
 
				-	unsigned ncuda =  conf->topology.nhwdevices[STARPU_CUDA_WORKER];
			
 
				-	unsigned nopencl = conf->topology.nhwdevices[STARPU_OPENCL_WORKER];
			
 
				-	unsigned nmic = 0;
			
 
				-	enum starpu_worker_archtype archtype;
			
 
				-#if STARPU_MAXMICDEVS > 0 || STARPU_MAXMPIDEVS > 0
			
 
				-	unsigned i;
			
 
				-#endif
			
 
				-#if STARPU_MAXMICDEVS > 0
			
 
				-	for(i = 0; i < conf->topology.nhwdevices[STARPU_MIC_WORKER]; i++)
			
 
				-		nmic += conf->topology.nhwworker[STARPU_MIC_WORKER][i];
			
 
				-#endif
			
 
				-	unsigned nmpi = 0;
			
 
				-#if STARPU_MAXMPIDEVS > 0
			
 
				-	for(i = 0; i < conf->topology.nhwdevices[STARPU_MPI_MS_WORKER]; i++)
			
 
				-		nmpi += conf->topology.nhwworker[STARPU_MPI_MS_WORKER][i];
			
 
				-#endif
			
 
				-
			
 
				-	// We used to allocate 2**(ncores + ncuda + nopencl + nmic + nmpi), this is too big
			
 
				-	// We now allocate only 2*(ncores + ncuda + nopencl + nmic + nmpi), and reallocate when necessary in starpu_perfmodel_arch_comb_add
			
 
				-	nb_arch_combs = 2 * (ncores + ncuda + nopencl + nmic + nmpi);
			
 
				-	_STARPU_MALLOC(arch_combs, nb_arch_combs*sizeof(struct starpu_perfmodel_arch*));
			
 
				-	current_arch_comb = 0;
			
 
				-	historymaxerror = starpu_get_env_number_default("STARPU_HISTORY_MAX_ERROR", STARPU_HISTORYMAXERROR);
			
 
				-	_starpu_calibration_minimum = starpu_get_env_number_default("STARPU_CALIBRATE_MINIMUM", 10);
			
 
				-
			
 
				-	for (archtype = 0; archtype < STARPU_NARCH; archtype++)
			
 
				-	{
			
 
				-		char name[128];
			
 
				-		const char *arch = starpu_worker_get_type_as_env_var(archtype);
			
 
				-		int def = archtype == STARPU_CPU_WORKER ? 1 : 0;
			
 
				-		snprintf(name, sizeof(name), "STARPU_PERF_MODEL_HOMOGENEOUS_%s", arch);
			
 
				-		ignore_devid[archtype] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_CPU", def);
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				 void _starpu_deinitialize_performance_model(struct starpu_perfmodel *model)
			
 
				 {
			
 
				 	if(model->is_init && model->state && model->state->per_arch != NULL)
			
--- a/src/datawizard/coherency.c
+++ b/src/datawizard/coherency.c
@@ -774,14 +774,7 @@ int _starpu_fetch_data_on_node(starpu_data_handle_t handle, int node, struct _st
 
				 {
			
 
				         _STARPU_LOG_IN();
			
 
				 
			
 
				-	int cpt = 0;
			
 
				-	while (cpt < STARPU_SPIN_MAXTRY && _starpu_spin_trylock(&handle->header_lock))
			
 
				-	{
			
 
				-		cpt++;
			
 
				-		_starpu_datawizard_progress(STARPU_DATAWIZARD_DO_ALLOC);
			
 
				-	}
			
 
				-	if (cpt == STARPU_SPIN_MAXTRY)
			
 
				-		_starpu_spin_lock(&handle->header_lock);
			
 
				+	_starpu_spin_lock(&handle->header_lock);
			
 
				 
			
 
				 	if (mode & STARPU_R && is_prefetch > STARPU_FETCH)
			
 
				 	{
			
--- a/src/datawizard/datawizard.c
+++ b/src/datawizard/datawizard.c
@@ -124,6 +124,11 @@ int __starpu_datawizard_progress(enum _starpu_may_alloc may_alloc, unsigned push
 
				 
			
 
				 		return ret;
			
 
				 	}
			
 
				+
			
 
				+	/* processing requests may release some tasks, we cannot be already
			
 
				+	 * scheduling a task. */
			
 
				+	STARPU_ASSERT(!worker->state_sched_op_pending);
			
 
				+
			
 
				 	if (worker->set)
			
 
				 		/* Runing one of the workers of a worker set. The reference for
			
 
				 		 * driving memory is its worker 0 (see registrations in topology.c) */
			
--- a/src/datawizard/reduction.c
+++ b/src/datawizard/reduction.c
@@ -54,7 +54,7 @@ void _starpu_redux_init_data_replicate(starpu_data_handle_t handle, struct _star
 
				 	STARPU_ASSERT(replicate->allocated);
			
 
				 
			
 
				 	struct starpu_codelet *init_cl = handle->init_cl;
			
 
				-	STARPU_ASSERT(init_cl);
			
 
				+	STARPU_ASSERT_MSG(init_cl, "There is no initialisation codelet for the reduction of the handle %p. Maybe you forget to call starpu_data_set_reduction_methods() ?", handle->root_handle);
			
 
				 
			
 
				 	_starpu_cl_func_t init_func = NULL;
			
 
				 
			
--- a/src/util/misc.c
+++ b/src/util/misc.c
@@ -18,6 +18,19 @@
 
				 #include <common/utils.h>
			
 
				 #include <core/jobs.h>
			
 
				 
			
 
				+const char *_starpu_codelet_get_name(struct starpu_codelet *cl)
			
 
				+{
			
 
				+	if (!cl)
			
 
				+		return NULL;
			
 
				+	/* Prefer the codelet's own name, then a non-empty perfmodel symbol; NULL when neither exists, so callers must cope with NULL. */
			
 
				+	if (cl->name)
			
 
				+		return cl->name;
			
 
				+	else if (cl->model && cl->model->symbol && cl->model->symbol[0])
			
 
				+		return cl->model->symbol;
			
 
				+	else
			
 
				+		return NULL;
			
 
				+}
			
 
				+
			
 
				 const char *_starpu_codelet_get_model_name(struct starpu_codelet *cl)
			
 
				 {
			
 
				 	if (!cl)
			
--- a/src/util/starpu_task_insert_utils.c
+++ b/src/util/starpu_task_insert_utils.c
@@ -385,7 +385,7 @@ void starpu_task_insert_data_process_arg(struct starpu_codelet *cl, struct starp
 
				 	{
			
 
				 		STARPU_ASSERT_MSG(STARPU_CODELET_GET_MODE(cl, *current_buffer) == arg_mode,
			
 
				 				  "The codelet <%s> defines the access mode %d for the buffer %d which is different from the mode %d given to starpu_task_insert\n",
			
 
				-				  cl->name, STARPU_CODELET_GET_MODE(cl, *current_buffer),
			
 
				+				  _starpu_codelet_get_name(cl), STARPU_CODELET_GET_MODE(cl, *current_buffer),
			
 
				 				  *current_buffer, arg_mode);
			
 
				 	}
			
 
				 	else
			
@@ -435,7 +435,7 @@ void starpu_task_insert_data_process_mode_array_arg(struct starpu_codelet *cl, s
 
				 		{
			
 
				 			STARPU_ASSERT_MSG(STARPU_CODELET_GET_MODE(cl, *current_buffer) == descrs[i].mode,
			
 
				 					"The codelet <%s> defines the access mode %d for the buffer %d which is different from the mode %d given to starpu_task_insert\n",
			
 
				-					cl->name, STARPU_CODELET_GET_MODE(cl, *current_buffer),
			
 
				+					_starpu_codelet_get_name(cl), STARPU_CODELET_GET_MODE(cl, *current_buffer),
			
 
				 					*current_buffer, descrs[i].mode);
			
 
				 		}
			
 
				 		else