Bladeren bron

Merge branch 'master' into fpga

Nathalie Furmento 4 jaren geleden
bovenliggende
commit
eadeb66929

+ 0 - 2
examples/cg/cg.h

@@ -32,7 +32,6 @@
 #define TYPE	double
 #define GEMV	STARPU_DGEMV
 #define DOT	STARPU_DDOT
-#define GEMV	STARPU_DGEMV
 #define AXPY	STARPU_DAXPY
 #define SCAL	STARPU_DSCAL
 #define cublasdot	cublasDdot
@@ -44,7 +43,6 @@
 #define TYPE	float
 #define GEMV	STARPU_SGEMV
 #define DOT	STARPU_SDOT
-#define GEMV	STARPU_SGEMV
 #define AXPY	STARPU_SAXPY
 #define SCAL	STARPU_SSCAL
 #define cublasdot	cublasSdot

+ 19 - 10
examples/cg/cg_kernels.c

@@ -34,7 +34,7 @@ static const TYPE gm1 = -1.0;
 static int nblocks = 8;
 
 #ifdef STARPU_QUICK_CHECK
-static int i_max = 2;
+static int i_max = 5;
 static int long long n = 2048;
 #elif !defined(STARPU_LONG_CHECK)
 static int long long n = 4096;
@@ -154,7 +154,8 @@ struct starpu_codelet accumulate_variable_cl =
 #endif
 	.modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R},
 	.nbuffers = 2,
-	.model = &accumulate_variable_model
+	.model = &accumulate_variable_model,
+	.name = "accumulate_variable"
 };
 
 #ifdef STARPU_USE_CUDA
@@ -198,7 +199,8 @@ struct starpu_codelet accumulate_vector_cl =
 #endif
 	.modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R},
 	.nbuffers = 2,
-	.model = &accumulate_vector_model
+	.model = &accumulate_vector_model,
+	.name = "accumulate_vector"
 };
 
 /*
@@ -242,7 +244,8 @@ struct starpu_codelet bzero_variable_cl =
 #endif
 	.modes = {STARPU_W},
 	.nbuffers = 1,
-	.model = &bzero_variable_model
+	.model = &bzero_variable_model,
+	.name = "bzero_variable"
 };
 
 #ifdef STARPU_USE_CUDA
@@ -283,7 +286,8 @@ struct starpu_codelet bzero_vector_cl =
 #endif
 	.modes = {STARPU_W},
 	.nbuffers = 1,
-	.model = &bzero_vector_model
+	.model = &bzero_vector_model,
+	.name = "bzero_vector"
 };
 
 /*
@@ -343,7 +347,8 @@ static struct starpu_codelet dot_kernel_cl =
 #endif
 	.cuda_flags = {STARPU_CUDA_ASYNC},
 	.nbuffers = 3,
-	.model = &dot_kernel_model
+	.model = &dot_kernel_model,
+	.name = "dot_kernel"
 };
 
 int dot_kernel(HANDLE_TYPE_VECTOR v1,
@@ -427,7 +432,8 @@ static struct starpu_codelet scal_kernel_cl =
 	.cuda_flags = {STARPU_CUDA_ASYNC},
 #endif
 	.nbuffers = 1,
-	.model = &scal_kernel_model
+	.model = &scal_kernel_model,
+	.name = "scal_kernel"
 };
 
 /*
@@ -506,7 +512,8 @@ static struct starpu_codelet gemv_kernel_cl =
 	.cuda_flags = {STARPU_CUDA_ASYNC},
 #endif
 	.nbuffers = 3,
-	.model = &gemv_kernel_model
+	.model = &gemv_kernel_model,
+	.name = "gemv_kernel"
 };
 
 int gemv_kernel(HANDLE_TYPE_VECTOR v1,
@@ -611,7 +618,8 @@ static struct starpu_codelet scal_axpy_kernel_cl =
 	.cuda_flags = {STARPU_CUDA_ASYNC},
 #endif
 	.nbuffers = 2,
-	.model = &scal_axpy_kernel_model
+	.model = &scal_axpy_kernel_model,
+	.name = "scal_axpy_kernel"
 };
 
 int scal_axpy_kernel(HANDLE_TYPE_VECTOR v1, TYPE p1,
@@ -690,7 +698,8 @@ static struct starpu_codelet axpy_kernel_cl =
 	.cuda_flags = {STARPU_CUDA_ASYNC},
 #endif
 	.nbuffers = 2,
-	.model = &axpy_kernel_model
+	.model = &axpy_kernel_model,
+	.name = "axpy_kernel"
 };
 
 int axpy_kernel(HANDLE_TYPE_VECTOR v1,

+ 0 - 1
mpi/examples/Makefile.am

@@ -279,7 +279,6 @@ endif
 if !STARPU_SIMGRID
 if !STARPU_NO_BLAS_LIB
 examplebin_PROGRAMS += cg/cg
-starpu_mpi_EXAMPLES += cg/cg
 
 cg_cg_SOURCES =					\
 	cg/cg.c						\

+ 3 - 0
src/common/utils.h

@@ -179,6 +179,9 @@ struct starpu_codelet;
 /** Returns the symbol associated to that job if any. */
 const char *_starpu_codelet_get_model_name(struct starpu_codelet *cl);
 
+/** Returns the name of a codelet, or falls back to the symbol of its perfmodel. */
+const char *_starpu_codelet_get_name(struct starpu_codelet *cl);
+
 int _starpu_check_mutex_deadlock(starpu_pthread_mutex_t *mutex);
 
 void _starpu_util_init(void);

+ 58 - 59
src/core/perfmodel/perfmodel_history.c

@@ -76,6 +76,62 @@ LIST_TYPE(_starpu_perfmodel,
 )
 static struct _starpu_perfmodel_list registered_models;
 
+static char _starpu_perfmodel_hostname[STR_LONG_LENGTH];
+
+void starpu_perfmodel_initialize(void)
+{
+	/* One-time setup of the perfmodel state of this file: first make sure the performance model directory exists (or create it) */
+	_starpu_create_sampling_directory_if_needed();
+
+	_starpu_perfmodel_list_init(&registered_models); /* set up the file-scope list of registered models */
+
+	STARPU_PTHREAD_RWLOCK_INIT(&registered_models_rwlock, NULL);
+	STARPU_PTHREAD_RWLOCK_INIT(&arch_combs_mutex, NULL); /* note: initialised as a rwlock despite the "_mutex" name */
+
+	_starpu_gethostname(_starpu_perfmodel_hostname, sizeof(_starpu_perfmodel_hostname)); /* cached here once; the model path builders of this file read this buffer instead of querying the hostname on every call */
+}
+
+void _starpu_initialize_registered_performance_models(void)
+{
+	starpu_perfmodel_initialize();
+	/* Count the hardware units of each kind; the total sizes the initial arch_combs array below. */
+	struct _starpu_machine_config *conf = _starpu_get_machine_config();
+	unsigned ncores = conf->topology.nhwworker[STARPU_CPU_WORKER][0];
+	unsigned ncuda =  conf->topology.nhwdevices[STARPU_CUDA_WORKER];
+	unsigned nopencl = conf->topology.nhwdevices[STARPU_OPENCL_WORKER];
+	unsigned nmic = 0;
+	enum starpu_worker_archtype archtype;
+#if STARPU_MAXMICDEVS > 0 || STARPU_MAXMPIDEVS > 0
+	unsigned i;
+#endif
+#if STARPU_MAXMICDEVS > 0
+	for(i = 0; i < conf->topology.nhwdevices[STARPU_MIC_WORKER]; i++)
+		nmic += conf->topology.nhwworker[STARPU_MIC_WORKER][i];
+#endif
+	unsigned nmpi = 0;
+#if STARPU_MAXMPIDEVS > 0
+	for(i = 0; i < conf->topology.nhwdevices[STARPU_MPI_MS_WORKER]; i++)
+		nmpi += conf->topology.nhwworker[STARPU_MPI_MS_WORKER][i];
+#endif
+
+	// We used to allocate 2**(ncores + ncuda + nopencl + nmic + nmpi) entries, which is too big.
+	// We now allocate only 2*(ncores + ncuda + nopencl + nmic + nmpi), and reallocate when necessary in starpu_perfmodel_arch_comb_add
+	nb_arch_combs = 2 * (ncores + ncuda + nopencl + nmic + nmpi);
+	_STARPU_MALLOC(arch_combs, nb_arch_combs*sizeof(struct starpu_perfmodel_arch*));
+	current_arch_comb = 0;
+	historymaxerror = starpu_get_env_number_default("STARPU_HISTORY_MAX_ERROR", STARPU_HISTORYMAXERROR);
+	_starpu_calibration_minimum = starpu_get_env_number_default("STARPU_CALIBRATE_MINIMUM", 10);
+	/* Each worker type reads its own STARPU_PERF_MODEL_HOMOGENEOUS_<TYPE> variable; only the CPU type defaults to 1. */
+	for (archtype = 0; archtype < STARPU_NARCH; archtype++)
+	{
+		char name[128];
+		const char *arch = starpu_worker_get_type_as_env_var(archtype);
+		int def = archtype == STARPU_CPU_WORKER ? 1 : 0;
+		snprintf(name, sizeof(name), "STARPU_PERF_MODEL_HOMOGENEOUS_%s", arch);
+		ignore_devid[archtype] = starpu_get_env_number_default(name, def); /* use the per-type name built above, not the CPU variable for every type */
+	}
+}
+
 void _starpu_perfmodel_malloc_per_arch(struct starpu_perfmodel *model, int comb, int nb_impl)
 {
 	int i;
@@ -1145,19 +1201,14 @@ static void get_model_debug_path(struct starpu_perfmodel *model, const char *arc
 {
 	STARPU_ASSERT(path);
 
-	char hostname[STR_LONG_LENGTH];
-	_starpu_gethostname(hostname, sizeof(hostname));
-
-	snprintf(path, maxlen, "%s/%s.%s.%s.debug", _starpu_get_perf_model_dir_debug(), model->symbol, hostname, arch);
+	snprintf(path, maxlen, "%s/%s.%s.%s.debug", _starpu_get_perf_model_dir_debug(), model->symbol, _starpu_perfmodel_hostname, arch);
 }
 
 void starpu_perfmodel_get_model_path(const char *symbol, char *path, size_t maxlen)
 {
-	char hostname[STR_LONG_LENGTH];
-	_starpu_gethostname(hostname, sizeof(hostname));
 	const char *dot = strrchr(symbol, '.');
 
-	snprintf(path, maxlen, "%s/%s%s%s", _starpu_get_perf_model_dir_codelet(), symbol, dot?"":".", dot?"":hostname);
+	snprintf(path, maxlen, "%s/%s%s%s", _starpu_get_perf_model_dir_codelet(), symbol, dot?"":".", dot?"":_starpu_perfmodel_hostname);
 }
 
 #ifndef STARPU_SIMGRID
@@ -1213,58 +1264,6 @@ static void _starpu_dump_registered_models(void)
 #endif
 }
 
-void starpu_perfmodel_initialize(void)
-{
-	/* make sure the performance model directory exists (or create it) */
-	_starpu_create_sampling_directory_if_needed();
-
-	_starpu_perfmodel_list_init(&registered_models);
-
-	STARPU_PTHREAD_RWLOCK_INIT(&registered_models_rwlock, NULL);
-	STARPU_PTHREAD_RWLOCK_INIT(&arch_combs_mutex, NULL);
-}
-
-void _starpu_initialize_registered_performance_models(void)
-{
-	starpu_perfmodel_initialize();
-
-	struct _starpu_machine_config *conf = _starpu_get_machine_config();
-	unsigned ncores = conf->topology.nhwworker[STARPU_CPU_WORKER][0];
-	unsigned ncuda =  conf->topology.nhwdevices[STARPU_CUDA_WORKER];
-	unsigned nopencl = conf->topology.nhwdevices[STARPU_OPENCL_WORKER];
-	unsigned nmic = 0;
-	enum starpu_worker_archtype archtype;
-#if STARPU_MAXMICDEVS > 0 || STARPU_MAXMPIDEVS > 0
-	unsigned i;
-#endif
-#if STARPU_MAXMICDEVS > 0
-	for(i = 0; i < conf->topology.nhwdevices[STARPU_MIC_WORKER]; i++)
-		nmic += conf->topology.nhwworker[STARPU_MIC_WORKER][i];
-#endif
-	unsigned nmpi = 0;
-#if STARPU_MAXMPIDEVS > 0
-	for(i = 0; i < conf->topology.nhwdevices[STARPU_MPI_MS_WORKER]; i++)
-		nmpi += conf->topology.nhwworker[STARPU_MPI_MS_WORKER][i];
-#endif
-
-	// We used to allocate 2**(ncores + ncuda + nopencl + nmic + nmpi), this is too big
-	// We now allocate only 2*(ncores + ncuda + nopencl + nmic + nmpi), and reallocate when necessary in starpu_perfmodel_arch_comb_add
-	nb_arch_combs = 2 * (ncores + ncuda + nopencl + nmic + nmpi);
-	_STARPU_MALLOC(arch_combs, nb_arch_combs*sizeof(struct starpu_perfmodel_arch*));
-	current_arch_comb = 0;
-	historymaxerror = starpu_get_env_number_default("STARPU_HISTORY_MAX_ERROR", STARPU_HISTORYMAXERROR);
-	_starpu_calibration_minimum = starpu_get_env_number_default("STARPU_CALIBRATE_MINIMUM", 10);
-
-	for (archtype = 0; archtype < STARPU_NARCH; archtype++)
-	{
-		char name[128];
-		const char *arch = starpu_worker_get_type_as_env_var(archtype);
-		int def = archtype == STARPU_CPU_WORKER ? 1 : 0;
-		snprintf(name, sizeof(name), "STARPU_PERF_MODEL_HOMOGENEOUS_%s", arch);
-		ignore_devid[archtype] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_CPU", def);
-	}
-}
-
 void _starpu_deinitialize_performance_model(struct starpu_perfmodel *model)
 {
 	if(model->is_init && model->state && model->state->per_arch != NULL)

+ 1 - 8
src/datawizard/coherency.c

@@ -774,14 +774,7 @@ int _starpu_fetch_data_on_node(starpu_data_handle_t handle, int node, struct _st
 {
         _STARPU_LOG_IN();
 
-	int cpt = 0;
-	while (cpt < STARPU_SPIN_MAXTRY && _starpu_spin_trylock(&handle->header_lock))
-	{
-		cpt++;
-		_starpu_datawizard_progress(STARPU_DATAWIZARD_DO_ALLOC);
-	}
-	if (cpt == STARPU_SPIN_MAXTRY)
-		_starpu_spin_lock(&handle->header_lock);
+	_starpu_spin_lock(&handle->header_lock);
 
 	if (mode & STARPU_R && is_prefetch > STARPU_FETCH)
 	{

+ 5 - 0
src/datawizard/datawizard.c

@@ -124,6 +124,11 @@ int __starpu_datawizard_progress(enum _starpu_may_alloc may_alloc, unsigned push
 
 		return ret;
 	}
+
+	/* processing requests may release some tasks, we cannot be already
+	 * scheduling a task. */
+	STARPU_ASSERT(!worker->state_sched_op_pending);
+
 	if (worker->set)
 		/* Running one of the workers of a worker set. The reference for
 		 * driving memory is its worker 0 (see registrations in topology.c) */

+ 1 - 1
src/datawizard/reduction.c

@@ -54,7 +54,7 @@ void _starpu_redux_init_data_replicate(starpu_data_handle_t handle, struct _star
 	STARPU_ASSERT(replicate->allocated);
 
 	struct starpu_codelet *init_cl = handle->init_cl;
-	STARPU_ASSERT(init_cl);
+	STARPU_ASSERT_MSG(init_cl, "There is no initialisation codelet for the reduction of the handle %p. Maybe you forget to call starpu_data_set_reduction_methods() ?", handle->root_handle);
 
 	_starpu_cl_func_t init_func = NULL;
 

+ 13 - 0
src/util/misc.c

@@ -18,6 +18,19 @@
 #include <common/utils.h>
 #include <core/jobs.h>
 
+const char *_starpu_codelet_get_name(struct starpu_codelet *cl)
+{
+	if (cl == NULL)
+		return NULL;
+	/* An explicitly set codelet name takes precedence. */
+	if (cl->name != NULL)
+		return cl->name;
+	/* Otherwise use the perfmodel symbol, provided it is a non-empty string. */
+	if (cl->model != NULL && cl->model->symbol != NULL && cl->model->symbol[0] != '\0')
+		return cl->model->symbol;
+	return NULL;
+}
+
 const char *_starpu_codelet_get_model_name(struct starpu_codelet *cl)
 {
 	if (!cl)

+ 2 - 2
src/util/starpu_task_insert_utils.c

@@ -385,7 +385,7 @@ void starpu_task_insert_data_process_arg(struct starpu_codelet *cl, struct starp
 	{
 		STARPU_ASSERT_MSG(STARPU_CODELET_GET_MODE(cl, *current_buffer) == arg_mode,
 				  "The codelet <%s> defines the access mode %d for the buffer %d which is different from the mode %d given to starpu_task_insert\n",
-				  cl->name, STARPU_CODELET_GET_MODE(cl, *current_buffer),
+				  _starpu_codelet_get_name(cl), STARPU_CODELET_GET_MODE(cl, *current_buffer),
 				  *current_buffer, arg_mode);
 	}
 	else
@@ -435,7 +435,7 @@ void starpu_task_insert_data_process_mode_array_arg(struct starpu_codelet *cl, s
 		{
 			STARPU_ASSERT_MSG(STARPU_CODELET_GET_MODE(cl, *current_buffer) == descrs[i].mode,
 					"The codelet <%s> defines the access mode %d for the buffer %d which is different from the mode %d given to starpu_task_insert\n",
-					cl->name, STARPU_CODELET_GET_MODE(cl, *current_buffer),
+					_starpu_codelet_get_name(cl), STARPU_CODELET_GET_MODE(cl, *current_buffer),
 					*current_buffer, descrs[i].mode);
 		}
 		else