4 jaren geleden · 53ed719fd8
--- a/src/core/perfmodel/perfmodel_history.c
+++ b/src/core/perfmodel/perfmodel_history.c
@@ -76,6 +76,58 @@ LIST_TYPE(_starpu_perfmodel,
 
				 )
			
 
				 static struct _starpu_perfmodel_list registered_models;
			
 
				 
			
 
				+void starpu_perfmodel_initialize(void)
			
 
				+{
			
 
				+	/* make sure the performance model directory exists (or create it) */
			
 
				+	_starpu_create_sampling_directory_if_needed();
			
 
				+
			
 
				+	_starpu_perfmodel_list_init(&registered_models);
			
 
				+
			
 
				+	STARPU_PTHREAD_RWLOCK_INIT(&registered_models_rwlock, NULL);
			
 
				+	STARPU_PTHREAD_RWLOCK_INIT(&arch_combs_mutex, NULL);
			
 
				+}
			
 
				+
			
 
				+void _starpu_initialize_registered_performance_models(void)
			
 
				+{
			
 
				+	starpu_perfmodel_initialize();
			
 
				+
			
 
				+	struct _starpu_machine_config *conf = _starpu_get_machine_config();
			
 
				+	unsigned ncores = conf->topology.nhwworker[STARPU_CPU_WORKER][0];
			
 
				+	unsigned ncuda =  conf->topology.nhwdevices[STARPU_CUDA_WORKER];
			
 
				+	unsigned nopencl = conf->topology.nhwdevices[STARPU_OPENCL_WORKER];
			
 
				+	unsigned nmic = 0;
			
 
				+	enum starpu_worker_archtype archtype;
			
 
				+#if STARPU_MAXMICDEVS > 0 || STARPU_MAXMPIDEVS > 0
			
 
				+	unsigned i;
			
 
				+#endif
			
 
				+#if STARPU_MAXMICDEVS > 0
			
 
				+	for(i = 0; i < conf->topology.nhwdevices[STARPU_MIC_WORKER]; i++)
			
 
				+		nmic += conf->topology.nhwworker[STARPU_MIC_WORKER][i];
			
 
				+#endif
			
 
				+	unsigned nmpi = 0;
			
 
				+#if STARPU_MAXMPIDEVS > 0
			
 
				+	for(i = 0; i < conf->topology.nhwdevices[STARPU_MPI_MS_WORKER]; i++)
			
 
				+		nmpi += conf->topology.nhwworker[STARPU_MPI_MS_WORKER][i];
			
 
				+#endif
			
 
				+
			
 
				+	// We used to allocate 2**(ncores + ncuda + nopencl + nmic + nmpi), this is too big
			
 
				+	// We now allocate only 2*(ncores + ncuda + nopencl + nmic + nmpi), and reallocate when necessary in starpu_perfmodel_arch_comb_add
			
 
				+	nb_arch_combs = 2 * (ncores + ncuda + nopencl + nmic + nmpi);
			
 
				+	_STARPU_MALLOC(arch_combs, nb_arch_combs*sizeof(struct starpu_perfmodel_arch*));
			
 
				+	current_arch_comb = 0;
			
 
				+	historymaxerror = starpu_get_env_number_default("STARPU_HISTORY_MAX_ERROR", STARPU_HISTORYMAXERROR);
			
 
				+	_starpu_calibration_minimum = starpu_get_env_number_default("STARPU_CALIBRATE_MINIMUM", 10);
			
 
				+
			
 
				+	for (archtype = 0; archtype < STARPU_NARCH; archtype++)
			
 
				+	{
			
 
				+		char name[128];
			
 
				+		const char *arch = starpu_worker_get_type_as_env_var(archtype);
			
 
				+		int def = archtype == STARPU_CPU_WORKER ? 1 : 0;
			
 
				+		snprintf(name, sizeof(name), "STARPU_PERF_MODEL_HOMOGENEOUS_%s", arch);
			
 
				+		ignore_devid[archtype] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_CPU", def);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				 void _starpu_perfmodel_malloc_per_arch(struct starpu_perfmodel *model, int comb, int nb_impl)
			
 
				 {
			
 
				 	int i;
			
@@ -1213,58 +1265,6 @@ static void _starpu_dump_registered_models(void)
 
				 #endif
			
 
				 }
			
 
				 
			
 
				-void starpu_perfmodel_initialize(void)
			
 
				-{
			
 
				-	/* make sure the performance model directory exists (or create it) */
			
 
				-	_starpu_create_sampling_directory_if_needed();
			
 
				-
			
 
				-	_starpu_perfmodel_list_init(&registered_models);
			
 
				-
			
 
				-	STARPU_PTHREAD_RWLOCK_INIT(&registered_models_rwlock, NULL);
			
 
				-	STARPU_PTHREAD_RWLOCK_INIT(&arch_combs_mutex, NULL);
			
 
				-}
			
 
				-
			
 
				-void _starpu_initialize_registered_performance_models(void)
			
 
				-{
			
 
				-	starpu_perfmodel_initialize();
			
 
				-
			
 
				-	struct _starpu_machine_config *conf = _starpu_get_machine_config();
			
 
				-	unsigned ncores = conf->topology.nhwworker[STARPU_CPU_WORKER][0];
			
 
				-	unsigned ncuda =  conf->topology.nhwdevices[STARPU_CUDA_WORKER];
			
 
				-	unsigned nopencl = conf->topology.nhwdevices[STARPU_OPENCL_WORKER];
			
 
				-	unsigned nmic = 0;
			
 
				-	enum starpu_worker_archtype archtype;
			
 
				-#if STARPU_MAXMICDEVS > 0 || STARPU_MAXMPIDEVS > 0
			
 
				-	unsigned i;
			
 
				-#endif
			
 
				-#if STARPU_MAXMICDEVS > 0
			
 
				-	for(i = 0; i < conf->topology.nhwdevices[STARPU_MIC_WORKER]; i++)
			
 
				-		nmic += conf->topology.nhwworker[STARPU_MIC_WORKER][i];
			
 
				-#endif
			
 
				-	unsigned nmpi = 0;
			
 
				-#if STARPU_MAXMPIDEVS > 0
			
 
				-	for(i = 0; i < conf->topology.nhwdevices[STARPU_MPI_MS_WORKER]; i++)
			
 
				-		nmpi += conf->topology.nhwworker[STARPU_MPI_MS_WORKER][i];
			
 
				-#endif
			
 
				-
			
 
				-	// We used to allocate 2**(ncores + ncuda + nopencl + nmic + nmpi), this is too big
			
 
				-	// We now allocate only 2*(ncores + ncuda + nopencl + nmic + nmpi), and reallocate when necessary in starpu_perfmodel_arch_comb_add
			
 
				-	nb_arch_combs = 2 * (ncores + ncuda + nopencl + nmic + nmpi);
			
 
				-	_STARPU_MALLOC(arch_combs, nb_arch_combs*sizeof(struct starpu_perfmodel_arch*));
			
 
				-	current_arch_comb = 0;
			
 
				-	historymaxerror = starpu_get_env_number_default("STARPU_HISTORY_MAX_ERROR", STARPU_HISTORYMAXERROR);
			
 
				-	_starpu_calibration_minimum = starpu_get_env_number_default("STARPU_CALIBRATE_MINIMUM", 10);
			
 
				-
			
 
				-	for (archtype = 0; archtype < STARPU_NARCH; archtype++)
			
 
				-	{
			
 
				-		char name[128];
			
 
				-		const char *arch = starpu_worker_get_type_as_env_var(archtype);
			
 
				-		int def = archtype == STARPU_CPU_WORKER ? 1 : 0;
			
 
				-		snprintf(name, sizeof(name), "STARPU_PERF_MODEL_HOMOGENEOUS_%s", arch);
			
 
				-		ignore_devid[archtype] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_CPU", def);
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				 void _starpu_deinitialize_performance_model(struct starpu_perfmodel *model)
			
 
				 {
			
 
				 	if(model->is_init && model->state && model->state->per_arch != NULL)