Просмотр исходного кода

Add STARPU_PERF_MODEL_HOMOGENEOUS_CPU environment variable to allow having one perfmodel per CPU core

Samuel Thibault лет назад: 8
Родитель
Коммит
0e79ad3c82

+ 10 - 0
doc/doxygen/chapters/501_environment_variables.doxy

@@ -680,6 +680,16 @@ This specifies the main directory in which StarPU stores its
 performance model files. The default is <c>$STARPU_HOME/.starpu/sampling</c>.
 </dd>
 
+<dt>STARPU_PERF_MODEL_HOMOGENEOUS_CPU</dt>
+<dd>
+\anchor STARPU_PERF_MODEL_HOMOGENEOUS_CPU
+\addindex __env__STARPU_PERF_MODEL_HOMOGENEOUS_CPU
+When this is set to 0, StarPU will assume that CPU devices do not have the same
+performance, and will thus use a separate performance model for each CPU core.
+This makes kernel calibration much longer, since measurements have to be made
+for every core. The default is 1 (all CPU cores share one performance model).
+</dd>
+
 <dt>STARPU_PERF_MODEL_HOMOGENEOUS_CUDA</dt>
 <dd>
 \anchor STARPU_PERF_MODEL_HOMOGENEOUS_CUDA

+ 1 - 1
src/core/perfmodel/perfmodel_history.c

@@ -1076,7 +1076,7 @@ void _starpu_initialize_registered_performance_models(void)
 	STARPU_PTHREAD_RWLOCK_INIT(&arch_combs_mutex, NULL);
 	historymaxerror = starpu_get_env_number_default("STARPU_HISTORY_MAX_ERROR", STARPU_HISTORYMAXERROR);
 	_starpu_calibration_minimum = starpu_get_env_number_default("STARPU_CALIBRATE_MINIMUM", 10);
-	/* ignore_devid[STARPU_CPU_WORKER]; */ /* Always true for now */
+	ignore_devid[STARPU_CPU_WORKER] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_CPU", 1);
 	ignore_devid[STARPU_CUDA_WORKER] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_CUDA", 0);
 	ignore_devid[STARPU_OPENCL_WORKER] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_OPENCL", 0);
 	ignore_devid[STARPU_MIC_WORKER] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_MIC", 0);

+ 2 - 1
src/core/topology.c

@@ -1534,6 +1534,7 @@ _starpu_init_machine_config(struct _starpu_machine_config *config, int no_mp_con
 	STARPU_ASSERT(topology->ncpus + topology->nworkers <= STARPU_NMAXWORKERS);
 
 	unsigned cpu;
+	unsigned homogeneous = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_CPU", 1);
 	for (cpu = 0; cpu < topology->ncpus; cpu++)
 	{
 		int worker_idx = topology->nworkers + cpu;
@@ -1541,7 +1542,7 @@ _starpu_init_machine_config(struct _starpu_machine_config *config, int no_mp_con
 		_STARPU_MALLOC(config->workers[worker_idx].perf_arch.devices,  sizeof(struct starpu_perfmodel_device));
 		config->workers[worker_idx].perf_arch.ndevices = 1;
 		config->workers[worker_idx].perf_arch.devices[0].type = STARPU_CPU_WORKER;
-		config->workers[worker_idx].perf_arch.devices[0].devid = 0;
+		config->workers[worker_idx].perf_arch.devices[0].devid = homogeneous ? 0 : cpu;
 		config->workers[worker_idx].perf_arch.devices[0].ncores = 1;
 		config->workers[worker_idx].subworkerid = 0;
 		config->workers[worker_idx].devid = cpu;