
Allow sharing performance models between CUDA devices

Samuel Thibault, 8 years ago
Parent commit 8cf58b4493

+ 18 - 3
doc/doxygen/chapters/210_check_list_performance.doxy

@@ -246,9 +246,24 @@ to configure a performance model for the codelets of the application (see
 use on-line calibration.  StarPU will automatically calibrate codelets
 which have never been calibrated yet, and save the result in
 <c>$STARPU_HOME/.starpu/sampling/codelets</c>.
-The models are indexed by machine name. To share the models between
-machines (e.g. for a homogeneous cluster), use <c>export
-STARPU_HOSTNAME=some_global_name</c> (\ref STARPU_HOSTNAME). To force continuing calibration,
+The models are indexed by machine name.
+
+By default, StarPU stores separate performance models according to the hostname
+of the system. To avoid having to calibrate performance models on each node of
+a homogeneous cluster, the model can be shared by using
+<c>export STARPU_HOSTNAME=some_global_name</c> (\ref STARPU_HOSTNAME), where
+<c>some_global_name</c> is for instance the name of the cluster; this name then
+overrides the hostname of the system.
+
+By default, StarPU also stores separate performance models for each device. To
+avoid having to calibrate performance models for each device of a homogeneous
+set of devices, the model can be shared by setting
+<c>export STARPU_PERF_MODEL_HOMOGENEOUS_CUDA=1</c>,
+<c>export STARPU_PERF_MODEL_HOMOGENEOUS_OPENCL=1</c>,
+<c>export STARPU_PERF_MODEL_HOMOGENEOUS_MIC=1</c>, or
+<c>export STARPU_PERF_MODEL_HOMOGENEOUS_SCC=1</c> (depending on the device type).
+
+To force continuing calibration,
 use <c>export STARPU_CALIBRATE=1</c> (\ref STARPU_CALIBRATE). This may be necessary if your application
 has not-so-stable performance. StarPU will force calibration (and thus ignore
 the current result) until 10 (<c>_STARPU_CALIBRATION_MINIMUM</c>) measurements have been
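
A minimal C sketch (not part of the patch) of how the two sharing mechanisms documented above can be combined; the cluster name "my_cluster" is purely illustrative, and the variables must be set before starpu_init(), since that is when the performance-model code reads them. The same effect is obtained with export in the shell.

/* Minimal sketch: share models across a homogeneous cluster and across
 * identical CUDA GPUs.  Assumes a standard StarPU installation. */
#include <stdlib.h>
#include <starpu.h>

int main(void)
{
	/* Must happen before starpu_init(), which reads these variables. */
	setenv("STARPU_HOSTNAME", "my_cluster", 1);
	setenv("STARPU_PERF_MODEL_HOMOGENEOUS_CUDA", "1", 1);

	if (starpu_init(NULL) != 0)
		return 1;
	/* ... submit and wait for tasks; calibration results are shared ... */
	starpu_shutdown();
	return 0;
}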

+ 40 - 0
doc/doxygen/chapters/501_environment_variables.doxy

@@ -622,6 +622,46 @@ This specifies the main directory in which StarPU stores its
 performance model files. The default is <c>$STARPU_HOME/.starpu/sampling</c>.
 </dd>
 
+<dt>STARPU_PERF_MODEL_HOMOGENEOUS_CUDA</dt>
+<dd>
+\anchor STARPU_PERF_MODEL_HOMOGENEOUS_CUDA
+\addindex __env__STARPU_PERF_MODEL_HOMOGENEOUS_CUDA
+When this is set to 1, StarPU will assume that all CUDA devices have the same
+performance, and will thus share performance models between them; this makes
+kernel calibration much faster, since measurements only have to be done once
+for all CUDA GPUs.
+</dd>
+
+<dt>STARPU_PERF_MODEL_HOMOGENEOUS_OPENCL</dt>
+<dd>
+\anchor STARPU_PERF_MODEL_HOMOGENEOUS_OPENCL
+\addindex __env__STARPU_PERF_MODEL_HOMOGENEOUS_OPENCL
+When this is set to 1, StarPU will assume that all OpenCL devices have the same
+performance, and will thus share performance models between them; this makes
+kernel calibration much faster, since measurements only have to be done once
+for all OpenCL devices.
+</dd>
+
+<dt>STARPU_PERF_MODEL_HOMOGENEOUS_MIC</dt>
+<dd>
+\anchor STARPU_PERF_MODEL_HOMOGENEOUS_MIC
+\addindex __env__STARPU_PERF_MODEL_HOMOGENEOUS_MIC
+When this is set to 1, StarPU will assume that all MIC devices have the same
+performance, and will thus share performance models between them; this makes
+kernel calibration much faster, since measurements only have to be done once
+for all MIC devices.
+</dd>
+
+<dt>STARPU_PERF_MODEL_HOMOGENEOUS_SCC</dt>
+<dd>
+\anchor STARPU_PERF_MODEL_HOMOGENEOUS_SCC
+\addindex __env__STARPU_PERF_MODEL_HOMOGENEOUS_SCC
+When this is set to 1, StarPU will assume that all SCC devices have the same
+performance, and will thus share performance models between them; this makes
+kernel calibration much faster, since measurements only have to be done once
+for all SCC devices.
+</dd>
+
 <dt>STARPU_HOSTNAME</dt>
 <dd>
 \anchor STARPU_HOSTNAME

+ 8 - 1
src/core/perfmodel/perfmodel_history.c
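
For reference, the hunk below reads the new variables with starpu_get_env_number_default(). A self-contained, hypothetical re-implementation of that lookup pattern (not StarPU's actual code) would look like:

/* Illustrative sketch of the env-number lookup pattern. */
#include <stdlib.h>

static int get_env_number_default(const char *name, int defval)
{
	const char *str = getenv(name);
	if (!str)
		return defval;	/* variable unset: fall back to the default */
	return (int)strtol(str, NULL, 10);
}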

@@ -50,6 +50,7 @@ static int current_arch_comb;
 static int nb_arch_combs;
 static starpu_pthread_rwlock_t arch_combs_mutex;
 static int historymaxerror;
+static char ignore_devid[STARPU_ANY_WORKER];
 
 /* How many executions a codelet will have to be measured before we
  * consider that calibration will provide a value good enough for scheduling */
@@ -108,7 +109,8 @@ int _starpu_perfmodel_arch_comb_get(int ndevices, struct starpu_perfmodel_device
 				for(dev2 = 0; dev2 < ndevices; dev2++)
 				{
 					if(arch_combs[comb]->devices[dev1].type == devices[dev2].type &&
-					   arch_combs[comb]->devices[dev1].devid == devices[dev2].devid &&
+					   (ignore_devid[devices[dev2].type] ||
+					    arch_combs[comb]->devices[dev1].devid == devices[dev2].devid) &&
 					   arch_combs[comb]->devices[dev1].ncores == devices[dev2].ncores)
 						nfounded++;
 				}
@@ -912,6 +914,11 @@ void _starpu_initialize_registered_performance_models(void)
 	STARPU_PTHREAD_RWLOCK_INIT(&arch_combs_mutex, NULL);
 	historymaxerror = starpu_get_env_number_default("STARPU_HISTORY_MAX_ERROR", STARPU_HISTORYMAXERROR);
 	_starpu_calibration_minimum = starpu_get_env_number_default("STARPU_CALIBRATE_MINIMUM", 10);
+	/* ignore_devid[STARPU_CPU_WORKER]: always true for now, so no environment variable is needed */
+	ignore_devid[STARPU_CUDA_WORKER] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_CUDA", 0);
+	ignore_devid[STARPU_OPENCL_WORKER] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_OPENCL", 0);
+	ignore_devid[STARPU_MIC_WORKER] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_MIC", 0);
+	ignore_devid[STARPU_SCC_WORKER] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_SCC", 0);
 }
 
 void _starpu_deinitialize_performance_model(struct starpu_perfmodel *model)
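
To make the devid change above concrete, here is a simplified sketch (hypothetical types and names, not StarPU's actual code) of the matching predicate: when ignore_devid is set for a device type, two devices of that type match regardless of their IDs, so for example all CUDA GPUs collapse into a single architecture combination and share one performance model.

/* Simplified sketch of the matching predicate: when ignore_devid[type]
 * is set, the device ID comparison is skipped, so all devices of that
 * type map to the same architecture combination. */
#include <stdbool.h>

struct device { int type; int devid; int ncores; };

static bool devices_match(const struct device *a, const struct device *b,
			  const char *ignore_devid)
{
	return a->type == b->type
	    && (ignore_devid[a->type] || a->devid == b->devid)
	    && a->ncores == b->ncores;
}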