10 years ago · 8b3d9dee57
--- a/src/core/perfmodel/perfmodel.c
+++ b/src/core/perfmodel/perfmodel.c
@@ -83,11 +83,8 @@ static double per_arch_task_expected_perf(struct starpu_perfmodel *model, struct
 
				 	double (*per_arch_cost_function)(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl);
			
 
				 
			
 
				 	comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices);
			
 
				-	if (comb == -1)
			
 
				-		return NAN;
			
 
				-	if (model->state->per_arch[comb] == NULL)
			
 
				-		// The model has not been executed on this combination
			
 
				-		return NAN;
			
 
				+	STARPU_ASSERT_MSG(comb != -1, "Didn't find the proper arch combination\n");
			
 
				+	STARPU_ASSERT_MSG(model->state->per_arch[comb] != NULL, "STARPU_PER_ARCH needs per-arch cost_function to be defined");
			
 
				 
			
 
				 	per_arch_cost_function = model->state->per_arch[comb][nimpl].cost_function;
			
 
				 	STARPU_ASSERT_MSG(per_arch_cost_function, "STARPU_PER_ARCH needs per-arch cost_function to be defined");
			
@@ -117,7 +114,7 @@ double starpu_worker_get_relative_speedup(struct starpu_perfmodel_arch* perf_arc
 
				 
			
 
				 		speedup += coef * (perf_arch->devices[dev].ncores + 1);
			
 
				 	}
			
 
				-	return speedup == 0 ? NAN : speedup;
			
 
				+	return speedup;
			
 
				 }
			
 
				 
			
 
				 static double common_task_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, struct starpu_task *task, unsigned nimpl)
			
@@ -142,20 +139,14 @@ void _starpu_init_and_load_perfmodel(struct starpu_perfmodel *model)
 
				 
			
 
				 	starpu_perfmodel_init(NULL, model);
			
 
				 
			
 
				-	// Check if a symbol is defined before trying to load the model from a file
			
 
				-	if (!model->symbol)
			
 
				-		return;
			
 
				-
			
 
				 	if (model->is_loaded)
			
 
				 		return;
			
 
				 
			
 
				 	switch (model->type)
			
 
				 	{
			
 
				 		case STARPU_PER_ARCH:
			
 
				-			_starpu_load_per_arch_based_model(model);
			
 
				-			break;
			
 
				 		case STARPU_COMMON:
			
 
				-			_starpu_load_common_based_model(model);
			
 
				+			/* Nothing more to do than init */
			
 
				 			break;
			
 
				 		case STARPU_HISTORY_BASED:
			
 
				 		case STARPU_NL_REGRESSION_BASED:
			
@@ -215,13 +206,16 @@ double starpu_task_expected_conversion_time(struct starpu_task *task,
 
				 					    struct starpu_perfmodel_arch* arch,
			
 
				 					    unsigned nimpl)
			
 
				 {
			
 
				-	if(arch->ndevices > 1)
			
 
				-		return -1.0;
			
 
				 	unsigned i;
			
 
				 	double sum = 0.0;
			
 
				 	enum starpu_node_kind node_kind;
			
 
				 	unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
			
 
				 
			
 
				+#ifdef STARPU_DEVEL
			
 
				+#warning TODO: conversion time with combined arch perfmodel
			
 
				+#endif
			
 
				+	STARPU_ASSERT_MSG(arch->ndevices == 1, "TODO");
			
 
				+
			
 
				 	for (i = 0; i < nbuffers; i++)
			
 
				 	{
			
 
				 		starpu_data_handle_t handle;
			
--- a/src/core/perfmodel/perfmodel.h
+++ b/src/core/perfmodel/perfmodel.h
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009-2013  Université de Bordeaux
			
 
				+ * Copyright (C) 2009-2014  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
			
 
				  * Copyright (C) 2011  Télécom-SudParis
			
 
				  *
			
@@ -69,8 +69,6 @@ void _starpu_get_perf_model_dir_bus(char *path, size_t maxlen);
 
				 void _starpu_get_perf_model_dir_debug(char *path, size_t maxlen);
			
 
				 
			
 
				 double _starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, struct _starpu_job *j, unsigned nimpl);
			
 
				-void _starpu_load_per_arch_based_model(struct starpu_perfmodel *model);
			
 
				-void _starpu_load_common_based_model(struct starpu_perfmodel *model);
			
 
				 void _starpu_load_history_based_model(struct starpu_perfmodel *model, unsigned scan_history);
			
 
				 void _starpu_init_and_load_perfmodel(struct starpu_perfmodel *model);
			
 
				 void _starpu_initialize_registered_performance_models(void);
			
--- a/src/core/perfmodel/perfmodel_history.c
+++ b/src/core/perfmodel/perfmodel_history.c
@@ -905,29 +905,12 @@ void _starpu_deinitialize_registered_performance_models(void)
 
				 	_free_arch_combs();
			
 
				 }
			
 
				 
			
 
				-/*
			
 
				- * XXX: We should probably factorize the beginning of the _starpu_load_*_model
			
 
				- * functions. This is a bit tricky though, because we must be sure to unlock
			
 
				- * registered_models_rwlock at the right place.
			
 
				- */
			
 
				-void _starpu_load_per_arch_based_model(struct starpu_perfmodel *model)
			
 
				-{
			
 
				-	starpu_perfmodel_init(NULL, model);
			
 
				-}
			
 
				-
			
 
				-void _starpu_load_common_based_model(struct starpu_perfmodel *model)
			
 
				-{
			
 
				-	starpu_perfmodel_init(NULL, model);
			
 
				-}
			
 
				-
			
 
				 /* We first try to grab the global lock in read mode to check whether the model
			
 
				  * was loaded or not (this is very likely to have been already loaded). If the
			
 
				  * model was not loaded yet, we take the lock in write mode, and if the model
			
 
				  * is still not loaded once we have the lock, we do load it.  */
			
 
				 void _starpu_load_history_based_model(struct starpu_perfmodel *model, unsigned scan_history)
			
 
				 {
			
 
				-	starpu_perfmodel_init(NULL, model);
			
 
				-
			
 
				 	STARPU_PTHREAD_RWLOCK_WRLOCK(&model->state->model_rwlock);
			
 
				 
			
 
				 	if(!model->is_loaded)
			
@@ -935,6 +918,9 @@ void _starpu_load_history_based_model(struct starpu_perfmodel *model, unsigned s
 
				 		char path[256];
			
 
				 		get_model_path(model, path, 256);
			
 
				 
			
 
				+		// Check if a symbol is defined before trying to load the model from a file
			
 
				+		STARPU_ASSERT_MSG(model->symbol, "history-based performance models must have a symbol");
			
 
				+
			
 
				 		_STARPU_DEBUG("Opening performance model file %s for model %s ...\n", path, model->symbol);
			
 
				 
			
 
				 		unsigned calibrate_flag = _starpu_get_calibrate_flag();
			
@@ -1102,7 +1088,7 @@ char* starpu_perfmodel_get_archtype_name(enum starpu_worker_archtype archtype)
 
				 
			
 
				 void starpu_perfmodel_get_arch_name(struct starpu_perfmodel_arch* arch, char *archname, size_t maxlen,unsigned impl)
			
 
				 {
			
 
				-	int comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices);
			
 
				+	int comb = _starpu_perfmodel_create_comb_if_needed(arch);
			
 
				 	STARPU_ASSERT(comb != -1);
			
 
				 
			
 
				 	snprintf(archname, maxlen, "Comb%d_impl%u", comb, impl);
			
@@ -1126,21 +1112,34 @@ double _starpu_regression_based_job_expected_perf(struct starpu_perfmodel *model
 
				 	int comb;
			
 
				 	double exp = NAN;
			
 
				 	size_t size;
			
 
				-	struct starpu_perfmodel_regression_model *regmodel;
			
 
				+	struct starpu_perfmodel_regression_model *regmodel = NULL;
			
 
				 
			
 
				 	comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices);
			
 
				+	size = _starpu_job_get_data_size(model, arch, nimpl, j);
			
 
				+
			
 
				 	if(comb == -1)
			
 
				-		return NAN;
			
 
				+		goto docal;
			
 
				 	if (model->state->per_arch[comb] == NULL)
			
 
				 		// The model has not been executed on this combination
			
 
				-		return NAN;
			
 
				+		goto docal;
			
 
				 
			
 
				 	regmodel = &model->state->per_arch[comb][nimpl].regression;
			
 
				-	size = _starpu_job_get_data_size(model, arch, nimpl, j);
			
 
				 
			
 
				 	if (regmodel->valid && size >= regmodel->minx * 0.9 && size <= regmodel->maxx * 1.1)
			
 
				                 exp = regmodel->alpha*pow((double)size, regmodel->beta);
			
 
				 
			
 
				+docal:
			
 
				+	STARPU_HG_DISABLE_CHECKING(model->benchmarking);
			
 
				+	if (isnan(exp) && !model->benchmarking)
			
 
				+	{
			
 
				+		char archname[32];
			
 
				+
			
 
				+		starpu_perfmodel_get_arch_name(arch, archname, sizeof(archname), nimpl);
			
 
				+		_STARPU_DISP("Warning: model %s is not calibrated enough for %s size %lu (only %u measurements from size %lu to %lu), forcing calibration for this run. Use the STARPU_CALIBRATE environment variable to control this.\n", model->symbol, archname, (unsigned long) size, regmodel?regmodel->nsample:0, regmodel?regmodel->minx:0, regmodel?regmodel->maxx:0);
			
 
				+		_starpu_set_calibrate_flag(1);
			
 
				+		model->benchmarking = 1;
			
 
				+	}
			
 
				+
			
 
				 	return exp;
			
 
				 }
			
 
				 
			
@@ -1150,13 +1149,14 @@ double _starpu_non_linear_regression_based_job_expected_perf(struct starpu_perfm
 
				 	double exp = NAN;
			
 
				 	size_t size;
			
 
				 	struct starpu_perfmodel_regression_model *regmodel;
			
 
				+	struct starpu_perfmodel_history_table *entry = NULL;
			
 
				 
			
 
				 	comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices);
			
 
				 	if(comb == -1)
			
 
				-		return NAN;
			
 
				+		goto docal;
			
 
				 	if (model->state->per_arch[comb] == NULL)
			
 
				 		// The model has not been executed on this combination
			
 
				-		return NAN;
			
 
				+		goto docal;
			
 
				 
			
 
				 	regmodel = &model->state->per_arch[comb][nimpl].regression;
			
 
				 	size = _starpu_job_get_data_size(model, arch, nimpl, j);
			
@@ -1168,7 +1168,6 @@ double _starpu_non_linear_regression_based_job_expected_perf(struct starpu_perfm
 
				 		uint32_t key = _starpu_compute_buffers_footprint(model, arch, nimpl, j);
			
 
				 		struct starpu_perfmodel_per_arch *per_arch_model = &model->state->per_arch[comb][nimpl];
			
 
				 		struct starpu_perfmodel_history_table *history;
			
 
				-		struct starpu_perfmodel_history_table *entry;
			
 
				 
			
 
				 		STARPU_PTHREAD_RWLOCK_RDLOCK(&model->state->model_rwlock);
			
 
				 		history = per_arch_model->history;
			
@@ -1182,6 +1181,7 @@ double _starpu_non_linear_regression_based_job_expected_perf(struct starpu_perfm
 
				 		if (entry && entry->history_entry && entry->history_entry->nsample >= _STARPU_CALIBRATION_MINIMUM)
			
 
				 			exp = entry->history_entry->mean;
			
 
				 
			
 
				+docal:
			
 
				 		STARPU_HG_DISABLE_CHECKING(model->benchmarking);
			
 
				 		if (isnan(exp) && !model->benchmarking)
			
 
				 		{
			
@@ -1202,16 +1202,16 @@ double _starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, s
 
				 	int comb;
			
 
				 	double exp = NAN;
			
 
				 	struct starpu_perfmodel_per_arch *per_arch_model;
			
 
				-	struct starpu_perfmodel_history_entry *entry;
			
 
				+	struct starpu_perfmodel_history_entry *entry = NULL;
			
 
				 	struct starpu_perfmodel_history_table *history, *elt;
			
 
				 	uint32_t key;
			
 
				 
			
 
				 	comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices);
			
 
				 	if(comb == -1)
			
 
				-		return NAN;
			
 
				+		goto docal;
			
 
				 	if (model->state->per_arch[comb] == NULL)
			
 
				 		// The model has not been executed on this combination
			
 
				-		return NAN;
			
 
				+		goto docal;
			
 
				 
			
 
				 	per_arch_model = &model->state->per_arch[comb][nimpl];
			
 
				 
			
@@ -1232,6 +1232,7 @@ double _starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, s
 
				 		/* Calibrated enough */
			
 
				 		exp = entry->mean;
			
 
				 
			
 
				+docal:
			
 
				 	STARPU_HG_DISABLE_CHECKING(model->benchmarking);
			
 
				 	if (isnan(exp) && !model->benchmarking)
			
 
				 	{