瀏覽代碼

move all internal fields of struct starpu_perfmodel to a new internal struct _starpu_perfmodel_state

Nathalie Furmento 11 年之前
父節點
當前提交
93e5c4a322

+ 5 - 12
include/starpu_perfmodel.h

@@ -124,6 +124,9 @@ enum starpu_perfmodel_type
 	STARPU_NL_REGRESSION_BASED
 };
 
+struct _starpu_perfmodel_state;
+typedef struct _starpu_perfmodel_state* starpu_perfmodel_state_t;
+
 struct starpu_perfmodel
 {
 	enum starpu_perfmodel_type type;
@@ -135,20 +138,10 @@ struct starpu_perfmodel
 
 	const char *symbol;
 
-//#ifdef STARPU_DEVEL
-//#warning move all the fields in a private structure. may be difficult as it is not mandatory to call starpu_perfmodel_init when using a perfmodel
-//#endif
-	struct starpu_perfmodel_per_arch** per_arch; /*STARPU_MAXIMPLEMENTATIONS*/
-	int** per_arch_is_set; /*STARPU_MAXIMPLEMENTATIONS*/
-
-	unsigned is_init;
 	unsigned is_loaded;
 	unsigned benchmarking;
-	starpu_pthread_rwlock_t model_rwlock;
-	int *nimpls;
-	int ncombs;  /* The number of combinations currently used by the model */
-	int ncombs_set; /* The number of combinations allocated in the array nimpls and ncombs */
-	int *combs;
+
+	starpu_perfmodel_state_t state;
 };
 
 void starpu_perfmodel_init(FILE *f, struct starpu_perfmodel *model);

+ 2 - 2
src/core/perfmodel/perfmodel.c

@@ -83,11 +83,11 @@ static double per_arch_task_expected_perf(struct starpu_perfmodel *model, struct
 	comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices);
 	if (comb == -1)
 		return NAN;
-	if (model->per_arch[comb] == NULL)
+	if (model->state->per_arch[comb] == NULL)
 		// The model has not been executed on this combination
 		return NAN;
 
-	per_arch_cost_function = model->per_arch[comb][nimpl].cost_function;
+	per_arch_cost_function = model->state->per_arch[comb][nimpl].cost_function;
 	STARPU_ASSERT_MSG(per_arch_cost_function, "STARPU_PER_ARCH needs per-arch cost_function to be defined");
 
 	return per_arch_cost_function(task, arch, nimpl);

+ 13 - 0
src/core/perfmodel/perfmodel.h

@@ -40,6 +40,19 @@ extern "C"
  */
 #define _STARPU_PERFMODEL_VERSION 44
 
+struct _starpu_perfmodel_state
+{
+	struct starpu_perfmodel_per_arch** per_arch; /* Indexed as per_arch[comb][impl]; per_arch[comb] stays NULL until that combination is allocated */
+	int** per_arch_is_set; /* per_arch_is_set[comb][impl] is set to 1 once the matching per_arch entry has been filled in */
+
+	unsigned is_init; /* Set to 1 at the end of starpu_perfmodel_init(), reset to 0 when the model is deinitialized */
+	starpu_pthread_rwlock_t model_rwlock; /* Taken around lookups and updates of the per-arch history of this model */
+	int *nimpls; /* Number of implementations recorded for each combination */
+	int ncombs;  /* The number of combinations currently used by the model */
+	int ncombs_set; /* The number of combination slots allocated in the arrays per_arch, per_arch_is_set, nimpls and combs */
+	int *combs; /* Identifiers of the combinations used by the model; the first ncombs entries are meaningful */
+};
+
 struct _starpu_perfmodel_list
 {
 	struct _starpu_perfmodel_list *next;

+ 110 - 104
src/core/perfmodel/perfmodel_history.c

@@ -134,9 +134,9 @@ size_t _starpu_job_get_data_size(struct starpu_perfmodel *model, struct starpu_p
 	struct starpu_task *task = j->task;
 	int comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices);
 
-	if (model && model->per_arch && comb != -1 && model->per_arch[comb] && model->per_arch[comb][impl].size_base)
+	if (model && model->state->per_arch && comb != -1 && model->state->per_arch[comb] && model->state->per_arch[comb][impl].size_base)
 	{
-		return model->per_arch[comb][impl].size_base(task, arch, impl);
+		return model->state->per_arch[comb][impl].size_base(task, arch, impl);
 	}
 	else if (model && model->size_base)
 	{
@@ -185,7 +185,7 @@ static void dump_reg_model(FILE *f, struct starpu_perfmodel *model, int comb, in
 {
 	struct starpu_perfmodel_per_arch *per_arch_model;
 
-	per_arch_model = &model->per_arch[comb][impl];
+	per_arch_model = &model->state->per_arch[comb][impl];
 	struct starpu_perfmodel_regression_model *reg_model;
 	reg_model = &per_arch_model->regression;
 
@@ -367,19 +367,20 @@ static void parse_arch(FILE *f, struct starpu_perfmodel *model, unsigned scan_hi
 	{
 		/* Parsing each implementation */
 		implmax = STARPU_MIN(nimpls, STARPU_MAXIMPLEMENTATIONS);
-		model->nimpls[comb] = implmax;
-		model->per_arch[comb] = (struct starpu_perfmodel_per_arch*)malloc(STARPU_MAXIMPLEMENTATIONS*sizeof(struct starpu_perfmodel_per_arch));
-		model->per_arch_is_set[comb] = (int *)malloc(STARPU_MAXIMPLEMENTATIONS*sizeof(int));
+		model->state->nimpls[comb] = implmax;
+		model->state->per_arch[comb] = (struct starpu_perfmodel_per_arch*)malloc(STARPU_MAXIMPLEMENTATIONS*sizeof(struct starpu_perfmodel_per_arch));
+		model->state->per_arch_is_set[comb] = (int *)malloc(STARPU_MAXIMPLEMENTATIONS*sizeof(int));
+		model->state->per_arch_is_set[comb] = (int *)malloc(STARPU_MAXIMPLEMENTATIONS*sizeof(int));
 		for(i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++)
 		{
-			memset(&model->per_arch[comb][i], 0, sizeof(struct starpu_perfmodel_per_arch));
-			model->per_arch_is_set[comb][i] = 0;
+			memset(&model->state->per_arch[comb][i], 0, sizeof(struct starpu_perfmodel_per_arch));
+			model->state->per_arch_is_set[comb][i] = 0;
 		}
 
 		for (impl = 0; impl < implmax; impl++)
 		{
-			struct starpu_perfmodel_per_arch *per_arch_model = &model->per_arch[comb][impl];
-			model->per_arch_is_set[comb][impl] = 1;
+			struct starpu_perfmodel_per_arch *per_arch_model = &model->state->per_arch[comb][impl];
+			model->state->per_arch_is_set[comb][impl] = 1;
 			parse_per_arch_model_file(f, per_arch_model, scan_history);
 		}
 	}
@@ -449,7 +450,7 @@ static void parse_comb(FILE *f, struct starpu_perfmodel *model, unsigned scan_hi
 	if(id_comb == -1)
 		id_comb = starpu_perfmodel_arch_comb_add(ndevices, devices);
 
-	model->combs[comb] = id_comb;
+	model->state->combs[comb] = id_comb;
 	parse_arch(f, model, scan_history, id_comb);
 }
 
@@ -469,7 +470,9 @@ static void parse_model_file(FILE *f, struct starpu_perfmodel *model, unsigned s
 	ret = fscanf(f, "%d\n", &ncombs);
 	STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file");
 	if(ncombs > 0)
-		model->ncombs = ncombs;
+	{
+		model->state->ncombs = ncombs;
+	}
 
 	if (ncombs > nb_arch_combs)
 	{
@@ -489,7 +492,7 @@ static void dump_per_arch_model_file(FILE *f, struct starpu_perfmodel *model, in
 {
 	struct starpu_perfmodel_per_arch *per_arch_model;
 
-	per_arch_model = &model->per_arch[comb][impl];
+	per_arch_model = &model->state->per_arch[comb][impl];
 	/* count the number of elements in the lists */
 	struct starpu_perfmodel_history_list *ptr = NULL;
 	unsigned nentries = 0;
@@ -535,7 +538,7 @@ static void dump_model_file(FILE *f, struct starpu_perfmodel *model)
 	fprintf(f, "# Performance Model Version\n");
 	fprintf(f, "%d\n\n", _STARPU_PERFMODEL_VERSION);
 
-	int ncombs = model->ncombs;
+	int ncombs = model->state->ncombs;
 
 	fprintf(f, "####################\n");
 	fprintf(f, "# COMBs\n");
@@ -545,9 +548,9 @@ static void dump_model_file(FILE *f, struct starpu_perfmodel *model)
 	int comb, impl, dev;
 	for(comb = 0; comb < ncombs; comb++)
 	{
-		int ndevices = arch_combs[model->combs[comb]]->ndevices;
+		int ndevices = arch_combs[model->state->combs[comb]]->ndevices;
 		fprintf(f, "####################\n");
-		fprintf(f, "# COMB_%d\n", model->combs[comb]);
+		fprintf(f, "# COMB_%d\n", model->state->combs[comb]);
 		fprintf(f, "# number of types devices\n");
 		fprintf(f, "%u\n", ndevices);
 
@@ -556,20 +559,20 @@ static void dump_model_file(FILE *f, struct starpu_perfmodel *model)
 			fprintf(f, "####################\n");
 			fprintf(f, "# DEV_%d\n", dev);
 			fprintf(f, "# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, SCC - 4)\n");
-			fprintf(f, "%u\n", arch_combs[model->combs[comb]]->devices[dev].type);
+			fprintf(f, "%u\n", arch_combs[model->state->combs[comb]]->devices[dev].type);
 
 			fprintf(f, "####################\n");
 			fprintf(f, "# DEV_%d\n", dev);
 			fprintf(f, "# device id \n");
-			fprintf(f, "%u\n", arch_combs[model->combs[comb]]->devices[dev].devid);
+			fprintf(f, "%u\n", arch_combs[model->state->combs[comb]]->devices[dev].devid);
 
 			fprintf(f, "####################\n");
 			fprintf(f, "# DEV_%d\n", dev);
 			fprintf(f, "# number of cores \n");
-			fprintf(f, "%u\n", arch_combs[model->combs[comb]]->devices[dev].ncores);
+			fprintf(f, "%u\n", arch_combs[model->state->combs[comb]]->devices[dev].ncores);
 		}
 
-		int nimpls = model->nimpls[comb];
+		int nimpls = model->state->nimpls[comb];
 		fprintf(f, "##########\n");
 		fprintf(f, "# number of implementations\n");
 		fprintf(f, "%u\n", nimpls);
@@ -584,18 +587,18 @@ void _starpu_perfmodel_realloc(struct starpu_perfmodel *model, int nb)
 {
 	int i;
 
-	STARPU_ASSERT(nb > model->ncombs_set);
-	model->per_arch = (struct starpu_perfmodel_per_arch**) realloc(model->per_arch, nb*sizeof(struct starpu_perfmodel_per_arch*));
-	model->per_arch_is_set = (int**) realloc(model->per_arch_is_set, nb*sizeof(struct starpu_perfmodel_per_arch*));
-	model->nimpls = (int *)realloc(model->nimpls, nb*sizeof(int));
-	model->combs = (int*)realloc(model->combs, nb*sizeof(int));
-	for(i = model->ncombs_set; i < nb; i++)
+	STARPU_ASSERT(nb > model->state->ncombs_set);
+	model->state->per_arch = (struct starpu_perfmodel_per_arch**) realloc(model->state->per_arch, nb*sizeof(struct starpu_perfmodel_per_arch*));
+	model->state->per_arch_is_set = (int**) realloc(model->state->per_arch_is_set, nb*sizeof(struct starpu_perfmodel_per_arch*));
+	model->state->nimpls = (int *)realloc(model->state->nimpls, nb*sizeof(int));
+	model->state->combs = (int*)realloc(model->state->combs, nb*sizeof(int));
+	for(i = model->state->ncombs_set; i < nb; i++)
 	{
-		model->per_arch[i] = NULL;
-		model->per_arch_is_set[i] = NULL;
-		model->nimpls[i] = 0;
+		model->state->per_arch[i] = NULL;
+		model->state->per_arch_is_set[i] = NULL;
+		model->state->nimpls[i] = 0;
 	}
-	model->ncombs_set = nb;
+	model->state->ncombs_set = nb;
 }
 
 void starpu_perfmodel_init(FILE *f, struct starpu_perfmodel *model)
@@ -605,7 +608,7 @@ void starpu_perfmodel_init(FILE *f, struct starpu_perfmodel *model)
 	int already_init;
 
 	STARPU_PTHREAD_RWLOCK_RDLOCK(&registered_models_rwlock);
-	already_init = model->is_init;
+	already_init = model->state && model->state->is_init;
 	STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
 
 	if (already_init)
@@ -616,35 +619,36 @@ void starpu_perfmodel_init(FILE *f, struct starpu_perfmodel *model)
 	STARPU_PTHREAD_RWLOCK_WRLOCK(&registered_models_rwlock);
 
 	/* Was the model initialized since the previous test ? */
-	if (model->is_init)
+	if (model->state && model->state->is_init)
 	{
 		STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
 		return;
 	}
 
-	STARPU_PTHREAD_RWLOCK_INIT(&model->model_rwlock, NULL);
+	model->state = malloc(sizeof(struct _starpu_perfmodel_state));
+	STARPU_PTHREAD_RWLOCK_INIT(&model->state->model_rwlock, NULL);
 	if(model->type != STARPU_COMMON)
 	{
 		int i;
 
-		model->per_arch = (struct starpu_perfmodel_per_arch**) malloc(nb_arch_combs*sizeof(struct starpu_perfmodel_per_arch*));
-		model->per_arch_is_set = (int**) malloc(nb_arch_combs*sizeof(int*));
-		model->nimpls = (int *)malloc(nb_arch_combs*sizeof(int));
-		model->combs = (int*)malloc(nb_arch_combs*sizeof(int));
-		model->ncombs = 0;
-		model->ncombs_set = nb_arch_combs;
+		model->state->per_arch = (struct starpu_perfmodel_per_arch**) malloc(nb_arch_combs*sizeof(struct starpu_perfmodel_per_arch*));
+		model->state->per_arch_is_set = (int**) malloc(nb_arch_combs*sizeof(int*));
+		model->state->nimpls = (int *)malloc(nb_arch_combs*sizeof(int));
+		model->state->combs = (int*)malloc(nb_arch_combs*sizeof(int));
+		model->state->ncombs = 0;
+		model->state->ncombs_set = nb_arch_combs;
 
 		for(i = 0; i < nb_arch_combs; i++)
 		{
-			model->per_arch[i] = NULL;
-			model->per_arch_is_set[i] = NULL;
-			model->nimpls[i] = 0;
+			model->state->per_arch[i] = NULL;
+			model->state->per_arch_is_set[i] = NULL;
+			model->state->nimpls[i] = 0;
 		}
 		if(f)
 			parse_model_file(f, model, 0);
 	}
 
-	model->is_init = 1;
+	model->state->is_init = 1;
 	STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
 }
 
@@ -785,16 +789,16 @@ void _starpu_initialize_registered_performance_models(void)
 
 void _starpu_deinitialize_performance_model(struct starpu_perfmodel *model)
 {
-	if(model->is_init && model->per_arch != NULL)
+	if(model->state->is_init && model->state->per_arch != NULL)
 	{
-		int ncombs = model->ncombs;
+		int ncombs = model->state->ncombs;
 		int comb, impl;
 		for(comb = 0; comb < ncombs; comb++)
 		{
-			int nimpls = model->nimpls[comb];
+			int nimpls = model->state->nimpls[comb];
 			for(impl = 0; impl < nimpls; impl++)
 			{
-				struct starpu_perfmodel_per_arch *archmodel = &model->per_arch[model->combs[comb]][impl];
+				struct starpu_perfmodel_per_arch *archmodel = &model->state->per_arch[model->state->combs[comb]][impl];
 				struct starpu_perfmodel_history_list *list, *plist;
 				struct starpu_perfmodel_history_table *entry, *tmp;
 
@@ -815,26 +819,27 @@ void _starpu_deinitialize_performance_model(struct starpu_perfmodel *model)
 				}
 				archmodel->list = NULL;
 			}
-			free(model->per_arch[model->combs[comb]]);
-			model->per_arch[model->combs[comb]] = NULL;
-			free(model->per_arch_is_set[model->combs[comb]]);
-			model->per_arch_is_set[model->combs[comb]] = NULL;
+			free(model->state->per_arch[model->state->combs[comb]]);
+			model->state->per_arch[model->state->combs[comb]] = NULL;
+
+			free(model->state->per_arch_is_set[model->state->combs[comb]]);
+			model->state->per_arch_is_set[model->state->combs[comb]] = NULL;
 		}
-		free(model->per_arch);
-		model->per_arch = NULL;
+		free(model->state->per_arch);
+		model->state->per_arch = NULL;
 
-		free(model->per_arch_is_set);
-		model->per_arch_is_set = NULL;
+		free(model->state->per_arch_is_set);
+		model->state->per_arch_is_set = NULL;
 
-		free(model->nimpls);
-		model->nimpls = NULL;
+		free(model->state->nimpls);
+		model->state->nimpls = NULL;
 
-		free(model->combs);
-		model->combs = NULL;
-		model->ncombs = 0;
+		free(model->state->combs);
+		model->state->combs = NULL;
+		model->state->ncombs = 0;
 	}
 
-	model->is_init = 0;
+	model->state->is_init = 0;
 	model->is_loaded = 0;
 }
 
@@ -854,9 +859,9 @@ void _starpu_deinitialize_registered_performance_models(void)
 	{
 		struct starpu_perfmodel *model = node->model;
 
-		STARPU_PTHREAD_RWLOCK_WRLOCK(&model->model_rwlock);
+		STARPU_PTHREAD_RWLOCK_WRLOCK(&model->state->model_rwlock);
 		_starpu_deinitialize_performance_model(model);
-		STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);
+		STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock);
 
 		pnode = node;
 		node = node->next;
@@ -892,7 +897,7 @@ void _starpu_load_history_based_model(struct starpu_perfmodel *model, unsigned s
 {
 	starpu_perfmodel_init(NULL, model);
 
-	STARPU_PTHREAD_RWLOCK_WRLOCK(&model->model_rwlock);
+	STARPU_PTHREAD_RWLOCK_WRLOCK(&model->state->model_rwlock);
 
 	if(!model->is_loaded)
 	{
@@ -938,7 +943,7 @@ void _starpu_load_history_based_model(struct starpu_perfmodel *model, unsigned s
 
 		model->is_loaded = 1;
 	}
-	STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);
+	STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock);
 
 }
 
@@ -1084,11 +1089,11 @@ double _starpu_regression_based_job_expected_perf(struct starpu_perfmodel *model
 	comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices);
 	if(comb == -1)
 		return NAN;
-	if (model->per_arch[comb] == NULL)
+	if (model->state->per_arch[comb] == NULL)
 		// The model has not been executed on this combination
 		return NAN;
 
-	regmodel = &model->per_arch[comb][nimpl].regression;
+	regmodel = &model->state->per_arch[comb][nimpl].regression;
 	size = _starpu_job_get_data_size(model, arch, nimpl, j);
 
 	if (regmodel->valid && size >= regmodel->minx * 0.9 && size <= regmodel->maxx * 1.1)
@@ -1107,11 +1112,11 @@ double _starpu_non_linear_regression_based_job_expected_perf(struct starpu_perfm
 	comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices);
 	if(comb == -1)
 		return NAN;
-	if (model->per_arch[comb] == NULL)
+	if (model->state->per_arch[comb] == NULL)
 		// The model has not been executed on this combination
 		return NAN;
 
-	regmodel = &model->per_arch[comb][nimpl].regression;
+	regmodel = &model->state->per_arch[comb][nimpl].regression;
 	size = _starpu_job_get_data_size(model, arch, nimpl, j);
 
 	if (regmodel->nl_valid && size >= regmodel->minx * 0.9 && size <= regmodel->maxx * 1.1)
@@ -1119,14 +1124,14 @@ double _starpu_non_linear_regression_based_job_expected_perf(struct starpu_perfm
 	else
 	{
 		uint32_t key = _starpu_compute_buffers_footprint(model, arch, nimpl, j);
-		struct starpu_perfmodel_per_arch *per_arch_model = &model->per_arch[comb][nimpl];
+		struct starpu_perfmodel_per_arch *per_arch_model = &model->state->per_arch[comb][nimpl];
 		struct starpu_perfmodel_history_table *history;
 		struct starpu_perfmodel_history_table *entry;
 
-		STARPU_PTHREAD_RWLOCK_RDLOCK(&model->model_rwlock);
+		STARPU_PTHREAD_RWLOCK_RDLOCK(&model->state->model_rwlock);
 		history = per_arch_model->history;
 		HASH_FIND_UINT32_T(history, &key, entry);
-		STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);
+		STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock);
 
 		/* Here helgrind would shout that this is unprotected access.
 		 * We do not care about racing access to the mean, we only want
@@ -1162,18 +1167,18 @@ double _starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, s
 	comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices);
 	if(comb == -1)
 		return NAN;
-	if (model->per_arch[comb] == NULL)
+	if (model->state->per_arch[comb] == NULL)
 		// The model has not been executed on this combination
 		return NAN;
 
-	per_arch_model = &model->per_arch[comb][nimpl];
+	per_arch_model = &model->state->per_arch[comb][nimpl];
 
 	key = _starpu_compute_buffers_footprint(model, arch, nimpl, j);
-	STARPU_PTHREAD_RWLOCK_RDLOCK(&model->model_rwlock);
+	STARPU_PTHREAD_RWLOCK_RDLOCK(&model->state->model_rwlock);
 	history = per_arch_model->history;
 	HASH_FIND_UINT32_T(history, &key, elt);
 	entry = (elt == NULL) ? NULL : elt->history_entry;
-	STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);
+	STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock);
 
 	/* Here helgrind would shout that this is unprotected access.
 	 * We do not care about racing access to the mean, we only want
@@ -1219,9 +1224,9 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 
 		int c;
 		unsigned found = 0;
-		for(c = 0; c < model->ncombs; c++)
+		for(c = 0; c < model->state->ncombs; c++)
 		{
-			if(model->combs[c] == comb)
+			if(model->state->combs[c] == comb)
 			{
 				found = 1;
 				break;
@@ -1229,34 +1234,35 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 		}
 		if(!found)
 		{
-			if (model->ncombs + 1 >= model->ncombs_set)
+			if (model->state->ncombs + 1 >= model->state->ncombs_set)
 			{
 				// The number of combinations is bigger than the one which was initially allocated, we need to reallocate
 				_starpu_perfmodel_realloc(model, nb_arch_combs);
 			}
-			model->combs[model->ncombs++] = comb;
+			model->state->combs[model->state->ncombs++] = comb;
 		}
 
-		STARPU_PTHREAD_RWLOCK_WRLOCK(&model->model_rwlock);
+		STARPU_PTHREAD_RWLOCK_WRLOCK(&model->state->model_rwlock);
 
-		if(!model->per_arch[comb])
+		if(!model->state->per_arch[comb])
 		{
-			model->per_arch[comb] = (struct starpu_perfmodel_per_arch*)malloc(STARPU_MAXIMPLEMENTATIONS*sizeof(struct starpu_perfmodel_per_arch));
-			model->per_arch_is_set[comb] = (int*)malloc(STARPU_MAXIMPLEMENTATIONS*sizeof(int));
+			model->state->per_arch[comb] = (struct starpu_perfmodel_per_arch*)malloc(STARPU_MAXIMPLEMENTATIONS*sizeof(struct starpu_perfmodel_per_arch));
+			model->state->per_arch_is_set[comb] = (int*)malloc(STARPU_MAXIMPLEMENTATIONS*sizeof(int));
+			model->state->per_arch_is_set[comb] = (int*)malloc(STARPU_MAXIMPLEMENTATIONS*sizeof(int));
 			int i;
 			for(i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++)
 			{
-				memset(&model->per_arch[comb][i], 0, sizeof(struct starpu_perfmodel_per_arch));
-				model->per_arch_is_set[comb][i] = 0;
+				memset(&model->state->per_arch[comb][i], 0, sizeof(struct starpu_perfmodel_per_arch));
+				model->state->per_arch_is_set[comb][i] = 0;
 			}
 		}
 
-		struct starpu_perfmodel_per_arch *per_arch_model = &model->per_arch[comb][impl];
-		if (model->per_arch_is_set[comb][impl] == 0)
+		struct starpu_perfmodel_per_arch *per_arch_model = &model->state->per_arch[comb][impl];
+		if (model->state->per_arch_is_set[comb][impl] == 0)
 		{
 			// We are adding a new implementation for the given comb and the given impl
-			model->nimpls[comb]++;
-			model->per_arch_is_set[comb][impl] = 1;
+			model->state->nimpls[comb]++;
+			model->state->per_arch_is_set[comb][impl] = 1;
 		}
 
 		if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
@@ -1415,7 +1421,7 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 		fprintf(f, "\n");
 		fclose(f);
 #endif
-		STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);
+		STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock);
 	}
 }
 
@@ -1439,13 +1445,13 @@ int starpu_perfmodel_list_combs(FILE *output, struct starpu_perfmodel *model)
 	int comb;
 
 	fprintf(output, "Model <%s>\n", model->symbol);
-	for(comb = 0; comb < model->ncombs; comb++)
+	for(comb = 0; comb < model->state->ncombs; comb++)
 	{
 		struct starpu_perfmodel_arch *arch;
 		int device;
 
-		arch = _starpu_arch_comb_get(model->combs[comb]);
-		fprintf(output, "\tComb %d: %d device%s\n", model->combs[comb], arch->ndevices, arch->ndevices>1?"s":"");
+		arch = _starpu_arch_comb_get(model->state->combs[comb]);
+		fprintf(output, "\tComb %d: %d device%s\n", model->state->combs[comb], arch->ndevices, arch->ndevices>1?"s":"");
 		for(device=0 ; device<arch->ndevices ; device++)
 		{
 			char *name = starpu_perfmodel_get_archtype_name(arch->devices[device].type);
@@ -1460,7 +1466,7 @@ struct starpu_perfmodel_per_arch *starpu_perfmodel_get_model_per_arch(struct sta
 	int comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices);
 	if(comb == -1) return NULL;
 
-	return &model->per_arch[comb][impl];
+	return &model->state->per_arch[comb][impl];
 }
 
 struct starpu_perfmodel_per_arch *_starpu_perfmodel_get_model_per_devices(struct starpu_perfmodel *model, int impl, va_list varg_list)
@@ -1509,21 +1515,21 @@ struct starpu_perfmodel_per_arch *_starpu_perfmodel_get_model_per_devices(struct
 		comb = starpu_perfmodel_arch_comb_add(arch.ndevices, arch.devices);
 
 	// Realloc if necessary
-	if (comb >= model->ncombs_set)
+	if (comb >= model->state->ncombs_set)
 		_starpu_perfmodel_realloc(model, comb+1);
 
 	// Get the per_arch object
-	if (model->per_arch[comb] == NULL)
+	if (model->state->per_arch[comb] == NULL)
 	{
-		model->per_arch[comb] = (struct starpu_perfmodel_per_arch*)malloc((impl+1) * sizeof(struct starpu_perfmodel_per_arch));
-		model->per_arch_is_set[comb] = (int*)malloc((impl+1) * sizeof(int));
-		model->nimpls[comb] = 0;
+		model->state->per_arch[comb] = (struct starpu_perfmodel_per_arch*)malloc((impl+1) * sizeof(struct starpu_perfmodel_per_arch));
+		model->state->per_arch_is_set[comb] = (int*)malloc((impl+1) * sizeof(int));
+		model->state->nimpls[comb] = 0;
 	}
-	memset(&model->per_arch[comb][impl], 0, sizeof(struct starpu_perfmodel_per_arch));
-	model->per_arch_is_set[comb][impl] = 1;
-	model->nimpls[comb] ++;
+	memset(&model->state->per_arch[comb][impl], 0, sizeof(struct starpu_perfmodel_per_arch));
+	model->state->per_arch_is_set[comb][impl] = 1;
+	model->state->nimpls[comb] ++;
 
-	return &model->per_arch[comb][impl];
+	return &model->state->per_arch[comb][impl];
 }
 
 struct starpu_perfmodel_per_arch *starpu_perfmodel_get_model_per_devices(struct starpu_perfmodel *model, int impl, ...)

+ 7 - 6
src/core/perfmodel/perfmodel_print.c

@@ -66,7 +66,8 @@ void starpu_perfmodel_print(struct starpu_perfmodel *model, struct starpu_perfmo
 {
 	int comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices);
 	STARPU_ASSERT(comb != -1);
-	struct starpu_perfmodel_per_arch *arch_model = &model->per_arch[comb][nimpl];
+
+	struct starpu_perfmodel_per_arch *arch_model = &model->state->per_arch[comb][nimpl];
 	char archname[32];
 
 	if (arch_model->regression.nsample || arch_model->regression.valid || arch_model->regression.nl_valid || arch_model->list)
@@ -177,7 +178,7 @@ int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char
 		for(comb = 0; comb < starpu_get_narch_combs(); comb++)
 		{
 			struct starpu_perfmodel_arch *arch_comb = _starpu_arch_comb_get(comb);
-			int nimpls = model->nimpls[comb];
+			int nimpls = model->state ? model->state->nimpls[comb] : 0;
 			for(impl = 0; impl < nimpls; impl++)
 				starpu_perfmodel_print(model, arch_comb, impl, parameter, footprint, output);
 		}
@@ -195,7 +196,7 @@ int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char
 			perf_arch.devices[0].ncores = 1;
 			int comb = starpu_perfmodel_arch_comb_get(perf_arch.ndevices, perf_arch.devices);
 			STARPU_ASSERT(comb != -1);
-			int nimpls = model->nimpls[comb];
+			int nimpls = model->state->nimpls[comb];
 			for (implid = 0; implid < nimpls; implid++)
 				starpu_perfmodel_print(model, &perf_arch,implid, parameter, footprint, output); /* Display all codelets on cpu */
 			free(perf_arch.devices);
@@ -221,7 +222,7 @@ int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char
 			perf_arch.devices[0].ncores = k-1;
 			int comb = starpu_perfmodel_arch_comb_get(perf_arch.ndevices, perf_arch.devices);
 			STARPU_ASSERT(comb != -1);
-			int nimpls = model->nimpls[comb];
+			int nimpls = model->state->nimpls[comb];
 
 			for (implid = 0; implid < nimpls; implid++)
 				starpu_perfmodel_print(model, &perf_arch, implid, parameter, footprint, output);
@@ -246,7 +247,7 @@ int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char
 				if(arch_comb->ndevices == 1 && arch_comb->devices[0].type == STARPU_CUDA_WORKER)
 				{
 					perf_arch.devices[0].devid = arch_comb->devices[0].devid;
-					int nimpls = model->nimpls[comb];
+					int nimpls = model->state->nimpls[comb];
 
 					for (implid = 0; implid < nimpls; implid++)
 						starpu_perfmodel_print(model, &perf_arch, implid, parameter, footprint, output);
@@ -272,7 +273,7 @@ int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char
 
 			int comb = starpu_perfmodel_arch_comb_get(perf_arch.ndevices, perf_arch.devices);
 			STARPU_ASSERT(comb != -1);
-			int nimpls = model->nimpls[comb];
+			int nimpls = model->state->nimpls[comb];
 
 			int implid;
 			for (implid = 0; implid < nimpls; implid++)

+ 9 - 8
tests/perfmodels/valid_model.c

@@ -16,6 +16,7 @@
 
 #include <config.h>
 #include <starpu.h>
+#include <core/perfmodel/perfmodel.h>
 #include "../helper.h"
 
 void func(void *descr[], void *arg)
@@ -79,11 +80,11 @@ static int submit(struct starpu_codelet *codelet, struct starpu_perfmodel *model
 	if (ret != 1)
 	{
 		int i, impl;
-		for(i = 0; i < lmodel.ncombs; i++)
+		for(i = 0; i < lmodel.state->ncombs; i++)
 		{
-			int comb = lmodel.combs[i];
-			for(impl = 0; impl < lmodel.nimpls[i]; impl++)
-				old_nsamples += lmodel.per_arch[comb][impl].regression.nsample;
+			int comb = lmodel.state->combs[i];
+			for(impl = 0; impl < lmodel.state->nimpls[i]; impl++)
+				old_nsamples += lmodel.state->per_arch[comb][impl].regression.nsample;
 		}
 	}
 
@@ -112,12 +113,12 @@ static int submit(struct starpu_codelet *codelet, struct starpu_perfmodel *model
 	{
 		int i;
 		new_nsamples = 0;
-		for(i = 0; i < lmodel.ncombs; i++)
+		for(i = 0; i < lmodel.state->ncombs; i++)
 		{
-			int comb = lmodel.combs[i];
+			int comb = lmodel.state->combs[i];
 			int impl;
-			for(impl = 0; impl < lmodel.nimpls[i]; impl++)
-			     new_nsamples += lmodel.per_arch[comb][impl].regression.nsample;
+			for(impl = 0; impl < lmodel.state->nimpls[i]; impl++)
+			     new_nsamples += lmodel.state->per_arch[comb][impl].regression.nsample;
 		}
 	}
 

+ 16 - 16
tools/starpu_perfmodel_plot.c

@@ -244,18 +244,18 @@ static void display_history_based_perf_models(FILE *gnuplot_file, struct starpu_
 	col = 2;
 
 	int i;
-	for(i = 0; i < model->ncombs; i++)
+	for(i = 0; i < model->state->ncombs; i++)
 	{
-		int comb = model->combs[i];
+		int comb = model->state->combs[i];
 		if (options->comb_is_set == 0 || options->comb == comb)
 		{
 			struct starpu_perfmodel_arch *arch;
 			int impl;
 
 			arch = _starpu_arch_comb_get(comb);
-			for(impl = 0; impl < model->nimpls[i]; impl++)
+			for(impl = 0; impl < model->state->nimpls[i]; impl++)
 			{
-				struct starpu_perfmodel_per_arch *arch_model = &model->per_arch[comb][impl];
+				struct starpu_perfmodel_per_arch *arch_model = &model->state->per_arch[comb][impl];
 				starpu_perfmodel_get_arch_name(arch, arch_name, 32, impl);
 
 				if (arch_model->list)
@@ -275,15 +275,15 @@ static void display_history_based_perf_models(FILE *gnuplot_file, struct starpu_
 
 		minimum = ULONG_MAX;
 		/* Get the next minimum */
-		for(i = 0; i < model->ncombs; i++)
+		for(i = 0; i < model->state->ncombs; i++)
 		{
-			int comb = model->combs[i];
+			int comb = model->state->combs[i];
 			if (options->comb_is_set == 0 || options->comb == comb)
 			{
 				int impl;
-				for(impl = 0; impl < model->nimpls[i]; impl++)
+				for(impl = 0; impl < model->state->nimpls[i]; impl++)
 				{
-					struct starpu_perfmodel_per_arch *arch_model = &model->per_arch[comb][impl];
+					struct starpu_perfmodel_per_arch *arch_model = &model->state->per_arch[comb][impl];
 					for (ptr = arch_model->list; ptr; ptr = ptr->next)
 					{
 						unsigned long size = ptr->entry->size;
@@ -298,16 +298,16 @@ static void display_history_based_perf_models(FILE *gnuplot_file, struct starpu_
 
 		fprintf(stderr, "%lu ", minimum);
 		fprintf(datafile, "%-15lu ", minimum);
-		for(i = 0; i < model->ncombs; i++)
+		for(i = 0; i < model->state->ncombs; i++)
 		{
-			int comb = model->combs[i];
+			int comb = model->state->combs[i];
 			if (options->comb_is_set == 0 || options->comb == comb)
 			{
 				int impl;
 
-				for(impl = 0; impl < model->nimpls[i]; impl++)
+				for(impl = 0; impl < model->state->nimpls[i]; impl++)
 				{
-					struct starpu_perfmodel_per_arch *arch_model = &model->per_arch[comb][impl];
+					struct starpu_perfmodel_per_arch *arch_model = &model->state->per_arch[comb][impl];
 					for (ptr = arch_model->list; ptr; ptr = ptr->next)
 					{
 						struct starpu_perfmodel_history_entry *entry = ptr->entry;
@@ -339,18 +339,18 @@ static void display_history_based_perf_models(FILE *gnuplot_file, struct starpu_
 static void display_all_perf_models(FILE *gnuplot_file, struct starpu_perfmodel *model, int *first, struct _perfmodel_plot_options *options)
 {
 	int i;
-	for(i = 0; i < model->ncombs; i++)
+	for(i = 0; i < model->state->ncombs; i++)
 	{
-		int comb = model->combs[i];
+		int comb = model->state->combs[i];
 		if (options->comb_is_set == 0 || options->comb == comb)
 		{
 			struct starpu_perfmodel_arch *arch;
 			int impl;
 
 			arch = _starpu_arch_comb_get(comb);
-			for(impl = 0; impl < model->nimpls[i]; impl++)
+			for(impl = 0; impl < model->state->nimpls[i]; impl++)
 			{
-				struct starpu_perfmodel_per_arch *archmodel = &model->per_arch[comb][impl];
+				struct starpu_perfmodel_per_arch *archmodel = &model->state->per_arch[comb][impl];
 				display_perf_model(gnuplot_file, model, arch, archmodel, comb, impl, first, options);
 			}
 		}