11 anos atrás · 8a472c0c88
--- a/include/starpu_perfmodel.h
+++ b/include/starpu_perfmodel.h
@@ -136,12 +136,16 @@ struct starpu_perfmodel
 
				 
			
 
				 	const char *symbol;
			
 
				 
			
 
				+#ifdef STARPU_DEVEL
			
 
				+#warning move all the fields in a private structure
			
 
				+#endif
			
 
				 	unsigned is_init;
			
 
				 	unsigned is_loaded;
			
 
				 	unsigned benchmarking;
			
 
				 	starpu_pthread_rwlock_t model_rwlock;
			
 
				 	int *nimpls;
			
 
				-	int ncombs;
			
 
				+	int ncombs;  /* The number of combinations currently used by the model */
			
 
				+	int ncombs_set; /* The number of combinations allocated in the arrays nimpls and combs */
			
 
				 	int *combs;
			
 
				 };
			
 
				 
			
--- a/src/core/perfmodel/perfmodel.h
+++ b/src/core/perfmodel/perfmodel.h
@@ -94,6 +94,8 @@ void _starpu_simgrid_get_platform_path(char *path, size_t maxlen);
 
				 
			
 
				 struct starpu_perfmodel_arch * _starpu_arch_comb_get(int comb);
			
 
				 
			
 
				+void _starpu_perfmodel_realloc(struct starpu_perfmodel *model, int nb);
			
 
				+
			
 
				 #ifdef __cplusplus
			
 
				 }
			
 
				 #endif
			
--- a/src/core/perfmodel/perfmodel_history.c
+++ b/src/core/perfmodel/perfmodel_history.c
@@ -40,7 +40,7 @@
 
				 
			
 
				 struct starpu_perfmodel_arch **arch_combs;
			
 
				 int current_arch_comb;
			
 
				-unsigned nb_arch_combs;
			
 
				+int nb_arch_combs;
			
 
				 
			
 
				 struct starpu_perfmodel_history_table
			
 
				 {
			
@@ -58,6 +58,12 @@ static struct _starpu_perfmodel_list *registered_models = NULL;
 
				 
			
 
				 int starpu_add_arch_comb(int ndevices, struct starpu_perfmodel_device* devices)
			
 
				 {
			
 
				+	if (current_arch_comb >= nb_arch_combs)
			
 
				+	{
			
 
				+		// We need to allocate more arch_combs
			
 
				+		nb_arch_combs += 10;
			
 
				+		arch_combs = (struct starpu_perfmodel_arch**) realloc(arch_combs, nb_arch_combs*sizeof(struct starpu_perfmodel_arch*));
			
 
				+	}
			
 
				 	arch_combs[current_arch_comb] = (struct starpu_perfmodel_arch*)malloc(sizeof(struct starpu_perfmodel_arch));
			
 
				 	arch_combs[current_arch_comb]->devices = (struct starpu_perfmodel_device*)malloc(ndevices*sizeof(struct starpu_perfmodel_device));
			
 
				 	arch_combs[current_arch_comb]->ndevices = ndevices;
			
@@ -401,6 +407,7 @@ static enum starpu_worker_archtype _get_enum_type(int type)
 
				 	}
			
 
				 
			
 
				 }
			
 
				+
			
 
				 static void parse_comb(FILE *f, struct starpu_perfmodel *model, unsigned scan_history, int comb)
			
 
				 {
			
 
				 	int ndevices = 0;
			
@@ -459,19 +466,11 @@ static void parse_model_file(FILE *f, struct starpu_perfmodel *model, unsigned s
 
				 
			
 
				 	if (ncombs > nb_arch_combs)
			
 
				 	{
			
 
				-		int i;
			
 
				-
			
 
				-		arch_combs = (struct starpu_perfmodel_arch**) realloc(arch_combs, ncombs*sizeof(struct starpu_perfmodel_arch*));
			
 
				-		model->per_arch = (struct starpu_perfmodel_per_arch**) realloc(model->per_arch, ncombs*sizeof(struct starpu_perfmodel_per_arch*));
			
 
				-		model->nimpls = (int *)realloc(model->nimpls, ncombs*sizeof(int));
			
 
				-		model->combs = (int*)realloc(model->combs, ncombs*sizeof(int));
			
 
				-
			
 
				-		for(i = ncombs; i < nb_arch_combs; i++)
			
 
				-		{
			
 
				-			model->per_arch[i] = NULL;
			
 
				-			model->nimpls[i] = 0;
			
 
				-		}
			
 
				+		// The model has more combs than the original number of arch_combs, we need to reallocate
			
 
				 		nb_arch_combs = ncombs;
			
 
				+		arch_combs = (struct starpu_perfmodel_arch**) realloc(arch_combs, nb_arch_combs*sizeof(struct starpu_perfmodel_arch*));
			
 
				+
			
 
				+		_starpu_perfmodel_realloc(model, nb_arch_combs);
			
 
				 	}
			
 
				 
			
 
				 	int comb;
			
@@ -574,6 +573,22 @@ static void dump_model_file(FILE *f, struct starpu_perfmodel *model)
 
				 	}
			
 
				 }
			
 
				 
			
 
				+void _starpu_perfmodel_realloc(struct starpu_perfmodel *model, int nb) /* Resize the per_arch, nimpls and combs arrays of model to hold nb entries, and record nb as the allocated count. */
			
 
				+{
			
 
				+	int i;
			
 
				+
			
 
				+	STARPU_ASSERT(nb > model->ncombs_set); /* Only growth is accepted: nb must be strictly larger than the currently allocated count. */
			
 
				+	model->per_arch = (struct starpu_perfmodel_per_arch**) realloc(model->per_arch, nb*sizeof(struct starpu_perfmodel_per_arch*)); /* NOTE(review): none of the three realloc results below is checked for NULL. */
			
 
				+	model->nimpls = (int *)realloc(model->nimpls, nb*sizeof(int));
			
 
				+	model->combs = (int*)realloc(model->combs, nb*sizeof(int));
			
 
				+	for(i = model->ncombs_set; i < nb; i++) /* Initialize only the newly added slots, from the old allocated count up to nb. */
			
 
				+	{
			
 
				+		model->per_arch[i] = NULL;
			
 
				+		model->nimpls[i] = 0; /* combs[i] is not initialized by this loop. */
			
 
				+	}
			
 
				+	model->ncombs_set = nb; /* Remember the new allocated size of the arrays. */
			
 
				+}
			
 
				+
			
 
				 void starpu_perfmodel_init(FILE *f, struct starpu_perfmodel *model)
			
 
				 {
			
 
				 	STARPU_ASSERT(model && model->symbol);
			
@@ -601,12 +616,13 @@ void starpu_perfmodel_init(FILE *f, struct starpu_perfmodel *model)
 
				 	STARPU_PTHREAD_RWLOCK_INIT(&model->model_rwlock, NULL);
			
 
				 	if(model->type != STARPU_COMMON)
			
 
				 	{
			
 
				-		unsigned i;
			
 
				+		int i;
			
 
				 
			
 
				 		model->per_arch = (struct starpu_perfmodel_per_arch**) malloc(nb_arch_combs*sizeof(struct starpu_perfmodel_per_arch*));
			
 
				 		model->nimpls = (int *)malloc(nb_arch_combs*sizeof(int));
			
 
				 		model->combs = (int*)malloc(nb_arch_combs*sizeof(int));
			
 
				 		model->ncombs = 0;
			
 
				+		model->ncombs_set = nb_arch_combs;
			
 
				 
			
 
				 		for(i = 0; i < nb_arch_combs; i++)
			
 
				 		{
			
@@ -748,7 +764,10 @@ void _starpu_initialize_registered_performance_models(void)
 
				 	for(i = 0; i < conf->topology.nhwmicdevices; i++)
			
 
				 		nmic += conf->topology.nhwmiccores[i];
			
 
				 	unsigned nscc = conf->topology.nhwscc;
			
 
				-	nb_arch_combs = pow(2, (ncores + ncuda + nopencl + nmic + nscc));
			
 
				+
			
 
				+	// We used to allocate 2**(ncores + ncuda + nopencl + nmic + nscc), this is too big
			
 
				+	// We now allocate only 2*(ncores + ncuda + nopencl + nmic + nscc), and reallocate when necessary in starpu_add_arch_comb
			
 
				+	nb_arch_combs = 2 * (ncores + ncuda + nopencl + nmic + nscc);
			
 
				 	arch_combs = (struct starpu_perfmodel_arch**) malloc(nb_arch_combs*sizeof(struct starpu_perfmodel_arch*));
			
 
				 	current_arch_comb = 0;
			
 
				 }
			
@@ -1194,7 +1213,14 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 
				 			}
			
 
				 		}
			
 
				 		if(!found)
			
 
				+		{
			
 
				+			if (model->ncombs + 1 >= model->ncombs_set)
			
 
				+			{
			
 
				+				// The number of combinations is bigger than the one which was initially allocated, we need to reallocate
			
 
				+				_starpu_perfmodel_realloc(model, nb_arch_combs);
			
 
				+			}
			
 
				 			model->combs[model->ncombs++] = comb;
			
 
				+		}
			
 
				 
			
 
				 		STARPU_PTHREAD_RWLOCK_WRLOCK(&model->model_rwlock);