Pārlūkot izejas kodu

src/core:
- reallocate arch_combs if the number of combinations read from a perfmodel file is greater than the one computed in starpu_perfmodel_init
- the arch_combs variables are now private to src/core/perfmodel/perfmodel_history.c, so define an accessor function _starpu_arch_comb_get

Nathalie Furmento 11 gadi atpakaļ
vecāks
revīzija
552cbdbcf0

+ 3 - 5
src/core/perfmodel/perfmodel.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009-2013  Université de Bordeaux 1
- * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
  * Copyright (C) 2011  Télécom-SudParis
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -40,9 +40,6 @@ extern "C"
  */
 #define _STARPU_PERFMODEL_VERSION 44
 
-struct starpu_perfmodel_arch **arch_combs;
-int narch_combs;
-
 struct _starpu_perfmodel_list
 {
 	struct _starpu_perfmodel_list *next;
@@ -89,13 +86,14 @@ int *_starpu_get_cuda_affinity_vector(unsigned gpuid);
 int *_starpu_get_opencl_affinity_vector(unsigned gpuid);
 #endif
 
-
 void _starpu_save_bandwidth_and_latency_disk(double bandwidth_write, double bandwidth_read, 
 					    double latency_write, double latency_read, unsigned node);
 
 int _starpu_read_double(FILE *f, char *format, double *val);
 void _starpu_simgrid_get_platform_path(char *path, size_t maxlen);
 
+struct starpu_perfmodel_arch * _starpu_arch_comb_get(int comb);
+
 #ifdef __cplusplus
 }
 #endif

+ 59 - 35
src/core/perfmodel/perfmodel_history.c

@@ -38,6 +38,10 @@
 #define HASH_ADD_UINT32_T(head,field,add) HASH_ADD(hh,head,field,sizeof(uint32_t),add)
 #define HASH_FIND_UINT32_T(head,find,out) HASH_FIND(hh,head,find,sizeof(uint32_t),out)
 
+/* Table of the known architecture combinations. It is private to this file;
+ * other modules read entries through _starpu_arch_comb_get(). */
+static struct starpu_perfmodel_arch **arch_combs;
+/* Number of entries of arch_combs currently in use (also the next free index). */
+static int current_arch_comb;
+/* Number of slots allocated in arch_combs. */
+static unsigned nb_arch_combs;
+
 struct starpu_perfmodel_history_table
 {
 	UT_hash_handle hh;
@@ -54,18 +58,18 @@ static struct _starpu_perfmodel_list *registered_models = NULL;
 
 int starpu_add_arch_comb(int ndevices, struct starpu_perfmodel_device* devices)
 {
-	arch_combs[narch_combs] = (struct starpu_perfmodel_arch*)malloc(sizeof(struct starpu_perfmodel_arch));
-	arch_combs[narch_combs]->devices = (struct starpu_perfmodel_device*)malloc(ndevices*sizeof(struct starpu_perfmodel_device));
-	arch_combs[narch_combs]->ndevices = ndevices;
+	arch_combs[current_arch_comb] = (struct starpu_perfmodel_arch*)malloc(sizeof(struct starpu_perfmodel_arch));
+	arch_combs[current_arch_comb]->devices = (struct starpu_perfmodel_device*)malloc(ndevices*sizeof(struct starpu_perfmodel_device));
+	arch_combs[current_arch_comb]->ndevices = ndevices;
 	int dev;
 	for(dev = 0; dev < ndevices; dev++)
 	{
-		arch_combs[narch_combs]->devices[dev].type = devices[dev].type;
-		arch_combs[narch_combs]->devices[dev].devid = devices[dev].devid;
-		arch_combs[narch_combs]->devices[dev].ncores = devices[dev].ncores;
+		arch_combs[current_arch_comb]->devices[dev].type = devices[dev].type;
+		arch_combs[current_arch_comb]->devices[dev].devid = devices[dev].devid;
+		arch_combs[current_arch_comb]->devices[dev].ncores = devices[dev].ncores;
 	}
-	narch_combs++;
-	return narch_combs-1;
+	current_arch_comb++;
+	return current_arch_comb-1;
 }
 
 int starpu_get_arch_comb(int ndevices, struct starpu_perfmodel_device *devices)
@@ -73,7 +77,7 @@ int starpu_get_arch_comb(int ndevices, struct starpu_perfmodel_device *devices)
 	int nfounded = 0;
 	unsigned found = 0;
 	int comb;
-	for(comb = 0; comb < narch_combs; comb++)
+	for(comb = 0; comb < current_arch_comb; comb++)
 	{
 		if(arch_combs[comb]->ndevices == ndevices)
 		{
@@ -82,8 +86,8 @@ int starpu_get_arch_comb(int ndevices, struct starpu_perfmodel_device *devices)
 			{
 				for(dev2 = 0; dev2 < ndevices; dev2++)
 				{
-					if(arch_combs[comb]->devices[dev1].type == devices[dev2].type && 
-					   arch_combs[comb]->devices[dev1].devid == devices[dev2].devid && 
+					if(arch_combs[comb]->devices[dev1].type == devices[dev2].type &&
+					   arch_combs[comb]->devices[dev1].devid == devices[dev2].devid &&
 					   arch_combs[comb]->devices[dev1].ncores == devices[dev2].ncores)
 						nfounded++;
 				}
@@ -93,24 +97,29 @@ int starpu_get_arch_comb(int ndevices, struct starpu_perfmodel_device *devices)
 		}
 		if(found)
 			return comb;
-	}	
+	}
 	return -1;
 }
 
 static 	void _free_arch_combs(void)
 {
 	int i;
-	for(i = 0; i < narch_combs; i++)
+	for(i = 0; i < current_arch_comb; i++)
 	{
 		free(arch_combs[i]->devices);
 		free(arch_combs[i]);
 	}
-	narch_combs = 0;
+	current_arch_comb = 0;
 }
 
 int starpu_get_narch_combs()
 {
-	return narch_combs;
+	return current_arch_comb;
+}
+
+/* Return the architecture combination registered at index comb.
+ * comb must lie in [0, starpu_get_narch_combs()). The returned pointer is
+ * released by _free_arch_combs() and must not be freed by the caller. */
+struct starpu_perfmodel_arch *_starpu_arch_comb_get(int comb)
+{
+	/* Reject out-of-range indices instead of reading past the used entries. */
+	STARPU_ASSERT_MSG(comb >= 0 && comb < current_arch_comb, "Invalid architecture combination %d", comb);
+	return arch_combs[comb];
+}
 
 size_t _starpu_job_get_data_size(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, unsigned impl, struct _starpu_job *j)
@@ -353,7 +362,6 @@ static void parse_arch(FILE *f, struct starpu_perfmodel *model, unsigned scan_hi
 		implmax = STARPU_MIN(nimpls, STARPU_MAXIMPLEMENTATIONS);
 		model->nimpls[comb] = implmax;
 		model->per_arch[comb] = (struct starpu_perfmodel_per_arch*)malloc(STARPU_MAXIMPLEMENTATIONS*sizeof(struct starpu_perfmodel_per_arch));
-		int i;
 		for(i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++)
 			memset(&model->per_arch[comb][i], 0, sizeof(struct starpu_perfmodel_per_arch));
 
@@ -381,7 +389,7 @@ static enum starpu_worker_archtype _get_enum_type(int type)
 			return STARPU_CPU_WORKER;
         	case 1:
 			return STARPU_CUDA_WORKER;
-	        case 2: 
+	        case 2:
 			return STARPU_OPENCL_WORKER;
         	case 3:
 			return STARPU_MIC_WORKER;
@@ -400,7 +408,7 @@ static void parse_comb(FILE *f, struct starpu_perfmodel *model, unsigned scan_hi
 	STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file");
 
 	struct starpu_perfmodel_device devices[ndevices];
-	
+
 	int dev;
 	for(dev = 0; dev < ndevices; dev++)
 	{
@@ -418,14 +426,14 @@ static void parse_comb(FILE *f, struct starpu_perfmodel *model, unsigned scan_hi
 		_starpu_drop_comments(f);
 		ret = fscanf(f, "%d\n", &ncores);
 		STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file");
-		devices[dev].type = dev_type;	
+		devices[dev].type = dev_type;
 		devices[dev].devid = dev_id;
 		devices[dev].ncores = ncores;
 	}
 	int id_comb = starpu_get_arch_comb(ndevices, devices);
 	if(id_comb == -1)
 		id_comb = starpu_add_arch_comb(ndevices, devices);
-	
+
 	model->combs[comb] = id_comb;
 	parse_arch(f, model, scan_history, id_comb);
 }
@@ -440,8 +448,7 @@ static void parse_model_file(FILE *f, struct starpu_perfmodel *model, unsigned s
 	STARPU_ASSERT_MSG(version == _STARPU_PERFMODEL_VERSION, "Incorrect performance model file with a model version %d not being the current model version (%d)\n",
 			  version, _STARPU_PERFMODEL_VERSION);
 	STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file");
-	
-	
+
 	int ncombs = 0;
 	_starpu_drop_comments(f);
 	ret = fscanf(f, "%d\n", &ncombs);
@@ -449,6 +456,23 @@ static void parse_model_file(FILE *f, struct starpu_perfmodel *model, unsigned s
 	if(ncombs > 0)
 		model->ncombs = ncombs;
 
+	if (ncombs > 0 && (unsigned) ncombs > nb_arch_combs)
+	{
+		unsigned i;
+		/* The file lists more combinations than were allocated so far: grow the tables. */
+		arch_combs = (struct starpu_perfmodel_arch**) realloc(arch_combs, ncombs*sizeof(struct starpu_perfmodel_arch*));
+		model->per_arch = (struct starpu_perfmodel_per_arch**) realloc(model->per_arch, ncombs*sizeof(struct starpu_perfmodel_per_arch*));
+		model->nimpls = (int *)realloc(model->nimpls, ncombs*sizeof(int));
+		model->combs = (int*)realloc(model->combs, ncombs*sizeof(int));
+		/* Initialise only the newly added slots, from the old capacity up to the new one. */
+		for(i = nb_arch_combs; i < (unsigned) ncombs; i++)
+		{
+			model->per_arch[i] = NULL;
+			model->nimpls[i] = 0;
+		}
+		nb_arch_combs = ncombs;
+	}
+
 	int comb;
 	for(comb = 0; comb < ncombs; comb++)
 		parse_comb(f, model, scan_history, comb);
@@ -519,7 +543,7 @@ static void dump_model_file(FILE *f, struct starpu_perfmodel *model)
 		fprintf(f, "# COMB_%d\n", comb);
 		fprintf(f, "# number of types devices\n");
 		fprintf(f, "%u\n", ndevices);
-		
+
 		for(dev = 0; dev < ndevices; dev++)
 		{
 			fprintf(f, "####################\n");
@@ -537,7 +561,7 @@ static void dump_model_file(FILE *f, struct starpu_perfmodel *model)
 			fprintf(f, "# number of cores \n");
 			fprintf(f, "%u\n", arch_combs[model->combs[comb]]->devices[dev].ncores);
 		}
-		
+
 		int nimpls = model->nimpls[model->combs[comb]];
 		fprintf(f, "##########\n");
 		fprintf(f, "# number of implementations\n");
@@ -585,15 +609,15 @@ void starpu_perfmodel_init(FILE *f, struct starpu_perfmodel *model)
 		for(i = 0; i < conf->topology.nhwmicdevices; i++)
 			nmic += conf->topology.nhwmiccores[i];
 		unsigned nscc = conf->topology.nhwscc;
-		unsigned npossible_combs= pow(2, (ncores + ncuda + nopencl + nmic + nscc));
-		arch_combs = (struct starpu_perfmodel_arch**) malloc(npossible_combs*sizeof(struct starpu_perfmodel_arch*));
-		narch_combs = 0;
-		model->per_arch = (struct starpu_perfmodel_per_arch**) malloc(npossible_combs*sizeof(struct starpu_perfmodel_per_arch*));
-		model->nimpls = (int *)malloc(npossible_combs*sizeof(int));
-		model->combs = (int*)malloc(npossible_combs*sizeof(int));
+		nb_arch_combs = pow(2, (ncores + ncuda + nopencl + nmic + nscc));
+		arch_combs = (struct starpu_perfmodel_arch**) malloc(nb_arch_combs*sizeof(struct starpu_perfmodel_arch*));
+		current_arch_comb = 0;
+		model->per_arch = (struct starpu_perfmodel_per_arch**) malloc(nb_arch_combs*sizeof(struct starpu_perfmodel_per_arch*));
+		model->nimpls = (int *)malloc(nb_arch_combs*sizeof(int));
+		model->combs = (int*)malloc(nb_arch_combs*sizeof(int));
 		model->ncombs = 0;
 
-		for(i = 0; i < npossible_combs; i++)
+		for(i = 0; i < nb_arch_combs; i++)
 		{
 			model->per_arch[i] = NULL;
 			model->nimpls[i] = 0;
@@ -757,14 +781,14 @@ void _starpu_deinitialize_performance_model(struct starpu_perfmodel *model)
 				struct starpu_perfmodel_per_arch *archmodel = &model->per_arch[model->combs[comb]][impl];
 				struct starpu_perfmodel_history_list *list, *plist;
 				struct starpu_perfmodel_history_table *entry, *tmp;
-				
+
 				HASH_ITER(hh, archmodel->history, entry, tmp)
 				{
 					HASH_DEL(archmodel->history, entry);
 					free(entry);
 				}
 				archmodel->history = NULL;
-				
+
 				list = archmodel->list;
 				while (list)
 				{
@@ -777,7 +801,7 @@ void _starpu_deinitialize_performance_model(struct starpu_perfmodel *model)
 			}
 			free(model->per_arch[model->combs[comb]]);
 			model->per_arch[model->combs[comb]] = NULL;
-		}		
+		}
 		free(model->per_arch);
 		model->per_arch = NULL;
 
@@ -1229,7 +1253,7 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 
 				double local_deviation = measured/entry->mean;
 				int historymaxerror = starpu_get_env_number_default("STARPU_HISTORY_MAX_ERROR", STARPU_HISTORYMAXERROR);
-				
+
 				if (entry->nsample &&
 					(100 * local_deviation > (100 + historymaxerror)
 					 || (100 / local_deviation > (100 + historymaxerror))))

+ 9 - 6
src/core/perfmodel/perfmodel_print.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2011, 2013  Université de Bordeaux 1
- * Copyright (C) 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
  * Copyright (C) 2011  Télécom-SudParis
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -20,6 +20,7 @@
 #include <starpu_perfmodel.h>
 #include <common/config.h>
 #include "perfmodel.h"
+
 static
 void _starpu_perfmodel_print_history_based(struct starpu_perfmodel_per_arch *per_arch_model, char *parameter, uint32_t *footprint, FILE *output)
 {
@@ -173,11 +174,12 @@ int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char
 	if (arch == NULL)
 	{
 		int comb, impl;
-		for(comb = 0; comb < narch_combs; comb++)
+		for(comb = 0; comb < starpu_get_narch_combs(); comb++)
 		{
+			struct starpu_perfmodel_arch *arch_comb = _starpu_arch_comb_get(comb);
 			int nimpls = model->nimpls[comb];
 			for(impl = 0; impl < nimpls; impl++)
-				starpu_perfmodel_print(model, arch_combs[comb], impl, parameter, footprint, output);
+				starpu_perfmodel_print(model, arch_comb, impl, parameter, footprint, output);
 		}
 	}
 	else
@@ -238,11 +240,12 @@ int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char
 			perf_arch.devices[0].type = STARPU_CUDA_WORKER;
 			perf_arch.devices[0].ncores = 1;
 			int comb;
-			for(comb = 0; comb < narch_combs; comb++)
+			for(comb = 0; comb < starpu_get_narch_combs(); comb++)
 			{
-				if(arch_combs[comb]->ndevices == 1 && arch_combs[comb]->devices[0].type == STARPU_CUDA_WORKER)
+				struct starpu_perfmodel_arch *arch_comb = _starpu_arch_comb_get(comb);
+				if(arch_comb->ndevices == 1 && arch_comb->devices[0].type == STARPU_CUDA_WORKER)
 				{
-					perf_arch.devices[0].devid = arch_combs[comb]->devices[0].devid;
+					perf_arch.devices[0].devid = arch_comb->devices[0].devid;
 					int nimpls = model->nimpls[comb];
 
 					for (implid = 0; implid < nimpls; implid++)