11 years ago · 2f8c7b58bd
--- a/examples/cholesky/cholesky_models.c
+++ b/examples/cholesky/cholesky_models.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
			
 
				- * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
			
 
				  * Copyright (C) 2011  Télécom-SudParis
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -143,9 +143,9 @@ void initialize_chol_model(struct starpu_perfmodel* model, char * symbol,
 
				         arch_cpu.devices[0].devid = 0;
			
 
				         arch_cpu.devices[0].ncores = 1;
			
 
				 
			
 
				-        int comb_cpu = starpu_get_arch_comb(arch_cpu.ndevices, arch_cpu.devices);
			
 
				+        int comb_cpu = starpu_perfmodel_arch_comb_get(arch_cpu.ndevices, arch_cpu.devices);
			
 
				         if(comb_cpu == -1)
			
 
				-                comb_cpu = starpu_add_arch_comb(arch_cpu.ndevices, arch_cpu.devices);
			
 
				+                comb_cpu = starpu_perfmodel_arch_comb_add(arch_cpu.ndevices, arch_cpu.devices);
			
 
				 
			
 
				 
			
 
				         model->per_arch[comb_cpu] = (struct starpu_perfmodel_per_arch*)malloc(sizeof(struct starpu_perfmodel_per_arch));
			
@@ -162,9 +162,9 @@ void initialize_chol_model(struct starpu_perfmodel* model, char * symbol,
 
				 		arch_cuda.devices[0].devid = 0;
			
 
				 		arch_cuda.devices[0].ncores = 1;
			
 
				 
			
 
				-		int comb_cuda = starpu_get_arch_comb(arch_cuda.ndevices, arch_cuda.devices);
			
 
				+		int comb_cuda = starpu_perfmodel_arch_comb_get(arch_cuda.ndevices, arch_cuda.devices);
			
 
				 		if(comb_cuda == -1)
			
 
				-			comb_cuda = starpu_add_arch_comb(arch_cuda.ndevices, arch_cuda.devices);
			
 
				+			comb_cuda = starpu_perfmodel_arch_comb_add(arch_cuda.ndevices, arch_cuda.devices);
			
 
				 
			
 
				 		model->per_arch[comb_cuda] = (struct starpu_perfmodel_per_arch*)malloc(sizeof(struct starpu_perfmodel_per_arch));
			
 
				 		memset(&model->per_arch[comb_cuda][0], 0, sizeof(struct starpu_perfmodel_per_arch));
			
--- a/examples/heat/lu_kernels_model.c
+++ b/examples/heat/lu_kernels_model.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
			
 
				- * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
			
 
				  * Copyright (C) 2011  Télécom-SudParis
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -230,9 +230,9 @@ void initialize_lu_kernels_model(struct starpu_perfmodel* model, char * symbol,
 
				 	arch_cpu.devices[0].devid = 0;
			
 
				 	arch_cpu.devices[0].ncores = 1;
			
 
				 
			
 
				-	int comb_cpu = starpu_get_arch_comb(arch_cpu.ndevices, arch_cpu.devices);
			
 
				+	int comb_cpu = starpu_perfmodel_arch_comb_get(arch_cpu.ndevices, arch_cpu.devices);
			
 
				 	if(comb_cpu == -1)
			
 
				-		comb_cpu = starpu_add_arch_comb(arch_cpu.ndevices, arch_cpu.devices);
			
 
				+		comb_cpu = starpu_perfmodel_arch_comb_add(arch_cpu.ndevices, arch_cpu.devices);
			
 
				 
			
 
				 
			
 
				 	model->per_arch[comb_cpu] = (struct starpu_perfmodel_per_arch*)malloc(sizeof(struct starpu_perfmodel_per_arch));
			
@@ -249,9 +249,9 @@ void initialize_lu_kernels_model(struct starpu_perfmodel* model, char * symbol,
 
				 		arch_cuda.devices[0].devid = 0;
			
 
				 		arch_cuda.devices[0].ncores = 1;
			
 
				 
			
 
				-		int comb_cuda = starpu_get_arch_comb(arch_cuda.ndevices, arch_cuda.devices);
			
 
				+		int comb_cuda = starpu_perfmodel_arch_comb_get(arch_cuda.ndevices, arch_cuda.devices);
			
 
				 		if(comb_cuda == -1)
			
 
				-			comb_cuda = starpu_add_arch_comb(arch_cuda.ndevices, arch_cuda.devices);
			
 
				+			comb_cuda = starpu_perfmodel_arch_comb_add(arch_cuda.ndevices, arch_cuda.devices);
			
 
				 
			
 
				 		model->per_arch[comb_cuda] = (struct starpu_perfmodel_per_arch*)malloc(sizeof(struct starpu_perfmodel_per_arch));
			
 
				 		memset(&model->per_arch[comb_cuda][0], 0, sizeof(struct starpu_perfmodel_per_arch));
			
--- a/include/starpu_perfmodel.h
+++ b/include/starpu_perfmodel.h
@@ -159,8 +159,8 @@ struct starpu_perfmodel_per_arch *starpu_perfmodel_get_model_per_arch(struct sta
 
				 int starpu_perfmodel_load_symbol(const char *symbol, struct starpu_perfmodel *model);
			
 
				 int starpu_perfmodel_unload_model(struct starpu_perfmodel *model);
			
 
				 int starpu_get_narch_combs();
			
 
				-int starpu_add_arch_comb(int ndevices, struct starpu_perfmodel_device* devices);
			
 
				-int starpu_get_arch_comb(int ndevices, struct starpu_perfmodel_device *devices);
			
 
				+int starpu_perfmodel_arch_comb_add(int ndevices, struct starpu_perfmodel_device* devices);
			
 
				+int starpu_perfmodel_arch_comb_get(int ndevices, struct starpu_perfmodel_device *devices);
			
 
				 
			
 
				 void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, char *path, size_t maxlen, unsigned nimpl);
			
 
				 char* starpu_perfmodel_get_archtype_name(enum starpu_worker_archtype archtype);
			
--- a/sc_hypervisor/examples/cholesky/cholesky_models.c
+++ b/sc_hypervisor/examples/cholesky/cholesky_models.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
			
 
				- * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
			
 
				  * Copyright (C) 2011  Télécom-SudParis
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -139,9 +139,9 @@ void initialize_chol_model(struct starpu_perfmodel* model, char * symbol,
 
				         arch_cpu.devices[0].devid = 0;
			
 
				         arch_cpu.devices[0].ncores = 1;
			
 
				 
			
 
				-	int comb_cpu = starpu_get_arch_comb(arch_cpu.ndevices, arch_cpu.devices);
			
 
				+	int comb_cpu = starpu_perfmodel_arch_comb_get(arch_cpu.ndevices, arch_cpu.devices);
			
 
				         if(comb_cpu == -1)
			
 
				-                comb_cpu = starpu_add_arch_comb(arch_cpu.ndevices, arch_cpu.devices);
			
 
				+                comb_cpu = starpu_perfmodel_arch_comb_add(arch_cpu.ndevices, arch_cpu.devices);
			
 
				 
			
 
				 
			
 
				 	model->per_arch[comb_cpu] = (struct starpu_perfmodel_per_arch*)malloc(sizeof(struct starpu_perfmodel_per_arch));
			
@@ -158,9 +158,9 @@ void initialize_chol_model(struct starpu_perfmodel* model, char * symbol,
 
				                 arch_cuda.devices[0].devid = 0;
			
 
				 		arch_cuda.devices[0].ncores = 1;
			
 
				 
			
 
				-		int comb_cuda = starpu_get_arch_comb(arch_cuda.ndevices, arch_cuda.devices);
			
 
				+		int comb_cuda = starpu_perfmodel_arch_comb_get(arch_cuda.ndevices, arch_cuda.devices);
			
 
				 		if(comb_cuda == -1)
			
 
				-			comb_cuda = starpu_add_arch_comb(arch_cuda.ndevices, arch_cuda.devices);
			
 
				+			comb_cuda = starpu_perfmodel_arch_comb_add(arch_cuda.ndevices, arch_cuda.devices);
			
 
				 
			
 
				                 model->per_arch[comb_cuda] = (struct starpu_perfmodel_per_arch*)malloc(sizeof(struct starpu_perfmodel_per_arch));
			
 
				                 memset(&model->per_arch[comb_cuda][0], 0, sizeof(struct starpu_perfmodel_per_arch));
			
--- a/src/core/perfmodel/perfmodel.c
+++ b/src/core/perfmodel/perfmodel.c
@@ -82,7 +82,7 @@ static double per_arch_task_expected_perf(struct starpu_perfmodel *model, struct
 
				 	double (*per_arch_cost_function)(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl);
			
 
				 	double (*per_arch_cost_model)(struct starpu_data_descr *);
			
 
				 
			
 
				-	comb = starpu_get_arch_comb(arch->ndevices, arch->devices);
			
 
				+	comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices);
			
 
				 	if (comb == -1)
			
 
				 		return NAN;
			
 
				 	if (model->per_arch[comb] == NULL)
			
--- a/src/core/perfmodel/perfmodel_history.c
+++ b/src/core/perfmodel/perfmodel_history.c
@@ -56,7 +56,7 @@ struct starpu_perfmodel_history_table
 
				 static starpu_pthread_rwlock_t registered_models_rwlock;
			
 
				 static struct _starpu_perfmodel_list *registered_models = NULL;
			
 
				 
			
 
				-int starpu_add_arch_comb(int ndevices, struct starpu_perfmodel_device* devices)
			
 
				+int starpu_perfmodel_arch_comb_add(int ndevices, struct starpu_perfmodel_device* devices)
			
 
				 {
			
 
				 	if (current_arch_comb >= nb_arch_combs)
			
 
				 	{
			
@@ -78,7 +78,7 @@ int starpu_add_arch_comb(int ndevices, struct starpu_perfmodel_device* devices)
 
				 	return current_arch_comb-1;
			
 
				 }
			
 
				 
			
 
				-int starpu_get_arch_comb(int ndevices, struct starpu_perfmodel_device *devices)
			
 
				+int starpu_perfmodel_arch_comb_get(int ndevices, struct starpu_perfmodel_device *devices)
			
 
				 {
			
 
				 	int comb;
			
 
				 	for(comb = 0; comb < current_arch_comb; comb++)
			
@@ -132,7 +132,7 @@ struct starpu_perfmodel_arch *_starpu_arch_comb_get(int comb)
 
				 size_t _starpu_job_get_data_size(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, unsigned impl, struct _starpu_job *j)
			
 
				 {
			
 
				 	struct starpu_task *task = j->task;
			
 
				-	int comb = starpu_get_arch_comb(arch->ndevices, arch->devices);
			
 
				+	int comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices);
			
 
				 
			
 
				 	if (model && model->per_arch && comb != -1 && model->per_arch[comb] && model->per_arch[comb][impl].size_base)
			
 
				 	{
			
@@ -438,9 +438,9 @@ static void parse_comb(FILE *f, struct starpu_perfmodel *model, unsigned scan_hi
 
				 		devices[dev].devid = dev_id;
			
 
				 		devices[dev].ncores = ncores;
			
 
				 	}
			
 
				-	int id_comb = starpu_get_arch_comb(ndevices, devices);
			
 
				+	int id_comb = starpu_perfmodel_arch_comb_get(ndevices, devices);
			
 
				 	if(id_comb == -1)
			
 
				-		id_comb = starpu_add_arch_comb(ndevices, devices);
			
 
				+		id_comb = starpu_perfmodel_arch_comb_add(ndevices, devices);
			
 
				 
			
 
				 	model->combs[comb] = id_comb;
			
 
				 	parse_arch(f, model, scan_history, id_comb);
			
@@ -766,7 +766,7 @@ void _starpu_initialize_registered_performance_models(void)
 
				 	unsigned nscc = conf->topology.nhwscc;
			
 
				 
			
 
				 	// We used to allocate 2**(ncores + ncuda + nopencl + nmic + nscc), this is too big
			
 
				-	// We now allocate only 2*(ncores + ncuda + nopencl + nmic + nscc), and reallocate when necessary in starpu_add_arch_comb
			
 
				+	// We now allocate only 2*(ncores + ncuda + nopencl + nmic + nscc), and reallocate when necessary in starpu_perfmodel_arch_comb_add
			
 
				 	nb_arch_combs = 2 * (ncores + ncuda + nopencl + nmic + nscc);
			
 
				 	arch_combs = (struct starpu_perfmodel_arch**) malloc(nb_arch_combs*sizeof(struct starpu_perfmodel_arch*));
			
 
				 	current_arch_comb = 0;
			
@@ -1040,7 +1040,7 @@ char* starpu_perfmodel_get_archtype_name(enum starpu_worker_archtype archtype)
 
				 
			
 
				 void starpu_perfmodel_get_arch_name(struct starpu_perfmodel_arch* arch, char *archname, size_t maxlen,unsigned impl)
			
 
				 {
			
 
				-	int comb = starpu_get_arch_comb(arch->ndevices, arch->devices);
			
 
				+	int comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices);
			
 
				 	STARPU_ASSERT(comb != -1);
			
 
				 
			
 
				 	snprintf(archname, maxlen, "Comb%d_impl%u", comb, impl);
			
@@ -1049,7 +1049,7 @@ void starpu_perfmodel_get_arch_name(struct starpu_perfmodel_arch* arch, char *ar
 
				 void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model,
			
 
				 				    struct starpu_perfmodel_arch* arch, char *path, size_t maxlen, unsigned nimpl)
			
 
				 {
			
 
				-	int comb = starpu_get_arch_comb(arch->ndevices, arch->devices);
			
 
				+	int comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices);
			
 
				 	STARPU_ASSERT(comb != -1);
			
 
				 	char archname[32];
			
 
				 	starpu_perfmodel_get_arch_name(arch, archname, 32, nimpl);
			
@@ -1066,7 +1066,7 @@ double _starpu_regression_based_job_expected_perf(struct starpu_perfmodel *model
 
				 	size_t size;
			
 
				 	struct starpu_perfmodel_regression_model *regmodel;
			
 
				 
			
 
				-	comb = starpu_get_arch_comb(arch->ndevices, arch->devices);
			
 
				+	comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices);
			
 
				 	if(comb == -1)
			
 
				 		return NAN;
			
 
				 	if (model->per_arch[comb] == NULL)
			
@@ -1089,7 +1089,7 @@ double _starpu_non_linear_regression_based_job_expected_perf(struct starpu_perfm
 
				 	size_t size;
			
 
				 	struct starpu_perfmodel_regression_model *regmodel;
			
 
				 
			
 
				-	comb = starpu_get_arch_comb(arch->ndevices, arch->devices);
			
 
				+	comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices);
			
 
				 	if(comb == -1)
			
 
				 		return NAN;
			
 
				 	if (model->per_arch[comb] == NULL)
			
@@ -1144,7 +1144,7 @@ double _starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, s
 
				 	struct starpu_perfmodel_history_table *history, *elt;
			
 
				 	uint32_t key;
			
 
				 
			
 
				-	comb = starpu_get_arch_comb(arch->ndevices, arch->devices);
			
 
				+	comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices);
			
 
				 	if(comb == -1)
			
 
				 		return NAN;
			
 
				 	if (model->per_arch[comb] == NULL)
			
@@ -1198,9 +1198,9 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 
				 {
			
 
				 	if (model)
			
 
				 	{
			
 
				-		int comb = starpu_get_arch_comb(arch->ndevices, arch->devices);
			
 
				+		int comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices);
			
 
				 		if(comb == -1)
			
 
				-			comb = starpu_add_arch_comb(arch->ndevices, arch->devices);
			
 
				+			comb = starpu_perfmodel_arch_comb_add(arch->ndevices, arch->devices);
			
 
				 
			
 
				 		int c;
			
 
				 		unsigned found = 0;
			
@@ -1413,7 +1413,7 @@ void starpu_perfmodel_update_history(struct starpu_perfmodel *model, struct star
 
				 
			
 
				 struct starpu_perfmodel_per_arch *starpu_perfmodel_get_model_per_arch(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, unsigned impl)
			
 
				 {
			
 
				-	int comb = starpu_get_arch_comb(arch->ndevices, arch->devices);
			
 
				+	int comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices);
			
 
				 	if(comb == -1) return NULL;
			
 
				 
			
 
				 	return &model->per_arch[comb][impl];
			
--- a/src/core/perfmodel/perfmodel_print.c
+++ b/src/core/perfmodel/perfmodel_print.c
@@ -64,7 +64,7 @@ void _starpu_perfmodel_print_history_based(struct starpu_perfmodel_per_arch *per
 
				 
			
 
				 void starpu_perfmodel_print(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, unsigned nimpl, char *parameter, uint32_t *footprint, FILE *output)
			
 
				 {
			
 
				-	int comb = starpu_get_arch_comb(arch->ndevices, arch->devices);
			
 
				+	int comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices);
			
 
				 	STARPU_ASSERT(comb != -1);
			
 
				 	struct starpu_perfmodel_per_arch *arch_model = &model->per_arch[comb][nimpl];
			
 
				 	char archname[32];
			
@@ -193,7 +193,7 @@ int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char
 
				 			perf_arch.devices[0].type = STARPU_CPU_WORKER;
			
 
				 			perf_arch.devices[0].devid = 0;
			
 
				 			perf_arch.devices[0].ncores = 1;
			
 
				-			int comb = starpu_get_arch_comb(perf_arch.ndevices, perf_arch.devices);
			
 
				+			int comb = starpu_perfmodel_arch_comb_get(perf_arch.ndevices, perf_arch.devices);
			
 
				 			STARPU_ASSERT(comb != -1);
			
 
				 			int nimpls = model->nimpls[comb];
			
 
				 			for (implid = 0; implid < nimpls; implid++)
			
@@ -219,7 +219,7 @@ int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char
 
				 			perf_arch.devices[0].type = STARPU_CPU_WORKER;
			
 
				 			perf_arch.devices[0].devid = 0;
			
 
				 			perf_arch.devices[0].ncores = k-1;
			
 
				-			int comb = starpu_get_arch_comb(perf_arch.ndevices, perf_arch.devices);
			
 
				+			int comb = starpu_perfmodel_arch_comb_get(perf_arch.ndevices, perf_arch.devices);
			
 
				 			STARPU_ASSERT(comb != -1);
			
 
				 			int nimpls = model->nimpls[comb];
			
 
				 
			
@@ -270,7 +270,7 @@ int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char
 
				 			perf_arch.devices[0].devid = gpuid;
			
 
				 			perf_arch.devices[0].ncores = 1;
			
 
				 
			
 
				-			int comb = starpu_get_arch_comb(perf_arch.ndevices, perf_arch.devices);
			
 
				+			int comb = starpu_perfmodel_arch_comb_get(perf_arch.ndevices, perf_arch.devices);
			
 
				 			STARPU_ASSERT(comb != -1);
			
 
				 			int nimpls = model->nimpls[comb];
			
 
				 
			
--- a/tests/sched_policies/simple_cpu_gpu_sched.c
+++ b/tests/sched_policies/simple_cpu_gpu_sched.c
@@ -113,9 +113,9 @@ init_perfmodels_gpu(int gpu_type)
 
				 		arch_gpu.devices[0].devid = starpu_worker_get_devid(worker_gpu_ids[worker_gpu]);
			
 
				 		arch_gpu.devices[0].ncores = 1;
			
 
				 
			
 
				-		int comb_gpu = starpu_get_arch_comb(arch_gpu.ndevices, arch_gpu.devices);
			
 
				+		int comb_gpu = starpu_perfmodel_arch_comb_get(arch_gpu.ndevices, arch_gpu.devices);
			
 
				 		if(comb_gpu == -1)
			
 
				-			comb_gpu = starpu_add_arch_comb(arch_gpu.ndevices, arch_gpu.devices);
			
 
				+			comb_gpu = starpu_perfmodel_arch_comb_add(arch_gpu.ndevices, arch_gpu.devices);
			
 
				 
			
 
				 //#error per_arch[comb_gpu] peut ne pas etre alloue, on doit fournir des fonctions publiques pour eviter de taper directtement dedans, la fonction se chargerait d allouer per_arch[comb_gpu] si necessaire
			
 
				 		if (comb_gpu >= model_cpu_task.ncombs_set)
			
@@ -150,9 +150,9 @@ init_perfmodels(void)
 
				 	arch_cpu.devices[0].devid = 0;
			
 
				 	arch_cpu.devices[0].ncores = 1;
			
 
				 
			
 
				-	int comb_cpu = starpu_get_arch_comb(arch_cpu.ndevices, arch_cpu.devices);
			
 
				+	int comb_cpu = starpu_perfmodel_arch_comb_get(arch_cpu.ndevices, arch_cpu.devices);
			
 
				 	if (comb_cpu == -1)
			
 
				-		comb_cpu = starpu_add_arch_comb(arch_cpu.ndevices, arch_cpu.devices);
			
 
				+		comb_cpu = starpu_perfmodel_arch_comb_add(arch_cpu.ndevices, arch_cpu.devices);
			
 
				 
			
 
				 //#error per_arch[comb_cpu] peut ne pas etre alloue, on doit fournir des fonctions publiques pour eviter de taper directtement dedans, la fonction se chargerait d allouer per_arch[comb_cpu] si necessaire
			
 
				 	if (comb_cpu >= model_cpu_task.ncombs_set)