15 years ago · 480ee416d5
--- a/include/starpu-perfmodel.h
+++ b/include/starpu-perfmodel.h
@@ -20,6 +20,7 @@
 
				 #include <stdio.h>
			
 
				 #include <pthread.h>
			
 
				 #include <starpu_config.h>
			
 
				+#include <starpu-task.h> // for MAXCUDADEVS
			
 
				 
			
 
				 #ifdef __cplusplus
			
 
				 extern "C" {
			
@@ -35,19 +36,14 @@ struct starpu_buffer_descr_t;
 
				    so we do not use the archtype enum type directly for performance models
			
 
				 */
			
 
				 
			
 
				-/* on most system we will consider one or two architectures as all accelerators
			
 
				-   are likely to be identical */
			
 
				-#define NARCH_VARIATIONS	6
			
 
				-
			
 
				 enum starpu_perf_archtype {
			
 
				 	STARPU_CORE_DEFAULT = 0,
			
 
				 	STARPU_CUDA_DEFAULT = 1,
			
 
				-	STARPU_CUDA_2 = 2,
			
 
				-	STARPU_CUDA_3 = 3,
			
 
				-	STARPU_CUDA_4 = 4,
			
 
				-	STARPU_GORDON_DEFAULT = 5
			
 
				+	/* STARPU_CUDA_DEFAULT + devid */
			
 
				+	STARPU_GORDON_DEFAULT = STARPU_CUDA_DEFAULT + MAXCUDADEVS
			
 
				 };
			
 
				 
			
 
				+#define NARCH_VARIATIONS	(STARPU_GORDON_DEFAULT+1)
			
 
				 
			
 
				 struct starpu_regression_model_t {
			
 
				 	/* sum of ln(measured) */
			
@@ -109,7 +105,9 @@ struct starpu_perfmodel_t {
 
				  * performance model files */
			
 
				 int starpu_load_history_debug(const char *symbol, struct starpu_perfmodel_t *model);
			
 
				 void starpu_perfmodel_debugfilepath(struct starpu_perfmodel_t *model,
			
 
				-		enum starpu_perf_archtype arch, char **path, size_t maxlen);
			
 
				+		enum starpu_perf_archtype arch, char *path, size_t maxlen);
			
 
				+void starpu_perfmodel_get_arch_name(enum starpu_perf_archtype arch,
			
 
				+		char *archname, size_t maxlen);
			
 
				 
			
 
				 #ifdef __cplusplus
			
 
				 }
			
--- a/src/core/perfmodel/perfmodel.c
+++ b/src/core/perfmodel/perfmodel.c
@@ -189,6 +189,17 @@ void create_sampling_directory_if_needed(void)
 
				 			STARPU_ASSERT(S_ISDIR(sb.st_mode));
			
 
				 		}
			
 
				 	
			
 
				+		ret = mkdir(PERF_MODEL_DIR_DEBUG, S_IRWXU);
			
 
				+		if (ret == -1)
			
 
				+		{
			
 
				+			/* the only tolerated failure is that the entry already exists... */
			
 
				+			STARPU_ASSERT(errno == EEXIST);
			
 
				+			/* ...in which case the debug path itself must be a directory */
			
 
				+			struct stat sb;
			
 
				+			ret = stat(PERF_MODEL_DIR_DEBUG, &sb);
			
 
				+			STARPU_ASSERT(ret == 0 && S_ISDIR(sb.st_mode));
			
 
				+		}
			
 
				+	
			
 
				 		directory_existence_was_tested = 1;
			
 
				 	}
			
 
				 }
			
--- a/src/core/perfmodel/perfmodel.h
+++ b/src/core/perfmodel/perfmodel.h
@@ -28,6 +28,7 @@
 
				 
			
 
				 #define PERF_MODEL_DIR_CODELETS	PERF_MODEL_DIR"/codelets/"
			
 
				 #define PERF_MODEL_DIR_BUS	PERF_MODEL_DIR"/bus/"
			
 
				+#define PERF_MODEL_DIR_DEBUG	PERF_MODEL_DIR"/debug/"
			
 
				 
			
 
				 struct starpu_buffer_descr_t;
			
 
				 struct jobq_s;
			
--- a/src/core/perfmodel/perfmodel_history.c
+++ b/src/core/perfmodel/perfmodel_history.c
@@ -146,12 +146,9 @@ static void parse_per_arch_model_file(FILE *f, struct starpu_per_arch_perfmodel_
 
				 
			
 
				 static void parse_model_file(FILE *f, struct starpu_perfmodel_t *model, unsigned scan_history)
			
 
				 {
			
 
				-	parse_per_arch_model_file(f, &model->per_arch[STARPU_CORE_DEFAULT], scan_history);
			
 
				-	parse_per_arch_model_file(f, &model->per_arch[STARPU_CUDA_DEFAULT], scan_history);
			
 
				-	parse_per_arch_model_file(f, &model->per_arch[STARPU_CUDA_2], scan_history);
			
 
				-	parse_per_arch_model_file(f, &model->per_arch[STARPU_CUDA_3], scan_history);
			
 
				-	parse_per_arch_model_file(f, &model->per_arch[STARPU_CUDA_4], scan_history);
			
 
				-	parse_per_arch_model_file(f, &model->per_arch[STARPU_GORDON_DEFAULT], scan_history);
			
 
				+	unsigned arch;
			
 
				+	for (arch = 0; arch < NARCH_VARIATIONS; arch++)
			
 
				+		parse_per_arch_model_file(f, &model->per_arch[arch], scan_history);
			
 
				 }
			
 
				 
			
 
				 static void dump_per_arch_model_file(FILE *f, struct starpu_per_arch_perfmodel_t *per_arch_model)
			
@@ -188,23 +185,16 @@ static void dump_per_arch_model_file(FILE *f, struct starpu_per_arch_perfmodel_t
 
				 static void dump_model_file(FILE *f, struct starpu_perfmodel_t *model)
			
 
				 {
			
 
				 	fprintf(f, "#################\n");
			
 
				-	fprintf(f, "# Model for COREs\n");
			
 
				-	dump_per_arch_model_file(f, &model->per_arch[STARPU_CORE_DEFAULT]);
			
 
				-	fprintf(f, "\n##################\n");
			
 
				-	fprintf(f,   "# Model for CUDA 1\n");
			
 
				-	dump_per_arch_model_file(f, &model->per_arch[STARPU_CUDA_DEFAULT]);
			
 
				-	fprintf(f, "\n##################\n");
			
 
				-	fprintf(f,   "# Model for CUDA 2\n");
			
 
				-	dump_per_arch_model_file(f, &model->per_arch[STARPU_CUDA_2]);
			
 
				-	fprintf(f, "\n##################\n");
			
 
				-	fprintf(f,   "# Model for CUDA 3\n");
			
 
				-	dump_per_arch_model_file(f, &model->per_arch[STARPU_CUDA_3]);
			
 
				-	fprintf(f, "\n##################\n");
			
 
				-	fprintf(f,   "# Model for CUDA 4\n");
			
 
				-	dump_per_arch_model_file(f, &model->per_arch[STARPU_CUDA_4]);
			
 
				-	fprintf(f, "\n##################\n");
			
 
				-	fprintf(f,   "# Model for GORDON\n");
			
 
				-	dump_per_arch_model_file(f, &model->per_arch[STARPU_GORDON_DEFAULT]);
			
 
				+	/* one "# Model for <arch>" section per architecture variation */
			
 
				+	unsigned archid;
			
 
				+	for (archid = 0; archid < NARCH_VARIATIONS; archid++)
			
 
				+	{
			
 
				+		char name[32];
			
 
				+		starpu_perfmodel_get_arch_name(archid, name, sizeof(name));
			
 
				+		fprintf(f, "# Model for %s\n", name);
			
 
				+		dump_per_arch_model_file(f, &model->per_arch[archid]);
			
 
				+		fprintf(f, "\n##################\n");
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 static void initialize_per_arch_model(struct starpu_per_arch_perfmodel_t *per_arch_model)
			
@@ -215,12 +205,9 @@ static void initialize_per_arch_model(struct starpu_per_arch_perfmodel_t *per_ar
 
				 
			
 
				 static void initialize_model(struct starpu_perfmodel_t *model)
			
 
				 {
			
 
				-	initialize_per_arch_model(&model->per_arch[STARPU_CORE_DEFAULT]);
			
 
				-	initialize_per_arch_model(&model->per_arch[STARPU_CUDA_DEFAULT]);
			
 
				-	initialize_per_arch_model(&model->per_arch[STARPU_CUDA_2]);
			
 
				-	initialize_per_arch_model(&model->per_arch[STARPU_CUDA_3]);
			
 
				-	initialize_per_arch_model(&model->per_arch[STARPU_CUDA_4]);
			
 
				-	initialize_per_arch_model(&model->per_arch[STARPU_GORDON_DEFAULT]);
			
 
				+	unsigned arch;
			
 
				+	for (arch = 0; arch < NARCH_VARIATIONS; arch++)
			
 
				+		initialize_per_arch_model(&model->per_arch[arch]);
			
 
				 }
			
 
				 
			
 
				 static struct starpu_model_list_t *registered_models = NULL;
			
@@ -228,7 +215,9 @@ static struct starpu_model_list_t *registered_models = NULL;
 
				 
			
 
				 static void get_model_debug_path(struct starpu_perfmodel_t *model, const char *arch, char *path, size_t maxlen)
			
 
				 {
			
 
				-	strncpy(path, PERF_MODEL_DIR, maxlen);
			
 
				+	STARPU_ASSERT(path);
			
 
				+
			
 
				+	strncpy(path, PERF_MODEL_DIR_DEBUG, maxlen);
			
 
				 	strncat(path, model->symbol, maxlen);
			
 
				 	
			
 
				 	char hostname[32];
			
@@ -253,30 +242,16 @@ void register_model(struct starpu_perfmodel_t *model)
 
				 	registered_models = node;
			
 
				 
			
 
				 #ifdef MODEL_DEBUG
			
 
				-	char debugpath[256];
			
 
				-	get_model_debug_path(model, "cuda", debugpath, 256);
			
 
				-	model->per_arch[STARPU_CUDA_DEFAULT].debug_file = fopen(debugpath, "a+");
			
 
				-	STARPU_ASSERT(model->per_arch[STARPU_CUDA_DEFAULT].debug_file);
			
 
				-
			
 
				-	get_model_debug_path(model, "cuda_2", debugpath, 256);
			
 
				-	model->per_arch[STARPU_CUDA_2].debug_file = fopen(debugpath, "a+");
			
 
				-	STARPU_ASSERT(model->per_arch[STARPU_CUDA_2].debug_file);
			
 
				-
			
 
				-	get_model_debug_path(model, "cuda_3", debugpath, 256);
			
 
				-	model->per_arch[STARPU_CUDA_3].debug_file = fopen(debugpath, "a+");
			
 
				-	STARPU_ASSERT(model->per_arch[STARPU_CUDA_3].debug_file);
			
 
				-
			
 
				-	get_model_debug_path(model, "cuda_4", debugpath, 256);
			
 
				-	model->per_arch[STARPU_CUDA_4].debug_file = fopen(debugpath, "a+");
			
 
				-	STARPU_ASSERT(model->per_arch[STARPU_CUDA_4].debug_file);
			
 
				-
			
 
				-	get_model_debug_path(model, "core", debugpath, 256);
			
 
				-	model->per_arch[STARPU_CORE_DEFAULT].debug_file = fopen(debugpath, "a+");
			
 
				-	STARPU_ASSERT(model->per_arch[STARPU_CORE_DEFAULT].debug_file);
			
 
				-
			
 
				-	get_model_debug_path(model, "gordon", debugpath, 256);
			
 
				-	model->per_arch[STARPU_GORDON_DEFAULT].debug_file = fopen(debugpath, "a+");
			
 
				-	STARPU_ASSERT(model->per_arch[STARPU_GORDON_DEFAULT].debug_file);
			
 
				+	create_sampling_directory_if_needed();
			
 
				+
			
 
				+	unsigned arch;
			
 
				+	for (arch = 0; arch < NARCH_VARIATIONS; arch++)
			
 
				+	{
			
 
				+		char debugpath[256];
			
 
				+		starpu_perfmodel_debugfilepath(model, arch, debugpath, 256);
			
 
				+		model->per_arch[arch].debug_file = fopen(debugpath, "a+");
			
 
				+		STARPU_ASSERT(model->per_arch[arch].debug_file);
			
 
				+	}
			
 
				 #endif
			
 
				 
			
 
				 	return;
			
@@ -462,39 +437,37 @@ int starpu_load_history_debug(const char *symbol, struct starpu_perfmodel_t *mod
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+void starpu_perfmodel_get_arch_name(enum starpu_perf_archtype arch, char *archname, size_t maxlen)
			
 
				+{
			
 
				+	/* Writes "core", "cuda_<devid>" or "gordon" into archname (at most maxlen bytes). */
			
 
				+	if (arch == STARPU_CORE_DEFAULT)
			
 
				+	{
			
 
				+		snprintf(archname, maxlen, "core");
			
 
				+		return;
			
 
				+	}
			
 
				+	if (arch == STARPU_GORDON_DEFAULT)
			
 
				+	{
			
 
				+		snprintf(archname, maxlen, "gordon");
			
 
				+		return;
			
 
				+	}
			
 
				+	/* CUDA ids span [STARPU_CUDA_DEFAULT, STARPU_CUDA_DEFAULT + MAXCUDADEVS); others are bugs */
			
 
				+	if ((arch < STARPU_CUDA_DEFAULT) || (arch >= STARPU_CUDA_DEFAULT + MAXCUDADEVS))
			
 
				+	{
			
 
				+		STARPU_ASSERT(0);
			
 
				+		return;
			
 
				+	}
			
 
				+	snprintf(archname, maxlen, "cuda_%d", (int)(arch - STARPU_CUDA_DEFAULT));
			
 
				+}
			
 
				+
			
 
				 void starpu_perfmodel_debugfilepath(struct starpu_perfmodel_t *model,
			
 
				-		enum starpu_perf_archtype arch, char **path, size_t maxlen)
			
 
				+		enum starpu_perf_archtype arch, char *path, size_t maxlen)
			
 
				 {
			
 
				-	char *archname;
			
 
				+	char archname[32];
			
 
				+	starpu_perfmodel_get_arch_name(arch, archname, 32);
			
 
				 
			
 
				 	STARPU_ASSERT(path);
			
 
				 
			
 
				-	switch(arch) {
			
 
				-		case STARPU_CORE_DEFAULT:
			
 
				-			archname = "core";
			
 
				-			break;
			
 
				-		case STARPU_CUDA_DEFAULT:
			
 
				-			archname = "cuda";
			
 
				-			break;
			
 
				-		case STARPU_CUDA_2:
			
 
				-			archname = "cuda_2";
			
 
				-			break;
			
 
				-		case STARPU_CUDA_3:
			
 
				-			archname = "cuda_3";
			
 
				-			break;
			
 
				-		case STARPU_CUDA_4:
			
 
				-			archname = "cuda_4";
			
 
				-			break;
			
 
				-		case STARPU_GORDON_DEFAULT:
			
 
				-			archname = "gordon";
			
 
				-			break;
			
 
				-		default:
			
 
				-			/* unknown architecture */
			
 
				-			*path = NULL;
			
 
				-			return;
			
 
				-	}
			
 
				-
			
 
				-	get_model_debug_path(model, archname, *path, maxlen);
			
 
				+	get_model_debug_path(model, archname, path, maxlen);
			
 
				 }
			
 
				 
			
 
				 double regression_based_job_expected_length(struct starpu_perfmodel_t *model, enum starpu_perf_archtype arch, struct job_s *j)
			
--- a/src/core/topology.c
+++ b/src/core/topology.c
@@ -177,25 +177,8 @@ static int init_machine_config(struct machine_config_s *config,
 
				 	for (cudagpu = 0; cudagpu < config->ncudagpus; cudagpu++)
			
 
				 	{
			
 
				 		config->workers[config->nworkers + cudagpu].arch = STARPU_CUDA_WORKER;
			
 
				-		/* XXX could be cleaner, we something like STARPU_CUDA_DEFAULT + gpuid */
			
 
				 		int devid = get_next_gpuid(config);
			
 
				-		enum starpu_perf_archtype arch;
			
 
				-		switch (devid) {
			
 
				-			case 0:
			
 
				-			default:
			
 
				-				arch = STARPU_CUDA_DEFAULT;
			
 
				-				break;
			
 
				-			case 1:
			
 
				-				arch = STARPU_CUDA_2;
			
 
				-				break;
			
 
				-			case 2:
			
 
				-				arch = STARPU_CUDA_3;
			
 
				-				break;
			
 
				-			case 3:
			
 
				-				arch = STARPU_CUDA_4;
			
 
				-				break;
			
 
				-		}
			
 
				-		
			
 
				+		enum starpu_perf_archtype arch = STARPU_CUDA_DEFAULT + devid;
			
 
				 		config->workers[config->nworkers + cudagpu].id = devid;
			
 
				 		config->workers[config->nworkers + cudagpu].perf_arch = arch; 
			
 
				 		config->workers[config->nworkers + cudagpu].worker_mask = CUDA;
			
--- a/tools/perfmodel-display.c
+++ b/tools/perfmodel-display.c
@@ -100,9 +100,8 @@ static void display_perf_model(struct starpu_perfmodel_t *model, enum starpu_per
 
				 			fprintf(stderr, "\t\tc = %le\n", arch_model->regression.c);
			
 
				 		}
			
 
				 
			
 
				-		char *debugname = malloc(1024);
			
 
				-		starpu_perfmodel_debugfilepath(model, arch, &debugname, 1024);
			
 
				-		free(debugname);
			
 
				+		char debugname[256]; /* the maxlen passed below must be this buffer's real size */
			
 
				+		starpu_perfmodel_debugfilepath(model, arch, debugname, sizeof(debugname));
			
 
				 		printf("\t debug file path : %s\n", debugname);
			
 
				 	}
			
 
				 	else {
			
@@ -133,10 +132,9 @@ static void display_perf_model(struct starpu_perfmodel_t *model, enum starpu_per
 
				 		}
			
 
				 
			
 
				 		if (strcmp(parameter, "path-file-debug") == 0) {
			
 
				-			char *debugname = malloc(1024);
			
 
				-			starpu_perfmodel_debugfilepath(model, arch, &debugname, 1024);
			
 
				+			char debugname[256]; /* the maxlen passed below must be this buffer's real size */
			
 
				+			starpu_perfmodel_debugfilepath(model, arch, debugname, sizeof(debugname));
			
 
				 			printf("%s\n", debugname);
			
 
				-			free(debugname);
			
 
				 			return;
			
 
				 		}
			
 
				 
			
@@ -152,26 +150,14 @@ static void display_all_perf_models(struct starpu_perfmodel_t *model)
 
				 	if (arch == NULL)
			
 
				 	{
			
 
				 		/* display all architectures */
			
 
				-
			
 
				-		/* yet, we assume there is a single performance model per
			
 
				-		 * architecture */
			
 
				-		fprintf(stderr, "performance model for CPUs :\n");
			
 
				-		display_perf_model(model, STARPU_CORE_DEFAULT);
			
 
				-	
			
 
				-		fprintf(stderr, "performance model for CUDA :\n");
			
 
				-		display_perf_model(model, STARPU_CUDA_DEFAULT);
			
 
				-	
			
 
				-		fprintf(stderr, "performance model for CUDA (2):\n");
			
 
				-		display_perf_model(model, STARPU_CUDA_2);
			
 
				-	
			
 
				-		fprintf(stderr, "performance model for CUDA (3):\n");
			
 
				-		display_perf_model(model, STARPU_CUDA_3);
			
 
				-	
			
 
				-		fprintf(stderr, "performance model for CUDA (4):\n");
			
 
				-		display_perf_model(model, STARPU_CUDA_4);
			
 
				-	
			
 
				-		fprintf(stderr, "performance model for GORDON :\n");
			
 
				-		display_perf_model(model, STARPU_GORDON_DEFAULT);
			
 
				+		unsigned archid; /* not "arch": that name is the architecture string tested above */
			
 
				+		for (archid = 0; archid < NARCH_VARIATIONS; archid++)
			
 
				+		{
			
 
				+			char archname[32];
			
 
				+			starpu_perfmodel_get_arch_name(archid, archname, sizeof(archname));
			
 
				+			fprintf(stderr, "performance model for %s\n", archname);
			
 
				+			display_perf_model(model, archid);
			
 
				+		}
			
 
				 	}
			
 
				 	else {
			
 
				 		if (strcmp(arch, "core") == 0) {
			
@@ -180,14 +166,19 @@ static void display_all_perf_models(struct starpu_perfmodel_t *model)
 
				 		}
			
 
				 
			
 
				 		if (strcmp(arch, "cuda") == 0) {
			
 
				-			display_perf_model(model, STARPU_CUDA_DEFAULT);
			
 
				-			display_perf_model(model, STARPU_CUDA_2);
			
 
				-			display_perf_model(model, STARPU_CUDA_3);
			
 
				-			display_perf_model(model, STARPU_CUDA_4);
			
 
				+			unsigned archid;
			
 
				+			for (archid = STARPU_CUDA_DEFAULT; archid < STARPU_CUDA_DEFAULT + MAXCUDADEVS; archid++)
			
 
				+			{
			
 
				+				char archname[32];
			
 
				+				starpu_perfmodel_get_arch_name(archid, archname, 32);
			
 
				+				fprintf(stderr, "performance model for %s\n", archname);
			
 
				+				display_perf_model(model, archid);
			
 
				+			}
			
 
				 			return;
			
 
				 		}
			
 
				 
			
 
				 		if (strcmp(arch, "gordon") == 0) {
			
 
				+			fprintf(stderr, "performance model for gordon\n");
			
 
				 			display_perf_model(model, STARPU_GORDON_DEFAULT);
			
 
				 			return;
			
 
				 		}