Browse Source

- Clean up the code managing performance models and per-model "debug" files. We
can now directly handle up to MAXCUDADEVS CUDA devices.
- The debug files are now stored in the debug/ sub-directory of the sampling
directory.

Cédric Augonnet 15 years ago
parent
commit
480ee416d5

+ 7 - 9
include/starpu-perfmodel.h

@@ -20,6 +20,7 @@
 #include <stdio.h>
 #include <pthread.h>
 #include <starpu_config.h>
+#include <starpu-task.h> // for MAXCUDADEVS
 
 #ifdef __cplusplus
 extern "C" {
@@ -35,19 +36,14 @@ struct starpu_buffer_descr_t;
    so we do not use the archtype enum type directly for performance models
 */
 
-/* on most system we will consider one or two architectures as all accelerators
-   are likely to be identical */
-#define NARCH_VARIATIONS	6
-
 enum starpu_perf_archtype {
 	STARPU_CORE_DEFAULT = 0,
 	STARPU_CUDA_DEFAULT = 1,
-	STARPU_CUDA_2 = 2,
-	STARPU_CUDA_3 = 3,
-	STARPU_CUDA_4 = 4,
-	STARPU_GORDON_DEFAULT = 5
+	/* CUDA device devid is STARPU_CUDA_DEFAULT + devid, with 0 <= devid < MAXCUDADEVS */
+	STARPU_GORDON_DEFAULT = STARPU_CUDA_DEFAULT + MAXCUDADEVS /* first value past the CUDA range */
 };
 
+#define NARCH_VARIATIONS	(STARPU_GORDON_DEFAULT+1) /* number of enum starpu_perf_archtype values: cores, MAXCUDADEVS CUDA devices, gordon */
 
 struct starpu_regression_model_t {
 	/* sum of ln(measured) */
@@ -109,7 +105,9 @@ struct starpu_perfmodel_t {
  * performance model files */
 int starpu_load_history_debug(const char *symbol, struct starpu_perfmodel_t *model);
 void starpu_perfmodel_debugfilepath(struct starpu_perfmodel_t *model,
-		enum starpu_perf_archtype arch, char **path, size_t maxlen);
+		enum starpu_perf_archtype arch, char *path, size_t maxlen); /* path: caller-provided buffer the file name is written into; maxlen: its size */
+void starpu_perfmodel_get_arch_name(enum starpu_perf_archtype arch,
+		char *archname, size_t maxlen); /* writes "core", "cuda_<devid>" or "gordon" into archname, bounded by maxlen */
 
 #ifdef __cplusplus
 }

+ 11 - 0
src/core/perfmodel/perfmodel.c

@@ -189,6 +189,17 @@ void create_sampling_directory_if_needed(void)
 			STARPU_ASSERT(S_ISDIR(sb.st_mode));
 		}
 	
+		ret = mkdir(PERF_MODEL_DIR_DEBUG, S_IRWXU); /* directory holding the per-model debug files */
+		if (ret == -1)
+		{
+			STARPU_ASSERT(errno == EEXIST);
+	
+			/* the debug path already exists: make sure that it (and not its parent) is actually a directory */
+			struct stat sb;
+			stat(PERF_MODEL_DIR_DEBUG, &sb);
+			STARPU_ASSERT(S_ISDIR(sb.st_mode));
+		}
+	
 		directory_existence_was_tested = 1;
 	}
 }

+ 1 - 0
src/core/perfmodel/perfmodel.h

@@ -28,6 +28,7 @@
 
 #define PERF_MODEL_DIR_CODELETS	PERF_MODEL_DIR"/codelets/"
 #define PERF_MODEL_DIR_BUS	PERF_MODEL_DIR"/bus/"
+#define PERF_MODEL_DIR_DEBUG	PERF_MODEL_DIR"/debug/"
 
 struct starpu_buffer_descr_t;
 struct jobq_s;

+ 55 - 82
src/core/perfmodel/perfmodel_history.c

@@ -146,12 +146,9 @@ static void parse_per_arch_model_file(FILE *f, struct starpu_per_arch_perfmodel_
 
 static void parse_model_file(FILE *f, struct starpu_perfmodel_t *model, unsigned scan_history)
 {
-	parse_per_arch_model_file(f, &model->per_arch[STARPU_CORE_DEFAULT], scan_history);
-	parse_per_arch_model_file(f, &model->per_arch[STARPU_CUDA_DEFAULT], scan_history);
-	parse_per_arch_model_file(f, &model->per_arch[STARPU_CUDA_2], scan_history);
-	parse_per_arch_model_file(f, &model->per_arch[STARPU_CUDA_3], scan_history);
-	parse_per_arch_model_file(f, &model->per_arch[STARPU_CUDA_4], scan_history);
-	parse_per_arch_model_file(f, &model->per_arch[STARPU_GORDON_DEFAULT], scan_history);
+	unsigned arch; /* index into model->per_arch, follows enum starpu_perf_archtype */
+	for (arch = 0; arch < NARCH_VARIATIONS; arch++) /* NOTE(review): the number of sections read now depends on MAXCUDADEVS; presumably files dumped with the old 6-section layout must be regenerated -- confirm */
+		parse_per_arch_model_file(f, &model->per_arch[arch], scan_history);
 }
 
 static void dump_per_arch_model_file(FILE *f, struct starpu_per_arch_perfmodel_t *per_arch_model)
@@ -188,23 +185,16 @@ static void dump_per_arch_model_file(FILE *f, struct starpu_per_arch_perfmodel_t
 static void dump_model_file(FILE *f, struct starpu_perfmodel_t *model)
 {
 	fprintf(f, "#################\n");
-	fprintf(f, "# Model for COREs\n");
-	dump_per_arch_model_file(f, &model->per_arch[STARPU_CORE_DEFAULT]);
-	fprintf(f, "\n##################\n");
-	fprintf(f,   "# Model for CUDA 1\n");
-	dump_per_arch_model_file(f, &model->per_arch[STARPU_CUDA_DEFAULT]);
-	fprintf(f, "\n##################\n");
-	fprintf(f,   "# Model for CUDA 2\n");
-	dump_per_arch_model_file(f, &model->per_arch[STARPU_CUDA_2]);
-	fprintf(f, "\n##################\n");
-	fprintf(f,   "# Model for CUDA 3\n");
-	dump_per_arch_model_file(f, &model->per_arch[STARPU_CUDA_3]);
-	fprintf(f, "\n##################\n");
-	fprintf(f,   "# Model for CUDA 4\n");
-	dump_per_arch_model_file(f, &model->per_arch[STARPU_CUDA_4]);
-	fprintf(f, "\n##################\n");
-	fprintf(f,   "# Model for GORDON\n");
-	dump_per_arch_model_file(f, &model->per_arch[STARPU_GORDON_DEFAULT]);
+
+	unsigned arch;
+	for (arch = 0; arch < NARCH_VARIATIONS; arch++) /* one section per architecture, in enum starpu_perf_archtype order */
+	{
+		char archname[32];
+		starpu_perfmodel_get_arch_name(arch, archname, 32); /* "core", "cuda_<devid>" or "gordon" */
+		fprintf(f, "# Model for %s\n", archname);
+		dump_per_arch_model_file(f, &model->per_arch[arch]);
+		fprintf(f, "\n##################\n"); /* separator, now also emitted after the last section */
+	}
 }
 
 static void initialize_per_arch_model(struct starpu_per_arch_perfmodel_t *per_arch_model)
@@ -215,12 +205,9 @@ static void initialize_per_arch_model(struct starpu_per_arch_perfmodel_t *per_ar
 
 static void initialize_model(struct starpu_perfmodel_t *model)
 {
-	initialize_per_arch_model(&model->per_arch[STARPU_CORE_DEFAULT]);
-	initialize_per_arch_model(&model->per_arch[STARPU_CUDA_DEFAULT]);
-	initialize_per_arch_model(&model->per_arch[STARPU_CUDA_2]);
-	initialize_per_arch_model(&model->per_arch[STARPU_CUDA_3]);
-	initialize_per_arch_model(&model->per_arch[STARPU_CUDA_4]);
-	initialize_per_arch_model(&model->per_arch[STARPU_GORDON_DEFAULT]);
+	unsigned arch;
+	for (arch = 0; arch < NARCH_VARIATIONS; arch++) /* every per_arch slot, including CUDA slots beyond the old four */
+		initialize_per_arch_model(&model->per_arch[arch]);
 }
 
 static struct starpu_model_list_t *registered_models = NULL;
@@ -228,7 +215,9 @@ static struct starpu_model_list_t *registered_models = NULL;
 
 static void get_model_debug_path(struct starpu_perfmodel_t *model, const char *arch, char *path, size_t maxlen)
 {
-	strncpy(path, PERF_MODEL_DIR, maxlen);
+	STARPU_ASSERT(path);
+
+	strncpy(path, PERF_MODEL_DIR_DEBUG, maxlen);
 	strncat(path, model->symbol, maxlen);
 	
 	char hostname[32];
@@ -253,30 +242,16 @@ void register_model(struct starpu_perfmodel_t *model)
 	registered_models = node;
 
 #ifdef MODEL_DEBUG
-	char debugpath[256];
-	get_model_debug_path(model, "cuda", debugpath, 256);
-	model->per_arch[STARPU_CUDA_DEFAULT].debug_file = fopen(debugpath, "a+");
-	STARPU_ASSERT(model->per_arch[STARPU_CUDA_DEFAULT].debug_file);
-
-	get_model_debug_path(model, "cuda_2", debugpath, 256);
-	model->per_arch[STARPU_CUDA_2].debug_file = fopen(debugpath, "a+");
-	STARPU_ASSERT(model->per_arch[STARPU_CUDA_2].debug_file);
-
-	get_model_debug_path(model, "cuda_3", debugpath, 256);
-	model->per_arch[STARPU_CUDA_3].debug_file = fopen(debugpath, "a+");
-	STARPU_ASSERT(model->per_arch[STARPU_CUDA_3].debug_file);
-
-	get_model_debug_path(model, "cuda_4", debugpath, 256);
-	model->per_arch[STARPU_CUDA_4].debug_file = fopen(debugpath, "a+");
-	STARPU_ASSERT(model->per_arch[STARPU_CUDA_4].debug_file);
-
-	get_model_debug_path(model, "core", debugpath, 256);
-	model->per_arch[STARPU_CORE_DEFAULT].debug_file = fopen(debugpath, "a+");
-	STARPU_ASSERT(model->per_arch[STARPU_CORE_DEFAULT].debug_file);
-
-	get_model_debug_path(model, "gordon", debugpath, 256);
-	model->per_arch[STARPU_GORDON_DEFAULT].debug_file = fopen(debugpath, "a+");
-	STARPU_ASSERT(model->per_arch[STARPU_GORDON_DEFAULT].debug_file);
+	create_sampling_directory_if_needed();
+
+	unsigned arch;
+	for (arch = 0; arch < NARCH_VARIATIONS; arch++)
+	{
+		char debugpath[256];
+		starpu_perfmodel_debugfilepath(model, arch, debugpath, 256);
+		model->per_arch[arch].debug_file = fopen(debugpath, "a+");
+		STARPU_ASSERT(model->per_arch[arch].debug_file);
+	}
 #endif
 
 	return;
@@ -462,39 +437,37 @@ int starpu_load_history_debug(const char *symbol, struct starpu_perfmodel_t *mod
 	return 0;
 }
 
+void starpu_perfmodel_get_arch_name(enum starpu_perf_archtype arch, char *archname, size_t maxlen) /* writes a short name for arch into archname, bounded by maxlen */
+{
+	if (arch == STARPU_CORE_DEFAULT)
+	{
+		snprintf(archname, maxlen, "core");
+	}
+	else if ((STARPU_CUDA_DEFAULT <= arch)
+		&& (arch < STARPU_CUDA_DEFAULT + MAXCUDADEVS))
+	{
+		int devid = arch - STARPU_CUDA_DEFAULT; /* zero-based offset within the CUDA range */
+		snprintf(archname, maxlen, "cuda_%d", devid); /* so STARPU_CUDA_DEFAULT itself is named "cuda_0" */
+	}
+	else if (arch == STARPU_GORDON_DEFAULT)
+	{
+		snprintf(archname, maxlen, "gordon");
+	}
+	else
+	{
+		STARPU_ASSERT(0); /* arch is none of the known values; nothing is written to archname on this path */
+	}
+}
+
 void starpu_perfmodel_debugfilepath(struct starpu_perfmodel_t *model,
-		enum starpu_perf_archtype arch, char **path, size_t maxlen)
+		enum starpu_perf_archtype arch, char *path, size_t maxlen)
 {
-	char *archname;
+	char archname[32];
+	starpu_perfmodel_get_arch_name(arch, archname, 32); /* asserts on an unknown arch; the old code set *path to NULL instead */
 
 	STARPU_ASSERT(path);
 
-	switch(arch) {
-		case STARPU_CORE_DEFAULT:
-			archname = "core";
-			break;
-		case STARPU_CUDA_DEFAULT:
-			archname = "cuda";
-			break;
-		case STARPU_CUDA_2:
-			archname = "cuda_2";
-			break;
-		case STARPU_CUDA_3:
-			archname = "cuda_3";
-			break;
-		case STARPU_CUDA_4:
-			archname = "cuda_4";
-			break;
-		case STARPU_GORDON_DEFAULT:
-			archname = "gordon";
-			break;
-		default:
-			/* unknown architecture */
-			*path = NULL;
-			return;
-	}
-
-	get_model_debug_path(model, archname, *path, maxlen);
+	get_model_debug_path(model, archname, path, maxlen); /* fills the caller-provided buffer in place */
 }
 
 double regression_based_job_expected_length(struct starpu_perfmodel_t *model, enum starpu_perf_archtype arch, struct job_s *j)

+ 1 - 18
src/core/topology.c

@@ -177,25 +177,8 @@ static int init_machine_config(struct machine_config_s *config,
 	for (cudagpu = 0; cudagpu < config->ncudagpus; cudagpu++)
 	{
 		config->workers[config->nworkers + cudagpu].arch = STARPU_CUDA_WORKER;
-		/* XXX could be cleaner, we something like STARPU_CUDA_DEFAULT + gpuid */
 		int devid = get_next_gpuid(config);
-		enum starpu_perf_archtype arch;
-		switch (devid) {
-			case 0:
-			default:
-				arch = STARPU_CUDA_DEFAULT;
-				break;
-			case 1:
-				arch = STARPU_CUDA_2;
-				break;
-			case 2:
-				arch = STARPU_CUDA_3;
-				break;
-			case 3:
-				arch = STARPU_CUDA_4;
-				break;
-		}
-		
+		enum starpu_perf_archtype arch = STARPU_CUDA_DEFAULT + devid;
 		config->workers[config->nworkers + cudagpu].id = devid;
 		config->workers[config->nworkers + cudagpu].perf_arch = arch; 
 		config->workers[config->nworkers + cudagpu].worker_mask = CUDA;

+ 21 - 30
tools/perfmodel-display.c

@@ -100,9 +100,8 @@ static void display_perf_model(struct starpu_perfmodel_t *model, enum starpu_per
 			fprintf(stderr, "\t\tc = %le\n", arch_model->regression.c);
 		}
 
-		char *debugname = malloc(1024);
-		starpu_perfmodel_debugfilepath(model, arch, &debugname, 1024);
-		free(debugname);
+		char debugname[256]; /* the length passed below must be this buffer's real size: the callee strncpy()s with it, which pads up to that many bytes */
+		starpu_perfmodel_debugfilepath(model, arch, debugname, sizeof(debugname));
 		printf("\t debug file path : %s\n", debugname);
 	}
 	else {
@@ -133,10 +132,9 @@ static void display_perf_model(struct starpu_perfmodel_t *model, enum starpu_per
 		}
 
 		if (strcmp(parameter, "path-file-debug") == 0) {
-			char *debugname = malloc(1024);
-			starpu_perfmodel_debugfilepath(model, arch, &debugname, 1024);
+			char debugname[256]; /* the length passed below must be this buffer's real size: the callee strncpy()s with it, which pads up to that many bytes */
+			starpu_perfmodel_debugfilepath(model, arch, debugname, sizeof(debugname));
 			printf("%s\n", debugname);
-			free(debugname);
 			return;
 		}
 
@@ -152,26 +150,14 @@ static void display_all_perf_models(struct starpu_perfmodel_t *model)
 	if (arch == NULL)
 	{
 		/* display all architectures */
-
-		/* yet, we assume there is a single performance model per
-		 * architecture */
-		fprintf(stderr, "performance model for CPUs :\n");
-		display_perf_model(model, STARPU_CORE_DEFAULT);
-	
-		fprintf(stderr, "performance model for CUDA :\n");
-		display_perf_model(model, STARPU_CUDA_DEFAULT);
-	
-		fprintf(stderr, "performance model for CUDA (2):\n");
-		display_perf_model(model, STARPU_CUDA_2);
-	
-		fprintf(stderr, "performance model for CUDA (3):\n");
-		display_perf_model(model, STARPU_CUDA_3);
-	
-		fprintf(stderr, "performance model for CUDA (4):\n");
-		display_perf_model(model, STARPU_CUDA_4);
-	
-		fprintf(stderr, "performance model for GORDON :\n");
-		display_perf_model(model, STARPU_GORDON_DEFAULT);
+		unsigned arch;
+		for (arch = 0; arch < NARCH_VARIATIONS; arch++)
+		{
+			char archname[32];
+			starpu_perfmodel_get_arch_name(arch, archname, 32);
+			fprintf(stderr, "performance model for %s\n", archname);
+			display_perf_model(model, arch);
+		}
 	}
 	else {
 		if (strcmp(arch, "core") == 0) {
@@ -180,14 +166,19 @@ static void display_all_perf_models(struct starpu_perfmodel_t *model)
 		}
 
 		if (strcmp(arch, "cuda") == 0) {
-			display_perf_model(model, STARPU_CUDA_DEFAULT);
-			display_perf_model(model, STARPU_CUDA_2);
-			display_perf_model(model, STARPU_CUDA_3);
-			display_perf_model(model, STARPU_CUDA_4);
+			unsigned archid;
+			for (archid = STARPU_CUDA_DEFAULT; archid < STARPU_CUDA_DEFAULT + MAXCUDADEVS; archid++)
+			{
+				char archname[32];
+				starpu_perfmodel_get_arch_name(archid, archname, 32);
+				fprintf(stderr, "performance model for %s\n", archname);
+				display_perf_model(model, archid);
+			}
 			return;
 		}
 
 		if (strcmp(arch, "gordon") == 0) {
+			fprintf(stderr, "performance model for gordon\n");
 			display_perf_model(model, STARPU_GORDON_DEFAULT);
 			return;
 		}