Explorar o código

New perfmodel format.

Cyril Roelandt hai 14 anos
pai
achega
f19d1d96c1
Modificouse 1 ficheiro con 177 adicións e 45 borrados
  1. 177 45
      src/core/perfmodel/perfmodel_history.c

+ 177 - 45
src/core/perfmodel/perfmodel_history.c

@@ -204,12 +204,19 @@ static void parse_per_arch_model_file(FILE *f, struct starpu_per_arch_perfmodel_
 	}
 }
 
-static void parse_arch(FILE *f, struct starpu_perfmodel_t *model, unsigned scan_history, unsigned archmin, unsigned archmax, unsigned implmax, int skiparch, int skipimpl)
+static void parse_arch(FILE *f, struct starpu_perfmodel_t *model, unsigned scan_history, unsigned archmin, unsigned archmax, int skiparch)
 {
 	unsigned arch, impl;
 	struct starpu_per_arch_perfmodel_t dummy;
+	int nimpls, implmax, skipimpl;
+	unsigned ret;
+	
 
 	for (arch = archmin; arch < archmax; arch++) {
+		_starpu_drop_comments(f);
+		ret = fscanf(f, "%d\n", &nimpls);
+		implmax = STARPU_MIN(nimpls, STARPU_MAXIMPLEMENTATIONS);
+		skipimpl = nimpls - STARPU_MAXIMPLEMENTATIONS;
 		for (impl = 0; impl < implmax; impl++) {
 			parse_per_arch_model_file(f, &model->per_arch[arch][impl], scan_history);
 		}
@@ -221,6 +228,10 @@ static void parse_arch(FILE *f, struct starpu_perfmodel_t *model, unsigned scan_
 	}
 
 	if (skiparch > 0) {
+		_starpu_drop_comments(f);
+		ret = fscanf(f, "%d\n", &nimpls);
+		implmax = STARPU_MIN(nimpls, STARPU_MAXIMPLEMENTATIONS);
+		skipimpl = nimpls - STARPU_MAXIMPLEMENTATIONS;
 		for (arch = 0; arch < skiparch; arch ++) {
 			for (impl = 0; impl < implmax; impl++) {
 				parse_per_arch_model_file(f, &dummy, 0);
@@ -237,48 +248,71 @@ static void parse_arch(FILE *f, struct starpu_perfmodel_t *model, unsigned scan_
 static void parse_model_file(FILE *f, struct starpu_perfmodel_t *model, unsigned scan_history)
 {
 	unsigned ret;
-	int ncpus, ncudas, nopencls, ngordons, nimpls;
 	unsigned archmin = 0;
 	unsigned max_gordondevs = 1; /* XXX : we need a STARPU_MAXGORDONDEVS cst */
+	unsigned narchs;
+	int nimpls;
 
-	_starpu_drop_comments(f);
-	ret = fscanf(f, "%u %u %u %u %u %u\n", &ncpus, &ncudas, &nopencls, &ngordons, &nimpls);
-	STARPU_ASSERT(ret == 5);
+	/* We could probably write a clean loop here, but the code would not
+	 * really be easier to read. */
 
 	/* Parsing CPUs */
-	parse_arch(f, model, scan_history,
-			archmin,
-			STARPU_MIN(ncpus, STARPU_MAXCPUS),
-			STARPU_MIN(nimpls, STARPU_MAXIMPLEMENTATIONS),
-			ncpus - STARPU_MAXCPUS,
-			nimpls - STARPU_MAXIMPLEMENTATIONS);
+	_starpu_drop_comments(f);
+	ret = fscanf(f, "%u\n", &narchs);
+	STARPU_ASSERT(ret == 1);
+
+	_STARPU_DEBUG("Parsing %u CPUs\n", narchs);
+	if (narchs > 0)
+	{
+		parse_arch(f, model, scan_history,
+				archmin,
+				STARPU_MIN(narchs, STARPU_MAXCPUS),
+				narchs - STARPU_MAXCPUS);
+	}
 
 	/* Parsing CUDA devs */
-	archmin += STARPU_MAXCPUS;
-	parse_arch(f, model, scan_history,
-			archmin,
-			archmin + STARPU_MIN(ncudas, STARPU_MAXCUDADEVS),
-			STARPU_MIN(nimpls, STARPU_MAXIMPLEMENTATIONS),
-			ncudas - STARPU_MAXCUDADEVS,
-			nimpls - STARPU_MAXIMPLEMENTATIONS);
+	_starpu_drop_comments(f);
+	ret = fscanf(f, "%u\n", &narchs);
+	STARPU_ASSERT(ret == 1);
+	_STARPU_DEBUG("Parsing %u CUDA devices\n", narchs);
+	if (narchs > 0)
+	{
+		archmin += STARPU_MAXCPUS;
+		parse_arch(f, model, scan_history,
+				archmin,
+				archmin + STARPU_MIN(narchs, STARPU_MAXCUDADEVS),
+				narchs - STARPU_MAXCUDADEVS);
+	}
 
 	/* Parsing OpenCL devs */
-	archmin += STARPU_MAXCUDADEVS;
-	parse_arch(f, model, scan_history,
-			archmin,
-			archmin + STARPU_MIN(nopencls, STARPU_MAXOPENCLDEVS),
-			STARPU_MIN(nimpls, STARPU_MAXIMPLEMENTATIONS),
-			nopencls - STARPU_MAXOPENCLDEVS,
-			nimpls - STARPU_MAXIMPLEMENTATIONS);
+	_starpu_drop_comments(f);
+	ret = fscanf(f, "%u\n", &narchs);
+	STARPU_ASSERT(ret == 1);
+
+	_STARPU_DEBUG("Parsing %u OpenCL devices\n", narchs);
+	if (narchs > 0)
+	{
+		archmin += STARPU_MAXCUDADEVS;
+		parse_arch(f, model, scan_history,
+				archmin,
+				archmin + STARPU_MIN(narchs, STARPU_MAXOPENCLDEVS),
+				narchs - STARPU_MAXOPENCLDEVS);
+	}
 
 	/* Parsing Gordon implementations */
-	archmin += STARPU_MAXOPENCLDEVS;
-	parse_arch(f, model, scan_history,
-			archmin,
-			archmin + max_gordondevs,
-			STARPU_MIN(nimpls, STARPU_MAXIMPLEMENTATIONS),
-			ngordons - max_gordondevs,
-			nimpls - STARPU_MAXIMPLEMENTATIONS);
+	_starpu_drop_comments(f);
+	ret = fscanf(f, "%u\n", &narchs);
+	STARPU_ASSERT(ret == 1);
+
+	_STARPU_DEBUG("Parsing %u Gordon devices\n", narchs);
+	if (narchs > 0)
+	{
+		archmin += STARPU_MAXOPENCLDEVS;
+		parse_arch(f, model, scan_history,
+				archmin,
+				archmin + max_gordondevs,
+				narchs - max_gordondevs);
+	}
 }
 
 
@@ -301,7 +335,12 @@ static void dump_per_arch_model_file(FILE *f, struct starpu_perfmodel_t *model,
 		}
 	}
 
+	if (nentries == 0)
+		return;
 	/* header */
+	char archname[32];
+	starpu_perfmodel_get_arch_name((enum starpu_perf_archtype) arch, archname, 32, nimpl);
+	fprintf(f, "# Model for %s\n", archname);
 	fprintf(f, "# number of entries\n%u\n", nentries);
 
 	dump_reg_model(f, model, arch, nimpl);
@@ -316,30 +355,123 @@ static void dump_per_arch_model_file(FILE *f, struct starpu_perfmodel_t *model,
 			ptr = ptr->next;
 		}
 	}
+
+	fprintf(f, "\n##################\n");
 }
 
-static void dump_model_file(FILE *f, struct starpu_perfmodel_t *model)
+static unsigned get_n_entries(struct starpu_perfmodel_t *model, unsigned arch, unsigned impl)
 {
-	fprintf(f, "#################\n");
-	fprintf(f, "# CPUs, CUDAs, OpenCLs, Gordons, Implementations\n");
-	fprintf(f, "%u %u %u %u %u\n", 
-		STARPU_MAXCPUS, STARPU_MAXCUDADEVS,
-		STARPU_MAXOPENCLDEVS, 1, /* XXX : gordon ? */
-		STARPU_MAXIMPLEMENTATIONS);
-	fprintf(f, "#################\n");
-		
+	struct starpu_per_arch_perfmodel_t *per_arch_model;
+	per_arch_model = &model->per_arch[arch][impl];
+	/* count the number of elements in the lists */
+	struct starpu_history_list_t *ptr = NULL;
+	unsigned nentries = 0;
 
+	if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
+	{
+		/* Dump the list of all entries in the history */
+		ptr = per_arch_model->list;
+		while(ptr) {
+			nentries++;
+			ptr = ptr->next;
+		}
+	}
+	return nentries;
+}
+
+static void dump_model_file(FILE *f, struct starpu_perfmodel_t *model)
+{
+	unsigned number_of_archs[4] = { 0, 0, 0, 0};
 	unsigned arch;
 	unsigned nimpl;
+	unsigned idx = 0;
+
+	/* Finding the number of archs to write for each kind of device */
+	for (arch = 0; arch < STARPU_NARCH_VARIATIONS; arch++)
+	{
+		switch (arch)
+		{
+			case STARPU_CUDA_DEFAULT:
+			case STARPU_OPENCL_DEFAULT:
+			case STARPU_GORDON_DEFAULT:
+				idx++;
+				break;
+			default:
+				break;
+		}
+
+		unsigned nentries = 0;
+		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
+		{
+			nentries = get_n_entries(model, arch, nimpl) != 0;
+			if (nentries > 0)
+			{
+				number_of_archs[idx]++;
+				break;
+			}
+		}
+	}
+
+	/* Writing stuff */
+	char *name;
+	unsigned substract_to_arch = 0;
 	for (arch = 0; arch < STARPU_NARCH_VARIATIONS; arch++)
 	{
+		switch (arch)
+		{
+			case STARPU_CPU_DEFAULT:
+				name = "CPU";
+				fprintf(f, "##################\n");
+				fprintf(f, "# %ss\n", name);
+				fprintf(f, "# number of %s architectures\n", name);
+				fprintf(f, "%u\n", number_of_archs[0]);
+				break;
+			case STARPU_CUDA_DEFAULT:
+				name = "CUDA";
+				substract_to_arch = STARPU_MAXCPUS;
+				fprintf(f, "##################\n");
+				fprintf(f, "# %ss\n", name);
+				fprintf(f, "# number of %s architectures\n", name);
+				fprintf(f, "%u\n", number_of_archs[1]);
+				break;
+			case STARPU_OPENCL_DEFAULT:
+				name = "OPENCL";
+				substract_to_arch += STARPU_MAXCUDADEVS;
+				fprintf(f, "##################\n");
+				fprintf(f, "# %ss\n", name);
+				fprintf(f, "# number of %s architectures\n", name);
+				fprintf(f, "%u\n", number_of_archs[2]);
+				break;
+			case STARPU_GORDON_DEFAULT:
+				name = "GORDON";
+				substract_to_arch += STARPU_MAXOPENCLDEVS;
+				fprintf(f, "##################\n");
+				fprintf(f, "# %ss\n", name);
+				fprintf(f, "# number of %s architectures\n", name);
+				fprintf(f, "%u\n", number_of_archs[3]);
+				break;
+			default:
+				break;
+		}
+
 		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
 		{
-			char archname[32];
-			starpu_perfmodel_get_arch_name((enum starpu_perf_archtype) arch, archname, 32, nimpl);
-			fprintf(f, "# Model for %s\n", archname);
+			if (get_n_entries(model, arch, nimpl) == 0)
+				break;
+
+		}
+		unsigned max_impl = nimpl;
+
+		if (max_impl == 0)
+			continue;
+
+		fprintf(f, "###########\n");
+		fprintf(f, "# %s_%u\n", name, arch - substract_to_arch);
+		fprintf(f, "# number of implementations\n");
+		fprintf(f, "%u\n", max_impl);
+		for (nimpl = 0; nimpl < max_impl; nimpl++)
+		{
 			dump_per_arch_model_file(f, model, arch, nimpl);
-			fprintf(f, "\n##################\n");
 		}
 	}
 }