11 years ago · 5cbb25edb6
--- a/src/core/perfmodel/perfmodel_history.c
+++ b/src/core/perfmodel/perfmodel_history.c
@@ -238,6 +238,8 @@ static void parse_per_arch_model_file(FILE *f, struct starpu_perfmodel_per_arch
 
				 	int res = fscanf(f, "%u\n", &nentries);
			
 
				 	STARPU_ASSERT_MSG(res == 1, "Incorrect performance model file");
			
 
				 
			
 
				+	_STARPU_DEBUG("nentries:%u\n", nentries);
			
 
				+	
			
 
				 	scan_reg_model(f, &per_arch_model->regression);
			
 
				 
			
 
				 	/* parse cpu entries */
			
@@ -261,130 +263,90 @@ static void parse_per_arch_model_file(FILE *f, struct starpu_perfmodel_per_arch
 
				 	}
			
 
				 }
			
 
				 
			
 
				-static void parse_arch(FILE *f, struct starpu_perfmodel *model, unsigned scan_history, unsigned archmin, unsigned archmax, unsigned skiparch)
			
 
				+static void parse_arch(FILE *f, struct starpu_perfmodel *model, unsigned scan_history, unsigned* arch, unsigned archmax)
			
 
				 {
			
 
				 	struct starpu_perfmodel_per_arch dummy;
			
 
				-	int nimpls, implmax, skipimpl, impl;
			
 
				-	unsigned ret, arch;
			
 
				+	unsigned nimpls, implmax, impl, i, ret;
			
 
				+	_STARPU_DEBUG("Parsing arch %u \n",*arch); 
			
 
				 
			
 
				-	for (arch = archmin; arch < archmax; arch++)
			
 
				-	{
			
 
				-		_STARPU_DEBUG("Parsing arch %u\n", arch);
			
 
				-		_starpu_drop_comments(f);
			
 
				-		ret = fscanf(f, "%d\n", &nimpls);
			
 
				-		_STARPU_DEBUG("%d implementations\n", nimpls);
			
 
				-		STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file");
			
 
				-		implmax = STARPU_MIN(nimpls, STARPU_MAXIMPLEMENTATIONS);
			
 
				-		skipimpl = nimpls - STARPU_MAXIMPLEMENTATIONS;
			
 
				-		for (impl = 0; impl < implmax; impl++)
			
 
				-		{
			
 
				-			parse_per_arch_model_file(f, &model->per_arch[arch][impl], scan_history);
			
 
				-		}
			
 
				-		if (skipimpl > 0)
			
 
				-		{
			
 
				-			for (impl = 0; impl < skipimpl; impl++)
			
 
				-			{
			
 
				-				parse_per_arch_model_file(f, &dummy, 0);
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				+	/* Parsing the number of implementations */
			
 
				+	_starpu_drop_comments(f);
			
 
				+	ret = fscanf(f, "%u\n", &nimpls);
			
 
				+	STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file");
			
 
				 
			
 
				-	if (skiparch > 0)
			
 
				+	/* Parsing each implementation */
			
 
				+	implmax = STARPU_MIN(nimpls, STARPU_MAXIMPLEMENTATIONS);
			
 
				+	for (impl = 0; impl < implmax; impl++)
			
 
				 	{
			
 
				-		_starpu_drop_comments(f);
			
 
				-		for (arch = 0; arch < skiparch; arch ++)
			
 
				-		{
			
 
				-			_STARPU_DEBUG("skipping arch %u\n", arch);
			
 
				-			ret = fscanf(f, "%d\n", &nimpls);
			
 
				-			_STARPU_DEBUG("%d implementations\n", nimpls);
			
 
				-			STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file");
			
 
				-			implmax = STARPU_MIN(nimpls, STARPU_MAXIMPLEMENTATIONS);
			
 
				-			skipimpl = nimpls - STARPU_MAXIMPLEMENTATIONS;
			
 
				-			for (impl = 0; impl < implmax; impl++)
			
 
				-			{
			
 
				-				parse_per_arch_model_file(f, &dummy, 0);
			
 
				-			}
			
 
				-			if (skipimpl > 0)
			
 
				-			{
			
 
				-				for (impl = 0; impl < skipimpl; impl++)
			
 
				-				{
			
 
				-					parse_per_arch_model_file(f, &dummy, 0);
			
 
				-				}
			
 
				-			}
			
 
				-		}
			
 
				+		if(*arch < archmax)
			
 
				+			parse_per_arch_model_file(f, &model->per_arch[*arch][impl], scan_history);
			
 
				+		else
			
 
				+			parse_per_arch_model_file(f, &dummy, 0);
			
 
				+
			
 
				 	}
			
 
				+	/* The file may hold more implementations than STARPU_MAXIMPLEMENTATIONS:
			
 
				+	 * parse the extra ones into the dummy model so they are discarded. */
			
 
				+	if (impl < nimpls)
			
 
				+		for (i = impl; i < nimpls; i++)
			
 
				+			parse_per_arch_model_file(f, &dummy, 0);
			
 
				+
			
 
				 }
			
 
				 
			
 
				-static void parse_model_file(FILE *f, struct starpu_perfmodel *model, unsigned scan_history)
			
 
				+static void parse_device(FILE *f, struct starpu_perfmodel *model, unsigned scan_history, unsigned * arch, unsigned archmax)
			
 
				 {
			
 
				-	unsigned ret;
			
 
				-	unsigned archmin = 0;
			
 
				-	unsigned narchs;
			
 
				-
			
 
				-	/* We could probably write a clean loop here, but the code would not
			
 
				-	 * really be easier to read. */
			
 
				+	unsigned maxncore, ncore, ret;
			
 
				 
			
 
				-	/* Parsing CPUs */
			
 
				+	/* Parsing the maximum number of workers for this device */
			
 
				 	_starpu_drop_comments(f);
			
 
				-	ret = fscanf(f, "%u\n", &narchs);
			
 
				+	ret = fscanf(f, "%u\n", &maxncore);
			
 
				 	STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file");
			
 
				-
			
 
				-	_STARPU_DEBUG("Parsing %u CPUs\n", narchs);
			
 
				-	if (narchs > 0)
			
 
				+	
			
 
				+	/* Parsing each arch */
			
 
				+	for(ncore=0; ncore < maxncore; ncore++)
			
 
				 	{
			
 
				-		parse_arch(f, model, scan_history,
			
 
				-			   archmin,
			
 
				-			   STARPU_MIN(narchs, STARPU_MAXCPUS),
			
 
				-			   narchs > STARPU_MAXCPUS ? narchs - STARPU_MAXCPUS : 0);
			
 
				+		parse_arch(f,model,scan_history,arch,archmax);
			
 
				+		(*arch)++;
			
 
				 	}
			
 
				+}
			
 
				 
			
 
				-	/* Parsing CUDA devs */
			
 
				-	_starpu_drop_comments(f);
			
 
				-	ret = fscanf(f, "%u\n", &narchs);
			
 
				-	STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file");
			
 
				-	archmin += STARPU_MAXCPUS;
			
 
				-	_STARPU_DEBUG("Parsing %u CUDA devices\n", narchs);
			
 
				-	if (narchs > 0)
			
 
				-	{
			
 
				-		parse_arch(f, model, scan_history,
			
 
				-			   archmin,
			
 
				-			   archmin + STARPU_MIN(narchs, STARPU_MAXCUDADEVS),
			
 
				-			   narchs > STARPU_MAXCUDADEVS ? narchs - STARPU_MAXCUDADEVS : 0);
			
 
				-	}
			
 
				+static void parse_archtype(FILE *f, struct starpu_perfmodel *model, unsigned scan_history, unsigned * arch, unsigned archmax)
			
 
				+{
			
 
				+	unsigned ndevice, devid, ret;
			
 
				 
			
 
				-	/* Parsing OpenCL devs */
			
 
				+	/* Parsing the number of devices for this archtype */
			
 
				 	_starpu_drop_comments(f);
			
 
				-	ret = fscanf(f, "%u\n", &narchs);
			
 
				+	ret = fscanf(f, "%u\n", &ndevice);
			
 
				 	STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file");
			
 
				 
			
 
				-	archmin += STARPU_MAXCUDADEVS;
			
 
				-	_STARPU_DEBUG("Parsing %u OpenCL devices\n", narchs);
			
 
				-	if (narchs > 0)
			
 
				-	{
			
 
				-		parse_arch(f, model, scan_history,
			
 
				-			   archmin,
			
 
				-			   archmin + STARPU_MIN(narchs, STARPU_MAXOPENCLDEVS),
			
 
				-			   narchs > STARPU_MAXOPENCLDEVS ? narchs - STARPU_MAXOPENCLDEVS : 0);
			
 
				-	}
			
 
				+	/* Parsing each device for this archtype*/
			
 
				+	for(devid=0; devid < ndevice; devid++)
			
 
				+		parse_device(f,model,scan_history,arch,archmax);
			
 
				+}
			
 
				 
			
 
				-	/* Parsing MIC devs */
			
 
				-	_starpu_drop_comments(f);
			
 
				-	ret = fscanf(f, "%u\n", &narchs);
			
 
				-	if (ret == 0)
			
 
				-		narchs = 0;
			
 
				+static void parse_model_file(FILE *f, struct starpu_perfmodel *model, unsigned scan_history)
			
 
				+{
			
 
				+	unsigned arch, archmax;
			
 
				+	_STARPU_DEBUG("Start parsing\n");
			
 
				 
			
 
				-	archmin += STARPU_MAXOPENCLDEVS;
			
 
				-	_STARPU_DEBUG("Parsing %u MIC devices\n", narchs);
			
 
				-	if (narchs > 0)
			
 
				-	{
			
 
				-		parse_arch(f, model, scan_history,
			
 
				-			   archmin,
			
 
				-			   archmin + STARPU_MIN(narchs, STARPU_MAXMICDEVS),
			
 
				-			   narchs > STARPU_MAXMICDEVS ? narchs - STARPU_MAXMICDEVS : 0);
			
 
				-	}
			
 
				-}
			
 
				+	/* Parsing each kind of archtype */
			
 
				+
			
 
				+	/* Parsing CPU */
			
 
				+	arch = STARPU_CPU_DEFAULT;
			
 
				+	archmax = STARPU_CPU_DEFAULT + STARPU_MAXCPUS;
			
 
				+	parse_archtype(f, model, scan_history, &arch, archmax);
			
 
				 
			
 
				 
			
 
				+	/* Parsing CUDA */
			
 
				+	arch = STARPU_CUDA_DEFAULT;
			
 
				+	archmax = STARPU_CUDA_DEFAULT + STARPU_MAXCUDADEVS; 
			
 
				+	parse_archtype(f, model, scan_history, &arch, archmax);
			
 
				+
			
 
				+	/* Parsing OpenCL */
			
 
				+	arch = STARPU_OPENCL_DEFAULT;
			
 
				+	archmax = STARPU_OPENCL_DEFAULT + STARPU_MAXOPENCLDEVS; 
			
 
				+	parse_archtype(f, model, scan_history, &arch, archmax);
			
 
				+}
			
 
				+
			
 
				 static void dump_per_arch_model_file(FILE *f, struct starpu_perfmodel *model, unsigned arch, unsigned nimpl)
			
 
				 {
			
 
				 	struct starpu_perfmodel_per_arch *per_arch_model;
			
@@ -408,6 +370,7 @@ static void dump_per_arch_model_file(FILE *f, struct starpu_perfmodel *model, un
 
				 	/* header */
			
 
				 	char archname[32];
			
 
				 	starpu_perfmodel_get_arch_name((enum starpu_perfmodel_archtype) arch, archname, 32, nimpl);
			
 
				+	fprintf(f, "#####\n");
			
 
				 	fprintf(f, "# Model for %s\n", archname);
			
 
				 	fprintf(f, "# number of entries\n%u\n", nentries);
			
 
				 
			
@@ -425,7 +388,7 @@ static void dump_per_arch_model_file(FILE *f, struct starpu_perfmodel *model, un
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	fprintf(f, "\n##################\n");
			
 
				+	fprintf(f, "\n");
			
 
				 }
			
 
				 
			
 
				 static unsigned get_n_entries(struct starpu_perfmodel *model, unsigned arch, unsigned impl)
			
@@ -507,20 +470,30 @@ static void dump_model_file(FILE *f, struct starpu_perfmodel *model)
 
				 		{
			
 
				 			case STARPU_CPU_DEFAULT:
			
 
				 				name = "CPU";
			
 
				-				my_narch = narch[0];
			
 
				+				fprintf(f, "####################\n");
			
 
				+				fprintf(f, "# %ss\n", name);
			
 
				+				fprintf(f, "# number of %s devices\n", name);
			
 
				+				fprintf(f, "1\n");
			
 
				+				fprintf(f, "###############\n");
			
 
				+				fprintf(f, "# CPU_0\n");
			
 
				+				fprintf(f, "# number of workers on device CPU_0\n");
			
 
				+				fprintf(f, "%u\n", my_narch = narch[0]);
			
 
				 				break;
			
 
				 			case STARPU_CUDA_DEFAULT:
			
 
				 				name = "CUDA";
			
 
				 				substract_to_arch = STARPU_MAXCPUS;
			
 
				-				my_narch = narch[1];
			
 
				+				fprintf(f, "####################\n");
			
 
				+				fprintf(f, "# %ss\n", name);
			
 
				+				fprintf(f, "# number of %s devices\n", name);
			
 
				+				fprintf(f, "%u\n", my_narch = narch[1]);
			
 
				 				break;
			
 
				 			case STARPU_OPENCL_DEFAULT:
			
 
				 				name = "OPENCL";
			
 
				-				my_narch = narch[2];
			
 
				-				break;
			
 
				-			case STARPU_MIC_DEFAULT:
			
 
				-				name = "MIC";
			
 
				-				my_narch = narch[3];
			
 
				+				substract_to_arch += STARPU_MAXCUDADEVS;
			
 
				+				fprintf(f, "####################\n");
			
 
				+				fprintf(f, "# %ss\n", name);
			
 
				+				fprintf(f, "# number of %s devices\n", name); /* same wording as the CPU and CUDA headers */
			
 
				+				fprintf(f, "%u\n", my_narch = narch[2]);
			
 
				 				break;
			
 
				 			default:
			
 
				 				/* The current worker arch was already written,
			
@@ -529,15 +502,6 @@ static void dump_model_file(FILE *f, struct starpu_perfmodel *model)
 
				 				break;
			
 
				 		}
			
 
				 
			
 
				-		if (!arch_already_visited)
			
 
				-		{
			
 
				-			arch_base = arch;
			
 
				-			fprintf(f, "##################\n");
			
 
				-			fprintf(f, "# %ss\n", name);
			
 
				-			fprintf(f, "# number of %s architectures\n", name);
			
 
				-			fprintf(f, "%u\n", my_narch);
			
 
				-		}
			
 
				-
			
 
				 		unsigned max_impl = 0;
			
 
				 		if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
			
 
				 		{
			
@@ -557,12 +521,22 @@ static void dump_model_file(FILE *f, struct starpu_perfmodel *model)
 
				 		if (arch >= my_narch + arch_base)
			
 
				 			continue;
			
 
				 
			
 
				-		fprintf(f, "###########\n");
			
 
				-		if (substract_to_arch)
			
 
				+		if(substract_to_arch)
			
 
				+		{
			
 
				+			fprintf(f, "###############\n");
			
 
				 			fprintf(f, "# %s_%u\n", name, arch - substract_to_arch);
			
 
				+			fprintf(f, "# number of workers on device %s_%u\n", name, arch - substract_to_arch);
			
 
				+			fprintf(f, "1\n");
			
 
				+			fprintf(f, "##########\n");
			
 
				+			fprintf(f, "# 1 worker(s) in parallel\n");
			
 
				+		}
			
 
				 		else
			
 
				+		{
			
 
				 			/* CPU */
			
 
				-			fprintf(f, "# %u CPU(s) in parallel\n", arch + 1);
			
 
				+			fprintf(f, "##########\n");
			
 
				+			fprintf(f, "# %u worker(s) in parallel\n", arch +1);
			
 
				+		}
			
 
				+		
			
 
				 		fprintf(f, "# number of implementations\n");
			
 
				 		fprintf(f, "%u\n", max_impl);
			
 
				 		for (nimpl = 0; nimpl < max_impl; nimpl++)
			
@@ -570,6 +544,15 @@ static void dump_model_file(FILE *f, struct starpu_perfmodel *model)
 
				 			dump_per_arch_model_file(f, model, arch, nimpl);
			
 
				 		}
			
 
				 	}
			
 
				+
			
 
				+	fprintf(f,"####################\n"); /* MIC section: always written with a device count of 0 */
			
 
				+	fprintf(f,"# MICs\n");
			
 
				+	fprintf(f,"# number of MIC devices\n");
			
 
				+	fprintf(f,"0\n");
			
 
				+	fprintf(f,"####################\n"); /* SCC section: always written with a device count of 0 */
			
 
				+	fprintf(f,"# SCCs\n");
			
 
				+	fprintf(f,"# number of SCC devices\n");
			
 
				+	fprintf(f,"0\n");
			
 
				 }
			
 
				 
			
 
				 static void initialize_per_arch_model(struct starpu_perfmodel_per_arch *per_arch_model)