6 years ago · 73d45564a4
--- a/ChangeLog
+++ b/ChangeLog
@@ -24,6 +24,8 @@ New features:
 
				     tile or vector size without reallocating the buffer.
			
 
				   * Application can change the allocation used by StarPU with
			
 
				     starpu_malloc_set_hooks()
			
 
				+  * XML output for starpu_perfmodel_display (-x option) and new
			
 
				+    starpu_perfmodel_dump_xml() function
			
 
				 
			
 
				 StarPU 1.3.0 (svn revision xxxx)
			
 
				 ==============================================
			
--- a/doc/doxygen/chapters/370_online_performance_tools.doxy
+++ b/doc/doxygen/chapters/370_online_performance_tools.doxy
@@ -2,7 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2011,2012,2016                           Inria
			
 
				  * Copyright (C) 2010-2019                                CNRS
			
 
				- * Copyright (C) 2009-2011,2014,2016,2018                 Université de Bordeaux
			
 
				+ * Copyright (C) 2009-2011,2014,2016,2018-2019            Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -539,6 +539,10 @@ used to get the footprint used for indexing history-based performance
 
				 models. starpu_task_destroy() needs to be called to destroy the dummy
			
 
				 task afterwards. See <c>tests/perfmodels/regression_based.c</c> for an example.
			
 
				 
			
 
				+The application can also request an on-the-fly XML report of the performance
			
 
				+model, by calling starpu_perfmodel_dump_xml() to print the report to a
			
 
				+<c>FILE*</c>.
			
 
				+
			
 
				 \section DataTrace Data trace and tasks length
			
 
				 It is possible to get statistics about tasks length and data size by using :
			
 
				 \verbatim
			
--- a/include/starpu_perfmodel.h
+++ b/include/starpu_perfmodel.h
@@ -335,6 +335,11 @@ int starpu_perfmodel_unload_model(struct starpu_perfmodel *model);
 
				 void starpu_perfmodel_get_model_path(const char *symbol, char *path, size_t maxlen);
			
 
				 
			
 
				 /**
			
 
				+  Dump performance model \p model to output stream \p output, in XML format.
			
 
				+*/
			
 
				+void starpu_perfmodel_dump_xml(FILE *output, struct starpu_perfmodel *model);
			
 
				+
			
 
				+/**
			
 
				    Free internal memory used for sampling directory
			
 
				    management. It should only be called by an application which is not
			
 
				    calling starpu_shutdown() as this function already calls it. See for
			
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -23,6 +23,10 @@ CLEANFILES = *.gcno *.gcda *.linkinfo
 
				 
			
 
				 EXTRA_DIST = dolib.c
			
 
				 
			
 
				+xml_DATA = $(srcdir)/core/perfmodel/starpu-perfmodel.dtd
			
 
				+xmldir = $(pkgdatadir)
			
 
				+EXTRA_DIST += core/perfmodel/starpu-perfmodel.dtd
			
 
				+
			
 
				 ldflags =
			
 
				 
			
 
				 libstarpu_so_version = $(LIBSTARPU_INTERFACE_CURRENT):$(LIBSTARPU_INTERFACE_REVISION):$(LIBSTARPU_INTERFACE_AGE)
			
--- a/src/core/perfmodel/perfmodel_history.c
+++ b/src/core/perfmodel/perfmodel_history.c
@@ -556,16 +556,17 @@ static void scan_history_entry(FILE *f, const char *path, struct starpu_perfmode
 
				 	}
			
 
				 }
			
 
				 
			
 
				-static void parse_per_arch_model_file(FILE *f, const char *path, struct starpu_perfmodel_per_arch *per_arch_model, unsigned scan_history)
			
 
				+static void parse_per_arch_model_file(FILE *f, const char *path, struct starpu_perfmodel_per_arch *per_arch_model, unsigned scan_history, struct starpu_perfmodel *model)
			
 
				 {
			
 
				 	unsigned nentries;
			
 
				+	struct starpu_perfmodel_regression_model *reg_model = &per_arch_model->regression;
			
 
				 
			
 
				 	_starpu_drop_comments(f);
			
 
				 
			
 
				 	int res = fscanf(f, "%u\n", &nentries);
			
 
				 	STARPU_ASSERT_MSG(res == 1, "Incorrect performance model file %s", path);
			
 
				 
			
 
				-	scan_reg_model(f, path, &per_arch_model->regression);
			
 
				+	scan_reg_model(f, path, reg_model);
			
 
				 
			
 
				 	/* parse entries */
			
 
				 	unsigned i;
			
@@ -592,6 +593,20 @@ static void parse_per_arch_model_file(FILE *f, const char *path, struct starpu_p
 
				 		if (scan_history)
			
 
				 			insert_history_entry(entry, &per_arch_model->list, &per_arch_model->history);
			
 
				 	}
			
 
				+
			
 
				+	if (model && model->type == STARPU_PERFMODEL_INVALID)
			
 
				+	{
			
 
				+		/* Tool loading a perfmodel without having the corresponding codelet */
			
 
				+		if (reg_model->ncoeff != 0)
			
 
				+			model->type = STARPU_MULTIPLE_REGRESSION_BASED;
			
 
				+		else if (!isnan(reg_model->a) && !isnan(reg_model->b) && !isnan(reg_model->c))
			
 
				+			model->type = STARPU_NL_REGRESSION_BASED;
			
 
				+		else if (!isnan(reg_model->alpha) && !isnan(reg_model->beta))
			
 
				+			model->type = STARPU_REGRESSION_BASED;
			
 
				+		else if (nentries)
			
 
				+			model->type = STARPU_HISTORY_BASED;
			
 
				+		/* else unknown, leave invalid */
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 
			
@@ -623,7 +638,7 @@ static void parse_arch(FILE *f, const char *path, struct starpu_perfmodel *model
 
				 		{
			
 
				 			struct starpu_perfmodel_per_arch *per_arch_model = &model->state->per_arch[comb][impl];
			
 
				 			model->state->per_arch_is_set[comb][impl] = 1;
			
 
				-			parse_per_arch_model_file(f, path, per_arch_model, scan_history);
			
 
				+			parse_per_arch_model_file(f, path, per_arch_model, scan_history, model);
			
 
				 		}
			
 
				 	}
			
 
				 	else
			
@@ -634,7 +649,7 @@ static void parse_arch(FILE *f, const char *path, struct starpu_perfmodel *model
 
				 	/* if the number of implementation is greater than STARPU_MAXIMPLEMENTATIONS
			
 
				 	 * we skip the last implementation */
			
 
				 	for (i = impl; i < nimpls; i++)
			
 
				-		parse_per_arch_model_file(f, path, &dummy, 0);
			
 
				+		parse_per_arch_model_file(f, path, &dummy, 0, NULL);
			
 
				 }
			
 
				 
			
 
				 static enum starpu_worker_archtype _get_enum_type(int type)
			
@@ -909,6 +924,165 @@ static void dump_model_file(FILE *f, struct starpu_perfmodel *model)
 
				 }
			
 
				 #endif
			
 
				 
			
 
				+/* Write one history entry to f as a single self-closing <entry .../> XML element. */
				+static void dump_history_entry_xml(FILE *f, struct starpu_perfmodel_history_entry *entry)
				+{
				+	/* size is cast to unsigned long to match the %lu conversion */
				+	const unsigned long size = (unsigned long) entry->size;
				+
				+	fprintf(f, "      <entry footprint=\"%08x\" size=\"%lu\" flops=\"%e\" mean=\"%e\" deviation=\"%e\" sum=\"%e\" sum2=\"%e\" nsample=\"%u\"/>\n",
				+		entry->footprint,
				+		size,
				+		entry->flops,
				+		entry->mean,
				+		entry->deviation,
				+		entry->sum,
				+		entry->sum2,
				+		entry->nsample);
				+}
			
 
				+
			
 
				+/*
				+ * Write the regression section of implementation impl of combination comb
				+ * to f.  Which element is written depends on model->type:
				+ *  - STARPU_REGRESSION_BASED:          <l_regression .../>
				+ *  - STARPU_NL_REGRESSION_BASED:       <nl_regression .../>
				+ *  - STARPU_MULTIPLE_REGRESSION_BASED: <ml_regression> with one <monomial/>
				+ *    per combination
				+ * Nothing is written for any other model type.
				+ */
				+static void dump_reg_model_xml(FILE *f, struct starpu_perfmodel *model, int comb, int impl)
				+{
				+	struct starpu_perfmodel_regression_model *reg = &model->state->per_arch[comb][impl].regression;
				+	unsigned mono;
				+	unsigned param;
				+
				+	switch (model->type)
				+	{
				+	case STARPU_REGRESSION_BASED:
				+		/* Linear regression model */
				+		fprintf(f, "      <!-- time = alpha size ^ beta -->\n");
				+		fprintf(f, "      <l_regression sumlnx=\"%e\" sumlnx2=\"%e\" sumlny=\"%e\" sumlnxlny=\"%e\"", reg->sumlnx, reg->sumlnx2, reg->sumlny, reg->sumlnxlny);
				+		fprintf(f, " alpha=\"");
				+		_starpu_write_double(f, "%e", reg->alpha);
				+		fprintf(f, "\" beta=\"");
				+		_starpu_write_double(f, "%e", reg->beta);
				+		fprintf(f, "\" nsample=\"%u\" minx=\"%lu\" maxx=\"%lu\"/>\n", reg->nsample, reg->minx, reg->maxx);
				+		break;
				+
				+	case STARPU_NL_REGRESSION_BASED:
				+		/* Non-linear regression model */
				+		fprintf(f, "      <!-- time = a size ^b + c -->\n");
				+		fprintf(f, "      <nl_regression a=\"");
				+		_starpu_write_double(f, "%e", reg->a);
				+		fprintf(f, "\" b=\"");
				+		_starpu_write_double(f, "%e", reg->b);
				+		fprintf(f, "\" c=\"");
				+		_starpu_write_double(f, "%e", reg->c);
				+		fprintf(f, "\"/>\n");
				+		break;
				+
				+	case STARPU_MULTIPLE_REGRESSION_BASED:
				+		/* Multi-linear regression model */
				+		if (reg->ncoeff==0 || model->ncombinations==0 || model->combinations==NULL)
				+		{
				+			/* No coefficient or no combination: emit an empty element */
				+			fprintf(f, "      <ml_regression constant=\"nan\"/>\n");
				+			break;
				+		}
				+
				+		/* coeff[0] is written as the constant, coeff[mono+1] goes with monomial mono */
				+		fprintf(f, "      <ml_regression constant=\"%e\">\n", reg->coeff[0]);
				+		for (mono = 0; mono < model->ncombinations; mono++)
				+		{
				+			fprintf(f, "        <monomial name=\"");
				+			if (model->parameters_names == NULL)
				+			{
				+				/* No names at all: fall back to c1, c2, ... */
				+				fprintf(f, "c%u", mono+1);
				+			}
				+			else
				+			{
				+				/* Build e.g. "M*N^2" out of the parameters with a positive exponent */
				+				int need_star = 0;
				+
				+				for (param = 0; param < model->nparameters; param++)
				+				{
				+					if (!(model->combinations[mono][param] > 0))
				+						continue;
				+
				+					if (need_star)
				+						fprintf(f, "*");
				+					need_star = 1;
				+
				+					if (model->parameters_names[param] == NULL)
				+						fprintf(f, "P%u", param);
				+					else
				+						fprintf(f, "%s", model->parameters_names[param]);
				+
				+					if (model->combinations[mono][param] > 1)
				+						fprintf(f, "^%d", model->combinations[mono][param]);
				+				}
				+			}
				+			fprintf(f, "\" coef=\"%e\"/>\n", reg->coeff[mono+1]);
				+		}
				+		fprintf(f, "      </ml_regression>\n");
				+		break;
				+
				+	default:
				+		/* Other model types have no regression section */
				+		break;
				+	}
				+}
			
 
				+
			
 
				+/*
				+ * Write the XML body of implementation impl of combination comb to f:
				+ * first the regression section, then one <entry/> per element of the
				+ * per-arch history list, in list order.
				+ */
				+static void dump_per_arch_model_xml(FILE *f, struct starpu_perfmodel *model, int comb, unsigned impl)
				+{
				+	struct starpu_perfmodel_per_arch *arch_model = &model->state->per_arch[comb][impl];
				+	struct starpu_perfmodel_history_list *node;
				+
				+	/* Regression parameters come first */
				+	dump_reg_model_xml(f, model, comb, impl);
				+
				+	/* Then every recorded history entry */
				+	for (node = arch_model->list; node != NULL; node = node->next)
				+		dump_history_entry_xml(f, node->entry);
				+}
			
 
				+
			
 
				+/*
				+ * Write performance model `model` to stream `f` as an XML document.
				+ *
				+ * The output is an XML prolog followed by a <perfmodel> root element that
				+ * holds one <combination> per entry of model->state->combs.  Each
				+ * combination lists its devices, then one <implementation> per
				+ * implementation, whose content is produced by dump_per_arch_model_xml().
				+ *
				+ * model->state is dereferenced unconditionally, and its model_rwlock is
				+ * read-held while the combinations are walked.
				+ */
				+void starpu_perfmodel_dump_xml(FILE *f, struct starpu_perfmodel *model)
				+{
				+	fprintf(f, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
				+	/* The DOCTYPE name has to be the name of the root element
				+	 * (<perfmodel>), otherwise the document cannot validate against the DTD */
				+	fprintf(f, "<!DOCTYPE perfmodel SYSTEM \"starpu-perfmodel.dtd\">\n");
				+	fprintf(f, "<!-- symbol %s -->\n", model->symbol);
				+	fprintf(f, "<!-- All times in us -->\n");
				+	fprintf(f, "<perfmodel version=\"%u\">\n", _STARPU_PERFMODEL_VERSION);
				+
				+	STARPU_PTHREAD_RWLOCK_RDLOCK(&model->state->model_rwlock);
				+	int ncombs = model->state->ncombs;
				+	int i, impl, dev;
				+
				+	for(i = 0; i < ncombs; i++)
				+	{
				+		int comb = model->state->combs[i];
				+		int ndevices = arch_combs[comb]->ndevices;
				+
				+		fprintf(f, "  <combination>\n");
				+
				+		/* One <device/> per device of the combination */
				+		for(dev = 0; dev < ndevices; dev++)
				+		{
				+			const char *type;
				+			switch (arch_combs[comb]->devices[dev].type)
				+			{
				+				case STARPU_CPU_WORKER: type = "CPU"; break;
				+				case STARPU_CUDA_WORKER: type = "CUDA"; break;
				+				case STARPU_OPENCL_WORKER: type = "OpenCL"; break;
				+				case STARPU_MIC_WORKER: type = "MIC"; break;
				+				case STARPU_SCC_WORKER: type = "SCC"; break;
				+				case STARPU_MPI_MS_WORKER: type = "MPI_MS"; break;
				+				default:
				+					STARPU_ASSERT(0);
				+					/* Keep type defined in case the assertion does not abort */
				+					type = "unknown";
				+					break;
				+			}
				+			fprintf(f, "    <device type=\"%s\" id=\"%d\"",
				+					type,
				+					arch_combs[comb]->devices[dev].devid);
				+			/* ncores is only written for CPU devices */
				+			if (arch_combs[comb]->devices[dev].type == STARPU_CPU_WORKER)
				+				fprintf(f, " ncores=\"%d\"",
				+						arch_combs[comb]->devices[dev].ncores);
				+			fprintf(f, "/>\n");
				+		}
				+
				+		/* One <implementation> per implementation of the combination */
				+		int nimpls = model->state->nimpls[comb];
				+		for (impl = 0; impl < nimpls; impl++)
				+		{
				+			/* impl is an int, so it is printed with %d */
				+			fprintf(f, "    <implementation id=\"%d\">\n", impl);
				+			char archname[STR_SHORT_LENGTH];
				+			starpu_perfmodel_get_arch_name(arch_combs[comb], archname,  sizeof(archname), impl);
				+			fprintf(f, "      <!-- %s -->\n", archname);
				+			dump_per_arch_model_xml(f, model, comb, impl);
				+			fprintf(f, "    </implementation>\n");
				+		}
				+		fprintf(f, "  </combination>\n");
				+	}
				+	STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock);
				+	fprintf(f, "</perfmodel>\n");
				+}
			
 
				+
			
 
				 void _starpu_perfmodel_realloc(struct starpu_perfmodel *model, int nb)
			
 
				 {
			
 
				 	int i;
			
--- a/src/core/perfmodel/starpu-perfmodel.dtd
+++ b/src/core/perfmodel/starpu-perfmodel.dtd
@@ -0,0 +1,64 @@
 
				+<!--
			
 
				+  StarPU - Runtime system for heterogeneous multicore architectures.
			
 
				+  
			
 
				+  Copyright (C) 2019                                     Université de Bordeaux
			
 
				+  
			
 
				+  StarPU is free software; you can redistribute it and/or modify
			
 
				+  it under the terms of the GNU Lesser General Public License as published by
			
 
				+  the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+  your option) any later version.
			
 
				+  
			
 
				+  StarPU is distributed in the hope that it will be useful, but
			
 
				+  WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+  
			
 
				+  See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+  
			
 
				+  This is the DTD for StarPU performance models.
			
 
				+ -->
			
 
				+
			
 
				+<!-- Root element, written by starpu_perfmodel_dump_xml(). -->
				+<!ELEMENT perfmodel (combination)+>
				+<!ATTLIST perfmodel version CDATA "">
				+
				+<!-- A combination lists each of its devices (one or more), then its
				+     implementations. -->
				+<!ELEMENT combination (device+,implementation+)>
				+
				+<!-- ncores is only written for CPU devices. -->
				+<!ELEMENT device EMPTY>
				+<!ATTLIST device type CDATA #REQUIRED>
				+<!ATTLIST device id CDATA #REQUIRED>
				+<!ATTLIST device ncores CDATA "">
				+
				+<!-- Regression section (if any) first, then the history entries. -->
				+<!ELEMENT implementation (l_regression*,nl_regression*,ml_regression*,entry*)>
				+<!ATTLIST implementation id CDATA #REQUIRED>
				+
				+<!-- Linear regression: time = alpha size ^ beta -->
				+<!ELEMENT l_regression EMPTY>
				+<!ATTLIST l_regression sumlnx CDATA "">
				+<!ATTLIST l_regression sumlnx2 CDATA "">
				+<!ATTLIST l_regression sumlny CDATA "">
				+<!ATTLIST l_regression sumlnxlny CDATA "">
				+<!ATTLIST l_regression alpha CDATA #REQUIRED>
				+<!ATTLIST l_regression beta CDATA #REQUIRED>
				+<!ATTLIST l_regression nsample CDATA "">
				+<!ATTLIST l_regression minx CDATA "">
				+<!ATTLIST l_regression maxx CDATA "">
				+
				+<!-- Non-linear regression: time = a size ^b + c -->
				+<!ELEMENT nl_regression EMPTY>
				+<!ATTLIST nl_regression a CDATA #REQUIRED>
				+<!ATTLIST nl_regression b CDATA #REQUIRED>
				+<!ATTLIST nl_regression c CDATA #REQUIRED>
				+
				+<!-- Multi-linear regression.  The element has no monomial child when it is
				+     written with constant="nan", so zero monomials must be allowed. -->
				+<!ELEMENT ml_regression (monomial*)>
				+<!ATTLIST ml_regression constant CDATA #REQUIRED>
				+
				+<!ELEMENT monomial EMPTY>
				+<!ATTLIST monomial name CDATA #REQUIRED>
				+<!ATTLIST monomial coef CDATA #REQUIRED>
				+
				+<!-- One history entry. -->
				+<!ELEMENT entry EMPTY>
				+<!ATTLIST entry footprint CDATA #REQUIRED>
				+<!ATTLIST entry size CDATA #REQUIRED>
				+<!ATTLIST entry flops CDATA "">
				+<!ATTLIST entry mean CDATA #REQUIRED>
				+<!ATTLIST entry deviation CDATA #REQUIRED>
				+<!ATTLIST entry sum CDATA "">
				+<!ATTLIST entry sum2 CDATA "">
				+<!ATTLIST entry nsample CDATA "0">
			
--- a/tools/starpu_perfmodel_display.c
+++ b/tools/starpu_perfmodel_display.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009-2014,2017                           Université de Bordeaux
			
 
				+ * Copyright (C) 2009-2014,2017,2019                      Université de Bordeaux
			
 
				  * Copyright (C) 2011,2012                                Inria
			
 
				  * Copyright (C) 2010-2017                                CNRS
			
 
				  * Copyright (C) 2011                                     Télécom-SudParis
			
@@ -31,6 +31,8 @@
 
				 
			
 
				 #define PROGNAME "starpu_perfmodel_display"
			
 
				 
			
 
				+/* XML format */
			
 
				+static int xml = 0;
			
 
				 /* display all available models */
			
 
				 static int plist = 0;
			
 
				 /* display directory */
			
@@ -50,10 +52,11 @@ static void usage()
 
				 	fprintf(stderr, "Display a given perfmodel\n\n");
			
 
				 	fprintf(stderr, "Usage: %s [ options ]\n", PROGNAME);
			
 
				         fprintf(stderr, "\n");
			
 
				-        fprintf(stderr, "One must specify either -l or -s\n");
			
 
				+        fprintf(stderr, "One must specify either -l or -s. -x can be used with -s\n");
			
 
				         fprintf(stderr, "Options:\n");
			
 
				         fprintf(stderr, "   -l                  display all available models\n");
			
 
				         fprintf(stderr, "   -s <symbol>         specify the symbol\n");
			
 
				+	fprintf(stderr, "   -x                  display output in XML format\n");
			
 
				         fprintf(stderr, "   -p <parameter>      specify the parameter (e.g. a, b, c, mean, stddev)\n");
			
 
				         fprintf(stderr, "   -a <arch>           specify the architecture (e.g. cpu, cpu:k, cuda)\n");
			
 
				 	fprintf(stderr, "   -f <footprint>      display the history-based model for the specified footprint\n");
			
@@ -84,7 +87,7 @@ static void parse_args(int argc, char **argv)
 
				 	};
			
 
				 
			
 
				 	int option_index;
			
 
				-	while ((c = getopt_long(argc, argv, "dls:p:a:f:h", long_options, &option_index)) != -1)
			
 
				+	while ((c = getopt_long(argc, argv, "dls:p:a:f:hx", long_options, &option_index)) != -1)
			
 
				 	{
			
 
				 		switch (c)
			
 
				 		{
			
@@ -120,6 +123,11 @@ static void parse_args(int argc, char **argv)
 
				 			pdirectory = 1;
			
 
				 			break;
			
 
				 
			
 
				+		case 'x':
			
 
				+			/* XML output */
			
 
				+			xml = 1;
			
 
				+			break;
			
 
				+
			
 
				 		case 'h':
			
 
				 			usage();
			
 
				 			exit(EXIT_SUCCESS);
			
@@ -169,12 +177,16 @@ int main(int argc, char **argv)
 
				 			fprintf(stderr, "The performance model for the symbol <%s> could not be loaded\n", psymbol);
			
 
				 			return 1;
			
 
				 		}
			
 
				-		uint32_t *footprint = NULL;
			
 
				-		if (pdisplay_specific_footprint == 1)
			
 
				-		{
			
 
				-			footprint = &pspecific_footprint;
			
 
				+		if (xml) {
			
 
				+			starpu_perfmodel_dump_xml(stdout, &model);
			
 
				+		} else {
			
 
				+			uint32_t *footprint = NULL;
			
 
				+			if (pdisplay_specific_footprint == 1)
			
 
				+			{
			
 
				+				footprint = &pspecific_footprint;
			
 
				+			}
			
 
				+			starpu_perfmodel_print_all(&model, parch, pparameter, footprint, stdout);
			
 
				 		}
			
 
				-		starpu_perfmodel_print_all(&model, parch, pparameter, footprint, stdout);
			
 
				 		starpu_perfmodel_unload_model(&model);
			
 
				         }