6 年之前 · 73d45564a4
--- a/ChangeLog
+++ b/ChangeLog
@@ -24,6 +24,8 @@ New features:
 
																     tile or vector size without reallocating the buffer.
															
 
																   * Application can change the allocation used by StarPU with
															
 
																     starpu_malloc_set_hooks()
															
 
																+  * XML output for starpu_perfmodel_display and starpu_perfmodel_dump_xml()
															
 
																+    function
															
 
																 StarPU 1.3.0 (svn revision xxxx)
															
 
																 ==============================================
															
--- a/doc/doxygen/chapters/370_online_performance_tools.doxy
+++ b/doc/doxygen/chapters/370_online_performance_tools.doxy
@@ -2,7 +2,7 @@
 
																  *
															
 
																  * Copyright (C) 2011,2012,2016                           Inria
															
 
																  * Copyright (C) 2010-2019                                CNRS
															
 
																- * Copyright (C) 2009-2011,2014,2016,2018                 Université de Bordeaux
															
 
																+ * Copyright (C) 2009-2011,2014,2016,2018-2019            Université de Bordeaux
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -539,6 +539,10 @@ used to get the footprint used for indexing history-based performance
 
																 models. starpu_task_destroy() needs to be called to destroy the dummy
															
 
																 task afterwards. See <c>tests/perfmodels/regression_based.c</c> for an example.
															
 
																+The application can also request an on-the-fly XML report of the performance
															
 
																+model, by calling starpu_perfmodel_dump_xml() to print the report to a
															
 
																+<c>FILE*</c>.
															
 
																+
															
 
																 \section DataTrace Data trace and tasks length
															
 
																 It is possible to get statistics about tasks length and data size by using :
															
 
																 \verbatim
															
--- a/include/starpu_perfmodel.h
+++ b/include/starpu_perfmodel.h
@@ -335,6 +335,11 @@ int starpu_perfmodel_unload_model(struct starpu_perfmodel *model);
 
																 void starpu_perfmodel_get_model_path(const char *symbol, char *path, size_t maxlen);
															
 
																 /**
															
 
																+  Dump performance model \p model to output stream \p output, in XML format.
															
 
																+*/
															
 
																+void starpu_perfmodel_dump_xml(FILE *output, struct starpu_perfmodel *model);
															
 
																+
															
 
																+/**
															
 
																    Free internal memory used for sampling directory
															
 
																    management. It should only be called by an application which is not
															
 
																    calling starpu_shutdown() as this function already calls it. See for
															
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -23,6 +23,10 @@ CLEANFILES = *.gcno *.gcda *.linkinfo
 
																 EXTRA_DIST = dolib.c
															
 
																+xml_DATA = $(srcdir)/core/perfmodel/starpu-perfmodel.dtd
															
 
																+xmldir = $(pkgdatadir)
															
 
																+EXTRA_DIST += core/perfmodel/starpu-perfmodel.dtd
															
 
																+
															
 
																 ldflags =
															
 
																 libstarpu_so_version = $(LIBSTARPU_INTERFACE_CURRENT):$(LIBSTARPU_INTERFACE_REVISION):$(LIBSTARPU_INTERFACE_AGE)
															
--- a/src/core/perfmodel/perfmodel_history.c
+++ b/src/core/perfmodel/perfmodel_history.c
@@ -556,16 +556,17 @@ static void scan_history_entry(FILE *f, const char *path, struct starpu_perfmode
 
																 	}
															
 
																 }
															
 
																-static void parse_per_arch_model_file(FILE *f, const char *path, struct starpu_perfmodel_per_arch *per_arch_model, unsigned scan_history)
															
 
																+static void parse_per_arch_model_file(FILE *f, const char *path, struct starpu_perfmodel_per_arch *per_arch_model, unsigned scan_history, struct starpu_perfmodel *model)
															
 
																 {
															
 
																 	unsigned nentries;
															
 
																+	struct starpu_perfmodel_regression_model *reg_model = &per_arch_model->regression;
															
 
																 	_starpu_drop_comments(f);
															
 
																 	int res = fscanf(f, "%u\n", &nentries);
															
 
																 	STARPU_ASSERT_MSG(res == 1, "Incorrect performance model file %s", path);
															
 
																-	scan_reg_model(f, path, &per_arch_model->regression);
															
 
																+	scan_reg_model(f, path, reg_model);
															
 
																 	/* parse entries */
															
 
																 	unsigned i;
															
@@ -592,6 +593,20 @@ static void parse_per_arch_model_file(FILE *f, const char *path, struct starpu_p
 
																 		if (scan_history)
															
 
																 			insert_history_entry(entry, &per_arch_model->list, &per_arch_model->history);
															
 
																 	}
															
 
																+
															
 
																+	if (model && model->type == STARPU_PERFMODEL_INVALID)
															
 
																+	{
															
 
																+		/* Tool loading a perfmodel without having the corresponding codelet */
															
 
																+		if (reg_model->ncoeff != 0)
															
 
																+			model->type = STARPU_MULTIPLE_REGRESSION_BASED;
															
 
																+		else if (!isnan(reg_model->a) && !isnan(reg_model->b) && !isnan(reg_model->c))
															
 
																+			model->type = STARPU_NL_REGRESSION_BASED;
															
 
																+		else if (!isnan(reg_model->alpha) && !isnan(reg_model->beta))
															
 
																+			model->type = STARPU_REGRESSION_BASED;
															
 
																+		else if (nentries)
															
 
																+			model->type = STARPU_HISTORY_BASED;
															
 
																+		/* else unknown, leave invalid */
															
 
																+	}
															
 
																 }
															
@@ -623,7 +638,7 @@ static void parse_arch(FILE *f, const char *path, struct starpu_perfmodel *model
 
																 		{
															
 
																 			struct starpu_perfmodel_per_arch *per_arch_model = &model->state->per_arch[comb][impl];
															
 
																 			model->state->per_arch_is_set[comb][impl] = 1;
															
 
																-			parse_per_arch_model_file(f, path, per_arch_model, scan_history);
															
 
																+			parse_per_arch_model_file(f, path, per_arch_model, scan_history, model);
															
 
																 		}
															
 
																 	}
															
 
																 	else
															
@@ -634,7 +649,7 @@ static void parse_arch(FILE *f, const char *path, struct starpu_perfmodel *model
 
																 	/* if the number of implementation is greater than STARPU_MAXIMPLEMENTATIONS
															
 
																 	 * we skip the last implementation */
															
 
																 	for (i = impl; i < nimpls; i++)
															
 
																-		parse_per_arch_model_file(f, path, &dummy, 0);
															
 
																+		parse_per_arch_model_file(f, path, &dummy, 0, NULL);
															
 
																 }
															
 
																 static enum starpu_worker_archtype _get_enum_type(int type)
															
@@ -909,6 +924,165 @@ static void dump_model_file(FILE *f, struct starpu_perfmodel *model)
 
																 }
															
 
																 #endif
															
 
																+/* Write one history entry to f as a self-closing <entry/> XML element. */
																+static void dump_history_entry_xml(FILE *f, struct starpu_perfmodel_history_entry *entry)
																+{
																+	const unsigned long size = (unsigned long) entry->size;
																+
																+	fprintf(f,
																+		"      <entry footprint=\"%08x\" size=\"%lu\" flops=\"%e\""
																+		" mean=\"%e\" deviation=\"%e\" sum=\"%e\" sum2=\"%e\""
																+		" nsample=\"%u\"/>\n",
																+		entry->footprint, size, entry->flops,
																+		entry->mean, entry->deviation, entry->sum, entry->sum2,
																+		entry->nsample);
																+}
															
 
																+
															
 
																+/*
																+ * Write the regression part of the per-arch model (comb, impl) of model to f
																+ * as XML. The element written is selected by model->type:
																+ *  - STARPU_REGRESSION_BASED: <l_regression/> (time = alpha size ^ beta)
																+ *  - STARPU_NL_REGRESSION_BASED: <nl_regression/> (time = a size ^b + c)
																+ *  - STARPU_MULTIPLE_REGRESSION_BASED: <ml_regression> holding one
																+ *    <monomial/> per combination
																+ * Nothing is written for any other model type.
																+ */
																+static void dump_reg_model_xml(FILE *f, struct starpu_perfmodel *model, int comb, int impl)
																+{
																+	struct starpu_perfmodel_regression_model *reg = &model->state->per_arch[comb][impl].regression;
																+	unsigned mono;
																+
																+	switch (model->type)
																+	{
																+	case STARPU_REGRESSION_BASED:
																+		fprintf(f, "      <!-- time = alpha size ^ beta -->\n");
																+		fprintf(f, "      <l_regression sumlnx=\"%e\" sumlnx2=\"%e\" sumlny=\"%e\" sumlnxlny=\"%e\"",
																+			reg->sumlnx, reg->sumlnx2, reg->sumlny, reg->sumlnxlny);
																+		fprintf(f, " alpha=\"");
																+		_starpu_write_double(f, "%e", reg->alpha);
																+		fprintf(f, "\" beta=\"");
																+		_starpu_write_double(f, "%e", reg->beta);
																+		fprintf(f, "\" nsample=\"%u\" minx=\"%lu\" maxx=\"%lu\"/>\n",
																+			reg->nsample, reg->minx, reg->maxx);
																+		break;
																+
																+	case STARPU_NL_REGRESSION_BASED:
																+		fprintf(f, "      <!-- time = a size ^b + c -->\n");
																+		fprintf(f, "      <nl_regression a=\"");
																+		_starpu_write_double(f, "%e", reg->a);
																+		fprintf(f, "\" b=\"");
																+		_starpu_write_double(f, "%e", reg->b);
																+		fprintf(f, "\" c=\"");
																+		_starpu_write_double(f, "%e", reg->c);
																+		fprintf(f, "\"/>\n");
																+		break;
																+
																+	case STARPU_MULTIPLE_REGRESSION_BASED:
																+		if (reg->ncoeff == 0 || model->ncombinations == 0 || model->combinations == NULL)
																+		{
																+			/* No coefficients or no combinations: emit an empty element */
																+			fprintf(f, "      <ml_regression constant=\"nan\"/>\n");
																+			break;
																+		}
																+
																+		/* coeff[0] is printed as the constant, coeff[mono+1] goes with combination mono */
																+		fprintf(f, "      <ml_regression constant=\"%e\">\n", reg->coeff[0]);
																+		for (mono = 0; mono < model->ncombinations; mono++)
																+		{
																+			fprintf(f, "        <monomial name=\"");
																+			if (model->parameters_names != NULL)
																+			{
																+				unsigned p;
																+				int nfactors = 0;
																+
																+				/* The name is the '*'-separated list of parameters with a positive exponent */
																+				for (p = 0; p < model->nparameters; p++)
																+				{
																+					if (model->combinations[mono][p] <= 0)
																+						continue;
																+
																+					if (nfactors++ > 0)
																+						fprintf(f, "*");
																+
																+					if (model->parameters_names[p] == NULL)
																+						fprintf(f, "P%u", p);
																+					else
																+						fprintf(f, "%s", model->parameters_names[p]);
																+
																+					if (model->combinations[mono][p] > 1)
																+						fprintf(f, "^%d", model->combinations[mono][p]);
																+				}
																+			}
																+			else
																+			{
																+				/* No parameter names available: fall back to a numbered name */
																+				fprintf(f, "c%u", mono+1);
																+			}
																+			fprintf(f, "\" coef=\"%e\"/>\n", reg->coeff[mono+1]);
																+		}
																+		fprintf(f, "      </ml_regression>\n");
																+		break;
																+
																+	default:
																+		/* Other model types have no regression element */
																+		break;
																+	}
																+}
															
 
																+
															
 
																+/*
																+ * Write the per-arch model (comb, impl) of model to f as XML: first its
																+ * regression element (see dump_reg_model_xml()), then one <entry/> per
																+ * element of its history list.
																+ */
																+static void dump_per_arch_model_xml(FILE *f, struct starpu_perfmodel *model, int comb, unsigned impl)
																+{
																+	struct starpu_perfmodel_per_arch *arch_model = &model->state->per_arch[comb][impl];
																+	struct starpu_perfmodel_history_list *node;
																+
																+	dump_reg_model_xml(f, model, comb, impl);
																+
																+	/* Walk the history list in order */
																+	for (node = arch_model->list; node != NULL; node = node->next)
																+		dump_history_entry_xml(f, node->entry);
																+}
															
 
																+
															
 
																+/*
																+ * Dump performance model `model` to stream `f` as an XML document that
																+ * references starpu-perfmodel.dtd.
																+ *
																+ * The root <perfmodel> element holds one <combination> per architecture
																+ * combination recorded in model->state. Each combination lists its
																+ * <device>s (ncores is only printed for CPU devices), followed by one
																+ * <implementation> per implementation.
																+ *
																+ * The XML prolog is written first; the combinations are then walked
																+ * while holding the read side of model->state->model_rwlock.
																+ */
																+void starpu_perfmodel_dump_xml(FILE *f, struct starpu_perfmodel *model)
																+{
																+	fprintf(f, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
																+	/* The DOCTYPE name must be the name of the root element, i.e. perfmodel,
																+	 * otherwise the document cannot be validated against the DTD */
																+	fprintf(f, "<!DOCTYPE perfmodel SYSTEM \"starpu-perfmodel.dtd\">\n");
																+	fprintf(f, "<!-- symbol %s -->\n", model->symbol);
																+	fprintf(f, "<!-- All times in us -->\n");
																+	fprintf(f, "<perfmodel version=\"%u\">\n", _STARPU_PERFMODEL_VERSION);
																+
																+	STARPU_PTHREAD_RWLOCK_RDLOCK(&model->state->model_rwlock);
																+	int ncombs = model->state->ncombs;
																+	int i, impl, dev;
																+
																+	for(i = 0; i < ncombs; i++)
																+	{
																+		int comb = model->state->combs[i];
																+		int ndevices = arch_combs[comb]->ndevices;
																+
																+		fprintf(f, "  <combination>\n");
																+		for(dev = 0; dev < ndevices; dev++)
																+		{
																+			/* Fallback so that type is never read uninitialized, even
																+			 * if the assertion in the default case is compiled out */
																+			const char *type = "unknown";
																+			switch (arch_combs[comb]->devices[dev].type)
																+			{
																+				case STARPU_CPU_WORKER: type = "CPU"; break;
																+				case STARPU_CUDA_WORKER: type = "CUDA"; break;
																+				case STARPU_OPENCL_WORKER: type = "OpenCL"; break;
																+				case STARPU_MIC_WORKER: type = "MIC"; break;
																+				case STARPU_SCC_WORKER: type = "SCC"; break;
																+				case STARPU_MPI_MS_WORKER: type = "MPI_MS"; break;
																+				default: STARPU_ASSERT(0); break;
																+			}
																+			fprintf(f, "    <device type=\"%s\" id=\"%d\"",
																+					type,
																+					arch_combs[comb]->devices[dev].devid);
																+			if (arch_combs[comb]->devices[dev].type == STARPU_CPU_WORKER)
																+				fprintf(f, " ncores=\"%d\"",
																+						arch_combs[comb]->devices[dev].ncores);
																+			fprintf(f, "/>\n");
																+		}
																+		int nimpls = model->state->nimpls[comb];
																+		for (impl = 0; impl < nimpls; impl++)
																+		{
																+			/* impl is an int, so it is printed with %d */
																+			fprintf(f, "    <implementation id=\"%d\">\n", impl);
																+			char archname[STR_SHORT_LENGTH];
																+			starpu_perfmodel_get_arch_name(arch_combs[comb], archname,  sizeof(archname), impl);
																+			fprintf(f, "      <!-- %s -->\n", archname);
																+			dump_per_arch_model_xml(f, model, comb, impl);
																+			fprintf(f, "    </implementation>\n");
																+		}
																+		fprintf(f, "  </combination>\n");
																+	}
																+	STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock);
																+	fprintf(f, "</perfmodel>\n");
																+}
															
 
																+
															
 
																 void _starpu_perfmodel_realloc(struct starpu_perfmodel *model, int nb)
															
 
																 {
															
 
																 	int i;
															
--- a/src/core/perfmodel/starpu-perfmodel.dtd
+++ b/src/core/perfmodel/starpu-perfmodel.dtd
@@ -0,0 +1,64 @@
 
																+<!--
															
 
																+  StarPU - Runtime system for heterogeneous multicore architectures.
															
 
																+  
															
 
																+  Copyright (C) 2019                                     Université de Bordeaux
															
 
																+  
															
 
																+  StarPU is free software; you can redistribute it and/or modify
															
 
																+  it under the terms of the GNU Lesser General Public License as published by
															
 
																+  the Free Software Foundation; either version 2.1 of the License, or (at
															
 
																+  your option) any later version.
															
 
																+  
															
 
																+  StarPU is distributed in the hope that it will be useful, but
															
 
																+  WITHOUT ANY WARRANTY; without even the implied warranty of
															
 
																+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
															
 
																+  
															
 
																+  See the GNU Lesser General Public License in COPYING.LGPL for more details.
															
 
																+  
															
 
																+  This is the DTD for StarPU performance models.
															
 
																+ -->
															
 
																+
															
 
																+<!ELEMENT perfmodel (combination)+>
															
 
																+<!ATTLIST perfmodel version CDATA "">
															
 
																+
															
 
																+<!-- A combination lists one device element per device it contains -->
																+<!ELEMENT combination (device+,implementation+)>
															
 
																+
															
 
																+<!ELEMENT device EMPTY>
															
 
																+<!ATTLIST device type CDATA #REQUIRED>
															
 
																+<!ATTLIST device id CDATA #REQUIRED>
															
 
																+<!ATTLIST device ncores CDATA "">
															
 
																+
															
 
																+<!ELEMENT implementation (l_regression*,nl_regression*,ml_regression*,entry*)>
															
 
																+<!ATTLIST implementation id CDATA #REQUIRED>
															
 
																+
															
 
																+<!ELEMENT l_regression EMPTY>
															
 
																+<!ATTLIST l_regression sumlnx CDATA "">
															
 
																+<!ATTLIST l_regression sumlnx2 CDATA "">
															
 
																+<!ATTLIST l_regression sumlny CDATA "">
															
 
																+<!ATTLIST l_regression sumlnxlny CDATA "">
															
 
																+<!ATTLIST l_regression alpha CDATA #REQUIRED>
															
 
																+<!ATTLIST l_regression beta CDATA #REQUIRED>
															
 
																+<!ATTLIST l_regression nsample CDATA "">
															
 
																+<!ATTLIST l_regression minx CDATA "">
															
 
																+<!ATTLIST l_regression maxx CDATA "">
															
 
																+
															
 
																+<!ELEMENT nl_regression EMPTY>
															
 
																+<!ATTLIST nl_regression a CDATA #REQUIRED>
															
 
																+<!ATTLIST nl_regression b CDATA #REQUIRED>
															
 
																+<!ATTLIST nl_regression c CDATA #REQUIRED>
															
 
																+
															
 
																+<!-- May be empty: an ml_regression with constant="nan" carries no monomial -->
																+<!ELEMENT ml_regression (monomial*)>
															
 
																+<!ATTLIST ml_regression constant CDATA #REQUIRED>
															
 
																+
															
 
																+<!ELEMENT monomial EMPTY>
															
 
																+<!ATTLIST monomial name CDATA #REQUIRED>
															
 
																+<!ATTLIST monomial coef CDATA #REQUIRED>
															
 
																+
															
 
																+<!ELEMENT entry EMPTY>
															
 
																+<!ATTLIST entry footprint CDATA #REQUIRED>
															
 
																+<!ATTLIST entry size CDATA #REQUIRED>
															
 
																+<!ATTLIST entry flops CDATA "">
															
 
																+<!ATTLIST entry mean CDATA #REQUIRED>
															
 
																+<!ATTLIST entry deviation CDATA #REQUIRED>
															
 
																+<!ATTLIST entry sum CDATA "">
															
 
																+<!ATTLIST entry sum2 CDATA "">
															
 
																+<!ATTLIST entry nsample CDATA "0">
															
--- a/tools/starpu_perfmodel_display.c
+++ b/tools/starpu_perfmodel_display.c
@@ -1,6 +1,6 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2009-2014,2017                           Université de Bordeaux
															
 
																+ * Copyright (C) 2009-2014,2017,2019                      Université de Bordeaux
															
 
																  * Copyright (C) 2011,2012                                Inria
															
 
																  * Copyright (C) 2010-2017                                CNRS
															
 
																  * Copyright (C) 2011                                     Télécom-SudParis
															
@@ -31,6 +31,8 @@
 
																 #define PROGNAME "starpu_perfmodel_display"
															
 
																+/* XML format */
															
 
																+static int xml = 0;
															
 
																 /* display all available models */
															
 
																 static int plist = 0;
															
 
																 /* display directory */
															
@@ -50,10 +52,11 @@ static void usage()
 
																 	fprintf(stderr, "Display a given perfmodel\n\n");
															
 
																 	fprintf(stderr, "Usage: %s [ options ]\n", PROGNAME);
															
 
																         fprintf(stderr, "\n");
															
 
																-        fprintf(stderr, "One must specify either -l or -s\n");
															
 
																+        fprintf(stderr, "One must specify either -l or -s. -x can be used with -s\n");
															
 
																         fprintf(stderr, "Options:\n");
															
 
																         fprintf(stderr, "   -l                  display all available models\n");
															
 
																         fprintf(stderr, "   -s <symbol>         specify the symbol\n");
															
 
																+	fprintf(stderr, "   -x                  display output in XML format\n");
															
 
																         fprintf(stderr, "   -p <parameter>      specify the parameter (e.g. a, b, c, mean, stddev)\n");
															
 
																         fprintf(stderr, "   -a <arch>           specify the architecture (e.g. cpu, cpu:k, cuda)\n");
															
 
																 	fprintf(stderr, "   -f <footprint>      display the history-based model for the specified footprint\n");
															
@@ -84,7 +87,7 @@ static void parse_args(int argc, char **argv)
 
																 	};
															
 
																 	int option_index;
															
 
																-	while ((c = getopt_long(argc, argv, "dls:p:a:f:h", long_options, &option_index)) != -1)
															
 
																+	while ((c = getopt_long(argc, argv, "dls:p:a:f:hx", long_options, &option_index)) != -1)
															
 
																 	{
															
 
																 		switch (c)
															
 
																 		{
															
@@ -120,6 +123,11 @@ static void parse_args(int argc, char **argv)
 
																 			pdirectory = 1;
															
 
																 			break;
															
 
																+		case 'x':
															
 
																+			/* XML output */
															
 
																+			xml = 1;
															
 
																+			break;
															
 
																+
															
 
																 		case 'h':
															
 
																 			usage();
															
 
																 			exit(EXIT_SUCCESS);
															
@@ -169,12 +177,16 @@ int main(int argc, char **argv)
 
																 			fprintf(stderr, "The performance model for the symbol <%s> could not be loaded\n", psymbol);
															
 
																 			return 1;
															
 
																 		}
															
 
																-		uint32_t *footprint = NULL;
															
 
																-		if (pdisplay_specific_footprint == 1)
															
 
																-		{
															
 
																-			footprint = &pspecific_footprint;
															
 
																+		if (xml) {
															
 
																+			starpu_perfmodel_dump_xml(stdout, &model);
															
 
																+		} else {
															
 
																+			uint32_t *footprint = NULL;
															
 
																+			if (pdisplay_specific_footprint == 1)
															
 
																+			{
															
 
																+				footprint = &pspecific_footprint;
															
 
																+			}
															
 
																+			starpu_perfmodel_print_all(&model, parch, pparameter, footprint, stdout);
															
 
																 		}
															
 
																-		starpu_perfmodel_print_all(&model, parch, pparameter, footprint, stdout);
															
 
																 		starpu_perfmodel_unload_model(&model);
															
 
																         }