浏览代码

Add XML output for starpu_perfmodel_display and starpu_perfmodel_dump_xml() function

Samuel Thibault 6 年之前
父节点
当前提交
73d45564a4

+ 2 - 0
ChangeLog

@@ -24,6 +24,8 @@ New features:
     tile or vector size without reallocating the buffer.
   * Application can change the allocation used by StarPU with
     starpu_malloc_set_hooks()
+  * XML output for starpu_perfmodel_display and starpu_perfmodel_dump_xml()
+    function
 
 StarPU 1.3.0 (svn revision xxxx)
 ==============================================

+ 5 - 1
doc/doxygen/chapters/370_online_performance_tools.doxy

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2011,2012,2016                           Inria
  * Copyright (C) 2010-2019                                CNRS
- * Copyright (C) 2009-2011,2014,2016,2018                 Université de Bordeaux
+ * Copyright (C) 2009-2011,2014,2016,2018-2019            Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -539,6 +539,10 @@ used to get the footprint used for indexing history-based performance
 models. starpu_task_destroy() needs to be called to destroy the dummy
 task afterwards. See <c>tests/perfmodels/regression_based.c</c> for an example.
 
+The application can also request an on-the-fly XML report of the performance
+model, by calling starpu_perfmodel_dump_xml() to print the report to a
+<c>FILE*</c>.
+
 \section DataTrace Data trace and tasks length
 It is possible to get statistics about tasks length and data size by using :
 \verbatim

+ 5 - 0
include/starpu_perfmodel.h

@@ -335,6 +335,11 @@ int starpu_perfmodel_unload_model(struct starpu_perfmodel *model);
 void starpu_perfmodel_get_model_path(const char *symbol, char *path, size_t maxlen);
 
 /**
+  Dump performance model \p model to output stream \p output, in XML format.
+  The output is a complete XML document (XML declaration included) whose
+  document type declaration refers to <c>starpu-perfmodel.dtd</c>.
+*/
+void starpu_perfmodel_dump_xml(FILE *output, struct starpu_perfmodel *model);
+
+/**
    Free internal memory used for sampling directory
    management. It should only be called by an application which is not
    calling starpu_shutdown() as this function already calls it. See for

+ 4 - 0
src/Makefile.am

@@ -23,6 +23,10 @@ CLEANFILES = *.gcno *.gcda *.linkinfo
 
 EXTRA_DIST = dolib.c
 
+xml_DATA = $(srcdir)/core/perfmodel/starpu-perfmodel.dtd
+xmldir = $(pkgdatadir)
+EXTRA_DIST += core/perfmodel/starpu-perfmodel.dtd
+
 ldflags =
 
 libstarpu_so_version = $(LIBSTARPU_INTERFACE_CURRENT):$(LIBSTARPU_INTERFACE_REVISION):$(LIBSTARPU_INTERFACE_AGE)

+ 178 - 4
src/core/perfmodel/perfmodel_history.c

@@ -556,16 +556,17 @@ static void scan_history_entry(FILE *f, const char *path, struct starpu_perfmode
 	}
 }
 
-static void parse_per_arch_model_file(FILE *f, const char *path, struct starpu_perfmodel_per_arch *per_arch_model, unsigned scan_history)
+static void parse_per_arch_model_file(FILE *f, const char *path, struct starpu_perfmodel_per_arch *per_arch_model, unsigned scan_history, struct starpu_perfmodel *model)
 {
 	unsigned nentries;
+	struct starpu_perfmodel_regression_model *reg_model = &per_arch_model->regression;
 
 	_starpu_drop_comments(f);
 
 	int res = fscanf(f, "%u\n", &nentries);
 	STARPU_ASSERT_MSG(res == 1, "Incorrect performance model file %s", path);
 
-	scan_reg_model(f, path, &per_arch_model->regression);
+	scan_reg_model(f, path, reg_model);
 
 	/* parse entries */
 	unsigned i;
@@ -592,6 +593,20 @@ static void parse_per_arch_model_file(FILE *f, const char *path, struct starpu_p
 		if (scan_history)
 			insert_history_entry(entry, &per_arch_model->list, &per_arch_model->history);
 	}
+
+	if (model && model->type == STARPU_PERFMODEL_INVALID)
+	{
+		/* Tool loading a perfmodel without having the corresponding codelet */
+		if (reg_model->ncoeff != 0)
+			model->type = STARPU_MULTIPLE_REGRESSION_BASED;
+		else if (!isnan(reg_model->a) && !isnan(reg_model->b) && !isnan(reg_model->c))
+			model->type = STARPU_NL_REGRESSION_BASED;
+		else if (!isnan(reg_model->alpha) && !isnan(reg_model->beta))
+			model->type = STARPU_REGRESSION_BASED;
+		else if (nentries)
+			model->type = STARPU_HISTORY_BASED;
+		/* else unknown, leave invalid */
+	}
 }
 
 
@@ -623,7 +638,7 @@ static void parse_arch(FILE *f, const char *path, struct starpu_perfmodel *model
 		{
 			struct starpu_perfmodel_per_arch *per_arch_model = &model->state->per_arch[comb][impl];
 			model->state->per_arch_is_set[comb][impl] = 1;
-			parse_per_arch_model_file(f, path, per_arch_model, scan_history);
+			parse_per_arch_model_file(f, path, per_arch_model, scan_history, model);
 		}
 	}
 	else
@@ -634,7 +649,7 @@ static void parse_arch(FILE *f, const char *path, struct starpu_perfmodel *model
 	/* if the number of implementation is greater than STARPU_MAXIMPLEMENTATIONS
 	 * we skip the last implementation */
 	for (i = impl; i < nimpls; i++)
-		parse_per_arch_model_file(f, path, &dummy, 0);
+		parse_per_arch_model_file(f, path, &dummy, 0, NULL);
 }
 
 static enum starpu_worker_archtype _get_enum_type(int type)
@@ -909,6 +924,165 @@ static void dump_model_file(FILE *f, struct starpu_perfmodel *model)
 }
 #endif
 
+/* Write one history entry to f, as a single self-closing <entry/> XML element */
+static void dump_history_entry_xml(FILE *f, struct starpu_perfmodel_history_entry *entry)
+{
+	/* The size is printed with %lu, hence the explicit conversion */
+	const unsigned long size = (unsigned long) entry->size;
+
+	fprintf(f,
+		"      <entry footprint=\"%08x\" size=\"%lu\" flops=\"%e\" mean=\"%e\" deviation=\"%e\" sum=\"%e\" sum2=\"%e\" nsample=\"%u\"/>\n",
+		entry->footprint, size,
+		entry->flops, entry->mean, entry->deviation,
+		entry->sum, entry->sum2,
+		entry->nsample);
+}
+
+/* Write string s to f, replacing the characters which are special inside a
+ * double-quoted XML attribute value by the corresponding predefined entities */
+static void dump_xml_attr_string(FILE *f, const char *s)
+{
+	for (; *s != '\0'; s++)
+	{
+		switch (*s)
+		{
+			case '&': fputs("&amp;", f); break;
+			case '<': fputs("&lt;", f); break;
+			case '>': fputs("&gt;", f); break;
+			case '"': fputs("&quot;", f); break;
+			default: fputc(*s, f); break;
+		}
+	}
+}
+
+/* Dump to f, in XML format, the regression parameters of implementation impl
+ * on combination comb of model.
+ *
+ * Which element gets written depends on model->type:
+ *  - STARPU_REGRESSION_BASED: <l_regression/>
+ *  - STARPU_NL_REGRESSION_BASED: <nl_regression/>
+ *  - STARPU_MULTIPLE_REGRESSION_BASED: <ml_regression> with one <monomial/>
+ *    per combination of parameters
+ * Nothing is written for the other model types. */
+static void dump_reg_model_xml(FILE *f, struct starpu_perfmodel *model, int comb, int impl)
+{
+	struct starpu_perfmodel_per_arch *per_arch_model;
+
+	per_arch_model = &model->state->per_arch[comb][impl];
+	struct starpu_perfmodel_regression_model *reg_model = &per_arch_model->regression;
+
+	/*
+	 * Linear Regression model
+	 */
+
+	if (model->type == STARPU_REGRESSION_BASED)
+	{
+		fprintf(f, "      <!-- time = alpha size ^ beta -->\n");
+		fprintf(f, "      <l_regression sumlnx=\"%e\" sumlnx2=\"%e\" sumlny=\"%e\" sumlnxlny=\"%e\"", reg_model->sumlnx, reg_model->sumlnx2, reg_model->sumlny, reg_model->sumlnxlny);
+		fprintf(f, " alpha=\"");
+		_starpu_write_double(f, "%e", reg_model->alpha);
+		fprintf(f, "\" beta=\"");
+		_starpu_write_double(f, "%e", reg_model->beta);
+		fprintf(f, "\" nsample=\"%u\" minx=\"%lu\" maxx=\"%lu\"/>\n", reg_model->nsample, reg_model->minx, reg_model->maxx);
+	}
+
+	/*
+	 * Non-Linear Regression model
+	 */
+
+	else if (model->type == STARPU_NL_REGRESSION_BASED)
+	{
+		fprintf(f, "      <!-- time = a size ^b + c -->\n");
+		fprintf(f, "      <nl_regression a=\"");
+		_starpu_write_double(f, "%e", reg_model->a);
+		fprintf(f, "\" b=\"");
+		_starpu_write_double(f, "%e", reg_model->b);
+		fprintf(f, "\" c=\"");
+		_starpu_write_double(f, "%e", reg_model->c);
+		fprintf(f, "\"/>\n");
+	}
+
+	/*
+	 * Multiple Linear Regression model
+	 */
+
+	else if (model->type == STARPU_MULTIPLE_REGRESSION_BASED)
+	{
+		if (reg_model->ncoeff==0 || model->ncombinations==0 || model->combinations==NULL)
+			/* No coefficient available: emit an empty element */
+			fprintf(f, "      <ml_regression constant=\"nan\"/>\n");
+		else
+		{
+			unsigned i;
+			/* Coefficients go through _starpu_write_double() like the
+			 * coefficients of the other regression models above */
+			fprintf(f, "      <ml_regression constant=\"");
+			_starpu_write_double(f, "%e", reg_model->coeff[0]);
+			fprintf(f, "\">\n");
+			/* NOTE(review): this reads coeff[1..ncombinations], i.e. it
+			 * assumes ncoeff == ncombinations+1 - confirm */
+			for (i=0; i < model->ncombinations; i++)
+			{
+				fprintf(f, "        <monomial name=\"");
+				if (model->parameters_names == NULL)
+					fprintf(f, "c%u", i+1);
+				else
+				{
+					/* Build the monomial name as a product of the
+					 * parameters which have a non-zero exponent */
+					unsigned j;
+					int first=1;
+					for(j=0; j < model->nparameters; j++)
+					{
+						if (model->combinations[i][j] > 0)
+						{
+							if (first)
+								first=0;
+							else
+								fprintf(f, "*");
+
+							if(model->parameters_names[j] != NULL)
+								/* Names are provided by the application,
+								 * escape them to keep the XML well-formed */
+								dump_xml_attr_string(f, model->parameters_names[j]);
+							else
+								fprintf(f, "P%u", j);
+
+							if (model->combinations[i][j] > 1)
+								fprintf(f, "^%d", model->combinations[i][j]);
+						}
+					}
+				}
+				fprintf(f, "\" coef=\"");
+				_starpu_write_double(f, "%e", reg_model->coeff[i+1]);
+				fprintf(f, "\"/>\n");
+			}
+			fprintf(f, "      </ml_regression>\n");
+		}
+	}
+}
+
+/* Dump to f, in XML format, the model of implementation impl on combination
+ * comb: first the regression parameters, then every history entry */
+static void dump_per_arch_model_xml(FILE *f, struct starpu_perfmodel *model, int comb, unsigned impl)
+{
+	struct starpu_perfmodel_per_arch *arch_model = &model->state->per_arch[comb][impl];
+	struct starpu_perfmodel_history_list *node;
+
+	/* Regression parameters come first */
+	dump_reg_model_xml(f, model, comb, impl);
+
+	/* Then one <entry/> per element of the history list */
+	for (node = arch_model->list; node != NULL; node = node->next)
+		dump_history_entry_xml(f, node->entry);
+}
+
+/* Dump performance model model to stream f, as an XML document.
+ *
+ * The document is made of one <combination> per architecture combination
+ * known to the model, each of them listing its devices and then one
+ * <implementation> per implementation. The model state is read under the
+ * model read lock. */
+void starpu_perfmodel_dump_xml(FILE *f, struct starpu_perfmodel *model)
+{
+	fprintf(f, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
+	/* The name given in the document type declaration has to be the name of
+	 * the root element, otherwise the document can not be validated */
+	fprintf(f, "<!DOCTYPE perfmodel SYSTEM \"starpu-perfmodel.dtd\">\n");
+	fprintf(f, "<!-- symbol %s -->\n", model->symbol);
+	fprintf(f, "<!-- All times in us -->\n");
+	fprintf(f, "<perfmodel version=\"%u\">\n", _STARPU_PERFMODEL_VERSION);
+
+	STARPU_PTHREAD_RWLOCK_RDLOCK(&model->state->model_rwlock);
+	int ncombs = model->state->ncombs;
+	int i, impl, dev;
+
+	for(i = 0; i < ncombs; i++)
+	{
+		int comb = model->state->combs[i];
+		int ndevices = arch_combs[comb]->ndevices;
+
+		fprintf(f, "  <combination>\n");
+		/* One <device/> per device of the combination */
+		for(dev = 0; dev < ndevices; dev++)
+		{
+			/* Keep a printable value even if the assertion below is compiled out */
+			const char *type = "unknown";
+			switch (arch_combs[comb]->devices[dev].type) {
+				case STARPU_CPU_WORKER: type = "CPU"; break;
+				case STARPU_CUDA_WORKER: type = "CUDA"; break;
+				case STARPU_OPENCL_WORKER: type = "OpenCL"; break;
+				case STARPU_MIC_WORKER: type = "MIC"; break;
+				case STARPU_SCC_WORKER: type = "SCC"; break;
+				case STARPU_MPI_MS_WORKER: type = "MPI_MS"; break;
+				default: STARPU_ASSERT(0); break;
+			}
+			fprintf(f, "    <device type=\"%s\" id=\"%d\"",
+					type,
+					arch_combs[comb]->devices[dev].devid);
+			/* The number of cores is only reported for CPU devices */
+			if (arch_combs[comb]->devices[dev].type == STARPU_CPU_WORKER)
+				fprintf(f, " ncores=\"%d\"",
+						arch_combs[comb]->devices[dev].ncores);
+			fprintf(f, "/>\n");
+		}
+		int nimpls = model->state->nimpls[comb];
+		for (impl = 0; impl < nimpls; impl++)
+		{
+			/* impl is an int, print it as such */
+			fprintf(f, "    <implementation id=\"%d\">\n", impl);
+			char archname[STR_SHORT_LENGTH];
+			starpu_perfmodel_get_arch_name(arch_combs[comb], archname,  sizeof(archname), impl);
+			fprintf(f, "      <!-- %s -->\n", archname);
+			dump_per_arch_model_xml(f, model, comb, impl);
+			fprintf(f, "    </implementation>\n");
+		}
+		fprintf(f, "  </combination>\n");
+	}
+	STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock);
+	fprintf(f, "</perfmodel>\n");
+}
+
 void _starpu_perfmodel_realloc(struct starpu_perfmodel *model, int nb)
 {
 	int i;

+ 64 - 0
src/core/perfmodel/starpu-perfmodel.dtd

@@ -0,0 +1,64 @@
+<!--
+  StarPU - Runtime system for heterogeneous multicore architectures.
+  
+  Copyright (C) 2019                                     Université de Bordeaux
+  
+  StarPU is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or (at
+  your option) any later version.
+  
+  StarPU is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+  
+  See the GNU Lesser General Public License in COPYING.LGPL for more details.
+  
+  This is the DTD for StarPU performance models.
+ -->
+
+<!-- Root element: one combination per set of devices known to the model -->
+<!ELEMENT perfmodel (combination)+>
+<!ATTLIST perfmodel version CDATA "">
+
+<!-- A combination lists each of its devices (there may be several of them),
+     followed by the implementations measured on it -->
+<!ELEMENT combination (device+,implementation+)>
+
+<!-- ncores is only provided for CPU devices -->
+<!ELEMENT device EMPTY>
+<!ATTLIST device type CDATA #REQUIRED>
+<!ATTLIST device id CDATA #REQUIRED>
+<!ATTLIST device ncores CDATA "">
+
+<!-- Regression parameters, if any, followed by the history entries -->
+<!ELEMENT implementation (l_regression*,nl_regression*,ml_regression*,entry*)>
+<!ATTLIST implementation id CDATA #REQUIRED>
+
+<!-- Linear regression: time = alpha size ^ beta -->
+<!ELEMENT l_regression EMPTY>
+<!ATTLIST l_regression sumlnx CDATA "">
+<!ATTLIST l_regression sumlnx2 CDATA "">
+<!ATTLIST l_regression sumlny CDATA "">
+<!ATTLIST l_regression sumlnxlny CDATA "">
+<!ATTLIST l_regression alpha CDATA #REQUIRED>
+<!ATTLIST l_regression beta CDATA #REQUIRED>
+<!ATTLIST l_regression nsample CDATA "">
+<!ATTLIST l_regression minx CDATA "">
+<!ATTLIST l_regression maxx CDATA "">
+
+<!-- Non-linear regression: time = a size ^b + c -->
+<!ELEMENT nl_regression EMPTY>
+<!ATTLIST nl_regression a CDATA #REQUIRED>
+<!ATTLIST nl_regression b CDATA #REQUIRED>
+<!ATTLIST nl_regression c CDATA #REQUIRED>
+
+<!-- Multiple linear regression: there is no monomial when no coefficient is
+     available, in which case constant is "nan" -->
+<!ELEMENT ml_regression (monomial*)>
+<!ATTLIST ml_regression constant CDATA #REQUIRED>
+
+<!ELEMENT monomial EMPTY>
+<!ATTLIST monomial name CDATA #REQUIRED>
+<!ATTLIST monomial coef CDATA #REQUIRED>
+
+<!-- One measured history entry -->
+<!ELEMENT entry EMPTY>
+<!ATTLIST entry footprint CDATA #REQUIRED>
+<!ATTLIST entry size CDATA #REQUIRED>
+<!ATTLIST entry flops CDATA "">
+<!ATTLIST entry mean CDATA #REQUIRED>
+<!ATTLIST entry deviation CDATA #REQUIRED>
+<!ATTLIST entry sum CDATA "">
+<!ATTLIST entry sum2 CDATA "">
+<!ATTLIST entry nsample CDATA "0">

+ 20 - 8
tools/starpu_perfmodel_display.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2014,2017                           Université de Bordeaux
+ * Copyright (C) 2009-2014,2017,2019                      Université de Bordeaux
  * Copyright (C) 2011,2012                                Inria
  * Copyright (C) 2010-2017                                CNRS
  * Copyright (C) 2011                                     Télécom-SudParis
@@ -31,6 +31,8 @@
 
 #define PROGNAME "starpu_perfmodel_display"
 
+/* XML format */
+static int xml = 0;
 /* display all available models */
 static int plist = 0;
 /* display directory */
@@ -50,10 +52,11 @@ static void usage()
 	fprintf(stderr, "Display a given perfmodel\n\n");
 	fprintf(stderr, "Usage: %s [ options ]\n", PROGNAME);
         fprintf(stderr, "\n");
-        fprintf(stderr, "One must specify either -l or -s\n");
+        fprintf(stderr, "One must specify either -l or -s. -x can be used with -s\n");
         fprintf(stderr, "Options:\n");
         fprintf(stderr, "   -l                  display all available models\n");
         fprintf(stderr, "   -s <symbol>         specify the symbol\n");
+	fprintf(stderr, "   -x                  display output in XML format\n");
         fprintf(stderr, "   -p <parameter>      specify the parameter (e.g. a, b, c, mean, stddev)\n");
         fprintf(stderr, "   -a <arch>           specify the architecture (e.g. cpu, cpu:k, cuda)\n");
 	fprintf(stderr, "   -f <footprint>      display the history-based model for the specified footprint\n");
@@ -84,7 +87,7 @@ static void parse_args(int argc, char **argv)
 	};
 
 	int option_index;
-	while ((c = getopt_long(argc, argv, "dls:p:a:f:h", long_options, &option_index)) != -1)
+	while ((c = getopt_long(argc, argv, "dls:p:a:f:hx", long_options, &option_index)) != -1)
 	{
 		switch (c)
 		{
@@ -120,6 +123,11 @@ static void parse_args(int argc, char **argv)
 			pdirectory = 1;
 			break;
 
+		case 'x':
+			/* XML output format */
+			xml = 1;
+			break;
+
 		case 'h':
 			usage();
 			exit(EXIT_SUCCESS);
@@ -169,12 +177,16 @@ int main(int argc, char **argv)
 			fprintf(stderr, "The performance model for the symbol <%s> could not be loaded\n", psymbol);
 			return 1;
 		}
-		uint32_t *footprint = NULL;
-		if (pdisplay_specific_footprint == 1)
-		{
-			footprint = &pspecific_footprint;
+		if (xml) {
+			starpu_perfmodel_dump_xml(stdout, &model);
+		} else {
+			uint32_t *footprint = NULL;
+			if (pdisplay_specific_footprint == 1)
+			{
+				footprint = &pspecific_footprint;
+			}
+			starpu_perfmodel_print_all(&model, parch, pparameter, footprint, stdout);
 		}
-		starpu_perfmodel_print_all(&model, parch, pparameter, footprint, stdout);
 		starpu_perfmodel_unload_model(&model);
         }