浏览代码

Add XML output for starpu_perfmodel_display and starpu_perfmodel_dump_xml() function

Samuel Thibault 6 年之前
父节点
当前提交
73d45564a4

+ 2 - 0
ChangeLog

@@ -24,6 +24,8 @@ New features:
     tile or vector size without reallocating the buffer.
     tile or vector size without reallocating the buffer.
   * Application can change the allocation used by StarPU with
   * Application can change the allocation used by StarPU with
     starpu_malloc_set_hooks()
     starpu_malloc_set_hooks()
+  * XML output for starpu_perfmodel_display and starpu_perfmodel_dump_xml()
+    function
 
 
 StarPU 1.3.0 (svn revision xxxx)
 StarPU 1.3.0 (svn revision xxxx)
 ==============================================
 ==============================================

+ 5 - 1
doc/doxygen/chapters/370_online_performance_tools.doxy

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2011,2012,2016                           Inria
  * Copyright (C) 2011,2012,2016                           Inria
  * Copyright (C) 2010-2019                                CNRS
  * Copyright (C) 2010-2019                                CNRS
- * Copyright (C) 2009-2011,2014,2016,2018                 Université de Bordeaux
+ * Copyright (C) 2009-2011,2014,2016,2018-2019            Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -539,6 +539,10 @@ used to get the footprint used for indexing history-based performance
 models. starpu_task_destroy() needs to be called to destroy the dummy
 models. starpu_task_destroy() needs to be called to destroy the dummy
 task afterwards. See <c>tests/perfmodels/regression_based.c</c> for an example.
 task afterwards. See <c>tests/perfmodels/regression_based.c</c> for an example.
 
 
+The application can also request an on-the-fly XML report of the performance
+model, by calling starpu_perfmodel_dump_xml() to print the report to a
+<c>FILE*</c>.
+
 \section DataTrace Data trace and tasks length
 \section DataTrace Data trace and tasks length
 It is possible to get statistics about tasks length and data size by using :
 It is possible to get statistics about tasks length and data size by using :
 \verbatim
 \verbatim

+ 5 - 0
include/starpu_perfmodel.h

@@ -335,6 +335,11 @@ int starpu_perfmodel_unload_model(struct starpu_perfmodel *model);
 void starpu_perfmodel_get_model_path(const char *symbol, char *path, size_t maxlen);
 void starpu_perfmodel_get_model_path(const char *symbol, char *path, size_t maxlen);
 
 
 /**
 /**
+  Dump performance model \p model to output stream \p output, in XML format.
+*/
+void starpu_perfmodel_dump_xml(FILE *output, struct starpu_perfmodel *model);
+
+/**
    Free internal memory used for sampling directory
    Free internal memory used for sampling directory
    management. It should only be called by an application which is not
    management. It should only be called by an application which is not
    calling starpu_shutdown() as this function already calls it. See for
    calling starpu_shutdown() as this function already calls it. See for

+ 4 - 0
src/Makefile.am

@@ -23,6 +23,10 @@ CLEANFILES = *.gcno *.gcda *.linkinfo
 
 
 EXTRA_DIST = dolib.c
 EXTRA_DIST = dolib.c
 
 
+xml_DATA = $(srcdir)/core/perfmodel/starpu-perfmodel.dtd
+xmldir = $(pkgdatadir)
+EXTRA_DIST += core/perfmodel/starpu-perfmodel.dtd
+
 ldflags =
 ldflags =
 
 
 libstarpu_so_version = $(LIBSTARPU_INTERFACE_CURRENT):$(LIBSTARPU_INTERFACE_REVISION):$(LIBSTARPU_INTERFACE_AGE)
 libstarpu_so_version = $(LIBSTARPU_INTERFACE_CURRENT):$(LIBSTARPU_INTERFACE_REVISION):$(LIBSTARPU_INTERFACE_AGE)

+ 178 - 4
src/core/perfmodel/perfmodel_history.c

@@ -556,16 +556,17 @@ static void scan_history_entry(FILE *f, const char *path, struct starpu_perfmode
 	}
 	}
 }
 }
 
 
-static void parse_per_arch_model_file(FILE *f, const char *path, struct starpu_perfmodel_per_arch *per_arch_model, unsigned scan_history)
+static void parse_per_arch_model_file(FILE *f, const char *path, struct starpu_perfmodel_per_arch *per_arch_model, unsigned scan_history, struct starpu_perfmodel *model)
 {
 {
 	unsigned nentries;
 	unsigned nentries;
+	struct starpu_perfmodel_regression_model *reg_model = &per_arch_model->regression;
 
 
 	_starpu_drop_comments(f);
 	_starpu_drop_comments(f);
 
 
 	int res = fscanf(f, "%u\n", &nentries);
 	int res = fscanf(f, "%u\n", &nentries);
 	STARPU_ASSERT_MSG(res == 1, "Incorrect performance model file %s", path);
 	STARPU_ASSERT_MSG(res == 1, "Incorrect performance model file %s", path);
 
 
-	scan_reg_model(f, path, &per_arch_model->regression);
+	scan_reg_model(f, path, reg_model);
 
 
 	/* parse entries */
 	/* parse entries */
 	unsigned i;
 	unsigned i;
@@ -592,6 +593,20 @@ static void parse_per_arch_model_file(FILE *f, const char *path, struct starpu_p
 		if (scan_history)
 		if (scan_history)
 			insert_history_entry(entry, &per_arch_model->list, &per_arch_model->history);
 			insert_history_entry(entry, &per_arch_model->list, &per_arch_model->history);
 	}
 	}
+
+	if (model && model->type == STARPU_PERFMODEL_INVALID)
+	{
+		/* Tool loading a perfmodel without having the corresponding codelet */
+		if (reg_model->ncoeff != 0)
+			model->type = STARPU_MULTIPLE_REGRESSION_BASED;
+		else if (!isnan(reg_model->a) && !isnan(reg_model->b) && !isnan(reg_model->c))
+			model->type = STARPU_NL_REGRESSION_BASED;
+		else if (!isnan(reg_model->alpha) && !isnan(reg_model->beta))
+			model->type = STARPU_REGRESSION_BASED;
+		else if (nentries)
+			model->type = STARPU_HISTORY_BASED;
+		/* else unknown, leave invalid */
+	}
 }
 }
 
 
 
 
@@ -623,7 +638,7 @@ static void parse_arch(FILE *f, const char *path, struct starpu_perfmodel *model
 		{
 		{
 			struct starpu_perfmodel_per_arch *per_arch_model = &model->state->per_arch[comb][impl];
 			struct starpu_perfmodel_per_arch *per_arch_model = &model->state->per_arch[comb][impl];
 			model->state->per_arch_is_set[comb][impl] = 1;
 			model->state->per_arch_is_set[comb][impl] = 1;
-			parse_per_arch_model_file(f, path, per_arch_model, scan_history);
+			parse_per_arch_model_file(f, path, per_arch_model, scan_history, model);
 		}
 		}
 	}
 	}
 	else
 	else
@@ -634,7 +649,7 @@ static void parse_arch(FILE *f, const char *path, struct starpu_perfmodel *model
 	/* if the number of implementation is greater than STARPU_MAXIMPLEMENTATIONS
 	/* if the number of implementation is greater than STARPU_MAXIMPLEMENTATIONS
 	 * we skip the last implementation */
 	 * we skip the last implementation */
 	for (i = impl; i < nimpls; i++)
 	for (i = impl; i < nimpls; i++)
-		parse_per_arch_model_file(f, path, &dummy, 0);
+		parse_per_arch_model_file(f, path, &dummy, 0, NULL);
 }
 }
 
 
 static enum starpu_worker_archtype _get_enum_type(int type)
 static enum starpu_worker_archtype _get_enum_type(int type)
@@ -909,6 +924,165 @@ static void dump_model_file(FILE *f, struct starpu_perfmodel *model)
 }
 }
 #endif
 #endif
 
 
+/* Write one history entry to f as a self-closing <entry/> XML element. */
+static void dump_history_entry_xml(FILE *f, struct starpu_perfmodel_history_entry *entry)
+{
+	/* The size is converted to unsigned long to match the %lu conversion */
+	unsigned long size = (unsigned long) entry->size;
+
+	fprintf(f, "      <entry footprint=\"%08x\" size=\"%lu\" flops=\"%e\" mean=\"%e\" deviation=\"%e\" sum=\"%e\" sum2=\"%e\" nsample=\"%u\"/>\n",
+		entry->footprint, size,
+		entry->flops, entry->mean, entry->deviation,
+		entry->sum, entry->sum2,
+		entry->nsample);
+}
+
+/*
+ * Write to f, as XML, the regression part of the model for architecture
+ * combination comb and implementation impl. The output depends on model->type:
+ *  - STARPU_REGRESSION_BASED: an <l_regression/> element,
+ *  - STARPU_NL_REGRESSION_BASED: an <nl_regression/> element,
+ *  - STARPU_MULTIPLE_REGRESSION_BASED: an <ml_regression> element holding one
+ *    <monomial/> per combination, or an empty one with constant="nan" when no
+ *    coefficient or combination is available.
+ * Nothing is written for any other model type.
+ */
+static void dump_reg_model_xml(FILE *f, struct starpu_perfmodel *model, int comb, int impl)
+{
+	struct starpu_perfmodel_regression_model *reg = &model->state->per_arch[comb][impl].regression;
+	unsigned c, p;
+
+	switch (model->type)
+	{
+	case STARPU_REGRESSION_BASED:
+		/* Linear regression model */
+		fputs("      <!-- time = alpha size ^ beta -->\n", f);
+		fprintf(f, "      <l_regression sumlnx=\"%e\" sumlnx2=\"%e\" sumlny=\"%e\" sumlnxlny=\"%e\"", reg->sumlnx, reg->sumlnx2, reg->sumlny, reg->sumlnxlny);
+		fputs(" alpha=\"", f);
+		_starpu_write_double(f, "%e", reg->alpha);
+		fputs("\" beta=\"", f);
+		_starpu_write_double(f, "%e", reg->beta);
+		fprintf(f, "\" nsample=\"%u\" minx=\"%lu\" maxx=\"%lu\"/>\n", reg->nsample, reg->minx, reg->maxx);
+		break;
+
+	case STARPU_NL_REGRESSION_BASED:
+		/* Non-linear regression model */
+		fputs("      <!-- time = a size ^b + c -->\n", f);
+		fputs("      <nl_regression a=\"", f);
+		_starpu_write_double(f, "%e", reg->a);
+		fputs("\" b=\"", f);
+		_starpu_write_double(f, "%e", reg->b);
+		fputs("\" c=\"", f);
+		_starpu_write_double(f, "%e", reg->c);
+		fputs("\"/>\n", f);
+		break;
+
+	case STARPU_MULTIPLE_REGRESSION_BASED:
+		/* Multiple linear regression model */
+		if (reg->ncoeff == 0 || model->ncombinations == 0 || model->combinations == NULL)
+		{
+			/* Nothing to describe: emit an empty element */
+			fputs("      <ml_regression constant=\"nan\"/>\n", f);
+			break;
+		}
+
+		/* coeff[0] is written as the constant, coeff[c+1] goes with combination c */
+		fprintf(f, "      <ml_regression constant=\"%e\">\n", reg->coeff[0]);
+		for (c = 0; c < model->ncombinations; c++)
+		{
+			fputs("        <monomial name=\"", f);
+			if (model->parameters_names == NULL)
+			{
+				/* No names at all: fall back to a 1-based generic name */
+				fprintf(f, "c%u", c+1);
+			}
+			else
+			{
+				/* Separator written before each factor but the first one */
+				const char *sep = "";
+
+				for (p = 0; p < model->nparameters; p++)
+				{
+					/* Parameters with a null exponent are not part of the monomial */
+					if (!(model->combinations[c][p] > 0))
+						continue;
+
+					fputs(sep, f);
+					sep = "*";
+
+					if (model->parameters_names[p] == NULL)
+						fprintf(f, "P%u", p);
+					else
+						fputs(model->parameters_names[p], f);
+
+					/* The exponent is only printed when it is not 1 */
+					if (model->combinations[c][p] > 1)
+						fprintf(f, "^%d", model->combinations[c][p]);
+				}
+			}
+			fprintf(f, "\" coef=\"%e\"/>\n", reg->coeff[c+1]);
+		}
+		fputs("      </ml_regression>\n", f);
+		break;
+
+	default:
+		/* Other model types have no regression data to dump */
+		break;
+	}
+}
+
+/*
+ * Write to f the XML description of one (combination, implementation) pair:
+ * the regression model first, then one <entry/> element per node of the
+ * history list.
+ */
+static void dump_per_arch_model_xml(FILE *f, struct starpu_perfmodel *model, int comb, unsigned impl)
+{
+	struct starpu_perfmodel_history_list *node;
+
+	/* Regression data comes first */
+	dump_reg_model_xml(f, model, comb, impl);
+
+	/* Then every history entry, in list order */
+	for (node = model->state->per_arch[comb][impl].list; node != NULL; node = node->next)
+		dump_history_entry_xml(f, node->entry);
+}
+
+/*
+ * Dump performance model model to stream f as an XML document.
+ *
+ * The document is made of an XML prolog, a DOCTYPE referring to
+ * starpu-perfmodel.dtd, and a <perfmodel> root element holding one
+ * <combination> per architecture combination of the model. Each combination
+ * lists its <device/> elements followed by one <implementation> per
+ * implementation, filled by dump_per_arch_model_xml().
+ *
+ * The model read lock is held while the combinations are walked.
+ */
+void starpu_perfmodel_dump_xml(FILE *f, struct starpu_perfmodel *model)
+{
+	fprintf(f, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
+	/* The DOCTYPE name has to be the name of the root element, otherwise
+	 * the document cannot be validated against the DTD */
+	fprintf(f, "<!DOCTYPE perfmodel SYSTEM \"starpu-perfmodel.dtd\">\n");
+	fprintf(f, "<!-- symbol %s -->\n", model->symbol);
+	fprintf(f, "<!-- All times in us -->\n");
+	fprintf(f, "<perfmodel version=\"%u\">\n", _STARPU_PERFMODEL_VERSION);
+
+	STARPU_PTHREAD_RWLOCK_RDLOCK(&model->state->model_rwlock);
+	int ncombs = model->state->ncombs;
+	int i, impl, dev;
+
+	for(i = 0; i < ncombs; i++)
+	{
+		int comb = model->state->combs[i];
+		int ndevices = arch_combs[comb]->ndevices;
+
+		fprintf(f, "  <combination>\n");
+		for(dev = 0; dev < ndevices; dev++)
+		{
+			const char *type;
+			switch (arch_combs[comb]->devices[dev].type)
+			{
+				case STARPU_CPU_WORKER: type = "CPU"; break;
+				case STARPU_CUDA_WORKER: type = "CUDA"; break;
+				case STARPU_OPENCL_WORKER: type = "OpenCL"; break;
+				case STARPU_MIC_WORKER: type = "MIC"; break;
+				case STARPU_SCC_WORKER: type = "SCC"; break;
+				case STARPU_MPI_MS_WORKER: type = "MPI_MS"; break;
+				default:
+					STARPU_ASSERT(0);
+					/* Keep type defined in case the assertion does not abort */
+					type = "unknown";
+					break;
+			}
+			fprintf(f, "    <device type=\"%s\" id=\"%d\"",
+					type,
+					arch_combs[comb]->devices[dev].devid);
+			/* The number of cores is only reported for CPU devices */
+			if (arch_combs[comb]->devices[dev].type == STARPU_CPU_WORKER)
+				fprintf(f, " ncores=\"%d\"",
+						arch_combs[comb]->devices[dev].ncores);
+			fprintf(f, "/>\n");
+		}
+		int nimpls = model->state->nimpls[comb];
+		for (impl = 0; impl < nimpls; impl++)
+		{
+			/* impl is an int, so it is printed with %d */
+			fprintf(f, "    <implementation id=\"%d\">\n", impl);
+			char archname[STR_SHORT_LENGTH];
+			starpu_perfmodel_get_arch_name(arch_combs[comb], archname, sizeof(archname), impl);
+			fprintf(f, "      <!-- %s -->\n", archname);
+			dump_per_arch_model_xml(f, model, comb, impl);
+			fprintf(f, "    </implementation>\n");
+		}
+		fprintf(f, "  </combination>\n");
+	}
+	STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock);
+	fprintf(f, "</perfmodel>\n");
+}
+
 void _starpu_perfmodel_realloc(struct starpu_perfmodel *model, int nb)
 void _starpu_perfmodel_realloc(struct starpu_perfmodel *model, int nb)
 {
 {
 	int i;
 	int i;

+ 64 - 0
src/core/perfmodel/starpu-perfmodel.dtd

@@ -0,0 +1,64 @@
+<!--
+  StarPU - Runtime system for heterogeneous multicore architectures.
+  
+  Copyright (C) 2019                                     Université de Bordeaux
+  
+  StarPU is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or (at
+  your option) any later version.
+  
+  StarPU is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+  
+  See the GNU Lesser General Public License in COPYING.LGPL for more details.
+  
+  This is the DTD for StarPU performance models.
+ -->
+
+<!-- Root element, holding the architecture combinations of the model -->
+<!ELEMENT perfmodel (combination)+>
+<!ATTLIST perfmodel version CDATA "">
+
+<!-- A combination lists one or more devices, then its implementations -->
+<!ELEMENT combination (device+,implementation+)>
+
+<!ELEMENT device EMPTY>
+<!ATTLIST device type CDATA #REQUIRED>
+<!ATTLIST device id CDATA #REQUIRED>
+<!ATTLIST device ncores CDATA "">
+
+<!-- An implementation holds its regression model, if any, then its history entries -->
+<!ELEMENT implementation (l_regression*,nl_regression*,ml_regression*,entry*)>
+<!ATTLIST implementation id CDATA #REQUIRED>
+
+<!-- time = alpha size ^ beta -->
+<!ELEMENT l_regression EMPTY>
+<!ATTLIST l_regression sumlnx CDATA "">
+<!ATTLIST l_regression sumlnx2 CDATA "">
+<!ATTLIST l_regression sumlny CDATA "">
+<!ATTLIST l_regression sumlnxlny CDATA "">
+<!ATTLIST l_regression alpha CDATA #REQUIRED>
+<!ATTLIST l_regression beta CDATA #REQUIRED>
+<!ATTLIST l_regression nsample CDATA "">
+<!ATTLIST l_regression minx CDATA "">
+<!ATTLIST l_regression maxx CDATA "">
+
+<!-- time = a size ^b + c -->
+<!ELEMENT nl_regression EMPTY>
+<!ATTLIST nl_regression a CDATA #REQUIRED>
+<!ATTLIST nl_regression b CDATA #REQUIRED>
+<!ATTLIST nl_regression c CDATA #REQUIRED>
+
+<!-- May have no monomial at all: an empty element with constant="nan" is
+     written when no coefficient or combination is available -->
+<!ELEMENT ml_regression (monomial*)>
+<!ATTLIST ml_regression constant CDATA #REQUIRED>
+
+<!ELEMENT monomial EMPTY>
+<!ATTLIST monomial name CDATA #REQUIRED>
+<!ATTLIST monomial coef CDATA #REQUIRED>
+
+<!-- One history-based measurement -->
+<!ELEMENT entry EMPTY>
+<!ATTLIST entry footprint CDATA #REQUIRED>
+<!ATTLIST entry size CDATA #REQUIRED>
+<!ATTLIST entry flops CDATA "">
+<!ATTLIST entry mean CDATA #REQUIRED>
+<!ATTLIST entry deviation CDATA #REQUIRED>
+<!ATTLIST entry sum CDATA "">
+<!ATTLIST entry sum2 CDATA "">
+<!ATTLIST entry nsample CDATA "0">

+ 20 - 8
tools/starpu_perfmodel_display.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2009-2014,2017                           Université de Bordeaux
+ * Copyright (C) 2009-2014,2017,2019                      Université de Bordeaux
  * Copyright (C) 2011,2012                                Inria
  * Copyright (C) 2011,2012                                Inria
  * Copyright (C) 2010-2017                                CNRS
  * Copyright (C) 2010-2017                                CNRS
  * Copyright (C) 2011                                     Télécom-SudParis
  * Copyright (C) 2011                                     Télécom-SudParis
@@ -31,6 +31,8 @@
 
 
 #define PROGNAME "starpu_perfmodel_display"
 #define PROGNAME "starpu_perfmodel_display"
 
 
+/* XML format */
+static int xml = 0;
 /* display all available models */
 /* display all available models */
 static int plist = 0;
 static int plist = 0;
 /* display directory */
 /* display directory */
@@ -50,10 +52,11 @@ static void usage()
 	fprintf(stderr, "Display a given perfmodel\n\n");
 	fprintf(stderr, "Display a given perfmodel\n\n");
 	fprintf(stderr, "Usage: %s [ options ]\n", PROGNAME);
 	fprintf(stderr, "Usage: %s [ options ]\n", PROGNAME);
         fprintf(stderr, "\n");
         fprintf(stderr, "\n");
-        fprintf(stderr, "One must specify either -l or -s\n");
+        fprintf(stderr, "One must specify either -l or -s. -x can be used with -s\n");
         fprintf(stderr, "Options:\n");
         fprintf(stderr, "Options:\n");
         fprintf(stderr, "   -l                  display all available models\n");
         fprintf(stderr, "   -l                  display all available models\n");
         fprintf(stderr, "   -s <symbol>         specify the symbol\n");
         fprintf(stderr, "   -s <symbol>         specify the symbol\n");
+	fprintf(stderr, "   -x                  display output in XML format\n");
         fprintf(stderr, "   -p <parameter>      specify the parameter (e.g. a, b, c, mean, stddev)\n");
         fprintf(stderr, "   -p <parameter>      specify the parameter (e.g. a, b, c, mean, stddev)\n");
         fprintf(stderr, "   -a <arch>           specify the architecture (e.g. cpu, cpu:k, cuda)\n");
         fprintf(stderr, "   -a <arch>           specify the architecture (e.g. cpu, cpu:k, cuda)\n");
 	fprintf(stderr, "   -f <footprint>      display the history-based model for the specified footprint\n");
 	fprintf(stderr, "   -f <footprint>      display the history-based model for the specified footprint\n");
@@ -84,7 +87,7 @@ static void parse_args(int argc, char **argv)
 	};
 	};
 
 
 	int option_index;
 	int option_index;
-	while ((c = getopt_long(argc, argv, "dls:p:a:f:h", long_options, &option_index)) != -1)
+	while ((c = getopt_long(argc, argv, "dls:p:a:f:hx", long_options, &option_index)) != -1)
 	{
 	{
 		switch (c)
 		switch (c)
 		{
 		{
@@ -120,6 +123,11 @@ static void parse_args(int argc, char **argv)
 			pdirectory = 1;
 			pdirectory = 1;
 			break;
 			break;
 
 
+		case 'x':
+			/* XML output */
+			xml = 1;
+			break;
+
 		case 'h':
 		case 'h':
 			usage();
 			usage();
 			exit(EXIT_SUCCESS);
 			exit(EXIT_SUCCESS);
@@ -169,12 +177,16 @@ int main(int argc, char **argv)
 			fprintf(stderr, "The performance model for the symbol <%s> could not be loaded\n", psymbol);
 			fprintf(stderr, "The performance model for the symbol <%s> could not be loaded\n", psymbol);
 			return 1;
 			return 1;
 		}
 		}
-		uint32_t *footprint = NULL;
+		if (xml) {
-		if (pdisplay_specific_footprint == 1)
+			starpu_perfmodel_dump_xml(stdout, &model);
-		{
+		} else {
-			footprint = &pspecific_footprint;
+			uint32_t *footprint = NULL;
+			if (pdisplay_specific_footprint == 1)
+			{
+				footprint = &pspecific_footprint;
+			}
+			starpu_perfmodel_print_all(&model, parch, pparameter, footprint, stdout);
 		}
 		}
-		starpu_perfmodel_print_all(&model, parch, pparameter, footprint, stdout);
 		starpu_perfmodel_unload_model(&model);
 		starpu_perfmodel_unload_model(&model);
         }
         }