
Merge branch 'master' of git+ssh://scm.gforge.inria.fr/gitroot/starpu/starpu

Samuel Thibault 6 years ago
commit 4c9baf958b

+ 1 - 0
AUTHORS

@@ -21,6 +21,7 @@ Khorsi Yanis, Inria, <yanis.khorsi@inria.fr>
 Lambert Thibaut, Inria, <thibaud.lambert@inria.fr>
 Leria Erwan, University of Bordeaux, <erwan.leria@etu.u-bordeaux.fr>
 Lizé Benoît, Airbus, <benoit.lize@gmail.com>
+Makni Mariem, Inria, <mariem.makni@inria.fr>
 Nakov Stojce, Inria, <stojce.nakov@inria.fr>
 Namyst Raymond, University of Bordeaux, <raymond.namyst@labri.fr>
 Nesi Lucas Leandro, Federal University of Rio Grande do Sul (UFRGS), <llnesi@inf.ufrgs.br>

+ 4 - 2
doc/doxygen/chapters/101_building.doxy

@@ -504,8 +504,10 @@ multiplication using BLAS and cuBLAS. They output the obtained GFlops.
 It can also be convenient to try simulated benchmarks, if you want to give a try
 at CPU-GPU scheduling without actually having a GPU at hand. This can be done by
 using the simgrid version of StarPU: first install the simgrid simulator from
-http://simgrid.gforge.inria.fr/ (we tested with simgrid 3.11, 3.12 and 3.13, other versions
-may have compatibility issues), then configure StarPU with \ref enable-simgrid
+http://simgrid.gforge.inria.fr/ (we tested with simgrid 3.11 to 3.16 and
+3.18 to 3.22; other versions may have compatibility issues, 3.17 notably does
+not build at all, and MPI simulation does not work with version 3.22),
+then configure StarPU with \ref enable-simgrid
 "--enable-simgrid" and rebuild and install it, and then you can simulate the performance for a
 few virtualized systems shipped along StarPU: attila, mirage, idgraf, and sirocco.
 
 

+ 26 - 12
doc/doxygen/chapters/320_scheduling.doxy

@@ -166,20 +166,34 @@ be obtained from the machine power supplier.
 The energy actually consumed by the total execution can be displayed by setting
 <c>export STARPU_PROFILING=1 STARPU_WORKER_STATS=1</c> .
 
-On-line task consumption measurement is currently only supported through the
+For OpenCL devices, on-line task consumption measurement is currently supported through the
 <c>CL_PROFILING_POWER_CONSUMED</c> OpenCL extension, implemented in the MoviSim
-simulator. Applications can however provide explicit measurements by
-using the function starpu_perfmodel_update_history() (examplified in \ref PerformanceModelExample
-with the <c>energy_model</c> performance model). Fine-grain
-measurement is often not feasible with the feedback provided by the hardware, so
-the user can for instance run a given task a thousand times, measure the global
+simulator.
+
+For CUDA devices, on-line task consumption measurement is supported on V100
+cards and beyond. This however only works for quite long tasks, since the
+measurement granularity is about 10ms.
+
+Applications can however provide explicit measurements by using the function
+starpu_perfmodel_update_history() (exemplified in \ref PerformanceModelExample
+with the <c>energy_model</c> performance model). Fine-grain measurement
+is often not feasible with the feedback provided by the hardware, so the
+user can for instance run a given task a thousand times, measure the global
 consumption for that series of tasks, divide it by a thousand, repeat for
-varying kinds of tasks and task sizes, and eventually feed StarPU
-with these manual measurements through starpu_perfmodel_update_history().
-For instance, for CUDA devices, <c>nvidia-smi -q -d POWER</c> can be used to get
-the current consumption in Watt. Multiplying this value by the average duration
-of a single task gives the consumption of the task in Joules, which can be given
-to starpu_perfmodel_update_history().
+varying kinds of tasks and task sizes, and eventually feed StarPU with these
+manual measurements through starpu_perfmodel_update_history().  For instance,
+for CUDA devices, <c>nvidia-smi -q -d POWER</c> can be used to get the current
+consumption in Watt. Multiplying this value by the average duration of a
+single task gives the consumption of the task in Joules, which can be given to
+starpu_perfmodel_update_history().
+
+Another way to provide the energy performance is to define a
+perfmodel with starpu_perfmodel::type ::STARPU_PER_ARCH, and set the
+starpu_perfmodel::arch_cost_function field to a function which shall return the
+estimated consumption of the task in Joules. Such a function can for instance
+use starpu_task_expected_length() on the task (in µs), multiplied by the
+typical power consumption of the device, e.g. in W, and divided by 1000000 to
+get Joules.
 
 
 \section ExistingModularizedSchedulers Modularized Schedulers
 
 
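A minimal sketch of the manual-measurement path described in the hunk above, assuming a hypothetical helper and model symbol; only starpu_perfmodel_update_history() and the ::STARPU_HISTORY_BASED model type come from the documentation, and CPU 0 / implementation 0 are arbitrary choices for illustration:

\code{.c}
#include <starpu.h>

/* History-based energy model fed from manual measurements
 * (the symbol is illustrative). */
static struct starpu_perfmodel measured_energy_model =
{
	.type = STARPU_HISTORY_BASED,
	.symbol = "my_codelet_energy",
};

/* Hypothetical helper: record an energy value (in Joules) for a given task
 * and architecture, e.g. the metered consumption of a thousand identical
 * tasks divided by a thousand. cpuid 0 and implementation 0 are assumed. */
static void feed_energy_measurement(struct starpu_task *task,
				    struct starpu_perfmodel_arch *arch,
				    double joules)
{
	starpu_perfmodel_update_history(&measured_energy_model, task, arch, 0, 0, joules);
}
\endcode

The Joules figure can for instance be derived from the <c>nvidia-smi -q -d POWER</c> reading multiplied by the average task duration, as the paragraph above describes.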
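And a similar sketch of the analytic approach, assuming an illustrative 200 W device power draw and model symbol; starpu_perfmodel::arch_cost_function, ::STARPU_PER_ARCH and starpu_task_expected_length() are the interfaces named in the documentation:

\code{.c}
#include <starpu.h>

/* Assumed typical power draw of the target device, in W (illustrative). */
#define ASSUMED_DEVICE_WATTS 200.0

/* Estimated energy in Joules: expected length (µs) * power (W) / 1000000. */
static double energy_cost_function(struct starpu_task *task,
				   struct starpu_perfmodel_arch *arch,
				   unsigned nimpl)
{
	double length_us = starpu_task_expected_length(task, arch, nimpl);
	return length_us * ASSUMED_DEVICE_WATTS / 1000000.;
}

static struct starpu_perfmodel energy_model =
{
	.type = STARPU_PER_ARCH,
	.symbol = "my_codelet_energy_model",
	.arch_cost_function = energy_cost_function,
};
\endcode

Such a model would then be attached to a codelet through its energy_model field, next to the usual time performance model.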

+ 4 - 0
doc/doxygen/chapters/470_simgrid.doxy

@@ -16,6 +16,10 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/*
+ * NOTE: XXX: also update simgrid versions in 101_building.doxy !!
+ */
+
 /*! \page SimGridSupport SimGrid Support

 StarPU can use Simgrid in order to simulate execution on an arbitrary

+ 5 - 5
src/core/perfmodel/perfmodel_history.c

@@ -742,7 +742,7 @@ static void check_per_arch_model(struct starpu_perfmodel *model, int comb, unsig
 	struct starpu_perfmodel_history_list *ptr = NULL;
 	unsigned nentries = 0;
 
-	if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
+	if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED  || model->type == STARPU_REGRESSION_BASED)
 	{
 		/* Dump the list of all entries in the history */
 		ptr = per_arch_model->list;
@@ -760,7 +760,7 @@ static void check_per_arch_model(struct starpu_perfmodel *model, int comb, unsig
 	check_reg_model(model, comb, impl);

 	/* Dump the history into the model file in case it is necessary */
-	if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
+	if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED || model->type == STARPU_REGRESSION_BASED)
 	{
 		ptr = per_arch_model->list;
 		while (ptr)
@@ -779,7 +779,7 @@ static void dump_per_arch_model_file(FILE *f, struct starpu_perfmodel *model, in
 	struct starpu_perfmodel_history_list *ptr = NULL;
 	unsigned nentries = 0;
 
-	if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
+       if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED || model->type == STARPU_REGRESSION_BASED)
 	{
 		/* Dump the list of all entries in the history */
 		ptr = per_arch_model->list;
@@ -800,7 +800,7 @@ static void dump_per_arch_model_file(FILE *f, struct starpu_perfmodel *model, in
 	dump_reg_model(f, model, comb, impl);

 	/* Dump the history into the model file in case it is necessary */
-	if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
+       if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED || model->type == STARPU_REGRESSION_BASED)
 	{
 		fprintf(f, "# hash\t\tsize\t\tflops\t\tmean (us)\tdev (us)\tsum\t\tsum2\t\tn\n");
 		ptr = per_arch_model->list;
@@ -1861,7 +1861,7 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 			model->state->per_arch_is_set[comb][impl] = 1;
 		}
 
-		if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
+		if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED || model->type == STARPU_REGRESSION_BASED)
 		{
 			struct starpu_perfmodel_history_entry *entry;
 			struct starpu_perfmodel_history_table *elt;