6 years ago · ca2192b8bc
--- a/AUTHORS
+++ b/AUTHORS
@@ -21,6 +21,7 @@ Khorsi Yanis, Inria, <yanis.khorsi@inria.fr>
 
				 Lambert Thibaut, Inria, <thibaud.lambert@inria.fr>
			
 
				 Leria Erwan, University of Bordeaux, <erwan.leria@etu.u-bordeaux.fr>
			
 
				 Lizé Benoît, Airbus, <benoit.lize@gmail.com>
			
 
				+Makni Mariem, Inria, <mariem.makni@inria.fr>
			
 
				 Nakov Stojce, Inria, <stojce.nakov@inria.fr>
			
 
				 Namyst Raymond, University of Bordeaux, <raymond.namyst@labri.fr>
			
 
				 Nesi Lucas Leandro, Federal University of Rio Grande do Sul (UFRGS), <llnesi@inf.ufrgs.br>
			
--- a/doc/doxygen/chapters/101_building.doxy
+++ b/doc/doxygen/chapters/101_building.doxy
@@ -504,8 +504,10 @@ multiplication using BLAS and cuBLAS. They output the obtained GFlops.
 
				 It can also be convenient to try simulated benchmarks, if you want to give a try
			
 
				 at CPU-GPU scheduling without actually having a GPU at hand. This can be done by
			
 
				 using the simgrid version of StarPU: first install the simgrid simulator from
			
 
				-http://simgrid.gforge.inria.fr/ (we tested with simgrid 3.11, 3.12 and 3.13, other versions
			
 
				-may have compatibility issues), then configure StarPU with \ref enable-simgrid
			
 
				+http://simgrid.gforge.inria.fr/ (we tested with simgrid from 3.11 to 3.16, and
			
 
				+3.18 to 3.22; other versions may have compatibility issues, and 3.17 notably does
			
 
				+not build at all. MPI simulation does not work with version 3.22),
			
 
				+then configure StarPU with \ref enable-simgrid
			
 
				 "--enable-simgrid" and rebuild and install it, and then you can simulate the performance for a
			
 
				 few virtualized systems shipped along StarPU: attila, mirage, idgraf, and sirocco.
			
 
				 
			
--- a/doc/doxygen/chapters/320_scheduling.doxy
+++ b/doc/doxygen/chapters/320_scheduling.doxy
@@ -166,20 +166,34 @@ be obtained from the machine power supplier.
 
				 The energy actually consumed by the total execution can be displayed by setting
			
 
				 <c>export STARPU_PROFILING=1 STARPU_WORKER_STATS=1</c> .
			
 
				 
			
 
				-On-line task consumption measurement is currently only supported through the
			
 
				+For OpenCL devices, on-line task consumption measurement is currently supported through the
			
 
				 <c>CL_PROFILING_POWER_CONSUMED</c> OpenCL extension, implemented in the MoviSim
			
 
				-simulator. Applications can however provide explicit measurements by
			
 
				-using the function starpu_perfmodel_update_history() (examplified in \ref PerformanceModelExample
			
 
				-with the <c>energy_model</c> performance model). Fine-grain
			
 
				-measurement is often not feasible with the feedback provided by the hardware, so
			
 
				-the user can for instance run a given task a thousand times, measure the global
			
 
				+simulator.
			
 
				+
			
 
				+For CUDA devices, on-line task consumption measurement is supported on V100
			
 
				+cards and beyond. This however only works for quite long tasks, since the
			
 
				+measurement granularity is about 10ms.
			
 
				+
			
 
				+Applications can however provide explicit measurements by using the function
			
 
				+starpu_perfmodel_update_history() (exemplified in \ref PerformanceModelExample
			
 
				+with the <c>energy_model</c> performance model). Fine-grain measurement
			
 
				+is often not feasible with the feedback provided by the hardware, so the
			
 
				+user can for instance run a given task a thousand times, measure the global
			
 
				 consumption for that series of tasks, divide it by a thousand, repeat for
			
 
				-varying kinds of tasks and task sizes, and eventually feed StarPU
			
 
				-with these manual measurements through starpu_perfmodel_update_history().
			
 
				-For instance, for CUDA devices, <c>nvidia-smi -q -d POWER</c> can be used to get
			
 
				-the current consumption in Watt. Multiplying this value by the average duration
			
 
				-of a single task gives the consumption of the task in Joules, which can be given
			
 
				-to starpu_perfmodel_update_history().
			
 
				+varying kinds of tasks and task sizes, and eventually feed StarPU with these
			
 
				+manual measurements through starpu_perfmodel_update_history().  For instance,
			
 
				+for CUDA devices, <c>nvidia-smi -q -d POWER</c> can be used to get the current
			
 
				+consumption in Watt. Multiplying this value by the average duration of a
			
 
				+single task gives the consumption of the task in Joules, which can be given to
			
 
				+starpu_perfmodel_update_history().
			
 
				+
			
 
				+Another way to provide the energy performance is to define a
			
 
				+perfmodel with starpu_perfmodel::type ::STARPU_PER_ARCH, and set the
			
 
				+starpu_perfmodel::arch_cost_function field to a function which shall return the
			
 
				+estimated consumption of the task in Joules. Such a function can for instance
			
 
				+use starpu_task_expected_length() on the task (in µs), multiplied by the
			
 
				+typical power consumption of the device, e.g. in W, and divided by 1000000 to
			
 
				+get Joules.
			
 
				 
			
 
				 \section ExistingModularizedSchedulers Modularized Schedulers
			
 
				 
			
--- a/doc/doxygen/chapters/470_simgrid.doxy
+++ b/doc/doxygen/chapters/470_simgrid.doxy
@@ -16,6 +16,10 @@
 
				  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * NOTE: XXX: also update simgrid versions in 101_building.doxy !!
			
 
				+ */
			
 
				+
			
 
				 /*! \page SimGridSupport SimGrid Support
			
 
				 
			
 
				 StarPU can use Simgrid in order to simulate execution on an arbitrary
			
--- a/examples/mult/xgemm.c
+++ b/examples/mult/xgemm.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009-2017                                Université de Bordeaux
			
 
				+ * Copyright (C) 2009-2017, 2019                                Université de Bordeaux
			
 
				  * Copyright (C) 2012,2013                                Inria
			
 
				  * Copyright (C) 2017                                     Erwan Leria
			
 
				  * Copyright (C) 2010                                     Mehdi Juhoor
			
@@ -269,6 +269,12 @@ static void parse_args(int argc, char **argv)
 
				 			xdim = strtol(argv[++i], &argptr, 10);
			
 
				 		}
			
 
				 
			
 
				+		else if (strcmp(argv[i], "-xy") == 0)
			
 
				+		{
			
 
				+			char *argptr;
			
 
				+			xdim = ydim = strtol(argv[++i], &argptr, 10);
			
 
				+		}
			
 
				+
			
 
				 		else if (strcmp(argv[i], "-y") == 0)
			
 
				 		{
			
 
				 			char *argptr;
			
@@ -310,7 +316,7 @@ static void parse_args(int argc, char **argv)
 
				 
			
 
				 		else if (strcmp(argv[i], "-help") == 0 || strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-h") == 0)
			
 
				 		{
			
 
				-			fprintf(stderr,"Usage: %s [-nblocks n] [-nblocksx x] [-nblocksy y] [-x x] [-y y] [-z z] [-size size] [-iter iter] [-bound] [-check] [-spmd]\n", argv[0]);
			
 
				+			fprintf(stderr,"Usage: %s [-nblocks n] [-nblocksx x] [-nblocksy y] [-x x] [-y y] [-xy n] [-z z] [-size size] [-iter iter] [-bound] [-check] [-spmd]\n", argv[0]);
			
 
				 			fprintf(stderr,"Currently selected: %ux%u * %ux%u and %ux%u blocks, %u iterations\n", zdim, ydim, xdim, zdim, nslicesx, nslicesy, niter);
			
 
				 			exit(EXIT_SUCCESS);
			
 
				 		}
			
--- a/mpi/examples/user_datatype/my_interface.c
+++ b/mpi/examples/user_datatype/my_interface.c
@@ -111,8 +111,6 @@ static void data_register_data_handle(starpu_data_handle_t handle, unsigned home
 
				 {
			
 
				 	struct starpu_my_data_interface *my_data_interface = (struct starpu_my_data_interface *) data_interface;
			
 
				 
			
 
				-	struct starpu_my_data *data = (struct starpu_my_data *)my_data_interface->ptr;
			
 
				-
			
 
				 	unsigned node;
			
 
				 	for (node = 0; node < STARPU_MAXNODES; node++)
			
 
				 	{
			
--- a/mpi/src/starpu_mpi_cache.c
+++ b/mpi/src/starpu_mpi_cache.c
@@ -98,7 +98,6 @@ void _starpu_mpi_cache_shutdown()
 
				 	}
			
 
				 	STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex);
			
 
				 	STARPU_PTHREAD_MUTEX_DESTROY(&_cache_mutex);
			
 
				-	free(_cache_data);
			
 
				 	_starpu_mpi_cache_stats_shutdown();
			
 
				 }
			
 
				 
			
--- a/src/core/perfmodel/perfmodel_history.c
+++ b/src/core/perfmodel/perfmodel_history.c
@@ -742,7 +742,7 @@ static void check_per_arch_model(struct starpu_perfmodel *model, int comb, unsig
 
				 	struct starpu_perfmodel_history_list *ptr = NULL;
			
 
				 	unsigned nentries = 0;
			
 
				 
			
 
				-	if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
			
 
				+	if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED  || model->type == STARPU_REGRESSION_BASED)
			
 
				 	{
			
 
				 		/* Dump the list of all entries in the history */
			
 
				 		ptr = per_arch_model->list;
			
@@ -760,7 +760,7 @@ static void check_per_arch_model(struct starpu_perfmodel *model, int comb, unsig
 
				 	check_reg_model(model, comb, impl);
			
 
				 
			
 
				 	/* Dump the history into the model file in case it is necessary */
			
 
				-	if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
			
 
				+	if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED || model->type == STARPU_REGRESSION_BASED)
			
 
				 	{
			
 
				 		ptr = per_arch_model->list;
			
 
				 		while (ptr)
			
@@ -779,7 +779,7 @@ static void dump_per_arch_model_file(FILE *f, struct starpu_perfmodel *model, in
 
				 	struct starpu_perfmodel_history_list *ptr = NULL;
			
 
				 	unsigned nentries = 0;
			
 
				 
			
 
				-	if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
			
 
				+       if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED || model->type == STARPU_REGRESSION_BASED)
			
 
				 	{
			
 
				 		/* Dump the list of all entries in the history */
			
 
				 		ptr = per_arch_model->list;
			
@@ -800,7 +800,7 @@ static void dump_per_arch_model_file(FILE *f, struct starpu_perfmodel *model, in
 
				 	dump_reg_model(f, model, comb, impl);
			
 
				 
			
 
				 	/* Dump the history into the model file in case it is necessary */
			
 
				-	if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
			
 
				+       if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED || model->type == STARPU_REGRESSION_BASED)
			
 
				 	{
			
 
				 		fprintf(f, "# hash\t\tsize\t\tflops\t\tmean (us)\tdev (us)\tsum\t\tsum2\t\tn\n");
			
 
				 		ptr = per_arch_model->list;
			
@@ -1046,7 +1046,7 @@ void starpu_perfmodel_dump_xml(FILE *f, struct starpu_perfmodel *model)
 
				 		int nimpls = model->state->nimpls[comb];
			
 
				 		for (impl = 0; impl < nimpls; impl++)
			
 
				 		{
			
 
				-			fprintf(f, "    <implementation id=\"%u\">\n", impl);
			
 
				+			fprintf(f, "    <implementation id=\"%d\">\n", impl);
			
 
				 			char archname[STR_SHORT_LENGTH];
			
 
				 			starpu_perfmodel_get_arch_name(arch_combs[comb], archname,  sizeof(archname), impl);
			
 
				 			fprintf(f, "      <!-- %s -->\n", archname);
			
@@ -1410,25 +1410,27 @@ int starpu_perfmodel_list(FILE *output)
 
				 {
			
 
				 #if !defined(_WIN32) || defined(__MINGW32__) || defined(__CYGWIN__)
			
 
				         char *path;
			
 
				-        DIR *dp;
			
 
				+	struct dirent **list;
			
 
				+	int n;
			
 
				 
			
 
				 	path = _starpu_get_perf_model_dir_codelet();
			
 
				-        dp = opendir(path);
			
 
				-        if (dp != NULL)
			
 
				-	{
			
 
				-		struct dirent *ep;
			
 
				-                while ((ep = readdir(dp)))
			
 
				-		{
			
 
				-                        if (strcmp(ep->d_name, ".") && strcmp(ep->d_name, ".."))
			
 
				-                                fprintf(output, "file: <%s>\n", ep->d_name);
			
 
				-                }
			
 
				-                closedir (dp);
			
 
				-        }
			
 
				-        else
			
 
				+	n = scandir(path, &list, NULL, alphasort);
			
 
				+	if (n < 0)
			
 
				 	{
			
 
				 		_STARPU_DISP("Could not open the perfmodel directory <%s>: %s\n", path, strerror(errno));
			
 
				-        }
			
 
				-	return 0;
			
 
				+		return 1;
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		int i;
			
 
				+		for (i = 0; i < n; i++) {
			
 
				+			if (strcmp(list[i]->d_name, ".") && strcmp(list[i]->d_name, ".."))
			
 
				+				fprintf(output, "file: <%s>\n", list[i]->d_name);
			
 
				+			free(list[i]);
			
 
				+		}
			
 
				+		free(list);
			
 
				+		return 0;
			
 
				+	}
			
 
				 #else
			
 
				 	_STARPU_MSG("Listing perfmodels is not implemented on pure Windows yet\n");
			
 
				 	return 1;
			
@@ -1859,7 +1861,7 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 
				 			model->state->per_arch_is_set[comb][impl] = 1;
			
 
				 		}
			
 
				 
			
 
				-		if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
			
 
				+		if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED || model->type == STARPU_REGRESSION_BASED)
			
 
				 		{
			
 
				 			struct starpu_perfmodel_history_entry *entry;
			
 
				 			struct starpu_perfmodel_history_table *elt;
			
--- a/src/debug/traces/anim.c
+++ b/src/debug/traces/anim.c
@@ -409,10 +409,13 @@ void _starpu_fxt_component_push(FILE *output, struct starpu_fxt_options *options
 
				 	}
			
 
				 	else
			
 
				 		nflowing++;
			
 
				-	if (prio)
			
 
				-		to_p->npriotasks++;
			
 
				-	else
			
 
				-		to_p->ntasks++;
			
 
				+	if (to_p)
			
 
				+	{
			
 
				+		if (prio)
			
 
				+			to_p->npriotasks++;
			
 
				+		else
			
 
				+			to_p->ntasks++;
			
 
				+	}
			
 
				 
			
 
				 	// fprintf(stderr,"push from %s to %s\n", from_p?from_p->name:"none", to_p?to_p->name:"none");
			
 
				 	fxt_component_print_step(output, options, timestamp, workerid, 1, from_p, to_p);
			
@@ -435,10 +438,13 @@ void _starpu_fxt_component_pull(FILE *output, struct starpu_fxt_options *options
 
				 		COMPONENT_FIND(components, to, to_p);
			
 
				 		STARPU_ASSERT(to_p);
			
 
				 	}
			
 
				-	if (prio)
			
 
				-		from_p->npriotasks--;
			
 
				-	else
			
 
				-		from_p->ntasks--;
			
 
				+	if (from_p)
			
 
				+	{
			
 
				+		if (prio)
			
 
				+			from_p->npriotasks--;
			
 
				+		else
			
 
				+			from_p->ntasks--;
			
 
				+	}
			
 
				 	if (to_p)
			
 
				 	{
			
 
				 		if (prio)
			
--- a/src/util/starpu_task_insert.c
+++ b/src/util/starpu_task_insert.c
@@ -142,7 +142,7 @@ int _starpu_task_insert_v(struct starpu_codelet *cl, va_list varg_list)
 
				 
			
 
				 	if (STARPU_UNLIKELY(ret == -ENODEV))
			
 
				 	{
			
 
				-		_STARPU_MSG("submission of task %p wih codelet %p failed (symbol `%s') (err: ENODEV)\n",
			
 
				+		_STARPU_MSG("submission of task %p with codelet %p failed (symbol `%s') (err: ENODEV)\n",
			
 
				 			    task, task->cl,
			
 
				 			    (cl == NULL) ? "none" :
			
 
				 			    task->cl->name ? task->cl->name :
			
--- a/tests/datawizard/partition_init.c
+++ b/tests/datawizard/partition_init.c
@@ -27,7 +27,7 @@ void my_func(void *buffers[], void *cl_arg)
 
				 	for(i=0 ; i<nb ; i++)
			
 
				 	{
			
 
				 		v[i] = i+42;
			
 
				-		FPRINTF(stderr, "setting v[%d] to %d\n",i, v[i]);
			
 
				+		FPRINTF(stderr, "setting v[%u] to %d\n",i, v[i]);
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -44,7 +44,7 @@ void display_func(void *buffers[], void *cl_arg)
 
				         int *v = (int *)STARPU_VECTOR_GET_PTR(buffers[0]);
			
 
				 
			
 
				 	unsigned i;
			
 
				-	for(i=0 ; i<nb ; i++) FPRINTF(stderr, "v[%d] = %d\n", i, v[i]);
			
 
				+	for(i=0 ; i<nb ; i++) FPRINTF(stderr, "v[%u] = %d\n", i, v[i]);
			
 
				 }
			
 
				 
			
 
				 struct starpu_codelet display_codelet =
			
--- a/tests/parallel_tasks/parallel_kernels_trivial.c
+++ b/tests/parallel_tasks/parallel_kernels_trivial.c
@@ -94,8 +94,6 @@ int main(void)
 
				 	starpu_malloc((void **)&v, VECTORSIZE*sizeof(unsigned));
			
 
				 	starpu_vector_data_register(&v_handle, STARPU_MAIN_RAM, (uintptr_t)v, VECTORSIZE, sizeof(unsigned));
			
 
				 
			
 
				-	unsigned nworker = starpu_worker_get_count() + starpu_combined_worker_get_count();
			
 
				-
			
 
				 	/* First submit a sequential task */
			
 
				 	ret = starpu_task_insert(&cl_seq, STARPU_R, v_handle, 0);
			
 
				 	if (ret == -ENODEV) goto enodev;