Selaa lähdekoodia

Merge branch 'master' into starpurm

Olivier Aumage 7 vuotta sitten
vanhempi
commit
9c4194f8ce

+ 9 - 0
configure.ac

@@ -1327,10 +1327,19 @@ if test x$enable_cuda = xyes; then
 		NVCCFLAGS="${NVCCFLAGS} -m64"
 	fi
 
+	SAVED_CPPFLAGS="${CPPFLAGS}"
+	CPPFLAGS="${CPPFLAGS} ${STARPU_CUDA_CPPFLAGS}"
+	SAVED_LDFLAGS="${LDFLAGS}"
+	LDFLAGS="${LDFLAGS} ${STARPU_CUDA_LDFLAGS}"
 	AC_CHECK_HEADERS([cuda_gl_interop.h])
 
 	AC_CHECK_LIB([cusparse], [cusparseCreate])
 	AC_CHECK_DECLS([cusparseSetStream], [], [], [[#include <cusparse.h>]])
+
+	AC_CHECK_HEADER([nvml.h],
+	  [AC_CHECK_LIB([nvidia-ml], [nvmlDeviceGetTotalEnergyConsumption])])
+        CPPFLAGS="${SAVED_CPPFLAGS}"
+	LDFLAGS="${SAVED_LDFLAGS}"
 fi
 
 dnl Hey dude, are you around?

+ 132 - 0
contrib/ci.inria.fr/Jenkinsfile-basic

@@ -0,0 +1,132 @@
+#!groovy
+// StarPU --- Runtime system for heterogeneous multicore architectures.
+//
+// Copyright (C) 2018                                CNRS
+//
+// StarPU is free software; you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation; either version 2.1 of the License, or (at
+// your option) any later version.
+//
+// StarPU is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+// See the GNU Lesser General Public License in COPYING.LGPL for more details.
+//
+
+def statusHasChanged = false
+
+// Two-stage pipeline: 'Tarball' builds the distribution tarball and the PDF
+// documentation once, 'Check' then builds and tests that tarball on every
+// node whose label string contains 'unix'.
+pipeline
+{
+	agent none
+
+	// Trigger the build
+	triggers
+	{
+		// Poll gitlab explicitly every 15mn
+		pollSCM('00-59/15 * * * *')
+	}
+
+	stages
+	{
+		stage('Tarball')
+		{
+			steps
+			{
+				node('autotools')
+				{
+					checkout scm
+					sh 'contrib/ci.inria.fr/job-0-tarball.sh'
+					script
+					{
+						// The tarball name embeds the version, so discover it
+						env.tarballgz = sh (script: 'ls *.tar.gz', returnStdout: true).trim()
+					}
+					stash includes: "${env.tarballgz}", name: 'tarballgz'
+					stash includes: "starpu.pdf", name: 'doc'
+					// Stash those scripts because they are not in make dist
+					dir('contrib/ci.inria.fr')
+					{
+						stash includes: "job-1-check.sh", name: 'script-unix-check'
+					}
+					archiveArtifacts artifacts: "${env.tarballgz},starpu.pdf", fingerprint: true, onlyIfSuccessful: true
+					deleteDir()
+
+				}
+			}
+		}
+		stage('Check')
+		{
+			steps
+			{
+				script
+				{
+					// Declared with 'def' so they stay local to this script
+					// block instead of leaking into the global script binding.
+					def labelToSelect = 'unix'
+					// Note: this is a substring match on the node label string.
+					def listOfNodeNames = jenkins.model.Jenkins.instance.nodes.collect
+					{
+						node -> node.getLabelString().contains(labelToSelect) ? node.name : null
+					}
+					listOfNodeNames.removeAll(Collections.singleton(null))
+
+					// One parallel branch per selected node. The node name is
+					// an explicit closure parameter: a nested closure without
+					// a parameter list gets its own implicit 'it', which is
+					// null when 'parallel' invokes the branch, so 'node(it)'
+					// would not pin the branch to the intended node.
+					def p = listOfNodeNames.collectEntries
+					{
+						nodeName -> [ (nodeName):
+						{
+							node(nodeName)
+							{
+								dir('check-unix')
+								{
+									unstash 'tarballgz'
+									unstash 'script-unix-check'
+									sh 'chmod 755 job-1-check.sh && ./job-1-check.sh'
+									deleteDir()
+								}
+							}
+						}
+					]}
+					parallel p;
+				}
+			}
+		}
+	}
+
+	post
+	{
+		// hooks are called in order: always, changed, aborted, failure, success, unstable
+		changed
+		{
+			echo "Build status has changed."
+			script
+			{
+
+				statusHasChanged = true
+			}
+		}
+		success
+		{
+			echo "Build success."
+			// email when changed to success
+			script
+			{
+				if (statusHasChanged)
+				{
+					emailext(body: '${DEFAULT_CONTENT}',
+						 subject: '${DEFAULT_SUBJECT}',
+						 replyTo: '$DEFAULT_REPLYTO',
+						 to: '$DEFAULT_RECIPIENTS',
+						 recipientProviders: [[$class: 'CulpritsRecipientProvider'],[$class: 'RequesterRecipientProvider']])
+				}
+			}
+		}
+		failure
+		{
+			echo "Build failure."
+			// always email on failure
+			emailext(body: '${DEFAULT_CONTENT}',
+				 subject: '${DEFAULT_SUBJECT}',
+				 replyTo: '$DEFAULT_REPLYTO',
+				 to: '$DEFAULT_RECIPIENTS',
+				 recipientProviders: [[$class: 'CulpritsRecipientProvider'],[$class: 'RequesterRecipientProvider']])
+		}
+	}
+}

+ 30 - 0
contrib/ci.inria.fr/job-0-tarball.sh

@@ -0,0 +1,30 @@
+#!/bin/sh
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2018                                CNRS
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+# Abort on the first failing step. Without this, a failed configure or make
+# is followed by the remaining steps, and the exit status of the script is
+# only that of the final "make clean".
+set -e
+
+export PKG_CONFIG_PATH=/home/ci/usr/local/lib/pkgconfig:$PKG_CONFIG_PATH
+export LD_LIBRARY_PATH=/home/ci/usr/local/lib:$LD_LIBRARY_PATH
+
+./autogen.sh
+# Start from a fresh out-of-tree build directory; permissions are opened
+# first so that the removal cannot fail on read-only leftovers.
+if test -d build ; then chmod -R 777 build && rm -rf build ; fi
+mkdir build && cd build
+../configure
+make V=1
+make dist
+# Copy the tarball and the PDF documentation to the top-level directory.
+cp *gz ..
+cp doc/doxygen/starpu.pdf ..
+make clean
+

+ 85 - 0
contrib/ci.inria.fr/job-1-check.sh

@@ -0,0 +1,85 @@
+#!/bin/sh
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2013-2018                                CNRS
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+# Stop at the first failing command and trace every command in the log.
+set -e
+set -x
+
+export PKG_CONFIG_PATH=/home/ci/usr/local/lib/pkgconfig:$PKG_CONFIG_PATH
+export LD_LIBRARY_PATH=/home/ci/usr/local/lib:$LD_LIBRARY_PATH
+
+# Pick the most recently modified tarball of the current directory.
+tarball=$(ls -tr starpu-*.tar.gz | tail -1)
+
+if test -z "$tarball"
+then
+    echo Error. No tar.gz file
+    ls
+    pwd
+    exit 1
+fi
+
+# All expansions below are quoted so that a working directory containing
+# whitespace does not get split into several words.
+basename=$(basename "$tarball" .tar.gz)
+export STARPU_HOME="$PWD/$basename/home"
+mkdir -p "$basename"
+cd "$basename"
+# Keep a copy of the environment next to the build, for later inspection.
+env > "$PWD/env"
+
+# Remove the tree left by a previous run, if any, before unpacking.
+test -d "$basename" && chmod -R u+rwX "$basename" && rm -rf "$basename"
+tar xfz "../$tarball"
+cd "$basename"
+mkdir build
+cd build
+
+# Select platform-specific configure options from the kernel name.
+# A "case" statement is used because this script runs under /bin/sh, where
+# the "==" operator of test is not portable (POSIX only specifies "=").
+STARPU_CONFIGURE_OPTIONS=""
+suname=$(uname)
+case "$suname" in
+    Darwin)
+	STARPU_CONFIGURE_OPTIONS="--without-hwloc"
+	;;
+    OpenBSD)
+	STARPU_CONFIGURE_OPTIONS="--without-hwloc --disable-mlr"
+	;;
+    FreeBSD)
+	STARPU_CONFIGURE_OPTIONS="--disable-fortran"
+	;;
+esac
+
+export CC=gcc
+
+# date +%u prints the day of the week, 1 (Monday) to 7 (Sunday): the quick
+# check is run from Monday to Friday, the long one on Saturday and Sunday.
+day=$(date +%u)
+if test "$day" -le 5
+then
+    ../configure --enable-quick-check --enable-verbose --enable-mpi-check --disable-build-doc $STARPU_CONFIGURE_OPTIONS
+else
+    ../configure --enable-long-check --enable-verbose --enable-mpi-check --disable-build-doc $STARPU_CONFIGURE_OPTIONS
+fi
+
+make
+#make check
+# Do not stop at the first failing test: the whole log is captured, and the
+# failures are counted at the end of the script.
+(make -k check || true) > ../check_$$ 2>&1
+cat ../check_$$
+make showcheck
+
+grep "^FAIL:" ../check_$$ || true
+
+make clean
+
+grep "^FAIL:" ../check_$$ || true
+
+echo "Running on $(uname -a)"
+# The exit status is the number of failed tests, capped at 255: an exit
+# status is truncated to 8 bits, so e.g. 256 failures would otherwise be
+# reported as a success.
+# grep -c exits with status 1 when it counts no match, hence the "|| true".
+nfail=$(grep -c "^FAIL:" ../check_$$ || true)
+if test "$nfail" -gt 255
+then
+    nfail=255
+fi
+exit "$nfail"
+

+ 3 - 3
doc/doxygen/chapters/api/profiling.doxy

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010-2015,2017                           CNRS
- * Copyright (C) 2009-2011,2014,2016                      Université de Bordeaux
+ * Copyright (C) 2009-2011,2014,2016,2018                 Université de Bordeaux
  * Copyright (C) 2011-2012                                Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -72,7 +72,7 @@ profiling was enabled.
     Number of cycles stalled within the task, only available in the MoviSim
 
 \var double starpu_profiling_task_info::energy_consumed
-Energy consumed by the task, only available in the MoviSim
+Energy consumed by the task, in Joules
 
 \struct starpu_profiling_worker_info
 This structure contains the profiling information associated to
@@ -94,7 +94,7 @@ starpu_profiling_worker_get_info()
 \var uint64_t starpu_profiling_worker_info::stall_cycles
         Number of cycles stalled within the worker, only available in the MoviSim
 \var double starpu_profiling_worker_info::energy_consumed
-        Energy consumed by the worker, only available in the MoviSim
+        Energy consumed by the worker, in Joules
 
 \struct starpu_profiling_bus_info
 todo

+ 2 - 1
examples/Makefile.am

@@ -3,7 +3,7 @@
 # Copyright (C) 2011-2017                                Inria
 # Copyright (C) 2017                                     Erwan Leria
 # Copyright (C) 2009-2018                                Université de Bordeaux
-# Copyright (C) 2010-2015,2017                           CNRS
+# Copyright (C) 2010-2015,2017,2018                      CNRS
 # Copyright (C) 2011                                     Télécom-SudParis
 # Copyright (C) 2016                                     Uppsala University
 #
@@ -227,6 +227,7 @@ STARPU_EXAMPLES +=				\
 	filters/fmultiple_submit		\
 	filters/fmultiple_submit_readonly	\
 	filters/fmultiple_submit_implicit	\
+	filters/frecursive			\
 	tag_example/tag_example			\
 	tag_example/tag_example2		\
 	tag_example/tag_example3		\

+ 170 - 0
examples/filters/frecursive.c

@@ -0,0 +1,170 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2018                                     CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+
+#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
+
+/* CPU implementation of the codelet: multiplies in place every element of
+ * the matrix described by buffers[0] by the integer factor packed in cl_arg.
+ */
+void cpu_codelet(void *buffers[], void *cl_arg)
+{
+	unsigned i, j;
+	int factor;
+
+	starpu_codelet_unpack_args(cl_arg, &factor, 0);
+	/* length of the matrix */
+	unsigned nx = STARPU_MATRIX_GET_NX(buffers[0]);
+	unsigned ny = STARPU_MATRIX_GET_NY(buffers[0]);
+	/* ld is used below as the distance, in elements, between two rows */
+	unsigned ld = STARPU_MATRIX_GET_LD(buffers[0]);
+	/* local copy of the matrix pointer */
+	int *val = (int *)STARPU_MATRIX_GET_PTR(buffers[0]);
+
+	/* nx, ny and ld are unsigned, hence %u */
+	FPRINTF(stderr, "computing on matrix with nx=%u, ny=%u, ld=%u\n", nx, ny, ld);
+	for(j=0; j<ny ; j++)
+	{
+		for(i=0; i<nx ; i++)
+			val[(j*ld)+i] *= factor;
+	}
+}
+
+/* Codelet with a single CPU implementation, working on one buffer which is
+ * accessed in read-write mode. */
+static struct starpu_codelet cl =
+{
+	.cpu_funcs = {cpu_codelet},
+	.modes = {STARPU_RW},
+	.nbuffers = 1,
+};
+
+#define NX 400
+#define NY 80
+#define LD NX
+#define PARTS 4
+
+/* Registers an NX x NY matrix of integers, plans three nested levels of
+ * partitioning on it, submits tasks which multiply pieces taken from the
+ * different levels by a constant factor, and checks the resulting matrix.
+ *
+ * Returns 0 on success, 1 when an element has an unexpected value, and 77
+ * when the example cannot be run (no device, or no CPU worker).
+ */
+int main(void)
+{
+	int *matrix;
+	starpu_data_handle_t matrix_handle;
+	starpu_data_handle_t subhandle_l1[PARTS];
+	starpu_data_handle_t subhandle_l2[PARTS][PARTS];
+	starpu_data_handle_t subhandle_l3[PARTS][PARTS][PARTS];
+	int ret;
+
+	int factor = 12;
+	int n=1;
+	int i,j,k;
+
+	ret = starpu_init(NULL);
+	if (STARPU_UNLIKELY(ret == -ENODEV))
+	{
+		return 77;
+	}
+	/* Any other initialization failure is an error, not a skipped test */
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	if (starpu_cpu_worker_get_count() < 1)
+	{
+		FPRINTF(stderr, "This application requires at least 1 cpu worker\n");
+		starpu_shutdown();
+		return 77;
+	}
+
+	matrix = (int*)malloc(NX * NY * sizeof(int));
+	assert(matrix);
+	starpu_matrix_data_register(&matrix_handle, STARPU_MAIN_RAM, (uintptr_t)matrix, LD, NX, NY, sizeof(int));
+
+	/* Fill the matrix with 1, 2, 3, ... so that each element is unique */
+	for(j=0 ; j<NY ; j++)
+	{
+		for(i=0 ; i<NX ; i++)
+		{
+			matrix[(j*LD)+i] = n++;
+		}
+	}
+
+	/* Split the matrix in PARTS sub-matrices, each sub-matrix in PARTS sub-sub-matrices, and each sub-sub matrix in PARTS sub-sub-sub-matrices */
+	struct starpu_data_filter f =
+	{
+		.filter_func = starpu_matrix_filter_block,
+		.nchildren = PARTS
+	};
+	struct starpu_data_filter f2 =
+	{
+		.filter_func = starpu_matrix_filter_vertical_block,
+		.nchildren = PARTS
+	};
+	starpu_data_partition_plan(matrix_handle, &f, subhandle_l1);
+	for(i=0 ; i<PARTS ; i++)
+	{
+		starpu_data_partition_plan(subhandle_l1[i], &f2, subhandle_l2[i]);
+		for(j=0 ; j<PARTS ; j++)
+		{
+			starpu_data_partition_plan(subhandle_l2[i][j], &f, subhandle_l3[i][j]);
+		}
+	}
+
+	/* Submit a task on the first sub-matrix and sub-sub matrix, and on all others sub-sub-matrices */
+	ret = starpu_task_insert(&cl,
+				 STARPU_RW, subhandle_l1[0],
+				 STARPU_VALUE, &factor, sizeof(factor),
+				 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
+	for (i=1; i<PARTS; i++)
+	{
+		ret = starpu_task_insert(&cl,
+					 STARPU_RW, subhandle_l2[i][0],
+					 STARPU_VALUE, &factor, sizeof(factor),
+					 0);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
+		for (j=1; j<PARTS; j++)
+		{
+			for (k=0; k<PARTS; k++)
+			{
+				ret = starpu_task_insert(&cl,
+							 STARPU_RW, subhandle_l3[i][j][k],
+							 STARPU_VALUE, &factor, sizeof(factor),
+							 0);
+				STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
+			}
+		}
+	}
+
+	/* Clean the partitioning plans, innermost level first */
+	for(i=0 ; i<PARTS ; i++)
+	{
+		for(j=0 ; j<PARTS ; j++)
+		{
+			starpu_data_partition_clean(subhandle_l2[i][j], PARTS, subhandle_l3[i][j]);
+
+		}
+		starpu_data_partition_clean(subhandle_l1[i], PARTS, subhandle_l2[i]);
+	}
+	starpu_data_partition_clean(matrix_handle, PARTS, subhandle_l1);
+	starpu_data_unregister(matrix_handle);
+
+	/* Check the result matrix: every element must have been multiplied
+	 * by factor exactly once */
+	n=1;
+	for(j=0 ; j<NY ; j++)
+	{
+		for(i=0 ; i<NX ; i++)
+		{
+			if (matrix[(j*LD)+i] != n*factor)
+			{
+				FPRINTF(stderr, "Incorrect result %4d != %4d\n", matrix[(j*LD)+i], n*factor);
+				ret=1;
+			}
+			n++;
+		}
+	}
+
+	free(matrix);
+	starpu_shutdown();
+
+	return ret;
+}

+ 3 - 1
src/core/perfmodel/perfmodel_history.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2011-2013,2016-2017                      Inria
- * Copyright (C) 2008-2017                                Université de Bordeaux
+ * Copyright (C) 2008-2018                                Université de Bordeaux
  * Copyright (C) 2010-2017                                CNRS
  * Copyright (C) 2013                                     Thibaut Lambert
  * Copyright (C) 2011                                     Télécom-SudParis
@@ -477,6 +477,8 @@ static void scan_reg_model(FILE *f, const char *path, struct starpu_perfmodel_re
 			multi_invalid = (multi_invalid||isnan(reg_model->coeff[i]));
 		}
 		reg_model->multi_valid = !multi_invalid;
+		res = fscanf(f, "\n");
+		STARPU_ASSERT_MSG(res == 0, "Incorrect performance model file %s", path);
 	}
 }
 

+ 1 - 1
src/datawizard/malloc.c

@@ -360,7 +360,7 @@ int _starpu_malloc_flags_on_node(unsigned dst_node, void **A, size_t dim, int fl
 				ret = -ENOMEM;
 		}
 
-#if defined(STARPU_SIMGRID) || defined(STARPU_USE_CUDA)
+#if (defined(STARPU_SIMGRID) && (SIMGRID_VERSION < 31500 || SIMGRID_VERSION == 31559)) || defined(STARPU_USE_CUDA)
 end:
 #endif
 	if (ret == 0)

+ 38 - 1
src/drivers/cuda/driver_cuda.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2011-2012,2014,2016-2017                 Inria
- * Copyright (C) 2008-2017                                Université de Bordeaux
+ * Copyright (C) 2008-2018                                Université de Bordeaux
  * Copyright (C) 2010                                     Mehdi Juhoor
  * Copyright (C) 2010-2017                                CNRS
  * Copyright (C) 2011                                     Télécom-SudParis
@@ -31,6 +31,9 @@
 #ifdef HAVE_CUDA_GL_INTEROP_H
 #include <cuda_gl_interop.h>
 #endif
+#ifdef HAVE_LIBNVIDIA_ML
+#include <nvml.h>
+#endif
 #include <datawizard/memory_manager.h>
 #include <datawizard/memory_nodes.h>
 #include <datawizard/malloc.h>
@@ -53,6 +56,9 @@
 static int ncudagpus = -1;
 
 static size_t global_mem[STARPU_MAXCUDADEVS];
+#ifdef HAVE_LIBNVIDIA_ML
+static nvmlDevice_t nvmlDev[STARPU_MAXCUDADEVS];
+#endif
 int _starpu_cuda_bus_ids[STARPU_MAXCUDADEVS+STARPU_MAXNUMANODES][STARPU_MAXCUDADEVS+STARPU_MAXNUMANODES];
 #ifdef STARPU_USE_CUDA
 static cudaStream_t streams[STARPU_NMAXWORKERS];
@@ -106,6 +112,9 @@ _starpu_cuda_discover_devices (struct _starpu_machine_config *config)
 	if (STARPU_UNLIKELY(cures != cudaSuccess))
 		cnt = 0;
 	config->topology.nhwcudagpus = cnt;
+#ifdef HAVE_LIBNVIDIA_ML
+	nvmlInit();
+#endif
 #endif
 }
 
@@ -520,7 +529,30 @@ static int start_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *worke
 			_starpu_simgrid_submit_job(workerid, j, &worker->perf_arch, NAN,
 				async ? &task_finished[workerid][pipeline_idx] : NULL);
 #else
+#ifdef HAVE_LIBNVIDIA_ML
+		unsigned long long energy_start = 0;
+		nvmlReturn_t nvmlRet = -1;
+		if (profiling || (cl->energy_model && cl->energy_model->benchmarking))
+		{
+			nvmlRet = nvmlDeviceGetTotalEnergyConsumption(nvmlDev[worker->devid], &energy_start);
+		}
+#endif
+
 		func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
+
+#ifdef HAVE_LIBNVIDIA_ML
+		if (nvmlRet == NVML_SUCCESS &&
+			(profiling || (cl->energy_model && cl->energy_model->benchmarking)))
+		{
+			unsigned long long energy_end;
+			nvmlRet = nvmlDeviceGetTotalEnergyConsumption(nvmlDev[worker->devid], &energy_end);
+#ifdef STARPU_DEVEL
+#warning TODO: measure idle consumption to subtract it
+#endif
+			if (nvmlRet == NVML_SUCCESS)
+				task->profiling_info->energy_consumed += (energy_end - energy_start) / 1000.;
+		}
+#endif
 #endif
 		_STARPU_TRACE_END_EXECUTING();
 	}
@@ -682,6 +714,11 @@ int _starpu_cuda_driver_init(struct _starpu_worker_set *worker_set)
 
 #if defined(STARPU_HAVE_BUSID) && !defined(STARPU_SIMGRID)
 #if defined(STARPU_HAVE_DOMAINID) && !defined(STARPU_SIMGRID)
+#ifdef HAVE_LIBNVIDIA_ML
+		char busid[13];
+		snprintf(busid, sizeof(busid), "%04x:%02x:%02x.0", props[devid].pciDomainID, props[devid].pciBusID, props[devid].pciDeviceID);
+		nvmlDeviceGetHandleByPciBusId(busid, &nvmlDev[devid]);
+#endif
 		if (props[devid].pciDomainID)
 			snprintf(worker->name, sizeof(worker->name), "CUDA %u.%u (%s %.1f GiB %04x:%02x:%02x.0)", devid, subdev, devname, size, props[devid].pciDomainID, props[devid].pciBusID, props[devid].pciDeviceID);
 		else