소스 검색

Merge branch 'master' into fpga

Nathalie Furmento 5 년 전
부모
커밋
9c3b782b8a

+ 33 - 45
configure.ac

@@ -249,8 +249,18 @@ AC_ARG_WITH(simgrid-lib-dir,
 	], [simgrid_lib_dir=no])
 
 if test x$enable_simgrid = xyes ; then
-	PKG_CHECK_MODULES([SIMGRID], [simgrid])
+	PKG_CHECK_MODULES([SIMGRID], [simgrid], [], [:])
 
+	if test "$simgrid_include_dir" != "no" ; then
+	   	SIMGRID_CFLAGS="$SIMGRID_CFLAGS -I$simgrid_include_dir"
+	fi
+	if test "$simgrid_lib_dir" != "no" ; then
+	   	SIMGRID_LIBS="$SIMGRID_LIBS -L$simgrid_lib_dir"
+	fi
+	if test "$simgrid_dir" != "no" ; then
+	   	SIMGRID_CFLAGS="$SIMGRID_CFLAGS -I$simgrid_dir/include"
+	   	SIMGRID_LIBS="$SIMGRID_LIBS -L$simgrid_dir/lib"
+	fi
    	if test -n "$SIMGRID_CFLAGS" ; then
 	   	CFLAGS="$SIMGRID_CFLAGS $CFLAGS"
 	   	CXXFLAGS="$SIMGRID_CFLAGS $CXXFLAGS"
@@ -259,20 +269,6 @@ if test x$enable_simgrid = xyes ; then
 	if test -n "$SIMGRID_LIBS" ; then
 		LDFLAGS="$SIMGRID_LIBS $LDFLAGS"
 	fi
-	if test "$simgrid_dir" != "no" ; then
-	   	CFLAGS="-I$simgrid_dir/include $CFLAGS"
-	   	CXXFLAGS="-I$simgrid_dir/include $CXXFLAGS"
-	   	NVCCFLAGS="-I$simgrid_dir/include $NVCCFLAGS"
-	   	LDFLAGS="-L$simgrid_dir/lib $LDFLAGS"
-	fi
-	if test "$simgrid_include_dir" != "no" ; then
-	   	CFLAGS="-I$simgrid_include_dir $CFLAGS"
-	   	CXXFLAGS="-I$simgrid_include_dir $CXXFLAGS"
-	   	NVCCFLAGS="-I$simgrid_include_dir $NVCCFLAGS"
-	fi
-	if test "$simgrid_lib_dir" != "no" ; then
-	   	LDFLAGS="-L$simgrid_lib_dir $LDFLAGS"
-	fi
 	AC_HAVE_LIBRARY([simgrid], [],
 		[
 			AC_MSG_ERROR(Simgrid support needs simgrid installed)
@@ -330,9 +326,9 @@ if test x$enable_simgrid = xyes ; then
 		LIBS="$LIBS -lstdc++"
 	fi
 
-	case \ $CXXFLAGS\  in 
+	case \ $CXXFLAGS\  in
 	*\ -std=*\ *) ;;
-	*) 
+	*)
 		AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
 				  #ifdef STARPU_HAVE_SIMGRID_MSG_H
 				  #include <simgrid/msg.h>
@@ -1402,9 +1398,9 @@ if test x$enable_cuda = xyes; then
 	STARPU_CUFFT_LDFLAGS="-lcufft"
 
 	AC_LANG_PUSH([C++])
-	case \ $NVCCFLAGS\  in 
+	case \ $NVCCFLAGS\  in
 	*\ -std=*\ *) ;;
-	*) 
+	*)
 		SAVED_CXX="$CXX"
 		CXX="$NVCC"
 		AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
@@ -1427,7 +1423,7 @@ if test x$enable_cuda = xyes; then
 
 	#in case this is a 64bit setup, we tell nvcc to use a -m64 flag, if missing from existing flags
 	if test x$SIZEOF_VOID_P = x8; then
-		case \ $NVCCFLAGS\  in 
+		case \ $NVCCFLAGS\  in
 			*\ -m64\ *) ;;
 			*) NVCCFLAGS="${NVCCFLAGS} -m64" ;;
 		esac
@@ -2818,7 +2814,7 @@ if test "x$use_mpi_master_slave" = "xyes" ; then
       AC_MSG_ERROR([MPI Master-Slave and SOCL can not be used at the same time !])
    fi
    if test "x$enable_socl" = "xmaybe" ; then
-     enable_socl=no 
+     enable_socl=no
    fi
 fi
 
@@ -3110,15 +3106,9 @@ AC_SUBST(BLAS_LIB,$blas_lib)
 #			 Multiple linear regression			      #
 #                                                                             #
 ###############################################################################
-if test x$enable_simgrid = xyes ; then
-	# There is no need for building mlr models in simgrid mode
-	default_enable_mlr=no
-else
-	default_enable_mlr=yes
-fi
-AC_ARG_ENABLE(mlr, [AS_HELP_STRING([--disable-mlr],
-			[Disable multiple linear regression models])],
-			enable_mlr=$enableval, enable_mlr=$default_enable_mlr)
+AC_ARG_ENABLE(mlr, [AS_HELP_STRING([--enable-mlr],
+			[Enable multiple linear regression models])],
+			enable_mlr=$enableval, enable_mlr=no)
 AC_ARG_ENABLE(mlr-system-blas, [AS_HELP_STRING([--enable-mlr-system-blas],
 			[Make the multiple linear regression models use the system BLAS instead of min-dgels])],
 			enable_mlr_blas=$enableval, enable_mlr_blas=no)
@@ -3132,11 +3122,11 @@ if test x$enable_mlr = xyes -a "$starpu_windows" != "yes" ; then
 	if test x$blas_lib = xnone ; then
 	   use_system_lapack=no
 	fi
-	if test x$enable_mlr_blas = xyes -a test x$use_system_lapack = xyes; then
+	if test x$enable_mlr_blas = xyes -a x$use_system_lapack = xyes; then
 	   	AC_DEFINE(STARPU_MLR_MODEL, [1], [use reflapack library])
 		LDFLAGS="-llapack $LDFLAGS"
 	else
-		if test x$enable_mlr_blas=xyes -a test x$blas_lib = xmkl; then
+		if test x$enable_mlr_blas = xyes -a x$blas_lib = xmkl; then
 		   	AC_DEFINE(STARPU_MLR_MODEL, [1], [use mkl library])
 		else
 			AC_MSG_CHECKING(whether min-dgels is linked)
@@ -3390,9 +3380,9 @@ then
 			[AS_HELP_STRING([--with-dlb-include-dir=<path>],
 			[specify where DLB headers are installed])],
 			[dlb_inc_dirs="$withval"], [dlb_inc_dirs=""])
-	
+
 		dlb_inc_dirs="${dlb_inc_dirs} /usr/include/dlb"
-	
+
 		dlb_incdir_found=no
 		for dlb_incdir in $dlb_inc_dirs
 		do
@@ -3413,18 +3403,18 @@ then
 				unset ac_cv_header_dlb_h
 			fi
 		done
-	
+
 		AC_ARG_WITH(dlb-lib-dir,
 			[AS_HELP_STRING([--with-dlb-lib-dir=<path>],
 			[specify where DLB libraries are installed])],
 			[dlb_lib_dirs="$withval"], [dlb_lib_dirs=""])
-	
+
 		dlb_lib_dirs="${dlb_lib_dirs} /usr/lib/dlb"
-	
+
 		dlb_libdir_found=no
 		for dlb_libdir in $dlb_lib_dirs
 		do
-			if test -n "$dlb_libdir" 
+			if test -n "$dlb_libdir"
 			then
 				SAVED_LDFLAGS="${LDFLAGS}"
 				LDFLAGS=-L${dlb_libdir}
@@ -3441,7 +3431,7 @@ then
 				unset ac_cv_lib_dlb_DLB_Init
 			fi
 		done
-	
+
 		SAVED_CPPFLAGS="${CPPFLAGS}"
 		SAVED_CFLAGS="${CFLAGS}"
 		SAVED_LDFLAGS="${LDFLAGS}"
@@ -3453,7 +3443,7 @@ then
 		CPPFLAGS="$SAVED_CPPFLAGS"
 		CFLAGS="$SAVED_CFLAGS"
 		LIBS="$SAVED_LIBS"
-	
+
 		SAVED_CPPFLAGS="${CPPFLAGS}"
 		SAVED_CFLAGS="${CFLAGS}"
 		SAVED_LDFLAGS="${LDFLAGS}"
@@ -3469,13 +3459,13 @@ then
 		CPPFLAGS="$SAVED_CPPFLAGS"
 		CFLAGS="$SAVED_CFLAGS"
 		LIBS="$SAVED_LIBS"
-	
+
 		if test "x$dlb_incdir_found" != "xyes" -o "x$dlb_libdir_found" != "xyes"
 		then
 			enable_dlb=no
 		fi
 	fi
-	
+
 	AC_MSG_CHECKING(whether DLB support should be enabled)
 	AC_MSG_RESULT($enable_dlb)
 	if test "x$enable_dlb" != "xno"
@@ -3505,8 +3495,6 @@ AC_ARG_ENABLE(starpurm-examples, [AS_HELP_STRING([--enable-starpurm-examples],
 			enable_starpurm_examples=$enableval, enable_starpurm_examples=no)
 AM_CONDITIONAL(STARPU_BUILD_STARPURM_EXAMPLES, [test x$enable_starpurm_examples = xyes])
 
-
-
 ##########################################
 # Documentation                          #
 ##########################################
@@ -3807,7 +3795,7 @@ AC_MSG_NOTICE([
 	       MPI test suite:                                $running_mpi_check
 	       Master-Slave MPI enabled:                      $use_mpi_master_slave
 	       FFT Support:                                   $fft_support
-	       Resource Management enable:                    $starpurm_support
+	       Resource Management enabled:                   $starpurm_support
 	       OpenMP runtime support enabled:                $enable_openmp
 	       Cluster support enabled:                       $enable_cluster
 	       SOCL enabled:                                  $build_socl

+ 4 - 3
doc/doxygen/chapters/370_online_performance_tools.doxy

@@ -366,7 +366,8 @@ are computed automatically by the StarPU at the end of the execution, using leas
 squares method of the <c>dgels_</c> LAPACK function.
 
 <c>examples/mlr/mlr.c</c> example provides more details on
-the usage of ::STARPU_MULTIPLE_REGRESSION_BASED models.
+the usage of ::STARPU_MULTIPLE_REGRESSION_BASED models. The \ref enable-mlr
+"--enable-mlr" configure option needs to be set to calibrate the model.
 
 Coefficients computation is done at the end of the execution, and the
 results are stored in standard codelet perfmodel files. Additional
@@ -379,8 +380,8 @@ executions. By default StarPU uses a lightweight dgels implementation, but the
 \ref enable-mlr-system-blas "--enable-mlr-system-blas" configure option can be
 used to make StarPU use a system-provided dgels BLAS.
 
-Additionally, when multiple linear regression models are
-disabled (using \ref disable-mlr "--disable-mlr" configure option) or when the
+Additionally, when multiple linear regression models are not enabled through the
+\ref enable-mlr "--enable-mlr" configure option, or when the
 <c>model->combinations</c> are not defined, StarPU will still write
 output files into <c>.starpu/sampling/codelets/tmp/</c> to allow
 performing an analysis. This analysis typically aims at finding the

+ 4 - 4
doc/doxygen/chapters/510_configure_options.doxy

@@ -761,11 +761,11 @@ of this parameter must be in [0..100]. The default value of
 this parameter is 10. Experimental.
 </dd>
 
-<dt>--disable-mlr</dt>
+<dt>--enable-mlr</dt>
 <dd>
-\anchor disable-mlr
-\addindex __configure__--disable-mlr
-Allow to disable multiple linear regression models (see \ref PerformanceModelExample)
+\anchor enable-mlr
+\addindex __configure__--enable-mlr
+Enable multiple linear regression models, which are disabled by default (see \ref PerformanceModelExample)
 </dd>
 
 <dt>--enable-mlr-system-blas</dt>

+ 8 - 5
examples/cholesky/cholesky_implicit.c

@@ -206,6 +206,14 @@ static void execute_cholesky(unsigned size, unsigned nblocks)
 {
 	float *mat = NULL;
 
+	/*
+	 * create a simple symmetric positive definite matrix example
+	 *
+	 *	Hilbert matrix : h(i,j) = 1/(i+j+1)
+	 *
+	 * and make it better conditioned by adding one on the diagonal.
+	 */
+
 #ifndef STARPU_SIMGRID
 	unsigned m,n;
 	starpu_malloc_flags((void **)&mat, (size_t)size*size*sizeof(float), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED);
@@ -324,11 +332,6 @@ static void execute_cholesky(unsigned size, unsigned nblocks)
 
 int main(int argc, char **argv)
 {
-	/* create a simple definite positive symetric matrix example
-	 *
-	 *	Hilbert matrix : h(i,j) = 1/(i+j+1)
-	 * */
-
 #ifdef STARPU_HAVE_MAGMA
 	magma_init();
 #endif

+ 0 - 1
mpi/examples/Makefile.am

@@ -466,7 +466,6 @@ benchs_sendrecv_bench_SOURCES += benchs/abstract_sendrecv_bench.c
 
 benchs_sendrecv_parallel_tasks_bench_SOURCES = benchs/sendrecv_parallel_tasks_bench.c
 benchs_sendrecv_parallel_tasks_bench_SOURCES += benchs/bench_helper.c
-benchs_sendrecv_parallel_tasks_bench_SOURCES += benchs/abstract_sendrecv_bench.c
 
 benchs_burst_SOURCES = benchs/burst.c
 benchs_burst_SOURCES += benchs/burst_helper.c

+ 27 - 6
mpi/examples/benchs/abstract_sendrecv_bench.c

@@ -19,7 +19,7 @@
 
 
 
-void sendrecv_bench(int mpi_rank, starpu_pthread_barrier_t* thread_barrier)
+void sendrecv_bench(int mpi_rank, starpu_pthread_barrier_t* thread_barrier, int bidir)
 {
 	uint64_t iterations = LOOPS_DEFAULT;
 	uint64_t s = 0;
@@ -62,6 +62,7 @@ void sendrecv_bench(int mpi_rank, starpu_pthread_barrier_t* thread_barrier)
 	float* vector_recv = NULL;
 	double t1, t2, global_tstart, global_tend;
 	double* lats = malloc(sizeof(double) * LOOPS_DEFAULT);
+	starpu_mpi_req send_req, recv_req;
 
 	if (thread_barrier != NULL)
 	{
@@ -88,18 +89,38 @@ void sendrecv_bench(int mpi_rank, starpu_pthread_barrier_t* thread_barrier)
 			if (mpi_rank == 0)
 			{
 				t1 = starpu_timing_now();
-				starpu_mpi_send(handle_send, 1, 0, MPI_COMM_WORLD);
-				starpu_mpi_recv(handle_recv, 1, 1, MPI_COMM_WORLD, NULL);
+				if (bidir)
+				{
+					starpu_mpi_isend(handle_send, &send_req, 1, 0, MPI_COMM_WORLD);
+					starpu_mpi_irecv(handle_recv, &recv_req, 1, 1, MPI_COMM_WORLD);
+					starpu_mpi_wait(&send_req, MPI_STATUS_IGNORE);
+					starpu_mpi_wait(&recv_req, MPI_STATUS_IGNORE);
+				}
+				else
+				{
+					starpu_mpi_send(handle_send, 1, 0, MPI_COMM_WORLD);
+					starpu_mpi_recv(handle_recv, 1, 1, MPI_COMM_WORLD, NULL);
+				}
 				t2 = starpu_timing_now();
 
-				const double t = (t2 -t1) / 2;
+				const double t = (t2 - t1) / 2;
 
 				lats[j] = t;
 			}
 			else
 			{
-				starpu_mpi_recv(handle_recv, 0, 0, MPI_COMM_WORLD, NULL);
-				starpu_mpi_send(handle_send, 0, 1, MPI_COMM_WORLD);
+				if (bidir)
+				{
+					starpu_mpi_irecv(handle_recv, &recv_req, 0, 0, MPI_COMM_WORLD);
+					starpu_mpi_isend(handle_send, &send_req, 0, 1, MPI_COMM_WORLD);
+					starpu_mpi_wait(&recv_req, MPI_STATUS_IGNORE);
+					starpu_mpi_wait(&send_req, MPI_STATUS_IGNORE);
+				}
+				else
+				{
+					starpu_mpi_recv(handle_recv, 0, 0, MPI_COMM_WORLD, NULL);
+					starpu_mpi_send(handle_send, 0, 1, MPI_COMM_WORLD);
+				}
 			}
 
 			starpu_mpi_barrier(MPI_COMM_WORLD);

+ 1 - 1
mpi/examples/benchs/abstract_sendrecv_bench.h

@@ -17,4 +17,4 @@
 #include <starpu.h>
 
 
-void sendrecv_bench(int mpi_rank, starpu_pthread_barrier_t* thread_barrier);
+void sendrecv_bench(int mpi_rank, starpu_pthread_barrier_t* thread_barrier, int bidir);

+ 21 - 6
mpi/examples/benchs/sendrecv_bench.c

@@ -16,6 +16,8 @@
 /*
  * Basic send receive benchmark.
  * Inspired a lot from NewMadeleine examples/benchmarks/nm_bench_sendrecv.c
+ *
+ * The option --bidir is available to do full-duplex communications.
  */
 
 #include <starpu_mpi.h>
@@ -23,11 +25,22 @@
 #include "abstract_sendrecv_bench.h"
 
 
+static inline void man(int status) /* print the option summary on stderr, then exit with the given status */
+{
+	fprintf(stderr, "Options:\n");
+	fprintf(stderr, "\t-h --help   display this help\n");
+	fprintf(stderr, "\t-p          pause workers during benchmark\n");
+	fprintf(stderr, "\t--bidir     full-duplex communications\n");
+	exit(status);
+}
+
+
 int main(int argc, char **argv)
 {
 	int ret, rank, worldsize;
 	int pause_workers = 0;
 	int i = 0;
+	int bidir = 0;
 
 
 	for (i = 1; i < argc; i++)
@@ -39,15 +52,17 @@ int main(int argc, char **argv)
 		}
 		else if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0)
 		{
-			fprintf(stderr, "Options:\n");
-			fprintf(stderr, "\t-h --help   display this help\n");
-			fprintf(stderr, "\t-p          pause workers during benchmark\n");
-			exit(EXIT_SUCCESS);
+			man(EXIT_SUCCESS);
+		}
+		else if (strcmp(argv[i], "--bidir") == 0) /* must stay in the else-if chain, otherwise -p reaches the final else */
+		{
+			bidir = 1;
+			printf("Communications will be full-duplex.\n");
 		}
 		else
 		{
 			fprintf(stderr,"Unrecognized option %s\n", argv[i]);
-			exit(EXIT_FAILURE);
+			man(EXIT_FAILURE); /* keep the failure exit status the removed code used for bad options */
 		}
 	}
 
@@ -75,7 +90,7 @@ int main(int argc, char **argv)
 		starpu_pause();
 	}
 
-	sendrecv_bench(rank, NULL);
+	sendrecv_bench(rank, NULL, bidir);
 
 	if (pause_workers)
 	{

+ 1 - 1
mpi/examples/benchs/sendrecv_gemm_bench.c

@@ -56,7 +56,7 @@ static void* comm_thread_func(void* arg)
 		fprintf(stderr, "[%s] No core was available for the comm thread. You should increase STARPU_RESERVE_NCPU or decrease STARPU_NCPU\n", hostname);
 	}
 
-	sendrecv_bench(mpi_rank, &thread_barrier);
+	sendrecv_bench(mpi_rank, &thread_barrier, /* half-duplex communications */ 0);
 
 	return NULL;
 }

+ 0 - 1
mpi/examples/benchs/sendrecv_parallel_tasks_bench.c

@@ -34,7 +34,6 @@
 #include <starpu_mpi.h>
 #include "helper.h"
 #include "bench_helper.h"
-#include "abstract_sendrecv_bench.h"
 
 #define NB_WARMUP_PINGPONGS 10
 

+ 36 - 27
mpi/examples/matrix_decomposition/mpi_cholesky_codelets.c

@@ -189,8 +189,36 @@ static void run_cholesky_column(starpu_data_handle_t **data_handles, int rank, i
 	{
 		starpu_iteration_push(n);
 
+		/* First handle the diagonal block */
 		/* Row */
-		for (m = n; m<nblocks; m++)
+		m = n;
+
+		for (k = 0; k < n; k++)
+		{
+			/* Accumulate updates from TRSMs */
+			starpu_mpi_task_insert(MPI_COMM_WORLD, &cl22,
+					       STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k - m - n) : ((n == k+1) && (m == k+1))?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO,
+					       STARPU_R, data_handles[n][k],
+					       STARPU_R, data_handles[m][k],
+					       STARPU_RW | STARPU_COMMUTE, data_handles[m][n],
+					       STARPU_FLOPS, (double) FLOPS_SGEMM(nn, nn, nn),
+					       0);
+
+			/* Nobody else will need it */
+			starpu_mpi_cache_flush(MPI_COMM_WORLD, data_handles[m][k]);
+			starpu_data_wont_use(data_handles[m][k]);
+		}
+
+		k = n;
+		/* Factorize */
+		starpu_mpi_task_insert(MPI_COMM_WORLD, &cl11,
+				       STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k) : STARPU_MAX_PRIO,
+				       STARPU_RW, data_handles[k][k],
+				       STARPU_FLOPS, (double) FLOPS_SPOTRF(nn),
+				       0);
+
+		/* Row */
+		for (m = n + 1; m<nblocks; m++)
 		{
 			for (k = 0; k < n; k++)
 			{
@@ -203,34 +231,15 @@ static void run_cholesky_column(starpu_data_handle_t **data_handles, int rank, i
 						       STARPU_FLOPS, (double) FLOPS_SGEMM(nn, nn, nn),
 						       0);
 
-				if (m == n)
-				{
-					/* Nobody else will need it */
-					starpu_mpi_cache_flush(MPI_COMM_WORLD, data_handles[m][k]);
-					starpu_data_wont_use(data_handles[m][k]);
-				}
 			}
 			k = n;
-			if (m > n)
-			{
-				/* non-diagonal block, solve */
-				starpu_mpi_task_insert(MPI_COMM_WORLD, &cl21,
-						       STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k - m) : (m == k+1)?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO,
-						       STARPU_R, data_handles[k][k],
-						       STARPU_RW, data_handles[m][k],
-						       STARPU_FLOPS, (double) FLOPS_STRSM(nn, nn),
-						       0);
-			}
-			else
-			{
-				/* diagonal block, factorize */
-				starpu_mpi_task_insert(MPI_COMM_WORLD, &cl11,
-						       STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k) : STARPU_MAX_PRIO,
-						       STARPU_RW, data_handles[k][k],
-						       STARPU_FLOPS, (double) FLOPS_SPOTRF(nn),
-						       0);
-			}
-
+			/* Solve */
+			starpu_mpi_task_insert(MPI_COMM_WORLD, &cl21,
+					       STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k - m) : (m == k+1)?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO,
+					       STARPU_R, data_handles[k][k],
+					       STARPU_RW, data_handles[m][k],
+					       STARPU_FLOPS, (double) FLOPS_STRSM(nn, nn),
+					       0);
 		}
 
 		/* We won't need it any more */

+ 16 - 16
src/core/perfmodel/perfmodel_bus.c

@@ -899,7 +899,7 @@ static void load_bus_affinity_file_content(void)
 	_STARPU_DEBUG("loading affinities from %s\n", path);
 
 	f = fopen(path, "r");
-	STARPU_ASSERT(f);
+	STARPU_ASSERT_MSG(f, "Error when reading from file '%s'", path);
 
 	locked = _starpu_frdlock(f) == 0;
 
@@ -914,7 +914,7 @@ static void load_bus_affinity_file_content(void)
 
 		_starpu_drop_comments(f);
 		ret = fscanf(f, "%u\t", &dummy);
-		STARPU_ASSERT(ret == 1);
+		STARPU_ASSERT_MSG(ret == 1, "Error when reading from file '%s'", path);
 
 		STARPU_ASSERT(dummy == gpu);
 
@@ -922,11 +922,11 @@ static void load_bus_affinity_file_content(void)
 		for (numa = 0; numa < nnumas; numa++)
 		{
 			ret = fscanf(f, "%u\t", &cuda_affinity_matrix[gpu][numa]);
-			STARPU_ASSERT(ret == 1);
+			STARPU_ASSERT_MSG(ret == 1, "Error when reading from file '%s'", path);
 		}
 
 		ret = fscanf(f, "\n");
-		STARPU_ASSERT(ret == 0);
+		STARPU_ASSERT_MSG(ret == 0, "Error when reading from file '%s'", path);
 	}
 #endif /* !STARPU_USE_CUDA */
 #ifdef STARPU_USE_OPENCL
@@ -938,7 +938,7 @@ static void load_bus_affinity_file_content(void)
 
 		_starpu_drop_comments(f);
 		ret = fscanf(f, "%u\t", &dummy);
-		STARPU_ASSERT(ret == 1);
+		STARPU_ASSERT_MSG(ret == 1, "Error when reading from file '%s'", path);
 
 		STARPU_ASSERT(dummy == gpu);
 
@@ -946,11 +946,11 @@ static void load_bus_affinity_file_content(void)
 		for (numa = 0; numa < nnumas; numa++)
 		{
 			ret = fscanf(f, "%u\t", &opencl_affinity_matrix[gpu][numa]);
-			STARPU_ASSERT(ret == 1);
+			STARPU_ASSERT_MSG(ret == 1, "Error when reading from file '%s'", path);
 		}
 
 		ret = fscanf(f, "\n");
-		STARPU_ASSERT(ret == 0);
+		STARPU_ASSERT_MSG(ret == 0, "Error when reading from file '%s'", path);
 	}
 #endif /* !STARPU_USE_OPENCL */
 	if (locked)
@@ -1070,12 +1070,12 @@ static int check_bus_affinity_file(void)
 	_STARPU_DEBUG("loading affinities from %s\n", path);
 
 	f = fopen(path, "r");
-	STARPU_ASSERT(f);
+	STARPU_ASSERT_MSG(f, "Error when reading from file '%s'", path);
 
 	locked = _starpu_frdlock(f) == 0;
 
 	ret = fscanf(f, "# GPU\t");
-	STARPU_ASSERT(ret == 0);
+	STARPU_ASSERT_MSG(ret == 0, "Error when reading from file '%s'", path);
 
 	ret = fscanf(f, "NUMA%u\t", &dummy);
 
@@ -1695,7 +1695,7 @@ static void write_bus_bandwidth_file_content(void)
 	_STARPU_DEBUG("writing bandwidth to %s\n", path);
 
 	f = fopen(path, "w+");
-	STARPU_ASSERT(f);
+	STARPU_ASSERT_MSG(f, "Error when opening file (writing) '%s'", path);
 
 	locked = _starpu_fwrlock(f) == 0;
 	_starpu_fftruncate(f, 0);
@@ -2063,24 +2063,24 @@ static void check_bus_config_file(void)
 
 		// Loading configuration from file
 		f = fopen(path, "r");
-		STARPU_ASSERT(f);
+		STARPU_ASSERT_MSG(f, "Error when reading from file '%s'", path);
 		locked = _starpu_frdlock(f) == 0;
 		_starpu_drop_comments(f);
 
 		ret = fscanf(f, "%u\t", &read_cpus);
-		STARPU_ASSERT(ret == 1);
+		STARPU_ASSERT_MSG(ret == 1, "Error when reading from file '%s'", path);
 		_starpu_drop_comments(f);
 
 		ret = fscanf(f, "%u\t", &read_numa);
-		STARPU_ASSERT(ret == 1);
+		STARPU_ASSERT_MSG(ret == 1, "Error when reading from file '%s'", path);
 		_starpu_drop_comments(f);
 
 		ret = fscanf(f, "%u\t", &read_cuda);
-		STARPU_ASSERT(ret == 1);
+		STARPU_ASSERT_MSG(ret == 1, "Error when reading from file '%s'", path);
 		_starpu_drop_comments(f);
 
 		ret = fscanf(f, "%u\t", &read_opencl);
-		STARPU_ASSERT(ret == 1);
+		STARPU_ASSERT_MSG(ret == 1, "Error when reading from file '%s'", path);
 		_starpu_drop_comments(f);
 
 		ret = fscanf(f, "%u\t", &read_mic);
@@ -2135,7 +2135,7 @@ static void write_bus_config_file_content(void)
 	_STARPU_DEBUG("writing config to %s\n", path);
 
 	f = fopen(path, "w+");
-	STARPU_ASSERT(f);
+	STARPU_ASSERT_MSG(f, "Error when opening file (writing) '%s'", path);
 	locked = _starpu_fwrlock(f) == 0;
 	_starpu_fftruncate(f, 0);
 

+ 3 - 1
src/datawizard/coherency.h

@@ -145,8 +145,10 @@ struct _starpu_data_state
 	struct _starpu_data_state *root_handle; /** root of the tree */
 	struct _starpu_data_state *father_handle; /** father of the node, NULL if the current node is the root */
 	starpu_data_handle_t *active_children; /** The currently active set of read-write children */
+	unsigned active_nchildren; /** Size of active_children array */
 	starpu_data_handle_t **active_readonly_children; /** The currently active set of read-only children */
-	unsigned nactive_readonly_children; /** Size of active_readonly_children array */
+	unsigned *active_readonly_nchildren; /** Size of active_readonly_children[i] array */
+	unsigned nactive_readonly_children; /** Size of active_readonly_children and active_readonly_nchildren arrays. Actual use is given by 'partitioned' */
 	/** Our siblings in the father partitioning */
 	unsigned nsiblings; /** How many siblings */
 	starpu_data_handle_t *siblings;

+ 9 - 0
src/datawizard/filters.c

@@ -651,6 +651,7 @@ void _starpu_data_partition_submit(starpu_data_handle_t initial_handle, unsigned
 	STARPU_ASSERT_MSG(initial_handle->part_readonly == 0, "One can't submit a partition planning while a readonly partitioning is active");
 	STARPU_ASSERT_MSG(nparts > 0, "One can't partition into 0 parts");
 	initial_handle->partitioned++;
+	initial_handle->active_nchildren = children[0]->nsiblings;
 	initial_handle->active_children = children[0]->siblings;
 	_starpu_spin_unlock(&initial_handle->header_lock);
 
@@ -715,9 +716,11 @@ void starpu_data_partition_readonly_submit(starpu_data_handle_t initial_handle,
 	if (initial_handle->nactive_readonly_children < initial_handle->partitioned)
 	{
 		_STARPU_REALLOC(initial_handle->active_readonly_children, initial_handle->partitioned * sizeof(initial_handle->active_readonly_children[0]));
+		_STARPU_REALLOC(initial_handle->active_readonly_nchildren, initial_handle->partitioned * sizeof(initial_handle->active_readonly_nchildren[0]));
 		initial_handle->nactive_readonly_children = initial_handle->partitioned;
 	}
 	initial_handle->active_readonly_children[initial_handle->partitioned-1] = children[0]->siblings;
+	initial_handle->active_readonly_nchildren[initial_handle->partitioned-1] = children[0]->nsiblings;
 	_starpu_spin_unlock(&initial_handle->header_lock);
 
 	for (i = 0; i < nparts; i++)
@@ -748,8 +751,10 @@ void starpu_data_partition_readwrite_upgrade_submit(starpu_data_handle_t initial
 	STARPU_ASSERT_MSG(initial_handle->part_readonly == 1, "One can only upgrade a readonly partition planning");
 	STARPU_ASSERT_MSG(nparts > 0, "One can't partition into 0 parts");
 	initial_handle->part_readonly = 0;
+	initial_handle->active_nchildren = initial_handle->active_readonly_nchildren[0];
 	initial_handle->active_children = initial_handle->active_readonly_children[0];
 	initial_handle->active_readonly_children[0] = NULL;
+	initial_handle->active_readonly_nchildren[0] = 0;
 	_starpu_spin_unlock(&initial_handle->header_lock);
 
 	unsigned i;
@@ -782,18 +787,22 @@ void _starpu_data_unpartition_submit(starpu_data_handle_t initial_handle, unsign
 			if (initial_handle->active_readonly_children[i] == children[0]->siblings)
 			{
 				initial_handle->active_readonly_children[i] = initial_handle->active_readonly_children[initial_handle->partitioned-1];
+				initial_handle->active_readonly_nchildren[i] = initial_handle->active_readonly_nchildren[initial_handle->partitioned-1];
 				initial_handle->active_readonly_children[initial_handle->partitioned-1] = NULL;
+				initial_handle->active_readonly_nchildren[initial_handle->partitioned-1] = 0;
 				break;
 			}
 		}
 	}
 	else
 	{
+		initial_handle->active_nchildren = 0;
 		initial_handle->active_children = NULL;
 	}
 	initial_handle->partitioned--;
 	if (!initial_handle->partitioned)
 		initial_handle->part_readonly = 0;
+	initial_handle->active_nchildren = 0;
 	initial_handle->active_children = NULL;
 	_starpu_spin_unlock(&initial_handle->header_lock);
 

+ 3 - 0
src/datawizard/interfaces/data_interface.c

@@ -415,7 +415,9 @@ int _starpu_data_handle_init(starpu_data_handle_t handle, struct starpu_data_int
 	//handle->root_handle
 	//handle->father_handle
 	//handle->active_children = NULL;
+	//handle->active_nchildren = 0;
 	//handle->active_readonly_children = NULL;
+	//handle->active_readonly_nchildren = NULL;
 	//handle->nactive_readonly_children = 0;
 	//handle->nsiblings
 	//handle->siblings
@@ -1025,6 +1027,7 @@ retry_busy:
 
 	_starpu_data_clear_implicit(handle);
 	free(handle->active_readonly_children);
+	free(handle->active_readonly_nchildren);
 
 	STARPU_PTHREAD_MUTEX_DESTROY(&handle->busy_mutex);
 	STARPU_PTHREAD_COND_DESTROY(&handle->busy_cond);

+ 28 - 0
src/datawizard/user_interactions.c

@@ -689,6 +689,34 @@ void starpu_data_wont_use(starpu_data_handle_t handle)
 	if (!handle->initialized)
 		/* No value atm actually */
 		return;
+
+	if (starpu_data_get_nb_children(handle) != 0)
+	{
+		int i;
+		for(i=0 ; i<starpu_data_get_nb_children(handle) ; i++)
+			starpu_data_wont_use(starpu_data_get_child(handle, i));
+		return;
+	}
+
+	if (handle->partitioned != 0 && handle->part_readonly) /* the read-only arrays are only allocated by starpu_data_partition_readonly_submit */
+	{
+		unsigned i;
+		for(i=0 ; i<handle->partitioned; i++)
+		{
+			unsigned j;
+			for(j=0 ; j<handle->active_readonly_nchildren[i] ; j++)
+				starpu_data_wont_use(handle->active_readonly_children[i][j]);
+		}
+	}
+
+	if (handle->active_nchildren != 0)
+	{
+		unsigned j;
+		for(j=0 ; j<handle->active_nchildren ; j++)
+			starpu_data_wont_use(handle->active_children[j]);
+		return;
+	}
+
 	_STARPU_TRACE_DATA_WONT_USE(handle);
 	starpu_data_acquire_on_node_cb_sequential_consistency_quick(handle, STARPU_ACQUIRE_NO_NODE_LOCK_ALL, STARPU_R, _starpu_data_wont_use, handle, 1, 1);
 }

+ 2 - 1
starpurm/examples/Makefile.am

@@ -13,6 +13,7 @@
 #
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 #
+include $(top_srcdir)/starpu.mk
 SUBDIRS =
 
 CLEANFILES = *.gcno *.gcda *.linkinfo
@@ -20,7 +21,7 @@ CLEANFILES = *.gcno *.gcda *.linkinfo
 AM_CPPFLAGS	= -I$(top_srcdir)/include -I$(top_srcdir)/src -I$(top_builddir)/src -I$(top_builddir)/include
 AM_CPPFLAGS	+= -I$(top_srcdir)/starpurm/include -I$(top_srcdir)/starpurm/src -I$(top_builddir)/starpurm/src -I$(top_builddir)/starpurm/include
 AM_CFLAGS	= -Wall -g $(HWLOC_CFLAGS) $(DLB_CFLAGS)
-LDADD	= $(top_builddir)/starpurm/src/libstarpurm-@STARPU_EFFECTIVE_VERSION@.la $(HWLOC_LIBS) $(DLB_LIBS)
+LDADD	= $(top_builddir)/starpurm/src/libstarpurm-@STARPU_EFFECTIVE_VERSION@.la $(top_builddir)/src/@LIBSTARPU_LINK@ $(HWLOC_LIBS) $(DLB_LIBS)
 
 examplebindir = $(libdir)/starpu/examples/starpurm
 

+ 1 - 0
tests/Makefile.am

@@ -325,6 +325,7 @@ myPROGRAMS +=				\
 	datawizard/partition_dep   		\
 	datawizard/partition_lazy		\
 	datawizard/partition_init		\
+	datawizard/partition_wontuse		\
 	datawizard/gpu_register   		\
 	datawizard/gpu_ptr_register   		\
 	datawizard/variable_parameters		\

+ 45 - 0
tests/datawizard/partition_wontuse.c

@@ -0,0 +1,45 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <stdio.h>
+#include <starpu.h>
+#include "../helper.h"
+
+int main(int argc, char **argv) /* Calls starpu_data_wont_use() on a vector handle while it is partitioned */
+{
+	int ret = starpu_initialize(NULL, &argc, &argv);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	struct starpu_data_filter f = /* block filter splitting the vector into 2 children */
+	{
+	 	.filter_func = starpu_vector_filter_block,
+		.nchildren = 2
+	};
+
+	int v[10];
+	starpu_data_handle_t array_handle;
+	starpu_vector_data_register(&array_handle, STARPU_MAIN_RAM, (uintptr_t)&v, 10, sizeof(int)); /* 10 ints; contents are never initialized or read here */
+
+	starpu_data_partition(array_handle, &f);
+	starpu_data_wont_use(array_handle); /* issued on the parent handle between partition and unpartition */
+	starpu_data_unpartition(array_handle, STARPU_MAIN_RAM);
+
+	starpu_data_unregister(array_handle);
+	starpu_shutdown();
+
+	return 0;
+}

+ 3 - 0
tests/datawizard/temporary_partition.c

@@ -80,6 +80,9 @@ int main(void)
 	/* Invalidate one random piece we don't care coherency about */
 	starpu_data_invalidate_submit(handles[NPARTS/2]);
 
+	/* Try to wontuse the whole thing */
+	starpu_data_wont_use(handle);
+
 	/* Clean */
 	starpu_data_unpartition_submit(handle, NPARTS, handles, -1);
 	starpu_data_partition_clean(handle, NPARTS, handles);

+ 3 - 0
tests/datawizard/temporary_partition_implicit.c

@@ -80,6 +80,9 @@ int main(void)
 	/* Invalidate one random piece we don't care coherency about */
 	starpu_data_invalidate_submit(handles[NPARTS/2]);
 
+	/* Try to wontuse the whole thing */
+	starpu_data_wont_use(handle);
+
 	/* Clean */
 	starpu_data_unpartition_submit(handle, NPARTS, handles, -1);
 	starpu_data_partition_clean(handle, NPARTS, handles);