Browse Source

* Count MPI master-slave devices in maxnodes and in nmaxworkers
* Add --enable-maxmpidev option to increase the number of supported MPI master-slave nodes
* Print the number of MPI master-slave nodes

Corentin Salingue 8 years ago
parent
commit
78c28d1c2c
1 changed files with 188 additions and 169 deletions
  1. 188 169
      configure.ac

+ 188 - 169
configure.ac

@@ -1700,168 +1700,6 @@ AM_CONDITIONAL([STARPU_USE_AYUDAME2], [test "x$enable_ayudame2" = "xyes"])
 
 
 ###############################################################################
 ###############################################################################
 #                                                                             #
 #                                                                             #
-#                  Miscellaneous options for StarPU                           #
-#                                                                             #
-###############################################################################
-
-AC_MSG_CHECKING(how many buffers can be manipulated per task)
-AC_ARG_ENABLE(maxbuffers, [AS_HELP_STRING([--enable-maxbuffers=<nbuffers>],
-			[maximum number of buffers per task])],
-			nmaxbuffers=$enableval, nmaxbuffers=8)
-AC_MSG_RESULT($nmaxbuffers)
-AC_DEFINE_UNQUOTED(STARPU_NMAXBUFS, [$nmaxbuffers],
-		[how many buffers can be manipulated per task])
-
-AC_MSG_CHECKING(maximum number of nodes to use)
-AC_ARG_ENABLE(maxnodes, [AS_HELP_STRING([--enable-maxnodes=<nnodes>],
-			[maximum number of nodes])],
-			maxnodes=$enableval, maxnodes=0)
-
-if test x$maxnodes = x0 ; then
-	if test x$enable_simgrid = xyes ; then
-		# We still need the room for the virtual CUDA/OpenCL devices
-		maxnodes=16
-	else
-		# We have one memory node shared by all CPU workers, one node per GPU
-		# and per MIC device
-		# we add nodes to use 3 memory disks
-		nodes=4
-		if test x$enable_cuda = xyes ; then
-			# we could have used nmaxcudadev + 1, but this would certainly give an
-			# odd number.
-			nodes=`expr $nodes + $nmaxcudadev`
-		fi
-		if test x$enable_opencl = xyes ; then
-			# we could have used nmaxcudadev + 1, but this would certainly give an
-			# odd number.
-			nodes=`expr $nodes + $nmaxopencldev`
-		fi
-		if test x$enable_mic = xyes ; then
-			nodes=`expr $nodes + $nmaxmicdev`
-		fi
-		if test x$enable_rcce = xyes ; then
-			# Only 1 memory node for the shared memory.
-			nodes=`expr $nodes + 1`
-		fi
-
-		# set maxnodes to the next power of 2 greater than nodes
-		maxnodes=1
-		while test "$maxnodes" -lt "$nodes"
-		do
-			maxnodes=`expr $maxnodes \* 2`
-		done
- 	fi
-fi
-if test $maxnodes -gt 32 ; then
-	AC_MSG_ERROR([selected number of nodes ($maxnodes) can not be greater than 32])
-fi
-
-AC_MSG_CHECKING(maximum number of memory nodes)
-AC_MSG_RESULT($maxnodes)
-AC_DEFINE_UNQUOTED(STARPU_MAXNODES, [$maxnodes],
-		[maximum number of memory nodes])
-
-
-AC_MSG_CHECKING(whether allocation cache should be used)
-AC_ARG_ENABLE(allocation-cache, [AS_HELP_STRING([--disable-allocation-cache],
-			[disable data allocation cache])],
-			enable_allocation_cache=$enableval, enable_allocation_cache=yes)
-AC_MSG_RESULT($enable_allocation_cache)
-if test x$enable_allocation_cache = xyes; then
-	AC_DEFINE(STARPU_USE_ALLOCATION_CACHE, [1], [enable data allocation cache])
-fi
-
-AC_ARG_WITH(perf-model-dir, [AS_HELP_STRING([--with-perf-model-dir=<dir>], [specify where performance models should be stored])],
-	[
-		if test x$withval = xno; then
-			AC_MSG_ERROR(--without-perf-model-dir is not a valid option)
-		fi
-
-		perf_model_dir="$withval"
-		have_explicit_perf_model_dir=yes
-		AC_DEFINE_UNQUOTED(STARPU_PERF_MODEL_DIR, ["$perf_model_dir"], [performance models location])
-	], [
-		# by default, we put the performance models in
-		# $HOME/.starpu/sampling/
-		have_explicit_perf_model_dir=no
-		perf_model_dir="\$HOME/.starpu/sampling/"
-	]
-	)
-AC_MSG_CHECKING(using explicit performance model location)
-AC_MSG_RESULT($have_explicit_perf_model_dir)
-
-AC_MSG_CHECKING(performance models location)
-AC_MSG_RESULT($perf_model_dir)
-
-# On many multicore CPUs, clock cycles are not synchronized
-AC_CHECK_LIB([rt], [clock_gettime])
-AC_CHECK_FUNCS([clock_gettime])
-
-# Compute the maximum number of workers (we round it to 16 for alignment
-# purposes).
-if test x$enable_simgrid != xyes; then
-	if test x$enable_cpu != xyes; then
-		maxcpus=0
-	fi
-	if test x$enable_cuda != xyes; then
-		nmaxcudadev=0
-	fi
-	if test x$enable_opencl != xyes; then
-		nmaxopencldev=0
-	fi
-	if test x$enable_mic != xyes; then
-		nmaxmicthreads=0
-	fi
-	if test x$enable_rcce != xyes; then
-		nmaxsccdev=0
-	fi
-fi
-nmaxworkers=`expr 16 \* \( \( $maxcpus + $nmaxcudadev + $nmaxopencldev + $nmaxmicthreads + $nmaxsccdev + 15 \) / 16 \) `
-AC_MSG_CHECKING(Maximum number of workers)
-AC_MSG_RESULT($nmaxworkers)
-AC_DEFINE_UNQUOTED(STARPU_NMAXWORKERS, [$nmaxworkers], [Maximum number of workers])
-
-# Computes the maximun number of combined worker
-nmaxcombinedworkers=`expr $maxcpus + $nmaxmicthreads`  
-AC_MSG_CHECKING(Maximum number of workers combinations)
-AC_MSG_RESULT($nmaxcombinedworkers)
-AC_DEFINE_UNQUOTED(STARPU_NMAX_COMBINEDWORKERS,
-	[$nmaxcombinedworkers], [Maximum number of worker combinations])
-
-
-
-# Computes the maximum number of implementations per arch
-AC_MSG_CHECKING(maximum number of implementations)
-AC_ARG_ENABLE(maximplementations, [AS_HELP_STRING([--enable-maximplementations=<number>],
-		[maximum number of implementations])],
-		maximplementations=$enableval, maximplementations=4)
-AC_MSG_RESULT($maximplementations)
-AC_DEFINE_UNQUOTED(STARPU_MAXIMPLEMENTATIONS, [$maximplementations],
-		[maximum number of implementations])
-
-# Enable LevelDB support if requested and the lib is found
-AC_ARG_ENABLE(leveldb, [AS_HELP_STRING([--enable-leveldb],
-				   [Enable linking with LevelDB if available])],
-				   enable_leveldb=$enableval, enable_leveldb=no)
-if  test x$enable_leveldb = xyes; then
-AC_LANG_PUSH([C++])
-AC_CHECK_HEADERS([leveldb/db.h], [AC_DEFINE([STARPU_HAVE_LEVELDB], [1], [Define to 1 if you have the <leveldb/db.h> header file.])])
-STARPU_HAVE_LIBRARY(LEVELDB, [leveldb])
-AC_LANG_POP([C++])
-fi
-AM_CONDITIONAL(STARPU_HAVE_LEVELDB, test  "x$enable_leveldb" = "xyes" -a "x$ac_cv_lib_leveldb_main" = "xyes")
-
-# Defines the calibration heuristic for the history-based calibration of StarPU
-AC_MSG_CHECKING(calibration heuristic of history-based StarPU calibrator)
-AC_ARG_ENABLE(calibration-heuristic, [AS_HELP_STRING([--enable-calibration-heuristic=<number>],
-			[Define the maximum authorized deviation of StarPU history-based calibrator.])],
-			calibration_heuristic=$enableval, calibration_heuristic=50)
-AC_MSG_RESULT($calibration_heuristic)
-AC_DEFINE_UNQUOTED(STARPU_HISTORYMAXERROR, [$calibration_heuristic], [calibration heuristic value])
-
-
-###############################################################################
-#                                                                             #
 #                                    MPI                                      #
 #                                    MPI                                      #
 #                                                                             #
 #                                                                             #
 ###############################################################################
 ###############################################################################
@@ -2077,6 +1915,186 @@ AC_MSG_CHECKING(whether the master-slave mode should be enabled)
 AC_MSG_RESULT($build_mpi_master_slave)
 AC_MSG_RESULT($build_mpi_master_slave)
 AM_CONDITIONAL([STARPU_USE_MPI_MASTER_SLAVE], [test x$build_mpi_master_slave = xyes])
 AM_CONDITIONAL([STARPU_USE_MPI_MASTER_SLAVE], [test x$build_mpi_master_slave = xyes])
 
 
+# Maximum number of MPI master-slave devices. When --enable-maxmpidev is not
+# given, it defaults to 4 if the master-slave mode is built and to 0 otherwise.
+AC_MSG_CHECKING(maximum number of MPI master-slave devices)
+AC_ARG_ENABLE(maxmpidev, [AS_HELP_STRING([--enable-maxmpidev=<number>],
+			[maximum number of MPI master-slave devices])],
+			nmaxmpidev=$enableval,
+			[
+			 if test x$build_mpi_master_slave = xyes; then
+				 nmaxmpidev=4
+			 else
+				 nmaxmpidev=0
+			 fi
+			])
+# The value is later summed with expr into maxnodes and nmaxworkers, so it has
+# to be a plain number: a bare --enable-maxmpidev yields "yes" and
+# --disable-maxmpidev yields "no".
+case "x$nmaxmpidev" in
+	xyes) nmaxmpidev=4 ;;
+	xno) nmaxmpidev=0 ;;
+	x | x*[[!0-9]]*) AC_MSG_ERROR([--enable-maxmpidev expects a number, got '$nmaxmpidev']) ;;
+esac
+# Without the master-slave mode, do not reserve memory nodes or workers for
+# MPI devices, even if a count was requested explicitly.
+if test x$build_mpi_master_slave != xyes; then
+	nmaxmpidev=0
+fi
+AC_MSG_RESULT($nmaxmpidev)
+AC_DEFINE_UNQUOTED(STARPU_MAXMPIDEVS, [$nmaxmpidev], [maximum number of MPI devices])
+
+
+###############################################################################
+#                                                                             #
+#                  Miscellaneous options for StarPU                           #
+#                                                                             #
+###############################################################################
+
+# Maximum number of buffers per task: --enable-maxbuffers=<nbuffers>, 8 by default.
+AC_ARG_ENABLE([maxbuffers],
+	[AS_HELP_STRING([--enable-maxbuffers=<nbuffers>], [maximum number of buffers per task])],
+	[nmaxbuffers=$enableval],
+	[nmaxbuffers=8])
+AC_MSG_CHECKING([how many buffers can be manipulated per task])
+AC_MSG_RESULT([$nmaxbuffers])
+AC_DEFINE_UNQUOTED([STARPU_NMAXBUFS], [$nmaxbuffers],
+	[how many buffers can be manipulated per task])
+
+# Maximum number of memory nodes. --enable-maxnodes=<nnodes> sets it
+# explicitly; when the option is absent or 0, the value is derived below from
+# the device types that are enabled.
+AC_ARG_ENABLE(maxnodes, [AS_HELP_STRING([--enable-maxnodes=<nnodes>],
+			[maximum number of nodes])],
+			maxnodes=$enableval, maxnodes=0)
+
+if test x$maxnodes = x0 ; then
+	if test x$enable_simgrid = xyes ; then
+		# We still need the room for the virtual CUDA/OpenCL devices
+		maxnodes=16
+	else
+		# We have one memory node shared by all CPU workers, one node per GPU
+		# and per MIC device
+		# we add nodes to use 3 memory disks
+		nodes=4
+		if test x$enable_cuda = xyes ; then
+			# we could have used nmaxcudadev + 1, but this would certainly give an
+			# odd number.
+			nodes=`expr $nodes + $nmaxcudadev`
+		fi
+		if test x$enable_opencl = xyes ; then
+			# we could have used nmaxopencldev + 1, but this would certainly give an
+			# odd number.
+			nodes=`expr $nodes + $nmaxopencldev`
+		fi
+		if test x$enable_mic = xyes ; then
+			nodes=`expr $nodes + $nmaxmicdev`
+		fi
+		if test x$enable_rcce = xyes ; then
+			# Only 1 memory node for the shared memory.
+			nodes=`expr $nodes + 1`
+		fi
+
+		# Only count the MPI master-slave devices when that mode is built:
+		# nmaxmpidev may be non-zero without it when --enable-maxmpidev is given.
+		if test x$build_mpi_master_slave = xyes ; then
+			nodes=`expr $nodes + $nmaxmpidev`
+		fi
+
+		# set maxnodes to the next power of 2 greater than nodes
+		maxnodes=1
+		while test "$maxnodes" -lt "$nodes"
+		do
+			maxnodes=`expr $maxnodes \* 2`
+		done
+	fi
+fi
+if test "$maxnodes" -gt 32 ; then
+	AC_MSG_ERROR([selected number of nodes ($maxnodes) can not be greater than 32])
+fi
+
+AC_MSG_CHECKING(maximum number of memory nodes)
+AC_MSG_RESULT($maxnodes)
+AC_DEFINE_UNQUOTED(STARPU_MAXNODES, [$maxnodes],
+		[maximum number of memory nodes])
+
+
+# The data allocation cache is used unless --disable-allocation-cache is given.
+AC_ARG_ENABLE([allocation-cache],
+	[AS_HELP_STRING([--disable-allocation-cache], [disable data allocation cache])],
+	[enable_allocation_cache=$enableval],
+	[enable_allocation_cache=yes])
+AC_MSG_CHECKING([whether allocation cache should be used])
+AC_MSG_RESULT([$enable_allocation_cache])
+AS_IF([test x$enable_allocation_cache = xyes],
+	[AC_DEFINE([STARPU_USE_ALLOCATION_CACHE], [1], [enable data allocation cache])])
+
+# Location of the performance models. An explicit --with-perf-model-dir=<dir>
+# is recorded in STARPU_PERF_MODEL_DIR; otherwise nothing is defined here and
+# the default location is only reported.
+AC_ARG_WITH(perf-model-dir, [AS_HELP_STRING([--with-perf-model-dir=<dir>], [specify where performance models should be stored])],
+	[
+		# The value is quoted so that a directory name containing blanks
+		# does not split into several words.
+		if test "x$withval" = xno; then
+			AC_MSG_ERROR(--without-perf-model-dir is not a valid option)
+		fi
+		# A bare --with-perf-model-dir sets withval to yes, which is not a
+		# directory.
+		if test "x$withval" = xyes; then
+			AC_MSG_ERROR(--with-perf-model-dir requires a directory argument)
+		fi
+
+		perf_model_dir="$withval"
+		have_explicit_perf_model_dir=yes
+		AC_DEFINE_UNQUOTED(STARPU_PERF_MODEL_DIR, ["$perf_model_dir"], [performance models location])
+	], [
+		# by default, we put the performance models in
+		# $HOME/.starpu/sampling/
+		have_explicit_perf_model_dir=no
+		perf_model_dir="\$HOME/.starpu/sampling/"
+	]
+	)
+AC_MSG_CHECKING(using explicit performance model location)
+AC_MSG_RESULT($have_explicit_perf_model_dir)
+
+AC_MSG_CHECKING(performance models location)
+AC_MSG_RESULT($perf_model_dir)
+
+# On many multicore CPUs, clock cycles are not synchronized
+# Look for clock_gettime(): the librt check comes first so that, if the
+# function lives there, the library is already in LIBS for the function check.
+AC_CHECK_LIB([rt], [clock_gettime])
+AC_CHECK_FUNCS([clock_gettime])
+
+# Maximum number of workers, rounded up to a multiple of 16 for alignment
+# purposes. Except in simgrid builds, a device type that is not enabled
+# contributes no worker.
+AS_IF([test x$enable_simgrid != xyes], [
+	AS_IF([test x$enable_cpu != xyes], [maxcpus=0])
+	AS_IF([test x$enable_cuda != xyes], [nmaxcudadev=0])
+	AS_IF([test x$enable_opencl != xyes], [nmaxopencldev=0])
+	AS_IF([test x$enable_mic != xyes], [nmaxmicthreads=0])
+	AS_IF([test x$enable_rcce != xyes], [nmaxsccdev=0])
+])
+# The master is assumed to add nmaxmpidev workers, whereas slaves add none.
+nmaxworkers=`expr 16 \* \( \( $maxcpus + $nmaxcudadev + $nmaxopencldev + $nmaxmicthreads + $nmaxmpidev + $nmaxsccdev + 15 \) / 16 \) `
+AC_MSG_CHECKING([Maximum number of workers])
+AC_MSG_RESULT([$nmaxworkers])
+AC_DEFINE_UNQUOTED([STARPU_NMAXWORKERS], [$nmaxworkers], [Maximum number of workers])
+
+# Maximum number of combined workers: the sum of the CPU and MIC thread limits.
+nmaxcombinedworkers=`expr $maxcpus + $nmaxmicthreads`
+AC_MSG_CHECKING([Maximum number of workers combinations])
+AC_MSG_RESULT([$nmaxcombinedworkers])
+AC_DEFINE_UNQUOTED([STARPU_NMAX_COMBINEDWORKERS], [$nmaxcombinedworkers],
+	[Maximum number of worker combinations])
+
+
+
+# Maximum number of implementations per arch:
+# --enable-maximplementations=<number>, 4 by default.
+AC_ARG_ENABLE([maximplementations],
+	[AS_HELP_STRING([--enable-maximplementations=<number>], [maximum number of implementations])],
+	[maximplementations=$enableval],
+	[maximplementations=4])
+AC_MSG_CHECKING([maximum number of implementations])
+AC_MSG_RESULT([$maximplementations])
+AC_DEFINE_UNQUOTED([STARPU_MAXIMPLEMENTATIONS], [$maximplementations],
+	[maximum number of implementations])
+
+# Enable LevelDB support if requested and the lib is found
+# (off by default; the checks below only run with --enable-leveldb)
+AC_ARG_ENABLE(leveldb, [AS_HELP_STRING([--enable-leveldb],
+				   [Enable linking with LevelDB if available])],
+				   enable_leveldb=$enableval, enable_leveldb=no)
+if  test x$enable_leveldb = xyes; then
+# The header and library checks are run with the C++ compiler.
+AC_LANG_PUSH([C++])
+AC_CHECK_HEADERS([leveldb/db.h], [AC_DEFINE([STARPU_HAVE_LEVELDB], [1], [Define to 1 if you have the <leveldb/db.h> header file.])])
+STARPU_HAVE_LIBRARY(LEVELDB, [leveldb])
+AC_LANG_POP([C++])
+fi
+# NOTE(review): presumably ac_cv_lib_leveldb_main is set by STARPU_HAVE_LIBRARY,
+# which is defined elsewhere - confirm, otherwise this conditional is never true.
+AM_CONDITIONAL(STARPU_HAVE_LEVELDB, test  "x$enable_leveldb" = "xyes" -a "x$ac_cv_lib_leveldb_main" = "xyes")
+
+# Calibration heuristic of the history-based calibrator:
+# --enable-calibration-heuristic=<number>, 50 by default.
+AC_ARG_ENABLE([calibration-heuristic],
+	[AS_HELP_STRING([--enable-calibration-heuristic=<number>], [Define the maximum authorized deviation of StarPU history-based calibrator.])],
+	[calibration_heuristic=$enableval],
+	[calibration_heuristic=50])
+AC_MSG_CHECKING([calibration heuristic of history-based StarPU calibrator])
+AC_MSG_RESULT([$calibration_heuristic])
+AC_DEFINE_UNQUOTED([STARPU_HISTORYMAXERROR], [$calibration_heuristic], [calibration heuristic value])
+
 
 
 ###############################################################################
 ###############################################################################
 #                                                                             #
 #                                                                             #
@@ -3047,13 +3065,14 @@ AC_MSG_NOTICE([
         (Note these numbers do not represent the number of detected
         (Note these numbers do not represent the number of detected
 	devices, but the maximum number of devices StarPU can manage)
 	devices, but the maximum number of devices StarPU can manage)
 
 
-	Maximum number of CPUs:           $maxcpus
+	Maximum number of CPUs:                     $maxcpus
-	Maximum number of CUDA devices:   $nmaxcudadev
+	Maximum number of CUDA devices:             $nmaxcudadev
-	Maximum number of OpenCL devices: $nmaxopencldev
+	Maximum number of OpenCL devices:           $nmaxopencldev
-	Maximum number of SCC devices:    $nmaxsccdev
+	Maximum number of SCC devices:              $nmaxsccdev
-	Maximum number of MIC threads:    $nmaxmicthreads
+	Maximum number of MIC threads:              $nmaxmicthreads
-	Maximum number of memory nodes:   $maxnodes
+	Maximum number of MPI master-slave devices: $nmaxmpidev
-	Maximum number of task buffers:   $nmaxbuffers
+	Maximum number of memory nodes:             $maxnodes
+	Maximum number of task buffers:             $nmaxbuffers
 
 
 	GPU-GPU transfers: $have_cuda_memcpy_peer
 	GPU-GPU transfers: $have_cuda_memcpy_peer
 	Allocation cache:  $enable_allocation_cache
 	Allocation cache:  $enable_allocation_cache