
Add STARPU_MPI_FAKE_SIZE and STARPU_MPI_FAKE_RANK to allow simulating
the execution of just one MPI node.

Samuel Thibault, 8 years ago
commit 37578a0d00

+ 2 - 0
ChangeLog

@@ -42,6 +42,8 @@ New features:
     files.
   * Add STARPU_FXT_TRACE environment variable.
   * Add starpu_data_set_user_data and starpu_data_get_user_data.
+  * Add STARPU_MPI_FAKE_SIZE and STARPU_MPI_FAKE_RANK to allow simulating
+    the execution of just one MPI node.
 
 StarPU 1.2.0 (svn revision 18521)
 ==============================================

+ 15 - 0
doc/doxygen/chapters/410_mpi_support.doxy

@@ -501,6 +501,21 @@ If the distribution function is not too complex and the compiler is very good,
 the latter can even optimize the <c>for</c> loops, thus dramatically reducing
 the cost of task submission.
 
+To quickly estimate how long task submission takes, and notably how much
+pruning saves, an easy way is to measure the submission time of just one of
+the MPI nodes. This can be achieved by running the application on a single
+MPI node with the following environment variables:
+
+\code
+export STARPU_DISABLE_KERNELS=1
+export STARPU_MPI_FAKE_RANK=2
+export STARPU_MPI_FAKE_SIZE=1024
+\endcode
+
+Here we have disabled the kernel function calls to skip the actual computation
+time and keep only the submission time, and we have asked StarPU to pretend it
+is running as MPI node 2 out of 1024 nodes.
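+
+With these variables set, the usual rank and size queries simply return the
+faked values, so the application's task distribution code runs as if it were
+node 2 of a 1024-node execution. A minimal sketch of what the application
+sees under the settings above:
+
+\code
+int rank, size;
+starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); /* 2, from STARPU_MPI_FAKE_RANK */
+starpu_mpi_comm_size(MPI_COMM_WORLD, &size); /* 1024, from STARPU_MPI_FAKE_SIZE */
+\endcode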
+
 A function starpu_mpi_task_build() is also provided, whose aim is only to
 construct the task structure. All MPI nodes need to call the
 function; only the node which is to execute the task will return a

+ 20 - 0
doc/doxygen/chapters/501_environment_variables.doxy

@@ -521,6 +521,26 @@ it prints messages on the standard output when data are added or removed from th
 communication cache.
 </dd>
 
+<dt>STARPU_MPI_FAKE_SIZE</dt>
+<dd>
+\anchor STARPU_MPI_FAKE_SIZE
+\addindex __env__STARPU_MPI_FAKE_SIZE
+Setting this variable to a number makes StarPU believe that it runs on that
+many MPI nodes, even if it was started on only one. This allows, for instance,
+simulating the execution of one node of a big cluster without actually running
+the rest. It of course does not provide computation results or timings.
+</dd>
+
+<dt>STARPU_MPI_FAKE_RANK</dt>
+<dd>
+\anchor STARPU_MPI_FAKE_RANK
+\addindex __env__STARPU_MPI_FAKE_RANK
+Setting this variable to a number makes StarPU believe that it runs as the MPI
+node with that rank, even if it was started on only one node. This allows, for
+instance, simulating the execution of one node of a big cluster without
+actually running the rest. It of course does not provide computation results
+or timings.
+</dd>
+
 <dt>STARPU_SIMGRID_CUDA_MALLOC_COST</dt>
 <dd>
 \anchor STARPU_SIMGRID_CUDA_MALLOC_COST

+ 25 - 0
mpi/src/starpu_mpi.c

@@ -77,6 +77,8 @@ static int running = 0;
 static int _mpi_world_size;
 static int _mpi_world_rank;
 #endif
+int _starpu_mpi_fake_world_size;
+int _starpu_mpi_fake_world_rank;
 
 /* Count requests posted by the application and not yet submitted to MPI */
 static starpu_pthread_mutex_t mutex_posted_requests;
@@ -296,6 +298,11 @@ static void _starpu_mpi_submit_ready_request(void *arg)
 	_STARPU_MPI_LOG_OUT();
 }
 
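+/* Acquire callback used when faking communications: the data does not
+ * actually need to be sent or received, so just release it. */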
+static void nop_acquire_cb(void *arg)
+{
+	starpu_data_release(arg);
+}
+
 static struct _starpu_mpi_req *_starpu_mpi_isend_irecv_common(starpu_data_handle_t data_handle,
 							      int srcdst, int data_tag, MPI_Comm comm,
 							      unsigned detached, unsigned sync, void (*callback)(void *), void *arg,
@@ -307,6 +314,12 @@ static struct _starpu_mpi_req *_starpu_mpi_isend_irecv_common(starpu_data_handle
 {
 	struct _starpu_mpi_req *req;
 
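+	/* When faking the MPI execution (STARPU_MPI_FAKE_SIZE is set), no
+	 * communication actually happens: just acquire and release the data
+	 * so that its dependencies are still honored. */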
+	if (_starpu_mpi_fake_world_size != -1)
+	{
+		starpu_data_acquire_cb_sequential_consistency(data_handle, mode, nop_acquire_cb, data_handle, sequential_consistency);
+		return NULL;
+	}
+
 	_STARPU_MPI_LOG_IN();
 	_STARPU_MPI_INC_POSTED_REQUESTS(1);
 
@@ -1292,6 +1305,8 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 	_mpi_world_size = worldsize;
 	_mpi_world_rank = rank;
 #endif
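+	/* These are -1 when the corresponding variable is not set. */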
+	_starpu_mpi_fake_world_size = starpu_get_env_number("STARPU_MPI_FAKE_SIZE");
+	_starpu_mpi_fake_world_rank = starpu_get_env_number("STARPU_MPI_FAKE_RANK");
 
 #ifdef STARPU_SIMGRID
 	/* Now that MPI is set up, let the rest of simgrid get initialized */
@@ -1898,6 +1913,11 @@ void starpu_mpi_data_migrate(MPI_Comm comm, starpu_data_handle_t data, int new_r
 
 int starpu_mpi_comm_size(MPI_Comm comm, int *size)
 {
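+	/* Report the faked world size if STARPU_MPI_FAKE_SIZE is set. */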
+	if (_starpu_mpi_fake_world_size != -1)
+	{
+		*size = _starpu_mpi_fake_world_size;
+		return 0;
+	}
 #ifdef STARPU_SIMGRID
 	STARPU_MPI_ASSERT_MSG(comm == MPI_COMM_WORLD, "StarPU-SMPI only works with MPI_COMM_WORLD for now");
 	*size = _mpi_world_size;
@@ -1909,6 +1929,11 @@ int starpu_mpi_comm_size(MPI_Comm comm, int *size)
 
 int starpu_mpi_comm_rank(MPI_Comm comm, int *rank)
 {
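+	/* Report the faked rank if STARPU_MPI_FAKE_RANK is set. */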
+	if (_starpu_mpi_fake_world_rank != -1)
+	{
+		*rank = _starpu_mpi_fake_world_rank;
+		return 0;
+	}
 #ifdef STARPU_SIMGRID
 	STARPU_MPI_ASSERT_MSG(comm == MPI_COMM_WORLD, "StarPU-SMPI only works with MPI_COMM_WORLD for now");
 	*rank = _mpi_world_rank;

+ 2 - 0
mpi/src/starpu_mpi_private.h

@@ -39,6 +39,8 @@ extern int _starpu_debug_level_max;
 void _starpu_mpi_set_debug_level_min(int level);
 void _starpu_mpi_set_debug_level_max(int level);
 #endif
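+/* World size and rank faked through STARPU_MPI_FAKE_SIZE and
+ * STARPU_MPI_FAKE_RANK, or -1 when these variables are not set. */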
+extern int _starpu_mpi_fake_world_size;
+extern int _starpu_mpi_fake_world_rank;
 
 #ifdef STARPU_NO_ASSERT
 #  define STARPU_MPI_ASSERT_MSG(x, msg, ...)	do { if (0) { (void) (x); }} while(0)