
Add STARPU_MPI_FAKE_SIZE and STARPU_MPI_FAKE_RANK to allow simulating the
execution of just one MPI node.

Samuel Thibault · 8 years ago · commit 37578a0d00

+ 2 - 0
ChangeLog

@@ -42,6 +42,8 @@ New features:
    files.
  * Add STARPU_FXT_TRACE environment variable.
  * Add starpu_data_set_user_data and starpu_data_get_user_data.
+  * Add STARPU_MPI_FAKE_SIZE and STARPU_MPI_FAKE_RANK to allow simulating
+    the execution of just one MPI node.

StarPU 1.2.0 (svn revision 18521)
==============================================

+ 15 - 0
doc/doxygen/chapters/410_mpi_support.doxy

@@ -501,6 +501,21 @@ If the distribution function is not too complex and the compiler is very good,
the latter can even optimize the <c>for</c> loops, thus dramatically reducing
the cost of task submission.

+To quickly estimate how long task submission takes, and notably how much
+pruning saves, an easy way is to measure the submission time of just one of
+the MPI nodes. This can be achieved by running the application on a single
+MPI node with the following environment variables:
+
+\code
+export STARPU_DISABLE_KERNELS=1
+export STARPU_MPI_FAKE_RANK=2
+export STARPU_MPI_FAKE_SIZE=1024
+\endcode
+
+Here we have disabled the kernel function calls to skip the actual computation
+time and keep only the submission time, and we have asked StarPU to pretend it
+is running as MPI node 2 out of 1024 nodes.
+
A function starpu_mpi_task_build() is also provided with the aim of
only constructing the task structure. All MPI nodes need to call the
function; only the node which is to execute the task will return a
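
To make the measurement recipe above concrete, here is a minimal
application-side sketch. It is only an illustration: the codelet <c>cl</c>,
the handles <c>data[i]</c>, and the problem size <c>N</c> are hypothetical,
and the handles are assumed to have been registered beforehand (e.g. with
starpu_mpi_data_register()). Run it with the three environment variables
above exported:

\code
#include <stdio.h>
#include <starpu_mpi.h>

/* Hypothetical codelet and registered data handles. */
extern struct starpu_codelet cl;
extern starpu_data_handle_t data[];
extern int N;

void measure_submission(void)
{
	int i;
	/* starpu_timing_now() returns a timestamp in microseconds. */
	double start = starpu_timing_now();

	for (i = 0; i < N; i++)
		starpu_mpi_task_insert(MPI_COMM_WORLD, &cl,
				       STARPU_RW, data[i],
				       0);

	double end = starpu_timing_now();
	fprintf(stderr, "task submission took %f us\n", end - start);
}
\endcode

Since STARPU_DISABLE_KERNELS=1 skips the kernel functions and the faked MPI
mode turns sends and receives into no-ops, the measured time is dominated by
task submission itself.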

+ 20 - 0
doc/doxygen/chapters/501_environment_variables.doxy

@@ -521,6 +521,26 @@ it prints messages on the standard output when data are added or removed from th
communication cache.
</dd>

+<dt>STARPU_MPI_FAKE_SIZE</dt>
+<dd>
+\anchor STARPU_MPI_FAKE_SIZE
+\addindex __env__STARPU_MPI_FAKE_SIZE
+Setting this variable to a number makes StarPU believe that it runs on that
+many MPI nodes, even if the application was actually run on only one node.
+This makes it possible e.g. to simulate the execution of one of the nodes of
+a big cluster without actually running the rest. Of course, it does not
+provide computation results or timings.
+</dd>
+
+<dt>STARPU_MPI_FAKE_RANK</dt>
+<dd>
+\anchor STARPU_MPI_FAKE_RANK
+\addindex __env__STARPU_MPI_FAKE_RANK
+Setting this variable to a number makes StarPU believe that it runs as the
+given MPI node, even if the application was actually run on only one node.
+This makes it possible e.g. to simulate the execution of one of the nodes of
+a big cluster without actually running the rest. Of course, it does not
+provide computation results or timings.
+</dd>
+
<dt>STARPU_SIMGRID_CUDA_MALLOC_COST</dt>
<dd>
\anchor STARPU_SIMGRID_CUDA_MALLOC_COST
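
As an illustration of the semantics documented above: after starpu_mpi_init(),
starpu_mpi_comm_rank() and starpu_mpi_comm_size() report the faked values, so
a single process behaves as if it were one node of a larger cluster. A minimal
sketch, assuming STARPU_MPI_FAKE_RANK=2 and STARPU_MPI_FAKE_SIZE=1024 were
exported beforehand:

\code
#include <stdio.h>
#include <starpu_mpi.h>

int main(int argc, char **argv)
{
	int rank, size;

	starpu_mpi_init(&argc, &argv, 1);
	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);

	/* Prints "node 2 of 1024" even though a single process is running. */
	printf("node %d of %d\n", rank, size);

	starpu_mpi_shutdown();
	return 0;
}
\endcode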

+ 25 - 0
mpi/src/starpu_mpi.c

@@ -77,6 +77,8 @@ static int running = 0;
static int _mpi_world_size;
static int _mpi_world_rank;
#endif
+/* Fake MPI world size and rank, read from the STARPU_MPI_FAKE_SIZE and
+ * STARPU_MPI_FAKE_RANK environment variables (-1 when unset). */
+int _starpu_mpi_fake_world_size;
+int _starpu_mpi_fake_world_rank;

/* Count requests posted by the application and not yet submitted to MPI */
static starpu_pthread_mutex_t mutex_posted_requests;
@@ -296,6 +298,11 @@ static void _starpu_mpi_submit_ready_request(void *arg)
	_STARPU_MPI_LOG_OUT();
}

+/* Acquire callback which merely releases the data: in fake mode we still go
+ * through acquire/release so that sequential-consistency dependencies are
+ * honoured, but no actual communication is performed. */
+static void nop_acquire_cb(void *arg)
+{
+	starpu_data_release(arg);
+}
+
static struct _starpu_mpi_req *_starpu_mpi_isend_irecv_common(starpu_data_handle_t data_handle,
							      int srcdst, int data_tag, MPI_Comm comm,
							      unsigned detached, unsigned sync, void (*callback)(void *), void *arg,
@@ -307,6 +314,12 @@ static struct _starpu_mpi_req *_starpu_mpi_isend_irecv_common(starpu_data_handle
{
	struct _starpu_mpi_req *req;

+	if (_starpu_mpi_fake_world_size != -1)
+	{
+		/* Running in fake mode: do not perform any actual MPI
+		 * communication, just acquire and release the handle to
+		 * preserve the task dependency graph. */
+		starpu_data_acquire_cb_sequential_consistency(data_handle, mode, nop_acquire_cb, data_handle, sequential_consistency);
+		return NULL;
+	}
+
	_STARPU_MPI_LOG_IN();
	_STARPU_MPI_INC_POSTED_REQUESTS(1);

@@ -1292,6 +1305,8 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
	_mpi_world_size = worldsize;
	_mpi_world_rank = rank;
#endif
+	_starpu_mpi_fake_world_size = starpu_get_env_number("STARPU_MPI_FAKE_SIZE");
+	_starpu_mpi_fake_world_rank = starpu_get_env_number("STARPU_MPI_FAKE_RANK");

#ifdef STARPU_SIMGRID
	/* Now that MPI is set up, let the rest of simgrid get initialized */
@@ -1898,6 +1913,11 @@ void starpu_mpi_data_migrate(MPI_Comm comm, starpu_data_handle_t data, int new_r

int starpu_mpi_comm_size(MPI_Comm comm, int *size)
{
+	if (_starpu_mpi_fake_world_size != -1)
+	{
+		*size = _starpu_mpi_fake_world_size;
+		return 0;
+	}
#ifdef STARPU_SIMGRID
	STARPU_MPI_ASSERT_MSG(comm == MPI_COMM_WORLD, "StarPU-SMPI only works with MPI_COMM_WORLD for now");
	*size = _mpi_world_size;
@@ -1909,6 +1929,11 @@ int starpu_mpi_comm_size(MPI_Comm comm, int *size)

int starpu_mpi_comm_rank(MPI_Comm comm, int *rank)
{
+	if (_starpu_mpi_fake_world_rank != -1)
+	{
+		*rank = _starpu_mpi_fake_world_rank;
+		return 0;
+	}
#ifdef STARPU_SIMGRID
	STARPU_MPI_ASSERT_MSG(comm == MPI_COMM_WORLD, "StarPU-SMPI only works with MPI_COMM_WORLD for now");
	*rank = _mpi_world_rank;

+ 2 - 0
mpi/src/starpu_mpi_private.h

@@ -39,6 +39,8 @@ extern int _starpu_debug_level_max;
void _starpu_mpi_set_debug_level_min(int level);
void _starpu_mpi_set_debug_level_max(int level);
#endif
+extern int _starpu_mpi_fake_world_size;
+extern int _starpu_mpi_fake_world_rank;

#ifdef STARPU_NO_ASSERT
#  define STARPU_MPI_ASSERT_MSG(x, msg, ...)	do { if (0) { (void) (x); }} while(0)