
Add STARPU_MPI_FAKE_SIZE and STARPU_MPI_FAKE_RANK to allow simulating the
execution of just one MPI node.

Samuel Thibault · 8 years ago · commit 37578a0d00

+ 2 - 0
ChangeLog

@@ -42,6 +42,8 @@ New features:
    files.
  * Add STARPU_FXT_TRACE environment variable.
  * Add starpu_data_set_user_data and starpu_data_get_user_data.
+  * Add STARPU_MPI_FAKE_SIZE and STARPU_MPI_FAKE_RANK to allow simulating
+    the execution of just one MPI node.

StarPU 1.2.0 (svn revision 18521)
==============================================

+ 15 - 0
doc/doxygen/chapters/410_mpi_support.doxy

@@ -501,6 +501,21 @@ If the distribution function is not too complex and the compiler is very good,
the latter can even optimize the <c>for</c> loops, thus dramatically reducing
the cost of task submission.

+To quickly estimate how long task submission takes, and notably how much
+pruning saves, an easy way is to measure the submission time of just one of
+the MPI nodes. This can be achieved by running the application on a single
+MPI node with the following environment variables:
+
+\code
+export STARPU_DISABLE_KERNELS=1
+export STARPU_MPI_FAKE_RANK=2
+export STARPU_MPI_FAKE_SIZE=1024
+\endcode
+
+Here we have disabled the kernel function calls to skip the actual computation
+time and keep only the submission time, and we have asked StarPU to pretend it
+is running as MPI node 2 out of 1024 nodes.
+
A function starpu_mpi_task_build() is also provided with the aim of
only constructing the task structure. All MPI nodes need to call the
function; only the node which is to execute the task will return a
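
To make the measurement recipe above concrete, here is a minimal
application-side sketch. It is only an illustration: the codelet <c>cl</c>,
the handles <c>data[i]</c>, and the problem size <c>N</c> are hypothetical,
and the handles are assumed to have been registered beforehand (e.g. with
starpu_mpi_data_register()). Run it with the three environment variables
above exported:

\code
#include <stdio.h>
#include <starpu_mpi.h>

/* Hypothetical codelet and registered data handles. */
extern struct starpu_codelet cl;
extern starpu_data_handle_t data[];
extern int N;

void measure_submission(void)
{
	int i;
	/* starpu_timing_now() returns a timestamp in microseconds. */
	double start = starpu_timing_now();

	for (i = 0; i < N; i++)
		starpu_mpi_task_insert(MPI_COMM_WORLD, &cl,
				       STARPU_RW, data[i],
				       0);

	double end = starpu_timing_now();
	fprintf(stderr, "task submission took %f us\n", end - start);
}
\endcode

Since STARPU_DISABLE_KERNELS=1 skips the kernel functions and the faked MPI
mode turns sends and receives into no-ops, the measured time is dominated by
task submission itself.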

+ 20 - 0
doc/doxygen/chapters/501_environment_variables.doxy

@@ -521,6 +521,26 @@ it prints messages on the standard output when data are added or removed from th
communication cache.
</dd>

+<dt>STARPU_MPI_FAKE_SIZE</dt>
+<dd>
+\anchor STARPU_MPI_FAKE_SIZE
+\addindex __env__STARPU_MPI_FAKE_SIZE
+Setting this variable to a number makes StarPU believe that it runs on that
+many MPI nodes, even if the application was actually run on only one node.
+This makes it possible e.g. to simulate the execution of one of the nodes of
+a big cluster without actually running the rest. Of course, it does not
+provide computation results or timings.
+</dd>
+
+<dt>STARPU_MPI_FAKE_RANK</dt>
+<dd>
+\anchor STARPU_MPI_FAKE_RANK
+\addindex __env__STARPU_MPI_FAKE_RANK
+Setting this variable to a number makes StarPU believe that it runs as the
+given MPI node, even if the application was actually run on only one node.
+This makes it possible e.g. to simulate the execution of one of the nodes of
+a big cluster without actually running the rest. Of course, it does not
+provide computation results or timings.
+</dd>
+
<dt>STARPU_SIMGRID_CUDA_MALLOC_COST</dt>
<dd>
\anchor STARPU_SIMGRID_CUDA_MALLOC_COST
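
As an illustration of the semantics documented above: after starpu_mpi_init(),
starpu_mpi_comm_rank() and starpu_mpi_comm_size() report the faked values, so
a single process behaves as if it were one node of a larger cluster. A minimal
sketch, assuming STARPU_MPI_FAKE_RANK=2 and STARPU_MPI_FAKE_SIZE=1024 were
exported beforehand:

\code
#include <stdio.h>
#include <starpu_mpi.h>

int main(int argc, char **argv)
{
	int rank, size;

	starpu_mpi_init(&argc, &argv, 1);
	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
	starpu_mpi_comm_size(MPI_COMM_WORLD, &size);

	/* Prints "node 2 of 1024" even though a single process is running. */
	printf("node %d of %d\n", rank, size);

	starpu_mpi_shutdown();
	return 0;
}
\endcode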

+ 25 - 0
mpi/src/starpu_mpi.c

@@ -77,6 +77,8 @@ static int running = 0;
static int _mpi_world_size;
static int _mpi_world_rank;
#endif
+/* Fake MPI world size and rank, read from the STARPU_MPI_FAKE_SIZE and
+ * STARPU_MPI_FAKE_RANK environment variables (-1 when unset). */
+int _starpu_mpi_fake_world_size;
+int _starpu_mpi_fake_world_rank;

/* Count requests posted by the application and not yet submitted to MPI */
static starpu_pthread_mutex_t mutex_posted_requests;
@@ -296,6 +298,11 @@ static void _starpu_mpi_submit_ready_request(void *arg)
	_STARPU_MPI_LOG_OUT();
}

+/* Acquire callback which merely releases the data: in fake mode we still go
+ * through acquire/release so that sequential-consistency dependencies are
+ * honoured, but no actual communication is performed. */
+static void nop_acquire_cb(void *arg)
+{
+	starpu_data_release(arg);
+}
+
static struct _starpu_mpi_req *_starpu_mpi_isend_irecv_common(starpu_data_handle_t data_handle,
							      int srcdst, int data_tag, MPI_Comm comm,
							      unsigned detached, unsigned sync, void (*callback)(void *), void *arg,
@@ -307,6 +314,12 @@ static struct _starpu_mpi_req *_starpu_mpi_isend_irecv_common(starpu_data_handle
{
	struct _starpu_mpi_req *req;

+	if (_starpu_mpi_fake_world_size != -1)
+	{
+		/* Running in fake mode: do not perform any actual MPI
+		 * communication, just acquire and release the handle to
+		 * preserve the task dependency graph. */
+		starpu_data_acquire_cb_sequential_consistency(data_handle, mode, nop_acquire_cb, data_handle, sequential_consistency);
+		return NULL;
+	}
+
	_STARPU_MPI_LOG_IN();
	_STARPU_MPI_INC_POSTED_REQUESTS(1);

@@ -1292,6 +1305,8 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
	_mpi_world_size = worldsize;
	_mpi_world_rank = rank;
#endif
+	_starpu_mpi_fake_world_size = starpu_get_env_number("STARPU_MPI_FAKE_SIZE");
+	_starpu_mpi_fake_world_rank = starpu_get_env_number("STARPU_MPI_FAKE_RANK");

#ifdef STARPU_SIMGRID
	/* Now that MPI is set up, let the rest of simgrid get initialized */
@@ -1898,6 +1913,11 @@ void starpu_mpi_data_migrate(MPI_Comm comm, starpu_data_handle_t data, int new_r

int starpu_mpi_comm_size(MPI_Comm comm, int *size)
{
+	if (_starpu_mpi_fake_world_size != -1)
+	{
+		*size = _starpu_mpi_fake_world_size;
+		return 0;
+	}
#ifdef STARPU_SIMGRID
	STARPU_MPI_ASSERT_MSG(comm == MPI_COMM_WORLD, "StarPU-SMPI only works with MPI_COMM_WORLD for now");
	*size = _mpi_world_size;
@@ -1909,6 +1929,11 @@ int starpu_mpi_comm_size(MPI_Comm comm, int *size)

int starpu_mpi_comm_rank(MPI_Comm comm, int *rank)
{
+	if (_starpu_mpi_fake_world_rank != -1)
+	{
+		*rank = _starpu_mpi_fake_world_rank;
+		return 0;
+	}
#ifdef STARPU_SIMGRID
	STARPU_MPI_ASSERT_MSG(comm == MPI_COMM_WORLD, "StarPU-SMPI only works with MPI_COMM_WORLD for now");
	*rank = _mpi_world_rank;

+ 2 - 0
mpi/src/starpu_mpi_private.h

@@ -39,6 +39,8 @@ extern int _starpu_debug_level_max;
void _starpu_mpi_set_debug_level_min(int level);
void _starpu_mpi_set_debug_level_max(int level);
#endif
+extern int _starpu_mpi_fake_world_size;
+extern int _starpu_mpi_fake_world_rank;

#ifdef STARPU_NO_ASSERT
#  define STARPU_MPI_ASSERT_MSG(x, msg, ...)	do { if (0) { (void) (x); }} while(0)