
Add STARPU_MPI_FAKE_SIZE and STARPU_MPI_FAKE_RANK to allow simulating
the execution of just one MPI node.

Samuel Thibault, 8 years ago
commit 37578a0d00

+ 2 - 0
ChangeLog

@@ -42,6 +42,8 @@ New features:
     files.
   * Add STARPU_FXT_TRACE environment variable.
   * Add starpu_data_set_user_data and starpu_data_get_user_data.
+  * Add STARPU_MPI_FAKE_SIZE and STARPU_MPI_FAKE_RANK to allow simulating
+    the execution of just one MPI node.
 
 StarPU 1.2.0 (svn revision 18521)
 ==============================================

+ 15 - 0
doc/doxygen/chapters/410_mpi_support.doxy

@@ -501,6 +501,21 @@ If the distribution function is not too complex and the compiler is very good,
 the latter can even optimize the <c>for</c> loops, thus dramatically reducing
 the cost of task submission.
 
+To quickly estimate how long task submission takes, and notably how much
+pruning saves, an easy way is to measure the submission time of just one of
+the MPI nodes. This can be achieved by running the application on a single
+MPI node with the following environment variables:
+
+\code
+export STARPU_DISABLE_KERNELS=1
+export STARPU_MPI_FAKE_RANK=2
+export STARPU_MPI_FAKE_SIZE=1024
+\endcode
+
+Here we have disabled the kernel function calls to skip the actual computation
+time and keep only the submission time, and we have asked StarPU to pretend it
+is running as MPI node 2 out of 1024 nodes.
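+
+With these variables set, the usual rank and size queries simply return the
+faked values, so the application's task distribution code runs as if it were
+node 2 of a 1024-node execution. A minimal sketch of what the application
+sees under the settings above:
+
+\code
+int rank, size;
+starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); /* 2, from STARPU_MPI_FAKE_RANK */
+starpu_mpi_comm_size(MPI_COMM_WORLD, &size); /* 1024, from STARPU_MPI_FAKE_SIZE */
+\endcode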
+
 A function starpu_mpi_task_build() is also provided, whose aim is only to
 construct the task structure. All MPI nodes need to call the
 function; only the node which is to execute the task will return a

+ 20 - 0
doc/doxygen/chapters/501_environment_variables.doxy

@@ -521,6 +521,26 @@ it prints messages on the standard output when data are added or removed from th
 communication cache.
 </dd>
 
+<dt>STARPU_MPI_FAKE_SIZE</dt>
+<dd>
+\anchor STARPU_MPI_FAKE_SIZE
+\addindex __env__STARPU_MPI_FAKE_SIZE
+Setting this variable to a number makes StarPU believe that it runs on that
+many MPI nodes, even if it was started on only one. This allows, for instance,
+simulating the execution of one node of a big cluster without actually running
+the rest. It of course does not provide computation results or timings.
+</dd>
+
+<dt>STARPU_MPI_FAKE_RANK</dt>
+<dd>
+\anchor STARPU_MPI_FAKE_RANK
+\addindex __env__STARPU_MPI_FAKE_RANK
+Setting this variable to a number makes StarPU believe that it runs as the MPI
+node with that rank, even if it was started on only one node. This allows, for
+instance, simulating the execution of one node of a big cluster without
+actually running the rest. It of course does not provide computation results
+or timings.
+</dd>
+
 <dt>STARPU_SIMGRID_CUDA_MALLOC_COST</dt>
 <dd>
 \anchor STARPU_SIMGRID_CUDA_MALLOC_COST

+ 25 - 0
mpi/src/starpu_mpi.c

@@ -77,6 +77,8 @@ static int running = 0;
 static int _mpi_world_size;
 static int _mpi_world_rank;
 #endif
+int _starpu_mpi_fake_world_size;
+int _starpu_mpi_fake_world_rank;
 
 /* Count requests posted by the application and not yet submitted to MPI */
 static starpu_pthread_mutex_t mutex_posted_requests;
@@ -296,6 +298,11 @@ static void _starpu_mpi_submit_ready_request(void *arg)
 	_STARPU_MPI_LOG_OUT();
 }
 
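+/* Acquire callback used when faking communications: the data does not
+ * actually need to be sent or received, so just release it. */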
+static void nop_acquire_cb(void *arg)
+{
+	starpu_data_release(arg);
+}
+
 static struct _starpu_mpi_req *_starpu_mpi_isend_irecv_common(starpu_data_handle_t data_handle,
 							      int srcdst, int data_tag, MPI_Comm comm,
 							      unsigned detached, unsigned sync, void (*callback)(void *), void *arg,
@@ -307,6 +314,12 @@ static struct _starpu_mpi_req *_starpu_mpi_isend_irecv_common(starpu_data_handle
 {
 	struct _starpu_mpi_req *req;
 
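+	/* When faking the MPI execution (STARPU_MPI_FAKE_SIZE is set), no
+	 * communication actually happens: just acquire and release the data
+	 * so that its dependencies are still honored. */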
+	if (_starpu_mpi_fake_world_size != -1)
+	{
+		starpu_data_acquire_cb_sequential_consistency(data_handle, mode, nop_acquire_cb, data_handle, sequential_consistency);
+		return NULL;
+	}
+
 	_STARPU_MPI_LOG_IN();
 	_STARPU_MPI_INC_POSTED_REQUESTS(1);
 
@@ -1292,6 +1305,8 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 	_mpi_world_size = worldsize;
 	_mpi_world_rank = rank;
 #endif
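+	/* These are -1 when the corresponding variable is not set. */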
+	_starpu_mpi_fake_world_size = starpu_get_env_number("STARPU_MPI_FAKE_SIZE");
+	_starpu_mpi_fake_world_rank = starpu_get_env_number("STARPU_MPI_FAKE_RANK");
 
 #ifdef STARPU_SIMGRID
 	/* Now that MPI is set up, let the rest of simgrid get initialized */
@@ -1898,6 +1913,11 @@ void starpu_mpi_data_migrate(MPI_Comm comm, starpu_data_handle_t data, int new_r
 
 int starpu_mpi_comm_size(MPI_Comm comm, int *size)
 {
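+	/* Report the faked world size if STARPU_MPI_FAKE_SIZE is set. */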
+	if (_starpu_mpi_fake_world_size != -1)
+	{
+		*size = _starpu_mpi_fake_world_size;
+		return 0;
+	}
 #ifdef STARPU_SIMGRID
 	STARPU_MPI_ASSERT_MSG(comm == MPI_COMM_WORLD, "StarPU-SMPI only works with MPI_COMM_WORLD for now");
 	*size = _mpi_world_size;
@@ -1909,6 +1929,11 @@ int starpu_mpi_comm_size(MPI_Comm comm, int *size)
 
 int starpu_mpi_comm_rank(MPI_Comm comm, int *rank)
 {
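+	/* Report the faked rank if STARPU_MPI_FAKE_RANK is set. */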
+	if (_starpu_mpi_fake_world_rank != -1)
+	{
+		*rank = _starpu_mpi_fake_world_rank;
+		return 0;
+	}
 #ifdef STARPU_SIMGRID
 	STARPU_MPI_ASSERT_MSG(comm == MPI_COMM_WORLD, "StarPU-SMPI only works with MPI_COMM_WORLD for now");
 	*rank = _mpi_world_rank;

+ 2 - 0
mpi/src/starpu_mpi_private.h

@@ -39,6 +39,8 @@ extern int _starpu_debug_level_max;
 void _starpu_mpi_set_debug_level_min(int level);
 void _starpu_mpi_set_debug_level_max(int level);
 #endif
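+/* World size and rank faked through STARPU_MPI_FAKE_SIZE and
+ * STARPU_MPI_FAKE_RANK, or -1 when these variables are not set. */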
+extern int _starpu_mpi_fake_world_size;
+extern int _starpu_mpi_fake_world_rank;
 
 #ifdef STARPU_NO_ASSERT
 #  define STARPU_MPI_ASSERT_MSG(x, msg, ...)	do { if (0) { (void) (x); }} while(0)