Przeglądaj źródła

follow-up r19535 with same fix below

Samuel Thibault 8 lat temu
rodzic
commit
dde9701d14

+ 2 - 0
ChangeLog

@@ -42,6 +42,8 @@ New features:
     files.
   * Add STARPU_FXT_TRACE environment variable.
   * Add starpu_data_set_user_data and starpu_data_get_user_data.
+  * Add STARPU_MPI_FAKE_SIZE and STARPU_MPI_FAKE_RANK to allow simulating
+    execution of just one MPI node.
 
 StarPU 1.2.0 (svn revision 18521)
 ==============================================

+ 20 - 0
doc/doxygen/chapters/501_environment_variables.doxy

@@ -521,6 +521,26 @@ it prints messages on the standard output when data are added or removed from th
 communication cache.
 </dd>
 
+<dt>STARPU_MPI_FAKE_SIZE</dt>
+<dd>
+\anchor STARPU_MPI_FAKE_SIZE
+\addindex __env__STARPU_MPI_FAKE_SIZE
+Setting this to a number makes StarPU believe that there are that many MPI
+nodes, even if it was run on only one MPI node. This allows, e.g., simulating
+the execution of one of the nodes of a big cluster without actually running
+the rest. Of course, it does not provide computation results or timing.
+</dd>
+
+<dt>STARPU_MPI_FAKE_RANK</dt>
+<dd>
+\anchor STARPU_MPI_FAKE_RANK
+\addindex __env__STARPU_MPI_FAKE_RANK
+Setting this to a number makes StarPU believe that it runs as the given MPI
+node, even if it was run on only one MPI node. This allows, e.g., simulating
+the execution of one of the nodes of a big cluster without actually running
+the rest. Of course, it does not provide computation results or timing.
+</dd>
+
 <dt>STARPU_SIMGRID_CUDA_MALLOC_COST</dt>
 <dd>
 \anchor STARPU_SIMGRID_CUDA_MALLOC_COST

+ 25 - 0
mpi/src/starpu_mpi.c

@@ -77,6 +77,8 @@ static int running = 0;
 static int _mpi_world_size;
 static int _mpi_world_rank;
 #endif
+int _starpu_mpi_fake_world_size;
+int _starpu_mpi_fake_world_rank;
 
 /* Count requests posted by the application and not yet submitted to MPI */
 static starpu_pthread_mutex_t mutex_posted_requests;
@@ -296,6 +298,11 @@ static void _starpu_mpi_submit_ready_request(void *arg)
 	_STARPU_MPI_LOG_OUT();
 }
 
+static void nop_acquire_cb(void *arg)
+{
+	starpu_data_release(arg);
+}
+
 static struct _starpu_mpi_req *_starpu_mpi_isend_irecv_common(starpu_data_handle_t data_handle,
 							      int srcdst, int data_tag, MPI_Comm comm,
 							      unsigned detached, unsigned sync, void (*callback)(void *), void *arg,
@@ -307,6 +314,12 @@ static struct _starpu_mpi_req *_starpu_mpi_isend_irecv_common(starpu_data_handle
 {
 	struct _starpu_mpi_req *req;
 
+	if (_starpu_mpi_fake_world_size != -1)
+	{
+		starpu_data_acquire_cb_sequential_consistency(data_handle, mode, nop_acquire_cb, data_handle, sequential_consistency);
+		return NULL;
+	}
+
 	_STARPU_MPI_LOG_IN();
 	_STARPU_MPI_INC_POSTED_REQUESTS(1);
 
@@ -1292,6 +1305,8 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 	_mpi_world_size = worldsize;
 	_mpi_world_rank = rank;
 #endif
+	_starpu_mpi_fake_world_size = starpu_get_env_number("STARPU_MPI_FAKE_SIZE");
+	_starpu_mpi_fake_world_rank = starpu_get_env_number("STARPU_MPI_FAKE_RANK");
 
 #ifdef STARPU_SIMGRID
 	/* Now that MPI is set up, let the rest of simgrid get initialized */
@@ -1898,6 +1913,11 @@ void starpu_mpi_data_migrate(MPI_Comm comm, starpu_data_handle_t data, int new_r
 
 int starpu_mpi_comm_size(MPI_Comm comm, int *size)
 {
+	if (_starpu_mpi_fake_world_size != -1)
+	{
+		*size = _starpu_mpi_fake_world_size;
+		return 0;
+	}
 #ifdef STARPU_SIMGRID
 	STARPU_MPI_ASSERT_MSG(comm == MPI_COMM_WORLD, "StarPU-SMPI only works with MPI_COMM_WORLD for now");
 	*size = _mpi_world_size;
@@ -1909,6 +1929,11 @@ int starpu_mpi_comm_size(MPI_Comm comm, int *size)
 
 int starpu_mpi_comm_rank(MPI_Comm comm, int *rank)
 {
+	if (_starpu_mpi_fake_world_rank != -1)
+	{
+		*rank = _starpu_mpi_fake_world_rank;
+		return 0;
+	}
 #ifdef STARPU_SIMGRID
 	STARPU_MPI_ASSERT_MSG(comm == MPI_COMM_WORLD, "StarPU-SMPI only works with MPI_COMM_WORLD for now");
 	*rank = _mpi_world_rank;

+ 2 - 0
mpi/src/starpu_mpi_private.h

@@ -39,6 +39,8 @@ extern int _starpu_debug_level_max;
 void _starpu_mpi_set_debug_level_min(int level);
 void _starpu_mpi_set_debug_level_max(int level);
 #endif
+extern int _starpu_mpi_fake_world_size;
+extern int _starpu_mpi_fake_world_rank;
 
 #ifdef STARPU_NO_ASSERT
 #  define STARPU_MPI_ASSERT_MSG(x, msg, ...)	do { if (0) { (void) (x); }} while(0)

+ 7 - 7
src/datawizard/malloc.c

@@ -28,7 +28,7 @@
 #include <datawizard/malloc.h>
 #include <core/simgrid.h>
 
-#ifdef STARPU_SIMGRID
+#if 1 //def STARPU_SIMGRID
 #include <sys/mman.h>
 #include <fcntl.h>
 #endif
@@ -46,7 +46,7 @@ static int disable_pinning;
 static int malloc_on_node_default_flags[STARPU_MAXNODES];
 
 /* This file is used for implementing "folded" allocation */
-#ifdef STARPU_SIMGRID
+#if 1 //def STARPU_SIMGRID
 static int bogusfile = -1;
 static unsigned long _starpu_malloc_simulation_fold;
 #endif
@@ -221,7 +221,7 @@ int starpu_malloc_flags(void **A, size_t dim, int flags)
 #endif /* STARPU_SIMGRID */
 	}
 
-#ifdef STARPU_SIMGRID
+#if 1 //def STARPU_SIMGRID
 	if (flags & STARPU_MALLOC_SIMULATION_FOLDED)
 	{
 		/* Use "folded" allocation: the same file is mapped several
@@ -311,7 +311,7 @@ int starpu_malloc_flags(void **A, size_t dim, int flags)
 				ret = -ENOMEM;
 		}
 
-#if defined(STARPU_SIMGRID) || defined(STARPU_USE_CUDA)
+#if  1 //defined(STARPU_SIMGRID) || defined(STARPU_USE_CUDA)
 end:
 #endif
 	if (ret == 0)
@@ -370,7 +370,7 @@ static struct starpu_codelet free_pinned_cl =
 
 int starpu_free_flags(void *A, size_t dim, int flags)
 {
-#ifndef STARPU_SIMGRID
+#if 0 //ndef STARPU_SIMGRID
 	if (flags & STARPU_MALLOC_PINNED && disable_pinning <= 0 && STARPU_RUNNING_ON_VALGRIND == 0)
 	{
 		if (_starpu_can_submit_cuda_task())
@@ -437,7 +437,7 @@ int starpu_free_flags(void *A, size_t dim, int flags)
 	}
 #endif /* STARPU_SIMGRID */
 
-#ifdef STARPU_SIMGRID
+#if 1 //def STARPU_SIMGRID
 	if (flags & STARPU_MALLOC_SIMULATION_FOLDED)
 	{
 		munmap(A, dim);
@@ -794,7 +794,7 @@ _starpu_malloc_init(unsigned dst_node)
 	STARPU_PTHREAD_MUTEX_INIT(&chunk_mutex[dst_node], NULL);
 	disable_pinning = starpu_get_env_number("STARPU_DISABLE_PINNING");
 	malloc_on_node_default_flags[dst_node] = STARPU_MALLOC_PINNED | STARPU_MALLOC_COUNT;
-#ifdef STARPU_SIMGRID
+#if 1 //def STARPU_SIMGRID
 	/* Reasonably "costless" */
 	_starpu_malloc_simulation_fold = starpu_get_env_number_default("STARPU_MALLOC_SIMULATION_FOLD", 1) << 20;
 #endif

+ 1 - 15
src/drivers/driver_common/driver_common.c

@@ -564,10 +564,7 @@ int _starpu_get_multi_worker_task(struct _starpu_worker *workers, struct starpu_
 		_starpu_worker_set_status_sleeping(workerid);
 
 		if (_starpu_worker_can_block(memnode, worker)
-#ifndef STARPU_SIMGRID
-				&& !_starpu_sched_ctx_last_worker_awake(worker)
-#endif
-				)
+				&& !_starpu_sched_ctx_last_worker_awake(worker))
 		{
 			STARPU_PTHREAD_COND_WAIT(&worker->sched_cond, &worker->sched_mutex);
 			STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex);
@@ -576,18 +573,7 @@ int _starpu_get_multi_worker_task(struct _starpu_worker *workers, struct starpu_
 		{
 			STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex);
 			if (_starpu_machine_is_running())
-			{
 				_starpu_exponential_backoff(worker);
-#ifdef STARPU_SIMGRID
-				static int warned;
-				if (!warned)
-				{
-					warned = 1;
-					_STARPU_DISP("Has to make simgrid spin for CPU idle time.  You can try to pass --enable-blocking-drivers to ./configure to avoid this\n");
-				}
-				MSG_process_sleep(0.000010);
-#endif
-			}
 		}
 		return 0;
 	}