瀏覽代碼

fix when the user doesn't want to use an MPI slave

Corentin Salingue 8 年之前
父節點
當前提交
8036cbd347

+ 1 - 0
include/starpu_worker.h

@@ -90,6 +90,7 @@ unsigned starpu_cuda_worker_get_count(void);
 unsigned starpu_opencl_worker_get_count(void);
 unsigned starpu_mic_worker_get_count(void);
 unsigned starpu_scc_worker_get_count(void);
+unsigned starpu_mpi_ms_worker_get_count(void);
 
 unsigned starpu_mic_device_get_count(void);
 

+ 19 - 10
src/core/topology.c

@@ -26,6 +26,7 @@
 #include <drivers/mic/driver_mic_source.h>
 #include <drivers/scc/driver_scc_source.h>
 #include <drivers/mpi/driver_mpi_source.h>
+#include <drivers/mpi/driver_mpi_common.h>
 #include <drivers/mp_common/source_common.h>
 #include <drivers/opencl/driver_opencl.h>
 #include <profiling/profiling.h>
@@ -954,7 +955,7 @@ _starpu_init_mpi_config (struct _starpu_machine_config *config,
 
 static void
 _starpu_init_mp_config (struct _starpu_machine_config *config,
-			struct starpu_conf *user_conf)
+			struct starpu_conf *user_conf, int no_mp_config)
 {
 	/* Discover and configure the mp topology. That means:
 	 * - discover the number of mp nodes;
@@ -966,6 +967,7 @@ _starpu_init_mp_config (struct _starpu_machine_config *config,
 	struct _starpu_machine_topology *topology = &config->topology;
 
 #ifdef STARPU_USE_MIC
+    if (!no_mp_config)
     {
         /* Discover and initialize the number of MIC nodes through the mp
          * infrastructure. */
@@ -1030,13 +1032,23 @@ _starpu_init_mp_config (struct _starpu_machine_config *config,
 
         topology->nmpidevices = reqmpidevices;
 
-        unsigned i;
-        for (i = 0; i < topology->nmpidevices; i++)
-            mpi_ms_nodes[i] = _starpu_mp_common_node_create(STARPU_NODE_MPI_SOURCE, i);
+        /* if the user doesn't want to use MPI slaves, we close the slave processes */
+        if (no_mp_config && topology->nmpidevices == 0)
+        {
+            _starpu_mpi_common_mp_deinit();
+            exit(0);
+        }
+
+        if (!no_mp_config)
+        {
+            unsigned i;
+            for (i = 0; i < topology->nmpidevices; i++)
+                mpi_ms_nodes[i] = _starpu_mp_common_node_create(STARPU_NODE_MPI_SOURCE, i);
 
 
-        for (i = 0; i < topology->nmpidevices; i++)
-            _starpu_init_mpi_config (config, user_conf, i);
+            for (i = 0; i < topology->nmpidevices; i++)
+                _starpu_init_mpi_config (config, user_conf, i);
+        }
     }
 #endif
 }
@@ -1376,11 +1388,8 @@ _starpu_init_machine_config(struct _starpu_machine_config *config, int no_mp_con
 	topology->nworkers += topology->nsccdevices;
 #endif /* STARPU_USE_SCC */
 
-	/* Unless not requested, we need to complete configuration with the
-	 * ones of the mp nodes. */
 #if defined(STARPU_USE_MIC) || defined(STARPU_USE_MPI_MASTER_SLAVE)
-	if (! no_mp_config)
-	    _starpu_init_mp_config (config, &config->conf);
+	    _starpu_init_mp_config (config, &config->conf, no_mp_config);
 #endif
 
 /* we put the CPU section after the accelerator : in case there was an

+ 7 - 1
src/core/workers.c

@@ -1331,7 +1331,8 @@ int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv)
 	/* Depending on whether we are a MP sink or not, we must build the
 	 * topology with MP nodes or not. */
 	ret = _starpu_build_topology(&_starpu_config, is_a_sink);
-	if (ret)
+    /* a sink doesn't exit even if no worker is discovered */
+	if (ret && !is_a_sink)
 	{
 		starpu_perfmodel_free_sampling_directories();
 		STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
@@ -1806,6 +1807,11 @@ unsigned starpu_scc_worker_get_count(void)
 	return _starpu_config.topology.nsccdevices;
 }
 
+unsigned starpu_mpi_ms_worker_get_count(void)
+{
+    return _starpu_config.topology.nmpidevices;
+}
+
 /* When analyzing performance, it is useful to see what is the processing unit
  * that actually performed the task. This function returns the id of the
  * processing unit actually executing it, therefore it makes no sense to use it

+ 0 - 3
src/drivers/mpi/driver_mpi_common.c

@@ -24,9 +24,6 @@
 #define NITER 32
 #define SIZE_BANDWIDTH (1024*1024)
 
-#define SYNC_TAG 44
-#define ASYNC_TAG 45
-
 #define DRIVER_MPI_MASTER_NODE_DEFAULT 0
 
 static int mpi_initialized = 0;

+ 3 - 0
src/drivers/mpi/driver_mpi_common.h

@@ -23,6 +23,9 @@
 
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
 
+#define SYNC_TAG 44
+#define ASYNC_TAG 45
+
 int _starpu_mpi_common_mp_init();
 void _starpu_mpi_common_mp_deinit();
 

+ 0 - 2
src/drivers/mpi/driver_mpi_source.c

@@ -231,8 +231,6 @@ void(* _starpu_mpi_ms_src_get_kernel_from_job(const struct _starpu_mp_node *node
 	return (void (*)(void))kernel;
 }
 
-
-
 unsigned _starpu_mpi_src_get_device_count()
 {
     int nb_mpi_devices;

+ 13 - 8
tests/errorcheck/starpu_init_noworker.c

@@ -57,6 +57,7 @@ int main(int argc, char **argv)
 	conf.nopencl = 0;
 	conf.nmic = 0;
 	conf.nscc = 0;
+    conf.nmpi_ms = 0;
 
 	/* starpu_init should return -ENODEV */
 	ret = starpu_initialize(&conf, &argc, &argv);
@@ -64,14 +65,18 @@ int main(int argc, char **argv)
 	     return EXIT_SUCCESS;
 	else
 	{
-	     	unsigned ncpu = starpu_cpu_worker_get_count();
-		unsigned ncuda = starpu_cuda_worker_get_count();
-		unsigned nopencl = starpu_opencl_worker_get_count();
-		FPRINTF(stderr, "StarPU has found :\n");
-		FPRINTF(stderr, "\t%u CPU cores\n", ncpu);
-		FPRINTF(stderr, "\t%u CUDA devices\n", ncuda);
-		FPRINTF(stderr, "\t%u OpenCL devices\n", nopencl);
-		return EXIT_FAILURE;
+        unsigned ncpu = starpu_cpu_worker_get_count();
+        unsigned ncuda = starpu_cuda_worker_get_count();
+        unsigned nopencl = starpu_opencl_worker_get_count();
+        unsigned nmic = starpu_mic_worker_get_count();
+        unsigned nmpi_ms = starpu_mpi_ms_worker_get_count();
+        FPRINTF(stderr, "StarPU has found :\n");
+        FPRINTF(stderr, "\t%u CPU cores\n", ncpu);
+        FPRINTF(stderr, "\t%u CUDA devices\n", ncuda);
+        FPRINTF(stderr, "\t%u OpenCL devices\n", nopencl);
+        FPRINTF(stderr, "\t%u MIC devices\n", nmic);
+        FPRINTF(stderr, "\t%u MPI Master-Slaves devices\n", nmpi_ms);
+        return EXIT_FAILURE;
 	}