Browse Source

fix indent

Corentin Salingue 8 years ago
parent
commit
cc4eb6b654

+ 1 - 1
include/schedulers/starpu_heteroprio.h

@@ -55,7 +55,7 @@ static const unsigned starpu_heteroprio_types_to_arch[STARPU_NB_TYPES+1] =
 	STARPU_OPENCL,
 	STARPU_MIC,
 	STARPU_SCC,
-    STARPU_MPI_MS,
+        STARPU_MPI_MS,
 	0
 };
 

+ 1 - 1
include/starpu.h

@@ -90,7 +90,7 @@ struct starpu_conf
 	int nopencl;
 	int nmic;
 	int nscc;
-    int nmpi_ms;
+        int nmpi_ms;
 
 	unsigned use_explicit_workers_bindid;
 	unsigned workers_bindid[STARPU_NMAXWORKERS];

+ 165 - 165
src/core/perfmodel/perfmodel_bus.c

@@ -748,7 +748,7 @@ static void benchmark_all_gpu_devices(void)
 
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
     
-    _starpu_mpi_common_measure_bandwidth_latency(mpi_time_device_to_device, mpi_latency_device_to_device);
+        _starpu_mpi_common_measure_bandwidth_latency(mpi_time_device_to_device, mpi_latency_device_to_device);
 
 #endif /* STARPU_USE_MPI_MASTER_SLAVE */
 
@@ -942,9 +942,9 @@ static void generate_bus_affinity_file(void)
 		benchmark_all_gpu_devices();
 
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
-    /* Slaves don't write files */
-    if (!_starpu_mpi_common_is_src_node())
-        return;
+        /* Slaves don't write files */
+        if (!_starpu_mpi_common_is_src_node())
+                return;
 #endif
 
 	write_bus_affinity_file_content();
@@ -1130,7 +1130,7 @@ static int load_bus_latency_file_content(void)
 #ifndef STARPU_SIMGRID
 static void write_bus_latency_file_content(void)
 {
-    unsigned src, dst, maxnode;
+        unsigned src, dst, maxnode;
 	FILE *f;
 	int locked;
 
@@ -1204,43 +1204,43 @@ static void write_bus_latency_file_content(void)
 				if (dst > ncuda && dst <= ncuda + nopencl)
 					latency += opencldev_latency_htod[dst-ncuda];
 #endif
-                /* TODO Latency MIC */
+                                /* TODO Latency MIC */
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
-                /* Modify MPI src and MPI dst if they contain the master node or not 
-                 * Because, we only take care about slaves */
-                int mpi_master = _starpu_mpi_common_get_src_node();
-
-                int mpi_src = src - (ncuda + nopencl + nmic) - 1;
-                mpi_src = (mpi_master <= mpi_src) ? mpi_src+1 : mpi_src;
-                
-                int mpi_dst = dst - (ncuda + nopencl + nmic) - 1;
-                mpi_dst = (mpi_master <= mpi_dst) ? mpi_dst+1 : mpi_dst;
-
-				if (src > ncuda + nopencl + nmic && src <= ncuda + nopencl + nmic + nmpi_ms)
-                {
-                    if (dst > ncuda + nopencl + nmic && dst <= ncuda + nopencl + nmic + nmpi_ms)
-                    {
-                        /* src and dst identify 2 MPI devices */
-                        latency += mpi_latency_device_to_device[mpi_src][mpi_dst];
-                    }
-                    else
-                    {
-                        /* Only src represents an MPI device 
-                         * So we add latency between src and master */
-                        latency += mpi_latency_device_to_device[mpi_src][mpi_master];
-                    }
-                }
-                else
-                {
-                    if (dst > ncuda + nopencl + nmic && dst <= ncuda + nopencl + nmic + nmpi_ms)
-                    {
-                        /* Only dst identifies an MPI device 
-                         * So we add latency between master and dst */
-                        latency += mpi_latency_device_to_device[mpi_master][mpi_dst];
-                    }
-                }
-#endif
-			}
+                                /* Modify MPI src and MPI dst if they contain the master node or not 
+                                 * Because, we only take care about slaves */
+                                int mpi_master = _starpu_mpi_common_get_src_node();
+
+                                int mpi_src = src - (ncuda + nopencl + nmic) - 1;
+                                mpi_src = (mpi_master <= mpi_src) ? mpi_src+1 : mpi_src;
+
+                                int mpi_dst = dst - (ncuda + nopencl + nmic) - 1;
+                                mpi_dst = (mpi_master <= mpi_dst) ? mpi_dst+1 : mpi_dst;
+
+                                if (src > ncuda + nopencl + nmic && src <= ncuda + nopencl + nmic + nmpi_ms)
+                                {
+                                        if (dst > ncuda + nopencl + nmic && dst <= ncuda + nopencl + nmic + nmpi_ms)
+                                        {
+                                                /* src and dst identify 2 MPI devices */
+                                                latency += mpi_latency_device_to_device[mpi_src][mpi_dst];
+                                        }
+                                        else
+                                        {
+                                                /* Only src represents an MPI device 
+                                                 * So we add latency between src and master */
+                                                latency += mpi_latency_device_to_device[mpi_src][mpi_master];
+                                        }
+                                }
+                                else
+                                {
+                                        if (dst > ncuda + nopencl + nmic && dst <= ncuda + nopencl + nmic + nmpi_ms)
+                                        {
+                                                /* Only dst identifies an MPI device 
+                                                 * So we add latency between master and dst */
+                                                latency += mpi_latency_device_to_device[mpi_master][mpi_dst];
+                                        }
+                                }
+#endif
+                        }
 
 			if (dst)
 				fputc('\t', f);
@@ -1262,9 +1262,9 @@ static void generate_bus_latency_file(void)
 		benchmark_all_gpu_devices();
 
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
-    /* Slaves don't write files */
-    if (!_starpu_mpi_common_is_src_node())
-        return;
+        /* Slaves don't write files */
+        if (!_starpu_mpi_common_is_src_node())
+                return;
 #endif
 
 #ifndef STARPU_SIMGRID
@@ -1476,40 +1476,40 @@ static void write_bus_bandwidth_file_content(void)
 					slowness += mic_time_host_to_device[dst - (ncuda + nopencl)];
 #endif
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
-                /* Modify MPI src and MPI dst if they contain the master node or not 
-                 * Because, we only take care about slaves */
-                int mpi_master = _starpu_mpi_common_get_src_node();
-
-                int mpi_src = src - (ncuda + nopencl + nmic) - 1;
-                mpi_src = (mpi_master <= mpi_src) ? mpi_src+1 : mpi_src;
-                
-                int mpi_dst = dst - (ncuda + nopencl + nmic) - 1;
-                mpi_dst = (mpi_master <= mpi_dst) ? mpi_dst+1 : mpi_dst;
-
-                /* here we have bandwidth */
-				if (src > ncuda + nopencl + nmic && src <= ncuda + nopencl + nmic + nmpi_ms)
-                {
-                    if (dst > ncuda + nopencl + nmic && dst <= ncuda + nopencl + nmic + nmpi_ms)
-                    {
-                        /* src and dst identify 2 MPI devices */
-					    slowness += 1.0/mpi_time_device_to_device[mpi_src][mpi_dst];
-                    }
-                    else
-                    {
-                        /* Only src represents an MPI device 
-                         * So we add bandwidth between src and master */
-					    slowness += 1.0/mpi_time_device_to_device[mpi_src][mpi_master];
-                    }
-                }
-                else
-                {
-                    if (dst > ncuda + nopencl + nmic && dst <= ncuda + nopencl + nmic + nmpi_ms)
-                    {
-                        /* Only dst identifies an MPI device 
-                         * So we add bandwidth between master and dst */
-					    slowness += 1.0/mpi_time_device_to_device[mpi_master][mpi_dst];
-                    }
-                }
+                                /* Modify MPI src and MPI dst if they contain the master node or not 
+                                 * Because, we only take care about slaves */
+                                int mpi_master = _starpu_mpi_common_get_src_node();
+
+                                int mpi_src = src - (ncuda + nopencl + nmic) - 1;
+                                mpi_src = (mpi_master <= mpi_src) ? mpi_src+1 : mpi_src;
+
+                                int mpi_dst = dst - (ncuda + nopencl + nmic) - 1;
+                                mpi_dst = (mpi_master <= mpi_dst) ? mpi_dst+1 : mpi_dst;
+
+                                /* here we have bandwidth */
+                                if (src > ncuda + nopencl + nmic && src <= ncuda + nopencl + nmic + nmpi_ms)
+                                {
+                                        if (dst > ncuda + nopencl + nmic && dst <= ncuda + nopencl + nmic + nmpi_ms)
+                                        {
+                                                /* src and dst identify 2 MPI devices */
+                                                slowness += 1.0/mpi_time_device_to_device[mpi_src][mpi_dst];
+                                        }
+                                        else
+                                        {
+                                                /* Only src represents an MPI device 
+                                                 * So we add bandwidth between src and master */
+                                                slowness += 1.0/mpi_time_device_to_device[mpi_src][mpi_master];
+                                        }
+                                }
+                                else
+                                {
+                                        if (dst > ncuda + nopencl + nmic && dst <= ncuda + nopencl + nmic + nmpi_ms)
+                                        {
+                                                /* Only dst identifies an MPI device 
+                                                 * So we add bandwidth between master and dst */
+                                                slowness += 1.0/mpi_time_device_to_device[mpi_master][mpi_dst];
+                                        }
+                                }
 #endif
 				bandwidth = 1.0/slowness;
 			}
@@ -1586,7 +1586,7 @@ void starpu_bus_print_bandwidth(FILE *f)
 			fprintf(f, "OpenCL%u\t", src-ncuda-1);
 		else if (src <= ncuda + nopencl + nmic)
 			fprintf(f, "MIC%u\t", src-ncuda-nopencl-1);
-        else
+                else
 			fprintf(f, "MPI_MS%d\t", src-ncuda-nopencl-nmic-1);
 		for (dst = 0; dst <= maxnode; dst++)
 			fprintf(f, "%.0f\t", bandwidth_matrix[src][dst]);
@@ -1605,7 +1605,7 @@ void starpu_bus_print_bandwidth(FILE *f)
 			fprintf(f, "OpenCL%u\t", src-ncuda-1);
 		else if (src <= ncuda + nopencl + nmic)
 			fprintf(f, "MIC%u\t", src-ncuda-nopencl-1);
-        else
+                else
 			fprintf(f, "MPI_MS%d\t", src-ncuda-nopencl-nmic-1);
 		for (dst = 0; dst <= maxnode; dst++)
 			fprintf(f, "%.0f\t", latency_matrix[src][dst]);
@@ -1664,9 +1664,9 @@ static void generate_bus_bandwidth_file(void)
 		benchmark_all_gpu_devices();
     
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
-    /* Slaves don't write files */
-    if (!_starpu_mpi_common_is_src_node())
-        return;
+        /* Slaves don't write files */
+        if (!_starpu_mpi_common_is_src_node())
+                return;
 #endif
 
 #ifndef STARPU_SIMGRID
@@ -1702,49 +1702,49 @@ static void get_config_path(char *path, size_t maxlen)
 /* check if the master or one slave has to recalibrate */
 static int mpi_check_recalibrate(int my_recalibrate)
 {
-	int nb_mpi = _starpu_mpi_src_get_device_count() + 1;
-  int mpi_recalibrate[nb_mpi];
+        int nb_mpi = _starpu_mpi_src_get_device_count() + 1;
+        int mpi_recalibrate[nb_mpi];
 
-  MPI_Allgather(&my_recalibrate, 1, MPI_INT, mpi_recalibrate, 1, MPI_INT, MPI_COMM_WORLD);
+        MPI_Allgather(&my_recalibrate, 1, MPI_INT, mpi_recalibrate, 1, MPI_INT, MPI_COMM_WORLD);
 
-  for (int i = 0; i < nb_mpi; i++)
-  {
-    if (mpi_recalibrate[i])
-    {
-      return 1;
-      break;
-    }
-  }
-	return 0;
+        for (int i = 0; i < nb_mpi; i++)
+        {
+                if (mpi_recalibrate[i])
+                {
+                        return 1;
+                        break;
+                }
+        }
+        return 0;
 }
 #endif
 
 static void compare_value_and_recalibrate(char * msg, unsigned val_file, unsigned val_detected)
 {
-		int recalibrate = 0;
-    if (val_file != val_detected)
-			recalibrate = 1;
+        int recalibrate = 0;
+        if (val_file != val_detected)
+                recalibrate = 1;
 
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
 	//Send to each other to know if we had to recalibrate because someone cannot have the correct value in the config file
 	recalibrate = mpi_check_recalibrate(recalibrate);
 #endif
 
-		if (recalibrate)
-    {
+        if (recalibrate)
+        {
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
-        /* Only the master prints the message */
-        if (_starpu_mpi_common_is_src_node())
+                /* Only the master prints the message */
+                if (_starpu_mpi_common_is_src_node())
 #endif
+                        _STARPU_DISP("Current configuration does not match the bus performance model (%s: (stored) %d != (current) %d), recalibrating...\n", msg, val_file, val_detected);
 
-        _STARPU_DISP("Current configuration does not match the bus performance model (%s: (stored) %d != (current) %d), recalibrating...\n", msg, val_file, val_detected);
-        _starpu_bus_force_sampling();
+                _starpu_bus_force_sampling();
 
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
-        if (_starpu_mpi_common_is_src_node())
+                if (_starpu_mpi_common_is_src_node())
 #endif
-        _STARPU_DISP("... done\n");
-    }
+                        _STARPU_DISP("... done\n");
+        }
 }
 
 static void check_bus_config_file(void)
@@ -1775,66 +1775,66 @@ static void check_bus_config_file(void)
 	}
 	else
 	{
-        FILE *f;
-        int ret;
-        unsigned read_cuda = -1, read_opencl = -1, read_mic = -1, read_mpi_ms = -1;
-        unsigned read_cpus = -1;
-		int locked;
-
-        // Loading configuration from file
-        f = fopen(path, "r");
-        STARPU_ASSERT(f);
-		locked = _starpu_frdlock(f) == 0;
-        _starpu_drop_comments(f);
-        
-        ret = fscanf(f, "%u\t", &read_cpus);
-        STARPU_ASSERT(ret == 1);
-        _starpu_drop_comments(f);
-
-        ret = fscanf(f, "%u\t", &read_cuda);
-        STARPU_ASSERT(ret == 1);
-        _starpu_drop_comments(f);
-
-        ret = fscanf(f, "%u\t", &read_opencl);
-        STARPU_ASSERT(ret == 1);
-        _starpu_drop_comments(f);
-        
-        ret = fscanf(f, "%u\t", &read_mic);
-        if (ret == 0)
-            read_mic = 0;
-        _starpu_drop_comments(f);
-
-        ret = fscanf(f, "%u\t", &read_mpi_ms);
-        if (ret == 0)
-            read_mpi_ms = 0;
-        _starpu_drop_comments(f);
-
-		if (locked)
-			_starpu_frdunlock(f);
-        fclose(f);
-
-        // Loading current configuration
-        ncpus = _starpu_topology_get_nhwcpu(config);
+                FILE *f;
+                int ret;
+                unsigned read_cuda = -1, read_opencl = -1, read_mic = -1, read_mpi_ms = -1;
+                unsigned read_cpus = -1;
+                int locked;
+
+                // Loading configuration from file
+                f = fopen(path, "r");
+                STARPU_ASSERT(f);
+                locked = _starpu_frdlock(f) == 0;
+                _starpu_drop_comments(f);
+
+                ret = fscanf(f, "%u\t", &read_cpus);
+                STARPU_ASSERT(ret == 1);
+                _starpu_drop_comments(f);
+
+                ret = fscanf(f, "%u\t", &read_cuda);
+                STARPU_ASSERT(ret == 1);
+                _starpu_drop_comments(f);
+
+                ret = fscanf(f, "%u\t", &read_opencl);
+                STARPU_ASSERT(ret == 1);
+                _starpu_drop_comments(f);
+
+                ret = fscanf(f, "%u\t", &read_mic);
+                if (ret == 0)
+                        read_mic = 0;
+                _starpu_drop_comments(f);
+
+                ret = fscanf(f, "%u\t", &read_mpi_ms);
+                if (ret == 0)
+                        read_mpi_ms = 0;
+                _starpu_drop_comments(f);
+
+                if (locked)
+                        _starpu_frdunlock(f);
+                fclose(f);
+
+                // Loading current configuration
+                ncpus = _starpu_topology_get_nhwcpu(config);
 #ifdef STARPU_USE_CUDA
-		ncuda = _starpu_get_cuda_device_count();
+                ncuda = _starpu_get_cuda_device_count();
 #endif
 #ifdef STARPU_USE_OPENCL
-        nopencl = _starpu_opencl_get_device_count();
+                nopencl = _starpu_opencl_get_device_count();
 #endif
 #ifdef STARPU_USE_MIC
-        nmic = _starpu_mic_src_get_device_count();
+                nmic = _starpu_mic_src_get_device_count();
 #endif /* STARPU_USE_MIC */
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
-        nmpi_ms = _starpu_mpi_src_get_device_count();
+                nmpi_ms = _starpu_mpi_src_get_device_count();
 #endif /* STARPU_USE_MPI_MASTER_SLAVE */
 
-        // Checking if both configurations match
-        compare_value_and_recalibrate("CPUS", read_cpus, ncpus);
-        compare_value_and_recalibrate("CUDA", read_cuda, ncuda);
-        compare_value_and_recalibrate("OpenCL", read_opencl, nopencl);
-        compare_value_and_recalibrate("MIC", read_mic, nmic);
-        compare_value_and_recalibrate("MPI Master-Slave", read_mpi_ms, nmpi_ms);
-    }
+                // Checking if both configurations match
+                compare_value_and_recalibrate("CPUS", read_cpus, ncpus);
+                compare_value_and_recalibrate("CUDA", read_cuda, ncuda);
+                compare_value_and_recalibrate("OpenCL", read_opencl, nopencl);
+                compare_value_and_recalibrate("MIC", read_mic, nmic);
+                compare_value_and_recalibrate("MPI Master-Slave", read_mpi_ms, nmpi_ms);
+        }
 }
 
 static void write_bus_config_file_content(void)
@@ -1871,9 +1871,9 @@ static void generate_bus_config_file(void)
 		benchmark_all_gpu_devices();
     
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
-    /* Slaves don't write files */
-    if (!_starpu_mpi_common_is_src_node())
-        return;
+        /* Slaves don't write files */
+        if (!_starpu_mpi_common_is_src_node())
+                return;
 #endif
 
 	write_bus_config_file_content();
@@ -2606,9 +2606,9 @@ static void generate_bus_platform_file(void)
 		benchmark_all_gpu_devices();
 
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
-    /* Slaves don't write files */
-    if (!_starpu_mpi_common_is_src_node())
-        return;
+        /* Slaves don't write files */
+        if (!_starpu_mpi_common_is_src_node())
+                return;
 #endif
 
 	write_bus_platform_file_content(3);
@@ -2665,7 +2665,7 @@ void _starpu_load_bus_performance_files(void)
 	nopencl = _starpu_opencl_get_device_count();
 #endif
 #if defined(STARPU_USE_MPI_MASTER_SLAVE) || defined(STARPU_USE_SIMGRID)
-    nmpi_ms = _starpu_mpi_src_get_device_count();
+        nmpi_ms = _starpu_mpi_src_get_device_count();
 #endif
 #if defined(STARPU_USE_MIC) || defined(STARPU_USE_SIMGRID)
 	nmic = _starpu_mic_src_get_device_count();

+ 2 - 2
src/core/task.c

@@ -1158,7 +1158,7 @@ _starpu_handle_needs_conversion_task_for_arch(starpu_data_handle_t handle,
 				case STARPU_CUDA_RAM:      /* Fall through */
 				case STARPU_OPENCL_RAM:
 				case STARPU_MIC_RAM:
-                case STARPU_MPI_MS_RAM:
+                                case STARPU_MPI_MS_RAM:
 				case STARPU_SCC_RAM:
 					return 1;
 				default:
@@ -1176,7 +1176,7 @@ _starpu_handle_needs_conversion_task_for_arch(starpu_data_handle_t handle,
 				case STARPU_CUDA_RAM:
 				case STARPU_OPENCL_RAM:
 				case STARPU_MIC_RAM:
-                case STARPU_MPI_MS_RAM:
+                                case STARPU_MPI_MS_RAM:
 				case STARPU_SCC_RAM:
 					return 0;
 				default:

+ 114 - 114
src/core/topology.c

@@ -614,7 +614,7 @@ _starpu_init_topology (struct _starpu_machine_config *config)
 	config->topology.nhwscc = _starpu_scc_src_get_device_count();
 #endif
 #ifdef STARPU_USE_MPI_MASTER_SLAVE 
-    config->topology.nhwmpi = _starpu_mpi_src_get_device_count();
+        config->topology.nhwmpi = _starpu_mpi_src_get_device_count();
 #endif
 
 	topology_is_initialized = 1;
@@ -908,62 +908,62 @@ COIPROCESS _starpu_mic_process[STARPU_MAXMICDEVS];
 #endif
 
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
-static void
+        static void
 _starpu_init_mpi_config (struct _starpu_machine_config *config,
-			 struct starpu_conf *user_conf,
-			 unsigned mpi_idx)
+                struct starpu_conf *user_conf,
+                unsigned mpi_idx)
 {
-	struct _starpu_machine_topology *topology = &config->topology;
+        struct _starpu_machine_topology *topology = &config->topology;
 
-	topology->nhwmpicores[mpi_idx] = 0;
+        topology->nhwmpicores[mpi_idx] = 0;
 
-	_starpu_init_mpi_topology (config, mpi_idx);
+        _starpu_init_mpi_topology (config, mpi_idx);
 
-	int nmpicores;
-	nmpicores = starpu_get_env_number("STARPU_NMPIMSTHREADS");
+        int nmpicores;
+        nmpicores = starpu_get_env_number("STARPU_NMPIMSTHREADS");
 
-	if (nmpicores == -1)
-	{
-		/* Nothing was specified, so let's use the number of
-		 * detected mpi cores. ! */
-		nmpicores = topology->nhwmpicores[mpi_idx];
-	}
-	else
-	{
-		if ((unsigned) nmpicores > topology->nhwmpicores[mpi_idx])
-		{
-			/* The user requires more MPI cores than there is available */
-			fprintf(stderr,
-				"# Warning: %d MPI cores requested. Only %d available.\n",
-				nmpicores, topology->nhwmpicores[mpi_idx]);
-			nmpicores = topology->nhwmpicores[mpi_idx];
-		}
-	}
+        if (nmpicores == -1)
+        {
+                /* Nothing was specified, so let's use the number of
+                 * detected mpi cores. ! */
+                nmpicores = topology->nhwmpicores[mpi_idx];
+        }
+        else
+        {
+                if ((unsigned) nmpicores > topology->nhwmpicores[mpi_idx])
+                {
+                        /* The user requires more MPI cores than there is available */
+                        fprintf(stderr,
+                                        "# Warning: %d MPI cores requested. Only %d available.\n",
+                                        nmpicores, topology->nhwmpicores[mpi_idx]);
+                        nmpicores = topology->nhwmpicores[mpi_idx];
+                }
+        }
 
-	topology->nmpicores[mpi_idx] = nmpicores;
-	STARPU_ASSERT_MSG(topology->nmpicores[mpi_idx] + topology->nworkers <= STARPU_NMAXWORKERS,
-			  "topology->nmpicores[mpi_idx(%d)] (%d) + topology->nworkers (%d) <= STARPU_NMAXWORKERS (%d)",
-			  mpi_idx, topology->nmpicores[mpi_idx], topology->nworkers, STARPU_NMAXWORKERS);
+        topology->nmpicores[mpi_idx] = nmpicores;
+        STARPU_ASSERT_MSG(topology->nmpicores[mpi_idx] + topology->nworkers <= STARPU_NMAXWORKERS,
+                        "topology->nmpicores[mpi_idx(%d)] (%d) + topology->nworkers (%d) <= STARPU_NMAXWORKERS (%d)",
+                        mpi_idx, topology->nmpicores[mpi_idx], topology->nworkers, STARPU_NMAXWORKERS);
 
-	mpi_worker_set[mpi_idx].workers = &config->workers[topology->nworkers];
-	unsigned mpicore_id;
-	for (mpicore_id = 0; mpicore_id < topology->nmpicores[mpi_idx]; mpicore_id++)
-	{
-		int worker_idx = topology->nworkers + mpicore_id;
-		config->workers[worker_idx].set = &mpi_worker_set[mpi_idx];
-		config->workers[worker_idx].arch = STARPU_MPI_WORKER;
-        _STARPU_MALLOC(config->workers[worker_idx].perf_arch.devices, sizeof(struct starpu_perfmodel_device));
-		config->workers[worker_idx].perf_arch.ndevices = 1;
-		config->workers[worker_idx].perf_arch.devices[0].type = STARPU_MPI_WORKER;
-		config->workers[worker_idx].perf_arch.devices[0].devid = mpi_idx;
-		config->workers[worker_idx].perf_arch.devices[0].ncores = 1;
-		config->workers[worker_idx].devid = mpi_idx;
-		config->workers[worker_idx].subworkerid = mpicore_id;
-		config->workers[worker_idx].worker_mask = STARPU_MPI_MS;
-		config->worker_mask |= STARPU_MPI_MS;
-	}
+        mpi_worker_set[mpi_idx].workers = &config->workers[topology->nworkers];
+        unsigned mpicore_id;
+        for (mpicore_id = 0; mpicore_id < topology->nmpicores[mpi_idx]; mpicore_id++)
+        {
+                int worker_idx = topology->nworkers + mpicore_id;
+                config->workers[worker_idx].set = &mpi_worker_set[mpi_idx];
+                config->workers[worker_idx].arch = STARPU_MPI_WORKER;
+                _STARPU_MALLOC(config->workers[worker_idx].perf_arch.devices, sizeof(struct starpu_perfmodel_device));
+                config->workers[worker_idx].perf_arch.ndevices = 1;
+                config->workers[worker_idx].perf_arch.devices[0].type = STARPU_MPI_WORKER;
+                config->workers[worker_idx].perf_arch.devices[0].devid = mpi_idx;
+                config->workers[worker_idx].perf_arch.devices[0].ncores = 1;
+                config->workers[worker_idx].devid = mpi_idx;
+                config->workers[worker_idx].subworkerid = mpicore_id;
+                config->workers[worker_idx].worker_mask = STARPU_MPI_MS;
+                config->worker_mask |= STARPU_MPI_MS;
+        }
 
-	topology->nworkers += topology->nmpicores[mpi_idx];
+        topology->nworkers += topology->nmpicores[mpi_idx];
 }  
 #endif
 
@@ -1008,59 +1008,59 @@ _starpu_init_mp_config (struct _starpu_machine_config *config,
         topology->nmicdevices = 0;
         unsigned i;
         for (i = 0; i < (unsigned) reqmicdevices; i++)
-            if (0 == _starpu_init_mic_node (config, i, &mic_handles[i], &_starpu_mic_process[i]))
-                topology->nmicdevices++;
+                if (0 == _starpu_init_mic_node (config, i, &mic_handles[i], &_starpu_mic_process[i]))
+                        topology->nmicdevices++;
 
 
         for (i = 0; i < topology->nmicdevices; i++)
-            _starpu_init_mic_config (config, user_conf, i);
+                _starpu_init_mic_config (config, user_conf, i);
     }
 #endif
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
     {
-        /* Discover and initialize the number of MPI nodes through the mp
-         * infrastructure. */
-        unsigned nhwmpidevices = _starpu_mpi_src_get_device_count();
-
-        int reqmpidevices = starpu_get_env_number("STARPU_NMPI_MS");
-        if (reqmpidevices == -1 && user_conf)
-            reqmpidevices = user_conf->nmpi_ms;
-        if (reqmpidevices == -1)
-            /* Nothing was specified, so let's use the number of
-             * detected mpi devices. ! */
-            reqmpidevices = nhwmpidevices;
-
-        if (reqmpidevices != -1)
-        {
-            if ((unsigned) reqmpidevices > nhwmpidevices)
+            /* Discover and initialize the number of MPI nodes through the mp
+             * infrastructure. */
+            unsigned nhwmpidevices = _starpu_mpi_src_get_device_count();
+
+            int reqmpidevices = starpu_get_env_number("STARPU_NMPI_MS");
+            if (reqmpidevices == -1 && user_conf)
+                    reqmpidevices = user_conf->nmpi_ms;
+            if (reqmpidevices == -1)
+                    /* Nothing was specified, so let's use the number of
+                     * detected mpi devices. ! */
+                    reqmpidevices = nhwmpidevices;
+
+            if (reqmpidevices != -1)
             {
-                /* The user requires more MPI devices than there is available */
-                fprintf(stderr,
-                    "# Warning: %d MPI Master-Slave devices requested. Only %d available.\n",
-                    reqmpidevices, nhwmpidevices);
-                reqmpidevices = nhwmpidevices;
+                    if ((unsigned) reqmpidevices > nhwmpidevices)
+                    {
+                            /* The user requires more MPI devices than there is available */
+                            fprintf(stderr,
+                                            "# Warning: %d MPI Master-Slave devices requested. Only %d available.\n",
+                                            reqmpidevices, nhwmpidevices);
+                            reqmpidevices = nhwmpidevices;
+                    }
             }
-        }
 
-        topology->nmpidevices = reqmpidevices;
+            topology->nmpidevices = reqmpidevices;
 
-        /* if user don't want to use MPI slaves, we close the slave processes */
-        if (no_mp_config && topology->nmpidevices == 0)
-        {
-            _starpu_mpi_common_mp_deinit();
-            exit(0);
-        }
+            /* if user don't want to use MPI slaves, we close the slave processes */
+            if (no_mp_config && topology->nmpidevices == 0)
+            {
+                    _starpu_mpi_common_mp_deinit();
+                    exit(0);
+            }
 
-        if (!no_mp_config)
-        {
-            unsigned i;
-            for (i = 0; i < topology->nmpidevices; i++)
-                mpi_ms_nodes[i] = _starpu_mp_common_node_create(STARPU_NODE_MPI_SOURCE, i);
+            if (!no_mp_config)
+            {
+                    unsigned i;
+                    for (i = 0; i < topology->nmpidevices; i++)
+                            mpi_ms_nodes[i] = _starpu_mp_common_node_create(STARPU_NODE_MPI_SOURCE, i);
 
 
-            for (i = 0; i < topology->nmpidevices; i++)
-                _starpu_init_mpi_config (config, user_conf, i);
-        }
+                    for (i = 0; i < topology->nmpidevices; i++)
+                            _starpu_init_mpi_config (config, user_conf, i);
+            }
     }
 #endif
 }
@@ -1080,9 +1080,9 @@ _starpu_deinit_mic_node (unsigned mic_idx)
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
 static void _starpu_deinit_mpi_node(int devid)
 {
-    _starpu_mp_common_send_command(mpi_ms_nodes[devid], STARPU_MP_COMMAND_EXIT, NULL, 0);                          
+        _starpu_mp_common_send_command(mpi_ms_nodes[devid], STARPU_MP_COMMAND_EXIT, NULL, 0);                          
 
-    _starpu_mp_common_node_destroy(mpi_ms_nodes[devid]);
+        _starpu_mp_common_node_destroy(mpi_ms_nodes[devid]);
 }
 #endif
 
@@ -1437,7 +1437,7 @@ _starpu_init_machine_config(struct _starpu_machine_config *config, int no_mp_con
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
 #ifdef STARPU_MPI_MASTER_SLAVE_MULTIPLE_THREAD
             for (j = 0; j < STARPU_MAXMPIDEVS; j++)
-                mpi_ms_busy_cpus += (topology->nmpicores[j] ? 1 : 0);
+                    mpi_ms_busy_cpus += (topology->nmpicores[j] ? 1 : 0);
 #else
             mpi_ms_busy_cpus = 1; /* we launch one thread to control all slaves */
 #endif
@@ -1781,8 +1781,8 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_c
 				workerarg->bindid = _starpu_get_next_bindid(config, NULL, 0);
 				_starpu_memory_node_add_nworkers(memory_node);
 
-                _starpu_worker_drives_memory_node(workerarg->workerid, STARPU_MAIN_RAM);
-                _starpu_worker_drives_memory_node(workerarg->workerid, memory_node);
+                                _starpu_worker_drives_memory_node(workerarg->workerid, STARPU_MAIN_RAM);
+                                _starpu_worker_drives_memory_node(workerarg->workerid, memory_node);
 
 #ifdef STARPU_SIMGRID
 				starpu_pthread_queue_register(&workerarg->wait, &_starpu_simgrid_transfer_queue[memory_node]);
@@ -1876,8 +1876,8 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_c
 				}
 				_starpu_memory_node_add_nworkers(memory_node);
 
-                _starpu_worker_drives_memory_node(workerarg->workerid, STARPU_MAIN_RAM);
-                _starpu_worker_drives_memory_node(workerarg->workerid, memory_node);
+                                _starpu_worker_drives_memory_node(workerarg->workerid, STARPU_MAIN_RAM);
+                                _starpu_worker_drives_memory_node(workerarg->workerid, memory_node);
 #ifdef STARPU_SIMGRID
 				starpu_pthread_queue_register(&workerarg->set->workers[0].wait, &_starpu_simgrid_transfer_queue[memory_node]);
 				starpu_pthread_queue_register(&workerarg->set->workers[0].wait, &_starpu_simgrid_transfer_queue[STARPU_MAIN_RAM]);
@@ -1918,8 +1918,8 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_c
 				}
 				_starpu_memory_node_add_nworkers(memory_node);
 
-                _starpu_worker_drives_memory_node(workerarg->workerid, STARPU_MAIN_RAM);
-                _starpu_worker_drives_memory_node(workerarg->workerid, memory_node);
+                                _starpu_worker_drives_memory_node(workerarg->workerid, STARPU_MAIN_RAM);
+                                _starpu_worker_drives_memory_node(workerarg->workerid, memory_node);
 #ifdef STARPU_SIMGRID
 				starpu_pthread_queue_register(&workerarg->wait, &_starpu_simgrid_transfer_queue[memory_node]);
 				starpu_pthread_queue_register(&workerarg->wait, &_starpu_simgrid_transfer_queue[STARPU_MAIN_RAM]);
@@ -1952,8 +1952,8 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_c
 				workerarg->bindid = mic_bindid[devid];
 				_starpu_memory_node_add_nworkers(memory_node);
 
-                _starpu_worker_drives_memory_node(workerarg->workerid, STARPU_MAIN_RAM);
-                _starpu_worker_drives_memory_node(workerarg->workerid, memory_node);
+                                _starpu_worker_drives_memory_node(workerarg->workerid, STARPU_MAIN_RAM);
+                                _starpu_worker_drives_memory_node(workerarg->workerid, memory_node);
 #ifdef STARPU_SIMGRID
 				starpu_pthread_queue_register(&workerarg->set->workers[0].wait, &_starpu_simgrid_transfer_queue[memory_node]);
 				starpu_pthread_queue_register(&workerarg->set->workers[0].wait, &_starpu_simgrid_transfer_queue[STARPU_MAIN_RAM]);
@@ -1971,8 +1971,8 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_c
 				memory_node = ram_memory_node;
 				_starpu_memory_node_add_nworkers(memory_node);
 
-                _starpu_worker_drives_memory_node(workerarg->workerid, STARPU_MAIN_RAM);
-                _starpu_worker_drives_memory_node(workerarg->workerid, memory_node);
+                                _starpu_worker_drives_memory_node(workerarg->workerid, STARPU_MAIN_RAM);
+                                _starpu_worker_drives_memory_node(workerarg->workerid, memory_node);
 #ifdef STARPU_SIMGRID
 				starpu_pthread_queue_register(&workerarg->wait, &_starpu_simgrid_transfer_queue[memory_node]);
 				starpu_pthread_queue_register(&workerarg->wait, &_starpu_simgrid_transfer_queue[STARPU_MAIN_RAM]);
@@ -1997,20 +1997,20 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_c
 					_starpu_register_bus(memory_node, STARPU_MAIN_RAM);
 
 				}
-                _starpu_worker_drives_memory_node(workerarg->workerid, STARPU_MAIN_RAM);
-                _starpu_worker_drives_memory_node(workerarg->workerid, memory_node);
+                                _starpu_worker_drives_memory_node(workerarg->workerid, STARPU_MAIN_RAM);
+                                _starpu_worker_drives_memory_node(workerarg->workerid, memory_node);
 #ifndef STARPU_MPI_MASTER_SLAVE_MULTIPLE_THREAD
-                /* MPI driver thread can manage all slave memories if we disable the MPI multiple thread */
-                unsigned findworker;
-                for (findworker = 0; findworker < worker; findworker++)
-                {
-                    struct _starpu_worker *findworkerarg = &config->workers[findworker];
-                    if (findworkerarg->arch == STARPU_MPI_WORKER)
-                    {
-                        _starpu_worker_drives_memory_node(workerarg->workerid, findworkerarg->memory_node);
-                        _starpu_worker_drives_memory_node(findworkerarg->workerid, memory_node);
-                    }
-                }
+                                /* MPI driver thread can manage all slave memories if we disable the MPI multiple thread */
+                                unsigned findworker;
+                                for (findworker = 0; findworker < worker; findworker++)
+                                {
+                                        struct _starpu_worker *findworkerarg = &config->workers[findworker];
+                                        if (findworkerarg->arch == STARPU_MPI_WORKER)
+                                        {
+                                                _starpu_worker_drives_memory_node(workerarg->workerid, findworkerarg->memory_node);
+                                                _starpu_worker_drives_memory_node(findworkerarg->workerid, memory_node);
+                                        }
+                                }
 #endif
                 
 				workerarg->bindid = mpi_bindid[devid];
@@ -2114,7 +2114,7 @@ _starpu_build_topology (struct _starpu_machine_config *config, int no_mp_config)
 	config->opencl_nodeid = -1;
 	config->mic_nodeid = -1;
 	config->scc_nodeid = -1;
-    config->mpi_nodeid = -1;
+        config->mpi_nodeid = -1;
 	for (i = 0; i < starpu_worker_get_count(); i++)
 	{
 		switch (starpu_worker_get_type(i))

+ 56 - 56
src/core/workers.c

@@ -143,10 +143,10 @@ static uint32_t _starpu_worker_exists_and_can_execute(struct starpu_task *task,
 				if (task->cl->cpu_funcs_name[impl] != NULL || task->cl->mic_funcs[impl] != NULL)
 					test_implementation = 1;
 				break;
-			case STARPU_MPI_WORKER:
-				if (task->cl->cpu_funcs_name[impl] != NULL || task->cl->mpi_ms_funcs[impl] != NULL)
-					test_implementation = 1;
-				break;
+                        case STARPU_MPI_WORKER:
+                                if (task->cl->cpu_funcs_name[impl] != NULL || task->cl->mpi_ms_funcs[impl] != NULL)
+                                        test_implementation = 1;
+                                break;
 			case STARPU_SCC_WORKER:
 				if (task->cl->cpu_funcs_name[impl] != NULL || task->cl->scc_funcs[impl] != NULL)
 					test_implementation = 1;
@@ -538,8 +538,8 @@ static void _starpu_worker_init(struct _starpu_worker *workerarg, struct _starpu
 	starpu_pthread_wait_init(&workerarg->wait);
 	starpu_pthread_queue_register(&workerarg->wait, &_starpu_simgrid_task_queue[workerarg->workerid]);
 #endif
-    workerarg->task_sending = NULL;
-    workerarg->nb_buffers_sent = 0;
+        workerarg->task_sending = NULL;
+        workerarg->nb_buffers_sent = 0;
 
 	workerarg->first_task = 0;
 	workerarg->ntasks = 0;
@@ -879,36 +879,36 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 	}
 
 #if defined(STARPU_USE_MPI_MASTER_SLAVE) && !defined(STARPU_MPI_MASTER_SLAVE_MULTIPLE_THREAD)
-    if (pconfig->topology.nmpidevices > 0)
-    {
-        struct _starpu_worker_set * worker_set_zero = &mpi_worker_set[0];
-        struct _starpu_worker * worker_zero = &worker_set_zero->workers[0];
-        STARPU_PTHREAD_CREATE_ON(
-                worker_zero->name,
-                &worker_set_zero->worker_thread,
-                NULL,
-                _starpu_mpi_src_worker,
-                &mpi_worker_set,
-                _starpu_simgrid_get_host_by_worker(worker_zero));
-
-        /* We use the first worker to know if everything are finished */
+        if (pconfig->topology.nmpidevices > 0)
+        {
+                struct _starpu_worker_set * worker_set_zero = &mpi_worker_set[0];
+                struct _starpu_worker * worker_zero = &worker_set_zero->workers[0];
+                STARPU_PTHREAD_CREATE_ON(
+                                worker_zero->name,
+                                &worker_set_zero->worker_thread,
+                                NULL,
+                                _starpu_mpi_src_worker,
+                                &mpi_worker_set,
+                                _starpu_simgrid_get_host_by_worker(worker_zero));
+
+                /* We use the first worker to know if everything are finished */
 #ifdef STARPU_USE_FXT
-        STARPU_PTHREAD_MUTEX_LOCK(&worker_zero->mutex);
-        while (!worker_zero->worker_is_running)
-            STARPU_PTHREAD_COND_WAIT(&worker_zero->started_cond, &worker_zero->mutex);
-        STARPU_PTHREAD_MUTEX_UNLOCK(&worker_zero->mutex);
+                STARPU_PTHREAD_MUTEX_LOCK(&worker_zero->mutex);
+                while (!worker_zero->worker_is_running)
+                        STARPU_PTHREAD_COND_WAIT(&worker_zero->started_cond, &worker_zero->mutex);
+                STARPU_PTHREAD_MUTEX_UNLOCK(&worker_zero->mutex);
 #endif
 
-        STARPU_PTHREAD_MUTEX_LOCK(&worker_set_zero->mutex);
-        while (!worker_set_zero->set_is_initialized)
-            STARPU_PTHREAD_COND_WAIT(&worker_set_zero->ready_cond,
-                    &worker_set_zero->mutex);
-        STARPU_PTHREAD_MUTEX_UNLOCK(&worker_set_zero->mutex);
+                STARPU_PTHREAD_MUTEX_LOCK(&worker_set_zero->mutex);
+                while (!worker_set_zero->set_is_initialized)
+                        STARPU_PTHREAD_COND_WAIT(&worker_set_zero->ready_cond,
+                                        &worker_set_zero->mutex);
+                STARPU_PTHREAD_MUTEX_UNLOCK(&worker_set_zero->mutex);
 
-        worker_set_zero->started = 1;
-        worker_set_zero->worker_thread = mpi_worker_set[0].worker_thread;
+                worker_set_zero->started = 1;
+                worker_set_zero->worker_thread = mpi_worker_set[0].worker_thread;
 
-    }
+        }
 
 #endif
 
@@ -966,7 +966,7 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 				break;
 #endif
 			case STARPU_MIC_WORKER:
-            case STARPU_MPI_WORKER:
+                        case STARPU_MPI_WORKER:
 				/* Already waited above */
 				break;
 			case STARPU_SCC_WORKER:
@@ -1218,17 +1218,17 @@ int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv)
 		setenv("STARPU_SINK", "STARPU_SCC", 1);
 #	endif
 
-#   ifdef STARPU_USE_MPI_MASTER_SLAVE
-	if (_starpu_mpi_common_mp_init() == -ENODEV)
-    {
-        initialized = UNINITIALIZED;
-        return -ENODEV;
-    }
+#       ifdef STARPU_USE_MPI_MASTER_SLAVE
+        if (_starpu_mpi_common_mp_init() == -ENODEV)
+        {
+                initialized = UNINITIALIZED;
+                return -ENODEV;
+        }
 
-    /* In MPI case we look at the rank to know if we are a sink */
-    if (!_starpu_mpi_common_is_src_node())
-        setenv("STARPU_SINK", "STARPU_MPI_MS", 1);
-#   endif
+        /* In MPI case we look at the rank to know if we are a sink */
+        if (!_starpu_mpi_common_is_src_node())
+                setenv("STARPU_SINK", "STARPU_MPI_MS", 1);
+#       endif
 
 	/* If StarPU was configured to use MP sinks, we have to control the
 	 * kind on node we are running on : host or sink ? */
@@ -1378,8 +1378,8 @@ int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv)
 			_starpu_scc_src_mp_deinit();
 #endif
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
-        if (_starpu_mpi_common_is_mp_initialized())
-            _starpu_mpi_common_mp_deinit();
+                if (_starpu_mpi_common_is_mp_initialized())
+                        _starpu_mpi_common_mp_deinit();
 #endif
 
 		initialized = UNINITIALIZED;
@@ -1777,16 +1777,16 @@ int starpu_worker_get_count_by_type(enum starpu_worker_archtype type)
 		case STARPU_SCC_WORKER:
 			return _starpu_config.topology.nsccdevices;
 
-        case STARPU_MPI_WORKER:
-            return _starpu_config.topology.nmpidevices;
+                case STARPU_MPI_WORKER:
+                        return _starpu_config.topology.nmpidevices;
 
-		case STARPU_ANY_WORKER:
-			return _starpu_config.topology.ncpus+
-			       _starpu_config.topology.ncudagpus+
-			       _starpu_config.topology.nopenclgpus+
-			       _starpu_config.topology.nmicdevices+
-			       _starpu_config.topology.nsccdevices+
-		           _starpu_config.topology.nmpidevices;
+                case STARPU_ANY_WORKER:
+                        return _starpu_config.topology.ncpus+
+                                _starpu_config.topology.ncudagpus+
+                                _starpu_config.topology.nopenclgpus+
+                                _starpu_config.topology.nmicdevices+
+                                _starpu_config.topology.nsccdevices+
+                                _starpu_config.topology.nmpidevices;
 		default:
 			return -EINVAL;
 	}
@@ -1834,7 +1834,7 @@ int starpu_asynchronous_mic_copy_disabled(void)
 
 int starpu_asynchronous_mpi_ms_copy_disabled(void)
 {
-    return _starpu_config.conf.disable_asynchronous_mpi_ms_copy;
+        return _starpu_config.conf.disable_asynchronous_mpi_ms_copy;
 }
 
 unsigned starpu_mic_worker_get_count(void)
@@ -1854,7 +1854,7 @@ unsigned starpu_scc_worker_get_count(void)
 
 unsigned starpu_mpi_ms_worker_get_count(void)
 {
-    return _starpu_config.topology.nmpidevices;
+        return _starpu_config.topology.nmpidevices;
 }
 
 /* When analyzing performance, it is useful to see what is the processing unit
@@ -2384,7 +2384,7 @@ char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type)
 	if (type == STARPU_CUDA_WORKER) return "STARPU_CUDA_WORKER";
 	if (type == STARPU_OPENCL_WORKER) return "STARPU_OPENCL_WORKER";
 	if (type == STARPU_MIC_WORKER) return "STARPU_MIC_WORKER";
-    if (type == STARPU_MPI_WORKER) return "STARPU_MPI_WORKER";
+        if (type == STARPU_MPI_WORKER) return "STARPU_MPI_WORKER";
 	if (type == STARPU_SCC_WORKER) return "STARPU_SCC_WORKER";
 	if (type == STARPU_ANY_WORKER) return "STARPU_ANY_WORKER";
 	return "STARPU_unknown_WORKER";

+ 3 - 3
src/core/workers.h

@@ -115,8 +115,8 @@ LIST_TYPE(_starpu_worker,
 
 	unsigned spinning_backoff ; /* number of cycles to pause when spinning  */
 
-    unsigned nb_buffers_sent; /* number of piece of data already send to remote side */
-    struct starpu_task *task_sending; /* The buffers of this task are being sent */
+        unsigned nb_buffers_sent; /* number of piece of data already send to remote side */
+        struct starpu_task *task_sending; /* The buffers of this task are being sent */
 
 	/* indicate whether the workers shares tasks lists with other workers*/
 	/* in this case when removing him from a context it disapears instantly */
@@ -250,7 +250,7 @@ struct _starpu_machine_topology
 
 	/* Actual number of MPI workers used by StarPU. */
 	unsigned nmpidevices;
-    unsigned nhwmpidevices;
+        unsigned nhwmpidevices;
 
 	unsigned nhwmpicores[STARPU_MAXMPIDEVS]; // Each MPI node has its set of cores.
 	unsigned nmpicores[STARPU_MAXMPIDEVS];

+ 6 - 7
src/datawizard/coherency.c

@@ -147,7 +147,7 @@ int _starpu_select_src_node(starpu_data_handle_t handle, unsigned destination)
 			if (starpu_node_get_kind(i) == STARPU_CPU_RAM || 
 			    starpu_node_get_kind(i) == STARPU_SCC_RAM ||
 			    starpu_node_get_kind(i) == STARPU_SCC_SHM ||
-                starpu_node_get_kind(i) == STARPU_MPI_MS_RAM)
+                            starpu_node_get_kind(i) == STARPU_MPI_MS_RAM)
 				i_ram = i;
 			if (starpu_node_get_kind(i) == STARPU_DISK_RAM)			
 				i_disk = i;
@@ -260,12 +260,11 @@ static int worker_supports_direct_access(unsigned node, unsigned handling_node)
 		case STARPU_MIC_RAM:
 			/* TODO: We don't handle direct MIC-MIC transfers yet */
 			return 0;
-        case STARPU_MPI_MS_RAM:
-        {
-            /* Don't support MPI-MPI transfers yet */
-            enum starpu_node_kind kind = starpu_node_get_kind(handling_node);
-            return kind == STARPU_MPI_MS_RAM;
-        }
+                case STARPU_MPI_MS_RAM:
+                {
+                        enum starpu_node_kind kind = starpu_node_get_kind(handling_node);
+                        return kind == STARPU_MPI_MS_RAM;
+                }
 		case STARPU_SCC_RAM:
 			return 1;
 		default:

+ 111 - 111
src/datawizard/copy_driver.c

@@ -425,77 +425,77 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
 	/* TODO: MIC -> MIC */
 #endif
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
-	case _STARPU_MEMORY_NODE_TUPLE(STARPU_CPU_RAM,STARPU_MPI_MS_RAM):
-        if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_mpi_ms_copy_disabled() ||
-                !(copy_methods->ram_to_mpi_ms_async || copy_methods->any_to_any))
-        {
-            /* this is not associated to a request so it's synchronous */
-            STARPU_ASSERT(copy_methods->ram_to_mpi_ms || copy_methods->any_to_any);
-            if (copy_methods->ram_to_mpi_ms)
-                copy_methods->ram_to_mpi_ms(src_interface, src_node, dst_interface, dst_node);
-            else
-                copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
-        }
-        else
-        {
-            req->async_channel.type = STARPU_MPI_MS_RAM;
-            if(copy_methods->ram_to_mpi_ms_async)
-                ret = copy_methods->ram_to_mpi_ms_async(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
-            else
-            {
-                STARPU_ASSERT(copy_methods->any_to_any);
-                ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
-            }
-        }
-		break;
-
-	case _STARPU_MEMORY_NODE_TUPLE(STARPU_MPI_MS_RAM,STARPU_CPU_RAM):
-        if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_mpi_ms_copy_disabled() ||
-                !(copy_methods->mpi_ms_to_ram_async || copy_methods->any_to_any))
-        {
-            /* this is not associated to a request so it's synchronous */
-            STARPU_ASSERT(copy_methods->mpi_ms_to_ram || copy_methods->any_to_any);
-            if (copy_methods->mpi_ms_to_ram)
-                copy_methods->mpi_ms_to_ram(src_interface, src_node, dst_interface, dst_node);
-            else
-                copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
-        }
-        else
-        {
-            req->async_channel.type = STARPU_MPI_MS_RAM;
-            if(copy_methods->mpi_ms_to_ram_async)
-                ret = copy_methods->mpi_ms_to_ram_async(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
-            else
-            {
-                STARPU_ASSERT(copy_methods->any_to_any);
-                ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
-            }
-        }
-		break;
-
-	case _STARPU_MEMORY_NODE_TUPLE(STARPU_MPI_MS_RAM,STARPU_MPI_MS_RAM):
-        if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_mpi_ms_copy_disabled() ||
-                !(copy_methods->mpi_ms_to_mpi_ms_async || copy_methods->any_to_any))
-        {
-            /* this is not associated to a request so it's synchronous */
-            STARPU_ASSERT(copy_methods->mpi_ms_to_mpi_ms || copy_methods->any_to_any);
-            if (copy_methods->mpi_ms_to_mpi_ms)
-                copy_methods->mpi_ms_to_mpi_ms(src_interface, src_node, dst_interface, dst_node);
-            else
-                copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
-        }
-        else
-        {
-            req->async_channel.type = STARPU_MPI_MS_RAM;
-            if(copy_methods->mpi_ms_to_mpi_ms_async)
-                ret = copy_methods->mpi_ms_to_mpi_ms_async(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
-            else
-            {
-                STARPU_ASSERT(copy_methods->any_to_any);
-                ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
-            }
-        }
-		break;
+        case _STARPU_MEMORY_NODE_TUPLE(STARPU_CPU_RAM,STARPU_MPI_MS_RAM):
+                if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_mpi_ms_copy_disabled() ||
+                                !(copy_methods->ram_to_mpi_ms_async || copy_methods->any_to_any))
+                {
+                        /* this is not associated to a request so it's synchronous */
+                        STARPU_ASSERT(copy_methods->ram_to_mpi_ms || copy_methods->any_to_any);
+                        if (copy_methods->ram_to_mpi_ms)
+                                copy_methods->ram_to_mpi_ms(src_interface, src_node, dst_interface, dst_node);
+                        else
+                                copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
+                }
+                else
+                {
+                        req->async_channel.type = STARPU_MPI_MS_RAM;
+                        if(copy_methods->ram_to_mpi_ms_async)
+                                ret = copy_methods->ram_to_mpi_ms_async(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
+                        else
+                        {
+                                STARPU_ASSERT(copy_methods->any_to_any);
+                                ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
+                        }
+                }
+                break;
+
+        case _STARPU_MEMORY_NODE_TUPLE(STARPU_MPI_MS_RAM,STARPU_CPU_RAM):
+                if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_mpi_ms_copy_disabled() ||
+                                !(copy_methods->mpi_ms_to_ram_async || copy_methods->any_to_any))
+                {
+                        /* this is not associated to a request so it's synchronous */
+                        STARPU_ASSERT(copy_methods->mpi_ms_to_ram || copy_methods->any_to_any);
+                        if (copy_methods->mpi_ms_to_ram)
+                                copy_methods->mpi_ms_to_ram(src_interface, src_node, dst_interface, dst_node);
+                        else
+                                copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
+                }
+                else
+                {
+                        req->async_channel.type = STARPU_MPI_MS_RAM;
+                        if(copy_methods->mpi_ms_to_ram_async)
+                                ret = copy_methods->mpi_ms_to_ram_async(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
+                        else
+                        {
+                                STARPU_ASSERT(copy_methods->any_to_any);
+                                ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
+                        }
+                }
+                break;
+
+        case _STARPU_MEMORY_NODE_TUPLE(STARPU_MPI_MS_RAM,STARPU_MPI_MS_RAM):
+                if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_mpi_ms_copy_disabled() ||
+                                !(copy_methods->mpi_ms_to_mpi_ms_async || copy_methods->any_to_any))
+                {
+                        /* this is not associated to a request so it's synchronous */
+                        STARPU_ASSERT(copy_methods->mpi_ms_to_mpi_ms || copy_methods->any_to_any);
+                        if (copy_methods->mpi_ms_to_mpi_ms)
+                                copy_methods->mpi_ms_to_mpi_ms(src_interface, src_node, dst_interface, dst_node);
+                        else
+                                copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
+                }
+                else
+                {
+                        req->async_channel.type = STARPU_MPI_MS_RAM;
+                        if(copy_methods->mpi_ms_to_mpi_ms_async)
+                                ret = copy_methods->mpi_ms_to_mpi_ms_async(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
+                        else
+                        {
+                                STARPU_ASSERT(copy_methods->any_to_any);
+                                ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
+                        }
+                }
+                break;
 #endif
 #ifdef STARPU_USE_SCC
 		/* SCC RAM associated to the master process is considered as
@@ -738,40 +738,40 @@ int starpu_interface_copy(uintptr_t src, size_t src_offset, unsigned src_node, u
 				size);
 #endif
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
-    case _STARPU_MEMORY_NODE_TUPLE(STARPU_CPU_RAM, STARPU_MPI_MS_RAM):
-        if (async_data)
-            return _starpu_mpi_copy_ram_to_mpi_async(
-                    (void*) (src + src_offset), src_node,
-                    (void*) (dst + dst_offset), dst_node,
-                    size, async_data);
-        else
-            return _starpu_mpi_copy_ram_to_mpi_sync(
-                    (void*) (src + src_offset), src_node,
-                    (void*) (dst + dst_offset), dst_node,
-                    size);
-    case _STARPU_MEMORY_NODE_TUPLE(STARPU_MPI_MS_RAM, STARPU_CPU_RAM):
-        if (async_data)
-            return _starpu_mpi_copy_mpi_to_ram_async(
-                    (void*) (src + src_offset), src_node,
-                    (void*) (dst + dst_offset), dst_node,
-                    size, async_data);
-        else
-            return _starpu_mpi_copy_mpi_to_ram_sync(
-                    (void*) (src + src_offset), src_node,
-                    (void*) (dst + dst_offset), dst_node,
-                    size);
-
-    case _STARPU_MEMORY_NODE_TUPLE(STARPU_MPI_MS_RAM, STARPU_MPI_MS_RAM):
-        if (async_data)
-            return _starpu_mpi_copy_sink_to_sink_async(
-                    (void*) (src + src_offset), src_node,
-                    (void*) (dst + dst_offset), dst_node,
-                    size, async_data);
-        else
-            return _starpu_mpi_copy_sink_to_sink_sync(
-                    (void*) (src + src_offset), src_node,
-                    (void*) (dst + dst_offset), dst_node,
-                    size);
+        case _STARPU_MEMORY_NODE_TUPLE(STARPU_CPU_RAM, STARPU_MPI_MS_RAM):
+                if (async_data)
+                        return _starpu_mpi_copy_ram_to_mpi_async(
+                                        (void*) (src + src_offset), src_node,
+                                        (void*) (dst + dst_offset), dst_node,
+                                        size, async_data);
+                else
+                        return _starpu_mpi_copy_ram_to_mpi_sync(
+                                        (void*) (src + src_offset), src_node,
+                                        (void*) (dst + dst_offset), dst_node,
+                                        size);
+        case _STARPU_MEMORY_NODE_TUPLE(STARPU_MPI_MS_RAM, STARPU_CPU_RAM):
+                if (async_data)
+                        return _starpu_mpi_copy_mpi_to_ram_async(
+                                        (void*) (src + src_offset), src_node,
+                                        (void*) (dst + dst_offset), dst_node,
+                                        size, async_data);
+                else
+                        return _starpu_mpi_copy_mpi_to_ram_sync(
+                                        (void*) (src + src_offset), src_node,
+                                        (void*) (dst + dst_offset), dst_node,
+                                        size);
+
+        case _STARPU_MEMORY_NODE_TUPLE(STARPU_MPI_MS_RAM, STARPU_MPI_MS_RAM):
+                if (async_data)
+                        return _starpu_mpi_copy_sink_to_sink_async(
+                                        (void*) (src + src_offset), src_node,
+                                        (void*) (dst + dst_offset), dst_node,
+                                        size, async_data);
+                else
+                        return _starpu_mpi_copy_sink_to_sink_sync(
+                                        (void*) (src + src_offset), src_node,
+                                        (void*) (dst + dst_offset), dst_node,
+                                        size);
 #endif
 
 	case _STARPU_MEMORY_NODE_TUPLE(STARPU_CPU_RAM, STARPU_DISK_RAM):
@@ -851,9 +851,9 @@ void _starpu_driver_wait_request_completion(struct _starpu_async_channel *async_
 		break;
 #endif
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
-    case STARPU_MPI_MS_RAM:
-        _starpu_mpi_common_wait_event(async_channel);
-        break;
+        case STARPU_MPI_MS_RAM:
+                _starpu_mpi_common_wait_event(async_channel);
+                break;
 #endif
 	case STARPU_MAIN_RAM:
 		starpu_disk_wait_request(async_channel);
@@ -920,9 +920,9 @@ unsigned _starpu_driver_test_request_completion(struct _starpu_async_channel *as
 		break;
 #endif
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
-    case STARPU_MPI_MS_RAM:
-        success = _starpu_mpi_common_test_event(async_channel);
-        break;
+        case STARPU_MPI_MS_RAM:
+                success = _starpu_mpi_common_test_event(async_channel);
+                break;
 #endif
 	case STARPU_DISK_RAM:
 		success = starpu_disk_test_request(async_channel);

+ 13 - 13
src/datawizard/copy_driver.h

@@ -67,8 +67,8 @@ LIST_TYPE(_starpu_mpi_ms_event_request,
 
 struct _starpu_mpi_ms_async_event
 {
-    int is_sender;
-    struct _starpu_mpi_ms_event_request_list * requests;
+        int is_sender;
+        struct _starpu_mpi_ms_event_request_list * requests;
 };
 #endif
 
@@ -91,30 +91,30 @@ union _starpu_async_channel_event
 	};
 #endif
 #ifdef STARPU_USE_CUDA
-	cudaEvent_t cuda_event;
+        cudaEvent_t cuda_event;
 #endif
 #ifdef STARPU_USE_OPENCL
-    cl_event opencl_event;
+        cl_event opencl_event;
 #endif
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
-    struct _starpu_mpi_ms_async_event mpi_ms_event;
+        struct _starpu_mpi_ms_async_event mpi_ms_event;
 #endif
 #ifdef STARPU_USE_MIC
-	struct _starpu_mic_async_event mic_event;
+        struct _starpu_mic_async_event mic_event;
 #endif
-	struct _starpu_disk_async_event disk_event;
+        struct _starpu_disk_async_event disk_event;
 };
 
 struct _starpu_async_channel
 {
 	union _starpu_async_channel_event event;
 	enum starpu_node_kind type;
-    /* Which node to polling when needing ACK msg */
-    struct _starpu_mp_node *polling_node_sender;
-    struct _starpu_mp_node *polling_node_receiver;
-    /* Used to know if the acknowlegdment msg is arrived from sinks */
-    volatile int starpu_mp_common_finished_sender; 
-    volatile int starpu_mp_common_finished_receiver; 
+        /* Which node to polling when needing ACK msg */
+        struct _starpu_mp_node *polling_node_sender;
+        struct _starpu_mp_node *polling_node_receiver;
+        /* Used to know if the acknowlegdment msg is arrived from sinks */
+        volatile int starpu_mp_common_finished_sender; 
+        volatile int starpu_mp_common_finished_receiver; 
 };
 
 void _starpu_wake_all_blocked_workers_on_node(unsigned nodeid);

+ 5 - 5
src/datawizard/data_request.c

@@ -153,12 +153,12 @@ struct _starpu_data_request *_starpu_create_data_request(starpu_data_handle_t ha
 	r->dst_replicate = dst_replicate;
 	r->mode = mode;
 	r->async_channel.type = STARPU_UNUSED;
-    r->async_channel.starpu_mp_common_finished_sender = 0;
-    r->async_channel.starpu_mp_common_finished_receiver = 0;
-    r->async_channel.polling_node_sender = NULL;
-    r->async_channel.polling_node_receiver = NULL;
+        r->async_channel.starpu_mp_common_finished_sender = 0;
+        r->async_channel.starpu_mp_common_finished_receiver = 0;
+        r->async_channel.polling_node_sender = NULL;
+        r->async_channel.polling_node_receiver = NULL;
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
-    r->async_channel.event.mpi_ms_event.requests = NULL;
+        r->async_channel.event.mpi_ms_event.requests = NULL;
 #endif
 	if (handling_node == -1)
 		handling_node = STARPU_MAIN_RAM;

+ 10 - 10
src/datawizard/datawizard.c

@@ -66,21 +66,21 @@ int ___starpu_datawizard_progress(unsigned memory_node, unsigned may_alloc, unsi
 
 int __starpu_datawizard_progress(unsigned may_alloc, unsigned push_requests)
 {
-    int current_worker_id = starpu_worker_get_id();
-    unsigned memnode;
+        int current_worker_id = starpu_worker_get_id();
+        unsigned memnode;
 
-    int ret = 0;
+        int ret = 0;
 
-    for (memnode = 0; memnode < STARPU_MAXNODES; memnode++)
-    {
-        if (_starpu_worker_drives_memory[current_worker_id][memnode] == 1)
-            ret |= ___starpu_datawizard_progress(memnode, may_alloc, push_requests);
-    }
+        for (memnode = 0; memnode < STARPU_MAXNODES; memnode++)
+        {
+                if (_starpu_worker_drives_memory[current_worker_id][memnode] == 1)
+                        ret |= ___starpu_datawizard_progress(memnode, may_alloc, push_requests);
+        }
 
-    return ret;
+        return ret;
 }
 
 void _starpu_datawizard_progress(unsigned may_alloc)
 {
-	__starpu_datawizard_progress(may_alloc, 1);
+        __starpu_datawizard_progress(may_alloc, 1);
 }

+ 5 - 5
src/drivers/driver_common/driver_common.c

@@ -489,11 +489,11 @@ int _starpu_get_multi_worker_task(struct _starpu_worker *workers, struct starpu_
 		{
 			tasks[i] = NULL;
 		}
-        /* don't push a task if we are already pushing one */
-        else if (workers[i].task_sending != NULL)
-        {
-            tasks[i] = NULL;
-        }
+                /* don't push a task if we are already pushing one */
+                else if (workers[i].task_sending != NULL)
+                {
+                        tasks[i] = NULL;
+                }
 		/*else try to pop a task*/
 		else
 		{

+ 68 - 68
src/drivers/mp_common/mp_common.c

@@ -162,7 +162,7 @@ _starpu_mp_common_node_create(enum _starpu_mp_node_kind node_kind,
 		node->dt_send = _starpu_mic_common_dt_send;
 		node->dt_recv = _starpu_mic_common_dt_recv;
 
-        node->dt_test = NULL; /* Not used now */
+                node->dt_test = NULL; /* Not used now */
 
 		node->get_kernel_from_job = NULL;
 		node->lookup = _starpu_mic_sink_lookup;
@@ -214,7 +214,7 @@ _starpu_mp_common_node_create(enum _starpu_mp_node_kind node_kind,
 		node->dt_send_to_device = _starpu_scc_sink_send_to_device;
 		node->dt_recv_from_device = _starpu_scc_sink_recv_from_device;
 
-        node->dt_test = NULL /* not used now */
+                node->dt_test = NULL /* not used now */
 
 		node->get_kernel_from_job = NULL;
 		node->lookup = _starpu_scc_sink_lookup;
@@ -228,68 +228,68 @@ _starpu_mp_common_node_create(enum _starpu_mp_node_kind node_kind,
 
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
 	case STARPU_NODE_MPI_SOURCE:
-    {
-    /*
-		node->nb_mp_sinks = 
-		node->devid = 
-    */
-        node->peer_id = (_starpu_mpi_common_get_src_node() <= peer_id ? peer_id+1 : peer_id);
-        node->mp_connection.mpi_remote_nodeid = node->peer_id;
-
-        node->init = _starpu_mpi_source_init;
-        node->launch_workers = NULL;
-        node->deinit = _starpu_mpi_source_deinit;
-   /*     node->report_error = */
-
-	 	node->mp_recv_is_ready = _starpu_mpi_common_recv_is_ready;
-		node->mp_send = _starpu_mpi_common_mp_send;
-		node->mp_recv = _starpu_mpi_common_mp_recv;
-		node->dt_send = _starpu_mpi_common_send;
-		node->dt_recv = _starpu_mpi_common_recv;
-        node->dt_send_to_device = _starpu_mpi_common_send_to_device;
-        node->dt_recv_from_device = _starpu_mpi_common_recv_from_device;
-
-		node->get_kernel_from_job = _starpu_mpi_ms_src_get_kernel_from_job;
-		node->lookup = NULL;
-		node->bind_thread = NULL;
-		node->execute = NULL;
-		node->allocate = NULL;
-		node->free = NULL;
-    }
-	break;
-
-	case STARPU_NODE_MPI_SINK:
-    {
-    /*
-		node->nb_mp_sinks = 
-		node->devid = 
-    */
-        node->mp_connection.mpi_remote_nodeid = _starpu_mpi_common_get_src_node();
-
-        node->init = _starpu_mpi_sink_init;
-        node->launch_workers = _starpu_mpi_sink_launch_workers;
-        node->deinit = _starpu_mpi_sink_deinit;
-    /*    node->report_error =  */
-
-    	node->mp_recv_is_ready = _starpu_mpi_common_recv_is_ready;
-        node->mp_send = _starpu_mpi_common_mp_send;
-		node->mp_recv = _starpu_mpi_common_mp_recv;
-		node->dt_send = _starpu_mpi_common_send;
-		node->dt_recv = _starpu_mpi_common_recv;
-        node->dt_send_to_device = _starpu_mpi_common_send_to_device;
-        node->dt_recv_from_device = _starpu_mpi_common_recv_from_device;
-
-        node->dt_test = _starpu_mpi_common_test_event;
-
-		node->get_kernel_from_job = NULL;
-		node->lookup = _starpu_mpi_sink_lookup;
-		node->bind_thread = _starpu_mpi_sink_bind_thread;
-		node->execute = _starpu_sink_common_execute;
-		node->allocate = _starpu_sink_common_allocate;
-		node->free = _starpu_sink_common_free;
-
-        
-    }
+        {
+                /*
+                   node->nb_mp_sinks = 
+                   node->devid = 
+                   */
+                node->peer_id = (_starpu_mpi_common_get_src_node() <= peer_id ? peer_id+1 : peer_id);
+                node->mp_connection.mpi_remote_nodeid = node->peer_id;
+
+                node->init = _starpu_mpi_source_init;
+                node->launch_workers = NULL;
+                node->deinit = _starpu_mpi_source_deinit;
+                /*     node->report_error = */
+
+                node->mp_recv_is_ready = _starpu_mpi_common_recv_is_ready;
+                node->mp_send = _starpu_mpi_common_mp_send;
+                node->mp_recv = _starpu_mpi_common_mp_recv;
+                node->dt_send = _starpu_mpi_common_send;
+                node->dt_recv = _starpu_mpi_common_recv;
+                node->dt_send_to_device = _starpu_mpi_common_send_to_device;
+                node->dt_recv_from_device = _starpu_mpi_common_recv_from_device;
+
+                node->get_kernel_from_job = _starpu_mpi_ms_src_get_kernel_from_job;
+                node->lookup = NULL;
+                node->bind_thread = NULL;
+                node->execute = NULL;
+                node->allocate = NULL;
+                node->free = NULL;
+        }
+        break;
+
+        case STARPU_NODE_MPI_SINK:
+        {
+                /*
+                   node->nb_mp_sinks = 
+                   node->devid = 
+                   */
+                node->mp_connection.mpi_remote_nodeid = _starpu_mpi_common_get_src_node();
+
+                node->init = _starpu_mpi_sink_init;
+                node->launch_workers = _starpu_mpi_sink_launch_workers;
+                node->deinit = _starpu_mpi_sink_deinit;
+                /*    node->report_error =  */
+
+                node->mp_recv_is_ready = _starpu_mpi_common_recv_is_ready;
+                node->mp_send = _starpu_mpi_common_mp_send;
+                node->mp_recv = _starpu_mpi_common_mp_recv;
+                node->dt_send = _starpu_mpi_common_send;
+                node->dt_recv = _starpu_mpi_common_recv;
+                node->dt_send_to_device = _starpu_mpi_common_send_to_device;
+                node->dt_recv_from_device = _starpu_mpi_common_recv_from_device;
+
+                node->dt_test = _starpu_mpi_common_test_event;
+
+                node->get_kernel_from_job = NULL;
+                node->lookup = _starpu_mpi_sink_lookup;
+                node->bind_thread = _starpu_mpi_sink_bind_thread;
+                node->execute = _starpu_sink_common_execute;
+                node->allocate = _starpu_sink_common_allocate;
+                node->free = _starpu_sink_common_free;
+
+
+        }
 		break;
 #endif /* STARPU_USE_MPI_MASTER_SLAVE */
 
@@ -307,9 +307,9 @@ _starpu_mp_common_node_create(enum _starpu_mp_node_kind node_kind,
 	mp_message_list_init(&node->message_queue);
 	STARPU_PTHREAD_MUTEX_INIT(&node->message_queue_mutex,NULL);
 
-    STARPU_PTHREAD_MUTEX_INIT(&node->connection_mutex, NULL);
+        STARPU_PTHREAD_MUTEX_INIT(&node->connection_mutex, NULL);
 
-    _starpu_mp_event_list_init(&node->event_list);
+        _starpu_mp_event_list_init(&node->event_list);
 
 	/* If the node is a sink then we must initialize some field */
 	if(node->kind == STARPU_NODE_MIC_SINK || node->kind == STARPU_NODE_SCC_SINK || node->kind == STARPU_NODE_MPI_SINK)
@@ -370,7 +370,7 @@ void _starpu_mp_common_send_command(const struct _starpu_mp_node *node,
 {
 	STARPU_ASSERT_MSG(arg_size <= BUFFER_SIZE, "Too much data (%d) for the static MIC buffer (%d), increase BUFFER_SIZE perhaps?", arg_size, BUFFER_SIZE);
 
-    printf("SEND CMD : %d - arg_size %d by %lu \n", command, arg_size, pthread_self());
+        //printf("SEND CMD : %d - arg_size %d by %lu \n", command, arg_size, pthread_self());
 
 	/* MIC and MPI sizes are given through a int */
 	int command_size = sizeof(enum _starpu_mp_command);
@@ -406,7 +406,7 @@ enum _starpu_mp_command _starpu_mp_common_recv_command(const struct _starpu_mp_n
 	command = *((enum _starpu_mp_command *) node->buffer);
 	*arg_size = *((int *) ((uintptr_t)node->buffer + command_size));
 
-    printf("RECV command : %d - arg_size %d by %lu \n", command, *arg_size, pthread_self());
+        //printf("RECV command : %d - arg_size %d by %lu \n", command, *arg_size, pthread_self());
 
 	/* If there is no argument (ie. arg_size == 0),
 	 * let's return the command right now */

+ 72 - 72
src/drivers/mp_common/mp_common.h

@@ -53,14 +53,14 @@ enum _starpu_mp_command
 	STARPU_MP_COMMAND_ANSWER_ALLOCATE,
 	STARPU_MP_COMMAND_ERROR_ALLOCATE,
 	STARPU_MP_COMMAND_FREE,
-    /* Synchronous send */
+        /* Synchronous send */
 	STARPU_MP_COMMAND_RECV_FROM_HOST,
 	STARPU_MP_COMMAND_SEND_TO_HOST,
 	STARPU_MP_COMMAND_RECV_FROM_SINK,
 	STARPU_MP_COMMAND_SEND_TO_SINK,
-    /* Asynchronous send */
-	STARPU_MP_COMMAND_RECV_FROM_HOST_ASYNC,
-    STARPU_MP_COMMAND_RECV_FROM_HOST_ASYNC_COMPLETED,
+        /* Asynchronous send */
+        STARPU_MP_COMMAND_RECV_FROM_HOST_ASYNC,
+        STARPU_MP_COMMAND_RECV_FROM_HOST_ASYNC_COMPLETED,
 	STARPU_MP_COMMAND_SEND_TO_HOST_ASYNC,
 	STARPU_MP_COMMAND_SEND_TO_HOST_ASYNC_COMPLETED,
 	STARPU_MP_COMMAND_RECV_FROM_SINK_ASYNC,
@@ -109,7 +109,7 @@ struct _starpu_mp_transfer_command
 {
 	size_t size;
 	void *addr;
-    void *event;
+        void *event;
 };
 
 struct _starpu_mp_transfer_command_to_device
@@ -117,7 +117,7 @@ struct _starpu_mp_transfer_command_to_device
 	int devid;
 	size_t size;
 	void *addr;
-    void *event;
+        void *event;
 };
 
 LIST_TYPE(mp_barrier,
@@ -146,9 +146,9 @@ struct mp_task
 };
 
 LIST_TYPE(_starpu_mp_event,
-    struct _starpu_async_channel event;
-    void * remote_event;
-    enum _starpu_mp_command answer_cmd;
+                struct _starpu_async_channel event;
+                void * remote_event;
+                enum _starpu_mp_command answer_cmd;
 );
 
 
@@ -189,74 +189,74 @@ struct _starpu_mp_node
 	 * sink it controls */
 	union _starpu_mp_connection mp_connection;
 
-	/* Only MIC use this for now !!
-	 * Connection used for data transfers between the host and his sink. */
-	union _starpu_mp_connection host_sink_dt_connection;
+        /* Only MIC use this for now !!
+         * Connection used for data transfers between the host and his sink. */
+        union _starpu_mp_connection host_sink_dt_connection;
 
-    /* Mutex to protect the interleaving of communications when using one thread per node,
-     * for instance, when a thread transfers piece of data and an other wants to use
-     * a sink_to_sink communication */
-	starpu_pthread_mutex_t connection_mutex;
+        /* Mutex to protect the interleaving of communications when using one thread per node,
+         * for instance, when a thread transfers piece of data and an other wants to use
+         * a sink_to_sink communication */
+        starpu_pthread_mutex_t connection_mutex;
 
-	/* Only MIC use this for now !!
-	 * Only sink use this for now !!
-	 * Connection used for data transfer between devices.
-	 * A sink opens a connection with each other sink,
-	 * thus each sink can directly send data to each other.
-	 * For sink :
-	 *  - sink_sink_dt_connections[i] is the connection to the sink number i.
-	 *  - sink_sink_dt_connections[j] is not initialized for the sink number j. */
-	union _starpu_mp_connection *sink_sink_dt_connections;
-
-    /* This list contains events
-     * about asynchronous request
-     */
-    struct _starpu_mp_event_list event_list;
-
-	/* */
-	starpu_pthread_barrier_t init_completed_barrier; 
-	
-	/* table to store pointer of the thread workers*/
-	void* thread_table;
+        /* Only MIC use this for now !!
+         * Only sink use this for now !!
+         * Connection used for data transfer between devices.
+         * A sink opens a connection with each other sink,
+         * thus each sink can directly send data to each other.
+         * For sink :
+         *  - sink_sink_dt_connections[i] is the connection to the sink number i.
+         *  - sink_sink_dt_connections[j] is not initialized for the sink number j. */
+        union _starpu_mp_connection *sink_sink_dt_connections;
+
+        /* This list contains events
+         * about asynchronous request
+         */
+        struct _starpu_mp_event_list event_list;
+
+        /* */
+        starpu_pthread_barrier_t init_completed_barrier; 
+
+        /* table to store pointer of the thread workers*/
+        void* thread_table;
 
         /*list where threads add messages to send to the source node */
         struct mp_message_list message_queue;
-	starpu_pthread_mutex_t message_queue_mutex;
-
-	/*list of barrier for combined worker*/
-	struct mp_barrier_list barrier_list;
-	starpu_pthread_mutex_t barrier_mutex;
-
-	/*table where worker comme pick task*/
-	struct mp_task ** run_table;
-	sem_t * sem_run_table;
-
-	/* Node general functions */
-	void (*init)            (struct _starpu_mp_node *node);
-	void (*launch_workers)  (struct _starpu_mp_node *node);
-	void (*deinit)          (struct _starpu_mp_node *node);
-	void (*report_error)    (const char *, const char *, const int, const int);
-
-	/* Message passing */
-	int (*mp_recv_is_ready) (const struct _starpu_mp_node *);
-	void (*mp_send)         (const struct _starpu_mp_node *, void *, int);
-	void (*mp_recv)         (const struct _starpu_mp_node *, void *, int);
-
-	/* Data transfers */
-	void (*dt_send)             (const struct _starpu_mp_node *, void *, int, void *);
-	void (*dt_recv)             (const struct _starpu_mp_node *, void *, int, void *);
-	void (*dt_send_to_device)   (const struct _starpu_mp_node *, int, void *, int, void *);
-	void (*dt_recv_from_device) (const struct _starpu_mp_node *, int, void *, int, void *);
-
-    /* Test async transfers */
-    int (*dt_test) (struct _starpu_async_channel *);
-
-	void (*(*get_kernel_from_job)   (const struct _starpu_mp_node *,struct _starpu_job *))(void);
-	void (*(*lookup)                (const struct _starpu_mp_node *, char* ))(void);
-	void (*bind_thread)             (const struct _starpu_mp_node *, int,int *,int);
-	void (*execute)                 (struct _starpu_mp_node *, void *, int);
-	void (*allocate)                (const struct _starpu_mp_node *, void *, int);
-	void (*free)                    (const struct _starpu_mp_node *, void *, int);
+        starpu_pthread_mutex_t message_queue_mutex;
+
+        /*list of barrier for combined worker*/
+        struct mp_barrier_list barrier_list;
+        starpu_pthread_mutex_t barrier_mutex;
+
+        /*table where worker comme pick task*/
+        struct mp_task ** run_table;
+        sem_t * sem_run_table;
+
+        /* Node general functions */
+        void (*init)            (struct _starpu_mp_node *node);
+        void (*launch_workers)  (struct _starpu_mp_node *node);
+        void (*deinit)          (struct _starpu_mp_node *node);
+        void (*report_error)    (const char *, const char *, const int, const int);
+
+        /* Message passing */
+        int (*mp_recv_is_ready) (const struct _starpu_mp_node *);
+        void (*mp_send)         (const struct _starpu_mp_node *, void *, int);
+        void (*mp_recv)         (const struct _starpu_mp_node *, void *, int);
+
+        /* Data transfers */
+        void (*dt_send)             (const struct _starpu_mp_node *, void *, int, void *);
+        void (*dt_recv)             (const struct _starpu_mp_node *, void *, int, void *);
+        void (*dt_send_to_device)   (const struct _starpu_mp_node *, int, void *, int, void *);
+        void (*dt_recv_from_device) (const struct _starpu_mp_node *, int, void *, int, void *);
+
+        /* Test async transfers */
+        int (*dt_test) (struct _starpu_async_channel *);
+
+        void (*(*get_kernel_from_job)   (const struct _starpu_mp_node *,struct _starpu_job *))(void);
+        void (*(*lookup)                (const struct _starpu_mp_node *, char* ))(void);
+        void (*bind_thread)             (const struct _starpu_mp_node *, int,int *,int);
+        void (*execute)                 (struct _starpu_mp_node *, void *, int);
+        void (*allocate)                (const struct _starpu_mp_node *, void *, int);
+        void (*free)                    (const struct _starpu_mp_node *, void *, int);
 };
 
 struct _starpu_mp_node * _starpu_mp_common_node_create(enum _starpu_mp_node_kind node_kind, int peer_devid) STARPU_ATTRIBUTE_MALLOC;

+ 100 - 101
src/drivers/mp_common/sink_common.c

@@ -113,9 +113,9 @@ static void _starpu_sink_common_copy_from_host_sync(const struct _starpu_mp_node
 {
 	STARPU_ASSERT(arg_size == sizeof(struct _starpu_mp_transfer_command));
 
-    struct _starpu_mp_transfer_command *cmd = (struct _starpu_mp_transfer_command *)arg;
+        struct _starpu_mp_transfer_command *cmd = (struct _starpu_mp_transfer_command *)arg;
 
-    mp_node->dt_recv(mp_node, cmd->addr, cmd->size, NULL);
+        mp_node->dt_recv(mp_node, cmd->addr, cmd->size, NULL);
 }
 
 
@@ -124,25 +124,25 @@ static void _starpu_sink_common_copy_from_host_async(struct _starpu_mp_node *mp_
 {
 	STARPU_ASSERT(arg_size == sizeof(struct _starpu_mp_transfer_command));
 
-    struct _starpu_mp_transfer_command *cmd = (struct _starpu_mp_transfer_command *)arg;
-
-    /* For asynchronous transfers, we store events to test them later when they are finished */
-    struct _starpu_mp_event * sink_event = _starpu_mp_event_new();
-    /* Save the command to send */
-    sink_event->answer_cmd = STARPU_MP_COMMAND_RECV_FROM_HOST_ASYNC_COMPLETED;
-    sink_event->remote_event = cmd->event;
-
-    /* Set the sender (host) ready because we don't want to wait its ack */
-    struct _starpu_async_channel * async_channel = &sink_event->event;
-    async_channel->type = STARPU_UNUSED;
-    async_channel->starpu_mp_common_finished_sender = -1;
-    async_channel->starpu_mp_common_finished_receiver = 0;
-    async_channel->polling_node_receiver = NULL;
-    async_channel->polling_node_sender = NULL;
-
-    mp_node->dt_recv(mp_node, cmd->addr, cmd->size, &sink_event->event);
-    /* Push event on the list */
-    _starpu_mp_event_list_push_back(&mp_node->event_list, sink_event);
+        struct _starpu_mp_transfer_command *cmd = (struct _starpu_mp_transfer_command *)arg;
+
+        /* For asynchronous transfers, we store events to test them later when they are finished */
+        struct _starpu_mp_event * sink_event = _starpu_mp_event_new();
+        /* Save the command to send */
+        sink_event->answer_cmd = STARPU_MP_COMMAND_RECV_FROM_HOST_ASYNC_COMPLETED;
+        sink_event->remote_event = cmd->event;
+
+        /* Set the sender (host) ready because we don't want to wait its ack */
+        struct _starpu_async_channel * async_channel = &sink_event->event;
+        async_channel->type = STARPU_UNUSED;
+        async_channel->starpu_mp_common_finished_sender = -1;
+        async_channel->starpu_mp_common_finished_receiver = 0;
+        async_channel->polling_node_receiver = NULL;
+        async_channel->polling_node_sender = NULL;
+
+        mp_node->dt_recv(mp_node, cmd->addr, cmd->size, &sink_event->event);
+        /* Push event on the list */
+        _starpu_mp_event_list_push_back(&mp_node->event_list, sink_event);
 }
 
 
@@ -153,13 +153,13 @@ static void _starpu_sink_common_copy_to_host_sync(const struct _starpu_mp_node *
 
 	struct _starpu_mp_transfer_command *cmd = (struct _starpu_mp_transfer_command *)arg;
 
-    /* Save values before sending command to prevent the overwriting */
-    size_t size = cmd->size;
-    void * addr = cmd->addr;
+        /* Save values before sending command to prevent the overwriting */
+        size_t size = cmd->size;
+        void * addr = cmd->addr;
 
-    _starpu_mp_common_send_command(mp_node, STARPU_MP_COMMAND_SEND_TO_HOST, NULL, 0);
+        _starpu_mp_common_send_command(mp_node, STARPU_MP_COMMAND_SEND_TO_HOST, NULL, 0);
 
-    mp_node->dt_send(mp_node, addr, size, NULL);
+        mp_node->dt_send(mp_node, addr, size, NULL);
 }
 
 
@@ -170,25 +170,25 @@ static void _starpu_sink_common_copy_to_host_async(struct _starpu_mp_node *mp_no
 
 	struct _starpu_mp_transfer_command *cmd = (struct _starpu_mp_transfer_command *)arg;
 
-    /* For asynchronous transfers, we need to say dt_send that we are in async mode 
-     * but we don't push event on list because we don't need to know if it's finished
-     */
-    struct _starpu_mp_event * sink_event = _starpu_mp_event_new();
-    /* Save the command to send */
-    sink_event->answer_cmd = STARPU_MP_COMMAND_SEND_TO_HOST_ASYNC_COMPLETED;
-    sink_event->remote_event = cmd->event;
-    
-    /* Set the receiver (host) ready because we don't want to wait its ack */
-    struct _starpu_async_channel * async_channel = &sink_event->event;
-    async_channel->type = STARPU_UNUSED;
-    async_channel->starpu_mp_common_finished_sender = 0;
-    async_channel->starpu_mp_common_finished_receiver = -1;
-    async_channel->polling_node_receiver = NULL;
-    async_channel->polling_node_sender = NULL;
-
-    mp_node->dt_send(mp_node, cmd->addr, cmd->size, &sink_event->event);
-    /* Push event on the list */
-    _starpu_mp_event_list_push_back(&mp_node->event_list, sink_event);
+        /* For asynchronous transfers, we need to say dt_send that we are in async mode 
+         * but we don't push event on list because we don't need to know if it's finished
+         */
+        struct _starpu_mp_event * sink_event = _starpu_mp_event_new();
+        /* Save the command to send */
+        sink_event->answer_cmd = STARPU_MP_COMMAND_SEND_TO_HOST_ASYNC_COMPLETED;
+        sink_event->remote_event = cmd->event;
+
+        /* Set the receiver (host) ready because we don't want to wait its ack */
+        struct _starpu_async_channel * async_channel = &sink_event->event;
+        async_channel->type = STARPU_UNUSED;
+        async_channel->starpu_mp_common_finished_sender = 0;
+        async_channel->starpu_mp_common_finished_receiver = -1;
+        async_channel->polling_node_receiver = NULL;
+        async_channel->polling_node_sender = NULL;
+
+        mp_node->dt_send(mp_node, cmd->addr, cmd->size, &sink_event->event);
+        /* Push event on the list */
+        _starpu_mp_event_list_push_back(&mp_node->event_list, sink_event);
 }
 
 
@@ -199,8 +199,8 @@ static void _starpu_sink_common_copy_from_sink_sync(const struct _starpu_mp_node
 
 	struct _starpu_mp_transfer_command_to_device *cmd = (struct _starpu_mp_transfer_command_to_device *)arg;
 
-    mp_node->dt_recv_from_device(mp_node, cmd->devid, cmd->addr, cmd->size, NULL);
-    _starpu_mp_common_send_command(mp_node, STARPU_MP_COMMAND_TRANSFER_COMPLETE, NULL, 0);
+        mp_node->dt_recv_from_device(mp_node, cmd->devid, cmd->addr, cmd->size, NULL);
+        _starpu_mp_common_send_command(mp_node, STARPU_MP_COMMAND_TRANSFER_COMPLETE, NULL, 0);
 }
 
 
@@ -211,24 +211,24 @@ static void _starpu_sink_common_copy_from_sink_async(struct _starpu_mp_node *mp_
 
 	struct _starpu_mp_transfer_command_to_device *cmd = (struct _starpu_mp_transfer_command_to_device *)arg;
 
-    /* For asynchronous transfers, we store events to test them later when they are finished
-     */
-    struct _starpu_mp_event * sink_event = _starpu_mp_event_new();
-    /* Save the command to send */
-    sink_event->answer_cmd = STARPU_MP_COMMAND_RECV_FROM_SINK_ASYNC_COMPLETED;
-    sink_event->remote_event = cmd->event;
-
-    /* Set the sender ready because we don't want to wait its ack */
-    struct _starpu_async_channel * async_channel = &sink_event->event;
-    async_channel->type = STARPU_UNUSED;
-    async_channel->starpu_mp_common_finished_sender = -1;
-    async_channel->starpu_mp_common_finished_receiver = 0;
-    async_channel->polling_node_receiver = NULL;
-    async_channel->polling_node_sender = NULL;
-
-    mp_node->dt_recv_from_device(mp_node, cmd->devid, cmd->addr, cmd->size, &sink_event->event);
-    /* Push event on the list */
-    _starpu_mp_event_list_push_back(&mp_node->event_list, sink_event);
+        /* For asynchronous transfers, we store events to test them later when they are finished
+        */
+        struct _starpu_mp_event * sink_event = _starpu_mp_event_new();
+        /* Save the command to send */
+        sink_event->answer_cmd = STARPU_MP_COMMAND_RECV_FROM_SINK_ASYNC_COMPLETED;
+        sink_event->remote_event = cmd->event;
+
+        /* Set the sender ready because we don't want to wait its ack */
+        struct _starpu_async_channel * async_channel = &sink_event->event;
+        async_channel->type = STARPU_UNUSED;
+        async_channel->starpu_mp_common_finished_sender = -1;
+        async_channel->starpu_mp_common_finished_receiver = 0;
+        async_channel->polling_node_receiver = NULL;
+        async_channel->polling_node_sender = NULL;
+
+        mp_node->dt_recv_from_device(mp_node, cmd->devid, cmd->addr, cmd->size, &sink_event->event);
+        /* Push event on the list */
+        _starpu_mp_event_list_push_back(&mp_node->event_list, sink_event);
 }
 
 
@@ -239,7 +239,7 @@ static void _starpu_sink_common_copy_to_sink_sync(const struct _starpu_mp_node *
 
 	struct _starpu_mp_transfer_command_to_device *cmd = (struct _starpu_mp_transfer_command_to_device *)arg;
 
-    mp_node->dt_send_to_device(mp_node, cmd->devid, cmd->addr, cmd->size, NULL);
+        mp_node->dt_send_to_device(mp_node, cmd->devid, cmd->addr, cmd->size, NULL);
 }
 
 
@@ -250,26 +250,26 @@ static void _starpu_sink_common_copy_to_sink_async(struct _starpu_mp_node *mp_no
 
 	struct _starpu_mp_transfer_command_to_device *cmd = (struct _starpu_mp_transfer_command_to_device *)arg;
 
-    /* For asynchronous transfers, we need to say dt_send that we are in async mode 
-     * but we don't push event on list because we don't need to know if it's finished
-     */
-    struct _starpu_mp_event * sink_event = _starpu_mp_event_new();
-    /* Save the command to send */
-    sink_event->answer_cmd = STARPU_MP_COMMAND_SEND_TO_SINK_ASYNC_COMPLETED;
-    sink_event->remote_event = cmd->event;
-
-    /* Set the receiver ready because we don't want to wait its ack */
-    struct _starpu_async_channel * async_channel = &sink_event->event;
-    async_channel->type = STARPU_UNUSED;
-    async_channel->starpu_mp_common_finished_sender = 0;
-    async_channel->starpu_mp_common_finished_receiver = -1;
-    async_channel->polling_node_receiver = NULL;
-    async_channel->polling_node_sender = NULL;
-
-    mp_node->dt_send_to_device(mp_node, cmd->devid, cmd->addr, cmd->size, &sink_event->event);
-
-    /* Push event on the list */
-    _starpu_mp_event_list_push_back(&mp_node->event_list, sink_event);
+        /* For asynchronous transfers, we need to say dt_send that we are in async mode 
+         * but we don't push event on list because we don't need to know if it's finished
+         */
+        struct _starpu_mp_event * sink_event = _starpu_mp_event_new();
+        /* Save the command to send */
+        sink_event->answer_cmd = STARPU_MP_COMMAND_SEND_TO_SINK_ASYNC_COMPLETED;
+        sink_event->remote_event = cmd->event;
+
+        /* Set the receiver ready because we don't want to wait its ack */
+        struct _starpu_async_channel * async_channel = &sink_event->event;
+        async_channel->type = STARPU_UNUSED;
+        async_channel->starpu_mp_common_finished_sender = 0;
+        async_channel->starpu_mp_common_finished_receiver = -1;
+        async_channel->polling_node_receiver = NULL;
+        async_channel->polling_node_sender = NULL;
+
+        mp_node->dt_send_to_device(mp_node, cmd->devid, cmd->addr, cmd->size, &sink_event->event);
+
+        /* Push event on the list */
+        _starpu_mp_event_list_push_back(&mp_node->event_list, sink_event);
 }
 
 
@@ -404,21 +404,21 @@ void _starpu_sink_common_worker(void)
 					_starpu_sink_common_copy_to_sink_sync(node, arg, arg_size);
 					break;
 
-                case STARPU_MP_COMMAND_RECV_FROM_HOST_ASYNC:
-					_starpu_sink_common_copy_from_host_async(node, arg, arg_size);
-                    break;
-                    
-				case STARPU_MP_COMMAND_SEND_TO_HOST_ASYNC:
-					_starpu_sink_common_copy_to_host_async(node, arg, arg_size);
-                    break;
+                                case STARPU_MP_COMMAND_RECV_FROM_HOST_ASYNC:
+                                        _starpu_sink_common_copy_from_host_async(node, arg, arg_size);
+                                        break;
 
-				case STARPU_MP_COMMAND_RECV_FROM_SINK_ASYNC:
-					_starpu_sink_common_copy_from_sink_async(node, arg, arg_size);
-                    break;
+                                case STARPU_MP_COMMAND_SEND_TO_HOST_ASYNC:
+                                        _starpu_sink_common_copy_to_host_async(node, arg, arg_size);
+                                        break;
 
-                case STARPU_MP_COMMAND_SEND_TO_SINK_ASYNC:
-					_starpu_sink_common_copy_to_sink_async(node, arg, arg_size);
-					break;
+                                case STARPU_MP_COMMAND_RECV_FROM_SINK_ASYNC:
+                                        _starpu_sink_common_copy_from_sink_async(node, arg, arg_size);
+                                        break;
+
+                                case STARPU_MP_COMMAND_SEND_TO_SINK_ASYNC:
+                                        _starpu_sink_common_copy_to_sink_async(node, arg, arg_size);
+                                        break;
 
 				case STARPU_MP_COMMAND_SYNC_WORKERS:
 					_starpu_sink_common_recv_workers(node, arg, arg_size);
@@ -446,7 +446,6 @@ void _starpu_sink_common_worker(void)
 			STARPU_PTHREAD_MUTEX_UNLOCK(&node->message_queue_mutex);
 		}
 
-        //XXX: Need mutex here ?
         if(!_starpu_mp_event_list_empty(&node->event_list))
         {
             struct _starpu_mp_event * sink_event = _starpu_mp_event_list_pop_front(&node->event_list);
@@ -472,7 +471,7 @@ void _starpu_sink_common_worker(void)
 	_starpu_mp_common_node_destroy(node);
 
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
-    _starpu_mpi_common_mp_deinit();
+        _starpu_mpi_common_mp_deinit();
 #endif
 
 	exit(0);

File diff suppressed because it is too large
+ 373 - 374
src/drivers/mp_common/source_common.c


+ 356 - 356
src/drivers/mpi/driver_mpi_common.c

@@ -32,385 +32,385 @@ static int src_node_id;
 
 static void _starpu_mpi_set_src_node_id()
 {
-	int node_id = starpu_get_env_number("STARPU_MPI_MASTER_NODE");
+        int node_id = starpu_get_env_number("STARPU_MPI_MASTER_NODE");
 
-	if (node_id != -1)
-	{
-        int nb_proc, id_proc;
-        MPI_Comm_size(MPI_COMM_WORLD, &nb_proc);
-        MPI_Comm_rank(MPI_COMM_WORLD, &id_proc);
+        if (node_id != -1)
+        {
+                int nb_proc, id_proc;
+                MPI_Comm_size(MPI_COMM_WORLD, &nb_proc);
+                MPI_Comm_rank(MPI_COMM_WORLD, &id_proc);
 
-		if (node_id < nb_proc)
-		{
-			src_node_id = node_id;
-			return;
-		}
-		else if (id_proc == DRIVER_MPI_MASTER_NODE_DEFAULT)
-		{
-			/* Only one node prints the error message. */
-			_STARPU_DISP("The node you specify to be the master is "
-					"greater than the total number of nodes.\n"
-					"Taking node %d by default...\n", DRIVER_MPI_MASTER_NODE_DEFAULT);
-		}
-	}
-
-	/* Node by default. */
-	src_node_id = DRIVER_MPI_MASTER_NODE_DEFAULT;
+                if (node_id < nb_proc)
+                {
+                        src_node_id = node_id;
+                        return;
+                }
+                else if (id_proc == DRIVER_MPI_MASTER_NODE_DEFAULT)
+                {
+                        /* Only one node prints the error message. */
+                        _STARPU_DISP("The node you specify to be the master is "
+                                        "greater than the total number of nodes.\n"
+                                        "Taking node %d by default...\n", DRIVER_MPI_MASTER_NODE_DEFAULT);
+                }
+        }
+
+        /* Node by default. */
+        src_node_id = DRIVER_MPI_MASTER_NODE_DEFAULT;
 }
 
 int _starpu_mpi_common_mp_init()
 {
-    //Here we supposed the programmer called two times starpu_init.
-    if (mpi_initialized)
-        return -ENODEV;
+        //Here we supposed the programmer called two times starpu_init.
+        if (mpi_initialized)
+                return -ENODEV;
 
-    mpi_initialized = 1;
+        mpi_initialized = 1;
 
-    if (MPI_Initialized(&extern_initialized) != MPI_SUCCESS)
-        STARPU_ABORT_MSG("Cannot check if MPI is initialized or not !");
+        if (MPI_Initialized(&extern_initialized) != MPI_SUCCESS)
+                STARPU_ABORT_MSG("Cannot check if MPI is initialized or not !");
 
-    //Here MPI_Init or MPI_Init_thread is already called
-    if (!extern_initialized)
-    {
+        //Here MPI_Init or MPI_Init_thread is already called
+        if (!extern_initialized)
+        {
 
 #if defined(STARPU_MPI_MASTER_SLAVE_MULTIPLE_THREAD)
-        int required = MPI_THREAD_MULTIPLE;
+                int required = MPI_THREAD_MULTIPLE;
 #else
-        int required = MPI_THREAD_FUNNELED;
+                int required = MPI_THREAD_FUNNELED;
 #endif
 
-            int thread_support;
-            STARPU_ASSERT(MPI_Init_thread(_starpu_get_argc(), _starpu_get_argv(), required, &thread_support) == MPI_SUCCESS);
+                int thread_support;
+                STARPU_ASSERT(MPI_Init_thread(_starpu_get_argc(), _starpu_get_argv(), required, &thread_support) == MPI_SUCCESS);
 
-            if (thread_support != required)
-            {
-                if (required == MPI_THREAD_MULTIPLE)
-                    _STARPU_DISP("MPI doesn't support MPI_THREAD_MULTIPLE option. MPI Master-Slave can have problems if multiple slaves are launched. \n");
-                if (required == MPI_THREAD_FUNNELED)
-                    _STARPU_DISP("MPI doesn't support MPI_THREAD_FUNNELED option. Many errors can occur. \n");
-            }
+                if (thread_support != required)
+                {
+                        if (required == MPI_THREAD_MULTIPLE)
+                                _STARPU_DISP("MPI doesn't support MPI_THREAD_MULTIPLE option. MPI Master-Slave can have problems if multiple slaves are launched. \n");
+                        if (required == MPI_THREAD_FUNNELED)
+                                _STARPU_DISP("MPI doesn't support MPI_THREAD_FUNNELED option. Many errors can occur. \n");
+                }
         }
-        
+
         /* Find which node is the master */
         _starpu_mpi_set_src_node_id();
 
         return 1;
-    }
+}
 
 void _starpu_mpi_common_mp_deinit()
 {
-    if (!extern_initialized)
-        MPI_Finalize();    
+        if (!extern_initialized)
+                MPI_Finalize();    
 }
 
 int _starpu_mpi_common_is_src_node()
 {   
-    int id_proc;
-    MPI_Comm_rank(MPI_COMM_WORLD, &id_proc);
-    return id_proc == src_node_id;
+        int id_proc;
+        MPI_Comm_rank(MPI_COMM_WORLD, &id_proc);
+        return id_proc == src_node_id;
 } 
 
 int _starpu_mpi_common_get_src_node()
 {
-    return src_node_id;
+        return src_node_id;
 }
 
 int _starpu_mpi_common_is_mp_initialized()
 {
-    return mpi_initialized;
+        return mpi_initialized;
 }
 
 /* common parts to initialize a source or a sink node */
 void _starpu_mpi_common_mp_initialize_src_sink(struct _starpu_mp_node *node)
 {
-    struct _starpu_machine_topology *topology = &_starpu_get_machine_config()->topology;
+        struct _starpu_machine_topology *topology = &_starpu_get_machine_config()->topology;
 
-    node->nb_cores = topology->nhwcpus;
+        node->nb_cores = topology->nhwcpus;
 }
 
 int _starpu_mpi_common_recv_is_ready(const struct _starpu_mp_node *mp_node)
 {
-    int res, source;
-    int flag = 0;
-    int id_proc;
-    MPI_Comm_rank(MPI_COMM_WORLD, &id_proc);
-
-    if (id_proc == src_node_id)
-    {
-        /* Source has mp_node defined */
-        source = mp_node->mp_connection.mpi_remote_nodeid;
-    }
-    else
-    {
-        /* Sink can have sink to sink message */
-        source = MPI_ANY_SOURCE;
-    }
-
-    res = MPI_Iprobe(source, MPI_ANY_TAG, MPI_COMM_WORLD, &flag, MPI_STATUS_IGNORE);
-    STARPU_ASSERT_MSG(res == MPI_SUCCESS, "MPI Master/Slave cannot test if we received a message !");
-
-    return flag;
+        int res, source;
+        int flag = 0;
+        int id_proc;
+        MPI_Comm_rank(MPI_COMM_WORLD, &id_proc);
+
+        if (id_proc == src_node_id)
+        {
+                /* Source has mp_node defined */
+                source = mp_node->mp_connection.mpi_remote_nodeid;
+        }
+        else
+        {
+                /* Sink can have sink to sink message */
+                source = MPI_ANY_SOURCE;
+        }
+
+        res = MPI_Iprobe(source, MPI_ANY_TAG, MPI_COMM_WORLD, &flag, MPI_STATUS_IGNORE);
+        STARPU_ASSERT_MSG(res == MPI_SUCCESS, "MPI Master/Slave cannot test if we received a message !");
+
+        return flag;
 }
 
 /* SEND to source node */
 void _starpu_mpi_common_send(const struct _starpu_mp_node *node, void *msg, int len, void * event)
 {
-    int res;
-    int id_proc;
-    MPI_Comm_rank(MPI_COMM_WORLD, &id_proc);
+        int res;
+        int id_proc;
+        MPI_Comm_rank(MPI_COMM_WORLD, &id_proc);
 
-    printf("envoi %d B to %d\n", len, node->mp_connection.mpi_remote_nodeid);
+        //printf("envoi %d B to %d\n", len, node->mp_connection.mpi_remote_nodeid);
 
-    if (event)
-    {
-        /* Asynchronous send */
-        struct _starpu_async_channel * channel = event;
-        channel->event.mpi_ms_event.is_sender = 1;
+        if (event)
+        {
+                /* Asynchronous send */
+                struct _starpu_async_channel * channel = event;
+                channel->event.mpi_ms_event.is_sender = 1;
 
-        /* call by sink, we need to initialize some parts, for host it's done in data_request.c */
-        if (channel->type == STARPU_UNUSED)
-            channel->event.mpi_ms_event.requests = NULL;
+                /* call by sink, we need to initialize some parts, for host it's done in data_request.c */
+                if (channel->type == STARPU_UNUSED)
+                        channel->event.mpi_ms_event.requests = NULL;
 
-        /* Initialize the list */
-        if (channel->event.mpi_ms_event.requests == NULL)
-        {
-            channel->event.mpi_ms_event.requests = _starpu_mpi_ms_event_request_list_new();            
-            _starpu_mpi_ms_event_request_list_init(channel->event.mpi_ms_event.requests);
-        }
+                /* Initialize the list */
+                if (channel->event.mpi_ms_event.requests == NULL)
+                {
+                        channel->event.mpi_ms_event.requests = _starpu_mpi_ms_event_request_list_new();            
+                        _starpu_mpi_ms_event_request_list_init(channel->event.mpi_ms_event.requests);
+                }
 
-        struct _starpu_mpi_ms_event_request * req = _starpu_mpi_ms_event_request_new();
+                struct _starpu_mpi_ms_event_request * req = _starpu_mpi_ms_event_request_new();
 
-        res = MPI_Isend(msg, len, MPI_BYTE, node->mp_connection.mpi_remote_nodeid, ASYNC_TAG, MPI_COMM_WORLD, &req->request);
+                res = MPI_Isend(msg, len, MPI_BYTE, node->mp_connection.mpi_remote_nodeid, ASYNC_TAG, MPI_COMM_WORLD, &req->request);
 
-        channel->starpu_mp_common_finished_receiver++;
-        channel->starpu_mp_common_finished_sender++;
+                channel->starpu_mp_common_finished_receiver++;
+                channel->starpu_mp_common_finished_sender++;
 
-        _starpu_mpi_ms_event_request_list_push_back(channel->event.mpi_ms_event.requests, req);
-    } 
-    else
-    {
-        /* Synchronous send */
-        res = MPI_Send(msg, len, MPI_BYTE, node->mp_connection.mpi_remote_nodeid, SYNC_TAG, MPI_COMM_WORLD);
-    }
-    STARPU_ASSERT_MSG(res == MPI_SUCCESS, "MPI Master/Slave cannot receive a msg with a size of %d Bytes !", len);
+                _starpu_mpi_ms_event_request_list_push_back(channel->event.mpi_ms_event.requests, req);
+        } 
+        else
+        {
+                /* Synchronous send */
+                res = MPI_Send(msg, len, MPI_BYTE, node->mp_connection.mpi_remote_nodeid, SYNC_TAG, MPI_COMM_WORLD);
+        }
+        STARPU_ASSERT_MSG(res == MPI_SUCCESS, "MPI Master/Slave cannot receive a msg with a size of %d Bytes !", len);
 }
 
 void _starpu_mpi_common_mp_send(const struct _starpu_mp_node *node, void *msg, int len)
 {
-    _starpu_mpi_common_send(node, msg, len, NULL);
+        _starpu_mpi_common_send(node, msg, len, NULL);
 }
 
 
 /* RECV to source node */
 void _starpu_mpi_common_recv(const struct _starpu_mp_node *node, void *msg, int len, void * event)
 {
-    int res;
-    int id_proc;
-    MPI_Status s;
-    MPI_Comm_rank(MPI_COMM_WORLD, &id_proc);
+        int res;
+        int id_proc;
+        MPI_Status s;
+        MPI_Comm_rank(MPI_COMM_WORLD, &id_proc);
 
-    printf("recv %d B from %d in %p\n", len, node->mp_connection.mpi_remote_nodeid, msg);
+        //printf("recv %d B from %d in %p\n", len, node->mp_connection.mpi_remote_nodeid, msg);
 
-    if (event)
-    {
-        /* Asynchronous recv */
-        struct _starpu_async_channel * channel = event;
-        channel->event.mpi_ms_event.is_sender = 0;
+        if (event)
+        {
+                /* Asynchronous recv */
+                struct _starpu_async_channel * channel = event;
+                channel->event.mpi_ms_event.is_sender = 0;
 
-        /* call by sink, we need to initialize some parts, for host it's done in data_request.c */
-        if (channel->type == STARPU_UNUSED)
-            channel->event.mpi_ms_event.requests = NULL;
+                /* call by sink, we need to initialize some parts, for host it's done in data_request.c */
+                if (channel->type == STARPU_UNUSED)
+                        channel->event.mpi_ms_event.requests = NULL;
 
-        /* Initialize the list */
-        if (channel->event.mpi_ms_event.requests == NULL)
-        {
-            channel->event.mpi_ms_event.requests = _starpu_mpi_ms_event_request_list_new();            
-            _starpu_mpi_ms_event_request_list_init(channel->event.mpi_ms_event.requests);
-        }
+                /* Initialize the list */
+                if (channel->event.mpi_ms_event.requests == NULL)
+                {
+                        channel->event.mpi_ms_event.requests = _starpu_mpi_ms_event_request_list_new();            
+                        _starpu_mpi_ms_event_request_list_init(channel->event.mpi_ms_event.requests);
+                }
 
-        struct _starpu_mpi_ms_event_request * req = _starpu_mpi_ms_event_request_new();
+                struct _starpu_mpi_ms_event_request * req = _starpu_mpi_ms_event_request_new();
 
-        res = MPI_Irecv(msg, len, MPI_BYTE, node->mp_connection.mpi_remote_nodeid, ASYNC_TAG, MPI_COMM_WORLD, &req->request);
+                res = MPI_Irecv(msg, len, MPI_BYTE, node->mp_connection.mpi_remote_nodeid, ASYNC_TAG, MPI_COMM_WORLD, &req->request);
 
-        channel->starpu_mp_common_finished_receiver++;
-        channel->starpu_mp_common_finished_sender++;
+                channel->starpu_mp_common_finished_receiver++;
+                channel->starpu_mp_common_finished_sender++;
 
-        _starpu_mpi_ms_event_request_list_push_back(channel->event.mpi_ms_event.requests, req);
-    } 
-    else
-    {
-        /* Synchronous recv */
-        res = MPI_Recv(msg, len, MPI_BYTE, node->mp_connection.mpi_remote_nodeid, SYNC_TAG, MPI_COMM_WORLD, &s);
-        int num_expected;
-        MPI_Get_count(&s, MPI_BYTE, &num_expected);
+                _starpu_mpi_ms_event_request_list_push_back(channel->event.mpi_ms_event.requests, req);
+        } 
+        else
+        {
+                /* Synchronous recv */
+                res = MPI_Recv(msg, len, MPI_BYTE, node->mp_connection.mpi_remote_nodeid, SYNC_TAG, MPI_COMM_WORLD, &s);
+                int num_expected;
+                MPI_Get_count(&s, MPI_BYTE, &num_expected);
 
-        STARPU_ASSERT_MSG(num_expected == len, "MPI Master/Slave received a msg with a size of %d Bytes (expected %d Bytes) !", num_expected, len);
-    }
-    STARPU_ASSERT_MSG(res == MPI_SUCCESS, "MPI Master/Slave cannot receive a msg with a size of %d Bytes !", len);
+                STARPU_ASSERT_MSG(num_expected == len, "MPI Master/Slave received a msg with a size of %d Bytes (expected %d Bytes) !", num_expected, len);
+        }
+        STARPU_ASSERT_MSG(res == MPI_SUCCESS, "MPI Master/Slave cannot receive a msg with a size of %d Bytes !", len);
 }
 
 void _starpu_mpi_common_mp_recv(const struct _starpu_mp_node *node, void *msg, int len)
 {
-    _starpu_mpi_common_recv(node, msg, len, NULL);
+        _starpu_mpi_common_recv(node, msg, len, NULL);
 }
 
 /* SEND to any node */
 void _starpu_mpi_common_send_to_device(const struct _starpu_mp_node *node, int dst_devid, void *msg, int len, void * event)
 {   
-    int res;
-    int id_proc;
-    MPI_Comm_rank(MPI_COMM_WORLD, &id_proc);
+        int res;
+        int id_proc;
+        MPI_Comm_rank(MPI_COMM_WORLD, &id_proc);
 
-    printf("S_to_D send %d bytes from %d from %p\n", len, dst_devid, msg);
+        //printf("S_to_D send %d bytes from %d from %p\n", len, dst_devid, msg);
 
-    if (event)
-    {
-        /* Asynchronous send */
-        struct _starpu_async_channel * channel = event;
-        channel->event.mpi_ms_event.is_sender = 1;
+        if (event)
+        {
+                /* Asynchronous send */
+                struct _starpu_async_channel * channel = event;
+                channel->event.mpi_ms_event.is_sender = 1;
 
-        /* call by sink, we need to initialize some parts, for host it's done in data_request.c */
-        if (channel->type == STARPU_UNUSED)
-            channel->event.mpi_ms_event.requests = NULL;
+                /* call by sink, we need to initialize some parts, for host it's done in data_request.c */
+                if (channel->type == STARPU_UNUSED)
+                        channel->event.mpi_ms_event.requests = NULL;
 
-        /* Initialize the list */
-        if (channel->event.mpi_ms_event.requests == NULL)
-        {
-            channel->event.mpi_ms_event.requests = _starpu_mpi_ms_event_request_list_new();            
-            _starpu_mpi_ms_event_request_list_init(channel->event.mpi_ms_event.requests);
-        }
+                /* Initialize the list */
+                if (channel->event.mpi_ms_event.requests == NULL)
+                {
+                        channel->event.mpi_ms_event.requests = _starpu_mpi_ms_event_request_list_new();            
+                        _starpu_mpi_ms_event_request_list_init(channel->event.mpi_ms_event.requests);
+                }
 
-        struct _starpu_mpi_ms_event_request * req = _starpu_mpi_ms_event_request_new();
+                struct _starpu_mpi_ms_event_request * req = _starpu_mpi_ms_event_request_new();
 
-        res = MPI_Isend(msg, len, MPI_BYTE, dst_devid, ASYNC_TAG, MPI_COMM_WORLD, &req->request);
+                res = MPI_Isend(msg, len, MPI_BYTE, dst_devid, ASYNC_TAG, MPI_COMM_WORLD, &req->request);
 
-        channel->starpu_mp_common_finished_receiver++;
-        channel->starpu_mp_common_finished_sender++;
+                channel->starpu_mp_common_finished_receiver++;
+                channel->starpu_mp_common_finished_sender++;
 
-        _starpu_mpi_ms_event_request_list_push_back(channel->event.mpi_ms_event.requests, req);
-    } 
-    else
-    {
-        /* Synchronous send */
-        res = MPI_Send(msg, len, MPI_BYTE, dst_devid, SYNC_TAG, MPI_COMM_WORLD);
-    }    
+                _starpu_mpi_ms_event_request_list_push_back(channel->event.mpi_ms_event.requests, req);
+        } 
+        else
+        {
+                /* Synchronous send */
+                res = MPI_Send(msg, len, MPI_BYTE, dst_devid, SYNC_TAG, MPI_COMM_WORLD);
+        }    
 
-    STARPU_ASSERT_MSG(res == MPI_SUCCESS, "MPI Master/Slave cannot receive a msg with a size of %d Bytes !", len);
+        STARPU_ASSERT_MSG(res == MPI_SUCCESS, "MPI Master/Slave cannot receive a msg with a size of %d Bytes !", len);
 }
 
 /* RECV to any node */
 void _starpu_mpi_common_recv_from_device(const struct _starpu_mp_node *node, int src_devid, void *msg, int len, void * event)
 {
-    int res;
-    int id_proc;
-    MPI_Comm_rank(MPI_COMM_WORLD, &id_proc);
-
-    printf("R_to_D nop recv %d bytes from %d\n", len, src_devid);
-
-    if (event)
-    {
-        /* Asynchronous recv */
-        struct _starpu_async_channel * channel = event;
-        channel->event.mpi_ms_event.is_sender = 0;
+        int res;
+        int id_proc;
+        MPI_Comm_rank(MPI_COMM_WORLD, &id_proc);
 
-        /* call by sink, we need to initialize some parts, for host it's done in data_request.c */
-        if (channel->type == STARPU_UNUSED)
-            channel->event.mpi_ms_event.requests = NULL;
+        //printf("R_to_D nop recv %d bytes from %d\n", len, src_devid);
 
-        /* Initialize the list */
-        if (channel->event.mpi_ms_event.requests == NULL)
+        if (event)
         {
-            channel->event.mpi_ms_event.requests = _starpu_mpi_ms_event_request_list_new();            
-            _starpu_mpi_ms_event_request_list_init(channel->event.mpi_ms_event.requests);
-        }
+                /* Asynchronous recv */
+                struct _starpu_async_channel * channel = event;
+                channel->event.mpi_ms_event.is_sender = 0;
 
-        struct _starpu_mpi_ms_event_request * req = _starpu_mpi_ms_event_request_new();
+                /* call by sink, we need to initialize some parts, for host it's done in data_request.c */
+                if (channel->type == STARPU_UNUSED)
+                        channel->event.mpi_ms_event.requests = NULL;
 
-        res = MPI_Irecv(msg, len, MPI_BYTE, src_devid, ASYNC_TAG, MPI_COMM_WORLD, &req->request);
+                /* Initialize the list */
+                if (channel->event.mpi_ms_event.requests == NULL)
+                {
+                        channel->event.mpi_ms_event.requests = _starpu_mpi_ms_event_request_list_new();            
+                        _starpu_mpi_ms_event_request_list_init(channel->event.mpi_ms_event.requests);
+                }
 
-        channel->starpu_mp_common_finished_receiver++;
-        channel->starpu_mp_common_finished_sender++;
+                struct _starpu_mpi_ms_event_request * req = _starpu_mpi_ms_event_request_new();
 
-        _starpu_mpi_ms_event_request_list_push_back(channel->event.mpi_ms_event.requests, req);
-    } 
-    else
-    {
-        /* Synchronous recv */
-        MPI_Status s;
-        res = MPI_Recv(msg, len, MPI_BYTE, src_devid, SYNC_TAG, MPI_COMM_WORLD, &s);
-        int num_expected;
-        MPI_Get_count(&s, MPI_BYTE, &num_expected);
+                res = MPI_Irecv(msg, len, MPI_BYTE, src_devid, ASYNC_TAG, MPI_COMM_WORLD, &req->request);
 
-        STARPU_ASSERT_MSG(num_expected == len, "MPI Master/Slave received a msg with a size of %d Bytes (expected %d Bytes) !", num_expected, len);
-        STARPU_ASSERT_MSG(res == MPI_SUCCESS, "MPI Master/Slave cannot receive a msg with a size of %d Bytes !", len);
-    }
+                channel->starpu_mp_common_finished_receiver++;
+                channel->starpu_mp_common_finished_sender++;
+
+                _starpu_mpi_ms_event_request_list_push_back(channel->event.mpi_ms_event.requests, req);
+        } 
+        else
+        {
+                /* Synchronous recv */
+                MPI_Status s;
+                res = MPI_Recv(msg, len, MPI_BYTE, src_devid, SYNC_TAG, MPI_COMM_WORLD, &s);
+                int num_expected;
+                MPI_Get_count(&s, MPI_BYTE, &num_expected);
+
+                STARPU_ASSERT_MSG(num_expected == len, "MPI Master/Slave received a msg with a size of %d Bytes (expected %d Bytes) !", num_expected, len);
+                STARPU_ASSERT_MSG(res == MPI_SUCCESS, "MPI Master/Slave cannot receive a msg with a size of %d Bytes !", len);
+        }
 }
 
 static void _starpu_mpi_common_polling_node(struct _starpu_mp_node * node)
 {
-    /* poll the asynchronous messages.*/
-    if (node != NULL)
-    {
-        STARPU_PTHREAD_MUTEX_LOCK(&node->connection_mutex);
-        while(node->mp_recv_is_ready(node))
+        /* poll the asynchronous messages.*/
+        if (node != NULL)
         {
-            enum _starpu_mp_command answer;
-            void *arg;
-            int arg_size;
-            answer = _starpu_mp_common_recv_command(node, &arg, &arg_size);
-            if(!_starpu_src_common_store_message(node,arg,arg_size,answer))
-            {
-                printf("incorrect commande: unknown command or sync command");
-                STARPU_ASSERT(0);
-            }
+                STARPU_PTHREAD_MUTEX_LOCK(&node->connection_mutex);
+                while(node->mp_recv_is_ready(node))
+                {
+                        enum _starpu_mp_command answer;
+                        void *arg;
+                        int arg_size;
+                        answer = _starpu_mp_common_recv_command(node, &arg, &arg_size);
+                        if(!_starpu_src_common_store_message(node,arg,arg_size,answer))
+                        {
+                                printf("incorrect commande: unknown command or sync command");
+                                STARPU_ASSERT(0);
+                        }
+                }
+                STARPU_PTHREAD_MUTEX_UNLOCK(&node->connection_mutex);
         }
-        STARPU_PTHREAD_MUTEX_UNLOCK(&node->connection_mutex);
-    }
 }
 
- /* - In device to device communications, the first ack received by host
+/* - In device to device communications, the first ack received by host
  * is considered as the sender (but it cannot be, in fact, the sender)
  */
 int _starpu_mpi_common_test_event(struct _starpu_async_channel * event)
 {
-    if (event->event.mpi_ms_event.requests != NULL && !_starpu_mpi_ms_event_request_list_empty(event->event.mpi_ms_event.requests))
-    {
-        struct _starpu_mpi_ms_event_request * req = _starpu_mpi_ms_event_request_list_begin(event->event.mpi_ms_event.requests);
-        struct _starpu_mpi_ms_event_request * req_next;
-
-        while (req != _starpu_mpi_ms_event_request_list_end(event->event.mpi_ms_event.requests))
+        if (event->event.mpi_ms_event.requests != NULL && !_starpu_mpi_ms_event_request_list_empty(event->event.mpi_ms_event.requests))
         {
-            req_next = _starpu_mpi_ms_event_request_list_next(req);
-
-            int flag = 0;
-            MPI_Test(&req->request, &flag, MPI_STATUS_IGNORE);
-            if (flag)
-            {
-                _starpu_mpi_ms_event_request_list_erase(event->event.mpi_ms_event.requests, req);
-                _starpu_mpi_ms_event_request_delete(req);
-
-                if (event->event.mpi_ms_event.is_sender)
-                    event->starpu_mp_common_finished_sender--;
-                else
-                    event->starpu_mp_common_finished_receiver--;
-
-            }
-            req = req_next;
-        }
+                struct _starpu_mpi_ms_event_request * req = _starpu_mpi_ms_event_request_list_begin(event->event.mpi_ms_event.requests);
+                struct _starpu_mpi_ms_event_request * req_next;
 
-        /* When the list is empty, we finished to wait each request */
-        if (_starpu_mpi_ms_event_request_list_empty(event->event.mpi_ms_event.requests))
-        {
-            /* Destroy the list */
-            _starpu_mpi_ms_event_request_list_delete(event->event.mpi_ms_event.requests);
-            event->event.mpi_ms_event.requests = NULL;
+                while (req != _starpu_mpi_ms_event_request_list_end(event->event.mpi_ms_event.requests))
+                {
+                        req_next = _starpu_mpi_ms_event_request_list_next(req);
+
+                        int flag = 0;
+                        MPI_Test(&req->request, &flag, MPI_STATUS_IGNORE);
+                        if (flag)
+                        {
+                                _starpu_mpi_ms_event_request_list_erase(event->event.mpi_ms_event.requests, req);
+                                _starpu_mpi_ms_event_request_delete(req);
+
+                                if (event->event.mpi_ms_event.is_sender)
+                                        event->starpu_mp_common_finished_sender--;
+                                else
+                                        event->starpu_mp_common_finished_receiver--;
+
+                        }
+                        req = req_next;
+                }
+
+                /* When the list is empty, we finished to wait each request */
+                if (_starpu_mpi_ms_event_request_list_empty(event->event.mpi_ms_event.requests))
+                {
+                        /* Destroy the list */
+                        _starpu_mpi_ms_event_request_list_delete(event->event.mpi_ms_event.requests);
+                        event->event.mpi_ms_event.requests = NULL;
+                }
         }
-    }
 
-    _starpu_mpi_common_polling_node(event->polling_node_sender);
-    _starpu_mpi_common_polling_node(event->polling_node_receiver);
-    
-    return !event->starpu_mp_common_finished_sender && !event->starpu_mp_common_finished_receiver;
+        _starpu_mpi_common_polling_node(event->polling_node_sender);
+        _starpu_mpi_common_polling_node(event->polling_node_receiver);
+
+        return !event->starpu_mp_common_finished_sender && !event->starpu_mp_common_finished_receiver;
 }
 
 
@@ -419,48 +419,48 @@ int _starpu_mpi_common_test_event(struct _starpu_async_channel * event)
  */
 void _starpu_mpi_common_wait_event(struct _starpu_async_channel * event)
 {
-    if (event->event.mpi_ms_event.requests != NULL && !_starpu_mpi_ms_event_request_list_empty(event->event.mpi_ms_event.requests))
-    {
-        struct _starpu_mpi_ms_event_request * req = _starpu_mpi_ms_event_request_list_begin(event->event.mpi_ms_event.requests);
-        struct _starpu_mpi_ms_event_request * req_next;
-
-        while (req != _starpu_mpi_ms_event_request_list_end(event->event.mpi_ms_event.requests))
+        if (event->event.mpi_ms_event.requests != NULL && !_starpu_mpi_ms_event_request_list_empty(event->event.mpi_ms_event.requests))
         {
-            req_next = _starpu_mpi_ms_event_request_list_next(req);
+                struct _starpu_mpi_ms_event_request * req = _starpu_mpi_ms_event_request_list_begin(event->event.mpi_ms_event.requests);
+                struct _starpu_mpi_ms_event_request * req_next;
 
-            MPI_Wait(&req->request, MPI_STATUS_IGNORE);
-            _starpu_mpi_ms_event_request_list_erase(event->event.mpi_ms_event.requests, req);
+                while (req != _starpu_mpi_ms_event_request_list_end(event->event.mpi_ms_event.requests))
+                {
+                        req_next = _starpu_mpi_ms_event_request_list_next(req);
 
-            _starpu_mpi_ms_event_request_delete(req);
-            req = req_next;
+                        MPI_Wait(&req->request, MPI_STATUS_IGNORE);
+                        _starpu_mpi_ms_event_request_list_erase(event->event.mpi_ms_event.requests, req);
 
-            if (event->event.mpi_ms_event.is_sender)
-                event->starpu_mp_common_finished_sender--;
-            else
-                event->starpu_mp_common_finished_receiver--;
+                        _starpu_mpi_ms_event_request_delete(req);
+                        req = req_next;
 
-        }
+                        if (event->event.mpi_ms_event.is_sender)
+                                event->starpu_mp_common_finished_sender--;
+                        else
+                                event->starpu_mp_common_finished_receiver--;
 
-        STARPU_ASSERT_MSG(_starpu_mpi_ms_event_request_list_empty(event->event.mpi_ms_event.requests), "MPI Request list is not empty after a wait_event !");
+                }
 
-        /* Destroy the list */
-        _starpu_mpi_ms_event_request_list_delete(event->event.mpi_ms_event.requests);
-        event->event.mpi_ms_event.requests = NULL;
-    }
+                STARPU_ASSERT_MSG(_starpu_mpi_ms_event_request_list_empty(event->event.mpi_ms_event.requests), "MPI Request list is not empty after a wait_event !");
 
-    //incoming ack from devices
-    while(event->starpu_mp_common_finished_sender > 0 || event->starpu_mp_common_finished_receiver > 0)
-    {
-        _starpu_mpi_common_polling_node(event->polling_node_sender);
-        _starpu_mpi_common_polling_node(event->polling_node_receiver);
-    }
+                /* Destroy the list */
+                _starpu_mpi_ms_event_request_list_delete(event->event.mpi_ms_event.requests);
+                event->event.mpi_ms_event.requests = NULL;
+        }
+
+        //incoming ack from devices
+        while(event->starpu_mp_common_finished_sender > 0 || event->starpu_mp_common_finished_receiver > 0)
+        {
+                _starpu_mpi_common_polling_node(event->polling_node_sender);
+                _starpu_mpi_common_polling_node(event->polling_node_receiver);
+        }
 }
 
 
 
 void _starpu_mpi_common_barrier(void)
 {
-    MPI_Barrier(MPI_COMM_WORLD);
+        MPI_Barrier(MPI_COMM_WORLD);
 }
 
 /* Compute bandwidth and latency between source and sink nodes
@@ -468,91 +468,91 @@ void _starpu_mpi_common_barrier(void)
  */
 void _starpu_mpi_common_measure_bandwidth_latency(double bandwidth_dtod[STARPU_MAXMPIDEVS][STARPU_MAXMPIDEVS], double latency_dtod[STARPU_MAXMPIDEVS][STARPU_MAXMPIDEVS])
 {
-    int ret;
-    unsigned iter;
+        int ret;
+        unsigned iter;
 
-    int nb_proc, id_proc;
-    MPI_Comm_rank(MPI_COMM_WORLD, &id_proc);
-    MPI_Comm_size(MPI_COMM_WORLD, &nb_proc);
+        int nb_proc, id_proc;
+        MPI_Comm_rank(MPI_COMM_WORLD, &id_proc);
+        MPI_Comm_size(MPI_COMM_WORLD, &nb_proc);
 
-    char * buf;
-    _STARPU_MALLOC(buf, SIZE_BANDWIDTH);
-    memset(buf, 0, SIZE_BANDWIDTH);
+        char * buf;
+        _STARPU_MALLOC(buf, SIZE_BANDWIDTH);
+        memset(buf, 0, SIZE_BANDWIDTH);
 
-    unsigned sender, receiver;
-    for(sender = 0; sender < nb_proc; sender++)
-    {
-        for(receiver = 0; receiver < nb_proc; receiver++) 
+        unsigned sender, receiver;
+        for(sender = 0; sender < nb_proc; sender++)
         {
-            MPI_Barrier(MPI_COMM_WORLD);
-
-            //Node can't be a sender and a receiver
-            if(sender == receiver)
-                continue;
-
-            if(id_proc == sender)
-            {
-                double start, end;
-
-                /* measure bandwidth sender to receiver */
-                start = starpu_timing_now();
-                for (iter = 0; iter < NITER; iter++)
+                for(receiver = 0; receiver < nb_proc; receiver++) 
                 {
-                    ret = MPI_Send(buf, SIZE_BANDWIDTH, MPI_BYTE, receiver, 42, MPI_COMM_WORLD); 
-                    STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "Bandwidth of MPI Master/Slave cannot be measured !");
+                        MPI_Barrier(MPI_COMM_WORLD);
+
+                        //Node can't be a sender and a receiver
+                        if(sender == receiver)
+                                continue;
+
+                        if(id_proc == sender)
+                        {
+                                double start, end;
+
+                                /* measure bandwidth sender to receiver */
+                                start = starpu_timing_now();
+                                for (iter = 0; iter < NITER; iter++)
+                                {
+                                        ret = MPI_Send(buf, SIZE_BANDWIDTH, MPI_BYTE, receiver, 42, MPI_COMM_WORLD); 
+                                        STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "Bandwidth of MPI Master/Slave cannot be measured !");
+                                }
+                                end = starpu_timing_now();
+                                bandwidth_dtod[sender][receiver] = (NITER*1000000)/(end - start);
+
+                                /* measure latency sender to receiver */
+                                start = starpu_timing_now();
+                                for (iter = 0; iter < NITER; iter++)
+                                {
+                                        ret = MPI_Send(buf, 1, MPI_BYTE, receiver, 42, MPI_COMM_WORLD); 
+                                        STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "Latency of MPI Master/Slave cannot be measured !");
+                                }
+                                end = starpu_timing_now();
+                                latency_dtod[sender][receiver] = (end - start)/NITER;
+                        }
+
+                        if (id_proc == receiver)
+                        {
+                                /* measure bandwidth sender to receiver*/
+                                for (iter = 0; iter < NITER; iter++)
+                                {
+                                        ret = MPI_Recv(buf, SIZE_BANDWIDTH, MPI_BYTE, sender, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+                                        STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "Bandwidth of MPI Master/Slave cannot be measured !");
+                                }
+
+                                /* measure latency sender to receiver */
+                                for (iter = 0; iter < NITER; iter++)
+                                {
+                                        ret = MPI_Recv(buf, 1, MPI_BYTE, sender, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+                                        STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "Bandwidth of MPI Master/Slave cannot be measured !");
+                                }
+                        }
                 }
-                end = starpu_timing_now();
-                bandwidth_dtod[sender][receiver] = (NITER*1000000)/(end - start);
 
-                /* measure latency sender to receiver */
-                start = starpu_timing_now();
-                for (iter = 0; iter < NITER; iter++)
-                {
-                    ret = MPI_Send(buf, 1, MPI_BYTE, receiver, 42, MPI_COMM_WORLD); 
-                    STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "Latency of MPI Master/Slave cannot be measured !");
-                }
-                end = starpu_timing_now();
-                latency_dtod[sender][receiver] = (end - start)/NITER;
-            }
-
-            if (id_proc == receiver)
-            {
-                /* measure bandwidth sender to receiver*/
-                for (iter = 0; iter < NITER; iter++)
+                /* When a sender finished its work, it has to send its results to the master */
+
+                /* Sender doesn't need to send to itself its data */
+                if (sender == src_node_id)
+                        continue;
+
+                /* if we are the sender, we send the data */
+                if (sender == id_proc)
                 {
-                    ret = MPI_Recv(buf, SIZE_BANDWIDTH, MPI_BYTE, sender, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-                    STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "Bandwidth of MPI Master/Slave cannot be measured !");
+                        MPI_Send(bandwidth_dtod[sender], STARPU_MAXMPIDEVS, MPI_DOUBLE, src_node_id, 42, MPI_COMM_WORLD);
+                        MPI_Send(latency_dtod[sender], STARPU_MAXMPIDEVS, MPI_DOUBLE, src_node_id, 42, MPI_COMM_WORLD);
                 }
 
-                /* measure latency sender to receiver */
-                for (iter = 0; iter < NITER; iter++)
+                /* the master node receives the data */
+                if (src_node_id == id_proc)
                 {
-                    ret = MPI_Recv(buf, 1, MPI_BYTE, sender, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-                    STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "Bandwidth of MPI Master/Slave cannot be measured !");
+                        MPI_Recv(bandwidth_dtod[sender], STARPU_MAXMPIDEVS, MPI_DOUBLE, sender, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+                        MPI_Recv(latency_dtod[sender], STARPU_MAXMPIDEVS, MPI_DOUBLE, sender, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
                 }
-            }
-        }
-
-        /* When a sender finished its work, it has to send its results to the master */
-        
-        /* Sender doesn't need to send to itself its data */
-        if (sender == src_node_id)
-            continue;
-        
-        /* if we are the sender, we send the data */
-        if (sender == id_proc)
-        {
-            MPI_Send(bandwidth_dtod[sender], STARPU_MAXMPIDEVS, MPI_DOUBLE, src_node_id, 42, MPI_COMM_WORLD);
-            MPI_Send(latency_dtod[sender], STARPU_MAXMPIDEVS, MPI_DOUBLE, src_node_id, 42, MPI_COMM_WORLD);
-        }
 
-        /* the master node receives the data */
-        if (src_node_id == id_proc)
-        {
-            MPI_Recv(bandwidth_dtod[sender], STARPU_MAXMPIDEVS, MPI_DOUBLE, sender, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            MPI_Recv(latency_dtod[sender], STARPU_MAXMPIDEVS, MPI_DOUBLE, sender, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
         }
-
-    }
-    free(buf);
+        free(buf);
 }

+ 31 - 31
src/drivers/mpi/driver_mpi_sink.c

@@ -25,57 +25,57 @@
 
 void _starpu_mpi_sink_init(struct _starpu_mp_node *node)
 {
-    _starpu_mpi_common_mp_initialize_src_sink(node);
+        _starpu_mpi_common_mp_initialize_src_sink(node);
 
-    _STARPU_MALLOC(node->thread_table, sizeof(starpu_pthread_t)*node->nb_cores);
-    //TODO
+        _STARPU_MALLOC(node->thread_table, sizeof(starpu_pthread_t)*node->nb_cores);
+        //TODO
 }
 
 void _starpu_mpi_sink_deinit(struct _starpu_mp_node *node)
 {
-    free(node->thread_table);
-    //TODO
+        free(node->thread_table);
+        //TODO
 }
 
 void (*_starpu_mpi_sink_lookup (const struct _starpu_mp_node * node STARPU_ATTRIBUTE_UNUSED, char* func_name))(void)
 {
-	void *dl_handle = dlopen(NULL, RTLD_NOW);
-	return dlsym(dl_handle, func_name);
+        void *dl_handle = dlopen(NULL, RTLD_NOW);
+        return dlsym(dl_handle, func_name);
 }
 
 void _starpu_mpi_sink_launch_workers(struct _starpu_mp_node *node)
 {
-    //TODO
-    int i, ret;
-    struct arg_sink_thread * arg;
-    cpu_set_t cpuset;
-    starpu_pthread_attr_t attr;
-    starpu_pthread_t thread;
+        //TODO
+        int i, ret;
+        struct arg_sink_thread * arg;
+        cpu_set_t cpuset;
+        starpu_pthread_attr_t attr;
+        starpu_pthread_t thread;
 
-    for(i=0; i < node->nb_cores; i++)
-    {
-        //init the set
-        CPU_ZERO(&cpuset);
-        CPU_SET(i,&cpuset);
+        for(i=0; i < node->nb_cores; i++)
+        {
+                //init the set
+                CPU_ZERO(&cpuset);
+                CPU_SET(i,&cpuset);
 
-        ret = starpu_pthread_attr_init(&attr);
-        STARPU_ASSERT(ret == 0);
-        ret = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
-        STARPU_ASSERT(ret == 0);
+                ret = starpu_pthread_attr_init(&attr);
+                STARPU_ASSERT(ret == 0);
+                ret = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
+                STARPU_ASSERT(ret == 0);
 
-        /*prepare the argument for the thread*/
-        _STARPU_MALLOC(arg, sizeof(struct arg_sink_thread));
-        arg->coreid = i;
-        arg->node = node;
+                /*prepare the argument for the thread*/
+                _STARPU_MALLOC(arg, sizeof(struct arg_sink_thread));
+                arg->coreid = i;
+                arg->node = node;
 
-        ret = starpu_pthread_create(&thread, &attr, _starpu_sink_thread, arg);
-        STARPU_ASSERT(ret == 0);
-        ((starpu_pthread_t *)node->thread_table)[i] = thread;
+                ret = starpu_pthread_create(&thread, &attr, _starpu_sink_thread, arg);
+                STARPU_ASSERT(ret == 0);
+                ((starpu_pthread_t *)node->thread_table)[i] = thread;
 
-    }
+        }
 }
 
 void _starpu_mpi_sink_bind_thread(const struct _starpu_mp_node *mp_node STARPU_ATTRIBUTE_UNUSED, int coreid, int * core_table, int nb_core)
 {
-    //TODO
+        //TODO
 }

+ 191 - 191
src/drivers/mpi/driver_mpi_source.c

@@ -51,8 +51,8 @@ struct _starpu_mp_node *mpi_ms_nodes[STARPU_MAXMPIDEVS];
 
 void _starpu_mpi_source_init(struct _starpu_mp_node *node)
 {
-    _starpu_mpi_common_mp_initialize_src_sink(node);
-    //TODO
+        _starpu_mpi_common_mp_initialize_src_sink(node);
+        //TODO
 }
 
 void _starpu_mpi_source_deinit(struct _starpu_mp_node *node)
@@ -62,282 +62,282 @@ void _starpu_mpi_source_deinit(struct _starpu_mp_node *node)
 
 struct _starpu_mp_node *_starpu_mpi_src_get_mp_node_from_memory_node(int memory_node)
 {
-    int devid = _starpu_memory_node_get_devid(memory_node);
-    STARPU_ASSERT_MSG(devid >= 0 && devid < STARPU_MAXMPIDEVS, "bogus devid %d for memory node %d\n", devid, memory_node);
+        int devid = _starpu_memory_node_get_devid(memory_node);
+        STARPU_ASSERT_MSG(devid >= 0 && devid < STARPU_MAXMPIDEVS, "bogus devid %d for memory node %d\n", devid, memory_node);
 
-    return mpi_ms_nodes[devid];
+        return mpi_ms_nodes[devid];
 }
 
 int _starpu_mpi_src_allocate_memory(void ** addr, size_t size, unsigned memory_node)
 {
-    const struct _starpu_mp_node *mp_node = _starpu_mpi_src_get_mp_node_from_memory_node(memory_node);
-    return _starpu_src_common_allocate(mp_node, addr, size);
+        const struct _starpu_mp_node *mp_node = _starpu_mpi_src_get_mp_node_from_memory_node(memory_node);
+        return _starpu_src_common_allocate(mp_node, addr, size);
 }
 
 void _starpu_mpi_source_free_memory(void *addr, unsigned memory_node)
 {
-	struct _starpu_mp_node *mp_node = _starpu_mpi_src_get_mp_node_from_memory_node(memory_node);
-    _starpu_src_common_free(mp_node, addr);
+        struct _starpu_mp_node *mp_node = _starpu_mpi_src_get_mp_node_from_memory_node(memory_node);
+        _starpu_src_common_free(mp_node, addr);
 }
 
- /* Transfert SIZE bytes from the address pointed by SRC in the SRC_NODE memory
-  * node to the address pointed by DST in the DST_NODE memory node
-  */
+/* Transfert SIZE bytes from the address pointed by SRC in the SRC_NODE memory
+ * node to the address pointed by DST in the DST_NODE memory node
+ */
 int _starpu_mpi_copy_ram_to_mpi_sync(void *src, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst, unsigned dst_node, size_t size)
 {
-    struct _starpu_mp_node *mp_node = _starpu_mpi_src_get_mp_node_from_memory_node(dst_node);
-    return _starpu_src_common_copy_host_to_sink_sync(mp_node, src, dst, size);
+        struct _starpu_mp_node *mp_node = _starpu_mpi_src_get_mp_node_from_memory_node(dst_node);
+        return _starpu_src_common_copy_host_to_sink_sync(mp_node, src, dst, size);
 }   
  
- /* Transfert SIZE bytes from the address pointed by SRC in the SRC_NODE memory
-  * node to the address pointed by DST in the DST_NODE memory node
-  */    
+/* Transfert SIZE bytes from the address pointed by SRC in the SRC_NODE memory
+ * node to the address pointed by DST in the DST_NODE memory node
+ */    
 int _starpu_mpi_copy_mpi_to_ram_sync(void *src, unsigned src_node, void *dst, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size)
 {
-    struct _starpu_mp_node *mp_node = _starpu_mpi_src_get_mp_node_from_memory_node(src_node);
-    return _starpu_src_common_copy_sink_to_host_sync(mp_node, src, dst, size);
+        struct _starpu_mp_node *mp_node = _starpu_mpi_src_get_mp_node_from_memory_node(src_node);
+        return _starpu_src_common_copy_sink_to_host_sync(mp_node, src, dst, size);
 }   
 
 int _starpu_mpi_copy_sink_to_sink_sync(void *src, unsigned src_node, void *dst, unsigned dst_node, size_t size)
 {
-    return _starpu_src_common_copy_sink_to_sink_sync(_starpu_mpi_src_get_mp_node_from_memory_node(src_node),
-            _starpu_mpi_src_get_mp_node_from_memory_node(dst_node),
-            src, dst, size);
+        return _starpu_src_common_copy_sink_to_sink_sync(_starpu_mpi_src_get_mp_node_from_memory_node(src_node),
+                        _starpu_mpi_src_get_mp_node_from_memory_node(dst_node),
+                        src, dst, size);
 }
 
 int _starpu_mpi_copy_mpi_to_ram_async(void *src, unsigned src_node, void *dst, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size, void * event)
 {
-    struct _starpu_mp_node *mp_node = _starpu_mpi_src_get_mp_node_from_memory_node(src_node);
-    return _starpu_src_common_copy_sink_to_host_async(mp_node, src, dst, size, event);
+        struct _starpu_mp_node *mp_node = _starpu_mpi_src_get_mp_node_from_memory_node(src_node);
+        return _starpu_src_common_copy_sink_to_host_async(mp_node, src, dst, size, event);
 }
 
 int _starpu_mpi_copy_ram_to_mpi_async(void *src, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst, unsigned dst_node, size_t size, void * event)
 {
-    struct _starpu_mp_node *mp_node = _starpu_mpi_src_get_mp_node_from_memory_node(dst_node);
-    return _starpu_src_common_copy_host_to_sink_async(mp_node, src, dst, size, event);
+        struct _starpu_mp_node *mp_node = _starpu_mpi_src_get_mp_node_from_memory_node(dst_node);
+        return _starpu_src_common_copy_host_to_sink_async(mp_node, src, dst, size, event);
 }
 
 int _starpu_mpi_copy_sink_to_sink_async(void *src, unsigned src_node, void *dst, unsigned dst_node, size_t size, void * event)
 {
-    return _starpu_src_common_copy_sink_to_sink_async(_starpu_mpi_src_get_mp_node_from_memory_node(src_node),
-            _starpu_mpi_src_get_mp_node_from_memory_node(dst_node),
-            src, dst, size, event);
+        return _starpu_src_common_copy_sink_to_sink_async(_starpu_mpi_src_get_mp_node_from_memory_node(src_node),
+                        _starpu_mpi_src_get_mp_node_from_memory_node(dst_node),
+                        src, dst, size, event);
 }
 
 
 int _starpu_mpi_ms_src_register_kernel(starpu_mpi_ms_func_symbol_t *symbol, const char *func_name)
 {
-	unsigned int func_name_size = (strlen(func_name) + 1) * sizeof(char);
-
-	STARPU_PTHREAD_MUTEX_LOCK(&htbl_mutex);
-	struct _starpu_mpi_ms_kernel *kernel;
-	
-	HASH_FIND_STR(kernels, func_name, kernel);
-
-	if (kernel != NULL)
-	{
-		STARPU_PTHREAD_MUTEX_UNLOCK(&htbl_mutex);
-		// Function already in the table.
-		*symbol = kernel;
-		return 0;
-	}
-
-	kernel = malloc(sizeof(*kernel));
-	if (kernel == NULL)
-	{
-		STARPU_PTHREAD_MUTEX_UNLOCK(&htbl_mutex);
-		return -ENOMEM;
-	}
-
-	kernel->name = malloc(func_name_size);
-	if (kernel->name == NULL)
-	{
-		STARPU_PTHREAD_MUTEX_UNLOCK(&htbl_mutex);
-		free(kernel);
-		return -ENOMEM;
-	}
-
-	memcpy(kernel->name, func_name, func_name_size);
-
-	HASH_ADD_STR(kernels, name, kernel);
-
-	unsigned int nb_mpi_devices = _starpu_mpi_src_get_device_count();
-	unsigned int i;
-	for (i = 0; i < nb_mpi_devices; ++i)
-		kernel->func[i] = NULL;
-
-	STARPU_PTHREAD_MUTEX_UNLOCK(&htbl_mutex);
-
-	*symbol = kernel;
-
-	return 0;
+        unsigned int func_name_size = (strlen(func_name) + 1) * sizeof(char);
+
+        STARPU_PTHREAD_MUTEX_LOCK(&htbl_mutex);
+        struct _starpu_mpi_ms_kernel *kernel;
+
+        HASH_FIND_STR(kernels, func_name, kernel);
+
+        if (kernel != NULL)
+        {
+                STARPU_PTHREAD_MUTEX_UNLOCK(&htbl_mutex);
+                // Function already in the table.
+                *symbol = kernel;
+                return 0;
+        }
+
+        kernel = malloc(sizeof(*kernel));
+        if (kernel == NULL)
+        {
+                STARPU_PTHREAD_MUTEX_UNLOCK(&htbl_mutex);
+                return -ENOMEM;
+        }
+
+        kernel->name = malloc(func_name_size);
+        if (kernel->name == NULL)
+        {
+                STARPU_PTHREAD_MUTEX_UNLOCK(&htbl_mutex);
+                free(kernel);
+                return -ENOMEM;
+        }
+
+        memcpy(kernel->name, func_name, func_name_size);
+
+        HASH_ADD_STR(kernels, name, kernel);
+
+        unsigned int nb_mpi_devices = _starpu_mpi_src_get_device_count();
+        unsigned int i;
+        for (i = 0; i < nb_mpi_devices; ++i)
+                kernel->func[i] = NULL;
+
+        STARPU_PTHREAD_MUTEX_UNLOCK(&htbl_mutex);
+
+        *symbol = kernel;
+
+        return 0;
 }
 
 
 starpu_mpi_ms_kernel_t _starpu_mpi_ms_src_get_kernel(starpu_mpi_ms_func_symbol_t symbol)
 {
-	int workerid = starpu_worker_get_id();
-	
-	/* This function has to be called in the codelet only, by the thread
-	 * which will handle the task */
-	if (workerid < 0)
-		return NULL;
-
-	int devid = starpu_worker_get_devid(workerid);
-
-	struct _starpu_mpi_ms_kernel *kernel = symbol;
-
-	if (kernel->func[devid] == NULL)
-	{
-		struct _starpu_mp_node *node = mpi_ms_nodes[devid];
-		int ret = _starpu_src_common_lookup(node, (void (**)(void))&kernel->func[devid], kernel->name);
-		if (ret)
-			return NULL;
-	}
-
-	return kernel->func[devid];
+        int workerid = starpu_worker_get_id();
+
+        /* This function has to be called in the codelet only, by the thread
+         * which will handle the task */
+        if (workerid < 0)
+                return NULL;
+
+        int devid = starpu_worker_get_devid(workerid);
+
+        struct _starpu_mpi_ms_kernel *kernel = symbol;
+
+        if (kernel->func[devid] == NULL)
+        {
+                struct _starpu_mp_node *node = mpi_ms_nodes[devid];
+                int ret = _starpu_src_common_lookup(node, (void (**)(void))&kernel->func[devid], kernel->name);
+                if (ret)
+                        return NULL;
+        }
+
+        return kernel->func[devid];
 }
 
 void(* _starpu_mpi_ms_src_get_kernel_from_job(const struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNUSED, struct _starpu_job *j))(void)
 {
-	starpu_mpi_ms_kernel_t kernel = NULL;
-
-	starpu_mpi_ms_func_t func = _starpu_task_get_mpi_ms_nth_implementation(j->task->cl, j->nimpl);
-	if (func)
-	{
-		/* We execute the function contained in the codelet, it must return a
-		 * pointer to the function to execute on the device, either specified
-		 * directly by the user or by a call to starpu_mpi_ms_get_func().
-		 */
-		kernel = func();
-	}
-	else
-	{
-		/* If user dont define any starpu_mpi_ms_fun_t in cl->mpi_ms_func we try to use
-		 * cpu_func_name.
-		 */
-		const char *func_name = _starpu_task_get_cpu_name_nth_implementation(j->task->cl, j->nimpl);
-		if (func_name)
-		{
-			starpu_mpi_ms_func_symbol_t symbol;
-
-			_starpu_mpi_ms_src_register_kernel(&symbol, func_name);
-
-			kernel = _starpu_mpi_ms_src_get_kernel(symbol);
-		}
-	}
-	STARPU_ASSERT(kernel);
-
-	return (void (*)(void))kernel;
+        starpu_mpi_ms_kernel_t kernel = NULL;
+
+        starpu_mpi_ms_func_t func = _starpu_task_get_mpi_ms_nth_implementation(j->task->cl, j->nimpl);
+        if (func)
+        {
+                /* We execute the function contained in the codelet, it must return a
+                 * pointer to the function to execute on the device, either specified
+                 * directly by the user or by a call to starpu_mpi_ms_get_func().
+                 */
+                kernel = func();
+        }
+        else
+        {
+                /* If user dont define any starpu_mpi_ms_fun_t in cl->mpi_ms_func we try to use
+                 * cpu_func_name.
+                 */
+                const char *func_name = _starpu_task_get_cpu_name_nth_implementation(j->task->cl, j->nimpl);
+                if (func_name)
+                {
+                        starpu_mpi_ms_func_symbol_t symbol;
+
+                        _starpu_mpi_ms_src_register_kernel(&symbol, func_name);
+
+                        kernel = _starpu_mpi_ms_src_get_kernel(symbol);
+                }
+        }
+        STARPU_ASSERT(kernel);
+
+        return (void (*)(void))kernel;
 }
 
 unsigned _starpu_mpi_src_get_device_count()
 {
-    int nb_mpi_devices;
+        int nb_mpi_devices;
 
-    if (!_starpu_mpi_common_is_mp_initialized())
-        return 0;
-    
-    MPI_Comm_size(MPI_COMM_WORLD, &nb_mpi_devices);
+        if (!_starpu_mpi_common_is_mp_initialized())
+                return 0;
 
-    //Remove one for master
-    nb_mpi_devices = nb_mpi_devices - 1;
+        MPI_Comm_size(MPI_COMM_WORLD, &nb_mpi_devices);
 
-    return nb_mpi_devices;
+        //Remove one for master
+        nb_mpi_devices = nb_mpi_devices - 1;
+
+        return nb_mpi_devices;
 
 }
 
 void *_starpu_mpi_src_worker(void *arg)
 {
 #ifndef STARPU_MPI_MASTER_SLAVE_MULTIPLE_THREAD
-    struct _starpu_worker_set *worker_set_mpi = (struct _starpu_worker_set *) arg;
-    int nbsinknodes = _starpu_mpi_src_get_device_count();
+        struct _starpu_worker_set *worker_set_mpi = (struct _starpu_worker_set *) arg;
+        int nbsinknodes = _starpu_mpi_src_get_device_count();
 
-    int workersetnum;
-    for (workersetnum = 0; workersetnum < nbsinknodes; workersetnum++)
-    {
-        struct _starpu_worker_set * worker_set = &worker_set_mpi[workersetnum];
+        int workersetnum;
+        for (workersetnum = 0; workersetnum < nbsinknodes; workersetnum++)
+        {
+                struct _starpu_worker_set * worker_set = &worker_set_mpi[workersetnum];
 #else
-        struct _starpu_worker_set *worker_set = arg;
+                struct _starpu_worker_set *worker_set = arg;
 #endif
 
-        /* As all workers of a set share common data, we just use the first
-         *       * one for intializing the following stuffs. */
-        struct _starpu_worker *baseworker = &worker_set->workers[0];
-        struct _starpu_machine_config *config = baseworker->config;
-        unsigned baseworkerid = baseworker - config->workers;
-        unsigned devid = baseworker->devid;
-        unsigned i;
+                /* As all workers of a set share common data, we just use the first
+                 *       * one for intializing the following stuffs. */
+                struct _starpu_worker *baseworker = &worker_set->workers[0];
+                struct _starpu_machine_config *config = baseworker->config;
+                unsigned baseworkerid = baseworker - config->workers;
+                unsigned devid = baseworker->devid;
+                unsigned i;
 
-        /* unsigned memnode = baseworker->memory_node; */
+                /* unsigned memnode = baseworker->memory_node; */
 
-        _starpu_driver_start(baseworker, _STARPU_FUT_MPI_KEY, 0);
+                _starpu_driver_start(baseworker, _STARPU_FUT_MPI_KEY, 0);
 
 #ifdef STARPU_USE_FXT             
-        for (i = 1; i < worker_set->nworkers; i++)
-            _starpu_worker_start(&worker_set->workers[i], _STARPU_FUT_MPI_KEY, 0);
+                for (i = 1; i < worker_set->nworkers; i++)
+                        _starpu_worker_start(&worker_set->workers[i], _STARPU_FUT_MPI_KEY, 0);
 #endif          
 
-        // Current task for a thread managing a worker set has no sense.
-        _starpu_set_current_task(NULL);
+                // Current task for a thread managing a worker set has no sense.
+                _starpu_set_current_task(NULL);
 
-        for (i = 0; i < config->topology.nmpicores[devid]; i++)
-        {
-            struct _starpu_worker *worker = &config->workers[baseworkerid+i];
-            snprintf(worker->name, sizeof(worker->name), "MPI_MS %d core %u", devid, i);
-            snprintf(worker->short_name, sizeof(worker->short_name), "MPI_MS %d.%u", devid, i);
-        }
+                for (i = 0; i < config->topology.nmpicores[devid]; i++)
+                {
+                        struct _starpu_worker *worker = &config->workers[baseworkerid+i];
+                        snprintf(worker->name, sizeof(worker->name), "MPI_MS %d core %u", devid, i);
+                        snprintf(worker->short_name, sizeof(worker->short_name), "MPI_MS %d.%u", devid, i);
+                }
 
 #ifndef STARPU_MPI_MASTER_SLAVE_MULTIPLE_THREAD
-        {
-            char thread_name[16];
-            snprintf(thread_name, sizeof(thread_name), "MPI_MS");
-            starpu_pthread_setname(thread_name);
-        }
+                {
+                        char thread_name[16];
+                        snprintf(thread_name, sizeof(thread_name), "MPI_MS");
+                        starpu_pthread_setname(thread_name);
+                }
 #else
-        {
-            char thread_name[16];
-            snprintf(thread_name, sizeof(thread_name), "MPI_MS %d", devid);
-            starpu_pthread_setname(thread_name);
-        }
+                {
+                        char thread_name[16];
+                        snprintf(thread_name, sizeof(thread_name), "MPI_MS %d", devid);
+                        starpu_pthread_setname(thread_name);
+                }
 #endif
 
-        for (i = 0; i < worker_set->nworkers; i++)
-        {
-            struct _starpu_worker *worker = &worker_set->workers[i];
-            _STARPU_TRACE_WORKER_INIT_END(worker->workerid);
-        }
-    
+                for (i = 0; i < worker_set->nworkers; i++)
+                {
+                        struct _starpu_worker *worker = &worker_set->workers[i];
+                        _STARPU_TRACE_WORKER_INIT_END(worker->workerid);
+                }
+
 #ifndef STARPU_MPI_MASTER_SLAVE_MULTIPLE_THREAD
-        _starpu_src_common_init_switch_env(workersetnum);
-    }  /* for */
-
-    /* set the worker zero for the main thread */
-    for (workersetnum = 0; workersetnum < nbsinknodes; workersetnum++)
-    {
-        struct _starpu_worker_set * worker_set = &worker_set_mpi[workersetnum];
-        struct _starpu_worker *baseworker = &worker_set->workers[0];
+                _starpu_src_common_init_switch_env(workersetnum);
+        }  /* for */
+
+        /* set the worker zero for the main thread */
+        for (workersetnum = 0; workersetnum < nbsinknodes; workersetnum++)
+        {
+                struct _starpu_worker_set * worker_set = &worker_set_mpi[workersetnum];
+                struct _starpu_worker *baseworker = &worker_set->workers[0];
 #endif
 
-        /* tell the main thread that this one is ready */
-        STARPU_PTHREAD_MUTEX_LOCK(&worker_set->mutex);
-        baseworker->status = STATUS_UNKNOWN;
-        worker_set->set_is_initialized = 1;
-        STARPU_PTHREAD_COND_SIGNAL(&worker_set->ready_cond);
-        STARPU_PTHREAD_MUTEX_UNLOCK(&worker_set->mutex);
+                /* tell the main thread that this one is ready */
+                STARPU_PTHREAD_MUTEX_LOCK(&worker_set->mutex);
+                baseworker->status = STATUS_UNKNOWN;
+                worker_set->set_is_initialized = 1;
+                STARPU_PTHREAD_COND_SIGNAL(&worker_set->ready_cond);
+                STARPU_PTHREAD_MUTEX_UNLOCK(&worker_set->mutex);
 
 #ifndef STARPU_MPI_MASTER_SLAVE_MULTIPLE_THREAD
-    }
+        }
 #endif
 
 
 #ifndef STARPU_MPI_MASTER_SLAVE_MULTIPLE_THREAD
-    _starpu_src_common_workers_set(worker_set_mpi, nbsinknodes, mpi_ms_nodes);
+        _starpu_src_common_workers_set(worker_set_mpi, nbsinknodes, mpi_ms_nodes);
 #else
-    _starpu_src_common_worker(worker_set, baseworkerid, mpi_ms_nodes[devid]);
+        _starpu_src_common_worker(worker_set, baseworkerid, mpi_ms_nodes[devid]);
 #endif
 
-    return NULL;
+        return NULL;
+
 
-    
 }

+ 2 - 2
src/drivers/scc/driver_scc_common.c

@@ -141,7 +141,7 @@ void _starpu_scc_common_send(const struct _starpu_mp_node *node, void *msg, int
 {
 	int ret;
 
-    STARPU_ASSERT_MSG(!event, "Asynchronous msg is not used here");
+        STARPU_ASSERT_MSG(!event, "Asynchronous msg is not used here");
 
 	/* There are potentially 48 threads running on the master core and RCCE_send write
 	 * data in the MPB associated to this core. It's not thread safe, so we have to protect it.
@@ -159,7 +159,7 @@ void _starpu_scc_common_send(const struct _starpu_mp_node *node, void *msg, int
 
 void _starpu_scc_common_recv(const struct _starpu_mp_node *node, void *msg, int len, void * event)
 {
-    STARPU_ASSERT_MSG(!event, "Asynchronous msg is not used here");
+        STARPU_ASSERT_MSG(!event, "Asynchronous msg is not used here");
 
 	int ret;
 	if ((ret = RCCE_recv(msg, len, node->mp_connection.scc_nodeid)) != RCCE_SUCCESS)

+ 2 - 2
src/drivers/scc/driver_scc_sink.c

@@ -59,7 +59,7 @@ void _starpu_scc_sink_send_to_device(const struct _starpu_mp_node *node, int dst
 {
 	int ret;
 
-    STARPU_ASSERT_MSG(!event, "Asynchronous msg is not used here");
+        STARPU_ASSERT_MSG(!event, "Asynchronous msg is not used here");
 
 	if ((ret = RCCE_send(msg, len, STARPU_TO_SCC_SINK_ID(dst_devid))) != RCCE_SUCCESS)
 		STARPU_MP_COMMON_REPORT_ERROR(node, ret);
@@ -69,7 +69,7 @@ void _starpu_scc_sink_recv_from_device(const struct _starpu_mp_node *node, int s
 {
 	int ret;
 
-    STARPU_ASSERT_MSG(!event, "Asynchronous msg is not used here");
+        STARPU_ASSERT_MSG(!event, "Asynchronous msg is not used here");
 
 	if ((ret = RCCE_recv(msg, len, STARPU_TO_SCC_SINK_ID(src_devid))) != RCCE_SUCCESS)
 		STARPU_MP_COMMON_REPORT_ERROR(node, ret);

+ 19 - 19
tests/errorcheck/starpu_init_noworker.c

@@ -57,27 +57,27 @@ int main(int argc, char **argv)
 	conf.nopencl = 0;
 	conf.nmic = 0;
 	conf.nscc = 0;
-    conf.nmpi_ms = 0;
+        conf.nmpi_ms = 0;
 
 	/* starpu_init should return -ENODEV */
-	ret = starpu_initialize(&conf, &argc, &argv);
-	if (ret == -ENODEV)
-	     return EXIT_SUCCESS;
-	else
-	{
-        unsigned ncpu = starpu_cpu_worker_get_count();
-        unsigned ncuda = starpu_cuda_worker_get_count();
-        unsigned nopencl = starpu_opencl_worker_get_count();
-        unsigned nmic = starpu_mic_worker_get_count();
-        unsigned nmpi_ms = starpu_mpi_ms_worker_get_count();
-        FPRINTF(stderr, "StarPU has found :\n");
-        FPRINTF(stderr, "\t%u CPU cores\n", ncpu);
-        FPRINTF(stderr, "\t%u CUDA devices\n", ncuda);
-        FPRINTF(stderr, "\t%u OpenCL devices\n", nopencl);
-        FPRINTF(stderr, "\t%u MIC devices\n", nmic);
-        FPRINTF(stderr, "\t%u MPI Master-Slaves devices\n", nmpi_ms);
-        return EXIT_FAILURE;
-	}
+        ret = starpu_initialize(&conf, &argc, &argv);
+        if (ret == -ENODEV)
+                return EXIT_SUCCESS;
+        else
+        {
+                unsigned ncpu = starpu_cpu_worker_get_count();
+                unsigned ncuda = starpu_cuda_worker_get_count();
+                unsigned nopencl = starpu_opencl_worker_get_count();
+                unsigned nmic = starpu_mic_worker_get_count();
+                unsigned nmpi_ms = starpu_mpi_ms_worker_get_count();
+                FPRINTF(stderr, "StarPU has found :\n");
+                FPRINTF(stderr, "\t%u CPU cores\n", ncpu);
+                FPRINTF(stderr, "\t%u CUDA devices\n", ncuda);
+                FPRINTF(stderr, "\t%u OpenCL devices\n", nopencl);
+                FPRINTF(stderr, "\t%u MIC devices\n", nmic);
+                FPRINTF(stderr, "\t%u MPI Master-Slaves devices\n", nmpi_ms);
+                return EXIT_FAILURE;
+        }
 
 
 }