Browse Source

compute bandwidth and latency also for sink to sink links

Corentin Salingue 8 years ago
parent
commit
e7278f623c

+ 67 - 11
src/core/perfmodel/perfmodel_bus.c

@@ -123,10 +123,8 @@ static double mic_time_device_to_host[STARPU_MAXNODES] = {0.0};
 #endif /* STARPU_USE_MIC */
 #endif /* STARPU_USE_MIC */
 
 
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
-static double mpi_time_host_to_device[STARPU_MAXNODES] = {0.0};
-static double mpi_time_device_to_host[STARPU_MAXNODES] = {0.0};
-static double mpi_latency_host_to_device[STARPU_MAXNODES] = {0.0};
-static double mpi_latency_device_to_host[STARPU_MAXNODES] = {0.0};
+static double mpi_time_device_to_device[STARPU_MAXMPIDEVS][STARPU_MAXMPIDEVS] = {{0.0}};
+static double mpi_latency_device_to_device[STARPU_MAXMPIDEVS][STARPU_MAXMPIDEVS] = {{0.0}};
 #endif
 #endif
 
 
 #ifdef STARPU_HAVE_HWLOC
 #ifdef STARPU_HAVE_HWLOC
@@ -749,7 +747,7 @@ static void benchmark_all_gpu_devices(void)
 
 
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
     
     
-    _starpu_mpi_common_measure_bandwidth_latency(mpi_time_host_to_device, mpi_time_device_to_host, mpi_latency_host_to_device, mpi_latency_device_to_host);
+    _starpu_mpi_common_measure_bandwidth_latency(mpi_time_device_to_device, mpi_latency_device_to_device);
 
 
 #endif /* STARPU_USE_MPI_MASTER_SLAVE */
 #endif /* STARPU_USE_MPI_MASTER_SLAVE */
 
 
@@ -1207,10 +1205,39 @@ static void write_bus_latency_file_content(void)
 #endif
 #endif
                 /* TODO Latency MIC */
                 /* TODO Latency MIC */
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
+                /* Map the slave indices of src and dst to MPI ranks by skipping over the master's rank,
+                 * because only slaves are counted as MPI devices here */
+                int mpi_master = _starpu_mpi_common_get_src_node();
+
+                int mpi_src = src - (ncuda + nopencl + nmic) - 1;
+                mpi_src = (mpi_master <= mpi_src) ? mpi_src+1 : mpi_src;
+                
+                int mpi_dst = dst - (ncuda + nopencl + nmic) - 1;
+                mpi_dst = (mpi_master <= mpi_dst) ? mpi_dst+1 : mpi_dst;
+
 				if (src > ncuda + nopencl + nmic && src <= ncuda + nopencl + nmic + nmpi_ms)
 				if (src > ncuda + nopencl + nmic && src <= ncuda + nopencl + nmic + nmpi_ms)
-					latency += mpi_latency_device_to_host[src - (ncuda + nopencl + nmic) - 1];
-				if (dst > ncuda + nopencl + nmic && dst <= ncuda + nopencl + nmic + nmpi_ms)
-					latency += mpi_latency_host_to_device[dst - (ncuda + nopencl + nmic) - 1];
+                {
+                    if (dst > ncuda + nopencl + nmic && dst <= ncuda + nopencl + nmic + nmpi_ms)
+                    {
+                        /* src and dst identify 2 MPI devices */
+                        latency += mpi_latency_device_to_device[mpi_src][mpi_dst];
+                    }
+                    else
+                    {
+                        /* Only src represents an MPI device 
+                         * So we add latency between src and master */
+                        latency += mpi_latency_device_to_device[mpi_src][mpi_master];
+                    }
+                }
+                else
+                {
+                    if (dst > ncuda + nopencl + nmic && dst <= ncuda + nopencl + nmic + nmpi_ms)
+                    {
+                        /* Only dst identifies an MPI device 
+                         * So we add latency between master and dst */
+                        latency += mpi_latency_device_to_device[mpi_master][mpi_dst];
+                    }
+                }
 #endif
 #endif
 			}
 			}
 
 
@@ -1448,11 +1475,40 @@ static void write_bus_bandwidth_file_content(void)
 					slowness += mic_time_host_to_device[dst - (ncuda + nopencl)];
 					slowness += mic_time_host_to_device[dst - (ncuda + nopencl)];
 #endif
 #endif
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
+                /* Map the slave indices of src and dst to MPI ranks by skipping over the master's rank,
+                 * because only slaves are counted as MPI devices here */
+                int mpi_master = _starpu_mpi_common_get_src_node();
+
+                int mpi_src = src - (ncuda + nopencl + nmic) - 1;
+                mpi_src = (mpi_master <= mpi_src) ? mpi_src+1 : mpi_src;
+                
+                int mpi_dst = dst - (ncuda + nopencl + nmic) - 1;
+                mpi_dst = (mpi_master <= mpi_dst) ? mpi_dst+1 : mpi_dst;
+
                 /* here we have bandwidth */
                 /* here we have bandwidth */
 				if (src > ncuda + nopencl + nmic && src <= ncuda + nopencl + nmic + nmpi_ms)
 				if (src > ncuda + nopencl + nmic && src <= ncuda + nopencl + nmic + nmpi_ms)
-					slowness += 1.0/mpi_time_device_to_host[src - (ncuda + nopencl + nmic) - 1];
-				if (dst > ncuda + nopencl + nmic && dst <= ncuda + nopencl + nmic +nmpi_ms)
-					slowness += 1.0/mpi_time_host_to_device[dst - (ncuda + nopencl + nmic) - 1];
+                {
+                    if (dst > ncuda + nopencl + nmic && dst <= ncuda + nopencl + nmic + nmpi_ms)
+                    {
+                        /* src and dst identify 2 MPI devices */
+					    slowness += 1.0/mpi_time_device_to_device[mpi_src][mpi_dst];
+                    }
+                    else
+                    {
+                        /* Only src represents an MPI device 
+                         * So we add bandwidth between src and master */
+					    slowness += 1.0/mpi_time_device_to_device[mpi_src][mpi_master];
+                    }
+                }
+                else
+                {
+                    if (dst > ncuda + nopencl + nmic && dst <= ncuda + nopencl + nmic + nmpi_ms)
+                    {
+                        /* Only dst identifies an MPI device 
+                         * So we add bandwidth between master and dst */
+					    slowness += 1.0/mpi_time_device_to_device[mpi_master][mpi_dst];
+                    }
+                }
 #endif
 #endif
 				bandwidth = 1.0/slowness;
 				bandwidth = 1.0/slowness;
 			}
 			}

+ 61 - 74
src/drivers/mpi/driver_mpi_common.c

@@ -469,7 +469,7 @@ void _starpu_mpi_common_barrier(void)
 /* Compute bandwidth and latency between source and sink nodes
 /* Compute bandwidth and latency between source and sink nodes
  * Source node has to have the entire set of times at the end
  * Source node has to have the entire set of times at the end
  */
  */
-void _starpu_mpi_common_measure_bandwidth_latency(double * bandwidth_htod, double * bandwidth_dtoh, double * latency_htod, double * latency_dtoh)
+void _starpu_mpi_common_measure_bandwidth_latency(double bandwidth_dtod[STARPU_MAXMPIDEVS][STARPU_MAXMPIDEVS], double latency_dtod[STARPU_MAXMPIDEVS][STARPU_MAXMPIDEVS])
 {
 {
     int ret;
     int ret;
     unsigned iter;
     unsigned iter;
@@ -482,93 +482,80 @@ void _starpu_mpi_common_measure_bandwidth_latency(double * bandwidth_htod, doubl
     _STARPU_MALLOC(buf, SIZE_BANDWIDTH);
     _STARPU_MALLOC(buf, SIZE_BANDWIDTH);
     memset(buf, 0, SIZE_BANDWIDTH);
     memset(buf, 0, SIZE_BANDWIDTH);
 
 
-    unsigned node;
-    unsigned id = 0;
-    for(node = 0; node < nb_proc; node++)
+    unsigned sender, receiver;
+    for(sender = 0; sender < nb_proc; sender++)
     {
     {
-        MPI_Barrier(MPI_COMM_WORLD);
-
-        //Don't measure link master <-> master
-        if(node == src_node_id)
-            continue;
-
-        if(_starpu_mpi_common_is_src_node())
+        for(receiver = 0; receiver < nb_proc; receiver++) 
         {
         {
-            double start, end;
+            MPI_Barrier(MPI_COMM_WORLD);
 
 
-            /* measure bandwidth host to device */
-            start = starpu_timing_now();
-            for (iter = 0; iter < NITER; iter++)
-            {
-                ret = MPI_Send(buf, SIZE_BANDWIDTH, MPI_BYTE, node, node, MPI_COMM_WORLD); 
-                STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "Bandwidth of MPI Master/Slave cannot be measured !");
-            }
-            end = starpu_timing_now();
-            bandwidth_htod[id] = (NITER*1000000)/(end - start);
+            //A node can't be both the sender and the receiver of the same link
+            if(sender == receiver)
+                continue;
 
 
-            /* measure bandwidth device to host */
-            start = starpu_timing_now();
-            for (iter = 0; iter < NITER; iter++)
+            if(id_proc == sender)
             {
             {
-                ret = MPI_Recv(buf, SIZE_BANDWIDTH, MPI_BYTE, node, node, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-                STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "Bandwidth of MPI Master/Slave cannot be measured !");
+                double start, end;
+
+                /* measure bandwidth sender to receiver */
+                start = starpu_timing_now();
+                for (iter = 0; iter < NITER; iter++)
+                {
+                    ret = MPI_Send(buf, SIZE_BANDWIDTH, MPI_BYTE, receiver, 42, MPI_COMM_WORLD); 
+                    STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "Bandwidth of MPI Master/Slave cannot be measured !");
+                }
+                end = starpu_timing_now();
+                bandwidth_dtod[sender][receiver] = (NITER*1000000)/(end - start);
+
+                /* measure latency sender to receiver */
+                start = starpu_timing_now();
+                for (iter = 0; iter < NITER; iter++)
+                {
+                    ret = MPI_Send(buf, 1, MPI_BYTE, receiver, 42, MPI_COMM_WORLD); 
+                    STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "Latency of MPI Master/Slave cannot be measured !");
+                }
+                end = starpu_timing_now();
+                latency_dtod[sender][receiver] = (end - start)/NITER;
             }
             }
-            end = starpu_timing_now();
-            bandwidth_dtoh[id] = (NITER*1000000)/(end - start);
 
 
-            /* measure latency host to device */
-            start = starpu_timing_now();
-            for (iter = 0; iter < NITER; iter++)
+            if (id_proc == receiver)
             {
             {
-                ret = MPI_Send(buf, 1, MPI_BYTE, node, node, MPI_COMM_WORLD); 
-                STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "Latency of MPI Master/Slave cannot be measured !");
+                /* measure bandwidth sender to receiver*/
+                for (iter = 0; iter < NITER; iter++)
+                {
+                    ret = MPI_Recv(buf, SIZE_BANDWIDTH, MPI_BYTE, sender, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+                    STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "Bandwidth of MPI Master/Slave cannot be measured !");
+                }
+
+                /* measure latency sender to receiver */
+                for (iter = 0; iter < NITER; iter++)
+                {
+                    ret = MPI_Recv(buf, 1, MPI_BYTE, sender, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+                    STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "Bandwidth of MPI Master/Slave cannot be measured !");
+                }
             }
             }
-            end = starpu_timing_now();
-            latency_htod[id] = (end - start)/NITER;
-
-            /* measure latency device to host */
-            start = starpu_timing_now();
-            for (iter = 0; iter < NITER; iter++)
-            {
-                ret = MPI_Recv(buf, 1, MPI_BYTE, node, node, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-                STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "Bandwidth of MPI Master/Slave cannot be measured !");
-            }
-            end = starpu_timing_now();
-            latency_dtoh[id] = (end - start)/NITER;
-
         }
         }
-        else if (node == id_proc) /* if we are the sink node evaluated */
-        {
-            /* measure bandwidth host to device */
-            for (iter = 0; iter < NITER; iter++)
-            {
-                ret = MPI_Recv(buf, SIZE_BANDWIDTH, MPI_BYTE, src_node_id, node, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-                STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "Bandwidth of MPI Master/Slave cannot be measured !");
-            }
 
 
-            /* measure bandwidth device to host */
-            for (iter = 0; iter < NITER; iter++)
-            {
-                ret = MPI_Send(buf, SIZE_BANDWIDTH, MPI_BYTE, src_node_id, node, MPI_COMM_WORLD); 
-                STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "Bandwidth of MPI Master/Slave cannot be measured !");
-            }
-
-            /* measure latency host to device */
-            for (iter = 0; iter < NITER; iter++)
-            {
-                ret = MPI_Recv(buf, 1, MPI_BYTE, src_node_id, node, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-                STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "Bandwidth of MPI Master/Slave cannot be measured !");
-            }
+        /* Once a sender has finished its measurements, it has to send its results to the master */
+        
+        /* The master doesn't need to send its own data to itself */
+        if (sender == src_node_id)
+            continue;
+        
+        /* if we are the sender, we send the data */
+        if (sender == id_proc)
+        {
+            MPI_Send(bandwidth_dtod[sender], STARPU_MAXMPIDEVS, MPI_DOUBLE, src_node_id, 42, MPI_COMM_WORLD);
+            MPI_Send(latency_dtod[sender], STARPU_MAXMPIDEVS, MPI_DOUBLE, src_node_id, 42, MPI_COMM_WORLD);
+        }
 
 
-            /* measure latency device to host */
-            for (iter = 0; iter < NITER; iter++)
-            {
-                ret = MPI_Send(buf, 1, MPI_BYTE, src_node_id, node, MPI_COMM_WORLD); 
-                STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "Latency of MPI Master/Slave cannot be measured !");
-            }
+        /* the master node receives the data */
+        if (src_node_id == id_proc)
+        {
+            MPI_Recv(bandwidth_dtod[sender], STARPU_MAXMPIDEVS, MPI_DOUBLE, sender, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            MPI_Recv(latency_dtod[sender], STARPU_MAXMPIDEVS, MPI_DOUBLE, sender, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
         }
         }
 
 
-        id++;
     }
     }
     free(buf);
     free(buf);
 }
 }

+ 1 - 1
src/drivers/mpi/driver_mpi_common.h

@@ -48,7 +48,7 @@ void _starpu_mpi_common_wait_event(struct _starpu_async_channel * event);
 
 
 void _starpu_mpi_common_barrier(void);
 void _starpu_mpi_common_barrier(void);
 
 
-void _starpu_mpi_common_measure_bandwidth_latency(double * bandwidth_htod, double * bandwidth_dtoh, double * latency_htod, double * latency_dtoh);
+void _starpu_mpi_common_measure_bandwidth_latency(double bandwidth_dtod[STARPU_MAXMPIDEVS][STARPU_MAXMPIDEVS], double latency_dtod[STARPU_MAXMPIDEVS][STARPU_MAXMPIDEVS]);
 
 
 
 
 #endif  /* STARPU_USE_MPI_MASTER_SLAVE */
 #endif  /* STARPU_USE_MPI_MASTER_SLAVE */