瀏覽代碼

Add a memcpy_peer property to CUDA GPUs to record whether each one is able to perform GPU-GPU transfers, so that SimGrid can enable such transfers when appropriate

Samuel Thibault 10 年之前
父節點
當前提交
88a89370aa

+ 13 - 2
src/core/perfmodel/perfmodel_bus.c

@@ -1707,10 +1707,21 @@ static void write_bus_platform_file_content(void)
 		fprintf(f, "   <host id='CPU%d' power='2000000000'/>\n", i);
 
 	for (i = 0; i < ncuda; i++)
-		fprintf(f, "   <host id='CUDA%d' power='2000000000'>\n    <prop id='memsize' value='%llu'/>\n   </host>\n", i, (unsigned long long) cuda_size[i]);
+	{
+		fprintf(f, "   <host id='CUDA%d' power='2000000000'>\n", i);
+		fprintf(f, "     <prop id='memsize' value='%llu'/>\n", (unsigned long long) cuda_size[i]);
+#ifdef STARPU_HAVE_CUDA_MEMCPY_PEER
+		fprintf(f, "     <prop id='memcpy_peer' value='1'/>\n");
+#endif
+		fprintf(f, "   </host>\n");
+	}
 
 	for (i = 0; i < nopencl; i++)
-		fprintf(f, "   <host id='OpenCL%d' power='2000000000'>\n    <prop id='memsize' value='%llu'/>\n   </host>\n", i, (unsigned long long) opencl_size[i]);
+	{
+		fprintf(f, "   <host id='OpenCL%d' power='2000000000'>\n", i);
+		fprintf(f, "     <prop id='memsize' value='%llu'/>\n", (unsigned long long) opencl_size[i]);
+		fprintf(f, "   </host>\n");
+	}
 
 	fprintf(f, "\n   <host id='RAM' power='1'/>\n");
 

+ 20 - 9
src/core/topology.c

@@ -1325,24 +1325,35 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_c
 					_starpu_register_bus(STARPU_MAIN_RAM, memory_node);
 					_starpu_register_bus(memory_node, STARPU_MAIN_RAM);
 #ifdef STARPU_SIMGRID
+					const char* cuda_memcpy_peer;
 					snprintf(name, sizeof(name), "CUDA%d", devid);
 					host = _starpu_simgrid_get_host_by_name(name);
 					STARPU_ASSERT(host);
 					_starpu_simgrid_memory_node_set_host(memory_node, host);
+					cuda_memcpy_peer = MSG_host_get_property_value(host, "memcpy_peer");
 #endif /* SIMGRID */
-#ifdef HAVE_CUDA_MEMCPY_PEER
-					unsigned worker2;
-					for (worker2 = 0; worker2 < worker; worker2++)
+					if (
+#ifdef STARPU_SIMGRID
+						cuda_memcpy_peer && atoll(cuda_memcpy_peer)
+#elif defined(HAVE_CUDA_MEMCPY_PEER)
+						1
+#else /* MEMCPY_PEER */
+						0
+#endif /* MEMCPY_PEER */
+					   )
 					{
-						struct _starpu_worker *workerarg2 = &config->workers[worker2];
-						if (workerarg2->arch == STARPU_CUDA_WORKER)
+						unsigned worker2;
+						for (worker2 = 0; worker2 < worker; worker2++)
 						{
-							unsigned memory_node2 = starpu_worker_get_memory_node(worker2);
-							_starpu_register_bus(memory_node2, memory_node);
-							_starpu_register_bus(memory_node, memory_node2);
+							struct _starpu_worker *workerarg2 = &config->workers[worker2];
+							if (workerarg2->arch == STARPU_CUDA_WORKER)
+							{
+								unsigned memory_node2 = starpu_worker_get_memory_node(worker2);
+								_starpu_register_bus(memory_node2, memory_node);
+								_starpu_register_bus(memory_node, memory_node2);
+							}
 						}
 					}
-#endif /* MEMCPY_PEER */
 				}
 				_starpu_memory_node_add_nworkers(memory_node);
 				break;

+ 24 - 5
src/datawizard/coherency.c

@@ -28,6 +28,11 @@
 #include <starpu_scheduler.h>
 #include <core/workers.h>
 
+#ifdef STARPU_SIMGRID
+#include <msg/msg.h>
+#include <core/simgrid.h>
+#endif
+
 static int link_supports_direct_transfers(starpu_data_handle_t handle, unsigned src_node, unsigned dst_node, unsigned *handling_node);
 int _starpu_select_src_node(starpu_data_handle_t handle, unsigned destination)
 {
@@ -217,17 +222,31 @@ static int worker_supports_direct_access(unsigned node, unsigned handling_node)
 	switch (type)
 	{
 		case STARPU_CUDA_RAM:
-#ifdef HAVE_CUDA_MEMCPY_PEER
 		{
-			enum starpu_node_kind kind = starpu_node_get_kind(handling_node);
 			/* GPUs not always allow direct remote access: if CUDA4
 			 * is enabled, we allow two CUDA devices to communicate. */
+#ifdef STARPU_SIMGRID
+			if (starpu_node_get_kind(handling_node) == STARPU_CUDA_RAM)
+			{
+				char name[16];
+				msg_host_t host;
+				const char* cuda_memcpy_peer;
+				snprintf(name, sizeof(name), "CUDA%d", _starpu_memory_node_get_devid(handling_node));
+				host = _starpu_simgrid_get_host_by_name(name);
+				cuda_memcpy_peer = MSG_host_get_property_value(host, "memcpy_peer");
+				return cuda_memcpy_peer && atoll(cuda_memcpy_peer);
+			}
+			else
+				return 0;
+#elif defined(HAVE_CUDA_MEMCPY_PEER)
+			/* simgrid */
+			enum starpu_node_kind kind = starpu_node_get_kind(handling_node);
 			return kind == STARPU_CUDA_RAM;
-		}
-#else
+#else /* HAVE_CUDA_MEMCPY_PEER */
 			/* Direct GPU-GPU transfers are not allowed in general */
 			return 0;
-#endif
+#endif /* HAVE_CUDA_MEMCPY_PEER */
+		}
 		case STARPU_OPENCL_RAM:
 			return 0;
 		case STARPU_MIC_RAM:

+ 3 - 0
tools/perfmodels/sampling/bus/attila.platform.xml

@@ -22,12 +22,15 @@
    <host id='CPU11' power='2000000000'/>
    <host id='CUDA0' power='2000000000'>
     <prop id='memsize' value='3220897792'/>
+    <prop id='memcpy_peer' value='1'/>
    </host>
    <host id='CUDA1' power='2000000000'>
     <prop id='memsize' value='3220897792'/>
+    <prop id='memcpy_peer' value='1'/>
    </host>
    <host id='CUDA2' power='2000000000'>
     <prop id='memsize' value='3220897792'/>
+    <prop id='memcpy_peer' value='1'/>
    </host>
    <host id='OpenCL0' power='2000000000'>
     <prop id='memsize' value='3220897792'/>

+ 3 - 0
tools/perfmodels/sampling/bus/mirage.platform.xml

@@ -22,12 +22,15 @@
    <host id='CPU11' power='2000000000'/>
    <host id='CUDA0' power='2000000000'>
     <prop id='memsize' value='5636554752'/>
+    <prop id='memcpy_peer' value='1'/>
    </host>
    <host id='CUDA1' power='2000000000'>
     <prop id='memsize' value='5636554752'/>
+    <prop id='memcpy_peer' value='1'/>
    </host>
    <host id='CUDA2' power='2000000000'>
     <prop id='memsize' value='5636554752'/>
+    <prop id='memcpy_peer' value='1'/>
    </host>
    <host id='OpenCL0' power='2000000000'>
     <prop id='memsize' value='5636554752'/>