Pārlūkot izejas kodu

mpi: untested proof of concept for choosing the node to acquire

Samuel Thibault 4 gadi atpakaļ
vecāks
revīzija
b24794b377
1 mainīts fails ar 81 papildinājumu un 2 dzēšanām
  1. 81 2
      mpi/src/starpu_mpi.c

+ 81 - 2
mpi/src/starpu_mpi.c

@@ -34,10 +34,89 @@
 #include <core/task.h>
 #include <core/topology.h>
 
-int _starpu_mpi_choose_node(starpu_data_handle_t data_handle, enum starpu_data_access_mode mode)
+int _starpu_mpi_choose_node(starpu_data_handle_t handle, enum starpu_data_access_mode mode)
 {
-	/* TODO */
 	return STARPU_MAIN_RAM;
+
+	/* TODO: this is completely untested, so the unconditional return above keeps the node selection below disabled */
+	if (mode & STARPU_W)
+	{
+		/* TODO: lookup NIC location */
+		/* Write access: choose the memory node in which the data will be received */
+		if (handle->home_node >= 0 && starpu_node_get_kind(handle->home_node) == STARPU_CPU_RAM)
+			/* For now, better use the home node to avoid duplicates */
+			return handle->home_node;
+
+		if (starpu_memory_nodes_get_numa_count() == 1)
+			return STARPU_MAIN_RAM;
+
+		/* Several potential places */
+		unsigned i;
+		for (i = 0; i < STARPU_MAXNODES; i++)
+		{
+			/* TODO: we may want to take as a hint that it's allocated on the GPU as
+			 * a clue that we want to push to the GPU */
+			if (starpu_node_get_kind(i) == STARPU_CPU_RAM &&
+				handle->per_node[i].allocated)
+				/* This node already has allocated buffers, let's just use it */
+				return i;
+		}
+
+		/* No luck, take the CPU RAM node with the most available memory that can hold the data */
+		starpu_ssize_t maximum = 0;
+		starpu_ssize_t needed = _starpu_data_get_alloc_size(handle);
+		unsigned node = STARPU_MAIN_RAM; /* fallback when no node passes the test below */
+
+		for (i = 0; i < STARPU_MAXNODES; i++)
+		{
+			if (starpu_node_get_kind(i) == STARPU_CPU_RAM)
+			{
+				starpu_ssize_t size = starpu_memory_get_available(i);
+				if (size >= needed && size > maximum)
+				{
+					node = i;
+					maximum = size;
+				}
+			}
+		}
+		return node;
+	}
+	else
+	{
+		if (starpu_memory_nodes_get_numa_count() == 1)
+			return STARPU_MAIN_RAM;
+
+		/* Read-only access with several potential places: look for a CPU RAM node whose copy is not invalid */
+		unsigned i;
+		for (i = 0; i < STARPU_MAXNODES; i++)
+		{
+			/* TODO: GPUDirect */
+			if (starpu_node_get_kind(i) == STARPU_CPU_RAM &&
+				handle->per_node[i].state != STARPU_INVALID)
+				/* This node already has the value, let's just use it */
+				/* TODO: rather pick up place next to NIC */
+				return i;
+		}
+
+		/* No luck, take the CPU RAM node with the most available memory, to transfer from e.g. GPU */
+		starpu_ssize_t maximum = 0;
+		starpu_ssize_t needed = _starpu_data_get_alloc_size(handle);
+		unsigned node = STARPU_MAIN_RAM; /* fallback when no node passes the test below */
+
+		for (i = 0; i < STARPU_MAXNODES; i++)
+		{
+			if (starpu_node_get_kind(i) == STARPU_CPU_RAM)
+			{
+				starpu_ssize_t size = starpu_memory_get_available(i);
+				if (size >= needed && size > maximum)
+				{
+					node = i;
+					maximum = size;
+				}
+			}
+		}
+		return node;
+	}
 }
 
 static void _starpu_mpi_acquired_callback(void *arg, int *nodep, enum starpu_data_access_mode mode)