|
@@ -34,10 +34,89 @@
|
|
#include <core/task.h>
|
|
#include <core/task.h>
|
|
#include <core/topology.h>
|
|
#include <core/topology.h>
|
|
|
|
|
|
-int _starpu_mpi_choose_node(starpu_data_handle_t data_handle, enum starpu_data_access_mode mode)
|
|
|
|
|
|
+int _starpu_mpi_choose_node(starpu_data_handle_t handle, enum starpu_data_access_mode mode)
|
|
{
|
|
{
|
|
- /* TODO */
|
|
|
|
return STARPU_MAIN_RAM;
|
|
return STARPU_MAIN_RAM;
|
|
|
|
+
|
|
|
|
+ /* TODO: this is completely untested */
|
|
|
|
+ if (mode & STARPU_W)
|
|
|
|
+ {
|
|
|
|
+ /* TODO: lookup NIC location */
|
|
|
|
+ /* Where to receive the data? */
|
|
|
|
+ if (handle->home_node >= 0 && starpu_node_get_kind(handle->home_node) == STARPU_CPU_RAM)
|
|
|
|
+ /* For now, better use the home node to avoid duplicates */
|
|
|
|
+ return handle->home_node;
|
|
|
|
+
|
|
|
|
+ if (starpu_memory_nodes_get_numa_count() == 1)
|
|
|
|
+ return STARPU_MAIN_RAM;
|
|
|
|
+
|
|
|
|
+ /* Several potential places */
|
|
|
|
+ unsigned i;
|
|
|
|
+ for (i = 0; i < STARPU_MAXNODES; i++)
|
|
|
|
+ {
|
|
|
|
+ /* TODO: we may want to take as a hint that it's allocated on the GPU as
|
|
|
|
+ * a clue that we want to push to the GPU */
|
|
|
|
+ if (starpu_node_get_kind(i) == STARPU_CPU_RAM &&
|
|
|
|
+ handle->per_node[i].allocated)
|
|
|
|
+ /* This node already has allocated buffers, let's just use it */
|
|
|
|
+ return i;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /* No luck, take the least loaded node */
|
|
|
|
+ starpu_ssize_t maximum = 0;
|
|
|
|
+ starpu_ssize_t needed = _starpu_data_get_alloc_size(handle);
|
|
|
|
+ unsigned node;
|
|
|
|
+
|
|
|
|
+ for (i = 0; i < STARPU_MAXNODES; i++)
|
|
|
|
+ {
|
|
|
|
+ if (starpu_node_get_kind(i) == STARPU_CPU_RAM)
|
|
|
|
+ {
|
|
|
|
+ starpu_ssize_t size = starpu_memory_get_available(i);
|
|
|
|
+ if (size >= needed && size > maximum)
|
|
|
|
+ {
|
|
|
|
+ node = i;
|
|
|
|
+ maximum = size;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ return node;
|
|
|
|
+ }
|
|
|
|
+ else
|
|
|
|
+ {
|
|
|
|
+ if (starpu_memory_nodes_get_numa_count() == 1)
|
|
|
|
+ return STARPU_MAIN_RAM;
|
|
|
|
+
|
|
|
|
+ /* Several potential places */
|
|
|
|
+ unsigned i;
|
|
|
|
+ for (i = 0; i < STARPU_MAXNODES; i++)
|
|
|
|
+ {
|
|
|
|
+ /* TODO: GPUDirect */
|
|
|
|
+ if (starpu_node_get_kind(i) == STARPU_CPU_RAM &&
|
|
|
|
+ handle->per_node[i].state != STARPU_INVALID)
|
|
|
|
+ /* This node already has the value, let's just use it */
|
|
|
|
+ /* TODO: rather pick up place next to NIC */
|
|
|
|
+ return i;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /* No luck, take the least loaded node, to transfer from e.g. GPU */
|
|
|
|
+ starpu_ssize_t maximum = 0;
|
|
|
|
+ starpu_ssize_t needed = _starpu_data_get_alloc_size(handle);
|
|
|
|
+ unsigned node;
|
|
|
|
+
|
|
|
|
+ for (i = 0; i < STARPU_MAXNODES; i++)
|
|
|
|
+ {
|
|
|
|
+ if (starpu_node_get_kind(i) == STARPU_CPU_RAM)
|
|
|
|
+ {
|
|
|
|
+ starpu_ssize_t size = starpu_memory_get_available(i);
|
|
|
|
+ if (size >= needed && size > maximum)
|
|
|
|
+ {
|
|
|
|
+ node = i;
|
|
|
|
+ maximum = size;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ return node;
|
|
|
|
+ }
|
|
}
|
|
}
|
|
|
|
|
|
static void _starpu_mpi_acquired_callback(void *arg, int *nodep, enum starpu_data_access_mode mode)
|
|
static void _starpu_mpi_acquired_callback(void *arg, int *nodep, enum starpu_data_access_mode mode)
|