|
@@ -36,9 +36,9 @@ unsigned _starpu_select_src_node(starpu_data_handle_t handle, unsigned destinati
|
|
/* first find a valid copy, either a STARPU_OWNER or a STARPU_SHARED */
|
|
/* first find a valid copy, either a STARPU_OWNER or a STARPU_SHARED */
|
|
unsigned node;
|
|
unsigned node;
|
|
|
|
|
|
- unsigned src_node_mask = 0;
|
|
|
|
size_t size = _starpu_data_get_size(handle);
|
|
size_t size = _starpu_data_get_size(handle);
|
|
double cost = INFINITY;
|
|
double cost = INFINITY;
|
|
|
|
+ unsigned src_node_mask = 0;
|
|
|
|
|
|
for (node = 0; node < nnodes; node++)
|
|
for (node = 0; node < nnodes; node++)
|
|
{
|
|
{
|
|
@@ -90,22 +90,19 @@ unsigned _starpu_select_src_node(starpu_data_handle_t handle, unsigned destinati
|
|
int i_ram = -1;
|
|
int i_ram = -1;
|
|
int i_gpu = -1;
|
|
int i_gpu = -1;
|
|
int i_disk = -1;
|
|
int i_disk = -1;
|
|
-
|
|
|
|
|
|
+
|
|
/* Revert to dumb strategy: take RAM unless only a GPU has it */
|
|
/* Revert to dumb strategy: take RAM unless only a GPU has it */
|
|
for (i = 0; i < nnodes; i++)
|
|
for (i = 0; i < nnodes; i++)
|
|
{
|
|
{
|
|
|
|
+
|
|
if (src_node_mask & (1<<i))
|
|
if (src_node_mask & (1<<i))
|
|
{
|
|
{
|
|
/* however GPU are expensive sources, really !
|
|
/* however GPU are expensive sources, really !
|
|
* Unless peer transfer is supported.
|
|
* Unless peer transfer is supported.
|
|
* Other should be ok */
|
|
* Other should be ok */
|
|
|
|
|
|
- if (
|
|
|
|
-#ifndef HAVE_CUDA_MEMCPY_PEER
|
|
|
|
- starpu_node_get_kind(i) == STARPU_CUDA_RAM ||
|
|
|
|
-#endif
|
|
|
|
- starpu_node_get_kind(i) == STARPU_OPENCL_RAM)
|
|
|
|
- /* we save it, but we don't use it (reason above) */
|
|
|
|
|
|
+ if (starpu_node_get_kind(i) == STARPU_CUDA_RAM ||
|
|
|
|
+ starpu_node_get_kind(i) == STARPU_OPENCL_RAM)
|
|
i_gpu = i;
|
|
i_gpu = i;
|
|
|
|
|
|
if (starpu_node_get_kind(i) == STARPU_CPU_RAM)
|
|
if (starpu_node_get_kind(i) == STARPU_CPU_RAM)
|
|
@@ -114,13 +111,15 @@ unsigned _starpu_select_src_node(starpu_data_handle_t handle, unsigned destinati
|
|
i_disk = i;
|
|
i_disk = i;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
+
|
|
/* we have to use cpu_ram in first */
|
|
/* we have to use cpu_ram in first */
|
|
if (i_ram != -1)
|
|
if (i_ram != -1)
|
|
src_node = i_ram;
|
|
src_node = i_ram;
|
|
/* no luck we have to use the disk memory */
|
|
/* no luck we have to use the disk memory */
|
|
|
|
+ else if (i_gpu != -1)
|
|
|
|
+ src_node = i_gpu;
|
|
else
|
|
else
|
|
src_node = i_disk;
|
|
src_node = i_disk;
|
|
-
|
|
|
|
|
|
|
|
STARPU_ASSERT(src_node != -1);
|
|
STARPU_ASSERT(src_node != -1);
|
|
|
|
|