|
@@ -56,7 +56,7 @@ uint32_t _starpu_select_src_node(starpu_data_handle handle)
|
|
uint32_t src_node_mask = 0;
|
|
uint32_t src_node_mask = 0;
|
|
for (node = 0; node < nnodes; node++)
|
|
for (node = 0; node < nnodes; node++)
|
|
{
|
|
{
|
|
- if (handle->per_node[node]->state != STARPU_INVALID) {
|
|
|
|
|
|
+ if (handle->per_node[node].state != STARPU_INVALID) {
|
|
/* we found a copy ! */
|
|
/* we found a copy ! */
|
|
src_node_mask |= (1<<node);
|
|
src_node_mask |= (1<<node);
|
|
}
|
|
}
|
|
@@ -107,7 +107,7 @@ void _starpu_update_data_state(starpu_data_handle handle,
|
|
/* the requesting node now has the only valid copy */
|
|
/* the requesting node now has the only valid copy */
|
|
uint32_t node;
|
|
uint32_t node;
|
|
for (node = 0; node < nnodes; node++)
|
|
for (node = 0; node < nnodes; node++)
|
|
- handle->per_node[node]->state = STARPU_INVALID;
|
|
|
|
|
|
+ handle->per_node[node].state = STARPU_INVALID;
|
|
|
|
|
|
requesting_replicate->state = STARPU_OWNER;
|
|
requesting_replicate->state = STARPU_OWNER;
|
|
}
|
|
}
|
|
@@ -118,7 +118,7 @@ void _starpu_update_data_state(starpu_data_handle handle,
|
|
uint32_t node;
|
|
uint32_t node;
|
|
for (node = 0; node < nnodes; node++)
|
|
for (node = 0; node < nnodes; node++)
|
|
{
|
|
{
|
|
- struct starpu_data_replicate_s *replicate = handle->per_node[node];
|
|
|
|
|
|
+ struct starpu_data_replicate_s *replicate = &handle->per_node[node];
|
|
if (replicate->state != STARPU_INVALID)
|
|
if (replicate->state != STARPU_INVALID)
|
|
replicate->state = STARPU_SHARED;
|
|
replicate->state = STARPU_SHARED;
|
|
}
|
|
}
|
|
@@ -169,7 +169,7 @@ static starpu_data_request_t create_new_request_to_fetch_data(starpu_data_handle
|
|
unsigned src_is_a_gpu = (_starpu_get_node_kind(src_node) == STARPU_CUDA_RAM || _starpu_get_node_kind(src_node) == STARPU_OPENCL_RAM);
|
|
unsigned src_is_a_gpu = (_starpu_get_node_kind(src_node) == STARPU_CUDA_RAM || _starpu_get_node_kind(src_node) == STARPU_OPENCL_RAM);
|
|
unsigned dst_is_a_gpu = (_starpu_get_node_kind(requesting_node) == STARPU_CUDA_RAM || _starpu_get_node_kind(requesting_node) == STARPU_OPENCL_RAM);
|
|
unsigned dst_is_a_gpu = (_starpu_get_node_kind(requesting_node) == STARPU_CUDA_RAM || _starpu_get_node_kind(requesting_node) == STARPU_OPENCL_RAM);
|
|
|
|
|
|
- struct starpu_data_replicate_s *src_replicate = handle->per_node[src_node];
|
|
|
|
|
|
+ struct starpu_data_replicate_s *src_replicate = &handle->per_node[src_node];
|
|
|
|
|
|
/* we have to perform 2 successive requests for GPU->GPU transfers */
|
|
/* we have to perform 2 successive requests for GPU->GPU transfers */
|
|
if ((mode & STARPU_R) && (src_is_a_gpu && dst_is_a_gpu)) {
|
|
if ((mode & STARPU_R) && (src_is_a_gpu && dst_is_a_gpu)) {
|
|
@@ -177,7 +177,7 @@ static starpu_data_request_t create_new_request_to_fetch_data(starpu_data_handle
|
|
starpu_data_request_t r_src_to_ram;
|
|
starpu_data_request_t r_src_to_ram;
|
|
starpu_data_request_t r_ram_to_dst;
|
|
starpu_data_request_t r_ram_to_dst;
|
|
|
|
|
|
- struct starpu_data_replicate_s *ram_replicate = handle->per_node[0];
|
|
|
|
|
|
+ struct starpu_data_replicate_s *ram_replicate = &handle->per_node[0];
|
|
|
|
|
|
/* XXX we hardcore 0 as the RAM node ... */
|
|
/* XXX we hardcore 0 as the RAM node ... */
|
|
/* We put a 1 in the number of dependencies because this
|
|
/* We put a 1 in the number of dependencies because this
|
|
@@ -326,7 +326,7 @@ static int fetch_data(starpu_data_handle handle, struct starpu_data_replicate_s
|
|
|
|
|
|
inline uint32_t _starpu_get_data_refcnt(starpu_data_handle handle, uint32_t node)
|
|
inline uint32_t _starpu_get_data_refcnt(starpu_data_handle handle, uint32_t node)
|
|
{
|
|
{
|
|
- return handle->per_node[node]->refcnt;
|
|
|
|
|
|
+ return handle->per_node[node].refcnt;
|
|
}
|
|
}
|
|
|
|
|
|
size_t _starpu_data_get_size(starpu_data_handle handle)
|
|
size_t _starpu_data_get_size(starpu_data_handle handle)
|
|
@@ -394,7 +394,7 @@ int _starpu_prefetch_task_input_on_node(struct starpu_task *task, uint32_t node)
|
|
if (mode & (STARPU_SCRATCH|STARPU_REDUX))
|
|
if (mode & (STARPU_SCRATCH|STARPU_REDUX))
|
|
continue;
|
|
continue;
|
|
|
|
|
|
- struct starpu_data_replicate_s *replicate = handle->per_node[node];
|
|
|
|
|
|
+ struct starpu_data_replicate_s *replicate = &handle->per_node[node];
|
|
prefetch_data_on_node(handle, replicate, mode);
|
|
prefetch_data_on_node(handle, replicate, mode);
|
|
|
|
|
|
_starpu_set_data_requested_flag_if_needed(replicate);
|
|
_starpu_set_data_requested_flag_if_needed(replicate);
|
|
@@ -403,33 +403,6 @@ int _starpu_prefetch_task_input_on_node(struct starpu_task *task, uint32_t node)
|
|
return 0;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
|
|
-static struct starpu_data_replicate_s *initialize_data_replicate(starpu_data_handle handle, unsigned local_memory_node)
|
|
|
|
-{
|
|
|
|
- struct starpu_data_replicate_s *replicate;
|
|
|
|
-
|
|
|
|
- replicate = malloc(sizeof(struct starpu_data_replicate_s));
|
|
|
|
- STARPU_ASSERT(replicate);
|
|
|
|
-
|
|
|
|
- replicate->memory_node = local_memory_node;
|
|
|
|
- replicate->relaxed_coherency = 1;
|
|
|
|
-
|
|
|
|
- size_t interfacesize = handle->ops->interface_size;
|
|
|
|
- replicate->interface = malloc(interfacesize);
|
|
|
|
- STARPU_ASSERT(replicate->interface);
|
|
|
|
- void *ram_interface = handle->per_node[0]->interface;
|
|
|
|
- memcpy(replicate->interface, ram_interface, interfacesize);
|
|
|
|
- replicate->allocated = 0;
|
|
|
|
- replicate->automatically_allocated = 0;
|
|
|
|
-
|
|
|
|
- replicate->state = STARPU_INVALID;
|
|
|
|
- replicate->refcnt = 0;
|
|
|
|
- replicate->handle = handle;
|
|
|
|
- replicate->requested = 0;
|
|
|
|
- replicate->request = NULL;
|
|
|
|
-
|
|
|
|
- return replicate;
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
int _starpu_fetch_task_input(struct starpu_task *task, uint32_t mask)
|
|
int _starpu_fetch_task_input(struct starpu_task *task, uint32_t mask)
|
|
{
|
|
{
|
|
STARPU_TRACE_START_FETCH_INPUT(NULL);
|
|
STARPU_TRACE_START_FETCH_INPUT(NULL);
|
|
@@ -446,44 +419,23 @@ int _starpu_fetch_task_input(struct starpu_task *task, uint32_t mask)
|
|
starpu_data_handle handle = descrs[index].handle;
|
|
starpu_data_handle handle = descrs[index].handle;
|
|
starpu_access_mode mode = descrs[index].mode;
|
|
starpu_access_mode mode = descrs[index].mode;
|
|
|
|
|
|
- void *interface;
|
|
|
|
|
|
+ struct starpu_data_replicate_s *local_replicate;
|
|
|
|
|
|
if (mode & (STARPU_SCRATCH|STARPU_REDUX))
|
|
if (mode & (STARPU_SCRATCH|STARPU_REDUX))
|
|
{
|
|
{
|
|
int workerid = starpu_worker_get_id();
|
|
int workerid = starpu_worker_get_id();
|
|
- struct starpu_data_replicate_s *local_replicate;
|
|
|
|
-
|
|
|
|
- while (_starpu_spin_trylock(&handle->header_lock))
|
|
|
|
- _starpu_datawizard_progress(local_memory_node, 1);
|
|
|
|
-
|
|
|
|
- local_replicate = handle->per_worker[workerid];
|
|
|
|
- if (!local_replicate)
|
|
|
|
- {
|
|
|
|
- local_replicate = initialize_data_replicate(handle, local_memory_node);
|
|
|
|
- handle->per_worker[workerid] = local_replicate;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- _starpu_spin_unlock(&handle->header_lock);
|
|
|
|
-
|
|
|
|
- ret = fetch_data(handle, local_replicate, mode);
|
|
|
|
- if (STARPU_UNLIKELY(ret))
|
|
|
|
- goto enomem;
|
|
|
|
-
|
|
|
|
- interface = local_replicate->interface;
|
|
|
|
|
|
+ local_replicate = &handle->per_worker[workerid];
|
|
}
|
|
}
|
|
else {
|
|
else {
|
|
/* That's a "normal" buffer (R/W) */
|
|
/* That's a "normal" buffer (R/W) */
|
|
|
|
+ local_replicate = &handle->per_node[local_memory_node];
|
|
|
|
+ }
|
|
|
|
|
|
- struct starpu_data_replicate_s *local_replicate;
|
|
|
|
- local_replicate = handle->per_node[local_memory_node];
|
|
|
|
- ret = fetch_data(handle, local_replicate, mode);
|
|
|
|
- if (STARPU_UNLIKELY(ret))
|
|
|
|
- goto enomem;
|
|
|
|
|
|
+ ret = fetch_data(handle, local_replicate, mode);
|
|
|
|
+ if (STARPU_UNLIKELY(ret))
|
|
|
|
+ goto enomem;
|
|
|
|
|
|
- interface = starpu_data_get_interface_on_node(handle, local_memory_node);
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- task->interface[index] = interface;
|
|
|
|
|
|
+ task->interface[index] = local_replicate->interface;
|
|
}
|
|
}
|
|
|
|
|
|
STARPU_TRACE_END_FETCH_INPUT(NULL);
|
|
STARPU_TRACE_END_FETCH_INPUT(NULL);
|
|
@@ -517,12 +469,12 @@ void _starpu_push_task_output(struct starpu_task *task, uint32_t mask)
|
|
if (mode & STARPU_RW)
|
|
if (mode & STARPU_RW)
|
|
{
|
|
{
|
|
unsigned local_node = _starpu_get_local_memory_node();
|
|
unsigned local_node = _starpu_get_local_memory_node();
|
|
- replicate = handle->per_node[local_node];
|
|
|
|
|
|
+ replicate = &handle->per_node[local_node];
|
|
}
|
|
}
|
|
else
|
|
else
|
|
{
|
|
{
|
|
int workerid = starpu_worker_get_id();
|
|
int workerid = starpu_worker_get_id();
|
|
- replicate = handle->per_worker[workerid];
|
|
|
|
|
|
+ replicate = &handle->per_worker[workerid];
|
|
}
|
|
}
|
|
|
|
|
|
_starpu_release_data_on_node(handle, mask, replicate);
|
|
_starpu_release_data_on_node(handle, mask, replicate);
|
|
@@ -541,8 +493,8 @@ unsigned _starpu_is_data_present_or_requested(starpu_data_handle handle, uint32_
|
|
// XXX : this is just a hint, so we don't take the lock ...
|
|
// XXX : this is just a hint, so we don't take the lock ...
|
|
// pthread_spin_lock(&handle->header_lock);
|
|
// pthread_spin_lock(&handle->header_lock);
|
|
|
|
|
|
- if (handle->per_node[node]->state != STARPU_INVALID
|
|
|
|
- || handle->per_node[node]->requested || handle->per_node[node]->request)
|
|
|
|
|
|
+ if (handle->per_node[node].state != STARPU_INVALID
|
|
|
|
+ || handle->per_node[node].requested || handle->per_node[node].request)
|
|
ret = 1;
|
|
ret = 1;
|
|
|
|
|
|
// pthread_spin_unlock(&handle->header_lock);
|
|
// pthread_spin_unlock(&handle->header_lock);
|