fetch_data_on_node(STARPU_W) will not actually transfer the data, so it won't do useless work. We do, however, want to allocate the data for the task; otherwise we might fill the GPU with prefetched data and end up having no room for the allocation.
@@ -932,10 +932,6 @@ int _starpu_prefetch_task_input_prio(struct starpu_task *task, int target_node,
if (mode & (STARPU_SCRATCH|STARPU_REDUX))
continue;
- if (!(mode & STARPU_R))
- /* Don't bother prefetching some data which will be overwritten */
- continue;
-
int node;
if (target_node >= 0)
node = _starpu_task_data_get_node_on_node(task, index, target_node);