Pārlūkot izejas kodu

Rework prefetching

Always prefetching as soon as we know the memory node is not necessarily
a good idea, since the scheduler might be reordering tasks...

Move the nb_tasks_prefetch accounting to when the request actually
completes; otherwise another task's prefetch might come in between and
be missed in req->nb_tasks_prefetch.

Decrementing nb_tasks_prefetch should only be done for data that we
actually prefetch (not scratch, not redux, only actual reads).

Make starpu_prefetch_task_input_on_node_prio and
starpu_prefetch_task_input_for_prio set the prefetched flag, rather than
the scheduler, so we really control that there is only one prefetch.

Drop spurious starpu_prefetch_task_input_for call.
Samuel Thibault 5 gadi atpakaļ
vecāks
revīzija
2324d664c2

+ 2 - 4
src/core/sched_policy.c

@@ -570,9 +570,6 @@ int _starpu_push_task_to_workers(struct starpu_task *task)
 	int ret = 0;
 	if (STARPU_UNLIKELY(task->execute_on_a_specific_worker))
 	{
-		if (starpu_get_prefetch_flag())
-			starpu_prefetch_task_input_for(task, task->workerid);
-
 		ret = _starpu_push_task_on_specific_worker(task, task->workerid);
 	}
 	else
@@ -582,7 +579,8 @@ int _starpu_push_task_to_workers(struct starpu_task *task)
 		/* When a task can only be executed on a given arch and we have
 		 * only one memory node for that arch, we can systematically
 		 * prefetch before the scheduling decision. */
-		if (starpu_get_prefetch_flag() && starpu_memory_nodes_get_count() > 1)
+		/* XXX: no, this is definitely not a good idea if the scheduler reorders tasks... */
+		if (0 && starpu_get_prefetch_flag() && starpu_memory_nodes_get_count() > 1)
 		{
 			if (task->where == STARPU_CPU && config->cpus_nodeid >= 0)
 				starpu_prefetch_task_input_on_node(task, config->cpus_nodeid);

+ 6 - 1
src/datawizard/coherency.c

@@ -761,6 +761,7 @@ int _starpu_fetch_data_on_node(starpu_data_handle_t handle, int node, struct _st
 		if (src_node_mask == 0)
 		{
 			/* no valid copy, nothing to prefetch */
+			_STARPU_DISP("Warning: no valid copy to prefetch?! that's not supposed to happen, please report\n");
 			_starpu_spin_unlock(&handle->header_lock);
 			return 0;
 		}
@@ -937,6 +938,7 @@ int starpu_prefetch_task_input_on_node_prio(struct starpu_task *task, unsigned t
 
 		_starpu_set_data_requested_flag_if_needed(handle, replicate);
 	}
+	task->prefetched = 1;
 
 	return 0;
 }
@@ -1014,6 +1016,7 @@ int starpu_prefetch_task_input_for_prio(struct starpu_task *task, unsigned worke
 
 		_starpu_set_data_requested_flag_if_needed(handle, replicate);
 	}
+	task->prefetched = 1;
 
 	return 0;
 }
@@ -1247,7 +1250,9 @@ void _starpu_fetch_task_input_tail(struct starpu_task *task, struct _starpu_job
 		if (local_replicate->mc)
 		{
 			local_replicate->mc->diduse = 1;
-			if (task->prefetched)
+			if (task->prefetched &&
+				!(mode & (STARPU_SCRATCH|STARPU_REDUX)) &&
+				(mode & STARPU_R))
 			{
 				/* Allocations or transfer prefetchs should have been done by now and marked
 				 * this mc as needed for us.

+ 0 - 4
src/datawizard/copy_driver.c

@@ -244,10 +244,6 @@ int STARPU_ATTRIBUTE_WARN_UNUSED_RESULT _starpu_driver_copy_data_1_to_1(starpu_d
 
 		dst_replicate->initialized = 1;
 
-		if (dst_replicate->mc)
-			/* When we have the data there, make sure it stays there for the task.  */
-			dst_replicate->mc->nb_tasks_prefetch += req->nb_tasks_prefetch;
-
 		_STARPU_TRACE_START_DRIVER_COPY(src_node, dst_node, size, com_id, prefetch, handle);
 		int ret_copy = copy_data_1_to_1_generic(handle, src_replicate, dst_replicate, req);
 		if (!req)

+ 4 - 0
src/datawizard/data_request.c

@@ -414,6 +414,10 @@ static void starpu_handle_data_request_completion(struct _starpu_data_request *r
 	/* Remove a reference on the destination replicate for the request */
 	if (dst_replicate)
 	{
+		if (dst_replicate->mc)
+			/* Make sure it stays there for the task.  */
+			dst_replicate->mc->nb_tasks_prefetch += r->nb_tasks_prefetch;
+
 		STARPU_ASSERT(dst_replicate->refcnt > 0);
 		dst_replicate->refcnt--;
 	}

+ 0 - 1
src/sched_policies/component_sched.c

@@ -160,7 +160,6 @@ void starpu_sched_component_prefetch_on_node(struct starpu_sched_component * com
 		int worker = starpu_bitmap_first(&component->workers_in_ctx);
 		unsigned memory_node = starpu_worker_get_memory_node(worker);
 		starpu_prefetch_task_input_on_node(task, memory_node);
-		task->prefetched = 1;
 	}
 }