6 years ago · aeadbc9262
--- a/src/core/perfmodel/perfmodel.c
+++ b/src/core/perfmodel/perfmodel.c
@@ -328,7 +328,20 @@ double starpu_data_expected_transfer_time(starpu_data_handle_t handle, unsigned
 
				 		/* Will just create it in place. Ideally we should take the
			
 
				 		 * time to create it into account */
			
 
				 		return 0.0;
			
 
				-	return starpu_transfer_predict(src_node, memory_node, size);
			
 
				+
			
 
				+#define MAX_REQUESTS 4
			
 
				+	unsigned src_nodes[MAX_REQUESTS];
			
 
				+	unsigned dst_nodes[MAX_REQUESTS];
			
 
				+	unsigned handling_nodes[MAX_REQUESTS];
			
 
				+	int nhops = _starpu_determine_request_path(handle, src_node, memory_node, mode,
			
 
				+			MAX_REQUESTS,
			
 
				+			src_nodes, dst_nodes, handling_nodes, 0);
			
 
				+	int i;
			
 
				+	double duration = 0.;
			
 
				+
			
 
				+	for (i = 0; i < nhops; i++)
			
 
				+		duration += starpu_transfer_predict(src_nodes[i], dst_nodes[i], size);
			
 
				+	return duration;
			
 
				 }
			
 
				 
			
 
				 /* Data transfer performance modeling */
			
--- a/src/core/perfmodel/perfmodel_bus.c
+++ b/src/core/perfmodel/perfmodel_bus.c
@@ -2997,6 +2997,9 @@ double starpu_transfer_predict(unsigned src_node, unsigned dst_node, size_t size
 
				 	int direct = starpu_bus_get_direct(busid);
			
 
				 #endif
			
 
				 	float ngpus = topology->ncudagpus+topology->nopenclgpus;
			
 
				+#ifdef STARPU_DEVEL
			
 
				+#warning FIXME: ngpus shouldn't be used e.g. for slow disk transfers...
			
 
				+#endif
			
 
				 
			
 
				 #if 0
			
 
				 	/* Ideally we should take into account that some GPUs are directly
			
@@ -3011,6 +3014,7 @@ double starpu_transfer_predict(unsigned src_node, unsigned dst_node, size_t size
 
				 	}
			
 
				 #endif
			
 
				 
			
 
				+
			
 
				 	return latency + (size/bandwidth)*2*ngpus;
			
 
				 }
			
 
				 
			
--- a/src/datawizard/coherency.c
+++ b/src/datawizard/coherency.c
@@ -346,7 +346,7 @@ static unsigned chose_best_numa_between_src_and_dest(int src, int dst)
 
				  * node that handles the hop. The returned value indicates the number of hops,
			
 
				  * and the max_len is the maximum number of hops (i.e. the size of the
			
 
				  * src_nodes, dst_nodes and handling_nodes arrays). */
			
 
				-static int determine_request_path(starpu_data_handle_t handle,
			
 
				+int _starpu_determine_request_path(starpu_data_handle_t handle,
			
 
				 				  int src_node, int dst_node,
			
 
				 				  enum starpu_data_access_mode mode, int max_len,
			
 
				 				  unsigned *src_nodes, unsigned *dst_nodes,
			
@@ -630,7 +630,7 @@ struct _starpu_data_request *_starpu_create_request_to_fetch_data(starpu_data_ha
 
				 	unsigned src_nodes[MAX_REQUESTS], dst_nodes[MAX_REQUESTS], handling_nodes[MAX_REQUESTS];
			
 
				 	/* keep one slot for the last W request, if any */
			
 
				 	int write_invalidation = (mode & STARPU_W) && nwait && !is_prefetch;
			
 
				-	int nhops = determine_request_path(handle, src_node, requesting_node, mode, MAX_REQUESTS,
			
 
				+	int nhops = _starpu_determine_request_path(handle, src_node, requesting_node, mode, MAX_REQUESTS,
			
 
				 					   src_nodes, dst_nodes, handling_nodes, write_invalidation);
			
 
				 
			
 
				 	STARPU_ASSERT(nhops >= 0 && nhops <= MAX_REQUESTS-1);
			
--- a/src/datawizard/coherency.h
+++ b/src/datawizard/coherency.h
@@ -323,6 +323,11 @@ void _starpu_fetch_task_input_tail(struct starpu_task *task, struct _starpu_job
 
				 void _starpu_fetch_nowhere_task_input(struct _starpu_job *j);
			
 
				 
			
 
				 int _starpu_select_src_node(struct _starpu_data_state *state, unsigned destination);
			
 
				+int _starpu_determine_request_path(starpu_data_handle_t handle,
			
 
				+				  int src_node, int dst_node,
			
 
				+				  enum starpu_data_access_mode mode, int max_len,
			
 
				+				  unsigned *src_nodes, unsigned *dst_nodes,
			
 
				+				  unsigned *handling_nodes, unsigned write_invalidation);
			
 
				 
			
 
				 /* is_prefetch is whether the DSM may drop the request (when there is not enough memory for instance)
			
 
				  * async is whether the caller wants a reference on the last request, to be