浏览代码

Let interfaces declare which transfers they allow with the can_copy method.

Samuel Thibault 10 年之前
父节点
当前提交
aa5c3e5c50
共有 4 个文件被更改,包括 41 次插入2 次删除
  1. 2 0
      ChangeLog
  2. 4 0
      doc/doxygen/chapters/api/data_interfaces.doxy
  3. 2 0
      include/starpu_data_interfaces.h
  4. 33 2
      src/datawizard/coherency.c

+ 2 - 0
ChangeLog

@@ -101,6 +101,8 @@ New features:
   * One can register an existing on-GPU buffer to be used by a handle.
   * Add the starpu_paje_summary statistics tool.
   * Enable gpu-gpu transfers for matrices.
+  * Let interfaces declare which transfers they allow with the can_copy
+    method.
 
 Small changes:
   * Lock performance model files while writing and reading them to avoid

+ 4 - 0
doc/doxygen/chapters/api/data_interfaces.doxy

@@ -55,6 +55,10 @@ provided, it will be used by default if no more specific method is
 provided. It can still be useful to provide more specific method in
 case of e.g. available particular CUDA or OpenCL support.
 \ingroup API_Data_Interfaces
+\var starpu_data_copy_methods::can_copy
+If defined, allows the interface to declare whether it supports transferring
+from \p src_interface on node \p src_node to \p dst_interface on node \p dst_node. If not
+defined, it is assumed that the interface supports all transfers.
 \var starpu_data_copy_methods::ram_to_ram
 Define how to copy data from the \p src_interface interface on the \p
 src_node CPU node to the \p dst_interface interface on the \p dst_node

+ 2 - 0
include/starpu_data_interfaces.h

@@ -37,6 +37,8 @@ extern "C"
 
 struct starpu_data_copy_methods
 {
+	int (*can_copy)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+
 	int (*ram_to_ram)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
 	int (*ram_to_cuda)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
 	int (*ram_to_opencl)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);

+ 33 - 2
src/datawizard/coherency.c

@@ -41,6 +41,8 @@ int _starpu_select_src_node(starpu_data_handle_t handle, unsigned destination)
 	double cost = INFINITY;
 	unsigned src_node_mask = 0;
 
+	const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
+
 	for (node = 0; node < nnodes; node++)
 	{
 		if (handle->per_node[node].state != STARPU_INVALID)
@@ -72,6 +74,15 @@ int _starpu_select_src_node(starpu_data_handle_t handle, unsigned destination)
 				double time = starpu_transfer_predict(i, destination, size);
 				unsigned handling_node;
 
+				/* Avoid transfers which the interface does not want */
+				if (copy_methods->can_copy)
+				{
+					void *src_interface = handle->per_node[i].data_interface;
+					void *dst_interface = handle->per_node[destination].data_interface;
+					if (!copy_methods->can_copy(src_interface, i, dst_interface, destination))
+						continue;
+				}
+
 				/* Avoid indirect transfers */
 				if (!link_supports_direct_transfers(handle, i, destination, &handling_node))
 					continue;
@@ -104,8 +115,28 @@ int _starpu_select_src_node(starpu_data_handle_t handle, unsigned destination)
 		
 		if (src_node_mask & (1<<i))
 		{
+			/* Avoid transfers which the interface does not want */
+			if (copy_methods->can_copy)
+			{
+				void *src_interface = handle->per_node[i].data_interface;
+				void *dst_interface = handle->per_node[destination].data_interface;
+				unsigned handling_node;
+
+				if (!copy_methods->can_copy(src_interface, i, dst_interface, destination))
+					continue;
+
+				if (!link_supports_direct_transfers(handle, i, destination, &handling_node))
+				{
+					/* Avoid going through RAM if the interface does not want it */
+					void *ram_interface = handle->per_node[STARPU_MAIN_RAM].data_interface;
+					if (!copy_methods->can_copy(src_interface, i, ram_interface, STARPU_MAIN_RAM)
+					 || !copy_methods->can_copy(ram_interface, STARPU_MAIN_RAM, dst_interface, destination))
+						continue;
+				}
+			}
+
 			/* however GPU are expensive sources, really !
-			 * 	Unless peer transfer is supported.
+			 * 	Unless peer transfer is supported (and it would then have been selected above).
 			 * 	Other should be ok */
 
 			if (starpu_node_get_kind(i) == STARPU_CUDA_RAM ||
@@ -222,7 +253,7 @@ static int link_supports_direct_transfers(starpu_data_handle_t handle, unsigned
 {
 	(void) handle; // unused
 
-	/* XXX That's a hack until we get cudaMemcpy3DPeerAsync to work !
+	/* XXX That's a hack until we fix cudaMemcpy3DPeerAsync in the block interface
 	 * Perhaps not all data interface provide a direct GPU-GPU transfer
 	 * method ! */
 #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)