Explorar o código

Complete data interface documentation

Samuel Thibault hai 4 anos
pai
achega
5643195edc
Modificáronse 1 ficheiros con 179 adicións e 2 borrados
  1. 179 2
      doc/doxygen/chapters/310_data_management.doxy

+ 179 - 2
doc/doxygen/chapters/310_data_management.doxy

@@ -890,6 +890,8 @@ Note: one should not take pointers into such structures, because StarPU needs
 to be able to copy over the content of it to various places, for instance to
 efficiently migrate a data buffer from one data handle to another data handle.
 
+\subsection DefiningANewDataInterface_registration Data registration
+
 Registering such a data to StarPU is easily done using the function
 starpu_data_register(). The last
 parameter of the function, <c>interface_complex_ops</c>, will be
@@ -971,8 +973,8 @@ static struct starpu_data_interface_ops interface_complex_ops =
 };
 \endcode
 
-Functions need to be defined to access the different fields of the
-complex interface from a StarPU data handle.
+Convenience functions can be defined to access the different fields of the
+complex interface from a StarPU data handle after a starpu_data_acquire() call:
 
 \code{.c}
 double *starpu_complex_get_real(starpu_data_handle_t handle)
@@ -1022,6 +1024,181 @@ The whole code for this complex data interface is available in the
 directory <c>examples/interface/</c>.
 
 
+\subsection DefiningANewDataInterface_allocation Data allocation
+
+To be able to run tasks on GPUs etc. StarPU needs to know how to allocate a
+buffer for the interface. In our example, two allocations are needed in the
+complex_allocate_data_on_node() allocation method: one for the real part and one
+for the imaginary part.
+
+\code{.c}
+static starpu_ssize_t complex_allocate_data_on_node(void *data_interface, unsigned node)
+{
+	struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) data_interface;
+
+	double *addr_real = NULL;
+	double *addr_imaginary = NULL;
+	starpu_ssize_t requested_memory = complex_interface->nx * sizeof(complex_interface->real[0]);
+
+	addr_real = (double*) starpu_malloc_on_node(node, requested_memory);
+	if (!addr_real)
+		goto fail_real;
+	addr_imaginary = (double*) starpu_malloc_on_node(node, requested_memory);
+	if (!addr_imaginary)
+		goto fail_imaginary;
+
+	/* update the data properly in consequence */
+	complex_interface->real = addr_real;
+	complex_interface->imaginary = addr_imaginary;
+
+	return 2*requested_memory;
+
+fail_imaginary:
+	starpu_free_on_node(node, (uintptr_t) addr_real, requested_memory);
+fail_real:
+	return -ENOMEM;
+}
+\endcode
+
+Here we try to allocate the two parts. If either of them fails, we return
+-ENOMEM. If they succeed, we can record the obtained pointers and return the
+amount of allocated memory (for memory usage accounting).
+
+Conversely, complex_free_data_on_node() frees the two parts:
+
+\code{.c}
+static void complex_free_data_on_node(void *data_interface, unsigned node)
+{
+	struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) data_interface;
+	starpu_ssize_t requested_memory = complex_interface->nx * sizeof(complex_interface->real[0]);
+
+	starpu_free_on_node(node, (uintptr_t) complex_interface->real, requested_memory);
+	starpu_free_on_node(node, (uintptr_t) complex_interface->imaginary, requested_memory);
+}
+\endcode
+
+We have not done anything specific for GPUs or other devices: it is
+starpu_malloc_on_node() which knows how to actually make the allocation, and
+returns the resulting pointer, be it in main memory, in GPU memory, etc.
+
+\subsection DefiningANewDataInterface_copy Data copy
+
+Now that StarPU knows how to allocate/free a buffer, it needs to be able to
+copy over data into/from it. Defining a copy_any_to_any method allows StarPU to
+perform direct transfers between main memory and GPU memory.
+
+\code{.c}
+static int copy_any_to_any(void *src_interface, unsigned src_node,
+			   void *dst_interface, unsigned dst_node,
+			   void *async_data)
+{
+	struct starpu_complex_interface *src_complex = src_interface;
+	struct starpu_complex_interface *dst_complex = dst_interface;
+	int ret = 0;
+
+
+	if (starpu_interface_copy((uintptr_t) src_complex->real, 0, src_node,
+				    (uintptr_t) dst_complex->real, 0, dst_node,
+				     src_complex->nx*sizeof(src_complex->real[0]),
+				     async_data))
+		ret = -EAGAIN;
+	if (starpu_interface_copy((uintptr_t) src_complex->imaginary, 0, src_node,
+				    (uintptr_t) dst_complex->imaginary, 0, dst_node,
+				     src_complex->nx*sizeof(src_complex->imaginary[0]),
+				     async_data))
+		ret = -EAGAIN;
+	return ret;
+}
+\endcode
+
+We here again have no idea what is main memory or GPU memory, or even if the
+copy is synchronous or asynchronous: we just call starpu_interface_copy()
+according to the interface, passing it the pointers, and checking whether it
+returned -EAGAIN, which means the copy is asynchronous, and StarPU will
+appropriately wait for it thanks to the \c async_data pointer.
+
+This copy method is referenced in a \ref starpu_data_copy_methods structure:
+
+\code{.c}
+static const struct starpu_data_copy_methods complex_copy_methods =
+{
+	.any_to_any = copy_any_to_any
+};
+\endcode
+
+which was referenced in the \ref starpu_data_interface_ops structure above.
+
+Other fields of \ref starpu_data_copy_methods allow providing optimized
+variants, notably for the case of 2D or 3D matrix tiles with non-trivial ld (leading dimension).
+
+\subsection DefiningANewDataInterface_pack Data pack/peek/unpack
+
+The copy methods allow for RAM/GPU transfers, but are not enough for e.g.
+transferring over MPI. That requires defining the pack/peek/unpack methods. The
+principle is that the starpu_data_interface_ops::pack_data method concatenates
+the buffer data into a newly-allocated contiguous bytes array, conversely
+starpu_data_interface_ops::peek_data extracts from a bytes array into the
+buffer data, and starpu_data_interface_ops::unpack_data does the same as
+starpu_data_interface_ops::peek_data but also frees the bytes array.
+
+\code{.c}
+static int complex_pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count)
+{
+	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
+
+	struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *)
+		starpu_data_get_interface_on_node(handle, node);
+
+	*count = complex_get_size(handle);
+	if (ptr != NULL)
+	{
+		char *data;
+		data = (void*) starpu_malloc_on_node_flags(node, *count, 0);
+		*ptr = data;
+		memcpy(data, complex_interface->real, complex_interface->nx*sizeof(double));
+		memcpy(data+complex_interface->nx*sizeof(double), complex_interface->imaginary, complex_interface->nx*sizeof(double));
+	}
+
+	return 0;
+}
+\endcode
+
+complex_pack_data() first computes the size to be allocated, then allocates it,
+and copies over into it the content of the two real and imaginary arrays.
+
+\code{.c}
+static int complex_peek_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count)
+{
+	char *data = ptr;
+	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
+
+	struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *)
+		starpu_data_get_interface_on_node(handle, node);
+
+	STARPU_ASSERT(count == 2 * complex_interface->nx * sizeof(double));
+	memcpy(complex_interface->real, data, complex_interface->nx*sizeof(double));
+	memcpy(complex_interface->imaginary, data+complex_interface->nx*sizeof(double), complex_interface->nx*sizeof(double));
+
+	return 0;
+}
+\endcode
+
+complex_peek_data() simply uses memcpy to copy over from the bytes array into the data buffer.
+
+\code{.c}
+static int complex_unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count)
+{
+	complex_peek_data(handle, node, ptr, count);
+
+	starpu_free_on_node_flags(node, (uintptr_t) ptr, count, 0);
+
+	return 0;
+}
+\endcode
+
+And complex_unpack_data() just calls complex_peek_data() and releases the bytes array.
+
+
 \section SpecifyingATargetNode Specifying A Target Node For Task Data
 
 When executing a task on a GPU for instance, StarPU would normally copy all the