Browse Source

Complete data interface documentation

Samuel Thibault 4 years ago
parent
commit
5643195edc
1 changed files with 179 additions and 2 deletions
  1. 179 2
      doc/doxygen/chapters/310_data_management.doxy

+ 179 - 2
doc/doxygen/chapters/310_data_management.doxy

@@ -890,6 +890,8 @@ Note: one should not take pointers into such structures, because StarPU needs
 to be able to copy over the content of it to various places, for instance to
 to be able to copy over the content of it to various places, for instance to
 efficiently migrate a data buffer from one data handle to another data handle.
 efficiently migrate a data buffer from one data handle to another data handle.
 
 
+\subsection DefiningANewDataInterface_registration Data registration
+
 Registering such a data to StarPU is easily done using the function
 Registering such a data to StarPU is easily done using the function
 starpu_data_register(). The last
 starpu_data_register(). The last
 parameter of the function, <c>interface_complex_ops</c>, will be
 parameter of the function, <c>interface_complex_ops</c>, will be
@@ -971,8 +973,8 @@ static struct starpu_data_interface_ops interface_complex_ops =
 };
 };
 \endcode
 \endcode
 
 
-Functions need to be defined to access the different fields of the
-complex interface from a StarPU data handle.
+Convenience functions can be defined to access the different fields of the
+complex interface from a StarPU data handle after a starpu_data_acquire() call:
 
 
 \code{.c}
 \code{.c}
 double *starpu_complex_get_real(starpu_data_handle_t handle)
 double *starpu_complex_get_real(starpu_data_handle_t handle)
@@ -1022,6 +1024,181 @@ The whole code for this complex data interface is available in the
 directory <c>examples/interface/</c>.
 directory <c>examples/interface/</c>.
 
 
 
 
+\subsection DefiningANewDataInterface_allocation Data allocation
+
+To be able to run tasks on GPUs etc. StarPU needs to know how to allocate a
+buffer for the interface. In our example, two allocations are needed in the
+complex_allocate_data_on_node() allocation method: one for the real part and one
+for the imaginary part.
+
+\code{.c}
+static starpu_ssize_t complex_allocate_data_on_node(void *data_interface, unsigned node)
+{
+	struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) data_interface;
+
+	double *addr_real = NULL;
+	double *addr_imaginary = NULL;
+	starpu_ssize_t requested_memory = complex_interface->nx * sizeof(complex_interface->real[0]);
+
+	addr_real = (double*) starpu_malloc_on_node(node, requested_memory);
+	if (!addr_real)
+		goto fail_real;
+	addr_imaginary = (double*) starpu_malloc_on_node(node, requested_memory);
+	if (!addr_imaginary)
+		goto fail_imaginary;
+
+	/* update the data properly in consequence */
+	complex_interface->real = addr_real;
+	complex_interface->imaginary = addr_imaginary;
+
+	return 2*requested_memory;
+
+fail_imaginary:
+	starpu_free_on_node(node, (uintptr_t) addr_real, requested_memory);
+fail_real:
+	return -ENOMEM;
+}
+\endcode
+
+Here we try to allocate the two parts. If either of them fails, we return
+-ENOMEM. If they succeed, we can record the obtained pointers and return the
+amount of allocated memory (for memory usage accounting).
+
+Conversely, complex_free_data_on_node() frees the two parts:
+
+\code{.c}
+static void complex_free_data_on_node(void *data_interface, unsigned node)
+{
+	struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) data_interface;
+	starpu_ssize_t requested_memory = complex_interface->nx * sizeof(complex_interface->real[0]);
+
+	starpu_free_on_node(node, (uintptr_t) complex_interface->real, requested_memory);
+	starpu_free_on_node(node, (uintptr_t) complex_interface->imaginary, requested_memory);
+}
+\endcode
+
+We have not done anything particular for GPUs or whatsoever: it is
+starpu_malloc_on_node() which knows how to actually make the allocation, and
+returns the resulting pointer, be it in main memory, in GPU memory, etc.;
+starpu_free_on_node() likewise knows how to release it.
+
+\subsection DefiningANewDataInterface_copy Data copy
+
+Now that StarPU knows how to allocate/free a buffer, it needs to be able to
+copy over data into/from it. Defining a copy_any_to_any method allows StarPU to
+perform direct transfers between main memory and GPU memory.
+
+\code{.c}
+static int copy_any_to_any(void *src_interface, unsigned src_node,
+			   void *dst_interface, unsigned dst_node,
+			   void *async_data)
+{
+	struct starpu_complex_interface *src_complex = src_interface;
+	struct starpu_complex_interface *dst_complex = dst_interface;
+	int ret = 0;
+
+
+	if (starpu_interface_copy((uintptr_t) src_complex->real, 0, src_node,
+				    (uintptr_t) dst_complex->real, 0, dst_node,
+				     src_complex->nx*sizeof(src_complex->real[0]),
+				     async_data))
+		ret = -EAGAIN;
+	if (starpu_interface_copy((uintptr_t) src_complex->imaginary, 0, src_node,
+				    (uintptr_t) dst_complex->imaginary, 0, dst_node,
+				     src_complex->nx*sizeof(src_complex->imaginary[0]),
+				     async_data))
+		ret = -EAGAIN;
+	return ret;
+}
+\endcode
+
+We here again have no idea what is main memory or GPU memory, or even if the
+copy is synchronous or asynchronous: we just call starpu_interface_copy()
+according to the interface, passing it the pointers, and checking whether it
+returned -EAGAIN, which means the copy is asynchronous, and StarPU will
+appropriately wait for it thanks to the \c async_data pointer.
+
+This copy method is referenced in a \ref starpu_data_copy_methods structure:
+
+\code{.c}
+static const struct starpu_data_copy_methods complex_copy_methods =
+{
+	.any_to_any = copy_any_to_any
+};
+\endcode
+
+which was referenced in the \ref starpu_data_interface_ops structure above.
+
+Other fields of \ref starpu_data_copy_methods allow providing optimized
+variants, notably for the case of 2D or 3D matrix tiles with non-trivial ld.
+
+\subsection DefiningANewDataInterface_pack Data pack/peek/unpack
+
+The copy methods allow for RAM/GPU transfers, but are not enough for e.g.
+transferring over MPI. That requires defining the pack/peek/unpack methods. The
+principle is that the starpu_data_interface_ops::pack_data method concatenates
+the buffer data into a newly-allocated contiguous bytes array, conversely
+starpu_data_interface_ops::peek_data extracts from a bytes array into the
+buffer data, and starpu_data_interface_ops::unpack_data does the same as
+starpu_data_interface_ops::peek_data but also frees the bytes array.
+
+\code{.c}
+static int complex_pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count)
+{
+	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
+
+	struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *)
+		starpu_data_get_interface_on_node(handle, node);
+
+	*count = complex_get_size(handle);
+	if (ptr != NULL)
+	{
+		char *data;
+		data = (void*) starpu_malloc_on_node_flags(node, *count, 0);
+		*ptr = data;
+		memcpy(data, complex_interface->real, complex_interface->nx*sizeof(double));
+		memcpy(data+complex_interface->nx*sizeof(double), complex_interface->imaginary, complex_interface->nx*sizeof(double));
+	}
+
+	return 0;
+}
+\endcode
+
+complex_pack_data() first computes the size to be allocated, then allocates it,
+and copies over into it the content of the two real and imaginary arrays.
+
+\code{.c}
+static int complex_peek_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count)
+{
+	char *data = ptr;
+	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
+
+	struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *)
+		starpu_data_get_interface_on_node(handle, node);
+
+	STARPU_ASSERT(count == 2 * complex_interface->nx * sizeof(double));
+	memcpy(complex_interface->real, data, complex_interface->nx*sizeof(double));
+	memcpy(complex_interface->imaginary, data+complex_interface->nx*sizeof(double), complex_interface->nx*sizeof(double));
+
+	return 0;
+}
+\endcode
+
+complex_peek_data() simply uses memcpy to copy over from the bytes array into the data buffer.
+
+\code{.c}
+static int complex_unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count)
+{
+	complex_peek_data(handle, node, ptr, count);
+
+	starpu_free_on_node_flags(node, (uintptr_t) ptr, count, 0);
+
+	return 0;
+}
+\endcode
+
+And complex_unpack_data() just calls complex_peek_data() and releases the bytes array.
+
+
 \section SpecifyingATargetNode Specifying A Target Node For Task Data
 \section SpecifyingATargetNode Specifying A Target Node For Task Data
 
 
 When executing a task on a GPU for instance, StarPU would normally copy all the
 When executing a task on a GPU for instance, StarPU would normally copy all the