|
@@ -890,6 +890,8 @@ Note: one should not take pointers into such structures, because StarPU needs
|
|
to be able to copy over the content of it to various places, for instance to
|
|
to be able to copy over the content of it to various places, for instance to
|
|
efficiently migrate a data buffer from one data handle to another data handle.
|
|
efficiently migrate a data buffer from one data handle to another data handle.
|
|
|
|
|
|
|
|
+\subsection DefiningANewDataInterface_registration Data registration
|
|
|
|
+
|
|
Registering such a data to StarPU is easily done using the function
|
|
Registering such a data to StarPU is easily done using the function
|
|
starpu_data_register(). The last
|
|
starpu_data_register(). The last
|
|
parameter of the function, <c>interface_complex_ops</c>, will be
|
|
parameter of the function, <c>interface_complex_ops</c>, will be
|
|
@@ -971,8 +973,8 @@ static struct starpu_data_interface_ops interface_complex_ops =
|
|
};
|
|
};
|
|
\endcode
|
|
\endcode
|
|
|
|
|
|
-Functions need to be defined to access the different fields of the
|
|
|
|
-complex interface from a StarPU data handle.
|
|
|
|
|
|
+Convenience functions can be defined to access the different fields of the
|
|
|
|
+complex interface from a StarPU data handle after a starpu_data_acquire() call:
|
|
|
|
|
|
\code{.c}
|
|
\code{.c}
|
|
double *starpu_complex_get_real(starpu_data_handle_t handle)
|
|
double *starpu_complex_get_real(starpu_data_handle_t handle)
|
|
@@ -1022,6 +1024,181 @@ The whole code for this complex data interface is available in the
|
|
directory <c>examples/interface/</c>.
|
|
directory <c>examples/interface/</c>.
|
|
|
|
|
|
|
|
|
|
|
|
+\subsection DefiningANewDataInterface_allocation Data allocation
|
|
|
|
+
|
|
|
|
+To be able to run tasks on GPUs etc. StarPU needs to know how to allocate a
|
|
|
|
+buffer for the interface. In our example, two allocations are needed in the
|
|
|
|
+complex_allocate_data_on_node() allocation method: one for the real part and one
|
|
|
|
+for the imaginary part.
|
|
|
|
+
|
|
|
|
+\code{.c}
|
|
|
|
+static starpu_ssize_t complex_allocate_data_on_node(void *data_interface, unsigned node)
|
|
|
|
+{
|
|
|
|
+ struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) data_interface;
|
|
|
|
+
|
|
|
|
+ double *addr_real = NULL;
|
|
|
|
+ double *addr_imaginary = NULL;
|
|
|
|
+ starpu_ssize_t requested_memory = complex_interface->nx * sizeof(complex_interface->real[0]);
|
|
|
|
+
|
|
|
|
+ addr_real = (double*) starpu_malloc_on_node(node, requested_memory);
|
|
|
|
+ if (!addr_real)
|
|
|
|
+ goto fail_real;
|
|
|
|
+ addr_imaginary = (double*) starpu_malloc_on_node(node, requested_memory);
|
|
|
|
+ if (!addr_imaginary)
|
|
|
|
+ goto fail_imaginary;
|
|
|
|
+
|
|
|
|
+ /* update the data properly in consequence */
|
|
|
|
+ complex_interface->real = addr_real;
|
|
|
|
+ complex_interface->imaginary = addr_imaginary;
|
|
|
|
+
|
|
|
|
+ return 2*requested_memory;
|
|
|
|
+
|
|
|
|
+fail_imaginary:
|
|
|
|
+ starpu_free_on_node(node, (uintptr_t) addr_real, requested_memory);
|
|
|
|
+fail_real:
|
|
|
|
+ return -ENOMEM;
|
|
|
|
+}
|
|
|
|
+\endcode
|
|
|
|
+
|
|
|
|
+Here we try to allocate the two parts. If either of them fails, we return
|
|
|
|
+-ENOMEM. If they succeed, we can record the obtained pointers and return the
|
|
|
|
+amount of allocated memory (for memory usage accounting).
|
|
|
|
+
|
|
|
|
+Conversely, complex_free_data_on_node() frees the two parts:
|
|
|
|
+
|
|
|
|
+\code{.c}
|
|
|
|
+static void complex_free_data_on_node(void *data_interface, unsigned node)
|
|
|
|
+{
|
|
|
|
+ struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) data_interface;
|
|
|
|
+ starpu_ssize_t requested_memory = complex_interface->nx * sizeof(complex_interface->real[0]);
|
|
|
|
+
|
|
|
|
+ starpu_free_on_node(node, (uintptr_t) complex_interface->real, requested_memory);
|
|
|
|
+ starpu_free_on_node(node, (uintptr_t) complex_interface->imaginary, requested_memory);
|
|
|
|
+}
|
|
|
|
+\endcode
|
|
|
|
+
|
|
|
|
+We have not done anything particular for GPUs or other devices: it is
|
|
|
|
+starpu_malloc_on_node() which knows how to actually make the allocation, and
|
|
|
|
+returns the resulting pointer, be it in main memory, in GPU memory, etc.
|
|
|
|
+
|
|
|
|
+\subsection DefiningANewDataInterface_copy Data copy
|
|
|
|
+
|
|
|
|
+Now that StarPU knows how to allocate/free a buffer, it needs to be able to
|
|
|
|
+copy over data into/from it. Defining a copy_any_to_any method allows StarPU to
|
|
|
|
+perform direct transfers between main memory and GPU memory.
|
|
|
|
+
|
|
|
|
+\code{.c}
|
|
|
|
+static int copy_any_to_any(void *src_interface, unsigned src_node,
|
|
|
|
+ void *dst_interface, unsigned dst_node,
|
|
|
|
+ void *async_data)
|
|
|
|
+{
|
|
|
|
+ struct starpu_complex_interface *src_complex = src_interface;
|
|
|
|
+ struct starpu_complex_interface *dst_complex = dst_interface;
|
|
|
|
+ int ret = 0;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ if (starpu_interface_copy((uintptr_t) src_complex->real, 0, src_node,
|
|
|
|
+ (uintptr_t) dst_complex->real, 0, dst_node,
|
|
|
|
+ src_complex->nx*sizeof(src_complex->real[0]),
|
|
|
|
+ async_data))
|
|
|
|
+ ret = -EAGAIN;
|
|
|
|
+ if (starpu_interface_copy((uintptr_t) src_complex->imaginary, 0, src_node,
|
|
|
|
+ (uintptr_t) dst_complex->imaginary, 0, dst_node,
|
|
|
|
+ src_complex->nx*sizeof(src_complex->imaginary[0]),
|
|
|
|
+ async_data))
|
|
|
|
+ ret = -EAGAIN;
|
|
|
|
+ return ret;
|
|
|
|
+}
|
|
|
|
+\endcode
|
|
|
|
+
|
|
|
|
+We here again have no idea what is main memory or GPU memory, or even if the
|
|
|
|
+copy is synchronous or asynchronous: we just call starpu_interface_copy()
|
|
|
|
+according to the interface, passing it the pointers, and checking whether it
|
|
|
|
+returned -EAGAIN, which means the copy is asynchronous, and StarPU will
|
|
|
|
+appropriately wait for it thanks to the \c async_data pointer.
|
|
|
|
+
|
|
|
|
+This copy method is referenced in a \ref starpu_data_copy_methods structure:
|
|
|
|
+
|
|
|
|
+\code{.c}
|
|
|
|
+static const struct starpu_data_copy_methods complex_copy_methods =
|
|
|
|
+{
|
|
|
|
+ .any_to_any = copy_any_to_any
|
|
|
|
+};
|
|
|
|
+\endcode
|
|
|
|
+
|
|
|
|
+which was referenced in the \ref starpu_data_interface_ops structure above.
|
|
|
|
+
|
|
|
|
+Other fields of \ref starpu_data_copy_methods allow providing optimized
|
|
|
|
+variants, notably for the case of 2D or 3D matrix tiles with non-trivial ld.
|
|
|
|
+
|
|
|
|
+\subsection DefiningANewDataInterface_pack Data pack/peek/unpack
|
|
|
|
+
|
|
|
|
+The copy methods allow for RAM/GPU transfers, but are not enough for e.g.
|
|
|
|
+transferring over MPI. That requires defining the pack/peek/unpack methods. The
|
|
|
|
+principle is that the starpu_data_interface_ops::pack_data method concatenates
|
|
|
|
+the buffer data into a newly-allocated contiguous bytes array, conversely
|
|
|
|
+starpu_data_interface_ops::peek_data extracts from a bytes array into the
|
|
|
|
+buffer data, and starpu_data_interface_ops::unpack_data does the same as
|
|
|
|
+starpu_data_interface_ops::peek_data but also frees the bytes array.
|
|
|
|
+
|
|
|
|
+\code{.c}
|
|
|
|
+static int complex_pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count)
|
|
|
|
+{
|
|
|
|
+ STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
|
|
|
|
+
|
|
|
|
+ struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *)
|
|
|
|
+ starpu_data_get_interface_on_node(handle, node);
|
|
|
|
+
|
|
|
|
+ *count = complex_get_size(handle);
|
|
|
|
+ if (ptr != NULL)
|
|
|
|
+ {
|
|
|
|
+ char *data;
|
|
|
|
+ data = (void*) starpu_malloc_on_node_flags(node, *count, 0);
|
|
|
|
+ *ptr = data;
|
|
|
|
+ memcpy(data, complex_interface->real, complex_interface->nx*sizeof(double));
|
|
|
|
+ memcpy(data+complex_interface->nx*sizeof(double), complex_interface->imaginary, complex_interface->nx*sizeof(double));
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ return 0;
|
|
|
|
+}
|
|
|
|
+\endcode
|
|
|
|
+
|
|
|
|
+complex_pack_data() first computes the size to be allocated, then allocates it,
|
|
|
|
+and copies over into it the content of the two real and imaginary arrays.
|
|
|
|
+
|
|
|
|
+\code{.c}
|
|
|
|
+static int complex_peek_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count)
|
|
|
|
+{
|
|
|
|
+ char *data = ptr;
|
|
|
|
+ STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
|
|
|
|
+
|
|
|
|
+ struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *)
|
|
|
|
+ starpu_data_get_interface_on_node(handle, node);
|
|
|
|
+
|
|
|
|
+ STARPU_ASSERT(count == 2 * complex_interface->nx * sizeof(double));
|
|
|
|
+ memcpy(complex_interface->real, data, complex_interface->nx*sizeof(double));
|
|
|
|
+ memcpy(complex_interface->imaginary, data+complex_interface->nx*sizeof(double), complex_interface->nx*sizeof(double));
|
|
|
|
+
|
|
|
|
+ return 0;
|
|
|
|
+}
|
|
|
|
+\endcode
|
|
|
|
+
|
|
|
|
+complex_peek_data() simply uses memcpy to copy over from the bytes array into the data buffer.
|
|
|
|
+
|
|
|
|
+\code{.c}
|
|
|
|
+static int complex_unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count)
|
|
|
|
+{
|
|
|
|
+ complex_peek_data(handle, node, ptr, count);
|
|
|
|
+
|
|
|
|
+ starpu_free_on_node_flags(node, (uintptr_t) ptr, count, 0);
|
|
|
|
+
|
|
|
|
+ return 0;
|
|
|
|
+}
|
|
|
|
+\endcode
|
|
|
|
+
|
|
|
|
+And complex_unpack_data() just calls complex_peek_data() and releases the bytes array.
|
|
|
|
+
|
|
|
|
+
|
|
\section SpecifyingATargetNode Specifying A Target Node For Task Data
|
|
\section SpecifyingATargetNode Specifying A Target Node For Task Data
|
|
|
|
|
|
When executing a task on a GPU for instance, StarPU would normally copy all the
|
|
When executing a task on a GPU for instance, StarPU would normally copy all the
|