|
@@ -54,6 +54,13 @@ starpu_data_handle_t vector_handle;
|
|
|
starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0]));
|
|
|
\endcode
|
|
|
|
|
|
+Vectors can be partitioned into pieces by using
|
|
|
+starpu_vector_filter_block(). They can also be partitioned with some overlapping
|
|
|
+by using starpu_vector_filter_block_shadow(). By default StarPU
|
|
|
+uses the same size for each piece. If different sizes are desired,
|
|
|
+starpu_vector_filter_list() or starpu_vector_filter_list_long() can be used
|
|
|
+instead. To just divide in two pieces, starpu_vector_filter_divide_in_2() can be used.
|
|
|
+
|
|
|
\subsection MatrixDataInterface Matrix Data Interface
|
|
|
|
|
|
To register 2-D matrices with a potential padding, one can use the
|
|
@@ -67,9 +74,15 @@ matrix = (float*)malloc(width * height * sizeof(float));
|
|
|
starpu_matrix_data_register(&matrix_handle, STARPU_MAIN_RAM, (uintptr_t)matrix, width, width, height, sizeof(float));
|
|
|
\endcode
|
|
|
|
|
|
+2D matrices can be partitioned into 2D matrices along the x dimension by
|
|
|
+using starpu_matrix_filter_block(), and along the y dimension by using
|
|
|
+starpu_matrix_filter_vertical_block(). They can also be partitioned
|
|
|
+with some overlapping by using starpu_matrix_filter_block_shadow() and
|
|
|
+starpu_matrix_filter_vertical_block_shadow().
|
|
|
+
|
|
|
\subsection BlockDataInterface Block Data Interface
|
|
|
|
|
|
-To register 3-D blocks with potential paddings on Y and Z dimensions,
|
|
|
+To register 3-D matrices with potential paddings on Y and Z dimensions,
|
|
|
one can use the block data interface. Here is an example of how to
|
|
|
register a block data to StarPU by using starpu_block_data_register().
|
|
|
|
|
@@ -80,6 +93,14 @@ block = (float*)malloc(nx*ny*nz*sizeof(float));
|
|
|
starpu_block_data_register(&block_handle, STARPU_MAIN_RAM, (uintptr_t)block, nx, nx*ny, nx, ny, nz, sizeof(float));
|
|
|
\endcode
|
|
|
|
|
|
+3D matrices can be partitioned along the x dimension by
|
|
|
+using starpu_block_filter_block(), or along the y dimension
|
|
|
+by using starpu_block_filter_vertical_block(), or along the
|
|
|
+z dimension by using starpu_block_filter_depth_block(). They
|
|
|
+can also be partitioned with some overlapping by using
|
|
|
+starpu_block_filter_block_shadow(), starpu_block_filter_vertical_block_shadow(),
|
|
|
+or starpu_block_filter_depth_block_shadow().
|
|
|
+
|
|
|
\subsection BCSRDataInterface BCSR Data Interface
|
|
|
|
|
|
BCSR (Blocked Compressed Sparse Row Representation) sparse matrix data
|
|
@@ -147,10 +168,16 @@ starpu_bcsr_data_register(&bcsr_handle,
|
|
|
StarPU provides an example on how to deal with such matrices in
|
|
|
<c>examples/spmv</c>.
|
|
|
|
|
|
+BCSR data handles can be partitioned into their dense matrix blocks by using
|
|
|
+starpu_bcsr_filter_canonical_block().
|
|
|
+
|
|
|
\subsection CSRDataInterface CSR Data Interface
|
|
|
|
|
|
TODO
|
|
|
|
|
|
+CSR data handles can be partitioned into vertical CSR matrices by using
|
|
|
+starpu_csr_filter_vertical_block().
|
|
|
+
|
|
|
\subsection VariableSizeDataInterface Data Interface with Variable Size
|
|
|
|
|
|
Tasks are actually allowed to change the size of data interfaces.
|
|
@@ -763,7 +790,11 @@ A full example may be found in <c>examples/basic_examples/multiformat.c</c>.
|
|
|
|
|
|
\section DefiningANewDataInterface Defining A New Data Interface
|
|
|
|
|
|
-Let's define a new data interface to manage complex numbers.
|
|
|
+This section proposes an example of how to define your own interface, when the
|
|
|
+StarPU-provided interfaces do not fit your needs. Here we take a dumb example of
|
|
|
+an array of complex numbers represented by two arrays of double values.
|
|
|
+
|
|
|
+Let's thus define a new data interface to manage arrays of complex numbers:
|
|
|
|
|
|
\code{.c}
|
|
|
/* interface for complex numbers */
|
|
@@ -775,6 +806,15 @@ struct starpu_complex_interface
|
|
|
};
|
|
|
\endcode
|
|
|
|
|
|
+That structure stores enough to describe <b>one</b> buffer of this kind of
|
|
|
+data. It is used for the buffer stored in the main memory, another instance
|
|
|
+is used for the buffer stored in a GPU, etc. A <i>data handle</i> is thus a
|
|
|
+collection of such structures, to remember each buffer on each memory node.
|
|
|
+
|
|
|
+Note: one should not take pointers into such structures, because StarPU needs
|
|
|
+to be able to copy its content to various places, for instance to
|
|
|
+efficiently migrate a data buffer from one data handle to another data handle.
|
|
|
+
|
|
|
Registering such data with StarPU is easily done using the function
|
|
|
starpu_data_register(). The last
|
|
|
parameter of the function, <c>interface_complex_ops</c>, will be
|
|
@@ -800,12 +840,41 @@ void starpu_complex_data_register(starpu_data_handle_t *handle,
|
|
|
}
|
|
|
\endcode
|
|
|
|
|
|
-The <c>starpu_complex_interface</c> structure is here used just to store the
|
|
|
+The <c>struct starpu_complex_interface complex</c> is here used just to store the
|
|
|
parameters that the user provided to <c>starpu_complex_data_register</c>.
|
|
|
starpu_data_register() will first allocate the handle, and
|
|
|
then pass the <c>starpu_complex_interface</c> structure to the
|
|
|
starpu_data_interface_ops::register_data_handle method, which records them
|
|
|
-within the data handle (it is called once per node by starpu_data_register()).
|
|
|
+within the data handle (it is called once per node by starpu_data_register()):
|
|
|
+
|
|
|
+\code{.c}
|
|
|
+static void complex_register_data_handle(starpu_data_handle_t handle, unsigned home_node, void *data_interface)
|
|
|
+{
|
|
|
+ struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) data_interface;
|
|
|
+
|
|
|
+ unsigned node;
|
|
|
+ for (node = 0; node < STARPU_MAXNODES; node++)
|
|
|
+ {
|
|
|
+ struct starpu_complex_interface *local_interface = (struct starpu_complex_interface *)
|
|
|
+ starpu_data_get_interface_on_node(handle, node);
|
|
|
+
|
|
|
+ local_interface->nx = complex_interface->nx;
|
|
|
+ if (node == home_node)
|
|
|
+ {
|
|
|
+ local_interface->real = complex_interface->real;
|
|
|
+ local_interface->imaginary = complex_interface->imaginary;
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ local_interface->real = NULL;
|
|
|
+ local_interface->imaginary = NULL;
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+\endcode
|
|
|
+
|
|
|
+If the application provided a home node, the corresponding pointers will be
|
|
|
+recorded for that node. The other nodes have no buffer allocated yet.
|
|
|
|
|
|
Different operations need to be defined for a data interface through
|
|
|
the type starpu_data_interface_ops. We only define here the basic
|
|
@@ -932,4 +1001,21 @@ when the kernel does not make so many accesses to the second data, and thus data
|
|
|
being remote e.g. over a PCI bus is not a performance problem, and avoids
|
|
|
filling the fast local memory with data which does not need the performance.
|
|
|
|
|
|
+In cases where the kernel is fine with some data being either local or in the
|
|
|
+main memory, ::STARPU_SPECIFIC_NODE_LOCAL_OR_CPU can be used. StarPU will then
|
|
|
+be free to leave the data in the main memory and let the kernel access it from
|
|
|
+accelerators, or to move it to the accelerator before starting the kernel, for
|
|
|
+instance:
|
|
|
+
|
|
|
+\code{.c}
|
|
|
+struct starpu_codelet cl =
|
|
|
+{
|
|
|
+ .cuda_funcs = { kernel },
|
|
|
+ .nbuffers = 2,
|
|
|
+ .modes = {STARPU_RW, STARPU_R},
|
|
|
+ .specific_nodes = 1,
|
|
|
+ .nodes = {STARPU_SPECIFIC_NODE_LOCAL, STARPU_SPECIFIC_NODE_LOCAL_OR_CPU},
|
|
|
+};
|
|
|
+\endcode
|
|
|
+
|
|
|
*/
|