|
@@ -173,10 +173,10 @@ configuration is used if the passed argument is @code{NULL}.
|
|
|
@subsection @code{struct starpu_conf} -- StarPU runtime configuration
|
|
|
|
|
|
@table @asis
|
|
|
-
|
|
|
@item @emph{Description}:
|
|
|
TODO
|
|
|
-
|
|
|
+@item @emph{Definition}:
|
|
|
+TODO
|
|
|
@end table
|
|
|
|
|
|
|
|
@@ -220,6 +220,15 @@ garanteed to be available until this method has been called.
|
|
|
|
|
|
@node starpu_task_create
|
|
|
@subsection @code{starpu_task_create} -- Allocate and Initialize a Task
|
|
|
+@table @asis
|
|
|
+@item @emph{Description}:
|
|
|
+TODO
|
|
|
+@item @emph{Prototype}:
|
|
|
+@code{struct starpu_task *starpu_task_create(void);}
|
|
|
+@end table
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
|
|
|
@c Callbacks : what can we put in callbacks ?
|
|
|
|
|
@@ -239,7 +248,10 @@ garanteed to be available until this method has been called.
|
|
|
@node starpu_tag_t
|
|
|
@subsection @code{starpu_tag_t} -- Task identifier
|
|
|
@c mention the tag_id field of the task structure
|
|
|
+@table @asis
|
|
|
+@item @emph{Definition}:
|
|
|
TODO
|
|
|
+@end table
|
|
|
|
|
|
@node starpu_tag_declare_deps
|
|
|
@subsection @code{starpu_tag_declare_deps} -- Declare the Dependencies of a Tag
|
|
@@ -313,10 +325,10 @@ The Makefile could for instance contain the following lines to define which
|
|
|
options must be given to the compiler and to the linker:
|
|
|
|
|
|
@example
|
|
|
-@cartouche
|
|
|
+@c @cartouche
|
|
|
CFLAGS+=$$(pkg-config --cflags libstarpu)
|
|
|
LIBS+=$$(pkg-config --libs libstarpu)
|
|
|
-@end cartouche
|
|
|
+@c @end cartouche
|
|
|
@end example
|
|
|
|
|
|
@section Hello World
|
|
@@ -328,16 +340,16 @@ In this section, we show how to implement a simple program that submits a task t
|
|
|
The @code{starpu.h} header should be included in any code using StarPU.
|
|
|
|
|
|
@example
|
|
|
-@cartouche
|
|
|
+@c @cartouche
|
|
|
#include <starpu.h>
|
|
|
-@end cartouche
|
|
|
+@c @end cartouche
|
|
|
@end example
|
|
|
|
|
|
|
|
|
@subsection Defining a Codelet
|
|
|
|
|
|
@example
|
|
|
-@cartouche
|
|
|
+@c @cartouche
|
|
|
void cpu_func(starpu_data_interface_t *buffers, void *func_arg)
|
|
|
@{
|
|
|
float *array = func_arg;
|
|
@@ -351,7 +363,7 @@ starpu_codelet cl =
|
|
|
.core_func = cpu_func,
|
|
|
.nbuffers = 0
|
|
|
@};
|
|
|
-@end cartouche
|
|
|
+@c @end cartouche
|
|
|
@end example
|
|
|
|
|
|
A codelet is a structure that represents a computational kernel. Such a codelet
|
|
@@ -388,7 +400,7 @@ cannot be used as a synchronization medium.
|
|
|
@subsection Submitting a Task
|
|
|
|
|
|
@example
|
|
|
-@cartouche
|
|
|
+@c @cartouche
|
|
|
void callback_func(void *callback_arg)
|
|
|
@{
|
|
|
printf("Callback function (arg %x)\n", callback_arg);
|
|
@@ -421,7 +433,7 @@ int main(int argc, char **argv)
|
|
|
|
|
|
return 0;
|
|
|
@}
|
|
|
-@end cartouche
|
|
|
+@c @end cartouche
|
|
|
@end example
|
|
|
|
|
|
Before submitting any tasks to StarPU, @code{starpu_init} must be called. The
|
|
@@ -461,11 +473,90 @@ synchronous: the @code{starpu_submit_task} function will not return until the
|
|
|
task was executed. Note that the @code{starpu_shutdown} method does not
|
|
|
guaranty that asynchronous tasks have been executed before it returns.
|
|
|
|
|
|
-@section Scaling a Vector
|
|
|
+@section Manipulating Data: Scaling a Vector
|
|
|
|
|
|
-In this example, we show how data can be manipulated within StarPU tasks.
|
|
|
+The previous example has shown how to submit tasks. In this section, we show how
|
|
|
+StarPU tasks can manipulate data.
|
|
|
|
|
|
-TODO
|
|
|
+Programmers can describe the data layout of their application so that StarPU is
|
|
|
+responsible for enforcing data coherency and availability across the machine.
|
|
|
+Instead of handling complex (and non-portable) mechanisms to perform data
|
|
|
+movements, programmers only declare which piece of data is accessed and/or
|
|
|
+modified by a task, and StarPU makes sure that when a computational kernel starts
|
|
|
+somewhere (e.g. on a GPU), its data are available locally.
|
|
|
+
|
|
|
+Before submitting those tasks, the programmer first needs to declare the
|
|
|
+different pieces of data to StarPU using the @code{starpu_monitor_*_data}
|
|
|
+functions. To ease the development of applications for StarPU, it is possible
|
|
|
+to describe multiple types of data layout. A type of data layout is called an
|
|
|
+@b{interface}. By default, there are different interfaces available in StarPU:
|
|
|
+here we will consider the @b{vector interface}.
|
|
|
+
|
|
|
+The following lines show how to declare an array of @code{n} elements of type
|
|
|
+@code{float} using the vector interface:
|
|
|
+@example
|
|
|
+float tab[n];
|
|
|
+
|
|
|
+starpu_data_handle tab_handle;
|
|
|
+starpu_monitor_vector_data(&tab_handle, 0, tab, n, sizeof(float));
|
|
|
+@end example
|
|
|
+
|
|
|
+The first argument, called the @b{data handle}, is an opaque pointer which
|
|
|
+designates the array in StarPU. This is also the structure which is used to
|
|
|
+describe which data is used by a task. It is possible to construct a StarPU
|
|
|
+task that multiplies this vector by a constant factor:
|
|
|
+@example
|
|
|
+float factor;
|
|
|
+struct starpu_task *task = starpu_task_create();
|
|
|
+
|
|
|
+task->cl = &cl;
|
|
|
+
|
|
|
+task->cl_arg = &factor;
|
|
|
+task->cl_arg_size = sizeof(float);
|
|
|
+
|
|
|
+task->buffers[0].state = &tab_handle;
|
|
|
+task->buffers[0].mode = RW;
|
|
|
+@end example
|
|
|
+
|
|
|
+The constant factor can be passed as a simple parameter of the task, but the
|
|
|
+size and the content of the vector are not known in advance, so we describe
|
|
|
+the vector by its handle. There are two fields in each element of the @code{buffers} array.
|
|
|
+@code{.state} is the handle of the data, and @code{.mode} specifies how the
|
|
|
+kernel will access the data (@code{R} for read-only, @code{W} for write-only
|
|
|
+and @code{RW} for read and write access).
|
|
|
+
|
|
|
+@example
|
|
|
+void scal_func(starpu_data_interface_t *buffers, void *arg)
|
|
|
+@{
|
|
|
+ unsigned i;
|
|
|
+ float *factor = arg;
|
|
|
+
|
|
|
+ /* length of the vector */
|
|
|
+ unsigned n = buffers[0].vector.nx;
|
|
|
+ /* local copy of the vector */
|
|
|
+ float *val = (float *)buffers[0].vector.ptr;
|
|
|
+
|
|
|
+ for (i = 0; i < n; i++)
|
|
|
+ val[i] *= *factor;
|
|
|
+@}
|
|
|
+
|
|
|
+starpu_codelet cl = @{
|
|
|
+ .where = CORE,
|
|
|
+ .core_func = scal_func,
|
|
|
+ .nbuffers = 1
|
|
|
+@};
|
|
|
+@end example
|
|
|
+
|
|
|
+The @code{.nbuffers} field of the codelet structure specifies that there is
|
|
|
+only one piece of data that is handled by the codelet. The second argument of
|
|
|
+the @code{scal_func} function contains a pointer to the parameters of the
|
|
|
+codelet (given in @code{task->cl_arg}), so that we read the constant factor from
|
|
|
+this pointer. The first argument is an array that gives a description of every
|
|
|
+buffer passed in the @code{task->buffers}@ array. In the @b{vector interface},
|
|
|
+the location of the vector (resp. its length) is accessible in the
|
|
|
+@code{.vector.ptr} (resp. @code{.vector.nx}) fields of this array. Since the vector is
|
|
|
+accessed in a read-write fashion, any modification will automatically affect
|
|
|
+future accesses to that vector.
|
|
|
|
|
|
@section Vector Scaling on an Hybrid CPU/GPU Machine
|
|
|
|