16 years ago · ee6e55e4c3
--- a/doc/starpu.texi
+++ b/doc/starpu.texi
@@ -173,10 +173,10 @@ configuration is used if the passed argument is @code{NULL}.
 
				 @subsection @code{struct starpu_conf} -- StarPU runtime configuration
			
 
				 
			
 
				 @table @asis
			
 
				-
			
 
				 @item @emph{Description}:
			
 
				 TODO
			
 
				-
			
 
				+@item @emph{Definition}:
			
 
				+TODO
			
 
				 @end table
			
 
				 
			
 
				 
			
@@ -220,6 +220,15 @@ garanteed to be available until this method has been called.
 
				 
			
 
				 @node starpu_task_create
			
 
				 @subsection @code{starpu_task_create} -- Allocate and Initialize a Task
			
 
				+@table @asis
			
 
				+@item @emph{Description}:
			
 
				+TODO
			
 
				+@item @emph{Prototype}:
			
 
				+@code{struct starpu_task *starpu_task_create(void);}
			
 
				+@end table
			
 
				+
			
 
				+
			
 
				+
			
 
				 
			
 
				 @c Callbacks : what can we put in callbacks ?
			
 
				 
			
@@ -239,7 +248,10 @@ garanteed to be available until this method has been called.
 
				 @node starpu_tag_t 
			
 
				 @subsection @code{starpu_tag_t} -- Task identifier
			
 
				 @c mention the tag_id field of the task structure
			
 
				+@table @asis
			
 
				+@item @emph{Definition}:
			
 
				 TODO
			
 
				+@end table
			
 
				 
			
 
				 @node starpu_tag_declare_deps
			
 
				 @subsection @code{starpu_tag_declare_deps} -- Declare the Dependencies of a Tag
			
@@ -313,10 +325,10 @@ The Makefile could for instance contain the following lines to define which
 
				 options must be given to the compiler and to the linker:
			
 
				 
			
 
				 @example
			
 
				-@cartouche
			
 
				+@c @cartouche
			
 
				 CFLAGS+=$$(pkg-config --cflags libstarpu)
			
 
				 LIBS+=$$(pkg-config --libs libstarpu)
			
 
				-@end cartouche
			
 
				+@c @end cartouche
			
 
				 @end example
			
 
				 
			
 
				 @section Hello World
			
@@ -328,16 +340,16 @@ In this section, we show how to implement a simple program that submits a task t
 
				 The @code{starpu.h} header should be included in any code using StarPU.
			
 
				 
			
 
				 @example 
			
 
				-@cartouche
			
 
				+@c @cartouche
			
 
				 #include <starpu.h>
			
 
				-@end cartouche
			
 
				+@c @end cartouche
			
 
				 @end example
			
 
				 
			
 
				 
			
 
				 @subsection Defining a Codelet
			
 
				 
			
 
				 @example
			
 
				-@cartouche
			
 
				+@c @cartouche
			
 
				 void cpu_func(starpu_data_interface_t *buffers, void *func_arg)
			
 
				 @{
			
 
				     float *array = func_arg;
			
@@ -351,7 +363,7 @@ starpu_codelet cl =
 
				     .core_func = cpu_func,
			
 
				     .nbuffers = 0
			
 
				 @};
			
 
				-@end cartouche
			
 
				+@c @end cartouche
			
 
				 @end example
			
 
				 
			
 
				 A codelet is a structure that represents a computational kernel. Such a codelet
			
@@ -388,7 +400,7 @@ cannot be used as a synchronization medium.
 
				 @subsection Submitting a Task
			
 
				 
			
 
				 @example
			
 
				-@cartouche
			
 
				+@c @cartouche
			
 
				 void callback_func(void *callback_arg)
			
 
				 @{
			
 
				     printf("Callback function (arg %x)\n", callback_arg);
			
@@ -421,7 +433,7 @@ int main(int argc, char **argv)
 
				 
			
 
				     return 0;
			
 
				 @}
			
 
				-@end cartouche
			
 
				+@c @end cartouche
			
 
				 @end example
			
 
				 
			
 
				 Before submitting any tasks to StarPU, @code{starpu_init} must be called. The
			
@@ -461,11 +473,90 @@ synchronous: the @code{starpu_submit_task} function will not return until the
 
				 task was executed. Note that the @code{starpu_shutdown} method does not
			
 
				 guaranty that asynchronous tasks have been executed before it returns.
			
 
				 
			
 
				-@section Scaling a Vector
			
 
				+@section Manipulating Data: Scaling a Vector
			
 
				 
			
 
				-In this example, we show how data can be manipulated within StarPU tasks.
			
 
				+The previous example has shown how to submit tasks; in this section we show how
			
 
				+StarPU tasks can manipulate data.
			
 
				 
			
 
				-TODO
			
 
				+Programmers can describe the data layout of their application so that StarPU is
			
 
				+responsible for enforcing data coherency and availability across the machine.
			
 
				+Instead of handling complex (and non-portable) mechanisms to perform data
			
 
				+movements, programmers only declare which piece of data is accessed and/or
			
 
				+modified by a task, and StarPU makes sure that when a computational kernel starts
			
 
				+somewhere (e.g. on a GPU), its data are available locally.
			
 
				+
			
 
				+Before submitting those tasks, the programmer first needs to declare the
			
 
				+different pieces of data to StarPU using the @code{starpu_monitor_*_data}
			
 
				+functions. To ease the development of applications for StarPU, it is possible
			
 
				+to describe multiple types of data layout. A type of data layout is called an
			
 
				+@b{interface}. By default, there are different interfaces available in StarPU:
			
 
				+here we will consider the @b{vector interface}. 
			
 
				+
			
 
				+The following lines show how to declare an array of @code{n} elements of type
			
 
				+@code{float} using the vector interface:
			
 
				+@example
			
 
				+float tab[n];
			
 
				+
			
 
				+starpu_data_handle tab_handle;
			
 
				+starpu_monitor_vector_data(&tab_handle, 0, tab, n, sizeof(float));
			
 
				+@end example
			
 
				+
			
 
				+The first argument, called the @b{data handle}, is an opaque pointer which
			
 
				+designates the array in StarPU. This is also the structure which is used to
			
 
				+describe which data is used by a task. It is possible to construct a StarPU
			
 
				+task that multiplies this vector by a constant factor:
			
 
				+@example
			
 
				+float factor;
			
 
				+struct starpu_task *task = starpu_task_create();
			
 
				+
			
 
				+task->cl = &cl;
			
 
				+
			
 
				+task->cl_arg = &factor;
			
 
				+task->cl_arg_size = sizeof(float);
			
 
				+
			
 
				+task->buffers[0].state = &tab_handle;
			
 
				+task->buffers[0].mode = RW;
			
 
				+@end example
			
 
				+
			
 
				+The constant factor can be passed as a simple parameter of the task, but the
			
 
				+size and the content of the vector are not known in advance, so we describe
			
 
				+the vector by its handle. There are two fields in each element of the @code{buffers} array.
			
 
				+@code{.state} is the handle of the data, and @code{.mode} specifies how the
			
 
				+kernel will access the data (@code{R} for read-only, @code{W} for write-only
			
 
				+and @code{RW} for read and write access).
			
 
				+
			
 
				+@example
			
 
				+void scal_func(starpu_data_interface_t *buffers, void *arg)
			
 
				+@{
			
 
				+    unsigned i;
			
 
				+    float *factor = arg;
			
 
				+
			
 
				+    /* length of the vector */
			
 
				+    unsigned n = buffers[0].vector.nx;
			
 
				+    /* local copy of the vector */
			
 
				+    float *val = (float *)buffers[0].vector.ptr;
			
 
				+
			
 
				+    for (i = 0; i < n; i++)
			
 
				+        val[i] *= *factor;
			
 
				+@}
			
 
				+
			
 
				+starpu_codelet cl = @{
			
 
				+    .where = CORE,
			
 
				+    .core_func = scal_func,
			
 
				+    .nbuffers = 1
			
 
				+@};
			
 
				+@end example
			
 
				+
			
 
				+The @code{.nbuffers} field of the codelet structure specifies that there is
			
 
				+only one piece of data that is handled by the codelet. The second argument of
			
 
				+the @code{scal_func} function contains a pointer to the parameters of the
			
 
				+codelet (given in @code{task->cl_arg}), so we read the constant factor from
			
 
				+this pointer. The first argument is an array that gives a description of every
			
 
				+buffer passed in the @code{task->buffers}@ array. In the @b{vector interface},
			
 
				+the location of the vector (resp. its length) is accessible in the
			
 
				+@code{.vector.ptr} (resp. @code{.vector.nx}) of this array. Since the vector is
			
 
				+accessed in a read-write fashion, any modification will automatically affect
			
 
				+future accesses to that vector.
			
 
				 
			
 
				 @section Vector Scaling on an Hybrid CPU/GPU Machine