15 年之前 · 33a4fe80da
--- a/doc/starpu.texi
+++ b/doc/starpu.texi
@@ -1139,15 +1139,14 @@ typically need to be explicitly casted. Using the
 
				 @code{void starpu_tag_declare_deps(starpu_tag_t id, unsigned ndeps, ...);}
			
 
				 
			
 
				 @item @emph{Example}:
			
 
				+@cartouche
			
 
				 @example
			
 
				-@c @cartouche
			
 
				 /*  Tag 0x1 depends on tags 0x32 and 0x52 */
			
 
				 starpu_tag_declare_deps((starpu_tag_t)0x1,
			
 
				         2, (starpu_tag_t)0x32, (starpu_tag_t)0x52);
			
 
				 
			
 
				-@c @end cartouche
			
 
				 @end example
			
 
				-
			
 
				+@end cartouche
			
 
				 
			
 
				 @end table
			
 
				 
			
@@ -1161,11 +1160,13 @@ does not take a variable number of arguments but an array of tags of size
 
				 @item @emph{Prototype}:
			
 
				 @code{void starpu_tag_declare_deps_array(starpu_tag_t id, unsigned ndeps, starpu_tag_t *array);}
			
 
				 @item @emph{Example}:
			
 
				+@cartouche
			
 
				 @example
			
 
				 /*  Tag 0x1 depends on tags 0x32 and 0x52 */
			
 
				 starpu_tag_t tag_array[2] = @{0x32, 0x52@};
			
 
				 starpu_tag_declare_deps_array((starpu_tag_t)0x1, 2, tag_array);
			
 
				 @end example
			
 
				+@end cartouche
			
 
				 
			
 
				 
			
 
				 @end table
			
@@ -1291,13 +1292,13 @@ enabled by default. To enable OpenCL, you need either to disable CUDA
 
				 when configuring StarPU:
			
 
				 
			
 
				 @example
			
 
				-./configure --disable-cuda
			
 
				+$ ./configure --disable-cuda
			
 
				 @end example
			
 
				 
			
 
				 or when running applications:
			
 
				 
			
 
				 @example
			
 
				-STARPU_NCUDA=0 ./application
			
 
				+$ STARPU_NCUDA=0 ./application
			
 
				 @end example
			
 
				 
			
 
				 OpenCL will automatically be started on any device not yet used by
			
@@ -1306,7 +1307,7 @@ enable CUDA on 2 devices, and OpenCL on the 2 other devices by doing
 
				 so:
			
 
				 
			
 
				 @example
			
 
				-STARPU_NCUDA=2 ./application
			
 
				+$ STARPU_NCUDA=2 ./application
			
 
				 @end example
			
 
				 
			
 
				 @node Compiling OpenCL codelets
			
@@ -1367,12 +1368,12 @@ instance.
 
				 The Makefile could for instance contain the following lines to define which
			
 
				 options must be given to the compiler and to the linker:
			
 
				 
			
 
				+@cartouche
			
 
				 @example
			
 
				-@c @cartouche
			
 
				 CFLAGS+=$$(pkg-config --cflags libstarpu)
			
 
				 LIBS+=$$(pkg-config --libs libstarpu)
			
 
				-@c @end cartouche
			
 
				 @end example
			
 
				+@end cartouche
			
 
				 
			
 
				 @node Hello World
			
 
				 @section Hello World
			
@@ -1383,17 +1384,17 @@ In this section, we show how to implement a simple program that submits a task t
 
				 
			
 
				 The @code{starpu.h} header should be included in any code using StarPU.
			
 
				 
			
 
				-@example 
			
 
				-@c @cartouche
			
 
				+@cartouche
			
 
				+@example
			
 
				 #include <starpu.h>
			
 
				-@c @end cartouche
			
 
				 @end example
			
 
				+@end cartouche
			
 
				 
			
 
				 
			
 
				 @subsection Defining a Codelet
			
 
				 
			
 
				+@cartouche
			
 
				 @example
			
 
				-@c @cartouche
			
 
				 void cpu_func(void *buffers[], void *cl_arg)
			
 
				 @{
			
 
				     float *array = cl_arg;
			
@@ -1407,8 +1408,8 @@ starpu_codelet cl =
 
				     .cpu_func = cpu_func,
			
 
				     .nbuffers = 0
			
 
				 @};
			
 
				-@c @end cartouche
			
 
				 @end example
			
 
				+@end cartouche
			
 
				 
			
 
				 A codelet is a structure that represents a computational kernel. Such a codelet
			
 
				 may contain an implementation of the same kernel on different architectures
			
@@ -1446,8 +1447,8 @@ cannot be used as a synchronization medium.
 
				 
			
 
				 @subsection Submitting a Task
			
 
				 
			
 
				+@cartouche
			
 
				 @example
			
 
				-@c @cartouche
			
 
				 void callback_func(void *callback_arg)
			
 
				 @{
			
 
				     printf("Callback function (arg %x)\n", callback_arg);
			
@@ -1461,7 +1462,7 @@ int main(int argc, char **argv)
 
				     struct starpu_task *task = starpu_task_create();
			
 
				 
			
 
				     task->cl = &cl;
			
 
				-    
			
 
				+
			
 
				     float *array[2] = @{1.0f, -1.0f@};
			
 
				     task->cl_arg = &array;
			
 
				     task->cl_arg_size = 2*sizeof(float);
			
@@ -1480,8 +1481,8 @@ int main(int argc, char **argv)
 
				 
			
 
				     return 0;
			
 
				 @}
			
 
				-@c @end cartouche
			
 
				 @end example
			
 
				+@end cartouche
			
 
				 
			
 
				 Before submitting any tasks to StarPU, @code{starpu_init} must be called. The
			
 
				 @code{NULL} argument specifies that we use default configuration. Tasks cannot
			
@@ -1511,9 +1512,12 @@ While the computational kernel could be offloaded on various architectures, the
 
				 callback function is always executed on a CPU. The @code{callback_arg}
			
 
				 pointer is passed as an argument of the callback. The prototype of a callback
			
 
				 function must be:
			
 
				+
			
 
				+@cartouche
			
 
				 @example
			
 
				 void (*callback_function)(void *);
			
 
				 @end example
			
 
				+@end cartouche
			
 
				 
			
 
				 If the @code{synchronous} field is non-null, task submission will be
			
 
				 synchronous: the @code{starpu_task_submit} function will not return until the
			
@@ -1542,12 +1546,15 @@ here we will consider the @b{vector interface}.
 
				 
			
 
				 The following lines show how to declare an array of @code{n} elements of type
			
 
				 @code{float} using the vector interface:
			
 
				+
			
 
				+@cartouche
			
 
				 @example
			
 
				 float tab[n];
			
 
				 
			
 
				 starpu_data_handle tab_handle;
			
 
				 starpu_vector_data_register(&tab_handle, 0, tab, n, sizeof(float));
			
 
				 @end example
			
 
				+@end cartouche
			
 
				 
			
 
				 The first argument, called the @b{data handle}, is an opaque pointer which
			
 
				 designates the array in StarPU. This is also the structure which is used to
			
@@ -1557,6 +1564,8 @@ the main memory. Then comes the pointer @code{tab} where the data can be found,
 
				 the number of elements in the vector and the size of each element.
			
 
				 It is possible to construct a StarPU
			
 
				 task that multiplies this vector by a constant factor:
			
 
				+
			
 
				+@cartouche
			
 
				 @example
			
 
				 float factor;
			
 
				 struct starpu_task *task = starpu_task_create();
			
@@ -1569,6 +1578,7 @@ task->buffers[0].mode = STARPU_RW;
 
				 task->cl_arg = &factor;
			
 
				 task->cl_arg_size = sizeof(float);
			
 
				 @end example
			
 
				+@end cartouche
			
 
				 
			
 
				 Since the factor is constant, it does not need a preliminary declaration, and
			
 
				 can just be passed through the @code{cl_arg} pointer like in the previous
			
@@ -1580,6 +1590,7 @@ write-only and @code{STARPU_RW} for read and write access).
 
				 
			
 
				 The definition of the codelet can be written as follows:
			
 
				 
			
 
				+@cartouche
			
 
				 @example
			
 
				 void scal_func(void *buffers[], void *cl_arg)
			
 
				 @{
			
@@ -1603,7 +1614,7 @@ starpu_codelet cl = @{
 
				     .nbuffers = 1
			
 
				 @};
			
 
				 @end example
			
 
				-
			
 
				+@end cartouche
			
 
				 
			
 
				 The second argument of the @code{scal_func} function contains a pointer to the
			
 
				 parameters of the codelet (given in @code{task->cl_arg}), so that we read the
			
@@ -1635,10 +1646,12 @@ The CUDA implementation can be written as follows. It needs to be
 
				 compiled with a CUDA compiler such as nvcc, the NVIDIA CUDA compiler
			
 
				 driver.
			
 
				 
			
 
				+@cartouche
			
 
				 @example
			
 
				 #include <starpu.h>
			
 
				 
			
 
				-static __global__ void vector_mult_cuda(float *vector, int nx, float *multiplier)
			
 
				+static __global__ void vector_mult_cuda(float *vector, int nx,
			
 
				+                                        float *multiplier)
			
 
				 @{
			
 
				         int i;
			
 
				         for(i=0 ; i<nx ; i++) vector[i] *= *multiplier;
			
@@ -1653,8 +1666,11 @@ extern "C" void cuda_codelet(void *descr[], void *_args)
 
				         vector_mult_cuda<<<1,1>>>(vector, nx, multiplier);
			
 
				 @}
			
 
				 @end example
			
 
				+@end cartouche
			
 
				 
			
 
				 The CPU implementation can be written as follows.
			
 
				+
			
 
				+@cartouche
			
 
				 @example
			
 
				 #include <starpu.h>
			
 
				 
			
@@ -1668,6 +1684,7 @@ void cpu_codelet(void *descr[], void *_args)
 
				         for(i=0 ; i<nx ; i++) vector[i] *= *multiplier;
			
 
				 @}
			
 
				 @end example
			
 
				+@end cartouche
			
 
				 
			
 
				 Here is the source of the application. You can notice the value of the
			
 
				 field @code{where} for the codelet. We specify
			
@@ -1761,7 +1778,8 @@ $ PKG_CONFIG_PATH=$STARPU_DIR/lib/pkgconfig:$PKG_CONFIG_PATH
 
				 $ LD_LIBRARY_PATH=$STARPU_DIR/lib:$LD_LIBRARY_PATH
			
 
				 @end example
			
 
				 
			
 
				-It is then possible the application using the following makefile:
			
 
				+It is then possible to compile the application using the following
			
 
				+makefile:
			
 
				 
			
 
				 @cartouche
			
 
				 @example