15 years ago · 81325c6de0
--- a/doc/starpu.texi
+++ b/doc/starpu.texi
@@ -1621,9 +1621,9 @@ In this section, we show how to implement a simple program that submits a task t
 
																 The @code{starpu.h} header should be included in any code using StarPU.
															
 
																 @cartouche
															
 
																-@example
															
 
																+@smallexample
															
 
																 #include <starpu.h>
															
 
																-@end example
															
 
																+@end smallexample
															
 
																 @end cartouche
															
@@ -1631,7 +1631,7 @@ The @code{starpu.h} header should be included in any code using StarPU.
 
																 @subsection Defining a Codelet
															
 
																 @cartouche
															
 
																-@example
															
 
																+@smallexample
															
 
																 void cpu_func(void *buffers[], void *cl_arg)
															
 
																 @{
															
 
																     float *array = cl_arg;
															
@@ -1645,7 +1645,7 @@ starpu_codelet cl =
 
																     .cpu_func = cpu_func,
															
 
																     .nbuffers = 0
															
 
																 @};
															
 
																-@end example
															
 
																+@end smallexample
															
 
																 @end cartouche
															
 
																 A codelet is a structure that represents a computational kernel. Such a codelet
															
@@ -1686,7 +1686,7 @@ cannot be used as a synchronization medium.
 
																 @subsection Submitting a Task
															
 
																 @cartouche
															
 
																-@example
															
 
																+@smallexample
															
 
																 void callback_func(void *callback_arg)
															
 
																 @{
															
 
																     printf("Callback function (arg %x)\n", callback_arg);
															
@@ -1719,7 +1719,7 @@ int main(int argc, char **argv)
 
																     return 0;
															
 
																 @}
															
 
																-@end example
															
 
																+@end smallexample
															
 
																 @end cartouche
															
 
																 Before submitting any tasks to StarPU, @code{starpu_init} must be called. The
															
@@ -1761,13 +1761,13 @@ guarantee that asynchronous tasks have been executed before it returns.
 
																 @node Execution of Hello World
															
 
																 @subsection Execution of Hello World
															
 
																-@example
															
 
																+@smallexample
															
 
																 % make helloWorld
															
 
																 cc $(pkg-config --cflags libstarpu)  $(pkg-config --libs libstarpu) helloWorld.c -o helloWorld
															
 
																 % ./helloWorld
															
 
																 Hello world (array = @{1.000000, -1.000000@} )
															
 
																 Callback function (arg 42)
															
 
																-@end example
															
 
																+@end smallexample
															
 
																 @node Scaling a Vector
															
 
																 @section Manipulating Data: Scaling a Vector
															
@@ -1802,13 +1802,13 @@ The following lines show how to declare an array of @code{NX} elements of type
 
																 @code{float} using the vector interface:
															
 
																 @cartouche
															
 
																-@example
															
 
																+@smallexample
															
 
																 float vector[NX];
															
 
																 starpu_data_handle vector_handle;
															
 
																 starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector, NX,
															
 
																                             sizeof(float));
															
 
																-@end example
															
 
																+@end smallexample
															
 
																 @end cartouche
															
 
																 The first argument, called the @b{data handle}, is an opaque pointer which
															
@@ -1821,7 +1821,7 @@ It is possible to construct a StarPU task that will manipulate the
 
																 vector and a constant factor.
															
 
																 @cartouche
															
 
																-@example
															
 
																+@smallexample
															
 
																 float factor = 3.14;
															
 
																 struct starpu_task *task = starpu_task_create();
															
@@ -1833,7 +1833,7 @@ task->cl_arg_size = sizeof(float);
 
																 task->synchronous = 1;
															
 
																 starpu_task_submit(task);
															
 
																-@end example
															
 
																+@end smallexample
															
 
																 @end cartouche
															
 
																 Since the factor is constant, it does not need a preliminary declaration, and
															
@@ -1847,7 +1847,7 @@ write-only and @code{STARPU_RW} for read and write access).
 
																 The definition of the codelet can be written as follows:
															
 
																 @cartouche
															
 
																-@example
															
 
																+@smallexample
															
 
																 void scal_func(void *buffers[], void *cl_arg)
															
 
																 @{
															
 
																     unsigned i;
															
@@ -1869,7 +1869,7 @@ starpu_codelet cl = @{
 
																     .cpu_func = scal_func,
															
 
																     .nbuffers = 1
															
 
																 @};
															
 
																-@end example
															
 
																+@end smallexample
															
 
																 @end cartouche
															
 
																 The second argument of the @code{scal_func} function contains a pointer to the
															
@@ -1887,12 +1887,12 @@ to this vector made by other tasks.
 
																 @node Execution of Vector Scaling
															
 
																 @subsection Execution of Vector Scaling
															
 
																-@example
															
 
																+@smallexample
															
 
																 % make vector
															
 
																 cc $(pkg-config --cflags libstarpu)  $(pkg-config --libs libstarpu)  vector.c   -o vector
															
 
																 % ./vector
															
 
																 0.000000 3.000000 6.000000 9.000000 12.000000
															
 
																-@end example
															
 
																+@end smallexample
															
 
																 @node Vector Scaling on an Hybrid CPU/GPU Machine
															
 
																 @section Vector Scaling on an Hybrid CPU/GPU Machine
															
@@ -1901,20 +1901,21 @@ Contrary to the previous examples, the task submitted in this example may not
 
																 only be executed by the CPUs, but also by a CUDA device.
															
 
																 @menu
															
 
																-* Source code of Hybrid Vector Scaling::  
															
 
																-* Compilation and execution of Hybrid Vector Scaling::  
															
 
																+* Definition of the CUDA Codelet::  
															
 
																 * Definition of the OpenCL Codelet::  
															
 
																+* Definition of the Main Code::  
															
 
																+* Compilation and execution of Hybrid Vector Scaling::  
															
 
																 @end menu
															
 
																-@node Source code of Hybrid Vector Scaling
															
 
																-@subsection Source code of Hybrid Vector Scaling
															
 
																+@node Definition of the CUDA Codelet
															
 
																+@subsection Definition of the CUDA Codelet
															
 
																 The CUDA implementation can be written as follows. It needs to be
															
 
																 compiled with a CUDA compiler such as nvcc, the NVIDIA CUDA compiler
															
 
																 driver.
															
 
																 @cartouche
															
 
																-@example
															
 
																+@smallexample
															
 
																 #include <starpu.h>
															
 
																 static __global__ void vector_mult_cuda(float *val, unsigned n,
															
@@ -1940,9 +1941,78 @@ extern "C" void scal_cuda_func(void *buffers[], void *_args)
 
																     cudaThreadSynchronize();
															
 
																 @}
															
 
																-@end example
															
 
																+@end smallexample
															
 
																+@end cartouche
															
 
																+
															
 
																+@node Definition of the OpenCL Codelet
															
 
																+@subsection Definition of the OpenCL Codelet
															
 
																+
															
 
																+The OpenCL implementation can be written as follows. StarPU provides
															
 
																+tools to compile an OpenCL codelet stored in a file.
															
 
																+
															
 
																+@cartouche
															
 
																+@smallexample
															
 
																+__kernel void vector_mult_opencl(__global float* val, int nx, float factor)
															
 
																+@{
															
 
																+        const int i = get_global_id(0);
															
 
																+        if (i < nx) @{
															
 
																+                val[i] *= factor;
															
 
																+        @}
															
 
																+@}
															
 
																+@end smallexample
															
 
																 @end cartouche
															
 
																+@cartouche
															
 
																+@smallexample
															
 
																+#include <starpu.h>
															
 
																+#include <starpu_opencl.h>
															
 
																+
															
 
																+void scal_opencl_func(void *buffers[], void *_args)
															
 
																+@{
															
 
																+    float *factor = (float *)_args;
															
 
																+    struct starpu_vector_interface_s *vector = (struct starpu_vector_interface_s *) buffers[0];
															
 
																+    int id, devid, err;
															
 
																+    cl_kernel kernel;
															
 
																+    cl_command_queue queue;
															
 
																+
															
 
																+    /* length of the vector */
															
 
																+    unsigned n = STARPU_GET_VECTOR_NX(vector);
															
 
																+    /* local copy of the vector pointer */
															
 
																+    float *val = (float *)STARPU_GET_VECTOR_PTR(vector);
															
 
																+
															
 
																+    id = starpu_worker_get_id();
															
 
																+    devid = starpu_worker_get_devid(id);
															
 
																+
															
 
																+    err = starpu_opencl_load_kernel(&kernel, &queue,
															
 
																+                    "examples/basic_examples/vector_scal_opencl_codelet.cl",
															
 
																+                    "vector_mult_opencl", devid);   /* @b{Name of the codelet defined above} */
															
 
																+    if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
															
 
																+
															
 
																+    err = 0;
															
 
																+    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &val);
															
 
																+    err = clSetKernelArg(kernel, 1, sizeof(int), &n);
															
 
																+    err |= clSetKernelArg(kernel, 2, sizeof(float), (void*)factor);
															
 
																+    if (err) STARPU_OPENCL_REPORT_ERROR(err);
															
 
																+
															
 
																+    @{
															
 
																+        size_t global=1;
															
 
																+        size_t local=1;
															
 
																+        err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
															
 
																+        if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
															
 
																+    @}
															
 
																+
															
 
																+    clFinish(queue);
															
 
																+
															
 
																+    starpu_opencl_release(kernel);
															
 
																+@}
															
 
																+@end smallexample
															
 
																+@end cartouche
															
 
																+
															
 
																+
															
 
																+@node Definition of the Main Code
															
 
																+@subsection Definition of the Main Code
															
 
																+
															
 
																+
															
 
																 The CPU implementation is the same as in the previous section.
															
 
																 Here is the source of the main application. You can notice the value of the
															
@@ -1951,7 +2021,7 @@ field @code{where} for the codelet. We specify
 
																 can be executed either on a CPU or on a CUDA device.
															
 
																 @cartouche
															
 
																-@example
															
 
																+@smallexample
															
 
																 #include <starpu.h>
															
 
																 #define NX 5
															
@@ -1981,11 +2051,11 @@ int main(int argc, char **argv)
 
																     vector = (float*)malloc(NX*sizeof(float));
															
 
																     assert(vector);
															
 
																     for(i=0 ; i<NX ; i++) vector[i] = i;
															
 
																-@end example
															
 
																+@end smallexample
															
 
																 @end cartouche
															
 
																 @cartouche
															
 
																-@example
															
 
																+@smallexample
															
 
																     /* @b{Registering data within StarPU} */
															
 
																     starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector,
															
 
																                                 NX, sizeof(float));
															
@@ -1997,11 +2067,11 @@ int main(int argc, char **argv)
 
																     task->buffers[0].mode = STARPU_RW;
															
 
																     task->cl_arg = &factor;
															
 
																     task->cl_arg_size = sizeof(float);
															
 
																-@end example
															
 
																+@end smallexample
															
 
																 @end cartouche
															
 
																 @cartouche
															
 
																-@example
															
 
																+@smallexample
															
 
																     /* @b{Submitting the task} */
															
 
																     ret = starpu_task_submit(task);
															
 
																     if (ret == -ENODEV) @{
															
@@ -2014,11 +2084,11 @@ int main(int argc, char **argv)
 
																     /* @b{Update the vector in RAM} */
															
 
																     starpu_data_sync_with_mem(vector_handle, STARPU_R);
															
 
																-@end example
															
 
																+@end smallexample
															
 
																 @end cartouche
															
 
																 @cartouche
															
 
																-@example
															
 
																+@smallexample
															
 
																     /* @b{Access the data} */
															
 
																     for(i=0 ; i<NX; i++) @{
															
 
																       fprintf(stderr, "%f ", vector[i]);
															
@@ -2031,7 +2101,7 @@ int main(int argc, char **argv)
 
																     return 0;
															
 
																 @}
															
 
																-@end example
															
 
																+@end smallexample
															
 
																 @end cartouche
															
 
																 @node Compilation and execution of Hybrid Vector Scaling
															
@@ -2041,7 +2111,7 @@ The Makefile given at the beginning of the section must be extended to
 
																 give the rules to compile the CUDA source code.
															
 
																 @cartouche
															
 
																-@example
															
 
																+@smallexample
															
 
																 CFLAGS	+=	$(shell pkg-config --cflags libstarpu)
															
 
																 LDFLAGS	+=	$(shell pkg-config --libs libstarpu)
															
 
																 CC	=	gcc
															
@@ -2053,36 +2123,33 @@ vector: vector.o vector_cpu.o vector_cuda.o
 
																 clean:
															
 
																        rm -f vector *.o
															
 
																-@end example
															
 
																+@end smallexample
															
 
																 @end cartouche
															
 
																-@example
															
 
																+@smallexample
															
 
																 % make
															
 
																-@end example
															
 
																+@end smallexample
															
 
																 and to execute it, with the default configuration:
															
 
																-@example
															
 
																+@smallexample
															
 
																 % ./vector
															
 
																 0.000000 3.000000 6.000000 9.000000 12.000000
															
 
																-@end example
															
 
																+@end smallexample
															
 
																 or for example, by disabling CPU devices:
															
 
																-@example
															
 
																+@smallexample
															
 
																 % STARPU_NCPUS=0 ./vector
															
 
																 0.000000 3.000000 6.000000 9.000000 12.000000
															
 
																-@end example
															
 
																+@end smallexample
															
 
																 or by disabling CUDA devices:
															
 
																-@example
															
 
																+@smallexample
															
 
																 % STARPU_NCUDA=0 ./vector
															
 
																 0.000000 3.000000 6.000000 9.000000 12.000000
															
 
																-@end example
															
 
																-
															
 
																-@node Definition of the OpenCL Codelet
															
 
																-@subsection Definition of the OpenCL Codelet
															
 
																+@end smallexample
															
 
																 @c TODO: Add performance model example (and update basic_examples)
															
--- a/doc/vector_scal_opencl.texi
+++ b/doc/vector_scal_opencl.texi
@@ -39,15 +39,15 @@ void scal_opencl_func(void *buffers[], void *_args)
 
																     err = starpu_opencl_load_kernel(&kernel, &queue,
															
 
																                     "examples/basic_examples/vector_scal_opencl_codelet.cl",
															
 
																-                    "vectorScal", devid);
															
 
																+                    "vector_mult_opencl", devid);
															
 
																     if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
															
 
																     err = 0;
															
 
																     err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &val);
															
 
																     err = clSetKernelArg(kernel, 1, sizeof(int), &n);
															
 
																     err |= clSetKernelArg(kernel, 2, sizeof(float), (void*)factor);
															
 
																-
															
 
																     if (err) STARPU_OPENCL_REPORT_ERROR(err);
															
 
																+
															
 
																     @{
															
 
																         size_t global=1;
															
 
																         size_t local=1;
															
--- a/doc/vector_scal_opencl_codelet.texi
+++ b/doc/vector_scal_opencl_codelet.texi
@@ -14,7 +14,7 @@
 
																  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
															
 
																  */
															
 
																-__kernel void vectorScal(__global float* val, int nx, float factor)
															
 
																+__kernel void vector_mult_opencl(__global float* val, int nx, float factor)
															
 
																 @{
															
 
																         const int i = get_global_id(0);
															
 
																         if (i < nx) @{