|
@@ -1631,6 +1631,9 @@ to this vector made by other tasks.
|
|
|
@node Scaling a Vector (hybrid)
|
|
|
@section Vector Scaling on a Hybrid CPU/GPU Machine
|
|
|
|
|
|
+Contrary to the previous examples, the task submitted in this example may not
|
|
|
+only be executed by the CPUs, but also by a CUDA device.
|
|
|
+
|
|
|
@menu
|
|
|
* Source code:: Source of the StarPU application
|
|
|
* Compilation and execution:: Executing the StarPU application
|
|
@@ -1639,9 +1642,6 @@ to this vector made by other tasks.
|
|
|
@node Source code
|
|
|
@subsection Source code
|
|
|
|
|
|
-Contrary to the previous examples, the task submitted in this example may not
|
|
|
-only be executed by the CPUs, but also by a CUDA device.
|
|
|
-
|
|
|
The CUDA implementation can be written as follows. It needs to be
|
|
|
compiled with a CUDA compiler such as nvcc, the NVIDIA CUDA compiler
|
|
|
driver.
|
|
@@ -1691,10 +1691,11 @@ field @code{where} for the codelet. We specify
|
|
|
@code{STARPU_CPU|STARPU_CUDA} to indicate to StarPU that the codelet
|
|
|
can be executed either on a CPU or on a CUDA device.
|
|
|
|
|
|
+@cartouche
|
|
|
@example
|
|
|
#include <starpu.h>
|
|
|
|
|
|
-#define NX 10
|
|
|
+#define NX 5
|
|
|
|
|
|
extern void cuda_codelet(void *descr[], void *_args);
|
|
|
extern void cpu_codelet(void *descr[], void *_args);
|
|
@@ -1714,15 +1715,20 @@ int main(int argc, char **argv)
|
|
|
vector = (float*)malloc(NX*sizeof(float));
|
|
|
assert(vector);
|
|
|
for(i=0 ; i<NX ; i++) vector[i] = i;
|
|
|
+@end example
|
|
|
+@end cartouche
|
|
|
|
|
|
+@cartouche
|
|
|
+@example
|
|
|
/* @b{Registering data within StarPU} */
|
|
|
starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector,
|
|
|
NX, sizeof(float));
|
|
|
- starpu_variable_data_register(&multiplier_handle, 0, (uintptr_t)&multiplier,
|
|
|
- sizeof(float));
|
|
|
+ starpu_variable_data_register(&multiplier_handle, 0,
|
|
|
+ (uintptr_t)&multiplier, sizeof(float));
|
|
|
|
|
|
/* @b{Definition of the codelet} */
|
|
|
- cl.where = STARPU_CPU|STARPU_CUDA; /* @b{It can be executed on a CPU or on CUDA device} */
|
|
|
+ cl.where = STARPU_CPU|STARPU_CUDA; /* @b{It can be executed on a CPU} */
|
|
|
+ /* @b{or on a CUDA device} */
|
|
|
cl.cuda_func = cuda_codelet;
|
|
|
cl.cpu_func = cpu_codelet;
|
|
|
cl.nbuffers = 2;
|
|
@@ -1736,7 +1742,11 @@ int main(int argc, char **argv)
|
|
|
task->buffers[0].mode = STARPU_RW;
|
|
|
task->buffers[1].handle = multiplier_handle;
|
|
|
task->buffers[1].mode = STARPU_RW;
|
|
|
+@end example
|
|
|
+@end cartouche
|
|
|
|
|
|
+@cartouche
|
|
|
+@example
|
|
|
/* @b{Submitting the task} */
|
|
|
ret = starpu_task_submit(task);
|
|
|
if (ret == -ENODEV) @{
|
|
@@ -1749,7 +1759,11 @@ int main(int argc, char **argv)
|
|
|
|
|
|
/* @b{Update the vector in RAM} */
|
|
|
starpu_data_sync_with_mem(vector_handle, STARPU_R);
|
|
|
+@end example
|
|
|
+@end cartouche
|
|
|
|
|
|
+@cartouche
|
|
|
+@example
|
|
|
/* @b{Access the data} */
|
|
|
for(i=0 ; i<NX; i++) @{
|
|
|
fprintf(stderr, "%f ", vector[i]);
|
|
@@ -1763,6 +1777,7 @@ int main(int argc, char **argv)
|
|
|
return 0;
|
|
|
@}
|
|
|
@end example
|
|
|
+@end cartouche
|
|
|
|
|
|
@node Compilation and execution
|
|
|
@subsection Compilation and execution
|
|
@@ -1805,21 +1820,21 @@ and to execute it, with the default configuration:
|
|
|
|
|
|
@example
|
|
|
$ ./vector
|
|
|
-0.000000 3.000000 6.000000 9.000000 12.000000 15.000000 18.000000 21.000000 24.000000 27.000000
|
|
|
+0.000000 3.000000 6.000000 9.000000 12.000000
|
|
|
@end example
|
|
|
|
|
|
or for example, by disabling CPU devices:
|
|
|
|
|
|
@example
|
|
|
$ STARPU_NCPUS=0 ./vector
|
|
|
-0.000000 3.000000 6.000000 9.000000 12.000000 15.000000 18.000000 21.000000 24.000000 27.000000
|
|
|
+0.000000 3.000000 6.000000 9.000000 12.000000
|
|
|
@end example
|
|
|
|
|
|
or by disabling CUDA devices:
|
|
|
|
|
|
@example
|
|
|
$ STARPU_NCUDA=0 ./vector
|
|
|
-0.000000 3.000000 6.000000 9.000000 12.000000 15.000000 18.000000 21.000000 24.000000 27.000000
|
|
|
+0.000000 3.000000 6.000000 9.000000 12.000000
|
|
|
@end example
|
|
|
|
|
|
|