Просмотр исходного кода

doc: add example with CUDA/CPU implementation

Nathalie Furmento лет назад: 15
Родитель
Commit
eca8e4ce51
1 измененных файлов с 105 добавлено и 2 удалено
  1. 105 2
      doc/starpu.texi

+ 105 - 2
doc/starpu.texi

@@ -1554,10 +1554,113 @@ to that vector made by other tasks.
 @node Scaling a Vector (hybrid)
 @section Vector Scaling on a Hybrid CPU/GPU Machine
 
-Contrary to the previous examples, the task submitted in the example may not
+Contrary to the previous examples, the task submitted in this example may not
 only be executed by the CPUs, but also by a CUDA device.
 
-TODO
+@example
+#include <starpu.h>
+
+/* CUDA kernel: multiplies each of the nx elements of vector, in place, by the
+ * value read through multiplier. The loop is serial and does not depend on
+ * the thread index, so every thread running the kernel walks the whole vector. */
+static __global__ void vector_mult_cuda(float *vector, int nx, float *multiplier)
+@{
+        int idx = 0;

+        while (idx < nx) @{
+                vector[idx] = vector[idx] * (*multiplier);
+                ++idx;
+        @}
+@}
+
+/* CUDA implementation of the codelet: scales the vector found in descr[0] by
+ * the float variable found in descr[1]. _args is not used. */
+extern "C" void cuda_codelet(void *descr[], void *_args)
+@{
+        float *vector = (float *)STARPU_GET_VECTOR_PTR(descr[0]);
+        int nx = (int)STARPU_GET_VECTOR_NX(descr[0]);
+        float *multiplier = (float *)STARPU_GET_VARIABLE_PTR(descr[1]);

+        /* One block of one thread is enough: the kernel loops over the vector itself */
+        vector_mult_cuda<<<1,1>>>(vector, nx, multiplier);

+        /* A kernel launch returns immediately; wait for the kernel to complete so
+         * that the codelet does not return while the GPU is still computing */
+        cudaThreadSynchronize();
+@}
+@end example
+
+@example
+#include <starpu.h>
+
+/* CPU implementation of the codelet: multiplies every element of the vector
+ * found in descr[0], in place, by the float variable found in descr[1].
+ * _args is not used. */
+void cpu_codelet(void *descr[], void *_args)
+@{
+        float *data = (float *)STARPU_GET_VECTOR_PTR(descr[0]);
+        int count = (int)STARPU_GET_VECTOR_NX(descr[0]);
+        float *factor = (float *)STARPU_GET_VARIABLE_PTR(descr[1]);
+        int idx = 0;

+        while (idx < count) @{
+                data[idx] = data[idx] * (*factor);
+                ++idx;
+        @}
+@}
+@end example
+
+@example
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>

+#include <starpu.h>
+
+/* Number of elements in the vector to scale */
+#define NX 10

+/* The two implementations of the codelet, defined outside this file
+ * (see the CUDA and CPU listings above) */
+extern void cuda_codelet(void *descr[], void *_args);
+extern void cpu_codelet(void *descr[], void *_args);
+
+/* Scales a vector of NX floats by a constant with a single StarPU task that
+ * may run either on a CPU or on a CUDA device, then prints the result on
+ * stderr. Returns 0 on success, 1 when no worker can execute the task. */
+int main(int argc, char **argv)
+@{
+        float *vector;
+        int i, ret;
+        float multiplier = 3.0f;
+        /* Zero the whole structure so that the fields which are not set below
+         * do not hold indeterminate values */
+        starpu_codelet cl = @{0@};
+        struct starpu_task *task;
+        starpu_data_handle vector_handle;
+        starpu_data_handle multiplier_handle;

+        starpu_init(NULL);                            /* @b{Initialising StarPU} */

+        vector = malloc(NX * sizeof(*vector));
+        assert(vector);
+        for(i=0 ; i<NX ; i++) vector[i] = i;

+        /* @b{Registering data within StarPU} */
+        starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector, NX, sizeof(float));
+        starpu_register_variable_data(&multiplier_handle, 0, (uintptr_t)&multiplier, sizeof(float));

+        /* @b{Definition of the codelet} */
+        cl.where = STARPU_CPU|STARPU_CUDA; /* @b{It can be executed on a CPU or on a CUDA device} */
+        cl.cuda_func = cuda_codelet;
+        cl.cpu_func = cpu_codelet;
+        cl.nbuffers = 2;
+        cl.model = NULL;

+        /* @b{Definition of the task} */
+        task = starpu_task_create();
+        task->cl = &cl;
+        task->callback_func = NULL;
+        task->buffers[0].handle = vector_handle;
+        task->buffers[0].mode = STARPU_RW;  /* the vector is scaled in place */
+        task->buffers[1].handle = multiplier_handle;
+        task->buffers[1].mode = STARPU_R;   /* the multiplier is only read by the codelet */

+        /* @b{Submitting the task} */
+        ret = starpu_task_submit(task);
+        if (ret == -ENODEV) @{
+                fprintf(stderr, "No worker may execute this task\n");
+                return 1;
+        @}

+        /* @b{Waiting for its termination} */
+        starpu_task_wait_for_all();

+        /* @b{Update the vector in RAM} */
+        starpu_data_sync_with_mem(vector_handle, STARPU_R);

+        /* @b{Access the data} */
+        for(i=0 ; i<NX; i++) @{
+          fprintf(stderr, "%f ", vector[i]);
+        @}
+        fprintf(stderr, "\n");

+        /* @b{Release the data and shutdown StarPU} */
+        starpu_data_release_from_mem(vector_handle);
+        starpu_shutdown();

+        return 0;
+@}
+@end example
+
 
 @c ---------------------------------------------------------------------
 @c Advanced Topics