|
|
@@ -1554,10 +1554,113 @@ to that vector made by other tasks.
|
|
|
@node Scaling a Vector (hybrid)
|
|
|
@section Vector Scaling on a Hybrid CPU/GPU Machine
|
|
|
|
|
|
-Contrary to the previous examples, the task submitted in the example may not
|
|
|
+Contrary to the previous examples, the task submitted in this example may be
|
|
|
executed not only by the CPUs, but also by a CUDA device.
|
|
|
|
|
|
-TODO
|
|
|
+@example
|
|
|
+#include <starpu.h>
|
|
|
+
|
|
|
+static __global__ void vector_mult_cuda(float *vector, int nx, float *multiplier)
|
|
|
+@{
|
|
|
+ int i;
|
|
|
+ for(i=0 ; i<nx ; i++) vector[i] *= *multiplier;
|
|
|
+@}
|
|
|
+
|
|
|
+extern "C" void cuda_codelet(void *descr[], void *_args)
|
|
|
+@{
|
|
|
+ float *vector = (float *)STARPU_GET_VECTOR_PTR(descr[0]);
|
|
|
+ int nx = STARPU_GET_VECTOR_NX(descr[0]);
|
|
|
+ float *multiplier = (float *)STARPU_GET_VARIABLE_PTR(descr[1]);
|
|
|
+
|
|
|
+ vector_mult_cuda<<<1,1>>>(vector, nx, multiplier);
|
|
|
+@}
|
|
|
+@end example
|
|
|
+
|
|
|
+@example
|
|
|
+#include <starpu.h>
|
|
|
+
|
|
|
+void cpu_codelet(void *descr[], void *_args)
|
|
|
+@{
|
|
|
+ float *vector = (float *)STARPU_GET_VECTOR_PTR(descr[0]);
|
|
|
+ int nx = (int)STARPU_GET_VECTOR_NX(descr[0]);
|
|
|
+ float *multiplier = (float *)STARPU_GET_VARIABLE_PTR(descr[1]);
|
|
|
+ int i;
|
|
|
+
|
|
|
+ for(i=0 ; i<nx ; i++) vector[i] *= *multiplier;
|
|
|
+@}
|
|
|
+@end example
|
|
|
+
|
|
|
+@example
|
|
|
+#include <starpu.h>
|
|
|
+
|
|
|
+#define NX 10
|
|
|
+
|
|
|
+extern void cuda_codelet(void *descr[], void *_args);
|
|
|
+extern void cpu_codelet(void *descr[], void *_args);
|
|
|
+
|
|
|
+int main(int argc, char **argv)
|
|
|
+@{
|
|
|
+ float *vector;
|
|
|
+ int i, ret;
|
|
|
+ float multiplier=3.0;
|
|
|
+ starpu_codelet cl;
|
|
|
+ struct starpu_task *task;
|
|
|
+ starpu_data_handle vector_handle;
|
|
|
+ starpu_data_handle multiplier_handle;
|
|
|
+
|
|
|
+ starpu_init(NULL); /* @b{Initialising StarPU} */
|
|
|
+
|
|
|
+ vector = (float*)malloc(NX*sizeof(float));
|
|
|
+ assert(vector);
|
|
|
+ for(i=0 ; i<NX ; i++) vector[i] = i;
|
|
|
+
|
|
|
+ /* @b{Registering data within StarPU} */
|
|
|
+ starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector, NX, sizeof(float));
|
|
|
+ starpu_register_variable_data(&multiplier_handle, 0, (uintptr_t)&multiplier, sizeof(float));
|
|
|
+
|
|
|
+ /* @b{Definition of the codelet} */
|
|
|
+ cl.where = STARPU_CPU|STARPU_CUDA; /* @b{It can be executed on a CPU or on a CUDA device} */
|
|
|
+ cl.cuda_func = cuda_codelet;
|
|
|
+ cl.cpu_func = cpu_codelet;
|
|
|
+ cl.nbuffers = 2;
|
|
|
+ cl.model = NULL;
|
|
|
+
|
|
|
+ /* @b{Definition of the task} */
|
|
|
+ task = starpu_task_create();
|
|
|
+ task->cl = &cl;
|
|
|
+ task->callback_func = NULL;
|
|
|
+ task->buffers[0].handle = vector_handle;
|
|
|
+ task->buffers[0].mode = STARPU_RW;
|
|
|
+ task->buffers[1].handle = multiplier_handle;
|
|
|
+ task->buffers[1].mode = STARPU_RW;
|
|
|
+
|
|
|
+ /* @b{Submitting the task} */
|
|
|
+ ret = starpu_task_submit(task);
|
|
|
+ if (ret == -ENODEV) @{
|
|
|
+ fprintf(stderr, "No worker may execute this task\n");
|
|
|
+ return 1;
|
|
|
+ @}
|
|
|
+
|
|
|
+ /* @b{Waiting for its termination} */
|
|
|
+ starpu_task_wait_for_all();
|
|
|
+
|
|
|
+ /* @b{Update the vector in RAM} */
|
|
|
+ starpu_data_sync_with_mem(vector_handle, STARPU_R);
|
|
|
+
|
|
|
+ /* @b{Access the data} */
|
|
|
+ for(i=0 ; i<NX; i++) @{
|
|
|
+ fprintf(stderr, "%f ", vector[i]);
|
|
|
+ @}
|
|
|
+ fprintf(stderr, "\n");
|
|
|
+
|
|
|
+ /* @b{Release the data and shutdown StarPU} */
|
|
|
+ starpu_data_release_from_mem(vector_handle);
|
|
|
+ starpu_shutdown();
|
|
|
+
|
|
|
+ return 0;
|
|
|
+@}
|
|
|
+@end example
|
|
|
+
|
|
|
|
|
|
@c ---------------------------------------------------------------------
|
|
|
@c Advanced Topics
|