|
@@ -2,7 +2,7 @@
|
|
|
|
|
|
@c This file is part of the StarPU Handbook.
|
|
@c This file is part of the StarPU Handbook.
|
|
@c Copyright (C) 2009--2011 Universit@'e de Bordeaux 1
|
|
@c Copyright (C) 2009--2011 Universit@'e de Bordeaux 1
|
|
-@c Copyright (C) 2010, 2011, 2012 Centre National de la Recherche Scientifique
|
|
|
|
|
|
+@c Copyright (C) 2010, 2011, 2012, 2013 Centre National de la Recherche Scientifique
|
|
@c Copyright (C) 2011, 2012 Institut National de Recherche en Informatique et Automatique
|
|
@c Copyright (C) 2011, 2012 Institut National de Recherche en Informatique et Automatique
|
|
@c See the file starpu.texi for copying conditions.
|
|
@c See the file starpu.texi for copying conditions.
|
|
|
|
|
|
@@ -68,10 +68,10 @@ has a single implementation for CPU:
|
|
|
|
|
|
@cartouche
|
|
@cartouche
|
|
@smallexample
|
|
@smallexample
|
|
-/* Task declaration. */
|
|
|
|
|
|
+/* @b{Task declaration.} */
|
|
static void my_task (int x) __attribute__ ((task));
|
|
static void my_task (int x) __attribute__ ((task));
|
|
|
|
|
|
-/* Definition of the CPU implementation of `my_task'. */
|
|
|
|
|
|
+/* @b{Definition of the CPU implementation of `my_task'.} */
|
|
static void my_task (int x)
|
|
static void my_task (int x)
|
|
@{
|
|
@{
|
|
printf ("Hello, world! With x = %d\n", x);
|
|
printf ("Hello, world! With x = %d\n", x);
|
|
@@ -79,16 +79,16 @@ static void my_task (int x)
|
|
|
|
|
|
int main ()
|
|
int main ()
|
|
@{
|
|
@{
|
|
- /* Initialize StarPU. */
|
|
|
|
|
|
+ /* @b{Initialize StarPU.} */
|
|
#pragma starpu initialize
|
|
#pragma starpu initialize
|
|
|
|
|
|
- /* Do an asynchronous call to `my_task'. */
|
|
|
|
|
|
+ /* @b{Do an asynchronous call to `my_task'.} */
|
|
my_task (42);
|
|
my_task (42);
|
|
|
|
|
|
- /* Wait for the call to complete. */
|
|
|
|
|
|
+ /* @b{Wait for the call to complete.} */
|
|
#pragma starpu wait
|
|
#pragma starpu wait
|
|
|
|
|
|
- /* Terminate. */
|
|
|
|
|
|
+ /* @b{Terminate.} */
|
|
#pragma starpu shutdown
|
|
#pragma starpu shutdown
|
|
|
|
|
|
return 0;
|
|
return 0;
|
|
@@ -139,7 +139,8 @@ The @code{starpu.h} header should be included in any code using StarPU.
|
|
|
|
|
|
@cartouche
|
|
@cartouche
|
|
@smallexample
|
|
@smallexample
|
|
-struct params @{
|
|
|
|
|
|
+struct params
|
|
|
|
+@{
|
|
int i;
|
|
int i;
|
|
float f;
|
|
float f;
|
|
@};
|
|
@};
|
|
@@ -328,12 +329,12 @@ has to be defined:
|
|
|
|
|
|
@cartouche
|
|
@cartouche
|
|
@smallexample
|
|
@smallexample
|
|
-/* Declare the `vector_scal' task. */
|
|
|
|
|
|
+/* @b{Declare the `vector_scal' task.} */
|
|
static void vector_scal (unsigned size, float vector[size],
|
|
static void vector_scal (unsigned size, float vector[size],
|
|
float factor)
|
|
float factor)
|
|
__attribute__ ((task));
|
|
__attribute__ ((task));
|
|
|
|
|
|
-/* Define the standard CPU implementation. */
|
|
|
|
|
|
+/* @b{Define the standard CPU implementation.} */
|
|
static void
|
|
static void
|
|
vector_scal (unsigned size, float vector[size], float factor)
|
|
vector_scal (unsigned size, float vector[size], float factor)
|
|
@{
|
|
@{
|
|
@@ -368,7 +369,7 @@ main (void)
|
|
vector_scal (NX, vector, FACTOR);
|
|
vector_scal (NX, vector, FACTOR);
|
|
|
|
|
|
#pragma starpu wait
|
|
#pragma starpu wait
|
|
- @} /* VECTOR is automatically freed here. */
|
|
|
|
|
|
+ @} /* @b{VECTOR is automatically freed here.} */
|
|
|
|
|
|
#pragma starpu shutdown
|
|
#pragma starpu shutdown
|
|
|
|
|
|
@@ -437,7 +438,7 @@ in our C file like this:
|
|
|
|
|
|
@cartouche
|
|
@cartouche
|
|
@smallexample
|
|
@smallexample
|
|
-/* The OpenCL programs, loaded from `main' (see below). */
|
|
|
|
|
|
+/* @b{The OpenCL programs, loaded from `main' (see below).} */
|
|
static struct starpu_opencl_program cl_programs;
|
|
static struct starpu_opencl_program cl_programs;
|
|
|
|
|
|
static void vector_scal_opencl (unsigned size, float vector[size],
|
|
static void vector_scal_opencl (unsigned size, float vector[size],
|
|
@@ -452,14 +453,14 @@ vector_scal_opencl (unsigned size, float vector[size], float factor)
|
|
cl_command_queue queue;
|
|
cl_command_queue queue;
|
|
cl_event event;
|
|
cl_event event;
|
|
|
|
|
|
- /* VECTOR is GPU memory pointer, not a main memory pointer. */
|
|
|
|
|
|
+ /* @b{VECTOR is a GPU memory pointer, not a main memory pointer.} */
|
|
cl_mem val = (cl_mem) vector;
|
|
cl_mem val = (cl_mem) vector;
|
|
|
|
|
|
id = starpu_worker_get_id ();
|
|
id = starpu_worker_get_id ();
|
|
devid = starpu_worker_get_devid (id);
|
|
devid = starpu_worker_get_devid (id);
|
|
|
|
|
|
- /* Prepare to invoke the kernel. In the future, this will be largely
|
|
|
|
- automated. */
|
|
|
|
|
|
+ /* @b{Prepare to invoke the kernel. In the future, this will be largely
|
|
|
|
+ automated.} */
|
|
err = starpu_opencl_load_kernel (&kernel, &queue, &cl_programs,
|
|
err = starpu_opencl_load_kernel (&kernel, &queue, &cl_programs,
|
|
"vector_mult_opencl", devid);
|
|
"vector_mult_opencl", devid);
|
|
if (err != CL_SUCCESS)
|
|
if (err != CL_SUCCESS)
|
|
@@ -481,7 +482,7 @@ vector_scal_opencl (unsigned size, float vector[size], float factor)
|
|
starpu_opencl_collect_stats (event);
|
|
starpu_opencl_collect_stats (event);
|
|
clReleaseEvent (event);
|
|
clReleaseEvent (event);
|
|
|
|
|
|
- /* Done with KERNEL. */
|
|
|
|
|
|
+ /* @b{Done with KERNEL.} */
|
|
starpu_opencl_release_kernel (kernel);
|
|
starpu_opencl_release_kernel (kernel);
|
|
@}
|
|
@}
|
|
@end smallexample
|
|
@end smallexample
|
|
@@ -528,8 +529,8 @@ the CUDA Kernel}).
|
|
|
|
|
|
@cartouche
|
|
@cartouche
|
|
@smallexample
|
|
@smallexample
|
|
-/* CUDA implementation of the `vector_scal' task, to be compiled
|
|
|
|
- with `nvcc'. */
|
|
|
|
|
|
+/* @b{CUDA implementation of the `vector_scal' task, to be compiled
|
|
|
|
+ with `nvcc'.} */
|
|
|
|
|
|
#include <starpu.h>
|
|
#include <starpu.h>
|
|
#include <stdlib.h>
|
|
#include <stdlib.h>
|
|
@@ -543,7 +544,7 @@ vector_mult_cuda (float *val, unsigned n, float factor)
|
|
val[i] *= factor;
|
|
val[i] *= factor;
|
|
@}
|
|
@}
|
|
|
|
|
|
-/* Definition of the task implementation declared in the C file. */
|
|
|
|
|
|
+/* @b{Definition of the task implementation declared in the C file.} */
|
|
extern "C" void
|
|
extern "C" void
|
|
vector_scal_cuda (size_t size, float vector[], float factor)
|
|
vector_scal_cuda (size_t size, float vector[], float factor)
|
|
@{
|
|
@{
|
|
@@ -652,9 +653,9 @@ void scal_cpu_func(void *buffers[], void *cl_arg)
|
|
unsigned i;
|
|
unsigned i;
|
|
float *factor = cl_arg;
|
|
float *factor = cl_arg;
|
|
|
|
|
|
- /* length of the vector */
|
|
|
|
|
|
+ /* @b{length of the vector} */
|
|
unsigned n = STARPU_VECTOR_GET_NX(buffers[0]);
|
|
unsigned n = STARPU_VECTOR_GET_NX(buffers[0]);
|
|
- /* CPU copy of the vector pointer */
|
|
|
|
|
|
+ /* @b{CPU copy of the vector pointer} */
|
|
float *val = (float *)STARPU_VECTOR_GET_PTR(buffers[0]);
|
|
float *val = (float *)STARPU_VECTOR_GET_PTR(buffers[0]);
|
|
|
|
|
|
for (i = 0; i < n; i++)
|
|
for (i = 0; i < n; i++)
|
|
@@ -732,9 +733,9 @@ extern "C" void scal_cuda_func(void *buffers[], void *_args)
|
|
@{
|
|
@{
|
|
float *factor = (float *)_args;
|
|
float *factor = (float *)_args;
|
|
|
|
|
|
- /* length of the vector */
|
|
|
|
|
|
+ /* @b{length of the vector} */
|
|
unsigned n = STARPU_VECTOR_GET_NX(buffers[0]);
|
|
unsigned n = STARPU_VECTOR_GET_NX(buffers[0]);
|
|
- /* CUDA copy of the vector pointer */
|
|
|
|
|
|
+ /* @b{CUDA copy of the vector pointer} */
|
|
float *val = (float *)STARPU_VECTOR_GET_PTR(buffers[0]);
|
|
float *val = (float *)STARPU_VECTOR_GET_PTR(buffers[0]);
|
|
unsigned threads_per_block = 64;
|
|
unsigned threads_per_block = 64;
|
|
unsigned nblocks = (n + threads_per_block-1) / threads_per_block;
|
|
unsigned nblocks = (n + threads_per_block-1) / threads_per_block;
|
|
@@ -784,9 +785,9 @@ void scal_opencl_func(void *buffers[], void *_args)
|
|
@i{ cl_command_queue queue;}
|
|
@i{ cl_command_queue queue;}
|
|
@i{ cl_event event;}
|
|
@i{ cl_event event;}
|
|
|
|
|
|
- /* length of the vector */
|
|
|
|
|
|
+ /* @b{length of the vector} */
|
|
unsigned n = STARPU_VECTOR_GET_NX(buffers[0]);
|
|
unsigned n = STARPU_VECTOR_GET_NX(buffers[0]);
|
|
- /* OpenCL copy of the vector pointer */
|
|
|
|
|
|
+ /* @b{OpenCL copy of the vector pointer} */
|
|
cl_mem val = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]);
|
|
cl_mem val = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]);
|
|
|
|
|
|
@i{ id = starpu_worker_get_id();}
|
|
@i{ id = starpu_worker_get_id();}
|