Browse Source

doc: vector scaling example

Nathalie Furmento 15 years ago
parent
commit
3bfb66c3f8
3 changed files with 95 additions and 66 deletions
  1. 41 34
      doc/starpu.texi
  2. 1 32
      doc/vector_scal_c.texi
  3. 53 0
      doc/vector_scal_cpu.texi

+ 41 - 34
doc/starpu.texi

@@ -1963,7 +1963,7 @@ The definition of the codelet can be written as follows:
 
 
 @cartouche
 @cartouche
 @smallexample
 @smallexample
-void scal_func(void *buffers[], void *cl_arg)
+void scal_cpu_func(void *buffers[], void *cl_arg)
 @{
 @{
     unsigned i;
     unsigned i;
     float *factor = cl_arg;
     float *factor = cl_arg;
@@ -1981,13 +1981,13 @@ void scal_func(void *buffers[], void *cl_arg)
 
 
 starpu_codelet cl = @{
 starpu_codelet cl = @{
     .where = STARPU_CPU,
     .where = STARPU_CPU,
-    .cpu_func = scal_func,
+    .cpu_func = scal_cpu_func,
     .nbuffers = 1
     .nbuffers = 1
 @};
 @};
 @end smallexample
 @end smallexample
 @end cartouche
 @end cartouche
 
 
-The second argument of the @code{scal_func} function contains a pointer to the
+The second argument of the @code{scal_cpu_func} function contains a pointer to the
 parameters of the codelet (given in @code{task->cl_arg}), so that we read the
 parameters of the codelet (given in @code{task->cl_arg}), so that we read the
 constant factor from this pointer. The first argument is an array that gives
 constant factor from this pointer. The first argument is an array that gives
 a description of all the buffers passed in the @code{task->buffers}@ array. The
 a description of all the buffers passed in the @code{task->buffers}@ array. The
@@ -2052,9 +2052,9 @@ extern "C" void scal_cuda_func(void *buffers[], void *_args)
     float *val = (float *)STARPU_GET_VECTOR_PTR(vector);
     float *val = (float *)STARPU_GET_VECTOR_PTR(vector);
 
 
     /* TODO: use more blocks and threads in blocks */
     /* TODO: use more blocks and threads in blocks */
-    vector_mult_cuda<<<1,1>>>(val, n, *factor);
+@i{    vector_mult_cuda<<<1,1>>>(val, n, *factor);}
 
 
-    cudaThreadSynchronize();
+@i{    cudaThreadSynchronize();}
 @}
 @}
 @end smallexample
 @end smallexample
 @end cartouche
 @end cartouche
@@ -2080,46 +2080,46 @@ __kernel void vector_mult_opencl(__global float* val, int nx, float factor)
 @cartouche
 @cartouche
 @smallexample
 @smallexample
 #include <starpu.h>
 #include <starpu.h>
-#include <starpu_opencl.h>
+@i{#include <starpu_opencl.h>}
 
 
-extern struct starpu_opencl_codelet codelet;
+@i{extern struct starpu_opencl_codelet codelet;}
 
 
 void scal_opencl_func(void *buffers[], void *_args)
 void scal_opencl_func(void *buffers[], void *_args)
 @{
 @{
     float *factor = (float *)_args;
     float *factor = (float *)_args;
     struct starpu_vector_interface_s *vector = (struct starpu_vector_interface_s *) buffers[0];
     struct starpu_vector_interface_s *vector = (struct starpu_vector_interface_s *) buffers[0];
-    int id, devid, err;
-    cl_kernel kernel;
-    cl_command_queue queue;
+@i{    int id, devid, err;}
+@i{    cl_kernel kernel;}
+@i{    cl_command_queue queue;}
 
 
     /* length of the vector */
     /* length of the vector */
     unsigned n = STARPU_GET_VECTOR_NX(vector);
     unsigned n = STARPU_GET_VECTOR_NX(vector);
     /* local copy of the vector pointer */
     /* local copy of the vector pointer */
     float *val = (float *)STARPU_GET_VECTOR_PTR(vector);
     float *val = (float *)STARPU_GET_VECTOR_PTR(vector);
 
 
-    id = starpu_worker_get_id();
-    devid = starpu_worker_get_devid(id);
+@i{    id = starpu_worker_get_id();}
+@i{    devid = starpu_worker_get_devid(id);}
 
 
-    err = starpu_opencl_load_kernel(&kernel, &queue, &codelet,
-                    "vector_mult_opencl", devid);   /* @b{Name of the codelet defined above} */
-    if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
+@i{    err = starpu_opencl_load_kernel(&kernel, &queue, &codelet,}
+@i{                    "vector_mult_opencl", devid);   /* @b{Name of the codelet defined above} */}
+@i{    if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);}
 
 
-    err = 0;
-    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &val);
-    err = clSetKernelArg(kernel, 1, sizeof(int), &n);
-    err |= clSetKernelArg(kernel, 2, sizeof(float), (void*)factor);
-    if (err) STARPU_OPENCL_REPORT_ERROR(err);
+@i{    err = 0;}
+@i{    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &val);}
+@i{    err = clSetKernelArg(kernel, 1, sizeof(int), &n);}
+@i{    err |= clSetKernelArg(kernel, 2, sizeof(float), (void*)factor);}
+@i{    if (err) STARPU_OPENCL_REPORT_ERROR(err);}
 
 
-    @{
-        size_t global=1;
-        size_t local=1;
-        err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
-        if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
-    @}
+@i{    @{}
+@i{        size_t global=1;}
+@i{        size_t local=1;}
+@i{        err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);}
+@i{        if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);}
+@i{    @}}
 
 
-    clFinish(queue);
+@i{    clFinish(queue);}
 
 
-    starpu_opencl_release(kernel);
+@i{    starpu_opencl_release_kernel(kernel);}
 @}
 @}
 @end smallexample
 @end smallexample
 @end cartouche
 @end cartouche
@@ -2128,29 +2128,28 @@ void scal_opencl_func(void *buffers[], void *_args)
 @node Definition of the Main Code
 @node Definition of the Main Code
 @subsection Definition of the Main Code
 @subsection Definition of the Main Code
 
 
-
 The CPU implementation is the same as in the previous section.
 The CPU implementation is the same as in the previous section.
 
 
 Here is the source of the main application. You can notice the value of the
 Here is the source of the main application. You can notice the value of the
 field @code{where} for the codelet. We specify
 field @code{where} for the codelet. We specify
-@code{STARPU_CPU|STARPU_CUDA} to indicate to StarPU that the codelet
-can be executed either on a CPU or on a CUDA device.
+@code{STARPU_CPU|STARPU_CUDA|STARPU_OPENCL} to indicate to StarPU that the codelet
+can be executed on a CPU, on a CUDA device, or on an OpenCL device.
 
 
 @cartouche
 @cartouche
 @smallexample
 @smallexample
 #include <starpu.h>
 #include <starpu.h>
 
 
-#define NX 5
+#define NX 2048
 
 
 extern void scal_cuda_func(void *buffers[], void *_args);
 extern void scal_cuda_func(void *buffers[], void *_args);
-extern void scal_func(void *buffers[], void *_args);
+extern void scal_cpu_func(void *buffers[], void *_args);
 
 
 /* @b{Definition of the codelet} */
 /* @b{Definition of the codelet} */
 static starpu_codelet cl = @{
 static starpu_codelet cl = @{
     .where = STARPU_CPU|STARPU_CUDA; /* @b{It can be executed on a CPU} */
     .where = STARPU_CPU|STARPU_CUDA; /* @b{It can be executed on a CPU} */
                                      /* @b{or on a CUDA device} */
                                      /* @b{or on a CUDA device} */
     .cuda_func = scal_cuda_func;
     .cuda_func = scal_cuda_func;
-    .cpu_func = scal_func;
+    .cpu_func = scal_cpu_func;
     .nbuffers = 1;
     .nbuffers = 1;
 @}
 @}
 
 
@@ -2289,6 +2288,7 @@ or by disabling CUDA devices:
 
 
 @menu
 @menu
 * Main application::            
 * Main application::            
+* CPU Codelet::                 
 * CUDA Codelet::                
 * CUDA Codelet::                
 * OpenCL Codelet::              
 * OpenCL Codelet::              
 @end menu
 @end menu
@@ -2300,6 +2300,13 @@ or by disabling CUDA devices:
 @include vector_scal_c.texi
 @include vector_scal_c.texi
 @end smallexample
 @end smallexample
 
 
+@node CPU Codelet
+@section CPU Codelet
+
+@smallexample
+@include vector_scal_cpu.texi
+@end smallexample
+
 @node CUDA Codelet
 @node CUDA Codelet
 @section CUDA Codelet
 @section CUDA Codelet
 
 

+ 1 - 32
doc/vector_scal_c.texi

@@ -30,38 +30,7 @@
 
 
 #define    NX    2048
 #define    NX    2048
 
 
-/* This kernel takes a buffer and scales it by a constant factor */
-static void scal_cpu_func(void *buffers[], void *cl_arg)
-@{
-    unsigned i;
-    float *factor = cl_arg;
-
-    /* 
-     * The "buffers" array matches the task->buffers array: for instance
-     * task->buffers[0].handle is a handle that corresponds to a data with
-     * vector "interface", so that the first entry of the array in the
-     * codelet  is a pointer to a structure describing such a vector (ie.
-     * struct starpu_vector_interface_s *). Here, we therefore manipulate
-     * the buffers[0] element as a vector: nx gives the number of elements
-     * in the array, ptr gives the location of the array (that was possibly
-     * migrated/replicated), and elemsize gives the size of each elements.
-     */
-
-    starpu_vector_interface_t *vector = buffers[0];
-
-    /* length of the vector */
-    unsigned n = STARPU_GET_VECTOR_NX(vector);
-
-    /* get a pointer to the local copy of the vector : note that we have to
-     * cast it in (float *) since a vector could contain any type of
-     * elements so that the .ptr field is actually a uintptr_t */
-    float *val = (float *)STARPU_GET_VECTOR_PTR(vector);
-
-    /* scale the vector */
-    for (i = 0; i < n; i++)
-        val[i] *= *factor;
-@}
-
+extern void scal_cpu_func(void *buffers[], void *_args);
 extern void scal_cuda_func(void *buffers[], void *_args);
 extern void scal_cuda_func(void *buffers[], void *_args);
 extern void scal_opencl_func(void *buffers[], void *_args);
 extern void scal_opencl_func(void *buffers[], void *_args);
 
 

+ 53 - 0
doc/vector_scal_cpu.texi

@@ -0,0 +1,53 @@
+/*
+ * StarPU
+ * Copyright (C) INRIA 2008-2010 (see AUTHORS file)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/*
+ * This example complements vector_scal.c: here we implement a CPU version.
+ */
+
+#include <starpu.h>
+
+/* This kernel takes a buffer and scales it by a constant factor */
+void scal_cpu_func(void *buffers[], void *cl_arg)
+@{
+    unsigned i;
+    float *factor = cl_arg;
+
+    /* 
+     * The "buffers" array matches the task->buffers array: for instance
+     * task->buffers[0].handle is a handle that corresponds to a data with
+     * vector "interface", so that the first entry of the array in the
+     * codelet is a pointer to a structure describing such a vector (i.e.
+     * struct starpu_vector_interface_s *). Here, we therefore manipulate
+     * the buffers[0] element as a vector: nx gives the number of elements
+     * in the array, ptr gives the location of the array (that was possibly
+     * migrated/replicated), and elemsize gives the size of each element.
+     */
+
+    starpu_vector_interface_t *vector = buffers[0];
+
+    /* length of the vector */
+    unsigned n = STARPU_GET_VECTOR_NX(vector);
+
+    /* get a pointer to the local copy of the vector: note that we have to
+     * cast it to (float *) since a vector could contain any type of
+     * elements so that the .ptr field is actually a uintptr_t */
+    float *val = (float *)STARPU_GET_VECTOR_PTR(vector);
+
+    /* scale the vector */
+    for (i = 0; i < n; i++)
+        val[i] *= *factor;
+@}