瀏覽代碼

doc: vector scaling example

Nathalie Furmento 15 年之前
父節點
當前提交
3bfb66c3f8
共有 3 個文件被更改,包括 95 次插入66 次删除
  1. 41 34
      doc/starpu.texi
  2. 1 32
      doc/vector_scal_c.texi
  3. 53 0
      doc/vector_scal_cpu.texi

+ 41 - 34
doc/starpu.texi

@@ -1963,7 +1963,7 @@ The definition of the codelet can be written as follows:
 
 @cartouche
 @smallexample
-void scal_func(void *buffers[], void *cl_arg)
+void scal_cpu_func(void *buffers[], void *cl_arg)
 @{
     unsigned i;
     float *factor = cl_arg;
@@ -1981,13 +1981,13 @@ void scal_func(void *buffers[], void *cl_arg)
 
 starpu_codelet cl = @{
     .where = STARPU_CPU,
-    .cpu_func = scal_func,
+    .cpu_func = scal_cpu_func,
     .nbuffers = 1
 @};
 @end smallexample
 @end cartouche
 
-The second argument of the @code{scal_func} function contains a pointer to the
+The second argument of the @code{scal_cpu_func} function contains a pointer to the
 parameters of the codelet (given in @code{task->cl_arg}), so that we read the
 constant factor from this pointer. The first argument is an array that gives
 a description of all the buffers passed in the @code{task->buffers}@ array. The
@@ -2052,9 +2052,9 @@ extern "C" void scal_cuda_func(void *buffers[], void *_args)
     float *val = (float *)STARPU_GET_VECTOR_PTR(vector);
 
     /* TODO: use more blocks and threads in blocks */
-    vector_mult_cuda<<<1,1>>>(val, n, *factor);
+@i{    vector_mult_cuda<<<1,1>>>(val, n, *factor);}
 
-    cudaThreadSynchronize();
+@i{    cudaThreadSynchronize();}
 @}
 @end smallexample
 @end cartouche
@@ -2080,46 +2080,46 @@ __kernel void vector_mult_opencl(__global float* val, int nx, float factor)
 @cartouche
 @smallexample
 #include <starpu.h>
-#include <starpu_opencl.h>
+@i{#include <starpu_opencl.h>}
 
-extern struct starpu_opencl_codelet codelet;
+@i{extern struct starpu_opencl_codelet codelet;}
 
 void scal_opencl_func(void *buffers[], void *_args)
 @{
     float *factor = (float *)_args;
     struct starpu_vector_interface_s *vector = (struct starpu_vector_interface_s *) buffers[0];
-    int id, devid, err;
-    cl_kernel kernel;
-    cl_command_queue queue;
+@i{    int id, devid, err;}
+@i{    cl_kernel kernel;}
+@i{    cl_command_queue queue;}
 
     /* length of the vector */
     unsigned n = STARPU_GET_VECTOR_NX(vector);
     /* local copy of the vector pointer */
     float *val = (float *)STARPU_GET_VECTOR_PTR(vector);
 
-    id = starpu_worker_get_id();
-    devid = starpu_worker_get_devid(id);
+@i{    id = starpu_worker_get_id();}
+@i{    devid = starpu_worker_get_devid(id);}
 
-    err = starpu_opencl_load_kernel(&kernel, &queue, &codelet,
-                    "vector_mult_opencl", devid);   /* @b{Name of the codelet defined above} */
-    if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
+@i{    err = starpu_opencl_load_kernel(&kernel, &queue, &codelet,}
+@i{                    "vector_mult_opencl", devid);   /* @b{Name of the codelet defined above} */}
+@i{    if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);}
 
-    err = 0;
-    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &val);
-    err = clSetKernelArg(kernel, 1, sizeof(int), &n);
-    err |= clSetKernelArg(kernel, 2, sizeof(float), (void*)factor);
-    if (err) STARPU_OPENCL_REPORT_ERROR(err);
+@i{    err = 0;}
+@i{    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &val);}
+@i{    err = clSetKernelArg(kernel, 1, sizeof(int), &n);}
+@i{    err |= clSetKernelArg(kernel, 2, sizeof(float), (void*)factor);}
+@i{    if (err) STARPU_OPENCL_REPORT_ERROR(err);}
 
-    @{
-        size_t global=1;
-        size_t local=1;
-        err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
-        if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
-    @}
+@i{    @{}
+@i{        size_t global=1;}
+@i{        size_t local=1;}
+@i{        err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);}
+@i{        if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);}
+@i{    @}}
 
-    clFinish(queue);
+@i{    clFinish(queue);}
 
-    starpu_opencl_release(kernel);
+@i{    starpu_opencl_release_kernel(kernel);}
 @}
 @end smallexample
 @end cartouche
@@ -2128,29 +2128,28 @@ void scal_opencl_func(void *buffers[], void *_args)
 @node Definition of the Main Code
 @subsection Definition of the Main Code
 
-
 The CPU implementation is the same as in the previous section.
 
 Here is the source of the main application. You can notice the value of the
 field @code{where} for the codelet. We specify
-@code{STARPU_CPU|STARPU_CUDA} to indicate to StarPU that the codelet
-can be executed either on a CPU or on a CUDA device.
+@code{STARPU_CPU|STARPU_CUDA|STARPU_OPENCL} to indicate to StarPU that the codelet
+can be executed either on a CPU or on a CUDA or an OpenCL device.
 
 @cartouche
 @smallexample
 #include <starpu.h>
 
-#define NX 5
+#define NX 2048
 
 extern void scal_cuda_func(void *buffers[], void *_args);
-extern void scal_func(void *buffers[], void *_args);
+extern void scal_cpu_func(void *buffers[], void *_args);
 
 /* @b{Definition of the codelet} */
 static starpu_codelet cl = @{
     .where = STARPU_CPU|STARPU_CUDA; /* @b{It can be executed on a CPU} */
                                      /* @b{or on a CUDA device} */
     .cuda_func = scal_cuda_func;
-    .cpu_func = scal_func;
+    .cpu_func = scal_cpu_func;
     .nbuffers = 1;
 @}
 
@@ -2289,6 +2288,7 @@ or by disabling CUDA devices:
 
 @menu
 * Main application::            
+* CPU Codelet::                 
 * CUDA Codelet::                
 * OpenCL Codelet::              
 @end menu
@@ -2300,6 +2300,13 @@ or by disabling CUDA devices:
 @include vector_scal_c.texi
 @end smallexample
 
+@node CPU Codelet
+@section CPU Codelet
+
+@smallexample
+@include vector_scal_cpu.texi
+@end smallexample
+
 @node CUDA Codelet
 @section CUDA Codelet
 

+ 1 - 32
doc/vector_scal_c.texi

@@ -30,38 +30,7 @@
 
 #define    NX    2048
 
-/* This kernel takes a buffer and scales it by a constant factor */
-static void scal_cpu_func(void *buffers[], void *cl_arg)
-@{
-    unsigned i;
-    float *factor = cl_arg;
-
-    /* 
-     * The "buffers" array matches the task->buffers array: for instance
-     * task->buffers[0].handle is a handle that corresponds to a data with
-     * vector "interface", so that the first entry of the array in the
-     * codelet  is a pointer to a structure describing such a vector (ie.
-     * struct starpu_vector_interface_s *). Here, we therefore manipulate
-     * the buffers[0] element as a vector: nx gives the number of elements
-     * in the array, ptr gives the location of the array (that was possibly
-     * migrated/replicated), and elemsize gives the size of each elements.
-     */
-
-    starpu_vector_interface_t *vector = buffers[0];
-
-    /* length of the vector */
-    unsigned n = STARPU_GET_VECTOR_NX(vector);
-
-    /* get a pointer to the local copy of the vector : note that we have to
-     * cast it in (float *) since a vector could contain any type of
-     * elements so that the .ptr field is actually a uintptr_t */
-    float *val = (float *)STARPU_GET_VECTOR_PTR(vector);
-
-    /* scale the vector */
-    for (i = 0; i < n; i++)
-        val[i] *= *factor;
-@}
-
+extern void scal_cpu_func(void *buffers[], void *_args);
 extern void scal_cuda_func(void *buffers[], void *_args);
 extern void scal_opencl_func(void *buffers[], void *_args);
 

+ 53 - 0
doc/vector_scal_cpu.texi

@@ -0,0 +1,53 @@
+/*
+ * StarPU
+ * Copyright (C) INRIA 2008-2010 (see AUTHORS file)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/*
+ * This example complements vector_scale.c: here we implement a CPU version.
+ */
+
+#include <starpu.h>
+
+/* This kernel takes a buffer and scales it by a constant factor */
+void scal_cpu_func(void *buffers[], void *cl_arg)
+@{
+    unsigned i;
+    float *factor = cl_arg;
+
+    /* 
+     * The "buffers" array matches the task->buffers array: for instance
+     * task->buffers[0].handle is a handle that corresponds to a data with
+     * vector "interface", so that the first entry of the array in the
+     * codelet  is a pointer to a structure describing such a vector (ie.
+     * struct starpu_vector_interface_s *). Here, we therefore manipulate
+     * the buffers[0] element as a vector: nx gives the number of elements
+     * in the array, ptr gives the location of the array (that was possibly
+     * migrated/replicated), and elemsize gives the size of each elements.
+     */
+
+    starpu_vector_interface_t *vector = buffers[0];
+
+    /* length of the vector */
+    unsigned n = STARPU_GET_VECTOR_NX(vector);
+
+    /* get a pointer to the local copy of the vector : note that we have to
+     * cast it in (float *) since a vector could contain any type of
+     * elements so that the .ptr field is actually a uintptr_t */
+    float *val = (float *)STARPU_GET_VECTOR_PTR(vector);
+
+    /* scale the vector */
+    for (i = 0; i < n; i++)
+        val[i] *= *factor;
+@}