15 years ago · 3bfb66c3f8
--- a/doc/starpu.texi
+++ b/doc/starpu.texi
@@ -1963,7 +1963,7 @@ The definition of the codelet can be written as follows:
 
																 @cartouche
															
 
																 @smallexample
															
 
																-void scal_func(void *buffers[], void *cl_arg)
															
 
																+void scal_cpu_func(void *buffers[], void *cl_arg)
															
 
																 @{
															
 
																     unsigned i;
															
 
																     float *factor = cl_arg;
															
@@ -1981,13 +1981,13 @@ void scal_func(void *buffers[], void *cl_arg)
 
																 starpu_codelet cl = @{
															
 
																     .where = STARPU_CPU,
															
 
																-    .cpu_func = scal_func,
															
 
																+    .cpu_func = scal_cpu_func,
															
 
																     .nbuffers = 1
															
 
																 @};
															
 
																 @end smallexample
															
 
																 @end cartouche
															
 
																-The second argument of the @code{scal_func} function contains a pointer to the
															
 
																+The second argument of the @code{scal_cpu_func} function contains a pointer to the
															
 
																 parameters of the codelet (given in @code{task->cl_arg}), so that we read the
															
 
																 constant factor from this pointer. The first argument is an array that gives
															
 
																 a description of all the buffers passed in the @code{task->buffers}@ array. The
															
@@ -2052,9 +2052,9 @@ extern "C" void scal_cuda_func(void *buffers[], void *_args)
 
																     float *val = (float *)STARPU_GET_VECTOR_PTR(vector);
															
 
																     /* TODO: use more blocks and threads in blocks */
															
 
																-    vector_mult_cuda<<<1,1>>>(val, n, *factor);
															
 
																+@i{    vector_mult_cuda<<<1,1>>>(val, n, *factor);}
															
 
																-    cudaThreadSynchronize();
															
 
																+@i{    cudaThreadSynchronize();}
															
 
																 @}
															
 
																 @end smallexample
															
 
																 @end cartouche
															
@@ -2080,46 +2080,46 @@ __kernel void vector_mult_opencl(__global float* val, int nx, float factor)
 
																 @cartouche
															
 
																 @smallexample
															
 
																 #include <starpu.h>
															
 
																-#include <starpu_opencl.h>
															
 
																+@i{#include <starpu_opencl.h>}
															
 
																-extern struct starpu_opencl_codelet codelet;
															
 
																+@i{extern struct starpu_opencl_codelet codelet;}
															
 
																 void scal_opencl_func(void *buffers[], void *_args)
															
 
																 @{
															
 
																     float *factor = (float *)_args;
															
 
																     struct starpu_vector_interface_s *vector = (struct starpu_vector_interface_s *) buffers[0];
															
 
																-    int id, devid, err;
															
 
																-    cl_kernel kernel;
															
 
																-    cl_command_queue queue;
															
 
																+@i{    int id, devid, err;}
															
 
																+@i{    cl_kernel kernel;}
															
 
																+@i{    cl_command_queue queue;}
															
 
																     /* length of the vector */
															
 
																     unsigned n = STARPU_GET_VECTOR_NX(vector);
															
 
																     /* local copy of the vector pointer */
															
 
																     float *val = (float *)STARPU_GET_VECTOR_PTR(vector);
															
 
																-    id = starpu_worker_get_id();
															
 
																-    devid = starpu_worker_get_devid(id);
															
 
																+@i{    id = starpu_worker_get_id();}
															
 
																+@i{    devid = starpu_worker_get_devid(id);}
															
 
																-    err = starpu_opencl_load_kernel(&kernel, &queue, &codelet,
															
 
																-                    "vector_mult_opencl", devid);   /* @b{Name of the codelet defined above} */
															
 
																-    if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
															
 
																+@i{    err = starpu_opencl_load_kernel(&kernel, &queue, &codelet,}
															
 
																+@i{                    "vector_mult_opencl", devid);   /* @b{Name of the codelet defined above} */}
															
 
																+@i{    if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);}
															
 
																-    err = 0;
															
 
																-    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &val);
															
 
																-    err = clSetKernelArg(kernel, 1, sizeof(int), &n);
															
 
																-    err |= clSetKernelArg(kernel, 2, sizeof(float), (void*)factor);
															
 
																-    if (err) STARPU_OPENCL_REPORT_ERROR(err);
															
 
																+@i{    err = 0;}
															
 
																+@i{    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &val);}
															
 
																+@i{    err = clSetKernelArg(kernel, 1, sizeof(int), &n);}
															
 
																+@i{    err |= clSetKernelArg(kernel, 2, sizeof(float), (void*)factor);}
															
 
																+@i{    if (err) STARPU_OPENCL_REPORT_ERROR(err);}
															
 
																-    @{
															
 
																-        size_t global=1;
															
 
																-        size_t local=1;
															
 
																-        err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
															
 
																-        if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
															
 
																-    @}
															
 
																+@i{    @{}
															
 
																+@i{        size_t global=1;}
															
 
																+@i{        size_t local=1;}
															
 
																+@i{        err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);}
															
 
																+@i{        if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);}
															
 
																+@i{    @}}
															
 
																-    clFinish(queue);
															
 
																+@i{    clFinish(queue);}
															
 
																-    starpu_opencl_release(kernel);
															
 
																+@i{    starpu_opencl_release_kernel(kernel);}
															
 
																 @}
															
 
																 @end smallexample
															
 
																 @end cartouche
															
@@ -2128,29 +2128,28 @@ void scal_opencl_func(void *buffers[], void *_args)
 
																 @node Definition of the Main Code
															
 
																 @subsection Definition of the Main Code
															
 
																-
															
 
																 The CPU implementation is the same as in the previous section.
															
 
																 Here is the source of the main application. You can notice the value of the
															
 
																 field @code{where} for the codelet. We specify
															
 
																-@code{STARPU_CPU|STARPU_CUDA} to indicate to StarPU that the codelet
															
 
																-can be executed either on a CPU or on a CUDA device.
															
 
																+@code{STARPU_CPU|STARPU_CUDA|STARPU_OPENCL} to indicate to StarPU that the codelet
															
 
																+can be executed on a CPU, on a CUDA device, or on an OpenCL device.
															
 
																 @cartouche
															
 
																 @smallexample
															
 
																 #include <starpu.h>
															
 
																-#define NX 5
															
 
																+#define NX 2048
															
 
																 extern void scal_cuda_func(void *buffers[], void *_args);
															
 
																-extern void scal_func(void *buffers[], void *_args);
															
 
																+extern void scal_cpu_func(void *buffers[], void *_args);
															
 
																 /* @b{Definition of the codelet} */
															
 
																 static starpu_codelet cl = @{
															
 
																     .where = STARPU_CPU|STARPU_CUDA; /* @b{It can be executed on a CPU} */
															
 
																                                      /* @b{or on a CUDA device} */
															
 
																     .cuda_func = scal_cuda_func;
															
 
																-    .cpu_func = scal_func;
															
 
																+    .cpu_func = scal_cpu_func;
															
 
																     .nbuffers = 1;
															
 
																 @}
															
@@ -2289,6 +2288,7 @@ or by disabling CUDA devices:
 
																 @menu
															
 
																 * Main application::            
															
 
																+* CPU Codelet::                 
															
 
																 * CUDA Codelet::                
															
 
																 * OpenCL Codelet::              
															
 
																 @end menu
															
@@ -2300,6 +2300,13 @@ or by disabling CUDA devices:
 
																 @include vector_scal_c.texi
															
 
																 @end smallexample
															
 
																+@node CPU Codelet
															
 
																+@section CPU Codelet
															
 
																+
															
 
																+@smallexample
															
 
																+@include vector_scal_cpu.texi
															
 
																+@end smallexample
															
 
																+
															
 
																 @node CUDA Codelet
															
 
																 @section CUDA Codelet
															
--- a/doc/vector_scal_c.texi
+++ b/doc/vector_scal_c.texi
@@ -30,38 +30,7 @@
 
																 #define    NX    2048
															
 
																-/* This kernel takes a buffer and scales it by a constant factor */
															
 
																-static void scal_cpu_func(void *buffers[], void *cl_arg)
															
 
																-@{
															
 
																-    unsigned i;
															
 
																-    float *factor = cl_arg;
															
 
																-
															
 
																-    /* 
															
 
																-     * The "buffers" array matches the task->buffers array: for instance
															
 
																-     * task->buffers[0].handle is a handle that corresponds to a data with
															
 
																-     * vector "interface", so that the first entry of the array in the
															
 
																-     * codelet  is a pointer to a structure describing such a vector (ie.
															
 
																-     * struct starpu_vector_interface_s *). Here, we therefore manipulate
															
 
																-     * the buffers[0] element as a vector: nx gives the number of elements
															
 
																-     * in the array, ptr gives the location of the array (that was possibly
															
 
																-     * migrated/replicated), and elemsize gives the size of each elements.
															
 
																-     */
															
 
																-
															
 
																-    starpu_vector_interface_t *vector = buffers[0];
															
 
																-
															
 
																-    /* length of the vector */
															
 
																-    unsigned n = STARPU_GET_VECTOR_NX(vector);
															
 
																-
															
 
																-    /* get a pointer to the local copy of the vector : note that we have to
															
 
																-     * cast it in (float *) since a vector could contain any type of
															
 
																-     * elements so that the .ptr field is actually a uintptr_t */
															
 
																-    float *val = (float *)STARPU_GET_VECTOR_PTR(vector);
															
 
																-
															
 
																-    /* scale the vector */
															
 
																-    for (i = 0; i < n; i++)
															
 
																-        val[i] *= *factor;
															
 
																-@}
															
 
																-
															
 
																+extern void scal_cpu_func(void *buffers[], void *_args);
															
 
																 extern void scal_cuda_func(void *buffers[], void *_args);
															
 
																 extern void scal_opencl_func(void *buffers[], void *_args);
															
--- a/doc/vector_scal_cpu.texi
+++ b/doc/vector_scal_cpu.texi
@@ -0,0 +1,53 @@
 
																+/*
															
 
																+ * StarPU
															
 
																+ * Copyright (C) INRIA 2008-2010 (see AUTHORS file)
															
 
																+ *
															
 
																+ * This program is free software; you can redistribute it and/or modify
															
 
																+ * it under the terms of the GNU Lesser General Public License as published by
															
 
																+ * the Free Software Foundation; either version 2.1 of the License, or (at
															
 
																+ * your option) any later version.
															
 
																+ *
															
 
																+ * This program is distributed in the hope that it will be useful, but
															
 
																+ * WITHOUT ANY WARRANTY; without even the implied warranty of
															
 
																+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
															
 
																+ *
															
 
																+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
															
 
																+ */
															
 
																+
															
 
																+/*
															
 
																+ * This example complements vector_scal.c: here we implement a CPU version.
															
 
																+ */
															
 
																+
															
 
																+#include <starpu.h>
															
 
																+
															
 
																+/* This kernel takes a buffer and scales it by a constant factor */
															
 
																+void scal_cpu_func(void *buffers[], void *cl_arg)
															
 
																+@{
															
 
																+    unsigned i;
															
 
																+    float *factor = cl_arg;
															
 
																+
															
 
																+    /* 
															
 
																+     * The "buffers" array matches the task->buffers array: for instance
															
 
																+     * task->buffers[0].handle is a handle that corresponds to a data with
															
 
																+     * vector "interface", so that the first entry of the array in the
															
 
																+     * codelet is a pointer to a structure describing such a vector (i.e.
															
 
																+     * struct starpu_vector_interface_s *). Here, we therefore manipulate
															
 
																+     * the buffers[0] element as a vector: nx gives the number of elements
															
 
																+     * in the array, ptr gives the location of the array (that was possibly
															
 
																+     * migrated/replicated), and elemsize gives the size of each element.
															
 
																+     */
															
 
																+
															
 
																+    starpu_vector_interface_t *vector = buffers[0];
															
 
																+
															
 
																+    /* length of the vector */
															
 
																+    unsigned n = STARPU_GET_VECTOR_NX(vector);
															
 
																+
															
 
																+    /* get a pointer to the local copy of the vector : note that we have to
															
 
																+     * cast it in (float *) since a vector could contain any type of
															
 
																+     * elements so that the .ptr field is actually a uintptr_t */
															
 
																+    float *val = (float *)STARPU_GET_VECTOR_PTR(vector);
															
 
																+
															
 
																+    /* scale the vector */
															
 
																+    for (i = 0; i < n; i++)
															
 
																+        val[i] *= *factor;
															
 
																+@}