Przeglądaj źródła

a tiny tutorial

Nathalie Furmento 14 lat temu
rodzic
commit
68c6d458e0

+ 28 - 0
doc/tutorial/Makefile

@@ -0,0 +1,28 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2009-2011  Université de Bordeaux 1
+# Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+
+# Compiler flags come from StarPU's pkg-config file.  The doubled '$$' leaves
+# a literal $(pkg-config ...) in the variable, so the command substitution is
+# performed by the shell that runs each recipe.
+CFLAGS          +=      $$(pkg-config --cflags libstarpu)
+# Libraries belong in LDLIBS: make's built-in link rule places LDLIBS after
+# the object files, which linkers resolving symbols left to right require.
+LDLIBS          +=      $$(pkg-config --libs libstarpu)
+
+# Neither target names a file that gets created.
+.PHONY: all clean
+
+# Compile CUDA sources with nvcc, naming the output object explicitly.
+%.o: %.cu
+	nvcc $(CFLAGS) $< -c -o $@
+
+all: hello_world vector_scal
+vector_scal: vector_scal.o vector_scal_cpu.o vector_scal_cuda.o vector_scal_opencl.o
+
+clean:
+	rm -f hello_world vector_scal *.o
+

+ 33 - 0
doc/tutorial/README

@@ -0,0 +1,33 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2009-2011  Université de Bordeaux 1
+# Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+
+
+Instructions on how to compile and run StarPU examples
+------------------------------------------------------
+
+% export STARPU_DIR=<directory where StarPU is installed>
+% export PKG_CONFIG_PATH=$STARPU_DIR/lib/pkgconfig:$PKG_CONFIG_PATH
+% export LD_LIBRARY_PATH=$STARPU_DIR/lib:$LD_LIBRARY_PATH
+
+% make hello_world
+% ./hello_world
+
+% make vector_scal
+% ./vector_scal
+
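+Run again with no CPU workers, so that the task has to execute on an
+accelerator (CUDA or OpenCL):
+% STARPU_NCPUS=0 ./vector_scal
+Run again with neither CPU nor CUDA workers, leaving only OpenCL devices:
+% STARPU_NCPUS=0 STARPU_NCUDA=0 ./vector_scal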
+

+ 70 - 0
doc/tutorial/hello_world.c

@@ -0,0 +1,70 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Université de Bordeaux 1
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+
+struct params { /* argument bundle that main() hands to the codelet through task->cl_arg */
+    int i;      /* printed with %i by cpu_func */
+    float f;    /* printed with %f by cpu_func */
+};
+
+/* CPU kernel of the hello-world codelet: prints the two fields of the
+ * struct params that cl_arg points to.  The buffers array is not read. */
+void cpu_func(void *buffers[], void *cl_arg)
+{
+    const struct params *args = (const struct params *)cl_arg;
+    int number = args->i;
+    float real = args->f;
+
+    printf("Hello world (params = {%i, %f} )\n", number, real);
+}
+
+starpu_codelet cl = /* codelet that main() attaches to its task */
+{
+    .where = STARPU_CPU,   /* only a CPU implementation is supplied */
+    .cpu_func = cpu_func,  /* the printing kernel defined in this file */
+    .nbuffers = 0          /* the kernel uses no data buffer */
+};
+
+/* Callback installed on the task by main(); prints the opaque argument it
+ * is given.  The pointer is printed with %p: handing a void * to %x is
+ * undefined behavior and drops the upper bits wherever pointers are wider
+ * than unsigned int. */
+void callback_func(void *callback_arg)
+{
+    printf("Callback function (arg %p)\n", callback_arg);
+}
+
+/* Builds one task around the codelet, submits it synchronously, then shuts
+ * StarPU down.  Returns 0 on success and 1 when StarPU cannot be started or
+ * the task cannot be submitted. */
+int main(int argc, char **argv)
+{
+    (void)argc;
+    (void)argv;
+
+    /* initialize StarPU with its default configuration */
+    if (starpu_init(NULL) != 0)
+    {
+        fprintf(stderr, "StarPU initialization failed\n");
+        return 1;
+    }
+
+    struct starpu_task *task = starpu_task_create();
+
+    task->cl = &cl; /* Pointer to the codelet defined above */
+
+    /* params is a local object: the task is made synchronous below, so the
+     * submission returns only once the kernel has finished with it */
+    struct params params = { 1, 2.0f };
+    task->cl_arg = &params;
+    task->cl_arg_size = sizeof(params);
+
+    task->callback_func = callback_func;
+    /* the callback receives a void *, so the integer tag is cast explicitly */
+    task->callback_arg = (void *)0x42;
+
+    /* starpu_task_submit will be a blocking call */
+    task->synchronous = 1;
+
+    /* submit the task to StarPU */
+    int ret = starpu_task_submit(task);
+    if (ret != 0)
+        fprintf(stderr, "Task submission failed (error %d)\n", ret);
+
+    /* terminate StarPU */
+    starpu_shutdown();
+
+    return ret != 0;
+}

+ 124 - 0
doc/tutorial/vector_scal.c

@@ -0,0 +1,124 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Université de Bordeaux 1
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/*
+ * This example demonstrates how to use StarPU to scale an array by a factor.
+ * It shows how to manipulate data with StarPU's data management library.
+ *  1- how to declare a piece of data to StarPU (starpu_vector_data_register)
+ *  2- how to describe which data are accessed by a task (task->buffers[0])
+ *  3- how a kernel can manipulate the data (buffers[0].vector.ptr)
+ */
+#include <starpu.h>
+#include <starpu_opencl.h>
+
+#define    NX    2048
+
+extern void scal_cpu_func(void *buffers[], void *_args);    /* CPU kernel, defined in vector_scal_cpu.c */
+extern void scal_cuda_func(void *buffers[], void *_args);   /* CUDA kernel launcher, defined in vector_scal_cuda.cu */
+extern void scal_opencl_func(void *buffers[], void *_args); /* OpenCL kernel launcher, defined in vector_scal_opencl.c */
+
+static starpu_codelet cl = {
+    .where = STARPU_CPU | STARPU_CUDA | STARPU_OPENCL, /* the codelet may run on any of these three kinds of workers */
+    /* CPU implementation of the codelet (always set) */
+    .cpu_func = scal_cpu_func,
+#ifdef STARPU_USE_CUDA
+    /* CUDA implementation of the codelet, set only when STARPU_USE_CUDA is defined */
+    .cuda_func = scal_cuda_func,
+#endif
+#ifdef STARPU_USE_OPENCL
+    /* OpenCL implementation of the codelet, set only when STARPU_USE_OPENCL is defined */
+    .opencl_func = scal_opencl_func,
+#endif
+    .nbuffers = 1 /* one buffer: the vector that main() places in task->buffers[0] */
+};
+
+#ifdef STARPU_USE_OPENCL
+struct starpu_opencl_program programs; /* loaded in main() from vector_scal_opencl_kernel.cl; vector_scal_opencl.c refers to it through an extern declaration */
+#endif
+
+/* Scales a vector of NX floats by a constant factor through one synchronous
+ * StarPU task and prints the first element before and after.  Returns 0 on
+ * success and 1 when StarPU cannot be started or the task cannot be
+ * submitted. */
+int main(int argc, char **argv)
+{
+    (void)argc;
+    (void)argv;
+
+    /* We consider a vector of float that is initialized just as any other
+     * C data */
+    float vector[NX];
+    unsigned i;
+    for (i = 0; i < NX; i++)
+        vector[i] = 1.0f;
+
+    fprintf(stderr, "BEFORE : First element was %f\n", vector[0]);
+
+    /* Initialize StarPU with default configuration */
+    if (starpu_init(NULL) != 0)
+    {
+        fprintf(stderr, "StarPU initialization failed\n");
+        return 1;
+    }
+
+#ifdef STARPU_USE_OPENCL
+    /* Load the OpenCL source file that contains the kernel */
+    starpu_opencl_load_opencl_from_file("vector_scal_opencl_kernel.cl", &programs, NULL);
+#endif
+
+    /* Tell StarPU to associate the "vector" vector with the "vector_handle"
+     * identifier. When a task needs to access a piece of data, it should
+     * refer to the handle that is associated to it.
+     * In the case of the "vector" data interface:
+     *  - the first argument of the registration method is a pointer to the
+     *    handle that should describe the data
+     *  - the second argument is the memory node where the data (i.e. "vector")
+     *    resides initially: 0 stands for an address in main memory, as
+     *    opposed to an address on a GPU for instance.
+     *  - the third argument is the address of the vector in RAM
+     *  - the fourth argument is the number of elements in the vector
+     *  - the fifth argument is the size of each element.
+     */
+    starpu_data_handle vector_handle;
+    starpu_vector_data_register(&vector_handle, 0, (uintptr_t)vector,
+                                NX, sizeof(vector[0]));
+
+    float factor = 3.14f;
+
+    /* create a synchronous task: any call to starpu_task_submit will block
+     * until it is terminated */
+    struct starpu_task *task = starpu_task_create();
+    task->synchronous = 1;
+
+    task->cl = &cl;
+
+    /* the codelet manipulates one buffer in RW mode */
+    task->buffers[0].handle = vector_handle;
+    task->buffers[0].mode = STARPU_RW;
+
+    /* an argument is passed to the codelet, beware that this is a
+     * READ-ONLY buffer and that the codelet may be given a pointer to a
+     * COPY of the argument */
+    task->cl_arg = &factor;
+    task->cl_arg_size = sizeof(factor);
+
+    /* execute the task on any eligible computational resource; a failure is
+     * reported but the cleanup below still runs */
+    int ret = starpu_task_submit(task);
+    if (ret != 0)
+        fprintf(stderr, "Task submission failed (error %d)\n", ret);
+
+    /* StarPU does not need to manipulate the array anymore so we can stop
+     * monitoring it */
+    starpu_data_unregister(vector_handle);
+
+#ifdef STARPU_USE_OPENCL
+    starpu_opencl_unload_opencl(&programs);
+#endif
+
+    /* terminate StarPU, no task can be submitted after */
+    starpu_shutdown();
+
+    fprintf(stderr, "AFTER First element is %f\n", vector[0]);
+
+    return ret != 0;
+}

+ 50 - 0
doc/tutorial/vector_scal_cpu.c

@@ -0,0 +1,50 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Université de Bordeaux 1
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+
+/* This kernel takes a buffer and scales it by a constant factor */
+/* CPU kernel of the scaling codelet: multiplies every element of the vector
+ * found in buffers[0] by the float that cl_arg points to. */
+void scal_cpu_func(void *buffers[], void *cl_arg)
+{
+    /*
+     * buffers[] mirrors task->buffers[]: since task->buffers[0].handle was
+     * registered with the vector interface, buffers[0] is handled here as a
+     * vector descriptor (starpu_vector_interface_t *), read through the
+     * STARPU_VECTOR_GET_* accessors below.
+     */
+    starpu_vector_interface_t *vec = buffers[0];
+
+    /* address of the local copy of the data; the accessor's result is cast
+     * to (float *) because that is the element type main() registered */
+    float *elems = (float *)STARPU_VECTOR_GET_PTR(vec);
+
+    /* number of elements in the vector */
+    unsigned count = STARPU_VECTOR_GET_NX(vec);
+
+    float *scale = cl_arg;
+
+    /* scale the vector in place */
+    unsigned idx = 0;
+    while (idx < count)
+    {
+        elems[idx] *= *scale;
+        idx++;
+    }
+}
+

+ 43 - 0
doc/tutorial/vector_scal_cuda.cu

@@ -0,0 +1,43 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Université de Bordeaux 1
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include <starpu_cuda.h>
+
+/* Device kernel: each thread scales at most one element of val; threads
+ * whose global index is n or more do nothing. */
+static __global__ void vector_mult_cuda(float *val, unsigned n, float factor)
+{
+        const unsigned idx = threadIdx.x + blockDim.x * blockIdx.x;
+
+        if (idx >= n)
+                return;
+
+        val[idx] = val[idx] * factor;
+}
+
+/* CUDA implementation of the scaling codelet: launches vector_mult_cuda on
+ * the stream returned by starpu_cuda_get_local_stream() with enough 64-thread
+ * blocks to cover the vector, then waits for that stream to drain. */
+extern "C" void scal_cuda_func(void *buffers[], void *_args)
+{
+        const unsigned block_size = 64;
+
+        /* scaling factor passed through the codelet argument */
+        float *scale = (float *)_args;
+
+        /* local copy of the vector pointer, and its length */
+        float *elems = (float *)STARPU_VECTOR_GET_PTR(buffers[0]);
+        unsigned count = STARPU_VECTOR_GET_NX(buffers[0]);
+
+        /* round up so that a partially filled last block is still launched */
+        unsigned grid_size = (count + block_size - 1) / block_size;
+
+        vector_mult_cuda<<<grid_size, block_size, 0, starpu_cuda_get_local_stream()>>>(elems, count, *scale);
+
+        cudaStreamSynchronize(starpu_cuda_get_local_stream());
+}
+

+ 60 - 0
doc/tutorial/vector_scal_opencl.c

@@ -0,0 +1,60 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Université de Bordeaux 1
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include <starpu_opencl.h>
+
+extern struct starpu_opencl_program programs;
+
+/* OpenCL implementation of the scaling codelet: binds the vector buffer, its
+ * length and the scaling factor to the "vector_mult_opencl" kernel, runs one
+ * work-item per vector element and waits for completion.  Any OpenCL error
+ * is handed to STARPU_OPENCL_REPORT_ERROR. */
+void scal_opencl_func(void *buffers[], void *_args)
+{
+    float *factor = _args;
+    int id, devid, err;
+    cl_kernel kernel;
+    cl_command_queue queue;
+    cl_event event;
+
+    /* length of the vector */
+    unsigned n = STARPU_VECTOR_GET_NX(buffers[0]);
+    /* OpenCL copy of the vector pointer */
+    cl_mem val = (cl_mem) STARPU_VECTOR_GET_PTR(buffers[0]);
+
+    /* nothing to scale; a zero-sized global range would also be rejected by
+     * clEnqueueNDRangeKernel on OpenCL versions before 2.1 */
+    if (n == 0)
+        return;
+
+    id = starpu_worker_get_id();
+    devid = starpu_worker_get_devid(id);
+
+    err = starpu_opencl_load_kernel(&kernel, &queue, &programs,
+                    "vector_mult_opencl", devid);   /* Name of the kernel in vector_scal_opencl_kernel.cl */
+    if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
+
+    /* argument order matches the kernel signature: (val, nx, factor) */
+    err = clSetKernelArg(kernel, 0, sizeof(val), &val);
+    err |= clSetKernelArg(kernel, 1, sizeof(n), &n);
+    err |= clSetKernelArg(kernel, 2, sizeof(*factor), factor);
+    if (err) STARPU_OPENCL_REPORT_ERROR(err);
+
+    {
+        /* one work-item per element, so that the whole vector is scaled and
+         * not just val[0]; a NULL local size lets the OpenCL implementation
+         * choose the work-group size */
+        size_t global = n;
+        err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL, 0, NULL, &event);
+        if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
+    }
+
+    /* wait for the kernel to finish before collecting statistics */
+    clFinish(queue);
+    starpu_opencl_collect_stats(event);
+    clReleaseEvent(event);
+
+    starpu_opencl_release_kernel(kernel);
+}

+ 25 - 0
doc/tutorial/vector_scal_opencl_kernel.cl

@@ -0,0 +1,25 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Université de Bordeaux 1
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/* Each work-item scales the element of val selected by its global id in
+ * dimension 0; work-items whose id is nx or more do nothing. */
+__kernel void vector_mult_opencl(__global float* val, int nx, float factor)
+{
+        const int gid = get_global_id(0);
+
+        if (gid >= nx)
+                return;
+
+        val[gid] = val[gid] * factor;
+}
+