Explorar o código

backport branches/starpu-1.1@10194: doc/doxygen/: add missing file

Nathalie Furmento %!s(int64=12) %!d(string=hai) anos
pai
achega
a4d122374d
Modificáronse 1 ficheiros con 45 adicións e 0 borrados
  1. 45 0
      doc/doxygen/chapters/code/scal_pragma.cu

+ 45 - 0
doc/doxygen/chapters/code/scal_pragma.cu

@@ -0,0 +1,45 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010-2013  Université de Bordeaux 1
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+//! [To be included]
+/* CUDA implementation of the `vector_scal' task, to be compiled with `nvcc'. */
+
+#include <starpu.h>
+#include <stdlib.h>
+
+static __global__ void
+vector_mult_cuda (unsigned n, float *val, float factor)
+{
+  unsigned i = blockIdx.x * blockDim.x + threadIdx.x;
+
+  if (i < n)
+    val[i] *= factor;
+}
+
+/* Definition of the task implementation declared in the C file. */
+extern "C" void
+vector_scal_cuda (size_t size, float vector[], float factor)
+{
+  unsigned threads_per_block = 64;
+  unsigned nblocks = (size + threads_per_block - 1) / threads_per_block;
+
+  vector_mult_cuda <<< nblocks, threads_per_block, 0,
+    starpu_cuda_get_local_stream () >>> (size, vector, factor);
+
+  cudaStreamSynchronize (starpu_cuda_get_local_stream ());
+}
+//! [To be included]