Bladeren bron

gcc: Add a CUDA task implementation for `vector_scal'.

* gcc-plugin/examples/Makefile.am (vector_scal_vector_scal_SOURCES): New
  variable.
  (.cu.o): New rule.

* gcc-plugin/examples/vector_scal/vector_scal.c (vector_scal_cuda): New
  declaration.

* gcc-plugin/examples/vector_scal/vector_scal_cuda.cu: New file.
Ludovic Courtès 13 jaren geleden
bovenliggende
commit
5fb82461dc

+ 15 - 0
gcc-plugin/examples/Makefile.am

@@ -53,3 +53,18 @@ cholesky_cholesky_LDADD	=	\
 	$(STARPU_BLAS_LDFLAGS)
 endif
 
+# Sources of the `vector_scal' example.  The C file is listed
+# unconditionally; the CUDA file is appended below only when CUDA support
+# is enabled.
+vector_scal_vector_scal_SOURCES = vector_scal/vector_scal.c
+
+if STARPU_USE_CUDA
+
+# CUDA support is enabled: make the CUDA task implementation part of the
+# example's sources.
+vector_scal_vector_scal_SOURCES += vector_scal/vector_scal_cuda.cu
+
+# Suffix rule that turns a `.cu' file into an object file using $(NVCC).
+# Both the build-tree and the source-tree `include' directories are put on
+# the include path.
+.cu.o:
+	$(NVCC) $< -c -o $@ $(NVCCFLAGS)			\
+	  -I$(top_builddir)/include -I$(top_srcdir)/include
+
+else !STARPU_USE_CUDA
+
+# CUDA support is disabled: the `.cu' file is not a source of the example
+# in this configuration, so list it in EXTRA_DIST to keep it in the
+# distribution.
+EXTRA_DIST += vector_scal/vector_scal_cuda.cu
+
+endif

+ 11 - 0
gcc-plugin/examples/vector_scal/vector_scal.c

@@ -137,6 +137,17 @@ vector_scal_opencl (size_t size, float vector[size], float factor)
 #endif
 
 
+#ifdef STARPU_USE_CUDA
+
+/* Declaration of the CUDA implementation of `vector_scal'.  Only the
+   declaration appears here; the definition lives in a separate `.cu' file.
+   The attribute below declares this function as the "cuda" implementation
+   of the `vector_scal' task.  */
+
+extern void vector_scal_cuda (size_t size, float vector[size], float factor)
+  __attribute__ ((task_implementation ("cuda", vector_scal)));
+
+#endif /* STARPU_USE_CUDA */
+
+
 #define EPSILON 1e-3
 static bool
 check (size_t size, float vector[size], float factor)

+ 44 - 0
gcc-plugin/examples/vector_scal/vector_scal_cuda.cu

@@ -0,0 +1,44 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 Institut National de Recherche en Informatique et Automatique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010  Université de Bordeaux 1
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/* CUDA implementation of the `vector_scal' task.  */
+
+#include <starpu.h>
+#include <starpu_cuda.h>
+#include <stdlib.h>
+
+/* Kernel: multiply each of the first N elements of VAL by FACTOR, in place.
+
+   Only the `x' components of the grid, block and thread indices are used,
+   so a one-dimensional launch is expected.  Each thread starts at its own
+   global index and then advances by the total number of threads in the
+   launch; every element below N is therefore processed exactly once, even
+   when the launch has fewer threads than there are elements.
+
+   The count and the indices are `size_t', and the index arithmetic is
+   widened before multiplying, so that neither the caller's element count
+   nor `blockIdx.x * blockDim.x' is truncated to 32 bits.  */
+
+static __global__ void
+vector_mult_cuda (float *val, size_t n, float factor)
+{
+  /* Total number of threads in the launch, i.e. the distance between two
+     consecutive elements handled by the same thread.  */
+  size_t stride = (size_t) gridDim.x * blockDim.x;
+  size_t i = (size_t) blockIdx.x * blockDim.x + threadIdx.x;
+
+  for (; i < n; i += stride)
+    val[i] *= factor;
+}
+
+/* CUDA implementation of the `vector_scal' task: multiply the SIZE elements
+   of VECTOR by FACTOR, in place.
+
+   VECTOR is handed to the kernel as is, so it must be dereferenceable from
+   device code.  The kernel launches are queued on the stream returned by
+   `starpu_cuda_get_local_stream', and that stream is synchronized before
+   returning.  A failed launch or a failed synchronization is passed to
+   STARPU_CUDA_REPORT_ERROR instead of being silently ignored.
+
+   The vector is processed in chunks of at most 65535 blocks of 64 threads.
+   65535 is the smallest x-dimension grid limit among CUDA devices, so each
+   launch has a valid configuration on any device and each chunk length
+   fits in 32 bits.  An empty vector results in no launch at all, since a
+   grid of zero blocks is not a valid launch configuration.  */
+
+extern "C" void
+vector_scal_cuda (size_t size, float vector[], float factor)
+{
+  const unsigned threads_per_block = 64;
+  const unsigned max_blocks = 65535;
+  const size_t max_chunk = (size_t) max_blocks * threads_per_block;
+
+  cudaStream_t stream = starpu_cuda_get_local_stream ();
+  cudaError_t status;
+
+  float *chunk = vector;
+  size_t remaining = size;
+
+  while (remaining > 0)
+    {
+      size_t count = remaining < max_chunk ? remaining : max_chunk;
+
+      /* Round up so that a trailing partial block is still covered; the
+         result is at most MAX_BLOCKS.  */
+      unsigned nblocks =
+	(unsigned) ((count + threads_per_block - 1) / threads_per_block);
+
+      vector_mult_cuda <<< nblocks, threads_per_block, 0, stream >>>
+	(chunk, count, factor);
+
+      /* A kernel launch returns nothing; configuration errors are only
+         visible through `cudaGetLastError'.  */
+      status = cudaGetLastError ();
+      if (status != cudaSuccess)
+	STARPU_CUDA_REPORT_ERROR (status);
+
+      chunk += count;
+      remaining -= count;
+    }
+
+  /* Wait for the queued work; errors raised while the kernels execute are
+     reported here.  */
+  status = cudaStreamSynchronize (stream);
+  if (status != cudaSuccess)
+    STARPU_CUDA_REPORT_ERROR (status);
+}