ソースを参照

Implement the "scratch" test on CUDA too

Cédric Augonnet 15 年 前
コミット
31ba3b65e0
共有3 個のファイルを変更した63 個の追加1 個の削除を含む
  1. 5 0
      tests/Makefile.am
  2. 8 1
      tests/datawizard/scratch.c
  3. 50 0
      tests/datawizard/scratch_cuda.cu

+ 5 - 0
tests/Makefile.am

@@ -183,6 +183,11 @@ datawizard_data_implicit_deps_SOURCES =		\
 datawizard_scratch_SOURCES =			\
 	datawizard/scratch.c
 
+if STARPU_USE_CUDA
+datawizard_scratch_SOURCES +=		\
+	datawizard/scratch_cuda.cu
+endif
+
 datawizard_dsm_stress_SOURCES =			\
 	datawizard/dsm_stress.c
 

+ 8 - 1
tests/datawizard/scratch.c

@@ -28,6 +28,10 @@ starpu_data_handle A_handle, B_handle;
 
 static unsigned var = 0;
 
+#ifdef STARPU_USE_CUDA
+extern void cuda_f(void *descr[], __attribute__ ((unused)) void *_args);
+#endif
+
 static void cpu_f(void *descr[], __attribute__ ((unused)) void *_args)
 {
 	unsigned *v = (unsigned *)STARPU_GET_VECTOR_PTR(descr[0]);
@@ -46,8 +50,11 @@ static void cpu_f(void *descr[], __attribute__ ((unused)) void *_args)
 }
 
 static starpu_codelet cl_f = {
-	.where = STARPU_CPU,
+	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_func = cpu_f,
+#ifdef STARPU_USE_CUDA
+	.cuda_func = cuda_f,
+#endif
 	.nbuffers = 2
 };
 

+ 50 - 0
tests/datawizard/scratch_cuda.cu

@@ -0,0 +1,50 @@
+/*
+ * StarPU
+ * Copyright (C) INRIA 2008-2009 (see AUTHORS file)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <stdio.h>
+#include <starpu.h>
+
+#define MAXNBLOCKS		32
+#define MAXTHREADSPERBLOCK	128
+
+/*
+ * Kernel: for every index i below nx, store tmp[i] + 1 into v[i].
+ *
+ * Each thread starts at its global index (block offset plus thread index)
+ * and advances by the total number of launched threads (grid-stride loop),
+ * so the whole range is covered whatever the launch configuration is.
+ *
+ * v   - destination array, written for i in [0, nx)
+ * tmp - source array, read for i in [0, nx)
+ * nx  - number of elements to process
+ */
+static __global__ void increment_vector(unsigned *v, unsigned *tmp, int nx)
+{
+	/* Total number of threads in the grid: distance between two
+	 * consecutive elements handled by the same thread. */
+	const int stride = gridDim.x * blockDim.x;
+
+	int idx = threadIdx.x + blockIdx.x*blockDim.x;
+	while (idx < nx)
+	{
+		v[idx] = tmp[idx] + 1;
+		idx += stride;
+	}
+}
+
+/*
+ * Report a failed CUDA runtime call on stderr.
+ * Returns non-zero when status is an error, 0 when it is cudaSuccess.
+ */
+static int cuda_f_failed(cudaError_t status, const char *what)
+{
+	if (status == cudaSuccess)
+		return 0;
+
+	fprintf(stderr, "scratch_cuda: %s failed: %s\n", what, cudaGetErrorString(status));
+	return 1;
+}
+
+/*
+ * CUDA implementation of the codelet.
+ *
+ * descr[0] is the vector to update; descr[1] is a second vector used here as
+ * temporary storage. The content of descr[0] is first copied into descr[1]
+ * (device to device), then increment_vector writes tmp[i] + 1 back into each
+ * of the nx elements of descr[0]. The function waits for the kernel to
+ * complete before returning.
+ *
+ * NOTE(review): this assumes descr[1] holds at least as many bytes as
+ * descr[0] and that the elements are unsigned integers -- confirm against
+ * the data registration in scratch.c.
+ *
+ * Any CUDA error is reported on stderr and the function returns early.
+ */
+extern "C" void cuda_f(void *descr[], __attribute__ ((unused)) void *_args)
+{
+	unsigned *v = (unsigned *)STARPU_GET_VECTOR_PTR(descr[0]);
+	unsigned *tmp = (unsigned *)STARPU_GET_VECTOR_PTR(descr[1]);
+
+	unsigned nx = STARPU_GET_VECTOR_NX(descr[0]);
+	size_t elemsize = STARPU_GET_VECTOR_ELEMSIZE(descr[0]);
+
+	/* Nothing to do for an empty vector, and a launch with zero threads
+	 * would be an invalid configuration. */
+	if (nx == 0)
+		return;
+
+	if (cuda_f_failed(cudaMemcpy(tmp, v, nx*elemsize, cudaMemcpyDeviceToDevice), "cudaMemcpy"))
+		return;
+
+	/* Derive the block size from nx so that it can never be zero (the
+	 * former "nx / nblocks" was 0 for nx < 128), then use just enough
+	 * blocks to cover the vector, capped at 128. The kernel strides over
+	 * the data, so any remaining elements are still processed. */
+	const unsigned max_nblocks = 128;
+	const unsigned max_threads = MAXTHREADSPERBLOCK;
+	unsigned nthread_per_block = STARPU_MIN(max_threads, nx);
+	unsigned nblocks = STARPU_MIN(max_nblocks, (nx - 1) / nthread_per_block + 1);
+
+	increment_vector<<<nblocks, nthread_per_block>>>(v, tmp, nx);
+
+	/* A kernel launch returns nothing: configuration errors are only
+	 * visible through cudaGetLastError(). */
+	if (cuda_f_failed(cudaGetLastError(), "increment_vector launch"))
+		return;
+
+	/* Errors raised while the kernel runs show up at the next
+	 * synchronizing call. */
+	cuda_f_failed(cudaThreadSynchronize(), "cudaThreadSynchronize");
+}