Selaa lähdekoodia

examples: cg example needs a CUDA card that supports double precision

Nathalie Furmento 12 vuotta sitten
vanhempi
commit
c2dbabb2e3
2 muutettua tiedostoa jossa 28 lisäystä ja 0 poistoa
  1. 2 0
      examples/Makefile.am
  2. 26 0
      examples/cg/cg_kernels.c

+ 2 - 0
examples/Makefile.am

@@ -673,6 +673,8 @@ cg_cg_SOURCES =					\
 if STARPU_USE_CUDA
 cg_cg_SOURCES +=				\
 	cg/cg_dot_kernel.cu
+# The cg kernels work on doubles, so build this CUDA kernel for sm_13 (compute capability 1.3).
+cg/cg_dot_kernel.o: cg/cg_dot_kernel.cu
+	$(NVCC) $< -c -o $@ $(NVCCFLAGS) -arch sm_13
 endif
 
 cg_cg_LDADD =					\

+ 26 - 0
examples/cg/cg_kernels.c

@@ -44,6 +44,23 @@ static void print_matrix_from_descr(unsigned nx, unsigned ny, unsigned ld, TYPE
 }
 #endif
 
+/*
+ *	Codelet can_execute hook: tell whether worker `workerid` may run the
+ *	cg kernels, which operate on doubles.
+ *
+ *	CPU and OpenCL workers are always accepted. When built with
+ *	STARPU_USE_CUDA, any other worker is queried as a CUDA device and is
+ *	accepted only if its compute capability is at least 1.3. The `task`
+ *	and `nimpl` arguments are not consulted.
+ *
+ *	Returns 1 if the worker can execute the task, 0 otherwise.
+ */
+static int can_execute(unsigned workerid, struct starpu_task *task, unsigned nimpl)
+{
+	/* Part of the hook signature, but the decision only depends on the worker */
+	(void) task;
+	(void) nimpl;
+
+	enum starpu_archtype type = starpu_worker_get_type(workerid);
+	if (type == STARPU_CPU_WORKER || type == STARPU_OPENCL_WORKER)
+		return 1;
+
+#ifdef STARPU_USE_CUDA
+	/* Cuda device */
+	const struct cudaDeviceProp *props = starpu_cuda_get_device_properties(workerid);
+
+	/* Compare (major, minor) against (1, 3) as a pair: the minor number
+	 * is only meaningful once the major number is known to be 1 */
+	if (props->major > 1 || (props->major == 1 && props->minor >= 3))
+		/* At least compute capability 1.3, supports doubles */
+		return 1;
+#endif
+	/* Old card, does not support doubles */
+	return 0;
+}
 
 /*
  *	Reduction accumulation methods
@@ -76,6 +93,7 @@ static struct starpu_perfmodel accumulate_variable_model =
 
 struct starpu_codelet accumulate_variable_cl =
 {
+	.can_execute = can_execute,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {accumulate_variable_cpu, NULL},
 #ifdef STARPU_USE_CUDA
@@ -114,6 +132,7 @@ static struct starpu_perfmodel accumulate_vector_model =
 
 struct starpu_codelet accumulate_vector_cl =
 {
+	.can_execute = can_execute,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {accumulate_vector_cpu, NULL},
 #ifdef STARPU_USE_CUDA
@@ -154,6 +173,7 @@ static struct starpu_perfmodel bzero_variable_model =
 
 struct starpu_codelet bzero_variable_cl =
 {
+	.can_execute = can_execute,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {bzero_variable_cpu, NULL},
 #ifdef STARPU_USE_CUDA
@@ -191,6 +211,7 @@ static struct starpu_perfmodel bzero_vector_model =
 
 struct starpu_codelet bzero_vector_cl =
 {
+	.can_execute = can_execute,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {bzero_vector_cpu, NULL},
 #ifdef STARPU_USE_CUDA
@@ -246,6 +267,7 @@ static struct starpu_perfmodel dot_kernel_model =
 
 static struct starpu_codelet dot_kernel_cl =
 {
+	.can_execute = can_execute,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {dot_kernel_cpu, NULL},
 #ifdef STARPU_USE_CUDA
@@ -321,6 +343,7 @@ static struct starpu_perfmodel scal_kernel_model =
 
 static struct starpu_codelet scal_kernel_cl =
 {
+	.can_execute = can_execute,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {scal_kernel_cpu, NULL},
 #ifdef STARPU_USE_CUDA
@@ -394,6 +417,7 @@ static struct starpu_perfmodel gemv_kernel_model =
 
 static struct starpu_codelet gemv_kernel_cl =
 {
+	.can_execute = can_execute,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.type = STARPU_SPMD,
 	.max_parallelism = INT_MAX,
@@ -493,6 +517,7 @@ static struct starpu_perfmodel scal_axpy_kernel_model =
 
 static struct starpu_codelet scal_axpy_kernel_cl =
 {
+	.can_execute = can_execute,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {scal_axpy_kernel_cpu, NULL},
 #ifdef STARPU_USE_CUDA
@@ -567,6 +592,7 @@ static struct starpu_perfmodel axpy_kernel_model =
 
 static struct starpu_codelet axpy_kernel_cl =
 {
+	.can_execute = can_execute,
 	.where = STARPU_CPU|STARPU_CUDA,
 	.cpu_funcs = {axpy_kernel_cpu, NULL},
 #ifdef STARPU_USE_CUDA