소스 검색

Make the "ring" example more challenging as we now have both MPI and CUDA
transfers.

Cédric Augonnet 15 년 전
부모
커밋
2a2758ac3e
3개의 변경된 파일, 84개의 추가 그리고 2개의 삭제
  1. 17 0
      mpi/Makefile.am
  2. 38 2
      mpi/tests/ring.c
  3. 29 0
      mpi/tests/ring_kernel.cu

+ 17 - 0
mpi/Makefile.am

@@ -16,6 +16,19 @@
 
 CC=mpicc
 
+if USE_CUDA
+# CUDA sources are compiled by nvcc through the suffix rules below.
+# TODO define NVCCFLAGS
+NVCC ?= nvcc
+
+# Build a .cubin from a .cu file, creating the output directory first.
+.cu.cubin:
+	$(MKDIR_P) `dirname $@`
+	$(NVCC) -cubin $< -o $@ --compiler-options -fno-strict-aliasing  $(NVCCFLAGS)
+
+# Build an object from a .cu file.  The output directory is created first,
+# as in the .cubin rule, so the rule does not depend on it already existing
+# in the build tree.
+.cu.o:
+	$(MKDIR_P) `dirname $@`
+	$(NVCC) $< -c -o $@ --compiler-options -fno-strict-aliasing  $(NVCCFLAGS) -I$(top_srcdir)/include/
+endif
+
+
 LIBS = $(top_builddir)/src/libstarpu.la @LIBS@
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/mpi/ -I$(top_srcdir)/src/
 
@@ -43,3 +56,7 @@ tests_ring_LDADD =					\
 
 tests_ring_SOURCES =					\
 	tests/ring.c
+
+if USE_CUDA
+# Add the CUDA kernel to the ring test sources only under USE_CUDA.
+tests_ring_SOURCES += tests/ring_kernel.cu
+endif

+ 38 - 2
mpi/tests/ring.c

@@ -21,6 +21,39 @@
 unsigned token = 42;
 starpu_data_handle token_handle;
 
+#ifdef USE_CUDA
+/* CUDA implementation of the increment codelet (see ring_kernel.cu). */
+extern void increment_cuda(starpu_data_interface_t *buffers, void *_args);
+#endif
+
+/*
+ * CPU implementation of the increment codelet: adds one to the unsigned
+ * value addressed by the vector pointer of the first buffer.
+ * The _args parameter is not used.
+ */
+void increment_core(starpu_data_interface_t *buffers, __attribute__ ((unused)) void *_args)
+{
+	unsigned *counter = (unsigned *)buffers[0].vector.ptr;
+
+	*counter += 1;
+}
+
+/*
+ * Codelet used to increment the token.  It works on a single buffer and
+ * names increment_core as its core function; the CUDA function is only
+ * filled in when USE_CUDA is defined.
+ */
+static starpu_codelet increment_cl = {
+	.nbuffers = 1,
+	.core_func = increment_core,
+#ifdef USE_CUDA
+	.cuda_func = increment_cuda,
+#endif
+	.where = CORE|CUDA
+};
+
+/*
+ * Submit a synchronous task that runs increment_cl on token_handle in
+ * read-write mode.
+ *
+ * If the submission fails the token is left unchanged and the ring would
+ * carry on with a wrong value, so the error is reported and the whole MPI
+ * job is aborted.
+ * NOTE(review): assumes starpu_submit_task returns 0 on success -- confirm
+ * against the StarPU headers of this revision.
+ */
+void increment_token(void)
+{
+	int ret;
+	struct starpu_task *task = starpu_task_create();
+
+	task->cl = &increment_cl;
+
+	task->buffers[0].handle = token_handle;
+	task->buffers[0].mode = STARPU_RW;
+
+	task->synchronous = 1;
+
+	ret = starpu_submit_task(task);
+	if (ret != 0)
+	{
+		fprintf(stderr, "starpu_submit_task failed (error %d)\n", ret);
+		MPI_Abort(MPI_COMM_WORLD, 1);
+	}
+}
+
 int main(int argc, char **argv)
 {
 	MPI_Init(NULL, NULL);
@@ -64,17 +97,20 @@ int main(int argc, char **argv)
 			fprintf(stdout, "Start with token value %d\n", token);
 		}
 
-		token += 1;
+		increment_token();
 		
 		if (!((loop == last_loop) && (rank == last_rank)))
 		{
 			starpu_mpi_send(token_handle, (rank+1)%size, tag+1, MPI_COMM_WORLD);
 		}
 		else {
+
+			starpu_sync_data_with_mem(token_handle, STARPU_R);
 			fprintf(stdout, "Finished : token value %d\n", token);
+			starpu_release_data_from_mem(token_handle);
 		}
 	}
-	
+
 	starpu_mpi_shutdown();
 	starpu_shutdown();
 

+ 29 - 0
mpi/tests/ring_kernel.cu

@@ -0,0 +1,29 @@
+/*
+ * StarPU
+ * Copyright (C) INRIA 2008-2009 (see AUTHORS file)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+
+/* Device kernel: adds one to the unsigned value that token points to. */
+static __global__ void cuda_incrementer(unsigned *token)
+{
+	*token += 1;
+}
+
+/*
+ * CUDA implementation of the increment codelet: launches cuda_incrementer
+ * with one block of one thread on the vector pointer of the first buffer,
+ * then waits for the kernel to finish.  The _args parameter is not used.
+ */
+extern "C" void increment_cuda(starpu_data_interface_t *buffers, __attribute__ ((unused)) void *_args)
+{
+	unsigned *tokenptr = (unsigned *)buffers[0].vector.ptr;
+
+	cuda_incrementer<<<1,1>>>(tokenptr);
+
+	/* A kernel launch returns before the kernel has run; wait here so the
+	 * increment is complete by the time this function returns.
+	 * (Newer CUDA toolkits name this call cudaDeviceSynchronize.) */
+	cudaThreadSynchronize();
+}