9 年之前 · 68658f16bf
--- a/configure.ac
+++ b/configure.ac
@@ -1533,6 +1533,12 @@ if test x$enable_coverage = xyes; then
 
				 	LIBS="${LIBS} -lgcov"
			
 
				 fi
			
 
				 
			
 
				+AC_MSG_CHECKING(whether coverity mode should be enabled)
			
 
				+AC_ARG_ENABLE(coverity, [AS_HELP_STRING([--enable-coverity], [enable coverity mode])],
			
 
				+			enable_coverity=$enableval, enable_coverity=no)
			
 
				+AC_MSG_RESULT($enable_coverity)
			
 
				+AM_CONDITIONAL(STARPU_COVERITY, test x$enable_coverity = xyes)
			
 
				+
			
 
				 # shall we use FxT to generate trace of the execution ?
			
 
				 AC_MSG_CHECKING(whether FxT traces should be generated)
			
 
				 AC_ARG_WITH(fxt, [AS_HELP_STRING([--with-fxt[=<dir>]], [generate fxt traces])],
			
--- a/doc/doxygen/chapters/code/scal_pragma.cu
+++ b/doc/doxygen/chapters/code/scal_pragma.cu
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2010-2013  CNRS
			
 
				- * Copyright (C) 2010-2013  Université de Bordeaux
			
 
				+ * Copyright (C) 2010-2013, 2016  Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -31,8 +31,7 @@ vector_mult_cuda (unsigned n, float *val, float factor)
 
				 }
			
 
				 
			
 
				 /* Definition of the task implementation declared in the C file. */
			
 
				-extern "C" void
			
 
				-vector_scal_cuda (size_t size, float vector[], float factor)
			
 
				+extern "C" void vector_scal_cuda (size_t size, float vector[], float factor)
			
 
				 {
			
 
				   unsigned threads_per_block = 64;
			
 
				   unsigned nblocks = (size + threads_per_block - 1) / threads_per_block;
			
--- a/doc/tutorial/vector_scal_plugin_cuda.cu
+++ b/doc/tutorial/vector_scal_plugin_cuda.cu
@@ -2,7 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2012 INRIA
			
 
				  * Copyright (C) 2010, 2011, 2013  CNRS
			
 
				- * Copyright (C) 2010  Université de Bordeaux
			
 
				+ * Copyright (C) 2010, 2016  Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -31,8 +31,7 @@ vector_mult_cuda (unsigned int n, float *val, float factor)
 
				     val[i] *= factor;
			
 
				 }
			
 
				 
			
 
				-extern "C" void
			
 
				-vector_scal_cuda (unsigned int size, float vector[], float factor)
			
 
				+extern "C" void vector_scal_cuda (unsigned int size, float vector[], float factor)
			
 
				 {
			
 
				   unsigned threads_per_block = 64;
			
 
				   unsigned nblocks = (size + threads_per_block - 1) / threads_per_block;
			
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -71,10 +71,14 @@ CLEANFILES = *.gcno *.gcda *.linkinfo *.mod starpu_idle_microsec.log
 
				 
			
 
				 if STARPU_USE_CUDA
			
 
				 
			
 
				+if STARPU_COVERITY
			
 
				+include $(top_srcdir)/starpu-mynvcc.mk
			
 
				+else
			
 
				 NVCCFLAGS += --compiler-options -fno-strict-aliasing  -I$(top_srcdir)/include/ -I$(top_builddir)/include/ -I$(top_srcdir)/examples/  $(HWLOC_CFLAGS)
			
 
				 
			
 
				 .cu.o:
			
 
				 	$(NVCC) $< -c -o $@ $(NVCCFLAGS)
			
 
				+endif
			
 
				 
			
 
				 endif
			
 
				 
			
--- a/examples/pi/SobolQRNG/sobol_gpu.cu
+++ b/examples/pi/SobolQRNG/sobol_gpu.cu
@@ -132,8 +132,7 @@ __global__ void sobolGPU_kernel(unsigned n_vectors, unsigned n_dimensions, unsig
 
				     }
			
 
				 }
			
 
				 
			
 
				-extern "C"
			
 
				-void sobolGPU(int n_vectors, int n_dimensions, unsigned int *d_directions, float *d_output)
			
 
				+extern "C" void sobolGPU(int n_vectors, int n_dimensions, unsigned int *d_directions, float *d_output)
			
 
				 {
			
 
				     const int threadsperblock = 64;
			
 
				 
			
--- a/examples/spmv/spmv_cuda.cu
+++ b/examples/spmv/spmv_cuda.cu
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010, 2014-2015  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010, 2014-2016  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2012  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -21,8 +21,7 @@
 
				 
			
 
				 #define MIN(a,b)	((a)<(b)?(a):(b))
			
 
				 
			
 
				-extern "C" __global__ 
			
 
				-void spmv_kernel(uint32_t nnz, uint32_t nrow, float *nzval, uint32_t *colind, uint32_t *rowptr, 
			
 
				+extern "C" __global__ void spmv_kernel(uint32_t nnz, uint32_t nrow, float *nzval, uint32_t *colind, uint32_t *rowptr, 
			
 
				 			uint32_t firstentry, uint32_t elemsize, 
			
 
				 			float *vecin, uint32_t nx_in, uint32_t elemsize1, float * vecout, uint32_t nx_out, uint32_t elemsize2)
			
 
				 {
			
@@ -51,8 +50,7 @@ void spmv_kernel(uint32_t nnz, uint32_t nrow, float *nzval, uint32_t *colind, ui
 
				 	}
			
 
				 }
			
 
				 
			
 
				-extern "C" __global__ 
			
 
				-void spmv_kernel_3(uint32_t nnz, uint32_t nrow, float *nzval, uint32_t *colind, uint32_t *rowptr, 
			
 
				+extern "C" __global__ void spmv_kernel_3(uint32_t nnz, uint32_t nrow, float *nzval, uint32_t *colind, uint32_t *rowptr, 
			
 
				 			uint32_t firstentry, 
			
 
				 			float *vecin, uint32_t nx_in, float * vecout, uint32_t nx_out)
			
 
				 {
			
--- a/examples/stencil/Makefile.am
+++ b/examples/stencil/Makefile.am
@@ -29,6 +29,9 @@ CC = $(CC_OR_MPICC)
 
				 
			
 
				 if STARPU_USE_CUDA
			
 
				 
			
 
				+if STARPU_COVERITY
			
 
				+include $(top_srcdir)/starpu-mynvcc.mk
			
 
				+else
			
 
				 NVCCFLAGS += $(HWLOC_CFLAGS)
			
 
				 
			
 
				 .cu.cubin:
			
@@ -38,6 +41,7 @@ NVCCFLAGS += $(HWLOC_CFLAGS)
 
				 .cu.o:
			
 
				 	$(MKDIR_P) `dirname $@`
			
 
				 	$(NVCC) $< -c -o $@ --compiler-options -fno-strict-aliasing -I$(top_srcdir)/include/ -I$(top_builddir)/include/ $(NVCCFLAGS)
			
 
				+endif
			
 
				 
			
 
				 
			
 
				 endif
			
--- a/examples/stencil/life_cuda.cu
+++ b/examples/stencil/life_cuda.cu
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010  Université de Bordeaux
			
 
				+ * Copyright (C) 2010, 2016  Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -19,8 +19,7 @@
 
				 
			
 
				 /* Heart of the stencil computation: compute a new state from an old one. */
			
 
				 
			
 
				-extern "C" __global__ void
			
 
				-cuda_life_update(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, int ldy, int ldz, int iter)
			
 
				+extern "C" __global__ void cuda_life_update(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, int ldy, int ldz, int iter)
			
 
				 {
			
 
				 	unsigned idx = threadIdx.x + blockIdx.x * blockDim.x;
			
 
				 	unsigned idy = threadIdx.y + blockIdx.y * blockDim.y;
			
@@ -56,8 +55,7 @@ cuda_life_update(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, in
 
				 		}
			
 
				 }
			
 
				 
			
 
				-extern "C" void
			
 
				-cuda_life_update_host(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, int ldy, int ldz, int iter)
			
 
				+extern "C" void cuda_life_update_host(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, int ldy, int ldz, int iter)
			
 
				 {
			
 
				 	unsigned max_parallelism = 512;
			
 
				 	unsigned threads_per_dim_x = max_parallelism;
			
--- a/examples/stencil/shadow.cu
+++ b/examples/stencil/shadow.cu
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010  Université de Bordeaux
			
 
				+ * Copyright (C) 2010, 2016  Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -20,8 +20,7 @@
 
				 /* Perform replication of data on X and Y edges, to fold the domain on 
			
 
				    itself through mere replication of the source state. */
			
 
				 
			
 
				-extern "C" __global__ void
			
 
				-cuda_shadow( int bz, TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz, int i)
			
 
				+extern "C" __global__ void cuda_shadow( int bz, TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz, int i)
			
 
				 {
			
 
				 	unsigned idx = threadIdx.x + blockIdx.x * blockDim.x;
			
 
				 	unsigned idy = threadIdx.y + blockIdx.y * blockDim.y;
			
@@ -36,8 +35,7 @@ cuda_shadow( int bz, TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz, int i)
 
				 #include "shadow.h"
			
 
				 }
			
 
				 
			
 
				-extern "C" void
			
 
				-cuda_shadow_host(int bz, TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz, int i)
			
 
				+extern "C" void cuda_shadow_host(int bz, TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz, int i)
			
 
				 {
			
 
				 	unsigned max_parallelism = 512;
			
 
				 	unsigned threads_per_dim_x = max_parallelism;
			
--- a/gcc-plugin/examples/Makefile.am
+++ b/gcc-plugin/examples/Makefile.am
@@ -91,9 +91,13 @@ if STARPU_USE_CUDA
 
				 
			
 
				 vector_scal_vector_scal_SOURCES += vector_scal/vector_scal_cuda.cu
			
 
				 
			
 
				+if STARPU_COVERITY
			
 
				+include $(top_srcdir)/starpu-mynvcc.mk
			
 
				+else
			
 
				 .cu.o:
			
 
				 	$(NVCC) $< -c -o $@ $(NVCCFLAGS)			\
			
 
				 	  -I$(top_builddir)/include -I$(top_srcdir)/include
			
 
				+endif
			
 
				 
			
 
				 else !STARPU_USE_CUDA
			
 
				 
			
--- a/gcc-plugin/examples/vector_scal/vector_scal_cuda.cu
+++ b/gcc-plugin/examples/vector_scal/vector_scal_cuda.cu
@@ -2,7 +2,7 @@
 
				  *
			
 
				  * Copyright (C) 2012 INRIA
			
 
				  * Copyright (C) 2010, 2011, 2013  CNRS
			
 
				- * Copyright (C) 2010  Université de Bordeaux
			
 
				+ * Copyright (C) 2010, 2016  Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -31,8 +31,7 @@ vector_mult_cuda (unsigned int n, float *val, float factor)
 
				     val[i] *= factor;
			
 
				 }
			
 
				 
			
 
				-extern "C" void
			
 
				-vector_scal_cuda (unsigned int size, float vector[], float factor)
			
 
				+extern "C" void vector_scal_cuda (unsigned int size, float vector[], float factor)
			
 
				 {
			
 
				   unsigned threads_per_block = 64;
			
 
				   unsigned nblocks = (size + threads_per_block - 1) / threads_per_block;
			
--- a/mpi/examples/Makefile.am
+++ b/mpi/examples/Makefile.am
@@ -78,6 +78,9 @@ examplebindir = $(libdir)/starpu/mpi
 
				 examplebin_PROGRAMS =
			
 
				 
			
 
				 if STARPU_USE_CUDA
			
 
				+if STARPU_COVERITY
			
 
				+include $(top_srcdir)/starpu-mynvcc.mk
			
 
				+else
			
 
				 NVCCFLAGS += --compiler-options -fno-strict-aliasing  -I$(top_srcdir)/include/ -I$(top_builddir)/include/ $(HWLOC_CFLAGS)
			
 
				 
			
 
				 .cu.cubin:
			
@@ -87,6 +90,7 @@ NVCCFLAGS += --compiler-options -fno-strict-aliasing  -I$(top_srcdir)/include/ -
 
				 .cu.o:
			
 
				 	$(NVCC) $< -c -o $@ $(NVCCFLAGS)
			
 
				 endif
			
 
				+endif
			
 
				 
			
 
				 AM_CFLAGS = -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(FXT_CFLAGS) $(MAGMA_CFLAGS) $(HWLOC_CFLAGS) $(GLOBAL_AM_CFLAGS) -Wno-unused
			
 
				 LIBS = $(top_builddir)/src/@LIBSTARPU_LINK@ @LIBS@ $(FXT_LIBS) $(MAGMA_LIBS)
			
--- a/mpi/tests/Makefile.am
+++ b/mpi/tests/Makefile.am
@@ -60,6 +60,9 @@ examplebindir = $(libdir)/starpu/examples/mpi
 
				 examplebin_PROGRAMS =
			
 
				 
			
 
				 if STARPU_USE_CUDA
			
 
				+if STARPU_COVERITY
			
 
				+include $(top_srcdir)/starpu-mynvcc.mk
			
 
				+else
			
 
				 NVCCFLAGS += --compiler-options -fno-strict-aliasing  -I$(top_srcdir)/include/ -I$(top_builddir)/include/ $(HWLOC_CFLAGS)
			
 
				 
			
 
				 .cu.cubin:
			
@@ -69,6 +72,7 @@ NVCCFLAGS += --compiler-options -fno-strict-aliasing  -I$(top_srcdir)/include/ -
 
				 .cu.o:
			
 
				 	$(NVCC) $< -c -o $@ $(NVCCFLAGS)
			
 
				 endif
			
 
				+endif
			
 
				 
			
 
				 AM_CFLAGS = -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(FXT_CFLAGS) $(MAGMA_CFLAGS) $(HWLOC_CFLAGS) $(GLOBAL_AM_CFLAGS) -Wno-unused
			
 
				 LIBS = $(top_builddir)/src/@LIBSTARPU_LINK@ @LIBS@ $(FXT_LIBS) $(MAGMA_LIBS)
			
--- a/starpufft/src/Makefile.am
+++ b/starpufft/src/Makefile.am
@@ -40,6 +40,9 @@ libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) -no-undefined
 
				   -version-info $(LIBSTARPUFFT_INTERFACE_CURRENT):$(LIBSTARPUFFT_INTERFACE_REVISION):$(LIBSTARPUFFT_INTERFACE_AGE)
			
 
				 
			
 
				 if STARPU_USE_CUDA
			
 
				+if STARPU_COVERITY
			
 
				+include $(top_srcdir)/starpu-mynvcc.mk
			
 
				+else
			
 
				 NVCCFLAGS += -Xcompiler -fPIC -Xlinker -fPIC
			
 
				 
			
 
				 cudaf_kernels.o: cudaf_kernels.cu
			
@@ -58,3 +61,4 @@ endif
 
				 
			
 
				 libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_LIBADD +=  $(STARPU_CUDA_LDFLAGS)
			
 
				 endif
			
 
				+endif
			
--- a/starpufft/src/cudax_kernels.cu
+++ b/starpufft/src/cudax_kernels.cu
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2009, 2010  Université de Bordeaux
			
 
				+ * Copyright (C) 2009, 2010, 2016  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2011  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -40,8 +40,7 @@
 
				 	} \
			
 
				 	cudaStreamSynchronize(starpu_cuda_get_local_stream()); \
			
 
				 
			
 
				-extern "C" __global__ void
			
 
				-STARPUFFT(cuda_twist1_1d)(const _cuComplex *in, _cuComplex *twisted1, unsigned i, unsigned n1, unsigned n2)
			
 
				+extern "C" __global__ void STARPUFFT(cuda_twist1_1d)(const _cuComplex *in, _cuComplex *twisted1, unsigned i, unsigned n1, unsigned n2)
			
 
				 {
			
 
				 	unsigned j;
			
 
				 	VARS_1d
			
@@ -51,14 +50,12 @@ STARPUFFT(cuda_twist1_1d)(const _cuComplex *in, _cuComplex *twisted1, unsigned i
 
				 		twisted1[j] = in[i+j*n1];
			
 
				 }
			
 
				 
			
 
				-extern "C" void
			
 
				-STARPUFFT(cuda_twist1_1d_host)(const _cuComplex *in, _cuComplex *twisted1, unsigned i, unsigned n1, unsigned n2)
			
 
				+extern "C" void STARPUFFT(cuda_twist1_1d_host)(const _cuComplex *in, _cuComplex *twisted1, unsigned i, unsigned n1, unsigned n2)
			
 
				 {
			
 
				 	DISTRIB_1d(n2, STARPUFFT(cuda_twist1_1d), (in, twisted1, i, n1, n2));
			
 
				 }
			
 
				 
			
 
				-extern "C" __global__ void
			
 
				-STARPUFFT(cuda_twiddle_1d)(_cuComplex * out, const _cuComplex * roots, unsigned n, unsigned i)
			
 
				+extern "C" __global__ void STARPUFFT(cuda_twiddle_1d)(_cuComplex * out, const _cuComplex * roots, unsigned n, unsigned i)
			
 
				 {
			
 
				 	unsigned j;
			
 
				 	VARS_1d
			
@@ -69,8 +66,7 @@ STARPUFFT(cuda_twiddle_1d)(_cuComplex * out, const _cuComplex * roots, unsigned
 
				 	return;
			
 
				 }
			
 
				 
			
 
				-extern "C" void
			
 
				-STARPUFFT(cuda_twiddle_1d_host)(_cuComplex *out, const _cuComplex *roots, unsigned n, unsigned i)
			
 
				+extern "C" void STARPUFFT(cuda_twiddle_1d_host)(_cuComplex *out, const _cuComplex *roots, unsigned n, unsigned i)
			
 
				 {
			
 
				 	DISTRIB_1d(n, STARPUFFT(cuda_twiddle_1d), (out, roots, n, i));
			
 
				 }
			
@@ -115,8 +111,7 @@ STARPUFFT(cuda_twiddle_1d_host)(_cuComplex *out, const _cuComplex *roots, unsign
 
				 	} \
			
 
				 	cudaStreamSynchronize(starpu_cuda_get_local_stream()); \
			
 
				 
			
 
				-extern "C" __global__ void
			
 
				-STARPUFFT(cuda_twist1_2d)(const _cuComplex *in, _cuComplex *twisted1, unsigned i, unsigned j, unsigned n1, unsigned n2, unsigned m1, unsigned m2)
			
 
				+extern "C" __global__ void STARPUFFT(cuda_twist1_2d)(const _cuComplex *in, _cuComplex *twisted1, unsigned i, unsigned j, unsigned n1, unsigned n2, unsigned m1, unsigned m2)
			
 
				 {
			
 
				 	unsigned k, l;
			
 
				 	VARS_2d
			
@@ -129,14 +124,12 @@ STARPUFFT(cuda_twist1_2d)(const _cuComplex *in, _cuComplex *twisted1, unsigned i
 
				 			twisted1[k*m2+l] = in[i*m+j+k*m*n1+l*m1];
			
 
				 }
			
 
				 
			
 
				-extern "C" void
			
 
				-STARPUFFT(cuda_twist1_2d_host)(const _cuComplex *in, _cuComplex *twisted1, unsigned i, unsigned j, unsigned n1, unsigned n2, unsigned m1, unsigned m2)
			
 
				+extern "C" void STARPUFFT(cuda_twist1_2d_host)(const _cuComplex *in, _cuComplex *twisted1, unsigned i, unsigned j, unsigned n1, unsigned n2, unsigned m1, unsigned m2)
			
 
				 {
			
 
				 	DISTRIB_2d(n2, m2, STARPUFFT(cuda_twist1_2d), (in, twisted1, i, j, n1, n2, m1, m2));
			
 
				 }
			
 
				 
			
 
				-extern "C" __global__ void
			
 
				-STARPUFFT(cuda_twiddle_2d)(_cuComplex * out, const _cuComplex * roots0, const _cuComplex * roots1, unsigned n2, unsigned m2, unsigned i, unsigned j)
			
 
				+extern "C" __global__ void STARPUFFT(cuda_twiddle_2d)(_cuComplex * out, const _cuComplex * roots0, const _cuComplex * roots1, unsigned n2, unsigned m2, unsigned i, unsigned j)
			
 
				 {
			
 
				 	unsigned k, l;
			
 
				 	VARS_2d
			
@@ -149,8 +142,7 @@ STARPUFFT(cuda_twiddle_2d)(_cuComplex * out, const _cuComplex * roots0, const _c
 
				 	return;
			
 
				 }
			
 
				 
			
 
				-extern "C" void
			
 
				-STARPUFFT(cuda_twiddle_2d_host)(_cuComplex *out, const _cuComplex *roots0, const _cuComplex *roots1, unsigned n2, unsigned m2, unsigned i, unsigned j)
			
 
				+extern "C" void STARPUFFT(cuda_twiddle_2d_host)(_cuComplex *out, const _cuComplex *roots0, const _cuComplex *roots1, unsigned n2, unsigned m2, unsigned i, unsigned j)
			
 
				 {
			
 
				 	DISTRIB_2d(n2, m2, STARPUFFT(cuda_twiddle_2d), (out, roots0, roots1, n2, m2, i, j));
			
 
				 }
			
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -62,6 +62,9 @@ endif
 
				 
			
 
				 if STARPU_USE_CUDA
			
 
				 
			
 
				+if STARPU_COVERITY
			
 
				+include $(top_srcdir)/starpu-mynvcc.mk
			
 
				+else
			
 
				 # TODO define NVCCFLAGS
			
 
				 NVCC ?= nvcc
			
 
				 
			
@@ -74,6 +77,7 @@ NVCCFLAGS += -I$(top_srcdir)/include/ -I$(top_srcdir)/src -I$(top_builddir)/src
 
				 .cu.o:
			
 
				 	$(MKDIR_P) `dirname $@`
			
 
				 	$(NVCC) $< -c -o $@ --compiler-options -fno-strict-aliasing  $(NVCCFLAGS) -I${includedir}
			
 
				+endif
			
 
				 
			
 
				 endif
			
 
				 
			
--- a/tests/datawizard/sync_and_notify_data_kernels.cu
+++ b/tests/datawizard/sync_and_notify_data_kernels.cu
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010, 2014  Université de Bordeaux
			
 
				+ * Copyright (C) 2010, 2014, 2016  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2012  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -21,7 +21,7 @@
 
				  *	increment a (val[0])
			
 
				  */
			
 
				 
			
 
				-extern "C" __global__ void _cuda_incA(unsigned *v)
			
 
				+static __global__ void _cuda_incA(unsigned *v)
			
 
				 {
			
 
				 	v[0]++;
			
 
				 }
			
@@ -37,7 +37,7 @@ extern "C" void cuda_codelet_incA(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_
 
				  *	increment c (val[2])
			
 
				  */
			
 
				 
			
 
				-extern "C" __global__ void _cuda_incC(unsigned *v)
			
 
				+static __global__ void _cuda_incC(unsigned *v)
			
 
				 {
			
 
				 	v[2]++;
			
 
				 }
			
--- a/tests/overlap/long_kernel.cu
+++ b/tests/overlap/long_kernel.cu
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2014  Université de Bordeaux
			
 
				+ * Copyright (C) 2014, 2016  Université de Bordeaux
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -24,8 +24,7 @@ void long_kernel(unsigned long niters)
 
				 		__syncthreads();
			
 
				 }
			
 
				 
			
 
				-extern "C"
			
 
				-void long_kernel_cuda(unsigned long niters)
			
 
				+extern "C" void long_kernel_cuda(unsigned long niters)
			
 
				 {
			
 
				 	dim3 dimBlock(1,1);
			
 
				 	dim3 dimGrid(1,1);