15 år sedan · 092b3a7635
--- a/examples/lu/xlu_kernels.c
+++ b/examples/lu/xlu_kernels.c
@@ -59,7 +59,7 @@ static inline void STARPU_LU(common_u22)(void *descr[],
 
				 				STARPU_ABORT();
			
 
				 
			
 
				 			if (STARPU_UNLIKELY((cures = cudaThreadSynchronize()) != cudaSuccess))
			
 
				-				CUDA_REPORT_ERROR(cures);
			
 
				+				STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 
			
 
				 			break;
			
 
				 #endif
			
@@ -121,7 +121,7 @@ static inline void STARPU_LU(common_u12)(void *descr[],
 
				 				STARPU_ABORT();
			
 
				 
			
 
				 			if (STARPU_UNLIKELY((cures = cudaThreadSynchronize()) != cudaSuccess))
			
 
				-				CUDA_REPORT_ERROR(cures);
			
 
				+				STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 
			
 
				 			break;
			
 
				 #endif
			
--- a/examples/starpufft/testx.c
+++ b/examples/starpufft/testx.c
@@ -128,7 +128,7 @@ int main(int argc, char *argv[]) {
 
				 	if (cufftExecC2C(cuda_plan, (cufftComplex*) in, (cufftComplex*) out_cuda, CUFFT_FORWARD) != CUFFT_SUCCESS)
			
 
				 		printf("erf2\n");
			
 
				 	if ((cures = cudaThreadSynchronize()) != cudaSuccess)
			
 
				-		CUDA_REPORT_ERROR(cures);
			
 
				+		STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 	gettimeofday(&end, NULL);
			
 
				 	cufftDestroy(cuda_plan);
			
 
				 	timing = (double)((end.tv_sec - begin.tv_sec)*1000000 + (end.tv_usec - begin.tv_usec));
			
--- a/examples/strassen2/strassen2_kernels.c
+++ b/examples/strassen2/strassen2_kernels.c
@@ -81,7 +81,7 @@ static void mult_common_codelet(void *descr[], int s, __attribute__((unused))  v
 
				 			cublasSgemm('n', 'n', n, n, n, 1.0f, right, ld12, left, ld21, 0.0f, center, ld22);
			
 
				 			cublasres = cublasGetError();
			
 
				 			if (STARPU_UNLIKELY(cublasres))
			
 
				-				CUBLAS_REPORT_ERROR(cublasres);
			
 
				+				STARPU_CUBLAS_REPORT_ERROR(cublasres);
			
 
				 			break;
			
 
				 #endif
			
 
				 		default:
			
@@ -145,12 +145,12 @@ static void add_sub_common_codelet(void *descr[], int s, __attribute__((unused))
 
				 				cublasSaxpy(n, 1.0f, &A[line*ldA], 1, &C[line*ldC], 1);
			
 
				 				cublasres = cublasGetError();
			
 
				 				if (STARPU_UNLIKELY(cublasres))
			
 
				-					CUBLAS_REPORT_ERROR(cublasres);
			
 
				+					STARPU_CUBLAS_REPORT_ERROR(cublasres);
			
 
				 				/* add line B to C = A */
			
 
				 				cublasSaxpy(n, alpha, &B[line*ldB], 1, &C[line*ldC], 1);
			
 
				 				cublasres = cublasGetError();
			
 
				 				if (STARPU_UNLIKELY(cublasres))
			
 
				-					CUBLAS_REPORT_ERROR(cublasres);
			
 
				+					STARPU_CUBLAS_REPORT_ERROR(cublasres);
			
 
				 			}
			
 
				 
			
 
				 			break;
			
@@ -224,7 +224,7 @@ static void self_add_sub_common_codelet(void *descr[], int s, __attribute__((unu
 
				 				cublasSaxpy(n, alpha, &A[line*ldA], 1, &C[line*ldC], 1);
			
 
				 				cublasres = cublasGetError();
			
 
				 				if (STARPU_UNLIKELY(cublasres))
			
 
				-					CUBLAS_REPORT_ERROR(cublasres);
			
 
				+					STARPU_CUBLAS_REPORT_ERROR(cublasres);
			
 
				 			}
			
 
				 			break;
			
 
				 #endif
			
--- a/include/starpu-util.h
+++ b/include/starpu-util.h
@@ -125,7 +125,7 @@ STARPU_ATOMIC_SOMETHING(or, old | value)
 
				 
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 
			
 
				-#define CUBLAS_REPORT_ERROR(status) 					\
			
 
				+#define STARPU_CUBLAS_REPORT_ERROR(status) 					\
			
 
				 	do {								\
			
 
				 		char *errormsg;						\
			
 
				 		switch (status) {					\
			
@@ -160,7 +160,7 @@ STARPU_ATOMIC_SOMETHING(or, old | value)
 
				 
			
 
				 
			
 
				 
			
 
				-#define CUDA_REPORT_ERROR(status) 					\
			
 
				+#define STARPU_CUDA_REPORT_ERROR(status) 					\
			
 
				 	do {								\
			
 
				 		char *errormsg;						\
			
 
				 		switch (status) {					\
			
--- a/mpi/examples/mpi_lu/pxlu_kernels.c
+++ b/mpi/examples/mpi_lu/pxlu_kernels.c
@@ -70,7 +70,7 @@ static inline void STARPU_PLU(common_u22)(void *descr[],
 
				 				STARPU_ABORT();
			
 
				 
			
 
				 			if (STARPU_UNLIKELY((cures = cudaThreadSynchronize()) != cudaSuccess))
			
 
				-				CUDA_REPORT_ERROR(cures);
			
 
				+				STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 
			
 
				 			break;
			
 
				 #endif
			
@@ -174,7 +174,7 @@ static inline void STARPU_PLU(common_u12)(void *descr[],
 
				 				STARPU_ABORT();
			
 
				 
			
 
				 			if (STARPU_UNLIKELY((cures = cudaThreadSynchronize()) != cudaSuccess))
			
 
				-				CUDA_REPORT_ERROR(cures);
			
 
				+				STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 
			
 
				 			break;
			
 
				 #endif
			
--- a/src/datawizard/copy-driver.c
+++ b/src/datawizard/copy-driver.c
@@ -266,11 +266,11 @@ void driver_wait_request_completion(starpu_async_channel *async_channel __attrib
 
				 
			
 
				 			cures = cudaEventSynchronize(event);
			
 
				 			if (STARPU_UNLIKELY(cures))
			
 
				-				CUDA_REPORT_ERROR(cures);
			
 
				+				STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 
			
 
				 			cures = cudaEventDestroy(event);
			
 
				 			if (STARPU_UNLIKELY(cures))
			
 
				-				CUDA_REPORT_ERROR(cures);
			
 
				+				STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 
			
 
				 			break;
			
 
				 #endif
			
--- a/src/datawizard/interfaces/bcsr_interface.c
+++ b/src/datawizard/interfaces/bcsr_interface.c
@@ -366,15 +366,15 @@ static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32
 
				 
			
 
				 	cures = cudaMemcpy((char *)dst_bcsr->nzval, (char *)src_bcsr->nzval, nnz*r*c*elemsize, cudaMemcpyDeviceToHost);
			
 
				 	if (STARPU_UNLIKELY(cures))
			
 
				-		CUDA_REPORT_ERROR(cures);
			
 
				+		STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 
			
 
				 	cures = cudaMemcpy((char *)dst_bcsr->colind, (char *)src_bcsr->colind, nnz*sizeof(uint32_t), cudaMemcpyDeviceToHost);
			
 
				 	if (STARPU_UNLIKELY(cures))
			
 
				-		CUDA_REPORT_ERROR(cures);
			
 
				+		STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 
			
 
				 	cures = cudaMemcpy((char *)dst_bcsr->rowptr, (char *)src_bcsr->rowptr, (nrow+1)*sizeof(uint32_t), cudaMemcpyDeviceToHost);
			
 
				 	if (STARPU_UNLIKELY(cures))
			
 
				-		CUDA_REPORT_ERROR(cures);
			
 
				+		STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 
			
 
				 	cudaThreadSynchronize();
			
 
				 
			
@@ -402,15 +402,15 @@ static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32
 
				 
			
 
				 	cures = cudaMemcpy((char *)dst_bcsr->nzval, (char *)src_bcsr->nzval, nnz*r*c*elemsize, cudaMemcpyHostToDevice);
			
 
				 	if (STARPU_UNLIKELY(cures))
			
 
				-		CUDA_REPORT_ERROR(cures);
			
 
				+		STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 
			
 
				 	cures = cudaMemcpy((char *)dst_bcsr->colind, (char *)src_bcsr->colind, nnz*sizeof(uint32_t), cudaMemcpyHostToDevice);
			
 
				 	if (STARPU_UNLIKELY(cures))
			
 
				-		CUDA_REPORT_ERROR(cures);
			
 
				+		STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 
			
 
				 	cures = cudaMemcpy((char *)dst_bcsr->rowptr, (char *)src_bcsr->rowptr, (nrow+1)*sizeof(uint32_t), cudaMemcpyHostToDevice);
			
 
				 	if (STARPU_UNLIKELY(cures))
			
 
				-		CUDA_REPORT_ERROR(cures);
			
 
				+		STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 
			
 
				 	cudaThreadSynchronize();
			
 
				 
			
--- a/src/datawizard/interfaces/blas_interface.c
+++ b/src/datawizard/interfaces/blas_interface.c
@@ -247,7 +247,7 @@ static size_t allocate_blas_buffer_on_node(starpu_data_handle handle, uint32_t d
 
				 			if (!addr || status != cudaSuccess)
			
 
				 			{
			
 
				 				if (STARPU_UNLIKELY(status != cudaErrorMemoryAllocation))
			
 
				-					 CUDA_REPORT_ERROR(status);
			
 
				+					 STARPU_CUDA_REPORT_ERROR(status);
			
 
				 					
			
 
				 				fail = 1;
			
 
				 			}
			
@@ -292,7 +292,7 @@ static void liberate_blas_buffer_on_node(void *interface, uint32_t node)
 
				 		case CUDA_RAM:
			
 
				 			status = cudaFree((void*)blas_interface->ptr);			
			
 
				 			if (STARPU_UNLIKELY(status))
			
 
				-				CUDA_REPORT_ERROR(status);
			
 
				+				STARPU_CUDA_REPORT_ERROR(status);
			
 
				 
			
 
				 			break;
			
 
				 #endif
			
@@ -317,7 +317,7 @@ static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32
 
				 			(char *)src_blas->ptr, src_blas->ld*elemsize,
			
 
				 			src_blas->nx*elemsize, src_blas->ny, cudaMemcpyDeviceToHost);
			
 
				 	if (STARPU_UNLIKELY(cures))
			
 
				-		CUDA_REPORT_ERROR(cures);
			
 
				+		STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 
			
 
				 	TRACE_DATA_COPY(src_node, dst_node, (size_t)src_blas->nx*src_blas->ny*src_blas->elemsize);
			
 
				 
			
@@ -339,11 +339,11 @@ static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32
 
				 			(char *)src_blas->ptr, src_blas->ld*elemsize,
			
 
				 			src_blas->nx*elemsize, src_blas->ny, cudaMemcpyHostToDevice);
			
 
				 	if (STARPU_UNLIKELY(cures))
			
 
				-		CUDA_REPORT_ERROR(cures);
			
 
				+		STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 		
			
 
				 	cures = cudaThreadSynchronize();
			
 
				 	if (STARPU_UNLIKELY(cures))
			
 
				-		CUDA_REPORT_ERROR(cures);
			
 
				+		STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 		
			
 
				 	TRACE_DATA_COPY(src_node, dst_node, (size_t)src_blas->nx*src_blas->ny*src_blas->elemsize);
			
 
				 
			
@@ -373,11 +373,11 @@ static int copy_cuda_to_ram_async(starpu_data_handle handle, uint32_t src_node,
 
				 			cudaMemcpyDeviceToHost);
			
 
				 
			
 
				 		if (STARPU_UNLIKELY(cures))
			
 
				-			CUDA_REPORT_ERROR(cures);
			
 
				+			STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 
			
 
				 		cures = cudaThreadSynchronize();
			
 
				 		if (STARPU_UNLIKELY(cures))
			
 
				-			CUDA_REPORT_ERROR(cures);
			
 
				+			STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 		
			
 
				 
			
 
				 		return 0;
			
@@ -411,7 +411,7 @@ static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node,
 
				 		cudaThreadSynchronize();
			
 
				 
			
 
				 		if (STARPU_UNLIKELY(cures))
			
 
				-			CUDA_REPORT_ERROR(cures);
			
 
				+			STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 
			
 
				 		return 0;
			
 
				 	}
			
--- a/src/datawizard/interfaces/block_interface.c
+++ b/src/datawizard/interfaces/block_interface.c
@@ -271,7 +271,7 @@ static size_t allocate_block_buffer_on_node(starpu_data_handle handle, uint32_t
 
				 			if (!addr || status != cudaSuccess)
			
 
				 			{
			
 
				 				if (STARPU_UNLIKELY(status != cudaErrorMemoryAllocation))
			
 
				-					CUDA_REPORT_ERROR(status);
			
 
				+					STARPU_CUDA_REPORT_ERROR(status);
			
 
				 
			
 
				 				fail = 1;
			
 
				 			}
			
@@ -315,7 +315,7 @@ static void liberate_block_buffer_on_node(void *interface, uint32_t node)
 
				 		case CUDA_RAM:
			
 
				 			status = cudaFree((void*)block_interface->ptr);
			
 
				 			if (STARPU_UNLIKELY(status))
			
 
				-				CUDA_REPORT_ERROR(status);
			
 
				+				STARPU_CUDA_REPORT_ERROR(status);
			
 
				 
			
 
				 			break;
			
 
				 #endif
			
@@ -346,7 +346,7 @@ static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32
 
				 		cures = cudaMemcpy((char *)dst_block->ptr, (char *)src_block->ptr,
			
 
				 					nx*ny*nz*elemsize, cudaMemcpyDeviceToHost);
			
 
				 		if (STARPU_UNLIKELY(cures))
			
 
				-			CUDA_REPORT_ERROR(cures);
			
 
				+			STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 	}
			
 
				 	else {
			
 
				 		unsigned layer;
			
@@ -362,7 +362,7 @@ static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32
 
				 				nx*elemsize, ny, cudaMemcpyDeviceToHost);
			
 
				 
			
 
				 			if (STARPU_UNLIKELY(cures))
			
 
				-				CUDA_REPORT_ERROR(cures);
			
 
				+				STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 		}
			
 
				 	}
			
 
				 	
			
@@ -404,7 +404,7 @@ static int copy_cuda_to_ram_async(starpu_data_handle handle, uint32_t src_node,
 
				 				cures = cudaMemcpy((char *)dst_block->ptr, (char *)src_block->ptr,
			
 
				 					nx*ny*nz*elemsize, cudaMemcpyDeviceToHost);
			
 
				 				if (STARPU_UNLIKELY(cures))
			
 
				-					CUDA_REPORT_ERROR(cures);
			
 
				+					STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 				cudaThreadSynchronize();
			
 
				 
			
 
				 				ret = 0;
			
@@ -425,7 +425,7 @@ static int copy_cuda_to_ram_async(starpu_data_handle handle, uint32_t src_node,
 
				 						(char *)src_block->ptr, src_block->ldz*elemsize,
			
 
				 						nx*ny*elemsize, nz, cudaMemcpyDeviceToHost);
			
 
				 				if (STARPU_UNLIKELY(cures))
			
 
				-					CUDA_REPORT_ERROR(cures);
			
 
				+					STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 				cudaThreadSynchronize();
			
 
				 
			
 
				 				ret = 0;
			
@@ -481,7 +481,7 @@ no_async_default:
 
				 				nx*elemsize, ny, cudaMemcpyDeviceToHost);
			
 
				 
			
 
				 		if (STARPU_UNLIKELY(cures))
			
 
				-			CUDA_REPORT_ERROR(cures);
			
 
				+			STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 		
			
 
				 	}
			
 
				 	cudaThreadSynchronize();
			
@@ -524,7 +524,7 @@ static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node,
 
				 				cures = cudaMemcpy((char *)dst_block->ptr, (char *)src_block->ptr,
			
 
				 					nx*ny*nz*elemsize, cudaMemcpyHostToDevice);
			
 
				 				if (STARPU_UNLIKELY(cures))
			
 
				-					CUDA_REPORT_ERROR(cures);
			
 
				+					STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 				cudaThreadSynchronize();
			
 
				 
			
 
				 				ret = 0;
			
@@ -545,7 +545,7 @@ static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node,
 
				 						(char *)src_block->ptr, src_block->ldz*elemsize,
			
 
				 						nx*ny*elemsize, nz, cudaMemcpyHostToDevice);
			
 
				 				if (STARPU_UNLIKELY(cures))
			
 
				-					CUDA_REPORT_ERROR(cures);
			
 
				+					STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 				cudaThreadSynchronize();
			
 
				 
			
 
				 				ret = 0;
			
@@ -601,7 +601,7 @@ no_async_default:
 
				 				nx*elemsize, ny, cudaMemcpyHostToDevice);
			
 
				 
			
 
				 		if (STARPU_UNLIKELY(cures))
			
 
				-			CUDA_REPORT_ERROR(cures);
			
 
				+			STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 		
			
 
				 	}
			
 
				 	cudaThreadSynchronize();
			
@@ -632,7 +632,7 @@ static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32
 
				 		cures = cudaMemcpy((char *)dst_block->ptr, (char *)src_block->ptr,
			
 
				 						nx*ny*nz*elemsize, cudaMemcpyHostToDevice);
			
 
				 		if (STARPU_UNLIKELY(cures))
			
 
				-			CUDA_REPORT_ERROR(cures);
			
 
				+			STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 	}
			
 
				 	else {
			
 
				 		unsigned layer;
			
@@ -648,7 +648,7 @@ static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32
 
				 				nx*elemsize, ny, cudaMemcpyHostToDevice);
			
 
				 
			
 
				 			if (STARPU_UNLIKELY(cures))
			
 
				-				CUDA_REPORT_ERROR(cures);
			
 
				+				STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 		}
			
 
				 	}
			
 
				 
			
--- a/src/datawizard/interfaces/csr_interface.c
+++ b/src/datawizard/interfaces/csr_interface.c
@@ -335,15 +335,15 @@ static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32
 
				 
			
 
				 	cures = cudaMemcpy((char *)dst_csr->nzval, (char *)src_csr->nzval, nnz*elemsize, cudaMemcpyDeviceToHost);
			
 
				 	if (STARPU_UNLIKELY(cures))
			
 
				-		CUDA_REPORT_ERROR(cures);
			
 
				+		STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 
			
 
				 	cures = cudaMemcpy((char *)dst_csr->colind, (char *)src_csr->colind, nnz*sizeof(uint32_t), cudaMemcpyDeviceToHost);
			
 
				 	if (STARPU_UNLIKELY(cures))
			
 
				-		CUDA_REPORT_ERROR(cures);
			
 
				+		STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 
			
 
				 	cures = cudaMemcpy((char *)dst_csr->rowptr, (char *)src_csr->rowptr, (nrow+1)*sizeof(uint32_t), cudaMemcpyDeviceToHost);
			
 
				 	if (STARPU_UNLIKELY(cures))
			
 
				-		CUDA_REPORT_ERROR(cures);
			
 
				+		STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 
			
 
				 	cudaThreadSynchronize();
			
 
				 
			
@@ -368,15 +368,15 @@ static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32
 
				 
			
 
				 	cures = cudaMemcpy((char *)dst_csr->nzval, (char *)src_csr->nzval, nnz*elemsize, cudaMemcpyHostToDevice);
			
 
				 	if (STARPU_UNLIKELY(cures))
			
 
				-		CUDA_REPORT_ERROR(cures);
			
 
				+		STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 
			
 
				 	cures = cudaMemcpy((char *)dst_csr->colind, (char *)src_csr->colind, nnz*sizeof(uint32_t), cudaMemcpyHostToDevice);
			
 
				 	if (STARPU_UNLIKELY(cures))
			
 
				-		CUDA_REPORT_ERROR(cures);
			
 
				+		STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 
			
 
				 	cures = cudaMemcpy((char *)dst_csr->rowptr, (char *)src_csr->rowptr, (nrow+1)*sizeof(uint32_t), cudaMemcpyHostToDevice);
			
 
				 	if (STARPU_UNLIKELY(cures))
			
 
				-		CUDA_REPORT_ERROR(cures);
			
 
				+		STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 
			
 
				 	cudaThreadSynchronize();
			
 
				 
			
--- a/src/datawizard/interfaces/vector_interface.c
+++ b/src/datawizard/interfaces/vector_interface.c
@@ -210,7 +210,7 @@ static size_t allocate_vector_buffer_on_node(starpu_data_handle handle, uint32_t
 
				 			if (!addr || (status != cudaSuccess))
			
 
				 			{
			
 
				 				if (STARPU_UNLIKELY(status != cudaErrorMemoryAllocation))
			
 
				-					CUDA_REPORT_ERROR(status);
			
 
				+					STARPU_CUDA_REPORT_ERROR(status);
			
 
				 
			
 
				 				fail = 1;
			
 
				 			}
			
@@ -265,7 +265,7 @@ static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32
 
				 	cudaThreadSynchronize();
			
 
				 
			
 
				 	if (STARPU_UNLIKELY(cures))
			
 
				-		CUDA_REPORT_ERROR(cures);
			
 
				+		STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 
			
 
				 	TRACE_DATA_COPY(src_node, dst_node, src_vector->nx*src_vector->elemsize);
			
 
				 
			
@@ -285,7 +285,7 @@ static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32
 
				 	cudaThreadSynchronize();
			
 
				 
			
 
				 	if (STARPU_UNLIKELY(cures))
			
 
				-		CUDA_REPORT_ERROR(cures);
			
 
				+		STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 
			
 
				 	TRACE_DATA_COPY(src_node, dst_node, src_vector->nx*src_vector->elemsize);
			
 
				 
			
@@ -309,7 +309,7 @@ static int copy_cuda_to_ram_async(starpu_data_handle handle, uint32_t src_node,
 
				 		cudaThreadSynchronize();
			
 
				 
			
 
				 		if (STARPU_UNLIKELY(cures))
			
 
				-			CUDA_REPORT_ERROR(cures);
			
 
				+			STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 
			
 
				 		return 0;
			
 
				 	}
			
@@ -337,7 +337,7 @@ static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node,
 
				 		cudaThreadSynchronize();
			
 
				 
			
 
				 		if (STARPU_UNLIKELY(cures))
			
 
				-			CUDA_REPORT_ERROR(cures);
			
 
				+			STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 
			
 
				 		return 0;
			
 
				 	}
			
--- a/src/drivers/cuda/driver_cuda.c
+++ b/src/drivers/cuda/driver_cuda.c
@@ -37,14 +37,14 @@ static void init_context(int devid)
 
				 
			
 
				 	cures = cudaSetDevice(devid);
			
 
				 	if (STARPU_UNLIKELY(cures))
			
 
				-		CUDA_REPORT_ERROR(cures);
			
 
				+		STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 
			
 
				 	/* force CUDA to initialize the context for real */
			
 
				 	cudaFree(0);
			
 
				 
			
 
				 	cures = cudaStreamCreate(starpu_get_local_cuda_stream());
			
 
				 	if (STARPU_UNLIKELY(cures))
			
 
				-		CUDA_REPORT_ERROR(cures);
			
 
				+		STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 }
			
 
				 
			
 
				 static void deinit_context(int workerid)
			
@@ -56,7 +56,7 @@ static void deinit_context(int workerid)
 
				 	/* cleanup the runtime API internal stuffs (which CUBLAS is using) */
			
 
				 	cures = cudaThreadExit();
			
 
				 	if (cures)
			
 
				-		CUDA_REPORT_ERROR(cures);
			
 
				+		STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 }
			
 
				 
			
 
				 unsigned get_cuda_device_count(void)
			
@@ -66,7 +66,7 @@ unsigned get_cuda_device_count(void)
 
				 	cudaError_t cures;
			
 
				 	cures = cudaGetDeviceCount(&cnt);
			
 
				 	if (STARPU_UNLIKELY(cures))
			
 
				-		 CUDA_REPORT_ERROR(cures);
			
 
				+		 STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 	
			
 
				 	return (unsigned)cnt;
			
 
				 }
			
@@ -104,7 +104,7 @@ static int execute_job_on_cuda(job_t j, struct worker_s *args)
 
				 	{
			
 
				 		cures = cudaThreadSynchronize();
			
 
				 		if (STARPU_UNLIKELY(cures))
			
 
				-			CUDA_REPORT_ERROR(cures);
			
 
				+			STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 		GET_TICK(codelet_start_comm);
			
 
				 	}
			
 
				 
			
@@ -120,7 +120,7 @@ static int execute_job_on_cuda(job_t j, struct worker_s *args)
 
				 	{
			
 
				 		cures = cudaThreadSynchronize();
			
 
				 		if (STARPU_UNLIKELY(cures))
			
 
				-			CUDA_REPORT_ERROR(cures);
			
 
				+			STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 		GET_TICK(codelet_end_comm);
			
 
				 	}
			
 
				 
			
--- a/src/util/malloc.c
+++ b/src/util/malloc.c
@@ -37,7 +37,7 @@ static void malloc_pinned_codelet(void *buffers[] __attribute__((unused)), void
 
				 	cudaError_t cures;
			
 
				 	cures = cudaHostAlloc((void **)(s->ptr), s->dim, cudaHostAllocPortable);
			
 
				 	if (STARPU_UNLIKELY(cures))
			
 
				-		CUDA_REPORT_ERROR(cures);
			
 
				+		STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 }
			
 
				 
			
 
				 static starpu_codelet malloc_pinned_cl = {
			
@@ -91,7 +91,7 @@ static void free_pinned_codelet(void *buffers[] __attribute__((unused)), void *a
 
				 	cudaError_t cures;
			
 
				 	cures = cudaFreeHost(arg);
			
 
				 	if (STARPU_UNLIKELY(cures))
			
 
				-		CUDA_REPORT_ERROR(cures);
			
 
				+		STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 }
			
 
				 
			
 
				 static starpu_codelet free_pinned_cl = {
			
--- a/src/util/starpu_cublas.c
+++ b/src/util/starpu_cublas.c
@@ -22,7 +22,7 @@ static void init_cublas_func(void *args __attribute__((unused)))
 
				 {
			
 
				 	cublasStatus cublasst = cublasInit();
			
 
				 	if (STARPU_UNLIKELY(cublasst))
			
 
				-		CUBLAS_REPORT_ERROR(cublasst);
			
 
				+		STARPU_CUBLAS_REPORT_ERROR(cublasst);
			
 
				 }
			
 
				 
			
 
				 static void shutdown_cublas_func(void *args __attribute__((unused)))