Browse Source

API: move the CUDA-related API into starpu_cuda.h, and define the contents of starpu_opencl.h only when OpenCL is enabled

Nathalie Furmento 15 years ago
parent
commit
cf7be76766

+ 1 - 0
examples/lu/xlu.h

@@ -20,6 +20,7 @@
 /* for STARPU_USE_CUDA */
 #include <starpu_config.h>
 #include <starpu.h>
+#include <starpu_cuda.h>
 
 #include <common/blas.h>
 

+ 1 - 0
examples/mult/sgemm_kernels.c

@@ -15,6 +15,7 @@
  */
 
 #include <starpu.h>
+#include <starpu_cuda.h>
 #include <common/blas.h>
 
 #define COMMON_CODE			\

+ 1 - 0
examples/mult/xgemm_kernels.c

@@ -15,6 +15,7 @@
  */
 
 #include <starpu.h>
+#include <starpu_cuda.h>
 #include <common/blas.h>
 
 #define COMMON_CODE			\

+ 0 - 66
include/starpu_util.h

@@ -24,12 +24,6 @@
 #include <starpu_config.h>
 #include <starpu_task.h>
 
-#ifdef STARPU_USE_CUDA
-#include <cuda.h>
-#include <cuda_runtime_api.h>
-#include <cublas.h>
-#endif
-
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -123,62 +117,6 @@ STARPU_ATOMIC_SOMETHING(or, old | value)
 #define STARPU_SYNCHRONIZE() __asm__ __volatile__("sync" ::: "memory")
 #endif
 
-#ifdef STARPU_USE_CUDA
-
-#if defined(__CUDACC__) && defined(STARPU_HAVE_WINDOWS)
-#define STARPU_CUBLAS_OOPS() do { \
-		printf("oops  %s \n", errormsg); \
-		*(int*)NULL = 0; \
-	} while (0);
-#else
-#define STARPU_CUBLAS_OOPS() do { \
-		printf("oops  in %s ... %s \n", __func__, errormsg); \
-		assert(0);						\
-	} while (0);
-#endif
-
-#define STARPU_CUBLAS_REPORT_ERROR(status) 					\
-	do {								\
-		char *errormsg;						\
-		switch (status) {					\
-			case CUBLAS_STATUS_SUCCESS:			\
-				errormsg = "success";			\
-				break;					\
-			case CUBLAS_STATUS_NOT_INITIALIZED:		\
-				errormsg = "not initialized";		\
-				break;					\
-			case CUBLAS_STATUS_ALLOC_FAILED:		\
-				errormsg = "alloc failed";		\
-				break;					\
-			case CUBLAS_STATUS_INVALID_VALUE:		\
-				errormsg = "invalid value";		\
-				break;					\
-			case CUBLAS_STATUS_ARCH_MISMATCH:		\
-				errormsg = "arch mismatch";		\
-				break;					\
-			case CUBLAS_STATUS_EXECUTION_FAILED:		\
-				errormsg = "execution failed";		\
-				break;					\
-			case CUBLAS_STATUS_INTERNAL_ERROR:		\
-				errormsg = "internal error";		\
-				break;					\
-			default:					\
-				errormsg = "unknown error";		\
-				break;					\
-		}							\
-		STARPU_CUBLAS_OOPS();					\
-	} while (0)  
-
-
-
-#define STARPU_CUDA_REPORT_ERROR(status) 				\
-	do {								\
-		const char *errormsg = cudaGetErrorString(status);	\
-		STARPU_CUBLAS_OOPS();					\
-	} while (0)  
-
-#endif // STARPU_USE_CUDA
-
 static inline int starpu_get_env_number(const char *str)
 {
 	char *strval;
@@ -221,10 +159,6 @@ void starpu_execute_on_each_worker(void (*func)(void *), void *arg, uint32_t whe
 void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t *deps,
 				void (*callback)(void *), void *callback_arg);
 
-#ifdef STARPU_USE_CUDA
-cudaStream_t *starpu_cuda_get_local_stream(void);
-#endif
-
 #ifdef __cplusplus
 }
 #endif

+ 2 - 1
src/datawizard/copy_driver.c

@@ -22,7 +22,8 @@
 #include <common/fxt.h>
 #include "copy_driver.h"
 #include "memalloc.h"
-#include "starpu_opencl.h"
+#include <starpu_opencl.h>
+#include <starpu_cuda.h>
 #include <profiling/profiling.h>
 
 void _starpu_wake_all_blocked_workers_on_node(unsigned nodeid)

+ 1 - 2
src/datawizard/interfaces/bcsr_interface.c

@@ -22,10 +22,9 @@
 #include <datawizard/filters.h>
 #include <common/hash.h>
 
-#ifdef STARPU_USE_OPENCL
+#include <starpu_cuda.h>
 #include <starpu_opencl.h>
 #include <drivers/opencl/driver_opencl.h>
-#endif
 
 /*
  * BCSR : blocked CSR, we use blocks of size (r x c)

+ 1 - 2
src/datawizard/interfaces/block_interface.c

@@ -22,10 +22,9 @@
 
 #include <common/hash.h>
 
-#ifdef STARPU_USE_OPENCL
+#include <starpu_cuda.h>
 #include <starpu_opencl.h>
 #include <drivers/opencl/driver_opencl.h>
-#endif
 
 static int dummy_copy_ram_to_ram(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node __attribute__((unused)));
 #ifdef STARPU_USE_CUDA

+ 1 - 2
src/datawizard/interfaces/csr_interface.c

@@ -22,10 +22,9 @@
 
 #include <common/hash.h>
 
-#ifdef STARPU_USE_OPENCL
+#include <starpu_cuda.h>
 #include <starpu_opencl.h>
 #include <drivers/opencl/driver_opencl.h>
-#endif
 
 static int dummy_copy_ram_to_ram(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node __attribute__((unused)));
 #ifdef STARPU_USE_CUDA

+ 1 - 8
src/datawizard/interfaces/matrix_interface.c

@@ -19,17 +19,10 @@
 #include <datawizard/coherency.h>
 #include <datawizard/copy_driver.h>
 #include <datawizard/filters.h>
-
 #include <common/hash.h>
-
-#ifdef STARPU_USE_CUDA
-#include <cuda.h>
-#include <cuda_runtime.h>
-#endif
-#ifdef STARPU_USE_OPENCL
+#include <starpu_cuda.h>
 #include <starpu_opencl.h>
 #include <drivers/opencl/driver_opencl.h>
-#endif
 
 static int dummy_copy_ram_to_ram(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node __attribute__((unused)));
 #ifdef STARPU_USE_CUDA

+ 1 - 7
src/datawizard/interfaces/variable_interface.c

@@ -20,16 +20,10 @@
 #include <datawizard/coherency.h>
 #include <datawizard/copy_driver.h>
 #include <datawizard/filters.h>
-
 #include <common/hash.h>
-
-#ifdef STARPU_USE_CUDA
-#include <cuda.h>
-#endif
-#ifdef STARPU_USE_OPENCL
+#include <starpu_cuda.h>
 #include <starpu_opencl.h>
 #include <drivers/opencl/driver_opencl.h>
-#endif
 
 static int dummy_copy_ram_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node __attribute__((unused)));
 #ifdef STARPU_USE_CUDA

+ 1 - 7
src/datawizard/interfaces/vector_interface.c

@@ -19,16 +19,10 @@
 #include <datawizard/coherency.h>
 #include <datawizard/copy_driver.h>
 #include <datawizard/filters.h>
-
 #include <common/hash.h>
-
-#ifdef STARPU_USE_CUDA
-#include <cuda.h>
-#endif
-#ifdef STARPU_USE_OPENCL
+#include <starpu_cuda.h>
 #include <starpu_opencl.h>
 #include <drivers/opencl/driver_opencl.h>
-#endif
 
 static int dummy_copy_ram_to_ram(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node);
 #ifdef STARPU_USE_CUDA

+ 1 - 0
src/drivers/cuda/driver_cuda.c

@@ -15,6 +15,7 @@
  */
 
 #include <starpu.h>
+#include <starpu_cuda.h>
 #include <starpu_profiling.h>
 #include <common/utils.h>
 #include <common/config.h>

+ 3 - 0
src/drivers/opencl/driver_opencl.h

@@ -21,6 +21,8 @@
 #define _GNU_SOURCE
 #endif
 
+#ifdef STARPU_USE_OPENCL
+
 #include <CL/cl.h>
 
 extern
@@ -65,4 +67,5 @@ void _starpu_opencl_init(void);
 extern
 void *_starpu_opencl_worker(void *);
 
+#endif // STARPU_USE_OPENCL
 #endif //  __DRIVER_OPENCL_H__

+ 1 - 7
src/util/malloc.c

@@ -19,14 +19,8 @@
 #include <core/workers.h>
 #include <common/config.h>
 #include <starpu.h>
-
-#ifdef STARPU_USE_CUDA
-#include <cuda.h>
-#endif
-
-#ifdef STARPU_USE_OPENCL
+#include <starpu_cuda.h>
 #include <drivers/opencl/driver_opencl.h>
-#endif
 
 #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
 struct malloc_pinned_codelet_struct {

+ 1 - 0
src/util/starpu_cublas.c

@@ -15,6 +15,7 @@
  */
 
 #include <starpu.h>
+#include <starpu_cuda.h>
 #include <common/config.h>
 
 #ifdef STARPU_USE_CUDA