14 years ago · 29e147adb4
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -384,9 +384,6 @@ datawizard_mpi_like_SOURCES +=			\
 
				 
			
 
				 datawizard_mpi_like_async_SOURCES +=		\
			
 
				 	datawizard/cuda_codelet_unsigned_inc.cu
			
 
				-
			
 
				-datawizard_manual_reduction_SOURCES +=		\
			
 
				-	datawizard/cuda_codelet_unsigned_inc.cu
			
 
				 endif
			
 
				 
			
 
				 testbin_PROGRAMS +=				\
			
--- a/tests/datawizard/manual_reduction.c
+++ b/tests/datawizard/manual_reduction.c
@@ -18,6 +18,9 @@
 
				 #ifdef STARPU_USE_CUDA
			
 
				 #include <cuda.h>
			
 
				 #endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+#include <CL/cl.h>
			
 
				+#endif
			
 
				 
			
 
				 #define INIT_VALUE	42
			
 
				 #define NTASKS		10000
			
@@ -29,21 +32,32 @@ static uintptr_t per_worker[STARPU_NMAXWORKERS];
 
				 static starpu_data_handle per_worker_handle[STARPU_NMAXWORKERS];
			
 
				 
			
 
				 /* Create per-worker handles */
			
 
				-
			
 
				 static void initialize_per_worker_handle(void *arg __attribute__((unused)))
			
 
				 {
			
 
				 	int workerid = starpu_worker_get_id();
			
 
				 	
			
 
				 	/* Allocate memory on the worker, and initialize it to 0 */
			
 
				-	
			
 
				 	switch (starpu_worker_get_type(workerid)) {
			
 
				 		case STARPU_CPU_WORKER:
			
 
				 			per_worker[workerid] = (uintptr_t)calloc(1, sizeof(variable));
			
 
				 			break;
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				 		case STARPU_OPENCL_WORKER:
			
 
				-			/* Not supported yet */
			
 
				-			STARPU_ABORT();
			
 
				+			{
			
 
				+			cl_context context;
			
 
				+			cl_command_queue queue;
			
 
				+			starpu_opencl_get_current_context(&context);
			
 
				+			starpu_opencl_get_current_queue(&queue);
			
 
				+
			
 
				+			cl_mem ptr = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(variable), NULL, NULL);
			
 
				+			/* Poor's man memset */
			
 
				+			unsigned zero = 0;
			
 
				+			clEnqueueWriteBuffer(queue, ptr, CL_TRUE, 0, sizeof(variable), (void *)&zero, 0, NULL, NULL); 
			
 
				+			per_worker[workerid] = (uintptr_t)ptr;
			
 
				+			}
			
 
				+			
			
 
				 			break;
			
 
				+#endif
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 		case STARPU_CUDA_WORKER:
			
 
				 			cudaMalloc((void **)&per_worker[workerid], sizeof(variable));
			
@@ -90,15 +104,43 @@ static void cpu_func_incr(void *descr[], void *cl_arg __attribute__((unused)))
 
				 	*val = *val + 1;
			
 
				 }
			
 
				 
			
 
				-#ifdef STARPU_USE_CUDA
			
 
				-extern void cuda_codelet_unsigned_inc(void *descr[], void *cl_arg);
			
 
				+#ifdef STARPU_USE_CUDA 
			
 
				+/* dummy CUDA implementation */
			
 
				+static void cuda_func_incr(void *descr[], void *cl_arg __attribute__((unused)))
			
 
				+{
			
 
				+	unsigned *val = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
			
 
				+
			
 
				+	unsigned h_val;
			
 
				+	cudaMemcpy(&h_val, val, sizeof(unsigned), cudaMemcpyDeviceToHost);
			
 
				+	h_val++;
			
 
				+	cudaMemcpy(val, h_val, sizeof(unsigned), cudaMemcpyHostToDevice);
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+/* dummy OpenCL implementation */
			
 
				+static void opencl_func_incr(void *descr[], void *cl_arg __attribute__((unused)))
			
 
				+{
			
 
				+	cl_mem d_val = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[0]);
			
 
				+	unsigned h_val;
			
 
				+
			
 
				+	cl_command_queue queue;
			
 
				+	starpu_opencl_get_current_queue(&queue);
			
 
				+
			
 
				+	clEnqueueReadBuffer(queue, d_val, CL_TRUE, 0, sizeof(unsigned), (void *)&h_val, 0, NULL, NULL);
			
 
				+	h_val++;
			
 
				+	clEnqueueWriteBuffer(queue, d_val, CL_TRUE, 0, sizeof(unsigned), (void *)&h_val, 0, NULL, NULL); 
			
 
				+}
			
 
				 #endif
			
 
				 
			
 
				 static struct starpu_codelet_t use_data_on_worker_codelet = {
			
 
				-	.where = STARPU_CPU|STARPU_CUDA,
			
 
				+	.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL,
			
 
				 	.cpu_func = cpu_func_incr,
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-	.cuda_func = cuda_codelet_unsigned_inc,
			
 
				+	.cuda_func = cuda_func_incr,
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+	.opencl_func = opencl_func_incr,
			
 
				 #endif
			
 
				 	.nbuffers = 1,
			
 
				 	.model = NULL
			
@@ -118,7 +160,7 @@ int main(int argc, char **argv)
 
				 	starpu_variable_data_register(&variable_handle, 0, (uintptr_t)&variable, sizeof(unsigned));
			
 
				 
			
 
				 	/* Allocate a per-worker handle on each worker (and initialize it to 0) */
			
 
				-	starpu_execute_on_each_worker(initialize_per_worker_handle, NULL, STARPU_CPU|STARPU_CUDA);
			
 
				+	starpu_execute_on_each_worker(initialize_per_worker_handle, NULL, STARPU_CPU|STARPU_CUDA|STARPU_OPENCL);
			
 
				 
			
 
				 	/* Register all per-worker handles */
			
 
				 	for (worker = 0; worker < nworkers; worker++)