Sfoglia il codice sorgente

Fix the manual_reduction test for OpenCL (and use dummy kernels rather than
linking with some external CUDA code for instance).

Cédric Augonnet 14 anni fa
parent
commit
29e147adb4
2 ha cambiato i file con 51 aggiunte e 12 eliminazioni
  1. 0 3
      tests/Makefile.am
  2. 51 9
      tests/datawizard/manual_reduction.c

+ 0 - 3
tests/Makefile.am

@@ -384,9 +384,6 @@ datawizard_mpi_like_SOURCES +=			\
 
 datawizard_mpi_like_async_SOURCES +=		\
 	datawizard/cuda_codelet_unsigned_inc.cu
-
-datawizard_manual_reduction_SOURCES +=		\
-	datawizard/cuda_codelet_unsigned_inc.cu
 endif
 
 testbin_PROGRAMS +=				\

+ 51 - 9
tests/datawizard/manual_reduction.c

@@ -18,6 +18,9 @@
 #ifdef STARPU_USE_CUDA
 #include <cuda.h>
 #endif
+#ifdef STARPU_USE_OPENCL
+#include <CL/cl.h>
+#endif
 
 #define INIT_VALUE	42
 #define NTASKS		10000
@@ -29,21 +32,32 @@ static uintptr_t per_worker[STARPU_NMAXWORKERS];
 static starpu_data_handle per_worker_handle[STARPU_NMAXWORKERS];
 
 /* Create per-worker handles */
-
 static void initialize_per_worker_handle(void *arg __attribute__((unused)))
 {
 	int workerid = starpu_worker_get_id();
 	
 	/* Allocate memory on the worker, and initialize it to 0 */
-	
 	switch (starpu_worker_get_type(workerid)) {
 		case STARPU_CPU_WORKER:
 			per_worker[workerid] = (uintptr_t)calloc(1, sizeof(variable));
 			break;
+#ifdef STARPU_USE_OPENCL
 		case STARPU_OPENCL_WORKER:
-			/* Not supported yet */
-			STARPU_ABORT();
+			{
+			cl_context context;
+			cl_command_queue queue;
+			starpu_opencl_get_current_context(&context);
+			starpu_opencl_get_current_queue(&queue);
+
+			cl_mem ptr = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(variable), NULL, NULL);
+			/* Poor's man memset */
+			unsigned zero = 0;
+			clEnqueueWriteBuffer(queue, ptr, CL_TRUE, 0, sizeof(variable), (void *)&zero, 0, NULL, NULL); 
+			per_worker[workerid] = (uintptr_t)ptr;
+			}
+			
 			break;
+#endif
 #ifdef STARPU_USE_CUDA
 		case STARPU_CUDA_WORKER:
 			cudaMalloc((void **)&per_worker[workerid], sizeof(variable));
@@ -90,15 +104,43 @@ static void cpu_func_incr(void *descr[], void *cl_arg __attribute__((unused)))
 	*val = *val + 1;
 }
 
-#ifdef STARPU_USE_CUDA
-extern void cuda_codelet_unsigned_inc(void *descr[], void *cl_arg);
+#ifdef STARPU_USE_CUDA 
+/* dummy CUDA implementation */
+static void cuda_func_incr(void *descr[], void *cl_arg __attribute__((unused)))
+{
+	unsigned *val = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
+
+	unsigned h_val;
+	cudaMemcpy(&h_val, val, sizeof(unsigned), cudaMemcpyDeviceToHost);
+	h_val++;
+	cudaMemcpy(val, h_val, sizeof(unsigned), cudaMemcpyHostToDevice);
+}
+#endif
+
+#ifdef STARPU_USE_OPENCL
+/* dummy OpenCL implementation */
+static void opencl_func_incr(void *descr[], void *cl_arg __attribute__((unused)))
+{
+	cl_mem d_val = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[0]);
+	unsigned h_val;
+
+	cl_command_queue queue;
+	starpu_opencl_get_current_queue(&queue);
+
+	clEnqueueReadBuffer(queue, d_val, CL_TRUE, 0, sizeof(unsigned), (void *)&h_val, 0, NULL, NULL);
+	h_val++;
+	clEnqueueWriteBuffer(queue, d_val, CL_TRUE, 0, sizeof(unsigned), (void *)&h_val, 0, NULL, NULL); 
+}
 #endif
 
 static struct starpu_codelet_t use_data_on_worker_codelet = {
-	.where = STARPU_CPU|STARPU_CUDA,
+	.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL,
 	.cpu_func = cpu_func_incr,
 #ifdef STARPU_USE_CUDA
-	.cuda_func = cuda_codelet_unsigned_inc,
+	.cuda_func = cuda_func_incr,
+#endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_func = opencl_func_incr,
 #endif
 	.nbuffers = 1,
 	.model = NULL
@@ -118,7 +160,7 @@ int main(int argc, char **argv)
 	starpu_variable_data_register(&variable_handle, 0, (uintptr_t)&variable, sizeof(unsigned));
 
 	/* Allocate a per-worker handle on each worker (and initialize it to 0) */
-	starpu_execute_on_each_worker(initialize_per_worker_handle, NULL, STARPU_CPU|STARPU_CUDA);
+	starpu_execute_on_each_worker(initialize_per_worker_handle, NULL, STARPU_CPU|STARPU_CUDA|STARPU_OPENCL);
 
 	/* Register all per-worker handles */
 	for (worker = 0; worker < nworkers; worker++)