13 years ago · ca4fb46d3e
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -24,6 +24,7 @@ EXTRA_DIST =					\
 
				 	helper.h				\
			
 
				 	datawizard/scal.h			\
			
 
				 	microbenchs/null_kernel_gordon.c	\
			
 
				+	datawizard/scratch_opencl_kernel.cl     \
			
 
				 	datawizard/sync_and_notify_data_gordon_kernels.c \
			
 
				 	datawizard/sync_and_notify_data_opencl_codelet.cl\
			
 
				 	coverage/coverage.sh			\
			
@@ -250,6 +251,12 @@ if STARPU_USE_CUDA
 
				 datawizard_scratch_SOURCES +=		\
			
 
				 	datawizard/scratch_cuda.cu
			
 
				 endif
			
 
				+if STARPU_USE_OPENCL
			
 
				+datawizard_scratch_SOURCES += \
			
 
				+	datawizard/scratch_opencl.c
			
 
				+nobase_STARPU_OPENCL_DATA_DATA += \
			
 
				+	datawizard/scratch_opencl_kernel.cl
			
 
				+endif
			
 
				 
			
 
				 datawizard_mpi_like_SOURCES =		\
			
 
				 	datawizard/mpi_like.c
			
--- a/tests/datawizard/scratch.c
+++ b/tests/datawizard/scratch.c
@@ -20,6 +20,9 @@
 
				 #include <unistd.h>
			
 
				 #include <errno.h>
			
 
				 #include <starpu.h>
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+#include <starpu_opencl.h>
			
 
				+#endif
			
 
				 #include <stdlib.h>
			
 
				 #include "../helper.h"
			
 
				 
			
@@ -34,6 +37,9 @@ starpu_data_handle_t A_handle, B_handle;
 
				 #ifdef STARPU_USE_CUDA
			
 
				 extern void cuda_f(void *descr[], __attribute__ ((unused)) void *_args);
			
 
				 #endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+extern void opencl_f(void *buffers[], void *args);
			
 
				+#endif
			
 
				 
			
 
				 static void cpu_f(void *descr[], __attribute__ ((unused)) void *_args)
			
 
				 {
			
@@ -56,15 +62,21 @@ static void cpu_f(void *descr[], __attribute__ ((unused)) void *_args)
 
				 
			
 
				 static struct starpu_codelet cl_f =
			
 
				 {
			
 
				-	.where = STARPU_CPU|STARPU_CUDA,
			
 
				 	.cpu_funcs = {cpu_f, NULL},
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 	.cuda_funcs = {cuda_f, NULL},
			
 
				 #endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+	.opencl_funcs = {opencl_f, NULL},
			
 
				+#endif
			
 
				 	.nbuffers = 2,
			
 
				 	.modes = {STARPU_RW, STARPU_SCRATCH}
			
 
				 };
			
 
				 
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+struct starpu_opencl_program opencl_program;
			
 
				+#endif
			
 
				+
			
 
				 int main(int argc, char **argv)
			
 
				 {
			
 
				 	int ret;
			
@@ -73,6 +85,11 @@ int main(int argc, char **argv)
 
				 	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
			
 
				 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
			
 
				 
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+	ret = starpu_opencl_load_opencl_from_file("tests/datawizard/scratch_opencl_kernel.cl",
			
 
				+						  &opencl_program, NULL);
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
			
 
				+#endif
			
 
				 	A = (unsigned *) calloc(VECTORSIZE, sizeof(unsigned));
			
 
				 
			
 
				 	starpu_vector_data_register(&A_handle, 0, (uintptr_t)A, VECTORSIZE, sizeof(unsigned));
			
@@ -96,6 +113,9 @@ int main(int argc, char **argv)
 
				 
			
 
				 	starpu_data_unregister(A_handle);
			
 
				 	starpu_data_unregister(B_handle);
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+        starpu_opencl_unload_opencl(&opencl_program);
			
 
				+#endif
			
 
				 	starpu_shutdown();
			
 
				 
			
 
				 	/* Check result */
			
@@ -116,6 +136,9 @@ int main(int argc, char **argv)
 
				 enodev:
			
 
				 	starpu_data_unregister(A_handle);
			
 
				 	starpu_data_unregister(B_handle);
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+        starpu_opencl_unload_opencl(&opencl_program);
			
 
				+#endif
			
 
				 	starpu_shutdown();
			
 
				 	/* yes, we do not perform the computation but we did detect that no one
			
 
				  	 * could perform the kernel, so this is not an error from StarPU */
			
--- a/tests/datawizard/scratch_opencl.c
+++ b/tests/datawizard/scratch_opencl.c
@@ -0,0 +1,72 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012 inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#include <starpu_opencl.h>
			
 
				+
			
 
				+extern struct starpu_opencl_program opencl_program;
			
 
				+
			
 
				+void opencl_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) args;
			
 
				+	int id, devid;
			
 
				+        cl_int err;
			
 
				+	cl_kernel kernel;
			
 
				+	cl_command_queue queue;
			
 
				+	cl_event event;
			
 
				+
			
 
				+	unsigned n = STARPU_VECTOR_GET_NX(buffers[0]);
			
 
				+	cl_mem val = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]);
			
 
				+	cl_mem tmp = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[1]);
			
 
				+
			
 
				+	id = starpu_worker_get_id();
			
 
				+	devid = starpu_worker_get_devid(id);
			
 
				+
			
 
				+	err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "increment_vector_opencl", devid);
			
 
				+	if (err != CL_SUCCESS)
			
 
				+		STARPU_OPENCL_REPORT_ERROR(err);
			
 
				+
			
 
				+	err = clSetKernelArg(kernel, 0, sizeof(val), &val);
			
 
				+	err|= clSetKernelArg(kernel, 1, sizeof(tmp), &tmp);
			
 
				+	err|= clSetKernelArg(kernel, 2, sizeof(n), &n);
			
 
				+	if (err)
			
 
				+		STARPU_OPENCL_REPORT_ERROR(err);
			
 
				+
			
 
				+	{
			
 
				+		size_t global=n;
			
 
				+		size_t local;
			
 
				+                size_t s;
			
 
				+                cl_device_id device;
			
 
				+
			
 
				+                starpu_opencl_get_device(devid, &device);
			
 
				+
			
 
				+                err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s);
			
 
				+                if (err != CL_SUCCESS)
			
 
				+			STARPU_OPENCL_REPORT_ERROR(err);
			
 
				+                if (local > global)
			
 
				+			local=global;
			
 
				+
			
 
				+		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event);
			
 
				+		if (err != CL_SUCCESS)
			
 
				+			STARPU_OPENCL_REPORT_ERROR(err);
			
 
				+	}
			
 
				+
			
 
				+	clFinish(queue);
			
 
				+	starpu_opencl_collect_stats(event);
			
 
				+	clReleaseEvent(event);
			
 
				+
			
 
				+	starpu_opencl_release_kernel(kernel);
			
 
				+}
			
--- a/tests/datawizard/scratch_opencl_kernel.cl
+++ b/tests/datawizard/scratch_opencl_kernel.cl
@@ -0,0 +1,29 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012 inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+__kernel void increment_vector_opencl(__global unsigned *val,
			
 
				+				      __global unsigned *tmp,
			
 
				+				      unsigned nx)
			
 
				+{
			
 
				+        const int tid = get_global_id(0);
			
 
				+	const uint nthreads = get_local_size(0);
			
 
				+
			
 
				+	int i;
			
 
				+	for (i = tid; i < nx; i += nthreads)
			
 
				+	{
			
 
				+		val[i] = tmp[i] + 1;
			
 
				+	}
			
 
				+}