浏览代码

Make examples/interface/complex work with OpenCL.

Cyril Roelandt 13 年之前
父节点
当前提交
2912649808

+ 9 - 1
examples/Makefile.am

@@ -52,7 +52,8 @@ EXTRA_DIST = 					\
 	openmp/vector_scal.c			\
 	filters/fblock_opencl_kernel.cl		\
 	filters/custom_mf/conversion_opencl.cl  \
-	filters/custom_mf/custom_opencl.cl
+	filters/custom_mf/custom_opencl.cl \
+	interface/complex_kernels.cl
 
 CLEANFILES = 					\
 	gordon/null_kernel_gordon.spuelf
@@ -714,6 +715,13 @@ interface_complex_SOURCES	+=	\
 	interface/complex_kernels.cu
 endif
 
+if STARPU_USE_OPENCL
+interface_complex_SOURCES +=\
+	interface/complex_kernels_opencl.c
+nobase_STARPU_OPENCL_DATA_DATA += \
+	interface/complex_kernels.cl
+endif
+
 ######################
 # matVecMult example #
 ######################

+ 24 - 0
examples/interface/complex.c

@@ -16,10 +16,16 @@
 
 #include <starpu.h>
 #include "complex_interface.h"
+#ifdef STARPU_USE_OPENCL
+#include <starpu_opencl.h>
+#endif
 
 #ifdef STARPU_USE_CUDA
 extern void copy_complex_codelet_cuda(void *descr[], __attribute__ ((unused)) void *_args);
 #endif
+#ifdef STARPU_USE_OPENCL
+extern void copy_complex_codelet_opencl(void *buffers[], void *args);
+#endif
 
 void compare_complex_codelet(void *descr[], __attribute__ ((unused)) void *_args)
 {
@@ -72,6 +78,9 @@ struct starpu_codelet cl_copy =
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {copy_complex_codelet_cuda, NULL},
 #endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_funcs = {copy_complex_codelet_opencl, NULL},
+#endif
 	.nbuffers = 2,
 	.modes = {STARPU_R, STARPU_W}
 };
@@ -83,6 +92,10 @@ struct starpu_codelet cl_compare =
 	.modes = {STARPU_R, STARPU_R}
 };
 
+#ifdef STARPU_USE_OPENCL
+struct starpu_opencl_program opencl_program;
+#endif
+
 int main(int argc, char **argv)
 {
 	int ret = 0;
@@ -98,6 +111,11 @@ int main(int argc, char **argv)
 	if (ret == -ENODEV) return 77;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
+#ifdef STARPU_USE_OPENCL
+	ret = starpu_opencl_load_opencl_from_file("examples/interface/complex_kernels.cl",
+						  &opencl_program, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
+#endif
 	starpu_complex_data_register(&handle1, 0, &real, &imaginary, 1);
 	starpu_complex_data_register(&handle2, 0, &copy_real, &copy_imaginary, 1);
 
@@ -141,10 +159,16 @@ int main(int argc, char **argv)
 
 	starpu_task_wait_for_all();
 
+#ifdef STARPU_USE_OPENCL
+        starpu_opencl_unload_opencl(&opencl_program);
+#endif
 	starpu_shutdown();
 	return 0;
 
 enodev:
+#ifdef STARPU_USE_OPENCL
+        starpu_opencl_unload_opencl(&opencl_program);
+#endif
 	starpu_data_unregister(handle1);
 	starpu_data_unregister(handle2);
 	starpu_shutdown();

+ 95 - 1
examples/interface/complex_interface.c

@@ -16,6 +16,7 @@
 
 #include <starpu.h>
 #include <starpu_cuda.h>
+#include <starpu_opencl.h>
 #include <starpu_hash.h>
 
 #include "complex_interface.h"
@@ -109,7 +110,30 @@ static starpu_ssize_t complex_allocate_data_on_node(void *data_interface, uint32
 #ifdef STARPU_USE_OPENCL
 	        case STARPU_OPENCL_RAM:
 		{
-			STARPU_ASSERT(0);
+			int ret;
+			cl_mem real, imaginary;
+			ret = starpu_opencl_allocate_memory(&real, requested_memory, CL_MEM_READ_WRITE);
+			if (ret != CL_SUCCESS)
+			{
+				fail = 1;
+				break;
+			}
+			else
+			{
+				addr_real = (double *) real;
+			}
+
+			ret = starpu_opencl_allocate_memory(&imaginary, requested_memory, CL_MEM_READ_WRITE);
+			if (ret != CL_SUCCESS)
+			{
+				fail = 1;
+				break;
+			}
+			else
+			{
+				addr_imaginary = (double *) imaginary;
+			}
+			break;
 		}
 #endif
 		default:
@@ -171,12 +195,82 @@ static int copy_cuda_to_ram(void *src_interface, unsigned src_node, void *dst_in
 #endif
 
 
+#ifdef STARPU_USE_OPENCL
+static int copy_ram_to_opencl(void *src_interface, unsigned src_node,
+                              void *dst_interface, unsigned dst_node)
+{
+	struct starpu_complex_interface *src_complex = src_interface;
+	struct starpu_complex_interface *dst_complex = dst_interface;
+
+	cl_int err;
+
+	err = starpu_opencl_copy_ram_to_opencl(
+		src_complex->real,
+		src_node,
+		(cl_mem) dst_complex->real,
+		dst_node,
+		src_complex->nx * sizeof(src_complex->real[0]),
+		0,
+		NULL);
+	if (STARPU_UNLIKELY(err != CL_SUCCESS))
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	err = starpu_opencl_copy_ram_to_opencl(
+		src_complex->imaginary,
+		src_node,
+		(cl_mem) dst_complex->imaginary,
+		dst_node,
+		src_complex->nx * sizeof(src_complex->imaginary[0]),
+		0,
+		NULL);
+	if (STARPU_UNLIKELY(err != CL_SUCCESS))
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	return 0;
+}
+
+static int copy_opencl_to_ram(void *src_interface, unsigned src_node,
+			      void *dst_interface, unsigned dst_node)
+{
+	struct starpu_complex_interface *src_complex = src_interface;
+	struct starpu_complex_interface *dst_complex = dst_interface;
+
+	cl_int err;
+	err = starpu_opencl_copy_opencl_to_ram(
+		(cl_mem) src_complex->real,
+		src_node,
+		dst_complex->real,
+		dst_node,
+		src_complex->nx * sizeof(src_complex->real[0]),
+		0,
+		NULL);
+	if (STARPU_UNLIKELY(err != CL_SUCCESS))
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	err = starpu_opencl_copy_opencl_to_ram(
+		(cl_mem) src_complex->imaginary,
+		src_node,
+		dst_complex->imaginary,
+		dst_node,
+		src_complex->nx * sizeof(src_complex->imaginary[0]),
+		0,
+		NULL);
+	if (STARPU_UNLIKELY(err != CL_SUCCESS))
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	return 0;
+}
+#endif
 static const struct starpu_data_copy_methods complex_copy_methods =
 {
 #ifdef STARPU_USE_CUDA
 	.ram_to_cuda = copy_ram_to_cuda,
 	.cuda_to_ram = copy_cuda_to_ram,
 #endif
+#ifdef STARPU_USE_OPENCL
+	.ram_to_opencl = copy_ram_to_opencl,
+	.opencl_to_ram = copy_opencl_to_ram,
+#endif
 };
 
 static struct starpu_data_interface_ops interface_complex_ops =

+ 32 - 0
examples/interface/complex_kernels.cl

@@ -0,0 +1,32 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/* Use the "double" type */
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+__kernel void complex_copy_opencl(__global double *o_real,
+				  __global double *o_imaginary,
+				  __global double *i_real,
+				  __global double *i_imaginary,
+				  unsigned nx) 
+{
+        const int i = get_global_id(0);
+        if (i < nx)
+	{
+		o_real[i] = i_real[i];
+		o_imaginary[i] = i_imaginary[i];
+        }
+}

+ 80 - 0
examples/interface/complex_kernels_opencl.c

@@ -0,0 +1,80 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include <starpu_opencl.h>
+#include "complex_interface.h"
+
+extern struct starpu_opencl_program opencl_program;
+
+void copy_complex_codelet_opencl(void *buffers[], void *_args)
+{
+	(void) _args;
+
+	int id, devid;
+        cl_int err;
+	cl_kernel kernel;
+	cl_command_queue queue;
+	cl_event event;
+
+	/* length of the vector */
+	unsigned n = STARPU_VECTOR_GET_NX(buffers[0]);
+	/* OpenCL copy of the vector pointer */
+	cl_mem *i_real      = (cl_mem *) STARPU_COMPLEX_GET_REAL(buffers[0]);
+	cl_mem *i_imaginary = (cl_mem *) STARPU_COMPLEX_GET_IMAGINARY(buffers[0]);
+	cl_mem *o_real      = (cl_mem *) STARPU_COMPLEX_GET_REAL(buffers[1]);
+	cl_mem *o_imaginary = (cl_mem *) STARPU_COMPLEX_GET_IMAGINARY(buffers[1]);
+
+	id = starpu_worker_get_id();
+	devid = starpu_worker_get_devid(id);
+
+	err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "complex_copy_opencl", devid);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	err = clSetKernelArg(kernel, 0, sizeof(o_real), &o_real);
+	err|= clSetKernelArg(kernel, 1, sizeof(o_imaginary), &o_imaginary);
+	err|= clSetKernelArg(kernel, 2, sizeof(i_real), &i_real);
+	err|= clSetKernelArg(kernel, 3, sizeof(i_imaginary), &i_imaginary);
+	err|= clSetKernelArg(kernel, 4, sizeof(n), &n);
+	if (err)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	{
+		size_t global=n;
+		size_t local;
+                size_t s;
+                cl_device_id device;
+
+                starpu_opencl_get_device(devid, &device);
+
+                err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s);
+                if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+                if (local > global)
+			local=global;
+
+		err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event);
+		if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+	}
+
+	clFinish(queue);
+	starpu_opencl_collect_stats(event);
+	clReleaseEvent(event);
+
+	starpu_opencl_release_kernel(kernel);
+}