Browse the source code

Fixing the multiformat example.

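An array of structs (an array of `struct point`) is now used on CPUs.
A struct of arrays (`struct struct_of_arrays`) is now used on GPUs (CUDA and OpenCL);
the conversion codelets translate between the two layouts.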
Cyril Roelandt, 13 years ago
parent
commit
104b94813b

+ 18 - 18
examples/basic_examples/multiformat.c

@@ -19,20 +19,20 @@
 #endif
 #include "multiformat_types.h"
 
-static struct struct_of_arrays global_struct_of_arrays;
-static  starpu_data_handle global_struct_of_arrays_handle;
+static struct point array_of_structs[N_ELEMENTS];
+static starpu_data_handle array_of_structs_handle;
 
 static void
 multiformat_scal_cpu_func(void *buffers[], void *args)
 {
-	struct struct_of_arrays *s;
+	struct point *aos;
 	unsigned int n, i;
 
-	s = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
+	aos = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
 	n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
 
 	for (i = 0; i < n; i++) {
-		s->x[i] *= s->y[i];
+		aos[i].x *= aos[i].y;
 	}
 }
 
@@ -48,16 +48,16 @@ extern starpu_codelet opencl_to_cpu_cl;
 
 static struct starpu_multiformat_data_interface_ops format_ops = {
 #ifdef STARPU_USE_CUDA
-	.cuda_elemsize = sizeof(struct point),
+	.cuda_elemsize = 2* sizeof(float),
 	.cpu_to_cuda_cl = &cpu_to_cuda_cl,
 	.cuda_to_cpu_cl = &cuda_to_cpu_cl,
 #endif
 #ifdef STARPU_USE_OPENCL
-	.opencl_elemsize = sizeof(struct point),
+	.opencl_elemsize = 2 * sizeof(float),
 	.cpu_to_opencl_cl = &cpu_to_opencl_cl,
 	.opencl_to_cpu_cl = &opencl_to_cpu_cl,
 #endif
-	.cpu_elemsize = sizeof(global_struct_of_arrays),
+	.cpu_elemsize = sizeof(struct point),
 
 };
 
@@ -74,7 +74,7 @@ static struct starpu_perfmodel_t conversion_model = {
 };
 
 static starpu_codelet  cl = {
-	.where = STARPU_CUDA | STARPU_OPENCL,
+	.where = STARPU_CPU | STARPU_CUDA | STARPU_OPENCL,
 	.cpu_func = multiformat_scal_cpu_func,
 #ifdef STARPU_USE_CUDA
 	.cuda_func = multiformat_scal_cuda_func,
@@ -94,17 +94,17 @@ init_problem_data(void)
 {
 	int i; 
 	for (i = 0; i < N_ELEMENTS; i++) {
-		global_struct_of_arrays.x[i] = 1.0f + i;
-		global_struct_of_arrays.y[i] = 42.0;
+		array_of_structs[i].x = 1.0 + i;
+		array_of_structs[i].y = 42.0;
 	}
 }
 
 static void
 register_data(void)
 {
-	starpu_multiformat_data_register(&global_struct_of_arrays_handle,
+	starpu_multiformat_data_register(&array_of_structs_handle,
 					 0,
-					 &global_struct_of_arrays,
+					 &array_of_structs,
 					 N_ELEMENTS,
 					 &format_ops);
 }
@@ -116,7 +116,7 @@ create_and_submit_tasks(void)
 
 	task->cl = &cl;
 	task->synchronous = 1;
-	task->buffers[0].handle = global_struct_of_arrays_handle;
+	task->buffers[0].handle = array_of_structs_handle;
 	task->buffers[0].mode = STARPU_RW;
 	task->cl_arg = NULL;
 	task->cl_arg_size = 0;
@@ -125,7 +125,7 @@ create_and_submit_tasks(void)
 	struct starpu_task *task2 = starpu_task_create();
 	task2->cl = &cl;
 	task2->synchronous = 1;
-	task2->buffers[0].handle = global_struct_of_arrays_handle;
+	task2->buffers[0].handle = array_of_structs_handle;
 	task2->buffers[0].mode = STARPU_RW;
 	task2->cl_arg = NULL;
 	task2->cl_arg_size = 0;
@@ -135,7 +135,7 @@ create_and_submit_tasks(void)
 static void
 unregister_data(void)
 {
-	starpu_data_unregister(global_struct_of_arrays_handle);
+	starpu_data_unregister(array_of_structs_handle);
 }
 
 static void
@@ -144,8 +144,8 @@ print_it(void)
 	int i;
 	for (i = 0; i < N_ELEMENTS; i++) {
 		fprintf(stderr, "(%.2f %.2f) ",
-			global_struct_of_arrays.x[i],
-			global_struct_of_arrays.y[i]);
+			array_of_structs[i].x,
+			array_of_structs[i].y);
 	}
 	fprintf(stderr, "\n");
 }

+ 16 - 16
examples/basic_examples/multiformat_conversion_codelets.c

@@ -19,25 +19,25 @@
 #ifdef STARPU_USE_CUDA
 void cpu_to_cuda(void *buffers[], void *arg)
 {
-	struct struct_of_arrays *src = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
-	struct point *dst = STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]);
+	struct point *src = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
+	struct struct_of_arrays *dst = STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]);
 	int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
 	int i;
 	for (i = 0; i < n; i++) {
-		dst[i].x = src->x[i];
-		dst[i].y = src->y[i];
+		dst->x[i] = src[i].x;
+		dst->y[i] = src[i].y;
 	}
 }
 
 void cuda_to_cpu(void *buffers[], void *arg)
 {
-	struct point *src = STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]);
-	struct struct_of_arrays *dst = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
+	struct struct_of_arrays *src = STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]);
+	struct point *dst = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
 	int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
 	int i;
 	for (i = 0; i < n; i++) {
-		dst->x[i] = src[i].x;
-		dst->y[i] = src[i].y;
+		dst[i].x = src->x[i];
+		dst[i].y = src->y[i];
 	}
 }
 
@@ -60,26 +60,26 @@ starpu_codelet cuda_to_cpu_cl = {
 void cpu_to_opencl(void *buffers[], void *arg)
 {
 	fprintf(stderr, "User Entering %s\n", __func__);
-	struct struct_of_arrays *src = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
-	struct point *dst = STARPU_MULTIFORMAT_GET_OPENCL_PTR(buffers[0]);
+	struct point *src = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
+	struct struct_of_arrays *dst = STARPU_MULTIFORMAT_GET_OPENCL_PTR(buffers[0]);
 	int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
 	int i;
 	for (i = 0; i < n; i++) {
-		dst[i].x = src->x[i];
-		dst[i].y = src->y[i];
+		dst->x[i] = src[i].x;
+		dst->y[i] = src[i].y;
 	}
 }
 
 void opencl_to_cpu(void *buffers[], void *arg)
 {
 	fprintf(stderr, "User Entering %s\n", __func__);
-	struct point *src = STARPU_MULTIFORMAT_GET_OPENCL_PTR(buffers[0]);
-	struct struct_of_arrays *dst = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
+	struct struct_of_arrays *src = STARPU_MULTIFORMAT_GET_OPENCL_PTR(buffers[0]);
+	struct point *dst = STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
 	int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
 	int i;
 	for (i = 0; i < n; i++) {
-		dst->x[i] = src[i].x;
-		dst->y[i] = src[i].y;
+		dst[i].x = src->x[i];
+		dst[i].y = src->y[i];
 	}
 }
 

+ 12 - 10
examples/basic_examples/multiformat_conversion_codelets_cuda.cu

@@ -17,24 +17,26 @@
 #include <starpu_cuda.h>
 #include "multiformat_types.h"
 
-static __global__ void cpu_to_cuda_cuda(struct struct_of_arrays *src,
-	struct point *dst, unsigned n)
+static __global__ void cpu_to_cuda_cuda(struct point *src,
+	struct struct_of_arrays *dst, unsigned n)
 {
         unsigned i =  blockIdx.x*blockDim.x + threadIdx.x;
 
-	if (i >= n)
-		return;
-	dst[i].x = src->x[i];
-	dst[i].y = src->y[i];
+	if (i < n) {
+		dst->x[i] = src[i].x;
+		dst->y[i] = src[i].y;
+	}
 
 }
 
 extern "C" void cpu_to_cuda_cuda_func(void *buffers[], void *_args)
 {
-	struct struct_of_arrays *src;
-	src = (struct struct_of_arrays *) STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
-	struct point *dst;
-	dst = (struct point *) STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]);
+	struct point *src;
+	struct struct_of_arrays *dst;
+
+	src = (struct point *) STARPU_MULTIFORMAT_GET_PTR(buffers[0]);
+	dst = (struct struct_of_arrays *) STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]);
+
 	int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
 
 	unsigned threads_per_block = 64;

+ 5 - 5
examples/basic_examples/multiformat_conversion_codelets_opencl_kernel.cl

@@ -14,13 +14,13 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 #include "multiformat_types.h"
-__kernel void cpu_to_opencl_opencl(__global struct struct_of_arrays *src,
-				__global struct point *dst,
-				unsigned int n)
+__kernel void cpu_to_opencl_opencl(__global struct point *src,
+				   __global struct struct_of_arrays *dst,
+				   unsigned int n)
 {
 	const unsigned int i = get_global_id(0);
 	if (i < n) {
-		dst[i].x = src->x[i];
-		dst[i].y = src->y[i];
+		dst->x[i] = src[i].x;
+		dst->y[i] = src[i].y;
 	}
 }

+ 7 - 7
examples/basic_examples/multiformat_cuda.cu

@@ -16,14 +16,12 @@
 #include <starpu.h>
 #include <starpu_cuda.h>
 #include "multiformat_types.h"
-static __global__ void multiformat_cuda(struct point *val, unsigned n)
+static __global__ void multiformat_cuda(struct struct_of_arrays *soa, unsigned n)
 {
         unsigned i =  blockIdx.x*blockDim.x + threadIdx.x;
 
-	if (i >= n)
-		return;
-	
-	val[i].x *= val[i].y;
+	if (i < n)
+		soa->x[i] *= soa->y[i];
 }
 
 extern "C" void multiformat_scal_cuda_func(void *buffers[], void *_args)
@@ -32,10 +30,12 @@ extern "C" void multiformat_scal_cuda_func(void *buffers[], void *_args)
 
 	fprintf(stderr, "Running the cuda kernel (%s)\n", __func__);
 	unsigned int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]);
-	struct point *val =  (struct point *) STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]);
+	struct struct_of_arrays *soa;
+
+	soa = (struct struct_of_arrays *) STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]);
 	unsigned threads_per_block = 64;
 	unsigned nblocks = (n + threads_per_block-1) / threads_per_block;
-        multiformat_cuda<<<nblocks,threads_per_block,2,starpu_cuda_get_local_stream()>>>(val, n);
+        multiformat_cuda<<<nblocks,threads_per_block,2,starpu_cuda_get_local_stream()>>>(soa, n);
 
 	cudaStreamSynchronize(starpu_cuda_get_local_stream());
 }

+ 3 - 4
examples/basic_examples/multiformat_opencl_kernel.cl

@@ -15,10 +15,9 @@
  */
 
 #include "multiformat_types.h"
-__kernel void multiformat_opencl(__global struct point* val, int nx)
+__kernel void multiformat_opencl(__global struct struct_of_arrays *soa, int nx)
 {
         const int i = get_global_id(0);
-        if (i < nx) {
-                val[i].x *= val[i].y;
-        }
+        if (i < nx)
+		soa->x[i] *= soa->y[i];
 }