Browse Source

Add starpu_opencl_copy_async_sync to further factorize the interface code

Samuel Thibault 12 years ago
parent
commit
6223c3c46c

+ 2 - 0
include/starpu_opencl.h

@@ -108,6 +108,8 @@ cl_int starpu_opencl_copy_opencl_to_ram(cl_mem buffer, unsigned src_node, void *
 
 cl_int starpu_opencl_copy_opencl_to_opencl(cl_mem src, unsigned src_node, cl_mem dst, unsigned dst_node, size_t size, size_t offset, cl_event *event, int *ret);
 
+cl_int starpu_opencl_copy_async_sync(uintptr_t src, unsigned src_node, uintptr_t dst, unsigned dst_node, size_t size, size_t offset, cl_event *event); /* picks the opencl->ram, ram->opencl or opencl->opencl copy helper from the kinds of src_node and dst_node */
+
 #ifdef __cplusplus
 }
 #endif

+ 18 - 31
src/datawizard/interfaces/bcsr_interface.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010-2012  Université de Bordeaux 1
+ * Copyright (C) 2009-2013  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -40,6 +40,7 @@ static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRI
 #ifdef STARPU_USE_OPENCL
 static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
 static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
+static int copy_opencl_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
 #endif
 
 static struct starpu_data_copy_methods bcsr_copy_data_methods_s =
@@ -53,6 +54,7 @@ static struct starpu_data_copy_methods bcsr_copy_data_methods_s =
 #ifdef STARPU_USE_OPENCL
 	.ram_to_opencl = copy_ram_to_opencl,
 	.opencl_to_ram = copy_opencl_to_ram,
+	.opencl_to_opencl = copy_opencl_to_opencl,
 #endif
 };
 
@@ -362,7 +364,7 @@ static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRI
 #endif // STARPU_USE_CUDA
 
 #ifdef STARPU_USE_OPENCL
-static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
+static int copy_opencl_common(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
 {
 	struct starpu_bcsr_interface *src_bcsr = src_interface;
 	struct starpu_bcsr_interface *dst_bcsr = dst_interface;
@@ -376,15 +378,15 @@ static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTR
 
         int err;
 
-	err = starpu_opencl_copy_opencl_to_ram((cl_mem)src_bcsr->nzval, src_node, (void *)dst_bcsr->nzval, dst_node, nnz*r*c*elemsize, 0, NULL, NULL);
+	err = starpu_opencl_copy_async_sync(src_bcsr->nzval, src_node, dst_bcsr->nzval, dst_node, nnz*r*c*elemsize, 0, NULL);
 	if (STARPU_UNLIKELY(err))
 		STARPU_OPENCL_REPORT_ERROR(err);
 
-	err = starpu_opencl_copy_opencl_to_ram((cl_mem)src_bcsr->colind, src_node, (void *)dst_bcsr->colind, dst_node, nnz*sizeof(uint32_t), 0, NULL, NULL);
+	err = starpu_opencl_copy_async_sync((uintptr_t)src_bcsr->colind, src_node, (uintptr_t)dst_bcsr->colind, dst_node, nnz*sizeof(uint32_t), 0, NULL);
 	if (STARPU_UNLIKELY(err))
 		STARPU_OPENCL_REPORT_ERROR(err);
 
-	err = starpu_opencl_copy_opencl_to_ram((cl_mem)src_bcsr->rowptr, src_node, (void *)dst_bcsr->rowptr, dst_node, (nrow+1)*sizeof(uint32_t), 0, NULL, NULL);
+	err = starpu_opencl_copy_async_sync((uintptr_t)src_bcsr->rowptr, src_node, (uintptr_t)dst_bcsr->rowptr, dst_node, (nrow+1)*sizeof(uint32_t), 0, NULL);
 	if (STARPU_UNLIKELY(err))
 		STARPU_OPENCL_REPORT_ERROR(err);
 
@@ -393,36 +395,21 @@ static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTR
 	return 0;
 }
 
-static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
+static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
 {
-	struct starpu_bcsr_interface *src_bcsr = src_interface;
-	struct starpu_bcsr_interface *dst_bcsr = dst_interface;
-
-	uint32_t nnz = src_bcsr->nnz;
-	uint32_t nrow = src_bcsr->nrow;
-	size_t elemsize = src_bcsr->elemsize;
-
-	uint32_t r = src_bcsr->r;
-	uint32_t c = src_bcsr->c;
-
-        int err;
-
-	err = starpu_opencl_copy_ram_to_opencl((void *)src_bcsr->nzval, src_node, (cl_mem)dst_bcsr->nzval, dst_node, nnz*r*c*elemsize, 0, NULL, NULL);
-	if (STARPU_UNLIKELY(err))
-		STARPU_OPENCL_REPORT_ERROR(err);
-
-	err = starpu_opencl_copy_ram_to_opencl((void *)src_bcsr->colind, src_node, (cl_mem)dst_bcsr->colind, dst_node, nnz*sizeof(uint32_t), 0, NULL, NULL);
-	if (STARPU_UNLIKELY(err))
-		STARPU_OPENCL_REPORT_ERROR(err);
-
-	err = starpu_opencl_copy_ram_to_opencl((void *)src_bcsr->rowptr, src_node, (cl_mem)dst_bcsr->rowptr, dst_node, (nrow+1)*sizeof(uint32_t), 0, NULL, NULL);
-	if (STARPU_UNLIKELY(err))
-		STARPU_OPENCL_REPORT_ERROR(err);
+	return copy_opencl_common(src_interface, src_node, dst_interface, dst_node); /* shared path: starpu_opencl_copy_async_sync() selects the transfer from the node kinds */
+}
 
-	_STARPU_TRACE_DATA_COPY(src_node, dst_node, nnz*r*c*elemsize + (nnz+nrow+1)*sizeof(uint32_t));
+static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
+{
+	return copy_opencl_common(src_interface, src_node, dst_interface, dst_node); /* shared path: starpu_opencl_copy_async_sync() selects the transfer from the node kinds */
+}
 
-	return 0;
+static int copy_opencl_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
+{
+	return copy_opencl_common(src_interface, src_node, dst_interface, dst_node); /* shared path: starpu_opencl_copy_async_sync() selects the transfer from the node kinds */
 }
+
 #endif // STARPU_USE_OPENCL
 
 /* as not all platform easily have a BLAS lib installed ... */

+ 17 - 27
src/datawizard/interfaces/csr_interface.c

@@ -40,6 +40,7 @@ static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node, void
 #ifdef STARPU_USE_OPENCL
 static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
 static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
+static int copy_opencl_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
 #endif
 
 static struct starpu_data_copy_methods csr_copy_data_methods_s =
@@ -61,6 +62,7 @@ static struct starpu_data_copy_methods csr_copy_data_methods_s =
 #ifdef STARPU_USE_OPENCL
 	.ram_to_opencl = copy_ram_to_opencl,
 	.opencl_to_ram = copy_opencl_to_ram,
+	.opencl_to_opencl = copy_opencl_to_opencl,
 #endif
 };
 
@@ -465,7 +467,7 @@ static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node, void
 #endif // STARPU_USE_CUDA
 
 #ifdef STARPU_USE_OPENCL
-static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
+static int copy_opencl_common(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
 {
 	struct starpu_csr_interface *src_csr = src_interface;
 	struct starpu_csr_interface *dst_csr = dst_interface;
@@ -476,15 +478,15 @@ static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTR
 
         int err;
 
-        err = starpu_opencl_copy_opencl_to_ram((cl_mem)src_csr->nzval, src_node, (void *)dst_csr->nzval, dst_node, nnz*elemsize, 0, NULL, NULL);
+	err = starpu_opencl_copy_async_sync(src_csr->nzval, src_node, dst_csr->nzval, dst_node, nnz*elemsize, 0, NULL);
 	if (STARPU_UNLIKELY(err))
                 STARPU_OPENCL_REPORT_ERROR(err);
 
-	err = starpu_opencl_copy_opencl_to_ram((cl_mem)src_csr->colind, src_node, (void *)dst_csr->colind, dst_node, nnz*sizeof(uint32_t), 0, NULL, NULL);
+	err = starpu_opencl_copy_async_sync((uintptr_t)src_csr->colind, src_node, (uintptr_t)dst_csr->colind, dst_node, nnz*sizeof(uint32_t), 0, NULL);
         if (STARPU_UNLIKELY(err))
                 STARPU_OPENCL_REPORT_ERROR(err);
 
-        err = starpu_opencl_copy_opencl_to_ram((cl_mem)src_csr->rowptr, src_node, (void *)dst_csr->rowptr, dst_node, (nrow+1)*sizeof(uint32_t), 0, NULL, NULL);
+	err = starpu_opencl_copy_async_sync((uintptr_t)src_csr->rowptr, src_node, (uintptr_t)dst_csr->rowptr, dst_node, (nrow+1)*sizeof(uint32_t), 0, NULL);
 	if (STARPU_UNLIKELY(err))
                 STARPU_OPENCL_REPORT_ERROR(err);
 
@@ -493,33 +495,21 @@ static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTR
 	return 0;
 }
 
-static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
+static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
 {
-	struct starpu_csr_interface *src_csr = src_interface;
-	struct starpu_csr_interface *dst_csr = dst_interface;
-
-	uint32_t nnz = src_csr->nnz;
-	uint32_t nrow = src_csr->nrow;
-	size_t elemsize = src_csr->elemsize;
-
-        int err;
-
-        err = starpu_opencl_copy_ram_to_opencl((void *)src_csr->nzval, src_node, (cl_mem)dst_csr->nzval, dst_node, nnz*elemsize, 0, NULL, NULL);
-	if (STARPU_UNLIKELY(err))
-                STARPU_OPENCL_REPORT_ERROR(err);
-
-	err = starpu_opencl_copy_ram_to_opencl((void *)src_csr->colind, src_node, (cl_mem)dst_csr->colind, dst_node, nnz*sizeof(uint32_t), 0, NULL, NULL);
-        if (STARPU_UNLIKELY(err))
-                STARPU_OPENCL_REPORT_ERROR(err);
-
-        err = starpu_opencl_copy_ram_to_opencl((void *)src_csr->rowptr, src_node, (cl_mem)dst_csr->rowptr, dst_node, (nrow+1)*sizeof(uint32_t), 0, NULL, NULL);
-	if (STARPU_UNLIKELY(err))
-                STARPU_OPENCL_REPORT_ERROR(err);
+	return copy_opencl_common(src_interface, src_node, dst_interface, dst_node); /* shared path: starpu_opencl_copy_async_sync() selects the transfer from the node kinds */
+}
 
-	_STARPU_TRACE_DATA_COPY(src_node, dst_node, nnz*elemsize + (nnz+nrow+1)*sizeof(uint32_t));
+static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
+{
+	return copy_opencl_common(src_interface, src_node, dst_interface, dst_node); /* shared path: starpu_opencl_copy_async_sync() selects the transfer from the node kinds */
+}
 
-	return 0;
+static int copy_opencl_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
+{
+	return copy_opencl_common(src_interface, src_node, dst_interface, dst_node); /* shared path: starpu_opencl_copy_async_sync() selects the transfer from the node kinds */
 }
+
 #endif // STARPU_USE_OPENCL
 
 /* as not all platform easily have a BLAS lib installed ... */

+ 44 - 0
src/drivers/opencl/driver_opencl.c

@@ -370,6 +370,50 @@ cl_int starpu_opencl_copy_opencl_to_opencl(cl_mem src, unsigned src_node STARPU_
 	return err;
 }
 
+#ifdef STARPU_USE_OPENCL
+/* Dispatch a copy between src (on src_node) and dst (on dst_node) to the
+ * starpu_opencl_copy_* helper matching the kinds of the two nodes.  src/dst
+ * are cast to cl_mem for OpenCL nodes and to void* for CPU nodes; size, offset
+ * and event are forwarded unchanged.  Returns what the helper stored in ret. */
+cl_int starpu_opencl_copy_async_sync(uintptr_t src, unsigned src_node, uintptr_t dst, unsigned dst_node, size_t size, size_t offset, cl_event *event)
+{
+	enum starpu_node_kind src_kind = starpu_node_get_kind(src_node);
+	enum starpu_node_kind dst_kind = starpu_node_get_kind(dst_node);
+	cl_int err;
+	int ret;
+
+	switch (_STARPU_MEMORY_NODE_TUPLE(src_kind,dst_kind))
+	{
+	case _STARPU_MEMORY_NODE_TUPLE(STARPU_OPENCL_RAM,STARPU_CPU_RAM):
+		err = starpu_opencl_copy_opencl_to_ram(
+				(cl_mem) src, src_node,
+				(void*) dst, dst_node,
+				size, offset, event, &ret);
+		break;
+	case _STARPU_MEMORY_NODE_TUPLE(STARPU_CPU_RAM,STARPU_OPENCL_RAM):
+		err = starpu_opencl_copy_ram_to_opencl(
+				(void*) src, src_node,
+				(cl_mem) dst, dst_node,
+				size, offset, event, &ret);
+		break;
+	case _STARPU_MEMORY_NODE_TUPLE(STARPU_OPENCL_RAM,STARPU_OPENCL_RAM):
+		err = starpu_opencl_copy_opencl_to_opencl(
+				(cl_mem) src, src_node,
+				(cl_mem) dst, dst_node,
+				size, offset, event, &ret);
+		break;
+	default:
+		/* Only the three node pairs above are handled. */
+		STARPU_ABORT();
+		/* Presumably not reached; keeps every path of this non-void function returning a value. */
+		return CL_INVALID_VALUE;
+	}
+	/* One error check shared by the three copy directions. */
+	if (STARPU_UNLIKELY(err))
+		STARPU_OPENCL_REPORT_ERROR(err);
+	return ret;
+}
+#endif
+
 #if 0
 cl_int _starpu_opencl_copy_rect_opencl_to_ram(cl_mem buffer, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *ptr, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, const size_t buffer_origin[3], const size_t host_origin[3],
                                               const size_t region[3], size_t buffer_row_pitch, size_t buffer_slice_pitch,