|
@@ -1,6 +1,6 @@
|
|
|
/* StarPU --- Runtime system for heterogeneous multicore architectures.
|
|
|
*
|
|
|
- * Copyright (C) 2009, 2010-2012 Université de Bordeaux 1
|
|
|
+ * Copyright (C) 2009-2013 Université de Bordeaux 1
|
|
|
* Copyright (C) 2010, 2011, 2012, 2013 Centre National de la Recherche Scientifique
|
|
|
*
|
|
|
* StarPU is free software; you can redistribute it and/or modify
|
|
@@ -40,6 +40,7 @@ static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRI
|
|
|
#ifdef STARPU_USE_OPENCL
|
|
|
static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
|
|
|
static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
|
|
|
+static int copy_opencl_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
|
|
|
#endif
|
|
|
|
|
|
static struct starpu_data_copy_methods bcsr_copy_data_methods_s =
|
|
@@ -53,6 +54,7 @@ static struct starpu_data_copy_methods bcsr_copy_data_methods_s =
|
|
|
#ifdef STARPU_USE_OPENCL
|
|
|
.ram_to_opencl = copy_ram_to_opencl,
|
|
|
.opencl_to_ram = copy_opencl_to_ram,
|
|
|
+ .opencl_to_opencl = copy_opencl_to_opencl,
|
|
|
#endif
|
|
|
};
|
|
|
|
|
@@ -362,7 +364,7 @@ static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRI
|
|
|
#endif // STARPU_USE_CUDA
|
|
|
|
|
|
#ifdef STARPU_USE_OPENCL
|
|
|
-static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
|
|
|
+static int copy_opencl_common(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
|
|
|
{
|
|
|
struct starpu_bcsr_interface *src_bcsr = src_interface;
|
|
|
struct starpu_bcsr_interface *dst_bcsr = dst_interface;
|
|
@@ -376,15 +378,15 @@ static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTR
|
|
|
|
|
|
int err;
|
|
|
|
|
|
- err = starpu_opencl_copy_opencl_to_ram((cl_mem)src_bcsr->nzval, src_node, (void *)dst_bcsr->nzval, dst_node, nnz*r*c*elemsize, 0, NULL, NULL);
|
|
|
+ err = starpu_opencl_copy_async_sync(src_bcsr->nzval, src_node, dst_bcsr->nzval, dst_node, nnz*r*c*elemsize, 0, NULL);
|
|
|
if (STARPU_UNLIKELY(err))
|
|
|
STARPU_OPENCL_REPORT_ERROR(err);
|
|
|
|
|
|
- err = starpu_opencl_copy_opencl_to_ram((cl_mem)src_bcsr->colind, src_node, (void *)dst_bcsr->colind, dst_node, nnz*sizeof(uint32_t), 0, NULL, NULL);
|
|
|
+ err = starpu_opencl_copy_async_sync((uintptr_t)src_bcsr->colind, src_node, (uintptr_t)dst_bcsr->colind, dst_node, nnz*sizeof(uint32_t), 0, NULL);
|
|
|
if (STARPU_UNLIKELY(err))
|
|
|
STARPU_OPENCL_REPORT_ERROR(err);
|
|
|
|
|
|
- err = starpu_opencl_copy_opencl_to_ram((cl_mem)src_bcsr->rowptr, src_node, (void *)dst_bcsr->rowptr, dst_node, (nrow+1)*sizeof(uint32_t), 0, NULL, NULL);
|
|
|
+ err = starpu_opencl_copy_async_sync((uintptr_t)src_bcsr->rowptr, src_node, (uintptr_t)dst_bcsr->rowptr, dst_node, (nrow+1)*sizeof(uint32_t), 0, NULL);
|
|
|
if (STARPU_UNLIKELY(err))
|
|
|
STARPU_OPENCL_REPORT_ERROR(err);
|
|
|
|
|
@@ -393,36 +395,21 @@ static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTR
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
-static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
|
|
|
+static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
|
|
|
{
|
|
|
- struct starpu_bcsr_interface *src_bcsr = src_interface;
|
|
|
- struct starpu_bcsr_interface *dst_bcsr = dst_interface;
|
|
|
-
|
|
|
- uint32_t nnz = src_bcsr->nnz;
|
|
|
- uint32_t nrow = src_bcsr->nrow;
|
|
|
- size_t elemsize = src_bcsr->elemsize;
|
|
|
-
|
|
|
- uint32_t r = src_bcsr->r;
|
|
|
- uint32_t c = src_bcsr->c;
|
|
|
-
|
|
|
- int err;
|
|
|
-
|
|
|
- err = starpu_opencl_copy_ram_to_opencl((void *)src_bcsr->nzval, src_node, (cl_mem)dst_bcsr->nzval, dst_node, nnz*r*c*elemsize, 0, NULL, NULL);
|
|
|
- if (STARPU_UNLIKELY(err))
|
|
|
- STARPU_OPENCL_REPORT_ERROR(err);
|
|
|
-
|
|
|
- err = starpu_opencl_copy_ram_to_opencl((void *)src_bcsr->colind, src_node, (cl_mem)dst_bcsr->colind, dst_node, nnz*sizeof(uint32_t), 0, NULL, NULL);
|
|
|
- if (STARPU_UNLIKELY(err))
|
|
|
- STARPU_OPENCL_REPORT_ERROR(err);
|
|
|
-
|
|
|
- err = starpu_opencl_copy_ram_to_opencl((void *)src_bcsr->rowptr, src_node, (cl_mem)dst_bcsr->rowptr, dst_node, (nrow+1)*sizeof(uint32_t), 0, NULL, NULL);
|
|
|
- if (STARPU_UNLIKELY(err))
|
|
|
- STARPU_OPENCL_REPORT_ERROR(err);
|
|
|
+ return copy_opencl_common(src_interface, src_node, dst_interface, dst_node);
|
|
|
+}
|
|
|
|
|
|
- _STARPU_TRACE_DATA_COPY(src_node, dst_node, nnz*r*c*elemsize + (nnz+nrow+1)*sizeof(uint32_t));
|
|
|
+static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
|
|
|
+{
|
|
|
+ return copy_opencl_common(src_interface, src_node, dst_interface, dst_node);
|
|
|
+}
|
|
|
|
|
|
- return 0;
|
|
|
+static int copy_opencl_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
|
|
|
+{
|
|
|
+ return copy_opencl_common(src_interface, src_node, dst_interface, dst_node);
|
|
|
}
|
|
|
+
|
|
|
#endif // STARPU_USE_OPENCL
|
|
|
|
|
|
/* as not all platform easily have a BLAS lib installed ... */
|