|
@@ -1,6 +1,6 @@
|
|
|
/* StarPU --- Runtime system for heterogeneous multicore architectures.
|
|
|
*
|
|
|
- * Copyright (C) 2009, 2010 Université de Bordeaux 1
|
|
|
+ * Copyright (C) 2009-2011 Université de Bordeaux 1
|
|
|
* Copyright (C) 2010 Mehdi Juhoor <mjuhoor@gmail.com>
|
|
|
* Copyright (C) 2010, 2011 Centre National de la Recherche Scientifique
|
|
|
*
|
|
@@ -427,10 +427,12 @@ static int copy_cuda_common_async(void *src_interface, unsigned src_node STARPU_
|
|
|
|
|
|
int synchronous_fallback = 0;
|
|
|
|
|
|
+ STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
|
|
|
cures = cudaMemcpyAsync((char *)dst_csr->nzval, (char *)src_csr->nzval, nnz*elemsize, kind, stream);
|
|
|
if (cures)
|
|
|
{
|
|
|
synchronous_fallback = 1;
|
|
|
+ STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
|
|
|
cures = cudaMemcpy((char *)dst_csr->nzval, (char *)src_csr->nzval, nnz*elemsize, kind);
|
|
|
if (STARPU_UNLIKELY(cures))
|
|
|
STARPU_CUDA_REPORT_ERROR(cures);
|
|
@@ -444,6 +446,7 @@ static int copy_cuda_common_async(void *src_interface, unsigned src_node STARPU_
|
|
|
if (synchronous_fallback || cures != cudaSuccess)
|
|
|
{
|
|
|
synchronous_fallback = 1;
|
|
|
+ STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
|
|
|
cures = cudaMemcpy((char *)dst_csr->colind, (char *)src_csr->colind, nnz*sizeof(uint32_t), kind);
|
|
|
if (STARPU_UNLIKELY(cures))
|
|
|
STARPU_CUDA_REPORT_ERROR(cures);
|
|
@@ -457,6 +460,7 @@ static int copy_cuda_common_async(void *src_interface, unsigned src_node STARPU_
|
|
|
if (synchronous_fallback || cures != cudaSuccess)
|
|
|
{
|
|
|
synchronous_fallback = 1;
|
|
|
+ STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
|
|
|
cures = cudaMemcpy((char *)dst_csr->rowptr, (char *)src_csr->rowptr, (nrow+1)*sizeof(uint32_t), kind);
|
|
|
if (STARPU_UNLIKELY(cures))
|
|
|
STARPU_CUDA_REPORT_ERROR(cures);
|
|
@@ -468,6 +472,7 @@ static int copy_cuda_common_async(void *src_interface, unsigned src_node STARPU_
|
|
|
return 0;
|
|
|
}
|
|
|
else {
|
|
|
+ STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
|
|
|
return -EAGAIN;
|
|
|
}
|
|
|
}
|
|
@@ -526,10 +531,12 @@ static int copy_cuda_peer_async(void *src_interface STARPU_ATTRIBUTE_UNUSED, uns
|
|
|
|
|
|
int synchronous_fallback = 0;
|
|
|
|
|
|
+ STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
|
|
|
cures = cudaMemcpyPeerAsync((char *)dst_csr->nzval, dst_dev, (char *)src_csr->nzval, src_dev, nnz*elemsize, stream);
|
|
|
if (cures)
|
|
|
{
|
|
|
synchronous_fallback = 1;
|
|
|
+ STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
|
|
|
cures = cudaMemcpyPeer((char *)dst_csr->nzval, dst_dev, (char *)src_csr->nzval, src_dev, nnz*elemsize);
|
|
|
if (STARPU_UNLIKELY(cures))
|
|
|
STARPU_CUDA_REPORT_ERROR(cures);
|
|
@@ -543,6 +550,7 @@ static int copy_cuda_peer_async(void *src_interface STARPU_ATTRIBUTE_UNUSED, uns
|
|
|
if (synchronous_fallback || cures != cudaSuccess)
|
|
|
{
|
|
|
synchronous_fallback = 1;
|
|
|
+ STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
|
|
|
cures = cudaMemcpyPeer((char *)dst_csr->colind, dst_dev, (char *)src_csr->colind, src_dev, nnz*sizeof(uint32_t));
|
|
|
if (STARPU_UNLIKELY(cures))
|
|
|
STARPU_CUDA_REPORT_ERROR(cures);
|
|
@@ -556,6 +564,7 @@ static int copy_cuda_peer_async(void *src_interface STARPU_ATTRIBUTE_UNUSED, uns
|
|
|
if (synchronous_fallback || cures != cudaSuccess)
|
|
|
{
|
|
|
synchronous_fallback = 1;
|
|
|
+ STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
|
|
|
cures = cudaMemcpyPeer((char *)dst_csr->rowptr, dst_dev, (char *)src_csr->rowptr, src_dev, (nrow+1)*sizeof(uint32_t));
|
|
|
if (STARPU_UNLIKELY(cures))
|
|
|
STARPU_CUDA_REPORT_ERROR(cures);
|
|
@@ -567,6 +576,7 @@ static int copy_cuda_peer_async(void *src_interface STARPU_ATTRIBUTE_UNUSED, uns
|
|
|
return 0;
|
|
|
}
|
|
|
else {
|
|
|
+ STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
|
|
|
return -EAGAIN;
|
|
|
}
|
|
|
#else
|
|
@@ -626,15 +636,15 @@ static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTR
|
|
|
|
|
|
int err;
|
|
|
|
|
|
- err = _starpu_opencl_copy_opencl_to_ram((cl_mem)src_csr->nzval, (void *)dst_csr->nzval, nnz*elemsize, 0, NULL);
|
|
|
+ err = _starpu_opencl_copy_opencl_to_ram((cl_mem)src_csr->nzval, src_node, (void *)dst_csr->nzval, dst_node, nnz*elemsize, 0, NULL);
|
|
|
if (STARPU_UNLIKELY(err))
|
|
|
STARPU_OPENCL_REPORT_ERROR(err);
|
|
|
|
|
|
- err = _starpu_opencl_copy_opencl_to_ram((cl_mem)src_csr->colind, (void *)dst_csr->colind, nnz*sizeof(uint32_t), 0, NULL);
|
|
|
+ err = _starpu_opencl_copy_opencl_to_ram((cl_mem)src_csr->colind, src_node, (void *)dst_csr->colind, dst_node, nnz*sizeof(uint32_t), 0, NULL);
|
|
|
if (STARPU_UNLIKELY(err))
|
|
|
STARPU_OPENCL_REPORT_ERROR(err);
|
|
|
|
|
|
- err = _starpu_opencl_copy_opencl_to_ram((cl_mem)src_csr->rowptr, (void *)dst_csr->rowptr, (nrow+1)*sizeof(uint32_t), 0, NULL);
|
|
|
+ err = _starpu_opencl_copy_opencl_to_ram((cl_mem)src_csr->rowptr, src_node, (void *)dst_csr->rowptr, dst_node, (nrow+1)*sizeof(uint32_t), 0, NULL);
|
|
|
if (STARPU_UNLIKELY(err))
|
|
|
STARPU_OPENCL_REPORT_ERROR(err);
|
|
|
|
|
@@ -654,15 +664,15 @@ static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTR
|
|
|
|
|
|
int err;
|
|
|
|
|
|
- err = _starpu_opencl_copy_ram_to_opencl((void *)src_csr->nzval, (cl_mem)dst_csr->nzval, nnz*elemsize, 0, NULL);
|
|
|
+ err = _starpu_opencl_copy_ram_to_opencl((void *)src_csr->nzval, src_node, (cl_mem)dst_csr->nzval, dst_node, nnz*elemsize, 0, NULL);
|
|
|
if (STARPU_UNLIKELY(err))
|
|
|
STARPU_OPENCL_REPORT_ERROR(err);
|
|
|
|
|
|
- err = _starpu_opencl_copy_ram_to_opencl((void *)src_csr->colind, (cl_mem)dst_csr->colind, nnz*sizeof(uint32_t), 0, NULL);
|
|
|
+ err = _starpu_opencl_copy_ram_to_opencl((void *)src_csr->colind, src_node, (cl_mem)dst_csr->colind, dst_node, nnz*sizeof(uint32_t), 0, NULL);
|
|
|
if (STARPU_UNLIKELY(err))
|
|
|
STARPU_OPENCL_REPORT_ERROR(err);
|
|
|
|
|
|
- err = _starpu_opencl_copy_ram_to_opencl((void *)src_csr->rowptr, (cl_mem)dst_csr->rowptr, (nrow+1)*sizeof(uint32_t), 0, NULL);
|
|
|
+ err = _starpu_opencl_copy_ram_to_opencl((void *)src_csr->rowptr, src_node, (cl_mem)dst_csr->rowptr, dst_node, (nrow+1)*sizeof(uint32_t), 0, NULL);
|
|
|
if (STARPU_UNLIKELY(err))
|
|
|
STARPU_OPENCL_REPORT_ERROR(err);
|
|
|
|