|
@@ -35,7 +35,7 @@ static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node,
|
|
|
static int copy_cuda_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream);
|
|
|
#endif
|
|
|
|
|
|
-static const struct starpu_copy_data_methods_s blas_copy_data_methods_s = {
|
|
|
+static const struct starpu_copy_data_methods_s matrix_copy_data_methods_s = {
|
|
|
.ram_to_ram = dummy_copy_ram_to_ram,
|
|
|
.ram_to_spu = NULL,
|
|
|
#ifdef STARPU_USE_CUDA
|
|
@@ -51,40 +51,40 @@ static const struct starpu_copy_data_methods_s blas_copy_data_methods_s = {
|
|
|
.spu_to_spu = NULL
|
|
|
};
|
|
|
|
|
|
-static void register_blas_handle(starpu_data_handle handle, uint32_t home_node, void *interface);
|
|
|
-static size_t allocate_blas_buffer_on_node(starpu_data_handle handle, uint32_t dst_node);
|
|
|
-static void liberate_blas_buffer_on_node(void *interface, uint32_t node);
|
|
|
-static size_t blas_interface_get_size(starpu_data_handle handle);
|
|
|
-static uint32_t footprint_blas_interface_crc32(starpu_data_handle handle);
|
|
|
-static void display_blas_interface(starpu_data_handle handle, FILE *f);
|
|
|
+static void register_matrix_handle(starpu_data_handle handle, uint32_t home_node, void *interface);
|
|
|
+static size_t allocate_matrix_buffer_on_node(starpu_data_handle handle, uint32_t dst_node);
|
|
|
+static void liberate_matrix_buffer_on_node(void *interface, uint32_t node);
|
|
|
+static size_t matrix_interface_get_size(starpu_data_handle handle);
|
|
|
+static uint32_t footprint_matrix_interface_crc32(starpu_data_handle handle);
|
|
|
+static void display_matrix_interface(starpu_data_handle handle, FILE *f);
|
|
|
#ifdef STARPU_USE_GORDON
|
|
|
-static int convert_blas_to_gordon(void *interface, uint64_t *ptr, gordon_strideSize_t *ss);
|
|
|
+static int convert_matrix_to_gordon(void *interface, uint64_t *ptr, gordon_strideSize_t *ss);
|
|
|
#endif
|
|
|
|
|
|
-struct starpu_data_interface_ops_t _starpu_interface_blas_ops = {
|
|
|
- .register_data_handle = register_blas_handle,
|
|
|
- .allocate_data_on_node = allocate_blas_buffer_on_node,
|
|
|
- .liberate_data_on_node = liberate_blas_buffer_on_node,
|
|
|
- .copy_methods = &blas_copy_data_methods_s,
|
|
|
- .get_size = blas_interface_get_size,
|
|
|
- .footprint = footprint_blas_interface_crc32,
|
|
|
+struct starpu_data_interface_ops_t _starpu_interface_matrix_ops = {
|
|
|
+ .register_data_handle = register_matrix_handle,
|
|
|
+ .allocate_data_on_node = allocate_matrix_buffer_on_node,
|
|
|
+ .liberate_data_on_node = liberate_matrix_buffer_on_node,
|
|
|
+ .copy_methods = &matrix_copy_data_methods_s,
|
|
|
+ .get_size = matrix_interface_get_size,
|
|
|
+ .footprint = footprint_matrix_interface_crc32,
|
|
|
#ifdef STARPU_USE_GORDON
|
|
|
- .convert_to_gordon = convert_blas_to_gordon,
|
|
|
+ .convert_to_gordon = convert_matrix_to_gordon,
|
|
|
#endif
|
|
|
.interfaceid = STARPU_BLAS_INTERFACE_ID,
|
|
|
- .interface_size = sizeof(starpu_blas_interface_t),
|
|
|
- .display = display_blas_interface
|
|
|
+ .interface_size = sizeof(starpu_matrix_interface_t),
|
|
|
+ .display = display_matrix_interface
|
|
|
};
|
|
|
|
|
|
#ifdef STARPU_USE_GORDON
|
|
|
-static int convert_blas_to_gordon(void *interface, uint64_t *ptr, gordon_strideSize_t *ss)
|
|
|
+static int convert_matrix_to_gordon(void *interface, uint64_t *ptr, gordon_strideSize_t *ss)
|
|
|
{
|
|
|
size_t elemsize = GET_BLAS_ELEMSIZE(interface);
|
|
|
- uint32_t nx = STARPU_GET_BLAS_NX(interface);
|
|
|
- uint32_t ny = STARPU_GET_BLAS_NY(interface);
|
|
|
- uint32_t ld = STARPU_GET_BLAS_LD(interface);
|
|
|
+ uint32_t nx = STARPU_GET_MATRIX_NX(interface);
|
|
|
+ uint32_t ny = STARPU_GET_MATRIX_NY(interface);
|
|
|
+ uint32_t ld = STARPU_GET_MATRIX_LD(interface);
|
|
|
|
|
|
- *ptr = STARPU_GET_BLAS_PTR(interface);
|
|
|
+ *ptr = STARPU_GET_MATRIX_PTR(interface);
|
|
|
|
|
|
|
|
|
* in case nx = ld (in that case, (*ss).size = elemsize*nx*ny */
|
|
@@ -94,37 +94,37 @@ static int convert_blas_to_gordon(void *interface, uint64_t *ptr, gordon_strideS
|
|
|
}
|
|
|
#endif
|
|
|
|
|
|
-static void register_blas_handle(starpu_data_handle handle, uint32_t home_node, void *interface)
|
|
|
+static void register_matrix_handle(starpu_data_handle handle, uint32_t home_node, void *interface)
|
|
|
{
|
|
|
- starpu_blas_interface_t *blas_interface = interface;
|
|
|
+ starpu_matrix_interface_t *matrix_interface = interface;
|
|
|
|
|
|
unsigned node;
|
|
|
for (node = 0; node < STARPU_MAXNODES; node++)
|
|
|
{
|
|
|
- starpu_blas_interface_t *local_interface =
|
|
|
+ starpu_matrix_interface_t *local_interface =
|
|
|
starpu_data_get_interface_on_node(handle, node);
|
|
|
|
|
|
if (node == home_node) {
|
|
|
- local_interface->ptr = blas_interface->ptr;
|
|
|
- local_interface->ld = blas_interface->ld;
|
|
|
+ local_interface->ptr = matrix_interface->ptr;
|
|
|
+ local_interface->ld = matrix_interface->ld;
|
|
|
}
|
|
|
else {
|
|
|
local_interface->ptr = 0;
|
|
|
local_interface->ld = 0;
|
|
|
}
|
|
|
|
|
|
- local_interface->nx = blas_interface->nx;
|
|
|
- local_interface->ny = blas_interface->ny;
|
|
|
- local_interface->elemsize = blas_interface->elemsize;
|
|
|
+ local_interface->nx = matrix_interface->nx;
|
|
|
+ local_interface->ny = matrix_interface->ny;
|
|
|
+ local_interface->elemsize = matrix_interface->elemsize;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
|
|
|
-void starpu_register_blas_data(starpu_data_handle *handleptr, uint32_t home_node,
|
|
|
+void starpu_register_matrix_data(starpu_data_handle *handleptr, uint32_t home_node,
|
|
|
uintptr_t ptr, uint32_t ld, uint32_t nx,
|
|
|
uint32_t ny, size_t elemsize)
|
|
|
{
|
|
|
- starpu_blas_interface_t interface = {
|
|
|
+ starpu_matrix_interface_t interface = {
|
|
|
.ptr = ptr,
|
|
|
.ld = ld,
|
|
|
.nx = nx,
|
|
@@ -132,25 +132,25 @@ void starpu_register_blas_data(starpu_data_handle *handleptr, uint32_t home_node
|
|
|
.elemsize = elemsize
|
|
|
};
|
|
|
|
|
|
- _starpu_register_data_handle(handleptr, home_node, &interface, &_starpu_interface_blas_ops);
|
|
|
+ _starpu_register_data_handle(handleptr, home_node, &interface, &_starpu_interface_matrix_ops);
|
|
|
}
|
|
|
|
|
|
-static uint32_t footprint_blas_interface_crc32(starpu_data_handle handle)
|
|
|
+static uint32_t footprint_matrix_interface_crc32(starpu_data_handle handle)
|
|
|
{
|
|
|
- return _starpu_crc32_be(starpu_get_blas_nx(handle), starpu_get_blas_ny(handle));
|
|
|
+ return _starpu_crc32_be(starpu_get_matrix_nx(handle), starpu_get_matrix_ny(handle));
|
|
|
}
|
|
|
|
|
|
-static void display_blas_interface(starpu_data_handle handle, FILE *f)
|
|
|
+static void display_matrix_interface(starpu_data_handle handle, FILE *f)
|
|
|
{
|
|
|
- starpu_blas_interface_t *interface =
|
|
|
+ starpu_matrix_interface_t *interface =
|
|
|
starpu_data_get_interface_on_node(handle, 0);
|
|
|
|
|
|
fprintf(f, "%u\t%u\t", interface->nx, interface->ny);
|
|
|
}
|
|
|
|
|
|
-static size_t blas_interface_get_size(starpu_data_handle handle)
|
|
|
+static size_t matrix_interface_get_size(starpu_data_handle handle)
|
|
|
{
|
|
|
- starpu_blas_interface_t *interface =
|
|
|
+ starpu_matrix_interface_t *interface =
|
|
|
starpu_data_get_interface_on_node(handle, 0);
|
|
|
|
|
|
size_t size;
|
|
@@ -160,51 +160,51 @@ static size_t blas_interface_get_size(starpu_data_handle handle)
|
|
|
}
|
|
|
|
|
|
|
|
|
-uint32_t starpu_get_blas_nx(starpu_data_handle handle)
|
|
|
+uint32_t starpu_get_matrix_nx(starpu_data_handle handle)
|
|
|
{
|
|
|
- starpu_blas_interface_t *interface =
|
|
|
+ starpu_matrix_interface_t *interface =
|
|
|
starpu_data_get_interface_on_node(handle, 0);
|
|
|
|
|
|
return interface->nx;
|
|
|
}
|
|
|
|
|
|
-uint32_t starpu_get_blas_ny(starpu_data_handle handle)
|
|
|
+uint32_t starpu_get_matrix_ny(starpu_data_handle handle)
|
|
|
{
|
|
|
- starpu_blas_interface_t *interface =
|
|
|
+ starpu_matrix_interface_t *interface =
|
|
|
starpu_data_get_interface_on_node(handle, 0);
|
|
|
|
|
|
return interface->ny;
|
|
|
}
|
|
|
|
|
|
-uint32_t starpu_get_blas_local_ld(starpu_data_handle handle)
|
|
|
+uint32_t starpu_get_matrix_local_ld(starpu_data_handle handle)
|
|
|
{
|
|
|
unsigned node;
|
|
|
node = _starpu_get_local_memory_node();
|
|
|
|
|
|
STARPU_ASSERT(starpu_test_if_data_is_allocated_on_node(handle, node));
|
|
|
|
|
|
- starpu_blas_interface_t *interface =
|
|
|
+ starpu_matrix_interface_t *interface =
|
|
|
starpu_data_get_interface_on_node(handle, node);
|
|
|
|
|
|
return interface->ld;
|
|
|
}
|
|
|
|
|
|
-uintptr_t starpu_get_blas_local_ptr(starpu_data_handle handle)
|
|
|
+uintptr_t starpu_get_matrix_local_ptr(starpu_data_handle handle)
|
|
|
{
|
|
|
unsigned node;
|
|
|
node = _starpu_get_local_memory_node();
|
|
|
|
|
|
STARPU_ASSERT(starpu_test_if_data_is_allocated_on_node(handle, node));
|
|
|
|
|
|
- starpu_blas_interface_t *interface =
|
|
|
+ starpu_matrix_interface_t *interface =
|
|
|
starpu_data_get_interface_on_node(handle, node);
|
|
|
|
|
|
return interface->ptr;
|
|
|
}
|
|
|
|
|
|
-size_t starpu_get_blas_elemsize(starpu_data_handle handle)
|
|
|
+size_t starpu_get_matrix_elemsize(starpu_data_handle handle)
|
|
|
{
|
|
|
- starpu_blas_interface_t *interface =
|
|
|
+ starpu_matrix_interface_t *interface =
|
|
|
starpu_data_get_interface_on_node(handle, 0);
|
|
|
|
|
|
return interface->elemsize;
|
|
@@ -213,7 +213,7 @@ size_t starpu_get_blas_elemsize(starpu_data_handle handle)
|
|
|
|
|
|
|
|
|
|
|
|
-static size_t allocate_blas_buffer_on_node(starpu_data_handle handle, uint32_t dst_node)
|
|
|
+static size_t allocate_matrix_buffer_on_node(starpu_data_handle handle, uint32_t dst_node)
|
|
|
{
|
|
|
uintptr_t addr = 0;
|
|
|
unsigned fail = 0;
|
|
@@ -224,7 +224,7 @@ static size_t allocate_blas_buffer_on_node(starpu_data_handle handle, uint32_t d
|
|
|
size_t pitch;
|
|
|
#endif
|
|
|
|
|
|
- starpu_blas_interface_t *interface =
|
|
|
+ starpu_matrix_interface_t *interface =
|
|
|
starpu_data_get_interface_on_node(handle, dst_node);
|
|
|
|
|
|
uint32_t nx = interface->nx;
|
|
@@ -275,9 +275,9 @@ static size_t allocate_blas_buffer_on_node(starpu_data_handle handle, uint32_t d
|
|
|
return allocated_memory;
|
|
|
}
|
|
|
|
|
|
-static void liberate_blas_buffer_on_node(void *interface, uint32_t node)
|
|
|
+static void liberate_matrix_buffer_on_node(void *interface, uint32_t node)
|
|
|
{
|
|
|
- starpu_blas_interface_t *blas_interface = interface;
|
|
|
+ starpu_matrix_interface_t *matrix_interface = interface;
|
|
|
|
|
|
#ifdef STARPU_USE_CUDA
|
|
|
cudaError_t status;
|
|
@@ -286,11 +286,11 @@ static void liberate_blas_buffer_on_node(void *interface, uint32_t node)
|
|
|
starpu_node_kind kind = _starpu_get_node_kind(node);
|
|
|
switch(kind) {
|
|
|
case STARPU_RAM:
|
|
|
- free((void*)blas_interface->ptr);
|
|
|
+ free((void*)matrix_interface->ptr);
|
|
|
break;
|
|
|
#ifdef STARPU_USE_CUDA
|
|
|
case STARPU_CUDA_RAM:
|
|
|
- status = cudaFree((void*)blas_interface->ptr);
|
|
|
+ status = cudaFree((void*)matrix_interface->ptr);
|
|
|
if (STARPU_UNLIKELY(status))
|
|
|
STARPU_CUDA_REPORT_ERROR(status);
|
|
|
|
|
@@ -304,40 +304,40 @@ static void liberate_blas_buffer_on_node(void *interface, uint32_t node)
|
|
|
#ifdef STARPU_USE_CUDA
|
|
|
static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
|
|
|
{
|
|
|
- starpu_blas_interface_t *src_blas;
|
|
|
- starpu_blas_interface_t *dst_blas;
|
|
|
+ starpu_matrix_interface_t *src_matrix;
|
|
|
+ starpu_matrix_interface_t *dst_matrix;
|
|
|
|
|
|
- src_blas = starpu_data_get_interface_on_node(handle, src_node);
|
|
|
- dst_blas = starpu_data_get_interface_on_node(handle, dst_node);
|
|
|
+ src_matrix = starpu_data_get_interface_on_node(handle, src_node);
|
|
|
+ dst_matrix = starpu_data_get_interface_on_node(handle, dst_node);
|
|
|
|
|
|
- size_t elemsize = src_blas->elemsize;
|
|
|
+ size_t elemsize = src_matrix->elemsize;
|
|
|
|
|
|
cudaError_t cures;
|
|
|
- cures = cudaMemcpy2D((char *)dst_blas->ptr, dst_blas->ld*elemsize,
|
|
|
- (char *)src_blas->ptr, src_blas->ld*elemsize,
|
|
|
- src_blas->nx*elemsize, src_blas->ny, cudaMemcpyDeviceToHost);
|
|
|
+ cures = cudaMemcpy2D((char *)dst_matrix->ptr, dst_matrix->ld*elemsize,
|
|
|
+ (char *)src_matrix->ptr, src_matrix->ld*elemsize,
|
|
|
+ src_matrix->nx*elemsize, src_matrix->ny, cudaMemcpyDeviceToHost);
|
|
|
if (STARPU_UNLIKELY(cures))
|
|
|
STARPU_CUDA_REPORT_ERROR(cures);
|
|
|
|
|
|
- STARPU_TRACE_DATA_COPY(src_node, dst_node, (size_t)src_blas->nx*src_blas->ny*src_blas->elemsize);
|
|
|
+ STARPU_TRACE_DATA_COPY(src_node, dst_node, (size_t)src_matrix->nx*src_matrix->ny*src_matrix->elemsize);
|
|
|
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
|
|
|
{
|
|
|
- starpu_blas_interface_t *src_blas;
|
|
|
- starpu_blas_interface_t *dst_blas;
|
|
|
+ starpu_matrix_interface_t *src_matrix;
|
|
|
+ starpu_matrix_interface_t *dst_matrix;
|
|
|
|
|
|
- src_blas = starpu_data_get_interface_on_node(handle, src_node);
|
|
|
- dst_blas = starpu_data_get_interface_on_node(handle, dst_node);
|
|
|
+ src_matrix = starpu_data_get_interface_on_node(handle, src_node);
|
|
|
+ dst_matrix = starpu_data_get_interface_on_node(handle, dst_node);
|
|
|
|
|
|
- size_t elemsize = src_blas->elemsize;
|
|
|
+ size_t elemsize = src_matrix->elemsize;
|
|
|
|
|
|
cudaError_t cures;
|
|
|
- cures = cudaMemcpy2D((char *)dst_blas->ptr, dst_blas->ld*elemsize,
|
|
|
- (char *)src_blas->ptr, src_blas->ld*elemsize,
|
|
|
- src_blas->nx*elemsize, src_blas->ny, cudaMemcpyHostToDevice);
|
|
|
+ cures = cudaMemcpy2D((char *)dst_matrix->ptr, dst_matrix->ld*elemsize,
|
|
|
+ (char *)src_matrix->ptr, src_matrix->ld*elemsize,
|
|
|
+ src_matrix->nx*elemsize, src_matrix->ny, cudaMemcpyHostToDevice);
|
|
|
if (STARPU_UNLIKELY(cures))
|
|
|
STARPU_CUDA_REPORT_ERROR(cures);
|
|
|
|
|
@@ -345,31 +345,31 @@ static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32
|
|
|
if (STARPU_UNLIKELY(cures))
|
|
|
STARPU_CUDA_REPORT_ERROR(cures);
|
|
|
|
|
|
- STARPU_TRACE_DATA_COPY(src_node, dst_node, (size_t)src_blas->nx*src_blas->ny*src_blas->elemsize);
|
|
|
+ STARPU_TRACE_DATA_COPY(src_node, dst_node, (size_t)src_matrix->nx*src_matrix->ny*src_matrix->elemsize);
|
|
|
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
static int copy_cuda_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream)
|
|
|
{
|
|
|
- starpu_blas_interface_t *src_blas;
|
|
|
- starpu_blas_interface_t *dst_blas;
|
|
|
+ starpu_matrix_interface_t *src_matrix;
|
|
|
+ starpu_matrix_interface_t *dst_matrix;
|
|
|
|
|
|
- src_blas = starpu_data_get_interface_on_node(handle, src_node);
|
|
|
- dst_blas = starpu_data_get_interface_on_node(handle, dst_node);
|
|
|
+ src_matrix = starpu_data_get_interface_on_node(handle, src_node);
|
|
|
+ dst_matrix = starpu_data_get_interface_on_node(handle, dst_node);
|
|
|
|
|
|
- size_t elemsize = src_blas->elemsize;
|
|
|
+ size_t elemsize = src_matrix->elemsize;
|
|
|
|
|
|
cudaError_t cures;
|
|
|
- cures = cudaMemcpy2DAsync((char *)dst_blas->ptr, dst_blas->ld*elemsize,
|
|
|
- (char *)src_blas->ptr, (size_t)src_blas->ld*elemsize,
|
|
|
- (size_t)src_blas->nx*elemsize, src_blas->ny,
|
|
|
+ cures = cudaMemcpy2DAsync((char *)dst_matrix->ptr, dst_matrix->ld*elemsize,
|
|
|
+ (char *)src_matrix->ptr, (size_t)src_matrix->ld*elemsize,
|
|
|
+ (size_t)src_matrix->nx*elemsize, src_matrix->ny,
|
|
|
cudaMemcpyDeviceToHost, *stream);
|
|
|
if (cures)
|
|
|
{
|
|
|
- cures = cudaMemcpy2D((char *)dst_blas->ptr, dst_blas->ld*elemsize,
|
|
|
- (char *)src_blas->ptr, (size_t)src_blas->ld*elemsize,
|
|
|
- (size_t)src_blas->nx*elemsize, (size_t)src_blas->ny,
|
|
|
+ cures = cudaMemcpy2D((char *)dst_matrix->ptr, dst_matrix->ld*elemsize,
|
|
|
+ (char *)src_matrix->ptr, (size_t)src_matrix->ld*elemsize,
|
|
|
+ (size_t)src_matrix->nx*elemsize, (size_t)src_matrix->ny,
|
|
|
cudaMemcpyDeviceToHost);
|
|
|
|
|
|
if (STARPU_UNLIKELY(cures))
|
|
@@ -383,31 +383,31 @@ static int copy_cuda_to_ram_async(starpu_data_handle handle, uint32_t src_node,
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
- STARPU_TRACE_DATA_COPY(src_node, dst_node, (size_t)src_blas->nx*src_blas->ny*src_blas->elemsize);
|
|
|
+ STARPU_TRACE_DATA_COPY(src_node, dst_node, (size_t)src_matrix->nx*src_matrix->ny*src_matrix->elemsize);
|
|
|
|
|
|
return EAGAIN;
|
|
|
}
|
|
|
|
|
|
static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream)
|
|
|
{
|
|
|
- starpu_blas_interface_t *src_blas;
|
|
|
- starpu_blas_interface_t *dst_blas;
|
|
|
+ starpu_matrix_interface_t *src_matrix;
|
|
|
+ starpu_matrix_interface_t *dst_matrix;
|
|
|
|
|
|
- src_blas = starpu_data_get_interface_on_node(handle, src_node);
|
|
|
- dst_blas = starpu_data_get_interface_on_node(handle, dst_node);
|
|
|
+ src_matrix = starpu_data_get_interface_on_node(handle, src_node);
|
|
|
+ dst_matrix = starpu_data_get_interface_on_node(handle, dst_node);
|
|
|
|
|
|
- size_t elemsize = src_blas->elemsize;
|
|
|
+ size_t elemsize = src_matrix->elemsize;
|
|
|
|
|
|
cudaError_t cures;
|
|
|
- cures = cudaMemcpy2DAsync((char *)dst_blas->ptr, dst_blas->ld*elemsize,
|
|
|
- (char *)src_blas->ptr, src_blas->ld*elemsize,
|
|
|
- src_blas->nx*elemsize, src_blas->ny,
|
|
|
+ cures = cudaMemcpy2DAsync((char *)dst_matrix->ptr, dst_matrix->ld*elemsize,
|
|
|
+ (char *)src_matrix->ptr, src_matrix->ld*elemsize,
|
|
|
+ src_matrix->nx*elemsize, src_matrix->ny,
|
|
|
cudaMemcpyHostToDevice, *stream);
|
|
|
if (cures)
|
|
|
{
|
|
|
- cures = cudaMemcpy2D((char *)dst_blas->ptr, dst_blas->ld*elemsize,
|
|
|
- (char *)src_blas->ptr, src_blas->ld*elemsize,
|
|
|
- src_blas->nx*elemsize, src_blas->ny, cudaMemcpyHostToDevice);
|
|
|
+ cures = cudaMemcpy2D((char *)dst_matrix->ptr, dst_matrix->ld*elemsize,
|
|
|
+ (char *)src_matrix->ptr, src_matrix->ld*elemsize,
|
|
|
+ src_matrix->nx*elemsize, src_matrix->ny, cudaMemcpyHostToDevice);
|
|
|
cudaThreadSynchronize();
|
|
|
|
|
|
if (STARPU_UNLIKELY(cures))
|
|
@@ -416,7 +416,7 @@ static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node,
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
- STARPU_TRACE_DATA_COPY(src_node, dst_node, (size_t)src_blas->nx*src_blas->ny*src_blas->elemsize);
|
|
|
+ STARPU_TRACE_DATA_COPY(src_node, dst_node, (size_t)src_matrix->nx*src_matrix->ny*src_matrix->elemsize);
|
|
|
|
|
|
return EAGAIN;
|
|
|
}
|
|
@@ -426,22 +426,22 @@ static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node,
|
|
|
|
|
|
static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
|
|
|
{
|
|
|
- starpu_blas_interface_t *src_blas;
|
|
|
- starpu_blas_interface_t *dst_blas;
|
|
|
+ starpu_matrix_interface_t *src_matrix;
|
|
|
+ starpu_matrix_interface_t *dst_matrix;
|
|
|
|
|
|
- src_blas = starpu_data_get_interface_on_node(handle, src_node);
|
|
|
- dst_blas = starpu_data_get_interface_on_node(handle, dst_node);
|
|
|
+ src_matrix = starpu_data_get_interface_on_node(handle, src_node);
|
|
|
+ dst_matrix = starpu_data_get_interface_on_node(handle, dst_node);
|
|
|
|
|
|
unsigned y;
|
|
|
- uint32_t nx = dst_blas->nx;
|
|
|
- uint32_t ny = dst_blas->ny;
|
|
|
- size_t elemsize = dst_blas->elemsize;
|
|
|
+ uint32_t nx = dst_matrix->nx;
|
|
|
+ uint32_t ny = dst_matrix->ny;
|
|
|
+ size_t elemsize = dst_matrix->elemsize;
|
|
|
|
|
|
- uint32_t ld_src = src_blas->ld;
|
|
|
- uint32_t ld_dst = dst_blas->ld;
|
|
|
+ uint32_t ld_src = src_matrix->ld;
|
|
|
+ uint32_t ld_dst = dst_matrix->ld;
|
|
|
|
|
|
- uintptr_t ptr_src = src_blas->ptr;
|
|
|
- uintptr_t ptr_dst = dst_blas->ptr;
|
|
|
+ uintptr_t ptr_src = src_matrix->ptr;
|
|
|
+ uintptr_t ptr_dst = dst_matrix->ptr;
|
|
|
|
|
|
|
|
|
for (y = 0; y < ny; y++)
|