|
@@ -25,20 +25,20 @@
|
|
|
#include <starpu_opencl.h>
|
|
#include <starpu_opencl.h>
|
|
|
#include <drivers/opencl/driver_opencl.h>
|
|
#include <drivers/opencl/driver_opencl.h>
|
|
|
|
|
|
|
|
-static int copy_ram_to_ram(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node __attribute__((unused)));
|
|
|
|
|
|
|
+static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
|
|
|
#ifdef STARPU_USE_CUDA
|
|
#ifdef STARPU_USE_CUDA
|
|
|
-static int copy_ram_to_cuda(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node __attribute__((unused)));
|
|
|
|
|
-static int copy_cuda_to_ram(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node __attribute__((unused)));
|
|
|
|
|
-static int copy_cuda_to_cuda(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node __attribute__((unused)));
|
|
|
|
|
-static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node __attribute__((unused)), cudaStream_t stream);
|
|
|
|
|
-static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node __attribute__((unused)), cudaStream_t stream);
|
|
|
|
|
-//static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node __attribute__((unused)), cudaStream_t stream);
|
|
|
|
|
|
|
+static int copy_ram_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
|
|
|
|
|
+static int copy_cuda_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
|
|
|
|
|
+static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
|
|
|
|
|
+static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, cudaStream_t stream);
|
|
|
|
|
+static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, cudaStream_t stream);
|
|
|
|
|
+//static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, cudaStream_t stream);
|
|
|
#endif
|
|
#endif
|
|
|
#ifdef STARPU_USE_OPENCL
|
|
#ifdef STARPU_USE_OPENCL
|
|
|
-static int copy_ram_to_opencl(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node __attribute__((unused)));
|
|
|
|
|
-static int copy_opencl_to_ram(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node __attribute__((unused)));
|
|
|
|
|
-static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node __attribute__((unused)), void *_event);
|
|
|
|
|
-static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node __attribute__((unused)), void *_event);
|
|
|
|
|
|
|
+static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
|
|
|
|
|
+static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
|
|
|
|
|
+static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, void *_event);
|
|
|
|
|
+static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, void *_event);
|
|
|
#endif
|
|
#endif
|
|
|
|
|
|
|
|
static const struct starpu_data_copy_methods matrix_copy_data_methods_s = {
|
|
static const struct starpu_data_copy_methods matrix_copy_data_methods_s = {
|
|
@@ -365,7 +365,7 @@ static void free_matrix_buffer_on_node(void *data_interface, uint32_t node)
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
#ifdef STARPU_USE_CUDA
|
|
#ifdef STARPU_USE_CUDA
|
|
|
-static int copy_cuda_common(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node __attribute__((unused)), enum cudaMemcpyKind kind, int is_async, cudaStream_t stream)
|
|
|
|
|
|
|
+static int copy_cuda_common(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, enum cudaMemcpyKind kind, int is_async, cudaStream_t stream)
|
|
|
{
|
|
{
|
|
|
starpu_matrix_interface_t *src_matrix = src_interface;
|
|
starpu_matrix_interface_t *src_matrix = src_interface;
|
|
|
starpu_matrix_interface_t *dst_matrix = dst_interface;
|
|
starpu_matrix_interface_t *dst_matrix = dst_interface;
|
|
@@ -417,7 +417,7 @@ static int copy_cuda_common(void *src_interface, unsigned src_node __attribute__
|
|
|
|
|
|
|
|
/* XXX this is broken : we need to find a way to fix that ! */
|
|
/* XXX this is broken : we need to find a way to fix that ! */
|
|
|
#if 0
|
|
#if 0
|
|
|
-static int copy_cuda_peer(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node __attribute__((unused)), int is_async, cudaStream_t stream)
|
|
|
|
|
|
|
+static int copy_cuda_peer(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, int is_async, cudaStream_t stream)
|
|
|
{
|
|
{
|
|
|
starpu_matrix_interface_t *src_matrix = src_interface;
|
|
starpu_matrix_interface_t *src_matrix = src_interface;
|
|
|
starpu_matrix_interface_t *dst_matrix = dst_interface;
|
|
starpu_matrix_interface_t *dst_matrix = dst_interface;
|
|
@@ -494,17 +494,17 @@ static int copy_cuda_peer(void *src_interface, unsigned src_node __attribute__((
|
|
|
}
|
|
}
|
|
|
#endif
|
|
#endif
|
|
|
|
|
|
|
|
-static int copy_cuda_to_ram(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node __attribute__((unused)))
|
|
|
|
|
|
|
+static int copy_cuda_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
|
|
|
{
|
|
{
|
|
|
return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToHost, 0, 0);
|
|
return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToHost, 0, 0);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-static int copy_ram_to_cuda(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node __attribute__((unused)))
|
|
|
|
|
|
|
+static int copy_ram_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
|
|
|
{
|
|
{
|
|
|
return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyHostToDevice, 0, 0);
|
|
return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyHostToDevice, 0, 0);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-static int copy_cuda_to_cuda(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node __attribute__((unused)))
|
|
|
|
|
|
|
+static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
|
|
|
{
|
|
{
|
|
|
if (src_node == dst_node)
|
|
if (src_node == dst_node)
|
|
|
return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToDevice, 0, 0);
|
|
return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToDevice, 0, 0);
|
|
@@ -516,18 +516,18 @@ static int copy_cuda_to_cuda(void *src_interface, unsigned src_node __attribute_
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node __attribute__((unused)), cudaStream_t stream)
|
|
|
|
|
|
|
+static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, cudaStream_t stream)
|
|
|
{
|
|
{
|
|
|
return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToHost, 1, stream);
|
|
return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToHost, 1, stream);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node __attribute__((unused)), cudaStream_t stream)
|
|
|
|
|
|
|
+static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, cudaStream_t stream)
|
|
|
{
|
|
{
|
|
|
return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyHostToDevice, 1, stream);
|
|
return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyHostToDevice, 1, stream);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
#if 0
|
|
#if 0
|
|
|
-static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node __attribute__((unused)), cudaStream_t stream)
|
|
|
|
|
|
|
+static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, cudaStream_t stream)
|
|
|
{
|
|
{
|
|
|
if (src_node == dst_node)
|
|
if (src_node == dst_node)
|
|
|
return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToDevice, 1, stream);
|
|
return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToDevice, 1, stream);
|
|
@@ -538,7 +538,7 @@ static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node __attr
|
|
|
#endif // STARPU_USE_CUDA
|
|
#endif // STARPU_USE_CUDA
|
|
|
|
|
|
|
|
#ifdef STARPU_USE_OPENCL
|
|
#ifdef STARPU_USE_OPENCL
|
|
|
-static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node __attribute__((unused)), void *_event)
|
|
|
|
|
|
|
+static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, void *_event)
|
|
|
{
|
|
{
|
|
|
starpu_matrix_interface_t *src_matrix = src_interface;
|
|
starpu_matrix_interface_t *src_matrix = src_interface;
|
|
|
starpu_matrix_interface_t *dst_matrix = dst_interface;
|
|
starpu_matrix_interface_t *dst_matrix = dst_interface;
|
|
@@ -558,7 +558,7 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node __att
|
|
|
return ret;
|
|
return ret;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node __attribute__((unused)), void *_event)
|
|
|
|
|
|
|
+static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, void *_event)
|
|
|
{
|
|
{
|
|
|
starpu_matrix_interface_t *src_matrix = src_interface;
|
|
starpu_matrix_interface_t *src_matrix = src_interface;
|
|
|
starpu_matrix_interface_t *dst_matrix = dst_interface;
|
|
starpu_matrix_interface_t *dst_matrix = dst_interface;
|
|
@@ -579,12 +579,12 @@ static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node __att
|
|
|
return ret;
|
|
return ret;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-static int copy_ram_to_opencl(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node __attribute__((unused)))
|
|
|
|
|
|
|
+static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
|
|
|
{
|
|
{
|
|
|
return copy_ram_to_opencl_async(src_interface, src_node, dst_interface, dst_node, NULL);
|
|
return copy_ram_to_opencl_async(src_interface, src_node, dst_interface, dst_node, NULL);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-static int copy_opencl_to_ram(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node __attribute__((unused)))
|
|
|
|
|
|
|
+static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
|
|
|
{
|
|
{
|
|
|
return copy_opencl_to_ram_async(src_interface, src_node, dst_interface, dst_node, NULL);
|
|
return copy_opencl_to_ram_async(src_interface, src_node, dst_interface, dst_node, NULL);
|
|
|
}
|
|
}
|
|
@@ -592,7 +592,7 @@ static int copy_opencl_to_ram(void *src_interface, unsigned src_node __attribute
|
|
|
#endif
|
|
#endif
|
|
|
|
|
|
|
|
/* as not all platform easily have a lib installed ... */
|
|
/* as not all platform easily have a lib installed ... */
|
|
|
-static int copy_ram_to_ram(void *src_interface, unsigned src_node __attribute__((unused)), void *dst_interface, unsigned dst_node __attribute__((unused)))
|
|
|
|
|
|
|
+static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
|
|
|
{
|
|
{
|
|
|
starpu_matrix_interface_t *src_matrix = src_interface;
|
|
starpu_matrix_interface_t *src_matrix = src_interface;
|
|
|
starpu_matrix_interface_t *dst_matrix = dst_interface;
|
|
starpu_matrix_interface_t *dst_matrix = dst_interface;
|