Переглянути джерело

Add a new interface: COO matrix.

Cyril Roelandt 12 роки тому
батько
коміт
81549ea081

+ 1 - 0
ChangeLog

@@ -35,6 +35,7 @@ New features:
 	  unregistered memory buffers.
   * SOCL
         - Manual mapping of commands on specific devices is now possible
+  * New interface: COO matrix.
 
 Changes:
   * Fix the block filter functions.

+ 39 - 0
doc/chapters/basic-api.texi

@@ -686,6 +686,12 @@ Sparse Row Representation) sparse matrix interface.
 TODO
 @end deftypefun
 
+@deftypefun void starpu_coo_data_register(starpu_data_handle_t *@var{handleptr}, uint32_t @var{home_node}, uint32_t @var{nx}, uint32_t @var{ny}, uint32_t @var{n_values}, uint32_t *@var{columns}, uint32_t *@var{rows}, uintptr_t @var{values}, size_t @var{elemsize});
+Register the @var{nx}x@var{ny} 2D matrix given in the COO format, using the
+@var{columns}, @var{rows}, @var{values} arrays, which must have @var{n_values}
+elements of size @var{elemsize}. Initialize @var{handleptr}.
+@end deftypefun
+
 @deftypefun {void *} starpu_data_get_interface_on_node (starpu_data_handle_t @var{handle}, unsigned @var{memory_node})
 Return the interface associated with @var{handle} on @var{memory_node}.
 @end deftypefun
@@ -708,6 +714,7 @@ The different values are:
 @item @code{STARPU_VARIABLE_INTERFACE_ID}
 @item @code{STARPU_VOID_INTERFACE_ID}
 @item @code{STARPU_MULTIFORMAT_INTERFACE_ID}
+@item @code{STARPU_COO_INTERCACE_ID}
 @item @code{STARPU_NINTERFACES_ID}: number of data interfaces
 @end table
 @end deftp
@@ -720,6 +727,7 @@ The different values are:
 * Accessing Block Data Interfaces::
 * Accessing BCSR Data Interfaces::
 * Accessing CSR Data Interfaces::
+* Accessing COO Data Interfaces::
 @end menu
 
 @node Accessing Handle
@@ -1068,6 +1076,37 @@ of the @var{interface} start.
 Return the size of the elements registered into the matrix designated by @var{interface}.
 @end defmac
 
+
+@node Accessing COO Data Interfaces
+@subsubsection COO Data Interfaces
+@defmac STARPU_COO_GET_COLUMNS({void *}@var{interface})
+Return a pointer to the column array of the matrix designated by
+@var{interface}.
+@end defmac
+@defmac STARPU_COO_GET_ROWS(interface)
+Return a pointer to the rows array of the matrix designated by @var{interface}.
+@end defmac
+@defmac STARPU_COO_GET_VALUES(interface)
+Return a pointer to the values array of the matrix designated by
+@var{interface}.
+@end defmac
+@defmac STARPU_COO_GET_NX(interface)
+Return the number of elements on the x-axis of the matrix designated by
+@var{interface}.
+@end defmac
+@defmac STARPU_COO_GET_NY(interface)
+Return the number of elements on the y-axis of the matrix designated by
+@var{interface}.
+@end defmac
+@defmac STARPU_COO_GET_NVALUES(interface)
+Return the number of values registered in the matrix designated by
+@var{interface}.
+@end defmac
+@defmac STARPU_COO_GET_ELEMSIZE(interface)
+Return the size of the elements registered into the matrix designated by
+@var{interface}.
+@end defmac
+
 @node Data Partition
 @section Data Partition
 

+ 36 - 1
include/starpu_data_interfaces.h

@@ -95,7 +95,8 @@ enum starpu_data_interface_id
 	STARPU_VARIABLE_INTERFACE_ID=5,
 	STARPU_VOID_INTERFACE_ID=6,
 	STARPU_MULTIFORMAT_INTERFACE_ID=7,
-	STARPU_MAX_INTERFACE_ID=8 /* maximum number of data interfaces */
+	STARPU_COO_INTERFACE_ID=8,
+	STARPU_MAX_INTERFACE_ID=9 /* maximum number of data interfaces */
 };
 
 struct starpu_data_interface_ops
@@ -194,6 +195,40 @@ size_t starpu_matrix_get_elemsize(starpu_data_handle_t handle);
 #define STARPU_MATRIX_GET_LD(interface)	(((struct starpu_matrix_interface *)(interface))->ld)
 #define STARPU_MATRIX_GET_ELEMSIZE(interface)	(((struct starpu_matrix_interface *)(interface))->elemsize)
 
+/*
+ * COO matrices.
+ */
+struct starpu_coo_interface
+{
+	uint32_t  *columns;
+	uint32_t  *rows;
+	uintptr_t values;
+	uint32_t  nx;
+	uint32_t  ny;
+	uint32_t  n_values;
+	size_t    elemsize;
+};
+
+void
+starpu_coo_data_register(starpu_data_handle_t *handleptr, uint32_t home_node,
+			 uint32_t nx, uint32_t ny, uint32_t n_values,
+			 uint32_t *columns, uint32_t *rows,
+			 uintptr_t values, size_t elemsize);
+
+#define STARPU_COO_GET_COLUMNS(interface) \
+	(((struct starpu_coo_interface *)(interface))->columns)
+#define STARPU_COO_GET_ROWS(interface) \
+	(((struct starpu_coo_interface *)(interface))->rows)
+#define STARPU_COO_GET_VALUES(interface) \
+	(((struct starpu_coo_interface *)(interface))->values)
+#define STARPU_COO_GET_NX(interface) \
+	(((struct starpu_coo_interface *)(interface))->nx)
+#define STARPU_COO_GET_NY(interface) \
+	(((struct starpu_coo_interface *)(interface))->ny)
+#define STARPU_COO_GET_NVALUES(interface) \
+	(((struct starpu_coo_interface *)(interface))->n_values)
+#define STARPU_COO_GET_ELEMSIZE(interface) \
+	(((struct starpu_coo_interface *)(interface))->elemsize)
 
 /* BLOCK interface for 3D dense blocks */
 /* TODO: rename to 3dmatrix? */

+ 1 - 0
src/Makefile.am

@@ -176,6 +176,7 @@ libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = 						\
 	datawizard/reduction.c					\
 	datawizard/interfaces/data_interface.c			\
 	datawizard/interfaces/bcsr_interface.c			\
+	datawizard/interfaces/coo_interface.c                   \
 	datawizard/interfaces/csr_interface.c			\
 	datawizard/interfaces/matrix_filters.c			\
 	datawizard/interfaces/matrix_interface.c		\

+ 607 - 0
src/datawizard/interfaces/coo_interface.c

@@ -0,0 +1,607 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include <common/fxt.h>
+
+static int
+copy_ram_to_ram(void *src_interface, STARPU_ATTRIBUTE_UNUSED unsigned src_node,
+		void *dst_interface, STARPU_ATTRIBUTE_UNUSED unsigned dst_node)
+{
+	size_t size = 0, total_size = 0;
+	struct starpu_coo_interface *src_coo, *dst_coo;
+
+	src_coo = (struct starpu_coo_interface *) src_interface;
+	dst_coo = (struct starpu_coo_interface *) dst_interface;
+
+	size = src_coo->n_values * sizeof(src_coo->columns[0]);
+	total_size += size;
+	memcpy((void *) dst_coo->columns, (void *) src_coo->columns, size);
+
+	total_size += size;
+	memcpy((void *) dst_coo->rows, (void *) src_coo->rows, size);
+
+	size = src_coo->n_values * src_coo->elemsize;
+	total_size += size;
+	memcpy((void *) dst_coo->values, (void *) src_coo->values, size);
+
+	_STARPU_TRACE_DATA_COPY(src_node, dst_node, total_size);
+
+	return 0;
+}
+
+#ifdef STARPU_USE_CUDA
+static int
+copy_cuda_async_sync(void *src_interface, unsigned src_node,
+		     void *dst_interface, unsigned dst_node,
+		     cudaStream_t stream, enum cudaMemcpyKind kind)
+{
+	int ret;
+	size_t size = 0, total_size = 0;
+	struct starpu_coo_interface *src_coo, *dst_coo;
+
+	src_coo = (struct starpu_coo_interface *) src_interface;
+	dst_coo = (struct starpu_coo_interface *) dst_interface;
+
+	size = src_coo->n_values * sizeof(src_coo->columns[0]);
+	total_size += size;
+	ret = starpu_cuda_copy_async_sync(
+		(void *) src_coo->columns,
+		src_node,
+		(void *) dst_coo->columns,
+		dst_node,
+		size,
+		stream,
+		kind);
+	if (ret == 0)
+		stream = NULL;
+
+	total_size += size;
+	ret = starpu_cuda_copy_async_sync(
+		(void *) src_coo->rows,
+		src_node,
+		(void *) dst_coo->rows,
+		dst_node,
+		size,
+		stream,
+		kind);
+	if (ret == 0)
+		stream = NULL;
+
+	size = src_coo->n_values * src_coo->elemsize;
+	total_size += size;
+	ret = starpu_cuda_copy_async_sync(
+		(void *) src_coo->values,
+		src_node,
+		(void *) dst_coo->values,
+		dst_node,
+		size,
+		stream,
+		kind);
+
+	_STARPU_TRACE_DATA_COPY(src_node, dst_node, total_size);
+	return ret;
+}
+
+static int
+copy_ram_to_cuda(void *src_interface, unsigned src_node,
+		 void *dst_interface, unsigned dst_node)
+{
+	return copy_cuda_async_sync(src_interface, src_node,
+				    dst_interface, dst_node,
+				    NULL, cudaMemcpyHostToDevice);
+}
+
+static int
+copy_cuda_to_ram(void *src_interface, unsigned src_node,
+		 void *dst_interface, unsigned dst_node)
+{
+	return copy_cuda_async_sync(src_interface, src_node,
+				    dst_interface, dst_node,
+				    NULL, cudaMemcpyDeviceToHost);
+}
+
+static int
+copy_ram_to_cuda_async(void *src_interface, unsigned src_node,
+		       void *dst_interface, unsigned dst_node,
+		       cudaStream_t stream)
+{
+	return copy_cuda_async_sync(src_interface, src_node,
+				    dst_interface, dst_node,
+				    stream, cudaMemcpyHostToDevice);
+}
+
+static int
+copy_cuda_to_ram_async(void *src_interface, unsigned src_node,
+		       void *dst_interface, unsigned dst_node,
+		       cudaStream_t stream)
+{
+	return copy_cuda_async_sync(src_interface, src_node,
+				    dst_interface, dst_node,
+				    stream, cudaMemcpyDeviceToHost);
+}
+#endif /* !STARPU_USE_CUDA */
+
+#ifdef STARPU_USE_OPENCL
+static int
+copy_ram_to_opencl_async(void *src_interface, unsigned src_node,
+			 void *dst_interface, unsigned dst_node,
+			 cl_event *event)
+{
+	int ret = 0;
+	cl_int err;
+	size_t size = 0, total_size = 0;
+	struct starpu_coo_interface *src_coo, *dst_coo;
+
+	src_coo = (struct starpu_coo_interface *) src_interface;
+	dst_coo = (struct starpu_coo_interface *) dst_interface;
+
+
+	size = src_coo->n_values * sizeof(src_coo->columns[0]);
+	total_size += size;
+	err = starpu_opencl_copy_ram_to_opencl(
+		(void *) src_coo->columns,
+		src_node,
+		(cl_mem) dst_coo->columns,
+		dst_node,
+		size,
+		0,
+		event,
+		NULL);
+	if (STARPU_UNLIKELY(err))
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	total_size += size;
+	err = starpu_opencl_copy_ram_to_opencl(
+		(void *) src_coo->rows,
+		src_node,
+		(cl_mem) dst_coo->rows,
+		dst_node,
+		size,
+		0,
+		event,
+		NULL);
+	if (STARPU_UNLIKELY(err))
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	size = src_coo->n_values * src_coo->elemsize;
+	total_size += size;
+	err = starpu_opencl_copy_ram_to_opencl(
+		(void *) src_coo->values,
+		src_node,
+		(cl_mem) dst_coo->values,
+		dst_node,
+		size,
+		0,
+		event,
+		&ret);
+	if (STARPU_UNLIKELY(err))
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	_STARPU_TRACE_DATA_COPY(src_node, dst_node, total_size);
+
+	return ret;
+}
+
+static int
+copy_opencl_to_ram_async(void *src_interface, unsigned src_node,
+			 void *dst_interface, unsigned dst_node,
+			 cl_event *event)
+{
+	int ret = 0;
+	cl_int err;
+	size_t size = 0, total_size = 0;
+	struct starpu_coo_interface *src_coo, *dst_coo;
+
+	src_coo = (struct starpu_coo_interface *) src_interface;
+	dst_coo = (struct starpu_coo_interface *) dst_interface;
+
+	size = src_coo->n_values * sizeof(src_coo->columns[0]);
+	total_size += size;
+	err = starpu_opencl_copy_opencl_to_ram(
+		(void *) src_coo->columns,
+		src_node,
+		(cl_mem) dst_coo->columns,
+		dst_node,
+		size,
+		0,
+		event,
+		NULL);
+	if (STARPU_UNLIKELY(err))
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	total_size += size;
+	err = starpu_opencl_copy_opencl_to_ram(
+		(void *) src_coo->rows,
+		src_node,
+		(cl_mem) dst_coo->rows,
+		dst_node,
+		size,
+		0,
+		event,
+		NULL);
+	if (STARPU_UNLIKELY(err))
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	size = src_coo->n_values * src_coo->elemsize;
+	total_size += size;
+	err = starpu_opencl_copy_opencl_to_ram(
+		(void *) src_coo->values,
+		src_node,
+		(cl_mem) dst_coo->values,
+		dst_node,
+		size,
+		0,
+		event,
+		&ret);
+	if (STARPU_UNLIKELY(err))
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	_STARPU_TRACE_DATA_COPY(src_node, dst_node, total_size);
+
+	return ret;
+}
+
+static int
+copy_ram_to_opencl(void *src_interface, unsigned src_node,
+		   void *dst_interface, unsigned dst_node)
+{
+	return copy_ram_to_opencl_async(src_interface, src_node,
+					dst_interface, dst_node,
+					NULL);
+}
+static int
+copy_opencl_to_ram(void *src_interface, unsigned src_node,
+		   void *dst_interface, unsigned dst_node)
+{
+	return copy_opencl_to_ram_async(src_interface, src_node,
+					dst_interface, dst_node,
+					NULL);
+}
+#endif /* !STARPU_USE_OPENCL */
+
+static struct starpu_data_copy_methods coo_copy_data_methods =
+{
+	.ram_to_ram          = copy_ram_to_ram,
+#ifdef STARPU_USE_CUDA
+	.ram_to_cuda         = copy_ram_to_cuda,
+	.cuda_to_ram         = copy_cuda_to_ram,
+	.ram_to_cuda_async   = copy_ram_to_cuda_async,
+	.cuda_to_ram_async   = copy_cuda_to_ram_async,
+	.cuda_to_cuda        = NULL, /* TODO */
+#ifdef NO_STRIDE
+	.cuda_to_cuda_async  = NULL, /* TODO */
+#endif
+#endif /* !STARPU_USE_CUDA */
+#ifdef STARPU_USE_OPENCL
+	.ram_to_opencl       = copy_ram_to_opencl,
+	.opencl_to_ram       = copy_opencl_to_ram,
+	.ram_to_opencl_async = copy_ram_to_opencl_async,
+	.opencl_to_ram_async = copy_opencl_to_ram_async,
+#endif /* !STARPU_USE_OPENCL */
+};
+
+static void
+register_coo_handle(starpu_data_handle_t handle, uint32_t home_node,
+		    void *data_interface)
+{
+	struct starpu_coo_interface *coo_interface =
+		(struct starpu_coo_interface *) data_interface;
+
+	unsigned node;
+	for (node = 0; node < STARPU_MAXNODES; node++)
+	{
+		struct starpu_coo_interface *local_interface;
+		local_interface = (struct starpu_coo_interface *)
+			starpu_data_get_interface_on_node(handle, node);
+
+		if (node == home_node)
+		{
+			local_interface->values = coo_interface->values;
+			local_interface->columns = coo_interface->columns;
+			local_interface->rows = coo_interface->rows;
+		}
+		else
+		{
+			local_interface->values = 0;
+			local_interface->columns = 0;
+			local_interface->rows = 0;
+		}
+
+		local_interface->nx = coo_interface->nx;
+		local_interface->ny = coo_interface->ny;
+		local_interface->n_values = coo_interface->n_values;
+		local_interface->elemsize = coo_interface->elemsize;
+	}
+}
+
+static ssize_t
+allocate_coo_buffer_on_node(void *data_interface, uint32_t dst_node)
+{
+	uint32_t *addr_columns = NULL;
+	uint32_t *addr_rows = NULL;
+	uintptr_t addr_values = 0;
+
+	struct starpu_coo_interface *coo_interface =
+		(struct starpu_coo_interface *) data_interface;
+
+	uint32_t n_values = coo_interface->n_values;
+	size_t elemsize = coo_interface->elemsize;
+	size_t size = 0, allocated_memory = 0;
+
+	switch (starpu_node_get_kind(dst_node))
+	{
+	case STARPU_CPU_RAM:
+	{
+		addr_columns = malloc(n_values * sizeof(coo_interface->columns[0]));
+		addr_rows = malloc(n_values * sizeof(coo_interface->rows[0]));
+		addr_values = (uintptr_t) malloc(n_values * elemsize);
+		break;
+	}
+#ifdef STARPU_USE_CUDA
+	case STARPU_CUDA_RAM:
+	{
+		cudaError_t err;
+		err = cudaMalloc((void **) &addr_columns,
+				 n_values * sizeof(coo_interface->columns[0]));
+		if (STARPU_UNLIKELY(err != cudaSuccess))
+			goto fail_columns;
+		err = cudaMalloc((void **) &addr_rows,
+				 n_values * sizeof(coo_interface->rows[0]));
+		if (STARPU_UNLIKELY(err != cudaSuccess))
+			goto fail_rows;
+		err = cudaMalloc((void **) &addr_values,
+				 n_values * elemsize);
+		if (STARPU_UNLIKELY(err != cudaSuccess))
+			goto fail_values;
+		break;
+	}
+#endif /* !STARPU_USE_CUDA */
+#ifdef STARPU_USE_OPENCL
+	case STARPU_OPENCL_RAM:
+	{
+		cl_int ret;
+		cl_mem ptr;
+		const cl_mem_flags flags = CL_MEM_READ_WRITE;
+
+		size = n_values * sizeof(uint32_t);
+		ret = starpu_opencl_allocate_memory(&ptr, size, flags);
+		if (STARPU_UNLIKELY(ret != CL_SUCCESS))
+			goto fail_columns;
+		addr_columns = (uint32_t *) ptr;
+		allocated_memory += size;
+
+		ret = starpu_opencl_allocate_memory(&ptr, size, flags);
+		if (STARPU_UNLIKELY(ret != CL_SUCCESS))
+			goto fail_rows;
+		addr_rows = (uint32_t *) ptr;
+		allocated_memory += size;
+
+		size = n_values * elemsize;
+		ret = starpu_opencl_allocate_memory(&ptr, size, flags);
+		if (STARPU_UNLIKELY(ret != CL_SUCCESS))
+			goto fail_values;
+		addr_values = (uintptr_t) ptr;
+		allocated_memory += size;
+		break;
+	}
+#endif /* !STARPU_USE_OPENCL */
+	default:
+		STARPU_ABORT();
+	}
+
+	coo_interface->columns = addr_columns;
+	coo_interface->rows = addr_rows;
+	coo_interface->values = addr_values;
+
+	return allocated_memory;
+
+fail_values:
+	switch (starpu_node_get_kind(dst_node))
+	{
+	case STARPU_CPU_RAM:
+		free((void *) coo_interface->rows);
+		break;
+#ifdef STARPU_USE_CUDA
+	case STARPU_CUDA_RAM:
+	{
+		cudaError_t err;
+		err = cudaFree((void *) coo_interface->rows);
+		if (STARPU_UNLIKELY(err != cudaSuccess))
+			STARPU_CUDA_REPORT_ERROR(err);
+		break;
+	}
+#endif /* !STARPU_USE_CUDA */
+#ifdef STARPU_USE_OPENCL
+	case STARPU_OPENCL_RAM:
+	{
+		cl_int err;
+		err = clReleaseMemObject((void *) coo_interface->rows);
+		if (STARPU_UNLIKELY(err != CL_SUCCESS))
+			STARPU_OPENCL_REPORT_ERROR(err);
+		break;
+	}
+#endif /* !STARPU_USE_OPENCL */
+	default:
+		STARPU_ABORT();
+	}
+
+fail_rows:
+	switch (starpu_node_get_kind(dst_node))
+	{
+	case STARPU_CPU_RAM:
+		free((void *) coo_interface->columns);
+		break;
+#ifdef STARPU_USE_CUDA
+	case STARPU_CUDA_RAM:
+	{
+		cudaError_t err;
+		err = cudaFree((void *) coo_interface->columns);
+		if (STARPU_UNLIKELY(err != cudaSuccess))
+			STARPU_CUDA_REPORT_ERROR(err);
+		break;
+	}
+#endif /* !STARPU_USE_CUDA */
+#ifdef STARPU_USE_OPENCL
+	case STARPU_OPENCL_RAM:
+	{
+		cl_int err;
+		err = clReleaseMemObject((void *) coo_interface->columns);
+		if (STARPU_UNLIKELY(err != CL_SUCCESS))
+			STARPU_OPENCL_REPORT_ERROR(err);
+		break;
+	}
+#endif /* !STARPU_USE_OPENCL */
+	default:
+		STARPU_ABORT();
+	}
+
+fail_columns:
+	return -ENOMEM;
+}
+
+static void
+free_coo_buffer_on_node(void *data_interface, uint32_t node)
+{
+	struct starpu_coo_interface *coo_interface =
+		(struct starpu_coo_interface *) data_interface;
+
+	switch (starpu_node_get_kind(node))
+	{
+	case STARPU_CPU_RAM:
+		free((void *) coo_interface->columns);
+		free((void *) coo_interface->rows);
+		free((void *) coo_interface->values);
+		break;
+#ifdef STARPU_USE_CUDA
+	case STARPU_CUDA_RAM:
+	{
+		cudaError_t err;
+		err = cudaFree((void *) coo_interface->columns);
+		if (STARPU_UNLIKELY(err != cudaSuccess))
+			STARPU_CUDA_REPORT_ERROR(err);
+		err = cudaFree((void *) coo_interface->rows);
+		if (STARPU_UNLIKELY(err != cudaSuccess))
+			STARPU_CUDA_REPORT_ERROR(err);
+		err = cudaFree((void *) coo_interface->values);
+		if (STARPU_UNLIKELY(err != cudaSuccess))
+			STARPU_CUDA_REPORT_ERROR(err);
+		break;
+	}
+#endif /* !STARPU_USE_CUDA */
+#ifdef STARPU_USE_OPENCL
+	case STARPU_OPENCL_RAM:
+	{
+		cl_int err;
+		err = clReleaseMemObject((void *) coo_interface->columns);
+		if (STARPU_UNLIKELY(err != CL_SUCCESS))
+			STARPU_OPENCL_REPORT_ERROR(err);
+		err = clReleaseMemObject((void *) coo_interface->rows);
+		if (STARPU_UNLIKELY(err != CL_SUCCESS))
+			STARPU_OPENCL_REPORT_ERROR(err);
+		err = clReleaseMemObject((void *) coo_interface->values);
+		if (STARPU_UNLIKELY(err != CL_SUCCESS))
+			STARPU_OPENCL_REPORT_ERROR(err);
+		break;
+	}
+#endif /* !STARPU_USE_OPENCL */
+	default:
+		STARPU_ABORT();
+	}
+}
+
+static size_t
+coo_interface_get_size(starpu_data_handle_t handle)
+{
+	struct starpu_coo_interface *coo_interface;
+	coo_interface = (struct starpu_coo_interface *)
+		starpu_data_get_interface_on_node(handle, 0);
+
+	return coo_interface->nx * coo_interface->ny * coo_interface->elemsize;
+}
+
+static uint32_t
+coo_interface_footprint(starpu_data_handle_t handle)
+{
+	struct starpu_coo_interface *coo_interface;
+	coo_interface = (struct starpu_coo_interface *)
+		starpu_data_get_interface_on_node(handle, 0);
+
+	return starpu_crc32_be(coo_interface->nx * coo_interface->ny, 0);
+}
+
+static int
+coo_compare(void *a, void *b)
+{
+	struct starpu_coo_interface *coo_a, *coo_b;
+
+	coo_a = (struct starpu_coo_interface *) a;
+	coo_b = (struct starpu_coo_interface *) b;
+
+	return (coo_a->nx == coo_b->nx &&
+		coo_a->ny == coo_b->ny &&
+		coo_a->n_values == coo_b->n_values &&
+		coo_a->elemsize == coo_b->elemsize);
+}
+
+static void
+display_coo_interface(starpu_data_handle_t handle, FILE *f)
+{
+	struct starpu_coo_interface *coo_interface =
+	coo_interface = (struct starpu_coo_interface *)
+		starpu_data_get_interface_on_node(handle, 0);
+
+	fprintf(f, "%u\t%u", coo_interface->nx, coo_interface->ny);
+}
+
+struct starpu_data_interface_ops _starpu_interface_coo_ops =
+{
+	.register_data_handle  = register_coo_handle,
+	.allocate_data_on_node = allocate_coo_buffer_on_node,
+	.handle_to_pointer     = NULL,
+	.free_data_on_node     = free_coo_buffer_on_node,
+	.copy_methods          = &coo_copy_data_methods,
+	.get_size              = coo_interface_get_size,
+	.footprint             = coo_interface_footprint,
+	.compare               = coo_compare,
+#ifdef STARPU_USE_GORDON
+	.convert_to_gordon     = NULL,
+#endif
+	.interfaceid           = STARPU_COO_INTERFACE_ID,
+	.interface_size        = sizeof(struct starpu_coo_interface),
+	.display               = display_coo_interface
+};
+
+void
+starpu_coo_data_register(starpu_data_handle_t *handleptr, uint32_t home_node,
+			 uint32_t nx, uint32_t ny, uint32_t n_values,
+			 uint32_t *columns, uint32_t *rows,
+			 uintptr_t values, size_t elemsize)
+{
+	struct starpu_coo_interface coo_interface =
+	{
+		.values = values,
+		.columns = columns,
+		.rows = rows,
+		.nx = nx,
+		.ny = ny,
+		.n_values = n_values,
+		.elemsize = elemsize,
+	};
+
+	starpu_data_register(handleptr, home_node, &coo_interface,
+			     &_starpu_interface_coo_ops);
+}

+ 21 - 0
tests/Makefile.am

@@ -34,6 +34,7 @@ EXTRA_DIST =					\
 	datawizard/acquire_release_opencl_kernel.cl \
 	datawizard/interfaces/test_interfaces.h	\
 	datawizard/interfaces/bcsr/bcsr_opencl_kernel.cl \
+	datawizard/interfaces/coo/coo_opencl_kernel.cl \
 	datawizard/interfaces/matrix/matrix_opencl_kernel.cl \
 	datawizard/interfaces/variable/variable_opencl_kernel.cl \
 	datawizard/interfaces/vector/test_vector_opencl_kernel.cl \
@@ -194,6 +195,7 @@ noinst_PROGRAMS =				\
 	datawizard/interfaces/copy_interfaces	\
 	datawizard/interfaces/block/block_interface \
 	datawizard/interfaces/bcsr/bcsr_interface \
+	datawizard/interfaces/coo/coo_interface \
 	datawizard/interfaces/csr/csr_interface \
 	datawizard/interfaces/matrix/matrix_interface \
 	datawizard/interfaces/multiformat/multiformat_interface \
@@ -432,6 +434,25 @@ nobase_STARPU_OPENCL_DATA_DATA += \
 endif
 
 #################
+# COO interface #
+#################
+datawizard_interfaces_coo_coo_interface_SOURCES= \
+	datawizard/interfaces/test_interfaces.c \
+	datawizard/interfaces/coo/coo_interface.c 
+
+if STARPU_USE_CUDA
+datawizard_interfaces_coo_coo_interface_SOURCES+= \
+	datawizard/interfaces/coo/coo_cuda.cu
+endif
+
+if STARPU_USE_OPENCL
+datawizard_interfaces_coo_coo_interface_SOURCES+= \
+	datawizard/interfaces/coo/coo_opencl.c
+nobase_STARPU_OPENCL_DATA_DATA += \
+	datawizard/interfaces/coo/coo_opencl_kernel.cl
+endif
+
+#################
 # CSR interface #
 #################
 datawizard_interfaces_csr_csr_interface_SOURCES= \

+ 68 - 0
tests/datawizard/interfaces/coo/coo_cuda.cu

@@ -0,0 +1,68 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include <starpu.h>
+#include "../test_interfaces.h"
+
+extern struct test_config coo_config;
+
+__global__ void coo_cuda(int *val, uint32_t n, int *err, int factor)
+{
+        unsigned i =  blockIdx.x*blockDim.x + threadIdx.x;
+
+	if (i >= n)
+		return;
+
+	if (val[i] != i * factor)
+		*err = 1;
+	else
+		val[i] *= -1;
+}
+
+extern "C" void test_coo_cuda_func(void *buffers[], void *args)
+{
+	int factor;
+	int *ret;
+	int *val;
+	cudaError_t error;
+	uint32_t nvalues = STARPU_COO_GET_NVALUES(buffers[0]);
+	unsigned threads_per_block = 64;
+	unsigned nblocks = (nvalues + threads_per_block-1) / threads_per_block;
+
+	factor = *(int *) args;
+	val = (int *) STARPU_COO_GET_VALUES(buffers[0]);
+
+	error = cudaMalloc(&ret, sizeof(int));
+	if (error != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(error);
+
+	error = cudaMemcpy(ret,
+			   &coo_config.copy_failed,
+			   sizeof(int),
+			   cudaMemcpyHostToDevice);
+	if (error != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(error);
+
+        coo_cuda<<<nblocks,threads_per_block,2,starpu_cuda_get_local_stream()>>>
+		(val, nvalues, ret, factor);
+
+	error = cudaMemcpy(&coo_config.copy_failed,
+			   ret,
+			   sizeof(int),
+			   cudaMemcpyDeviceToHost);
+	
+	cudaFree(ret);
+	cudaStreamSynchronize(starpu_cuda_get_local_stream());
+}

+ 175 - 0
tests/datawizard/interfaces/coo/coo_interface.c

@@ -0,0 +1,175 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include <starpu.h>
+#include "../test_interfaces.h"
+
+#define NX 2
+#define NY 2
+#define MATRIX_SIZE (NX*NY)
+
+#ifdef STARPU_USE_CPU
+static void test_coo_cpu_func(void *buffers[], void *args);
+#endif
+#ifdef STARPU_USE_CUDA
+extern void test_coo_cuda_func(void *buffers[], void *args);
+#endif
+#ifdef STARPU_USE_OPENCL
+extern void test_coo_opencl_func(void *buffers[], void *args);
+#endif
+
+static starpu_data_handle_t coo_handle, coo2_handle;
+
+struct test_config coo_config =
+{
+#ifdef STARPU_USE_CPU
+	.cpu_func      = test_coo_cpu_func,
+#endif /* ! STARPU_USE_CPU */
+#ifdef STARPU_USE_CUDA
+	.cuda_func     = test_coo_cuda_func,
+#endif /* !STARPU_USE_CUDA */
+#ifdef STARPU_USE_OPENCL
+	.opencl_func   = test_coo_opencl_func,
+#endif /* !STARPU_USE_OPENCL */
+	.handle        = &coo_handle,
+	.dummy_handle  = &coo2_handle,
+	.copy_failed   = SUCCESS,
+	.name          = "coo_interface"
+};
+
+static void
+test_coo_cpu_func(void *buffers[], void *args)
+{
+	int factor = *(int *) args;
+	int *values = (int *) STARPU_COO_GET_VALUES(buffers[0]);
+	unsigned size = STARPU_COO_GET_NVALUES(buffers[0]);
+
+	unsigned i;
+	for (i = 0; i < size; i++)
+	{
+		if (values[i] != i * factor)
+		{
+			coo_config.copy_failed = FAILURE;
+			return;
+		}
+		values[i] *= -1;
+	}
+}
+
+
+static uint32_t columns[MATRIX_SIZE];
+static uint32_t rows[MATRIX_SIZE];
+static int values[MATRIX_SIZE];
+static uint32_t columns2[MATRIX_SIZE];
+static uint32_t rows2[MATRIX_SIZE];
+static int values2[MATRIX_SIZE];
+
+static void
+register_data(void)
+{
+	/*
+ 	   We use the following matrix :
+
+		+---+---+
+		| 0 | 1 |
+		+---+---+
+		| 2 | 3 |
+		+---+---+
+
+	   Of course, we're not supposed to register the zeros, but it does not
+	   matter for this test.
+	 */
+
+	columns[0] = 0;
+	rows[0] = 0;
+	values[0] = 0;
+
+	columns[1] = 1;
+	rows[1] = 0;
+	values[1] = 1;
+
+	columns[2] = 0;
+	rows[2] = 1;
+	values[2] = 2;
+
+	columns[3] = 1;
+	rows[3] = 1;
+	values[3] = 3;
+
+
+	int i;
+	for (i = 0; i < MATRIX_SIZE; i++)
+	{
+		columns2[i] = -1;
+		rows2[i] = -1;
+		values2[i] = -1;
+	}
+
+	starpu_coo_data_register(&coo_handle,
+				0,
+				NX,
+				NY,
+				MATRIX_SIZE,
+				columns,
+				rows,
+				(uintptr_t) values,
+				sizeof(values[0]));
+	starpu_coo_data_register(&coo2_handle,
+				0,
+				NX,
+				NY,
+				MATRIX_SIZE,
+				columns2,
+				rows2,
+				(uintptr_t) values2,
+				sizeof(values2[0]));
+}
+
+static void
+unregister_data(void)
+{
+	starpu_data_unregister(coo_handle);
+	starpu_data_unregister(coo2_handle);
+}
+
+int
+main(void)
+{
+	struct starpu_conf conf;
+	data_interface_test_summary *summary;
+
+	starpu_conf_init(&conf);
+	conf.ncuda = 2;
+	conf.nopencl = 1;
+
+	if (starpu_init(&conf) == -ENODEV || starpu_cpu_worker_get_count() == 0)
+		goto enodev;
+
+	register_data();
+
+	summary = run_tests(&coo_config);
+	if (!summary)
+		exit(EXIT_FAILURE);
+
+	unregister_data();
+
+	data_interface_test_summary_print(stderr, summary);
+
+	starpu_shutdown();
+	return data_interface_test_summary_success(summary);
+
+enodev:
+	return STARPU_TEST_SKIPPED;
+}

+ 136 - 0
tests/datawizard/interfaces/coo/coo_opencl.c

@@ -0,0 +1,136 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <config.h>
+#include <starpu.h>
+#include "../test_interfaces.h"
+
+#define KERNEL_LOCATION "tests/datawizard/interfaces/coo/coo_opencl_kernel.cl"
+
+extern struct test_config coo_config;
+static struct starpu_opencl_program coo_program;
+
+void
+test_coo_opencl_func(void *buffers[], void *args)
+{
+	STARPU_SKIP_IF_VALGRIND;
+
+	int id, devid, factor, ret;
+	unsigned int n;
+
+        cl_int             err;
+	cl_kernel          kernel;
+	cl_command_queue   queue;
+	cl_event           event;
+	cl_context         context;
+	cl_mem             val, fail;
+
+	ret = starpu_opencl_load_opencl_from_file(KERNEL_LOCATION,
+						  &coo_program,
+						  NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
+
+	factor = *(int *)args;
+	n = STARPU_COO_GET_NVALUES(buffers[0]);
+	val = (cl_mem) STARPU_COO_GET_VALUES(buffers[0]);
+
+	id = starpu_worker_get_id();
+	devid = starpu_worker_get_devid(id);
+	starpu_opencl_get_context(devid, &context);
+
+	err = starpu_opencl_load_kernel(&kernel,
+					&queue,
+					&coo_program,
+					"test_coo_opencl",
+					devid);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+		sizeof(int), &coo_config.copy_failed, &err);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	/* Setting args */
+	int nargs;
+	nargs = starpu_opencl_set_kernel_args(&err, &kernel,
+					sizeof(val), &val,
+					sizeof(n), &n,
+					sizeof(fail), &fail,
+					sizeof(factor), &factor,
+					0);
+	if (nargs != 4)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	{
+		size_t global=n;
+		size_t local;
+                size_t s;
+                cl_device_id device;
+
+                starpu_opencl_get_device(devid, &device);
+
+                err = clGetKernelWorkGroupInfo (kernel,
+						device,
+						CL_KERNEL_WORK_GROUP_SIZE,
+						sizeof(local),
+						&local,
+						&s);
+                if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+
+                if (local > global)
+			local = global;
+
+		err = clEnqueueNDRangeKernel(queue,
+					kernel,
+					1,
+					NULL,
+					&global,
+					&local,
+					0,
+					NULL,
+					&event);
+
+		if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+	}
+
+	err = clEnqueueReadBuffer(queue,
+				  fail,
+				  CL_TRUE,
+				  0, 
+				  sizeof(int),
+				  &coo_config.copy_failed,
+				  0,
+				  NULL,
+				  NULL);
+	if (coo_config.copy_failed != 0)
+	{
+		fprintf(stderr, "FUCK copy_failed  %d\n",
+			coo_config.copy_failed);
+	}
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	clFinish(queue);
+	starpu_opencl_collect_stats(event);
+	clReleaseEvent(event);
+
+	starpu_opencl_release_kernel(kernel);
+        ret = starpu_opencl_unload_opencl(&coo_program);
+        STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
+}

+ 29 - 0
tests/datawizard/interfaces/coo/coo_opencl_kernel.cl

@@ -0,0 +1,29 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+__kernel void test_coo_opencl(__global int *val,
+			      unsigned int nx,
+			      __global int *err,
+			      int factor)
+{
+        const int i = get_global_id(0);
+        if (i >=  nx)
+		return;
+
+	if (val[i] != i * factor)
+		*err = val[1];
+	else
+		val[i] = - val[i];
+}