16 年之前 · d482a9effe
--- a/include/starpu-data-interfaces.h
+++ b/include/starpu-data-interfaces.h
@@ -37,6 +37,27 @@ uint32_t starpu_get_blas_ny(starpu_data_handle handle);
 
				 uint32_t starpu_get_blas_local_ld(starpu_data_handle handle);
			
 
				 uintptr_t starpu_get_blas_local_ptr(starpu_data_handle handle);
			
 
				 
			
 
				+/* BLOCK interface for 3D dense blocks */
			
 
				+typedef struct starpu_block_interface_s {
			
 
				+	uintptr_t ptr;
			
 
				+	uint32_t nx;
			
 
				+	uint32_t ny;
			
 
				+	uint32_t nz;
			
 
				+	uint32_t ldy;	/* number of elements between two lines */
			
 
				+	uint32_t ldz;	/* number of elements between two planes */
			
 
				+	size_t elemsize;
			
 
				+} starpu_block_interface_t;
			
 
				+
			
 
				+void starpu_register_block_data(starpu_data_handle *handle, uint32_t home_node,
			
 
				+                        uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t nx,
			
 
				+                        uint32_t ny, uint32_t nz, size_t elemsize);
			
 
				+uint32_t starpu_get_block_nx(starpu_data_handle handle);
			
 
				+uint32_t starpu_get_block_ny(starpu_data_handle handle);
			
 
				+uint32_t starpu_get_block_nz(starpu_data_handle handle);
			
 
				+uint32_t starpu_get_block_local_ldy(starpu_data_handle handle);
			
 
				+uint32_t starpu_get_block_local_ldz(starpu_data_handle handle);
			
 
				+uintptr_t starpu_get_block_local_ptr(starpu_data_handle handle);
			
 
				+
			
 
				 /* vector interface for contiguous (non-strided) buffers */
			
 
				 typedef struct starpu_vector_interface_s {
			
 
				 	uintptr_t ptr;
			
@@ -123,6 +144,7 @@ uint32_t starpu_get_bcsr_c(starpu_data_handle);
 
				 
			
 
				 typedef union {
			
 
				 	starpu_blas_interface_t blas;	/* dense BLAS representation */
			
 
				+	starpu_block_interface_t block;	/* BLOCK interface for 3D dense blocks */
			
 
				 	starpu_vector_interface_t vector; /* continuous vector */
			
 
				 	starpu_csr_interface_t csr;	/* compressed sparse row */
			
 
				 	starpu_csc_interface_t csc; 	/* compressed sparse column */
			
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -61,6 +61,7 @@ noinst_HEADERS = 						\
 
				 	datawizard/coherency.h					\
			
 
				 	datawizard/memory_nodes.h				\
			
 
				 	datawizard/interfaces/blas_interface.h			\
			
 
				+	datawizard/interfaces/block_interface.h			\
			
 
				 	datawizard/interfaces/csr_filters.h			\
			
 
				 	datawizard/interfaces/csc_interface.h			\
			
 
				 	datawizard/interfaces/bcsr_filters.h			\
			
@@ -124,6 +125,7 @@ libstarpu_la_SOURCES = 						\
 
				 	datawizard/interfaces/csr_interface.c			\
			
 
				 	datawizard/interfaces/blas_filters.c			\
			
 
				 	datawizard/interfaces/blas_interface.c			\
			
 
				+	datawizard/interfaces/block_interface.c			\
			
 
				 	datawizard/interfaces/vector_interface.c		\
			
 
				 	datawizard/interfaces/bcsr_filters.c			\
			
 
				 	datawizard/interfaces/csr_filters.c			\
			
--- a/src/datawizard/interfaces/block_interface.c
+++ b/src/datawizard/interfaces/block_interface.c
@@ -0,0 +1,465 @@
 
				+/*
			
 
				+ * StarPU
			
 
				+ * Copyright (C) INRIA 2008-2009 (see AUTHORS file)
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <common/config.h>
			
 
				+#include <datawizard/data_parameters.h>
			
 
				+#include <datawizard/coherency.h>
			
 
				+#include <datawizard/copy-driver.h>
			
 
				+#include <datawizard/hierarchy.h>
			
 
				+
			
 
				+#include <common/hash.h>
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+
			
 
				+#ifdef USE_CUDA
			
 
				+#include <cuda.h>
			
 
				+#endif
			
 
				+
			
 
				+size_t allocate_block_buffer_on_node(data_state *state, uint32_t dst_node);
			
 
				+void liberate_block_buffer_on_node(starpu_data_interface_t *interface, uint32_t node);
			
 
				+int do_copy_block_buffer_1_to_1(data_state *state, uint32_t src_node, uint32_t dst_node);
			
 
				+size_t dump_block_interface(starpu_data_interface_t *interface, void *buffer);
			
 
				+size_t block_interface_get_size(struct starpu_data_state_t *state);
			
 
				+uint32_t footprint_block_interface_crc32(data_state *state, uint32_t hstate);
			
 
				+void display_block_interface(data_state *state, FILE *f);
			
 
				+#ifdef USE_GORDON
			
 
				+int convert_block_to_gordon(starpu_data_interface_t *interface, uint64_t *ptr, gordon_strideSize_t *ss);
			
 
				+#endif
			
 
				+
			
 
				+struct data_interface_ops_t interface_block_ops = {
			
 
				+	.allocate_data_on_node = allocate_block_buffer_on_node,
			
 
				+	.liberate_data_on_node = liberate_block_buffer_on_node,
			
 
				+	.copy_data_1_to_1 = do_copy_block_buffer_1_to_1,
			
 
				+	.dump_data_interface = dump_block_interface,
			
 
				+	.get_size = block_interface_get_size,
			
 
				+	.footprint = footprint_block_interface_crc32,
			
 
				+#ifdef USE_GORDON
			
 
				+	.convert_to_gordon = convert_block_to_gordon,
			
 
				+#endif
			
 
				+	.interfaceid = BLAS_INTERFACE, 
			
 
				+	.display = display_block_interface
			
 
				+};
			
 
				+
			
 
				+#ifdef USE_GORDON
			
 
				+int convert_block_to_gordon(starpu_data_interface_t *interface, uint64_t *ptr, gordon_strideSize_t *ss) 
			
 
				+{
			
 
				+	/* TODO */
			
 
				+	STARPU_ASSERT(0);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+/* declare a new data with the BLAS interface */
			
 
				+void starpu_register_block_data(struct starpu_data_state_t **handle, uint32_t home_node,
			
 
				+			uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t nx,
			
 
				+			uint32_t ny, uint32_t nz, size_t elemsize)
			
 
				+{
			
 
				+	struct starpu_data_state_t *state = calloc(1, sizeof(struct starpu_data_state_t));
			
 
				+	STARPU_ASSERT(state);
			
 
				+
			
 
				+	STARPU_ASSERT(handle);
			
 
				+	*handle = state;
			
 
				+
			
 
				+	unsigned node;
			
 
				+	for (node = 0; node < MAXNODES; node++)
			
 
				+	{
			
 
				+		starpu_block_interface_t *local_interface = &state->interface[node].block;
			
 
				+
			
 
				+		if (node == home_node) {
			
 
				+			local_interface->ptr = ptr;
			
 
				+			local_interface->ldy  = ldy;
			
 
				+			local_interface->ldz  = ldz;
			
 
				+		}
			
 
				+		else {
			
 
				+			local_interface->ptr = 0;
			
 
				+			local_interface->ldy  = 0;
			
 
				+			local_interface->ldz  = 0;
			
 
				+		}
			
 
				+
			
 
				+		local_interface->nx = nx;
			
 
				+		local_interface->ny = ny;
			
 
				+		local_interface->nz = nz;
			
 
				+		local_interface->elemsize = elemsize;
			
 
				+	}
			
 
				+
			
 
				+	state->ops = &interface_block_ops;
			
 
				+
			
 
				+	register_new_data(state, home_node, 0);
			
 
				+}
			
 
				+
			
 
				+static inline uint32_t footprint_block_interface_generic(uint32_t (*hash_func)(uint32_t input, uint32_t hstate), data_state *state, uint32_t hstate)
			
 
				+{
			
 
				+	uint32_t hash;
			
 
				+
			
 
				+	hash = hstate;
			
 
				+	hash = hash_func(starpu_get_block_nx(state), hash);
			
 
				+	hash = hash_func(starpu_get_block_ny(state), hash);
			
 
				+	hash = hash_func(starpu_get_block_nz(state), hash);
			
 
				+
			
 
				+	return hash;
			
 
				+}
			
 
				+
			
 
				+uint32_t footprint_block_interface_crc32(data_state *state, uint32_t hstate)
			
 
				+{
			
 
				+	return footprint_block_interface_generic(crc32_be, state, hstate);
			
 
				+}
			
 
				+
			
 
				+struct dumped_block_interface_s {
			
 
				+	uintptr_t ptr;
			
 
				+	uint32_t nx;
			
 
				+	uint32_t ny;
			
 
				+	uint32_t nz;
			
 
				+	uint32_t ldy;
			
 
				+	uint32_t ldz;
			
 
				+} __attribute__ ((packed));
			
 
				+
			
 
				+void display_block_interface(data_state *state, FILE *f)
			
 
				+{
			
 
				+	starpu_block_interface_t *interface;
			
 
				+
			
 
				+	interface = &state->interface[0].block;
			
 
				+
			
 
				+	fprintf(f, "%d\t%d\t%d\t", interface->nx, interface->ny, interface->nz);
			
 
				+}
			
 
				+
			
 
				+size_t dump_block_interface(starpu_data_interface_t *interface, void *_buffer)
			
 
				+{
			
 
				+	/* yes, that's DIRTY ... */
			
 
				+	struct dumped_block_interface_s *buffer = _buffer;
			
 
				+
			
 
				+	buffer->ptr = (*interface).block.ptr;
			
 
				+	buffer->nx = (*interface).block.nx;
			
 
				+	buffer->ny = (*interface).block.ny;
			
 
				+	buffer->nz = (*interface).block.nz;
			
 
				+	buffer->ldy = (*interface).block.ldy;
			
 
				+	buffer->ldz = (*interface).block.ldz;
			
 
				+
			
 
				+	return (sizeof(struct dumped_block_interface_s));
			
 
				+}
			
 
				+
			
 
				+size_t block_interface_get_size(struct starpu_data_state_t *state)
			
 
				+{
			
 
				+	size_t size;
			
 
				+	starpu_block_interface_t *interface;
			
 
				+
			
 
				+	interface = &state->interface[0].block;
			
 
				+
			
 
				+	size = interface->nx*interface->ny*interface->nz*interface->elemsize; 
			
 
				+
			
 
				+	return size;
			
 
				+}
			
 
				+
			
 
				+/* offer an access to the data parameters */
			
 
				+uint32_t starpu_get_block_nx(data_state *state)
			
 
				+{
			
 
				+	return (state->interface[0].block.nx);
			
 
				+}
			
 
				+
			
 
				+uint32_t starpu_get_block_ny(data_state *state)
			
 
				+{
			
 
				+	return (state->interface[0].block.ny);
			
 
				+}
			
 
				+
			
 
				+uint32_t starpu_get_block_nz(data_state *state)
			
 
				+{
			
 
				+	return (state->interface[0].block.nz);
			
 
				+}
			
 
				+
			
 
				+uint32_t starpu_get_block_local_ldy(data_state *state)
			
 
				+{
			
 
				+	unsigned node;
			
 
				+	node = get_local_memory_node();
			
 
				+
			
 
				+	STARPU_ASSERT(state->per_node[node].allocated);
			
 
				+
			
 
				+	return (state->interface[node].block.ldy);
			
 
				+}
			
 
				+
			
 
				+uint32_t starpu_get_block_local_ldz(data_state *state)
			
 
				+{
			
 
				+	unsigned node;
			
 
				+	node = get_local_memory_node();
			
 
				+
			
 
				+	STARPU_ASSERT(state->per_node[node].allocated);
			
 
				+
			
 
				+	return (state->interface[node].block.ldz);
			
 
				+}
			
 
				+
			
 
				+uintptr_t starpu_get_block_local_ptr(data_state *state)
			
 
				+{
			
 
				+	unsigned node;
			
 
				+	node = get_local_memory_node();
			
 
				+
			
 
				+	STARPU_ASSERT(state->per_node[node].allocated);
			
 
				+
			
 
				+	return (state->interface[node].block.ptr);
			
 
				+}
			
 
				+
			
 
				+/* memory allocation/deallocation primitives for the BLAS interface */
			
 
				+
			
 
				+/* returns the size of the allocated area */
			
 
				+size_t allocate_block_buffer_on_node(data_state *state, uint32_t dst_node)
			
 
				+{
			
 
				+	uintptr_t addr = 0;
			
 
				+	unsigned fail = 0;
			
 
				+	size_t allocated_memory;
			
 
				+
			
 
				+#ifdef USE_CUDA
			
 
				+	cublasStatus status;
			
 
				+#endif
			
 
				+	uint32_t nx = state->interface[dst_node].block.nx;
			
 
				+	uint32_t ny = state->interface[dst_node].block.ny;
			
 
				+	uint32_t nz = state->interface[dst_node].block.nz;
			
 
				+	size_t elemsize = state->interface[dst_node].block.elemsize;
			
 
				+
			
 
				+	node_kind kind = get_node_kind(dst_node);
			
 
				+
			
 
				+	switch(kind) {
			
 
				+		case RAM:
			
 
				+			addr = (uintptr_t)malloc(nx*ny*nz*elemsize);
			
 
				+			if (!addr) 
			
 
				+				fail = 1;
			
 
				+
			
 
				+			break;
			
 
				+#ifdef USE_CUDA
			
 
				+		case CUDA_RAM:
			
 
				+			status = cublasAlloc(nx*ny*nz, elemsize, (void **)&addr);
			
 
				+
			
 
				+			if (!addr || status != CUBLAS_STATUS_SUCCESS)
			
 
				+			{
			
 
				+				STARPU_ASSERT(status != CUBLAS_STATUS_INTERNAL_ERROR);
			
 
				+				STARPU_ASSERT(status != CUBLAS_STATUS_NOT_INITIALIZED);
			
 
				+				STARPU_ASSERT(status != CUBLAS_STATUS_INVALID_VALUE);
			
 
				+				STARPU_ASSERT(status == CUBLAS_STATUS_ALLOC_FAILED);
			
 
				+				fail = 1;
			
 
				+			}
			
 
				+
			
 
				+			break;
			
 
				+#endif
			
 
				+		default:
			
 
				+			assert(0);
			
 
				+	}
			
 
				+
			
 
				+	if (!fail) {
			
 
				+		/* allocation succeeded */
			
 
				+		allocated_memory = nx*ny*nz*elemsize;
			
 
				+
			
 
				+		/* update the data properly in consequence */
			
 
				+		state->interface[dst_node].block.ptr = addr;
			
 
				+		state->interface[dst_node].block.ldy = nx;
			
 
				+		state->interface[dst_node].block.ldz = nx*ny;
			
 
				+	} else {
			
 
				+		/* allocation failed */
			
 
				+		allocated_memory = 0;
			
 
				+	}
			
 
				+	
			
 
				+	return allocated_memory;
			
 
				+}
			
 
				+
			
 
				+void liberate_block_buffer_on_node(starpu_data_interface_t *interface, uint32_t node)
			
 
				+{
			
 
				+#ifdef USE_CUDA
			
 
				+	cublasStatus status;
			
 
				+#endif
			
 
				+
			
 
				+	node_kind kind = get_node_kind(node);
			
 
				+	switch(kind) {
			
 
				+		case RAM:
			
 
				+			free((void*)interface->block.ptr);
			
 
				+			break;
			
 
				+#ifdef USE_CUDA
			
 
				+		case CUDA_RAM:
			
 
				+			status = cublasFree((void*)interface->block.ptr);
			
 
				+			
			
 
				+			STARPU_ASSERT(status != CUBLAS_STATUS_INTERNAL_ERROR);
			
 
				+			STARPU_ASSERT(status == CUBLAS_STATUS_SUCCESS);
			
 
				+
			
 
				+			break;
			
 
				+#endif
			
 
				+		default:
			
 
				+			assert(0);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+#ifdef USE_CUDA
			
 
				+static void copy_cublas_to_ram(data_state *state, uint32_t src_node, uint32_t dst_node)
			
 
				+{
			
 
				+	starpu_block_interface_t *src_block;
			
 
				+	starpu_block_interface_t *dst_block;
			
 
				+
			
 
				+	src_block = &state->interface[src_node].block;
			
 
				+	dst_block = &state->interface[dst_node].block;
			
 
				+
			
 
				+	if ((src_block->nx == src_block->ldy) && (src_block->ldy == dst_block->ldy))
			
 
				+	{
			
 
				+		/* we are lucky */
			
 
				+		cublasGetMatrix(src_block->nx*src_block->ny, src_block->nz, src_block->elemsize,
			
 
				+			(uint8_t *)src_block->ptr, src_block->ldz,
			
 
				+			(uint8_t *)dst_block->ptr, dst_block->ldz);
			
 
				+	}
			
 
				+	else {
			
 
				+		unsigned layer;
			
 
				+		for (layer = 0; layer < src_block->nz; layer++)
			
 
				+		{
			
 
				+			uint8_t *src_ptr = ((uint8_t *)src_block->ptr) 
			
 
				+						+ src_block->ldz*src_block->elemsize;
			
 
				+			uint8_t *dst_ptr = ((uint8_t *)dst_block->ptr) 
			
 
				+						+ dst_block->ldz*dst_block->elemsize;
			
 
				+
			
 
				+			cublasGetMatrix(src_block->nx, src_block->ny, src_block->elemsize,
			
 
				+				src_ptr, src_block->ldy, dst_ptr, dst_block->ldy);
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	TRACE_DATA_COPY(src_node, dst_node, src_block->nx*src_block->ny*src_block->elemsize*src_block->elemsize);
			
 
				+}
			
 
				+
			
 
				+static void copy_ram_to_cublas(data_state *state, uint32_t src_node, uint32_t dst_node)
			
 
				+{
			
 
				+	starpu_block_interface_t *src_block;
			
 
				+	starpu_block_interface_t *dst_block;
			
 
				+
			
 
				+	src_block = &state->interface[src_node].block;
			
 
				+	dst_block = &state->interface[dst_node].block;
			
 
				+
			
 
				+	if ((src_block->nx == src_block->ldy) && (src_block->ldy == dst_block->ldy))
			
 
				+	{
			
 
				+		/* we are lucky */
			
 
				+		cublasSetMatrix(src_block->nx*src_block->ny, src_block->nz, src_block->elemsize,
			
 
				+			(uint8_t *)src_block->ptr, src_block->ldz,
			
 
				+			(uint8_t *)dst_block->ptr, dst_block->ldz);
			
 
				+	}
			
 
				+	else {
			
 
				+		unsigned layer;
			
 
				+		for (layer = 0; layer < src_block->nz; layer++)
			
 
				+		{
			
 
				+			uint8_t *src_ptr = ((uint8_t *)src_block->ptr) 
			
 
				+						+ src_block->ldz*src_block->elemsize;
			
 
				+			uint8_t *dst_ptr = ((uint8_t *)dst_block->ptr) 
			
 
				+						+ dst_block->ldz*dst_block->elemsize;
			
 
				+
			
 
				+			cublasSetMatrix(src_block->nx, src_block->ny, src_block->elemsize,
			
 
				+				src_ptr, src_block->ldy, dst_ptr, dst_block->ldy);
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	TRACE_DATA_COPY(src_node, dst_node, src_block->nx*src_block->ny*src_block->nz*src_block->elemsize);
			
 
				+}
			
 
				+#endif // USE_CUDA
			
 
				+
			
 
				+/* as not all platform easily have a BLAS lib installed ... */
			
 
				+static void dummy_copy_ram_to_ram(data_state *state, uint32_t src_node, uint32_t dst_node)
			
 
				+{
			
 
				+	uint32_t nx = state->interface[dst_node].block.nx;
			
 
				+	uint32_t ny = state->interface[dst_node].block.ny;
			
 
				+	uint32_t nz = state->interface[dst_node].block.nz;
			
 
				+	size_t elemsize = state->interface[dst_node].block.elemsize;
			
 
				+
			
 
				+	uint32_t ldy_src = state->interface[src_node].block.ldy;
			
 
				+	uint32_t ldz_src = state->interface[src_node].block.ldz;
			
 
				+	uint32_t ldy_dst = state->interface[dst_node].block.ldy;
			
 
				+	uint32_t ldz_dst = state->interface[dst_node].block.ldz;
			
 
				+
			
 
				+	uintptr_t ptr_src = state->interface[src_node].block.ptr;
			
 
				+	uintptr_t ptr_dst = state->interface[dst_node].block.ptr;
			
 
				+
			
 
				+	unsigned y, z;
			
 
				+	for (z = 0; z < nz; z++)
			
 
				+	for (y = 0; y < ny; y++)
			
 
				+	{
			
 
				+		uint32_t src_offset = (y*ldy_src + y*z*ldz_src)*elemsize;
			
 
				+		uint32_t dst_offset = (y*ldy_dst + y*z*ldz_dst)*elemsize;
			
 
				+
			
 
				+		memcpy((void *)(ptr_dst + dst_offset), 
			
 
				+			(void *)(ptr_src + src_offset), nx*elemsize);
			
 
				+	}
			
 
				+
			
 
				+	TRACE_DATA_COPY(src_node, dst_node, nx*ny*nz*elemsize);
			
 
				+}
			
 
				+
			
 
				+
			
 
				+int do_copy_block_buffer_1_to_1(data_state *state, uint32_t src_node, uint32_t dst_node)
			
 
				+{
			
 
				+	node_kind src_kind = get_node_kind(src_node);
			
 
				+	node_kind dst_kind = get_node_kind(dst_node);
			
 
				+
			
 
				+	switch (dst_kind) {
			
 
				+	case RAM:
			
 
				+		switch (src_kind) {
			
 
				+			case RAM:
			
 
				+				/* RAM -> RAM */
			
 
				+				 dummy_copy_ram_to_ram(state, src_node, dst_node);
			
 
				+				 break;
			
 
				+#ifdef USE_CUDA
			
 
				+			case CUDA_RAM:
			
 
				+				/* CUBLAS_RAM -> RAM */
			
 
				+				if (get_local_memory_node() == src_node)
			
 
				+				{
			
 
				+					/* only the proper CUBLAS thread can initiate this directly ! */
			
 
				+					copy_cublas_to_ram(state, src_node, dst_node);
			
 
				+				}
			
 
				+				else
			
 
				+				{
			
 
				+					/* put a request to the corresponding GPU */
			
 
				+		//			fprintf(stderr, "post_data_request state %p src %d dst %d\n", state, src_node, dst_node);
			
 
				+					post_data_request(state, src_node, dst_node);
			
 
				+		//			fprintf(stderr, "post %p OK\n", state);
			
 
				+				}
			
 
				+				break;
			
 
				+#endif
			
 
				+			case SPU_LS:
			
 
				+				STARPU_ASSERT(0); // TODO
			
 
				+				break;
			
 
				+			case UNUSED:
			
 
				+				printf("error node %d UNUSED\n", src_node);
			
 
				+			default:
			
 
				+				assert(0);
			
 
				+				break;
			
 
				+		}
			
 
				+		break;
			
 
				+#ifdef USE_CUDA
			
 
				+	case CUDA_RAM:
			
 
				+		switch (src_kind) {
			
 
				+			case RAM:
			
 
				+				/* RAM -> CUBLAS_RAM */
			
 
				+				/* only the proper CUBLAS thread can initiate this ! */
			
 
				+				STARPU_ASSERT(get_local_memory_node() == dst_node);
			
 
				+				copy_ram_to_cublas(state, src_node, dst_node);
			
 
				+				break;
			
 
				+			case CUDA_RAM:
			
 
				+			case SPU_LS:
			
 
				+				STARPU_ASSERT(0); // TODO 
			
 
				+				break;
			
 
				+			case UNUSED:
			
 
				+			default:
			
 
				+				STARPU_ASSERT(0);
			
 
				+				break;
			
 
				+		}
			
 
				+		break;
			
 
				+#endif
			
 
				+	case SPU_LS:
			
 
				+		STARPU_ASSERT(0); // TODO
			
 
				+		break;
			
 
				+	case UNUSED:
			
 
				+	default:
			
 
				+		assert(0);
			
 
				+		break;
			
 
				+	}
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
--- a/src/datawizard/interfaces/block_interface.h
+++ b/src/datawizard/interfaces/block_interface.h
@@ -0,0 +1,24 @@
 
				+/*
			
 
				+ * StarPU
			
 
				+ * Copyright (C) INRIA 2008-2009 (see AUTHORS file)
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __BLOCK_INTERFACE_H__
			
 
				+#define __BLOCK_INTERFACE_H__
			
 
				+
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+#define BLOCK_INTERFACE   0x118512
			
 
				+
			
 
				+#endif // __BLOCK_INTERFACE_H__