Browse Source

Add a new "variable" data interface which stores a single variable. This
interface was written by Sebastien Fremal from the Faculty of Engineering at
University of Mons.

Cédric Augonnet 15 years ago
parent
commit
d773a1aeb6
3 changed files with 363 additions and 1 deletions
  1. 17 1
      include/starpu-data-interfaces.h
  2. 1 0
      src/Makefile.am
  3. 345 0
      src/datawizard/interfaces/variable_interface.c

+ 17 - 1
include/starpu-data-interfaces.h

@@ -101,6 +101,21 @@ uintptr_t starpu_get_vector_local_ptr(starpu_data_handle handle);
 #define STARPU_GET_VECTOR_NX(interface)	(((starpu_vector_interface_t *)(interface))->nx)
 #define STARPU_GET_VECTOR_ELEMSIZE(interface)	(((starpu_vector_interface_t *)(interface))->elemsize)
 
+/* variable interface for a single data (not a vector, a matrix, a list, ...) */
+typedef struct starpu_variable_interface_s {
+	uintptr_t ptr;
+	size_t elemsize;
+} starpu_variable_interface_t;
+
+void starpu_register_variable_data(starpu_data_handle *handle, uint32_t home_node,
+                        uintptr_t ptr, size_t elemsize);
+size_t starpu_get_variable_elemsize(starpu_data_handle handle);
+uintptr_t starpu_get_variable_local_ptr(starpu_data_handle handle);
+
+/* helper methods */
+#define STARPU_GET_VARIABLE_PTR(interface)	(((starpu_variable_interface_t *)(interface))->ptr)
+#define STARPU_GET_VARIABLE_ELEMSIZE(interface)	(((starpu_variable_interface_t *)(interface))->elemsize)
+
 /* CSR interface for sparse matrices (compressed sparse row representation) */
 typedef struct starpu_csr_interface_s {
 	uint32_t nnz; /* number of non-zero entries */
@@ -189,7 +204,8 @@ size_t starpu_get_bcsr_elemsize(starpu_data_handle);
 #define STARPU_CSR_INTERFACE_ID		3
 #define STARPU_CSC_INTERFACE_ID		4
 #define STARPU_BCSCR_INTERFACE_ID	5
-#define STARPU_NINTERFACES_ID		6 /* number of data interfaces */
+#define STARPU_VARIABLE_INTERFACE_ID	6
+#define STARPU_NINTERFACES_ID		7 /* number of data interfaces */
 
 unsigned starpu_get_handle_interface_id(starpu_data_handle);
 

+ 1 - 0
src/Makefile.am

@@ -136,6 +136,7 @@ libstarpu_la_SOURCES = 						\
 	datawizard/interfaces/bcsr_filters.c			\
 	datawizard/interfaces/csr_filters.c			\
 	datawizard/interfaces/vector_filters.c			\
+	datawizard/interfaces/variable_interface.c		\
 	util/malloc.c						\
 	util/execute_on_all.c					\
 	util/starpu_create_sync_task.c				\

+ 345 - 0
src/datawizard/interfaces/variable_interface.c

@@ -0,0 +1,345 @@
+/*
+ * StarPU
+ * Copyright (C) INRIA 2008-2010 (see AUTHORS file)
+ * Copyright (C) Sebastien Fremal 2010
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include <common/config.h>
+#include <datawizard/coherency.h>
+#include <datawizard/copy-driver.h>
+#include <datawizard/hierarchy.h>
+
+#include <common/hash.h>
+
+#ifdef STARPU_USE_CUDA
+#include <cuda.h>
+#endif
+
+static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
+#ifdef STARPU_USE_CUDA
+static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
+static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
+static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream);
+static int copy_cuda_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream);
+#endif
+
+static const struct starpu_copy_data_methods_s variable_copy_data_methods_s = {
+	.ram_to_ram = dummy_copy_ram_to_ram,
+	.ram_to_spu = NULL,
+#ifdef STARPU_USE_CUDA
+	.ram_to_cuda = copy_ram_to_cuda,
+	.cuda_to_ram = copy_cuda_to_ram,
+	.ram_to_cuda_async = copy_ram_to_cuda_async,
+	.cuda_to_ram_async = copy_cuda_to_ram_async,
+#endif
+	.cuda_to_cuda = NULL,
+	.cuda_to_spu = NULL,
+	.spu_to_ram = NULL,
+	.spu_to_cuda = NULL,
+	.spu_to_spu = NULL
+};
+
+static void register_variable_handle(starpu_data_handle handle, uint32_t home_node, void *interface);
+static size_t allocate_variable_buffer_on_node(starpu_data_handle handle, uint32_t dst_node);
+static void liberate_variable_buffer_on_node(void *interface, uint32_t node);
+static size_t variable_interface_get_size(starpu_data_handle handle);
+static uint32_t footprint_variable_interface_crc32(starpu_data_handle handle);
+static void display_variable_interface(starpu_data_handle handle, FILE *f);
+#ifdef STARPU_USE_GORDON
+static int convert_variable_to_gordon(void *interface, uint64_t *ptr, gordon_strideSize_t *ss); 
+#endif
+
+static struct starpu_data_interface_ops_t interface_variable_ops = {
+	.register_data_handle = register_variable_handle,
+	.allocate_data_on_node = allocate_variable_buffer_on_node,
+	.liberate_data_on_node = liberate_variable_buffer_on_node,
+	.copy_methods = &variable_copy_data_methods_s,
+	.get_size = variable_interface_get_size,
+	.footprint = footprint_variable_interface_crc32,
+#ifdef STARPU_USE_GORDON
+	.convert_to_gordon = convert_variable_to_gordon,
+#endif
+	.interfaceid = STARPU_VARIABLE_INTERFACE_ID,
+	.interface_size = sizeof(starpu_variable_interface_t), 
+	.display = display_variable_interface
+};
+
+static void register_variable_handle(starpu_data_handle handle, uint32_t home_node, void *interface)
+{
+	unsigned node;
+	for (node = 0; node < STARPU_MAXNODES; node++)
+	{
+		starpu_variable_interface_t *local_interface = 
+			starpu_data_get_interface_on_node(handle, node);
+
+		if (node == home_node) {
+			local_interface->ptr = STARPU_GET_VARIABLE_PTR(interface);
+		}
+		else {
+			local_interface->ptr = 0;
+		}
+
+		local_interface->elemsize = STARPU_GET_VARIABLE_ELEMSIZE(interface);
+	}
+}
+
+#ifdef STARPU_USE_GORDON
+int convert_variable_to_gordon(void *interface, uint64_t *ptr, gordon_strideSize_t *ss) 
+{
+	*ptr = STARPU_GET_VARIABLE_PTR(interface);
+	(*ss).size = STARPU_GET_VARIABLE_ELEMSIZE(interface);
+
+	return 0;
+}
+#endif
+
+/* declare a new data with the variable interface */
+void starpu_register_variable_data(starpu_data_handle *handleptr, uint32_t home_node,
+                        uintptr_t ptr, size_t elemsize)
+{
+	starpu_variable_interface_t variable = {
+		.ptr = ptr,
+		.elemsize = elemsize
+	};	
+
+	_starpu_register_data_handle(handleptr, home_node, &variable, &interface_variable_ops); 
+}
+
+
+static uint32_t footprint_variable_interface_crc32(starpu_data_handle handle)
+{
+	return _starpu_crc32_be(starpu_get_variable_elemsize(handle), 0);
+}
+
+static void display_variable_interface(starpu_data_handle handle, FILE *f)
+{
+	starpu_variable_interface_t *interface =
+		starpu_data_get_interface_on_node(handle, 0);
+
+	fprintf(f, "%ld\t", (long)interface->elemsize);
+}
+
+static size_t variable_interface_get_size(starpu_data_handle handle)
+{
+	starpu_variable_interface_t *interface =
+		starpu_data_get_interface_on_node(handle, 0);
+
+	return interface->elemsize;
+}
+
+uintptr_t starpu_get_variable_local_ptr(starpu_data_handle handle)
+{
+	unsigned node;
+	node = _starpu_get_local_memory_node();
+
+	STARPU_ASSERT(starpu_test_if_data_is_allocated_on_node(handle, node));
+
+	return STARPU_GET_VARIABLE_PTR(starpu_data_get_interface_on_node(handle, node));
+}
+
+size_t starpu_get_variable_elemsize(starpu_data_handle handle)
+{
+	return STARPU_GET_VARIABLE_ELEMSIZE(starpu_data_get_interface_on_node(handle, 0));
+}
+
+/* memory allocation/deallocation primitives for the variable interface */
+
+/* returns the size of the allocated area */
+static size_t allocate_variable_buffer_on_node(starpu_data_handle handle, uint32_t dst_node)
+{
+	starpu_variable_interface_t *interface =
+		starpu_data_get_interface_on_node(handle, dst_node);
+
+	unsigned fail = 0;
+	uintptr_t addr = 0;
+	size_t allocated_memory;
+
+	size_t elemsize = interface->elemsize;
+
+	starpu_node_kind kind = _starpu_get_node_kind(dst_node);
+
+#ifdef STARPU_USE_CUDA
+	cudaError_t status;
+#endif
+
+	switch(kind) {
+		case STARPU_RAM:
+			addr = (uintptr_t)malloc(elemsize);
+			if (!addr)
+				fail = 1;
+			break;
+#ifdef STARPU_USE_CUDA
+		case STARPU_CUDA_RAM:
+			status = cudaMalloc((void **)&addr, elemsize);
+			if (!addr || (status != cudaSuccess))
+			{
+				if (STARPU_UNLIKELY(status != cudaErrorMemoryAllocation))
+					STARPU_CUDA_REPORT_ERROR(status);
+
+				fail = 1;
+			}
+			break;
+#endif
+		default:
+			assert(0);
+	}
+
+	if (fail)
+		return 0;
+
+	/* allocation succeeded */
+	allocated_memory = elemsize;
+
+	/* update the data properly in consequence */
+	interface->ptr = addr;
+	
+	return allocated_memory;
+}
+
+static void liberate_variable_buffer_on_node(void *interface, uint32_t node)
+{
+	starpu_node_kind kind = _starpu_get_node_kind(node);
+	switch(kind) {
+		case STARPU_RAM:
+			free((void*)STARPU_GET_VARIABLE_PTR(interface));
+			break;
+#ifdef STARPU_USE_CUDA
+		case STARPU_CUDA_RAM:
+			cudaFree((void*)STARPU_GET_VARIABLE_PTR(interface));
+			break;
+#endif
+		default:
+			assert(0);
+	}
+}
+
+#ifdef STARPU_USE_CUDA
+static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
+{
+	starpu_variable_interface_t *src_variable;
+	starpu_variable_interface_t *dst_variable;
+
+	src_variable = starpu_data_get_interface_on_node(handle, src_node);
+	dst_variable = starpu_data_get_interface_on_node(handle, dst_node);
+
+	cudaError_t cures;
+	cures = cudaMemcpy((char *)dst_variable->ptr, (char *)src_variable->ptr, src_variable->elemsize, cudaMemcpyDeviceToHost);
+	cudaThreadSynchronize();
+
+	if (STARPU_UNLIKELY(cures))
+		STARPU_CUDA_REPORT_ERROR(cures);
+
+	STARPU_TRACE_DATA_COPY(src_node, dst_node, src_variable->elemsize);
+
+	return 0;
+}
+
+static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
+{
+	starpu_variable_interface_t *src_variable;
+	starpu_variable_interface_t *dst_variable;
+
+	src_variable = starpu_data_get_interface_on_node(handle, src_node);
+	dst_variable = starpu_data_get_interface_on_node(handle, dst_node);
+
+	cudaError_t cures;
+	cures = cudaMemcpy((char *)dst_variable->ptr, (char *)src_variable->ptr, src_variable->elemsize, cudaMemcpyHostToDevice);
+	cudaThreadSynchronize();
+
+	if (STARPU_UNLIKELY(cures))
+		STARPU_CUDA_REPORT_ERROR(cures);
+
+	STARPU_TRACE_DATA_COPY(src_node, dst_node, src_variable->elemsize);
+
+	return 0;
+}
+
+static int copy_cuda_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream)
+{
+	starpu_variable_interface_t *src_variable;
+	starpu_variable_interface_t *dst_variable;
+
+	src_variable = starpu_data_get_interface_on_node(handle, src_node);
+	dst_variable = starpu_data_get_interface_on_node(handle, dst_node);
+
+	cudaError_t cures;
+	cures = cudaMemcpyAsync((char *)dst_variable->ptr, (char *)src_variable->ptr, src_variable->elemsize, cudaMemcpyDeviceToHost, *stream);
+	if (cures)
+	{
+		/* do it in a synchronous fashion */
+		cures = cudaMemcpy((char *)dst_variable->ptr, (char *)src_variable->ptr, src_variable->elemsize, cudaMemcpyDeviceToHost);
+		cudaThreadSynchronize();
+
+		if (STARPU_UNLIKELY(cures))
+			STARPU_CUDA_REPORT_ERROR(cures);
+
+		return 0;
+	}
+
+	STARPU_TRACE_DATA_COPY(src_node, dst_node, src_variable->elemsize);
+
+	return EAGAIN;
+}
+
+static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream)
+{
+	starpu_variable_interface_t *src_variable;
+	starpu_variable_interface_t *dst_variable;
+
+	src_variable = starpu_data_get_interface_on_node(handle, src_node);
+	dst_variable = starpu_data_get_interface_on_node(handle, dst_node);
+
+	cudaError_t cures;
+	
+	cures = cudaMemcpyAsync((char *)dst_variable->ptr, (char *)src_variable->ptr, src_variable->elemsize, cudaMemcpyHostToDevice, *stream);
+	if (cures)
+	{
+		/* do it in a synchronous fashion */
+		cures = cudaMemcpy((char *)dst_variable->ptr, (char *)src_variable->ptr, src_variable->elemsize, cudaMemcpyHostToDevice);
+		cudaThreadSynchronize();
+
+		if (STARPU_UNLIKELY(cures))
+			STARPU_CUDA_REPORT_ERROR(cures);
+
+		return 0;
+	}
+
+	STARPU_TRACE_DATA_COPY(src_node, dst_node, src_variable->elemsize);
+
+	return EAGAIN;
+}
+
+
+#endif // STARPU_USE_CUDA
+
+static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
+{
+	starpu_variable_interface_t *src_variable;
+	starpu_variable_interface_t *dst_variable;
+
+	src_variable = starpu_data_get_interface_on_node(handle, src_node);
+	dst_variable = starpu_data_get_interface_on_node(handle, dst_node);
+
+	size_t elemsize = dst_variable->elemsize;
+
+	uintptr_t ptr_src = src_variable->ptr;
+	uintptr_t ptr_dst = dst_variable->ptr;
+
+	memcpy((void *)ptr_dst, (void *)ptr_src, elemsize);
+
+	STARPU_TRACE_DATA_COPY(src_node, dst_node, elemsize);
+
+	return 0;
+}