Browse Source

Make public the functions needed by data interface implementations

Nathalie Furmento 6 years ago
parent
commit
edc3ee4902

+ 6 - 0
include/starpu_data_interfaces.h

@@ -683,6 +683,12 @@ void starpu_interface_start_driver_copy_async(unsigned src_node, unsigned dst_no
 void starpu_interface_end_driver_copy_async(unsigned src_node, unsigned dst_node, double start);
 
 /**
+   Record in offline execution traces the copy of \p size bytes from
+   node \p src_node to node \p dst_node
+ */
+void starpu_interface_data_copy(unsigned src_node, unsigned dst_node, size_t size);
+
+/**
    Allocate \p size bytes on node \p dst_node with the given allocation \p flags. This returns 0 if
    allocation failed, the allocation method should then return <c>-ENOMEM</c> as
    allocated size. Deallocation must be done with starpu_free_on_node_flags().

+ 9 - 1
include/starpu_scc.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2015,2017,2019                                CNRS
+ * Copyright (C) 2015,2017,2019                           CNRS
  * Copyright (C) 2013                                     Université de Bordeaux
  * Copyright (C) 2012                                     Inria
  *
@@ -47,6 +47,14 @@ int starpu_scc_register_kernel(starpu_scc_func_symbol_t *symbol, const char *fun
 */
 starpu_scc_kernel_t starpu_scc_get_kernel(starpu_scc_func_symbol_t symbol);
 
+/**
+   Store in \p offset the offset between \p ptr and the start of the
+   shared memory.
+   Store in \p dev_handle the start of the shared memory, which is
+   useful for data partitioning.
+ */
+void starpu_scc_get_offset_in_shared_memory(void *ptr, void **dev_handle, size_t *offset);
+
 #endif /* STARPU_USE_SCC */
 
 /** @} */

+ 7 - 0
include/starpu_worker.h

@@ -339,6 +339,13 @@ hwloc_cpuset_t starpu_worker_get_hwloc_cpuset(int workerid);
 hwloc_obj_t starpu_worker_get_hwloc_obj(int workerid);
 #endif
 
+int starpu_memory_node_get_devid(unsigned node);
+
+/**
+   Return the memory node associated with the current worker, or STARPU_MAIN_RAM if there is no current worker
+*/
+unsigned starpu_worker_get_local_memory_node(void);
+
 /**
    Return the identifier of the memory node associated to the worker
    identified by \p workerid.

+ 4 - 4
src/core/simgrid.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2016,2017                                Inria
- * Copyright (C) 2012,2013,2015-2018                      CNRS
+ * Copyright (C) 2012,2013,2015-2019                      CNRS
  * Copyright (C) 2012-2019                                Université de Bordeaux
  * Copyright (C) 2013                                     Thibaut Lambert
  *
@@ -947,7 +947,7 @@ int _starpu_simgrid_transfer(size_t size, unsigned src_node, unsigned dst_node,
 	transfer->task = task;
 	transfer->src_node = src_node;
 	transfer->dst_node = dst_node;
-	transfer->run_node = _starpu_memory_node_get_local_key();
+	transfer->run_node = starpu_worker_get_local_memory_node();
 
 	if (req)
 		event = &req->async_channel.event;
@@ -974,7 +974,7 @@ int _starpu_simgrid_transfer(size_t size, unsigned src_node, unsigned dst_node,
 	if (req)
 	{
 		starpu_interface_end_driver_copy_async(src_node, dst_node, start);
-		_STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
+		starpu_interface_data_copy(src_node, dst_node, size);
 		return -EAGAIN;
 	}
 	else
@@ -1029,7 +1029,7 @@ _starpu_simgrid_get_memnode_host(unsigned node)
 			STARPU_ABORT();
 			break;
 	}
-	snprintf(name, sizeof(name), fmt, _starpu_memory_node_get_devid(node));
+	snprintf(name, sizeof(name), fmt, starpu_memory_node_get_devid(node));
 
 	return _starpu_simgrid_get_host_by_name(name);
 }

+ 12 - 7
src/datawizard/copy_driver.c

@@ -184,12 +184,12 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
 		if ((src_kind == STARPU_CUDA_RAM) && (dst_kind == STARPU_CUDA_RAM))
 		{
 			/* GPU-GPU transfer, issue it from the destination */
-			devid = _starpu_memory_node_get_devid(dst_node);
+			devid = starpu_memory_node_get_devid(dst_node);
 		}
 		else
 		{
 			unsigned node = (dst_kind == STARPU_CUDA_RAM)?dst_node:src_node;
-			devid = _starpu_memory_node_get_devid(node);
+			devid = starpu_memory_node_get_devid(node);
 		}
 		starpu_cuda_set_device(devid);
 	}
@@ -208,7 +208,7 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
 	case _STARPU_MEMORY_NODE_TUPLE(STARPU_CUDA_RAM,STARPU_CPU_RAM):
 		/* only the proper CUBLAS thread can initiate this directly ! */
 #if !defined(STARPU_HAVE_CUDA_MEMCPY_PEER)
-		STARPU_ASSERT(_starpu_memory_node_get_local_key() == src_node);
+		STARPU_ASSERT(starpu_worker_get_local_memory_node() == src_node);
 #endif
 		if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_cuda_copy_disabled() ||
 				!(copy_methods->cuda_to_ram_async || copy_methods->any_to_any))
@@ -243,7 +243,7 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
 		/* STARPU_CPU_RAM -> CUBLAS_RAM */
 		/* only the proper CUBLAS thread can initiate this ! */
 #if !defined(STARPU_HAVE_CUDA_MEMCPY_PEER)
-		STARPU_ASSERT(_starpu_memory_node_get_local_key() == dst_node);
+		STARPU_ASSERT(starpu_worker_get_local_memory_node() == dst_node);
 #endif
 		if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_cuda_copy_disabled() ||
 				!(copy_methods->ram_to_cuda_async || copy_methods->any_to_any))
@@ -311,7 +311,7 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
 #ifdef STARPU_USE_OPENCL
 	case _STARPU_MEMORY_NODE_TUPLE(STARPU_OPENCL_RAM,STARPU_CPU_RAM):
 		/* OpenCL -> RAM */
-		STARPU_ASSERT(_starpu_memory_node_get_local_key() == src_node);
+		STARPU_ASSERT(starpu_worker_get_local_memory_node() == src_node);
 		if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_opencl_copy_disabled() ||
 				!(copy_methods->opencl_to_ram_async || copy_methods->any_to_any))
 		{
@@ -336,7 +336,7 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
 		break;
 	case _STARPU_MEMORY_NODE_TUPLE(STARPU_CPU_RAM,STARPU_OPENCL_RAM):
 		/* STARPU_CPU_RAM -> STARPU_OPENCL_RAM */
-		STARPU_ASSERT(_starpu_memory_node_get_local_key() == dst_node);
+		STARPU_ASSERT(starpu_worker_get_local_memory_node() == dst_node);
 		if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_opencl_copy_disabled() ||
 				!(copy_methods->ram_to_opencl_async || copy_methods->any_to_any))
 		{
@@ -361,7 +361,7 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
 		break;
 	case _STARPU_MEMORY_NODE_TUPLE(STARPU_OPENCL_RAM,STARPU_OPENCL_RAM):
 		/* STARPU_OPENCL_RAM -> STARPU_OPENCL_RAM */
-		STARPU_ASSERT(_starpu_memory_node_get_local_key() == dst_node || _starpu_memory_node_get_local_key() == src_node);
+		STARPU_ASSERT(starpu_worker_get_local_memory_node() == dst_node || starpu_worker_get_local_memory_node() == src_node);
 		if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_opencl_copy_disabled() ||
 				!(copy_methods->opencl_to_opencl_async || copy_methods->any_to_any))
 		{
@@ -687,6 +687,11 @@ int STARPU_ATTRIBUTE_WARN_UNUSED_RESULT _starpu_driver_copy_data_1_to_1(starpu_d
 	return 0;
 }
 
+void starpu_interface_data_copy(unsigned src_node, unsigned dst_node, size_t size) /* public entry point so data interface implementations can trace a copy of size bytes from src_node to dst_node */
+{
+	_STARPU_TRACE_DATA_COPY(src_node, dst_node, size); /* forwards the three arguments unchanged to the internal tracing hook */
+}
+
 void starpu_interface_start_driver_copy_async(unsigned src_node, unsigned dst_node, double *start)
 {
 	*start = starpu_timing_now();

+ 2 - 2
src/datawizard/data_request.c

@@ -3,7 +3,7 @@
  * Copyright (C) 2008-2017                                Université de Bordeaux
  * Copyright (C) 2011,2016,2017                           Inria
  * Copyright (C) 2018                                     Federal University of Rio Grande do Sul (UFRGS)
- * Copyright (C) 2010-2018                                CNRS
+ * Copyright (C) 2010-2019                                CNRS
  * Copyright (C) 2013                                     Thibaut Lambert
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -204,7 +204,7 @@ int _starpu_wait_data_request_completion(struct _starpu_data_request *r, unsigne
 	int completed;
 
 #ifdef STARPU_SIMGRID
-	unsigned local_node = _starpu_memory_node_get_local_key();
+	unsigned local_node = starpu_worker_get_local_memory_node();
 
 	starpu_pthread_wait_t wait;
 

+ 2 - 2
src/datawizard/interfaces/bcsr_interface.c

@@ -259,7 +259,7 @@ size_t starpu_bcsr_get_elemsize(starpu_data_handle_t handle)
 uintptr_t starpu_bcsr_get_local_nzval(starpu_data_handle_t handle)
 {
 	unsigned node;
-	node = _starpu_memory_node_get_local_key();
+	node = starpu_worker_get_local_memory_node();
 
 	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
 
@@ -408,7 +408,7 @@ static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_int
 	if (starpu_interface_copy((uintptr_t)src_bcsr->rowptr, 0, src_node, (uintptr_t)dst_bcsr->rowptr, 0, dst_node, (nrow+1)*sizeof(uint32_t), async_data))
 		ret = -EAGAIN;
 
-	_STARPU_TRACE_DATA_COPY(src_node, dst_node, nnz*elemsize*r*c + (nnz+nrow+1)*sizeof(uint32_t));
+	starpu_interface_data_copy(src_node, dst_node, nnz*elemsize*r*c + (nnz+nrow+1)*sizeof(uint32_t));
 
 	return ret;
 }

+ 12 - 13
src/datawizard/interfaces/block_interface.c

@@ -157,8 +157,8 @@ static void register_block_handle(starpu_data_handle_t handle, unsigned home_nod
 
 /* declare a new data with the block interface */
 void starpu_block_data_register(starpu_data_handle_t *handleptr, int home_node,
-			uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t nx,
-			uint32_t ny, uint32_t nz, size_t elemsize)
+				uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t nx,
+				uint32_t ny, uint32_t nz, size_t elemsize)
 {
 	struct starpu_block_interface block_interface =
 	{
@@ -182,15 +182,14 @@ void starpu_block_data_register(starpu_data_handle_t *handleptr, int home_node,
 #endif
 
 #ifdef STARPU_USE_SCC
-	_starpu_scc_set_offset_in_shared_memory((void*)block_interface.ptr,
-			(void**)&(block_interface.dev_handle), &(block_interface.offset));
+	starpu_scc_get_offset_in_shared_memory((void*)block_interface.ptr, (void**)&(block_interface.dev_handle), &(block_interface.offset));
 #endif
 
 	starpu_data_register(handleptr, home_node, &block_interface, &starpu_interface_block_ops);
 }
 
 void starpu_block_ptr_register(starpu_data_handle_t handle, unsigned node,
-			uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ldy, uint32_t ldz)
+			       uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ldy, uint32_t ldz)
 {
 	struct starpu_block_interface *block_interface = starpu_data_get_interface_on_node(handle, node);
 	starpu_data_ptr_register(handle, node);
@@ -352,7 +351,7 @@ uint32_t starpu_block_get_nz(starpu_data_handle_t handle)
 uint32_t starpu_block_get_local_ldy(starpu_data_handle_t handle)
 {
 	unsigned node;
-	node = _starpu_memory_node_get_local_key();
+	node = starpu_worker_get_local_memory_node();
 
 	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
 
@@ -369,7 +368,7 @@ uint32_t starpu_block_get_local_ldy(starpu_data_handle_t handle)
 uint32_t starpu_block_get_local_ldz(starpu_data_handle_t handle)
 {
 	unsigned node;
-	node = _starpu_memory_node_get_local_key();
+	node = starpu_worker_get_local_memory_node();
 
 	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
 
@@ -386,7 +385,7 @@ uint32_t starpu_block_get_local_ldz(starpu_data_handle_t handle)
 uintptr_t starpu_block_get_local_ptr(starpu_data_handle_t handle)
 {
 	unsigned node;
-	node = _starpu_memory_node_get_local_key();
+	node = starpu_worker_get_local_memory_node();
 
 	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
 
@@ -513,7 +512,7 @@ static int copy_cuda_common(void *src_interface, unsigned src_node STARPU_ATTRIB
 		}
 	}
 
-	_STARPU_TRACE_DATA_COPY(src_node, dst_node, src_block->nx*src_block->ny*src_block->elemsize*src_block->elemsize);
+	starpu_interface_data_copy(src_node, dst_node, src_block->nx*src_block->ny*src_block->elemsize*src_block->elemsize);
 
 	return 0;
 }
@@ -597,7 +596,7 @@ static int copy_cuda_async_common(void *src_interface, unsigned src_node STARPU_
 
 	}
 
-	_STARPU_TRACE_DATA_COPY(src_node, dst_node, src_block->nx*src_block->ny*src_block->nz*src_block->elemsize);
+	starpu_interface_data_copy(src_node, dst_node, src_block->nx*src_block->ny*src_block->nz*src_block->elemsize);
 
 	return ret;
 
@@ -620,7 +619,7 @@ no_async_default:
 			STARPU_CUDA_REPORT_ERROR(cures);
 	}
 
-	_STARPU_TRACE_DATA_COPY(src_node, dst_node, src_block->nx*src_block->ny*src_block->nz*src_block->elemsize);
+	starpu_interface_data_copy(src_node, dst_node, src_block->nx*src_block->ny*src_block->nz*src_block->elemsize);
 	return 0;
 	}
 }
@@ -701,7 +700,7 @@ static int copy_opencl_common(void *src_interface, unsigned src_node, void *dst_
                 }
         }
 
-	_STARPU_TRACE_DATA_COPY(src_node, dst_node, src_block->nx*src_block->ny*src_block->nz*src_block->elemsize);
+	starpu_interface_data_copy(src_node, dst_node, src_block->nx*src_block->ny*src_block->nz*src_block->elemsize);
 
 	return ret;
 }
@@ -796,7 +795,7 @@ static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_int
 		}
 	}
 
-	_STARPU_TRACE_DATA_COPY(src_node, dst_node, nx*ny*nz*elemsize);
+	starpu_interface_data_copy(src_node, dst_node, nx*ny*nz*elemsize);
 
 	return ret;
 }

+ 3 - 3
src/datawizard/interfaces/coo_interface.c

@@ -53,9 +53,9 @@ copy_any_to_any(void *src_interface, unsigned src_node,
 		size, async_data))
 		ret = -EAGAIN;
 
-	_STARPU_TRACE_DATA_COPY(src_node, dst_node,
-		src_coo->n_values *
-		(2 * sizeof(src_coo->rows[0]) + src_coo->elemsize));
+	starpu_interface_data_copy(src_node, dst_node,
+				   src_coo->n_values *
+				   (2 * sizeof(src_coo->rows[0]) + src_coo->elemsize));
 
 	return ret;
 }

+ 4 - 4
src/datawizard/interfaces/csr_interface.c

@@ -211,7 +211,7 @@ size_t starpu_csr_get_elemsize(starpu_data_handle_t handle)
 uintptr_t starpu_csr_get_local_nzval(starpu_data_handle_t handle)
 {
 	unsigned node;
-	node = _starpu_memory_node_get_local_key();
+	node = starpu_worker_get_local_memory_node();
 
 	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
 
@@ -228,7 +228,7 @@ uintptr_t starpu_csr_get_local_nzval(starpu_data_handle_t handle)
 uint32_t *starpu_csr_get_local_colind(starpu_data_handle_t handle)
 {
 	unsigned node;
-	node = _starpu_memory_node_get_local_key();
+	node = starpu_worker_get_local_memory_node();
 
 	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
 
@@ -245,7 +245,7 @@ uint32_t *starpu_csr_get_local_colind(starpu_data_handle_t handle)
 uint32_t *starpu_csr_get_local_rowptr(starpu_data_handle_t handle)
 {
 	unsigned node;
-	node = _starpu_memory_node_get_local_key();
+	node = starpu_worker_get_local_memory_node();
 
 	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
 
@@ -350,7 +350,7 @@ static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_int
 	if (starpu_interface_copy((uintptr_t)src_csr->rowptr, 0, src_node, (uintptr_t)dst_csr->rowptr, 0, dst_node, (nrow+1)*sizeof(uint32_t), async_data))
 		ret = -EAGAIN;
 
-	_STARPU_TRACE_DATA_COPY(src_node, dst_node, nnz*elemsize + (nnz+nrow+1)*sizeof(uint32_t));
+	starpu_interface_data_copy(src_node, dst_node, nnz*elemsize + (nnz+nrow+1)*sizeof(uint32_t));
 
 	return ret;
 }

+ 3 - 5
src/datawizard/interfaces/data_interface.c

@@ -542,8 +542,7 @@ int starpu_data_pointer_is_inside(starpu_data_handle_t handle, unsigned node, vo
 
 void *starpu_data_get_local_ptr(starpu_data_handle_t handle)
 {
-	return starpu_data_handle_to_pointer(handle,
-					_starpu_memory_node_get_local_key());
+	return starpu_data_handle_to_pointer(handle, starpu_worker_get_local_memory_node());
 }
 
 struct starpu_data_interface_ops* starpu_data_get_interface_ops(starpu_data_handle_t handle)
@@ -554,7 +553,6 @@ struct starpu_data_interface_ops* starpu_data_get_interface_ops(starpu_data_hand
 /*
  * Stop monitoring a piece of data
  */
-
 void _starpu_data_unregister_ram_pointer(starpu_data_handle_t handle, unsigned node)
 {
 	if (starpu_node_get_kind(node) != STARPU_CPU_RAM)
@@ -1091,14 +1089,14 @@ int starpu_data_interface_get_next_id(void)
 int starpu_data_pack(starpu_data_handle_t handle, void **ptr, starpu_ssize_t *count)
 {
 	STARPU_ASSERT_MSG(handle->ops->pack_data, "The datatype interface %s (%d) does not have a pack operation", handle->ops->name, handle->ops->interfaceid);
-	return handle->ops->pack_data(handle, _starpu_memory_node_get_local_key(), ptr, count);
+	return handle->ops->pack_data(handle, starpu_worker_get_local_memory_node(), ptr, count);
 }
 
 int starpu_data_unpack(starpu_data_handle_t handle, void *ptr, size_t count)
 {
 	STARPU_ASSERT_MSG(handle->ops->unpack_data, "The datatype interface %s (%d) does not have an unpack operation", handle->ops->name, handle->ops->interfaceid);
 	int ret;
-	ret = handle->ops->unpack_data(handle, _starpu_memory_node_get_local_key(), ptr, count);
+	ret = handle->ops->unpack_data(handle, starpu_worker_get_local_memory_node(), ptr, count);
 	return ret;
 }
 

+ 14 - 15
src/datawizard/interfaces/matrix_interface.c

@@ -185,8 +185,8 @@ static int matrix_pointer_is_inside(void *data_interface, unsigned node, void *p
 
 /* declare a new data with the matrix interface */
 void starpu_matrix_data_register_allocsize(starpu_data_handle_t *handleptr, int home_node,
-			uintptr_t ptr, uint32_t ld, uint32_t nx,
-			uint32_t ny, size_t elemsize, size_t allocsize)
+					   uintptr_t ptr, uint32_t ld, uint32_t nx,
+					   uint32_t ny, size_t elemsize, size_t allocsize)
 {
 	struct starpu_matrix_interface matrix_interface =
 	{
@@ -209,22 +209,21 @@ void starpu_matrix_data_register_allocsize(starpu_data_handle_t *handleptr, int
 #endif
 
 #ifdef STARPU_USE_SCC
-	_starpu_scc_set_offset_in_shared_memory((void*)matrix_interface.ptr,
-			(void**)&(matrix_interface.dev_handle), &(matrix_interface.offset));
+	starpu_scc_get_offset_in_shared_memory((void*)matrix_interface.ptr, (void**)&(matrix_interface.dev_handle), &(matrix_interface.offset));
 #endif
 
 	starpu_data_register(handleptr, home_node, &matrix_interface, &starpu_interface_matrix_ops);
 }
 
 void starpu_matrix_data_register(starpu_data_handle_t *handleptr, int home_node,
-			uintptr_t ptr, uint32_t ld, uint32_t nx,
-			uint32_t ny, size_t elemsize)
+				 uintptr_t ptr, uint32_t ld, uint32_t nx,
+				 uint32_t ny, size_t elemsize)
 {
 	starpu_matrix_data_register_allocsize(handleptr, home_node, ptr, ld, nx, ny, elemsize, nx * ny * elemsize);
 }
 
 void starpu_matrix_ptr_register(starpu_data_handle_t handle, unsigned node,
-			uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ld)
+				uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ld)
 {
 	struct starpu_matrix_interface *matrix_interface = starpu_data_get_interface_on_node(handle, node);
 	starpu_data_ptr_register(handle, node);
@@ -378,7 +377,7 @@ uint32_t starpu_matrix_get_ny(starpu_data_handle_t handle)
 uint32_t starpu_matrix_get_local_ld(starpu_data_handle_t handle)
 {
 	unsigned node;
-	node = _starpu_memory_node_get_local_key();
+	node = starpu_worker_get_local_memory_node();
 
 	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
 
@@ -395,7 +394,7 @@ uint32_t starpu_matrix_get_local_ld(starpu_data_handle_t handle)
 uintptr_t starpu_matrix_get_local_ptr(starpu_data_handle_t handle)
 {
 	unsigned node;
-	node = _starpu_memory_node_get_local_key();
+	node = starpu_worker_get_local_memory_node();
 
 	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
 
@@ -502,7 +501,7 @@ static int copy_cuda_common(void *src_interface, unsigned src_node STARPU_ATTRIB
 		if (ret) STARPU_CUDA_REPORT_ERROR(cures);
 	}
 
-	_STARPU_TRACE_DATA_COPY(src_node, dst_node, (size_t)src_matrix->nx*src_matrix->ny*src_matrix->elemsize);
+	starpu_interface_data_copy(src_node, dst_node, (size_t)src_matrix->nx*src_matrix->ny*src_matrix->elemsize);
 
 	return 0;
 }
@@ -517,8 +516,8 @@ static int copy_cuda_peer(void *src_interface, unsigned src_node STARPU_ATTRIBUT
 	size_t elemsize = src_matrix->elemsize;
 	cudaError_t cures;
 
-	int src_dev = _starpu_memory_node_get_devid(src_node);
-	int dst_dev = _starpu_memory_node_get_devid(dst_node);
+	int src_dev = starpu_memory_node_get_devid(src_node);
+	int dst_dev = starpu_memory_node_get_devid(dst_node);
 
 	struct cudaMemcpy3DPeerParms p;
 	memset(&p, 0, sizeof(p));
@@ -545,7 +544,7 @@ static int copy_cuda_peer(void *src_interface, unsigned src_node STARPU_ATTRIBUT
 	if (STARPU_UNLIKELY(cures))
 		STARPU_CUDA_REPORT_ERROR(cures);
 
-	_STARPU_TRACE_DATA_COPY(src_node, dst_node, (size_t)src_matrix->nx*src_matrix->ny*src_matrix->elemsize);
+	starpu_interface_data_copy(src_node, dst_node, (size_t)src_matrix->nx*src_matrix->ny*src_matrix->elemsize);
 
 	return 0;
 #else
@@ -611,7 +610,7 @@ static int copy_opencl_common(void *src_interface, unsigned src_node, void *dst_
 					    src_matrix->nx*src_matrix->ny*src_matrix->elemsize,
 					    event);
 
-	_STARPU_TRACE_DATA_COPY(src_node, dst_node, src_matrix->nx*src_matrix->ny*src_matrix->elemsize);
+	starpu_interface_data_copy(src_node, dst_node, src_matrix->nx*src_matrix->ny*src_matrix->elemsize);
 
 	return ret;
 }
@@ -684,7 +683,7 @@ static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_int
 		}
 	}
 
-	_STARPU_TRACE_DATA_COPY(src_node, dst_node, (size_t)nx*ny*elemsize);
+	starpu_interface_data_copy(src_node, dst_node, (size_t)nx*ny*elemsize);
 
 	return ret;
 }

+ 8 - 8
src/datawizard/interfaces/multiformat_interface.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2011,2012                                Inria
- * Copyright (C) 2011-2017                                CNRS
+ * Copyright (C) 2011-2017,2019                           CNRS
  * Copyright (C) 2011-2013,2015,2016,2018-2019            Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -559,8 +559,8 @@ static int copy_cuda_peer_common(void *src_interface, unsigned src_node,
 
 	cudaError_t status;
 	int size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
-	int src_dev = _starpu_memory_node_get_devid(src_node);
-	int dst_dev = _starpu_memory_node_get_devid(dst_node);
+	int src_dev = starpu_memory_node_get_devid(src_node);
+	int dst_dev = starpu_memory_node_get_devid(dst_node);
 
 	if (stream)
 	{
@@ -586,7 +586,7 @@ static int copy_cuda_peer_common(void *src_interface, unsigned src_node,
 	if (STARPU_UNLIKELY(status != cudaSuccess))
 		STARPU_CUDA_REPORT_ERROR(status);
 
-	_STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
+	starpu_interface_data_copy(src_node, dst_node, size);
 
 	return 0;
 }
@@ -663,7 +663,7 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node,
         if (STARPU_UNLIKELY(err))
                 STARPU_OPENCL_REPORT_ERROR(err);
 
-	_STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
+	starpu_interface_data_copy(src_node, dst_node, size);
 	return ret;
 }
 
@@ -702,7 +702,7 @@ static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node,
         if (STARPU_UNLIKELY(err))
                 STARPU_OPENCL_REPORT_ERROR(err);
 
-	_STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
+	starpu_interface_data_copy(src_node, dst_node, size);
 
 
 	return ret;
@@ -754,7 +754,7 @@ static int copy_mic_common_ram_to_mic(void *src_interface, unsigned src_node, vo
 
 	copy_func(src_multiformat->cpu_ptr, src_node, dst_multiformat->cpu_ptr, dst_node, size);
 
-	_STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
+	starpu_interface_data_copy(src_node, dst_node, size);
 
 	return 0;
 }
@@ -772,7 +772,7 @@ static int copy_mic_common_mic_to_ram(void *src_interface, unsigned src_node, vo
 	size_t size = src_multiformat->nx * src_multiformat->ops->mic_elemsize;
 	copy_func(src_multiformat->mic_ptr, src_node, dst_multiformat->mic_ptr, dst_node, size);
 
-	_STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
+	starpu_interface_data_copy(src_node, dst_node, size);
 
 	return 0;
 }

+ 5 - 6
src/datawizard/interfaces/variable_interface.c

@@ -113,7 +113,7 @@ static void register_variable_handle(starpu_data_handle_t handle, unsigned home_
 
 /* declare a new data with the variable interface */
 void starpu_variable_data_register(starpu_data_handle_t *handleptr, int home_node,
-                        uintptr_t ptr, size_t elemsize)
+				   uintptr_t ptr, size_t elemsize)
 {
 	struct starpu_variable_interface variable =
 	{
@@ -132,15 +132,14 @@ void starpu_variable_data_register(starpu_data_handle_t *handleptr, int home_nod
 #endif
 
 #ifdef STARPU_USE_SCC
-	_starpu_scc_set_offset_in_shared_memory((void*)variable.ptr, (void**)&(variable.dev_handle),
-			&(variable.offset));
+	starpu_scc_get_offset_in_shared_memory((void*)variable.ptr, (void**)&(variable.dev_handle), &(variable.offset));
 #endif
 
 	starpu_data_register(handleptr, home_node, &variable, &starpu_interface_variable_ops);
 }
 
 void starpu_variable_ptr_register(starpu_data_handle_t handle, unsigned node,
-			uintptr_t ptr, uintptr_t dev_handle, size_t offset)
+				  uintptr_t ptr, uintptr_t dev_handle, size_t offset)
 {
 	struct starpu_variable_interface *variable_interface = starpu_data_get_interface_on_node(handle, node);
 	starpu_data_ptr_register(handle, node);
@@ -221,7 +220,7 @@ static size_t variable_interface_get_size(starpu_data_handle_t handle)
 uintptr_t starpu_variable_get_local_ptr(starpu_data_handle_t handle)
 {
 	unsigned node;
-	node = _starpu_memory_node_get_local_key();
+	node = starpu_worker_get_local_memory_node();
 
 	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
 
@@ -270,7 +269,7 @@ static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_int
 
 	ret = starpu_interface_copy(ptr_src, 0, src_node, ptr_dst, 0, dst_node, elemsize, async_data);
 
-	_STARPU_TRACE_DATA_COPY(src_node, dst_node, elemsize);
+	starpu_interface_data_copy(src_node, dst_node, elemsize);
 
 	return ret;
 }

+ 5 - 5
src/datawizard/interfaces/vector_interface.c

@@ -134,7 +134,7 @@ static void register_vector_handle(starpu_data_handle_t handle, unsigned home_no
 
 /* declare a new data with the vector interface */
 void starpu_vector_data_register_allocsize(starpu_data_handle_t *handleptr, int home_node,
-		uintptr_t ptr, uint32_t nx, size_t elemsize, size_t allocsize)
+					   uintptr_t ptr, uint32_t nx, size_t elemsize, size_t allocsize)
 {
 	struct starpu_vector_interface vector =
 	{
@@ -156,14 +156,14 @@ void starpu_vector_data_register_allocsize(starpu_data_handle_t *handleptr, int
 #endif
 
 #ifdef STARPU_USE_SCC
-	_starpu_scc_set_offset_in_shared_memory((void*)vector.ptr, (void**)&(vector.dev_handle), &(vector.offset));
+	starpu_scc_get_offset_in_shared_memory((void*)vector.ptr, (void**)&(vector.dev_handle), &(vector.offset));
 #endif
 
 	starpu_data_register(handleptr, home_node, &vector, &starpu_interface_vector_ops);
 }
 
 void starpu_vector_data_register(starpu_data_handle_t *handleptr, int home_node,
-                        uintptr_t ptr, uint32_t nx, size_t elemsize)
+				 uintptr_t ptr, uint32_t nx, size_t elemsize)
 {
 	starpu_vector_data_register_allocsize(handleptr, home_node, ptr, nx, elemsize, nx * elemsize);
 }
@@ -296,7 +296,7 @@ uint32_t starpu_vector_get_nx(starpu_data_handle_t handle)
 uintptr_t starpu_vector_get_local_ptr(starpu_data_handle_t handle)
 {
 	unsigned node;
-	node = _starpu_memory_node_get_local_key();
+	node = starpu_worker_get_local_memory_node();
 
 	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
 
@@ -377,7 +377,7 @@ static int copy_any_to_any(void *src_interface, unsigned src_node,
 				    dst_vector->dev_handle, dst_vector->offset, dst_node,
 				    src_vector->nx*src_vector->elemsize, async_data);
 
-	_STARPU_TRACE_DATA_COPY(src_node, dst_node, src_vector->nx*src_vector->elemsize);
+	starpu_interface_data_copy(src_node, dst_node, src_vector->nx*src_vector->elemsize);
 	return ret;
 }
 

+ 3 - 3
src/datawizard/malloc.c

@@ -633,7 +633,7 @@ _starpu_malloc_on_node(unsigned dst_node, size_t size, int flags)
 			STARPU_ASSERT(last[dst_node] >= addr);
 			STARPU_PTHREAD_MUTEX_UNLOCK(&cuda_alloc_mutex);
 #else
-			unsigned devid = _starpu_memory_node_get_devid(dst_node);
+			unsigned devid = starpu_memory_node_get_devid(dst_node);
 #if defined(STARPU_HAVE_CUDA_MEMCPY_PEER)
 			starpu_cuda_set_device(devid);
 #else
@@ -680,7 +680,7 @@ _starpu_malloc_on_node(unsigned dst_node, size_t size, int flags)
                                 int ret;
 				cl_mem ptr;
 
-				ret = starpu_opencl_allocate_memory(_starpu_memory_node_get_devid(dst_node), &ptr, size, CL_MEM_READ_WRITE);
+				ret = starpu_opencl_allocate_memory(starpu_memory_node_get_devid(dst_node), &ptr, size, CL_MEM_READ_WRITE);
 				if (ret)
 				{
 					addr = 0;
@@ -760,7 +760,7 @@ _starpu_free_on_node_flags(unsigned dst_node, uintptr_t addr, size_t size, int f
 			_starpu_simgrid_sync_gpus();
 #else
 			cudaError_t err;
-			unsigned devid = _starpu_memory_node_get_devid(dst_node);
+			unsigned devid = starpu_memory_node_get_devid(dst_node);
 #if defined(STARPU_HAVE_CUDA_MEMCPY_PEER)
 			starpu_cuda_set_device(devid);
 #else

+ 2 - 2
src/datawizard/memalloc.c

@@ -383,7 +383,7 @@ static size_t free_memory_on_node(struct _starpu_mem_chunk *mc, unsigned node)
 			 * proper CUDA device in case it is needed. This avoids
 			 * having to set it again in the free method of each
 			 * interface. */
-			starpu_cuda_set_device(_starpu_memory_node_get_devid(node));
+			starpu_cuda_set_device(starpu_memory_node_get_devid(node));
 		}
 #endif
 
@@ -1424,7 +1424,7 @@ static starpu_ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, st
 			 * proper CUDA device in case it is needed. This avoids
 			 * having to set it again in the malloc method of each
 			 * interface. */
-			starpu_cuda_set_device(_starpu_memory_node_get_devid(dst_node));
+			starpu_cuda_set_device(starpu_memory_node_get_devid(dst_node));
 		}
 #endif
 

+ 14 - 1
src/datawizard/memory_nodes.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2009-2017,2019                           Université de Bordeaux
  * Copyright (C) 2011-2013,2016,2017                      Inria
- * Copyright (C) 2010-2015,2017,2018                      CNRS
+ * Copyright (C) 2010-2015,2017,2018,2019                 CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -196,3 +196,16 @@ void _starpu_worker_drives_memory_node(struct _starpu_worker *worker, unsigned m
 	}
 }
 
+unsigned starpu_worker_get_local_memory_node(void) /* returns the memory node of the current worker, or STARPU_MAIN_RAM when there is none */
+{
+	struct _starpu_worker *worker = _starpu_get_local_worker_key(); /* worker recorded for the caller; may be NULL */
+	if (!worker) /* no current worker: fall back to main RAM */
+		return STARPU_MAIN_RAM;
+	return worker->memory_node; /* node stored in the worker structure */
+}
+
+int starpu_memory_node_get_devid(unsigned node) /* returns the device id recorded for memory node `node` */
+{
+	return _starpu_descr.devid[node]; /* direct table lookup; node is not range-checked here, so callers presumably must pass a registered node -- NOTE(review): confirm */
+}
+

+ 1 - 14
src/datawizard/memory_nodes.h

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2009-2012,2014-2017,2019                 Université de Bordeaux
  * Copyright (C) 2012,2016,2017                           Inria
- * Copyright (C) 2010,2011,2013,2015,2017                 CNRS
+ * Copyright (C) 2010,2011,2013,2015,2017,2019            CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -76,14 +76,6 @@ extern struct _starpu_memory_node_descr _starpu_descr;
 void _starpu_memory_nodes_init(void);
 void _starpu_memory_nodes_deinit(void);
 
-static inline unsigned _starpu_memory_node_get_local_key(void)
-{
-	struct _starpu_worker *worker = _starpu_get_local_worker_key();
-	if (!worker)
-		return STARPU_MAIN_RAM;
-	return worker->memory_node;
-}
-
 static inline void _starpu_memory_node_add_nworkers(unsigned node)
 {
 	_starpu_descr.nworkers[node]++;
@@ -112,11 +104,6 @@ unsigned _starpu_memory_node_register(enum starpu_node_kind kind, int devid);
 //void _starpu_memory_node_attach_queue(struct starpu_jobq_s *q, unsigned nodeid);
 void _starpu_memory_node_register_condition(struct _starpu_worker *worker, starpu_pthread_cond_t *cond, unsigned nodeid);
 
-static inline int _starpu_memory_node_get_devid(unsigned node)
-{
-	return _starpu_descr.devid[node];
-}
-
 static inline struct _starpu_memory_node_descr *_starpu_memory_node_get_description(void)
 {
 	return &_starpu_descr;

+ 6 - 6
src/drivers/cuda/driver_cuda.c

@@ -181,7 +181,7 @@ cudaStream_t starpu_cuda_get_local_in_transfer_stream()
 
 cudaStream_t starpu_cuda_get_in_transfer_stream(unsigned dst_node)
 {
-	int dst_devid = _starpu_memory_node_get_devid(dst_node);
+	int dst_devid = starpu_memory_node_get_devid(dst_node);
 	cudaStream_t stream;
 
 	stream = in_transfer_streams[dst_devid];
@@ -202,7 +202,7 @@ cudaStream_t starpu_cuda_get_local_out_transfer_stream()
 
 cudaStream_t starpu_cuda_get_out_transfer_stream(unsigned src_node)
 {
-	int src_devid = _starpu_memory_node_get_devid(src_node);
+	int src_devid = starpu_memory_node_get_devid(src_node);
 	cudaStream_t stream;
 
 	stream = out_transfer_streams[src_devid];
@@ -212,8 +212,8 @@ cudaStream_t starpu_cuda_get_out_transfer_stream(unsigned src_node)
 
 cudaStream_t starpu_cuda_get_peer_transfer_stream(unsigned src_node, unsigned dst_node)
 {
-	int src_devid = _starpu_memory_node_get_devid(src_node);
-	int dst_devid = _starpu_memory_node_get_devid(dst_node);
+	int src_devid = starpu_memory_node_get_devid(src_node);
+	int dst_devid = starpu_memory_node_get_devid(dst_node);
 	cudaStream_t stream;
 
 	stream = in_peer_transfer_streams[src_devid][dst_devid];
@@ -1131,8 +1131,8 @@ starpu_cuda_copy_async_sync(void *src_ptr, unsigned src_node,
 	{
 #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER
 		peer_copy = 1;
-		src_dev = _starpu_memory_node_get_devid(src_node);
-		dst_dev = _starpu_memory_node_get_devid(dst_node);
+		src_dev = starpu_memory_node_get_devid(src_node);
+		dst_dev = starpu_memory_node_get_devid(dst_node);
 #else
 		STARPU_ABORT();
 #endif

+ 2 - 2
src/drivers/mic/driver_mic_source.c

@@ -90,7 +90,7 @@ struct _starpu_mp_node *_starpu_mic_src_get_actual_thread_mp_node()
 
 struct _starpu_mp_node *_starpu_mic_src_get_mp_node_from_memory_node(int memory_node)
 {
-	int devid = _starpu_memory_node_get_devid(memory_node);
+	int devid = starpu_memory_node_get_devid(memory_node);
 	STARPU_ASSERT_MSG(devid >= 0 && devid < STARPU_MAXMICDEVS, "bogus devid %d for memory node %d\n", devid, memory_node);
 
 	return _starpu_mic_nodes[devid];
@@ -381,7 +381,7 @@ int _starpu_mic_allocate_memory(void **addr, size_t size, unsigned memory_node)
 	 * transfert with scif is not possible when the MIC
 	 * doesn't have enought free memory.
 	 * In this cas we can't tell any things to the host. */
-	//int devid = _starpu_memory_node_get_devid(memory_node);
+	//int devid = starpu_memory_node_get_devid(memory_node);
 	//if (_starpu_mic_get_free_mem_size(devid) < size * 1.25)
 	//	return 1;
 

+ 1 - 1
src/drivers/mpi/driver_mpi_source.c

@@ -74,7 +74,7 @@ void _starpu_mpi_source_deinit(struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNU
 
 struct _starpu_mp_node *_starpu_mpi_src_get_mp_node_from_memory_node(int memory_node)
 {
-        int devid = _starpu_memory_node_get_devid(memory_node);
+        int devid = starpu_memory_node_get_devid(memory_node);
         STARPU_ASSERT_MSG(devid >= 0 && devid < STARPU_MAXMPIDEVS, "bogus devid %d for memory node %d\n", devid, memory_node);
 
         return _starpu_mpi_ms_nodes[devid];

+ 3 - 7
src/drivers/scc/driver_scc_source.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2012,2016,2017                           Inria
- * Copyright (C) 2015-2017                                CNRS
+ * Copyright (C) 2015-2017,2019                           CNRS
  * Copyright (C) 2013-2016                                Université de Bordeaux
  * Copyright (C) 2013                                     Thibaut Lambert
  *
@@ -44,7 +44,7 @@ starpu_pthread_mutex_t htbl_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER;
 
 static struct _starpu_mp_node *_starpu_scc_src_memory_node_to_mp_node(unsigned memory_node)
 {
-	int devid = _starpu_memory_node_get_devid(memory_node);
+	int devid = starpu_memory_node_get_devid(memory_node);
 
 	STARPU_ASSERT(devid < STARPU_MAXSCCDEVS);
 	return scc_mp_nodes[devid];
@@ -231,11 +231,7 @@ void _starpu_scc_free_shared_memory(void *addr)
 	RCCE_shfree(addr);
 }
 
-/* Assigns the offset to "offset" between "ptr" and the start of the shared memory.
- * Affect "dev_handle" with the start of the shared memory is useful for data
- * partionning.
- */
-void _starpu_scc_set_offset_in_shared_memory(void *ptr, void **dev_handle, size_t *offset)
+void starpu_scc_get_offset_in_shared_memory(void *ptr, void **dev_handle, size_t *offset)
 {
 	/* We're on SCC... */
 	if (_starpu_can_submit_scc_task())

+ 1 - 3
src/drivers/scc/driver_scc_source.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2015,2017                                CNRS
+ * Copyright (C) 2015,2017,2019                           CNRS
  * Copyright (C) 2013                                     Université de Bordeaux
  * Copyright (C) 2013                                     Thibaut Lambert
  * Copyright (C) 2012                                     Inria
@@ -46,8 +46,6 @@ void _starpu_scc_free_memory(void *addr, unsigned memory_node);
 int _starpu_scc_allocate_shared_memory(void **addr, size_t size);
 void _starpu_scc_free_shared_memory(void *addr);
 
-void _starpu_scc_set_offset_in_shared_memory(void *ptr, void **dev_handle, size_t *offset);
-
 int _starpu_scc_copy_src_to_sink(void *src, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst, unsigned dst_node, size_t size);
 int _starpu_scc_copy_sink_to_src(void *src, unsigned src_node, void *dst, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size);
 int _starpu_scc_copy_sink_to_sink(void *src, unsigned src_node, void *dst, unsigned dst_node, size_t size);