15 lat temu · 2df0b5bd6c
--- a/ChangeLog
+++ b/ChangeLog
@@ -15,7 +15,7 @@ The asynchronous heterogeneous multi-accelerator release
 
				     - All data transfers use data requests now
			
 
				     - Implement asynchronous data transfers
			
 
				     - Implement prefetch mechanism
			
 
				-    - Chain data requests to support GPU->RAM->GPU transfers 
			
 
				+    - Chain data requests to support GPU->STARPU_RAM->GPU transfers 
			
 
				   * Make it possible to bypass the scheduler and to assign a task to a specific
			
 
				     worker
			
 
				   * Support restartable tasks to reinstanciate dependencies task graphs
			
--- a/examples/audio/starpu-audio-processing.c
+++ b/examples/audio/starpu-audio-processing.c
@@ -433,7 +433,7 @@ int main(int argc, char **argv)
 
				 	if (outputfilename)
			
 
				 		fprintf(stderr, "Writing output data\n");
			
 
				 
			
 
				-	/* make sure that the output is in RAM before quitting StarPU */
			
 
				+	/* make sure that the output is in STARPU_RAM before quitting StarPU */
			
 
				 	starpu_unpartition_data(A_handle, 0);
			
 
				 	starpu_delete_data(A_handle);
			
 
				 
			
--- a/examples/basic-examples/mult.c
+++ b/examples/basic-examples/mult.c
@@ -103,7 +103,7 @@ static void callback_func(void *arg)
 
				 
			
 
				 /*
			
 
				  * The codelet is passed 3 matrices, the "descr" union-type field gives a
			
 
				- * description of the layout of those 3 matrices in the local memory (ie. RAM
			
 
				+ * description of the layout of those 3 matrices in the local memory (ie. STARPU_RAM
			
 
				  * in the case of CPU, GPU frame buffer in the case of GPU etc.). Since we have
			
 
				  * registered data with the "blas" data interface, we manipulate the .blas
			
 
				  * field of the descr[x] elements which are union types.
			
@@ -392,7 +392,7 @@ int main(__attribute__ ((unused)) int argc,
 
				  	 * it's not possible to manipulate a subset of C using get_sub_data until
			
 
				 	 * starpu_map_filters is called again on C_handle.
			
 
				 	 * The second argument is the memory node where the different subsets
			
 
				-	 * should be reassembled, 0 = main memory (RAM) */
			
 
				+	 * should be reassembled, 0 = main memory (STARPU_RAM) */
			
 
				 	starpu_unpartition_data(C_handle, 0);
			
 
				 
			
 
				 	/* stop monitoring matrix C : after this, it is not possible to pass C 
			
--- a/examples/basic-examples/vector-scal.c
+++ b/examples/basic-examples/vector-scal.c
@@ -85,7 +85,7 @@ int main(int argc, char **argv)
 
				 	 *  - the second argument is the memory node where the data (ie. "tab")
			
 
				 	 *    resides initially: 0 stands for an address in main memory, as
			
 
				 	 *    opposed to an adress on a GPU for instance.
			
 
				-	 *  - the third argument is the adress of the vector in RAM
			
 
				+	 *  - the third argument is the address of the vector in STARPU_RAM
			
 
				 	 *  - the fourth argument is the number of elements in the vector
			
 
				 	 *  - the fifth argument is the size of each element.
			
 
				 	 */
			
--- a/examples/incrementer/incrementer.c
+++ b/examples/incrementer/incrementer.c
@@ -83,7 +83,7 @@ int main(int argc, char **argv)
 
				 
			
 
				 	starpu_wait_all_tasks();
			
 
				 
			
 
				-	/* update the array in RAM */
			
 
				+	/* update the array in STARPU_RAM */
			
 
				 	starpu_sync_data_with_mem(float_array_handle, STARPU_R);
			
 
				 	
			
 
				 	gettimeofday(&end, NULL);
			
--- a/src/core/dependencies/data-concurrency.c
+++ b/src/core/dependencies/data-concurrency.c
@@ -118,7 +118,7 @@ static unsigned attempt_to_submit_data_request_from_job(starpu_job_t j, unsigned
 
				 	starpu_access_mode mode = j->task->buffers[buffer_index].mode;
			
 
				 
			
 
				 	while (starpu_spin_trylock(&handle->header_lock))
			
 
				-		_starpu_datawizard_progress(get_local_memory_node(), 0);
			
 
				+		_starpu_datawizard_progress(starpu_get_local_memory_node(), 0);
			
 
				 
			
 
				 	if (handle->refcnt == 0)
			
 
				 	{
			
--- a/src/core/policies/sched_policy.c
+++ b/src/core/policies/sched_policy.c
@@ -256,7 +256,7 @@ void wait_on_sched_event(void)
 
				 
			
 
				 	pthread_mutex_lock(&q->activity_mutex);
			
 
				 
			
 
				-	starpu_handle_all_pending_node_data_requests(get_local_memory_node());
			
 
				+	starpu_handle_all_pending_node_data_requests(starpu_get_local_memory_node());
			
 
				 
			
 
				 	if (_starpu_machine_is_running())
			
 
				 	{
			
--- a/src/core/topology.c
+++ b/src/core/topology.c
@@ -464,9 +464,9 @@ static void _starpu_init_workers_binding(struct machine_config_s *config)
 
				 	/* a single cpu is dedicated for the accelerators */
			
 
				 	int accelerator_bindid = -1;
			
 
				 
			
 
				-	/* note that even if the CPU cpu are not used, we always have a RAM node */
			
 
				+	/* note that even if the CPU cpu are not used, we always have a STARPU_RAM node */
			
 
				 	/* TODO : support NUMA  ;) */
			
 
				-	ram_memory_node = _starpu_register_memory_node(RAM);
			
 
				+	ram_memory_node = _starpu_register_memory_node(STARPU_RAM);
			
 
				 
			
 
				 	unsigned worker;
			
 
				 	for (worker = 0; worker < config->nworkers; worker++)
			
@@ -501,7 +501,7 @@ static void _starpu_init_workers_binding(struct machine_config_s *config)
 
				 					npreferred = config->nhwcpus;
			
 
				 				}
			
 
				 				is_a_set_of_accelerators = 0;
			
 
				-				memory_node = _starpu_register_memory_node(CUDA_RAM);
			
 
				+				memory_node = _starpu_register_memory_node(STARPU_CUDA_RAM);
			
 
				 				break;
			
 
				 #endif
			
 
				 			default:
			
@@ -534,7 +534,7 @@ int starpu_build_topology(struct machine_config_s *config)
 
				 		return ret;
			
 
				 
			
 
				 	/* for the data management library */
			
 
				-	init_memory_nodes();
			
 
				+	starpu_init_memory_nodes();
			
 
				 
			
 
				 	_starpu_init_workers_binding(config);
			
 
				 
			
@@ -544,7 +544,7 @@ int starpu_build_topology(struct machine_config_s *config)
 
				 void starpu_destroy_topology(struct machine_config_s *config __attribute__ ((unused)))
			
 
				 {
			
 
				 	/* cleanup StarPU internal data structures */
			
 
				-	deinit_memory_nodes();
			
 
				+	starpu_deinit_memory_nodes();
			
 
				 
			
 
				 #ifdef STARPU_HAVE_HWLOC
			
 
				 	hwloc_topology_destroy(config->hwtopology);
			
--- a/src/core/workers.c
+++ b/src/core/workers.c
@@ -89,7 +89,7 @@ static void _starpu_init_worker_queue(struct worker_s *workerarg)
 
				 			STARPU_ABORT();
			
 
				 	}
			
 
				 		
			
 
				-	memory_node_attach_queue(jobq, workerarg->memory_node);
			
 
				+	starpu_memory_node_attach_queue(jobq, workerarg->memory_node);
			
 
				 }
			
 
				 
			
 
				 static void _starpu_init_workers(struct machine_config_s *config)
			
@@ -335,7 +335,7 @@ static void _starpu_operate_on_all_queues_attached_to_node(unsigned nodeid, queu
 
				 	unsigned q_id;
			
 
				 	struct jobq_s *q;
			
 
				 
			
 
				-	mem_node_descr * const descr = get_memory_node_description();
			
 
				+	starpu_mem_node_descr * const descr = starpu_get_memory_node_description();
			
 
				 
			
 
				 	pthread_rwlock_rdlock(&descr->attached_queues_rwlock);
			
 
				 
			
@@ -380,7 +380,7 @@ static void _starpu_operate_on_all_queues(queue_op op)
 
				 	unsigned q_id;
			
 
				 	struct jobq_s *q;
			
 
				 
			
 
				-	mem_node_descr * const descr = get_memory_node_description();
			
 
				+	starpu_mem_node_descr * const descr = starpu_get_memory_node_description();
			
 
				 
			
 
				 	pthread_rwlock_rdlock(&descr->attached_queues_rwlock);
			
 
				 
			
--- a/src/datawizard/coherency.c
+++ b/src/datawizard/coherency.c
@@ -25,8 +25,8 @@ uint32_t starpu_select_node_to_handle_request(uint32_t src_node, uint32_t dst_no
 
				 	/* in case one of the node is a GPU, it needs to perform the transfer,
			
 
				 	 * if both of them are GPU, it's a bit more complicated (TODO !) */
			
 
				 
			
 
				-	unsigned src_is_a_gpu = (get_node_kind(src_node) == CUDA_RAM);
			
 
				-	unsigned dst_is_a_gpu = (get_node_kind(dst_node) == CUDA_RAM);
			
 
				+	unsigned src_is_a_gpu = (starpu_get_node_kind(src_node) == STARPU_CUDA_RAM);
			
 
				+	unsigned dst_is_a_gpu = (starpu_get_node_kind(dst_node) == STARPU_CUDA_RAM);
			
 
				 
			
 
				 	/* we do not handle GPU->GPU transfers yet ! */
			
 
				 	STARPU_ASSERT( !(src_is_a_gpu && dst_is_a_gpu) );
			
@@ -40,7 +40,7 @@ uint32_t starpu_select_node_to_handle_request(uint32_t src_node, uint32_t dst_no
 
				 	/* otherwise perform it locally, since we should be on a "sane" arch
			
 
				 	 * where anyone can do the transfer. NB: in StarPU this should actually never
			
 
				 	 * happen */
			
 
				-	return get_local_memory_node();
			
 
				+	return starpu_get_local_memory_node();
			
 
				 }
			
 
				 
			
 
				 uint32_t starpu_select_src_node(starpu_data_handle handle)
			
@@ -48,7 +48,7 @@ uint32_t starpu_select_src_node(starpu_data_handle handle)
 
				 	unsigned src_node = 0;
			
 
				 	unsigned i;
			
 
				 
			
 
				-	unsigned nnodes = get_memory_nodes_count();
			
 
				+	unsigned nnodes = starpu_get_memory_nodes_count();
			
 
				 
			
 
				 	/* first find a valid copy, either a STARPU_OWNER or a STARPU_SHARED */
			
 
				 	uint32_t node;
			
@@ -75,7 +75,7 @@ uint32_t starpu_select_src_node(starpu_data_handle handle)
 
				 
			
 
				 			/* however GPU are expensive sources, really !
			
 
				 			 * 	other should be ok */
			
 
				-			if (get_node_kind(i) != CUDA_RAM)
			
 
				+			if (starpu_get_node_kind(i) != STARPU_CUDA_RAM)
			
 
				 				break;
			
 
				 
			
 
				 			/* XXX do a better algorithm to distribute the memory copies */
			
@@ -89,7 +89,7 @@ uint32_t starpu_select_src_node(starpu_data_handle handle)
 
				 /* this may be called once the data is fetched with header and STARPU_RW-lock hold */
			
 
				 void starpu_update_data_state(starpu_data_handle handle, uint32_t requesting_node, uint8_t write)
			
 
				 {
			
 
				-	unsigned nnodes = get_memory_nodes_count();
			
 
				+	unsigned nnodes = starpu_get_memory_nodes_count();
			
 
				 
			
 
				 	/* the data is present now */
			
 
				 	handle->per_node[requesting_node].requested = 0;
			
@@ -141,7 +141,7 @@ void starpu_update_data_state(starpu_data_handle handle, uint32_t requesting_nod
 
				 int starpu_fetch_data_on_node(starpu_data_handle handle, uint32_t requesting_node,
			
 
				 			uint8_t read, uint8_t write, unsigned is_prefetch)
			
 
				 {
			
 
				-	uint32_t local_node = get_local_memory_node();
			
 
				+	uint32_t local_node = starpu_get_local_memory_node();
			
 
				 
			
 
				 	while (starpu_spin_trylock(&handle->header_lock))
			
 
				 		_starpu_datawizard_progress(local_node, 1);
			
@@ -181,8 +181,8 @@ int starpu_fetch_data_on_node(starpu_data_handle handle, uint32_t requesting_nod
 
				 			STARPU_ASSERT(src_node != requesting_node);
			
 
				 		}
			
 
				 	
			
 
				-		unsigned src_is_a_gpu = (get_node_kind(src_node) == CUDA_RAM);
			
 
				-		unsigned dst_is_a_gpu = (get_node_kind(requesting_node) == CUDA_RAM);
			
 
				+		unsigned src_is_a_gpu = (starpu_get_node_kind(src_node) == STARPU_CUDA_RAM);
			
 
				+		unsigned dst_is_a_gpu = (starpu_get_node_kind(requesting_node) == STARPU_CUDA_RAM);
			
 
				 
			
 
				 		/* we have to perform 2 successive requests for GPU->GPU transfers */
			
 
				 		if (read && (src_is_a_gpu && dst_is_a_gpu)) {
			
@@ -190,7 +190,7 @@ int starpu_fetch_data_on_node(starpu_data_handle handle, uint32_t requesting_nod
 
				 			starpu_data_request_t r_src_to_ram;
			
 
				 			starpu_data_request_t r_ram_to_dst;
			
 
				 
			
 
				-			/* XXX we hardcore 0 as the RAM node ... */
			
 
				+			/* XXX we hardcode 0 as the STARPU_RAM node ... */
			
 
				 			r_ram_to_dst = starpu_create_data_request(handle, 0, requesting_node, requesting_node, read, write, is_prefetch);
			
 
				 
			
 
				 			if (!is_prefetch)
			
@@ -276,7 +276,7 @@ static int prefetch_data_on_node(starpu_data_handle handle, uint8_t read, uint8_
 
				 
			
 
				 static int fetch_data(starpu_data_handle handle, starpu_access_mode mode)
			
 
				 {
			
 
				-	uint32_t requesting_node = get_local_memory_node(); 
			
 
				+	uint32_t requesting_node = starpu_get_local_memory_node(); 
			
 
				 
			
 
				 	uint8_t read, write;
			
 
				 	read = (mode != STARPU_W); /* then R or STARPU_RW */
			
@@ -306,7 +306,7 @@ void starpu_release_data_on_node(starpu_data_handle handle, uint32_t default_wb_
 
				 		write_through_data(handle, memory_node, wb_mask);
			
 
				 	}
			
 
				 
			
 
				-	uint32_t local_node = get_local_memory_node();
			
 
				+	uint32_t local_node = starpu_get_local_memory_node();
			
 
				 	while (starpu_spin_trylock(&handle->header_lock))
			
 
				 		_starpu_datawizard_progress(local_node, 1);
			
 
				 
			
@@ -350,7 +350,7 @@ int _starpu_fetch_task_input(struct starpu_task *task, uint32_t mask)
 
				 
			
 
				 //	fprintf(stderr, "_starpu_fetch_task_input\n");
			
 
				 
			
 
				-	uint32_t local_memory_node = get_local_memory_node();
			
 
				+	uint32_t local_memory_node = starpu_get_local_memory_node();
			
 
				 
			
 
				 	starpu_buffer_descr *descrs = task->buffers;
			
 
				 	unsigned nbuffers = task->cl->nbuffers;
			
@@ -396,7 +396,7 @@ void starpu_push_task_output(struct starpu_task *task, uint32_t mask)
 
				         starpu_buffer_descr *descrs = task->buffers;
			
 
				         unsigned nbuffers = task->cl->nbuffers;
			
 
				 
			
 
				-	uint32_t local_node = get_local_memory_node();
			
 
				+	uint32_t local_node = starpu_get_local_memory_node();
			
 
				 
			
 
				 	unsigned index;
			
 
				 	for (index = 0; index < nbuffers; index++)
			
--- a/src/datawizard/copy-driver.c
+++ b/src/datawizard/copy-driver.c
@@ -27,7 +27,7 @@ void _starpu_wake_all_blocked_workers_on_node(unsigned nodeid)
 
				 	/* wake up all queues on that node */
			
 
				 	unsigned q_id;
			
 
				 
			
 
				-	mem_node_descr * const descr = get_memory_node_description();
			
 
				+	starpu_mem_node_descr * const descr = starpu_get_memory_node_description();
			
 
				 
			
 
				 	pthread_rwlock_rdlock(&descr->attached_queues_rwlock);
			
 
				 
			
@@ -59,7 +59,7 @@ void starpu_wake_all_blocked_workers(void)
 
				 
			
 
				 	/* workers may be blocked on the various queues' conditions */
			
 
				 	unsigned node;
			
 
				-	unsigned nnodes = get_memory_nodes_count();
			
 
				+	unsigned nnodes = starpu_get_memory_nodes_count();
			
 
				 	for (node = 0; node < nnodes; node++)
			
 
				 	{
			
 
				 		_starpu_wake_all_blocked_workers_on_node(node);
			
@@ -81,8 +81,8 @@ static int copy_data_1_to_1_generic(starpu_data_handle handle, uint32_t src_node
 
				 
			
 
				 	const struct starpu_copy_data_methods_s *copy_methods = handle->ops->copy_methods;
			
 
				 
			
 
				-	node_kind src_kind = get_node_kind(src_node);
			
 
				-	node_kind dst_kind = get_node_kind(dst_node);
			
 
				+	starpu_node_kind src_kind = starpu_get_node_kind(src_node);
			
 
				+	starpu_node_kind dst_kind = starpu_get_node_kind(dst_node);
			
 
				 
			
 
				 	STARPU_ASSERT(handle->per_node[src_node].refcnt);
			
 
				 	STARPU_ASSERT(handle->per_node[dst_node].refcnt);
			
@@ -96,18 +96,18 @@ cudaStream_t *stream;
 
				 #endif
			
 
				 
			
 
				 	switch (dst_kind) {
			
 
				-	case RAM:
			
 
				+	case STARPU_RAM:
			
 
				 		switch (src_kind) {
			
 
				-			case RAM:
			
 
				-				/* RAM -> RAM */
			
 
				+			case STARPU_RAM:
			
 
				+				/* STARPU_RAM -> STARPU_RAM */
			
 
				 				STARPU_ASSERT(copy_methods->ram_to_ram);
			
 
				 				copy_methods->ram_to_ram(handle, src_node, dst_node);
			
 
				 				break;
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-			case CUDA_RAM:
			
 
				-				/* CUBLAS_RAM -> RAM */
			
 
				+			case STARPU_CUDA_RAM:
			
 
				+				/* CUBLAS_RAM -> STARPU_RAM */
			
 
				 				/* only the proper CUBLAS thread can initiate this ! */
			
 
				-				if (get_local_memory_node() == src_node)
			
 
				+				if (starpu_get_local_memory_node() == src_node)
			
 
				 				{
			
 
				 					/* only the proper CUBLAS thread can initiate this directly ! */
			
 
				 					STARPU_ASSERT(copy_methods->cuda_to_ram);
			
@@ -134,23 +134,23 @@ cudaStream_t *stream;
 
				 				}
			
 
				 				break;
			
 
				 #endif
			
 
				-			case SPU_LS:
			
 
				+			case STARPU_SPU_LS:
			
 
				 				STARPU_ABORT(); // TODO
			
 
				 				break;
			
 
				-			case UNUSED:
			
 
				-				printf("error node %u UNUSED\n", src_node);
			
 
				+			case STARPU_UNUSED:
			
 
				+				printf("error node %u STARPU_UNUSED\n", src_node);
			
 
				 			default:
			
 
				 				assert(0);
			
 
				 				break;
			
 
				 		}
			
 
				 		break;
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-	case CUDA_RAM:
			
 
				+	case STARPU_CUDA_RAM:
			
 
				 		switch (src_kind) {
			
 
				-			case RAM:
			
 
				-				/* RAM -> CUBLAS_RAM */
			
 
				+			case STARPU_RAM:
			
 
				+				/* STARPU_RAM -> CUBLAS_RAM */
			
 
				 				/* only the proper CUBLAS thread can initiate this ! */
			
 
				-				STARPU_ASSERT(get_local_memory_node() == dst_node);
			
 
				+				STARPU_ASSERT(starpu_get_local_memory_node() == dst_node);
			
 
				 				STARPU_ASSERT(copy_methods->ram_to_cuda);
			
 
				 				if (!req || !copy_methods->ram_to_cuda_async)
			
 
				 				{
			
@@ -168,21 +168,21 @@ cudaStream_t *stream;
 
				 					STARPU_ASSERT(cures == cudaSuccess);
			
 
				 				}
			
 
				 				break;
			
 
				-			case CUDA_RAM:
			
 
				-			case SPU_LS:
			
 
				+			case STARPU_CUDA_RAM:
			
 
				+			case STARPU_SPU_LS:
			
 
				 				STARPU_ABORT(); // TODO 
			
 
				 				break;
			
 
				-			case UNUSED:
			
 
				+			case STARPU_UNUSED:
			
 
				 			default:
			
 
				 				STARPU_ABORT();
			
 
				 				break;
			
 
				 		}
			
 
				 		break;
			
 
				 #endif
			
 
				-	case SPU_LS:
			
 
				+	case STARPU_SPU_LS:
			
 
				 		STARPU_ABORT(); // TODO
			
 
				 		break;
			
 
				-	case UNUSED:
			
 
				+	case STARPU_UNUSED:
			
 
				 	default:
			
 
				 		assert(0);
			
 
				 		break;
			
@@ -253,7 +253,7 @@ nomem:
 
				 void starpu_driver_wait_request_completion(starpu_async_channel *async_channel __attribute__ ((unused)),
			
 
				 					unsigned handling_node)
			
 
				 {
			
 
				-	node_kind kind = get_node_kind(handling_node);
			
 
				+	starpu_node_kind kind = starpu_get_node_kind(handling_node);
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 	cudaEvent_t event;
			
 
				 	cudaError_t cures;
			
@@ -261,7 +261,7 @@ void starpu_driver_wait_request_completion(starpu_async_channel *async_channel _
 
				 
			
 
				 	switch (kind) {
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-		case CUDA_RAM:
			
 
				+		case STARPU_CUDA_RAM:
			
 
				 			event = (*async_channel).cuda_event;
			
 
				 
			
 
				 			cures = cudaEventSynchronize(event);
			
@@ -274,7 +274,7 @@ void starpu_driver_wait_request_completion(starpu_async_channel *async_channel _
 
				 
			
 
				 			break;
			
 
				 #endif
			
 
				-		case RAM:
			
 
				+		case STARPU_RAM:
			
 
				 		default:
			
 
				 			STARPU_ABORT();
			
 
				 	}
			
@@ -283,7 +283,7 @@ void starpu_driver_wait_request_completion(starpu_async_channel *async_channel _
 
				 unsigned starpu_driver_test_request_completion(starpu_async_channel *async_channel __attribute__ ((unused)),
			
 
				 					unsigned handling_node)
			
 
				 {
			
 
				-	node_kind kind = get_node_kind(handling_node);
			
 
				+	starpu_node_kind kind = starpu_get_node_kind(handling_node);
			
 
				 	unsigned success;
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 	cudaEvent_t event;
			
@@ -291,7 +291,7 @@ unsigned starpu_driver_test_request_completion(starpu_async_channel *async_chann
 
				 
			
 
				 	switch (kind) {
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-		case CUDA_RAM:
			
 
				+		case STARPU_CUDA_RAM:
			
 
				 			event = (*async_channel).cuda_event;
			
 
				 
			
 
				 			success = (cudaEventQuery(event) == cudaSuccess);
			
@@ -300,7 +300,7 @@ unsigned starpu_driver_test_request_completion(starpu_async_channel *async_chann
 
				 
			
 
				 			break;
			
 
				 #endif
			
 
				-		case RAM:
			
 
				+		case STARPU_RAM:
			
 
				 		default:
			
 
				 			STARPU_ABORT();
			
 
				 			success = 0;
			
--- a/src/datawizard/data_request.c
+++ b/src/datawizard/data_request.c
@@ -141,7 +141,7 @@ int starpu_wait_data_request_completion(starpu_data_request_t r, unsigned may_al
 
				 	int retval;
			
 
				 	int do_delete = 0;
			
 
				 
			
 
				-	uint32_t local_node = get_local_memory_node();
			
 
				+	uint32_t local_node = starpu_get_local_memory_node();
			
 
				 
			
 
				 	do {
			
 
				 		starpu_spin_lock(&r->lock);
			
--- a/src/datawizard/hierarchy.c
+++ b/src/datawizard/hierarchy.c
@@ -240,7 +240,7 @@ void starpu_unpartition_data(starpu_data_handle root_handle, uint32_t gathering_
 
				 
			
 
				 		int ret;
			
 
				 		ret = starpu_fetch_data_on_node(&root_handle->children[child], gathering_node, 1, 0, 0);
			
 
				-		/* for now we pretend that the RAM is almost unlimited and that gathering 
			
 
				+		/* for now we pretend that the STARPU_RAM is almost unlimited and that gathering 
			
 
				 		 * data should be possible from the node that does the unpartionning ... we
			
 
				 		 * don't want to have the programming deal with memory shortage at that time,
			
 
				 		 * really */
			
--- a/src/datawizard/interfaces/bcsr_interface.c
+++ b/src/datawizard/interfaces/bcsr_interface.c
@@ -176,7 +176,7 @@ size_t starpu_get_bcsr_elemsize(starpu_data_handle handle)
 
				 uintptr_t starpu_get_bcsr_local_nzval(starpu_data_handle handle)
			
 
				 {
			
 
				 	unsigned node;
			
 
				-	node = get_local_memory_node();
			
 
				+	node = starpu_get_local_memory_node();
			
 
				 
			
 
				 	STARPU_ASSERT(starpu_test_if_data_is_allocated_on_node(handle, node));
			
 
				 
			
@@ -241,10 +241,10 @@ static size_t allocate_bcsr_buffer_on_node(starpu_data_handle handle, uint32_t d
 
				 	uint32_t r = interface->r;
			
 
				 	uint32_t c = interface->c;
			
 
				 
			
 
				-	node_kind kind = get_node_kind(dst_node);
			
 
				+	starpu_node_kind kind = starpu_get_node_kind(dst_node);
			
 
				 
			
 
				 	switch(kind) {
			
 
				-		case RAM:
			
 
				+		case STARPU_RAM:
			
 
				 			addr_nzval = (uintptr_t)malloc(nnz*r*c*elemsize);
			
 
				 			if (!addr_nzval)
			
 
				 				goto fail_nzval;
			
@@ -259,7 +259,7 @@ static size_t allocate_bcsr_buffer_on_node(starpu_data_handle handle, uint32_t d
 
				 
			
 
				 			break;
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-		case CUDA_RAM:
			
 
				+		case STARPU_CUDA_RAM:
			
 
				 			cudaMalloc((void **)&addr_nzval, nnz*r*c*elemsize);
			
 
				 			if (!addr_nzval)
			
 
				 				goto fail_nzval;
			
@@ -291,10 +291,10 @@ static size_t allocate_bcsr_buffer_on_node(starpu_data_handle handle, uint32_t d
 
				 
			
 
				 fail_rowptr:
			
 
				 	switch(kind) {
			
 
				-		case RAM:
			
 
				+		case STARPU_RAM:
			
 
				 			free((void *)addr_colind);
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-		case CUDA_RAM:
			
 
				+		case STARPU_CUDA_RAM:
			
 
				 			cudaFree((void*)addr_colind);
			
 
				 			break;
			
 
				 #endif
			
@@ -304,10 +304,10 @@ fail_rowptr:
 
				 
			
 
				 fail_colind:
			
 
				 	switch(kind) {
			
 
				-		case RAM:
			
 
				+		case STARPU_RAM:
			
 
				 			free((void *)addr_nzval);
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-		case CUDA_RAM:
			
 
				+		case STARPU_CUDA_RAM:
			
 
				 			cudaFree((void*)addr_nzval);
			
 
				 			break;
			
 
				 #endif
			
@@ -327,15 +327,15 @@ static void liberate_bcsr_buffer_on_node(void *interface, uint32_t node)
 
				 {
			
 
				 	starpu_bcsr_interface_t *bcsr_interface = interface;	
			
 
				 
			
 
				-	node_kind kind = get_node_kind(node);
			
 
				+	starpu_node_kind kind = starpu_get_node_kind(node);
			
 
				 	switch(kind) {
			
 
				-		case RAM:
			
 
				+		case STARPU_RAM:
			
 
				 			free((void*)bcsr_interface->nzval);
			
 
				 			free((void*)bcsr_interface->colind);
			
 
				 			free((void*)bcsr_interface->rowptr);
			
 
				 			break;
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-		case CUDA_RAM:
			
 
				+		case STARPU_CUDA_RAM:
			
 
				 			cudaFree((void*)bcsr_interface->nzval);
			
 
				 			cudaFree((void*)bcsr_interface->colind);
			
 
				 			cudaFree((void*)bcsr_interface->rowptr);
			
--- a/src/datawizard/interfaces/blas_interface.c
+++ b/src/datawizard/interfaces/blas_interface.c
@@ -179,7 +179,7 @@ uint32_t starpu_get_blas_ny(starpu_data_handle handle)
 
				 uint32_t starpu_get_blas_local_ld(starpu_data_handle handle)
			
 
				 {
			
 
				 	unsigned node;
			
 
				-	node = get_local_memory_node();
			
 
				+	node = starpu_get_local_memory_node();
			
 
				 
			
 
				 	STARPU_ASSERT(starpu_test_if_data_is_allocated_on_node(handle, node));
			
 
				 
			
@@ -192,7 +192,7 @@ uint32_t starpu_get_blas_local_ld(starpu_data_handle handle)
 
				 uintptr_t starpu_get_blas_local_ptr(starpu_data_handle handle)
			
 
				 {
			
 
				 	unsigned node;
			
 
				-	node = get_local_memory_node();
			
 
				+	node = starpu_get_local_memory_node();
			
 
				 
			
 
				 	STARPU_ASSERT(starpu_test_if_data_is_allocated_on_node(handle, node));
			
 
				 
			
@@ -232,17 +232,17 @@ static size_t allocate_blas_buffer_on_node(starpu_data_handle handle, uint32_t d
 
				 	uint32_t ld = nx; // by default
			
 
				 	size_t elemsize = interface->elemsize;
			
 
				 
			
 
				-	node_kind kind = get_node_kind(dst_node);
			
 
				+	starpu_node_kind kind = starpu_get_node_kind(dst_node);
			
 
				 
			
 
				 	switch(kind) {
			
 
				-		case RAM:
			
 
				+		case STARPU_RAM:
			
 
				 			addr = (uintptr_t)malloc((size_t)nx*ny*elemsize);
			
 
				 			if (!addr) 
			
 
				 				fail = 1;
			
 
				 
			
 
				 			break;
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-		case CUDA_RAM:
			
 
				+		case STARPU_CUDA_RAM:
			
 
				 			status = cudaMallocPitch((void **)&addr, &pitch, (size_t)nx*elemsize, (size_t)ny);
			
 
				 			if (!addr || status != cudaSuccess)
			
 
				 			{
			
@@ -283,13 +283,13 @@ static void liberate_blas_buffer_on_node(void *interface, uint32_t node)
 
				 	cudaError_t status;
			
 
				 #endif
			
 
				 
			
 
				-	node_kind kind = get_node_kind(node);
			
 
				+	starpu_node_kind kind = starpu_get_node_kind(node);
			
 
				 	switch(kind) {
			
 
				-		case RAM:
			
 
				+		case STARPU_RAM:
			
 
				 			free((void*)blas_interface->ptr);
			
 
				 			break;
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-		case CUDA_RAM:
			
 
				+		case STARPU_CUDA_RAM:
			
 
				 			status = cudaFree((void*)blas_interface->ptr);			
			
 
				 			if (STARPU_UNLIKELY(status))
			
 
				 				STARPU_CUDA_REPORT_ERROR(status);
			
--- a/src/datawizard/interfaces/block_interface.c
+++ b/src/datawizard/interfaces/block_interface.c
@@ -188,7 +188,7 @@ uint32_t starpu_get_block_nz(starpu_data_handle handle)
 
				 uint32_t starpu_get_block_local_ldy(starpu_data_handle handle)
			
 
				 {
			
 
				 	unsigned node;
			
 
				-	node = get_local_memory_node();
			
 
				+	node = starpu_get_local_memory_node();
			
 
				 
			
 
				 	STARPU_ASSERT(starpu_test_if_data_is_allocated_on_node(handle, node));
			
 
				 	
			
@@ -201,7 +201,7 @@ uint32_t starpu_get_block_local_ldy(starpu_data_handle handle)
 
				 uint32_t starpu_get_block_local_ldz(starpu_data_handle handle)
			
 
				 {
			
 
				 	unsigned node;
			
 
				-	node = get_local_memory_node();
			
 
				+	node = starpu_get_local_memory_node();
			
 
				 
			
 
				 	STARPU_ASSERT(starpu_test_if_data_is_allocated_on_node(handle, node));
			
 
				 
			
@@ -214,7 +214,7 @@ uint32_t starpu_get_block_local_ldz(starpu_data_handle handle)
 
				 uintptr_t starpu_get_block_local_ptr(starpu_data_handle handle)
			
 
				 {
			
 
				 	unsigned node;
			
 
				-	node = get_local_memory_node();
			
 
				+	node = starpu_get_local_memory_node();
			
 
				 
			
 
				 	STARPU_ASSERT(starpu_test_if_data_is_allocated_on_node(handle, node));
			
 
				 
			
@@ -253,17 +253,17 @@ static size_t allocate_block_buffer_on_node(starpu_data_handle handle, uint32_t
 
				 	uint32_t nz = dst_block->nz;
			
 
				 	size_t elemsize = dst_block->elemsize;
			
 
				 
			
 
				-	node_kind kind = get_node_kind(dst_node);
			
 
				+	starpu_node_kind kind = starpu_get_node_kind(dst_node);
			
 
				 
			
 
				 	switch(kind) {
			
 
				-		case RAM:
			
 
				+		case STARPU_RAM:
			
 
				 			addr = (uintptr_t)malloc(nx*ny*nz*elemsize);
			
 
				 			if (!addr) 
			
 
				 				fail = 1;
			
 
				 
			
 
				 			break;
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-		case CUDA_RAM:
			
 
				+		case STARPU_CUDA_RAM:
			
 
				 			status = cudaMalloc((void **)&addr, nx*ny*nz*elemsize);
			
 
				 
			
 
				 			//fprintf(stderr, "cudaMalloc -> addr %p\n", addr);
			
@@ -306,13 +306,13 @@ static void liberate_block_buffer_on_node(void *interface, uint32_t node)
 
				 	cudaError_t status;
			
 
				 #endif
			
 
				 
			
 
				-	node_kind kind = get_node_kind(node);
			
 
				+	starpu_node_kind kind = starpu_get_node_kind(node);
			
 
				 	switch(kind) {
			
 
				-		case RAM:
			
 
				+		case STARPU_RAM:
			
 
				 			free((void*)block_interface->ptr);
			
 
				 			break;
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-		case CUDA_RAM:
			
 
				+		case STARPU_CUDA_RAM:
			
 
				 			status = cudaFree((void*)block_interface->ptr);
			
 
				 			if (STARPU_UNLIKELY(status))
			
 
				 				STARPU_CUDA_REPORT_ERROR(status);
			
--- a/src/datawizard/interfaces/csr_interface.c
+++ b/src/datawizard/interfaces/csr_interface.c
@@ -147,7 +147,7 @@ size_t starpu_get_csr_elemsize(starpu_data_handle handle)
 
				 uintptr_t starpu_get_csr_local_nzval(starpu_data_handle handle)
			
 
				 {
			
 
				 	unsigned node;
			
 
				-	node = get_local_memory_node();
			
 
				+	node = starpu_get_local_memory_node();
			
 
				 
			
 
				 	STARPU_ASSERT(starpu_test_if_data_is_allocated_on_node(handle, node));
			
 
				 
			
@@ -160,7 +160,7 @@ uintptr_t starpu_get_csr_local_nzval(starpu_data_handle handle)
 
				 uint32_t *starpu_get_csr_local_colind(starpu_data_handle handle)
			
 
				 {
			
 
				 	unsigned node;
			
 
				-	node = get_local_memory_node();
			
 
				+	node = starpu_get_local_memory_node();
			
 
				 
			
 
				 	STARPU_ASSERT(starpu_test_if_data_is_allocated_on_node(handle, node));
			
 
				 
			
@@ -173,7 +173,7 @@ uint32_t *starpu_get_csr_local_colind(starpu_data_handle handle)
 
				 uint32_t *starpu_get_csr_local_rowptr(starpu_data_handle handle)
			
 
				 {
			
 
				 	unsigned node;
			
 
				-	node = get_local_memory_node();
			
 
				+	node = starpu_get_local_memory_node();
			
 
				 
			
 
				 	STARPU_ASSERT(starpu_test_if_data_is_allocated_on_node(handle, node));
			
 
				 
			
@@ -213,10 +213,10 @@ static size_t allocate_csr_buffer_on_node(starpu_data_handle handle, uint32_t ds
 
				 	uint32_t nrow = interface->nrow;
			
 
				 	size_t elemsize = interface->elemsize;
			
 
				 
			
 
				-	node_kind kind = get_node_kind(dst_node);
			
 
				+	starpu_node_kind kind = starpu_get_node_kind(dst_node);
			
 
				 
			
 
				 	switch(kind) {
			
 
				-		case RAM:
			
 
				+		case STARPU_RAM:
			
 
				 			addr_nzval = (uintptr_t)malloc(nnz*elemsize);
			
 
				 			if (!addr_nzval)
			
 
				 				goto fail_nzval;
			
@@ -231,7 +231,7 @@ static size_t allocate_csr_buffer_on_node(starpu_data_handle handle, uint32_t ds
 
				 
			
 
				 			break;
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-		case CUDA_RAM:
			
 
				+		case STARPU_CUDA_RAM:
			
 
				 			cudaMalloc((void **)&addr_nzval, nnz*elemsize);
			
 
				 			if (!addr_nzval)
			
 
				 				goto fail_nzval;
			
@@ -263,10 +263,10 @@ static size_t allocate_csr_buffer_on_node(starpu_data_handle handle, uint32_t ds
 
				 
			
 
				 fail_rowptr:
			
 
				 	switch(kind) {
			
 
				-		case RAM:
			
 
				+		case STARPU_RAM:
			
 
				 			free((void *)addr_colind);
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-		case CUDA_RAM:
			
 
				+		case STARPU_CUDA_RAM:
			
 
				 			cudaFree((void*)addr_colind);
			
 
				 			break;
			
 
				 #endif
			
@@ -276,10 +276,10 @@ fail_rowptr:
 
				 
			
 
				 fail_colind:
			
 
				 	switch(kind) {
			
 
				-		case RAM:
			
 
				+		case STARPU_RAM:
			
 
				 			free((void *)addr_nzval);
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-		case CUDA_RAM:
			
 
				+		case STARPU_CUDA_RAM:
			
 
				 			cudaFree((void*)addr_nzval);
			
 
				 			break;
			
 
				 #endif
			
@@ -299,15 +299,15 @@ static void liberate_csr_buffer_on_node(void *interface, uint32_t node)
 
				 {
			
 
				 	starpu_csr_interface_t *csr_interface = interface;	
			
 
				 
			
 
				-	node_kind kind = get_node_kind(node);
			
 
				+	starpu_node_kind kind = starpu_get_node_kind(node);
			
 
				 	switch(kind) {
			
 
				-		case RAM:
			
 
				+		case STARPU_RAM:
			
 
				 			free((void*)csr_interface->nzval);
			
 
				 			free((void*)csr_interface->colind);
			
 
				 			free((void*)csr_interface->rowptr);
			
 
				 			break;
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-		case CUDA_RAM:
			
 
				+		case STARPU_CUDA_RAM:
			
 
				 			cudaFree((void*)csr_interface->nzval);
			
 
				 			cudaFree((void*)csr_interface->colind);
			
 
				 			cudaFree((void*)csr_interface->rowptr);
			
--- a/src/datawizard/interfaces/vector_interface.c
+++ b/src/datawizard/interfaces/vector_interface.c
@@ -159,7 +159,7 @@ uint32_t starpu_get_vector_nx(starpu_data_handle handle)
 
				 uintptr_t starpu_get_vector_local_ptr(starpu_data_handle handle)
			
 
				 {
			
 
				 	unsigned node;
			
 
				-	node = get_local_memory_node();
			
 
				+	node = starpu_get_local_memory_node();
			
 
				 
			
 
				 	STARPU_ASSERT(starpu_test_if_data_is_allocated_on_node(handle, node));
			
 
				 
			
@@ -192,20 +192,20 @@ static size_t allocate_vector_buffer_on_node(starpu_data_handle handle, uint32_t
 
				 	uint32_t nx = interface->nx;
			
 
				 	size_t elemsize = interface->elemsize;
			
 
				 
			
 
				-	node_kind kind = get_node_kind(dst_node);
			
 
				+	starpu_node_kind kind = starpu_get_node_kind(dst_node);
			
 
				 
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 	cudaError_t status;
			
 
				 #endif
			
 
				 
			
 
				 	switch(kind) {
			
 
				-		case RAM:
			
 
				+		case STARPU_RAM:
			
 
				 			addr = (uintptr_t)malloc(nx*elemsize);
			
 
				 			if (!addr)
			
 
				 				fail = 1;
			
 
				 			break;
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-		case CUDA_RAM:
			
 
				+		case STARPU_CUDA_RAM:
			
 
				 			status = cudaMalloc((void **)&addr, nx*elemsize);
			
 
				 			if (!addr || (status != cudaSuccess))
			
 
				 			{
			
@@ -236,13 +236,13 @@ static void liberate_vector_buffer_on_node(void *interface, uint32_t node)
 
				 {
			
 
				 	starpu_vector_interface_t *vector_interface = interface;
			
 
				 
			
 
				-	node_kind kind = get_node_kind(node);
			
 
				+	starpu_node_kind kind = starpu_get_node_kind(node);
			
 
				 	switch(kind) {
			
 
				-		case RAM:
			
 
				+		case STARPU_RAM:
			
 
				 			free((void*)vector_interface->ptr);
			
 
				 			break;
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-		case CUDA_RAM:
			
 
				+		case STARPU_CUDA_RAM:
			
 
				 			cudaFree((void*)vector_interface->ptr);
			
 
				 			break;
			
 
				 #endif
			
--- a/src/datawizard/memalloc.c
+++ b/src/datawizard/memalloc.c
@@ -50,7 +50,7 @@ static void lock_all_subtree(starpu_data_handle handle)
 
				 	{
			
 
				 		/* this is a leaf */
			
 
				 		while (starpu_spin_trylock(&handle->header_lock))
			
 
				-			_starpu_datawizard_progress(get_local_memory_node(), 0);
			
 
				+			_starpu_datawizard_progress(starpu_get_local_memory_node(), 0);
			
 
				 	}
			
 
				 	else {
			
 
				 		/* lock all sub-subtrees children */
			
@@ -494,7 +494,7 @@ static size_t liberate_memory_on_node(starpu_mem_chunk_t mc, uint32_t node)
 
				 	starpu_data_handle handle = mc->data;
			
 
				 
			
 
				 //	while (starpu_spin_trylock(&handle->header_lock))
			
 
				-//		_starpu_datawizard_progress(get_local_memory_node());
			
 
				+//		_starpu_datawizard_progress(starpu_get_local_memory_node());
			
 
				 
			
 
				 #warning can we block here ?
			
 
				 //	starpu_spin_lock(&handle->header_lock);
			
--- a/src/datawizard/memory_nodes.c
+++ b/src/datawizard/memory_nodes.c
@@ -22,10 +22,10 @@
 
				 #include "copy-driver.h"
			
 
				 #include "memalloc.h"
			
 
				 
			
 
				-static mem_node_descr descr;
			
 
				+static starpu_mem_node_descr descr;
			
 
				 static pthread_key_t memory_node_key;
			
 
				 
			
 
				-void init_memory_nodes(void)
			
 
				+void starpu_init_memory_nodes(void)
			
 
				 {
			
 
				 	/* there is no node yet, subsequent nodes will be 
			
 
				 	 * added using _starpu_register_memory_node */
			
@@ -35,7 +35,7 @@ void init_memory_nodes(void)
 
				 
			
 
				 	unsigned i;
			
 
				 	for (i = 0; i < STARPU_MAXNODES; i++) 
			
 
				-		descr.nodes[i] = UNUSED; 
			
 
				+		descr.nodes[i] = STARPU_UNUSED; 
			
 
				 
			
 
				 	_starpu_init_mem_chunk_lists();
			
 
				 	starpu_init_data_request_lists();
			
@@ -44,7 +44,7 @@ void init_memory_nodes(void)
 
				 	descr.total_queues_count = 0;
			
 
				 }
			
 
				 
			
 
				-void deinit_memory_nodes(void)
			
 
				+void starpu_deinit_memory_nodes(void)
			
 
				 {
			
 
				 	starpu_deinit_data_request_lists();
			
 
				 	_starpu_deinit_mem_chunk_lists();
			
@@ -52,17 +52,17 @@ void deinit_memory_nodes(void)
 
				 	pthread_key_delete(memory_node_key);
			
 
				 }
			
 
				 
			
 
				-void set_local_memory_node_key(unsigned *node)
			
 
				+void starpu_set_local_memory_node_key(unsigned *node)
			
 
				 {
			
 
				 	pthread_setspecific(memory_node_key, node);
			
 
				 }
			
 
				 
			
 
				-unsigned get_local_memory_node(void)
			
 
				+unsigned starpu_get_local_memory_node(void)
			
 
				 {
			
 
				 	unsigned *memory_node;
			
 
				 	memory_node = pthread_getspecific(memory_node_key);
			
 
				 	
			
 
				-	/* in case this is called by the programmer, we assume the RAM node 
			
 
				+	/* in case this is called by the programmer, we assume the STARPU_RAM node 
			
 
				 	   is the appropriate memory node ... so we return 0 XXX */
			
 
				 	if (STARPU_UNLIKELY(!memory_node))
			
 
				 		return 0;
			
@@ -70,22 +70,22 @@ unsigned get_local_memory_node(void)
 
				 	return *memory_node;
			
 
				 }
			
 
				 
			
 
				-inline mem_node_descr *get_memory_node_description(void)
			
 
				+inline starpu_mem_node_descr *starpu_get_memory_node_description(void)
			
 
				 {
			
 
				 	return &descr;
			
 
				 }
			
 
				 
			
 
				-inline node_kind get_node_kind(uint32_t node)
			
 
				+inline starpu_node_kind starpu_get_node_kind(uint32_t node)
			
 
				 {
			
 
				 	return descr.nodes[node];
			
 
				 }
			
 
				 
			
 
				-unsigned get_memory_nodes_count(void)
			
 
				+unsigned starpu_get_memory_nodes_count(void)
			
 
				 {
			
 
				 	return descr.nnodes;
			
 
				 }
			
 
				 
			
 
				-unsigned _starpu_register_memory_node(node_kind kind)
			
 
				+unsigned _starpu_register_memory_node(starpu_node_kind kind)
			
 
				 {
			
 
				 	unsigned nnodes;
			
 
				 	/* ATOMIC_ADD returns the new value ... */
			
@@ -102,7 +102,7 @@ unsigned _starpu_register_memory_node(node_kind kind)
 
				 
			
 
				 /* TODO move in a more appropriate file  !! */
			
 
				 /* attach a queue to a memory node (if it's not already attached) */
			
 
				-void memory_node_attach_queue(struct jobq_s *q, unsigned nodeid)
			
 
				+void starpu_memory_node_attach_queue(struct jobq_s *q, unsigned nodeid)
			
 
				 {
			
 
				 	unsigned queue;
			
 
				 	unsigned nqueues_total, nqueues;
			
--- a/src/datawizard/memory_nodes.h
+++ b/src/datawizard/memory_nodes.h
@@ -25,37 +25,37 @@
 
				 #endif
			
 
				 
			
 
				 typedef enum {
			
 
				-	UNUSED,
			
 
				-	SPU_LS,
			
 
				-	RAM,
			
 
				-	CUDA_RAM
			
 
				-} node_kind;
			
 
				+	STARPU_UNUSED,
			
 
				+	STARPU_SPU_LS,
			
 
				+	STARPU_RAM,
			
 
				+	STARPU_CUDA_RAM
			
 
				+} starpu_node_kind;
			
 
				 
			
 
				 typedef struct {
			
 
				 	unsigned nnodes;
			
 
				-	node_kind nodes[STARPU_MAXNODES];
			
 
				+	starpu_node_kind nodes[STARPU_MAXNODES];
			
 
				 
			
 
				 	/* the list of queues that are attached to a given node */
			
 
				 	// XXX 32 is set randomly !
			
 
				-	// TODO move this 2 lists outside mem_node_descr
			
 
				+	// TODO move this 2 lists outside starpu_mem_node_descr
			
 
				 	pthread_rwlock_t attached_queues_rwlock;
			
 
				 	struct jobq_s *attached_queues_per_node[STARPU_MAXNODES][32];
			
 
				 	struct jobq_s *attached_queues_all[STARPU_MAXNODES*32];
			
 
				 	/* the number of queues attached to each node */
			
 
				 	unsigned total_queues_count;
			
 
				 	unsigned queues_count[STARPU_MAXNODES];
			
 
				-} mem_node_descr;
			
 
				+} starpu_mem_node_descr;
			
 
				 
			
 
				-void init_memory_nodes(void);
			
 
				-void deinit_memory_nodes(void);
			
 
				-void set_local_memory_node_key(unsigned *node);
			
 
				-unsigned get_local_memory_node(void);
			
 
				-unsigned _starpu_register_memory_node(node_kind kind);
			
 
				-void memory_node_attach_queue(struct jobq_s *q, unsigned nodeid);
			
 
				+void starpu_init_memory_nodes(void);
			
 
				+void starpu_deinit_memory_nodes(void);
			
 
				+void starpu_set_local_memory_node_key(unsigned *node);
			
 
				+unsigned starpu_get_local_memory_node(void);
			
 
				+unsigned _starpu_register_memory_node(starpu_node_kind kind);
			
 
				+void starpu_memory_node_attach_queue(struct jobq_s *q, unsigned nodeid);
			
 
				 
			
 
				-node_kind get_node_kind(uint32_t node);
			
 
				-unsigned get_memory_nodes_count(void);
			
 
				+starpu_node_kind starpu_get_node_kind(uint32_t node);
			
 
				+unsigned starpu_get_memory_nodes_count(void);
			
 
				 
			
 
				-inline mem_node_descr *get_memory_node_description(void);
			
 
				+inline starpu_mem_node_descr *starpu_get_memory_node_description(void);
			
 
				 
			
 
				 #endif // __MEMORY_NODES_H__
			
--- a/src/datawizard/user_interactions.c
+++ b/src/datawizard/user_interactions.c
@@ -49,7 +49,7 @@ struct state_and_node {
 
				 	void *callback_arg;
			
 
				 };
			
 
				 
			
 
				-/* put the current value of the data into RAM */
			
 
				+/* put the current value of the data into STARPU_RAM */
			
 
				 static inline void _starpu_sync_data_with_mem_continuation(void *arg)
			
 
				 {
			
 
				 	int ret;
			
--- a/src/drivers/cpu/driver_cpu.c
+++ b/src/drivers/cpu/driver_cpu.c
@@ -109,7 +109,7 @@ void *_starpu_cpu_worker(void *arg)
 
				         fprintf(stderr, "cpu worker %d is ready on logical cpu %d\n", cpu_arg->id, cpu_arg->bindid);
			
 
				 #endif
			
 
				 
			
 
				-	set_local_memory_node_key(&cpu_arg->memory_node);
			
 
				+	starpu_set_local_memory_node_key(&cpu_arg->memory_node);
			
 
				 
			
 
				 	set_local_queue(cpu_arg->jobq);
			
 
				 
			
--- a/src/drivers/cuda/driver_cuda.c
+++ b/src/drivers/cuda/driver_cuda.c
@@ -177,7 +177,7 @@ void *_starpu_cuda_worker(void *arg)
 
				 
			
 
				 	_starpu_bind_thread_on_cpu(args->config, args->bindid);
			
 
				 
			
 
				-	set_local_memory_node_key(&(args->memory_node));
			
 
				+	starpu_set_local_memory_node_key(&(args->memory_node));
			
 
				 
			
 
				 	set_local_queue(args->jobq);
			
 
				 
			
--- a/tests/datawizard/sync_and_notify_data.c
+++ b/tests/datawizard/sync_and_notify_data.c
@@ -115,7 +115,7 @@ int main(int argc, char **argv)
 
				 				goto enodev;
			
 
				 		}
			
 
				 
			
 
				-		/* synchronize v in RAM */
			
 
				+		/* synchronize v in STARPU_RAM */
			
 
				 		starpu_sync_data_with_mem(v_handle, STARPU_RW);
			
 
				 
			
 
				 		/* increment b */
			
--- a/tests/errorcheck/invalid_blocking_calls.c
+++ b/tests/errorcheck/invalid_blocking_calls.c
@@ -25,7 +25,7 @@ static void wrong_func(void *descr[], void *arg)
 
				 {
			
 
				 	int ret;
			
 
				 
			
 
				-	/* try to fetch data in the RAM while we are in a codelet, such a
			
 
				+	/* try to fetch data in the STARPU_RAM while we are in a codelet, such a
			
 
				 	 * blocking call is forbidden */
			
 
				 	ret = starpu_sync_data_with_mem(handle, STARPU_RW);
			
 
				 	if (ret != -EDEADLK)
			
--- a/tests/experiments/latency/cuda-latency.c
+++ b/tests/experiments/latency/cuda-latency.c
@@ -52,7 +52,7 @@ void send_data(unsigned src, unsigned dst)
 
				 {
			
 
				 	cudaError_t cures;
			
 
				 
			
 
				-	/* Copy data from GPU to RAM */
			
 
				+	/* Copy data from GPU to STARPU_RAM */
			
 
				 #ifdef DO_TRANSFER_GPU_TO_RAM
			
 
				 #ifdef ASYNC
			
 
				 	cures = cudaMemcpyAsync(cpu_buffer, gpu_buffer[src], buffer_size, cudaMemcpyDeviceToHost, stream[src]);
			
@@ -69,7 +69,7 @@ void send_data(unsigned src, unsigned dst)
 
				 #endif
			
 
				 #endif
			
 
				 
			
 
				-	/* Tell the other GPU that data is in RAM */
			
 
				+	/* Tell the other GPU that data is in STARPU_RAM */
			
 
				 	pthread_mutex_lock(&mutex_gpu);
			
 
				 	data_is_available[src] = 0;
			
 
				 	data_is_available[dst] = 1;
			
@@ -82,7 +82,7 @@ void recv_data(unsigned src, unsigned dst)
 
				 {
			
 
				 	cudaError_t cures;
			
 
				 
			
 
				-	/* Wait for the data to be in RAM */
			
 
				+	/* Wait for the data to be in STARPU_RAM */
			
 
				 	pthread_mutex_lock(&mutex_gpu);
			
 
				 	while (!data_is_available[dst])
			
 
				 	{