пре 15 година · 077020f485
--- a/src/core/dependencies/data-concurrency.c
+++ b/src/core/dependencies/data-concurrency.c
@@ -37,42 +37,42 @@ static unsigned unlock_one_requester(data_requester_t r)
 
				 }
			
 
				 
			
 
				 /* the header lock must be taken by the caller */
			
 
				-static unsigned may_unlock_data_req_list_head(data_state *data)
			
 
				+static unsigned may_unlock_data_req_list_head(starpu_data_handle handle)
			
 
				 {
			
 
				 	/* if there is no one to unlock ... */
			
 
				-	if (data_requester_list_empty(data->req_list))
			
 
				+	if (data_requester_list_empty(handle->req_list))
			
 
				 		return 0;
			
 
				 
			
 
				 	/* if there is no reference to the data anymore, we can use it */
			
 
				-	if (data->refcnt == 0)
			
 
				+	if (handle->refcnt == 0)
			
 
				 	{
			
 
				-		STARPU_ASSERT(!data->per_node[0].request);
			
 
				-		STARPU_ASSERT(!data->per_node[1].request);
			
 
				+		STARPU_ASSERT(!handle->per_node[0].request);
			
 
				+		STARPU_ASSERT(!handle->per_node[1].request);
			
 
				 		return 1;
			
 
				 	}
			
 
				 
			
 
				-	if (data->current_mode == STARPU_W)
			
 
				+	if (handle->current_mode == STARPU_W)
			
 
				 		return 0;
			
 
				 
			
 
				 	/* data->current_mode == STARPU_R, so we can process more readers */
			
 
				-	data_requester_t r = data_requester_list_front(data->req_list);
			
 
				+	data_requester_t r = data_requester_list_front(handle->req_list);
			
 
				 	
			
 
				 	return (r->mode == STARPU_R);
			
 
				 }
			
 
				 
			
 
				 
			
 
				-unsigned attempt_to_submit_data_request_from_apps(data_state *data, starpu_access_mode mode,
			
 
				+unsigned attempt_to_submit_data_request_from_apps(starpu_data_handle handle, starpu_access_mode mode,
			
 
				 						void (*callback)(void *), void *argcb)
			
 
				 {
			
 
				 	unsigned ret;
			
 
				 
			
 
				-	starpu_spin_lock(&data->header_lock);
			
 
				+	starpu_spin_lock(&handle->header_lock);
			
 
				 
			
 
				-	if (data->refcnt == 0)
			
 
				+	if (handle->refcnt == 0)
			
 
				 	{
			
 
				 		/* there is nobody currently about to manipulate the data */
			
 
				-		data->refcnt++;
			
 
				-		data->current_mode = mode;
			
 
				+		handle->refcnt++;
			
 
				+		handle->current_mode = mode;
			
 
				 
			
 
				 		/* success */
			
 
				 		ret = 0;
			
@@ -80,9 +80,9 @@ unsigned attempt_to_submit_data_request_from_apps(data_state *data, starpu_acces
 
				 	else
			
 
				 	{
			
 
				 		/* there is already someone that may access the data */
			
 
				-		if ( (mode == STARPU_R) && (data->current_mode == STARPU_R))
			
 
				+		if ( (mode == STARPU_R) && (handle->current_mode == STARPU_R))
			
 
				 		{
			
 
				-			data->refcnt++;
			
 
				+			handle->refcnt++;
			
 
				 
			
 
				 			/* success : there is a new reader */
			
 
				 			ret = 0;
			
@@ -99,14 +99,14 @@ unsigned attempt_to_submit_data_request_from_apps(data_state *data, starpu_acces
 
				 				r->ready_data_callback = callback;
			
 
				 				r->argcb = argcb;
			
 
				 
			
 
				-			data_requester_list_push_back(data->req_list, r);
			
 
				+			data_requester_list_push_back(handle->req_list, r);
			
 
				 
			
 
				 			/* failed */
			
 
				 			ret = 1;
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	starpu_spin_unlock(&data->header_lock);
			
 
				+	starpu_spin_unlock(&handle->header_lock);
			
 
				 	return ret;
			
 
				 }
			
 
				 
			
@@ -114,17 +114,17 @@ static unsigned attempt_to_submit_data_request_from_job(job_t j, unsigned buffer
 
				 {
			
 
				 	unsigned ret;
			
 
				 
			
 
				-	data_state *data = j->task->buffers[buffer_index].handle;
			
 
				+	starpu_data_handle handle = j->task->buffers[buffer_index].handle;
			
 
				 	starpu_access_mode mode = j->task->buffers[buffer_index].mode;
			
 
				 
			
 
				-	while (starpu_spin_trylock(&data->header_lock))
			
 
				+	while (starpu_spin_trylock(&handle->header_lock))
			
 
				 		datawizard_progress(get_local_memory_node(), 0);
			
 
				 
			
 
				-	if (data->refcnt == 0)
			
 
				+	if (handle->refcnt == 0)
			
 
				 	{
			
 
				 		/* there is nobody currently about to manipulate the data */
			
 
				-		data->refcnt++;
			
 
				-		data->current_mode = (mode==STARPU_R)?STARPU_R:STARPU_W;
			
 
				+		handle->refcnt++;
			
 
				+		handle->current_mode = (mode==STARPU_R)?STARPU_R:STARPU_W;
			
 
				 
			
 
				 		/* success */
			
 
				 		ret = 0;
			
@@ -132,9 +132,9 @@ static unsigned attempt_to_submit_data_request_from_job(job_t j, unsigned buffer
 
				 	else
			
 
				 	{
			
 
				 		/* there is already someone that may access the data */
			
 
				-		if ( (mode == STARPU_R) && (data->current_mode == STARPU_R))
			
 
				+		if ( (mode == STARPU_R) && (handle->current_mode == STARPU_R))
			
 
				 		{
			
 
				-			data->refcnt++;
			
 
				+			handle->refcnt++;
			
 
				 
			
 
				 			/* success : there is a new reader */
			
 
				 			ret = 0;
			
@@ -151,14 +151,14 @@ static unsigned attempt_to_submit_data_request_from_job(job_t j, unsigned buffer
 
				 				r->j = j;
			
 
				 				r->buffer_index = buffer_index;
			
 
				 
			
 
				-			data_requester_list_push_back(data->req_list, r);
			
 
				+			data_requester_list_push_back(handle->req_list, r);
			
 
				 
			
 
				 			/* failed */
			
 
				 			ret = 1;
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	starpu_spin_unlock(&data->header_lock);
			
 
				+	starpu_spin_unlock(&handle->header_lock);
			
 
				 	return ret;
			
 
				 }
			
 
				 
			
@@ -192,21 +192,21 @@ unsigned submit_job_enforce_data_deps(job_t j)
 
				 
			
 
				 
			
 
				 /* The header lock must already be taken by the caller */
			
 
				-void notify_data_dependencies(data_state *data)
			
 
				+void notify_data_dependencies(starpu_data_handle handle)
			
 
				 {
			
 
				-	data->refcnt--;
			
 
				+	handle->refcnt--;
			
 
				 
			
 
				-	while (may_unlock_data_req_list_head(data))
			
 
				+	while (may_unlock_data_req_list_head(handle))
			
 
				 	{
			
 
				 		/* unlock the head of the requester list */
			
 
				-		data_requester_t r = data_requester_list_pop_front(data->req_list);
			
 
				+		data_requester_t r = data_requester_list_pop_front(handle->req_list);
			
 
				 
			
 
				-		data->refcnt++;
			
 
				+		handle->refcnt++;
			
 
				 	
			
 
				 		/* the data is now attributed to that request */
			
 
				-		data->current_mode = (r->mode==STARPU_R)?STARPU_R:STARPU_W;
			
 
				+		handle->current_mode = (r->mode==STARPU_R)?STARPU_R:STARPU_W;
			
 
				 
			
 
				-		starpu_spin_unlock(&data->header_lock);
			
 
				+		starpu_spin_unlock(&handle->header_lock);
			
 
				 
			
 
				 		if (r->is_requested_by_codelet)
			
 
				 		{
			
@@ -223,6 +223,6 @@ void notify_data_dependencies(data_state *data)
 
				 
			
 
				 		data_requester_delete(r);
			
 
				 		
			
 
				-		starpu_spin_lock(&data->header_lock);
			
 
				+		starpu_spin_lock(&handle->header_lock);
			
 
				 	}
			
 
				 }
			
--- a/src/core/dependencies/data-concurrency.h
+++ b/src/core/dependencies/data-concurrency.h
@@ -21,9 +21,11 @@
 
				 
			
 
				 unsigned submit_job_enforce_data_deps(job_t j);
			
 
				 
			
 
				-void notify_data_dependencies(data_state *data);
			
 
				+void notify_data_dependencies(starpu_data_handle handle);
			
 
				+
			
 
				+unsigned attempt_to_submit_data_request_from_apps(starpu_data_handle handle,
			
 
				+		starpu_access_mode mode,
			
 
				+		void (*callback)(void *), void *argcb);
			
 
				 
			
 
				-unsigned attempt_to_submit_data_request_from_apps(data_state *state, starpu_access_mode mode,
			
 
				-						void (*callback)(void *), void *argcb);
			
 
				 #endif // __DATA_CONCURRENCY_H__
			
 
				 
			
--- a/src/core/jobs.c
+++ b/src/core/jobs.c
@@ -32,8 +32,8 @@ size_t job_get_data_size(job_t j)
 
				 	unsigned buffer;
			
 
				 	for (buffer = 0; buffer < nbuffers; buffer++)
			
 
				 	{
			
 
				-		data_state *state = task->buffers[buffer].handle;
			
 
				-		size += state->ops->get_size(state);
			
 
				+		starpu_data_handle handle = task->buffers[buffer].handle;
			
 
				+		size += handle->ops->get_size(handle);
			
 
				 	}
			
 
				 
			
 
				 	return size;
			
--- a/src/core/perfmodel/perfmodel.c
+++ b/src/core/perfmodel/perfmodel.c
@@ -123,16 +123,16 @@ double data_expected_penalty(struct jobq_s *q, struct job_s *j)
 
				 
			
 
				 	for (buffer = 0; buffer < nbuffers; buffer++)
			
 
				 	{
			
 
				-		data_state *state = j->task->buffers[buffer].handle;
			
 
				+		starpu_data_handle handle = j->task->buffers[buffer].handle;
			
 
				 
			
 
				 		if (j->task->buffers[buffer].mode == STARPU_W)
			
 
				 			break;
			
 
				 
			
 
				-		if (!is_data_present_or_requested(state, memory_node))
			
 
				+		if (!is_data_present_or_requested(handle, memory_node))
			
 
				 		{
			
 
				-			size_t size = state->ops->get_size(state);
			
 
				+			size_t size = handle->ops->get_size(handle);
			
 
				 
			
 
				-			uint32_t src_node = select_src_node(state);
			
 
				+			uint32_t src_node = select_src_node(handle);
			
 
				 
			
 
				 			penalty += predict_transfer_time(src_node, memory_node, size);
			
 
				 		}
			
--- a/src/core/policies/deque-modeling-policy-data-aware.c
+++ b/src/core/policies/deque-modeling-policy-data-aware.c
@@ -46,9 +46,9 @@ static void update_data_requests(struct jobq_s *q, struct job_s *j)
 
				 
			
 
				 	for (buffer = 0; buffer < nbuffers; buffer++)
			
 
				 	{
			
 
				-		data_state *state = j->task->buffers[buffer].handle;
			
 
				+		starpu_data_handle handle = j->task->buffers[buffer].handle;
			
 
				 
			
 
				-		set_data_requested_flag_if_needed(state, memory_node);
			
 
				+		set_data_requested_flag_if_needed(handle, memory_node);
			
 
				 	}
			
 
				 }
			
 
				 
			
--- a/src/datawizard/coherency.c
+++ b/src/datawizard/coherency.c
@@ -43,7 +43,7 @@ uint32_t select_node_to_handle_request(uint32_t src_node, uint32_t dst_node)
 
				 	return get_local_memory_node();
			
 
				 }
			
 
				 
			
 
				-uint32_t select_src_node(data_state *state)
			
 
				+uint32_t select_src_node(starpu_data_handle handle)
			
 
				 {
			
 
				 	unsigned src_node = 0;
			
 
				 	unsigned i;
			
@@ -53,7 +53,7 @@ uint32_t select_src_node(data_state *state)
 
				 	uint32_t src_node_mask = 0;
			
 
				 	for (node = 0; node < MAXNODES; node++)
			
 
				 	{
			
 
				-		if (state->per_node[node].state != INVALID) {
			
 
				+		if (handle->per_node[node].state != INVALID) {
			
 
				 			/* we found a copy ! */
			
 
				 			src_node_mask |= (1<<node);
			
 
				 		}
			
@@ -84,30 +84,30 @@ uint32_t select_src_node(data_state *state)
 
				 }
			
 
				 
			
 
				 /* this may be called once the data is fetched with header and STARPU_RW-lock hold */
			
 
				-void update_data_state(data_state *state, uint32_t requesting_node, uint8_t write)
			
 
				+void update_data_state(starpu_data_handle handle, uint32_t requesting_node, uint8_t write)
			
 
				 {
			
 
				 	/* the data is present now */
			
 
				-	state->per_node[requesting_node].requested = 0;
			
 
				+	handle->per_node[requesting_node].requested = 0;
			
 
				 
			
 
				 	if (write) {
			
 
				 		/* the requesting node now has the only valid copy */
			
 
				 		uint32_t node;
			
 
				 		for (node = 0; node < MAXNODES; node++)
			
 
				-			state->per_node[node].state = INVALID;
			
 
				+			handle->per_node[node].state = INVALID;
			
 
				 
			
 
				-		state->per_node[requesting_node].state = OWNER;
			
 
				+		handle->per_node[requesting_node].state = OWNER;
			
 
				 	}
			
 
				 	else { /* read only */
			
 
				-		if (state->per_node[requesting_node].state != OWNER)
			
 
				+		if (handle->per_node[requesting_node].state != OWNER)
			
 
				 		{
			
 
				 			/* there was at least another copy of the data */
			
 
				 			uint32_t node;
			
 
				 			for (node = 0; node < MAXNODES; node++)
			
 
				 			{
			
 
				-				if (state->per_node[node].state != INVALID)
			
 
				-					state->per_node[node].state = SHARED;
			
 
				+				if (handle->per_node[node].state != INVALID)
			
 
				+					handle->per_node[node].state = SHARED;
			
 
				 			}
			
 
				-			state->per_node[requesting_node].state = SHARED;
			
 
				+			handle->per_node[requesting_node].state = SHARED;
			
 
				 		}
			
 
				 	}
			
 
				 }
			
@@ -133,35 +133,35 @@ void update_data_state(data_state *state, uint32_t requesting_node, uint8_t writ
 
				  * 		    else (invalid,owner->shared)
			
 
				  */
			
 
				 
			
 
				-int fetch_data_on_node(data_state *state, uint32_t requesting_node,
			
 
				+int fetch_data_on_node(starpu_data_handle handle, uint32_t requesting_node,
			
 
				 			uint8_t read, uint8_t write, unsigned is_prefetch)
			
 
				 {
			
 
				 	uint32_t local_node = get_local_memory_node();
			
 
				 
			
 
				-	while (starpu_spin_trylock(&state->header_lock))
			
 
				+	while (starpu_spin_trylock(&handle->header_lock))
			
 
				 		datawizard_progress(local_node, 1);
			
 
				 
			
 
				 	if (!is_prefetch)
			
 
				-		state->per_node[requesting_node].refcnt++;
			
 
				+		handle->per_node[requesting_node].refcnt++;
			
 
				 
			
 
				-	if (state->per_node[requesting_node].state != INVALID)
			
 
				+	if (handle->per_node[requesting_node].state != INVALID)
			
 
				 	{
			
 
				 		/* the data is already available so we can stop */
			
 
				-		update_data_state(state, requesting_node, write);
			
 
				+		update_data_state(handle, requesting_node, write);
			
 
				 		msi_cache_hit(requesting_node);
			
 
				-		starpu_spin_unlock(&state->header_lock);
			
 
				+		starpu_spin_unlock(&handle->header_lock);
			
 
				 		return 0;
			
 
				 	}
			
 
				 
			
 
				 	/* the only remaining situation is that the local copy was invalid */
			
 
				-	STARPU_ASSERT(state->per_node[requesting_node].state == INVALID);
			
 
				+	STARPU_ASSERT(handle->per_node[requesting_node].state == INVALID);
			
 
				 
			
 
				 	msi_cache_miss(requesting_node);
			
 
				 
			
 
				 	data_request_t r;
			
 
				 
			
 
				 	/* is there already a pending request ? */
			
 
				-	r = search_existing_data_request(state, requesting_node, read, write);
			
 
				+	r = search_existing_data_request(handle, requesting_node, read, write);
			
 
				 	/* at the exit of search_existing_data_request the lock is taken is the request existed ! */
			
 
				 
			
 
				 	if (!r) {
			
@@ -172,7 +172,7 @@ int fetch_data_on_node(data_state *state, uint32_t requesting_node,
 
				 		/* if the data is in read only mode, there is no need for a source */
			
 
				 		if (read)
			
 
				 		{
			
 
				-			src_node = select_src_node(state);
			
 
				+			src_node = select_src_node(handle);
			
 
				 			STARPU_ASSERT(src_node != requesting_node);
			
 
				 		}
			
 
				 	
			
@@ -186,16 +186,16 @@ int fetch_data_on_node(data_state *state, uint32_t requesting_node,
 
				 			data_request_t r_ram_to_dst;
			
 
				 
			
 
				 			/* XXX we hardcore 0 as the RAM node ... */
			
 
				-			r_ram_to_dst = create_data_request(state, 0, requesting_node, requesting_node, read, write, is_prefetch);
			
 
				+			r_ram_to_dst = create_data_request(handle, 0, requesting_node, requesting_node, read, write, is_prefetch);
			
 
				 
			
 
				 			if (!is_prefetch)
			
 
				 				r_ram_to_dst->refcnt++;
			
 
				 
			
 
				-			r_src_to_ram = search_existing_data_request(state, 0, read, write);
			
 
				+			r_src_to_ram = search_existing_data_request(handle, 0, read, write);
			
 
				 			if (!r_src_to_ram)
			
 
				 			{
			
 
				 				reuse_r_src_to_ram = 0;
			
 
				-				r_src_to_ram = create_data_request(state, src_node, 0, src_node, read, write, is_prefetch);
			
 
				+				r_src_to_ram = create_data_request(handle, src_node, 0, src_node, read, write, is_prefetch);
			
 
				 			}
			
 
				 			else {
			
 
				 				reuse_r_src_to_ram = 1;
			
@@ -207,7 +207,7 @@ int fetch_data_on_node(data_state *state, uint32_t requesting_node,
 
				 			if (reuse_r_src_to_ram)
			
 
				 				starpu_spin_unlock(&r_src_to_ram->lock);
			
 
				 
			
 
				-			starpu_spin_unlock(&state->header_lock);
			
 
				+			starpu_spin_unlock(&handle->header_lock);
			
 
				 
			
 
				 			/* we only submit the first request, the remaining will be automatically submitted afterward */
			
 
				 			if (!reuse_r_src_to_ram)
			
@@ -221,12 +221,12 @@ int fetch_data_on_node(data_state *state, uint32_t requesting_node,
 
				 			uint32_t handling_node =
			
 
				 				select_node_to_handle_request(src_node, requesting_node);
			
 
				 
			
 
				-			r = create_data_request(state, src_node, requesting_node, handling_node, read, write, is_prefetch);
			
 
				+			r = create_data_request(handle, src_node, requesting_node, handling_node, read, write, is_prefetch);
			
 
				 
			
 
				 			if (!is_prefetch)
			
 
				 				r->refcnt++;
			
 
				 
			
 
				-			starpu_spin_unlock(&state->header_lock);
			
 
				+			starpu_spin_unlock(&handle->header_lock);
			
 
				 
			
 
				 			post_data_request(r, handling_node);
			
 
				 		}
			
@@ -239,7 +239,7 @@ int fetch_data_on_node(data_state *state, uint32_t requesting_node,
 
				 		{
			
 
				 			starpu_spin_unlock(&r->lock);
			
 
				 
			
 
				-			starpu_spin_unlock(&state->header_lock);
			
 
				+			starpu_spin_unlock(&handle->header_lock);
			
 
				 			return 0;
			
 
				 		}
			
 
				 
			
@@ -258,21 +258,18 @@ int fetch_data_on_node(data_state *state, uint32_t requesting_node,
 
				 
			
 
				 		//fprintf(stderr, "found a similar request : refcnt (req) %d\n", r->refcnt);
			
 
				 		starpu_spin_unlock(&r->lock);
			
 
				-		starpu_spin_unlock(&state->header_lock);
			
 
				+		starpu_spin_unlock(&handle->header_lock);
			
 
				 	}
			
 
				-//
			
 
				-
			
 
				-//	fprintf(stderr, "AFTER %s... refcnt %d %d req %p %p\n", is_prefetch?"PREFETCH":"", state->per_node[0].refcnt, state->per_node[1].refcnt,  state->per_node[0].request, state->per_node[1].request);
			
 
				 
			
 
				 	return (is_prefetch?0:wait_data_request_completion(r, 1));
			
 
				 }
			
 
				 
			
 
				-static int prefetch_data_on_node(data_state *state, uint8_t read, uint8_t write, uint32_t node)
			
 
				+static int prefetch_data_on_node(starpu_data_handle handle, uint8_t read, uint8_t write, uint32_t node)
			
 
				 {
			
 
				-	return fetch_data_on_node(state, node, read, write, 1);
			
 
				+	return fetch_data_on_node(handle, node, read, write, 1);
			
 
				 }
			
 
				 
			
 
				-static int fetch_data(data_state *state, starpu_access_mode mode)
			
 
				+static int fetch_data(starpu_data_handle handle, starpu_access_mode mode)
			
 
				 {
			
 
				 	uint32_t requesting_node = get_local_memory_node(); 
			
 
				 
			
@@ -280,39 +277,39 @@ static int fetch_data(data_state *state, starpu_access_mode mode)
 
				 	read = (mode != STARPU_W); /* then R or STARPU_RW */
			
 
				 	write = (mode != STARPU_R); /* then STARPU_W or STARPU_RW */
			
 
				 
			
 
				-	return fetch_data_on_node(state, requesting_node, read, write, 0);
			
 
				+	return fetch_data_on_node(handle, requesting_node, read, write, 0);
			
 
				 }
			
 
				 
			
 
				-inline uint32_t get_data_refcnt(data_state *state, uint32_t node)
			
 
				+inline uint32_t get_data_refcnt(starpu_data_handle handle, uint32_t node)
			
 
				 {
			
 
				-	return state->per_node[node].refcnt;
			
 
				+	return handle->per_node[node].refcnt;
			
 
				 }
			
 
				 
			
 
				 /* in case the data was accessed on a write mode, do not forget to 
			
 
				  * make it accessible again once it is possible ! */
			
 
				-void release_data_on_node(data_state *state, uint32_t default_wb_mask, uint32_t memory_node)
			
 
				+void release_data_on_node(starpu_data_handle handle, uint32_t default_wb_mask, uint32_t memory_node)
			
 
				 {
			
 
				 	uint32_t wb_mask;
			
 
				 
			
 
				 	/* normally, the requesting node should have the data in an exclusive manner */
			
 
				-	STARPU_ASSERT(state->per_node[memory_node].state != INVALID);
			
 
				+	STARPU_ASSERT(handle->per_node[memory_node].state != INVALID);
			
 
				 
			
 
				-	wb_mask = default_wb_mask | state->wb_mask;
			
 
				+	wb_mask = default_wb_mask | handle->wb_mask;
			
 
				 
			
 
				 	/* are we doing write-through or just some normal write-back ? */
			
 
				 	if (wb_mask & ~(1<<memory_node)) {
			
 
				-		write_through_data(state, memory_node, wb_mask);
			
 
				+		write_through_data(handle, memory_node, wb_mask);
			
 
				 	}
			
 
				 
			
 
				 	uint32_t local_node = get_local_memory_node();
			
 
				-	while (starpu_spin_trylock(&state->header_lock))
			
 
				+	while (starpu_spin_trylock(&handle->header_lock))
			
 
				 		datawizard_progress(local_node, 1);
			
 
				 
			
 
				-	state->per_node[memory_node].refcnt--;
			
 
				+	handle->per_node[memory_node].refcnt--;
			
 
				 
			
 
				-	notify_data_dependencies(state);
			
 
				+	notify_data_dependencies(handle);
			
 
				 
			
 
				-	starpu_spin_unlock(&state->header_lock);
			
 
				+	starpu_spin_unlock(&handle->header_lock);
			
 
				 }
			
 
				 
			
 
				 int prefetch_task_input_on_node(struct starpu_task *task, uint32_t node)
			
@@ -324,17 +321,17 @@ int prefetch_task_input_on_node(struct starpu_task *task, uint32_t node)
 
				 	for (index = 0; index < nbuffers; index++)
			
 
				 	{
			
 
				 		starpu_buffer_descr *descr;
			
 
				-		data_state *state;
			
 
				+		starpu_data_handle handle;
			
 
				 
			
 
				 		descr = &descrs[index];
			
 
				-		state = descr->handle;
			
 
				+		handle = descr->handle;
			
 
				 		
			
 
				 		uint32_t mode = task->buffers[index].mode;
			
 
				 	
			
 
				 		uint8_t read = (mode != STARPU_W);
			
 
				 		uint8_t write = (mode != STARPU_R);
			
 
				 
			
 
				-		prefetch_data_on_node(state, read, write, node);
			
 
				+		prefetch_data_on_node(handle, read, write, node);
			
 
				 	}
			
 
				 
			
 
				 	return 0;
			
@@ -359,19 +356,19 @@ int fetch_task_input(struct starpu_task *task, uint32_t mask)
 
				 	{
			
 
				 		int ret;
			
 
				 		starpu_buffer_descr *descr;
			
 
				-		data_state *state;
			
 
				+		starpu_data_handle handle;
			
 
				 
			
 
				 		descr = &descrs[index];
			
 
				 
			
 
				-		state = descr->handle;
			
 
				+		handle = descr->handle;
			
 
				 	
			
 
				-		ret = fetch_data(state, descr->mode);
			
 
				+		ret = fetch_data(handle, descr->mode);
			
 
				 		if (STARPU_UNLIKELY(ret))
			
 
				 			goto enomem;
			
 
				 
			
 
				-		void *src_interface = starpu_data_get_interface_on_node(state, local_memory_node);
			
 
				+		void *src_interface = starpu_data_get_interface_on_node(handle, local_memory_node);
			
 
				 
			
 
				-		memcpy(&interface[index], src_interface, state->interface_size);
			
 
				+		memcpy(&interface[index], src_interface, handle->interface_size);
			
 
				 	}
			
 
				 
			
 
				 	TRACE_END_FETCH_INPUT(NULL);
			
@@ -409,29 +406,29 @@ void push_task_output(struct starpu_task *task, uint32_t mask)
 
				 
			
 
				 /* NB : this value can only be an indication of the status of a data
			
 
				 	at some point, but there is no strong garantee ! */
			
 
				-unsigned is_data_present_or_requested(data_state *state, uint32_t node)
			
 
				+unsigned is_data_present_or_requested(starpu_data_handle handle, uint32_t node)
			
 
				 {
			
 
				 	unsigned ret = 0;
			
 
				 
			
 
				 // XXX : this is just a hint, so we don't take the lock ...
			
 
				-//	pthread_spin_lock(&state->header_lock);
			
 
				+//	pthread_spin_lock(&handle->header_lock);
			
 
				 
			
 
				-	if (state->per_node[node].state != INVALID 
			
 
				-		|| state->per_node[node].requested || state->per_node[node].request)
			
 
				+	if (handle->per_node[node].state != INVALID 
			
 
				+		|| handle->per_node[node].requested || handle->per_node[node].request)
			
 
				 		ret = 1;
			
 
				 
			
 
				-//	pthread_spin_unlock(&state->header_lock);
			
 
				+//	pthread_spin_unlock(&handle->header_lock);
			
 
				 
			
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-inline void set_data_requested_flag_if_needed(data_state *state, uint32_t node)
			
 
				+inline void set_data_requested_flag_if_needed(starpu_data_handle handle, uint32_t node)
			
 
				 {
			
 
				 // XXX : this is just a hint, so we don't take the lock ...
			
 
				-//	pthread_spin_lock(&state->header_lock);
			
 
				+//	pthread_spin_lock(&handle->header_lock);
			
 
				 
			
 
				-	if (state->per_node[node].state == INVALID) 
			
 
				-		state->per_node[node].requested = 1;
			
 
				+	if (handle->per_node[node].state == INVALID) 
			
 
				+		handle->per_node[node].requested = 1;
			
 
				 
			
 
				-//	pthread_spin_unlock(&state->header_lock);
			
 
				+//	pthread_spin_unlock(&handle->header_lock);
			
 
				 }
			
--- a/src/datawizard/coherency.h
+++ b/src/datawizard/coherency.h
@@ -93,7 +93,7 @@ LIST_TYPE(data_requester,
 
				 	void *argcb;
			
 
				 );
			
 
				 
			
 
				-typedef struct starpu_data_state_t {
			
 
				+struct starpu_data_state_t {
			
 
				 	data_requester_list_t req_list;
			
 
				 	/* the number of requests currently in the scheduling engine
			
 
				 	 * (not in the req_list anymore) */
			
@@ -128,32 +128,32 @@ typedef struct starpu_data_state_t {
 
				 	/* in some case, the application may explicitly tell StarPU that a
			
 
				  	 * piece of data is not likely to be used soon again */
			
 
				 	unsigned is_not_important;
			
 
				-} data_state;
			
 
				+};
			
 
				 
			
 
				 void display_msi_stats(void);
			
 
				 
			
 
				-//void release_data(data_state *state, uint32_t write_through_mask);
			
 
				+//void release_data(struct starpu_data_state_t *state, uint32_t write_through_mask);
			
 
				 
			
 
				 __attribute__((warn_unused_result))
			
 
				-int fetch_data_on_node(data_state *state, uint32_t requesting_node, uint8_t read, uint8_t write, unsigned is_prefetch);
			
 
				-void release_data_on_node(data_state *state, uint32_t default_wb_mask, unsigned memory_node);
			
 
				+int fetch_data_on_node(struct starpu_data_state_t *state, uint32_t requesting_node, uint8_t read, uint8_t write, unsigned is_prefetch);
			
 
				+void release_data_on_node(struct starpu_data_state_t *state, uint32_t default_wb_mask, unsigned memory_node);
			
 
				 
			
 
				-void update_data_state(data_state *state, uint32_t requesting_node, uint8_t write);
			
 
				+void update_data_state(struct starpu_data_state_t *state, uint32_t requesting_node, uint8_t write);
			
 
				 
			
 
				-uint32_t get_data_refcnt(data_state *state, uint32_t node);
			
 
				+uint32_t get_data_refcnt(struct starpu_data_state_t *state, uint32_t node);
			
 
				 
			
 
				 void push_task_output(struct starpu_task *task, uint32_t mask);
			
 
				 
			
 
				 __attribute__((warn_unused_result))
			
 
				 int fetch_task_input(struct starpu_task *task, uint32_t mask);
			
 
				 
			
 
				-unsigned is_data_present_or_requested(data_state *state, uint32_t node);
			
 
				+unsigned is_data_present_or_requested(struct starpu_data_state_t *state, uint32_t node);
			
 
				 
			
 
				-inline void set_data_requested_flag_if_needed(data_state *state, uint32_t node);
			
 
				+inline void set_data_requested_flag_if_needed(struct starpu_data_state_t *state, uint32_t node);
			
 
				 
			
 
				 int prefetch_task_input_on_node(struct starpu_task *task, uint32_t node);
			
 
				 
			
 
				 uint32_t select_node_to_handle_request(uint32_t src_node, uint32_t dst_node);
			
 
				-uint32_t select_src_node(data_state *state);
			
 
				+uint32_t select_src_node(struct starpu_data_state_t *state);
			
 
				 
			
 
				 #endif // __COHERENCY__H__
			
--- a/src/datawizard/copy-driver.c
+++ b/src/datawizard/copy-driver.c
@@ -88,22 +88,22 @@ static cudaStream_t *create_cuda_stream(struct data_request_s *req)
 
				 }
			
 
				 #endif
			
 
				 
			
 
				-static int copy_data_1_to_1_generic(data_state *state, uint32_t src_node, uint32_t dst_node, struct data_request_s *req __attribute__((unused)))
			
 
				+static int copy_data_1_to_1_generic(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, struct data_request_s *req __attribute__((unused)))
			
 
				 {
			
 
				 	int ret = 0;
			
 
				 
			
 
				-	//ret = state->ops->copy_data_1_to_1(state, src_node, dst_node);
			
 
				+	//ret = handle->ops->copy_data_1_to_1(handle, src_node, dst_node);
			
 
				 
			
 
				-	const struct copy_data_methods_s *copy_methods = state->ops->copy_methods;
			
 
				+	const struct copy_data_methods_s *copy_methods = handle->ops->copy_methods;
			
 
				 
			
 
				 	node_kind src_kind = get_node_kind(src_node);
			
 
				 	node_kind dst_kind = get_node_kind(dst_node);
			
 
				 
			
 
				-	STARPU_ASSERT(state->per_node[src_node].refcnt);
			
 
				-	STARPU_ASSERT(state->per_node[dst_node].refcnt);
			
 
				+	STARPU_ASSERT(handle->per_node[src_node].refcnt);
			
 
				+	STARPU_ASSERT(handle->per_node[dst_node].refcnt);
			
 
				 
			
 
				-	STARPU_ASSERT(state->per_node[src_node].allocated);
			
 
				-	STARPU_ASSERT(state->per_node[dst_node].allocated);
			
 
				+	STARPU_ASSERT(handle->per_node[src_node].allocated);
			
 
				+	STARPU_ASSERT(handle->per_node[dst_node].allocated);
			
 
				 
			
 
				 	switch (dst_kind) {
			
 
				 	case RAM:
			
@@ -111,7 +111,7 @@ static int copy_data_1_to_1_generic(data_state *state, uint32_t src_node, uint32
 
				 			case RAM:
			
 
				 				/* RAM -> RAM */
			
 
				 				STARPU_ASSERT(copy_methods->ram_to_ram);
			
 
				-				copy_methods->ram_to_ram(state, src_node, dst_node);
			
 
				+				copy_methods->ram_to_ram(handle, src_node, dst_node);
			
 
				 				break;
			
 
				 #ifdef USE_CUDA
			
 
				 			case CUDA_RAM:
			
@@ -124,11 +124,11 @@ static int copy_data_1_to_1_generic(data_state *state, uint32_t src_node, uint32
 
				 					if (!req || !copy_methods->cuda_to_ram_async)
			
 
				 					{
			
 
				 						/* this is not associated to a request so it's synchronous */
			
 
				-						copy_methods->cuda_to_ram(state, src_node, dst_node);
			
 
				+						copy_methods->cuda_to_ram(handle, src_node, dst_node);
			
 
				 					}
			
 
				 					else {
			
 
				 						cudaStream_t *stream = create_cuda_stream(req);
			
 
				-						ret = copy_methods->cuda_to_ram_async(state, src_node, dst_node, stream);
			
 
				+						ret = copy_methods->cuda_to_ram_async(handle, src_node, dst_node, stream);
			
 
				 					}
			
 
				 				}
			
 
				 				else
			
@@ -159,11 +159,11 @@ static int copy_data_1_to_1_generic(data_state *state, uint32_t src_node, uint32
 
				 				if (!req || !copy_methods->ram_to_cuda_async)
			
 
				 				{
			
 
				 					/* this is not associated to a request so it's synchronous */
			
 
				-					copy_methods->ram_to_cuda(state, src_node, dst_node);
			
 
				+					copy_methods->ram_to_cuda(handle, src_node, dst_node);
			
 
				 				}
			
 
				 				else {
			
 
				 					cudaStream_t *stream = create_cuda_stream(req);
			
 
				-					ret = copy_methods->ram_to_cuda_async(state, src_node, dst_node, stream);
			
 
				+					ret = copy_methods->ram_to_cuda_async(handle, src_node, dst_node, stream);
			
 
				 				}
			
 
				 				break;
			
 
				 			case CUDA_RAM:
			
@@ -189,34 +189,34 @@ static int copy_data_1_to_1_generic(data_state *state, uint32_t src_node, uint32
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-int __attribute__((warn_unused_result)) driver_copy_data_1_to_1(data_state *state, uint32_t src_node, 
			
 
				+int __attribute__((warn_unused_result)) driver_copy_data_1_to_1(starpu_data_handle handle, uint32_t src_node, 
			
 
				 		uint32_t dst_node, unsigned donotread, struct data_request_s *req, unsigned may_alloc)
			
 
				 {
			
 
				 	if (!donotread)
			
 
				 	{
			
 
				-		STARPU_ASSERT(state->per_node[src_node].allocated);
			
 
				-		STARPU_ASSERT(state->per_node[src_node].refcnt);
			
 
				+		STARPU_ASSERT(handle->per_node[src_node].allocated);
			
 
				+		STARPU_ASSERT(handle->per_node[src_node].refcnt);
			
 
				 	}
			
 
				 
			
 
				 	int ret_alloc, ret_copy;
			
 
				 	unsigned __attribute__((unused)) com_id = 0;
			
 
				 
			
 
				 	/* first make sure the destination has an allocated buffer */
			
 
				-	ret_alloc = allocate_memory_on_node(state, dst_node, may_alloc);
			
 
				+	ret_alloc = allocate_memory_on_node(handle, dst_node, may_alloc);
			
 
				 	if (ret_alloc)
			
 
				 		goto nomem;
			
 
				 
			
 
				-	STARPU_ASSERT(state->per_node[dst_node].allocated);
			
 
				-	STARPU_ASSERT(state->per_node[dst_node].refcnt);
			
 
				+	STARPU_ASSERT(handle->per_node[dst_node].allocated);
			
 
				+	STARPU_ASSERT(handle->per_node[dst_node].refcnt);
			
 
				 
			
 
				 	/* if there is no need to actually read the data, 
			
 
				 	 * we do not perform any transfer */
			
 
				 	if (!donotread) {
			
 
				-		STARPU_ASSERT(state->ops);
			
 
				-		//STARPU_ASSERT(state->ops->copy_data_1_to_1);
			
 
				+		STARPU_ASSERT(handle->ops);
			
 
				+		//STARPU_ASSERT(handle->ops->copy_data_1_to_1);
			
 
				 
			
 
				 #ifdef DATA_STATS
			
 
				-		size_t size = state->ops->get_size(state);
			
 
				+		size_t size = handle->ops->get_size(handle);
			
 
				 		update_comm_ammount(src_node, dst_node, size);
			
 
				 #endif
			
 
				 		
			
@@ -229,7 +229,7 @@ int __attribute__((warn_unused_result)) driver_copy_data_1_to_1(data_state *stat
 
				 
			
 
				 		/* for now we set the size to 0 in the FxT trace XXX */
			
 
				 		TRACE_START_DRIVER_COPY(src_node, dst_node, 0, com_id);
			
 
				-		ret_copy = copy_data_1_to_1_generic(state, src_node, dst_node, req);
			
 
				+		ret_copy = copy_data_1_to_1_generic(handle, src_node, dst_node, req);
			
 
				 		if (ret_copy != EAGAIN)
			
 
				 		{
			
 
				 			TRACE_END_DRIVER_COPY(src_node, dst_node, 0, com_id);
			
--- a/src/datawizard/data_request.h
+++ b/src/datawizard/data_request.h
@@ -26,8 +26,6 @@
 
				 #define DATA_REQ_ALLOCATE	(1<<0)
			
 
				 #define DATA_REQ_COPY		(1<<1)
			
 
				 
			
 
				-struct starpu_data_state_t;
			
 
				-
			
 
				 LIST_TYPE(data_request,
			
 
				 	starpu_spinlock_t lock;
			
 
				 	unsigned refcnt;
			
--- a/src/datawizard/footprint.c
+++ b/src/datawizard/footprint.c
@@ -25,21 +25,21 @@ void compute_buffers_footprint(job_t j)
 
				 
			
 
				 	for (buffer = 0; buffer < task->cl->nbuffers; buffer++)
			
 
				 	{
			
 
				-		data_state *state = task->buffers[buffer].handle;
			
 
				+		starpu_data_handle handle = task->buffers[buffer].handle;
			
 
				 
			
 
				-		STARPU_ASSERT(state->ops);
			
 
				-		STARPU_ASSERT(state->ops->footprint);
			
 
				+		STARPU_ASSERT(handle->ops);
			
 
				+		STARPU_ASSERT(handle->ops->footprint);
			
 
				 
			
 
				-		footprint = state->ops->footprint(state, footprint);
			
 
				+		footprint = handle->ops->footprint(handle, footprint);
			
 
				 	}
			
 
				 
			
 
				 	j->footprint = footprint;
			
 
				 	j->footprint_is_computed = 1;
			
 
				 }
			
 
				 
			
 
				-inline uint32_t compute_data_footprint(data_state *state)
			
 
				+inline uint32_t compute_data_footprint(starpu_data_handle handle)
			
 
				 {
			
 
				-	uint32_t interfaceid = (uint32_t)starpu_get_handle_interface_id(state);
			
 
				+	uint32_t interfaceid = (uint32_t)starpu_get_handle_interface_id(handle);
			
 
				 
			
 
				-	return state->ops->footprint(state, interfaceid);
			
 
				+	return handle->ops->footprint(handle, interfaceid);
			
 
				 }
			
--- a/src/datawizard/footprint.h
+++ b/src/datawizard/footprint.h
@@ -22,6 +22,6 @@
 
				 struct job_s;
			
 
				 
			
 
				 void compute_buffers_footprint(struct job_s *j);
			
 
				-inline uint32_t compute_data_footprint(data_state *state);
			
 
				+inline uint32_t compute_data_footprint(starpu_data_handle handle);
			
 
				 
			
 
				 #endif // __FOOTPRINT_H__
			
--- a/src/datawizard/hierarchy.c
+++ b/src/datawizard/hierarchy.c
@@ -20,57 +20,57 @@
 
				  * Stop monitoring a data
			
 
				  */
			
 
				 
			
 
				-static void starpu_data_liberate_interfaces(data_state *state)
			
 
				+static void starpu_data_liberate_interfaces(starpu_data_handle handle)
			
 
				 {
			
 
				 	unsigned node;
			
 
				 	for (node = 0; node < MAXNODES; node++)
			
 
				-		free(state->interface[node]);
			
 
				+		free(handle->interface[node]);
			
 
				 }
			
 
				 
			
 
				 /* TODO : move in a more appropriate file */
			
 
				-void starpu_delete_data(data_state *state)
			
 
				+void starpu_delete_data(starpu_data_handle handle)
			
 
				 {
			
 
				 	unsigned node;
			
 
				 
			
 
				-	STARPU_ASSERT(state);
			
 
				+	STARPU_ASSERT(handle);
			
 
				 	for (node = 0; node < MAXNODES; node++)
			
 
				 	{
			
 
				-		local_data_state *local = &state->per_node[node];
			
 
				+		local_data_state *local = &handle->per_node[node];
			
 
				 
			
 
				 		if (local->allocated && local->automatically_allocated){
			
 
				 			/* free the data copy in a lazy fashion */
			
 
				-			request_mem_chunk_removal(state, node);
			
 
				+			request_mem_chunk_removal(handle, node);
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	data_requester_list_delete(state->req_list);
			
 
				+	data_requester_list_delete(handle->req_list);
			
 
				 
			
 
				-	starpu_data_liberate_interfaces(state);
			
 
				+	starpu_data_liberate_interfaces(handle);
			
 
				 
			
 
				-	free(state);
			
 
				+	free(handle);
			
 
				 }
			
 
				 
			
 
				-void register_new_data(data_state *state, uint32_t home_node, uint32_t wb_mask)
			
 
				+void register_new_data(starpu_data_handle handle, uint32_t home_node, uint32_t wb_mask)
			
 
				 {
			
 
				-	STARPU_ASSERT(state);
			
 
				+	STARPU_ASSERT(handle);
			
 
				 
			
 
				 	/* initialize the new lock */
			
 
				-	state->req_list = data_requester_list_new();
			
 
				-	state->refcnt = 0;
			
 
				-	starpu_spin_init(&state->header_lock);
			
 
				+	handle->req_list = data_requester_list_new();
			
 
				+	handle->refcnt = 0;
			
 
				+	starpu_spin_init(&handle->header_lock);
			
 
				 
			
 
				 	/* first take care to properly lock the data */
			
 
				-	starpu_spin_lock(&state->header_lock);
			
 
				+	starpu_spin_lock(&handle->header_lock);
			
 
				 
			
 
				 	/* we assume that all nodes may use that data */
			
 
				-	state->nnodes = MAXNODES;
			
 
				+	handle->nnodes = MAXNODES;
			
 
				 
			
 
				 	/* there is no hierarchy yet */
			
 
				-	state->nchildren = 0;
			
 
				+	handle->nchildren = 0;
			
 
				 
			
 
				-	state->is_not_important = 0;
			
 
				+	handle->is_not_important = 0;
			
 
				 
			
 
				-	state->wb_mask = wb_mask;
			
 
				+	handle->wb_mask = wb_mask;
			
 
				 
			
 
				 	/* that new data is invalid from all nodes perpective except for the
			
 
				 	 * home node */
			
@@ -79,45 +79,45 @@ void register_new_data(data_state *state, uint32_t home_node, uint32_t wb_mask)
 
				 	{
			
 
				 		if (node == home_node) {
			
 
				 			/* this is the home node with the only valid copy */
			
 
				-			state->per_node[node].state = OWNER;
			
 
				-			state->per_node[node].allocated = 1;
			
 
				-			state->per_node[node].automatically_allocated = 0;
			
 
				-			state->per_node[node].refcnt = 0;
			
 
				+			handle->per_node[node].state = OWNER;
			
 
				+			handle->per_node[node].allocated = 1;
			
 
				+			handle->per_node[node].automatically_allocated = 0;
			
 
				+			handle->per_node[node].refcnt = 0;
			
 
				 		}
			
 
				 		else {
			
 
				 			/* the value is not available here yet */
			
 
				-			state->per_node[node].state = INVALID;
			
 
				-			state->per_node[node].allocated = 0;
			
 
				-			state->per_node[node].refcnt = 0;
			
 
				+			handle->per_node[node].state = INVALID;
			
 
				+			handle->per_node[node].allocated = 0;
			
 
				+			handle->per_node[node].refcnt = 0;
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				 	/* now the data is available ! */
			
 
				-	starpu_spin_unlock(&state->header_lock);
			
 
				+	starpu_spin_unlock(&handle->header_lock);
			
 
				 }
			
 
				 
			
 
				 /*
			
 
				  * This function applies a starpu_filter on all the elements of a partition
			
 
				  */
			
 
				-static void map_filter(data_state *root_data, starpu_filter *f)
			
 
				+static void map_filter(starpu_data_handle root_handle, starpu_filter *f)
			
 
				 {
			
 
				 	/* we need to apply the starpu_filter on all leaf of the tree */
			
 
				-	if (root_data->nchildren == 0) 
			
 
				+	if (root_handle->nchildren == 0)
			
 
				 	{
			
 
				 		/* this is a leaf */
			
 
				-		starpu_partition_data(root_data, f);
			
 
				+		starpu_partition_data(root_handle, f);
			
 
				 	}
			
 
				 	else {
			
 
				 		/* try to apply the starpu_filter recursively */
			
 
				 		int child;
			
 
				-		for (child = 0; child < root_data->nchildren; child++)
			
 
				+		for (child = 0; child < root_handle->nchildren; child++)
			
 
				 		{
			
 
				-			map_filter(&root_data->children[child], f);
			
 
				+			map_filter(&root_handle->children[child], f);
			
 
				 		}
			
 
				 	}
			
 
				 }
			
 
				 
			
 
				-void starpu_map_filters(data_state *root_data, unsigned nfilters, ...)
			
 
				+void starpu_map_filters(starpu_data_handle root_handle, unsigned nfilters, ...)
			
 
				 {
			
 
				 	unsigned i;
			
 
				 	va_list pa;
			
@@ -129,13 +129,13 @@ void starpu_map_filters(data_state *root_data, unsigned nfilters, ...)
 
				 
			
 
				 		STARPU_ASSERT(next_filter);
			
 
				 
			
 
				-		map_filter(root_data, next_filter);
			
 
				+		map_filter(root_handle, next_filter);
			
 
				 	}
			
 
				 	va_end(pa);
			
 
				 }
			
 
				 
			
 
				 /*
			
 
				- * example get_sub_data(data_state *root_data, 3, 42, 0, 1);
			
 
				+ * example get_sub_data(starpu_data_handle root_handle, 3, 42, 0, 1);
			
 
				  */
			
 
				 starpu_data_handle starpu_data_get_child(starpu_data_handle handle, unsigned i)
			
 
				 {
			
@@ -145,10 +145,10 @@ starpu_data_handle starpu_data_get_child(starpu_data_handle handle, unsigned i)
 
				 	return &handle->children[i];
			
 
				 }
			
 
				 
			
 
				-data_state *get_sub_data(data_state *root_data, unsigned depth, ... )
			
 
				+starpu_data_handle get_sub_data(starpu_data_handle root_handle, unsigned depth, ... )
			
 
				 {
			
 
				-	STARPU_ASSERT(root_data);
			
 
				-	data_state *current_data = root_data;
			
 
				+	STARPU_ASSERT(root_handle);
			
 
				+	starpu_data_handle current_handle = root_handle;
			
 
				 
			
 
				 	/* the variable number of argument must correlate the depth in the tree */
			
 
				 	unsigned i; 
			
@@ -159,52 +159,53 @@ data_state *get_sub_data(data_state *root_data, unsigned depth, ... )
 
				 		unsigned next_child;
			
 
				 		next_child = va_arg(pa, unsigned);
			
 
				 
			
 
				-		STARPU_ASSERT((int)next_child < current_data->nchildren);
			
 
				+		STARPU_ASSERT((int)next_child < current_handle->nchildren);
			
 
				 
			
 
				-		current_data = &current_data->children[next_child];
			
 
				+		current_handle = &current_handle->children[next_child];
			
 
				 	}
			
 
				 	va_end(pa);
			
 
				 
			
 
				-	return current_data;
			
 
				+	return current_handle;
			
 
				 }
			
 
				 
			
 
				 /*
			
 
				  * For now, we assume that partitionned_data is already properly allocated;
			
 
				  * at least by the starpu_filter function !
			
 
				  */
			
 
				-void starpu_partition_data(data_state *initial_data, starpu_filter *f)
			
 
				+void starpu_partition_data(starpu_data_handle initial_handle, starpu_filter *f)
			
 
				 {
			
 
				 	int nparts;
			
 
				 	int i;
			
 
				 
			
 
				 	/* first take care to properly lock the data header */
			
 
				-	starpu_spin_lock(&initial_data->header_lock);
			
 
				+	starpu_spin_lock(&initial_handle->header_lock);
			
 
				 
			
 
				 	/* there should not be mutiple filters applied on the same data */
			
 
				-	STARPU_ASSERT(initial_data->nchildren == 0);
			
 
				+	STARPU_ASSERT(initial_handle->nchildren == 0);
			
 
				 
			
 
				 	/* this should update the pointers and size of the chunk */
			
 
				-	nparts = f->filter_func(f, initial_data);
			
 
				+	nparts = f->filter_func(f, initial_handle);
			
 
				 	STARPU_ASSERT(nparts > 0);
			
 
				 
			
 
				-	initial_data->nchildren = nparts;
			
 
				+	initial_handle->nchildren = nparts;
			
 
				 
			
 
				 	for (i = 0; i < nparts; i++)
			
 
				 	{
			
 
				-		data_state *children = starpu_data_get_child(initial_data, i);
			
 
				+		starpu_data_handle children =
			
 
				+			starpu_data_get_child(initial_handle, i);
			
 
				 
			
 
				 		STARPU_ASSERT(children);
			
 
				 
			
 
				 		children->nchildren = 0;
			
 
				 
			
 
				-		children->is_not_important = initial_data->is_not_important;
			
 
				+		children->is_not_important = initial_handle->is_not_important;
			
 
				 
			
 
				 		/* it is possible that the children does not use the same interface as the parent,
			
 
				 		 * in that case, the starpu_filter must set the proper methods */
			
 
				 		if (!children->ops)
			
 
				-			children->ops = initial_data->ops;
			
 
				+			children->ops = initial_handle->ops;
			
 
				 
			
 
				-		children->wb_mask = initial_data->wb_mask;
			
 
				+		children->wb_mask = initial_handle->wb_mask;
			
 
				 
			
 
				 		/* initialize the chunk lock */
			
 
				 		children->req_list = data_requester_list_new();
			
@@ -215,43 +216,43 @@ void starpu_partition_data(data_state *initial_data, starpu_filter *f)
 
				 		for (node = 0; node < MAXNODES; node++)
			
 
				 		{
			
 
				 			children->per_node[node].state = 
			
 
				-				initial_data->per_node[node].state;
			
 
				+				initial_handle->per_node[node].state;
			
 
				 			children->per_node[node].allocated = 
			
 
				-				initial_data->per_node[node].allocated;
			
 
				-			children->per_node[node].automatically_allocated = initial_data->per_node[node].automatically_allocated;
			
 
				+				initial_handle->per_node[node].allocated;
			
 
				+			children->per_node[node].automatically_allocated = initial_handle->per_node[node].automatically_allocated;
			
 
				 			children->per_node[node].refcnt = 0;
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				 	/* now let the header */
			
 
				-	starpu_spin_unlock(&initial_data->header_lock);
			
 
				+	starpu_spin_unlock(&initial_handle->header_lock);
			
 
				 }
			
 
				 
			
 
				-void starpu_unpartition_data(data_state *root_data, uint32_t gathering_node)
			
 
				+void starpu_unpartition_data(starpu_data_handle root_handle, uint32_t gathering_node)
			
 
				 {
			
 
				 	int child;
			
 
				 	unsigned node;
			
 
				 
			
 
				-	starpu_spin_lock(&root_data->header_lock);
			
 
				+	starpu_spin_lock(&root_handle->header_lock);
			
 
				 
			
 
				 #warning starpu_unpartition_data is not supported with NO_DATA_RW_LOCK yet ...
			
 
				 
			
 
				 	/* first take all the children lock (in order !) */
			
 
				-	for (child = 0; child < root_data->nchildren; child++)
			
 
				+	for (child = 0; child < root_handle->nchildren; child++)
			
 
				 	{
			
 
				 		/* make sure the intermediate children is unpartitionned as well */
			
 
				-		if (root_data->children[child].nchildren > 0)
			
 
				-			starpu_unpartition_data(&root_data->children[child], gathering_node);
			
 
				+		if (root_handle->children[child].nchildren > 0)
			
 
				+			starpu_unpartition_data(&root_handle->children[child], gathering_node);
			
 
				 
			
 
				 		int ret;
			
 
				-		ret = fetch_data_on_node(&root_data->children[child], gathering_node, 1, 0, 0);
			
 
				+		ret = fetch_data_on_node(&root_handle->children[child], gathering_node, 1, 0, 0);
			
 
				 		/* for now we pretend that the RAM is almost unlimited and that gathering 
			
 
				 		 * data should be possible from the node that does the unpartionning ... we
			
 
				 		 * don't want to have the programming deal with memory shortage at that time,
			
 
				 		 * really */
			
 
				 		STARPU_ASSERT(ret == 0); 
			
 
				 
			
 
				-		starpu_data_liberate_interfaces(&root_data->children[child]);
			
 
				+		starpu_data_liberate_interfaces(&root_handle->children[child]);
			
 
				 	}
			
 
				 
			
 
				 	/* the gathering_node should now have a valid copy of all the children.
			
@@ -274,9 +275,9 @@ void starpu_unpartition_data(data_state *root_data, uint32_t gathering_node)
 
				 		/* until an issue is found the data is assumed to be valid */
			
 
				 		unsigned isvalid = 1;
			
 
				 
			
 
				-		for (child = 0; child < root_data->nchildren; child++)
			
 
				+		for (child = 0; child < root_handle->nchildren; child++)
			
 
				 		{
			
 
				-			local_data_state *local = &root_data->children[child].per_node[node];
			
 
				+			local_data_state *local = &root_handle->children[child].per_node[node];
			
 
				 
			
 
				 			if (local->state == INVALID) {
			
 
				 				isvalid = 0; 
			
@@ -284,7 +285,7 @@ void starpu_unpartition_data(data_state *root_data, uint32_t gathering_node)
 
				 	
			
 
				 			if (local->allocated && local->automatically_allocated){
			
 
				 				/* free the data copy in a lazy fashion */
			
 
				-				request_mem_chunk_removal(root_data, node);
			
 
				+				request_mem_chunk_removal(root_handle, node);
			
 
				 				isvalid = 0; 
			
 
				 			}
			
 
				 		}
			
@@ -301,36 +302,35 @@ void starpu_unpartition_data(data_state *root_data, uint32_t gathering_node)
 
				 
			
 
				 	for (node = 0; node < MAXNODES; node++)
			
 
				 	{
			
 
				-		root_data->per_node[node].state = 
			
 
				+		root_handle->per_node[node].state = 
			
 
				 			still_valid[node]?newstate:INVALID;
			
 
				 	}
			
 
				 
			
 
				 	/* there is no child anymore */
			
 
				-	root_data->nchildren = 0;
			
 
				+	root_handle->nchildren = 0;
			
 
				 
			
 
				 	/* now the parent may be used again so we release the lock */
			
 
				-	starpu_spin_unlock(&root_data->header_lock);
			
 
				+	starpu_spin_unlock(&root_handle->header_lock);
			
 
				 }
			
 
				 
			
 
				 /* TODO move ! */
			
 
				-void starpu_advise_if_data_is_important(data_state *state, unsigned is_important)
			
 
				+void starpu_advise_if_data_is_important(starpu_data_handle handle, unsigned is_important)
			
 
				 {
			
 
				-
			
 
				-	starpu_spin_lock(&state->header_lock);
			
 
				+	starpu_spin_lock(&handle->header_lock);
			
 
				 
			
 
				 	/* first take all the children lock (in order !) */
			
 
				 	int child;
			
 
				-	for (child = 0; child < state->nchildren; child++)
			
 
				+	for (child = 0; child < handle->nchildren; child++)
			
 
				 	{
			
 
				 		/* make sure the intermediate children is advised as well */
			
 
				-		if (state->children[child].nchildren > 0)
			
 
				-			starpu_advise_if_data_is_important(&state->children[child], is_important);
			
 
				+		if (handle->children[child].nchildren > 0)
			
 
				+			starpu_advise_if_data_is_important(&handle->children[child], is_important);
			
 
				 	}
			
 
				 
			
 
				-	state->is_not_important = !is_important;
			
 
				+	handle->is_not_important = !is_important;
			
 
				 
			
 
				 	/* now the parent may be used again so we release the lock */
			
 
				-	starpu_spin_unlock(&state->header_lock);
			
 
				+	starpu_spin_unlock(&handle->header_lock);
			
 
				 
			
 
				 }
			
 
				 
			
@@ -356,7 +356,7 @@ starpu_data_handle starpu_data_state_create(size_t interfacesize)
 
				 void starpu_data_create_children(starpu_data_handle handle,
			
 
				 		unsigned nchildren, size_t interfacesize)
			
 
				 {
			
 
				-	handle->children = calloc(nchildren, sizeof(data_state));
			
 
				+	handle->children = calloc(nchildren, sizeof(struct starpu_data_state_t));
			
 
				 	STARPU_ASSERT(handle->children);
			
 
				 
			
 
				 	unsigned node;
			
--- a/src/datawizard/interfaces/bcsr_filters.c
+++ b/src/datawizard/interfaces/bcsr_filters.c
@@ -20,12 +20,12 @@
 
				 
			
 
				 extern struct data_interface_ops_t interface_blas_ops;
			
 
				 
			
 
				-unsigned starpu_canonical_block_filter_bcsr(starpu_filter *f __attribute__((unused)), data_state *root_data)
			
 
				+unsigned starpu_canonical_block_filter_bcsr(starpu_filter *f __attribute__((unused)), starpu_data_handle root_handle)
			
 
				 {
			
 
				 	unsigned nchunks;
			
 
				 
			
 
				 	struct starpu_bcsr_interface_s *interface =
			
 
				-		starpu_data_get_interface_on_node(root_data, 0);
			
 
				+		starpu_data_get_interface_on_node(root_handle, 0);
			
 
				 
			
 
				 	uint32_t nnz = interface->nnz;
			
 
				 
			
@@ -40,34 +40,34 @@ unsigned starpu_canonical_block_filter_bcsr(starpu_filter *f __attribute__((unus
 
				 	nchunks = nnz;
			
 
				 	
			
 
				 	/* first allocate the children data_state */
			
 
				-	starpu_data_create_children(root_data, nchunks, sizeof(starpu_blas_interface_t));
			
 
				+	starpu_data_create_children(root_handle, nchunks, sizeof(starpu_blas_interface_t));
			
 
				 
			
 
				 	/* actually create all the chunks */
			
 
				 
			
 
				 	/* XXX */
			
 
				-	STARPU_ASSERT(root_data->per_node[0].allocated);
			
 
				+	STARPU_ASSERT(root_handle->per_node[0].allocated);
			
 
				 
			
 
				 	/* each chunk becomes a small dense matrix */
			
 
				 	unsigned chunk;
			
 
				 	for (chunk = 0; chunk < nchunks; chunk++)
			
 
				 	{
			
 
				-		starpu_data_handle sub_handle = starpu_data_get_child(root_data, chunk);
			
 
				+		starpu_data_handle sub_handle = starpu_data_get_child(root_handle, chunk);
			
 
				 		uint32_t ptr_offset = c*r*chunk*elemsize;
			
 
				 
			
 
				 		unsigned node;
			
 
				 		for (node = 0; node < MAXNODES; node++)
			
 
				 		{
			
 
				 			starpu_blas_interface_t *local =
			
 
				-				starpu_data_get_interface_on_node(root_data, node);
			
 
				+				starpu_data_get_interface_on_node(root_handle, node);
			
 
				 
			
 
				 			local->nx = c;
			
 
				 			local->ny = r;
			
 
				 			local->ld = c;
			
 
				 			local->elemsize = elemsize;
			
 
				 
			
 
				-			if (root_data->per_node[node].allocated) {
			
 
				+			if (root_handle->per_node[node].allocated) {
			
 
				 				struct starpu_bcsr_interface_s *node_interface =
			
 
				-					starpu_data_get_interface_on_node(root_data, node);
			
 
				+					starpu_data_get_interface_on_node(root_handle, node);
			
 
				 				uint8_t *nzval = (uint8_t *)(node_interface->nzval);
			
 
				 				local->ptr = (uintptr_t)&nzval[firstentry + ptr_offset];
			
 
				 			}
			
--- a/src/datawizard/interfaces/bcsr_interface.c
+++ b/src/datawizard/interfaces/bcsr_interface.c
@@ -47,10 +47,10 @@ static const struct copy_data_methods_s bcsr_copy_data_methods_s = {
 
				 	.spu_to_spu = NULL
			
 
				 };
			
 
				 
			
 
				-static size_t allocate_bcsr_buffer_on_node(struct starpu_data_state_t *state, uint32_t dst_node);
			
 
				+static size_t allocate_bcsr_buffer_on_node(starpu_data_handle handle, uint32_t dst_node);
			
 
				 static void liberate_bcsr_buffer_on_node(starpu_data_interface_t *interface, uint32_t node);
			
 
				-static size_t bcsr_interface_get_size(struct starpu_data_state_t *state);
			
 
				-static uint32_t footprint_bcsr_interface_crc32(data_state *state, uint32_t hstate);
			
 
				+static size_t bcsr_interface_get_size(starpu_data_handle handle);
			
 
				+static uint32_t footprint_bcsr_interface_crc32(starpu_data_handle handle, uint32_t hstate);
			
 
				 
			
 
				 struct data_interface_ops_t interface_bcsr_ops = {
			
 
				 	.allocate_data_on_node = allocate_bcsr_buffer_on_node,
			
@@ -100,21 +100,21 @@ void starpu_register_bcsr_data(struct starpu_data_state_t **handle, uint32_t hom
 
				 	register_new_data(state, home_node, 0);
			
 
				 }
			
 
				 
			
 
				-static inline uint32_t footprint_bcsr_interface_generic(uint32_t (*hash_func)(uint32_t input, uint32_t hstate), data_state *state, uint32_t hstate)
			
 
				+static inline uint32_t footprint_bcsr_interface_generic(uint32_t (*hash_func)(uint32_t input, uint32_t hstate), starpu_data_handle handle, uint32_t hstate)
			
 
				 {
			
 
				 	uint32_t hash;
			
 
				 
			
 
				 	hash = hstate;
			
 
				-	hash = hash_func(starpu_get_bcsr_nnz(state), hash);
			
 
				-	hash = hash_func(starpu_get_bcsr_c(state), hash);
			
 
				-	hash = hash_func(starpu_get_bcsr_r(state), hash);
			
 
				+	hash = hash_func(starpu_get_bcsr_nnz(handle), hash);
			
 
				+	hash = hash_func(starpu_get_bcsr_c(handle), hash);
			
 
				+	hash = hash_func(starpu_get_bcsr_r(handle), hash);
			
 
				 
			
 
				 	return hash;
			
 
				 }
			
 
				 
			
 
				-static uint32_t footprint_bcsr_interface_crc32(data_state *state, uint32_t hstate)
			
 
				+static uint32_t footprint_bcsr_interface_crc32(starpu_data_handle handle, uint32_t hstate)
			
 
				 {
			
 
				-	return footprint_bcsr_interface_generic(crc32_be, state, hstate);
			
 
				+	return footprint_bcsr_interface_generic(crc32_be, handle, hstate);
			
 
				 }
			
 
				 
			
 
				 struct dumped_bcsr_interface_s {
			
@@ -130,95 +130,95 @@ struct dumped_bcsr_interface_s {
 
				 }  __attribute__ ((packed));
			
 
				 
			
 
				 /* offer an access to the data parameters */
			
 
				-uint32_t starpu_get_bcsr_nnz(struct starpu_data_state_t *state)
			
 
				+uint32_t starpu_get_bcsr_nnz(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_bcsr_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, 0);
			
 
				+		starpu_data_get_interface_on_node(handle, 0);
			
 
				 
			
 
				 	return interface->nnz;
			
 
				 }
			
 
				 
			
 
				-uint32_t starpu_get_bcsr_nrow(struct starpu_data_state_t *state)
			
 
				+uint32_t starpu_get_bcsr_nrow(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_bcsr_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, 0);
			
 
				+		starpu_data_get_interface_on_node(handle, 0);
			
 
				 
			
 
				 	return interface->nrow;
			
 
				 }
			
 
				 
			
 
				-uint32_t starpu_get_bcsr_firstentry(struct starpu_data_state_t *state)
			
 
				+uint32_t starpu_get_bcsr_firstentry(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_bcsr_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, 0);
			
 
				+		starpu_data_get_interface_on_node(handle, 0);
			
 
				 
			
 
				 	return interface->firstentry;
			
 
				 }
			
 
				 
			
 
				-uint32_t starpu_get_bcsr_r(struct starpu_data_state_t *state)
			
 
				+uint32_t starpu_get_bcsr_r(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_bcsr_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, 0);
			
 
				+		starpu_data_get_interface_on_node(handle, 0);
			
 
				 
			
 
				 	return interface->r;
			
 
				 }
			
 
				 
			
 
				-uint32_t starpu_get_bcsr_c(struct starpu_data_state_t *state)
			
 
				+uint32_t starpu_get_bcsr_c(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_bcsr_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, 0);
			
 
				+		starpu_data_get_interface_on_node(handle, 0);
			
 
				 
			
 
				 	return interface->c;
			
 
				 }
			
 
				 
			
 
				-size_t starpu_get_bcsr_elemsize(struct starpu_data_state_t *state)
			
 
				+size_t starpu_get_bcsr_elemsize(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_bcsr_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, 0);
			
 
				+		starpu_data_get_interface_on_node(handle, 0);
			
 
				 
			
 
				 	return interface->elemsize;
			
 
				 }
			
 
				 
			
 
				-uintptr_t starpu_get_bcsr_local_nzval(struct starpu_data_state_t *state)
			
 
				+uintptr_t starpu_get_bcsr_local_nzval(starpu_data_handle handle)
			
 
				 {
			
 
				 	unsigned node;
			
 
				 	node = get_local_memory_node();
			
 
				 
			
 
				-	STARPU_ASSERT(state->per_node[node].allocated);
			
 
				+	STARPU_ASSERT(handle->per_node[node].allocated);
			
 
				 
			
 
				 	starpu_bcsr_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, node);
			
 
				+		starpu_data_get_interface_on_node(handle, node);
			
 
				 	
			
 
				 	return interface->nzval;
			
 
				 }
			
 
				 
			
 
				-uint32_t *starpu_get_bcsr_local_colind(struct starpu_data_state_t *state)
			
 
				+uint32_t *starpu_get_bcsr_local_colind(starpu_data_handle handle)
			
 
				 {
			
 
				 	/* XXX 0 */
			
 
				 	starpu_bcsr_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, 0);
			
 
				+		starpu_data_get_interface_on_node(handle, 0);
			
 
				 
			
 
				 	return interface->colind;
			
 
				 }
			
 
				 
			
 
				-uint32_t *starpu_get_bcsr_local_rowptr(struct starpu_data_state_t *state)
			
 
				+uint32_t *starpu_get_bcsr_local_rowptr(starpu_data_handle handle)
			
 
				 {
			
 
				 	/* XXX 0 */
			
 
				 	starpu_bcsr_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, 0);
			
 
				+		starpu_data_get_interface_on_node(handle, 0);
			
 
				 
			
 
				 	return interface->rowptr;
			
 
				 }
			
 
				 
			
 
				 
			
 
				-static size_t bcsr_interface_get_size(struct starpu_data_state_t *state)
			
 
				+static size_t bcsr_interface_get_size(starpu_data_handle handle)
			
 
				 {
			
 
				 	size_t size;
			
 
				 
			
 
				-	uint32_t nnz = starpu_get_bcsr_nnz(state);
			
 
				-	uint32_t nrow = starpu_get_bcsr_nrow(state);
			
 
				-	uint32_t r = starpu_get_bcsr_r(state);
			
 
				-	uint32_t c = starpu_get_bcsr_c(state);
			
 
				-	size_t elemsize = starpu_get_bcsr_elemsize(state);
			
 
				+	uint32_t nnz = starpu_get_bcsr_nnz(handle);
			
 
				+	uint32_t nrow = starpu_get_bcsr_nrow(handle);
			
 
				+	uint32_t r = starpu_get_bcsr_r(handle);
			
 
				+	uint32_t c = starpu_get_bcsr_c(handle);
			
 
				+	size_t elemsize = starpu_get_bcsr_elemsize(handle);
			
 
				 
			
 
				 	size = nnz*r*c*elemsize + nnz*sizeof(uint32_t) + (nrow+1)*sizeof(uint32_t); 
			
 
				 
			
@@ -229,7 +229,7 @@ static size_t bcsr_interface_get_size(struct starpu_data_state_t *state)
 
				 /* memory allocation/deallocation primitives for the BLAS interface */
			
 
				 
			
 
				 /* returns the size of the allocated area */
			
 
				-static size_t allocate_bcsr_buffer_on_node(struct starpu_data_state_t *state, uint32_t dst_node)
			
 
				+static size_t allocate_bcsr_buffer_on_node(starpu_data_handle handle, uint32_t dst_node)
			
 
				 {
			
 
				 	uintptr_t addr_nzval;
			
 
				 	uint32_t *addr_colind, *addr_rowptr;
			
@@ -237,7 +237,7 @@ static size_t allocate_bcsr_buffer_on_node(struct starpu_data_state_t *state, ui
 
				 
			
 
				 	/* we need the 3 arrays to be allocated */
			
 
				 	starpu_bcsr_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, dst_node);
			
 
				+		starpu_data_get_interface_on_node(handle, dst_node);
			
 
				 
			
 
				 	uint32_t nnz = interface->nnz;
			
 
				 	uint32_t nrow = interface->nrow;
			
@@ -350,13 +350,13 @@ static void liberate_bcsr_buffer_on_node(starpu_data_interface_t *interface, uin
 
				 }
			
 
				 
			
 
				 #ifdef USE_CUDA
			
 
				-static int copy_cublas_to_ram(struct starpu_data_state_t *state, uint32_t src_node, uint32_t dst_node)
			
 
				+static int copy_cublas_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
			
 
				 {
			
 
				 	starpu_bcsr_interface_t *src_bcsr;
			
 
				 	starpu_bcsr_interface_t *dst_bcsr;
			
 
				 
			
 
				-	src_bcsr = starpu_data_get_interface_on_node(state, src_node);
			
 
				-	dst_bcsr = starpu_data_get_interface_on_node(state, dst_node);
			
 
				+	src_bcsr = starpu_data_get_interface_on_node(handle, src_node);
			
 
				+	dst_bcsr = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				 
			
 
				 	uint32_t nnz = src_bcsr->nnz;
			
 
				 	uint32_t nrow = src_bcsr->nrow;
			
@@ -379,13 +379,13 @@ static int copy_cublas_to_ram(struct starpu_data_state_t *state, uint32_t src_no
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static int copy_ram_to_cublas(struct starpu_data_state_t *state, uint32_t src_node, uint32_t dst_node)
			
 
				+static int copy_ram_to_cublas(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
			
 
				 {
			
 
				 	starpu_bcsr_interface_t *src_bcsr;
			
 
				 	starpu_bcsr_interface_t *dst_bcsr;
			
 
				 
			
 
				-	src_bcsr = starpu_data_get_interface_on_node(state, src_node);
			
 
				-	dst_bcsr = starpu_data_get_interface_on_node(state, dst_node);
			
 
				+	src_bcsr = starpu_data_get_interface_on_node(handle, src_node);
			
 
				+	dst_bcsr = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				 
			
 
				 	uint32_t nnz = src_bcsr->nnz;
			
 
				 	uint32_t nrow = src_bcsr->nrow;
			
@@ -410,13 +410,13 @@ static int copy_ram_to_cublas(struct starpu_data_state_t *state, uint32_t src_no
 
				 #endif // USE_CUDA
			
 
				 
			
 
				 /* as not all platform easily have a BLAS lib installed ... */
			
 
				-static int dummy_copy_ram_to_ram(struct starpu_data_state_t *state, uint32_t src_node, uint32_t dst_node)
			
 
				+static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
			
 
				 {
			
 
				 	starpu_bcsr_interface_t *src_bcsr;
			
 
				 	starpu_bcsr_interface_t *dst_bcsr;
			
 
				 
			
 
				-	src_bcsr = starpu_data_get_interface_on_node(state, src_node);
			
 
				-	dst_bcsr = starpu_data_get_interface_on_node(state, dst_node);
			
 
				+	src_bcsr = starpu_data_get_interface_on_node(handle, src_node);
			
 
				+	dst_bcsr = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				 
			
 
				 	uint32_t nnz = src_bcsr->nnz;
			
 
				 	uint32_t nrow = src_bcsr->nrow;
			
--- a/src/datawizard/interfaces/blas_filters.c
+++ b/src/datawizard/interfaces/blas_filters.c
@@ -21,13 +21,13 @@
 
				 /*
			
 
				  * an example of a dummy partition function : blocks ...
			
 
				  */
			
 
				-unsigned starpu_block_filter_func(starpu_filter *f, data_state *root_data)
			
 
				+unsigned starpu_block_filter_func(starpu_filter *f, starpu_data_handle root_handle)
			
 
				 {
			
 
				 	unsigned nchunks;
			
 
				 	uint32_t arg = f->filter_arg;
			
 
				 
			
 
				 	starpu_blas_interface_t *blas_root =
			
 
				-		starpu_data_get_interface_on_node(root_data, 0);
			
 
				+		starpu_data_get_interface_on_node(root_handle, 0);
			
 
				 
			
 
				 	uint32_t nx = blas_root->nx;
			
 
				 	uint32_t ny = blas_root->ny;
			
@@ -37,7 +37,7 @@ unsigned starpu_block_filter_func(starpu_filter *f, data_state *root_data)
 
				 	nchunks = STARPU_MIN(nx, arg);
			
 
				 
			
 
				 	/* first allocate the children data_state */
			
 
				-	starpu_data_create_children(root_data, nchunks, sizeof(starpu_blas_interface_t));
			
 
				+	starpu_data_create_children(root_handle, nchunks, sizeof(starpu_blas_interface_t));
			
 
				 
			
 
				 	/* actually create all the chunks */
			
 
				 	unsigned chunk;
			
@@ -50,7 +50,7 @@ unsigned starpu_block_filter_func(starpu_filter *f, data_state *root_data)
 
				 			STARPU_MIN(chunk_size, (size_t)nx - (size_t)chunk*chunk_size);
			
 
				 
			
 
				 		starpu_data_handle chunk_handle =
			
 
				-			starpu_data_get_child(root_data, chunk);
			
 
				+			starpu_data_get_child(root_handle, chunk);
			
 
				 
			
 
				 		unsigned node;
			
 
				 		for (node = 0; node < MAXNODES; node++)
			
@@ -62,9 +62,9 @@ unsigned starpu_block_filter_func(starpu_filter *f, data_state *root_data)
 
				 			local->ny = ny;
			
 
				 			local->elemsize = elemsize;
			
 
				 
			
 
				-			if (root_data->per_node[node].allocated) {
			
 
				+			if (root_handle->per_node[node].allocated) {
			
 
				 				starpu_blas_interface_t *local_root =
			
 
				-					starpu_data_get_interface_on_node(root_data, node);
			
 
				+					starpu_data_get_interface_on_node(root_handle, node);
			
 
				 
			
 
				 				local->ptr = local_root->ptr + offset;
			
 
				 				local->ld = local_root->ld;
			
@@ -75,13 +75,13 @@ unsigned starpu_block_filter_func(starpu_filter *f, data_state *root_data)
 
				 	return nchunks;
			
 
				 }
			
 
				 
			
 
				-unsigned starpu_vertical_block_filter_func(starpu_filter *f, data_state *root_data)
			
 
				+unsigned starpu_vertical_block_filter_func(starpu_filter *f, starpu_data_handle root_handle)
			
 
				 {
			
 
				 	unsigned nchunks;
			
 
				 	uint32_t arg = f->filter_arg;
			
 
				 
			
 
				 	starpu_blas_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(root_data, 0);
			
 
				+		starpu_data_get_interface_on_node(root_handle, 0);
			
 
				 
			
 
				 	uint32_t nx = interface->nx;
			
 
				 	uint32_t ny = interface->ny;
			
@@ -91,7 +91,7 @@ unsigned starpu_vertical_block_filter_func(starpu_filter *f, data_state *root_da
 
				 	nchunks = STARPU_MIN(ny, arg);
			
 
				 	
			
 
				 	/* first allocate the children data_state */
			
 
				-	starpu_data_create_children(root_data, nchunks, sizeof(starpu_blas_interface_t));
			
 
				+	starpu_data_create_children(root_handle, nchunks, sizeof(starpu_blas_interface_t));
			
 
				 
			
 
				 	/* actually create all the chunks */
			
 
				 	unsigned chunk;
			
@@ -103,7 +103,7 @@ unsigned starpu_vertical_block_filter_func(starpu_filter *f, data_state *root_da
 
				 			STARPU_MIN(chunk_size, (size_t)ny - (size_t)chunk*chunk_size);
			
 
				 
			
 
				 		starpu_data_handle chunk_handle =
			
 
				-			starpu_data_get_child(root_data, chunk);
			
 
				+			starpu_data_get_child(root_handle, chunk);
			
 
				 
			
 
				 		unsigned node;
			
 
				 		for (node = 0; node < MAXNODES; node++)
			
@@ -115,9 +115,9 @@ unsigned starpu_vertical_block_filter_func(starpu_filter *f, data_state *root_da
 
				 			local->ny = child_ny;
			
 
				 			local->elemsize = elemsize;
			
 
				 
			
 
				-			if (root_data->per_node[node].allocated) {
			
 
				+			if (root_handle->per_node[node].allocated) {
			
 
				 				starpu_blas_interface_t *local_root =
			
 
				-					starpu_data_get_interface_on_node(root_data, node);
			
 
				+					starpu_data_get_interface_on_node(root_handle, node);
			
 
				 
			
 
				 				size_t offset = 
			
 
				 					(size_t)chunk*chunk_size*local_root->ld*elemsize;
			
--- a/src/datawizard/interfaces/blas_interface.c
+++ b/src/datawizard/interfaces/blas_interface.c
@@ -30,12 +30,12 @@
 
				 #include <cuda_runtime.h>
			
 
				 #endif
			
 
				 
			
 
				-static int dummy_copy_ram_to_ram(struct starpu_data_state_t *state, uint32_t src_node, uint32_t dst_node);
			
 
				+static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				 #ifdef USE_CUDA
			
 
				-static int copy_ram_to_cublas(struct starpu_data_state_t *state, uint32_t src_node, uint32_t dst_node);
			
 
				-static int copy_cublas_to_ram(struct starpu_data_state_t *state, uint32_t src_node, uint32_t dst_node);
			
 
				-static int copy_ram_to_cublas_async(struct starpu_data_state_t *state, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream);
			
 
				-static int copy_cublas_to_ram_async(struct starpu_data_state_t *state, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream);
			
 
				+static int copy_ram_to_cublas(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				+static int copy_cublas_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				+static int copy_ram_to_cublas_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream);
			
 
				+static int copy_cublas_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream);
			
 
				 #endif
			
 
				 
			
 
				 static const struct copy_data_methods_s blas_copy_data_methods_s = {
			
@@ -54,11 +54,11 @@ static const struct copy_data_methods_s blas_copy_data_methods_s = {
 
				 	.spu_to_spu = NULL
			
 
				 };
			
 
				 
			
 
				-static size_t allocate_blas_buffer_on_node(data_state *state, uint32_t dst_node);
			
 
				+static size_t allocate_blas_buffer_on_node(starpu_data_handle handle, uint32_t dst_node);
			
 
				 static void liberate_blas_buffer_on_node(starpu_data_interface_t *interface, uint32_t node);
			
 
				-static size_t blas_interface_get_size(struct starpu_data_state_t *state);
			
 
				-static uint32_t footprint_blas_interface_crc32(data_state *state, uint32_t hstate);
			
 
				-static void display_blas_interface(data_state *state, FILE *f);
			
 
				+static size_t blas_interface_get_size(starpu_data_handle handle);
			
 
				+static uint32_t footprint_blas_interface_crc32(starpu_data_handle handle, uint32_t hstate);
			
 
				+static void display_blas_interface(starpu_data_handle handle, FILE *f);
			
 
				 #ifdef USE_GORDON
			
 
				 static int convert_blas_to_gordon(starpu_data_interface_t *interface, uint64_t *ptr, gordon_strideSize_t *ss); 
			
 
				 #endif
			
@@ -95,21 +95,21 @@ static int convert_blas_to_gordon(starpu_data_interface_t *interface, uint64_t *
 
				 #endif
			
 
				 
			
 
				 /* declare a new data with the BLAS interface */
			
 
				-void starpu_register_blas_data(struct starpu_data_state_t **handle, uint32_t home_node,
			
 
				+void starpu_register_blas_data(starpu_data_handle *handleptr, uint32_t home_node,
			
 
				 			uintptr_t ptr, uint32_t ld, uint32_t nx,
			
 
				 			uint32_t ny, size_t elemsize)
			
 
				 {
			
 
				-	struct starpu_data_state_t *state =
			
 
				+	starpu_data_handle handle =
			
 
				 		starpu_data_state_create(sizeof(starpu_blas_interface_t));
			
 
				 
			
 
				-	STARPU_ASSERT(handle);
			
 
				-	*handle = state;
			
 
				+	STARPU_ASSERT(handleptr);
			
 
				+	*handleptr = handle;
			
 
				 
			
 
				 	unsigned node;
			
 
				 	for (node = 0; node < MAXNODES; node++)
			
 
				 	{
			
 
				 		starpu_blas_interface_t *local_interface =
			
 
				-			starpu_data_get_interface_on_node(state, node);
			
 
				+			starpu_data_get_interface_on_node(handle, node);
			
 
				 
			
 
				 		if (node == home_node) {
			
 
				 			local_interface->ptr = ptr;
			
@@ -125,25 +125,25 @@ void starpu_register_blas_data(struct starpu_data_state_t **handle, uint32_t hom
 
				 		local_interface->elemsize = elemsize;
			
 
				 	}
			
 
				 
			
 
				-	state->ops = &interface_blas_ops;
			
 
				+	handle->ops = &interface_blas_ops;
			
 
				 
			
 
				-	register_new_data(state, home_node, 0);
			
 
				+	register_new_data(handle, home_node, 0);
			
 
				 }
			
 
				 
			
 
				-static inline uint32_t footprint_blas_interface_generic(uint32_t (*hash_func)(uint32_t input, uint32_t hstate), data_state *state, uint32_t hstate)
			
 
				+static inline uint32_t footprint_blas_interface_generic(uint32_t (*hash_func)(uint32_t input, uint32_t hstate), starpu_data_handle handle, uint32_t hstate)
			
 
				 {
			
 
				 	uint32_t hash;
			
 
				 
			
 
				 	hash = hstate;
			
 
				-	hash = hash_func(starpu_get_blas_nx(state), hash);
			
 
				-	hash = hash_func(starpu_get_blas_ny(state), hash);
			
 
				+	hash = hash_func(starpu_get_blas_nx(handle), hash);
			
 
				+	hash = hash_func(starpu_get_blas_ny(handle), hash);
			
 
				 
			
 
				 	return hash;
			
 
				 }
			
 
				 
			
 
				-static uint32_t footprint_blas_interface_crc32(data_state *state, uint32_t hstate)
			
 
				+static uint32_t footprint_blas_interface_crc32(starpu_data_handle handle, uint32_t hstate)
			
 
				 {
			
 
				-	return footprint_blas_interface_generic(crc32_be, state, hstate);
			
 
				+	return footprint_blas_interface_generic(crc32_be, handle, hstate);
			
 
				 }
			
 
				 
			
 
				 struct dumped_blas_interface_s {
			
@@ -153,18 +153,18 @@ struct dumped_blas_interface_s {
 
				 	uint32_t ld;
			
 
				 } __attribute__ ((packed));
			
 
				 
			
 
				-static void display_blas_interface(data_state *state, FILE *f)
			
 
				+static void display_blas_interface(starpu_data_handle handle, FILE *f)
			
 
				 {
			
 
				 	starpu_blas_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, 0);
			
 
				+		starpu_data_get_interface_on_node(handle, 0);
			
 
				 
			
 
				 	fprintf(f, "%u\t%u\t", interface->nx, interface->ny);
			
 
				 }
			
 
				 
			
 
				-static size_t blas_interface_get_size(struct starpu_data_state_t *state)
			
 
				+static size_t blas_interface_get_size(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_blas_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, 0);
			
 
				+		starpu_data_get_interface_on_node(handle, 0);
			
 
				 
			
 
				 	size_t size;
			
 
				 	size = (size_t)interface->nx*interface->ny*interface->elemsize; 
			
@@ -173,52 +173,52 @@ static size_t blas_interface_get_size(struct starpu_data_state_t *state)
 
				 }
			
 
				 
			
 
				 /* offer an access to the data parameters */
			
 
				-uint32_t starpu_get_blas_nx(data_state *state)
			
 
				+uint32_t starpu_get_blas_nx(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_blas_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, 0);
			
 
				+		starpu_data_get_interface_on_node(handle, 0);
			
 
				 
			
 
				 	return interface->nx;
			
 
				 }
			
 
				 
			
 
				-uint32_t starpu_get_blas_ny(data_state *state)
			
 
				+uint32_t starpu_get_blas_ny(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_blas_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, 0);
			
 
				+		starpu_data_get_interface_on_node(handle, 0);
			
 
				 
			
 
				 	return interface->ny;
			
 
				 }
			
 
				 
			
 
				-uint32_t starpu_get_blas_local_ld(data_state *state)
			
 
				+uint32_t starpu_get_blas_local_ld(starpu_data_handle handle)
			
 
				 {
			
 
				 	unsigned node;
			
 
				 	node = get_local_memory_node();
			
 
				 
			
 
				-	STARPU_ASSERT(state->per_node[node].allocated);
			
 
				+	STARPU_ASSERT(handle->per_node[node].allocated);
			
 
				 
			
 
				 	starpu_blas_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, node);
			
 
				+		starpu_data_get_interface_on_node(handle, node);
			
 
				 
			
 
				 	return interface->ld;
			
 
				 }
			
 
				 
			
 
				-uintptr_t starpu_get_blas_local_ptr(data_state *state)
			
 
				+uintptr_t starpu_get_blas_local_ptr(starpu_data_handle handle)
			
 
				 {
			
 
				 	unsigned node;
			
 
				 	node = get_local_memory_node();
			
 
				 
			
 
				-	STARPU_ASSERT(state->per_node[node].allocated);
			
 
				+	STARPU_ASSERT(handle->per_node[node].allocated);
			
 
				 
			
 
				 	starpu_blas_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, node);
			
 
				+		starpu_data_get_interface_on_node(handle, node);
			
 
				 
			
 
				 	return interface->ptr;
			
 
				 }
			
 
				 
			
 
				-size_t starpu_get_blas_elemsize(data_state *state)
			
 
				+size_t starpu_get_blas_elemsize(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_blas_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, 0);
			
 
				+		starpu_data_get_interface_on_node(handle, 0);
			
 
				 
			
 
				 	return interface->elemsize;
			
 
				 }
			
@@ -226,7 +226,7 @@ size_t starpu_get_blas_elemsize(data_state *state)
 
				 /* memory allocation/deallocation primitives for the BLAS interface */
			
 
				 
			
 
				 /* returns the size of the allocated area */
			
 
				-static size_t allocate_blas_buffer_on_node(data_state *state, uint32_t dst_node)
			
 
				+static size_t allocate_blas_buffer_on_node(starpu_data_handle handle, uint32_t dst_node)
			
 
				 {
			
 
				 	uintptr_t addr = 0;
			
 
				 	unsigned fail = 0;
			
@@ -238,7 +238,7 @@ static size_t allocate_blas_buffer_on_node(data_state *state, uint32_t dst_node)
 
				 #endif
			
 
				 
			
 
				 	starpu_blas_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, dst_node);
			
 
				+		starpu_data_get_interface_on_node(handle, dst_node);
			
 
				 
			
 
				 	uint32_t nx = interface->nx;
			
 
				 	uint32_t ny = interface->ny;
			
@@ -313,13 +313,13 @@ static void liberate_blas_buffer_on_node(starpu_data_interface_t *interface, uin
 
				 }
			
 
				 
			
 
				 #ifdef USE_CUDA
			
 
				-static int copy_cublas_to_ram(data_state *state, uint32_t src_node, uint32_t dst_node)
			
 
				+static int copy_cublas_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
			
 
				 {
			
 
				 	starpu_blas_interface_t *src_blas;
			
 
				 	starpu_blas_interface_t *dst_blas;
			
 
				 
			
 
				-	src_blas = starpu_data_get_interface_on_node(state, src_node);
			
 
				-	dst_blas = starpu_data_get_interface_on_node(state, dst_node);
			
 
				+	src_blas = starpu_data_get_interface_on_node(handle, src_node);
			
 
				+	dst_blas = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				 
			
 
				 	size_t elemsize = src_blas->elemsize;
			
 
				 
			
@@ -335,13 +335,13 @@ static int copy_cublas_to_ram(data_state *state, uint32_t src_node, uint32_t dst
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static int copy_ram_to_cublas(data_state *state, uint32_t src_node, uint32_t dst_node)
			
 
				+static int copy_ram_to_cublas(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
			
 
				 {
			
 
				 	starpu_blas_interface_t *src_blas;
			
 
				 	starpu_blas_interface_t *dst_blas;
			
 
				 
			
 
				-	src_blas = starpu_data_get_interface_on_node(state, src_node);
			
 
				-	dst_blas = starpu_data_get_interface_on_node(state, dst_node);
			
 
				+	src_blas = starpu_data_get_interface_on_node(handle, src_node);
			
 
				+	dst_blas = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				 
			
 
				 	size_t elemsize = src_blas->elemsize;
			
 
				 
			
@@ -361,13 +361,13 @@ static int copy_ram_to_cublas(data_state *state, uint32_t src_node, uint32_t dst
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static int copy_cublas_to_ram_async(data_state *state, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream)
			
 
				+static int copy_cublas_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream)
			
 
				 {
			
 
				 	starpu_blas_interface_t *src_blas;
			
 
				 	starpu_blas_interface_t *dst_blas;
			
 
				 
			
 
				-	src_blas = starpu_data_get_interface_on_node(state, src_node);
			
 
				-	dst_blas = starpu_data_get_interface_on_node(state, dst_node);
			
 
				+	src_blas = starpu_data_get_interface_on_node(handle, src_node);
			
 
				+	dst_blas = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				 
			
 
				 	size_t elemsize = src_blas->elemsize;
			
 
				 
			
@@ -399,13 +399,13 @@ static int copy_cublas_to_ram_async(data_state *state, uint32_t src_node, uint32
 
				 	return EAGAIN;
			
 
				 }
			
 
				 
			
 
				-static int copy_ram_to_cublas_async(struct starpu_data_state_t *state, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream)
			
 
				+static int copy_ram_to_cublas_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream)
			
 
				 {
			
 
				 	starpu_blas_interface_t *src_blas;
			
 
				 	starpu_blas_interface_t *dst_blas;
			
 
				 
			
 
				-	src_blas = starpu_data_get_interface_on_node(state, src_node);
			
 
				-	dst_blas = starpu_data_get_interface_on_node(state, dst_node);
			
 
				+	src_blas = starpu_data_get_interface_on_node(handle, src_node);
			
 
				+	dst_blas = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				 
			
 
				 	size_t elemsize = src_blas->elemsize;
			
 
				 
			
@@ -435,13 +435,13 @@ static int copy_ram_to_cublas_async(struct starpu_data_state_t *state, uint32_t
 
				 #endif // USE_CUDA
			
 
				 
			
 
				 /* as not all platform easily have a BLAS lib installed ... */
			
 
				-static int dummy_copy_ram_to_ram(data_state *state, uint32_t src_node, uint32_t dst_node)
			
 
				+static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
			
 
				 {
			
 
				 	starpu_blas_interface_t *src_blas;
			
 
				 	starpu_blas_interface_t *dst_blas;
			
 
				 
			
 
				-	src_blas = starpu_data_get_interface_on_node(state, src_node);
			
 
				-	dst_blas = starpu_data_get_interface_on_node(state, dst_node);
			
 
				+	src_blas = starpu_data_get_interface_on_node(handle, src_node);
			
 
				+	dst_blas = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				 
			
 
				 	unsigned y;
			
 
				 	uint32_t nx = dst_blas->nx;
			
--- a/src/datawizard/interfaces/block_interface.c
+++ b/src/datawizard/interfaces/block_interface.c
@@ -23,12 +23,12 @@
 
				 
			
 
				 #include <common/hash.h>
			
 
				 
			
 
				-static int dummy_copy_ram_to_ram(struct starpu_data_state_t *state, uint32_t src_node, uint32_t dst_node);
			
 
				+static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				 #ifdef USE_CUDA
			
 
				-static int copy_ram_to_cublas(struct starpu_data_state_t *state, uint32_t src_node, uint32_t dst_node);
			
 
				-static int copy_cublas_to_ram(struct starpu_data_state_t *state, uint32_t src_node, uint32_t dst_node);
			
 
				-static int copy_ram_to_cublas_async(data_state *state, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream);
			
 
				-static int copy_cublas_to_ram_async(data_state *state, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream);
			
 
				+static int copy_ram_to_cublas(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				+static int copy_cublas_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				+static int copy_ram_to_cublas_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream);
			
 
				+static int copy_cublas_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream);
			
 
				 #endif
			
 
				 
			
 
				 static const struct copy_data_methods_s block_copy_data_methods_s = {
			
@@ -48,11 +48,11 @@ static const struct copy_data_methods_s block_copy_data_methods_s = {
 
				 };
			
 
				 
			
 
				 
			
 
				-static size_t allocate_block_buffer_on_node(data_state *state, uint32_t dst_node);
			
 
				+static size_t allocate_block_buffer_on_node(starpu_data_handle handle, uint32_t dst_node);
			
 
				 static void liberate_block_buffer_on_node(starpu_data_interface_t *interface, uint32_t node);
			
 
				-static size_t block_interface_get_size(struct starpu_data_state_t *state);
			
 
				-static uint32_t footprint_block_interface_crc32(data_state *state, uint32_t hstate);
			
 
				-static void display_block_interface(data_state *state, FILE *f);
			
 
				+static size_t block_interface_get_size(starpu_data_handle handle);
			
 
				+static uint32_t footprint_block_interface_crc32(starpu_data_handle handle, uint32_t hstate);
			
 
				+static void display_block_interface(starpu_data_handle handle, FILE *f);
			
 
				 #ifdef USE_GORDON
			
 
				 static int convert_block_to_gordon(starpu_data_interface_t *interface, uint64_t *ptr, gordon_strideSize_t *ss);
			
 
				 #endif
			
@@ -81,21 +81,21 @@ int convert_block_to_gordon(starpu_data_interface_t *interface, uint64_t *ptr, g
 
				 #endif
			
 
				 
			
 
				 /* declare a new data with the BLAS interface */
			
 
				-void starpu_register_block_data(struct starpu_data_state_t **handle, uint32_t home_node,
			
 
				+void starpu_register_block_data(starpu_data_handle *handleptr, uint32_t home_node,
			
 
				 			uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t nx,
			
 
				 			uint32_t ny, uint32_t nz, size_t elemsize)
			
 
				 {
			
 
				-	struct starpu_data_state_t *state =
			
 
				+	starpu_data_handle handle =
			
 
				 		starpu_data_state_create(sizeof(starpu_block_interface_t));
			
 
				 
			
 
				-	STARPU_ASSERT(handle);
			
 
				-	*handle = state;
			
 
				+	STARPU_ASSERT(handleptr);
			
 
				+	*handleptr = handle;
			
 
				 
			
 
				 	unsigned node;
			
 
				 	for (node = 0; node < MAXNODES; node++)
			
 
				 	{
			
 
				 		starpu_block_interface_t *local_interface =
			
 
				-			starpu_data_get_interface_on_node(state, node);
			
 
				+			starpu_data_get_interface_on_node(handle, node);
			
 
				 
			
 
				 		if (node == home_node) {
			
 
				 			local_interface->ptr = ptr;
			
@@ -114,26 +114,26 @@ void starpu_register_block_data(struct starpu_data_state_t **handle, uint32_t ho
 
				 		local_interface->elemsize = elemsize;
			
 
				 	}
			
 
				 
			
 
				-	state->ops = &interface_block_ops;
			
 
				+	handle->ops = &interface_block_ops;
			
 
				 
			
 
				-	register_new_data(state, home_node, 0);
			
 
				+	register_new_data(handle, home_node, 0);
			
 
				 }
			
 
				 
			
 
				-static inline uint32_t footprint_block_interface_generic(uint32_t (*hash_func)(uint32_t input, uint32_t hstate), data_state *state, uint32_t hstate)
			
 
				+static inline uint32_t footprint_block_interface_generic(uint32_t (*hash_func)(uint32_t input, uint32_t hstate), starpu_data_handle handle, uint32_t hstate)
			
 
				 {
			
 
				 	uint32_t hash;
			
 
				 
			
 
				 	hash = hstate;
			
 
				-	hash = hash_func(starpu_get_block_nx(state), hash);
			
 
				-	hash = hash_func(starpu_get_block_ny(state), hash);
			
 
				-	hash = hash_func(starpu_get_block_nz(state), hash);
			
 
				+	hash = hash_func(starpu_get_block_nx(handle), hash);
			
 
				+	hash = hash_func(starpu_get_block_ny(handle), hash);
			
 
				+	hash = hash_func(starpu_get_block_nz(handle), hash);
			
 
				 
			
 
				 	return hash;
			
 
				 }
			
 
				 
			
 
				-static uint32_t footprint_block_interface_crc32(data_state *state, uint32_t hstate)
			
 
				+static uint32_t footprint_block_interface_crc32(starpu_data_handle handle, uint32_t hstate)
			
 
				 {
			
 
				-	return footprint_block_interface_generic(crc32_be, state, hstate);
			
 
				+	return footprint_block_interface_generic(crc32_be, handle, hstate);
			
 
				 }
			
 
				 
			
 
				 struct dumped_block_interface_s {
			
@@ -145,21 +145,21 @@ struct dumped_block_interface_s {
 
				 	uint32_t ldz;
			
 
				 } __attribute__ ((packed));
			
 
				 
			
 
				-static void display_block_interface(data_state *state, FILE *f)
			
 
				+static void display_block_interface(starpu_data_handle handle, FILE *f)
			
 
				 {
			
 
				 	starpu_block_interface_t *interface;
			
 
				 
			
 
				-	interface = starpu_data_get_interface_on_node(state, 0);
			
 
				+	interface = starpu_data_get_interface_on_node(handle, 0);
			
 
				 
			
 
				 	fprintf(f, "%u\t%u\t%u\t", interface->nx, interface->ny, interface->nz);
			
 
				 }
			
 
				 
			
 
				-static size_t block_interface_get_size(struct starpu_data_state_t *state)
			
 
				+static size_t block_interface_get_size(starpu_data_handle handle)
			
 
				 {
			
 
				 	size_t size;
			
 
				 	starpu_block_interface_t *interface;
			
 
				 
			
 
				-	interface = starpu_data_get_interface_on_node(state, 0);
			
 
				+	interface = starpu_data_get_interface_on_node(handle, 0);
			
 
				 
			
 
				 	size = interface->nx*interface->ny*interface->nz*interface->elemsize; 
			
 
				 
			
@@ -167,73 +167,73 @@ static size_t block_interface_get_size(struct starpu_data_state_t *state)
 
				 }
			
 
				 
			
 
				 /* offer an access to the data parameters */
			
 
				-uint32_t starpu_get_block_nx(data_state *state)
			
 
				+uint32_t starpu_get_block_nx(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_block_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, 0);
			
 
				+		starpu_data_get_interface_on_node(handle, 0);
			
 
				 
			
 
				 	return interface->nx;
			
 
				 }
			
 
				 
			
 
				-uint32_t starpu_get_block_ny(data_state *state)
			
 
				+uint32_t starpu_get_block_ny(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_block_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, 0);
			
 
				+		starpu_data_get_interface_on_node(handle, 0);
			
 
				 
			
 
				 	return interface->ny;
			
 
				 }
			
 
				 
			
 
				-uint32_t starpu_get_block_nz(data_state *state)
			
 
				+uint32_t starpu_get_block_nz(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_block_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, 0);
			
 
				+		starpu_data_get_interface_on_node(handle, 0);
			
 
				 
			
 
				 	return interface->nz;
			
 
				 }
			
 
				 
			
 
				-uint32_t starpu_get_block_local_ldy(data_state *state)
			
 
				+uint32_t starpu_get_block_local_ldy(starpu_data_handle handle)
			
 
				 {
			
 
				 	unsigned node;
			
 
				 	node = get_local_memory_node();
			
 
				 
			
 
				-	STARPU_ASSERT(state->per_node[node].allocated);
			
 
				+	STARPU_ASSERT(handle->per_node[node].allocated);
			
 
				 	
			
 
				 	starpu_block_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, node);
			
 
				+		starpu_data_get_interface_on_node(handle, node);
			
 
				 
			
 
				 	return interface->ldy;
			
 
				 }
			
 
				 
			
 
				-uint32_t starpu_get_block_local_ldz(data_state *state)
			
 
				+uint32_t starpu_get_block_local_ldz(starpu_data_handle handle)
			
 
				 {
			
 
				 	unsigned node;
			
 
				 	node = get_local_memory_node();
			
 
				 
			
 
				-	STARPU_ASSERT(state->per_node[node].allocated);
			
 
				+	STARPU_ASSERT(handle->per_node[node].allocated);
			
 
				 
			
 
				 	starpu_block_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, node);
			
 
				+		starpu_data_get_interface_on_node(handle, node);
			
 
				 
			
 
				 	return interface->ldz;
			
 
				 }
			
 
				 
			
 
				-uintptr_t starpu_get_block_local_ptr(data_state *state)
			
 
				+uintptr_t starpu_get_block_local_ptr(starpu_data_handle handle)
			
 
				 {
			
 
				 	unsigned node;
			
 
				 	node = get_local_memory_node();
			
 
				 
			
 
				-	STARPU_ASSERT(state->per_node[node].allocated);
			
 
				+	STARPU_ASSERT(handle->per_node[node].allocated);
			
 
				 
			
 
				 	starpu_block_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, node);
			
 
				+		starpu_data_get_interface_on_node(handle, node);
			
 
				 
			
 
				 	return interface->ptr;
			
 
				 }
			
 
				 
			
 
				-size_t starpu_get_block_elemsize(data_state *state)
			
 
				+size_t starpu_get_block_elemsize(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_block_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, 0);
			
 
				+		starpu_data_get_interface_on_node(handle, 0);
			
 
				 
			
 
				 	return interface->elemsize;
			
 
				 }
			
@@ -242,7 +242,7 @@ size_t starpu_get_block_elemsize(data_state *state)
 
				 /* memory allocation/deallocation primitives for the BLOCK interface */
			
 
				 
			
 
				 /* returns the size of the allocated area */
			
 
				-static size_t allocate_block_buffer_on_node(data_state *state, uint32_t dst_node)
			
 
				+static size_t allocate_block_buffer_on_node(starpu_data_handle handle, uint32_t dst_node)
			
 
				 {
			
 
				 	uintptr_t addr = 0;
			
 
				 	unsigned fail = 0;
			
@@ -252,7 +252,7 @@ static size_t allocate_block_buffer_on_node(data_state *state, uint32_t dst_node
 
				 	cudaError_t status;
			
 
				 #endif
			
 
				 	starpu_block_interface_t *dst_block =
			
 
				-		starpu_data_get_interface_on_node(state, dst_node);
			
 
				+		starpu_data_get_interface_on_node(handle, dst_node);
			
 
				 
			
 
				 	uint32_t nx = dst_block->nx;
			
 
				 	uint32_t ny = dst_block->ny;
			
@@ -329,13 +329,13 @@ static void liberate_block_buffer_on_node(starpu_data_interface_t *interface, ui
 
				 }
			
 
				 
			
 
				 #ifdef USE_CUDA
			
 
				-static int copy_cublas_to_ram(data_state *state, uint32_t src_node, uint32_t dst_node)
			
 
				+static int copy_cublas_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
			
 
				 {
			
 
				 	starpu_block_interface_t *src_block;
			
 
				 	starpu_block_interface_t *dst_block;
			
 
				 
			
 
				-	src_block = starpu_data_get_interface_on_node(state, src_node);
			
 
				-	dst_block = starpu_data_get_interface_on_node(state, dst_node);
			
 
				+	src_block = starpu_data_get_interface_on_node(handle, src_node);
			
 
				+	dst_block = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				 
			
 
				 	//fprintf(stderr, "COPY BLOCK -> RAM nx %d ny %d nz %d SRC ldy %d DST ldy %d\n", src_block->nx,  src_block->ny,  src_block->nz,  src_block->ldy, dst_block->ldy);
			
 
				 
			
@@ -369,13 +369,13 @@ static int copy_cublas_to_ram(data_state *state, uint32_t src_node, uint32_t dst
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static int copy_cublas_to_ram_async(data_state *state, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream)
			
 
				+static int copy_cublas_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream)
			
 
				 {
			
 
				 	starpu_block_interface_t *src_block;
			
 
				 	starpu_block_interface_t *dst_block;
			
 
				 
			
 
				-	src_block = starpu_data_get_interface_on_node(state, src_node);
			
 
				-	dst_block = starpu_data_get_interface_on_node(state, dst_node);
			
 
				+	src_block = starpu_data_get_interface_on_node(handle, src_node);
			
 
				+	dst_block = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				 
			
 
				 	uint32_t nx = src_block->nx;
			
 
				 	uint32_t ny = src_block->ny;
			
@@ -489,13 +489,13 @@ no_async_default:
 
				 
			
 
				 
			
 
				 
			
 
				-static int copy_ram_to_cublas_async(data_state *state, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream)
			
 
				+static int copy_ram_to_cublas_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream)
			
 
				 {
			
 
				 	starpu_block_interface_t *src_block;
			
 
				 	starpu_block_interface_t *dst_block;
			
 
				 
			
 
				-	src_block = starpu_data_get_interface_on_node(state, src_node);
			
 
				-	dst_block = starpu_data_get_interface_on_node(state, dst_node);
			
 
				+	src_block = starpu_data_get_interface_on_node(handle, src_node);
			
 
				+	dst_block = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				 
			
 
				 	uint32_t nx = src_block->nx;
			
 
				 	uint32_t ny = src_block->ny;
			
@@ -607,13 +607,13 @@ no_async_default:
 
				 	}
			
 
				 }
			
 
				 
			
 
				-static int copy_ram_to_cublas(data_state *state, uint32_t src_node, uint32_t dst_node)
			
 
				+static int copy_ram_to_cublas(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
			
 
				 {
			
 
				 	starpu_block_interface_t *src_block;
			
 
				 	starpu_block_interface_t *dst_block;
			
 
				 
			
 
				-	src_block = starpu_data_get_interface_on_node(state, src_node);
			
 
				-	dst_block = starpu_data_get_interface_on_node(state, dst_node);
			
 
				+	src_block = starpu_data_get_interface_on_node(handle, src_node);
			
 
				+	dst_block = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				 
			
 
				 	if ((src_block->nx == src_block->ldy) && (src_block->ldy == dst_block->ldy))
			
 
				 	{
			
@@ -645,13 +645,13 @@ static int copy_ram_to_cublas(data_state *state, uint32_t src_node, uint32_t dst
 
				 #endif // USE_CUDA
			
 
				 
			
 
				 /* as not all platform easily have a BLAS lib installed ... */
			
 
				-static int dummy_copy_ram_to_ram(data_state *state, uint32_t src_node, uint32_t dst_node)
			
 
				+static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
			
 
				 {
			
 
				 	starpu_block_interface_t *src_block;
			
 
				 	starpu_block_interface_t *dst_block;
			
 
				 
			
 
				-	src_block = starpu_data_get_interface_on_node(state, src_node);
			
 
				-	dst_block = starpu_data_get_interface_on_node(state, dst_node);
			
 
				+	src_block = starpu_data_get_interface_on_node(handle, src_node);
			
 
				+	dst_block = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				 
			
 
				 	uint32_t nx = dst_block->nx;
			
 
				 	uint32_t ny = dst_block->ny;
			
--- a/src/datawizard/interfaces/csr_filters.c
+++ b/src/datawizard/interfaces/csr_filters.c
@@ -18,13 +18,13 @@
 
				 #include <common/config.h>
			
 
				 #include <datawizard/hierarchy.h>
			
 
				 
			
 
				-unsigned starpu_vertical_block_filter_func_csr(starpu_filter *f, data_state *root_data)
			
 
				+unsigned starpu_vertical_block_filter_func_csr(starpu_filter *f, starpu_data_handle root_handle)
			
 
				 {
			
 
				 	unsigned nchunks;
			
 
				 	uint32_t arg = f->filter_arg;
			
 
				 
			
 
				 	starpu_csr_interface_t *root_interface =
			
 
				-		starpu_data_get_interface_on_node(root_data, 0);
			
 
				+		starpu_data_get_interface_on_node(root_handle, 0);
			
 
				 
			
 
				 	uint32_t nrow = root_interface->nrow;
			
 
				 	size_t elemsize = root_interface->elemsize;
			
@@ -34,13 +34,13 @@ unsigned starpu_vertical_block_filter_func_csr(starpu_filter *f, data_state *roo
 
				 	nchunks = STARPU_MIN(nrow, arg);
			
 
				 	
			
 
				 	/* first allocate the children data_state */
			
 
				-	starpu_data_create_children(root_data, nchunks, sizeof(starpu_csr_interface_t));
			
 
				+	starpu_data_create_children(root_handle, nchunks, sizeof(starpu_csr_interface_t));
			
 
				 
			
 
				 	/* actually create all the chunks */
			
 
				 	uint32_t chunk_size = (nrow + nchunks - 1)/nchunks;
			
 
				 
			
 
				 	/* XXX */
			
 
				-	STARPU_ASSERT(root_data->per_node[0].allocated);
			
 
				+	STARPU_ASSERT(root_handle->per_node[0].allocated);
			
 
				 	uint32_t *rowptr = root_interface->rowptr;
			
 
				 
			
 
				 	unsigned chunk;
			
@@ -55,7 +55,7 @@ unsigned starpu_vertical_block_filter_func_csr(starpu_filter *f, data_state *roo
 
				 		uint32_t local_nnz = rowptr[first_index + child_nrow] - rowptr[first_index]; 
			
 
				 
			
 
				 		starpu_data_handle chunk_handle =
			
 
				-			starpu_data_get_child(root_data, chunk);
			
 
				+			starpu_data_get_child(root_handle, chunk);
			
 
				 
			
 
				 		unsigned node;
			
 
				 		for (node = 0; node < MAXNODES; node++)
			
@@ -68,7 +68,7 @@ unsigned starpu_vertical_block_filter_func_csr(starpu_filter *f, data_state *roo
 
				 			local->firstentry = local_firstentry;
			
 
				 			local->elemsize = elemsize;
			
 
				 
			
 
				-			if (root_data->per_node[node].allocated) {
			
 
				+			if (root_handle->per_node[node].allocated) {
			
 
				 				local->rowptr = &local->rowptr[first_index];
			
 
				 				local->colind = &local->colind[local_firstentry];
			
 
				 				float *nzval = (float *)(local->nzval);
			
--- a/src/datawizard/interfaces/csr_interface.c
+++ b/src/datawizard/interfaces/csr_interface.c
@@ -23,10 +23,10 @@
 
				 #include <common/hash.h>
			
 
				 
			
 
				 
			
 
				-static int dummy_copy_ram_to_ram(struct starpu_data_state_t *state, uint32_t src_node, uint32_t dst_node);
			
 
				+static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				 #ifdef USE_CUDA
			
 
				-static int copy_ram_to_cublas(struct starpu_data_state_t *state, uint32_t src_node, uint32_t dst_node);
			
 
				-static int copy_cublas_to_ram(struct starpu_data_state_t *state, uint32_t src_node, uint32_t dst_node);
			
 
				+static int copy_ram_to_cublas(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				+static int copy_cublas_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				 #endif
			
 
				 
			
 
				 static const struct copy_data_methods_s csr_copy_data_methods_s = {
			
@@ -44,10 +44,10 @@ static const struct copy_data_methods_s csr_copy_data_methods_s = {
 
				 };
			
 
				 
			
 
				 
			
 
				-static size_t allocate_csr_buffer_on_node(struct starpu_data_state_t *state, uint32_t dst_node);
			
 
				+static size_t allocate_csr_buffer_on_node(starpu_data_handle handle, uint32_t dst_node);
			
 
				 static void liberate_csr_buffer_on_node(starpu_data_interface_t *interface, uint32_t node);
			
 
				-static size_t csr_interface_get_size(struct starpu_data_state_t *state);
			
 
				-static uint32_t footprint_csr_interface_crc32(data_state *state, uint32_t hstate);
			
 
				+static size_t csr_interface_get_size(starpu_data_handle handle);
			
 
				+static uint32_t footprint_csr_interface_crc32(starpu_data_handle handle, uint32_t hstate);
			
 
				 
			
 
				 struct data_interface_ops_t interface_csr_ops = {
			
 
				 	.allocate_data_on_node = allocate_csr_buffer_on_node,
			
@@ -59,20 +59,20 @@ struct data_interface_ops_t interface_csr_ops = {
 
				 };
			
 
				 
			
 
				 /* declare a new data with the BLAS interface */
			
 
				-void starpu_register_csr_data(struct starpu_data_state_t **handle, uint32_t home_node,
			
 
				+void starpu_register_csr_data(starpu_data_handle *handleptr, uint32_t home_node,
			
 
				 		uint32_t nnz, uint32_t nrow, uintptr_t nzval, uint32_t *colind, uint32_t *rowptr, uint32_t firstentry, size_t elemsize)
			
 
				 {
			
 
				-	struct starpu_data_state_t *state =
			
 
				+	starpu_data_handle handle =
			
 
				 		starpu_data_state_create(sizeof(starpu_csr_interface_t));	
			
 
				 
			
 
				-	STARPU_ASSERT(handle);
			
 
				-	*handle = state;
			
 
				+	STARPU_ASSERT(handleptr);
			
 
				+	*handleptr = handle;
			
 
				 
			
 
				 	unsigned node;
			
 
				 	for (node = 0; node < MAXNODES; node++)
			
 
				 	{
			
 
				 		starpu_csr_interface_t *local_interface =
			
 
				-			starpu_data_get_interface_on_node(state, node);
			
 
				+			starpu_data_get_interface_on_node(handle, node);
			
 
				 
			
 
				 		if (node == home_node) {
			
 
				 			local_interface->nzval = nzval;
			
@@ -92,28 +92,26 @@ void starpu_register_csr_data(struct starpu_data_state_t **handle, uint32_t home
 
				 
			
 
				 	}
			
 
				 
			
 
				-	state->ops = &interface_csr_ops;
			
 
				+	handle->ops = &interface_csr_ops;
			
 
				 
			
 
				-	register_new_data(state, home_node, 0);
			
 
				+	register_new_data(handle, home_node, 0);
			
 
				 }
			
 
				 
			
 
				-static inline uint32_t footprint_csr_interface_generic(uint32_t (*hash_func)(uint32_t input, uint32_t hstate), data_state *state, uint32_t hstate)
			
 
				+static inline uint32_t footprint_csr_interface_generic(uint32_t (*hash_func)(uint32_t input, uint32_t hstate), starpu_data_handle handle, uint32_t hstate)
			
 
				 {
			
 
				 	uint32_t hash;
			
 
				 
			
 
				 	hash = hstate;
			
 
				-	hash = hash_func(starpu_get_csr_nnz(state), hash);
			
 
				+	hash = hash_func(starpu_get_csr_nnz(handle), hash);
			
 
				 
			
 
				 	return hash;
			
 
				 }
			
 
				 
			
 
				-static uint32_t footprint_csr_interface_crc32(data_state *state, uint32_t hstate)
			
 
				+static uint32_t footprint_csr_interface_crc32(starpu_data_handle handle, uint32_t hstate)
			
 
				 {
			
 
				-	return footprint_csr_interface_generic(crc32_be, state, hstate);
			
 
				+	return footprint_csr_interface_generic(crc32_be, handle, hstate);
			
 
				 }
			
 
				 
			
 
				-
			
 
				-
			
 
				 struct dumped_csr_interface_s {
			
 
				 	uint32_t nnz;
			
 
				 	uint32_t nrow;
			
@@ -125,84 +123,84 @@ struct dumped_csr_interface_s {
 
				 }  __attribute__ ((packed));
			
 
				 
			
 
				 /* offer an access to the data parameters */
			
 
				-uint32_t starpu_get_csr_nnz(struct starpu_data_state_t *state)
			
 
				+uint32_t starpu_get_csr_nnz(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_csr_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, 0);
			
 
				+		starpu_data_get_interface_on_node(handle, 0);
			
 
				 
			
 
				 	return interface->nnz;
			
 
				 }
			
 
				 
			
 
				-uint32_t starpu_get_csr_nrow(struct starpu_data_state_t *state)
			
 
				+uint32_t starpu_get_csr_nrow(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_csr_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, 0);
			
 
				+		starpu_data_get_interface_on_node(handle, 0);
			
 
				 
			
 
				 	return interface->nrow;
			
 
				 }
			
 
				 
			
 
				-uint32_t starpu_get_csr_firstentry(struct starpu_data_state_t *state)
			
 
				+uint32_t starpu_get_csr_firstentry(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_csr_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, 0);
			
 
				+		starpu_data_get_interface_on_node(handle, 0);
			
 
				 
			
 
				 	return interface->firstentry;
			
 
				 }
			
 
				 
			
 
				-size_t starpu_get_csr_elemsize(struct starpu_data_state_t *state)
			
 
				+size_t starpu_get_csr_elemsize(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_csr_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, 0);
			
 
				+		starpu_data_get_interface_on_node(handle, 0);
			
 
				 
			
 
				 	return interface->elemsize;
			
 
				 }
			
 
				 
			
 
				-uintptr_t starpu_get_csr_local_nzval(struct starpu_data_state_t *state)
			
 
				+uintptr_t starpu_get_csr_local_nzval(starpu_data_handle handle)
			
 
				 {
			
 
				 	unsigned node;
			
 
				 	node = get_local_memory_node();
			
 
				 
			
 
				-	STARPU_ASSERT(state->per_node[node].allocated);
			
 
				+	STARPU_ASSERT(handle->per_node[node].allocated);
			
 
				 
			
 
				 	starpu_csr_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, node);
			
 
				+		starpu_data_get_interface_on_node(handle, node);
			
 
				 
			
 
				 	return interface->nzval;
			
 
				 }
			
 
				 
			
 
				-uint32_t *starpu_get_csr_local_colind(struct starpu_data_state_t *state)
			
 
				+uint32_t *starpu_get_csr_local_colind(starpu_data_handle handle)
			
 
				 {
			
 
				 	unsigned node;
			
 
				 	node = get_local_memory_node();
			
 
				 
			
 
				-	STARPU_ASSERT(state->per_node[node].allocated);
			
 
				+	STARPU_ASSERT(handle->per_node[node].allocated);
			
 
				 
			
 
				 	starpu_csr_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, node);
			
 
				+		starpu_data_get_interface_on_node(handle, node);
			
 
				 
			
 
				 	return interface->colind;
			
 
				 }
			
 
				 
			
 
				-uint32_t *starpu_get_csr_local_rowptr(struct starpu_data_state_t *state)
			
 
				+uint32_t *starpu_get_csr_local_rowptr(starpu_data_handle handle)
			
 
				 {
			
 
				 	unsigned node;
			
 
				 	node = get_local_memory_node();
			
 
				 
			
 
				-	STARPU_ASSERT(state->per_node[node].allocated);
			
 
				+	STARPU_ASSERT(handle->per_node[node].allocated);
			
 
				 
			
 
				 	starpu_csr_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, node);
			
 
				+		starpu_data_get_interface_on_node(handle, node);
			
 
				 
			
 
				 	return interface->rowptr;
			
 
				 }
			
 
				 
			
 
				-static size_t csr_interface_get_size(struct starpu_data_state_t *state)
			
 
				+static size_t csr_interface_get_size(starpu_data_handle handle)
			
 
				 {
			
 
				 	size_t size;
			
 
				 
			
 
				-	uint32_t nnz = starpu_get_csr_nnz(state);
			
 
				-	uint32_t nrow = starpu_get_csr_nrow(state);
			
 
				-	size_t elemsize = starpu_get_csr_elemsize(state);
			
 
				+	uint32_t nnz = starpu_get_csr_nnz(handle);
			
 
				+	uint32_t nrow = starpu_get_csr_nrow(handle);
			
 
				+	size_t elemsize = starpu_get_csr_elemsize(handle);
			
 
				 
			
 
				 	size = nnz*elemsize + nnz*sizeof(uint32_t) + (nrow+1)*sizeof(uint32_t);
			
 
				 
			
@@ -212,7 +210,7 @@ static size_t csr_interface_get_size(struct starpu_data_state_t *state)
 
				 /* memory allocation/deallocation primitives for the BLAS interface */
			
 
				 
			
 
				 /* returns the size of the allocated area */
			
 
				-static size_t allocate_csr_buffer_on_node(struct starpu_data_state_t *state, uint32_t dst_node)
			
 
				+static size_t allocate_csr_buffer_on_node(starpu_data_handle handle, uint32_t dst_node)
			
 
				 {
			
 
				 	uintptr_t addr_nzval;
			
 
				 	uint32_t *addr_colind, *addr_rowptr;
			
@@ -220,7 +218,7 @@ static size_t allocate_csr_buffer_on_node(struct starpu_data_state_t *state, uin
 
				 
			
 
				 	/* we need the 3 arrays to be allocated */
			
 
				 	starpu_csr_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, dst_node);
			
 
				+		starpu_data_get_interface_on_node(handle, dst_node);
			
 
				 
			
 
				 	uint32_t nnz = interface->nnz;
			
 
				 	uint32_t nrow = interface->nrow;
			
@@ -330,13 +328,13 @@ static void liberate_csr_buffer_on_node(starpu_data_interface_t *interface, uint
 
				 }
			
 
				 
			
 
				 #ifdef USE_CUDA
			
 
				-static int copy_cublas_to_ram(struct starpu_data_state_t *state, uint32_t src_node, uint32_t dst_node)
			
 
				+static int copy_cublas_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
			
 
				 {
			
 
				 	starpu_csr_interface_t *src_csr;
			
 
				 	starpu_csr_interface_t *dst_csr;
			
 
				 
			
 
				-	src_csr = starpu_data_get_interface_on_node(state, src_node);
			
 
				-	dst_csr = starpu_data_get_interface_on_node(state, dst_node);
			
 
				+	src_csr = starpu_data_get_interface_on_node(handle, src_node);
			
 
				+	dst_csr = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				 
			
 
				 	uint32_t nnz = src_csr->nnz;
			
 
				 	uint32_t nrow = src_csr->nrow;
			
@@ -356,13 +354,13 @@ static int copy_cublas_to_ram(struct starpu_data_state_t *state, uint32_t src_no
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static int copy_ram_to_cublas(struct starpu_data_state_t *state, uint32_t src_node, uint32_t dst_node)
			
 
				+static int copy_ram_to_cublas(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
			
 
				 {
			
 
				 	starpu_csr_interface_t *src_csr;
			
 
				 	starpu_csr_interface_t *dst_csr;
			
 
				 
			
 
				-	src_csr = starpu_data_get_interface_on_node(state, src_node);
			
 
				-	dst_csr = starpu_data_get_interface_on_node(state, dst_node);
			
 
				+	src_csr = starpu_data_get_interface_on_node(handle, src_node);
			
 
				+	dst_csr = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				 
			
 
				 	uint32_t nnz = src_csr->nnz;
			
 
				 	uint32_t nrow = src_csr->nrow;
			
@@ -384,14 +382,14 @@ static int copy_ram_to_cublas(struct starpu_data_state_t *state, uint32_t src_no
 
				 #endif // USE_CUDA
			
 
				 
			
 
				 /* as not all platform easily have a BLAS lib installed ... */
			
 
				-static int dummy_copy_ram_to_ram(struct starpu_data_state_t *state, uint32_t src_node, uint32_t dst_node)
			
 
				+static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
			
 
				 {
			
 
				 
			
 
				 	starpu_csr_interface_t *src_csr;
			
 
				 	starpu_csr_interface_t *dst_csr;
			
 
				 
			
 
				-	src_csr = starpu_data_get_interface_on_node(state, src_node);
			
 
				-	dst_csr = starpu_data_get_interface_on_node(state, dst_node);
			
 
				+	src_csr = starpu_data_get_interface_on_node(handle, src_node);
			
 
				+	dst_csr = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				 
			
 
				 	uint32_t nnz = src_csr->nnz;
			
 
				 	uint32_t nrow = src_csr->nrow;
			
--- a/src/datawizard/interfaces/vector_filters.c
+++ b/src/datawizard/interfaces/vector_filters.c
@@ -18,13 +18,13 @@
 
				 #include <common/config.h>
			
 
				 #include <datawizard/hierarchy.h>
			
 
				 
			
 
				-unsigned starpu_block_filter_func_vector(starpu_filter *f, data_state *root_data)
			
 
				+unsigned starpu_block_filter_func_vector(starpu_filter *f, starpu_data_handle root_handle)
			
 
				 {
			
 
				 	unsigned nchunks;
			
 
				 	uint32_t arg = f->filter_arg;
			
 
				 
			
 
				 	starpu_vector_interface_t *vector_root =
			
 
				-		starpu_data_get_interface_on_node(root_data, 0);
			
 
				+		starpu_data_get_interface_on_node(root_handle, 0);
			
 
				 
			
 
				 	uint32_t nx = vector_root->nx;
			
 
				 	size_t elemsize = vector_root->elemsize;
			
@@ -33,7 +33,7 @@ unsigned starpu_block_filter_func_vector(starpu_filter *f, data_state *root_data
 
				 	nchunks = STARPU_MIN(nx, arg);
			
 
				 
			
 
				 	/* first allocate the children data_state */
			
 
				-	starpu_data_create_children(root_data, nchunks, sizeof(starpu_vector_interface_t));
			
 
				+	starpu_data_create_children(root_handle, nchunks, sizeof(starpu_vector_interface_t));
			
 
				 
			
 
				 	/* actually create all the chunks */
			
 
				 	unsigned chunk;
			
@@ -46,7 +46,7 @@ unsigned starpu_block_filter_func_vector(starpu_filter *f, data_state *root_data
 
				 			STARPU_MIN(chunk_size, nx - chunk*chunk_size);
			
 
				 
			
 
				 		starpu_data_handle chunk_handle =
			
 
				-			starpu_data_get_child(root_data, chunk);
			
 
				+			starpu_data_get_child(root_handle, chunk);
			
 
				 
			
 
				 		unsigned node;
			
 
				 		for (node = 0; node < MAXNODES; node++)
			
@@ -57,9 +57,9 @@ unsigned starpu_block_filter_func_vector(starpu_filter *f, data_state *root_data
 
				 			local->nx = child_nx;
			
 
				 			local->elemsize = elemsize;
			
 
				 
			
 
				-			if (root_data->per_node[node].allocated) {
			
 
				+			if (root_handle->per_node[node].allocated) {
			
 
				 				starpu_vector_interface_t *local_root =
			
 
				-					starpu_data_get_interface_on_node(root_data, node);
			
 
				+					starpu_data_get_interface_on_node(root_handle, node);
			
 
				 
			
 
				 				local->ptr = local_root->ptr + offset;
			
 
				 			}
			
@@ -70,23 +70,23 @@ unsigned starpu_block_filter_func_vector(starpu_filter *f, data_state *root_data
 
				 }
			
 
				 
			
 
				 
			
 
				-unsigned starpu_divide_in_2_filter_func_vector(starpu_filter *f, data_state *root_data)
			
 
				+unsigned starpu_divide_in_2_filter_func_vector(starpu_filter *f, starpu_data_handle root_handle)
			
 
				 {
			
 
				 	uint32_t length_first = f->filter_arg;
			
 
				 
			
 
				 	starpu_vector_interface_t *vector_root =
			
 
				-		starpu_data_get_interface_on_node(root_data, 0);
			
 
				+		starpu_data_get_interface_on_node(root_handle, 0);
			
 
				 
			
 
				 	uint32_t nx = vector_root->nx;
			
 
				 	size_t elemsize = vector_root->elemsize;
			
 
				 
			
 
				 	/* first allocate the children data_state */
			
 
				-	starpu_data_create_children(root_data, 2, sizeof(starpu_vector_interface_t));
			
 
				+	starpu_data_create_children(root_handle, 2, sizeof(starpu_vector_interface_t));
			
 
				 
			
 
				 	STARPU_ASSERT(length_first < nx);
			
 
				 
			
 
				 	starpu_data_handle chunk0_handle =
			
 
				-		starpu_data_get_child(root_data, 0);
			
 
				+		starpu_data_get_child(root_handle, 0);
			
 
				 
			
 
				 	unsigned node;
			
 
				 	for (node = 0; node < MAXNODES; node++)
			
@@ -97,16 +97,16 @@ unsigned starpu_divide_in_2_filter_func_vector(starpu_filter *f, data_state *roo
 
				 		local->nx = length_first;
			
 
				 		local->elemsize = elemsize;
			
 
				 
			
 
				-		if (root_data->per_node[node].allocated) {
			
 
				+		if (root_handle->per_node[node].allocated) {
			
 
				 			starpu_vector_interface_t *local_root =
			
 
				-				starpu_data_get_interface_on_node(root_data, node);
			
 
				+				starpu_data_get_interface_on_node(root_handle, node);
			
 
				 
			
 
				 			local->ptr = local_root->ptr;
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				 	starpu_data_handle chunk1_handle =
			
 
				-		starpu_data_get_child(root_data, 1);
			
 
				+		starpu_data_get_child(root_handle, 1);
			
 
				 
			
 
				 	for (node = 0; node < MAXNODES; node++)
			
 
				 	{
			
@@ -116,9 +116,9 @@ unsigned starpu_divide_in_2_filter_func_vector(starpu_filter *f, data_state *roo
 
				 		local->nx = nx - length_first;
			
 
				 		local->elemsize = elemsize;
			
 
				 
			
 
				-		if (root_data->per_node[node].allocated) {
			
 
				+		if (root_handle->per_node[node].allocated) {
			
 
				 			starpu_vector_interface_t *local_root =
			
 
				-				starpu_data_get_interface_on_node(root_data, node);
			
 
				+				starpu_data_get_interface_on_node(root_handle, node);
			
 
				 
			
 
				 			local->ptr = local_root->ptr + length_first*elemsize;
			
 
				 		}
			
@@ -127,19 +127,19 @@ unsigned starpu_divide_in_2_filter_func_vector(starpu_filter *f, data_state *roo
 
				 	return 2;
			
 
				 }
			
 
				 
			
 
				-unsigned starpu_list_filter_func_vector(starpu_filter *f, data_state *root_data)
			
 
				+unsigned starpu_list_filter_func_vector(starpu_filter *f, starpu_data_handle root_handle)
			
 
				 {
			
 
				 	uint32_t nchunks = f->filter_arg;
			
 
				 	uint32_t *length_tab = f->filter_arg_ptr;
			
 
				 
			
 
				 	starpu_vector_interface_t *vector_root =
			
 
				-		starpu_data_get_interface_on_node(root_data, 0);
			
 
				+		starpu_data_get_interface_on_node(root_handle, 0);
			
 
				 
			
 
				 	uint32_t nx = vector_root->nx;
			
 
				 	size_t elemsize = vector_root->elemsize;
			
 
				 
			
 
				 	/* first allocate the children data_state */
			
 
				-	starpu_data_create_children(root_data, nchunks, sizeof(starpu_vector_interface_t));
			
 
				+	starpu_data_create_children(root_handle, nchunks, sizeof(starpu_vector_interface_t));
			
 
				 
			
 
				 	unsigned current_pos = 0;
			
 
				 
			
@@ -148,7 +148,7 @@ unsigned starpu_list_filter_func_vector(starpu_filter *f, data_state *root_data)
 
				 	for (chunk = 0; chunk < nchunks; chunk++)
			
 
				 	{
			
 
				 		starpu_data_handle chunk_handle =
			
 
				-			starpu_data_get_child(root_data, chunk);
			
 
				+			starpu_data_get_child(root_handle, chunk);
			
 
				 
			
 
				 		uint32_t chunk_size = length_tab[chunk];
			
 
				 
			
@@ -161,9 +161,9 @@ unsigned starpu_list_filter_func_vector(starpu_filter *f, data_state *root_data)
 
				 			local->nx = chunk_size;
			
 
				 			local->elemsize = elemsize;
			
 
				 
			
 
				-			if (root_data->per_node[node].allocated) {
			
 
				+			if (root_handle->per_node[node].allocated) {
			
 
				 				starpu_vector_interface_t *local_root =
			
 
				-					starpu_data_get_interface_on_node(root_data, node);
			
 
				+					starpu_data_get_interface_on_node(root_handle, node);
			
 
				 
			
 
				 				local->ptr = local_root->ptr + current_pos*elemsize;
			
 
				 			}
			
--- a/src/datawizard/interfaces/vector_interface.c
+++ b/src/datawizard/interfaces/vector_interface.c
@@ -28,12 +28,12 @@
 
				 #include <cuda.h>
			
 
				 #endif
			
 
				 
			
 
				-static int dummy_copy_ram_to_ram(struct starpu_data_state_t *state, uint32_t src_node, uint32_t dst_node);
			
 
				+static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				 #ifdef USE_CUDA
			
 
				-static int copy_ram_to_cublas(struct starpu_data_state_t *state, uint32_t src_node, uint32_t dst_node);
			
 
				-static int copy_cublas_to_ram(struct starpu_data_state_t *state, uint32_t src_node, uint32_t dst_node);
			
 
				-static int copy_ram_to_cublas_async(struct starpu_data_state_t *state, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream);
			
 
				-static int copy_cublas_to_ram_async(struct starpu_data_state_t *state, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream);
			
 
				+static int copy_ram_to_cublas(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				+static int copy_cublas_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node);
			
 
				+static int copy_ram_to_cublas_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream);
			
 
				+static int copy_cublas_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream);
			
 
				 #endif
			
 
				 
			
 
				 static const struct copy_data_methods_s vector_copy_data_methods_s = {
			
@@ -52,11 +52,11 @@ static const struct copy_data_methods_s vector_copy_data_methods_s = {
 
				 	.spu_to_spu = NULL
			
 
				 };
			
 
				 
			
 
				-static size_t allocate_vector_buffer_on_node(data_state *state, uint32_t dst_node);
			
 
				+static size_t allocate_vector_buffer_on_node(starpu_data_handle handle, uint32_t dst_node);
			
 
				 static void liberate_vector_buffer_on_node(starpu_data_interface_t *interface, uint32_t node);
			
 
				-static size_t vector_interface_get_size(struct starpu_data_state_t *state);
			
 
				-static uint32_t footprint_vector_interface_crc32(data_state *state, uint32_t hstate);
			
 
				-static void display_vector_interface(data_state *state, FILE *f);
			
 
				+static size_t vector_interface_get_size(starpu_data_handle handle);
			
 
				+static uint32_t footprint_vector_interface_crc32(starpu_data_handle handle, uint32_t hstate);
			
 
				+static void display_vector_interface(starpu_data_handle handle, FILE *f);
			
 
				 #ifdef USE_GORDON
			
 
				 static int convert_vector_to_gordon(starpu_data_interface_t *interface, uint64_t *ptr, gordon_strideSize_t *ss); 
			
 
				 #endif
			
@@ -84,21 +84,21 @@ int convert_vector_to_gordon(starpu_data_interface_t *interface, uint64_t *ptr,
 
				 }
			
 
				 #endif
			
 
				 
			
 
				-/* declare a new data with the BLAS interface */
			
 
				-void starpu_register_vector_data(struct starpu_data_state_t **handle, uint32_t home_node,
			
 
				+/* declare a new data with the vector interface */
			
 
				+void starpu_register_vector_data(starpu_data_handle *handleptr, uint32_t home_node,
			
 
				                         uintptr_t ptr, uint32_t nx, size_t elemsize)
			
 
				 {
			
 
				-	struct starpu_data_state_t *state =
			
 
				+	starpu_data_handle handle =
			
 
				 		starpu_data_state_create(sizeof(starpu_vector_interface_t));
			
 
				 
			
 
				-	STARPU_ASSERT(handle);
			
 
				-	*handle = state;
			
 
				+	STARPU_ASSERT(handleptr);
			
 
				+	*handleptr = handle;
			
 
				 
			
 
				 	unsigned node;
			
 
				 	for (node = 0; node < MAXNODES; node++)
			
 
				 	{
			
 
				 		starpu_vector_interface_t *local_interface = 
			
 
				-			starpu_data_get_interface_on_node(state, node);
			
 
				+			starpu_data_get_interface_on_node(handle, node);
			
 
				 
			
 
				 		if (node == home_node) {
			
 
				 			local_interface->ptr = ptr;
			
@@ -111,25 +111,25 @@ void starpu_register_vector_data(struct starpu_data_state_t **handle, uint32_t h
 
				 		local_interface->elemsize = elemsize;
			
 
				 	}
			
 
				 
			
 
				-	state->ops = &interface_vector_ops;
			
 
				+	handle->ops = &interface_vector_ops;
			
 
				 
			
 
				-	register_new_data(state, home_node, 0);
			
 
				+	register_new_data(handle, home_node, 0);
			
 
				 }
			
 
				 
			
 
				 
			
 
				-static inline uint32_t footprint_vector_interface_generic(uint32_t (*hash_func)(uint32_t input, uint32_t hstate), data_state *state, uint32_t hstate)
			
 
				+static inline uint32_t footprint_vector_interface_generic(uint32_t (*hash_func)(uint32_t input, uint32_t hstate), starpu_data_handle handle, uint32_t hstate)
			
 
				 {
			
 
				 	uint32_t hash;
			
 
				 
			
 
				 	hash = hstate;
			
 
				-	hash = hash_func(starpu_get_vector_nx(state), hash);
			
 
				+	hash = hash_func(starpu_get_vector_nx(handle), hash);
			
 
				 
			
 
				 	return hash;
			
 
				 }
			
 
				 
			
 
				-uint32_t footprint_vector_interface_crc32(data_state *state, uint32_t hstate)
			
 
				+uint32_t footprint_vector_interface_crc32(starpu_data_handle handle, uint32_t hstate)
			
 
				 {
			
 
				-	return footprint_vector_interface_generic(crc32_be, state, hstate);
			
 
				+	return footprint_vector_interface_generic(crc32_be, handle, hstate);
			
 
				 }
			
 
				 
			
 
				 struct dumped_vector_interface_s {
			
@@ -138,19 +138,19 @@ struct dumped_vector_interface_s {
 
				 	uint32_t elemsize;
			
 
				 } __attribute__ ((packed));
			
 
				 
			
 
				-static void display_vector_interface(data_state *state, FILE *f)
			
 
				+static void display_vector_interface(starpu_data_handle handle, FILE *f)
			
 
				 {
			
 
				 	starpu_vector_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, 0);
			
 
				+		starpu_data_get_interface_on_node(handle, 0);
			
 
				 
			
 
				 	fprintf(f, "%u\t", interface->nx);
			
 
				 }
			
 
				 
			
 
				-static size_t vector_interface_get_size(struct starpu_data_state_t *state)
			
 
				+static size_t vector_interface_get_size(starpu_data_handle handle)
			
 
				 {
			
 
				 	size_t size;
			
 
				 	starpu_vector_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, 0);
			
 
				+		starpu_data_get_interface_on_node(handle, 0);
			
 
				 
			
 
				 	size = interface->nx*interface->elemsize;
			
 
				 
			
@@ -158,31 +158,31 @@ static size_t vector_interface_get_size(struct starpu_data_state_t *state)
 
				 }
			
 
				 
			
 
				 /* offer an access to the data parameters */
			
 
				-uint32_t starpu_get_vector_nx(data_state *state)
			
 
				+uint32_t starpu_get_vector_nx(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_vector_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, 0);
			
 
				+		starpu_data_get_interface_on_node(handle, 0);
			
 
				 
			
 
				 	return interface->nx;
			
 
				 }
			
 
				 
			
 
				-uintptr_t starpu_get_vector_local_ptr(data_state *state)
			
 
				+uintptr_t starpu_get_vector_local_ptr(starpu_data_handle handle)
			
 
				 {
			
 
				 	unsigned node;
			
 
				 	node = get_local_memory_node();
			
 
				 
			
 
				-	STARPU_ASSERT(state->per_node[node].allocated);
			
 
				+	STARPU_ASSERT(handle->per_node[node].allocated);
			
 
				 
			
 
				 	starpu_vector_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, node);
			
 
				+		starpu_data_get_interface_on_node(handle, node);
			
 
				 
			
 
				 	return interface->ptr;
			
 
				 }
			
 
				 
			
 
				-size_t starpu_get_vector_elemsize(data_state *state)
			
 
				+size_t starpu_get_vector_elemsize(starpu_data_handle handle)
			
 
				 {
			
 
				 	starpu_vector_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, 0);
			
 
				+		starpu_data_get_interface_on_node(handle, 0);
			
 
				 
			
 
				 	return interface->elemsize;
			
 
				 }
			
@@ -190,10 +190,10 @@ size_t starpu_get_vector_elemsize(data_state *state)
 
				 /* memory allocation/deallocation primitives for the vector interface */
			
 
				 
			
 
				 /* returns the size of the allocated area */
			
 
				-static size_t allocate_vector_buffer_on_node(data_state *state, uint32_t dst_node)
			
 
				+static size_t allocate_vector_buffer_on_node(starpu_data_handle handle, uint32_t dst_node)
			
 
				 {
			
 
				 	starpu_vector_interface_t *interface =
			
 
				-		starpu_data_get_interface_on_node(state, dst_node);
			
 
				+		starpu_data_get_interface_on_node(handle, dst_node);
			
 
				 
			
 
				 	uintptr_t addr = 0;
			
 
				 	size_t allocated_memory;
			
@@ -248,13 +248,13 @@ void liberate_vector_buffer_on_node(starpu_data_interface_t *interface, uint32_t
 
				 }
			
 
				 
			
 
				 #ifdef USE_CUDA
			
 
				-static int copy_cublas_to_ram(data_state *state, uint32_t src_node, uint32_t dst_node)
			
 
				+static int copy_cublas_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
			
 
				 {
			
 
				 	starpu_vector_interface_t *src_vector;
			
 
				 	starpu_vector_interface_t *dst_vector;
			
 
				 
			
 
				-	src_vector = starpu_data_get_interface_on_node(state, src_node);
			
 
				-	dst_vector = starpu_data_get_interface_on_node(state, dst_node);
			
 
				+	src_vector = starpu_data_get_interface_on_node(handle, src_node);
			
 
				+	dst_vector = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				 
			
 
				 	cublasGetVector(src_vector->nx, src_vector->elemsize,
			
 
				 		(uint8_t *)src_vector->ptr, 1,
			
@@ -265,13 +265,13 @@ static int copy_cublas_to_ram(data_state *state, uint32_t src_node, uint32_t dst
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static int copy_ram_to_cublas(data_state *state, uint32_t src_node, uint32_t dst_node)
			
 
				+static int copy_ram_to_cublas(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
			
 
				 {
			
 
				 	starpu_vector_interface_t *src_vector;
			
 
				 	starpu_vector_interface_t *dst_vector;
			
 
				 
			
 
				-	src_vector = starpu_data_get_interface_on_node(state, src_node);
			
 
				-	dst_vector = starpu_data_get_interface_on_node(state, dst_node);
			
 
				+	src_vector = starpu_data_get_interface_on_node(handle, src_node);
			
 
				+	dst_vector = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				 
			
 
				 	cublasSetVector(src_vector->nx, src_vector->elemsize,
			
 
				 		(uint8_t *)src_vector->ptr, 1,
			
@@ -282,13 +282,13 @@ static int copy_ram_to_cublas(data_state *state, uint32_t src_node, uint32_t dst
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static int copy_cublas_to_ram_async(data_state *state, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream)
			
 
				+static int copy_cublas_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream)
			
 
				 {
			
 
				 	starpu_vector_interface_t *src_vector;
			
 
				 	starpu_vector_interface_t *dst_vector;
			
 
				 
			
 
				-	src_vector = starpu_data_get_interface_on_node(state, src_node);
			
 
				-	dst_vector = starpu_data_get_interface_on_node(state, dst_node);
			
 
				+	src_vector = starpu_data_get_interface_on_node(handle, src_node);
			
 
				+	dst_vector = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				 
			
 
				 	cudaError_t cures;
			
 
				 	cures = cudaMemcpyAsync((char *)dst_vector->ptr, (char *)src_vector->ptr, src_vector->nx*src_vector->elemsize, cudaMemcpyDeviceToHost, *stream);
			
@@ -309,13 +309,13 @@ static int copy_cublas_to_ram_async(data_state *state, uint32_t src_node, uint32
 
				 	return EAGAIN;
			
 
				 }
			
 
				 
			
 
				-static int copy_ram_to_cublas_async(struct starpu_data_state_t *state, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream)
			
 
				+static int copy_ram_to_cublas_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream)
			
 
				 {
			
 
				 	starpu_vector_interface_t *src_vector;
			
 
				 	starpu_vector_interface_t *dst_vector;
			
 
				 
			
 
				-	src_vector = starpu_data_get_interface_on_node(state, src_node);
			
 
				-	dst_vector = starpu_data_get_interface_on_node(state, dst_node);
			
 
				+	src_vector = starpu_data_get_interface_on_node(handle, src_node);
			
 
				+	dst_vector = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				 
			
 
				 	cudaError_t cures;
			
 
				 	
			
@@ -340,13 +340,13 @@ static int copy_ram_to_cublas_async(struct starpu_data_state_t *state, uint32_t
 
				 
			
 
				 #endif // USE_CUDA
			
 
				 
			
 
				-static int dummy_copy_ram_to_ram(data_state *state, uint32_t src_node, uint32_t dst_node)
			
 
				+static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
			
 
				 {
			
 
				 	starpu_vector_interface_t *src_vector;
			
 
				 	starpu_vector_interface_t *dst_vector;
			
 
				 
			
 
				-	src_vector = starpu_data_get_interface_on_node(state, src_node);
			
 
				-	dst_vector = starpu_data_get_interface_on_node(state, dst_node);
			
 
				+	src_vector = starpu_data_get_interface_on_node(handle, src_node);
			
 
				+	dst_vector = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				 
			
 
				 	uint32_t nx = dst_vector->nx;
			
 
				 	size_t elemsize = dst_vector->elemsize;
			
--- a/src/datawizard/memalloc.c
+++ b/src/datawizard/memalloc.c
@@ -44,57 +44,57 @@ void deinit_mem_chunk_lists(void)
 
				 	}
			
 
				 }
			
 
				 
			
 
				-static void lock_all_subtree(data_state *data)
			
 
				+static void lock_all_subtree(starpu_data_handle handle)
			
 
				 {
			
 
				-	if (data->nchildren == 0)
			
 
				+	if (handle->nchildren == 0)
			
 
				 	{
			
 
				 		/* this is a leaf */
			
 
				-		while (starpu_spin_trylock(&data->header_lock))
			
 
				+		while (starpu_spin_trylock(&handle->header_lock))
			
 
				 			datawizard_progress(get_local_memory_node(), 0);
			
 
				 	}
			
 
				 	else {
			
 
				 		/* lock all sub-subtrees children */
			
 
				 		int child;
			
 
				-		for (child = 0; child < data->nchildren; child++)
			
 
				+		for (child = 0; child < handle->nchildren; child++)
			
 
				 		{
			
 
				-			lock_all_subtree(&data->children[child]);
			
 
				+			lock_all_subtree(&handle->children[child]);
			
 
				 		}
			
 
				 	}
			
 
				 }
			
 
				 
			
 
				-static void unlock_all_subtree(data_state *data)
			
 
				+static void unlock_all_subtree(starpu_data_handle handle)
			
 
				 {
			
 
				-	if (data->nchildren == 0)
			
 
				+	if (handle->nchildren == 0)
			
 
				 	{
			
 
				 		/* this is a leaf */	
			
 
				-		starpu_spin_unlock(&data->header_lock);
			
 
				+		starpu_spin_unlock(&handle->header_lock);
			
 
				 	}
			
 
				 	else {
			
 
				 		/* lock all sub-subtrees children */
			
 
				 		int child;
			
 
				-		for (child = data->nchildren - 1; child >= 0; child--)
			
 
				+		for (child = handle->nchildren - 1; child >= 0; child--)
			
 
				 		{
			
 
				-			unlock_all_subtree(&data->children[child]);
			
 
				+			unlock_all_subtree(&handle->children[child]);
			
 
				 		}
			
 
				 	}
			
 
				 }
			
 
				 
			
 
				-static unsigned may_free_subtree(data_state *data, unsigned node)
			
 
				+static unsigned may_free_subtree(starpu_data_handle handle, unsigned node)
			
 
				 {
			
 
				 	/* we only free if no one refers to the leaf */
			
 
				-	uint32_t refcnt = get_data_refcnt(data, node);
			
 
				+	uint32_t refcnt = get_data_refcnt(handle, node);
			
 
				 	if (refcnt)
			
 
				 		return 0;
			
 
				 	
			
 
				-	if (!data->nchildren)
			
 
				+	if (!handle->nchildren)
			
 
				 		return 1;
			
 
				 	
			
 
				 	/* look into all sub-subtrees children */
			
 
				 	int child;
			
 
				-	for (child = 0; child < data->nchildren; child++)
			
 
				+	for (child = 0; child < handle->nchildren; child++)
			
 
				 	{
			
 
				 		unsigned res;
			
 
				-		res = may_free_subtree(&data->children[child], node);
			
 
				+		res = may_free_subtree(&handle->children[child], node);
			
 
				 		if (!res) return 0;
			
 
				 	}
			
 
				 
			
@@ -118,7 +118,7 @@ static size_t do_free_mem_chunk(mem_chunk_t mc, unsigned node)
 
				 	return size; 
			
 
				 }
			
 
				 
			
 
				-static void transfer_subtree_to_node(data_state *data, unsigned src_node, 
			
 
				+static void transfer_subtree_to_node(starpu_data_handle handle, unsigned src_node, 
			
 
				 						unsigned dst_node)
			
 
				 {
			
 
				 	unsigned i;
			
@@ -126,44 +126,44 @@ static void transfer_subtree_to_node(data_state *data, unsigned src_node,
 
				 	unsigned cnt;
			
 
				 	int ret;
			
 
				 
			
 
				-	if (data->nchildren == 0)
			
 
				+	if (handle->nchildren == 0)
			
 
				 	{
			
 
				 		/* this is a leaf */
			
 
				-		switch(data->per_node[src_node].state) {
			
 
				+		switch(handle->per_node[src_node].state) {
			
 
				 		case OWNER:
			
 
				 			/* the local node has the only copy */
			
 
				 			/* the owner is now the destination_node */
			
 
				-			data->per_node[src_node].state = INVALID;
			
 
				-			data->per_node[dst_node].state = OWNER;
			
 
				+			handle->per_node[src_node].state = INVALID;
			
 
				+			handle->per_node[dst_node].state = OWNER;
			
 
				 
			
 
				 #warning we should use requests during memory reclaim
			
 
				 			/* TODO use request !! */
			
 
				-			data->per_node[src_node].refcnt++;
			
 
				-			data->per_node[dst_node].refcnt++;
			
 
				+			handle->per_node[src_node].refcnt++;
			
 
				+			handle->per_node[dst_node].refcnt++;
			
 
				 
			
 
				-			ret = driver_copy_data_1_to_1(data, src_node, dst_node, 0, NULL, 1);
			
 
				+			ret = driver_copy_data_1_to_1(handle, src_node, dst_node, 0, NULL, 1);
			
 
				 			STARPU_ASSERT(ret == 0);
			
 
				 
			
 
				-			data->per_node[src_node].refcnt--;
			
 
				-			data->per_node[dst_node].refcnt--;
			
 
				+			handle->per_node[src_node].refcnt--;
			
 
				+			handle->per_node[dst_node].refcnt--;
			
 
				 
			
 
				 			break;
			
 
				 		case SHARED:
			
 
				 			/* some other node may have the copy */
			
 
				-			data->per_node[src_node].state = INVALID;
			
 
				+			handle->per_node[src_node].state = INVALID;
			
 
				 
			
 
				 			/* count the number of copies */
			
 
				 			cnt = 0;
			
 
				 			for (i = 0; i < MAXNODES; i++)
			
 
				 			{
			
 
				-				if (data->per_node[i].state == SHARED) {
			
 
				+				if (handle->per_node[i].state == SHARED) {
			
 
				 					cnt++; 
			
 
				 					last = i;
			
 
				 				}
			
 
				 			}
			
 
				 
			
 
				 			if (cnt == 1)
			
 
				-				data->per_node[last].state = OWNER;
			
 
				+				handle->per_node[last].state = OWNER;
			
 
				 
			
 
				 			break;
			
 
				 		case INVALID:
			
@@ -177,9 +177,9 @@ static void transfer_subtree_to_node(data_state *data, unsigned src_node,
 
				 	else {
			
 
				 		/* lock all sub-subtrees children */
			
 
				 		int child;
			
 
				-		for (child = 0; child < data->nchildren; child++)
			
 
				+		for (child = 0; child < handle->nchildren; child++)
			
 
				 		{
			
 
				-			transfer_subtree_to_node(&data->children[child],
			
 
				+			transfer_subtree_to_node(&handle->children[child],
			
 
				 							src_node, dst_node);
			
 
				 		}
			
 
				 	}
			
@@ -190,11 +190,11 @@ static size_t try_to_free_mem_chunk(mem_chunk_t mc, unsigned node, unsigned atte
 
				 {
			
 
				 	size_t liberated = 0;
			
 
				 
			
 
				-	data_state *data;
			
 
				+	starpu_data_handle handle;
			
 
				 
			
 
				-	data = mc->data;
			
 
				+	handle = mc->data;
			
 
				 
			
 
				-	STARPU_ASSERT(data);
			
 
				+	STARPU_ASSERT(handle);
			
 
				 
			
 
				 	if (attempts == 0)
			
 
				 	{
			
@@ -204,34 +204,34 @@ static size_t try_to_free_mem_chunk(mem_chunk_t mc, unsigned node, unsigned atte
 
				 	}
			
 
				 
			
 
				 	/* try to lock all the leafs of the subtree */
			
 
				-	lock_all_subtree(data);
			
 
				+	lock_all_subtree(handle);
			
 
				 
			
 
				 	/* check if they are all "free" */
			
 
				-	if (may_free_subtree(data, node))
			
 
				+	if (may_free_subtree(handle, node))
			
 
				 	{
			
 
				-		STARPU_ASSERT(data->per_node[node].refcnt == 0);
			
 
				+		STARPU_ASSERT(handle->per_node[node].refcnt == 0);
			
 
				 
			
 
				 		/* in case there was nobody using that buffer, throw it 
			
 
				 		 * away after writing it back to main memory */
			
 
				-		transfer_subtree_to_node(data, node, 0);
			
 
				+		transfer_subtree_to_node(handle, node, 0);
			
 
				 
			
 
				-		STARPU_ASSERT(data->per_node[node].refcnt == 0);
			
 
				+		STARPU_ASSERT(handle->per_node[node].refcnt == 0);
			
 
				 
			
 
				 		/* now the actual buffer may be liberated */
			
 
				 		liberated = do_free_mem_chunk(mc, node);
			
 
				 	}
			
 
				 
			
 
				 	/* unlock the leafs */
			
 
				-	unlock_all_subtree(data);
			
 
				+	unlock_all_subtree(handle);
			
 
				 
			
 
				 	return liberated;
			
 
				 }
			
 
				 
			
 
				 #ifdef USE_ALLOCATION_CACHE
			
 
				 /* we assume that mc_rwlock[node] is taken */
			
 
				-static void reuse_mem_chunk(unsigned node, data_state *new_data, mem_chunk_t mc, unsigned is_already_in_mc_list)
			
 
				+static void reuse_mem_chunk(unsigned node, starpu_data_handle new_data, mem_chunk_t mc, unsigned is_already_in_mc_list)
			
 
				 {
			
 
				-	data_state *old_data;
			
 
				+	starpu_data_handle old_data;
			
 
				 	old_data = mc->data;
			
 
				 
			
 
				 	/* we found an appropriate mem chunk: so we get it out
			
@@ -266,13 +266,11 @@ static void reuse_mem_chunk(unsigned node, data_state *new_data, mem_chunk_t mc,
 
				 	}
			
 
				 }
			
 
				 
			
 
				-
			
 
				-
			
 
				-static unsigned try_to_reuse_mem_chunk(mem_chunk_t mc, unsigned node, data_state *new_data, unsigned is_already_in_mc_list)
			
 
				+static unsigned try_to_reuse_mem_chunk(mem_chunk_t mc, unsigned node, starpu_data_handle new_data, unsigned is_already_in_mc_list)
			
 
				 {
			
 
				 	unsigned success = 0;
			
 
				 
			
 
				-	data_state *old_data;
			
 
				+	starpu_data_handle old_data;
			
 
				 
			
 
				 	old_data = mc->data;
			
 
				 
			
@@ -302,7 +300,7 @@ static unsigned try_to_reuse_mem_chunk(mem_chunk_t mc, unsigned node, data_state
 
				 
			
 
				 /* this function looks for a memory chunk that matches a given footprint in the
			
 
				  * list of mem chunk that need to be liberated */
			
 
				-static unsigned try_to_find_reusable_mem_chunk(unsigned node, data_state *data, uint32_t footprint)
			
 
				+static unsigned try_to_find_reusable_mem_chunk(unsigned node, starpu_data_handle data, uint32_t footprint)
			
 
				 {
			
 
				 	pthread_rwlock_wrlock(&mc_rwlock[node]);
			
 
				 
			
@@ -414,29 +412,29 @@ static size_t reclaim_memory(uint32_t node, size_t toreclaim __attribute__ ((unu
 
				 	return liberated;
			
 
				 }
			
 
				 
			
 
				-static void register_mem_chunk(data_state *state, uint32_t dst_node, size_t size, unsigned automatically_allocated)
			
 
				+static void register_mem_chunk(starpu_data_handle handle, uint32_t dst_node, size_t size, unsigned automatically_allocated)
			
 
				 {
			
 
				 	int res;
			
 
				 
			
 
				 	mem_chunk_t mc = mem_chunk_new();
			
 
				 
			
 
				-	STARPU_ASSERT(state);
			
 
				-	STARPU_ASSERT(state->ops);
			
 
				+	STARPU_ASSERT(handle);
			
 
				+	STARPU_ASSERT(handle->ops);
			
 
				 
			
 
				-	mc->data = state;
			
 
				+	mc->data = handle;
			
 
				 	mc->size = size;
			
 
				-	mc->footprint = compute_data_footprint(state);
			
 
				-	mc->ops = state->ops;
			
 
				+	mc->footprint = compute_data_footprint(handle);
			
 
				+	mc->ops = handle->ops;
			
 
				 	mc->data_was_deleted = 0;
			
 
				 	mc->automatically_allocated = automatically_allocated;
			
 
				 
			
 
				 	/* the interface was already filled by ops->allocate_data_on_node */
			
 
				-	void *src_interface = starpu_data_get_interface_on_node(state, dst_node);
			
 
				+	void *src_interface = starpu_data_get_interface_on_node(handle, dst_node);
			
 
				 
			
 
				-	mc->interface = malloc(state->interface_size);
			
 
				+	mc->interface = malloc(handle->interface_size);
			
 
				 	STARPU_ASSERT(mc->interface);
			
 
				 
			
 
				-	memcpy(mc->interface, src_interface, state->interface_size);
			
 
				+	memcpy(mc->interface, src_interface, handle->interface_size);
			
 
				 
			
 
				 	res = pthread_rwlock_wrlock(&mc_rwlock[dst_node]);
			
 
				 	STARPU_ASSERT(!res);
			
@@ -447,7 +445,7 @@ static void register_mem_chunk(data_state *state, uint32_t dst_node, size_t size
 
				 	STARPU_ASSERT(!res);
			
 
				 }
			
 
				 
			
 
				-void request_mem_chunk_removal(data_state *state, unsigned node)
			
 
				+void request_mem_chunk_removal(starpu_data_handle handle, unsigned node)
			
 
				 {
			
 
				 	int res;
			
 
				 	res = pthread_rwlock_wrlock(&mc_rwlock[node]);
			
@@ -461,7 +459,7 @@ void request_mem_chunk_removal(data_state *state, unsigned node)
 
				 	{
			
 
				 		next_mc = mem_chunk_list_next(mc);
			
 
				 
			
 
				-		if (mc->data == state) {
			
 
				+		if (mc->data == handle) {
			
 
				 			/* we found the data */
			
 
				 			mc->data_was_deleted = 1;
			
 
				 
			
@@ -490,34 +488,34 @@ static size_t liberate_memory_on_node(mem_chunk_t mc, uint32_t node)
 
				 	STARPU_ASSERT(mc->ops);
			
 
				 	STARPU_ASSERT(mc->ops->liberate_data_on_node);
			
 
				 
			
 
				-	data_state *state = mc->data;
			
 
				+	starpu_data_handle handle = mc->data;
			
 
				 
			
 
				-//	while (starpu_spin_trylock(&state->header_lock))
			
 
				+//	while (starpu_spin_trylock(&handle->header_lock))
			
 
				 //		datawizard_progress(get_local_memory_node());
			
 
				 
			
 
				 #warning can we block here ?
			
 
				-//	starpu_spin_lock(&state->header_lock);
			
 
				+//	starpu_spin_lock(&handle->header_lock);
			
 
				 
			
 
				-	if (mc->automatically_allocated && (state->per_node[node].refcnt == 0))
			
 
				+	if (mc->automatically_allocated && (handle->per_node[node].refcnt == 0))
			
 
				 	{
			
 
				-		STARPU_ASSERT(state->per_node[node].allocated);
			
 
				+		STARPU_ASSERT(handle->per_node[node].allocated);
			
 
				 
			
 
				 		mc->ops->liberate_data_on_node(mc->interface, node);
			
 
				 
			
 
				 		if (!mc->data_was_deleted)
			
 
				 		{
			
 
				-			state->per_node[node].allocated = 0;
			
 
				+			handle->per_node[node].allocated = 0;
			
 
				 
			
 
				 			/* XXX why do we need that ? */
			
 
				-			state->per_node[node].automatically_allocated = 0;
			
 
				+			handle->per_node[node].automatically_allocated = 0;
			
 
				 		}
			
 
				 
			
 
				 		liberated = mc->size;
			
 
				 
			
 
				-		STARPU_ASSERT(state->per_node[node].refcnt == 0);
			
 
				+		STARPU_ASSERT(handle->per_node[node].refcnt == 0);
			
 
				 	}
			
 
				 
			
 
				-//	starpu_spin_unlock(&state->header_lock);
			
 
				+//	starpu_spin_unlock(&handle->header_lock);
			
 
				 
			
 
				 	return liberated;
			
 
				 }
			
@@ -533,15 +531,15 @@ static size_t liberate_memory_on_node(mem_chunk_t mc, uint32_t node)
 
				  *	not referenced (or part of those).
			
 
				  *
			
 
				  */
			
 
				-int allocate_memory_on_node(data_state *state, uint32_t dst_node, unsigned may_alloc)
			
 
				+int allocate_memory_on_node(starpu_data_handle handle, uint32_t dst_node, unsigned may_alloc)
			
 
				 {
			
 
				 	unsigned attempts = 0;
			
 
				 	size_t allocated_memory;
			
 
				 
			
 
				-	STARPU_ASSERT(state);
			
 
				+	STARPU_ASSERT(handle);
			
 
				 
			
 
				 	/* A buffer is already allocated on the node */
			
 
				-	if (state->per_node[dst_node].allocated)
			
 
				+	if (handle->per_node[dst_node].allocated)
			
 
				 		return 0;
			
 
				 
			
 
				 	if (!may_alloc)
			
@@ -551,10 +549,10 @@ int allocate_memory_on_node(data_state *state, uint32_t dst_node, unsigned may_a
 
				 
			
 
				 #ifdef USE_ALLOCATION_CACHE
			
 
				 	/* perhaps we can directly reuse a buffer in the free-list */
			
 
				-	uint32_t footprint = compute_data_footprint(state);
			
 
				+	uint32_t footprint = compute_data_footprint(handle);
			
 
				 
			
 
				 	TRACE_START_ALLOC_REUSE(dst_node);
			
 
				-	if (try_to_find_reusable_mem_chunk(dst_node, state, footprint))
			
 
				+	if (try_to_find_reusable_mem_chunk(dst_node, handle, footprint))
			
 
				 	{
			
 
				 		allocation_cache_hit(dst_node);
			
 
				 		return 0;
			
@@ -563,18 +561,18 @@ int allocate_memory_on_node(data_state *state, uint32_t dst_node, unsigned may_a
 
				 #endif
			
 
				 
			
 
				 	do {
			
 
				-		STARPU_ASSERT(state->ops);
			
 
				-		STARPU_ASSERT(state->ops->allocate_data_on_node);
			
 
				+		STARPU_ASSERT(handle->ops);
			
 
				+		STARPU_ASSERT(handle->ops->allocate_data_on_node);
			
 
				 
			
 
				 		TRACE_START_ALLOC(dst_node);
			
 
				-		allocated_memory = state->ops->allocate_data_on_node(state, dst_node);
			
 
				+		allocated_memory = handle->ops->allocate_data_on_node(handle, dst_node);
			
 
				 		TRACE_END_ALLOC(dst_node);
			
 
				 
			
 
				 		if (!allocated_memory) {
			
 
				 			/* XXX perhaps we should find the proper granularity 
			
 
				 			 * not to waste our cache all the time */
			
 
				-			STARPU_ASSERT(state->ops->get_size);
			
 
				-			size_t data_size = state->ops->get_size(state);
			
 
				+			STARPU_ASSERT(handle->ops->get_size);
			
 
				+			size_t data_size = handle->ops->get_size(handle);
			
 
				 
			
 
				 			TRACE_START_MEMRECLAIM(dst_node);
			
 
				 			reclaim_memory(dst_node, 2*data_size, attempts);
			
@@ -587,10 +585,10 @@ int allocate_memory_on_node(data_state *state, uint32_t dst_node, unsigned may_a
 
				 	if (!allocated_memory)
			
 
				 		goto nomem;
			
 
				 
			
 
				-	register_mem_chunk(state, dst_node, allocated_memory, 1);
			
 
				+	register_mem_chunk(handle, dst_node, allocated_memory, 1);
			
 
				 
			
 
				-	state->per_node[dst_node].allocated = 1;
			
 
				-	state->per_node[dst_node].automatically_allocated = 1;
			
 
				+	handle->per_node[dst_node].allocated = 1;
			
 
				+	handle->per_node[dst_node].automatically_allocated = 1;
			
 
				 
			
 
				 	return 0;
			
 
				 nomem:
			
--- a/src/datawizard/user_interactions.c
+++ b/src/datawizard/user_interactions.c
@@ -20,11 +20,11 @@
 
				 #include <datawizard/write_back.h>
			
 
				 #include <core/dependencies/data-concurrency.h>
			
 
				 
			
 
				-int starpu_request_data_allocation(data_state *state, uint32_t node)
			
 
				+int starpu_request_data_allocation(starpu_data_handle handle, uint32_t node)
			
 
				 {
			
 
				 	data_request_t r;
			
 
				 
			
 
				-	r = create_data_request(state, 0, node, node, 0, 0, 1);
			
 
				+	r = create_data_request(handle, 0, node, node, 0, 0, 1);
			
 
				 
			
 
				 	/* we do not increase the refcnt associated to the request since we are
			
 
				 	 * not waiting for its termination */
			
@@ -35,7 +35,7 @@ int starpu_request_data_allocation(data_state *state, uint32_t node)
 
				 }
			
 
				 
			
 
				 struct state_and_node {
			
 
				-	data_state *state;
			
 
				+	starpu_data_handle state;
			
 
				 	starpu_access_mode mode;
			
 
				 	unsigned node;
			
 
				 	pthread_cond_t cond;
			
@@ -50,12 +50,12 @@ static inline void _starpu_sync_data_with_mem_continuation(void *arg)
 
				 	int ret;
			
 
				 	struct state_and_node *statenode = arg;
			
 
				 
			
 
				-	data_state *state = statenode->state;
			
 
				+	starpu_data_handle handle = statenode->state;
			
 
				 
			
 
				 	unsigned r = (statenode->mode != STARPU_W);
			
 
				 	unsigned w = (statenode->mode != STARPU_R);
			
 
				 
			
 
				-	ret = fetch_data_on_node(state, 0, r, w, 0);
			
 
				+	ret = fetch_data_on_node(handle, 0, r, w, 0);
			
 
				 	STARPU_ASSERT(!ret);
			
 
				 	
			
 
				 	pthread_mutex_lock(&statenode->lock);
			
@@ -65,7 +65,7 @@ static inline void _starpu_sync_data_with_mem_continuation(void *arg)
 
				 }
			
 
				 
			
 
				 /* The data must be released by calling starpu_release_data_from_mem later on */
			
 
				-int starpu_sync_data_with_mem(data_state *state, starpu_access_mode mode)
			
 
				+int starpu_sync_data_with_mem(starpu_data_handle handle, starpu_access_mode mode)
			
 
				 {
			
 
				 	/* it is forbidden to call this function from a callback or a codelet */
			
 
				 	if (STARPU_UNLIKELY(!worker_may_perform_blocking_calls()))
			
@@ -73,7 +73,7 @@ int starpu_sync_data_with_mem(data_state *state, starpu_access_mode mode)
 
				 
			
 
				 	struct state_and_node statenode =
			
 
				 	{
			
 
				-		.state = state,
			
 
				+		.state = handle,
			
 
				 		.mode = mode,
			
 
				 		.node = 0, // unused
			
 
				 		.cond = PTHREAD_COND_INITIALIZER,
			
@@ -84,7 +84,7 @@ int starpu_sync_data_with_mem(data_state *state, starpu_access_mode mode)
 
				 	/* we try to get the data, if we do not succeed immediately, we set a
			
 
				  	* callback function that will be executed automatically when the data is
			
 
				  	* available again, otherwise we fetch the data directly */
			
 
				-	if (!attempt_to_submit_data_request_from_apps(state, mode,
			
 
				+	if (!attempt_to_submit_data_request_from_apps(handle, mode,
			
 
				 			_starpu_sync_data_with_mem_continuation, &statenode))
			
 
				 	{
			
 
				 		/* no one has locked this data yet, so we proceed immediately */
			
@@ -102,10 +102,10 @@ int starpu_sync_data_with_mem(data_state *state, starpu_access_mode mode)
 
				 
			
 
				 /* This function must be called after starpu_sync_data_with_mem so that the
			
 
				  * application release the data */
			
 
				-void starpu_release_data_from_mem(data_state *state)
			
 
				+void starpu_release_data_from_mem(starpu_data_handle handle)
			
 
				 {
			
 
				 	/* The application can now release the rw-lock */
			
 
				-	release_data_on_node(state, 0, 0);
			
 
				+	release_data_on_node(handle, 0, 0);
			
 
				 }
			
 
				 
			
 
				 
			
@@ -130,7 +130,7 @@ static void _prefetch_data_on_node(void *arg)
 
				 
			
 
				 }
			
 
				 
			
 
				-int starpu_prefetch_data_on_node(data_state *state, unsigned node, unsigned async)
			
 
				+int starpu_prefetch_data_on_node(starpu_data_handle handle, unsigned node, unsigned async)
			
 
				 {
			
 
				 	/* it is forbidden to call this function from a callback or a codelet */
			
 
				 	if (STARPU_UNLIKELY(!worker_may_perform_blocking_calls()))
			
@@ -138,7 +138,7 @@ int starpu_prefetch_data_on_node(data_state *state, unsigned node, unsigned asyn
 
				 
			
 
				 	struct state_and_node statenode =
			
 
				 	{
			
 
				-		.state = state,
			
 
				+		.state = handle,
			
 
				 		.node = node,
			
 
				 		.async = async,
			
 
				 		.cond = PTHREAD_COND_INITIALIZER,
			
@@ -146,17 +146,17 @@ int starpu_prefetch_data_on_node(data_state *state, unsigned node, unsigned asyn
 
				 		.finished = 0
			
 
				 	};
			
 
				 
			
 
				-	if (!attempt_to_submit_data_request_from_apps(state, STARPU_R, _prefetch_data_on_node, &statenode))
			
 
				+	if (!attempt_to_submit_data_request_from_apps(handle, STARPU_R, _prefetch_data_on_node, &statenode))
			
 
				 	{
			
 
				 		/* we can immediately proceed */
			
 
				-		fetch_data_on_node(state, node, 1, 0, async);
			
 
				+		fetch_data_on_node(handle, node, 1, 0, async);
			
 
				 
			
 
				 		/* remove the "lock"/reference */
			
 
				 		if (!async)
			
 
				 		{
			
 
				-			starpu_spin_lock(&state->header_lock);
			
 
				-			notify_data_dependencies(state);
			
 
				-			starpu_spin_unlock(&state->header_lock);
			
 
				+			starpu_spin_lock(&handle->header_lock);
			
 
				+			notify_data_dependencies(handle);
			
 
				+			starpu_spin_unlock(&handle->header_lock);
			
 
				 		}
			
 
				 	}
			
 
				 	else {
			
--- a/src/datawizard/write_back.c
+++ b/src/datawizard/write_back.c
@@ -17,7 +17,7 @@
 
				 #include <datawizard/write_back.h>
			
 
				 #include <datawizard/coherency.h>
			
 
				 
			
 
				-void write_through_data(data_state *state, uint32_t requesting_node, 
			
 
				+void write_through_data(starpu_data_handle handle, uint32_t requesting_node, 
			
 
				 					   uint32_t write_through_mask)
			
 
				 {
			
 
				 	if ((write_through_mask & ~(1<<requesting_node)) == 0) {
			
@@ -25,7 +25,7 @@ void write_through_data(data_state *state, uint32_t requesting_node,
 
				 		return;
			
 
				 	}
			
 
				 
			
 
				-	while (starpu_spin_trylock(&state->header_lock))
			
 
				+	while (starpu_spin_trylock(&handle->header_lock))
			
 
				 		datawizard_progress(requesting_node, 1);
			
 
				 
			
 
				 	/* first commit all changes onto the nodes specified by the mask */
			
@@ -43,10 +43,10 @@ void write_through_data(data_state *state, uint32_t requesting_node,
 
				 
			
 
				 				/* check that there is not already a similar
			
 
				 				 * request that we should reuse */
			
 
				-				r = search_existing_data_request(state, node, 1, 0);
			
 
				+				r = search_existing_data_request(handle, node, 1, 0);
			
 
				 				if (!r) {
			
 
				 					/* there was no existing request so we create one now */
			
 
				-					r = create_data_request(state, requesting_node,
			
 
				+					r = create_data_request(handle, requesting_node,
			
 
				 							node, handling_node, 1, 0, 1);
			
 
				 					post_data_request(r, handling_node);
			
 
				 				}
			
@@ -59,18 +59,18 @@ void write_through_data(data_state *state, uint32_t requesting_node,
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	starpu_spin_unlock(&state->header_lock);
			
 
				+	starpu_spin_unlock(&handle->header_lock);
			
 
				 }
			
 
				 
			
 
				-void starpu_data_set_wb_mask(data_state *data, uint32_t wb_mask)
			
 
				+void starpu_data_set_wb_mask(starpu_data_handle handle, uint32_t wb_mask)
			
 
				 {
			
 
				-	data->wb_mask = wb_mask;
			
 
				+	handle->wb_mask = wb_mask;
			
 
				 
			
 
				 	/* in case the data has some children, set their wb_mask as well */
			
 
				-	if (data->nchildren > 0) 
			
 
				+	if (handle->nchildren > 0) 
			
 
				 	{
			
 
				 		int child;
			
 
				-		for (child = 0; child < data->nchildren; child++)
			
 
				-			starpu_data_set_wb_mask(&data->children[child], wb_mask);
			
 
				+		for (child = 0; child < handle->nchildren; child++)
			
 
				+			starpu_data_set_wb_mask(&handle->children[child], wb_mask);
			
 
				 	}
			
 
				 }
			
--- a/src/datawizard/write_back.h
+++ b/src/datawizard/write_back.h
@@ -20,7 +20,7 @@
 
				 #include <starpu.h>
			
 
				 #include <datawizard/coherency.h>
			
 
				 
			
 
				-void write_through_data(data_state *state, uint32_t requesting_node, 
			
 
				+void write_through_data(starpu_data_handle handle, uint32_t requesting_node, 
			
 
				 					   uint32_t write_through_mask);
			
 
				 
			
 
				 #endif // __DW_WRITE_BACK_H__