Procházet zdrojové kódy

Revert commit r2772. We cannot actually merge replicates and mem_chunks
because multiple replicates may point to the same mem_chunk (e.g. in the case
of filtered data). Keeping them separate is also much simpler, since it avoids
dealing with dynamically allocated replicate structures.

Cédric Augonnet před 14 roky
rodič
revize
d20dcb2005

+ 19 - 67
src/datawizard/coherency.c

@@ -56,7 +56,7 @@ uint32_t _starpu_select_src_node(starpu_data_handle handle)
 	uint32_t src_node_mask = 0;
 	for (node = 0; node < nnodes; node++)
 	{
-		if (handle->per_node[node]->state != STARPU_INVALID) {
+		if (handle->per_node[node].state != STARPU_INVALID) {
 			/* we found a copy ! */
 			src_node_mask |= (1<<node);
 		}
@@ -107,7 +107,7 @@ void _starpu_update_data_state(starpu_data_handle handle,
 		/* the requesting node now has the only valid copy */
 		uint32_t node;
 		for (node = 0; node < nnodes; node++)
-			handle->per_node[node]->state = STARPU_INVALID;
+			handle->per_node[node].state = STARPU_INVALID;
 
 		requesting_replicate->state = STARPU_OWNER;
 	}
@@ -118,7 +118,7 @@ void _starpu_update_data_state(starpu_data_handle handle,
 			uint32_t node;
 			for (node = 0; node < nnodes; node++)
 			{
-				struct starpu_data_replicate_s *replicate = handle->per_node[node];
+				struct starpu_data_replicate_s *replicate = &handle->per_node[node];
 				if (replicate->state != STARPU_INVALID)
 					replicate->state = STARPU_SHARED;
 			}
@@ -169,7 +169,7 @@ static starpu_data_request_t create_new_request_to_fetch_data(starpu_data_handle
 	unsigned src_is_a_gpu = (_starpu_get_node_kind(src_node) == STARPU_CUDA_RAM || _starpu_get_node_kind(src_node) == STARPU_OPENCL_RAM);
 	unsigned dst_is_a_gpu = (_starpu_get_node_kind(requesting_node) == STARPU_CUDA_RAM || _starpu_get_node_kind(requesting_node) == STARPU_OPENCL_RAM);
 
-	struct starpu_data_replicate_s *src_replicate = handle->per_node[src_node];
+	struct starpu_data_replicate_s *src_replicate = &handle->per_node[src_node];
 
 	/* we have to perform 2 successive requests for GPU->GPU transfers */
 	if ((mode & STARPU_R) && (src_is_a_gpu && dst_is_a_gpu)) {
@@ -177,7 +177,7 @@ static starpu_data_request_t create_new_request_to_fetch_data(starpu_data_handle
 		starpu_data_request_t r_src_to_ram;
 		starpu_data_request_t r_ram_to_dst;
 
-		struct starpu_data_replicate_s *ram_replicate = handle->per_node[0];
+		struct starpu_data_replicate_s *ram_replicate = &handle->per_node[0];
 
 		/* XXX we hardcode 0 as the RAM node ... */
 		/* We put a 1 in the number of dependencies because this
@@ -326,7 +326,7 @@ static int fetch_data(starpu_data_handle handle, struct starpu_data_replicate_s
 
 inline uint32_t _starpu_get_data_refcnt(starpu_data_handle handle, uint32_t node)
 {
-	return handle->per_node[node]->refcnt;
+	return handle->per_node[node].refcnt;
 }
 
 size_t _starpu_data_get_size(starpu_data_handle handle)
@@ -394,7 +394,7 @@ int _starpu_prefetch_task_input_on_node(struct starpu_task *task, uint32_t node)
 		if (mode & (STARPU_SCRATCH|STARPU_REDUX))
 			continue;
 
-		struct starpu_data_replicate_s *replicate = handle->per_node[node];
+		struct starpu_data_replicate_s *replicate = &handle->per_node[node];
 		prefetch_data_on_node(handle, replicate, mode);
 
 		_starpu_set_data_requested_flag_if_needed(replicate);
@@ -403,33 +403,6 @@ int _starpu_prefetch_task_input_on_node(struct starpu_task *task, uint32_t node)
 	return 0;
 }
 
-static struct starpu_data_replicate_s *initialize_data_replicate(starpu_data_handle handle, unsigned local_memory_node)
-{
-	struct starpu_data_replicate_s *replicate;
-
-	replicate = malloc(sizeof(struct starpu_data_replicate_s));
-	STARPU_ASSERT(replicate);
-
-	replicate->memory_node = local_memory_node;
-	replicate->relaxed_coherency = 1;
-
-	size_t interfacesize = handle->ops->interface_size;
-	replicate->interface = malloc(interfacesize);
-	STARPU_ASSERT(replicate->interface);
-	void *ram_interface = handle->per_node[0]->interface;
-	memcpy(replicate->interface, ram_interface, interfacesize);
-	replicate->allocated = 0;
-	replicate->automatically_allocated = 0;
-
-	replicate->state = STARPU_INVALID;
-	replicate->refcnt = 0;
-	replicate->handle = handle;
-	replicate->requested = 0;
-	replicate->request = NULL;
-
-	return replicate;
-}
-
 int _starpu_fetch_task_input(struct starpu_task *task, uint32_t mask)
 {
 	STARPU_TRACE_START_FETCH_INPUT(NULL);
@@ -446,44 +419,23 @@ int _starpu_fetch_task_input(struct starpu_task *task, uint32_t mask)
 		starpu_data_handle handle = descrs[index].handle;
 		starpu_access_mode mode = descrs[index].mode;
 
-		void *interface;
+		struct starpu_data_replicate_s *local_replicate;
 
 		if (mode & (STARPU_SCRATCH|STARPU_REDUX))
 		{
 			int workerid = starpu_worker_get_id();
-			struct starpu_data_replicate_s *local_replicate;
-
-			while (_starpu_spin_trylock(&handle->header_lock))
-				_starpu_datawizard_progress(local_memory_node, 1);
-
-			local_replicate = handle->per_worker[workerid];
-			if (!local_replicate)
-			{
-				local_replicate = initialize_data_replicate(handle, local_memory_node);
-				handle->per_worker[workerid] = local_replicate;
-			}
-
-			_starpu_spin_unlock(&handle->header_lock);
-
-			ret = fetch_data(handle, local_replicate, mode);
-			if (STARPU_UNLIKELY(ret))
-				goto enomem;
-
-			interface = local_replicate->interface;
+			local_replicate = &handle->per_worker[workerid];
 		}
 		else {
 			/* That's a "normal" buffer (R/W) */
+			local_replicate = &handle->per_node[local_memory_node];
+		}
 
-			struct starpu_data_replicate_s *local_replicate;
-			local_replicate = handle->per_node[local_memory_node];
-			ret = fetch_data(handle, local_replicate, mode);
-			if (STARPU_UNLIKELY(ret))
-				goto enomem;
+		ret = fetch_data(handle, local_replicate, mode);
+		if (STARPU_UNLIKELY(ret))
+			goto enomem;
 
-			interface = starpu_data_get_interface_on_node(handle, local_memory_node);
-		}
-		
-		task->interface[index] = interface;
+		task->interface[index] = local_replicate->interface;
 	}
 
 	STARPU_TRACE_END_FETCH_INPUT(NULL);
@@ -517,12 +469,12 @@ void _starpu_push_task_output(struct starpu_task *task, uint32_t mask)
 		if (mode & STARPU_RW)
 		{
 			unsigned local_node = _starpu_get_local_memory_node();
-			replicate = handle->per_node[local_node];
+			replicate = &handle->per_node[local_node];
 		}
 		else
 		{
 			int workerid = starpu_worker_get_id();
-			replicate = handle->per_worker[workerid];
+			replicate = &handle->per_worker[workerid];
 		}
 
 		_starpu_release_data_on_node(handle, mask, replicate);
@@ -541,8 +493,8 @@ unsigned _starpu_is_data_present_or_requested(starpu_data_handle handle, uint32_
 // XXX : this is just a hint, so we don't take the lock ...
 //	pthread_spin_lock(&handle->header_lock);
 
-	if (handle->per_node[node]->state != STARPU_INVALID 
-		|| handle->per_node[node]->requested || handle->per_node[node]->request)
+	if (handle->per_node[node].state != STARPU_INVALID 
+		|| handle->per_node[node].requested || handle->per_node[node].request)
 		ret = 1;
 
 //	pthread_spin_unlock(&handle->header_lock);

+ 2 - 2
src/datawizard/coherency.h

@@ -105,8 +105,8 @@ struct starpu_data_state_t {
 	unsigned nchildren;
 
 	/* describe the state of the data in term of coherency */
-	struct starpu_data_replicate_s *per_node[STARPU_MAXNODES];
-	struct starpu_data_replicate_s *per_worker[STARPU_NMAXWORKERS];
+	struct starpu_data_replicate_s per_node[STARPU_MAXNODES];
+	struct starpu_data_replicate_s per_worker[STARPU_NMAXWORKERS];
 
 	struct starpu_data_interface_ops_t *ops;
 

+ 36 - 12
src/datawizard/filters.c

@@ -148,9 +148,11 @@ void starpu_data_partition(starpu_data_handle initial_handle, struct starpu_data
 		unsigned node;
 		for (node = 0; node < STARPU_MAXNODES; node++)
 		{
-			struct starpu_data_replicate_s *initial_replicate = initial_handle->per_node[node];
+			struct starpu_data_replicate_s *initial_replicate; 
 			struct starpu_data_replicate_s *child_replicate;
-			child_replicate = child->per_node[node];
+
+			initial_replicate = &initial_handle->per_node[node];
+			child_replicate = &child->per_node[node];
 
 			child_replicate->state = initial_replicate->state;
 			child_replicate->allocated = initial_replicate->allocated;
@@ -165,6 +167,25 @@ void starpu_data_partition(starpu_data_handle initial_handle, struct starpu_data
 			f->filter_func(initial_interface, child_interface, f, i, nparts);
 		}
 
+		unsigned worker;
+		for (worker = 0; worker < STARPU_NMAXWORKERS; worker++)
+		{
+			struct starpu_data_replicate_s *child_replicate;
+			child_replicate = &child->per_worker[worker];
+			
+			child_replicate->state = STARPU_INVALID;
+			child_replicate->allocated = 0;
+			child_replicate->automatically_allocated = 0;
+			child_replicate->refcnt = 0;
+			child_replicate->memory_node = starpu_worker_get_memory_node(worker);
+			child_replicate->requested = 0;
+			child_replicate->request = NULL;
+			child_replicate->relaxed_coherency = 1;
+
+			/* duplicate  the content of the interface on node 0 */
+			memcpy(child_replicate->interface, child->per_node[0].interface, child->ops->interface_size);
+		}
+
 		/* We compute the size and the footprint of the child once and
 		 * store it in the handle */
 		child->data_size = child->ops->get_size(child);
@@ -193,7 +214,7 @@ void starpu_data_unpartition(starpu_data_handle root_handle, uint32_t gathering_
 			starpu_data_unpartition(child_handle, gathering_node);
 
 		int ret;
-		ret = _starpu_fetch_data_on_node(child_handle, child_handle->per_node[gathering_node], STARPU_R, 0, NULL, NULL);
+		ret = _starpu_fetch_data_on_node(child_handle, &child_handle->per_node[gathering_node], STARPU_R, 0, NULL, NULL);
 		/* for now we pretend that the RAM is almost unlimited and that gathering 
 		 * data should be possible from the node that does the unpartitioning ... we
 		 * don't want to have the programmer deal with memory shortage at that time,
@@ -225,7 +246,7 @@ void starpu_data_unpartition(starpu_data_handle root_handle, uint32_t gathering_
 
 		for (child = 0; child < root_handle->nchildren; child++)
 		{
-			struct starpu_data_replicate_s *local = root_handle->children[child].per_node[node];
+			struct starpu_data_replicate_s *local = &root_handle->children[child].per_node[node];
 
 			if (local->state == STARPU_INVALID) {
 				isvalid = 0; 
@@ -252,7 +273,7 @@ void starpu_data_unpartition(starpu_data_handle root_handle, uint32_t gathering_
 
 	for (node = 0; node < STARPU_MAXNODES; node++)
 	{
-		root_handle->per_node[node]->state = 
+		root_handle->per_node[node].state = 
 			still_valid[node]?newstate:STARPU_INVALID;
 	}
 
@@ -270,6 +291,7 @@ static void starpu_data_create_children(starpu_data_handle handle, unsigned nchi
 	STARPU_ASSERT(handle->children);
 
 	unsigned node;
+	unsigned worker;
 	unsigned child;
 
 	for (child = 0; child < nchildren; child++)
@@ -290,15 +312,17 @@ static void starpu_data_create_children(starpu_data_handle handle, unsigned nchi
 
 		for (node = 0; node < STARPU_MAXNODES; node++)
 		{
-			handle_child->per_node[node] = calloc(1, sizeof(struct starpu_data_replicate_s));
-			STARPU_ASSERT(handle_child->per_node[node]);
-
 			/* relaxed_coherency = 0 */
+			handle_child->per_node[node].handle = handle_child;
+			handle_child->per_node[node].interface = calloc(1, interfacesize);
+			STARPU_ASSERT(handle_child->per_node[node].interface);
+		}
 
-			handle_child->per_node[node]->handle = handle_child;
-
-			handle_child->per_node[node]->interface = calloc(1, interfacesize);
-			STARPU_ASSERT(handle_child->per_node[node]->interface);
+		for (worker = 0; worker < STARPU_NMAXWORKERS; worker++) /* per-worker replicates of this child */
+		{
+			handle_child->per_worker[worker].handle = handle_child;
+			handle_child->per_worker[worker].interface = calloc(1, interfacesize); /* zero-filled interface buffer */
+			STARPU_ASSERT(handle_child->per_worker[worker].interface); /* check the buffer just allocated, not per_node[node] (node is past the end here) */
 		}
 	}
 	

+ 42 - 16
src/datawizard/interfaces/data_interface.c

@@ -78,25 +78,44 @@ static void _starpu_register_new_data(starpu_data_handle handle,
 	for (node = 0; node < STARPU_MAXNODES; node++)
 	{
 		struct starpu_data_replicate_s *replicate;
-		replicate = handle->per_node[node];
+		replicate = &handle->per_node[node];
 		
 		replicate->memory_node = node;
+		replicate->relaxed_coherency = 0;
+		replicate->refcnt = 0;
 
 		if (node == home_node) {
 			/* this is the home node with the only valid copy */
 			replicate->state = STARPU_OWNER;
 			replicate->allocated = 1;
 			replicate->automatically_allocated = 0;
-			replicate->refcnt = 0;
 		}
 		else {
 			/* the value is not available here yet */
 			replicate->state = STARPU_INVALID;
 			replicate->allocated = 0;
-			replicate->refcnt = 0;
 		}
 	}
 
+	unsigned worker;
+	for (worker = 0; worker < STARPU_NMAXWORKERS; worker++)
+	{
+		struct starpu_data_replicate_s *replicate;
+		replicate = &handle->per_worker[worker];
+		replicate->allocated = 0;
+		replicate->automatically_allocated = 0;
+		replicate->state = STARPU_INVALID;
+		replicate->refcnt = 0;
+		replicate->handle = handle;
+		replicate->requested = 0;
+		replicate->request = NULL;
+		replicate->relaxed_coherency = 1;
+		replicate->memory_node = starpu_worker_get_memory_node(worker);
+
+		/* duplicate  the content of the interface on node 0 */
+		memcpy(replicate->interface, handle->per_node[0].interface, handle->ops->interface_size);
+	} 
+
 	/* now the data is available ! */
 	_starpu_spin_unlock(&handle->header_lock);
 }
@@ -116,10 +135,7 @@ static starpu_data_handle _starpu_data_handle_allocate(struct starpu_data_interf
 	for (node = 0; node < STARPU_MAXNODES; node++)
 	{
 		struct starpu_data_replicate_s *replicate;
-
-		replicate = calloc(1, sizeof(struct starpu_data_replicate_s));
-		STARPU_ASSERT(replicate);
-		handle->per_node[node] = replicate;
+		replicate = &handle->per_node[node];
 		/* relaxed_coherency = 0 */
 
 		replicate->handle = handle;
@@ -128,6 +144,19 @@ static starpu_data_handle _starpu_data_handle_allocate(struct starpu_data_interf
 		STARPU_ASSERT(replicate->interface);
 	}
 
+	unsigned worker;
+	for (worker = 0; worker < STARPU_NMAXWORKERS; worker++)
+	{
+		struct starpu_data_replicate_s *replicate;
+		replicate = &handle->per_worker[worker];
+
+		replicate->handle = handle;
+
+		replicate->interface = calloc(1, interfacesize);
+		STARPU_ASSERT(replicate->interface);
+
+	}
+
 	return handle;
 }
 
@@ -156,7 +185,7 @@ void _starpu_data_free_interfaces(starpu_data_handle handle)
 {
 	unsigned node;
 	for (node = 0; node < STARPU_MAXNODES; node++)
-		free(handle->per_node[node]->interface);
+		free(handle->per_node[node].interface);
 }
 
 struct unregister_callback_arg {
@@ -176,7 +205,7 @@ static void _starpu_data_unregister_fetch_data_callback(void *_arg)
 
 	STARPU_ASSERT(handle);
 
-	struct starpu_data_replicate_s *replicate = handle->per_node[arg->memory_node];
+	struct starpu_data_replicate_s *replicate = &handle->per_node[arg->memory_node];
 
 	ret = _starpu_fetch_data_on_node(handle, replicate, STARPU_R, 0, NULL, NULL);
 	STARPU_ASSERT(!ret);
@@ -212,7 +241,7 @@ void starpu_data_unregister(starpu_data_handle handle)
 				_starpu_data_unregister_fetch_data_callback, &arg))
 		{
 			/* no one has locked this data yet, so we proceed immediately */
-			struct starpu_data_replicate_s *home_replicate = handle->per_node[home_node];
+			struct starpu_data_replicate_s *home_replicate = &handle->per_node[home_node];
 			int ret = _starpu_fetch_data_on_node(handle, home_replicate, STARPU_R, 0, NULL, NULL);
 			STARPU_ASSERT(!ret);
 		}
@@ -230,15 +259,12 @@ void starpu_data_unregister(starpu_data_handle handle)
 	unsigned node;
 	for (node = 0; node < STARPU_MAXNODES; node++)
 	{
-		struct starpu_data_replicate_s *local = handle->per_node[node];
+		struct starpu_data_replicate_s *local = &handle->per_node[node];
 
 		if (local->allocated && local->automatically_allocated){
 			/* free the data copy in a lazy fashion */
 			_starpu_request_mem_chunk_removal(handle, node);
 		}
-		else {
-			free(local);
-		}
 	}
 
 	starpu_data_requester_list_delete(handle->req_list);
@@ -257,7 +283,7 @@ void starpu_data_invalidate(starpu_data_handle handle)
 	unsigned node;
 	for (node = 0; node < STARPU_MAXNODES; node++)
 	{
-		struct starpu_data_replicate_s *local = handle->per_node[node];
+		struct starpu_data_replicate_s *local = &handle->per_node[node];
 
 		if (local->allocated && local->automatically_allocated){
 			/* free the data copy in a lazy fashion */
@@ -279,5 +305,5 @@ unsigned starpu_get_handle_interface_id(starpu_data_handle handle)
 
 void *starpu_data_get_interface_on_node(starpu_data_handle handle, unsigned memory_node)
 {
-	return handle->per_node[memory_node]->interface;
+	return handle->per_node[memory_node].interface;
 }

+ 8 - 8
src/datawizard/memalloc.c

@@ -122,8 +122,8 @@ static void transfer_subtree_to_node(starpu_data_handle handle, unsigned src_nod
 
 	if (handle->nchildren == 0)
 	{
-		struct starpu_data_replicate_s *src_replicate = handle->per_node[src_node];
-		struct starpu_data_replicate_s *dst_replicate = handle->per_node[dst_node];
+		struct starpu_data_replicate_s *src_replicate = &handle->per_node[src_node];
+		struct starpu_data_replicate_s *dst_replicate = &handle->per_node[dst_node];
 
 		/* this is a leaf */
 		switch(src_replicate->state) {
@@ -153,14 +153,14 @@ static void transfer_subtree_to_node(starpu_data_handle handle, unsigned src_nod
 			cnt = 0;
 			for (i = 0; i < STARPU_MAXNODES; i++)
 			{
-				if (handle->per_node[i]->state == STARPU_SHARED) {
+				if (handle->per_node[i].state == STARPU_SHARED) {
 					cnt++; 
 					last = i;
 				}
 			}
 
 			if (cnt == 1)
-				handle->per_node[last]->state = STARPU_OWNER;
+				handle->per_node[last].state = STARPU_OWNER;
 
 			break;
 		case STARPU_INVALID:
@@ -285,13 +285,13 @@ static size_t try_to_free_mem_chunk(starpu_mem_chunk_t mc, unsigned node)
 		/* check if they are all "free" */
 		if (may_free_subtree(handle, node))
 		{
-			STARPU_ASSERT(handle->per_node[node]->refcnt == 0);
+			STARPU_ASSERT(handle->per_node[node].refcnt == 0);
 	
 			/* in case there was nobody using that buffer, throw it 
 			 * away after writing it back to main memory */
 			transfer_subtree_to_node(handle, node, 0);
 	
-			STARPU_ASSERT(handle->per_node[node]->refcnt == 0);
+			STARPU_ASSERT(handle->per_node[node].refcnt == 0);
 	
 			/* now the actual buffer may be freed */
 			freed = do_free_mem_chunk(mc, node);
@@ -439,7 +439,7 @@ starpu_mem_chunk_t _starpu_memchunk_cache_lookup_locked(uint32_t node, starpu_da
 		if (mc->footprint == footprint)
 		{
 			/* Is that a false hit ? (this is _very_ unlikely) */
-			if (_starpu_data_interface_compare(handle->per_node[node]->interface, handle->ops, mc->interface, mc->ops))
+			if (_starpu_data_interface_compare(handle->per_node[node].interface, handle->ops, mc->interface, mc->ops))
 				continue;
 
 			/* Cache hit */
@@ -733,5 +733,5 @@ int _starpu_allocate_memory_on_node(starpu_data_handle handle, struct starpu_dat
 
 unsigned starpu_data_test_if_allocated_on_node(starpu_data_handle handle, uint32_t memory_node)
 {
-	return handle->per_node[memory_node]->allocated;
+	return handle->per_node[memory_node].allocated;
 }

+ 11 - 11
src/datawizard/user_interactions.c

@@ -30,7 +30,7 @@ int starpu_data_request_allocation(starpu_data_handle handle, uint32_t node)
 
 	STARPU_ASSERT(handle);
 
-	r = _starpu_create_data_request(handle, NULL, handle->per_node[node], node, 0, 0, 1);
+	r = _starpu_create_data_request(handle, NULL, &handle->per_node[node], node, 0, 0, 1);
 
 	/* we do not increase the refcnt associated to the request since we are
 	 * not waiting for its termination */
@@ -82,7 +82,7 @@ static void _starpu_data_acquire_continuation_non_blocking(void *arg)
 
 	STARPU_ASSERT(handle);
 
-	struct starpu_data_replicate_s *ram_replicate = handle->per_node[0];
+	struct starpu_data_replicate_s *ram_replicate = &handle->per_node[0];
 
 	ret = _starpu_fetch_data_on_node(handle, ram_replicate, wrapper->mode, 1,
 			_starpu_data_acquire_fetch_data_callback, wrapper);
@@ -124,7 +124,7 @@ int starpu_data_acquire_cb(starpu_data_handle handle,
 
 #warning TODO instead of having the is_prefetch argument, _starpu_fetch_data shoud consider two flags: async and detached
 	_starpu_spin_lock(&handle->header_lock);
-	handle->per_node[0]->refcnt++;
+	handle->per_node[0].refcnt++;
 	_starpu_spin_unlock(&handle->header_lock);
 
 	PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex);
@@ -174,7 +174,7 @@ static inline void _starpu_data_acquire_continuation(void *arg)
 
 	STARPU_ASSERT(handle);
 
-	struct starpu_data_replicate_s *ram_replicate = handle->per_node[0];
+	struct starpu_data_replicate_s *ram_replicate = &handle->per_node[0];
 
 	_starpu_fetch_data_on_node(handle, ram_replicate, wrapper->mode, 0, NULL, NULL);
 	
@@ -244,7 +244,7 @@ int starpu_data_acquire(starpu_data_handle handle, starpu_access_mode mode)
 	if (!_starpu_attempt_to_submit_data_request_from_apps(handle, mode, _starpu_data_acquire_continuation, &wrapper))
 	{
 		/* no one has locked this data yet, so we proceed immediately */
-		struct starpu_data_replicate_s *ram_replicate = handle->per_node[0];
+		struct starpu_data_replicate_s *ram_replicate = &handle->per_node[0];
 		int ret = _starpu_fetch_data_on_node(handle, ram_replicate, mode, 0, NULL, NULL);
 		STARPU_ASSERT(!ret);
 	}
@@ -272,7 +272,7 @@ void starpu_data_release(starpu_data_handle handle)
 	STARPU_ASSERT(handle);
 
 	/* The application can now release the rw-lock */
-	_starpu_release_data_on_node(handle, 0, handle->per_node[0]);
+	_starpu_release_data_on_node(handle, 0, &handle->per_node[0]);
 
 	/* In case there are some implicit dependencies, unlock the "post sync" tasks */
 	_starpu_unlock_post_sync_tasks(handle);
@@ -284,7 +284,7 @@ static void _prefetch_data_on_node(void *arg)
 	starpu_data_handle handle = wrapper->handle;
         int ret;
 
-	struct starpu_data_replicate_s *replicate = handle->per_node[wrapper->node];
+	struct starpu_data_replicate_s *replicate = &handle->per_node[wrapper->node];
 	ret = _starpu_fetch_data_on_node(handle, replicate, STARPU_R, wrapper->async, NULL, NULL);
         STARPU_ASSERT(!ret);
 
@@ -323,7 +323,7 @@ int _starpu_prefetch_data_on_node_with_mode(starpu_data_handle handle, unsigned
 	if (!_starpu_attempt_to_submit_data_request_from_apps(handle, mode, _prefetch_data_on_node, &wrapper))
 	{
 		/* we can immediately proceed */
-		struct starpu_data_replicate_s *replicate = handle->per_node[node];
+		struct starpu_data_replicate_s *replicate = &handle->per_node[node];
 		_starpu_fetch_data_on_node(handle, replicate, mode, async, NULL, NULL);
 
 		/* remove the "lock"/reference */
@@ -414,13 +414,13 @@ void starpu_data_query_status(starpu_data_handle handle, int memory_node, int *i
 //	_starpu_spin_lock(&handle->header_lock);
 
 	if (is_allocated)
-		*is_allocated = handle->per_node[memory_node]->allocated;
+		*is_allocated = handle->per_node[memory_node].allocated;
 
 	if (is_valid)
-		*is_valid = (handle->per_node[memory_node]->state != STARPU_INVALID);
+		*is_valid = (handle->per_node[memory_node].state != STARPU_INVALID);
 
 	if (is_requested)
-		*is_requested = handle->per_node[memory_node]->requested;
+		*is_requested = handle->per_node[memory_node].requested;
 
 //	_starpu_spin_unlock(&handle->header_lock);
 }

+ 3 - 3
src/datawizard/write_back.c

@@ -42,11 +42,11 @@ void _starpu_write_through_data(starpu_data_handle handle, uint32_t requesting_n
 
 				/* check that there is not already a similar
 				 * request that we should reuse */
-				r = _starpu_search_existing_data_request(handle->per_node[node], STARPU_R);
+				r = _starpu_search_existing_data_request(&handle->per_node[node], STARPU_R);
 				if (!r) {
 					/* there was no existing request so we create one now */
-					r = _starpu_create_data_request(handle, handle->per_node[requesting_node],
-							handle->per_node[node], handling_node, STARPU_R, 0, 1);
+					r = _starpu_create_data_request(handle, &handle->per_node[requesting_node],
+							&handle->per_node[node], handling_node, STARPU_R, 0, 1);
 					_starpu_post_data_request(r, handling_node);
 				}
 				else {

+ 2 - 2
src/debug/latency.c

@@ -27,11 +27,11 @@ void _starpu_benchmark_ping_pong(starpu_data_handle handle,
 	{
 		int ret;
 
-		struct starpu_data_replicate_s *replicate_0 = handle->per_node[node0];
+		struct starpu_data_replicate_s *replicate_0 = &handle->per_node[node0];
 		ret = _starpu_fetch_data_on_node(handle, replicate_0, STARPU_RW, 0, NULL, NULL);
 		STARPU_ASSERT(!ret);
 
-		struct starpu_data_replicate_s *replicate_1 = handle->per_node[node1];
+		struct starpu_data_replicate_s *replicate_1 = &handle->per_node[node1];
 		ret = _starpu_fetch_data_on_node(handle, replicate_1, STARPU_RW, 0, NULL, NULL);
 		STARPU_ASSERT(!ret);
 	}