Browse Source

- Implement the SCRATCH access mode using per-worker data replicates in the
handle structure rather than storing memchunks in the job structure. This
way, we don't have to reallocate scratch memory every time there is a new
task.
- Add a "relaxed coherency" flag in the memchunk and replicate structures. This
denote a memory chunck that can only be accessed in a relaxed coherency mode
(ie. scratch, and redux later on). "relaxed coherency" memchunks cannot be
used with filters.
- Memory allocation routines manipulate replicates instead of (handle,node)
pairs whenever possible.

Cédric Augonnet 14 years ago
parent
commit
e3620a36d3

+ 0 - 5
src/core/jobs.h

@@ -66,11 +66,6 @@ LIST_TYPE(starpu_job,
 	 * handles in the same order. */
 	struct starpu_buffer_descr_t ordered_buffers[STARPU_NMAXBUFS];
 	
-	/* In case the task accesses a piece of data in SCRATCH mode, we create
-	 * a memchunk specifically for that purpose when the task is submitted.
-	 * It is put back into the memchunk cache when the task is finished. */
-	starpu_mem_chunk_t scratch_memchunks[STARPU_NMAXBUFS];
-
 	/* If a tag is associated to the job, this points to the internal data
 	 * structure that describes the tag status. */
 	struct starpu_tag_s *tag;

+ 57 - 34
src/datawizard/coherency.c

@@ -304,8 +304,6 @@ static int prefetch_data_on_node(starpu_data_handle handle, struct starpu_data_r
 
 static int fetch_data(starpu_data_handle handle, struct starpu_data_replicate_s *replicate, starpu_access_mode mode)
 {
-	STARPU_ASSERT(!(mode & STARPU_SCRATCH));
-
 	return _starpu_fetch_data_on_node(handle, replicate, mode, 0, NULL, NULL);
 }
 
@@ -388,17 +386,41 @@ int _starpu_prefetch_task_input_on_node(struct starpu_task *task, uint32_t node)
 	return 0;
 }
 
+static struct starpu_data_replicate_s *initialize_data_replicate(starpu_data_handle handle, unsigned local_memory_node)
+{
+	struct starpu_data_replicate_s *replicate;
+
+	replicate = malloc(sizeof(struct starpu_data_replicate_s));
+	STARPU_ASSERT(replicate);
+
+	replicate->memory_node = local_memory_node;
+	replicate->relaxed_coherency = 1;
+
+	size_t interfacesize = handle->ops->interface_size;
+	replicate->interface = malloc(interfacesize);
+	STARPU_ASSERT(replicate->interface);
+	void *ram_interface = handle->per_node[0]->interface;
+	memcpy(replicate->interface, ram_interface, interfacesize);
+	replicate->allocated = 0;
+	replicate->automatically_allocated = 0;
+
+	replicate->state = STARPU_INVALID;
+	replicate->refcnt = 0;
+	replicate->handle = handle;
+	replicate->requested = 0;
+	replicate->request = NULL;
+
+	return replicate;
+}
+
 int _starpu_fetch_task_input(struct starpu_task *task, uint32_t mask)
 {
 	STARPU_TRACE_START_FETCH_INPUT(NULL);
 
-	uint32_t local_memory_node = _starpu_get_local_memory_node();
-
 	starpu_buffer_descr *descrs = task->buffers;
 	unsigned nbuffers = task->cl->nbuffers;
 
-	/* TODO get that from the stack */
-	starpu_job_t j = (struct starpu_job_s *)task->starpu_private;
+	unsigned local_memory_node = _starpu_get_local_memory_node();
 
 	unsigned index;
 	for (index = 0; index < nbuffers; index++)
@@ -411,31 +433,30 @@ int _starpu_fetch_task_input(struct starpu_task *task, uint32_t mask)
 
 		if (mode & STARPU_SCRATCH)
 		{
-			starpu_mem_chunk_t mc;
-			mc = _starpu_memchunk_cache_lookup(local_memory_node, handle);
-			if (!mc)
-			{
-				/* Cache miss */
+			int workerid = starpu_worker_get_id();
+			struct starpu_data_replicate_s *local_replicate;
 
-				/* This is a scratch memory, so we duplicate (any of)
-				 * the interface which contains sufficient information
-				 * to allocate the buffer. */
-				size_t interface_size = handle->ops->interface_size;
-				void *src_interface = starpu_data_get_interface_on_node(handle, local_memory_node);
-	
-				/* Pass the interface to StarPU so that the buffer can be allocated */
-				_starpu_allocate_interface(handle, src_interface, local_memory_node);
+			while (_starpu_spin_trylock(&handle->header_lock))
+				_starpu_datawizard_progress(local_memory_node, 1);
 
-				size_t size = _starpu_data_get_size(handle);
-#warning TODO create a replicate struct here:
-				mc = _starpu_memchunk_init(handle, size, src_interface, interface_size, 1);
+			local_replicate = handle->per_worker[workerid];
+			if (!local_replicate)
+			{
+				local_replicate = initialize_data_replicate(handle, local_memory_node);
+				handle->per_worker[workerid] = local_replicate;
 			}
 
-			interface = mc->interface;
-			j->scratch_memchunks[index] = mc;
+			_starpu_spin_unlock(&handle->header_lock);
+
+			ret = fetch_data(handle, local_replicate, mode);
+			if (STARPU_UNLIKELY(ret))
+				goto enomem;
+
+			interface = local_replicate->interface;
 		}
 		else {
 			/* That's a "normal" buffer (R/W) */
+
 			struct starpu_data_replicate_s *local_replicate;
 			local_replicate = handle->per_node[local_memory_node];
 			ret = fetch_data(handle, local_replicate, mode);
@@ -468,25 +489,27 @@ void _starpu_push_task_output(struct starpu_task *task, uint32_t mask)
         starpu_buffer_descr *descrs = task->buffers;
         unsigned nbuffers = task->cl->nbuffers;
 
-	starpu_job_t j = (struct starpu_job_s *)task->starpu_private;
-
-	uint32_t local_node = _starpu_get_local_memory_node();
-
 	unsigned index;
 	for (index = 0; index < nbuffers; index++)
 	{
 		starpu_data_handle handle = descrs[index].handle;
 		starpu_access_mode mode = descrs[index].mode;
 
-		if (mode & STARPU_SCRATCH)
+		struct starpu_data_replicate_s *replicate;
+
+		if (mode & STARPU_RW)
 		{
-			_starpu_memchunk_cache_insert(local_node, j->scratch_memchunks[index]);
+			unsigned local_node = _starpu_get_local_memory_node();
+			replicate = handle->per_node[local_node];
 		}
-		else {
-			struct starpu_data_replicate_s *replicate = handle->per_node[local_node];
-			_starpu_release_data_on_node(handle, mask, replicate);
-			_starpu_release_data_enforce_sequential_consistency(task, handle);
+		else
+		{
+			int workerid = starpu_worker_get_id();
+			replicate = handle->per_worker[workerid];
 		}
+
+		_starpu_release_data_on_node(handle, mask, replicate);
+		_starpu_release_data_enforce_sequential_consistency(task, handle);
 	}
 
 	STARPU_TRACE_END_PUSH_OUTPUT(NULL);

+ 5 - 0
src/datawizard/coherency.h

@@ -45,6 +45,10 @@ LIST_TYPE(starpu_data_replicate,
 
 	unsigned memory_node;
 
+	/* A buffer that is used for SCRATCH or reduction cannnot be used with
+	 * filters. */
+	unsigned relaxed_coherency;
+
 	/* describes the state of the local data in term of coherency */
 	starpu_cache_state	state; 
 
@@ -102,6 +106,7 @@ struct starpu_data_state_t {
 
 	/* describe the state of the data in term of coherency */
 	struct starpu_data_replicate_s *per_node[STARPU_MAXNODES];
+	struct starpu_data_replicate_s *per_worker[STARPU_NMAXWORKERS];
 
 	struct starpu_data_interface_ops_t *ops;
 

+ 3 - 1
src/datawizard/filters.c

@@ -235,7 +235,7 @@ void starpu_data_unpartition(starpu_data_handle root_handle, uint32_t gathering_
 
 		}
 
-		/* no problem was found so the node still has a valid copy */
+		/* if there was no invalid copy, the node still has a valid copy */
 		still_valid[node] = isvalid;
 		nvalids++;
 	}
@@ -288,6 +288,8 @@ static void starpu_data_create_children(starpu_data_handle handle, unsigned nchi
 			handle_child->per_node[node] = calloc(1, sizeof(struct starpu_data_replicate_s));
 			STARPU_ASSERT(handle_child->per_node[node]);
 
+			/* relaxed_coherency = 0 */
+
 			handle_child->per_node[node]->handle = handle_child;
 
 			handle_child->per_node[node]->interface = calloc(1, interfacesize);

+ 1 - 0
src/datawizard/interfaces/data_interface.c

@@ -116,6 +116,7 @@ static starpu_data_handle _starpu_data_handle_allocate(struct starpu_data_interf
 		replicate = calloc(1, sizeof(struct starpu_data_replicate_s));
 		STARPU_ASSERT(replicate);
 		handle->per_node[node] = replicate;
+		/* relaxed_coherency = 0 */
 
 		replicate->handle = handle;
 

+ 124 - 96
src/datawizard/memalloc.c

@@ -26,8 +26,6 @@ static starpu_mem_chunk_list_t mc_list[STARPU_MAXNODES];
 /* Explicitly caches memory chunks that can be reused */
 static starpu_mem_chunk_list_t memchunk_cache[STARPU_MAXNODES];
 
-static size_t free_memory_on_node(starpu_mem_chunk_t mc, uint32_t node);
-
 void _starpu_init_mem_chunk_lists(void)
 {
 	unsigned i;
@@ -140,7 +138,7 @@ static void transfer_subtree_to_node(starpu_data_handle handle, unsigned src_nod
 			src_replicate->refcnt++;
 			dst_replicate->refcnt++;
 
-			ret = _starpu_driver_copy_data_1_to_1(handle, handle->per_node[src_node], handle->per_node[dst_node], 0, NULL, 1);
+			ret = _starpu_driver_copy_data_1_to_1(handle, src_replicate, dst_replicate, 0, NULL, 1);
 			STARPU_ASSERT(ret == 0);
 
 			src_replicate->refcnt--;
@@ -184,6 +182,56 @@ static void transfer_subtree_to_node(starpu_data_handle handle, unsigned src_nod
 	}
 }
 
+static size_t free_memory_on_node(starpu_mem_chunk_t mc, uint32_t node)
+{
+	size_t freed = 0;
+
+	STARPU_ASSERT(mc->ops);
+	STARPU_ASSERT(mc->ops->free_data_on_node);
+
+	starpu_data_handle handle = mc->data;
+
+	/* Does this memory chunk refers to a handle that does not exist
+	 * anymore ? */
+	unsigned data_was_deleted = mc->data_was_deleted;
+
+	struct starpu_data_replicate_s *replicate = mc->replicate;
+
+//	while (_starpu_spin_trylock(&handle->header_lock))
+//		_starpu_datawizard_progress(_starpu_get_local_memory_node());
+
+#warning can we block here ?
+//	_starpu_spin_lock(&handle->header_lock);
+
+	if (mc->automatically_allocated && 
+		(!handle || data_was_deleted || replicate->refcnt == 0))
+	{
+		if (handle && !data_was_deleted)
+			STARPU_ASSERT(replicate->allocated);
+
+		mc->ops->free_data_on_node(mc->interface, node);
+
+		if (handle && !data_was_deleted)
+		{
+			replicate->allocated = 0;
+
+			/* XXX why do we need that ? */
+			replicate->automatically_allocated = 0;
+		}
+
+		freed = mc->size;
+
+		if (handle && !data_was_deleted)
+			STARPU_ASSERT(replicate->refcnt == 0);
+	}
+
+//	_starpu_spin_unlock(&handle->header_lock);
+
+	return freed;
+}
+
+
+
 static size_t do_free_mem_chunk(starpu_mem_chunk_t mc, unsigned node)
 {
 	size_t size;
@@ -200,43 +248,66 @@ static size_t do_free_mem_chunk(starpu_mem_chunk_t mc, unsigned node)
 	return size; 
 }
 
+/* This function is called for memory chunks that are possibly in used (ie. not
+ * in the cache). They should therefore still be associated to a handle. */
 static size_t try_to_free_mem_chunk(starpu_mem_chunk_t mc, unsigned node)
 {
 	size_t freed = 0;
 
 	starpu_data_handle handle;
-
 	handle = mc->data;
-
 	STARPU_ASSERT(handle);
 
-	/* try to lock all the leafs of the subtree */
-	lock_all_subtree(handle);
-
-	/* check if they are all "free" */
-	if (may_free_subtree(handle, node))
+	/* Either it's a "relaxed coherency" memchunk, or it's a memchunk that
+	 * could be used with filters. */
+	if (mc->relaxed_coherency)
 	{
-		STARPU_ASSERT(handle->per_node[node]->refcnt == 0);
+		STARPU_ASSERT(mc->replicate);
 
-		/* in case there was nobody using that buffer, throw it 
-		 * away after writing it back to main memory */
-		transfer_subtree_to_node(handle, node, 0);
+		while (_starpu_spin_trylock(&handle->header_lock))
+			_starpu_datawizard_progress(_starpu_get_local_memory_node(), 0);
 
-		STARPU_ASSERT(handle->per_node[node]->refcnt == 0);
+		if (mc->replicate->refcnt == 0)
+		{
+			/* Note taht there is no need to transfer any data or
+			 * to update the status in terms of MSI protocol
+			 * because this memchunk is associated to a replicate
+			 * in "relaxed coherency" mode. */
+			freed = do_free_mem_chunk(mc, node);
+		}
 
-		/* now the actual buffer may be freed */
-		freed = do_free_mem_chunk(mc, node);
+		_starpu_spin_unlock(&handle->header_lock);
+	}
+	else {
+		/* try to lock all the leafs of the subtree */
+		lock_all_subtree(handle);
+	
+		/* check if they are all "free" */
+		if (may_free_subtree(handle, node))
+		{
+			STARPU_ASSERT(handle->per_node[node]->refcnt == 0);
+	
+			/* in case there was nobody using that buffer, throw it 
+			 * away after writing it back to main memory */
+			transfer_subtree_to_node(handle, node, 0);
+	
+			STARPU_ASSERT(handle->per_node[node]->refcnt == 0);
+	
+			/* now the actual buffer may be freed */
+			freed = do_free_mem_chunk(mc, node);
+		}
+	
+		/* unlock the leafs */
+		unlock_all_subtree(handle);
 	}
-
-	/* unlock the leafs */
-	unlock_all_subtree(handle);
-
 	return freed;
 }
 
 #ifdef STARPU_USE_ALLOCATION_CACHE
-/* we assume that mc_rwlock[node] is taken */
-static void reuse_mem_chunk(unsigned node, starpu_data_handle new_data, starpu_mem_chunk_t mc, unsigned is_already_in_mc_list)
+/* We assume that mc_rwlock[node] is taken. is_already_in_mc_list indicates
+ * that the mc is already in the list of buffers that are possibly used, and
+ * therefore not in the cache. */
+static void reuse_mem_chunk(unsigned node, struct starpu_data_replicate_s *new_replicate, starpu_mem_chunk_t mc, unsigned is_already_in_mc_list)
 {
 	starpu_data_handle old_data;
 	old_data = mc->data;
@@ -250,20 +321,18 @@ static void reuse_mem_chunk(unsigned node, starpu_data_handle new_data, starpu_m
 		starpu_mem_chunk_list_erase(memchunk_cache[node], mc);
 	}
 
-	if (!mc->data_was_deleted)
-	{
-		struct starpu_data_replicate_s *old_replicate = old_data->per_node[node];
-		old_replicate->allocated = 0;
-		old_replicate->automatically_allocated = 0;
-	}
+	struct starpu_data_replicate_s *old_replicate = mc->replicate;
+	old_replicate->allocated = 0;
+	old_replicate->automatically_allocated = 0;
 
-	struct starpu_data_replicate_s *new_replicate = new_data->per_node[node];
 	new_replicate->allocated = 1;
 	new_replicate->automatically_allocated = 1;
 
-	memcpy(new_replicate->interface, mc->interface, old_data->ops->interface_size);
+	STARPU_ASSERT(new_replicate->interface);
+	STARPU_ASSERT(mc->interface);
+	memcpy(new_replicate->interface, mc->interface, old_replicate->ops->interface_size);
 
-	mc->data = new_data;
+	mc->data = new_replicate->handle;
 	mc->data_was_deleted = 0;
 	/* mc->ops, mc->size, mc->footprint and mc->interface should be
  	 * unchanged ! */
@@ -308,11 +377,11 @@ static unsigned try_to_reuse_mem_chunk(starpu_mem_chunk_t mc, unsigned node, sta
 }
 
 /* this function looks for a memory chunk that matches a given footprint in the
- * list of mem chunk that need to be freed */
+ * list of mem chunk that need to be freed. This function must be called with
+ * mc_rwlock[node] taken in write mode. */
 static unsigned try_to_find_reusable_mem_chunk(unsigned node, starpu_data_handle data, uint32_t footprint)
 {
 	starpu_mem_chunk_t mc, next_mc;
-	pthread_rwlock_wrlock(&mc_rwlock[node]);
 
 	/* go through all buffers in the cache */
 	mc = _starpu_memchunk_cache_lookup_locked(node, handle);
@@ -320,7 +389,6 @@ static unsigned try_to_find_reusable_mem_chunk(unsigned node, starpu_data_handle
 	{
 		/* We found an entry in the cache so we can reuse it */
 		reuse_mem_chunk(node, data, mc, 0);
-		pthread_rwlock_unlock(&mc_rwlock[node]);
 		return 1;
 	}
 
@@ -338,15 +406,10 @@ static unsigned try_to_find_reusable_mem_chunk(unsigned node, starpu_data_handle
 		{
 //			fprintf(stderr, "found a candidate ...\n");
 			if (try_to_reuse_mem_chunk(mc, node, data, 1))
-			{
-				pthread_rwlock_unlock(&mc_rwlock[node]);
 				return 1;
-			}
 		}
 	}
 
-	pthread_rwlock_unlock(&mc_rwlock[node]);
-
 	return 0;
 }
 #endif
@@ -507,9 +570,10 @@ size_t _starpu_free_all_automatically_allocated_buffers(uint32_t node)
 	return reclaim_memory_generic(node, 1);
 }
 
-starpu_mem_chunk_t _starpu_memchunk_init(starpu_data_handle handle, size_t size, void *interface, size_t interface_size, unsigned automatically_allocated)
+static starpu_mem_chunk_t _starpu_memchunk_init(struct starpu_data_replicate_s *replicate, size_t size, size_t interface_size, unsigned automatically_allocated)
 {
 	starpu_mem_chunk_t mc = starpu_mem_chunk_new();
+	starpu_data_handle handle = replicate->handle;
 
 	STARPU_ASSERT(handle);
 	STARPU_ASSERT(handle->ops);
@@ -520,27 +584,30 @@ starpu_mem_chunk_t _starpu_memchunk_init(starpu_data_handle handle, size_t size,
 	mc->ops = handle->ops;
 	mc->data_was_deleted = 0;
 	mc->automatically_allocated = automatically_allocated;
+	mc->relaxed_coherency = replicate->relaxed_coherency;		
+	mc->replicate = replicate;
 
 	/* Save a copy of the interface */
 	mc->interface = malloc(interface_size);
 	STARPU_ASSERT(mc->interface);
-	memcpy(mc->interface, interface, interface_size);
+	memcpy(mc->interface, replicate->interface, interface_size);
 
 	return mc;
 }
 
-static void register_mem_chunk(starpu_data_handle handle, uint32_t dst_node, size_t size, unsigned automatically_allocated)
+static void register_mem_chunk(struct starpu_data_replicate_s *replicate, size_t size, unsigned automatically_allocated)
 {
 	int res;
 
+	unsigned dst_node = replicate->memory_node;
+
 	starpu_mem_chunk_t mc;
 
 	/* the interface was already filled by ops->allocate_data_on_node */
-	void *src_interface = starpu_data_get_interface_on_node(handle, dst_node);
-	size_t interface_size = handle->ops->interface_size;
+	size_t interface_size = replicate->handle->ops->interface_size;
 
 	/* Put this memchunk in the list of memchunk in use */
-	mc = _starpu_memchunk_init(handle, size, src_interface, interface_size, automatically_allocated); 
+	mc = _starpu_memchunk_init(replicate, size, interface_size, automatically_allocated); 
 
 	res = pthread_rwlock_wrlock(&mc_rwlock[dst_node]);
 	STARPU_ASSERT(!res);
@@ -589,52 +656,6 @@ void _starpu_request_mem_chunk_removal(starpu_data_handle handle, unsigned node)
 	STARPU_ASSERT(!res);
 }
 
-static size_t free_memory_on_node(starpu_mem_chunk_t mc, uint32_t node)
-{
-	size_t freed = 0;
-
-	STARPU_ASSERT(mc->ops);
-	STARPU_ASSERT(mc->ops->free_data_on_node);
-
-	starpu_data_handle handle = mc->data;
-
-	/* Does this memory chunk refers to a handle that does not exist
-	 * anymore ? */
-	unsigned data_was_deleted = mc->data_was_deleted;
-
-//	while (_starpu_spin_trylock(&handle->header_lock))
-//		_starpu_datawizard_progress(_starpu_get_local_memory_node());
-
-#warning can we block here ?
-//	_starpu_spin_lock(&handle->header_lock);
-
-	if (mc->automatically_allocated && 
-		(!handle || data_was_deleted || handle->per_node[node]->refcnt == 0))
-	{
-		if (handle && !data_was_deleted)
-			STARPU_ASSERT(handle->per_node[node]->allocated);
-
-		mc->ops->free_data_on_node(mc->interface, node);
-
-		if (handle && !data_was_deleted)
-		{
-			handle->per_node[node]->allocated = 0;
-
-			/* XXX why do we need that ? */
-			handle->per_node[node]->automatically_allocated = 0;
-		}
-
-		freed = mc->size;
-
-		if (handle && !data_was_deleted)
-			STARPU_ASSERT(handle->per_node[node]->refcnt == 0);
-	}
-
-//	_starpu_spin_unlock(&handle->header_lock);
-
-	return freed;
-}
-
 /*
  * In order to allocate a piece of data, we try to reuse existing buffers if
  * its possible.
@@ -647,7 +668,7 @@ static size_t free_memory_on_node(starpu_mem_chunk_t mc, uint32_t node)
  *
  */
 
-ssize_t _starpu_allocate_interface(starpu_data_handle handle, void *interface, uint32_t dst_node)
+static ssize_t _starpu_allocate_interface(starpu_data_handle handle, struct starpu_data_replicate_s *replicate, uint32_t dst_node)
 {
 	unsigned attempts = 0;
 	ssize_t allocated_memory;
@@ -659,12 +680,17 @@ ssize_t _starpu_allocate_interface(starpu_data_handle handle, void *interface, u
 	uint32_t footprint = _starpu_compute_data_footprint(handle);
 
 	STARPU_TRACE_START_ALLOC_REUSE(dst_node);
+	pthread_rwlock_wrlock(&mc_rwlock[node]);
+
 	if (try_to_find_reusable_mem_chunk(dst_node, handle, footprint))
 	{
+		pthread_rwlock_unlock(&mc_rwlock[node]);
 		_starpu_allocation_cache_hit(dst_node);
 		ssize_t data_size = _starpu_data_get_size(handle);
 		return data_size;
 	}
+
+	pthread_rwlock_unlock(&mc_rwlock[node]);
 	STARPU_TRACE_END_ALLOC_REUSE(dst_node);
 #endif
 
@@ -673,7 +699,8 @@ ssize_t _starpu_allocate_interface(starpu_data_handle handle, void *interface, u
 		STARPU_ASSERT(handle->ops->allocate_data_on_node);
 
 		STARPU_TRACE_START_ALLOC(dst_node);
-		allocated_memory = handle->ops->allocate_data_on_node(interface, dst_node);
+		STARPU_ASSERT(replicate->interface);
+		allocated_memory = handle->ops->allocate_data_on_node(replicate->interface, dst_node);
 		STARPU_TRACE_END_ALLOC(dst_node);
 
 		if (allocated_memory == -ENOMEM) {
@@ -699,13 +726,14 @@ int _starpu_allocate_memory_on_node(starpu_data_handle handle, struct starpu_dat
 	if (replicate->allocated)
 		return 0;
 
-	allocated_memory = _starpu_allocate_interface(handle, replicate->interface, dst_node);
+	STARPU_ASSERT(replicate->interface);
+	allocated_memory = _starpu_allocate_interface(handle, replicate, dst_node);
 
 	/* perhaps we could really not handle that capacity misses */
 	if (allocated_memory == -ENOMEM)
 		return -ENOMEM;
 
-	register_mem_chunk(handle, dst_node, allocated_memory, 1);
+	register_mem_chunk(replicate, allocated_memory, 1);
 
 	replicate->allocated = 1;
 	replicate->automatically_allocated = 1;

+ 5 - 3
src/datawizard/memalloc.h

@@ -43,18 +43,20 @@ LIST_TYPE(starpu_mem_chunk,
 	void *interface;
 	unsigned automatically_allocated;
 	unsigned data_was_deleted;
+
+	/* A buffer that is used for SCRATCH or reduction cannnot be used with
+	 * filters. */
+	unsigned relaxed_coherency;
+	struct starpu_data_replicate_s *replicate;
 )
 
 void _starpu_init_mem_chunk_lists(void);
 void _starpu_deinit_mem_chunk_lists(void);
 void _starpu_request_mem_chunk_removal(starpu_data_handle handle, unsigned node);
-ssize_t _starpu_allocate_interface(starpu_data_handle handle, void *interface, uint32_t dst_node);
 int _starpu_allocate_memory_on_node(starpu_data_handle handle, struct starpu_data_replicate_s *replicate);
 size_t _starpu_free_all_automatically_allocated_buffers(uint32_t node);
 
 /* Memory chunk cache */
 void _starpu_memchunk_cache_insert(uint32_t node, starpu_mem_chunk_t);
 starpu_mem_chunk_t _starpu_memchunk_cache_lookup(uint32_t node, starpu_data_handle handle);
-starpu_mem_chunk_t _starpu_memchunk_init(starpu_data_handle handle, size_t size, void *interface,
-			size_t interface_size, unsigned automatically_allocated);
 #endif