Explorar o código

Start to get rid of the starpu_data_interface_t enum type: for now, the codelet
API is not affected, but this will make it possible to have user-provided data
interfaces, remove the need for numerous memory copies, and significantly
reduce the size of the task structure.

Cédric Augonnet hai 16 anos
pai
achega
8c86d9e771

+ 1 - 1
include/starpu-data-filters.h

@@ -33,7 +33,7 @@ typedef struct starpu_filter_t {
 void starpu_partition_data(starpu_data_handle initial_data, starpu_filter *f); 
 void starpu_partition_data(starpu_data_handle initial_data, starpu_filter *f); 
 void starpu_unpartition_data(starpu_data_handle root_data, uint32_t gathering_node);
 void starpu_unpartition_data(starpu_data_handle root_data, uint32_t gathering_node);
 
 
-void starpu_data_create_children(starpu_data_handle handle, unsigned nchildren);
+void starpu_data_create_children(starpu_data_handle handle, unsigned nchildren, size_t interfacesize);
 
 
 starpu_data_handle starpu_data_get_child(starpu_data_handle handle, unsigned i);
 starpu_data_handle starpu_data_get_child(starpu_data_handle handle, unsigned i);
 
 

+ 2 - 0
include/starpu-data.h

@@ -40,6 +40,8 @@ typedef struct starpu_buffer_descr_t {
 	starpu_access_mode mode;
 	starpu_access_mode mode;
 } starpu_buffer_descr;
 } starpu_buffer_descr;
 
 
+starpu_data_handle starpu_data_state_create(size_t interfacesize);
+
 void starpu_unpartition_data(starpu_data_handle root_data, uint32_t gathering_node);
 void starpu_unpartition_data(starpu_data_handle root_data, uint32_t gathering_node);
 void starpu_delete_data(starpu_data_handle state);
 void starpu_delete_data(starpu_data_handle state);
 
 

+ 1 - 1
src/datawizard/coherency.c

@@ -371,7 +371,7 @@ int fetch_task_input(struct starpu_task *task, uint32_t mask)
 
 
 		void *src_interface = starpu_data_get_interface_on_node(state, local_memory_node);
 		void *src_interface = starpu_data_get_interface_on_node(state, local_memory_node);
 
 
-		memcpy(&interface[index], src_interface, sizeof(starpu_data_interface_t));
+		memcpy(&interface[index], src_interface, state->interface_size);
 	}
 	}
 
 
 	TRACE_END_FETCH_INPUT(NULL);
 	TRACE_END_FETCH_INPUT(NULL);

+ 3 - 1
src/datawizard/coherency.h

@@ -110,7 +110,9 @@ typedef struct starpu_data_state_t {
 	local_data_state per_node[MAXNODES];
 	local_data_state per_node[MAXNODES];
 
 
 	/* describe the actual data layout */
 	/* describe the actual data layout */
-	starpu_data_interface_t interface[MAXNODES];
+//	starpu_data_interface_t interface[MAXNODES];
+	void *interface[MAXNODES];
+	size_t interface_size;
 
 
 	struct data_interface_ops_t *ops;
 	struct data_interface_ops_t *ops;
 
 

+ 50 - 2
src/datawizard/hierarchy.c

@@ -19,6 +19,14 @@
 /* 
 /* 
  * Stop monitoring a data
  * Stop monitoring a data
  */
  */
+
+/*
+ * Release the per-node interface descriptors held by STATE.
+ * Every one of the MAXNODES slots is freed and then reset to NULL so that
+ * the state does not keep dangling pointers; since free(NULL) is a no-op,
+ * calling this a second time on the same state is harmless.
+ */
+static void starpu_data_liberate_interfaces(data_state *state)
+{
+	unsigned node;
+	for (node = 0; node < MAXNODES; node++)
+	{
+		free(state->interface[node]);
+		state->interface[node] = NULL;
+	}
+}
+
 /* TODO : move in a more appropriate file */
 /* TODO : move in a more appropriate file */
 void starpu_delete_data(data_state *state)
 void starpu_delete_data(data_state *state)
 {
 {
@@ -37,6 +45,8 @@ void starpu_delete_data(data_state *state)
 
 
 	data_requester_list_delete(state->req_list);
 	data_requester_list_delete(state->req_list);
 
 
+	starpu_data_liberate_interfaces(state);
+
 	free(state);
 	free(state);
 }
 }
 
 
@@ -181,7 +191,7 @@ void starpu_partition_data(data_state *initial_data, starpu_filter *f)
 
 
 	for (i = 0; i < nparts; i++)
 	for (i = 0; i < nparts; i++)
 	{
 	{
-		data_state *children = &initial_data->children[i];
+		data_state *children = starpu_data_get_child(initial_data, i);
 
 
 		STARPU_ASSERT(children);
 		STARPU_ASSERT(children);
 
 
@@ -240,6 +250,8 @@ void starpu_unpartition_data(data_state *root_data, uint32_t gathering_node)
 		 * don't want to have the programming deal with memory shortage at that time,
 		 * don't want to have the programming deal with memory shortage at that time,
 		 * really */
 		 * really */
 		STARPU_ASSERT(ret == 0); 
 		STARPU_ASSERT(ret == 0); 
+
+		starpu_data_liberate_interfaces(&root_data->children[child]);
 	}
 	}
 
 
 	/* the gathering_node should now have a valid copy of all the children.
 	/* the gathering_node should now have a valid copy of all the children.
@@ -322,10 +334,46 @@ void starpu_advise_if_data_is_important(data_state *state, unsigned is_important
 
 
 }
 }
 
 
-void starpu_data_create_children(starpu_data_handle handle, unsigned nchildren)
+/*
+ * Allocate a zero-initialized data state and, for each of the MAXNODES
+ * slots, a zero-initialized interface descriptor of INTERFACESIZE bytes.
+ * The size is recorded in the interface_size field; every allocation is
+ * checked with STARPU_ASSERT.  Returns the newly created handle.
+ */
+starpu_data_handle starpu_data_state_create(size_t interfacesize)
+{
+	struct starpu_data_state_t *new_state = calloc(1, sizeof(*new_state));
+	STARPU_ASSERT(new_state);
+
+	new_state->interface_size = interfacesize;
+
+	/* one zeroed interface descriptor per node slot */
+	unsigned n = 0;
+	while (n < MAXNODES)
+	{
+		void *descr = calloc(1, interfacesize);
+		new_state->interface[n] = descr;
+		STARPU_ASSERT(new_state->interface[n]);
+		n++;
+	}
+
+	return new_state;
+}
+
+/*
+ * Allocate NCHILDREN zero-initialized child states for HANDLE and give each
+ * child one zero-initialized interface descriptor of INTERFACESIZE bytes per
+ * node slot.  All allocations are checked with STARPU_ASSERT, and
+ * handle->nchildren is set once every child has been prepared.
+ */
+void starpu_data_create_children(starpu_data_handle handle,
+		unsigned nchildren, size_t interfacesize)
 {
 {
 	handle->children = calloc(nchildren, sizeof(data_state));
 	handle->children = calloc(nchildren, sizeof(data_state));
 	STARPU_ASSERT(handle->children);
 	STARPU_ASSERT(handle->children);
 
 
+	unsigned node;
+	unsigned child;
+
+	for (child = 0; child < nchildren; child++)
+	{
+		starpu_data_handle handle_child = &handle->children[child];
+
+		handle_child->interface_size = interfacesize;
+
+		for (node = 0; node < MAXNODES; node++)
+		{
+			handle_child->interface[node] = calloc(1, interfacesize);
+			/* check the descriptor just allocated for this child,
+			 * not the one belonging to the first child */
+			STARPU_ASSERT(handle_child->interface[node]);
+		}
+	}
+
 	handle->nchildren = nchildren;
 	handle->nchildren = nchildren;
 }
 }

+ 1 - 1
src/datawizard/interfaces/bcsr_filters.c

@@ -40,7 +40,7 @@ unsigned starpu_canonical_block_filter_bcsr(starpu_filter *f __attribute__((unus
 	nchunks = nnz;
 	nchunks = nnz;
 	
 	
 	/* first allocate the children data_state */
 	/* first allocate the children data_state */
-	starpu_data_create_children(root_data, nchunks);
+	starpu_data_create_children(root_data, nchunks, sizeof(starpu_blas_interface_t));
 
 
 	/* actually create all the chunks */
 	/* actually create all the chunks */
 
 

+ 2 - 2
src/datawizard/interfaces/bcsr_interface.c

@@ -66,8 +66,8 @@ struct data_interface_ops_t interface_bcsr_ops = {
 void starpu_register_bcsr_data(struct starpu_data_state_t **handle, uint32_t home_node,
 void starpu_register_bcsr_data(struct starpu_data_state_t **handle, uint32_t home_node,
 		uint32_t nnz, uint32_t nrow, uintptr_t nzval, uint32_t *colind, uint32_t *rowptr, uint32_t firstentry,  uint32_t r, uint32_t c, size_t elemsize)
 		uint32_t nnz, uint32_t nrow, uintptr_t nzval, uint32_t *colind, uint32_t *rowptr, uint32_t firstentry,  uint32_t r, uint32_t c, size_t elemsize)
 {
 {
-	struct starpu_data_state_t *state = calloc(1, sizeof(struct starpu_data_state_t));
-	STARPU_ASSERT(state);
+	struct starpu_data_state_t *state =
+		starpu_data_state_create(sizeof(starpu_bcsr_interface_t));
 
 
 	STARPU_ASSERT(handle);
 	STARPU_ASSERT(handle);
 	*handle = state;
 	*handle = state;

+ 2 - 2
src/datawizard/interfaces/blas_filters.c

@@ -37,7 +37,7 @@ unsigned starpu_block_filter_func(starpu_filter *f, data_state *root_data)
 	nchunks = STARPU_MIN(nx, arg);
 	nchunks = STARPU_MIN(nx, arg);
 
 
 	/* first allocate the children data_state */
 	/* first allocate the children data_state */
-	starpu_data_create_children(root_data, nchunks);
+	starpu_data_create_children(root_data, nchunks, sizeof(starpu_blas_interface_t));
 
 
 	/* actually create all the chunks */
 	/* actually create all the chunks */
 	unsigned chunk;
 	unsigned chunk;
@@ -91,7 +91,7 @@ unsigned starpu_vertical_block_filter_func(starpu_filter *f, data_state *root_da
 	nchunks = STARPU_MIN(ny, arg);
 	nchunks = STARPU_MIN(ny, arg);
 	
 	
 	/* first allocate the children data_state */
 	/* first allocate the children data_state */
-	starpu_data_create_children(root_data, nchunks);
+	starpu_data_create_children(root_data, nchunks, sizeof(starpu_blas_interface_t));
 
 
 	/* actually create all the chunks */
 	/* actually create all the chunks */
 	unsigned chunk;
 	unsigned chunk;

+ 2 - 2
src/datawizard/interfaces/blas_interface.c

@@ -101,8 +101,8 @@ void starpu_register_blas_data(struct starpu_data_state_t **handle, uint32_t hom
 			uintptr_t ptr, uint32_t ld, uint32_t nx,
 			uintptr_t ptr, uint32_t ld, uint32_t nx,
 			uint32_t ny, size_t elemsize)
 			uint32_t ny, size_t elemsize)
 {
 {
-	struct starpu_data_state_t *state = calloc(1, sizeof(struct starpu_data_state_t));
-	STARPU_ASSERT(state);
+	struct starpu_data_state_t *state =
+		starpu_data_state_create(sizeof(starpu_blas_interface_t));
 
 
 	STARPU_ASSERT(handle);
 	STARPU_ASSERT(handle);
 	*handle = state;
 	*handle = state;

+ 2 - 2
src/datawizard/interfaces/block_interface.c

@@ -87,8 +87,8 @@ void starpu_register_block_data(struct starpu_data_state_t **handle, uint32_t ho
 			uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t nx,
 			uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t nx,
 			uint32_t ny, uint32_t nz, size_t elemsize)
 			uint32_t ny, uint32_t nz, size_t elemsize)
 {
 {
-	struct starpu_data_state_t *state = calloc(1, sizeof(struct starpu_data_state_t));
-	STARPU_ASSERT(state);
+	struct starpu_data_state_t *state =
+		starpu_data_state_create(sizeof(starpu_block_interface_t));
 
 
 	STARPU_ASSERT(handle);
 	STARPU_ASSERT(handle);
 	*handle = state;
 	*handle = state;

+ 1 - 1
src/datawizard/interfaces/csr_filters.c

@@ -34,7 +34,7 @@ unsigned starpu_vertical_block_filter_func_csr(starpu_filter *f, data_state *roo
 	nchunks = STARPU_MIN(nrow, arg);
 	nchunks = STARPU_MIN(nrow, arg);
 	
 	
 	/* first allocate the children data_state */
 	/* first allocate the children data_state */
-	starpu_data_create_children(root_data, nchunks);
+	starpu_data_create_children(root_data, nchunks, sizeof(starpu_csr_interface_t));
 
 
 	/* actually create all the chunks */
 	/* actually create all the chunks */
 	uint32_t chunk_size = (nrow + nchunks - 1)/nchunks;
 	uint32_t chunk_size = (nrow + nchunks - 1)/nchunks;

+ 2 - 2
src/datawizard/interfaces/csr_interface.c

@@ -64,8 +64,8 @@ struct data_interface_ops_t interface_csr_ops = {
 void starpu_register_csr_data(struct starpu_data_state_t **handle, uint32_t home_node,
 void starpu_register_csr_data(struct starpu_data_state_t **handle, uint32_t home_node,
 		uint32_t nnz, uint32_t nrow, uintptr_t nzval, uint32_t *colind, uint32_t *rowptr, uint32_t firstentry, size_t elemsize)
 		uint32_t nnz, uint32_t nrow, uintptr_t nzval, uint32_t *colind, uint32_t *rowptr, uint32_t firstentry, size_t elemsize)
 {
 {
-	struct starpu_data_state_t *state = calloc(1, sizeof(struct starpu_data_state_t));
-	STARPU_ASSERT(state);
+	struct starpu_data_state_t *state =
+		starpu_data_state_create(sizeof(starpu_csr_interface_t));	
 
 
 	STARPU_ASSERT(handle);
 	STARPU_ASSERT(handle);
 	*handle = state;
 	*handle = state;

+ 1 - 1
src/datawizard/interfaces/data_interface.c

@@ -23,7 +23,7 @@ unsigned starpu_get_handle_interface_id(starpu_data_handle handle)
 
 
 void *starpu_data_get_interface_on_node(starpu_data_handle handle, unsigned memory_node)
 void *starpu_data_get_interface_on_node(starpu_data_handle handle, unsigned memory_node)
 {
 {
-	return &handle->interface[memory_node];
+	return handle->interface[memory_node];
 }
 }
 
 
 /* register data interface ? (do we need to register ?) descr =  type enum, required to get an id !  */
 /* register data interface ? (do we need to register ?) descr =  type enum, required to get an id !  */

+ 3 - 3
src/datawizard/interfaces/vector_filters.c

@@ -33,7 +33,7 @@ unsigned starpu_block_filter_func_vector(starpu_filter *f, data_state *root_data
 	nchunks = STARPU_MIN(nx, arg);
 	nchunks = STARPU_MIN(nx, arg);
 
 
 	/* first allocate the children data_state */
 	/* first allocate the children data_state */
-	starpu_data_create_children(root_data, nchunks);
+	starpu_data_create_children(root_data, nchunks, sizeof(starpu_vector_interface_t));
 
 
 	/* actually create all the chunks */
 	/* actually create all the chunks */
 	unsigned chunk;
 	unsigned chunk;
@@ -81,7 +81,7 @@ unsigned starpu_divide_in_2_filter_func_vector(starpu_filter *f, data_state *roo
 	size_t elemsize = vector_root->elemsize;
 	size_t elemsize = vector_root->elemsize;
 
 
 	/* first allocate the children data_state */
 	/* first allocate the children data_state */
-	starpu_data_create_children(root_data, 2);
+	starpu_data_create_children(root_data, 2, sizeof(starpu_vector_interface_t));
 
 
 	STARPU_ASSERT(length_first < nx);
 	STARPU_ASSERT(length_first < nx);
 
 
@@ -139,7 +139,7 @@ unsigned starpu_list_filter_func_vector(starpu_filter *f, data_state *root_data)
 	size_t elemsize = vector_root->elemsize;
 	size_t elemsize = vector_root->elemsize;
 
 
 	/* first allocate the children data_state */
 	/* first allocate the children data_state */
-	starpu_data_create_children(root_data, nchunks);
+	starpu_data_create_children(root_data, nchunks, sizeof(starpu_vector_interface_t));
 
 
 	unsigned current_pos = 0;
 	unsigned current_pos = 0;
 
 

+ 2 - 2
src/datawizard/interfaces/vector_interface.c

@@ -90,8 +90,8 @@ int convert_vector_to_gordon(starpu_data_interface_t *interface, uint64_t *ptr,
 void starpu_register_vector_data(struct starpu_data_state_t **handle, uint32_t home_node,
 void starpu_register_vector_data(struct starpu_data_state_t **handle, uint32_t home_node,
                         uintptr_t ptr, uint32_t nx, size_t elemsize)
                         uintptr_t ptr, uint32_t nx, size_t elemsize)
 {
 {
-	struct starpu_data_state_t *state = calloc(1, sizeof(struct starpu_data_state_t));
-	STARPU_ASSERT(state);
+	struct starpu_data_state_t *state =
+		starpu_data_state_create(sizeof(starpu_vector_interface_t));
 
 
 	STARPU_ASSERT(handle);
 	STARPU_ASSERT(handle);
 	*handle = state;
 	*handle = state;

+ 10 - 3
src/datawizard/memalloc.c

@@ -111,6 +111,8 @@ static size_t do_free_mem_chunk(mem_chunk_t mc, unsigned node)
 
 
 	/* remove the mem_chunk from the list */
 	/* remove the mem_chunk from the list */
 	mem_chunk_list_erase(mc_list[node], mc);
 	mem_chunk_list_erase(mc_list[node], mc);
+
+	free(mc->interface);
 	mem_chunk_delete(mc);
 	mem_chunk_delete(mc);
 
 
 	return size; 
 	return size; 
@@ -250,7 +252,7 @@ static void reuse_mem_chunk(unsigned node, data_state *new_data, mem_chunk_t mc,
 	new_data->per_node[node].allocated = 1;
 	new_data->per_node[node].allocated = 1;
 	new_data->per_node[node].automatically_allocated = 1;
 	new_data->per_node[node].automatically_allocated = 1;
 
 
-	memcpy(&new_data->interface[node], &mc->interface, sizeof(starpu_data_interface_t));
+	memcpy(&new_data->interface[node], mc->interface, old_data->interface_size);
 
 
 	mc->data = new_data;
 	mc->data = new_data;
 	mc->data_was_deleted = 0;
 	mc->data_was_deleted = 0;
@@ -383,6 +385,7 @@ static size_t reclaim_memory(uint32_t node, size_t toreclaim __attribute__ ((unu
 
 
 		mem_chunk_list_erase(mc_list_to_free[node], mc);
 		mem_chunk_list_erase(mc_list_to_free[node], mc);
 
 
+		free(mc->interface);
 		mem_chunk_delete(mc);
 		mem_chunk_delete(mc);
 	}
 	}
 
 
@@ -429,7 +432,11 @@ static void register_mem_chunk(data_state *state, uint32_t dst_node, size_t size
 
 
 	/* the interface was already filled by ops->allocate_data_on_node */
 	/* the interface was already filled by ops->allocate_data_on_node */
 	void *src_interface = starpu_data_get_interface_on_node(state, dst_node);
 	void *src_interface = starpu_data_get_interface_on_node(state, dst_node);
-	memcpy(&mc->interface, src_interface, sizeof(starpu_data_interface_t));
+
+	mc->interface = malloc(state->interface_size);
+	STARPU_ASSERT(mc->interface);
+
+	memcpy(mc->interface, src_interface, state->interface_size);
 
 
 	res = pthread_rwlock_wrlock(&mc_rwlock[dst_node]);
 	res = pthread_rwlock_wrlock(&mc_rwlock[dst_node]);
 	STARPU_ASSERT(!res);
 	STARPU_ASSERT(!res);
@@ -495,7 +502,7 @@ static size_t liberate_memory_on_node(mem_chunk_t mc, uint32_t node)
 	{
 	{
 		STARPU_ASSERT(state->per_node[node].allocated);
 		STARPU_ASSERT(state->per_node[node].allocated);
 
 
-		mc->ops->liberate_data_on_node(&mc->interface, node);
+		mc->ops->liberate_data_on_node(mc->interface, node);
 
 
 		if (!mc->data_was_deleted)
 		if (!mc->data_was_deleted)
 		{
 		{

+ 1 - 1
src/datawizard/memalloc.h

@@ -38,7 +38,7 @@ LIST_TYPE(mem_chunk,
 	 * because when a data is deleted, the memory chunk remains.
 	 * because when a data is deleted, the memory chunk remains.
 	 */
 	 */
 	struct data_interface_ops_t *ops;
 	struct data_interface_ops_t *ops;
-	starpu_data_interface_t interface;
+	void *interface;
 	unsigned automatically_allocated;
 	unsigned automatically_allocated;
 	unsigned data_was_deleted;
 	unsigned data_was_deleted;
 )
 )