Explorar el Código

Start getting rid of the starpu_data_interface_t enum type: for now, the codelet
API is not affected, but this will make it possible to have user-provided data
interfaces, remove the need for numerous memory copies, and significantly
reduce the size of the task structure.

Cédric Augonnet hace 15 años
padre
commit
8c86d9e771

+ 1 - 1
include/starpu-data-filters.h

@@ -33,7 +33,7 @@ typedef struct starpu_filter_t {
 void starpu_partition_data(starpu_data_handle initial_data, starpu_filter *f); 
 void starpu_unpartition_data(starpu_data_handle root_data, uint32_t gathering_node);
 
-void starpu_data_create_children(starpu_data_handle handle, unsigned nchildren);
+void starpu_data_create_children(starpu_data_handle handle, unsigned nchildren, size_t interfacesize);
 
 starpu_data_handle starpu_data_get_child(starpu_data_handle handle, unsigned i);
 

+ 2 - 0
include/starpu-data.h

@@ -40,6 +40,8 @@ typedef struct starpu_buffer_descr_t {
 	starpu_access_mode mode;
 } starpu_buffer_descr;
 
+starpu_data_handle starpu_data_state_create(size_t interfacesize);
+
 void starpu_unpartition_data(starpu_data_handle root_data, uint32_t gathering_node);
 void starpu_delete_data(starpu_data_handle state);
 

+ 1 - 1
src/datawizard/coherency.c

@@ -371,7 +371,7 @@ int fetch_task_input(struct starpu_task *task, uint32_t mask)
 
 		void *src_interface = starpu_data_get_interface_on_node(state, local_memory_node);
 
-		memcpy(&interface[index], src_interface, sizeof(starpu_data_interface_t));
+		memcpy(&interface[index], src_interface, state->interface_size);
 	}
 
 	TRACE_END_FETCH_INPUT(NULL);

+ 3 - 1
src/datawizard/coherency.h

@@ -110,7 +110,9 @@ typedef struct starpu_data_state_t {
 	local_data_state per_node[MAXNODES];
 
 	/* describe the actual data layout */
-	starpu_data_interface_t interface[MAXNODES];
+//	starpu_data_interface_t interface[MAXNODES];
+	void *interface[MAXNODES];
+	size_t interface_size;
 
 	struct data_interface_ops_t *ops;
 

+ 50 - 2
src/datawizard/hierarchy.c

@@ -19,6 +19,14 @@
 /* 
  * Stop monitoring a data
  */
+
+/*
+ * Release the per-node interface buffers attached to a data state.
+ *
+ * Every entry of state->interface[] is passed to free() and then reset to
+ * NULL, so that running this function a second time on the same state
+ * cannot free the same buffer twice.
+ */
+static void starpu_data_liberate_interfaces(data_state *state)
+{
+	unsigned node;
+	for (node = 0; node < MAXNODES; node++)
+	{
+		free(state->interface[node]);
+		/* free(NULL) is a no-op, so a cleared slot is safe to revisit */
+		state->interface[node] = NULL;
+	}
+}
+
 /* TODO : move in a more appropriate file */
 void starpu_delete_data(data_state *state)
 {
@@ -37,6 +45,8 @@ void starpu_delete_data(data_state *state)
 
 	data_requester_list_delete(state->req_list);
 
+	starpu_data_liberate_interfaces(state);
+
 	free(state);
 }
 
@@ -181,7 +191,7 @@ void starpu_partition_data(data_state *initial_data, starpu_filter *f)
 
 	for (i = 0; i < nparts; i++)
 	{
-		data_state *children = &initial_data->children[i];
+		data_state *children = starpu_data_get_child(initial_data, i);
 
 		STARPU_ASSERT(children);
 
@@ -240,6 +250,8 @@ void starpu_unpartition_data(data_state *root_data, uint32_t gathering_node)
 		 * don't want to have the programming deal with memory shortage at that time,
 		 * really */
 		STARPU_ASSERT(ret == 0); 
+
+		starpu_data_liberate_interfaces(&root_data->children[child]);
 	}
 
 	/* the gathering_node should now have a valid copy of all the children.
@@ -322,10 +334,46 @@ void starpu_advise_if_data_is_important(data_state *state, unsigned is_important
 
 }
 
-void starpu_data_create_children(starpu_data_handle handle, unsigned nchildren)
+/*
+ * Allocate a zero-initialised data state together with one zero-initialised
+ * interface buffer of interfacesize bytes per memory node, and record
+ * interfacesize in the state. Every allocation is checked with
+ * STARPU_ASSERT.
+ */
+starpu_data_handle starpu_data_state_create(size_t interfacesize)
+{
+	unsigned n;
+	struct starpu_data_state_t *new_state;
+
+	new_state = calloc(1, sizeof(struct starpu_data_state_t));
+	STARPU_ASSERT(new_state);
+
+	new_state->interface_size = interfacesize;
+
+	/* one private interface buffer per memory node */
+	for (n = 0; n < MAXNODES; n++)
+	{
+		void *node_interface = calloc(1, interfacesize);
+
+		new_state->interface[n] = node_interface;
+		STARPU_ASSERT(new_state->interface[n]);
+	}
+
+	return new_state;
+}
+
+/*
+ * Allocate nchildren zero-initialised child states for handle and give each
+ * child one zero-initialised interface buffer of interfacesize bytes per
+ * memory node. handle->nchildren is set once all children are set up.
+ * Every allocation is checked with STARPU_ASSERT.
+ */
+void starpu_data_create_children(starpu_data_handle handle,
+		unsigned nchildren, size_t interfacesize)
 {
 	handle->children = calloc(nchildren, sizeof(data_state));
 	STARPU_ASSERT(handle->children);
 
+	unsigned node;
+	unsigned child;
+
+	for (child = 0; child < nchildren; child++)
+	{
+		starpu_data_handle handle_child = &handle->children[child];
+
+		handle_child->interface_size = interfacesize;
+
+		for (node = 0; node < MAXNODES; node++)
+		{
+			handle_child->interface[node] = calloc(1, interfacesize);
+			/* check the buffer just allocated for this child, not
+			 * the one belonging to the first child */
+			STARPU_ASSERT(handle_child->interface[node]);
+		}
+	}
+
 	handle->nchildren = nchildren;
 }

+ 1 - 1
src/datawizard/interfaces/bcsr_filters.c

@@ -40,7 +40,7 @@ unsigned starpu_canonical_block_filter_bcsr(starpu_filter *f __attribute__((unus
 	nchunks = nnz;
 	
 	/* first allocate the children data_state */
-	starpu_data_create_children(root_data, nchunks);
+	starpu_data_create_children(root_data, nchunks, sizeof(starpu_blas_interface_t));
 
 	/* actually create all the chunks */
 

+ 2 - 2
src/datawizard/interfaces/bcsr_interface.c

@@ -66,8 +66,8 @@ struct data_interface_ops_t interface_bcsr_ops = {
 void starpu_register_bcsr_data(struct starpu_data_state_t **handle, uint32_t home_node,
 		uint32_t nnz, uint32_t nrow, uintptr_t nzval, uint32_t *colind, uint32_t *rowptr, uint32_t firstentry,  uint32_t r, uint32_t c, size_t elemsize)
 {
-	struct starpu_data_state_t *state = calloc(1, sizeof(struct starpu_data_state_t));
-	STARPU_ASSERT(state);
+	struct starpu_data_state_t *state =
+		starpu_data_state_create(sizeof(starpu_bcsr_interface_t));
 
 	STARPU_ASSERT(handle);
 	*handle = state;

+ 2 - 2
src/datawizard/interfaces/blas_filters.c

@@ -37,7 +37,7 @@ unsigned starpu_block_filter_func(starpu_filter *f, data_state *root_data)
 	nchunks = STARPU_MIN(nx, arg);
 
 	/* first allocate the children data_state */
-	starpu_data_create_children(root_data, nchunks);
+	starpu_data_create_children(root_data, nchunks, sizeof(starpu_blas_interface_t));
 
 	/* actually create all the chunks */
 	unsigned chunk;
@@ -91,7 +91,7 @@ unsigned starpu_vertical_block_filter_func(starpu_filter *f, data_state *root_da
 	nchunks = STARPU_MIN(ny, arg);
 	
 	/* first allocate the children data_state */
-	starpu_data_create_children(root_data, nchunks);
+	starpu_data_create_children(root_data, nchunks, sizeof(starpu_blas_interface_t));
 
 	/* actually create all the chunks */
 	unsigned chunk;

+ 2 - 2
src/datawizard/interfaces/blas_interface.c

@@ -101,8 +101,8 @@ void starpu_register_blas_data(struct starpu_data_state_t **handle, uint32_t hom
 			uintptr_t ptr, uint32_t ld, uint32_t nx,
 			uint32_t ny, size_t elemsize)
 {
-	struct starpu_data_state_t *state = calloc(1, sizeof(struct starpu_data_state_t));
-	STARPU_ASSERT(state);
+	struct starpu_data_state_t *state =
+		starpu_data_state_create(sizeof(starpu_blas_interface_t));
 
 	STARPU_ASSERT(handle);
 	*handle = state;

+ 2 - 2
src/datawizard/interfaces/block_interface.c

@@ -87,8 +87,8 @@ void starpu_register_block_data(struct starpu_data_state_t **handle, uint32_t ho
 			uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t nx,
 			uint32_t ny, uint32_t nz, size_t elemsize)
 {
-	struct starpu_data_state_t *state = calloc(1, sizeof(struct starpu_data_state_t));
-	STARPU_ASSERT(state);
+	struct starpu_data_state_t *state =
+		starpu_data_state_create(sizeof(starpu_block_interface_t));
 
 	STARPU_ASSERT(handle);
 	*handle = state;

+ 1 - 1
src/datawizard/interfaces/csr_filters.c

@@ -34,7 +34,7 @@ unsigned starpu_vertical_block_filter_func_csr(starpu_filter *f, data_state *roo
 	nchunks = STARPU_MIN(nrow, arg);
 	
 	/* first allocate the children data_state */
-	starpu_data_create_children(root_data, nchunks);
+	starpu_data_create_children(root_data, nchunks, sizeof(starpu_csr_interface_t));
 
 	/* actually create all the chunks */
 	uint32_t chunk_size = (nrow + nchunks - 1)/nchunks;

+ 2 - 2
src/datawizard/interfaces/csr_interface.c

@@ -64,8 +64,8 @@ struct data_interface_ops_t interface_csr_ops = {
 void starpu_register_csr_data(struct starpu_data_state_t **handle, uint32_t home_node,
 		uint32_t nnz, uint32_t nrow, uintptr_t nzval, uint32_t *colind, uint32_t *rowptr, uint32_t firstentry, size_t elemsize)
 {
-	struct starpu_data_state_t *state = calloc(1, sizeof(struct starpu_data_state_t));
-	STARPU_ASSERT(state);
+	struct starpu_data_state_t *state =
+		starpu_data_state_create(sizeof(starpu_csr_interface_t));	
 
 	STARPU_ASSERT(handle);
 	*handle = state;

+ 1 - 1
src/datawizard/interfaces/data_interface.c

@@ -23,7 +23,7 @@ unsigned starpu_get_handle_interface_id(starpu_data_handle handle)
 
 void *starpu_data_get_interface_on_node(starpu_data_handle handle, unsigned memory_node)
 {
-	return &handle->interface[memory_node];
+	/* interface[] stores one pointer per memory node: hand back the
+	 * stored pointer itself rather than the address of the array slot */
+	void *node_interface = handle->interface[memory_node];
+
+	return node_interface;
 }
 
 /* register data interface ? (do we need to register ?) descr =  type enum, required to get an id !  */

+ 3 - 3
src/datawizard/interfaces/vector_filters.c

@@ -33,7 +33,7 @@ unsigned starpu_block_filter_func_vector(starpu_filter *f, data_state *root_data
 	nchunks = STARPU_MIN(nx, arg);
 
 	/* first allocate the children data_state */
-	starpu_data_create_children(root_data, nchunks);
+	starpu_data_create_children(root_data, nchunks, sizeof(starpu_vector_interface_t));
 
 	/* actually create all the chunks */
 	unsigned chunk;
@@ -81,7 +81,7 @@ unsigned starpu_divide_in_2_filter_func_vector(starpu_filter *f, data_state *roo
 	size_t elemsize = vector_root->elemsize;
 
 	/* first allocate the children data_state */
-	starpu_data_create_children(root_data, 2);
+	starpu_data_create_children(root_data, 2, sizeof(starpu_vector_interface_t));
 
 	STARPU_ASSERT(length_first < nx);
 
@@ -139,7 +139,7 @@ unsigned starpu_list_filter_func_vector(starpu_filter *f, data_state *root_data)
 	size_t elemsize = vector_root->elemsize;
 
 	/* first allocate the children data_state */
-	starpu_data_create_children(root_data, nchunks);
+	starpu_data_create_children(root_data, nchunks, sizeof(starpu_vector_interface_t));
 
 	unsigned current_pos = 0;
 

+ 2 - 2
src/datawizard/interfaces/vector_interface.c

@@ -90,8 +90,8 @@ int convert_vector_to_gordon(starpu_data_interface_t *interface, uint64_t *ptr,
 void starpu_register_vector_data(struct starpu_data_state_t **handle, uint32_t home_node,
                         uintptr_t ptr, uint32_t nx, size_t elemsize)
 {
-	struct starpu_data_state_t *state = calloc(1, sizeof(struct starpu_data_state_t));
-	STARPU_ASSERT(state);
+	struct starpu_data_state_t *state =
+		starpu_data_state_create(sizeof(starpu_vector_interface_t));
 
 	STARPU_ASSERT(handle);
 	*handle = state;

+ 10 - 3
src/datawizard/memalloc.c

@@ -111,6 +111,8 @@ static size_t do_free_mem_chunk(mem_chunk_t mc, unsigned node)
 
 	/* remove the mem_chunk from the list */
 	mem_chunk_list_erase(mc_list[node], mc);
+
+	free(mc->interface);
 	mem_chunk_delete(mc);
 
 	return size; 
@@ -250,7 +252,7 @@ static void reuse_mem_chunk(unsigned node, data_state *new_data, mem_chunk_t mc,
 	new_data->per_node[node].allocated = 1;
 	new_data->per_node[node].automatically_allocated = 1;
 
-	memcpy(&new_data->interface[node], &mc->interface, sizeof(starpu_data_interface_t));
+	memcpy(&new_data->interface[node], mc->interface, old_data->interface_size);
 
 	mc->data = new_data;
 	mc->data_was_deleted = 0;
@@ -383,6 +385,7 @@ static size_t reclaim_memory(uint32_t node, size_t toreclaim __attribute__ ((unu
 
 		mem_chunk_list_erase(mc_list_to_free[node], mc);
 
+		free(mc->interface);
 		mem_chunk_delete(mc);
 	}
 
@@ -429,7 +432,11 @@ static void register_mem_chunk(data_state *state, uint32_t dst_node, size_t size
 
 	/* the interface was already filled by ops->allocate_data_on_node */
 	void *src_interface = starpu_data_get_interface_on_node(state, dst_node);
-	memcpy(&mc->interface, src_interface, sizeof(starpu_data_interface_t));
+
+	mc->interface = malloc(state->interface_size);
+	STARPU_ASSERT(mc->interface);
+
+	memcpy(mc->interface, src_interface, state->interface_size);
 
 	res = pthread_rwlock_wrlock(&mc_rwlock[dst_node]);
 	STARPU_ASSERT(!res);
@@ -495,7 +502,7 @@ static size_t liberate_memory_on_node(mem_chunk_t mc, uint32_t node)
 	{
 		STARPU_ASSERT(state->per_node[node].allocated);
 
-		mc->ops->liberate_data_on_node(&mc->interface, node);
+		mc->ops->liberate_data_on_node(mc->interface, node);
 
 		if (!mc->data_was_deleted)
 		{

+ 1 - 1
src/datawizard/memalloc.h

@@ -38,7 +38,7 @@ LIST_TYPE(mem_chunk,
 	 * because when a data is deleted, the memory chunk remains.
 	 */
 	struct data_interface_ops_t *ops;
-	starpu_data_interface_t interface;
+	void *interface;
 	unsigned automatically_allocated;
 	unsigned data_was_deleted;
 )