Prechádzať zdrojové kódy

Various cleanups in the data management code, start to outline an API to
manipulate/declare filters.

Cédric Augonnet 15 rokov pred
rodič
commit
51fe1b1897

+ 17 - 14
include/starpu-data-filters.h

@@ -17,42 +17,45 @@
 #ifndef __STARPU_DATA_FILTERS_H__
 #define __STARPU_DATA_FILTERS_H__
 
+#include <starpu.h>
 #include <starpu_config.h>
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-struct starpu_data_state_t;
-
 typedef struct starpu_filter_t {
-	unsigned (*filter_func)(struct starpu_filter_t *, struct starpu_data_state_t *); /* the actual partitionning function */
+	unsigned (*filter_func)(struct starpu_filter_t *, starpu_data_handle); /* the actual partitionning function */
 	uint32_t filter_arg;
 	void *filter_arg_ptr;
 } starpu_filter;
 
-void starpu_partition_data(struct starpu_data_state_t *initial_data, starpu_filter *f); 
-void starpu_unpartition_data(struct starpu_data_state_t *root_data, uint32_t gathering_node);
+void starpu_partition_data(starpu_data_handle initial_data, starpu_filter *f); 
+void starpu_unpartition_data(starpu_data_handle root_data, uint32_t gathering_node);
+
+void starpu_data_create_children(starpu_data_handle handle, unsigned nchildren);
+
+starpu_data_handle starpu_data_get_child(starpu_data_handle handle, unsigned i);
 
 /* unsigned list */
-struct starpu_data_state_t *get_sub_data(struct starpu_data_state_t *root_data, unsigned depth, ... );
+starpu_data_handle get_sub_data(starpu_data_handle root_data, unsigned depth, ... );
 
 /* starpu_filter * list */
-void starpu_map_filters(struct starpu_data_state_t *root_data, unsigned nfilters, ...);
+void starpu_map_filters(starpu_data_handle root_data, unsigned nfilters, ...);
 
 /* a few examples of filters */
 
 /* for BCSR */
-unsigned starpu_canonical_block_filter_bcsr(starpu_filter *f, struct starpu_data_state_t *root_data);
-unsigned starpu_vertical_block_filter_func_csr(starpu_filter *f, struct starpu_data_state_t *root_data);
+unsigned starpu_canonical_block_filter_bcsr(starpu_filter *f, starpu_data_handle root_data);
+unsigned starpu_vertical_block_filter_func_csr(starpu_filter *f, starpu_data_handle root_data);
 /* (filters for BLAS interface) */
-unsigned starpu_block_filter_func(starpu_filter *f, struct starpu_data_state_t *root_data);
-unsigned starpu_vertical_block_filter_func(starpu_filter *f, struct starpu_data_state_t *root_data);
+unsigned starpu_block_filter_func(starpu_filter *f, starpu_data_handle root_data);
+unsigned starpu_vertical_block_filter_func(starpu_filter *f, starpu_data_handle root_data);
 
 /* for vector */
-unsigned starpu_block_filter_func_vector(starpu_filter *f, struct starpu_data_state_t *root_data);
-unsigned starpu_list_filter_func_vector(starpu_filter *f, struct starpu_data_state_t *root_data);
-unsigned starpu_divide_in_2_filter_func_vector(starpu_filter *f, struct starpu_data_state_t *root_data);
+unsigned starpu_block_filter_func_vector(starpu_filter *f, starpu_data_handle root_data);
+unsigned starpu_list_filter_func_vector(starpu_filter *f, starpu_data_handle root_data);
+unsigned starpu_divide_in_2_filter_func_vector(starpu_filter *f, starpu_data_handle root_data);
 
 #ifdef __cplusplus
 }

+ 3 - 3
include/starpu-data-interfaces.h

@@ -18,13 +18,13 @@
 #define __STARPU_DATA_INTERFACES_H__
 
 #include <starpu.h>
+#include <starpu-data.h>
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-struct starpu_data_state_t;
-typedef struct starpu_data_state_t * starpu_data_handle;
+void *starpu_data_get_interface_on_node(starpu_data_handle handle, unsigned memory_node);
 
 /* BLAS interface for dense matrices */
 typedef struct starpu_blas_interface_s {
@@ -102,7 +102,7 @@ uint32_t starpu_get_csr_firstentry(starpu_data_handle handle);
 uintptr_t starpu_get_csr_local_nzval(starpu_data_handle handle);
 uint32_t *starpu_get_csr_local_colind(starpu_data_handle handle);
 uint32_t *starpu_get_csr_local_rowptr(starpu_data_handle handle);
-size_t starpu_get_csr_elemsize(struct starpu_data_state_t *state);
+size_t starpu_get_csr_elemsize(starpu_data_handle handle);
 
 /* CSC interface for sparse matrices (compressed sparse column representation) */
 typedef struct starpu_csc_interface_s {

+ 13 - 11
include/starpu-data.h

@@ -17,16 +17,18 @@
 #ifndef __STARPU_DATA_H__
 #define __STARPU_DATA_H__
 
+#include <starpu.h>
 #include <starpu_config.h>
+
+struct starpu_data_state_t;
+typedef struct starpu_data_state_t * starpu_data_handle;
+
 #include <starpu-data-interfaces.h>
 #include <starpu-data-filters.h>
 
 #ifdef __cplusplus
 extern "C" {
 #endif
-
-struct starpu_data_state_t;
-
 typedef enum {
 	STARPU_R,
 	STARPU_W,
@@ -38,20 +40,20 @@ typedef struct starpu_buffer_descr_t {
 	starpu_access_mode mode;
 } starpu_buffer_descr;
 
-void starpu_unpartition_data(struct starpu_data_state_t *root_data, uint32_t gathering_node);
-void starpu_delete_data(struct starpu_data_state_t *state);
+void starpu_unpartition_data(starpu_data_handle root_data, uint32_t gathering_node);
+void starpu_delete_data(starpu_data_handle state);
 
-void starpu_advise_if_data_is_important(struct starpu_data_state_t *state, unsigned is_important);
+void starpu_advise_if_data_is_important(starpu_data_handle state, unsigned is_important);
 
-int starpu_sync_data_with_mem(struct starpu_data_state_t *state, starpu_access_mode mode);
-void starpu_release_data_from_mem(struct starpu_data_state_t *state);
+int starpu_sync_data_with_mem(starpu_data_handle state, starpu_access_mode mode);
+void starpu_release_data_from_mem(starpu_data_handle state);
 
 int starpu_malloc_pinned_if_possible(void **A, size_t dim);
 int starpu_free_pinned_if_possible(void *A);
 
-int starpu_request_data_allocation(struct starpu_data_state_t *state, uint32_t node);
+int starpu_request_data_allocation(starpu_data_handle state, uint32_t node);
 
-int starpu_prefetch_data_on_node(struct starpu_data_state_t *state, unsigned node, unsigned async);
+int starpu_prefetch_data_on_node(starpu_data_handle state, unsigned node, unsigned async);
 
 unsigned starpu_get_worker_memory_node(unsigned workerid);
 
@@ -59,7 +61,7 @@ unsigned starpu_get_worker_memory_node(unsigned workerid);
  * that when it is modified, it is automatically transferred into those memory
  * nodes. For instance a (1<<0) write-back mask means that the CUDA workers will
  * commit their changes in main memory (node 0). */
-void starpu_data_set_wb_mask(struct starpu_data_state_t *state, uint32_t wb_mask);
+void starpu_data_set_wb_mask(starpu_data_handle state, uint32_t wb_mask);
 
 #ifdef __cplusplus
 }

+ 17 - 0
src/datawizard/hierarchy.c

@@ -127,6 +127,14 @@ void starpu_map_filters(data_state *root_data, unsigned nfilters, ...)
 /*
  * example get_sub_data(data_state *root_data, 3, 42, 0, 1);
  */
+starpu_data_handle starpu_data_get_child(starpu_data_handle handle, unsigned i)
+{	/* Return a handle on the i-th child of `handle`, i.e. the address of
+	 * entry i in the handle->children array. */
+#warning TODO nchildren should not be an int
+	STARPU_ASSERT(i < (unsigned)handle->nchildren); /* index must be below the recorded child count (cast: nchildren is not unsigned yet, see the warning above) */
+
+	return &handle->children[i];
+}
+
 data_state *get_sub_data(data_state *root_data, unsigned depth, ... )
 {
 	STARPU_ASSERT(root_data);
@@ -292,6 +300,7 @@ void starpu_unpartition_data(data_state *root_data, uint32_t gathering_node)
 	starpu_spin_unlock(&root_data->header_lock);
 }
 
+/* TODO move ! */
 void starpu_advise_if_data_is_important(data_state *state, unsigned is_important)
 {
 
@@ -312,3 +321,11 @@ void starpu_advise_if_data_is_important(data_state *state, unsigned is_important
 	starpu_spin_unlock(&state->header_lock);
 
 }
+
+void starpu_data_create_children(starpu_data_handle handle, unsigned nchildren)
+{	/* Allocate a zero-initialized array of `nchildren` data_state entries,
+	 * store it in handle->children and record the count in
+	 * handle->nchildren. The previous value of handle->children is
+	 * overwritten without being freed here. */
+	handle->children = calloc(nchildren, sizeof(data_state));
+	STARPU_ASSERT(handle->children); /* NOTE(review): calloc may legally return NULL when nchildren is 0 - confirm callers never pass 0 */
+
+	handle->nchildren = nchildren;
+}

+ 4 - 5
src/datawizard/interfaces/bcsr_filters.c

@@ -37,8 +37,7 @@ unsigned starpu_canonical_block_filter_bcsr(starpu_filter *f __attribute__((unus
 	nchunks = nnz;
 	
 	/* first allocate the children data_state */
-	root_data->children = calloc(nchunks, sizeof(data_state));
-	STARPU_ASSERT(root_data->children);
+	starpu_data_create_children(root_data, nchunks);
 
 	/* actually create all the chunks */
 
@@ -49,12 +48,13 @@ unsigned starpu_canonical_block_filter_bcsr(starpu_filter *f __attribute__((unus
 	unsigned chunk;
 	for (chunk = 0; chunk < nchunks; chunk++)
 	{
+		starpu_data_handle sub_handle = starpu_data_get_child(root_data, chunk);
 		uint32_t ptr_offset = c*r*chunk*elemsize;
 
 		unsigned node;
 		for (node = 0; node < MAXNODES; node++)
 		{
-			starpu_blas_interface_t *local = &root_data->children[chunk].interface[node].blas;
+			starpu_blas_interface_t *local = &sub_handle->interface[node].blas;
 
 			local->nx = c;
 			local->ny = r;
@@ -67,8 +67,7 @@ unsigned starpu_canonical_block_filter_bcsr(starpu_filter *f __attribute__((unus
 			}
 		}
 
-		struct starpu_data_state_t *state = &root_data->children[chunk];
-		state->ops = &interface_blas_ops;
+		sub_handle->ops = &interface_blas_ops;
 	}
 
 	return nchunks;

+ 8 - 1
src/datawizard/interfaces/data_interface.c

@@ -16,7 +16,14 @@
 
 #include <datawizard/datawizard.h>
 
-unsigned starpu_get_handle_interface_id(data_state *handle)
+unsigned starpu_get_handle_interface_id(starpu_data_handle handle)
 {
 	return handle->ops->interfaceid;
 }
+
+void *starpu_data_get_interface_on_node(starpu_data_handle handle, unsigned memory_node)
+{	/* Return the address of handle->interface[memory_node] as an untyped
+	 * pointer; the caller picks the concrete interface type to read it as.
+	 * memory_node is not range-checked here. */
+	return &handle->interface[memory_node];
+}
+
+/* register data interface ? (do we need to register ?) descr =  type enum, required to get an id !  */

+ 48 - 17
src/datawizard/interfaces/vector_filters.c

@@ -23,7 +23,9 @@ unsigned starpu_block_filter_func_vector(starpu_filter *f, data_state *root_data
 	unsigned nchunks;
 	uint32_t arg = f->filter_arg;
 
-	starpu_vector_interface_t *vector_root = &root_data->interface[0].vector;
+	starpu_vector_interface_t *vector_root =
+		starpu_data_get_interface_on_node(root_data, 0);
+
 	uint32_t nx = vector_root->nx;
 	size_t elemsize = vector_root->elemsize;
 
@@ -31,8 +33,7 @@ unsigned starpu_block_filter_func_vector(starpu_filter *f, data_state *root_data
 	nchunks = STARPU_MIN(nx, arg);
 
 	/* first allocate the children data_state */
-	root_data->children = calloc(nchunks, sizeof(data_state));
-	STARPU_ASSERT(root_data->children);
+	starpu_data_create_children(root_data, nchunks);
 
 	/* actually create all the chunks */
 	unsigned chunk;
@@ -44,16 +45,23 @@ unsigned starpu_block_filter_func_vector(starpu_filter *f, data_state *root_data
 		uint32_t child_nx = 
 			STARPU_MIN(chunk_size, nx - chunk*chunk_size);
 
+		starpu_data_handle chunk_handle =
+			starpu_data_get_child(root_data, chunk);
+
 		unsigned node;
 		for (node = 0; node < MAXNODES; node++)
 		{
-			starpu_vector_interface_t *local = &root_data->children[chunk].interface[node].vector;
+			starpu_vector_interface_t *local =
+				starpu_data_get_interface_on_node(chunk_handle, node);
 
 			local->nx = child_nx;
 			local->elemsize = elemsize;
 
 			if (root_data->per_node[node].allocated) {
-				local->ptr = root_data->interface[node].vector.ptr + offset;
+				starpu_vector_interface_t *local_root =
+					starpu_data_get_interface_on_node(root_data, node);
+
+				local->ptr = local_root->ptr + offset;
 			}
 		}
 	}
@@ -66,38 +74,53 @@ unsigned starpu_divide_in_2_filter_func_vector(starpu_filter *f, data_state *roo
 {
 	uint32_t length_first = f->filter_arg;
 
-	starpu_vector_interface_t *vector_root = &root_data->interface[0].vector;
+	starpu_vector_interface_t *vector_root =
+		starpu_data_get_interface_on_node(root_data, 0);
+
 	uint32_t nx = vector_root->nx;
 	size_t elemsize = vector_root->elemsize;
 
 	/* first allocate the children data_state */
-	root_data->children = calloc(2, sizeof(data_state));
-	STARPU_ASSERT(root_data->children);
+	starpu_data_create_children(root_data, 2);
 
 	STARPU_ASSERT(length_first < nx);
 
+	starpu_data_handle chunk0_handle =
+		starpu_data_get_child(root_data, 0);
+
 	unsigned node;
 	for (node = 0; node < MAXNODES; node++)
 	{
-		starpu_vector_interface_t *local = &root_data->children[0].interface[node].vector;
+		starpu_vector_interface_t *local =
+			starpu_data_get_interface_on_node(chunk0_handle, node);
 
 		local->nx = length_first;
 		local->elemsize = elemsize;
 
 		if (root_data->per_node[node].allocated) {
-			local->ptr = root_data->interface[node].vector.ptr;
+			starpu_vector_interface_t *local_root =
+				starpu_data_get_interface_on_node(root_data, node);
+
+			local->ptr = local_root->ptr;
 		}
 	}
 
+	starpu_data_handle chunk1_handle =
+		starpu_data_get_child(root_data, 1);
+
 	for (node = 0; node < MAXNODES; node++)
 	{
-		starpu_vector_interface_t *local = &root_data->children[1].interface[node].vector;
+		starpu_vector_interface_t *local =
+			starpu_data_get_interface_on_node(chunk1_handle, node);
 
 		local->nx = nx - length_first;
 		local->elemsize = elemsize;
 
 		if (root_data->per_node[node].allocated) {
-			local->ptr = root_data->interface[node].vector.ptr + length_first*elemsize;
+			starpu_vector_interface_t *local_root =
+				starpu_data_get_interface_on_node(root_data, node);
+
+			local->ptr = local_root->ptr + length_first*elemsize;
 		}
 	}
 
@@ -109,13 +132,14 @@ unsigned starpu_list_filter_func_vector(starpu_filter *f, data_state *root_data)
 	uint32_t nchunks = f->filter_arg;
 	uint32_t *length_tab = f->filter_arg_ptr;
 
-	starpu_vector_interface_t *vector_root = &root_data->interface[0].vector;
+	starpu_vector_interface_t *vector_root =
+		starpu_data_get_interface_on_node(root_data, 0);
+
 	uint32_t nx = vector_root->nx;
 	size_t elemsize = vector_root->elemsize;
 
 	/* first allocate the children data_state */
-	root_data->children = calloc(nchunks, sizeof(data_state));
-	STARPU_ASSERT(root_data->children);
+	starpu_data_create_children(root_data, nchunks);
 
 	unsigned current_pos = 0;
 
@@ -123,18 +147,25 @@ unsigned starpu_list_filter_func_vector(starpu_filter *f, data_state *root_data)
 	unsigned chunk;
 	for (chunk = 0; chunk < nchunks; chunk++)
 	{
+		starpu_data_handle chunk_handle =
+			starpu_data_get_child(root_data, chunk);
+
 		uint32_t chunk_size = length_tab[chunk];
 
 		unsigned node;
 		for (node = 0; node < MAXNODES; node++)
 		{
-			starpu_vector_interface_t *local = &root_data->children[chunk].interface[node].vector;
+			starpu_vector_interface_t *local =
+				starpu_data_get_interface_on_node(chunk_handle, node);
 
 			local->nx = chunk_size;
 			local->elemsize = elemsize;
 
 			if (root_data->per_node[node].allocated) {
-				local->ptr = root_data->interface[node].vector.ptr + current_pos*elemsize;
+				starpu_vector_interface_t *local_root =
+					starpu_data_get_interface_on_node(root_data, node);
+
+				local->ptr = local_root->ptr + current_pos*elemsize;
 			}
 		}