Browse Source

Fix the block filter functions.

For some values of N and NCHUNKS (typically when NCHUNKS does not evenly divide N), the filter functions computed incorrect chunk sizes and offsets.
Cyril Roelandt 13 years ago
parent
commit
82cab64103

+ 33 - 0
src/datawizard/filters.c

@@ -446,3 +446,36 @@ static void starpu_data_create_children(starpu_data_handle_t handle, unsigned nc
 	/* this handle now has children */
 	handle->nchildren = nchildren;
 }
+
+/*
+ * Describe part ID of N elements split into NPARTS (non-zero) parts: the first
+ * n%nparts parts get n/nparts + 1 elements, the others n/nparts. *CHUNK_SIZE
+ * receives that count; *OFFSET, unless OFFSET is NULL, the byte offset of the
+ * part: (elements in parts 0..ID-1) * LD (leading dimension) * ELEMSIZE.
+ */
+void
+_filter_nparts_compute_chunk_size_and_offset(unsigned n, unsigned nparts,
+					     size_t elemsize, unsigned id,
+					     unsigned ld, unsigned *chunk_size,
+					     size_t *offset)
+{
+	*chunk_size = n/nparts;
+	unsigned remainder = n % nparts;
+	if (id < remainder)
+		(*chunk_size)++;
+	/*
+	 * Computing the total offset. The formula is the closed form of:
+	 *
+	 * total = 0;
+	 * for (i = 0; i < id; i++)
+	 * {
+	 * 	total += n/nparts;
+	 * 	if (i < n%nparts)
+	 *		total++;
+	 * }
+	 * offset = total * elemsize * ld;
+	 * ID is widened first so every product is done in size_t, not unsigned.
+	 */
+	if (offset != NULL)
+		*offset = ((size_t) id * (n/nparts) + STARPU_MIN(remainder, id)) * ld * elemsize;
+}

+ 5 - 0
src/datawizard/filters.h

@@ -25,4 +25,9 @@
 #include <starpu.h>
 #include <common/config.h>
 
+void
+_filter_nparts_compute_chunk_size_and_offset(unsigned n, unsigned nparts,
+					     size_t elemsize, unsigned id,
+					     unsigned ld, unsigned *chunk_size,
+					     size_t *offset);
 #endif

+ 31 - 26
src/datawizard/interfaces/block_filters.c

@@ -17,6 +17,7 @@
 #include <starpu.h>
 #include <common/config.h>
 #include <datawizard/filters.h>
+#include <datawizard/interfaces/filter_utils.h>
 
 void starpu_block_filter_func_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f,
                                     unsigned id, unsigned nparts)
@@ -31,12 +32,10 @@ void starpu_block_filter_func_block(void *father_interface, void *child_interfac
 
 	STARPU_ASSERT(nparts <= nx);
 
-	uint32_t chunk_size = nx/nparts;
-	size_t offset = id*chunk_size*elemsize;
-
-	/* The last submatrix contains the remaining columns. */
-	if (STARPU_UNLIKELY(id + 1 == nparts))
-		chunk_size += (nx % nparts);
+	uint32_t chunk_size;
+	size_t offset;
+	_filter_nparts_compute_chunk_size_and_offset(nx, nparts, elemsize, id, 1,
+				       &chunk_size, &offset);
 
 	block_child->nx = chunk_size;
 	block_child->ny = ny;
@@ -70,12 +69,13 @@ void starpu_block_shadow_filter_func_block(void *father_interface, void *child_i
 
 	STARPU_ASSERT(nparts <= nx);
 
-	uint32_t chunk_size = (nx + nparts - 1)/nparts;
-	size_t offset = id*chunk_size*elemsize;
-
-        uint32_t child_nx = STARPU_MIN(chunk_size, nx - id*chunk_size) + 2 * shadow_size;
+	uint32_t child_nx;
+	size_t offset;
+	_filter_nparts_compute_chunk_size_and_offset(nx, nparts, elemsize, id, 1,
+						     &child_nx, &offset);
+	
 
-	block_child->nx = child_nx;
+	block_child->nx = child_nx + 2 * shadow_size;
 	block_child->ny = ny;
 	block_child->nz = nz;
 	block_child->elemsize = elemsize;
@@ -104,10 +104,10 @@ void starpu_vertical_block_filter_func_block(void *father_interface, void *child
 
 	STARPU_ASSERT(nparts <= ny);
 
-	uint32_t chunk_size = (ny + nparts - 1)/nparts;
-	size_t offset = id*chunk_size*block_father->ldy*elemsize;
-
-        uint32_t child_ny = STARPU_MIN(chunk_size, ny - id*chunk_size);
+	uint32_t child_ny;
+	size_t offset;
+	_filter_nparts_compute_chunk_size_and_offset(ny, nparts, elemsize, id, block_father->ldy,
+				       &child_ny, &offset);
 
 	block_child->nx = nx;
 	block_child->ny = child_ny;
@@ -141,13 +141,15 @@ void starpu_vertical_block_shadow_filter_func_block(void *father_interface, void
 
 	STARPU_ASSERT(nparts <= ny);
 
-	uint32_t chunk_size = (ny + nparts - 1)/nparts;
-	size_t offset = id*chunk_size*block_father->ldy*elemsize;
+	uint32_t child_ny;
+	size_t offset;
 
-        uint32_t child_ny = STARPU_MIN(chunk_size, ny - id*chunk_size) + 2 * shadow_size;
+	_filter_nparts_compute_chunk_size_and_offset(ny, nparts, elemsize, id,
+						     block_father->ldy,
+						     &child_ny, &offset);
 
 	block_child->nx = nx;
-	block_child->ny = child_ny;
+	block_child->ny = child_ny + 2 * shadow_size;
 	block_child->nz = nz;
 	block_child->elemsize = elemsize;
 
@@ -175,10 +177,11 @@ void starpu_depth_block_filter_func_block(void *father_interface, void *child_in
 
 	STARPU_ASSERT(nparts <= nz);
 
-	uint32_t chunk_size = (nz + nparts - 1)/nparts;
-	size_t offset = id*chunk_size*block_father->ldz*elemsize;
+	uint32_t child_nz;
+	size_t offset;
 
-        uint32_t child_nz = STARPU_MIN(chunk_size, nz - id*chunk_size);
+	_filter_nparts_compute_chunk_size_and_offset(nz, nparts, elemsize, id,
+				       block_father->ldz, &child_nz, &offset);
 
 	block_child->nx = nx;
 	block_child->ny = ny;
@@ -212,14 +215,16 @@ void starpu_depth_block_shadow_filter_func_block(void *father_interface, void *c
 
 	STARPU_ASSERT(nparts <= nz);
 
-	uint32_t chunk_size = (nz + nparts - 1)/nparts;
-	size_t offset = id*chunk_size*block_father->ldz*elemsize;
+	uint32_t child_nz;
+	size_t offset;
 
-        uint32_t child_nz = STARPU_MIN(chunk_size, nz - id*chunk_size) + 2 * shadow_size;
+	_filter_nparts_compute_chunk_size_and_offset(nz, nparts, elemsize, id,
+						     block_father->ldz,
+						     &child_nz, &offset);
 
 	block_child->nx = nx;
 	block_child->ny = ny;
-	block_child->nz = child_nz;
+	block_child->nz = child_nz + 2 * shadow_size;
 	block_child->elemsize = elemsize;
 
 	if (block_father->dev_handle)

+ 5 - 0
src/datawizard/interfaces/csr_filters.c

@@ -38,6 +38,11 @@ void starpu_vertical_block_filter_func_csr(void *father_interface, void *child_i
 
 	uint32_t child_nrow =
 	  STARPU_MIN(chunk_size, nrow - id*chunk_size);
+	/* TODO: the formula for the chunk size is probably wrong: we should
+	 * probably do this instead, and write a test.
+	_filter_nparts_compute_chunk_size_and_offset(nrow, nparts, elemsize,
+						     id, 1, &chunk_size, NULL);
+	 */
 
 	uint32_t local_nnz = rowptr[first_index + child_nrow] - rowptr[first_index];
 

+ 23 - 16
src/datawizard/interfaces/matrix_filters.c

@@ -34,11 +34,11 @@ void starpu_block_filter_func(void *father_interface, void *child_interface, STA
 
 	STARPU_ASSERT(nchunks <= nx);
 
-	size_t chunk_size = ((size_t)nx + nchunks - 1)/nchunks;
-	size_t offset = (size_t)id*chunk_size*elemsize;
+	uint32_t child_nx;
+	size_t offset;
 
-	uint32_t child_nx =
-	  STARPU_MIN(chunk_size, (size_t)nx - (size_t)id*chunk_size);
+	_filter_nparts_compute_chunk_size_and_offset(nx, nchunks, elemsize, id, 1,
+						     &child_nx, &offset);
 
 	/* update the child's interface */
 	matrix_child->nx = child_nx;
@@ -73,11 +73,13 @@ void starpu_block_shadow_filter_func(void *father_interface, void *child_interfa
 
 	STARPU_ASSERT(nchunks <= nx);
 
-	size_t chunk_size = ((size_t)nx + nchunks - 1)/nchunks;
-	size_t offset = (size_t)id*chunk_size*elemsize;
+	uint32_t child_nx;
+	size_t offset;
 
-	uint32_t child_nx =
-	  STARPU_MIN(chunk_size, (size_t)nx - (size_t)id*chunk_size) + 2 * shadow_size;
+	_filter_nparts_compute_chunk_size_and_offset(nx, nchunks, elemsize, id, 1,
+						     &child_nx, &offset);
+
+	child_nx += 2 * shadow_size;
 
 	/* update the child's interface */
 	matrix_child->nx = child_nx;
@@ -106,9 +108,12 @@ void starpu_vertical_block_filter_func(void *father_interface, void *child_inter
 
 	STARPU_ASSERT(nchunks <= ny);
 
-	size_t chunk_size = ((size_t)ny + nchunks - 1)/nchunks;
-	size_t child_ny =
-	  STARPU_MIN(chunk_size, (size_t)ny - (size_t)id*chunk_size);
+	uint32_t child_ny;
+	size_t offset;
+
+	_filter_nparts_compute_chunk_size_and_offset(ny, nchunks, elemsize, id,
+						     matrix_father->ld,
+						     &child_ny, &offset);
 
 	matrix_child->nx = nx;
 	matrix_child->ny = child_ny;
@@ -117,7 +122,6 @@ void starpu_vertical_block_filter_func(void *father_interface, void *child_inter
 	/* is the information on this node valid ? */
 	if (matrix_father->dev_handle)
 	{
-		size_t offset = (size_t)id*chunk_size*matrix_father->ld*elemsize;
 		if (matrix_father->ptr)
 			matrix_child->ptr = matrix_father->ptr + offset;
 		matrix_child->ld = matrix_father->ld;
@@ -140,9 +144,13 @@ void starpu_vertical_block_shadow_filter_func(void *father_interface, void *chil
 
 	STARPU_ASSERT(nchunks <= ny);
 
-	size_t chunk_size = ((size_t)ny + nchunks - 1)/nchunks;
-	size_t child_ny =
-	  STARPU_MIN(chunk_size, (size_t)ny - (size_t)id*chunk_size) + 2 * shadow_size;
+	uint32_t child_ny;
+	size_t offset;
+
+	_filter_nparts_compute_chunk_size_and_offset(ny, nchunks, elemsize, id,
+						     matrix_father->ld,
+						     &child_ny, &offset);
+	child_ny += 2 * shadow_size;
 
 	matrix_child->nx = nx;
 	matrix_child->ny = child_ny;
@@ -151,7 +159,6 @@ void starpu_vertical_block_shadow_filter_func(void *father_interface, void *chil
 	/* is the information on this node valid ? */
 	if (matrix_father->dev_handle)
 	{
-		size_t offset = (size_t)id*chunk_size*matrix_father->ld*elemsize;
 		if (matrix_father->ptr)
 			matrix_child->ptr = matrix_father->ptr + offset;
 		matrix_child->ld = matrix_father->ld;

+ 10 - 10
src/datawizard/interfaces/vector_filters.c

@@ -19,6 +19,7 @@
 #include <starpu.h>
 #include <common/config.h>
 #include <datawizard/filters.h>
+#include <datawizard/interfaces/filter_utils.h>
 
 void starpu_block_filter_func_vector(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks)
 {
@@ -30,11 +31,10 @@ void starpu_block_filter_func_vector(void *father_interface, void *child_interfa
 
 	STARPU_ASSERT(nchunks <= nx);
 
-	uint32_t chunk_size = (nx + nchunks - 1)/nchunks;
-	size_t offset = id*chunk_size*elemsize;
-
-	uint32_t child_nx =
-	  STARPU_MIN(chunk_size, nx - id*chunk_size);
+	uint32_t child_nx;
+	size_t offset;
+	_filter_nparts_compute_chunk_size_and_offset(nx, nchunks, elemsize, id, 1,
+						     &child_nx, &offset);
 
 	vector_child->nx = child_nx;
 	vector_child->elemsize = elemsize;
@@ -62,11 +62,11 @@ void starpu_block_shadow_filter_func_vector(void *father_interface, void *child_
 
 	STARPU_ASSERT(nchunks <= nx);
 
-	uint32_t chunk_size = (nx + nchunks - 1)/nchunks;
-	size_t offset = id*chunk_size*elemsize;
-
-	uint32_t child_nx =
-	  STARPU_MIN(chunk_size, nx - id*chunk_size) + 2 * shadow_size;
+	uint32_t child_nx;
+	size_t offset;
+	_filter_nparts_compute_chunk_size_and_offset(nx, nchunks, elemsize, id, 1,
+						     &child_nx, &offset);
+	child_nx += 2*shadow_size;
 
 	vector_child->nx = child_nx;
 	vector_child->elemsize = elemsize;