Переглянути джерело

Add starpu_bcsr_filter_block filtering function

Samuel Thibault 5 роки тому
батько
коміт
5f299d82f3

+ 1 - 0
ChangeLog

@@ -42,6 +42,7 @@ Small features:
   * New environment variables STARPU_BUS_STATS_FILE and
     STARPU_WORKER_STATS_FILE to specify files in which to display
     statistics about data transfers and workers.
+  * Add starpu_bcsr_filter_block filtering function.
 
 StarPU 1.3.3 (git revision 11afc5b007fe1ab1c729b55b47a5a98ef7f3cfad)
 ====================================================================

+ 4 - 2
doc/doxygen/chapters/310_data_management.doxy

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010-2019                                CNRS
- * Copyright (C) 2009-2011,2014-2019                      Université de Bordeaux
+ * Copyright (C) 2009-2011,2014-2020                      Université de Bordeaux
  * Copyright (C) 2011,2012                                Inria
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -184,7 +184,9 @@ StarPU provides an example on how to deal with such matrices in
 <c>examples/spmv</c>.
 
 BCSR data handles can be partitioned into its dense matrix blocks by using
-starpu_bcsr_filter_canonical_block().
+starpu_bcsr_filter_canonical_block(), or split into other BCSR data handles by
+using starpu_bcsr_filter_block() (only splitting along the leading dimension
+is supported, i.e. along adjacent nnz blocks).
 
 \subsection CSRDataInterface CSR Data Interface
 

+ 7 - 0
include/starpu_data_filters.h

@@ -323,6 +323,13 @@ void starpu_bcsr_filter_canonical_block(void *father_interface, void *child_inte
 */
 struct starpu_data_interface_ops *starpu_bcsr_filter_canonical_block_child_ops(struct starpu_data_filter *f, unsigned child);
 
+/**
+   Partition a block-sparse matrix into block-sparse matrices.
+
+   The split is done along the leading dimension, i.e. along adjacent nnz blocks.
+
+   The block-rows of the father are divided into \p nparts chunks and child
+   \p id receives one of them. The child's nzval, colind and rowptr point into
+   the father's arrays, and the child's firstentry is shifted by the index of
+   its first block so that rowptr can be used unmodified.
+
+   Fathers whose nzval is not set are not supported yet: in that case only the
+   interface id of the child is filled in.
+*/
+void starpu_bcsr_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
+
 /** @} */
 
 /**

+ 6 - 1
include/starpu_data_interfaces.h

@@ -1801,6 +1801,11 @@ extern struct starpu_data_interface_ops starpu_interface_bcsr_ops;
 /**
    BCSR interface for sparse matrices (blocked compressed sparse
    row representation)
+
+   Note: when a BCSR matrix is partitioned, nzval, colind, and rowptr point into
+   the corresponding father arrays. The rowptr content is thus the same as the
+   father's. The child's firstentry is set so that this content remains valid
+   for the child arrays, i.e. rowptr[i] - firstentry is the index, within the
+   child's nzval and colind, of the first block of row i.
 */
 struct starpu_bcsr_interface
 {
@@ -1809,7 +1814,7 @@ struct starpu_bcsr_interface
 	uint32_t nnz;                     /**< number of non-zero BLOCKS */
 	uint32_t nrow;                    /**< number of rows (in terms of BLOCKS) */
 
-	uintptr_t nzval;                  /**< non-zero values */
+	uintptr_t nzval;                  /**< non-zero values: nnz blocks of r*c elements */
 	uint32_t *colind;                 /**< array of nnz elements, colind[i] is the block-column index for block i in nzval */
 	uint32_t *rowptr;                 /**< array of nrow+1
 					   * elements, rowptr[i] is

+ 41 - 1
src/datawizard/interfaces/bcsr_filters.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2008-2011,2013,2014,2016,2019            Université de Bordeaux
+ * Copyright (C) 2008-2011,2013,2014,2016,2019-2020       Université de Bordeaux
  * Copyright (C) 2010                                     Mehdi Juhoor
  * Copyright (C) 2010,2011,2013,2015,2017,2019            CNRS
  *
@@ -20,6 +20,46 @@
 #include <common/config.h>
 #include <datawizard/filters.h>
 
+/*
+ * Filter function splitting a BCSR matrix into nparts BCSR children along its
+ * block-rows.
+ *
+ * Child number id gets a contiguous chunk of the father's block-rows. Nothing
+ * is copied: the child's nzval, colind and rowptr point into the father's
+ * arrays, and the child's firstentry is shifted by the index of its first
+ * block, so that rowptr[i] - firstentry indexes the child's own nzval/colind.
+ *
+ * f is unused. If the father has no nzval set, only the child's interface id
+ * is filled in (this case is not supported yet).
+ */
+void starpu_bcsr_filter_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts)
+{
+	struct starpu_bcsr_interface *bcsr_father = (struct starpu_bcsr_interface *) father_interface;
+	struct starpu_bcsr_interface *bcsr_child = (struct starpu_bcsr_interface *) child_interface;
+
+	size_t elemsize = bcsr_father->elemsize;
+	uint32_t firstentry = bcsr_father->firstentry;
+	uint32_t r = bcsr_father->r;
+	uint32_t c = bcsr_father->c;
+	uint32_t *rowptr = bcsr_father->rowptr;
+
+	unsigned child_nrow;
+	size_t child_rowoffset;
+
+	STARPU_ASSERT_MSG(bcsr_father->id == STARPU_BCSR_INTERFACE_ID, "%s can only be applied on a bcsr data", __func__);
+
+	bcsr_child->id = bcsr_father->id;
+
+	if (!bcsr_father->nzval)
+		/* Not supported yet */
+		return;
+
+	/* Compute which block-rows of the father go to this child */
+	starpu_filter_nparts_compute_chunk_size_and_offset(bcsr_father->nrow, nparts, 1, id, 1, &child_nrow, &child_rowoffset);
+
+	/* The child's blocks are those between these indexes (0-based) */
+	uint32_t start_block = rowptr[child_rowoffset] - firstentry;
+	uint32_t end_block = rowptr[child_rowoffset + child_nrow] - firstentry;
+
+	/* Widen to size_t before multiplying: start_block * r * c computed in
+	 * 32 bits could wrap around for large matrices and yield a bogus
+	 * byte offset. */
+	bcsr_child->nzval = bcsr_father->nzval + (size_t) start_block * r * c * elemsize;
+	bcsr_child->nnz = end_block - start_block;
+	bcsr_child->nrow = child_nrow;
+	bcsr_child->colind = bcsr_father->colind + start_block;
+	bcsr_child->rowptr = rowptr + child_rowoffset;
+
+	/* rowptr still holds the father's values: shift firstentry so that
+	 * they index the child's nzval and colind */
+	bcsr_child->firstentry = firstentry + start_block;
+	bcsr_child->r = r;
+	bcsr_child->c = c;
+	bcsr_child->elemsize = elemsize;
+}
+
 void starpu_bcsr_filter_canonical_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, STARPU_ATTRIBUTE_UNUSED unsigned nparts)
 {
 	struct starpu_bcsr_interface *bcsr_father = (struct starpu_bcsr_interface *) father_interface;

+ 2 - 1
tests/Makefile.am

@@ -1,7 +1,7 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 # Copyright (C) 2010-2018                                Inria
-# Copyright (C) 2009-2019                                Université de Bordeaux
+# Copyright (C) 2009-2020                                Université de Bordeaux
 # Copyright (C) 2010-2017,2019                           CNRS
 #
 # StarPU is free software; you can redistribute it and/or modify
@@ -255,6 +255,7 @@ myPROGRAMS +=				\
 	datawizard/acquire_release		\
 	datawizard/acquire_release2		\
 	datawizard/acquire_try			\
+	datawizard/bcsr				\
 	datawizard/cache			\
 	datawizard/commute			\
 	datawizard/commute2			\

+ 149 - 0
tests/datawizard/bcsr.c

@@ -0,0 +1,149 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011-2013                                Inria
+ * Copyright (C) 2010-2013,2015,2017,2019                 CNRS
+ * Copyright (C) 2012,2013,2017,2020                      Université de Bordeaux
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include "../helper.h"
+
+static starpu_data_handle_t bcsr_handle;
+
+/*
+ * CPU implementation of the codelet: print the BCSR matrix passed in descr[0]
+ * on the standard output, block-row by block-row, with the block-column index
+ * and the r*c values of each block.
+ *
+ * The values are read as int, i.e. the matrix is expected to have been
+ * registered with sizeof(int) elements, as main() does.
+ */
+void cpu_show_bcsr(void *descr[], void *arg)
+{
+	(void)arg;
+	struct starpu_bcsr_interface *iface = descr[0];
+	/* Plain unsigned copies so that they exactly match the %u conversions
+	 * used below */
+	unsigned nnz = iface->nnz;
+	unsigned nrow = iface->nrow;
+	int *nzval = (int *)iface->nzval;
+	uint32_t *colind = iface->colind;
+	uint32_t *rowptr = iface->rowptr;
+
+	uint32_t firstentry = iface->firstentry;
+	unsigned r = iface->r;
+	unsigned c = iface->c;
+	unsigned elemsize = iface->elemsize;
+
+	unsigned i, j, y, x;
+	/* Keep the output of tasks running concurrently from interleaving */
+	static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER;
+
+	STARPU_PTHREAD_MUTEX_LOCK(&mutex);
+
+	printf("nnz %u elemsize %u\n", nnz, elemsize);
+
+	for (i = 0; i < nrow; i++)
+	{
+		/* rowptr values are relative to firstentry: subtract it to get
+		 * indexes into this interface's nzval and colind */
+		uint32_t row_start = rowptr[i] - firstentry;
+		uint32_t row_end = rowptr[i+1] - firstentry;
+
+		printf("row %u\n", i);
+
+		for (j = row_start; j < row_end; j++)
+		{
+			int *block = nzval + j * r*c;
+
+			printf( " column %u\n", (unsigned) colind[j]);
+
+			for (y = 0; y < r; y++)
+			{
+				for (x = 0; x < c; x++)
+					printf("  %d", block[y*c+x]);
+				printf("\n");
+			}
+		}
+	}
+	STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
+}
+
+/* Codelet displaying a BCSR matrix: it only has a CPU implementation
+ * (cpu_show_bcsr) and accesses its single buffer in read-only mode.
+ */
+struct starpu_codelet show_cl =
+{
+	.cpu_funcs = { cpu_show_bcsr },
+	.nbuffers = 1,
+	.modes = { STARPU_R },
+};
+
+/*
+ * In this test, we use the following matrix:
+ *
+ *   +----------------+
+ *   |  0   1   0   0 |
+ *   |  2   3   0   0 |
+ *   |  4   5   8   9 |
+ *   |  6   7  10  11 |
+ *   +----------------+
+ *
+ * nzval  = [0, 1, 2, 3] ++ [4, 5, 6, 7] ++ [8, 9, 10, 11]
+ * colind = [0, 0, 1]
+ * rowptr = [0, 1, 3]
+ * r = c = 2
+ */
+
+/* Size of the dense blocks: R rows by C columns */
+#define R              2
+#define C              2
+
+#define NNZ_BLOCKS     3   /* number of non-zero blocks, out of 4 */
+#define NZVAL_SIZE     (R*C*NNZ_BLOCKS)   /* total number of stored values */
+
+#define NROWS          2   /* number of rows, in terms of blocks */
+
+static int nzval[NZVAL_SIZE]  =
+{
+	0, 1, 2, 3,    /* First block  */
+	4, 5, 6, 7,    /* Second block */
+	8, 9, 10, 11   /* Third block  */
+};
+static uint32_t colind[NNZ_BLOCKS] = { 0, 0, 1 }; /* block-column index of each block */
+/* Blocks rowptr[i] to rowptr[i+1]-1 belong to block-row i */
+static uint32_t rowptr[NROWS+1] = { 0, 1, NNZ_BLOCKS };
+
+/*
+ * Register the test matrix as a BCSR data, display it as a whole, then
+ * partition it in two with starpu_bcsr_filter_block and display each child.
+ *
+ * Returns STARPU_TEST_SKIPPED when StarPU cannot start or has no CPU worker
+ * to run the display codelet, 0 otherwise.
+ */
+int main(int argc, char **argv)
+{
+	struct starpu_conf conf;
+	starpu_conf_init(&conf);
+
+	if (starpu_initialize(&conf, &argc, &argv) == -ENODEV)
+		return STARPU_TEST_SKIPPED;
+
+	if (starpu_cpu_worker_get_count() == 0)
+	{
+		/* StarPU did get initialized: shut it down before skipping */
+		starpu_shutdown();
+		return STARPU_TEST_SKIPPED;
+	}
+
+	starpu_bcsr_data_register(&bcsr_handle,
+				  STARPU_MAIN_RAM,
+				  NNZ_BLOCKS,
+				  NROWS,
+				  (uintptr_t) nzval,
+				  colind,
+				  rowptr,
+				  0, /* firstentry */
+				  R,
+				  C,
+				  sizeof(nzval[0]));
+
+	/* Show the whole matrix */
+	starpu_task_insert(&show_cl, STARPU_R, bcsr_handle, 0);
+
+	struct starpu_data_filter filter = {
+		.filter_func = starpu_bcsr_filter_block,
+		.nchildren = 2,
+	};
+	starpu_data_partition(bcsr_handle, &filter);
+
+	/* Show each of the two children */
+	starpu_task_insert(&show_cl, STARPU_R, starpu_data_get_sub_data(bcsr_handle, 1, 0), 0);
+	starpu_task_insert(&show_cl, STARPU_R, starpu_data_get_sub_data(bcsr_handle, 1, 1), 0);
+	starpu_data_unpartition(bcsr_handle, STARPU_MAIN_RAM);
+
+	starpu_data_unregister(bcsr_handle);
+
+	starpu_shutdown();
+
+	return 0;
+}