Browse Source

Fix checking of the MPI Cholesky decomposition: all data must be brought back to node 0 before the check

Samuel Thibault 5 years ago
parent
commit
a2c129f8e4

+ 2 - 1
mpi/examples/matrix_decomposition/mpi_cholesky.c

@@ -63,7 +63,8 @@ int main(int argc, char **argv)
 #ifndef STARPU_SIMGRID
 	matrix_display(bmat, rank);
 
-	dw_cholesky_check_computation(bmat, rank, nodes, &correctness, &flops, 0.001);
+	if (check)
+		dw_cholesky_check_computation(bmat, rank, nodes, &correctness, &flops, 0.001);
 #endif
 
 	matrix_free(&bmat, rank, nodes, 1);

+ 3 - 0
mpi/examples/matrix_decomposition/mpi_cholesky_codelets.c

@@ -170,6 +170,9 @@ void dw_cholesky(float ***matA, unsigned ld, int rank, int nodes, double *timing
 	{
 		for (y = 0; y < nblocks; y++)
 		{
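+			/* Bring the data back to node 0 for the potential check. NOTE(review): this call precedes the NULL test on the handle below -- confirm a NULL handle is safe here. */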
+			starpu_mpi_get_data_on_node(MPI_COMM_WORLD, data_handles[x][y], 0);
+
 			if (data_handles[x][y])
 				starpu_data_unregister(data_handles[x][y]);
 		}

+ 6 - 12
mpi/examples/matrix_decomposition/mpi_cholesky_distributed.c

@@ -45,6 +45,12 @@ int main(int argc, char **argv)
 
 	parse_args(argc, argv, nodes);
 
+	if (check)
+	{
+		fprintf(stderr,"can't check in distributed mode\n");
+		check = 0;
+	}
+
 	matrix_init(&bmat, rank, nodes, 0);
 
 	dw_cholesky(bmat, size/nblocks, rank, nodes, &timing, &flops);
@@ -52,20 +58,8 @@ int main(int argc, char **argv)
 	starpu_cublas_shutdown();
 	starpu_mpi_shutdown();
 
-#ifndef STARPU_SIMGRID
-	if (rank == 0)
-	{
-		matrix_display(bmat, rank);
-
-		dw_cholesky_check_computation(bmat, rank, nodes, &correctness, &flops, 0.001);
-	}
-#endif
 	matrix_free(&bmat, rank, nodes, 0);
 
-#ifndef STARPU_SIMGRID
-	assert(correctness);
-#endif
-
 	if (rank == 0)
 	{
 		FPRINTF(stdout, "Computation time (in ms): %2.2f\n", timing/1000);

+ 8 - 2
mpi/examples/matrix_decomposition/mpi_decomposition_params.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010-2013,2015-2017                      CNRS
- * Copyright (C) 2009,2010,2014-2017                      Université de Bordeaux
+ * Copyright (C) 2009,2010,2014-2017,2020                 Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -35,6 +35,7 @@ unsigned nblocks = 16;
 unsigned nbigblocks = 2;
 #endif
 unsigned noprio = 0;
+unsigned check = 0;
 unsigned display = 0;
 int dblockx = -1;
 int dblocky = -1;
@@ -79,6 +80,11 @@ void parse_args(int argc, char **argv, int nodes)
                         noprio = 1;
                 }
 
+                if (strcmp(argv[i], "-check") == 0)
+                {
+                        check = 1;
+                }
+
                 if (strcmp(argv[i], "-display") == 0)
                 {
                         display = 1;
@@ -86,7 +92,7 @@ void parse_args(int argc, char **argv, int nodes)
 
                 if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0)
                 {
-			printf("usage : %s [-size size] [-nblocks nblocks] [-no-prio] [-display]\n", argv[0]);
+			printf("usage : %s [-size size] [-nblocks nblocks] [-no-prio] [-display] [-check]\n", argv[0]);
                 }
         }
 

+ 2 - 1
mpi/examples/matrix_decomposition/mpi_decomposition_params.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010-2013,2015,2017                      CNRS
- * Copyright (C) 2009,2010,2014                           Université de Bordeaux
+ * Copyright (C) 2009,2010,2014,2020                      Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -24,6 +24,7 @@ extern unsigned size;
 extern unsigned nblocks;
 extern unsigned nbigblocks;
 extern unsigned noprio;
+extern unsigned check;
 extern unsigned display;
 extern int dblockx;
 extern int dblocky;