Browse Source

mpi_cholesky: Add progressive flushing for columns, antidiagonals, and prios

Samuel Thibault 5 years ago
parent
commit
943339c07e
1 changed file with 59 additions and 25 deletions
  1. 59 25
      mpi/examples/matrix_decomposition/mpi_cholesky_codelets.c

+ 59 - 25
mpi/examples/matrix_decomposition/mpi_cholesky_codelets.c

@@ -180,7 +180,7 @@ static void run_cholesky(starpu_data_handle_t **data_handles, int rank, int node
 /* TODO: generate from compiler polyhedral analysis of classical algorithm */
 /* TODO: generate from compiler polyhedral analysis of classical algorithm */
 static void run_cholesky_column(starpu_data_handle_t **data_handles, int rank, int nodes)
 static void run_cholesky_column(starpu_data_handle_t **data_handles, int rank, int nodes)
 {
 {
-	unsigned k, m, n;
+	unsigned k, m, n, i;
 	unsigned unbound_prio = STARPU_MAX_PRIO == INT_MAX && STARPU_MIN_PRIO == INT_MIN;
 	unsigned unbound_prio = STARPU_MAX_PRIO == INT_MAX && STARPU_MIN_PRIO == INT_MIN;
 	unsigned nn = size/nblocks;
 	unsigned nn = size/nblocks;
 
 
@@ -202,6 +202,13 @@ static void run_cholesky_column(starpu_data_handle_t **data_handles, int rank, i
 						       STARPU_RW | STARPU_COMMUTE, data_handles[m][n],
 						       STARPU_RW | STARPU_COMMUTE, data_handles[m][n],
 						       STARPU_FLOPS, (double) FLOPS_SGEMM(nn, nn, nn),
 						       STARPU_FLOPS, (double) FLOPS_SGEMM(nn, nn, nn),
 						       0);
 						       0);
+
+				if (m == n)
+				{
+					/* Nobody else will need it */
+					starpu_mpi_cache_flush(MPI_COMM_WORLD, data_handles[m][k]);
+					starpu_data_wont_use(data_handles[m][k]);
+				}
 			}
 			}
 			k = n;
 			k = n;
 			if (m > n)
 			if (m > n)
@@ -223,23 +230,22 @@ static void run_cholesky_column(starpu_data_handle_t **data_handles, int rank, i
 						       STARPU_FLOPS, (double) FLOPS_SPOTRF(nn),
 						       STARPU_FLOPS, (double) FLOPS_SPOTRF(nn),
 						       0);
 						       0);
 			}
 			}
+
 		}
 		}
 
 
+		/* We won't need it any more */
+		starpu_mpi_cache_flush(MPI_COMM_WORLD, data_handles[n][n]);
+		starpu_data_wont_use(data_handles[n][n]);
+
 		starpu_iteration_pop();
 		starpu_iteration_pop();
 	}
 	}
-
-	/* Submit flushes, StarPU will fit them according to the progress */
-	starpu_mpi_cache_flush_all_data(MPI_COMM_WORLD);
-	for (m = 0; m < nblocks; m++)
-		for (n = 0; n < nblocks ; n++)
-			starpu_data_wont_use(data_handles[m][n]);
 }
 }
 
 
 /* TODO: generate from compiler polyhedral analysis of classical algorithm */
 /* TODO: generate from compiler polyhedral analysis of classical algorithm */
 static void run_cholesky_antidiagonal(starpu_data_handle_t **data_handles, int rank, int nodes)
 static void run_cholesky_antidiagonal(starpu_data_handle_t **data_handles, int rank, int nodes)
 {
 {
 	unsigned a, c;
 	unsigned a, c;
-	unsigned k, m, n;
+	unsigned k, m, n, i;
 	unsigned unbound_prio = STARPU_MAX_PRIO == INT_MAX && STARPU_MIN_PRIO == INT_MIN;
 	unsigned unbound_prio = STARPU_MAX_PRIO == INT_MAX && STARPU_MIN_PRIO == INT_MIN;
 	unsigned nn = size/nblocks;
 	unsigned nn = size/nblocks;
 
 
@@ -274,6 +280,13 @@ static void run_cholesky_antidiagonal(starpu_data_handle_t **data_handles, int r
 						       STARPU_RW | STARPU_COMMUTE, data_handles[m][n],
 						       STARPU_RW | STARPU_COMMUTE, data_handles[m][n],
 						       STARPU_FLOPS, (double) FLOPS_SGEMM(nn, nn, nn),
 						       STARPU_FLOPS, (double) FLOPS_SGEMM(nn, nn, nn),
 						       0);
 						       0);
+
+				if (m == nblocks-1)
+				{
+					/* Nobody else will need it */
+					starpu_mpi_cache_flush(MPI_COMM_WORLD, data_handles[n][k]);
+					starpu_data_wont_use(data_handles[n][k]);
+				}
 			}
 			}
 
 
 			/* k = n */
 			/* k = n */
@@ -296,6 +309,13 @@ static void run_cholesky_antidiagonal(starpu_data_handle_t **data_handles, int r
 						       STARPU_FLOPS, (double) FLOPS_SPOTRF(nn),
 						       STARPU_FLOPS, (double) FLOPS_SPOTRF(nn),
 						       0);
 						       0);
 			}
 			}
+
+			if (m == nblocks - 1)
+			{
+				/* We do not need the potrf result any more */
+				starpu_mpi_cache_flush(MPI_COMM_WORLD, data_handles[n][n]);
+				starpu_data_wont_use(data_handles[n][n]);
+			}
 		}
 		}
 
 
 		/* column within second antidiagonal for a */
 		/* column within second antidiagonal for a */
@@ -318,6 +338,13 @@ static void run_cholesky_antidiagonal(starpu_data_handle_t **data_handles, int r
 						       STARPU_RW | STARPU_COMMUTE, data_handles[m][n],
 						       STARPU_RW | STARPU_COMMUTE, data_handles[m][n],
 						       STARPU_FLOPS, (double) FLOPS_SGEMM(nn, nn, nn),
 						       STARPU_FLOPS, (double) FLOPS_SGEMM(nn, nn, nn),
 						       0);
 						       0);
+
+				if (m == nblocks-1)
+				{
+					/* Nobody else will need it */
+					starpu_mpi_cache_flush(MPI_COMM_WORLD, data_handles[n][k]);
+					starpu_data_wont_use(data_handles[n][k]);
+				}
 			}
 			}
 			/* non-diagonal block, solve */
 			/* non-diagonal block, solve */
 			k = n;
 			k = n;
@@ -327,16 +354,17 @@ static void run_cholesky_antidiagonal(starpu_data_handle_t **data_handles, int r
 					       STARPU_RW, data_handles[m][k],
 					       STARPU_RW, data_handles[m][k],
 					       STARPU_FLOPS, (double) FLOPS_STRSM(nn, nn),
 					       STARPU_FLOPS, (double) FLOPS_STRSM(nn, nn),
 					       0);
 					       0);
+
+			if (m == nblocks - 1)
+			{
+				/* We do not need the potrf result any more */
+				starpu_mpi_cache_flush(MPI_COMM_WORLD, data_handles[n][n]);
+				starpu_data_wont_use(data_handles[n][n]);
+			}
 		}
 		}
 
 
 		starpu_iteration_pop();
 		starpu_iteration_pop();
 	}
 	}
-
-	/* Submit flushes, StarPU will fit them according to the progress */
-	starpu_mpi_cache_flush_all_data(MPI_COMM_WORLD);
-	for (m = 0; m < nblocks; m++)
-		for (n = 0; n < nblocks ; n++)
-			starpu_data_wont_use(data_handles[m][n]);
 }
 }
 
 
 /* TODO: generate from compiler polyhedral analysis of classical algorithm */
 /* TODO: generate from compiler polyhedral analysis of classical algorithm */
@@ -358,14 +386,13 @@ static void run_cholesky_prio(starpu_data_handle_t **data_handles, int rank, int
 	{
 	{
 		starpu_iteration_push(a);
 		starpu_iteration_push(a);
 
 
-		for (k = 0; k < nblocks; k++)
+		for (k = 0; k < (int) nblocks; k++)
 		{
 		{
 			n = k;
 			n = k;
 			/* Should be m = a-k-n; for potrf and trsm to respect
 			/* Should be m = a-k-n; for potrf and trsm to respect
 			   priorities, but needs to be this for dependencies */
 			   priorities, but needs to be this for dependencies */
 			m = a-2*k-n;
 			m = a-2*k-n;
 
 
-
 			if (m == n)
 			if (m == n)
 			{
 			{
 				/* diagonal block, factorize */
 				/* diagonal block, factorize */
@@ -375,7 +402,7 @@ static void run_cholesky_prio(starpu_data_handle_t **data_handles, int rank, int
 						       STARPU_FLOPS, (double) FLOPS_SPOTRF(nn),
 						       STARPU_FLOPS, (double) FLOPS_SPOTRF(nn),
 						       0);
 						       0);
 			}
 			}
-			else if (m >= n && m < nblocks)
+			else if (m >= n && m < (int) nblocks)
 			{
 			{
 				/* non-diagonal block, solve */
 				/* non-diagonal block, solve */
 				starpu_mpi_task_insert(MPI_COMM_WORLD, &cl21,
 				starpu_mpi_task_insert(MPI_COMM_WORLD, &cl21,
@@ -386,13 +413,20 @@ static void run_cholesky_prio(starpu_data_handle_t **data_handles, int rank, int
 						       0);
 						       0);
 			}
 			}
 
 
+			if (m == (int) nblocks - 1)
+			{
+				/* We do not need the potrf result any more */
+				starpu_mpi_cache_flush(MPI_COMM_WORLD, data_handles[n][n]);
+				starpu_data_wont_use(data_handles[n][n]);
+			}
+
 			/* column within antidiagonal for a */
 			/* column within antidiagonal for a */
-			for (n = k + 1; n < nblocks; n++)
+			for (n = k + 1; n < (int) nblocks; n++)
 			{
 			{
 				/* row */
 				/* row */
 				m = a-2*k-n;
 				m = a-2*k-n;
 
 
-				if (m >= n && m < nblocks)
+				if (m >= n && m < (int) nblocks)
 				{
 				{
 					/* Update */
 					/* Update */
 					starpu_mpi_task_insert(MPI_COMM_WORLD, &cl22,
 					starpu_mpi_task_insert(MPI_COMM_WORLD, &cl22,
@@ -402,6 +436,12 @@ static void run_cholesky_prio(starpu_data_handle_t **data_handles, int rank, int
 							       STARPU_RW | STARPU_COMMUTE, data_handles[m][n],
 							       STARPU_RW | STARPU_COMMUTE, data_handles[m][n],
 							       STARPU_FLOPS, (double) FLOPS_SGEMM(nn, nn, nn),
 							       STARPU_FLOPS, (double) FLOPS_SGEMM(nn, nn, nn),
 							       0);
 							       0);
+					if (m == (int) nblocks - 1)
+					{
+						/* Nobody else will need it */
+						starpu_data_wont_use(data_handles[n][k]);
+						starpu_mpi_cache_flush(MPI_COMM_WORLD, data_handles[n][k]);
+					}
 				}
 				}
 			}
 			}
 
 
@@ -409,12 +449,6 @@ static void run_cholesky_prio(starpu_data_handle_t **data_handles, int rank, int
 
 
 		starpu_iteration_pop();
 		starpu_iteration_pop();
 	}
 	}
-
-	/* Submit flushes, StarPU will fit them according to the progress */
-	starpu_mpi_cache_flush_all_data(MPI_COMM_WORLD);
-	for (m = 0; m < nblocks; m++)
-		for (n = 0; n < nblocks ; n++)
-			starpu_data_wont_use(data_handles[m][n]);
 }
 }
 
 
 /*
 /*