Sfoglia il codice sorgente

mpi cholesky: Simplify column-wise algorithm

Samuel Thibault 4 anni fa
parent
commit
c4799d3b9d
1 ha cambiato i file con 36 aggiunte e 26 eliminazioni
  1. 36 26
      mpi/examples/matrix_decomposition/mpi_cholesky_codelets.c

+ 36 - 26
mpi/examples/matrix_decomposition/mpi_cholesky_codelets.c

@@ -189,8 +189,36 @@ static void run_cholesky_column(starpu_data_handle_t **data_handles, int rank, i
 	{
 		starpu_iteration_push(n);
 
+		/* First handle the diagonal block */
 		/* Row */
-		for (m = n; m<nblocks; m++)
+		m = n;
+
+		for (k = 0; k < n; k++)
+		{
+			/* Accumulate updates from TRSMs */
+			starpu_mpi_task_insert(MPI_COMM_WORLD, &cl22,
+					       STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k - m - n) : ((n == k+1) && (m == k+1))?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO,
+					       STARPU_R, data_handles[n][k],
+					       STARPU_R, data_handles[m][k],
+					       STARPU_RW | STARPU_COMMUTE, data_handles[m][n],
+					       STARPU_FLOPS, (double) FLOPS_SGEMM(nn, nn, nn),
+					       0);
+
+			/* Nobody else will need it */
+			starpu_mpi_cache_flush(MPI_COMM_WORLD, data_handles[m][k]);
+			starpu_data_wont_use(data_handles[m][k]);
+		}
+
+		k = n;
+		/* Factorize */
+		starpu_mpi_task_insert(MPI_COMM_WORLD, &cl11,
+				       STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k) : STARPU_MAX_PRIO,
+				       STARPU_RW, data_handles[k][k],
+				       STARPU_FLOPS, (double) FLOPS_SPOTRF(nn),
+				       0);
+
+		/* Row */
+		for (m = n + 1; m<nblocks; m++)
 		{
 			for (k = 0; k < n; k++)
 			{
@@ -203,34 +231,16 @@ static void run_cholesky_column(starpu_data_handle_t **data_handles, int rank, i
 						       STARPU_FLOPS, (double) FLOPS_SGEMM(nn, nn, nn),
 						       0);
 
-				if (m == n)
-				{
-					/* Nobody else will need it */
-					starpu_mpi_cache_flush(MPI_COMM_WORLD, data_handles[m][k]);
-					starpu_data_wont_use(data_handles[m][k]);
-				}
 			}
 			k = n;
-			if (m > n)
-			{
-				/* non-diagonal block, solve */
-				starpu_mpi_task_insert(MPI_COMM_WORLD, &cl21,
-						       STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k - m) : (m == k+1)?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO,
-						       STARPU_R, data_handles[k][k],
-						       STARPU_RW, data_handles[m][k],
-						       STARPU_FLOPS, (double) FLOPS_STRSM(nn, nn),
-						       0);
-			}
-			else
-			{
-				/* diagonal block, factorize */
-				starpu_mpi_task_insert(MPI_COMM_WORLD, &cl11,
-						       STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k) : STARPU_MAX_PRIO,
-						       STARPU_RW, data_handles[k][k],
-						       STARPU_FLOPS, (double) FLOPS_SPOTRF(nn),
-						       0);
+			/* Solve */
+			starpu_mpi_task_insert(MPI_COMM_WORLD, &cl21,
+					       STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k - m) : (m == k+1)?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO,
+					       STARPU_R, data_handles[k][k],
+					       STARPU_RW, data_handles[m][k],
+					       STARPU_FLOPS, (double) FLOPS_STRSM(nn, nn),
+					       0);
 			}
-
 		}
 
 		/* We won't need it any more */