|
@@ -189,8 +189,36 @@ static void run_cholesky_column(starpu_data_handle_t **data_handles, int rank, i
|
|
|
{
|
|
|
starpu_iteration_push(n);
|
|
|
|
|
|
+
|
|
|
|
|
|
- for (m = n; m<nblocks; m++)
|
|
|
+ m = n;
|
|
|
+
|
|
|
+ for (k = 0; k < n; k++)
|
|
|
+ {
|
|
|
+
|
|
|
+ starpu_mpi_task_insert(MPI_COMM_WORLD, &cl22,
|
|
|
+ STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k - m - n) : ((n == k+1) && (m == k+1))?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO,
|
|
|
+ STARPU_R, data_handles[n][k],
|
|
|
+ STARPU_R, data_handles[m][k],
|
|
|
+ STARPU_RW | STARPU_COMMUTE, data_handles[m][n],
|
|
|
+ STARPU_FLOPS, (double) FLOPS_SGEMM(nn, nn, nn),
|
|
|
+ 0);
|
|
|
+
|
|
|
+
|
|
|
+ starpu_mpi_cache_flush(MPI_COMM_WORLD, data_handles[m][k]);
|
|
|
+ starpu_data_wont_use(data_handles[m][k]);
|
|
|
+ }
|
|
|
+
|
|
|
+ k = n;
|
|
|
+
|
|
|
+ starpu_mpi_task_insert(MPI_COMM_WORLD, &cl11,
|
|
|
+ STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k) : STARPU_MAX_PRIO,
|
|
|
+ STARPU_RW, data_handles[k][k],
|
|
|
+ STARPU_FLOPS, (double) FLOPS_SPOTRF(nn),
|
|
|
+ 0);
|
|
|
+
|
|
|
+
|
|
|
+ for (m = n + 1; m<nblocks; m++)
|
|
|
{
|
|
|
for (k = 0; k < n; k++)
|
|
|
{
|
|
@@ -203,34 +231,16 @@ static void run_cholesky_column(starpu_data_handle_t **data_handles, int rank, i
|
|
|
STARPU_FLOPS, (double) FLOPS_SGEMM(nn, nn, nn),
|
|
|
0);
|
|
|
|
|
|
- if (m == n)
|
|
|
- {
|
|
|
-
|
|
|
- starpu_mpi_cache_flush(MPI_COMM_WORLD, data_handles[m][k]);
|
|
|
- starpu_data_wont_use(data_handles[m][k]);
|
|
|
- }
|
|
|
}
|
|
|
k = n;
|
|
|
- if (m > n)
|
|
|
- {
|
|
|
-
|
|
|
- starpu_mpi_task_insert(MPI_COMM_WORLD, &cl21,
|
|
|
- STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k - m) : (m == k+1)?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO,
|
|
|
- STARPU_R, data_handles[k][k],
|
|
|
- STARPU_RW, data_handles[m][k],
|
|
|
- STARPU_FLOPS, (double) FLOPS_STRSM(nn, nn),
|
|
|
- 0);
|
|
|
- }
|
|
|
- else
|
|
|
- {
|
|
|
-
|
|
|
- starpu_mpi_task_insert(MPI_COMM_WORLD, &cl11,
|
|
|
- STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k) : STARPU_MAX_PRIO,
|
|
|
- STARPU_RW, data_handles[k][k],
|
|
|
- STARPU_FLOPS, (double) FLOPS_SPOTRF(nn),
|
|
|
- 0);
|
|
|
+
|
|
|
+ starpu_mpi_task_insert(MPI_COMM_WORLD, &cl21,
|
|
|
+ STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k - m) : (m == k+1)?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO,
|
|
|
+ STARPU_R, data_handles[k][k],
|
|
|
+ STARPU_RW, data_handles[m][k],
|
|
|
+ STARPU_FLOPS, (double) FLOPS_STRSM(nn, nn),
|
|
|
+ 0);
|
|
|
}
|
|
|
-
|
|
|
}
|
|
|
|
|
|
|