|
@@ -20,6 +20,18 @@
|
|
|
#include <limits.h>
|
|
|
#include <math.h>
|
|
|
|
|
|
+int _nodes; /* Copy of the `nodes` argument, stored by dw_cholesky(); in the visible code it is only read by the disabled variant of backup_function(). */
|
|
|
+starpu_mpi_checkpoint_template_t* checkpoint_p; /* Checkpoint template: allocated, created and given one entry per data handle in dw_cholesky(); completed, frozen and submitted in run_cholesky(). */
|
|
|
+
|
|
|
+int backup_function(int rank) /* Returns 1 when `rank` is 0 and 0 for every other rank. */
|
|
|
+{ /* NOTE(review): presumably the result is the rank that keeps the backup of `rank`'s checkpoint data -- confirm against the starpu_mpi checkpoint API. */
|
|
|
+ if (rank==0)
|
|
|
+ return 1;
|
|
|
+ else
|
|
|
+ return 0;
|
|
|
+// Disabled alternative (next rank, wrapping around at _nodes): return (rank+1)%_nodes;
|
|
|
+}
|
|
|
+
|
|
|
/*
|
|
|
* Create the codelets
|
|
|
*/
|
|
@@ -63,7 +75,8 @@ static struct starpu_codelet cl22 =
|
|
|
#endif
|
|
|
.cuda_flags = {STARPU_CUDA_ASYNC},
|
|
|
.nbuffers = 3,
|
|
|
- .modes = {STARPU_R, STARPU_R, STARPU_RW | STARPU_COMMUTE},
|
|
|
+ .modes = {STARPU_R, STARPU_R, STARPU_RW},
|
|
|
+// .modes = {STARPU_R, STARPU_R, STARPU_RW | STARPU_COMMUTE},
|
|
|
.model = &chol_model_22,
|
|
|
.color = 0x00ff00,
|
|
|
};
|
|
@@ -73,6 +86,10 @@ static void run_cholesky(starpu_data_handle_t **data_handles, int rank, int node
|
|
|
unsigned k, m, n;
|
|
|
unsigned unbound_prio = STARPU_MAX_PRIO == INT_MAX && STARPU_MIN_PRIO == INT_MIN;
|
|
|
|
|
|
+ starpu_mpi_checkpoint_template_add_entry(checkpoint_p, STARPU_VALUE, &k, sizeof(unsigned), nblocks*nblocks+10, backup_function);
|
|
|
+ starpu_mpi_checkpoint_template_freeze(checkpoint_p);
|
|
|
+ _starpu_mpi_checkpoint_template_print(*checkpoint_p);
|
|
|
+
|
|
|
for (k = 0; k < nblocks; k++)
|
|
|
{
|
|
|
starpu_iteration_push(k);
|
|
@@ -102,7 +119,8 @@ static void run_cholesky(starpu_data_handle_t **data_handles, int rank, int node
|
|
|
STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k - m - n) : ((n == k+1) && (m == k+1))?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO,
|
|
|
STARPU_R, data_handles[n][k],
|
|
|
STARPU_R, data_handles[m][k],
|
|
|
- STARPU_RW | STARPU_COMMUTE, data_handles[m][n],
|
|
|
+ STARPU_RW, data_handles[m][n],
|
|
|
+// STARPU_RW | STARPU_COMMUTE, data_handles[m][n],
|
|
|
0);
|
|
|
}
|
|
|
}
|
|
@@ -111,6 +129,7 @@ static void run_cholesky(starpu_data_handle_t **data_handles, int rank, int node
|
|
|
if (my_distrib(m, k, nodes) == rank)
|
|
|
starpu_data_wont_use(data_handles[m][k]);
|
|
|
}
|
|
|
+ starpu_mpi_submit_checkpoint_template(*checkpoint_p);
|
|
|
starpu_iteration_pop();
|
|
|
}
|
|
|
}
|
|
@@ -356,6 +375,10 @@ void dw_cholesky(float ***matA, unsigned ld, int rank, int nodes, double *timing
|
|
|
|
|
|
/* create all the DAG nodes */
|
|
|
|
|
|
+ _nodes = nodes;
|
|
|
+ starpu_malloc((void**)&checkpoint_p, sizeof(starpu_mpi_checkpoint_template_t));
|
|
|
+ starpu_mpi_checkpoint_template_create(checkpoint_p, 13);
|
|
|
+
|
|
|
data_handles = malloc(nblocks*sizeof(starpu_data_handle_t *));
|
|
|
for(m=0 ; m<nblocks ; m++) data_handles[m] = malloc(nblocks*sizeof(starpu_data_handle_t));
|
|
|
|
|
@@ -384,6 +407,7 @@ void dw_cholesky(float ***matA, unsigned ld, int rank, int nodes, double *timing
|
|
|
{
|
|
|
starpu_data_set_coordinates(data_handles[m][n], 2, n, m);
|
|
|
starpu_mpi_data_register(data_handles[m][n], (m*nblocks)+n, mpi_rank);
|
|
|
+ starpu_mpi_checkpoint_template_add_entry(checkpoint_p, STARPU_R, &data_handles[m][n], backup_function(mpi_rank));
|
|
|
}
|
|
|
}
|
|
|
}
|