Browse Source

Add CP support to the example

Romain LION 4 years ago
parent
commit
61bb009570
1 changed file with 26 additions and 2 deletions
  1. 26 2
      mpi/examples/matrix_decomposition/mpi_cholesky_codelets.c

+ 26 - 2
mpi/examples/matrix_decomposition/mpi_cholesky_codelets.c

@@ -20,6 +20,18 @@
 #include <limits.h>
 #include <math.h>
 
+int _nodes;
+starpu_mpi_checkpoint_template_t* checkpoint_p;
+
+int backup_function(int rank)
+{
+	if (rank==0)
+		return 1;
+	else
+		return 0;
+//	return (rank+1)%_nodes;
+}
+
 /*
  *	Create the codelets
  */
@@ -63,7 +75,8 @@ static struct starpu_codelet cl22 =
 #endif
 	.cuda_flags = {STARPU_CUDA_ASYNC},
 	.nbuffers = 3,
-	.modes = {STARPU_R, STARPU_R, STARPU_RW | STARPU_COMMUTE},
+    .modes = {STARPU_R, STARPU_R, STARPU_RW},
+//     .modes = {STARPU_R, STARPU_R, STARPU_RW | STARPU_COMMUTE},
 	.model = &chol_model_22,
 	.color = 0x00ff00,
 };
@@ -73,6 +86,10 @@ static void run_cholesky(starpu_data_handle_t **data_handles, int rank, int node
 	unsigned k, m, n;
 	unsigned unbound_prio = STARPU_MAX_PRIO == INT_MAX && STARPU_MIN_PRIO == INT_MIN;
 
+	starpu_mpi_checkpoint_template_add_entry(checkpoint_p, STARPU_VALUE, &k, sizeof(unsigned), nblocks*nblocks+10, backup_function);
+	starpu_mpi_checkpoint_template_freeze(checkpoint_p);
+	_starpu_mpi_checkpoint_template_print(*checkpoint_p);
+
 	for (k = 0; k < nblocks; k++)
 	{
 		starpu_iteration_push(k);
@@ -102,7 +119,8 @@ static void run_cholesky(starpu_data_handle_t **data_handles, int rank, int node
 							       STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k - m - n) : ((n == k+1) && (m == k+1))?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO,
 							       STARPU_R, data_handles[n][k],
 							       STARPU_R, data_handles[m][k],
-							       STARPU_RW | STARPU_COMMUTE, data_handles[m][n],
+					               STARPU_RW, data_handles[m][n],
+//							               STARPU_RW | STARPU_COMMUTE, data_handles[m][n],
 							       0);
 				}
 			}
@@ -111,6 +129,7 @@ static void run_cholesky(starpu_data_handle_t **data_handles, int rank, int node
 			if (my_distrib(m, k, nodes) == rank)
 				starpu_data_wont_use(data_handles[m][k]);
 		}
+		starpu_mpi_submit_checkpoint_template(*checkpoint_p);
 		starpu_iteration_pop();
 	}
 }
@@ -356,6 +375,10 @@ void dw_cholesky(float ***matA, unsigned ld, int rank, int nodes, double *timing
 
 	/* create all the DAG nodes */
 
+	_nodes = nodes;
+	starpu_malloc((void**)&checkpoint_p, sizeof(starpu_mpi_checkpoint_template_t));
+	starpu_mpi_checkpoint_template_create(checkpoint_p, 13);
+
 	data_handles = malloc(nblocks*sizeof(starpu_data_handle_t *));
 	for(m=0 ; m<nblocks ; m++) data_handles[m] = malloc(nblocks*sizeof(starpu_data_handle_t));
 
@@ -384,6 +407,7 @@ void dw_cholesky(float ***matA, unsigned ld, int rank, int nodes, double *timing
 			{
 				starpu_data_set_coordinates(data_handles[m][n], 2, n, m);
 				starpu_mpi_data_register(data_handles[m][n], (m*nblocks)+n, mpi_rank);
+				starpu_mpi_checkpoint_template_add_entry(checkpoint_p, STARPU_R, &data_handles[m][n], backup_function(mpi_rank));
 			}
 		}
 	}