|
@@ -106,9 +106,10 @@ int starpu_mpi_submit_checkpoint_template(starpu_mpi_checkpoint_template_t cp_te
|
|
|
struct _starpu_mpi_cp_ack_arg_cb* arg;
|
|
|
void* cpy_ptr;
|
|
|
struct _starpu_mpi_checkpoint_template_item* item;
|
|
|
+ int current_instance;
|
|
|
|
|
|
- starpu_pthread_mutex_lock(&cp_template->mutex); // Need to lock to ensure cp_template->cp_template_current_instance stay constant
|
|
|
- checkpoint_template_increment_instance(cp_template);
|
|
|
+ current_instance = increment_current_instance();
|
|
|
+ _starpu_mpi_checkpoint_template_create_instance_tracker(cp_template, cp_template->cp_template_id, current_instance);
|
|
|
_starpu_mpi_checkpoint_post_cp_discard_recv(cp_template);
|
|
|
|
|
|
item = _starpu_mpi_checkpoint_template_get_first_data(cp_template);
|
|
@@ -124,7 +125,7 @@ int starpu_mpi_submit_checkpoint_template(starpu_mpi_checkpoint_template_t cp_te
|
|
|
arg->type = STARPU_VALUE;
|
|
|
arg->count = item->count;
|
|
|
arg->msg.checkpoint_id = cp_template->cp_template_id;
|
|
|
- arg->msg.checkpoint_instance = cp_template->cp_template_current_instance;
|
|
|
+ arg->msg.checkpoint_instance = current_instance;
|
|
|
if (item->backupped_by != -1)
|
|
|
{
|
|
|
cpy_ptr = malloc(item->count);
|
|
@@ -159,7 +160,7 @@ int starpu_mpi_submit_checkpoint_template(starpu_mpi_checkpoint_template_t cp_te
|
|
|
arg->type = STARPU_R;
|
|
|
arg->count = item->count;
|
|
|
arg->msg.checkpoint_id = cp_template->cp_template_id;
|
|
|
- arg->msg.checkpoint_instance = cp_template->cp_template_current_instance;
|
|
|
+ arg->msg.checkpoint_instance = current_instance;
|
|
|
_starpu_mpi_isend_cache_aware(*handle, item->backupped_by, starpu_mpi_data_get_tag(*handle), MPI_COMM_WORLD, 1, 0, 0,
|
|
|
&_starpu_mpi_push_cp_ack_recv_cb, (void*)arg, &_starpu_mpi_cached_push_cp_ack_recv_cb, (void*)arg, 1);
|
|
|
// the callbacks need to post ack recv. The cache one needs to release the handle.
|
|
@@ -175,7 +176,7 @@ int starpu_mpi_submit_checkpoint_template(starpu_mpi_checkpoint_template_t cp_te
|
|
|
arg->type = STARPU_R;
|
|
|
arg->count = item->count;
|
|
|
arg->msg.checkpoint_id = cp_template->cp_template_id;
|
|
|
- arg->msg.checkpoint_instance = cp_template->cp_template_current_instance;
|
|
|
+ arg->msg.checkpoint_instance = current_instance;
|
|
|
_starpu_mpi_irecv_cache_aware(*handle, starpu_mpi_data_get_rank(*handle), starpu_mpi_data_get_tag(*handle), MPI_COMM_WORLD, 1, 0,
|
|
|
NULL, NULL, &_starpu_data_release_cb, (void*)arg->handle, 1, 0, 1);
|
|
|
// The callback needs to do nothing. The cached one must release the handle.
|
|
@@ -189,7 +190,6 @@ int starpu_mpi_submit_checkpoint_template(starpu_mpi_checkpoint_template_t cp_te
|
|
|
item = _starpu_mpi_checkpoint_template_get_next_data(cp_template, item);
|
|
|
}
|
|
|
|
|
|
- starpu_pthread_mutex_unlock(&cp_template->mutex);
|
|
|
return 0;
|
|
|
}
|
|
|
|