|
@@ -39,7 +39,7 @@ extern struct _starpu_mpi_req* _starpu_mpi_irecv_cache_aware(starpu_data_handle_
|
|
|
void _starpu_mpi_treat_ack_receipt_cb(void* _args)
|
|
|
{
|
|
|
struct _starpu_mpi_cp_ack_arg_cb* arg = (struct _starpu_mpi_cp_ack_arg_cb*) _args;
|
|
|
- _STARPU_DEBUG(3, "ack msg recved id:%d inst:%d\n", arg->msg.checkpoint_id, arg->msg.checkpoint_instance);
|
|
|
+ _STARPU_MPI_DEBUG(3, "ack msg recved id:%d inst:%d\n", arg->msg.checkpoint_id, arg->msg.checkpoint_instance);
|
|
|
if (_checkpoint_template_digest_ack_reception(arg->msg.checkpoint_id, arg->msg.checkpoint_instance) == 0) {
|
|
|
free(arg);
|
|
|
}
|
|
@@ -48,14 +48,14 @@ void _starpu_mpi_treat_ack_receipt_cb(void* _args)
|
|
|
void _arg_free(void* _args)
|
|
|
{
|
|
|
struct _starpu_mpi_cp_ack_arg_cb* arg = (struct _starpu_mpi_cp_ack_arg_cb*) _args;
|
|
|
- _STARPU_DEBUG(3,stderr, "Ack send succeeded cpid:%d, cpinst:%d, dest:%d\n", arg->msg.checkpoint_id, arg->msg.checkpoint_instance, arg->rank);
|
|
|
- free(_args);
|
|
|
+ _STARPU_MPI_DEBUG(3, "Ack send succeeded cpid:%d, cpinst:%d, dest:%d\n", arg->msg.checkpoint_id, arg->msg.checkpoint_instance, arg->rank);
|
|
|
+ free(arg);
|
|
|
}
|
|
|
|
|
|
void _starpu_mpi_push_cp_ack_send_cb(void* _args)
|
|
|
{
|
|
|
struct _starpu_mpi_cp_ack_arg_cb* arg = (struct _starpu_mpi_cp_ack_arg_cb*) _args;
|
|
|
- _STARPU_DEBUG(3,"Send ack msg to %d: id=%d inst=%d\n", arg->rank, arg->msg.checkpoint_id, arg->msg.checkpoint_instance);
|
|
|
+ _STARPU_MPI_DEBUG(3,"Send ack msg to %d: id=%d inst=%d\n", arg->rank, arg->msg.checkpoint_id, arg->msg.checkpoint_instance);
|
|
|
_ft_service_msg_isend_cb((void*)&arg->msg, sizeof(struct _starpu_mpi_cp_ack_msg), arg->rank, _STARPU_MPI_TAG_CP_ACK, MPI_COMM_WORLD, _arg_free, _args);
|
|
|
}
|
|
|
|
|
@@ -74,7 +74,7 @@ void _starpu_mpi_push_cp_ack_recv_cb(void* _args)
|
|
|
free(starpu_data_handle_to_pointer(arg->handle, STARPU_MAIN_RAM));
|
|
|
starpu_data_unregister(arg->handle);
|
|
|
}
|
|
|
- _STARPU_DEBUG(3, "Posting ack recv cb from %d\n", arg->rank);
|
|
|
+ _STARPU_MPI_DEBUG(3, "Posting ack recv cb from %d\n", arg->rank);
|
|
|
_ft_service_msg_irecv_cb((void*)&arg->msg, sizeof(struct _starpu_mpi_cp_ack_msg), arg->rank, _STARPU_MPI_TAG_CP_ACK, MPI_COMM_WORLD, _starpu_mpi_treat_ack_receipt_cb, _args);
|
|
|
}
|
|
|
|
|
@@ -82,8 +82,9 @@ void _starpu_mpi_push_cp_ack_recv_cb(void* _args)
|
|
|
void _starpu_checkpoint_cached_data_recv_copy_and_ack(void* _arg)
|
|
|
{
|
|
|
struct _starpu_mpi_cp_ack_arg_cb* arg = (struct _starpu_mpi_cp_ack_arg_cb*) _arg;
|
|
|
- starpu_data_register_same(&arg->copy_handle, arg->handle);
|
|
|
- starpu_data_cpy(arg->copy_handle, arg->handle, 1, _starpu_mpi_store_data_and_push_cp_ack_send_cb, _arg);
|
|
|
+ starpu_data_dup_ro(&arg->copy_handle, arg->handle, 1, _starpu_mpi_store_data_and_push_cp_ack_send_cb, _arg);
|
|
|
+// starpu_data_register_same(&arg->copy_handle, arg->handle);
|
|
|
+// starpu_data_cpy(arg->copy_handle, arg->handle, 1, _starpu_mpi_store_data_and_push_cp_ack_send_cb, _arg);
|
|
|
starpu_data_release(arg->handle);
|
|
|
}
|
|
|
|
|
@@ -95,11 +96,16 @@ void _starpu_checkpoint_data_recv_copy_and_ack(void* _arg)
|
|
|
{
|
|
|
// A handle has specifically been created, so there is no need to copy the data: call the callback directly.
|
|
|
arg->copy_handle = arg->handle;
|
|
|
- return _starpu_mpi_store_data_and_push_cp_ack_send_cb(_arg);
|
|
|
+ _starpu_mpi_store_data_and_push_cp_ack_send_cb(_arg);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ else if (STARPU_R == arg->type)
|
|
|
+ {
|
|
|
+ starpu_data_dup_ro(&arg->copy_handle, arg->handle, 1, _starpu_mpi_store_data_and_push_cp_ack_send_cb, _arg);
|
|
|
+// starpu_data_register_same(&arg->copy_handle, arg->handle);
|
|
|
+// starpu_data_cpy(arg->copy_handle, arg->handle, 1, _starpu_mpi_store_data_and_push_cp_ack_send_cb, _arg);
|
|
|
+ return;
|
|
|
}
|
|
|
-
|
|
|
- starpu_data_register_same(&arg->copy_handle, arg->handle);
|
|
|
- starpu_data_cpy(arg->copy_handle, arg->handle, 1, _starpu_mpi_store_data_and_push_cp_ack_send_cb, _arg);
|
|
|
}
|
|
|
|
|
|
int starpu_mpi_submit_checkpoint_template(starpu_mpi_checkpoint_template_t cp_template)
|