|
@@ -34,9 +34,10 @@ extern struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t dat
|
|
|
extern struct _starpu_mpi_req *_starpu_mpi_isend_common(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *arg, int sequential_consistency);
|
|
|
|
|
|
|
|
|
-int _starpu_mpi_checkpoint_template_register(starpu_mpi_checkpoint_template_t* cp_template, int cp_id, va_list varg_list)
|
|
|
+static int _starpu_mpi_checkpoint_template_register(starpu_mpi_checkpoint_template_t* cp_template, int cp_id, va_list varg_list)
|
|
|
{
|
|
|
int arg_type;
|
|
|
+ void* useless;
|
|
|
void* ptr;
|
|
|
int count;
|
|
|
int backup_rank;
|
|
@@ -49,8 +50,8 @@ int _starpu_mpi_checkpoint_template_register(starpu_mpi_checkpoint_template_t* c
|
|
|
va_list varg_list_copy;
|
|
|
va_copy(varg_list_copy, varg_list);
|
|
|
|
|
|
- while ((arg_type = va_arg(varg_list_copy, int)) != 0) {
|
|
|
-
|
|
|
+ while ((arg_type = va_arg(varg_list_copy, int)) != 0)
|
|
|
+ {
|
|
|
STARPU_ASSERT_MSG(!(arg_type & STARPU_COMMUTE), "Unable to checkpoint non sequential task flow.\n");
|
|
|
|
|
|
switch(arg_type)
|
|
@@ -98,15 +99,6 @@ int _starpu_mpi_checkpoint_template_register(starpu_mpi_checkpoint_template_t* c
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
-int starpu_mpi_checkpoint_template_register(starpu_mpi_checkpoint_template_t* cp_template, int cp_id, ...)
|
|
|
-{
|
|
|
- va_list varg_list;
|
|
|
- va_start(varg_list, cp_id);
|
|
|
- int ret = _starpu_mpi_checkpoint_template_register(cp_template, cp_id, varg_list);
|
|
|
- va_end(varg_list);
|
|
|
- return ret;
|
|
|
-}
|
|
|
-
|
|
|
void print_received_value(void* handle)
|
|
|
{
|
|
|
fprintf(stderr, "Node %d - I received backup value:%d\n", my_rank, *(int*)starpu_data_handle_to_pointer(*(starpu_data_handle_t*)handle, STARPU_MAIN_RAM));
|
|
@@ -164,7 +156,7 @@ int _starpu_mpi_checkpoint_template_submit(starpu_mpi_checkpoint_template_t cp_t
|
|
|
* @param args
|
|
|
* @return
|
|
|
*/
|
|
|
-void* _starpu_mpi_checkpoint_ack_send_routine(void* args)
|
|
|
+void* _starpu_mpi_checkpoint_ack_send_cb(void* args)
|
|
|
{
|
|
|
starpu_mpi_checkpoint_template_t cp_template = (starpu_mpi_checkpoint_template_t) args;
|
|
|
starpu_pthread_mutex_lock(&cp_template->mutex);
|
|
@@ -214,15 +206,37 @@ int _starpu_mpi_checkpoint_template_print(starpu_mpi_checkpoint_template_t cp_te
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
-int starpu_mpi_checkpoint_template_submit(starpu_mpi_checkpoint_template_t cp_template)
|
|
|
-{
|
|
|
- _starpu_mpi_checkpoint_template_submit(cp_template);
|
|
|
- return 0;
|
|
|
-}
|
|
|
-
|
|
|
/*
 * Enable the MPI checkpoint layer.
 *
 * Initializes cp_template_mutex with default attributes and stores the
 * calling process's rank in MPI_COMM_WORLD into my_rank.
 *
 * Always returns 0; the results of the two calls below are not checked.
 */
int starpu_mpi_checkpoint_turn_on(void)
|
|
|
{
|
|
|
starpu_pthread_mutex_init(&cp_template_mutex, NULL);
|
|
|
starpu_mpi_comm_rank(MPI_COMM_WORLD, &my_rank); //TODO: check compatibility with several Comms behaviour
|
|
|
return 0;
|
|
|
-}
|
|
|
+}
|
|
|
+
|
|
|
+/*
+ * Disable the MPI checkpoint layer.
+ *
+ * Walks cp_template_array from index 0, freeing each template with
+ * _starpu_checkpoint_template_free() and resetting its slot to NULL. The
+ * walk stops at the first NULL slot or after MAX_CP_TEMPLATE_NUMBER entries,
+ * so only the leading run of non-NULL slots is released.
+ * NOTE(review): this presumes templates are stored contiguously from index 0
+ * - confirm against the registration code.
+ *
+ * cp_template_mutex is destroyed afterwards.
+ *
+ * Returns 0.
+ */
+int starpu_mpi_checkpoint_turn_off(void)
+{
+	for (int i = 0; i < MAX_CP_TEMPLATE_NUMBER && cp_template_array[i] != NULL; i++)
+	{
+		_starpu_checkpoint_template_free(cp_template_array[i]);
+		cp_template_array[i] = NULL;
+	}
+
+	starpu_pthread_mutex_destroy(&cp_template_mutex);
+
+	/* The function is declared int: falling off the end without a value
+	 * is undefined behaviour as soon as a caller reads the result. */
+	return 0;
+}
|
|
|
+
|
|
|
+/*
+ * Variadic public entry point for registering a checkpoint template.
+ *
+ * Packs the arguments that follow cp_id into a va_list and forwards them,
+ * together with cp_template and cp_id, to
+ * _starpu_mpi_checkpoint_template_register().
+ *
+ * Returns whatever the internal helper returns.
+ */
+int starpu_mpi_checkpoint_template_register(starpu_mpi_checkpoint_template_t* cp_template, int cp_id, ...)
+{
+	va_list ap;
+	int status;
+
+	va_start(ap, cp_id);
+	status = _starpu_mpi_checkpoint_template_register(cp_template, cp_id, ap);
+	va_end(ap);
+
+	return status;
+}
|
|
|
+
|
|
|
+/*
+ * Public entry point for submitting a checkpoint template.
+ *
+ * Forwards cp_template to _starpu_mpi_checkpoint_template_submit() and
+ * returns that call's result unchanged.
+ */
+int starpu_mpi_checkpoint_template_submit(starpu_mpi_checkpoint_template_t cp_template)
+{
+	int ret = _starpu_mpi_checkpoint_template_submit(cp_template);
+
+	return ret;
+}
|