|
@@ -38,10 +38,11 @@ struct _starpu_mpi_checkpoint_template{
|
|
struct _starpu_mpi_checkpoint_template_item_list list;
|
|
struct _starpu_mpi_checkpoint_template_item_list list;
|
|
int size;
|
|
int size;
|
|
int cp_template_id;
|
|
int cp_template_id;
|
|
|
|
+ int send_number;
|
|
|
|
+ int current_send_number;
|
|
int pending;
|
|
int pending;
|
|
int frozen;
|
|
int frozen;
|
|
starpu_pthread_mutex_t mutex;
|
|
starpu_pthread_mutex_t mutex;
|
|
- starpu_sem_t completion_sem;
|
|
|
|
};
|
|
};
|
|
|
|
|
|
static inline struct _starpu_mpi_checkpoint_template_item* _starpu_mpi_checkpoint_template_item_create(int type, void* ptr, int count, int backup_rank, int backup_of)
|
|
static inline struct _starpu_mpi_checkpoint_template_item* _starpu_mpi_checkpoint_template_item_create(int type, void* ptr, int count, int backup_rank, int backup_of)
|
|
@@ -68,21 +69,15 @@ static inline starpu_mpi_checkpoint_template_t _starpu_mpi_checkpoint_template_n
|
|
|
|
|
|
/*
 * Append one entry to a checkpoint template.
 *
 * A new item is built from (type, ptr, count, backup_rank, backup_of) and
 * pushed at the back of cp_template->list. The template mutex is held for
 * the whole operation. Adding to a template whose `frozen` flag is set
 * triggers the assertion below.
 *
 * Always returns 0.
 */
static inline int _starpu_mpi_checkpoint_template_add_data(starpu_mpi_checkpoint_template_t cp_template, int type, void* ptr, int count, int backup_rank, int backup_of)
{
	starpu_pthread_mutex_lock(&cp_template->mutex);

	/* The frozen flag is checked with the mutex held. */
	STARPU_ASSERT_MSG(!cp_template->frozen, "It is not possible to modify registered checkpoint template.\n");

	struct _starpu_mpi_checkpoint_template_item* new_item = _starpu_mpi_checkpoint_template_item_create(type, ptr, count, backup_rank, backup_of);
	_starpu_mpi_checkpoint_template_item_list_push_back(&cp_template->list, new_item);

	starpu_pthread_mutex_unlock(&cp_template->mutex);

	return 0;
}
|
|
|
|
|
|
-static inline int _starpu_mpi_checkpoint_template_freeze(starpu_mpi_checkpoint_template_t _cp_template)
|
|
|
|
-{
|
|
|
|
- _cp_template->frozen = 1;
|
|
|
|
- _cp_template->size = _starpu_mpi_checkpoint_template_item_list_size(&_cp_template->list);
|
|
|
|
- starpu_sem_init(&_cp_template->completion_sem, 0, _cp_template->size-1);
|
|
|
|
- return _cp_template->size;
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
static inline struct _starpu_mpi_checkpoint_template_item* _starpu_mpi_checkpoint_template_get_first_data(starpu_mpi_checkpoint_template_t template)
|
|
static inline struct _starpu_mpi_checkpoint_template_item* _starpu_mpi_checkpoint_template_get_first_data(starpu_mpi_checkpoint_template_t template)
|
|
{
|
|
{
|
|
return _starpu_mpi_checkpoint_template_item_list_front(&template->list);
|
|
return _starpu_mpi_checkpoint_template_item_list_front(&template->list);
|
|
@@ -98,6 +93,42 @@ static inline struct _starpu_mpi_checkpoint_template_item* _starpu_mpi_checkpoin
|
|
return NULL;
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+static inline int _starpu_mpi_checkpoint_template_freeze(starpu_mpi_checkpoint_template_t cp_template)
|
|
|
|
+{
|
|
|
|
+ starpu_pthread_mutex_lock(&cp_template->mutex);
|
|
|
|
+
|
|
|
|
+ cp_template->frozen = 1;
|
|
|
|
+ cp_template->send_number = 0;
|
|
|
|
+ cp_template->size = _starpu_mpi_checkpoint_template_item_list_size(&cp_template->list);
|
|
|
|
+
|
|
|
|
+ struct _starpu_mpi_checkpoint_template_item* item = _starpu_mpi_checkpoint_template_get_first_data(cp_template);
|
|
|
|
+
|
|
|
|
+ while (item != _starpu_mpi_checkpoint_template_end(cp_template))
|
|
|
|
+ {
|
|
|
|
+ switch (item->type)
|
|
|
|
+ {
|
|
|
|
+ case STARPU_VALUE:
|
|
|
|
+ cp_template->send_number++;
|
|
|
|
+ break;
|
|
|
|
+ case STARPU_R:
|
|
|
|
+ if (starpu_mpi_data_get_rank(*(starpu_data_handle_t *) item->ptr))
|
|
|
|
+ {
|
|
|
|
+ cp_template->send_number++;
|
|
|
|
+ }
|
|
|
|
+ break;
|
|
|
|
+ case STARPU_DATA_ARRAY:
|
|
|
|
+ break;
|
|
|
|
+ }
|
|
|
|
+ item = _starpu_mpi_checkpoint_template_get_next_data(cp_template, item);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ starpu_pthread_mutex_unlock(&cp_template->mutex);
|
|
|
|
+
|
|
|
|
+ return cp_template->size;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
int _starpu_mpi_checkpoint_turn_on(void);
|
|
int _starpu_mpi_checkpoint_turn_on(void);
|
|
|
|
|
|
// For test purpose
|
|
// For test purpose
|