Browse Source

Trace the CP begin event when the first CP StarPU data is actually sent

Romain LION 5 years ago
parent
commit
b91fa68310
1 changed files with 16 additions and 2 deletions
  1. 16 2
      mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint.c

+ 16 - 2
mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint.c

@@ -103,7 +103,21 @@ void _send_cp_external_data_cb(void* _args)
 }
 
 void _send_cp_internal_data_cb(void* _args) {
+	// Forwards _args to _starpu_mpi_push_cp_ack_recv_cb; then, when arg->cache_flag is unset, emits the checkpoint-begin trace the first time this instance's tracker is reached.
+	struct _starpu_mpi_cp_ack_arg_cb* arg = (struct _starpu_mpi_cp_ack_arg_cb*) _args; // typed view of _args; NOTE(review): arg is still read after _args is handed to the call below -- confirm that call does not release it
 	_starpu_mpi_push_cp_ack_recv_cb(_args);
+	if (!arg->cache_flag) // skip the tracker/trace bookkeeping when cache_flag is set
+	{
+
+		// TODO: check cp_domain! The lookup and the trace below both pass a literal 0 (presumably the checkpoint domain -- confirm).
+		struct _starpu_mpi_checkpoint_tracker* tracker = _starpu_mpi_checkpoint_template_get_tracking_inst_by_id_inst(0, arg->msg.checkpoint_instance); // looked up by checkpoint instance; NOTE(review): dereferenced below without a NULL check -- confirm the lookup cannot fail
+		fprintf(stderr, "inst: %d tracker:%p\n", arg->msg.checkpoint_instance, tracker); // debug output: instance id and tracker address
+		if(!tracker->first_msg_sent_flag) // the flag is set right below, so the trace is emitted at most once per tracker from this path
+		{
+			tracker->first_msg_sent_flag = 1;
+			_STARPU_MPI_TRACE_CHECKPOINT_BEGIN(arg->msg.checkpoint_instance,0); // second argument is a literal 0; the call removed from starpu_mpi_submit_checkpoint_template passed cp_template->checkpoint_domain there
+		}
+	}
 }
 
 void _send_internal_data_stats(struct _starpu_mpi_cp_ack_arg_cb* arg)
@@ -146,9 +160,9 @@ int starpu_mpi_submit_checkpoint_template(starpu_mpi_checkpoint_template_t cp_te
 
 	current_instance = increment_current_instance();
 	_starpu_mpi_checkpoint_post_cp_discard_recv(cp_template);
-
+	_starpu_mpi_checkpoint_template_create_instance_tracker(cp_template, cp_template->cp_id, cp_template->checkpoint_domain, current_instance);
+	//TODO: check what happens when all the ack msgs have already been received by the time we arrive here.
 	item = _starpu_mpi_checkpoint_template_get_first_data(cp_template);
-	_STARPU_MPI_TRACE_CHECKPOINT_BEGIN(current_instance, cp_template->checkpoint_domain);
 	while (item != _starpu_mpi_checkpoint_template_end(cp_template))
 	{
 		switch (item->type)