Explorar o código

Solve handle problem. OK for now

Romain LION hai 5 anos
pai
achega
bedef1ccfc

+ 10 - 0
mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint_package.c

@@ -45,15 +45,25 @@ int checkpoint_package_data_add(int cp_id, int cp_inst, int rank, starpu_mpi_tag
 int checkpoint_package_data_del(int cp_id, int cp_inst, int rank)
 {
 	int done = 0;
+	struct _starpu_mpi_checkpoint_data* next_checkpoint_data = NULL;
 	struct _starpu_mpi_checkpoint_data* checkpoint_data = _starpu_mpi_checkpoint_data_list_begin(checkpoint_data_list);
 	while (checkpoint_data != _starpu_mpi_checkpoint_data_list_end(checkpoint_data_list))
 	{
+		next_checkpoint_data = _starpu_mpi_checkpoint_data_list_next(checkpoint_data);
 		if (checkpoint_data->cp_id==cp_id && checkpoint_data->cp_inst==cp_inst
 			&& checkpoint_data->rank==rank)
 		{
+			if (checkpoint_data->type==STARPU_R)
+			{
+				starpu_data_handle_t handle = checkpoint_data->ptr;
+//				void* ptr = starpu_data_handle_to_pointer(handle, STARPU_MAIN_RAM);
+//				free(ptr);
+				starpu_data_unregister(handle);
+			}
 			_starpu_mpi_checkpoint_data_list_erase(checkpoint_data_list, checkpoint_data);
 			done++;
 		}
+		checkpoint_data = next_checkpoint_data;
 	}
 	fprintf(stderr, "cleared %d data from checkpoint database.\n", done);
 

+ 3 - 5
mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint_template.c

@@ -20,10 +20,11 @@
 #include <sys/param.h>
 #include <starpu_mpi_private.h>
 #include <starpu_mpi_cache.h>
+#include <mpi/starpu_mpi_mpi_backend.h>
 #include <mpi_failure_tolerance/starpu_mpi_checkpoint_template.h>
 #include <mpi_failure_tolerance/starpu_mpi_checkpoint.h>
 #include <mpi_failure_tolerance/starpu_mpi_ft_service_comms.h>
-#include <mpi/starpu_mpi_mpi_backend.h>
+#include <mpi_failure_tolerance/starpu_mpi_checkpoint_package.h>
 
 
 #define MAX_CP_TEMPLATE_NUMBER 32 // Arbitrary limit
@@ -96,7 +97,6 @@ int _starpu_mpi_checkpoint_template_add_entry(starpu_mpi_checkpoint_template_t c
 	void*        ptr;
 	int              count;
 	int              backupped_by;
-	int              backup_of;
 	int              data_rank;
 	starpu_mpi_tag_t tag;
 	backup_of_fn     _backup_of;
@@ -109,7 +109,6 @@ int _starpu_mpi_checkpoint_template_add_entry(starpu_mpi_checkpoint_template_t c
 			ptr          = va_arg(varg_list, void*);
 			count        = 1;
 			backupped_by = va_arg(varg_list, int);
-			backup_of    = -1;
 			data_rank    = starpu_mpi_data_get_rank(*(starpu_data_handle_t*)ptr);
 			if (my_rank==data_rank)
 			{
@@ -184,6 +183,7 @@ void checkpoint_discard(void* _args)
 	// TODO: flag data as "CP ready", since the CP has succeeded
 	struct _starpu_mpi_cp_discard_arg_cb* arg = (struct _starpu_mpi_cp_discard_arg_cb*) _args;
 	fprintf(stderr, "DISCARDING OLD CHECKPOINT DATA - new one is CPID:%d - CPINST:%d\n", arg->msg.checkpoint_id, arg->msg.checkpoint_instance);
+	checkpoint_package_data_del(arg->msg.checkpoint_id, arg->msg.checkpoint_instance, arg->rank);
 }
 
 int _starpu_mpi_checkpoint_post_cp_discard_recv(starpu_mpi_checkpoint_template_t cp_template)
@@ -388,8 +388,6 @@ int starpu_mpi_checkpoint_template_add_entry(starpu_mpi_checkpoint_template_t* c
 
 
 int _checkpoint_template_digest_ack_reception(int checkpoint_id, int checkpoint_instance) {
-	int old_cp_id;
-	starpu_mpi_checkpoint_template_t old_cp_template;
 	starpu_mpi_checkpoint_template_t cp_template = _starpu_mpi_get_checkpoint_template_by_id(checkpoint_id);
 	starpu_pthread_mutex_lock(&cp_template_mutex);
 	fprintf(stderr, "Digesting ack recv: id=%d, inst=%d\n", checkpoint_id, checkpoint_instance);

+ 4 - 0
mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint_template.h

@@ -149,6 +149,10 @@ static inline int _checkpoint_template_add_to_backup_arrays(starpu_mpi_checkpoin
 		cp_template->backup_of_array[cp_template->backup_of_array_used_size] = -1;
 		return backup_of;
 	}
+	else
+	{
+		fprintf(stderr, "Checkpoint template item does not refer any backup information. This should not happen.\n");
+	}
 }
 
 static int _starpu_mpi_checkpoint_template_add_data(starpu_mpi_checkpoint_template_t cp_template, int type, void* ptr, int count, int backupped_by, int backup_of, starpu_mpi_tag_t tag)