Forráskód Böngészése

Correctly count data storage/discard stat.

Romain LION 5 éve
szülő
commit
0b49d4b711

+ 0 - 2
mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint_package.c

@@ -150,7 +150,6 @@ int checkpoint_package_data_del(int cp_id, int cp_inst, int rank)
 		checkpoint_data = next_checkpoint_data;
 	}
 	starpu_pthread_mutex_unlock(&package_package_mutex);
-	_STARPU_MPI_FT_STATS_DISCARD_CP_DATA(size);
 	_STARPU_MPI_DEBUG(0, "cleared %d data from checkpoint database (%ld bytes).\n", done, size);
 
 	return done;
@@ -172,7 +171,6 @@ int _checkpoint_package_data_delete_all()
 		checkpoint_data = next_checkpoint_data;
 	}
 	starpu_pthread_mutex_unlock(&package_package_mutex);
-	_STARPU_MPI_FT_STATS_DISCARD_CP_DATA(size);
 	_STARPU_MPI_DEBUG(0, "cleared %d data from checkpoint database (%ld bytes).\n", done, size);
 
 	return done;

+ 1 - 0
mpi/src/mpi_failure_tolerance/starpu_mpi_ft_stats.c

@@ -34,5 +34,6 @@ int ft_service_msgs_received_count;
 size_t ft_service_msgs_received_total_size;
 
 struct size_sample_list cp_data_in_memory_list; //over time
+size_t cp_data_in_memory_size_max_at_t;
 size_t cp_data_in_memory_size_total;
 

+ 9 - 1
mpi/src/mpi_failure_tolerance/starpu_mpi_ft_stats.h

@@ -44,6 +44,7 @@ extern size_t ft_service_msgs_received_total_size;
 
 extern struct size_sample_list cp_data_in_memory_list; //over time
 extern size_t cp_data_in_memory_size_total;
+size_t cp_data_in_memory_size_max_at_t;
 
 
 static inline void stat_init();
@@ -115,6 +116,7 @@ static inline void stat_init()
 	ft_service_msgs_received_total_size = 0;
 
 	cp_data_in_memory_size_total = 0;
+	cp_data_in_memory_size_max_at_t = 0;
 }
 
 static inline void _starpu_ft_stats_send_data(size_t size)
@@ -199,6 +201,10 @@ static inline void _starpu_ft_stats_add_cp_data_in_memory(size_t size)
 	tmp_sample = size_sample_list_back(&cp_data_in_memory_list);
 	tmp = (NULL==tmp_sample?0:tmp_sample->size);
 	tmp+=size;
+	if (tmp>cp_data_in_memory_size_max_at_t)
+	{
+		cp_data_in_memory_size_max_at_t = tmp;
+	}
 	sample->size = tmp;
 	size_sample_list_push_back(&cp_data_in_memory_list, sample);
 	starpu_pthread_mutex_unlock(&_ft_stats_mutex);
@@ -239,7 +245,9 @@ static inline void _starpu_ft_stats_write_to_fd(FILE* fd)
 	fprintf(fd, "RECV\t%d\t"                 "%ld\t"                    "%d\t"               "%ld\t"               "%d\t"                 "%ld\n",
 	        cp_data_msgs_received_count, cp_data_msgs_received_total_size, cp_data_msgs_received_cached_count, cp_data_msgs_received_cached_total_size, ft_service_msgs_received_count, ft_service_msgs_received_total_size);
 	fprintf(fd, "\n");
-	fprintf(fd, "IN_MEM_CP_DATA_TOTAL:%ld\n", cp_data_in_memory_size_total);
+	fprintf(fd, "IN_MEM_CP_DATA_TOTAL:%lu\n", cp_data_in_memory_size_total);
+	fprintf(fd, "\n");
+	fprintf(fd, "IN_MEM_CP_DATA_MAX_AT_T:%lu\n", cp_data_in_memory_size_max_at_t);
 	fprintf(fd, "\n");
 //	fprintf(fd, "IN_MEM_CP_DATA_TRACKING\n");
 //	struct size_sample *sample = size_sample_list_begin(&cp_data_in_memory_list);