Parcourir la source

Generate a unique key during starpu_mpi_init so that we can make sure that all
the traces handed to the fxt-tool are coming from the same MPI run.

Cédric Augonnet il y a 15 ans
Parent
commit
0824f64dbb
5 fichiers modifiés avec 65 ajouts et 25 suppressions
  1. 38 13
      mpi/starpu_mpi.c
  2. 3 3
      mpi/starpu_mpi_fxt.h
  3. 2 3
      tools/fxt-tool-mpi.c
  4. 21 5
      tools/fxt-tool.c
  5. 1 1
      tools/fxt-tool.h

+ 38 - 13
mpi/starpu_mpi.c

@@ -14,6 +14,7 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+#include <stdlib.h>
 #include <starpu_mpi.h>
 #include <starpu_mpi_datatype.h>
 #include <starpu_mpi_private.h>
@@ -539,6 +540,41 @@ static void *progress_thread_func(void *arg __attribute__((unused)))
 static int hookid = - 1;
 #endif
 
+/* Record a synchronization point in the FxT trace of every MPI process.
+ *
+ * All processes of MPI_COMM_WORLD first meet on a barrier. Rank 0 then draws
+ * a pseudo-random key which is broadcast to the other ranks, and each process
+ * logs it through TRACE_MPI_BARRIER along with its rank and the world size.
+ * The key lets the traces of a same MPI run be recognized afterwards.
+ *
+ * The body is compiled only when USE_FXT is defined; otherwise this function
+ * does nothing. */
+static void _starpu_mpi_add_sync_point_in_fxt(void)
+{
+#ifdef USE_FXT
+	int rank;
+	int worldsize;
+	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+	MPI_Comm_size(MPI_COMM_WORLD, &worldsize);
+	
+	int barrier_ret = MPI_Barrier(MPI_COMM_WORLD);
+	STARPU_ASSERT(barrier_ret == MPI_SUCCESS);
+
+	/* We generate a "unique" key so that we can make sure that different
+	 * FxT traces come from the same MPI run. Only rank 0 picks the value;
+	 * the other ranks receive it from the broadcast below. */
+	int random_number = 0;
+
+	/* XXX perhaps we don't want to generate a new seed if the application
+	 * specified some reproducible behaviour ? */
+	if (rank == 0)
+	{
+		srand(time(NULL));
+		random_number = rand();
+	}
+
+	/* Check the broadcast like the barrier above: if it failed, the ranks
+	 * would log different keys. */
+	int bcast_ret = MPI_Bcast(&random_number, 1, MPI_INT, 0, MPI_COMM_WORLD);
+	STARPU_ASSERT(bcast_ret == MPI_SUCCESS);
+
+	TRACE_MPI_BARRIER(rank, worldsize, random_number);
+
+#ifdef VERBOSE
+	/* %x expects an unsigned int, hence the cast. */
+	fprintf(stderr, "StarPU MPI (rank %d): unique key %x\n", rank, (unsigned) random_number);
+#endif
+
+#endif
+}
+
+
 int starpu_mpi_initialize(void)
 {
 	pthread_mutex_init(&mutex, NULL);
@@ -559,20 +595,9 @@ int starpu_mpi_initialize(void)
 	hookid = starpu_register_progression_hook(progression_hook_func, NULL);
 	STARPU_ASSERT(hookid >= 0);
 #endif
-	
-#ifdef USE_FXT
-	int rank;
-	int worldsize;
-	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &worldsize);
-	
-	int barrier_ret = MPI_Barrier(MPI_COMM_WORLD);
-	STARPU_ASSERT(barrier_ret == MPI_SUCCESS);
-
-	fprintf(stderr, "BARRIER\n");
-	TRACE_MPI_BARRIER(rank, worldsize);
-#endif
 
+	_starpu_mpi_add_sync_point_in_fxt();
+	
 	return 0;
 }
 

+ 3 - 3
mpi/starpu_mpi_fxt.h

@@ -24,10 +24,10 @@
 #define FUT_MPI_BARRIER		0x5201
 
 #ifdef USE_FXT
-#define TRACE_MPI_BARRIER(rank, worldsize)	\
-	FUT_DO_PROBE3(FUT_MPI_BARRIER, rank, worldsize, syscall(SYS_gettid));
+/* Log a FUT_MPI_BARRIER event carrying the rank, the world size, the key of
+ * the run and the result of syscall(SYS_gettid). The definition has no
+ * trailing semicolon: the caller writes it, so that the macro behaves like a
+ * single statement (e.g. inside an unbraced if/else). */
+#define TRACE_MPI_BARRIER(rank, worldsize, key)	\
+	FUT_DO_PROBE4(FUT_MPI_BARRIER, rank, worldsize, key, syscall(SYS_gettid))
 #else
-#define TRACE_MPI_BARRIER(a, b)		do {} while(0);
+/* Tracing disabled: expands to an empty statement. */
+#define TRACE_MPI_BARRIER(a, b, c)	do {} while(0)
 #endif
 
 

+ 2 - 3
tools/fxt-tool-mpi.c

@@ -18,7 +18,7 @@
 
 /* Returns 0 if a barrier is found, -1 otherwise. In case of success, offset is
  * filled with the timestamp of the barrier and key with the key it recorded */
-int find_sync_point(char *filename_in, uint64_t *offset)
+int find_sync_point(char *filename_in, uint64_t *offset, int *key)
 {
 	STARPU_ASSERT(offset);
 
@@ -55,10 +55,9 @@ int find_sync_point(char *filename_in, uint64_t *offset)
 		{
 			/* We found the sync point */
 			*offset = ev.time;
+			*key = ev.param[2];
 			found = 1;
 			func_ret = 0;
-
-			fprintf(stderr, "OK !\n");
 		}
 	}
 

+ 21 - 5
tools/fxt-tool.c

@@ -733,8 +733,6 @@ int main(int argc, char **argv)
 {
 	int fd_out;
 
-	int use_stdout = 1;
-
 	parse_args(argc, argv);
 
 	init_dag_dot();
@@ -770,11 +768,16 @@ int main(int argc, char **argv)
 		 *	- psi_k(x) = x - offset_k
 		 */
 		
+		int unique_keys[64];
 		uint64_t start_k[64];
 		uint64_t sync_k[64];
 		unsigned sync_k_exists[64];
 		uint64_t M = 0;
 
+		unsigned found_one_sync_point = 0;
+		int key;
+		unsigned display_mpi = 0; 
+
 		/* Compute all start_k */
 		for (inputfile = 0; inputfile < ninputfiles; inputfile++)
 		{
@@ -786,15 +789,28 @@ int main(int argc, char **argv)
 		for (inputfile = 0; inputfile < ninputfiles; inputfile++)
 		{
 			int ret = find_sync_point(filenames[inputfile],
-							&sync_k[inputfile]);
+							&sync_k[inputfile],
+							&unique_keys[inputfile]);
 			if (ret == -1)
 			{
 				/* There was no sync point, we assume there is no offset */
 				sync_k_exists[inputfile] = 0;
-				fprintf(stderr, "BAD ret %d\n", ret);
 			}
 			else {
-				fprintf(stderr, "GOOD ret %d\n", ret);
+				if (!found_one_sync_point)
+				{
+					key = unique_keys[inputfile];
+					display_mpi = 1;
+					found_one_sync_point = 1;
+				}
+				else {
+					if (key != unique_keys[inputfile])
+					{
+						fprintf(stderr, "Warning: traces are coming from different run so we will not try to display MPI communications.\n");
+						display_mpi = 0;
+					}
+				}
+
 
 				STARPU_ASSERT(sync_k[inputfile] >= start_k[inputfile]);
 

+ 1 - 1
tools/fxt-tool.h

@@ -51,7 +51,7 @@ unsigned get_colour_symbol_blue(char *name);
 
 void reinit_colors(void);
 
-int find_sync_point(char *filename_in, uint64_t *offset);
+int find_sync_point(char *filename_in, uint64_t *offset, int *key);
 uint64_t find_start_time(char *filename_in);
 
 #endif // __FXT_TOOL_H__