Browse Source

Add a synchronization point during the initialization of the MPI lib so that we
can compute the offset between multiple FxT traces of the same MPI program.
If the fxt-tool utility detects such a sync point in the traces, the different
traces are offset to make sure that they appear synchronized in the Paje trace.

Cédric Augonnet 16 years ago
parent
commit
6983263fed
8 changed files with 251 additions and 19 deletions
  1. 13 0
      mpi/starpu_mpi.c
  2. 35 0
      mpi/starpu_mpi_fxt.h
  3. 3 0
      mpi/starpu_mpi_private.h
  4. 2 2
      tools/Makefile.am
  5. 36 0
      tools/fxt-tool-common.c
  6. 75 0
      tools/fxt-tool-mpi.c
  7. 83 17
      tools/fxt-tool.c
  8. 4 0
      tools/fxt-tool.h

+ 13 - 0
mpi/starpu_mpi.c

@@ -559,6 +559,19 @@ int starpu_mpi_initialize(void)
 	hookid = starpu_register_progression_hook(progression_hook_func, NULL);
 	STARPU_ASSERT(hookid >= 0);
 #endif
+	
+#ifdef USE_FXT
+	int rank;
+	int worldsize;
+	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+	MPI_Comm_size(MPI_COMM_WORLD, &worldsize);
+	
+	int barrier_ret = MPI_Barrier(MPI_COMM_WORLD);
+	STARPU_ASSERT(barrier_ret == MPI_SUCCESS);
+
+	fprintf(stderr, "BARRIER\n");
+	TRACE_MPI_BARRIER(rank, worldsize);
+#endif
 
 	return 0;
 }

+ 35 - 0
mpi/starpu_mpi_fxt.h

@@ -0,0 +1,35 @@
+/*
+ * StarPU
+ * Copyright (C) INRIA 2008-2010 (see AUTHORS file)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef __STARPU_MPI_FXT_H__
+#define __STARPU_MPI_FXT_H__
+
+#include <starpu.h>
+#include <common/config.h>
+#include <common/fxt.h>
+
+#define FUT_MPI_BARRIER		0x5201
+
+/*
+ * TRACE_MPI_BARRIER(rank, worldsize): when USE_FXT is defined, emit a
+ * FUT_MPI_BARRIER probe carrying the rank, the world size and the id of the
+ * calling thread; otherwise expand to an empty statement.
+ *
+ * Neither variant ends with a semicolon: the caller supplies it, so that the
+ * macro behaves like a single statement (e.g. inside an unbraced if/else).
+ */
+#ifdef USE_FXT
+#define TRACE_MPI_BARRIER(rank, worldsize)	\
+	FUT_DO_PROBE3(FUT_MPI_BARRIER, (rank), (worldsize), syscall(SYS_gettid))
+#else
+#define TRACE_MPI_BARRIER(a, b)		do {} while(0)
+#endif
+
+
+
+#endif // __STARPU_MPI_FXT_H__

+ 3 - 0
mpi/starpu_mpi_private.h

@@ -17,7 +17,10 @@
 #ifndef __STARPU_MPI_PRIVATE_H__
 #define __STARPU_MPI_PRIVATE_H__
 
+#include <starpu.h>
+#include <common/config.h>
 #include "starpu_mpi.h"
+#include "starpu_mpi_fxt.h"
 #include <common/list.h>
 #include <pthread.h>
 

+ 2 - 2
tools/Makefile.am

@@ -17,7 +17,7 @@
 SUBDIRS = 
 
 LIBS = $(top_builddir)/src/libstarpu.la @LIBS@
-AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/tools/
+AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/tools/ -I$(top_srcdir)/mpi/
 
 bin_PROGRAMS =
 
@@ -30,7 +30,7 @@ calibrate_bus_SOURCES = calibrate_bus.c
 if USE_FXT
 bin_PROGRAMS += fxt-tool fxt-stats
 
-fxt_tool_SOURCES = fxt-tool.c fxt-tool-common.c dag-dot.c histo-paje.c
+fxt_tool_SOURCES = fxt-tool.c fxt-tool-common.c fxt-tool-mpi.c dag-dot.c histo-paje.c
 fxt_tool_CFLAGS = -I$(top_srcdir)/src/
 fxt_tool_LDADD = 
 

+ 36 - 0
tools/fxt-tool-common.c

@@ -75,3 +75,39 @@ void reinit_colors(void)
 	cpus_index = 0;
 	cuda_index = 0;
 }
+
+/*
+ * Return the timestamp of the first event stored in the FxT trace
+ * filename_in.
+ *
+ * The file is opened read-only, a single 64-bit event is read from it and the
+ * file descriptor is closed again. Any failure (open, fxt_fdopen, no readable
+ * event, close) is reported on stderr and terminates the process with
+ * exit(-1).
+ */
+uint64_t find_start_time(char *filename_in)
+{
+	/* Open the trace file */
+	int fd_in = open(filename_in, O_RDONLY);
+	if (fd_in < 0) {
+	        perror("open failed :");
+	        exit(-1);
+	}
+
+	/* The handle is only used within this call: no need for static storage */
+	fxt_t fut = fxt_fdopen(fd_in);
+	if (!fut) {
+	        perror("fxt_fdopen :");
+	        exit(-1);
+	}
+
+	fxt_blockev_t block = fxt_blockev_enter(fut);
+
+	struct fxt_ev_64 ev;
+	int ret = fxt_next_ev(block, FXT_EV_TYPE_64, (struct fxt_ev *)&ev);
+	if (ret != FXT_EV_OK) {
+		/* Check explicitly rather than with an assertion: if assertions
+		 * are disabled, ev.time would be returned uninitialized. */
+		fprintf(stderr, "%s: could not read the first event of the trace\n", filename_in);
+		exit(-1);
+	}
+
+	/* Close the trace file */
+	if (close(fd_in))
+	{
+	        perror("close failed :");
+	        exit(-1);
+	}
+
+	return ev.time;
+}
+
+

+ 75 - 0
tools/fxt-tool-mpi.c

@@ -0,0 +1,75 @@
+/*
+ * StarPU
+ * Copyright (C) INRIA 2008-2010 (see AUTHORS file)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "fxt-tool.h"
+
+/* Scan the trace filename_in for the first FUT_MPI_BARRIER event.
+ * On success, store the timestamp of that event in *offset and return 0.
+ * If the trace ends before such an event is met, return -1 and leave *offset
+ * untouched. Failures to open or close the file terminate the process. */
+int find_sync_point(char *filename_in, uint64_t *offset)
+{
+	STARPU_ASSERT(offset);
+
+	/* Open the trace file */
+	int trace_fd = open(filename_in, O_RDONLY);
+	if (trace_fd < 0) {
+	        perror("open failed :");
+	        exit(-1);
+	}
+
+	static fxt_t trace;
+	trace = fxt_fdopen(trace_fd);
+	if (!trace) {
+	        perror("fxt_fdopen :");
+	        exit(-1);
+	}
+
+	fxt_blockev_t events = fxt_blockev_enter(trace);
+	struct fxt_ev_64 event;
+
+	/* Stays at -1 unless the barrier event is found */
+	int status = -1;
+
+	/* Walk through the events in order, stopping at the first barrier */
+	for (;;) {
+		int ret = fxt_next_ev(events, FXT_EV_TYPE_64, (struct fxt_ev *)&event);
+		if (ret != FXT_EV_OK) {
+			fprintf(stderr, "no more block ...\n");
+			break;
+		}
+
+		if (event.code != FUT_MPI_BARRIER)
+			continue;
+
+		/* We found the sync point */
+		*offset = event.time;
+		status = 0;
+		fprintf(stderr, "OK !\n");
+		break;
+	}
+
+	/* Close the trace file */
+	if (close(trace_fd)) {
+	        perror("close failed :");
+	        exit(-1);
+	}
+
+	return status;
+}
+
+

+ 83 - 17
tools/fxt-tool.c

@@ -1,6 +1,6 @@
 /*
  * StarPU
- * Copyright (C) INRIA 2008-2009 (see AUTHORS file)
+ * Copyright (C) INRIA 2008-2010 (see AUTHORS file)
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -30,6 +30,7 @@ struct fxt_ev_64 ev;
 /* In case we are going to gather multiple traces (eg in the case of MPI
  * processes), we may need to prefix the name of the containers. */
 char *prefix = "";
+uint64_t offset = 0;
 
 static uint64_t start_time = 0;
 static uint64_t end_time = 0;
@@ -105,7 +106,7 @@ static void paje_output_file_init(void)
 
 static float get_event_time_stamp(void)
 {
-	return (float)((ev.time-start_time)/1000000.0);
+	return (float)((ev.time-offset)/1000000.0); /* time of the current event `ev`, shifted by the per-file `offset` and divided by 1e6 */
 }
 
 static int register_worker_id(unsigned long tid)
@@ -530,9 +531,10 @@ static void parse_args(int argc, char **argv)
 	}
 }
 
-void parse_new_file(char *filename_in, char *file_prefix)
+void parse_new_file(char *filename_in, char *file_prefix, uint64_t file_offset)
 {
 	prefix = file_prefix;
+	offset = file_offset;
 
 	/* Open the trace file */
 	int fd_in;
@@ -558,6 +560,16 @@ void parse_new_file(char *filename_in, char *file_prefix)
 	symbol_list = symbol_name_list_new(); 
 	communication_list = communication_list_new();
 
+	/* TODO starttime ...*/
+	/* create the "program" container */
+	fprintf(out_paje_file, "7      0.0 %sp      P      0       program%s \n", prefix, prefix);
+	/* create a variable with the number of tasks */
+	if (!no_counter)
+	{
+		fprintf(out_paje_file, "7     %f    %ssched   Sc    %sp     scheduler \n", 0.0, prefix, prefix);
+		fprintf(out_paje_file, "13    0.0    ntask %ssched 0.0\n", prefix);
+	}
+
 	unsigned first_event = 1;
 
 	while(1) {
@@ -574,15 +586,6 @@ void parse_new_file(char *filename_in, char *file_prefix)
 			first_event = 0;
 			start_time = ev.time;
 
-			/* create the "program" container */
-			fprintf(out_paje_file, "7      0.0 %sp      P      0       program%s \n", prefix, prefix);
-			/* create a variable with the number of tasks */
-			if (!no_counter)
-			{
-				fprintf(out_paje_file, "7     0.0    %ssched   Sc    %sp     scheduler \n", prefix, prefix);
-				fprintf(out_paje_file, "13    0.0    ntask %ssched 0.0\n", prefix);
-			}
-
 		}
 
 		switch (ev.code) {
@@ -707,8 +710,8 @@ void parse_new_file(char *filename_in, char *file_prefix)
 				break;
 
 			default:
-				fprintf(stderr, "unknown event.. %x at time %llx\n",
-					(unsigned)ev.code, (long long unsigned)ev.time);
+				fprintf(stderr, "unknown event.. %x at time %llx WITH OFFSET %llx\n",
+					(unsigned)ev.code, (long long unsigned)ev.time, (long long unsigned)(ev.time-offset));
 				break;
 		}
 	}
@@ -744,15 +747,78 @@ int main(int argc, char **argv)
 	if (ninputfiles == 1)
 	{
 		/* we usually only have a single trace */
-		parse_new_file(filenames[0], "");
+		uint64_t file_start_time = find_start_time(filenames[0]);
+		parse_new_file(filenames[0], "", file_start_time);
 	}
 	else {
 		unsigned inputfile;
+
+		uint64_t offsets[64];
+		uint64_t found_offsets[64];
+		uint64_t start_times[64];
+
+		uint64_t max = 0;
+
+		/*
+		 * Find the trace offsets:
+		 *	- If there is no sync point
+		 *		psi_k(x) = x - start_k
+		 *	- If there is a sync point sync_k
+		 *		psi_k(x) = x - sync_k + M
+		 *		where M = max { sync_i - start_i | there exists sync_i}
+		 * More generally:
+		 *	- psi_k(x) = x - offset_k
+		 */
+		
+		uint64_t start_k[64];
+		uint64_t sync_k[64];
+		unsigned sync_k_exists[64];
+		uint64_t M = 0;
+
+		/* Compute all start_k */
+		for (inputfile = 0; inputfile < ninputfiles; inputfile++)
+		{
+			uint64_t file_start = find_start_time(filenames[inputfile]);
+			start_k[inputfile] = file_start; 
+		}
+
+		/* Compute all sync_k if they exist */
+		for (inputfile = 0; inputfile < ninputfiles; inputfile++)
+		{
+			int ret = find_sync_point(filenames[inputfile],
+							&sync_k[inputfile]);
+			if (ret == -1)
+			{
+				/* There was no sync point, we assume there is no offset */
+				sync_k_exists[inputfile] = 0;
+				fprintf(stderr, "BAD ret %d\n", ret);
+			}
+			else {
+				fprintf(stderr, "GOOD ret %d\n", ret);
+
+				STARPU_ASSERT(sync_k[inputfile] >= start_k[inputfile]);
+
+				sync_k_exists[inputfile] = 1;
+
+				uint64_t diff = sync_k[inputfile] - start_k[inputfile];
+				if (diff > M)
+					M = diff;
+			}
+		}
+
+		/*
+		 * Compute the offset of each trace, following psi_k(x) = x - offset_k:
+		 *	- with a sync point:    offset_k = sync_k - M
+		 *	- without a sync point: offset_k = start_k
+		 * sync_k is only read when it was actually filled by
+		 * find_sync_point. The subtraction is done on uint64_t: should
+		 * sync_k be smaller than M, it wraps around, and x - offset_k
+		 * (also unsigned) still yields x - sync_k + M.
+		 */
+		for (inputfile = 0; inputfile < ninputfiles; inputfile++)
+		{
+			if (sync_k_exists[inputfile])
+				offsets[inputfile] = sync_k[inputfile] - M;
+			else
+				offsets[inputfile] = start_k[inputfile];
+		}
+
+		/* generate the Paje trace for the different files */
 		for (inputfile = 0; inputfile < ninputfiles; inputfile++)
 		{
+
 			char file_prefix[32];
-			snprintf(file_prefix, 32, "FILE%d", inputfile);
-			parse_new_file(filenames[inputfile], file_prefix);
+			snprintf(file_prefix, 32, "file_%d_", inputfile);
+			parse_new_file(filenames[inputfile], file_prefix, offsets[inputfile]);
 		}
 	}
 

+ 4 - 0
tools/fxt-tool.h

@@ -28,6 +28,7 @@
 
 #include <common/fxt.h>
 #include <common/list.h>
+#include <starpu_mpi_fxt.h>
 
 #include "histo-paje.h"
 
@@ -50,4 +51,7 @@ unsigned get_colour_symbol_blue(char *name);
 
 void reinit_colors(void);
 
+int find_sync_point(char *filename_in, uint64_t *offset); /* 0 and *offset = timestamp of the FUT_MPI_BARRIER event if one is found, -1 otherwise */
+uint64_t find_start_time(char *filename_in); /* timestamp of the first event of the trace */
+
 #endif // __FXT_TOOL_H__