Browse Source

fxt: add MPI tag information

Lucas Leandro Nesi 5 years ago
parent
commit
9cf097626a

+ 1 - 0
mpi/src/starpu_mpi.c

@@ -279,6 +279,7 @@ void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, starpu_mpi_
 	{
 	{
 		_mpi_backend._starpu_mpi_backend_data_register(data_handle, data_tag);
 		_mpi_backend._starpu_mpi_backend_data_register(data_handle, data_tag);
 		mpi_data->node_tag.data_tag = data_tag;
 		mpi_data->node_tag.data_tag = data_tag;
+		_STARPU_MPI_TRACE_DATA_SET_TAG(data_handle, data_tag);
 	}
 	}
 	if (rank != -1)
 	if (rank != -1)
 	{
 	{

+ 4 - 0
mpi/src/starpu_mpi_fxt.h

@@ -58,6 +58,7 @@ extern "C"
 #define _STARPU_MPI_FUT_POLLING_END			0x5215
 #define _STARPU_MPI_FUT_POLLING_END			0x5215
 #define _STARPU_MPI_FUT_DRIVER_RUN_BEGIN		0x5216
 #define _STARPU_MPI_FUT_DRIVER_RUN_BEGIN		0x5216
 #define _STARPU_MPI_FUT_DRIVER_RUN_END			0x5217
 #define _STARPU_MPI_FUT_DRIVER_RUN_END			0x5217
+#define _STARPU_MPI_FUT_DATA_SET_TAG			0x5218
 
 
 #ifdef STARPU_USE_FXT
 #ifdef STARPU_USE_FXT
 
 
@@ -110,6 +111,8 @@ extern "C"
 	FUT_DO_PROBE3(_STARPU_MPI_FUT_UWAIT_END, (src), (data_tag), _starpu_gettid());
 	FUT_DO_PROBE3(_STARPU_MPI_FUT_UWAIT_END, (src), (data_tag), _starpu_gettid());
 #define _STARPU_MPI_TRACE_DATA_SET_RANK(handle, rank)	\
 #define _STARPU_MPI_TRACE_DATA_SET_RANK(handle, rank)	\
 	FUT_DO_PROBE3(_STARPU_MPI_FUT_DATA_SET_RANK, (handle), (rank), _starpu_gettid());
 	FUT_DO_PROBE3(_STARPU_MPI_FUT_DATA_SET_RANK, (handle), (rank), _starpu_gettid());
+#define _STARPU_MPI_TRACE_DATA_SET_TAG(handle, data_tag)	\
+	FUT_DO_PROBE3(_STARPU_MPI_FUT_DATA_SET_TAG, (handle), (data_tag), _starpu_gettid());
 #if 0
 #if 0
 /* This is very expensive in the trace, only enable for debugging */
 /* This is very expensive in the trace, only enable for debugging */
 #define _STARPU_MPI_TRACE_TESTING_DETACHED_BEGIN()	\
 #define _STARPU_MPI_TRACE_TESTING_DETACHED_BEGIN()	\
@@ -169,6 +172,7 @@ extern "C"
 #define _STARPU_MPI_TRACE_UWAIT_BEGIN(a, b)			do {} while(0);
 #define _STARPU_MPI_TRACE_UWAIT_BEGIN(a, b)			do {} while(0);
 #define _STARPU_MPI_TRACE_UWAIT_END(a, b)			do {} while(0);
 #define _STARPU_MPI_TRACE_UWAIT_END(a, b)			do {} while(0);
 #define _STARPU_MPI_TRACE_DATA_SET_RANK(a, b)			do {} while(0);
 #define _STARPU_MPI_TRACE_DATA_SET_RANK(a, b)			do {} while(0);
+#define _STARPU_MPI_TRACE_DATA_SET_TAG(a, b)			do {} while(0);
 #define _STARPU_MPI_TRACE_TESTING_DETACHED_BEGIN()		do {} while(0)
 #define _STARPU_MPI_TRACE_TESTING_DETACHED_BEGIN()		do {} while(0)
 #define _STARPU_MPI_TRACE_TESTING_DETACHED_END()		do {} while(0)
 #define _STARPU_MPI_TRACE_TESTING_DETACHED_END()		do {} while(0)
 #define _STARPU_MPI_TRACE_TEST_BEGIN(peer, data_tag)		do {} while(0)
 #define _STARPU_MPI_TRACE_TEST_BEGIN(peer, data_tag)		do {} while(0)

+ 16 - 0
src/debug/traces/starpu_fxt.c

@@ -281,6 +281,7 @@ struct data_info
 	int home_node;
 	int home_node;
 	int mpi_rank;
 	int mpi_rank;
 	int mpi_owner;
 	int mpi_owner;
+	long mpi_tag;
 };
 };
 
 
 struct data_info *data_info;
 struct data_info *data_info;
@@ -303,6 +304,7 @@ static struct data_info *get_data(unsigned long handle, int mpi_rank)
 		data->home_node = STARPU_MAIN_RAM;
 		data->home_node = STARPU_MAIN_RAM;
 		data->mpi_rank = mpi_rank;
 		data->mpi_rank = mpi_rank;
 		data->mpi_owner = mpi_rank;
 		data->mpi_owner = mpi_rank;
+		data->mpi_tag = -1;
 		HASH_ADD(hh, data_info, handle, sizeof(handle), data);
 		HASH_ADD(hh, data_info, handle, sizeof(handle), data);
 	}
 	}
 	else
 	else
@@ -342,6 +344,8 @@ static void data_dump(struct data_info *data)
 	}
 	}
 	if (data->mpi_owner >= 0)
 	if (data->mpi_owner >= 0)
 		fprintf(data_file, "MPIOwner: %d\n", data->mpi_owner);
 		fprintf(data_file, "MPIOwner: %d\n", data->mpi_owner);
+	if (data->mpi_tag >= 0)
+		fprintf(data_file, "MPITag: %ld\n", data->mpi_tag);
 	fprintf(data_file, "\n");
 	fprintf(data_file, "\n");
 out:
 out:
 	HASH_DEL(data_info, data);
 	HASH_DEL(data_info, data);
@@ -2218,6 +2222,15 @@ static void handle_mpi_data_set_rank(struct fxt_ev_64 *ev, struct starpu_fxt_opt
 	data->mpi_owner = rank;
 	data->mpi_owner = rank;
 }
 }
 
 
+static void handle_mpi_data_set_tag(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
+{	/* Trace handler for _STARPU_MPI_FUT_DATA_SET_TAG: records the MPI tag carried by the event in the handle's data_info entry. */
+	unsigned long handle = ev->param[0];	/* first probe argument: the data handle */
+	long tag = ev->param[1];	/* second probe argument: the MPI data tag */
+	struct data_info *data = get_data(handle, options->file_rank);
+
+	data->mpi_tag = tag;	/* data_dump() prints it as "MPITag" only when it is >= 0 */
+}
+
 static const char *copy_link_type(unsigned prefetch)
 static const char *copy_link_type(unsigned prefetch)
 {
 {
 	switch (prefetch)
 	switch (prefetch)
@@ -3859,6 +3872,9 @@ void _starpu_fxt_parse_new_file(char *filename_in, struct starpu_fxt_options *op
 			case _STARPU_MPI_FUT_DATA_SET_RANK:
 			case _STARPU_MPI_FUT_DATA_SET_RANK:
 				handle_mpi_data_set_rank(&ev, options);
 				handle_mpi_data_set_rank(&ev, options);
 				break;
 				break;
+			case _STARPU_MPI_FUT_DATA_SET_TAG:
+				handle_mpi_data_set_tag(&ev, options);
+				break;
 
 
 			case _STARPU_MPI_FUT_TESTING_DETACHED_BEGIN:
 			case _STARPU_MPI_FUT_TESTING_DETACHED_BEGIN:
 				handle_mpi_testing_detached_begin(&ev, options);
 				handle_mpi_testing_detached_begin(&ev, options);

+ 3 - 2
src/debug/traces/starpu_fxt.h

@@ -59,8 +59,8 @@ void _starpu_fxt_dag_add_sync_point(void);
  */
  */
 
 
 int _starpu_fxt_mpi_find_sync_point(char *filename_in, uint64_t *offset, int *key, int *rank);
 int _starpu_fxt_mpi_find_sync_point(char *filename_in, uint64_t *offset, int *key, int *rank);
-void _starpu_fxt_mpi_add_send_transfer(int src, int dst, int mpi_tag, size_t size, float date, long jobid);
-void _starpu_fxt_mpi_add_recv_transfer(int src, int dst, int mpi_tag, float date, long jobid);
+void _starpu_fxt_mpi_add_send_transfer(int src, int dst, long mpi_tag, size_t size, float date, long jobid);
+void _starpu_fxt_mpi_add_recv_transfer(int src, int dst, long mpi_tag, float date, long jobid);
 void _starpu_fxt_display_mpi_transfers(struct starpu_fxt_options *options, int *ranks, FILE *out_paje_file);
 void _starpu_fxt_display_mpi_transfers(struct starpu_fxt_options *options, int *ranks, FILE *out_paje_file);
 
 
 void _starpu_fxt_write_paje_header(FILE *file, struct starpu_fxt_options *options);
 void _starpu_fxt_write_paje_header(FILE *file, struct starpu_fxt_options *options);
@@ -68,6 +68,7 @@ void _starpu_fxt_write_paje_header(FILE *file, struct starpu_fxt_options *option
 extern int _starpu_poti_extendedSetState;
 extern int _starpu_poti_extendedSetState;
 extern int _starpu_poti_semiExtendedSetState;
 extern int _starpu_poti_semiExtendedSetState;
 extern int _starpu_poti_MemoryEvent;
 extern int _starpu_poti_MemoryEvent;
+extern int _starpu_poti_MpiLinkStart;
 
 
 /*
 /*
  * Animation
  * Animation

+ 12 - 8
src/debug/traces/starpu_fxt_mpi.c

@@ -34,7 +34,7 @@ LIST_TYPE(mpi_transfer,
 	unsigned matched;
 	unsigned matched;
 	int src;
 	int src;
 	int dst;
 	int dst;
-	int mpi_tag;
+	long mpi_tag;
 	size_t size;
 	size_t size;
 	float date;
 	float date;
 	long jobid;
 	long jobid;
@@ -123,7 +123,7 @@ unsigned mpi_recvs_used[MAX_MPI_NODES] = {0};
 unsigned mpi_recvs_matched[MAX_MPI_NODES][MAX_MPI_NODES] = { {0} };
 unsigned mpi_recvs_matched[MAX_MPI_NODES][MAX_MPI_NODES] = { {0} };
 unsigned mpi_sends_matched[MAX_MPI_NODES][MAX_MPI_NODES] = { {0} };
 unsigned mpi_sends_matched[MAX_MPI_NODES][MAX_MPI_NODES] = { {0} };
 
 
-void _starpu_fxt_mpi_add_send_transfer(int src, int dst STARPU_ATTRIBUTE_UNUSED, int mpi_tag, size_t size, float date, long jobid)
+void _starpu_fxt_mpi_add_send_transfer(int src, int dst STARPU_ATTRIBUTE_UNUSED, long mpi_tag, size_t size, float date, long jobid)
 {
 {
 	STARPU_ASSERT(src >= 0);
 	STARPU_ASSERT(src >= 0);
 	if (src >= MAX_MPI_NODES)
 	if (src >= MAX_MPI_NODES)
@@ -153,7 +153,7 @@ void _starpu_fxt_mpi_add_send_transfer(int src, int dst STARPU_ATTRIBUTE_UNUSED,
 	mpi_sends[src][slot].jobid = jobid;
 	mpi_sends[src][slot].jobid = jobid;
 }
 }
 
 
-void _starpu_fxt_mpi_add_recv_transfer(int src STARPU_ATTRIBUTE_UNUSED, int dst, int mpi_tag, float date, long jobid)
+void _starpu_fxt_mpi_add_recv_transfer(int src STARPU_ATTRIBUTE_UNUSED, int dst, long mpi_tag, float date, long jobid)
 {
 {
 	if (dst >= MAX_MPI_NODES)
 	if (dst >= MAX_MPI_NODES)
 		return;
 		return;
@@ -182,7 +182,7 @@ void _starpu_fxt_mpi_add_recv_transfer(int src STARPU_ATTRIBUTE_UNUSED, int dst,
 }
 }
 
 
 static
 static
-struct mpi_transfer *try_to_match_send_transfer(int src STARPU_ATTRIBUTE_UNUSED, int dst, int mpi_tag)
+struct mpi_transfer *try_to_match_send_transfer(int src STARPU_ATTRIBUTE_UNUSED, int dst, long mpi_tag)
 {
 {
 	unsigned slot;
 	unsigned slot;
 	unsigned firstslot = mpi_recvs_matched[src][dst];
 	unsigned firstslot = mpi_recvs_matched[src][dst];
@@ -280,7 +280,7 @@ static void display_all_transfers_from_trace(FILE *out_paje_file, unsigned n)
 
 
 		cur = &mpi_sends[src][slot[src]];
 		cur = &mpi_sends[src][slot[src]];
 		int dst = cur->dst;
 		int dst = cur->dst;
-		int mpi_tag = cur->mpi_tag;
+		long mpi_tag = cur->mpi_tag;
 		size_t size = cur->size;
 		size_t size = cur->size;
 
 
 		if (dst < MAX_MPI_NODES)
 		if (dst < MAX_MPI_NODES)
@@ -323,16 +323,20 @@ static void display_all_transfers_from_trace(FILE *out_paje_file, unsigned n)
 			snprintf(paje_value, sizeof(paje_value), "%lu", (long unsigned) size);
 			snprintf(paje_value, sizeof(paje_value), "%lu", (long unsigned) size);
 			snprintf(paje_key, sizeof(paje_key), "mpicom_%lu", id);
 			snprintf(paje_key, sizeof(paje_key), "mpicom_%lu", id);
 			snprintf(mpi_container, sizeof(mpi_container), "%d_mpict", src);
 			snprintf(mpi_container, sizeof(mpi_container), "%d_mpict", src);
-			poti_StartLink(start_date, "MPIroot", "MPIL", mpi_container, paje_value, paje_key);
+
+			char str_mpi_tag[STARPU_POTI_STR_LEN];
+			snprintf(str_mpi_tag, sizeof(str_mpi_tag), "%ld", mpi_tag);
+			poti_user_StartLink(_starpu_poti_MpiLinkStart, start_date, "MPIroot", "MPIL", mpi_container, paje_value, paje_key, 1, str_mpi_tag);
+
 			poti_SetVariable(start_date, mpi_container, "bwo_mpi", current_out_bandwidth[src]);
 			poti_SetVariable(start_date, mpi_container, "bwo_mpi", current_out_bandwidth[src]);
 			snprintf(mpi_container, sizeof(mpi_container), "%d_mpict", dst);
 			snprintf(mpi_container, sizeof(mpi_container), "%d_mpict", dst);
 			poti_EndLink(end_date, "MPIroot", "MPIL", mpi_container, paje_value, paje_key);
 			poti_EndLink(end_date, "MPIroot", "MPIL", mpi_container, paje_value, paje_key);
 			poti_SetVariable(start_date, mpi_container, "bwo_mpi", current_in_bandwidth[dst]);
 			poti_SetVariable(start_date, mpi_container, "bwo_mpi", current_in_bandwidth[dst]);
 #else
 #else
-			fprintf(out_paje_file, "18	%.9f	MPIL	MPIroot	%lu	%d_mpict	mpicom_%lu\n", start_date, (unsigned long)size, src, id);
-			fprintf(out_paje_file, "19	%.9f	MPIL	MPIroot	%lu	%d_mpict	mpicom_%lu\n", end_date, (unsigned long)size, dst, id);
 			fprintf(out_paje_file, "13	%.9f	%d_mpict	bwo_mpi	%f\n", start_date, src, current_out_bandwidth[src]);
 			fprintf(out_paje_file, "13	%.9f	%d_mpict	bwo_mpi	%f\n", start_date, src, current_out_bandwidth[src]);
 			fprintf(out_paje_file, "13	%.9f	%d_mpict	bwi_mpi	%f\n", start_date, dst, current_in_bandwidth[dst]);
 			fprintf(out_paje_file, "13	%.9f	%d_mpict	bwi_mpi	%f\n", start_date, dst, current_in_bandwidth[dst]);
+			fprintf(out_paje_file, "23	%.9f	MPIL	MPIroot	%lu	%d_mpict	mpicom_%lu	%ld\n", start_date, (unsigned long)size, src, id, mpi_tag);
+			fprintf(out_paje_file, "19	%.9f	MPIL	MPIroot	%lu	%d_mpict	mpicom_%lu\n", end_date, (unsigned long)size, dst, id);
 #endif
 #endif
 		}
 		}
 		else
 		else

+ 20 - 0
src/debug/traces/starpu_paje.c

@@ -30,6 +30,7 @@
 int _starpu_poti_extendedSetState = -1;
 int _starpu_poti_extendedSetState = -1;
 int _starpu_poti_semiExtendedSetState = -1;
 int _starpu_poti_semiExtendedSetState = -1;
 int _starpu_poti_MemoryEvent = -1;
 int _starpu_poti_MemoryEvent = -1;
+int _starpu_poti_MpiLinkStart = -1;
 #endif
 #endif
 #endif
 #endif
 
 
@@ -72,6 +73,7 @@ void _starpu_fxt_write_paje_header(FILE *file STARPU_ATTRIBUTE_UNUSED, struct st
 							     "Size string",
 							     "Size string",
 							     "Dest string");
 							     "Dest string");
 	}
 	}
+	_starpu_poti_MpiLinkStart = poti_header_DeclareEvent(PAJE_StartLink, 1, "MPITAG string");
 #endif
 #endif
 #else
 #else
 	poti_header(1,1);
 	poti_header(1,1);
@@ -222,6 +224,24 @@ void _starpu_fxt_write_paje_header(FILE *file STARPU_ATTRIBUTE_UNUSED, struct st
 		fprintf(file, "%%	Tid	string\n");
 		fprintf(file, "%%	Tid	string\n");
 		fprintf(file, "%%EndEventDef\n");
 		fprintf(file, "%%EndEventDef\n");
 	}
 	}
+	fprintf(file, "%%EventDef	PajeStartLink	23\n");
+	fprintf(file, "%%	Time	date\n");
+	fprintf(file, "%%	Type	string\n");
+	fprintf(file, "%%	Container	string\n");
+	fprintf(file, "%%	Value	string\n");
+	fprintf(file, "%%	StartContainer	string\n");
+	fprintf(file, "%%	Key	string\n");
+	fprintf(file, "%%	MPITAG	string\n");
+	fprintf(file, "%%EndEventDef\n");
+	fprintf(file, "%%EventDef	PajeEndLink	24\n");
+	fprintf(file, "%%	Time	date\n");
+	fprintf(file, "%%	Type	string\n");
+	fprintf(file, "%%	Container	string\n");
+	fprintf(file, "%%	Value	string\n");
+	fprintf(file, "%%	EndContainer	string\n");
+	fprintf(file, "%%	Key	string\n");
+	fprintf(file, "%%	MPITAG	string\n");
+	fprintf(file, "%%EndEventDef\n");
 #endif
 #endif
 
 
 #ifdef STARPU_HAVE_POTI
 #ifdef STARPU_HAVE_POTI