Browse Source

add some debug messages in the MPI lib

Cédric Augonnet 15 years ago
parent
commit
dc7a276f27
4 changed files with 131 additions and 25 deletions
  1. 54 17
      mpi/examples/mpi_lu/pxlu.c
  2. 6 0
      mpi/examples/mpi_lu/pxlu.h
  3. 15 5
      mpi/examples/mpi_lu/pxlu_kernels.c
  4. 56 3
      mpi/starpu_mpi.c

+ 54 - 17
mpi/examples/mpi_lu/pxlu.c

@@ -18,8 +18,8 @@
 #include "pxlu_kernels.h"
 
 #define MPI_TAG11(k)	((1U << 16) | (k))
-#define MPI_TAG12(k, j)	((2U << 16) | (j)*nblocks | (k))
-#define MPI_TAG21(k, i)	((3U << 16) | (i)*nblocks | (k))
+#define MPI_TAG12(k, j)	((2U << 16) | (j)<<8 | (k))
+#define MPI_TAG21(k, i)	((3U << 16) | (i)<<8 | (k))
 
 #define TAG11(k)	((starpu_tag_t)( (1ULL<<50) | (unsigned long long)(k)))
 #define TAG12(k,i)	((starpu_tag_t)(((2ULL<<50) | (((unsigned long long)(k))<<32)	\
@@ -43,6 +43,9 @@
 
 #define STARPU_TAG_INIT	((starpu_tag_t)(11ULL<<50))
 
+//#define VERBOSE_INIT	1
+
+#define DEBUG	1
 
 static unsigned no_prio = 0;
 
@@ -58,6 +61,17 @@ struct callback_arg {
  *	Various
  */
 
+static struct debug_info *create_debug_info(unsigned i, unsigned j, unsigned k)
+{
+	struct debug_info *info = malloc(sizeof(struct debug_info));
+
+	info->i = i;
+	info->j = j;
+	info->k = k;
+
+	return info;
+}
+
 static struct starpu_task *create_task(starpu_tag_t id)
 {
 	struct starpu_task *task = starpu_task_create();
@@ -182,7 +196,6 @@ static void create_task_11_recv(unsigned k)
 			tag_array[ndeps++] = TAG12(k-1, j);
 	}
 	
-	
 	int source = get_block_rank(k, k);
 	starpu_data_handle block_handle = STARPU_PLU(get_tmp_11_block_handle)();
 	int mpi_tag = MPI_TAG11(k);
@@ -239,6 +252,8 @@ static void create_task_11_real(unsigned k)
 
 	task->cl = &STARPU_PLU(cl11);
 
+	task->cl_arg = create_debug_info(k, k, k);
+
 	/* which sub-data is manipulated ? */
 	task->buffers[0].handle = STARPU_PLU(get_block_handle)(k, k);
 	task->buffers[0].mode = STARPU_RW;
@@ -268,7 +283,9 @@ static void create_task_11(unsigned k)
 {
 	if (get_block_rank(k, k) == rank)
 	{
-//		fprintf(stderr, "CREATE real task 11(%d) (TAG11_SAVE(%d) = %lx) on node %d\n", k, k, TAG11_SAVE(k), rank);
+#ifdef VERBOSE_INIT
+		fprintf(stderr, "CREATE real task 11(%d) (TAG11_SAVE(%d) = %lx) on node %d\n", k, k, TAG11_SAVE(k), rank);
+#endif
 		create_task_11_real(k);
 	}
 	else {
@@ -278,11 +295,15 @@ static void create_task_11(unsigned k)
 		
 		if (rank_mask[rank])
 		{
-//			fprintf(stderr, "create RECV task 11(%d) on node %d\n", k, rank);
+#ifdef VERBOSE_INIT
+			fprintf(stderr, "create RECV task 11(%d) on node %d\n", k, rank);
+#endif
 			create_task_11_recv(k);
 		}
 		else {
-//			fprintf(stderr, "Node %d needs not 11(%d)\n", rank, k);
+#ifdef VERBOSE_INIT
+			fprintf(stderr, "Node %d needs not 11(%d)\n", rank, k);
+#endif
 		}
 	}
 }
@@ -365,6 +386,8 @@ static void create_task_12_real(unsigned k, unsigned j)
 //	task->cl = &STARPU_PLU(cl12);
 	task->cl = &STARPU_PLU(cl21);
 
+	task->cl_arg = create_debug_info(j, j, k);
+
 	int myrank;
 	MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
 
@@ -414,7 +437,9 @@ static void create_task_12(unsigned k, unsigned j)
 {
 	if (get_block_rank(k, j) == rank)
 	{
-//		fprintf(stderr, "CREATE real task 12(k = %d, j = %d) on node %d\n", k, j, rank);
+#ifdef VERBOSE_INIT
+		fprintf(stderr, "CREATE real task 12(k = %d, j = %d) on node %d\n", k, j, rank);
+#endif
 		create_task_12_real(k, j);
 	}
 	else {
@@ -424,12 +449,16 @@ static void create_task_12(unsigned k, unsigned j)
 		
 		if (rank_mask[rank])
 		{
-//			fprintf(stderr, "create RECV task 12(k = %d, j = %d) on node %d\n", k, j, rank);
+#ifdef VERBOSE_INIT
+			fprintf(stderr, "create RECV task 12(k = %d, j = %d) on node %d\n", k, j, rank);
+#endif
 			create_task_12_recv(k, j);
 		}
-//		else {
-//			fprintf(stderr, "Node %d needs not 12(k=%d, i=%d)\n", rank, k, j);
-//		}
+		else {
+#ifdef VERBOSE_INIT
+			fprintf(stderr, "Node %d needs not 12(k=%d, i=%d)\n", rank, k, j);
+#endif
+		}
 	}
 }
 
@@ -456,7 +485,7 @@ static void create_task_21_recv(unsigned k, unsigned i)
 		if (rank == get_block_rank(i, j))
 			tag_array[ndeps++] = TAG22(k-1, i, j);
 	}
-	
+
 	int source = get_block_rank(i, k);
 	starpu_data_handle block_handle = STARPU_PLU(get_tmp_21_block_handle)(i);
 	int mpi_tag = MPI_TAG21(k, i);
@@ -508,6 +537,8 @@ static void create_task_21_real(unsigned k, unsigned i)
 #warning temporary fix 
 //	task->cl = &STARPU_PLU(cl21);
 	task->cl = &STARPU_PLU(cl12);
+
+	task->cl_arg = create_debug_info(i, i, k);
 	
 	/* which sub-data is manipulated ? */
 	starpu_data_handle diag_block;
@@ -553,7 +584,9 @@ static void create_task_21(unsigned k, unsigned i)
 {
 	if (get_block_rank(i, k) == rank)
 	{
-	//	fprintf(stderr, "CREATE real task 21(k = %d, i = %d) on node %d\n", k, i, rank);
+#ifdef VERBOSE_INIT
+		fprintf(stderr, "CREATE real task 21(k = %d, i = %d) on node %d\n", k, i, rank);
+#endif
 		create_task_21_real(k, i);
 	}
 	else {
@@ -563,12 +596,16 @@ static void create_task_21(unsigned k, unsigned i)
 		
 		if (rank_mask[rank])
 		{
-	//		fprintf(stderr, "create RECV task 21(k = %d, i = %d) on node %d\n", k, i, rank);
+#ifdef VERBOSE_INIT
+			fprintf(stderr, "create RECV task 21(k = %d, i = %d) on node %d\n", k, i, rank);
+#endif
 			create_task_21_recv(k, i);
 		}
-//		else {
-//			fprintf(stderr, "Node %d needs not 21(k=%d, i=%d)\n", rank, k,i);
-//		}
+		else {
+#ifdef VERBOSE_INIT
+			fprintf(stderr, "Node %d needs not 21(k=%d, i=%d)\n", rank, k,i);
+#endif
+		}
 	}
 }
 

+ 6 - 0
mpi/examples/mpi_lu/pxlu.h

@@ -28,6 +28,12 @@
 #define BLAS3_FLOP(n1,n2,n3)    \
         (2*((uint64_t)n1)*((uint64_t)n2)*((uint64_t)n3))
 
+struct debug_info {
+	unsigned i;
+	unsigned j;
+	unsigned k;
+};
+
 double STARPU_PLU(plu_main)(unsigned nblocks, int rank, int world_size);
 
 TYPE *STARPU_PLU(reconstruct_matrix)(unsigned size, unsigned nblocks);

+ 15 - 5
mpi/examples/mpi_lu/pxlu_kernels.c

@@ -18,7 +18,7 @@
 #include "pxlu_kernels.h"
 #include <math.h>
 
-//#define VERBOSE_KERNELS	1
+#define VERBOSE_KERNELS	1
 
 /*
  *   U22 
@@ -40,9 +40,11 @@ static inline void STARPU_PLU(common_u22)(void *descr[],
 	unsigned ld22 = GET_BLAS_LD(descr[2]);
 
 #ifdef VERBOSE_KERNELS
+	struct debug_info *info = _args;
+
 	int rank;
 	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	fprintf(stderr, "KERNEL 22 %d\n", rank);
+	fprintf(stderr, "KERNEL 22 %d - k = %d i = %d j = %d\n", rank, info->k, info->i, info->j);
 #endif
 
 #ifdef USE_CUDA
@@ -132,9 +134,11 @@ static inline void STARPU_PLU(common_u12)(void *descr[],
 	unsigned ny12 = GET_BLAS_NY(descr[1]);
 
 #ifdef VERBOSE_KERNELS
+	struct debug_info *info = _args;
+
 	int rank;
 	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	fprintf(stderr, "KERNEL 12 %d\n", rank);
+	fprintf(stderr, "KERNEL 12 %d - k = %d i %d\n", rank, info->k, info->i);
 
 	fprintf(stderr, "INPUT 12 U11\n");
 	STARPU_PLU(display_data_content)(sub11, nx12);
@@ -232,9 +236,11 @@ static inline void STARPU_PLU(common_u21)(void *descr[],
 	unsigned ny21 = GET_BLAS_NY(descr[1]);
 	
 #ifdef VERBOSE_KERNELS
+	struct debug_info *info = _args;
+
 	int rank;
 	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	fprintf(stderr, "KERNEL 21 %d \n", rank);
+	fprintf(stderr, "KERNEL 21 %d (k = %d, i = %d)\n", rank, info->k, info->i);
 
 	fprintf(stderr, "INPUT 21 U11\n");
 	STARPU_PLU(display_data_content)(sub11, nx21);
@@ -329,9 +335,13 @@ static inline void STARPU_PLU(common_u11)(void *descr[],
 
 	unsigned long z;
 
+#ifdef VERBOSE_KERNELS
+	struct debug_info *info = _args;
+
 	int rank;
 	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-//	fprintf(stderr, "KERNEL 11 %d\n", rank);
+	fprintf(stderr, "KERNEL 11 %d - k = %d\n", rank, info->k);
+#endif
 
 	switch (s) {
 		case 0:

+ 56 - 3
mpi/starpu_mpi.c

@@ -19,6 +19,8 @@
 #include <starpu_mpi_datatype.h>
 #include <starpu_mpi_private.h>
 
+#define VERBOSE_STARPU_MPI	1
+
 /* TODO find a better way to select the polling method (perhaps during the
  * configuration) */
 #define USE_STARPU_ACTIVITY	1
@@ -45,8 +47,17 @@ static int running = 0;
 static void starpu_mpi_isend_func(struct starpu_mpi_req_s *req)
 {
 	void *ptr = starpu_mpi_handle_to_ptr(req->data_handle);
+
+#ifdef VERBOSE_STARPU_MPI
+	int rank;
+        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+	fprintf(stderr, "<<<< STARPU MPI >>>> : Rank %d post MPI isend tag %x dst %d ptr %p req %p\n", rank, req->mpi_tag, req->srcdst, ptr, &req->request);
+#endif
+
 	starpu_mpi_handle_to_datatype(req->data_handle, &req->datatype);
 
+	//MPI_Isend(ptr, 1, req->datatype, req->srcdst, req->mpi_tag, req->comm, &req->request);
 	MPI_Isend(ptr, 1, req->datatype, req->srcdst, req->mpi_tag, req->comm, &req->request);
 
 	TRACE_MPI_ISEND(req->srcdst, req->mpi_tag, 0);
@@ -136,8 +147,17 @@ int starpu_mpi_isend_detached(starpu_data_handle data_handle,
 static void starpu_mpi_irecv_func(struct starpu_mpi_req_s *req)
 {
 	void *ptr = starpu_mpi_handle_to_ptr(req->data_handle);
+	STARPU_ASSERT(ptr);
+
 	starpu_mpi_handle_to_datatype(req->data_handle, &req->datatype);
 
+#ifdef VERBOSE_STARPU_MPI
+	int rank;
+        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+	fprintf(stderr, "<<<< STARPU MPI >>>> : Rank %d post MPI irecv tag %x src %d ptr %p req %p datatype %d\n", rank, req->mpi_tag, req->srcdst, ptr, &req->request, req->datatype);
+#endif
+
 	MPI_Irecv(ptr, 1, req->datatype, req->srcdst, req->mpi_tag, req->comm, &req->request);
 
 	/* somebody is perhaps waiting for the MPI request to be posted */
@@ -314,6 +334,7 @@ static void starpu_mpi_test_func(struct starpu_mpi_req_s *testing_req)
 	/* Which is the mpi request we are testing for ? */
 	struct starpu_mpi_req_s *req = testing_req->other_request;
 
+//	fprintf(stderr, "<<<< STARPU MPI >>>> Test request %p - mpitag %x - TYPE %s %d\n", &req->request, req->mpi_tag, (req->request_type == RECV_REQ)?"recv : source":"send : dest", req->srcdst);
 	int ret = MPI_Test(&req->request, testing_req->flag, testing_req->status);
 
 	if (*testing_req->flag)
@@ -391,6 +412,13 @@ static void handle_request_termination(struct starpu_mpi_req_s *req)
 	MPI_Type_free(&req->datatype);
 	starpu_release_data_from_mem(req->data_handle);
 
+#ifdef VERBOSE_STARPU_MPI
+	int rank;
+        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+	fprintf(stderr, "<<<< STARPU MPI >>>> Rank %d : complete MPI (%s %d) req %p - tag %x\n", rank, (req->request_type == RECV_REQ)?"recv : source":"send : dest", req->srcdst, &req->request, req->mpi_tag);
+#endif
+
 	if (req->request_type == RECV_REQ)
 	{
 		TRACE_MPI_IRECV_END(req->srcdst, req->mpi_tag);
@@ -457,9 +485,27 @@ static void test_detached_requests(void)
 
 		pthread_mutex_unlock(&detached_requests_mutex);
 
-		MPI_Test(&req->request, &flag, &status);
+		int ret = MPI_Test(&req->request, &flag, &status);
+		STARPU_ASSERT(ret == MPI_SUCCESS);
+
+
+#ifdef VERBOSE_STARPU_MPI
+//		if ((req->mpi_tag == 0x20003) || (req->mpi_tag == 0x30003))
+//		if (0)
+		if ((req->mpi_tag == 0x20003))
+		{
+			int rank;
+		        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+			fprintf(stderr, "<<<< STARPU MPI >>>> Rank %d Test request %p - mpitag %x - TYPE %s %d\n", rank, &req->request, req->mpi_tag, (req->request_type == RECV_REQ)?"recv : source":"send : dest", req->srcdst);
+		}
+#endif
+
+
 		if (flag)
+		{
 			handle_request_termination(req);
+		}
 
 		pthread_mutex_lock(&detached_requests_mutex);
 
@@ -468,8 +514,8 @@ static void test_detached_requests(void)
 
 #warning TODO fix memleak
 		/* Detached requests are automatically allocated by the lib */
-//		if (req->detached)
-//			free(req);
+		//if (req->detached)
+		//	free(req);
 	}
 	
 	pthread_mutex_unlock(&detached_requests_mutex);
@@ -516,7 +562,14 @@ static void *progress_thread_func(void *arg __attribute__((unused)))
 #endif
 
 		if (block)
+		{
+//			int rank;
+//		        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+//			if (rank == 3)
+//			fprintf(stderr, "<<<< STARPU MPI >>>> Rank %d NO MORE REQUESTS TO HANDLE\n", rank);
+
 			pthread_cond_wait(&cond, &mutex);
+		}
 
 		if (!running)
 			break;