Selaa lähdekoodia

add a -display flag

Cédric Augonnet 15 vuotta sitten
vanhempi
commit
d247fadc97

+ 13 - 6
mpi/examples/mpi_lu/plu_example.c

@@ -29,6 +29,7 @@ static unsigned nblocks = 16;
 static unsigned check = 0;
 static unsigned p = 1;
 static unsigned q = 1;
+static unsigned display = 0;
 
 static starpu_data_handle *dataA_handles;
 static TYPE **dataA;
@@ -60,6 +61,10 @@ static void parse_args(int argc, char **argv)
 			check = 1;
 		}
 
+		if (strcmp(argv[i], "-display") == 0) {
+			display = 1;
+		}
+
 		if (strcmp(argv[i], "-p") == 0) {
 			char *argptr;
 			p = strtol(argv[++i], &argptr, 10);
@@ -72,6 +77,11 @@ static void parse_args(int argc, char **argv)
 	}
 }
 
+unsigned STARPU_PLU(display_flag)(void) /* Accessor for the file-static `display` flag, so other translation units (e.g. plu_solve.c) can test it. */
+{
+	return display; /* 0 by default; parse_args sets it to 1 when "-display" is given on the command line */
+}
+
 static void fill_block_with_random(TYPE *blockptr, unsigned size, unsigned nblocks)
 {
 	const unsigned block_size = (size/nblocks);
@@ -101,8 +111,6 @@ starpu_data_handle STARPU_PLU(get_tmp_21_block_handle)(unsigned i)
 
 static void init_matrix(int rank)
 {
-	fprintf(stderr, "INIT MATRIX on node %d\n", rank);
-
 	/* Allocate a grid of data handles, not all of them have to be allocated later on */
 	dataA_handles = calloc(nblocks*nblocks, sizeof(starpu_data_handle));
 	dataA = calloc(nblocks*nblocks, sizeof(TYPE *));
@@ -202,6 +210,9 @@ starpu_data_handle STARPU_PLU(get_block_handle)(unsigned i, unsigned j)
 
 static void display_grid(int rank, unsigned nblocks)
 {
+	if (!display)
+		return;
+
 	//if (rank == 0)
 	{
 		fprintf(stderr, "2D grid layout (Rank %d): \n", rank);
@@ -279,16 +290,12 @@ int main(int argc, char **argv)
 		if (rank == 0)
 			STARPU_PLU(display_data_content)(a_r, size);
 
-		fprintf(stderr, "COMPUTE AX on node %d \n", rank);
 //		STARPU_PLU(compute_ax)(size, x, y, nblocks, rank);
-
-		fprintf(stderr, "COMPUTE AX on node %d AFTER\n", rank);
 	}
 
 	barrier_ret = MPI_Barrier(MPI_COMM_WORLD);
 	STARPU_ASSERT(barrier_ret == MPI_SUCCESS);
 
-	fprintf(stderr, "GO for main on node %d\n", rank);
 	double timing = STARPU_PLU(plu_main)(nblocks, rank, world_size);
 
 	/*

+ 10 - 2
mpi/examples/mpi_lu/plu_solve.c

@@ -41,6 +41,9 @@ static double frobenius_norm(TYPE *v, unsigned n)
 
 void STARPU_PLU(display_data_content)(TYPE *data, unsigned blocksize)
 {
+	if (!STARPU_PLU(display_flag)())
+		return;
+
 	fprintf(stderr, "DISPLAY BLOCK\n");
 
 	unsigned i, j;
@@ -324,6 +327,8 @@ void STARPU_PLU(compute_lu_matrix)(unsigned size, unsigned nblocks, TYPE *Asaved
 {
 	TYPE *all_r = STARPU_PLU(reconstruct_matrix)(size, nblocks);
 
+	unsigned display = STARPU_PLU(display_flag)();
+
 	int rank;
 	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
 
@@ -360,7 +365,9 @@ void STARPU_PLU(compute_lu_matrix)(unsigned size, unsigned nblocks, TYPE *Asaved
 	        /* now A_err = L, compute L*U */
 	        CPU_TRMM("R", "U", "N", "U", size, size, 1.0f, U, size, L, size);
 	
-		fprintf(stderr, "\nLU\n");
+		if (display)
+			fprintf(stderr, "\nLU\n");
+
 		STARPU_PLU(display_data_content)(L, size);
 	
 	        /* compute "LU - A" in L*/
@@ -369,7 +376,8 @@ void STARPU_PLU(compute_lu_matrix)(unsigned size, unsigned nblocks, TYPE *Asaved
 	        TYPE err = CPU_ASUM(size*size, L, 1);
 	        int max = CPU_IAMAX(size*size, L, 1);
 	
-		fprintf(stderr, "DISPLAY ERROR\n");
+		if (display)
+			fprintf(stderr, "DISPLAY ERROR\n");
 
 		STARPU_PLU(display_data_content)(L, size);
 	

+ 2 - 2
mpi/examples/mpi_lu/pxlu.c

@@ -684,7 +684,7 @@ static void wait_termination(void)
 			{
 				starpu_data_handle block21 = STARPU_PLU(get_block_handle)(i, k);
 				//starpu_data_handle block21 = STARPU_PLU(get_block_handle)(k, i);
-				fprintf(stderr, "BLOCK21 i %d k %d -> handle %p\n", i, k, block21);
+				//fprintf(stderr, "BLOCK21 i %d k %d -> handle %p\n", i, k, block21);
 				wait_tag_and_fetch_handle(TAG21_SAVE(k, i), block21);
 			}
 		}
@@ -696,7 +696,7 @@ static void wait_termination(void)
 			{
 				//starpu_data_handle block12 = STARPU_PLU(get_block_handle)(j, k);
 				starpu_data_handle block12 = STARPU_PLU(get_block_handle)(k, j);
-				fprintf(stderr, "BLOCK12 j %d k %d -> handle %p\n", j, k, block12);
+				//fprintf(stderr, "BLOCK12 j %d k %d -> handle %p\n", j, k, block12);
 				wait_tag_and_fetch_handle(TAG12_SAVE(k, j), block12);
 			}
 		}

+ 2 - 0
mpi/examples/mpi_lu/pxlu.h

@@ -33,6 +33,8 @@ double STARPU_PLU(plu_main)(unsigned nblocks, int rank, int world_size);
 TYPE *STARPU_PLU(reconstruct_matrix)(unsigned size, unsigned nblocks);
 void STARPU_PLU(compute_lu_matrix)(unsigned size, unsigned nblocks, TYPE *Asaved);
 
+unsigned STARPU_PLU(display_flag)(void);
+
 void STARPU_PLU(compute_ax)(unsigned size, TYPE *x, TYPE *y, unsigned nblocks, int rank);
 void STARPU_PLU(compute_lux)(unsigned size, TYPE *x, TYPE *y, unsigned nblocks, int rank);
 starpu_data_handle STARPU_PLU(get_block_handle)(unsigned i, unsigned j);

+ 18 - 7
mpi/examples/mpi_lu/pxlu_kernels.c

@@ -18,6 +18,8 @@
 #include "pxlu_kernels.h"
 #include <math.h>
 
+//#define VERBOSE_KERNELS	1
+
 /*
  *   U22 
  */
@@ -37,9 +39,11 @@ static inline void STARPU_PLU(common_u22)(void *descr[],
 	unsigned ld21 = GET_BLAS_LD(descr[1]);
 	unsigned ld22 = GET_BLAS_LD(descr[2]);
 
+#ifdef VERBOSE_KERNELS
 	int rank;
 	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	//fprintf(stderr, "KERNEL 22 %d\n", rank);
+	fprintf(stderr, "KERNEL 22 %d\n", rank);
+#endif
 
 #ifdef USE_CUDA
 	cublasStatus status;
@@ -127,19 +131,21 @@ static inline void STARPU_PLU(common_u12)(void *descr[],
 	unsigned nx12 = GET_BLAS_NX(descr[1]);
 	unsigned ny12 = GET_BLAS_NY(descr[1]);
 
+#ifdef VERBOSE_KERNELS
 	int rank;
 	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
 	fprintf(stderr, "KERNEL 12 %d\n", rank);
 
-#ifdef USE_CUDA
-	cublasStatus status;
-	cudaError_t cures;
-#endif
-
 	fprintf(stderr, "INPUT 12 U11\n");
 	STARPU_PLU(display_data_content)(sub11, nx12);
 	fprintf(stderr, "INPUT 12 U12\n");
 	STARPU_PLU(display_data_content)(sub12, nx12);
+#endif
+
+#ifdef USE_CUDA
+	cublasStatus status;
+	cudaError_t cures;
+#endif
 
 	/* solve L11 U12 = A12 (find U12) */
 	switch (s) {
@@ -166,8 +172,10 @@ static inline void STARPU_PLU(common_u12)(void *descr[],
 			break;
 	}
 
+#ifdef VERBOSE_KERNELS
 	fprintf(stderr, "OUTPUT 12 U12\n");
 	STARPU_PLU(display_data_content)(sub12, nx12);
+#endif
 }
 
 static void STARPU_PLU(cpu_u12)(void *descr[], void *_args)
@@ -223,6 +231,7 @@ static inline void STARPU_PLU(common_u21)(void *descr[],
 	unsigned nx21 = GET_BLAS_NX(descr[1]);
 	unsigned ny21 = GET_BLAS_NY(descr[1]);
 	
+#ifdef VERBOSE_KERNELS
 	int rank;
 	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
 	fprintf(stderr, "KERNEL 21 %d \n", rank);
@@ -231,6 +240,7 @@ static inline void STARPU_PLU(common_u21)(void *descr[],
 	STARPU_PLU(display_data_content)(sub11, nx21);
 	fprintf(stderr, "INPUT 21 U21\n");
 	STARPU_PLU(display_data_content)(sub21, nx21);
+#endif
 
 #ifdef USE_CUDA
 	cublasStatus status;
@@ -261,11 +271,12 @@ static inline void STARPU_PLU(common_u21)(void *descr[],
 			break;
 	}
 
+#ifdef VERBOSE_KERNELS
 	fprintf(stderr, "OUTPUT 21 U11\n");
 	STARPU_PLU(display_data_content)(sub11, nx21);
 	fprintf(stderr, "OUTPUT 21 U21\n");
 	STARPU_PLU(display_data_content)(sub21, nx21);
-
+#endif
 }
 
 static void STARPU_PLU(cpu_u21)(void *descr[], void *_args)