Browse Source

Integrate bound in lu output

Samuel Thibault 10 years ago
parent
commit
ed082a5e41

+ 3 - 14
examples/lu/lu_example.c

@@ -30,9 +30,9 @@ static unsigned check = 0;
 static unsigned pivot = 0;
 static unsigned no_stride = 0;
 static unsigned profile = 0;
-static unsigned bound = 0;
-static unsigned bounddeps = 0;
-static unsigned boundprio = 0;
+unsigned bound = 0;
+unsigned bounddeps = 0;
+unsigned boundprio = 0;
 
 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
 
@@ -326,9 +326,6 @@ int main(int argc, char **argv)
 
 	display_matrix(A, size, size, "A");
 
-	if (bound)
-		starpu_bound_start(bounddeps, boundprio);
-
 	if (profile)
 		starpu_profiling_status_set(STARPU_PROFILING_ENABLE);
 
@@ -382,8 +379,6 @@ int main(int argc, char **argv)
 	if (bound)
 	{
 		double min;
-		FPRINTF(stderr, "Setting bound\n");
-		starpu_bound_stop();
 		if (bounddeps)
 		{
 			FILE *f = fopen("lu.pl", "w");
@@ -399,12 +394,6 @@ int main(int argc, char **argv)
 			FPRINTF(stderr,"system printed to lu.mps\n");
 			fclose(f);
 		}
-		else
-		{
-			starpu_bound_compute(&min, NULL, 0);
-			if (min != 0.)
-				FPRINTF(stderr, "theoretical min: %f ms\n", min);
-		}
 	}
 
 #ifndef STARPU_SIMGRID

+ 18 - 2
examples/lu/xlu.c

@@ -178,6 +178,9 @@ static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
 	/* create all the DAG nodes */
 	unsigned i,j,k;
 
+	if (bound)
+		starpu_bound_start(bounddeps, boundprio);
+
 	for (k = 0; k < nblocks; k++)
 	{
 		struct starpu_task *task = create_task_11(dataA, k);
@@ -223,12 +226,25 @@ static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
 
 	end = starpu_timing_now();
 
+	if (bound)
+		starpu_bound_stop();
+
 	double timing = end - start;
 	unsigned n = starpu_matrix_get_nx(dataA);
 	double flop = (2.0f*n*n*n)/3.0f;
 
-	PRINTF("# size\tms\tGFlops\n");
-	PRINTF("%u\t%.0f\t%.1f\n", n, timing/1000, flop/timing/1000.0f);
+	PRINTF("# size\tms\tGFlops");
+	if (bound)
+		PRINTF("\tTms\tTGFlops");
+	PRINTF("\n");
+	PRINTF("%u\t%.0f\t%.1f", n, timing/1000, flop/timing/1000.0f);
+	if (bound)
+	{
+		double min;
+		starpu_bound_compute(&min, NULL, 0);
+		PRINTF("\t%.0f\t%.1f", min, flop/min/1000000.0f);
+	}
+	PRINTF("\n");
 
 	return 0;
 }

+ 3 - 0
examples/lu/xlu.h

@@ -111,6 +111,9 @@ extern struct starpu_perfmodel model_11;
 extern struct starpu_perfmodel model_12;
 extern struct starpu_perfmodel model_21;
 extern struct starpu_perfmodel model_22;
+extern unsigned bound;
+extern unsigned bounddeps;
+extern unsigned boundprio;
 
 struct piv_s
 {

+ 18 - 2
examples/lu/xlu_implicit.c

@@ -117,6 +117,9 @@ static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
 	/* create all the DAG nodes */
 	unsigned i,j,k;
 
+	if (bound)
+		starpu_bound_start(bounddeps, boundprio);
+
 	start = starpu_timing_now();
 
 	for (k = 0; k < nblocks; k++)
@@ -144,12 +147,25 @@ static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
 
 	end = starpu_timing_now();
 
+	if (bound)
+		starpu_bound_stop();
+
 	double timing = end - start;
 	unsigned n = starpu_matrix_get_nx(dataA);
 	double flop = (2.0f*n*n*n)/3.0f;
 
-	PRINTF("# size\tms\tGFlops\n");
-	PRINTF("%u\t%.0f\t%.1f\n", n, timing/1000, flop/timing/1000.0f);
+	PRINTF("# size\tms\tGFlops");
+	if (bound)
+		PRINTF("\tTms\tTGFlops");
+	PRINTF("\n");
+	PRINTF("%u\t%.0f\t%.1f", n, timing/1000, flop/timing/1000.0f);
+	if (bound)
+	{
+		double min;
+		starpu_bound_compute(&min, NULL, 0);
+		PRINTF("\t%.0f\t%.1f", min, flop/min/1000000.0f);
+	}
+	PRINTF("\n");
 
 	return 0;
 }

+ 30 - 4
examples/lu/xlu_implicit_pivot.c

@@ -162,6 +162,9 @@ static int dw_codelet_facto_pivot(starpu_data_handle_t *dataAp,
 	/* create all the DAG nodes */
 	unsigned i,j,k;
 
+	if (bound)
+		starpu_bound_start(bounddeps, boundprio);
+
 	start = starpu_timing_now();
 
 	for (k = 0; k < nblocks; k++)
@@ -199,6 +202,9 @@ static int dw_codelet_facto_pivot(starpu_data_handle_t *dataAp,
 
 	end = starpu_timing_now();
 
+	if (bound)
+		starpu_bound_stop();
+
 	*timing = end - start;
 	return 0;
 }
@@ -254,8 +260,18 @@ int STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size,
 	unsigned n = starpu_matrix_get_nx(dataA);
 	double flop = (2.0f*n*n*n)/3.0f;
 
-	PRINTF("# size\tms\tGFlops\n");
-	PRINTF("%u\t%.0f\t%.1f\n", n, timing/1000, flop/timing/1000.0f);
+	PRINTF("# size\tms\tGFlops");
+	if (bound)
+		PRINTF("\tTms\tTGFlops");
+	PRINTF("\n");
+	PRINTF("%u\t%.0f\t%.1f", n, timing/1000, flop/timing/1000.0f);
+	if (bound)
+	{
+		double min;
+		starpu_bound_compute(&min, NULL, 0);
+		PRINTF("\t%.0f\t%.1f", min, flop/min/1000000.0f);
+	}
+	PRINTF("\n");
 
 
 	/* gather all the data */
@@ -309,8 +325,18 @@ int STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, uns
 	unsigned n = starpu_matrix_get_nx(dataAp[0])*nblocks;
 	double flop = (2.0f*n*n*n)/3.0f;
 
-	PRINTF("# size\tms\tGFlops\n");
-	PRINTF("%u\t%.0f\t%.1f\n", n, timing/1000, flop/timing/1000.0f);
+	PRINTF("# size\tms\tGFlops");
+	if (bound)
+		PRINTF("\tTms\tTGFlops");
+	PRINTF("\n");
+	PRINTF("%u\t%.0f\t%.1f", n, timing/1000, flop/timing/1000.0f);
+	if (bound)
+	{
+		double min;
+		starpu_bound_compute(&min, NULL, 0);
+		PRINTF("\t%.0f\t%.1f", min, flop/min/1000000.0f);
+	}
+	PRINTF("\n");
 
 
 	for (bj = 0; bj < nblocks; bj++)

+ 30 - 4
examples/lu/xlu_pivot.c

@@ -240,6 +240,9 @@ static int dw_codelet_facto_pivot(starpu_data_handle_t *dataAp,
 	/* create all the DAG nodes */
 	unsigned i,j,k;
 
+	if (bound)
+		starpu_bound_start(bounddeps, boundprio);
+
 	for (k = 0; k < nblocks; k++)
 	{
 		struct starpu_task *task = create_task_11_pivot(dataAp, nblocks, k, piv_description, get_block);
@@ -309,6 +312,9 @@ static int dw_codelet_facto_pivot(starpu_data_handle_t *dataAp,
 
 	end = starpu_timing_now();
 
+	if (bound)
+		starpu_bound_stop();
+
 	*timing = end - start;
 	return 0;
 }
@@ -374,8 +380,18 @@ int STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size,
 	unsigned n = starpu_matrix_get_nx(dataA);
 	double flop = (2.0f*n*n*n)/3.0f;
 
-	PRINTF("# size\tms\tGFlops\n");
-	PRINTF("%u\t%.0f\t%.1f\n", n, timing/1000, flop/timing/1000.0f);
+	PRINTF("# size\tms\tGFlops");
+	if (bound)
+		PRINTF("\tTms\tTGFlops");
+	PRINTF("\n");
+	PRINTF("%u\t%.0f\t%.1f", n, timing/1000, flop/timing/1000.0f);
+	if (bound)
+	{
+		double min;
+		starpu_bound_compute(&min, NULL, 0);
+		PRINTF("\t%.0f\t%.1f", min, flop/min/1000000.0f);
+	}
+	PRINTF("\n");
 
 	/* gather all the data */
 	starpu_data_unpartition(dataA, STARPU_MAIN_RAM);
@@ -428,8 +444,18 @@ int STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, uns
 	unsigned n = starpu_matrix_get_nx(dataAp[0])*nblocks;
 	double flop = (2.0f*n*n*n)/3.0f;
 
-	PRINTF("# size\tms\tGFlops\n");
-	PRINTF("%u\t%.0f\t%.1f\n", n, timing/1000, flop/timing/1000.0f);
+	PRINTF("# size\tms\tGFlops");
+	if (bound)
+		PRINTF("\tTms\tTGFlops");
+	PRINTF("\n");
+	PRINTF("%u\t%.0f\t%.1f", n, timing/1000, flop/timing/1000.0f);
+	if (bound)
+	{
+		double min;
+		starpu_bound_compute(&min, NULL, 0);
+		PRINTF("\t%.0f\t%.1f", min, flop/min/1000000.0f);
+	}
+	PRINTF("\n");
 
 	for (bj = 0; bj < nblocks; bj++)
 	for (bi = 0; bi < nblocks; bi++)