ソースを参照

The performance of the MPI Cholesky kernel is now measured as the elapsed
time of the slowest process. Barriers are added before both timestamps so
that the measurement also starts at the same time on every process.

Cédric Augonnet 14 年 前
コミット
689896a8ef
共有 2 個のファイルを変更した 38 個の追加 と 15 個の削除 を含む
  1. 20 8
      mpi/examples/cholesky/mpi_cholesky.c
  2. 18 7
      mpi/examples/cholesky/mpi_cholesky_distributed.c

+ 20 - 8
mpi/examples/cholesky/mpi_cholesky.c

@@ -79,7 +79,6 @@ static void dw_cholesky(float ***matA, unsigned size, unsigned ld, unsigned nblo
         data_handles = malloc(nblocks*sizeof(starpu_data_handle *));
         for(x=0 ; x<nblocks ; x++) data_handles[x] = malloc(nblocks*sizeof(starpu_data_handle));
 
-	gettimeofday(&start, NULL);
         for(x = 0; x < nblocks ;  x++) {
                 for (y = 0; y < nblocks; y++) {
                         int mpi_rank = my_distrib(x, y, nodes);
@@ -103,6 +102,9 @@ static void dw_cholesky(float ***matA, unsigned size, unsigned ld, unsigned nblo
                 }
         }
 
+	starpu_mpi_barrier(MPI_COMM_WORLD);
+	gettimeofday(&start, NULL);
+
 	for (k = 0; k < nblocks; k++)
         {
                 int prio = STARPU_DEFAULT_PRIO;
@@ -151,14 +153,18 @@ static void dw_cholesky(float ***matA, unsigned size, unsigned ld, unsigned nblo
         }
 	free(data_handles);
 
+	starpu_mpi_barrier(MPI_COMM_WORLD);
 	gettimeofday(&end, NULL);
 
-	double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
-	fprintf(stderr, "[%d] Computation took (in ms)\n", rank);
-	fprintf(stdout, "%2.2f\n", timing/1000);
-
-	double flop = (1.0f*size*size*size)/3.0f;
-	fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
+	if (rank == 0)
+	{
+		double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
+		fprintf(stderr, "Computation took (in ms)\n");
+		fprintf(stdout, "%2.2f\n", timing/1000);
+	
+		double flop = (1.0f*size*size*size)/3.0f;
+		fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
+	}
 }
 
 int main(int argc, char **argv)
@@ -173,7 +179,13 @@ int main(int argc, char **argv)
 
 	parse_args(argc, argv);
 
-	starpu_init(NULL);
+	struct starpu_conf conf;
+	starpu_conf_init(&conf);
+	
+	conf.sched_policy_name = "heft";
+	conf.calibrate = 1;
+
+	starpu_init(&conf);
 	starpu_mpi_initialize_extended(&rank, &nodes);
 	starpu_helper_cublas_init();
 

+ 18 - 7
mpi/examples/cholesky/mpi_cholesky_distributed.c

@@ -79,6 +79,7 @@ static void dw_cholesky(float ***matA, unsigned size, unsigned ld, unsigned nblo
         data_handles = malloc(nblocks*sizeof(starpu_data_handle *));
         for(x=0 ; x<nblocks ; x++) data_handles[x] = malloc(nblocks*sizeof(starpu_data_handle));
 
+	starpu_mpi_barrier(MPI_COMM_WORLD);
 	gettimeofday(&start, NULL);
         for(x = 0; x < nblocks ;  x++) {
                 for (y = 0; y < nblocks; y++) {
@@ -151,14 +152,18 @@ static void dw_cholesky(float ***matA, unsigned size, unsigned ld, unsigned nblo
         }
 	free(data_handles);
 
+	starpu_mpi_barrier(MPI_COMM_WORLD);
 	gettimeofday(&end, NULL);
 
-	double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
-	fprintf(stderr, "[%d] Computation took (in ms)\n", rank);
-	fprintf(stdout, "%2.2f\n", timing/1000);
-
-	double flop = (1.0f*size*size*size)/3.0f;
-	fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
+	if (rank == 0)
+	{
+		double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
+		fprintf(stderr, "Computation took (in ms)\n");
+		fprintf(stdout, "%2.2f\n", timing/1000);
+	
+		double flop = (1.0f*size*size*size)/3.0f;
+		fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
+	}
 }
 
 int main(int argc, char **argv)
@@ -173,7 +178,13 @@ int main(int argc, char **argv)
 
 	parse_args(argc, argv);
 
-	starpu_init(NULL);
+	struct starpu_conf conf;
+	starpu_conf_init(&conf);
+
+	conf.sched_policy_name = "heft";
+	conf.calibrate = 1;
+
+	starpu_init(&conf);
 	starpu_mpi_initialize_extended(&rank, &nodes);
 	starpu_helper_cublas_init();