浏览代码

The performance of the MPI Cholesky kernel is obtained by taking the
execution time of the slowest process. We also add barriers to ensure that
all processes start the measurement at the same time.

Cédric Augonnet 14 年之前
父节点
当前提交
689896a8ef
共有 2 个文件被更改，包括 38 次插入和 15 次删除
  1. 20 8
      mpi/examples/cholesky/mpi_cholesky.c
  2. 18 7
      mpi/examples/cholesky/mpi_cholesky_distributed.c

+ 20 - 8
mpi/examples/cholesky/mpi_cholesky.c

@@ -79,7 +79,6 @@ static void dw_cholesky(float ***matA, unsigned size, unsigned ld, unsigned nblo
         data_handles = malloc(nblocks*sizeof(starpu_data_handle *));
         data_handles = malloc(nblocks*sizeof(starpu_data_handle *));
         for(x=0 ; x<nblocks ; x++) data_handles[x] = malloc(nblocks*sizeof(starpu_data_handle));
         for(x=0 ; x<nblocks ; x++) data_handles[x] = malloc(nblocks*sizeof(starpu_data_handle));
 
 
-	gettimeofday(&start, NULL);
         for(x = 0; x < nblocks ;  x++) {
         for(x = 0; x < nblocks ;  x++) {
                 for (y = 0; y < nblocks; y++) {
                 for (y = 0; y < nblocks; y++) {
                         int mpi_rank = my_distrib(x, y, nodes);
                         int mpi_rank = my_distrib(x, y, nodes);
@@ -103,6 +102,9 @@ static void dw_cholesky(float ***matA, unsigned size, unsigned ld, unsigned nblo
                 }
                 }
         }
         }
 
 
+	starpu_mpi_barrier(MPI_COMM_WORLD);
+	gettimeofday(&start, NULL);
+
 	for (k = 0; k < nblocks; k++)
 	for (k = 0; k < nblocks; k++)
         {
         {
                 int prio = STARPU_DEFAULT_PRIO;
                 int prio = STARPU_DEFAULT_PRIO;
@@ -151,14 +153,18 @@ static void dw_cholesky(float ***matA, unsigned size, unsigned ld, unsigned nblo
         }
         }
 	free(data_handles);
 	free(data_handles);
 
 
+	starpu_mpi_barrier(MPI_COMM_WORLD);
 	gettimeofday(&end, NULL);
 	gettimeofday(&end, NULL);
 
 
-	double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
-	fprintf(stderr, "[%d] Computation took (in ms)\n", rank);
-	fprintf(stdout, "%2.2f\n", timing/1000);
-
-	double flop = (1.0f*size*size*size)/3.0f;
-	fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
+	if (rank == 0)
+	{
+		double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
+		fprintf(stderr, "Computation took (in ms)\n");
+		fprintf(stdout, "%2.2f\n", timing/1000);
+	
+		double flop = (1.0f*size*size*size)/3.0f;
+		fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
+	}
 }
 }
 
 
 int main(int argc, char **argv)
 int main(int argc, char **argv)
@@ -173,7 +179,13 @@ int main(int argc, char **argv)
 
 
 	parse_args(argc, argv);
 	parse_args(argc, argv);
 
 
-	starpu_init(NULL);
+	struct starpu_conf conf;
+	starpu_conf_init(&conf);
+	
+	conf.sched_policy_name = "heft";
+	conf.calibrate = 1;
+
+	starpu_init(&conf);
 	starpu_mpi_initialize_extended(&rank, &nodes);
 	starpu_mpi_initialize_extended(&rank, &nodes);
 	starpu_helper_cublas_init();
 	starpu_helper_cublas_init();
 
 

+ 18 - 7
mpi/examples/cholesky/mpi_cholesky_distributed.c

@@ -79,6 +79,7 @@ static void dw_cholesky(float ***matA, unsigned size, unsigned ld, unsigned nblo
         data_handles = malloc(nblocks*sizeof(starpu_data_handle *));
         data_handles = malloc(nblocks*sizeof(starpu_data_handle *));
         for(x=0 ; x<nblocks ; x++) data_handles[x] = malloc(nblocks*sizeof(starpu_data_handle));
         for(x=0 ; x<nblocks ; x++) data_handles[x] = malloc(nblocks*sizeof(starpu_data_handle));
 
 
+	starpu_mpi_barrier(MPI_COMM_WORLD);
 	gettimeofday(&start, NULL);
 	gettimeofday(&start, NULL);
         for(x = 0; x < nblocks ;  x++) {
         for(x = 0; x < nblocks ;  x++) {
                 for (y = 0; y < nblocks; y++) {
                 for (y = 0; y < nblocks; y++) {
@@ -151,14 +152,18 @@ static void dw_cholesky(float ***matA, unsigned size, unsigned ld, unsigned nblo
         }
         }
 	free(data_handles);
 	free(data_handles);
 
 
+	starpu_mpi_barrier(MPI_COMM_WORLD);
 	gettimeofday(&end, NULL);
 	gettimeofday(&end, NULL);
 
 
-	double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
-	fprintf(stderr, "[%d] Computation took (in ms)\n", rank);
-	fprintf(stdout, "%2.2f\n", timing/1000);
-
-	double flop = (1.0f*size*size*size)/3.0f;
-	fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
+	if (rank == 0)
+	{
+		double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
+		fprintf(stderr, "Computation took (in ms)\n");
+		fprintf(stdout, "%2.2f\n", timing/1000);
+	
+		double flop = (1.0f*size*size*size)/3.0f;
+		fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
+	}
 }
 }
 
 
 int main(int argc, char **argv)
 int main(int argc, char **argv)
@@ -173,7 +178,13 @@ int main(int argc, char **argv)
 
 
 	parse_args(argc, argv);
 	parse_args(argc, argv);
 
 
-	starpu_init(NULL);
+	struct starpu_conf conf;
+	starpu_conf_init(&conf);
+
+	conf.sched_policy_name = "heft";
+	conf.calibrate = 1;
+
+	starpu_init(&conf);
 	starpu_mpi_initialize_extended(&rank, &nodes);
 	starpu_mpi_initialize_extended(&rank, &nodes);
 	starpu_helper_cublas_init();
 	starpu_helper_cublas_init();