浏览代码

cholesky: submit tasks by columns

This matches what Chameleon does, and makes it possible to properly emit the
starpu_data_wont_use and starpu_mpi_cache_flush hints.
Samuel Thibault 4 年之前
父节点
当前提交
9ff52bc3a1
共有 2 个文件被更改，包括 30 次插入和 33 次删除
  1. 17 20
      examples/cholesky/cholesky_implicit.c
  2. 13 13
      mpi/examples/matrix_decomposition/mpi_cholesky_codelets.c

+ 17 - 20
examples/cholesky/cholesky_implicit.c

@@ -92,29 +92,26 @@ static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 		}
 		starpu_data_wont_use(sdatakk);
 
-		for (m = k+1; m<nblocks; m++)
+		for (n = k+1; n<nblocks; n++)
 		{
-                        starpu_data_handle_t sdatamk = starpu_data_get_sub_data(dataA, 2, m, k);
-			for (n = k+1; n<nblocks; n++)
+                        starpu_data_handle_t sdatank = starpu_data_get_sub_data(dataA, 2, n, k);
+			for (m = n; m<nblocks; m++)
 			{
-				if (n <= m)
-                                {
-					starpu_data_handle_t sdatank = starpu_data_get_sub_data(dataA, 2, n, k);
-					starpu_data_handle_t sdatamn = starpu_data_get_sub_data(dataA, 2, m, n);
-
-					ret = starpu_task_insert(&cl22,
-								 STARPU_PRIORITY, noprio_p ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k - m - n) : ((n == k+1) && (m == k+1))?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO,
-								 STARPU_R, sdatamk,
-								 STARPU_R, sdatank,
-								 cl22.modes[2], sdatamn,
-								 STARPU_FLOPS, (double) FLOPS_SGEMM(nn, nn, nn),
-								 STARPU_TAG_ONLY, TAG22(k,m,n),
-								 0);
-					if (ret == -ENODEV) return 77;
-					STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
-                                }
+				starpu_data_handle_t sdatamk = starpu_data_get_sub_data(dataA, 2, m, k);
+				starpu_data_handle_t sdatamn = starpu_data_get_sub_data(dataA, 2, m, n);
+
+				ret = starpu_task_insert(&cl22,
+							 STARPU_PRIORITY, noprio_p ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k - m - n) : ((n == k+1) && (m == k+1))?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO,
+							 STARPU_R, sdatamk,
+							 STARPU_R, sdatank,
+							 cl22.modes[2], sdatamn,
+							 STARPU_FLOPS, (double) FLOPS_SGEMM(nn, nn, nn),
+							 STARPU_TAG_ONLY, TAG22(k,m,n),
+							 0);
+				if (ret == -ENODEV) return 77;
+				STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
 			}
-			starpu_data_wont_use(sdatamk);
+			starpu_data_wont_use(sdatank);
 		}
 		starpu_iteration_pop();
 	}

+ 13 - 13
mpi/examples/matrix_decomposition/mpi_cholesky_codelets.c

@@ -93,23 +93,23 @@ static void run_cholesky(starpu_data_handle_t **data_handles, int rank, int node
 			starpu_mpi_cache_flush(MPI_COMM_WORLD, data_handles[k][k]);
 			if (my_distrib(k, k, nodes) == rank)
 				starpu_data_wont_use(data_handles[k][k]);
+		}
 
-			for (n = k+1; n<nblocks; n++)
+		for (n = k+1; n<nblocks; n++)
+		{
+			for (m = n; m<nblocks; m++)
 			{
-				if (n <= m)
-				{
-					starpu_mpi_task_insert(MPI_COMM_WORLD, &cl22,
-							       STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k - m - n) : ((n == k+1) && (m == k+1))?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO,
-							       STARPU_R, data_handles[n][k],
-							       STARPU_R, data_handles[m][k],
-							       STARPU_RW | STARPU_COMMUTE, data_handles[m][n],
-							       0);
-				}
+				starpu_mpi_task_insert(MPI_COMM_WORLD, &cl22,
+						       STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k - m - n) : ((n == k+1) && (m == k+1))?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO,
+						       STARPU_R, data_handles[n][k],
+						       STARPU_R, data_handles[m][k],
+						       STARPU_RW | STARPU_COMMUTE, data_handles[m][n],
+						       0);
 			}
 
-			starpu_mpi_cache_flush(MPI_COMM_WORLD, data_handles[m][k]);
-			if (my_distrib(m, k, nodes) == rank)
-				starpu_data_wont_use(data_handles[m][k]);
+			starpu_mpi_cache_flush(MPI_COMM_WORLD, data_handles[n][k]);
+			if (my_distrib(n, k, nodes) == rank)
+				starpu_data_wont_use(data_handles[n][k]);
 		}
 		starpu_iteration_pop();
 	}