@@ -28,7 +28,8 @@ initializes a token on node 0, and the token is passed from node to node,
 incremented by one on each step. The code is not using StarPU yet.

 \code{.c}
-    for (loop = 0; loop < nloops; loop++) {
+    for (loop = 0; loop < nloops; loop++)
+    {
         int tag = loop*size + rank;

         if (loop == 0 && rank == 0)
@@ -62,7 +63,8 @@ execution to StarPU. This is possible by just using starpu_data_acquire(), for
 instance:

 \code{.c}
-    for (loop = 0; loop < nloops; loop++) {
+    for (loop = 0; loop < nloops; loop++)
+    {
         int tag = loop*size + rank;

         /* Acquire the data to be able to write to it */
@@ -412,7 +414,8 @@ communication cache when unregistering the data.

 \code{.c}
 /* Returns the MPI node number where data is */
-int my_distrib(int x, int y, int nb_nodes) {
+int my_distrib(int x, int y, int nb_nodes)
+{
     /* Block distrib */
     return ((int)(x / sqrt(nb_nodes) + (y / sqrt(nb_nodes)) * sqrt(nb_nodes))) % nb_nodes;

@@ -439,8 +442,10 @@ data which will be needed by the tasks that we will execute.
 unsigned matrix[X][Y];
 starpu_data_handle_t data_handles[X][Y];

-for(x = 0; x < X; x++) {
-    for (y = 0; y < Y; y++) {
+for(x = 0; x < X; x++)
+{
+    for (y = 0; y < Y; y++)
+    {
         int mpi_rank = my_distrib(x, y, size);
         if (mpi_rank == my_rank)
             /* Owning data */
@@ -454,7 +459,8 @@ data which will be needed by the tasks that we will execute.
         else
             /* I know it's useless to allocate anything for this */
             data_handles[x][y] = NULL;
-        if (data_handles[x][y]) {
+        if (data_handles[x][y])
+        {
             starpu_mpi_data_register(data_handles[x][y], x*Y+y, mpi_rank);
         }
     }
@@ -604,8 +610,10 @@ register any data that wasn't registered already and will be needed, then
 migrate the data, and register the new location.

 \code{.c}
-for(x = 0; x < X; x++) {
-    for (y = 0; y < Y; y++) {
+for(x = 0; x < X; x++)
+{
+    for (y = 0; y < Y; y++)
+    {
         int mpi_rank = my_distrib2(x, y, size);
         if (!data_handles[x][y] && (mpi_rank == my_rank
             || my_rank == my_distrib(x+1, y, size) || my_rank == my_distrib(x-1, y, size)
@@ -613,7 +621,8 @@ migrate the data, and register the new location.
             /* Register newly-needed data */
             starpu_variable_data_register(&data_handles[x][y], -1,
                                           (uintptr_t)NULL, sizeof(unsigned));
-        if (data_handles[x][y]) {
+        if (data_handles[x][y])
+        {
             /* Migrate the data */
             starpu_mpi_data_migrate(MPI_COMM_WORLD, data_handles[x][y], mpi_rank);
         }
@@ -636,9 +645,12 @@ resides. Otherwise the unregistration will complain that it does not have the
 latest value on the original home node.

 \code{.c}
-for(x = 0; x < X; x++) {
-    for (y = 0; y < Y; y++) {
-        if (data_handles[x][y]) {
+for(x = 0; x < X; x++)
+{
+    for (y = 0; y < Y; y++)
+    {
+        if (data_handles[x][y])
+        {
             int mpi_rank = my_distrib(x, y, size);
             /* Get back data to original place where the user-provided buffer is. */
             starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, data_handles[x][y], mpi_rank, NULL, NULL);
@@ -669,20 +681,24 @@ data_handles = malloc(nblocks*sizeof(starpu_data_handle_t *));
 for(x = 0; x < nblocks ; x++)
 {
     int mpi_rank = my_distrib(x, nodes);
-    if (rank == root) {
+    if (rank == root)
+    {
         starpu_vector_data_register(&data_handles[x], STARPU_MAIN_RAM, (uintptr_t)vector[x],
                                     block_size, sizeof(float));
     }
-    else if ((mpi_rank == rank) || ((rank == mpi_rank+1 || rank == mpi_rank-1))) {
+    else if ((mpi_rank == rank) || ((rank == mpi_rank+1 || rank == mpi_rank-1)))
+    {
         /* I own that index, or I will need it for my computations */
         starpu_vector_data_register(&data_handles[x], -1, (uintptr_t)NULL,
                                     block_size, sizeof(float));
     }
-    else {
+    else
+    {
         /* I know it's useless to allocate anything for this */
         data_handles[x] = NULL;
     }
-    if (data_handles[x]) {
+    if (data_handles[x])
+    {
         starpu_mpi_data_register(data_handles[x], x, mpi_rank);
     }
 }
@@ -691,10 +707,13 @@ for(x = 0; x < nblocks ; x++)
 starpu_mpi_scatter_detached(data_handles, nblocks, root, MPI_COMM_WORLD);

 /* Calculation */
-for(x = 0; x < nblocks ; x++) {
-    if (data_handles[x]) {
+for(x = 0; x < nblocks ; x++)
+{
+    if (data_handles[x])
+    {
         int owner = starpu_data_get_rank(data_handles[x]);
-        if (owner == rank) {
+        if (owner == rank)
+        {
             starpu_task_insert(&cl, STARPU_RW, data_handles[x], 0);
         }
     }
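
The hunks above change brace placement only; the registration logic itself is untouched. As a minimal self-contained sketch of the resulting style (assuming the X and Y dimensions and the my_distrib() helper from the examples above, and simplifying away the neighbor-allocation case), the registration loop reads:

\code{.c}
/* Sketch only: X, Y and my_distrib() are assumed from the examples above;
 * non-owned data that neighbors would need is simply skipped here. */
#include <stdint.h>
#include <starpu.h>
#include <starpu_mpi.h>

static void register_all(starpu_data_handle_t data_handles[X][Y],
                         unsigned matrix[X][Y], int my_rank, int size)
{
    int x, y;
    for (x = 0; x < X; x++)
    {
        for (y = 0; y < Y; y++)
        {
            int mpi_rank = my_distrib(x, y, size);
            if (mpi_rank == my_rank)
                /* Owned locally: register the user-provided buffer */
                starpu_variable_data_register(&data_handles[x][y], STARPU_MAIN_RAM,
                                              (uintptr_t)&matrix[x][y], sizeof(unsigned));
            else
                /* Not needed locally in this simplified sketch */
                data_handles[x][y] = NULL;
            if (data_handles[x][y])
            {
                /* Tag x*Y+y is unique for 0 <= x < X, 0 <= y < Y */
                starpu_mpi_data_register(data_handles[x][y], x*Y+y, mpi_rank);
            }
        }
    }
}
\endcode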