|
@@ -19,6 +19,8 @@
|
|
* See the GNU Lesser General Public License in COPYING.LGPL for more details.
|
|
* See the GNU Lesser General Public License in COPYING.LGPL for more details.
|
|
*/
|
|
*/
|
|
|
|
|
|
|
|
+#define STARPU_VERBOSE
|
|
|
|
+
|
|
#include <datawizard/filters.h>
|
|
#include <datawizard/filters.h>
|
|
#include <datawizard/footprint.h>
|
|
#include <datawizard/footprint.h>
|
|
#include <datawizard/interfaces/data_interface.h>
|
|
#include <datawizard/interfaces/data_interface.h>
|
|
@@ -234,9 +236,19 @@ static void _starpu_data_partition(starpu_data_handle_t initial_handle, starpu_d
|
|
child->switch_cl = NULL;
|
|
child->switch_cl = NULL;
|
|
child->partitioned = 0;
|
|
child->partitioned = 0;
|
|
child->readonly = 0;
|
|
child->readonly = 0;
|
|
|
|
+ child->active = inherit_state;
|
|
|
|
+ child->active_ro = 0;
|
|
child->mpi_data = initial_handle->mpi_data;
|
|
child->mpi_data = initial_handle->mpi_data;
|
|
child->root_handle = initial_handle->root_handle;
|
|
child->root_handle = initial_handle->root_handle;
|
|
child->father_handle = initial_handle;
|
|
child->father_handle = initial_handle;
|
|
|
|
+ child->active_children = NULL;
|
|
|
|
+ child->active_readonly_children = NULL;
|
|
|
|
+ child->nactive_readonly_children = 0;
|
|
|
|
+ child->nsiblings = nparts;
|
|
|
|
+ if (inherit_state)
|
|
|
|
+ child->siblings = NULL;
|
|
|
|
+ else
|
|
|
|
+ child->siblings = childrenp;
|
|
child->sibling_index = i;
|
|
child->sibling_index = i;
|
|
child->depth = initial_handle->depth + 1;
|
|
child->depth = initial_handle->depth + 1;
|
|
|
|
|
|
@@ -586,6 +598,7 @@ void starpu_data_partition_plan(starpu_data_handle_t initial_handle, struct star
|
|
STARPU_ASSERT_MSG(initial_handle->sequential_consistency, "partition planning is currently only supported for data with sequential consistency");
|
|
STARPU_ASSERT_MSG(initial_handle->sequential_consistency, "partition planning is currently only supported for data with sequential consistency");
|
|
struct starpu_codelet *cl = initial_handle->switch_cl;
|
|
struct starpu_codelet *cl = initial_handle->switch_cl;
|
|
int home_node = initial_handle->home_node;
|
|
int home_node = initial_handle->home_node;
|
|
|
|
+ starpu_data_handle_t *children;
|
|
if (home_node == -1)
|
|
if (home_node == -1)
|
|
/* Nothing better for now */
|
|
/* Nothing better for now */
|
|
/* TODO: pass -1, and make _starpu_fetch_nowhere_task_input
|
|
/* TODO: pass -1, and make _starpu_fetch_nowhere_task_input
|
|
@@ -594,11 +607,13 @@ void starpu_data_partition_plan(starpu_data_handle_t initial_handle, struct star
|
|
*/
|
|
*/
|
|
home_node = STARPU_MAIN_RAM;
|
|
home_node = STARPU_MAIN_RAM;
|
|
|
|
|
|
|
|
+ _STARPU_MALLOC(children, nparts * sizeof(*children));
|
|
for (i = 0; i < nparts; i++)
|
|
for (i = 0; i < nparts; i++)
|
|
{
|
|
{
|
|
- _STARPU_CALLOC(childrenp[i], 1, sizeof(struct _starpu_data_state));
|
|
|
|
|
|
+ _STARPU_CALLOC(children[i], 1, sizeof(struct _starpu_data_state));
|
|
|
|
+ childrenp[i] = children[i];
|
|
}
|
|
}
|
|
- _starpu_data_partition(initial_handle, childrenp, nparts, f, 0);
|
|
|
|
|
|
+ _starpu_data_partition(initial_handle, children, nparts, f, 0);
|
|
|
|
|
|
if (!cl)
|
|
if (!cl)
|
|
{
|
|
{
|
|
@@ -607,6 +622,7 @@ void starpu_data_partition_plan(starpu_data_handle_t initial_handle, struct star
|
|
cl = initial_handle->switch_cl;
|
|
cl = initial_handle->switch_cl;
|
|
cl->where = STARPU_NOWHERE;
|
|
cl->where = STARPU_NOWHERE;
|
|
cl->nbuffers = STARPU_VARIABLE_NBUFFERS;
|
|
cl->nbuffers = STARPU_VARIABLE_NBUFFERS;
|
|
|
|
+ cl->flags = STARPU_CODELET_NOPLANS;
|
|
cl->name = "data_partition_switch";
|
|
cl->name = "data_partition_switch";
|
|
cl->specific_nodes = 1;
|
|
cl->specific_nodes = 1;
|
|
}
|
|
}
|
|
@@ -624,6 +640,15 @@ void starpu_data_partition_clean(starpu_data_handle_t root_handle, unsigned npar
|
|
{
|
|
{
|
|
unsigned i;
|
|
unsigned i;
|
|
|
|
|
|
|
|
+ if (children[0]->active) {
|
|
|
|
+#ifdef STARPU_DEVEL
|
|
|
|
+#warning FIXME: better choose gathering node
|
|
|
|
+#endif
|
|
|
|
+ starpu_data_unpartition_submit(root_handle, nparts, children, STARPU_MAIN_RAM);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ free(children[0]->siblings);
|
|
|
|
+
|
|
for (i = 0; i < nparts; i++)
|
|
for (i = 0; i < nparts; i++)
|
|
starpu_data_unregister_submit(children[i]);
|
|
starpu_data_unregister_submit(children[i]);
|
|
|
|
|
|
@@ -634,17 +659,26 @@ void starpu_data_partition_clean(starpu_data_handle_t root_handle, unsigned npar
|
|
|
|
|
|
void starpu_data_partition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children)
|
|
void starpu_data_partition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children)
|
|
{
|
|
{
|
|
|
|
+ unsigned i;
|
|
STARPU_ASSERT_MSG(initial_handle->sequential_consistency, "partition planning is currently only supported for data with sequential consistency");
|
|
STARPU_ASSERT_MSG(initial_handle->sequential_consistency, "partition planning is currently only supported for data with sequential consistency");
|
|
_starpu_spin_lock(&initial_handle->header_lock);
|
|
_starpu_spin_lock(&initial_handle->header_lock);
|
|
STARPU_ASSERT_MSG(initial_handle->partitioned == 0, "One can't submit several partition plannings at the same time");
|
|
STARPU_ASSERT_MSG(initial_handle->partitioned == 0, "One can't submit several partition plannings at the same time");
|
|
STARPU_ASSERT_MSG(initial_handle->readonly == 0, "One can't submit a partition planning while a readonly partitioning is active");
|
|
STARPU_ASSERT_MSG(initial_handle->readonly == 0, "One can't submit a partition planning while a readonly partitioning is active");
|
|
initial_handle->partitioned++;
|
|
initial_handle->partitioned++;
|
|
|
|
+ initial_handle->active_children = children[0]->siblings;
|
|
_starpu_spin_unlock(&initial_handle->header_lock);
|
|
_starpu_spin_unlock(&initial_handle->header_lock);
|
|
|
|
|
|
|
|
+ for (i = 0; i < nparts; i++)
|
|
|
|
+ {
|
|
|
|
+ _starpu_spin_lock(&children[i]->header_lock);
|
|
|
|
+ children[i]->active = 1;
|
|
|
|
+ _starpu_spin_unlock(&children[i]->header_lock);
|
|
|
|
+ }
|
|
|
|
+
|
|
if (!initial_handle->initialized)
|
|
if (!initial_handle->initialized)
|
|
/* No need for coherency, it is not initialized */
|
|
/* No need for coherency, it is not initialized */
|
|
return;
|
|
return;
|
|
- unsigned i;
|
|
|
|
|
|
+
|
|
struct starpu_data_descr descr[nparts];
|
|
struct starpu_data_descr descr[nparts];
|
|
for (i = 0; i < nparts; i++)
|
|
for (i = 0; i < nparts; i++)
|
|
{
|
|
{
|
|
@@ -659,15 +693,27 @@ void starpu_data_partition_submit(starpu_data_handle_t initial_handle, unsigned
|
|
|
|
|
|
void starpu_data_partition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children)
|
|
void starpu_data_partition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children)
|
|
{
|
|
{
|
|
|
|
+ unsigned i;
|
|
STARPU_ASSERT_MSG(initial_handle->sequential_consistency, "partition planning is currently only supported for data with sequential consistency");
|
|
STARPU_ASSERT_MSG(initial_handle->sequential_consistency, "partition planning is currently only supported for data with sequential consistency");
|
|
_starpu_spin_lock(&initial_handle->header_lock);
|
|
_starpu_spin_lock(&initial_handle->header_lock);
|
|
STARPU_ASSERT_MSG(initial_handle->partitioned == 0 || initial_handle->readonly, "One can't submit a readonly partition planning at the same time as a readwrite partition planning");
|
|
STARPU_ASSERT_MSG(initial_handle->partitioned == 0 || initial_handle->readonly, "One can't submit a readonly partition planning at the same time as a readwrite partition planning");
|
|
initial_handle->partitioned++;
|
|
initial_handle->partitioned++;
|
|
initial_handle->readonly = 1;
|
|
initial_handle->readonly = 1;
|
|
|
|
+ if (initial_handle->nactive_readonly_children < initial_handle->partitioned) {
|
|
|
|
+ _STARPU_REALLOC(initial_handle->active_readonly_children, initial_handle->partitioned * sizeof(initial_handle->active_readonly_children));
|
|
|
|
+ initial_handle->nactive_readonly_children = initial_handle->partitioned;
|
|
|
|
+ }
|
|
|
|
+ initial_handle->active_readonly_children[initial_handle->partitioned-1] = children[0]->siblings;
|
|
_starpu_spin_unlock(&initial_handle->header_lock);
|
|
_starpu_spin_unlock(&initial_handle->header_lock);
|
|
|
|
|
|
|
|
+ for (i = 0; i < nparts; i++)
|
|
|
|
+ {
|
|
|
|
+ _starpu_spin_lock(&children[i]->header_lock);
|
|
|
|
+ children[i]->active = 1;
|
|
|
|
+ _starpu_spin_unlock(&children[i]->header_lock);
|
|
|
|
+ }
|
|
|
|
+
|
|
STARPU_ASSERT_MSG(initial_handle->initialized, "It is odd to read-only-partition a data which does not have a value yet");
|
|
STARPU_ASSERT_MSG(initial_handle->initialized, "It is odd to read-only-partition a data which does not have a value yet");
|
|
- unsigned i;
|
|
|
|
struct starpu_data_descr descr[nparts];
|
|
struct starpu_data_descr descr[nparts];
|
|
for (i = 0; i < nparts; i++)
|
|
for (i = 0; i < nparts; i++)
|
|
{
|
|
{
|
|
@@ -686,6 +732,8 @@ void starpu_data_partition_readwrite_upgrade_submit(starpu_data_handle_t initial
|
|
STARPU_ASSERT_MSG(initial_handle->partitioned == 1, "One can't upgrade a readonly partition planning to readwrite while other readonly partition plannings are active");
|
|
STARPU_ASSERT_MSG(initial_handle->partitioned == 1, "One can't upgrade a readonly partition planning to readwrite while other readonly partition plannings are active");
|
|
STARPU_ASSERT_MSG(initial_handle->readonly == 1, "One can only upgrade a readonly partition planning");
|
|
STARPU_ASSERT_MSG(initial_handle->readonly == 1, "One can only upgrade a readonly partition planning");
|
|
initial_handle->readonly = 0;
|
|
initial_handle->readonly = 0;
|
|
|
|
+ initial_handle->active_children = initial_handle->active_readonly_children[0];
|
|
|
|
+ initial_handle->active_readonly_children[0] = NULL;
|
|
_starpu_spin_unlock(&initial_handle->header_lock);
|
|
_starpu_spin_unlock(&initial_handle->header_lock);
|
|
|
|
|
|
unsigned i;
|
|
unsigned i;
|
|
@@ -703,16 +751,37 @@ void starpu_data_partition_readwrite_upgrade_submit(starpu_data_handle_t initial
|
|
|
|
|
|
void starpu_data_unpartition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gather_node)
|
|
void starpu_data_unpartition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gather_node)
|
|
{
|
|
{
|
|
|
|
+ unsigned i;
|
|
STARPU_ASSERT_MSG(initial_handle->sequential_consistency, "partition planning is currently only supported for data with sequential consistency");
|
|
STARPU_ASSERT_MSG(initial_handle->sequential_consistency, "partition planning is currently only supported for data with sequential consistency");
|
|
STARPU_ASSERT_MSG(gather_node == initial_handle->home_node || gather_node == -1, "gathering node different from home node is currently not supported");
|
|
STARPU_ASSERT_MSG(gather_node == initial_handle->home_node || gather_node == -1, "gathering node different from home node is currently not supported");
|
|
_starpu_spin_lock(&initial_handle->header_lock);
|
|
_starpu_spin_lock(&initial_handle->header_lock);
|
|
STARPU_ASSERT_MSG(initial_handle->partitioned >= 1, "No partition planning is active for this handle");
|
|
STARPU_ASSERT_MSG(initial_handle->partitioned >= 1, "No partition planning is active for this handle");
|
|
|
|
+ if (initial_handle->readonly) {
|
|
|
|
+ /* Replace this children set with the last set in the list of readonly children sets */
|
|
|
|
+ for (i = 0; i < initial_handle->partitioned-1; i++) {
|
|
|
|
+ if (initial_handle->active_readonly_children[i] == children[0]->siblings) {
|
|
|
|
+ initial_handle->active_readonly_children[i] = initial_handle->active_readonly_children[initial_handle->partitioned-1];
|
|
|
|
+ initial_handle->active_readonly_children[initial_handle->partitioned-1] = NULL;
|
|
|
|
+ break;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ } else {
|
|
|
|
+ initial_handle->active_children = NULL;
|
|
|
|
+ }
|
|
initial_handle->partitioned--;
|
|
initial_handle->partitioned--;
|
|
if (!initial_handle->partitioned)
|
|
if (!initial_handle->partitioned)
|
|
initial_handle->readonly = 0;
|
|
initial_handle->readonly = 0;
|
|
|
|
+ initial_handle->active_children = NULL;
|
|
_starpu_spin_unlock(&initial_handle->header_lock);
|
|
_starpu_spin_unlock(&initial_handle->header_lock);
|
|
|
|
|
|
- unsigned i, n;
|
|
|
|
|
|
+ for (i = 0; i < nparts; i++)
|
|
|
|
+ {
|
|
|
|
+ _starpu_spin_lock(&children[i]->header_lock);
|
|
|
|
+ children[i]->active = 0;
|
|
|
|
+ _starpu_spin_unlock(&children[i]->header_lock);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ unsigned n;
|
|
struct starpu_data_descr descr[nparts];
|
|
struct starpu_data_descr descr[nparts];
|
|
for (i = 0, n = 0; i < nparts; i++)
|
|
for (i = 0, n = 0; i < nparts; i++)
|
|
{
|
|
{
|
|
@@ -755,6 +824,113 @@ void starpu_data_unpartition_readonly_submit(starpu_data_handle_t initial_handle
|
|
starpu_task_insert(initial_handle->switch_cl, STARPU_W, initial_handle, STARPU_DATA_MODE_ARRAY, descr, n, 0);
|
|
starpu_task_insert(initial_handle->switch_cl, STARPU_W, initial_handle, STARPU_DATA_MODE_ARRAY, descr, n, 0);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+/* Unpartition everything below ancestor */
|
|
|
|
+void starpu_data_unpartition_submit_r(starpu_data_handle_t ancestor, int gathering_node)
|
|
|
|
+{
|
|
|
|
+ unsigned i, j, nsiblings;
|
|
|
|
+ if (!ancestor->partitioned)
|
|
|
|
+ /* It's already unpartitioned */
|
|
|
|
+ return;
|
|
|
|
+ _STARPU_DEBUG("ancestor %p needs unpartitioning\n", ancestor);
|
|
|
|
+ if (ancestor->readonly)
|
|
|
|
+ {
|
|
|
|
+ /* Uh, has to go through all read-only partitions */
|
|
|
|
+ for (i = 0; i < ancestor->partitioned; i++) {
|
|
|
|
+ starpu_data_handle_t *children = ancestor->active_readonly_children[i];
|
|
|
|
+ _STARPU_DEBUG("unpartition readonly children %p\n", children);
|
|
|
|
+ nsiblings = children[0]->nsiblings;
|
|
|
|
+ for (j = 0; j < nsiblings; j++) {
|
|
|
|
+ /* Make sure our children are unpartitioned */
|
|
|
|
+ starpu_data_unpartition_submit_r(children[j], gathering_node);
|
|
|
|
+ }
|
|
|
|
+ /* And unpartition them */
|
|
|
|
+ starpu_data_unpartition_submit(ancestor, nsiblings, children, gathering_node);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ else
|
|
|
|
+ {
|
|
|
|
+ _STARPU_DEBUG("unpartition children %p\n", ancestor->active_children);
|
|
|
|
+ /* Only one partition */
|
|
|
|
+ nsiblings = ancestor->active_children[0]->nsiblings;
|
|
|
|
+ for (i = 0; i < nsiblings; i++)
|
|
|
|
+ starpu_data_unpartition_submit_r(ancestor->active_children[i], gathering_node);
|
|
|
|
+ /* And unpartition ourself */
|
|
|
|
+ starpu_data_unpartition_submit(ancestor, nsiblings, ancestor->active_children, gathering_node);
|
|
|
|
+ }
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+/* Make ancestor partition itself properly for target */
|
|
|
|
+static void _starpu_data_partition_access_look_up(starpu_data_handle_t ancestor, starpu_data_handle_t target, int write)
|
|
|
|
+{
|
|
|
|
+ /* First make sure ancestor has proper state, if not, ask father */
|
|
|
|
+ if (!ancestor->active || (write && ancestor->active_ro))
|
|
|
|
+ {
|
|
|
|
+ /* (The root is always active-rw) */
|
|
|
|
+ STARPU_ASSERT(ancestor->father_handle);
|
|
|
|
+ _STARPU_DEBUG("ancestor %p is not ready: %s, asking father %p\n", ancestor, ancestor->active ? ancestor->active_ro ? "RO" : "RW" : "NONE", ancestor->father_handle);
|
|
|
|
+ _starpu_data_partition_access_look_up(ancestor->father_handle, ancestor, write);
|
|
|
|
+ _STARPU_DEBUG("ancestor %p is now ready\n", ancestor);
|
|
|
|
+ }
|
|
|
|
+ else
|
|
|
|
+ _STARPU_DEBUG("ancestor %p was ready\n", ancestor);
|
|
|
|
+
|
|
|
|
+ /* We shouldn't be called for nothing */
|
|
|
|
+ STARPU_ASSERT(!ancestor->partitioned || !target || ancestor->active_children != target->siblings || (ancestor->readonly && write));
|
|
|
|
+
|
|
|
|
+ /* Then unpartition ancestor if needed */
|
|
|
|
+ if (ancestor->partitioned &&
|
|
|
|
+ /* Not the right children, unpartition ourself */
|
|
|
|
+ ((target && ancestor->active_children != target->siblings) ||
|
|
|
|
+ /* We are partitioned and we want to write or some child
|
|
|
|
+ * is writing and we want to read, unpartition ourself*/
|
|
|
|
+ (!target && (write || !ancestor->readonly))))
|
|
|
|
+ {
|
|
|
|
+#ifdef STARPU_DEVEL
|
|
|
|
+#warning FIXME: better choose gathering node
|
|
|
|
+#endif
|
|
|
|
+ starpu_data_unpartition_submit_r(ancestor, STARPU_MAIN_RAM);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ if (!target)
|
|
|
|
+ {
|
|
|
|
+ _STARPU_DEBUG("ancestor %p is done\n", ancestor);
|
|
|
|
+ /* No child target, nothing more to do actually. */
|
|
|
|
+ return;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /* Then partition ancestor towards target */
|
|
|
|
+ if (ancestor->partitioned)
|
|
|
|
+ {
|
|
|
|
+ _STARPU_DEBUG("ancestor %p is already partitioned RO, turn RW\n", ancestor);
|
|
|
|
+ /* Already partitioned, normally it's already for the target */
|
|
|
|
+ STARPU_ASSERT(ancestor->active_children == target->siblings);
|
|
|
|
+ /* And we are here just because we haven't partitioned rw */
|
|
|
|
+ STARPU_ASSERT(ancestor->readonly && write);
|
|
|
|
+ /* So we just need to upgrade ro to rw */
|
|
|
|
+ starpu_data_partition_readwrite_upgrade_submit(ancestor, target->nsiblings, target->siblings);
|
|
|
|
+ }
|
|
|
|
+ else
|
|
|
|
+ {
|
|
|
|
+ /* Just need to partition properly for the child */
|
|
|
|
+ if (write)
|
|
|
|
+ {
|
|
|
|
+ _STARPU_DEBUG("partition ancestor %p RW\n", ancestor);
|
|
|
|
+ starpu_data_partition_submit(ancestor, target->nsiblings, target->siblings);
|
|
|
|
+ }
|
|
|
|
+ else
|
|
|
|
+ {
|
|
|
|
+ _STARPU_DEBUG("partition ancestor %p RO\n", ancestor);
|
|
|
|
+ starpu_data_partition_readonly_submit(ancestor, target->nsiblings, target->siblings);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+void _starpu_data_partition_access_submit(starpu_data_handle_t target, int write)
|
|
|
|
+{
|
|
|
|
+ _STARPU_DEBUG("accessing %p %s\n", target, write ? "RW" : "RO");
|
|
|
|
+ _starpu_data_partition_access_look_up(target, NULL, write);
|
|
|
|
+}
|
|
|
|
+
|
|
/*
|
|
/*
|
|
* Given an integer N, NPARTS the number of parts it must be divided in, ID the
|
|
* Given an integer N, NPARTS the number of parts it must be divided in, ID the
|
|
* part currently considered, determines the CHUNK_SIZE and the OFFSET, taking
|
|
* part currently considered, determines the CHUNK_SIZE and the OFFSET, taking
|