|
@@ -18,163 +18,100 @@
|
|
#include <common/config.h>
|
|
#include <common/config.h>
|
|
#include <datawizard/filters.h>
|
|
#include <datawizard/filters.h>
|
|
|
|
|
|
-void starpu_block_filter_func_vector(starpu_filter *f, starpu_data_handle root_handle)
|
|
|
|
|
|
+void starpu_block_filter_func_vector(void *father_interface, void *child_interface, __attribute__((unused)) starpu_filter *f, unsigned id, unsigned nchunks)
|
|
{
|
|
{
|
|
- unsigned nchunks;
|
|
|
|
- uint32_t arg = f->filter_arg;
|
|
|
|
|
|
+ starpu_vector_interface_t *vector_father = father_interface;
|
|
|
|
+ starpu_vector_interface_t *vector_child = child_interface;
|
|
|
|
|
|
- starpu_vector_interface_t *vector_root =
|
|
|
|
- starpu_data_get_interface_on_node(root_handle, 0);
|
|
|
|
|
|
+ uint32_t nx = vector_father->nx;
|
|
|
|
+ size_t elemsize = vector_father->elemsize;
|
|
|
|
|
|
- uint32_t nx = vector_root->nx;
|
|
|
|
- size_t elemsize = vector_root->elemsize;
|
|
|
|
|
|
+ /* actually create all the chunks */
|
|
|
|
+ uint32_t chunk_size = (nx + nchunks - 1)/nchunks;
|
|
|
|
+ size_t offset = id*chunk_size*elemsize;
|
|
|
|
|
|
- /* we will have arg chunks */
|
|
|
|
- nchunks = STARPU_MIN(nx, arg);
|
|
|
|
|
|
+ uint32_t child_nx =
|
|
|
|
+ STARPU_MIN(chunk_size, nx - id*chunk_size);
|
|
|
|
|
|
- /* first allocate the children data_state */
|
|
|
|
- starpu_data_create_children(root_handle, nchunks, root_handle->ops);
|
|
|
|
|
|
+ vector_child->nx = child_nx;
|
|
|
|
+ vector_child->elemsize = elemsize;
|
|
|
|
|
|
- /* actually create all the chunks */
|
|
|
|
- unsigned chunk;
|
|
|
|
- for (chunk = 0; chunk < nchunks; chunk++)
|
|
|
|
- {
|
|
|
|
- uint32_t chunk_size = (nx + nchunks - 1)/nchunks;
|
|
|
|
- size_t offset = chunk*chunk_size*elemsize;
|
|
|
|
-
|
|
|
|
- uint32_t child_nx =
|
|
|
|
- STARPU_MIN(chunk_size, nx - chunk*chunk_size);
|
|
|
|
-
|
|
|
|
- starpu_data_handle chunk_handle =
|
|
|
|
- starpu_data_get_child(root_handle, chunk);
|
|
|
|
-
|
|
|
|
- unsigned node;
|
|
|
|
- for (node = 0; node < STARPU_MAXNODES; node++)
|
|
|
|
- {
|
|
|
|
- starpu_vector_interface_t *local =
|
|
|
|
- starpu_data_get_interface_on_node(chunk_handle, node);
|
|
|
|
-
|
|
|
|
- local->nx = child_nx;
|
|
|
|
- local->elemsize = elemsize;
|
|
|
|
-
|
|
|
|
- if (starpu_data_test_if_allocated_on_node(root_handle, node)) {
|
|
|
|
- starpu_vector_interface_t *local_root =
|
|
|
|
- starpu_data_get_interface_on_node(root_handle, node);
|
|
|
|
-
|
|
|
|
- local->ptr = local_root->ptr + offset;
|
|
|
|
- local->dev_handle = local_root->dev_handle;
|
|
|
|
- local->offset = local_root->offset + offset;
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
|
|
+ if (vector_father->ptr != 0) {
|
|
|
|
+ vector_child->ptr = vector_father->ptr + offset;
|
|
|
|
+ vector_child->dev_handle = vector_father->dev_handle;
|
|
|
|
+ vector_child->offset = vector_father->offset + offset;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
-void starpu_vector_divide_in_2_filter_func(starpu_filter *f, starpu_data_handle root_handle)
|
|
|
|
|
|
+void starpu_vector_divide_in_2_filter_func(void *father_interface, void *child_interface, starpu_filter *f, unsigned id, __attribute__((unused)) unsigned nchunks)
|
|
{
|
|
{
|
|
- uint32_t length_first = f->filter_arg;
|
|
|
|
|
|
+ /* there cannot be more than 2 chunks */
|
|
|
|
+ STARPU_ASSERT(id < 2);
|
|
|
|
+
|
|
|
|
+ starpu_vector_interface_t *vector_father = father_interface;
|
|
|
|
+ starpu_vector_interface_t *vector_child = child_interface;
|
|
|
|
|
|
- starpu_vector_interface_t *vector_root =
|
|
|
|
- starpu_data_get_interface_on_node(root_handle, 0);
|
|
|
|
-
|
|
|
|
- uint32_t nx = vector_root->nx;
|
|
|
|
- size_t elemsize = vector_root->elemsize;
|
|
|
|
|
|
+ uint32_t length_first = f->filter_arg;
|
|
|
|
|
|
- /* first allocate the children data_state */
|
|
|
|
- starpu_data_create_children(root_handle, 2, root_handle->ops);
|
|
|
|
|
|
+ uint32_t nx = vector_father->nx;
|
|
|
|
+ size_t elemsize = vector_father->elemsize;
|
|
|
|
|
|
STARPU_ASSERT(length_first < nx);
|
|
STARPU_ASSERT(length_first < nx);
|
|
-
|
|
|
|
- starpu_data_handle chunk0_handle =
|
|
|
|
- starpu_data_get_child(root_handle, 0);
|
|
|
|
-
|
|
|
|
- unsigned node;
|
|
|
|
- for (node = 0; node < STARPU_MAXNODES; node++)
|
|
|
|
- {
|
|
|
|
- starpu_vector_interface_t *local =
|
|
|
|
- starpu_data_get_interface_on_node(chunk0_handle, node);
|
|
|
|
-
|
|
|
|
- local->nx = length_first;
|
|
|
|
- local->elemsize = elemsize;
|
|
|
|
-
|
|
|
|
- if (starpu_data_test_if_allocated_on_node(root_handle, node)) {
|
|
|
|
- starpu_vector_interface_t *local_root =
|
|
|
|
- starpu_data_get_interface_on_node(root_handle, node);
|
|
|
|
-
|
|
|
|
- local->ptr = local_root->ptr;
|
|
|
|
- local->offset = local_root->offset;
|
|
|
|
- local->dev_handle = local_root->dev_handle;
|
|
|
|
- }
|
|
|
|
|
|
+
|
|
|
|
+ /* this is the first child */
|
|
|
|
+ if (id == 0) {
|
|
|
|
+ vector_child->nx = length_first;
|
|
|
|
+ vector_child->elemsize = elemsize;
|
|
|
|
+
|
|
|
|
+ if (vector_father->ptr != 0) {
|
|
|
|
+ vector_child->ptr = vector_father->ptr;
|
|
|
|
+ vector_child->offset = vector_father->offset;
|
|
|
|
+ vector_child->dev_handle = vector_father->dev_handle;
|
|
|
|
+ }
|
|
}
|
|
}
|
|
|
|
|
|
- starpu_data_handle chunk1_handle =
|
|
|
|
- starpu_data_get_child(root_handle, 1);
|
|
|
|
-
|
|
|
|
- for (node = 0; node < STARPU_MAXNODES; node++)
|
|
|
|
- {
|
|
|
|
- starpu_vector_interface_t *local =
|
|
|
|
- starpu_data_get_interface_on_node(chunk1_handle, node);
|
|
|
|
-
|
|
|
|
- local->nx = nx - length_first;
|
|
|
|
- local->elemsize = elemsize;
|
|
|
|
-
|
|
|
|
- if (starpu_data_test_if_allocated_on_node(root_handle, node)) {
|
|
|
|
- starpu_vector_interface_t *local_root =
|
|
|
|
- starpu_data_get_interface_on_node(root_handle, node);
|
|
|
|
|
|
+ /* the second child */
|
|
|
|
+ else {
|
|
|
|
+ vector_child->nx = nx - length_first;
|
|
|
|
+ vector_child->elemsize = elemsize;
|
|
|
|
|
|
- local->ptr = local_root->ptr + length_first*elemsize;
|
|
|
|
- local->offset = local_root->offset + length_first*elemsize;
|
|
|
|
- local->dev_handle = local_root->dev_handle;
|
|
|
|
- }
|
|
|
|
|
|
+ if (vector_father->ptr != 0) {
|
|
|
|
+ vector_child->ptr = vector_father->ptr + length_first*elemsize;
|
|
|
|
+ vector_child->offset = vector_father->offset + length_first*elemsize;
|
|
|
|
+ vector_child->dev_handle = vector_father->dev_handle;
|
|
|
|
+ }
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
-void starpu_vector_list_filter_func(starpu_filter *f, starpu_data_handle root_handle)
|
|
|
|
-{
|
|
|
|
- uint32_t nchunks = f->filter_arg;
|
|
|
|
- uint32_t *length_tab = f->filter_arg_ptr;
|
|
|
|
|
|
|
|
- starpu_vector_interface_t *vector_root =
|
|
|
|
- starpu_data_get_interface_on_node(root_handle, 0);
|
|
|
|
|
|
+void starpu_vector_list_filter_func(void *father_interface, void *child_interface, starpu_filter *f, unsigned id, __attribute__((unused)) unsigned nchunks)
|
|
|
|
+{
|
|
|
|
+ starpu_vector_interface_t *vector_father = father_interface;
|
|
|
|
+ starpu_vector_interface_t *vector_child = child_interface;
|
|
|
|
|
|
- size_t elemsize = vector_root->elemsize;
|
|
|
|
|
|
+ uint32_t *length_tab = f->filter_arg_ptr;
|
|
|
|
|
|
- /* first allocate the children data_state */
|
|
|
|
- starpu_data_create_children(root_handle, nchunks, root_handle->ops);
|
|
|
|
|
|
+ size_t elemsize = vector_father->elemsize;
|
|
|
|
|
|
unsigned current_pos = 0;
|
|
unsigned current_pos = 0;
|
|
|
|
|
|
- /* actually create all the chunks */
|
|
|
|
- unsigned chunk;
|
|
|
|
- for (chunk = 0; chunk < nchunks; chunk++)
|
|
|
|
- {
|
|
|
|
- starpu_data_handle chunk_handle =
|
|
|
|
- starpu_data_get_child(root_handle, chunk);
|
|
|
|
-
|
|
|
|
- uint32_t chunk_size = length_tab[chunk];
|
|
|
|
-
|
|
|
|
- unsigned node;
|
|
|
|
- for (node = 0; node < STARPU_MAXNODES; node++)
|
|
|
|
- {
|
|
|
|
- starpu_vector_interface_t *local =
|
|
|
|
- starpu_data_get_interface_on_node(chunk_handle, node);
|
|
|
|
-
|
|
|
|
- local->nx = chunk_size;
|
|
|
|
- local->elemsize = elemsize;
|
|
|
|
-
|
|
|
|
- if (starpu_data_test_if_allocated_on_node(root_handle, node)) {
|
|
|
|
- starpu_vector_interface_t *local_root =
|
|
|
|
- starpu_data_get_interface_on_node(root_handle, node);
|
|
|
|
-
|
|
|
|
- local->ptr = local_root->ptr + current_pos*elemsize;
|
|
|
|
- local->offset = local_root->offset + current_pos*elemsize;
|
|
|
|
- local->dev_handle = local_root->dev_handle;
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- current_pos += chunk_size;
|
|
|
|
|
|
+ uint32_t chunk_size = length_tab[id];
|
|
|
|
+
|
|
|
|
+ vector_child->nx = chunk_size;
|
|
|
|
+ vector_child->elemsize = elemsize;
|
|
|
|
+
|
|
|
|
+ if (vector_father->ptr != 0) {
|
|
|
|
+ /* compute the current position */
|
|
|
|
+ unsigned i;
|
|
|
|
+ for (i = 0; i <= id; i++)
|
|
|
|
+ current_pos += length_tab[i];
|
|
|
|
+
|
|
|
|
+ vector_child->ptr = vector_father->ptr + current_pos*elemsize;
|
|
|
|
+ vector_child->offset = vector_father->offset + current_pos*elemsize;
|
|
|
|
+ vector_child->dev_handle = vector_father->dev_handle;
|
|
}
|
|
}
|
|
-
|
|
|
|
- __attribute__ ((unused)) uint32_t nx = vector_root->nx;
|
|
|
|
|
|
+
|
|
|
|
+ __attribute__ ((unused)) uint32_t nx = vector_father->nx;
|
|
STARPU_ASSERT(current_pos == nx);
|
|
STARPU_ASSERT(current_pos == nx);
|
|
}
|
|
}
|