Browse Source

add tensor filter and examples

HE Kun 3 years ago
parent
commit
674ba5d671

+ 3 - 1
examples/Makefile.am

@@ -236,6 +236,7 @@ STARPU_EXAMPLES +=				\
 	cpp/add_vectors_interface		\
 	filters/fread				\
 	filters/fvector				\
+	filters/ftensor				\
 	filters/fblock				\
 	filters/fmatrix				\
 	filters/fmultiple_manual		\
@@ -625,7 +626,8 @@ endif
 examplebin_PROGRAMS +=				\
 	filters/shadow				\
 	filters/shadow2d			\
-	filters/shadow3d
+	filters/shadow3d			\
+	filters/shadow4d
 
 #############################
 # Custom multiformat filter #

+ 191 - 0
examples/filters/ftensor.c

@@ -0,0 +1,191 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2021  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/*
+ * This examplifies how to use partitioning filters.  We here just split a 4D
+ * matrix into 4D slices (along the X axis), and run a dumb kernel on them.
+ */
+
+#include <starpu.h>
+
+#define NX    6
+#define NY    5
+#define NZ    4
+#define NT    3
+#define PARTS 2
+
+#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
+
+void cpu_func(void *buffers[], void *cl_arg)
+{
+    int i, j, k, l;
+    int *factor = (int *) cl_arg;
+    int *val = (int *)STARPU_TENSOR_GET_PTR(buffers[0]);
+    int nx = (int)STARPU_TENSOR_GET_NX(buffers[0]);
+    int ny = (int)STARPU_TENSOR_GET_NY(buffers[0]);
+    int nz = (int)STARPU_TENSOR_GET_NZ(buffers[0]);
+    int nt = (int)STARPU_TENSOR_GET_NT(buffers[0]);
+    unsigned ldy = STARPU_TENSOR_GET_LDY(buffers[0]);
+    unsigned ldz = STARPU_TENSOR_GET_LDZ(buffers[0]);
+    unsigned ldt = STARPU_TENSOR_GET_LDT(buffers[0]);
+
+    for(l=0; l<nt ; l++)
+    {
+        for(k=0; k<nz ; k++)
+        {
+            for(j=0; j<ny ; j++)
+            {
+                for(i=0; i<nx ; i++)
+                    val[(l*ldt)+(k*ldz)+(j*ldy)+i] = *factor;
+            }
+        }
+    }
+        
+}
+
+void print_tensor(int *tensor, int nx, int ny, int nz, int nt, unsigned ldy, unsigned ldz, unsigned ldt)
+{
+        int i, j, k, l;
+        FPRINTF(stderr, "tensor=%p nx=%d ny=%d nz=%d nt=%d ldy=%u ldz=%u ldt=%u\n", tensor, nx, ny, nz, nt, ldy, ldz, ldt);
+        for(l=0 ; l<nt ; l++)
+        {
+            for(k=0 ; k<nz ; k++)
+            {
+                for(j=0 ; j<ny ; j++)
+                {
+                    for(i=0 ; i<nx ; i++)
+                    {
+                        FPRINTF(stderr, "%2d ", tensor[(l*ldt)+(k*ldz)+(j*ldy)+i]);
+                    }
+                    FPRINTF(stderr,"\n");
+                }
+                FPRINTF(stderr,"\n");
+            }
+            FPRINTF(stderr,"\n");
+        }
+        FPRINTF(stderr,"\n");
+}
+
+void print_data(starpu_data_handle_t tensor_handle)
+{
+    int *tensor = (int *)starpu_tensor_get_local_ptr(tensor_handle);
+    int nx = starpu_tensor_get_nx(tensor_handle);
+    int ny = starpu_tensor_get_ny(tensor_handle);
+    int nz = starpu_tensor_get_nz(tensor_handle);
+    int nt = starpu_tensor_get_nt(tensor_handle);
+    unsigned ldy = starpu_tensor_get_local_ldy(tensor_handle);
+    unsigned ldz = starpu_tensor_get_local_ldz(tensor_handle);
+    unsigned ldt = starpu_tensor_get_local_ldt(tensor_handle);
+
+    print_tensor(tensor, nx, ny, nz, nt, ldy, ldz, ldt);
+}
+
+int main(void)
+{
+    int *tensor,n=0;
+    int i, j, k, l;
+    int ret;
+
+    tensor = (int*)malloc(NX*NY*NZ*NT*sizeof(tensor[0]));
+    assert(tensor);
+    for(l=0 ; l<NT ; l++)
+    {
+        for(k=0 ; k<NZ ; k++)
+        {
+            for(j=0 ; j<NY ; j++)
+            {
+                for(i=0 ; i<NX ; i++)
+                {
+                    tensor[(l*NX*NY*NZ)+(k*NX*NY)+(j*NX)+i] = n++;
+                }
+            }
+        }
+    }
+
+    starpu_data_handle_t handle;
+    struct starpu_codelet cl =
+    {
+        .cpu_funcs = {cpu_func},
+        .cpu_funcs_name = {"cpu_func"},
+        .nbuffers = 1,
+        .modes = {STARPU_RW},
+        .name = "tensor_scal"
+    };
+
+    ret = starpu_init(NULL);
+    if (ret == -ENODEV)
+        return 77;
+    STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+    
+    /* Declare data to StarPU */
+    starpu_tensor_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)tensor, NX, NX*NY, NX*NY*NZ, NX, NY, NZ, NT, sizeof(int));
+    FPRINTF(stderr, "IN  Tensor\n");
+    print_data(handle);
+
+    /* Partition the tensor in PARTS sub-tensors */
+    struct starpu_data_filter f =
+    {
+        .filter_func = starpu_tensor_filter_block,
+        .nchildren = PARTS
+    };
+    starpu_data_partition(handle, &f);
+
+    FPRINTF(stderr,"Nb of partitions : %d\n",starpu_data_get_nb_children(handle));
+
+    for(i=0 ; i<starpu_data_get_nb_children(handle) ; i++)
+    {
+        starpu_data_handle_t stensor = starpu_data_get_sub_data(handle, 1, i);
+        FPRINTF(stderr, "Sub tensor %d\n", i);
+        print_data(stensor);
+    }
+
+    /* Submit a task on each sub-tensor */
+    for(i=0 ; i<starpu_data_get_nb_children(handle) ; i++)
+    {
+        int multiplier=i;
+        struct starpu_task *task = starpu_task_create();
+
+        FPRINTF(stderr,"Dealing with sub-tensor %d\n", i);
+        task->cl = &cl;
+        task->synchronous = 1;
+        task->callback_func = NULL;
+        task->handles[0] = starpu_data_get_sub_data(handle, 1, i);
+        task->cl_arg = &multiplier;
+        task->cl_arg_size = sizeof(multiplier);
+
+        ret = starpu_task_submit(task);
+        if (ret)
+        {
+            FPRINTF(stderr, "Error when submitting task\n");
+            exit(ret);
+        }
+    }
+
+    /* Unpartition the data, unregister it from StarPU and shutdown */
+    starpu_data_unpartition(handle, STARPU_MAIN_RAM);
+    print_data(handle);
+    starpu_data_unregister(handle);
+
+    /* Print result tensor */
+    FPRINTF(stderr, "OUT Tensor\n");
+    print_tensor(tensor, NX, NY, NZ, NT, NX, NX*NY, NX*NY*NZ);
+
+    free(tensor);
+
+    starpu_shutdown();
+    return 0;
+
+}    

+ 497 - 0
examples/filters/shadow4d.c

@@ -0,0 +1,497 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2021  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ * Copyright (C) 2010       Mehdi Juhoor
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/*
+ * This examplifies the use of the 4D matrix shadow filters: a source "matrix" of
+ * NX*NY*NZ*NT elements (plus SHADOW wrap-around elements) is partitioned into
+ * matrices with some shadowing, and these are copied into a destination
+ * "matrix2" of
+ * NRPARTSX*NPARTSY*NPARTSZ*NPARTST*((NX/NPARTSX+2*SHADOWX)*(NY/NPARTSY+2*SHADOWY)*(NZ/NPARTSZ+2*SHADOWZ)*(NT/NPARTST+2*SHADOWT))
+ * elements, partitioned in the traditionnal way, thus showing how shadowing
+ * shows up.
+ */
+
+#include <starpu.h>
+
+/* Shadow width */
+#define SHADOWX 2
+#define SHADOWY 2
+#define SHADOWZ 1
+#define SHADOWT 1
+#define NX    6
+#define NY    6
+#define NZ    2
+#define NT    2
+#define PARTSX 2
+#define PARTSY 2
+#define PARTSZ 2
+#define PARTST 2
+
+#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
+
+void cpu_func(void *buffers[], void *cl_arg)
+{
+    (void)cl_arg;
+        /* length of the shadowed source matrix */
+        unsigned ldy = STARPU_TENSOR_GET_LDY(buffers[0]);
+        unsigned ldz = STARPU_TENSOR_GET_LDZ(buffers[0]);
+        unsigned ldt = STARPU_TENSOR_GET_LDT(buffers[0]);
+        unsigned x = STARPU_TENSOR_GET_NX(buffers[0]);
+        unsigned y = STARPU_TENSOR_GET_NY(buffers[0]);
+        unsigned z = STARPU_TENSOR_GET_NZ(buffers[0]);
+        unsigned t = STARPU_TENSOR_GET_NT(buffers[0]);
+        /* local copy of the shadowed source matrix pointer */
+        int *val = (int *)STARPU_TENSOR_GET_PTR(buffers[0]);
+
+        /* length of the destination matrix */
+        unsigned ldy2 = STARPU_TENSOR_GET_LDY(buffers[1]);
+        unsigned ldz2 = STARPU_TENSOR_GET_LDZ(buffers[1]);
+        unsigned ldt2 = STARPU_TENSOR_GET_LDT(buffers[1]);
+        unsigned x2 = STARPU_TENSOR_GET_NX(buffers[1]);
+        unsigned y2 = STARPU_TENSOR_GET_NY(buffers[1]);
+        unsigned z2 = STARPU_TENSOR_GET_NZ(buffers[1]);
+        unsigned t2 = STARPU_TENSOR_GET_NT(buffers[1]);
+        /* local copy of the destination matrix pointer */
+        int *val2 = (int *)STARPU_TENSOR_GET_PTR(buffers[1]);
+
+    unsigned i, j, k, l;
+
+    /* If things go right, sizes should match */
+    STARPU_ASSERT(x == x2);
+    STARPU_ASSERT(y == y2);
+    STARPU_ASSERT(z == z2);
+    STARPU_ASSERT(t == t2);
+    for (l = 0; l < t; l++)
+        for (k = 0; k < z; k++)
+            for (j = 0; j < y; j++)
+                for (i = 0; i < x; i++)
+                    val2[l*ldt2+k*ldz2+j*ldy2+i] = val[l*ldt+k*ldz+j*ldy+i];
+}
+
+#ifdef STARPU_USE_CUDA
+void cuda_func(void *buffers[], void *cl_arg)
+{
+    (void)cl_arg;
+        /* length of the shadowed source matrix*/
+        unsigned ldy = STARPU_TENSOR_GET_LDY(buffers[0]);
+        unsigned ldz = STARPU_TENSOR_GET_LDZ(buffers[0]);
+        unsigned ldt = STARPU_TENSOR_GET_LDT(buffers[0]);
+        unsigned x = STARPU_TENSOR_GET_NX(buffers[0]);
+        unsigned y = STARPU_TENSOR_GET_NY(buffers[0]);
+        unsigned z = STARPU_TENSOR_GET_NZ(buffers[0]);
+        unsigned t = STARPU_TENSOR_GET_NT(buffers[0]);
+        /* local copy of the shadowed source matrix pointer */
+        int *val = (int *)STARPU_TENSOR_GET_PTR(buffers[0]);
+
+        /* length of the destination matrix */
+        unsigned ldy2 = STARPU_TENSOR_GET_LDY(buffers[1]);
+        unsigned ldz2 = STARPU_TENSOR_GET_LDZ(buffers[1]);
+        unsigned ldt2 = STARPU_TENSOR_GET_LDT(buffers[1]);
+        unsigned x2 = STARPU_TENSOR_GET_NX(buffers[1]);
+        unsigned y2 = STARPU_TENSOR_GET_NY(buffers[1]);
+        unsigned z2 = STARPU_TENSOR_GET_NZ(buffers[1]);
+        unsigned t2 = STARPU_TENSOR_GET_NT(buffers[1]);
+        /* local copy of the destination matrix pointer */
+        int *val2 = (int *)STARPU_TENSOR_GET_PTR(buffers[1]);
+
+    unsigned l;
+    cudaError_t cures;
+
+    /* If things go right, sizes should match */
+    STARPU_ASSERT(x == x2);
+    STARPU_ASSERT(y == y2);
+    STARPU_ASSERT(z == z2);
+    STARPU_ASSERT(t == t2);
+    for (l = 0; l < t; l++)
+    {
+        for (k = 0; k < z; k++)
+        {
+            cures = cudaMemcpy2DAsync(val2+k*ldz2+l*ldt2, ldy2*sizeof(*val2), val+k*ldz+l*ldt, ldy*sizeof(*val),
+                    x*sizeof(*val), y, cudaMemcpyDeviceToDevice, starpu_cuda_get_local_stream());
+            STARPU_ASSERT(!cures);
+        }
+    }
+        
+}
+#endif
+
+int main(void)
+{
+    unsigned i, j, k, l, m, n, p, q;
+    int matrix[NT + 2*SHADOWT][NZ + 2*SHADOWZ][NY + 2*SHADOWY][NX + 2*SHADOWX];
+    int matrix2[NT + PARTST*2*SHADOWT][NZ + PARTSZ*2*SHADOWZ][NY + PARTSY*2*SHADOWY][NX + PARTSX*2*SHADOWX];
+    starpu_data_handle_t handle, handle2;
+    int ret;
+
+    struct starpu_codelet cl =
+    {
+        .cpu_funcs = {cpu_func},
+        .cpu_funcs_name = {"cpu_func"},
+#ifdef STARPU_USE_CUDA
+        .cuda_funcs = {cuda_func},
+        .cuda_flags = {STARPU_CUDA_ASYNC},
+#endif
+        .nbuffers = 2,
+        .modes = {STARPU_R, STARPU_W}
+    };
+
+    memset(matrix, -1, sizeof(matrix));
+    for(l=1 ; l<=NT ; l++)
+        for(k=1 ; k<=NZ ; k++)
+            for(j=1 ; j<=NY ; j++)
+                for(i=1 ; i<=NX ; i++)
+                    matrix[SHADOWT+l-1][SHADOWZ+k-1][SHADOWY+j-1][SHADOWX+i-1] = i+j+k+l;
+
+    /*copy cubes*/
+    for (l = SHADOWT ; l<SHADOWT+NT ; l++)
+        for (k = SHADOWZ ; k<SHADOWZ+NZ ; k++)
+            for (j = SHADOWY ; j<SHADOWY+NY ; j++)
+                for(i=0 ; i<SHADOWX ; i++)
+                {
+                    matrix[l][k][j][i] = matrix[l][k][j][i+NX];
+                    matrix[l][k][j][SHADOWX+NX+i] = matrix[l][k][j][SHADOWX+i];
+                }
+    for (l = SHADOWT ; l<SHADOWT+NT ; l++)
+        for(k=SHADOWZ ; k<SHADOWZ+NZ ; k++)
+            for(j=0 ; j<SHADOWY ; j++)
+                for(i=SHADOWX ; i<SHADOWX+NX ; i++)
+                {
+                    matrix[l][k][j][i] = matrix[l][k][j+NY][i];
+                    matrix[l][k][SHADOWY+NY+j][i] = matrix[l][k][SHADOWY+j][i];
+                }
+    for (l = SHADOWT ; l<SHADOWT+NT ; l++)
+        for(k=0 ; k<SHADOWZ ; k++)
+            for(j=SHADOWY ; j<SHADOWY+NY ; j++)
+                for(i=SHADOWX ; i<SHADOWX+NX ; i++)
+                {
+                    matrix[l][k][j][i] = matrix[l][k+NZ][j][i];
+                    matrix[l][SHADOWZ+NZ+k][j][i] = matrix[l][SHADOWZ+k][j][i];
+                }
+    for (l = 0 ; l<SHADOWT ; l++)
+        for(k=SHADOWZ ; k<SHADOWZ+NZ ; k++)
+            for(j=SHADOWY ; j<SHADOWY+NY ; j++)
+                for(i=SHADOWX ; i<SHADOWX+NX ; i++)
+                {
+                    matrix[l][k][j][i] = matrix[l+NT][k][j][i];
+                    matrix[SHADOWT+NT+l][k][j][i] = matrix[SHADOWT+l][k][j][i];
+                }
+
+    /*copy planes*/
+    for (l = SHADOWT ; l<SHADOWT+NT ; l++)
+        for (k = SHADOWZ ; k<SHADOWZ+NZ ; k++)
+            for(j=0 ; j<SHADOWY ; j++)
+                for(i=0 ; i<SHADOWX ; i++)
+                {
+                    matrix[l][k][j][i] = matrix[l][k][j+NY][i+NX];
+                    matrix[l][k][j][SHADOWX+NX+i] = matrix[l][k][j+NY][SHADOWX+i];
+                    matrix[l][k][SHADOWY+NY+j][i] = matrix[l][k][SHADOWY+j][i+NX];
+                    matrix[l][k][SHADOWY+NY+j][SHADOWX+NX+i] = matrix[l][k][SHADOWY+j][SHADOWX+i];
+                }
+
+    for (l = SHADOWT ; l<SHADOWT+NT ; l++)
+        for (k=0 ; k<SHADOWZ ; k++)
+            for(j = SHADOWY ; j<SHADOWY+NY ; j++)
+                for(i=0 ; i<SHADOWX ; i++)
+                {
+                    matrix[l][k][j][i] = matrix[l][k+NZ][j][i+NX];
+                    matrix[l][k][j][SHADOWX+NX+i] = matrix[l][k+NZ][j][SHADOWX+i];
+                    matrix[l][SHADOWZ+NZ+k][j][i] = matrix[l][SHADOWZ+k][j][i+NX];
+                    matrix[l][SHADOWZ+NZ+k][j][SHADOWX+NX+i] = matrix[l][SHADOWZ+k][j][SHADOWX+i];
+                }
+
+    for (l = SHADOWT ; l<SHADOWT+NT ; l++)
+        for (k=0 ; k<SHADOWZ ; k++)
+            for(j=0 ; j<SHADOWY ; j++)
+                for(i=SHADOWX ; i<SHADOWX+NX ; i++)
+                {
+                    matrix[l][k][j][i] = matrix[l][k+NZ][j+NY][i];
+                    matrix[l][k][SHADOWY+NY+j][i] = matrix[l][k+NZ][SHADOWY+j][i];
+                    matrix[l][SHADOWZ+NZ+k][j][i] = matrix[l][SHADOWZ+k][j+NY][i];
+                    matrix[l][SHADOWZ+NZ+k][SHADOWY+NY+j][i] = matrix[l][SHADOWZ+k][SHADOWY+j][i];
+                }
+
+    for (l=0 ; l<SHADOWT ; l++)
+        for (k = SHADOWZ ; k<SHADOWZ+NZ ; k++)
+            for(j = SHADOWY ; j<SHADOWY+NY ; j++)
+                for(i=0 ; i<SHADOWX ; i++)
+                {
+                    matrix[l][k][j][i] = matrix[l+NT][k][j][i+NX];
+                    matrix[l][k][j][SHADOWX+NX+i] = matrix[l+NT][k][j][SHADOWX+i];
+                    matrix[SHADOWT+NT+l][k][j][i] = matrix[SHADOWT+l][k][j][i+NX];
+                    matrix[SHADOWT+NT+l][k][j][SHADOWX+NX+i] = matrix[SHADOWT+l][k][j][SHADOWX+i];
+                }
+
+    for (l=0 ; l<SHADOWT ; l++)
+        for (k = SHADOWZ ; k<SHADOWZ+NZ ; k++)
+            for(j=0 ; j<SHADOWY ; j++)
+                for(i=SHADOWX ; i<SHADOWX+NX ; i++)
+                {
+                    matrix[l][k][j][i] = matrix[l+NT][k][j+NY][i];
+                    matrix[l][k][SHADOWY+NY+j][i] = matrix[l+NT][k][SHADOWY+j][i];
+                    matrix[SHADOWT+NT+l][k][j][i] = matrix[SHADOWT+l][k][j+NY][i];
+                    matrix[SHADOWT+NT+l][k][SHADOWY+NY+j][i] = matrix[SHADOWT+l][k][SHADOWY+j][i];
+                }
+
+    for (l=0 ; l<SHADOWT ; l++)
+        for(k=0 ; k<SHADOWZ ; k++)
+            for (j = SHADOWY ; j<SHADOWY+NY ; j++)
+                for(i=SHADOWX ; i<SHADOWX+NX ; i++)
+                {
+                    matrix[l][k][j][i] = matrix[l+NT][k+NZ][j][i];
+                    matrix[l][SHADOWZ+NZ+k][j][i] = matrix[l+NT][SHADOWZ+k][j][i];
+                    matrix[SHADOWT+NT+l][k][j][i] = matrix[SHADOWT+l][k+NZ][j][i];
+                    matrix[SHADOWT+NT+l][SHADOWZ+NZ+k][j][i] = matrix[SHADOWT+l][SHADOWZ+k][j][i];
+                }
+
+    /* Copy borders */
+    for (l = SHADOWT ; l<SHADOWT+NT ; l++)
+        for (k=0 ; k<SHADOWZ ; k++)
+            for(j=0 ; j<SHADOWY ; j++)
+                for(i=0 ; i<SHADOWX ; i++)
+                {
+                    matrix[l][k][j][i] = matrix[l][k+NZ][j+NY][i+NX];
+                    matrix[l][k][j][SHADOWX+NX+i] = matrix[l][k+NZ][j+NY][SHADOWX+i];
+                    matrix[l][k][SHADOWY+NY+j][i] = matrix[l][k+NZ][SHADOWY+j][i+NX];
+                    matrix[l][k][SHADOWY+NY+j][SHADOWX+NX+i] = matrix[l][k+NZ][SHADOWY+j][SHADOWX+i];
+                    matrix[l][SHADOWZ+NZ+k][j][i] = matrix[l][SHADOWZ+k][j+NY][i+NX];
+                    matrix[l][SHADOWZ+NZ+k][j][SHADOWX+NX+i] = matrix[l][SHADOWZ+k][j+NY][SHADOWX+i];
+                    matrix[l][SHADOWZ+NZ+k][SHADOWY+NY+j][i] = matrix[l][SHADOWZ+k][SHADOWY+j][i+NX];
+                    matrix[l][SHADOWZ+NZ+k][SHADOWY+NY+j][SHADOWX+NX+i] = matrix[l][SHADOWZ+k][SHADOWY+j][SHADOWX+i];
+                }
+    for (l=0 ; l<SHADOWT ; l++)
+        for (k = SHADOWZ ; k<SHADOWZ+NZ ; k++)
+            for(j=0 ; j<SHADOWY ; j++)
+                for(i=0 ; i<SHADOWX ; i++)
+                {
+                    matrix[l][k][j][i] = matrix[l+NT][k][j+NY][i+NX];
+                    matrix[l][k][j][SHADOWX+NX+i] = matrix[l+NT][k][j+NY][SHADOWX+i];
+                    matrix[l][k][SHADOWY+NY+j][i] = matrix[l+NT][k][SHADOWY+j][i+NX];
+                    matrix[SHADOWT+NT+l][k][j][i] = matrix[SHADOWT+l][k][j+NY][i+NX];
+                    matrix[l][k][SHADOWY+NY+j][SHADOWX+NX+i] = matrix[l+NT][k][SHADOWY+j][SHADOWX+i];
+                    matrix[SHADOWT+NT+l][k][j][SHADOWX+NX+i] = matrix[SHADOWT+l][k][j+NY][SHADOWX+i];
+                    matrix[SHADOWT+NT+l][k][SHADOWY+NY+j][i] = matrix[SHADOWT+l][k][SHADOWY+j][i+NX];
+                    matrix[SHADOWT+NT+l][k][SHADOWY+NY+j][SHADOWX+NX+i] = matrix[SHADOWT+l][k][SHADOWY+j][SHADOWX+i];
+                }
+    for (l=0 ; l<SHADOWT ; l++)
+        for(k=0 ; k<SHADOWZ ; k++)
+            for (j = SHADOWY ; j<SHADOWY+NY ; j++)
+                for(i=0 ; i<SHADOWX ; i++)
+                {
+                    matrix[l][k][j][i] = matrix[l+NT][k+NZ][j][i+NX];
+                    matrix[l][k][j][SHADOWX+NX+i] = matrix[l+NT][k+NZ][j][SHADOWX+i];
+                    matrix[l][SHADOWZ+NZ+k][j][i] = matrix[l+NT][SHADOWZ+k][j][i+NX];
+                    matrix[SHADOWT+NT+l][k][j][i] = matrix[SHADOWT+l][k+NZ][j][i+NX];
+                    matrix[l][SHADOWZ+NZ+k][j][SHADOWX+NX+i] = matrix[l+NT][SHADOWZ+k][j][SHADOWX+i];
+                    matrix[SHADOWT+NT+l][k][j][SHADOWX+NX+i] = matrix[SHADOWT+l][k+NZ][j][SHADOWX+i];
+                    matrix[SHADOWT+NT+l][SHADOWZ+NZ+k][j][i] = matrix[SHADOWT+l][SHADOWZ+k][j][i+NX];
+                    matrix[SHADOWT+NT+l][SHADOWZ+NZ+k][j][SHADOWX+NX+i] = matrix[SHADOWT+l][SHADOWZ+k][j][SHADOWX+i];
+                }
+    for (l=0 ; l<SHADOWT ; l++)
+        for(k=0 ; k<SHADOWZ ; k++)
+            for(j=0 ; j<SHADOWY ; j++)
+                for(i=SHADOWX ; i<SHADOWX+NX ; i++)
+                {
+                    matrix[l][k][j][i] = matrix[l+NT][k+NZ][j+NY][i];
+                    matrix[l][k][SHADOWY+NY+j][i] = matrix[l+NT][k+NZ][SHADOWY+j][i];
+                    matrix[l][SHADOWZ+NZ+k][j][i] = matrix[l+NT][SHADOWZ+k][j+NY][i];
+                    matrix[SHADOWT+NT+l][k][j][i] = matrix[SHADOWT+l][k+NZ][j+NY][i];
+                    matrix[l][SHADOWZ+NZ+k][SHADOWY+NY+j][i] = matrix[l+NT][SHADOWZ+k][SHADOWY+j][i];
+                    matrix[SHADOWT+NT+l][k][SHADOWY+NY+j][i] = matrix[SHADOWT+l][k+NZ][SHADOWY+j][i];
+                    matrix[SHADOWT+NT+l][SHADOWZ+NZ+k][j][i] = matrix[SHADOWT+l][SHADOWZ+k][j+NY][i];
+                    matrix[SHADOWT+NT+l][SHADOWZ+NZ+k][SHADOWY+NY+j][i] = matrix[SHADOWT+l][SHADOWZ+k][SHADOWY+j][i];
+                }
+
+    /* Copy corners */
+    for(l=0 ; l<SHADOWT ; l++)
+        for(k=0 ; k<SHADOWZ ; k++)
+            for(j=0 ; j<SHADOWY ; j++)
+                for(i=0 ; i<SHADOWX ; i++)
+                {
+                    matrix[l][k][j][i] = matrix[l+NT][k+NZ][j+NY][i+NX];
+                    matrix[l][k][j][SHADOWX+NX+i] = matrix[l+NT][k+NZ][j+NY][SHADOWX+i];
+                    matrix[l][k][SHADOWY+NY+j][i] = matrix[l+NT][k+NZ][SHADOWY+j][i+NX];
+                    matrix[l][k][SHADOWY+NY+j][SHADOWX+NX+i] = matrix[l+NT][k+NZ][SHADOWY+j][SHADOWX+i];
+                    matrix[l][SHADOWZ+NZ+k][j][i] = matrix[l+NT][SHADOWZ+k][j+NY][i+NX];
+                    matrix[l][SHADOWZ+NZ+k][j][SHADOWX+NX+i] = matrix[l+NT][SHADOWZ+k][j+NY][SHADOWX+i];
+                    matrix[l][SHADOWZ+NZ+k][SHADOWY+NY+j][i] = matrix[l+NT][SHADOWZ+k][SHADOWY+j][i+NX];
+                    matrix[l][SHADOWZ+NZ+k][SHADOWY+NY+j][SHADOWX+NX+i] = matrix[l+NT][SHADOWZ+k][SHADOWY+j][SHADOWX+i];
+                    matrix[SHADOWT+NT+l][k][j][i] = matrix[SHADOWT+l][k+NZ][j+NY][i+NX];
+                    matrix[SHADOWT+NT+l][k][j][SHADOWX+NX+i] = matrix[SHADOWT+l][k+NZ][j+NY][SHADOWX+i];
+                    matrix[SHADOWT+NT+l][k][SHADOWY+NY+j][i] = matrix[SHADOWT+l][k+NZ][SHADOWY+j][i+NX];
+                    matrix[SHADOWT+NT+l][SHADOWZ+NZ+k][j][i] = matrix[SHADOWT+l][SHADOWZ+k][j+NY][i+NX];
+                    matrix[SHADOWT+NT+l][k][SHADOWY+NY+j][SHADOWX+NX+i] = matrix[SHADOWT+l][k+NZ][SHADOWY+j][SHADOWX+i];
+                    matrix[SHADOWT+NT+l][SHADOWZ+NZ+k][j][SHADOWX+NX+i] = matrix[SHADOWT+l][SHADOWZ+k][j+NY][SHADOWX+i];
+                    matrix[SHADOWT+NT+l][SHADOWZ+NZ+k][SHADOWY+NY+j][i] = matrix[SHADOWT+l][SHADOWZ+k][SHADOWY+j][i+NX];
+                    matrix[SHADOWT+NT+l][SHADOWZ+NZ+k][SHADOWY+NY+j][SHADOWX+NX+i] = matrix[SHADOWT+l][SHADOWZ+k][SHADOWY+j][SHADOWX+i];
+                }
+
+    FPRINTF(stderr,"IN  Matrix:\n");
+    for(l=0 ; l<NT + 2*SHADOWT ; l++)
+    {
+        for(k=0 ; k<NZ + 2*SHADOWZ ; k++)
+        {
+            for(j=0 ; j<NY + 2*SHADOWY ; j++)
+            {
+                for(i=0 ; i<NX + 2*SHADOWX ; i++)
+                    FPRINTF(stderr, "%5d ", matrix[l][k][j][i]);
+                FPRINTF(stderr,"\n");
+            }
+            FPRINTF(stderr,"\n\n");
+        }
+        FPRINTF(stderr,"\n\n");
+    }
+    FPRINTF(stderr,"\n");
+
+    ret = starpu_init(NULL);
+    if (ret == -ENODEV)
+        exit(77);
+    STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+    /* Declare source matrix to StarPU */
+    starpu_tensor_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)matrix,
+            NX + 2*SHADOWX, (NX + 2*SHADOWX) * (NY + 2*SHADOWY), (NX + 2*SHADOWX) * (NY + 2*SHADOWY) * (NZ + 2*SHADOWZ),
+            NX + 2*SHADOWX, NY + 2*SHADOWY, NZ + 2*SHADOWZ, NT + 2*SHADOWT,
+            sizeof(matrix[0][0][0][0]));
+
+    /* Declare destination matrix to StarPU */
+    starpu_tensor_data_register(&handle2, STARPU_MAIN_RAM, (uintptr_t)matrix2,
+            NX + PARTSX*2*SHADOWX, (NX + PARTSX*2*SHADOWX) * (NY + PARTSY*2*SHADOWY), (NX + PARTSX*2*SHADOWX) * (NY + PARTSY*2*SHADOWY) * (NZ + PARTSZ*2*SHADOWZ),
+            NX + PARTSX*2*SHADOWX, NY + PARTSY*2*SHADOWY, NZ + PARTSZ*2*SHADOWZ, NT + PARTST*2*SHADOWT,
+            sizeof(matrix2[0][0][0][0]));
+
+    /* Partition the source matrix in PARTST*PARTSZ*PARTSY*PARTSX sub-matrices with shadows */
+    /* NOTE: the resulting handles should only be used in read-only mode,
+     * as StarPU will not know how the overlapping parts would have to be
+     * combined. */
+    struct starpu_data_filter ft =
+    {
+        .filter_func = starpu_tensor_filter_time_block_shadow,
+        .nchildren = PARTST,
+        .filter_arg_ptr = (void*)(uintptr_t) SHADOWT /* Shadow width */
+    };
+    struct starpu_data_filter fz =
+    {
+        .filter_func = starpu_tensor_filter_depth_block_shadow,
+        .nchildren = PARTSZ,
+        .filter_arg_ptr = (void*)(uintptr_t) SHADOWZ /* Shadow width */
+    };
+    struct starpu_data_filter fy =
+    {
+        .filter_func = starpu_tensor_filter_vertical_block_shadow,
+        .nchildren = PARTSY,
+        .filter_arg_ptr = (void*)(uintptr_t) SHADOWY /* Shadow width */
+    };
+    struct starpu_data_filter fx =
+    {
+        .filter_func = starpu_tensor_filter_block_shadow,
+        .nchildren = PARTSX,
+        .filter_arg_ptr = (void*)(uintptr_t) SHADOWX /* Shadow width */
+    };
+    starpu_data_map_filters(handle, 4, &ft, &fz, &fy, &fx);
+
+    /* Partition the destination matrix in PARTST*PARTSZ*PARTSY*PARTSX sub-matrices */
+    struct starpu_data_filter ft2 =
+    {
+        .filter_func = starpu_tensor_filter_time_block,
+        .nchildren = PARTST,
+    };
+    struct starpu_data_filter fz2 =
+    {
+        .filter_func = starpu_tensor_filter_depth_block,
+        .nchildren = PARTSZ,
+    };
+    struct starpu_data_filter fy2 =
+    {
+        .filter_func = starpu_tensor_filter_vertical_block,
+        .nchildren = PARTSY,
+    };
+    struct starpu_data_filter fx2 =
+    {
+        .filter_func = starpu_tensor_filter_block,
+        .nchildren = PARTSX,
+    };
+    starpu_data_map_filters(handle2, 4, &ft2, &fz2, &fy2, &fx2);
+
+
+    /* Submit a task on each sub-matrix */
+    for (l=0; l<PARTST; l++)
+    {
+        for (k=0; k<PARTSZ; k++)
+        {
+            for (j=0; j<PARTSY; j++)
+            {
+                for (i=0; i<PARTSX; i++)
+                {
+                    starpu_data_handle_t sub_handle = starpu_data_get_sub_data(handle, 4, l, k, j, i);
+                    starpu_data_handle_t sub_handle2 = starpu_data_get_sub_data(handle2, 4, l, k, j, i);
+                    struct starpu_task *task = starpu_task_create();
+
+                    task->handles[0] = sub_handle;
+                    task->handles[1] = sub_handle2;
+                    task->cl = &cl;
+                    task->synchronous = 1;
+
+                    ret = starpu_task_submit(task);
+                    if (ret == -ENODEV) goto enodev;
+                    STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+                }
+            }
+        }
+    }
+
+    starpu_data_unpartition(handle, STARPU_MAIN_RAM);
+    starpu_data_unpartition(handle2, STARPU_MAIN_RAM);
+    starpu_data_unregister(handle);
+    starpu_data_unregister(handle2);
+    starpu_shutdown();
+
+    FPRINTF(stderr,"OUT Matrix:\n");
+    for(l=0 ; l<NT + PARTST*2*SHADOWT ; l++)
+    {
+        for(k=0 ; k<NZ + PARTSZ*2*SHADOWZ ; k++)
+        {
+            for(j=0 ; j<NY + PARTSY*2*SHADOWY ; j++)
+            {
+                for(i=0 ; i<NX + PARTSX*2*SHADOWX ; i++)
+                {
+                    FPRINTF(stderr, "%5d ", matrix2[l][k][j][i]);
+                }
+                FPRINTF(stderr,"\n");
+            }
+            FPRINTF(stderr,"\n\n");
+        }
+        FPRINTF(stderr,"\n\n");
+    }
+    FPRINTF(stderr,"\n");
+    for(l=0 ; l<PARTST ; l++)
+        for(k=0 ; k<PARTSZ ; k++)
+            for(j=0 ; j<PARTSY ; j++)
+                for(i=0 ; i<PARTSX ; i++)
+                    for (q=0 ; q<NT/PARTST + 2*SHADOWT ; q++)
+                        for (p=0 ; p<NZ/PARTSZ + 2*SHADOWZ ; p++)
+                            for (n=0 ; n<NY/PARTSY + 2*SHADOWY ; n++)
+                                for (m=0 ; m<NX/PARTSX + 2*SHADOWX ; m++)
+                                    STARPU_ASSERT(matrix2[l*(NT/PARTST+2*SHADOWT)+q][k*(NZ/PARTSZ+2*SHADOWZ)+p][j*(NY/PARTSY+2*SHADOWY)+n][i*(NX/PARTSX+2*SHADOWX)+m] ==
+                                            matrix[l*(NT/PARTST)+q][k*(NZ/PARTSZ)+p][j*(NY/PARTSY)+n][i*(NX/PARTSX)+m]);
+
+    return 0;
+
+enodev:
+    FPRINTF(stderr, "WARNING: No one can execute this task\n");
+    starpu_shutdown();
+    return 77;
+}

+ 85 - 0
include/starpu_data_filters.h

@@ -527,6 +527,91 @@ void starpu_block_filter_depth_block(void *father_interface, void *child_interfa
 */
 void starpu_block_filter_depth_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
 
+/** @} */
+
+/**
+   @name Predefined Tensor Filter Functions
+   Predefined partitioning functions for tensor
+   data.
+   @{
+*/
+
+/**
+  Partition a tensor along the X dimension, thus getting
+  (x/\p nparts ,y,z,t) tensors. If \p nparts does not divide x, the last
+  submatrix contains the remainder.
+ */
+void starpu_tensor_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
+
+/**
+   Partition a tensor along the X dimension, with a
+   shadow border <c>filter_arg_ptr</c>, thus getting
+   ((x-2*shadow)/\p nparts +2*shadow,y,z,t) tensors. If \p nparts does not
+   divide x, the last submatrix contains the remainder.
+
+   <b>IMPORTANT</b>:
+   This can only be used for read-only access, as no coherency is
+   enforced for the shadowed parts.
+*/
+void starpu_tensor_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
+
+/**
+   Partition a tensor along the Y dimension, thus getting
+   (x,y/\p nparts ,z,t) tensors. If \p nparts does not divide y, the last
+   submatrix contains the remainder.
+ */
+void starpu_tensor_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
+
+/**
+   Partition a tensor along the Y dimension, with a
+   shadow border <c>filter_arg_ptr</c>, thus getting
+   (x,(y-2*shadow)/\p nparts +2*shadow,z,t) tensors. If \p nparts does not
+   divide y, the last submatrix contains the remainder.
+
+   <b>IMPORTANT</b>:
+   This can only be used for read-only access, as no coherency is
+   enforced for the shadowed parts.
+*/
+void starpu_tensor_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
+
+/**
+   Partition a tensor along the Z dimension, thus getting
+   (x,y,z/\p nparts,t) tensors. If \p nparts does not divide z, the last
+   submatrix contains the remainder.
+*/
+void starpu_tensor_filter_depth_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
+
+/**
+   Partition a tensor along the Z dimension, with a
+   shadow border <c>filter_arg_ptr</c>, thus getting
+   (x,y,(z-2*shadow)/\p nparts +2*shadow,t) tensors. If \p nparts does not
+   divide z, the last submatrix contains the remainder.
+
+   <b>IMPORTANT</b>:
+   This can only be used for read-only access, as no coherency is
+   enforced for the shadowed parts.
+*/
+void starpu_tensor_filter_depth_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
+
+/**
+   Partition a tensor along the T dimension, thus getting
+   (x,y,z,t/\p nparts) tensors. If \p nparts does not divide t, the last
+   submatrix contains the remainder.
+*/
+void starpu_tensor_filter_time_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
+
+/**
+   Partition a tensor along the T dimension, with a
+   shadow border <c>filter_arg_ptr</c>, thus getting
+   (x,y,z,(t-2*shadow)/\p nparts +2*shadow) tensors. If \p nparts does not
+   divide t, the last submatrix contains the remainder.
+
+   <b>IMPORTANT</b>:
+   This can only be used for read-only access, as no coherency is
+   enforced for the shadowed parts.
+*/
+void starpu_tensor_filter_time_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
+
 /**
    Given an integer \p n, \p n the number of parts it must be divided in, \p id the
    part currently considered, determines the \p chunk_size and the \p offset, taking

+ 1 - 0
src/Makefile.am

@@ -249,6 +249,7 @@ libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = 		\
 	datawizard/interfaces/block_interface.c			\
 	datawizard/interfaces/tensor_interface.c		\
 	datawizard/interfaces/vector_interface.c		\
+	datawizard/interfaces/tensor_filters.c		    \
 	datawizard/interfaces/bcsr_filters.c			\
 	datawizard/interfaces/csr_filters.c			\
 	datawizard/interfaces/vector_filters.c			\

+ 188 - 0
src/datawizard/interfaces/tensor_filters.c

@@ -0,0 +1,188 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2021  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include <common/config.h>
+#include <datawizard/filters.h>
+
+static void _starpu_tensor_filter_block(int dim, void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f,
+                   unsigned id, unsigned nparts, uintptr_t shadow_size)
+{
+    struct starpu_tensor_interface *tensor_father = (struct starpu_tensor_interface *) father_interface;
+    struct starpu_tensor_interface *tensor_child = (struct starpu_tensor_interface *) child_interface;
+
+    unsigned blocksize;
+    /* the element will be split, in case horizontal, it's nx, in case vertical, it's ny, in case depth, it's nz, in case time, it's nt*/
+    uint32_t nn;
+    uint32_t nx;
+    uint32_t ny;
+    uint32_t nz;
+    uint32_t nt;
+
+    switch(dim)
+    {
+        /* horizontal*/
+        case 1:
+            /* actual number of elements */
+            nx = tensor_father->nx - 2 * shadow_size;
+            ny = tensor_father->ny;
+            nz = tensor_father->nz;
+            nt = tensor_father->nt;
+            nn = nx;
+            blocksize = 1;
+            break;
+        /* vertical*/
+        case 2:
+            nx = tensor_father->nx;
+            /* actual number of elements */
+            ny = tensor_father->ny - 2 * shadow_size;
+            nz = tensor_father->nz;
+            nt = tensor_father->nt;
+            nn = ny;
+            blocksize = tensor_father->ldy;
+            break;
+        /* depth*/
+        case 3:
+            nx = tensor_father->nx;
+            ny = tensor_father->ny;
+            /* actual number of elements */
+            nz = tensor_father->nz - 2 * shadow_size;
+            nt = tensor_father->nt;
+            nn = nz;
+            blocksize = tensor_father->ldz;
+            break;
+        /* time*/
+        case 4:
+            nx = tensor_father->nx;
+            ny = tensor_father->ny;
+            nz = tensor_father->nz;
+            /* actual number of elements */
+            nt = tensor_father->nt - 2 * shadow_size;
+            nn = nt;
+            blocksize = tensor_father->ldt;
+            break;
+    }
+
+    size_t elemsize = tensor_father->elemsize;
+
+    STARPU_ASSERT_MSG(nparts <= nn, "cannot split %u elements in %u parts", nn, nparts);
+
+    uint32_t child_nn;
+    size_t offset;
+    starpu_filter_nparts_compute_chunk_size_and_offset(nn, nparts, elemsize, id, blocksize, &child_nn, &offset);
+
+    child_nn += 2 * shadow_size;
+
+    STARPU_ASSERT_MSG(tensor_father->id == STARPU_TENSOR_INTERFACE_ID, "%s can only be applied on a tensor data", __func__);
+    tensor_child->id = tensor_father->id;
+
+    switch(dim)
+    {
+        case 1:
+            tensor_child->nx = child_nn;
+            tensor_child->ny = ny;
+            tensor_child->nz = nz;
+            tensor_child->nt = nt;
+            break;
+        case 2:
+            tensor_child->nx = nx;
+            tensor_child->ny = child_nn;
+            tensor_child->nz = nz;
+            tensor_child->nt = nt;
+            break;
+        case 3:
+            tensor_child->nx = nx;
+            tensor_child->ny = ny;
+            tensor_child->nz = child_nn;
+            tensor_child->nt = nt;
+            break;
+        case 4:
+            tensor_child->nx = nx;
+            tensor_child->ny = ny;
+            tensor_child->nz = nz;
+            tensor_child->nt = child_nn;
+            break;
+    }
+
+    tensor_child->elemsize = elemsize;
+
+    if (tensor_father->dev_handle)
+    {
+        if (tensor_father->ptr)
+            tensor_child->ptr = tensor_father->ptr + offset;
+        tensor_child->ldy = tensor_father->ldy;
+        tensor_child->ldz = tensor_father->ldz;
+        tensor_child->ldt = tensor_father->ldt;
+        tensor_child->dev_handle = tensor_father->dev_handle;
+        tensor_child->offset = tensor_father->offset + offset;
+    }
+}
+
+void starpu_tensor_filter_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f,
+                   unsigned id, unsigned nparts)
+{
+    _starpu_tensor_filter_block(1, father_interface, child_interface, f, id, nparts, 0);
+}
+
+void starpu_tensor_filter_block_shadow(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f,
+                      unsigned id, unsigned nparts)
+{
+    uintptr_t shadow_size = (uintptr_t) f->filter_arg_ptr;
+
+    _starpu_tensor_filter_block(1, father_interface, child_interface, f, id, nparts, shadow_size);
+}
+
+void starpu_tensor_filter_vertical_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f,
+                    unsigned id, unsigned nparts)
+{
+    _starpu_tensor_filter_block(2, father_interface, child_interface, f, id, nparts, 0);
+}
+
+void starpu_tensor_filter_vertical_block_shadow(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f,
+                      unsigned id, unsigned nparts)
+{
+    uintptr_t shadow_size = (uintptr_t) f->filter_arg_ptr;
+
+    _starpu_tensor_filter_block(2, father_interface, child_interface, f, id, nparts, shadow_size);
+}
+
+void starpu_tensor_filter_depth_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f,
+                    unsigned id, unsigned nparts)
+{
+    _starpu_tensor_filter_block(3, father_interface, child_interface, f, id, nparts, 0);
+}
+
+void starpu_tensor_filter_depth_block_shadow(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f,
+                      unsigned id, unsigned nparts)
+{
+    uintptr_t shadow_size = (uintptr_t) f->filter_arg_ptr;
+
+    _starpu_tensor_filter_block(3, father_interface, child_interface, f, id, nparts, shadow_size);
+}
+
+void starpu_tensor_filter_time_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f,
+                    unsigned id, unsigned nparts)
+{
+    _starpu_tensor_filter_block(4, father_interface, child_interface, f, id, nparts, 0);
+}
+
+void starpu_tensor_filter_time_block_shadow(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f,
+                      unsigned id, unsigned nparts)
+{
+    uintptr_t shadow_size = (uintptr_t) f->filter_arg_ptr;
+
+    _starpu_tensor_filter_block(4, father_interface, child_interface, f, id, nparts, shadow_size);
+}