Browse Source

** IMPORTANT **

For Xmas, here is a brand new API (that breaks all existing code).

We remove the starpu_data_interface_t enum type as it does not permit
to have the application to manipulate user-provided data interfaces.  This also
reduces significantly the size of the task structure as well as the data handle
structure.

The prototype of a codelet should now be

	void (*kernel)(void *descr[], void *arg)

The elements of the "descr" array should be pointers to the buffer container
the data description on the current memory node. For instance, if the i-th
piece of data is a "vector" data (described by the starpu_vector_interface_t
struct type), a typical way to access this data is now:

void kernel(void *descr[], void *arg)
{
	/* descr[i].vector.nx is deprecated */

	struct starpu_vector_interface_t *vector_interface = descr[i];
	unsigned nx = vector_interface->nx;
}

To somehow avoid verbosity, we provide some macros, for instance the
previous kernel could have been written as:

void kernel(void *descr[], void *arg)
{
	unsigned nx = GET_VECTOR_NX(descr[i]);
}
Cédric Augonnet 16 years ago
parent
commit
0281ad5c41
71 changed files with 652 additions and 615 deletions
  1. 4 4
      examples/audio/starpu-audio-processing.c
  2. 8 8
      examples/axpy/axpy.c
  3. 1 1
      examples/basic-examples/hello-world.c
  4. 10 10
      examples/basic-examples/mult.c
  5. 6 3
      examples/basic-examples/vector-scal.c
  6. 6 6
      examples/cholesky/dw_cholesky.h
  7. 27 27
      examples/cholesky/dw_cholesky_kernels.c
  8. 8 8
      examples/heat/dw_factolu.h
  9. 36 36
      examples/heat/dw_factolu_kernels.c
  10. 15 15
      examples/heat/dw_sparse_cg.h
  11. 72 72
      examples/heat/dw_sparse_cg_kernels.c
  12. 3 3
      examples/incrementer/incrementer.c
  13. 2 2
      examples/incrementer/incrementer_kernels.cu
  14. 8 8
      examples/lu/xlu.h
  15. 48 48
      examples/lu/xlu_kernels.c
  16. 12 12
      examples/lu/xlu_kernels.h
  17. 2 2
      examples/mult/dw_mult.h
  18. 11 11
      examples/mult/sgemm_kernels.c
  19. 11 11
      examples/mult/xgemm_kernels.c
  20. 45 45
      examples/pastix-wrappers/starpu-blas-wrapper.c
  21. 7 7
      examples/ppm-downscaler/yuv-downscaler.c
  22. 2 2
      examples/spmv/dw_block_spmv.h
  23. 9 9
      examples/spmv/dw_block_spmv_kernels.c
  24. 22 22
      examples/spmv/dw_spmv.c
  25. 23 23
      examples/starpufft/starpufftx1d.c
  26. 24 24
      examples/starpufft/starpufftx2d.c
  27. 10 10
      examples/strassen/strassen.h
  28. 36 36
      examples/strassen/strassen_kernels.c
  29. 12 12
      examples/strassen2/strassen2.c
  30. 33 33
      examples/strassen2/strassen2_kernels.c
  31. 1 1
      examples/tag_example/tag_example.c
  32. 1 1
      examples/tag_example/tag_example2.c
  33. 1 1
      examples/tag_example/tag_restartable.c
  34. 23 0
      include/starpu-data-interfaces.h
  35. 7 3
      include/starpu-task.h
  36. 3 3
      mpi/tests/ring.c
  37. 3 3
      mpi/tests/ring_async.c
  38. 2 2
      mpi/tests/ring_kernel.cu
  39. 1 1
      src/core/jobs.h
  40. 1 3
      src/datawizard/coherency.c
  41. 0 1
      src/datawizard/coherency.h
  42. 10 8
      src/datawizard/interfaces/bcsr_interface.c
  43. 13 11
      src/datawizard/interfaces/blas_interface.c
  44. 8 6
      src/datawizard/interfaces/block_interface.c
  45. 10 8
      src/datawizard/interfaces/csr_interface.c
  46. 2 3
      src/datawizard/interfaces/data_interface.h
  47. 12 8
      src/datawizard/interfaces/vector_interface.c
  48. 1 1
      src/util/execute_on_all.c
  49. 2 2
      src/util/malloc.c
  50. 1 1
      tests/core/empty_task_sync_point.c
  51. 1 1
      tests/core/execute_on_a_specific_worker.c
  52. 1 1
      tests/core/multithreaded.c
  53. 2 3
      tests/core/starpu_wait_all_tasks.c
  54. 1 1
      tests/core/starpu_wait_task.c
  55. 1 1
      tests/core/static_restartable.c
  56. 1 1
      tests/core/static_restartable_tag.c
  57. 1 1
      tests/core/static_restartable_using_initializer.c
  58. 1 1
      tests/datawizard/dining_philosophers.c
  59. 1 1
      tests/datawizard/readers_and_writers.c
  60. 6 6
      tests/datawizard/write_only_tmp_buffer.c
  61. 1 1
      tests/errorcheck/invalid_blocking_calls.c
  62. 1 1
      tests/microbenchs/async-tasks-overhead.c
  63. 2 2
      tests/microbenchs/dsm_stress.c
  64. 1 1
      tests/microbenchs/prefetch_data_on_node.c
  65. 2 2
      tests/microbenchs/redundant_buffer.c
  66. 1 1
      tests/microbenchs/sync-tasks-overhead.c
  67. 6 6
      tests/microbenchs/sync_and_notify_data.c
  68. 4 4
      tests/microbenchs/sync_and_notify_data_kernels.cu
  69. 1 1
      tests/microbenchs/tag-wait-api.c
  70. 1 1
      tests/microbenchs/tasks-overhead.c
  71. 1 1
      tests/overlap/overlap.c

+ 4 - 4
examples/audio/starpu-audio-processing.c

@@ -144,11 +144,11 @@ typedef struct {
 static fft_plan_cache plans[STARPU_NMAXWORKERS];
 
 #ifdef USE_CUDA
-static void band_filter_kernel_gpu(starpu_data_interface_t *descr, __attribute__((unused)) void *arg)
+static void band_filter_kernel_gpu(void *descr[], __attribute__((unused)) void *arg)
 {
 	cufftResult cures;
 
-	float *localA = (float *)descr[0].vector.ptr;
+	float *localA = (float *)GET_VECTOR_PTR(descr[0]);
 	cufftComplex *localout;
 
 	int workerid = starpu_get_worker_id();
@@ -194,9 +194,9 @@ static void band_filter_kernel_gpu(starpu_data_interface_t *descr, __attribute__
 
 static pthread_mutex_t fftw_mutex = PTHREAD_MUTEX_INITIALIZER;
 
-static void band_filter_kernel_cpu(starpu_data_interface_t *descr, __attribute__((unused)) void *arg)
+static void band_filter_kernel_cpu(void *descr[], __attribute__((unused)) void *arg)
 {
-	float *localA = (float *)descr[0].vector.ptr;
+	float *localA = (float *)GET_VECTOR_PTR(descr[0]);
 
 	int workerid = starpu_get_worker_id();
 	

+ 8 - 8
examples/axpy/axpy.c

@@ -36,27 +36,27 @@ TYPE *vec_x, *vec_y;
 /* descriptors for StarPU */
 starpu_data_handle handle_y, handle_x;
 
-void axpy_cpu(starpu_data_interface_t *descr, __attribute__((unused)) void *arg)
+void axpy_cpu(void *descr[], __attribute__((unused)) void *arg)
 {
 	TYPE alpha = *((TYPE *)arg);
 
-	unsigned n = descr[0].vector.nx;
+	unsigned n = GET_VECTOR_NX(descr[0]);
 
-	TYPE *block_x = (TYPE *)descr[0].vector.ptr;
-	TYPE *block_y = (TYPE *)descr[1].vector.ptr;
+	TYPE *block_x = (TYPE *)GET_VECTOR_PTR(descr[0]);
+	TYPE *block_y = (TYPE *)GET_VECTOR_PTR(descr[1]);
 
 	AXPY((int)n, alpha, block_x, 1, block_y, 1);
 }
 
 #ifdef USE_CUDA
-void axpy_gpu(starpu_data_interface_t *descr, __attribute__((unused)) void *arg)
+void axpy_gpu(void *descr[], __attribute__((unused)) void *arg)
 {
 	TYPE alpha = *((TYPE *)arg);
 
-	unsigned n = descr[0].vector.nx;
+	unsigned n = GET_VECTOR_NX(descr[0]);
 
-	TYPE *block_x = (TYPE *)descr[0].vector.ptr;
-	TYPE *block_y = (TYPE *)descr[1].vector.ptr;
+	TYPE *block_x = (TYPE *)GET_VECTOR_PTR(descr[0]);
+	TYPE *block_y = (TYPE *)GET_VECTOR_PTR(descr[1]);
 
 	CUBLASAXPY((int)n, alpha, block_x, 1, block_y, 1);
 	cudaThreadSynchronize();

+ 1 - 1
examples/basic-examples/hello-world.c

@@ -43,7 +43,7 @@ void callback_func(void *callback_arg)
  * DSM; the second arguments references a read-only buffer that is passed as an
  * argument of the codelet (task->cl_arg). Here, "buffers" is unused as there
  * are no data input/output managed by the DSM (cl.nbuffers = 0) */
-void cpu_func(starpu_data_interface_t *buffers, void *func_arg)
+void cpu_func(void *buffers[], void *func_arg)
 {
 	float *array = func_arg;
 

+ 10 - 10
examples/basic-examples/mult.c

@@ -109,16 +109,16 @@ static void callback_func(void *arg)
  * field of the descr[x] elements which are union types.
  */
 
-static void cpu_mult(starpu_data_interface_t *descr, __attribute__((unused))  void *arg)
+static void cpu_mult(void *descr[], __attribute__((unused))  void *arg)
 {
 	float *subA, *subB, *subC;
 	uint32_t nxC, nyC, nyA;
 	uint32_t ldA, ldB, ldC;
 
 	/* .blas.ptr gives a pointer to the first element of the local copy */
-	subA = (float *)descr[0].blas.ptr;
-	subB = (float *)descr[1].blas.ptr;
-	subC = (float *)descr[2].blas.ptr;
+	subA = (float *)GET_BLAS_PTR(descr[0]);
+	subB = (float *)GET_BLAS_PTR(descr[1]);
+	subC = (float *)GET_BLAS_PTR(descr[2]);
 
 	/* .blas.nx is the number of rows (consecutive elements) and .blas.ny
 	 * is the number of lines that are separated by .blas.ld elements (ld
@@ -126,13 +126,13 @@ static void cpu_mult(starpu_data_interface_t *descr, __attribute__((unused))  vo
 	 * NB: in case some filters were used, the leading dimension is not
 	 * guaranteed to be the same in main memory (on the original matrix)
 	 * and on the accelerator! */
-	nxC = descr[2].blas.nx;
-	nyC = descr[2].blas.ny;
-	nyA = descr[0].blas.ny;
+	nxC = GET_BLAS_NX(descr[2]);
+	nyC = GET_BLAS_NY(descr[2]);
+	nyA = GET_BLAS_NY(descr[0]);
 
-	ldA = descr[0].blas.ld;
-	ldB = descr[1].blas.ld;
-	ldC = descr[2].blas.ld;
+	ldA = GET_BLAS_LD(descr[0]);
+	ldB = GET_BLAS_LD(descr[1]);
+	ldC = GET_BLAS_LD(descr[2]);
 
 	/* we assume a FORTRAN-ordering! */
 	unsigned i,j,k;

+ 6 - 3
examples/basic-examples/vector-scal.c

@@ -30,11 +30,12 @@
 #define	N	2048
 
 /* This kernel takes a buffer and scales it by a constant factor */
-static void scal_func(starpu_data_interface_t *buffers, void *arg)
+static void scal_func(void *buffers[], void *arg)
 {
 	unsigned i;
 	float *factor = arg;
 
+#warning TODO update
 	/* 
 	 * The "buffers" array matches the task->buffers one: for instance
 	 * task->buffers[0].handle is a handle that corresponds to a data with
@@ -46,13 +47,15 @@ static void scal_func(starpu_data_interface_t *buffers, void *arg)
 	 * each elements.
 	 */
 
+	starpu_vector_interface_t *vector = buffers[0];
+
 	/* length of the vector */
-	unsigned n = buffers[0].vector.nx;
+	unsigned n = vector->nx;
 
 	/* get a pointer to the local copy of the vector : note that we have to
 	 * cast it in (float *) since a vector could contain any type of
 	 * elements so that the .ptr field is actually a uintptr_t */
-	float *val = (float *)buffers[0].vector.ptr;
+	float *val = (float *)vector->ptr;
 
 	/* scale the vector */
 	for (i = 0; i < n; i++)

+ 6 - 6
examples/cholesky/dw_cholesky.h

@@ -72,14 +72,14 @@ static unsigned nbigblocks = 8;
 static unsigned pinned = 0;
 static unsigned noprio = 0;
 
-void chol_core_codelet_update_u11(starpu_data_interface_t *, void *);
-void chol_core_codelet_update_u21(starpu_data_interface_t *, void *);
-void chol_core_codelet_update_u22(starpu_data_interface_t *, void *);
+void chol_core_codelet_update_u11(void **, void *);
+void chol_core_codelet_update_u21(void **, void *);
+void chol_core_codelet_update_u22(void **, void *);
 
 #ifdef USE_CUDA
-void chol_cublas_codelet_update_u11(starpu_data_interface_t *descr, void *_args);
-void chol_cublas_codelet_update_u21(starpu_data_interface_t *descr, void *_args);
-void chol_cublas_codelet_update_u22(starpu_data_interface_t *descr, void *_args);
+void chol_cublas_codelet_update_u11(void *descr[], void *_args);
+void chol_cublas_codelet_update_u21(void *descr[], void *_args);
+void chol_cublas_codelet_update_u22(void *descr[], void *_args);
 #endif
 
 void initialize_system(float **A, unsigned dim, unsigned pinned);

+ 27 - 27
examples/cholesky/dw_cholesky_kernels.c

@@ -27,20 +27,20 @@
  *   U22 
  */
 
-static inline void chol_common_core_codelet_update_u22(starpu_data_interface_t *buffers, int s, __attribute__((unused)) void *_args)
+static inline void chol_common_core_codelet_update_u22(void *descr[], int s, __attribute__((unused)) void *_args)
 {
 	//printf("22\n");
-	float *left 	= (float *)buffers[0].blas.ptr;
-	float *right 	= (float *)buffers[1].blas.ptr;
-	float *center 	= (float *)buffers[2].blas.ptr;
+	float *left 	= (float *)GET_BLAS_PTR(descr[0]);
+	float *right 	= (float *)GET_BLAS_PTR(descr[1]);
+	float *center 	= (float *)GET_BLAS_PTR(descr[2]);
 
-	unsigned dx = buffers[2].blas.ny;
-	unsigned dy = buffers[2].blas.nx;
-	unsigned dz = buffers[0].blas.ny;
+	unsigned dx = GET_BLAS_NY(descr[2]);
+	unsigned dy = GET_BLAS_NX(descr[2]);
+	unsigned dz = GET_BLAS_NY(descr[0]);
 
-	unsigned ld21 = buffers[0].blas.ld;
-	unsigned ld12 = buffers[1].blas.ld;
-	unsigned ld22 = buffers[2].blas.ld;
+	unsigned ld21 = GET_BLAS_LD(descr[0]);
+	unsigned ld12 = GET_BLAS_LD(descr[1]);
+	unsigned ld22 = GET_BLAS_LD(descr[2]);
 
 #ifdef USE_CUDA
 	cublasStatus st;
@@ -69,13 +69,13 @@ static inline void chol_common_core_codelet_update_u22(starpu_data_interface_t *
 	}
 }
 
-void chol_core_codelet_update_u22(starpu_data_interface_t *descr, void *_args)
+void chol_core_codelet_update_u22(void *descr[], void *_args)
 {
 	chol_common_core_codelet_update_u22(descr, 0, _args);
 }
 
 #ifdef USE_CUDA
-void chol_cublas_codelet_update_u22(starpu_data_interface_t *descr, void *_args)
+void chol_cublas_codelet_update_u22(void *descr[], void *_args)
 {
 	chol_common_core_codelet_update_u22(descr, 1, _args);
 }
@@ -85,20 +85,20 @@ void chol_cublas_codelet_update_u22(starpu_data_interface_t *descr, void *_args)
  * U21
  */
 
-static inline void chol_common_codelet_update_u21(starpu_data_interface_t *buffers, int s, __attribute__((unused)) void *_args)
+static inline void chol_common_codelet_update_u21(void *descr[], int s, __attribute__((unused)) void *_args)
 {
 //	printf("21\n");
 	float *sub11;
 	float *sub21;
 
-	sub11 = (float *)buffers[0].blas.ptr;
-	sub21 = (float *)buffers[1].blas.ptr;
+	sub11 = (float *)GET_BLAS_PTR(descr[0]);
+	sub21 = (float *)GET_BLAS_PTR(descr[1]);
 
-	unsigned ld11 = buffers[0].blas.ld;
-	unsigned ld21 = buffers[1].blas.ld;
+	unsigned ld11 = GET_BLAS_LD(descr[0]);
+	unsigned ld21 = GET_BLAS_LD(descr[1]);
 
-	unsigned nx21 = buffers[1].blas.ny;
-	unsigned ny21 = buffers[1].blas.nx;
+	unsigned nx21 = GET_BLAS_NY(descr[1]);
+	unsigned ny21 = GET_BLAS_NX(descr[1]);
 
 	switch (s) {
 		case 0:
@@ -116,13 +116,13 @@ static inline void chol_common_codelet_update_u21(starpu_data_interface_t *buffe
 	}
 }
 
-void chol_core_codelet_update_u21(starpu_data_interface_t *descr, void *_args)
+void chol_core_codelet_update_u21(void *descr[], void *_args)
 {
 	 chol_common_codelet_update_u21(descr, 0, _args);
 }
 
 #ifdef USE_CUDA
-void chol_cublas_codelet_update_u21(starpu_data_interface_t *descr, void *_args)
+void chol_cublas_codelet_update_u21(void *descr[], void *_args)
 {
 	chol_common_codelet_update_u21(descr, 1, _args);
 }
@@ -132,15 +132,15 @@ void chol_cublas_codelet_update_u21(starpu_data_interface_t *descr, void *_args)
  *	U11
  */
 
-static inline void chol_common_codelet_update_u11(starpu_data_interface_t *descr, int s, __attribute__((unused)) void *_args) 
+static inline void chol_common_codelet_update_u11(void *descr[], int s, __attribute__((unused)) void *_args) 
 {
 //	printf("11\n");
 	float *sub11;
 
-	sub11 = (float *)descr[0].blas.ptr; 
+	sub11 = (float *)GET_BLAS_PTR(descr[0]); 
 
-	unsigned nx = descr[0].blas.ny;
-	unsigned ld = descr[0].blas.ld;
+	unsigned nx = GET_BLAS_NY(descr[0]);
+	unsigned ld = GET_BLAS_LD(descr[0]);
 
 	unsigned z;
 
@@ -200,13 +200,13 @@ static inline void chol_common_codelet_update_u11(starpu_data_interface_t *descr
 }
 
 
-void chol_core_codelet_update_u11(starpu_data_interface_t *descr, void *_args)
+void chol_core_codelet_update_u11(void *descr[], void *_args)
 {
 	chol_common_codelet_update_u11(descr, 0, _args);
 }
 
 #ifdef USE_CUDA
-void chol_cublas_codelet_update_u11(starpu_data_interface_t *descr, void *_args)
+void chol_cublas_codelet_update_u11(void *descr[], void *_args)
 {
 	chol_common_codelet_update_u11(descr, 1, _args);
 }

+ 8 - 8
examples/heat/dw_factolu.h

@@ -184,16 +184,16 @@ static void __attribute__ ((unused)) compare_A_LU(float *A, float *LU,
 }
 #endif // CHECK_RESULTS
 
-void dw_core_codelet_update_u11(starpu_data_interface_t *, void *);
-void dw_core_codelet_update_u12(starpu_data_interface_t *, void *);
-void dw_core_codelet_update_u21(starpu_data_interface_t *, void *);
-void dw_core_codelet_update_u22(starpu_data_interface_t *, void *);
+void dw_core_codelet_update_u11(void **, void *);
+void dw_core_codelet_update_u12(void **, void *);
+void dw_core_codelet_update_u21(void **, void *);
+void dw_core_codelet_update_u22(void **, void *);
 
 #ifdef USE_CUDA
-void dw_cublas_codelet_update_u11(starpu_data_interface_t *descr, void *_args);
-void dw_cublas_codelet_update_u12(starpu_data_interface_t *descr, void *_args);
-void dw_cublas_codelet_update_u21(starpu_data_interface_t *descr, void *_args);
-void dw_cublas_codelet_update_u22(starpu_data_interface_t *descr, void *_args);
+void dw_cublas_codelet_update_u11(void *descr[], void *_args);
+void dw_cublas_codelet_update_u12(void *descr[], void *_args);
+void dw_cublas_codelet_update_u21(void *descr[], void *_args);
+void dw_cublas_codelet_update_u22(void *descr[], void *_args);
 #endif
 
 void dw_callback_codelet_update_u11(void *);

+ 36 - 36
examples/heat/dw_factolu_kernels.c

@@ -102,19 +102,19 @@ void display_stat_heat(void)
  *   U22 
  */
 
-static inline void dw_common_core_codelet_update_u22(starpu_data_interface_t *buffers, int s, __attribute__((unused)) void *_args)
+static inline void dw_common_core_codelet_update_u22(void *descr[], int s, __attribute__((unused)) void *_args)
 {
-	float *left 	= (float *)buffers[0].blas.ptr;
-	float *right 	= (float *)buffers[1].blas.ptr;
-	float *center 	= (float *)buffers[2].blas.ptr;
+	float *left 	= (float *)GET_BLAS_PTR(descr[0]);
+	float *right 	= (float *)GET_BLAS_PTR(descr[1]);
+	float *center 	= (float *)GET_BLAS_PTR(descr[2]);
 
-	unsigned dx = buffers[2].blas.nx;
-	unsigned dy = buffers[2].blas.ny;
-	unsigned dz = buffers[0].blas.ny;
+	unsigned dx = GET_BLAS_NX(descr[2]);
+	unsigned dy = GET_BLAS_NY(descr[2]);
+	unsigned dz = GET_BLAS_NY(descr[0]);
 
-	unsigned ld12 = buffers[0].blas.ld;
-	unsigned ld21 = buffers[1].blas.ld;
-	unsigned ld22 = buffers[2].blas.ld;
+	unsigned ld12 = GET_BLAS_LD(descr[0]);
+	unsigned ld21 = GET_BLAS_LD(descr[1]);
+	unsigned ld22 = GET_BLAS_LD(descr[2]);
 
 #ifdef USE_CUDA
 	cublasStatus status;
@@ -145,7 +145,7 @@ static inline void dw_common_core_codelet_update_u22(starpu_data_interface_t *bu
 	}
 }
 
-void dw_core_codelet_update_u22(starpu_data_interface_t *descr, void *_args)
+void dw_core_codelet_update_u22(void *descr[], void *_args)
 {
 	dw_common_core_codelet_update_u22(descr, 0, _args);
 
@@ -154,7 +154,7 @@ void dw_core_codelet_update_u22(starpu_data_interface_t *descr, void *_args)
 }
 
 #ifdef USE_CUDA
-void dw_cublas_codelet_update_u22(starpu_data_interface_t *descr, void *_args)
+void dw_cublas_codelet_update_u22(void *descr[], void *_args)
 {
 	dw_common_core_codelet_update_u22(descr, 1, _args);
 
@@ -167,18 +167,18 @@ void dw_cublas_codelet_update_u22(starpu_data_interface_t *descr, void *_args)
  * U12
  */
 
-static inline void dw_common_codelet_update_u12(starpu_data_interface_t *buffers, int s, __attribute__((unused)) void *_args) {
+static inline void dw_common_codelet_update_u12(void *descr[], int s, __attribute__((unused)) void *_args) {
 	float *sub11;
 	float *sub12;
 
-	sub11 = (float *)buffers[0].blas.ptr;	
-	sub12 = (float *)buffers[1].blas.ptr;
+	sub11 = (float *)GET_BLAS_PTR(descr[0]);	
+	sub12 = (float *)GET_BLAS_PTR(descr[1]);
 
-	unsigned ld11 = buffers[0].blas.ld;
-	unsigned ld12 = buffers[1].blas.ld;
+	unsigned ld11 = GET_BLAS_LD(descr[0]);
+	unsigned ld12 = GET_BLAS_LD(descr[1]);
 
-	unsigned nx12 = buffers[1].blas.nx;
-	unsigned ny12 = buffers[1].blas.ny;
+	unsigned nx12 = GET_BLAS_NX(descr[1]);
+	unsigned ny12 = GET_BLAS_NY(descr[1]);
 	
 #ifdef USE_CUDA
 	cublasStatus status;
@@ -208,7 +208,7 @@ static inline void dw_common_codelet_update_u12(starpu_data_interface_t *buffers
 	}
 }
 
-void dw_core_codelet_update_u12(starpu_data_interface_t *descr, void *_args)
+void dw_core_codelet_update_u12(void *descr[], void *_args)
 {
 	dw_common_codelet_update_u12(descr, 0, _args);
 
@@ -217,7 +217,7 @@ void dw_core_codelet_update_u12(starpu_data_interface_t *descr, void *_args)
 }
 
 #ifdef USE_CUDA
-void dw_cublas_codelet_update_u12(starpu_data_interface_t *descr, void *_args)
+void dw_cublas_codelet_update_u12(void *descr[], void *_args)
 {
 	 dw_common_codelet_update_u12(descr, 1, _args);
 
@@ -230,18 +230,18 @@ void dw_cublas_codelet_update_u12(starpu_data_interface_t *descr, void *_args)
  * U21
  */
 
-static inline void dw_common_codelet_update_u21(starpu_data_interface_t *buffers, int s, __attribute__((unused)) void *_args) {
+static inline void dw_common_codelet_update_u21(void *descr[], int s, __attribute__((unused)) void *_args) {
 	float *sub11;
 	float *sub21;
 
-	sub11 = (float *)buffers[0].blas.ptr;
-	sub21 = (float *)buffers[1].blas.ptr;
+	sub11 = (float *)GET_BLAS_PTR(descr[0]);
+	sub21 = (float *)GET_BLAS_PTR(descr[1]);
 
-	unsigned ld11 = buffers[0].blas.ld;
-	unsigned ld21 = buffers[1].blas.ld;
+	unsigned ld11 = GET_BLAS_LD(descr[0]);
+	unsigned ld21 = GET_BLAS_LD(descr[1]);
 
-	unsigned nx21 = buffers[1].blas.nx;
-	unsigned ny21 = buffers[1].blas.ny;
+	unsigned nx21 = GET_BLAS_NX(descr[1]);
+	unsigned ny21 = GET_BLAS_NY(descr[1]);
 	
 #ifdef USE_CUDA
 	cublasStatus status;
@@ -268,7 +268,7 @@ static inline void dw_common_codelet_update_u21(starpu_data_interface_t *buffers
 	}
 }
 
-void dw_core_codelet_update_u21(starpu_data_interface_t *descr, void *_args)
+void dw_core_codelet_update_u21(void *descr[], void *_args)
 {
 	dw_common_codelet_update_u21(descr, 0, _args);
 
@@ -277,7 +277,7 @@ void dw_core_codelet_update_u21(starpu_data_interface_t *descr, void *_args)
 }
 
 #ifdef USE_CUDA
-void dw_cublas_codelet_update_u21(starpu_data_interface_t *descr, void *_args)
+void dw_cublas_codelet_update_u21(void *descr[], void *_args)
 {
 	dw_common_codelet_update_u21(descr, 1, _args);
 
@@ -305,14 +305,14 @@ static inline void debug_print(float *tab, unsigned ld, unsigned n)
 	fprintf(stderr, "\n");
 }
 
-static inline void dw_common_codelet_update_u11(starpu_data_interface_t *descr, int s, __attribute__((unused)) void *_args) 
+static inline void dw_common_codelet_update_u11(void *descr[], int s, __attribute__((unused)) void *_args) 
 {
 	float *sub11;
 
-	sub11 = (float *)descr[0].blas.ptr; 
+	sub11 = (float *)GET_BLAS_PTR(descr[0]); 
 
-	unsigned long nx = descr[0].blas.nx;
-	unsigned long ld = descr[0].blas.ld;
+	unsigned long nx = GET_BLAS_NX(descr[0]);
+	unsigned long ld = GET_BLAS_LD(descr[0]);
 
 	unsigned long z;
 
@@ -363,7 +363,7 @@ static inline void dw_common_codelet_update_u11(starpu_data_interface_t *descr,
 }
 
 
-void dw_core_codelet_update_u11(starpu_data_interface_t *descr, void *_args)
+void dw_core_codelet_update_u11(void *descr[], void *_args)
 {
 	dw_common_codelet_update_u11(descr, 0, _args);
 
@@ -372,7 +372,7 @@ void dw_core_codelet_update_u11(starpu_data_interface_t *descr, void *_args)
 }
 
 #ifdef USE_CUDA
-void dw_cublas_codelet_update_u11(starpu_data_interface_t *descr, void *_args)
+void dw_cublas_codelet_update_u11(void *descr[], void *_args)
 {
 	dw_common_codelet_update_u11(descr, 1, _args);
 

+ 15 - 15
examples/heat/dw_sparse_cg.h

@@ -104,29 +104,29 @@ static void __attribute__ ((unused)) print_results(float *result, unsigned size)
 	}
 }
 
-void core_codelet_func_1(starpu_data_interface_t *descr, void *arg);
+void core_codelet_func_1(void *descr[], void *arg);
 
-void core_codelet_func_2(starpu_data_interface_t *descr, void *arg);
+void core_codelet_func_2(void *descr[], void *arg);
 
-void cublas_codelet_func_3(starpu_data_interface_t *descr, void *arg);
-void core_codelet_func_3(starpu_data_interface_t *descr, void *arg);
+void cublas_codelet_func_3(void *descr[], void *arg);
+void core_codelet_func_3(void *descr[], void *arg);
 
-void core_codelet_func_4(starpu_data_interface_t *descr, void *arg);
+void core_codelet_func_4(void *descr[], void *arg);
 
-void core_codelet_func_5(starpu_data_interface_t *descr, void *arg);
-void cublas_codelet_func_5(starpu_data_interface_t *descr, void *arg);
+void core_codelet_func_5(void *descr[], void *arg);
+void cublas_codelet_func_5(void *descr[], void *arg);
 
-void cublas_codelet_func_6(starpu_data_interface_t *descr, void *arg);
-void core_codelet_func_6(starpu_data_interface_t *descr, void *arg);
+void cublas_codelet_func_6(void *descr[], void *arg);
+void core_codelet_func_6(void *descr[], void *arg);
 
-void cublas_codelet_func_7(starpu_data_interface_t *descr, void *arg);
-void core_codelet_func_7(starpu_data_interface_t *descr, void *arg);
+void cublas_codelet_func_7(void *descr[], void *arg);
+void core_codelet_func_7(void *descr[], void *arg);
 
-void cublas_codelet_func_8(starpu_data_interface_t *descr, void *arg);
-void core_codelet_func_8(starpu_data_interface_t *descr, void *arg);
+void cublas_codelet_func_8(void *descr[], void *arg);
+void core_codelet_func_8(void *descr[], void *arg);
 
-void cublas_codelet_func_9(starpu_data_interface_t *descr, void *arg);
-void core_codelet_func_9(starpu_data_interface_t *descr, void *arg);
+void cublas_codelet_func_9(void *descr[], void *arg);
+void core_codelet_func_9(void *descr[], void *arg);
 
 void iteration_cg(void *problem);
 

+ 72 - 72
examples/heat/dw_sparse_cg_kernels.c

@@ -50,24 +50,24 @@
  *		descr[0] = A, descr[1] = x, descr [2] = r, descr[3] = b
  */
 
-void core_codelet_func_1(starpu_data_interface_t *descr, __attribute__((unused)) void *arg)
+void core_codelet_func_1(void *descr[], __attribute__((unused)) void *arg)
 {
-	float *nzval = (float *)descr[0].csr.nzval;
-	uint32_t *colind = descr[0].csr.colind;
-	uint32_t *rowptr = descr[0].csr.rowptr;
+	float *nzval = (float *)GET_CSR_NZVAL(descr[0]);
+	uint32_t *colind = GET_CSR_COLIND(descr[0]);
+	uint32_t *rowptr = GET_CSR_ROWPTR(descr[0]);
 
-	uint32_t firstentry = descr[0].csr.firstentry;
+	uint32_t firstentry = GET_CSR_ELEMSIZE(descr[0]);
 
-	float *vecx = (float *)descr[1].vector.ptr;
-	float *vecr = (float *)descr[2].vector.ptr;
-	float *vecb = (float *)descr[3].vector.ptr;
+	float *vecx = (float *)GET_VECTOR_PTR(descr[1]);
+	float *vecr = (float *)GET_VECTOR_PTR(descr[2]);
+	float *vecb = (float *)GET_VECTOR_PTR(descr[3]);
 
 
 	uint32_t nnz;
 	uint32_t nrow;
 
-	nnz = descr[0].csr.nnz;
-	nrow = descr[0].csr.nrow;
+	nnz = GET_CSR_NNZ(descr[0]);
+	nrow = GET_CSR_NROW(descr[0]);
 
 	unsigned row;
 	for (row = 0; row < nrow; row++)
@@ -94,17 +94,17 @@ void core_codelet_func_1(starpu_data_interface_t *descr, __attribute__((unused))
  *	compute d = r
  *		descr[0] = d, descr[1] = r
  */
-void core_codelet_func_2(starpu_data_interface_t *descr, __attribute__((unused)) void *arg)
+void core_codelet_func_2(void *descr[], __attribute__((unused)) void *arg)
 {
 	/* simply copy r into d */
-	uint32_t nx = descr[0].vector.nx;
-	size_t elemsize = descr[0].vector.elemsize;
+	uint32_t nx = GET_VECTOR_NX(descr[0]);
+	size_t elemsize = GET_VECTOR_ELEMSIZE(descr[0]);
 
-	STARPU_ASSERT(descr[0].vector.nx == descr[1].vector.nx);
-	STARPU_ASSERT(descr[0].vector.elemsize == descr[1].vector.elemsize);
+	STARPU_ASSERT(GET_VECTOR_NX(descr[0]) == GET_VECTOR_NX(descr[1]));
+	STARPU_ASSERT(GET_VECTOR_ELEMSIZE(descr[0]) == GET_VECTOR_ELEMSIZE(descr[1]));
 
-	float *src = (float *)descr[1].vector.ptr;
-	float *dst = (float *)descr[0].vector.ptr;
+	float *src = (float *)GET_VECTOR_PTR(descr[1]);
+	float *dst = (float *)GET_VECTOR_PTR(descr[0]);
 
 	memcpy(dst, src, nx*elemsize);
 }
@@ -116,7 +116,7 @@ void core_codelet_func_2(starpu_data_interface_t *descr, __attribute__((unused))
  *		args = &delta_new, &delta_0
  */
 
-void core_codelet_func_3(starpu_data_interface_t *descr, void *arg)
+void core_codelet_func_3(void *descr[], void *arg)
 {
 	struct cg_problem *pb = arg;
 	float dot;
@@ -124,8 +124,8 @@ void core_codelet_func_3(starpu_data_interface_t *descr, void *arg)
 	int size;
 	
 	/* get the vector */
-	vec = (float *)descr[0].vector.ptr;
-	size = (int)descr[0].vector.nx;
+	vec = (float *)GET_VECTOR_PTR(descr[0]);
+	size = (int)GET_VECTOR_NX(descr[0]);
 
 	dot = SDOT(size, vec, 1, vec, 1);
 
@@ -136,7 +136,7 @@ void core_codelet_func_3(starpu_data_interface_t *descr, void *arg)
 }
 
 #ifdef USE_CUDA
-void cublas_codelet_func_3(starpu_data_interface_t *descr, void *arg)
+void cublas_codelet_func_3(void *descr[], void *arg)
 {
 	struct cg_problem *pb = arg;
 	float dot;
@@ -144,8 +144,8 @@ void cublas_codelet_func_3(starpu_data_interface_t *descr, void *arg)
 	uint32_t size;
 	
 	/* get the vector */
-	vec = (float *)descr[0].vector.ptr;
-	size = descr[0].vector.nx;
+	vec = (float *)GET_VECTOR_PTR(descr[0]);
+	size = GET_VECTOR_NX(descr[0]);
 
 	dot = cublasSdot (size, vec, 1, vec, 1);
 
@@ -161,22 +161,22 @@ void cublas_codelet_func_3(starpu_data_interface_t *descr, void *arg)
  *		descr[0] = A, descr[1] = d, descr [2] = q
  */
 
-void core_codelet_func_4(starpu_data_interface_t *descr, __attribute__((unused)) void *arg)
+void core_codelet_func_4(void *descr[], __attribute__((unused)) void *arg)
 {
-	float *nzval = (float *)descr[0].csr.nzval;
-	uint32_t *colind = descr[0].csr.colind;
-	uint32_t *rowptr = descr[0].csr.rowptr;
+	float *nzval = (float *)GET_CSR_NZVAL(descr[0]);
+	uint32_t *colind = GET_CSR_COLIND(descr[0]);
+	uint32_t *rowptr = GET_CSR_ROWPTR(descr[0]);
 
-	uint32_t firstentry = descr[0].csr.firstentry;
+	uint32_t firstentry = GET_CSR_FIRSTENTRY(descr[0]);
 
-	float *vecd = (float *)descr[1].vector.ptr;
-	float *vecq = (float *)descr[2].vector.ptr;
+	float *vecd = (float *)GET_VECTOR_PTR(descr[1]);
+	float *vecq = (float *)GET_VECTOR_PTR(descr[2]);
 
 	uint32_t nnz;
 	uint32_t nrow;
 
-	nnz = descr[0].csr.nnz;
-	nrow = descr[0].csr.nrow;
+	nnz = GET_CSR_NNZ(descr[0]);
+	nrow = GET_CSR_NROW(descr[0]);
 
 	unsigned row;
 	for (row = 0; row < nrow; row++)
@@ -207,7 +207,7 @@ void core_codelet_func_4(starpu_data_interface_t *descr, __attribute__((unused))
  *		args = &alpha, &delta_new
  */
 
-void core_codelet_func_5(starpu_data_interface_t *descr, void *arg)
+void core_codelet_func_5(void *descr[], void *arg)
 {
 	float dot;
 	struct cg_problem *pb = arg;
@@ -215,11 +215,11 @@ void core_codelet_func_5(starpu_data_interface_t *descr, void *arg)
 	uint32_t size;
 	
 	/* get the vector */
-	vecd = (float *)descr[0].vector.ptr;
-	vecq = (float *)descr[1].vector.ptr;
+	vecd = (float *)GET_VECTOR_PTR(descr[0]);
+	vecq = (float *)GET_VECTOR_PTR(descr[1]);
 
-	STARPU_ASSERT(descr[1].vector.nx == descr[0].vector.nx);
-	size = descr[0].vector.nx;
+	STARPU_ASSERT(GET_VECTOR_NX(descr[0]) == GET_VECTOR_NX(descr[1]));
+	size = GET_VECTOR_NX(descr[0]);
 
 	dot = SDOT(size, vecd, 1, vecq, 1);
 
@@ -227,7 +227,7 @@ void core_codelet_func_5(starpu_data_interface_t *descr, void *arg)
 }
 
 #ifdef USE_CUDA
-void cublas_codelet_func_5(starpu_data_interface_t *descr, void *arg)
+void cublas_codelet_func_5(void *descr[], void *arg)
 {
 	float dot;
 	struct cg_problem *pb = arg;
@@ -235,11 +235,11 @@ void cublas_codelet_func_5(starpu_data_interface_t *descr, void *arg)
 	uint32_t size;
 	
 	/* get the vector */
-	vecd = (float *)descr[0].vector.ptr;
-	vecq = (float *)descr[1].vector.ptr;
+	vecd = (float *)GET_VECTOR_PTR(descr[0]);
+	vecq = (float *)GET_VECTOR_PTR(descr[1]);
 
-	STARPU_ASSERT(descr[1].vector.nx == descr[0].vector.nx);
-	size = descr[0].vector.nx;
+	STARPU_ASSERT(GET_VECTOR_NX(descr[0]) == GET_VECTOR_NX(descr[1]));
+	size = GET_VECTOR_NX(descr[0]);
 
 	dot = cublasSdot (size, vecd, 1, vecq, 1);
 
@@ -256,33 +256,33 @@ void cublas_codelet_func_5(starpu_data_interface_t *descr, void *arg)
  *		args = &alpha
  */
 
-void core_codelet_func_6(starpu_data_interface_t *descr, void *arg)
+void core_codelet_func_6(void *descr[], void *arg)
 {
 	struct cg_problem *pb = arg;
 	float *vecx, *vecd;
 	uint32_t size;
 	
 	/* get the vector */
-	vecx = (float *)descr[0].vector.ptr;
-	vecd = (float *)descr[1].vector.ptr;
+	vecx = (float *)GET_VECTOR_PTR(descr[0]);
+	vecd = (float *)GET_VECTOR_PTR(descr[1]);
 
-	size = descr[0].vector.nx;
+	size = GET_VECTOR_NX(descr[0]);
 
 	SAXPY(size, pb->alpha, vecd, 1, vecx, 1);
 }
 
 #ifdef USE_CUDA
-void cublas_codelet_func_6(starpu_data_interface_t *descr, void *arg)
+void cublas_codelet_func_6(void *descr[], void *arg)
 {
 	struct cg_problem *pb = arg;
 	float *vecx, *vecd;
 	uint32_t size;
 	
 	/* get the vector */
-	vecx = (float *)descr[0].vector.ptr;
-	vecd = (float *)descr[1].vector.ptr;
+	vecx = (float *)GET_VECTOR_PTR(descr[0]);
+	vecd = (float *)GET_VECTOR_PTR(descr[1]);
 
-	size = descr[0].vector.nx;
+	size = GET_VECTOR_NX(descr[0]);
 
 	cublasSaxpy (size, pb->alpha, vecd, 1, vecx, 1);
 }
@@ -295,33 +295,33 @@ void cublas_codelet_func_6(starpu_data_interface_t *descr, void *arg)
  *		args = &alpha
  */
 
-void core_codelet_func_7(starpu_data_interface_t *descr, void *arg)
+void core_codelet_func_7(void *descr[], void *arg)
 {
 	struct cg_problem *pb = arg;
 	float *vecr, *vecq;
 	uint32_t size;
 	
 	/* get the vector */
-	vecr = (float *)descr[0].vector.ptr;
-	vecq = (float *)descr[1].vector.ptr;
+	vecr = (float *)GET_VECTOR_PTR(descr[0]);
+	vecq = (float *)GET_VECTOR_PTR(descr[1]);
 
-	size = descr[0].vector.nx;
+	size = GET_VECTOR_NX(descr[0]);
 
 	SAXPY(size, -pb->alpha, vecq, 1, vecr, 1);
 }
 
 #ifdef USE_CUDA
-void cublas_codelet_func_7(starpu_data_interface_t *descr, void *arg)
+void cublas_codelet_func_7(void *descr[], void *arg)
 {
 	struct cg_problem *pb = arg;
 	float *vecr, *vecq;
 	uint32_t size;
 	
 	/* get the vector */
-	vecr = (float *)descr[0].vector.ptr;
-	vecq = (float *)descr[1].vector.ptr;
+	vecr = (float *)GET_VECTOR_PTR(descr[0]);
+	vecq = (float *)GET_VECTOR_PTR(descr[1]);
 
-	size = descr[0].vector.nx;
+	size = GET_VECTOR_NX(descr[0]);
 
 	cublasSaxpy (size, -pb->alpha, vecq, 1, vecr, 1);
 }
@@ -336,7 +336,7 @@ void cublas_codelet_func_7(starpu_data_interface_t *descr, void *arg)
  *		args = &delta_old, &delta_new, &beta
  */
 
-void core_codelet_func_8(starpu_data_interface_t *descr, void *arg)
+void core_codelet_func_8(void *descr[], void *arg)
 {
 	float dot;
 	struct cg_problem *pb = arg;
@@ -344,8 +344,8 @@ void core_codelet_func_8(starpu_data_interface_t *descr, void *arg)
 	uint32_t size;
 	
 	/* get the vector */
-	vecr = (float *)descr[0].vector.ptr;
-	size = descr[0].vector.nx;
+	vecr = (float *)GET_VECTOR_PTR(descr[0]);
+	size = GET_VECTOR_NX(descr[0]);
 
 	dot = SDOT(size, vecr, 1, vecr, 1);
 
@@ -355,7 +355,7 @@ void core_codelet_func_8(starpu_data_interface_t *descr, void *arg)
 }
 
 #ifdef USE_CUDA
-void cublas_codelet_func_8(starpu_data_interface_t *descr, void *arg)
+void cublas_codelet_func_8(void *descr[], void *arg)
 {
 	float dot;
 	struct cg_problem *pb = arg;
@@ -363,8 +363,8 @@ void cublas_codelet_func_8(starpu_data_interface_t *descr, void *arg)
 	uint32_t size;
 	
 	/* get the vector */
-	vecr = (float *)descr[0].vector.ptr;
-	size = descr[0].vector.nx;
+	vecr = (float *)GET_VECTOR_PTR(descr[0]);
+	size = GET_VECTOR_NX(descr[0]);
 
 	dot = cublasSdot (size, vecr, 1, vecr, 1);
 
@@ -382,17 +382,17 @@ void cublas_codelet_func_8(starpu_data_interface_t *descr, void *arg)
  *
  */
 
-void core_codelet_func_9(starpu_data_interface_t *descr, void *arg)
+void core_codelet_func_9(void *descr[], void *arg)
 {
 	struct cg_problem *pb = arg;
 	float *vecd, *vecr;
 	uint32_t size;
 	
 	/* get the vector */
-	vecd = (float *)descr[0].vector.ptr;
-	vecr = (float *)descr[1].vector.ptr;
+	vecd = (float *)GET_VECTOR_PTR(descr[0]);
+	vecr = (float *)GET_VECTOR_PTR(descr[1]);
 
-	size = descr[0].vector.nx;
+	size = GET_VECTOR_NX(descr[0]);
 
 	/* d = beta d */
 	SSCAL(size, pb->beta, vecd, 1);
@@ -402,17 +402,17 @@ void core_codelet_func_9(starpu_data_interface_t *descr, void *arg)
 }
 
 #ifdef USE_CUDA
-void cublas_codelet_func_9(starpu_data_interface_t *descr, void *arg)
+void cublas_codelet_func_9(void *descr[], void *arg)
 {
 	struct cg_problem *pb = arg;
 	float *vecd, *vecr;
 	uint32_t size;
 	
 	/* get the vector */
-	vecd = (float *)descr[0].vector.ptr;
-	vecr = (float *)descr[1].vector.ptr;
+	vecd = (float *)GET_VECTOR_PTR(descr[0]);
+	vecr = (float *)GET_VECTOR_PTR(descr[1]);
 
-	size = descr[0].vector.nx;
+	size = GET_VECTOR_NX(descr[0]);
 
 	/* d = beta d */
 	cublasSscal(size, pb->beta, vecd, 1);

+ 3 - 3
examples/incrementer/incrementer.c

@@ -20,15 +20,15 @@
 #define NITER	50000
 
 #ifdef USE_CUDA
-extern void cuda_codelet(starpu_data_interface_t *buffers, __attribute__ ((unused)) void *_args);
+extern void cuda_codelet(void *descr[], __attribute__ ((unused)) void *_args);
 #endif
 
 
 extern void cuda_codelet_host(float *tab);
 
-void core_codelet(starpu_data_interface_t *buffers, __attribute__ ((unused)) void *_args)
+void core_codelet(void *descr[], __attribute__ ((unused)) void *_args)
 {
-	float *val = (float *)buffers[0].vector.ptr;
+	float *val = (float *)GET_VECTOR_PTR(descr[0]);
 
 	val[0] += 1.0f; val[1] += 1.0f;
 }

+ 2 - 2
examples/incrementer/incrementer_kernels.cu

@@ -24,9 +24,9 @@ static __global__ void cuda_incrementer(float * tab)
 	return;
 }
 
-extern "C" void cuda_codelet(starpu_data_interface_t *buffers, __attribute__ ((unused)) void *_args)
+extern "C" void cuda_codelet(void *descr[], __attribute__ ((unused)) void *_args)
 {
-	float *val = (float *)buffers[0].vector.ptr;
+	float *val = (float *)GET_VECTOR_PTR(descr[0]);
 
 	cuda_incrementer<<<1,1>>>(val);
 }

+ 8 - 8
examples/lu/xlu.h

@@ -74,16 +74,16 @@ static void __attribute__ ((unused)) compare_A_LU(float *A, float *LU,
 }
 #endif // CHECK_RESULTS
 
-void dw_core_codelet_update_u11(starpu_data_interface_t *, void *);
-void dw_core_codelet_update_u12(starpu_data_interface_t *, void *);
-void dw_core_codelet_update_u21(starpu_data_interface_t *, void *);
-void dw_core_codelet_update_u22(starpu_data_interface_t *, void *);
+void dw_core_codelet_update_u11(void **, void *);
+void dw_core_codelet_update_u12(void **, void *);
+void dw_core_codelet_update_u21(void **, void *);
+void dw_core_codelet_update_u22(void **, void *);
 
 #ifdef USE_CUDA
-void dw_cublas_codelet_update_u11(starpu_data_interface_t *descr, void *_args);
-void dw_cublas_codelet_update_u12(starpu_data_interface_t *descr, void *_args);
-void dw_cublas_codelet_update_u21(starpu_data_interface_t *descr, void *_args);
-void dw_cublas_codelet_update_u22(starpu_data_interface_t *descr, void *_args);
+void dw_cublas_codelet_update_u11(void *descr[], void *_args);
+void dw_cublas_codelet_update_u12(void *descr[], void *_args);
+void dw_cublas_codelet_update_u21(void *descr[], void *_args);
+void dw_cublas_codelet_update_u22(void *descr[], void *_args);
 #endif
 
 void dw_callback_codelet_update_u11(void *);

+ 48 - 48
examples/lu/xlu_kernels.c

@@ -21,20 +21,20 @@
  *   U22 
  */
 
-static inline void STARPU_LU(common_u22)(starpu_data_interface_t *buffers,
+static inline void STARPU_LU(common_u22)(void *descr[],
 				int s, __attribute__((unused)) void *_args)
 {
-	TYPE *right 	= (TYPE *)buffers[0].blas.ptr;
-	TYPE *left 	= (TYPE *)buffers[1].blas.ptr;
-	TYPE *center 	= (TYPE *)buffers[2].blas.ptr;
+	TYPE *right 	= (TYPE *)GET_BLAS_PTR(descr[0]);
+	TYPE *left 	= (TYPE *)GET_BLAS_PTR(descr[1]);
+	TYPE *center 	= (TYPE *)GET_BLAS_PTR(descr[2]);
 
-	unsigned dx = buffers[2].blas.nx;
-	unsigned dy = buffers[2].blas.ny;
-	unsigned dz = buffers[0].blas.ny;
+	unsigned dx = GET_BLAS_NX(descr[2]);
+	unsigned dy = GET_BLAS_NY(descr[2]);
+	unsigned dz = GET_BLAS_NY(descr[0]);
 
-	unsigned ld12 = buffers[0].blas.ld;
-	unsigned ld21 = buffers[1].blas.ld;
-	unsigned ld22 = buffers[2].blas.ld;
+	unsigned ld12 = GET_BLAS_LD(descr[0]);
+	unsigned ld21 = GET_BLAS_LD(descr[1]);
+	unsigned ld22 = GET_BLAS_LD(descr[2]);
 
 #ifdef USE_CUDA
 	cublasStatus status;
@@ -69,13 +69,13 @@ static inline void STARPU_LU(common_u22)(starpu_data_interface_t *buffers,
 	}
 }
 
-void STARPU_LU(cpu_u22)(starpu_data_interface_t *descr, void *_args)
+void STARPU_LU(cpu_u22)(void *descr[], void *_args)
 {
 	STARPU_LU(common_u22)(descr, 0, _args);
 }
 
 #ifdef USE_CUDA
-void STARPU_LU(cublas_u22)(starpu_data_interface_t *descr, void *_args)
+void STARPU_LU(cublas_u22)(void *descr[], void *_args)
 {
 	STARPU_LU(common_u22)(descr, 1, _args);
 }
@@ -85,20 +85,20 @@ void STARPU_LU(cublas_u22)(starpu_data_interface_t *descr, void *_args)
  * U12
  */
 
-static inline void STARPU_LU(common_u12)(starpu_data_interface_t *buffers,
+static inline void STARPU_LU(common_u12)(void *descr[],
 				int s, __attribute__((unused)) void *_args)
 {
 	TYPE *sub11;
 	TYPE *sub12;
 
-	sub11 = (TYPE *)buffers[0].blas.ptr;	
-	sub12 = (TYPE *)buffers[1].blas.ptr;
+	sub11 = (TYPE *)GET_BLAS_PTR(descr[0]);	
+	sub12 = (TYPE *)GET_BLAS_PTR(descr[1]);
 
-	unsigned ld11 = buffers[0].blas.ld;
-	unsigned ld12 = buffers[1].blas.ld;
+	unsigned ld11 = GET_BLAS_LD(descr[0]);
+	unsigned ld12 = GET_BLAS_LD(descr[1]);
 
-	unsigned nx12 = buffers[1].blas.nx;
-	unsigned ny12 = buffers[1].blas.ny;
+	unsigned nx12 = GET_BLAS_NX(descr[1]);
+	unsigned ny12 = GET_BLAS_NY(descr[1]);
 
 #ifdef USE_CUDA
 	cublasStatus status;
@@ -131,13 +131,13 @@ static inline void STARPU_LU(common_u12)(starpu_data_interface_t *buffers,
 	}
 }
 
-void STARPU_LU(cpu_u12)(starpu_data_interface_t *descr, void *_args)
+void STARPU_LU(cpu_u12)(void *descr[], void *_args)
 {
 	STARPU_LU(common_u12)(descr, 0, _args);
 }
 
 #ifdef USE_CUDA
-void STARPU_LU(cublas_u12)(starpu_data_interface_t *descr, void *_args)
+void STARPU_LU(cublas_u12)(void *descr[], void *_args)
 {
 	STARPU_LU(common_u12)(descr, 1, _args);
 }
@@ -147,20 +147,20 @@ void STARPU_LU(cublas_u12)(starpu_data_interface_t *descr, void *_args)
  * U21
  */
 
-static inline void STARPU_LU(common_u21)(starpu_data_interface_t *buffers,
+static inline void STARPU_LU(common_u21)(void *descr[],
 				int s, __attribute__((unused)) void *_args)
 {
 	TYPE *sub11;
 	TYPE *sub21;
 
-	sub11 = (TYPE *)buffers[0].blas.ptr;
-	sub21 = (TYPE *)buffers[1].blas.ptr;
+	sub11 = (TYPE *)GET_BLAS_PTR(descr[0]);
+	sub21 = (TYPE *)GET_BLAS_PTR(descr[1]);
 
-	unsigned ld11 = buffers[0].blas.ld;
-	unsigned ld21 = buffers[1].blas.ld;
+	unsigned ld11 = GET_BLAS_LD(descr[0]);
+	unsigned ld21 = GET_BLAS_LD(descr[1]);
 
-	unsigned nx21 = buffers[1].blas.nx;
-	unsigned ny21 = buffers[1].blas.ny;
+	unsigned nx21 = GET_BLAS_NX(descr[1]);
+	unsigned ny21 = GET_BLAS_NY(descr[1]);
 	
 #ifdef USE_CUDA
 	cublasStatus status;
@@ -191,13 +191,13 @@ static inline void STARPU_LU(common_u21)(starpu_data_interface_t *buffers,
 	}
 }
 
-void STARPU_LU(cpu_u21)(starpu_data_interface_t *descr, void *_args)
+void STARPU_LU(cpu_u21)(void *descr[], void *_args)
 {
 	STARPU_LU(common_u21)(descr, 0, _args);
 }
 
 #ifdef USE_CUDA
-void STARPU_LU(cublas_u21)(starpu_data_interface_t *descr, void *_args)
+void STARPU_LU(cublas_u21)(void *descr[], void *_args)
 {
 	STARPU_LU(common_u21)(descr, 1, _args);
 }
@@ -207,15 +207,15 @@ void STARPU_LU(cublas_u21)(starpu_data_interface_t *descr, void *_args)
  *	U11
  */
 
-static inline void STARPU_LU(common_u11)(starpu_data_interface_t *descr,
+static inline void STARPU_LU(common_u11)(void *descr[],
 				int s, __attribute__((unused)) void *_args)
 {
 	TYPE *sub11;
 
-	sub11 = (TYPE *)descr[0].blas.ptr; 
+	sub11 = (TYPE *)GET_BLAS_PTR(descr[0]); 
 
-	unsigned long nx = descr[0].blas.nx;
-	unsigned long ld = descr[0].blas.ld;
+	unsigned long nx = GET_BLAS_NX(descr[0]);
+	unsigned long ld = GET_BLAS_LD(descr[0]);
 
 	unsigned long z;
 
@@ -263,13 +263,13 @@ static inline void STARPU_LU(common_u11)(starpu_data_interface_t *descr,
 	}
 }
 
-void STARPU_LU(cpu_u11)(starpu_data_interface_t *descr, void *_args)
+void STARPU_LU(cpu_u11)(void *descr[], void *_args)
 {
 	STARPU_LU(common_u11)(descr, 0, _args);
 }
 
 #ifdef USE_CUDA
-void STARPU_LU(cublas_u11)(starpu_data_interface_t *descr, void *_args)
+void STARPU_LU(cublas_u11)(void *descr[], void *_args)
 {
 	STARPU_LU(common_u11)(descr, 1, _args);
 }
@@ -279,15 +279,15 @@ void STARPU_LU(cublas_u11)(starpu_data_interface_t *descr, void *_args)
  *	U11 with pivoting
  */
 
-static inline void STARPU_LU(common_u11_pivot)(starpu_data_interface_t *descr,
+static inline void STARPU_LU(common_u11_pivot)(void *descr[],
 				int s, void *_args)
 {
 	TYPE *sub11;
 
-	sub11 = (TYPE *)descr[0].blas.ptr; 
+	sub11 = (TYPE *)GET_BLAS_PTR(descr[0]); 
 
-	unsigned long nx = descr[0].blas.nx;
-	unsigned long ld = descr[0].blas.ld;
+	unsigned long nx = GET_BLAS_NX(descr[0]);
+	unsigned long ld = GET_BLAS_LD(descr[0]);
 
 	unsigned long z;
 
@@ -378,13 +378,13 @@ static inline void STARPU_LU(common_u11_pivot)(starpu_data_interface_t *descr,
 	}
 }
 
-void STARPU_LU(cpu_u11_pivot)(starpu_data_interface_t *descr, void *_args)
+void STARPU_LU(cpu_u11_pivot)(void *descr[], void *_args)
 {
 	STARPU_LU(common_u11_pivot)(descr, 0, _args);
 }
 
 #ifdef USE_CUDA
-void STARPU_LU(cublas_u11_pivot)(starpu_data_interface_t *descr, void *_args)
+void STARPU_LU(cublas_u11_pivot)(void *descr[], void *_args)
 {
 	STARPU_LU(common_u11_pivot)(descr, 1, _args);
 }
@@ -394,14 +394,14 @@ void STARPU_LU(cublas_u11_pivot)(starpu_data_interface_t *descr, void *_args)
  *	Pivoting
  */
 
-static inline void STARPU_LU(common_pivot)(starpu_data_interface_t *descr,
+static inline void STARPU_LU(common_pivot)(void *descr[],
 				int s, void *_args)
 {
 	TYPE *matrix;
 
-	matrix = (TYPE *)descr[0].blas.ptr; 
-	unsigned long nx = descr[0].blas.nx;
-	unsigned long ld = descr[0].blas.ld;
+	matrix = (TYPE *)GET_BLAS_PTR(descr[0]); 
+	unsigned long nx = GET_BLAS_NX(descr[0]);
+	unsigned long ld = GET_BLAS_LD(descr[0]);
 
 	unsigned row, rowaux;
 
@@ -442,13 +442,13 @@ static inline void STARPU_LU(common_pivot)(starpu_data_interface_t *descr,
 	}
 }
 
-void STARPU_LU(cpu_pivot)(starpu_data_interface_t *descr, void *_args)
+void STARPU_LU(cpu_pivot)(void *descr[], void *_args)
 {
 	STARPU_LU(common_pivot)(descr, 0, _args);
 }
 
 #ifdef USE_CUDA
-void STARPU_LU(cublas_pivot)(starpu_data_interface_t *descr, void *_args)
+void STARPU_LU(cublas_pivot)(void *descr[], void *_args)
 {
 	STARPU_LU(common_pivot)(descr, 1, _args);
 }

+ 12 - 12
examples/lu/xlu_kernels.h

@@ -23,20 +23,20 @@
 #define xstr(s)        str(s)
 #define STARPU_LU_STR(name)  xstr(STARPU_LU(name))
 
-void STARPU_LU(cpu_pivot)(starpu_data_interface_t *descr, void *_args);
-void STARPU_LU(cpu_u11_pivot)(starpu_data_interface_t *descr, void *_args);
-void STARPU_LU(cpu_u11)(starpu_data_interface_t *descr, void *_args);
-void STARPU_LU(cpu_u12)(starpu_data_interface_t *descr, void *_args);
-void STARPU_LU(cpu_u21)(starpu_data_interface_t *descr, void *_args);
-void STARPU_LU(cpu_u22)(starpu_data_interface_t *descr, void *_args);
+void STARPU_LU(cpu_pivot)(void *descr[], void *_args);
+void STARPU_LU(cpu_u11_pivot)(void *descr[], void *_args);
+void STARPU_LU(cpu_u11)(void *descr[], void *_args);
+void STARPU_LU(cpu_u12)(void *descr[], void *_args);
+void STARPU_LU(cpu_u21)(void *descr[], void *_args);
+void STARPU_LU(cpu_u22)(void *descr[], void *_args);
 
 #ifdef USE_CUDA
-void STARPU_LU(cublas_pivot)(starpu_data_interface_t *descr, void *_args);
-void STARPU_LU(cublas_u11_pivot)(starpu_data_interface_t *descr, void *_args);
-void STARPU_LU(cublas_u11)(starpu_data_interface_t *descr, void *_args);
-void STARPU_LU(cublas_u12)(starpu_data_interface_t *descr, void *_args);
-void STARPU_LU(cublas_u21)(starpu_data_interface_t *descr, void *_args);
-void STARPU_LU(cublas_u22)(starpu_data_interface_t *descr, void *_args);
+void STARPU_LU(cublas_pivot)(void *descr[], void *_args);
+void STARPU_LU(cublas_u11_pivot)(void *descr[], void *_args);
+void STARPU_LU(cublas_u11)(void *descr[], void *_args);
+void STARPU_LU(cublas_u12)(void *descr[], void *_args);
+void STARPU_LU(cublas_u21)(void *descr[], void *_args);
+void STARPU_LU(cublas_u22)(void *descr[], void *_args);
 #endif
 
 #endif // __XLU_KERNELS_H__

+ 2 - 2
examples/mult/dw_mult.h

@@ -194,9 +194,9 @@ static void display_memory_consumption(void)
 }
 
 #ifdef USE_CUDA
-void cublas_mult(starpu_data_interface_t *descr, __attribute__((unused)) void *arg);
+void cublas_mult(void *descr[], __attribute__((unused)) void *arg);
 #endif
 
-void core_mult(starpu_data_interface_t *descr, __attribute__((unused))  void *arg);
+void core_mult(void *descr[], __attribute__((unused))  void *arg);
 
 #endif // __MULT_H__

+ 11 - 11
examples/mult/sgemm_kernels.c

@@ -25,22 +25,22 @@
 	float *subB;			\
 	float *subC;			\
 					\
-	subA = (float *)descr[0].blas.ptr;	\
-	subB = (float *)descr[1].blas.ptr;	\
-	subC = (float *)descr[2].blas.ptr;	\
+	subA = (float *)GET_BLAS_PTR(descr[0]);	\
+	subB = (float *)GET_BLAS_PTR(descr[1]);	\
+	subC = (float *)GET_BLAS_PTR(descr[2]);	\
 					\
-	nxC = descr[2].blas.nx;		\
-	nyC = descr[2].blas.ny;		\
-	nyA = descr[0].blas.ny;		\
+	nxC = GET_BLAS_NX(descr[2]);		\
+	nyC = GET_BLAS_NY(descr[2]);		\
+	nyA = GET_BLAS_NY(descr[0]);		\
 					\
-	ldA = descr[0].blas.ld;		\
-	ldB = descr[1].blas.ld;		\
-	ldC = descr[2].blas.ld;
+	ldA = GET_BLAS_LD(descr[0]);		\
+	ldB = GET_BLAS_LD(descr[1]);		\
+	ldC = GET_BLAS_LD(descr[2]);
 
 
 
 #ifdef USE_CUDA
-void cublas_mult(starpu_data_interface_t *descr, __attribute__((unused)) void *arg)
+void cublas_mult(void *descr[], __attribute__((unused)) void *arg)
 {
 	COMMON_CODE
 
@@ -59,7 +59,7 @@ void cublas_mult(starpu_data_interface_t *descr, __attribute__((unused)) void *a
 }
 #endif
 
-void core_mult(starpu_data_interface_t *descr, __attribute__((unused))  void *arg)
+void core_mult(void *descr[], __attribute__((unused))  void *arg)
 {
 	COMMON_CODE
 

+ 11 - 11
examples/mult/xgemm_kernels.c

@@ -25,22 +25,22 @@
 	TYPE *subB;			\
 	TYPE *subC;			\
 					\
-	subA = (TYPE *)descr[0].blas.ptr;	\
-	subB = (TYPE *)descr[1].blas.ptr;	\
-	subC = (TYPE *)descr[2].blas.ptr;	\
+	subA = (TYPE *)GET_BLAS_PTR(descr[0]);	\
+	subB = (TYPE *)GET_BLAS_PTR(descr[1]);	\
+	subC = (TYPE *)GET_BLAS_PTR(descr[2]);	\
 					\
-	nxC = descr[2].blas.nx;		\
-	nyC = descr[2].blas.ny;		\
-	nyA = descr[0].blas.ny;		\
+	nxC = GET_BLAS_NX(descr[2]);		\
+	nyC = GET_BLAS_NY(descr[2]);		\
+	nyA = GET_BLAS_NY(descr[0]);		\
 					\
-	ldA = descr[0].blas.ld;		\
-	ldB = descr[1].blas.ld;		\
-	ldC = descr[2].blas.ld;
+	ldA = GET_BLAS_LD(descr[0]);		\
+	ldB = GET_BLAS_LD(descr[1]);		\
+	ldC = GET_BLAS_LD(descr[2]);
 
 
 
 #ifdef USE_CUDA
-void STARPU_GEMM(cublas_mult)(starpu_data_interface_t *descr, __attribute__((unused)) void *arg)
+void STARPU_GEMM(cublas_mult)(void *descr[], __attribute__((unused)) void *arg)
 {
 	COMMON_CODE
 
@@ -59,7 +59,7 @@ void STARPU_GEMM(cublas_mult)(starpu_data_interface_t *descr, __attribute__((unu
 }
 #endif
 
-void STARPU_GEMM(core_mult)(starpu_data_interface_t *descr, __attribute__((unused))  void *arg)
+void STARPU_GEMM(core_mult)(void *descr[], __attribute__((unused))  void *arg)
 {
 	COMMON_CODE
 

+ 45 - 45
examples/pastix-wrappers/starpu-blas-wrapper.c

@@ -195,7 +195,7 @@ void STARPU_MONITOR_CBLK(unsigned col, float *data, unsigned stride, unsigned wi
 static data_state work_block_1;
 static data_state work_block_2;
 
-void allocate_maxbloktab_on_cublas(starpu_data_interface_t *descr __attribute__((unused)), void *arg __attribute__((unused)))
+void allocate_maxbloktab_on_cublas(void *descr[] __attribute__((unused)), void *arg __attribute__((unused)))
 {
 	starpu_request_data_allocation(&work_block_1, 1);
 	starpu_request_data_allocation(&work_block_2, 1);
@@ -253,15 +253,15 @@ void STARPU_DECLARE_WORK_BLOCKS(float *maxbloktab1, float *maxbloktab2, unsigned
 
 }
 
-void _core_cblk_strsm(starpu_data_interface_t *descr, void *arg __attribute__((unused)))
+void _core_cblk_strsm(void *descr[], void *arg __attribute__((unused)))
 {
 	uint32_t nx, ny, ld;
-	nx = descr[0].blas.nx;
-	ny = descr[0].blas.ny;
-	ld = descr[0].blas.ld;
+	nx = GET_BLAS_NX(descr[0]);
+	ny = GET_BLAS_NY(descr[0]);
+	ld = GET_BLAS_LD(descr[0]);
 
 	float *diag_cblkdata, *extra_cblkdata;
-	diag_cblkdata = (float *)descr[0].blas.ptr;
+	diag_cblkdata = (float *)GET_BLAS_PTR(descr[0]);
 	extra_cblkdata = diag_cblkdata + ny;
 
 	unsigned m = nx - ny;
@@ -275,15 +275,15 @@ void _core_cblk_strsm(starpu_data_interface_t *descr, void *arg __attribute__((u
 }
 
 
-void _cublas_cblk_strsm(starpu_data_interface_t *descr, void *arg __attribute__((unused)))
+void _cublas_cblk_strsm(void *descr[], void *arg __attribute__((unused)))
 {
 	uint32_t nx, ny, ld;
-	nx = descr[0].blas.nx;
-	ny = descr[0].blas.ny;
-	ld = descr[0].blas.ld;
+	nx = GET_BLAS_NX(descr[0]);
+	ny = GET_BLAS_NY(descr[0]);
+	ld = GET_BLAS_LD(descr[0]);
 
 	float *diag_cblkdata, *extra_cblkdata;
-	diag_cblkdata = (float *)descr[0].blas.ptr;
+	diag_cblkdata = (float *)GET_BLAS_PTR(descr[0]);
 	extra_cblkdata = diag_cblkdata + ny;
 
 	unsigned m = nx - ny;
@@ -354,15 +354,15 @@ struct starpu_compute_contrib_compact_args {
 };
 
 
-void _core_compute_contrib_compact(starpu_data_interface_t *descr, void *arg)
+void _core_compute_contrib_compact(void *descr[], void *arg)
 {
 	struct starpu_compute_contrib_compact_args *args = arg;
 
-	float *gaik = (float *)descr[0].blas.ptr + args->dima;
-	float *gb = (float *)descr[1].blas.ptr; 
-	unsigned strideb = (unsigned)descr[1].blas.ld;
-	float *gc = (float *)descr[2].blas.ptr;
-	unsigned stridec = (unsigned)descr[2].blas.ld;
+	float *gaik = (float *)GET_BLAS_PTR(descr[0]) + args->dima;
+	float *gb = (float *)GET_BLAS_PTR(descr[1]); 
+	unsigned strideb = (unsigned)GET_BLAS_LD(descr[1]);
+	float *gc = (float *)GET_BLAS_PTR(descr[2]);
+	unsigned stridec = (unsigned)GET_BLAS_LD(descr[2]);
 
 	core_sgemm++;
 
@@ -375,15 +375,15 @@ void _core_compute_contrib_compact(starpu_data_interface_t *descr, void *arg)
 }
 
 
-void _cublas_compute_contrib_compact(starpu_data_interface_t *descr, void *arg)
+void _cublas_compute_contrib_compact(void *descr[], void *arg)
 {
 	struct starpu_compute_contrib_compact_args *args = arg;
 
-	float *gaik = (float *)descr[0].blas.ptr + args->dima;
-	float *gb = (float *)descr[1].blas.ptr;
-	unsigned strideb = (unsigned)descr[1].blas.ld;
-	float *gc = (float *)descr[2].blas.ptr;
-	unsigned stridec = (unsigned)descr[2].blas.ld;
+	float *gaik = (float *)GET_BLAS_PTR(descr[0]) + args->dima;
+	float *gb = (float *)GET_BLAS_PTR(descr[1]);
+	unsigned strideb = (unsigned)GET_BLAS_LD(descr[1]);
+	float *gc = (float *)GET_BLAS_PTR(descr[2]);
+	unsigned stridec = (unsigned)GET_BLAS_LD(descr[2]);
 	
 	cublas_sgemm++;
 
@@ -508,27 +508,27 @@ struct sgemm_args {
 };
 
 
-void _cublas_sgemm(starpu_data_interface_t *descr, void *arg)
+void _cublas_sgemm(void *descr[], void *arg)
 {
 	float *A, *B, *C;
 	uint32_t nxA, nyA, ldA;
 	uint32_t nxB, nyB, ldB;
 	uint32_t nxC, nyC, ldC;
 
-	A = (float *)descr[0].blas.ptr;
-	nxA = descr[0].blas.nx;
-	nyA = descr[0].blas.ny;
-	ldA = descr[0].blas.ld;
+	A = (float *)GET_BLAS_PTR(descr[0]);
+	nxA = GET_BLAS_NX(descr[0]);
+	nyA = GET_BLAS_NY(descr[0]);
+	ldA = GET_BLAS_LD(descr[0]);
 
-	B = (float *)descr[1].blas.ptr;
-	nxB = descr[1].blas.nx;
-	nyB = descr[1].blas.ny;
-	ldB = descr[1].blas.ld;
+	B = (float *)GET_BLAS_PTR(descr[1]);
+	nxB = GET_BLAS_NX(descr[1]);
+	nyB = GET_BLAS_NY(descr[1]);
+	ldB = GET_BLAS_LD(descr[1]);
 
-	C = (float *)descr[2].blas.ptr;
-	nxC = descr[2].blas.nx;
-	nyC = descr[2].blas.ny;
-	ldC = descr[2].blas.ld;
+	C = (float *)GET_BLAS_PTR(descr[2]);
+	nxC = GET_BLAS_NX(descr[2]);
+	nyC = GET_BLAS_NY(descr[2]);
+	ldC = GET_BLAS_LD(descr[2]);
 
 	struct sgemm_args *args = arg;
 
@@ -656,21 +656,21 @@ struct strsm_args {
 	int m,n;
 };
 //
-//void _core_strsm(starpu_data_interface_t *descr, void *arg)
+//void _core_strsm(void *descr[], void *arg)
 //{
 //	float *A, *B;
 //	uint32_t nxA, nyA, ldA;
 //	uint32_t nxB, nyB, ldB;
 //
-//	A = (float *)descr[0].blas.ptr;
-//	nxA = descr[0].blas.nx;
-//	nyA = descr[0].blas.ny;
-//	ldA = descr[0].blas.ld;
+//	A = (float *)GET_BLAS_PTR(descr[0]);
+//	nxA = GET_BLAS_NX(descr[0]);
+//	nyA = GET_BLAS_NY(descr[0]);
+//	ldA = GET_BLAS_LD(descr[0]);
 //
-//	B = (float *)descr[1].blas.ptr;
-//	nxB = descr[1].blas.nx;
-//	nyB = descr[1].blas.ny;
-//	ldB = descr[1].blas.ld;
+//	B = (float *)GET_BLAS_PTR(descr[1]);
+//	nxB = GET_BLAS_NX(descr[1]);
+//	nyB = GET_BLAS_NY(descr[1]);
+//	ldB = GET_BLAS_LD(descr[1]);
 //
 //	struct strsm_args *args = arg;
 //

+ 7 - 7
examples/ppm-downscaler/yuv-downscaler.c

@@ -66,16 +66,16 @@ static void ds_callback(void *arg)
 	}
 }
 
-static void ds_kernel_cpu(starpu_data_interface_t *descr, __attribute__((unused)) void *arg)
+static void ds_kernel_cpu(void *descr[], __attribute__((unused)) void *arg)
 {
-	uint8_t *input = (uint8_t *)descr[0].blas.ptr;
-	unsigned input_ld = descr[0].blas.ld;
+	uint8_t *input = (uint8_t *)GET_BLAS_PTR(descr[0]);
+	unsigned input_ld = GET_BLAS_LD(descr[0]);
 
-	uint8_t *output = (uint8_t *)descr[1].blas.ptr;
-	unsigned output_ld = descr[1].blas.ld;
+	uint8_t *output = (uint8_t *)GET_BLAS_PTR(descr[1]);
+	unsigned output_ld = GET_BLAS_LD(descr[1]);
 
-	unsigned ncols = descr[0].blas.nx;
-	unsigned nlines = descr[0].blas.ny;
+	unsigned ncols = GET_BLAS_NX(descr[0]);
+	unsigned nlines = GET_BLAS_NY(descr[0]);
 
 	unsigned line, col;
 	for (line = 0; line < nlines; line+=FACTOR)

+ 2 - 2
examples/spmv/dw_block_spmv.h

@@ -32,10 +32,10 @@
 #include <cublas.h>
 #endif
 
-void core_block_spmv(starpu_data_interface_t *descr, void *_args);
+void core_block_spmv(void *descr[], void *_args);
 
 #ifdef USE_CUDA
-void cublas_block_spmv(starpu_data_interface_t *descr, void *_args);
+void cublas_block_spmv(void *descr[], void *_args);
 #endif // USE_CUDA
 
 #endif // __DW_BLOCK_SPMV_H__

+ 9 - 9
examples/spmv/dw_block_spmv_kernels.c

@@ -20,17 +20,17 @@
  *   U22 
  */
 
-static inline void common_block_spmv(starpu_data_interface_t *buffers, int s, __attribute__((unused)) void *_args)
+static inline void common_block_spmv(void *descr[], int s, __attribute__((unused)) void *_args)
 {
 	//printf("22\n");
-	float *block 	= (float *)buffers[0].blas.ptr;
-	float *in 	= (float *)buffers[1].vector.ptr;
-	float *out 	= (float *)buffers[2].vector.ptr;
+	float *block 	= (float *)GET_BLAS_PTR(descr[0]);
+	float *in 	= (float *)GET_VECTOR_PTR(descr[1]);
+	float *out 	= (float *)GET_VECTOR_PTR(descr[2]);
 
-	unsigned dx = buffers[0].blas.nx;
-	unsigned dy = buffers[0].blas.ny;
+	unsigned dx = GET_BLAS_NX(descr[0]);
+	unsigned dy = GET_BLAS_NY(descr[0]);
 
-	unsigned ld = buffers[0].blas.ld;
+	unsigned ld = GET_BLAS_LD(descr[0]);
 
 	switch (s) {
 		case 0:
@@ -47,7 +47,7 @@ static inline void common_block_spmv(starpu_data_interface_t *buffers, int s, __
 	}
 }
 
-void core_block_spmv(starpu_data_interface_t *descr, void *_args)
+void core_block_spmv(void *descr[], void *_args)
 {
 //	printf("CORE CODELET \n");
 
@@ -55,7 +55,7 @@ void core_block_spmv(starpu_data_interface_t *descr, void *_args)
 }
 
 #ifdef USE_CUDA
-void cublas_block_spmv(starpu_data_interface_t *descr, void *_args)
+void cublas_block_spmv(void *descr[], void *_args)
 {
 //	printf("CUBLAS CODELET \n");
 

+ 22 - 22
examples/spmv/dw_spmv.c

@@ -33,20 +33,20 @@ extern void spmv_kernel_cpu_wrapper(uint32_t nnz, uint32_t nrow, float *nzval,
 			float *vecin, uint32_t nx_in,
 			float * vecout, uint32_t nx_out);
 
-void spmv_kernel_cuda(starpu_data_interface_t *buffers, void *args)
+void spmv_kernel_cuda(void *descr[], void *args)
 {
-	uint32_t nnz = buffers[0].csr.nnz;
-	uint32_t nrow = buffers[0].csr.nrow;
-	float *nzval = (float *)buffers[0].csr.nzval;
-	uint32_t *colind = buffers[0].csr.colind;
-	uint32_t *rowptr = buffers[0].csr.rowptr;
-	uint32_t firstentry = buffers[0].csr.firstentry;
+	uint32_t nnz = GET_CSR_NNZ(descr[0]);
+	uint32_t nrow = GET_CSR_NROW(descr[0]);
+	float *nzval = (float *)GET_CSR_NZVAL(descr[0]);
+	uint32_t *colind = GET_CSR_COLIND(descr[0]);
+	uint32_t *rowptr = GET_CSR_ROWPTR(descr[0]);
+	uint32_t firstentry = GET_CSR_FIRSTENTRY(descr[0]);
 
-	float *vecin = (float *)buffers[1].vector.ptr;
-	uint32_t nx_in = buffers[1].vector.nx;
+	float *vecin = (float *)GET_VECTOR_PTR(descr[1]);
+	uint32_t nx_in = GET_VECTOR_NX(descr[1]);
 
-	float *vecout = (float *)buffers[2].vector.ptr;
-	uint32_t nx_out = buffers[2].vector.nx;
+	float *vecout = (float *)GET_VECTOR_PTR(descr[2]);
+	uint32_t nx_out = GET_VECTOR_NX(descr[2]);
 
 	spmv_kernel_cpu_wrapper(nnz, nrow, nzval, colind, rowptr, firstentry, vecin, nx_in, vecout, nx_out);
 }
@@ -91,25 +91,25 @@ void parse_args(int argc, char **argv)
 	}
 }
 
-void core_spmv(starpu_data_interface_t *descr, __attribute__((unused))  void *arg)
+void core_spmv(void *descr[], __attribute__((unused))  void *arg)
 {
-	float *nzval = (float *)descr[0].csr.nzval;
-	uint32_t *colind = descr[0].csr.colind;
-	uint32_t *rowptr = descr[0].csr.rowptr;
+	float *nzval = (float *)GET_CSR_NZVAL(descr[0]);
+	uint32_t *colind = GET_CSR_COLIND(descr[0]);
+	uint32_t *rowptr = GET_CSR_ROWPTR(descr[0]);
 
-	float *vecin = (float *)descr[1].vector.ptr;
-	float *vecout = (float *)descr[2].vector.ptr;
+	float *vecin = (float *)GET_VECTOR_PTR(descr[1]);
+	float *vecout = (float *)GET_VECTOR_PTR(descr[2]);
 
-	uint32_t firstelem = descr[0].csr.firstentry;
+	uint32_t firstelem = GET_CSR_FIRSTENTRY(descr[0]);
 
 	uint32_t nnz;
 	uint32_t nrow;
 
-	nnz = descr[0].csr.nnz;
-	nrow = descr[0].csr.nrow;
+	nnz = GET_CSR_NNZ(descr[0]);
+	nrow = GET_CSR_NROW(descr[0]);
 
-	//STARPU_ASSERT(nrow == descr[1].vector.nx);
-	STARPU_ASSERT(nrow == descr[2].vector.nx);
+	//STARPU_ASSERT(nrow == GET_VECTOR_NX(descr[1]));
+	STARPU_ASSERT(nrow == GET_VECTOR_NX(descr[2]));
 
 	unsigned row;
 	for (row = 0; row < nrow; row++)

+ 23 - 23
examples/starpufft/starpufftx1d.c

@@ -21,7 +21,7 @@
 #ifdef USE_CUDA
 /* Twist the full vector into a n2 chunk */
 static void
-STARPUFFT(twist1_1d_kernel_gpu)(starpu_data_interface_t *descr, void *_args)
+STARPUFFT(twist1_1d_kernel_gpu)(void *descr[], void *_args)
 {
 	struct STARPUFFT(args) *args = _args;
 	STARPUFFT(plan) plan = args->plan;
@@ -29,8 +29,8 @@ STARPUFFT(twist1_1d_kernel_gpu)(starpu_data_interface_t *descr, void *_args)
 	int n1 = plan->n1[0];
 	int n2 = plan->n2[0];
 
-	_cufftComplex * restrict in = (_cufftComplex *)descr[0].vector.ptr;
-	_cufftComplex * restrict twisted1 = (_cufftComplex *)descr[1].vector.ptr;
+	_cufftComplex * restrict in = (_cufftComplex *)GET_VECTOR_PTR(descr[0]);
+	_cufftComplex * restrict twisted1 = (_cufftComplex *)GET_VECTOR_PTR(descr[1]);
 	
 	cudaStream_t stream = STARPUFFT(get_local_stream)(plan, starpu_get_worker_id());
 
@@ -41,7 +41,7 @@ STARPUFFT(twist1_1d_kernel_gpu)(starpu_data_interface_t *descr, void *_args)
 
 /* Perform an n2 fft */
 static void
-STARPUFFT(fft1_1d_kernel_gpu)(starpu_data_interface_t *descr, void *_args)
+STARPUFFT(fft1_1d_kernel_gpu)(void *descr[], void *_args)
 {
 	struct STARPUFFT(args) *args = _args;
 	STARPUFFT(plan) plan = args->plan;
@@ -49,9 +49,9 @@ STARPUFFT(fft1_1d_kernel_gpu)(starpu_data_interface_t *descr, void *_args)
 	int n2 = plan->n2[0];
 	cufftResult cures;
 
-	_cufftComplex * restrict in = (_cufftComplex *)descr[0].vector.ptr;
-	_cufftComplex * restrict out = (_cufftComplex *)descr[1].vector.ptr;
-	const _cufftComplex * restrict roots = (_cufftComplex *)descr[2].vector.ptr;
+	_cufftComplex * restrict in = (_cufftComplex *)GET_VECTOR_PTR(descr[0]);
+	_cufftComplex * restrict out = (_cufftComplex *)GET_VECTOR_PTR(descr[1]);
+	const _cufftComplex * restrict roots = (_cufftComplex *)GET_VECTOR_PTR(descr[2]);
 
 	int workerid = starpu_get_worker_id();
 
@@ -78,7 +78,7 @@ STARPUFFT(fft1_1d_kernel_gpu)(starpu_data_interface_t *descr, void *_args)
 }
 
 static void
-STARPUFFT(fft2_1d_kernel_gpu)(starpu_data_interface_t *descr, void *_args)
+STARPUFFT(fft2_1d_kernel_gpu)(void *descr[], void *_args)
 {
 	struct STARPUFFT(args) *args = _args;
 	STARPUFFT(plan) plan = args->plan;
@@ -87,8 +87,8 @@ STARPUFFT(fft2_1d_kernel_gpu)(starpu_data_interface_t *descr, void *_args)
 	int n3 = n2/DIV_1D;
 	cufftResult cures;
 
-	_cufftComplex * restrict in = (_cufftComplex *)descr[0].vector.ptr;
-	_cufftComplex * restrict out = (_cufftComplex *)descr[1].vector.ptr;
+	_cufftComplex * restrict in = (_cufftComplex *)GET_VECTOR_PTR(descr[0]);
+	_cufftComplex * restrict out = (_cufftComplex *)GET_VECTOR_PTR(descr[1]);
 
 	int workerid = starpu_get_worker_id();
 
@@ -112,7 +112,7 @@ STARPUFFT(fft2_1d_kernel_gpu)(starpu_data_interface_t *descr, void *_args)
 
 /* Twist the full vector into a n2 chunk */
 static void
-STARPUFFT(twist1_1d_kernel_cpu)(starpu_data_interface_t *descr, void *_args)
+STARPUFFT(twist1_1d_kernel_cpu)(void *descr[], void *_args)
 {
 	struct STARPUFFT(args) *args = _args;
 	STARPUFFT(plan) plan = args->plan;
@@ -121,8 +121,8 @@ STARPUFFT(twist1_1d_kernel_cpu)(starpu_data_interface_t *descr, void *_args)
 	int n1 = plan->n1[0];
 	int n2 = plan->n2[0];
 
-	STARPUFFT(complex) * restrict in = (STARPUFFT(complex) *)descr[0].vector.ptr;
-	STARPUFFT(complex) * restrict twisted1 = (STARPUFFT(complex) *)descr[1].vector.ptr;
+	STARPUFFT(complex) * restrict in = (STARPUFFT(complex) *)GET_VECTOR_PTR(descr[0]);
+	STARPUFFT(complex) * restrict twisted1 = (STARPUFFT(complex) *)GET_VECTOR_PTR(descr[1]);
 
 	//printf("twist1 %d %g\n", i, (double) cabs(plan->in[i]));
 
@@ -133,7 +133,7 @@ STARPUFFT(twist1_1d_kernel_cpu)(starpu_data_interface_t *descr, void *_args)
 #ifdef HAVE_FFTW
 /* Perform an n2 fft */
 static void
-STARPUFFT(fft1_1d_kernel_cpu)(starpu_data_interface_t *descr, void *_args)
+STARPUFFT(fft1_1d_kernel_cpu)(void *descr[], void *_args)
 {
 	struct STARPUFFT(args) *args = _args;
 	STARPUFFT(plan) plan = args->plan;
@@ -142,8 +142,8 @@ STARPUFFT(fft1_1d_kernel_cpu)(starpu_data_interface_t *descr, void *_args)
 	int n2 = plan->n2[0];
 	int workerid = starpu_get_worker_id();
 
-	const STARPUFFT(complex) * restrict twisted1 = (STARPUFFT(complex) *)descr[0].vector.ptr;
-	STARPUFFT(complex) * restrict fft1 = (STARPUFFT(complex) *)descr[1].vector.ptr;
+	const STARPUFFT(complex) * restrict twisted1 = (STARPUFFT(complex) *)GET_VECTOR_PTR(descr[0]);
+	STARPUFFT(complex) * restrict fft1 = (STARPUFFT(complex) *)GET_VECTOR_PTR(descr[1]);
 
 	_fftw_complex * restrict worker_in1 = (STARPUFFT(complex) *)plan->plans[workerid].in1;
 	_fftw_complex * restrict worker_out1 = (STARPUFFT(complex) *)plan->plans[workerid].out1;
@@ -160,7 +160,7 @@ STARPUFFT(fft1_1d_kernel_cpu)(starpu_data_interface_t *descr, void *_args)
 
 /* Twist the full vector into a package of n2/DIV_1D (n1) chunks */
 static void
-STARPUFFT(twist2_1d_kernel_cpu)(starpu_data_interface_t *descr, void *_args)
+STARPUFFT(twist2_1d_kernel_cpu)(void *descr[], void *_args)
 {
 	struct STARPUFFT(args) *args = _args;
 	STARPUFFT(plan) plan = args->plan;
@@ -171,7 +171,7 @@ STARPUFFT(twist2_1d_kernel_cpu)(starpu_data_interface_t *descr, void *_args)
 	int n2 = plan->n2[0];
 	int n3 = n2/DIV_1D;
 
-	STARPUFFT(complex) * restrict twisted2 = (STARPUFFT(complex) *)descr[0].vector.ptr;
+	STARPUFFT(complex) * restrict twisted2 = (STARPUFFT(complex) *)GET_VECTOR_PTR(descr[0]);
 
 	//printf("twist2 %d %g\n", jj, (double) cabs(plan->fft1[jj]));
 
@@ -185,15 +185,15 @@ STARPUFFT(twist2_1d_kernel_cpu)(starpu_data_interface_t *descr, void *_args)
 #ifdef HAVE_FFTW
 /* Perform n2/DIV_1D (n1) ffts */
 static void
-STARPUFFT(fft2_1d_kernel_cpu)(starpu_data_interface_t *descr, void *_args)
+STARPUFFT(fft2_1d_kernel_cpu)(void *descr[], void *_args)
 {
 	struct STARPUFFT(args) *args = _args;
 	STARPUFFT(plan) plan = args->plan;
 	//int jj = args->jj;
 	int workerid = starpu_get_worker_id();
 
-	const STARPUFFT(complex) * restrict twisted2 = (STARPUFFT(complex) *)descr[0].vector.ptr;
-	STARPUFFT(complex) * restrict fft2 = (STARPUFFT(complex) *)descr[1].vector.ptr;
+	const STARPUFFT(complex) * restrict twisted2 = (STARPUFFT(complex) *)GET_VECTOR_PTR(descr[0]);
+	STARPUFFT(complex) * restrict fft2 = (STARPUFFT(complex) *)GET_VECTOR_PTR(descr[1]);
 
 	//printf("fft2 %d %g\n", jj, (double) cabs(twisted2[plan->totsize4-1]));
 
@@ -209,7 +209,7 @@ STARPUFFT(fft2_1d_kernel_cpu)(starpu_data_interface_t *descr, void *_args)
 
 /* Spread the package of n2/DIV_1D (n1) chunks into the full vector */
 static void
-STARPUFFT(twist3_1d_kernel_cpu)(starpu_data_interface_t *descr, void *_args)
+STARPUFFT(twist3_1d_kernel_cpu)(void *descr[], void *_args)
 {
 	struct STARPUFFT(args) *args = _args;
 	STARPUFFT(plan) plan = args->plan;
@@ -220,7 +220,7 @@ STARPUFFT(twist3_1d_kernel_cpu)(starpu_data_interface_t *descr, void *_args)
 	int n2 = plan->n2[0];
 	int n3 = n2/DIV_1D;
 
-	const STARPUFFT(complex) * restrict fft2 = (STARPUFFT(complex) *)descr[0].vector.ptr;
+	const STARPUFFT(complex) * restrict fft2 = (STARPUFFT(complex) *)GET_VECTOR_PTR(descr[0]);
 
 	//printf("twist3 %d %g\n", jj, (double) cabs(fft2[0]));
 

+ 24 - 24
examples/starpufft/starpufftx2d.c

@@ -25,7 +25,7 @@
 #ifdef USE_CUDA
 /* Twist the full vector into a n2,m2 chunk */
 static void
-STARPUFFT(twist1_2d_kernel_gpu)(starpu_data_interface_t *descr, void *_args)
+STARPUFFT(twist1_2d_kernel_gpu)(void *descr[], void *_args)
 {
 	struct STARPUFFT(args) *args = _args;
 	STARPUFFT(plan) plan = args->plan;
@@ -36,8 +36,8 @@ STARPUFFT(twist1_2d_kernel_gpu)(starpu_data_interface_t *descr, void *_args)
 	int m1 = plan->n1[1];
 	int m2 = plan->n2[1];
 
-	_cufftComplex * restrict in = (_cufftComplex *)descr[0].vector.ptr;
-	_cufftComplex * restrict twisted1 = (_cufftComplex *)descr[1].vector.ptr;
+	_cufftComplex * restrict in = (_cufftComplex *)GET_VECTOR_PTR(descr[0]);
+	_cufftComplex * restrict twisted1 = (_cufftComplex *)GET_VECTOR_PTR(descr[1]);
 
 	cudaStream_t stream = STARPUFFT(get_local_stream)(plan, starpu_get_worker_id());
 
@@ -47,7 +47,7 @@ STARPUFFT(twist1_2d_kernel_gpu)(starpu_data_interface_t *descr, void *_args)
 
 /* Perform an n2,m2 fft */
 static void
-STARPUFFT(fft1_2d_kernel_gpu)(starpu_data_interface_t *descr, void *_args)
+STARPUFFT(fft1_2d_kernel_gpu)(void *descr[], void *_args)
 {
 	struct STARPUFFT(args) *args = _args;
 	STARPUFFT(plan) plan = args->plan;
@@ -57,10 +57,10 @@ STARPUFFT(fft1_2d_kernel_gpu)(starpu_data_interface_t *descr, void *_args)
 	int m2 = plan->n2[1];
 	cufftResult cures;
 
-	_cufftComplex * restrict in = (_cufftComplex *)descr[0].vector.ptr;
-	_cufftComplex * restrict out = (_cufftComplex *)descr[1].vector.ptr;
-	const _cufftComplex * restrict roots0 = (_cufftComplex *)descr[2].vector.ptr;
-	const _cufftComplex * restrict roots1 = (_cufftComplex *)descr[3].vector.ptr;
+	_cufftComplex * restrict in = (_cufftComplex *)GET_VECTOR_PTR(descr[0]);
+	_cufftComplex * restrict out = (_cufftComplex *)GET_VECTOR_PTR(descr[1]);
+	const _cufftComplex * restrict roots0 = (_cufftComplex *)GET_VECTOR_PTR(descr[2]);
+	const _cufftComplex * restrict roots1 = (_cufftComplex *)GET_VECTOR_PTR(descr[3]);
 
 	int workerid = starpu_get_worker_id();
 
@@ -88,7 +88,7 @@ STARPUFFT(fft1_2d_kernel_gpu)(starpu_data_interface_t *descr, void *_args)
 }
 
 static void
-STARPUFFT(fft2_2d_kernel_gpu)(starpu_data_interface_t *descr, void *_args)
+STARPUFFT(fft2_2d_kernel_gpu)(void *descr[], void *_args)
 {
 	struct STARPUFFT(args) *args = _args;
 	STARPUFFT(plan) plan = args->plan;
@@ -101,8 +101,8 @@ STARPUFFT(fft2_2d_kernel_gpu)(starpu_data_interface_t *descr, void *_args)
 	int n;
 	cufftResult cures;
 
-	_cufftComplex * restrict in = (_cufftComplex *)descr[0].vector.ptr;
-	_cufftComplex * restrict out = (_cufftComplex *)descr[1].vector.ptr;
+	_cufftComplex * restrict in = (_cufftComplex *)GET_VECTOR_PTR(descr[0]);
+	_cufftComplex * restrict out = (_cufftComplex *)GET_VECTOR_PTR(descr[1]);
 
 	int workerid = starpu_get_worker_id();
 
@@ -127,7 +127,7 @@ STARPUFFT(fft2_2d_kernel_gpu)(starpu_data_interface_t *descr, void *_args)
 
 /* Twist the full vector into a n2,m2 chunk */
 static void
-STARPUFFT(twist1_2d_kernel_cpu)(starpu_data_interface_t *descr, void *_args)
+STARPUFFT(twist1_2d_kernel_cpu)(void *descr[], void *_args)
 {
 	struct STARPUFFT(args) *args = _args;
 	STARPUFFT(plan) plan = args->plan;
@@ -140,8 +140,8 @@ STARPUFFT(twist1_2d_kernel_cpu)(starpu_data_interface_t *descr, void *_args)
 	int m2 = plan->n2[1];
 	int m = plan->n[1];
 
-	STARPUFFT(complex) * restrict in = (STARPUFFT(complex) *)descr[0].vector.ptr;
-	STARPUFFT(complex) * restrict twisted1 = (STARPUFFT(complex) *)descr[1].vector.ptr;
+	STARPUFFT(complex) * restrict in = (STARPUFFT(complex) *)GET_VECTOR_PTR(descr[0]);
+	STARPUFFT(complex) * restrict twisted1 = (STARPUFFT(complex) *)GET_VECTOR_PTR(descr[1]);
 
 	//printf("twist1 %d %d %g\n", i, j, (double) cabs(plan->in[i+j]));
 
@@ -153,7 +153,7 @@ STARPUFFT(twist1_2d_kernel_cpu)(starpu_data_interface_t *descr, void *_args)
 #ifdef HAVE_FFTW
 /* Perform an n2,m2 fft */
 static void
-STARPUFFT(fft1_2d_kernel_cpu)(starpu_data_interface_t *descr, void *_args)
+STARPUFFT(fft1_2d_kernel_cpu)(void *descr[], void *_args)
 {
 	struct STARPUFFT(args) *args = _args;
 	STARPUFFT(plan) plan = args->plan;
@@ -164,8 +164,8 @@ STARPUFFT(fft1_2d_kernel_cpu)(starpu_data_interface_t *descr, void *_args)
 	int m2 = plan->n2[1];
 	int workerid = starpu_get_worker_id();
 
-	const STARPUFFT(complex) *twisted1 = (STARPUFFT(complex) *)descr[0].vector.ptr;
-	STARPUFFT(complex) *fft1 = (STARPUFFT(complex) *)descr[1].vector.ptr;
+	const STARPUFFT(complex) *twisted1 = (STARPUFFT(complex) *)GET_VECTOR_PTR(descr[0]);
+	STARPUFFT(complex) *fft1 = (STARPUFFT(complex) *)GET_VECTOR_PTR(descr[1]);
 
 	_fftw_complex * restrict worker_in1 = (STARPUFFT(complex) *)plan->plans[workerid].in1;
 	_fftw_complex * restrict worker_out1 = (STARPUFFT(complex) *)plan->plans[workerid].out1;
@@ -182,7 +182,7 @@ STARPUFFT(fft1_2d_kernel_cpu)(starpu_data_interface_t *descr, void *_args)
 
 /* Twist the full vector into a package of n2/DIV_2D_N,m2/DIV_2D_M (n1,m1) chunks */
 static void
-STARPUFFT(twist2_2d_kernel_cpu)(starpu_data_interface_t *descr, void *_args)
+STARPUFFT(twist2_2d_kernel_cpu)(void *descr[], void *_args)
 {
 	struct STARPUFFT(args) *args = _args;
 	STARPUFFT(plan) plan = args->plan;
@@ -197,7 +197,7 @@ STARPUFFT(twist2_2d_kernel_cpu)(starpu_data_interface_t *descr, void *_args)
 	int n3 = n2/DIV_2D_N;
 	int m3 = m2/DIV_2D_M;
 
-	STARPUFFT(complex) * restrict twisted2 = (STARPUFFT(complex) *)descr[0].vector.ptr;
+	STARPUFFT(complex) * restrict twisted2 = (STARPUFFT(complex) *)GET_VECTOR_PTR(descr[0]);
 
 	//printf("twist2 %d %d %g\n", kk, ll, (double) cabs(plan->fft1[kk+ll]));
 
@@ -215,7 +215,7 @@ STARPUFFT(twist2_2d_kernel_cpu)(starpu_data_interface_t *descr, void *_args)
 #ifdef HAVE_FFTW
 /* Perform (n2/DIV_2D_N)*(m2/DIV_2D_M) (n1,m1) ffts */
 static void
-STARPUFFT(fft2_2d_kernel_cpu)(starpu_data_interface_t *descr, void *_args)
+STARPUFFT(fft2_2d_kernel_cpu)(void *descr[], void *_args)
 {
 	struct STARPUFFT(args) *args = _args;
 	STARPUFFT(plan) plan = args->plan;
@@ -223,8 +223,8 @@ STARPUFFT(fft2_2d_kernel_cpu)(starpu_data_interface_t *descr, void *_args)
 	//int ll = args->ll;
 	int workerid = starpu_get_worker_id();
 
-	const STARPUFFT(complex) *twisted2 = (STARPUFFT(complex) *)descr[0].vector.ptr;
-	STARPUFFT(complex) *fft2 = (STARPUFFT(complex) *)descr[1].vector.ptr;
+	const STARPUFFT(complex) *twisted2 = (STARPUFFT(complex) *)GET_VECTOR_PTR(descr[0]);
+	STARPUFFT(complex) *fft2 = (STARPUFFT(complex) *)GET_VECTOR_PTR(descr[1]);
 
 	//printf("fft2 %d %d %g\n", kk, ll, (double) cabs(twisted2[plan->totsize4-1]));
 
@@ -240,7 +240,7 @@ STARPUFFT(fft2_2d_kernel_cpu)(starpu_data_interface_t *descr, void *_args)
 
 /* Spread the package of (n2/DIV_2D_N)*(m2/DIV_2D_M) (n1,m1) chunks into the full vector */
 static void
-STARPUFFT(twist3_2d_kernel_cpu)(starpu_data_interface_t *descr, void *_args)
+STARPUFFT(twist3_2d_kernel_cpu)(void *descr[], void *_args)
 {
 	struct STARPUFFT(args) *args = _args;
 	STARPUFFT(plan) plan = args->plan;
@@ -256,7 +256,7 @@ STARPUFFT(twist3_2d_kernel_cpu)(starpu_data_interface_t *descr, void *_args)
 	int m3 = m2/DIV_2D_M;
 	int m = plan->n[1];
 
-	const STARPUFFT(complex) * restrict fft2 = (STARPUFFT(complex) *)descr[0].vector.ptr;
+	const STARPUFFT(complex) * restrict fft2 = (STARPUFFT(complex) *)GET_VECTOR_PTR(descr[0]);
 
 	//printf("twist3 %d %d %g\n", kk, ll, (double) cabs(fft2[0]));
 

+ 10 - 10
examples/strassen/strassen.h

@@ -91,18 +91,18 @@ typedef struct {
 	unsigned i;
 } phase3_t;
 
-void mult_core_codelet(starpu_data_interface_t *descr, __attribute__((unused))  void *arg);
-void sub_core_codelet(starpu_data_interface_t *descr, __attribute__((unused))  void *arg);
-void add_core_codelet(starpu_data_interface_t *descr, __attribute__((unused))  void *arg);
-void self_add_core_codelet(starpu_data_interface_t *descr, __attribute__((unused))  void *arg);
-void self_sub_core_codelet(starpu_data_interface_t *descr, __attribute__((unused))  void *arg);
+void mult_core_codelet(void *descr[], __attribute__((unused))  void *arg);
+void sub_core_codelet(void *descr[], __attribute__((unused))  void *arg);
+void add_core_codelet(void *descr[], __attribute__((unused))  void *arg);
+void self_add_core_codelet(void *descr[], __attribute__((unused))  void *arg);
+void self_sub_core_codelet(void *descr[], __attribute__((unused))  void *arg);
 
 #ifdef USE_CUDA
-void mult_cublas_codelet(starpu_data_interface_t *descr, __attribute__((unused))  void *arg);
-void sub_cublas_codelet(starpu_data_interface_t *descr, __attribute__((unused))  void *arg);
-void add_cublas_codelet(starpu_data_interface_t *descr, __attribute__((unused))  void *arg);
-void self_add_cublas_codelet(starpu_data_interface_t *descr, __attribute__((unused))  void *arg);
-void self_sub_cublas_codelet(starpu_data_interface_t *descr, __attribute__((unused))  void *arg);
+void mult_cublas_codelet(void *descr[], __attribute__((unused))  void *arg);
+void sub_cublas_codelet(void *descr[], __attribute__((unused))  void *arg);
+void add_cublas_codelet(void *descr[], __attribute__((unused))  void *arg);
+void self_add_cublas_codelet(void *descr[], __attribute__((unused))  void *arg);
+void self_sub_cublas_codelet(void *descr[], __attribute__((unused))  void *arg);
 #endif
 
 void strassen(starpu_data_handle A, starpu_data_handle B, starpu_data_handle C, void (*callback)(void *), void *argcb, unsigned reclevel);

+ 36 - 36
examples/strassen/strassen_kernels.c

@@ -17,19 +17,19 @@
 #include "strassen.h"
 
 
-static void mult_common_codelet(starpu_data_interface_t *buffers, int s, __attribute__((unused))  void *arg)
+static void mult_common_codelet(void *descr[], int s, __attribute__((unused))  void *arg)
 {
-	float *center 	= (float *)buffers[0].blas.ptr;
-	float *left 	= (float *)buffers[1].blas.ptr;
-	float *right 	= (float *)buffers[2].blas.ptr;
+	float *center 	= (float *)GET_BLAS_PTR(descr[0]);
+	float *left 	= (float *)GET_BLAS_PTR(descr[1]);
+	float *right 	= (float *)GET_BLAS_PTR(descr[2]);
 
-	unsigned dx = buffers[0].blas.nx;
-	unsigned dy = buffers[0].blas.ny;
-	unsigned dz = buffers[1].blas.nx;
+	unsigned dx = GET_BLAS_NX(descr[0]);
+	unsigned dy = GET_BLAS_NY(descr[0]);
+	unsigned dz = GET_BLAS_NX(descr[1]);
 
-	unsigned ld21 = buffers[1].blas.ld;
-	unsigned ld12 = buffers[2].blas.ld;
-	unsigned ld22 = buffers[0].blas.ld;
+	unsigned ld21 = GET_BLAS_LD(descr[1]);
+	unsigned ld12 = GET_BLAS_LD(descr[2]);
+	unsigned ld22 = GET_BLAS_LD(descr[0]);
 
 	switch (s) {
 		case 0:
@@ -51,32 +51,32 @@ static void mult_common_codelet(starpu_data_interface_t *buffers, int s, __attri
 	}
 }
 
-void mult_core_codelet(starpu_data_interface_t *descr, void *_args)
+void mult_core_codelet(void *descr[], void *_args)
 {
 	mult_common_codelet(descr, 0, _args);
 }
 
 #ifdef USE_CUDA
-void mult_cublas_codelet(starpu_data_interface_t *descr, void *_args)
+void mult_cublas_codelet(void *descr[], void *_args)
 {
 	mult_common_codelet(descr, 1, _args);
 }
 #endif
 
-static void add_sub_common_codelet(starpu_data_interface_t *buffers, int s, __attribute__((unused))  void *arg, float alpha)
+static void add_sub_common_codelet(void *descr[], int s, __attribute__((unused))  void *arg, float alpha)
 {
 	/* C = A op B */
 
-	float *C 	= (float *)buffers[0].blas.ptr;
-	float *A 	= (float *)buffers[1].blas.ptr;
-	float *B 	= (float *)buffers[2].blas.ptr;
+	float *C 	= (float *)GET_BLAS_PTR(descr[0]);
+	float *A 	= (float *)GET_BLAS_PTR(descr[1]);
+	float *B 	= (float *)GET_BLAS_PTR(descr[2]);
 
-	unsigned dx = buffers[0].blas.nx;
-	unsigned dy = buffers[0].blas.ny;
+	unsigned dx = GET_BLAS_NX(descr[0]);
+	unsigned dy = GET_BLAS_NY(descr[0]);
 
-	unsigned ldA = buffers[1].blas.ld;
-	unsigned ldB = buffers[2].blas.ld;
-	unsigned ldC = buffers[0].blas.ld;
+	unsigned ldA = GET_BLAS_LD(descr[1]);
+	unsigned ldB = GET_BLAS_LD(descr[2]);
+	unsigned ldC = GET_BLAS_LD(descr[0]);
 
 	// TODO check dim ...
 
@@ -112,41 +112,41 @@ static void add_sub_common_codelet(starpu_data_interface_t *buffers, int s, __at
 	}
 }
 
-void sub_core_codelet(starpu_data_interface_t *descr, __attribute__((unused))  void *arg)
+void sub_core_codelet(void *descr[], __attribute__((unused))  void *arg)
 {
 	add_sub_common_codelet(descr, 0, arg, -1.0f);
 }
 
-void add_core_codelet(starpu_data_interface_t *descr, __attribute__((unused))  void *arg)
+void add_core_codelet(void *descr[], __attribute__((unused))  void *arg)
 {
 	add_sub_common_codelet(descr, 0, arg, 1.0f);
 }
 
 #ifdef USE_CUDA
-void sub_cublas_codelet(starpu_data_interface_t *descr, __attribute__((unused))  void *arg)
+void sub_cublas_codelet(void *descr[], __attribute__((unused))  void *arg)
 {
 	add_sub_common_codelet(descr, 1, arg, -1.0f);
 }
 
-void add_cublas_codelet(starpu_data_interface_t *descr, __attribute__((unused))  void *arg)
+void add_cublas_codelet(void *descr[], __attribute__((unused))  void *arg)
 {
 	add_sub_common_codelet(descr, 1, arg, 1.0f);
 }
 #endif
 
 
-static void self_add_sub_common_codelet(starpu_data_interface_t *buffers, int s, __attribute__((unused))  void *arg, float alpha)
+static void self_add_sub_common_codelet(void *descr[], int s, __attribute__((unused))  void *arg, float alpha)
 {
 	/* C +=/-= A */
 
-	float *C 	= (float *)buffers[0].blas.ptr;
-	float *A 	= (float *)buffers[1].blas.ptr;
+	float *C 	= (float *)GET_BLAS_PTR(descr[0]);
+	float *A 	= (float *)GET_BLAS_PTR(descr[1]);
 
-	unsigned dx = buffers[0].blas.nx;
-	unsigned dy = buffers[0].blas.ny;
+	unsigned dx = GET_BLAS_NX(descr[0]);
+	unsigned dy = GET_BLAS_NY(descr[0]);
 
-	unsigned ldA = buffers[1].blas.ld;
-	unsigned ldC = buffers[0].blas.ld;
+	unsigned ldA = GET_BLAS_LD(descr[1]);
+	unsigned ldC = GET_BLAS_LD(descr[0]);
 
 	// TODO check dim ...
 	
@@ -181,23 +181,23 @@ static void self_add_sub_common_codelet(starpu_data_interface_t *buffers, int s,
 
 
 
-void self_add_core_codelet(starpu_data_interface_t *descr, __attribute__((unused))  void *arg)
+void self_add_core_codelet(void *descr[], __attribute__((unused))  void *arg)
 {
 	self_add_sub_common_codelet(descr, 0, arg, 1.0f);
 }
 
-void self_sub_core_codelet(starpu_data_interface_t *descr, __attribute__((unused))  void *arg)
+void self_sub_core_codelet(void *descr[], __attribute__((unused))  void *arg)
 {
 	self_add_sub_common_codelet(descr, 0, arg, -1.0f);
 }
 
 #ifdef USE_CUDA
-void self_add_cublas_codelet(starpu_data_interface_t *descr, __attribute__((unused))  void *arg)
+void self_add_cublas_codelet(void *descr[], __attribute__((unused))  void *arg)
 {
 	self_add_sub_common_codelet(descr, 1, arg, 1.0f);
 }
 
-void self_sub_cublas_codelet(starpu_data_interface_t *descr, __attribute__((unused))  void *arg)
+void self_sub_cublas_codelet(void *descr[], __attribute__((unused))  void *arg)
 {
 	self_add_sub_common_codelet(descr, 1, arg, -1.0f);
 }

+ 12 - 12
examples/strassen2/strassen2.c

@@ -77,21 +77,21 @@ static unsigned reclevel = 3;
 static unsigned norandom = 0;
 static unsigned pin = 0;
 
-extern void mult_core_codelet(starpu_data_interface_t *descr, __attribute__((unused))  void *arg);
-extern void sub_core_codelet(starpu_data_interface_t *descr, __attribute__((unused))  void *arg);
-extern void add_core_codelet(starpu_data_interface_t *descr, __attribute__((unused))  void *arg);
-extern void self_add_core_codelet(starpu_data_interface_t *descr, __attribute__((unused))  void *arg);
-extern void self_sub_core_codelet(starpu_data_interface_t *descr, __attribute__((unused))  void *arg);
+extern void mult_core_codelet(void *descr[], __attribute__((unused))  void *arg);
+extern void sub_core_codelet(void *descr[], __attribute__((unused))  void *arg);
+extern void add_core_codelet(void *descr[], __attribute__((unused))  void *arg);
+extern void self_add_core_codelet(void *descr[], __attribute__((unused))  void *arg);
+extern void self_sub_core_codelet(void *descr[], __attribute__((unused))  void *arg);
 
 #ifdef USE_CUDA
-extern void mult_cublas_codelet(starpu_data_interface_t *descr, __attribute__((unused))  void *arg);
-extern void sub_cublas_codelet(starpu_data_interface_t *descr, __attribute__((unused))  void *arg);
-extern void add_cublas_codelet(starpu_data_interface_t *descr, __attribute__((unused))  void *arg);
-extern void self_add_cublas_codelet(starpu_data_interface_t *descr, __attribute__((unused))  void *arg);
-extern void self_sub_cublas_codelet(starpu_data_interface_t *descr, __attribute__((unused))  void *arg);
+extern void mult_cublas_codelet(void *descr[], __attribute__((unused))  void *arg);
+extern void sub_cublas_codelet(void *descr[], __attribute__((unused))  void *arg);
+extern void add_cublas_codelet(void *descr[], __attribute__((unused))  void *arg);
+extern void self_add_cublas_codelet(void *descr[], __attribute__((unused))  void *arg);
+extern void self_sub_cublas_codelet(void *descr[], __attribute__((unused))  void *arg);
 #endif
 
-extern void null_codelet(__attribute__((unused)) starpu_data_interface_t *descr,
+extern void null_codelet(__attribute__((unused)) void *descr[],
                   __attribute__((unused))  void *arg);
 
 
@@ -713,7 +713,7 @@ void strassen_mult(struct strassen_iter *iter)
 	create_cleanup_task(clean_struct);
 }
 
-static void dummy_codelet_func(__attribute__((unused))starpu_data_interface_t *descr,
+static void dummy_codelet_func(__attribute__((unused))void *descr[],
 				__attribute__((unused))  void *arg)
 {
 }

+ 33 - 33
examples/strassen2/strassen2_kernels.c

@@ -51,17 +51,17 @@ void display_perf(double timing, unsigned size)
 	fprintf(stderr, "       GFlop/s : %2.2f\n", (double)total_flop / (double)timing/1000);
 }
 
-static void mult_common_codelet(starpu_data_interface_t *buffers, int s, __attribute__((unused))  void *arg)
+static void mult_common_codelet(void *descr[], int s, __attribute__((unused))  void *arg)
 {
-	float *center 	= (float *)buffers[0].blas.ptr;
-	float *left 	= (float *)buffers[1].blas.ptr;
-	float *right 	= (float *)buffers[2].blas.ptr;
+	float *center 	= (float *)GET_BLAS_PTR(descr[0]);
+	float *left 	= (float *)GET_BLAS_PTR(descr[1]);
+	float *right 	= (float *)GET_BLAS_PTR(descr[2]);
 
-	unsigned n = buffers[0].blas.nx;
+	unsigned n = GET_BLAS_NX(descr[0]);
 
-	unsigned ld21 = buffers[1].blas.ld;
-	unsigned ld12 = buffers[2].blas.ld;
-	unsigned ld22 = buffers[0].blas.ld;
+	unsigned ld21 = GET_BLAS_LD(descr[1]);
+	unsigned ld12 = GET_BLAS_LD(descr[2]);
+	unsigned ld22 = GET_BLAS_LD(descr[0]);
 
 	double flop = 2.0*n*n*n;
 
@@ -90,31 +90,31 @@ static void mult_common_codelet(starpu_data_interface_t *buffers, int s, __attri
 	}
 }
 
-void mult_core_codelet(starpu_data_interface_t *descr, void *_args)
+void mult_core_codelet(void *descr[], void *_args)
 {
 	mult_common_codelet(descr, 0, _args);
 }
 
 #ifdef USE_CUDA
-void mult_cublas_codelet(starpu_data_interface_t *descr, void *_args)
+void mult_cublas_codelet(void *descr[], void *_args)
 {
 	mult_common_codelet(descr, 1, _args);
 }
 #endif
 
-static void add_sub_common_codelet(starpu_data_interface_t *buffers, int s, __attribute__((unused))  void *arg, float alpha)
+static void add_sub_common_codelet(void *descr[], int s, __attribute__((unused))  void *arg, float alpha)
 {
 	/* C = A op B */
 
-	float *C 	= (float *)buffers[0].blas.ptr;
-	float *A 	= (float *)buffers[1].blas.ptr;
-	float *B 	= (float *)buffers[2].blas.ptr;
+	float *C 	= (float *)GET_BLAS_PTR(descr[0]);
+	float *A 	= (float *)GET_BLAS_PTR(descr[1]);
+	float *B 	= (float *)GET_BLAS_PTR(descr[2]);
 
-	unsigned n = buffers[0].blas.nx;
+	unsigned n = GET_BLAS_NX(descr[0]);
 
-	unsigned ldA = buffers[1].blas.ld;
-	unsigned ldB = buffers[2].blas.ld;
-	unsigned ldC = buffers[0].blas.ld;
+	unsigned ldA = GET_BLAS_LD(descr[1]);
+	unsigned ldB = GET_BLAS_LD(descr[2]);
+	unsigned ldC = GET_BLAS_LD(descr[0]);
 
 	double flop = 2.0*n*n;
 
@@ -161,40 +161,40 @@ static void add_sub_common_codelet(starpu_data_interface_t *buffers, int s, __at
 	}
 }
 
-void sub_core_codelet(starpu_data_interface_t *descr, __attribute__((unused))  void *arg)
+void sub_core_codelet(void *descr[], __attribute__((unused))  void *arg)
 {
 	add_sub_common_codelet(descr, 0, arg, -1.0f);
 }
 
-void add_core_codelet(starpu_data_interface_t *descr, __attribute__((unused))  void *arg)
+void add_core_codelet(void *descr[], __attribute__((unused))  void *arg)
 {
 	add_sub_common_codelet(descr, 0, arg, 1.0f);
 }
 
 #ifdef USE_CUDA
-void sub_cublas_codelet(starpu_data_interface_t *descr, __attribute__((unused))  void *arg)
+void sub_cublas_codelet(void *descr[], __attribute__((unused))  void *arg)
 {
 	add_sub_common_codelet(descr, 1, arg, -1.0f);
 }
 
-void add_cublas_codelet(starpu_data_interface_t *descr, __attribute__((unused))  void *arg)
+void add_cublas_codelet(void *descr[], __attribute__((unused))  void *arg)
 {
 	add_sub_common_codelet(descr, 1, arg, 1.0f);
 }
 #endif
 
 
-static void self_add_sub_common_codelet(starpu_data_interface_t *buffers, int s, __attribute__((unused))  void *arg, float alpha)
+static void self_add_sub_common_codelet(void *descr[], int s, __attribute__((unused))  void *arg, float alpha)
 {
 	/* C +=/-= A */
 
-	float *C 	= (float *)buffers[0].blas.ptr;
-	float *A 	= (float *)buffers[1].blas.ptr;
+	float *C 	= (float *)GET_BLAS_PTR(descr[0]);
+	float *A 	= (float *)GET_BLAS_PTR(descr[1]);
 
-	unsigned n = buffers[0].blas.nx;
+	unsigned n = GET_BLAS_NX(descr[0]);
 
-	unsigned ldA = buffers[1].blas.ld;
-	unsigned ldC = buffers[0].blas.ld;
+	unsigned ldA = GET_BLAS_LD(descr[1]);
+	unsigned ldC = GET_BLAS_LD(descr[0]);
 
 	double flop = 1.0*n*n;
 
@@ -237,30 +237,30 @@ static void self_add_sub_common_codelet(starpu_data_interface_t *buffers, int s,
 
 
 
-void self_add_core_codelet(starpu_data_interface_t *descr, __attribute__((unused))  void *arg)
+void self_add_core_codelet(void *descr[], __attribute__((unused))  void *arg)
 {
 	self_add_sub_common_codelet(descr, 0, arg, 1.0f);
 }
 
-void self_sub_core_codelet(starpu_data_interface_t *descr, __attribute__((unused))  void *arg)
+void self_sub_core_codelet(void *descr[], __attribute__((unused))  void *arg)
 {
 	self_add_sub_common_codelet(descr, 0, arg, -1.0f);
 }
 
 #ifdef USE_CUDA
-void self_add_cublas_codelet(starpu_data_interface_t *descr, __attribute__((unused))  void *arg)
+void self_add_cublas_codelet(void *descr[], __attribute__((unused))  void *arg)
 {
 	self_add_sub_common_codelet(descr, 1, arg, 1.0f);
 }
 
-void self_sub_cublas_codelet(starpu_data_interface_t *descr, __attribute__((unused))  void *arg)
+void self_sub_cublas_codelet(void *descr[], __attribute__((unused))  void *arg)
 {
 	self_add_sub_common_codelet(descr, 1, arg, -1.0f);
 }
 #endif
 
 /* this codelet does nothing  */
-void null_codelet(__attribute__((unused)) starpu_data_interface_t *descr,
+void null_codelet(__attribute__((unused)) void *descr[],
 		  __attribute__((unused))  void *arg)
 {
 }

+ 1 - 1
examples/tag_example/tag_example.c

@@ -150,7 +150,7 @@ void callback_core(void *argcb __attribute__ ((unused)))
 	}
 }
 
-void core_codelet(starpu_data_interface_t *buffers __attribute__((unused)),
+void core_codelet(void *descr[] __attribute__((unused)),
 			void *_args __attribute__ ((unused)))
 {
 //	printf("execute task\n");

+ 1 - 1
examples/tag_example/tag_example2.c

@@ -100,7 +100,7 @@ static void create_task_grid(unsigned iter)
 
 }
 
-void core_codelet(starpu_data_interface_t *buffers __attribute__ ((unused)),
+void core_codelet(void *descr[] __attribute__ ((unused)),
 			void *_args __attribute__ ((unused)))
 {
 }

+ 1 - 1
examples/tag_example/tag_restartable.c

@@ -102,7 +102,7 @@ static void start_task_grid(unsigned iter)
 		starpu_submit_task(tasks[iter][i]);
 }
 
-void core_codelet(starpu_data_interface_t *descr, void *_args __attribute__((unused)))
+void core_codelet(void *descr[], void *_args __attribute__((unused)))
 {
 	//int i = (uintptr_t) _args;
 	//printf("doing %x\n", i);

+ 23 - 0
include/starpu-data-interfaces.h

@@ -44,6 +44,14 @@ uint32_t starpu_get_blas_local_ld(starpu_data_handle handle);
 uintptr_t starpu_get_blas_local_ptr(starpu_data_handle handle);
 size_t starpu_get_blas_elemsize(starpu_data_handle handle);
 
+/* helper methods */
+#define GET_BLAS_PTR(interface)	(((starpu_blas_interface_t *)(interface))->ptr)
+#define GET_BLAS_NX(interface)	(((starpu_blas_interface_t *)(interface))->nx)
+#define GET_BLAS_NY(interface)	(((starpu_blas_interface_t *)(interface))->ny)
+#define GET_BLAS_LD(interface)	(((starpu_blas_interface_t *)(interface))->ld)
+#define GET_BLAS_ELEMSIZE(interface)	(((starpu_blas_interface_t *)(interface))->elemsize)
+
+
 /* BLOCK interface for 3D dense blocks */
 typedef struct starpu_block_interface_s {
 	uintptr_t ptr;
@@ -79,6 +87,11 @@ uint32_t starpu_get_vector_nx(starpu_data_handle handle);
 size_t starpu_get_vector_elemsize(starpu_data_handle handle);
 uintptr_t starpu_get_vector_local_ptr(starpu_data_handle handle);
 
+/* helper methods */
+#define GET_VECTOR_PTR(interface)	(((starpu_vector_interface_t *)(interface))->ptr)
+#define GET_VECTOR_NX(interface)	(((starpu_vector_interface_t *)(interface))->nx)
+#define GET_VECTOR_ELEMSIZE(interface)	(((starpu_vector_interface_t *)(interface))->elemsize)
+
 /* CSR interface for sparse matrices (compressed sparse row representation) */
 typedef struct starpu_csr_interface_s {
 	uint32_t nnz; /* number of non-zero entries */
@@ -104,6 +117,14 @@ uint32_t *starpu_get_csr_local_colind(starpu_data_handle handle);
 uint32_t *starpu_get_csr_local_rowptr(starpu_data_handle handle);
 size_t starpu_get_csr_elemsize(starpu_data_handle handle);
 
+#define GET_CSR_NNZ(interface)	(((starpu_csr_interface_t *)(interface))->nnz)
+#define GET_CSR_NROW(interface)	(((starpu_csr_interface_t *)(interface))->nrow)
+#define GET_CSR_NZVAL(interface)	(((starpu_csr_interface_t *)(interface))->nzval)
+#define GET_CSR_COLIND(interface)	(((starpu_csr_interface_t *)(interface))->colind)
+#define GET_CSR_ROWPTR(interface)	(((starpu_csr_interface_t *)(interface))->rowptr)
+#define GET_CSR_FIRSTENTRY(interface)	(((starpu_csr_interface_t *)(interface))->firstentry)
+#define GET_CSR_ELEMSIZE(interface)	(((starpu_csr_interface_t *)(interface))->elemsize)
+
 /* CSC interface for sparse matrices (compressed sparse column representation) */
 typedef struct starpu_csc_interface_s {
 	int nnz; /* number of non-zero entries */
@@ -163,6 +184,7 @@ size_t starpu_get_bcsr_elemsize(starpu_data_handle);
 
 unsigned starpu_get_handle_interface_id(starpu_data_handle);
 
+#if 0
 typedef union {
 	starpu_blas_interface_t blas;	/* dense BLAS representation */
 	starpu_block_interface_t block;	/* BLOCK interface for 3D dense blocks */
@@ -172,6 +194,7 @@ typedef union {
 	starpu_bcsr_interface_t bcsr;	/* blocked compressed sparse row */
 	uint8_t pad[64];
 } starpu_data_interface_t;
+#endif
 
 #ifdef __cplusplus
 }

+ 7 - 3
include/starpu-task.h

@@ -56,8 +56,11 @@ typedef struct starpu_codelet_t {
 	uint32_t where;
 
 	/* the different implementations of the codelet */
-	void (*cuda_func)(starpu_data_interface_t *, void *);
-	void (*core_func)(starpu_data_interface_t *, void *);
+	//void (*cuda_func)(starpu_data_interface_t *, void *);
+	//void (*core_func)(starpu_data_interface_t *, void *);
+
+	void (*cuda_func)(void **, void *);
+	void (*core_func)(void **, void *);
 	uint8_t gordon_func;
 
 	/* how many buffers do the codelet takes as argument ? */
@@ -76,7 +79,8 @@ struct starpu_task {
 
 	/* arguments managed by the DSM */
 	struct starpu_buffer_descr_t buffers[STARPU_NMAXBUFS];
-	starpu_data_interface_t interface[STARPU_NMAXBUFS];
+	//starpu_data_interface_t interface[STARPU_NMAXBUFS];
+	void *interface[STARPU_NMAXBUFS];
 
 	/* arguments not managed by the DSM are given as a buffer */
 	void *cl_arg;

+ 3 - 3
mpi/tests/ring.c

@@ -22,12 +22,12 @@ unsigned token = 42;
 starpu_data_handle token_handle;
 
 #ifdef USE_CUDA
-extern void increment_cuda(starpu_data_interface_t *buffers, __attribute__ ((unused)) void *_args);
+extern void increment_cuda(void *descr[], __attribute__ ((unused)) void *_args);
 #endif
 
-void increment_core(starpu_data_interface_t *buffers, __attribute__ ((unused)) void *_args)
+void increment_core(void *descr[], __attribute__ ((unused)) void *_args)
 {
-	unsigned *tokenptr = (unsigned *)buffers[0].vector.ptr;
+	unsigned *tokenptr = (unsigned *)GET_VECTOR_PTR(descr[0]);
 	(*tokenptr)++;
 }
 

+ 3 - 3
mpi/tests/ring_async.c

@@ -22,12 +22,12 @@ unsigned token = 42;
 starpu_data_handle token_handle;
 
 #ifdef USE_CUDA
-extern void increment_cuda(starpu_data_interface_t *buffers, __attribute__ ((unused)) void *_args);
+extern void increment_cuda(void *descr[], __attribute__ ((unused)) void *_args);
 #endif
 
-void increment_core(starpu_data_interface_t *buffers, __attribute__ ((unused)) void *_args)
+void increment_core(void *descr[], __attribute__ ((unused)) void *_args)
 {
-	unsigned *tokenptr = (unsigned *)buffers[0].vector.ptr;
+	unsigned *tokenptr = (unsigned *)GET_VECTOR_PTR(descr[0]);
 	(*tokenptr)++;
 }
 

+ 2 - 2
mpi/tests/ring_kernel.cu

@@ -21,9 +21,9 @@ static __global__ void cuda_incrementer(unsigned *token)
 	(*token)++;
 }
 
-extern "C" void increment_cuda(starpu_data_interface_t *buffers, __attribute__ ((unused)) void *_args)
+extern "C" void increment_cuda(void *descr[], __attribute__ ((unused)) void *_args)
 {
-	unsigned *tokenptr = (unsigned *)buffers[0].vector.ptr;
+	unsigned *tokenptr = (unsigned *)GET_VECTOR_PTR(descr[0]);
 
 	cuda_incrementer<<<1,1>>>(tokenptr);
 }

+ 1 - 1
src/core/jobs.h

@@ -42,7 +42,7 @@
 struct worker_s;
 
 /* codelet function */
-typedef void (*cl_func)(starpu_data_interface_t *, void *);
+typedef void (*cl_func)(void **, void *);
 typedef void (*callback)(void *);
 
 #define CORE_MAY_PERFORM(j)	((j)->task->cl->where & CORE)

+ 1 - 3
src/datawizard/coherency.c

@@ -348,7 +348,6 @@ int fetch_task_input(struct starpu_task *task, uint32_t mask)
 	uint32_t local_memory_node = get_local_memory_node();
 
 	starpu_buffer_descr *descrs = task->buffers;
-	starpu_data_interface_t *interface = task->interface;
 	unsigned nbuffers = task->cl->nbuffers;
 
 	unsigned index;
@@ -367,8 +366,7 @@ int fetch_task_input(struct starpu_task *task, uint32_t mask)
 			goto enomem;
 
 		void *src_interface = starpu_data_get_interface_on_node(handle, local_memory_node);
-
-		memcpy(&interface[index], src_interface, handle->ops->interface_size);
+		task->interface[index] = src_interface;
 	}
 
 	TRACE_END_FETCH_INPUT(NULL);

+ 0 - 1
src/datawizard/coherency.h

@@ -110,7 +110,6 @@ struct starpu_data_state_t {
 	local_data_state per_node[MAXNODES];
 
 	/* describe the actual data layout */
-//	starpu_data_interface_t interface[MAXNODES];
 	void *interface[MAXNODES];
 
 	struct data_interface_ops_t *ops;

+ 10 - 8
src/datawizard/interfaces/bcsr_interface.c

@@ -49,7 +49,7 @@ static const struct copy_data_methods_s bcsr_copy_data_methods_s = {
 
 static void register_bcsr_handle(starpu_data_handle handle, uint32_t home_node, void *interface);
 static size_t allocate_bcsr_buffer_on_node(starpu_data_handle handle, uint32_t dst_node);
-static void liberate_bcsr_buffer_on_node(starpu_data_interface_t *interface, uint32_t node);
+static void liberate_bcsr_buffer_on_node(void *interface, uint32_t node);
 static size_t bcsr_interface_get_size(starpu_data_handle handle);
 static uint32_t footprint_bcsr_interface_crc32(starpu_data_handle handle, uint32_t hstate);
 
@@ -330,20 +330,22 @@ fail_nzval:
 	return allocated_memory;
 }
 
-static void liberate_bcsr_buffer_on_node(starpu_data_interface_t *interface, uint32_t node)
+static void liberate_bcsr_buffer_on_node(void *interface, uint32_t node)
 {
+	starpu_bcsr_interface_t *bcsr_interface = interface;	
+
 	node_kind kind = get_node_kind(node);
 	switch(kind) {
 		case RAM:
-			free((void*)interface->bcsr.nzval);
-			free((void*)interface->bcsr.colind);
-			free((void*)interface->bcsr.rowptr);
+			free((void*)bcsr_interface->nzval);
+			free((void*)bcsr_interface->colind);
+			free((void*)bcsr_interface->rowptr);
 			break;
 #ifdef USE_CUDA
 		case CUDA_RAM:
-			cublasFree((void*)interface->bcsr.nzval);
-			cublasFree((void*)interface->bcsr.colind);
-			cublasFree((void*)interface->bcsr.rowptr);
+			cublasFree((void*)bcsr_interface->nzval);
+			cublasFree((void*)bcsr_interface->colind);
+			cublasFree((void*)bcsr_interface->rowptr);
 			break;
 #endif
 		default:

+ 13 - 11
src/datawizard/interfaces/blas_interface.c

@@ -56,12 +56,12 @@ static const struct copy_data_methods_s blas_copy_data_methods_s = {
 
 static void register_blas_handle(starpu_data_handle handle, uint32_t home_node, void *interface);
 static size_t allocate_blas_buffer_on_node(starpu_data_handle handle, uint32_t dst_node);
-static void liberate_blas_buffer_on_node(starpu_data_interface_t *interface, uint32_t node);
+static void liberate_blas_buffer_on_node(void *interface, uint32_t node);
 static size_t blas_interface_get_size(starpu_data_handle handle);
 static uint32_t footprint_blas_interface_crc32(starpu_data_handle handle, uint32_t hstate);
 static void display_blas_interface(starpu_data_handle handle, FILE *f);
 #ifdef USE_GORDON
-static int convert_blas_to_gordon(starpu_data_interface_t *interface, uint64_t *ptr, gordon_strideSize_t *ss); 
+static int convert_blas_to_gordon(void *interface, uint64_t *ptr, gordon_strideSize_t *ss); 
 #endif
 
 struct data_interface_ops_t interface_blas_ops = {
@@ -80,14 +80,14 @@ struct data_interface_ops_t interface_blas_ops = {
 };
 
 #ifdef USE_GORDON
-static int convert_blas_to_gordon(starpu_data_interface_t *interface, uint64_t *ptr, gordon_strideSize_t *ss) 
+static int convert_blas_to_gordon(void *interface, uint64_t *ptr, gordon_strideSize_t *ss) 
 {
-	size_t elemsize = (*interface).blas.elemsize;
-	uint32_t nx = (*interface).blas.nx;
-	uint32_t ny = (*interface).blas.ny;
-	uint32_t ld = (*interface).blas.ld;
+	size_t elemsize = GET_BLAS_ELEMSIZE(interface);
+	uint32_t nx = GET_BLAS_NX(interface);
+	uint32_t ny = GET_BLAS_NY(interface);
+	uint32_t ld = GET_BLAS_LD(interface);
 
-	*ptr = (*interface).blas.ptr;
+	*ptr = GET_BLAS_PTR(interface);
 
 	/* The gordon_stride_init function may use a contiguous buffer
  	 * in case nx = ld (in that case, (*ss).size = elemsize*nx*ny */
@@ -289,8 +289,10 @@ static size_t allocate_blas_buffer_on_node(starpu_data_handle handle, uint32_t d
 	return allocated_memory;
 }
 
-static void liberate_blas_buffer_on_node(starpu_data_interface_t *interface, uint32_t node)
+static void liberate_blas_buffer_on_node(void *interface, uint32_t node)
 {
+	starpu_blas_interface_t *blas_interface = interface;
+
 #ifdef USE_CUDA
 	cudaError_t status;
 #endif
@@ -298,11 +300,11 @@ static void liberate_blas_buffer_on_node(starpu_data_interface_t *interface, uin
 	node_kind kind = get_node_kind(node);
 	switch(kind) {
 		case RAM:
-			free((void*)interface->blas.ptr);
+			free((void*)blas_interface->ptr);
 			break;
 #ifdef USE_CUDA
 		case CUDA_RAM:
-			status = cudaFree((void*)interface->blas.ptr);			
+			status = cudaFree((void*)blas_interface->ptr);			
 			if (STARPU_UNLIKELY(status))
 				CUDA_REPORT_ERROR(status);
 

+ 8 - 6
src/datawizard/interfaces/block_interface.c

@@ -50,12 +50,12 @@ static const struct copy_data_methods_s block_copy_data_methods_s = {
 
 static void register_block_handle(starpu_data_handle handle, uint32_t home_node, void *interface);
 static size_t allocate_block_buffer_on_node(starpu_data_handle handle, uint32_t dst_node);
-static void liberate_block_buffer_on_node(starpu_data_interface_t *interface, uint32_t node);
+static void liberate_block_buffer_on_node(void *interface, uint32_t node);
 static size_t block_interface_get_size(starpu_data_handle handle);
 static uint32_t footprint_block_interface_crc32(starpu_data_handle handle, uint32_t hstate);
 static void display_block_interface(starpu_data_handle handle, FILE *f);
 #ifdef USE_GORDON
-static int convert_block_to_gordon(starpu_data_interface_t *interface, uint64_t *ptr, gordon_strideSize_t *ss);
+static int convert_block_to_gordon(void *interface, uint64_t *ptr, gordon_strideSize_t *ss);
 #endif
 
 struct data_interface_ops_t interface_block_ops = {
@@ -74,7 +74,7 @@ struct data_interface_ops_t interface_block_ops = {
 };
 
 #ifdef USE_GORDON
-int convert_block_to_gordon(starpu_data_interface_t *interface, uint64_t *ptr, gordon_strideSize_t *ss) 
+int convert_block_to_gordon(void *interface, uint64_t *ptr, gordon_strideSize_t *ss) 
 {
 	/* TODO */
 	STARPU_ASSERT(0);
@@ -305,8 +305,10 @@ static size_t allocate_block_buffer_on_node(starpu_data_handle handle, uint32_t
 	return allocated_memory;
 }
 
-static void liberate_block_buffer_on_node(starpu_data_interface_t *interface, uint32_t node)
+static void liberate_block_buffer_on_node(void *interface, uint32_t node)
 {
+	starpu_block_interface_t *block_interface = interface;
+
 #ifdef USE_CUDA
 	cudaError_t status;
 #endif
@@ -314,11 +316,11 @@ static void liberate_block_buffer_on_node(starpu_data_interface_t *interface, ui
 	node_kind kind = get_node_kind(node);
 	switch(kind) {
 		case RAM:
-			free((void*)interface->block.ptr);
+			free((void*)block_interface->ptr);
 			break;
 #ifdef USE_CUDA
 		case CUDA_RAM:
-			status = cudaFree((void*)interface->blas.ptr);
+			status = cudaFree((void*)block_interface->ptr);
 			if (STARPU_UNLIKELY(status))
 				CUDA_REPORT_ERROR(status);
 

+ 10 - 8
src/datawizard/interfaces/csr_interface.c

@@ -45,7 +45,7 @@ static const struct copy_data_methods_s csr_copy_data_methods_s = {
 
 static void register_csr_handle(starpu_data_handle handle, uint32_t home_node, void *interface);
 static size_t allocate_csr_buffer_on_node(starpu_data_handle handle, uint32_t dst_node);
-static void liberate_csr_buffer_on_node(starpu_data_interface_t *interface, uint32_t node);
+static void liberate_csr_buffer_on_node(void *interface, uint32_t node);
 static size_t csr_interface_get_size(starpu_data_handle handle);
 static uint32_t footprint_csr_interface_crc32(starpu_data_handle handle, uint32_t hstate);
 
@@ -305,20 +305,22 @@ fail_nzval:
 	return allocated_memory;
 }
 
-static void liberate_csr_buffer_on_node(starpu_data_interface_t *interface, uint32_t node)
+static void liberate_csr_buffer_on_node(void *interface, uint32_t node)
 {
+	starpu_csr_interface_t *csr_interface = interface;	
+
 	node_kind kind = get_node_kind(node);
 	switch(kind) {
 		case RAM:
-			free((void*)interface->csr.nzval);
-			free((void*)interface->csr.colind);
-			free((void*)interface->csr.rowptr);
+			free((void*)csr_interface->nzval);
+			free((void*)csr_interface->colind);
+			free((void*)csr_interface->rowptr);
 			break;
 #ifdef USE_CUDA
 		case CUDA_RAM:
-			cublasFree((void*)interface->csr.nzval);
-			cublasFree((void*)interface->csr.colind);
-			cublasFree((void*)interface->csr.rowptr);
+			cublasFree((void*)csr_interface->nzval);
+			cublasFree((void*)csr_interface->colind);
+			cublasFree((void*)csr_interface->rowptr);
 			break;
 #endif
 		default:

+ 2 - 3
src/datawizard/interfaces/data_interface.h

@@ -30,14 +30,13 @@ struct data_interface_ops_t {
 	void (*register_data_handle)(starpu_data_handle handle,
 					uint32_t home_node, void *interface);
 	size_t (*allocate_data_on_node)(starpu_data_handle handle, uint32_t node);
-	void (*liberate_data_on_node)(starpu_data_interface_t *interface,
-					uint32_t node);
+	void (*liberate_data_on_node)(void *interface, uint32_t node);
 	const struct copy_data_methods_s *copy_methods;
 	size_t (*get_size)(starpu_data_handle handle);
 	uint32_t (*footprint)(starpu_data_handle handle, uint32_t hstate);
 	void (*display)(starpu_data_handle handle, FILE *f);
 #ifdef USE_GORDON
-	int (*convert_to_gordon)(starpu_data_interface_t *interface, uint64_t *ptr, gordon_strideSize_t *ss); 
+	int (*convert_to_gordon)(void *interface, uint64_t *ptr, gordon_strideSize_t *ss); 
 #endif
 	/* an identifier that is unique to each interface */
 	unsigned interfaceid;

+ 12 - 8
src/datawizard/interfaces/vector_interface.c

@@ -54,12 +54,12 @@ static const struct copy_data_methods_s vector_copy_data_methods_s = {
 
 static void register_vector_handle(starpu_data_handle handle, uint32_t home_node, void *interface);
 static size_t allocate_vector_buffer_on_node(starpu_data_handle handle, uint32_t dst_node);
-static void liberate_vector_buffer_on_node(starpu_data_interface_t *interface, uint32_t node);
+static void liberate_vector_buffer_on_node(void *interface, uint32_t node);
 static size_t vector_interface_get_size(starpu_data_handle handle);
 static uint32_t footprint_vector_interface_crc32(starpu_data_handle handle, uint32_t hstate);
 static void display_vector_interface(starpu_data_handle handle, FILE *f);
 #ifdef USE_GORDON
-static int convert_vector_to_gordon(starpu_data_interface_t *interface, uint64_t *ptr, gordon_strideSize_t *ss); 
+static int convert_vector_to_gordon(void *interface, uint64_t *ptr, gordon_strideSize_t *ss); 
 #endif
 
 struct data_interface_ops_t interface_vector_ops = {
@@ -100,10 +100,12 @@ static void register_vector_handle(starpu_data_handle handle, uint32_t home_node
 }
 
 #ifdef USE_GORDON
-int convert_vector_to_gordon(starpu_data_interface_t *interface, uint64_t *ptr, gordon_strideSize_t *ss) 
+int convert_vector_to_gordon(void *interface, uint64_t *ptr, gordon_strideSize_t *ss) 
 {
-	*ptr = (*interface).vector.ptr;
-	(*ss).size = (*interface).vector.nx * (*interface).vector.elemsize;
+	starpu_vector_interface_t *vector_interface = interface;
+	
+	*ptr = vector_interface->ptr;
+	(*ss).size = vector_interface->nx * vector_interface->elemsize;
 
 	return 0;
 }
@@ -230,16 +232,18 @@ static size_t allocate_vector_buffer_on_node(starpu_data_handle handle, uint32_t
 	return allocated_memory;
 }
 
-void liberate_vector_buffer_on_node(starpu_data_interface_t *interface, uint32_t node)
+void liberate_vector_buffer_on_node(void *interface, uint32_t node)
 {
+	starpu_vector_interface_t *vector_interface = interface;
+
 	node_kind kind = get_node_kind(node);
 	switch(kind) {
 		case RAM:
-			free((void*)interface->vector.ptr);
+			free((void*)vector_interface->ptr);
 			break;
 #ifdef USE_CUDA
 		case CUDA_RAM:
-			cublasFree((void*)interface->vector.ptr);
+			cublasFree((void*)vector_interface->ptr);
 			break;
 #endif
 		default:

+ 1 - 1
src/util/execute_on_all.c

@@ -22,7 +22,7 @@ typedef struct wrapper_func_args {
 	void *arg;
 } _wrapper_func_args;
 
-static void wrapper_func(starpu_data_interface_t *buffers __attribute__ ((unused)), void *_args)
+static void wrapper_func(void *buffers[] __attribute__ ((unused)), void *_args)
 {
 	_wrapper_func_args *args = _args;
 	args->func(args->arg);

+ 2 - 2
src/util/malloc.c

@@ -30,7 +30,7 @@ struct malloc_pinned_codelet_struct {
 	size_t dim;
 };
 
-static void malloc_pinned_codelet(starpu_data_interface_t *buffers __attribute__((unused)), void *arg)
+static void malloc_pinned_codelet(void *buffers[] __attribute__((unused)), void *arg)
 {
 	struct malloc_pinned_codelet_struct *s = arg;
 
@@ -86,7 +86,7 @@ int starpu_malloc_pinned_if_possible(void **A, size_t dim)
 }
 
 #ifdef USE_CUDA
-static void free_pinned_codelet(starpu_data_interface_t *buffers __attribute__((unused)), void *arg)
+static void free_pinned_codelet(void *buffers[] __attribute__((unused)), void *arg)
 {
 	cudaError_t cures;
 	cures = cudaFreeHost(arg);

+ 1 - 1
tests/core/empty_task_sync_point.c

@@ -28,7 +28,7 @@ static starpu_tag_t tagD = 0x3042;
 static starpu_tag_t tagE = 0x4042;
 static starpu_tag_t tagF = 0x5042;
 
-static void dummy_func(starpu_data_interface_t *descr __attribute__ ((unused)), void *arg __attribute__ ((unused)))
+static void dummy_func(void *descr[] __attribute__ ((unused)), void *arg __attribute__ ((unused)))
 {
 }
 

+ 1 - 1
tests/core/execute_on_a_specific_worker.c

@@ -49,7 +49,7 @@ static void callback(void *arg)
 
 
 
-static void codelet_null(starpu_data_interface_t *buffers, __attribute__ ((unused)) void *_args)
+static void codelet_null(void *descr[], __attribute__ ((unused)) void *_args)
 {
 //	int id = starpu_get_worker_id();
 //	fprintf(stderr, "worker #%d\n", id);

+ 1 - 1
tests/core/multithreaded.c

@@ -27,7 +27,7 @@ pthread_t threads[16];
 static unsigned ntasks = 65536;
 static unsigned nthreads = 2;
 
-static void dummy_func(starpu_data_interface_t *descr __attribute__ ((unused)), void *arg __attribute__ ((unused)))
+static void dummy_func(void *descr[] __attribute__ ((unused)), void *arg __attribute__ ((unused)))
 {
 }
 

+ 2 - 3
tests/core/starpu_wait_all_tasks.c

@@ -14,16 +14,15 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+#include <starpu.h>
 #include <sys/time.h>
 #include <pthread.h>
 #include <stdio.h>
 #include <unistd.h>
 
-#include <starpu.h>
-
 static unsigned ntasks = 65536;
 
-static void dummy_func(starpu_data_interface_t *descr __attribute__ ((unused)), void *arg __attribute__ ((unused)))
+static void dummy_func(void *descr[], void *arg)
 {
 }
 

+ 1 - 1
tests/core/starpu_wait_task.c

@@ -23,7 +23,7 @@
 
 static unsigned ntasks = 65536;
 
-static void dummy_func(starpu_data_interface_t *descr __attribute__ ((unused)), void *arg __attribute__ ((unused)))
+static void dummy_func(void *descr[] __attribute__ ((unused)), void *arg __attribute__ ((unused)))
 {
 }
 

+ 1 - 1
tests/core/static_restartable.c

@@ -22,7 +22,7 @@
 
 static unsigned ntasks = 65536;
 
-static void dummy_func(starpu_data_interface_t *descr __attribute__ ((unused)), void *arg __attribute__ ((unused)))
+static void dummy_func(void *descr[] __attribute__ ((unused)), void *arg __attribute__ ((unused)))
 {
 }
 

+ 1 - 1
tests/core/static_restartable_tag.c

@@ -23,7 +23,7 @@
 static unsigned ntasks = 65536;
 static starpu_tag_t tag = 0x32;
 
-static void dummy_func(starpu_data_interface_t *descr __attribute__ ((unused)), void *arg __attribute__ ((unused)))
+static void dummy_func(void *descr[] __attribute__ ((unused)), void *arg __attribute__ ((unused)))
 {
 }
 

+ 1 - 1
tests/core/static_restartable_using_initializer.c

@@ -25,7 +25,7 @@ struct starpu_task task = STARPU_TASK_INITIALIZER;
 
 static unsigned ntasks = 65536;
 
-static void dummy_func(starpu_data_interface_t *descr __attribute__ ((unused)), void *arg __attribute__ ((unused)))
+static void dummy_func(void *descr[] __attribute__ ((unused)), void *arg __attribute__ ((unused)))
 {
 }
 

+ 1 - 1
tests/datawizard/dining_philosophers.c

@@ -6,7 +6,7 @@
 starpu_data_handle fork_handles[N];
 unsigned forks[N];
 
-static void eat_kernel(starpu_data_interface_t *buffers, void *arg)
+static void eat_kernel(void *descr[], void *arg)
 {
 }
 

+ 1 - 1
tests/datawizard/readers_and_writers.c

@@ -3,7 +3,7 @@
 static unsigned book = 0;
 static starpu_data_handle book_handle;
 
-static void dummy_kernel(starpu_data_interface_t *buffers, void *arg)
+static void dummy_kernel(void *descr[], void *arg)
 {
 }
 

+ 6 - 6
tests/datawizard/write_only_tmp_buffer.c

@@ -25,24 +25,24 @@
 starpu_data_handle v_handle;
 
 #ifdef USE_CUDA
-static void cuda_codelet_null(starpu_data_interface_t *buffers, __attribute__ ((unused)) void *_args)
+static void cuda_codelet_null(void *descr[], __attribute__ ((unused)) void *_args)
 {
-	char *buf = (char *)buffers[0].vector.ptr;
+	char *buf = (char *)GET_VECTOR_PTR(descr[0]);
 
 	cudaMemset(buf, 42, 1);
 }
 #endif
 
-static void core_codelet_null(starpu_data_interface_t *buffers, __attribute__ ((unused)) void *_args)
+static void core_codelet_null(void *descr[], __attribute__ ((unused)) void *_args)
 {
-	char *buf = (char *)buffers[0].vector.ptr;
+	char *buf = (char *)GET_VECTOR_PTR(descr[0]);
 
 	*buf = 42;
 }
 
-static void display_var(starpu_data_interface_t *buffers, __attribute__ ((unused)) void *_args)
+static void display_var(void *descr[], __attribute__ ((unused)) void *_args)
 {
-	char *buf = (char *)buffers[0].vector.ptr;
+	char *buf = (char *)GET_VECTOR_PTR(descr[0]);
 	if (*buf != 42)
 	{
 		fprintf(stderr, "Value = %c (should be %c)\n", *buf, 42);

+ 1 - 1
tests/errorcheck/invalid_blocking_calls.c

@@ -21,7 +21,7 @@
 static starpu_data_handle handle;
 static unsigned data = 42;
 
-static void wrong_func(starpu_data_interface_t *descr, void *arg)
+static void wrong_func(void *descr[], void *arg)
 {
 	int ret;
 

+ 1 - 1
tests/microbenchs/async-tasks-overhead.c

@@ -29,7 +29,7 @@ static unsigned cnt;
 
 static unsigned finished = 0;
 
-static void dummy_func(starpu_data_interface_t *descr __attribute__ ((unused)), void *arg __attribute__ ((unused)))
+static void dummy_func(void *descr[] __attribute__ ((unused)), void *arg __attribute__ ((unused)))
 {
 }
 

+ 2 - 2
tests/microbenchs/dsm_stress.c

@@ -50,11 +50,11 @@ static void callback(void *arg)
 
 
 
-static void cuda_codelet_null(starpu_data_interface_t *buffers, __attribute__ ((unused)) void *_args)
+static void cuda_codelet_null(void *descr[], __attribute__ ((unused)) void *_args)
 {
 }
 
-static void core_codelet_null(starpu_data_interface_t *buffers, __attribute__ ((unused)) void *_args)
+static void core_codelet_null(void *descr[], __attribute__ ((unused)) void *_args)
 {
 }
 

+ 1 - 1
tests/microbenchs/prefetch_data_on_node.c

@@ -52,7 +52,7 @@ static void callback(void *arg)
 
 
 
-static void codelet_null(starpu_data_interface_t *buffers, __attribute__ ((unused)) void *_args)
+static void codelet_null(void *descr[], __attribute__ ((unused)) void *_args)
 {
 //	fprintf(stderr, "pif\n");
 //	fflush(stderr);

+ 2 - 2
tests/microbenchs/redundant_buffer.c

@@ -27,11 +27,11 @@
 starpu_data_handle v_handle;
 static unsigned *v;
 
-static void cuda_codelet_null(starpu_data_interface_t *buffers, __attribute__ ((unused)) void *_args)
+static void cuda_codelet_null(void *descr[], __attribute__ ((unused)) void *_args)
 {
 }
 
-static void core_codelet_null(starpu_data_interface_t *buffers, __attribute__ ((unused)) void *_args)
+static void core_codelet_null(void *descr[], __attribute__ ((unused)) void *_args)
 {
 }
 

+ 1 - 1
tests/microbenchs/sync-tasks-overhead.c

@@ -22,7 +22,7 @@
 
 static unsigned ntasks = 65536;
 
-static void dummy_func(starpu_data_interface_t *descr __attribute__ ((unused)), void *arg __attribute__ ((unused)))
+static void dummy_func(void *descr[] __attribute__ ((unused)), void *arg __attribute__ ((unused)))
 {
 }
 

+ 6 - 6
tests/microbenchs/sync_and_notify_data.c

@@ -42,8 +42,8 @@
  */
 
 #ifdef USE_CUDA
-void cuda_codelet_incA(starpu_data_interface_t *buffers, __attribute__ ((unused)) void *_args);
-void cuda_codelet_incC(starpu_data_interface_t *buffers, __attribute__ ((unused)) void *_args);
+void cuda_codelet_incA(void *descr[], __attribute__ ((unused)) void *_args);
+void cuda_codelet_incC(void *descr[], __attribute__ ((unused)) void *_args);
 #endif
 
 #define VECTORSIZE	16
@@ -51,15 +51,15 @@ void cuda_codelet_incC(starpu_data_interface_t *buffers, __attribute__ ((unused)
 starpu_data_handle v_handle;
 static unsigned v[VECTORSIZE] __attribute__((aligned(128))) = {0, 0, 0, 0};
 
-void core_codelet_incA(starpu_data_interface_t *buffers, __attribute__ ((unused)) void *_args)
+void core_codelet_incA(void *descr[], __attribute__ ((unused)) void *_args)
 {
-	unsigned *val = (unsigned *)buffers[0].vector.ptr;
+	unsigned *val = (unsigned *)GET_VECTOR_PTR(descr[0]);
 	val[0]++;
 }
 
-void core_codelet_incC(starpu_data_interface_t *buffers, __attribute__ ((unused)) void *_args)
+void core_codelet_incC(void *descr[], __attribute__ ((unused)) void *_args)
 {
-	unsigned *val = (unsigned *)buffers[0].vector.ptr;
+	unsigned *val = (unsigned *)GET_VECTOR_PTR(descr[0]);
 	val[2]++;
 }
 

+ 4 - 4
tests/microbenchs/sync_and_notify_data_kernels.cu

@@ -25,9 +25,9 @@ extern "C" __global__ void _cuda_incA(unsigned *v)
 	v[0]++;
 }
 
-extern "C" void cuda_codelet_incA(starpu_data_interface_t *buffers, __attribute__ ((unused)) void *_args)
+extern "C" void cuda_codelet_incA(void *descr[], __attribute__ ((unused)) void *_args)
 {
-	unsigned *v = (unsigned *)buffers[0].vector.ptr;
+	unsigned *v = (unsigned *)GET_VECTOR_PTR(descr[0]);
 
 	_cuda_incA<<<1,1>>>(v);
 
@@ -43,9 +43,9 @@ extern "C" __global__ void _cuda_incC(unsigned *v)
 	v[2]++;
 }
 
-extern "C" void cuda_codelet_incC(starpu_data_interface_t *buffers, __attribute__ ((unused)) void *_args)
+extern "C" void cuda_codelet_incC(void *descr[], __attribute__ ((unused)) void *_args)
 {
-	unsigned *v = (unsigned *)buffers[0].vector.ptr;
+	unsigned *v = (unsigned *)GET_VECTOR_PTR(descr[0]);
 
 	_cuda_incC<<<1,1>>>(v);
 

+ 1 - 1
tests/microbenchs/tag-wait-api.c

@@ -20,7 +20,7 @@
 
 #include <starpu.h>
 
-static void dummy_func(starpu_data_interface_t *descr __attribute__ ((unused)), void *arg __attribute__ ((unused)))
+static void dummy_func(void *descr[] __attribute__ ((unused)), void *arg __attribute__ ((unused)))
 {
 }
 

+ 1 - 1
tests/microbenchs/tasks-overhead.c

@@ -29,7 +29,7 @@ static unsigned nbuffers = 0;
 
 struct starpu_task *tasks;
 
-static void dummy_func(starpu_data_interface_t *descr __attribute__ ((unused)), void *arg __attribute__ ((unused)))
+static void dummy_func(void *descr[] __attribute__ ((unused)), void *arg __attribute__ ((unused)))
 {
 }
 

+ 1 - 1
tests/overlap/overlap.c

@@ -48,7 +48,7 @@ static void callback(void *arg)
 	}
 }
 
-static void codelet_sleep(starpu_data_interface_t *buffers, __attribute__ ((unused)) void *_args)
+static void codelet_sleep(void *descr[], __attribute__ ((unused)) void *_args)
 {
 	usleep(TASKDURATION);
 }