Pārlūkot izejas kodu

Rename the "blas" interface into "matrix" which is much clearer.

Cédric Augonnet 15 gadi atpakaļ
vecāks
revīzija
da923646ea
38 mainīti faili ar 404 papildinājumiem un 404 dzēšanām
  1. 13 13
      examples/basic-examples/mult.c
  2. 2 2
      examples/cholesky/dw_cholesky.c
  3. 1 1
      examples/cholesky/dw_cholesky_grain.c
  4. 18 18
      examples/cholesky/dw_cholesky_kernels.c
  5. 6 6
      examples/cholesky/dw_cholesky_models.c
  6. 1 1
      examples/cholesky/dw_cholesky_no_stride.c
  7. 3 3
      examples/common/blas_model.c
  8. 3 3
      examples/heat/dw_factolu.c
  9. 1 1
      examples/heat/dw_factolu_grain.c
  10. 24 24
      examples/heat/dw_factolu_kernels.c
  11. 2 2
      examples/heat/dw_factolu_tag.c
  12. 18 18
      examples/heat/lu_kernels_model.c
  13. 2 2
      examples/lu/xlu.c
  14. 30 30
      examples/lu/xlu_kernels.c
  15. 4 4
      examples/lu/xlu_pivot.c
  16. 3 3
      examples/mult/dw_mult.c
  17. 3 3
      examples/mult/dw_mult_no_stride.c
  18. 3 3
      examples/mult/dw_mult_no_stride_no_tag.c
  19. 9 9
      examples/mult/sgemm_kernels.c
  20. 3 3
      examples/mult/xgemm.c
  21. 9 9
      examples/mult/xgemm_kernels.c
  22. 12 12
      examples/ppm-downscaler/yuv-downscaler.c
  23. 4 4
      examples/spmv/dw_block_spmv_kernels.c
  24. 3 3
      examples/strassen/strassen.c
  25. 23 23
      examples/strassen/strassen_kernels.c
  26. 6 6
      examples/strassen/strassen_models.c
  27. 3 3
      examples/strassen/test_strassen.c
  28. 5 5
      examples/strassen2/strassen2.c
  29. 19 19
      examples/strassen2/strassen2_kernels.c
  30. 14 14
      include/starpu-data-interfaces.h
  31. 7 7
      mpi/examples/mpi_lu/plu_example.c
  32. 24 24
      mpi/examples/mpi_lu/pxlu_kernels.c
  33. 2 2
      mpi/starpu_mpi_datatype.c
  34. 2 2
      src/Makefile.am
  35. 2 2
      src/datawizard/interfaces/bcsr_filters.c
  36. 1 1
      src/datawizard/interfaces/data_interface.h
  37. 10 10
      src/datawizard/interfaces/blas_filters.c
  38. 109 109
      src/datawizard/interfaces/blas_interface.c

+ 13 - 13
examples/basic-examples/mult.c

@@ -18,7 +18,7 @@
  * This example shows a simple implementation of a blocked matrix
  * multiplication. Note that this is NOT intended to be an efficient
  * implementation of sgemm! In this example, we show:
- *  - how to declare dense matrices (starpu_register_blas_data)
+ *  - how to declare dense matrices (starpu_register_matrix_data)
  *  - how to manipulate matrices within codelets (eg. descr[0].blas.ld)
  *  - how to use filters to partition the matrices into blocks
  *    (starpu_partition_data and starpu_map_filters)
@@ -116,9 +116,9 @@ static void cpu_mult(void *descr[], __attribute__((unused))  void *arg)
 	uint32_t ldA, ldB, ldC;
 
 	/* .blas.ptr gives a pointer to the first element of the local copy */
-	subA = (float *)STARPU_GET_BLAS_PTR(descr[0]);
-	subB = (float *)STARPU_GET_BLAS_PTR(descr[1]);
-	subC = (float *)STARPU_GET_BLAS_PTR(descr[2]);
+	subA = (float *)STARPU_GET_MATRIX_PTR(descr[0]);
+	subB = (float *)STARPU_GET_MATRIX_PTR(descr[1]);
+	subC = (float *)STARPU_GET_MATRIX_PTR(descr[2]);
 
 	/* .blas.nx is the number of rows (consecutive elements) and .blas.ny
 	 * is the number of lines that are separated by .blas.ld elements (ld
@@ -126,13 +126,13 @@ static void cpu_mult(void *descr[], __attribute__((unused))  void *arg)
 	 * NB: in case some filters were used, the leading dimension is not
 	 * guaranteed to be the same in main memory (on the original matrix)
 	 * and on the accelerator! */
-	nxC = STARPU_GET_BLAS_NX(descr[2]);
-	nyC = STARPU_GET_BLAS_NY(descr[2]);
-	nyA = STARPU_GET_BLAS_NY(descr[0]);
+	nxC = STARPU_GET_MATRIX_NX(descr[2]);
+	nyC = STARPU_GET_MATRIX_NY(descr[2]);
+	nyA = STARPU_GET_MATRIX_NY(descr[0]);
 
-	ldA = STARPU_GET_BLAS_LD(descr[0]);
-	ldB = STARPU_GET_BLAS_LD(descr[1]);
-	ldC = STARPU_GET_BLAS_LD(descr[2]);
+	ldA = STARPU_GET_MATRIX_LD(descr[0]);
+	ldB = STARPU_GET_MATRIX_LD(descr[1]);
+	ldC = STARPU_GET_MATRIX_LD(descr[2]);
 
 	/* we assume a FORTRAN-ordering! */
 	unsigned i,j,k;
@@ -199,11 +199,11 @@ static void partition_mult_data(void)
 	 * node in which resides the matrix: 0 means that the 3rd argument is
 	 * an address in main memory.
 	 */
-	starpu_register_blas_data(&A_handle, 0, (uintptr_t)A, 
+	starpu_register_matrix_data(&A_handle, 0, (uintptr_t)A, 
 		ydim, ydim, zdim, sizeof(float));
-	starpu_register_blas_data(&B_handle, 0, (uintptr_t)B, 
+	starpu_register_matrix_data(&B_handle, 0, (uintptr_t)B, 
 		zdim, zdim, xdim, sizeof(float));
-	starpu_register_blas_data(&C_handle, 0, (uintptr_t)C, 
+	starpu_register_matrix_data(&C_handle, 0, (uintptr_t)C, 
 		ydim, ydim, xdim, sizeof(float));
 
 	/* A filter is a method to partition a data into disjoint chunks, it is

+ 2 - 2
examples/cholesky/dw_cholesky.c

@@ -211,7 +211,7 @@ static void _dw_cholesky(starpu_data_handle dataA, unsigned nblocks)
 	fprintf(stderr, "Computation took (in ms)\n");
 	printf("%2.2f\n", timing/1000);
 
-	unsigned n = starpu_get_blas_nx(dataA);
+	unsigned n = starpu_get_matrix_nx(dataA);
 
 	double flop = (1.0f*n*n*n)/3.0f;
 	fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
@@ -240,7 +240,7 @@ void dw_cholesky(float *matA, unsigned size, unsigned ld, unsigned nblocks)
 
 	/* monitor and partition the A matrix into blocks :
 	 * one block is now determined by 2 unsigned (i,j) */
-	starpu_register_blas_data(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
+	starpu_register_matrix_data(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
 
 	starpu_filter f;
 		f.filter_func = starpu_vertical_block_filter_func;

+ 1 - 1
examples/cholesky/dw_cholesky_grain.c

@@ -168,7 +168,7 @@ static void _dw_cholesky_grain(float *matA, unsigned size, unsigned ld, unsigned
 
 	/* monitor and partition the A matrix into blocks :
 	 * one block is now determined by 2 unsigned (i,j) */
-	starpu_register_blas_data(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
+	starpu_register_matrix_data(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
 
 	starpu_filter f;
 		f.filter_func = starpu_vertical_block_filter_func;

+ 18 - 18
examples/cholesky/dw_cholesky_kernels.c

@@ -30,17 +30,17 @@
 static inline void chol_common_cpu_codelet_update_u22(void *descr[], int s, __attribute__((unused)) void *_args)
 {
 	//printf("22\n");
-	float *left 	= (float *)STARPU_GET_BLAS_PTR(descr[0]);
-	float *right 	= (float *)STARPU_GET_BLAS_PTR(descr[1]);
-	float *center 	= (float *)STARPU_GET_BLAS_PTR(descr[2]);
+	float *left 	= (float *)STARPU_GET_MATRIX_PTR(descr[0]);
+	float *right 	= (float *)STARPU_GET_MATRIX_PTR(descr[1]);
+	float *center 	= (float *)STARPU_GET_MATRIX_PTR(descr[2]);
 
-	unsigned dx = STARPU_GET_BLAS_NY(descr[2]);
-	unsigned dy = STARPU_GET_BLAS_NX(descr[2]);
-	unsigned dz = STARPU_GET_BLAS_NY(descr[0]);
+	unsigned dx = STARPU_GET_MATRIX_NY(descr[2]);
+	unsigned dy = STARPU_GET_MATRIX_NX(descr[2]);
+	unsigned dz = STARPU_GET_MATRIX_NY(descr[0]);
 
-	unsigned ld21 = STARPU_GET_BLAS_LD(descr[0]);
-	unsigned ld12 = STARPU_GET_BLAS_LD(descr[1]);
-	unsigned ld22 = STARPU_GET_BLAS_LD(descr[2]);
+	unsigned ld21 = STARPU_GET_MATRIX_LD(descr[0]);
+	unsigned ld12 = STARPU_GET_MATRIX_LD(descr[1]);
+	unsigned ld22 = STARPU_GET_MATRIX_LD(descr[2]);
 
 #ifdef STARPU_USE_CUDA
 	cublasStatus st;
@@ -91,14 +91,14 @@ static inline void chol_common_codelet_update_u21(void *descr[], int s, __attrib
 	float *sub11;
 	float *sub21;
 
-	sub11 = (float *)STARPU_GET_BLAS_PTR(descr[0]);
-	sub21 = (float *)STARPU_GET_BLAS_PTR(descr[1]);
+	sub11 = (float *)STARPU_GET_MATRIX_PTR(descr[0]);
+	sub21 = (float *)STARPU_GET_MATRIX_PTR(descr[1]);
 
-	unsigned ld11 = STARPU_GET_BLAS_LD(descr[0]);
-	unsigned ld21 = STARPU_GET_BLAS_LD(descr[1]);
+	unsigned ld11 = STARPU_GET_MATRIX_LD(descr[0]);
+	unsigned ld21 = STARPU_GET_MATRIX_LD(descr[1]);
 
-	unsigned nx21 = STARPU_GET_BLAS_NY(descr[1]);
-	unsigned ny21 = STARPU_GET_BLAS_NX(descr[1]);
+	unsigned nx21 = STARPU_GET_MATRIX_NY(descr[1]);
+	unsigned ny21 = STARPU_GET_MATRIX_NX(descr[1]);
 
 	switch (s) {
 		case 0:
@@ -137,10 +137,10 @@ static inline void chol_common_codelet_update_u11(void *descr[], int s, __attrib
 //	printf("11\n");
 	float *sub11;
 
-	sub11 = (float *)STARPU_GET_BLAS_PTR(descr[0]); 
+	sub11 = (float *)STARPU_GET_MATRIX_PTR(descr[0]); 
 
-	unsigned nx = STARPU_GET_BLAS_NY(descr[0]);
-	unsigned ld = STARPU_GET_BLAS_LD(descr[0]);
+	unsigned nx = STARPU_GET_MATRIX_NY(descr[0]);
+	unsigned ld = STARPU_GET_MATRIX_LD(descr[0]);
 
 	unsigned z;
 

+ 6 - 6
examples/cholesky/dw_cholesky_models.c

@@ -38,7 +38,7 @@ static double cpu_chol_task_11_cost(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = (((double)(n)*n*n)/1000.0f*0.894/0.79176);
 
@@ -53,7 +53,7 @@ static double cuda_chol_task_11_cost(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = (((double)(n)*n*n)/50.0f/10.75/5.088633/0.9883);
 
@@ -68,7 +68,7 @@ static double cpu_chol_task_21_cost(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = (((double)(n)*n*n)/7706.674/0.95/0.9965);
 
@@ -83,7 +83,7 @@ static double cuda_chol_task_21_cost(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = (((double)(n)*n*n)/50.0f/10.75/87.29520);
 
@@ -98,7 +98,7 @@ static double cpu_chol_task_22_cost(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = (((double)(n)*n*n)/50.0f/10.75/8.0760);
 
@@ -113,7 +113,7 @@ static double cuda_chol_task_22_cost(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = (((double)(n)*n*n)/50.0f/10.75/76.30666);
 

+ 1 - 1
examples/cholesky/dw_cholesky_no_stride.c

@@ -311,7 +311,7 @@ int main(int argc, char **argv)
 	for (x = 0; x < nblocks; x++)
 	{
 		if (x <= y) {
-			starpu_register_blas_data(&A_state[y][x], 0, (uintptr_t)A[y][x], 
+			starpu_register_matrix_data(&A_state[y][x], 0, (uintptr_t)A[y][x], 
 				BLOCKSIZE, BLOCKSIZE, BLOCKSIZE, sizeof(float));
 		}
 	}

+ 3 - 3
examples/common/blas_model.c

@@ -32,9 +32,9 @@ double gemm_cost(starpu_buffer_descr *descr)
 	uint32_t nxC, nyC, nxA;
 
 
-	nxC = starpu_get_blas_nx(descr[2].handle);
-	nyC = starpu_get_blas_ny(descr[2].handle);
-	nxA = starpu_get_blas_nx(descr[0].handle);
+	nxC = starpu_get_matrix_nx(descr[2].handle);
+	nyC = starpu_get_matrix_ny(descr[2].handle);
+	nxA = starpu_get_matrix_nx(descr[0].handle);
 
 //	printf("nxC %d nxC %d nxA %d\n", nxC, nyC, nxA);
 

+ 3 - 3
examples/heat/dw_factolu.c

@@ -625,7 +625,7 @@ void dw_codelet_facto(starpu_data_handle dataA, unsigned nblocks)
 	fprintf(stderr, "Computation took (in ms)\n");
 	printf("%2.2f\n", timing/1000);
 
-	unsigned n = starpu_get_blas_nx(dataA);
+	unsigned n = starpu_get_matrix_nx(dataA);
 	double flop = (2.0f*n*n*n)/3.0f;
 	fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
 }
@@ -682,7 +682,7 @@ void dw_codelet_facto_v2(starpu_data_handle dataA, unsigned nblocks)
 	fprintf(stderr, "Computation took (in ms)\n");
 	printf("%2.2f\n", timing/1000);
 
-	unsigned n = starpu_get_blas_nx(dataA);
+	unsigned n = starpu_get_matrix_nx(dataA);
 	double flop = (2.0f*n*n*n)/3.0f;
 	fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
 }
@@ -727,7 +727,7 @@ void dw_factoLU(float *matA, unsigned size,
 
 	/* monitor and partition the A matrix into blocks :
 	 * one block is now determined by 2 unsigned (i,j) */
-	starpu_register_blas_data(&dataA, 0, (uintptr_t)matA, ld, 
+	starpu_register_matrix_data(&dataA, 0, (uintptr_t)matA, ld, 
 			size, size, sizeof(float));
 
 	starpu_filter f;

+ 1 - 1
examples/heat/dw_factolu_grain.c

@@ -197,7 +197,7 @@ static void dw_factoLU_grain_inner(float *matA, unsigned size, unsigned inner_si
 	 * (re)partition data
 	 */
 	starpu_data_handle dataA;
-	starpu_register_blas_data(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
+	starpu_register_matrix_data(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
 
 	STARPU_ASSERT((size % blocksize) == 0);
 	STARPU_ASSERT((inner_size % blocksize) == 0);

+ 24 - 24
examples/heat/dw_factolu_kernels.c

@@ -104,17 +104,17 @@ void display_stat_heat(void)
 
 static inline void dw_common_cpu_codelet_update_u22(void *descr[], int s, __attribute__((unused)) void *_args)
 {
-	float *left 	= (float *)STARPU_GET_BLAS_PTR(descr[0]);
-	float *right 	= (float *)STARPU_GET_BLAS_PTR(descr[1]);
-	float *center 	= (float *)STARPU_GET_BLAS_PTR(descr[2]);
+	float *left 	= (float *)STARPU_GET_MATRIX_PTR(descr[0]);
+	float *right 	= (float *)STARPU_GET_MATRIX_PTR(descr[1]);
+	float *center 	= (float *)STARPU_GET_MATRIX_PTR(descr[2]);
 
-	unsigned dx = STARPU_GET_BLAS_NX(descr[2]);
-	unsigned dy = STARPU_GET_BLAS_NY(descr[2]);
-	unsigned dz = STARPU_GET_BLAS_NY(descr[0]);
+	unsigned dx = STARPU_GET_MATRIX_NX(descr[2]);
+	unsigned dy = STARPU_GET_MATRIX_NY(descr[2]);
+	unsigned dz = STARPU_GET_MATRIX_NY(descr[0]);
 
-	unsigned ld12 = STARPU_GET_BLAS_LD(descr[0]);
-	unsigned ld21 = STARPU_GET_BLAS_LD(descr[1]);
-	unsigned ld22 = STARPU_GET_BLAS_LD(descr[2]);
+	unsigned ld12 = STARPU_GET_MATRIX_LD(descr[0]);
+	unsigned ld21 = STARPU_GET_MATRIX_LD(descr[1]);
+	unsigned ld22 = STARPU_GET_MATRIX_LD(descr[2]);
 
 #ifdef STARPU_USE_CUDA
 	cublasStatus status;
@@ -171,14 +171,14 @@ static inline void dw_common_codelet_update_u12(void *descr[], int s, __attribut
 	float *sub11;
 	float *sub12;
 
-	sub11 = (float *)STARPU_GET_BLAS_PTR(descr[0]);	
-	sub12 = (float *)STARPU_GET_BLAS_PTR(descr[1]);
+	sub11 = (float *)STARPU_GET_MATRIX_PTR(descr[0]);	
+	sub12 = (float *)STARPU_GET_MATRIX_PTR(descr[1]);
 
-	unsigned ld11 = STARPU_GET_BLAS_LD(descr[0]);
-	unsigned ld12 = STARPU_GET_BLAS_LD(descr[1]);
+	unsigned ld11 = STARPU_GET_MATRIX_LD(descr[0]);
+	unsigned ld12 = STARPU_GET_MATRIX_LD(descr[1]);
 
-	unsigned nx12 = STARPU_GET_BLAS_NX(descr[1]);
-	unsigned ny12 = STARPU_GET_BLAS_NY(descr[1]);
+	unsigned nx12 = STARPU_GET_MATRIX_NX(descr[1]);
+	unsigned ny12 = STARPU_GET_MATRIX_NY(descr[1]);
 	
 #ifdef STARPU_USE_CUDA
 	cublasStatus status;
@@ -234,14 +234,14 @@ static inline void dw_common_codelet_update_u21(void *descr[], int s, __attribut
 	float *sub11;
 	float *sub21;
 
-	sub11 = (float *)STARPU_GET_BLAS_PTR(descr[0]);
-	sub21 = (float *)STARPU_GET_BLAS_PTR(descr[1]);
+	sub11 = (float *)STARPU_GET_MATRIX_PTR(descr[0]);
+	sub21 = (float *)STARPU_GET_MATRIX_PTR(descr[1]);
 
-	unsigned ld11 = STARPU_GET_BLAS_LD(descr[0]);
-	unsigned ld21 = STARPU_GET_BLAS_LD(descr[1]);
+	unsigned ld11 = STARPU_GET_MATRIX_LD(descr[0]);
+	unsigned ld21 = STARPU_GET_MATRIX_LD(descr[1]);
 
-	unsigned nx21 = STARPU_GET_BLAS_NX(descr[1]);
-	unsigned ny21 = STARPU_GET_BLAS_NY(descr[1]);
+	unsigned nx21 = STARPU_GET_MATRIX_NX(descr[1]);
+	unsigned ny21 = STARPU_GET_MATRIX_NY(descr[1]);
 	
 #ifdef STARPU_USE_CUDA
 	cublasStatus status;
@@ -309,10 +309,10 @@ static inline void dw_common_codelet_update_u11(void *descr[], int s, __attribut
 {
 	float *sub11;
 
-	sub11 = (float *)STARPU_GET_BLAS_PTR(descr[0]); 
+	sub11 = (float *)STARPU_GET_MATRIX_PTR(descr[0]); 
 
-	unsigned long nx = STARPU_GET_BLAS_NX(descr[0]);
-	unsigned long ld = STARPU_GET_BLAS_LD(descr[0]);
+	unsigned long nx = STARPU_GET_MATRIX_NX(descr[0]);
+	unsigned long ld = STARPU_GET_MATRIX_LD(descr[0]);
 
 	unsigned long z;
 

+ 2 - 2
examples/heat/dw_factolu_tag.c

@@ -254,7 +254,7 @@ static void dw_codelet_facto_v3(starpu_data_handle dataA, unsigned nblocks)
 	fprintf(stderr, "Computation took (in ms)\n");
 	printf("%2.2f\n", timing/1000);
 
-	unsigned n = starpu_get_blas_nx(dataA);
+	unsigned n = starpu_get_matrix_nx(dataA);
 	double flop = (2.0f*n*n*n)/3.0f;
 	fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
 }
@@ -276,7 +276,7 @@ void dw_factoLU_tag(float *matA, unsigned size, unsigned ld, unsigned nblocks, u
 
 	/* monitor and partition the A matrix into blocks :
 	 * one block is now determined by 2 unsigned (i,j) */
-	starpu_register_blas_data(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
+	starpu_register_matrix_data(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
 
 	starpu_filter f;
 		f.filter_func = starpu_vertical_block_filter_func;

+ 18 - 18
examples/heat/lu_kernels_model.c

@@ -44,7 +44,7 @@ double task_11_cost(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = ((n*n*n)/537.5);
 
@@ -55,7 +55,7 @@ double task_12_cost(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 //	double cost = ((n*n*n)/1744.695);
 	double cost = ((n*n*n)/3210.80);
@@ -69,7 +69,7 @@ double task_21_cost(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 //	double cost = ((n*n*n)/1744.695);
 	double cost = ((n*n*n)/3691.53);
@@ -84,9 +84,9 @@ double task_22_cost(starpu_buffer_descr *descr)
 {
 	uint32_t nx, ny, nz;
 
-	nx = starpu_get_blas_nx(descr[2].handle);
-	ny = starpu_get_blas_ny(descr[2].handle);
-	nz = starpu_get_blas_ny(descr[0].handle);
+	nx = starpu_get_matrix_nx(descr[2].handle);
+	ny = starpu_get_matrix_ny(descr[2].handle);
+	nz = starpu_get_matrix_ny(descr[0].handle);
 
 	double cost = ((nx*ny*nz)/4110.0);
 
@@ -104,7 +104,7 @@ double task_11_cost_cuda(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = ((n*n*n)/1853.7806);
 
@@ -116,7 +116,7 @@ double task_12_cost_cuda(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = ((n*n*n)/42838.5718);
 
@@ -129,7 +129,7 @@ double task_21_cost_cuda(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = ((n*n*n)/49208.667);
 
@@ -143,9 +143,9 @@ double task_22_cost_cuda(starpu_buffer_descr *descr)
 {
 	uint32_t nx, ny, nz;
 
-	nx = starpu_get_blas_nx(descr[2].handle);
-	ny = starpu_get_blas_ny(descr[2].handle);
-	nz = starpu_get_blas_ny(descr[0].handle);
+	nx = starpu_get_matrix_nx(descr[2].handle);
+	ny = starpu_get_matrix_ny(descr[2].handle);
+	nz = starpu_get_matrix_ny(descr[0].handle);
 
 	double cost = ((nx*ny*nz)/57523.560);
 
@@ -163,7 +163,7 @@ double task_11_cost_cpu(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = ((n*n*n)/537.5);
 
@@ -175,7 +175,7 @@ double task_12_cost_cpu(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = ((n*n*n)/6668.224);
 
@@ -188,7 +188,7 @@ double task_21_cost_cpu(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = ((n*n*n)/6793.8423);
 
@@ -202,9 +202,9 @@ double task_22_cost_cpu(starpu_buffer_descr *descr)
 {
 	uint32_t nx, ny, nz;
 
-	nx = starpu_get_blas_nx(descr[2].handle);
-	ny = starpu_get_blas_ny(descr[2].handle);
-	nz = starpu_get_blas_ny(descr[0].handle);
+	nx = starpu_get_matrix_nx(descr[2].handle);
+	ny = starpu_get_matrix_ny(descr[2].handle);
+	nz = starpu_get_matrix_ny(descr[0].handle);
 
 	double cost = ((nx*ny*nz)/4203.0175);
 

+ 2 - 2
examples/lu/xlu.c

@@ -302,7 +302,7 @@ static void dw_codelet_facto_v3(starpu_data_handle dataA, unsigned nblocks)
 	fprintf(stderr, "Computation took (in ms)\n");
 	printf("%2.2f\n", timing/1000);
 
-	unsigned n = starpu_get_blas_nx(dataA);
+	unsigned n = starpu_get_matrix_nx(dataA);
 	double flop = (2.0f*n*n*n)/3.0f;
 	fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
 }
@@ -313,7 +313,7 @@ void STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigne
 
 	/* monitor and partition the A matrix into blocks :
 	 * one block is now determined by 2 unsigned (i,j) */
-	starpu_register_blas_data(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
+	starpu_register_matrix_data(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
 
 	starpu_filter f;
 		f.filter_func = starpu_vertical_block_filter_func;

+ 30 - 30
examples/lu/xlu_kernels.c

@@ -24,17 +24,17 @@
 static inline void STARPU_LU(common_u22)(void *descr[],
 				int s, __attribute__((unused)) void *_args)
 {
-	TYPE *right 	= (TYPE *)STARPU_GET_BLAS_PTR(descr[0]);
-	TYPE *left 	= (TYPE *)STARPU_GET_BLAS_PTR(descr[1]);
-	TYPE *center 	= (TYPE *)STARPU_GET_BLAS_PTR(descr[2]);
+	TYPE *right 	= (TYPE *)STARPU_GET_MATRIX_PTR(descr[0]);
+	TYPE *left 	= (TYPE *)STARPU_GET_MATRIX_PTR(descr[1]);
+	TYPE *center 	= (TYPE *)STARPU_GET_MATRIX_PTR(descr[2]);
 
-	unsigned dx = STARPU_GET_BLAS_NX(descr[2]);
-	unsigned dy = STARPU_GET_BLAS_NY(descr[2]);
-	unsigned dz = STARPU_GET_BLAS_NY(descr[0]);
+	unsigned dx = STARPU_GET_MATRIX_NX(descr[2]);
+	unsigned dy = STARPU_GET_MATRIX_NY(descr[2]);
+	unsigned dz = STARPU_GET_MATRIX_NY(descr[0]);
 
-	unsigned ld12 = STARPU_GET_BLAS_LD(descr[0]);
-	unsigned ld21 = STARPU_GET_BLAS_LD(descr[1]);
-	unsigned ld22 = STARPU_GET_BLAS_LD(descr[2]);
+	unsigned ld12 = STARPU_GET_MATRIX_LD(descr[0]);
+	unsigned ld21 = STARPU_GET_MATRIX_LD(descr[1]);
+	unsigned ld22 = STARPU_GET_MATRIX_LD(descr[2]);
 
 #ifdef STARPU_USE_CUDA
 	cublasStatus status;
@@ -91,14 +91,14 @@ static inline void STARPU_LU(common_u12)(void *descr[],
 	TYPE *sub11;
 	TYPE *sub12;
 
-	sub11 = (TYPE *)STARPU_GET_BLAS_PTR(descr[0]);	
-	sub12 = (TYPE *)STARPU_GET_BLAS_PTR(descr[1]);
+	sub11 = (TYPE *)STARPU_GET_MATRIX_PTR(descr[0]);	
+	sub12 = (TYPE *)STARPU_GET_MATRIX_PTR(descr[1]);
 
-	unsigned ld11 = STARPU_GET_BLAS_LD(descr[0]);
-	unsigned ld12 = STARPU_GET_BLAS_LD(descr[1]);
+	unsigned ld11 = STARPU_GET_MATRIX_LD(descr[0]);
+	unsigned ld12 = STARPU_GET_MATRIX_LD(descr[1]);
 
-	unsigned nx12 = STARPU_GET_BLAS_NX(descr[1]);
-	unsigned ny12 = STARPU_GET_BLAS_NY(descr[1]);
+	unsigned nx12 = STARPU_GET_MATRIX_NX(descr[1]);
+	unsigned ny12 = STARPU_GET_MATRIX_NY(descr[1]);
 
 #ifdef STARPU_USE_CUDA
 	cublasStatus status;
@@ -153,14 +153,14 @@ static inline void STARPU_LU(common_u21)(void *descr[],
 	TYPE *sub11;
 	TYPE *sub21;
 
-	sub11 = (TYPE *)STARPU_GET_BLAS_PTR(descr[0]);
-	sub21 = (TYPE *)STARPU_GET_BLAS_PTR(descr[1]);
+	sub11 = (TYPE *)STARPU_GET_MATRIX_PTR(descr[0]);
+	sub21 = (TYPE *)STARPU_GET_MATRIX_PTR(descr[1]);
 
-	unsigned ld11 = STARPU_GET_BLAS_LD(descr[0]);
-	unsigned ld21 = STARPU_GET_BLAS_LD(descr[1]);
+	unsigned ld11 = STARPU_GET_MATRIX_LD(descr[0]);
+	unsigned ld21 = STARPU_GET_MATRIX_LD(descr[1]);
 
-	unsigned nx21 = STARPU_GET_BLAS_NX(descr[1]);
-	unsigned ny21 = STARPU_GET_BLAS_NY(descr[1]);
+	unsigned nx21 = STARPU_GET_MATRIX_NX(descr[1]);
+	unsigned ny21 = STARPU_GET_MATRIX_NY(descr[1]);
 	
 #ifdef STARPU_USE_CUDA
 	cublasStatus status;
@@ -212,10 +212,10 @@ static inline void STARPU_LU(common_u11)(void *descr[],
 {
 	TYPE *sub11;
 
-	sub11 = (TYPE *)STARPU_GET_BLAS_PTR(descr[0]); 
+	sub11 = (TYPE *)STARPU_GET_MATRIX_PTR(descr[0]); 
 
-	unsigned long nx = STARPU_GET_BLAS_NX(descr[0]);
-	unsigned long ld = STARPU_GET_BLAS_LD(descr[0]);
+	unsigned long nx = STARPU_GET_MATRIX_NX(descr[0]);
+	unsigned long ld = STARPU_GET_MATRIX_LD(descr[0]);
 
 	unsigned long z;
 
@@ -284,10 +284,10 @@ static inline void STARPU_LU(common_u11_pivot)(void *descr[],
 {
 	TYPE *sub11;
 
-	sub11 = (TYPE *)STARPU_GET_BLAS_PTR(descr[0]); 
+	sub11 = (TYPE *)STARPU_GET_MATRIX_PTR(descr[0]); 
 
-	unsigned long nx = STARPU_GET_BLAS_NX(descr[0]);
-	unsigned long ld = STARPU_GET_BLAS_LD(descr[0]);
+	unsigned long nx = STARPU_GET_MATRIX_NX(descr[0]);
+	unsigned long ld = STARPU_GET_MATRIX_LD(descr[0]);
 
 	unsigned long z;
 
@@ -399,9 +399,9 @@ static inline void STARPU_LU(common_pivot)(void *descr[],
 {
 	TYPE *matrix;
 
-	matrix = (TYPE *)STARPU_GET_BLAS_PTR(descr[0]); 
-	unsigned long nx = STARPU_GET_BLAS_NX(descr[0]);
-	unsigned long ld = STARPU_GET_BLAS_LD(descr[0]);
+	matrix = (TYPE *)STARPU_GET_MATRIX_PTR(descr[0]); 
+	unsigned long nx = STARPU_GET_MATRIX_NX(descr[0]);
+	unsigned long ld = STARPU_GET_MATRIX_LD(descr[0]);
 
 	unsigned row, rowaux;
 

+ 4 - 4
examples/lu/xlu_pivot.c

@@ -425,7 +425,7 @@ void STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size
 
 	/* monitor and partition the A matrix into blocks :
 	 * one block is now determined by 2 unsigned (i,j) */
-	starpu_register_blas_data(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
+	starpu_register_matrix_data(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
 
 	starpu_filter f;
 		f.filter_func = starpu_vertical_block_filter_func;
@@ -465,7 +465,7 @@ void STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size
 	fprintf(stderr, "Computation took (in ms)\n");
 	fprintf(stderr, "%2.2f\n", timing/1000);
 
-	unsigned n = starpu_get_blas_nx(dataA);
+	unsigned n = starpu_get_matrix_nx(dataA);
 	double flop = (2.0f*n*n*n)/3.0f;
 	fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
 
@@ -490,7 +490,7 @@ void STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, un
 	for (bj = 0; bj < nblocks; bj++)
 	for (bi = 0; bi < nblocks; bi++)
 	{
-		starpu_register_blas_data(&dataAp[bi+nblocks*bj], 0,
+		starpu_register_matrix_data(&dataAp[bi+nblocks*bj], 0,
 			(uintptr_t)matA[bi+nblocks*bj], size/nblocks,
 			size/nblocks, size/nblocks, sizeof(TYPE));
 	}
@@ -514,7 +514,7 @@ void STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, un
 	fprintf(stderr, "Computation took (in ms)\n");
 	fprintf(stderr, "%2.2f\n", timing/1000);
 
-	unsigned n = starpu_get_blas_nx(dataAp[0])*nblocks;
+	unsigned n = starpu_get_matrix_nx(dataAp[0])*nblocks;
 	double flop = (2.0f*n*n*n)/3.0f;
 	fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
 

+ 3 - 3
examples/mult/dw_mult.c

@@ -153,11 +153,11 @@ static void partition_mult_data(void)
 {
 	gettimeofday(&start, NULL);
 
-	starpu_register_blas_data(&A_handle, 0, (uintptr_t)A, 
+	starpu_register_matrix_data(&A_handle, 0, (uintptr_t)A, 
 		ydim, ydim, zdim, sizeof(float));
-	starpu_register_blas_data(&B_handle, 0, (uintptr_t)B, 
+	starpu_register_matrix_data(&B_handle, 0, (uintptr_t)B, 
 		zdim, zdim, xdim, sizeof(float));
-	starpu_register_blas_data(&C_handle, 0, (uintptr_t)C, 
+	starpu_register_matrix_data(&C_handle, 0, (uintptr_t)C, 
 		ydim, ydim, xdim, sizeof(float));
 
 	starpu_data_set_wb_mask(C_handle, 1<<0);

+ 3 - 3
examples/mult/dw_mult_no_stride.c

@@ -155,7 +155,7 @@ static void init_problem_data(void)
 	{
 		for (z = 0; z < nslicesz; z++)
 		{
-			starpu_register_blas_data(&A_state[y][z], 0, (uintptr_t)A[y][z], 
+			starpu_register_matrix_data(&A_state[y][z], 0, (uintptr_t)A[y][z], 
 				BLOCKSIZEY, BLOCKSIZEY, BLOCKSIZEZ, sizeof(float));
 		}
 	}
@@ -164,7 +164,7 @@ static void init_problem_data(void)
 	{
 		for (x = 0; x < nslicesx; x++)
 		{
-			starpu_register_blas_data(&B_state[z][x], 0, (uintptr_t)B[z][x], 
+			starpu_register_matrix_data(&B_state[z][x], 0, (uintptr_t)B[z][x], 
 				BLOCKSIZEZ, BLOCKSIZEZ, BLOCKSIZEX, sizeof(float));
 		}
 	}
@@ -173,7 +173,7 @@ static void init_problem_data(void)
 	{
 		for (x = 0; x < nslicesx; x++)
 		{
-			starpu_register_blas_data(&C_state[y][x], 0, (uintptr_t)C[y][x], 
+			starpu_register_matrix_data(&C_state[y][x], 0, (uintptr_t)C[y][x], 
 				BLOCKSIZEY, BLOCKSIZEY, BLOCKSIZEX, sizeof(float));
 		}
 	}

+ 3 - 3
examples/mult/dw_mult_no_stride_no_tag.c

@@ -171,7 +171,7 @@ static void init_problem_data(void)
 	{
 		for (z = 0; z < nslicesz; z++)
 		{
-			starpu_register_blas_data(&A_state[y][z], 0, (uintptr_t)A[y][z], 
+			starpu_register_matrix_data(&A_state[y][z], 0, (uintptr_t)A[y][z], 
 				BLOCKSIZEY, BLOCKSIZEY, BLOCKSIZEZ, sizeof(float));
 		}
 	}
@@ -180,7 +180,7 @@ static void init_problem_data(void)
 	{
 		for (x = 0; x < nslicesx; x++)
 		{
-			starpu_register_blas_data(&B_state[z][x], 0, (uintptr_t)B[z][x], 
+			starpu_register_matrix_data(&B_state[z][x], 0, (uintptr_t)B[z][x], 
 				BLOCKSIZEZ, BLOCKSIZEZ, BLOCKSIZEX, sizeof(float));
 		}
 	}
@@ -189,7 +189,7 @@ static void init_problem_data(void)
 	{
 		for (x = 0; x < nslicesx; x++)
 		{
-			starpu_register_blas_data(&C_state[y][x], 0, (uintptr_t)C[y][x], 
+			starpu_register_matrix_data(&C_state[y][x], 0, (uintptr_t)C[y][x], 
 				BLOCKSIZEY, BLOCKSIZEY, BLOCKSIZEX, sizeof(float));
 		}
 	}

+ 9 - 9
examples/mult/sgemm_kernels.c

@@ -25,17 +25,17 @@
 	float *subB;			\
 	float *subC;			\
 					\
-	subA = (float *)STARPU_GET_BLAS_PTR(descr[0]);	\
-	subB = (float *)STARPU_GET_BLAS_PTR(descr[1]);	\
-	subC = (float *)STARPU_GET_BLAS_PTR(descr[2]);	\
+	subA = (float *)STARPU_GET_MATRIX_PTR(descr[0]);	\
+	subB = (float *)STARPU_GET_MATRIX_PTR(descr[1]);	\
+	subC = (float *)STARPU_GET_MATRIX_PTR(descr[2]);	\
 					\
-	nxC = STARPU_GET_BLAS_NX(descr[2]);		\
-	nyC = STARPU_GET_BLAS_NY(descr[2]);		\
-	nyA = STARPU_GET_BLAS_NY(descr[0]);		\
+	nxC = STARPU_GET_MATRIX_NX(descr[2]);		\
+	nyC = STARPU_GET_MATRIX_NY(descr[2]);		\
+	nyA = STARPU_GET_MATRIX_NY(descr[0]);		\
 					\
-	ldA = STARPU_GET_BLAS_LD(descr[0]);		\
-	ldB = STARPU_GET_BLAS_LD(descr[1]);		\
-	ldC = STARPU_GET_BLAS_LD(descr[2]);
+	ldA = STARPU_GET_MATRIX_LD(descr[0]);		\
+	ldB = STARPU_GET_MATRIX_LD(descr[1]);		\
+	ldC = STARPU_GET_MATRIX_LD(descr[2]);
 
 
 

+ 3 - 3
examples/mult/xgemm.c

@@ -137,11 +137,11 @@ static void init_problem_data(void)
 
 static void partition_mult_data(void)
 {
-	starpu_register_blas_data(&A_handle, 0, (uintptr_t)A, 
+	starpu_register_matrix_data(&A_handle, 0, (uintptr_t)A, 
 		ydim, ydim, zdim, sizeof(TYPE));
-	starpu_register_blas_data(&B_handle, 0, (uintptr_t)B, 
+	starpu_register_matrix_data(&B_handle, 0, (uintptr_t)B, 
 		zdim, zdim, xdim, sizeof(TYPE));
-	starpu_register_blas_data(&C_handle, 0, (uintptr_t)C, 
+	starpu_register_matrix_data(&C_handle, 0, (uintptr_t)C, 
 		ydim, ydim, xdim, sizeof(TYPE));
 
 	starpu_data_set_wb_mask(C_handle, 1<<0);

+ 9 - 9
examples/mult/xgemm_kernels.c

@@ -25,17 +25,17 @@
 	TYPE *subB;			\
 	TYPE *subC;			\
 					\
-	subA = (TYPE *)STARPU_GET_BLAS_PTR(descr[0]);	\
-	subB = (TYPE *)STARPU_GET_BLAS_PTR(descr[1]);	\
-	subC = (TYPE *)STARPU_GET_BLAS_PTR(descr[2]);	\
+	subA = (TYPE *)STARPU_GET_MATRIX_PTR(descr[0]);	\
+	subB = (TYPE *)STARPU_GET_MATRIX_PTR(descr[1]);	\
+	subC = (TYPE *)STARPU_GET_MATRIX_PTR(descr[2]);	\
 					\
-	nxC = STARPU_GET_BLAS_NX(descr[2]);		\
-	nyC = STARPU_GET_BLAS_NY(descr[2]);		\
-	nyA = STARPU_GET_BLAS_NY(descr[0]);		\
+	nxC = STARPU_GET_MATRIX_NX(descr[2]);		\
+	nyC = STARPU_GET_MATRIX_NY(descr[2]);		\
+	nyA = STARPU_GET_MATRIX_NY(descr[0]);		\
 					\
-	ldA = STARPU_GET_BLAS_LD(descr[0]);		\
-	ldB = STARPU_GET_BLAS_LD(descr[1]);		\
-	ldC = STARPU_GET_BLAS_LD(descr[2]);
+	ldA = STARPU_GET_MATRIX_LD(descr[0]);		\
+	ldB = STARPU_GET_MATRIX_LD(descr[1]);		\
+	ldC = STARPU_GET_MATRIX_LD(descr[2]);
 
 
 

+ 12 - 12
examples/ppm-downscaler/yuv-downscaler.c

@@ -68,14 +68,14 @@ static void ds_callback(void *arg)
 
 static void ds_kernel_cpu(void *descr[], __attribute__((unused)) void *arg)
 {
-	uint8_t *input = (uint8_t *)STARPU_GET_BLAS_PTR(descr[0]);
-	unsigned input_ld = STARPU_GET_BLAS_LD(descr[0]);
+	uint8_t *input = (uint8_t *)STARPU_GET_MATRIX_PTR(descr[0]);
+	unsigned input_ld = STARPU_GET_MATRIX_LD(descr[0]);
 
-	uint8_t *output = (uint8_t *)STARPU_GET_BLAS_PTR(descr[1]);
-	unsigned output_ld = STARPU_GET_BLAS_LD(descr[1]);
+	uint8_t *output = (uint8_t *)STARPU_GET_MATRIX_PTR(descr[1]);
+	unsigned output_ld = STARPU_GET_MATRIX_LD(descr[1]);
 
-	unsigned ncols = STARPU_GET_BLAS_NX(descr[0]);
-	unsigned nlines = STARPU_GET_BLAS_NY(descr[0]);
+	unsigned ncols = STARPU_GET_MATRIX_NX(descr[0]);
+	unsigned nlines = STARPU_GET_MATRIX_NY(descr[0]);
 
 	unsigned line, col;
 	for (line = 0; line < nlines; line+=FACTOR)
@@ -169,39 +169,39 @@ int main(int argc, char **argv)
 	for (frame = 0; frame < nframes; frame++)
 	{
 		/* register Y layer */
-		starpu_register_blas_data(&frame_y_handle[frame], 0,
+		starpu_register_matrix_data(&frame_y_handle[frame], 0,
 			(uintptr_t)&yuv_in_buffer[frame].y,
 			WIDTH, WIDTH, HEIGHT, sizeof(uint8_t));
 
 		starpu_partition_data(frame_y_handle[frame], &filter_y);
 
-		starpu_register_blas_data(&new_frame_y_handle[frame], 0,
+		starpu_register_matrix_data(&new_frame_y_handle[frame], 0,
 			(uintptr_t)&yuv_out_buffer[frame].y,
 			NEW_WIDTH, NEW_WIDTH, NEW_HEIGHT, sizeof(uint8_t));
 
 		starpu_partition_data(new_frame_y_handle[frame], &filter_y);
 
 		/* register U layer */
-		starpu_register_blas_data(&frame_u_handle[frame], 0,
+		starpu_register_matrix_data(&frame_u_handle[frame], 0,
 			(uintptr_t)&yuv_in_buffer[frame].u,
 			WIDTH/2, WIDTH/2, HEIGHT/2, sizeof(uint8_t));
 
 		starpu_partition_data(frame_u_handle[frame], &filter_u);
 
-		starpu_register_blas_data(&new_frame_u_handle[frame], 0,
+		starpu_register_matrix_data(&new_frame_u_handle[frame], 0,
 			(uintptr_t)&yuv_out_buffer[frame].u,
 			NEW_WIDTH/2, NEW_WIDTH/2, NEW_HEIGHT/2, sizeof(uint8_t));
 
 		starpu_partition_data(new_frame_u_handle[frame], &filter_u);
 
 		/* register V layer */
-		starpu_register_blas_data(&frame_v_handle[frame], 0,
+		starpu_register_matrix_data(&frame_v_handle[frame], 0,
 			(uintptr_t)&yuv_in_buffer[frame].v,
 			WIDTH/2, WIDTH/2, HEIGHT/2, sizeof(uint8_t));
 
 		starpu_partition_data(frame_v_handle[frame], &filter_v);
 
-		starpu_register_blas_data(&new_frame_v_handle[frame], 0,
+		starpu_register_matrix_data(&new_frame_v_handle[frame], 0,
 			(uintptr_t)&yuv_out_buffer[frame].v,
 			NEW_WIDTH/2, NEW_WIDTH/2, NEW_HEIGHT/2, sizeof(uint8_t));
 

+ 4 - 4
examples/spmv/dw_block_spmv_kernels.c

@@ -23,14 +23,14 @@
 static inline void common_block_spmv(void *descr[], int s, __attribute__((unused)) void *_args)
 {
 	//printf("22\n");
-	float *block 	= (float *)STARPU_GET_BLAS_PTR(descr[0]);
+	float *block 	= (float *)STARPU_GET_MATRIX_PTR(descr[0]);
 	float *in 	= (float *)STARPU_GET_VECTOR_PTR(descr[1]);
 	float *out 	= (float *)STARPU_GET_VECTOR_PTR(descr[2]);
 
-	unsigned dx = STARPU_GET_BLAS_NX(descr[0]);
-	unsigned dy = STARPU_GET_BLAS_NY(descr[0]);
+	unsigned dx = STARPU_GET_MATRIX_NX(descr[0]);
+	unsigned dy = STARPU_GET_MATRIX_NY(descr[0]);
 
-	unsigned ld = STARPU_GET_BLAS_LD(descr[0]);
+	unsigned ld = STARPU_GET_MATRIX_LD(descr[0]);
 
 	switch (s) {
 		case 0:

+ 3 - 3
examples/strassen/strassen.c

@@ -23,15 +23,15 @@ static starpu_data_handle create_tmp_matrix(starpu_data_handle M)
 	starpu_data_handle state = malloc(sizeof(starpu_data_handle));
 
 	/* create a matrix with the same dimensions as M */
-	uint32_t nx = starpu_get_blas_nx(M);
-	uint32_t ny = starpu_get_blas_nx(M);
+	uint32_t nx = starpu_get_matrix_nx(M);
+	uint32_t ny = starpu_get_matrix_nx(M);
 
 	STARPU_ASSERT(state);
 
 	data = malloc(nx*ny*sizeof(float));
 	STARPU_ASSERT(data);
 
-	starpu_register_blas_data(&state, 0, (uintptr_t)data, nx, nx, ny, sizeof(float));
+	starpu_register_matrix_data(&state, 0, (uintptr_t)data, nx, nx, ny, sizeof(float));
 	
 	return state;
 }

+ 23 - 23
examples/strassen/strassen_kernels.c

@@ -19,17 +19,17 @@
 
 static void mult_common_codelet(void *descr[], int s, __attribute__((unused))  void *arg)
 {
-	float *center 	= (float *)STARPU_GET_BLAS_PTR(descr[0]);
-	float *left 	= (float *)STARPU_GET_BLAS_PTR(descr[1]);
-	float *right 	= (float *)STARPU_GET_BLAS_PTR(descr[2]);
+	float *center 	= (float *)STARPU_GET_MATRIX_PTR(descr[0]);
+	float *left 	= (float *)STARPU_GET_MATRIX_PTR(descr[1]);
+	float *right 	= (float *)STARPU_GET_MATRIX_PTR(descr[2]);
 
-	unsigned dx = STARPU_GET_BLAS_NX(descr[0]);
-	unsigned dy = STARPU_GET_BLAS_NY(descr[0]);
-	unsigned dz = STARPU_GET_BLAS_NX(descr[1]);
+	unsigned dx = STARPU_GET_MATRIX_NX(descr[0]);
+	unsigned dy = STARPU_GET_MATRIX_NY(descr[0]);
+	unsigned dz = STARPU_GET_MATRIX_NX(descr[1]);
 
-	unsigned ld21 = STARPU_GET_BLAS_LD(descr[1]);
-	unsigned ld12 = STARPU_GET_BLAS_LD(descr[2]);
-	unsigned ld22 = STARPU_GET_BLAS_LD(descr[0]);
+	unsigned ld21 = STARPU_GET_MATRIX_LD(descr[1]);
+	unsigned ld12 = STARPU_GET_MATRIX_LD(descr[2]);
+	unsigned ld22 = STARPU_GET_MATRIX_LD(descr[0]);
 
 	switch (s) {
 		case 0:
@@ -67,16 +67,16 @@ static void add_sub_common_codelet(void *descr[], int s, __attribute__((unused))
 {
 	/* C = A op B */
 
-	float *C 	= (float *)STARPU_GET_BLAS_PTR(descr[0]);
-	float *A 	= (float *)STARPU_GET_BLAS_PTR(descr[1]);
-	float *B 	= (float *)STARPU_GET_BLAS_PTR(descr[2]);
+	float *C 	= (float *)STARPU_GET_MATRIX_PTR(descr[0]);
+	float *A 	= (float *)STARPU_GET_MATRIX_PTR(descr[1]);
+	float *B 	= (float *)STARPU_GET_MATRIX_PTR(descr[2]);
 
-	unsigned dx = STARPU_GET_BLAS_NX(descr[0]);
-	unsigned dy = STARPU_GET_BLAS_NY(descr[0]);
+	unsigned dx = STARPU_GET_MATRIX_NX(descr[0]);
+	unsigned dy = STARPU_GET_MATRIX_NY(descr[0]);
 
-	unsigned ldA = STARPU_GET_BLAS_LD(descr[1]);
-	unsigned ldB = STARPU_GET_BLAS_LD(descr[2]);
-	unsigned ldC = STARPU_GET_BLAS_LD(descr[0]);
+	unsigned ldA = STARPU_GET_MATRIX_LD(descr[1]);
+	unsigned ldB = STARPU_GET_MATRIX_LD(descr[2]);
+	unsigned ldC = STARPU_GET_MATRIX_LD(descr[0]);
 
 	// TODO check dim ...
 
@@ -139,14 +139,14 @@ static void self_add_sub_common_codelet(void *descr[], int s, __attribute__((unu
 {
 	/* C +=/-= A */
 
-	float *C 	= (float *)STARPU_GET_BLAS_PTR(descr[0]);
-	float *A 	= (float *)STARPU_GET_BLAS_PTR(descr[1]);
+	float *C 	= (float *)STARPU_GET_MATRIX_PTR(descr[0]);
+	float *A 	= (float *)STARPU_GET_MATRIX_PTR(descr[1]);
 
-	unsigned dx = STARPU_GET_BLAS_NX(descr[0]);
-	unsigned dy = STARPU_GET_BLAS_NY(descr[0]);
+	unsigned dx = STARPU_GET_MATRIX_NX(descr[0]);
+	unsigned dy = STARPU_GET_MATRIX_NY(descr[0]);
 
-	unsigned ldA = STARPU_GET_BLAS_LD(descr[1]);
-	unsigned ldC = STARPU_GET_BLAS_LD(descr[0]);
+	unsigned ldA = STARPU_GET_MATRIX_LD(descr[1]);
+	unsigned ldC = STARPU_GET_MATRIX_LD(descr[0]);
 
 	// TODO check dim ...
 	

+ 6 - 6
examples/strassen/strassen_models.c

@@ -41,7 +41,7 @@ static double self_add_sub_cost(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = (n*n)/10.0f/4.0f/7.75f;
 
@@ -56,7 +56,7 @@ static double cuda_self_add_sub_cost(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = (n*n)/10.0f/4.0f;
 
@@ -71,7 +71,7 @@ static double add_sub_cost(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = (1.45f*n*n)/10.0f/2.0f;
 
@@ -86,7 +86,7 @@ static double cuda_add_sub_cost(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = (1.45f*n*n)/10.0f/2.0f;
 
@@ -102,7 +102,7 @@ static double mult_cost(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = (((double)(n)*n*n)/1000.0f/4.11f/0.2588);
 
@@ -117,7 +117,7 @@ static double cuda_mult_cost(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = (((double)(n)*n*n)/1000.0f/4.11f);
 

+ 3 - 3
examples/strassen/test_strassen.c

@@ -156,11 +156,11 @@ void init_problem(void)
 		}
 	}
 
-	starpu_register_blas_data(&A_state, 0, (uintptr_t)A, 
+	starpu_register_matrix_data(&A_state, 0, (uintptr_t)A, 
 		dim, dim, dim, sizeof(float));
-	starpu_register_blas_data(&B_state, 0, (uintptr_t)B, 
+	starpu_register_matrix_data(&B_state, 0, (uintptr_t)B, 
 		dim, dim, dim, sizeof(float));
-	starpu_register_blas_data(&C_state, 0, (uintptr_t)C, 
+	starpu_register_matrix_data(&C_state, 0, (uintptr_t)C, 
 		dim, dim, dim, sizeof(float));
 
 	gettimeofday(&start, NULL);

+ 5 - 5
examples/strassen2/strassen2.c

@@ -190,7 +190,7 @@ static starpu_data_handle allocate_tmp_matrix(unsigned size, unsigned reclevel)
 
 	buffer = allocate_tmp_matrix_wrapper(size*size*sizeof(float));
 
-	starpu_register_blas_data(data, 0, (uintptr_t)buffer, size, size, size, sizeof(float));
+	starpu_register_matrix_data(data, 0, (uintptr_t)buffer, size, size, size, sizeof(float));
 
 	/* we construct a starpu_filter tree of depth reclevel */
 	unsigned rec;
@@ -417,7 +417,7 @@ void strassen_mult(struct strassen_iter *iter)
         starpu_data_handle C21 = starpu_get_sub_data(iter->C, 2, 0, 1);
         starpu_data_handle C22 = starpu_get_sub_data(iter->C, 2, 1, 1);
 
-	unsigned size = starpu_get_blas_nx(A11);
+	unsigned size = starpu_get_matrix_nx(A11);
 
 	/* M1a = (A11 + A22) */
 	iter->Mia_data[0] = allocate_tmp_matrix(size, iter->reclevel);
@@ -804,9 +804,9 @@ int main(int argc, char **argv)
 	B = allocate_tmp_matrix_wrapper(size*size*sizeof(float));
 	C = allocate_tmp_matrix_wrapper(size*size*sizeof(float));
 
-	starpu_register_blas_data(&data_A, 0, (uintptr_t)A, size, size, size, sizeof(float));
-	starpu_register_blas_data(&data_B, 0, (uintptr_t)B, size, size, size, sizeof(float));
-	starpu_register_blas_data(&data_C, 0, (uintptr_t)C, size, size, size, sizeof(float));
+	starpu_register_matrix_data(&data_A, 0, (uintptr_t)A, size, size, size, sizeof(float));
+	starpu_register_matrix_data(&data_B, 0, (uintptr_t)B, size, size, size, sizeof(float));
+	starpu_register_matrix_data(&data_C, 0, (uintptr_t)C, size, size, size, sizeof(float));
 
 	unsigned rec;
 	for (rec = 0; rec < reclevel; rec++)

+ 19 - 19
examples/strassen2/strassen2_kernels.c

@@ -53,15 +53,15 @@ void display_perf(double timing, unsigned size)
 
 static void mult_common_codelet(void *descr[], int s, __attribute__((unused))  void *arg)
 {
-	float *center 	= (float *)STARPU_GET_BLAS_PTR(descr[0]);
-	float *left 	= (float *)STARPU_GET_BLAS_PTR(descr[1]);
-	float *right 	= (float *)STARPU_GET_BLAS_PTR(descr[2]);
+	float *center 	= (float *)STARPU_GET_MATRIX_PTR(descr[0]);
+	float *left 	= (float *)STARPU_GET_MATRIX_PTR(descr[1]);
+	float *right 	= (float *)STARPU_GET_MATRIX_PTR(descr[2]);
 
-	unsigned n = STARPU_GET_BLAS_NX(descr[0]);
+	unsigned n = STARPU_GET_MATRIX_NX(descr[0]);
 
-	unsigned ld21 = STARPU_GET_BLAS_LD(descr[1]);
-	unsigned ld12 = STARPU_GET_BLAS_LD(descr[2]);
-	unsigned ld22 = STARPU_GET_BLAS_LD(descr[0]);
+	unsigned ld21 = STARPU_GET_MATRIX_LD(descr[1]);
+	unsigned ld12 = STARPU_GET_MATRIX_LD(descr[2]);
+	unsigned ld22 = STARPU_GET_MATRIX_LD(descr[0]);
 
 	double flop = 2.0*n*n*n;
 
@@ -106,15 +106,15 @@ static void add_sub_common_codelet(void *descr[], int s, __attribute__((unused))
 {
 	/* C = A op B */
 
-	float *C 	= (float *)STARPU_GET_BLAS_PTR(descr[0]);
-	float *A 	= (float *)STARPU_GET_BLAS_PTR(descr[1]);
-	float *B 	= (float *)STARPU_GET_BLAS_PTR(descr[2]);
+	float *C 	= (float *)STARPU_GET_MATRIX_PTR(descr[0]);
+	float *A 	= (float *)STARPU_GET_MATRIX_PTR(descr[1]);
+	float *B 	= (float *)STARPU_GET_MATRIX_PTR(descr[2]);
 
-	unsigned n = STARPU_GET_BLAS_NX(descr[0]);
+	unsigned n = STARPU_GET_MATRIX_NX(descr[0]);
 
-	unsigned ldA = STARPU_GET_BLAS_LD(descr[1]);
-	unsigned ldB = STARPU_GET_BLAS_LD(descr[2]);
-	unsigned ldC = STARPU_GET_BLAS_LD(descr[0]);
+	unsigned ldA = STARPU_GET_MATRIX_LD(descr[1]);
+	unsigned ldB = STARPU_GET_MATRIX_LD(descr[2]);
+	unsigned ldC = STARPU_GET_MATRIX_LD(descr[0]);
 
 	double flop = 2.0*n*n;
 
@@ -188,13 +188,13 @@ static void self_add_sub_common_codelet(void *descr[], int s, __attribute__((unu
 {
 	/* C +=/-= A */
 
-	float *C 	= (float *)STARPU_GET_BLAS_PTR(descr[0]);
-	float *A 	= (float *)STARPU_GET_BLAS_PTR(descr[1]);
+	float *C 	= (float *)STARPU_GET_MATRIX_PTR(descr[0]);
+	float *A 	= (float *)STARPU_GET_MATRIX_PTR(descr[1]);
 
-	unsigned n = STARPU_GET_BLAS_NX(descr[0]);
+	unsigned n = STARPU_GET_MATRIX_NX(descr[0]);
 
-	unsigned ldA = STARPU_GET_BLAS_LD(descr[1]);
-	unsigned ldC = STARPU_GET_BLAS_LD(descr[0]);
+	unsigned ldA = STARPU_GET_MATRIX_LD(descr[1]);
+	unsigned ldC = STARPU_GET_MATRIX_LD(descr[0]);
 
 	double flop = 1.0*n*n;
 

+ 14 - 14
include/starpu-data-interfaces.h

@@ -26,30 +26,30 @@ extern "C" {
 
 void *starpu_data_get_interface_on_node(starpu_data_handle handle, unsigned memory_node);
 
-/* BLAS interface for dense matrices */
-typedef struct starpu_blas_interface_s {
+/* Matrix interface for dense matrices */
+typedef struct starpu_matrix_interface_s {
 	uintptr_t ptr;
 	uint32_t nx;
 	uint32_t ny;
 	uint32_t ld;
 	size_t elemsize;
-} starpu_blas_interface_t;
+} starpu_matrix_interface_t;
 
-void starpu_register_blas_data(starpu_data_handle *handle, uint32_t home_node,
+void starpu_register_matrix_data(starpu_data_handle *handle, uint32_t home_node,
                         uintptr_t ptr, uint32_t ld, uint32_t nx,
                         uint32_t ny, size_t elemsize);
-uint32_t starpu_get_blas_nx(starpu_data_handle handle);
-uint32_t starpu_get_blas_ny(starpu_data_handle handle);
-uint32_t starpu_get_blas_local_ld(starpu_data_handle handle);
-uintptr_t starpu_get_blas_local_ptr(starpu_data_handle handle);
-size_t starpu_get_blas_elemsize(starpu_data_handle handle);
+uint32_t starpu_get_matrix_nx(starpu_data_handle handle);
+uint32_t starpu_get_matrix_ny(starpu_data_handle handle);
+uint32_t starpu_get_matrix_local_ld(starpu_data_handle handle);
+uintptr_t starpu_get_matrix_local_ptr(starpu_data_handle handle);
+size_t starpu_get_matrix_elemsize(starpu_data_handle handle);
 
 /* helper methods */
-#define STARPU_GET_BLAS_PTR(interface)	(((starpu_blas_interface_t *)(interface))->ptr)
-#define STARPU_GET_BLAS_NX(interface)	(((starpu_blas_interface_t *)(interface))->nx)
-#define STARPU_GET_BLAS_NY(interface)	(((starpu_blas_interface_t *)(interface))->ny)
-#define STARPU_GET_BLAS_LD(interface)	(((starpu_blas_interface_t *)(interface))->ld)
-#define STARPU_GET_BLAS_ELEMSIZE(interface)	(((starpu_blas_interface_t *)(interface))->elemsize)
+#define STARPU_GET_MATRIX_PTR(interface)	(((starpu_matrix_interface_t *)(interface))->ptr)
+#define STARPU_GET_MATRIX_NX(interface)	(((starpu_matrix_interface_t *)(interface))->nx)
+#define STARPU_GET_MATRIX_NY(interface)	(((starpu_matrix_interface_t *)(interface))->ny)
+#define STARPU_GET_MATRIX_LD(interface)	(((starpu_matrix_interface_t *)(interface))->ld)
+#define STARPU_GET_MATRIX_ELEMSIZE(interface)	(((starpu_matrix_interface_t *)(interface))->elemsize)
 
 
 /* BLOCK interface for 3D dense blocks */

+ 7 - 7
mpi/examples/mpi_lu/plu_example.c

@@ -236,7 +236,7 @@ static void init_matrix(int rank)
 				}
 
 				/* Register it to StarPU */
-				starpu_register_blas_data(handleptr, 0,
+				starpu_register_matrix_data(handleptr, 0,
 					(uintptr_t)*blockptr, size/nblocks,
 					size/nblocks, size/nblocks, sizeof(TYPE));
 			}
@@ -255,7 +255,7 @@ static void init_matrix(int rank)
 #ifdef SINGLE_TMP11
 	starpu_malloc_pinned_if_possible((void **)&tmp_11_block, blocksize);
 	allocated_memory_extra += blocksize;
-	starpu_register_blas_data(&tmp_11_block_handle, 0, (uintptr_t)tmp_11_block,
+	starpu_register_matrix_data(&tmp_11_block_handle, 0, (uintptr_t)tmp_11_block,
 			size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
 #else
 	tmp_11_block_handles = calloc(nblocks, sizeof(starpu_data_handle));
@@ -270,7 +270,7 @@ static void init_matrix(int rank)
 			allocated_memory_extra += blocksize;
 			STARPU_ASSERT(tmp_11_block[k]);
 
-			starpu_register_blas_data(&tmp_11_block_handles[k], 0,
+			starpu_register_matrix_data(&tmp_11_block_handles[k], 0,
 				(uintptr_t)tmp_11_block[k],
 				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
 		}
@@ -305,7 +305,7 @@ static void init_matrix(int rank)
 			allocated_memory_extra += blocksize;
 			STARPU_ASSERT(tmp_12_block[k]);
 
-			starpu_register_blas_data(&tmp_12_block_handles[k], 0,
+			starpu_register_matrix_data(&tmp_12_block_handles[k], 0,
 				(uintptr_t)tmp_12_block[k],
 				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
 		}
@@ -316,7 +316,7 @@ static void init_matrix(int rank)
 			allocated_memory_extra += blocksize;
 			STARPU_ASSERT(tmp_21_block[k]);
 
-			starpu_register_blas_data(&tmp_21_block_handles[k], 0,
+			starpu_register_matrix_data(&tmp_21_block_handles[k], 0,
 				(uintptr_t)tmp_21_block[k],
 				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
 		}
@@ -328,7 +328,7 @@ static void init_matrix(int rank)
 			allocated_memory_extra += blocksize;
 			STARPU_ASSERT(tmp_12_block[i][k]);
 	
-			starpu_register_blas_data(&tmp_12_block_handles[i][k], 0,
+			starpu_register_matrix_data(&tmp_12_block_handles[i][k], 0,
 				(uintptr_t)tmp_12_block[i][k],
 				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
 		}
@@ -339,7 +339,7 @@ static void init_matrix(int rank)
 			allocated_memory_extra += blocksize;
 			STARPU_ASSERT(tmp_21_block[i][k]);
 	
-			starpu_register_blas_data(&tmp_21_block_handles[i][k], 0,
+			starpu_register_matrix_data(&tmp_21_block_handles[i][k], 0,
 				(uintptr_t)tmp_21_block[i][k],
 				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
 		}

+ 24 - 24
mpi/examples/mpi_lu/pxlu_kernels.c

@@ -27,17 +27,17 @@
 static inline void STARPU_PLU(common_u22)(void *descr[],
 				int s, __attribute__((unused)) void *_args)
 {
-	TYPE *right 	= (TYPE *)STARPU_GET_BLAS_PTR(descr[0]);
-	TYPE *left 	= (TYPE *)STARPU_GET_BLAS_PTR(descr[1]);
-	TYPE *center 	= (TYPE *)STARPU_GET_BLAS_PTR(descr[2]);
+	TYPE *right 	= (TYPE *)STARPU_GET_MATRIX_PTR(descr[0]);
+	TYPE *left 	= (TYPE *)STARPU_GET_MATRIX_PTR(descr[1]);
+	TYPE *center 	= (TYPE *)STARPU_GET_MATRIX_PTR(descr[2]);
 
-	unsigned dx = STARPU_GET_BLAS_NX(descr[2]);
-	unsigned dy = STARPU_GET_BLAS_NY(descr[2]);
-	unsigned dz = STARPU_GET_BLAS_NY(descr[0]);
+	unsigned dx = STARPU_GET_MATRIX_NX(descr[2]);
+	unsigned dy = STARPU_GET_MATRIX_NY(descr[2]);
+	unsigned dz = STARPU_GET_MATRIX_NY(descr[0]);
 
-	unsigned ld12 = STARPU_GET_BLAS_LD(descr[0]);
-	unsigned ld21 = STARPU_GET_BLAS_LD(descr[1]);
-	unsigned ld22 = STARPU_GET_BLAS_LD(descr[2]);
+	unsigned ld12 = STARPU_GET_MATRIX_LD(descr[0]);
+	unsigned ld21 = STARPU_GET_MATRIX_LD(descr[1]);
+	unsigned ld22 = STARPU_GET_MATRIX_LD(descr[2]);
 
 #ifdef VERBOSE_KERNELS
 	struct debug_info *info = _args;
@@ -127,14 +127,14 @@ static inline void STARPU_PLU(common_u12)(void *descr[],
 	TYPE *sub11;
 	TYPE *sub12;
 
-	sub11 = (TYPE *)STARPU_GET_BLAS_PTR(descr[0]);	
-	sub12 = (TYPE *)STARPU_GET_BLAS_PTR(descr[1]);
+	sub11 = (TYPE *)STARPU_GET_MATRIX_PTR(descr[0]);	
+	sub12 = (TYPE *)STARPU_GET_MATRIX_PTR(descr[1]);
 
-	unsigned ld11 = STARPU_GET_BLAS_LD(descr[0]);
-	unsigned ld12 = STARPU_GET_BLAS_LD(descr[1]);
+	unsigned ld11 = STARPU_GET_MATRIX_LD(descr[0]);
+	unsigned ld12 = STARPU_GET_MATRIX_LD(descr[1]);
 
-	unsigned nx12 = STARPU_GET_BLAS_NX(descr[1]);
-	unsigned ny12 = STARPU_GET_BLAS_NY(descr[1]);
+	unsigned nx12 = STARPU_GET_MATRIX_NX(descr[1]);
+	unsigned ny12 = STARPU_GET_MATRIX_NY(descr[1]);
 
 #ifdef VERBOSE_KERNELS
 	struct debug_info *info = _args;
@@ -234,14 +234,14 @@ static inline void STARPU_PLU(common_u21)(void *descr[],
 	TYPE *sub11;
 	TYPE *sub21;
 
-	sub11 = (TYPE *)STARPU_GET_BLAS_PTR(descr[0]);
-	sub21 = (TYPE *)STARPU_GET_BLAS_PTR(descr[1]);
+	sub11 = (TYPE *)STARPU_GET_MATRIX_PTR(descr[0]);
+	sub21 = (TYPE *)STARPU_GET_MATRIX_PTR(descr[1]);
 
-	unsigned ld11 = STARPU_GET_BLAS_LD(descr[0]);
-	unsigned ld21 = STARPU_GET_BLAS_LD(descr[1]);
+	unsigned ld11 = STARPU_GET_MATRIX_LD(descr[0]);
+	unsigned ld21 = STARPU_GET_MATRIX_LD(descr[1]);
 
-	unsigned nx21 = STARPU_GET_BLAS_NX(descr[1]);
-	unsigned ny21 = STARPU_GET_BLAS_NY(descr[1]);
+	unsigned nx21 = STARPU_GET_MATRIX_NX(descr[1]);
+	unsigned ny21 = STARPU_GET_MATRIX_NY(descr[1]);
 	
 #ifdef VERBOSE_KERNELS
 	struct debug_info *info = _args;
@@ -342,10 +342,10 @@ static inline void STARPU_PLU(common_u11)(void *descr[],
 {
 	TYPE *sub11;
 
-	sub11 = (TYPE *)STARPU_GET_BLAS_PTR(descr[0]); 
+	sub11 = (TYPE *)STARPU_GET_MATRIX_PTR(descr[0]); 
 
-	unsigned long nx = STARPU_GET_BLAS_NX(descr[0]);
-	unsigned long ld = STARPU_GET_BLAS_LD(descr[0]);
+	unsigned long nx = STARPU_GET_MATRIX_NX(descr[0]);
+	unsigned long ld = STARPU_GET_MATRIX_LD(descr[0]);
 
 	unsigned long z;
 

+ 2 - 2
mpi/starpu_mpi_datatype.c

@@ -32,8 +32,8 @@ static int handle_to_datatype_blas(starpu_data_handle data_handle, MPI_Datatype
 {
 	int ret;
 
-	unsigned nx = starpu_get_blas_nx(data_handle);
-	unsigned ny = starpu_get_blas_ny(data_handle);
+	unsigned nx = starpu_get_matrix_nx(data_handle);
+	unsigned ny = starpu_get_matrix_ny(data_handle);
 	unsigned ld = starpu_get_blas_local_ld(data_handle);
 	size_t elemsize = starpu_get_blas_elemsize(data_handle);
 

+ 2 - 2
src/Makefile.am

@@ -129,8 +129,8 @@ libstarpu_la_SOURCES = 						\
 	datawizard/interfaces/data_interface.c			\
 	datawizard/interfaces/bcsr_interface.c			\
 	datawizard/interfaces/csr_interface.c			\
-	datawizard/interfaces/blas_filters.c			\
-	datawizard/interfaces/blas_interface.c			\
+	datawizard/interfaces/matrix_filters.c			\
+	datawizard/interfaces/matrix_interface.c		\
 	datawizard/interfaces/block_interface.c			\
 	datawizard/interfaces/vector_interface.c		\
 	datawizard/interfaces/bcsr_filters.c			\

+ 2 - 2
src/datawizard/interfaces/bcsr_filters.c

@@ -38,7 +38,7 @@ void starpu_canonical_block_filter_bcsr(starpu_filter *f __attribute__((unused))
 	nchunks = nnz;
 	
 	/* first allocate the children : it's a set of BLAS !*/
-	starpu_data_create_children(root_handle, nchunks, &_starpu_interface_blas_ops);
+	starpu_data_create_children(root_handle, nchunks, &_starpu_interface_matrix_ops);
 
 	/* actually create all the chunks */
 
@@ -54,7 +54,7 @@ void starpu_canonical_block_filter_bcsr(starpu_filter *f __attribute__((unused))
 		unsigned node;
 		for (node = 0; node < STARPU_MAXNODES; node++)
 		{
-			starpu_blas_interface_t *local =
+			starpu_matrix_interface_t *local =
 				starpu_data_get_interface_on_node(sub_handle, node);
 
 			local->nx = c;

+ 1 - 1
src/datawizard/interfaces/data_interface.h

@@ -47,6 +47,6 @@ void _starpu_register_data_handle(starpu_data_handle *handleptr, uint32_t home_n
 				struct starpu_data_interface_ops_t *ops);
 
 /* Some data interfaces or filters use this interface internally */
-extern struct starpu_data_interface_ops_t _starpu_interface_blas_ops;
+extern struct starpu_data_interface_ops_t _starpu_interface_matrix_ops;
 
 #endif // __DATA_INTERFACE_H__

+ 10 - 10
src/datawizard/interfaces/blas_filters.c

@@ -26,18 +26,18 @@ void starpu_block_filter_func(starpu_filter *f, starpu_data_handle root_handle)
 	unsigned nchunks;
 	uint32_t arg = f->filter_arg;
 
-	starpu_blas_interface_t *blas_root =
+	starpu_matrix_interface_t *matrix_root =
 		starpu_data_get_interface_on_node(root_handle, 0);
 
-	uint32_t nx = blas_root->nx;
-	uint32_t ny = blas_root->ny;
-	size_t elemsize = blas_root->elemsize;
+	uint32_t nx = matrix_root->nx;
+	uint32_t ny = matrix_root->ny;
+	size_t elemsize = matrix_root->elemsize;
 
 	/* we will have arg chunks */
 	nchunks = STARPU_MIN(nx, arg);
 
 	/* first allocate the children, they have the same interface type as
-	 * the root (blas) */
+	 * the root (matrix) */
 	starpu_data_create_children(root_handle, nchunks, root_handle->ops);
 
 	/* actually create all the chunks */
@@ -56,7 +56,7 @@ void starpu_block_filter_func(starpu_filter *f, starpu_data_handle root_handle)
 		unsigned node;
 		for (node = 0; node < STARPU_MAXNODES; node++)
 		{
-			starpu_blas_interface_t *local = 
+			starpu_matrix_interface_t *local = 
 				starpu_data_get_interface_on_node(chunk_handle, node);
 
 			local->nx = child_nx;
@@ -64,7 +64,7 @@ void starpu_block_filter_func(starpu_filter *f, starpu_data_handle root_handle)
 			local->elemsize = elemsize;
 
 			if (starpu_test_if_data_is_allocated_on_node(root_handle, node)) {
-				starpu_blas_interface_t *local_root =
+				starpu_matrix_interface_t *local_root =
 					starpu_data_get_interface_on_node(root_handle, node);
 
 				local->ptr = local_root->ptr + offset;
@@ -79,7 +79,7 @@ void starpu_vertical_block_filter_func(starpu_filter *f, starpu_data_handle root
 	unsigned nchunks;
 	uint32_t arg = f->filter_arg;
 
-	starpu_blas_interface_t *interface =
+	starpu_matrix_interface_t *interface =
 		starpu_data_get_interface_on_node(root_handle, 0);
 
 	uint32_t nx = interface->nx;
@@ -107,7 +107,7 @@ void starpu_vertical_block_filter_func(starpu_filter *f, starpu_data_handle root
 		unsigned node;
 		for (node = 0; node < STARPU_MAXNODES; node++)
 		{
-			starpu_blas_interface_t *local =
+			starpu_matrix_interface_t *local =
 				starpu_data_get_interface_on_node(chunk_handle, node);
 
 			local->nx = nx;
@@ -115,7 +115,7 @@ void starpu_vertical_block_filter_func(starpu_filter *f, starpu_data_handle root
 			local->elemsize = elemsize;
 
 			if (starpu_test_if_data_is_allocated_on_node(root_handle, node)) {
-				starpu_blas_interface_t *local_root =
+				starpu_matrix_interface_t *local_root =
 					starpu_data_get_interface_on_node(root_handle, node);
 
 				size_t offset = 

+ 109 - 109
src/datawizard/interfaces/blas_interface.c

@@ -35,7 +35,7 @@ static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node,
 static int copy_cuda_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream);
 #endif
 
-static const struct starpu_copy_data_methods_s blas_copy_data_methods_s = {
+static const struct starpu_copy_data_methods_s matrix_copy_data_methods_s = {
 	.ram_to_ram = dummy_copy_ram_to_ram,
 	.ram_to_spu = NULL,
 #ifdef STARPU_USE_CUDA
@@ -51,40 +51,40 @@ static const struct starpu_copy_data_methods_s blas_copy_data_methods_s = {
 	.spu_to_spu = NULL
 };
 
-static void register_blas_handle(starpu_data_handle handle, uint32_t home_node, void *interface);
-static size_t allocate_blas_buffer_on_node(starpu_data_handle handle, uint32_t dst_node);
-static void liberate_blas_buffer_on_node(void *interface, uint32_t node);
-static size_t blas_interface_get_size(starpu_data_handle handle);
-static uint32_t footprint_blas_interface_crc32(starpu_data_handle handle);
-static void display_blas_interface(starpu_data_handle handle, FILE *f);
+static void register_matrix_handle(starpu_data_handle handle, uint32_t home_node, void *interface);
+static size_t allocate_matrix_buffer_on_node(starpu_data_handle handle, uint32_t dst_node);
+static void liberate_matrix_buffer_on_node(void *interface, uint32_t node);
+static size_t matrix_interface_get_size(starpu_data_handle handle);
+static uint32_t footprint_matrix_interface_crc32(starpu_data_handle handle);
+static void display_matrix_interface(starpu_data_handle handle, FILE *f);
 #ifdef STARPU_USE_GORDON
-static int convert_blas_to_gordon(void *interface, uint64_t *ptr, gordon_strideSize_t *ss); 
+static int convert_matrix_to_gordon(void *interface, uint64_t *ptr, gordon_strideSize_t *ss); 
 #endif
 
-struct starpu_data_interface_ops_t _starpu_interface_blas_ops = {
-	.register_data_handle = register_blas_handle,
-	.allocate_data_on_node = allocate_blas_buffer_on_node,
-	.liberate_data_on_node = liberate_blas_buffer_on_node,
-	.copy_methods = &blas_copy_data_methods_s,
-	.get_size = blas_interface_get_size,
-	.footprint = footprint_blas_interface_crc32,
+struct starpu_data_interface_ops_t _starpu_interface_matrix_ops = {
+	.register_data_handle = register_matrix_handle,
+	.allocate_data_on_node = allocate_matrix_buffer_on_node,
+	.liberate_data_on_node = liberate_matrix_buffer_on_node,
+	.copy_methods = &matrix_copy_data_methods_s,
+	.get_size = matrix_interface_get_size,
+	.footprint = footprint_matrix_interface_crc32,
 #ifdef STARPU_USE_GORDON
-	.convert_to_gordon = convert_blas_to_gordon,
+	.convert_to_gordon = convert_matrix_to_gordon,
 #endif
 	.interfaceid = STARPU_BLAS_INTERFACE_ID, 
-	.interface_size = sizeof(starpu_blas_interface_t),
-	.display = display_blas_interface
+	.interface_size = sizeof(starpu_matrix_interface_t),
+	.display = display_matrix_interface
 };
 
 #ifdef STARPU_USE_GORDON
-static int convert_blas_to_gordon(void *interface, uint64_t *ptr, gordon_strideSize_t *ss) 
+static int convert_matrix_to_gordon(void *interface, uint64_t *ptr, gordon_strideSize_t *ss) 
 {
 	size_t elemsize = GET_BLAS_ELEMSIZE(interface);
-	uint32_t nx = STARPU_GET_BLAS_NX(interface);
-	uint32_t ny = STARPU_GET_BLAS_NY(interface);
-	uint32_t ld = STARPU_GET_BLAS_LD(interface);
+	uint32_t nx = STARPU_GET_MATRIX_NX(interface);
+	uint32_t ny = STARPU_GET_MATRIX_NY(interface);
+	uint32_t ld = STARPU_GET_MATRIX_LD(interface);
 
-	*ptr = STARPU_GET_BLAS_PTR(interface);
+	*ptr = STARPU_GET_MATRIX_PTR(interface);
 
 	/* The gordon_stride_init function may use a contiguous buffer
  	 * in case nx = ld (in that case, (*ss).size = elemsize*nx*ny */
@@ -94,37 +94,37 @@ static int convert_blas_to_gordon(void *interface, uint64_t *ptr, gordon_strideS
 }
 #endif
 
-static void register_blas_handle(starpu_data_handle handle, uint32_t home_node, void *interface)
+static void register_matrix_handle(starpu_data_handle handle, uint32_t home_node, void *interface)
 {
-	starpu_blas_interface_t *blas_interface = interface;
+	starpu_matrix_interface_t *matrix_interface = interface;
 
 	unsigned node;
 	for (node = 0; node < STARPU_MAXNODES; node++)
 	{
-		starpu_blas_interface_t *local_interface =
+		starpu_matrix_interface_t *local_interface =
 			starpu_data_get_interface_on_node(handle, node);
 
 		if (node == home_node) {
-			local_interface->ptr = blas_interface->ptr;
-			local_interface->ld  = blas_interface->ld;
+			local_interface->ptr = matrix_interface->ptr;
+			local_interface->ld  = matrix_interface->ld;
 		}
 		else {
 			local_interface->ptr = 0;
 			local_interface->ld  = 0;
 		}
 
-		local_interface->nx = blas_interface->nx;
-		local_interface->ny = blas_interface->ny;
-		local_interface->elemsize = blas_interface->elemsize;
+		local_interface->nx = matrix_interface->nx;
+		local_interface->ny = matrix_interface->ny;
+		local_interface->elemsize = matrix_interface->elemsize;
 	}
 }
 
 /* declare a new data with the BLAS interface */
-void starpu_register_blas_data(starpu_data_handle *handleptr, uint32_t home_node,
+void starpu_register_matrix_data(starpu_data_handle *handleptr, uint32_t home_node,
 			uintptr_t ptr, uint32_t ld, uint32_t nx,
 			uint32_t ny, size_t elemsize)
 {
-	starpu_blas_interface_t interface = {
+	starpu_matrix_interface_t interface = {
 		.ptr = ptr,
 		.ld = ld,
 		.nx = nx,
@@ -132,25 +132,25 @@ void starpu_register_blas_data(starpu_data_handle *handleptr, uint32_t home_node
 		.elemsize = elemsize
 	};
 
-	_starpu_register_data_handle(handleptr, home_node, &interface, &_starpu_interface_blas_ops);
+	_starpu_register_data_handle(handleptr, home_node, &interface, &_starpu_interface_matrix_ops);
 }
 
-static uint32_t footprint_blas_interface_crc32(starpu_data_handle handle)
+static uint32_t footprint_matrix_interface_crc32(starpu_data_handle handle)
 {
-	return _starpu_crc32_be(starpu_get_blas_nx(handle), starpu_get_blas_ny(handle));
+	return _starpu_crc32_be(starpu_get_matrix_nx(handle), starpu_get_matrix_ny(handle));
 }
 
-static void display_blas_interface(starpu_data_handle handle, FILE *f)
+static void display_matrix_interface(starpu_data_handle handle, FILE *f)
 {
-	starpu_blas_interface_t *interface =
+	starpu_matrix_interface_t *interface =
 		starpu_data_get_interface_on_node(handle, 0);
 
 	fprintf(f, "%u\t%u\t", interface->nx, interface->ny);
 }
 
-static size_t blas_interface_get_size(starpu_data_handle handle)
+static size_t matrix_interface_get_size(starpu_data_handle handle)
 {
-	starpu_blas_interface_t *interface =
+	starpu_matrix_interface_t *interface =
 		starpu_data_get_interface_on_node(handle, 0);
 
 	size_t size;
@@ -160,51 +160,51 @@ static size_t blas_interface_get_size(starpu_data_handle handle)
 }
 
 /* offer an access to the data parameters */
-uint32_t starpu_get_blas_nx(starpu_data_handle handle)
+uint32_t starpu_get_matrix_nx(starpu_data_handle handle)
 {
-	starpu_blas_interface_t *interface =
+	starpu_matrix_interface_t *interface =
 		starpu_data_get_interface_on_node(handle, 0);
 
 	return interface->nx;
 }
 
-uint32_t starpu_get_blas_ny(starpu_data_handle handle)
+uint32_t starpu_get_matrix_ny(starpu_data_handle handle)
 {
-	starpu_blas_interface_t *interface =
+	starpu_matrix_interface_t *interface =
 		starpu_data_get_interface_on_node(handle, 0);
 
 	return interface->ny;
 }
 
-uint32_t starpu_get_blas_local_ld(starpu_data_handle handle)
+uint32_t starpu_get_matrix_local_ld(starpu_data_handle handle)
 {
 	unsigned node;
 	node = _starpu_get_local_memory_node();
 
 	STARPU_ASSERT(starpu_test_if_data_is_allocated_on_node(handle, node));
 
-	starpu_blas_interface_t *interface =
+	starpu_matrix_interface_t *interface =
 		starpu_data_get_interface_on_node(handle, node);
 
 	return interface->ld;
 }
 
-uintptr_t starpu_get_blas_local_ptr(starpu_data_handle handle)
+uintptr_t starpu_get_matrix_local_ptr(starpu_data_handle handle)
 {
 	unsigned node;
 	node = _starpu_get_local_memory_node();
 
 	STARPU_ASSERT(starpu_test_if_data_is_allocated_on_node(handle, node));
 
-	starpu_blas_interface_t *interface =
+	starpu_matrix_interface_t *interface =
 		starpu_data_get_interface_on_node(handle, node);
 
 	return interface->ptr;
 }
 
-size_t starpu_get_blas_elemsize(starpu_data_handle handle)
+size_t starpu_get_matrix_elemsize(starpu_data_handle handle)
 {
-	starpu_blas_interface_t *interface =
+	starpu_matrix_interface_t *interface =
 		starpu_data_get_interface_on_node(handle, 0);
 
 	return interface->elemsize;
@@ -213,7 +213,7 @@ size_t starpu_get_blas_elemsize(starpu_data_handle handle)
 /* memory allocation/deallocation primitives for the BLAS interface */
 
 /* returns the size of the allocated area */
-static size_t allocate_blas_buffer_on_node(starpu_data_handle handle, uint32_t dst_node)
+static size_t allocate_matrix_buffer_on_node(starpu_data_handle handle, uint32_t dst_node)
 {
 	uintptr_t addr = 0;
 	unsigned fail = 0;
@@ -224,7 +224,7 @@ static size_t allocate_blas_buffer_on_node(starpu_data_handle handle, uint32_t d
 	size_t pitch;
 #endif
 
-	starpu_blas_interface_t *interface =
+	starpu_matrix_interface_t *interface =
 		starpu_data_get_interface_on_node(handle, dst_node);
 
 	uint32_t nx = interface->nx;
@@ -275,9 +275,9 @@ static size_t allocate_blas_buffer_on_node(starpu_data_handle handle, uint32_t d
 	return allocated_memory;
 }
 
-static void liberate_blas_buffer_on_node(void *interface, uint32_t node)
+static void liberate_matrix_buffer_on_node(void *interface, uint32_t node)
 {
-	starpu_blas_interface_t *blas_interface = interface;
+	starpu_matrix_interface_t *matrix_interface = interface;
 
 #ifdef STARPU_USE_CUDA
 	cudaError_t status;
@@ -286,11 +286,11 @@ static void liberate_blas_buffer_on_node(void *interface, uint32_t node)
 	starpu_node_kind kind = _starpu_get_node_kind(node);
 	switch(kind) {
 		case STARPU_RAM:
-			free((void*)blas_interface->ptr);
+			free((void*)matrix_interface->ptr);
 			break;
 #ifdef STARPU_USE_CUDA
 		case STARPU_CUDA_RAM:
-			status = cudaFree((void*)blas_interface->ptr);			
+			status = cudaFree((void*)matrix_interface->ptr);			
 			if (STARPU_UNLIKELY(status))
 				STARPU_CUDA_REPORT_ERROR(status);
 
@@ -304,40 +304,40 @@ static void liberate_blas_buffer_on_node(void *interface, uint32_t node)
 #ifdef STARPU_USE_CUDA
 static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
 {
-	starpu_blas_interface_t *src_blas;
-	starpu_blas_interface_t *dst_blas;
+	starpu_matrix_interface_t *src_matrix;
+	starpu_matrix_interface_t *dst_matrix;
 
-	src_blas = starpu_data_get_interface_on_node(handle, src_node);
-	dst_blas = starpu_data_get_interface_on_node(handle, dst_node);
+	src_matrix = starpu_data_get_interface_on_node(handle, src_node);
+	dst_matrix = starpu_data_get_interface_on_node(handle, dst_node);
 
-	size_t elemsize = src_blas->elemsize;
+	size_t elemsize = src_matrix->elemsize;
 
 	cudaError_t cures;
-	cures = cudaMemcpy2D((char *)dst_blas->ptr, dst_blas->ld*elemsize,
-			(char *)src_blas->ptr, src_blas->ld*elemsize,
-			src_blas->nx*elemsize, src_blas->ny, cudaMemcpyDeviceToHost);
+	cures = cudaMemcpy2D((char *)dst_matrix->ptr, dst_matrix->ld*elemsize,
+			(char *)src_matrix->ptr, src_matrix->ld*elemsize,
+			src_matrix->nx*elemsize, src_matrix->ny, cudaMemcpyDeviceToHost);
 	if (STARPU_UNLIKELY(cures))
 		STARPU_CUDA_REPORT_ERROR(cures);
 
-	STARPU_TRACE_DATA_COPY(src_node, dst_node, (size_t)src_blas->nx*src_blas->ny*src_blas->elemsize);
+	STARPU_TRACE_DATA_COPY(src_node, dst_node, (size_t)src_matrix->nx*src_matrix->ny*src_matrix->elemsize);
 
 	return 0;
 }
 
 static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
 {
-	starpu_blas_interface_t *src_blas;
-	starpu_blas_interface_t *dst_blas;
+	starpu_matrix_interface_t *src_matrix;
+	starpu_matrix_interface_t *dst_matrix;
 
-	src_blas = starpu_data_get_interface_on_node(handle, src_node);
-	dst_blas = starpu_data_get_interface_on_node(handle, dst_node);
+	src_matrix = starpu_data_get_interface_on_node(handle, src_node);
+	dst_matrix = starpu_data_get_interface_on_node(handle, dst_node);
 
-	size_t elemsize = src_blas->elemsize;
+	size_t elemsize = src_matrix->elemsize;
 
 	cudaError_t cures;
-	cures = cudaMemcpy2D((char *)dst_blas->ptr, dst_blas->ld*elemsize,
-			(char *)src_blas->ptr, src_blas->ld*elemsize,
-			src_blas->nx*elemsize, src_blas->ny, cudaMemcpyHostToDevice);
+	cures = cudaMemcpy2D((char *)dst_matrix->ptr, dst_matrix->ld*elemsize,
+			(char *)src_matrix->ptr, src_matrix->ld*elemsize,
+			src_matrix->nx*elemsize, src_matrix->ny, cudaMemcpyHostToDevice);
 	if (STARPU_UNLIKELY(cures))
 		STARPU_CUDA_REPORT_ERROR(cures);
 		
@@ -345,31 +345,31 @@ static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32
 	if (STARPU_UNLIKELY(cures))
 		STARPU_CUDA_REPORT_ERROR(cures);
 		
-	STARPU_TRACE_DATA_COPY(src_node, dst_node, (size_t)src_blas->nx*src_blas->ny*src_blas->elemsize);
+	STARPU_TRACE_DATA_COPY(src_node, dst_node, (size_t)src_matrix->nx*src_matrix->ny*src_matrix->elemsize);
 
 	return 0;
 }
 
 static int copy_cuda_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream)
 {
-	starpu_blas_interface_t *src_blas;
-	starpu_blas_interface_t *dst_blas;
+	starpu_matrix_interface_t *src_matrix;
+	starpu_matrix_interface_t *dst_matrix;
 
-	src_blas = starpu_data_get_interface_on_node(handle, src_node);
-	dst_blas = starpu_data_get_interface_on_node(handle, dst_node);
+	src_matrix = starpu_data_get_interface_on_node(handle, src_node);
+	dst_matrix = starpu_data_get_interface_on_node(handle, dst_node);
 
-	size_t elemsize = src_blas->elemsize;
+	size_t elemsize = src_matrix->elemsize;
 
 	cudaError_t cures;	
-	cures = cudaMemcpy2DAsync((char *)dst_blas->ptr, dst_blas->ld*elemsize,
-			(char *)src_blas->ptr, (size_t)src_blas->ld*elemsize,
-			(size_t)src_blas->nx*elemsize, src_blas->ny,
+	cures = cudaMemcpy2DAsync((char *)dst_matrix->ptr, dst_matrix->ld*elemsize,
+			(char *)src_matrix->ptr, (size_t)src_matrix->ld*elemsize,
+			(size_t)src_matrix->nx*elemsize, src_matrix->ny,
 			cudaMemcpyDeviceToHost, *stream);
 	if (cures)
 	{
-		cures = cudaMemcpy2D((char *)dst_blas->ptr, dst_blas->ld*elemsize,
-			(char *)src_blas->ptr, (size_t)src_blas->ld*elemsize,
-			(size_t)src_blas->nx*elemsize, (size_t)src_blas->ny,
+		cures = cudaMemcpy2D((char *)dst_matrix->ptr, dst_matrix->ld*elemsize,
+			(char *)src_matrix->ptr, (size_t)src_matrix->ld*elemsize,
+			(size_t)src_matrix->nx*elemsize, (size_t)src_matrix->ny,
 			cudaMemcpyDeviceToHost);
 
 		if (STARPU_UNLIKELY(cures))
@@ -383,31 +383,31 @@ static int copy_cuda_to_ram_async(starpu_data_handle handle, uint32_t src_node,
 		return 0;
 	}
 
-	STARPU_TRACE_DATA_COPY(src_node, dst_node, (size_t)src_blas->nx*src_blas->ny*src_blas->elemsize);
+	STARPU_TRACE_DATA_COPY(src_node, dst_node, (size_t)src_matrix->nx*src_matrix->ny*src_matrix->elemsize);
 
 	return EAGAIN;
 }
 
 static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream)
 {
-	starpu_blas_interface_t *src_blas;
-	starpu_blas_interface_t *dst_blas;
+	starpu_matrix_interface_t *src_matrix;
+	starpu_matrix_interface_t *dst_matrix;
 
-	src_blas = starpu_data_get_interface_on_node(handle, src_node);
-	dst_blas = starpu_data_get_interface_on_node(handle, dst_node);
+	src_matrix = starpu_data_get_interface_on_node(handle, src_node);
+	dst_matrix = starpu_data_get_interface_on_node(handle, dst_node);
 
-	size_t elemsize = src_blas->elemsize;
+	size_t elemsize = src_matrix->elemsize;
 
 	cudaError_t cures;
-	cures = cudaMemcpy2DAsync((char *)dst_blas->ptr, dst_blas->ld*elemsize,
-				(char *)src_blas->ptr, src_blas->ld*elemsize,
-				src_blas->nx*elemsize, src_blas->ny,
+	cures = cudaMemcpy2DAsync((char *)dst_matrix->ptr, dst_matrix->ld*elemsize,
+				(char *)src_matrix->ptr, src_matrix->ld*elemsize,
+				src_matrix->nx*elemsize, src_matrix->ny,
 				cudaMemcpyHostToDevice, *stream);
 	if (cures)
 	{
-		cures = cudaMemcpy2D((char *)dst_blas->ptr, dst_blas->ld*elemsize,
-				(char *)src_blas->ptr, src_blas->ld*elemsize,
-				src_blas->nx*elemsize, src_blas->ny, cudaMemcpyHostToDevice);
+		cures = cudaMemcpy2D((char *)dst_matrix->ptr, dst_matrix->ld*elemsize,
+				(char *)src_matrix->ptr, src_matrix->ld*elemsize,
+				src_matrix->nx*elemsize, src_matrix->ny, cudaMemcpyHostToDevice);
 		cudaThreadSynchronize();
 
 		if (STARPU_UNLIKELY(cures))
@@ -416,7 +416,7 @@ static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node,
 		return 0;
 	}
 
-	STARPU_TRACE_DATA_COPY(src_node, dst_node, (size_t)src_blas->nx*src_blas->ny*src_blas->elemsize);
+	STARPU_TRACE_DATA_COPY(src_node, dst_node, (size_t)src_matrix->nx*src_matrix->ny*src_matrix->elemsize);
 
 	return EAGAIN;
 }
@@ -426,22 +426,22 @@ static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node,
 /* as not all platform easily have a BLAS lib installed ... */
 static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
 {
-	starpu_blas_interface_t *src_blas;
-	starpu_blas_interface_t *dst_blas;
+	starpu_matrix_interface_t *src_matrix;
+	starpu_matrix_interface_t *dst_matrix;
 
-	src_blas = starpu_data_get_interface_on_node(handle, src_node);
-	dst_blas = starpu_data_get_interface_on_node(handle, dst_node);
+	src_matrix = starpu_data_get_interface_on_node(handle, src_node);
+	dst_matrix = starpu_data_get_interface_on_node(handle, dst_node);
 
 	unsigned y;
-	uint32_t nx = dst_blas->nx;
-	uint32_t ny = dst_blas->ny;
-	size_t elemsize = dst_blas->elemsize;
+	uint32_t nx = dst_matrix->nx;
+	uint32_t ny = dst_matrix->ny;
+	size_t elemsize = dst_matrix->elemsize;
 
-	uint32_t ld_src = src_blas->ld;
-	uint32_t ld_dst = dst_blas->ld;
+	uint32_t ld_src = src_matrix->ld;
+	uint32_t ld_dst = dst_matrix->ld;
 
-	uintptr_t ptr_src = src_blas->ptr;
-	uintptr_t ptr_dst = dst_blas->ptr;
+	uintptr_t ptr_src = src_matrix->ptr;
+	uintptr_t ptr_dst = dst_matrix->ptr;
 
 
 	for (y = 0; y < ny; y++)