瀏覽代碼

Rename the "blas" interface to "matrix", which is much clearer.

Cédric Augonnet 15 年之前
父節點
當前提交
da923646ea
共有 38 個文件被更改,包括 404 次插入和 404 次删除
  1. 13 13
      examples/basic-examples/mult.c
  2. 2 2
      examples/cholesky/dw_cholesky.c
  3. 1 1
      examples/cholesky/dw_cholesky_grain.c
  4. 18 18
      examples/cholesky/dw_cholesky_kernels.c
  5. 6 6
      examples/cholesky/dw_cholesky_models.c
  6. 1 1
      examples/cholesky/dw_cholesky_no_stride.c
  7. 3 3
      examples/common/blas_model.c
  8. 3 3
      examples/heat/dw_factolu.c
  9. 1 1
      examples/heat/dw_factolu_grain.c
  10. 24 24
      examples/heat/dw_factolu_kernels.c
  11. 2 2
      examples/heat/dw_factolu_tag.c
  12. 18 18
      examples/heat/lu_kernels_model.c
  13. 2 2
      examples/lu/xlu.c
  14. 30 30
      examples/lu/xlu_kernels.c
  15. 4 4
      examples/lu/xlu_pivot.c
  16. 3 3
      examples/mult/dw_mult.c
  17. 3 3
      examples/mult/dw_mult_no_stride.c
  18. 3 3
      examples/mult/dw_mult_no_stride_no_tag.c
  19. 9 9
      examples/mult/sgemm_kernels.c
  20. 3 3
      examples/mult/xgemm.c
  21. 9 9
      examples/mult/xgemm_kernels.c
  22. 12 12
      examples/ppm-downscaler/yuv-downscaler.c
  23. 4 4
      examples/spmv/dw_block_spmv_kernels.c
  24. 3 3
      examples/strassen/strassen.c
  25. 23 23
      examples/strassen/strassen_kernels.c
  26. 6 6
      examples/strassen/strassen_models.c
  27. 3 3
      examples/strassen/test_strassen.c
  28. 5 5
      examples/strassen2/strassen2.c
  29. 19 19
      examples/strassen2/strassen2_kernels.c
  30. 14 14
      include/starpu-data-interfaces.h
  31. 7 7
      mpi/examples/mpi_lu/plu_example.c
  32. 24 24
      mpi/examples/mpi_lu/pxlu_kernels.c
  33. 2 2
      mpi/starpu_mpi_datatype.c
  34. 2 2
      src/Makefile.am
  35. 2 2
      src/datawizard/interfaces/bcsr_filters.c
  36. 1 1
      src/datawizard/interfaces/data_interface.h
  37. 10 10
      src/datawizard/interfaces/blas_filters.c
  38. 109 109
      src/datawizard/interfaces/blas_interface.c

+ 13 - 13
examples/basic-examples/mult.c

@@ -18,7 +18,7 @@
  * This example shows a simple implementation of a blocked matrix
  * multiplication. Note that this is NOT intended to be an efficient
  * implementation of sgemm! In this example, we show:
- *  - how to declare dense matrices (starpu_register_blas_data)
+ *  - how to declare dense matrices (starpu_register_matrix_data)
  *  - how to manipulate matrices within codelets (eg. descr[0].blas.ld)
  *  - how to use filters to partition the matrices into blocks
  *    (starpu_partition_data and starpu_map_filters)
@@ -116,9 +116,9 @@ static void cpu_mult(void *descr[], __attribute__((unused))  void *arg)
 	uint32_t ldA, ldB, ldC;
 
 	/* .blas.ptr gives a pointer to the first element of the local copy */
-	subA = (float *)STARPU_GET_BLAS_PTR(descr[0]);
-	subB = (float *)STARPU_GET_BLAS_PTR(descr[1]);
-	subC = (float *)STARPU_GET_BLAS_PTR(descr[2]);
+	subA = (float *)STARPU_GET_MATRIX_PTR(descr[0]);
+	subB = (float *)STARPU_GET_MATRIX_PTR(descr[1]);
+	subC = (float *)STARPU_GET_MATRIX_PTR(descr[2]);
 
 	/* .blas.nx is the number of rows (consecutive elements) and .blas.ny
 	 * is the number of lines that are separated by .blas.ld elements (ld
@@ -126,13 +126,13 @@ static void cpu_mult(void *descr[], __attribute__((unused))  void *arg)
 	 * NB: in case some filters were used, the leading dimension is not
 	 * guaranteed to be the same in main memory (on the original matrix)
 	 * and on the accelerator! */
-	nxC = STARPU_GET_BLAS_NX(descr[2]);
-	nyC = STARPU_GET_BLAS_NY(descr[2]);
-	nyA = STARPU_GET_BLAS_NY(descr[0]);
+	nxC = STARPU_GET_MATRIX_NX(descr[2]);
+	nyC = STARPU_GET_MATRIX_NY(descr[2]);
+	nyA = STARPU_GET_MATRIX_NY(descr[0]);
 
-	ldA = STARPU_GET_BLAS_LD(descr[0]);
-	ldB = STARPU_GET_BLAS_LD(descr[1]);
-	ldC = STARPU_GET_BLAS_LD(descr[2]);
+	ldA = STARPU_GET_MATRIX_LD(descr[0]);
+	ldB = STARPU_GET_MATRIX_LD(descr[1]);
+	ldC = STARPU_GET_MATRIX_LD(descr[2]);
 
 	/* we assume a FORTRAN-ordering! */
 	unsigned i,j,k;
@@ -199,11 +199,11 @@ static void partition_mult_data(void)
 	 * node in which resides the matrix: 0 means that the 3rd argument is
 	 * an adress in main memory.
 	 */
-	starpu_register_blas_data(&A_handle, 0, (uintptr_t)A, 
+	starpu_register_matrix_data(&A_handle, 0, (uintptr_t)A, 
 		ydim, ydim, zdim, sizeof(float));
-	starpu_register_blas_data(&B_handle, 0, (uintptr_t)B, 
+	starpu_register_matrix_data(&B_handle, 0, (uintptr_t)B, 
 		zdim, zdim, xdim, sizeof(float));
-	starpu_register_blas_data(&C_handle, 0, (uintptr_t)C, 
+	starpu_register_matrix_data(&C_handle, 0, (uintptr_t)C, 
 		ydim, ydim, xdim, sizeof(float));
 
 	/* A filter is a method to partition a data into disjoint chunks, it is

+ 2 - 2
examples/cholesky/dw_cholesky.c

@@ -211,7 +211,7 @@ static void _dw_cholesky(starpu_data_handle dataA, unsigned nblocks)
 	fprintf(stderr, "Computation took (in ms)\n");
 	printf("%2.2f\n", timing/1000);
 
-	unsigned n = starpu_get_blas_nx(dataA);
+	unsigned n = starpu_get_matrix_nx(dataA);
 
 	double flop = (1.0f*n*n*n)/3.0f;
 	fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
@@ -240,7 +240,7 @@ void dw_cholesky(float *matA, unsigned size, unsigned ld, unsigned nblocks)
 
 	/* monitor and partition the A matrix into blocks :
 	 * one block is now determined by 2 unsigned (i,j) */
-	starpu_register_blas_data(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
+	starpu_register_matrix_data(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
 
 	starpu_filter f;
 		f.filter_func = starpu_vertical_block_filter_func;

+ 1 - 1
examples/cholesky/dw_cholesky_grain.c

@@ -168,7 +168,7 @@ static void _dw_cholesky_grain(float *matA, unsigned size, unsigned ld, unsigned
 
 	/* monitor and partition the A matrix into blocks :
 	 * one block is now determined by 2 unsigned (i,j) */
-	starpu_register_blas_data(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
+	starpu_register_matrix_data(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
 
 	starpu_filter f;
 		f.filter_func = starpu_vertical_block_filter_func;

+ 18 - 18
examples/cholesky/dw_cholesky_kernels.c

@@ -30,17 +30,17 @@
 static inline void chol_common_cpu_codelet_update_u22(void *descr[], int s, __attribute__((unused)) void *_args)
 {
 	//printf("22\n");
-	float *left 	= (float *)STARPU_GET_BLAS_PTR(descr[0]);
-	float *right 	= (float *)STARPU_GET_BLAS_PTR(descr[1]);
-	float *center 	= (float *)STARPU_GET_BLAS_PTR(descr[2]);
+	float *left 	= (float *)STARPU_GET_MATRIX_PTR(descr[0]);
+	float *right 	= (float *)STARPU_GET_MATRIX_PTR(descr[1]);
+	float *center 	= (float *)STARPU_GET_MATRIX_PTR(descr[2]);
 
-	unsigned dx = STARPU_GET_BLAS_NY(descr[2]);
-	unsigned dy = STARPU_GET_BLAS_NX(descr[2]);
-	unsigned dz = STARPU_GET_BLAS_NY(descr[0]);
+	unsigned dx = STARPU_GET_MATRIX_NY(descr[2]);
+	unsigned dy = STARPU_GET_MATRIX_NX(descr[2]);
+	unsigned dz = STARPU_GET_MATRIX_NY(descr[0]);
 
-	unsigned ld21 = STARPU_GET_BLAS_LD(descr[0]);
-	unsigned ld12 = STARPU_GET_BLAS_LD(descr[1]);
-	unsigned ld22 = STARPU_GET_BLAS_LD(descr[2]);
+	unsigned ld21 = STARPU_GET_MATRIX_LD(descr[0]);
+	unsigned ld12 = STARPU_GET_MATRIX_LD(descr[1]);
+	unsigned ld22 = STARPU_GET_MATRIX_LD(descr[2]);
 
 #ifdef STARPU_USE_CUDA
 	cublasStatus st;
@@ -91,14 +91,14 @@ static inline void chol_common_codelet_update_u21(void *descr[], int s, __attrib
 	float *sub11;
 	float *sub21;
 
-	sub11 = (float *)STARPU_GET_BLAS_PTR(descr[0]);
-	sub21 = (float *)STARPU_GET_BLAS_PTR(descr[1]);
+	sub11 = (float *)STARPU_GET_MATRIX_PTR(descr[0]);
+	sub21 = (float *)STARPU_GET_MATRIX_PTR(descr[1]);
 
-	unsigned ld11 = STARPU_GET_BLAS_LD(descr[0]);
-	unsigned ld21 = STARPU_GET_BLAS_LD(descr[1]);
+	unsigned ld11 = STARPU_GET_MATRIX_LD(descr[0]);
+	unsigned ld21 = STARPU_GET_MATRIX_LD(descr[1]);
 
-	unsigned nx21 = STARPU_GET_BLAS_NY(descr[1]);
-	unsigned ny21 = STARPU_GET_BLAS_NX(descr[1]);
+	unsigned nx21 = STARPU_GET_MATRIX_NY(descr[1]);
+	unsigned ny21 = STARPU_GET_MATRIX_NX(descr[1]);
 
 	switch (s) {
 		case 0:
@@ -137,10 +137,10 @@ static inline void chol_common_codelet_update_u11(void *descr[], int s, __attrib
 //	printf("11\n");
 	float *sub11;
 
-	sub11 = (float *)STARPU_GET_BLAS_PTR(descr[0]); 
+	sub11 = (float *)STARPU_GET_MATRIX_PTR(descr[0]); 
 
-	unsigned nx = STARPU_GET_BLAS_NY(descr[0]);
-	unsigned ld = STARPU_GET_BLAS_LD(descr[0]);
+	unsigned nx = STARPU_GET_MATRIX_NY(descr[0]);
+	unsigned ld = STARPU_GET_MATRIX_LD(descr[0]);
 
 	unsigned z;
 

+ 6 - 6
examples/cholesky/dw_cholesky_models.c

@@ -38,7 +38,7 @@ static double cpu_chol_task_11_cost(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = (((double)(n)*n*n)/1000.0f*0.894/0.79176);
 
@@ -53,7 +53,7 @@ static double cuda_chol_task_11_cost(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = (((double)(n)*n*n)/50.0f/10.75/5.088633/0.9883);
 
@@ -68,7 +68,7 @@ static double cpu_chol_task_21_cost(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = (((double)(n)*n*n)/7706.674/0.95/0.9965);
 
@@ -83,7 +83,7 @@ static double cuda_chol_task_21_cost(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = (((double)(n)*n*n)/50.0f/10.75/87.29520);
 
@@ -98,7 +98,7 @@ static double cpu_chol_task_22_cost(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = (((double)(n)*n*n)/50.0f/10.75/8.0760);
 
@@ -113,7 +113,7 @@ static double cuda_chol_task_22_cost(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = (((double)(n)*n*n)/50.0f/10.75/76.30666);
 

+ 1 - 1
examples/cholesky/dw_cholesky_no_stride.c

@@ -311,7 +311,7 @@ int main(int argc, char **argv)
 	for (x = 0; x < nblocks; x++)
 	{
 		if (x <= y) {
-			starpu_register_blas_data(&A_state[y][x], 0, (uintptr_t)A[y][x], 
+			starpu_register_matrix_data(&A_state[y][x], 0, (uintptr_t)A[y][x], 
 				BLOCKSIZE, BLOCKSIZE, BLOCKSIZE, sizeof(float));
 		}
 	}

+ 3 - 3
examples/common/blas_model.c

@@ -32,9 +32,9 @@ double gemm_cost(starpu_buffer_descr *descr)
 	uint32_t nxC, nyC, nxA;
 
 
-	nxC = starpu_get_blas_nx(descr[2].handle);
-	nyC = starpu_get_blas_ny(descr[2].handle);
-	nxA = starpu_get_blas_nx(descr[0].handle);
+	nxC = starpu_get_matrix_nx(descr[2].handle);
+	nyC = starpu_get_matrix_ny(descr[2].handle);
+	nxA = starpu_get_matrix_nx(descr[0].handle);
 
 //	printf("nxC %d nxC %d nxA %d\n", nxC, nyC, nxA);
 

+ 3 - 3
examples/heat/dw_factolu.c

@@ -625,7 +625,7 @@ void dw_codelet_facto(starpu_data_handle dataA, unsigned nblocks)
 	fprintf(stderr, "Computation took (in ms)\n");
 	printf("%2.2f\n", timing/1000);
 
-	unsigned n = starpu_get_blas_nx(dataA);
+	unsigned n = starpu_get_matrix_nx(dataA);
 	double flop = (2.0f*n*n*n)/3.0f;
 	fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
 }
@@ -682,7 +682,7 @@ void dw_codelet_facto_v2(starpu_data_handle dataA, unsigned nblocks)
 	fprintf(stderr, "Computation took (in ms)\n");
 	printf("%2.2f\n", timing/1000);
 
-	unsigned n = starpu_get_blas_nx(dataA);
+	unsigned n = starpu_get_matrix_nx(dataA);
 	double flop = (2.0f*n*n*n)/3.0f;
 	fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
 }
@@ -727,7 +727,7 @@ void dw_factoLU(float *matA, unsigned size,
 
 	/* monitor and partition the A matrix into blocks :
 	 * one block is now determined by 2 unsigned (i,j) */
-	starpu_register_blas_data(&dataA, 0, (uintptr_t)matA, ld, 
+	starpu_register_matrix_data(&dataA, 0, (uintptr_t)matA, ld, 
 			size, size, sizeof(float));
 
 	starpu_filter f;

+ 1 - 1
examples/heat/dw_factolu_grain.c

@@ -197,7 +197,7 @@ static void dw_factoLU_grain_inner(float *matA, unsigned size, unsigned inner_si
 	 * (re)partition data
 	 */
 	starpu_data_handle dataA;
-	starpu_register_blas_data(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
+	starpu_register_matrix_data(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
 
 	STARPU_ASSERT((size % blocksize) == 0);
 	STARPU_ASSERT((inner_size % blocksize) == 0);

+ 24 - 24
examples/heat/dw_factolu_kernels.c

@@ -104,17 +104,17 @@ void display_stat_heat(void)
 
 static inline void dw_common_cpu_codelet_update_u22(void *descr[], int s, __attribute__((unused)) void *_args)
 {
-	float *left 	= (float *)STARPU_GET_BLAS_PTR(descr[0]);
-	float *right 	= (float *)STARPU_GET_BLAS_PTR(descr[1]);
-	float *center 	= (float *)STARPU_GET_BLAS_PTR(descr[2]);
+	float *left 	= (float *)STARPU_GET_MATRIX_PTR(descr[0]);
+	float *right 	= (float *)STARPU_GET_MATRIX_PTR(descr[1]);
+	float *center 	= (float *)STARPU_GET_MATRIX_PTR(descr[2]);
 
-	unsigned dx = STARPU_GET_BLAS_NX(descr[2]);
-	unsigned dy = STARPU_GET_BLAS_NY(descr[2]);
-	unsigned dz = STARPU_GET_BLAS_NY(descr[0]);
+	unsigned dx = STARPU_GET_MATRIX_NX(descr[2]);
+	unsigned dy = STARPU_GET_MATRIX_NY(descr[2]);
+	unsigned dz = STARPU_GET_MATRIX_NY(descr[0]);
 
-	unsigned ld12 = STARPU_GET_BLAS_LD(descr[0]);
-	unsigned ld21 = STARPU_GET_BLAS_LD(descr[1]);
-	unsigned ld22 = STARPU_GET_BLAS_LD(descr[2]);
+	unsigned ld12 = STARPU_GET_MATRIX_LD(descr[0]);
+	unsigned ld21 = STARPU_GET_MATRIX_LD(descr[1]);
+	unsigned ld22 = STARPU_GET_MATRIX_LD(descr[2]);
 
 #ifdef STARPU_USE_CUDA
 	cublasStatus status;
@@ -171,14 +171,14 @@ static inline void dw_common_codelet_update_u12(void *descr[], int s, __attribut
 	float *sub11;
 	float *sub12;
 
-	sub11 = (float *)STARPU_GET_BLAS_PTR(descr[0]);	
-	sub12 = (float *)STARPU_GET_BLAS_PTR(descr[1]);
+	sub11 = (float *)STARPU_GET_MATRIX_PTR(descr[0]);	
+	sub12 = (float *)STARPU_GET_MATRIX_PTR(descr[1]);
 
-	unsigned ld11 = STARPU_GET_BLAS_LD(descr[0]);
-	unsigned ld12 = STARPU_GET_BLAS_LD(descr[1]);
+	unsigned ld11 = STARPU_GET_MATRIX_LD(descr[0]);
+	unsigned ld12 = STARPU_GET_MATRIX_LD(descr[1]);
 
-	unsigned nx12 = STARPU_GET_BLAS_NX(descr[1]);
-	unsigned ny12 = STARPU_GET_BLAS_NY(descr[1]);
+	unsigned nx12 = STARPU_GET_MATRIX_NX(descr[1]);
+	unsigned ny12 = STARPU_GET_MATRIX_NY(descr[1]);
 	
 #ifdef STARPU_USE_CUDA
 	cublasStatus status;
@@ -234,14 +234,14 @@ static inline void dw_common_codelet_update_u21(void *descr[], int s, __attribut
 	float *sub11;
 	float *sub21;
 
-	sub11 = (float *)STARPU_GET_BLAS_PTR(descr[0]);
-	sub21 = (float *)STARPU_GET_BLAS_PTR(descr[1]);
+	sub11 = (float *)STARPU_GET_MATRIX_PTR(descr[0]);
+	sub21 = (float *)STARPU_GET_MATRIX_PTR(descr[1]);
 
-	unsigned ld11 = STARPU_GET_BLAS_LD(descr[0]);
-	unsigned ld21 = STARPU_GET_BLAS_LD(descr[1]);
+	unsigned ld11 = STARPU_GET_MATRIX_LD(descr[0]);
+	unsigned ld21 = STARPU_GET_MATRIX_LD(descr[1]);
 
-	unsigned nx21 = STARPU_GET_BLAS_NX(descr[1]);
-	unsigned ny21 = STARPU_GET_BLAS_NY(descr[1]);
+	unsigned nx21 = STARPU_GET_MATRIX_NX(descr[1]);
+	unsigned ny21 = STARPU_GET_MATRIX_NY(descr[1]);
 	
 #ifdef STARPU_USE_CUDA
 	cublasStatus status;
@@ -309,10 +309,10 @@ static inline void dw_common_codelet_update_u11(void *descr[], int s, __attribut
 {
 	float *sub11;
 
-	sub11 = (float *)STARPU_GET_BLAS_PTR(descr[0]); 
+	sub11 = (float *)STARPU_GET_MATRIX_PTR(descr[0]); 
 
-	unsigned long nx = STARPU_GET_BLAS_NX(descr[0]);
-	unsigned long ld = STARPU_GET_BLAS_LD(descr[0]);
+	unsigned long nx = STARPU_GET_MATRIX_NX(descr[0]);
+	unsigned long ld = STARPU_GET_MATRIX_LD(descr[0]);
 
 	unsigned long z;
 

+ 2 - 2
examples/heat/dw_factolu_tag.c

@@ -254,7 +254,7 @@ static void dw_codelet_facto_v3(starpu_data_handle dataA, unsigned nblocks)
 	fprintf(stderr, "Computation took (in ms)\n");
 	printf("%2.2f\n", timing/1000);
 
-	unsigned n = starpu_get_blas_nx(dataA);
+	unsigned n = starpu_get_matrix_nx(dataA);
 	double flop = (2.0f*n*n*n)/3.0f;
 	fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
 }
@@ -276,7 +276,7 @@ void dw_factoLU_tag(float *matA, unsigned size, unsigned ld, unsigned nblocks, u
 
 	/* monitor and partition the A matrix into blocks :
 	 * one block is now determined by 2 unsigned (i,j) */
-	starpu_register_blas_data(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
+	starpu_register_matrix_data(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(float));
 
 	starpu_filter f;
 		f.filter_func = starpu_vertical_block_filter_func;

+ 18 - 18
examples/heat/lu_kernels_model.c

@@ -44,7 +44,7 @@ double task_11_cost(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = ((n*n*n)/537.5);
 
@@ -55,7 +55,7 @@ double task_12_cost(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 //	double cost = ((n*n*n)/1744.695);
 	double cost = ((n*n*n)/3210.80);
@@ -69,7 +69,7 @@ double task_21_cost(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 //	double cost = ((n*n*n)/1744.695);
 	double cost = ((n*n*n)/3691.53);
@@ -84,9 +84,9 @@ double task_22_cost(starpu_buffer_descr *descr)
 {
 	uint32_t nx, ny, nz;
 
-	nx = starpu_get_blas_nx(descr[2].handle);
-	ny = starpu_get_blas_ny(descr[2].handle);
-	nz = starpu_get_blas_ny(descr[0].handle);
+	nx = starpu_get_matrix_nx(descr[2].handle);
+	ny = starpu_get_matrix_ny(descr[2].handle);
+	nz = starpu_get_matrix_ny(descr[0].handle);
 
 	double cost = ((nx*ny*nz)/4110.0);
 
@@ -104,7 +104,7 @@ double task_11_cost_cuda(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = ((n*n*n)/1853.7806);
 
@@ -116,7 +116,7 @@ double task_12_cost_cuda(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = ((n*n*n)/42838.5718);
 
@@ -129,7 +129,7 @@ double task_21_cost_cuda(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = ((n*n*n)/49208.667);
 
@@ -143,9 +143,9 @@ double task_22_cost_cuda(starpu_buffer_descr *descr)
 {
 	uint32_t nx, ny, nz;
 
-	nx = starpu_get_blas_nx(descr[2].handle);
-	ny = starpu_get_blas_ny(descr[2].handle);
-	nz = starpu_get_blas_ny(descr[0].handle);
+	nx = starpu_get_matrix_nx(descr[2].handle);
+	ny = starpu_get_matrix_ny(descr[2].handle);
+	nz = starpu_get_matrix_ny(descr[0].handle);
 
 	double cost = ((nx*ny*nz)/57523.560);
 
@@ -163,7 +163,7 @@ double task_11_cost_cpu(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = ((n*n*n)/537.5);
 
@@ -175,7 +175,7 @@ double task_12_cost_cpu(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = ((n*n*n)/6668.224);
 
@@ -188,7 +188,7 @@ double task_21_cost_cpu(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = ((n*n*n)/6793.8423);
 
@@ -202,9 +202,9 @@ double task_22_cost_cpu(starpu_buffer_descr *descr)
 {
 	uint32_t nx, ny, nz;
 
-	nx = starpu_get_blas_nx(descr[2].handle);
-	ny = starpu_get_blas_ny(descr[2].handle);
-	nz = starpu_get_blas_ny(descr[0].handle);
+	nx = starpu_get_matrix_nx(descr[2].handle);
+	ny = starpu_get_matrix_ny(descr[2].handle);
+	nz = starpu_get_matrix_ny(descr[0].handle);
 
 	double cost = ((nx*ny*nz)/4203.0175);
 

+ 2 - 2
examples/lu/xlu.c

@@ -302,7 +302,7 @@ static void dw_codelet_facto_v3(starpu_data_handle dataA, unsigned nblocks)
 	fprintf(stderr, "Computation took (in ms)\n");
 	printf("%2.2f\n", timing/1000);
 
-	unsigned n = starpu_get_blas_nx(dataA);
+	unsigned n = starpu_get_matrix_nx(dataA);
 	double flop = (2.0f*n*n*n)/3.0f;
 	fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
 }
@@ -313,7 +313,7 @@ void STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigne
 
 	/* monitor and partition the A matrix into blocks :
 	 * one block is now determined by 2 unsigned (i,j) */
-	starpu_register_blas_data(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
+	starpu_register_matrix_data(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
 
 	starpu_filter f;
 		f.filter_func = starpu_vertical_block_filter_func;

+ 30 - 30
examples/lu/xlu_kernels.c

@@ -24,17 +24,17 @@
 static inline void STARPU_LU(common_u22)(void *descr[],
 				int s, __attribute__((unused)) void *_args)
 {
-	TYPE *right 	= (TYPE *)STARPU_GET_BLAS_PTR(descr[0]);
-	TYPE *left 	= (TYPE *)STARPU_GET_BLAS_PTR(descr[1]);
-	TYPE *center 	= (TYPE *)STARPU_GET_BLAS_PTR(descr[2]);
+	TYPE *right 	= (TYPE *)STARPU_GET_MATRIX_PTR(descr[0]);
+	TYPE *left 	= (TYPE *)STARPU_GET_MATRIX_PTR(descr[1]);
+	TYPE *center 	= (TYPE *)STARPU_GET_MATRIX_PTR(descr[2]);
 
-	unsigned dx = STARPU_GET_BLAS_NX(descr[2]);
-	unsigned dy = STARPU_GET_BLAS_NY(descr[2]);
-	unsigned dz = STARPU_GET_BLAS_NY(descr[0]);
+	unsigned dx = STARPU_GET_MATRIX_NX(descr[2]);
+	unsigned dy = STARPU_GET_MATRIX_NY(descr[2]);
+	unsigned dz = STARPU_GET_MATRIX_NY(descr[0]);
 
-	unsigned ld12 = STARPU_GET_BLAS_LD(descr[0]);
-	unsigned ld21 = STARPU_GET_BLAS_LD(descr[1]);
-	unsigned ld22 = STARPU_GET_BLAS_LD(descr[2]);
+	unsigned ld12 = STARPU_GET_MATRIX_LD(descr[0]);
+	unsigned ld21 = STARPU_GET_MATRIX_LD(descr[1]);
+	unsigned ld22 = STARPU_GET_MATRIX_LD(descr[2]);
 
 #ifdef STARPU_USE_CUDA
 	cublasStatus status;
@@ -91,14 +91,14 @@ static inline void STARPU_LU(common_u12)(void *descr[],
 	TYPE *sub11;
 	TYPE *sub12;
 
-	sub11 = (TYPE *)STARPU_GET_BLAS_PTR(descr[0]);	
-	sub12 = (TYPE *)STARPU_GET_BLAS_PTR(descr[1]);
+	sub11 = (TYPE *)STARPU_GET_MATRIX_PTR(descr[0]);	
+	sub12 = (TYPE *)STARPU_GET_MATRIX_PTR(descr[1]);
 
-	unsigned ld11 = STARPU_GET_BLAS_LD(descr[0]);
-	unsigned ld12 = STARPU_GET_BLAS_LD(descr[1]);
+	unsigned ld11 = STARPU_GET_MATRIX_LD(descr[0]);
+	unsigned ld12 = STARPU_GET_MATRIX_LD(descr[1]);
 
-	unsigned nx12 = STARPU_GET_BLAS_NX(descr[1]);
-	unsigned ny12 = STARPU_GET_BLAS_NY(descr[1]);
+	unsigned nx12 = STARPU_GET_MATRIX_NX(descr[1]);
+	unsigned ny12 = STARPU_GET_MATRIX_NY(descr[1]);
 
 #ifdef STARPU_USE_CUDA
 	cublasStatus status;
@@ -153,14 +153,14 @@ static inline void STARPU_LU(common_u21)(void *descr[],
 	TYPE *sub11;
 	TYPE *sub21;
 
-	sub11 = (TYPE *)STARPU_GET_BLAS_PTR(descr[0]);
-	sub21 = (TYPE *)STARPU_GET_BLAS_PTR(descr[1]);
+	sub11 = (TYPE *)STARPU_GET_MATRIX_PTR(descr[0]);
+	sub21 = (TYPE *)STARPU_GET_MATRIX_PTR(descr[1]);
 
-	unsigned ld11 = STARPU_GET_BLAS_LD(descr[0]);
-	unsigned ld21 = STARPU_GET_BLAS_LD(descr[1]);
+	unsigned ld11 = STARPU_GET_MATRIX_LD(descr[0]);
+	unsigned ld21 = STARPU_GET_MATRIX_LD(descr[1]);
 
-	unsigned nx21 = STARPU_GET_BLAS_NX(descr[1]);
-	unsigned ny21 = STARPU_GET_BLAS_NY(descr[1]);
+	unsigned nx21 = STARPU_GET_MATRIX_NX(descr[1]);
+	unsigned ny21 = STARPU_GET_MATRIX_NY(descr[1]);
 	
 #ifdef STARPU_USE_CUDA
 	cublasStatus status;
@@ -212,10 +212,10 @@ static inline void STARPU_LU(common_u11)(void *descr[],
 {
 	TYPE *sub11;
 
-	sub11 = (TYPE *)STARPU_GET_BLAS_PTR(descr[0]); 
+	sub11 = (TYPE *)STARPU_GET_MATRIX_PTR(descr[0]); 
 
-	unsigned long nx = STARPU_GET_BLAS_NX(descr[0]);
-	unsigned long ld = STARPU_GET_BLAS_LD(descr[0]);
+	unsigned long nx = STARPU_GET_MATRIX_NX(descr[0]);
+	unsigned long ld = STARPU_GET_MATRIX_LD(descr[0]);
 
 	unsigned long z;
 
@@ -284,10 +284,10 @@ static inline void STARPU_LU(common_u11_pivot)(void *descr[],
 {
 	TYPE *sub11;
 
-	sub11 = (TYPE *)STARPU_GET_BLAS_PTR(descr[0]); 
+	sub11 = (TYPE *)STARPU_GET_MATRIX_PTR(descr[0]); 
 
-	unsigned long nx = STARPU_GET_BLAS_NX(descr[0]);
-	unsigned long ld = STARPU_GET_BLAS_LD(descr[0]);
+	unsigned long nx = STARPU_GET_MATRIX_NX(descr[0]);
+	unsigned long ld = STARPU_GET_MATRIX_LD(descr[0]);
 
 	unsigned long z;
 
@@ -399,9 +399,9 @@ static inline void STARPU_LU(common_pivot)(void *descr[],
 {
 	TYPE *matrix;
 
-	matrix = (TYPE *)STARPU_GET_BLAS_PTR(descr[0]); 
-	unsigned long nx = STARPU_GET_BLAS_NX(descr[0]);
-	unsigned long ld = STARPU_GET_BLAS_LD(descr[0]);
+	matrix = (TYPE *)STARPU_GET_MATRIX_PTR(descr[0]); 
+	unsigned long nx = STARPU_GET_MATRIX_NX(descr[0]);
+	unsigned long ld = STARPU_GET_MATRIX_LD(descr[0]);
 
 	unsigned row, rowaux;
 

+ 4 - 4
examples/lu/xlu_pivot.c

@@ -425,7 +425,7 @@ void STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size
 
 	/* monitor and partition the A matrix into blocks :
 	 * one block is now determined by 2 unsigned (i,j) */
-	starpu_register_blas_data(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
+	starpu_register_matrix_data(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(TYPE));
 
 	starpu_filter f;
 		f.filter_func = starpu_vertical_block_filter_func;
@@ -465,7 +465,7 @@ void STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size
 	fprintf(stderr, "Computation took (in ms)\n");
 	fprintf(stderr, "%2.2f\n", timing/1000);
 
-	unsigned n = starpu_get_blas_nx(dataA);
+	unsigned n = starpu_get_matrix_nx(dataA);
 	double flop = (2.0f*n*n*n)/3.0f;
 	fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
 
@@ -490,7 +490,7 @@ void STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, un
 	for (bj = 0; bj < nblocks; bj++)
 	for (bi = 0; bi < nblocks; bi++)
 	{
-		starpu_register_blas_data(&dataAp[bi+nblocks*bj], 0,
+		starpu_register_matrix_data(&dataAp[bi+nblocks*bj], 0,
 			(uintptr_t)matA[bi+nblocks*bj], size/nblocks,
 			size/nblocks, size/nblocks, sizeof(TYPE));
 	}
@@ -514,7 +514,7 @@ void STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, un
 	fprintf(stderr, "Computation took (in ms)\n");
 	fprintf(stderr, "%2.2f\n", timing/1000);
 
-	unsigned n = starpu_get_blas_nx(dataAp[0])*nblocks;
+	unsigned n = starpu_get_matrix_nx(dataAp[0])*nblocks;
 	double flop = (2.0f*n*n*n)/3.0f;
 	fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));
 

+ 3 - 3
examples/mult/dw_mult.c

@@ -153,11 +153,11 @@ static void partition_mult_data(void)
 {
 	gettimeofday(&start, NULL);
 
-	starpu_register_blas_data(&A_handle, 0, (uintptr_t)A, 
+	starpu_register_matrix_data(&A_handle, 0, (uintptr_t)A, 
 		ydim, ydim, zdim, sizeof(float));
-	starpu_register_blas_data(&B_handle, 0, (uintptr_t)B, 
+	starpu_register_matrix_data(&B_handle, 0, (uintptr_t)B, 
 		zdim, zdim, xdim, sizeof(float));
-	starpu_register_blas_data(&C_handle, 0, (uintptr_t)C, 
+	starpu_register_matrix_data(&C_handle, 0, (uintptr_t)C, 
 		ydim, ydim, xdim, sizeof(float));
 
 	starpu_data_set_wb_mask(C_handle, 1<<0);

+ 3 - 3
examples/mult/dw_mult_no_stride.c

@@ -155,7 +155,7 @@ static void init_problem_data(void)
 	{
 		for (z = 0; z < nslicesz; z++)
 		{
-			starpu_register_blas_data(&A_state[y][z], 0, (uintptr_t)A[y][z], 
+			starpu_register_matrix_data(&A_state[y][z], 0, (uintptr_t)A[y][z], 
 				BLOCKSIZEY, BLOCKSIZEY, BLOCKSIZEZ, sizeof(float));
 		}
 	}
@@ -164,7 +164,7 @@ static void init_problem_data(void)
 	{
 		for (x = 0; x < nslicesx; x++)
 		{
-			starpu_register_blas_data(&B_state[z][x], 0, (uintptr_t)B[z][x], 
+			starpu_register_matrix_data(&B_state[z][x], 0, (uintptr_t)B[z][x], 
 				BLOCKSIZEZ, BLOCKSIZEZ, BLOCKSIZEX, sizeof(float));
 		}
 	}
@@ -173,7 +173,7 @@ static void init_problem_data(void)
 	{
 		for (x = 0; x < nslicesx; x++)
 		{
-			starpu_register_blas_data(&C_state[y][x], 0, (uintptr_t)C[y][x], 
+			starpu_register_matrix_data(&C_state[y][x], 0, (uintptr_t)C[y][x], 
 				BLOCKSIZEY, BLOCKSIZEY, BLOCKSIZEX, sizeof(float));
 		}
 	}

+ 3 - 3
examples/mult/dw_mult_no_stride_no_tag.c

@@ -171,7 +171,7 @@ static void init_problem_data(void)
 	{
 		for (z = 0; z < nslicesz; z++)
 		{
-			starpu_register_blas_data(&A_state[y][z], 0, (uintptr_t)A[y][z], 
+			starpu_register_matrix_data(&A_state[y][z], 0, (uintptr_t)A[y][z], 
 				BLOCKSIZEY, BLOCKSIZEY, BLOCKSIZEZ, sizeof(float));
 		}
 	}
@@ -180,7 +180,7 @@ static void init_problem_data(void)
 	{
 		for (x = 0; x < nslicesx; x++)
 		{
-			starpu_register_blas_data(&B_state[z][x], 0, (uintptr_t)B[z][x], 
+			starpu_register_matrix_data(&B_state[z][x], 0, (uintptr_t)B[z][x], 
 				BLOCKSIZEZ, BLOCKSIZEZ, BLOCKSIZEX, sizeof(float));
 		}
 	}
@@ -189,7 +189,7 @@ static void init_problem_data(void)
 	{
 		for (x = 0; x < nslicesx; x++)
 		{
-			starpu_register_blas_data(&C_state[y][x], 0, (uintptr_t)C[y][x], 
+			starpu_register_matrix_data(&C_state[y][x], 0, (uintptr_t)C[y][x], 
 				BLOCKSIZEY, BLOCKSIZEY, BLOCKSIZEX, sizeof(float));
 		}
 	}

+ 9 - 9
examples/mult/sgemm_kernels.c

@@ -25,17 +25,17 @@
 	float *subB;			\
 	float *subC;			\
 					\
-	subA = (float *)STARPU_GET_BLAS_PTR(descr[0]);	\
-	subB = (float *)STARPU_GET_BLAS_PTR(descr[1]);	\
-	subC = (float *)STARPU_GET_BLAS_PTR(descr[2]);	\
+	subA = (float *)STARPU_GET_MATRIX_PTR(descr[0]);	\
+	subB = (float *)STARPU_GET_MATRIX_PTR(descr[1]);	\
+	subC = (float *)STARPU_GET_MATRIX_PTR(descr[2]);	\
 					\
-	nxC = STARPU_GET_BLAS_NX(descr[2]);		\
-	nyC = STARPU_GET_BLAS_NY(descr[2]);		\
-	nyA = STARPU_GET_BLAS_NY(descr[0]);		\
+	nxC = STARPU_GET_MATRIX_NX(descr[2]);		\
+	nyC = STARPU_GET_MATRIX_NY(descr[2]);		\
+	nyA = STARPU_GET_MATRIX_NY(descr[0]);		\
 					\
-	ldA = STARPU_GET_BLAS_LD(descr[0]);		\
-	ldB = STARPU_GET_BLAS_LD(descr[1]);		\
-	ldC = STARPU_GET_BLAS_LD(descr[2]);
+	ldA = STARPU_GET_MATRIX_LD(descr[0]);		\
+	ldB = STARPU_GET_MATRIX_LD(descr[1]);		\
+	ldC = STARPU_GET_MATRIX_LD(descr[2]);
 
 
 

+ 3 - 3
examples/mult/xgemm.c

@@ -137,11 +137,11 @@ static void init_problem_data(void)
 
 static void partition_mult_data(void)
 {
-	starpu_register_blas_data(&A_handle, 0, (uintptr_t)A, 
+	starpu_register_matrix_data(&A_handle, 0, (uintptr_t)A, 
 		ydim, ydim, zdim, sizeof(TYPE));
-	starpu_register_blas_data(&B_handle, 0, (uintptr_t)B, 
+	starpu_register_matrix_data(&B_handle, 0, (uintptr_t)B, 
 		zdim, zdim, xdim, sizeof(TYPE));
-	starpu_register_blas_data(&C_handle, 0, (uintptr_t)C, 
+	starpu_register_matrix_data(&C_handle, 0, (uintptr_t)C, 
 		ydim, ydim, xdim, sizeof(TYPE));
 
 	starpu_data_set_wb_mask(C_handle, 1<<0);

+ 9 - 9
examples/mult/xgemm_kernels.c

@@ -25,17 +25,17 @@
 	TYPE *subB;			\
 	TYPE *subC;			\
 					\
-	subA = (TYPE *)STARPU_GET_BLAS_PTR(descr[0]);	\
-	subB = (TYPE *)STARPU_GET_BLAS_PTR(descr[1]);	\
-	subC = (TYPE *)STARPU_GET_BLAS_PTR(descr[2]);	\
+	subA = (TYPE *)STARPU_GET_MATRIX_PTR(descr[0]);	\
+	subB = (TYPE *)STARPU_GET_MATRIX_PTR(descr[1]);	\
+	subC = (TYPE *)STARPU_GET_MATRIX_PTR(descr[2]);	\
 					\
-	nxC = STARPU_GET_BLAS_NX(descr[2]);		\
-	nyC = STARPU_GET_BLAS_NY(descr[2]);		\
-	nyA = STARPU_GET_BLAS_NY(descr[0]);		\
+	nxC = STARPU_GET_MATRIX_NX(descr[2]);		\
+	nyC = STARPU_GET_MATRIX_NY(descr[2]);		\
+	nyA = STARPU_GET_MATRIX_NY(descr[0]);		\
 					\
-	ldA = STARPU_GET_BLAS_LD(descr[0]);		\
-	ldB = STARPU_GET_BLAS_LD(descr[1]);		\
-	ldC = STARPU_GET_BLAS_LD(descr[2]);
+	ldA = STARPU_GET_MATRIX_LD(descr[0]);		\
+	ldB = STARPU_GET_MATRIX_LD(descr[1]);		\
+	ldC = STARPU_GET_MATRIX_LD(descr[2]);
 
 
 

+ 12 - 12
examples/ppm-downscaler/yuv-downscaler.c

@@ -68,14 +68,14 @@ static void ds_callback(void *arg)
 
 static void ds_kernel_cpu(void *descr[], __attribute__((unused)) void *arg)
 {
-	uint8_t *input = (uint8_t *)STARPU_GET_BLAS_PTR(descr[0]);
-	unsigned input_ld = STARPU_GET_BLAS_LD(descr[0]);
+	uint8_t *input = (uint8_t *)STARPU_GET_MATRIX_PTR(descr[0]);
+	unsigned input_ld = STARPU_GET_MATRIX_LD(descr[0]);
 
-	uint8_t *output = (uint8_t *)STARPU_GET_BLAS_PTR(descr[1]);
-	unsigned output_ld = STARPU_GET_BLAS_LD(descr[1]);
+	uint8_t *output = (uint8_t *)STARPU_GET_MATRIX_PTR(descr[1]);
+	unsigned output_ld = STARPU_GET_MATRIX_LD(descr[1]);
 
-	unsigned ncols = STARPU_GET_BLAS_NX(descr[0]);
-	unsigned nlines = STARPU_GET_BLAS_NY(descr[0]);
+	unsigned ncols = STARPU_GET_MATRIX_NX(descr[0]);
+	unsigned nlines = STARPU_GET_MATRIX_NY(descr[0]);
 
 	unsigned line, col;
 	for (line = 0; line < nlines; line+=FACTOR)
@@ -169,39 +169,39 @@ int main(int argc, char **argv)
 	for (frame = 0; frame < nframes; frame++)
 	{
 		/* register Y layer */
-		starpu_register_blas_data(&frame_y_handle[frame], 0,
+		starpu_register_matrix_data(&frame_y_handle[frame], 0,
 			(uintptr_t)&yuv_in_buffer[frame].y,
 			WIDTH, WIDTH, HEIGHT, sizeof(uint8_t));
 
 		starpu_partition_data(frame_y_handle[frame], &filter_y);
 
-		starpu_register_blas_data(&new_frame_y_handle[frame], 0,
+		starpu_register_matrix_data(&new_frame_y_handle[frame], 0,
 			(uintptr_t)&yuv_out_buffer[frame].y,
 			NEW_WIDTH, NEW_WIDTH, NEW_HEIGHT, sizeof(uint8_t));
 
 		starpu_partition_data(new_frame_y_handle[frame], &filter_y);
 
 		/* register U layer */
-		starpu_register_blas_data(&frame_u_handle[frame], 0,
+		starpu_register_matrix_data(&frame_u_handle[frame], 0,
 			(uintptr_t)&yuv_in_buffer[frame].u,
 			WIDTH/2, WIDTH/2, HEIGHT/2, sizeof(uint8_t));
 
 		starpu_partition_data(frame_u_handle[frame], &filter_u);
 
-		starpu_register_blas_data(&new_frame_u_handle[frame], 0,
+		starpu_register_matrix_data(&new_frame_u_handle[frame], 0,
 			(uintptr_t)&yuv_out_buffer[frame].u,
 			NEW_WIDTH/2, NEW_WIDTH/2, NEW_HEIGHT/2, sizeof(uint8_t));
 
 		starpu_partition_data(new_frame_u_handle[frame], &filter_u);
 
 		/* register V layer */
-		starpu_register_blas_data(&frame_v_handle[frame], 0,
+		starpu_register_matrix_data(&frame_v_handle[frame], 0,
 			(uintptr_t)&yuv_in_buffer[frame].v,
 			WIDTH/2, WIDTH/2, HEIGHT/2, sizeof(uint8_t));
 
 		starpu_partition_data(frame_v_handle[frame], &filter_v);
 
-		starpu_register_blas_data(&new_frame_v_handle[frame], 0,
+		starpu_register_matrix_data(&new_frame_v_handle[frame], 0,
 			(uintptr_t)&yuv_out_buffer[frame].v,
 			NEW_WIDTH/2, NEW_WIDTH/2, NEW_HEIGHT/2, sizeof(uint8_t));
 

+ 4 - 4
examples/spmv/dw_block_spmv_kernels.c

@@ -23,14 +23,14 @@
 static inline void common_block_spmv(void *descr[], int s, __attribute__((unused)) void *_args)
 {
 	//printf("22\n");
-	float *block 	= (float *)STARPU_GET_BLAS_PTR(descr[0]);
+	float *block 	= (float *)STARPU_GET_MATRIX_PTR(descr[0]);
 	float *in 	= (float *)STARPU_GET_VECTOR_PTR(descr[1]);
 	float *out 	= (float *)STARPU_GET_VECTOR_PTR(descr[2]);
 
-	unsigned dx = STARPU_GET_BLAS_NX(descr[0]);
-	unsigned dy = STARPU_GET_BLAS_NY(descr[0]);
+	unsigned dx = STARPU_GET_MATRIX_NX(descr[0]);
+	unsigned dy = STARPU_GET_MATRIX_NY(descr[0]);
 
-	unsigned ld = STARPU_GET_BLAS_LD(descr[0]);
+	unsigned ld = STARPU_GET_MATRIX_LD(descr[0]);
 
 	switch (s) {
 		case 0:

+ 3 - 3
examples/strassen/strassen.c

@@ -23,15 +23,15 @@ static starpu_data_handle create_tmp_matrix(starpu_data_handle M)
 	starpu_data_handle state = malloc(sizeof(starpu_data_handle));
 
 	/* create a matrix with the same dimensions as M */
-	uint32_t nx = starpu_get_blas_nx(M);
-	uint32_t ny = starpu_get_blas_nx(M);
+	uint32_t nx = starpu_get_matrix_nx(M);
+	uint32_t ny = starpu_get_matrix_ny(M);
 
 	STARPU_ASSERT(state);
 
 	data = malloc(nx*ny*sizeof(float));
 	STARPU_ASSERT(data);
 
-	starpu_register_blas_data(&state, 0, (uintptr_t)data, nx, nx, ny, sizeof(float));
+	starpu_register_matrix_data(&state, 0, (uintptr_t)data, nx, nx, ny, sizeof(float));
 	
 	return state;
 }

+ 23 - 23
examples/strassen/strassen_kernels.c

@@ -19,17 +19,17 @@
 
 static void mult_common_codelet(void *descr[], int s, __attribute__((unused))  void *arg)
 {
-	float *center 	= (float *)STARPU_GET_BLAS_PTR(descr[0]);
-	float *left 	= (float *)STARPU_GET_BLAS_PTR(descr[1]);
-	float *right 	= (float *)STARPU_GET_BLAS_PTR(descr[2]);
+	float *center 	= (float *)STARPU_GET_MATRIX_PTR(descr[0]);
+	float *left 	= (float *)STARPU_GET_MATRIX_PTR(descr[1]);
+	float *right 	= (float *)STARPU_GET_MATRIX_PTR(descr[2]);
 
-	unsigned dx = STARPU_GET_BLAS_NX(descr[0]);
-	unsigned dy = STARPU_GET_BLAS_NY(descr[0]);
-	unsigned dz = STARPU_GET_BLAS_NX(descr[1]);
+	unsigned dx = STARPU_GET_MATRIX_NX(descr[0]);
+	unsigned dy = STARPU_GET_MATRIX_NY(descr[0]);
+	unsigned dz = STARPU_GET_MATRIX_NX(descr[1]);
 
-	unsigned ld21 = STARPU_GET_BLAS_LD(descr[1]);
-	unsigned ld12 = STARPU_GET_BLAS_LD(descr[2]);
-	unsigned ld22 = STARPU_GET_BLAS_LD(descr[0]);
+	unsigned ld21 = STARPU_GET_MATRIX_LD(descr[1]);
+	unsigned ld12 = STARPU_GET_MATRIX_LD(descr[2]);
+	unsigned ld22 = STARPU_GET_MATRIX_LD(descr[0]);
 
 	switch (s) {
 		case 0:
@@ -67,16 +67,16 @@ static void add_sub_common_codelet(void *descr[], int s, __attribute__((unused))
 {
 	/* C = A op B */
 
-	float *C 	= (float *)STARPU_GET_BLAS_PTR(descr[0]);
-	float *A 	= (float *)STARPU_GET_BLAS_PTR(descr[1]);
-	float *B 	= (float *)STARPU_GET_BLAS_PTR(descr[2]);
+	float *C 	= (float *)STARPU_GET_MATRIX_PTR(descr[0]);
+	float *A 	= (float *)STARPU_GET_MATRIX_PTR(descr[1]);
+	float *B 	= (float *)STARPU_GET_MATRIX_PTR(descr[2]);
 
-	unsigned dx = STARPU_GET_BLAS_NX(descr[0]);
-	unsigned dy = STARPU_GET_BLAS_NY(descr[0]);
+	unsigned dx = STARPU_GET_MATRIX_NX(descr[0]);
+	unsigned dy = STARPU_GET_MATRIX_NY(descr[0]);
 
-	unsigned ldA = STARPU_GET_BLAS_LD(descr[1]);
-	unsigned ldB = STARPU_GET_BLAS_LD(descr[2]);
-	unsigned ldC = STARPU_GET_BLAS_LD(descr[0]);
+	unsigned ldA = STARPU_GET_MATRIX_LD(descr[1]);
+	unsigned ldB = STARPU_GET_MATRIX_LD(descr[2]);
+	unsigned ldC = STARPU_GET_MATRIX_LD(descr[0]);
 
 	// TODO check dim ...
 
@@ -139,14 +139,14 @@ static void self_add_sub_common_codelet(void *descr[], int s, __attribute__((unu
 {
 	/* C +=/-= A */
 
-	float *C 	= (float *)STARPU_GET_BLAS_PTR(descr[0]);
-	float *A 	= (float *)STARPU_GET_BLAS_PTR(descr[1]);
+	float *C 	= (float *)STARPU_GET_MATRIX_PTR(descr[0]);
+	float *A 	= (float *)STARPU_GET_MATRIX_PTR(descr[1]);
 
-	unsigned dx = STARPU_GET_BLAS_NX(descr[0]);
-	unsigned dy = STARPU_GET_BLAS_NY(descr[0]);
+	unsigned dx = STARPU_GET_MATRIX_NX(descr[0]);
+	unsigned dy = STARPU_GET_MATRIX_NY(descr[0]);
 
-	unsigned ldA = STARPU_GET_BLAS_LD(descr[1]);
-	unsigned ldC = STARPU_GET_BLAS_LD(descr[0]);
+	unsigned ldA = STARPU_GET_MATRIX_LD(descr[1]);
+	unsigned ldC = STARPU_GET_MATRIX_LD(descr[0]);
 
 	// TODO check dim ...
 	

+ 6 - 6
examples/strassen/strassen_models.c

@@ -41,7 +41,7 @@ static double self_add_sub_cost(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = (n*n)/10.0f/4.0f/7.75f;
 
@@ -56,7 +56,7 @@ static double cuda_self_add_sub_cost(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = (n*n)/10.0f/4.0f;
 
@@ -71,7 +71,7 @@ static double add_sub_cost(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = (1.45f*n*n)/10.0f/2.0f;
 
@@ -86,7 +86,7 @@ static double cuda_add_sub_cost(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = (1.45f*n*n)/10.0f/2.0f;
 
@@ -102,7 +102,7 @@ static double mult_cost(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = (((double)(n)*n*n)/1000.0f/4.11f/0.2588);
 
@@ -117,7 +117,7 @@ static double cuda_mult_cost(starpu_buffer_descr *descr)
 {
 	uint32_t n;
 
-	n = starpu_get_blas_nx(descr[0].handle);
+	n = starpu_get_matrix_nx(descr[0].handle);
 
 	double cost = (((double)(n)*n*n)/1000.0f/4.11f);
 

+ 3 - 3
examples/strassen/test_strassen.c

@@ -156,11 +156,11 @@ void init_problem(void)
 		}
 	}
 
-	starpu_register_blas_data(&A_state, 0, (uintptr_t)A, 
+	starpu_register_matrix_data(&A_state, 0, (uintptr_t)A, 
 		dim, dim, dim, sizeof(float));
-	starpu_register_blas_data(&B_state, 0, (uintptr_t)B, 
+	starpu_register_matrix_data(&B_state, 0, (uintptr_t)B, 
 		dim, dim, dim, sizeof(float));
-	starpu_register_blas_data(&C_state, 0, (uintptr_t)C, 
+	starpu_register_matrix_data(&C_state, 0, (uintptr_t)C, 
 		dim, dim, dim, sizeof(float));
 
 	gettimeofday(&start, NULL);

+ 5 - 5
examples/strassen2/strassen2.c

@@ -190,7 +190,7 @@ static starpu_data_handle allocate_tmp_matrix(unsigned size, unsigned reclevel)
 
 	buffer = allocate_tmp_matrix_wrapper(size*size*sizeof(float));
 
-	starpu_register_blas_data(data, 0, (uintptr_t)buffer, size, size, size, sizeof(float));
+	starpu_register_matrix_data(data, 0, (uintptr_t)buffer, size, size, size, sizeof(float));
 
 	/* we construct a starpu_filter tree of depth reclevel */
 	unsigned rec;
@@ -417,7 +417,7 @@ void strassen_mult(struct strassen_iter *iter)
         starpu_data_handle C21 = starpu_get_sub_data(iter->C, 2, 0, 1);
         starpu_data_handle C22 = starpu_get_sub_data(iter->C, 2, 1, 1);
 
-	unsigned size = starpu_get_blas_nx(A11);
+	unsigned size = starpu_get_matrix_nx(A11);
 
 	/* M1a = (A11 + A22) */
 	iter->Mia_data[0] = allocate_tmp_matrix(size, iter->reclevel);
@@ -804,9 +804,9 @@ int main(int argc, char **argv)
 	B = allocate_tmp_matrix_wrapper(size*size*sizeof(float));
 	C = allocate_tmp_matrix_wrapper(size*size*sizeof(float));
 
-	starpu_register_blas_data(&data_A, 0, (uintptr_t)A, size, size, size, sizeof(float));
-	starpu_register_blas_data(&data_B, 0, (uintptr_t)B, size, size, size, sizeof(float));
-	starpu_register_blas_data(&data_C, 0, (uintptr_t)C, size, size, size, sizeof(float));
+	starpu_register_matrix_data(&data_A, 0, (uintptr_t)A, size, size, size, sizeof(float));
+	starpu_register_matrix_data(&data_B, 0, (uintptr_t)B, size, size, size, sizeof(float));
+	starpu_register_matrix_data(&data_C, 0, (uintptr_t)C, size, size, size, sizeof(float));
 
 	unsigned rec;
 	for (rec = 0; rec < reclevel; rec++)

+ 19 - 19
examples/strassen2/strassen2_kernels.c

@@ -53,15 +53,15 @@ void display_perf(double timing, unsigned size)
 
 static void mult_common_codelet(void *descr[], int s, __attribute__((unused))  void *arg)
 {
-	float *center 	= (float *)STARPU_GET_BLAS_PTR(descr[0]);
-	float *left 	= (float *)STARPU_GET_BLAS_PTR(descr[1]);
-	float *right 	= (float *)STARPU_GET_BLAS_PTR(descr[2]);
+	float *center 	= (float *)STARPU_GET_MATRIX_PTR(descr[0]);
+	float *left 	= (float *)STARPU_GET_MATRIX_PTR(descr[1]);
+	float *right 	= (float *)STARPU_GET_MATRIX_PTR(descr[2]);
 
-	unsigned n = STARPU_GET_BLAS_NX(descr[0]);
+	unsigned n = STARPU_GET_MATRIX_NX(descr[0]);
 
-	unsigned ld21 = STARPU_GET_BLAS_LD(descr[1]);
-	unsigned ld12 = STARPU_GET_BLAS_LD(descr[2]);
-	unsigned ld22 = STARPU_GET_BLAS_LD(descr[0]);
+	unsigned ld21 = STARPU_GET_MATRIX_LD(descr[1]);
+	unsigned ld12 = STARPU_GET_MATRIX_LD(descr[2]);
+	unsigned ld22 = STARPU_GET_MATRIX_LD(descr[0]);
 
 	double flop = 2.0*n*n*n;
 
@@ -106,15 +106,15 @@ static void add_sub_common_codelet(void *descr[], int s, __attribute__((unused))
 {
 	/* C = A op B */
 
-	float *C 	= (float *)STARPU_GET_BLAS_PTR(descr[0]);
-	float *A 	= (float *)STARPU_GET_BLAS_PTR(descr[1]);
-	float *B 	= (float *)STARPU_GET_BLAS_PTR(descr[2]);
+	float *C 	= (float *)STARPU_GET_MATRIX_PTR(descr[0]);
+	float *A 	= (float *)STARPU_GET_MATRIX_PTR(descr[1]);
+	float *B 	= (float *)STARPU_GET_MATRIX_PTR(descr[2]);
 
-	unsigned n = STARPU_GET_BLAS_NX(descr[0]);
+	unsigned n = STARPU_GET_MATRIX_NX(descr[0]);
 
-	unsigned ldA = STARPU_GET_BLAS_LD(descr[1]);
-	unsigned ldB = STARPU_GET_BLAS_LD(descr[2]);
-	unsigned ldC = STARPU_GET_BLAS_LD(descr[0]);
+	unsigned ldA = STARPU_GET_MATRIX_LD(descr[1]);
+	unsigned ldB = STARPU_GET_MATRIX_LD(descr[2]);
+	unsigned ldC = STARPU_GET_MATRIX_LD(descr[0]);
 
 	double flop = 2.0*n*n;
 
@@ -188,13 +188,13 @@ static void self_add_sub_common_codelet(void *descr[], int s, __attribute__((unu
 {
 	/* C +=/-= A */
 
-	float *C 	= (float *)STARPU_GET_BLAS_PTR(descr[0]);
-	float *A 	= (float *)STARPU_GET_BLAS_PTR(descr[1]);
+	float *C 	= (float *)STARPU_GET_MATRIX_PTR(descr[0]);
+	float *A 	= (float *)STARPU_GET_MATRIX_PTR(descr[1]);
 
-	unsigned n = STARPU_GET_BLAS_NX(descr[0]);
+	unsigned n = STARPU_GET_MATRIX_NX(descr[0]);
 
-	unsigned ldA = STARPU_GET_BLAS_LD(descr[1]);
-	unsigned ldC = STARPU_GET_BLAS_LD(descr[0]);
+	unsigned ldA = STARPU_GET_MATRIX_LD(descr[1]);
+	unsigned ldC = STARPU_GET_MATRIX_LD(descr[0]);
 
 	double flop = 1.0*n*n;
 

+ 14 - 14
include/starpu-data-interfaces.h

@@ -26,30 +26,30 @@ extern "C" {
 
 void *starpu_data_get_interface_on_node(starpu_data_handle handle, unsigned memory_node);
 
-/* BLAS interface for dense matrices */
-typedef struct starpu_blas_interface_s {
+/* Matrix interface for dense matrices */
+typedef struct starpu_matrix_interface_s {
 	uintptr_t ptr;
 	uint32_t nx;
 	uint32_t ny;
 	uint32_t ld;
 	size_t elemsize;
-} starpu_blas_interface_t;
+} starpu_matrix_interface_t;
 
-void starpu_register_blas_data(starpu_data_handle *handle, uint32_t home_node,
+void starpu_register_matrix_data(starpu_data_handle *handle, uint32_t home_node,
                         uintptr_t ptr, uint32_t ld, uint32_t nx,
                         uint32_t ny, size_t elemsize);
-uint32_t starpu_get_blas_nx(starpu_data_handle handle);
-uint32_t starpu_get_blas_ny(starpu_data_handle handle);
-uint32_t starpu_get_blas_local_ld(starpu_data_handle handle);
-uintptr_t starpu_get_blas_local_ptr(starpu_data_handle handle);
-size_t starpu_get_blas_elemsize(starpu_data_handle handle);
+uint32_t starpu_get_matrix_nx(starpu_data_handle handle);
+uint32_t starpu_get_matrix_ny(starpu_data_handle handle);
+uint32_t starpu_get_matrix_local_ld(starpu_data_handle handle);
+uintptr_t starpu_get_matrix_local_ptr(starpu_data_handle handle);
+size_t starpu_get_matrix_elemsize(starpu_data_handle handle);
 
 /* helper methods */
-#define STARPU_GET_BLAS_PTR(interface)	(((starpu_blas_interface_t *)(interface))->ptr)
-#define STARPU_GET_BLAS_NX(interface)	(((starpu_blas_interface_t *)(interface))->nx)
-#define STARPU_GET_BLAS_NY(interface)	(((starpu_blas_interface_t *)(interface))->ny)
-#define STARPU_GET_BLAS_LD(interface)	(((starpu_blas_interface_t *)(interface))->ld)
-#define STARPU_GET_BLAS_ELEMSIZE(interface)	(((starpu_blas_interface_t *)(interface))->elemsize)
+#define STARPU_GET_MATRIX_PTR(interface)	(((starpu_matrix_interface_t *)(interface))->ptr)
+#define STARPU_GET_MATRIX_NX(interface)	(((starpu_matrix_interface_t *)(interface))->nx)
+#define STARPU_GET_MATRIX_NY(interface)	(((starpu_matrix_interface_t *)(interface))->ny)
+#define STARPU_GET_MATRIX_LD(interface)	(((starpu_matrix_interface_t *)(interface))->ld)
+#define STARPU_GET_MATRIX_ELEMSIZE(interface)	(((starpu_matrix_interface_t *)(interface))->elemsize)
 
 
 /* BLOCK interface for 3D dense blocks */

+ 7 - 7
mpi/examples/mpi_lu/plu_example.c

@@ -236,7 +236,7 @@ static void init_matrix(int rank)
 				}
 
 				/* Register it to StarPU */
-				starpu_register_blas_data(handleptr, 0,
+				starpu_register_matrix_data(handleptr, 0,
 					(uintptr_t)*blockptr, size/nblocks,
 					size/nblocks, size/nblocks, sizeof(TYPE));
 			}
@@ -255,7 +255,7 @@ static void init_matrix(int rank)
 #ifdef SINGLE_TMP11
 	starpu_malloc_pinned_if_possible((void **)&tmp_11_block, blocksize);
 	allocated_memory_extra += blocksize;
-	starpu_register_blas_data(&tmp_11_block_handle, 0, (uintptr_t)tmp_11_block,
+	starpu_register_matrix_data(&tmp_11_block_handle, 0, (uintptr_t)tmp_11_block,
 			size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
 #else
 	tmp_11_block_handles = calloc(nblocks, sizeof(starpu_data_handle));
@@ -270,7 +270,7 @@ static void init_matrix(int rank)
 			allocated_memory_extra += blocksize;
 			STARPU_ASSERT(tmp_11_block[k]);
 
-			starpu_register_blas_data(&tmp_11_block_handles[k], 0,
+			starpu_register_matrix_data(&tmp_11_block_handles[k], 0,
 				(uintptr_t)tmp_11_block[k],
 				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
 		}
@@ -305,7 +305,7 @@ static void init_matrix(int rank)
 			allocated_memory_extra += blocksize;
 			STARPU_ASSERT(tmp_12_block[k]);
 
-			starpu_register_blas_data(&tmp_12_block_handles[k], 0,
+			starpu_register_matrix_data(&tmp_12_block_handles[k], 0,
 				(uintptr_t)tmp_12_block[k],
 				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
 		}
@@ -316,7 +316,7 @@ static void init_matrix(int rank)
 			allocated_memory_extra += blocksize;
 			STARPU_ASSERT(tmp_21_block[k]);
 
-			starpu_register_blas_data(&tmp_21_block_handles[k], 0,
+			starpu_register_matrix_data(&tmp_21_block_handles[k], 0,
 				(uintptr_t)tmp_21_block[k],
 				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
 		}
@@ -328,7 +328,7 @@ static void init_matrix(int rank)
 			allocated_memory_extra += blocksize;
 			STARPU_ASSERT(tmp_12_block[i][k]);
 	
-			starpu_register_blas_data(&tmp_12_block_handles[i][k], 0,
+			starpu_register_matrix_data(&tmp_12_block_handles[i][k], 0,
 				(uintptr_t)tmp_12_block[i][k],
 				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
 		}
@@ -339,7 +339,7 @@ static void init_matrix(int rank)
 			allocated_memory_extra += blocksize;
 			STARPU_ASSERT(tmp_21_block[i][k]);
 	
-			starpu_register_blas_data(&tmp_21_block_handles[i][k], 0,
+			starpu_register_matrix_data(&tmp_21_block_handles[i][k], 0,
 				(uintptr_t)tmp_21_block[i][k],
 				size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
 		}

+ 24 - 24
mpi/examples/mpi_lu/pxlu_kernels.c

@@ -27,17 +27,17 @@
 static inline void STARPU_PLU(common_u22)(void *descr[],
 				int s, __attribute__((unused)) void *_args)
 {
-	TYPE *right 	= (TYPE *)STARPU_GET_BLAS_PTR(descr[0]);
-	TYPE *left 	= (TYPE *)STARPU_GET_BLAS_PTR(descr[1]);
-	TYPE *center 	= (TYPE *)STARPU_GET_BLAS_PTR(descr[2]);
+	TYPE *right 	= (TYPE *)STARPU_GET_MATRIX_PTR(descr[0]);
+	TYPE *left 	= (TYPE *)STARPU_GET_MATRIX_PTR(descr[1]);
+	TYPE *center 	= (TYPE *)STARPU_GET_MATRIX_PTR(descr[2]);
 
-	unsigned dx = STARPU_GET_BLAS_NX(descr[2]);
-	unsigned dy = STARPU_GET_BLAS_NY(descr[2]);
-	unsigned dz = STARPU_GET_BLAS_NY(descr[0]);
+	unsigned dx = STARPU_GET_MATRIX_NX(descr[2]);
+	unsigned dy = STARPU_GET_MATRIX_NY(descr[2]);
+	unsigned dz = STARPU_GET_MATRIX_NY(descr[0]);
 
-	unsigned ld12 = STARPU_GET_BLAS_LD(descr[0]);
-	unsigned ld21 = STARPU_GET_BLAS_LD(descr[1]);
-	unsigned ld22 = STARPU_GET_BLAS_LD(descr[2]);
+	unsigned ld12 = STARPU_GET_MATRIX_LD(descr[0]);
+	unsigned ld21 = STARPU_GET_MATRIX_LD(descr[1]);
+	unsigned ld22 = STARPU_GET_MATRIX_LD(descr[2]);
 
 #ifdef VERBOSE_KERNELS
 	struct debug_info *info = _args;
@@ -127,14 +127,14 @@ static inline void STARPU_PLU(common_u12)(void *descr[],
 	TYPE *sub11;
 	TYPE *sub12;
 
-	sub11 = (TYPE *)STARPU_GET_BLAS_PTR(descr[0]);	
-	sub12 = (TYPE *)STARPU_GET_BLAS_PTR(descr[1]);
+	sub11 = (TYPE *)STARPU_GET_MATRIX_PTR(descr[0]);	
+	sub12 = (TYPE *)STARPU_GET_MATRIX_PTR(descr[1]);
 
-	unsigned ld11 = STARPU_GET_BLAS_LD(descr[0]);
-	unsigned ld12 = STARPU_GET_BLAS_LD(descr[1]);
+	unsigned ld11 = STARPU_GET_MATRIX_LD(descr[0]);
+	unsigned ld12 = STARPU_GET_MATRIX_LD(descr[1]);
 
-	unsigned nx12 = STARPU_GET_BLAS_NX(descr[1]);
-	unsigned ny12 = STARPU_GET_BLAS_NY(descr[1]);
+	unsigned nx12 = STARPU_GET_MATRIX_NX(descr[1]);
+	unsigned ny12 = STARPU_GET_MATRIX_NY(descr[1]);
 
 #ifdef VERBOSE_KERNELS
 	struct debug_info *info = _args;
@@ -234,14 +234,14 @@ static inline void STARPU_PLU(common_u21)(void *descr[],
 	TYPE *sub11;
 	TYPE *sub21;
 
-	sub11 = (TYPE *)STARPU_GET_BLAS_PTR(descr[0]);
-	sub21 = (TYPE *)STARPU_GET_BLAS_PTR(descr[1]);
+	sub11 = (TYPE *)STARPU_GET_MATRIX_PTR(descr[0]);
+	sub21 = (TYPE *)STARPU_GET_MATRIX_PTR(descr[1]);
 
-	unsigned ld11 = STARPU_GET_BLAS_LD(descr[0]);
-	unsigned ld21 = STARPU_GET_BLAS_LD(descr[1]);
+	unsigned ld11 = STARPU_GET_MATRIX_LD(descr[0]);
+	unsigned ld21 = STARPU_GET_MATRIX_LD(descr[1]);
 
-	unsigned nx21 = STARPU_GET_BLAS_NX(descr[1]);
-	unsigned ny21 = STARPU_GET_BLAS_NY(descr[1]);
+	unsigned nx21 = STARPU_GET_MATRIX_NX(descr[1]);
+	unsigned ny21 = STARPU_GET_MATRIX_NY(descr[1]);
 	
 #ifdef VERBOSE_KERNELS
 	struct debug_info *info = _args;
@@ -342,10 +342,10 @@ static inline void STARPU_PLU(common_u11)(void *descr[],
 {
 	TYPE *sub11;
 
-	sub11 = (TYPE *)STARPU_GET_BLAS_PTR(descr[0]); 
+	sub11 = (TYPE *)STARPU_GET_MATRIX_PTR(descr[0]); 
 
-	unsigned long nx = STARPU_GET_BLAS_NX(descr[0]);
-	unsigned long ld = STARPU_GET_BLAS_LD(descr[0]);
+	unsigned long nx = STARPU_GET_MATRIX_NX(descr[0]);
+	unsigned long ld = STARPU_GET_MATRIX_LD(descr[0]);
 
 	unsigned long z;
 

+ 2 - 2
mpi/starpu_mpi_datatype.c

@@ -32,8 +32,8 @@ static int handle_to_datatype_blas(starpu_data_handle data_handle, MPI_Datatype
 {
 	int ret;
 
-	unsigned nx = starpu_get_blas_nx(data_handle);
-	unsigned ny = starpu_get_blas_ny(data_handle);
-	unsigned ld = starpu_get_blas_local_ld(data_handle);
-	size_t elemsize = starpu_get_blas_elemsize(data_handle);
+	unsigned nx = starpu_get_matrix_nx(data_handle);
+	unsigned ny = starpu_get_matrix_ny(data_handle);
+	unsigned ld = starpu_get_matrix_local_ld(data_handle);
+	size_t elemsize = starpu_get_matrix_elemsize(data_handle);
 

+ 2 - 2
src/Makefile.am

@@ -129,8 +129,8 @@ libstarpu_la_SOURCES = 						\
 	datawizard/interfaces/data_interface.c			\
 	datawizard/interfaces/bcsr_interface.c			\
 	datawizard/interfaces/csr_interface.c			\
-	datawizard/interfaces/blas_filters.c			\
-	datawizard/interfaces/blas_interface.c			\
+	datawizard/interfaces/matrix_filters.c			\
+	datawizard/interfaces/matrix_interface.c		\
 	datawizard/interfaces/block_interface.c			\
 	datawizard/interfaces/vector_interface.c		\
 	datawizard/interfaces/bcsr_filters.c			\

+ 2 - 2
src/datawizard/interfaces/bcsr_filters.c

@@ -38,7 +38,7 @@ void starpu_canonical_block_filter_bcsr(starpu_filter *f __attribute__((unused))
 	nchunks = nnz;
 	
 	/* first allocate the children : it's a set of BLAS !*/
-	starpu_data_create_children(root_handle, nchunks, &_starpu_interface_blas_ops);
+	starpu_data_create_children(root_handle, nchunks, &_starpu_interface_matrix_ops);
 
 	/* actually create all the chunks */
 
@@ -54,7 +54,7 @@ void starpu_canonical_block_filter_bcsr(starpu_filter *f __attribute__((unused))
 		unsigned node;
 		for (node = 0; node < STARPU_MAXNODES; node++)
 		{
-			starpu_blas_interface_t *local =
+			starpu_matrix_interface_t *local =
 				starpu_data_get_interface_on_node(sub_handle, node);
 
 			local->nx = c;

+ 1 - 1
src/datawizard/interfaces/data_interface.h

@@ -47,6 +47,6 @@ void _starpu_register_data_handle(starpu_data_handle *handleptr, uint32_t home_n
 				struct starpu_data_interface_ops_t *ops);
 
 /* Some data interfaces or filters use this interface internally */
-extern struct starpu_data_interface_ops_t _starpu_interface_blas_ops;
+extern struct starpu_data_interface_ops_t _starpu_interface_matrix_ops;
 
 #endif // __DATA_INTERFACE_H__

+ 10 - 10
src/datawizard/interfaces/blas_filters.c

@@ -26,18 +26,18 @@ void starpu_block_filter_func(starpu_filter *f, starpu_data_handle root_handle)
 	unsigned nchunks;
 	uint32_t arg = f->filter_arg;
 
-	starpu_blas_interface_t *blas_root =
+	starpu_matrix_interface_t *matrix_root =
 		starpu_data_get_interface_on_node(root_handle, 0);
 
-	uint32_t nx = blas_root->nx;
-	uint32_t ny = blas_root->ny;
-	size_t elemsize = blas_root->elemsize;
+	uint32_t nx = matrix_root->nx;
+	uint32_t ny = matrix_root->ny;
+	size_t elemsize = matrix_root->elemsize;
 
 	/* we will have arg chunks */
 	nchunks = STARPU_MIN(nx, arg);
 
 	/* first allocate the children, they have the same interface type as
-	 * the root (blas) */
+	 * the root (matrix) */
 	starpu_data_create_children(root_handle, nchunks, root_handle->ops);
 
 	/* actually create all the chunks */
@@ -56,7 +56,7 @@ void starpu_block_filter_func(starpu_filter *f, starpu_data_handle root_handle)
 		unsigned node;
 		for (node = 0; node < STARPU_MAXNODES; node++)
 		{
-			starpu_blas_interface_t *local = 
+			starpu_matrix_interface_t *local = 
 				starpu_data_get_interface_on_node(chunk_handle, node);
 
 			local->nx = child_nx;
@@ -64,7 +64,7 @@ void starpu_block_filter_func(starpu_filter *f, starpu_data_handle root_handle)
 			local->elemsize = elemsize;
 
 			if (starpu_test_if_data_is_allocated_on_node(root_handle, node)) {
-				starpu_blas_interface_t *local_root =
+				starpu_matrix_interface_t *local_root =
 					starpu_data_get_interface_on_node(root_handle, node);
 
 				local->ptr = local_root->ptr + offset;
@@ -79,7 +79,7 @@ void starpu_vertical_block_filter_func(starpu_filter *f, starpu_data_handle root
 	unsigned nchunks;
 	uint32_t arg = f->filter_arg;
 
-	starpu_blas_interface_t *interface =
+	starpu_matrix_interface_t *interface =
 		starpu_data_get_interface_on_node(root_handle, 0);
 
 	uint32_t nx = interface->nx;
@@ -107,7 +107,7 @@ void starpu_vertical_block_filter_func(starpu_filter *f, starpu_data_handle root
 		unsigned node;
 		for (node = 0; node < STARPU_MAXNODES; node++)
 		{
-			starpu_blas_interface_t *local =
+			starpu_matrix_interface_t *local =
 				starpu_data_get_interface_on_node(chunk_handle, node);
 
 			local->nx = nx;
@@ -115,7 +115,7 @@ void starpu_vertical_block_filter_func(starpu_filter *f, starpu_data_handle root
 			local->elemsize = elemsize;
 
 			if (starpu_test_if_data_is_allocated_on_node(root_handle, node)) {
-				starpu_blas_interface_t *local_root =
+				starpu_matrix_interface_t *local_root =
 					starpu_data_get_interface_on_node(root_handle, node);
 
 				size_t offset = 

+ 109 - 109
src/datawizard/interfaces/blas_interface.c

@@ -35,7 +35,7 @@ static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node,
 static int copy_cuda_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream);
 #endif
 
-static const struct starpu_copy_data_methods_s blas_copy_data_methods_s = {
+static const struct starpu_copy_data_methods_s matrix_copy_data_methods_s = {
 	.ram_to_ram = dummy_copy_ram_to_ram,
 	.ram_to_spu = NULL,
 #ifdef STARPU_USE_CUDA
@@ -51,40 +51,40 @@ static const struct starpu_copy_data_methods_s blas_copy_data_methods_s = {
 	.spu_to_spu = NULL
 };
 
-static void register_blas_handle(starpu_data_handle handle, uint32_t home_node, void *interface);
-static size_t allocate_blas_buffer_on_node(starpu_data_handle handle, uint32_t dst_node);
-static void liberate_blas_buffer_on_node(void *interface, uint32_t node);
-static size_t blas_interface_get_size(starpu_data_handle handle);
-static uint32_t footprint_blas_interface_crc32(starpu_data_handle handle);
-static void display_blas_interface(starpu_data_handle handle, FILE *f);
+static void register_matrix_handle(starpu_data_handle handle, uint32_t home_node, void *interface);
+static size_t allocate_matrix_buffer_on_node(starpu_data_handle handle, uint32_t dst_node);
+static void liberate_matrix_buffer_on_node(void *interface, uint32_t node);
+static size_t matrix_interface_get_size(starpu_data_handle handle);
+static uint32_t footprint_matrix_interface_crc32(starpu_data_handle handle);
+static void display_matrix_interface(starpu_data_handle handle, FILE *f);
 #ifdef STARPU_USE_GORDON
-static int convert_blas_to_gordon(void *interface, uint64_t *ptr, gordon_strideSize_t *ss); 
+static int convert_matrix_to_gordon(void *interface, uint64_t *ptr, gordon_strideSize_t *ss); 
 #endif
 
-struct starpu_data_interface_ops_t _starpu_interface_blas_ops = {
-	.register_data_handle = register_blas_handle,
-	.allocate_data_on_node = allocate_blas_buffer_on_node,
-	.liberate_data_on_node = liberate_blas_buffer_on_node,
-	.copy_methods = &blas_copy_data_methods_s,
-	.get_size = blas_interface_get_size,
-	.footprint = footprint_blas_interface_crc32,
+struct starpu_data_interface_ops_t _starpu_interface_matrix_ops = {
+	.register_data_handle = register_matrix_handle,
+	.allocate_data_on_node = allocate_matrix_buffer_on_node,
+	.liberate_data_on_node = liberate_matrix_buffer_on_node,
+	.copy_methods = &matrix_copy_data_methods_s,
+	.get_size = matrix_interface_get_size,
+	.footprint = footprint_matrix_interface_crc32,
 #ifdef STARPU_USE_GORDON
-	.convert_to_gordon = convert_blas_to_gordon,
+	.convert_to_gordon = convert_matrix_to_gordon,
 #endif
 	.interfaceid = STARPU_BLAS_INTERFACE_ID, 
-	.interface_size = sizeof(starpu_blas_interface_t),
-	.display = display_blas_interface
+	.interface_size = sizeof(starpu_matrix_interface_t),
+	.display = display_matrix_interface
 };
 
 #ifdef STARPU_USE_GORDON
-static int convert_blas_to_gordon(void *interface, uint64_t *ptr, gordon_strideSize_t *ss) 
+static int convert_matrix_to_gordon(void *interface, uint64_t *ptr, gordon_strideSize_t *ss) 
 {
 	size_t elemsize = GET_BLAS_ELEMSIZE(interface);
-	uint32_t nx = STARPU_GET_BLAS_NX(interface);
-	uint32_t ny = STARPU_GET_BLAS_NY(interface);
-	uint32_t ld = STARPU_GET_BLAS_LD(interface);
+	uint32_t nx = STARPU_GET_MATRIX_NX(interface);
+	uint32_t ny = STARPU_GET_MATRIX_NY(interface);
+	uint32_t ld = STARPU_GET_MATRIX_LD(interface);
 
-	*ptr = STARPU_GET_BLAS_PTR(interface);
+	*ptr = STARPU_GET_MATRIX_PTR(interface);
 
 	/* The gordon_stride_init function may use a contiguous buffer
  	 * in case nx = ld (in that case, (*ss).size = elemsize*nx*ny */
@@ -94,37 +94,37 @@ static int convert_blas_to_gordon(void *interface, uint64_t *ptr, gordon_strideS
 }
 #endif
 
-static void register_blas_handle(starpu_data_handle handle, uint32_t home_node, void *interface)
+static void register_matrix_handle(starpu_data_handle handle, uint32_t home_node, void *interface)
 {
-	starpu_blas_interface_t *blas_interface = interface;
+	starpu_matrix_interface_t *matrix_interface = interface;
 
 	unsigned node;
 	for (node = 0; node < STARPU_MAXNODES; node++)
 	{
-		starpu_blas_interface_t *local_interface =
+		starpu_matrix_interface_t *local_interface =
 			starpu_data_get_interface_on_node(handle, node);
 
 		if (node == home_node) {
-			local_interface->ptr = blas_interface->ptr;
-			local_interface->ld  = blas_interface->ld;
+			local_interface->ptr = matrix_interface->ptr;
+			local_interface->ld  = matrix_interface->ld;
 		}
 		else {
 			local_interface->ptr = 0;
 			local_interface->ld  = 0;
 		}
 
-		local_interface->nx = blas_interface->nx;
-		local_interface->ny = blas_interface->ny;
-		local_interface->elemsize = blas_interface->elemsize;
+		local_interface->nx = matrix_interface->nx;
+		local_interface->ny = matrix_interface->ny;
+		local_interface->elemsize = matrix_interface->elemsize;
 	}
 }
 
 /* declare a new data with the BLAS interface */
-void starpu_register_blas_data(starpu_data_handle *handleptr, uint32_t home_node,
+void starpu_register_matrix_data(starpu_data_handle *handleptr, uint32_t home_node,
 			uintptr_t ptr, uint32_t ld, uint32_t nx,
 			uint32_t ny, size_t elemsize)
 {
-	starpu_blas_interface_t interface = {
+	starpu_matrix_interface_t interface = {
 		.ptr = ptr,
 		.ld = ld,
 		.nx = nx,
@@ -132,25 +132,25 @@ void starpu_register_blas_data(starpu_data_handle *handleptr, uint32_t home_node
 		.elemsize = elemsize
 	};
 
-	_starpu_register_data_handle(handleptr, home_node, &interface, &_starpu_interface_blas_ops);
+	_starpu_register_data_handle(handleptr, home_node, &interface, &_starpu_interface_matrix_ops);
 }
 
-static uint32_t footprint_blas_interface_crc32(starpu_data_handle handle)
+static uint32_t footprint_matrix_interface_crc32(starpu_data_handle handle)
 {
-	return _starpu_crc32_be(starpu_get_blas_nx(handle), starpu_get_blas_ny(handle));
+	return _starpu_crc32_be(starpu_get_matrix_nx(handle), starpu_get_matrix_ny(handle));
 }
 
-static void display_blas_interface(starpu_data_handle handle, FILE *f)
+static void display_matrix_interface(starpu_data_handle handle, FILE *f)
 {
-	starpu_blas_interface_t *interface =
+	starpu_matrix_interface_t *interface =
 		starpu_data_get_interface_on_node(handle, 0);
 
 	fprintf(f, "%u\t%u\t", interface->nx, interface->ny);
 }
 
-static size_t blas_interface_get_size(starpu_data_handle handle)
+static size_t matrix_interface_get_size(starpu_data_handle handle)
 {
-	starpu_blas_interface_t *interface =
+	starpu_matrix_interface_t *interface =
 		starpu_data_get_interface_on_node(handle, 0);
 
 	size_t size;
@@ -160,51 +160,51 @@ static size_t blas_interface_get_size(starpu_data_handle handle)
 }
 
 /* offer an access to the data parameters */
-uint32_t starpu_get_blas_nx(starpu_data_handle handle)
+uint32_t starpu_get_matrix_nx(starpu_data_handle handle)
 {
-	starpu_blas_interface_t *interface =
+	starpu_matrix_interface_t *interface =
 		starpu_data_get_interface_on_node(handle, 0);
 
 	return interface->nx;
 }
 
-uint32_t starpu_get_blas_ny(starpu_data_handle handle)
+uint32_t starpu_get_matrix_ny(starpu_data_handle handle)
 {
-	starpu_blas_interface_t *interface =
+	starpu_matrix_interface_t *interface =
 		starpu_data_get_interface_on_node(handle, 0);
 
 	return interface->ny;
 }
 
-uint32_t starpu_get_blas_local_ld(starpu_data_handle handle)
+uint32_t starpu_get_matrix_local_ld(starpu_data_handle handle)
 {
 	unsigned node;
 	node = _starpu_get_local_memory_node();
 
 	STARPU_ASSERT(starpu_test_if_data_is_allocated_on_node(handle, node));
 
-	starpu_blas_interface_t *interface =
+	starpu_matrix_interface_t *interface =
 		starpu_data_get_interface_on_node(handle, node);
 
 	return interface->ld;
 }
 
-uintptr_t starpu_get_blas_local_ptr(starpu_data_handle handle)
+uintptr_t starpu_get_matrix_local_ptr(starpu_data_handle handle)
 {
 	unsigned node;
 	node = _starpu_get_local_memory_node();
 
 	STARPU_ASSERT(starpu_test_if_data_is_allocated_on_node(handle, node));
 
-	starpu_blas_interface_t *interface =
+	starpu_matrix_interface_t *interface =
 		starpu_data_get_interface_on_node(handle, node);
 
 	return interface->ptr;
 }
 
-size_t starpu_get_blas_elemsize(starpu_data_handle handle)
+size_t starpu_get_matrix_elemsize(starpu_data_handle handle)
 {
-	starpu_blas_interface_t *interface =
+	starpu_matrix_interface_t *interface =
 		starpu_data_get_interface_on_node(handle, 0);
 
 	return interface->elemsize;
@@ -213,7 +213,7 @@ size_t starpu_get_blas_elemsize(starpu_data_handle handle)
 /* memory allocation/deallocation primitives for the matrix interface */
 
 /* returns the size of the allocated area */
-static size_t allocate_blas_buffer_on_node(starpu_data_handle handle, uint32_t dst_node)
+static size_t allocate_matrix_buffer_on_node(starpu_data_handle handle, uint32_t dst_node)
 {
 	uintptr_t addr = 0;
 	unsigned fail = 0;
@@ -224,7 +224,7 @@ static size_t allocate_blas_buffer_on_node(starpu_data_handle handle, uint32_t d
 	size_t pitch;
 #endif
 
-	starpu_blas_interface_t *interface =
+	starpu_matrix_interface_t *interface =
 		starpu_data_get_interface_on_node(handle, dst_node);
 
 	uint32_t nx = interface->nx;
@@ -275,9 +275,9 @@ static size_t allocate_blas_buffer_on_node(starpu_data_handle handle, uint32_t d
 	return allocated_memory;
 }
 
-static void liberate_blas_buffer_on_node(void *interface, uint32_t node)
+static void liberate_matrix_buffer_on_node(void *interface, uint32_t node)
 {
-	starpu_blas_interface_t *blas_interface = interface;
+	starpu_matrix_interface_t *matrix_interface = interface;
 
 #ifdef STARPU_USE_CUDA
 	cudaError_t status;
@@ -286,11 +286,11 @@ static void liberate_blas_buffer_on_node(void *interface, uint32_t node)
 	starpu_node_kind kind = _starpu_get_node_kind(node);
 	switch(kind) {
 		case STARPU_RAM:
-			free((void*)blas_interface->ptr);
+			free((void*)matrix_interface->ptr);
 			break;
 #ifdef STARPU_USE_CUDA
 		case STARPU_CUDA_RAM:
-			status = cudaFree((void*)blas_interface->ptr);			
+			status = cudaFree((void*)matrix_interface->ptr);			
 			if (STARPU_UNLIKELY(status))
 				STARPU_CUDA_REPORT_ERROR(status);
 
@@ -304,40 +304,40 @@ static void liberate_blas_buffer_on_node(void *interface, uint32_t node)
 #ifdef STARPU_USE_CUDA
 static int copy_cuda_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
 {
-	starpu_blas_interface_t *src_blas;
-	starpu_blas_interface_t *dst_blas;
+	starpu_matrix_interface_t *src_matrix;
+	starpu_matrix_interface_t *dst_matrix;
 
-	src_blas = starpu_data_get_interface_on_node(handle, src_node);
-	dst_blas = starpu_data_get_interface_on_node(handle, dst_node);
+	src_matrix = starpu_data_get_interface_on_node(handle, src_node);
+	dst_matrix = starpu_data_get_interface_on_node(handle, dst_node);
 
-	size_t elemsize = src_blas->elemsize;
+	size_t elemsize = src_matrix->elemsize;
 
 	cudaError_t cures;
-	cures = cudaMemcpy2D((char *)dst_blas->ptr, dst_blas->ld*elemsize,
-			(char *)src_blas->ptr, src_blas->ld*elemsize,
-			src_blas->nx*elemsize, src_blas->ny, cudaMemcpyDeviceToHost);
+	cures = cudaMemcpy2D((char *)dst_matrix->ptr, dst_matrix->ld*elemsize,
+			(char *)src_matrix->ptr, src_matrix->ld*elemsize,
+			src_matrix->nx*elemsize, src_matrix->ny, cudaMemcpyDeviceToHost);
 	if (STARPU_UNLIKELY(cures))
 		STARPU_CUDA_REPORT_ERROR(cures);
 
-	STARPU_TRACE_DATA_COPY(src_node, dst_node, (size_t)src_blas->nx*src_blas->ny*src_blas->elemsize);
+	STARPU_TRACE_DATA_COPY(src_node, dst_node, (size_t)src_matrix->nx*src_matrix->ny*src_matrix->elemsize);
 
 	return 0;
 }
 
 static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
 {
-	starpu_blas_interface_t *src_blas;
-	starpu_blas_interface_t *dst_blas;
+	starpu_matrix_interface_t *src_matrix;
+	starpu_matrix_interface_t *dst_matrix;
 
-	src_blas = starpu_data_get_interface_on_node(handle, src_node);
-	dst_blas = starpu_data_get_interface_on_node(handle, dst_node);
+	src_matrix = starpu_data_get_interface_on_node(handle, src_node);
+	dst_matrix = starpu_data_get_interface_on_node(handle, dst_node);
 
-	size_t elemsize = src_blas->elemsize;
+	size_t elemsize = src_matrix->elemsize;
 
 	cudaError_t cures;
-	cures = cudaMemcpy2D((char *)dst_blas->ptr, dst_blas->ld*elemsize,
-			(char *)src_blas->ptr, src_blas->ld*elemsize,
-			src_blas->nx*elemsize, src_blas->ny, cudaMemcpyHostToDevice);
+	cures = cudaMemcpy2D((char *)dst_matrix->ptr, dst_matrix->ld*elemsize,
+			(char *)src_matrix->ptr, src_matrix->ld*elemsize,
+			src_matrix->nx*elemsize, src_matrix->ny, cudaMemcpyHostToDevice);
 	if (STARPU_UNLIKELY(cures))
 		STARPU_CUDA_REPORT_ERROR(cures);
 		
@@ -345,31 +345,31 @@ static int copy_ram_to_cuda(starpu_data_handle handle, uint32_t src_node, uint32
 	if (STARPU_UNLIKELY(cures))
 		STARPU_CUDA_REPORT_ERROR(cures);
 		
-	STARPU_TRACE_DATA_COPY(src_node, dst_node, (size_t)src_blas->nx*src_blas->ny*src_blas->elemsize);
+	STARPU_TRACE_DATA_COPY(src_node, dst_node, (size_t)src_matrix->nx*src_matrix->ny*src_matrix->elemsize);
 
 	return 0;
 }
 
 static int copy_cuda_to_ram_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream)
 {
-	starpu_blas_interface_t *src_blas;
-	starpu_blas_interface_t *dst_blas;
+	starpu_matrix_interface_t *src_matrix;
+	starpu_matrix_interface_t *dst_matrix;
 
-	src_blas = starpu_data_get_interface_on_node(handle, src_node);
-	dst_blas = starpu_data_get_interface_on_node(handle, dst_node);
+	src_matrix = starpu_data_get_interface_on_node(handle, src_node);
+	dst_matrix = starpu_data_get_interface_on_node(handle, dst_node);
 
-	size_t elemsize = src_blas->elemsize;
+	size_t elemsize = src_matrix->elemsize;
 
 	cudaError_t cures;	
-	cures = cudaMemcpy2DAsync((char *)dst_blas->ptr, dst_blas->ld*elemsize,
-			(char *)src_blas->ptr, (size_t)src_blas->ld*elemsize,
-			(size_t)src_blas->nx*elemsize, src_blas->ny,
+	cures = cudaMemcpy2DAsync((char *)dst_matrix->ptr, dst_matrix->ld*elemsize,
+			(char *)src_matrix->ptr, (size_t)src_matrix->ld*elemsize,
+			(size_t)src_matrix->nx*elemsize, src_matrix->ny,
 			cudaMemcpyDeviceToHost, *stream);
 	if (cures)
 	{
-		cures = cudaMemcpy2D((char *)dst_blas->ptr, dst_blas->ld*elemsize,
-			(char *)src_blas->ptr, (size_t)src_blas->ld*elemsize,
-			(size_t)src_blas->nx*elemsize, (size_t)src_blas->ny,
+		cures = cudaMemcpy2D((char *)dst_matrix->ptr, dst_matrix->ld*elemsize,
+			(char *)src_matrix->ptr, (size_t)src_matrix->ld*elemsize,
+			(size_t)src_matrix->nx*elemsize, (size_t)src_matrix->ny,
 			cudaMemcpyDeviceToHost);
 
 		if (STARPU_UNLIKELY(cures))
@@ -383,31 +383,31 @@ static int copy_cuda_to_ram_async(starpu_data_handle handle, uint32_t src_node,
 		return 0;
 	}
 
-	STARPU_TRACE_DATA_COPY(src_node, dst_node, (size_t)src_blas->nx*src_blas->ny*src_blas->elemsize);
+	STARPU_TRACE_DATA_COPY(src_node, dst_node, (size_t)src_matrix->nx*src_matrix->ny*src_matrix->elemsize);
 
 	return EAGAIN;
 }
 
 static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node, cudaStream_t *stream)
 {
-	starpu_blas_interface_t *src_blas;
-	starpu_blas_interface_t *dst_blas;
+	starpu_matrix_interface_t *src_matrix;
+	starpu_matrix_interface_t *dst_matrix;
 
-	src_blas = starpu_data_get_interface_on_node(handle, src_node);
-	dst_blas = starpu_data_get_interface_on_node(handle, dst_node);
+	src_matrix = starpu_data_get_interface_on_node(handle, src_node);
+	dst_matrix = starpu_data_get_interface_on_node(handle, dst_node);
 
-	size_t elemsize = src_blas->elemsize;
+	size_t elemsize = src_matrix->elemsize;
 
 	cudaError_t cures;
-	cures = cudaMemcpy2DAsync((char *)dst_blas->ptr, dst_blas->ld*elemsize,
-				(char *)src_blas->ptr, src_blas->ld*elemsize,
-				src_blas->nx*elemsize, src_blas->ny,
+	cures = cudaMemcpy2DAsync((char *)dst_matrix->ptr, dst_matrix->ld*elemsize,
+				(char *)src_matrix->ptr, src_matrix->ld*elemsize,
+				src_matrix->nx*elemsize, src_matrix->ny,
 				cudaMemcpyHostToDevice, *stream);
 	if (cures)
 	{
-		cures = cudaMemcpy2D((char *)dst_blas->ptr, dst_blas->ld*elemsize,
-				(char *)src_blas->ptr, src_blas->ld*elemsize,
-				src_blas->nx*elemsize, src_blas->ny, cudaMemcpyHostToDevice);
+		cures = cudaMemcpy2D((char *)dst_matrix->ptr, dst_matrix->ld*elemsize,
+				(char *)src_matrix->ptr, src_matrix->ld*elemsize,
+				src_matrix->nx*elemsize, src_matrix->ny, cudaMemcpyHostToDevice);
 		cudaThreadSynchronize();
 
 		if (STARPU_UNLIKELY(cures))
@@ -416,7 +416,7 @@ static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node,
 		return 0;
 	}
 
-	STARPU_TRACE_DATA_COPY(src_node, dst_node, (size_t)src_blas->nx*src_blas->ny*src_blas->elemsize);
+	STARPU_TRACE_DATA_COPY(src_node, dst_node, (size_t)src_matrix->nx*src_matrix->ny*src_matrix->elemsize);
 
 	return EAGAIN;
 }
@@ -426,22 +426,22 @@ static int copy_ram_to_cuda_async(starpu_data_handle handle, uint32_t src_node,
 /* as not all platforms have a BLAS library readily installed ... */
 static int dummy_copy_ram_to_ram(starpu_data_handle handle, uint32_t src_node, uint32_t dst_node)
 {
-	starpu_blas_interface_t *src_blas;
-	starpu_blas_interface_t *dst_blas;
+	starpu_matrix_interface_t *src_matrix;
+	starpu_matrix_interface_t *dst_matrix;
 
-	src_blas = starpu_data_get_interface_on_node(handle, src_node);
-	dst_blas = starpu_data_get_interface_on_node(handle, dst_node);
+	src_matrix = starpu_data_get_interface_on_node(handle, src_node);
+	dst_matrix = starpu_data_get_interface_on_node(handle, dst_node);
 
 	unsigned y;
-	uint32_t nx = dst_blas->nx;
-	uint32_t ny = dst_blas->ny;
-	size_t elemsize = dst_blas->elemsize;
+	uint32_t nx = dst_matrix->nx;
+	uint32_t ny = dst_matrix->ny;
+	size_t elemsize = dst_matrix->elemsize;
 
-	uint32_t ld_src = src_blas->ld;
-	uint32_t ld_dst = dst_blas->ld;
+	uint32_t ld_src = src_matrix->ld;
+	uint32_t ld_dst = dst_matrix->ld;
 
-	uintptr_t ptr_src = src_blas->ptr;
-	uintptr_t ptr_dst = dst_blas->ptr;
+	uintptr_t ptr_src = src_matrix->ptr;
+	uintptr_t ptr_dst = dst_matrix->ptr;
 
 
 	for (y = 0; y < ny; y++)