11 years ago · b6d83183ea
--- a/examples/axpy/axpy.c
+++ b/examples/axpy/axpy.c
@@ -31,7 +31,7 @@
 
				 
			
 
				 #include "axpy.h"
			
 
				 
			
 
				-#define AXPY	SAXPY
			
 
				+#define AXPY	STARPU_SAXPY
			
 
				 #define CUBLASAXPY	cublasSaxpy
			
 
				 
			
 
				 #define N	(16*1024*1024)
			
--- a/examples/cg/cg.h
+++ b/examples/cg/cg.h
@@ -30,11 +30,11 @@
 
				 
			
 
				 #ifdef DOUBLE
			
 
				 #define TYPE	double
			
 
				-#define GEMV	DGEMV
			
 
				-#define DOT	DDOT
			
 
				-#define GEMV	DGEMV
			
 
				-#define AXPY	DAXPY
			
 
				-#define SCAL	DSCAL
			
 
				+#define GEMV	STARPU_DGEMV
			
 
				+#define DOT	STARPU_DDOT
			
 
				+#define GEMV	STARPU_DGEMV
			
 
				+#define AXPY	STARPU_DAXPY
			
 
				+#define SCAL	STARPU_DSCAL
			
 
				 #define cublasdot	cublasDdot
			
 
				 #define cublasscal	cublasDscal
			
 
				 #define cublasaxpy	cublasDaxpy
			
@@ -42,11 +42,11 @@
 
				 #define cublasscal	cublasDscal
			
 
				 #else
			
 
				 #define TYPE	float
			
 
				-#define GEMV	SGEMV
			
 
				-#define DOT	SDOT
			
 
				-#define GEMV	SGEMV
			
 
				-#define AXPY	SAXPY
			
 
				-#define SCAL	SSCAL
			
 
				+#define GEMV	STARPU_SGEMV
			
 
				+#define DOT	STARPU_SDOT
			
 
				+#define GEMV	STARPU_SGEMV
			
 
				+#define AXPY	STARPU_SAXPY
			
 
				+#define SCAL	STARPU_SSCAL
			
 
				 #define cublasdot	cublasSdot
			
 
				 #define cublasscal	cublasSscal
			
 
				 #define cublasaxpy	cublasSaxpy
			
--- a/examples/cholesky/cholesky_grain_tag.c
+++ b/examples/cholesky/cholesky_grain_tag.c
@@ -392,7 +392,7 @@ int main(int argc, char **argv)
 
				 	float *test_mat = malloc(size*size*sizeof(float));
			
 
				 	STARPU_ASSERT(test_mat);
			
 
				 
			
 
				-	SSYRK("L", "N", size, size, 1.0f,
			
 
				+	STARPU_SSYRK("L", "N", size, size, 1.0f,
			
 
				 				mat, size, 0.0f, test_mat, size);
			
 
				 
			
 
				 	FPRINTF(stderr, "comparing results ...\n");
			
--- a/examples/cholesky/cholesky_implicit.c
+++ b/examples/cholesky/cholesky_implicit.c
@@ -243,7 +243,7 @@ static void execute_cholesky(unsigned size, unsigned nblocks)
 
				 		float *test_mat = malloc(size*size*sizeof(float));
			
 
				 		STARPU_ASSERT(test_mat);
			
 
				 
			
 
				-		SSYRK("L", "N", size, size, 1.0f,
			
 
				+		STARPU_SSYRK("L", "N", size, size, 1.0f,
			
 
				 					mat, size, 0.0f, test_mat, size);
			
 
				 
			
 
				 		FPRINTF(stderr, "comparing results ...\n");
			
--- a/examples/cholesky/cholesky_kernels.c
+++ b/examples/cholesky/cholesky_kernels.c
@@ -49,7 +49,7 @@ static inline void chol_common_cpu_codelet_update_u22(void *descr[], int s, STAR
 
				 		if (worker_size == 1)
			
 
				 		{
			
 
				 			/* Sequential CPU kernel */
			
 
				-			SGEMM("N", "T", dy, dx, dz, -1.0f, left, ld21, 
			
 
				+			STARPU_SGEMM("N", "T", dy, dx, dz, -1.0f, left, ld21, 
			
 
				 				right, ld12, 1.0f, center, ld22);
			
 
				 		}
			
 
				 		else
			
@@ -63,7 +63,7 @@ static inline void chol_common_cpu_codelet_update_u22(void *descr[], int s, STAR
 
				 			float *new_left = &left[block_size*rank];
			
 
				 			float *new_center = &center[block_size*rank];
			
 
				 
			
 
				-			SGEMM("N", "T", dy, new_dx, dz, -1.0f, new_left, ld21, 
			
 
				+			STARPU_SGEMM("N", "T", dy, new_dx, dz, -1.0f, new_left, ld21, 
			
 
				 				right, ld12, 1.0f, new_center, ld22);
			
 
				 		}
			
 
				 	}
			
@@ -113,7 +113,7 @@ static inline void chol_common_codelet_update_u21(void *descr[], int s, STARPU_A
 
				 	switch (s)
			
 
				 	{
			
 
				 		case 0:
			
 
				-			STRSM("R", "L", "T", "N", nx21, ny21, 1.0f, sub11, ld11, sub21, ld21);
			
 
				+			STARPU_STRSM("R", "L", "T", "N", nx21, ny21, 1.0f, sub11, ld11, sub21, ld21);
			
 
				 			break;
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 		case 1:
			
@@ -172,9 +172,9 @@ static inline void chol_common_codelet_update_u11(void *descr[], int s, STARPU_A
 
				 
			
 
				 				STARPU_ASSERT(lambda11 != 0.0f);
			
 
				 		
			
 
				-				SSCAL(nx - z - 1, 1.0f/lambda11, &sub11[(z+1)+z*ld], 1);
			
 
				+				STARPU_SSCAL(nx - z - 1, 1.0f/lambda11, &sub11[(z+1)+z*ld], 1);
			
 
				 		
			
 
				-				SSYR("L", nx - z - 1, -1.0f, 
			
 
				+				STARPU_SSYR("L", nx - z - 1, -1.0f, 
			
 
				 							&sub11[(z+1)+z*ld], 1,
			
 
				 							&sub11[(z+1)+(z+1)*ld], ld);
			
 
				 			}
			
--- a/examples/cholesky/cholesky_tag.c
+++ b/examples/cholesky/cholesky_tag.c
@@ -368,7 +368,7 @@ int main(int argc, char **argv)
 
				 	float *test_mat = malloc(size*size*sizeof(float));
			
 
				 	STARPU_ASSERT(test_mat);
			
 
				 
			
 
				-	SSYRK("L", "N", size, size, 1.0f,
			
 
				+	STARPU_SSYRK("L", "N", size, size, 1.0f,
			
 
				 				mat, size, 0.0f, test_mat, size);
			
 
				 
			
 
				 	FPRINTF(stderr, "comparing results ...\n");
			
--- a/examples/common/blas.c
+++ b/examples/common/blas.c
@@ -23,13 +23,13 @@
 
				 
			
 
				 /*
			
 
				     This files contains BLAS wrappers for the different BLAS implementations
			
 
				-  (eg. REFBLAS, STARPU_ATLAS, GOTOBLAS ...). We assume a Fortran orientation as most
			
 
				+  (e.g. REFBLAS, ATLAS, GOTOBLAS ...). We assume a Fortran orientation as most
			
 
				   libraries do not supply C-based ordering.
			
 
				  */
			
 
				 
			
 
				 #ifdef STARPU_ATLAS
			
 
				 
			
 
				-inline void SGEMM(char *transa, char *transb, int M, int N, int K, 
			
 
				+inline void STARPU_SGEMM(char *transa, char *transb, int M, int N, int K, 
			
 
				 			float alpha, const float *A, int lda, const float *B, int ldb, 
			
 
				 			float beta, float *C, int ldc)
			
 
				 {
			
@@ -40,7 +40,7 @@ inline void SGEMM(char *transa, char *transb, int M, int N, int K,
 
				 			M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);				
			
 
				 }
			
 
				 
			
 
				-inline void DGEMM(char *transa, char *transb, int M, int N, int K, 
			
 
				+inline void STARPU_DGEMM(char *transa, char *transb, int M, int N, int K, 
			
 
				 			double alpha, double *A, int lda, double *B, int ldb, 
			
 
				 			double beta, double *C, int ldc)
			
 
				 {
			
@@ -51,7 +51,7 @@ inline void DGEMM(char *transa, char *transb, int M, int N, int K,
 
				 			M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);				
			
 
				 }
			
 
				 
			
 
				-inline void SGEMV(char *transa, int M, int N, float alpha, float *A, int lda, float *X, int incX, float beta, float *Y, int incY)
			
 
				+inline void STARPU_SGEMV(char *transa, int M, int N, float alpha, float *A, int lda, float *X, int incX, float beta, float *Y, int incY)
			
 
				 {
			
 
				 	enum CBLAS_TRANSPOSE ta = (toupper(transa[0]) == 'N')?CblasNoTrans:CblasTrans;
			
 
				 
			
@@ -59,7 +59,7 @@ inline void SGEMV(char *transa, int M, int N, float alpha, float *A, int lda, fl
 
				 					X, incX, beta, Y, incY);
			
 
				 }
			
 
				 
			
 
				-inline void DGEMV(char *transa, int M, int N, double alpha, double *A, int lda, double *X, int incX, double beta, double *Y, int incY)
			
 
				+inline void STARPU_DGEMV(char *transa, int M, int N, double alpha, double *A, int lda, double *X, int incX, double beta, double *Y, int incY)
			
 
				 {
			
 
				 	enum CBLAS_TRANSPOSE ta = (toupper(transa[0]) == 'N')?CblasNoTrans:CblasTrans;
			
 
				 
			
@@ -67,27 +67,27 @@ inline void DGEMV(char *transa, int M, int N, double alpha, double *A, int lda,
 
				 					X, incX, beta, Y, incY);
			
 
				 }
			
 
				 
			
 
				-inline float SASUM(int N, float *X, int incX)
			
 
				+inline float STARPU_SASUM(int N, float *X, int incX)
			
 
				 {
			
 
				 	return cblas_sasum(N, X, incX);
			
 
				 }
			
 
				 
			
 
				-inline double DASUM(int N, double *X, int incX)
			
 
				+inline double STARPU_DASUM(int N, double *X, int incX)
			
 
				 {
			
 
				 	return cblas_dasum(N, X, incX);
			
 
				 }
			
 
				 
			
 
				-void SSCAL(int N, float alpha, float *X, int incX)
			
 
				+void STARPU_SSCAL(int N, float alpha, float *X, int incX)
			
 
				 {
			
 
				 	cblas_sscal(N, alpha, X, incX);
			
 
				 }
			
 
				 
			
 
				-void DSCAL(int N, double alpha, double *X, int incX)
			
 
				+void STARPU_DSCAL(int N, double alpha, double *X, int incX)
			
 
				 {
			
 
				 	cblas_dscal(N, alpha, X, incX);
			
 
				 }
			
 
				 
			
 
				-void STRSM (const char *side, const char *uplo, const char *transa,
			
 
				+void STARPU_STRSM (const char *side, const char *uplo, const char *transa,
			
 
				                    const char *diag, const int m, const int n,
			
 
				                    const float alpha, const float *A, const int lda,
			
 
				                    float *B, const int ldb)
			
@@ -100,7 +100,7 @@ void STRSM (const char *side, const char *uplo, const char *transa,
 
				 	cblas_strsm(CblasColMajor, side_, uplo_, transa_, diag_, m, n, alpha, A, lda, B, ldb);
			
 
				 }
			
 
				 
			
 
				-void DTRSM (const char *side, const char *uplo, const char *transa,
			
 
				+void STARPU_DTRSM (const char *side, const char *uplo, const char *transa,
			
 
				                    const char *diag, const int m, const int n,
			
 
				                    const double alpha, const double *A, const int lda,
			
 
				                    double *B, const int ldb)
			
@@ -113,7 +113,7 @@ void DTRSM (const char *side, const char *uplo, const char *transa,
 
				 	cblas_dtrsm(CblasColMajor, side_, uplo_, transa_, diag_, m, n, alpha, A, lda, B, ldb);
			
 
				 }
			
 
				 
			
 
				-void SSYR (const char *uplo, const int n, const float alpha,
			
 
				+void STARPU_SSYR (const char *uplo, const int n, const float alpha,
			
 
				                   const float *x, const int incx, float *A, const int lda)
			
 
				 {
			
 
				 	enum CBLAS_UPLO uplo_ = (toupper(uplo[0]) == 'U')?CblasUpper:CblasLower;
			
@@ -121,7 +121,7 @@ void SSYR (const char *uplo, const int n, const float alpha,
 
				 	cblas_ssyr(CblasColMajor, uplo_, n, alpha, x, incx, A, lda); 
			
 
				 }
			
 
				 
			
 
				-void SSYRK (const char *uplo, const char *trans, const int n,
			
 
				+void STARPU_SSYRK (const char *uplo, const char *trans, const int n,
			
 
				                    const int k, const float alpha, const float *A,
			
 
				                    const int lda, const float beta, float *C,
			
 
				                    const int ldc)
			
@@ -132,21 +132,21 @@ void SSYRK (const char *uplo, const char *trans, const int n,
 
				 	cblas_ssyrk(CblasColMajor, uplo_, trans_, n, k, alpha, A, lda, beta, C, ldc); 
			
 
				 }
			
 
				 
			
 
				-void SGER(const int m, const int n, const float alpha,
			
 
				+void STARPU_SGER(const int m, const int n, const float alpha,
			
 
				                   const float *x, const int incx, const float *y,
			
 
				                   const int incy, float *A, const int lda)
			
 
				 {
			
 
				 	cblas_sger(CblasColMajor, m, n, alpha, x, incx, y, incy, A, lda);
			
 
				 }
			
 
				 
			
 
				-void DGER(const int m, const int n, const double alpha,
			
 
				+void STARPU_DGER(const int m, const int n, const double alpha,
			
 
				                   const double *x, const int incx, const double *y,
			
 
				                   const int incy, double *A, const int lda)
			
 
				 {
			
 
				 	cblas_dger(CblasColMajor, m, n, alpha, x, incx, y, incy, A, lda);
			
 
				 }
			
 
				 
			
 
				-void STRSV (const char *uplo, const char *trans, const char *diag, 
			
 
				+void STARPU_STRSV (const char *uplo, const char *trans, const char *diag, 
			
 
				                    const int n, const float *A, const int lda, float *x, 
			
 
				                    const int incx)
			
 
				 {
			
@@ -157,7 +157,7 @@ void STRSV (const char *uplo, const char *trans, const char *diag,
 
				 	cblas_strsv(CblasColMajor, uplo_, trans_, diag_, n, A, lda, x, incx);
			
 
				 }
			
 
				 
			
 
				-void STRMM(const char *side, const char *uplo, const char *transA,
			
 
				+void STARPU_STRMM(const char *side, const char *uplo, const char *transA,
			
 
				                  const char *diag, const int m, const int n,
			
 
				                  const float alpha, const float *A, const int lda,
			
 
				                  float *B, const int ldb)
			
@@ -170,7 +170,7 @@ void STRMM(const char *side, const char *uplo, const char *transA,
 
				 	cblas_strmm(CblasColMajor, side_, uplo_, transA_, diag_, m, n, alpha, A, lda, B, ldb);
			
 
				 }
			
 
				 
			
 
				-void DTRMM(const char *side, const char *uplo, const char *transA,
			
 
				+void STARPU_DTRMM(const char *side, const char *uplo, const char *transA,
			
 
				                  const char *diag, const int m, const int n,
			
 
				                  const double alpha, const double *A, const int lda,
			
 
				                  double *B, const int ldb)
			
@@ -183,7 +183,7 @@ void DTRMM(const char *side, const char *uplo, const char *transA,
 
				 	cblas_dtrmm(CblasColMajor, side_, uplo_, transA_, diag_, m, n, alpha, A, lda, B, ldb);
			
 
				 }
			
 
				 
			
 
				-void STRMV(const char *uplo, const char *transA, const char *diag,
			
 
				+void STARPU_STRMV(const char *uplo, const char *transA, const char *diag,
			
 
				                  const int n, const float *A, const int lda, float *X,
			
 
				                  const int incX)
			
 
				 {
			
@@ -194,53 +194,53 @@ void STRMV(const char *uplo, const char *transA, const char *diag,
 
				 	cblas_strmv(CblasColMajor, uplo_, transA_, diag_, n, A, lda, X, incX);
			
 
				 }
			
 
				 
			
 
				-void SAXPY(const int n, const float alpha, float *X, const int incX, float *Y, const int incY)
			
 
				+void STARPU_SAXPY(const int n, const float alpha, float *X, const int incX, float *Y, const int incY)
			
 
				 {
			
 
				 	cblas_saxpy(n, alpha, X, incX, Y, incY);
			
 
				 }
			
 
				 
			
 
				-void DAXPY(const int n, const double alpha, double *X, const int incX, double *Y, const int incY)
			
 
				+void STARPU_DAXPY(const int n, const double alpha, double *X, const int incX, double *Y, const int incY)
			
 
				 {
			
 
				 	cblas_daxpy(n, alpha, X, incX, Y, incY);
			
 
				 }
			
 
				 
			
 
				-int ISAMAX (const int n, float *X, const int incX)
			
 
				+int STARPU_ISAMAX (const int n, float *X, const int incX)
			
 
				 {
			
 
				     int retVal;
			
 
				     retVal = cblas_isamax(n, X, incX);
			
 
				     return retVal;
			
 
				 }
			
 
				 
			
 
				-int IDAMAX (const int n, double *X, const int incX)
			
 
				+int STARPU_IDAMAX (const int n, double *X, const int incX)
			
 
				 {
			
 
				     int retVal;
			
 
				     retVal = cblas_idamax(n, X, incX);
			
 
				     return retVal;
			
 
				 }
			
 
				 
			
 
				-float SDOT(const int n, const float *x, const int incx, const float *y, const int incy)
			
 
				+float STARPU_SDOT(const int n, const float *x, const int incx, const float *y, const int incy)
			
 
				 {
			
 
				 	return cblas_sdot(n, x, incx, y, incy);
			
 
				 }
			
 
				 
			
 
				-double DDOT(const int n, const double *x, const int incx, const double *y, const int incy)
			
 
				+double STARPU_DDOT(const int n, const double *x, const int incx, const double *y, const int incy)
			
 
				 {
			
 
				 	return cblas_ddot(n, x, incx, y, incy);
			
 
				 }
			
 
				 
			
 
				-void SSWAP(const int n, float *x, const int incx, float *y, const int incy)
			
 
				+void STARPU_SSWAP(const int n, float *x, const int incx, float *y, const int incy)
			
 
				 {
			
 
				 	cblas_sswap(n, x, incx, y, incy);
			
 
				 }
			
 
				 
			
 
				-void DSWAP(const int n, double *x, const int incx, double *y, const int incy)
			
 
				+void STARPU_DSWAP(const int n, double *x, const int incx, double *y, const int incy)
			
 
				 {
			
 
				 	cblas_dswap(n, x, incx, y, incy);
			
 
				 }
			
 
				 
			
 
				 #elif defined(STARPU_GOTO) || defined(STARPU_SYSTEM_BLAS) || defined(STARPU_MKL)
			
 
				 
			
 
				-inline void SGEMM(char *transa, char *transb, int M, int N, int K, 
			
 
				+inline void STARPU_SGEMM(char *transa, char *transb, int M, int N, int K, 
			
 
				 			float alpha, const float *A, int lda, const float *B, int ldb, 
			
 
				 			float beta, float *C, int ldc)
			
 
				 {
			
@@ -249,7 +249,7 @@ inline void SGEMM(char *transa, char *transb, int M, int N, int K,
 
				 			 &beta, C, &ldc);	
			
 
				 }
			
 
				 
			
 
				-inline void DGEMM(char *transa, char *transb, int M, int N, int K, 
			
 
				+inline void STARPU_DGEMM(char *transa, char *transb, int M, int N, int K, 
			
 
				 			double alpha, double *A, int lda, double *B, int ldb, 
			
 
				 			double beta, double *C, int ldc)
			
 
				 {
			
@@ -259,39 +259,39 @@ inline void DGEMM(char *transa, char *transb, int M, int N, int K,
 
				 }
			
 
				 
			
 
				 
			
 
				-inline void SGEMV(char *transa, int M, int N, float alpha, float *A, int lda,
			
 
				+inline void STARPU_SGEMV(char *transa, int M, int N, float alpha, float *A, int lda,
			
 
				 		float *X, int incX, float beta, float *Y, int incY)
			
 
				 {
			
 
				 	sgemv_(transa, &M, &N, &alpha, A, &lda, X, &incX, &beta, Y, &incY);
			
 
				 }
			
 
				 
			
 
				-inline void DGEMV(char *transa, int M, int N, double alpha, double *A, int lda,
			
 
				+inline void STARPU_DGEMV(char *transa, int M, int N, double alpha, double *A, int lda,
			
 
				 		double *X, int incX, double beta, double *Y, int incY)
			
 
				 {
			
 
				 	dgemv_(transa, &M, &N, &alpha, A, &lda, X, &incX, &beta, Y, &incY);
			
 
				 }
			
 
				 
			
 
				-inline float SASUM(int N, float *X, int incX)
			
 
				+inline float STARPU_SASUM(int N, float *X, int incX)
			
 
				 {
			
 
				 	return sasum_(&N, X, &incX);
			
 
				 }
			
 
				 
			
 
				-inline double DASUM(int N, double *X, int incX)
			
 
				+inline double STARPU_DASUM(int N, double *X, int incX)
			
 
				 {
			
 
				 	return dasum_(&N, X, &incX);
			
 
				 }
			
 
				 
			
 
				-void SSCAL(int N, float alpha, float *X, int incX)
			
 
				+void STARPU_SSCAL(int N, float alpha, float *X, int incX)
			
 
				 {
			
 
				 	sscal_(&N, &alpha, X, &incX);
			
 
				 }
			
 
				 
			
 
				-void DSCAL(int N, double alpha, double *X, int incX)
			
 
				+void STARPU_DSCAL(int N, double alpha, double *X, int incX)
			
 
				 {
			
 
				 	dscal_(&N, &alpha, X, &incX);
			
 
				 }
			
 
				 
			
 
				-void STRSM (const char *side, const char *uplo, const char *transa,
			
 
				+void STARPU_STRSM (const char *side, const char *uplo, const char *transa,
			
 
				                    const char *diag, const int m, const int n,
			
 
				                    const float alpha, const float *A, const int lda,
			
 
				                    float *B, const int ldb)
			
@@ -299,7 +299,7 @@ void STRSM (const char *side, const char *uplo, const char *transa,
 
				 	strsm_(side, uplo, transa, diag, &m, &n, &alpha, A, &lda, B, &ldb);
			
 
				 }
			
 
				 
			
 
				-void DTRSM (const char *side, const char *uplo, const char *transa,
			
 
				+void STARPU_DTRSM (const char *side, const char *uplo, const char *transa,
			
 
				                    const char *diag, const int m, const int n,
			
 
				                    const double alpha, const double *A, const int lda,
			
 
				                    double *B, const int ldb)
			
@@ -307,13 +307,13 @@ void DTRSM (const char *side, const char *uplo, const char *transa,
 
				 	dtrsm_(side, uplo, transa, diag, &m, &n, &alpha, A, &lda, B, &ldb);
			
 
				 }
			
 
				 
			
 
				-void SSYR (const char *uplo, const int n, const float alpha,
			
 
				+void STARPU_SSYR (const char *uplo, const int n, const float alpha,
			
 
				                   const float *x, const int incx, float *A, const int lda)
			
 
				 {
			
 
				 	ssyr_(uplo, &n, &alpha, x, &incx, A, &lda); 
			
 
				 }
			
 
				 
			
 
				-void SSYRK (const char *uplo, const char *trans, const int n,
			
 
				+void STARPU_SSYRK (const char *uplo, const char *trans, const int n,
			
 
				                    const int k, const float alpha, const float *A,
			
 
				                    const int lda, const float beta, float *C,
			
 
				                    const int ldc)
			
@@ -321,28 +321,28 @@ void SSYRK (const char *uplo, const char *trans, const int n,
 
				 	ssyrk_(uplo, trans, &n, &k, &alpha, A, &lda, &beta, C, &ldc); 
			
 
				 }
			
 
				 
			
 
				-void SGER(const int m, const int n, const float alpha,
			
 
				+void STARPU_SGER(const int m, const int n, const float alpha,
			
 
				                   const float *x, const int incx, const float *y,
			
 
				                   const int incy, float *A, const int lda)
			
 
				 {
			
 
				 	sger_(&m, &n, &alpha, x, &incx, y, &incy, A, &lda);
			
 
				 }
			
 
				 
			
 
				-void DGER(const int m, const int n, const double alpha,
			
 
				+void STARPU_DGER(const int m, const int n, const double alpha,
			
 
				                   const double *x, const int incx, const double *y,
			
 
				                   const int incy, double *A, const int lda)
			
 
				 {
			
 
				 	dger_(&m, &n, &alpha, x, &incx, y, &incy, A, &lda);
			
 
				 }
			
 
				 
			
 
				-void STRSV (const char *uplo, const char *trans, const char *diag, 
			
 
				+void STARPU_STRSV (const char *uplo, const char *trans, const char *diag, 
			
 
				                    const int n, const float *A, const int lda, float *x, 
			
 
				                    const int incx)
			
 
				 {
			
 
				 	strsv_(uplo, trans, diag, &n, A, &lda, x, &incx);
			
 
				 }
			
 
				 
			
 
				-void STRMM(const char *side, const char *uplo, const char *transA,
			
 
				+void STARPU_STRMM(const char *side, const char *uplo, const char *transA,
			
 
				                  const char *diag, const int m, const int n,
			
 
				                  const float alpha, const float *A, const int lda,
			
 
				                  float *B, const int ldb)
			
@@ -350,7 +350,7 @@ void STRMM(const char *side, const char *uplo, const char *transA,
 
				 	strmm_(side, uplo, transA, diag, &m, &n, &alpha, A, &lda, B, &ldb);
			
 
				 }
			
 
				 
			
 
				-void DTRMM(const char *side, const char *uplo, const char *transA,
			
 
				+void STARPU_DTRMM(const char *side, const char *uplo, const char *transA,
			
 
				                  const char *diag, const int m, const int n,
			
 
				                  const double alpha, const double *A, const int lda,
			
 
				                  double *B, const int ldb)
			
@@ -358,38 +358,38 @@ void DTRMM(const char *side, const char *uplo, const char *transA,
 
				 	dtrmm_(side, uplo, transA, diag, &m, &n, &alpha, A, &lda, B, &ldb);
			
 
				 }
			
 
				 
			
 
				-void STRMV(const char *uplo, const char *transA, const char *diag,
			
 
				+void STARPU_STRMV(const char *uplo, const char *transA, const char *diag,
			
 
				                  const int n, const float *A, const int lda, float *X,
			
 
				                  const int incX)
			
 
				 {
			
 
				 	strmv_(uplo, transA, diag, &n, A, &lda, X, &incX);
			
 
				 }
			
 
				 
			
 
				-void SAXPY(const int n, const float alpha, float *X, const int incX, float *Y, const int incY)
			
 
				+void STARPU_SAXPY(const int n, const float alpha, float *X, const int incX, float *Y, const int incY)
			
 
				 {
			
 
				 	saxpy_(&n, &alpha, X, &incX, Y, &incY);
			
 
				 }
			
 
				 
			
 
				-void DAXPY(const int n, const double alpha, double *X, const int incX, double *Y, const int incY)
			
 
				+void STARPU_DAXPY(const int n, const double alpha, double *X, const int incX, double *Y, const int incY)
			
 
				 {
			
 
				 	daxpy_(&n, &alpha, X, &incX, Y, &incY);
			
 
				 }
			
 
				 
			
 
				-int ISAMAX (const int n, float *X, const int incX)
			
 
				+int STARPU_ISAMAX (const int n, float *X, const int incX)
			
 
				 {
			
 
				     int retVal;
			
 
				     retVal = isamax_ (&n, X, &incX);
			
 
				     return retVal;
			
 
				 }
			
 
				 
			
 
				-int IDAMAX (const int n, double *X, const int incX)
			
 
				+int STARPU_IDAMAX (const int n, double *X, const int incX)
			
 
				 {
			
 
				     int retVal;
			
 
				     retVal = idamax_ (&n, X, &incX);
			
 
				     return retVal;
			
 
				 }
			
 
				 
			
 
				-float SDOT(const int n, const float *x, const int incx, const float *y, const int incy)
			
 
				+float STARPU_SDOT(const int n, const float *x, const int incx, const float *y, const int incy)
			
 
				 {
			
 
				 	float retVal = 0;
			
 
				 
			
@@ -399,104 +399,104 @@ float SDOT(const int n, const float *x, const int incx, const float *y, const in
 
				 	return retVal;
			
 
				 }
			
 
				 
			
 
				-double DDOT(const int n, const double *x, const int incx, const double *y, const int incy)
			
 
				+double STARPU_DDOT(const int n, const double *x, const int incx, const double *y, const int incy)
			
 
				 {
			
 
				 	return ddot_(&n, x, &incx, y, &incy);
			
 
				 }
			
 
				 
			
 
				-void SSWAP(const int n, float *X, const int incX, float *Y, const int incY)
			
 
				+void STARPU_SSWAP(const int n, float *X, const int incX, float *Y, const int incY)
			
 
				 {
			
 
				 	sswap_(&n, X, &incX, Y, &incY);
			
 
				 }
			
 
				 
			
 
				-void DSWAP(const int n, double *X, const int incX, double *Y, const int incY)
			
 
				+void STARPU_DSWAP(const int n, double *X, const int incX, double *Y, const int incY)
			
 
				 {
			
 
				 	dswap_(&n, X, &incX, Y, &incY);
			
 
				 }
			
 
				 
			
 
				 
			
 
				 #elif defined(STARPU_SIMGRID)
			
 
				-inline void SGEMM(char *transa, char *transb, int M, int N, int K, 
			
 
				+inline void STARPU_SGEMM(char *transa, char *transb, int M, int N, int K, 
			
 
				 			float alpha, const float *A, int lda, const float *B, int ldb, 
			
 
				 			float beta, float *C, int ldc) { }
			
 
				 
			
 
				-inline void DGEMM(char *transa, char *transb, int M, int N, int K, 
			
 
				+inline void STARPU_DGEMM(char *transa, char *transb, int M, int N, int K, 
			
 
				 			double alpha, double *A, int lda, double *B, int ldb, 
			
 
				 			double beta, double *C, int ldc) { }
			
 
				 
			
 
				-inline void SGEMV(char *transa, int M, int N, float alpha, float *A, int lda,
			
 
				+inline void STARPU_SGEMV(char *transa, int M, int N, float alpha, float *A, int lda,
			
 
				 		float *X, int incX, float beta, float *Y, int incY) { }
			
 
				 
			
 
				-inline void DGEMV(char *transa, int M, int N, double alpha, double *A, int lda,
			
 
				+inline void STARPU_DGEMV(char *transa, int M, int N, double alpha, double *A, int lda,
			
 
				 		double *X, int incX, double beta, double *Y, int incY) { }
			
 
				 
			
 
				-inline float SASUM(int N, float *X, int incX) { }
			
 
				+inline float STARPU_SASUM(int N, float *X, int incX) { return 0.0f; /* SimGrid stub: nothing is computed, but callers read the result, so return a defined value */ }
			
 
				 
			
 
				-inline double DASUM(int N, double *X, int incX) { }
			
 
				+inline double STARPU_DASUM(int N, double *X, int incX) { return 0.0; /* SimGrid stub: nothing is computed, but callers read the result, so return a defined value */ }
			
 
				 
			
 
				-void SSCAL(int N, float alpha, float *X, int incX) { }
			
 
				+void STARPU_SSCAL(int N, float alpha, float *X, int incX) { }
			
 
				 
			
 
				-void DSCAL(int N, double alpha, double *X, int incX) { }
			
 
				+void STARPU_DSCAL(int N, double alpha, double *X, int incX) { }
			
 
				 
			
 
				-void STRSM (const char *side, const char *uplo, const char *transa,
			
 
				+void STARPU_STRSM (const char *side, const char *uplo, const char *transa,
			
 
				                    const char *diag, const int m, const int n,
			
 
				                    const float alpha, const float *A, const int lda,
			
 
				                    float *B, const int ldb) { }
			
 
				 
			
 
				-void DTRSM (const char *side, const char *uplo, const char *transa,
			
 
				+void STARPU_DTRSM (const char *side, const char *uplo, const char *transa,
			
 
				                    const char *diag, const int m, const int n,
			
 
				                    const double alpha, const double *A, const int lda,
			
 
				                    double *B, const int ldb) { }
			
 
				 
			
 
				-void SSYR (const char *uplo, const int n, const float alpha,
			
 
				+void STARPU_SSYR (const char *uplo, const int n, const float alpha,
			
 
				                   const float *x, const int incx, float *A, const int lda) { }
			
 
				 
			
 
				-void SSYRK (const char *uplo, const char *trans, const int n,
			
 
				+void STARPU_SSYRK (const char *uplo, const char *trans, const int n,
			
 
				                    const int k, const float alpha, const float *A,
			
 
				                    const int lda, const float beta, float *C,
			
 
				                    const int ldc) { }
			
 
				 
			
 
				-void SGER(const int m, const int n, const float alpha,
			
 
				+void STARPU_SGER(const int m, const int n, const float alpha,
			
 
				                   const float *x, const int incx, const float *y,
			
 
				                   const int incy, float *A, const int lda) { }
			
 
				 
			
 
				-void DGER(const int m, const int n, const double alpha,
			
 
				+void STARPU_DGER(const int m, const int n, const double alpha,
			
 
				                   const double *x, const int incx, const double *y,
			
 
				                   const int incy, double *A, const int lda) { }
			
 
				 
			
 
				-void STRSV (const char *uplo, const char *trans, const char *diag, 
			
 
				+void STARPU_STRSV (const char *uplo, const char *trans, const char *diag, 
			
 
				                    const int n, const float *A, const int lda, float *x, 
			
 
				                    const int incx) { }
			
 
				 
			
 
				-void STRMM(const char *side, const char *uplo, const char *transA,
			
 
				+void STARPU_STRMM(const char *side, const char *uplo, const char *transA,
			
 
				                  const char *diag, const int m, const int n,
			
 
				                  const float alpha, const float *A, const int lda,
			
 
				                  float *B, const int ldb) { }
			
 
				 
			
 
				-void DTRMM(const char *side, const char *uplo, const char *transA,
			
 
				+void STARPU_DTRMM(const char *side, const char *uplo, const char *transA,
			
 
				                  const char *diag, const int m, const int n,
			
 
				                  const double alpha, const double *A, const int lda,
			
 
				                  double *B, const int ldb) { }
			
 
				 
			
 
				-void STRMV(const char *uplo, const char *transA, const char *diag,
			
 
				+void STARPU_STRMV(const char *uplo, const char *transA, const char *diag,
			
 
				                  const int n, const float *A, const int lda, float *X,
			
 
				                  const int incX) { }
			
 
				 
			
 
				-void SAXPY(const int n, const float alpha, float *X, const int incX, float *Y, const int incY) { }
			
 
				+void STARPU_SAXPY(const int n, const float alpha, float *X, const int incX, float *Y, const int incY) { }
			
 
				 
			
 
				-void DAXPY(const int n, const double alpha, double *X, const int incX, double *Y, const int incY) { }
			
 
				+void STARPU_DAXPY(const int n, const double alpha, double *X, const int incX, double *Y, const int incY) { }
			
 
				 
			
 
				-int ISAMAX (const int n, float *X, const int incX) { }
			
 
				+int STARPU_ISAMAX (const int n, float *X, const int incX) { return 0; /* SimGrid stub: no search is performed; return a defined index instead of falling off the end */ }
			
 
				 
			
 
				-int IDAMAX (const int n, double *X, const int incX) { }
			
 
				+int STARPU_IDAMAX (const int n, double *X, const int incX) { return 0; /* SimGrid stub: no search is performed; return a defined index instead of falling off the end */ }
			
 
				 
			
 
				-float SDOT(const int n, const float *x, const int incx, const float *y, const int incy) { }
			
 
				+float STARPU_SDOT(const int n, const float *x, const int incx, const float *y, const int incy) { return 0.0f; /* SimGrid stub: no dot product is computed; return a defined value */ }
			
 
				 
			
 
				-double DDOT(const int n, const double *x, const int incx, const double *y, const int incy) { }
			
 
				+double STARPU_DDOT(const int n, const double *x, const int incx, const double *y, const int incy) { return 0.0; /* SimGrid stub: no dot product is computed; return a defined value */ }
			
 
				 
			
 
				-void SSWAP(const int n, float *X, const int incX, float *Y, const int incY) { }
			
 
				+void STARPU_SSWAP(const int n, float *X, const int incX, float *Y, const int incY) { }
			
 
				 
			
 
				-void DSWAP(const int n, double *X, const int incX, double *Y, const int incY) { }
			
 
				+void STARPU_DSWAP(const int n, double *X, const int incX, double *Y, const int incY) { }
			
 
				 
			
 
				 
			
 
				 #else
			
--- a/examples/common/blas.h
+++ b/examples/common/blas.h
@@ -24,63 +24,63 @@
 
				 #include <cblas.h>
			
 
				 #endif
			
 
				 
			
 
				-void SGEMM(char *transa, char *transb, int M, int N, int K, float alpha, const float *A, int lda, 
			
 
				+void STARPU_SGEMM(char *transa, char *transb, int M, int N, int K, float alpha, const float *A, int lda, 
			
 
				 		const float *B, int ldb, float beta, float *C, int ldc);
			
 
				-void DGEMM(char *transa, char *transb, int M, int N, int K, double alpha, double *A, int lda, 
			
 
				+void STARPU_DGEMM(char *transa, char *transb, int M, int N, int K, double alpha, double *A, int lda, 
			
 
				 		double *B, int ldb, double beta, double *C, int ldc);
			
 
				-void SGEMV(char *transa, int M, int N, float alpha, float *A, int lda,
			
 
				+void STARPU_SGEMV(char *transa, int M, int N, float alpha, float *A, int lda,
			
 
				 		float *X, int incX, float beta, float *Y, int incY);
			
 
				-void DGEMV(char *transa, int M, int N, double alpha, double *A, int lda,
			
 
				+void STARPU_DGEMV(char *transa, int M, int N, double alpha, double *A, int lda,
			
 
				 		double *X, int incX, double beta, double *Y, int incY);
			
 
				-float SASUM(int N, float *X, int incX);
			
 
				-double DASUM(int N, double *X, int incX);
			
 
				-void SSCAL(int N, float alpha, float *X, int incX);
			
 
				-void DSCAL(int N, double alpha, double *X, int incX);
			
 
				-void STRSM (const char *side, const char *uplo, const char *transa,
			
 
				+float STARPU_SASUM(int N, float *X, int incX);
			
 
				+double STARPU_DASUM(int N, double *X, int incX);
			
 
				+void STARPU_SSCAL(int N, float alpha, float *X, int incX);
			
 
				+void STARPU_DSCAL(int N, double alpha, double *X, int incX);
			
 
				+void STARPU_STRSM (const char *side, const char *uplo, const char *transa,
			
 
				                    const char *diag, const int m, const int n,
			
 
				                    const float alpha, const float *A, const int lda,
			
 
				                    float *B, const int ldb);
			
 
				-void DTRSM (const char *side, const char *uplo, const char *transa,
			
 
				+void STARPU_DTRSM (const char *side, const char *uplo, const char *transa,
			
 
				                    const char *diag, const int m, const int n,
			
 
				                    const double alpha, const double *A, const int lda,
			
 
				                    double *B, const int ldb);
			
 
				-void DGEMM(char *transa, char *transb, int M, int N, int K, 
			
 
				+void STARPU_DGEMM(char *transa, char *transb, int M, int N, int K, 
			
 
				 			double alpha, double *A, int lda, double *B, int ldb, 
			
 
				 			double beta, double *C, int ldc);
			
 
				-void SSYR (const char *uplo, const int n, const float alpha,
			
 
				+void STARPU_SSYR (const char *uplo, const int n, const float alpha,
			
 
				                   const float *x, const int incx, float *A, const int lda);
			
 
				-void SSYRK (const char *uplo, const char *trans, const int n,
			
 
				+void STARPU_SSYRK (const char *uplo, const char *trans, const int n,
			
 
				                    const int k, const float alpha, const float *A,
			
 
				                    const int lda, const float beta, float *C,
			
 
				                    const int ldc);
			
 
				-void SGER (const int m, const int n, const float alpha,
			
 
				+void STARPU_SGER (const int m, const int n, const float alpha,
			
 
				                   const float *x, const int incx, const float *y,
			
 
				                   const int incy, float *A, const int lda);
			
 
				-void DGER(const int m, const int n, const double alpha,
			
 
				+void STARPU_DGER(const int m, const int n, const double alpha,
			
 
				                   const double *x, const int incx, const double *y,
			
 
				                   const int incy, double *A, const int lda);
			
 
				-void STRSV (const char *uplo, const char *trans, const char *diag, 
			
 
				+void STARPU_STRSV (const char *uplo, const char *trans, const char *diag, 
			
 
				                    const int n, const float *A, const int lda, float *x, 
			
 
				                    const int incx);
			
 
				-void STRMM(const char *side, const char *uplo, const char *transA,
			
 
				+void STARPU_STRMM(const char *side, const char *uplo, const char *transA,
			
 
				                  const char *diag, const int m, const int n,
			
 
				                  const float alpha, const float *A, const int lda,
			
 
				                  float *B, const int ldb);
			
 
				-void DTRMM(const char *side, const char *uplo, const char *transA,
			
 
				+void STARPU_DTRMM(const char *side, const char *uplo, const char *transA,
			
 
				                  const char *diag, const int m, const int n,
			
 
				                  const double alpha, const double *A, const int lda,
			
 
				                  double *B, const int ldb);
			
 
				-void STRMV(const char *uplo, const char *transA, const char *diag,
			
 
				+void STARPU_STRMV(const char *uplo, const char *transA, const char *diag,
			
 
				                  const int n, const float *A, const int lda, float *X,
			
 
				                  const int incX);
			
 
				-void SAXPY(const int n, const float alpha, float *X, const int incX, float *Y, const int incy);
			
 
				-void DAXPY(const int n, const double alpha, double *X, const int incX, double *Y, const int incY);
			
 
				-int ISAMAX (const int n, float *X, const int incX);
			
 
				-int IDAMAX (const int n, double *X, const int incX);
			
 
				-float SDOT(const int n, const float *x, const int incx, const float *y, const int incy);
			
 
				-double DDOT(const int n, const double *x, const int incx, const double *y, const int incy);
			
 
				-void SSWAP(const int n, float *x, const int incx, float *y, const int incy);
			
 
				-void DSWAP(const int n, double *x, const int incx, double *y, const int incy);
			
 
				+void STARPU_SAXPY(const int n, const float alpha, float *X, const int incX, float *Y, const int incy);
			
 
				+void STARPU_DAXPY(const int n, const double alpha, double *X, const int incX, double *Y, const int incY);
			
 
				+int STARPU_ISAMAX (const int n, float *X, const int incX);
			
 
				+int STARPU_IDAMAX (const int n, double *X, const int incX);
			
 
				+float STARPU_SDOT(const int n, const float *x, const int incx, const float *y, const int incy);
			
 
				+double STARPU_DDOT(const int n, const double *x, const int incx, const double *y, const int incy);
			
 
				+void STARPU_SSWAP(const int n, float *x, const int incx, float *y, const int incy);
			
 
				+void STARPU_DSWAP(const int n, double *x, const int incx, double *y, const int incy);
			
 
				 
			
 
				 #if defined(STARPU_GOTO) || defined(STARPU_SYSTEM_BLAS) || defined(STARPU_MKL)
			
 
				 
			
--- a/examples/heat/dw_factolu.h
+++ b/examples/heat/dw_factolu.h
@@ -141,7 +141,7 @@ static void STARPU_ATTRIBUTE_UNUSED compare_A_LU(float *A, float *LU,
 
				 
			
 
				 
			
 
				         /* now A_err = L, compute L*U */
			
 
				-	STRMM("R", "U", "N", "U", size, size, 1.0f, U, size, L, size);
			
 
				+	STARPU_STRMM("R", "U", "N", "U", size, size, 1.0f, U, size, L, size);
			
 
				 
			
 
				 	float max_err = 0.0f;
			
 
				 	for (i = 0; i < size ; i++)
			
--- a/examples/heat/dw_factolu_kernels.c
+++ b/examples/heat/dw_factolu_kernels.c
@@ -124,7 +124,7 @@ static inline void dw_common_cpu_codelet_update_u22(void *descr[], int s, STARPU
 
				 	switch (s)
			
 
				 	{
			
 
				 		case 0:
			
 
				-			SGEMM("N", "N",	dy, dx, dz, 
			
 
				+			STARPU_SGEMM("N", "N",	dy, dx, dz, 
			
 
				 				-1.0f, left, ld21, right, ld12,
			
 
				 					     1.0f, center, ld22);
			
 
				 			break;
			
@@ -189,7 +189,7 @@ static inline void dw_common_codelet_update_u12(void *descr[], int s, STARPU_ATT
 
				 	switch (s)
			
 
				 	{
			
 
				 		case 0:
			
 
				-			STRSM("L", "L", "N", "N",
			
 
				+			STARPU_STRSM("L", "L", "N", "N",
			
 
				 					 nx12, ny12, 1.0f, sub11, ld11, sub12, ld12);
			
 
				 			break;
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -251,7 +251,7 @@ static inline void dw_common_codelet_update_u21(void *descr[], int s, STARPU_ATT
 
				 	switch (s)
			
 
				 	{
			
 
				 		case 0:
			
 
				-			STRSM("R", "U", "N", "U", nx21, ny21, 1.0f, sub11, ld11, sub21, ld21);
			
 
				+			STARPU_STRSM("R", "U", "N", "U", nx21, ny21, 1.0f, sub11, ld11, sub21, ld21);
			
 
				 			break;
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 		case 1:
			
@@ -325,9 +325,9 @@ static inline void dw_common_codelet_update_u11(void *descr[], int s, STARPU_ATT
 
				 				pivot = sub11[z+z*ld];
			
 
				 				STARPU_ASSERT(pivot != 0.0f);
			
 
				 		
			
 
				-				SSCAL(nx - z - 1, (1.0f/pivot), &sub11[z+(z+1)*ld], ld);
			
 
				+				STARPU_SSCAL(nx - z - 1, (1.0f/pivot), &sub11[z+(z+1)*ld], ld);
			
 
				 		
			
 
				-				SGER(nx - z - 1, nx - z - 1, -1.0f,
			
 
				+				STARPU_SGER(nx - z - 1, nx - z - 1, -1.0f,
			
 
				 						&sub11[z+(z+1)*ld], ld,
			
 
				 						&sub11[(z+1)+z*ld], 1,
			
 
				 						&sub11[(z+1) + (z+1)*ld],ld);
			
--- a/examples/heat/dw_sparse_cg_kernels.c
+++ b/examples/heat/dw_sparse_cg_kernels.c
@@ -126,7 +126,7 @@ void cpu_codelet_func_3(void *descr[], void *arg)
 
				 	vec = (float *)STARPU_VECTOR_GET_PTR(descr[0]);
			
 
				 	size = (int)STARPU_VECTOR_GET_NX(descr[0]);
			
 
				 
			
 
				-	dot = SDOT(size, vec, 1, vec, 1);
			
 
				+	dot = STARPU_SDOT(size, vec, 1, vec, 1);
			
 
				 
			
 
				 	fprintf(stderr, "func 3 : DOT = %f\n", dot);
			
 
				 
			
@@ -218,7 +218,7 @@ void cpu_codelet_func_5(void *descr[], void *arg)
 
				 	STARPU_ASSERT(STARPU_VECTOR_GET_NX(descr[0]) == STARPU_VECTOR_GET_NX(descr[1]));
			
 
				 	size = STARPU_VECTOR_GET_NX(descr[0]);
			
 
				 
			
 
				-	dot = SDOT(size, vecd, 1, vecq, 1);
			
 
				+	dot = STARPU_SDOT(size, vecd, 1, vecq, 1);
			
 
				 
			
 
				 	pb->alpha = pb->delta_new / dot;
			
 
				 }
			
@@ -265,7 +265,7 @@ void cpu_codelet_func_6(void *descr[], void *arg)
 
				 
			
 
				 	size = STARPU_VECTOR_GET_NX(descr[0]);
			
 
				 
			
 
				-	SAXPY(size, pb->alpha, vecd, 1, vecx, 1);
			
 
				+	STARPU_SAXPY(size, pb->alpha, vecd, 1, vecx, 1);
			
 
				 }
			
 
				 
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -304,7 +304,7 @@ void cpu_codelet_func_7(void *descr[], void *arg)
 
				 
			
 
				 	size = STARPU_VECTOR_GET_NX(descr[0]);
			
 
				 
			
 
				-	SAXPY(size, -pb->alpha, vecq, 1, vecr, 1);
			
 
				+	STARPU_SAXPY(size, -pb->alpha, vecq, 1, vecr, 1);
			
 
				 }
			
 
				 
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -344,7 +344,7 @@ void cpu_codelet_func_8(void *descr[], void *arg)
 
				 	vecr = (float *)STARPU_VECTOR_GET_PTR(descr[0]);
			
 
				 	size = STARPU_VECTOR_GET_NX(descr[0]);
			
 
				 
			
 
				-	dot = SDOT(size, vecr, 1, vecr, 1);
			
 
				+	dot = STARPU_SDOT(size, vecr, 1, vecr, 1);
			
 
				 
			
 
				 	pb->delta_old = pb->delta_new;
			
 
				 	pb->delta_new = dot;
			
@@ -392,10 +392,10 @@ void cpu_codelet_func_9(void *descr[], void *arg)
 
				 	size = STARPU_VECTOR_GET_NX(descr[0]);
			
 
				 
			
 
				 	/* d = beta d */
			
 
				-	SSCAL(size, pb->beta, vecd, 1);
			
 
				+	STARPU_SSCAL(size, pb->beta, vecd, 1);
			
 
				 
			
 
				 	/* d = r + d */
			
 
				-	SAXPY (size, 1.0f, vecr, 1, vecd, 1);
			
 
				+	STARPU_SAXPY (size, 1.0f, vecr, 1, vecd, 1);
			
 
				 }
			
 
				 
			
 
				 #ifdef STARPU_USE_CUDA
			
--- a/examples/heat/heat.c
+++ b/examples/heat/heat.c
@@ -362,10 +362,10 @@ static void solve_system(unsigned size, unsigned subsize, float *result, int *Re
 
				 	}
			
 
				 
			
 
				 		/* L */
			
 
				-		STRSV("L", "N", "N", subsize, A, subsize, B, 1);
			
 
				+		STARPU_STRSV("L", "N", "N", subsize, A, subsize, B, 1);
			
 
				 	
			
 
				 		/* U */
			
 
				-	        STRSV("U", "N", "U", subsize, A, subsize, B, 1);
			
 
				+	        STARPU_STRSV("U", "N", "U", subsize, A, subsize, B, 1);
			
 
				 	
			
 
				 		STARPU_ASSERT(DIM == size);
			
 
				 	
			
@@ -378,19 +378,19 @@ static void solve_system(unsigned size, unsigned subsize, float *result, int *Re
 
				 	
			
 
				 	
			
 
				 		/* LUB = U * LUB */
			
 
				-		STRMV("U", "N", "U", subsize, A, subsize, LUB, 1);
			
 
				+		STARPU_STRMV("U", "N", "U", subsize, A, subsize, LUB, 1);
			
 
				 		
			
 
				 		/* LUB = L * LUB */
			
 
				-		STRMV("L", "N", "N", subsize, A, subsize, LUB, 1);
			
 
				+		STARPU_STRMV("L", "N", "N", subsize, A, subsize, LUB, 1);
			
 
				 	
			
 
				 		/* LUB -= B */
			
 
				-		SAXPY(subsize, -1.0f, savedB, 1, LUB, 1);
			
 
				+		STARPU_SAXPY(subsize, -1.0f, savedB, 1, LUB, 1);
			
 
				 	
			
 
				 		/* check if LUB is close to the 0 vector */
			
 
				-		int maxind = ISAMAX(subsize, LUB, 1);
			
 
				+		int maxind = STARPU_ISAMAX(subsize, LUB, 1);
			
 
				 		FPRINTF(stderr, "max error (LUX - B) = %e\n",LUB[maxind - 1]);
			
 
				 
			
 
				-		float sum = SASUM(subsize, LUB, 1);
			
 
				+		float sum = STARPU_SASUM(subsize, LUB, 1);
			
 
				 		FPRINTF(stderr,"avg. error %e\n", sum/subsize);
			
 
				 	
			
 
				 		free(LUB);
			
--- a/examples/lu/lu-double.h
+++ b/examples/lu/lu-double.h
@@ -33,16 +33,16 @@
 
				 #define CUBLAS_SWAP	cublasDswap
			
 
				 #define CUBLAS_IAMAX	cublasIdamax
			
 
				 
			
 
				-#define CPU_GEMM	DGEMM
			
 
				-#define CPU_TRSM	DTRSM
			
 
				-#define CPU_SCAL	DSCAL
			
 
				-#define CPU_GER		DGER
			
 
				-#define CPU_SWAP	DSWAP
			
 
				+#define CPU_GEMM	STARPU_DGEMM
			
 
				+#define CPU_TRSM	STARPU_DTRSM
			
 
				+#define CPU_SCAL	STARPU_DSCAL
			
 
				+#define CPU_GER		STARPU_DGER
			
 
				+#define CPU_SWAP	STARPU_DSWAP
			
 
				 
			
 
				-#define CPU_TRMM	DTRMM
			
 
				-#define CPU_AXPY	DAXPY
			
 
				-#define CPU_ASUM	DASUM
			
 
				-#define CPU_IAMAX	IDAMAX
			
 
				+#define CPU_TRMM	STARPU_DTRMM
			
 
				+#define CPU_AXPY	STARPU_DAXPY
			
 
				+#define CPU_ASUM	STARPU_DASUM
			
 
				+#define CPU_IAMAX	STARPU_IDAMAX
			
 
				 
			
 
				 #define PIVOT_THRESHHOLD	10e-10
			
 
				 
			
--- a/examples/lu/lu-float.h
+++ b/examples/lu/lu-float.h
@@ -35,16 +35,16 @@
 
				 #define CUBLAS_SWAP	cublasSswap
			
 
				 #define CUBLAS_IAMAX	cublasIsamax
			
 
				 
			
 
				-#define CPU_GEMM	SGEMM
			
 
				-#define CPU_TRSM	STRSM
			
 
				-#define CPU_SCAL	SSCAL
			
 
				-#define CPU_GER		SGER
			
 
				-#define CPU_SWAP	SSWAP
			
 
				-
			
 
				-#define CPU_TRMM	STRMM
			
 
				-#define CPU_AXPY	SAXPY
			
 
				-#define CPU_ASUM	SASUM
			
 
				-#define CPU_IAMAX	ISAMAX
			
 
				+#define CPU_GEMM	STARPU_SGEMM
			
 
				+#define CPU_TRSM	STARPU_STRSM
			
 
				+#define CPU_SCAL	STARPU_SSCAL
			
 
				+#define CPU_GER		STARPU_SGER
			
 
				+#define CPU_SWAP	STARPU_SSWAP
			
 
				+
			
 
				+#define CPU_TRMM	STARPU_STRMM
			
 
				+#define CPU_AXPY	STARPU_SAXPY
			
 
				+#define CPU_ASUM	STARPU_SASUM
			
 
				+#define CPU_IAMAX	STARPU_ISAMAX
			
 
				 
			
 
				 #define PIVOT_THRESHHOLD	10e-5
			
 
				 
			
--- a/examples/lu/xlu.h
+++ b/examples/lu/xlu.h
@@ -60,7 +60,7 @@ static void STARPU_ATTRIBUTE_UNUSED compare_A_LU(float *A, float *LU,
 
				 	}
			
 
				 
			
 
				         /* now A_err = L, compute L*U */
			
 
				-	STRMM("R", "U", "N", "U", size, size, 1.0f, U, size, L, size);
			
 
				+	STARPU_STRMM("R", "U", "N", "U", size, size, 1.0f, U, size, L, size);
			
 
				 
			
 
				 	float max_err = 0.0f;
			
 
				 	for (i = 0; i < size ; i++)
			
--- a/examples/mult/double.h
+++ b/examples/mult/double.h
@@ -17,9 +17,9 @@
 
				 #define TYPE	double
			
 
				 
			
 
				 #define CUBLAS_GEMM cublasDgemm
			
 
				-#define CPU_GEMM	DGEMM
			
 
				-#define CPU_ASUM	DASUM
			
 
				-#define CPU_IAMAX	IDAMAX
			
 
				+#define CPU_GEMM	STARPU_DGEMM
			
 
				+#define CPU_ASUM	STARPU_DASUM
			
 
				+#define CPU_IAMAX	STARPU_IDAMAX
			
 
				 #define STARPU_GEMM(name)	starpu_dgemm_##name
			
 
				 
			
 
				 #define str(s) #s
			
--- a/examples/mult/simple.h
+++ b/examples/mult/simple.h
@@ -17,9 +17,9 @@
 
				 #define TYPE	float
			
 
				 
			
 
				 #define CUBLAS_GEMM cublasSgemm
			
 
				-#define CPU_GEMM	SGEMM
			
 
				-#define CPU_ASUM	SASUM
			
 
				-#define CPU_IAMAX	ISAMAX
			
 
				+#define CPU_GEMM	STARPU_SGEMM
			
 
				+#define CPU_ASUM	STARPU_SASUM
			
 
				+#define CPU_IAMAX	STARPU_ISAMAX
			
 
				 #define STARPU_GEMM(name)	starpu_sgemm_##name
			
 
				 
			
 
				 #define str(s) #s
			
--- a/examples/pipeline/pipeline.c
+++ b/examples/pipeline/pipeline.c
@@ -90,7 +90,7 @@ void pipeline_cpu_axpy(void *descr[], void *arg)
 
				 	float *y = (float *) STARPU_VECTOR_GET_PTR(descr[1]);
			
 
				 	int n = STARPU_VECTOR_GET_NX(descr[0]);
			
 
				 
			
 
				-	SAXPY(n, 1., x, 1, y, 1);
			
 
				+	STARPU_SAXPY(n, 1., x, 1, y, 1);
			
 
				 }
			
 
				 
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -129,7 +129,7 @@ void pipeline_cpu_sum(void *descr[], void *_args)
 
				 	int n = STARPU_VECTOR_GET_NX(descr[0]);
			
 
				 	float y;
			
 
				 
			
 
				-	y = SASUM(n, x, 1);
			
 
				+	y = STARPU_SASUM(n, x, 1);
			
 
				 
			
 
				 	FPRINTF(stderr,"CPU finished with %f\n", y);
			
 
				 }
			
--- a/gcc-plugin/examples/cholesky/cholesky.c
+++ b/gcc-plugin/examples/cholesky/cholesky.c
@@ -203,7 +203,7 @@ int main(int argc, char **argv)
 
				 		}
			
 
				 	}
			
 
				 	float test_mat[size * size] __heap;
			
 
				-	SSYRK("L", "N", size, size, 1.0f,
			
 
				+	STARPU_SSYRK("L", "N", size, size, 1.0f,
			
 
				 	      rmat, size, 0.0f, test_mat, size);
			
 
				 
			
 
				 	fprintf(stderr, "comparing results ...\n");
			
--- a/gcc-plugin/examples/cholesky/cholesky_kernels.c
+++ b/gcc-plugin/examples/cholesky/cholesky_kernels.c
@@ -42,7 +42,7 @@ static inline void chol_common_cpu_codelet_update_u22(const float *left, const f
 
				 
			
 
				 	switch (s) {
			
 
				 		case 0:
			
 
				-			SGEMM("N", "T", dy, dx, dz, -1.0f, left, ld21,
			
 
				+			STARPU_SGEMM("N", "T", dy, dx, dz, -1.0f, left, ld21,
			
 
				 				right, ld12, 1.0f, center, ld22);
			
 
				 			break;
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -95,7 +95,7 @@ static inline void chol_common_codelet_update_u21(const float *sub11, float *sub
 
				 {
			
 
				 	switch (s) {
			
 
				 		case 0:
			
 
				-			STRSM("R", "L", "T", "N", nx21, ny21, 1.0f, sub11, ld11, sub21, ld21);
			
 
				+			STARPU_STRSM("R", "L", "T", "N", nx21, ny21, 1.0f, sub11, ld11, sub21, ld21);
			
 
				 			break;
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 		case 1:
			
@@ -153,9 +153,9 @@ static inline void chol_common_codelet_update_u11(float *sub11, unsigned nx, uns
 
				 
			
 
				 				STARPU_ASSERT(lambda11 != 0.0f);
			
 
				 
			
 
				-				SSCAL(nx - z - 1, 1.0f/lambda11, &sub11[(z+1)+z*ld], 1);
			
 
				+				STARPU_SSCAL(nx - z - 1, 1.0f/lambda11, &sub11[(z+1)+z*ld], 1);
			
 
				 
			
 
				-				SSYR("L", nx - z - 1, -1.0f,
			
 
				+				STARPU_SSYR("L", nx - z - 1, -1.0f,
			
 
				 							&sub11[(z+1)+z*ld], 1,
			
 
				 							&sub11[(z+1)+(z+1)*ld], ld);
			
 
				 			}
			
--- a/mpi/examples/matrix_decomposition/mpi_cholesky_codelets.c
+++ b/mpi/examples/matrix_decomposition/mpi_cholesky_codelets.c
@@ -203,7 +203,7 @@ void dw_cholesky_check_computation(float ***matA, int rank, int nodes, int *corr
 
				 	float *test_mat = malloc(size*size*sizeof(float));
			
 
				 	STARPU_ASSERT(test_mat);
			
 
				 
			
 
				-	SSYRK("L", "N", size, size, 1.0f,
			
 
				+	STARPU_SSYRK("L", "N", size, size, 1.0f,
			
 
				 			rmat, size, 0.0f, test_mat, size);
			
 
				 
			
 
				 	FPRINTF(stderr, "[%d] comparing results ...\n", rank);
			
--- a/mpi/examples/matrix_decomposition/mpi_cholesky_kernels.c
+++ b/mpi/examples/matrix_decomposition/mpi_cholesky_kernels.c
@@ -55,7 +55,7 @@ static inline void chol_common_cpu_codelet_update_u22(void *descr[], int s, STAR
 
				 	switch (s)
			
 
				 	{
			
 
				 		case 0:
			
 
				-			SGEMM("N", "T", dy, dx, dz, -1.0f, left, ld21,
			
 
				+			STARPU_SGEMM("N", "T", dy, dx, dz, -1.0f, left, ld21,
			
 
				 				right, ld12, 1.0f, center, ld22);
			
 
				 			break;
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -111,7 +111,7 @@ static inline void chol_common_codelet_update_u21(void *descr[], int s, STARPU_A
 
				 	switch (s)
			
 
				 	{
			
 
				 		case 0:
			
 
				-			STRSM("R", "L", "T", "N", nx21, ny21, 1.0f, sub11, ld11, sub21, ld21);
			
 
				+			STARPU_STRSM("R", "L", "T", "N", nx21, ny21, 1.0f, sub11, ld11, sub21, ld21);
			
 
				 			break;
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 		case 1:
			
@@ -171,9 +171,9 @@ static inline void chol_common_codelet_update_u11(void *descr[], int s, STARPU_A
 
				 
			
 
				 				STARPU_ASSERT(lambda11 != 0.0f);
			
 
				 
			
 
				-				SSCAL(nx - z - 1, 1.0f/lambda11, &sub11[(z+1)+z*ld], 1);
			
 
				+				STARPU_SSCAL(nx - z - 1, 1.0f/lambda11, &sub11[(z+1)+z*ld], 1);
			
 
				 
			
 
				-				SSYR("L", nx - z - 1, -1.0f,
			
 
				+				STARPU_SSYR("L", nx - z - 1, -1.0f,
			
 
				 							&sub11[(z+1)+z*ld], 1,
			
 
				 							&sub11[(z+1)+(z+1)*ld], ld);
			
 
				 			}
			
--- a/mpi/examples/mpi_lu/mpi_lu-double.h
+++ b/mpi/examples/mpi_lu/mpi_lu-double.h
@@ -27,16 +27,16 @@
 
				 #define CUBLAS_SWAP	cublasDswap
			
 
				 #define CUBLAS_IAMAX	cublasIdamax
			
 
				 
			
 
				-#define CPU_GEMM	DGEMM
			
 
				-#define CPU_GEMV	DGEMV
			
 
				-#define CPU_TRSM	DTRSM
			
 
				-#define CPU_SCAL	DSCAL
			
 
				-#define CPU_GER		DGER
			
 
				-#define CPU_SWAP	DSWAP
			
 
				+#define CPU_GEMM	STARPU_DGEMM
			
 
				+#define CPU_GEMV	STARPU_DGEMV
			
 
				+#define CPU_TRSM	STARPU_DTRSM
			
 
				+#define CPU_SCAL	STARPU_DSCAL
			
 
				+#define CPU_GER		STARPU_DGER
			
 
				+#define CPU_SWAP	STARPU_DSWAP
			
 
				 
			
 
				-#define CPU_TRMM	DTRMM
			
 
				-#define CPU_AXPY	DAXPY
			
 
				-#define CPU_ASUM	DASUM
			
 
				-#define CPU_IAMAX	IDAMAX
			
 
				+#define CPU_TRMM	STARPU_DTRMM
			
 
				+#define CPU_AXPY	STARPU_DAXPY
			
 
				+#define CPU_ASUM	STARPU_DASUM
			
 
				+#define CPU_IAMAX	STARPU_IDAMAX
			
 
				 
			
 
				 #define PIVOT_THRESHHOLD	10e-10
			
--- a/mpi/examples/mpi_lu/mpi_lu-float.h
+++ b/mpi/examples/mpi_lu/mpi_lu-float.h
@@ -27,16 +27,16 @@
 
				 #define CUBLAS_SWAP	cublasSswap
			
 
				 #define CUBLAS_IAMAX	cublasIsamax
			
 
				 
			
 
				-#define CPU_GEMM	SGEMM
			
 
				-#define CPU_GEMV	SGEMV
			
 
				-#define CPU_TRSM	STRSM
			
 
				-#define CPU_SCAL	SSCAL
			
 
				-#define CPU_GER		SGER
			
 
				-#define CPU_SWAP	SSWAP
			
 
				+#define CPU_GEMM	STARPU_SGEMM
			
 
				+#define CPU_GEMV	STARPU_SGEMV
			
 
				+#define CPU_TRSM	STARPU_STRSM
			
 
				+#define CPU_SCAL	STARPU_SSCAL
			
 
				+#define CPU_GER		STARPU_SGER
			
 
				+#define CPU_SWAP	STARPU_SSWAP
			
 
				 
			
 
				-#define CPU_TRMM	STRMM
			
 
				-#define CPU_AXPY	SAXPY
			
 
				-#define CPU_ASUM	SASUM
			
 
				-#define CPU_IAMAX	ISAMAX
			
 
				+#define CPU_TRMM	STARPU_STRMM
			
 
				+#define CPU_AXPY	STARPU_SAXPY
			
 
				+#define CPU_ASUM	STARPU_SASUM
			
 
				+#define CPU_IAMAX	STARPU_ISAMAX
			
 
				 
			
 
				 #define PIVOT_THRESHHOLD	10e-5
			
--- a/sc_hypervisor/examples/cholesky/cholesky_grain_tag.c
+++ b/sc_hypervisor/examples/cholesky/cholesky_grain_tag.c
@@ -406,7 +406,7 @@ int main(int argc, char **argv)
 
				 	float *test_mat = malloc(size*size*sizeof(float));
			
 
				 	STARPU_ASSERT(test_mat);
			
 
				 
			
 
				-	SSYRK("L", "N", size, size, 1.0f,
			
 
				+	STARPU_SSYRK("L", "N", size, size, 1.0f,
			
 
				 				mat, size, 0.0f, test_mat, size);
			
 
				 
			
 
				 	FPRINTF(stderr, "comparing results ...\n");
			
--- a/sc_hypervisor/examples/cholesky/cholesky_implicit.c
+++ b/sc_hypervisor/examples/cholesky/cholesky_implicit.c
@@ -290,7 +290,7 @@ static void execute_cholesky(unsigned size, unsigned nblocks)
 
				 		float *test_mat = malloc(size*size*sizeof(float));
			
 
				 		STARPU_ASSERT(test_mat);
			
 
				 
			
 
				-		SSYRK("L", "N", size, size, 1.0f,
			
 
				+		STARPU_SSYRK("L", "N", size, size, 1.0f,
			
 
				 					mat, size, 0.0f, test_mat, size);
			
 
				 
			
 
				 		FPRINTF(stderr, "comparing results ...\n");
			
--- a/sc_hypervisor/examples/cholesky/cholesky_kernels.c
+++ b/sc_hypervisor/examples/cholesky/cholesky_kernels.c
@@ -52,7 +52,7 @@ static inline void chol_common_cpu_codelet_update_u22(void *descr[], int s, STAR
 
				 		if (worker_size == 1)
			
 
				 		{
			
 
				 			/* Sequential CPU kernel */
			
 
				-			SGEMM("N", "T", dy, dx, dz, -1.0f, left, ld21, 
			
 
				+			STARPU_SGEMM("N", "T", dy, dx, dz, -1.0f, left, ld21, 
			
 
				 				right, ld12, 1.0f, center, ld22);
			
 
				 		}
			
 
				 		else
			
@@ -66,7 +66,7 @@ static inline void chol_common_cpu_codelet_update_u22(void *descr[], int s, STAR
 
				 			float *new_left = &left[block_size*rank];
			
 
				 			float *new_center = &center[block_size*rank];
			
 
				 
			
 
				-			SGEMM("N", "T", dy, new_dx, dz, -1.0f, new_left, ld21, 
			
 
				+			STARPU_SGEMM("N", "T", dy, new_dx, dz, -1.0f, new_left, ld21, 
			
 
				 				right, ld12, 1.0f, new_center, ld22);
			
 
				 		}
			
 
				 	}
			
@@ -117,7 +117,7 @@ static inline void chol_common_codelet_update_u21(void *descr[], int s, STARPU_A
 
				 	switch (s)
			
 
				 	{
			
 
				 		case 0:
			
 
				-			STRSM("R", "L", "T", "N", nx21, ny21, 1.0f, sub11, ld11, sub21, ld21);
			
 
				+			STARPU_STRSM("R", "L", "T", "N", nx21, ny21, 1.0f, sub11, ld11, sub21, ld21);
			
 
				 			break;
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 		case 1:
			
@@ -177,9 +177,9 @@ static inline void chol_common_codelet_update_u11(void *descr[], int s, STARPU_A
 
				 
			
 
				 				STARPU_ASSERT(lambda11 != 0.0f);
			
 
				 		
			
 
				-				SSCAL(nx - z - 1, 1.0f/lambda11, &sub11[(z+1)+z*ld], 1);
			
 
				+				STARPU_SSCAL(nx - z - 1, 1.0f/lambda11, &sub11[(z+1)+z*ld], 1);
			
 
				 		
			
 
				-				SSYR("L", nx - z - 1, -1.0f, 
			
 
				+				STARPU_SSYR("L", nx - z - 1, -1.0f, 
			
 
				 							&sub11[(z+1)+z*ld], 1,
			
 
				 							&sub11[(z+1)+(z+1)*ld], ld);
			
 
				 			}
			
--- a/sc_hypervisor/examples/cholesky/cholesky_tag.c
+++ b/sc_hypervisor/examples/cholesky/cholesky_tag.c
@@ -391,7 +391,7 @@ int main(int argc, char **argv)
 
				 	float *test_mat = malloc(size*size*sizeof(float));
			
 
				 	STARPU_ASSERT(test_mat);
			
 
				 
			
 
				-	SSYRK("L", "N", size, size, 1.0f,
			
 
				+	STARPU_SSYRK("L", "N", size, size, 1.0f,
			
 
				 				mat, size, 0.0f, test_mat, size);
			
 
				 
			
 
				 	FPRINTF(stderr, "comparing results ...\n");