Pārlūkot izejas kodu

julia: Link with the openblas library shipped with Julia.

Pierre Huchant 5 gadi atpakaļ
vecāks
revīzija
66ebef73fb
3 mainīti faili ar 195 papildinājumiem un 535 dzēšanām
  1. 1 1
      julia/src/Makefile.am
  2. 78 400
      julia/src/blas.c
  3. 116 134
      julia/src/blas.h

+ 1 - 1
julia/src/Makefile.am

@@ -19,7 +19,7 @@ include $(top_srcdir)/starpu-notests.mk
 CLEANFILES = *.gcno *.gcda
 
 AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(HWLOC_CFLAGS) $(FXT_CFLAGS) -fPIC
-LIBS = $(top_builddir)/src/@LIBSTARPU_LINK@ @LIBS@ $(FXT_LIBS)
+LIBS = $(top_builddir)/src/@LIBSTARPU_LINK@ `@JULIA@ $(top_srcdir)/julia/src/openblas_ldflags.jl`
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/include -I$(top_builddir)/src -I$(top_srcdir)/src -I$(top_srcdir)/julia/src
 AM_LDFLAGS = $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(FXT_LDFLAGS) -no-undefined
 

+ 78 - 400
julia/src/blas.c

@@ -17,500 +17,178 @@
 #include <ctype.h>
 #include <stdio.h>
 
-#include <starpu.h>
 #include "blas.h"
 
-/*
-    This files contains BLAS wrappers for the different BLAS implementations
-  (eg. REFBLAS, ATLAS, GOTOBLAS ...). We assume a Fortran orientation as most
-  libraries do not supply C-based ordering.
- */
-
-#ifdef STARPU_ATLAS
-
-inline void STARPU_SGEMM(char *transa, char *transb, int M, int N, int K, 
-			float alpha, const float *A, int lda, const float *B, int ldb, 
-			float beta, float *C, int ldc)
-{
-	enum CBLAS_TRANSPOSE ta = (toupper(transa[0]) == 'N')?CblasNoTrans:CblasTrans;
-	enum CBLAS_TRANSPOSE tb = (toupper(transb[0]) == 'N')?CblasNoTrans:CblasTrans;
-
-	cblas_sgemm(CblasColMajor, ta, tb,
-			M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);				
-}
-
-inline void STARPU_DGEMM(char *transa, char *transb, int M, int N, int K, 
-			double alpha, double *A, int lda, double *B, int ldb, 
-			double beta, double *C, int ldc)
-{
-	enum CBLAS_TRANSPOSE ta = (toupper(transa[0]) == 'N')?CblasNoTrans:CblasTrans;
-	enum CBLAS_TRANSPOSE tb = (toupper(transb[0]) == 'N')?CblasNoTrans:CblasTrans;
-
-	cblas_dgemm(CblasColMajor, ta, tb,
-			M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);				
-}
-
-inline void STARPU_SGEMV(char *transa, int M, int N, float alpha, float *A, int lda, float *X, int incX, float beta, float *Y, int incY)
-{
-	enum CBLAS_TRANSPOSE ta = (toupper(transa[0]) == 'N')?CblasNoTrans:CblasTrans;
-
-	cblas_sgemv(CblasColMajor, ta, M, N, alpha, A, lda,
-					X, incX, beta, Y, incY);
-}
-
-inline void STARPU_DGEMV(char *transa, int M, int N, double alpha, double *A, int lda, double *X, int incX, double beta, double *Y, int incY)
-{
-	enum CBLAS_TRANSPOSE ta = (toupper(transa[0]) == 'N')?CblasNoTrans:CblasTrans;
-
-	cblas_dgemv(CblasColMajor, ta, M, N, alpha, A, lda,
-					X, incX, beta, Y, incY);
-}
-
-inline float STARPU_SASUM(int N, float *X, int incX)
-{
-	return cblas_sasum(N, X, incX);
-}
-
-inline double STARPU_DASUM(int N, double *X, int incX)
-{
-	return cblas_dasum(N, X, incX);
-}
-
-void STARPU_SSCAL(int N, float alpha, float *X, int incX)
-{
-	cblas_sscal(N, alpha, X, incX);
-}
-
-void STARPU_DSCAL(int N, double alpha, double *X, int incX)
-{
-	cblas_dscal(N, alpha, X, incX);
-}
-
-void STARPU_STRSM (const char *side, const char *uplo, const char *transa,
-                   const char *diag, const int m, const int n,
-                   const float alpha, const float *A, const int lda,
-                   float *B, const int ldb)
+inline void STARPU_SGEMM(char *transa, char *transb, BLASINT M, BLASINT N, BLASINT K, 
+			float alpha, const float *A, BLASINT lda, const float *B, BLASINT ldb, 
+			float beta, float *C, BLASINT ldc)
 {
-	enum CBLAS_SIDE side_ = (toupper(side[0]) == 'L')?CblasLeft:CblasRight;
-	enum CBLAS_UPLO uplo_ = (toupper(uplo[0]) == 'U')?CblasUpper:CblasLower;
-	enum CBLAS_TRANSPOSE transa_ = (toupper(transa[0]) == 'N')?CblasNoTrans:CblasTrans;
-	enum CBLAS_DIAG diag_ = (toupper(diag[0]) == 'N')?CblasNonUnit:CblasUnit;
-
-	cblas_strsm(CblasColMajor, side_, uplo_, transa_, diag_, m, n, alpha, A, lda, B, ldb);
-}
-
-void STARPU_DTRSM (const char *side, const char *uplo, const char *transa,
-                   const char *diag, const int m, const int n,
-                   const double alpha, const double *A, const int lda,
-                   double *B, const int ldb)
-{
-	enum CBLAS_SIDE side_ = (toupper(side[0]) == 'L')?CblasLeft:CblasRight;
-	enum CBLAS_UPLO uplo_ = (toupper(uplo[0]) == 'U')?CblasUpper:CblasLower;
-	enum CBLAS_TRANSPOSE transa_ = (toupper(transa[0]) == 'N')?CblasNoTrans:CblasTrans;
-	enum CBLAS_DIAG diag_ = (toupper(diag[0]) == 'N')?CblasNonUnit:CblasUnit;
-
-	cblas_dtrsm(CblasColMajor, side_, uplo_, transa_, diag_, m, n, alpha, A, lda, B, ldb);
-}
-
-void STARPU_SSYR (const char *uplo, const int n, const float alpha,
-                  const float *x, const int incx, float *A, const int lda)
-{
-	enum CBLAS_UPLO uplo_ = (toupper(uplo[0]) == 'U')?CblasUpper:CblasLower;
-
-	cblas_ssyr(CblasColMajor, uplo_, n, alpha, x, incx, A, lda); 
-}
-
-void STARPU_SSYRK (const char *uplo, const char *trans, const int n,
-                   const int k, const float alpha, const float *A,
-                   const int lda, const float beta, float *C,
-                   const int ldc)
-{
-	enum CBLAS_UPLO uplo_ = (toupper(uplo[0]) == 'U')?CblasUpper:CblasLower;
-	enum CBLAS_TRANSPOSE trans_ = (toupper(trans[0]) == 'N')?CblasNoTrans:CblasTrans;
-	
-	cblas_ssyrk(CblasColMajor, uplo_, trans_, n, k, alpha, A, lda, beta, C, ldc); 
-}
-
-void STARPU_SGER(const int m, const int n, const float alpha,
-                  const float *x, const int incx, const float *y,
-                  const int incy, float *A, const int lda)
-{
-	cblas_sger(CblasColMajor, m, n, alpha, x, incx, y, incy, A, lda);
-}
-
-void STARPU_DGER(const int m, const int n, const double alpha,
-                  const double *x, const int incx, const double *y,
-                  const int incy, double *A, const int lda)
-{
-	cblas_dger(CblasColMajor, m, n, alpha, x, incx, y, incy, A, lda);
-}
-
-void STARPU_STRSV (const char *uplo, const char *trans, const char *diag, 
-                   const int n, const float *A, const int lda, float *x, 
-                   const int incx)
-{
-	enum CBLAS_UPLO uplo_ = (toupper(uplo[0]) == 'U')?CblasUpper:CblasLower;
-	enum CBLAS_TRANSPOSE trans_ = (toupper(trans[0]) == 'N')?CblasNoTrans:CblasTrans;
-	enum CBLAS_DIAG diag_ = (toupper(diag[0]) == 'N')?CblasNonUnit:CblasUnit;
-
-	cblas_strsv(CblasColMajor, uplo_, trans_, diag_, n, A, lda, x, incx);
-}
-
-void STARPU_STRMM(const char *side, const char *uplo, const char *transA,
-                 const char *diag, const int m, const int n,
-                 const float alpha, const float *A, const int lda,
-                 float *B, const int ldb)
-{
-	enum CBLAS_SIDE side_ = (toupper(side[0]) == 'L')?CblasLeft:CblasRight;
-	enum CBLAS_UPLO uplo_ = (toupper(uplo[0]) == 'U')?CblasUpper:CblasLower;
-	enum CBLAS_TRANSPOSE transA_ = (toupper(transA[0]) == 'N')?CblasNoTrans:CblasTrans;
-	enum CBLAS_DIAG diag_ = (toupper(diag[0]) == 'N')?CblasNonUnit:CblasUnit;
-
-	cblas_strmm(CblasColMajor, side_, uplo_, transA_, diag_, m, n, alpha, A, lda, B, ldb);
-}
-
-void STARPU_DTRMM(const char *side, const char *uplo, const char *transA,
-                 const char *diag, const int m, const int n,
-                 const double alpha, const double *A, const int lda,
-                 double *B, const int ldb)
-{
-	enum CBLAS_SIDE side_ = (toupper(side[0]) == 'L')?CblasLeft:CblasRight;
-	enum CBLAS_UPLO uplo_ = (toupper(uplo[0]) == 'U')?CblasUpper:CblasLower;
-	enum CBLAS_TRANSPOSE transA_ = (toupper(transA[0]) == 'N')?CblasNoTrans:CblasTrans;
-	enum CBLAS_DIAG diag_ = (toupper(diag[0]) == 'N')?CblasNonUnit:CblasUnit;
-
-	cblas_dtrmm(CblasColMajor, side_, uplo_, transA_, diag_, m, n, alpha, A, lda, B, ldb);
-}
-
-void STARPU_STRMV(const char *uplo, const char *transA, const char *diag,
-                 const int n, const float *A, const int lda, float *X,
-                 const int incX)
-{
-	enum CBLAS_UPLO uplo_ = (toupper(uplo[0]) == 'U')?CblasUpper:CblasLower;
-	enum CBLAS_TRANSPOSE transA_ = (toupper(transA[0]) == 'N')?CblasNoTrans:CblasTrans;
-	enum CBLAS_DIAG diag_ = (toupper(diag[0]) == 'N')?CblasNonUnit:CblasUnit;
-
-	cblas_strmv(CblasColMajor, uplo_, transA_, diag_, n, A, lda, X, incX);
-}
-
-void STARPU_SAXPY(const int n, const float alpha, float *X, const int incX, float *Y, const int incY)
-{
-	cblas_saxpy(n, alpha, X, incX, Y, incY);
-}
-
-void STARPU_DAXPY(const int n, const double alpha, double *X, const int incX, double *Y, const int incY)
-{
-	cblas_daxpy(n, alpha, X, incX, Y, incY);
-}
-
-int STARPU_ISAMAX (const int n, float *X, const int incX)
-{
-    int retVal;
-    retVal = cblas_isamax(n, X, incX);
-    return retVal;
-}
-
-int STARPU_IDAMAX (const int n, double *X, const int incX)
-{
-    int retVal;
-    retVal = cblas_idamax(n, X, incX);
-    return retVal;
-}
-
-float STARPU_SDOT(const int n, const float *x, const int incx, const float *y, const int incy)
-{
-	return cblas_sdot(n, x, incx, y, incy);
-}
-
-double STARPU_DDOT(const int n, const double *x, const int incx, const double *y, const int incy)
-{
-	return cblas_ddot(n, x, incx, y, incy);
-}
-
-void STARPU_SSWAP(const int n, float *x, const int incx, float *y, const int incy)
-{
-	cblas_sswap(n, x, incx, y, incy);
-}
-
-void STARPU_DSWAP(const int n, double *x, const int incx, double *y, const int incy)
-{
-	cblas_dswap(n, x, incx, y, incy);
-}
-
-#elif defined(STARPU_GOTO) || defined(STARPU_OPENBLAS) || defined(STARPU_SYSTEM_BLAS) || defined(STARPU_MKL) || defined(STARPU_ARMPL)
-
-inline void STARPU_SGEMM(char *transa, char *transb, int M, int N, int K, 
-			float alpha, const float *A, int lda, const float *B, int ldb, 
-			float beta, float *C, int ldc)
-{
-	sgemm_(transa, transb, &M, &N, &K, &alpha,
+	sgemm_64_(transa, transb, &M, &N, &K, &alpha,
 			 A, &lda, B, &ldb,
 			 &beta, C, &ldc);	
 }
 
-inline void STARPU_DGEMM(char *transa, char *transb, int M, int N, int K, 
-			double alpha, double *A, int lda, double *B, int ldb, 
-			double beta, double *C, int ldc)
+inline void STARPU_DGEMM(char *transa, char *transb, BLASINT M, BLASINT N, BLASINT K, 
+			double alpha, double *A, BLASINT lda, double *B, BLASINT ldb, 
+			double beta, double *C, BLASINT ldc)
 {
-	dgemm_(transa, transb, &M, &N, &K, &alpha,
+	dgemm_64_(transa, transb, &M, &N, &K, &alpha,
 			 A, &lda, B, &ldb,
 			 &beta, C, &ldc);	
 }
 
 
-inline void STARPU_SGEMV(char *transa, int M, int N, float alpha, float *A, int lda,
-		float *X, int incX, float beta, float *Y, int incY)
+inline void STARPU_SGEMV(char *transa, BLASINT M, BLASINT N, float alpha, float *A, BLASINT lda,
+		float *X, BLASINT incX, float beta, float *Y, BLASINT incY)
 {
-	sgemv_(transa, &M, &N, &alpha, A, &lda, X, &incX, &beta, Y, &incY);
+	sgemv_64_(transa, &M, &N, &alpha, A, &lda, X, &incX, &beta, Y, &incY);
 }
 
-inline void STARPU_DGEMV(char *transa, int M, int N, double alpha, double *A, int lda,
-		double *X, int incX, double beta, double *Y, int incY)
+inline void STARPU_DGEMV(char *transa, BLASINT M, BLASINT N, double alpha, double *A, BLASINT lda,
+		double *X, BLASINT incX, double beta, double *Y, BLASINT incY)
 {
-	dgemv_(transa, &M, &N, &alpha, A, &lda, X, &incX, &beta, Y, &incY);
+	dgemv_64_(transa, &M, &N, &alpha, A, &lda, X, &incX, &beta, Y, &incY);
 }
 
-inline float STARPU_SASUM(int N, float *X, int incX)
+inline float STARPU_SASUM(BLASINT N, float *X, BLASINT incX)
 {
-	return sasum_(&N, X, &incX);
+	return sasum_64_(&N, X, &incX);
 }
 
-inline double STARPU_DASUM(int N, double *X, int incX)
+inline double STARPU_DASUM(BLASINT N, double *X, BLASINT incX)
 {
-	return dasum_(&N, X, &incX);
+	return dasum_64_(&N, X, &incX);
 }
 
-void STARPU_SSCAL(int N, float alpha, float *X, int incX)
+void STARPU_SSCAL(BLASINT N, float alpha, float *X, BLASINT incX)
 {
-	sscal_(&N, &alpha, X, &incX);
+	sscal_64_(&N, &alpha, X, &incX);
 }
 
-void STARPU_DSCAL(int N, double alpha, double *X, int incX)
+void STARPU_DSCAL(BLASINT N, double alpha, double *X, BLASINT incX)
 {
-	dscal_(&N, &alpha, X, &incX);
+	dscal_64_(&N, &alpha, X, &incX);
 }
 
 void STARPU_STRSM (const char *side, const char *uplo, const char *transa,
-                   const char *diag, const int m, const int n,
-                   const float alpha, const float *A, const int lda,
-                   float *B, const int ldb)
+                   const char *diag, const BLASINT m, const BLASINT n,
+                   const float alpha, const float *A, const BLASINT lda,
+                   float *B, const BLASINT ldb)
 {
-	strsm_(side, uplo, transa, diag, &m, &n, &alpha, A, &lda, B, &ldb);
+	strsm_64_(side, uplo, transa, diag, &m, &n, &alpha, A, &lda, B, &ldb);
 }
 
 void STARPU_DTRSM (const char *side, const char *uplo, const char *transa,
-                   const char *diag, const int m, const int n,
-                   const double alpha, const double *A, const int lda,
-                   double *B, const int ldb)
+                   const char *diag, const BLASINT m, const BLASINT n,
+                   const double alpha, const double *A, const BLASINT lda,
+                   double *B, const BLASINT ldb)
 {
-	dtrsm_(side, uplo, transa, diag, &m, &n, &alpha, A, &lda, B, &ldb);
+	dtrsm_64_(side, uplo, transa, diag, &m, &n, &alpha, A, &lda, B, &ldb);
 }
 
-void STARPU_SSYR (const char *uplo, const int n, const float alpha,
-                  const float *x, const int incx, float *A, const int lda)
+void STARPU_SSYR (const char *uplo, const BLASINT n, const float alpha,
+                  const float *x, const BLASINT incx, float *A, const BLASINT lda)
 {
-	ssyr_(uplo, &n, &alpha, x, &incx, A, &lda); 
+	ssyr_64_(uplo, &n, &alpha, x, &incx, A, &lda); 
 }
 
-void STARPU_SSYRK (const char *uplo, const char *trans, const int n,
-                   const int k, const float alpha, const float *A,
-                   const int lda, const float beta, float *C,
-                   const int ldc)
+void STARPU_SSYRK (const char *uplo, const char *trans, const BLASINT n,
+                   const BLASINT k, const float alpha, const float *A,
+                   const BLASINT lda, const float beta, float *C,
+                   const BLASINT ldc)
 {
-	ssyrk_(uplo, trans, &n, &k, &alpha, A, &lda, &beta, C, &ldc); 
+	ssyrk_64_(uplo, trans, &n, &k, &alpha, A, &lda, &beta, C, &ldc); 
 }
 
-void STARPU_SGER(const int m, const int n, const float alpha,
-                  const float *x, const int incx, const float *y,
-                  const int incy, float *A, const int lda)
+void STARPU_SGER(const BLASINT m, const BLASINT n, const float alpha,
+                  const float *x, const BLASINT incx, const float *y,
+                  const BLASINT incy, float *A, const BLASINT lda)
 {
-	sger_(&m, &n, &alpha, x, &incx, y, &incy, A, &lda);
+	sger_64_(&m, &n, &alpha, x, &incx, y, &incy, A, &lda);
 }
 
-void STARPU_DGER(const int m, const int n, const double alpha,
-                  const double *x, const int incx, const double *y,
-                  const int incy, double *A, const int lda)
+void STARPU_DGER(const BLASINT m, const BLASINT n, const double alpha,
+                  const double *x, const BLASINT incx, const double *y,
+                  const BLASINT incy, double *A, const BLASINT lda)
 {
-	dger_(&m, &n, &alpha, x, &incx, y, &incy, A, &lda);
+	dger_64_(&m, &n, &alpha, x, &incx, y, &incy, A, &lda);
 }
 
 void STARPU_STRSV (const char *uplo, const char *trans, const char *diag, 
-                   const int n, const float *A, const int lda, float *x, 
-                   const int incx)
+                   const BLASINT n, const float *A, const BLASINT lda, float *x, 
+                   const BLASINT incx)
 {
-	strsv_(uplo, trans, diag, &n, A, &lda, x, &incx);
+	strsv_64_(uplo, trans, diag, &n, A, &lda, x, &incx);
 }
 
 void STARPU_STRMM(const char *side, const char *uplo, const char *transA,
-                 const char *diag, const int m, const int n,
-                 const float alpha, const float *A, const int lda,
-                 float *B, const int ldb)
+                 const char *diag, const BLASINT m, const BLASINT n,
+                 const float alpha, const float *A, const BLASINT lda,
+                 float *B, const BLASINT ldb)
 {
-	strmm_(side, uplo, transA, diag, &m, &n, &alpha, A, &lda, B, &ldb);
+	strmm_64_(side, uplo, transA, diag, &m, &n, &alpha, A, &lda, B, &ldb);
 }
 
 void STARPU_DTRMM(const char *side, const char *uplo, const char *transA,
-                 const char *diag, const int m, const int n,
-                 const double alpha, const double *A, const int lda,
-                 double *B, const int ldb)
+                 const char *diag, const BLASINT m, const BLASINT n,
+                 const double alpha, const double *A, const BLASINT lda,
+                 double *B, const BLASINT ldb)
 {
-	dtrmm_(side, uplo, transA, diag, &m, &n, &alpha, A, &lda, B, &ldb);
+	dtrmm_64_(side, uplo, transA, diag, &m, &n, &alpha, A, &lda, B, &ldb);
 }
 
 void STARPU_STRMV(const char *uplo, const char *transA, const char *diag,
-                 const int n, const float *A, const int lda, float *X,
-                 const int incX)
+                 const BLASINT n, const float *A, const BLASINT lda, float *X,
+                 const BLASINT incX)
 {
-	strmv_(uplo, transA, diag, &n, A, &lda, X, &incX);
+	strmv_64_(uplo, transA, diag, &n, A, &lda, X, &incX);
 }
 
-void STARPU_SAXPY(const int n, const float alpha, float *X, const int incX, float *Y, const int incY)
+void STARPU_SAXPY(const BLASINT n, const float alpha, float *X, const BLASINT incX, float *Y, const BLASINT incY)
 {
-	saxpy_(&n, &alpha, X, &incX, Y, &incY);
+	saxpy_64_(&n, &alpha, X, &incX, Y, &incY);
 }
 
-void STARPU_DAXPY(const int n, const double alpha, double *X, const int incX, double *Y, const int incY)
+void STARPU_DAXPY(const BLASINT n, const double alpha, double *X, const BLASINT incX, double *Y, const BLASINT incY)
 {
-	daxpy_(&n, &alpha, X, &incX, Y, &incY);
+	daxpy_64_(&n, &alpha, X, &incX, Y, &incY);
 }
 
-int STARPU_ISAMAX (const int n, float *X, const int incX)
+BLASINT STARPU_ISAMAX (const BLASINT n, float *X, const BLASINT incX)
 {
-    int retVal;
-    retVal = isamax_ (&n, X, &incX);
+    BLASINT retVal;
+    retVal = isamax_64_ (&n, X, &incX);
     return retVal;
 }
 
-int STARPU_IDAMAX (const int n, double *X, const int incX)
+BLASINT STARPU_IDAMAX (const BLASINT n, double *X, const BLASINT incX)
 {
-    int retVal;
-    retVal = idamax_ (&n, X, &incX);
+    BLASINT retVal;
+    retVal = idamax_64_ (&n, X, &incX);
     return retVal;
 }
 
-float STARPU_SDOT(const int n, const float *x, const int incx, const float *y, const int incy)
+float STARPU_SDOT(const BLASINT n, const float *x, const BLASINT incx, const float *y, const BLASINT incy)
 {
 	float retVal = 0;
 
 	/* GOTOBLAS will return a FLOATRET which is a double, not a float */
-	retVal = (float)sdot_(&n, x, &incx, y, &incy);
+	retVal = (float)sdot_64_(&n, x, &incx, y, &incy);
 
 	return retVal;
 }
 
-double STARPU_DDOT(const int n, const double *x, const int incx, const double *y, const int incy)
-{
-	return ddot_(&n, x, &incx, y, &incy);
-}
-
-void STARPU_SSWAP(const int n, float *X, const int incX, float *Y, const int incY)
-{
-	sswap_(&n, X, &incX, Y, &incY);
-}
-
-void STARPU_DSWAP(const int n, double *X, const int incX, double *Y, const int incY)
+double STARPU_DDOT(const BLASINT n, const double *x, const BLASINT incx, const double *y, const BLASINT incy)
 {
-	dswap_(&n, X, &incX, Y, &incY);
+	return ddot_64_(&n, x, &incx, y, &incy);
 }
 
-#if defined(STARPU_MKL) || defined(STARPU_ARMPL)
-void STARPU_SPOTRF(const char*uplo, const int n, float *a, const int lda)
+void STARPU_SSWAP(const BLASINT n, float *X, const BLASINT incX, float *Y, const BLASINT incY)
 {
-	int info = 0;
-	spotrf_(uplo, &n, a, &lda, &info);
+	sswap_64_(&n, X, &incX, Y, &incY);
 }
 
-void STARPU_DPOTRF(const char*uplo, const int n, double *a, const int lda)
+void STARPU_DSWAP(const BLASINT n, double *X, const BLASINT incX, double *Y, const BLASINT incY)
 {
-	int info = 0;
-	dpotrf_(uplo, &n, a, &lda, &info);
+	dswap_64_(&n, X, &incX, Y, &incY);
 }
-#endif
-
-#elif defined(STARPU_SIMGRID)
-inline void STARPU_SGEMM(char *transa, char *transb, int M, int N, int K, 
-			float alpha, const float *A, int lda, const float *B, int ldb, 
-			float beta, float *C, int ldc) { }
-
-inline void STARPU_DGEMM(char *transa, char *transb, int M, int N, int K, 
-			double alpha, double *A, int lda, double *B, int ldb, 
-			double beta, double *C, int ldc) { }
-
-inline void STARPU_SGEMV(char *transa, int M, int N, float alpha, float *A, int lda,
-		float *X, int incX, float beta, float *Y, int incY) { }
-
-inline void STARPU_DGEMV(char *transa, int M, int N, double alpha, double *A, int lda,
-		double *X, int incX, double beta, double *Y, int incY) { }
-
-inline float STARPU_SASUM(int N, float *X, int incX) { return 0.; }
-
-inline double STARPU_DASUM(int N, double *X, int incX) { return 0.; }
-
-void STARPU_SSCAL(int N, float alpha, float *X, int incX) { }
-
-void STARPU_DSCAL(int N, double alpha, double *X, int incX) { }
-
-void STARPU_STRSM (const char *side, const char *uplo, const char *transa,
-                   const char *diag, const int m, const int n,
-                   const float alpha, const float *A, const int lda,
-                   float *B, const int ldb) { }
-
-void STARPU_DTRSM (const char *side, const char *uplo, const char *transa,
-                   const char *diag, const int m, const int n,
-                   const double alpha, const double *A, const int lda,
-                   double *B, const int ldb) { }
-
-void STARPU_SSYR (const char *uplo, const int n, const float alpha,
-                  const float *x, const int incx, float *A, const int lda) { }
-
-void STARPU_SSYRK (const char *uplo, const char *trans, const int n,
-                   const int k, const float alpha, const float *A,
-                   const int lda, const float beta, float *C,
-                   const int ldc) { }
-
-void STARPU_SGER(const int m, const int n, const float alpha,
-                  const float *x, const int incx, const float *y,
-                  const int incy, float *A, const int lda) { }
-
-void STARPU_DGER(const int m, const int n, const double alpha,
-                  const double *x, const int incx, const double *y,
-                  const int incy, double *A, const int lda) { }
-
-void STARPU_STRSV (const char *uplo, const char *trans, const char *diag, 
-                   const int n, const float *A, const int lda, float *x, 
-                   const int incx) { }
-
-void STARPU_STRMM(const char *side, const char *uplo, const char *transA,
-                 const char *diag, const int m, const int n,
-                 const float alpha, const float *A, const int lda,
-                 float *B, const int ldb) { }
-
-void STARPU_DTRMM(const char *side, const char *uplo, const char *transA,
-                 const char *diag, const int m, const int n,
-                 const double alpha, const double *A, const int lda,
-                 double *B, const int ldb) { }
-
-void STARPU_STRMV(const char *uplo, const char *transA, const char *diag,
-                 const int n, const float *A, const int lda, float *X,
-                 const int incX) { }
-
-void STARPU_SAXPY(const int n, const float alpha, float *X, const int incX, float *Y, const int incY) { }
-
-void STARPU_DAXPY(const int n, const double alpha, double *X, const int incX, double *Y, const int incY) { }
-
-int STARPU_ISAMAX (const int n, float *X, const int incX) { return 0; }
-
-int STARPU_IDAMAX (const int n, double *X, const int incX) { return 0; }
-
-float STARPU_SDOT(const int n, const float *x, const int incx, const float *y, const int incy) { return 0.; }
-
-double STARPU_DDOT(const int n, const double *x, const int incx, const double *y, const int incy) { return 0.; }
-
-void STARPU_SSWAP(const int n, float *X, const int incX, float *Y, const int incY) { }
-
-void STARPU_DSWAP(const int n, double *X, const int incX, double *Y, const int incY) { }
-
-void STARPU_SPOTRF(const char*uplo, const int n, float *a, const int lda) { }
-
-void STARPU_DPOTRF(const char*uplo, const int n, double *a, const int lda) { }
-#endif

+ 116 - 134
julia/src/blas.h

@@ -17,150 +17,132 @@
 #ifndef __BLAS_H__
 #define __BLAS_H__
 
-#include <starpu.h>
+#include <stdint.h>
 
-#if defined(STARPU_ATLAS) || defined(STARPU_HAVE_CBLAS_H)
-#include <cblas.h>
-#endif
+typedef int64_t BLASINT; /* integer type taken by the _64_-suffixed BLAS symbols declared below */
 
-void STARPU_SGEMM(char *transa, char *transb, int M, int N, int K, float alpha, const float *A, int lda, 
-		const float *B, int ldb, float beta, float *C, int ldc);
-void STARPU_DGEMM(char *transa, char *transb, int M, int N, int K, double alpha, double *A, int lda, 
-		double *B, int ldb, double beta, double *C, int ldc);
-void STARPU_SGEMV(char *transa, int M, int N, float alpha, float *A, int lda,
-		float *X, int incX, float beta, float *Y, int incY);
-void STARPU_DGEMV(char *transa, int M, int N, double alpha, double *A, int lda,
-		double *X, int incX, double beta, double *Y, int incY);
-float STARPU_SASUM(int N, float *X, int incX);
-double STARPU_DASUM(int N, double *X, int incX);
-void STARPU_SSCAL(int N, float alpha, float *X, int incX);
-void STARPU_DSCAL(int N, double alpha, double *X, int incX);
+void STARPU_SGEMM(char *transa, char *transb, BLASINT M, BLASINT N, BLASINT K, float alpha, const float *A, BLASINT lda, 
+		const float *B, BLASINT ldb, float beta, float *C, BLASINT ldc);
+void STARPU_DGEMM(char *transa, char *transb, BLASINT M, BLASINT N, BLASINT K, double alpha, double *A, BLASINT lda, 
+		double *B, BLASINT ldb, double beta, double *C, BLASINT ldc);
+void STARPU_SGEMV(char *transa, BLASINT M, BLASINT N, float alpha, float *A, BLASINT lda,
+		float *X, BLASINT incX, float beta, float *Y, BLASINT incY);
+void STARPU_DGEMV(char *transa, BLASINT M, BLASINT N, double alpha, double *A, BLASINT lda,
+		double *X, BLASINT incX, double beta, double *Y, BLASINT incY);
+float STARPU_SASUM(BLASINT N, float *X, BLASINT incX);
+double STARPU_DASUM(BLASINT N, double *X, BLASINT incX);
+void STARPU_SSCAL(BLASINT N, float alpha, float *X, BLASINT incX);
+void STARPU_DSCAL(BLASINT N, double alpha, double *X, BLASINT incX);
 void STARPU_STRSM (const char *side, const char *uplo, const char *transa,
-                   const char *diag, const int m, const int n,
-                   const float alpha, const float *A, const int lda,
-                   float *B, const int ldb);
+                   const char *diag, const BLASINT m, const BLASINT n,
+                   const float alpha, const float *A, const BLASINT lda,
+                   float *B, const BLASINT ldb);
 void STARPU_DTRSM (const char *side, const char *uplo, const char *transa,
-                   const char *diag, const int m, const int n,
-                   const double alpha, const double *A, const int lda,
-                   double *B, const int ldb);
-void STARPU_DGEMM(char *transa, char *transb, int M, int N, int K, 
-			double alpha, double *A, int lda, double *B, int ldb, 
-			double beta, double *C, int ldc);
-void STARPU_SSYR (const char *uplo, const int n, const float alpha,
-                  const float *x, const int incx, float *A, const int lda);
-void STARPU_SSYRK (const char *uplo, const char *trans, const int n,
-                   const int k, const float alpha, const float *A,
-                   const int lda, const float beta, float *C,
-                   const int ldc);
-void STARPU_SGER (const int m, const int n, const float alpha,
-                  const float *x, const int incx, const float *y,
-                  const int incy, float *A, const int lda);
-void STARPU_DGER(const int m, const int n, const double alpha,
-                  const double *x, const int incx, const double *y,
-                  const int incy, double *A, const int lda);
+                   const char *diag, const BLASINT m, const BLASINT n,
+                   const double alpha, const double *A, const BLASINT lda,
+                   double *B, const BLASINT ldb);
+void STARPU_SSYR (const char *uplo, const BLASINT n, const float alpha,
+                  const float *x, const BLASINT incx, float *A, const BLASINT lda);
+void STARPU_SSYRK (const char *uplo, const char *trans, const BLASINT n,
+                   const BLASINT k, const float alpha, const float *A,
+                   const BLASINT lda, const float beta, float *C,
+                   const BLASINT ldc);
+void STARPU_SGER (const BLASINT m, const BLASINT n, const float alpha,
+                  const float *x, const BLASINT incx, const float *y,
+                  const BLASINT incy, float *A, const BLASINT lda);
+void STARPU_DGER(const BLASINT m, const BLASINT n, const double alpha,
+                  const double *x, const BLASINT incx, const double *y,
+                  const BLASINT incy, double *A, const BLASINT lda);
 void STARPU_STRSV (const char *uplo, const char *trans, const char *diag, 
-                   const int n, const float *A, const int lda, float *x, 
-                   const int incx);
+                   const BLASINT n, const float *A, const BLASINT lda, float *x, 
+                   const BLASINT incx);
 void STARPU_STRMM(const char *side, const char *uplo, const char *transA,
-                 const char *diag, const int m, const int n,
-                 const float alpha, const float *A, const int lda,
-                 float *B, const int ldb);
+                 const char *diag, const BLASINT m, const BLASINT n,
+                 const float alpha, const float *A, const BLASINT lda,
+                 float *B, const BLASINT ldb);
 void STARPU_DTRMM(const char *side, const char *uplo, const char *transA,
-                 const char *diag, const int m, const int n,
-                 const double alpha, const double *A, const int lda,
-                 double *B, const int ldb);
+                 const char *diag, const BLASINT m, const BLASINT n,
+                 const double alpha, const double *A, const BLASINT lda,
+                 double *B, const BLASINT ldb);
 void STARPU_STRMV(const char *uplo, const char *transA, const char *diag,
-                 const int n, const float *A, const int lda, float *X,
-                 const int incX);
-void STARPU_SAXPY(const int n, const float alpha, float *X, const int incX, float *Y, const int incy);
-void STARPU_DAXPY(const int n, const double alpha, double *X, const int incX, double *Y, const int incY);
-int STARPU_ISAMAX (const int n, float *X, const int incX);
-int STARPU_IDAMAX (const int n, double *X, const int incX);
-float STARPU_SDOT(const int n, const float *x, const int incx, const float *y, const int incy);
-double STARPU_DDOT(const int n, const double *x, const int incx, const double *y, const int incy);
-void STARPU_SSWAP(const int n, float *x, const int incx, float *y, const int incy);
-void STARPU_DSWAP(const int n, double *x, const int incx, double *y, const int incy);
+                 const BLASINT n, const float *A, const BLASINT lda, float *X,
+                 const BLASINT incX);
+void STARPU_SAXPY(const BLASINT n, const float alpha, float *X, const BLASINT incX, float *Y, const BLASINT incY);
+void STARPU_DAXPY(const BLASINT n, const double alpha, double *X, const BLASINT incX, double *Y, const BLASINT incY);
+BLASINT STARPU_ISAMAX (const BLASINT n, float *X, const BLASINT incX);
+BLASINT STARPU_IDAMAX (const BLASINT n, double *X, const BLASINT incX);
+float STARPU_SDOT(const BLASINT n, const float *x, const BLASINT incx, const float *y, const BLASINT incy);
+double STARPU_DDOT(const BLASINT n, const double *x, const BLASINT incx, const double *y, const BLASINT incy);
+void STARPU_SSWAP(const BLASINT n, float *x, const BLASINT incx, float *y, const BLASINT incy);
+void STARPU_DSWAP(const BLASINT n, double *x, const BLASINT incx, double *y, const BLASINT incy);
 
-#if defined(STARPU_MKL) || defined(STARPU_ARMPL)
-void STARPU_SPOTRF(const char*uplo, const int n, float *a, const int lda);
-void STARPU_DPOTRF(const char*uplo, const int n, double *a, const int lda);
-#endif
 
-#if defined(STARPU_GOTO) || defined(STARPU_OPENBLAS) || defined(STARPU_SYSTEM_BLAS) || defined(STARPU_MKL) || defined(STARPU_ARMPL)
-
-extern void sgemm_ (const char *transa, const char *transb, const int *m,
-                   const int *n, const int *k, const float *alpha, 
-                   const float *A, const int *lda, const float *B, 
-                   const int *ldb, const float *beta, float *C, 
-                   const int *ldc);
-extern void dgemm_ (const char *transa, const char *transb, const int *m,
-                   const int *n, const int *k, const double *alpha, 
-                   const double *A, const int *lda, const double *B, 
-                   const int *ldb, const double *beta, double *C, 
-                   const int *ldc);
-extern void sgemv_(const char *trans, const int *m, const int *n, const float *alpha,
-                   const float *a, const int *lda, const float *x, const int *incx, 
-                   const float *beta, float *y, const int *incy);
-extern void dgemv_(const char *trans, const int *m, const int *n, const double *alpha,
-                   const double *a, const int *lda, const double *x, const int *incx,
-                   const double *beta, double *y, const int *incy);
-extern void ssyr_ (const char *uplo, const int *n, const float *alpha,
-                  const float *x, const int *incx, float *A, const int *lda);
-extern void ssyrk_ (const char *uplo, const char *trans, const int *n,
-                   const int *k, const float *alpha, const float *A,
-                   const int *lda, const float *beta, float *C,
-                   const int *ldc);
-extern void strsm_ (const char *side, const char *uplo, const char *transa, 
-                   const char *diag, const int *m, const int *n,
-                   const float *alpha, const float *A, const int *lda,
-                   float *B, const int *ldb);
-extern void dtrsm_ (const char *side, const char *uplo, const char *transa, 
-                   const char *diag, const int *m, const int *n,
-                   const double *alpha, const double *A, const int *lda,
-                   double *B, const int *ldb);
-extern double sasum_ (const int *n, const float *x, const int *incx);
-extern double dasum_ (const int *n, const double *x, const int *incx);
-extern void sscal_ (const int *n, const float *alpha, float *x,
-                   const int *incx);
-extern void dscal_ (const int *n, const double *alpha, double *x,
-                   const int *incx);
-extern void sger_(const int *m, const int *n, const float *alpha,
-                  const float *x, const int *incx, const float *y,
-                  const int *incy, float *A, const int *lda);
-extern void dger_(const int *m, const int *n, const double *alpha,
-                  const double *x, const int *incx, const double *y,
-                  const int *incy, double *A, const int *lda);
-extern void strsv_ (const char *uplo, const char *trans, const char *diag, 
-                   const int *n, const float *A, const int *lda, float *x, 
-                   const int *incx);
-extern void strmm_(const char *side, const char *uplo, const char *transA,
-                 const char *diag, const int *m, const int *n,
-                 const float *alpha, const float *A, const int *lda,
-                 float *B, const int *ldb);
-extern void dtrmm_(const char *side, const char *uplo, const char *transA,
-                 const char *diag, const int *m, const int *n,
-                 const double *alpha, const double *A, const int *lda,
-                 double *B, const int *ldb);
-extern void strmv_(const char *uplo, const char *transA, const char *diag,
-                 const int *n, const float *A, const int *lda, float *X,
-                 const int *incX);
-extern void saxpy_(const int *n, const float *alpha, const float *X, const int *incX,
-		float *Y, const int *incy);
-extern void daxpy_(const int *n, const double *alpha, const double *X, const int *incX,
-		double *Y, const int *incy);
-extern int isamax_(const int *n, const float *X, const int *incX);
-extern int idamax_(const int *n, const double *X, const int *incX);
+extern void sgemm_64_ (const char *transa, const char *transb, const BLASINT *m,
+                   const BLASINT *n, const BLASINT *k, const float *alpha, 
+                   const float *A, const BLASINT *lda, const float *B, 
+                   const BLASINT *ldb, const float *beta, float *C, 
+                   const BLASINT *ldc);
+extern void dgemm_64_ (const char *transa, const char *transb, const BLASINT *m,
+                   const BLASINT *n, const BLASINT *k, const double *alpha, 
+                   const double *A, const BLASINT *lda, const double *B, 
+                   const BLASINT *ldb, const double *beta, double *C, 
+                   const BLASINT *ldc);
+extern void sgemv_64_(const char *trans, const BLASINT *m, const BLASINT *n, const float *alpha,
+                   const float *a, const BLASINT *lda, const float *x, const BLASINT *incx, 
+                   const float *beta, float *y, const BLASINT *incy);
+extern void dgemv_64_(const char *trans, const BLASINT *m, const BLASINT *n, const double *alpha,
+                   const double *a, const BLASINT *lda, const double *x, const BLASINT *incx,
+                   const double *beta, double *y, const BLASINT *incy);
+extern void ssyr_64_ (const char *uplo, const BLASINT *n, const float *alpha,
+                  const float *x, const BLASINT *incx, float *A, const BLASINT *lda);
+extern void ssyrk_64_ (const char *uplo, const char *trans, const BLASINT *n,
+                   const BLASINT *k, const float *alpha, const float *A,
+                   const BLASINT *lda, const float *beta, float *C,
+                   const BLASINT *ldc);
+extern void strsm_64_ (const char *side, const char *uplo, const char *transa, 
+                   const char *diag, const BLASINT *m, const BLASINT *n,
+                   const float *alpha, const float *A, const BLASINT *lda,
+                   float *B, const BLASINT *ldb);
+extern void dtrsm_64_ (const char *side, const char *uplo, const char *transa, 
+                   const char *diag, const BLASINT *m, const BLASINT *n,
+                   const double *alpha, const double *A, const BLASINT *lda,
+                   double *B, const BLASINT *ldb);
+extern float sasum_64_ (const BLASINT *n, const float *x, const BLASINT *incx); /* NOTE(review): float return assumed for the gfortran-ABI OpenBLAS shipped with Julia; confirm against its FLOATRET */
+extern double dasum_64_ (const BLASINT *n, const double *x, const BLASINT *incx);
+extern void sscal_64_ (const BLASINT *n, const float *alpha, float *x,
+                   const BLASINT *incx);
+extern void dscal_64_ (const BLASINT *n, const double *alpha, double *x,
+                   const BLASINT *incx);
+extern void sger_64_(const BLASINT *m, const BLASINT *n, const float *alpha,
+                  const float *x, const BLASINT *incx, const float *y,
+                  const BLASINT *incy, float *A, const BLASINT *lda);
+extern void dger_64_(const BLASINT *m, const BLASINT *n, const double *alpha,
+                  const double *x, const BLASINT *incx, const double *y,
+                  const BLASINT *incy, double *A, const BLASINT *lda);
+extern void strsv_64_ (const char *uplo, const char *trans, const char *diag, 
+                   const BLASINT *n, const float *A, const BLASINT *lda, float *x, 
+                   const BLASINT *incx);
+extern void strmm_64_(const char *side, const char *uplo, const char *transA,
+                 const char *diag, const BLASINT *m, const BLASINT *n,
+                 const float *alpha, const float *A, const BLASINT *lda,
+                 float *B, const BLASINT *ldb);
+extern void dtrmm_64_(const char *side, const char *uplo, const char *transA,
+                 const char *diag, const BLASINT *m, const BLASINT *n,
+                 const double *alpha, const double *A, const BLASINT *lda,
+                 double *B, const BLASINT *ldb);
+extern void strmv_64_(const char *uplo, const char *transA, const char *diag,
+                 const BLASINT *n, const float *A, const BLASINT *lda, float *X,
+                 const BLASINT *incX);
+extern void saxpy_64_(const BLASINT *n, const float *alpha, const float *X, const BLASINT *incX,
+		float *Y, const BLASINT *incy);
+extern void daxpy_64_(const BLASINT *n, const double *alpha, const double *X, const BLASINT *incX,
+		double *Y, const BLASINT *incy);
+extern BLASINT isamax_64_(const BLASINT *n, const float *X, const BLASINT *incX);
+extern BLASINT idamax_64_(const BLASINT *n, const double *X, const BLASINT *incX);
 /* for some reason, FLOATRET is not a float but a double in GOTOBLAS */
-extern double sdot_(const int *n, const float *x, const int *incx, const float *y, const int *incy);
-extern double ddot_(const int *n, const double *x, const int *incx, const double *y, const int *incy);
-extern void sswap_(const int *n, float *x, const int *incx, float *y, const int *incy);
-extern void dswap_(const int *n, double *x, const int *incx, double *y, const int *incy);
-
-#if (defined STARPU_MKL) || (defined STARPU_ARMPL)
-extern void spotrf_(const char*uplo, const int *n, float *a, const int *lda, int *info);
-extern void dpotrf_(const char*uplo, const int *n, double *a, const int *lda, int *info);
-#endif
-
-#endif
+extern float sdot_64_(const BLASINT *n, const float *x, const BLASINT *incx, const float *y, const BLASINT *incy); /* NOTE(review): float return assumed for the gfortran-ABI OpenBLAS shipped with Julia, unlike the GOTOBLAS convention noted above; confirm against its FLOATRET */
+extern double ddot_64_(const BLASINT *n, const double *x, const BLASINT *incx, const double *y, const BLASINT *incy);
+extern void sswap_64_(const BLASINT *n, float *x, const BLASINT *incx, float *y, const BLASINT *incy);
+extern void dswap_64_(const BLASINT *n, double *x, const BLASINT *incx, double *y, const BLASINT *incy);
 
 #endif /* __BLAS_H__ */