Browse Source

- add support for complex number cases in the LU example

Olivier Aumage 13 years ago
parent
commit
66e35b6222

+ 66 - 0
examples/Makefile.am

@@ -108,6 +108,9 @@ noinst_HEADERS = 				\
 	lu/xlu_kernels.h			\
 	lu/float.h				\
 	lu/double.h				\
+	lu/complex_float.h			\
+	lu/complex_double.h			\
+	lu/blas_complex.h			\
 	cholesky/cholesky.h			\
 	common/blas_model.h			\
 	common/blas.h				\
@@ -191,6 +194,14 @@ examplebin_PROGRAMS +=				\
 	cg/cg
 endif
 
+if MKL_BLAS_LIB
+examplebin_PROGRAMS +=				\
+	lu/lu_example_complex_float		\
+	lu/lu_example_complex_double		\
+	lu/lu_implicit_example_complex_float	\
+	lu/lu_implicit_example_complex_double
+endif
+
 if ATLAS_BLAS_LIB
 examplebin_PROGRAMS +=				\
 	spmv/dw_block_spmv
@@ -240,6 +251,14 @@ STARPU_EXAMPLES +=				\
 	cg/cg
 endif
 
+if MKL_BLAS_LIB
+STARPU_EXAMPLES +=				\
+	lu/lu_example_complex_float		\
+	lu/lu_example_complex_double		\
+	lu/lu_implicit_example_complex_float	\
+	lu/lu_implicit_example_complex_double
+endif
+
 if ATLAS_BLAS_LIB
 STARPU_EXAMPLES +=				\
 	spmv/dw_block_spmv
@@ -462,6 +481,53 @@ lu_lu_implicit_example_double_SOURCES =		\
 
 lu_lu_implicit_example_double_LDADD =		\
 	$(STARPU_BLAS_LDFLAGS)
+
+if MKL_BLAS_LIB
+lu_lu_example_complex_float_SOURCES =		\
+	lu/lu_example_complex_float.c		\
+	lu/clu.c				\
+	lu/clu_pivot.c				\
+	lu/clu_kernels.c			\
+	lu/blas_complex.c			\
+	common/blas.c
+
+lu_lu_example_complex_float_LDADD =		\
+	$(STARPU_BLAS_LDFLAGS)
+
+lu_lu_implicit_example_complex_float_SOURCES =	\
+	lu/lu_example_complex_float.c		\
+	lu/clu_implicit.c			\
+	lu/clu_implicit_pivot.c			\
+	lu/clu_kernels.c			\
+	lu/blas_complex.c			\
+	common/blas.c
+
+lu_lu_implicit_example_complex_float_LDADD =	\
+	$(STARPU_BLAS_LDFLAGS)
+
+lu_lu_example_complex_double_SOURCES =		\
+	lu/lu_example_complex_double.c		\
+	lu/zlu.c				\
+	lu/zlu_pivot.c				\
+	lu/zlu_kernels.c			\
+	lu/blas_complex.c			\
+	common/blas.c
+
+lu_lu_example_complex_double_LDADD =		\
+	$(STARPU_BLAS_LDFLAGS)
+
+lu_lu_implicit_example_complex_double_SOURCES =	\
+	lu/lu_example_complex_double.c		\
+	lu/zlu_implicit.c			\
+	lu/zlu_implicit_pivot.c			\
+	lu/zlu_kernels.c			\
+	lu/blas_complex.c			\
+	common/blas.c
+
+lu_lu_implicit_example_complex_double_LDADD =	\
+	$(STARPU_BLAS_LDFLAGS)
+
+endif
 endif
 
 

+ 214 - 0
examples/lu/blas_complex.c

@@ -0,0 +1,214 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009, 2010  Université de Bordeaux 1
+ * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <ctype.h>
+#include <stdio.h>
+#include <complex.h>
+
+#include <starpu.h>
+#include "blas_complex.h"
+
+/*
+    This files contains BLAS wrappers for the different BLAS implementations
+  (eg. REFBLAS, STARPU_ATLAS, GOTOBLAS ...). We assume a Fortran orientation as most
+  libraries do not supply C-based ordering.
+ */
+
+#ifdef STARPU_ATLAS
+#error not implemented
+#elif defined(STARPU_GOTO) || defined(STARPU_SYSTEM_BLAS)
+#error not implemented
+#elif defined(STARPU_MKL)
+
+inline void CGEMM(char *transa, char *transb, int M, int N, int K, 
+			complex float alpha, complex float *A, int lda, complex float *B, int ldb, 
+			complex float beta, complex float *C, int ldc)
+{
+	cgemm_(transa, transb, &M, &N, &K, &alpha,
+			 A, &lda, B, &ldb,
+			 &beta, C, &ldc);	
+}
+
+inline void ZGEMM(char *transa, char *transb, int M, int N, int K, 
+			complex double alpha, complex double *A, int lda, complex double *B, int ldb, 
+			complex double beta, complex double *C, int ldc)
+{
+	zgemm_(transa, transb, &M, &N, &K, &alpha,
+			 A, &lda, B, &ldb,
+			 &beta, C, &ldc);	
+}
+
+
+inline void CGEMV(char *transa, int M, int N, complex float alpha, complex float *A, int lda,
+		complex float *X, int incX, complex float beta, complex float *Y, int incY)
+{
+	cgemv_(transa, &M, &N, &alpha, A, &lda, X, &incX, &beta, Y, &incY);
+}
+
+inline void ZGEMV(char *transa, int M, int N, complex double alpha, complex double *A, int lda,
+		complex double *X, int incX, complex double beta, complex double *Y, int incY)
+{
+	zgemv_(transa, &M, &N, &alpha, A, &lda, X, &incX, &beta, Y, &incY);
+}
+
+inline float SCASUM(int N, complex float *X, int incX)
+{
+	return scasum_(&N, X, &incX);
+}
+
+inline double DZASUM(int N, complex double *X, int incX)
+{
+	return dzasum_(&N, X, &incX);
+}
+
+void CSCAL(int N, complex float alpha, complex float *X, int incX)
+{
+	cscal_(&N, &alpha, X, &incX);
+}
+
+void ZSCAL(int N, complex double alpha, complex double *X, int incX)
+{
+	zscal_(&N, &alpha, X, &incX);
+}
+
+void CTRSM (const char *side, const char *uplo, const char *transa,
+                   const char *diag, const int m, const int n,
+                   const complex float alpha, const complex float *A, const int lda,
+                   complex float *B, const int ldb)
+{
+	ctrsm_(side, uplo, transa, diag, &m, &n, &alpha, A, &lda, B, &ldb);
+}
+
+void ZTRSM (const char *side, const char *uplo, const char *transa,
+                   const char *diag, const int m, const int n,
+                   const complex double alpha, const complex double *A, const int lda,
+                   complex double *B, const int ldb)
+{
+	ztrsm_(side, uplo, transa, diag, &m, &n, &alpha, A, &lda, B, &ldb);
+}
+
+void CSYR (const char *uplo, const int n, const complex float alpha,
+                  const complex float *x, const int incx, complex float *A, const int lda)
+{
+	csyr_(uplo, &n, &alpha, x, &incx, A, &lda); 
+}
+
+void CSYRK (const char *uplo, const char *trans, const int n,
+                   const int k, const complex float alpha, const complex float *A,
+                   const int lda, const complex float beta, complex float *C,
+                   const int ldc)
+{
+	csyrk_(uplo, trans, &n, &k, &alpha, A, &lda, &beta, C, &ldc); 
+}
+
+void CGERU(const int m, const int n, const complex float alpha,
+                  const complex float *x, const int incx, const complex float *y,
+                  const int incy, complex float *A, const int lda)
+{
+	cgeru_(&m, &n, &alpha, x, &incx, y, &incy, A, &lda);
+}
+
+void ZGERU(const int m, const int n, const complex double alpha,
+                  const complex double *x, const int incx, const complex double *y,
+                  const int incy, complex double *A, const int lda)
+{
+	zgeru_(&m, &n, &alpha, x, &incx, y, &incy, A, &lda);
+}
+
+void CTRSV (const char *uplo, const char *trans, const char *diag, 
+                   const int n, const complex float *A, const int lda, complex float *x, 
+                   const int incx)
+{
+	ctrsv_(uplo, trans, diag, &n, A, &lda, x, &incx);
+}
+
+void CTRMM(const char *side, const char *uplo, const char *transA,
+                 const char *diag, const int m, const int n,
+                 const complex float alpha, const complex float *A, const int lda,
+                 complex float *B, const int ldb)
+{
+	ctrmm_(side, uplo, transA, diag, &m, &n, &alpha, A, &lda, B, &ldb);
+}
+
+void ZTRMM(const char *side, const char *uplo, const char *transA,
+                 const char *diag, const int m, const int n,
+                 const complex double alpha, const complex double *A, const int lda,
+                 complex double *B, const int ldb)
+{
+	ztrmm_(side, uplo, transA, diag, &m, &n, &alpha, A, &lda, B, &ldb);
+}
+
+void CTRMV(const char *uplo, const char *transA, const char *diag,
+                 const int n, const complex float *A, const int lda, complex float *X,
+                 const int incX)
+{
+	ctrmv_(uplo, transA, diag, &n, A, &lda, X, &incX);
+}
+
+void CAXPY(const int n, const complex float alpha, complex float *X, const int incX, complex float *Y, const int incY)
+{
+	caxpy_(&n, &alpha, X, &incX, Y, &incY);
+}
+
+void ZAXPY(const int n, const complex double alpha, complex double *X, const int incX, complex double *Y, const int incY)
+{
+	zaxpy_(&n, &alpha, X, &incX, Y, &incY);
+}
+
+int ICAMAX (const int n, complex float *X, const int incX)
+{
+    int retVal;
+    retVal = icamax_ (&n, X, &incX);
+    return retVal;
+}
+
+int IZAMAX (const int n, complex double *X, const int incX)
+{
+    int retVal;
+    retVal = izamax_ (&n, X, &incX);
+    return retVal;
+}
+
+complex float CDOTU(const int n, const complex float *x, const int incx, const complex float *y, const int incy)
+{
+	complex float retVal = 0;
+
+	/* GOTOBLAS will return a FLOATRET which is a double, not a float */
+	retVal = (float)cdotu_(&n, x, &incx, y, &incy);
+
+	return retVal;
+}
+
+complex double ZDOTU(const int n, const complex double *x, const int incx, const complex double *y, const int incy)
+{
+	return zdotu_(&n, x, &incx, y, &incy);
+}
+
+void CSWAP(const int n, complex float *X, const int incX, complex float *Y, const int incY)
+{
+	cswap_(&n, X, &incX, Y, &incY);
+}
+
+void ZSWAP(const int n, complex double *X, const int incX, complex double *Y, const int incY)
+{
+	zswap_(&n, X, &incX, Y, &incY);
+}
+
+
+#else
+#error "no BLAS lib available..."
+#endif

+ 156 - 0
examples/lu/blas_complex.h

@@ -0,0 +1,156 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
+ * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef __BLAS_H__
+#define __BLAS_H__
+
+#include <starpu.h>
+#if defined(STARPU_MKL)
+#define MKLcomplex8 complex float
+#define MKLcomplex16 complex double
+#endif
+
+void CGEMM(char *transa, char *transb, int M, int N, int K, complex float alpha, complex float *A, int lda, 
+		complex float *B, int ldb, complex float beta, complex float *C, int ldc);
+void ZGEMM(char *transa, char *transb, int M, int N, int K, complex double alpha, complex double *A, int lda, 
+		complex double *B, int ldb, complex double beta, complex double *C, int ldc);
+void CGEMV(char *transa, int M, int N, complex float alpha, complex float *A, int lda,
+		complex float *X, int incX, complex float beta, complex float *Y, int incY);
+void ZGEMV(char *transa, int M, int N, complex double alpha, complex double *A, int lda,
+		complex double *X, int incX, complex double beta, complex double *Y, int incY);
+float SCASUM(int N, complex float *X, int incX);
+double DZASUM(int N, complex double *X, int incX);
+void CSCAL(int N, complex float alpha, complex float *X, int incX);
+void ZSCAL(int N, complex double alpha, complex double *X, int incX);
+void CTRSM (const char *side, const char *uplo, const char *transa,
+                   const char *diag, const int m, const int n,
+                   const complex float alpha, const complex float *A, const int lda,
+                   complex float *B, const int ldb);
+void ZTRSM (const char *side, const char *uplo, const char *transa,
+                   const char *diag, const int m, const int n,
+                   const complex double alpha, const complex double *A, const int lda,
+                   complex double *B, const int ldb);
+void CSYR (const char *uplo, const int n, const complex float alpha,
+                  const complex float *x, const int incx, complex float *A, const int lda);
+void CSYRK (const char *uplo, const char *trans, const int n,
+                   const int k, const complex float alpha, const complex float *A,
+                   const int lda, const complex float beta, complex float *C,
+                   const int ldc);
+void CGERU (const int m, const int n, const complex float alpha,
+                  const complex float *x, const int incx, const complex float *y,
+                  const int incy, complex float *A, const int lda);
+void ZGERU(const int m, const int n, const complex double alpha,
+                  const complex double *x, const int incx, const complex double *y,
+                  const int incy, complex double *A, const int lda);
+void CTRSV (const char *uplo, const char *trans, const char *diag, 
+                   const int n, const complex float *A, const int lda, complex float *x, 
+                   const int incx);
+void CTRMM(const char *side, const char *uplo, const char *transA,
+                 const char *diag, const int m, const int n,
+                 const complex float alpha, const complex float *A, const int lda,
+                 complex float *B, const int ldb);
+void ZTRMM(const char *side, const char *uplo, const char *transA,
+                 const char *diag, const int m, const int n,
+                 const complex double alpha, const complex double *A, const int lda,
+                 complex double *B, const int ldb);
+void CTRMV(const char *uplo, const char *transA, const char *diag,
+                 const int n, const complex float *A, const int lda, complex float *X,
+                 const int incX);
+void CAXPY(const int n, const complex float alpha, complex float *X, const int incX, complex float *Y, const int incy);
+void ZAXPY(const int n, const complex double alpha, complex double *X, const int incX, complex double *Y, const int incY);
+int ICAMAX (const int n, complex float *X, const int incX);
+int IZAMAX (const int n, complex double *X, const int incX);
+complex float CDOTU(const int n, const complex float *x, const int incx, const complex float *y, const int incy);
+complex double ZDOTU(const int n, const complex double *x, const int incx, const complex double *y, const int incy);
+void CSWAP(const int n, complex float *x, const int incx, complex float *y, const int incy);
+void ZSWAP(const int n, complex double *x, const int incx, complex double *y, const int incy);
+
+#if defined(STARPU_GOTO) || defined(STARPU_SYSTEM_BLAS)
+#error not implemented
+#elif defined(STARPU_MKL)
+
+extern void cgemm_ (const char *transa, const char *transb, const int *m,
+                   const int *n, const int *k, const complex float *alpha, 
+                   const complex float *A, const int *lda, const complex float *B, 
+                   const int *ldb, const complex float *beta, complex float *C, 
+                   const int *ldc);
+extern void zgemm_ (const char *transa, const char *transb, const int *m,
+                   const int *n, const int *k, const complex double *alpha, 
+                   const complex double *A, const int *lda, const complex double *B, 
+                   const int *ldb, const complex double *beta, complex double *C, 
+                   const int *ldc);
+extern void cgemv_(const char *trans, int *m, int *n, complex float *alpha,
+                   void *a, int *lda, void *x, int *incx, 
+                   complex float *beta, void *y, int *incy);
+extern void zgemv_(const char *trans, int *m, int *n, complex double *alpha,
+                   void *a, int *lda, void *x, int *incx,
+                   complex double *beta, void *y, int *incy);
+extern void csyr_ (const char *uplo, const int *n, const complex float *alpha,
+                  const complex float *x, const int *incx, complex float *A, const int *lda);
+extern void csyrk_ (const char *uplo, const char *trans, const int *n,
+                   const int *k, const complex float *alpha, const complex float *A,
+                   const int *lda, const complex float *beta, complex float *C,
+                   const int *ldc);
+extern void ctrsm_ (const char *side, const char *uplo, const char *transa, 
+                   const char *diag, const int *m, const int *n,
+                   const complex float *alpha, const complex float *A, const int *lda,
+                   complex float *B, const int *ldb);
+extern void ztrsm_ (const char *side, const char *uplo, const char *transa, 
+                   const char *diag, const int *m, const int *n,
+                   const complex double *alpha, const complex double *A, const int *lda,
+                   complex double *B, const int *ldb);
+extern complex double scasum_ (const int *n, const complex float *x, const int *incx);
+extern complex double dzasum_ (const int *n, const complex double *x, const int *incx);
+extern void cscal_ (const int *n, const complex float *alpha, complex float *x,
+                   const int *incx);
+extern void zscal_ (const int *n, const complex double *alpha, complex double *x,
+                   const int *incx);
+extern void cgeru_(const int *m, const int *n, const complex float *alpha,
+                  const complex float *x, const int *incx, const complex float *y,
+                  const int *incy, complex float *A, const int *lda);
+extern void zgeru_(const int *m, const int *n, const complex double *alpha,
+                  const complex double *x, const int *incx, const complex double *y,
+                  const int *incy, complex double *A, const int *lda);
+extern void ctrsv_ (const char *uplo, const char *trans, const char *diag, 
+                   const int *n, const complex float *A, const int *lda, complex float *x, 
+                   const int *incx);
+extern void ctrmm_(const char *side, const char *uplo, const char *transA,
+                 const char *diag, const int *m, const int *n,
+                 const complex float *alpha, const complex float *A, const int *lda,
+                 complex float *B, const int *ldb);
+extern void ztrmm_(const char *side, const char *uplo, const char *transA,
+                 const char *diag, const int *m, const int *n,
+                 const complex double *alpha, const complex double *A, const int *lda,
+                 complex double *B, const int *ldb);
+extern void ctrmv_(const char *uplo, const char *transA, const char *diag,
+                 const int *n, const complex float *A, const int *lda, complex float *X,
+                 const int *incX);
+extern void caxpy_(const int *n, const complex float *alpha, complex float *X, const int *incX,
+		complex float *Y, const int *incy);
+extern void zaxpy_(const int *n, const complex double *alpha, complex double *X, const int *incX,
+		complex double *Y, const int *incy);
+extern int icamax_(const int *n, complex float *X, const int *incX);
+extern int izamax_(const int *n, complex double *X, const int *incX);
+/* for some reason, FLOATRET is not a float but a double in GOTOBLAS */
+extern complex double cdotu_(const int *n, const complex float *x, const int *incx, const complex float *y, const int *incy);
+extern complex double zdotu_(const int *n, const complex double *x, const int *incx, const complex double *y, const int *incy);
+extern void cswap_(const int *n, complex float *x, const int *incx, complex float *y, const int *incy);
+extern void zswap_(const int *n, complex double *x, const int *incx, complex double *y, const int *incy);
+
+#endif
+
+#endif /* __BLAS_COMPLEX_H__ */

+ 19 - 0
examples/lu/clu.c

@@ -0,0 +1,19 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009, 2010  Université de Bordeaux 1
+ * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "complex_float.h"
+#include "xlu.c"

+ 19 - 0
examples/lu/clu_implicit.c

@@ -0,0 +1,19 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010  Université de Bordeaux 1
+ * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "complex_float.h"
+#include "xlu_implicit.c"

+ 19 - 0
examples/lu/clu_implicit_pivot.c

@@ -0,0 +1,19 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010  Université de Bordeaux 1
+ * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "complex_float.h"
+#include "xlu_implicit_pivot.c"

+ 19 - 0
examples/lu/clu_kernels.c

@@ -0,0 +1,19 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009, 2010  Université de Bordeaux 1
+ * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "complex_float.h"
+#include "xlu_kernels.c"

+ 19 - 0
examples/lu/clu_pivot.c

@@ -0,0 +1,19 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009, 2010  Université de Bordeaux 1
+ * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "complex_float.h"
+#include "xlu_pivot.c"

+ 52 - 0
examples/lu/complex_double.h

@@ -0,0 +1,52 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009, 2010  Université de Bordeaux 1
+ * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+
+#include <complex.h>
+#include "blas_complex.h"
+#define TYPE complex double
+#define CUBLAS_TYPE cuDoubleComplex
+
+#define STARPU_LU(name)       starpu_zlu_##name
+#define COMPLEX_LU
+
+#ifdef STARPU_HAVE_MAGMA
+#include <magmablas.h>
+#define CUBLAS_GEMM	magmablas_zgemm
+#define CUBLAS_TRSM	magmablas_ztrsm
+#else
+#define CUBLAS_GEMM	cublasZgemm
+#define CUBLAS_TRSM	cublasZtrsm
+#endif
+
+#define CUBLAS_SCAL	cublasZscal
+#define CUBLAS_GER	cublasZgeru
+#define CUBLAS_SWAP	cublasZswap
+#define CUBLAS_IAMAX	cublasIzamax
+
+#define CPU_GEMM	ZGEMM
+#define CPU_TRSM	ZTRSM
+#define CPU_SCAL	ZSCAL
+#define CPU_GER		ZGERU
+#define CPU_SWAP	ZSWAP
+
+#define CPU_TRMM	ZTRMM
+#define CPU_AXPY	ZAXPY
+#define CPU_ASUM	DZASUM
+#define CPU_IAMAX	IZAMAX
+
+#define PIVOT_THRESHHOLD	10e-5

+ 52 - 0
examples/lu/complex_float.h

@@ -0,0 +1,52 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009, 2010  Université de Bordeaux 1
+ * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+
+#include <complex.h>
+#include "blas_complex.h"
+#define TYPE complex float
+#define CUBLAS_TYPE cuComplex
+
+#define STARPU_LU(name)       starpu_clu_##name
+#define COMPLEX_LU
+
+#ifdef STARPU_HAVE_MAGMA
+#include <magmablas.h>
+#define CUBLAS_GEMM	magmablas_cgemm
+#define CUBLAS_TRSM	magmablas_ctrsm
+#else
+#define CUBLAS_GEMM	cublasCgemm
+#define CUBLAS_TRSM	cublasCtrsm
+#endif
+
+#define CUBLAS_SCAL	cublasCscal
+#define CUBLAS_GER	cublasCgeru
+#define CUBLAS_SWAP	cublasCswap
+#define CUBLAS_IAMAX	cublasIcamax
+
+#define CPU_GEMM	CGEMM
+#define CPU_TRSM	CTRSM
+#define CPU_SCAL	CSCAL
+#define CPU_GER		CGERU
+#define CPU_SWAP	CSWAP
+
+#define CPU_TRMM	CTRMM
+#define CPU_AXPY	CAXPY
+#define CPU_ASUM	SCASUM
+#define CPU_IAMAX	ICAMAX
+
+#define PIVOT_THRESHHOLD	10e-5

+ 1 - 0
examples/lu/double.h

@@ -16,6 +16,7 @@
  */
 
 #define TYPE double
+#define CUBLAS_TYPE TYPE
 
 #define STARPU_LU(name)       starpu_dlu_##name
 

+ 1 - 0
examples/lu/float.h

@@ -17,6 +17,7 @@
 
 
 #define TYPE float
+#define CUBLAS_TYPE TYPE
 
 #define STARPU_LU(name)       starpu_slu_##name
 

+ 13 - 0
examples/lu/lu_example.c

@@ -164,6 +164,10 @@ static void init_matrix(void)
 		for (i = 0; i < size; i++)
 		{
 			A[i + j*size] = (TYPE)starpu_drand48();
+#ifdef COMPLEX_LU
+			/* also randomize the imaginary component for complex number cases */
+			A[i + j*size] += (TYPE)(I*starpu_drand48());
+#endif
 		}
 	}
 
@@ -249,11 +253,20 @@ static void check_result(void)
 	CPU_AXPY(size*size, -1.0, A_saved, 1, L, 1);
 	display_matrix(L, size, size, "Residuals");
 	
+#ifdef COMPLEX_LU
+	double err = CPU_ASUM(size*size, L, 1);
+	int max = CPU_IAMAX(size*size, L, 1);
+	TYPE l_max = L[max];
+
+	FPRINTF(stderr, "Avg error : %e\n", err/(size*size));
+	FPRINTF(stderr, "Max error : %e\n", sqrt(creal(l_max)*creal(l_max)+cimag(l_max)*cimag(l_max)));
+#else
 	TYPE err = CPU_ASUM(size*size, L, 1);
 	int max = CPU_IAMAX(size*size, L, 1);
 
 	FPRINTF(stderr, "Avg error : %e\n", err/(size*size));
 	FPRINTF(stderr, "Max error : %e\n", L[max]);
+#endif
 
 	double residual = frobenius_norm(L, size);
 	double matnorm = frobenius_norm(A_saved, size);

+ 19 - 0
examples/lu/lu_example_complex_double.c

@@ -0,0 +1,19 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009, 2010  Université de Bordeaux 1
+ * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "complex_double.h"
+#include "lu_example.c"

+ 19 - 0
examples/lu/lu_example_complex_float.c

@@ -0,0 +1,19 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009, 2010  Université de Bordeaux 1
+ * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "complex_float.h"
+#include "lu_example.c"

+ 28 - 18
examples/lu/xlu_kernels.c

@@ -22,6 +22,11 @@
 #define xstr(s)        str(s)
 #define STARPU_LU_STR(name)  xstr(STARPU_LU(name))
 
+#ifdef STARPU_USE_CUDA
+static const TYPE p1 =  1.0f;
+static const TYPE m1 = -1.0f;
+#endif
+
 /*
  *   U22 
  */
@@ -54,10 +59,10 @@ static inline void STARPU_LU(common_u22)(void *descr[],
 			break;
 
 #ifdef STARPU_USE_CUDA
-		case 1:
+		case 1: {
 			CUBLAS_GEMM('n', 'n', dx, dy, dz,
-				(TYPE)-1.0, right, ld21, left, ld12,
-				(TYPE)1.0f, center, ld22);
+				*(CUBLAS_TYPE*)&m1, (CUBLAS_TYPE *)right, ld21, (CUBLAS_TYPE *)left, ld12,
+				*(CUBLAS_TYPE*)&p1, (CUBLAS_TYPE *)center, ld22);
 
 			status = cublasGetError();
 			if (STARPU_UNLIKELY(status != CUBLAS_STATUS_SUCCESS))
@@ -67,6 +72,7 @@ static inline void STARPU_LU(common_u22)(void *descr[],
 				STARPU_CUDA_REPORT_ERROR(cures);
 
 			break;
+		}
 #endif
 		default:
 			STARPU_ABORT();
@@ -140,7 +146,7 @@ static inline void STARPU_LU(common_u12)(void *descr[],
 #ifdef STARPU_USE_CUDA
 		case 1:
 			CUBLAS_TRSM('L', 'L', 'N', 'N', ny12, nx12,
-					(TYPE)1.0, sub11, ld11, sub12, ld12);
+					*(CUBLAS_TYPE*)&p1, (CUBLAS_TYPE*)sub11, ld11, (CUBLAS_TYPE*)sub12, ld12);
 
 			status = cublasGetError();
 			if (STARPU_UNLIKELY(status != CUBLAS_STATUS_SUCCESS))
@@ -221,7 +227,7 @@ static inline void STARPU_LU(common_u21)(void *descr[],
 #ifdef STARPU_USE_CUDA
 		case 1:
 			CUBLAS_TRSM('R', 'U', 'N', 'U', ny21, nx21,
-					(TYPE)1.0, sub11, ld11, sub21, ld21);
+					*(CUBLAS_TYPE*)&p1, (CUBLAS_TYPE*)sub11, ld11, (CUBLAS_TYPE*)sub21, ld21);
 
 			status = cublasGetError();
 			if (status != CUBLAS_STATUS_SUCCESS)
@@ -307,17 +313,19 @@ static inline void STARPU_LU(common_u11)(void *descr[],
 			for (z = 0; z < nx; z++)
 			{
 				TYPE pivot;
+				TYPE inv_pivot;
 				cudaMemcpy(&pivot, &sub11[z+z*ld], sizeof(TYPE), cudaMemcpyDeviceToHost);
 				cudaStreamSynchronize(0);
 
 				STARPU_ASSERT(pivot != 0.0);
 				
-				CUBLAS_SCAL(nx - z - 1, 1.0/pivot, &sub11[z+(z+1)*ld], ld);
+				inv_pivot = 1.0/pivot;
+				CUBLAS_SCAL(nx - z - 1, *(CUBLAS_TYPE*)&inv_pivot, (CUBLAS_TYPE*)&sub11[z+(z+1)*ld], ld);
 				
-				CUBLAS_GER(nx - z - 1, nx - z - 1, -1.0,
-						&sub11[(z+1)+z*ld], 1,
-						&sub11[z+(z+1)*ld], ld,
-						&sub11[(z+1) + (z+1)*ld],ld);
+				CUBLAS_GER(nx - z - 1, nx - z - 1, *(CUBLAS_TYPE*)&m1,
+						(CUBLAS_TYPE*)&sub11[(z+1)+z*ld], 1,
+						(CUBLAS_TYPE*)&sub11[z+(z+1)*ld], ld,
+						(CUBLAS_TYPE*)&sub11[(z+1) + (z+1)*ld],ld);
 			}
 			
 			cudaThreadSynchronize();
@@ -423,20 +431,21 @@ static inline void STARPU_LU(common_u11_pivot)(void *descr[],
 			for (z = 0; z < nx; z++)
 			{
 				TYPE pivot;
+				TYPE inv_pivot;
 				cudaMemcpy(&pivot, &sub11[z+z*ld], sizeof(TYPE), cudaMemcpyDeviceToHost);
 				cudaStreamSynchronize(0);
 
 				if (fabs((double)(pivot)) < PIVOT_THRESHHOLD)
 				{
 					/* find the pivot */
-					int piv_ind = CUBLAS_IAMAX(nx - z, &sub11[z*(ld+1)], ld) - 1;
+					int piv_ind = CUBLAS_IAMAX(nx - z, (CUBLAS_TYPE*)&sub11[z*(ld+1)], ld) - 1;
 	
 					ipiv[z + first] = piv_ind + z + first;
 
 					/* swap if needed */
 					if (piv_ind != 0)
 					{
-						CUBLAS_SWAP(nx, &sub11[z*ld], 1, &sub11[(z+piv_ind)*ld], 1);
+						CUBLAS_SWAP(nx, (CUBLAS_TYPE*)&sub11[z*ld], 1, (CUBLAS_TYPE*)&sub11[(z+piv_ind)*ld], 1);
 					}
 
 					cudaMemcpy(&pivot, &sub11[z+z*ld], sizeof(TYPE), cudaMemcpyDeviceToHost);
@@ -445,12 +454,13 @@ static inline void STARPU_LU(common_u11_pivot)(void *descr[],
 
 				STARPU_ASSERT(pivot != 0.0);
 				
-				CUBLAS_SCAL(nx - z - 1, 1.0/pivot, &sub11[z+(z+1)*ld], ld);
+				inv_pivot = 1.0/pivot;
+				CUBLAS_SCAL(nx - z - 1, *(CUBLAS_TYPE*)&inv_pivot, (CUBLAS_TYPE*)&sub11[z+(z+1)*ld], ld);
 				
-				CUBLAS_GER(nx - z - 1, nx - z - 1, -1.0,
-						&sub11[(z+1)+z*ld], 1,
-						&sub11[z+(z+1)*ld], ld,
-						&sub11[(z+1) + (z+1)*ld],ld);
+				CUBLAS_GER(nx - z - 1, nx - z - 1, *(CUBLAS_TYPE*)&m1,
+						(CUBLAS_TYPE*)&sub11[(z+1)+z*ld], 1,
+						(CUBLAS_TYPE*)&sub11[z+(z+1)*ld], ld,
+						(CUBLAS_TYPE*)&sub11[(z+1) + (z+1)*ld],ld);
 				
 			}
 
@@ -534,7 +544,7 @@ static inline void STARPU_LU(common_pivot)(void *descr[],
 				unsigned rowpiv = ipiv[row+first] - first;
 				if (rowpiv != row)
 				{
-					CUBLAS_SWAP(nx, &matrix[row*ld], 1, &matrix[rowpiv*ld], 1);
+					CUBLAS_SWAP(nx, (CUBLAS_TYPE*)&matrix[row*ld], 1, (CUBLAS_TYPE*)&matrix[rowpiv*ld], 1);
 				}
 			}
 

+ 19 - 0
examples/lu/zlu.c

@@ -0,0 +1,19 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009, 2010  Université de Bordeaux 1
+ * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "complex_double.h"
+#include "xlu.c"

+ 19 - 0
examples/lu/zlu_implicit.c

@@ -0,0 +1,19 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010  Université de Bordeaux 1
+ * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "complex_double.h"
+#include "xlu_implicit.c"

+ 19 - 0
examples/lu/zlu_implicit_pivot.c

@@ -0,0 +1,19 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010  Université de Bordeaux 1
+ * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "complex_double.h"
+#include "xlu_implicit_pivot.c"

+ 19 - 0
examples/lu/zlu_kernels.c

@@ -0,0 +1,19 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009, 2010  Université de Bordeaux 1
+ * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "complex_double.h"
+#include "xlu_kernels.c"

+ 19 - 0
examples/lu/zlu_pivot.c

@@ -0,0 +1,19 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009, 2010  Université de Bordeaux 1
+ * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "complex_double.h"
+#include "xlu_pivot.c"