Parcourir la source

Remove the "pastix wrappers" which have been left unmaintained/unused for a
while.

Cédric Augonnet il y a 15 ans
Parent
commit
902b2f1412

+ 0 - 28
examples/pastix-wrappers/Makefile

@@ -1,28 +0,0 @@
-#
-# StarPU
-# Copyright (C) INRIA 2008-2009 (see AUTHORS file)
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at
-# your option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#
-# See the GNU Lesser General Public License in COPYING.LGPL for more details.
-#
-
-STARPU= ../../libstarpu.a
-
-all: starpu-blas-wrapper.a
-
-starpu-blas-wrapper.o: starpu-blas-wrapper.c starpu-blas-wrapper.h
-	$(CC) $(CFLAGS) starpu-blas-wrapper.c -c -o starpu-blas-wrapper.o
-
-starpu-blas-wrapper.a: starpu-blas-wrapper.o
-	$(AR) rcs starpu-blas-wrapper.a starpu-blas-wrapper.o
-
-clean:
-	@rm -f *.a *.o *.d *.gcno *.gcda

+ 0 - 34
examples/pastix-wrappers/generated_model.h

@@ -1,34 +0,0 @@
-/*
- * StarPU
- * Copyright (C) INRIA 2008-2009 (see AUTHORS file)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#define GEMM_CPU_A  9.627597e-05
-#define GEMM_CPU_B  2.909318e-03
-#define GEMM_CPU_C  1.233512e-01
-#define GEMM_CPU_D  -2.789676e-01
-#define GEMM_CPU_E  -1.719838e-01
-#define GEMM_CPU_F  2.032491e+01
-#define GEMM_GPU_A  1.564597e-05
-#define GEMM_GPU_B  1.643119e-04
-#define GEMM_GPU_C  1.990316e-02
-#define GEMM_GPU_D  -1.120220e-02
-#define GEMM_GPU_E  2.416027e-01
-#define GEMM_GPU_F  1.974529e+01
-#define TRSM_GPU_A 4.302117e-06
-#define TRSM_GPU_B 5.423172e-01
-#define TRSM_GPU_C -4.868755e+00
-#define TRSM_CPU_A 1.362886e-05
-#define TRSM_CPU_B 6.283488e-01
-#define TRSM_CPU_C -4.053346e+01

+ 0 - 59
examples/pastix-wrappers/models/Makefile

@@ -1,59 +0,0 @@
-#
-# StarPU
-# Copyright (C) INRIA 2008-2009 (see AUTHORS file)
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at
-# your option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#
-# See the GNU Lesser General Public License in COPYING.LGPL for more details.
-#
-
-all: reg_gemm reg_trsm 
-
-reg_axpy: reg_axpy.c num_rec
-	gcc -o reg_axpy reg_axpy.c num_recipes/*.o -Inum_recipes/ -lm
-
-reg_copy: reg_copy.c num_rec
-	gcc -o reg_copy reg_copy.c num_recipes/*.o -Inum_recipes/ -lm
-
-reg_geam: reg_geam.c num_rec
-	gcc -o reg_geam reg_geam.c num_recipes/*.o -Inum_recipes/ -lm
-
-reg_gemm: reg_gemm.c num_rec
-	gcc -g -o reg_gemm reg_gemm.c num_recipes/*.o -Inum_recipes/ -lm
-
-reg_pof: reg_pof.c num_rec
-	gcc -o reg_pof reg_pof.c num_recipes/*.o -Inum_recipes/ -lm
-
-reg_ppf: reg_ppf.c num_rec
-	gcc -o reg_ppf reg_ppf.c num_recipes/*.o -Inum_recipes/ -lm
-
-reg_scal: reg_scal.c num_rec
-	gcc -o reg_scal reg_scal.c num_recipes/*.o -Inum_recipes/ -lm
-
-reg_trsm: reg_trsm.c num_rec
-	gcc -g -o reg_trsm reg_trsm.c num_recipes/*.o -Inum_recipes/ -lm
-
-num_rec: covsrt.o gaussj.o lfit.o nrutil.o num_recipes/nr.h num_recipes/nrutil.h num_recipes/complex.h
-
-covsrt.o: num_recipes/covsrt.c
-	gcc -g -o num_recipes/covsrt.o -c num_recipes/covsrt.c
-
-gaussj.o: num_recipes/gaussj.c
-	gcc -g -o num_recipes/gaussj.o -c num_recipes/gaussj.c
-
-lfit.o: num_recipes/lfit.c
-	gcc -g -o num_recipes/lfit.o -c num_recipes/lfit.c
-
-nrutil.o: num_recipes/nrutil.c
-	gcc -g -o num_recipes/nrutil.o -c num_recipes/nrutil.c
-
-clean:
-	rm -f reg_gemm reg_trsm
-	rm -rf *.o num_recipes/*.o

+ 0 - 31
examples/pastix-wrappers/models/model.sh

@@ -1,31 +0,0 @@
-#!/bin/bash
-
-#
-# StarPU
-# Copyright (C) INRIA 2008-2009 (see AUTHORS file)
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 2.1 of the License, or (at
-# your option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#
-# See the GNU Lesser General Public License in COPYING.LGPL for more details.
-#
-
-
-rm -f generated_model.h
-
-# contrib compact 
-./reg_gemm /home/gonnet/These/StarPU-stable/.sampling/starpu_compute_contrib_compact.barracuda.core.debug `wc -l /home/gonnet/These/StarPU-stable/.sampling/starpu_compute_contrib_compact.barracuda.core.debug` 2> /dev/null|sed -s s/GEMM/GEMM_CPU/ >  generated_model.h
-./reg_gemm /home/gonnet/These/StarPU-stable/.sampling/starpu_compute_contrib_compact.barracuda.cuda.debug `wc -l /home/gonnet/These/StarPU-stable/.sampling/starpu_compute_contrib_compact.barracuda.cuda.debug` 2> /dev/null|sed -s s/GEMM/GEMM_GPU/ >>  generated_model.h
-
-# strsm
-
-./reg_trsm /home/gonnet/These/StarPU-stable/.sampling/starpu_cblk_strsm.barracuda.cuda.debug `wc -l /home/gonnet/These/StarPU-stable/.sampling/starpu_cblk_strsm.barracuda.cuda.debug` 2> /dev/null|sed -s s/TRSM/TRSM_GPU/ >>  generated_model.h
-./reg_trsm /home/gonnet/These/StarPU-stable/.sampling/starpu_cblk_strsm.barracuda.core.debug `wc -l /home/gonnet/These/StarPU-stable/.sampling/starpu_cblk_strsm.barracuda.core.debug` 2> /dev/null|sed -s s/TRSM/TRSM_CPU/ >>  generated_model.h
-
-cat generated_model.h

+ 0 - 26
examples/pastix-wrappers/models/num_recipes/complex.h

@@ -1,26 +0,0 @@
-/* CAUTION: This is the ANSI C (only) version of the Numerical Recipes
-   utility file complex.h.  Do not confuse this file with the same-named
-   file complex.h that is supplied in the 'misc' subdirectory.
-   *That* file is the one from the book, and contains both ANSI and
-   traditional K&R versions, along with #ifdef macros to select the
-   correct version.  *This* file contains only ANSI C.               */
-
-#ifndef _NR_COMPLEX_H_
-#define _NR_COMPLEX_H_
-
-#ifndef _FCOMPLEX_DECLARE_T_
-typedef struct FCOMPLEX {float r,i;} fcomplex;
-#define _FCOMPLEX_DECLARE_T_
-#endif /* _FCOMPLEX_DECLARE_T_ */
-
-fcomplex Cadd(fcomplex a, fcomplex b);
-fcomplex Csub(fcomplex a, fcomplex b);
-fcomplex Cmul(fcomplex a, fcomplex b);
-fcomplex Complex(float re, float im);
-fcomplex Conjg(fcomplex z);
-fcomplex Cdiv(fcomplex a, fcomplex b);
-float Cabs(fcomplex z);
-fcomplex Csqrt(fcomplex z);
-fcomplex RCmul(float x, fcomplex a);
-
-#endif /* _NR_COMPLEX_H_ */

+ 0 - 20
examples/pastix-wrappers/models/num_recipes/covsrt.c

@@ -1,20 +0,0 @@
-#define REAL double
-#define SWAP(a,b) {swap=(a);(a)=(b);(b)=swap;}
-
-void covsrt(REAL **covar, int ma, int ia[], int mfit)
-{
-	int i,j,k;
-	REAL swap;
-
-	for (i=mfit+1;i<=ma;i++)
-		for (j=1;j<=i;j++) covar[i][j]=covar[j][i]=0.0;
-	k=mfit;
-	for (j=ma;j>=1;j--) {
-		if (ia[j]) {
-			for (i=1;i<=ma;i++) SWAP(covar[i][k],covar[i][j])
-			for (i=1;i<=ma;i++) SWAP(covar[k][i],covar[j][i])
-			k--;
-		}
-	}
-}
-#undef SWAP

+ 0 - 60
examples/pastix-wrappers/models/num_recipes/gaussj.c

@@ -1,60 +0,0 @@
-#include <math.h>
-#define NRANSI
-#include "nrutil.h"
-//#define REAL float
-#define SWAP(a,b) {temp=(a);(a)=(b);(b)=temp;}
-
-void gaussj(REAL **a, int n, REAL **b, int m)
-{
-	int *indxc,*indxr,*ipiv;
-	int i,icol,irow,j,k,l,ll;
-	REAL big,dum,pivinv,temp;
-
-	indxc=ivector(1,n);
-	indxr=ivector(1,n);
-	ipiv=ivector(1,n);
-	for (j=1;j<=n;j++) ipiv[j]=0;
-	for (i=1;i<=n;i++) {
-		big=0.0;
-		for (j=1;j<=n;j++)
-			if (ipiv[j] != 1)
-				for (k=1;k<=n;k++) {
-					if (ipiv[k] == 0) {
-						if (fabs(a[j][k]) >= big) {
-							big=fabs(a[j][k]);
-							irow=j;
-							icol=k;
-						}
-					} else if (ipiv[k] > 1) nrerror("gaussj: Singular Matrix-1");
-				}
-		++(ipiv[icol]);
-		if (irow != icol) {
-			for (l=1;l<=n;l++) SWAP(a[irow][l],a[icol][l])
-			for (l=1;l<=m;l++) SWAP(b[irow][l],b[icol][l])
-		}
-		indxr[i]=irow;
-		indxc[i]=icol;
-		if (a[icol][icol] == 0.0) nrerror("gaussj: Singular Matrix-2");
-		pivinv=1.0/a[icol][icol];
-		a[icol][icol]=1.0;
-		for (l=1;l<=n;l++) a[icol][l] *= pivinv;
-		for (l=1;l<=m;l++) b[icol][l] *= pivinv;
-		for (ll=1;ll<=n;ll++)
-			if (ll != icol) {
-				dum=a[ll][icol];
-				a[ll][icol]=0.0;
-				for (l=1;l<=n;l++) a[ll][l] -= a[icol][l]*dum;
-				for (l=1;l<=m;l++) b[ll][l] -= b[icol][l]*dum;
-			}
-	}
-	for (l=n;l>=1;l--) {
-		if (indxr[l] != indxc[l])
-			for (k=1;k<=n;k++)
-				SWAP(a[k][indxr[l]],a[k][indxc[l]]);
-	}
-	free_ivector(ipiv,1,n);
-	free_ivector(indxr,1,n);
-	free_ivector(indxc,1,n);
-}
-#undef SWAP
-#undef NRANSI

+ 0 - 66
examples/pastix-wrappers/models/num_recipes/lfit.c

@@ -1,66 +0,0 @@
-#define NRANSI
-#include <stdio.h>
-#include <stdlib.h>
-#include "nrutil.h"
-
-#define REAL float
-
-void lfit(REAL x[], REAL y[], REAL sig[], int ndat, REAL a[], int ia[],
-	int ma, REAL **covar, REAL *chisq, void (*funcs)(REAL, REAL [], int))
-{
-	void covsrt(REAL **covar, int ma, int ia[], int mfit);
-	void gaussj(REAL **a, int n, REAL **b, int m);
-	int i,j,k,l,m,mfit=0;
-	REAL ym,wt,sum,sig2i,**beta,*afunc;
-
-	
-
-	beta=matrix(1,ma,1,1);
-	afunc=vector(1,ma);
-	for (j=1;j<=ma;j++)
-		if (ia[j]) mfit++;
-	if (mfit == 0) nrerror("lfit: no parameters to be fitted");
-	for (j=1;j<=mfit;j++) {
-		for (k=1;k<=mfit;k++) covar[j][k]=0.0;
-		beta[j][1]=0.0;
-	}
-
-	for (i=1;i<=ndat;i++) {
-		(*funcs)(x[i],afunc,ma);
-		ym=y[i];
-		if (mfit < ma) {
-			for (j=1;j<=ma;j++)
-				if (!ia[j]) ym -= a[j]*afunc[j];
-		}
-		sig2i=1.0/SQR(sig[i]);
-		for (j=0,l=1;l<=ma;l++) {
-			if (ia[l]) {
-				wt=afunc[l]*sig2i;
-				for (j++,k=0,m=1;m<=l;m++)
-					if (ia[m]) covar[j][++k] += wt*afunc[m];
-				beta[j][1] += ym*wt;
-			}
-		}
-	}
-	
-	for (j=2;j<=mfit;j++)
-		for (k=1;k<j;k++)
-			covar[k][j]=covar[j][k];
-//	printf("lfit : gaussj\n");	
-	gaussj(covar,mfit,beta,1);
-//	printf("lfit1\n");
-	for (j=0,l=1;l<=ma;l++)
-		if (ia[l]) a[l]=beta[++j][1];
-//	printf("lfit2\n");
-	*chisq=0.0;
-
-	for (i=1;i<=ndat;i++) {
-		(*funcs)(x[i],afunc,ma);
-		for (sum=0.0,j=1;j<=ma;j++) sum += a[j]*afunc[j];
-		*chisq += SQR((y[i]-sum)/sig[i]);
-	}
-	covsrt(covar,ma,ia,mfit);
-	free_vector(afunc,1,ma);
-	free_matrix(beta,1,ma,1,1);
-}
-#undef NRANSI

+ 0 - 530
examples/pastix-wrappers/models/num_recipes/nr.h

@@ -1,530 +0,0 @@
-/* CAUTION: This is the ANSI C (only) version of the Numerical Recipes
-   utility file nr.h.  Do not confuse this file with the same-named
-   file nr.h that is supplied in the 'misc' subdirectory.
-   *That* file is the one from the book, and contains both ANSI and
-   traditional K&R versions, along with #ifdef macros to select the
-   correct version.  *This* file contains only ANSI C.               */
-
-#ifndef _NR_H_
-#define _NR_H_
-
-#define REAL double
-
-
-#ifndef _FCOMPLEX_DECLARE_T_
-typedef struct FCOMPLEX {REAL r,i;} fcomplex;
-#define _FCOMPLEX_DECLARE_T_
-#endif /* _FCOMPLEX_DECLARE_T_ */
-
-#ifndef _ARITHCODE_DECLARE_T_
-typedef struct {
-	unsigned long *ilob,*iupb,*ncumfq,jdif,nc,minint,nch,ncum,nrad;
-} arithcode;
-#define _ARITHCODE_DECLARE_T_
-#endif /* _ARITHCODE_DECLARE_T_ */
-
-#ifndef _HUFFCODE_DECLARE_T_
-typedef struct {
-	unsigned long *icod,*ncod,*left,*right,nch,nodemax;
-} huffcode;
-#define _HUFFCODE_DECLARE_T_
-#endif /* _HUFFCODE_DECLARE_T_ */
-
-#include <stdio.h>
-
-void addint(double **uf, double **uc, double **res, int nf);
-void airy(REAL x, REAL *ai, REAL *bi, REAL *aip, REAL *bip);
-void amebsa(REAL **p, REAL y[], int ndim, REAL pb[],	REAL *yb,
-	REAL ftol, REAL (*funk)(REAL []), int *iter, REAL temptr);
-void amoeba(REAL **p, REAL y[], int ndim, REAL ftol,
-	REAL (*funk)(REAL []), int *iter);
-REAL amotry(REAL **p, REAL y[], REAL psum[], int ndim,
-	REAL (*funk)(REAL []), int ihi, REAL fac);
-REAL amotsa(REAL **p, REAL y[], REAL psum[], int ndim, REAL pb[],
-	REAL *yb, REAL (*funk)(REAL []), int ihi, REAL *yhi, REAL fac);
-void anneal(REAL x[], REAL y[], int iorder[], int ncity);
-double anorm2(double **a, int n);
-void arcmak(unsigned long nfreq[], unsigned long nchh, unsigned long nradd,
-	arithcode *acode);
-void arcode(unsigned long *ich, unsigned char **codep, unsigned long *lcode,
-	unsigned long *lcd, int isign, arithcode *acode);
-void arcsum(unsigned long iin[], unsigned long iout[], unsigned long ja,
-	int nwk, unsigned long nrad, unsigned long nc);
-void asolve(unsigned long n, double b[], double x[], int itrnsp);
-void atimes(unsigned long n, double x[], double r[], int itrnsp);
-void avevar(REAL data[], unsigned long n, REAL *ave, REAL *var);
-void balanc(REAL **a, int n);
-void banbks(REAL **a, unsigned long n, int m1, int m2, REAL **al,
-	unsigned long indx[], REAL b[]);
-void bandec(REAL **a, unsigned long n, int m1, int m2, REAL **al,
-	unsigned long indx[], REAL *d);
-void banmul(REAL **a, unsigned long n, int m1, int m2, REAL x[], REAL b[]);
-void bcucof(REAL y[], REAL y1[], REAL y2[], REAL y12[], REAL d1,
-	REAL d2, REAL **c);
-void bcuint(REAL y[], REAL y1[], REAL y2[], REAL y12[],
-	REAL x1l, REAL x1u, REAL x2l, REAL x2u, REAL x1,
-	REAL x2, REAL *ansy, REAL *ansy1, REAL *ansy2);
-void beschb(double x, double *gam1, double *gam2, double *gampl,
-	double *gammi);
-REAL bessi(int n, REAL x);
-REAL bessi0(REAL x);
-REAL bessi1(REAL x);
-void bessik(REAL x, REAL xnu, REAL *ri, REAL *rk, REAL *rip,
-	REAL *rkp);
-REAL bessj(int n, REAL x);
-REAL bessj0(REAL x);
-REAL bessj1(REAL x);
-void bessjy(REAL x, REAL xnu, REAL *rj, REAL *ry, REAL *rjp,
-	REAL *ryp);
-REAL bessk(int n, REAL x);
-REAL bessk0(REAL x);
-REAL bessk1(REAL x);
-REAL bessy(int n, REAL x);
-REAL bessy0(REAL x);
-REAL bessy1(REAL x);
-REAL beta(REAL z, REAL w);
-REAL betacf(REAL a, REAL b, REAL x);
-REAL betai(REAL a, REAL b, REAL x);
-REAL bico(int n, int k);
-void bksub(int ne, int nb, int jf, int k1, int k2, REAL ***c);
-REAL bnldev(REAL pp, int n, long *idum);
-REAL brent(REAL ax, REAL bx, REAL cx,
-	REAL (*f)(REAL), REAL tol, REAL *xmin);
-void broydn(REAL x[], int n, int *check,
-	void (*vecfunc)(int, REAL [], REAL []));
-void bsstep(REAL y[], REAL dydx[], int nv, REAL *xx, REAL htry,
-	REAL eps, REAL yscal[], REAL *hdid, REAL *hnext,
-	void (*derivs)(REAL, REAL [], REAL []));
-void caldat(long julian, int *mm, int *id, int *iyyy);
-void chder(REAL a, REAL b, REAL c[], REAL cder[], int n);
-REAL chebev(REAL a, REAL b, REAL c[], int m, REAL x);
-void chebft(REAL a, REAL b, REAL c[], int n, REAL (*func)(REAL));
-void chebpc(REAL c[], REAL d[], int n);
-void chint(REAL a, REAL b, REAL c[], REAL cint[], int n);
-REAL chixy(REAL bang);
-void choldc(REAL **a, int n, REAL p[]);
-void cholsl(REAL **a, int n, REAL p[], REAL b[], REAL x[]);
-void chsone(REAL bins[], REAL ebins[], int nbins, int knstrn,
-	REAL *df, REAL *chsq, REAL *prob);
-void chstwo(REAL bins1[], REAL bins2[], int nbins, int knstrn,
-	REAL *df, REAL *chsq, REAL *prob);
-void cisi(REAL x, REAL *ci, REAL *si);
-void cntab1(int **nn, int ni, int nj, REAL *chisq,
-	REAL *df, REAL *prob, REAL *cramrv, REAL *ccc);
-void cntab2(int **nn, int ni, int nj, REAL *h, REAL *hx, REAL *hy,
-	REAL *hygx, REAL *hxgy, REAL *uygx, REAL *uxgy, REAL *uxy);
-void convlv(REAL data[], unsigned long n, REAL respns[], unsigned long m,
-	int isign, REAL ans[]);
-void copy(double **aout, double **ain, int n);
-void correl(REAL data1[], REAL data2[], unsigned long n, REAL ans[]);
-void cosft(REAL y[], int n, int isign);
-void cosft1(REAL y[], int n);
-void cosft2(REAL y[], int n, int isign);
-void covsrt(REAL **covar, int ma, int ia[], int mfit);
-void crank(unsigned long n, REAL w[], REAL *s);
-void cyclic(REAL a[], REAL b[], REAL c[], REAL alpha, REAL beta,
-	REAL r[], REAL x[], unsigned long n);
-void daub4(REAL a[], unsigned long n, int isign);
-REAL dawson(REAL x);
-REAL dbrent(REAL ax, REAL bx, REAL cx,
-	REAL (*f)(REAL), REAL (*df)(REAL), REAL tol, REAL *xmin);
-void ddpoly(REAL c[], int nc, REAL x, REAL pd[], int nd);
-int decchk(char string[], int n, char *ch);
-void derivs(REAL x, REAL y[], REAL dydx[]);
-REAL df1dim(REAL x);
-void dfour1(double data[], unsigned long nn, int isign);
-void dfpmin(REAL p[], int n, REAL gtol, int *iter, REAL *fret,
-	REAL (*func)(REAL []), void (*dfunc)(REAL [], REAL []));
-REAL dfridr(REAL (*func)(REAL), REAL x, REAL h, REAL *err);
-void dftcor(REAL w, REAL delta, REAL a, REAL b, REAL endpts[],
-	REAL *corre, REAL *corim, REAL *corfac);
-void dftint(REAL (*func)(REAL), REAL a, REAL b, REAL w,
-	REAL *cosint, REAL *sinint);
-void difeq(int k, int k1, int k2, int jsf, int is1, int isf,
-	int indexv[], int ne, REAL **s, REAL **y);
-void dlinmin(REAL p[], REAL xi[], int n, REAL *fret,
-	REAL (*func)(REAL []), void (*dfunc)(REAL [], REAL[]));
-double dpythag(double a, double b);
-void drealft(double data[], unsigned long n, int isign);
-void dsprsax(double sa[], unsigned long ija[], double x[], double b[],
-	unsigned long n);
-void dsprstx(double sa[], unsigned long ija[], double x[], double b[],
-	unsigned long n);
-void dsvbksb(double **u, double w[], double **v, int m, int n, double b[],
-	double x[]);
-void dsvdcmp(double **a, int m, int n, double w[], double **v);
-void eclass(int nf[], int n, int lista[], int listb[], int m);
-void eclazz(int nf[], int n, int (*equiv)(int, int));
-REAL ei(REAL x);
-void eigsrt(REAL d[], REAL **v, int n);
-REAL elle(REAL phi, REAL ak);
-REAL ellf(REAL phi, REAL ak);
-REAL ellpi(REAL phi, REAL en, REAL ak);
-void elmhes(REAL **a, int n);
-REAL erfcc(REAL x);
-//REAL erff(REAL x);
-REAL erffc(REAL x);
-void eulsum(REAL *sum, REAL term, int jterm, REAL wksp[]);
-REAL evlmem(REAL fdt, REAL d[], int m, REAL xms);
-REAL expdev(long *idum);
-REAL expint(int n, REAL x);
-REAL f1(REAL x);
-REAL f1dim(REAL x);
-REAL f2(REAL y);
-REAL f3(REAL z);
-REAL factln(int n);
-REAL factrl(int n);
-void fasper(REAL x[], REAL y[], unsigned long n, REAL ofac, REAL hifac,
-	REAL wk1[], REAL wk2[], unsigned long nwk, unsigned long *nout,
-	unsigned long *jmax, REAL *prob);
-void fdjac(int n, REAL x[], REAL fvec[], REAL **df,
-	void (*vecfunc)(int, REAL [], REAL []));
-void fgauss(REAL x, REAL a[], REAL *y, REAL dyda[], int na);
-void fill0(double **u, int n);
-void fit(REAL x[], REAL y[], int ndata, REAL sig[], int mwt,
-	REAL *a, REAL *b, REAL *siga, REAL *sigb, REAL *chi2, REAL *q);
-void fitexy(REAL x[], REAL y[], int ndat, REAL sigx[], REAL sigy[],
-	REAL *a, REAL *b, REAL *siga, REAL *sigb, REAL *chi2, REAL *q);
-void fixrts(REAL d[], int m);
-void fleg(REAL x, REAL pl[], int nl);
-void flmoon(int n, int nph, long *jd, REAL *frac);
-REAL fmin(REAL x[]);
-void four1(REAL data[], unsigned long nn, int isign);
-void fourew(FILE *file[5], int *na, int *nb, int *nc, int *nd);
-void fourfs(FILE *file[5], unsigned long nn[], int ndim, int isign);
-void fourn(REAL data[], unsigned long nn[], int ndim, int isign);
-void fpoly(REAL x, REAL p[], int np);
-void fred2(int n, REAL a, REAL b, REAL t[], REAL f[], REAL w[],
-	REAL (*g)(REAL), REAL (*ak)(REAL, REAL));
-REAL fredin(REAL x, int n, REAL a, REAL b, REAL t[], REAL f[], REAL w[],
-	REAL (*g)(REAL), REAL (*ak)(REAL, REAL));
-void frenel(REAL x, REAL *s, REAL *c);
-void frprmn(REAL p[], int n, REAL ftol, int *iter, REAL *fret,
-	REAL (*func)(REAL []), void (*dfunc)(REAL [], REAL []));
-void ftest(REAL data1[], unsigned long n1, REAL data2[], unsigned long n2,
-	REAL *f, REAL *prob);
-REAL gamdev(int ia, long *idum);
-REAL gammln(REAL xx);
-REAL gammp(REAL a, REAL x);
-REAL gammq(REAL a, REAL x);
-REAL gasdev(long *idum);
-void gaucof(int n, REAL a[], REAL b[], REAL amu0, REAL x[], REAL w[]);
-void gauher(REAL x[], REAL w[], int n);
-void gaujac(REAL x[], REAL w[], int n, REAL alf, REAL bet);
-void gaulag(REAL x[], REAL w[], int n, REAL alf);
-void gauleg(REAL x1, REAL x2, REAL x[], REAL w[], int n);
-void gaussj(REAL **a, int n, REAL **b, int m);
-void gcf(REAL *gammcf, REAL a, REAL x, REAL *gln);
-REAL golden(REAL ax, REAL bx, REAL cx, REAL (*f)(REAL), REAL tol,
-	REAL *xmin);
-void gser(REAL *gamser, REAL a, REAL x, REAL *gln);
-void hpsel(unsigned long m, unsigned long n, REAL arr[], REAL heap[]);
-void hpsort(unsigned long n, REAL ra[]);
-void hqr(REAL **a, int n, REAL wr[], REAL wi[]);
-void hufapp(unsigned long index[], unsigned long nprob[], unsigned long n,
-	unsigned long i);
-void hufdec(unsigned long *ich, unsigned char *code, unsigned long lcode,
-	unsigned long *nb, huffcode *hcode);
-void hufenc(unsigned long ich, unsigned char **codep, unsigned long *lcode,
-	unsigned long *nb, huffcode *hcode);
-void hufmak(unsigned long nfreq[], unsigned long nchin, unsigned long *ilong,
-	unsigned long *nlong, huffcode *hcode);
-void hunt(REAL xx[], unsigned long n, REAL x, unsigned long *jlo);
-void hypdrv(REAL s, REAL yy[], REAL dyyds[]);
-fcomplex hypgeo(fcomplex a, fcomplex b, fcomplex c, fcomplex z);
-void hypser(fcomplex a, fcomplex b, fcomplex c, fcomplex z,
-	fcomplex *series, fcomplex *deriv);
-unsigned short icrc(unsigned short crc, unsigned char *bufptr,
-	unsigned long len, short jinit, int jrev);
-unsigned short icrc1(unsigned short crc, unsigned char onech);
-unsigned long igray(unsigned long n, int is);
-void iindexx(unsigned long n, long arr[], unsigned long indx[]);
-void indexx(unsigned long n, REAL arr[], unsigned long indx[]);
-void interp(double **uf, double **uc, int nf);
-int irbit1(unsigned long *iseed);
-int irbit2(unsigned long *iseed);
-void jacobi(REAL **a, int n, REAL d[], REAL **v, int *nrot);
-void jacobn(REAL x, REAL y[], REAL dfdx[], REAL **dfdy, int n);
-long julday(int mm, int id, int iyyy);
-void kendl1(REAL data1[], REAL data2[], unsigned long n, REAL *tau, REAL *z,
-	REAL *prob);
-void kendl2(REAL **tab, int i, int j, REAL *tau, REAL *z, REAL *prob);
-void kermom(double w[], double y, int m);
-void ks2d1s(REAL x1[], REAL y1[], unsigned long n1,
-	void (*quadvl)(REAL, REAL, REAL *, REAL *, REAL *, REAL *),
-	REAL *d1, REAL *prob);
-void ks2d2s(REAL x1[], REAL y1[], unsigned long n1, REAL x2[], REAL y2[],
-	unsigned long n2, REAL *d, REAL *prob);
-void ksone(REAL data[], unsigned long n, REAL (*func)(REAL), REAL *d,
-	REAL *prob);
-void kstwo(REAL data1[], unsigned long n1, REAL data2[], unsigned long n2,
-	REAL *d, REAL *prob);
-void laguer(fcomplex a[], int m, fcomplex *x, int *its);
-void lfit(REAL x[], REAL y[], REAL sig[], int ndat, REAL a[], int ia[],
-	int ma, REAL **covar, REAL *chisq, void (*funcs)(REAL, REAL [], int));
-void linbcg(unsigned long n, double b[], double x[], int itol, double tol,
-	 int itmax, int *iter, double *err);
-void linmin(REAL p[], REAL xi[], int n, REAL *fret,
-	REAL (*func)(REAL []));
-void lnsrch(int n, REAL xold[], REAL fold, REAL g[], REAL p[], REAL x[],
-	 REAL *f, REAL stpmax, int *check, REAL (*func)(REAL []));
-void load(REAL x1, REAL v[], REAL y[]);
-void load1(REAL x1, REAL v1[], REAL y[]);
-void load2(REAL x2, REAL v2[], REAL y[]);
-void locate(REAL xx[], unsigned long n, REAL x, unsigned long *j);
-void lop(double **out, double **u, int n);
-void lubksb(REAL **a, int n, int *indx, REAL b[]);
-void ludcmp(REAL **a, int n, int *indx, REAL *d);
-void machar(int *ibeta, int *it, int *irnd, int *ngrd,
-	int *machep, int *negep, int *iexp, int *minexp, int *maxexp,
-	REAL *eps, REAL *epsneg, REAL *xmin, REAL *xmax);
-void matadd(double **a, double **b, double **c, int n);
-void matsub(double **a, double **b, double **c, int n);
-void medfit(REAL x[], REAL y[], int ndata, REAL *a, REAL *b, REAL *abdev);
-void memcof(REAL data[], int n, int m, REAL *xms, REAL d[]);
-int metrop(REAL de, REAL t);
-void mgfas(double **u, int n, int maxcyc);
-void mglin(double **u, int n, int ncycle);
-REAL midexp(REAL (*funk)(REAL), REAL aa, REAL bb, int n);
-REAL midinf(REAL (*funk)(REAL), REAL aa, REAL bb, int n);
-REAL midpnt(REAL (*func)(REAL), REAL a, REAL b, int n);
-REAL midsql(REAL (*funk)(REAL), REAL aa, REAL bb, int n);
-REAL midsqu(REAL (*funk)(REAL), REAL aa, REAL bb, int n);
-void miser(REAL (*func)(REAL []), REAL regn[], int ndim, unsigned long npts,
-	REAL dith, REAL *ave, REAL *var);
-void mmid(REAL y[], REAL dydx[], int nvar, REAL xs, REAL htot,
-	int nstep, REAL yout[], void (*derivs)(REAL, REAL[], REAL[]));
-void mnbrak(REAL *ax, REAL *bx, REAL *cx, REAL *fa, REAL *fb,
-	REAL *fc, REAL (*func)(REAL));
-void mnewt(int ntrial, REAL x[], int n, REAL tolx, REAL tolf);
-void moment(REAL data[], int n, REAL *ave, REAL *adev, REAL *sdev,
-	REAL *var, REAL *skew, REAL *curt);
-void mp2dfr(unsigned char a[], unsigned char s[], int n, int *m);
-void mpadd(unsigned char w[], unsigned char u[], unsigned char v[], int n);
-void mpdiv(unsigned char q[], unsigned char r[], unsigned char u[],
-	unsigned char v[], int n, int m);
-void mpinv(unsigned char u[], unsigned char v[], int n, int m);
-void mplsh(unsigned char u[], int n);
-void mpmov(unsigned char u[], unsigned char v[], int n);
-void mpmul(unsigned char w[], unsigned char u[], unsigned char v[], int n,
-	int m);
-void mpneg(unsigned char u[], int n);
-void mppi(int n);
-void mprove(REAL **a, REAL **alud, int n, int indx[], REAL b[],
-	REAL x[]);
-void mpsad(unsigned char w[], unsigned char u[], int n, int iv);
-void mpsdv(unsigned char w[], unsigned char u[], int n, int iv, int *ir);
-void mpsmu(unsigned char w[], unsigned char u[], int n, int iv);
-void mpsqrt(unsigned char w[], unsigned char u[], unsigned char v[], int n,
-	int m);
-void mpsub(int *is, unsigned char w[], unsigned char u[], unsigned char v[],
-	int n);
-void mrqcof(REAL x[], REAL y[], REAL sig[], int ndata, REAL a[],
-	int ia[], int ma, REAL **alpha, REAL beta[], REAL *chisq,
-	void (*funcs)(REAL, REAL [], REAL *, REAL [], int));
-void mrqmin(REAL x[], REAL y[], REAL sig[], int ndata, REAL a[],
-	int ia[], int ma, REAL **covar, REAL **alpha, REAL *chisq,
-	void (*funcs)(REAL, REAL [], REAL *, REAL [], int), REAL *alamda);
-void newt(REAL x[], int n, int *check,
-	void (*vecfunc)(int, REAL [], REAL []));
-void odeint(REAL ystart[], int nvar, REAL x1, REAL x2,
-	REAL eps, REAL h1, REAL hmin, int *nok, int *nbad,
-	void (*derivs)(REAL, REAL [], REAL []),
-	void (*rkqs)(REAL [], REAL [], int, REAL *, REAL, REAL,
-	REAL [], REAL *, REAL *, void (*)(REAL, REAL [], REAL [])));
-void orthog(int n, REAL anu[], REAL alpha[], REAL beta[], REAL a[],
-	REAL b[]);
-void pade(double cof[], int n, REAL *resid);
-void pccheb(REAL d[], REAL c[], int n);
-void pcshft(REAL a, REAL b, REAL d[], int n);
-void pearsn(REAL x[], REAL y[], unsigned long n, REAL *r, REAL *prob,
-	REAL *z);
-void period(REAL x[], REAL y[], int n, REAL ofac, REAL hifac,
-	REAL px[], REAL py[], int np, int *nout, int *jmax, REAL *prob);
-void piksr2(int n, REAL arr[], REAL brr[]);
-void piksrt(int n, REAL arr[]);
-void pinvs(int ie1, int ie2, int je1, int jsf, int jc1, int k,
-	REAL ***c, REAL **s);
-REAL plgndr(int l, int m, REAL x);
-REAL poidev(REAL xm, long *idum);
-void polcoe(REAL x[], REAL y[], int n, REAL cof[]);
-void polcof(REAL xa[], REAL ya[], int n, REAL cof[]);
-void poldiv(REAL u[], int n, REAL v[], int nv, REAL q[], REAL r[]);
-void polin2(REAL x1a[], REAL x2a[], REAL **ya, int m, int n,
-	REAL x1, REAL x2, REAL *y, REAL *dy);
-void polint(REAL xa[], REAL ya[], int n, REAL x, REAL *y, REAL *dy);
-void powell(REAL p[], REAL **xi, int n, REAL ftol, int *iter, REAL *fret,
-	REAL (*func)(REAL []));
-void predic(REAL data[], int ndata, REAL d[], int m, REAL future[], int nfut);
-REAL probks(REAL alam);
-void psdes(unsigned long *lword, unsigned long *irword);
-void pwt(REAL a[], unsigned long n, int isign);
-void pwtset(int n);
-REAL pythag(REAL a, REAL b);
-void pzextr(int iest, REAL xest, REAL yest[], REAL yz[], REAL dy[],
-	int nv);
-REAL qgaus(REAL (*func)(REAL), REAL a, REAL b);
-void qrdcmp(REAL **a, int n, REAL *c, REAL *d, int *sing);
-REAL qromb(REAL (*func)(REAL), REAL a, REAL b);
-REAL qromo(REAL (*func)(REAL), REAL a, REAL b,
-	REAL (*choose)(REAL (*)(REAL), REAL, REAL, int));
-void qroot(REAL p[], int n, REAL *b, REAL *c, REAL eps);
-void qrsolv(REAL **a, int n, REAL c[], REAL d[], REAL b[]);
-void qrupdt(REAL **r, REAL **qt, int n, REAL u[], REAL v[]);
-REAL qsimp(REAL (*func)(REAL), REAL a, REAL b);
-REAL qtrap(REAL (*func)(REAL), REAL a, REAL b);
-REAL quad3d(REAL (*func)(REAL, REAL, REAL), REAL x1, REAL x2);
-void quadct(REAL x, REAL y, REAL xx[], REAL yy[], unsigned long nn,
-	REAL *fa, REAL *fb, REAL *fc, REAL *fd);
-void quadmx(REAL **a, int n);
-void quadvl(REAL x, REAL y, REAL *fa, REAL *fb, REAL *fc, REAL *fd);
-REAL ran0(long *idum);
-REAL ran1(long *idum);
-REAL ran2(long *idum);
-REAL ran3(long *idum);
-REAL ran4(long *idum);
-void rank(unsigned long n, unsigned long indx[], unsigned long irank[]);
-void ranpt(REAL pt[], REAL regn[], int n);
-void ratint(REAL xa[], REAL ya[], int n, REAL x, REAL *y, REAL *dy);
-void ratlsq(double (*fn)(double), double a, double b, int mm, int kk,
-	double cof[], double *dev);
-double ratval(double x, double cof[], int mm, int kk);
-REAL rc(REAL x, REAL y);
-REAL rd(REAL x, REAL y, REAL z);
-void realft(REAL data[], unsigned long n, int isign);
-void rebin(REAL rc, int nd, REAL r[], REAL xin[], REAL xi[]);
-void red(int iz1, int iz2, int jz1, int jz2, int jm1, int jm2, int jmf,
-	int ic1, int jc1, int jcf, int kc, REAL ***c, REAL **s);
-void relax(double **u, double **rhs, int n);
-void relax2(double **u, double **rhs, int n);
-void resid(double **res, double **u, double **rhs, int n);
-REAL revcst(REAL x[], REAL y[], int iorder[], int ncity, int n[]);
-void reverse(int iorder[], int ncity, int n[]);
-REAL rf(REAL x, REAL y, REAL z);
-REAL rj(REAL x, REAL y, REAL z, REAL p);
-void rk4(REAL y[], REAL dydx[], int n, REAL x, REAL h, REAL yout[],
-	void (*derivs)(REAL, REAL [], REAL []));
-void rkck(REAL y[], REAL dydx[], int n, REAL x, REAL h,
-	REAL yout[], REAL yerr[], void (*derivs)(REAL, REAL [], REAL []));
-void rkdumb(REAL vstart[], int nvar, REAL x1, REAL x2, int nstep,
-	void (*derivs)(REAL, REAL [], REAL []));
-void rkqs(REAL y[], REAL dydx[], int n, REAL *x,
-	REAL htry, REAL eps, REAL yscal[], REAL *hdid, REAL *hnext,
-	void (*derivs)(REAL, REAL [], REAL []));
-void rlft3(REAL ***data, REAL **speq, unsigned long nn1,
-	unsigned long nn2, unsigned long nn3, int isign);
-REAL rofunc(REAL b);
-void rotate(REAL **r, REAL **qt, int n, int i, REAL a, REAL b);
-void rsolv(REAL **a, int n, REAL d[], REAL b[]);
-void rstrct(double **uc, double **uf, int nc);
-REAL rtbis(REAL (*func)(REAL), REAL x1, REAL x2, REAL xacc);
-REAL rtflsp(REAL (*func)(REAL), REAL x1, REAL x2, REAL xacc);
-REAL rtnewt(void (*funcd)(REAL, REAL *, REAL *), REAL x1, REAL x2,
-	REAL xacc);
-REAL rtsafe(void (*funcd)(REAL, REAL *, REAL *), REAL x1, REAL x2,
-	REAL xacc);
-REAL rtsec(REAL (*func)(REAL), REAL x1, REAL x2, REAL xacc);
-void rzextr(int iest, REAL xest, REAL yest[], REAL yz[], REAL dy[], int nv);
-void savgol(REAL c[], int np, int nl, int nr, int ld, int m);
-void score(REAL xf, REAL y[], REAL f[]);
-void scrsho(REAL (*fx)(REAL));
-REAL select_(unsigned long k, unsigned long n, REAL arr[]);
-REAL selip(unsigned long k, unsigned long n, REAL arr[]);
-void shell(unsigned long n, REAL a[]);
-void shoot(int n, REAL v[], REAL f[]);
-void shootf(int n, REAL v[], REAL f[]);
-void simp1(REAL **a, int mm, int ll[], int nll, int iabf, int *kp,
-	REAL *bmax);
-void simp2(REAL **a, int n, int l2[], int nl2, int *ip, int kp, REAL *q1);
-void simp3(REAL **a, int i1, int k1, int ip, int kp);
-void simplx(REAL **a, int m, int n, int m1, int m2, int m3, int *icase,
-	int izrov[], int iposv[]);
-void simpr(REAL y[], REAL dydx[], REAL dfdx[], REAL **dfdy,
-	int n, REAL xs, REAL htot, int nstep, REAL yout[],
-	void (*derivs)(REAL, REAL [], REAL []));
-void sinft(REAL y[], int n);
-void slvsm2(double **u, double **rhs);
-void slvsml(double **u, double **rhs);
-void sncndn(REAL uu, REAL emmc, REAL *sn, REAL *cn, REAL *dn);
-double snrm(unsigned long n, double sx[], int itol);
-void sobseq(int *n, REAL x[]);
-void solvde(int itmax, REAL conv, REAL slowc, REAL scalv[],
-	int indexv[], int ne, int nb, int m, REAL **y, REAL ***c, REAL **s);
-void sor(double **a, double **b, double **c, double **d, double **e,
-	double **f, double **u, int jmax, double rjac);
-void sort(unsigned long n, REAL arr[]);
-void sort2(unsigned long n, REAL arr[], REAL brr[]);
-void sort3(unsigned long n, REAL ra[], REAL rb[], REAL rc[]);
-void spctrm(FILE *fp, REAL p[], int m, int k, int ovrlap);
-void spear(REAL data1[], REAL data2[], unsigned long n, REAL *d, REAL *zd,
-	REAL *probd, REAL *rs, REAL *probrs);
-void sphbes(int n, REAL x, REAL *sj, REAL *sy, REAL *sjp, REAL *syp);
-void splie2(REAL x1a[], REAL x2a[], REAL **ya, int m, int n, REAL **y2a);
-void splin2(REAL x1a[], REAL x2a[], REAL **ya, REAL **y2a, int m, int n,
-	REAL x1, REAL x2, REAL *y);
-void spline(REAL x[], REAL y[], int n, REAL yp1, REAL ypn, REAL y2[]);
-void splint(REAL xa[], REAL ya[], REAL y2a[], int n, REAL x, REAL *y);
-void spread(REAL y, REAL yy[], unsigned long n, REAL x, int m);
-void sprsax(REAL sa[], unsigned long ija[], REAL x[], REAL b[],
-	unsigned long n);
-void sprsin(REAL **a, int n, REAL thresh, unsigned long nmax, REAL sa[],
-	unsigned long ija[]);
-void sprspm(REAL sa[], unsigned long ija[], REAL sb[], unsigned long ijb[],
-	REAL sc[], unsigned long ijc[]);
-void sprstm(REAL sa[], unsigned long ija[], REAL sb[], unsigned long ijb[],
-	REAL thresh, unsigned long nmax, REAL sc[], unsigned long ijc[]);
-void sprstp(REAL sa[], unsigned long ija[], REAL sb[], unsigned long ijb[]);
-void sprstx(REAL sa[], unsigned long ija[], REAL x[], REAL b[],
-	unsigned long n);
-void stifbs(REAL y[], REAL dydx[], int nv, REAL *xx,
-	REAL htry, REAL eps, REAL yscal[], REAL *hdid, REAL *hnext,
-	void (*derivs)(REAL, REAL [], REAL []));
-void stiff(REAL y[], REAL dydx[], int n, REAL *x,
-	REAL htry, REAL eps, REAL yscal[], REAL *hdid, REAL *hnext,
-	void (*derivs)(REAL, REAL [], REAL []));
-void stoerm(REAL y[], REAL d2y[], int nv, REAL xs,
-	REAL htot, int nstep, REAL yout[],
-	void (*derivs)(REAL, REAL [], REAL []));
-void svbksb(REAL **u, REAL w[], REAL **v, int m, int n, REAL b[],
-	REAL x[]);
-void svdcmp(REAL **a, int m, int n, REAL w[], REAL **v);
-void svdfit(REAL x[], REAL y[], REAL sig[], int ndata, REAL a[],
-	int ma, REAL **u, REAL **v, REAL w[], REAL *chisq,
-	void (*funcs)(REAL, REAL [], int));
-void svdvar(REAL **v, int ma, REAL w[], REAL **cvm);
-void toeplz(REAL r[], REAL x[], REAL y[], int n);
-void tptest(REAL data1[], REAL data2[], unsigned long n, REAL *t, REAL *prob);
-void tqli(REAL d[], REAL e[], int n, REAL **z);
-REAL trapzd(REAL (*func)(REAL), REAL a, REAL b, int n);
-void tred2(REAL **a, int n, REAL d[], REAL e[]);
-void tridag(REAL a[], REAL b[], REAL c[], REAL r[], REAL u[],
-	unsigned long n);
-REAL trncst(REAL x[], REAL y[], int iorder[], int ncity, int n[]);
-void trnspt(int iorder[], int ncity, int n[]);
-void ttest(REAL data1[], unsigned long n1, REAL data2[], unsigned long n2,
-	REAL *t, REAL *prob);
-void tutest(REAL data1[], unsigned long n1, REAL data2[], unsigned long n2,
-	REAL *t, REAL *prob);
-void twofft(REAL data1[], REAL data2[], REAL fft1[], REAL fft2[],
-	unsigned long n);
-void vander(double x[], double w[], double q[], int n);
-void vegas(REAL regn[], int ndim, REAL (*fxn)(REAL [], REAL), int init,
-	unsigned long ncall, int itmx, int nprn, REAL *tgral, REAL *sd,
-	REAL *chi2a);
-void voltra(int n, int m, REAL t0, REAL h, REAL *t, REAL **f,
-	REAL (*g)(int, REAL), REAL (*ak)(int, int, REAL, REAL));
-void wt1(REAL a[], unsigned long n, int isign,
-	void (*wtstep)(REAL [], unsigned long, int));
-void wtn(REAL a[], unsigned long nn[], int ndim, int isign,
-	void (*wtstep)(REAL [], unsigned long, int));
-void wwghts(REAL wghts[], int n, REAL h,
-	void (*kermom)(double [], double ,int));
-int zbrac(REAL (*func)(REAL), REAL *x1, REAL *x2);
-void zbrak(REAL (*fx)(REAL), REAL x1, REAL x2, int n, REAL xb1[],
-	REAL xb2[], int *nb);
-REAL zbrent(REAL (*func)(REAL), REAL x1, REAL x2, REAL tol);
-void zrhqr(REAL a[], int m, REAL rtr[], REAL rti[]);
-REAL zriddr(REAL (*func)(REAL), REAL x1, REAL x2, REAL xacc);
-void zroots(fcomplex a[], int m, fcomplex roots[], int polish);
-
-#endif /* _NR_H_ */

+ 0 - 295
examples/pastix-wrappers/models/num_recipes/nrutil.c

@@ -1,295 +0,0 @@
-/* CAUTION: This is the ANSI C (only) version of the Numerical Recipes
-   utility file nrutil.c.  Do not confuse this file with the same-named
-   file nrutil.c that is supplied in the 'misc' subdirectory.
-   *That* file is the one from the book, and contains both ANSI and
-   traditional K&R versions, along with #ifdef macros to select the
-   correct version.  *This* file contains only ANSI C.               */
-
-#include <stdio.h>
-#include <stddef.h>
-#include <stdlib.h>
-#define NR_END 1
-#define FREE_ARG char*
-#define REAL float
-
-
-void nrerror(char error_text[])
-/* Numerical Recipes standard error handler */
-{
-	fprintf(stderr,"Numerical Recipes run-time error...\n");
-	fprintf(stderr,"%s\n",error_text);
-	fprintf(stderr,"...now exiting to system...\n");
-	exit(1);
-}
-
-REAL *vector(long nl, long nh)
-/* allocate a REAL vector with subscript range v[nl..nh] */
-{
-	REAL *v;
-
-	v=(REAL *)malloc((size_t) ((nh-nl+1+NR_END)*sizeof(REAL)));
-	if (!v) nrerror("allocation failure in vector()");
-	return v-nl+NR_END;
-}
-
-int *ivector(long nl, long nh)
-/* allocate an int vector with subscript range v[nl..nh] */
-{
-	int *v;
-
-	v=(int *)malloc((size_t) ((nh-nl+1+NR_END)*sizeof(int)));
-	if (!v) nrerror("allocation failure in ivector()");
-	return v-nl+NR_END;
-}
-
-unsigned char *cvector(long nl, long nh)
-/* allocate an unsigned char vector with subscript range v[nl..nh] */
-{
-	unsigned char *v;
-
-	v=(unsigned char *)malloc((size_t) ((nh-nl+1+NR_END)*sizeof(unsigned char)));
-	if (!v) nrerror("allocation failure in cvector()");
-	return v-nl+NR_END;
-}
-
-unsigned long *lvector(long nl, long nh)
-/* allocate an unsigned long vector with subscript range v[nl..nh] */
-{
-	unsigned long *v;
-
-	v=(unsigned long *)malloc((size_t) ((nh-nl+1+NR_END)*sizeof(long)));
-	if (!v) nrerror("allocation failure in lvector()");
-	return v-nl+NR_END;
-}
-
-double *dvector(long nl, long nh)
-/* allocate a double vector with subscript range v[nl..nh] */
-{
-	double *v;
-
-	v=(double *)malloc((size_t) ((nh-nl+1+NR_END)*sizeof(double)));
-	if (!v) nrerror("allocation failure in dvector()");
-	return v-nl+NR_END;
-}
-
-REAL **matrix(long nrl, long nrh, long ncl, long nch)
-/* allocate a REAL matrix with subscript range m[nrl..nrh][ncl..nch] */
-{
-	long i, nrow=nrh-nrl+1,ncol=nch-ncl+1;
-	REAL **m;
-
-	/* allocate pointers to rows */
-	m=(REAL **) malloc((size_t)((nrow+NR_END)*sizeof(REAL*)));
-	if (!m) nrerror("allocation failure 1 in matrix()");
-	m += NR_END;
-	m -= nrl;
-
-	/* allocate rows and set pointers to them */
-	m[nrl]=(REAL *) malloc((size_t)((nrow*ncol+NR_END)*sizeof(REAL)));
-	if (!m[nrl]) nrerror("allocation failure 2 in matrix()");
-	m[nrl] += NR_END;
-	m[nrl] -= ncl;
-
-	for(i=nrl+1;i<=nrh;i++) m[i]=m[i-1]+ncol;
-
-	/* return pointer to array of pointers to rows */
-	return m;
-}
-
-double **dmatrix(long nrl, long nrh, long ncl, long nch)
-/* allocate a double matrix with subscript range m[nrl..nrh][ncl..nch] */
-{
-	long i, nrow=nrh-nrl+1,ncol=nch-ncl+1;
-	double **m;
-
-	/* allocate pointers to rows */
-	m=(double **) malloc((size_t)((nrow+NR_END)*sizeof(double*)));
-	if (!m) nrerror("allocation failure 1 in matrix()");
-	m += NR_END;
-	m -= nrl;
-
-	/* allocate rows and set pointers to them */
-	m[nrl]=(double *) malloc((size_t)((nrow*ncol+NR_END)*sizeof(double)));
-	if (!m[nrl]) nrerror("allocation failure 2 in matrix()");
-	m[nrl] += NR_END;
-	m[nrl] -= ncl;
-
-	for(i=nrl+1;i<=nrh;i++) m[i]=m[i-1]+ncol;
-
-	/* return pointer to array of pointers to rows */
-	return m;
-}
-
-int **imatrix(long nrl, long nrh, long ncl, long nch)
-/* allocate a int matrix with subscript range m[nrl..nrh][ncl..nch] */
-{
-	long i, nrow=nrh-nrl+1,ncol=nch-ncl+1;
-	int **m;
-
-	/* allocate pointers to rows */
-	m=(int **) malloc((size_t)((nrow+NR_END)*sizeof(int*)));
-	if (!m) nrerror("allocation failure 1 in matrix()");
-	m += NR_END;
-	m -= nrl;
-
-
-	/* allocate rows and set pointers to them */
-	m[nrl]=(int *) malloc((size_t)((nrow*ncol+NR_END)*sizeof(int)));
-	if (!m[nrl]) nrerror("allocation failure 2 in matrix()");
-	m[nrl] += NR_END;
-	m[nrl] -= ncl;
-
-	for(i=nrl+1;i<=nrh;i++) m[i]=m[i-1]+ncol;
-
-	/* return pointer to array of pointers to rows */
-	return m;
-}
-
-REAL **submatrix(REAL **a, long oldrl, long oldrh, long oldcl, long oldch,
-	long newrl, long newcl)
-/* point a submatrix [newrl..][newcl..] to a[oldrl..oldrh][oldcl..oldch] */
-{
-	long i,j,nrow=oldrh-oldrl+1,ncol=oldcl-newcl;
-	REAL **m;
-
-	/* allocate array of pointers to rows */
-	m=(REAL **) malloc((size_t) ((nrow+NR_END)*sizeof(REAL*)));
-	if (!m) nrerror("allocation failure in submatrix()");
-	m += NR_END;
-	m -= newrl;
-
-	/* set pointers to rows */
-	for(i=oldrl,j=newrl;i<=oldrh;i++,j++) m[j]=a[i]+ncol;
-
-	/* return pointer to array of pointers to rows */
-	return m;
-}
-
-REAL **convert_matrix(REAL *a, long nrl, long nrh, long ncl, long nch)
-/* allocate a REAL matrix m[nrl..nrh][ncl..nch] that points to the matrix
-declared in the standard C manner as a[nrow][ncol], where nrow=nrh-nrl+1
-and ncol=nch-ncl+1. The routine should be called with the address
-&a[0][0] as the first argument. */
-{
-	long i,j,nrow=nrh-nrl+1,ncol=nch-ncl+1;
-	REAL **m;
-
-	/* allocate pointers to rows */
-	m=(REAL **) malloc((size_t) ((nrow+NR_END)*sizeof(REAL*)));
-	if (!m) nrerror("allocation failure in convert_matrix()");
-	m += NR_END;
-	m -= nrl;
-
-	/* set pointers to rows */
-	m[nrl]=a-ncl;
-	for(i=1,j=nrl+1;i<nrow;i++,j++) m[j]=m[j-1]+ncol;
-	/* return pointer to array of pointers to rows */
-	return m;
-}
-
-REAL ***f3tensor(long nrl, long nrh, long ncl, long nch, long ndl, long ndh)
-/* allocate a REAL 3tensor with range t[nrl..nrh][ncl..nch][ndl..ndh] */
-{
-	long i,j,nrow=nrh-nrl+1,ncol=nch-ncl+1,ndep=ndh-ndl+1;
-	REAL ***t;
-
-	/* allocate pointers to pointers to rows */
-	t=(REAL ***) malloc((size_t)((nrow+NR_END)*sizeof(REAL**)));
-	if (!t) nrerror("allocation failure 1 in f3tensor()");
-	t += NR_END;
-	t -= nrl;
-
-	/* allocate pointers to rows and set pointers to them */
-	t[nrl]=(REAL **) malloc((size_t)((nrow*ncol+NR_END)*sizeof(REAL*)));
-	if (!t[nrl]) nrerror("allocation failure 2 in f3tensor()");
-	t[nrl] += NR_END;
-	t[nrl] -= ncl;
-
-	/* allocate rows and set pointers to them */
-	t[nrl][ncl]=(REAL *) malloc((size_t)((nrow*ncol*ndep+NR_END)*sizeof(REAL)));
-	if (!t[nrl][ncl]) nrerror("allocation failure 3 in f3tensor()");
-	t[nrl][ncl] += NR_END;
-	t[nrl][ncl] -= ndl;
-
-	for(j=ncl+1;j<=nch;j++) t[nrl][j]=t[nrl][j-1]+ndep;
-	for(i=nrl+1;i<=nrh;i++) {
-		t[i]=t[i-1]+ncol;
-		t[i][ncl]=t[i-1][ncl]+ncol*ndep;
-		for(j=ncl+1;j<=nch;j++) t[i][j]=t[i][j-1]+ndep;
-	}
-
-	/* return pointer to array of pointers to rows */
-	return t;
-}
-
-void free_vector(REAL *v, long nl, long nh)
-/* free a REAL vector allocated with vector() */
-{
-	free((FREE_ARG) (v+nl-NR_END));
-}
-
-void free_ivector(int *v, long nl, long nh)
-/* free an int vector allocated with ivector() */
-{
-	free((FREE_ARG) (v+nl-NR_END));
-}
-
-void free_cvector(unsigned char *v, long nl, long nh)
-/* free an unsigned char vector allocated with cvector() */
-{
-	free((FREE_ARG) (v+nl-NR_END));
-}
-
-void free_lvector(unsigned long *v, long nl, long nh)
-/* free an unsigned long vector allocated with lvector() */
-{
-	free((FREE_ARG) (v+nl-NR_END));
-}
-
-void free_dvector(double *v, long nl, long nh)
-/* free a double vector allocated with dvector() */
-{
-	free((FREE_ARG) (v+nl-NR_END));
-}
-
-void free_matrix(REAL **m, long nrl, long nrh, long ncl, long nch)
-/* free a REAL matrix allocated by matrix() */
-{
-	free((FREE_ARG) (m[nrl]+ncl-NR_END));
-	free((FREE_ARG) (m+nrl-NR_END));
-}
-
-void free_dmatrix(double **m, long nrl, long nrh, long ncl, long nch)
-/* free a double matrix allocated by dmatrix() */
-{
-	free((FREE_ARG) (m[nrl]+ncl-NR_END));
-	free((FREE_ARG) (m+nrl-NR_END));
-}
-
-void free_imatrix(int **m, long nrl, long nrh, long ncl, long nch)
-/* free an int matrix allocated by imatrix() */
-{
-	free((FREE_ARG) (m[nrl]+ncl-NR_END));
-	free((FREE_ARG) (m+nrl-NR_END));
-}
-
-void free_submatrix(REAL **b, long nrl, long nrh, long ncl, long nch)
-/* free a submatrix allocated by submatrix() */
-{
-	free((FREE_ARG) (b+nrl-NR_END));
-}
-
-void free_convert_matrix(REAL **b, long nrl, long nrh, long ncl, long nch)
-/* free a matrix allocated by convert_matrix() */
-{
-	free((FREE_ARG) (b+nrl-NR_END));
-}
-
-void free_f3tensor(REAL ***t, long nrl, long nrh, long ncl, long nch,
-	long ndl, long ndh)
-/* free a REAL f3tensor allocated by f3tensor() */
-{
-	free((FREE_ARG) (t[nrl][ncl]+ndl-NR_END));
-	free((FREE_ARG) (t[nrl]+ncl-NR_END));
-	free((FREE_ARG) (t+nrl-NR_END));
-}

+ 0 - 79
examples/pastix-wrappers/models/num_recipes/nrutil.h

@@ -1,79 +0,0 @@
-/* CAUTION: This is the ANSI C (only) version of the Numerical Recipes
-   utility file nrutil.h.  Do not confuse this file with the same-named
-   file nrutil.h that is supplied in the 'misc' subdirectory.
-   *That* file is the one from the book, and contains both ANSI and
-   traditional K&R versions, along with #ifdef macros to select the
-   correct version.  *This* file contains only ANSI C.               */
-
-#ifndef _NR_UTILS_H_
-#define _NR_UTILS_H_
-
-#define REAL float
-
-static REAL sqrarg;
-#define SQR(a) ((sqrarg=(a)) == 0.0 ? 0.0 : sqrarg*sqrarg)
-
-static double dsqrarg;
-#define DSQR(a) ((dsqrarg=(a)) == 0.0 ? 0.0 : dsqrarg*dsqrarg)
-
-static double dmaxarg1,dmaxarg2;
-#define DMAX(a,b) (dmaxarg1=(a),dmaxarg2=(b),(dmaxarg1) > (dmaxarg2) ?\
-        (dmaxarg1) : (dmaxarg2))
-
-static double dminarg1,dminarg2;
-#define DMIN(a,b) (dminarg1=(a),dminarg2=(b),(dminarg1) < (dminarg2) ?\
-        (dminarg1) : (dminarg2))
-
-static REAL maxarg1,maxarg2;
-#define FMAX(a,b) (maxarg1=(a),maxarg2=(b),(maxarg1) > (maxarg2) ?\
-        (maxarg1) : (maxarg2))
-
-static REAL minarg1,minarg2;
-#define FMIN(a,b) (minarg1=(a),minarg2=(b),(minarg1) < (minarg2) ?\
-        (minarg1) : (minarg2))
-
-static long lmaxarg1,lmaxarg2;
-#define LMAX(a,b) (lmaxarg1=(a),lmaxarg2=(b),(lmaxarg1) > (lmaxarg2) ?\
-        (lmaxarg1) : (lmaxarg2))
-
-static long lminarg1,lminarg2;
-#define LMIN(a,b) (lminarg1=(a),lminarg2=(b),(lminarg1) < (lminarg2) ?\
-        (lminarg1) : (lminarg2))
-
-static int imaxarg1,imaxarg2;
-#define IMAX(a,b) (imaxarg1=(a),imaxarg2=(b),(imaxarg1) > (imaxarg2) ?\
-        (imaxarg1) : (imaxarg2))
-
-static int iminarg1,iminarg2;
-#define IMIN(a,b) (iminarg1=(a),iminarg2=(b),(iminarg1) < (iminarg2) ?\
-        (iminarg1) : (iminarg2))
-
-#define SIGN(a,b) ((b) >= 0.0 ? fabs(a) : -fabs(a))
-
-void nrerror(char error_text[]);
-REAL *vector(long nl, long nh);
-int *ivector(long nl, long nh);
-unsigned char *cvector(long nl, long nh);
-unsigned long *lvector(long nl, long nh);
-double *dvector(long nl, long nh);
-REAL **matrix(long nrl, long nrh, long ncl, long nch);
-double **dmatrix(long nrl, long nrh, long ncl, long nch);
-int **imatrix(long nrl, long nrh, long ncl, long nch);
-REAL **submatrix(REAL **a, long oldrl, long oldrh, long oldcl, long oldch,
-	long newrl, long newcl);
-REAL **convert_matrix(REAL *a, long nrl, long nrh, long ncl, long nch);
-REAL ***f3tensor(long nrl, long nrh, long ncl, long nch, long ndl, long ndh);
-void free_vector(REAL *v, long nl, long nh);
-void free_ivector(int *v, long nl, long nh);
-void free_cvector(unsigned char *v, long nl, long nh);
-void free_lvector(unsigned long *v, long nl, long nh);
-void free_dvector(double *v, long nl, long nh);
-void free_matrix(REAL **m, long nrl, long nrh, long ncl, long nch);
-void free_dmatrix(double **m, long nrl, long nrh, long ncl, long nch);
-void free_imatrix(int **m, long nrl, long nrh, long ncl, long nch);
-void free_submatrix(REAL **b, long nrl, long nrh, long ncl, long nch);
-void free_convert_matrix(REAL **b, long nrl, long nrh, long ncl, long nch);
-void free_f3tensor(REAL ***t, long nrl, long nrh, long ncl, long nch,
-	long ndl, long ndh);
-
-#endif /* _NR_UTILS_H_ */

+ 0 - 141
examples/pastix-wrappers/models/reg_gemm.c

@@ -1,141 +0,0 @@
-/*
- * StarPU
- * Copyright (C) INRIA 2008-2009 (see AUTHORS file)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-
-#include "nr.h"
-
-#define REAL float
-#define SIZE 2000000
-#define ma 6
-
-
-typedef struct Coord
-{
-  REAL x1;
-  REAL x2;
-  REAL x3;
-} coord, *pcoord;
-
-
-coord tabcoord[SIZE];
-REAL tabx[SIZE];
-REAL taby[SIZE];  
-REAL sig[SIZE];
-REAL afunc[ma+1];
-int ia[ma+1];
-REAL a[ma+1];
-
-
-void funcs( REAL i, REAL afunc[ma+1], int ma2)
-{
-  
-  afunc[1]= 1;
-  afunc[2]= tabcoord[(int)i].x1;
-  afunc[3]= tabcoord[(int)i].x2;
-  afunc[4]= tabcoord[(int)i].x1*tabcoord[(int)i].x2;
-  afunc[5]= tabcoord[(int)i].x2*tabcoord[(int)i].x3;
-  afunc[6]= tabcoord[(int)i].x1*tabcoord[(int)i].x2*tabcoord[(int)i].x3;
-  //printf("%f %f %f \n",afunc[0],afunc[1],afunc[2]);
-}
-
-
-int main(int argc, char * argv[])
-{
-  REAL total=0.0;
-  REAL ecart=0.0;
-  int len=0;
-  char str2[1000];
-/*   long double total=0.0; */
-/*   long double ecart=0.0; */
-  char *filename = argv[1];
-  char perf_h[1000];
-  char str[1000];
-  int k,i;
-  FILE * res;
-  FILE *out;
-  FILE *perf;
-  // FILE *tmpperf;
-  int ndat=atoi(argv[2]);
-  REAL ** covar;
-  REAL *chisq = (REAL *)malloc(sizeof(REAL));
-  res = fopen(filename,"r");
-
-  covar = (REAL**) malloc((ma+1) *sizeof(REAL*));
-  for (i=0;i<ma+1;i++)
-    covar[i]=(REAL*)malloc((ma+1) *sizeof(REAL));
-
-
-  for (k=1;k<ndat+1;k++)
-    {
-      int x0, y0, x1, y1, x2, y2;
-      REAL tmpfloat;
-      fscanf(res,"%f\t%d\t%d\t%d\t%d\t%d\t%d\n", &tmpfloat, &x0, &y0, &x1, &y1, &x2, &y2);
-      tabcoord[k].x1= x0 - y0; 
-      tabcoord[k].x2= y2;
-      tabcoord[k].x3= y0;
-      taby[k]=tmpfloat;
-      sig[k]=1;
-      tabx[k]=k;
-      //fprintf(out,"%f %f %f\n",tabcoord[k].x1 ,tabcoord[k].x2 ,tabcoord[k].x3);
-      //fprintf(out,"%f %f\n",tabx[k] , taby[k]);
-    }
-  for (k=1;k<ma+1;k++)
-    ia[k]=1;
-
-
-  lfit(tabx, taby, sig, ndat, a, ia, ma, covar, chisq, &funcs);
-  
-  for (k=1;k<ma+1;k++)  
-    {    
-//      printf("%.12lf\n", a[k]);
-      //total+=a[k];
-    }
-
-
-
-  //calcul de l'ecart type
-  for (k=1;k<ndat+1;k++)
-    {
-      double abs=0.0;
-      abs += a[1];
-      abs += tabcoord[k].x1*a[2]+tabcoord[k].x2*a[3];
-      abs += tabcoord[k].x1*tabcoord[k].x2*a[4]+tabcoord[k].x2*tabcoord[k].x3*a[5];
-      abs += tabcoord[k].x1*tabcoord[k].x2*tabcoord[k].x3*a[6];
-    //  fprintf(stderr,"k=%i ; calcul : %lf ; reel : %lf ; ", k, abs, taby[k]);
-      abs = abs - taby[k];
-      if (abs < 0)
-	abs = - abs;
-    //  fprintf(stderr,"ecart : %lf\n ", abs);
-
-      total += abs;
-      //printf("%f %f %f\n",tabcoord[k].x1 ,tabcoord[k].x2 ,tabcoord[k].x3);
-    }
-
-
-
-  fprintf(stdout,"#define GEMM_A  %e\n#define GEMM_B  %e\n#define GEMM_C  %e\n#define GEMM_D  %e\n#define GEMM_E  %e\n#define GEMM_F  %e\n",a[6],a[4],a[5],a[2],a[3],a[1]);
-  fprintf(stderr,"#define PERF_GEMM(i,j,k) (GEMM_A*(double)(i)*(double)(j)*(double)(k)+GEMM_B*(double)(i)*(double)(j)+GEMM_C*(double)(j)*(double)(k)+GEMM_D*(double)(i)+GEMM_E*(double)(j)+GEMM_F)\n");
-
-
-  fprintf(stderr, "total %lf\n", total);
-  ecart = total / ndat;  
-  fprintf(stderr, "ecart moyen %lf\n", ecart);
-
-  return 0;
-}

+ 0 - 122
examples/pastix-wrappers/models/reg_trsm.c

@@ -1,122 +0,0 @@
-/*
- * StarPU
- * Copyright (C) INRIA 2008-2009 (see AUTHORS file)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-
-#include "nr.h"
-
-#define SIZE 100000
-#define ma 3
-
-
-typedef struct Coord
-{
-  float x1;
-  float x2;
-} coord, *pcoord;
-
-
-coord tabcoord[SIZE];
-float tabx[SIZE];
-float taby[SIZE];  
-float sig[SIZE];
-float afunc[ma+1];
-int ia[ma+1];
-float a[ma+1];
-
-
-void funcs( float i, float afunc[ma+1], int ma2)
-{
-  
-  afunc[1]=1;
-  afunc[2]=tabcoord[(int)i].x1;
-  afunc[3]=tabcoord[(int)i].x1*tabcoord[(int)i].x1*tabcoord[(int)i].x2;
-  
-  //printf("%f %f %f \n",afunc[0],afunc[1],afunc[2]);
-}
-
-
-int main(int argc, char * argv[])
-{
-  float total=0.0;
-  float ecart=0.0;
-/*   long double total=0.0; */
-/*   long double ecart=0.0; */
-  char *filename = argv[1];
-  int k,i;
-  FILE * res;
-  int ndat=atoi(argv[2]);
-  float ** covar;
-  float *chisq = (float *)malloc(sizeof(float));
-  res = fopen(filename,"r");
-  covar = (float**) malloc((ma+1) *sizeof(float*));
-  for (i=0;i<ma+1;i++)
-    covar[i]=(float*)malloc((ma+1) *sizeof(float));
-
-  for (k=1;k<ndat+1;k++)
-    {
-      int i, j;
-      float tmpfloat;
-      fscanf(res,"%f\t%d\t%d\n", &tmpfloat, &i, &j);
-      tabcoord[k].x1=i-j; 
-      tabcoord[k].x2=j;
-      taby[k]=tmpfloat;
-//      printf("%d -> %f %d %d\n", k, tmpfloat, i-j, j);
-      sig[k]=1;
-      tabx[k]=k;
-    }
-  for (k=1;k<ma+1;k++)
-    ia[k]=1;
-  
-  lfit(tabx, taby, sig, ndat, a, ia, ma, covar, chisq, &funcs);
-  
-  for (k=1;k<ma+1;k++)  
-    {    
-  //    printf("%.12lf\n", a[k]);
-      //total+=a[k];
-    }
-
-
-
-  //calcul de l'ecart type
-  for (k=1;k<ndat+1;k++)
-    {
-      double abs=0.0;
-      abs += a[1];
-      abs += tabcoord[k].x1*a[2];
-      abs += tabcoord[k].x1*tabcoord[k].x1*tabcoord[k].x2*a[3];
-//      fprintf(stderr,"k=%i ; calcul : %lf ; reel : %lf ; ", k, abs, taby[k]);
-      abs = abs - taby[k];
-      if (abs < 0)
-	abs = - abs;
- //     fprintf(stderr,"ecart : %lf\n ", abs);
-
-      total += abs;
-      //printf("%f %f %f\n",tabcoord[k].x1 ,tabcoord[k].x2 ,tabcoord[k].x3);
-    }
-  
-  fprintf(stdout,"#define TRSM_A %e\n#define TRSM_B %e\n#define TRSM_C %e\n", a[3], a[2], a[1]);
-  fprintf(stderr,"#define PERF_TRSM(i,j)   (TRSM_A*(double)(i)*(double)(i)*(double)(j)+TRSM_B*(double)(i)+TRSM_C)\n");
-
-
-  fprintf(stderr, "total %lf\n", total);
-  ecart = total / ndat;  
-  fprintf(stderr, "ecart moyen %lf\n", ecart);
-
-  return 0;
-}

+ 0 - 752
examples/pastix-wrappers/starpu-blas-wrapper.c

@@ -1,752 +0,0 @@
-/*
- * StarPU
- * Copyright (C) INRIA 2008-2009 (see AUTHORS file)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <semaphore.h>
-#include <core/jobs.h>
-#include <core/workers.h>
-#include <core/dependencies/tags.h>
-#include <string.h>
-#include <math.h>
-#include <sys/types.h>
-#include <ctype.h>
-#include <pthread.h>
-#include <signal.h>
-#include <cblas.h>
-
-#include <datawizard/datawizard.h>
-#include <task-models/blas_model.h>
-#include <common/fxt.h>
-
-#include <starpu.h>
-
-#ifdef STARPU_USE_CUDA
-#include <cuda.h>
-#endif
-
-#define BLOCK	75
-
-#include "starpu-blas-wrapper.h"
-
-extern struct starpu_data_interface_ops_t _starpu_interface_blas_ops;
-
-static int cpu_sgemm = 0;
-static int cublas_sgemm = 0;
-static int cpu_strsm = 0;
-static int cublas_strsm = 0;
-
-static int inited = 0;
-
-void STARPU_INIT(void)
-{
-	if (!inited) {
-		inited = 1;
-		starpu_init(NULL);	
-	}
-}
-
-void STARPU_TERMINATE(void)
-{
-	starpu_shutdown();
-
-	fprintf(stderr, "sgemm : cpu %d cublas %d\n", cpu_sgemm, cublas_sgemm);
-	fprintf(stderr, "strsm : cpu %d cublas %d\n", cpu_strsm, cublas_strsm);
-}
-
-/*
- *
- *	Specific to PaStiX !
- *
- */
-
-/*
- *	
- *	We "need" some custom filters
- *
- *			 VECTOR
- *			  (n)
- *			/  |   \		
- * 		   VECTOR  BLAS  VECTOR
- * 		    (n1)  (n2)	 
- *	
- *	if n1 = 0 :
- * 			VECTOR
- *			/   \
- *		     BLAS  VECTOR
- */
-
-struct divide_vector_in_blas_filter_args {
-	uint32_t n1, n2; /* (total size of the first portion (vector length) n < root's n ! */
-	uint32_t stride; /* stride of the first portion (need to be a multiple of n */
-};
-
-void divide_vector_in_blas_filter(starpu_filter *f, starpu_data_handle root_data)
-{
-	starpu_vector_interface_t *vector_root = &root_data->interface[0].vector;
-		uint32_t nx = vector_root->nx;
-		size_t elemsize = vector_root->elemsize;
-
-	struct divide_vector_in_blas_filter_args *args = f->filter_arg_ptr;
-		unsigned n1 = args->n1;
-		unsigned n2 = args->n2;
-		unsigned stride = args->stride;
-		STARPU_ASSERT(n1 + n2 < nx);
-		unsigned n3 = nx - n1 - n2;
-		
-
-	/* first allocate the children starpu_data_handle */
-	starpu_data_create_children(root_data, (n1==0)?2:3, root_data->ops);
-
-	STARPU_ASSERT((n2 % args->stride) == 0);
-
-	unsigned child = 0;
-	unsigned node;
-	
-	if (n1 > 0)
-	{
-		for (node = 0; node < STARPU_MAXNODES; node++)
-		{
-			starpu_vector_interface_t *local = &root_data->children[child].interface[node].vector;
-	
-			local->nx = n1;
-			local->elemsize = elemsize;
-	
-			if (root_data->per_node[node].allocated) {
-				local->ptr = root_data->interface[node].vector.ptr;
-			}
-	
-		}
-
-		child++;
-	}
-	
-	for (node = 0; node < STARPU_MAXNODES; node++)
-	{
-		starpu_blas_interface_t *local = &root_data->children[child].interface[node].blas;
-
-		local->nx = stride;
-		local->ny = n2/stride;
-		local->ld = stride;
-		local->elemsize = elemsize;
-
-		if (root_data->per_node[node].allocated) {
-			local->ptr = root_data->interface[node].vector.ptr + n1*elemsize;
-		}
-
-		struct starpu_data_state_t *state = &root_data->children[child];
-		state->ops = &_starpu_interface_blas_ops;
-	}
-
-	child++;
-
-	for (node = 0; node < STARPU_MAXNODES; node++)
-	{
-		starpu_vector_interface_t *local = &root_data->children[child].interface[node].vector;
-
-		local->nx = n3;
-		local->elemsize = elemsize;
-
-		if (root_data->per_node[node].allocated) {
-			local->ptr = root_data->interface[node].vector.ptr + (n1+n2)*elemsize;
-		}
-	}
-}
-
-
-static data_state *cblktab;
-
-static void _cublas_cblk_strsm_callback(void *sem)
-{
-	sem_t *semptr = sem;
-	sem_post(semptr);
-}
-
-
-void STARPU_MONITOR_DATA(unsigned ncols)
-{
-	cblktab = calloc(ncols, sizeof(data_state));
-}
-
-void STARPU_MONITOR_CBLK(unsigned col, float *data, unsigned stride, unsigned width)
-{
-	//void starpu_register_blas_data(struct starpu_data_state_t *state, uint32_t home_node,
-        //                uintptr_t ptr, uint32_t ld, uint32_t nx,
-        //                uint32_t ny, size_t elemsize);
-
-	//fprintf(stderr, "col %d data %p stride %d width %d\n", col, data, stride, width);
-
-	starpu_register_blas_data(&cblktab[col], 0 /* home */,
-			(uintptr_t) data, stride, stride, width, sizeof(float));
-	
-}
-
-static data_state work_block_1;
-static data_state work_block_2;
-
-void allocate_maxbloktab_on_cublas(void *descr[] __attribute__((unused)), void *arg __attribute__((unused)))
-{
-	starpu_request_data_allocation(&work_block_1, 1);
-	starpu_request_data_allocation(&work_block_2, 1);
-
-
-	starpu_filter f1, f2;
-	struct divide_vector_in_blas_filter_args args1, args2;
-
-	f1.filter_func = divide_vector_in_blas_filter;
-		args1.n1 = 1; /* XXX random ... */
-		args1.n2 = 2;
-		args1.stride = 1;
-
-	f1.filter_arg_ptr = &args1;
-	starpu_partition_data(&work_block_1, &f1);
-
-	f2.filter_func = divide_vector_in_blas_filter;
-		args2.n1 = 0;
-		args2.n2 = 2;
-		args2.stride = 1;
-	f2.filter_arg_ptr = &args2;
-	starpu_partition_data(&work_block_2, &f2);
-}
-
-void STARPU_DECLARE_WORK_BLOCKS(float *maxbloktab1, float *maxbloktab2, unsigned solv_coefmax)
-{
-	starpu_register_vector_data(&work_block_1, 0 /* home */, (uintptr_t)maxbloktab1, solv_coefmax, sizeof(float));
-	starpu_register_vector_data(&work_block_2, 0 /* home */, (uintptr_t)maxbloktab2, solv_coefmax, sizeof(float));
-
-	starpu_codelet cl;
-	starpu_job_t j;
-	sem_t sem;
-
-	/* initialize codelet */
-	cl.where = STARPU_CUDA;
-	cl.cuda_func = allocate_maxbloktab_on_cublas;
-	
-	j = _starpu_job_create();
-	j->cb = _cublas_cblk_strsm_callback;
-	j->argcb = &sem;
-	j->cl = &cl;
-	j->cl_arg = NULL;
-
-	j->nbuffers = 0;
-	j->cl->model = NULL;
-
-	sem_init(&sem, 0, 0U);
-	
-	/* submit the codelet */
-	submit_job(j);
-
-	/* wait for its completion */
-	sem_wait(&sem);
-	sem_destroy(&sem);
-
-}
-
-void _cpu_cblk_strsm(void *descr[], void *arg __attribute__((unused)))
-{
-	uint32_t nx, ny, ld;
-	nx = STARPU_GET_BLAS_NX(descr[0]);
-	ny = STARPU_GET_BLAS_NY(descr[0]);
-	ld = STARPU_GET_BLAS_LD(descr[0]);
-
-	float *diag_cblkdata, *extra_cblkdata;
-	diag_cblkdata = (float *)STARPU_GET_BLAS_PTR(descr[0]);
-	extra_cblkdata = diag_cblkdata + ny;
-
-	unsigned m = nx - ny;
-	unsigned n = ny;
-
-//	SOPALIN_TRSM("R","L","T","U",dimb,dima,fun,ga,stride,gb,stride);
-	cpu_strsm++;
-
-	cblas_strsm(CblasColMajor, CblasRight, CblasLower, CblasTrans, CblasUnit, m, n, 1.0f, 
-			diag_cblkdata, ld, extra_cblkdata, ld);
-}
-
-
-void _cublas_cblk_strsm(void *descr[], void *arg __attribute__((unused)))
-{
-	uint32_t nx, ny, ld;
-	nx = STARPU_GET_BLAS_NX(descr[0]);
-	ny = STARPU_GET_BLAS_NY(descr[0]);
-	ld = STARPU_GET_BLAS_LD(descr[0]);
-
-	float *diag_cblkdata, *extra_cblkdata;
-	diag_cblkdata = (float *)STARPU_GET_BLAS_PTR(descr[0]);
-	extra_cblkdata = diag_cblkdata + ny;
-
-	unsigned m = nx - ny;
-	unsigned n = ny;
-	
-	cublas_strsm++;
-
-	cublasStrsm ('R', 'L', 'T', 'U', m, n, 1.0, 
-		diag_cblkdata, ld, 
-		extra_cblkdata, ld);
-	cublasStatus st = cublasGetError();
-	if (st) fprintf(stderr, "ERROR %d\n", st);
-	STARPU_ASSERT(st == CUBLAS_STATUS_SUCCESS);
-}
-
-static struct starpu_perfmodel_t starpu_cblk_strsm = {
-	.per_arch = { 
-		[STARPU_CPU_DEFAULT] = { .cost_model = starpu_cblk_strsm_cpu_cost },
-		[STARPU_CUDA_DEFAULT] = { .cost_model = starpu_cblk_strsm_cuda_cost }
-	},
-//	.type = REGRESSION_BASED,
-	.type = STARPU_PER_ARCH,
-	.symbol = "starpu_cblk_strsm"
-};
-
-
-void STARPU_CBLK_STRSM(unsigned col)
-{
-	/* perform a strsm on the block column */
-	starpu_codelet cl;
-	starpu_job_t j;
-	sem_t sem;
-
-	/* initialize codelet */
-	cl.where = STARPU_CPU|STARPU_CUDA;
-	cl.cpu_func = _cpu_cblk_strsm;
-	cl.cuda_func = _cublas_cblk_strsm;
-	
-	j = _starpu_job_create();
-//	j->where = (starpu_get_blas_nx(&cblktab[col]) > BLOCK && starpu_get_blas_ny(&cblktab[col]) > BLOCK)? CUBLAS:CPU;
-	j->cb = _cublas_cblk_strsm_callback;
-	j->argcb = &sem;
-	j->cl = &cl;
-	j->cl_arg = NULL;
-
-	j->nbuffers = 1;
-	/* we could be a little more precise actually */
-	j->buffers[0].handle = &cblktab[col];
-	j->buffers[0].mode = STARPU_RW;
-	
-	j->cl->model = &starpu_cblk_strsm;
-
-	sem_init(&sem, 0, 0U);
-	
-	/* submit the codelet */
-	submit_job(j);
-
-	/* wait for its completion */
-	sem_wait(&sem);
-	sem_destroy(&sem);
-}
-
-struct starpu_compute_contrib_compact_args {
-	unsigned stride;
-	int dimi;
-	int dimj;
-	int dima;
-};
-
-
-void _cpu_compute_contrib_compact(void *descr[], void *arg)
-{
-	struct starpu_compute_contrib_compact_args *args = arg;
-
-	float *gaik = (float *)STARPU_GET_BLAS_PTR(descr[0]) + args->dima;
-	float *gb = (float *)STARPU_GET_BLAS_PTR(descr[1]); 
-	unsigned strideb = (unsigned)STARPU_GET_BLAS_LD(descr[1]);
-	float *gc = (float *)STARPU_GET_BLAS_PTR(descr[2]);
-	unsigned stridec = (unsigned)STARPU_GET_BLAS_LD(descr[2]);
-
-	cpu_sgemm++;
-
-	cblas_sgemm(CblasColMajor, CblasNoTrans, CblasTrans, 
-			args->dimi, args->dimj, args->dima,
-			1.0f, gaik, args->stride,
-			      gb, strideb,
-			0.0 , gc, stridec);
-
-}
-
-
-void _cublas_compute_contrib_compact(void *descr[], void *arg)
-{
-	struct starpu_compute_contrib_compact_args *args = arg;
-
-	float *gaik = (float *)STARPU_GET_BLAS_PTR(descr[0]) + args->dima;
-	float *gb = (float *)STARPU_GET_BLAS_PTR(descr[1]);
-	unsigned strideb = (unsigned)STARPU_GET_BLAS_LD(descr[1]);
-	float *gc = (float *)STARPU_GET_BLAS_PTR(descr[2]);
-	unsigned stridec = (unsigned)STARPU_GET_BLAS_LD(descr[2]);
-	
-	cublas_sgemm++;
-
-	cublasSgemm('N','T', args->dimi, args->dimj, args->dima, 
-			1.0, gaik, args->stride,
-			     gb, strideb,
-			0.0, gc, stridec);
-
-	cublasStatus st = cublasGetError();
-	if (st) fprintf(stderr, "ERROR %d\n", st);
-	STARPU_ASSERT(st == CUBLAS_STATUS_SUCCESS);
-}
-
-
-static struct starpu_perfmodel_t starpu_compute_contrib_compact = {
-	.per_arch = { 
-		[STARPU_CPU_DEFAULT] = { .cost_model = starpu_compute_contrib_compact_cpu_cost },
-		[STARPU_CUDA_DEFAULT] = { .cost_model = starpu_compute_contrib_compact_cuda_cost }
-	},
-//	.type = REGRESSION_BASED,
-	.type = STARPU_PER_ARCH,
-	.symbol = "starpu_compute_contrib_compact"
-};
-
-int update_work_blocks(unsigned col, int dimi, int dimj, int dima, int stride)
-{
-	/* be paranoid XXX */
-	notify_data_modification(get_sub_data(&work_block_1, 1, 0), 0);
-	notify_data_modification(get_sub_data(&work_block_1, 1, 1), 0);
-	//notify_data_modification(get_sub_data(&work_block_1, 1, 2), 0);
-	notify_data_modification(get_sub_data(&work_block_2, 1, 0), 0);
-	notify_data_modification(get_sub_data(&work_block_2, 1, 1), 0);
-	notify_data_modification(&cblktab[col], 0);
-
-	starpu_unpartition_data(&work_block_1, 0);
-	starpu_unpartition_data(&work_block_2, 0);
-
-	starpu_filter f1, f2;
-	struct divide_vector_in_blas_filter_args args1, args2;
-
-	f1.filter_func = divide_vector_in_blas_filter;
-		args1.n1 = stride - dima - dimi; //STARPU_ASSERT(args1.n1 != 0);
-		args1.n2 = (stride - dima)*dima;
-		args1.stride = (stride - dima);
-
-	f1.filter_arg_ptr = &args1;
-	starpu_partition_data(&work_block_1, &f1);
-
-	f2.filter_func = divide_vector_in_blas_filter;
-		args2.n1 = 0;
-		args2.n2 = dimi*dimj;
-		args2.stride = dimi;
-	f2.filter_arg_ptr = &args2;
-	starpu_partition_data(&work_block_2, &f2);
-
-	return (args1.n1!=0)?3:2;
-}
-
-void STARPU_COMPUTE_CONTRIB_COMPACT(unsigned col, int dimi, int dimj, int dima, int stride)
-{
-//        CUBLAS_SGEMM("N","T",dimi,dimj,dima, 1.0,gaik,stride,gb,stride-dima,
-//               0.0 ,gc,dimi);
-	
-	struct starpu_compute_contrib_compact_args args;
-		args.stride = stride;
-		args.dimi = dimi;
-		args.dimj = dimj;
-		args.dima = dima;
-
-	starpu_codelet cl;
-	starpu_job_t j;
-	sem_t sem;
-
-	/* initialize codelet */
-	cl.where = STARPU_CUDA|STARPU_CPU;
-	cl.cpu_func = _cpu_compute_contrib_compact;
-	cl.cuda_func = _cublas_compute_contrib_compact;
-	
-	j = _starpu_job_create();
-
-	j->cb = _cublas_cblk_strsm_callback;
-	j->argcb = &sem;
-	j->cl = &cl;
-	j->cl_arg = &args;
-	j->cl->model = &starpu_compute_contrib_compact;
-
-	int ret;
-	ret = update_work_blocks(col, dimi, dimj, dima, stride);
-
-	j->nbuffers = 3;
-	/* we could be a little more precise actually */
-	j->buffers[0].handle = &cblktab[col]; // gaik
-	j->buffers[0].mode = STARPU_R;
-	j->buffers[1].handle = get_sub_data(&work_block_1, 1, (ret==2)?0:1);
-	j->buffers[1].mode = STARPU_R;
-	j->buffers[2].handle = get_sub_data(&work_block_2, 1, 0);; 
-	j->buffers[2].mode = STARPU_RW; // XXX STARPU_W
-	
-	sem_init(&sem, 0, 0U);
-	
-	/* submit the codelet */
-	submit_job(j);
-
-	/* wait for its completion */
-	sem_wait(&sem);
-	sem_destroy(&sem);
-
-}
-
-/*
- *
- *	SGEMM
- *
- */
-
-struct sgemm_args {
-	char transa;
-	char transb;
-	int m, n, k;
-	float alpha;
-	float beta;
-};
-
-
-void _cublas_sgemm(void *descr[], void *arg)
-{
-	float *A, *B, *C;
-	uint32_t nxA, nyA, ldA;
-	uint32_t nxB, nyB, ldB;
-	uint32_t nxC, nyC, ldC;
-
-	A = (float *)STARPU_GET_BLAS_PTR(descr[0]);
-	nxA = STARPU_GET_BLAS_NX(descr[0]);
-	nyA = STARPU_GET_BLAS_NY(descr[0]);
-	ldA = STARPU_GET_BLAS_LD(descr[0]);
-
-	B = (float *)STARPU_GET_BLAS_PTR(descr[1]);
-	nxB = STARPU_GET_BLAS_NX(descr[1]);
-	nyB = STARPU_GET_BLAS_NY(descr[1]);
-	ldB = STARPU_GET_BLAS_LD(descr[1]);
-
-	C = (float *)STARPU_GET_BLAS_PTR(descr[2]);
-	nxC = STARPU_GET_BLAS_NX(descr[2]);
-	nyC = STARPU_GET_BLAS_NY(descr[2]);
-	ldC = STARPU_GET_BLAS_LD(descr[2]);
-
-	struct sgemm_args *args = arg;
-
-//	fprintf(stderr, "CUBLAS SGEMM nxA %d nyA %d nxB %d nyB %d nxC %d nyC %d lda %d ldb %d ldc %d\n", nxA, nyA, nxB, nyB, nxC, nyC, ldA, ldB, ldC);
-
-//	STARPU_ASSERT(nxA == nxC);
-//	STARPU_ASSERT(nyA == nxB);
-//	STARPU_ASSERT(nyB == nyC);
-//
-//	STARPU_ASSERT(nxA <= ldA);
-//	STARPU_ASSERT(nxB <= ldB);
-//	STARPU_ASSERT(nxC <= ldC);
-
-	cublasSgemm (args->transa, args->transb, args->m, args->n, args->k, args->alpha, A, (int)ldA,
-			B, (int)ldB, args->beta, C, (int)ldC);
-	cublasStatus st = cublasGetError();
-	if (st) fprintf(stderr, "ERROR %d\n", st);
-	STARPU_ASSERT(st == CUBLAS_STATUS_SUCCESS);
-}
-
-static void _cublas_sgemm_callback(void *sem)
-{
-	sem_t *semptr = sem;
-	sem_post(semptr);
-}
-
-void STARPU_SGEMM (const char *transa, const char *transb, const int m,
-                   const int n, const int k, const float alpha,
-                   const float *A, const int lda, const float *B,
-                   const int ldb, const float beta, float *C, const int ldc)
-{
-	struct sgemm_args args;
-		args.transa = *transa;
-		args.transb = *transb;
-		args.alpha = alpha;
-		args.beta = beta;
-		args.m = m;
-		args.n = n;
-		args.k = k;
-
-	data_state A_state;
-	data_state B_state;
-	data_state C_state;
-
-	starpu_codelet cl;
-	starpu_job_t j;
-	sem_t sem;
-
-//	fprintf(stderr, "STARPU - SGEMM - TRANSA %c TRANSB %c m %d n %d k %d lda %d ldb %d ldc %d \n", *transa, *transb, m, n, k, lda, ldb, ldc);
-
-	if (toupper(*transa) == 'N')
-	{
-		starpu_register_blas_data(&A_state, 0, (uintptr_t)A, lda, m, k, sizeof(float));
-	}
-	else 
-	{
-		starpu_register_blas_data(&A_state, 0, (uintptr_t)A, lda, k, m, sizeof(float));
-	}
-
-	if (toupper(*transb) == 'N')
-	{
-		starpu_register_blas_data(&B_state, 0, (uintptr_t)B, ldb, k, n, sizeof(float));
-	}
-	else 
-	{	
-		starpu_register_blas_data(&B_state, 0, (uintptr_t)B, ldb, n, k, sizeof(float));
-	}
-
-	starpu_register_blas_data(&C_state, 0, (uintptr_t)C, ldc, m, n, sizeof(float));
-
-	/* initialize codelet */
-	cl.where = STARPU_CUDA;
-	//cl.cpu_func = _cpu_strsm;
-	cl.cuda_func = _cublas_sgemm;
-	
-	j = _starpu_job_create();
-	j->cb = _cublas_sgemm_callback;
-	j->argcb = &sem;
-	j->cl = &cl;
-	j->cl_arg = &args;
-
-	j->nbuffers = 3;
-	j->buffers[0].handle = &A_state;
-	j->buffers[0].mode = STARPU_R;
-	j->buffers[1].handle = &B_state;
-	j->buffers[1].mode = STARPU_R;
-	j->buffers[2].handle = &C_state;
-	j->buffers[2].mode = STARPU_RW;
-	
-	j->cl->model = NULL;
-
-	sem_init(&sem, 0, 0U);
-	
-	/* submit the codelet */
-	submit_job(j);
-
-	/* wait for its completion */
-	sem_wait(&sem);
-	sem_destroy(&sem);
-
-	/* make sure data are in memory again */
-	starpu_unpartition_data(&A_state, 0);
-	starpu_unpartition_data(&B_state, 0);
-	starpu_unpartition_data(&C_state, 0);
-	//starpu_delete_data(&A_state);
-	//starpu_delete_data(&B_state);
-	//starpu_delete_data(&C_state);
-	
-//	fprintf(stderr, "SGEMM done\n");
-}
-
-
-/*
- *
- *	STRSM
- *
- */
-
-struct strsm_args {
-	char side;
-	char uplo;
-	char transa;
-	char diag;
-	float alpha;
-	int m,n;
-};
-//
-//void _cpu_strsm(void *descr[], void *arg)
-//{
-//	float *A, *B;
-//	uint32_t nxA, nyA, ldA;
-//	uint32_t nxB, nyB, ldB;
-//
-//	A = (float *)STARPU_GET_BLAS_PTR(descr[0]);
-//	nxA = STARPU_GET_BLAS_NX(descr[0]);
-//	nyA = STARPU_GET_BLAS_NY(descr[0]);
-//	ldA = STARPU_GET_BLAS_LD(descr[0]);
-//
-//	B = (float *)STARPU_GET_BLAS_PTR(descr[1]);
-//	nxB = STARPU_GET_BLAS_NX(descr[1]);
-//	nyB = STARPU_GET_BLAS_NY(descr[1]);
-//	ldB = STARPU_GET_BLAS_LD(descr[1]);
-//
-//	struct strsm_args *args = arg;
-//
-//	fprintf(stderr, "CPU STRSM nxA %d nyA %d nxB %d nyB %d lda %d ldb %d\n", nxA, nyA, nxB, nyB, ldA, ldB);
-//
-//	SOPALIN_TRSM("R","L","T","U",dimb,dima,fun,ga,stride,gb,stride);
-//	
-//}
-
-/* 
- *	
- *	
- *
- */
-
-
-void CUBLAS_SGEMM (const char *transa, const char *transb, const int m,
-                   const int n, const int k, const float alpha,
-                   const float *A, const int lda, const float *B,
-                   const int ldb, const float beta, float *C, const int ldc)
-{
-    int ka, kb;
-    float *devPtrA, *devPtrB, *devPtrC;
-
-//   printf("CUBLAS SGEMM : m %d n %d k %d lda %d ldb %d ldc %d\n", m, n, k, lda, ldb, ldc);
-
-    /*  A      - REAL             array of DIMENSION ( LDA, ka ), where ka is
-     *           k  when  TRANSA = 'N' or 'n',  and is  m  otherwise.
-     *           Before entry with  TRANSA = 'N' or 'n',  the leading  m by k
-     *           part of the array  A  must contain the matrix  A,  otherwise
-     *           the leading  k by m  part of the array  A  must contain  the
-     *           matrix A.
-     */
-    ka = (toupper(transa[0]) == 'N') ? k : m;
-    cublasAlloc (lda * ka, sizeof(devPtrA[0]), (void**)&devPtrA);
-    if (toupper(transa[0]) == 'N') {
-        cublasSetMatrix (STARPU_MIN(m,lda), k, sizeof(A[0]), A, lda, devPtrA, 
-                         lda);
-    } else {
-        cublasSetMatrix (STARPU_MIN(k,lda), m, sizeof(A[0]), A, lda, devPtrA, 
-                         lda);
-    }
-
-    /*  B      - REAL             array of DIMENSION ( LDB, kb ), where kb is
-     *           n  when  TRANSB = 'N' or 'n',  and is  k  otherwise.
-     *           Before entry with  TRANSB = 'N' or 'n',  the leading  k by n
-     *           part of the array  B  must contain the matrix  B,  otherwise
-     *           the leading  n by k  part of the array  B  must contain  the
-     *           matrix B.
-     */
-    kb = (toupper(transb[0]) == 'N') ? n : k;
-    cublasAlloc (ldb * kb, sizeof(devPtrB[0]), (void**)&devPtrB);
-    if (toupper(transb[0]) == 'N') {
-        cublasSetMatrix (STARPU_MIN(k,ldb), n, sizeof(B[0]), B, ldb, devPtrB, 
-                         ldb);
-    } else {
-        cublasSetMatrix (STARPU_MIN(n,ldb), k, sizeof(B[0]), B, ldb, devPtrB,
-                         ldb);
-    }
-    
-    /*  C      - REAL             array of DIMENSION ( LDC, n ).
-     *           Before entry, the leading  m by n  part of the array  C must
-     *           contain the matrix  C,  except when  beta  is zero, in which
-     *           case C need not be set on entry.
-     *           On exit, the array  C  is overwritten by the  m by n  matrix
-     */
-    cublasAlloc ((ldc) * (n), sizeof(devPtrC[0]), (void**)&devPtrC);
-    cublasSetMatrix (STARPU_MIN(m,ldc), n, sizeof(C[0]), C, ldc, devPtrC, ldc);
-
-    cublasSgemm (transa[0], transb[0], m, n, k, alpha, devPtrA, lda, 
-                 devPtrB, ldb, beta, devPtrC, ldc);
-
-    cublasGetMatrix (STARPU_MIN(m,ldc), n, sizeof(C[0]), devPtrC, ldc, C, ldc);
-    cublasFree (devPtrA);
-    cublasFree (devPtrB);
-    cublasFree (devPtrC);
-}
-
-

+ 0 - 108
examples/pastix-wrappers/starpu-blas-wrapper.h

@@ -1,108 +0,0 @@
-/*
- * StarPU
- * Copyright (C) INRIA 2008-2009 (see AUTHORS file)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#ifndef __STARPU_BLAS_WRAPPER_H__
-#define __STARPU_BLAS_WRAPPER_H__
-
-#include "generated_model.h"
-
-#define OVERHEAD	150000.0
-
-static double transfer_time_dtoh(unsigned size)
-{
-	double latency = 0.0;
-	double bandwith = 0.0;
-
-	return latency + size*bandwith;
-}
-
-static double transfer_time_htod(unsigned size)
-{
-	double latency = 0.0;
-	double bandwith = 0.0;
-
-	return latency + size*bandwith;
-}
-
-/*GEMM CPU */
-
-#define PERF_GEMM_CPU(i,j,k) (GEMM_CPU_A*(double)(i)*(double)(j)*(double)(k)+GEMM_CPU_B*(double)(i)*(double)(j)+GEMM_CPU_C*(double)(j)*(double)(k)+GEMM_CPU_D*(double)(i)+GEMM_CPU_E*(double)(j)+GEMM_CPU_F)
-
-static double starpu_compute_contrib_compact_cpu_cost(starpu_buffer_descr *descr)
-{
-	unsigned nx0, ny0, ny2;
-	nx0 = descr[0].handle->interface->blas.nx;
-	ny0 = descr[0].handle->interface->blas.ny;
-	ny2 = descr[2].handle->interface->blas.ny;
-
-	return PERF_GEMM_CPU(nx0-ny0, ny2, ny0); 
-}
-
-
-
-/*GEMM GPU */
-
-#define PERF_GEMM_GPU(i,j,k) (GEMM_GPU_A*(double)(i)*(double)(j)*(double)(k)+GEMM_GPU_B*(double)(i)*(double)(j)+GEMM_GPU_C*(double)(j)*(double)(k)+GEMM_GPU_D*(double)(i)+GEMM_GPU_E*(double)(j)+GEMM_GPU_F)
-
-static double starpu_compute_contrib_compact_cuda_cost(starpu_buffer_descr *descr)
-{
-	unsigned nx0, ny0, ny2;
-	nx0 = descr[0].handle->interface->blas.nx;
-	ny0 = descr[0].handle->interface->blas.ny;
-	ny2 = descr[2].handle->interface->blas.ny;
-
-	return PERF_GEMM_GPU(nx0-ny0, ny2, ny0) + OVERHEAD; 
-}
-
-
-/*TRSM CPU */
-
-#define PERF_TRSM_GPU(i,j)   (TRSM_GPU_A*(double)(i)*(double)(i)*(double)(j)+TRSM_GPU_B*(double)(i)+TRSM_GPU_C)
-
-static double starpu_cblk_strsm_cuda_cost(starpu_buffer_descr *descr)
-{
-	unsigned nx, ny;
-	nx = descr[0].handle->interface->blas.nx;
-	ny = descr[0].handle->interface->blas.ny;
-
-	return PERF_TRSM_GPU(nx-ny, ny) + OVERHEAD; 
-}
-
-/*TRSM CPU */
-
-#define PERF_TRSM_CPU(i,j)   (TRSM_CPU_A*(double)(i)*(double)(i)*(double)(j)+TRSM_CPU_B*(double)(i)+TRSM_CPU_C)
-
-static double starpu_cblk_strsm_cpu_cost(starpu_buffer_descr *descr)
-{
-	unsigned nx, ny;
-	nx = descr[0].handle->interface->blas.nx;
-	ny = descr[0].handle->interface->blas.ny;
-
-	return PERF_TRSM_CPU(nx-ny, ny); 
-}
-
-void STARPU_INIT(void);
-void STARPU_TERMINATE(void);
-void STARPU_SGEMM (const char *transa, const char *transb, const int m,
-                   const int n, const int k, const float alpha,
-                   const float *A, const int lda, const float *B,
-                   const int ldb, const float beta, float *C, const int ldc);
-void STARPU_STRSM (const char *side, const char *uplo, const char *transa, 
-                   const char *diag, const int m, const int n, 
-                   const float alpha, const float *A, const int lda,
-                   float *B, const int ldb);
-
-#endif // __STARPU_BLAS_WRAPPER_H__