il y a 15 ans · 902b2f1412
--- a/examples/pastix-wrappers/Makefile
+++ b/examples/pastix-wrappers/Makefile
@@ -1,28 +0,0 @@
 
				-#
			
 
				-# StarPU
			
 
				-# Copyright (C) INRIA 2008-2009 (see AUTHORS file)
			
 
				-#
			
 
				-# This program is free software; you can redistribute it and/or modify
			
 
				-# it under the terms of the GNU Lesser General Public License as published by
			
 
				-# the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				-# your option) any later version.
			
 
				-#
			
 
				-# This program is distributed in the hope that it will be useful, but
			
 
				-# WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				-#
			
 
				-# See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				-#
			
 
				-
			
 
				-STARPU= ../../libstarpu.a
			
 
				-
			
 
				-all: starpu-blas-wrapper.a
			
 
				-
			
 
				-starpu-blas-wrapper.o: starpu-blas-wrapper.c starpu-blas-wrapper.h
			
 
				-	$(CC) $(CFLAGS) starpu-blas-wrapper.c -c -o starpu-blas-wrapper.o
			
 
				-
			
 
				-starpu-blas-wrapper.a: starpu-blas-wrapper.o
			
 
				-	$(AR) rcs starpu-blas-wrapper.a starpu-blas-wrapper.o
			
 
				-
			
 
				-clean:
			
 
				-	@rm -f *.a *.o *.d *.gcno *.gcda
			
--- a/examples/pastix-wrappers/generated_model.h
+++ b/examples/pastix-wrappers/generated_model.h
@@ -1,34 +0,0 @@
 
				-/*
			
 
				- * StarPU
			
 
				- * Copyright (C) INRIA 2008-2009 (see AUTHORS file)
			
 
				- *
			
 
				- * This program is free software; you can redistribute it and/or modify
			
 
				- * it under the terms of the GNU Lesser General Public License as published by
			
 
				- * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				- * your option) any later version.
			
 
				- *
			
 
				- * This program is distributed in the hope that it will be useful, but
			
 
				- * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				- *
			
 
				- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				- */
			
 
				-
			
 
				-#define GEMM_CPU_A  9.627597e-05
			
 
				-#define GEMM_CPU_B  2.909318e-03
			
 
				-#define GEMM_CPU_C  1.233512e-01
			
 
				-#define GEMM_CPU_D  -2.789676e-01
			
 
				-#define GEMM_CPU_E  -1.719838e-01
			
 
				-#define GEMM_CPU_F  2.032491e+01
			
 
				-#define GEMM_GPU_A  1.564597e-05
			
 
				-#define GEMM_GPU_B  1.643119e-04
			
 
				-#define GEMM_GPU_C  1.990316e-02
			
 
				-#define GEMM_GPU_D  -1.120220e-02
			
 
				-#define GEMM_GPU_E  2.416027e-01
			
 
				-#define GEMM_GPU_F  1.974529e+01
			
 
				-#define TRSM_GPU_A 4.302117e-06
			
 
				-#define TRSM_GPU_B 5.423172e-01
			
 
				-#define TRSM_GPU_C -4.868755e+00
			
 
				-#define TRSM_CPU_A 1.362886e-05
			
 
				-#define TRSM_CPU_B 6.283488e-01
			
 
				-#define TRSM_CPU_C -4.053346e+01
			
--- a/examples/pastix-wrappers/models/Makefile
+++ b/examples/pastix-wrappers/models/Makefile
@@ -1,59 +0,0 @@
 
				-#
			
 
				-# StarPU
			
 
				-# Copyright (C) INRIA 2008-2009 (see AUTHORS file)
			
 
				-#
			
 
				-# This program is free software; you can redistribute it and/or modify
			
 
				-# it under the terms of the GNU Lesser General Public License as published by
			
 
				-# the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				-# your option) any later version.
			
 
				-#
			
 
				-# This program is distributed in the hope that it will be useful, but
			
 
				-# WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				-#
			
 
				-# See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				-#
			
 
				-
			
 
				-all: reg_gemm reg_trsm 
			
 
				-
			
 
				-reg_axpy: reg_axpy.c num_rec
			
 
				-	gcc -o reg_axpy reg_axpy.c num_recipes/*.o -Inum_recipes/ -lm
			
 
				-
			
 
				-reg_copy: reg_copy.c num_rec
			
 
				-	gcc -o reg_copy reg_copy.c num_recipes/*.o -Inum_recipes/ -lm
			
 
				-
			
 
				-reg_geam: reg_geam.c num_rec
			
 
				-	gcc -o reg_geam reg_geam.c num_recipes/*.o -Inum_recipes/ -lm
			
 
				-
			
 
				-reg_gemm: reg_gemm.c num_rec
			
 
				-	gcc -g -o reg_gemm reg_gemm.c num_recipes/*.o -Inum_recipes/ -lm
			
 
				-
			
 
				-reg_pof: reg_pof.c num_rec
			
 
				-	gcc -o reg_pof reg_pof.c num_recipes/*.o -Inum_recipes/ -lm
			
 
				-
			
 
				-reg_ppf: reg_ppf.c num_rec
			
 
				-	gcc -o reg_ppf reg_ppf.c num_recipes/*.o -Inum_recipes/ -lm
			
 
				-
			
 
				-reg_scal: reg_scal.c num_rec
			
 
				-	gcc -o reg_scal reg_scal.c num_recipes/*.o -Inum_recipes/ -lm
			
 
				-
			
 
				-reg_trsm: reg_trsm.c num_rec
			
 
				-	gcc -g -o reg_trsm reg_trsm.c num_recipes/*.o -Inum_recipes/ -lm
			
 
				-
			
 
				-num_rec: covsrt.o gaussj.o lfit.o nrutil.o num_recipes/nr.h num_recipes/nrutil.h num_recipes/complex.h
			
 
				-
			
 
				-covsrt.o: num_recipes/covsrt.c
			
 
				-	gcc -g -o num_recipes/covsrt.o -c num_recipes/covsrt.c
			
 
				-
			
 
				-gaussj.o: num_recipes/gaussj.c
			
 
				-	gcc -g -o num_recipes/gaussj.o -c num_recipes/gaussj.c
			
 
				-
			
 
				-lfit.o: num_recipes/lfit.c
			
 
				-	gcc -g -o num_recipes/lfit.o -c num_recipes/lfit.c
			
 
				-
			
 
				-nrutil.o: num_recipes/nrutil.c
			
 
				-	gcc -g -o num_recipes/nrutil.o -c num_recipes/nrutil.c
			
 
				-
			
 
				-clean:
			
 
				-	rm -f reg_gemm reg_trsm
			
 
				-	rm -rf *.o num_recipes/*.o
			
--- a/examples/pastix-wrappers/models/model.sh
+++ b/examples/pastix-wrappers/models/model.sh
@@ -1,31 +0,0 @@
 
				-#!/bin/bash
			
 
				-
			
 
				-#
			
 
				-# StarPU
			
 
				-# Copyright (C) INRIA 2008-2009 (see AUTHORS file)
			
 
				-#
			
 
				-# This program is free software; you can redistribute it and/or modify
			
 
				-# it under the terms of the GNU Lesser General Public License as published by
			
 
				-# the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				-# your option) any later version.
			
 
				-#
			
 
				-# This program is distributed in the hope that it will be useful, but
			
 
				-# WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				-#
			
 
				-# See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				-#
			
 
				-
			
 
				-
			
 
				-rm -f generated_model.h
			
 
				-
			
 
				-# contrib compact 
			
 
				-./reg_gemm /home/gonnet/These/StarPU-stable/.sampling/starpu_compute_contrib_compact.barracuda.core.debug `wc -l /home/gonnet/These/StarPU-stable/.sampling/starpu_compute_contrib_compact.barracuda.core.debug` 2> /dev/null|sed -s s/GEMM/GEMM_CPU/ >  generated_model.h
			
 
				-./reg_gemm /home/gonnet/These/StarPU-stable/.sampling/starpu_compute_contrib_compact.barracuda.cuda.debug `wc -l /home/gonnet/These/StarPU-stable/.sampling/starpu_compute_contrib_compact.barracuda.cuda.debug` 2> /dev/null|sed -s s/GEMM/GEMM_GPU/ >>  generated_model.h
			
 
				-
			
 
				-# strsm
			
 
				-
			
 
				-./reg_trsm /home/gonnet/These/StarPU-stable/.sampling/starpu_cblk_strsm.barracuda.cuda.debug `wc -l /home/gonnet/These/StarPU-stable/.sampling/starpu_cblk_strsm.barracuda.cuda.debug` 2> /dev/null|sed -s s/TRSM/TRSM_GPU/ >>  generated_model.h
			
 
				-./reg_trsm /home/gonnet/These/StarPU-stable/.sampling/starpu_cblk_strsm.barracuda.core.debug `wc -l /home/gonnet/These/StarPU-stable/.sampling/starpu_cblk_strsm.barracuda.core.debug` 2> /dev/null|sed -s s/TRSM/TRSM_CPU/ >>  generated_model.h
			
 
				-
			
 
				-cat generated_model.h
			
--- a/examples/pastix-wrappers/models/num_recipes/complex.h
+++ b/examples/pastix-wrappers/models/num_recipes/complex.h
@@ -1,26 +0,0 @@
 
				-/* CAUTION: This is the ANSI C (only) version of the Numerical Recipes
			
 
				-   utility file complex.h.  Do not confuse this file with the same-named
			
 
				-   file complex.h that is supplied in the 'misc' subdirectory.
			
 
				-   *That* file is the one from the book, and contains both ANSI and
			
 
				-   traditional K&R versions, along with #ifdef macros to select the
			
 
				-   correct version.  *This* file contains only ANSI C.               */
			
 
				-
			
 
				-#ifndef _NR_COMPLEX_H_
			
 
				-#define _NR_COMPLEX_H_
			
 
				-
			
 
				-#ifndef _FCOMPLEX_DECLARE_T_
			
 
				-typedef struct FCOMPLEX {float r,i;} fcomplex;
			
 
				-#define _FCOMPLEX_DECLARE_T_
			
 
				-#endif /* _FCOMPLEX_DECLARE_T_ */
			
 
				-
			
 
				-fcomplex Cadd(fcomplex a, fcomplex b);
			
 
				-fcomplex Csub(fcomplex a, fcomplex b);
			
 
				-fcomplex Cmul(fcomplex a, fcomplex b);
			
 
				-fcomplex Complex(float re, float im);
			
 
				-fcomplex Conjg(fcomplex z);
			
 
				-fcomplex Cdiv(fcomplex a, fcomplex b);
			
 
				-float Cabs(fcomplex z);
			
 
				-fcomplex Csqrt(fcomplex z);
			
 
				-fcomplex RCmul(float x, fcomplex a);
			
 
				-
			
 
				-#endif /* _NR_COMPLEX_H_ */
			
--- a/examples/pastix-wrappers/models/num_recipes/covsrt.c
+++ b/examples/pastix-wrappers/models/num_recipes/covsrt.c
@@ -1,20 +0,0 @@
 
				-#define REAL double
			
 
				-#define SWAP(a,b) {swap=(a);(a)=(b);(b)=swap;}
			
 
				-
			
 
				-void covsrt(REAL **covar, int ma, int ia[], int mfit)
			
 
				-{
			
 
				-	int i,j,k;
			
 
				-	REAL swap;
			
 
				-
			
 
				-	for (i=mfit+1;i<=ma;i++)
			
 
				-		for (j=1;j<=i;j++) covar[i][j]=covar[j][i]=0.0;
			
 
				-	k=mfit;
			
 
				-	for (j=ma;j>=1;j--) {
			
 
				-		if (ia[j]) {
			
 
				-			for (i=1;i<=ma;i++) SWAP(covar[i][k],covar[i][j])
			
 
				-			for (i=1;i<=ma;i++) SWAP(covar[k][i],covar[j][i])
			
 
				-			k--;
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-#undef SWAP
			
--- a/examples/pastix-wrappers/models/num_recipes/gaussj.c
+++ b/examples/pastix-wrappers/models/num_recipes/gaussj.c
@@ -1,60 +0,0 @@
 
				-#include <math.h>
			
 
				-#define NRANSI
			
 
				-#include "nrutil.h"
			
 
				-//#define REAL float
			
 
				-#define SWAP(a,b) {temp=(a);(a)=(b);(b)=temp;}
			
 
				-
			
 
				-void gaussj(REAL **a, int n, REAL **b, int m)
			
 
				-{
			
 
				-	int *indxc,*indxr,*ipiv;
			
 
				-	int i,icol,irow,j,k,l,ll;
			
 
				-	REAL big,dum,pivinv,temp;
			
 
				-
			
 
				-	indxc=ivector(1,n);
			
 
				-	indxr=ivector(1,n);
			
 
				-	ipiv=ivector(1,n);
			
 
				-	for (j=1;j<=n;j++) ipiv[j]=0;
			
 
				-	for (i=1;i<=n;i++) {
			
 
				-		big=0.0;
			
 
				-		for (j=1;j<=n;j++)
			
 
				-			if (ipiv[j] != 1)
			
 
				-				for (k=1;k<=n;k++) {
			
 
				-					if (ipiv[k] == 0) {
			
 
				-						if (fabs(a[j][k]) >= big) {
			
 
				-							big=fabs(a[j][k]);
			
 
				-							irow=j;
			
 
				-							icol=k;
			
 
				-						}
			
 
				-					} else if (ipiv[k] > 1) nrerror("gaussj: Singular Matrix-1");
			
 
				-				}
			
 
				-		++(ipiv[icol]);
			
 
				-		if (irow != icol) {
			
 
				-			for (l=1;l<=n;l++) SWAP(a[irow][l],a[icol][l])
			
 
				-			for (l=1;l<=m;l++) SWAP(b[irow][l],b[icol][l])
			
 
				-		}
			
 
				-		indxr[i]=irow;
			
 
				-		indxc[i]=icol;
			
 
				-		if (a[icol][icol] == 0.0) nrerror("gaussj: Singular Matrix-2");
			
 
				-		pivinv=1.0/a[icol][icol];
			
 
				-		a[icol][icol]=1.0;
			
 
				-		for (l=1;l<=n;l++) a[icol][l] *= pivinv;
			
 
				-		for (l=1;l<=m;l++) b[icol][l] *= pivinv;
			
 
				-		for (ll=1;ll<=n;ll++)
			
 
				-			if (ll != icol) {
			
 
				-				dum=a[ll][icol];
			
 
				-				a[ll][icol]=0.0;
			
 
				-				for (l=1;l<=n;l++) a[ll][l] -= a[icol][l]*dum;
			
 
				-				for (l=1;l<=m;l++) b[ll][l] -= b[icol][l]*dum;
			
 
				-			}
			
 
				-	}
			
 
				-	for (l=n;l>=1;l--) {
			
 
				-		if (indxr[l] != indxc[l])
			
 
				-			for (k=1;k<=n;k++)
			
 
				-				SWAP(a[k][indxr[l]],a[k][indxc[l]]);
			
 
				-	}
			
 
				-	free_ivector(ipiv,1,n);
			
 
				-	free_ivector(indxr,1,n);
			
 
				-	free_ivector(indxc,1,n);
			
 
				-}
			
 
				-#undef SWAP
			
 
				-#undef NRANSI
			
--- a/examples/pastix-wrappers/models/num_recipes/lfit.c
+++ b/examples/pastix-wrappers/models/num_recipes/lfit.c
@@ -1,66 +0,0 @@
 
				-#define NRANSI
			
 
				-#include <stdio.h>
			
 
				-#include <stdlib.h>
			
 
				-#include "nrutil.h"
			
 
				-
			
 
				-#define REAL float
			
 
				-
			
 
				-void lfit(REAL x[], REAL y[], REAL sig[], int ndat, REAL a[], int ia[],
			
 
				-	int ma, REAL **covar, REAL *chisq, void (*funcs)(REAL, REAL [], int))
			
 
				-{
			
 
				-	void covsrt(REAL **covar, int ma, int ia[], int mfit);
			
 
				-	void gaussj(REAL **a, int n, REAL **b, int m);
			
 
				-	int i,j,k,l,m,mfit=0;
			
 
				-	REAL ym,wt,sum,sig2i,**beta,*afunc;
			
 
				-
			
 
				-	
			
 
				-
			
 
				-	beta=matrix(1,ma,1,1);
			
 
				-	afunc=vector(1,ma);
			
 
				-	for (j=1;j<=ma;j++)
			
 
				-		if (ia[j]) mfit++;
			
 
				-	if (mfit == 0) nrerror("lfit: no parameters to be fitted");
			
 
				-	for (j=1;j<=mfit;j++) {
			
 
				-		for (k=1;k<=mfit;k++) covar[j][k]=0.0;
			
 
				-		beta[j][1]=0.0;
			
 
				-	}
			
 
				-
			
 
				-	for (i=1;i<=ndat;i++) {
			
 
				-		(*funcs)(x[i],afunc,ma);
			
 
				-		ym=y[i];
			
 
				-		if (mfit < ma) {
			
 
				-			for (j=1;j<=ma;j++)
			
 
				-				if (!ia[j]) ym -= a[j]*afunc[j];
			
 
				-		}
			
 
				-		sig2i=1.0/SQR(sig[i]);
			
 
				-		for (j=0,l=1;l<=ma;l++) {
			
 
				-			if (ia[l]) {
			
 
				-				wt=afunc[l]*sig2i;
			
 
				-				for (j++,k=0,m=1;m<=l;m++)
			
 
				-					if (ia[m]) covar[j][++k] += wt*afunc[m];
			
 
				-				beta[j][1] += ym*wt;
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-	
			
 
				-	for (j=2;j<=mfit;j++)
			
 
				-		for (k=1;k<j;k++)
			
 
				-			covar[k][j]=covar[j][k];
			
 
				-//	printf("lfit : gaussj\n");	
			
 
				-	gaussj(covar,mfit,beta,1);
			
 
				-//	printf("lfit1\n");
			
 
				-	for (j=0,l=1;l<=ma;l++)
			
 
				-		if (ia[l]) a[l]=beta[++j][1];
			
 
				-//	printf("lfit2\n");
			
 
				-	*chisq=0.0;
			
 
				-
			
 
				-	for (i=1;i<=ndat;i++) {
			
 
				-		(*funcs)(x[i],afunc,ma);
			
 
				-		for (sum=0.0,j=1;j<=ma;j++) sum += a[j]*afunc[j];
			
 
				-		*chisq += SQR((y[i]-sum)/sig[i]);
			
 
				-	}
			
 
				-	covsrt(covar,ma,ia,mfit);
			
 
				-	free_vector(afunc,1,ma);
			
 
				-	free_matrix(beta,1,ma,1,1);
			
 
				-}
			
 
				-#undef NRANSI
			
--- a/examples/pastix-wrappers/models/num_recipes/nr.h
+++ b/examples/pastix-wrappers/models/num_recipes/nr.h
@@ -1,530 +0,0 @@
 
				-/* CAUTION: This is the ANSI C (only) version of the Numerical Recipes
			
 
				-   utility file nr.h.  Do not confuse this file with the same-named
			
 
				-   file nr.h that is supplied in the 'misc' subdirectory.
			
 
				-   *That* file is the one from the book, and contains both ANSI and
			
 
				-   traditional K&R versions, along with #ifdef macros to select the
			
 
				-   correct version.  *This* file contains only ANSI C.               */
			
 
				-
			
 
				-#ifndef _NR_H_
			
 
				-#define _NR_H_
			
 
				-
			
 
				-#define REAL double
			
 
				-
			
 
				-
			
 
				-#ifndef _FCOMPLEX_DECLARE_T_
			
 
				-typedef struct FCOMPLEX {REAL r,i;} fcomplex;
			
 
				-#define _FCOMPLEX_DECLARE_T_
			
 
				-#endif /* _FCOMPLEX_DECLARE_T_ */
			
 
				-
			
 
				-#ifndef _ARITHCODE_DECLARE_T_
			
 
				-typedef struct {
			
 
				-	unsigned long *ilob,*iupb,*ncumfq,jdif,nc,minint,nch,ncum,nrad;
			
 
				-} arithcode;
			
 
				-#define _ARITHCODE_DECLARE_T_
			
 
				-#endif /* _ARITHCODE_DECLARE_T_ */
			
 
				-
			
 
				-#ifndef _HUFFCODE_DECLARE_T_
			
 
				-typedef struct {
			
 
				-	unsigned long *icod,*ncod,*left,*right,nch,nodemax;
			
 
				-} huffcode;
			
 
				-#define _HUFFCODE_DECLARE_T_
			
 
				-#endif /* _HUFFCODE_DECLARE_T_ */
			
 
				-
			
 
				-#include <stdio.h>
			
 
				-
			
 
				-void addint(double **uf, double **uc, double **res, int nf);
			
 
				-void airy(REAL x, REAL *ai, REAL *bi, REAL *aip, REAL *bip);
			
 
				-void amebsa(REAL **p, REAL y[], int ndim, REAL pb[],	REAL *yb,
			
 
				-	REAL ftol, REAL (*funk)(REAL []), int *iter, REAL temptr);
			
 
				-void amoeba(REAL **p, REAL y[], int ndim, REAL ftol,
			
 
				-	REAL (*funk)(REAL []), int *iter);
			
 
				-REAL amotry(REAL **p, REAL y[], REAL psum[], int ndim,
			
 
				-	REAL (*funk)(REAL []), int ihi, REAL fac);
			
 
				-REAL amotsa(REAL **p, REAL y[], REAL psum[], int ndim, REAL pb[],
			
 
				-	REAL *yb, REAL (*funk)(REAL []), int ihi, REAL *yhi, REAL fac);
			
 
				-void anneal(REAL x[], REAL y[], int iorder[], int ncity);
			
 
				-double anorm2(double **a, int n);
			
 
				-void arcmak(unsigned long nfreq[], unsigned long nchh, unsigned long nradd,
			
 
				-	arithcode *acode);
			
 
				-void arcode(unsigned long *ich, unsigned char **codep, unsigned long *lcode,
			
 
				-	unsigned long *lcd, int isign, arithcode *acode);
			
 
				-void arcsum(unsigned long iin[], unsigned long iout[], unsigned long ja,
			
 
				-	int nwk, unsigned long nrad, unsigned long nc);
			
 
				-void asolve(unsigned long n, double b[], double x[], int itrnsp);
			
 
				-void atimes(unsigned long n, double x[], double r[], int itrnsp);
			
 
				-void avevar(REAL data[], unsigned long n, REAL *ave, REAL *var);
			
 
				-void balanc(REAL **a, int n);
			
 
				-void banbks(REAL **a, unsigned long n, int m1, int m2, REAL **al,
			
 
				-	unsigned long indx[], REAL b[]);
			
 
				-void bandec(REAL **a, unsigned long n, int m1, int m2, REAL **al,
			
 
				-	unsigned long indx[], REAL *d);
			
 
				-void banmul(REAL **a, unsigned long n, int m1, int m2, REAL x[], REAL b[]);
			
 
				-void bcucof(REAL y[], REAL y1[], REAL y2[], REAL y12[], REAL d1,
			
 
				-	REAL d2, REAL **c);
			
 
				-void bcuint(REAL y[], REAL y1[], REAL y2[], REAL y12[],
			
 
				-	REAL x1l, REAL x1u, REAL x2l, REAL x2u, REAL x1,
			
 
				-	REAL x2, REAL *ansy, REAL *ansy1, REAL *ansy2);
			
 
				-void beschb(double x, double *gam1, double *gam2, double *gampl,
			
 
				-	double *gammi);
			
 
				-REAL bessi(int n, REAL x);
			
 
				-REAL bessi0(REAL x);
			
 
				-REAL bessi1(REAL x);
			
 
				-void bessik(REAL x, REAL xnu, REAL *ri, REAL *rk, REAL *rip,
			
 
				-	REAL *rkp);
			
 
				-REAL bessj(int n, REAL x);
			
 
				-REAL bessj0(REAL x);
			
 
				-REAL bessj1(REAL x);
			
 
				-void bessjy(REAL x, REAL xnu, REAL *rj, REAL *ry, REAL *rjp,
			
 
				-	REAL *ryp);
			
 
				-REAL bessk(int n, REAL x);
			
 
				-REAL bessk0(REAL x);
			
 
				-REAL bessk1(REAL x);
			
 
				-REAL bessy(int n, REAL x);
			
 
				-REAL bessy0(REAL x);
			
 
				-REAL bessy1(REAL x);
			
 
				-REAL beta(REAL z, REAL w);
			
 
				-REAL betacf(REAL a, REAL b, REAL x);
			
 
				-REAL betai(REAL a, REAL b, REAL x);
			
 
				-REAL bico(int n, int k);
			
 
				-void bksub(int ne, int nb, int jf, int k1, int k2, REAL ***c);
			
 
				-REAL bnldev(REAL pp, int n, long *idum);
			
 
				-REAL brent(REAL ax, REAL bx, REAL cx,
			
 
				-	REAL (*f)(REAL), REAL tol, REAL *xmin);
			
 
				-void broydn(REAL x[], int n, int *check,
			
 
				-	void (*vecfunc)(int, REAL [], REAL []));
			
 
				-void bsstep(REAL y[], REAL dydx[], int nv, REAL *xx, REAL htry,
			
 
				-	REAL eps, REAL yscal[], REAL *hdid, REAL *hnext,
			
 
				-	void (*derivs)(REAL, REAL [], REAL []));
			
 
				-void caldat(long julian, int *mm, int *id, int *iyyy);
			
 
				-void chder(REAL a, REAL b, REAL c[], REAL cder[], int n);
			
 
				-REAL chebev(REAL a, REAL b, REAL c[], int m, REAL x);
			
 
				-void chebft(REAL a, REAL b, REAL c[], int n, REAL (*func)(REAL));
			
 
				-void chebpc(REAL c[], REAL d[], int n);
			
 
				-void chint(REAL a, REAL b, REAL c[], REAL cint[], int n);
			
 
				-REAL chixy(REAL bang);
			
 
				-void choldc(REAL **a, int n, REAL p[]);
			
 
				-void cholsl(REAL **a, int n, REAL p[], REAL b[], REAL x[]);
			
 
				-void chsone(REAL bins[], REAL ebins[], int nbins, int knstrn,
			
 
				-	REAL *df, REAL *chsq, REAL *prob);
			
 
				-void chstwo(REAL bins1[], REAL bins2[], int nbins, int knstrn,
			
 
				-	REAL *df, REAL *chsq, REAL *prob);
			
 
				-void cisi(REAL x, REAL *ci, REAL *si);
			
 
				-void cntab1(int **nn, int ni, int nj, REAL *chisq,
			
 
				-	REAL *df, REAL *prob, REAL *cramrv, REAL *ccc);
			
 
				-void cntab2(int **nn, int ni, int nj, REAL *h, REAL *hx, REAL *hy,
			
 
				-	REAL *hygx, REAL *hxgy, REAL *uygx, REAL *uxgy, REAL *uxy);
			
 
				-void convlv(REAL data[], unsigned long n, REAL respns[], unsigned long m,
			
 
				-	int isign, REAL ans[]);
			
 
				-void copy(double **aout, double **ain, int n);
			
 
				-void correl(REAL data1[], REAL data2[], unsigned long n, REAL ans[]);
			
 
				-void cosft(REAL y[], int n, int isign);
			
 
				-void cosft1(REAL y[], int n);
			
 
				-void cosft2(REAL y[], int n, int isign);
			
 
				-void covsrt(REAL **covar, int ma, int ia[], int mfit);
			
 
				-void crank(unsigned long n, REAL w[], REAL *s);
			
 
				-void cyclic(REAL a[], REAL b[], REAL c[], REAL alpha, REAL beta,
			
 
				-	REAL r[], REAL x[], unsigned long n);
			
 
				-void daub4(REAL a[], unsigned long n, int isign);
			
 
				-REAL dawson(REAL x);
			
 
				-REAL dbrent(REAL ax, REAL bx, REAL cx,
			
 
				-	REAL (*f)(REAL), REAL (*df)(REAL), REAL tol, REAL *xmin);
			
 
				-void ddpoly(REAL c[], int nc, REAL x, REAL pd[], int nd);
			
 
				-int decchk(char string[], int n, char *ch);
			
 
				-void derivs(REAL x, REAL y[], REAL dydx[]);
			
 
				-REAL df1dim(REAL x);
			
 
				-void dfour1(double data[], unsigned long nn, int isign);
			
 
				-void dfpmin(REAL p[], int n, REAL gtol, int *iter, REAL *fret,
			
 
				-	REAL (*func)(REAL []), void (*dfunc)(REAL [], REAL []));
			
 
				-REAL dfridr(REAL (*func)(REAL), REAL x, REAL h, REAL *err);
			
 
				-void dftcor(REAL w, REAL delta, REAL a, REAL b, REAL endpts[],
			
 
				-	REAL *corre, REAL *corim, REAL *corfac);
			
 
				-void dftint(REAL (*func)(REAL), REAL a, REAL b, REAL w,
			
 
				-	REAL *cosint, REAL *sinint);
			
 
				-void difeq(int k, int k1, int k2, int jsf, int is1, int isf,
			
 
				-	int indexv[], int ne, REAL **s, REAL **y);
			
 
				-void dlinmin(REAL p[], REAL xi[], int n, REAL *fret,
			
 
				-	REAL (*func)(REAL []), void (*dfunc)(REAL [], REAL[]));
			
 
				-double dpythag(double a, double b);
			
 
				-void drealft(double data[], unsigned long n, int isign);
			
 
				-void dsprsax(double sa[], unsigned long ija[], double x[], double b[],
			
 
				-	unsigned long n);
			
 
				-void dsprstx(double sa[], unsigned long ija[], double x[], double b[],
			
 
				-	unsigned long n);
			
 
				-void dsvbksb(double **u, double w[], double **v, int m, int n, double b[],
			
 
				-	double x[]);
			
 
				-void dsvdcmp(double **a, int m, int n, double w[], double **v);
			
 
				-void eclass(int nf[], int n, int lista[], int listb[], int m);
			
 
				-void eclazz(int nf[], int n, int (*equiv)(int, int));
			
 
				-REAL ei(REAL x);
			
 
				-void eigsrt(REAL d[], REAL **v, int n);
			
 
				-REAL elle(REAL phi, REAL ak);
			
 
				-REAL ellf(REAL phi, REAL ak);
			
 
				-REAL ellpi(REAL phi, REAL en, REAL ak);
			
 
				-void elmhes(REAL **a, int n);
			
 
				-REAL erfcc(REAL x);
			
 
				-//REAL erff(REAL x);
			
 
				-REAL erffc(REAL x);
			
 
				-void eulsum(REAL *sum, REAL term, int jterm, REAL wksp[]);
			
 
				-REAL evlmem(REAL fdt, REAL d[], int m, REAL xms);
			
 
				-REAL expdev(long *idum);
			
 
				-REAL expint(int n, REAL x);
			
 
				-REAL f1(REAL x);
			
 
				-REAL f1dim(REAL x);
			
 
				-REAL f2(REAL y);
			
 
				-REAL f3(REAL z);
			
 
				-REAL factln(int n);
			
 
				-REAL factrl(int n);
			
 
				-void fasper(REAL x[], REAL y[], unsigned long n, REAL ofac, REAL hifac,
			
 
				-	REAL wk1[], REAL wk2[], unsigned long nwk, unsigned long *nout,
			
 
				-	unsigned long *jmax, REAL *prob);
			
 
				-void fdjac(int n, REAL x[], REAL fvec[], REAL **df,
			
 
				-	void (*vecfunc)(int, REAL [], REAL []));
			
 
				-void fgauss(REAL x, REAL a[], REAL *y, REAL dyda[], int na);
			
 
				-void fill0(double **u, int n);
			
 
				-void fit(REAL x[], REAL y[], int ndata, REAL sig[], int mwt,
			
 
				-	REAL *a, REAL *b, REAL *siga, REAL *sigb, REAL *chi2, REAL *q);
			
 
				-void fitexy(REAL x[], REAL y[], int ndat, REAL sigx[], REAL sigy[],
			
 
				-	REAL *a, REAL *b, REAL *siga, REAL *sigb, REAL *chi2, REAL *q);
			
 
				-void fixrts(REAL d[], int m);
			
 
				-void fleg(REAL x, REAL pl[], int nl);
			
 
				-void flmoon(int n, int nph, long *jd, REAL *frac);
			
 
				-REAL fmin(REAL x[]);
			
 
				-void four1(REAL data[], unsigned long nn, int isign);
			
 
				-void fourew(FILE *file[5], int *na, int *nb, int *nc, int *nd);
			
 
				-void fourfs(FILE *file[5], unsigned long nn[], int ndim, int isign);
			
 
				-void fourn(REAL data[], unsigned long nn[], int ndim, int isign);
			
 
				-void fpoly(REAL x, REAL p[], int np);
			
 
				-void fred2(int n, REAL a, REAL b, REAL t[], REAL f[], REAL w[],
			
 
				-	REAL (*g)(REAL), REAL (*ak)(REAL, REAL));
			
 
				-REAL fredin(REAL x, int n, REAL a, REAL b, REAL t[], REAL f[], REAL w[],
			
 
				-	REAL (*g)(REAL), REAL (*ak)(REAL, REAL));
			
 
				-void frenel(REAL x, REAL *s, REAL *c);
			
 
				-void frprmn(REAL p[], int n, REAL ftol, int *iter, REAL *fret,
			
 
				-	REAL (*func)(REAL []), void (*dfunc)(REAL [], REAL []));
			
 
				-void ftest(REAL data1[], unsigned long n1, REAL data2[], unsigned long n2,
			
 
				-	REAL *f, REAL *prob);
			
 
				-REAL gamdev(int ia, long *idum);
			
 
				-REAL gammln(REAL xx);
			
 
				-REAL gammp(REAL a, REAL x);
			
 
				-REAL gammq(REAL a, REAL x);
			
 
				-REAL gasdev(long *idum);
			
 
				-void gaucof(int n, REAL a[], REAL b[], REAL amu0, REAL x[], REAL w[]);
			
 
				-void gauher(REAL x[], REAL w[], int n);
			
 
				-void gaujac(REAL x[], REAL w[], int n, REAL alf, REAL bet);
			
 
				-void gaulag(REAL x[], REAL w[], int n, REAL alf);
			
 
				-void gauleg(REAL x1, REAL x2, REAL x[], REAL w[], int n);
			
 
				-void gaussj(REAL **a, int n, REAL **b, int m);
			
 
				-void gcf(REAL *gammcf, REAL a, REAL x, REAL *gln);
			
 
				-REAL golden(REAL ax, REAL bx, REAL cx, REAL (*f)(REAL), REAL tol,
			
 
				-	REAL *xmin);
			
 
				-void gser(REAL *gamser, REAL a, REAL x, REAL *gln);
			
 
				-void hpsel(unsigned long m, unsigned long n, REAL arr[], REAL heap[]);
			
 
				-void hpsort(unsigned long n, REAL ra[]);
			
 
				-void hqr(REAL **a, int n, REAL wr[], REAL wi[]);
			
 
				-void hufapp(unsigned long index[], unsigned long nprob[], unsigned long n,
			
 
				-	unsigned long i);
			
 
				-void hufdec(unsigned long *ich, unsigned char *code, unsigned long lcode,
			
 
				-	unsigned long *nb, huffcode *hcode);
			
 
				-void hufenc(unsigned long ich, unsigned char **codep, unsigned long *lcode,
			
 
				-	unsigned long *nb, huffcode *hcode);
			
 
				-void hufmak(unsigned long nfreq[], unsigned long nchin, unsigned long *ilong,
			
 
				-	unsigned long *nlong, huffcode *hcode);
			
 
				-void hunt(REAL xx[], unsigned long n, REAL x, unsigned long *jlo);
			
 
				-void hypdrv(REAL s, REAL yy[], REAL dyyds[]);
			
 
				-fcomplex hypgeo(fcomplex a, fcomplex b, fcomplex c, fcomplex z);
			
 
				-void hypser(fcomplex a, fcomplex b, fcomplex c, fcomplex z,
			
 
				-	fcomplex *series, fcomplex *deriv);
			
 
				-unsigned short icrc(unsigned short crc, unsigned char *bufptr,
			
 
				-	unsigned long len, short jinit, int jrev);
			
 
				-unsigned short icrc1(unsigned short crc, unsigned char onech);
			
 
				-unsigned long igray(unsigned long n, int is);
			
 
				-void iindexx(unsigned long n, long arr[], unsigned long indx[]);
			
 
				-void indexx(unsigned long n, REAL arr[], unsigned long indx[]);
			
 
				-void interp(double **uf, double **uc, int nf);
			
 
				-int irbit1(unsigned long *iseed);
			
 
				-int irbit2(unsigned long *iseed);
			
 
				-void jacobi(REAL **a, int n, REAL d[], REAL **v, int *nrot);
			
 
				-void jacobn(REAL x, REAL y[], REAL dfdx[], REAL **dfdy, int n);
			
 
				-long julday(int mm, int id, int iyyy);
			
 
				-void kendl1(REAL data1[], REAL data2[], unsigned long n, REAL *tau, REAL *z,
			
 
				-	REAL *prob);
			
 
				-void kendl2(REAL **tab, int i, int j, REAL *tau, REAL *z, REAL *prob);
			
 
				-void kermom(double w[], double y, int m);
			
 
				-void ks2d1s(REAL x1[], REAL y1[], unsigned long n1,
			
 
				-	void (*quadvl)(REAL, REAL, REAL *, REAL *, REAL *, REAL *),
			
 
				-	REAL *d1, REAL *prob);
			
 
				-void ks2d2s(REAL x1[], REAL y1[], unsigned long n1, REAL x2[], REAL y2[],
			
 
				-	unsigned long n2, REAL *d, REAL *prob);
			
 
				-void ksone(REAL data[], unsigned long n, REAL (*func)(REAL), REAL *d,
			
 
				-	REAL *prob);
			
 
				-void kstwo(REAL data1[], unsigned long n1, REAL data2[], unsigned long n2,
			
 
				-	REAL *d, REAL *prob);
			
 
				-void laguer(fcomplex a[], int m, fcomplex *x, int *its);
			
 
				-void lfit(REAL x[], REAL y[], REAL sig[], int ndat, REAL a[], int ia[],
			
 
				-	int ma, REAL **covar, REAL *chisq, void (*funcs)(REAL, REAL [], int));
			
 
				-void linbcg(unsigned long n, double b[], double x[], int itol, double tol,
			
 
				-	 int itmax, int *iter, double *err);
			
 
				-void linmin(REAL p[], REAL xi[], int n, REAL *fret,
			
 
				-	REAL (*func)(REAL []));
			
 
				-void lnsrch(int n, REAL xold[], REAL fold, REAL g[], REAL p[], REAL x[],
			
 
				-	 REAL *f, REAL stpmax, int *check, REAL (*func)(REAL []));
			
 
				-void load(REAL x1, REAL v[], REAL y[]);
			
 
				-void load1(REAL x1, REAL v1[], REAL y[]);
			
 
				-void load2(REAL x2, REAL v2[], REAL y[]);
			
 
				-void locate(REAL xx[], unsigned long n, REAL x, unsigned long *j);
			
 
				-void lop(double **out, double **u, int n);
			
 
				-void lubksb(REAL **a, int n, int *indx, REAL b[]);
			
 
				-void ludcmp(REAL **a, int n, int *indx, REAL *d);
			
 
				-void machar(int *ibeta, int *it, int *irnd, int *ngrd,
			
 
				-	int *machep, int *negep, int *iexp, int *minexp, int *maxexp,
			
 
				-	REAL *eps, REAL *epsneg, REAL *xmin, REAL *xmax);
			
 
				-void matadd(double **a, double **b, double **c, int n);
			
 
				-void matsub(double **a, double **b, double **c, int n);
			
 
				-void medfit(REAL x[], REAL y[], int ndata, REAL *a, REAL *b, REAL *abdev);
			
 
				-void memcof(REAL data[], int n, int m, REAL *xms, REAL d[]);
			
 
				-int metrop(REAL de, REAL t);
			
 
				-void mgfas(double **u, int n, int maxcyc);
			
 
				-void mglin(double **u, int n, int ncycle);
			
 
				-REAL midexp(REAL (*funk)(REAL), REAL aa, REAL bb, int n);
			
 
				-REAL midinf(REAL (*funk)(REAL), REAL aa, REAL bb, int n);
			
 
				-REAL midpnt(REAL (*func)(REAL), REAL a, REAL b, int n);
			
 
				-REAL midsql(REAL (*funk)(REAL), REAL aa, REAL bb, int n);
			
 
				-REAL midsqu(REAL (*funk)(REAL), REAL aa, REAL bb, int n);
			
 
				-void miser(REAL (*func)(REAL []), REAL regn[], int ndim, unsigned long npts,
			
 
				-	REAL dith, REAL *ave, REAL *var);
			
 
				-void mmid(REAL y[], REAL dydx[], int nvar, REAL xs, REAL htot,
			
 
				-	int nstep, REAL yout[], void (*derivs)(REAL, REAL[], REAL[]));
			
 
				-void mnbrak(REAL *ax, REAL *bx, REAL *cx, REAL *fa, REAL *fb,
			
 
				-	REAL *fc, REAL (*func)(REAL));
			
 
				-void mnewt(int ntrial, REAL x[], int n, REAL tolx, REAL tolf);
			
 
				-void moment(REAL data[], int n, REAL *ave, REAL *adev, REAL *sdev,
			
 
				-	REAL *var, REAL *skew, REAL *curt);
			
 
				-void mp2dfr(unsigned char a[], unsigned char s[], int n, int *m);
			
 
				-void mpadd(unsigned char w[], unsigned char u[], unsigned char v[], int n);
			
 
				-void mpdiv(unsigned char q[], unsigned char r[], unsigned char u[],
			
 
				-	unsigned char v[], int n, int m);
			
 
				-void mpinv(unsigned char u[], unsigned char v[], int n, int m);
			
 
				-void mplsh(unsigned char u[], int n);
			
 
				-void mpmov(unsigned char u[], unsigned char v[], int n);
			
 
				-void mpmul(unsigned char w[], unsigned char u[], unsigned char v[], int n,
			
 
				-	int m);
			
 
				-void mpneg(unsigned char u[], int n);
			
 
				-void mppi(int n);
			
 
				-void mprove(REAL **a, REAL **alud, int n, int indx[], REAL b[],
			
 
				-	REAL x[]);
			
 
				-void mpsad(unsigned char w[], unsigned char u[], int n, int iv);
			
 
				-void mpsdv(unsigned char w[], unsigned char u[], int n, int iv, int *ir);
			
 
				-void mpsmu(unsigned char w[], unsigned char u[], int n, int iv);
			
 
				-void mpsqrt(unsigned char w[], unsigned char u[], unsigned char v[], int n,
			
 
				-	int m);
			
 
				-void mpsub(int *is, unsigned char w[], unsigned char u[], unsigned char v[],
			
 
				-	int n);
			
 
				-void mrqcof(REAL x[], REAL y[], REAL sig[], int ndata, REAL a[],
			
 
				-	int ia[], int ma, REAL **alpha, REAL beta[], REAL *chisq,
			
 
				-	void (*funcs)(REAL, REAL [], REAL *, REAL [], int));
			
 
				-void mrqmin(REAL x[], REAL y[], REAL sig[], int ndata, REAL a[],
			
 
				-	int ia[], int ma, REAL **covar, REAL **alpha, REAL *chisq,
			
 
				-	void (*funcs)(REAL, REAL [], REAL *, REAL [], int), REAL *alamda);
			
 
				-void newt(REAL x[], int n, int *check,
			
 
				-	void (*vecfunc)(int, REAL [], REAL []));
			
 
				-void odeint(REAL ystart[], int nvar, REAL x1, REAL x2,
			
 
				-	REAL eps, REAL h1, REAL hmin, int *nok, int *nbad,
			
 
				-	void (*derivs)(REAL, REAL [], REAL []),
			
 
				-	void (*rkqs)(REAL [], REAL [], int, REAL *, REAL, REAL,
			
 
				-	REAL [], REAL *, REAL *, void (*)(REAL, REAL [], REAL [])));
			
 
				-void orthog(int n, REAL anu[], REAL alpha[], REAL beta[], REAL a[],
			
 
				-	REAL b[]);
			
 
				-void pade(double cof[], int n, REAL *resid);
			
 
				-void pccheb(REAL d[], REAL c[], int n);
			
 
				-void pcshft(REAL a, REAL b, REAL d[], int n);
			
 
				-void pearsn(REAL x[], REAL y[], unsigned long n, REAL *r, REAL *prob,
			
 
				-	REAL *z);
			
 
				-void period(REAL x[], REAL y[], int n, REAL ofac, REAL hifac,
			
 
				-	REAL px[], REAL py[], int np, int *nout, int *jmax, REAL *prob);
			
 
				-void piksr2(int n, REAL arr[], REAL brr[]);
			
 
				-void piksrt(int n, REAL arr[]);
			
 
				-void pinvs(int ie1, int ie2, int je1, int jsf, int jc1, int k,
			
 
				-	REAL ***c, REAL **s);
			
 
				-REAL plgndr(int l, int m, REAL x);
			
 
				-REAL poidev(REAL xm, long *idum);
			
 
				-void polcoe(REAL x[], REAL y[], int n, REAL cof[]);
			
 
				-void polcof(REAL xa[], REAL ya[], int n, REAL cof[]);
			
 
				-void poldiv(REAL u[], int n, REAL v[], int nv, REAL q[], REAL r[]);
			
 
				-void polin2(REAL x1a[], REAL x2a[], REAL **ya, int m, int n,
			
 
				-	REAL x1, REAL x2, REAL *y, REAL *dy);
			
 
				-void polint(REAL xa[], REAL ya[], int n, REAL x, REAL *y, REAL *dy);
			
 
				-void powell(REAL p[], REAL **xi, int n, REAL ftol, int *iter, REAL *fret,
			
 
				-	REAL (*func)(REAL []));
			
 
				-void predic(REAL data[], int ndata, REAL d[], int m, REAL future[], int nfut);
			
 
				-REAL probks(REAL alam);
			
 
				-void psdes(unsigned long *lword, unsigned long *irword);
			
 
				-void pwt(REAL a[], unsigned long n, int isign);
			
 
				-void pwtset(int n);
			
 
				-REAL pythag(REAL a, REAL b);
			
 
				-void pzextr(int iest, REAL xest, REAL yest[], REAL yz[], REAL dy[],
			
 
				-	int nv);
			
 
				-REAL qgaus(REAL (*func)(REAL), REAL a, REAL b);
			
 
				-void qrdcmp(REAL **a, int n, REAL *c, REAL *d, int *sing);
			
 
				-REAL qromb(REAL (*func)(REAL), REAL a, REAL b);
			
 
				-REAL qromo(REAL (*func)(REAL), REAL a, REAL b,
			
 
				-	REAL (*choose)(REAL (*)(REAL), REAL, REAL, int));
			
 
				-void qroot(REAL p[], int n, REAL *b, REAL *c, REAL eps);
			
 
				-void qrsolv(REAL **a, int n, REAL c[], REAL d[], REAL b[]);
			
 
				-void qrupdt(REAL **r, REAL **qt, int n, REAL u[], REAL v[]);
			
 
				-REAL qsimp(REAL (*func)(REAL), REAL a, REAL b);
			
 
				-REAL qtrap(REAL (*func)(REAL), REAL a, REAL b);
			
 
				-REAL quad3d(REAL (*func)(REAL, REAL, REAL), REAL x1, REAL x2);
			
 
				-void quadct(REAL x, REAL y, REAL xx[], REAL yy[], unsigned long nn,
			
 
				-	REAL *fa, REAL *fb, REAL *fc, REAL *fd);
			
 
				-void quadmx(REAL **a, int n);
			
 
				-void quadvl(REAL x, REAL y, REAL *fa, REAL *fb, REAL *fc, REAL *fd);
			
 
				-REAL ran0(long *idum);
			
 
				-REAL ran1(long *idum);
			
 
				-REAL ran2(long *idum);
			
 
				-REAL ran3(long *idum);
			
 
				-REAL ran4(long *idum);
			
 
				-void rank(unsigned long n, unsigned long indx[], unsigned long irank[]);
			
 
				-void ranpt(REAL pt[], REAL regn[], int n);
			
 
				-void ratint(REAL xa[], REAL ya[], int n, REAL x, REAL *y, REAL *dy);
			
 
				-void ratlsq(double (*fn)(double), double a, double b, int mm, int kk,
			
 
				-	double cof[], double *dev);
			
 
				-double ratval(double x, double cof[], int mm, int kk);
			
 
				-REAL rc(REAL x, REAL y);
			
 
				-REAL rd(REAL x, REAL y, REAL z);
			
 
				-void realft(REAL data[], unsigned long n, int isign);
			
 
				-void rebin(REAL rc, int nd, REAL r[], REAL xin[], REAL xi[]);
			
 
				-void red(int iz1, int iz2, int jz1, int jz2, int jm1, int jm2, int jmf,
			
 
				-	int ic1, int jc1, int jcf, int kc, REAL ***c, REAL **s);
			
 
				-void relax(double **u, double **rhs, int n);
			
 
				-void relax2(double **u, double **rhs, int n);
			
 
				-void resid(double **res, double **u, double **rhs, int n);
			
 
				-REAL revcst(REAL x[], REAL y[], int iorder[], int ncity, int n[]);
			
 
				-void reverse(int iorder[], int ncity, int n[]);
			
 
				-REAL rf(REAL x, REAL y, REAL z);
			
 
				-REAL rj(REAL x, REAL y, REAL z, REAL p);
			
 
				-void rk4(REAL y[], REAL dydx[], int n, REAL x, REAL h, REAL yout[],
			
 
				-	void (*derivs)(REAL, REAL [], REAL []));
			
 
				-void rkck(REAL y[], REAL dydx[], int n, REAL x, REAL h,
			
 
				-	REAL yout[], REAL yerr[], void (*derivs)(REAL, REAL [], REAL []));
			
 
				-void rkdumb(REAL vstart[], int nvar, REAL x1, REAL x2, int nstep,
			
 
				-	void (*derivs)(REAL, REAL [], REAL []));
			
 
				-void rkqs(REAL y[], REAL dydx[], int n, REAL *x,
			
 
				-	REAL htry, REAL eps, REAL yscal[], REAL *hdid, REAL *hnext,
			
 
				-	void (*derivs)(REAL, REAL [], REAL []));
			
 
				-void rlft3(REAL ***data, REAL **speq, unsigned long nn1,
			
 
				-	unsigned long nn2, unsigned long nn3, int isign);
			
 
				-REAL rofunc(REAL b);
			
 
				-void rotate(REAL **r, REAL **qt, int n, int i, REAL a, REAL b);
			
 
				-void rsolv(REAL **a, int n, REAL d[], REAL b[]);
			
 
				-void rstrct(double **uc, double **uf, int nc);
			
 
				-REAL rtbis(REAL (*func)(REAL), REAL x1, REAL x2, REAL xacc);
			
 
				-REAL rtflsp(REAL (*func)(REAL), REAL x1, REAL x2, REAL xacc);
			
 
				-REAL rtnewt(void (*funcd)(REAL, REAL *, REAL *), REAL x1, REAL x2,
			
 
				-	REAL xacc);
			
 
				-REAL rtsafe(void (*funcd)(REAL, REAL *, REAL *), REAL x1, REAL x2,
			
 
				-	REAL xacc);
			
 
				-REAL rtsec(REAL (*func)(REAL), REAL x1, REAL x2, REAL xacc);
			
 
				-void rzextr(int iest, REAL xest, REAL yest[], REAL yz[], REAL dy[], int nv);
			
 
				-void savgol(REAL c[], int np, int nl, int nr, int ld, int m);
			
 
				-void score(REAL xf, REAL y[], REAL f[]);
			
 
				-void scrsho(REAL (*fx)(REAL));
			
 
				-REAL select_(unsigned long k, unsigned long n, REAL arr[]);
			
 
				-REAL selip(unsigned long k, unsigned long n, REAL arr[]);
			
 
				-void shell(unsigned long n, REAL a[]);
			
 
				-void shoot(int n, REAL v[], REAL f[]);
			
 
				-void shootf(int n, REAL v[], REAL f[]);
			
 
				-void simp1(REAL **a, int mm, int ll[], int nll, int iabf, int *kp,
			
 
				-	REAL *bmax);
			
 
				-void simp2(REAL **a, int n, int l2[], int nl2, int *ip, int kp, REAL *q1);
			
 
				-void simp3(REAL **a, int i1, int k1, int ip, int kp);
			
 
				-void simplx(REAL **a, int m, int n, int m1, int m2, int m3, int *icase,
			
 
				-	int izrov[], int iposv[]);
			
 
				-void simpr(REAL y[], REAL dydx[], REAL dfdx[], REAL **dfdy,
			
 
				-	int n, REAL xs, REAL htot, int nstep, REAL yout[],
			
 
				-	void (*derivs)(REAL, REAL [], REAL []));
			
 
				-void sinft(REAL y[], int n);
			
 
				-void slvsm2(double **u, double **rhs);
			
 
				-void slvsml(double **u, double **rhs);
			
 
				-void sncndn(REAL uu, REAL emmc, REAL *sn, REAL *cn, REAL *dn);
			
 
				-double snrm(unsigned long n, double sx[], int itol);
			
 
				-void sobseq(int *n, REAL x[]);
			
 
				-void solvde(int itmax, REAL conv, REAL slowc, REAL scalv[],
			
 
				-	int indexv[], int ne, int nb, int m, REAL **y, REAL ***c, REAL **s);
			
 
				-void sor(double **a, double **b, double **c, double **d, double **e,
			
 
				-	double **f, double **u, int jmax, double rjac);
			
 
				-void sort(unsigned long n, REAL arr[]);
			
 
				-void sort2(unsigned long n, REAL arr[], REAL brr[]);
			
 
				-void sort3(unsigned long n, REAL ra[], REAL rb[], REAL rc[]);
			
 
				-void spctrm(FILE *fp, REAL p[], int m, int k, int ovrlap);
			
 
				-void spear(REAL data1[], REAL data2[], unsigned long n, REAL *d, REAL *zd,
			
 
				-	REAL *probd, REAL *rs, REAL *probrs);
			
 
				-void sphbes(int n, REAL x, REAL *sj, REAL *sy, REAL *sjp, REAL *syp);
			
 
				-void splie2(REAL x1a[], REAL x2a[], REAL **ya, int m, int n, REAL **y2a);
			
 
				-void splin2(REAL x1a[], REAL x2a[], REAL **ya, REAL **y2a, int m, int n,
			
 
				-	REAL x1, REAL x2, REAL *y);
			
 
				-void spline(REAL x[], REAL y[], int n, REAL yp1, REAL ypn, REAL y2[]);
			
 
				-void splint(REAL xa[], REAL ya[], REAL y2a[], int n, REAL x, REAL *y);
			
 
				-void spread(REAL y, REAL yy[], unsigned long n, REAL x, int m);
			
 
				-void sprsax(REAL sa[], unsigned long ija[], REAL x[], REAL b[],
			
 
				-	unsigned long n);
			
 
				-void sprsin(REAL **a, int n, REAL thresh, unsigned long nmax, REAL sa[],
			
 
				-	unsigned long ija[]);
			
 
				-void sprspm(REAL sa[], unsigned long ija[], REAL sb[], unsigned long ijb[],
			
 
				-	REAL sc[], unsigned long ijc[]);
			
 
				-void sprstm(REAL sa[], unsigned long ija[], REAL sb[], unsigned long ijb[],
			
 
				-	REAL thresh, unsigned long nmax, REAL sc[], unsigned long ijc[]);
			
 
				-void sprstp(REAL sa[], unsigned long ija[], REAL sb[], unsigned long ijb[]);
			
 
				-void sprstx(REAL sa[], unsigned long ija[], REAL x[], REAL b[],
			
 
				-	unsigned long n);
			
 
				-void stifbs(REAL y[], REAL dydx[], int nv, REAL *xx,
			
 
				-	REAL htry, REAL eps, REAL yscal[], REAL *hdid, REAL *hnext,
			
 
				-	void (*derivs)(REAL, REAL [], REAL []));
			
 
				-void stiff(REAL y[], REAL dydx[], int n, REAL *x,
			
 
				-	REAL htry, REAL eps, REAL yscal[], REAL *hdid, REAL *hnext,
			
 
				-	void (*derivs)(REAL, REAL [], REAL []));
			
 
				-void stoerm(REAL y[], REAL d2y[], int nv, REAL xs,
			
 
				-	REAL htot, int nstep, REAL yout[],
			
 
				-	void (*derivs)(REAL, REAL [], REAL []));
			
 
				-void svbksb(REAL **u, REAL w[], REAL **v, int m, int n, REAL b[],
			
 
				-	REAL x[]);
			
 
				-void svdcmp(REAL **a, int m, int n, REAL w[], REAL **v);
			
 
				-void svdfit(REAL x[], REAL y[], REAL sig[], int ndata, REAL a[],
			
 
				-	int ma, REAL **u, REAL **v, REAL w[], REAL *chisq,
			
 
				-	void (*funcs)(REAL, REAL [], int));
			
 
				-void svdvar(REAL **v, int ma, REAL w[], REAL **cvm);
			
 
				-void toeplz(REAL r[], REAL x[], REAL y[], int n);
			
 
				-void tptest(REAL data1[], REAL data2[], unsigned long n, REAL *t, REAL *prob);
			
 
				-void tqli(REAL d[], REAL e[], int n, REAL **z);
			
 
				-REAL trapzd(REAL (*func)(REAL), REAL a, REAL b, int n);
			
 
				-void tred2(REAL **a, int n, REAL d[], REAL e[]);
			
 
				-void tridag(REAL a[], REAL b[], REAL c[], REAL r[], REAL u[],
			
 
				-	unsigned long n);
			
 
				-REAL trncst(REAL x[], REAL y[], int iorder[], int ncity, int n[]);
			
 
				-void trnspt(int iorder[], int ncity, int n[]);
			
 
				-void ttest(REAL data1[], unsigned long n1, REAL data2[], unsigned long n2,
			
 
				-	REAL *t, REAL *prob);
			
 
				-void tutest(REAL data1[], unsigned long n1, REAL data2[], unsigned long n2,
			
 
				-	REAL *t, REAL *prob);
			
 
				-void twofft(REAL data1[], REAL data2[], REAL fft1[], REAL fft2[],
			
 
				-	unsigned long n);
			
 
				-void vander(double x[], double w[], double q[], int n);
			
 
				-void vegas(REAL regn[], int ndim, REAL (*fxn)(REAL [], REAL), int init,
			
 
				-	unsigned long ncall, int itmx, int nprn, REAL *tgral, REAL *sd,
			
 
				-	REAL *chi2a);
			
 
				-void voltra(int n, int m, REAL t0, REAL h, REAL *t, REAL **f,
			
 
				-	REAL (*g)(int, REAL), REAL (*ak)(int, int, REAL, REAL));
			
 
				-void wt1(REAL a[], unsigned long n, int isign,
			
 
				-	void (*wtstep)(REAL [], unsigned long, int));
			
 
				-void wtn(REAL a[], unsigned long nn[], int ndim, int isign,
			
 
				-	void (*wtstep)(REAL [], unsigned long, int));
			
 
				-void wwghts(REAL wghts[], int n, REAL h,
			
 
				-	void (*kermom)(double [], double ,int));
			
 
				-int zbrac(REAL (*func)(REAL), REAL *x1, REAL *x2);
			
 
				-void zbrak(REAL (*fx)(REAL), REAL x1, REAL x2, int n, REAL xb1[],
			
 
				-	REAL xb2[], int *nb);
			
 
				-REAL zbrent(REAL (*func)(REAL), REAL x1, REAL x2, REAL tol);
			
 
				-void zrhqr(REAL a[], int m, REAL rtr[], REAL rti[]);
			
 
				-REAL zriddr(REAL (*func)(REAL), REAL x1, REAL x2, REAL xacc);
			
 
				-void zroots(fcomplex a[], int m, fcomplex roots[], int polish);
			
 
				-
			
 
				-#endif /* _NR_H_ */
			
--- a/examples/pastix-wrappers/models/num_recipes/nrutil.c
+++ b/examples/pastix-wrappers/models/num_recipes/nrutil.c
@@ -1,295 +0,0 @@
 
				-/* CAUTION: This is the ANSI C (only) version of the Numerical Recipes
			
 
				-   utility file nrutil.c.  Do not confuse this file with the same-named
			
 
				-   file nrutil.c that is supplied in the 'misc' subdirectory.
			
 
				-   *That* file is the one from the book, and contains both ANSI and
			
 
				-   traditional K&R versions, along with #ifdef macros to select the
			
 
				-   correct version.  *This* file contains only ANSI C.               */
			
 
				-
			
 
				-#include <stdio.h>
			
 
				-#include <stddef.h>
			
 
				-#include <stdlib.h>
			
 
				-#define NR_END 1
			
 
				-#define FREE_ARG char*
			
 
				-#define REAL float
			
 
				-
			
 
				-
			
 
				-void nrerror(char error_text[])
			
 
				-/* Numerical Recipes standard error handler */
			
 
				-{
			
 
				-	fprintf(stderr,"Numerical Recipes run-time error...\n");
			
 
				-	fprintf(stderr,"%s\n",error_text);
			
 
				-	fprintf(stderr,"...now exiting to system...\n");
			
 
				-	exit(1);
			
 
				-}
			
 
				-
			
 
				-REAL *vector(long nl, long nh)
			
 
				-/* allocate a REAL vector with subscript range v[nl..nh] */
			
 
				-{
			
 
				-	REAL *v;
			
 
				-
			
 
				-	v=(REAL *)malloc((size_t) ((nh-nl+1+NR_END)*sizeof(REAL)));
			
 
				-	if (!v) nrerror("allocation failure in vector()");
			
 
				-	return v-nl+NR_END;
			
 
				-}
			
 
				-
			
 
				-int *ivector(long nl, long nh)
			
 
				-/* allocate an int vector with subscript range v[nl..nh] */
			
 
				-{
			
 
				-	int *v;
			
 
				-
			
 
				-	v=(int *)malloc((size_t) ((nh-nl+1+NR_END)*sizeof(int)));
			
 
				-	if (!v) nrerror("allocation failure in ivector()");
			
 
				-	return v-nl+NR_END;
			
 
				-}
			
 
				-
			
 
				-unsigned char *cvector(long nl, long nh)
			
 
				-/* allocate an unsigned char vector with subscript range v[nl..nh] */
			
 
				-{
			
 
				-	unsigned char *v;
			
 
				-
			
 
				-	v=(unsigned char *)malloc((size_t) ((nh-nl+1+NR_END)*sizeof(unsigned char)));
			
 
				-	if (!v) nrerror("allocation failure in cvector()");
			
 
				-	return v-nl+NR_END;
			
 
				-}
			
 
				-
			
 
				-unsigned long *lvector(long nl, long nh)
			
 
				-/* allocate an unsigned long vector with subscript range v[nl..nh] */
			
 
				-{
			
 
				-	unsigned long *v;
			
 
				-
			
 
				-	v=(unsigned long *)malloc((size_t) ((nh-nl+1+NR_END)*sizeof(long)));
			
 
				-	if (!v) nrerror("allocation failure in lvector()");
			
 
				-	return v-nl+NR_END;
			
 
				-}
			
 
				-
			
 
				-double *dvector(long nl, long nh)
			
 
				-/* allocate a double vector with subscript range v[nl..nh] */
			
 
				-{
			
 
				-	double *v;
			
 
				-
			
 
				-	v=(double *)malloc((size_t) ((nh-nl+1+NR_END)*sizeof(double)));
			
 
				-	if (!v) nrerror("allocation failure in dvector()");
			
 
				-	return v-nl+NR_END;
			
 
				-}
			
 
				-
			
 
				-REAL **matrix(long nrl, long nrh, long ncl, long nch)
			
 
				-/* allocate a REAL matrix with subscript range m[nrl..nrh][ncl..nch] */
			
 
				-{
			
 
				-	long i, nrow=nrh-nrl+1,ncol=nch-ncl+1;
			
 
				-	REAL **m;
			
 
				-
			
 
				-	/* allocate pointers to rows */
			
 
				-	m=(REAL **) malloc((size_t)((nrow+NR_END)*sizeof(REAL*)));
			
 
				-	if (!m) nrerror("allocation failure 1 in matrix()");
			
 
				-	m += NR_END;
			
 
				-	m -= nrl;
			
 
				-
			
 
				-	/* allocate rows and set pointers to them */
			
 
				-	m[nrl]=(REAL *) malloc((size_t)((nrow*ncol+NR_END)*sizeof(REAL)));
			
 
				-	if (!m[nrl]) nrerror("allocation failure 2 in matrix()");
			
 
				-	m[nrl] += NR_END;
			
 
				-	m[nrl] -= ncl;
			
 
				-
			
 
				-	for(i=nrl+1;i<=nrh;i++) m[i]=m[i-1]+ncol;
			
 
				-
			
 
				-	/* return pointer to array of pointers to rows */
			
 
				-	return m;
			
 
				-}
			
 
				-
			
 
				-double **dmatrix(long nrl, long nrh, long ncl, long nch)
			
 
				-/* allocate a double matrix with subscript range m[nrl..nrh][ncl..nch] */
			
 
				-{
			
 
				-	long i, nrow=nrh-nrl+1,ncol=nch-ncl+1;
			
 
				-	double **m;
			
 
				-
			
 
				-	/* allocate pointers to rows */
			
 
				-	m=(double **) malloc((size_t)((nrow+NR_END)*sizeof(double*)));
			
 
				-	if (!m) nrerror("allocation failure 1 in matrix()");
			
 
				-	m += NR_END;
			
 
				-	m -= nrl;
			
 
				-
			
 
				-	/* allocate rows and set pointers to them */
			
 
				-	m[nrl]=(double *) malloc((size_t)((nrow*ncol+NR_END)*sizeof(double)));
			
 
				-	if (!m[nrl]) nrerror("allocation failure 2 in matrix()");
			
 
				-	m[nrl] += NR_END;
			
 
				-	m[nrl] -= ncl;
			
 
				-
			
 
				-	for(i=nrl+1;i<=nrh;i++) m[i]=m[i-1]+ncol;
			
 
				-
			
 
				-	/* return pointer to array of pointers to rows */
			
 
				-	return m;
			
 
				-}
			
 
				-
			
 
				-int **imatrix(long nrl, long nrh, long ncl, long nch)
			
 
				-/* allocate a int matrix with subscript range m[nrl..nrh][ncl..nch] */
			
 
				-{
			
 
				-	long i, nrow=nrh-nrl+1,ncol=nch-ncl+1;
			
 
				-	int **m;
			
 
				-
			
 
				-	/* allocate pointers to rows */
			
 
				-	m=(int **) malloc((size_t)((nrow+NR_END)*sizeof(int*)));
			
 
				-	if (!m) nrerror("allocation failure 1 in matrix()");
			
 
				-	m += NR_END;
			
 
				-	m -= nrl;
			
 
				-
			
 
				-
			
 
				-	/* allocate rows and set pointers to them */
			
 
				-	m[nrl]=(int *) malloc((size_t)((nrow*ncol+NR_END)*sizeof(int)));
			
 
				-	if (!m[nrl]) nrerror("allocation failure 2 in matrix()");
			
 
				-	m[nrl] += NR_END;
			
 
				-	m[nrl] -= ncl;
			
 
				-
			
 
				-	for(i=nrl+1;i<=nrh;i++) m[i]=m[i-1]+ncol;
			
 
				-
			
 
				-	/* return pointer to array of pointers to rows */
			
 
				-	return m;
			
 
				-}
			
 
				-
			
 
				-REAL **submatrix(REAL **a, long oldrl, long oldrh, long oldcl, long oldch,
			
 
				-	long newrl, long newcl)
			
 
				-/* point a submatrix [newrl..][newcl..] to a[oldrl..oldrh][oldcl..oldch] */
			
 
				-{
			
 
				-	long i,j,nrow=oldrh-oldrl+1,ncol=oldcl-newcl;
			
 
				-	REAL **m;
			
 
				-
			
 
				-	/* allocate array of pointers to rows */
			
 
				-	m=(REAL **) malloc((size_t) ((nrow+NR_END)*sizeof(REAL*)));
			
 
				-	if (!m) nrerror("allocation failure in submatrix()");
			
 
				-	m += NR_END;
			
 
				-	m -= newrl;
			
 
				-
			
 
				-	/* set pointers to rows */
			
 
				-	for(i=oldrl,j=newrl;i<=oldrh;i++,j++) m[j]=a[i]+ncol;
			
 
				-
			
 
				-	/* return pointer to array of pointers to rows */
			
 
				-	return m;
			
 
				-}
			
 
				-
			
 
				-REAL **convert_matrix(REAL *a, long nrl, long nrh, long ncl, long nch)
			
 
				-/* allocate a REAL matrix m[nrl..nrh][ncl..nch] that points to the matrix
			
 
				-declared in the standard C manner as a[nrow][ncol], where nrow=nrh-nrl+1
			
 
				-and ncol=nch-ncl+1. The routine should be called with the address
			
 
				-&a[0][0] as the first argument. */
			
 
				-{
			
 
				-	long i,j,nrow=nrh-nrl+1,ncol=nch-ncl+1;
			
 
				-	REAL **m;
			
 
				-
			
 
				-	/* allocate pointers to rows */
			
 
				-	m=(REAL **) malloc((size_t) ((nrow+NR_END)*sizeof(REAL*)));
			
 
				-	if (!m) nrerror("allocation failure in convert_matrix()");
			
 
				-	m += NR_END;
			
 
				-	m -= nrl;
			
 
				-
			
 
				-	/* set pointers to rows */
			
 
				-	m[nrl]=a-ncl;
			
 
				-	for(i=1,j=nrl+1;i<nrow;i++,j++) m[j]=m[j-1]+ncol;
			
 
				-	/* return pointer to array of pointers to rows */
			
 
				-	return m;
			
 
				-}
			
 
				-
			
 
				-REAL ***f3tensor(long nrl, long nrh, long ncl, long nch, long ndl, long ndh)
			
 
				-/* allocate a REAL 3tensor with range t[nrl..nrh][ncl..nch][ndl..ndh] */
			
 
				-{
			
 
				-	long i,j,nrow=nrh-nrl+1,ncol=nch-ncl+1,ndep=ndh-ndl+1;
			
 
				-	REAL ***t;
			
 
				-
			
 
				-	/* allocate pointers to pointers to rows */
			
 
				-	t=(REAL ***) malloc((size_t)((nrow+NR_END)*sizeof(REAL**)));
			
 
				-	if (!t) nrerror("allocation failure 1 in f3tensor()");
			
 
				-	t += NR_END;
			
 
				-	t -= nrl;
			
 
				-
			
 
				-	/* allocate pointers to rows and set pointers to them */
			
 
				-	t[nrl]=(REAL **) malloc((size_t)((nrow*ncol+NR_END)*sizeof(REAL*)));
			
 
				-	if (!t[nrl]) nrerror("allocation failure 2 in f3tensor()");
			
 
				-	t[nrl] += NR_END;
			
 
				-	t[nrl] -= ncl;
			
 
				-
			
 
				-	/* allocate rows and set pointers to them */
			
 
				-	t[nrl][ncl]=(REAL *) malloc((size_t)((nrow*ncol*ndep+NR_END)*sizeof(REAL)));
			
 
				-	if (!t[nrl][ncl]) nrerror("allocation failure 3 in f3tensor()");
			
 
				-	t[nrl][ncl] += NR_END;
			
 
				-	t[nrl][ncl] -= ndl;
			
 
				-
			
 
				-	for(j=ncl+1;j<=nch;j++) t[nrl][j]=t[nrl][j-1]+ndep;
			
 
				-	for(i=nrl+1;i<=nrh;i++) {
			
 
				-		t[i]=t[i-1]+ncol;
			
 
				-		t[i][ncl]=t[i-1][ncl]+ncol*ndep;
			
 
				-		for(j=ncl+1;j<=nch;j++) t[i][j]=t[i][j-1]+ndep;
			
 
				-	}
			
 
				-
			
 
				-	/* return pointer to array of pointers to rows */
			
 
				-	return t;
			
 
				-}
			
 
				-
			
 
				-void free_vector(REAL *v, long nl, long nh)
			
 
				-/* free a REAL vector allocated with vector() */
			
 
				-{
			
 
				-	free((FREE_ARG) (v+nl-NR_END));
			
 
				-}
			
 
				-
			
 
				-void free_ivector(int *v, long nl, long nh)
			
 
				-/* free an int vector allocated with ivector() */
			
 
				-{
			
 
				-	free((FREE_ARG) (v+nl-NR_END));
			
 
				-}
			
 
				-
			
 
				-void free_cvector(unsigned char *v, long nl, long nh)
			
 
				-/* free an unsigned char vector allocated with cvector() */
			
 
				-{
			
 
				-	free((FREE_ARG) (v+nl-NR_END));
			
 
				-}
			
 
				-
			
 
				-void free_lvector(unsigned long *v, long nl, long nh)
			
 
				-/* free an unsigned long vector allocated with lvector() */
			
 
				-{
			
 
				-	free((FREE_ARG) (v+nl-NR_END));
			
 
				-}
			
 
				-
			
 
				-void free_dvector(double *v, long nl, long nh)
			
 
				-/* free a double vector allocated with dvector() */
			
 
				-{
			
 
				-	free((FREE_ARG) (v+nl-NR_END));
			
 
				-}
			
 
				-
			
 
				-void free_matrix(REAL **m, long nrl, long nrh, long ncl, long nch)
			
 
				-/* free a REAL matrix allocated by matrix() */
			
 
				-{
			
 
				-	free((FREE_ARG) (m[nrl]+ncl-NR_END));
			
 
				-	free((FREE_ARG) (m+nrl-NR_END));
			
 
				-}
			
 
				-
			
 
				-void free_dmatrix(double **m, long nrl, long nrh, long ncl, long nch)
			
 
				-/* free a double matrix allocated by dmatrix() */
			
 
				-{
			
 
				-	free((FREE_ARG) (m[nrl]+ncl-NR_END));
			
 
				-	free((FREE_ARG) (m+nrl-NR_END));
			
 
				-}
			
 
				-
			
 
				-void free_imatrix(int **m, long nrl, long nrh, long ncl, long nch)
			
 
				-/* free an int matrix allocated by imatrix() */
			
 
				-{
			
 
				-	free((FREE_ARG) (m[nrl]+ncl-NR_END));
			
 
				-	free((FREE_ARG) (m+nrl-NR_END));
			
 
				-}
			
 
				-
			
 
				-void free_submatrix(REAL **b, long nrl, long nrh, long ncl, long nch)
			
 
				-/* free a submatrix allocated by submatrix() */
			
 
				-{
			
 
				-	free((FREE_ARG) (b+nrl-NR_END));
			
 
				-}
			
 
				-
			
 
				-void free_convert_matrix(REAL **b, long nrl, long nrh, long ncl, long nch)
			
 
				-/* free a matrix allocated by convert_matrix() */
			
 
				-{
			
 
				-	free((FREE_ARG) (b+nrl-NR_END));
			
 
				-}
			
 
				-
			
 
				-void free_f3tensor(REAL ***t, long nrl, long nrh, long ncl, long nch,
			
 
				-	long ndl, long ndh)
			
 
				-/* free a REAL f3tensor allocated by f3tensor() */
			
 
				-{
			
 
				-	free((FREE_ARG) (t[nrl][ncl]+ndl-NR_END));
			
 
				-	free((FREE_ARG) (t[nrl]+ncl-NR_END));
			
 
				-	free((FREE_ARG) (t+nrl-NR_END));
			
 
				-}
			
--- a/examples/pastix-wrappers/models/num_recipes/nrutil.h
+++ b/examples/pastix-wrappers/models/num_recipes/nrutil.h
@@ -1,79 +0,0 @@
 
				-/* CAUTION: This is the ANSI C (only) version of the Numerical Recipes
			
 
				-   utility file nrutil.h.  Do not confuse this file with the same-named
			
 
				-   file nrutil.h that is supplied in the 'misc' subdirectory.
			
 
				-   *That* file is the one from the book, and contains both ANSI and
			
 
				-   traditional K&R versions, along with #ifdef macros to select the
			
 
				-   correct version.  *This* file contains only ANSI C.               */
			
 
				-
			
 
				-#ifndef _NR_UTILS_H_
			
 
				-#define _NR_UTILS_H_
			
 
				-
			
 
				-#define REAL float
			
 
				-
			
 
				-static REAL sqrarg;
			
 
				-#define SQR(a) ((sqrarg=(a)) == 0.0 ? 0.0 : sqrarg*sqrarg)
			
 
				-
			
 
				-static double dsqrarg;
			
 
				-#define DSQR(a) ((dsqrarg=(a)) == 0.0 ? 0.0 : dsqrarg*dsqrarg)
			
 
				-
			
 
				-static double dmaxarg1,dmaxarg2;
			
 
				-#define DMAX(a,b) (dmaxarg1=(a),dmaxarg2=(b),(dmaxarg1) > (dmaxarg2) ?\
			
 
				-        (dmaxarg1) : (dmaxarg2))
			
 
				-
			
 
				-static double dminarg1,dminarg2;
			
 
				-#define DMIN(a,b) (dminarg1=(a),dminarg2=(b),(dminarg1) < (dminarg2) ?\
			
 
				-        (dminarg1) : (dminarg2))
			
 
				-
			
 
				-static REAL maxarg1,maxarg2;
			
 
				-#define FMAX(a,b) (maxarg1=(a),maxarg2=(b),(maxarg1) > (maxarg2) ?\
			
 
				-        (maxarg1) : (maxarg2))
			
 
				-
			
 
				-static REAL minarg1,minarg2;
			
 
				-#define FMIN(a,b) (minarg1=(a),minarg2=(b),(minarg1) < (minarg2) ?\
			
 
				-        (minarg1) : (minarg2))
			
 
				-
			
 
				-static long lmaxarg1,lmaxarg2;
			
 
				-#define LMAX(a,b) (lmaxarg1=(a),lmaxarg2=(b),(lmaxarg1) > (lmaxarg2) ?\
			
 
				-        (lmaxarg1) : (lmaxarg2))
			
 
				-
			
 
				-static long lminarg1,lminarg2;
			
 
				-#define LMIN(a,b) (lminarg1=(a),lminarg2=(b),(lminarg1) < (lminarg2) ?\
			
 
				-        (lminarg1) : (lminarg2))
			
 
				-
			
 
				-static int imaxarg1,imaxarg2;
			
 
				-#define IMAX(a,b) (imaxarg1=(a),imaxarg2=(b),(imaxarg1) > (imaxarg2) ?\
			
 
				-        (imaxarg1) : (imaxarg2))
			
 
				-
			
 
				-static int iminarg1,iminarg2;
			
 
				-#define IMIN(a,b) (iminarg1=(a),iminarg2=(b),(iminarg1) < (iminarg2) ?\
			
 
				-        (iminarg1) : (iminarg2))
			
 
				-
			
 
				-#define SIGN(a,b) ((b) >= 0.0 ? fabs(a) : -fabs(a))
			
 
				-
			
 
				-void nrerror(char error_text[]);
			
 
				-REAL *vector(long nl, long nh);
			
 
				-int *ivector(long nl, long nh);
			
 
				-unsigned char *cvector(long nl, long nh);
			
 
				-unsigned long *lvector(long nl, long nh);
			
 
				-double *dvector(long nl, long nh);
			
 
				-REAL **matrix(long nrl, long nrh, long ncl, long nch);
			
 
				-double **dmatrix(long nrl, long nrh, long ncl, long nch);
			
 
				-int **imatrix(long nrl, long nrh, long ncl, long nch);
			
 
				-REAL **submatrix(REAL **a, long oldrl, long oldrh, long oldcl, long oldch,
			
 
				-	long newrl, long newcl);
			
 
				-REAL **convert_matrix(REAL *a, long nrl, long nrh, long ncl, long nch);
			
 
				-REAL ***f3tensor(long nrl, long nrh, long ncl, long nch, long ndl, long ndh);
			
 
				-void free_vector(REAL *v, long nl, long nh);
			
 
				-void free_ivector(int *v, long nl, long nh);
			
 
				-void free_cvector(unsigned char *v, long nl, long nh);
			
 
				-void free_lvector(unsigned long *v, long nl, long nh);
			
 
				-void free_dvector(double *v, long nl, long nh);
			
 
				-void free_matrix(REAL **m, long nrl, long nrh, long ncl, long nch);
			
 
				-void free_dmatrix(double **m, long nrl, long nrh, long ncl, long nch);
			
 
				-void free_imatrix(int **m, long nrl, long nrh, long ncl, long nch);
			
 
				-void free_submatrix(REAL **b, long nrl, long nrh, long ncl, long nch);
			
 
				-void free_convert_matrix(REAL **b, long nrl, long nrh, long ncl, long nch);
			
 
				-void free_f3tensor(REAL ***t, long nrl, long nrh, long ncl, long nch,
			
 
				-	long ndl, long ndh);
			
 
				-
			
 
				-#endif /* _NR_UTILS_H_ */
			
--- a/examples/pastix-wrappers/models/reg_gemm.c
+++ b/examples/pastix-wrappers/models/reg_gemm.c
@@ -1,141 +0,0 @@
 
				-/*
			
 
				- * StarPU
			
 
				- * Copyright (C) INRIA 2008-2009 (see AUTHORS file)
			
 
				- *
			
 
				- * This program is free software; you can redistribute it and/or modify
			
 
				- * it under the terms of the GNU Lesser General Public License as published by
			
 
				- * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				- * your option) any later version.
			
 
				- *
			
 
				- * This program is distributed in the hope that it will be useful, but
			
 
				- * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				- *
			
 
				- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				- */
			
 
				-
			
 
				-#include <stdio.h>
			
 
				-#include <stdlib.h>
			
 
				-#include <math.h>
			
 
				-
			
 
				-#include "nr.h"
			
 
				-
			
 
				-#define REAL float
			
 
				-#define SIZE 2000000
			
 
				-#define ma 6
			
 
				-
			
 
				-
			
 
				-typedef struct Coord
			
 
				-{
			
 
				-  REAL x1;
			
 
				-  REAL x2;
			
 
				-  REAL x3;
			
 
				-} coord, *pcoord;
			
 
				-
			
 
				-
			
 
				-coord tabcoord[SIZE];
			
 
				-REAL tabx[SIZE];
			
 
				-REAL taby[SIZE];  
			
 
				-REAL sig[SIZE];
			
 
				-REAL afunc[ma+1];
			
 
				-int ia[ma+1];
			
 
				-REAL a[ma+1];
			
 
				-
			
 
				-
			
 
				-void funcs( REAL i, REAL afunc[ma+1], int ma2)
			
 
				-{
			
 
				-  
			
 
				-  afunc[1]= 1;
			
 
				-  afunc[2]= tabcoord[(int)i].x1;
			
 
				-  afunc[3]= tabcoord[(int)i].x2;
			
 
				-  afunc[4]= tabcoord[(int)i].x1*tabcoord[(int)i].x2;
			
 
				-  afunc[5]= tabcoord[(int)i].x2*tabcoord[(int)i].x3;
			
 
				-  afunc[6]= tabcoord[(int)i].x1*tabcoord[(int)i].x2*tabcoord[(int)i].x3;
			
 
				-  //printf("%f %f %f \n",afunc[0],afunc[1],afunc[2]);
			
 
				-}
			
 
				-
			
 
				-
			
 
				-int main(int argc, char * argv[])
			
 
				-{
			
 
				-  REAL total=0.0;
			
 
				-  REAL ecart=0.0;
			
 
				-  int len=0;
			
 
				-  char str2[1000];
			
 
				-/*   long double total=0.0; */
			
 
				-/*   long double ecart=0.0; */
			
 
				-  char *filename = argv[1];
			
 
				-  char perf_h[1000];
			
 
				-  char str[1000];
			
 
				-  int k,i;
			
 
				-  FILE * res;
			
 
				-  FILE *out;
			
 
				-  FILE *perf;
			
 
				-  // FILE *tmpperf;
			
 
				-  int ndat=atoi(argv[2]);
			
 
				-  REAL ** covar;
			
 
				-  REAL *chisq = (REAL *)malloc(sizeof(REAL));
			
 
				-  res = fopen(filename,"r");
			
 
				-
			
 
				-  covar = (REAL**) malloc((ma+1) *sizeof(REAL*));
			
 
				-  for (i=0;i<ma+1;i++)
			
 
				-    covar[i]=(REAL*)malloc((ma+1) *sizeof(REAL));
			
 
				-
			
 
				-
			
 
				-  for (k=1;k<ndat+1;k++)
			
 
				-    {
			
 
				-      int x0, y0, x1, y1, x2, y2;
			
 
				-      REAL tmpfloat;
			
 
				-      fscanf(res,"%f\t%d\t%d\t%d\t%d\t%d\t%d\n", &tmpfloat, &x0, &y0, &x1, &y1, &x2, &y2);
			
 
				-      tabcoord[k].x1= x0 - y0; 
			
 
				-      tabcoord[k].x2= y2;
			
 
				-      tabcoord[k].x3= y0;
			
 
				-      taby[k]=tmpfloat;
			
 
				-      sig[k]=1;
			
 
				-      tabx[k]=k;
			
 
				-      //fprintf(out,"%f %f %f\n",tabcoord[k].x1 ,tabcoord[k].x2 ,tabcoord[k].x3);
			
 
				-      //fprintf(out,"%f %f\n",tabx[k] , taby[k]);
			
 
				-    }
			
 
				-  for (k=1;k<ma+1;k++)
			
 
				-    ia[k]=1;
			
 
				-
			
 
				-
			
 
				-  lfit(tabx, taby, sig, ndat, a, ia, ma, covar, chisq, &funcs);
			
 
				-  
			
 
				-  for (k=1;k<ma+1;k++)  
			
 
				-    {    
			
 
				-//      printf("%.12lf\n", a[k]);
			
 
				-      //total+=a[k];
			
 
				-    }
			
 
				-
			
 
				-
			
 
				-
			
 
				-  //calcul de l'ecart type
			
 
				-  for (k=1;k<ndat+1;k++)
			
 
				-    {
			
 
				-      double abs=0.0;
			
 
				-      abs += a[1];
			
 
				-      abs += tabcoord[k].x1*a[2]+tabcoord[k].x2*a[3];
			
 
				-      abs += tabcoord[k].x1*tabcoord[k].x2*a[4]+tabcoord[k].x2*tabcoord[k].x3*a[5];
			
 
				-      abs += tabcoord[k].x1*tabcoord[k].x2*tabcoord[k].x3*a[6];
			
 
				-    //  fprintf(stderr,"k=%i ; calcul : %lf ; reel : %lf ; ", k, abs, taby[k]);
			
 
				-      abs = abs - taby[k];
			
 
				-      if (abs < 0)
			
 
				-	abs = - abs;
			
 
				-    //  fprintf(stderr,"ecart : %lf\n ", abs);
			
 
				-
			
 
				-      total += abs;
			
 
				-      //printf("%f %f %f\n",tabcoord[k].x1 ,tabcoord[k].x2 ,tabcoord[k].x3);
			
 
				-    }
			
 
				-
			
 
				-
			
 
				-
			
 
				-  fprintf(stdout,"#define GEMM_A  %e\n#define GEMM_B  %e\n#define GEMM_C  %e\n#define GEMM_D  %e\n#define GEMM_E  %e\n#define GEMM_F  %e\n",a[6],a[4],a[5],a[2],a[3],a[1]);
			
 
				-  fprintf(stderr,"#define PERF_GEMM(i,j,k) (GEMM_A*(double)(i)*(double)(j)*(double)(k)+GEMM_B*(double)(i)*(double)(j)+GEMM_C*(double)(j)*(double)(k)+GEMM_D*(double)(i)+GEMM_E*(double)(j)+GEMM_F)\n");
			
 
				-
			
 
				-
			
 
				-  fprintf(stderr, "total %lf\n", total);
			
 
				-  ecart = total / ndat;  
			
 
				-  fprintf(stderr, "ecart moyen %lf\n", ecart);
			
 
				-
			
 
				-  return 0;
			
 
				-}
			
--- a/examples/pastix-wrappers/models/reg_trsm.c
+++ b/examples/pastix-wrappers/models/reg_trsm.c
@@ -1,122 +0,0 @@
 
				-/*
			
 
				- * StarPU
			
 
				- * Copyright (C) INRIA 2008-2009 (see AUTHORS file)
			
 
				- *
			
 
				- * This program is free software; you can redistribute it and/or modify
			
 
				- * it under the terms of the GNU Lesser General Public License as published by
			
 
				- * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				- * your option) any later version.
			
 
				- *
			
 
				- * This program is distributed in the hope that it will be useful, but
			
 
				- * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				- *
			
 
				- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				- */
			
 
				-
			
 
				-#include <stdio.h>
			
 
				-#include <stdlib.h>
			
 
				-#include <math.h>
			
 
				-
			
 
				-#include "nr.h"
			
 
				-
			
 
				-#define SIZE 100000
			
 
				-#define ma 3
			
 
				-
			
 
				-
			
 
				-typedef struct Coord
			
 
				-{
			
 
				-  float x1;
			
 
				-  float x2;
			
 
				-} coord, *pcoord;
			
 
				-
			
 
				-
			
 
				-coord tabcoord[SIZE];
			
 
				-float tabx[SIZE];
			
 
				-float taby[SIZE];  
			
 
				-float sig[SIZE];
			
 
				-float afunc[ma+1];
			
 
				-int ia[ma+1];
			
 
				-float a[ma+1];
			
 
				-
			
 
				-
			
 
				-void funcs( float i, float afunc[ma+1], int ma2)
			
 
				-{
			
 
				-  
			
 
				-  afunc[1]=1;
			
 
				-  afunc[2]=tabcoord[(int)i].x1;
			
 
				-  afunc[3]=tabcoord[(int)i].x1*tabcoord[(int)i].x1*tabcoord[(int)i].x2;
			
 
				-  
			
 
				-  //printf("%f %f %f \n",afunc[0],afunc[1],afunc[2]);
			
 
				-}
			
 
				-
			
 
				-
			
 
				-int main(int argc, char * argv[])
			
 
				-{
			
 
				-  float total=0.0;
			
 
				-  float ecart=0.0;
			
 
				-/*   long double total=0.0; */
			
 
				-/*   long double ecart=0.0; */
			
 
				-  char *filename = argv[1];
			
 
				-  int k,i;
			
 
				-  FILE * res;
			
 
				-  int ndat=atoi(argv[2]);
			
 
				-  float ** covar;
			
 
				-  float *chisq = (float *)malloc(sizeof(float));
			
 
				-  res = fopen(filename,"r");
			
 
				-  covar = (float**) malloc((ma+1) *sizeof(float*));
			
 
				-  for (i=0;i<ma+1;i++)
			
 
				-    covar[i]=(float*)malloc((ma+1) *sizeof(float));
			
 
				-
			
 
				-  for (k=1;k<ndat+1;k++)
			
 
				-    {
			
 
				-      int i, j;
			
 
				-      float tmpfloat;
			
 
				-      fscanf(res,"%f\t%d\t%d\n", &tmpfloat, &i, &j);
			
 
				-      tabcoord[k].x1=i-j; 
			
 
				-      tabcoord[k].x2=j;
			
 
				-      taby[k]=tmpfloat;
			
 
				-//      printf("%d -> %f %d %d\n", k, tmpfloat, i-j, j);
			
 
				-      sig[k]=1;
			
 
				-      tabx[k]=k;
			
 
				-    }
			
 
				-  for (k=1;k<ma+1;k++)
			
 
				-    ia[k]=1;
			
 
				-  
			
 
				-  lfit(tabx, taby, sig, ndat, a, ia, ma, covar, chisq, &funcs);
			
 
				-  
			
 
				-  for (k=1;k<ma+1;k++)  
			
 
				-    {    
			
 
				-  //    printf("%.12lf\n", a[k]);
			
 
				-      //total+=a[k];
			
 
				-    }
			
 
				-
			
 
				-
			
 
				-
			
 
				-  //calcul de l'ecart type
			
 
				-  for (k=1;k<ndat+1;k++)
			
 
				-    {
			
 
				-      double abs=0.0;
			
 
				-      abs += a[1];
			
 
				-      abs += tabcoord[k].x1*a[2];
			
 
				-      abs += tabcoord[k].x1*tabcoord[k].x1*tabcoord[k].x2*a[3];
			
 
				-//      fprintf(stderr,"k=%i ; calcul : %lf ; reel : %lf ; ", k, abs, taby[k]);
			
 
				-      abs = abs - taby[k];
			
 
				-      if (abs < 0)
			
 
				-	abs = - abs;
			
 
				- //     fprintf(stderr,"ecart : %lf\n ", abs);
			
 
				-
			
 
				-      total += abs;
			
 
				-      //printf("%f %f %f\n",tabcoord[k].x1 ,tabcoord[k].x2 ,tabcoord[k].x3);
			
 
				-    }
			
 
				-  
			
 
				-  fprintf(stdout,"#define TRSM_A %e\n#define TRSM_B %e\n#define TRSM_C %e\n", a[3], a[2], a[1]);
			
 
				-  fprintf(stderr,"#define PERF_TRSM(i,j)   (TRSM_A*(double)(i)*(double)(i)*(double)(j)+TRSM_B*(double)(i)+TRSM_C)\n");
			
 
				-
			
 
				-
			
 
				-  fprintf(stderr, "total %lf\n", total);
			
 
				-  ecart = total / ndat;  
			
 
				-  fprintf(stderr, "ecart moyen %lf\n", ecart);
			
 
				-
			
 
				-  return 0;
			
 
				-}
			
--- a/examples/pastix-wrappers/starpu-blas-wrapper.c
+++ b/examples/pastix-wrappers/starpu-blas-wrapper.c
@@ -1,752 +0,0 @@
 
				-/*
			
 
				- * StarPU
			
 
				- * Copyright (C) INRIA 2008-2009 (see AUTHORS file)
			
 
				- *
			
 
				- * This program is free software; you can redistribute it and/or modify
			
 
				- * it under the terms of the GNU Lesser General Public License as published by
			
 
				- * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				- * your option) any later version.
			
 
				- *
			
 
				- * This program is distributed in the hope that it will be useful, but
			
 
				- * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				- *
			
 
				- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				- */
			
 
				-
			
 
				-#include <semaphore.h>
			
 
				-#include <core/jobs.h>
			
 
				-#include <core/workers.h>
			
 
				-#include <core/dependencies/tags.h>
			
 
				-#include <string.h>
			
 
				-#include <math.h>
			
 
				-#include <sys/types.h>
			
 
				-#include <ctype.h>
			
 
				-#include <pthread.h>
			
 
				-#include <signal.h>
			
 
				-#include <cblas.h>
			
 
				-
			
 
				-#include <datawizard/datawizard.h>
			
 
				-#include <task-models/blas_model.h>
			
 
				-#include <common/fxt.h>
			
 
				-
			
 
				-#include <starpu.h>
			
 
				-
			
 
				-#ifdef STARPU_USE_CUDA
			
 
				-#include <cuda.h>
			
 
				-#endif
			
 
				-
			
 
				-#define BLOCK	75
			
 
				-
			
 
				-#include "starpu-blas-wrapper.h"
			
 
				-
			
 
				-extern struct starpu_data_interface_ops_t _starpu_interface_blas_ops;
			
 
				-
			
 
				-static int cpu_sgemm = 0;
			
 
				-static int cublas_sgemm = 0;
			
 
				-static int cpu_strsm = 0;
			
 
				-static int cublas_strsm = 0;
			
 
				-
			
 
				-static int inited = 0;
			
 
				-
			
 
				-void STARPU_INIT(void)
			
 
				-{
			
 
				-	if (!inited) {
			
 
				-		inited = 1;
			
 
				-		starpu_init(NULL);	
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-void STARPU_TERMINATE(void)
			
 
				-{
			
 
				-	starpu_shutdown();
			
 
				-
			
 
				-	fprintf(stderr, "sgemm : cpu %d cublas %d\n", cpu_sgemm, cublas_sgemm);
			
 
				-	fprintf(stderr, "strsm : cpu %d cublas %d\n", cpu_strsm, cublas_strsm);
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- *
			
 
				- *	Specific to PaStiX !
			
 
				- *
			
 
				- */
			
 
				-
			
 
				-/*
			
 
				- *	
			
 
				- *	We "need" some custom filters
			
 
				- *
			
 
				- *			 VECTOR
			
 
				- *			  (n)
			
 
				- *			/  |   \		
			
 
				- * 		   VECTOR  BLAS  VECTOR
			
 
				- * 		    (n1)  (n2)	 
			
 
				- *	
			
 
				- *	if n1 = 0 :
			
 
				- * 			VECTOR
			
 
				- *			/   \
			
 
				- *		     BLAS  VECTOR
			
 
				- */
			
 
				-
			
 
				-struct divide_vector_in_blas_filter_args {
			
 
				-	uint32_t n1, n2; /* (total size of the first portion (vector length) n < root's n ! */
			
 
				-	uint32_t stride; /* stride of the first portion (need to be a multiple of n */
			
 
				-};
			
 
				-
			
 
				-void divide_vector_in_blas_filter(starpu_filter *f, starpu_data_handle root_data)
			
 
				-{
			
 
				-	starpu_vector_interface_t *vector_root = &root_data->interface[0].vector;
			
 
				-		uint32_t nx = vector_root->nx;
			
 
				-		size_t elemsize = vector_root->elemsize;
			
 
				-
			
 
				-	struct divide_vector_in_blas_filter_args *args = f->filter_arg_ptr;
			
 
				-		unsigned n1 = args->n1;
			
 
				-		unsigned n2 = args->n2;
			
 
				-		unsigned stride = args->stride;
			
 
				-		STARPU_ASSERT(n1 + n2 < nx);
			
 
				-		unsigned n3 = nx - n1 - n2;
			
 
				-		
			
 
				-
			
 
				-	/* first allocate the children starpu_data_handle */
			
 
				-	starpu_data_create_children(root_data, (n1==0)?2:3, root_data->ops);
			
 
				-
			
 
				-	STARPU_ASSERT((n2 % args->stride) == 0);
			
 
				-
			
 
				-	unsigned child = 0;
			
 
				-	unsigned node;
			
 
				-	
			
 
				-	if (n1 > 0)
			
 
				-	{
			
 
				-		for (node = 0; node < STARPU_MAXNODES; node++)
			
 
				-		{
			
 
				-			starpu_vector_interface_t *local = &root_data->children[child].interface[node].vector;
			
 
				-	
			
 
				-			local->nx = n1;
			
 
				-			local->elemsize = elemsize;
			
 
				-	
			
 
				-			if (root_data->per_node[node].allocated) {
			
 
				-				local->ptr = root_data->interface[node].vector.ptr;
			
 
				-			}
			
 
				-	
			
 
				-		}
			
 
				-
			
 
				-		child++;
			
 
				-	}
			
 
				-	
			
 
				-	for (node = 0; node < STARPU_MAXNODES; node++)
			
 
				-	{
			
 
				-		starpu_blas_interface_t *local = &root_data->children[child].interface[node].blas;
			
 
				-
			
 
				-		local->nx = stride;
			
 
				-		local->ny = n2/stride;
			
 
				-		local->ld = stride;
			
 
				-		local->elemsize = elemsize;
			
 
				-
			
 
				-		if (root_data->per_node[node].allocated) {
			
 
				-			local->ptr = root_data->interface[node].vector.ptr + n1*elemsize;
			
 
				-		}
			
 
				-
			
 
				-		struct starpu_data_state_t *state = &root_data->children[child];
			
 
				-		state->ops = &_starpu_interface_blas_ops;
			
 
				-	}
			
 
				-
			
 
				-	child++;
			
 
				-
			
 
				-	for (node = 0; node < STARPU_MAXNODES; node++)
			
 
				-	{
			
 
				-		starpu_vector_interface_t *local = &root_data->children[child].interface[node].vector;
			
 
				-
			
 
				-		local->nx = n3;
			
 
				-		local->elemsize = elemsize;
			
 
				-
			
 
				-		if (root_data->per_node[node].allocated) {
			
 
				-			local->ptr = root_data->interface[node].vector.ptr + (n1+n2)*elemsize;
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-
			
 
				-static data_state *cblktab;
			
 
				-
			
 
				-static void _cublas_cblk_strsm_callback(void *sem)
			
 
				-{
			
 
				-	sem_t *semptr = sem;
			
 
				-	sem_post(semptr);
			
 
				-}
			
 
				-
			
 
				-
			
 
				-void STARPU_MONITOR_DATA(unsigned ncols)
			
 
				-{
			
 
				-	cblktab = calloc(ncols, sizeof(data_state));
			
 
				-}
			
 
				-
			
 
				-void STARPU_MONITOR_CBLK(unsigned col, float *data, unsigned stride, unsigned width)
			
 
				-{
			
 
				-	//void starpu_register_blas_data(struct starpu_data_state_t *state, uint32_t home_node,
			
 
				-        //                uintptr_t ptr, uint32_t ld, uint32_t nx,
			
 
				-        //                uint32_t ny, size_t elemsize);
			
 
				-
			
 
				-	//fprintf(stderr, "col %d data %p stride %d width %d\n", col, data, stride, width);
			
 
				-
			
 
				-	starpu_register_blas_data(&cblktab[col], 0 /* home */,
			
 
				-			(uintptr_t) data, stride, stride, width, sizeof(float));
			
 
				-	
			
 
				-}
			
 
				-
			
 
				-static data_state work_block_1;
			
 
				-static data_state work_block_2;
			
 
				-
			
 
				-void allocate_maxbloktab_on_cublas(void *descr[] __attribute__((unused)), void *arg __attribute__((unused)))
			
 
				-{
			
 
				-	starpu_request_data_allocation(&work_block_1, 1);
			
 
				-	starpu_request_data_allocation(&work_block_2, 1);
			
 
				-
			
 
				-
			
 
				-	starpu_filter f1, f2;
			
 
				-	struct divide_vector_in_blas_filter_args args1, args2;
			
 
				-
			
 
				-	f1.filter_func = divide_vector_in_blas_filter;
			
 
				-		args1.n1 = 1; /* XXX random ... */
			
 
				-		args1.n2 = 2;
			
 
				-		args1.stride = 1;
			
 
				-
			
 
				-	f1.filter_arg_ptr = &args1;
			
 
				-	starpu_partition_data(&work_block_1, &f1);
			
 
				-
			
 
				-	f2.filter_func = divide_vector_in_blas_filter;
			
 
				-		args2.n1 = 0;
			
 
				-		args2.n2 = 2;
			
 
				-		args2.stride = 1;
			
 
				-	f2.filter_arg_ptr = &args2;
			
 
				-	starpu_partition_data(&work_block_2, &f2);
			
 
				-}
			
 
				-
			
 
				-void STARPU_DECLARE_WORK_BLOCKS(float *maxbloktab1, float *maxbloktab2, unsigned solv_coefmax)
			
 
				-{
			
 
				-	starpu_register_vector_data(&work_block_1, 0 /* home */, (uintptr_t)maxbloktab1, solv_coefmax, sizeof(float));
			
 
				-	starpu_register_vector_data(&work_block_2, 0 /* home */, (uintptr_t)maxbloktab2, solv_coefmax, sizeof(float));
			
 
				-
			
 
				-	starpu_codelet cl;
			
 
				-	starpu_job_t j;
			
 
				-	sem_t sem;
			
 
				-
			
 
				-	/* initialize codelet */
			
 
				-	cl.where = STARPU_CUDA;
			
 
				-	cl.cuda_func = allocate_maxbloktab_on_cublas;
			
 
				-	
			
 
				-	j = _starpu_job_create();
			
 
				-	j->cb = _cublas_cblk_strsm_callback;
			
 
				-	j->argcb = &sem;
			
 
				-	j->cl = &cl;
			
 
				-	j->cl_arg = NULL;
			
 
				-
			
 
				-	j->nbuffers = 0;
			
 
				-	j->cl->model = NULL;
			
 
				-
			
 
				-	sem_init(&sem, 0, 0U);
			
 
				-	
			
 
				-	/* submit the codelet */
			
 
				-	submit_job(j);
			
 
				-
			
 
				-	/* wait for its completion */
			
 
				-	sem_wait(&sem);
			
 
				-	sem_destroy(&sem);
			
 
				-
			
 
				-}
			
 
				-
			
 
				-void _cpu_cblk_strsm(void *descr[], void *arg __attribute__((unused)))
			
 
				-{
			
 
				-	uint32_t nx, ny, ld;
			
 
				-	nx = STARPU_GET_BLAS_NX(descr[0]);
			
 
				-	ny = STARPU_GET_BLAS_NY(descr[0]);
			
 
				-	ld = STARPU_GET_BLAS_LD(descr[0]);
			
 
				-
			
 
				-	float *diag_cblkdata, *extra_cblkdata;
			
 
				-	diag_cblkdata = (float *)STARPU_GET_BLAS_PTR(descr[0]);
			
 
				-	extra_cblkdata = diag_cblkdata + ny;
			
 
				-
			
 
				-	unsigned m = nx - ny;
			
 
				-	unsigned n = ny;
			
 
				-
			
 
				-//	SOPALIN_TRSM("R","L","T","U",dimb,dima,fun,ga,stride,gb,stride);
			
 
				-	cpu_strsm++;
			
 
				-
			
 
				-	cblas_strsm(CblasColMajor, CblasRight, CblasLower, CblasTrans, CblasUnit, m, n, 1.0f, 
			
 
				-			diag_cblkdata, ld, extra_cblkdata, ld);
			
 
				-}
			
 
				-
			
 
				-
			
 
				-void _cublas_cblk_strsm(void *descr[], void *arg __attribute__((unused)))
			
 
				-{
			
 
				-	uint32_t nx, ny, ld;
			
 
				-	nx = STARPU_GET_BLAS_NX(descr[0]);
			
 
				-	ny = STARPU_GET_BLAS_NY(descr[0]);
			
 
				-	ld = STARPU_GET_BLAS_LD(descr[0]);
			
 
				-
			
 
				-	float *diag_cblkdata, *extra_cblkdata;
			
 
				-	diag_cblkdata = (float *)STARPU_GET_BLAS_PTR(descr[0]);
			
 
				-	extra_cblkdata = diag_cblkdata + ny;
			
 
				-
			
 
				-	unsigned m = nx - ny;
			
 
				-	unsigned n = ny;
			
 
				-	
			
 
				-	cublas_strsm++;
			
 
				-
			
 
				-	cublasStrsm ('R', 'L', 'T', 'U', m, n, 1.0, 
			
 
				-		diag_cblkdata, ld, 
			
 
				-		extra_cblkdata, ld);
			
 
				-	cublasStatus st = cublasGetError();
			
 
				-	if (st) fprintf(stderr, "ERROR %d\n", st);
			
 
				-	STARPU_ASSERT(st == CUBLAS_STATUS_SUCCESS);
			
 
				-}
			
 
				-
			
 
				-static struct starpu_perfmodel_t starpu_cblk_strsm = {
			
 
				-	.per_arch = { 
			
 
				-		[STARPU_CPU_DEFAULT] = { .cost_model = starpu_cblk_strsm_cpu_cost },
			
 
				-		[STARPU_CUDA_DEFAULT] = { .cost_model = starpu_cblk_strsm_cuda_cost }
			
 
				-	},
			
 
				-//	.type = REGRESSION_BASED,
			
 
				-	.type = STARPU_PER_ARCH,
			
 
				-	.symbol = "starpu_cblk_strsm"
			
 
				-};
			
 
				-
			
 
				-
			
 
				-void STARPU_CBLK_STRSM(unsigned col)
			
 
				-{
			
 
				-	/* perform a strsm on the block column */
			
 
				-	starpu_codelet cl;
			
 
				-	starpu_job_t j;
			
 
				-	sem_t sem;
			
 
				-
			
 
				-	/* initialize codelet */
			
 
				-	cl.where = STARPU_CPU|STARPU_CUDA;
			
 
				-	cl.cpu_func = _cpu_cblk_strsm;
			
 
				-	cl.cuda_func = _cublas_cblk_strsm;
			
 
				-	
			
 
				-	j = _starpu_job_create();
			
 
				-//	j->where = (starpu_get_blas_nx(&cblktab[col]) > BLOCK && starpu_get_blas_ny(&cblktab[col]) > BLOCK)? CUBLAS:CPU;
			
 
				-	j->cb = _cublas_cblk_strsm_callback;
			
 
				-	j->argcb = &sem;
			
 
				-	j->cl = &cl;
			
 
				-	j->cl_arg = NULL;
			
 
				-
			
 
				-	j->nbuffers = 1;
			
 
				-	/* we could be a little more precise actually */
			
 
				-	j->buffers[0].handle = &cblktab[col];
			
 
				-	j->buffers[0].mode = STARPU_RW;
			
 
				-	
			
 
				-	j->cl->model = &starpu_cblk_strsm;
			
 
				-
			
 
				-	sem_init(&sem, 0, 0U);
			
 
				-	
			
 
				-	/* submit the codelet */
			
 
				-	submit_job(j);
			
 
				-
			
 
				-	/* wait for its completion */
			
 
				-	sem_wait(&sem);
			
 
				-	sem_destroy(&sem);
			
 
				-}
			
 
				-
			
 
				-struct starpu_compute_contrib_compact_args {
			
 
				-	unsigned stride;
			
 
				-	int dimi;
			
 
				-	int dimj;
			
 
				-	int dima;
			
 
				-};
			
 
				-
			
 
				-
			
 
				-void _cpu_compute_contrib_compact(void *descr[], void *arg)
			
 
				-{
			
 
				-	struct starpu_compute_contrib_compact_args *args = arg;
			
 
				-
			
 
				-	float *gaik = (float *)STARPU_GET_BLAS_PTR(descr[0]) + args->dima;
			
 
				-	float *gb = (float *)STARPU_GET_BLAS_PTR(descr[1]); 
			
 
				-	unsigned strideb = (unsigned)STARPU_GET_BLAS_LD(descr[1]);
			
 
				-	float *gc = (float *)STARPU_GET_BLAS_PTR(descr[2]);
			
 
				-	unsigned stridec = (unsigned)STARPU_GET_BLAS_LD(descr[2]);
			
 
				-
			
 
				-	cpu_sgemm++;
			
 
				-
			
 
				-	cblas_sgemm(CblasColMajor, CblasNoTrans, CblasTrans, 
			
 
				-			args->dimi, args->dimj, args->dima,
			
 
				-			1.0f, gaik, args->stride,
			
 
				-			      gb, strideb,
			
 
				-			0.0 , gc, stridec);
			
 
				-
			
 
				-}
			
 
				-
			
 
				-
			
 
				-void _cublas_compute_contrib_compact(void *descr[], void *arg)
			
 
				-{
			
 
				-	struct starpu_compute_contrib_compact_args *args = arg;
			
 
				-
			
 
				-	float *gaik = (float *)STARPU_GET_BLAS_PTR(descr[0]) + args->dima;
			
 
				-	float *gb = (float *)STARPU_GET_BLAS_PTR(descr[1]);
			
 
				-	unsigned strideb = (unsigned)STARPU_GET_BLAS_LD(descr[1]);
			
 
				-	float *gc = (float *)STARPU_GET_BLAS_PTR(descr[2]);
			
 
				-	unsigned stridec = (unsigned)STARPU_GET_BLAS_LD(descr[2]);
			
 
				-	
			
 
				-	cublas_sgemm++;
			
 
				-
			
 
				-	cublasSgemm('N','T', args->dimi, args->dimj, args->dima, 
			
 
				-			1.0, gaik, args->stride,
			
 
				-			     gb, strideb,
			
 
				-			0.0, gc, stridec);
			
 
				-
			
 
				-	cublasStatus st = cublasGetError();
			
 
				-	if (st) fprintf(stderr, "ERROR %d\n", st);
			
 
				-	STARPU_ASSERT(st == CUBLAS_STATUS_SUCCESS);
			
 
				-}
			
 
				-
			
 
				-
			
 
				-static struct starpu_perfmodel_t starpu_compute_contrib_compact = {
			
 
				-	.per_arch = { 
			
 
				-		[STARPU_CPU_DEFAULT] = { .cost_model = starpu_compute_contrib_compact_cpu_cost },
			
 
				-		[STARPU_CUDA_DEFAULT] = { .cost_model = starpu_compute_contrib_compact_cuda_cost }
			
 
				-	},
			
 
				-//	.type = REGRESSION_BASED,
			
 
				-	.type = STARPU_PER_ARCH,
			
 
				-	.symbol = "starpu_compute_contrib_compact"
			
 
				-};
			
 
				-
			
 
				-int update_work_blocks(unsigned col, int dimi, int dimj, int dima, int stride)
			
 
				-{
			
 
				-	/* be paranoid XXX */
			
 
				-	notify_data_modification(get_sub_data(&work_block_1, 1, 0), 0);
			
 
				-	notify_data_modification(get_sub_data(&work_block_1, 1, 1), 0);
			
 
				-	//notify_data_modification(get_sub_data(&work_block_1, 1, 2), 0);
			
 
				-	notify_data_modification(get_sub_data(&work_block_2, 1, 0), 0);
			
 
				-	notify_data_modification(get_sub_data(&work_block_2, 1, 1), 0);
			
 
				-	notify_data_modification(&cblktab[col], 0);
			
 
				-
			
 
				-	starpu_unpartition_data(&work_block_1, 0);
			
 
				-	starpu_unpartition_data(&work_block_2, 0);
			
 
				-
			
 
				-	starpu_filter f1, f2;
			
 
				-	struct divide_vector_in_blas_filter_args args1, args2;
			
 
				-
			
 
				-	f1.filter_func = divide_vector_in_blas_filter;
			
 
				-		args1.n1 = stride - dima - dimi; //STARPU_ASSERT(args1.n1 != 0);
			
 
				-		args1.n2 = (stride - dima)*dima;
			
 
				-		args1.stride = (stride - dima);
			
 
				-
			
 
				-	f1.filter_arg_ptr = &args1;
			
 
				-	starpu_partition_data(&work_block_1, &f1);
			
 
				-
			
 
				-	f2.filter_func = divide_vector_in_blas_filter;
			
 
				-		args2.n1 = 0;
			
 
				-		args2.n2 = dimi*dimj;
			
 
				-		args2.stride = dimi;
			
 
				-	f2.filter_arg_ptr = &args2;
			
 
				-	starpu_partition_data(&work_block_2, &f2);
			
 
				-
			
 
				-	return (args1.n1!=0)?3:2;
			
 
				-}
			
 
				-
			
 
				-void STARPU_COMPUTE_CONTRIB_COMPACT(unsigned col, int dimi, int dimj, int dima, int stride)
			
 
				-{
			
 
				-//        CUBLAS_SGEMM("N","T",dimi,dimj,dima, 1.0,gaik,stride,gb,stride-dima,
			
 
				-//               0.0 ,gc,dimi);
			
 
				-	
			
 
				-	struct starpu_compute_contrib_compact_args args;
			
 
				-		args.stride = stride;
			
 
				-		args.dimi = dimi;
			
 
				-		args.dimj = dimj;
			
 
				-		args.dima = dima;
			
 
				-
			
 
				-	starpu_codelet cl;
			
 
				-	starpu_job_t j;
			
 
				-	sem_t sem;
			
 
				-
			
 
				-	/* initialize codelet */
			
 
				-	cl.where = STARPU_CUDA|STARPU_CPU;
			
 
				-	cl.cpu_func = _cpu_compute_contrib_compact;
			
 
				-	cl.cuda_func = _cublas_compute_contrib_compact;
			
 
				-	
			
 
				-	j = _starpu_job_create();
			
 
				-
			
 
				-	j->cb = _cublas_cblk_strsm_callback;
			
 
				-	j->argcb = &sem;
			
 
				-	j->cl = &cl;
			
 
				-	j->cl_arg = &args;
			
 
				-	j->cl->model = &starpu_compute_contrib_compact;
			
 
				-
			
 
				-	int ret;
			
 
				-	ret = update_work_blocks(col, dimi, dimj, dima, stride);
			
 
				-
			
 
				-	j->nbuffers = 3;
			
 
				-	/* we could be a little more precise actually */
			
 
				-	j->buffers[0].handle = &cblktab[col]; // gaik
			
 
				-	j->buffers[0].mode = STARPU_R;
			
 
				-	j->buffers[1].handle = get_sub_data(&work_block_1, 1, (ret==2)?0:1);
			
 
				-	j->buffers[1].mode = STARPU_R;
			
 
				-	j->buffers[2].handle = get_sub_data(&work_block_2, 1, 0);; 
			
 
				-	j->buffers[2].mode = STARPU_RW; // XXX STARPU_W
			
 
				-	
			
 
				-	sem_init(&sem, 0, 0U);
			
 
				-	
			
 
				-	/* submit the codelet */
			
 
				-	submit_job(j);
			
 
				-
			
 
				-	/* wait for its completion */
			
 
				-	sem_wait(&sem);
			
 
				-	sem_destroy(&sem);
			
 
				-
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- *
			
 
				- *	SGEMM
			
 
				- *
			
 
				- */
			
 
				-
			
 
				-struct sgemm_args {
			
 
				-	char transa;
			
 
				-	char transb;
			
 
				-	int m, n, k;
			
 
				-	float alpha;
			
 
				-	float beta;
			
 
				-};
			
 
				-
			
 
				-
			
 
				-void _cublas_sgemm(void *descr[], void *arg)
			
 
				-{
			
 
				-	float *A, *B, *C;
			
 
				-	uint32_t nxA, nyA, ldA;
			
 
				-	uint32_t nxB, nyB, ldB;
			
 
				-	uint32_t nxC, nyC, ldC;
			
 
				-
			
 
				-	A = (float *)STARPU_GET_BLAS_PTR(descr[0]);
			
 
				-	nxA = STARPU_GET_BLAS_NX(descr[0]);
			
 
				-	nyA = STARPU_GET_BLAS_NY(descr[0]);
			
 
				-	ldA = STARPU_GET_BLAS_LD(descr[0]);
			
 
				-
			
 
				-	B = (float *)STARPU_GET_BLAS_PTR(descr[1]);
			
 
				-	nxB = STARPU_GET_BLAS_NX(descr[1]);
			
 
				-	nyB = STARPU_GET_BLAS_NY(descr[1]);
			
 
				-	ldB = STARPU_GET_BLAS_LD(descr[1]);
			
 
				-
			
 
				-	C = (float *)STARPU_GET_BLAS_PTR(descr[2]);
			
 
				-	nxC = STARPU_GET_BLAS_NX(descr[2]);
			
 
				-	nyC = STARPU_GET_BLAS_NY(descr[2]);
			
 
				-	ldC = STARPU_GET_BLAS_LD(descr[2]);
			
 
				-
			
 
				-	struct sgemm_args *args = arg;
			
 
				-
			
 
				-//	fprintf(stderr, "CUBLAS SGEMM nxA %d nyA %d nxB %d nyB %d nxC %d nyC %d lda %d ldb %d ldc %d\n", nxA, nyA, nxB, nyB, nxC, nyC, ldA, ldB, ldC);
			
 
				-
			
 
				-//	STARPU_ASSERT(nxA == nxC);
			
 
				-//	STARPU_ASSERT(nyA == nxB);
			
 
				-//	STARPU_ASSERT(nyB == nyC);
			
 
				-//
			
 
				-//	STARPU_ASSERT(nxA <= ldA);
			
 
				-//	STARPU_ASSERT(nxB <= ldB);
			
 
				-//	STARPU_ASSERT(nxC <= ldC);
			
 
				-
			
 
				-	cublasSgemm (args->transa, args->transb, args->m, args->n, args->k, args->alpha, A, (int)ldA,
			
 
				-			B, (int)ldB, args->beta, C, (int)ldC);
			
 
				-	cublasStatus st = cublasGetError();
			
 
				-	if (st) fprintf(stderr, "ERROR %d\n", st);
			
 
				-	STARPU_ASSERT(st == CUBLAS_STATUS_SUCCESS);
			
 
				-}
			
 
				-
			
 
				-static void _cublas_sgemm_callback(void *sem)
			
 
				-{
			
 
				-	sem_t *semptr = sem;
			
 
				-	sem_post(semptr);
			
 
				-}
			
 
				-
			
 
				-void STARPU_SGEMM (const char *transa, const char *transb, const int m,
			
 
				-                   const int n, const int k, const float alpha,
			
 
				-                   const float *A, const int lda, const float *B,
			
 
				-                   const int ldb, const float beta, float *C, const int ldc)
			
 
				-{
			
 
				-	struct sgemm_args args;
			
 
				-		args.transa = *transa;
			
 
				-		args.transb = *transb;
			
 
				-		args.alpha = alpha;
			
 
				-		args.beta = beta;
			
 
				-		args.m = m;
			
 
				-		args.n = n;
			
 
				-		args.k = k;
			
 
				-
			
 
				-	data_state A_state;
			
 
				-	data_state B_state;
			
 
				-	data_state C_state;
			
 
				-
			
 
				-	starpu_codelet cl;
			
 
				-	starpu_job_t j;
			
 
				-	sem_t sem;
			
 
				-
			
 
				-//	fprintf(stderr, "STARPU - SGEMM - TRANSA %c TRANSB %c m %d n %d k %d lda %d ldb %d ldc %d \n", *transa, *transb, m, n, k, lda, ldb, ldc);
			
 
				-
			
 
				-	if (toupper(*transa) == 'N')
			
 
				-	{
			
 
				-		starpu_register_blas_data(&A_state, 0, (uintptr_t)A, lda, m, k, sizeof(float));
			
 
				-	}
			
 
				-	else 
			
 
				-	{
			
 
				-		starpu_register_blas_data(&A_state, 0, (uintptr_t)A, lda, k, m, sizeof(float));
			
 
				-	}
			
 
				-
			
 
				-	if (toupper(*transb) == 'N')
			
 
				-	{
			
 
				-		starpu_register_blas_data(&B_state, 0, (uintptr_t)B, ldb, k, n, sizeof(float));
			
 
				-	}
			
 
				-	else 
			
 
				-	{	
			
 
				-		starpu_register_blas_data(&B_state, 0, (uintptr_t)B, ldb, n, k, sizeof(float));
			
 
				-	}
			
 
				-
			
 
				-	starpu_register_blas_data(&C_state, 0, (uintptr_t)C, ldc, m, n, sizeof(float));
			
 
				-
			
 
				-	/* initialize codelet */
			
 
				-	cl.where = STARPU_CUDA;
			
 
				-	//cl.cpu_func = _cpu_strsm;
			
 
				-	cl.cuda_func = _cublas_sgemm;
			
 
				-	
			
 
				-	j = _starpu_job_create();
			
 
				-	j->cb = _cublas_sgemm_callback;
			
 
				-	j->argcb = &sem;
			
 
				-	j->cl = &cl;
			
 
				-	j->cl_arg = &args;
			
 
				-
			
 
				-	j->nbuffers = 3;
			
 
				-	j->buffers[0].handle = &A_state;
			
 
				-	j->buffers[0].mode = STARPU_R;
			
 
				-	j->buffers[1].handle = &B_state;
			
 
				-	j->buffers[1].mode = STARPU_R;
			
 
				-	j->buffers[2].handle = &C_state;
			
 
				-	j->buffers[2].mode = STARPU_RW;
			
 
				-	
			
 
				-	j->cl->model = NULL;
			
 
				-
			
 
				-	sem_init(&sem, 0, 0U);
			
 
				-	
			
 
				-	/* submit the codelet */
			
 
				-	submit_job(j);
			
 
				-
			
 
				-	/* wait for its completion */
			
 
				-	sem_wait(&sem);
			
 
				-	sem_destroy(&sem);
			
 
				-
			
 
				-	/* make sure data are in memory again */
			
 
				-	starpu_unpartition_data(&A_state, 0);
			
 
				-	starpu_unpartition_data(&B_state, 0);
			
 
				-	starpu_unpartition_data(&C_state, 0);
			
 
				-	//starpu_delete_data(&A_state);
			
 
				-	//starpu_delete_data(&B_state);
			
 
				-	//starpu_delete_data(&C_state);
			
 
				-	
			
 
				-//	fprintf(stderr, "SGEMM done\n");
			
 
				-}
			
 
				-
			
 
				-
			
 
				-/*
			
 
				- *
			
 
				- *	STRSM
			
 
				- *
			
 
				- */
			
 
				-
			
 
				-struct strsm_args {
			
 
				-	char side;
			
 
				-	char uplo;
			
 
				-	char transa;
			
 
				-	char diag;
			
 
				-	float alpha;
			
 
				-	int m,n;
			
 
				-};
			
 
				-//
			
 
				-//void _cpu_strsm(void *descr[], void *arg)
			
 
				-//{
			
 
				-//	float *A, *B;
			
 
				-//	uint32_t nxA, nyA, ldA;
			
 
				-//	uint32_t nxB, nyB, ldB;
			
 
				-//
			
 
				-//	A = (float *)STARPU_GET_BLAS_PTR(descr[0]);
			
 
				-//	nxA = STARPU_GET_BLAS_NX(descr[0]);
			
 
				-//	nyA = STARPU_GET_BLAS_NY(descr[0]);
			
 
				-//	ldA = STARPU_GET_BLAS_LD(descr[0]);
			
 
				-//
			
 
				-//	B = (float *)STARPU_GET_BLAS_PTR(descr[1]);
			
 
				-//	nxB = STARPU_GET_BLAS_NX(descr[1]);
			
 
				-//	nyB = STARPU_GET_BLAS_NY(descr[1]);
			
 
				-//	ldB = STARPU_GET_BLAS_LD(descr[1]);
			
 
				-//
			
 
				-//	struct strsm_args *args = arg;
			
 
				-//
			
 
				-//	fprintf(stderr, "CPU STRSM nxA %d nyA %d nxB %d nyB %d lda %d ldb %d\n", nxA, nyA, nxB, nyB, ldA, ldB);
			
 
				-//
			
 
				-//	SOPALIN_TRSM("R","L","T","U",dimb,dima,fun,ga,stride,gb,stride);
			
 
				-//	
			
 
				-//}
			
 
				-
			
 
				-/* 
			
 
				- *	
			
 
				- *	
			
 
				- *
			
 
				- */
			
 
				-
			
 
				-
			
 
				-void CUBLAS_SGEMM (const char *transa, const char *transb, const int m,
			
 
				-                   const int n, const int k, const float alpha,
			
 
				-                   const float *A, const int lda, const float *B,
			
 
				-                   const int ldb, const float beta, float *C, const int ldc)
			
 
				-{
			
 
				-    int ka, kb;
			
 
				-    float *devPtrA, *devPtrB, *devPtrC;
			
 
				-
			
 
				-//   printf("CUBLAS SGEMM : m %d n %d k %d lda %d ldb %d ldc %d\n", m, n, k, lda, ldb, ldc);
			
 
				-
			
 
				-    /*  A      - REAL             array of DIMENSION ( LDA, ka ), where ka is
			
 
				-     *           k  when  TRANSA = 'N' or 'n',  and is  m  otherwise.
			
 
				-     *           Before entry with  TRANSA = 'N' or 'n',  the leading  m by k
			
 
				-     *           part of the array  A  must contain the matrix  A,  otherwise
			
 
				-     *           the leading  k by m  part of the array  A  must contain  the
			
 
				-     *           matrix A.
			
 
				-     */
			
 
				-    ka = (toupper(transa[0]) == 'N') ? k : m;
			
 
				-    cublasAlloc (lda * ka, sizeof(devPtrA[0]), (void**)&devPtrA);
			
 
				-    if (toupper(transa[0]) == 'N') {
			
 
				-        cublasSetMatrix (STARPU_MIN(m,lda), k, sizeof(A[0]), A, lda, devPtrA, 
			
 
				-                         lda);
			
 
				-    } else {
			
 
				-        cublasSetMatrix (STARPU_MIN(k,lda), m, sizeof(A[0]), A, lda, devPtrA, 
			
 
				-                         lda);
			
 
				-    }
			
 
				-
			
 
				-    /*  B      - REAL             array of DIMENSION ( LDB, kb ), where kb is
			
 
				-     *           n  when  TRANSB = 'N' or 'n',  and is  k  otherwise.
			
 
				-     *           Before entry with  TRANSB = 'N' or 'n',  the leading  k by n
			
 
				-     *           part of the array  B  must contain the matrix  B,  otherwise
			
 
				-     *           the leading  n by k  part of the array  B  must contain  the
			
 
				-     *           matrix B.
			
 
				-     */
			
 
				-    kb = (toupper(transb[0]) == 'N') ? n : k;
			
 
				-    cublasAlloc (ldb * kb, sizeof(devPtrB[0]), (void**)&devPtrB);
			
 
				-    if (toupper(transb[0]) == 'N') {
			
 
				-        cublasSetMatrix (STARPU_MIN(k,ldb), n, sizeof(B[0]), B, ldb, devPtrB, 
			
 
				-                         ldb);
			
 
				-    } else {
			
 
				-        cublasSetMatrix (STARPU_MIN(n,ldb), k, sizeof(B[0]), B, ldb, devPtrB,
			
 
				-                         ldb);
			
 
				-    }
			
 
				-    
			
 
				-    /*  C      - REAL             array of DIMENSION ( LDC, n ).
			
 
				-     *           Before entry, the leading  m by n  part of the array  C must
			
 
				-     *           contain the matrix  C,  except when  beta  is zero, in which
			
 
				-     *           case C need not be set on entry.
			
 
				-     *           On exit, the array  C  is overwritten by the  m by n  matrix
			
 
				-     */
			
 
				-    cublasAlloc ((ldc) * (n), sizeof(devPtrC[0]), (void**)&devPtrC);
			
 
				-    cublasSetMatrix (STARPU_MIN(m,ldc), n, sizeof(C[0]), C, ldc, devPtrC, ldc);
			
 
				-
			
 
				-    cublasSgemm (transa[0], transb[0], m, n, k, alpha, devPtrA, lda, 
			
 
				-                 devPtrB, ldb, beta, devPtrC, ldc);
			
 
				-
			
 
				-    cublasGetMatrix (STARPU_MIN(m,ldc), n, sizeof(C[0]), devPtrC, ldc, C, ldc);
			
 
				-    cublasFree (devPtrA);
			
 
				-    cublasFree (devPtrB);
			
 
				-    cublasFree (devPtrC);
			
 
				-}
			
 
				-
			
 
				-
			
--- a/examples/pastix-wrappers/starpu-blas-wrapper.h
+++ b/examples/pastix-wrappers/starpu-blas-wrapper.h
@@ -1,108 +0,0 @@
 
				-/*
			
 
				- * StarPU
			
 
				- * Copyright (C) INRIA 2008-2009 (see AUTHORS file)
			
 
				- *
			
 
				- * This program is free software; you can redistribute it and/or modify
			
 
				- * it under the terms of the GNU Lesser General Public License as published by
			
 
				- * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				- * your option) any later version.
			
 
				- *
			
 
				- * This program is distributed in the hope that it will be useful, but
			
 
				- * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				- *
			
 
				- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				- */
			
 
				-
			
 
				-#ifndef __STARPU_BLAS_WRAPPER_H__
			
 
				-#define __STARPU_BLAS_WRAPPER_H__
			
 
				-
			
 
				-#include "generated_model.h"
			
 
				-
			
 
				-#define OVERHEAD	150000.0
			
 
				-
			
 
				-static double transfer_time_dtoh(unsigned size)
			
 
				-{
			
 
				-	double latency = 0.0;
			
 
				-	double bandwith = 0.0;
			
 
				-
			
 
				-	return latency + size*bandwith;
			
 
				-}
			
 
				-
			
 
				-static double transfer_time_htod(unsigned size)
			
 
				-{
			
 
				-	double latency = 0.0;
			
 
				-	double bandwith = 0.0;
			
 
				-
			
 
				-	return latency + size*bandwith;
			
 
				-}
			
 
				-
			
 
				-/*GEMM CPU */
			
 
				-
			
 
				-#define PERF_GEMM_CPU(i,j,k) (GEMM_CPU_A*(double)(i)*(double)(j)*(double)(k)+GEMM_CPU_B*(double)(i)*(double)(j)+GEMM_CPU_C*(double)(j)*(double)(k)+GEMM_CPU_D*(double)(i)+GEMM_CPU_E*(double)(j)+GEMM_CPU_F)
			
 
				-
			
 
				-static double starpu_compute_contrib_compact_cpu_cost(starpu_buffer_descr *descr)
			
 
				-{
			
 
				-	unsigned nx0, ny0, ny2;
			
 
				-	nx0 = descr[0].handle->interface->blas.nx;
			
 
				-	ny0 = descr[0].handle->interface->blas.ny;
			
 
				-	ny2 = descr[2].handle->interface->blas.ny;
			
 
				-
			
 
				-	return PERF_GEMM_CPU(nx0-ny0, ny2, ny0); 
			
 
				-}
			
 
				-
			
 
				-
			
 
				-
			
 
				-/*GEMM GPU */
			
 
				-
			
 
				-#define PERF_GEMM_GPU(i,j,k) (GEMM_GPU_A*(double)(i)*(double)(j)*(double)(k)+GEMM_GPU_B*(double)(i)*(double)(j)+GEMM_GPU_C*(double)(j)*(double)(k)+GEMM_GPU_D*(double)(i)+GEMM_GPU_E*(double)(j)+GEMM_GPU_F)
			
 
				-
			
 
				-static double starpu_compute_contrib_compact_cuda_cost(starpu_buffer_descr *descr)
			
 
				-{
			
 
				-	unsigned nx0, ny0, ny2;
			
 
				-	nx0 = descr[0].handle->interface->blas.nx;
			
 
				-	ny0 = descr[0].handle->interface->blas.ny;
			
 
				-	ny2 = descr[2].handle->interface->blas.ny;
			
 
				-
			
 
				-	return PERF_GEMM_GPU(nx0-ny0, ny2, ny0) + OVERHEAD; 
			
 
				-}
			
 
				-
			
 
				-
			
 
				-/*TRSM CPU */
			
 
				-
			
 
				-#define PERF_TRSM_GPU(i,j)   (TRSM_GPU_A*(double)(i)*(double)(i)*(double)(j)+TRSM_GPU_B*(double)(i)+TRSM_GPU_C)
			
 
				-
			
 
				-static double starpu_cblk_strsm_cuda_cost(starpu_buffer_descr *descr)
			
 
				-{
			
 
				-	unsigned nx, ny;
			
 
				-	nx = descr[0].handle->interface->blas.nx;
			
 
				-	ny = descr[0].handle->interface->blas.ny;
			
 
				-
			
 
				-	return PERF_TRSM_GPU(nx-ny, ny) + OVERHEAD; 
			
 
				-}
			
 
				-
			
 
				-/*TRSM CPU */
			
 
				-
			
 
				-#define PERF_TRSM_CPU(i,j)   (TRSM_CPU_A*(double)(i)*(double)(i)*(double)(j)+TRSM_CPU_B*(double)(i)+TRSM_CPU_C)
			
 
				-
			
 
				-static double starpu_cblk_strsm_cpu_cost(starpu_buffer_descr *descr)
			
 
				-{
			
 
				-	unsigned nx, ny;
			
 
				-	nx = descr[0].handle->interface->blas.nx;
			
 
				-	ny = descr[0].handle->interface->blas.ny;
			
 
				-
			
 
				-	return PERF_TRSM_CPU(nx-ny, ny); 
			
 
				-}
			
 
				-
			
 
				-void STARPU_INIT(void);
			
 
				-void STARPU_TERMINATE(void);
			
 
				-void STARPU_SGEMM (const char *transa, const char *transb, const int m,
			
 
				-                   const int n, const int k, const float alpha,
			
 
				-                   const float *A, const int lda, const float *B,
			
 
				-                   const int ldb, const float beta, float *C, const int ldc);
			
 
				-void STARPU_STRSM (const char *side, const char *uplo, const char *transa, 
			
 
				-                   const char *diag, const int m, const int n, 
			
 
				-                   const float alpha, const float *A, const int lda,
			
 
				-                   float *B, const int ldb);
			
 
				-
			
 
				-#endif // __STARPU_BLAS_WRAPPER_H__