лет назад: 9 · 0dbf966aef
--- a/AUTHORS
+++ b/AUTHORS
@@ -4,6 +4,7 @@ Olivier Aumage <olivier.aumage@inria.fr>
 
				 William Braik <wbraik@gmail.com>
			
 
				 Berenger Bramas <berenger.bramas@inria.fr>
			
 
				 Alfredo Buttari <alfredo.buttari@enseeiht.fr>
			
 
				+Adrien Cassagne <adrien.cassagne@inria.fr>
			
 
				 Jérôme Clet-Ortega <jerome.clet-ortega@labri.fr>
			
 
				 Nicolas Collin <nicolas.collin@inria.fr>
			
 
				 Ludovic Courtès <ludovic.courtes@inria.fr>
			
--- a/ChangeLog
+++ b/ChangeLog
@@ -22,6 +22,7 @@ New features:
 
				   * New scheduler with heterogeneous priorities
			
 
				   * Support priorities for data transfers.
			
 
				   * Add support for Ayudame version 2.x debugging library.
			
 
				+  * Add support for multiple linear regression performance models
			
 
				 
			
 
				 Small features:
			
 
				   * Scheduling contexts may now be associated a user data pointer at creation
			
@@ -40,6 +41,7 @@ New features:
 
				   * Add starpu_tasks_rec_complete tool to add estimation times in tasks.rec
			
 
				     files.
			
 
				   * Add STARPU_FXT_TRACE environment variable.
			
 
				+  * Add starpu_data_set_user_data and starpu_data_get_user_data.
			
 
				 
			
 
				 StarPU 1.2.0 (svn revision 18521)
			
 
				 ==============================================
			
--- a/Makefile.am
+++ b/Makefile.am
@@ -19,7 +19,14 @@
 
				 ACLOCAL_AMFLAGS=-I m4
			
 
				 CLEANFILES = *.gcno *.gcda *.linkinfo
			
 
				 
			
 
				-SUBDIRS = src
			
 
				+SUBDIRS = 
			
 
				+
			
 
				+if STARPU_USE_MIN_DGELS
			
 
				+SUBDIRS += min-dgels
			
 
				+endif
			
 
				+
			
 
				+SUBDIRS += src
			
 
				+
			
 
				 SUBDIRS += tools
			
 
				 
			
 
				 if BUILD_TESTS
			
--- a/configure.ac
+++ b/configure.ac
@@ -81,6 +81,14 @@ AC_PROG_FC
 
				 AC_CHECK_PROGS(PROG_STAT,gstat stat)
			
 
				 AC_CHECK_PROGS(PROG_DATE,gdate date)
			
 
				 AC_OPENMP
			
 
				+#c++11 detection
			
 
				+AX_CXX_COMPILE_STDCXX(11,noext,optional)
			
 
				+
			
 
				+AC_SUBST([STARPU_HAVE_CXX11], [test "$HAVE_CXX11" -eq 1])
			
 
				+AM_CONDITIONAL([STARPU_HAVE_CXX11], [test "$HAVE_CXX11" -eq 1])
			
 
				+if test $HAVE_CXX11 -eq 1; then
			
 
				+  AC_DEFINE(STARPU_HAVE_CXX11, [1], [compiler supports cxx11])
			
 
				+fi
			
 
				 
			
 
				 if test x$enable_perf_debug = xyes; then
			
 
				     enable_shared=no
			
@@ -581,6 +589,7 @@ AC_DEFUN([STARPU_CHECK_CUDA],
 
				 	    AC_MSG_WARN(['nvcc' not found, disabling CUDA])
			
 
				 	    have_valid_cuda=no
			
 
				 	else
			
 
				+	    # This is for very old cuda, to enable the use of double etc.
			
 
				 	    AC_MSG_CHECKING(whether nvcc supports sm_13 architecture)
			
 
				 	    OLD_NVCCFLAGS="$NVCCFLAGS"
			
 
				 	    NVCCFLAGS="$NVCCFLAGS -arch sm_13"
			
@@ -593,6 +602,21 @@ AC_DEFUN([STARPU_CHECK_CUDA],
 
				 		AC_MSG_RESULT(no)
			
 
				 		NVCCFLAGS="$OLD_NVCCFLAGS"
			
 
				 	    fi
			
 
				+
			
 
				+	    # This is for recent cuda, which complains if we don't actually set an arch!?
			
 
				+	    AC_MSG_CHECKING(whether nvcc supports -Wno-deprecated-gpu-targets)
			
 
				+	    OLD_NVCCFLAGS="$NVCCFLAGS"
			
 
				+	    NVCCFLAGS="$NVCCFLAGS -Wno-deprecated-gpu-targets"
			
 
				+	    echo "int main(int argc, char **argv) { return 0;}" > cuda_test.cu
			
 
				+	    $NVCC $NVCCFLAGS -c cuda_test.cu >/dev/null 2>&1
			
 
				+	    if test $? -eq 0
			
 
				+	    then
			
 
				+		AC_MSG_RESULT(yes)
			
 
				+	    else
			
 
				+		AC_MSG_RESULT(no)
			
 
				+		NVCCFLAGS="$OLD_NVCCFLAGS"
			
 
				+	    fi
			
 
				+
			
 
				 	    rm -f cuda_test*
			
 
				 	fi
			
 
				 
			
@@ -1147,6 +1171,46 @@ fi
 
				 
			
 
				 ###############################################################################
			
 
				 #                                                                             #
			
 
				+#			 Multiple linear regression			      #
			
 
				+#                                                                             #
			
 
				+###############################################################################
			
 
				+AC_ARG_ENABLE(mlr, [AS_HELP_STRING([--disable-mlr],
			
 
				+			[Disable multiple linear regression models])],
			
 
				+			enable_mlr=$enableval, enable_mlr=yes)
			
 
				+
			
 
				+AC_MSG_CHECKING(whether multiple linear regression models are disabled)
			
 
				+if test x$enable_mlr = xyes -a "$starpu_windows" != "yes" ; then
			
 
				+   	AC_MSG_RESULT(no)
			
 
				+	install_min_dgels=no
			
 
				+   	STARPU_SEARCH_LIBS(LAPACK,[dgels_],[lapack],use_system_lapack=yes,,)
			
 
				+	if test x$use_system_lapack = xyes; then
			
 
				+	        AC_DEFINE(STARPU_MLR_MODEL, [1], [use reflapack library])
			
 
				+		LDFLAGS="-llapack $LDFLAGS"
			
 
				+	else
			
 
				+		AC_MSG_CHECKING(whether min-dgels is linked)
			
 
				+		if test x"$DGELS_LIBS" != x; then
			
 
				+		   	AC_MSG_RESULT(yes)
			
 
				+        		AC_DEFINE(STARPU_MLR_MODEL, [1], [use user defined library])
			
 
				+			AC_ARG_VAR([DGELS_LIBS], [linker flags for lapack dgels])
			
 
				+		else
			
 
				+			AC_MSG_RESULT(no)
			
 
				+			AC_MSG_CHECKING(min-dgels source)
			
 
				+			cp -r $srcdir/min-dgels $PWD/
			
 
				+			AC_MSG_RESULT(yes)
			
 
				+			DGELS_LIBS="-Wl,--start-group $STARPU_BUILD_DIR/min-dgels/build/*.a -Wl,--end-group"
			
 
				+			AC_DEFINE(STARPU_MLR_MODEL, [1], [use user defined library])
			
 
				+			AC_ARG_VAR([DGELS_LIBS], [linker flags for lapack dgels])
			
 
				+			install_min_dgels=yes
			
 
				+		fi
			
 
				+	fi
			
 
				+else
			
 
				+ 	AC_MSG_RESULT(yes)
			
 
				+	install_min_dgels=no
			
 
				+fi
			
 
				+AM_CONDITIONAL(STARPU_USE_MIN_DGELS, test x$install_min_dgels = xyes)
			
 
				+
			
 
				+###############################################################################
			
 
				+#                                                                             #
			
 
				 #                                 MIC settings                                #
			
 
				 #                                                                             #
			
 
				 ###############################################################################
			
@@ -1604,8 +1668,8 @@ if test x$use_fxt = xyes; then
 
				 	LIBS="$save_LIBS"
			
 
				 	save_CFLAGS="$CFLAGS"
			
 
				 	CFLAGS="$CFLAGS $FXT_CFLAGS"
			
 
				-	AC_CHECK_DECLS([enable_fut_flush])
			
 
				-	AC_CHECK_DECLS([fut_set_filename])
			
 
				+	AC_CHECK_DECLS([enable_fut_flush], [], [], [[#include <fut.h>]])
			
 
				+	AC_CHECK_DECLS([fut_set_filename], [], [], [[#include <fut.h>]])
			
 
				 	CFLAGS="$save_CFLAGS"
			
 
				 
			
 
				 	if test x$enable_simgrid = xyes -a x$enable_shared = xno ; then
			
@@ -2296,6 +2360,7 @@ AC_SUBST([pkglibdir])
 
				 AC_ARG_ENABLE(fortran, [AS_HELP_STRING([--disable-fortran],
			
 
				 			[disable build of fortran examples])],
			
 
				 			enable_build_fortran=$enableval, enable_build_fortran=yes)
			
 
				+use_mpi_fort=no
			
 
				 if test "x$FC" != "x"; then
			
 
				 	if $FC --version|grep -q 'GNU Fortran'; then
			
 
				 		gfortran_fc_version=`$FC --version|head -1|sed 's/.*)//;s/^.*\([[0-9]][[0-9]]*\)\.\([[0-9]][[0-9]]*\)\.\([[0-9]][[0-9]]*\).*/\1.\2.\3/'`
			
@@ -2316,7 +2381,15 @@ if test "x$FC" != "x"; then
 
				 				enable_build_fortran="no"
			
 
				 			fi
			
 
				 		else
			
 
				-			AC_MSG_WARN(Fortran compiler has not been tested for StarPU native Fortran support)
			
 
				+			if $FC -qversion 2>&1|grep -q 'IBM XL Fortran'; then
			
 
				+				xlf_fc_version=`$FC -V 2>&1 |tail -1|sed 's/.*Version: //'`
			
 
				+				xlf_maj_version=`echo $xlf_fc_version|cut -d. -f1`
			
 
				+
			
 
				+				AC_MSG_WARN([IBM Fortran compiler $xlf_fc_version not validated with the native StarPU Fortran API, Fortran examples will not be built])
			
 
				+				enable_build_fortran="no"
			
 
				+			else
			
 
				+				AC_MSG_WARN(Fortran compiler has not been tested for StarPU native Fortran support)
			
 
				+			fi
			
 
				 		fi
			
 
				 	fi
			
 
				 	if test "x$enable_build_fortran" = "xyes" ; then
			
@@ -2935,7 +3008,7 @@ AM_CONDITIONAL(BUILD_DOC, [test x$enable_build_doc != xno])
 
				 ###############################################################################
			
 
				 
			
 
				 # these are the flags needed for linking libstarpu (and thus also for static linking)
			
 
				-LIBSTARPU_LDFLAGS="$HWLOC_LIBS $FXT_LIBS $STARPU_COI_LDFLAGS $STARPU_SCIF_LDFLAGS $STARPU_RCCE_LDFLAGS $STARPU_LEVELDB_LDFLAGS $STARPU_GLPK_LDFLAGS $STARPU_LEVELDB_LDFLAGS $SIMGRID_LIBS $STARPU_BLAS_LDFLAGS $STARPU_OMP_LDFLAGS"
			
 
				+LIBSTARPU_LDFLAGS="$HWLOC_LIBS $FXT_LIBS $STARPU_COI_LDFLAGS $STARPU_SCIF_LDFLAGS $STARPU_RCCE_LDFLAGS $STARPU_LEVELDB_LDFLAGS $STARPU_GLPK_LDFLAGS $STARPU_LEVELDB_LDFLAGS $SIMGRID_LIBS $STARPU_BLAS_LDFLAGS $STARPU_OMP_LDFLAGS $DGELS_LIBS"
			
 
				 AC_SUBST([LIBSTARPU_LDFLAGS])
			
 
				 
			
 
				 LIBSTARPU_LINK=libstarpu-$STARPU_EFFECTIVE_VERSION.la
			
@@ -2965,11 +3038,13 @@ AC_CONFIG_COMMANDS([executable-scripts], [
 
				   chmod +x tools/starpu_paje_draw_histogram
			
 
				   chmod +x tools/starpu_paje_state_stats
			
 
				   chmod +x tools/starpu_paje_summary
			
 
				+  chmod +x tools/starpu_mlr_analysis
			
 
				   chmod +x tools/starpu_paje_sort
			
 
				   chmod +x tools/starpu_smpirun
			
 
				   chmod +x doc/doxygen/doxygen_filter.sh
			
 
				   mkdir -p tests/microbenchs
			
 
				   test -e tests/microbenchs/tasks_size_overhead.sh || ln -sf $ac_abs_top_srcdir/tests/microbenchs/tasks_size_overhead.sh tests/microbenchs/
			
 
				+  test -e tests/microbenchs/tasks_size_overhead_sched.sh || ln -sf $ac_abs_top_srcdir/tests/microbenchs/tasks_size_overhead_sched.sh tests/microbenchs/
			
 
				   test -e tests/microbenchs/tasks_size_overhead_scheds.sh || ln -sf $ac_abs_top_srcdir/tests/microbenchs/tasks_size_overhead_scheds.sh tests/microbenchs/
			
 
				   test -e tests/microbenchs/tasks_size_overhead.gp || ln -sf $ac_abs_top_srcdir/tests/microbenchs/tasks_size_overhead.gp tests/microbenchs/
			
 
				   test -e tests/microbenchs/microbench.sh || ln -sf $ac_abs_top_srcdir/tests/microbenchs/microbench.sh tests/microbenchs/
			
@@ -3022,6 +3097,7 @@ AC_OUTPUT([
 
				 	tools/starpu_paje_draw_histogram
			
 
				 	tools/starpu_paje_state_stats
			
 
				 	tools/starpu_paje_summary
			
 
				+	tools/starpu_mlr_analysis
			
 
				 	tools/starpu_paje_sort
			
 
				 	tools/starpu_smpirun
			
 
				 	socl/Makefile
			
@@ -3073,6 +3149,7 @@ AC_OUTPUT([
 
				 	doc/doxygen/doxygen-config.cfg
			
 
				 	doc/doxygen/doxygen_filter.sh
			
 
				 	tools/msvc/starpu_var.bat
			
 
				+	min-dgels/Makefile
			
 
				 ])
			
 
				 
			
 
				 AC_MSG_NOTICE([
			
@@ -3112,20 +3189,21 @@ AC_MSG_NOTICE([
 
				         Examples:          $enable_build_examples
			
 
				 
			
 
				 	StarPU Extensions:
			
 
				-	       StarPU MPI enabled:                          $build_mpi_lib
			
 
				-	       MPI test suite:                              $running_mpi_check
			
 
				-	       Master-Slave MPI enabled:                    $use_mpi_master_slave
			
 
				-	       FFT Support:                                 $fft_support
			
 
				-	       GCC plug-in:                                 $build_gcc_plugin
			
 
				-	       GCC plug-in test suite (requires GNU Guile): $run_gcc_plugin_test_suite
			
 
				-	       OpenMP runtime support enabled:              $enable_openmp
			
 
				-	       SOCL enabled:                                $build_socl
			
 
				-               SOCL test suite:                             $run_socl_check
			
 
				-               Scheduler Hypervisor:                        $build_sc_hypervisor
			
 
				-               simgrid enabled:                             $enable_simgrid
			
 
				-               ayudame enabled:                             $ayu_msg
			
 
				-	       Native fortran support:                      $enable_build_fortran
			
 
				-	       Native MPI fortran support:                  $use_mpi_fort
			
 
				+	       StarPU MPI enabled:                            $build_mpi_lib
			
 
				+	       MPI test suite:                                $running_mpi_check
			
 
				+	       Master-Slave MPI enabled:                      $use_mpi_master_slave
			
 
				+	       FFT Support:                                   $fft_support
			
 
				+	       GCC plug-in:                                   $build_gcc_plugin
			
 
				+	       GCC plug-in test suite (requires GNU Guile):   $run_gcc_plugin_test_suite
			
 
				+	       OpenMP runtime support enabled:                $enable_openmp
			
 
				+	       SOCL enabled:                                  $build_socl
			
 
				+               SOCL test suite:                               $run_socl_check
			
 
				+               Scheduler Hypervisor:                          $build_sc_hypervisor
			
 
				+               simgrid enabled:                               $enable_simgrid
			
 
				+               ayudame enabled:                               $ayu_msg
			
 
				+	       Native fortran support:                        $enable_build_fortran
			
 
				+	       Native MPI fortran support:                    $use_mpi_fort
			
 
				+	       Support for multiple linear regression models: $install_min_dgels
			
 
				 ])
			
 
				 
			
 
				 if test "$build_socl" = "yes" -a "$run_socl_check" = "no" ; then
			
--- a/doc/doxygen/Makefile.am
+++ b/doc/doxygen/Makefile.am
@@ -162,6 +162,7 @@ chapters/version.html: $(chapters)
 
				 		if test -f $$f ; then $(RM) $$f ; fi ;\
			
 
				 	done
			
 
				 
			
 
				+if BUILD_DOC
			
 
				 EXTRA_DIST	= 					\
			
 
				 	$(chapters) 					\
			
 
				 	chapters/version.sty				\
			
@@ -209,6 +210,7 @@ EXTRA_DIST	= 					\
 
				 	doxygen.cfg 					\
			
 
				 	refman.tex					\
			
 
				 	$(DOX_HTML_DIR)
			
 
				+endif
			
 
				 
			
 
				 dox_inputs = $(DOX_CONFIG) 				\
			
 
				 	$(chapters) 					\
			
@@ -277,7 +279,7 @@ $(DOX_TAG): $(dox_inputs)
 
				 	@cat $(top_srcdir)/doc/doxygen/refman.tex >> $(DOX_LATEX_DIR)/refman.tex
			
 
				 
			
 
				 if BUILD_DOC
			
 
				-dist_pdf_DATA = $(DOX_PDF)
			
 
				+EXTRA_DIST += $(DOX_PDF)
			
 
				 
			
 
				 $(DOX_PDF): $(DOX_TAG) refman.tex
			
 
				 	@cp $(top_srcdir)/doc/doxygen/chapters/version.sty $(DOX_LATEX_DIR)
			
--- a/doc/doxygen/chapters/370_online_performance_tools.doxy
+++ b/doc/doxygen/chapters/370_online_performance_tools.doxy
@@ -2,7 +2,7 @@
 
				  * This file is part of the StarPU Handbook.
			
 
				  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016  CNRS
			
 
				- * Copyright (C) 2011, 2012 INRIA
			
 
				+ * Copyright (C) 2011, 2012, 2016 INRIA
			
 
				  * See the file version.doxy for copying conditions.
			
 
				  */
			
 
				 
			
@@ -397,6 +397,48 @@ performance model to perform scheduling, without using regression.
 
				 </li>
			
 
				 
			
 
				 <li>
			
 
				+
			
 
				+Another type of model is ::STARPU_MULTIPLE_REGRESSION_BASED, which
			
 
				+is based on multiple linear regression. In this model, the user
			
 
				+defines both the relevant parameters and the equation for computing the
			
 
				+task duration.
			
 
				+
			
 
				+
			
 
				+\f[
			
 
				+T_{kernel} = a + b(M^{\alpha_1} * N^{\beta_1} * K^{\gamma_1}) + c(M^{\alpha_2} * N^{\beta_2} * K^{\gamma_2}) + ...
			
 
				+\f]
			
 
				+
			
 
				+
			
 
				+\f$M, N, K\f$ are the parameters of the task, added at the task
			
 
				+creation. These need to be extracted by the <c>cl_perf_func</c>
			
 
				+function, which should be defined by the user. \f$\alpha, \beta,
			
 
				+\gamma\f$ are the exponents defined by the user in
			
 
				+<c>model->combinations</c> table. Finally, coefficients \f$a, b, c\f$
			
 
				+are computed automatically by the StarPU at the end of the execution, using least
			
 
				+squares method of the <c>dgels_</c> LAPACK function.
			
 
				+
			
 
				+<c>examples/mlr/mlr.c</c> example provides more details on
			
 
				+the usage of ::STARPU_MULTIPLE_REGRESSION_BASED models.
			
 
				+
			
 
				+Coefficients computation is done at the end of the execution, and the
			
 
				+results are stored in standard codelet perfmodel files. Additional
			
 
				+files containing the duration of task together with the value of each
			
 
				+parameter are stored in <c>.starpu/sampling/codelets/tmp/</c>
			
 
				+directory. These files are reused when \ref STARPU_CALIBRATE
			
 
				+environment variable is set to <c>1</c>, to recompute coefficients
			
 
				+based on the current, but also on the previous
			
 
				+executions. Additionally, when multiple linear regression models are
			
 
				+disabled (using "--disable-mlr" configuration option) or when the
			
 
				+<c>model->combinations</c> are not defined, StarPU will still write
			
 
				+output files into <c>.starpu/sampling/codelets/tmp/</c> to allow
			
 
				+performing an analysis. This analysis typically aims at finding the
			
 
				+most appropriate equation for the codelet and
			
 
				+<c>tools/starpu_mlr_analysis</c> script provides an example of how to
			
 
				+perform such study.
			
 
				+
			
 
				+</li>
			
 
				+
			
 
				+<li>
			
 
				 Provided as an estimation from the application itself (model type
			
 
				 ::STARPU_COMMON and field starpu_perfmodel::cost_function),
			
 
				 see for instance
			
--- a/doc/doxygen/chapters/390_faq.doxy
+++ b/doc/doxygen/chapters/390_faq.doxy
@@ -226,4 +226,100 @@ starpu_resume();
 
				 starpu_shutdown();
			
 
				 \endcode
			
 
				 
			
 
				+\section CUDADrivers StarPU does not see my CUDA device
			
 
				+
			
 
				+First make sure that CUDA is properly running outside StarPU: build and
			
 
				+run the following program with -lcudart:
			
 
				+
			
 
				+\code{.c}
			
 
				+#include <stdio.h>
			
 
				+#include <cuda.h>
			
 
				+#include <cuda_runtime.h>
			
 
				+
			
 
				+int main(void) {
			
 
				+	int n, i, version;
			
 
				+	cudaError_t err;
			
 
				+
			
 
				+	err = cudaGetDeviceCount(&n);
			
 
				+	if (err)
			
 
				+	{
			
 
				+		fprintf(stderr,"cuda error %d\n", err);
			
 
				+		exit(1);
			
 
				+	}
			
 
				+	cudaDriverGetVersion(&version);
			
 
				+	printf("driver version %d\n", version);
			
 
				+	cudaRuntimeGetVersion(&version);
			
 
				+	printf("runtime version %d\n", version);
			
 
				+	printf("\n");
			
 
				+
			
 
				+	for (i = 0; i < n; i++) {
			
 
				+		struct cudaDeviceProp props;
			
 
				+		printf("CUDA%d\n", i);
			
 
				+		err = cudaGetDeviceProperties(&props, i);
			
 
				+		if (err)
			
 
				+		{
			
 
				+			fprintf(stderr,"cuda error %d\n", err);
			
 
				+			continue;
			
 
				+		}
			
 
				+		printf("%s\n", props.name);
			
 
				+		printf("%0.3f GB\n", (float) props.totalGlobalMem / (1<<30));
			
 
				+		printf("%u MP\n", props.multiProcessorCount);
			
 
				+		printf("\n");
			
 
				+	}
			
 
				+	return 0;
			
 
				+}
			
 
				+\endcode
			
 
				+
			
 
				+If that program does not find your device, the problem is not at the StarPU
			
 
				+level, but the CUDA drivers, check the documentation of your CUDA
			
 
				+setup.
			
 
				+
			
 
				+\section OpenCLDrivers StarPU does not see my OpenCL device
			
 
				+
			
 
				+First make sure that OpenCL is properly running outside StarPU: build and
			
 
				+run the following program with -lOpenCL:
			
 
				+
			
 
				+\code{.c}
			
 
				+#include <CL/cl.h>
			
 
				+#include <stdio.h>
			
 
				+#include <assert.h>
			
 
				+
			
 
				+int main(void) {
			
 
				+    cl_device_id did[16];
			
 
				+    cl_int err;
			
 
				+    cl_platform_id pid, pids[16];
			
 
				+    cl_uint nbplat, nb;
			
 
				+    char buf[128];
			
 
				+    size_t size;
			
 
				+    int i, j;
			
 
				+
			
 
				+    err = clGetPlatformIDs(sizeof(pids)/sizeof(pids[0]), pids, &nbplat);
			
 
				+    assert(err == CL_SUCCESS);
			
 
				+    printf("%u platforms\n", nbplat);
			
 
				+    for (j = 0; j < nbplat; j++) {
			
 
				+        pid = pids[j];
			
 
				+        printf("    platform %d\n", j);
			
 
				+        err = clGetPlatformInfo(pid, CL_PLATFORM_VERSION, sizeof(buf)-1, buf, &size);
			
 
				+        assert(err == CL_SUCCESS);
			
 
				+        buf[size] = 0;
			
 
				+        printf("        platform version %s\n", buf);
			
 
				+
			
 
				+        err = clGetDeviceIDs(pid, CL_DEVICE_TYPE_ALL, sizeof(did)/sizeof(did[0]), did, &nb);
			
 
				+        assert(err == CL_SUCCESS);
			
 
				+        printf("%d devices\n", nb);
			
 
				+        for (i = 0; i < nb; i++) {
			
 
				+            err = clGetDeviceInfo(did[i], CL_DEVICE_VERSION, sizeof(buf)-1, buf, &size);
			
 
				+            buf[size] = 0;
			
 
				+            printf("    device %d version %s\n", i, buf);
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    return 0;
			
 
				+}
			
 
				+\endcode
			
 
				+
			
 
				+If that program does not find your device, the problem is not at the StarPU
			
 
				+level, but the OpenCL drivers, check the documentation of your OpenCL
			
 
				+implementation.
			
 
				+
			
 
				 */
			
--- a/doc/doxygen/chapters/450_native_fortran_support.doxy
+++ b/doc/doxygen/chapters/450_native_fortran_support.doxy
@@ -30,6 +30,9 @@ standard. It has currently been tested successfully with GNU GFortran 4.9,
 
				 GFortran 5.x, GFortran 6.x and the Intel Fortran Compiler >= 2016. It is known
			
 
				 not to work with GNU GFortran < 4.9, Intel Fortran Compiler < 2016.
			
 
				 
			
 
				+See Section \ref OldFortran on information on how to write StarPU
			
 
				+Fortran code with older compilers.
			
 
				+
			
 
				 \subsection Configuration Configuration
			
 
				 
			
 
				 The Native Fortran API is enabled and its companion
			
@@ -209,6 +212,14 @@ with StarPU using C marshalling functions as exemplified in StarPU's
 
				 <c>examples/fortran/</c> and <c>examples/fortran90/</c> example
			
 
				 directories, though the process will be less convenient.
			
 
				 
			
 
				+Basically, the main FORTRAN code calls some C wrapper functions to
			
 
				+submit tasks to StarPU. Then, when StarPU starts a task, another C
			
 
				+wrapper function calls the FORTRAN routine for the task.
			
 
				+
			
 
				+Note that this marshalled FORTRAN support remains available even
			
 
				+when specifying configure option \ref disable-fortran "--disable-fortran"
			
 
				+(which only disables StarPU's native Fortran layer).
			
 
				+
			
 
				 \subsection APIMIX Valid API Mixes and Language Mixes
			
 
				 
			
 
				 Mixing uses of
			
--- a/doc/doxygen/chapters/470_simgrid.doxy
+++ b/doc/doxygen/chapters/470_simgrid.doxy
@@ -61,7 +61,8 @@ run several times before the model is calibrated.
 
				 \section Simulation Simulation
			
 
				 
			
 
				 Then, recompile StarPU, passing \ref enable-simgrid "--enable-simgrid"
			
 
				-to <c>./configure</c>.
			
 
				+to <c>./configure</c>. Make sure to keep all other <c>./configure</c> options
			
 
				+the same, and notably options such as <c>--enable-maxcudadev</c>.
			
 
				 
			
 
				 \verbatim
			
 
				 $ ./configure --enable-simgrid
			
--- a/doc/doxygen/chapters/api/data_management.doxy
+++ b/doc/doxygen/chapters/api/data_management.doxy
@@ -243,6 +243,17 @@ done with the codelet \p redux_cl.
 
				 \ingroup API_Data_Management
			
 
				 todo
			
 
				 
			
 
				+\fn void starpu_data_set_user_data(starpu_data_handle_t handle, void* user_data)
			
 
				+\ingroup API_Data_Management
			
 
				+This sets the "user_data" field for the \p handle to \p user_data . It can
			
 
				+then be retrieved with starpu_data_get_user_data. \p user_data can be any
			
 
				+application-defined value, for instance a pointer to an object-oriented
			
 
				+container for the data.
			
 
				+
			
 
				+\fn void *starpu_data_get_user_data(starpu_data_handle_t handle)
			
 
				+\ingroup API_Data_Management
			
 
				+This retrieves the "user_data" field previously set for the \p handle .
			
 
				+
			
 
				 @name Access registered data from the application
			
 
				 \ingroup API_Data_Management
			
 
				 
			
@@ -291,22 +302,39 @@ Similarly to starpu_data_acquire_cb(), this function is
 
				 non-blocking and may be called from task callbacks. Upon successful
			
 
				 completion, this function returns 0.
			
 
				 
			
 
				+\def STARPU_ACQUIRE_NO_NODE
			
 
				+\ingroup API_Data_Management
			
 
				+This macro can be used to acquire data, but not require it to be available on a given node, only enforce R/W dependencies.
			
 
				+This can for instance be used to wait for tasks which produce the data, but without requesting a fetch to the main memory.
			
 
				+
			
 
				+\def STARPU_ACQUIRE_ALL_NODES
			
 
				+\ingroup API_Data_Management
			
 
				+This is the same as STARPU_ACQUIRE_NO_NODE, but will lock the data on all nodes, preventing them from being evicted for instance.
			
 
				+This is mostly useful inside starpu only.
			
 
				+
			
 
				 \fn int starpu_data_acquire_on_node(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode)
			
 
				 \ingroup API_Data_Management
			
 
				 This is the same as starpu_data_acquire(), except that the data
			
 
				-will be available on the given memory node instead of main memory.
			
 
				+will be available on the given memory node instead of main
			
 
				+memory.
			
 
				+STARPU_ACQUIRE_NO_NODE and STARPU_ACQUIRE_ALL_NODES can be used instead of an
			
 
				+explicit node number.
			
 
				 
			
 
				 \fn int starpu_data_acquire_on_node_cb(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg)
			
 
				 \ingroup API_Data_Management
			
 
				 This is the same as starpu_data_acquire_cb(), except that the
			
 
				 data will be available on the given memory node instead of main
			
 
				 memory.
			
 
				+STARPU_ACQUIRE_NO_NODE and STARPU_ACQUIRE_ALL_NODES can be used instead of an
			
 
				+explicit node number.
			
 
				 
			
 
				 \fn int starpu_data_acquire_on_node_cb_sequential_consistency(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency)
			
 
				 \ingroup API_Data_Management
			
 
				 This is the same as starpu_data_acquire_cb_sequential_consistency(), except that the
			
 
				 data will be available on the given memory node instead of main
			
 
				 memory.
			
 
				+STARPU_ACQUIRE_NO_NODE and STARPU_ACQUIRE_ALL_NODES can be used instead of an
			
 
				+explicit node number.
			
 
				 
			
 
				 \def STARPU_DATA_ACQUIRE_CB(handle, mode, code)
			
 
				 \ingroup API_Data_Management
			
@@ -327,6 +355,7 @@ starpu_data_acquire_cb().
 
				 \ingroup API_Data_Management
			
 
				 This is the same as starpu_data_release(), except that the data
			
 
				 will be available on the given memory \p node instead of main memory.
			
 
				+The \p node parameter must be exactly the same as the corresponding starpu_data_acquire_on_node* call.
			
 
				 
			
 
				 \fn starpu_arbiter_t starpu_arbiter_create(void)
			
 
				 \ingroup API_Data_Management
			
--- a/doc/doxygen/chapters/api/performance_model.doxy
+++ b/doc/doxygen/chapters/api/performance_model.doxy
@@ -2,7 +2,7 @@
 
				  * This file is part of the StarPU Handbook.
			
 
				  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
			
 
				- * Copyright (C) 2011, 2012 INRIA
			
 
				+ * Copyright (C) 2011, 2012, 2016 INRIA
			
 
				  * See the file version.doxy for copying conditions.
			
 
				  */
			
 
				 
			
@@ -26,6 +26,9 @@ Automatic linear regression-based cost model  (alpha * size ^ beta)
 
				 \var starpu_perfmodel_type::STARPU_NL_REGRESSION_BASED
			
 
				 \ingroup API_Performance_Model
			
 
				 Automatic non-linear regression-based cost model (a * size ^ b + c)
			
 
				+\var starpu_perfmodel_type::STARPU_MULTIPLE_REGRESSION_BASED
			
 
				+\ingroup API_Performance_Model
			
 
				+Automatic multiple linear regression-based cost model. Application provides parameters, their combinations and exponents
			
 
				 
			
 
				 \struct starpu_perfmodel_device
			
 
				 todo
			
@@ -60,6 +63,8 @@ is the type of performance model
 
				 ::STARPU_NL_REGRESSION_BASED: No other fields needs to be provided,
			
 
				 this is purely history-based.
			
 
				 </li>
			
 
				+<li> ::STARPU_MULTIPLE_REGRESSION_BASED: Need to provide fields starpu_perfmodel::nparameters (number of different parameters),  starpu_perfmodel::ncombinations (number of parameters combinations-tuples) and table starpu_perfmodel::combinations which defines exponents of the equation. Function cl_perf_func also needs to define how to extract parameters from the task.
			
 
				+</li>
			
 
				 <li> ::STARPU_PER_ARCH: either field starpu_perfmodel::arch_cost_function has to be
			
 
				 filled with a function that returns the cost in micro-seconds on the arch given
			
 
				 as parameter, or field starpu_perfmodel::per_arch has to be
			
@@ -99,6 +104,18 @@ Whether the performance model is already loaded from the disk.
 
				 todo
			
 
				 \var starpu_perfmodel_state_t starpu_perfmodel::state
			
 
				 \private
			
 
				+\var const char ** starpu_perfmodel::parameters_names
			
 
				+\private
			
 
				+Names of parameters used for multiple linear regression models (M, N, K)
			
 
				+\var unsigned starpu_perfmodel::nparameters
			
 
				+\private
			
 
				+Number of parameters used for multiple linear regression models
			
 
				+\var unsigned ** starpu_perfmodel::combinations
			
 
				+\private
			
 
				+Table of combinations of parameters (and the exponents) used for multiple linear regression models
			
 
				+\var unsigned starpu_perfmodel::ncombinations
			
 
				+\private
			
 
				+Number of combination of parameters used for multiple linear regression models
			
 
				 
			
 
				 
			
 
				 \struct starpu_perfmodel_regression_model
			
@@ -132,6 +149,12 @@ estimated = a size ^b + c
 
				 whether the non-linear regression model is valid (i.e. enough measures)
			
 
				 \var unsigned starpu_perfmodel_regression_model::nsample
			
 
				 number of sample values for non-linear regression
			
 
				+\var double starpu_perfmodel_regression_model::coeff[]
			
 
				+list of computed coefficients for multiple linear regression model
			
 
				+\var double starpu_perfmodel_regression_model::ncoeff
			
 
				+number of coefficients for multiple linear regression model
			
 
				+\var double starpu_perfmodel_regression_model::multi_valid
			
 
				+whether the multiple linear regression model is valid
			
 
				 
			
 
				 \struct starpu_perfmodel_per_arch
			
 
				 contains information about the performance model of a given
			
@@ -150,12 +173,12 @@ depends on the architecture-specific implementation.
 
				 The history of performance measurements.
			
 
				 \var struct starpu_perfmodel_history_list *starpu_perfmodel_per_arch::list
			
 
				 \private
			
 
				-Used by ::STARPU_HISTORY_BASED and ::STARPU_NL_REGRESSION_BASED,
			
 
				+Used by ::STARPU_HISTORY_BASED, ::STARPU_NL_REGRESSION_BASED and ::STARPU_MULTIPLE_REGRESSION_BASED,
			
 
				 records all execution history measures.
			
 
				 \var struct starpu_perfmodel_regression_model starpu_perfmodel_per_arch::regression
			
 
				 \private
			
 
				-Used by ::STARPU_REGRESSION_BASED and
			
 
				-::STARPU_NL_REGRESSION_BASED, contains the estimated factors of the
			
 
				+Used by ::STARPU_REGRESSION_BASED, 
			
 
				+::STARPU_NL_REGRESSION_BASED and ::STARPU_MULTIPLE_REGRESSION_BASED, contains the estimated factors of the
			
 
				 regression.
			
 
				 
			
 
				 \struct starpu_perfmodel_history_list
			
--- a/doc/tutorial/hello_world.c
+++ b/doc/tutorial/hello_world.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2010-2011  Université de Bordeaux
			
 
				- * Copyright (C) 2010-2011, 2013  CNRS
			
 
				+ * Copyright (C) 2010-2011, 2013, 2016  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -38,7 +38,7 @@ struct starpu_codelet cl =
 
				 
			
 
				 void callback_func(void *callback_arg)
			
 
				 {
			
 
				-	printf("Callback function (arg %x)\n", callback_arg);
			
 
				+	printf("Callback function (arg %p)\n", callback_arg);
			
 
				 }
			
 
				 
			
 
				 int main(int argc, char **argv)
			
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -199,7 +199,9 @@ STARPU_EXAMPLES +=				\
 
				 	basic_examples/variable			\
			
 
				 	basic_examples/multiformat              \
			
 
				 	basic_examples/dynamic_handles          \
			
 
				+	mlr/mlr					\
			
 
				 	cpp/incrementer_cpp			\
			
 
				+	cpp/add_vectors				\
			
 
				 	filters/fvector				\
			
 
				 	filters/fblock				\
			
 
				 	filters/fmatrix				\
			
@@ -228,12 +230,18 @@ STARPU_EXAMPLES +=				\
 
				 	worker_collections/worker_tree_example  \
			
 
				 	reductions/dot_product			\
			
 
				 	reductions/minmax_reduction
			
 
				+
			
 
				 endif
			
 
				 
			
 
				 if !STARPU_SIMGRID
			
 
				 STARPU_EXAMPLES +=				\
			
 
				 	scheduler/dummy_sched
			
 
				 
			
 
				+if STARPU_HAVE_CXX11
			
 
				+STARPU_EXAMPLES +=	\
			
 
				+	cpp/add_vectors_cpp11
			
 
				+endif
			
 
				+
			
 
				 if STARPU_HAVE_F77
			
 
				 if STARPU_HAVE_F77_H
			
 
				 STARPU_EXAMPLES +=				\
			
@@ -837,6 +845,18 @@ cpp_incrementer_cpp_SOURCES += \
 
				 	incrementer/incrementer_kernels_opencl.c
			
 
				 endif
			
 
				 
			
 
				+###########################
			
 
				+# C++ Add vectors example #
			
 
				+###########################
			
 
				+
			
 
				+cpp_add_vectors_SOURCES	=	\
			
 
				+	cpp/add_vectors.cpp
			
 
				+	
			
 
				+if STARPU_HAVE_CXX11
			
 
				+cpp_add_vectors_cpp11_SOURCES	=	\
			
 
				+	cpp/add_vectors_cpp11.cpp
			
 
				+endif
			
 
				+
			
 
				 #######################
			
 
				 # Incrementer example #
			
 
				 #######################
			
--- a/examples/cpp/Makefile.add_vectors
+++ b/examples/cpp/Makefile.add_vectors
@@ -0,0 +1,37 @@
 
				+# StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+#
			
 
				+# Copyright (C) 2016  Inria
			
 
				+#
			
 
				+# StarPU is free software; you can redistribute it and/or modify
			
 
				+# it under the terms of the GNU Lesser General Public License as published by
			
 
				+# the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+# your option) any later version.
			
 
				+#
			
 
				+# StarPU is distributed in the hope that it will be useful, but
			
 
				+# WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+#
			
 
				+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+
			
 
				+PROG = add_vectors
			
 
				+
			
 
				+SRCCXX = add_vectors.cpp
			
 
				+
			
 
				+CC = g++
			
 
				+
			
 
				+CFLAGS = -g -DPRINT_OUTPUT $(shell pkg-config --cflags starpu-1.3)
			
 
				+LDLIBS =  $(shell pkg-config --libs starpu-1.3)
			
 
				+
			
 
				+OBJS = $(SRCCXX:%.cpp=%.o)
			
 
				+
			
 
				+.phony: all clean
			
 
				+all: $(PROG)
			
 
				+
			
 
				+$(PROG): $(OBJS)
			
 
				+	$(CC) $(LDFLAGS) -o $@ $^ $(LDLIBS)
			
 
				+
			
 
				+%.o: %.cpp
			
 
				+	$(CC) $(CFLAGS) -c -o $@ $<
			
 
				+
			
 
				+clean:
			
 
				+	rm -fv *.o $(PROG)
			
--- a/examples/cpp/Makefile.add_vectors_cpp11
+++ b/examples/cpp/Makefile.add_vectors_cpp11
@@ -0,0 +1,37 @@
 
				+# StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+#
			
 
				+# Copyright (C) 2016  Inria
			
 
				+#
			
 
				+# StarPU is free software; you can redistribute it and/or modify
			
 
				+# it under the terms of the GNU Lesser General Public License as published by
			
 
				+# the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+# your option) any later version.
			
 
				+#
			
 
				+# StarPU is distributed in the hope that it will be useful, but
			
 
				+# WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+#
			
 
				+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+
			
 
				+PROG = add_vectors_cpp11
			
 
				+
			
 
				+SRCCXX = add_vectors_cpp11.cpp
			
 
				+
			
 
				+CC = g++
			
 
				+
			
 
				+CFLAGS = -g -std=c++11 -DPRINT_OUTPUT $(shell pkg-config --cflags starpu-1.3)
			
 
				+LDLIBS =  $(shell pkg-config --libs starpu-1.3)
			
 
				+
			
 
				+OBJS = $(SRCCXX:%.cpp=%.o)
			
 
				+
			
 
				+.phony: all clean
			
 
				+all: $(PROG)
			
 
				+
			
 
				+$(PROG): $(OBJS)
			
 
				+	$(CC) $(LDFLAGS) -o $@ $^ $(LDLIBS)
			
 
				+
			
 
				+%.o: %.cpp
			
 
				+	$(CC) $(CFLAGS) -c -o $@ $<
			
 
				+
			
 
				+clean:
			
 
				+	rm -fv *.o $(PROG)
			
--- a/examples/cpp/add_vectors.cpp
+++ b/examples/cpp/add_vectors.cpp
@@ -0,0 +1,148 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2009, 2010-2011, 2013-2015  Université de Bordeaux
			
 
				+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016  CNRS
			
 
				+ * Copyright (C) 2012 INRIA
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+/*
			
 
				+ * This is a small example of a C++ program using starpu.  We here just
			
 
				+ * add two std::vector without copying them (0 copy).
			
 
				+ */
			
 
				+
			
 
				+#include <cassert>
			
 
				+#include <vector>
			
 
				+
			
 
				+#ifdef PRINT_OUTPUT
			
 
				+#include <iostream>
			
 
				+#endif
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+
			
 
				+void cpu_kernel_add_vectors(void *buffers[], void *cl_arg)
			
 
				+{
			
 
				+	// get the current task
			
 
				+	starpu_task* task = starpu_task_get_current();
			
 
				+
			
 
				+	// get the user data (pointers to the vec_A, vec_B, vec_C std::vector)
			
 
				+	void* u_data0 = starpu_data_get_user_data(task->handles[0]); assert(u_data0);
			
 
				+	void* u_data1 = starpu_data_get_user_data(task->handles[1]); assert(u_data1);
			
 
				+	void* u_data2 = starpu_data_get_user_data(task->handles[2]); assert(u_data2);
			
 
				+
			
 
				+	// cast void* in std::vector<char>*
			
 
				+	std::vector<char>* vec_A = static_cast<std::vector<char>*>(u_data0);
			
 
				+	std::vector<char>* vec_B = static_cast<std::vector<char>*>(u_data1);
			
 
				+	std::vector<char>* vec_C = static_cast<std::vector<char>*>(u_data2);
			
 
				+
			
 
				+	// all the std::vector have to have the same size
			
 
				+	assert(vec_A->size() == vec_B->size() && vec_B->size() == vec_C->size());
			
 
				+
			
 
				+	// performs the vector addition (vec_C[] = vec_A[] + vec_B[])
			
 
				+	for (size_t i = 0; i < vec_C->size(); i++)
			
 
				+		(*vec_C)[i] = (*vec_A)[i] + (*vec_B)[i];
			
 
				+}
			
 
				+
			
 
				+#define VEC_SIZE 1024
			
 
				+
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	std::vector<char> vec_A(VEC_SIZE, 2); // all the vector is initialized to 2
			
 
				+	std::vector<char> vec_B(VEC_SIZE, 3); // all the vector is initialized to 3
			
 
				+	std::vector<char> vec_C(VEC_SIZE, 0); // all the vector is initialized to 0
			
 
				+
			
 
				+	// initialize StarPU with default configuration
			
 
				+	int ret = starpu_init(NULL);
			
 
				+	if (ret == -ENODEV)
			
 
				+		return 77;
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
			
 
				+
			
 
				+	// StarPU data registering
			
 
				+	starpu_data_handle_t spu_vec_A;
			
 
				+	starpu_data_handle_t spu_vec_B;
			
 
				+	starpu_data_handle_t spu_vec_C;
			
 
				+
			
 
				+	// give the data of the vector to StarPU (C array)
			
 
				+	starpu_vector_data_register(&spu_vec_A, STARPU_MAIN_RAM, (uintptr_t)&vec_A[0], vec_A.size(), sizeof(char));
			
 
				+	starpu_vector_data_register(&spu_vec_B, STARPU_MAIN_RAM, (uintptr_t)&vec_B[0], vec_B.size(), sizeof(char));
			
 
				+	starpu_vector_data_register(&spu_vec_C, STARPU_MAIN_RAM, (uintptr_t)&vec_C[0], vec_C.size(), sizeof(char));
			
 
				+
			
 
				+	// pass the pointer to the C++ vector object to StarPU
			
 
				+	starpu_data_set_user_data(spu_vec_A, (void*)&vec_A);
			
 
				+	starpu_data_set_user_data(spu_vec_B, (void*)&vec_B);
			
 
				+	starpu_data_set_user_data(spu_vec_C, (void*)&vec_C);
			
 
				+
			
 
				+	// create the StarPU codelet
			
 
				+	starpu_codelet cl;
			
 
				+	starpu_codelet_init(&cl);
			
 
				+	cl.cpu_funcs     [0] = cpu_kernel_add_vectors;
			
 
				+	cl.cpu_funcs_name[0] = "cpu_kernel_add_vectors";
			
 
				+	cl.nbuffers          = 3;
			
 
				+	cl.modes         [0] = STARPU_R;
			
 
				+	cl.modes         [1] = STARPU_R;
			
 
				+	cl.modes         [2] = STARPU_W;
			
 
				+	cl.name              = "add_vectors";
			
 
				+
			
 
				+	// submit a new StarPU task to execute
			
 
				+	ret = starpu_task_insert(&cl,
			
 
				+	                         STARPU_R, spu_vec_A,
			
 
				+	                         STARPU_R, spu_vec_B,
			
 
				+	                         STARPU_W, spu_vec_C,
			
 
				+	                         0);
			
 
				+	if (ret == -ENODEV)
			
 
				+	{
			
 
				+		// StarPU data unregistering
			
 
				+		starpu_data_unregister(spu_vec_C);
			
 
				+		starpu_data_unregister(spu_vec_B);
			
 
				+		starpu_data_unregister(spu_vec_A);
			
 
				+
			
 
				+		// terminate StarPU, no task can be submitted after
			
 
				+		starpu_shutdown();
			
 
				+
			
 
				+		return 77;
			
 
				+	}
			
 
				+
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "task_submit::add_vectors");
			
 
				+
			
 
				+	// wait the task
			
 
				+	starpu_task_wait_for_all();
			
 
				+
			
 
				+	// StarPU data unregistering
			
 
				+	starpu_data_unregister(spu_vec_C);
			
 
				+	starpu_data_unregister(spu_vec_B);
			
 
				+	starpu_data_unregister(spu_vec_A);
			
 
				+
			
 
				+	// terminate StarPU, no task can be submitted after
			
 
				+	starpu_shutdown();
			
 
				+
			
 
				+	// check results
			
 
				+	bool fail = false;
			
 
				+	int i = 0;
			
 
				+	while (!fail && i < VEC_SIZE)
			
 
				+		fail = vec_C[i++] != 5;
			
 
				+
			
 
				+	if (fail)
			
 
				+	{
			
 
				+#ifdef PRINT_OUTPUT
			
 
				+		std::cout << "Example failed..." << std::endl;
			
 
				+#endif
			
 
				+		return EXIT_FAILURE;
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+#ifdef PRINT_OUTPUT
			
 
				+		std::cout << "Example successfully passed!" << std::endl;
			
 
				+#endif
			
 
				+		return EXIT_SUCCESS;
			
 
				+	}
			
 
				+}
			
--- a/examples/cpp/add_vectors_cpp11.cpp
+++ b/examples/cpp/add_vectors_cpp11.cpp
@@ -0,0 +1,155 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2009, 2010-2011, 2013-2015  Université de Bordeaux
			
 
				+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016  CNRS
			
 
				+ * Copyright (C) 2012 INRIA
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+/*
			
 
				+ * This is a small example of a C++ program using starpu.  We here just
			
 
				+ * add two std::vector without copying them (0 copy).
			
 
				+ */
			
 
				+
			
 
				+#include <cassert>
			
 
				+#include <vector>
			
 
				+
			
 
				+#ifdef PRINT_OUTPUT
			
 
				+#include <iostream>
			
 
				+#endif
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#if !defined(STARPU_HAVE_CXX11)
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	return STARPU_TEST_SKIPPED;
			
 
				+}
			
 
				+#else
			
 
				+void cpu_kernel_add_vectors(void *buffers[], void *cl_arg)
			
 
				+{
			
 
				+	// get the current task
			
 
				+	auto task = starpu_task_get_current();
			
 
				+
			
 
				+	// get the user data (pointers to the vec_A, vec_B, vec_C std::vector)
			
 
				+	auto u_data0 = starpu_data_get_user_data(task->handles[0]); assert(u_data0);
			
 
				+	auto u_data1 = starpu_data_get_user_data(task->handles[1]); assert(u_data1);
			
 
				+	auto u_data2 = starpu_data_get_user_data(task->handles[2]); assert(u_data2);
			
 
				+
			
 
				+	// cast void* in std::vector<char>*
			
 
				+	auto vec_A = static_cast<std::vector<char>*>(u_data0);
			
 
				+	auto vec_B = static_cast<std::vector<char>*>(u_data1);
			
 
				+	auto vec_C = static_cast<std::vector<char>*>(u_data2);
			
 
				+
			
 
				+	// all the std::vector have to have the same size
			
 
				+	assert(vec_A->size() == vec_B->size() && vec_B->size() == vec_C->size());
			
 
				+
			
 
				+	// performs the vector addition (vec_C[] = vec_A[] + vec_B[])
			
 
				+	for (size_t i = 0; i < vec_C->size(); i++)
			
 
				+		(*vec_C)[i] = (*vec_A)[i] + (*vec_B)[i];
			
 
				+}
			
 
				+
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	constexpr int vec_size = 1024;
			
 
				+
			
 
				+	std::vector<char> vec_A(vec_size, 2); // all the vector is initialized to 2
			
 
				+	std::vector<char> vec_B(vec_size, 3); // all the vector is initialized to 3
			
 
				+	std::vector<char> vec_C(vec_size, 0); // all the vector is initialized to 0
			
 
				+
			
 
				+	// initialize StarPU with default configuration
			
 
				+	auto ret = starpu_init(NULL);
			
 
				+	if (ret == -ENODEV)
			
 
				+		return 77;
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
			
 
				+
			
 
				+	// StarPU data registering
			
 
				+	starpu_data_handle_t spu_vec_A;
			
 
				+	starpu_data_handle_t spu_vec_B;
			
 
				+	starpu_data_handle_t spu_vec_C;
			
 
				+
			
 
				+	// give the data of the vector to StarPU (C array)
			
 
				+	starpu_vector_data_register(&spu_vec_A, STARPU_MAIN_RAM, (uintptr_t)vec_A.data(), vec_A.size(), sizeof(char));
			
 
				+	starpu_vector_data_register(&spu_vec_B, STARPU_MAIN_RAM, (uintptr_t)vec_B.data(), vec_B.size(), sizeof(char));
			
 
				+	starpu_vector_data_register(&spu_vec_C, STARPU_MAIN_RAM, (uintptr_t)vec_C.data(), vec_C.size(), sizeof(char));
			
 
				+
			
 
				+	// pass the pointer to the C++ vector object to StarPU
			
 
				+	starpu_data_set_user_data(spu_vec_A, (void*)&vec_A);
			
 
				+	starpu_data_set_user_data(spu_vec_B, (void*)&vec_B);
			
 
				+	starpu_data_set_user_data(spu_vec_C, (void*)&vec_C);
			
 
				+
			
 
				+	// create the StarPU codelet
			
 
				+	starpu_codelet cl;
			
 
				+	starpu_codelet_init(&cl);
			
 
				+	cl.cpu_funcs     [0] = cpu_kernel_add_vectors;
			
 
				+	cl.cpu_funcs_name[0] = "cpu_kernel_add_vectors";
			
 
				+	cl.nbuffers          = 3;
			
 
				+	cl.modes         [0] = STARPU_R;
			
 
				+	cl.modes         [1] = STARPU_R;
			
 
				+	cl.modes         [2] = STARPU_W;
			
 
				+	cl.name              = "add_vectors";
			
 
				+
			
 
				+	// submit a new StarPU task to execute
			
 
				+	ret = starpu_task_insert(&cl,
			
 
				+	                         STARPU_R, spu_vec_A,
			
 
				+	                         STARPU_R, spu_vec_B,
			
 
				+	                         STARPU_W, spu_vec_C,
			
 
				+	                         0);
			
 
				+
			
 
				+	if (ret == -ENODEV)
			
 
				+	{
			
 
				+		// StarPU data unregistering
			
 
				+		starpu_data_unregister(spu_vec_C);
			
 
				+		starpu_data_unregister(spu_vec_B);
			
 
				+		starpu_data_unregister(spu_vec_A);
			
 
				+
			
 
				+		// terminate StarPU, no task can be submitted after
			
 
				+		starpu_shutdown();
			
 
				+
			
 
				+		return 77;
			
 
				+	}
			
 
				+
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "task_submit::add_vectors");
			
 
				+
			
 
				+	// wait the task
			
 
				+	starpu_task_wait_for_all();
			
 
				+
			
 
				+	// StarPU data unregistering
			
 
				+	starpu_data_unregister(spu_vec_C);
			
 
				+	starpu_data_unregister(spu_vec_B);
			
 
				+	starpu_data_unregister(spu_vec_A);
			
 
				+
			
 
				+	// terminate StarPU, no task can be submitted after
			
 
				+	starpu_shutdown();
			
 
				+
			
 
				+	// check results
			
 
				+	auto fail = false;
			
 
				+	auto i = 0;
			
 
				+	while (!fail && i < vec_size)
			
 
				+		fail = vec_C[i++] != 5;
			
 
				+
			
 
				+	if (fail)
			
 
				+	{
			
 
				+#ifdef PRINT_OUTPUT
			
 
				+		std::cout << "Example failed..." << std::endl;
			
 
				+#endif
			
 
				+		return EXIT_FAILURE;
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+#ifdef PRINT_OUTPUT
			
 
				+		std::cout << "Example successfully passed!" << std::endl;
			
 
				+#endif
			
 
				+		return EXIT_SUCCESS;
			
 
				+	}
			
 
				+}
			
 
				+#endif
			
--- a/examples/heat/dw_factolu_grain.c
+++ b/examples/heat/dw_factolu_grain.c
@@ -379,5 +379,6 @@ void dw_factoLU_grain(float *matA, unsigned size, unsigned ld, unsigned nblocks,
 
				 
			
 
				 #ifdef CHECK_RESULTS
			
 
				 	compare_A_LU(Asaved, matA, size, ld);
			
 
				+	free(Asaved);
			
 
				 #endif
			
 
				 }
			
--- a/examples/mlr/mlr.c
+++ b/examples/mlr/mlr.c
@@ -0,0 +1,172 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2016 Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+/*
			
 
				+ * This examples demonstrates how to use multiple linear regression
			
 
				+   models.
			
 
				+
			
 
				+   First, there is mlr_codelet__init codelet for which we know the
			
 
				+   parameters, but not the their exponents and relations. This tasks
			
 
				+   should be benchmarked and analyzed to find the model, using
			
 
				+   "tools/starpu_mlr_analysis" script as a template.
			
 
				+
			
 
				+   For the second (codelet cl_model_final), it is assumed that the
			
 
				+   analysis has already been performed and that the duration of the
			
 
				+   codelet mlr_codelet_final will be computed using the following
			
 
				+   equation:
			
 
				+
			
 
				+   T = a + b * (M^2*N) + c * (N^3*K)
			
 
				+
			
 
				+   where M, N, K are the parameters of the task, exponents are coming
			
 
				+   from model->combinations[..][..]  and finally a, b, c are
			
 
				+   coefficients which mostly depend on the machine speed.
			
 
				+   
			
 
				+   These coefficients are going to be automatically computed using
			
 
				+   least square method.
			
 
				+
			
 
				+ */
			
 
				+
			
 
				+#include <stdio.h>
			
 
				+#include <stdlib.h>
			
 
				+#include <stdint.h>
			
 
				+#include <starpu.h>
			
 
				+
			
 
				+static long sum;
			
 
				+
			
 
				+/* Performance function of the task, which is in this case very simple, as the parameter values just need to be written in the array "parameters" */
			
 
				+static void cl_params(struct starpu_task *task, double *parameters)
			
 
				+{
			
 
				+	int m, n, k;
			
 
				+	starpu_codelet_unpack_args(task->cl_arg, &m, &n, &k);
			
 
				+	parameters[0] = m;
			
 
				+	parameters[1] = n;
			
 
				+	parameters[2] = k;
			
 
				+}
			
 
				+
			
 
				+/* Function of the task that will be executed. In this case running dummy cycles, just to make sure task duration is significant */
			
 
				+void cpu_func(void *buffers[], void *cl_arg)
			
 
				+{
			
 
				+	long i;
			
 
				+	int m,n,k;
			
 
				+	starpu_codelet_unpack_args(cl_arg,
			
 
				+			     	  &m,
			
 
				+     			     	  &n,
			
 
				+     			     	  &k);
			
 
				+	
			
 
				+	for(i=0; i < (long) (m*m*n); i++)
			
 
				+		sum+=i;
			
 
				+
			
 
				+	for(i=0; i < (long) (n*n*n*k); i++)
			
 
				+		sum+=i;
			
 
				+}
			
 
				+
			
 
				+/* ############################################ */
			
 
				+/* Start of the part specific to multiple linear regression perfmodels */
			
 
				+
			
 
				+/* Defining perfmodel, number of parameters and their names Initially
			
 
				+   application developer only knows these parameters. The execution of
			
 
				+   this codelet will generate traces that can be analyzed using
			
 
				+   "tools/starpu_mlr_analysis" as a template to obtain the parameters
			
 
				+   combinations and exponents.
			
 
				+ */
			
 
				+
			
 
				+static const char * parameters_names[]	= {	"M",	"N",	"K", };
			
 
				+
			
 
				+static struct starpu_perfmodel cl_model_init = {
			
 
				+	.type = STARPU_MULTIPLE_REGRESSION_BASED,
			
 
				+	.symbol = "mlr_init",
			
 
				+	.parameters = cl_params,
			
 
				+	.nparameters = 3,
			
 
				+	.parameters_names = parameters_names,
			
 
				+};
			
 
				+
			
 
				+/* Defining the equation for modeling duration of the task. The
			
 
				+   parameters combinations and exponents are computed externally
			
 
				+   offline, for example using "tools/starpu_mlr_analysis" tool as a
			
 
				+   template.
			
 
				+ */
			
 
				+
			
 
				+static unsigned combi1 [3]		= {	2,	1,	0 };
			
 
				+static unsigned combi2 [3]		= {	0,	3,	1 };
			
 
				+
			
 
				+static unsigned *combinations[] = { combi1, combi2 };
			
 
				+
			
 
				+static struct starpu_perfmodel cl_model_final = {
			
 
				+	.type = STARPU_MULTIPLE_REGRESSION_BASED,
			
 
				+	.symbol = "mlr_final",
			
 
				+	.parameters = cl_params,
			
 
				+	.nparameters = 3,
			
 
				+	.parameters_names = parameters_names,
			
 
				+	.ncombinations = 2,
			
 
				+	.combinations = combinations,
			
 
				+};
			
 
				+
			
 
				+/* End of the part specific to multiple linear regression perfmodels */
			
 
				+/* ############################################ */
			
 
				+
			
 
				+static struct starpu_codelet cl_init = {
			
 
				+	.cpu_funcs = { cpu_func },
			
 
				+	.cpu_funcs_name = { "mlr_codelet_init" },
			
 
				+	.nbuffers = 0,
			
 
				+	.model = &cl_model_init,
			
 
				+};
			
 
				+
			
 
				+static struct starpu_codelet cl_final = {
			
 
				+	.cpu_funcs = { cpu_func },
			
 
				+	.cpu_funcs_name = { "mlr_codelet_final" },
			
 
				+	.nbuffers = 0,
			
 
				+	.model = &cl_model_final,
			
 
				+};
			
 
				+
			
 
				+
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	/* Initialization */
			
 
				+	unsigned i,j;
			
 
				+	int ret;
			
 
				+	ret = starpu_init(NULL);
			
 
				+	if (ret == -ENODEV)
			
 
				+		return 77;
			
 
				+	
			
 
				+	sum=0;
			
 
				+	int m,n,k;
			
 
				+
			
 
				+        /* Giving pseudo-random values to the M,N,K parameters and inserting tasks */
			
 
				+	for(i=0; i < 42; i++)
			
 
				+	{
			
 
				+		m = (int) ((rand() % 10)+1);
			
 
				+		n = (int) ((rand() % 10)+1);
			
 
				+		k = (int) ((rand() % 10)+1);
			
 
				+		
			
 
				+		for(j=0; j < 42; j++)
			
 
				+		{
			
 
				+			starpu_insert_task(&cl_init,
			
 
				+				   STARPU_VALUE, &m, sizeof(int),
			
 
				+				   STARPU_VALUE, &n, sizeof(int),
			
 
				+				   STARPU_VALUE, &k, sizeof(int),
			
 
				+				   0);
			
 
				+			starpu_insert_task(&cl_final,
			
 
				+				   STARPU_VALUE, &m, sizeof(int),
			
 
				+				   STARPU_VALUE, &n, sizeof(int),
			
 
				+				   STARPU_VALUE, &k, sizeof(int),
			
 
				+				   0);
			
 
				+		}
			
 
				+	}
			
 
				+			  
			
 
				+	starpu_shutdown();
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
--- a/examples/ppm_downscaler/yuv_downscaler.c
+++ b/examples/ppm_downscaler/yuv_downscaler.c
@@ -112,6 +112,7 @@ static struct starpu_data_filter filter_uv =
 
				 int main(int argc, char **argv)
			
 
				 {
			
 
				 	int ret;
			
 
				+	size_t sret;
			
 
				 
			
 
				 	assert(HEIGHT % (2*BLOCK_HEIGHT) == 0);
			
 
				 	assert(HEIGHT % FACTOR == 0);
			
@@ -146,8 +147,8 @@ int main(int argc, char **argv)
 
				 	FILE *f_out = fopen(filename_out, "w+");
			
 
				 	assert(f_out);
			
 
				 
			
 
				-	ret = fread(yuv_in_buffer, FRAMESIZE, nframes, f_in);
			
 
				-	assert(ret == nframes);
			
 
				+	sret = fread(yuv_in_buffer, FRAMESIZE, nframes, f_in);
			
 
				+	assert(sret == nframes);
			
 
				 
			
 
				 	starpu_data_handle_t *frame_y_handle = (starpu_data_handle_t *)  calloc(nframes, sizeof(starpu_data_handle_t));
			
 
				 	starpu_data_handle_t *frame_u_handle = (starpu_data_handle_t *)  calloc(nframes, sizeof(starpu_data_handle_t));
			
--- a/examples/sched_ctx/parallel_tasks_reuse_handle.c
+++ b/examples/sched_ctx/parallel_tasks_reuse_handle.c
@@ -68,6 +68,8 @@ static struct starpu_codelet init_parallel_worker_cl=
 
				 void parallel_task_init_one_context(unsigned * context_id)

			
 
				 {

			
 
				 	struct starpu_task * t;

			
 
				+	int ret;

			
 
				+

			
 
				 	t = starpu_task_build(&init_parallel_worker_cl,

			
 
				 			      STARPU_SCHED_CTX, *context_id,

			
 
				 			      0);

			
@@ -78,7 +80,8 @@ void parallel_task_init_one_context(unsigned * context_id)
 
				 	t->prologue_callback_pop_arg=context_id;

			
 
				 	t->prologue_callback_pop_arg_free=0;

			
 
				 

			
 
				-	starpu_task_submit(t);

			
 
				+	ret = starpu_task_submit(t);

			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");

			
 
				 }

			
 
				 

			
 
				 struct context main_context;

			
--- a/examples/scheduler/dummy_sched.c
+++ b/examples/scheduler/dummy_sched.c
@@ -156,6 +156,10 @@ int main(int argc, char **argv)
 
				 	int ret;
			
 
				 	struct starpu_conf conf;
			
 
				 
			
 
				+#ifdef STARPU_HAVE_UNSETENV
			
 
				+	unsetenv("STARPU_SCHED");
			
 
				+#endif
			
 
				+
			
 
				 	starpu_conf_init(&conf);
			
 
				 	conf.sched_policy = &dummy_sched_policy,
			
 
				 	ret = starpu_init(&conf);
			
--- a/examples/scheduler/heteroprio_test.c
+++ b/examples/scheduler/heteroprio_test.c
@@ -117,7 +117,7 @@ int main(int argc, char** argv)
 
				 
			
 
				 	ncpus = starpu_cpu_worker_get_count();
			
 
				 	nopencls = starpu_opencl_worker_get_count();
			
 
				-	FPRINTF(stderr, "Worker = %d\n",  starpu_worker_get_count());
			
 
				+	FPRINTF(stderr, "Worker = %u\n",  starpu_worker_get_count());
			
 
				 	FPRINTF(stderr, "Worker CPU = %d\n", ncpus);
			
 
				 	FPRINTF(stderr, "Worker OpenCL = %d\n", nopencls);
			
 
				 	if (ncpus + nopencls == 0)
			
--- a/examples/spmv/dw_block_spmv.c
+++ b/examples/spmv/dw_block_spmv.c
@@ -63,7 +63,7 @@ void create_data(void)
 
				 
			
 
				 	/* declare the corresponding block CSR to the runtime */
			
 
				 	starpu_bcsr_data_register(&sparse_matrix, STARPU_MAIN_RAM, bcsr_matrix->nnz_blocks, bcsr_matrix->nrows_blocks,
			
 
				-	                (uintptr_t)bcsr_matrix->val, bcsr_matrix->colind, bcsr_matrix->rowptr, 
			
 
				+	                (uintptr_t)bcsr_matrix->val, bcsr_matrix->colind, bcsr_matrix->rowptr,
			
 
				 			0, bcsr_matrix->r, bcsr_matrix->c, sizeof(float));
			
 
				 
			
 
				 	size = c*r*starpu_bcsr_get_nnz(sparse_matrix);
			
@@ -125,7 +125,7 @@ unsigned get_bcsr_nchildren(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f
 
				   return (unsigned)starpu_bcsr_get_nnz(handle);
			
 
				 }
			
 
				 
			
 
				-struct starpu_data_interface_ops *get_bcsr_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child) 
			
 
				+struct starpu_data_interface_ops *get_bcsr_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child)
			
 
				 {
			
 
				   return &starpu_interface_matrix_ops;
			
 
				 }
			
@@ -145,7 +145,7 @@ void call_filters(void)
 
				 	vector_in_f.nchildren  = size/c;
			
 
				 	vector_in_f.get_nchildren  = NULL;
			
 
				 	vector_in_f.get_child_ops  = NULL;
			
 
				-	
			
 
				+
			
 
				 	vector_out_f.filter_func = starpu_vector_filter_block;
			
 
				 	vector_out_f.nchildren  = size/r;
			
 
				 	vector_out_f.get_nchildren  = NULL;
			
@@ -178,8 +178,8 @@ void launch_spmv_codelets(void)
 
				 	int ret;
			
 
				 
			
 
				 	/* we call one codelet per block */
			
 
				-	unsigned nblocks = starpu_bcsr_get_nnz(sparse_matrix); 
			
 
				-	unsigned nrows = starpu_bcsr_get_nrow(sparse_matrix); 
			
 
				+	unsigned nblocks = starpu_bcsr_get_nnz(sparse_matrix);
			
 
				+	unsigned nrows = starpu_bcsr_get_nrow(sparse_matrix);
			
 
				 
			
 
				 	remainingtasks = NSPMV*nblocks;
			
 
				 	totaltasks = remainingtasks;
			
@@ -235,7 +235,7 @@ void launch_spmv_codelets(void)
 
				 				task->handles[1] = starpu_data_get_sub_data(vector_in, 1, i);
			
 
				 				task->handles[2] = starpu_data_get_sub_data(vector_out, 1, j);
			
 
				 
			
 
				-				/* all tasks in the same row are dependant so that we don't wait too much for data 
			
 
				+				/* all tasks in the same row are dependant so that we don't wait too much for data
			
 
				 				 * we need to wait on the previous task if we are not the first task of a row */
			
 
				 				if (index != rowptr[row & ~0x3])
			
 
				 				{
			
@@ -272,6 +272,7 @@ void launch_spmv_codelets(void)
 
				 	}
			
 
				 
			
 
				 	printf("end of task submission (there was %u chains for %u tasks : ratio %u tasks per chain) !\n", nchains, totaltasks, totaltasks/nchains);
			
 
				+	free(is_entry_tab);
			
 
				 }
			
 
				 
			
 
				 void init_problem(void)
			
--- a/examples/spmv/matrix_market/mmio.c
+++ b/examples/spmv/matrix_market/mmio.c
@@ -103,12 +103,12 @@ int mm_is_valid(MM_typecode matcode)
 
				 
			
 
				 int mm_read_banner(FILE *f, MM_typecode *matcode)
			
 
				 {
			
 
				-	char line[MM_MAX_LINE_LENGTH];
			
 
				-	char banner[MM_MAX_TOKEN_LENGTH];
			
 
				-	char mtx[MM_MAX_TOKEN_LENGTH];
			
 
				-	char crd[MM_MAX_TOKEN_LENGTH];
			
 
				-	char data_type[MM_MAX_TOKEN_LENGTH];
			
 
				-	char storage_scheme[MM_MAX_TOKEN_LENGTH];
			
 
				+	char line[MM_MAX_LINE_LENGTH+1];
			
 
				+	char banner[MM_MAX_TOKEN_LENGTH+1];
			
 
				+	char mtx[MM_MAX_TOKEN_LENGTH+1];
			
 
				+	char crd[MM_MAX_TOKEN_LENGTH+1];
			
 
				+	char data_type[MM_MAX_TOKEN_LENGTH+1];
			
 
				+	char storage_scheme[MM_MAX_TOKEN_LENGTH+1];
			
 
				 	char *p;
			
 
				 
			
 
				 	mm_clear_typecode(matcode);
			
@@ -116,7 +116,7 @@ int mm_read_banner(FILE *f, MM_typecode *matcode)
 
				 	if (fgets(line, MM_MAX_LINE_LENGTH, f) == NULL)
			
 
				 		return MM_PREMATURE_EOF;
			
 
				 
			
 
				-	if (sscanf(line, "%MM_MAX_TOKEN_LENGTHs %MM_MAX_TOKEN_LENGTHs %MM_MAX_TOKEN_LENGTHs %MM_MAX_TOKEN_LENGTHs %MM_MAX_TOKEN_LENGTHs", banner, mtx, crd, data_type, storage_scheme) != 5)
			
 
				+	if (sscanf(line, "%"MM_MAX_TOKEN_LENGTH_S"s %"MM_MAX_TOKEN_LENGTH_S"s %"MM_MAX_TOKEN_LENGTH_S"s %"MM_MAX_TOKEN_LENGTH_S"s %"MM_MAX_TOKEN_LENGTH_S"s", banner, mtx, crd, data_type, storage_scheme) != 5)
			
 
				 		return MM_PREMATURE_EOF;
			
 
				 
			
 
				 	for (p=mtx; *p!='\0'; *p=tolower(*p),p++);  /* convert to lower case */
			
--- a/examples/spmv/matrix_market/mmio.h
+++ b/examples/spmv/matrix_market/mmio.h
@@ -27,6 +27,7 @@
 
				 #define MM_MAX_LINE_LENGTH 1025
			
 
				 #define MatrixMarketBanner "%%MatrixMarket"
			
 
				 #define MM_MAX_TOKEN_LENGTH 64
			
 
				+#define MM_MAX_TOKEN_LENGTH_S "64"
			
 
				 
			
 
				 typedef char MM_typecode[4];
			
 
				 
			
--- a/examples/stencil/Makefile.am
+++ b/examples/stencil/Makefile.am
@@ -93,12 +93,14 @@ endif
 
				 ###################
			
 
				 
			
 
				 STARPU_EXAMPLES =				\
			
 
				-	stencil
			
 
				+	stencil					\
			
 
				+	implicit_stencil
			
 
				 
			
 
				 examplebindir = $(libdir)/starpu/examples/stencil
			
 
				 
			
 
				 examplebin_PROGRAMS =				\
			
 
				-	stencil
			
 
				+	stencil					\
			
 
				+	implicit_stencil
			
 
				 
			
 
				 stencil_SOURCES =				\
			
 
				 	life.c					\
			
@@ -109,6 +111,7 @@ stencil_SOURCES =				\
 
				 
			
 
				 noinst_HEADERS =				\
			
 
				 	stencil.h				\
			
 
				+	implicit-stencil.h			\
			
 
				 	shadow.h
			
 
				 
			
 
				 if STARPU_USE_CUDA
			
@@ -123,6 +126,30 @@ stencil_SOURCES +=				\
 
				 	shadow_opencl.c
			
 
				 endif
			
 
				 
			
 
				+implicit_stencil_SOURCES =			\
			
 
				+	life.c					\
			
 
				+	implicit-stencil-kernels.c		\
			
 
				+	implicit-stencil-tasks.c		\
			
 
				+	implicit-stencil-blocks.c		\
			
 
				+	implicit-stencil.c
			
 
				+
			
 
				+noinst_HEADERS =				\
			
 
				+	stencil.h				\
			
 
				+	implicit-stencil.h			\
			
 
				+	shadow.h
			
 
				+
			
 
				+if STARPU_USE_CUDA
			
 
				+implicit_stencil_SOURCES +=			\
			
 
				+	life_cuda.cu				\
			
 
				+	shadow.cu
			
 
				+endif
			
 
				+
			
 
				+if STARPU_USE_OPENCL
			
 
				+implicit_stencil_SOURCES +=			\
			
 
				+	life_opencl.c				\
			
 
				+	shadow_opencl.c
			
 
				+endif
			
 
				+
			
 
				 outs =						\
			
 
				 	0.5.out					\
			
 
				 	0.out					\
			
--- a/examples/stencil/README
+++ b/examples/stencil/README
@@ -42,3 +42,7 @@ penalty ratio), run make pics or make view to get pictures.
 
				 mpi.out: results on MPI.
			
 
				 
			
 
				 results: a few results
			
 
				+
			
 
				+You can also use the implicit distributed flavour of this application (e.g.
			
 
				+with communications between processes automatically inferred by StarPU-MPI),
			
 
				+which is called implicit_stencil.
			
--- a/examples/stencil/implicit-stencil-blocks.c
+++ b/examples/stencil/implicit-stencil-blocks.c
@@ -0,0 +1,444 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2010, 2013-2016  Université de Bordeaux
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include "implicit-stencil.h"
			
 
				+#include <math.h>
			
 
				+
			
 
				+/* Manage block and tags allocation */
			
 
				+
			
 
				+static struct block_description *blocks;
			
 
				+static unsigned sizex, sizey, sizez;
			
 
				+static unsigned nbz;
			
 
				+static unsigned *block_sizes_z;
			
 
				+
			
 
				+/*
			
 
				+ *	Tags for various codelet completion
			
 
				+ */
			
 
				+
			
 
				+/*
			
 
				+ * common tag format:
			
 
				+ */
			
 
				+static starpu_tag_t tag_common(int z, int dir, int type)
			
 
				+{
			
 
				+	return (((((starpu_tag_t)type) << 4) | ((dir+1)/2)) << 32)|(starpu_tag_t)z;
			
 
				+}
			
 
				+
			
 
				+/* Completion of last update tasks */
			
 
				+starpu_tag_t TAG_FINISH(int z)
			
 
				+{
			
 
				+	z = (z + nbz)%nbz;
			
 
				+
			
 
				+	starpu_tag_t tag = tag_common(z, 0, 1);
			
 
				+	return tag;
			
 
				+}
			
 
				+
			
 
				+/* Completion of the save codelet for MPI send/recv */
			
 
				+starpu_tag_t TAG_START(int z, int dir)
			
 
				+{
			
 
				+	z = (z + nbz)%nbz;
			
 
				+
			
 
				+	starpu_tag_t tag = tag_common(z, dir, 2);
			
 
				+	return tag;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * common MPI tag format:
			
 
				+ */
			
 
				+static int mpi_tag_common(int z, int dir, int layer_or_boundary, int buffer)
			
 
				+{
			
 
				+	return (z<<12) | (layer_or_boundary << 8) | ((((1+dir)/2))<<4) | buffer;
			
 
				+}
			
 
				+
			
 
				+int MPI_TAG_LAYERS(int z, int buffer)
			
 
				+{
			
 
				+	z = (z + nbz)%nbz;
			
 
				+
			
 
				+    /* No direction for layers ; layer is 0 */
			
 
				+	int tag = mpi_tag_common(z, 0, 0, buffer);
			
 
				+
			
 
				+	return tag;
			
 
				+}
			
 
				+
			
 
				+int MPI_TAG_BOUNDARIES(int z, int dir, int buffer)
			
 
				+{
			
 
				+	z = (z + nbz)%nbz;
			
 
				+
			
 
				+	int tag = mpi_tag_common(z, dir, 1, buffer);
			
 
				+
			
 
				+	return tag;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+/*
			
 
				+ *	Block descriptors
			
 
				+ */
			
 
				+
			
 
				+/* Compute the size of the different blocks */
			
 
				+static void compute_block_sizes(void)
			
 
				+{
			
 
				+	block_sizes_z = (unsigned *) malloc(nbz*sizeof(unsigned));
			
 
				+	STARPU_ASSERT(block_sizes_z);
			
 
				+
			
 
				+	/* Perhaps the last chunk is smaller */
			
 
				+	unsigned default_block_size = (sizez+nbz-1)/nbz;
			
 
				+	unsigned remaining = sizez;
			
 
				+
			
 
				+	unsigned b;
			
 
				+	for (b = 0; b < nbz; b++)
			
 
				+	{
			
 
				+		block_sizes_z[b] = MIN(default_block_size, remaining);
			
 
				+		remaining -= block_sizes_z[b];
			
 
				+	}
			
 
				+
			
 
				+	STARPU_ASSERT(remaining == 0);
			
 
				+}
			
 
				+
			
 
				+unsigned get_block_size(int bz)
			
 
				+{
			
 
				+	return block_sizes_z[bz];
			
 
				+}
			
 
				+
			
 
				+struct block_description *get_block_description(int z)
			
 
				+{
			
 
				+	z = (z + nbz)%nbz;
			
 
				+
			
 
				+	STARPU_ASSERT(&blocks[z]);
			
 
				+
			
 
				+	return &blocks[z];
			
 
				+}
			
 
				+
			
 
				+int get_block_mpi_node(int z)
			
 
				+{
			
 
				+	z = (z + nbz)%nbz;
			
 
				+	return blocks[z].mpi_node;
			
 
				+}
			
 
				+
			
 
				+void create_blocks_array(unsigned _sizex, unsigned _sizey, unsigned _sizez, unsigned _nbz)
			
 
				+{
			
 
				+	/* Store the parameters */
			
 
				+	nbz = _nbz;
			
 
				+	sizex = _sizex;
			
 
				+	sizey = _sizey;
			
 
				+	sizez = _sizez;
			
 
				+
			
 
				+	/* Create a grid of block descriptors */
			
 
				+	blocks = (struct block_description *) calloc(nbz, sizeof(struct block_description));
			
 
				+	STARPU_ASSERT(blocks);
			
 
				+
			
 
				+	/* What is the size of the different blocks ? */
			
 
				+	compute_block_sizes();
			
 
				+
			
 
				+	unsigned bz;
			
 
				+	for (bz = 0; bz < nbz; bz++)
			
 
				+	{
			
 
				+		struct block_description * block =
			
 
				+				get_block_description(bz);
			
 
				+
			
 
				+		/* Which block is it ? */
			
 
				+		block->bz = bz;
			
 
				+
			
 
				+		/* For simplicity, we store which are the neighbours blocks */
			
 
				+		block->boundary_blocks[B] = get_block_description((bz-1+nbz)%nbz);
			
 
				+		block->boundary_blocks[T] = get_block_description((bz+1)%nbz);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+void free_blocks_array()
			
 
				+{
			
 
				+	free(blocks);
			
 
				+	free(block_sizes_z);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ *	Initialization of the blocks
			
 
				+ */
			
 
				+
			
 
				+void assign_blocks_to_workers(int rank)
			
 
				+{
			
 
				+	unsigned bz;
			
 
				+
			
 
				+	/* NB: perhaps we could count a GPU as multiple workers */
			
 
				+
			
 
				+	/* how many workers are there ? */
			
 
				+	/*unsigned nworkers = starpu_worker_get_count();*/
			
 
				+
			
 
				+	/* how many blocks are on that MPI node ? */
			
 
				+	unsigned nblocks = 0;
			
 
				+	for (bz = 0; bz < nbz; bz++)
			
 
				+	{
			
 
				+		struct block_description *block =
			
 
				+				get_block_description(bz);
			
 
				+
			
 
				+		if (block->mpi_node == rank)
			
 
				+			nblocks++;
			
 
				+	}
			
 
				+
			
 
				+	/* how many blocks per worker ? */
			
 
				+	/*unsigned nblocks_per_worker = (nblocks + nworkers - 1)/nworkers;*/
			
 
				+
			
 
				+	/* we now attribute up to nblocks_per_worker blocks per workers */
			
 
				+	unsigned attributed = 0;
			
 
				+	for (bz = 0; bz < nbz; bz++)
			
 
				+	{
			
 
				+		struct block_description *block =
			
 
				+				get_block_description(bz);
			
 
				+
			
 
				+		if (block->mpi_node == rank)
			
 
				+		{
			
 
				+			unsigned workerid;
			
 
				+			/* Manage initial block distribution between CPU and GPU */
			
 
				+		#if 0
			
 
				+			#if 1
			
 
				+			/* GPUs then CPUs */
			
 
				+			if (attributed < 3*18)
			
 
				+				workerid = attributed / 18;
			
 
				+			else
			
 
				+				workerid = 3+ (attributed - 3*18) / 2;
			
 
				+			#else
			
 
				+			/* GPUs interleaved with CPUs */
			
 
				+			if ((attributed % 20) <= 1)
			
 
				+				workerid = 3 + attributed / 20;
			
 
				+			else if (attributed < 60)
			
 
				+				workerid = attributed / 20;
			
 
				+			else
			
 
				+				workerid = (attributed - 60)/2 + 6;
			
 
				+			#endif
			
 
				+		#else
			
 
				+			/* Only GPUS */
			
 
				+			workerid = (attributed / 21) % 3;
			
 
				+		#endif
			
 
				+			/*= attributed/nblocks_per_worker;*/
			
 
				+
			
 
				+			block->preferred_worker = workerid;
			
 
				+
			
 
				+			attributed++;
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+
			
 
				+
			
 
				+void assign_blocks_to_mpi_nodes(int world_size)
			
 
				+{
			
 
				+	unsigned nzblocks_per_process = (nbz + world_size - 1) / world_size;
			
 
				+
			
 
				+	unsigned bz;
			
 
				+	for (bz = 0; bz < nbz; bz++)
			
 
				+	{
			
 
				+		struct block_description *block =
			
 
				+				get_block_description(bz);
			
 
				+
			
 
				+		block->mpi_node = bz / nzblocks_per_process;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+static size_t allocated = 0;
			
 
				+
			
 
				+static void allocate_block_on_node(starpu_data_handle_t *handleptr, TYPE **ptr, unsigned nx, unsigned ny, unsigned nz)
			
 
				+{
			
 
				+	int ret;
			
 
				+	size_t block_size = nx*ny*nz*sizeof(TYPE);
			
 
				+
			
 
				+	/* Allocate memory */
			
 
				+#if 1
			
 
				+	ret = starpu_malloc_flags((void **)ptr, block_size, STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED);
			
 
				+	STARPU_ASSERT(ret == 0);
			
 
				+#else
			
 
				+	*ptr = malloc(block_size);
			
 
				+	STARPU_ASSERT(*ptr);
			
 
				+#endif
			
 
				+
			
 
				+	allocated += block_size;
			
 
				+
			
 
				+//#ifndef STARPU_SIMGRID
			
 
				+//	/* Fill the blocks with 0 */
			
 
				+//	memset(*ptr, 0, block_size);
			
 
				+//#endif
			
 
				+
			
 
				+	/* Register it to StarPU */
			
 
				+	starpu_block_data_register(handleptr, STARPU_MAIN_RAM, (uintptr_t)*ptr, nx, nx*ny, nx, ny, nz, sizeof(TYPE));
			
 
				+}
			
 
				+
			
 
				+static void free_block_on_node(starpu_data_handle_t handleptr, unsigned nx, unsigned ny, unsigned nz)
			
 
				+{
			
 
				+	void *ptr = (void *) starpu_block_get_local_ptr(handleptr);
			
 
				+	size_t block_size = nx*ny*nz*sizeof(TYPE);
			
 
				+	starpu_data_unregister(handleptr);
			
 
				+	starpu_free_flags(ptr, block_size, STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED);
			
 
				+}
			
 
				+
			
 
				+void display_memory_consumption(int rank, double time)
			
 
				+{
			
 
				+	FPRINTF(stderr, "%lu B of memory were allocated on node %d in %f ms\n", (unsigned long)allocated, rank, time/1000);
			
 
				+}
			
 
				+
			
 
				+void allocate_memory_on_node(int rank)
			
 
				+{
			
 
				+	unsigned bz;
			
 
				+
			
 
				+	/* Correctly allocate and declare all data handles to StarPU. */
			
 
				+	for (bz = 0; bz < nbz; bz++)
			
 
				+	{
			
 
				+		struct block_description *block = get_block_description(bz);
			
 
				+		int node = block->mpi_node;
			
 
				+		unsigned size_bz = block_sizes_z[bz];
			
 
				+
			
 
				+		if (node == rank)
			
 
				+		{
			
 
				+			/* Main blocks */
			
 
				+			allocate_block_on_node(&block->layers_handle[0], &block->layers[0],
			
 
				+					       (sizex + 2*K), (sizey + 2*K), (size_bz + 2*K));
			
 
				+			allocate_block_on_node(&block->layers_handle[1], &block->layers[1],
			
 
				+					       (sizex + 2*K), (sizey + 2*K), (size_bz + 2*K));
			
 
				+
			
 
				+			/* Boundary blocks : Top */
			
 
				+			allocate_block_on_node(&block->boundaries_handle[T][0], &block->boundaries[T][0],
			
 
				+					       (sizex + 2*K), (sizey + 2*K), K);
			
 
				+			allocate_block_on_node(&block->boundaries_handle[T][1], &block->boundaries[T][1],
			
 
				+					       (sizex + 2*K), (sizey + 2*K), K);
			
 
				+
			
 
				+			/* Boundary blocks : Bottom */
			
 
				+			allocate_block_on_node(&block->boundaries_handle[B][0], &block->boundaries[B][0],
			
 
				+					       (sizex + 2*K), (sizey + 2*K), K);
			
 
				+			allocate_block_on_node(&block->boundaries_handle[B][1], &block->boundaries[B][1],
			
 
				+					       (sizex + 2*K), (sizey + 2*K), K);
			
 
				+		}
			
 
				+		/* Register void blocks to StarPU, that StarPU-MPI will request to
			
 
				+		 * neighbour nodes if needed for the local computation */
			
 
				+		else
			
 
				+		{
			
 
				+			/* Main blocks */
			
 
				+			starpu_block_data_register(&block->layers_handle[0], -1, (uintptr_t) NULL, (sizex + 2*K), (sizex + 2*K)*(sizey + 2*K), (sizex + 2*K), (sizey + 2*K), (size_bz + 2*K), sizeof(TYPE));
			
 
				+			starpu_block_data_register(&block->layers_handle[1], -1, (uintptr_t) NULL, (sizex + 2*K), (sizex + 2*K)*(sizey + 2*K), (sizex + 2*K), (sizey + 2*K), (size_bz + 2*K), sizeof(TYPE));
			
 
				+
			
 
				+			/* Boundary blocks : Top */
			
 
				+			starpu_block_data_register(&block->boundaries_handle[T][0], -1, (uintptr_t) NULL, (sizex + 2*K), (sizex + 2*K)*(sizey + 2*K), (sizex + 2*K), (sizey + 2*K), K, sizeof(TYPE));
			
 
				+			starpu_block_data_register(&block->boundaries_handle[T][1], -1, (uintptr_t) NULL, (sizex + 2*K), (sizex + 2*K)*(sizey + 2*K), (sizex + 2*K), (sizey + 2*K), K, sizeof(TYPE));
			
 
				+
			
 
				+			/* Boundary blocks : Bottom */
			
 
				+			starpu_block_data_register(&block->boundaries_handle[B][0], -1, (uintptr_t) NULL, (sizex + 2*K), (sizex + 2*K)*(sizey + 2*K), (sizex + 2*K), (sizey + 2*K), K, sizeof(TYPE));
			
 
				+			starpu_block_data_register(&block->boundaries_handle[B][1], -1, (uintptr_t) NULL, (sizex + 2*K), (sizex + 2*K)*(sizey + 2*K), (sizex + 2*K), (sizey + 2*K), K, sizeof(TYPE));
			
 
				+		}
			
 
				+
			
 
				+#ifdef STARPU_USE_MPI
			
 
				+		/* Register all data to StarPU-MPI, even the ones that are not
			
 
				+		 * allocated on the local node. */
			
 
				+
			
 
				+		/* Main blocks */
			
 
				+		starpu_mpi_data_register(block->layers_handle[0], MPI_TAG_LAYERS(bz, 0), node);
			
 
				+		starpu_mpi_data_register(block->layers_handle[1], MPI_TAG_LAYERS(bz, 1), node);
			
 
				+
			
 
				+		/* Boundary blocks : Top */
			
 
				+		starpu_mpi_data_register(block->boundaries_handle[T][0], MPI_TAG_BOUNDARIES(bz, T, 0), node);
			
 
				+		starpu_mpi_data_register(block->boundaries_handle[T][1], MPI_TAG_BOUNDARIES(bz, T, 1), node);
			
 
				+
			
 
				+		/* Boundary blocks : Bottom */
			
 
				+		starpu_mpi_data_register(block->boundaries_handle[B][0], MPI_TAG_BOUNDARIES(bz, B, 0), node);
			
 
				+		starpu_mpi_data_register(block->boundaries_handle[B][1], MPI_TAG_BOUNDARIES(bz, B, 1), node);
			
 
				+#endif
			
 
				+	}
			
 
				+
			
 
				+	/* Initialize all the data in parallel */
			
 
				+	for (bz = 0; bz < nbz; bz++)
			
 
				+	{
			
 
				+		struct block_description *block = get_block_description(bz);
			
 
				+		int node = block->mpi_node;
			
 
				+
			
 
				+		if (node == rank)
			
 
				+		{
			
 
				+			/* Set all the data to 0 */
			
 
				+			create_task_memset(sizex, sizey, bz);
			
 
				+
			
 
				+			/* Initialize the first layer with some random data */
			
 
				+			create_task_initlayer(sizex, sizey, bz);
			
 
				+		}
			
 
				+	}
			
 
				+	starpu_task_wait_for_all();
			
 
				+}
			
 
				+
			
 
				+void free_memory_on_node(int rank)
			
 
				+{
			
 
				+	unsigned bz;
			
 
				+	for (bz = 0; bz < nbz; bz++)
			
 
				+	{
			
 
				+		struct block_description *block = get_block_description(bz);
			
 
				+
			
 
				+		int node = block->mpi_node;
			
 
				+
			
 
				+		/* Main blocks */
			
 
				+		if (node == rank)
			
 
				+		{
			
 
				+			free_block_on_node(block->layers_handle[0], (sizex + 2*K), (sizey + 2*K), K);
			
 
				+			free_block_on_node(block->layers_handle[1], (sizex + 2*K), (sizey + 2*K), K);
			
 
				+		}
			
 
				+        else
			
 
				+        {
			
 
				+            starpu_data_unregister(block->layers_handle[0]);
			
 
				+            starpu_data_unregister(block->layers_handle[1]);
			
 
				+        }
			
 
				+
			
 
				+		/* Boundary blocks : Top */
			
 
				+		if (node == rank)
			
 
				+		{
			
 
				+			free_block_on_node(block->boundaries_handle[T][0], (sizex + 2*K), (sizey + 2*K), K);
			
 
				+			free_block_on_node(block->boundaries_handle[T][1], (sizex + 2*K), (sizey + 2*K), K);
			
 
				+		}
			
 
				+        else
			
 
				+        {
			
 
				+            starpu_data_unregister(block->boundaries_handle[T][0]);
			
 
				+            starpu_data_unregister(block->boundaries_handle[T][1]);
			
 
				+        }
			
 
				+
			
 
				+		/* Boundary blocks : Bottom */
			
 
				+		if (node == rank)
			
 
				+		{
			
 
				+			free_block_on_node(block->boundaries_handle[B][0], (sizex + 2*K), (sizey + 2*K), K);
			
 
				+			free_block_on_node(block->boundaries_handle[B][1], (sizex + 2*K), (sizey + 2*K), K);
			
 
				+		}
			
 
				+        else
			
 
				+        {
			
 
				+            starpu_data_unregister(block->boundaries_handle[B][0]);
			
 
				+            starpu_data_unregister(block->boundaries_handle[B][1]);
			
 
				+        }
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/* check how many cells are alive */
			
 
				+void check(int rank)
			
 
				+{
			
 
				+	unsigned bz;
			
 
				+	for (bz = 0; bz < nbz; bz++)
			
 
				+	{
			
 
				+		struct block_description *block = get_block_description(bz);
			
 
				+
			
 
				+		int node = block->mpi_node;
			
 
				+
			
 
				+		/* Main blocks */
			
 
				+		if (node == rank)
			
 
				+		{
			
 
				+			unsigned size_bz = block_sizes_z[bz];
			
 
				+#ifdef LIFE
			
 
				+			unsigned x, y, z;
			
 
				+			unsigned sum = 0;
			
 
				+			for (x = 0; x < sizex; x++)
			
 
				+				for (y = 0; y < sizey; y++)
			
 
				+					for (z = 0; z < size_bz; z++)
			
 
				+						sum += block->layers[0][(K+x)+(K+y)*(sizex + 2*K)+(K+z)*(sizex+2*K)*(sizey+2*K)];
			
 
				+			printf("block %u got %u/%u alive\n", bz, sum, sizex*sizey*size_bz);
			
 
				+#endif
			
 
				+		}
			
 
				+	}
			
 
				+}
			
--- a/examples/stencil/implicit-stencil-kernels.c
+++ b/examples/stencil/implicit-stencil-kernels.c
@@ -0,0 +1,769 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2010-2015  Université de Bordeaux
			
 
				+ * Copyright (C) 2012, 2013, 2016  CNRS
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include "implicit-stencil.h"
			
 
				+
			
 
				+/* Computation Kernels */
			
 
				+
			
 
				+/*
			
 
				+ * There are three codeletets:
			
 
				+ *
			
 
				+ * - cl_update, which takes a block and the boundaries of its neighbours, loads
			
 
				+ *   the boundaries into the block and perform some update loops:
			
 
				+ *
			
 
				+ *     comp. buffer      save. buffers        comp. buffer        save. buffers        comp. buffer
			
 
				+ *   |     ...     |                                                                                      
			
 
				+ *   |             | +------------------+ +------------------+                                            
			
 
				+ *   |     #N+1    | | #N+1 bottom copy====>#N+1 bottom copy |                                            
			
 
				+ *   +-------------+ +------------------+ +------------------+                                            
			
 
				+ *   | #N top copy | |   #N top copy    | |                  |                                            
			
 
				+ *   +-------------+ +------------------+ |                  |                                            
			
 
				+ *                                        | #N               |                                            
			
 
				+ *                                                 ...                                                    
			
 
				+ *                                        |                  | +----------------+ +----------------------+
			
 
				+ *                                        |                  | | #N bottom copy | | block #N bottom copy |
			
 
				+ * ^                                      +------------------+ +----------------+ +----------------------+
			
 
				+ * |                                      | #N-1 top copy   <====#N-1 top copy  | |  block #N-1          |
			
 
				+ * |                                      +------------------+ +----------------+ |                      |
			
 
				+ * Z                                                                                     ...
			
 
				+ *
			
 
				+ * - save_cl_top, which take a block and its top boundary, and saves the top of
			
 
				+ *   the block into the boundary (to be given as bottom of the neighbour above
			
 
				+ *   this block).
			
 
				+ *
			
 
				+ *     comp. buffer      save. buffers        comp. buffer        save. buffers        comp. buffer
			
 
				+ *   |     ...     |                                                                                      
			
 
				+ *   |             | +------------------+ +------------------+                                            
			
 
				+ *   |     #N+1    | | #N+1 bottom copy | | #N+1 bottom copy |                                            
			
 
				+ *   +-------------+ +------------------+ +------------------+                                            
			
 
				+ *   | #N top copy | |   #N top copy   <====                 |                                            
			
 
				+ *   +-------------+ +------------------+ |..................|                                            
			
 
				+ *                                        | #N               |                                            
			
 
				+ *                                                 ...                                                    
			
 
				+ *                                        |                  | +----------------+ +----------------------+
			
 
				+ *                                        |                  | | #N bottom copy | | block #N bottom copy |
			
 
				+ * ^                                      +------------------+ +----------------+ +----------------------+
			
 
				+ * |                                      | #N-1 top copy    | | #N-1 top copy  | |  block #N-1          |
			
 
				+ * |                                      +------------------+ +----------------+ |                      |
			
 
				+ * Z                                                                                     ...
			
 
				+ *
			
 
				+ * - save_cl_bottom, same for the bottom
			
 
				+ *     comp. buffer      save. buffers        comp. buffer        save. buffers        comp. buffer
			
 
				+ *   |     ...     |                                                                                      
			
 
				+ *   |             | +------------------+ +------------------+                                            
			
 
				+ *   |     #N+1    | | #N+1 bottom copy | | #N+1 bottom copy |                                            
			
 
				+ *   +-------------+ +------------------+ +------------------+                                            
			
 
				+ *   | #N top copy | |   #N top copy    | |                  |                                            
			
 
				+ *   +-------------+ +------------------+ |                  |                                            
			
 
				+ *                                        | #N               |                                            
			
 
				+ *                                                 ...                                                    
			
 
				+ *                                        |..................| +----------------+ +----------------------+
			
 
				+ *                                        |                 ====>#N bottom copy | | block #N bottom copy |
			
 
				+ * ^                                      +------------------+ +----------------+ +----------------------+
			
 
				+ * |                                      | #N-1 top copy    | | #N-1 top copy  | |  block #N-1          |
			
 
				+ * |                                      +------------------+ +----------------+ |                      |
			
 
				+ * Z                                                                                     ...
			
 
				+ *
			
 
				+ * The idea is that the computation buffers thus don't have to move, only their
			
 
				+ * boundaries are copied to buffers that do move (be it CPU/GPU, GPU/GPU or via
			
 
				+ * MPI)
			
 
				+ *
			
 
				+ * For each of the buffers above, there are two (0/1) buffers to make new/old switch costless.
			
 
				+ */
			
 
				+
			
 
				+#if 0
			
 
				+# define DEBUG(fmt, ...) fprintf(stderr,fmt,##__VA_ARGS__)
			
 
				+#else
			
 
				+# define DEBUG(fmt, ...) (void) 0
			
 
				+#endif
			
 
				+
			
 
				+/* Record which GPU ran which block, for nice pictures */
			
 
				+int who_runs_what_len;
			
 
				+int *who_runs_what;
			
 
				+int *who_runs_what_index;
			
 
				+double *last_tick;
			
 
				+
			
 
				+/* Achieved iterations */
			
 
				+static int achieved_iter;
			
 
				+
			
 
				+/* Record how many updates each worker performed */
			
 
				+unsigned update_per_worker[STARPU_NMAXWORKERS];
			
 
				+
			
 
				+static void record_who_runs_what(struct block_description *block)
			
 
				+{
			
 
				+	double now, now2, diff, delta = get_ticks() * 1000;
			
 
				+	int workerid = starpu_worker_get_id_check();
			
 
				+
			
 
				+	now = starpu_timing_now();
			
 
				+	now2 = now - start;
			
 
				+	diff = now2 - last_tick[block->bz];
			
 
				+	while (diff >= delta)
			
 
				+	{
			
 
				+		last_tick[block->bz] += delta;
			
 
				+		diff = now2 - last_tick[block->bz];
			
 
				+		if (who_runs_what_index[block->bz] < who_runs_what_len)
			
 
				+			who_runs_what[block->bz + (who_runs_what_index[block->bz]++) * get_nbz()] = -1;
			
 
				+	}
			
 
				+
			
 
				+	if (who_runs_what_index[block->bz] < who_runs_what_len)
			
 
				+		who_runs_what[block->bz + (who_runs_what_index[block->bz]++) * get_nbz()] = global_workerid(workerid);
			
 
				+}
			
 
				+
			
 
				+static void check_load(struct starpu_block_interface *block, struct starpu_block_interface *boundary)
			
 
				+{
			
 
				+	/* Sanity checks */
			
 
				+	STARPU_ASSERT(block->nx == boundary->nx);
			
 
				+	STARPU_ASSERT(block->ny == boundary->ny);
			
 
				+	STARPU_ASSERT(boundary->nz == K);
			
 
				+
			
 
				+	/* NB: this is not fully garanteed ... but it's *very* likely and that
			
 
				+	 * makes our life much simpler */
			
 
				+	STARPU_ASSERT(block->ldy == boundary->ldy);
			
 
				+	STARPU_ASSERT(block->ldz == boundary->ldz);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Load a neighbour's boundary into block, CPU version
			
 
				+ */
			
 
				+static void load_subblock_from_buffer_cpu(void *_block,
			
 
				+					void *_boundary,
			
 
				+					unsigned firstz)
			
 
				+{
			
 
				+	struct starpu_block_interface *block = (struct starpu_block_interface *)_block;
			
 
				+	struct starpu_block_interface *boundary = (struct starpu_block_interface *)_boundary;
			
 
				+	check_load(block, boundary);
			
 
				+
			
 
				+	/* We do a contiguous memory transfer */
			
 
				+	size_t boundary_size = K*block->ldz*block->elemsize;
			
 
				+
			
 
				+	unsigned offset = firstz*block->ldz;
			
 
				+	TYPE *block_data = (TYPE *)block->ptr;
			
 
				+	TYPE *boundary_data = (TYPE *)boundary->ptr;
			
 
				+	memcpy(&block_data[offset], boundary_data, boundary_size);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Load a neighbour's boundary into block, CUDA version
			
 
				+ */
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+static void load_subblock_from_buffer_cuda(void *_block,
			
 
				+					void *_boundary,
			
 
				+					unsigned firstz)
			
 
				+{
			
 
				+	struct starpu_block_interface *block = (struct starpu_block_interface *)_block;
			
 
				+	struct starpu_block_interface *boundary = (struct starpu_block_interface *)_boundary;
			
 
				+	check_load(block, boundary);
			
 
				+
			
 
				+	/* We do a contiguous memory transfer */
			
 
				+	size_t boundary_size = K*block->ldz*block->elemsize;
			
 
				+
			
 
				+	unsigned offset = firstz*block->ldz;
			
 
				+	TYPE *block_data = (TYPE *)block->ptr;
			
 
				+	TYPE *boundary_data = (TYPE *)boundary->ptr;
			
 
				+	cudaMemcpyAsync(&block_data[offset], boundary_data, boundary_size, cudaMemcpyDeviceToDevice, starpu_cuda_get_local_stream());
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * cl_update (CUDA version)
			
 
				+ */
			
 
				+static void update_func_cuda(void *descr[], void *arg)
			
 
				+{
			
 
				+    unsigned z;
			
 
				+    starpu_codelet_unpack_args(arg, &z);
			
 
				+	struct block_description *block = get_block_description(z);
			
 
				+
			
 
				+	int workerid = starpu_worker_get_id_check();
			
 
				+	DEBUG( "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
			
 
				+	if (block->bz == 0)
			
 
				+		FPRINTF(stderr,"!!! DO update_func_cuda z %u CUDA%d !!!\n", block->bz, workerid);
			
 
				+	else
			
 
				+		DEBUG( "!!! DO update_func_cuda z %u CUDA%d !!!\n", block->bz, workerid);
			
 
				+#if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID)
			
 
				+	int rank = 0;
			
 
				+	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
			
 
				+	DEBUG( "!!!           RANK %d              !!!\n", rank);
			
 
				+#endif
			
 
				+	DEBUG( "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
			
 
				+
			
 
				+	unsigned block_size_z = get_block_size(block->bz);
			
 
				+	unsigned i;
			
 
				+	update_per_worker[workerid]++;
			
 
				+
			
 
				+	record_who_runs_what(block);
			
 
				+
			
 
				+	/*
			
 
				+	 *	Load neighbours' boundaries : TOP
			
 
				+	 */
			
 
				+
			
 
				+	/* The offset along the z axis is (block_size_z + K) */
			
 
				+	load_subblock_from_buffer_cuda(descr[0], descr[2], block_size_z+K);
			
 
				+	load_subblock_from_buffer_cuda(descr[1], descr[3], block_size_z+K);
			
 
				+
			
 
				+	/*
			
 
				+	 *	Load neighbours' boundaries : BOTTOM
			
 
				+	 */
			
 
				+	load_subblock_from_buffer_cuda(descr[0], descr[4], 0);
			
 
				+	load_subblock_from_buffer_cuda(descr[1], descr[5], 0);
			
 
				+
			
 
				+	/*
			
 
				+	 *	Stencils ... do the actual work here :) TODO
			
 
				+	 */
			
 
				+
			
 
				+	for (i=1; i<=K; i++)
			
 
				+	{
			
 
				+		struct starpu_block_interface *oldb = descr[i%2], *newb = descr[(i+1)%2];
			
 
				+		TYPE *old = (void*) oldb->ptr, *newer = (void*) newb->ptr;
			
 
				+
			
 
				+		/* Shadow data */
			
 
				+		cuda_shadow_host(block->bz, old, oldb->nx, oldb->ny, oldb->nz, oldb->ldy, oldb->ldz, i);
			
 
				+
			
 
				+		/* And perform actual computation */
			
 
				+#ifdef LIFE
			
 
				+		cuda_life_update_host(block->bz, old, newer, oldb->nx, oldb->ny, oldb->nz, oldb->ldy, oldb->ldz, i);
			
 
				+#else
			
 
				+		cudaMemcpyAsync(newer, old, oldb->nx * oldb->ny * oldb->nz * sizeof(*newer), cudaMemcpyDeviceToDevice, starpu_cuda_get_local_stream());
			
 
				+#endif /* LIFE */
			
 
				+	}
			
 
				+
			
 
				+	if (block->bz == 0)
			
 
				+		starpu_top_update_data_integer(starpu_top_achieved_loop, ++achieved_iter);
			
 
				+}
			
 
				+#endif /* STARPU_USE_CUDA */
			
 
				+
			
 
				+/*
			
 
				+ * Load a neighbour's boundary into block, OpenCL version
			
 
				+ */
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+static void load_subblock_from_buffer_opencl(struct starpu_block_interface *block,
			
 
				+					struct starpu_block_interface *boundary,
			
 
				+					unsigned firstz)
			
 
				+{
			
 
				+	check_load(block, boundary);
			
 
				+
			
 
				+	/* We do a contiguous memory transfer */
			
 
				+	size_t boundary_size = K*block->ldz*block->elemsize;
			
 
				+
			
 
				+	unsigned offset = firstz*block->ldz;
			
 
				+	cl_mem block_data = (cl_mem)block->dev_handle;
			
 
				+	cl_mem boundary_data = (cl_mem)boundary->dev_handle;
			
 
				+
			
 
				+        cl_command_queue cq;
			
 
				+        starpu_opencl_get_current_queue(&cq);
			
 
				+        cl_int ret = clEnqueueCopyBuffer(cq, boundary_data, block_data, 0, offset, boundary_size, 0, NULL, NULL);
			
 
				+	if (ret != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(ret);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * cl_update (OpenCL version)
			
 
				+ */
			
 
				+static void update_func_opencl(void *descr[], void *arg)
			
 
				+{
			
 
				+    unsigned z;
			
 
				+    starpu_codelet_unpack_args(arg, &z);
			
 
				+	struct block_description *block = get_block_description(z);
			
 
				+
			
 
				+	int workerid = starpu_worker_get_id_check();
			
 
				+	DEBUG( "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
			
 
				+	if (block->bz == 0)
			
 
				+		FPRINTF(stderr,"!!! DO update_func_opencl z %u OPENCL%d !!!\n", block->bz, workerid);
			
 
				+	else
			
 
				+		DEBUG( "!!! DO update_func_opencl z %u OPENCL%d !!!\n", block->bz, workerid);
			
 
				+#if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID)
			
 
				+	int rank = 0;
			
 
				+	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
			
 
				+	DEBUG( "!!!           RANK %d              !!!\n", rank);
			
 
				+#endif
			
 
				+	DEBUG( "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
			
 
				+
			
 
				+	unsigned block_size_z = get_block_size(block->bz);
			
 
				+	unsigned i;
			
 
				+	update_per_worker[workerid]++;
			
 
				+
			
 
				+	record_who_runs_what(block);
			
 
				+
			
 
				+        cl_command_queue cq;
			
 
				+        starpu_opencl_get_current_queue(&cq);
			
 
				+
			
 
				+	/*
			
 
				+	 *	Load neighbours' boundaries : TOP
			
 
				+	 */
			
 
				+
			
 
				+	/* The offset along the z axis is (block_size_z + K) */
			
 
				+	load_subblock_from_buffer_opencl(descr[0], descr[2], block_size_z+K);
			
 
				+	load_subblock_from_buffer_opencl(descr[1], descr[3], block_size_z+K);
			
 
				+
			
 
				+	/*
			
 
				+	 *	Load neighbours' boundaries : BOTTOM
			
 
				+	 */
			
 
				+	load_subblock_from_buffer_opencl(descr[0], descr[4], 0);
			
 
				+	load_subblock_from_buffer_opencl(descr[1], descr[5], 0);
			
 
				+
			
 
				+	/*
			
 
				+	 *	Stencils ... do the actual work here :) TODO
			
 
				+	 */
			
 
				+
			
 
				+	for (i=1; i<=K; i++)
			
 
				+	{
			
 
				+		struct starpu_block_interface *oldb = descr[i%2], *newb = descr[(i+1)%2];
			
 
				+		TYPE *old = (void*) oldb->dev_handle, *newer = (void*) newb->dev_handle;
			
 
				+
			
 
				+		/* Shadow data */
			
 
				+		opencl_shadow_host(block->bz, old, oldb->nx, oldb->ny, oldb->nz, oldb->ldy, oldb->ldz, i);
			
 
				+
			
 
				+		/* And perform actual computation */
			
 
				+#ifdef LIFE
			
 
				+		opencl_life_update_host(block->bz, old, newer, oldb->nx, oldb->ny, oldb->nz, oldb->ldy, oldb->ldz, i);
			
 
				+#else
			
 
				+		cl_event event;
			
 
				+                cl_int ret = clEnqueueCopyBuffer(cq, old, newer, 0, 0, oldb->nx * oldb->ny * oldb->nz * sizeof(*newer), 0, NULL, &event);
			
 
				+		if (ret != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(ret);
			
 
				+
			
 
				+#endif /* LIFE */
			
 
				+	}
			
 
				+
			
 
				+	if (block->bz == 0)
			
 
				+		starpu_top_update_data_integer(starpu_top_achieved_loop, ++achieved_iter);
			
 
				+}
			
 
				+#endif /* STARPU_USE_OPENCL */
			
 
				+
			
 
				+/*
			
 
				+ * cl_update (CPU version)
			
 
				+ */
			
 
				+void update_func_cpu(void *descr[], void *arg)
			
 
				+{
			
 
				+    unsigned z;
			
 
				+    starpu_codelet_unpack_args(arg, &z);
			
 
				+	struct block_description *block = get_block_description(z);
			
 
				+
			
 
				+	int workerid = starpu_worker_get_id_check();
			
 
				+	DEBUG( "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
			
 
				+	if (block->bz == 0)
			
 
				+		DEBUG("!!! DO update_func_cpu z %u CPU%d !!!\n", block->bz, workerid);
			
 
				+	else
			
 
				+		DEBUG("!!! DO update_func_cpu z %u CPU%d !!!\n", block->bz, workerid);
			
 
				+#if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID)
			
 
				+	int rank = 0;
			
 
				+	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
			
 
				+	DEBUG( "!!!           RANK %d            !!!\n", rank);
			
 
				+#endif
			
 
				+	DEBUG( "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
			
 
				+
			
 
				+	unsigned block_size_z = get_block_size(block->bz);
			
 
				+	unsigned i;
			
 
				+	update_per_worker[workerid]++;
			
 
				+
			
 
				+	record_who_runs_what(block);
			
 
				+
			
 
				+	/*
			
 
				+	 *	Load neighbours' boundaries : TOP
			
 
				+	 */
			
 
				+
			
 
				+	/* The offset along the z axis is (block_size_z + K) */
			
 
				+	load_subblock_from_buffer_cpu(descr[0], descr[2], block_size_z+K);
			
 
				+	load_subblock_from_buffer_cpu(descr[1], descr[3], block_size_z+K);
			
 
				+
			
 
				+	/*
			
 
				+	 *	Load neighbours' boundaries : BOTTOM
			
 
				+	 */
			
 
				+	load_subblock_from_buffer_cpu(descr[0], descr[4], 0);
			
 
				+	load_subblock_from_buffer_cpu(descr[1], descr[5], 0);
			
 
				+
			
 
				+	/*
			
 
				+	 *	Stencils ... do the actual work here :) TODO
			
 
				+	 */
			
 
				+
			
 
				+	for (i=1; i<=K; i++)
			
 
				+	{
			
 
				+		struct starpu_block_interface *oldb = (struct starpu_block_interface *) descr[i%2], *newb = (struct starpu_block_interface *) descr[(i+1)%2];
			
 
				+		TYPE *old = (TYPE*) oldb->ptr, *newer = (TYPE*) newb->ptr;
			
 
				+
			
 
				+		/* Shadow data */
			
 
				+		unsigned ldy = oldb->ldy, ldz = oldb->ldz;
			
 
				+		unsigned nx = oldb->nx, ny = oldb->ny, nz = oldb->nz;
			
 
				+		unsigned x, y, z;
			
 
				+		unsigned stepx = 1;
			
 
				+		unsigned stepy = 1;
			
 
				+		unsigned stepz = 1;
			
 
				+		unsigned idx = 0;
			
 
				+		unsigned idy = 0;
			
 
				+		unsigned idz = 0;
			
 
				+		TYPE *ptr = old;
			
 
				+
			
 
				+#		include "shadow.h"
			
 
				+
			
 
				+		/* And perform actual computation */
			
 
				+#ifdef LIFE
			
 
				+		life_update(block->bz, old, newer, oldb->nx, oldb->ny, oldb->nz, oldb->ldy, oldb->ldz, i);
			
 
				+#else
			
 
				+		memcpy(newer, old, oldb->nx * oldb->ny * oldb->nz * sizeof(*newer));
			
 
				+#endif /* LIFE */
			
 
				+	}
			
 
				+
			
 
				+	if (block->bz == 0)
			
 
				+		starpu_top_update_data_integer(starpu_top_achieved_loop, ++achieved_iter);
			
 
				+}
			
 
				+
			
 
				+/* Performance model and codelet structure */
			
 
				+static struct starpu_perfmodel cl_update_model =
			
 
				+{
			
 
				+	.type = STARPU_HISTORY_BASED,
			
 
				+	.symbol = "cl_update" 
			
 
				+};
			
 
				+
			
 
				+struct starpu_codelet cl_update =
			
 
				+{
			
 
				+	.cpu_funcs = {update_func_cpu},
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+	.cuda_funcs = {update_func_cuda},
			
 
				+	.cuda_flags = {STARPU_CUDA_ASYNC},
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+	.opencl_funcs = {update_func_opencl},
			
 
				+	.opencl_flags = {STARPU_OPENCL_ASYNC},
			
 
				+#endif
			
 
				+	.model = &cl_update_model,
			
 
				+	.nbuffers = 6,
			
 
				+	.modes = {STARPU_RW, STARPU_RW, STARPU_R, STARPU_R, STARPU_R, STARPU_R}
			
 
				+};
			
 
				+
			
 
				+/*
			
 
				+ * Save the block internal boundaries to give them to our neighbours.
			
 
				+ */
			
 
				+
			
 
				+/* CPU version */
			
 
				+static void load_subblock_into_buffer_cpu(void *_block,
			
 
				+					void *_boundary,
			
 
				+					unsigned firstz)
			
 
				+{
			
 
				+	struct starpu_block_interface *block = (struct starpu_block_interface *)_block;
			
 
				+	struct starpu_block_interface *boundary = (struct starpu_block_interface *)_boundary;
			
 
				+	check_load(block, boundary);
			
 
				+
			
 
				+	/* We do a contiguous memory transfer */
			
 
				+	size_t boundary_size = K*block->ldz*block->elemsize;
			
 
				+
			
 
				+	unsigned offset = firstz*block->ldz;
			
 
				+	TYPE *block_data = (TYPE *)block->ptr;
			
 
				+	TYPE *boundary_data = (TYPE *)boundary->ptr;
			
 
				+	memcpy(boundary_data, &block_data[offset], boundary_size);
			
 
				+}
			
 
				+
			
 
				+/* CUDA version */
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+static void load_subblock_into_buffer_cuda(void *_block,
			
 
				+					void *_boundary,
			
 
				+					unsigned firstz)
			
 
				+{
			
 
				+	struct starpu_block_interface *block = (struct starpu_block_interface *)_block;
			
 
				+	struct starpu_block_interface *boundary = (struct starpu_block_interface *)_boundary;
			
 
				+	check_load(block, boundary);
			
 
				+
			
 
				+	/* We do a contiguous memory transfer */
			
 
				+	size_t boundary_size = K*block->ldz*block->elemsize;
			
 
				+
			
 
				+	unsigned offset = firstz*block->ldz;
			
 
				+	TYPE *block_data = (TYPE *)block->ptr;
			
 
				+	TYPE *boundary_data = (TYPE *)boundary->ptr;
			
 
				+	cudaMemcpyAsync(boundary_data, &block_data[offset], boundary_size, cudaMemcpyDeviceToDevice, starpu_cuda_get_local_stream());
			
 
				+}
			
 
				+#endif /* STARPU_USE_CUDA */
			
 
				+
			
 
				+/* OPENCL version */
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+static void load_subblock_into_buffer_opencl(struct starpu_block_interface *block,
			
 
				+					struct starpu_block_interface *boundary,
			
 
				+					unsigned firstz)
			
 
				+{
			
 
				+	check_load(block, boundary);
			
 
				+
			
 
				+	/* We do a contiguous memory transfer */
			
 
				+	size_t boundary_size = K*block->ldz*block->elemsize;
			
 
				+
			
 
				+	unsigned offset = firstz*block->ldz;
			
 
				+	cl_mem block_data = (cl_mem)block->dev_handle;
			
 
				+	cl_mem boundary_data = (cl_mem)boundary->dev_handle;
			
 
				+
			
 
				+        cl_command_queue cq;
			
 
				+        starpu_opencl_get_current_queue(&cq);
			
 
				+
			
 
				+        cl_int ret = clEnqueueCopyBuffer(cq, block_data, boundary_data, offset, 0, boundary_size, 0, NULL, NULL);
			
 
				+	if (ret != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(ret);
			
 
				+}
			
 
				+#endif /* STARPU_USE_OPENCL */
			
 
				+
			
 
				+/* Record how many top/bottom saves each worker performed */
			
 
				+unsigned top_per_worker[STARPU_NMAXWORKERS];
			
 
				+unsigned bottom_per_worker[STARPU_NMAXWORKERS];
			
 
				+
			
 
				+/* top save, CPU version */
			
 
				+void dummy_func_top_cpu(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *arg)
			
 
				+{
			
 
				+    unsigned z;
			
 
				+    starpu_codelet_unpack_args(arg, &z);
			
 
				+	struct block_description *block = get_block_description(z);
			
 
				+
			
 
				+	int workerid = starpu_worker_get_id_check();
			
 
				+	top_per_worker[workerid]++;
			
 
				+
			
 
				+	DEBUG( "DO SAVE Bottom block %d\n", block->bz);
			
 
				+
			
 
				+	/* The offset along the z axis is (block_size_z + K)- K */
			
 
				+	unsigned block_size_z = get_block_size(block->bz);
			
 
				+
			
 
				+	load_subblock_into_buffer_cpu(descr[0], descr[2], block_size_z);
			
 
				+	load_subblock_into_buffer_cpu(descr[1], descr[3], block_size_z);
			
 
				+}
			
 
				+
			
 
				+/* bottom save, CPU version */
			
 
				+void dummy_func_bottom_cpu(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *arg)
			
 
				+{
			
 
				+    unsigned z;
			
 
				+    starpu_codelet_unpack_args(arg, &z);
			
 
				+	struct block_description *block = get_block_description(z);
			
 
				+    STARPU_ASSERT(block);
			
 
				+
			
 
				+	int workerid = starpu_worker_get_id_check();
			
 
				+	bottom_per_worker[workerid]++;
			
 
				+
			
 
				+	DEBUG( "DO SAVE Top block %d\n", block->bz);
			
 
				+
			
 
				+	load_subblock_into_buffer_cpu(descr[0], descr[2], K);
			
 
				+	load_subblock_into_buffer_cpu(descr[1], descr[3], K);
			
 
				+}
			
 
				+
			
 
				+/* top save, CUDA version */
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+static void dummy_func_top_cuda(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *arg)
			
 
				+{
			
 
				+    unsigned z;
			
 
				+    starpu_codelet_unpack_args(arg, &z);
			
 
				+	struct block_description *block = get_block_description(z);
			
 
				+
			
 
				+	int workerid = starpu_worker_get_id_check();
			
 
				+	top_per_worker[workerid]++;
			
 
				+
			
 
				+	DEBUG( "DO SAVE Top block %d\n", block->bz);
			
 
				+
			
 
				+	/* The offset along the z axis is (block_size_z + K)- K */
			
 
				+	unsigned block_size_z = get_block_size(block->bz);
			
 
				+
			
 
				+	load_subblock_into_buffer_cuda(descr[0], descr[2], block_size_z);
			
 
				+	load_subblock_into_buffer_cuda(descr[1], descr[3], block_size_z);
			
 
				+}
			
 
				+
			
 
				+/* bottom save, CUDA version */
			
 
				+static void dummy_func_bottom_cuda(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *arg)
			
 
				+{
			
 
				+	unsigned z;
			
 
				+	starpu_codelet_unpack_args(arg, &z);
			
 
				+	struct block_description *block = get_block_description(z);
			
 
				+
			
 
				+	int workerid = starpu_worker_get_id_check();
			
 
				+	bottom_per_worker[workerid]++;
			
 
				+
			
 
				+	DEBUG( "DO SAVE Bottom block %d on CUDA\n", block->bz);
			
 
				+
			
 
				+	load_subblock_into_buffer_cuda(descr[0], descr[2], K);
			
 
				+	load_subblock_into_buffer_cuda(descr[1], descr[3], K);
			
 
				+}
			
 
				+#endif /* STARPU_USE_CUDA */
			
 
				+
			
 
				+/* top save, OpenCL version */
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+static void dummy_func_top_opencl(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *arg)
			
 
				+{
			
 
				+    unsigned z;
			
 
				+    starpu_codelet_unpack_args(arg, &z);
			
 
				+	struct block_description *block = get_block_description(z);
			
 
				+
			
 
				+	int workerid = starpu_worker_get_id_check();
			
 
				+	top_per_worker[workerid]++;
			
 
				+
			
 
				+	DEBUG( "DO SAVE Top block %d\n", block->bz);
			
 
				+
			
 
				+	/* The offset along the z axis is (block_size_z + K)- K */
			
 
				+	unsigned block_size_z = get_block_size(block->bz);
			
 
				+
			
 
				+	load_subblock_into_buffer_opencl(descr[0], descr[2], block_size_z);
			
 
				+	load_subblock_into_buffer_opencl(descr[1], descr[3], block_size_z);
			
 
				+}
			
 
				+
			
 
				+/* bottom save, OPENCL version */
			
 
				+static void dummy_func_bottom_opencl(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *arg)
			
 
				+{
			
 
				+    unsigned z;
			
 
				+    starpu_codelet_unpack_args(arg, &z);
			
 
				+	struct block_description *block = get_block_description(z);
			
 
				+
			
 
				+	int workerid = starpu_worker_get_id_check();
			
 
				+	bottom_per_worker[workerid]++;
			
 
				+
			
 
				+	DEBUG( "DO SAVE Bottom block %d on OPENCL\n", block->bz);
			
 
				+
			
 
				+	load_subblock_into_buffer_opencl(descr[0], descr[2], K);
			
 
				+	load_subblock_into_buffer_opencl(descr[1], descr[3], K);
			
 
				+}
			
 
				+#endif /* STARPU_USE_OPENCL */
			
 
				+
			
 
				+/* Performance models and codelet for save */
			
 
				+static struct starpu_perfmodel save_cl_bottom_model =
			
 
				+{
			
 
				+	.type = STARPU_HISTORY_BASED,
			
 
				+	.symbol = "save_cl_bottom" 
			
 
				+};
			
 
				+
			
 
				+static struct starpu_perfmodel save_cl_top_model =
			
 
				+{
			
 
				+	.type = STARPU_HISTORY_BASED,
			
 
				+	.symbol = "save_cl_top" 
			
 
				+};
			
 
				+
			
 
				+struct starpu_codelet save_cl_bottom =
			
 
				+{
			
 
				+	.cpu_funcs = {dummy_func_bottom_cpu},
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+	.cuda_funcs = {dummy_func_bottom_cuda},
			
 
				+	.cuda_flags = {STARPU_CUDA_ASYNC},
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+	.opencl_funcs = {dummy_func_bottom_opencl},
			
 
				+	.opencl_flags = {STARPU_OPENCL_ASYNC},
			
 
				+#endif
			
 
				+	.model = &save_cl_bottom_model,
			
 
				+	.nbuffers = 4,
			
 
				+	.modes = {STARPU_R, STARPU_R, STARPU_W, STARPU_W}
			
 
				+};
			
 
				+
			
 
				+struct starpu_codelet save_cl_top =
			
 
				+{
			
 
				+	.cpu_funcs = {dummy_func_top_cpu},
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+	.cuda_funcs = {dummy_func_top_cuda},
			
 
				+	.cuda_flags = {STARPU_CUDA_ASYNC},
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+	.opencl_funcs = {dummy_func_top_opencl},
			
 
				+	.opencl_flags = {STARPU_OPENCL_ASYNC},
			
 
				+#endif
			
 
				+	.model = &save_cl_top_model,
			
 
				+	.nbuffers = 4,
			
 
				+	.modes = {STARPU_R, STARPU_R, STARPU_W, STARPU_W}
			
 
				+};
			
 
				+
			
 
				+/* Memset a block's buffers */
			
 
				+static void memset_func(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *arg)
			
 
				+{
			
 
				+    unsigned sizex, sizey, bz;
			
 
				+    starpu_codelet_unpack_args(arg, &sizex, &sizey, &bz);
			
 
				+	struct block_description *block = get_block_description(bz);
			
 
				+    unsigned size_bz = get_block_size(bz);
			
 
				+
			
 
				+    unsigned x,y,z;
			
 
				+    for (x = 0; x < sizex + 2*K; x++)
			
 
				+    {
			
 
				+        for (y = 0; y < sizey + 2*K; y++)
			
 
				+        {
			
 
				+            /* Main blocks */
			
 
				+            for (z = 0; z < size_bz + 2*K; z++)
			
 
				+            {
			
 
				+                block->layers[0][(x)+(y)*(sizex + 2*K)+(z)*(sizex+2*K)*(sizey+2*K)] = 0; 
			
 
				+                block->layers[1][(x)+(y)*(sizex + 2*K)+(z)*(sizex+2*K)*(sizey+2*K)] = 0; 
			
 
				+            }
			
 
				+            for (z = 0; z < K; z++)
			
 
				+            {
			
 
				+                /* Boundary blocks : Top */
			
 
				+                block->boundaries[T][0][(x)+(y)*(sizex + 2*K)+(z)*(sizex+2*K)*(sizey+2*K)] = 0; 
			
 
				+                block->boundaries[T][1][(x)+(y)*(sizex + 2*K)+(z)*(sizex+2*K)*(sizey+2*K)] = 0; 
			
 
				+
			
 
				+                /* Boundary blocks : Bottom */
			
 
				+                block->boundaries[B][0][(x)+(y)*(sizex + 2*K)+(z)*(sizex+2*K)*(sizey+2*K)] = 0; 
			
 
				+                block->boundaries[B][1][(x)+(y)*(sizex + 2*K)+(z)*(sizex+2*K)*(sizey+2*K)] = 0; 
			
 
				+            }
			
 
				+
			
 
				+        }
			
 
				+    }
			
 
				+    //memset(block->layers[0], 0, (sizex + 2*K)*(sizey + 2*K)*(size_bz + 2*K)*sizeof(block->layers[0]));
			
 
				+    //memset(block->layers[1], 0, (sizex + 2*K)*(sizey + 2*K)*(size_bz + 2*K)*sizeof(block->layers[1]));
			
 
				+
			
 
				+    //memset(block->boundaries[T][0], 0, (sizex + 2*K)*(sizey + 2*K)*K*sizeof(block->boundaries[T][0]));
			
 
				+    //memset(block->boundaries[T][1], 0, (sizex + 2*K)*(sizey + 2*K)*K*sizeof(block->boundaries[T][1]));
			
 
				+
			
 
				+    //memset(block->boundaries[B][0], 0, (sizex + 2*K)*(sizey + 2*K)*K*sizeof(block->boundaries[B][0]));
			
 
				+    //memset(block->boundaries[B][1], 0, (sizex + 2*K)*(sizey + 2*K)*K*sizeof(block->boundaries[B][1]));
			
 
				+}
			
 
				+
			
 
				+static double memset_cost_function(struct starpu_task *task, unsigned nimpl)
			
 
				+{
			
 
				+	(void) task;
			
 
				+	(void) nimpl;
			
 
				+	return 0.000001;
			
 
				+}
			
 
				+
			
 
				+static struct starpu_perfmodel memset_model =
			
 
				+{
			
 
				+	.type = STARPU_COMMON,
			
 
				+	.cost_function = memset_cost_function,
			
 
				+	.symbol = "memset"
			
 
				+};
			
 
				+
			
 
				+struct starpu_codelet cl_memset =
			
 
				+{
			
 
				+	.cpu_funcs = {memset_func},
			
 
				+	.model = &memset_model,
			
 
				+	.nbuffers = 6,
			
 
				+	.modes = {STARPU_W, STARPU_W, STARPU_W, STARPU_W, STARPU_W, STARPU_W}
			
 
				+};
			
 
				+
			
 
				+/* Initialize a block's layer */
			
 
				+static void initlayer_func(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *arg)
			
 
				+{
			
 
				+    unsigned sizex, sizey, bz;
			
 
				+    starpu_codelet_unpack_args(arg, &sizex, &sizey, &bz);
			
 
				+	struct block_description *block = get_block_description(bz);
			
 
				+    unsigned size_bz = get_block_size(bz);
			
 
				+
			
 
				+    /* Initialize layer with some random data */
			
 
				+    unsigned x, y, z;
			
 
				+    unsigned sum = 0;
			
 
				+    for (x = 0; x < sizex; x++)
			
 
				+        for (y = 0; y < sizey; y++)
			
 
				+            for (z = 0; z < size_bz; z++)
			
 
				+                sum += block->layers[0][(K+x)+(K+y)*(sizex + 2*K)+(K+z)*(sizex+2*K)*(sizey+2*K)] = (int)((x/7.+y/13.+(bz*size_bz + z)/17.) * 10.) % 2;
			
 
				+}
			
 
				+
			
 
				+static double initlayer_cost_function(struct starpu_task *task, unsigned nimpl)
			
 
				+{
			
 
				+	(void) task;
			
 
				+	(void) nimpl;
			
 
				+	return 0.000001;
			
 
				+}
			
 
				+
			
 
				+static struct starpu_perfmodel initlayer_model =
			
 
				+{
			
 
				+	.type = STARPU_COMMON,
			
 
				+	.cost_function = initlayer_cost_function,
			
 
				+	.symbol = "initlayer"
			
 
				+};
			
 
				+
			
 
				+struct starpu_codelet cl_initlayer =
			
 
				+{
			
 
				+	.cpu_funcs = {initlayer_func},
			
 
				+	.model = &initlayer_model,
			
 
				+	.nbuffers = 1,
			
 
				+	.modes = {STARPU_W}
			
 
				+};
			
 
				+
			
--- a/examples/stencil/implicit-stencil-tasks.c
+++ b/examples/stencil/implicit-stencil-tasks.c
@@ -0,0 +1,204 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2010, 2013-2015  Université de Bordeaux
			
 
				+ * Copyright (C) 2012, 2013, 2015, 2016  CNRS
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include "implicit-stencil.h"
			
 
				+
			
 
				+#define BIND_LAST 1
			
 
				+
			
 
				+/*
			
 
				+ * Schedule tasks for updates and saves
			
 
				+ */
			
 
				+
			
 
				+/*
			
 
				+ * NB: iter = 0: initialization phase, TAG_U(z, 0) = TAG_INIT
			
 
				+ *
			
 
				+ * dir is -1 or +1.
			
 
				+ */
			
 
				+
			
 
				+#if 0
			
 
				+# define DEBUG(fmt, ...) fprintf(stderr,fmt,##__VA_ARGS__)
			
 
				+#else
			
 
				+# define DEBUG(fmt, ...)
			
 
				+#endif
			
 
				+
			
 
				+#ifdef STARPU_USE_MPI
			
 
				+#include <starpu_mpi.h>
			
 
				+#define starpu_insert_task(...) starpu_mpi_insert_task(MPI_COMM_WORLD, __VA_ARGS__)
			
 
				+#endif
			
 
				+
			
 
				+/*
			
 
				+ * Schedule initialization tasks
			
 
				+ */
			
 
				+
			
 
				+void create_task_memset(unsigned sizex, unsigned sizey, unsigned z)
			
 
				+{
			
 
				+	struct block_description *descr = get_block_description(z);
			
 
				+    struct starpu_codelet *codelet = &cl_memset;
			
 
				+
			
 
				+    int ret = starpu_insert_task(
			
 
				+            codelet,
			
 
				+            STARPU_VALUE,   &sizex,  sizeof(unsigned),
			
 
				+            STARPU_VALUE,   &sizey,  sizeof(unsigned),
			
 
				+            STARPU_VALUE,   &z,  sizeof(unsigned),
			
 
				+            STARPU_W,   descr->layers_handle[0],
			
 
				+            STARPU_W,   descr->layers_handle[1],
			
 
				+            STARPU_W,   descr->boundaries_handle[T][0],
			
 
				+            STARPU_W,   descr->boundaries_handle[T][1],
			
 
				+            STARPU_W,   descr->boundaries_handle[B][0],
			
 
				+            STARPU_W,   descr->boundaries_handle[B][1],
			
 
				+                0);
			
 
				+
			
 
				+    if (ret)
			
 
				+    {
			
 
				+        FPRINTF(stderr, "Could not submit task save: %d\n", ret);
			
 
				+        if (ret == -ENODEV)
			
 
				+            exit(77);
			
 
				+        STARPU_ABORT();
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+void create_task_initlayer(unsigned sizex, unsigned sizey, unsigned z)
			
 
				+{
			
 
				+	struct block_description *descr = get_block_description(z);
			
 
				+    struct starpu_codelet *codelet = &cl_initlayer;
			
 
				+
			
 
				+    int ret = starpu_insert_task(
			
 
				+            codelet,
			
 
				+            STARPU_VALUE,   &sizex,  sizeof(unsigned),
			
 
				+            STARPU_VALUE,   &sizey,  sizeof(unsigned),
			
 
				+            STARPU_VALUE,   &z,  sizeof(unsigned),
			
 
				+            STARPU_W,   descr->layers_handle[0],
			
 
				+                0);
			
 
				+
			
 
				+    if (ret)
			
 
				+    {
			
 
				+        FPRINTF(stderr, "Could not submit task save: %d\n", ret);
			
 
				+        if (ret == -ENODEV)
			
 
				+            exit(77);
			
 
				+        STARPU_ABORT();
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Schedule saving boundaries of blocks to communication buffers
			
 
				+ */
			
 
				+
			
 
				+static void create_task_save_local(unsigned iter, unsigned z, int dir, int local_rank)
			
 
				+{
			
 
				+	struct block_description *descr = get_block_description(z);
			
 
				+	struct starpu_codelet *codelet;
			
 
				+	int ret;
			
 
				+
			
 
				+	codelet = (dir == -1)?&save_cl_bottom:&save_cl_top;
			
 
				+	ret = starpu_insert_task(
			
 
				+				 codelet,
			
 
				+				 STARPU_VALUE,   &z,  sizeof(unsigned),
			
 
				+				 STARPU_R,   descr->layers_handle[0],
			
 
				+				 STARPU_R,   descr->layers_handle[1],
			
 
				+				 STARPU_W,   descr->boundaries_handle[(1-dir)/2][0],
			
 
				+				 STARPU_W,   descr->boundaries_handle[(1-dir)/2][1],
			
 
				+				 STARPU_PRIORITY,    STARPU_MAX_PRIO,
			
 
				+				 0);
			
 
				+
			
 
				+	if (ret)
			
 
				+	{
			
 
				+		FPRINTF(stderr, "Could not submit task save: %d\n", ret);
			
 
				+		if (ret == -ENODEV)
			
 
				+			exit(77);
			
 
				+		STARPU_ABORT();
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Schedule update computation in computation buffer
			
 
				+ */
			
 
				+
			
 
				+void create_task_update(unsigned iter, unsigned z, int local_rank)
			
 
				+{
			
 
				+	STARPU_ASSERT(iter != 0);
			
 
				+
			
 
				+	unsigned old_layer = (K*(iter-1)) % 2;
			
 
				+	unsigned new_layer = (old_layer + 1) % 2;
			
 
				+
			
 
				+	struct block_description *descr = get_block_description(z);
			
 
				+	struct block_description *bottom_neighbour = descr->boundary_blocks[B];
			
 
				+	struct block_description *top_neighbour = descr->boundary_blocks[T];
			
 
				+
			
 
				+	struct starpu_codelet *codelet = &cl_update;
			
 
				+
			
 
				+    // Simple-level prio
			
 
				+    //int prio = ((bottom_neighbour->mpi_node != local_rank) || (top_neighbour->mpi_node != local_rank )) ? STARPU_MAX_PRIO : STARPU_DEFAULT_PRIO;
			
 
				+
			
 
				+    // Two-level prio
			
 
				+    int prio = ((bottom_neighbour->mpi_node != local_rank) || (top_neighbour->mpi_node != local_rank )) ? STARPU_MAX_PRIO :
			
 
				+               ((bottom_neighbour->boundary_blocks[B]->mpi_node != local_rank) || (top_neighbour->boundary_blocks[T]->mpi_node != local_rank )) ? STARPU_MAX_PRIO-1 : STARPU_DEFAULT_PRIO;
			
 
				+
			
 
				+    int ret = starpu_insert_task(
			
 
				+            codelet,
			
 
				+            STARPU_VALUE,   &z,  sizeof(unsigned),
			
 
				+	        STARPU_RW,      descr->layers_handle[old_layer],
			
 
				+	        STARPU_RW,      descr->layers_handle[new_layer],
			
 
				+	        STARPU_R,       bottom_neighbour->boundaries_handle[T][old_layer],
			
 
				+	        STARPU_R,       bottom_neighbour->boundaries_handle[T][new_layer],
			
 
				+	        STARPU_R,       top_neighbour->boundaries_handle[B][old_layer],
			
 
				+	        STARPU_R,       top_neighbour->boundaries_handle[B][new_layer],
			
 
				+            STARPU_PRIORITY,    prio,
			
 
				+                0);
			
 
				+	if (ret)
			
 
				+	{
			
 
				+		FPRINTF(stderr, "Could not submit task update block: %d\n", ret);
			
 
				+		if (ret == -ENODEV)
			
 
				+			exit(77);
			
 
				+		STARPU_ABORT();
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Create all the tasks
			
 
				+ */
			
 
				+void create_tasks(int rank)
			
 
				+{
			
 
				+	int iter;
			
 
				+	int bz;
			
 
				+	int niter = get_niter();
			
 
				+	int nbz = get_nbz();
			
 
				+
			
 
				+	for (iter = 0; iter <= niter; iter++)
			
 
				+	{
			
 
				+	     for (bz = 0; bz < nbz; bz++)
			
 
				+	     {
			
 
				+		    if ((iter > 0) && ((get_block_mpi_node(bz) == rank)|| (get_block_mpi_node(bz+1) == rank)|| (get_block_mpi_node(bz-1) == rank)))
			
 
				+			    create_task_update(iter, bz, rank);
			
 
				+	     }
			
 
				+
			
 
				+	     for (bz = 0; bz < nbz; bz++)
			
 
				+	     {
			
 
				+		     if (iter != niter)
			
 
				+		     {
			
 
				+			     int node_z = get_block_mpi_node(bz);
			
 
				+			     int node_z_and_b = get_block_mpi_node(bz-1);
			
 
				+			     int node_z_and_t = get_block_mpi_node(bz+1);
			
 
				+
			
 
				+			     if ((node_z == rank) || ((node_z != node_z_and_b) && (node_z_and_b == rank)))
			
 
				+				     create_task_save_local(iter, bz, +1, rank);
			
 
				+
			
 
				+			     if ((node_z == rank) || ((node_z != node_z_and_t) && (node_z_and_t == rank)))
			
 
				+				     create_task_save_local(iter, bz, -1, rank);
			
 
				+		     }
			
 
				+	     }
			
 
				+	}
			
 
				+}
			
--- a/examples/stencil/implicit-stencil.c
+++ b/examples/stencil/implicit-stencil.c
@@ -0,0 +1,387 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2010, 2011, 2012, 2013, 2016  CNRS
			
 
				+ * Copyright (C) 2010-2012, 2014  Université de Bordeaux
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include "implicit-stencil.h"
			
 
				+
			
 
				+/* Main application */
			
 
				+
			
 
				+/* default parameter values */
			
 
				+static unsigned  bind_tasks = 0;
			
 
				+
			
 
				+static unsigned ticks = 1000;
			
 
				+
			
 
				+#ifdef STARPU_QUICK_CHECK
			
 
				+static unsigned niter = 4;
			
 
				+#define SIZE 16
			
 
				+#else
			
 
				+static unsigned niter = 32;
			
 
				+#define SIZE 128
			
 
				+#endif
			
 
				+
			
 
				+/* Problem size */
			
 
				+static unsigned sizex = SIZE;
			
 
				+static unsigned sizey = SIZE;
			
 
				+static unsigned sizez = 64*SIZE;
			
 
				+
			
 
				+/* Number of blocks (scattered over the different MPI processes) */
			
 
				+unsigned nbz = 64;
			
 
				+
			
 
				+/* StarPU top variables */
			
 
				+struct starpu_top_data* starpu_top_init_loop;
			
 
				+struct starpu_top_data* starpu_top_achieved_loop;
			
 
				+
			
 
				+double start;
			
 
				+double begin, end;
			
 
				+double timing; 
			
 
				+
			
 
				+/*
			
 
				+ *	Initialization
			
 
				+ */
			
 
				+
			
 
				+unsigned get_bind_tasks(void)
			
 
				+{
			
 
				+	return bind_tasks;
			
 
				+}
			
 
				+
			
 
				+unsigned get_nbz(void)
			
 
				+{
			
 
				+	return nbz;
			
 
				+}
			
 
				+
			
 
				+unsigned get_niter(void)
			
 
				+{
			
 
				+	return niter;
			
 
				+}
			
 
				+
			
 
				+unsigned get_ticks(void)
			
 
				+{
			
 
				+	return ticks;
			
 
				+}
			
 
				+
			
 
				+static void parse_args(int argc, char **argv)
			
 
				+{
			
 
				+	int i;
			
 
				+	for (i = 1; i < argc; i++)
			
 
				+	{
			
 
				+		if (strcmp(argv[i], "-b") == 0)
			
 
				+		{
			
 
				+			bind_tasks = 1;
			
 
				+		}
			
 
				+
			
 
				+		if (strcmp(argv[i], "-nbz") == 0)
			
 
				+		{
			
 
				+			nbz = atoi(argv[++i]);
			
 
				+		}
			
 
				+
			
 
				+		if (strcmp(argv[i], "-sizex") == 0)
			
 
				+		{
			
 
				+			sizex = atoi(argv[++i]);
			
 
				+		}
			
 
				+
			
 
				+		if (strcmp(argv[i], "-sizey") == 0)
			
 
				+		{
			
 
				+			sizey = atoi(argv[++i]);
			
 
				+		}
			
 
				+
			
 
				+		if (strcmp(argv[i], "-sizez") == 0)
			
 
				+		{
			
 
				+			sizez = atoi(argv[++i]);
			
 
				+		}
			
 
				+
			
 
				+		if (strcmp(argv[i], "-niter") == 0)
			
 
				+		{
			
 
				+			niter = atoi(argv[++i]);
			
 
				+		}
			
 
				+
			
 
				+		if (strcmp(argv[i], "-ticks") == 0)
			
 
				+		{
			
 
				+			ticks = atoi(argv[++i]);
			
 
				+		}
			
 
				+
			
 
				+		if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0)
			
 
				+		{
			
 
				+			 fprintf(stderr, "Usage : %s [options...]\n", argv[0]);
			
 
				+			 fprintf(stderr, "\n");
			
 
				+			 fprintf(stderr, "Options:\n");
			
 
				+			 fprintf(stderr, "-b			bind tasks on CPUs/GPUs\n");
			
 
				+			 fprintf(stderr, "-nbz <n>		Number of blocks on Z axis (%u by default)\n", nbz);
			
 
				+			 fprintf(stderr, "-size[xyz] <size>	Domain size on x/y/z axis (%ux%ux%u by default)\n", sizex, sizey, sizez);
			
 
				+			 fprintf(stderr, "-niter <n>		Number of iterations (%u by default)\n", niter);
			
 
				+			 fprintf(stderr, "-ticks <t>		How often to put ticks in the output (ms, %u by default)\n", ticks);
			
 
				+			 exit(0);
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+static void init_problem(int argc, char **argv, int rank, int world_size)
			
 
				+{
			
 
				+	parse_args(argc, argv);
			
 
				+
			
 
				+	if (getenv("STARPU_TOP"))
			
 
				+	{
			
 
				+		starpu_top_init_loop = starpu_top_add_data_integer("Task creation iter", 0, niter, 1);
			
 
				+		starpu_top_achieved_loop = starpu_top_add_data_integer("Task achieved iter", 0, niter, 1);
			
 
				+		starpu_top_init_and_wait("stencil_top example");
			
 
				+	}
			
 
				+	create_blocks_array(sizex, sizey, sizez, nbz);
			
 
				+
			
 
				+	/* Select the MPI process which should compute the different blocks */
			
 
				+	assign_blocks_to_mpi_nodes(world_size);
			
 
				+
			
 
				+	assign_blocks_to_workers(rank);
			
 
				+
			
 
				+	/* Allocate the different memory blocks, if used by the MPI process */
			
 
				+	start = starpu_timing_now();
			
 
				+
			
 
				+	allocate_memory_on_node(rank);
			
 
				+
			
 
				+	end = starpu_timing_now();
			
 
				+	timing = end - begin;
			
 
				+
			
 
				+	display_memory_consumption(rank, timing);
			
 
				+
			
 
				+	who_runs_what_len = 2*niter;
			
 
				+	who_runs_what = (int *) calloc(nbz * who_runs_what_len, sizeof(*who_runs_what));
			
 
				+	who_runs_what_index = (int *) calloc(nbz, sizeof(*who_runs_what_index));
			
 
				+	last_tick = (double *) calloc(nbz, sizeof(*last_tick));
			
 
				+}
			
 
				+
			
 
				+static void free_problem(int rank)
			
 
				+{
			
 
				+    free_memory_on_node(rank);
			
 
				+	free_blocks_array();
			
 
				+	free(who_runs_what);
			
 
				+	free(who_runs_what_index);
			
 
				+	free(last_tick);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ *	Main body
			
 
				+ */
			
 
				+
			
 
				+void f(unsigned task_per_worker[STARPU_NMAXWORKERS])
			
 
				+{
			
 
				+	unsigned total = 0;
			
 
				+	int worker;
			
 
				+
			
 
				+	for (worker = 0; worker < STARPU_NMAXWORKERS; worker++)
			
 
				+		total += task_per_worker[worker];
			
 
				+	for (worker = 0; worker < STARPU_NMAXWORKERS; worker++)
			
 
				+	{
			
 
				+		if (task_per_worker[worker])
			
 
				+		{
			
 
				+			char name[32];
			
 
				+			starpu_worker_get_name(worker, name, sizeof(name));
			
 
				+			FPRINTF(stderr,"\t%s -> %u (%2.2f%%)\n", name, task_per_worker[worker], (100.0*task_per_worker[worker])/total);
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+unsigned global_workerid(unsigned local_workerid)
			
 
				+{
			
 
				+#ifdef STARPU_USE_MPI
			
 
				+	int rank;
			
 
				+	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
			
 
				+	unsigned workers_per_node = starpu_worker_get_count();
			
 
				+
			
 
				+	return (local_workerid + rank*workers_per_node);
			
 
				+#else
			
 
				+	return local_workerid;
			
 
				+#endif
			
 
				+}
			
 
				+
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	int rank;
			
 
				+	int world_size;
			
 
				+	int ret;
			
 
				+
			
 
				+#if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID)
			
 
				+	int thread_support;
			
 
				+	if (MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &thread_support))
			
 
				+	{
			
 
				+		FPRINTF(stderr, "MPI_Init_thread failed\n");
			
 
				+	}
			
 
				+	if (thread_support == MPI_THREAD_FUNNELED)
			
 
				+		FPRINTF(stderr,"Warning: MPI only has funneled thread support, not serialized, hoping this will work\n");
			
 
				+	if (thread_support < MPI_THREAD_FUNNELED)
			
 
				+		FPRINTF(stderr,"Warning: MPI does not have thread support!\n");
			
 
				+	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
			
 
				+	MPI_Comm_size(MPI_COMM_WORLD, &world_size);
			
 
				+#else
			
 
				+	rank = 0;
			
 
				+	world_size = 1;
			
 
				+#endif
			
 
				+
			
 
				+	if (rank == 0)
			
 
				+	{
			
 
				+		FPRINTF(stderr, "Running on %d nodes\n", world_size);
			
 
				+		fflush(stderr);
			
 
				+	}
			
 
				+
			
 
				+	ret = starpu_init(NULL);
			
 
				+	if (ret == -ENODEV) return 77;
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
			
 
				+
			
 
				+#if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID)
			
 
				+	ret = starpu_mpi_init(NULL, NULL, 0);
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
			
 
				+#endif
			
 
				+
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+        opencl_life_init();
			
 
				+        opencl_shadow_init();
			
 
				+#endif /*STARPU_USE_OPENCL*/
			
 
				+
			
 
				+	init_problem(argc, argv, rank, world_size);
			
 
				+
			
 
				+#if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID)
			
 
				+	int barrier_ret = MPI_Barrier(MPI_COMM_WORLD);
			
 
				+	STARPU_ASSERT(barrier_ret == MPI_SUCCESS);
			
 
				+#endif
			
 
				+	if (rank == 0)
			
 
				+		FPRINTF(stderr, "GO !\n");
			
 
				+
			
 
				+	start = starpu_timing_now();
			
 
				+
			
 
				+	begin = starpu_timing_now();
			
 
				+
			
 
				+	create_tasks(rank);
			
 
				+
			
 
				+	//starpu_tag_notify_from_apps(TAG_INIT_TASK);
			
 
				+
			
 
				+	//wait_end_tasks(rank);
			
 
				+
			
 
				+    starpu_task_wait_for_all();
			
 
				+
			
 
				+	end = starpu_timing_now();
			
 
				+
			
 
				+#if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID)
			
 
				+	barrier_ret = MPI_Barrier(MPI_COMM_WORLD);
			
 
				+	STARPU_ASSERT(barrier_ret == MPI_SUCCESS);
			
 
				+#endif
			
 
				+
			
 
				+#if 0
			
 
				+	check(rank);
			
 
				+#endif
			
 
				+
			
 
				+	/*display_debug(nbz, niter, rank);*/
			
 
				+
			
 
				+	/* timing in us */
			
 
				+	timing = end - begin;
			
 
				+
			
 
				+	double min_timing = timing;
			
 
				+	double max_timing = timing;
			
 
				+	double sum_timing = timing;
			
 
				+
			
 
				+#if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID)
			
 
				+	int reduce_ret;
			
 
				+
			
 
				+	reduce_ret = MPI_Reduce(&timing, &min_timing, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
			
 
				+	STARPU_ASSERT(reduce_ret == MPI_SUCCESS);
			
 
				+
			
 
				+	reduce_ret = MPI_Reduce(&timing, &max_timing, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
			
 
				+	STARPU_ASSERT(reduce_ret == MPI_SUCCESS);
			
 
				+
			
 
				+	reduce_ret = MPI_Reduce(&timing, &sum_timing, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
			
 
				+	STARPU_ASSERT(reduce_ret == MPI_SUCCESS);
			
 
				+
			
 
				+	/* XXX we should do a gather instead, here we assume that non initialized values are still 0 */
			
 
				+	int *who_runs_what_tmp = malloc(nbz * who_runs_what_len * sizeof(*who_runs_what));
			
 
				+	reduce_ret = MPI_Reduce(who_runs_what, who_runs_what_tmp, nbz * who_runs_what_len, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
			
 
				+	STARPU_ASSERT(reduce_ret == MPI_SUCCESS);
			
 
				+
			
 
				+	memcpy(who_runs_what, who_runs_what_tmp, nbz * who_runs_what_len * sizeof(*who_runs_what));
			
 
				+	free(who_runs_what_tmp);
			
 
				+
			
 
				+	/* XXX we should do a gather instead, here we assume that non initialized values are still 0 */
			
 
				+	int *who_runs_what_index_tmp = malloc(nbz * sizeof(*who_runs_what_index));
			
 
				+	reduce_ret = MPI_Reduce(who_runs_what_index, who_runs_what_index_tmp, nbz, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
			
 
				+	STARPU_ASSERT(reduce_ret == MPI_SUCCESS);
			
 
				+
			
 
				+	memcpy(who_runs_what_index, who_runs_what_index_tmp, nbz * sizeof(*who_runs_what_index));
			
 
				+	free(who_runs_what_index_tmp);
			
 
				+#endif
			
 
				+
			
 
				+	if (rank == 0)
			
 
				+	{
			
 
				+#if 1 
			
 
				+		FPRINTF(stderr, "update:\n");
			
 
				+		f(update_per_worker);
			
 
				+		FPRINTF(stderr, "top:\n");
			
 
				+		f(top_per_worker);
			
 
				+		FPRINTF(stderr, "bottom:\n");
			
 
				+		f(bottom_per_worker);
			
 
				+#endif
			
 
				+#if 1
			
 
				+		unsigned nzblocks_per_process = (nbz + world_size - 1) / world_size;
			
 
				+
			
 
				+		int iter;
			
 
				+		for (iter = 0; iter < who_runs_what_len; iter++)
			
 
				+		{
			
 
				+			unsigned last, bz;
			
 
				+			last = 1;
			
 
				+			for (bz = 0; bz < nbz; bz++)
			
 
				+			{
			
 
				+				if ((bz % nzblocks_per_process) == 0)
			
 
				+					FPRINTF(stderr, "| ");
			
 
				+
			
 
				+				if (who_runs_what_index[bz] <= iter)
			
 
				+					FPRINTF(stderr,"_ ");
			
 
				+				else
			
 
				+				{
			
 
				+					last = 0;
			
 
				+					if (who_runs_what[bz + iter * nbz] == -1)
			
 
				+						FPRINTF(stderr,"* ");
			
 
				+					else
			
 
				+						FPRINTF(stderr, "%d ", who_runs_what[bz + iter * nbz]);
			
 
				+				}
			
 
				+			}
			
 
				+			FPRINTF(stderr, "\n");
			
 
				+
			
 
				+			if (last)
			
 
				+				break;
			
 
				+		}
			
 
				+#endif
			
 
				+
			
 
				+		fflush(stderr);
			
 
				+
			
 
				+		FPRINTF(stdout, "Computation took: %f ms on %d MPI processes\n", max_timing/1000, world_size);
			
 
				+		FPRINTF(stdout, "\tMIN : %f ms\n", min_timing/1000);
			
 
				+		FPRINTF(stdout, "\tMAX : %f ms\n", max_timing/1000);
			
 
				+		FPRINTF(stdout, "\tAVG : %f ms\n", sum_timing/(world_size*1000));
			
 
				+	}
			
 
				+
			
 
				+	free_problem(rank);
			
 
				+
			
 
				+#if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID)
			
 
				+	starpu_mpi_shutdown();
			
 
				+#endif
			
 
				+
			
 
				+	starpu_shutdown();
			
 
				+
			
 
				+#if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID)
			
 
				+	MPI_Finalize();
			
 
				+#endif
			
 
				+
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+        opencl_life_free();
			
 
				+        opencl_shadow_free();
			
 
				+#endif /*STARPU_USE_OPENCL*/
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
--- a/examples/stencil/implicit-stencil.h
+++ b/examples/stencil/implicit-stencil.h
@@ -0,0 +1,157 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2010, 2011, 2012, 2013  CNRS
			
 
				+ * Copyright (C) 2010-2011, 2014  Université de Bordeaux
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __IMPLICIT_STENCIL_H__
			
 
				+#define __IMPLICIT_STENCIL_H__
			
 
				+
			
 
				+#include <stdlib.h>
			
 
				+#include <stdio.h>
			
 
				+#include <starpu.h>
			
 
				+
			
 
				+#ifndef __CUDACC__
			
 
				+#ifdef STARPU_USE_MPI
			
 
				+#include <mpi.h>
			
 
				+#include <starpu_mpi.h>
			
 
				+#endif
			
 
				+#endif
			
 
				+
			
 
				+#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
			
 
				+
			
 
				+#define LIFE
			
 
				+
			
 
				+#ifdef LIFE
			
 
				+#define TYPE	unsigned char
			
 
				+extern void life_update(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, int ldy, int ldz, int iter);
			
 
				+#else
			
 
				+#define TYPE	float
			
 
				+#endif
			
 
				+
			
 
				+#define K	1
			
 
				+
			
 
				+#define NDIRS 2
			
 
				+extern struct starpu_top_data* starpu_top_init_loop;
			
 
				+extern struct starpu_top_data* starpu_top_achieved_loop;
			
 
				+
			
 
				+
			
 
				+/* Split only on the z axis to make things simple */
			
 
				+typedef enum
			
 
				+{
			
 
				+	B = 0,
			
 
				+	T = 1
			
 
				+} direction;
			
 
				+
			
 
				+/* Description of a domain block */
			
 
				+struct block_description
			
 
				+{
			
 
				+	/* Which MPI node should process that block ? */
			
 
				+	int mpi_node;
			
 
				+
			
 
				+	unsigned preferred_worker;
			
 
				+
			
 
				+	unsigned bz;
			
 
				+
			
 
				+
			
 
				+	/* For each of the following buffers, there are two (0/1) buffers to
			
 
				+	 * make new/old switch costless. */
			
 
				+
			
 
				+	/* This is the computation buffer for this block, it includes
			
 
				+	 * neighbours' border to make computation easier */
			
 
				+	TYPE *layers[2];
			
 
				+	starpu_data_handle_t layers_handle[2];
			
 
				+
			
 
				+	/* This is the "save" buffer, i.e. a copy of our neighbour's border.
			
 
				+	 * This one is used for CPU/GPU or MPI communication (rather than the
			
 
				+	 * whole domain block) */
			
 
				+	TYPE *boundaries[NDIRS][2];
			
 
				+	starpu_data_handle_t boundaries_handle[NDIRS][2];
			
 
				+
			
 
				+	/* Shortcut pointer to the neighbours */
			
 
				+	struct block_description *boundary_blocks[NDIRS];
			
 
				+};
			
 
				+
			
 
				+#define TAG_INIT_TASK			((starpu_tag_t)1)
			
 
				+
			
 
				+starpu_tag_t TAG_FINISH(int z);
			
 
				+starpu_tag_t TAG_START(int z, int dir);
			
 
				+int MPI_TAG0(int z, int iter, int dir);
			
 
				+int MPI_TAG1(int z, int iter, int dir);
			
 
				+
			
 
				+#define MIN(a,b)	((a)<(b)?(a):(b))
			
 
				+
			
 
				+void create_blocks_array(unsigned sizex, unsigned sizey, unsigned sizez, unsigned nbz);
			
 
				+void free_blocks_array();
			
 
				+struct block_description *get_block_description(int z);
			
 
				+void assign_blocks_to_mpi_nodes(int world_size);
			
 
				+void allocate_memory_on_node(int rank);
			
 
				+void assign_blocks_to_workers(int rank);
			
 
				+void create_tasks(int rank);
			
 
				+void wait_end_tasks(int rank);
			
 
				+void check(int rank);
			
 
				+void free_memory_on_node(int rank);
			
 
				+
			
 
				+void display_memory_consumption(int rank, double time);
			
 
				+
			
 
				+int get_block_mpi_node(int z);
			
 
				+unsigned get_block_size(int z);
			
 
				+unsigned get_bind_tasks(void);
			
 
				+
			
 
				+unsigned get_nbz(void);
			
 
				+unsigned get_niter(void);
			
 
				+unsigned get_ticks(void);
			
 
				+
			
 
				+unsigned global_workerid(unsigned local_workerid);
			
 
				+
			
 
				+void create_task_memset(unsigned sizex, unsigned sizey, unsigned z);
			
 
				+void create_task_initlayer(unsigned sizex, unsigned sizey, unsigned z);
			
 
				+void create_task_update(unsigned iter, unsigned z, int local_rank);
			
 
				+void create_task_save(unsigned iter, unsigned z, int dir, int local_rank);
			
 
				+
			
 
				+extern int starpu_mpi_initialize(void);
			
 
				+extern int starpu_mpi_shutdown(void);
			
 
				+
			
 
				+/* kernels */
			
 
				+extern struct starpu_codelet cl_update;
			
 
				+extern struct starpu_codelet save_cl_bottom;
			
 
				+extern struct starpu_codelet save_cl_top;
			
 
				+extern struct starpu_codelet cl_memset;
			
 
				+extern struct starpu_codelet cl_initlayer;
			
 
				+
			
 
				+extern unsigned update_per_worker[STARPU_NMAXWORKERS];
			
 
				+extern unsigned top_per_worker[STARPU_NMAXWORKERS];
			
 
				+extern unsigned bottom_per_worker[STARPU_NMAXWORKERS];
			
 
				+
			
 
				+extern double start;
			
 
				+extern int who_runs_what_len;
			
 
				+extern int *who_runs_what;
			
 
				+extern int *who_runs_what_index;
			
 
				+extern double *last_tick;
			
 
				+
			
 
				+#ifndef _externC
			
 
				+#define _externC
			
 
				+#endif
			
 
				+
			
 
				+_externC void cuda_life_update_host(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, int ldy, int ldz, int iter);
			
 
				+_externC void cuda_shadow_host(int bz, TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz, int i);
			
 
				+
			
 
				+_externC void opencl_shadow_init(void);
			
 
				+_externC void opencl_shadow_free(void);
			
 
				+_externC void opencl_shadow_host(int bz, TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz, int i);
			
 
				+_externC void opencl_life_init(void);
			
 
				+_externC void opencl_life_free(void);
			
 
				+_externC void opencl_life_update_host(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, int ldy, int ldz, int iter);
			
 
				+
			
 
				+#endif /* __IMPLICIT_STENCIL_H__ */
			
--- a/examples/stencil/stencil-blocks.c
+++ b/examples/stencil/stencil-blocks.c
@@ -283,7 +283,7 @@ static void free_block_on_node(starpu_data_handle_t handleptr, unsigned nx, unsi
 
				 
			
 
				 void display_memory_consumption(int rank)
			
 
				 {
			
 
				-	FPRINTF(stderr, "%lu B of memory were allocated on node %d\n", allocated, rank);
			
 
				+	FPRINTF(stderr, "%lu B of memory were allocated on node %d\n", (unsigned long) allocated, rank);
			
 
				 }
			
 
				 
			
 
				 void allocate_memory_on_node(int rank)
			
--- a/examples/worker_collections/worker_tree_example.c
+++ b/examples/worker_collections/worker_tree_example.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2010-2016  Université de Bordeaux
			
 
				- * Copyright (C) 2010-2015  CNRS
			
 
				+ * Copyright (C) 2010-2016  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -54,7 +54,7 @@ int main()
 
				 	co->init_iterator = worker_tree.init_iterator;
			
 
				 	co->type = STARPU_WORKER_TREE;
			
 
				 
			
 
				-	FPRINTF(stderr, "ncpus %d \n", ncpus);
			
 
				+	FPRINTF(stderr, "ncpus %u \n", ncpus);
			
 
				 
			
 
				 	double start_time;
			
 
				 	double end_time;
			
@@ -81,7 +81,7 @@ int main()
 
				 	while(co->has_next(co, &it))
			
 
				 	{
			
 
				 		pu = co->get_next(co, &it);
			
 
				-		FPRINTF(stderr, "pu = %d out of %d workers \n", pu, co->nworkers);
			
 
				+		FPRINTF(stderr, "pu = %d out of %u workers \n", pu, co->nworkers);
			
 
				 	}
			
 
				 
			
 
				 	unsigned six = 6;
			
@@ -90,13 +90,13 @@ int main()
 
				 	for(i = 0; i < six; i++)
			
 
				 	{
			
 
				 		co->remove(co, i);
			
 
				-		FPRINTF(stderr, "remove %d out of %d workers\n", i, co->nworkers);
			
 
				+		FPRINTF(stderr, "remove %u out of %u workers\n", i, co->nworkers);
			
 
				 	}
			
 
				 
			
 
				 	while(co->has_next(co, &it))
			
 
				 	{
			
 
				 		pu = co->get_next(co, &it);
			
 
				-		FPRINTF(stderr, "pu = %d out of %d workers \n", pu, co->nworkers);
			
 
				+		FPRINTF(stderr, "pu = %d out of %u workers \n", pu, co->nworkers);
			
 
				 	}
			
 
				 
			
 
				 	FPRINTF(stderr, "timing init = %lf \n", timing);
			
--- a/include/starpu.h
+++ b/include/starpu.h
@@ -83,7 +83,7 @@ struct starpu_conf
 
				 
			
 
				 	const char *sched_policy_name;
			
 
				 	struct starpu_sched_policy *sched_policy;
			
 
				-	void (*sched_policy_init)(void);
			
 
				+	void (*sched_policy_init)(unsigned);
			
 
				 
			
 
				 	int ncpus;
			
 
				 	int ncuda;
			
--- a/include/starpu_config.h.in
+++ b/include/starpu_config.h.in
@@ -145,4 +145,6 @@ typedef ssize_t starpu_ssize_t;
 
				 
			
 
				 #undef STARPU_HAVE_DARWIN
			
 
				 
			
 
				+#undef STARPU_HAVE_CXX11
			
 
				+
			
 
				 #endif
			
--- a/include/starpu_data.h
+++ b/include/starpu_data.h
@@ -61,6 +61,8 @@ void starpu_data_invalidate_submit(starpu_data_handle_t handle);
 
				 
			
 
				 void starpu_data_advise_as_important(starpu_data_handle_t handle, unsigned is_important);
			
 
				 
			
 
				+#define STARPU_ACQUIRE_NO_NODE -1
			
 
				+#define STARPU_ACQUIRE_ALL_NODES -2
			
 
				 int starpu_data_acquire(starpu_data_handle_t handle, enum starpu_data_access_mode mode);
			
 
				 int starpu_data_acquire_on_node(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode);
			
 
				 int starpu_data_acquire_cb(starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg);
			
@@ -143,6 +145,8 @@ unsigned starpu_data_test_if_allocated_on_node(starpu_data_handle_t handle, unsi
 
				 
			
 
				 void starpu_memchunk_tidy(unsigned memory_node);
			
 
				 
			
 
				+void starpu_data_set_user_data(starpu_data_handle_t handle, void* user_data);
			
 
				+void *starpu_data_get_user_data(starpu_data_handle_t handle);
			
 
				 
			
 
				 #ifdef __cplusplus
			
 
				 }
			
--- a/include/starpu_perfmodel.h
+++ b/include/starpu_perfmodel.h
@@ -3,6 +3,7 @@
 
				  * Copyright (C) 2010-2014, 2016  Université de Bordeaux
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
			
 
				  * Copyright (C) 2011  Télécom-SudParis
			
 
				+ * Copyright (C) 2016  Inria
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -60,6 +61,10 @@ struct starpu_perfmodel_history_entry
 
				 	uint32_t footprint;
			
 
				 	size_t size;
			
 
				 	double flops;
			
 
				+
			
 
				+	double duration;
			
 
				+	starpu_tag_t tag;
			
 
				+	double *parameters;
			
 
				 };
			
 
				 
			
 
				 struct starpu_perfmodel_history_list
			
@@ -88,6 +93,10 @@ struct starpu_perfmodel_regression_model
 
				 	unsigned nl_valid;
			
 
				 
			
 
				 	unsigned nsample;
			
 
				+
			
 
				+	double *coeff;
			
 
				+	unsigned ncoeff;
			
 
				+	unsigned multi_valid;
			
 
				 };
			
 
				 
			
 
				 struct starpu_perfmodel_history_table;
			
@@ -116,7 +125,8 @@ enum starpu_perfmodel_type
 
				 	STARPU_COMMON,
			
 
				 	STARPU_HISTORY_BASED,
			
 
				 	STARPU_REGRESSION_BASED,
			
 
				-	STARPU_NL_REGRESSION_BASED
			
 
				+	STARPU_NL_REGRESSION_BASED,
			
 
				+	STARPU_MULTIPLE_REGRESSION_BASED
			
 
				 };
			
 
				 
			
 
				 struct _starpu_perfmodel_state;
			
@@ -138,6 +148,12 @@ struct starpu_perfmodel
 
				 	unsigned benchmarking;
			
 
				 	unsigned is_init;
			
 
				 
			
 
				+	void (*parameters)(struct starpu_task * task, double *parameters);
			
 
				+	const char **parameters_names;
			
 
				+	unsigned nparameters;
			
 
				+	unsigned **combinations;
			
 
				+	unsigned ncombinations;
			
 
				+
			
 
				 	starpu_perfmodel_state_t state;
			
 
				 };
			
 
				 
			
--- a/include/starpu_profiling.h
+++ b/include/starpu_profiling.h
@@ -108,6 +108,10 @@ int starpu_bus_get_count(void);
 
				 int starpu_bus_get_id(int src, int dst);
			
 
				 int starpu_bus_get_src(int busid);
			
 
				 int starpu_bus_get_dst(int busid);
			
 
				+void starpu_bus_set_direct(int busid, int direct);
			
 
				+int starpu_bus_get_direct(int busid);
			
 
				+void starpu_bus_set_ngpus(int busid, int ngpus);
			
 
				+int starpu_bus_get_ngpus(int busid);
			
 
				 
			
 
				 int starpu_bus_get_profiling_info(int busid, struct starpu_profiling_bus_info *bus_info);
			
 
				 
			
--- a/include/starpu_util.h
+++ b/include/starpu_util.h
@@ -112,9 +112,9 @@ extern "C"
 
				 #endif
			
 
				 
			
 
				 #ifdef STARPU_NO_ASSERT
			
 
				-#define STARPU_ASSERT(x)		do { } while(0)
			
 
				-#define STARPU_ASSERT_ACCESSIBLE(x)	do { } while(0)
			
 
				-#define STARPU_ASSERT_MSG(x, msg, ...)	do { } while(0)
			
 
				+#define STARPU_ASSERT(x)		do { if (0) { (void) (x); } } while(0)
			
 
				+#define STARPU_ASSERT_ACCESSIBLE(x)	do { if (0) { (void) (x); } } while(0)
			
 
				+#define STARPU_ASSERT_MSG(x, msg, ...)	do { if (0) { (void) (x); (void) msg; } } while(0)
			
 
				 #else
			
 
				 #  if defined(__CUDACC__) || defined(STARPU_HAVE_WINDOWS)
			
 
				 #    define STARPU_ASSERT(x)		do { if (STARPU_UNLIKELY(!(x))) { STARPU_DUMP_BACKTRACE(); *(int*)NULL = 0; } } while(0)
			
--- a/m4/ax_cxx_compile_stdcxx.m4
+++ b/m4/ax_cxx_compile_stdcxx.m4
@@ -0,0 +1,562 @@
 
				+# ===========================================================================
			
 
				+#   http://www.gnu.org/software/autoconf-archive/ax_cxx_compile_stdcxx.html
			
 
				+# ===========================================================================
			
 
				+#
			
 
				+# SYNOPSIS
			
 
				+#
			
 
				+#   AX_CXX_COMPILE_STDCXX(VERSION, [ext|noext], [mandatory|optional])
			
 
				+#
			
 
				+# DESCRIPTION
			
 
				+#
			
 
				+#   Check for baseline language coverage in the compiler for the specified
			
 
				+#   version of the C++ standard.  If necessary, add switches to CXX and
			
 
				+#   CXXCPP to enable support.  VERSION may be '11' (for the C++11 standard)
			
 
				+#   or '14' (for the C++14 standard).
			
 
				+#
			
 
				+#   The second argument, if specified, indicates whether you insist on an
			
 
				+#   extended mode (e.g. -std=gnu++11) or a strict conformance mode (e.g.
			
 
				+#   -std=c++11).  If neither is specified, you get whatever works, with
			
 
				+#   preference for an extended mode.
			
 
				+#
			
 
				+#   The third argument, if specified 'mandatory' or if left unspecified,
			
 
				+#   indicates that baseline support for the specified C++ standard is
			
 
				+#   required and that the macro should error out if no mode with that
			
 
				+#   support is found.  If specified 'optional', then configuration proceeds
			
 
				+#   regardless, after defining HAVE_CXX${VERSION} if and only if a
			
 
				+#   supporting mode is found.
			
 
				+#
			
 
				+# LICENSE
			
 
				+#
			
 
				+#   Copyright (c) 2008 Benjamin Kosnik <bkoz@redhat.com>
			
 
				+#   Copyright (c) 2012 Zack Weinberg <zackw@panix.com>
			
 
				+#   Copyright (c) 2013 Roy Stogner <roystgnr@ices.utexas.edu>
			
 
				+#   Copyright (c) 2014, 2015 Google Inc.; contributed by Alexey Sokolov <sokolov@google.com>
			
 
				+#   Copyright (c) 2015 Paul Norman <penorman@mac.com>
			
 
				+#   Copyright (c) 2015 Moritz Klammler <moritz@klammler.eu>
			
 
				+#
			
 
				+#   Copying and distribution of this file, with or without modification, are
			
 
				+#   permitted in any medium without royalty provided the copyright notice
			
 
				+#   and this notice are preserved.  This file is offered as-is, without any
			
 
				+#   warranty.
			
 
				+
			
 
				+#serial 4
			
 
				+
			
 
				+dnl  This macro is based on the code from the AX_CXX_COMPILE_STDCXX_11 macro
			
 
				+dnl  (serial version number 13).
			
 
				+
			
 
				+AC_DEFUN([AX_CXX_COMPILE_STDCXX], [dnl
			
 
				+  m4_if([$1], [11], [],
			
 
				+        [$1], [14], [],
			
 
				+        [$1], [17], [m4_fatal([support for C++17 not yet implemented in AX_CXX_COMPILE_STDCXX])],
			
 
				+        [m4_fatal([invalid first argument `$1' to AX_CXX_COMPILE_STDCXX])])dnl
			
 
				+  m4_if([$2], [], [],
			
 
				+        [$2], [ext], [],
			
 
				+        [$2], [noext], [],
			
 
				+        [m4_fatal([invalid second argument `$2' to AX_CXX_COMPILE_STDCXX])])dnl
			
 
				+  m4_if([$3], [], [ax_cxx_compile_cxx$1_required=true],
			
 
				+        [$3], [mandatory], [ax_cxx_compile_cxx$1_required=true],
			
 
				+        [$3], [optional], [ax_cxx_compile_cxx$1_required=false],
			
 
				+        [m4_fatal([invalid third argument `$3' to AX_CXX_COMPILE_STDCXX])])
			
 
				+  AC_LANG_PUSH([C++])dnl
			
 
				+  ac_success=no
			
 
				+  AC_CACHE_CHECK(whether $CXX supports C++$1 features by default,
			
 
				+  ax_cv_cxx_compile_cxx$1,
			
 
				+  [AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])],
			
 
				+    [ax_cv_cxx_compile_cxx$1=yes],
			
 
				+    [ax_cv_cxx_compile_cxx$1=no])])
			
 
				+  if test x$ax_cv_cxx_compile_cxx$1 = xyes; then
			
 
				+    ac_success=yes
			
 
				+  fi
			
 
				+
			
 
				+  m4_if([$2], [noext], [], [dnl
			
 
				+  if test x$ac_success = xno; then
			
 
				+    for switch in -std=gnu++$1 -std=gnu++0x; do
			
 
				+      cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch])
			
 
				+      AC_CACHE_CHECK(whether $CXX supports C++$1 features with $switch,
			
 
				+                     $cachevar,
			
 
				+        [ac_save_CXX="$CXX"
			
 
				+         CXX="$CXX $switch"
			
 
				+         AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])],
			
 
				+          [eval $cachevar=yes],
			
 
				+          [eval $cachevar=no])
			
 
				+         CXX="$ac_save_CXX"])
			
 
				+      if eval test x\$$cachevar = xyes; then
			
 
				+        CXX="$CXX $switch"
			
 
				+        if test -n "$CXXCPP" ; then
			
 
				+          CXXCPP="$CXXCPP $switch"
			
 
				+        fi
			
 
				+        ac_success=yes
			
 
				+        break
			
 
				+      fi
			
 
				+    done
			
 
				+  fi])
			
 
				+
			
 
				+  m4_if([$2], [ext], [], [dnl
			
 
				+  if test x$ac_success = xno; then
			
 
				+    dnl HP's aCC needs +std=c++11 according to:
			
 
				+    dnl http://h21007.www2.hp.com/portal/download/files/unprot/aCxx/PDF_Release_Notes/769149-001.pdf
			
 
				+    dnl Cray's crayCC needs "-h std=c++11"
			
 
				+    for switch in -std=c++$1 -std=c++0x +std=c++$1 "-h std=c++$1"; do
			
 
				+      cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch])
			
 
				+      AC_CACHE_CHECK(whether $CXX supports C++$1 features with $switch,
			
 
				+                     $cachevar,
			
 
				+        [ac_save_CXX="$CXX"
			
 
				+         CXX="$CXX $switch"
			
 
				+         AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])],
			
 
				+          [eval $cachevar=yes],
			
 
				+          [eval $cachevar=no])
			
 
				+         CXX="$ac_save_CXX"])
			
 
				+      if eval test x\$$cachevar = xyes; then
			
 
				+        CXX="$CXX $switch"
			
 
				+        if test -n "$CXXCPP" ; then
			
 
				+          CXXCPP="$CXXCPP $switch"
			
 
				+        fi
			
 
				+        ac_success=yes
			
 
				+        break
			
 
				+      fi
			
 
				+    done
			
 
				+  fi])
			
 
				+  AC_LANG_POP([C++])
			
 
				+  if test x$ax_cxx_compile_cxx$1_required = xtrue; then
			
 
				+    if test x$ac_success = xno; then
			
 
				+      AC_MSG_ERROR([*** A compiler with support for C++$1 language features is required.])
			
 
				+    fi
			
 
				+  fi
			
 
				+  if test x$ac_success = xno; then
			
 
				+    HAVE_CXX$1=0
			
 
				+    AC_MSG_NOTICE([No compiler with C++$1 support was found])
			
 
				+  else
			
 
				+    HAVE_CXX$1=1
			
 
				+    AC_DEFINE(HAVE_CXX$1,1,
			
 
				+              [define if the compiler supports basic C++$1 syntax])
			
 
				+  fi
			
 
				+  AC_SUBST(HAVE_CXX$1)
			
 
				+])
			
 
				+
			
 
				+
			
 
				+dnl  Test body for checking C++11 support
			
 
				+
			
 
				+m4_define([_AX_CXX_COMPILE_STDCXX_testbody_11],
			
 
				+  _AX_CXX_COMPILE_STDCXX_testbody_new_in_11
			
 
				+)
			
 
				+
			
 
				+
			
 
				+dnl  Test body for checking C++14 support
			
 
				+
			
 
				+m4_define([_AX_CXX_COMPILE_STDCXX_testbody_14],
			
 
				+  _AX_CXX_COMPILE_STDCXX_testbody_new_in_11
			
 
				+  _AX_CXX_COMPILE_STDCXX_testbody_new_in_14
			
 
				+)
			
 
				+
			
 
				+
			
 
				+dnl  Tests for new features in C++11
			
 
				+
			
 
				+m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_11], [[
			
 
				+
			
 
				+// If the compiler admits that it is not ready for C++11, why torture it?
			
 
				+// Hopefully, this will speed up the test.
			
 
				+
			
 
				+#ifndef __cplusplus
			
 
				+
			
 
				+#error "This is not a C++ compiler"
			
 
				+
			
 
				+#elif __cplusplus < 201103L
			
 
				+
			
 
				+#error "This is not a C++11 compiler"
			
 
				+
			
 
				+#else
			
 
				+
			
 
				+namespace cxx11
			
 
				+{
			
 
				+
			
 
				+  namespace test_static_assert
			
 
				+  {
			
 
				+
			
 
				+    template <typename T>
			
 
				+    struct check
			
 
				+    {
			
 
				+      static_assert(sizeof(int) <= sizeof(T), "not big enough");
			
 
				+    };
			
 
				+
			
 
				+  }
			
 
				+
			
 
				+  namespace test_final_override
			
 
				+  {
			
 
				+
			
 
				+    struct Base
			
 
				+    {
			
 
				+      virtual void f() {}
			
 
				+    };
			
 
				+
			
 
				+    struct Derived : public Base
			
 
				+    {
			
 
				+      virtual void f() override {}
			
 
				+    };
			
 
				+
			
 
				+  }
			
 
				+
			
 
				+  namespace test_double_right_angle_brackets
			
 
				+  {
			
 
				+
			
 
				+    template < typename T >
			
 
				+    struct check {};
			
 
				+
			
 
				+    typedef check<void> single_type;
			
 
				+    typedef check<check<void>> double_type;
			
 
				+    typedef check<check<check<void>>> triple_type;
			
 
				+    typedef check<check<check<check<void>>>> quadruple_type;
			
 
				+
			
 
				+  }
			
 
				+
			
 
				+  namespace test_decltype
			
 
				+  {
			
 
				+
			
 
				+    int
			
 
				+    f()
			
 
				+    {
			
 
				+      int a = 1;
			
 
				+      decltype(a) b = 2;
			
 
				+      return a + b;
			
 
				+    }
			
 
				+
			
 
				+  }
			
 
				+
			
 
				+  namespace test_type_deduction
			
 
				+  {
			
 
				+
			
 
				+    template < typename T1, typename T2 >
			
 
				+    struct is_same
			
 
				+    {
			
 
				+      static const bool value = false;
			
 
				+    };
			
 
				+
			
 
				+    template < typename T >
			
 
				+    struct is_same<T, T>
			
 
				+    {
			
 
				+      static const bool value = true;
			
 
				+    };
			
 
				+
			
 
				+    template < typename T1, typename T2 >
			
 
				+    auto
			
 
				+    add(T1 a1, T2 a2) -> decltype(a1 + a2)
			
 
				+    {
			
 
				+      return a1 + a2;
			
 
				+    }
			
 
				+
			
 
				+    int
			
 
				+    test(const int c, volatile int v)
			
 
				+    {
			
 
				+      static_assert(is_same<int, decltype(0)>::value == true, "");
			
 
				+      static_assert(is_same<int, decltype(c)>::value == false, "");
			
 
				+      static_assert(is_same<int, decltype(v)>::value == false, "");
			
 
				+      auto ac = c;
			
 
				+      auto av = v;
			
 
				+      auto sumi = ac + av + 'x';
			
 
				+      auto sumf = ac + av + 1.0;
			
 
				+      static_assert(is_same<int, decltype(ac)>::value == true, "");
			
 
				+      static_assert(is_same<int, decltype(av)>::value == true, "");
			
 
				+      static_assert(is_same<int, decltype(sumi)>::value == true, "");
			
 
				+      static_assert(is_same<int, decltype(sumf)>::value == false, "");
			
 
				+      static_assert(is_same<int, decltype(add(c, v))>::value == true, "");
			
 
				+      return (sumf > 0.0) ? sumi : add(c, v);
			
 
				+    }
			
 
				+
			
 
				+  }
			
 
				+
			
 
				+  namespace test_noexcept
			
 
				+  {
			
 
				+
			
 
				+    int f() { return 0; }
			
 
				+    int g() noexcept { return 0; }
			
 
				+
			
 
				+    static_assert(noexcept(f()) == false, "");
			
 
				+    static_assert(noexcept(g()) == true, "");
			
 
				+
			
 
				+  }
			
 
				+
			
 
				+  namespace test_constexpr
			
 
				+  {
			
 
				+
			
 
				+    template < typename CharT >
			
 
				+    unsigned long constexpr
			
 
				+    strlen_c_r(const CharT *const s, const unsigned long acc) noexcept
			
 
				+    {
			
 
				+      return *s ? strlen_c_r(s + 1, acc + 1) : acc;
			
 
				+    }
			
 
				+
			
 
				+    template < typename CharT >
			
 
				+    unsigned long constexpr
			
 
				+    strlen_c(const CharT *const s) noexcept
			
 
				+    {
			
 
				+      return strlen_c_r(s, 0UL);
			
 
				+    }
			
 
				+
			
 
				+    static_assert(strlen_c("") == 0UL, "");
			
 
				+    static_assert(strlen_c("1") == 1UL, "");
			
 
				+    static_assert(strlen_c("example") == 7UL, "");
			
 
				+    static_assert(strlen_c("another\0example") == 7UL, "");
			
 
				+
			
 
				+  }
			
 
				+
			
 
				+  namespace test_rvalue_references
			
 
				+  {
			
 
				+
			
 
				+    template < int N >
			
 
				+    struct answer
			
 
				+    {
			
 
				+      static constexpr int value = N;
			
 
				+    };
			
 
				+
			
 
				+    answer<1> f(int&)       { return answer<1>(); }
			
 
				+    answer<2> f(const int&) { return answer<2>(); }
			
 
				+    answer<3> f(int&&)      { return answer<3>(); }
			
 
				+
			
 
				+    void
			
 
				+    test()
			
 
				+    {
			
 
				+      int i = 0;
			
 
				+      const int c = 0;
			
 
				+      static_assert(decltype(f(i))::value == 1, "");
			
 
				+      static_assert(decltype(f(c))::value == 2, "");
			
 
				+      static_assert(decltype(f(0))::value == 3, "");
			
 
				+    }
			
 
				+
			
 
				+  }
			
 
				+
			
 
				+  namespace test_uniform_initialization
			
 
				+  {
			
 
				+
			
 
				+    struct test
			
 
				+    {
			
 
				+      static const int zero {};
			
 
				+      static const int one {1};
			
 
				+    };
			
 
				+
			
 
				+    static_assert(test::zero == 0, "");
			
 
				+    static_assert(test::one == 1, "");
			
 
				+
			
 
				+  }
			
 
				+
			
 
				+  namespace test_lambdas
			
 
				+  {
			
 
				+
			
 
				+    void
			
 
				+    test1()
			
 
				+    {
			
 
				+      auto lambda1 = [](){};
			
 
				+      auto lambda2 = lambda1;
			
 
				+      lambda1();
			
 
				+      lambda2();
			
 
				+    }
			
 
				+
			
 
				+    int
			
 
				+    test2()
			
 
				+    {
			
 
				+      auto a = [](int i, int j){ return i + j; }(1, 2);
			
 
				+      auto b = []() -> int { return '0'; }();
			
 
				+      auto c = [=](){ return a + b; }();
			
 
				+      auto d = [&](){ return c; }();
			
 
				+      auto e = [a, &b](int x) mutable {
			
 
				+        const auto identity = [](int y){ return y; };
			
 
				+        for (auto i = 0; i < a; ++i)
			
 
				+          a += b--;
			
 
				+        return x + identity(a + b);
			
 
				+      }(0);
			
 
				+      return a + b + c + d + e;
			
 
				+    }
			
 
				+
			
 
				+    int
			
 
				+    test3()
			
 
				+    {
			
 
				+      const auto nullary = [](){ return 0; };
			
 
				+      const auto unary = [](int x){ return x; };
			
 
				+      using nullary_t = decltype(nullary);
			
 
				+      using unary_t = decltype(unary);
			
 
				+      const auto higher1st = [](nullary_t f){ return f(); };
			
 
				+      const auto higher2nd = [unary](nullary_t f1){
			
 
				+        return [unary, f1](unary_t f2){ return f2(unary(f1())); };
			
 
				+      };
			
 
				+      return higher1st(nullary) + higher2nd(nullary)(unary);
			
 
				+    }
			
 
				+
			
 
				+  }
			
 
				+
			
 
				+  namespace test_variadic_templates
			
 
				+  {
			
 
				+
			
 
				+    template <int...>
			
 
				+    struct sum;
			
 
				+
			
 
				+    template <int N0, int... N1toN>
			
 
				+    struct sum<N0, N1toN...>
			
 
				+    {
			
 
				+      static constexpr auto value = N0 + sum<N1toN...>::value;
			
 
				+    };
			
 
				+
			
 
				+    template <>
			
 
				+    struct sum<>
			
 
				+    {
			
 
				+      static constexpr auto value = 0;
			
 
				+    };
			
 
				+
			
 
				+    static_assert(sum<>::value == 0, "");
			
 
				+    static_assert(sum<1>::value == 1, "");
			
 
				+    static_assert(sum<23>::value == 23, "");
			
 
				+    static_assert(sum<1, 2>::value == 3, "");
			
 
				+    static_assert(sum<5, 5, 11>::value == 21, "");
			
 
				+    static_assert(sum<2, 3, 5, 7, 11, 13>::value == 41, "");
			
 
				+
			
 
				+  }
			
 
				+
			
 
				+  // http://stackoverflow.com/questions/13728184/template-aliases-and-sfinae
			
 
				+  // Clang 3.1 fails with headers of libstd++ 4.8.3 when using std::function
			
 
				+  // because of this.
			
 
				+  namespace test_template_alias_sfinae
			
 
				+  {
			
 
				+
			
 
				+    struct foo {};
			
 
				+
			
 
				+    template<typename T>
			
 
				+    using member = typename T::member_type;
			
 
				+
			
 
				+    template<typename T>
			
 
				+    void func(...) {}
			
 
				+
			
 
				+    template<typename T>
			
 
				+    void func(member<T>*) {}
			
 
				+
			
 
				+    void test();
			
 
				+
			
 
				+    void test() { func<foo>(0); }
			
 
				+
			
 
				+  }
			
 
				+
			
 
				+}  // namespace cxx11
			
 
				+
			
 
				+#endif  // __cplusplus >= 201103L
			
 
				+
			
 
				+]])
			
 
				+
			
 
				+
			
 
				+dnl  Tests for new features in C++14
			
 
				+
			
 
				+m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_14], [[
			
 
				+
			
 
				+// If the compiler admits that it is not ready for C++14, why torture it?
			
 
				+// Hopefully, this will speed up the test.
			
 
				+
			
 
				+#ifndef __cplusplus
			
 
				+
			
 
				+#error "This is not a C++ compiler"
			
 
				+
			
 
				+#elif __cplusplus < 201402L
			
 
				+
			
 
				+#error "This is not a C++14 compiler"
			
 
				+
			
 
				+#else
			
 
				+
			
 
				+namespace cxx14
			
 
				+{
			
 
				+
			
 
				+  namespace test_polymorphic_lambdas
			
 
				+  {
			
 
				+
			
 
				+    int
			
 
				+    test()
			
 
				+    {
			
 
				+      const auto lambda = [](auto&&... args){
			
 
				+        const auto istiny = [](auto x){
			
 
				+          return (sizeof(x) == 1UL) ? 1 : 0;
			
 
				+        };
			
 
				+        const int aretiny[] = { istiny(args)... };
			
 
				+        return aretiny[0];
			
 
				+      };
			
 
				+      return lambda(1, 1L, 1.0f, '1');
			
 
				+    }
			
 
				+
			
 
				+  }
			
 
				+
			
 
				+  namespace test_binary_literals
			
 
				+  {
			
 
				+
			
 
				+    constexpr auto ivii = 0b0000000000101010;
			
 
				+    static_assert(ivii == 42, "wrong value");
			
 
				+
			
 
				+  }
			
 
				+
			
 
				+  namespace test_generalized_constexpr
			
 
				+  {
			
 
				+
			
 
				+    template < typename CharT >
			
 
				+    constexpr unsigned long
			
 
				+    strlen_c(const CharT *const s) noexcept
			
 
				+    {
			
 
				+      auto length = 0UL;
			
 
				+      for (auto p = s; *p; ++p)
			
 
				+        ++length;
			
 
				+      return length;
			
 
				+    }
			
 
				+
			
 
				+    static_assert(strlen_c("") == 0UL, "");
			
 
				+    static_assert(strlen_c("x") == 1UL, "");
			
 
				+    static_assert(strlen_c("test") == 4UL, "");
			
 
				+    static_assert(strlen_c("another\0test") == 7UL, "");
			
 
				+
			
 
				+  }
			
 
				+
			
 
				+  namespace test_lambda_init_capture
			
 
				+  {
			
 
				+
			
 
				+    int
			
 
				+    test()
			
 
				+    {
			
 
				+      auto x = 0;
			
 
				+      const auto lambda1 = [a = x](int b){ return a + b; };
			
 
				+      const auto lambda2 = [a = lambda1(x)](){ return a; };
			
 
				+      return lambda2();
			
 
				+    }
			
 
				+
			
 
				+  }
			
 
				+
			
 
				+  namespace test_digit_seperators
			
 
				+  {
			
 
				+
			
 
				+    constexpr auto ten_million = 100'000'000;
			
 
				+    static_assert(ten_million == 100000000, "");
			
 
				+
			
 
				+  }
			
 
				+
			
 
				+  namespace test_return_type_deduction
			
 
				+  {
			
 
				+
			
 
				+    auto f(int& x) { return x; }
			
 
				+    decltype(auto) g(int& x) { return x; }
			
 
				+
			
 
				+    template < typename T1, typename T2 >
			
 
				+    struct is_same
			
 
				+    {
			
 
				+      static constexpr auto value = false;
			
 
				+    };
			
 
				+
			
 
				+    template < typename T >
			
 
				+    struct is_same<T, T>
			
 
				+    {
			
 
				+      static constexpr auto value = true;
			
 
				+    };
			
 
				+
			
 
				+    int
			
 
				+    test()
			
 
				+    {
			
 
				+      auto x = 0;
			
 
				+      static_assert(is_same<int, decltype(f(x))>::value, "");
			
 
				+      static_assert(is_same<int&, decltype(g(x))>::value, "");
			
 
				+      return x;
			
 
				+    }
			
 
				+
			
 
				+  }
			
 
				+
			
 
				+}  // namespace cxx14
			
 
				+
			
 
				+#endif  // __cplusplus >= 201402L
			
 
				+
			
 
				+]])
			
--- a/mic-configure
+++ b/mic-configure
@@ -17,6 +17,7 @@ declare -a mic_params
 
				 unset host_params
			
 
				 unset mic_params
			
 
				 native_mic=0
			
 
				+mpi=0
			
 
				 for arg in "$@"
			
 
				 do
			
 
				 	case $arg in
			
@@ -44,6 +45,9 @@ do
 
				 		--with-host-param=*)
			
 
				 			host_params+=("${arg#--with-host-param=}")
			
 
				 			;;
			
 
				+		--with-mpi*)
			
 
				+			mpi=1
			
 
				+			;;
			
 
				 		--help)
			
 
				 			cat << EOF
			
 
				 mic-configure specific options:
			
@@ -97,8 +101,11 @@ then
 
				 fi
			
 
				 
			
 
				 # prepend mic_params with "--with-mpicc=mpicc -mmic", to allow possible override by the user
			
 
				-mic_params=("--with-mpicc=mpicc -mmic" "${mic_params[@]}")
			
 
				-mic_params=("--with-mpifort=mpifort -mmic" "${mic_params[@]}")
			
 
				+if [ $mpi = 1 ]
			
 
				+then
			
 
				+	mic_params=("--with-mpicc=mpicc -mmic" "${mic_params[@]}")
			
 
				+	mic_params=("--with-mpifort=mpifort -mmic" "${mic_params[@]}")
			
 
				+fi
			
 
				 
			
 
				 for arch in $dev_list #host mic
			
 
				 do
			
@@ -110,6 +117,9 @@ do
 
				 		*) command="../${ROOT_DIR}/configure";;
			
 
				 	esac
			
 
				 
			
 
				+	declare -a params
			
 
				+	params=("--prefix=$prefix/$arch" "--disable-fstack-protector-all")
			
 
				+
			
 
				 	if [ "$arch" = mic ] ; then
			
 
				 		if [ $compiler = "icc" ] ; then
			
 
				 		    export CC="icc -mmic"
			
@@ -130,9 +140,6 @@ do
 
				 		fi
			
 
				 	fi
			
 
				 
			
 
				-	declare -a params
			
 
				-	params=("--prefix=$prefix/$arch" "--disable-fstack-protector-all")
			
 
				-
			
 
				 	if [ "$native_mic" -eq "0" ]
			
 
				 	then
			
 
				 		params+=(--enable-mic "--with-coi-dir=$coi_dir" "--with-scif-dir=$scif_dir")
			
@@ -164,6 +171,13 @@ do
 
				 		CPLUS_INCLUDE_PATH=$SINK_CPLUS_INCLUDE_PATH \
			
 
				 		PKG_CONFIG_PATH=$SINK_PKG_CONFIG_PATH \
			
 
				 		$command "$@" "${params[@]}" "${mic_params[@]}"
			
 
				+		MIC_BUILD_ENV="\
			
 
				+LIBRARY_PATH=$SINK_LIBRARY_PATH \\
			
 
				+	INCLUDE=$SINK_INCLUDE \\
			
 
				+	C_INCLUDE_PATH=$SINK_C_INCLUDE_PATH \\
			
 
				+	CPLUS_INCLUDE_PATH=$SINK_CPLUS_INCLUDE_PATH \\
			
 
				+	PKG_CONFIG_PATH=$SINK_PKG_CONFIG_PATH \\\
			
 
				+"
			
 
				 	else
			
 
				 		$command "$@" "${params[@]}""${host_params[@]}"
			
 
				 	fi
			
@@ -177,6 +191,7 @@ if [ "$native_mic" -eq "1" ]
 
				 then
			
 
				 cat > Makefile << EOF
			
 
				 all:
			
 
				+	$MIC_BUILD_ENV
			
 
				 	\$(MAKE) -C build_mic
			
 
				 
			
 
				 clean:
			
@@ -186,12 +201,14 @@ distclean: clean
 
				 	rm -f Makefile
			
 
				 
			
 
				 check:
			
 
				+	$MIC_BUILD_ENV
			
 
				 	\$(MAKE) -C build_mic check
			
 
				 
			
 
				 showcheck:
			
 
				 	\$(MAKE) -C build_mic showcheck
			
 
				 
			
 
				 install:
			
 
				+	$MIC_BUILD_ENV
			
 
				 	\$(MAKE) -C build_mic install
			
 
				 	ln -sf "${prefix}/mic/lib/pkgconfig/starpu-1.3.pc" "${prefix}/mic/lib/pkgconfig/starpu-1.3-mic.pc"
			
 
				 EOF
			
@@ -199,6 +216,7 @@ else
 
				 cat > Makefile << EOF
			
 
				 all:
			
 
				 	\$(MAKE) -C build_host
			
 
				+	$MIC_BUILD_ENV
			
 
				 	\$(MAKE) -C build_mic
			
 
				 
			
 
				 clean:
			
@@ -210,6 +228,7 @@ distclean: clean
 
				 
			
 
				 check:
			
 
				 	\$(MAKE) -C build_host check
			
 
				+	$MIC_BUILD_ENV
			
 
				 	\$(MAKE) -C build_mic check ; \
			
 
				 	RET=\$\$? ; \
			
 
				 	STARPU_NCPUS=0 \$(MAKE) -C build_mic check && [ \$\$RET == 0 ]
			
@@ -220,6 +239,7 @@ showcheck:
 
				 
			
 
				 install:
			
 
				 	\$(MAKE) -C build_host install
			
 
				+	$MIC_BUILD_ENV
			
 
				 	\$(MAKE) -C build_mic install
			
 
				 	ln -sf "${prefix}/mic/lib/pkgconfig/starpu-1.3.pc" "${prefix}/mic/lib/pkgconfig/starpu-1.3-mic.pc"
			
 
				 EOF
			
--- a/min-dgels/Makefile.in
+++ b/min-dgels/Makefile.in
@@ -0,0 +1,30 @@
 
				+CLAPACK=base
			
 
				+ADDITIONAL=additional
			
 
				+
			
 
				+all:
			
 
				+	mkdir -p build
			
 
				+	cd $(CLAPACK) && $(MAKE) blaslib
			
 
				+	cd $(CLAPACK) && $(MAKE) f2clib
			
 
				+	cd $(ADDITIONAL) && gcc -c -fPIC *.c && ar cr ../build/minlibdgels.a *.o && ranlib ../build/minlibdgels.a
			
 
				+
			
 
				+install:
			
 
				+
			
 
				+clean:
			
 
				+	-cd $(CLAPACK) && $(MAKE) clean && rm -rf *~
			
 
				+	-cd $(ADDITIONAL) && rm -rf *.o *~
			
 
				+	rm -rf build *~
			
 
				+
			
 
				+distclean: clean
			
 
				+
			
 
				+# This part is needed by StarPU
			
 
				+
			
 
				+STARPU_SRCDIR = @srcdir@
			
 
				+
			
 
				+distdir:
			
 
				+	cp -fRp $(STARPU_SRCDIR)/* $(distdir)
			
 
				+	cd $(distdir) && make -f Makefile.in clean
			
 
				+
			
 
				+check:
			
 
				+	echo "No checks are implemented for min-dgels"
			
 
				+
			
 
				+showcheck: check
			
--- a/min-dgels/additional/blaswrap.h
+++ b/min-dgels/additional/blaswrap.h
@@ -0,0 +1,160 @@
 
				+/* CLAPACK 3.0 BLAS wrapper macros
			
 
				+ * Feb 5, 2000
			
 
				+ */
			
 
				+
			
 
				+#ifndef __BLASWRAP_H
			
 
				+#define __BLASWRAP_H
			
 
				+
			
 
				+#ifndef NO_BLAS_WRAP
			
 
				+ 
			
 
				+/* BLAS1 routines */
			
 
				+#define srotg_ f2c_srotg
			
 
				+#define crotg_ f2c_crotg
			
 
				+#define drotg_ f2c_drotg
			
 
				+#define zrotg_ f2c_zrotg
			
 
				+#define srotmg_ f2c_srotmg
			
 
				+#define drotmg_ f2c_drotmg
			
 
				+#define srot_ f2c_srot
			
 
				+#define drot_ f2c_drot
			
 
				+#define srotm_ f2c_srotm
			
 
				+#define drotm_ f2c_drotm
			
 
				+#define sswap_ f2c_sswap
			
 
				+#define dswap_ f2c_dswap
			
 
				+#define cswap_ f2c_cswap
			
 
				+#define zswap_ f2c_zswap
			
 
				+#define sscal_ f2c_sscal
			
 
				+#define dscal_ f2c_dscal
			
 
				+#define cscal_ f2c_cscal
			
 
				+#define zscal_ f2c_zscal
			
 
				+#define csscal_ f2c_csscal
			
 
				+#define zdscal_ f2c_zdscal
			
 
				+#define scopy_ f2c_scopy
			
 
				+#define dcopy_ f2c_dcopy
			
 
				+#define ccopy_ f2c_ccopy
			
 
				+#define zcopy_ f2c_zcopy
			
 
				+#define saxpy_ f2c_saxpy
			
 
				+#define daxpy_ f2c_daxpy
			
 
				+#define caxpy_ f2c_caxpy
			
 
				+#define zaxpy_ f2c_zaxpy
			
 
				+#define sdot_ f2c_sdot
			
 
				+#define ddot_ f2c_ddot
			
 
				+#define cdotu_ f2c_cdotu
			
 
				+#define zdotu_ f2c_zdotu
			
 
				+#define cdotc_ f2c_cdotc
			
 
				+#define zdotc_ f2c_zdotc
			
 
				+#define snrm2_ f2c_snrm2
			
 
				+#define dnrm2_ f2c_dnrm2
			
 
				+#define scnrm2_ f2c_scnrm2
			
 
				+#define dznrm2_ f2c_dznrm2
			
 
				+#define sasum_ f2c_sasum
			
 
				+#define dasum_ f2c_dasum
			
 
				+#define scasum_ f2c_scasum
			
 
				+#define dzasum_ f2c_dzasum
			
 
				+#define isamax_ f2c_isamax
			
 
				+#define idamax_ f2c_idamax
			
 
				+#define icamax_ f2c_icamax
			
 
				+#define izamax_ f2c_izamax
			
 
				+ 
			
 
				+/* BLAS2 routines */
			
 
				+#define sgemv_ f2c_sgemv
			
 
				+#define dgemv_ f2c_dgemv
			
 
				+#define cgemv_ f2c_cgemv
			
 
				+#define zgemv_ f2c_zgemv
			
 
				+#define sgbmv_ f2c_sgbmv
			
 
				+#define dgbmv_ f2c_dgbmv
			
 
				+#define cgbmv_ f2c_cgbmv
			
 
				+#define zgbmv_ f2c_zgbmv
			
 
				+#define chemv_ f2c_chemv
			
 
				+#define zhemv_ f2c_zhemv
			
 
				+#define chbmv_ f2c_chbmv
			
 
				+#define zhbmv_ f2c_zhbmv
			
 
				+#define chpmv_ f2c_chpmv
			
 
				+#define zhpmv_ f2c_zhpmv
			
 
				+#define ssymv_ f2c_ssymv
			
 
				+#define dsymv_ f2c_dsymv
			
 
				+#define ssbmv_ f2c_ssbmv
			
 
				+#define dsbmv_ f2c_dsbmv
			
 
				+#define sspmv_ f2c_sspmv
			
 
				+#define dspmv_ f2c_dspmv
			
 
				+#define strmv_ f2c_strmv
			
 
				+#define dtrmv_ f2c_dtrmv
			
 
				+#define ctrmv_ f2c_ctrmv
			
 
				+#define ztrmv_ f2c_ztrmv
			
 
				+#define stbmv_ f2c_stbmv
			
 
				+#define dtbmv_ f2c_dtbmv
			
 
				+#define ctbmv_ f2c_ctbmv
			
 
				+#define ztbmv_ f2c_ztbmv
			
 
				+#define stpmv_ f2c_stpmv
			
 
				+#define dtpmv_ f2c_dtpmv
			
 
				+#define ctpmv_ f2c_ctpmv
			
 
				+#define ztpmv_ f2c_ztpmv
			
 
				+#define strsv_ f2c_strsv
			
 
				+#define dtrsv_ f2c_dtrsv
			
 
				+#define ctrsv_ f2c_ctrsv
			
 
				+#define ztrsv_ f2c_ztrsv
			
 
				+#define stbsv_ f2c_stbsv
			
 
				+#define dtbsv_ f2c_dtbsv
			
 
				+#define ctbsv_ f2c_ctbsv
			
 
				+#define ztbsv_ f2c_ztbsv
			
 
				+#define stpsv_ f2c_stpsv
			
 
				+#define dtpsv_ f2c_dtpsv
			
 
				+#define ctpsv_ f2c_ctpsv
			
 
				+#define ztpsv_ f2c_ztpsv
			
 
				+#define sger_ f2c_sger
			
 
				+#define dger_ f2c_dger
			
 
				+#define cgeru_ f2c_cgeru
			
 
				+#define zgeru_ f2c_zgeru
			
 
				+#define cgerc_ f2c_cgerc
			
 
				+#define zgerc_ f2c_zgerc
			
 
				+#define cher_ f2c_cher
			
 
				+#define zher_ f2c_zher
			
 
				+#define chpr_ f2c_chpr
			
 
				+#define zhpr_ f2c_zhpr
			
 
				+#define cher2_ f2c_cher2
			
 
				+#define zher2_ f2c_zher2
			
 
				+#define chpr2_ f2c_chpr2
			
 
				+#define zhpr2_ f2c_zhpr2
			
 
				+#define ssyr_ f2c_ssyr
			
 
				+#define dsyr_ f2c_dsyr
			
 
				+#define sspr_ f2c_sspr
			
 
				+#define dspr_ f2c_dspr
			
 
				+#define ssyr2_ f2c_ssyr2
			
 
				+#define dsyr2_ f2c_dsyr2
			
 
				+#define sspr2_ f2c_sspr2
			
 
				+#define dspr2_ f2c_dspr2
			
 
				+ 
			
 
				+/* BLAS3 routines */
			
 
				+#define sgemm_ f2c_sgemm
			
 
				+#define dgemm_ f2c_dgemm
			
 
				+#define cgemm_ f2c_cgemm
			
 
				+#define zgemm_ f2c_zgemm
			
 
				+#define ssymm_ f2c_ssymm
			
 
				+#define dsymm_ f2c_dsymm
			
 
				+#define csymm_ f2c_csymm
			
 
				+#define zsymm_ f2c_zsymm
			
 
				+#define chemm_ f2c_chemm
			
 
				+#define zhemm_ f2c_zhemm
			
 
				+#define ssyrk_ f2c_ssyrk
			
 
				+#define dsyrk_ f2c_dsyrk
			
 
				+#define csyrk_ f2c_csyrk
			
 
				+#define zsyrk_ f2c_zsyrk
			
 
				+#define cherk_ f2c_cherk
			
 
				+#define zherk_ f2c_zherk
			
 
				+#define ssyr2k_ f2c_ssyr2k
			
 
				+#define dsyr2k_ f2c_dsyr2k
			
 
				+#define csyr2k_ f2c_csyr2k
			
 
				+#define zsyr2k_ f2c_zsyr2k
			
 
				+#define cher2k_ f2c_cher2k
			
 
				+#define zher2k_ f2c_zher2k
			
 
				+#define strmm_ f2c_strmm
			
 
				+#define dtrmm_ f2c_dtrmm
			
 
				+#define ctrmm_ f2c_ctrmm
			
 
				+#define ztrmm_ f2c_ztrmm
			
 
				+#define strsm_ f2c_strsm
			
 
				+#define dtrsm_ f2c_dtrsm
			
 
				+#define ctrsm_ f2c_ctrsm
			
 
				+#define ztrsm_ f2c_ztrsm
			
 
				+
			
 
				+#endif /* NO_BLAS_WRAP */
			
 
				+
			
 
				+#endif /* __BLASWRAP_H */
			
--- a/min-dgels/additional/clapack.h
+++ b/min-dgels/additional/clapack.h
--- a/min-dgels/additional/d_lg10.c
+++ b/min-dgels/additional/d_lg10.c
@@ -0,0 +1,21 @@
 
				+#include "f2c.h"
			
 
				+
			
 
				+#define log10e 0.43429448190325182765
			
 
				+
			
 
				+#ifdef KR_headers
			
 
				+double log();
			
 
				+double d_lg10(x) doublereal *x;
			
 
				+#else
			
 
				+#undef abs
			
 
				+#include "math.h"
			
 
				+#ifdef __cplusplus
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+double d_lg10(doublereal *x)
			
 
				+#endif
			
 
				+{
			
 
				+return( log10e * log(*x) );
			
 
				+}
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
--- a/min-dgels/additional/d_sign.c
+++ b/min-dgels/additional/d_sign.c
@@ -0,0 +1,18 @@
 
				+#include "f2c.h"
			
 
				+#ifdef __cplusplus
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+#ifdef KR_headers
			
 
				+double d_sign(a,b) doublereal *a, *b;
			
 
				+#else
			
 
				+double d_sign(doublereal *a, doublereal *b)
			
 
				+#endif
			
 
				+{
			
 
				+double x;
			
 
				+x = (*a >= 0 ? *a : - *a);
			
 
				+return( *b >= 0 ? x : -x);
			
 
				+}
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
--- a/min-dgels/additional/dcopy.c
+++ b/min-dgels/additional/dcopy.c
@@ -0,0 +1,107 @@
 
				+/* dcopy.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+/* Subroutine */ int dcopy_(integer *n, doublereal *dx, integer *incx, 
			
 
				+	doublereal *dy, integer *incy)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    integer i__1;
			
 
				+
			
 
				+    /* Local variables */
			
 
				+    integer i__, m, ix, iy, mp1;
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+/*     .. Array Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*     copies a vector, x, to a vector, y. */
			
 
				+/*     uses unrolled loops for increments equal to one. */
			
 
				+/*     jack dongarra, linpack, 3/11/78. */
			
 
				+/*     modified 12/3/93, array(1) declarations changed to array(*) */
			
 
				+
			
 
				+
			
 
				+/*     .. Local Scalars .. */
			
 
				+/*     .. */
			
 
				+/*     .. Intrinsic Functions .. */
			
 
				+/*     .. */
			
 
				+    /* Parameter adjustments */
			
 
				+    --dy;
			
 
				+    --dx;
			
 
				+
			
 
				+    /* Function Body */
			
 
				+    if (*n <= 0) {
			
 
				+	return 0;
			
 
				+    }
			
 
				+    if (*incx == 1 && *incy == 1) {
			
 
				+	goto L20;
			
 
				+    }
			
 
				+
			
 
				+/*        code for unequal increments or equal increments */
			
 
				+/*          not equal to 1 */
			
 
				+
			
 
				+    ix = 1;
			
 
				+    iy = 1;
			
 
				+    if (*incx < 0) {
			
 
				+	ix = (-(*n) + 1) * *incx + 1;
			
 
				+    }
			
 
				+    if (*incy < 0) {
			
 
				+	iy = (-(*n) + 1) * *incy + 1;
			
 
				+    }
			
 
				+    i__1 = *n;
			
 
				+    for (i__ = 1; i__ <= i__1; ++i__) {
			
 
				+	dy[iy] = dx[ix];
			
 
				+	ix += *incx;
			
 
				+	iy += *incy;
			
 
				+/* L10: */
			
 
				+    }
			
 
				+    return 0;
			
 
				+
			
 
				+/*        code for both increments equal to 1 */
			
 
				+
			
 
				+
			
 
				+/*        clean-up loop */
			
 
				+
			
 
				+L20:
			
 
				+    m = *n % 7;
			
 
				+    if (m == 0) {
			
 
				+	goto L40;
			
 
				+    }
			
 
				+    i__1 = m;
			
 
				+    for (i__ = 1; i__ <= i__1; ++i__) {
			
 
				+	dy[i__] = dx[i__];
			
 
				+/* L30: */
			
 
				+    }
			
 
				+    if (*n < 7) {
			
 
				+	return 0;
			
 
				+    }
			
 
				+L40:
			
 
				+    mp1 = m + 1;
			
 
				+    i__1 = *n;
			
 
				+    for (i__ = mp1; i__ <= i__1; i__ += 7) {
			
 
				+	dy[i__] = dx[i__];
			
 
				+	dy[i__ + 1] = dx[i__ + 1];
			
 
				+	dy[i__ + 2] = dx[i__ + 2];
			
 
				+	dy[i__ + 3] = dx[i__ + 3];
			
 
				+	dy[i__ + 4] = dx[i__ + 4];
			
 
				+	dy[i__ + 5] = dx[i__ + 5];
			
 
				+	dy[i__ + 6] = dx[i__ + 6];
			
 
				+/* L50: */
			
 
				+    }
			
 
				+    return 0;
			
 
				+} /* dcopy_ */
			
--- a/min-dgels/additional/dgelq2.c
+++ b/min-dgels/additional/dgelq2.c
@@ -0,0 +1,157 @@
 
				+/* dgelq2.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+/* Subroutine */ int dgelq2_(integer *m, integer *n, doublereal *a, integer *
			
 
				+	lda, doublereal *tau, doublereal *work, integer *info)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    integer a_dim1, a_offset, i__1, i__2, i__3;
			
 
				+
			
 
				+    /* Local variables */
			
 
				+    integer i__, k;
			
 
				+    doublereal aii;
			
 
				+    extern /* Subroutine */ int dlarf_(char *, integer *, integer *, 
			
 
				+	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
			
 
				+	    doublereal *), dlarfp_(integer *, doublereal *, 
			
 
				+	    doublereal *, integer *, doublereal *), xerbla_(char *, integer *);
			
 
				+
			
 
				+
			
 
				+/*  -- LAPACK routine (version 3.2) -- */
			
 
				+/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
			
 
				+/*     November 2006 */
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+/*     .. Array Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*  DGELQ2 computes an LQ factorization of a real m by n matrix A: */
			
 
				+/*  A = L * Q. */
			
 
				+
			
 
				+/*  Arguments */
			
 
				+/*  ========= */
			
 
				+
			
 
				+/*  M       (input) INTEGER */
			
 
				+/*          The number of rows of the matrix A.  M >= 0. */
			
 
				+
			
 
				+/*  N       (input) INTEGER */
			
 
				+/*          The number of columns of the matrix A.  N >= 0. */
			
 
				+
			
 
				+/*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N) */
			
 
				+/*          On entry, the m by n matrix A. */
			
 
				+/*          On exit, the elements on and below the diagonal of the array */
			
 
				+/*          contain the m by min(m,n) lower trapezoidal matrix L (L is */
			
 
				+/*          lower triangular if m <= n); the elements above the diagonal, */
			
 
				+/*          with the array TAU, represent the orthogonal matrix Q as a */
			
 
				+/*          product of elementary reflectors (see Further Details). */
			
 
				+
			
 
				+/*  LDA     (input) INTEGER */
			
 
				+/*          The leading dimension of the array A.  LDA >= max(1,M). */
			
 
				+
			
 
				+/*  TAU     (output) DOUBLE PRECISION array, dimension (min(M,N)) */
			
 
				+/*          The scalar factors of the elementary reflectors (see Further */
			
 
				+/*          Details). */
			
 
				+
			
 
				+/*  WORK    (workspace) DOUBLE PRECISION array, dimension (M) */
			
 
				+
			
 
				+/*  INFO    (output) INTEGER */
			
 
				+/*          = 0: successful exit */
			
 
				+/*          < 0: if INFO = -i, the i-th argument had an illegal value */
			
 
				+
			
 
				+/*  Further Details */
			
 
				+/*  =============== */
			
 
				+
			
 
				+/*  The matrix Q is represented as a product of elementary reflectors */
			
 
				+
			
 
				+/*     Q = H(k) . . . H(2) H(1), where k = min(m,n). */
			
 
				+
			
 
				+/*  Each H(i) has the form */
			
 
				+
			
 
				+/*     H(i) = I - tau * v * v' */
			
 
				+
			
 
				+/*  where tau is a real scalar, and v is a real vector with */
			
 
				+/*  v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), */
			
 
				+/*  and tau in TAU(i). */
			
 
				+
			
 
				+/*  ===================================================================== */
			
 
				+
			
 
				+/*     .. Parameters .. */
			
 
				+/*     .. */
			
 
				+/*     .. Local Scalars .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Subroutines .. */
			
 
				+/*     .. */
			
 
				+/*     .. Intrinsic Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. Executable Statements .. */
			
 
				+
			
 
				+/*     Test the input arguments */
			
 
				+
			
 
				+    /* Parameter adjustments */
			
 
				+    a_dim1 = *lda;
			
 
				+    a_offset = 1 + a_dim1;
			
 
				+    a -= a_offset;
			
 
				+    --tau;
			
 
				+    --work;
			
 
				+
			
 
				+    /* Function Body */
			
 
				+    *info = 0;
			
 
				+    if (*m < 0) {
			
 
				+	*info = -1;
			
 
				+    } else if (*n < 0) {
			
 
				+	*info = -2;
			
 
				+    } else if (*lda < max(1,*m)) {
			
 
				+	*info = -4;
			
 
				+    }
			
 
				+    if (*info != 0) {
			
 
				+	i__1 = -(*info);
			
 
				+	xerbla_("DGELQ2", &i__1);
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+    k = min(*m,*n);
			
 
				+
			
 
				+    i__1 = k;
			
 
				+    for (i__ = 1; i__ <= i__1; ++i__) {
			
 
				+
			
 
				+/*        Generate elementary reflector H(i) to annihilate A(i,i+1:n) */
			
 
				+
			
 
				+	i__2 = *n - i__ + 1;
			
 
				+/* Computing MIN */
			
 
				+	i__3 = i__ + 1;
			
 
				+	dlarfp_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3, *n)* a_dim1]
			
 
				+, lda, &tau[i__]);
			
 
				+	if (i__ < *m) {
			
 
				+
			
 
				+/*           Apply H(i) to A(i+1:m,i:n) from the right */
			
 
				+
			
 
				+	    aii = a[i__ + i__ * a_dim1];
			
 
				+	    a[i__ + i__ * a_dim1] = 1.;
			
 
				+	    i__2 = *m - i__;
			
 
				+	    i__3 = *n - i__ + 1;
			
 
				+	    dlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[
			
 
				+		    i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]);
			
 
				+	    a[i__ + i__ * a_dim1] = aii;
			
 
				+	}
			
 
				+/* L10: */
			
 
				+    }
			
 
				+    return 0;
			
 
				+
			
 
				+/*     End of DGELQ2 */
			
 
				+
			
 
				+} /* dgelq2_ */
			
--- a/min-dgels/additional/dgelqf.c
+++ b/min-dgels/additional/dgelqf.c
@@ -0,0 +1,251 @@
 
				+/* dgelqf.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+/* Table of constant values */
			
 
				+
			
 
				+static integer c__1 = 1;
			
 
				+static integer c_n1 = -1;
			
 
				+static integer c__3 = 3;
			
 
				+static integer c__2 = 2;
			
 
				+
			
 
				+/* Subroutine */ int dgelqf_(integer *m, integer *n, doublereal *a, integer *
			
 
				+	lda, doublereal *tau, doublereal *work, integer *lwork, integer *info)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
			
 
				+
			
 
				+    /* Local variables */
			
 
				+    integer i__, k, ib, nb, nx, iws, nbmin, iinfo;
			
 
				+    extern /* Subroutine */ int dgelq2_(integer *, integer *, doublereal *, 
			
 
				+	    integer *, doublereal *, doublereal *, integer *), dlarfb_(char *, 
			
 
				+	     char *, char *, char *, integer *, integer *, integer *, 
			
 
				+	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
			
 
				+	    integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal 
			
 
				+	    *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *);
			
 
				+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
			
 
				+	    integer *, integer *);
			
 
				+    integer ldwork, lwkopt;
			
 
				+    logical lquery;
			
 
				+
			
 
				+
			
 
				+/*  -- LAPACK routine (version 3.2) -- */
			
 
				+/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
			
 
				+/*     November 2006 */
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+/*     .. Array Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*  DGELQF computes an LQ factorization of a real M-by-N matrix A: */
			
 
				+/*  A = L * Q. */
			
 
				+
			
 
				+/*  Arguments */
			
 
				+/*  ========= */
			
 
				+
			
 
				+/*  M       (input) INTEGER */
			
 
				+/*          The number of rows of the matrix A.  M >= 0. */
			
 
				+
			
 
				+/*  N       (input) INTEGER */
			
 
				+/*          The number of columns of the matrix A.  N >= 0. */
			
 
				+
			
 
				+/*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N) */
			
 
				+/*          On entry, the M-by-N matrix A. */
			
 
				+/*          On exit, the elements on and below the diagonal of the array */
			
 
				+/*          contain the m-by-min(m,n) lower trapezoidal matrix L (L is */
			
 
				+/*          lower triangular if m <= n); the elements above the diagonal, */
			
 
				+/*          with the array TAU, represent the orthogonal matrix Q as a */
			
 
				+/*          product of elementary reflectors (see Further Details). */
			
 
				+
			
 
				+/*  LDA     (input) INTEGER */
			
 
				+/*          The leading dimension of the array A.  LDA >= max(1,M). */
			
 
				+
			
 
				+/*  TAU     (output) DOUBLE PRECISION array, dimension (min(M,N)) */
			
 
				+/*          The scalar factors of the elementary reflectors (see Further */
			
 
				+/*          Details). */
			
 
				+
			
 
				+/*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
			
 
				+/*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
			
 
				+
			
 
				+/*  LWORK   (input) INTEGER */
			
 
				+/*          The dimension of the array WORK.  LWORK >= max(1,M). */
			
 
				+/*          For optimum performance LWORK >= M*NB, where NB is the */
			
 
				+/*          optimal blocksize. */
			
 
				+
			
 
				+/*          If LWORK = -1, then a workspace query is assumed; the routine */
			
 
				+/*          only calculates the optimal size of the WORK array, returns */
			
 
				+/*          this value as the first entry of the WORK array, and no error */
			
 
				+/*          message related to LWORK is issued by XERBLA. */
			
 
				+
			
 
				+/*  INFO    (output) INTEGER */
			
 
				+/*          = 0:  successful exit */
			
 
				+/*          < 0:  if INFO = -i, the i-th argument had an illegal value */
			
 
				+
			
 
				+/*  Further Details */
			
 
				+/*  =============== */
			
 
				+
			
 
				+/*  The matrix Q is represented as a product of elementary reflectors */
			
 
				+
			
 
				+/*     Q = H(k) . . . H(2) H(1), where k = min(m,n). */
			
 
				+
			
 
				+/*  Each H(i) has the form */
			
 
				+
			
 
				+/*     H(i) = I - tau * v * v' */
			
 
				+
			
 
				+/*  where tau is a real scalar, and v is a real vector with */
			
 
				+/*  v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), */
			
 
				+/*  and tau in TAU(i). */
			
 
				+
			
 
				+/*  ===================================================================== */
			
 
				+
			
 
				+/*     .. Local Scalars .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Subroutines .. */
			
 
				+/*     .. */
			
 
				+/*     .. Intrinsic Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. Executable Statements .. */
			
 
				+
			
 
				+/*     Test the input arguments */
			
 
				+
			
 
				+    /* Parameter adjustments */
			
 
				+    a_dim1 = *lda;
			
 
				+    a_offset = 1 + a_dim1;
			
 
				+    a -= a_offset;
			
 
				+    --tau;
			
 
				+    --work;
			
 
				+
			
 
				+    /* Function Body */
			
 
				+    *info = 0;
			
 
				+    nb = ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, &c_n1);
			
 
				+    lwkopt = *m * nb;
			
 
				+    work[1] = (doublereal) lwkopt;
			
 
				+    lquery = *lwork == -1;
			
 
				+    if (*m < 0) {
			
 
				+	*info = -1;
			
 
				+    } else if (*n < 0) {
			
 
				+	*info = -2;
			
 
				+    } else if (*lda < max(1,*m)) {
			
 
				+	*info = -4;
			
 
				+    } else if (*lwork < max(1,*m) && ! lquery) {
			
 
				+	*info = -7;
			
 
				+    }
			
 
				+    if (*info != 0) {
			
 
				+	i__1 = -(*info);
			
 
				+	xerbla_("DGELQF", &i__1);
			
 
				+	return 0;
			
 
				+    } else if (lquery) {
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+/*     Quick return if possible */
			
 
				+
			
 
				+    k = min(*m,*n);
			
 
				+    if (k == 0) {
			
 
				+	work[1] = 1.;
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+    nbmin = 2;
			
 
				+    nx = 0;
			
 
				+    iws = *m;
			
 
				+    if (nb > 1 && nb < k) {
			
 
				+
			
 
				+/*        Determine when to cross over from blocked to unblocked code. */
			
 
				+
			
 
				+/* Computing MAX */
			
 
				+	i__1 = 0, i__2 = ilaenv_(&c__3, "DGELQF", " ", m, n, &c_n1, &c_n1);
			
 
				+	nx = max(i__1,i__2);
			
 
				+	if (nx < k) {
			
 
				+
			
 
				+/*           Determine if workspace is large enough for blocked code. */
			
 
				+
			
 
				+	    ldwork = *m;
			
 
				+	    iws = ldwork * nb;
			
 
				+	    if (*lwork < iws) {
			
 
				+
			
 
				+/*              Not enough workspace to use optimal NB:  reduce NB and */
			
 
				+/*              determine the minimum value of NB. */
			
 
				+
			
 
				+		nb = *lwork / ldwork;
			
 
				+/* Computing MAX */
			
 
				+		i__1 = 2, i__2 = ilaenv_(&c__2, "DGELQF", " ", m, n, &c_n1, &
			
 
				+			c_n1);
			
 
				+		nbmin = max(i__1,i__2);
			
 
				+	    }
			
 
				+	}
			
 
				+    }
			
 
				+
			
 
				+    if (nb >= nbmin && nb < k && nx < k) {
			
 
				+
			
 
				+/*        Use blocked code initially */
			
 
				+
			
 
				+	i__1 = k - nx;
			
 
				+	i__2 = nb;
			
 
				+	for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
			
 
				+/* Computing MIN */
			
 
				+	    i__3 = k - i__ + 1;
			
 
				+	    ib = min(i__3,nb);
			
 
				+
			
 
				+/*           Compute the LQ factorization of the current block */
			
 
				+/*           A(i:i+ib-1,i:n) */
			
 
				+
			
 
				+	    i__3 = *n - i__ + 1;
			
 
				+	    dgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[
			
 
				+		    1], &iinfo);
			
 
				+	    if (i__ + ib <= *m) {
			
 
				+
			
 
				+/*              Form the triangular factor of the block reflector */
			
 
				+/*              H = H(i) H(i+1) . . . H(i+ib-1) */
			
 
				+
			
 
				+		i__3 = *n - i__ + 1;
			
 
				+		dlarft_("Forward", "Rowwise", &i__3, &ib, &a[i__ + i__ * 
			
 
				+			a_dim1], lda, &tau[i__], &work[1], &ldwork);
			
 
				+
			
 
				+/*              Apply H to A(i+ib:m,i:n) from the right */
			
 
				+
			
 
				+		i__3 = *m - i__ - ib + 1;
			
 
				+		i__4 = *n - i__ + 1;
			
 
				+		dlarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3, 
			
 
				+			&i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], &
			
 
				+			ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + 
			
 
				+			1], &ldwork);
			
 
				+	    }
			
 
				+/* L10: */
			
 
				+	}
			
 
				+    } else {
			
 
				+	i__ = 1;
			
 
				+    }
			
 
				+
			
 
				+/*     Use unblocked code to factor the last or only block. */
			
 
				+
			
 
				+    if (i__ <= k) {
			
 
				+	i__2 = *m - i__ + 1;
			
 
				+	i__1 = *n - i__ + 1;
			
 
				+	dgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1]
			
 
				+, &iinfo);
			
 
				+    }
			
 
				+
			
 
				+    work[1] = (doublereal) iws;
			
 
				+    return 0;
			
 
				+
			
 
				+/*     End of DGELQF */
			
 
				+
			
 
				+} /* dgelqf_ */
			
--- a/min-dgels/additional/dgels.c
+++ b/min-dgels/additional/dgels.c
@@ -0,0 +1,515 @@
 
				+/* dgels.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+/* Table of constant values */
			
 
				+
			
 
				+static integer c__1 = 1;
			
 
				+static integer c_n1 = -1;
			
 
				+static doublereal c_b33 = 0.;
			
 
				+static integer c__0 = 0;
			
 
				+
			
 
				+/* Subroutine */ int dgels_(char *trans, integer *m, integer *n, integer *
			
 
				+	nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, 
			
 
				+	doublereal *work, integer *lwork, integer *info)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;
			
 
				+
			
 
				+    /* Local variables */
			
 
				+    integer i__, j, nb, mn;
			
 
				+    doublereal anrm, bnrm;
			
 
				+    integer brow;
			
 
				+    logical tpsd;
			
 
				+    integer iascl, ibscl;
			
 
				+    extern logical lsame_(char *, char *);
			
 
				+    integer wsize;
			
 
				+    doublereal rwork[1];
			
 
				+    extern /* Subroutine */ int dlabad_(doublereal *, doublereal *);
			
 
				+    extern doublereal dlamch_(char *), dlange_(char *, integer *, 
			
 
				+	    integer *, doublereal *, integer *, doublereal *);
			
 
				+    extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *, 
			
 
				+	    integer *, doublereal *, doublereal *, integer *, integer *), 
			
 
				+	    dlascl_(char *, integer *, integer *, doublereal *, doublereal *, 
			
 
				+	    integer *, integer *, doublereal *, integer *, integer *),
			
 
				+	     dgeqrf_(integer *, integer *, doublereal *, integer *, 
			
 
				+	    doublereal *, doublereal *, integer *, integer *), dlaset_(char *, 
			
 
				+	     integer *, integer *, doublereal *, doublereal *, doublereal *, 
			
 
				+	    integer *), xerbla_(char *, integer *);
			
 
				+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
			
 
				+	    integer *, integer *);
			
 
				+    integer scllen;
			
 
				+    doublereal bignum;
			
 
				+    extern /* Subroutine */ int dormlq_(char *, char *, integer *, integer *, 
			
 
				+	    integer *, doublereal *, integer *, doublereal *, doublereal *, 
			
 
				+	    integer *, doublereal *, integer *, integer *), 
			
 
				+	    dormqr_(char *, char *, integer *, integer *, integer *, 
			
 
				+	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
			
 
				+	    doublereal *, integer *, integer *);
			
 
				+    doublereal smlnum;
			
 
				+    logical lquery;
			
 
				+    extern /* Subroutine */ int dtrtrs_(char *, char *, char *, integer *, 
			
 
				+	    integer *, doublereal *, integer *, doublereal *, integer *, 
			
 
				+	    integer *);
			
 
				+
			
 
				+
			
 
				+/*  -- LAPACK driver routine (version 3.2) -- */
			
 
				+/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
			
 
				+/*     November 2006 */
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+/*     .. Array Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*  DGELS solves overdetermined or underdetermined real linear systems */
			
 
				+/*  involving an M-by-N matrix A, or its transpose, using a QR or LQ */
			
 
				+/*  factorization of A.  It is assumed that A has full rank. */
			
 
				+
			
 
				+/*  The following options are provided: */
			
 
				+
			
 
				+/*  1. If TRANS = 'N' and m >= n:  find the least squares solution of */
			
 
				+/*     an overdetermined system, i.e., solve the least squares problem */
			
 
				+/*                  minimize || B - A*X ||. */
			
 
				+
			
 
				+/*  2. If TRANS = 'N' and m < n:  find the minimum norm solution of */
			
 
				+/*     an underdetermined system A * X = B. */
			
 
				+
			
 
				+/*  3. If TRANS = 'T' and m >= n:  find the minimum norm solution of */
			
 
				+/*     an undetermined system A**T * X = B. */
			
 
				+
			
 
				+/*  4. If TRANS = 'T' and m < n:  find the least squares solution of */
			
 
				+/*     an overdetermined system, i.e., solve the least squares problem */
			
 
				+/*                  minimize || B - A**T * X ||. */
			
 
				+
			
 
				+/*  Several right hand side vectors b and solution vectors x can be */
			
 
				+/*  handled in a single call; they are stored as the columns of the */
			
 
				+/*  M-by-NRHS right hand side matrix B and the N-by-NRHS solution */
			
 
				+/*  matrix X. */
			
 
				+
			
 
				+/*  Arguments */
			
 
				+/*  ========= */
			
 
				+
			
 
				+/*  TRANS   (input) CHARACTER*1 */
			
 
				+/*          = 'N': the linear system involves A; */
			
 
				+/*          = 'T': the linear system involves A**T. */
			
 
				+
			
 
				+/*  M       (input) INTEGER */
			
 
				+/*          The number of rows of the matrix A.  M >= 0. */
			
 
				+
			
 
				+/*  N       (input) INTEGER */
			
 
				+/*          The number of columns of the matrix A.  N >= 0. */
			
 
				+
			
 
				+/*  NRHS    (input) INTEGER */
			
 
				+/*          The number of right hand sides, i.e., the number of */
			
 
				+/*          columns of the matrices B and X. NRHS >=0. */
			
 
				+
			
 
				+/*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N) */
			
 
				+/*          On entry, the M-by-N matrix A. */
			
 
				+/*          On exit, */
			
 
				+/*            if M >= N, A is overwritten by details of its QR */
			
 
				+/*                       factorization as returned by DGEQRF; */
			
 
				+/*            if M <  N, A is overwritten by details of its LQ */
			
 
				+/*                       factorization as returned by DGELQF. */
			
 
				+
			
 
				+/*  LDA     (input) INTEGER */
			
 
				+/*          The leading dimension of the array A.  LDA >= max(1,M). */
			
 
				+
			
 
				+/*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */
			
 
				+/*          On entry, the matrix B of right hand side vectors, stored */
			
 
				+/*          columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS */
			
 
				+/*          if TRANS = 'T'. */
			
 
				+/*          On exit, if INFO = 0, B is overwritten by the solution */
			
 
				+/*          vectors, stored columnwise: */
			
 
				+/*          if TRANS = 'N' and m >= n, rows 1 to n of B contain the least */
			
 
				+/*          squares solution vectors; the residual sum of squares for the */
			
 
				+/*          solution in each column is given by the sum of squares of */
			
 
				+/*          elements N+1 to M in that column; */
			
 
				+/*          if TRANS = 'N' and m < n, rows 1 to N of B contain the */
			
 
				+/*          minimum norm solution vectors; */
			
 
				+/*          if TRANS = 'T' and m >= n, rows 1 to M of B contain the */
			
 
				+/*          minimum norm solution vectors; */
			
 
				+/*          if TRANS = 'T' and m < n, rows 1 to M of B contain the */
			
 
				+/*          least squares solution vectors; the residual sum of squares */
			
 
				+/*          for the solution in each column is given by the sum of */
			
 
				+/*          squares of elements M+1 to N in that column. */
			
 
				+
			
 
				+/*  LDB     (input) INTEGER */
			
 
				+/*          The leading dimension of the array B. LDB >= MAX(1,M,N). */
			
 
				+
			
 
				+/*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
			
 
				+/*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
			
 
				+
			
 
				+/*  LWORK   (input) INTEGER */
			
 
				+/*          The dimension of the array WORK. */
			
 
				+/*          LWORK >= max( 1, MN + max( MN, NRHS ) ). */
			
 
				+/*          For optimal performance, */
			
 
				+/*          LWORK >= max( 1, MN + max( MN, NRHS )*NB ). */
			
 
				+/*          where MN = min(M,N) and NB is the optimum block size. */
			
 
				+
			
 
				+/*          If LWORK = -1, then a workspace query is assumed; the routine */
			
 
				+/*          only calculates the optimal size of the WORK array, returns */
			
 
				+/*          this value as the first entry of the WORK array, and no error */
			
 
				+/*          message related to LWORK is issued by XERBLA. */
			
 
				+
			
 
				+/*  INFO    (output) INTEGER */
			
 
				+/*          = 0:  successful exit */
			
 
				+/*          < 0:  if INFO = -i, the i-th argument had an illegal value */
			
 
				+/*          > 0:  if INFO =  i, the i-th diagonal element of the */
			
 
				+/*                triangular factor of A is zero, so that A does not have */
			
 
				+/*                full rank; the least squares solution could not be */
			
 
				+/*                computed. */
			
 
				+
			
 
				+/*  ===================================================================== */
			
 
				+
			
 
				+/*     .. Parameters .. */
			
 
				+/*     .. */
			
 
				+/*     .. Local Scalars .. */
			
 
				+/*     .. */
			
 
				+/*     .. Local Arrays .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Subroutines .. */
			
 
				+/*     .. */
			
 
				+/*     .. Intrinsic Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. Executable Statements .. */
			
 
				+
			
 
				+/*     Test the input arguments. */
			
 
				+
			
 
				+    /* Parameter adjustments */
			
 
				+    a_dim1 = *lda;
			
 
				+    a_offset = 1 + a_dim1;
			
 
				+    a -= a_offset;
			
 
				+    b_dim1 = *ldb;
			
 
				+    b_offset = 1 + b_dim1;
			
 
				+    b -= b_offset;
			
 
				+    --work;
			
 
				+
			
 
				+    /* Function Body */
			
 
				+    *info = 0;
			
 
				+    mn = min(*m,*n);
			
 
				+    lquery = *lwork == -1;
			
 
				+    if (! (lsame_(trans, "N") || lsame_(trans, "T"))) {
			
 
				+	*info = -1;
			
 
				+    } else if (*m < 0) {
			
 
				+	*info = -2;
			
 
				+    } else if (*n < 0) {
			
 
				+	*info = -3;
			
 
				+    } else if (*nrhs < 0) {
			
 
				+	*info = -4;
			
 
				+    } else if (*lda < max(1,*m)) {
			
 
				+	*info = -6;
			
 
				+    } else /* if(complicated condition) */ {
			
 
				+/* Computing MAX */
			
 
				+	i__1 = max(1,*m);
			
 
				+	if (*ldb < max(i__1,*n)) {
			
 
				+	    *info = -8;
			
 
				+	} else /* if(complicated condition) */ {
			
 
				+/* Computing MAX */
			
 
				+	    i__1 = 1, i__2 = mn + max(mn,*nrhs);
			
 
				+	    if (*lwork < max(i__1,i__2) && ! lquery) {
			
 
				+		*info = -10;
			
 
				+	    }
			
 
				+	}
			
 
				+    }
			
 
				+
			
 
				+/*     Figure out optimal block size */
			
 
				+
			
 
				+    if (*info == 0 || *info == -10) {
			
 
				+
			
 
				+	tpsd = TRUE_;
			
 
				+	if (lsame_(trans, "N")) {
			
 
				+	    tpsd = FALSE_;
			
 
				+	}
			
 
				+
			
 
				+	if (*m >= *n) {
			
 
				+	    nb = ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1);
			
 
				+	    if (tpsd) {
			
 
				+/* Computing MAX */
			
 
				+		i__1 = nb, i__2 = ilaenv_(&c__1, "DORMQR", "LN", m, nrhs, n, &
			
 
				+			c_n1);
			
 
				+		nb = max(i__1,i__2);
			
 
				+	    } else {
			
 
				+/* Computing MAX */
			
 
				+		i__1 = nb, i__2 = ilaenv_(&c__1, "DORMQR", "LT", m, nrhs, n, &
			
 
				+			c_n1);
			
 
				+		nb = max(i__1,i__2);
			
 
				+	    }
			
 
				+	} else {
			
 
				+	    nb = ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, &c_n1);
			
 
				+	    if (tpsd) {
			
 
				+/* Computing MAX */
			
 
				+		i__1 = nb, i__2 = ilaenv_(&c__1, "DORMLQ", "LT", n, nrhs, m, &
			
 
				+			c_n1);
			
 
				+		nb = max(i__1,i__2);
			
 
				+	    } else {
			
 
				+/* Computing MAX */
			
 
				+		i__1 = nb, i__2 = ilaenv_(&c__1, "DORMLQ", "LN", n, nrhs, m, &
			
 
				+			c_n1);
			
 
				+		nb = max(i__1,i__2);
			
 
				+	    }
			
 
				+	}
			
 
				+
			
 
				+/* Computing MAX */
			
 
				+	i__1 = 1, i__2 = mn + max(mn,*nrhs) * nb;
			
 
				+	wsize = max(i__1,i__2);
			
 
				+	work[1] = (doublereal) wsize;
			
 
				+
			
 
				+    }
			
 
				+
			
 
				+    if (*info != 0) {
			
 
				+	i__1 = -(*info);
			
 
				+	xerbla_("DGELS ", &i__1);
			
 
				+	return 0;
			
 
				+    } else if (lquery) {
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+/*     Quick return if possible */
			
 
				+
			
 
				+/* Computing MIN */
			
 
				+    i__1 = min(*m,*n);
			
 
				+    if (min(i__1,*nrhs) == 0) {
			
 
				+	i__1 = max(*m,*n);
			
 
				+	dlaset_("Full", &i__1, nrhs, &c_b33, &c_b33, &b[b_offset], ldb);
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+/*     Get machine parameters */
			
 
				+
			
 
				+    smlnum = dlamch_("S") / dlamch_("P");
			
 
				+    bignum = 1. / smlnum;
			
 
				+    dlabad_(&smlnum, &bignum);
			
 
				+
			
 
				+/*     Scale A, B if max element outside range [SMLNUM,BIGNUM] */
			
 
				+
			
 
				+    anrm = dlange_("M", m, n, &a[a_offset], lda, rwork);
			
 
				+    iascl = 0;
			
 
				+    if (anrm > 0. && anrm < smlnum) {
			
 
				+
			
 
				+/*        Scale matrix norm up to SMLNUM */
			
 
				+
			
 
				+	dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, 
			
 
				+		info);
			
 
				+	iascl = 1;
			
 
				+    } else if (anrm > bignum) {
			
 
				+
			
 
				+/*        Scale matrix norm down to BIGNUM */
			
 
				+
			
 
				+	dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, 
			
 
				+		info);
			
 
				+	iascl = 2;
			
 
				+    } else if (anrm == 0.) {
			
 
				+
			
 
				+/*        Matrix all zero. Return zero solution. */
			
 
				+
			
 
				+	i__1 = max(*m,*n);
			
 
				+	dlaset_("F", &i__1, nrhs, &c_b33, &c_b33, &b[b_offset], ldb);
			
 
				+	goto L50;
			
 
				+    }
			
 
				+
			
 
				+    brow = *m;
			
 
				+    if (tpsd) {
			
 
				+	brow = *n;
			
 
				+    }
			
 
				+    bnrm = dlange_("M", &brow, nrhs, &b[b_offset], ldb, rwork);
			
 
				+    ibscl = 0;
			
 
				+    if (bnrm > 0. && bnrm < smlnum) {
			
 
				+
			
 
				+/*        Scale matrix norm up to SMLNUM */
			
 
				+
			
 
				+	dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, &brow, nrhs, &b[b_offset], 
			
 
				+		ldb, info);
			
 
				+	ibscl = 1;
			
 
				+    } else if (bnrm > bignum) {
			
 
				+
			
 
				+/*        Scale matrix norm down to BIGNUM */
			
 
				+
			
 
				+	dlascl_("G", &c__0, &c__0, &bnrm, &bignum, &brow, nrhs, &b[b_offset], 
			
 
				+		ldb, info);
			
 
				+	ibscl = 2;
			
 
				+    }
			
 
				+
			
 
				+    if (*m >= *n) {
			
 
				+
			
 
				+/*        compute QR factorization of A */
			
 
				+
			
 
				+	i__1 = *lwork - mn;
			
 
				+	dgeqrf_(m, n, &a[a_offset], lda, &work[1], &work[mn + 1], &i__1, info)
			
 
				+		;
			
 
				+
			
 
				+/*        workspace at least N, optimally N*NB */
			
 
				+
			
 
				+	if (! tpsd) {
			
 
				+
			
 
				+/*           Least-Squares Problem min || A * X - B || */
			
 
				+
			
 
				+/*           B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS) */
			
 
				+
			
 
				+	    i__1 = *lwork - mn;
			
 
				+	    dormqr_("Left", "Transpose", m, nrhs, n, &a[a_offset], lda, &work[
			
 
				+		    1], &b[b_offset], ldb, &work[mn + 1], &i__1, info);
			
 
				+
			
 
				+/*           workspace at least NRHS, optimally NRHS*NB */
			
 
				+
			
 
				+/*           B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) */
			
 
				+
			
 
				+	    dtrtrs_("Upper", "No transpose", "Non-unit", n, nrhs, &a[a_offset]
			
 
				+, lda, &b[b_offset], ldb, info);
			
 
				+
			
 
				+	    if (*info > 0) {
			
 
				+		return 0;
			
 
				+	    }
			
 
				+
			
 
				+	    scllen = *n;
			
 
				+
			
 
				+	} else {
			
 
				+
			
 
				+/*           Overdetermined system of equations A' * X = B */
			
 
				+
			
 
				+/*           B(1:N,1:NRHS) := inv(R') * B(1:N,1:NRHS) */
			
 
				+
			
 
				+	    dtrtrs_("Upper", "Transpose", "Non-unit", n, nrhs, &a[a_offset], 
			
 
				+		    lda, &b[b_offset], ldb, info);
			
 
				+
			
 
				+	    if (*info > 0) {
			
 
				+		return 0;
			
 
				+	    }
			
 
				+
			
 
				+/*           B(N+1:M,1:NRHS) = ZERO */
			
 
				+
			
 
				+	    i__1 = *nrhs;
			
 
				+	    for (j = 1; j <= i__1; ++j) {
			
 
				+		i__2 = *m;
			
 
				+		for (i__ = *n + 1; i__ <= i__2; ++i__) {
			
 
				+		    b[i__ + j * b_dim1] = 0.;
			
 
				+/* L10: */
			
 
				+		}
			
 
				+/* L20: */
			
 
				+	    }
			
 
				+
			
 
				+/*           B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS) */
			
 
				+
			
 
				+	    i__1 = *lwork - mn;
			
 
				+	    dormqr_("Left", "No transpose", m, nrhs, n, &a[a_offset], lda, &
			
 
				+		    work[1], &b[b_offset], ldb, &work[mn + 1], &i__1, info);
			
 
				+
			
 
				+/*           workspace at least NRHS, optimally NRHS*NB */
			
 
				+
			
 
				+	    scllen = *m;
			
 
				+
			
 
				+	}
			
 
				+
			
 
				+    } else {
			
 
				+
			
 
				+/*        Compute LQ factorization of A */
			
 
				+
			
 
				+	i__1 = *lwork - mn;
			
 
				+	dgelqf_(m, n, &a[a_offset], lda, &work[1], &work[mn + 1], &i__1, info)
			
 
				+		;
			
 
				+
			
 
				+/*        workspace at least M, optimally M*NB. */
			
 
				+
			
 
				+	if (! tpsd) {
			
 
				+
			
 
				+/*           underdetermined system of equations A * X = B */
			
 
				+
			
 
				+/*           B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) */
			
 
				+
			
 
				+	    dtrtrs_("Lower", "No transpose", "Non-unit", m, nrhs, &a[a_offset]
			
 
				+, lda, &b[b_offset], ldb, info);
			
 
				+
			
 
				+	    if (*info > 0) {
			
 
				+		return 0;
			
 
				+	    }
			
 
				+
			
 
				+/*           B(M+1:N,1:NRHS) = 0 */
			
 
				+
			
 
				+	    i__1 = *nrhs;
			
 
				+	    for (j = 1; j <= i__1; ++j) {
			
 
				+		i__2 = *n;
			
 
				+		for (i__ = *m + 1; i__ <= i__2; ++i__) {
			
 
				+		    b[i__ + j * b_dim1] = 0.;
			
 
				+/* L30: */
			
 
				+		}
			
 
				+/* L40: */
			
 
				+	    }
			
 
				+
			
 
				+/*           B(1:N,1:NRHS) := Q(1:N,:)' * B(1:M,1:NRHS) */
			
 
				+
			
 
				+	    i__1 = *lwork - mn;
			
 
				+	    dormlq_("Left", "Transpose", n, nrhs, m, &a[a_offset], lda, &work[
			
 
				+		    1], &b[b_offset], ldb, &work[mn + 1], &i__1, info);
			
 
				+
			
 
				+/*           workspace at least NRHS, optimally NRHS*NB */
			
 
				+
			
 
				+	    scllen = *n;
			
 
				+
			
 
				+	} else {
			
 
				+
			
 
				+/*           overdetermined system min || A' * X - B || */
			
 
				+
			
 
				+/*           B(1:N,1:NRHS) := Q * B(1:N,1:NRHS) */
			
 
				+
			
 
				+	    i__1 = *lwork - mn;
			
 
				+	    dormlq_("Left", "No transpose", n, nrhs, m, &a[a_offset], lda, &
			
 
				+		    work[1], &b[b_offset], ldb, &work[mn + 1], &i__1, info);
			
 
				+
			
 
				+/*           workspace at least NRHS, optimally NRHS*NB */
			
 
				+
			
 
				+/*           B(1:M,1:NRHS) := inv(L') * B(1:M,1:NRHS) */
			
 
				+
			
 
				+	    dtrtrs_("Lower", "Transpose", "Non-unit", m, nrhs, &a[a_offset], 
			
 
				+		    lda, &b[b_offset], ldb, info);
			
 
				+
			
 
				+	    if (*info > 0) {
			
 
				+		return 0;
			
 
				+	    }
			
 
				+
			
 
				+	    scllen = *m;
			
 
				+
			
 
				+	}
			
 
				+
			
 
				+    }
			
 
				+
			
 
				+/*     Undo scaling */
			
 
				+
			
 
				+    if (iascl == 1) {
			
 
				+	dlascl_("G", &c__0, &c__0, &anrm, &smlnum, &scllen, nrhs, &b[b_offset]
			
 
				+, ldb, info);
			
 
				+    } else if (iascl == 2) {
			
 
				+	dlascl_("G", &c__0, &c__0, &anrm, &bignum, &scllen, nrhs, &b[b_offset]
			
 
				+, ldb, info);
			
 
				+    }
			
 
				+    if (ibscl == 1) {
			
 
				+	dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, &scllen, nrhs, &b[b_offset]
			
 
				+, ldb, info);
			
 
				+    } else if (ibscl == 2) {
			
 
				+	dlascl_("G", &c__0, &c__0, &bignum, &bnrm, &scllen, nrhs, &b[b_offset]
			
 
				+, ldb, info);
			
 
				+    }
			
 
				+
			
 
				+L50:
			
 
				+    work[1] = (doublereal) wsize;
			
 
				+
			
 
				+    return 0;
			
 
				+
			
 
				+/*     End of DGELS */
			
 
				+
			
 
				+} /* dgels_ */
			
--- a/min-dgels/additional/dgemm.c
+++ b/min-dgels/additional/dgemm.c
@@ -0,0 +1,389 @@
 
				+/* dgemm.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+/* Subroutine */ int dgemm_(char *transa, char *transb, integer *m, integer *
			
 
				+	n, integer *k, doublereal *alpha, doublereal *a, integer *lda, 
			
 
				+	doublereal *b, integer *ldb, doublereal *beta, doublereal *c__, 
			
 
				+	integer *ldc)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
			
 
				+	    i__3;
			
 
				+
			
 
				+    /* Local variables */
			
 
				+    integer i__, j, l, info;
			
 
				+    logical nota, notb;
			
 
				+    doublereal temp;
			
 
				+    integer ncola;
			
 
				+    extern logical lsame_(char *, char *);
			
 
				+    integer nrowa, nrowb;
			
 
				+    extern /* Subroutine */ int xerbla_(char *, integer *);
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+/*     .. Array Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*  DGEMM  performs one of the matrix-matrix operations */
			
 
				+
			
 
				+/*     C := alpha*op( A )*op( B ) + beta*C, */
			
 
				+
			
 
				+/*  where  op( X ) is one of */
			
 
				+
			
 
				+/*     op( X ) = X   or   op( X ) = X', */
			
 
				+
			
 
				+/*  alpha and beta are scalars, and A, B and C are matrices, with op( A ) */
			
 
				+/*  an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix. */
			
 
				+
			
 
				+/*  Arguments */
			
 
				+/*  ========== */
			
 
				+
			
 
				+/*  TRANSA - CHARACTER*1. */
			
 
				+/*           On entry, TRANSA specifies the form of op( A ) to be used in */
			
 
				+/*           the matrix multiplication as follows: */
			
 
				+
			
 
				+/*              TRANSA = 'N' or 'n',  op( A ) = A. */
			
 
				+
			
 
				+/*              TRANSA = 'T' or 't',  op( A ) = A'. */
			
 
				+
			
 
				+/*              TRANSA = 'C' or 'c',  op( A ) = A'. */
			
 
				+
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  TRANSB - CHARACTER*1. */
			
 
				+/*           On entry, TRANSB specifies the form of op( B ) to be used in */
			
 
				+/*           the matrix multiplication as follows: */
			
 
				+
			
 
				+/*              TRANSB = 'N' or 'n',  op( B ) = B. */
			
 
				+
			
 
				+/*              TRANSB = 'T' or 't',  op( B ) = B'. */
			
 
				+
			
 
				+/*              TRANSB = 'C' or 'c',  op( B ) = B'. */
			
 
				+
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  M      - INTEGER. */
			
 
				+/*           On entry,  M  specifies  the number  of rows  of the  matrix */
			
 
				+/*           op( A )  and of the  matrix  C.  M  must  be at least  zero. */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  N      - INTEGER. */
			
 
				+/*           On entry,  N  specifies the number  of columns of the matrix */
			
 
				+/*           op( B ) and the number of columns of the matrix C. N must be */
			
 
				+/*           at least zero. */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  K      - INTEGER. */
			
 
				+/*           On entry,  K  specifies  the number of columns of the matrix */
			
 
				+/*           op( A ) and the number of rows of the matrix op( B ). K must */
			
 
				+/*           be at least  zero. */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  ALPHA  - DOUBLE PRECISION. */
			
 
				+/*           On entry, ALPHA specifies the scalar alpha. */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is */
			
 
				+/*           k  when  TRANSA = 'N' or 'n',  and is  m  otherwise. */
			
 
				+/*           Before entry with  TRANSA = 'N' or 'n',  the leading  m by k */
			
 
				+/*           part of the array  A  must contain the matrix  A,  otherwise */
			
 
				+/*           the leading  k by m  part of the array  A  must contain  the */
			
 
				+/*           matrix A. */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  LDA    - INTEGER. */
			
 
				+/*           On entry, LDA specifies the first dimension of A as declared */
			
 
				+/*           in the calling (sub) program. When  TRANSA = 'N' or 'n' then */
			
 
				+/*           LDA must be at least  max( 1, m ), otherwise  LDA must be at */
			
 
				+/*           least  max( 1, k ). */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  B      - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is */
			
 
				+/*           n  when  TRANSB = 'N' or 'n',  and is  k  otherwise. */
			
 
				+/*           Before entry with  TRANSB = 'N' or 'n',  the leading  k by n */
			
 
				+/*           part of the array  B  must contain the matrix  B,  otherwise */
			
 
				+/*           the leading  n by k  part of the array  B  must contain  the */
			
 
				+/*           matrix B. */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  LDB    - INTEGER. */
			
 
				+/*           On entry, LDB specifies the first dimension of B as declared */
			
 
				+/*           in the calling (sub) program. When  TRANSB = 'N' or 'n' then */
			
 
				+/*           LDB must be at least  max( 1, k ), otherwise  LDB must be at */
			
 
				+/*           least  max( 1, n ). */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  BETA   - DOUBLE PRECISION. */
			
 
				+/*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is */
			
 
				+/*           supplied as zero then C need not be set on input. */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  C      - DOUBLE PRECISION array of DIMENSION ( LDC, n ). */
			
 
				+/*           Before entry, the leading  m by n  part of the array  C must */
			
 
				+/*           contain the matrix  C,  except when  beta  is zero, in which */
			
 
				+/*           case C need not be set on entry. */
			
 
				+/*           On exit, the array  C  is overwritten by the  m by n  matrix */
			
 
				+/*           ( alpha*op( A )*op( B ) + beta*C ). */
			
 
				+
			
 
				+/*  LDC    - INTEGER. */
			
 
				+/*           On entry, LDC specifies the first dimension of C as declared */
			
 
				+/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
			
 
				+/*           max( 1, m ). */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+
			
 
				+/*  Level 3 Blas routine. */
			
 
				+
			
 
				+/*  -- Written on 8-February-1989. */
			
 
				+/*     Jack Dongarra, Argonne National Laboratory. */
			
 
				+/*     Iain Duff, AERE Harwell. */
			
 
				+/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
			
 
				+/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
			
 
				+
			
 
				+
			
 
				+/*     .. External Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Subroutines .. */
			
 
				+/*     .. */
			
 
				+/*     .. Intrinsic Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. Local Scalars .. */
			
 
				+/*     .. */
			
 
				+/*     .. Parameters .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*     Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not */
			
 
				+/*     transposed and set  NROWA, NCOLA and  NROWB  as the number of rows */
			
 
				+/*     and  columns of  A  and the  number of  rows  of  B  respectively. */
			
 
				+
			
 
				+    /* Parameter adjustments */
			
 
				+    a_dim1 = *lda;
			
 
				+    a_offset = 1 + a_dim1;
			
 
				+    a -= a_offset;
			
 
				+    b_dim1 = *ldb;
			
 
				+    b_offset = 1 + b_dim1;
			
 
				+    b -= b_offset;
			
 
				+    c_dim1 = *ldc;
			
 
				+    c_offset = 1 + c_dim1;
			
 
				+    c__ -= c_offset;
			
 
				+
			
 
				+    /* Function Body */
			
 
				+    nota = lsame_(transa, "N");
			
 
				+    notb = lsame_(transb, "N");
			
 
				+    if (nota) {
			
 
				+	nrowa = *m;
			
 
				+	ncola = *k;
			
 
				+    } else {
			
 
				+	nrowa = *k;
			
 
				+	ncola = *m;
			
 
				+    }
			
 
				+    if (notb) {
			
 
				+	nrowb = *k;
			
 
				+    } else {
			
 
				+	nrowb = *n;
			
 
				+    }
			
 
				+
			
 
				+/*     Test the input parameters. */
			
 
				+
			
 
				+    info = 0;
			
 
				+    if (! nota && ! lsame_(transa, "C") && ! lsame_(
			
 
				+	    transa, "T")) {
			
 
				+	info = 1;
			
 
				+    } else if (! notb && ! lsame_(transb, "C") && ! 
			
 
				+	    lsame_(transb, "T")) {
			
 
				+	info = 2;
			
 
				+    } else if (*m < 0) {
			
 
				+	info = 3;
			
 
				+    } else if (*n < 0) {
			
 
				+	info = 4;
			
 
				+    } else if (*k < 0) {
			
 
				+	info = 5;
			
 
				+    } else if (*lda < max(1,nrowa)) {
			
 
				+	info = 8;
			
 
				+    } else if (*ldb < max(1,nrowb)) {
			
 
				+	info = 10;
			
 
				+    } else if (*ldc < max(1,*m)) {
			
 
				+	info = 13;
			
 
				+    }
			
 
				+    if (info != 0) {
			
 
				+	xerbla_("DGEMM ", &info);
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+/*     Quick return if possible. */
			
 
				+
			
 
				+    if (*m == 0 || *n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) {
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+/*     And if  alpha.eq.zero. */
			
 
				+
			
 
				+    if (*alpha == 0.) {
			
 
				+	if (*beta == 0.) {
			
 
				+	    i__1 = *n;
			
 
				+	    for (j = 1; j <= i__1; ++j) {
			
 
				+		i__2 = *m;
			
 
				+		for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+		    c__[i__ + j * c_dim1] = 0.;
			
 
				+/* L10: */
			
 
				+		}
			
 
				+/* L20: */
			
 
				+	    }
			
 
				+	} else {
			
 
				+	    i__1 = *n;
			
 
				+	    for (j = 1; j <= i__1; ++j) {
			
 
				+		i__2 = *m;
			
 
				+		for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+		    c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
			
 
				+/* L30: */
			
 
				+		}
			
 
				+/* L40: */
			
 
				+	    }
			
 
				+	}
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+/*     Start the operations. */
			
 
				+
			
 
				+    if (notb) {
			
 
				+	if (nota) {
			
 
				+
			
 
				+/*           Form  C := alpha*A*B + beta*C. */
			
 
				+
			
 
				+	    i__1 = *n;
			
 
				+	    for (j = 1; j <= i__1; ++j) {
			
 
				+		if (*beta == 0.) {
			
 
				+		    i__2 = *m;
			
 
				+		    for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+			c__[i__ + j * c_dim1] = 0.;
			
 
				+/* L50: */
			
 
				+		    }
			
 
				+		} else if (*beta != 1.) {
			
 
				+		    i__2 = *m;
			
 
				+		    for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+			c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
			
 
				+/* L60: */
			
 
				+		    }
			
 
				+		}
			
 
				+		i__2 = *k;
			
 
				+		for (l = 1; l <= i__2; ++l) {
			
 
				+		    if (b[l + j * b_dim1] != 0.) {
			
 
				+			temp = *alpha * b[l + j * b_dim1];
			
 
				+			i__3 = *m;
			
 
				+			for (i__ = 1; i__ <= i__3; ++i__) {
			
 
				+			    c__[i__ + j * c_dim1] += temp * a[i__ + l * 
			
 
				+				    a_dim1];
			
 
				+/* L70: */
			
 
				+			}
			
 
				+		    }
			
 
				+/* L80: */
			
 
				+		}
			
 
				+/* L90: */
			
 
				+	    }
			
 
				+	} else {
			
 
				+
			
 
				+/*           Form  C := alpha*A'*B + beta*C */
			
 
				+
			
 
				+	    i__1 = *n;
			
 
				+	    for (j = 1; j <= i__1; ++j) {
			
 
				+		i__2 = *m;
			
 
				+		for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+		    temp = 0.;
			
 
				+		    i__3 = *k;
			
 
				+		    for (l = 1; l <= i__3; ++l) {
			
 
				+			temp += a[l + i__ * a_dim1] * b[l + j * b_dim1];
			
 
				+/* L100: */
			
 
				+		    }
			
 
				+		    if (*beta == 0.) {
			
 
				+			c__[i__ + j * c_dim1] = *alpha * temp;
			
 
				+		    } else {
			
 
				+			c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[
			
 
				+				i__ + j * c_dim1];
			
 
				+		    }
			
 
				+/* L110: */
			
 
				+		}
			
 
				+/* L120: */
			
 
				+	    }
			
 
				+	}
			
 
				+    } else {
			
 
				+	if (nota) {
			
 
				+
			
 
				+/*           Form  C := alpha*A*B' + beta*C */
			
 
				+
			
 
				+	    i__1 = *n;
			
 
				+	    for (j = 1; j <= i__1; ++j) {
			
 
				+		if (*beta == 0.) {
			
 
				+		    i__2 = *m;
			
 
				+		    for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+			c__[i__ + j * c_dim1] = 0.;
			
 
				+/* L130: */
			
 
				+		    }
			
 
				+		} else if (*beta != 1.) {
			
 
				+		    i__2 = *m;
			
 
				+		    for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+			c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
			
 
				+/* L140: */
			
 
				+		    }
			
 
				+		}
			
 
				+		i__2 = *k;
			
 
				+		for (l = 1; l <= i__2; ++l) {
			
 
				+		    if (b[j + l * b_dim1] != 0.) {
			
 
				+			temp = *alpha * b[j + l * b_dim1];
			
 
				+			i__3 = *m;
			
 
				+			for (i__ = 1; i__ <= i__3; ++i__) {
			
 
				+			    c__[i__ + j * c_dim1] += temp * a[i__ + l * 
			
 
				+				    a_dim1];
			
 
				+/* L150: */
			
 
				+			}
			
 
				+		    }
			
 
				+/* L160: */
			
 
				+		}
			
 
				+/* L170: */
			
 
				+	    }
			
 
				+	} else {
			
 
				+
			
 
				+/*           Form  C := alpha*A'*B' + beta*C */
			
 
				+
			
 
				+	    i__1 = *n;
			
 
				+	    for (j = 1; j <= i__1; ++j) {
			
 
				+		i__2 = *m;
			
 
				+		for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+		    temp = 0.;
			
 
				+		    i__3 = *k;
			
 
				+		    for (l = 1; l <= i__3; ++l) {
			
 
				+			temp += a[l + i__ * a_dim1] * b[j + l * b_dim1];
			
 
				+/* L180: */
			
 
				+		    }
			
 
				+		    if (*beta == 0.) {
			
 
				+			c__[i__ + j * c_dim1] = *alpha * temp;
			
 
				+		    } else {
			
 
				+			c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[
			
 
				+				i__ + j * c_dim1];
			
 
				+		    }
			
 
				+/* L190: */
			
 
				+		}
			
 
				+/* L200: */
			
 
				+	    }
			
 
				+	}
			
 
				+    }
			
 
				+
			
 
				+    return 0;
			
 
				+
			
 
				+/*     End of DGEMM . */
			
 
				+
			
 
				+} /* dgemm_ */
			
--- a/min-dgels/additional/dgemv.c
+++ b/min-dgels/additional/dgemv.c
@@ -0,0 +1,312 @@
 
				+/* dgemv.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+/* Subroutine */ int dgemv_(char *trans, integer *m, integer *n, doublereal *
			
 
				+	alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, 
			
 
				+	doublereal *beta, doublereal *y, integer *incy)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    integer a_dim1, a_offset, i__1, i__2;
			
 
				+
			
 
				+    /* Local variables */
			
 
				+    integer i__, j, ix, iy, jx, jy, kx, ky, info;
			
 
				+    doublereal temp;
			
 
				+    integer lenx, leny;
			
 
				+    extern logical lsame_(char *, char *);
			
 
				+    extern /* Subroutine */ int xerbla_(char *, integer *);
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+/*     .. Array Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*  DGEMV  performs one of the matrix-vector operations */
			
 
				+
			
 
				+/*     y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y, */
			
 
				+
			
 
				+/*  where alpha and beta are scalars, x and y are vectors and A is an */
			
 
				+/*  m by n matrix. */
			
 
				+
			
 
				+/*  Arguments */
			
 
				+/*  ========== */
			
 
				+
			
 
				+/*  TRANS  - CHARACTER*1. */
			
 
				+/*           On entry, TRANS specifies the operation to be performed as */
			
 
				+/*           follows: */
			
 
				+
			
 
				+/*              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y. */
			
 
				+
			
 
				+/*              TRANS = 'T' or 't'   y := alpha*A'*x + beta*y. */
			
 
				+
			
 
				+/*              TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y. */
			
 
				+
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  M      - INTEGER. */
			
 
				+/*           On entry, M specifies the number of rows of the matrix A. */
			
 
				+/*           M must be at least zero. */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  N      - INTEGER. */
			
 
				+/*           On entry, N specifies the number of columns of the matrix A. */
			
 
				+/*           N must be at least zero. */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  ALPHA  - DOUBLE PRECISION. */
			
 
				+/*           On entry, ALPHA specifies the scalar alpha. */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */
			
 
				+/*           Before entry, the leading m by n part of the array A must */
			
 
				+/*           contain the matrix of coefficients. */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  LDA    - INTEGER. */
			
 
				+/*           On entry, LDA specifies the first dimension of A as declared */
			
 
				+/*           in the calling (sub) program. LDA must be at least */
			
 
				+/*           max( 1, m ). */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  X      - DOUBLE PRECISION array of DIMENSION at least */
			
 
				+/*           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */
			
 
				+/*           and at least */
			
 
				+/*           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */
			
 
				+/*           Before entry, the incremented array X must contain the */
			
 
				+/*           vector x. */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  INCX   - INTEGER. */
			
 
				+/*           On entry, INCX specifies the increment for the elements of */
			
 
				+/*           X. INCX must not be zero. */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  BETA   - DOUBLE PRECISION. */
			
 
				+/*           On entry, BETA specifies the scalar beta. When BETA is */
			
 
				+/*           supplied as zero then Y need not be set on input. */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  Y      - DOUBLE PRECISION array of DIMENSION at least */
			
 
				+/*           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */
			
 
				+/*           and at least */
			
 
				+/*           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */
			
 
				+/*           Before entry with BETA non-zero, the incremented array Y */
			
 
				+/*           must contain the vector y. On exit, Y is overwritten by the */
			
 
				+/*           updated vector y. */
			
 
				+
			
 
				+/*  INCY   - INTEGER. */
			
 
				+/*           On entry, INCY specifies the increment for the elements of */
			
 
				+/*           Y. INCY must not be zero. */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+
			
 
				+/*  Level 2 Blas routine. */
			
 
				+
			
 
				+/*  -- Written on 22-October-1986. */
			
 
				+/*     Jack Dongarra, Argonne National Lab. */
			
 
				+/*     Jeremy Du Croz, Nag Central Office. */
			
 
				+/*     Sven Hammarling, Nag Central Office. */
			
 
				+/*     Richard Hanson, Sandia National Labs. */
			
 
				+
			
 
				+
			
 
				+/*     .. Parameters .. */
			
 
				+/*     .. */
			
 
				+/*     .. Local Scalars .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Subroutines .. */
			
 
				+/*     .. */
			
 
				+/*     .. Intrinsic Functions .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*     Test the input parameters. */
			
 
				+
			
 
				+    /* Parameter adjustments */
			
 
				+    a_dim1 = *lda;
			
 
				+    a_offset = 1 + a_dim1;
			
 
				+    a -= a_offset;
			
 
				+    --x;
			
 
				+    --y;
			
 
				+
			
 
				+    /* Function Body */
			
 
				+    info = 0;
			
 
				+    if (! lsame_(trans, "N") && ! lsame_(trans, "T") && ! lsame_(trans, "C")
			
 
				+	    ) {
			
 
				+	info = 1;
			
 
				+    } else if (*m < 0) {
			
 
				+	info = 2;
			
 
				+    } else if (*n < 0) {
			
 
				+	info = 3;
			
 
				+    } else if (*lda < max(1,*m)) {
			
 
				+	info = 6;
			
 
				+    } else if (*incx == 0) {
			
 
				+	info = 8;
			
 
				+    } else if (*incy == 0) {
			
 
				+	info = 11;
			
 
				+    }
			
 
				+    if (info != 0) {
			
 
				+	xerbla_("DGEMV ", &info);
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+/*     Quick return if possible. */
			
 
				+
			
 
				+    if (*m == 0 || *n == 0 || *alpha == 0. && *beta == 1.) {
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+/*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set */
			
 
				+/*     up the start points in  X  and  Y. */
			
 
				+
			
 
				+    if (lsame_(trans, "N")) {
			
 
				+	lenx = *n;
			
 
				+	leny = *m;
			
 
				+    } else {
			
 
				+	lenx = *m;
			
 
				+	leny = *n;
			
 
				+    }
			
 
				+    if (*incx > 0) {
			
 
				+	kx = 1;
			
 
				+    } else {
			
 
				+	kx = 1 - (lenx - 1) * *incx;
			
 
				+    }
			
 
				+    if (*incy > 0) {
			
 
				+	ky = 1;
			
 
				+    } else {
			
 
				+	ky = 1 - (leny - 1) * *incy;
			
 
				+    }
			
 
				+
			
 
				+/*     Start the operations. In this version the elements of A are */
			
 
				+/*     accessed sequentially with one pass through A. */
			
 
				+
			
 
				+/*     First form  y := beta*y. */
			
 
				+
			
 
				+    if (*beta != 1.) {
			
 
				+	if (*incy == 1) {
			
 
				+	    if (*beta == 0.) {
			
 
				+		i__1 = leny;
			
 
				+		for (i__ = 1; i__ <= i__1; ++i__) {
			
 
				+		    y[i__] = 0.;
			
 
				+/* L10: */
			
 
				+		}
			
 
				+	    } else {
			
 
				+		i__1 = leny;
			
 
				+		for (i__ = 1; i__ <= i__1; ++i__) {
			
 
				+		    y[i__] = *beta * y[i__];
			
 
				+/* L20: */
			
 
				+		}
			
 
				+	    }
			
 
				+	} else {
			
 
				+	    iy = ky;
			
 
				+	    if (*beta == 0.) {
			
 
				+		i__1 = leny;
			
 
				+		for (i__ = 1; i__ <= i__1; ++i__) {
			
 
				+		    y[iy] = 0.;
			
 
				+		    iy += *incy;
			
 
				+/* L30: */
			
 
				+		}
			
 
				+	    } else {
			
 
				+		i__1 = leny;
			
 
				+		for (i__ = 1; i__ <= i__1; ++i__) {
			
 
				+		    y[iy] = *beta * y[iy];
			
 
				+		    iy += *incy;
			
 
				+/* L40: */
			
 
				+		}
			
 
				+	    }
			
 
				+	}
			
 
				+    }
			
 
				+    if (*alpha == 0.) {
			
 
				+	return 0;
			
 
				+    }
			
 
				+    if (lsame_(trans, "N")) {
			
 
				+
			
 
				+/*        Form  y := alpha*A*x + y. */
			
 
				+
			
 
				+	jx = kx;
			
 
				+	if (*incy == 1) {
			
 
				+	    i__1 = *n;
			
 
				+	    for (j = 1; j <= i__1; ++j) {
			
 
				+		if (x[jx] != 0.) {
			
 
				+		    temp = *alpha * x[jx];
			
 
				+		    i__2 = *m;
			
 
				+		    for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+			y[i__] += temp * a[i__ + j * a_dim1];
			
 
				+/* L50: */
			
 
				+		    }
			
 
				+		}
			
 
				+		jx += *incx;
			
 
				+/* L60: */
			
 
				+	    }
			
 
				+	} else {
			
 
				+	    i__1 = *n;
			
 
				+	    for (j = 1; j <= i__1; ++j) {
			
 
				+		if (x[jx] != 0.) {
			
 
				+		    temp = *alpha * x[jx];
			
 
				+		    iy = ky;
			
 
				+		    i__2 = *m;
			
 
				+		    for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+			y[iy] += temp * a[i__ + j * a_dim1];
			
 
				+			iy += *incy;
			
 
				+/* L70: */
			
 
				+		    }
			
 
				+		}
			
 
				+		jx += *incx;
			
 
				+/* L80: */
			
 
				+	    }
			
 
				+	}
			
 
				+    } else {
			
 
				+
			
 
				+/*        Form  y := alpha*A'*x + y. */
			
 
				+
			
 
				+	jy = ky;
			
 
				+	if (*incx == 1) {
			
 
				+	    i__1 = *n;
			
 
				+	    for (j = 1; j <= i__1; ++j) {
			
 
				+		temp = 0.;
			
 
				+		i__2 = *m;
			
 
				+		for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+		    temp += a[i__ + j * a_dim1] * x[i__];
			
 
				+/* L90: */
			
 
				+		}
			
 
				+		y[jy] += *alpha * temp;
			
 
				+		jy += *incy;
			
 
				+/* L100: */
			
 
				+	    }
			
 
				+	} else {
			
 
				+	    i__1 = *n;
			
 
				+	    for (j = 1; j <= i__1; ++j) {
			
 
				+		temp = 0.;
			
 
				+		ix = kx;
			
 
				+		i__2 = *m;
			
 
				+		for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+		    temp += a[i__ + j * a_dim1] * x[ix];
			
 
				+		    ix += *incx;
			
 
				+/* L110: */
			
 
				+		}
			
 
				+		y[jy] += *alpha * temp;
			
 
				+		jy += *incy;
			
 
				+/* L120: */
			
 
				+	    }
			
 
				+	}
			
 
				+    }
			
 
				+
			
 
				+    return 0;
			
 
				+
			
 
				+/*     End of DGEMV . */
			
 
				+
			
 
				+} /* dgemv_ */
			
--- a/min-dgels/additional/dgeqr2.c
+++ b/min-dgels/additional/dgeqr2.c
@@ -0,0 +1,161 @@
 
				+/* dgeqr2.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+/* Table of constant values */
			
 
				+
			
 
				+static integer c__1 = 1;
			
 
				+
			
 
				+/* Subroutine */ int dgeqr2_(integer *m, integer *n, doublereal *a, integer *
			
 
				+	lda, doublereal *tau, doublereal *work, integer *info)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    integer a_dim1, a_offset, i__1, i__2, i__3;
			
 
				+
			
 
				+    /* Local variables */
			
 
				+    integer i__, k;
			
 
				+    doublereal aii;
			
 
				+    extern /* Subroutine */ int dlarf_(char *, integer *, integer *, 
			
 
				+	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
			
 
				+	    doublereal *), dlarfp_(integer *, doublereal *, 
			
 
				+	    doublereal *, integer *, doublereal *), xerbla_(char *, integer *);
			
 
				+
			
 
				+
			
 
				+/*  -- LAPACK routine (version 3.2) -- */
			
 
				+/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
			
 
				+/*     November 2006 */
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+/*     .. Array Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*  DGEQR2 computes a QR factorization of a real m by n matrix A: */
			
 
				+/*  A = Q * R. */
			
 
				+
			
 
				+/*  Arguments */
			
 
				+/*  ========= */
			
 
				+
			
 
				+/*  M       (input) INTEGER */
			
 
				+/*          The number of rows of the matrix A.  M >= 0. */
			
 
				+
			
 
				+/*  N       (input) INTEGER */
			
 
				+/*          The number of columns of the matrix A.  N >= 0. */
			
 
				+
			
 
				+/*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N) */
			
 
				+/*          On entry, the m by n matrix A. */
			
 
				+/*          On exit, the elements on and above the diagonal of the array */
			
 
				+/*          contain the min(m,n) by n upper trapezoidal matrix R (R is */
			
 
				+/*          upper triangular if m >= n); the elements below the diagonal, */
			
 
				+/*          with the array TAU, represent the orthogonal matrix Q as a */
			
 
				+/*          product of elementary reflectors (see Further Details). */
			
 
				+
			
 
				+/*  LDA     (input) INTEGER */
			
 
				+/*          The leading dimension of the array A.  LDA >= max(1,M). */
			
 
				+
			
 
				+/*  TAU     (output) DOUBLE PRECISION array, dimension (min(M,N)) */
			
 
				+/*          The scalar factors of the elementary reflectors (see Further */
			
 
				+/*          Details). */
			
 
				+
			
 
				+/*  WORK    (workspace) DOUBLE PRECISION array, dimension (N) */
			
 
				+
			
 
				+/*  INFO    (output) INTEGER */
			
 
				+/*          = 0: successful exit */
			
 
				+/*          < 0: if INFO = -i, the i-th argument had an illegal value */
			
 
				+
			
 
				+/*  Further Details */
			
 
				+/*  =============== */
			
 
				+
			
 
				+/*  The matrix Q is represented as a product of elementary reflectors */
			
 
				+
			
 
				+/*     Q = H(1) H(2) . . . H(k), where k = min(m,n). */
			
 
				+
			
 
				+/*  Each H(i) has the form */
			
 
				+
			
 
				+/*     H(i) = I - tau * v * v' */
			
 
				+
			
 
				+/*  where tau is a real scalar, and v is a real vector with */
			
 
				+/*  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */
			
 
				+/*  and tau in TAU(i). */
			
 
				+
			
 
				+/*  ===================================================================== */
			
 
				+
			
 
				+/*     .. Parameters .. */
			
 
				+/*     .. */
			
 
				+/*     .. Local Scalars .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Subroutines .. */
			
 
				+/*     .. */
			
 
				+/*     .. Intrinsic Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. Executable Statements .. */
			
 
				+
			
 
				+/*     Test the input arguments */
			
 
				+
			
 
				+    /* Parameter adjustments */
			
 
				+    a_dim1 = *lda;
			
 
				+    a_offset = 1 + a_dim1;
			
 
				+    a -= a_offset;
			
 
				+    --tau;
			
 
				+    --work;
			
 
				+
			
 
				+    /* Function Body */
			
 
				+    *info = 0;
			
 
				+    if (*m < 0) {
			
 
				+	*info = -1;
			
 
				+    } else if (*n < 0) {
			
 
				+	*info = -2;
			
 
				+    } else if (*lda < max(1,*m)) {
			
 
				+	*info = -4;
			
 
				+    }
			
 
				+    if (*info != 0) {
			
 
				+	i__1 = -(*info);
			
 
				+	xerbla_("DGEQR2", &i__1);
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+    k = min(*m,*n);
			
 
				+
			
 
				+    i__1 = k;
			
 
				+    for (i__ = 1; i__ <= i__1; ++i__) {
			
 
				+
			
 
				+/*        Generate elementary reflector H(i) to annihilate A(i+1:m,i) */
			
 
				+
			
 
				+	i__2 = *m - i__ + 1;
			
 
				+/* Computing MIN */
			
 
				+	i__3 = i__ + 1;
			
 
				+	dlarfp_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3, *m)+ i__ * a_dim1]
			
 
				+, &c__1, &tau[i__]);
			
 
				+	if (i__ < *n) {
			
 
				+
			
 
				+/*           Apply H(i) to A(i:m,i+1:n) from the left */
			
 
				+
			
 
				+	    aii = a[i__ + i__ * a_dim1];
			
 
				+	    a[i__ + i__ * a_dim1] = 1.;
			
 
				+	    i__2 = *m - i__ + 1;
			
 
				+	    i__3 = *n - i__;
			
 
				+	    dlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tau[
			
 
				+		    i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]);
			
 
				+	    a[i__ + i__ * a_dim1] = aii;
			
 
				+	}
			
 
				+/* L10: */
			
 
				+    }
			
 
				+    return 0;
			
 
				+
			
 
				+/*     End of DGEQR2 */
			
 
				+
			
 
				+} /* dgeqr2_ */
			
--- a/min-dgels/additional/dgeqrf.c
+++ b/min-dgels/additional/dgeqrf.c
@@ -0,0 +1,252 @@
 
				+/* dgeqrf.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+/* Table of constant values */
			
 
				+
			
 
				+static integer c__1 = 1;
			
 
				+static integer c_n1 = -1;
			
 
				+static integer c__3 = 3;
			
 
				+static integer c__2 = 2;
			
 
				+
			
 
				+/* Subroutine */ int dgeqrf_(integer *m, integer *n, doublereal *a, integer *
			
 
				+	lda, doublereal *tau, doublereal *work, integer *lwork, integer *info)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
			
 
				+
			
 
				+    /* Local variables */
			
 
				+    integer i__, k, ib, nb, nx, iws, nbmin, iinfo;
			
 
				+    extern /* Subroutine */ int dgeqr2_(integer *, integer *, doublereal *, 
			
 
				+	    integer *, doublereal *, doublereal *, integer *), dlarfb_(char *, 
			
 
				+	     char *, char *, char *, integer *, integer *, integer *, 
			
 
				+	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
			
 
				+	    integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal 
			
 
				+	    *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *);
			
 
				+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
			
 
				+	    integer *, integer *);
			
 
				+    integer ldwork, lwkopt;
			
 
				+    logical lquery;
			
 
				+
			
 
				+
			
 
				+/*  -- LAPACK routine (version 3.2) -- */
			
 
				+/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
			
 
				+/*     November 2006 */
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+/*     .. Array Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*  DGEQRF computes a QR factorization of a real M-by-N matrix A: */
			
 
				+/*  A = Q * R. */
			
 
				+
			
 
				+/*  Arguments */
			
 
				+/*  ========= */
			
 
				+
			
 
				+/*  M       (input) INTEGER */
			
 
				+/*          The number of rows of the matrix A.  M >= 0. */
			
 
				+
			
 
				+/*  N       (input) INTEGER */
			
 
				+/*          The number of columns of the matrix A.  N >= 0. */
			
 
				+
			
 
				+/*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N) */
			
 
				+/*          On entry, the M-by-N matrix A. */
			
 
				+/*          On exit, the elements on and above the diagonal of the array */
			
 
				+/*          contain the min(M,N)-by-N upper trapezoidal matrix R (R is */
			
 
				+/*          upper triangular if m >= n); the elements below the diagonal, */
			
 
				+/*          with the array TAU, represent the orthogonal matrix Q as a */
			
 
				+/*          product of min(m,n) elementary reflectors (see Further */
			
 
				+/*          Details). */
			
 
				+
			
 
				+/*  LDA     (input) INTEGER */
			
 
				+/*          The leading dimension of the array A.  LDA >= max(1,M). */
			
 
				+
			
 
				+/*  TAU     (output) DOUBLE PRECISION array, dimension (min(M,N)) */
			
 
				+/*          The scalar factors of the elementary reflectors (see Further */
			
 
				+/*          Details). */
			
 
				+
			
 
				+/*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
			
 
				+/*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
			
 
				+
			
 
				+/*  LWORK   (input) INTEGER */
			
 
				+/*          The dimension of the array WORK.  LWORK >= max(1,N). */
			
 
				+/*          For optimum performance LWORK >= N*NB, where NB is */
			
 
				+/*          the optimal blocksize. */
			
 
				+
			
 
				+/*          If LWORK = -1, then a workspace query is assumed; the routine */
			
 
				+/*          only calculates the optimal size of the WORK array, returns */
			
 
				+/*          this value as the first entry of the WORK array, and no error */
			
 
				+/*          message related to LWORK is issued by XERBLA. */
			
 
				+
			
 
				+/*  INFO    (output) INTEGER */
			
 
				+/*          = 0:  successful exit */
			
 
				+/*          < 0:  if INFO = -i, the i-th argument had an illegal value */
			
 
				+
			
 
				+/*  Further Details */
			
 
				+/*  =============== */
			
 
				+
			
 
				+/*  The matrix Q is represented as a product of elementary reflectors */
			
 
				+
			
 
				+/*     Q = H(1) H(2) . . . H(k), where k = min(m,n). */
			
 
				+
			
 
				+/*  Each H(i) has the form */
			
 
				+
			
 
				+/*     H(i) = I - tau * v * v' */
			
 
				+
			
 
				+/*  where tau is a real scalar, and v is a real vector with */
			
 
				+/*  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */
			
 
				+/*  and tau in TAU(i). */
			
 
				+
			
 
				+/*  ===================================================================== */
			
 
				+
			
 
				+/*     .. Local Scalars .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Subroutines .. */
			
 
				+/*     .. */
			
 
				+/*     .. Intrinsic Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. Executable Statements .. */
			
 
				+
			
 
				+/*     Test the input arguments */
			
 
				+
			
 
				+    /* Parameter adjustments */
			
 
				+    a_dim1 = *lda;
			
 
				+    a_offset = 1 + a_dim1;
			
 
				+    a -= a_offset;
			
 
				+    --tau;
			
 
				+    --work;
			
 
				+
			
 
				+    /* Function Body */
			
 
				+    *info = 0;
			
 
				+    nb = ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1);
			
 
				+    lwkopt = *n * nb;
			
 
				+    work[1] = (doublereal) lwkopt;
			
 
				+    lquery = *lwork == -1;
			
 
				+    if (*m < 0) {
			
 
				+	*info = -1;
			
 
				+    } else if (*n < 0) {
			
 
				+	*info = -2;
			
 
				+    } else if (*lda < max(1,*m)) {
			
 
				+	*info = -4;
			
 
				+    } else if (*lwork < max(1,*n) && ! lquery) {
			
 
				+	*info = -7;
			
 
				+    }
			
 
				+    if (*info != 0) {
			
 
				+	i__1 = -(*info);
			
 
				+	xerbla_("DGEQRF", &i__1);
			
 
				+	return 0;
			
 
				+    } else if (lquery) {
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+/*     Quick return if possible */
			
 
				+
			
 
				+    k = min(*m,*n);
			
 
				+    if (k == 0) {
			
 
				+	work[1] = 1.;
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+    nbmin = 2;
			
 
				+    nx = 0;
			
 
				+    iws = *n;
			
 
				+    if (nb > 1 && nb < k) {
			
 
				+
			
 
				+/*        Determine when to cross over from blocked to unblocked code. */
			
 
				+
			
 
				+/* Computing MAX */
			
 
				+	i__1 = 0, i__2 = ilaenv_(&c__3, "DGEQRF", " ", m, n, &c_n1, &c_n1);
			
 
				+	nx = max(i__1,i__2);
			
 
				+	if (nx < k) {
			
 
				+
			
 
				+/*           Determine if workspace is large enough for blocked code. */
			
 
				+
			
 
				+	    ldwork = *n;
			
 
				+	    iws = ldwork * nb;
			
 
				+	    if (*lwork < iws) {
			
 
				+
			
 
				+/*              Not enough workspace to use optimal NB:  reduce NB and */
			
 
				+/*              determine the minimum value of NB. */
			
 
				+
			
 
				+		nb = *lwork / ldwork;
			
 
				+/* Computing MAX */
			
 
				+		i__1 = 2, i__2 = ilaenv_(&c__2, "DGEQRF", " ", m, n, &c_n1, &
			
 
				+			c_n1);
			
 
				+		nbmin = max(i__1,i__2);
			
 
				+	    }
			
 
				+	}
			
 
				+    }
			
 
				+
			
 
				+    if (nb >= nbmin && nb < k && nx < k) {
			
 
				+
			
 
				+/*        Use blocked code initially */
			
 
				+
			
 
				+	i__1 = k - nx;
			
 
				+	i__2 = nb;
			
 
				+	for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
			
 
				+/* Computing MIN */
			
 
				+	    i__3 = k - i__ + 1;
			
 
				+	    ib = min(i__3,nb);
			
 
				+
			
 
				+/*           Compute the QR factorization of the current block */
			
 
				+/*           A(i:m,i:i+ib-1) */
			
 
				+
			
 
				+	    i__3 = *m - i__ + 1;
			
 
				+	    dgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[
			
 
				+		    1], &iinfo);
			
 
				+	    if (i__ + ib <= *n) {
			
 
				+
			
 
				+/*              Form the triangular factor of the block reflector */
			
 
				+/*              H = H(i) H(i+1) . . . H(i+ib-1) */
			
 
				+
			
 
				+		i__3 = *m - i__ + 1;
			
 
				+		dlarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ * 
			
 
				+			a_dim1], lda, &tau[i__], &work[1], &ldwork);
			
 
				+
			
 
				+/*              Apply H' to A(i:m,i+ib:n) from the left */
			
 
				+
			
 
				+		i__3 = *m - i__ + 1;
			
 
				+		i__4 = *n - i__ - ib + 1;
			
 
				+		dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, &
			
 
				+			i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], &
			
 
				+			ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, &work[ib 
			
 
				+			+ 1], &ldwork);
			
 
				+	    }
			
 
				+/* L10: */
			
 
				+	}
			
 
				+    } else {
			
 
				+	i__ = 1;
			
 
				+    }
			
 
				+
			
 
				+/*     Use unblocked code to factor the last or only block. */
			
 
				+
			
 
				+    if (i__ <= k) {
			
 
				+	i__2 = *m - i__ + 1;
			
 
				+	i__1 = *n - i__ + 1;
			
 
				+	dgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1]
			
 
				+, &iinfo);
			
 
				+    }
			
 
				+
			
 
				+    work[1] = (doublereal) iws;
			
 
				+    return 0;
			
 
				+
			
 
				+/*     End of DGEQRF */
			
 
				+
			
 
				+} /* dgeqrf_ */
			
--- a/min-dgels/additional/dger.c
+++ b/min-dgels/additional/dger.c
@@ -0,0 +1,194 @@
 
				+/* dger.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+/* Subroutine */ int dger_(integer *m, integer *n, doublereal *alpha, 
			
 
				+	doublereal *x, integer *incx, doublereal *y, integer *incy, 
			
 
				+	doublereal *a, integer *lda)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    integer a_dim1, a_offset, i__1, i__2;
			
 
				+
			
 
				+    /* Local variables */
			
 
				+    integer i__, j, ix, jy, kx, info;
			
 
				+    doublereal temp;
			
 
				+    extern /* Subroutine */ int xerbla_(char *, integer *);
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+/*     .. Array Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*  DGER   performs the rank 1 operation */
			
 
				+
			
 
				+/*     A := alpha*x*y' + A, */
			
 
				+
			
 
				+/*  where alpha is a scalar, x is an m element vector, y is an n element */
			
 
				+/*  vector and A is an m by n matrix. */
			
 
				+
			
 
				+/*  Arguments */
			
 
				+/*  ========== */
			
 
				+
			
 
				+/*  M      - INTEGER. */
			
 
				+/*           On entry, M specifies the number of rows of the matrix A. */
			
 
				+/*           M must be at least zero. */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  N      - INTEGER. */
			
 
				+/*           On entry, N specifies the number of columns of the matrix A. */
			
 
				+/*           N must be at least zero. */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  ALPHA  - DOUBLE PRECISION. */
			
 
				+/*           On entry, ALPHA specifies the scalar alpha. */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  X      - DOUBLE PRECISION array of dimension at least */
			
 
				+/*           ( 1 + ( m - 1 )*abs( INCX ) ). */
			
 
				+/*           Before entry, the incremented array X must contain the m */
			
 
				+/*           element vector x. */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  INCX   - INTEGER. */
			
 
				+/*           On entry, INCX specifies the increment for the elements of */
			
 
				+/*           X. INCX must not be zero. */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  Y      - DOUBLE PRECISION array of dimension at least */
			
 
				+/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
			
 
				+/*           Before entry, the incremented array Y must contain the n */
			
 
				+/*           element vector y. */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  INCY   - INTEGER. */
			
 
				+/*           On entry, INCY specifies the increment for the elements of */
			
 
				+/*           Y. INCY must not be zero. */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */
			
 
				+/*           Before entry, the leading m by n part of the array A must */
			
 
				+/*           contain the matrix of coefficients. On exit, A is */
			
 
				+/*           overwritten by the updated matrix. */
			
 
				+
			
 
				+/*  LDA    - INTEGER. */
			
 
				+/*           On entry, LDA specifies the first dimension of A as declared */
			
 
				+/*           in the calling (sub) program. LDA must be at least */
			
 
				+/*           max( 1, m ). */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+
			
 
				+/*  Level 2 Blas routine. */
			
 
				+
			
 
				+/*  -- Written on 22-October-1986. */
			
 
				+/*     Jack Dongarra, Argonne National Lab. */
			
 
				+/*     Jeremy Du Croz, Nag Central Office. */
			
 
				+/*     Sven Hammarling, Nag Central Office. */
			
 
				+/*     Richard Hanson, Sandia National Labs. */
			
 
				+
			
 
				+
			
 
				+/*     .. Parameters .. */
			
 
				+/*     .. */
			
 
				+/*     .. Local Scalars .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Subroutines .. */
			
 
				+/*     .. */
			
 
				+/*     .. Intrinsic Functions .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*     Test the input parameters. */
			
 
				+
			
 
				+    /* Parameter adjustments */
			
 
				+    --x;
			
 
				+    --y;
			
 
				+    a_dim1 = *lda;
			
 
				+    a_offset = 1 + a_dim1;
			
 
				+    a -= a_offset;
			
 
				+
			
 
				+    /* Function Body */
			
 
				+    info = 0;
			
 
				+    if (*m < 0) {
			
 
				+	info = 1;
			
 
				+    } else if (*n < 0) {
			
 
				+	info = 2;
			
 
				+    } else if (*incx == 0) {
			
 
				+	info = 5;
			
 
				+    } else if (*incy == 0) {
			
 
				+	info = 7;
			
 
				+    } else if (*lda < max(1,*m)) {
			
 
				+	info = 9;
			
 
				+    }
			
 
				+    if (info != 0) {
			
 
				+	xerbla_("DGER  ", &info);
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+/*     Quick return if possible. */
			
 
				+
			
 
				+    if (*m == 0 || *n == 0 || *alpha == 0.) {
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+/*     Start the operations. In this version the elements of A are */
			
 
				+/*     accessed sequentially with one pass through A. */
			
 
				+
			
 
				+    if (*incy > 0) {
			
 
				+	jy = 1;
			
 
				+    } else {
			
 
				+	jy = 1 - (*n - 1) * *incy;
			
 
				+    }
			
 
				+    if (*incx == 1) {
			
 
				+	i__1 = *n;
			
 
				+	for (j = 1; j <= i__1; ++j) {
			
 
				+	    if (y[jy] != 0.) {
			
 
				+		temp = *alpha * y[jy];
			
 
				+		i__2 = *m;
			
 
				+		for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+		    a[i__ + j * a_dim1] += x[i__] * temp;
			
 
				+/* L10: */
			
 
				+		}
			
 
				+	    }
			
 
				+	    jy += *incy;
			
 
				+/* L20: */
			
 
				+	}
			
 
				+    } else {
			
 
				+	if (*incx > 0) {
			
 
				+	    kx = 1;
			
 
				+	} else {
			
 
				+	    kx = 1 - (*m - 1) * *incx;
			
 
				+	}
			
 
				+	i__1 = *n;
			
 
				+	for (j = 1; j <= i__1; ++j) {
			
 
				+	    if (y[jy] != 0.) {
			
 
				+		temp = *alpha * y[jy];
			
 
				+		ix = kx;
			
 
				+		i__2 = *m;
			
 
				+		for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+		    a[i__ + j * a_dim1] += x[ix] * temp;
			
 
				+		    ix += *incx;
			
 
				+/* L30: */
			
 
				+		}
			
 
				+	    }
			
 
				+	    jy += *incy;
			
 
				+/* L40: */
			
 
				+	}
			
 
				+    }
			
 
				+
			
 
				+    return 0;
			
 
				+
			
 
				+/*     End of DGER  . */
			
 
				+
			
 
				+} /* dger_ */
			
--- a/min-dgels/additional/disnan.c
+++ b/min-dgels/additional/disnan.c
@@ -0,0 +1,52 @@
 
				+/* disnan.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+logical disnan_(doublereal *din)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    logical ret_val;
			
 
				+
			
 
				+    /* Local variables */
			
 
				+    extern logical dlaisnan_(doublereal *, doublereal *);
			
 
				+
			
 
				+
			
 
				+/*  -- LAPACK auxiliary routine (version 3.2) -- */
			
 
				+/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
			
 
				+/*     November 2006 */
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*  DISNAN returns .TRUE. if its argument is NaN, and .FALSE. */
			
 
				+/*  otherwise.  To be replaced by the Fortran 2003 intrinsic in the */
			
 
				+/*  future. */
			
 
				+
			
 
				+/*  Arguments */
			
 
				+/*  ========= */
			
 
				+
			
 
				+/*  DIN      (input) DOUBLE PRECISION */
			
 
				+/*          Input to test for NaN. */
			
 
				+
			
 
				+/*  ===================================================================== */
			
 
				+
			
 
				+/*  .. External Functions .. */
			
 
				+/*  .. */
			
 
				+/*  .. Executable Statements .. */
			
 
				+    ret_val = dlaisnan_(din, din);
			
 
				+    return ret_val;
			
 
				+} /* disnan_ */
			
--- a/min-dgels/additional/dlabad.c
+++ b/min-dgels/additional/dlabad.c
@@ -0,0 +1,72 @@
 
				+/* dlabad.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+/* Subroutine */ int dlabad_(doublereal *small, doublereal *large)
			
 
				+{
			
 
				+    /* Builtin functions */
			
 
				+    double d_lg10(doublereal *), sqrt(doublereal);
			
 
				+
			
 
				+
			
 
				+/*  -- LAPACK auxiliary routine (version 3.2) -- */
			
 
				+/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
			
 
				+/*     November 2006 */
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*  DLABAD takes as input the values computed by DLAMCH for underflow and */
			
 
				+/*  overflow, and returns the square root of each of these values if the */
			
 
				+/*  log of LARGE is sufficiently large.  This subroutine is intended to */
			
 
				+/*  identify machines with a large exponent range, such as the Crays, and */
			
 
				+/*  redefine the underflow and overflow limits to be the square roots of */
			
 
				+/*  the values computed by DLAMCH.  This subroutine is needed because */
			
 
				+/*  DLAMCH does not compensate for poor arithmetic in the upper half of */
			
 
				+/*  the exponent range, as is found on a Cray. */
			
 
				+
			
 
				+/*  Arguments */
			
 
				+/*  ========= */
			
 
				+
			
 
				+/*  SMALL   (input/output) DOUBLE PRECISION */
			
 
				+/*          On entry, the underflow threshold as computed by DLAMCH. */
			
 
				+/*          On exit, if LOG10(LARGE) is sufficiently large, the square */
			
 
				+/*          root of SMALL, otherwise unchanged. */
			
 
				+
			
 
				+/*  LARGE   (input/output) DOUBLE PRECISION */
			
 
				+/*          On entry, the overflow threshold as computed by DLAMCH. */
			
 
				+/*          On exit, if LOG10(LARGE) is sufficiently large, the square */
			
 
				+/*          root of LARGE, otherwise unchanged. */
			
 
				+
			
 
				+/*  ===================================================================== */
			
 
				+
			
 
				+/*     .. Intrinsic Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. Executable Statements .. */
			
 
				+
			
 
				+/*     If it looks like we're on a Cray, take the square root of */
			
 
				+/*     SMALL and LARGE to avoid overflow and underflow problems. */
			
 
				+
			
 
				+    if (d_lg10(large) > 2e3) {
			
 
				+	*small = sqrt(*small);
			
 
				+	*large = sqrt(*large);
			
 
				+    }
			
 
				+
			
 
				+    return 0;
			
 
				+
			
 
				+/*     End of DLABAD */
			
 
				+
			
 
				+} /* dlabad_ */
			
--- a/min-dgels/additional/dlaisnan.c
+++ b/min-dgels/additional/dlaisnan.c
@@ -0,0 +1,58 @@
 
				+/* dlaisnan.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+logical dlaisnan_(doublereal *din1, doublereal *din2)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    logical ret_val;
			
 
				+
			
 
				+
			
 
				+/*  -- LAPACK auxiliary routine (version 3.2) -- */
			
 
				+/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
			
 
				+/*     November 2006 */
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*  This routine is not for general use.  It exists solely to avoid */
			
 
				+/*  over-optimization in DISNAN. */
			
 
				+
			
 
				+/*  DLAISNAN checks for NaNs by comparing its two arguments for */
			
 
				+/*  inequality.  NaN is the only floating-point value where NaN != NaN */
			
 
				+/*  returns .TRUE.  To check for NaNs, pass the same variable as both */
			
 
				+/*  arguments. */
			
 
				+
			
 
				+/*  A compiler must assume that the two arguments are */
			
 
				+/*  not the same variable, and the test will not be optimized away. */
			
 
				+/*  Interprocedural or whole-program optimization may delete this */
			
 
				+/*  test.  The ISNAN functions will be replaced by the correct */
			
 
				+/*  Fortran 03 intrinsic once the intrinsic is widely available. */
			
 
				+
			
 
				+/*  Arguments */
			
 
				+/*  ========= */
			
 
				+
			
 
				+/*  DIN1     (input) DOUBLE PRECISION */
			
 
				+/*  DIN2     (input) DOUBLE PRECISION */
			
 
				+/*          Two numbers to compare for inequality. */
			
 
				+
			
 
				+/*  ===================================================================== */
			
 
				+
			
 
				+/*  .. Executable Statements .. */
			
 
				+    ret_val = *din1 != *din2;
			
 
				+    return ret_val;
			
 
				+} /* dlaisnan_ */
			
--- a/min-dgels/additional/dlamch.c
+++ b/min-dgels/additional/dlamch.c
--- a/min-dgels/additional/dlange.c
+++ b/min-dgels/additional/dlange.c
@@ -0,0 +1,199 @@
 
				+/* dlange.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+/* Table of constant values */
			
 
				+
			
 
				+static integer c__1 = 1;
			
 
				+
			
 
				+doublereal dlange_(char *norm, integer *m, integer *n, doublereal *a, integer 
			
 
				+	*lda, doublereal *work)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    integer a_dim1, a_offset, i__1, i__2;
			
 
				+    doublereal ret_val, d__1, d__2, d__3;
			
 
				+
			
 
				+    /* Builtin functions */
			
 
				+    double sqrt(doublereal);
			
 
				+
			
 
				+    /* Local variables */
			
 
				+    integer i__, j;
			
 
				+    doublereal sum, scale;
			
 
				+    extern logical lsame_(char *, char *);
			
 
				+    doublereal value;
			
 
				+    extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *, 
			
 
				+	    doublereal *, doublereal *);
			
 
				+
			
 
				+
			
 
				+/*  -- LAPACK auxiliary routine (version 3.2) -- */
			
 
				+/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
			
 
				+/*     November 2006 */
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+/*     .. Array Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*  DLANGE  returns the value of the one norm,  or the Frobenius norm, or */
			
 
				+/*  the  infinity norm,  or the  element of  largest absolute value  of a */
			
 
				+/*  real matrix A. */
			
 
				+
			
 
				+/*  Description */
			
 
				+/*  =========== */
			
 
				+
			
 
				+/*  DLANGE returns the value */
			
 
				+
			
 
				+/*     DLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' */
			
 
				+/*              ( */
			
 
				+/*              ( norm1(A),         NORM = '1', 'O' or 'o' */
			
 
				+/*              ( */
			
 
				+/*              ( normI(A),         NORM = 'I' or 'i' */
			
 
				+/*              ( */
			
 
				+/*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e' */
			
 
				+
			
 
				+/*  where  norm1  denotes the  one norm of a matrix (maximum column sum), */
			
 
				+/*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and */
			
 
				+/*  normF  denotes the  Frobenius norm of a matrix (square root of sum of */
			
 
				+/*  squares).  Note that  max(abs(A(i,j)))  is not a consistent matrix norm. */
			
 
				+
			
 
				+/*  Arguments */
			
 
				+/*  ========= */
			
 
				+
			
 
				+/*  NORM    (input) CHARACTER*1 */
			
 
				+/*          Specifies the value to be returned in DLANGE as described */
			
 
				+/*          above. */
			
 
				+
			
 
				+/*  M       (input) INTEGER */
			
 
				+/*          The number of rows of the matrix A.  M >= 0.  When M = 0, */
			
 
				+/*          DLANGE is set to zero. */
			
 
				+
			
 
				+/*  N       (input) INTEGER */
			
 
				+/*          The number of columns of the matrix A.  N >= 0.  When N = 0, */
			
 
				+/*          DLANGE is set to zero. */
			
 
				+
			
 
				+/*  A       (input) DOUBLE PRECISION array, dimension (LDA,N) */
			
 
				+/*          The m by n matrix A. */
			
 
				+
			
 
				+/*  LDA     (input) INTEGER */
			
 
				+/*          The leading dimension of the array A.  LDA >= max(M,1). */
			
 
				+
			
 
				+/*  WORK    (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)), */
			
 
				+/*          where LWORK >= M when NORM = 'I'; otherwise, WORK is not */
			
 
				+/*          referenced. */
			
 
				+
			
 
				+/* ===================================================================== */
			
 
				+
			
 
				+/*     .. Parameters .. */
			
 
				+/*     .. */
			
 
				+/*     .. Local Scalars .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Subroutines .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. Intrinsic Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. Executable Statements .. */
			
 
				+
			
 
				+    /* Parameter adjustments */
			
 
				+    a_dim1 = *lda;
			
 
				+    a_offset = 1 + a_dim1;
			
 
				+    a -= a_offset;
			
 
				+    --work;
			
 
				+
			
 
				+    /* Function Body */
			
 
				+    if (min(*m,*n) == 0) {
			
 
				+	value = 0.;
			
 
				+    } else if (lsame_(norm, "M")) {
			
 
				+
			
 
				+/*        Find max(abs(A(i,j))). */
			
 
				+
			
 
				+	value = 0.;
			
 
				+	i__1 = *n;
			
 
				+	for (j = 1; j <= i__1; ++j) {
			
 
				+	    i__2 = *m;
			
 
				+	    for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+/* Computing MAX */
			
 
				+		d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1));
			
 
				+		value = max(d__2,d__3);
			
 
				+/* L10: */
			
 
				+	    }
			
 
				+/* L20: */
			
 
				+	}
			
 
				+    } else if (lsame_(norm, "O") || *(unsigned char *)
			
 
				+	    norm == '1') {
			
 
				+
			
 
				+/*        Find norm1(A). */
			
 
				+
			
 
				+	value = 0.;
			
 
				+	i__1 = *n;
			
 
				+	for (j = 1; j <= i__1; ++j) {
			
 
				+	    sum = 0.;
			
 
				+	    i__2 = *m;
			
 
				+	    for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+		sum += (d__1 = a[i__ + j * a_dim1], abs(d__1));
			
 
				+/* L30: */
			
 
				+	    }
			
 
				+	    value = max(value,sum);
			
 
				+/* L40: */
			
 
				+	}
			
 
				+    } else if (lsame_(norm, "I")) {
			
 
				+
			
 
				+/*        Find normI(A). */
			
 
				+
			
 
				+	i__1 = *m;
			
 
				+	for (i__ = 1; i__ <= i__1; ++i__) {
			
 
				+	    work[i__] = 0.;
			
 
				+/* L50: */
			
 
				+	}
			
 
				+	i__1 = *n;
			
 
				+	for (j = 1; j <= i__1; ++j) {
			
 
				+	    i__2 = *m;
			
 
				+	    for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+		work[i__] += (d__1 = a[i__ + j * a_dim1], abs(d__1));
			
 
				+/* L60: */
			
 
				+	    }
			
 
				+/* L70: */
			
 
				+	}
			
 
				+	value = 0.;
			
 
				+	i__1 = *m;
			
 
				+	for (i__ = 1; i__ <= i__1; ++i__) {
			
 
				+/* Computing MAX */
			
 
				+	    d__1 = value, d__2 = work[i__];
			
 
				+	    value = max(d__1,d__2);
			
 
				+/* L80: */
			
 
				+	}
			
 
				+    } else if (lsame_(norm, "F") || lsame_(norm, "E")) {
			
 
				+
			
 
				+/*        Find normF(A). */
			
 
				+
			
 
				+	scale = 0.;
			
 
				+	sum = 1.;
			
 
				+	i__1 = *n;
			
 
				+	for (j = 1; j <= i__1; ++j) {
			
 
				+	    dlassq_(m, &a[j * a_dim1 + 1], &c__1, &scale, &sum);
			
 
				+/* L90: */
			
 
				+	}
			
 
				+	value = scale * sqrt(sum);
			
 
				+    }
			
 
				+
			
 
				+    ret_val = value;
			
 
				+    return ret_val;
			
 
				+
			
 
				+/*     End of DLANGE */
			
 
				+
			
 
				+} /* dlange_ */
			
--- a/min-dgels/additional/dlapy2.c
+++ b/min-dgels/additional/dlapy2.c
@@ -0,0 +1,73 @@
 
				+/* dlapy2.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+doublereal dlapy2_(doublereal *x, doublereal *y)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    doublereal ret_val, d__1;
			
 
				+
			
 
				+    /* Builtin functions */
			
 
				+    double sqrt(doublereal);
			
 
				+
			
 
				+    /* Local variables */
			
 
				+    doublereal w, z__, xabs, yabs;
			
 
				+
			
 
				+
			
 
				+/*  -- LAPACK auxiliary routine (version 3.2) -- */
			
 
				+/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
			
 
				+/*     November 2006 */
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*  DLAPY2 returns sqrt(x**2+y**2), taking care not to cause unnecessary */
			
 
				+/*  overflow. */
			
 
				+
			
 
				+/*  Arguments */
			
 
				+/*  ========= */
			
 
				+
			
 
				+/*  X       (input) DOUBLE PRECISION */
			
 
				+/*  Y       (input) DOUBLE PRECISION */
			
 
				+/*          X and Y specify the values x and y. */
			
 
				+
			
 
				+/*  ===================================================================== */
			
 
				+
			
 
				+/*     .. Parameters .. */
			
 
				+/*     .. */
			
 
				+/*     .. Local Scalars .. */
			
 
				+/*     .. */
			
 
				+/*     .. Intrinsic Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. Executable Statements .. */
			
 
				+
			
 
				+    xabs = abs(*x);
			
 
				+    yabs = abs(*y);
			
 
				+    w = max(xabs,yabs);
			
 
				+    z__ = min(xabs,yabs);
			
 
				+    if (z__ == 0.) {
			
 
				+	ret_val = w;
			
 
				+    } else {
			
 
				+/* Computing 2nd power */
			
 
				+	d__1 = z__ / w;
			
 
				+	ret_val = w * sqrt(d__1 * d__1 + 1.);
			
 
				+    }
			
 
				+    return ret_val;
			
 
				+
			
 
				+/*     End of DLAPY2 */
			
 
				+
			
 
				+} /* dlapy2_ */
			
--- a/min-dgels/additional/dlarf.c
+++ b/min-dgels/additional/dlarf.c
@@ -0,0 +1,193 @@
 
				+/* dlarf.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+/* Table of constant values */
			
 
				+
			
 
				+static doublereal c_b4 = 1.;
			
 
				+static doublereal c_b5 = 0.;
			
 
				+static integer c__1 = 1;
			
 
				+
			
 
				+/* Subroutine */ int dlarf_(char *side, integer *m, integer *n, doublereal *v, 
			
 
				+	 integer *incv, doublereal *tau, doublereal *c__, integer *ldc, 
			
 
				+	doublereal *work)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    integer c_dim1, c_offset;
			
 
				+    doublereal d__1;
			
 
				+
			
 
				+    /* Local variables */
			
 
				+    integer i__;
			
 
				+    logical applyleft;
			
 
				+    extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, 
			
 
				+	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
			
 
				+	    integer *);
			
 
				+    extern logical lsame_(char *, char *);
			
 
				+    extern /* Subroutine */ int dgemv_(char *, integer *, integer *, 
			
 
				+	    doublereal *, doublereal *, integer *, doublereal *, integer *, 
			
 
				+	    doublereal *, doublereal *, integer *);
			
 
				+    integer lastc, lastv;
			
 
				+    extern integer iladlc_(integer *, integer *, doublereal *, integer *), 
			
 
				+	    iladlr_(integer *, integer *, doublereal *, integer *);
			
 
				+
			
 
				+
			
 
				+/*  -- LAPACK auxiliary routine (version 3.2) -- */
			
 
				+/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
			
 
				+/*     November 2006 */
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+/*     .. Array Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*  DLARF applies a real elementary reflector H to a real m by n matrix */
			
 
				+/*  C, from either the left or the right. H is represented in the form */
			
 
				+
			
 
				+/*        H = I - tau * v * v' */
			
 
				+
			
 
				+/*  where tau is a real scalar and v is a real vector. */
			
 
				+
			
 
				+/*  If tau = 0, then H is taken to be the unit matrix. */
			
 
				+
			
 
				+/*  Arguments */
			
 
				+/*  ========= */
			
 
				+
			
 
				+/*  SIDE    (input) CHARACTER*1 */
			
 
				+/*          = 'L': form  H * C */
			
 
				+/*          = 'R': form  C * H */
			
 
				+
			
 
				+/*  M       (input) INTEGER */
			
 
				+/*          The number of rows of the matrix C. */
			
 
				+
			
 
				+/*  N       (input) INTEGER */
			
 
				+/*          The number of columns of the matrix C. */
			
 
				+
			
 
				+/*  V       (input) DOUBLE PRECISION array, dimension */
			
 
				+/*                     (1 + (M-1)*abs(INCV)) if SIDE = 'L' */
			
 
				+/*                  or (1 + (N-1)*abs(INCV)) if SIDE = 'R' */
			
 
				+/*          The vector v in the representation of H. V is not used if */
			
 
				+/*          TAU = 0. */
			
 
				+
			
 
				+/*  INCV    (input) INTEGER */
			
 
				+/*          The increment between elements of v. INCV <> 0. */
			
 
				+
			
 
				+/*  TAU     (input) DOUBLE PRECISION */
			
 
				+/*          The value tau in the representation of H. */
			
 
				+
			
 
				+/*  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N) */
			
 
				+/*          On entry, the m by n matrix C. */
			
 
				+/*          On exit, C is overwritten by the matrix H * C if SIDE = 'L', */
			
 
				+/*          or C * H if SIDE = 'R'. */
			
 
				+
			
 
				+/*  LDC     (input) INTEGER */
			
 
				+/*          The leading dimension of the array C. LDC >= max(1,M). */
			
 
				+
			
 
				+/*  WORK    (workspace) DOUBLE PRECISION array, dimension */
			
 
				+/*                         (N) if SIDE = 'L' */
			
 
				+/*                      or (M) if SIDE = 'R' */
			
 
				+
			
 
				+/*  ===================================================================== */
			
 
				+
			
 
				+/*     .. Parameters .. */
			
 
				+/*     .. */
			
 
				+/*     .. Local Scalars .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Subroutines .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. Executable Statements .. */
			
 
				+
			
 
				+    /* Parameter adjustments */
			
 
				+    --v;
			
 
				+    c_dim1 = *ldc;
			
 
				+    c_offset = 1 + c_dim1;
			
 
				+    c__ -= c_offset;
			
 
				+    --work;
			
 
				+
			
 
				+    /* Function Body */
			
 
				+    applyleft = lsame_(side, "L");
			
 
				+    lastv = 0;
			
 
				+    lastc = 0;
			
 
				+    if (*tau != 0.) {
			
 
				+/*     Set up variables for scanning V.  LASTV begins pointing to the end */
			
 
				+/*     of V. */
			
 
				+	if (applyleft) {
			
 
				+	    lastv = *m;
			
 
				+	} else {
			
 
				+	    lastv = *n;
			
 
				+	}
			
 
				+	if (*incv > 0) {
			
 
				+	    i__ = (lastv - 1) * *incv + 1;
			
 
				+	} else {
			
 
				+	    i__ = 1;
			
 
				+	}
			
 
				+/*     Look for the last non-zero row in V. */
			
 
				+	while(lastv > 0 && v[i__] == 0.) {
			
 
				+	    --lastv;
			
 
				+	    i__ -= *incv;
			
 
				+	}
			
 
				+	if (applyleft) {
			
 
				+/*     Scan for the last non-zero column in C(1:lastv,:). */
			
 
				+	    lastc = iladlc_(&lastv, n, &c__[c_offset], ldc);
			
 
				+	} else {
			
 
				+/*     Scan for the last non-zero row in C(:,1:lastv). */
			
 
				+	    lastc = iladlr_(m, &lastv, &c__[c_offset], ldc);
			
 
				+	}
			
 
				+    }
			
 
				+/*     Note that lastc.eq.0 renders the BLAS operations null; no special */
			
 
				+/*     case is needed at this level. */
			
 
				+    if (applyleft) {
			
 
				+
			
 
				+/*        Form  H * C */
			
 
				+
			
 
				+	if (lastv > 0) {
			
 
				+
			
 
				+/*           w(1:lastc,1) := C(1:lastv,1:lastc)' * v(1:lastv,1) */
			
 
				+
			
 
				+	    dgemv_("Transpose", &lastv, &lastc, &c_b4, &c__[c_offset], ldc, &
			
 
				+		    v[1], incv, &c_b5, &work[1], &c__1);
			
 
				+
			
 
				+/*           C(1:lastv,1:lastc) := C(...) - v(1:lastv,1) * w(1:lastc,1)' */
			
 
				+
			
 
				+	    d__1 = -(*tau);
			
 
				+	    dger_(&lastv, &lastc, &d__1, &v[1], incv, &work[1], &c__1, &c__[
			
 
				+		    c_offset], ldc);
			
 
				+	}
			
 
				+    } else {
			
 
				+
			
 
				+/*        Form  C * H */
			
 
				+
			
 
				+	if (lastv > 0) {
			
 
				+
			
 
				+/*           w(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1) */
			
 
				+
			
 
				+	    dgemv_("No transpose", &lastc, &lastv, &c_b4, &c__[c_offset], ldc, 
			
 
				+		     &v[1], incv, &c_b5, &work[1], &c__1);
			
 
				+
			
 
				+/*           C(1:lastc,1:lastv) := C(...) - w(1:lastc,1) * v(1:lastv,1)' */
			
 
				+
			
 
				+	    d__1 = -(*tau);
			
 
				+	    dger_(&lastc, &lastv, &d__1, &work[1], &c__1, &v[1], incv, &c__[
			
 
				+		    c_offset], ldc);
			
 
				+	}
			
 
				+    }
			
 
				+    return 0;
			
 
				+
			
 
				+/*     End of DLARF */
			
 
				+
			
 
				+} /* dlarf_ */
			
--- a/min-dgels/additional/dlarfb.c
+++ b/min-dgels/additional/dlarfb.c
@@ -0,0 +1,774 @@
 
				+/* dlarfb.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+/* Table of constant values */
			
 
				+
			
 
				+static integer c__1 = 1;
			
 
				+static doublereal c_b14 = 1.;
			
 
				+static doublereal c_b25 = -1.;
			
 
				+
			
 
				+/* Subroutine */ int dlarfb_(char *side, char *trans, char *direct, char *
			
 
				+	storev, integer *m, integer *n, integer *k, doublereal *v, integer *
			
 
				+	ldv, doublereal *t, integer *ldt, doublereal *c__, integer *ldc, 
			
 
				+	doublereal *work, integer *ldwork)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1, 
			
 
				+	    work_offset, i__1, i__2;
			
 
				+
			
 
				+    /* Local variables */
			
 
				+    integer i__, j;
			
 
				+    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, 
			
 
				+	    integer *, doublereal *, doublereal *, integer *, doublereal *, 
			
 
				+	    integer *, doublereal *, doublereal *, integer *);
			
 
				+    extern logical lsame_(char *, char *);
			
 
				+    integer lastc;
			
 
				+    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, 
			
 
				+	    doublereal *, integer *), dtrmm_(char *, char *, char *, char *, 
			
 
				+	    integer *, integer *, doublereal *, doublereal *, integer *, 
			
 
				+	    doublereal *, integer *);
			
 
				+    integer lastv;
			
 
				+    extern integer iladlc_(integer *, integer *, doublereal *, integer *), 
			
 
				+	    iladlr_(integer *, integer *, doublereal *, integer *);
			
 
				+    char transt[1];
			
 
				+
			
 
				+
			
 
				+/*  -- LAPACK auxiliary routine (version 3.2) -- */
			
 
				+/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
			
 
				+/*     November 2006 */
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+/*     .. Array Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*  DLARFB applies a real block reflector H or its transpose H' to a */
			
 
				+/*  real m by n matrix C, from either the left or the right. */
			
 
				+
			
 
				+/*  Arguments */
			
 
				+/*  ========= */
			
 
				+
			
 
				+/*  SIDE    (input) CHARACTER*1 */
			
 
				+/*          = 'L': apply H or H' from the Left */
			
 
				+/*          = 'R': apply H or H' from the Right */
			
 
				+
			
 
				+/*  TRANS   (input) CHARACTER*1 */
			
 
				+/*          = 'N': apply H (No transpose) */
			
 
				+/*          = 'T': apply H' (Transpose) */
			
 
				+
			
 
				+/*  DIRECT  (input) CHARACTER*1 */
			
 
				+/*          Indicates how H is formed from a product of elementary */
			
 
				+/*          reflectors */
			
 
				+/*          = 'F': H = H(1) H(2) . . . H(k) (Forward) */
			
 
				+/*          = 'B': H = H(k) . . . H(2) H(1) (Backward) */
			
 
				+
			
 
				+/*  STOREV  (input) CHARACTER*1 */
			
 
				+/*          Indicates how the vectors which define the elementary */
			
 
				+/*          reflectors are stored: */
			
 
				+/*          = 'C': Columnwise */
			
 
				+/*          = 'R': Rowwise */
			
 
				+
			
 
				+/*  M       (input) INTEGER */
			
 
				+/*          The number of rows of the matrix C. */
			
 
				+
			
 
				+/*  N       (input) INTEGER */
			
 
				+/*          The number of columns of the matrix C. */
			
 
				+
			
 
				+/*  K       (input) INTEGER */
			
 
				+/*          The order of the matrix T (= the number of elementary */
			
 
				+/*          reflectors whose product defines the block reflector). */
			
 
				+
			
 
				+/*  V       (input) DOUBLE PRECISION array, dimension */
			
 
				+/*                                (LDV,K) if STOREV = 'C' */
			
 
				+/*                                (LDV,M) if STOREV = 'R' and SIDE = 'L' */
			
 
				+/*                                (LDV,N) if STOREV = 'R' and SIDE = 'R' */
			
 
				+/*          The matrix V. See further details. */
			
 
				+
			
 
				+/*  LDV     (input) INTEGER */
			
 
				+/*          The leading dimension of the array V. */
			
 
				+/*          If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); */
			
 
				+/*          if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); */
			
 
				+/*          if STOREV = 'R', LDV >= K. */
			
 
				+
			
 
				+/*  T       (input) DOUBLE PRECISION array, dimension (LDT,K) */
			
 
				+/*          The triangular k by k matrix T in the representation of the */
			
 
				+/*          block reflector. */
			
 
				+
			
 
				+/*  LDT     (input) INTEGER */
			
 
				+/*          The leading dimension of the array T. LDT >= K. */
			
 
				+
			
 
				+/*  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N) */
			
 
				+/*          On entry, the m by n matrix C. */
			
 
				+/*          On exit, C is overwritten by H*C or H'*C or C*H or C*H'. */
			
 
				+
			
 
				+/*  LDC     (input) INTEGER */
			
 
				+/*          The leading dimension of the array C. LDA >= max(1,M). */
			
 
				+
			
 
				+/*  WORK    (workspace) DOUBLE PRECISION array, dimension (LDWORK,K) */
			
 
				+
			
 
				+/*  LDWORK  (input) INTEGER */
			
 
				+/*          The leading dimension of the array WORK. */
			
 
				+/*          If SIDE = 'L', LDWORK >= max(1,N); */
			
 
				+/*          if SIDE = 'R', LDWORK >= max(1,M). */
			
 
				+
			
 
				+/*  ===================================================================== */
			
 
				+
			
 
				+/*     .. Parameters .. */
			
 
				+/*     .. */
			
 
				+/*     .. Local Scalars .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Subroutines .. */
			
 
				+/*     .. */
			
 
				+/*     .. Executable Statements .. */
			
 
				+
			
 
				+/*     Quick return if possible */
			
 
				+
			
 
				+    /* Parameter adjustments */
			
 
				+    v_dim1 = *ldv;
			
 
				+    v_offset = 1 + v_dim1;
			
 
				+    v -= v_offset;
			
 
				+    t_dim1 = *ldt;
			
 
				+    t_offset = 1 + t_dim1;
			
 
				+    t -= t_offset;
			
 
				+    c_dim1 = *ldc;
			
 
				+    c_offset = 1 + c_dim1;
			
 
				+    c__ -= c_offset;
			
 
				+    work_dim1 = *ldwork;
			
 
				+    work_offset = 1 + work_dim1;
			
 
				+    work -= work_offset;
			
 
				+
			
 
				+    /* Function Body */
			
 
				+    if (*m <= 0 || *n <= 0) {
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+    if (lsame_(trans, "N")) {
			
 
				+	*(unsigned char *)transt = 'T';
			
 
				+    } else {
			
 
				+	*(unsigned char *)transt = 'N';
			
 
				+    }
			
 
				+
			
 
				+    if (lsame_(storev, "C")) {
			
 
				+
			
 
				+	if (lsame_(direct, "F")) {
			
 
				+
			
 
				+/*           Let  V =  ( V1 )    (first K rows) */
			
 
				+/*                     ( V2 ) */
			
 
				+/*           where  V1  is unit lower triangular. */
			
 
				+
			
 
				+	    if (lsame_(side, "L")) {
			
 
				+
			
 
				+/*              Form  H * C  or  H' * C  where  C = ( C1 ) */
			
 
				+/*                                                  ( C2 ) */
			
 
				+
			
 
				+/* Computing MAX */
			
 
				+		i__1 = *k, i__2 = iladlr_(m, k, &v[v_offset], ldv);
			
 
				+		lastv = max(i__1,i__2);
			
 
				+		lastc = iladlc_(&lastv, n, &c__[c_offset], ldc);
			
 
				+
			
 
				+/*              W := C' * V  =  (C1'*V1 + C2'*V2)  (stored in WORK) */
			
 
				+
			
 
				+/*              W := C1' */
			
 
				+
			
 
				+		i__1 = *k;
			
 
				+		for (j = 1; j <= i__1; ++j) {
			
 
				+		    dcopy_(&lastc, &c__[j + c_dim1], ldc, &work[j * work_dim1 
			
 
				+			    + 1], &c__1);
			
 
				+/* L10: */
			
 
				+		}
			
 
				+
			
 
				+/*              W := W * V1 */
			
 
				+
			
 
				+		dtrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, &
			
 
				+			c_b14, &v[v_offset], ldv, &work[work_offset], ldwork);
			
 
				+		if (lastv > *k) {
			
 
				+
			
 
				+/*                 W := W + C2'*V2 */
			
 
				+
			
 
				+		    i__1 = lastv - *k;
			
 
				+		    dgemm_("Transpose", "No transpose", &lastc, k, &i__1, &
			
 
				+			    c_b14, &c__[*k + 1 + c_dim1], ldc, &v[*k + 1 + 
			
 
				+			    v_dim1], ldv, &c_b14, &work[work_offset], ldwork);
			
 
				+		}
			
 
				+
			
 
				+/*              W := W * T'  or  W * T */
			
 
				+
			
 
				+		dtrmm_("Right", "Upper", transt, "Non-unit", &lastc, k, &
			
 
				+			c_b14, &t[t_offset], ldt, &work[work_offset], ldwork);
			
 
				+
			
 
				+/*              C := C - V * W' */
			
 
				+
			
 
				+		if (lastv > *k) {
			
 
				+
			
 
				+/*                 C2 := C2 - V2 * W' */
			
 
				+
			
 
				+		    i__1 = lastv - *k;
			
 
				+		    dgemm_("No transpose", "Transpose", &i__1, &lastc, k, &
			
 
				+			    c_b25, &v[*k + 1 + v_dim1], ldv, &work[
			
 
				+			    work_offset], ldwork, &c_b14, &c__[*k + 1 + 
			
 
				+			    c_dim1], ldc);
			
 
				+		}
			
 
				+
			
 
				+/*              W := W * V1' */
			
 
				+
			
 
				+		dtrmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, &
			
 
				+			c_b14, &v[v_offset], ldv, &work[work_offset], ldwork);
			
 
				+
			
 
				+/*              C1 := C1 - W' */
			
 
				+
			
 
				+		i__1 = *k;
			
 
				+		for (j = 1; j <= i__1; ++j) {
			
 
				+		    i__2 = lastc;
			
 
				+		    for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+			c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1];
			
 
				+/* L20: */
			
 
				+		    }
			
 
				+/* L30: */
			
 
				+		}
			
 
				+
			
 
				+	    } else if (lsame_(side, "R")) {
			
 
				+
			
 
				+/*              Form  C * H  or  C * H'  where  C = ( C1  C2 ) */
			
 
				+
			
 
				+/* Computing MAX */
			
 
				+		i__1 = *k, i__2 = iladlr_(n, k, &v[v_offset], ldv);
			
 
				+		lastv = max(i__1,i__2);
			
 
				+		lastc = iladlr_(m, &lastv, &c__[c_offset], ldc);
			
 
				+
			
 
				+/*              W := C * V  =  (C1*V1 + C2*V2)  (stored in WORK) */
			
 
				+
			
 
				+/*              W := C1 */
			
 
				+
			
 
				+		i__1 = *k;
			
 
				+		for (j = 1; j <= i__1; ++j) {
			
 
				+		    dcopy_(&lastc, &c__[j * c_dim1 + 1], &c__1, &work[j * 
			
 
				+			    work_dim1 + 1], &c__1);
			
 
				+/* L40: */
			
 
				+		}
			
 
				+
			
 
				+/*              W := W * V1 */
			
 
				+
			
 
				+		dtrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, &
			
 
				+			c_b14, &v[v_offset], ldv, &work[work_offset], ldwork);
			
 
				+		if (lastv > *k) {
			
 
				+
			
 
				+/*                 W := W + C2 * V2 */
			
 
				+
			
 
				+		    i__1 = lastv - *k;
			
 
				+		    dgemm_("No transpose", "No transpose", &lastc, k, &i__1, &
			
 
				+			    c_b14, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k + 
			
 
				+			    1 + v_dim1], ldv, &c_b14, &work[work_offset], 
			
 
				+			    ldwork);
			
 
				+		}
			
 
				+
			
 
				+/*              W := W * T  or  W * T' */
			
 
				+
			
 
				+		dtrmm_("Right", "Upper", trans, "Non-unit", &lastc, k, &c_b14, 
			
 
				+			 &t[t_offset], ldt, &work[work_offset], ldwork);
			
 
				+
			
 
				+/*              C := C - W * V' */
			
 
				+
			
 
				+		if (lastv > *k) {
			
 
				+
			
 
				+/*                 C2 := C2 - W * V2' */
			
 
				+
			
 
				+		    i__1 = lastv - *k;
			
 
				+		    dgemm_("No transpose", "Transpose", &lastc, &i__1, k, &
			
 
				+			    c_b25, &work[work_offset], ldwork, &v[*k + 1 + 
			
 
				+			    v_dim1], ldv, &c_b14, &c__[(*k + 1) * c_dim1 + 1], 
			
 
				+			     ldc);
			
 
				+		}
			
 
				+
			
 
				+/*              W := W * V1' */
			
 
				+
			
 
				+		dtrmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, &
			
 
				+			c_b14, &v[v_offset], ldv, &work[work_offset], ldwork);
			
 
				+
			
 
				+/*              C1 := C1 - W */
			
 
				+
			
 
				+		i__1 = *k;
			
 
				+		for (j = 1; j <= i__1; ++j) {
			
 
				+		    i__2 = lastc;
			
 
				+		    for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+			c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1];
			
 
				+/* L50: */
			
 
				+		    }
			
 
				+/* L60: */
			
 
				+		}
			
 
				+	    }
			
 
				+
			
 
				+	} else {
			
 
				+
			
 
				+/*           Let  V =  ( V1 ) */
			
 
				+/*                     ( V2 )    (last K rows) */
			
 
				+/*           where  V2  is unit upper triangular. */
			
 
				+
			
 
				+	    if (lsame_(side, "L")) {
			
 
				+
			
 
				+/*              Form  H * C  or  H' * C  where  C = ( C1 ) */
			
 
				+/*                                                  ( C2 ) */
			
 
				+
			
 
				+/* Computing MAX */
			
 
				+		i__1 = *k, i__2 = iladlr_(m, k, &v[v_offset], ldv);
			
 
				+		lastv = max(i__1,i__2);
			
 
				+		lastc = iladlc_(&lastv, n, &c__[c_offset], ldc);
			
 
				+
			
 
				+/*              W := C' * V  =  (C1'*V1 + C2'*V2)  (stored in WORK) */
			
 
				+
			
 
				+/*              W := C2' */
			
 
				+
			
 
				+		i__1 = *k;
			
 
				+		for (j = 1; j <= i__1; ++j) {
			
 
				+		    dcopy_(&lastc, &c__[lastv - *k + j + c_dim1], ldc, &work[
			
 
				+			    j * work_dim1 + 1], &c__1);
			
 
				+/* L70: */
			
 
				+		}
			
 
				+
			
 
				+/*              W := W * V2 */
			
 
				+
			
 
				+		dtrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, &
			
 
				+			c_b14, &v[lastv - *k + 1 + v_dim1], ldv, &work[
			
 
				+			work_offset], ldwork);
			
 
				+		if (lastv > *k) {
			
 
				+
			
 
				+/*                 W := W + C1'*V1 */
			
 
				+
			
 
				+		    i__1 = lastv - *k;
			
 
				+		    dgemm_("Transpose", "No transpose", &lastc, k, &i__1, &
			
 
				+			    c_b14, &c__[c_offset], ldc, &v[v_offset], ldv, &
			
 
				+			    c_b14, &work[work_offset], ldwork);
			
 
				+		}
			
 
				+
			
 
				+/*              W := W * T'  or  W * T */
			
 
				+
			
 
				+		dtrmm_("Right", "Lower", transt, "Non-unit", &lastc, k, &
			
 
				+			c_b14, &t[t_offset], ldt, &work[work_offset], ldwork);
			
 
				+
			
 
				+/*              C := C - V * W' */
			
 
				+
			
 
				+		if (lastv > *k) {
			
 
				+
			
 
				+/*                 C1 := C1 - V1 * W' */
			
 
				+
			
 
				+		    i__1 = lastv - *k;
			
 
				+		    dgemm_("No transpose", "Transpose", &i__1, &lastc, k, &
			
 
				+			    c_b25, &v[v_offset], ldv, &work[work_offset], 
			
 
				+			    ldwork, &c_b14, &c__[c_offset], ldc);
			
 
				+		}
			
 
				+
			
 
				+/*              W := W * V2' */
			
 
				+
			
 
				+		dtrmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, &
			
 
				+			c_b14, &v[lastv - *k + 1 + v_dim1], ldv, &work[
			
 
				+			work_offset], ldwork);
			
 
				+
			
 
				+/*              C2 := C2 - W' */
			
 
				+
			
 
				+		i__1 = *k;
			
 
				+		for (j = 1; j <= i__1; ++j) {
			
 
				+		    i__2 = lastc;
			
 
				+		    for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+			c__[lastv - *k + j + i__ * c_dim1] -= work[i__ + j * 
			
 
				+				work_dim1];
			
 
				+/* L80: */
			
 
				+		    }
			
 
				+/* L90: */
			
 
				+		}
			
 
				+
			
 
				+	    } else if (lsame_(side, "R")) {
			
 
				+
			
 
				+/*              Form  C * H  or  C * H'  where  C = ( C1  C2 ) */
			
 
				+
			
 
				+/* Computing MAX */
			
 
				+		i__1 = *k, i__2 = iladlr_(n, k, &v[v_offset], ldv);
			
 
				+		lastv = max(i__1,i__2);
			
 
				+		lastc = iladlr_(m, &lastv, &c__[c_offset], ldc);
			
 
				+
			
 
				+/*              W := C * V  =  (C1*V1 + C2*V2)  (stored in WORK) */
			
 
				+
			
 
				+/*              W := C2 */
			
 
				+
			
 
				+		i__1 = *k;
			
 
				+		for (j = 1; j <= i__1; ++j) {
			
 
				+		    dcopy_(&lastc, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &
			
 
				+			    work[j * work_dim1 + 1], &c__1);
			
 
				+/* L100: */
			
 
				+		}
			
 
				+
			
 
				+/*              W := W * V2 */
			
 
				+
			
 
				+		dtrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, &
			
 
				+			c_b14, &v[lastv - *k + 1 + v_dim1], ldv, &work[
			
 
				+			work_offset], ldwork);
			
 
				+		if (lastv > *k) {
			
 
				+
			
 
				+/*                 W := W + C1 * V1 */
			
 
				+
			
 
				+		    i__1 = lastv - *k;
			
 
				+		    dgemm_("No transpose", "No transpose", &lastc, k, &i__1, &
			
 
				+			    c_b14, &c__[c_offset], ldc, &v[v_offset], ldv, &
			
 
				+			    c_b14, &work[work_offset], ldwork);
			
 
				+		}
			
 
				+
			
 
				+/*              W := W * T  or  W * T' */
			
 
				+
			
 
				+		dtrmm_("Right", "Lower", trans, "Non-unit", &lastc, k, &c_b14, 
			
 
				+			 &t[t_offset], ldt, &work[work_offset], ldwork);
			
 
				+
			
 
				+/*              C := C - W * V' */
			
 
				+
			
 
				+		if (lastv > *k) {
			
 
				+
			
 
				+/*                 C1 := C1 - W * V1' */
			
 
				+
			
 
				+		    i__1 = lastv - *k;
			
 
				+		    dgemm_("No transpose", "Transpose", &lastc, &i__1, k, &
			
 
				+			    c_b25, &work[work_offset], ldwork, &v[v_offset], 
			
 
				+			    ldv, &c_b14, &c__[c_offset], ldc);
			
 
				+		}
			
 
				+
			
 
				+/*              W := W * V2' */
			
 
				+
			
 
				+		dtrmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, &
			
 
				+			c_b14, &v[lastv - *k + 1 + v_dim1], ldv, &work[
			
 
				+			work_offset], ldwork);
			
 
				+
			
 
				+/*              C2 := C2 - W */
			
 
				+
			
 
				+		i__1 = *k;
			
 
				+		for (j = 1; j <= i__1; ++j) {
			
 
				+		    i__2 = lastc;
			
 
				+		    for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+			c__[i__ + (lastv - *k + j) * c_dim1] -= work[i__ + j *
			
 
				+				 work_dim1];
			
 
				+/* L110: */
			
 
				+		    }
			
 
				+/* L120: */
			
 
				+		}
			
 
				+	    }
			
 
				+	}
			
 
				+
			
 
				+    } else if (lsame_(storev, "R")) {
			
 
				+
			
 
				+	if (lsame_(direct, "F")) {
			
 
				+
			
 
				+/*           Let  V =  ( V1  V2 )    (V1: first K columns) */
			
 
				+/*           where  V1  is unit upper triangular. */
			
 
				+
			
 
				+	    if (lsame_(side, "L")) {
			
 
				+
			
 
				+/*              Form  H * C  or  H' * C  where  C = ( C1 ) */
			
 
				+/*                                                  ( C2 ) */
			
 
				+
			
 
				+/* Computing MAX */
			
 
				+		i__1 = *k, i__2 = iladlc_(k, m, &v[v_offset], ldv);
			
 
				+		lastv = max(i__1,i__2);
			
 
				+		lastc = iladlc_(&lastv, n, &c__[c_offset], ldc);
			
 
				+
			
 
				+/*              W := C' * V'  =  (C1'*V1' + C2'*V2') (stored in WORK) */
			
 
				+
			
 
				+/*              W := C1' */
			
 
				+
			
 
				+		i__1 = *k;
			
 
				+		for (j = 1; j <= i__1; ++j) {
			
 
				+		    dcopy_(&lastc, &c__[j + c_dim1], ldc, &work[j * work_dim1 
			
 
				+			    + 1], &c__1);
			
 
				+/* L130: */
			
 
				+		}
			
 
				+
			
 
				+/*              W := W * V1' */
			
 
				+
			
 
				+		dtrmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, &
			
 
				+			c_b14, &v[v_offset], ldv, &work[work_offset], ldwork);
			
 
				+		if (lastv > *k) {
			
 
				+
			
 
				+/*                 W := W + C2'*V2' */
			
 
				+
			
 
				+		    i__1 = lastv - *k;
			
 
				+		    dgemm_("Transpose", "Transpose", &lastc, k, &i__1, &c_b14, 
			
 
				+			     &c__[*k + 1 + c_dim1], ldc, &v[(*k + 1) * v_dim1 
			
 
				+			    + 1], ldv, &c_b14, &work[work_offset], ldwork);
			
 
				+		}
			
 
				+
			
 
				+/*              W := W * T'  or  W * T */
			
 
				+
			
 
				+		dtrmm_("Right", "Upper", transt, "Non-unit", &lastc, k, &
			
 
				+			c_b14, &t[t_offset], ldt, &work[work_offset], ldwork);
			
 
				+
			
 
				+/*              C := C - V' * W' */
			
 
				+
			
 
				+		if (lastv > *k) {
			
 
				+
			
 
				+/*                 C2 := C2 - V2' * W' */
			
 
				+
			
 
				+		    i__1 = lastv - *k;
			
 
				+		    dgemm_("Transpose", "Transpose", &i__1, &lastc, k, &c_b25, 
			
 
				+			     &v[(*k + 1) * v_dim1 + 1], ldv, &work[
			
 
				+			    work_offset], ldwork, &c_b14, &c__[*k + 1 + 
			
 
				+			    c_dim1], ldc);
			
 
				+		}
			
 
				+
			
 
				+/*              W := W * V1 */
			
 
				+
			
 
				+		dtrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, &
			
 
				+			c_b14, &v[v_offset], ldv, &work[work_offset], ldwork);
			
 
				+
			
 
				+/*              C1 := C1 - W' */
			
 
				+
			
 
				+		i__1 = *k;
			
 
				+		for (j = 1; j <= i__1; ++j) {
			
 
				+		    i__2 = lastc;
			
 
				+		    for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+			c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1];
			
 
				+/* L140: */
			
 
				+		    }
			
 
				+/* L150: */
			
 
				+		}
			
 
				+
			
 
				+	    } else if (lsame_(side, "R")) {
			
 
				+
			
 
				+/*              Form  C * H  or  C * H'  where  C = ( C1  C2 ) */
			
 
				+
			
 
				+/* Computing MAX */
			
 
				+		i__1 = *k, i__2 = iladlc_(k, n, &v[v_offset], ldv);
			
 
				+		lastv = max(i__1,i__2);
			
 
				+		lastc = iladlr_(m, &lastv, &c__[c_offset], ldc);
			
 
				+
			
 
				+/*              W := C * V'  =  (C1*V1' + C2*V2')  (stored in WORK) */
			
 
				+
			
 
				+/*              W := C1 */
			
 
				+
			
 
				+		i__1 = *k;
			
 
				+		for (j = 1; j <= i__1; ++j) {
			
 
				+		    dcopy_(&lastc, &c__[j * c_dim1 + 1], &c__1, &work[j * 
			
 
				+			    work_dim1 + 1], &c__1);
			
 
				+/* L160: */
			
 
				+		}
			
 
				+
			
 
				+/*              W := W * V1' */
			
 
				+
			
 
				+		dtrmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, &
			
 
				+			c_b14, &v[v_offset], ldv, &work[work_offset], ldwork);
			
 
				+		if (lastv > *k) {
			
 
				+
			
 
				+/*                 W := W + C2 * V2' */
			
 
				+
			
 
				+		    i__1 = lastv - *k;
			
 
				+		    dgemm_("No transpose", "Transpose", &lastc, k, &i__1, &
			
 
				+			    c_b14, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[(*k + 
			
 
				+			    1) * v_dim1 + 1], ldv, &c_b14, &work[work_offset], 
			
 
				+			     ldwork);
			
 
				+		}
			
 
				+
			
 
				+/*              W := W * T  or  W * T' */
			
 
				+
			
 
				+		dtrmm_("Right", "Upper", trans, "Non-unit", &lastc, k, &c_b14, 
			
 
				+			 &t[t_offset], ldt, &work[work_offset], ldwork);
			
 
				+
			
 
				+/*              C := C - W * V */
			
 
				+
			
 
				+		if (lastv > *k) {
			
 
				+
			
 
				+/*                 C2 := C2 - W * V2 */
			
 
				+
			
 
				+		    i__1 = lastv - *k;
			
 
				+		    dgemm_("No transpose", "No transpose", &lastc, &i__1, k, &
			
 
				+			    c_b25, &work[work_offset], ldwork, &v[(*k + 1) * 
			
 
				+			    v_dim1 + 1], ldv, &c_b14, &c__[(*k + 1) * c_dim1 
			
 
				+			    + 1], ldc);
			
 
				+		}
			
 
				+
			
 
				+/*              W := W * V1 */
			
 
				+
			
 
				+		dtrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, &
			
 
				+			c_b14, &v[v_offset], ldv, &work[work_offset], ldwork);
			
 
				+
			
 
				+/*              C1 := C1 - W */
			
 
				+
			
 
				+		i__1 = *k;
			
 
				+		for (j = 1; j <= i__1; ++j) {
			
 
				+		    i__2 = lastc;
			
 
				+		    for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+			c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1];
			
 
				+/* L170: */
			
 
				+		    }
			
 
				+/* L180: */
			
 
				+		}
			
 
				+
			
 
				+	    }
			
 
				+
			
 
				+	} else {
			
 
				+
			
 
				+/*           Let  V =  ( V1  V2 )    (V2: last K columns) */
			
 
				+/*           where  V2  is unit lower triangular. */
			
 
				+
			
 
				+	    if (lsame_(side, "L")) {
			
 
				+
			
 
				+/*              Form  H * C  or  H' * C  where  C = ( C1 ) */
			
 
				+/*                                                  ( C2 ) */
			
 
				+
			
 
				+/* Computing MAX */
			
 
				+		i__1 = *k, i__2 = iladlc_(k, m, &v[v_offset], ldv);
			
 
				+		lastv = max(i__1,i__2);
			
 
				+		lastc = iladlc_(&lastv, n, &c__[c_offset], ldc);
			
 
				+
			
 
				+/*              W := C' * V'  =  (C1'*V1' + C2'*V2') (stored in WORK) */
			
 
				+
			
 
				+/*              W := C2' */
			
 
				+
			
 
				+		i__1 = *k;
			
 
				+		for (j = 1; j <= i__1; ++j) {
			
 
				+		    dcopy_(&lastc, &c__[lastv - *k + j + c_dim1], ldc, &work[
			
 
				+			    j * work_dim1 + 1], &c__1);
			
 
				+/* L190: */
			
 
				+		}
			
 
				+
			
 
				+/*              W := W * V2' */
			
 
				+
			
 
				+		dtrmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, &
			
 
				+			c_b14, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[
			
 
				+			work_offset], ldwork);
			
 
				+		if (lastv > *k) {
			
 
				+
			
 
				+/*                 W := W + C1'*V1' */
			
 
				+
			
 
				+		    i__1 = lastv - *k;
			
 
				+		    dgemm_("Transpose", "Transpose", &lastc, k, &i__1, &c_b14, 
			
 
				+			     &c__[c_offset], ldc, &v[v_offset], ldv, &c_b14, &
			
 
				+			    work[work_offset], ldwork);
			
 
				+		}
			
 
				+
			
 
				+/*              W := W * T'  or  W * T */
			
 
				+
			
 
				+		dtrmm_("Right", "Lower", transt, "Non-unit", &lastc, k, &
			
 
				+			c_b14, &t[t_offset], ldt, &work[work_offset], ldwork);
			
 
				+
			
 
				+/*              C := C - V' * W' */
			
 
				+
			
 
				+		if (lastv > *k) {
			
 
				+
			
 
				+/*                 C1 := C1 - V1' * W' */
			
 
				+
			
 
				+		    i__1 = lastv - *k;
			
 
				+		    dgemm_("Transpose", "Transpose", &i__1, &lastc, k, &c_b25, 
			
 
				+			     &v[v_offset], ldv, &work[work_offset], ldwork, &
			
 
				+			    c_b14, &c__[c_offset], ldc);
			
 
				+		}
			
 
				+
			
 
				+/*              W := W * V2 */
			
 
				+
			
 
				+		dtrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, &
			
 
				+			c_b14, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[
			
 
				+			work_offset], ldwork);
			
 
				+
			
 
				+/*              C2 := C2 - W' */
			
 
				+
			
 
				+		i__1 = *k;
			
 
				+		for (j = 1; j <= i__1; ++j) {
			
 
				+		    i__2 = lastc;
			
 
				+		    for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+			c__[lastv - *k + j + i__ * c_dim1] -= work[i__ + j * 
			
 
				+				work_dim1];
			
 
				+/* L200: */
			
 
				+		    }
			
 
				+/* L210: */
			
 
				+		}
			
 
				+
			
 
				+	    } else if (lsame_(side, "R")) {
			
 
				+
			
 
				+/*              Form  C * H  or  C * H'  where  C = ( C1  C2 ) */
			
 
				+
			
 
				+/* Computing MAX */
			
 
				+		i__1 = *k, i__2 = iladlc_(k, n, &v[v_offset], ldv);
			
 
				+		lastv = max(i__1,i__2);
			
 
				+		lastc = iladlr_(m, &lastv, &c__[c_offset], ldc);
			
 
				+
			
 
				+/*              W := C * V'  =  (C1*V1' + C2*V2')  (stored in WORK) */
			
 
				+
			
 
				+/*              W := C2 */
			
 
				+
			
 
				+		i__1 = *k;
			
 
				+		for (j = 1; j <= i__1; ++j) {
			
 
				+		    dcopy_(&lastc, &c__[(lastv - *k + j) * c_dim1 + 1], &c__1, 
			
 
				+			     &work[j * work_dim1 + 1], &c__1);
			
 
				+/* L220: */
			
 
				+		}
			
 
				+
			
 
				+/*              W := W * V2' */
			
 
				+
			
 
				+		dtrmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, &
			
 
				+			c_b14, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[
			
 
				+			work_offset], ldwork);
			
 
				+		if (lastv > *k) {
			
 
				+
			
 
				+/*                 W := W + C1 * V1' */
			
 
				+
			
 
				+		    i__1 = lastv - *k;
			
 
				+		    dgemm_("No transpose", "Transpose", &lastc, k, &i__1, &
			
 
				+			    c_b14, &c__[c_offset], ldc, &v[v_offset], ldv, &
			
 
				+			    c_b14, &work[work_offset], ldwork);
			
 
				+		}
			
 
				+
			
 
				+/*              W := W * T  or  W * T' */
			
 
				+
			
 
				+		dtrmm_("Right", "Lower", trans, "Non-unit", &lastc, k, &c_b14, 
			
 
				+			 &t[t_offset], ldt, &work[work_offset], ldwork);
			
 
				+
			
 
				+/*              C := C - W * V */
			
 
				+
			
 
				+		if (lastv > *k) {
			
 
				+
			
 
				+/*                 C1 := C1 - W * V1 */
			
 
				+
			
 
				+		    i__1 = lastv - *k;
			
 
				+		    dgemm_("No transpose", "No transpose", &lastc, &i__1, k, &
			
 
				+			    c_b25, &work[work_offset], ldwork, &v[v_offset], 
			
 
				+			    ldv, &c_b14, &c__[c_offset], ldc);
			
 
				+		}
			
 
				+
			
 
				+/*              W := W * V2 */
			
 
				+
			
 
				+		dtrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, &
			
 
				+			c_b14, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[
			
 
				+			work_offset], ldwork);
			
 
				+
			
 
				+/*              C1 := C1 - W */
			
 
				+
			
 
				+		i__1 = *k;
			
 
				+		for (j = 1; j <= i__1; ++j) {
			
 
				+		    i__2 = lastc;
			
 
				+		    for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+			c__[i__ + (lastv - *k + j) * c_dim1] -= work[i__ + j *
			
 
				+				 work_dim1];
			
 
				+/* L230: */
			
 
				+		    }
			
 
				+/* L240: */
			
 
				+		}
			
 
				+
			
 
				+	    }
			
 
				+
			
 
				+	}
			
 
				+    }
			
 
				+
			
 
				+    return 0;
			
 
				+
			
 
				+/*     End of DLARFB */
			
 
				+
			
 
				+} /* dlarfb_ */
			
--- a/min-dgels/additional/dlarfg.c
+++ b/min-dgels/additional/dlarfg.c
@@ -0,0 +1,170 @@
 
				+/* dlarfg.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+/* Subroutine */ int dlarfg_(integer *n, doublereal *alpha, doublereal *x, 
			
 
				+	integer *incx, doublereal *tau)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    integer i__1;
			
 
				+    doublereal d__1;
			
 
				+
			
 
				+    /* Builtin functions */
			
 
				+    double d_sign(doublereal *, doublereal *);
			
 
				+
			
 
				+    /* Local variables */
			
 
				+    integer j, knt;
			
 
				+    doublereal beta;
			
 
				+    extern doublereal dnrm2_(integer *, doublereal *, integer *);
			
 
				+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
			
 
				+	    integer *);
			
 
				+    doublereal xnorm;
			
 
				+    extern doublereal dlapy2_(doublereal *, doublereal *), dlamch_(char *);
			
 
				+    doublereal safmin, rsafmn;
			
 
				+
			
 
				+
			
 
				+/*  -- LAPACK auxiliary routine (version 3.2) -- */
			
 
				+/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
			
 
				+/*     November 2006 */
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+/*     .. Array Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*  DLARFG generates a real elementary reflector H of order n, such */
			
 
				+/*  that */
			
 
				+
			
 
				+/*        H * ( alpha ) = ( beta ),   H' * H = I. */
			
 
				+/*            (   x   )   (   0  ) */
			
 
				+
			
 
				+/*  where alpha and beta are scalars, and x is an (n-1)-element real */
			
 
				+/*  vector. H is represented in the form */
			
 
				+
			
 
				+/*        H = I - tau * ( 1 ) * ( 1 v' ) , */
			
 
				+/*                      ( v ) */
			
 
				+
			
 
				+/*  where tau is a real scalar and v is a real (n-1)-element */
			
 
				+/*  vector. */
			
 
				+
			
 
				+/*  If the elements of x are all zero, then tau = 0 and H is taken to be */
			
 
				+/*  the unit matrix. */
			
 
				+
			
 
				+/*  Otherwise  1 <= tau <= 2. */
			
 
				+
			
 
				+/*  Arguments */
			
 
				+/*  ========= */
			
 
				+
			
 
				+/*  N       (input) INTEGER */
			
 
				+/*          The order of the elementary reflector. */
			
 
				+
			
 
				+/*  ALPHA   (input/output) DOUBLE PRECISION */
			
 
				+/*          On entry, the value alpha. */
			
 
				+/*          On exit, it is overwritten with the value beta. */
			
 
				+
			
 
				+/*  X       (input/output) DOUBLE PRECISION array, dimension */
			
 
				+/*                         (1+(N-2)*abs(INCX)) */
			
 
				+/*          On entry, the vector x. */
			
 
				+/*          On exit, it is overwritten with the vector v. */
			
 
				+
			
 
				+/*  INCX    (input) INTEGER */
			
 
				+/*          The increment between elements of X. INCX > 0. */
			
 
				+
			
 
				+/*  TAU     (output) DOUBLE PRECISION */
			
 
				+/*          The value tau. */
			
 
				+
			
 
				+/*  ===================================================================== */
			
 
				+
			
 
				+/*     .. Parameters .. */
			
 
				+/*     .. */
			
 
				+/*     .. Local Scalars .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. Intrinsic Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Subroutines .. */
			
 
				+/*     .. */
			
 
				+/*     .. Executable Statements .. */
			
 
				+
			
 
				+    /* Parameter adjustments */
			
 
				+    --x;
			
 
				+
			
 
				+    /* Function Body */
			
 
				+    if (*n <= 1) {
			
 
				+	*tau = 0.;
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+    i__1 = *n - 1;
			
 
				+    xnorm = dnrm2_(&i__1, &x[1], incx);
			
 
				+
			
 
				+    if (xnorm == 0.) {
			
 
				+
			
 
				+/*        H  =  I */
			
 
				+
			
 
				+	*tau = 0.;
			
 
				+    } else {
			
 
				+
			
 
				+/*        general case */
			
 
				+
			
 
				+	d__1 = dlapy2_(alpha, &xnorm);
			
 
				+	beta = -d_sign(&d__1, alpha);
			
 
				+	safmin = dlamch_("S") / dlamch_("E");
			
 
				+	knt = 0;
			
 
				+	if (abs(beta) < safmin) {
			
 
				+
			
 
				+/*           XNORM, BETA may be inaccurate; scale X and recompute them */
			
 
				+
			
 
				+	    rsafmn = 1. / safmin;
			
 
				+L10:
			
 
				+	    ++knt;
			
 
				+	    i__1 = *n - 1;
			
 
				+	    dscal_(&i__1, &rsafmn, &x[1], incx);
			
 
				+	    beta *= rsafmn;
			
 
				+	    *alpha *= rsafmn;
			
 
				+	    if (abs(beta) < safmin) {
			
 
				+		goto L10;
			
 
				+	    }
			
 
				+
			
 
				+/*           New BETA is at most 1, at least SAFMIN */
			
 
				+
			
 
				+	    i__1 = *n - 1;
			
 
				+	    xnorm = dnrm2_(&i__1, &x[1], incx);
			
 
				+	    d__1 = dlapy2_(alpha, &xnorm);
			
 
				+	    beta = -d_sign(&d__1, alpha);
			
 
				+	}
			
 
				+	*tau = (beta - *alpha) / beta;
			
 
				+	i__1 = *n - 1;
			
 
				+	d__1 = 1. / (*alpha - beta);
			
 
				+	dscal_(&i__1, &d__1, &x[1], incx);
			
 
				+
			
 
				+/*        If ALPHA is subnormal, it may lose relative accuracy */
			
 
				+
			
 
				+	i__1 = knt;
			
 
				+	for (j = 1; j <= i__1; ++j) {
			
 
				+	    beta *= safmin;
			
 
				+/* L20: */
			
 
				+	}
			
 
				+	*alpha = beta;
			
 
				+    }
			
 
				+
			
 
				+    return 0;
			
 
				+
			
 
				+/*     End of DLARFG */
			
 
				+
			
 
				+} /* dlarfg_ */
			
--- a/min-dgels/additional/dlarfp.c
+++ b/min-dgels/additional/dlarfp.c
@@ -0,0 +1,192 @@
 
				+/* dlarfp.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+/* Subroutine */ int dlarfp_(integer *n, doublereal *alpha, doublereal *x, 
			
 
				+	integer *incx, doublereal *tau)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    integer i__1;
			
 
				+    doublereal d__1;
			
 
				+
			
 
				+    /* Builtin functions */
			
 
				+    double d_sign(doublereal *, doublereal *);
			
 
				+
			
 
				+    /* Local variables */
			
 
				+    integer j, knt;
			
 
				+    doublereal beta;
			
 
				+    extern doublereal dnrm2_(integer *, doublereal *, integer *);
			
 
				+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
			
 
				+	    integer *);
			
 
				+    doublereal xnorm;
			
 
				+    extern doublereal dlapy2_(doublereal *, doublereal *), dlamch_(char *);
			
 
				+    doublereal safmin, rsafmn;
			
 
				+
			
 
				+
			
 
				+/*  -- LAPACK auxiliary routine (version 3.2) -- */
			
 
				+/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
			
 
				+/*     November 2006 */
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+/*     .. Array Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*  DLARFP generates a real elementary reflector H of order n, such */
			
 
				+/*  that */
			
 
				+
			
 
				+/*        H * ( alpha ) = ( beta ),   H' * H = I. */
			
 
				+/*            (   x   )   (   0  ) */
			
 
				+
			
 
				+/*  where alpha and beta are scalars, beta is non-negative, and x is */
			
 
				+/*  an (n-1)-element real vector.  H is represented in the form */
			
 
				+
			
 
				+/*        H = I - tau * ( 1 ) * ( 1 v' ) , */
			
 
				+/*                      ( v ) */
			
 
				+
			
 
				+/*  where tau is a real scalar and v is a real (n-1)-element */
			
 
				+/*  vector. */
			
 
				+
			
 
				+/*  If the elements of x are all zero, then tau = 0 and H is taken to be */
			
 
				+/*  the unit matrix. */
			
 
				+
			
 
				+/*  Otherwise  1 <= tau <= 2. */
			
 
				+
			
 
				+/*  Arguments */
			
 
				+/*  ========= */
			
 
				+
			
 
				+/*  N       (input) INTEGER */
			
 
				+/*          The order of the elementary reflector. */
			
 
				+
			
 
				+/*  ALPHA   (input/output) DOUBLE PRECISION */
			
 
				+/*          On entry, the value alpha. */
			
 
				+/*          On exit, it is overwritten with the value beta. */
			
 
				+
			
 
				+/*  X       (input/output) DOUBLE PRECISION array, dimension */
			
 
				+/*                         (1+(N-2)*abs(INCX)) */
			
 
				+/*          On entry, the vector x. */
			
 
				+/*          On exit, it is overwritten with the vector v. */
			
 
				+
			
 
				+/*  INCX    (input) INTEGER */
			
 
				+/*          The increment between elements of X. INCX > 0. */
			
 
				+
			
 
				+/*  TAU     (output) DOUBLE PRECISION */
			
 
				+/*          The value tau. */
			
 
				+
			
 
				+/*  ===================================================================== */
			
 
				+
			
 
				+/*     .. Parameters .. */
			
 
				+/*     .. */
			
 
				+/*     .. Local Scalars .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. Intrinsic Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Subroutines .. */
			
 
				+/*     .. */
			
 
				+/*     .. Executable Statements .. */
			
 
				+
			
 
				+    /* Parameter adjustments */
			
 
				+    --x;
			
 
				+
			
 
				+    /* Function Body */
			
 
				+    if (*n <= 0) {
			
 
				+	*tau = 0.;
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+    i__1 = *n - 1;
			
 
				+    xnorm = dnrm2_(&i__1, &x[1], incx);
			
 
				+
			
 
				+    if (xnorm == 0.) {
			
 
				+
			
 
				+/*        H  =  [+/-1, 0; I], sign chosen so ALPHA >= 0 */
			
 
				+
			
 
				+	if (*alpha >= 0.) {
			
 
				+/*           When TAU.eq.ZERO, the vector is special-cased to be */
			
 
				+/*           all zeros in the application routines.  We do not need */
			
 
				+/*           to clear it. */
			
 
				+	    *tau = 0.;
			
 
				+	} else {
			
 
				+/*           However, the application routines rely on explicit */
			
 
				+/*           zero checks when TAU.ne.ZERO, and we must clear X. */
			
 
				+	    *tau = 2.;
			
 
				+	    i__1 = *n - 1;
			
 
				+	    for (j = 1; j <= i__1; ++j) {
			
 
				+		x[(j - 1) * *incx + 1] = 0.;
			
 
				+	    }
			
 
				+	    *alpha = -(*alpha);
			
 
				+	}
			
 
				+    } else {
			
 
				+
			
 
				+/*        general case */
			
 
				+
			
 
				+	d__1 = dlapy2_(alpha, &xnorm);
			
 
				+	beta = d_sign(&d__1, alpha);
			
 
				+	safmin = dlamch_("S") / dlamch_("E");
			
 
				+	knt = 0;
			
 
				+	if (abs(beta) < safmin) {
			
 
				+
			
 
				+/*           XNORM, BETA may be inaccurate; scale X and recompute them */
			
 
				+
			
 
				+	    rsafmn = 1. / safmin;
			
 
				+L10:
			
 
				+	    ++knt;
			
 
				+	    i__1 = *n - 1;
			
 
				+	    dscal_(&i__1, &rsafmn, &x[1], incx);
			
 
				+	    beta *= rsafmn;
			
 
				+	    *alpha *= rsafmn;
			
 
				+	    if (abs(beta) < safmin) {
			
 
				+		goto L10;
			
 
				+	    }
			
 
				+
			
 
				+/*           New BETA is at most 1, at least SAFMIN */
			
 
				+
			
 
				+	    i__1 = *n - 1;
			
 
				+	    xnorm = dnrm2_(&i__1, &x[1], incx);
			
 
				+	    d__1 = dlapy2_(alpha, &xnorm);
			
 
				+	    beta = d_sign(&d__1, alpha);
			
 
				+	}
			
 
				+	*alpha += beta;
			
 
				+	if (beta < 0.) {
			
 
				+	    beta = -beta;
			
 
				+	    *tau = -(*alpha) / beta;
			
 
				+	} else {
			
 
				+	    *alpha = xnorm * (xnorm / *alpha);
			
 
				+	    *tau = *alpha / beta;
			
 
				+	    *alpha = -(*alpha);
			
 
				+	}
			
 
				+	i__1 = *n - 1;
			
 
				+	d__1 = 1. / *alpha;
			
 
				+	dscal_(&i__1, &d__1, &x[1], incx);
			
 
				+
			
 
				+/*        If BETA is subnormal, it may lose relative accuracy */
			
 
				+
			
 
				+	i__1 = knt;
			
 
				+	for (j = 1; j <= i__1; ++j) {
			
 
				+	    beta *= safmin;
			
 
				+/* L20: */
			
 
				+	}
			
 
				+	*alpha = beta;
			
 
				+    }
			
 
				+
			
 
				+    return 0;
			
 
				+
			
 
				+/*     End of DLARFP */
			
 
				+
			
 
				+} /* dlarfp_ */
			
--- a/min-dgels/additional/dlarft.c
+++ b/min-dgels/additional/dlarft.c
@@ -0,0 +1,325 @@
 
				+/* dlarft.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+/* Table of constant values */
			
 
				+
			
 
				+static integer c__1 = 1;
			
 
				+static doublereal c_b8 = 0.;
			
 
				+
			
 
				+/* Subroutine */ int dlarft_(char *direct, char *storev, integer *n, integer *
			
 
				+	k, doublereal *v, integer *ldv, doublereal *tau, doublereal *t, 
			
 
				+	integer *ldt)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3;
			
 
				+    doublereal d__1;
			
 
				+
			
 
				+    /* Local variables */
			
 
				+    integer i__, j, prevlastv;
			
 
				+    doublereal vii;
			
 
				+    extern logical lsame_(char *, char *);
			
 
				+    extern /* Subroutine */ int dgemv_(char *, integer *, integer *, 
			
 
				+	    doublereal *, doublereal *, integer *, doublereal *, integer *, 
			
 
				+	    doublereal *, doublereal *, integer *);
			
 
				+    integer lastv;
			
 
				+    extern /* Subroutine */ int dtrmv_(char *, char *, char *, integer *, 
			
 
				+	    doublereal *, integer *, doublereal *, integer *);
			
 
				+
			
 
				+
			
 
				+/*  -- LAPACK auxiliary routine (version 3.2) -- */
			
 
				+/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
			
 
				+/*     November 2006 */
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+/*     .. Array Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*  DLARFT forms the triangular factor T of a real block reflector H */
			
 
				+/*  of order n, which is defined as a product of k elementary reflectors. */
			
 
				+
			
 
				+/*  If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; */
			
 
				+
			
 
				+/*  If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. */
			
 
				+
			
 
				+/*  If STOREV = 'C', the vector which defines the elementary reflector */
			
 
				+/*  H(i) is stored in the i-th column of the array V, and */
			
 
				+
			
 
				+/*     H  =  I - V * T * V' */
			
 
				+
			
 
				+/*  If STOREV = 'R', the vector which defines the elementary reflector */
			
 
				+/*  H(i) is stored in the i-th row of the array V, and */
			
 
				+
			
 
				+/*     H  =  I - V' * T * V */
			
 
				+
			
 
				+/*  Arguments */
			
 
				+/*  ========= */
			
 
				+
			
 
				+/*  DIRECT  (input) CHARACTER*1 */
			
 
				+/*          Specifies the order in which the elementary reflectors are */
			
 
				+/*          multiplied to form the block reflector: */
			
 
				+/*          = 'F': H = H(1) H(2) . . . H(k) (Forward) */
			
 
				+/*          = 'B': H = H(k) . . . H(2) H(1) (Backward) */
			
 
				+
			
 
				+/*  STOREV  (input) CHARACTER*1 */
			
 
				+/*          Specifies how the vectors which define the elementary */
			
 
				+/*          reflectors are stored (see also Further Details): */
			
 
				+/*          = 'C': columnwise */
			
 
				+/*          = 'R': rowwise */
			
 
				+
			
 
				+/*  N       (input) INTEGER */
			
 
				+/*          The order of the block reflector H. N >= 0. */
			
 
				+
			
 
				+/*  K       (input) INTEGER */
			
 
				+/*          The order of the triangular factor T (= the number of */
			
 
				+/*          elementary reflectors). K >= 1. */
			
 
				+
			
 
				+/*  V       (input/output) DOUBLE PRECISION array, dimension */
			
 
				+/*                               (LDV,K) if STOREV = 'C' */
			
 
				+/*                               (LDV,N) if STOREV = 'R' */
			
 
				+/*          The matrix V. See further details. */
			
 
				+
			
 
				+/*  LDV     (input) INTEGER */
			
 
				+/*          The leading dimension of the array V. */
			
 
				+/*          If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. */
			
 
				+
			
 
				+/*  TAU     (input) DOUBLE PRECISION array, dimension (K) */
			
 
				+/*          TAU(i) must contain the scalar factor of the elementary */
			
 
				+/*          reflector H(i). */
			
 
				+
			
 
				+/*  T       (output) DOUBLE PRECISION array, dimension (LDT,K) */
			
 
				+/*          The k by k triangular factor T of the block reflector. */
			
 
				+/*          If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is */
			
 
				+/*          lower triangular. The rest of the array is not used. */
			
 
				+
			
 
				+/*  LDT     (input) INTEGER */
			
 
				+/*          The leading dimension of the array T. LDT >= K. */
			
 
				+
			
 
				+/*  Further Details */
			
 
				+/*  =============== */
			
 
				+
			
 
				+/*  The shape of the matrix V and the storage of the vectors which define */
			
 
				+/*  the H(i) is best illustrated by the following example with n = 5 and */
			
 
				+/*  k = 3. The elements equal to 1 are not stored; the corresponding */
			
 
				+/*  array elements are modified but restored on exit. The rest of the */
			
 
				+/*  array is not used. */
			
 
				+
			
 
				+/*  DIRECT = 'F' and STOREV = 'C':         DIRECT = 'F' and STOREV = 'R': */
			
 
				+
			
 
				+/*               V = (  1       )                 V = (  1 v1 v1 v1 v1 ) */
			
 
				+/*                   ( v1  1    )                     (     1 v2 v2 v2 ) */
			
 
				+/*                   ( v1 v2  1 )                     (        1 v3 v3 ) */
			
 
				+/*                   ( v1 v2 v3 ) */
			
 
				+/*                   ( v1 v2 v3 ) */
			
 
				+
			
 
				+/*  DIRECT = 'B' and STOREV = 'C':         DIRECT = 'B' and STOREV = 'R': */
			
 
				+
			
 
				+/*               V = ( v1 v2 v3 )                 V = ( v1 v1  1       ) */
			
 
				+/*                   ( v1 v2 v3 )                     ( v2 v2 v2  1    ) */
			
 
				+/*                   (  1 v2 v3 )                     ( v3 v3 v3 v3  1 ) */
			
 
				+/*                   (     1 v3 ) */
			
 
				+/*                   (        1 ) */
			
 
				+
			
 
				+/*  ===================================================================== */
			
 
				+
			
 
				+/*     .. Parameters .. */
			
 
				+/*     .. */
			
 
				+/*     .. Local Scalars .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Subroutines .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. Executable Statements .. */
			
 
				+
			
 
				+/*     Quick return if possible */
			
 
				+
			
 
				+    /* Parameter adjustments */
			
 
				+    v_dim1 = *ldv;
			
 
				+    v_offset = 1 + v_dim1;
			
 
				+    v -= v_offset;
			
 
				+    --tau;
			
 
				+    t_dim1 = *ldt;
			
 
				+    t_offset = 1 + t_dim1;
			
 
				+    t -= t_offset;
			
 
				+
			
 
				+    /* Function Body */
			
 
				+    if (*n == 0) {
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+    if (lsame_(direct, "F")) {
			
 
				+	prevlastv = *n;
			
 
				+	i__1 = *k;
			
 
				+	for (i__ = 1; i__ <= i__1; ++i__) {
			
 
				+	    prevlastv = max(i__,prevlastv);
			
 
				+	    if (tau[i__] == 0.) {
			
 
				+
			
 
				+/*              H(i)  =  I */
			
 
				+
			
 
				+		i__2 = i__;
			
 
				+		for (j = 1; j <= i__2; ++j) {
			
 
				+		    t[j + i__ * t_dim1] = 0.;
			
 
				+/* L10: */
			
 
				+		}
			
 
				+	    } else {
			
 
				+
			
 
				+/*              general case */
			
 
				+
			
 
				+		vii = v[i__ + i__ * v_dim1];
			
 
				+		v[i__ + i__ * v_dim1] = 1.;
			
 
				+		if (lsame_(storev, "C")) {
			
 
				+/*                 Skip any trailing zeros. */
			
 
				+		    i__2 = i__ + 1;
			
 
				+		    for (lastv = *n; lastv >= i__2; --lastv) {
			
 
				+			if (v[lastv + i__ * v_dim1] != 0.) {
			
 
				+			    break;
			
 
				+			}
			
 
				+		    }
			
 
				+		    j = min(lastv,prevlastv);
			
 
				+
			
 
				+/*                 T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)' * V(i:j,i) */
			
 
				+
			
 
				+		    i__2 = j - i__ + 1;
			
 
				+		    i__3 = i__ - 1;
			
 
				+		    d__1 = -tau[i__];
			
 
				+		    dgemv_("Transpose", &i__2, &i__3, &d__1, &v[i__ + v_dim1], 
			
 
				+			     ldv, &v[i__ + i__ * v_dim1], &c__1, &c_b8, &t[
			
 
				+			    i__ * t_dim1 + 1], &c__1);
			
 
				+		} else {
			
 
				+/*                 Skip any trailing zeros. */
			
 
				+		    i__2 = i__ + 1;
			
 
				+		    for (lastv = *n; lastv >= i__2; --lastv) {
			
 
				+			if (v[i__ + lastv * v_dim1] != 0.) {
			
 
				+			    break;
			
 
				+			}
			
 
				+		    }
			
 
				+		    j = min(lastv,prevlastv);
			
 
				+
			
 
				+/*                 T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)' */
			
 
				+
			
 
				+		    i__2 = i__ - 1;
			
 
				+		    i__3 = j - i__ + 1;
			
 
				+		    d__1 = -tau[i__];
			
 
				+		    dgemv_("No transpose", &i__2, &i__3, &d__1, &v[i__ * 
			
 
				+			    v_dim1 + 1], ldv, &v[i__ + i__ * v_dim1], ldv, &
			
 
				+			    c_b8, &t[i__ * t_dim1 + 1], &c__1);
			
 
				+		}
			
 
				+		v[i__ + i__ * v_dim1] = vii;
			
 
				+
			
 
				+/*              T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */
			
 
				+
			
 
				+		i__2 = i__ - 1;
			
 
				+		dtrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[
			
 
				+			t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1);
			
 
				+		t[i__ + i__ * t_dim1] = tau[i__];
			
 
				+		if (i__ > 1) {
			
 
				+		    prevlastv = max(prevlastv,lastv);
			
 
				+		} else {
			
 
				+		    prevlastv = lastv;
			
 
				+		}
			
 
				+	    }
			
 
				+/* L20: */
			
 
				+	}
			
 
				+    } else {
			
 
				+	prevlastv = 1;
			
 
				+	for (i__ = *k; i__ >= 1; --i__) {
			
 
				+	    if (tau[i__] == 0.) {
			
 
				+
			
 
				+/*              H(i)  =  I */
			
 
				+
			
 
				+		i__1 = *k;
			
 
				+		for (j = i__; j <= i__1; ++j) {
			
 
				+		    t[j + i__ * t_dim1] = 0.;
			
 
				+/* L30: */
			
 
				+		}
			
 
				+	    } else {
			
 
				+
			
 
				+/*              general case */
			
 
				+
			
 
				+		if (i__ < *k) {
			
 
				+		    if (lsame_(storev, "C")) {
			
 
				+			vii = v[*n - *k + i__ + i__ * v_dim1];
			
 
				+			v[*n - *k + i__ + i__ * v_dim1] = 1.;
			
 
				+/*                    Skip any leading zeros. */
			
 
				+			i__1 = i__ - 1;
			
 
				+			for (lastv = 1; lastv <= i__1; ++lastv) {
			
 
				+			    if (v[lastv + i__ * v_dim1] != 0.) {
			
 
				+				break;
			
 
				+			    }
			
 
				+			}
			
 
				+			j = max(lastv,prevlastv);
			
 
				+
			
 
				+/*                    T(i+1:k,i) := */
			
 
				+/*                            - tau(i) * V(j:n-k+i,i+1:k)' * V(j:n-k+i,i) */
			
 
				+
			
 
				+			i__1 = *n - *k + i__ - j + 1;
			
 
				+			i__2 = *k - i__;
			
 
				+			d__1 = -tau[i__];
			
 
				+			dgemv_("Transpose", &i__1, &i__2, &d__1, &v[j + (i__ 
			
 
				+				+ 1) * v_dim1], ldv, &v[j + i__ * v_dim1], &
			
 
				+				c__1, &c_b8, &t[i__ + 1 + i__ * t_dim1], &
			
 
				+				c__1);
			
 
				+			v[*n - *k + i__ + i__ * v_dim1] = vii;
			
 
				+		    } else {
			
 
				+			vii = v[i__ + (*n - *k + i__) * v_dim1];
			
 
				+			v[i__ + (*n - *k + i__) * v_dim1] = 1.;
			
 
				+/*                    Skip any leading zeros. */
			
 
				+			i__1 = i__ - 1;
			
 
				+			for (lastv = 1; lastv <= i__1; ++lastv) {
			
 
				+			    if (v[i__ + lastv * v_dim1] != 0.) {
			
 
				+				break;
			
 
				+			    }
			
 
				+			}
			
 
				+			j = max(lastv,prevlastv);
			
 
				+
			
 
				+/*                    T(i+1:k,i) := */
			
 
				+/*                            - tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)' */
			
 
				+
			
 
				+			i__1 = *k - i__;
			
 
				+			i__2 = *n - *k + i__ - j + 1;
			
 
				+			d__1 = -tau[i__];
			
 
				+			dgemv_("No transpose", &i__1, &i__2, &d__1, &v[i__ + 
			
 
				+				1 + j * v_dim1], ldv, &v[i__ + j * v_dim1], 
			
 
				+				ldv, &c_b8, &t[i__ + 1 + i__ * t_dim1], &c__1);
			
 
				+			v[i__ + (*n - *k + i__) * v_dim1] = vii;
			
 
				+		    }
			
 
				+
			
 
				+/*                 T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */
			
 
				+
			
 
				+		    i__1 = *k - i__;
			
 
				+		    dtrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ 
			
 
				+			    + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ *
			
 
				+			     t_dim1], &c__1)
			
 
				+			    ;
			
 
				+		    if (i__ > 1) {
			
 
				+			prevlastv = min(prevlastv,lastv);
			
 
				+		    } else {
			
 
				+			prevlastv = lastv;
			
 
				+		    }
			
 
				+		}
			
 
				+		t[i__ + i__ * t_dim1] = tau[i__];
			
 
				+	    }
			
 
				+/* L40: */
			
 
				+	}
			
 
				+    }
			
 
				+    return 0;
			
 
				+
			
 
				+/*     End of DLARFT */
			
 
				+
			
 
				+} /* dlarft_ */
			
--- a/min-dgels/additional/dlascl.c
+++ b/min-dgels/additional/dlascl.c
@@ -0,0 +1,354 @@
 
				+/* dlascl.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+/* Subroutine */ int dlascl_(char *type__, integer *kl, integer *ku, 
			
 
				+	doublereal *cfrom, doublereal *cto, integer *m, integer *n, 
			
 
				+	doublereal *a, integer *lda, integer *info)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
			
 
				+
			
 
				+    /* Local variables */
			
 
				+    integer i__, j, k1, k2, k3, k4;
			
 
				+    doublereal mul, cto1;
			
 
				+    logical done;
			
 
				+    doublereal ctoc;
			
 
				+    extern logical lsame_(char *, char *);
			
 
				+    integer itype;
			
 
				+    doublereal cfrom1;
			
 
				+    extern doublereal dlamch_(char *);
			
 
				+    doublereal cfromc;
			
 
				+    extern logical disnan_(doublereal *);
			
 
				+    extern /* Subroutine */ int xerbla_(char *, integer *);
			
 
				+    doublereal bignum, smlnum;
			
 
				+
			
 
				+
			
 
				+/*  -- LAPACK auxiliary routine (version 3.2) -- */
			
 
				+/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
			
 
				+/*     November 2006 */
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+/*     .. Array Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*  DLASCL multiplies the M by N real matrix A by the real scalar */
			
 
				+/*  CTO/CFROM.  This is done without over/underflow as long as the final */
			
 
				+/*  result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that */
			
 
				+/*  A may be full, upper triangular, lower triangular, upper Hessenberg, */
			
 
				+/*  or banded. */
			
 
				+
			
 
				+/*  Arguments */
			
 
				+/*  ========= */
			
 
				+
			
 
				+/*  TYPE    (input) CHARACTER*1 */
			
 
				+/*          TYPE indices the storage type of the input matrix. */
			
 
				+/*          = 'G':  A is a full matrix. */
			
 
				+/*          = 'L':  A is a lower triangular matrix. */
			
 
				+/*          = 'U':  A is an upper triangular matrix. */
			
 
				+/*          = 'H':  A is an upper Hessenberg matrix. */
			
 
				+/*          = 'B':  A is a symmetric band matrix with lower bandwidth KL */
			
 
				+/*                  and upper bandwidth KU and with the only the lower */
			
 
				+/*                  half stored. */
			
 
				+/*          = 'Q':  A is a symmetric band matrix with lower bandwidth KL */
			
 
				+/*                  and upper bandwidth KU and with the only the upper */
			
 
				+/*                  half stored. */
			
 
				+/*          = 'Z':  A is a band matrix with lower bandwidth KL and upper */
			
 
				+/*                  bandwidth KU. */
			
 
				+
			
 
				+/*  KL      (input) INTEGER */
			
 
				+/*          The lower bandwidth of A.  Referenced only if TYPE = 'B', */
			
 
				+/*          'Q' or 'Z'. */
			
 
				+
			
 
				+/*  KU      (input) INTEGER */
			
 
				+/*          The upper bandwidth of A.  Referenced only if TYPE = 'B', */
			
 
				+/*          'Q' or 'Z'. */
			
 
				+
			
 
				+/*  CFROM   (input) DOUBLE PRECISION */
			
 
				+/*  CTO     (input) DOUBLE PRECISION */
			
 
				+/*          The matrix A is multiplied by CTO/CFROM. A(I,J) is computed */
			
 
				+/*          without over/underflow if the final result CTO*A(I,J)/CFROM */
			
 
				+/*          can be represented without over/underflow.  CFROM must be */
			
 
				+/*          nonzero. */
			
 
				+
			
 
				+/*  M       (input) INTEGER */
			
 
				+/*          The number of rows of the matrix A.  M >= 0. */
			
 
				+
			
 
				+/*  N       (input) INTEGER */
			
 
				+/*          The number of columns of the matrix A.  N >= 0. */
			
 
				+
			
 
				+/*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N) */
			
 
				+/*          The matrix to be multiplied by CTO/CFROM.  See TYPE for the */
			
 
				+/*          storage type. */
			
 
				+
			
 
				+/*  LDA     (input) INTEGER */
			
 
				+/*          The leading dimension of the array A.  LDA >= max(1,M). */
			
 
				+
			
 
				+/*  INFO    (output) INTEGER */
			
 
				+/*          0  - successful exit */
			
 
				+/*          <0 - if INFO = -i, the i-th argument had an illegal value. */
			
 
				+
			
 
				+/*  ===================================================================== */
			
 
				+
			
 
				+/*     .. Parameters .. */
			
 
				+/*     .. */
			
 
				+/*     .. Local Scalars .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. Intrinsic Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Subroutines .. */
			
 
				+/*     .. */
			
 
				+/*     .. Executable Statements .. */
			
 
				+
			
 
				+/*     Test the input arguments */
			
 
				+
			
 
				+    /* Parameter adjustments */
			
 
				+    a_dim1 = *lda;
			
 
				+    a_offset = 1 + a_dim1;
			
 
				+    a -= a_offset;
			
 
				+
			
 
				+    /* Function Body */
			
 
				+    *info = 0;
			
 
				+
			
 
				+    if (lsame_(type__, "G")) {
			
 
				+	itype = 0;
			
 
				+    } else if (lsame_(type__, "L")) {
			
 
				+	itype = 1;
			
 
				+    } else if (lsame_(type__, "U")) {
			
 
				+	itype = 2;
			
 
				+    } else if (lsame_(type__, "H")) {
			
 
				+	itype = 3;
			
 
				+    } else if (lsame_(type__, "B")) {
			
 
				+	itype = 4;
			
 
				+    } else if (lsame_(type__, "Q")) {
			
 
				+	itype = 5;
			
 
				+    } else if (lsame_(type__, "Z")) {
			
 
				+	itype = 6;
			
 
				+    } else {
			
 
				+	itype = -1;
			
 
				+    }
			
 
				+
			
 
				+    if (itype == -1) {
			
 
				+	*info = -1;
			
 
				+    } else if (*cfrom == 0. || disnan_(cfrom)) {
			
 
				+	*info = -4;
			
 
				+    } else if (disnan_(cto)) {
			
 
				+	*info = -5;
			
 
				+    } else if (*m < 0) {
			
 
				+	*info = -6;
			
 
				+    } else if (*n < 0 || itype == 4 && *n != *m || itype == 5 && *n != *m) {
			
 
				+	*info = -7;
			
 
				+    } else if (itype <= 3 && *lda < max(1,*m)) {
			
 
				+	*info = -9;
			
 
				+    } else if (itype >= 4) {
			
 
				+/* Computing MAX */
			
 
				+	i__1 = *m - 1;
			
 
				+	if (*kl < 0 || *kl > max(i__1,0)) {
			
 
				+	    *info = -2;
			
 
				+	} else /* if(complicated condition) */ {
			
 
				+/* Computing MAX */
			
 
				+	    i__1 = *n - 1;
			
 
				+	    if (*ku < 0 || *ku > max(i__1,0) || (itype == 4 || itype == 5) && 
			
 
				+		    *kl != *ku) {
			
 
				+		*info = -3;
			
 
				+	    } else if (itype == 4 && *lda < *kl + 1 || itype == 5 && *lda < *
			
 
				+		    ku + 1 || itype == 6 && *lda < (*kl << 1) + *ku + 1) {
			
 
				+		*info = -9;
			
 
				+	    }
			
 
				+	}
			
 
				+    }
			
 
				+
			
 
				+    if (*info != 0) {
			
 
				+	i__1 = -(*info);
			
 
				+	xerbla_("DLASCL", &i__1);
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+/*     Quick return if possible */
			
 
				+
			
 
				+    if (*n == 0 || *m == 0) {
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+/*     Get machine parameters */
			
 
				+
			
 
				+    smlnum = dlamch_("S");
			
 
				+    bignum = 1. / smlnum;
			
 
				+
			
 
				+    cfromc = *cfrom;
			
 
				+    ctoc = *cto;
			
 
				+
			
 
				+L10:
			
 
				+    cfrom1 = cfromc * smlnum;
			
 
				+    if (cfrom1 == cfromc) {
			
 
				+/*        CFROMC is an inf.  Multiply by a correctly signed zero for */
			
 
				+/*        finite CTOC, or a NaN if CTOC is infinite. */
			
 
				+	mul = ctoc / cfromc;
			
 
				+	done = TRUE_;
			
 
				+	cto1 = ctoc;
			
 
				+    } else {
			
 
				+	cto1 = ctoc / bignum;
			
 
				+	if (cto1 == ctoc) {
			
 
				+/*           CTOC is either 0 or an inf.  In both cases, CTOC itself */
			
 
				+/*           serves as the correct multiplication factor. */
			
 
				+	    mul = ctoc;
			
 
				+	    done = TRUE_;
			
 
				+	    cfromc = 1.;
			
 
				+	} else if (abs(cfrom1) > abs(ctoc) && ctoc != 0.) {
			
 
				+	    mul = smlnum;
			
 
				+	    done = FALSE_;
			
 
				+	    cfromc = cfrom1;
			
 
				+	} else if (abs(cto1) > abs(cfromc)) {
			
 
				+	    mul = bignum;
			
 
				+	    done = FALSE_;
			
 
				+	    ctoc = cto1;
			
 
				+	} else {
			
 
				+	    mul = ctoc / cfromc;
			
 
				+	    done = TRUE_;
			
 
				+	}
			
 
				+    }
			
 
				+
			
 
				+    if (itype == 0) {
			
 
				+
			
 
				+/*        Full matrix */
			
 
				+
			
 
				+	i__1 = *n;
			
 
				+	for (j = 1; j <= i__1; ++j) {
			
 
				+	    i__2 = *m;
			
 
				+	    for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+		a[i__ + j * a_dim1] *= mul;
			
 
				+/* L20: */
			
 
				+	    }
			
 
				+/* L30: */
			
 
				+	}
			
 
				+
			
 
				+    } else if (itype == 1) {
			
 
				+
			
 
				+/*        Lower triangular matrix */
			
 
				+
			
 
				+	i__1 = *n;
			
 
				+	for (j = 1; j <= i__1; ++j) {
			
 
				+	    i__2 = *m;
			
 
				+	    for (i__ = j; i__ <= i__2; ++i__) {
			
 
				+		a[i__ + j * a_dim1] *= mul;
			
 
				+/* L40: */
			
 
				+	    }
			
 
				+/* L50: */
			
 
				+	}
			
 
				+
			
 
				+    } else if (itype == 2) {
			
 
				+
			
 
				+/*        Upper triangular matrix */
			
 
				+
			
 
				+	i__1 = *n;
			
 
				+	for (j = 1; j <= i__1; ++j) {
			
 
				+	    i__2 = min(j,*m);
			
 
				+	    for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+		a[i__ + j * a_dim1] *= mul;
			
 
				+/* L60: */
			
 
				+	    }
			
 
				+/* L70: */
			
 
				+	}
			
 
				+
			
 
				+    } else if (itype == 3) {
			
 
				+
			
 
				+/*        Upper Hessenberg matrix */
			
 
				+
			
 
				+	i__1 = *n;
			
 
				+	for (j = 1; j <= i__1; ++j) {
			
 
				+/* Computing MIN */
			
 
				+	    i__3 = j + 1;
			
 
				+	    i__2 = min(i__3,*m);
			
 
				+	    for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+		a[i__ + j * a_dim1] *= mul;
			
 
				+/* L80: */
			
 
				+	    }
			
 
				+/* L90: */
			
 
				+	}
			
 
				+
			
 
				+    } else if (itype == 4) {
			
 
				+
			
 
				+/*        Lower half of a symmetric band matrix */
			
 
				+
			
 
				+	k3 = *kl + 1;
			
 
				+	k4 = *n + 1;
			
 
				+	i__1 = *n;
			
 
				+	for (j = 1; j <= i__1; ++j) {
			
 
				+/* Computing MIN */
			
 
				+	    i__3 = k3, i__4 = k4 - j;
			
 
				+	    i__2 = min(i__3,i__4);
			
 
				+	    for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+		a[i__ + j * a_dim1] *= mul;
			
 
				+/* L100: */
			
 
				+	    }
			
 
				+/* L110: */
			
 
				+	}
			
 
				+
			
 
				+    } else if (itype == 5) {
			
 
				+
			
 
				+/*        Upper half of a symmetric band matrix */
			
 
				+
			
 
				+	k1 = *ku + 2;
			
 
				+	k3 = *ku + 1;
			
 
				+	i__1 = *n;
			
 
				+	for (j = 1; j <= i__1; ++j) {
			
 
				+/* Computing MAX */
			
 
				+	    i__2 = k1 - j;
			
 
				+	    i__3 = k3;
			
 
				+	    for (i__ = max(i__2,1); i__ <= i__3; ++i__) {
			
 
				+		a[i__ + j * a_dim1] *= mul;
			
 
				+/* L120: */
			
 
				+	    }
			
 
				+/* L130: */
			
 
				+	}
			
 
				+
			
 
				+    } else if (itype == 6) {
			
 
				+
			
 
				+/*        Band matrix */
			
 
				+
			
 
				+	k1 = *kl + *ku + 2;
			
 
				+	k2 = *kl + 1;
			
 
				+	k3 = (*kl << 1) + *ku + 1;
			
 
				+	k4 = *kl + *ku + 1 + *m;
			
 
				+	i__1 = *n;
			
 
				+	for (j = 1; j <= i__1; ++j) {
			
 
				+/* Computing MAX */
			
 
				+	    i__3 = k1 - j;
			
 
				+/* Computing MIN */
			
 
				+	    i__4 = k3, i__5 = k4 - j;
			
 
				+	    i__2 = min(i__4,i__5);
			
 
				+	    for (i__ = max(i__3,k2); i__ <= i__2; ++i__) {
			
 
				+		a[i__ + j * a_dim1] *= mul;
			
 
				+/* L140: */
			
 
				+	    }
			
 
				+/* L150: */
			
 
				+	}
			
 
				+
			
 
				+    }
			
 
				+
			
 
				+    if (! done) {
			
 
				+	goto L10;
			
 
				+    }
			
 
				+
			
 
				+    return 0;
			
 
				+
			
 
				+/*     End of DLASCL */
			
 
				+
			
 
				+} /* dlascl_ */
			
--- a/min-dgels/additional/dlaset.c
+++ b/min-dgels/additional/dlaset.c
@@ -0,0 +1,152 @@
 
				+/* dlaset.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+/* Subroutine */ int dlaset_(char *uplo, integer *m, integer *n, doublereal *
			
 
				+	alpha, doublereal *beta, doublereal *a, integer *lda)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    integer a_dim1, a_offset, i__1, i__2, i__3;
			
 
				+
			
 
				+    /* Local variables */
			
 
				+    integer i__, j;
			
 
				+    extern logical lsame_(char *, char *);
			
 
				+
			
 
				+
			
 
				+/*  -- LAPACK auxiliary routine (version 3.2) -- */
			
 
				+/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
			
 
				+/*     November 2006 */
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+/*     .. Array Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*  DLASET initializes an m-by-n matrix A to BETA on the diagonal and */
			
 
				+/*  ALPHA on the offdiagonals. */
			
 
				+
			
 
				+/*  Arguments */
			
 
				+/*  ========= */
			
 
				+
			
 
				+/*  UPLO    (input) CHARACTER*1 */
			
 
				+/*          Specifies the part of the matrix A to be set. */
			
 
				+/*          = 'U':      Upper triangular part is set; the strictly lower */
			
 
				+/*                      triangular part of A is not changed. */
			
 
				+/*          = 'L':      Lower triangular part is set; the strictly upper */
			
 
				+/*                      triangular part of A is not changed. */
			
 
				+/*          Otherwise:  All of the matrix A is set. */
			
 
				+
			
 
				+/*  M       (input) INTEGER */
			
 
				+/*          The number of rows of the matrix A.  M >= 0. */
			
 
				+
			
 
				+/*  N       (input) INTEGER */
			
 
				+/*          The number of columns of the matrix A.  N >= 0. */
			
 
				+
			
 
				+/*  ALPHA   (input) DOUBLE PRECISION */
			
 
				+/*          The constant to which the offdiagonal elements are to be set. */
			
 
				+
			
 
				+/*  BETA    (input) DOUBLE PRECISION */
			
 
				+/*          The constant to which the diagonal elements are to be set. */
			
 
				+
			
 
				+/*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N) */
			
 
				+/*          On exit, the leading m-by-n submatrix of A is set as follows: */
			
 
				+
			
 
				+/*          if UPLO = 'U', A(i,j) = ALPHA, 1<=i<=j-1, 1<=j<=n, */
			
 
				+/*          if UPLO = 'L', A(i,j) = ALPHA, j+1<=i<=m, 1<=j<=n, */
			
 
				+/*          otherwise,     A(i,j) = ALPHA, 1<=i<=m, 1<=j<=n, i.ne.j, */
			
 
				+
			
 
				+/*          and, for all UPLO, A(i,i) = BETA, 1<=i<=min(m,n). */
			
 
				+
			
 
				+/*  LDA     (input) INTEGER */
			
 
				+/*          The leading dimension of the array A.  LDA >= max(1,M). */
			
 
				+
			
 
				+/* ===================================================================== */
			
 
				+
			
 
				+/*     .. Local Scalars .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. Intrinsic Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. Executable Statements .. */
			
 
				+
			
 
				+    /* Parameter adjustments */
			
 
				+    a_dim1 = *lda;
			
 
				+    a_offset = 1 + a_dim1;
			
 
				+    a -= a_offset;
			
 
				+
			
 
				+    /* Function Body */
			
 
				+    if (lsame_(uplo, "U")) {
			
 
				+
			
 
				+/*        Set the strictly upper triangular or trapezoidal part of the */
			
 
				+/*        array to ALPHA. */
			
 
				+
			
 
				+	i__1 = *n;
			
 
				+	for (j = 2; j <= i__1; ++j) {
			
 
				+/* Computing MIN */
			
 
				+	    i__3 = j - 1;
			
 
				+	    i__2 = min(i__3,*m);
			
 
				+	    for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+		a[i__ + j * a_dim1] = *alpha;
			
 
				+/* L10: */
			
 
				+	    }
			
 
				+/* L20: */
			
 
				+	}
			
 
				+
			
 
				+    } else if (lsame_(uplo, "L")) {
			
 
				+
			
 
				+/*        Set the strictly lower triangular or trapezoidal part of the */
			
 
				+/*        array to ALPHA. */
			
 
				+
			
 
				+	i__1 = min(*m,*n);
			
 
				+	for (j = 1; j <= i__1; ++j) {
			
 
				+	    i__2 = *m;
			
 
				+	    for (i__ = j + 1; i__ <= i__2; ++i__) {
			
 
				+		a[i__ + j * a_dim1] = *alpha;
			
 
				+/* L30: */
			
 
				+	    }
			
 
				+/* L40: */
			
 
				+	}
			
 
				+
			
 
				+    } else {
			
 
				+
			
 
				+/*        Set the leading m-by-n submatrix to ALPHA. */
			
 
				+
			
 
				+	i__1 = *n;
			
 
				+	for (j = 1; j <= i__1; ++j) {
			
 
				+	    i__2 = *m;
			
 
				+	    for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+		a[i__ + j * a_dim1] = *alpha;
			
 
				+/* L50: */
			
 
				+	    }
			
 
				+/* L60: */
			
 
				+	}
			
 
				+    }
			
 
				+
			
 
				+/*     Set the first min(M,N) diagonal elements to BETA. */
			
 
				+
			
 
				+    i__1 = min(*m,*n);
			
 
				+    for (i__ = 1; i__ <= i__1; ++i__) {
			
 
				+	a[i__ + i__ * a_dim1] = *beta;
			
 
				+/* L70: */
			
 
				+    }
			
 
				+
			
 
				+    return 0;
			
 
				+
			
 
				+/*     End of DLASET */
			
 
				+
			
 
				+} /* dlaset_ */
			
--- a/min-dgels/additional/dlassq.c
+++ b/min-dgels/additional/dlassq.c
@@ -0,0 +1,116 @@
 
				+/* dlassq.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+/* Subroutine */ int dlassq_(integer *n, doublereal *x, integer *incx, 
			
 
				+	doublereal *scale, doublereal *sumsq)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    integer i__1, i__2;
			
 
				+    doublereal d__1;
			
 
				+
			
 
				+    /* Local variables */
			
 
				+    integer ix;
			
 
				+    doublereal absxi;
			
 
				+
			
 
				+
			
 
				+/*  -- LAPACK auxiliary routine (version 3.2) -- */
			
 
				+/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
			
 
				+/*     November 2006 */
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+/*     .. Array Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*  DLASSQ  returns the values  scl  and  smsq  such that */
			
 
				+
			
 
				+/*     ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, */
			
 
				+
			
 
				+/*  where  x( i ) = X( 1 + ( i - 1 )*INCX ). The value of  sumsq  is */
			
 
				+/*  assumed to be non-negative and  scl  returns the value */
			
 
				+
			
 
				+/*     scl = max( scale, abs( x( i ) ) ). */
			
 
				+
			
 
				+/*  scale and sumsq must be supplied in SCALE and SUMSQ and */
			
 
				+/*  scl and smsq are overwritten on SCALE and SUMSQ respectively. */
			
 
				+
			
 
				+/*  The routine makes only one pass through the vector x. */
			
 
				+
			
 
				+/*  Arguments */
			
 
				+/*  ========= */
			
 
				+
			
 
				+/*  N       (input) INTEGER */
			
 
				+/*          The number of elements to be used from the vector X. */
			
 
				+
			
 
				+/*  X       (input) DOUBLE PRECISION array, dimension (N) */
			
 
				+/*          The vector for which a scaled sum of squares is computed. */
			
 
				+/*             x( i )  = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. */
			
 
				+
			
 
				+/*  INCX    (input) INTEGER */
			
 
				+/*          The increment between successive values of the vector X. */
			
 
				+/*          INCX > 0. */
			
 
				+
			
 
				+/*  SCALE   (input/output) DOUBLE PRECISION */
			
 
				+/*          On entry, the value  scale  in the equation above. */
			
 
				+/*          On exit, SCALE is overwritten with  scl , the scaling factor */
			
 
				+/*          for the sum of squares. */
			
 
				+
			
 
				+/*  SUMSQ   (input/output) DOUBLE PRECISION */
			
 
				+/*          On entry, the value  sumsq  in the equation above. */
			
 
				+/*          On exit, SUMSQ is overwritten with  smsq , the basic sum of */
			
 
				+/*          squares from which  scl  has been factored out. */
			
 
				+
			
 
				+/* ===================================================================== */
			
 
				+
			
 
				+/*     .. Parameters .. */
			
 
				+/*     .. */
			
 
				+/*     .. Local Scalars .. */
			
 
				+/*     .. */
			
 
				+/*     .. Intrinsic Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. Executable Statements .. */
			
 
				+
			
 
				+    /* Parameter adjustments */
			
 
				+    --x;
			
 
				+
			
 
				+    /* Function Body */
			
 
				+    if (*n > 0) {
			
 
				+	i__1 = (*n - 1) * *incx + 1;
			
 
				+	i__2 = *incx;
			
 
				+	for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) {
			
 
				+	    if (x[ix] != 0.) {
			
 
				+		absxi = (d__1 = x[ix], abs(d__1));
			
 
				+		if (*scale < absxi) {
			
 
				+/* Computing 2nd power */
			
 
				+		    d__1 = *scale / absxi;
			
 
				+		    *sumsq = *sumsq * (d__1 * d__1) + 1;
			
 
				+		    *scale = absxi;
			
 
				+		} else {
			
 
				+/* Computing 2nd power */
			
 
				+		    d__1 = absxi / *scale;
			
 
				+		    *sumsq += d__1 * d__1;
			
 
				+		}
			
 
				+	    }
			
 
				+/* L10: */
			
 
				+	}
			
 
				+    }
			
 
				+    return 0;
			
 
				+
			
 
				+/*     End of DLASSQ */
			
 
				+
			
 
				+} /* dlassq_ */
			
--- a/min-dgels/additional/dnrm2.c
+++ b/min-dgels/additional/dnrm2.c
@@ -0,0 +1,95 @@
 
				+/* dnrm2.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+doublereal dnrm2_(integer *n, doublereal *x, integer *incx)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    integer i__1, i__2;
			
 
				+    doublereal ret_val, d__1;
			
 
				+
			
 
				+    /* Builtin functions */
			
 
				+    double sqrt(doublereal);
			
 
				+
			
 
				+    /* Local variables */
			
 
				+    integer ix;
			
 
				+    doublereal ssq, norm, scale, absxi;
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+/*     .. Array Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*  DNRM2 returns the euclidean norm of a vector via the function */
			
 
				+/*  name, so that */
			
 
				+
			
 
				+/*     DNRM2 := sqrt( x'*x ) */
			
 
				+
			
 
				+
			
 
				+/*  -- This version written on 25-October-1982. */
			
 
				+/*     Modified on 14-October-1993 to inline the call to DLASSQ. */
			
 
				+/*     Sven Hammarling, Nag Ltd. */
			
 
				+
			
 
				+
			
 
				+/*     .. Parameters .. */
			
 
				+/*     .. */
			
 
				+/*     .. Local Scalars .. */
			
 
				+/*     .. */
			
 
				+/*     .. Intrinsic Functions .. */
			
 
				+/*     .. */
			
 
				+    /* Parameter adjustments */
			
 
				+    --x;
			
 
				+
			
 
				+    /* Function Body */
			
 
				+    if (*n < 1 || *incx < 1) {
			
 
				+	norm = 0.;
			
 
				+    } else if (*n == 1) {
			
 
				+	norm = abs(x[1]);
			
 
				+    } else {
			
 
				+	scale = 0.;
			
 
				+	ssq = 1.;
			
 
				+/*        The following loop is equivalent to this call to the LAPACK */
			
 
				+/*        auxiliary routine: */
			
 
				+/*        CALL DLASSQ( N, X, INCX, SCALE, SSQ ) */
			
 
				+
			
 
				+	i__1 = (*n - 1) * *incx + 1;
			
 
				+	i__2 = *incx;
			
 
				+	for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) {
			
 
				+	    if (x[ix] != 0.) {
			
 
				+		absxi = (d__1 = x[ix], abs(d__1));
			
 
				+		if (scale < absxi) {
			
 
				+/* Computing 2nd power */
			
 
				+		    d__1 = scale / absxi;
			
 
				+		    ssq = ssq * (d__1 * d__1) + 1.;
			
 
				+		    scale = absxi;
			
 
				+		} else {
			
 
				+/* Computing 2nd power */
			
 
				+		    d__1 = absxi / scale;
			
 
				+		    ssq += d__1 * d__1;
			
 
				+		}
			
 
				+	    }
			
 
				+/* L10: */
			
 
				+	}
			
 
				+	norm = scale * sqrt(ssq);
			
 
				+    }
			
 
				+
			
 
				+    ret_val = norm;
			
 
				+    return ret_val;
			
 
				+
			
 
				+/*     End of DNRM2. */
			
 
				+
			
 
				+} /* dnrm2_ */
			
--- a/min-dgels/additional/dorm2r.c
+++ b/min-dgels/additional/dorm2r.c
@@ -0,0 +1,235 @@
 
				+/* dorm2r.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+/* Table of constant values */
			
 
				+
			
 
				+static integer c__1 = 1;
			
 
				+
			
 
				+/* Subroutine */ int dorm2r_(char *side, char *trans, integer *m, integer *n, 
			
 
				+	integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal *
			
 
				+	c__, integer *ldc, doublereal *work, integer *info)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2;
			
 
				+
			
 
				+    /* Local variables */
			
 
				+    integer i__, i1, i2, i3, ic, jc, mi, ni, nq;
			
 
				+    doublereal aii;
			
 
				+    logical left;
			
 
				+    extern /* Subroutine */ int dlarf_(char *, integer *, integer *, 
			
 
				+	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
			
 
				+	    doublereal *);
			
 
				+    extern logical lsame_(char *, char *);
			
 
				+    extern /* Subroutine */ int xerbla_(char *, integer *);
			
 
				+    logical notran;
			
 
				+
			
 
				+
			
 
				+/*  -- LAPACK routine (version 3.2) -- */
			
 
				+/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
			
 
				+/*     November 2006 */
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+/*     .. Array Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*  DORM2R overwrites the general real m by n matrix C with */
			
 
				+
			
 
				+/*        Q * C  if SIDE = 'L' and TRANS = 'N', or */
			
 
				+
			
 
				+/*        Q'* C  if SIDE = 'L' and TRANS = 'T', or */
			
 
				+
			
 
				+/*        C * Q  if SIDE = 'R' and TRANS = 'N', or */
			
 
				+
			
 
				+/*        C * Q' if SIDE = 'R' and TRANS = 'T', */
			
 
				+
			
 
				+/*  where Q is a real orthogonal matrix defined as the product of k */
			
 
				+/*  elementary reflectors */
			
 
				+
			
 
				+/*        Q = H(1) H(2) . . . H(k) */
			
 
				+
			
 
				+/*  as returned by DGEQRF. Q is of order m if SIDE = 'L' and of order n */
			
 
				+/*  if SIDE = 'R'. */
			
 
				+
			
 
				+/*  Arguments */
			
 
				+/*  ========= */
			
 
				+
			
 
				+/*  SIDE    (input) CHARACTER*1 */
			
 
				+/*          = 'L': apply Q or Q' from the Left */
			
 
				+/*          = 'R': apply Q or Q' from the Right */
			
 
				+
			
 
				+/*  TRANS   (input) CHARACTER*1 */
			
 
				+/*          = 'N': apply Q  (No transpose) */
			
 
				+/*          = 'T': apply Q' (Transpose) */
			
 
				+
			
 
				+/*  M       (input) INTEGER */
			
 
				+/*          The number of rows of the matrix C. M >= 0. */
			
 
				+
			
 
				+/*  N       (input) INTEGER */
			
 
				+/*          The number of columns of the matrix C. N >= 0. */
			
 
				+
			
 
				+/*  K       (input) INTEGER */
			
 
				+/*          The number of elementary reflectors whose product defines */
			
 
				+/*          the matrix Q. */
			
 
				+/*          If SIDE = 'L', M >= K >= 0; */
			
 
				+/*          if SIDE = 'R', N >= K >= 0. */
			
 
				+
			
 
				+/*  A       (input) DOUBLE PRECISION array, dimension (LDA,K) */
			
 
				+/*          The i-th column must contain the vector which defines the */
			
 
				+/*          elementary reflector H(i), for i = 1,2,...,k, as returned by */
			
 
				+/*          DGEQRF in the first k columns of its array argument A. */
			
 
				+/*          A is modified by the routine but restored on exit. */
			
 
				+
			
 
				+/*  LDA     (input) INTEGER */
			
 
				+/*          The leading dimension of the array A. */
			
 
				+/*          If SIDE = 'L', LDA >= max(1,M); */
			
 
				+/*          if SIDE = 'R', LDA >= max(1,N). */
			
 
				+
			
 
				+/*  TAU     (input) DOUBLE PRECISION array, dimension (K) */
			
 
				+/*          TAU(i) must contain the scalar factor of the elementary */
			
 
				+/*          reflector H(i), as returned by DGEQRF. */
			
 
				+
			
 
				+/*  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N) */
			
 
				+/*          On entry, the m by n matrix C. */
			
 
				+/*          On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. */
			
 
				+
			
 
				+/*  LDC     (input) INTEGER */
			
 
				+/*          The leading dimension of the array C. LDC >= max(1,M). */
			
 
				+
			
 
				+/*  WORK    (workspace) DOUBLE PRECISION array, dimension */
			
 
				+/*                                   (N) if SIDE = 'L', */
			
 
				+/*                                   (M) if SIDE = 'R' */
			
 
				+
			
 
				+/*  INFO    (output) INTEGER */
			
 
				+/*          = 0: successful exit */
			
 
				+/*          < 0: if INFO = -i, the i-th argument had an illegal value */
			
 
				+
			
 
				+/*  ===================================================================== */
			
 
				+
			
 
				+/*     .. Parameters .. */
			
 
				+/*     .. */
			
 
				+/*     .. Local Scalars .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Subroutines .. */
			
 
				+/*     .. */
			
 
				+/*     .. Intrinsic Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. Executable Statements .. */
			
 
				+
			
 
				+/*     Test the input arguments */
			
 
				+
			
 
				+    /* Parameter adjustments */
			
 
				+    a_dim1 = *lda;
			
 
				+    a_offset = 1 + a_dim1;
			
 
				+    a -= a_offset;
			
 
				+    --tau;
			
 
				+    c_dim1 = *ldc;
			
 
				+    c_offset = 1 + c_dim1;
			
 
				+    c__ -= c_offset;
			
 
				+    --work;
			
 
				+
			
 
				+    /* Function Body */
			
 
				+    *info = 0;
			
 
				+    left = lsame_(side, "L");
			
 
				+    notran = lsame_(trans, "N");
			
 
				+
			
 
				+/*     NQ is the order of Q */
			
 
				+
			
 
				+    if (left) {
			
 
				+	nq = *m;
			
 
				+    } else {
			
 
				+	nq = *n;
			
 
				+    }
			
 
				+    if (! left && ! lsame_(side, "R")) {
			
 
				+	*info = -1;
			
 
				+    } else if (! notran && ! lsame_(trans, "T")) {
			
 
				+	*info = -2;
			
 
				+    } else if (*m < 0) {
			
 
				+	*info = -3;
			
 
				+    } else if (*n < 0) {
			
 
				+	*info = -4;
			
 
				+    } else if (*k < 0 || *k > nq) {
			
 
				+	*info = -5;
			
 
				+    } else if (*lda < max(1,nq)) {
			
 
				+	*info = -7;
			
 
				+    } else if (*ldc < max(1,*m)) {
			
 
				+	*info = -10;
			
 
				+    }
			
 
				+    if (*info != 0) {
			
 
				+	i__1 = -(*info);
			
 
				+	xerbla_("DORM2R", &i__1);
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+/*     Quick return if possible */
			
 
				+
			
 
				+    if (*m == 0 || *n == 0 || *k == 0) {
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+    if (left && ! notran || ! left && notran) {
			
 
				+	i1 = 1;
			
 
				+	i2 = *k;
			
 
				+	i3 = 1;
			
 
				+    } else {
			
 
				+	i1 = *k;
			
 
				+	i2 = 1;
			
 
				+	i3 = -1;
			
 
				+    }
			
 
				+
			
 
				+    if (left) {
			
 
				+	ni = *n;
			
 
				+	jc = 1;
			
 
				+    } else {
			
 
				+	mi = *m;
			
 
				+	ic = 1;
			
 
				+    }
			
 
				+
			
 
				+    i__1 = i2;
			
 
				+    i__2 = i3;
			
 
				+    for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
			
 
				+	if (left) {
			
 
				+
			
 
				+/*           H(i) is applied to C(i:m,1:n) */
			
 
				+
			
 
				+	    mi = *m - i__ + 1;
			
 
				+	    ic = i__;
			
 
				+	} else {
			
 
				+
			
 
				+/*           H(i) is applied to C(1:m,i:n) */
			
 
				+
			
 
				+	    ni = *n - i__ + 1;
			
 
				+	    jc = i__;
			
 
				+	}
			
 
				+
			
 
				+/*        Apply H(i) */
			
 
				+
			
 
				+	aii = a[i__ + i__ * a_dim1];
			
 
				+	a[i__ + i__ * a_dim1] = 1.;
			
 
				+	dlarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], &c__1, &tau[i__], &c__[
			
 
				+		ic + jc * c_dim1], ldc, &work[1]);
			
 
				+	a[i__ + i__ * a_dim1] = aii;
			
 
				+/* L10: */
			
 
				+    }
			
 
				+    return 0;
			
 
				+
			
 
				+/*     End of DORM2R */
			
 
				+
			
 
				+} /* dorm2r_ */
			
--- a/min-dgels/additional/dorml2.c
+++ b/min-dgels/additional/dorml2.c
@@ -0,0 +1,231 @@
 
				+/* dorml2.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+/* Subroutine */ int dorml2_(char *side, char *trans, integer *m, integer *n, 
			
 
				+	integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal *
			
 
				+	c__, integer *ldc, doublereal *work, integer *info)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2;
			
 
				+
			
 
				+    /* Local variables */
			
 
				+    integer i__, i1, i2, i3, ic, jc, mi, ni, nq;
			
 
				+    doublereal aii;
			
 
				+    logical left;
			
 
				+    extern /* Subroutine */ int dlarf_(char *, integer *, integer *, 
			
 
				+	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
			
 
				+	    doublereal *);
			
 
				+    extern logical lsame_(char *, char *);
			
 
				+    extern /* Subroutine */ int xerbla_(char *, integer *);
			
 
				+    logical notran;
			
 
				+
			
 
				+
			
 
				+/*  -- LAPACK routine (version 3.2) -- */
			
 
				+/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
			
 
				+/*     November 2006 */
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+/*     .. Array Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*  DORML2 overwrites the general real m by n matrix C with */
			
 
				+
			
 
				+/*        Q * C  if SIDE = 'L' and TRANS = 'N', or */
			
 
				+
			
 
				+/*        Q'* C  if SIDE = 'L' and TRANS = 'T', or */
			
 
				+
			
 
				+/*        C * Q  if SIDE = 'R' and TRANS = 'N', or */
			
 
				+
			
 
				+/*        C * Q' if SIDE = 'R' and TRANS = 'T', */
			
 
				+
			
 
				+/*  where Q is a real orthogonal matrix defined as the product of k */
			
 
				+/*  elementary reflectors */
			
 
				+
			
 
				+/*        Q = H(k) . . . H(2) H(1) */
			
 
				+
			
 
				+/*  as returned by DGELQF. Q is of order m if SIDE = 'L' and of order n */
			
 
				+/*  if SIDE = 'R'. */
			
 
				+
			
 
				+/*  Arguments */
			
 
				+/*  ========= */
			
 
				+
			
 
				+/*  SIDE    (input) CHARACTER*1 */
			
 
				+/*          = 'L': apply Q or Q' from the Left */
			
 
				+/*          = 'R': apply Q or Q' from the Right */
			
 
				+
			
 
				+/*  TRANS   (input) CHARACTER*1 */
			
 
				+/*          = 'N': apply Q  (No transpose) */
			
 
				+/*          = 'T': apply Q' (Transpose) */
			
 
				+
			
 
				+/*  M       (input) INTEGER */
			
 
				+/*          The number of rows of the matrix C. M >= 0. */
			
 
				+
			
 
				+/*  N       (input) INTEGER */
			
 
				+/*          The number of columns of the matrix C. N >= 0. */
			
 
				+
			
 
				+/*  K       (input) INTEGER */
			
 
				+/*          The number of elementary reflectors whose product defines */
			
 
				+/*          the matrix Q. */
			
 
				+/*          If SIDE = 'L', M >= K >= 0; */
			
 
				+/*          if SIDE = 'R', N >= K >= 0. */
			
 
				+
			
 
				+/*  A       (input) DOUBLE PRECISION array, dimension */
			
 
				+/*                               (LDA,M) if SIDE = 'L', */
			
 
				+/*                               (LDA,N) if SIDE = 'R' */
			
 
				+/*          The i-th row must contain the vector which defines the */
			
 
				+/*          elementary reflector H(i), for i = 1,2,...,k, as returned by */
			
 
				+/*          DGELQF in the first k rows of its array argument A. */
			
 
				+/*          A is modified by the routine but restored on exit. */
			
 
				+
			
 
				+/*  LDA     (input) INTEGER */
			
 
				+/*          The leading dimension of the array A. LDA >= max(1,K). */
			
 
				+
			
 
				+/*  TAU     (input) DOUBLE PRECISION array, dimension (K) */
			
 
				+/*          TAU(i) must contain the scalar factor of the elementary */
			
 
				+/*          reflector H(i), as returned by DGELQF. */
			
 
				+
			
 
				+/*  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N) */
			
 
				+/*          On entry, the m by n matrix C. */
			
 
				+/*          On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. */
			
 
				+
			
 
				+/*  LDC     (input) INTEGER */
			
 
				+/*          The leading dimension of the array C. LDC >= max(1,M). */
			
 
				+
			
 
				+/*  WORK    (workspace) DOUBLE PRECISION array, dimension */
			
 
				+/*                                   (N) if SIDE = 'L', */
			
 
				+/*                                   (M) if SIDE = 'R' */
			
 
				+
			
 
				+/*  INFO    (output) INTEGER */
			
 
				+/*          = 0: successful exit */
			
 
				+/*          < 0: if INFO = -i, the i-th argument had an illegal value */
			
 
				+
			
 
				+/*  ===================================================================== */
			
 
				+
			
 
				+/*     .. Parameters .. */
			
 
				+/*     .. */
			
 
				+/*     .. Local Scalars .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Subroutines .. */
			
 
				+/*     .. */
			
 
				+/*     .. Intrinsic Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. Executable Statements .. */
			
 
				+
			
 
				+/*     Test the input arguments */
			
 
				+
			
 
				+    /* Parameter adjustments */
			
 
				+    a_dim1 = *lda;
			
 
				+    a_offset = 1 + a_dim1;
			
 
				+    a -= a_offset;
			
 
				+    --tau;
			
 
				+    c_dim1 = *ldc;
			
 
				+    c_offset = 1 + c_dim1;
			
 
				+    c__ -= c_offset;
			
 
				+    --work;
			
 
				+
			
 
				+    /* Function Body */
			
 
				+    *info = 0;
			
 
				+    left = lsame_(side, "L");
			
 
				+    notran = lsame_(trans, "N");
			
 
				+
			
 
				+/*     NQ is the order of Q */
			
 
				+
			
 
				+    if (left) {
			
 
				+	nq = *m;
			
 
				+    } else {
			
 
				+	nq = *n;
			
 
				+    }
			
 
				+    if (! left && ! lsame_(side, "R")) {
			
 
				+	*info = -1;
			
 
				+    } else if (! notran && ! lsame_(trans, "T")) {
			
 
				+	*info = -2;
			
 
				+    } else if (*m < 0) {
			
 
				+	*info = -3;
			
 
				+    } else if (*n < 0) {
			
 
				+	*info = -4;
			
 
				+    } else if (*k < 0 || *k > nq) {
			
 
				+	*info = -5;
			
 
				+    } else if (*lda < max(1,*k)) {
			
 
				+	*info = -7;
			
 
				+    } else if (*ldc < max(1,*m)) {
			
 
				+	*info = -10;
			
 
				+    }
			
 
				+    if (*info != 0) {
			
 
				+	i__1 = -(*info);
			
 
				+	xerbla_("DORML2", &i__1);
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+/*     Quick return if possible */
			
 
				+
			
 
				+    if (*m == 0 || *n == 0 || *k == 0) {
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+    if (left && notran || ! left && ! notran) {
			
 
				+	i1 = 1;
			
 
				+	i2 = *k;
			
 
				+	i3 = 1;
			
 
				+    } else {
			
 
				+	i1 = *k;
			
 
				+	i2 = 1;
			
 
				+	i3 = -1;
			
 
				+    }
			
 
				+
			
 
				+    if (left) {
			
 
				+	ni = *n;
			
 
				+	jc = 1;
			
 
				+    } else {
			
 
				+	mi = *m;
			
 
				+	ic = 1;
			
 
				+    }
			
 
				+
			
 
				+    i__1 = i2;
			
 
				+    i__2 = i3;
			
 
				+    for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
			
 
				+	if (left) {
			
 
				+
			
 
				+/*           H(i) is applied to C(i:m,1:n) */
			
 
				+
			
 
				+	    mi = *m - i__ + 1;
			
 
				+	    ic = i__;
			
 
				+	} else {
			
 
				+
			
 
				+/*           H(i) is applied to C(1:m,i:n) */
			
 
				+
			
 
				+	    ni = *n - i__ + 1;
			
 
				+	    jc = i__;
			
 
				+	}
			
 
				+
			
 
				+/*        Apply H(i) */
			
 
				+
			
 
				+	aii = a[i__ + i__ * a_dim1];
			
 
				+	a[i__ + i__ * a_dim1] = 1.;
			
 
				+	dlarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], lda, &tau[i__], &c__[
			
 
				+		ic + jc * c_dim1], ldc, &work[1]);
			
 
				+	a[i__ + i__ * a_dim1] = aii;
			
 
				+/* L10: */
			
 
				+    }
			
 
				+    return 0;
			
 
				+
			
 
				+/*     End of DORML2 */
			
 
				+
			
 
				+} /* dorml2_ */
			
--- a/min-dgels/additional/dormlq.c
+++ b/min-dgels/additional/dormlq.c
@@ -0,0 +1,334 @@
 
				+/* dormlq.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+/* Table of constant values */
			
 
				+
			
 
				+static integer c__1 = 1;
			
 
				+static integer c_n1 = -1;
			
 
				+static integer c__2 = 2;
			
 
				+static integer c__65 = 65;
			
 
				+
			
 
				+/* Subroutine */ int dormlq_(char *side, char *trans, integer *m, integer *n, 
			
 
				+	integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal *
			
 
				+	c__, integer *ldc, doublereal *work, integer *lwork, integer *info)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    address a__1[2];
			
 
				+    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, 
			
 
				+	    i__5;
			
 
				+    char ch__1[2];
			
 
				+
			
 
				+    /* Builtin functions */
			
 
				+    /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen);
			
 
				+
			
 
				+    /* Local variables */
			
 
				+    integer i__;
			
 
				+    doublereal t[4160]	/* was [65][64] */;
			
 
				+    integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws;
			
 
				+    logical left;
			
 
				+    extern logical lsame_(char *, char *);
			
 
				+    integer nbmin, iinfo;
			
 
				+    extern /* Subroutine */ int dorml2_(char *, char *, integer *, integer *, 
			
 
				+	    integer *, doublereal *, integer *, doublereal *, doublereal *, 
			
 
				+	    integer *, doublereal *, integer *), dlarfb_(char 
			
 
				+	    *, char *, char *, char *, integer *, integer *, integer *, 
			
 
				+	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
			
 
				+	    integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal 
			
 
				+	    *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *);
			
 
				+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
			
 
				+	    integer *, integer *);
			
 
				+    logical notran;
			
 
				+    integer ldwork;
			
 
				+    char transt[1];
			
 
				+    integer lwkopt;
			
 
				+    logical lquery;
			
 
				+
			
 
				+
			
 
				+/*  -- LAPACK routine (version 3.2) -- */
			
 
				+/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
			
 
				+/*     November 2006 */
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+/*     .. Array Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*  DORMLQ overwrites the general real M-by-N matrix C with */
			
 
				+
			
 
				+/*                  SIDE = 'L'     SIDE = 'R' */
			
 
				+/*  TRANS = 'N':      Q * C          C * Q */
			
 
				+/*  TRANS = 'T':      Q**T * C       C * Q**T */
			
 
				+
			
 
				+/*  where Q is a real orthogonal matrix defined as the product of k */
			
 
				+/*  elementary reflectors */
			
 
				+
			
 
				+/*        Q = H(k) . . . H(2) H(1) */
			
 
				+
			
 
				+/*  as returned by DGELQF. Q is of order M if SIDE = 'L' and of order N */
			
 
				+/*  if SIDE = 'R'. */
			
 
				+
			
 
				+/*  Arguments */
			
 
				+/*  ========= */
			
 
				+
			
 
				+/*  SIDE    (input) CHARACTER*1 */
			
 
				+/*          = 'L': apply Q or Q**T from the Left; */
			
 
				+/*          = 'R': apply Q or Q**T from the Right. */
			
 
				+
			
 
				+/*  TRANS   (input) CHARACTER*1 */
			
 
				+/*          = 'N':  No transpose, apply Q; */
			
 
				+/*          = 'T':  Transpose, apply Q**T. */
			
 
				+
			
 
				+/*  M       (input) INTEGER */
			
 
				+/*          The number of rows of the matrix C. M >= 0. */
			
 
				+
			
 
				+/*  N       (input) INTEGER */
			
 
				+/*          The number of columns of the matrix C. N >= 0. */
			
 
				+
			
 
				+/*  K       (input) INTEGER */
			
 
				+/*          The number of elementary reflectors whose product defines */
			
 
				+/*          the matrix Q. */
			
 
				+/*          If SIDE = 'L', M >= K >= 0; */
			
 
				+/*          if SIDE = 'R', N >= K >= 0. */
			
 
				+
			
 
				+/*  A       (input) DOUBLE PRECISION array, dimension */
			
 
				+/*                               (LDA,M) if SIDE = 'L', */
			
 
				+/*                               (LDA,N) if SIDE = 'R' */
			
 
				+/*          The i-th row must contain the vector which defines the */
			
 
				+/*          elementary reflector H(i), for i = 1,2,...,k, as returned by */
			
 
				+/*          DGELQF in the first k rows of its array argument A. */
			
 
				+/*          A is modified by the routine but restored on exit. */
			
 
				+
			
 
				+/*  LDA     (input) INTEGER */
			
 
				+/*          The leading dimension of the array A. LDA >= max(1,K). */
			
 
				+
			
 
				+/*  TAU     (input) DOUBLE PRECISION array, dimension (K) */
			
 
				+/*          TAU(i) must contain the scalar factor of the elementary */
			
 
				+/*          reflector H(i), as returned by DGELQF. */
			
 
				+
			
 
				+/*  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N) */
			
 
				+/*          On entry, the M-by-N matrix C. */
			
 
				+/*          On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */
			
 
				+
			
 
				+/*  LDC     (input) INTEGER */
			
 
				+/*          The leading dimension of the array C. LDC >= max(1,M). */
			
 
				+
			
 
				+/*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
			
 
				+/*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
			
 
				+
			
 
				+/*  LWORK   (input) INTEGER */
			
 
				+/*          The dimension of the array WORK. */
			
 
				+/*          If SIDE = 'L', LWORK >= max(1,N); */
			
 
				+/*          if SIDE = 'R', LWORK >= max(1,M). */
			
 
				+/*          For optimum performance LWORK >= N*NB if SIDE = 'L', and */
			
 
				+/*          LWORK >= M*NB if SIDE = 'R', where NB is the optimal */
			
 
				+/*          blocksize. */
			
 
				+
			
 
				+/*          If LWORK = -1, then a workspace query is assumed; the routine */
			
 
				+/*          only calculates the optimal size of the WORK array, returns */
			
 
				+/*          this value as the first entry of the WORK array, and no error */
			
 
				+/*          message related to LWORK is issued by XERBLA. */
			
 
				+
			
 
				+/*  INFO    (output) INTEGER */
			
 
				+/*          = 0:  successful exit */
			
 
				+/*          < 0:  if INFO = -i, the i-th argument had an illegal value */
			
 
				+
			
 
				+/*  ===================================================================== */
			
 
				+
			
 
				+/*     .. Parameters .. */
			
 
				+/*     .. */
			
 
				+/*     .. Local Scalars .. */
			
 
				+/*     .. */
			
 
				+/*     .. Local Arrays .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Subroutines .. */
			
 
				+/*     .. */
			
 
				+/*     .. Intrinsic Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. Executable Statements .. */
			
 
				+
			
 
				+/*     Test the input arguments */
			
 
				+
			
 
				+    /* Parameter adjustments */
			
 
				+    a_dim1 = *lda;
			
 
				+    a_offset = 1 + a_dim1;
			
 
				+    a -= a_offset;
			
 
				+    --tau;
			
 
				+    c_dim1 = *ldc;
			
 
				+    c_offset = 1 + c_dim1;
			
 
				+    c__ -= c_offset;
			
 
				+    --work;
			
 
				+
			
 
				+    /* Function Body */
			
 
				+    *info = 0;
			
 
				+    left = lsame_(side, "L");
			
 
				+    notran = lsame_(trans, "N");
			
 
				+    lquery = *lwork == -1;
			
 
				+
			
 
				+/*     NQ is the order of Q and NW is the minimum dimension of WORK */
			
 
				+
			
 
				+    if (left) {
			
 
				+	nq = *m;
			
 
				+	nw = *n;
			
 
				+    } else {
			
 
				+	nq = *n;
			
 
				+	nw = *m;
			
 
				+    }
			
 
				+    if (! left && ! lsame_(side, "R")) {
			
 
				+	*info = -1;
			
 
				+    } else if (! notran && ! lsame_(trans, "T")) {
			
 
				+	*info = -2;
			
 
				+    } else if (*m < 0) {
			
 
				+	*info = -3;
			
 
				+    } else if (*n < 0) {
			
 
				+	*info = -4;
			
 
				+    } else if (*k < 0 || *k > nq) {
			
 
				+	*info = -5;
			
 
				+    } else if (*lda < max(1,*k)) {
			
 
				+	*info = -7;
			
 
				+    } else if (*ldc < max(1,*m)) {
			
 
				+	*info = -10;
			
 
				+    } else if (*lwork < max(1,nw) && ! lquery) {
			
 
				+	*info = -12;
			
 
				+    }
			
 
				+
			
 
				+    if (*info == 0) {
			
 
				+
			
 
				+/*        Determine the block size.  NB may be at most NBMAX, where NBMAX */
			
 
				+/*        is used to define the local array T. */
			
 
				+
			
 
				+/* Computing MIN */
			
 
				+/* Writing concatenation */
			
 
				+	i__3[0] = 1, a__1[0] = side;
			
 
				+	i__3[1] = 1, a__1[1] = trans;
			
 
				+	s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
			
 
				+	i__1 = 64, i__2 = ilaenv_(&c__1, "DORMLQ", ch__1, m, n, k, &c_n1);
			
 
				+	nb = min(i__1,i__2);
			
 
				+	lwkopt = max(1,nw) * nb;
			
 
				+	work[1] = (doublereal) lwkopt;
			
 
				+    }
			
 
				+
			
 
				+    if (*info != 0) {
			
 
				+	i__1 = -(*info);
			
 
				+	xerbla_("DORMLQ", &i__1);
			
 
				+	return 0;
			
 
				+    } else if (lquery) {
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+/*     Quick return if possible */
			
 
				+
			
 
				+    if (*m == 0 || *n == 0 || *k == 0) {
			
 
				+	work[1] = 1.;
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+    nbmin = 2;
			
 
				+    ldwork = nw;
			
 
				+    if (nb > 1 && nb < *k) {
			
 
				+	iws = nw * nb;
			
 
				+	if (*lwork < iws) {
			
 
				+	    nb = *lwork / ldwork;
			
 
				+/* Computing MAX */
			
 
				+/* Writing concatenation */
			
 
				+	    i__3[0] = 1, a__1[0] = side;
			
 
				+	    i__3[1] = 1, a__1[1] = trans;
			
 
				+	    s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
			
 
				+	    i__1 = 2, i__2 = ilaenv_(&c__2, "DORMLQ", ch__1, m, n, k, &c_n1);
			
 
				+	    nbmin = max(i__1,i__2);
			
 
				+	}
			
 
				+    } else {
			
 
				+	iws = nw;
			
 
				+    }
			
 
				+
			
 
				+    if (nb < nbmin || nb >= *k) {
			
 
				+
			
 
				+/*        Use unblocked code */
			
 
				+
			
 
				+	dorml2_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[
			
 
				+		c_offset], ldc, &work[1], &iinfo);
			
 
				+    } else {
			
 
				+
			
 
				+/*        Use blocked code */
			
 
				+
			
 
				+	if (left && notran || ! left && ! notran) {
			
 
				+	    i1 = 1;
			
 
				+	    i2 = *k;
			
 
				+	    i3 = nb;
			
 
				+	} else {
			
 
				+	    i1 = (*k - 1) / nb * nb + 1;
			
 
				+	    i2 = 1;
			
 
				+	    i3 = -nb;
			
 
				+	}
			
 
				+
			
 
				+	if (left) {
			
 
				+	    ni = *n;
			
 
				+	    jc = 1;
			
 
				+	} else {
			
 
				+	    mi = *m;
			
 
				+	    ic = 1;
			
 
				+	}
			
 
				+
			
 
				+	if (notran) {
			
 
				+	    *(unsigned char *)transt = 'T';
			
 
				+	} else {
			
 
				+	    *(unsigned char *)transt = 'N';
			
 
				+	}
			
 
				+
			
 
				+	i__1 = i2;
			
 
				+	i__2 = i3;
			
 
				+	for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
			
 
				+/* Computing MIN */
			
 
				+	    i__4 = nb, i__5 = *k - i__ + 1;
			
 
				+	    ib = min(i__4,i__5);
			
 
				+
			
 
				+/*           Form the triangular factor of the block reflector */
			
 
				+/*           H = H(i) H(i+1) . . . H(i+ib-1) */
			
 
				+
			
 
				+	    i__4 = nq - i__ + 1;
			
 
				+	    dlarft_("Forward", "Rowwise", &i__4, &ib, &a[i__ + i__ * a_dim1], 
			
 
				+		    lda, &tau[i__], t, &c__65);
			
 
				+	    if (left) {
			
 
				+
			
 
				+/*              H or H' is applied to C(i:m,1:n) */
			
 
				+
			
 
				+		mi = *m - i__ + 1;
			
 
				+		ic = i__;
			
 
				+	    } else {
			
 
				+
			
 
				+/*              H or H' is applied to C(1:m,i:n) */
			
 
				+
			
 
				+		ni = *n - i__ + 1;
			
 
				+		jc = i__;
			
 
				+	    }
			
 
				+
			
 
				+/*           Apply H or H' */
			
 
				+
			
 
				+	    dlarfb_(side, transt, "Forward", "Rowwise", &mi, &ni, &ib, &a[i__ 
			
 
				+		    + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * c_dim1], 
			
 
				+		    ldc, &work[1], &ldwork);
			
 
				+/* L10: */
			
 
				+	}
			
 
				+    }
			
 
				+    work[1] = (doublereal) lwkopt;
			
 
				+    return 0;
			
 
				+
			
 
				+/*     End of DORMLQ */
			
 
				+
			
 
				+} /* dormlq_ */
			
--- a/min-dgels/additional/dormqr.c
+++ b/min-dgels/additional/dormqr.c
@@ -0,0 +1,327 @@
 
				+/* dormqr.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+/* Table of constant values */
			
 
				+
			
 
				+static integer c__1 = 1;
			
 
				+static integer c_n1 = -1;
			
 
				+static integer c__2 = 2;
			
 
				+static integer c__65 = 65;
			
 
				+
			
 
				+/* Subroutine */ int dormqr_(char *side, char *trans, integer *m, integer *n, 
			
 
				+	integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal *
			
 
				+	c__, integer *ldc, doublereal *work, integer *lwork, integer *info)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    address a__1[2];
			
 
				+    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, 
			
 
				+	    i__5;
			
 
				+    char ch__1[2];
			
 
				+
			
 
				+    /* Builtin functions */
			
 
				+    /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen);
			
 
				+
			
 
				+    /* Local variables */
			
 
				+    integer i__;
			
 
				+    doublereal t[4160]	/* was [65][64] */;
			
 
				+    integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws;
			
 
				+    logical left;
			
 
				+    extern logical lsame_(char *, char *);
			
 
				+    integer nbmin, iinfo;
			
 
				+    extern /* Subroutine */ int dorm2r_(char *, char *, integer *, integer *, 
			
 
				+	    integer *, doublereal *, integer *, doublereal *, doublereal *, 
			
 
				+	    integer *, doublereal *, integer *), dlarfb_(char 
			
 
				+	    *, char *, char *, char *, integer *, integer *, integer *, 
			
 
				+	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
			
 
				+	    integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal 
			
 
				+	    *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *);
			
 
				+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
			
 
				+	    integer *, integer *);
			
 
				+    logical notran;
			
 
				+    integer ldwork, lwkopt;
			
 
				+    logical lquery;
			
 
				+
			
 
				+
			
 
				+/*  -- LAPACK routine (version 3.2) -- */
			
 
				+/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
			
 
				+/*     November 2006 */
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+/*     .. Array Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*  DORMQR overwrites the general real M-by-N matrix C with */
			
 
				+
			
 
				+/*                  SIDE = 'L'     SIDE = 'R' */
			
 
				+/*  TRANS = 'N':      Q * C          C * Q */
			
 
				+/*  TRANS = 'T':      Q**T * C       C * Q**T */
			
 
				+
			
 
				+/*  where Q is a real orthogonal matrix defined as the product of k */
			
 
				+/*  elementary reflectors */
			
 
				+
			
 
				+/*        Q = H(1) H(2) . . . H(k) */
			
 
				+
			
 
				+/*  as returned by DGEQRF. Q is of order M if SIDE = 'L' and of order N */
			
 
				+/*  if SIDE = 'R'. */
			
 
				+
			
 
				+/*  Arguments */
			
 
				+/*  ========= */
			
 
				+
			
 
				+/*  SIDE    (input) CHARACTER*1 */
			
 
				+/*          = 'L': apply Q or Q**T from the Left; */
			
 
				+/*          = 'R': apply Q or Q**T from the Right. */
			
 
				+
			
 
				+/*  TRANS   (input) CHARACTER*1 */
			
 
				+/*          = 'N':  No transpose, apply Q; */
			
 
				+/*          = 'T':  Transpose, apply Q**T. */
			
 
				+
			
 
				+/*  M       (input) INTEGER */
			
 
				+/*          The number of rows of the matrix C. M >= 0. */
			
 
				+
			
 
				+/*  N       (input) INTEGER */
			
 
				+/*          The number of columns of the matrix C. N >= 0. */
			
 
				+
			
 
				+/*  K       (input) INTEGER */
			
 
				+/*          The number of elementary reflectors whose product defines */
			
 
				+/*          the matrix Q. */
			
 
				+/*          If SIDE = 'L', M >= K >= 0; */
			
 
				+/*          if SIDE = 'R', N >= K >= 0. */
			
 
				+
			
 
				+/*  A       (input) DOUBLE PRECISION array, dimension (LDA,K) */
			
 
				+/*          The i-th column must contain the vector which defines the */
			
 
				+/*          elementary reflector H(i), for i = 1,2,...,k, as returned by */
			
 
				+/*          DGEQRF in the first k columns of its array argument A. */
			
 
				+/*          A is modified by the routine but restored on exit. */
			
 
				+
			
 
				+/*  LDA     (input) INTEGER */
			
 
				+/*          The leading dimension of the array A. */
			
 
				+/*          If SIDE = 'L', LDA >= max(1,M); */
			
 
				+/*          if SIDE = 'R', LDA >= max(1,N). */
			
 
				+
			
 
				+/*  TAU     (input) DOUBLE PRECISION array, dimension (K) */
			
 
				+/*          TAU(i) must contain the scalar factor of the elementary */
			
 
				+/*          reflector H(i), as returned by DGEQRF. */
			
 
				+
			
 
				+/*  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N) */
			
 
				+/*          On entry, the M-by-N matrix C. */
			
 
				+/*          On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */
			
 
				+
			
 
				+/*  LDC     (input) INTEGER */
			
 
				+/*          The leading dimension of the array C. LDC >= max(1,M). */
			
 
				+
			
 
				+/*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
			
 
				+/*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
			
 
				+
			
 
				+/*  LWORK   (input) INTEGER */
			
 
				+/*          The dimension of the array WORK. */
			
 
				+/*          If SIDE = 'L', LWORK >= max(1,N); */
			
 
				+/*          if SIDE = 'R', LWORK >= max(1,M). */
			
 
				+/*          For optimum performance LWORK >= N*NB if SIDE = 'L', and */
			
 
				+/*          LWORK >= M*NB if SIDE = 'R', where NB is the optimal */
			
 
				+/*          blocksize. */
			
 
				+
			
 
				+/*          If LWORK = -1, then a workspace query is assumed; the routine */
			
 
				+/*          only calculates the optimal size of the WORK array, returns */
			
 
				+/*          this value as the first entry of the WORK array, and no error */
			
 
				+/*          message related to LWORK is issued by XERBLA. */
			
 
				+
			
 
				+/*  INFO    (output) INTEGER */
			
 
				+/*          = 0:  successful exit */
			
 
				+/*          < 0:  if INFO = -i, the i-th argument had an illegal value */
			
 
				+
			
 
				+/*  ===================================================================== */
			
 
				+
			
 
				+/*     .. Parameters .. */
			
 
				+/*     .. */
			
 
				+/*     .. Local Scalars .. */
			
 
				+/*     .. */
			
 
				+/*     .. Local Arrays .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Subroutines .. */
			
 
				+/*     .. */
			
 
				+/*     .. Intrinsic Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. Executable Statements .. */
			
 
				+
			
 
				+/*     Test the input arguments */
			
 
				+
			
 
				+    /* Parameter adjustments */
			
 
				+    a_dim1 = *lda;
			
 
				+    a_offset = 1 + a_dim1;
			
 
				+    a -= a_offset;
			
 
				+    --tau;
			
 
				+    c_dim1 = *ldc;
			
 
				+    c_offset = 1 + c_dim1;
			
 
				+    c__ -= c_offset;
			
 
				+    --work;
			
 
				+
			
 
				+    /* Function Body */
			
 
				+    *info = 0;
			
 
				+    left = lsame_(side, "L");
			
 
				+    notran = lsame_(trans, "N");
			
 
				+    lquery = *lwork == -1;
			
 
				+
			
 
				+/*     NQ is the order of Q and NW is the minimum dimension of WORK */
			
 
				+
			
 
				+    if (left) {
			
 
				+	nq = *m;
			
 
				+	nw = *n;
			
 
				+    } else {
			
 
				+	nq = *n;
			
 
				+	nw = *m;
			
 
				+    }
			
 
				+    if (! left && ! lsame_(side, "R")) {
			
 
				+	*info = -1;
			
 
				+    } else if (! notran && ! lsame_(trans, "T")) {
			
 
				+	*info = -2;
			
 
				+    } else if (*m < 0) {
			
 
				+	*info = -3;
			
 
				+    } else if (*n < 0) {
			
 
				+	*info = -4;
			
 
				+    } else if (*k < 0 || *k > nq) {
			
 
				+	*info = -5;
			
 
				+    } else if (*lda < max(1,nq)) {
			
 
				+	*info = -7;
			
 
				+    } else if (*ldc < max(1,*m)) {
			
 
				+	*info = -10;
			
 
				+    } else if (*lwork < max(1,nw) && ! lquery) {
			
 
				+	*info = -12;
			
 
				+    }
			
 
				+
			
 
				+    if (*info == 0) {
			
 
				+
			
 
				+/*        Determine the block size.  NB may be at most NBMAX, where NBMAX */
			
 
				+/*        is used to define the local array T. */
			
 
				+
			
 
				+/* Computing MIN */
			
 
				+/* Writing concatenation */
			
 
				+	i__3[0] = 1, a__1[0] = side;
			
 
				+	i__3[1] = 1, a__1[1] = trans;
			
 
				+	s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
			
 
				+	i__1 = 64, i__2 = ilaenv_(&c__1, "DORMQR", ch__1, m, n, k, &c_n1);
			
 
				+	nb = min(i__1,i__2);
			
 
				+	lwkopt = max(1,nw) * nb;
			
 
				+	work[1] = (doublereal) lwkopt;
			
 
				+    }
			
 
				+
			
 
				+    if (*info != 0) {
			
 
				+	i__1 = -(*info);
			
 
				+	xerbla_("DORMQR", &i__1);
			
 
				+	return 0;
			
 
				+    } else if (lquery) {
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+/*     Quick return if possible */
			
 
				+
			
 
				+    if (*m == 0 || *n == 0 || *k == 0) {
			
 
				+	work[1] = 1.;
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+    nbmin = 2;
			
 
				+    ldwork = nw;
			
 
				+    if (nb > 1 && nb < *k) {
			
 
				+	iws = nw * nb;
			
 
				+	if (*lwork < iws) {
			
 
				+	    nb = *lwork / ldwork;
			
 
				+/* Computing MAX */
			
 
				+/* Writing concatenation */
			
 
				+	    i__3[0] = 1, a__1[0] = side;
			
 
				+	    i__3[1] = 1, a__1[1] = trans;
			
 
				+	    s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
			
 
				+	    i__1 = 2, i__2 = ilaenv_(&c__2, "DORMQR", ch__1, m, n, k, &c_n1);
			
 
				+	    nbmin = max(i__1,i__2);
			
 
				+	}
			
 
				+    } else {
			
 
				+	iws = nw;
			
 
				+    }
			
 
				+
			
 
				+    if (nb < nbmin || nb >= *k) {
			
 
				+
			
 
				+/*        Use unblocked code */
			
 
				+
			
 
				+	dorm2r_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[
			
 
				+		c_offset], ldc, &work[1], &iinfo);
			
 
				+    } else {
			
 
				+
			
 
				+/*        Use blocked code */
			
 
				+
			
 
				+	if (left && ! notran || ! left && notran) {
			
 
				+	    i1 = 1;
			
 
				+	    i2 = *k;
			
 
				+	    i3 = nb;
			
 
				+	} else {
			
 
				+	    i1 = (*k - 1) / nb * nb + 1;
			
 
				+	    i2 = 1;
			
 
				+	    i3 = -nb;
			
 
				+	}
			
 
				+
			
 
				+	if (left) {
			
 
				+	    ni = *n;
			
 
				+	    jc = 1;
			
 
				+	} else {
			
 
				+	    mi = *m;
			
 
				+	    ic = 1;
			
 
				+	}
			
 
				+
			
 
				+	i__1 = i2;
			
 
				+	i__2 = i3;
			
 
				+	for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
			
 
				+/* Computing MIN */
			
 
				+	    i__4 = nb, i__5 = *k - i__ + 1;
			
 
				+	    ib = min(i__4,i__5);
			
 
				+
			
 
				+/*           Form the triangular factor of the block reflector */
			
 
				+/*           H = H(i) H(i+1) . . . H(i+ib-1) */
			
 
				+
			
 
				+	    i__4 = nq - i__ + 1;
			
 
				+	    dlarft_("Forward", "Columnwise", &i__4, &ib, &a[i__ + i__ * 
			
 
				+		    a_dim1], lda, &tau[i__], t, &c__65)
			
 
				+		    ;
			
 
				+	    if (left) {
			
 
				+
			
 
				+/*              H or H' is applied to C(i:m,1:n) */
			
 
				+
			
 
				+		mi = *m - i__ + 1;
			
 
				+		ic = i__;
			
 
				+	    } else {
			
 
				+
			
 
				+/*              H or H' is applied to C(1:m,i:n) */
			
 
				+
			
 
				+		ni = *n - i__ + 1;
			
 
				+		jc = i__;
			
 
				+	    }
			
 
				+
			
 
				+/*           Apply H or H' */
			
 
				+
			
 
				+	    dlarfb_(side, trans, "Forward", "Columnwise", &mi, &ni, &ib, &a[
			
 
				+		    i__ + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * 
			
 
				+		    c_dim1], ldc, &work[1], &ldwork);
			
 
				+/* L10: */
			
 
				+	}
			
 
				+    }
			
 
				+    work[1] = (doublereal) lwkopt;
			
 
				+    return 0;
			
 
				+
			
 
				+/*     End of DORMQR */
			
 
				+
			
 
				+} /* dormqr_ */
			
--- a/min-dgels/additional/dscal.c
+++ b/min-dgels/additional/dscal.c
@@ -0,0 +1,96 @@
 
				+/* dscal.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+/* Subroutine */ int dscal_(integer *n, doublereal *da, doublereal *dx, 
			
 
				+	integer *incx)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    integer i__1, i__2;
			
 
				+
			
 
				+    /* Local variables */
			
 
				+    integer i__, m, mp1, nincx;
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+/*     .. Array Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+/* * */
			
 
				+/*     scales a vector by a constant. */
			
 
				+/*     uses unrolled loops for increment equal to one. */
			
 
				+/*     jack dongarra, linpack, 3/11/78. */
			
 
				+/*     modified 3/93 to return if incx .le. 0. */
			
 
				+/*     modified 12/3/93, array(1) declarations changed to array(*) */
			
 
				+
			
 
				+
			
 
				+/*     .. Local Scalars .. */
			
 
				+/*     .. */
			
 
				+/*     .. Intrinsic Functions .. */
			
 
				+/*     .. */
			
 
				+    /* Parameter adjustments */
			
 
				+    --dx;
			
 
				+
			
 
				+    /* Function Body */
			
 
				+    if (*n <= 0 || *incx <= 0) {
			
 
				+	return 0;
			
 
				+    }
			
 
				+    if (*incx == 1) {
			
 
				+	goto L20;
			
 
				+    }
			
 
				+
			
 
				+/*        code for increment not equal to 1 */
			
 
				+
			
 
				+    nincx = *n * *incx;
			
 
				+    i__1 = nincx;
			
 
				+    i__2 = *incx;
			
 
				+    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
			
 
				+	dx[i__] = *da * dx[i__];
			
 
				+/* L10: */
			
 
				+    }
			
 
				+    return 0;
			
 
				+
			
 
				+/*        code for increment equal to 1 */
			
 
				+
			
 
				+
			
 
				+/*        clean-up loop */
			
 
				+
			
 
				+L20:
			
 
				+    m = *n % 5;
			
 
				+    if (m == 0) {
			
 
				+	goto L40;
			
 
				+    }
			
 
				+    i__2 = m;
			
 
				+    for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+	dx[i__] = *da * dx[i__];
			
 
				+/* L30: */
			
 
				+    }
			
 
				+    if (*n < 5) {
			
 
				+	return 0;
			
 
				+    }
			
 
				+L40:
			
 
				+    mp1 = m + 1;
			
 
				+    i__2 = *n;
			
 
				+    for (i__ = mp1; i__ <= i__2; i__ += 5) {
			
 
				+	dx[i__] = *da * dx[i__];
			
 
				+	dx[i__ + 1] = *da * dx[i__ + 1];
			
 
				+	dx[i__ + 2] = *da * dx[i__ + 2];
			
 
				+	dx[i__ + 3] = *da * dx[i__ + 3];
			
 
				+	dx[i__ + 4] = *da * dx[i__ + 4];
			
 
				+/* L50: */
			
 
				+    }
			
 
				+    return 0;
			
 
				+} /* dscal_ */
			
--- a/min-dgels/additional/dtrmm.c
+++ b/min-dgels/additional/dtrmm.c
@@ -0,0 +1,453 @@
 
				+/* dtrmm.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+/* Subroutine */ int dtrmm_(char *side, char *uplo, char *transa, char *diag, 
			
 
				+	integer *m, integer *n, doublereal *alpha, doublereal *a, integer *
			
 
				+	lda, doublereal *b, integer *ldb)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
			
 
				+
			
 
				+    /* Local variables */
			
 
				+    integer i__, j, k, info;
			
 
				+    doublereal temp;
			
 
				+    logical lside;
			
 
				+    extern logical lsame_(char *, char *);
			
 
				+    integer nrowa;
			
 
				+    logical upper;
			
 
				+    extern /* Subroutine */ int xerbla_(char *, integer *);
			
 
				+    logical nounit;
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+/*     .. Array Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*  DTRMM  performs one of the matrix-matrix operations */
			
 
				+
			
 
				+/*     B := alpha*op( A )*B,   or   B := alpha*B*op( A ), */
			
 
				+
			
 
				+/*  where  alpha  is a scalar,  B  is an m by n matrix,  A  is a unit, or */
			
 
				+/*  non-unit,  upper or lower triangular matrix  and  op( A )  is one  of */
			
 
				+
			
 
				+/*     op( A ) = A   or   op( A ) = A'. */
			
 
				+
			
 
				+/*  Arguments */
			
 
				+/*  ========== */
			
 
				+
			
 
				+/*  SIDE   - CHARACTER*1. */
			
 
				+/*           On entry,  SIDE specifies whether  op( A ) multiplies B from */
			
 
				+/*           the left or right as follows: */
			
 
				+
			
 
				+/*              SIDE = 'L' or 'l'   B := alpha*op( A )*B. */
			
 
				+
			
 
				+/*              SIDE = 'R' or 'r'   B := alpha*B*op( A ). */
			
 
				+
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  UPLO   - CHARACTER*1. */
			
 
				+/*           On entry, UPLO specifies whether the matrix A is an upper or */
			
 
				+/*           lower triangular matrix as follows: */
			
 
				+
			
 
				+/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
			
 
				+
			
 
				+/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
			
 
				+
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  TRANSA - CHARACTER*1. */
			
 
				+/*           On entry, TRANSA specifies the form of op( A ) to be used in */
			
 
				+/*           the matrix multiplication as follows: */
			
 
				+
			
 
				+/*              TRANSA = 'N' or 'n'   op( A ) = A. */
			
 
				+
			
 
				+/*              TRANSA = 'T' or 't'   op( A ) = A'. */
			
 
				+
			
 
				+/*              TRANSA = 'C' or 'c'   op( A ) = A'. */
			
 
				+
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  DIAG   - CHARACTER*1. */
			
 
				+/*           On entry, DIAG specifies whether or not A is unit triangular */
			
 
				+/*           as follows: */
			
 
				+
			
 
				+/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
			
 
				+
			
 
				+/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
			
 
				+/*                                  triangular. */
			
 
				+
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  M      - INTEGER. */
			
 
				+/*           On entry, M specifies the number of rows of B. M must be at */
			
 
				+/*           least zero. */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  N      - INTEGER. */
			
 
				+/*           On entry, N specifies the number of columns of B.  N must be */
			
 
				+/*           at least zero. */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  ALPHA  - DOUBLE PRECISION. */
			
 
				+/*           On entry,  ALPHA specifies the scalar  alpha. When  alpha is */
			
 
				+/*           zero then  A is not referenced and  B need not be set before */
			
 
				+/*           entry. */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m */
			
 
				+/*           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'. */
			
 
				+/*           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k */
			
 
				+/*           upper triangular part of the array  A must contain the upper */
			
 
				+/*           triangular matrix  and the strictly lower triangular part of */
			
 
				+/*           A is not referenced. */
			
 
				+/*           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k */
			
 
				+/*           lower triangular part of the array  A must contain the lower */
			
 
				+/*           triangular matrix  and the strictly upper triangular part of */
			
 
				+/*           A is not referenced. */
			
 
				+/*           Note that when  DIAG = 'U' or 'u',  the diagonal elements of */
			
 
				+/*           A  are not referenced either,  but are assumed to be  unity. */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  LDA    - INTEGER. */
			
 
				+/*           On entry, LDA specifies the first dimension of A as declared */
			
 
				+/*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then */
			
 
				+/*           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r' */
			
 
				+/*           then LDA must be at least max( 1, n ). */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  B      - DOUBLE PRECISION array of DIMENSION ( LDB, n ). */
			
 
				+/*           Before entry,  the leading  m by n part of the array  B must */
			
 
				+/*           contain the matrix  B,  and  on exit  is overwritten  by the */
			
 
				+/*           transformed matrix. */
			
 
				+
			
 
				+/*  LDB    - INTEGER. */
			
 
				+/*           On entry, LDB specifies the first dimension of B as declared */
			
 
				+/*           in  the  calling  (sub)  program.   LDB  must  be  at  least */
			
 
				+/*           max( 1, m ). */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+
			
 
				+/*  Level 3 Blas routine. */
			
 
				+
			
 
				+/*  -- Written on 8-February-1989. */
			
 
				+/*     Jack Dongarra, Argonne National Laboratory. */
			
 
				+/*     Iain Duff, AERE Harwell. */
			
 
				+/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
			
 
				+/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
			
 
				+
			
 
				+
			
 
				+/*     .. External Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Subroutines .. */
			
 
				+/*     .. */
			
 
				+/*     .. Intrinsic Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. Local Scalars .. */
			
 
				+/*     .. */
			
 
				+/*     .. Parameters .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*     Test the input parameters. */
			
 
				+
			
 
				+    /* Parameter adjustments */
			
 
				+    a_dim1 = *lda;
			
 
				+    a_offset = 1 + a_dim1;
			
 
				+    a -= a_offset;
			
 
				+    b_dim1 = *ldb;
			
 
				+    b_offset = 1 + b_dim1;
			
 
				+    b -= b_offset;
			
 
				+
			
 
				+    /* Function Body */
			
 
				+    lside = lsame_(side, "L");
			
 
				+    if (lside) {
			
 
				+	nrowa = *m;
			
 
				+    } else {
			
 
				+	nrowa = *n;
			
 
				+    }
			
 
				+    nounit = lsame_(diag, "N");
			
 
				+    upper = lsame_(uplo, "U");
			
 
				+
			
 
				+    info = 0;
			
 
				+    if (! lside && ! lsame_(side, "R")) {
			
 
				+	info = 1;
			
 
				+    } else if (! upper && ! lsame_(uplo, "L")) {
			
 
				+	info = 2;
			
 
				+    } else if (! lsame_(transa, "N") && ! lsame_(transa, 
			
 
				+	     "T") && ! lsame_(transa, "C")) {
			
 
				+	info = 3;
			
 
				+    } else if (! lsame_(diag, "U") && ! lsame_(diag, 
			
 
				+	    "N")) {
			
 
				+	info = 4;
			
 
				+    } else if (*m < 0) {
			
 
				+	info = 5;
			
 
				+    } else if (*n < 0) {
			
 
				+	info = 6;
			
 
				+    } else if (*lda < max(1,nrowa)) {
			
 
				+	info = 9;
			
 
				+    } else if (*ldb < max(1,*m)) {
			
 
				+	info = 11;
			
 
				+    }
			
 
				+    if (info != 0) {
			
 
				+	xerbla_("DTRMM ", &info);
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+/*     Quick return if possible. */
			
 
				+
			
 
				+    if (*m == 0 || *n == 0) {
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+/*     And when  alpha.eq.zero. */
			
 
				+
			
 
				+    if (*alpha == 0.) {
			
 
				+	i__1 = *n;
			
 
				+	for (j = 1; j <= i__1; ++j) {
			
 
				+	    i__2 = *m;
			
 
				+	    for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+		b[i__ + j * b_dim1] = 0.;
			
 
				+/* L10: */
			
 
				+	    }
			
 
				+/* L20: */
			
 
				+	}
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+/*     Start the operations. */
			
 
				+
			
 
				+    if (lside) {
			
 
				+	if (lsame_(transa, "N")) {
			
 
				+
			
 
				+/*           Form  B := alpha*A*B. */
			
 
				+
			
 
				+	    if (upper) {
			
 
				+		i__1 = *n;
			
 
				+		for (j = 1; j <= i__1; ++j) {
			
 
				+		    i__2 = *m;
			
 
				+		    for (k = 1; k <= i__2; ++k) {
			
 
				+			if (b[k + j * b_dim1] != 0.) {
			
 
				+			    temp = *alpha * b[k + j * b_dim1];
			
 
				+			    i__3 = k - 1;
			
 
				+			    for (i__ = 1; i__ <= i__3; ++i__) {
			
 
				+				b[i__ + j * b_dim1] += temp * a[i__ + k * 
			
 
				+					a_dim1];
			
 
				+/* L30: */
			
 
				+			    }
			
 
				+			    if (nounit) {
			
 
				+				temp *= a[k + k * a_dim1];
			
 
				+			    }
			
 
				+			    b[k + j * b_dim1] = temp;
			
 
				+			}
			
 
				+/* L40: */
			
 
				+		    }
			
 
				+/* L50: */
			
 
				+		}
			
 
				+	    } else {
			
 
				+		i__1 = *n;
			
 
				+		for (j = 1; j <= i__1; ++j) {
			
 
				+		    for (k = *m; k >= 1; --k) {
			
 
				+			if (b[k + j * b_dim1] != 0.) {
			
 
				+			    temp = *alpha * b[k + j * b_dim1];
			
 
				+			    b[k + j * b_dim1] = temp;
			
 
				+			    if (nounit) {
			
 
				+				b[k + j * b_dim1] *= a[k + k * a_dim1];
			
 
				+			    }
			
 
				+			    i__2 = *m;
			
 
				+			    for (i__ = k + 1; i__ <= i__2; ++i__) {
			
 
				+				b[i__ + j * b_dim1] += temp * a[i__ + k * 
			
 
				+					a_dim1];
			
 
				+/* L60: */
			
 
				+			    }
			
 
				+			}
			
 
				+/* L70: */
			
 
				+		    }
			
 
				+/* L80: */
			
 
				+		}
			
 
				+	    }
			
 
				+	} else {
			
 
				+
			
 
				+/*           Form  B := alpha*A'*B. */
			
 
				+
			
 
				+	    if (upper) {
			
 
				+		i__1 = *n;
			
 
				+		for (j = 1; j <= i__1; ++j) {
			
 
				+		    for (i__ = *m; i__ >= 1; --i__) {
			
 
				+			temp = b[i__ + j * b_dim1];
			
 
				+			if (nounit) {
			
 
				+			    temp *= a[i__ + i__ * a_dim1];
			
 
				+			}
			
 
				+			i__2 = i__ - 1;
			
 
				+			for (k = 1; k <= i__2; ++k) {
			
 
				+			    temp += a[k + i__ * a_dim1] * b[k + j * b_dim1];
			
 
				+/* L90: */
			
 
				+			}
			
 
				+			b[i__ + j * b_dim1] = *alpha * temp;
			
 
				+/* L100: */
			
 
				+		    }
			
 
				+/* L110: */
			
 
				+		}
			
 
				+	    } else {
			
 
				+		i__1 = *n;
			
 
				+		for (j = 1; j <= i__1; ++j) {
			
 
				+		    i__2 = *m;
			
 
				+		    for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+			temp = b[i__ + j * b_dim1];
			
 
				+			if (nounit) {
			
 
				+			    temp *= a[i__ + i__ * a_dim1];
			
 
				+			}
			
 
				+			i__3 = *m;
			
 
				+			for (k = i__ + 1; k <= i__3; ++k) {
			
 
				+			    temp += a[k + i__ * a_dim1] * b[k + j * b_dim1];
			
 
				+/* L120: */
			
 
				+			}
			
 
				+			b[i__ + j * b_dim1] = *alpha * temp;
			
 
				+/* L130: */
			
 
				+		    }
			
 
				+/* L140: */
			
 
				+		}
			
 
				+	    }
			
 
				+	}
			
 
				+    } else {
			
 
				+	if (lsame_(transa, "N")) {
			
 
				+
			
 
				+/*           Form  B := alpha*B*A. */
			
 
				+
			
 
				+	    if (upper) {
			
 
				+		for (j = *n; j >= 1; --j) {
			
 
				+		    temp = *alpha;
			
 
				+		    if (nounit) {
			
 
				+			temp *= a[j + j * a_dim1];
			
 
				+		    }
			
 
				+		    i__1 = *m;
			
 
				+		    for (i__ = 1; i__ <= i__1; ++i__) {
			
 
				+			b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1];
			
 
				+/* L150: */
			
 
				+		    }
			
 
				+		    i__1 = j - 1;
			
 
				+		    for (k = 1; k <= i__1; ++k) {
			
 
				+			if (a[k + j * a_dim1] != 0.) {
			
 
				+			    temp = *alpha * a[k + j * a_dim1];
			
 
				+			    i__2 = *m;
			
 
				+			    for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+				b[i__ + j * b_dim1] += temp * b[i__ + k * 
			
 
				+					b_dim1];
			
 
				+/* L160: */
			
 
				+			    }
			
 
				+			}
			
 
				+/* L170: */
			
 
				+		    }
			
 
				+/* L180: */
			
 
				+		}
			
 
				+	    } else {
			
 
				+		i__1 = *n;
			
 
				+		for (j = 1; j <= i__1; ++j) {
			
 
				+		    temp = *alpha;
			
 
				+		    if (nounit) {
			
 
				+			temp *= a[j + j * a_dim1];
			
 
				+		    }
			
 
				+		    i__2 = *m;
			
 
				+		    for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+			b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1];
			
 
				+/* L190: */
			
 
				+		    }
			
 
				+		    i__2 = *n;
			
 
				+		    for (k = j + 1; k <= i__2; ++k) {
			
 
				+			if (a[k + j * a_dim1] != 0.) {
			
 
				+			    temp = *alpha * a[k + j * a_dim1];
			
 
				+			    i__3 = *m;
			
 
				+			    for (i__ = 1; i__ <= i__3; ++i__) {
			
 
				+				b[i__ + j * b_dim1] += temp * b[i__ + k * 
			
 
				+					b_dim1];
			
 
				+/* L200: */
			
 
				+			    }
			
 
				+			}
			
 
				+/* L210: */
			
 
				+		    }
			
 
				+/* L220: */
			
 
				+		}
			
 
				+	    }
			
 
				+	} else {
			
 
				+
			
 
				+/*           Form  B := alpha*B*A'. */
			
 
				+
			
 
				+	    if (upper) {
			
 
				+		i__1 = *n;
			
 
				+		for (k = 1; k <= i__1; ++k) {
			
 
				+		    i__2 = k - 1;
			
 
				+		    for (j = 1; j <= i__2; ++j) {
			
 
				+			if (a[j + k * a_dim1] != 0.) {
			
 
				+			    temp = *alpha * a[j + k * a_dim1];
			
 
				+			    i__3 = *m;
			
 
				+			    for (i__ = 1; i__ <= i__3; ++i__) {
			
 
				+				b[i__ + j * b_dim1] += temp * b[i__ + k * 
			
 
				+					b_dim1];
			
 
				+/* L230: */
			
 
				+			    }
			
 
				+			}
			
 
				+/* L240: */
			
 
				+		    }
			
 
				+		    temp = *alpha;
			
 
				+		    if (nounit) {
			
 
				+			temp *= a[k + k * a_dim1];
			
 
				+		    }
			
 
				+		    if (temp != 1.) {
			
 
				+			i__2 = *m;
			
 
				+			for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+			    b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1];
			
 
				+/* L250: */
			
 
				+			}
			
 
				+		    }
			
 
				+/* L260: */
			
 
				+		}
			
 
				+	    } else {
			
 
				+		for (k = *n; k >= 1; --k) {
			
 
				+		    i__1 = *n;
			
 
				+		    for (j = k + 1; j <= i__1; ++j) {
			
 
				+			if (a[j + k * a_dim1] != 0.) {
			
 
				+			    temp = *alpha * a[j + k * a_dim1];
			
 
				+			    i__2 = *m;
			
 
				+			    for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+				b[i__ + j * b_dim1] += temp * b[i__ + k * 
			
 
				+					b_dim1];
			
 
				+/* L270: */
			
 
				+			    }
			
 
				+			}
			
 
				+/* L280: */
			
 
				+		    }
			
 
				+		    temp = *alpha;
			
 
				+		    if (nounit) {
			
 
				+			temp *= a[k + k * a_dim1];
			
 
				+		    }
			
 
				+		    if (temp != 1.) {
			
 
				+			i__1 = *m;
			
 
				+			for (i__ = 1; i__ <= i__1; ++i__) {
			
 
				+			    b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1];
			
 
				+/* L290: */
			
 
				+			}
			
 
				+		    }
			
 
				+/* L300: */
			
 
				+		}
			
 
				+	    }
			
 
				+	}
			
 
				+    }
			
 
				+
			
 
				+    return 0;
			
 
				+
			
 
				+/*     End of DTRMM . */
			
 
				+
			
 
				+} /* dtrmm_ */
			
--- a/min-dgels/additional/dtrmv.c
+++ b/min-dgels/additional/dtrmv.c
@@ -0,0 +1,345 @@
 
				+/* dtrmv.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+/* Subroutine */ int dtrmv_(char *uplo, char *trans, char *diag, integer *n, 
			
 
				+	doublereal *a, integer *lda, doublereal *x, integer *incx)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    integer a_dim1, a_offset, i__1, i__2;
			
 
				+
			
 
				+    /* Local variables */
			
 
				+    integer i__, j, ix, jx, kx, info;
			
 
				+    doublereal temp;
			
 
				+    extern logical lsame_(char *, char *);
			
 
				+    extern /* Subroutine */ int xerbla_(char *, integer *);
			
 
				+    logical nounit;
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+/*     .. Array Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*  DTRMV  performs one of the matrix-vector operations */
			
 
				+
			
 
				+/*     x := A*x,   or   x := A'*x, */
			
 
				+
			
 
				+/*  where x is an n element vector and  A is an n by n unit, or non-unit, */
			
 
				+/*  upper or lower triangular matrix. */
			
 
				+
			
 
				+/*  Arguments */
			
 
				+/*  ========== */
			
 
				+
			
 
				+/*  UPLO   - CHARACTER*1. */
			
 
				+/*           On entry, UPLO specifies whether the matrix is an upper or */
			
 
				+/*           lower triangular matrix as follows: */
			
 
				+
			
 
				+/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
			
 
				+
			
 
				+/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
			
 
				+
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  TRANS  - CHARACTER*1. */
			
 
				+/*           On entry, TRANS specifies the operation to be performed as */
			
 
				+/*           follows: */
			
 
				+
			
 
				+/*              TRANS = 'N' or 'n'   x := A*x. */
			
 
				+
			
 
				+/*              TRANS = 'T' or 't'   x := A'*x. */
			
 
				+
			
 
				+/*              TRANS = 'C' or 'c'   x := A'*x. */
			
 
				+
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  DIAG   - CHARACTER*1. */
			
 
				+/*           On entry, DIAG specifies whether or not A is unit */
			
 
				+/*           triangular as follows: */
			
 
				+
			
 
				+/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
			
 
				+
			
 
				+/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
			
 
				+/*                                  triangular. */
			
 
				+
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  N      - INTEGER. */
			
 
				+/*           On entry, N specifies the order of the matrix A. */
			
 
				+/*           N must be at least zero. */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */
			
 
				+/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
			
 
				+/*           upper triangular part of the array A must contain the upper */
			
 
				+/*           triangular matrix and the strictly lower triangular part of */
			
 
				+/*           A is not referenced. */
			
 
				+/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
			
 
				+/*           lower triangular part of the array A must contain the lower */
			
 
				+/*           triangular matrix and the strictly upper triangular part of */
			
 
				+/*           A is not referenced. */
			
 
				+/*           Note that when  DIAG = 'U' or 'u', the diagonal elements of */
			
 
				+/*           A are not referenced either, but are assumed to be unity. */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  LDA    - INTEGER. */
			
 
				+/*           On entry, LDA specifies the first dimension of A as declared */
			
 
				+/*           in the calling (sub) program. LDA must be at least */
			
 
				+/*           max( 1, n ). */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  X      - DOUBLE PRECISION array of dimension at least */
			
 
				+/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
			
 
				+/*           Before entry, the incremented array X must contain the n */
			
 
				+/*           element vector x. On exit, X is overwritten with the */
			
 
				+/*           tranformed vector x. */
			
 
				+
			
 
				+/*  INCX   - INTEGER. */
			
 
				+/*           On entry, INCX specifies the increment for the elements of */
			
 
				+/*           X. INCX must not be zero. */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+
			
 
				+/*  Level 2 Blas routine. */
			
 
				+
			
 
				+/*  -- Written on 22-October-1986. */
			
 
				+/*     Jack Dongarra, Argonne National Lab. */
			
 
				+/*     Jeremy Du Croz, Nag Central Office. */
			
 
				+/*     Sven Hammarling, Nag Central Office. */
			
 
				+/*     Richard Hanson, Sandia National Labs. */
			
 
				+
			
 
				+
			
 
				+/*     .. Parameters .. */
			
 
				+/*     .. */
			
 
				+/*     .. Local Scalars .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Subroutines .. */
			
 
				+/*     .. */
			
 
				+/*     .. Intrinsic Functions .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*     Test the input parameters. */
			
 
				+
			
 
				+    /* Parameter adjustments */
			
 
				+    a_dim1 = *lda;
			
 
				+    a_offset = 1 + a_dim1;
			
 
				+    a -= a_offset;
			
 
				+    --x;
			
 
				+
			
 
				+    /* Function Body */
			
 
				+    info = 0;
			
 
				+    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
			
 
				+	info = 1;
			
 
				+    } else if (! lsame_(trans, "N") && ! lsame_(trans, 
			
 
				+	    "T") && ! lsame_(trans, "C")) {
			
 
				+	info = 2;
			
 
				+    } else if (! lsame_(diag, "U") && ! lsame_(diag, 
			
 
				+	    "N")) {
			
 
				+	info = 3;
			
 
				+    } else if (*n < 0) {
			
 
				+	info = 4;
			
 
				+    } else if (*lda < max(1,*n)) {
			
 
				+	info = 6;
			
 
				+    } else if (*incx == 0) {
			
 
				+	info = 8;
			
 
				+    }
			
 
				+    if (info != 0) {
			
 
				+	xerbla_("DTRMV ", &info);
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+/*     Quick return if possible. */
			
 
				+
			
 
				+    if (*n == 0) {
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+    nounit = lsame_(diag, "N");
			
 
				+
			
 
				+/*     Set up the start point in X if the increment is not unity. This */
			
 
				+/*     will be  ( N - 1 )*INCX  too small for descending loops. */
			
 
				+
			
 
				+    if (*incx <= 0) {
			
 
				+	kx = 1 - (*n - 1) * *incx;
			
 
				+    } else if (*incx != 1) {
			
 
				+	kx = 1;
			
 
				+    }
			
 
				+
			
 
				+/*     Start the operations. In this version the elements of A are */
			
 
				+/*     accessed sequentially with one pass through A. */
			
 
				+
			
 
				+    if (lsame_(trans, "N")) {
			
 
				+
			
 
				+/*        Form  x := A*x. */
			
 
				+
			
 
				+	if (lsame_(uplo, "U")) {
			
 
				+	    if (*incx == 1) {
			
 
				+		i__1 = *n;
			
 
				+		for (j = 1; j <= i__1; ++j) {
			
 
				+		    if (x[j] != 0.) {
			
 
				+			temp = x[j];
			
 
				+			i__2 = j - 1;
			
 
				+			for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+			    x[i__] += temp * a[i__ + j * a_dim1];
			
 
				+/* L10: */
			
 
				+			}
			
 
				+			if (nounit) {
			
 
				+			    x[j] *= a[j + j * a_dim1];
			
 
				+			}
			
 
				+		    }
			
 
				+/* L20: */
			
 
				+		}
			
 
				+	    } else {
			
 
				+		jx = kx;
			
 
				+		i__1 = *n;
			
 
				+		for (j = 1; j <= i__1; ++j) {
			
 
				+		    if (x[jx] != 0.) {
			
 
				+			temp = x[jx];
			
 
				+			ix = kx;
			
 
				+			i__2 = j - 1;
			
 
				+			for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+			    x[ix] += temp * a[i__ + j * a_dim1];
			
 
				+			    ix += *incx;
			
 
				+/* L30: */
			
 
				+			}
			
 
				+			if (nounit) {
			
 
				+			    x[jx] *= a[j + j * a_dim1];
			
 
				+			}
			
 
				+		    }
			
 
				+		    jx += *incx;
			
 
				+/* L40: */
			
 
				+		}
			
 
				+	    }
			
 
				+	} else {
			
 
				+	    if (*incx == 1) {
			
 
				+		for (j = *n; j >= 1; --j) {
			
 
				+		    if (x[j] != 0.) {
			
 
				+			temp = x[j];
			
 
				+			i__1 = j + 1;
			
 
				+			for (i__ = *n; i__ >= i__1; --i__) {
			
 
				+			    x[i__] += temp * a[i__ + j * a_dim1];
			
 
				+/* L50: */
			
 
				+			}
			
 
				+			if (nounit) {
			
 
				+			    x[j] *= a[j + j * a_dim1];
			
 
				+			}
			
 
				+		    }
			
 
				+/* L60: */
			
 
				+		}
			
 
				+	    } else {
			
 
				+		kx += (*n - 1) * *incx;
			
 
				+		jx = kx;
			
 
				+		for (j = *n; j >= 1; --j) {
			
 
				+		    if (x[jx] != 0.) {
			
 
				+			temp = x[jx];
			
 
				+			ix = kx;
			
 
				+			i__1 = j + 1;
			
 
				+			for (i__ = *n; i__ >= i__1; --i__) {
			
 
				+			    x[ix] += temp * a[i__ + j * a_dim1];
			
 
				+			    ix -= *incx;
			
 
				+/* L70: */
			
 
				+			}
			
 
				+			if (nounit) {
			
 
				+			    x[jx] *= a[j + j * a_dim1];
			
 
				+			}
			
 
				+		    }
			
 
				+		    jx -= *incx;
			
 
				+/* L80: */
			
 
				+		}
			
 
				+	    }
			
 
				+	}
			
 
				+    } else {
			
 
				+
			
 
				+/*        Form  x := A'*x. */
			
 
				+
			
 
				+	if (lsame_(uplo, "U")) {
			
 
				+	    if (*incx == 1) {
			
 
				+		for (j = *n; j >= 1; --j) {
			
 
				+		    temp = x[j];
			
 
				+		    if (nounit) {
			
 
				+			temp *= a[j + j * a_dim1];
			
 
				+		    }
			
 
				+		    for (i__ = j - 1; i__ >= 1; --i__) {
			
 
				+			temp += a[i__ + j * a_dim1] * x[i__];
			
 
				+/* L90: */
			
 
				+		    }
			
 
				+		    x[j] = temp;
			
 
				+/* L100: */
			
 
				+		}
			
 
				+	    } else {
			
 
				+		jx = kx + (*n - 1) * *incx;
			
 
				+		for (j = *n; j >= 1; --j) {
			
 
				+		    temp = x[jx];
			
 
				+		    ix = jx;
			
 
				+		    if (nounit) {
			
 
				+			temp *= a[j + j * a_dim1];
			
 
				+		    }
			
 
				+		    for (i__ = j - 1; i__ >= 1; --i__) {
			
 
				+			ix -= *incx;
			
 
				+			temp += a[i__ + j * a_dim1] * x[ix];
			
 
				+/* L110: */
			
 
				+		    }
			
 
				+		    x[jx] = temp;
			
 
				+		    jx -= *incx;
			
 
				+/* L120: */
			
 
				+		}
			
 
				+	    }
			
 
				+	} else {
			
 
				+	    if (*incx == 1) {
			
 
				+		i__1 = *n;
			
 
				+		for (j = 1; j <= i__1; ++j) {
			
 
				+		    temp = x[j];
			
 
				+		    if (nounit) {
			
 
				+			temp *= a[j + j * a_dim1];
			
 
				+		    }
			
 
				+		    i__2 = *n;
			
 
				+		    for (i__ = j + 1; i__ <= i__2; ++i__) {
			
 
				+			temp += a[i__ + j * a_dim1] * x[i__];
			
 
				+/* L130: */
			
 
				+		    }
			
 
				+		    x[j] = temp;
			
 
				+/* L140: */
			
 
				+		}
			
 
				+	    } else {
			
 
				+		jx = kx;
			
 
				+		i__1 = *n;
			
 
				+		for (j = 1; j <= i__1; ++j) {
			
 
				+		    temp = x[jx];
			
 
				+		    ix = jx;
			
 
				+		    if (nounit) {
			
 
				+			temp *= a[j + j * a_dim1];
			
 
				+		    }
			
 
				+		    i__2 = *n;
			
 
				+		    for (i__ = j + 1; i__ <= i__2; ++i__) {
			
 
				+			ix += *incx;
			
 
				+			temp += a[i__ + j * a_dim1] * x[ix];
			
 
				+/* L150: */
			
 
				+		    }
			
 
				+		    x[jx] = temp;
			
 
				+		    jx += *incx;
			
 
				+/* L160: */
			
 
				+		}
			
 
				+	    }
			
 
				+	}
			
 
				+    }
			
 
				+
			
 
				+    return 0;
			
 
				+
			
 
				+/*     End of DTRMV . */
			
 
				+
			
 
				+} /* dtrmv_ */
			
--- a/min-dgels/additional/dtrsm.c
+++ b/min-dgels/additional/dtrsm.c
@@ -0,0 +1,490 @@
 
				+/* dtrsm.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+/* Subroutine */ int dtrsm_(char *side, char *uplo, char *transa, char *diag, 
			
 
				+	integer *m, integer *n, doublereal *alpha, doublereal *a, integer *
			
 
				+	lda, doublereal *b, integer *ldb)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
			
 
				+
			
 
				+    /* Local variables */
			
 
				+    integer i__, j, k, info;
			
 
				+    doublereal temp;
			
 
				+    logical lside;
			
 
				+    extern logical lsame_(char *, char *);
			
 
				+    integer nrowa;
			
 
				+    logical upper;
			
 
				+    extern /* Subroutine */ int xerbla_(char *, integer *);
			
 
				+    logical nounit;
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+/*     .. Array Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*  DTRSM  solves one of the matrix equations */
			
 
				+
			
 
				+/*     op( A )*X = alpha*B,   or   X*op( A ) = alpha*B, */
			
 
				+
			
 
				+/*  where alpha is a scalar, X and B are m by n matrices, A is a unit, or */
			
 
				+/*  non-unit,  upper or lower triangular matrix  and  op( A )  is one  of */
			
 
				+
			
 
				+/*     op( A ) = A   or   op( A ) = A'. */
			
 
				+
			
 
				+/*  The matrix X is overwritten on B. */
			
 
				+
			
 
				+/*  Arguments */
			
 
				+/*  ========== */
			
 
				+
			
 
				+/*  SIDE   - CHARACTER*1. */
			
 
				+/*           On entry, SIDE specifies whether op( A ) appears on the left */
			
 
				+/*           or right of X as follows: */
			
 
				+
			
 
				+/*              SIDE = 'L' or 'l'   op( A )*X = alpha*B. */
			
 
				+
			
 
				+/*              SIDE = 'R' or 'r'   X*op( A ) = alpha*B. */
			
 
				+
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  UPLO   - CHARACTER*1. */
			
 
				+/*           On entry, UPLO specifies whether the matrix A is an upper or */
			
 
				+/*           lower triangular matrix as follows: */
			
 
				+
			
 
				+/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
			
 
				+
			
 
				+/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
			
 
				+
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  TRANSA - CHARACTER*1. */
			
 
				+/*           On entry, TRANSA specifies the form of op( A ) to be used in */
			
 
				+/*           the matrix multiplication as follows: */
			
 
				+
			
 
				+/*              TRANSA = 'N' or 'n'   op( A ) = A. */
			
 
				+
			
 
				+/*              TRANSA = 'T' or 't'   op( A ) = A'. */
			
 
				+
			
 
				+/*              TRANSA = 'C' or 'c'   op( A ) = A'. */
			
 
				+
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  DIAG   - CHARACTER*1. */
			
 
				+/*           On entry, DIAG specifies whether or not A is unit triangular */
			
 
				+/*           as follows: */
			
 
				+
			
 
				+/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
			
 
				+
			
 
				+/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
			
 
				+/*                                  triangular. */
			
 
				+
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  M      - INTEGER. */
			
 
				+/*           On entry, M specifies the number of rows of B. M must be at */
			
 
				+/*           least zero. */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  N      - INTEGER. */
			
 
				+/*           On entry, N specifies the number of columns of B.  N must be */
			
 
				+/*           at least zero. */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  ALPHA  - DOUBLE PRECISION. */
			
 
				+/*           On entry,  ALPHA specifies the scalar  alpha. When  alpha is */
			
 
				+/*           zero then  A is not referenced and  B need not be set before */
			
 
				+/*           entry. */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m */
			
 
				+/*           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'. */
			
 
				+/*           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k */
			
 
				+/*           upper triangular part of the array  A must contain the upper */
			
 
				+/*           triangular matrix  and the strictly lower triangular part of */
			
 
				+/*           A is not referenced. */
			
 
				+/*           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k */
			
 
				+/*           lower triangular part of the array  A must contain the lower */
			
 
				+/*           triangular matrix  and the strictly upper triangular part of */
			
 
				+/*           A is not referenced. */
			
 
				+/*           Note that when  DIAG = 'U' or 'u',  the diagonal elements of */
			
 
				+/*           A  are not referenced either,  but are assumed to be  unity. */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  LDA    - INTEGER. */
			
 
				+/*           On entry, LDA specifies the first dimension of A as declared */
			
 
				+/*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then */
			
 
				+/*           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r' */
			
 
				+/*           then LDA must be at least max( 1, n ). */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+/*  B      - DOUBLE PRECISION array of DIMENSION ( LDB, n ). */
			
 
				+/*           Before entry,  the leading  m by n part of the array  B must */
			
 
				+/*           contain  the  right-hand  side  matrix  B,  and  on exit  is */
			
 
				+/*           overwritten by the solution matrix  X. */
			
 
				+
			
 
				+/*  LDB    - INTEGER. */
			
 
				+/*           On entry, LDB specifies the first dimension of B as declared */
			
 
				+/*           in  the  calling  (sub)  program.   LDB  must  be  at  least */
			
 
				+/*           max( 1, m ). */
			
 
				+/*           Unchanged on exit. */
			
 
				+
			
 
				+
			
 
				+/*  Level 3 Blas routine. */
			
 
				+
			
 
				+
			
 
				+/*  -- Written on 8-February-1989. */
			
 
				+/*     Jack Dongarra, Argonne National Laboratory. */
			
 
				+/*     Iain Duff, AERE Harwell. */
			
 
				+/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
			
 
				+/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
			
 
				+
			
 
				+
			
 
				+/*     .. External Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Subroutines .. */
			
 
				+/*     .. */
			
 
				+/*     .. Intrinsic Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. Local Scalars .. */
			
 
				+/*     .. */
			
 
				+/*     .. Parameters .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*     Test the input parameters. */
			
 
				+
			
 
				+    /* Parameter adjustments */
			
 
				+    a_dim1 = *lda;
			
 
				+    a_offset = 1 + a_dim1;
			
 
				+    a -= a_offset;
			
 
				+    b_dim1 = *ldb;
			
 
				+    b_offset = 1 + b_dim1;
			
 
				+    b -= b_offset;
			
 
				+
			
 
				+    /* Function Body */
			
 
				+    lside = lsame_(side, "L");
			
 
				+    if (lside) {
			
 
				+	nrowa = *m;
			
 
				+    } else {
			
 
				+	nrowa = *n;
			
 
				+    }
			
 
				+    nounit = lsame_(diag, "N");
			
 
				+    upper = lsame_(uplo, "U");
			
 
				+
			
 
				+    info = 0;
			
 
				+    if (! lside && ! lsame_(side, "R")) {
			
 
				+	info = 1;
			
 
				+    } else if (! upper && ! lsame_(uplo, "L")) {
			
 
				+	info = 2;
			
 
				+    } else if (! lsame_(transa, "N") && ! lsame_(transa, 
			
 
				+	     "T") && ! lsame_(transa, "C")) {
			
 
				+	info = 3;
			
 
				+    } else if (! lsame_(diag, "U") && ! lsame_(diag, 
			
 
				+	    "N")) {
			
 
				+	info = 4;
			
 
				+    } else if (*m < 0) {
			
 
				+	info = 5;
			
 
				+    } else if (*n < 0) {
			
 
				+	info = 6;
			
 
				+    } else if (*lda < max(1,nrowa)) {
			
 
				+	info = 9;
			
 
				+    } else if (*ldb < max(1,*m)) {
			
 
				+	info = 11;
			
 
				+    }
			
 
				+    if (info != 0) {
			
 
				+	xerbla_("DTRSM ", &info);
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+/*     Quick return if possible. */
			
 
				+
			
 
				+    if (*m == 0 || *n == 0) {
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+/*     And when  alpha.eq.zero. */
			
 
				+
			
 
				+    if (*alpha == 0.) {
			
 
				+	i__1 = *n;
			
 
				+	for (j = 1; j <= i__1; ++j) {
			
 
				+	    i__2 = *m;
			
 
				+	    for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+		b[i__ + j * b_dim1] = 0.;
			
 
				+/* L10: */
			
 
				+	    }
			
 
				+/* L20: */
			
 
				+	}
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+/*     Start the operations. */
			
 
				+
			
 
				+    if (lside) {
			
 
				+	if (lsame_(transa, "N")) {
			
 
				+
			
 
				+/*           Form  B := alpha*inv( A )*B. */
			
 
				+
			
 
				+	    if (upper) {
			
 
				+		i__1 = *n;
			
 
				+		for (j = 1; j <= i__1; ++j) {
			
 
				+		    if (*alpha != 1.) {
			
 
				+			i__2 = *m;
			
 
				+			for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+			    b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1]
			
 
				+				    ;
			
 
				+/* L30: */
			
 
				+			}
			
 
				+		    }
			
 
				+		    for (k = *m; k >= 1; --k) {
			
 
				+			if (b[k + j * b_dim1] != 0.) {
			
 
				+			    if (nounit) {
			
 
				+				b[k + j * b_dim1] /= a[k + k * a_dim1];
			
 
				+			    }
			
 
				+			    i__2 = k - 1;
			
 
				+			    for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+				b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[
			
 
				+					i__ + k * a_dim1];
			
 
				+/* L40: */
			
 
				+			    }
			
 
				+			}
			
 
				+/* L50: */
			
 
				+		    }
			
 
				+/* L60: */
			
 
				+		}
			
 
				+	    } else {
			
 
				+		i__1 = *n;
			
 
				+		for (j = 1; j <= i__1; ++j) {
			
 
				+		    if (*alpha != 1.) {
			
 
				+			i__2 = *m;
			
 
				+			for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+			    b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1]
			
 
				+				    ;
			
 
				+/* L70: */
			
 
				+			}
			
 
				+		    }
			
 
				+		    i__2 = *m;
			
 
				+		    for (k = 1; k <= i__2; ++k) {
			
 
				+			if (b[k + j * b_dim1] != 0.) {
			
 
				+			    if (nounit) {
			
 
				+				b[k + j * b_dim1] /= a[k + k * a_dim1];
			
 
				+			    }
			
 
				+			    i__3 = *m;
			
 
				+			    for (i__ = k + 1; i__ <= i__3; ++i__) {
			
 
				+				b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[
			
 
				+					i__ + k * a_dim1];
			
 
				+/* L80: */
			
 
				+			    }
			
 
				+			}
			
 
				+/* L90: */
			
 
				+		    }
			
 
				+/* L100: */
			
 
				+		}
			
 
				+	    }
			
 
				+	} else {
			
 
				+
			
 
				+/*           Form  B := alpha*inv( A' )*B. */
			
 
				+
			
 
				+	    if (upper) {
			
 
				+		i__1 = *n;
			
 
				+		for (j = 1; j <= i__1; ++j) {
			
 
				+		    i__2 = *m;
			
 
				+		    for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+			temp = *alpha * b[i__ + j * b_dim1];
			
 
				+			i__3 = i__ - 1;
			
 
				+			for (k = 1; k <= i__3; ++k) {
			
 
				+			    temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1];
			
 
				+/* L110: */
			
 
				+			}
			
 
				+			if (nounit) {
			
 
				+			    temp /= a[i__ + i__ * a_dim1];
			
 
				+			}
			
 
				+			b[i__ + j * b_dim1] = temp;
			
 
				+/* L120: */
			
 
				+		    }
			
 
				+/* L130: */
			
 
				+		}
			
 
				+	    } else {
			
 
				+		i__1 = *n;
			
 
				+		for (j = 1; j <= i__1; ++j) {
			
 
				+		    for (i__ = *m; i__ >= 1; --i__) {
			
 
				+			temp = *alpha * b[i__ + j * b_dim1];
			
 
				+			i__2 = *m;
			
 
				+			for (k = i__ + 1; k <= i__2; ++k) {
			
 
				+			    temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1];
			
 
				+/* L140: */
			
 
				+			}
			
 
				+			if (nounit) {
			
 
				+			    temp /= a[i__ + i__ * a_dim1];
			
 
				+			}
			
 
				+			b[i__ + j * b_dim1] = temp;
			
 
				+/* L150: */
			
 
				+		    }
			
 
				+/* L160: */
			
 
				+		}
			
 
				+	    }
			
 
				+	}
			
 
				+    } else {
			
 
				+	if (lsame_(transa, "N")) {
			
 
				+
			
 
				+/*           Form  B := alpha*B*inv( A ). */
			
 
				+
			
 
				+	    if (upper) {
			
 
				+		i__1 = *n;
			
 
				+		for (j = 1; j <= i__1; ++j) {
			
 
				+		    if (*alpha != 1.) {
			
 
				+			i__2 = *m;
			
 
				+			for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+			    b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1]
			
 
				+				    ;
			
 
				+/* L170: */
			
 
				+			}
			
 
				+		    }
			
 
				+		    i__2 = j - 1;
			
 
				+		    for (k = 1; k <= i__2; ++k) {
			
 
				+			if (a[k + j * a_dim1] != 0.) {
			
 
				+			    i__3 = *m;
			
 
				+			    for (i__ = 1; i__ <= i__3; ++i__) {
			
 
				+				b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[
			
 
				+					i__ + k * b_dim1];
			
 
				+/* L180: */
			
 
				+			    }
			
 
				+			}
			
 
				+/* L190: */
			
 
				+		    }
			
 
				+		    if (nounit) {
			
 
				+			temp = 1. / a[j + j * a_dim1];
			
 
				+			i__2 = *m;
			
 
				+			for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+			    b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1];
			
 
				+/* L200: */
			
 
				+			}
			
 
				+		    }
			
 
				+/* L210: */
			
 
				+		}
			
 
				+	    } else {
			
 
				+		for (j = *n; j >= 1; --j) {
			
 
				+		    if (*alpha != 1.) {
			
 
				+			i__1 = *m;
			
 
				+			for (i__ = 1; i__ <= i__1; ++i__) {
			
 
				+			    b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1]
			
 
				+				    ;
			
 
				+/* L220: */
			
 
				+			}
			
 
				+		    }
			
 
				+		    i__1 = *n;
			
 
				+		    for (k = j + 1; k <= i__1; ++k) {
			
 
				+			if (a[k + j * a_dim1] != 0.) {
			
 
				+			    i__2 = *m;
			
 
				+			    for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+				b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[
			
 
				+					i__ + k * b_dim1];
			
 
				+/* L230: */
			
 
				+			    }
			
 
				+			}
			
 
				+/* L240: */
			
 
				+		    }
			
 
				+		    if (nounit) {
			
 
				+			temp = 1. / a[j + j * a_dim1];
			
 
				+			i__1 = *m;
			
 
				+			for (i__ = 1; i__ <= i__1; ++i__) {
			
 
				+			    b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1];
			
 
				+/* L250: */
			
 
				+			}
			
 
				+		    }
			
 
				+/* L260: */
			
 
				+		}
			
 
				+	    }
			
 
				+	} else {
			
 
				+
			
 
				+/*           Form  B := alpha*B*inv( A' ). */
			
 
				+
			
 
				+	    if (upper) {
			
 
				+		for (k = *n; k >= 1; --k) {
			
 
				+		    if (nounit) {
			
 
				+			temp = 1. / a[k + k * a_dim1];
			
 
				+			i__1 = *m;
			
 
				+			for (i__ = 1; i__ <= i__1; ++i__) {
			
 
				+			    b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1];
			
 
				+/* L270: */
			
 
				+			}
			
 
				+		    }
			
 
				+		    i__1 = k - 1;
			
 
				+		    for (j = 1; j <= i__1; ++j) {
			
 
				+			if (a[j + k * a_dim1] != 0.) {
			
 
				+			    temp = a[j + k * a_dim1];
			
 
				+			    i__2 = *m;
			
 
				+			    for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+				b[i__ + j * b_dim1] -= temp * b[i__ + k * 
			
 
				+					b_dim1];
			
 
				+/* L280: */
			
 
				+			    }
			
 
				+			}
			
 
				+/* L290: */
			
 
				+		    }
			
 
				+		    if (*alpha != 1.) {
			
 
				+			i__1 = *m;
			
 
				+			for (i__ = 1; i__ <= i__1; ++i__) {
			
 
				+			    b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1]
			
 
				+				    ;
			
 
				+/* L300: */
			
 
				+			}
			
 
				+		    }
			
 
				+/* L310: */
			
 
				+		}
			
 
				+	    } else {
			
 
				+		i__1 = *n;
			
 
				+		for (k = 1; k <= i__1; ++k) {
			
 
				+		    if (nounit) {
			
 
				+			temp = 1. / a[k + k * a_dim1];
			
 
				+			i__2 = *m;
			
 
				+			for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+			    b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1];
			
 
				+/* L320: */
			
 
				+			}
			
 
				+		    }
			
 
				+		    i__2 = *n;
			
 
				+		    for (j = k + 1; j <= i__2; ++j) {
			
 
				+			if (a[j + k * a_dim1] != 0.) {
			
 
				+			    temp = a[j + k * a_dim1];
			
 
				+			    i__3 = *m;
			
 
				+			    for (i__ = 1; i__ <= i__3; ++i__) {
			
 
				+				b[i__ + j * b_dim1] -= temp * b[i__ + k * 
			
 
				+					b_dim1];
			
 
				+/* L330: */
			
 
				+			    }
			
 
				+			}
			
 
				+/* L340: */
			
 
				+		    }
			
 
				+		    if (*alpha != 1.) {
			
 
				+			i__2 = *m;
			
 
				+			for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+			    b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1]
			
 
				+				    ;
			
 
				+/* L350: */
			
 
				+			}
			
 
				+		    }
			
 
				+/* L360: */
			
 
				+		}
			
 
				+	    }
			
 
				+	}
			
 
				+    }
			
 
				+
			
 
				+    return 0;
			
 
				+
			
 
				+/*     End of DTRSM . */
			
 
				+
			
 
				+} /* dtrsm_ */
			
--- a/min-dgels/additional/dtrtrs.c
+++ b/min-dgels/additional/dtrtrs.c
@@ -0,0 +1,183 @@
 
				+/* dtrtrs.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+/* Table of constant values */
			
 
				+
			
 
				+static doublereal c_b12 = 1.;
			
 
				+
			
 
				+/* Subroutine */ int dtrtrs_(char *uplo, char *trans, char *diag, integer *n, 
			
 
				+	integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer *
			
 
				+	ldb, integer *info)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    integer a_dim1, a_offset, b_dim1, b_offset, i__1;
			
 
				+
			
 
				+    /* Local variables */
			
 
				+    extern logical lsame_(char *, char *);
			
 
				+    extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, 
			
 
				+	    integer *, integer *, doublereal *, doublereal *, integer *, 
			
 
				+	    doublereal *, integer *), xerbla_(
			
 
				+	    char *, integer *);
			
 
				+    logical nounit;
			
 
				+
			
 
				+
			
 
				+/*  -- LAPACK routine (version 3.2) -- */
			
 
				+/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
			
 
				+/*     November 2006 */
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+/*     .. Array Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*  DTRTRS solves a triangular system of the form */
			
 
				+
			
 
				+/*     A * X = B  or  A**T * X = B, */
			
 
				+
			
 
				+/*  where A is a triangular matrix of order N, and B is an N-by-NRHS */
			
 
				+/*  matrix.  A check is made to verify that A is nonsingular. */
			
 
				+
			
 
				+/*  Arguments */
			
 
				+/*  ========= */
			
 
				+
			
 
				+/*  UPLO    (input) CHARACTER*1 */
			
 
				+/*          = 'U':  A is upper triangular; */
			
 
				+/*          = 'L':  A is lower triangular. */
			
 
				+
			
 
				+/*  TRANS   (input) CHARACTER*1 */
			
 
				+/*          Specifies the form of the system of equations: */
			
 
				+/*          = 'N':  A * X = B  (No transpose) */
			
 
				+/*          = 'T':  A**T * X = B  (Transpose) */
			
 
				+/*          = 'C':  A**H * X = B  (Conjugate transpose = Transpose) */
			
 
				+
			
 
				+/*  DIAG    (input) CHARACTER*1 */
			
 
				+/*          = 'N':  A is non-unit triangular; */
			
 
				+/*          = 'U':  A is unit triangular. */
			
 
				+
			
 
				+/*  N       (input) INTEGER */
			
 
				+/*          The order of the matrix A.  N >= 0. */
			
 
				+
			
 
				+/*  NRHS    (input) INTEGER */
			
 
				+/*          The number of right hand sides, i.e., the number of columns */
			
 
				+/*          of the matrix B.  NRHS >= 0. */
			
 
				+
			
 
				+/*  A       (input) DOUBLE PRECISION array, dimension (LDA,N) */
			
 
				+/*          The triangular matrix A.  If UPLO = 'U', the leading N-by-N */
			
 
				+/*          upper triangular part of the array A contains the upper */
			
 
				+/*          triangular matrix, and the strictly lower triangular part of */
			
 
				+/*          A is not referenced.  If UPLO = 'L', the leading N-by-N lower */
			
 
				+/*          triangular part of the array A contains the lower triangular */
			
 
				+/*          matrix, and the strictly upper triangular part of A is not */
			
 
				+/*          referenced.  If DIAG = 'U', the diagonal elements of A are */
			
 
				+/*          also not referenced and are assumed to be 1. */
			
 
				+
			
 
				+/*  LDA     (input) INTEGER */
			
 
				+/*          The leading dimension of the array A.  LDA >= max(1,N). */
			
 
				+
			
 
				+/*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */
			
 
				+/*          On entry, the right hand side matrix B. */
			
 
				+/*          On exit, if INFO = 0, the solution matrix X. */
			
 
				+
			
 
				+/*  LDB     (input) INTEGER */
			
 
				+/*          The leading dimension of the array B.  LDB >= max(1,N). */
			
 
				+
			
 
				+/*  INFO    (output) INTEGER */
			
 
				+/*          = 0:  successful exit */
			
 
				+/*          < 0: if INFO = -i, the i-th argument had an illegal value */
			
 
				+/*          > 0: if INFO = i, the i-th diagonal element of A is zero, */
			
 
				+/*               indicating that the matrix is singular and the solutions */
			
 
				+/*               X have not been computed. */
			
 
				+
			
 
				+/*  ===================================================================== */
			
 
				+
			
 
				+/*     .. Parameters .. */
			
 
				+/*     .. */
			
 
				+/*     .. Local Scalars .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Subroutines .. */
			
 
				+/*     .. */
			
 
				+/*     .. Intrinsic Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. Executable Statements .. */
			
 
				+
			
 
				+/*     Test the input parameters. */
			
 
				+
			
 
				+    /* Parameter adjustments */
			
 
				+    a_dim1 = *lda;
			
 
				+    a_offset = 1 + a_dim1;
			
 
				+    a -= a_offset;
			
 
				+    b_dim1 = *ldb;
			
 
				+    b_offset = 1 + b_dim1;
			
 
				+    b -= b_offset;
			
 
				+
			
 
				+    /* Function Body */
			
 
				+    *info = 0;
			
 
				+    nounit = lsame_(diag, "N");
			
 
				+    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
			
 
				+	*info = -1;
			
 
				+    } else if (! lsame_(trans, "N") && ! lsame_(trans, 
			
 
				+	    "T") && ! lsame_(trans, "C")) {
			
 
				+	*info = -2;
			
 
				+    } else if (! nounit && ! lsame_(diag, "U")) {
			
 
				+	*info = -3;
			
 
				+    } else if (*n < 0) {
			
 
				+	*info = -4;
			
 
				+    } else if (*nrhs < 0) {
			
 
				+	*info = -5;
			
 
				+    } else if (*lda < max(1,*n)) {
			
 
				+	*info = -7;
			
 
				+    } else if (*ldb < max(1,*n)) {
			
 
				+	*info = -9;
			
 
				+    }
			
 
				+    if (*info != 0) {
			
 
				+	i__1 = -(*info);
			
 
				+	xerbla_("DTRTRS", &i__1);
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+/*     Quick return if possible */
			
 
				+
			
 
				+    if (*n == 0) {
			
 
				+	return 0;
			
 
				+    }
			
 
				+
			
 
				+/*     Check for singularity. */
			
 
				+
			
 
				+    if (nounit) {
			
 
				+	i__1 = *n;
			
 
				+	for (*info = 1; *info <= i__1; ++(*info)) {
			
 
				+	    if (a[*info + *info * a_dim1] == 0.) {
			
 
				+		return 0;
			
 
				+	    }
			
 
				+/* L10: */
			
 
				+	}
			
 
				+    }
			
 
				+    *info = 0;
			
 
				+
			
 
				+/*     Solve A * x = b  or  A' * x = b. */
			
 
				+
			
 
				+    dtrsm_("Left", uplo, trans, diag, n, nrhs, &c_b12, &a[a_offset], lda, &b[
			
 
				+	    b_offset], ldb);
			
 
				+
			
 
				+    return 0;
			
 
				+
			
 
				+/*     End of DTRTRS */
			
 
				+
			
 
				+} /* dtrtrs_ */
			
--- a/min-dgels/additional/f2c.h
+++ b/min-dgels/additional/f2c.h
@@ -0,0 +1,223 @@
 
				+/* f2c.h  --  Standard Fortran to C header file */
			
 
				+
			
 
				+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
			
 
				+
			
 
				+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
			
 
				+
			
 
				+#ifndef F2C_INCLUDE
			
 
				+#define F2C_INCLUDE
			
 
				+
			
 
				+typedef long int integer;
			
 
				+typedef unsigned long int uinteger;
			
 
				+typedef char *address;
			
 
				+typedef short int shortint;
			
 
				+typedef float real;
			
 
				+typedef double doublereal;
			
 
				+typedef struct { real r, i; } complex;
			
 
				+typedef struct { doublereal r, i; } doublecomplex;
			
 
				+typedef long int logical;
			
 
				+typedef short int shortlogical;
			
 
				+typedef char logical1;
			
 
				+typedef char integer1;
			
 
				+#ifdef INTEGER_STAR_8	/* Adjust for integer*8. */
			
 
				+typedef long long longint;		/* system-dependent */
			
 
				+typedef unsigned long long ulongint;	/* system-dependent */
			
 
				+#define qbit_clear(a,b)	((a) & ~((ulongint)1 << (b)))
			
 
				+#define qbit_set(a,b)	((a) |  ((ulongint)1 << (b)))
			
 
				+#endif
			
 
				+
			
 
				+#define TRUE_ (1)
			
 
				+#define FALSE_ (0)
			
 
				+
			
 
				+/* Extern is for use with -E */
			
 
				+#ifndef Extern
			
 
				+#define Extern extern
			
 
				+#endif
			
 
				+
			
 
				+/* I/O stuff */
			
 
				+
			
 
				+#ifdef f2c_i2
			
 
				+/* for -i2 */
			
 
				+typedef short flag;
			
 
				+typedef short ftnlen;
			
 
				+typedef short ftnint;
			
 
				+#else
			
 
				+typedef long int flag;
			
 
				+typedef long int ftnlen;
			
 
				+typedef long int ftnint;
			
 
				+#endif
			
 
				+
			
 
				+/*external read, write*/
			
 
				+typedef struct
			
 
				+{	flag cierr;
			
 
				+	ftnint ciunit;
			
 
				+	flag ciend;
			
 
				+	char *cifmt;
			
 
				+	ftnint cirec;
			
 
				+} cilist;
			
 
				+
			
 
				+/*internal read, write*/
			
 
				+typedef struct
			
 
				+{	flag icierr;
			
 
				+	char *iciunit;
			
 
				+	flag iciend;
			
 
				+	char *icifmt;
			
 
				+	ftnint icirlen;
			
 
				+	ftnint icirnum;
			
 
				+} icilist;
			
 
				+
			
 
				+/*open*/
			
 
				+typedef struct
			
 
				+{	flag oerr;
			
 
				+	ftnint ounit;
			
 
				+	char *ofnm;
			
 
				+	ftnlen ofnmlen;
			
 
				+	char *osta;
			
 
				+	char *oacc;
			
 
				+	char *ofm;
			
 
				+	ftnint orl;
			
 
				+	char *oblnk;
			
 
				+} olist;
			
 
				+
			
 
				+/*close*/
			
 
				+typedef struct
			
 
				+{	flag cerr;
			
 
				+	ftnint cunit;
			
 
				+	char *csta;
			
 
				+} cllist;
			
 
				+
			
 
				+/*rewind, backspace, endfile*/
			
 
				+typedef struct
			
 
				+{	flag aerr;
			
 
				+	ftnint aunit;
			
 
				+} alist;
			
 
				+
			
 
				+/* inquire */
			
 
				+typedef struct
			
 
				+{	flag inerr;
			
 
				+	ftnint inunit;
			
 
				+	char *infile;
			
 
				+	ftnlen infilen;
			
 
				+	ftnint	*inex;	/*parameters in standard's order*/
			
 
				+	ftnint	*inopen;
			
 
				+	ftnint	*innum;
			
 
				+	ftnint	*innamed;
			
 
				+	char	*inname;
			
 
				+	ftnlen	innamlen;
			
 
				+	char	*inacc;
			
 
				+	ftnlen	inacclen;
			
 
				+	char	*inseq;
			
 
				+	ftnlen	inseqlen;
			
 
				+	char 	*indir;
			
 
				+	ftnlen	indirlen;
			
 
				+	char	*infmt;
			
 
				+	ftnlen	infmtlen;
			
 
				+	char	*inform;
			
 
				+	ftnint	informlen;
			
 
				+	char	*inunf;
			
 
				+	ftnlen	inunflen;
			
 
				+	ftnint	*inrecl;
			
 
				+	ftnint	*innrec;
			
 
				+	char	*inblank;
			
 
				+	ftnlen	inblanklen;
			
 
				+} inlist;
			
 
				+
			
 
				+#define VOID void
			
 
				+
			
 
				+union Multitype {	/* for multiple entry points */
			
 
				+	integer1 g;
			
 
				+	shortint h;
			
 
				+	integer i;
			
 
				+	/* longint j; */
			
 
				+	real r;
			
 
				+	doublereal d;
			
 
				+	complex c;
			
 
				+	doublecomplex z;
			
 
				+	};
			
 
				+
			
 
				+typedef union Multitype Multitype;
			
 
				+
			
 
				+/*typedef long int Long;*/	/* No longer used; formerly in Namelist */
			
 
				+
			
 
				+struct Vardesc {	/* for Namelist */
			
 
				+	char *name;
			
 
				+	char *addr;
			
 
				+	ftnlen *dims;
			
 
				+	int  type;
			
 
				+	};
			
 
				+typedef struct Vardesc Vardesc;
			
 
				+
			
 
				+struct Namelist {
			
 
				+	char *name;
			
 
				+	Vardesc **vars;
			
 
				+	int nvars;
			
 
				+	};
			
 
				+typedef struct Namelist Namelist;
			
 
				+
			
 
				+#define abs(x) ((x) >= 0 ? (x) : -(x))
			
 
				+#define dabs(x) (doublereal)abs(x)
			
 
				+#define min(a,b) ((a) <= (b) ? (a) : (b))
			
 
				+#define max(a,b) ((a) >= (b) ? (a) : (b))
			
 
				+#define dmin(a,b) (doublereal)min(a,b)
			
 
				+#define dmax(a,b) (doublereal)max(a,b)
			
 
				+#define bit_test(a,b)	((a) >> (b) & 1)
			
 
				+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
			
 
				+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
			
 
				+
			
 
				+/* procedure parameter types for -A and -C++ */
			
 
				+
			
 
				+#define F2C_proc_par_types 1
			
 
				+#ifdef __cplusplus
			
 
				+typedef int /* Unknown procedure type */ (*U_fp)(...);
			
 
				+typedef shortint (*J_fp)(...);
			
 
				+typedef integer (*I_fp)(...);
			
 
				+typedef real (*R_fp)(...);
			
 
				+typedef doublereal (*D_fp)(...), (*E_fp)(...);
			
 
				+typedef /* Complex */ VOID (*C_fp)(...);
			
 
				+typedef /* Double Complex */ VOID (*Z_fp)(...);
			
 
				+typedef logical (*L_fp)(...);
			
 
				+typedef shortlogical (*K_fp)(...);
			
 
				+typedef /* Character */ VOID (*H_fp)(...);
			
 
				+typedef /* Subroutine */ int (*S_fp)(...);
			
 
				+#else
			
 
				+typedef int /* Unknown procedure type */ (*U_fp)();
			
 
				+typedef shortint (*J_fp)();
			
 
				+typedef integer (*I_fp)();
			
 
				+typedef real (*R_fp)();
			
 
				+typedef doublereal (*D_fp)(), (*E_fp)();
			
 
				+typedef /* Complex */ VOID (*C_fp)();
			
 
				+typedef /* Double Complex */ VOID (*Z_fp)();
			
 
				+typedef logical (*L_fp)();
			
 
				+typedef shortlogical (*K_fp)();
			
 
				+typedef /* Character */ VOID (*H_fp)();
			
 
				+typedef /* Subroutine */ int (*S_fp)();
			
 
				+#endif
			
 
				+/* E_fp is for real functions when -R is not specified */
			
 
				+typedef VOID C_f;	/* complex function */
			
 
				+typedef VOID H_f;	/* character function */
			
 
				+typedef VOID Z_f;	/* double complex function */
			
 
				+typedef doublereal E_f;	/* real function with -R not specified */
			
 
				+
			
 
				+/* undef any lower-case symbols that your C compiler predefines, e.g.: */
			
 
				+
			
 
				+#ifndef Skip_f2c_Undefs
			
 
				+#undef cray
			
 
				+#undef gcos
			
 
				+#undef mc68010
			
 
				+#undef mc68020
			
 
				+#undef mips
			
 
				+#undef pdp11
			
 
				+#undef sgi
			
 
				+#undef sparc
			
 
				+#undef sun
			
 
				+#undef sun2
			
 
				+#undef sun3
			
 
				+#undef sun4
			
 
				+#undef u370
			
 
				+#undef u3b
			
 
				+#undef u3b2
			
 
				+#undef u3b5
			
 
				+#undef unix
			
 
				+#undef vax
			
 
				+#endif
			
 
				+#endif
			
--- a/min-dgels/additional/fio.h
+++ b/min-dgels/additional/fio.h
@@ -0,0 +1,141 @@
 
				+#ifndef SYSDEP_H_INCLUDED
			
 
				+#include "sysdep1.h"
			
 
				+#endif
			
 
				+#include "stdio.h"
			
 
				+#include "errno.h"
			
 
				+#ifndef NULL
			
 
				+/* ANSI C */
			
 
				+#include "stddef.h"
			
 
				+#endif
			
 
				+
			
 
				+#ifndef SEEK_SET
			
 
				+#define SEEK_SET 0
			
 
				+#define SEEK_CUR 1
			
 
				+#define SEEK_END 2
			
 
				+#endif
			
 
				+
			
 
				+#ifndef FOPEN
			
 
				+#define FOPEN fopen
			
 
				+#endif
			
 
				+
			
 
				+#ifndef FREOPEN
			
 
				+#define FREOPEN freopen
			
 
				+#endif
			
 
				+
			
 
				+#ifndef FSEEK
			
 
				+#define FSEEK fseek
			
 
				+#endif
			
 
				+
			
 
				+#ifndef FSTAT
			
 
				+#define FSTAT fstat
			
 
				+#endif
			
 
				+
			
 
				+#ifndef FTELL
			
 
				+#define FTELL ftell
			
 
				+#endif
			
 
				+
			
 
				+#ifndef OFF_T
			
 
				+#define OFF_T long
			
 
				+#endif
			
 
				+
			
 
				+#ifndef STAT_ST
			
 
				+#define STAT_ST stat
			
 
				+#endif
			
 
				+
			
 
				+#ifndef STAT
			
 
				+#define STAT stat
			
 
				+#endif
			
 
				+
			
 
				+#ifdef MSDOS
			
 
				+#ifndef NON_UNIX_STDIO
			
 
				+#define NON_UNIX_STDIO
			
 
				+#endif
			
 
				+#endif
			
 
				+
			
 
				+#ifdef UIOLEN_int
			
 
				+typedef int uiolen;
			
 
				+#else
			
 
				+typedef long uiolen;
			
 
				+#endif
			
 
				+
			
 
				+/*units*/
			
 
				+typedef struct
			
 
				+{	FILE *ufd;	/*0=unconnected*/
			
 
				+	char *ufnm;
			
 
				+#ifndef MSDOS
			
 
				+	long uinode;
			
 
				+	int udev;
			
 
				+#endif
			
 
				+	int url;	/*0=sequential*/
			
 
				+	flag useek;	/*true=can backspace, use dir, ...*/
			
 
				+	flag ufmt;
			
 
				+	flag urw;	/* (1 for can read) | (2 for can write) */
			
 
				+	flag ublnk;
			
 
				+	flag uend;
			
 
				+	flag uwrt;	/*last io was write*/
			
 
				+	flag uscrtch;
			
 
				+} unit;
			
 
				+
			
 
				+#undef Void
			
 
				+#ifdef KR_headers
			
 
				+#define Void /*void*/
			
 
				+extern int (*f__getn)();	/* for formatted input */
			
 
				+extern void (*f__putn)();	/* for formatted output */
			
 
				+extern void x_putc();
			
 
				+extern long f__inode();
			
 
				+extern VOID sig_die();
			
 
				+extern int (*f__donewrec)(), t_putc(), x_wSL();
			
 
				+extern int c_sfe(), err__fl(), xrd_SL(), f__putbuf();
			
 
				+#else
			
 
				+#define Void void
			
 
				+#ifdef __cplusplus
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+extern int (*f__getn)(void);	/* for formatted input */
			
 
				+extern void (*f__putn)(int);	/* for formatted output */
			
 
				+extern void x_putc(int);
			
 
				+extern long f__inode(char*,int*);
			
 
				+extern void sig_die(const char*,int);
			
 
				+extern void f__fatal(int, const char*);
			
 
				+extern int t_runc(alist*);
			
 
				+extern int f__nowreading(unit*), f__nowwriting(unit*);
			
 
				+extern int fk_open(int,int,ftnint);
			
 
				+extern int en_fio(void);
			
 
				+extern void f_init(void);
			
 
				+extern int (*f__donewrec)(void), t_putc(int), x_wSL(void);
			
 
				+extern void b_char(const char*,char*,ftnlen), g_char(const char*,ftnlen,char*);
			
 
				+extern int c_sfe(cilist*), z_rnew(void);
			
 
				+extern int err__fl(int,int,const char*);
			
 
				+extern int xrd_SL(void);
			
 
				+extern int f__putbuf(int);
			
 
				+#endif
			
 
				+extern flag f__init;
			
 
				+extern cilist *f__elist;	/*active external io list*/
			
 
				+extern flag f__reading,f__external,f__sequential,f__formatted;
			
 
				+extern int (*f__doend)(Void);
			
 
				+extern FILE *f__cf;	/*current file*/
			
 
				+extern unit *f__curunit;	/*current unit*/
			
 
				+extern unit f__units[];
			
 
				+#define err(f,m,s) {if(f) errno= m; else f__fatal(m,s); return(m);}
			
 
				+#define errfl(f,m,s) return err__fl((int)f,m,s)
			
 
				+
			
 
				+/*Table sizes*/
			
 
				+#define MXUNIT 100
			
 
				+
			
 
				+extern int f__recpos;	/*position in current record*/
			
 
				+extern OFF_T f__cursor;	/* offset to move to */
			
 
				+extern OFF_T f__hiwater;	/* so TL doesn't confuse us */
			
 
				+#ifdef __cplusplus
			
 
				+	}
			
 
				+#endif
			
 
				+
			
 
				+#define WRITE	1
			
 
				+#define READ	2
			
 
				+#define SEQ	3
			
 
				+#define DIR	4
			
 
				+#define FMT	5
			
 
				+#define UNF	6
			
 
				+#define EXT	7
			
 
				+#define INT	8
			
 
				+
			
 
				+#define buf_end(x) (x->_flag & _IONBF ? x->_ptr : x->_base + BUFSIZ)
			
--- a/min-dgels/additional/fmt.c
+++ b/min-dgels/additional/fmt.c
@@ -0,0 +1,530 @@
 
				+#include "f2c.h"
			
 
				+#include "fio.h"
			
 
				+#include "fmt.h"
			
 
				+#ifdef __cplusplus
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+#define skip(s) while(*s==' ') s++
			
 
				+#ifdef interdata
			
 
				+#define SYLMX 300
			
 
				+#endif
			
 
				+#ifdef pdp11
			
 
				+#define SYLMX 300
			
 
				+#endif
			
 
				+#ifdef vax
			
 
				+#define SYLMX 300
			
 
				+#endif
			
 
				+#ifndef SYLMX
			
 
				+#define SYLMX 300
			
 
				+#endif
			
 
				+#define GLITCH '\2'
			
 
				+	/* special quote character for stu */
			
 
				+extern flag f__cblank,f__cplus;	/*blanks in I and compulsory plus*/
			
 
				+static struct syl f__syl[SYLMX];
			
 
				+int f__parenlvl,f__pc,f__revloc;
			
 
				+#ifdef KR_headers
			
 
				+#define Const /*nothing*/
			
 
				+#else
			
 
				+#define Const const
			
 
				+#endif
			
 
				+
			
 
				+ static
			
 
				+#ifdef KR_headers
			
 
				+char *ap_end(s) char *s;
			
 
				+#else
			
 
				+const char *ap_end(const char *s)
			
 
				+#endif
			
 
				+{	char quote;
			
 
				+	quote= *s++;
			
 
				+	for(;*s;s++)
			
 
				+	{	if(*s!=quote) continue;
			
 
				+		if(*++s!=quote) return(s);
			
 
				+	}
			
 
				+	if(f__elist->cierr) {
			
 
				+		errno = 100;
			
 
				+		return(NULL);
			
 
				+	}
			
 
				+	f__fatal(100, "bad string");
			
 
				+	/*NOTREACHED*/ return 0;
			
 
				+}
			
 
				+ static int
			
 
				+#ifdef KR_headers
			
 
				+op_gen(a,b,c,d)
			
 
				+#else
			
 
				+op_gen(int a, int b, int c, int d)
			
 
				+#endif
			
 
				+{	struct syl *p= &f__syl[f__pc];
			
 
				+	if(f__pc>=SYLMX)
			
 
				+	{	fprintf(stderr,"format too complicated:\n");
			
 
				+		sig_die(f__fmtbuf, 1);
			
 
				+	}
			
 
				+	p->op=a;
			
 
				+	p->p1=b;
			
 
				+	p->p2.i[0]=c;
			
 
				+	p->p2.i[1]=d;
			
 
				+	return(f__pc++);
			
 
				+}
			
 
				+#ifdef KR_headers
			
 
				+static char *f_list();
			
 
				+static char *gt_num(s,n,n1) char *s; int *n, n1;
			
 
				+#else
			
 
				+static const char *f_list(const char*);
			
 
				+static const char *gt_num(const char *s, int *n, int n1)
			
 
				+#endif
			
 
				+{	int m=0,f__cnt=0;
			
 
				+	char c;
			
 
				+	for(c= *s;;c = *s)
			
 
				+	{	if(c==' ')
			
 
				+		{	s++;
			
 
				+			continue;
			
 
				+		}
			
 
				+		if(c>'9' || c<'0') break;
			
 
				+		m=10*m+c-'0';
			
 
				+		f__cnt++;
			
 
				+		s++;
			
 
				+	}
			
 
				+	if(f__cnt==0) {
			
 
				+		if (!n1)
			
 
				+			s = 0;
			
 
				+		*n=n1;
			
 
				+		}
			
 
				+	else *n=m;
			
 
				+	return(s);
			
 
				+}
			
 
				+
			
 
				+ static
			
 
				+#ifdef KR_headers
			
 
				+char *f_s(s,curloc) char *s;
			
 
				+#else
			
 
				+const char *f_s(const char *s, int curloc)
			
 
				+#endif
			
 
				+{
			
 
				+	skip(s);
			
 
				+	if(*s++!='(')
			
 
				+	{
			
 
				+		return(NULL);
			
 
				+	}
			
 
				+	if(f__parenlvl++ ==1) f__revloc=curloc;
			
 
				+	if(op_gen(RET1,curloc,0,0)<0 ||
			
 
				+		(s=f_list(s))==NULL)
			
 
				+	{
			
 
				+		return(NULL);
			
 
				+	}
			
 
				+	skip(s);
			
 
				+	return(s);
			
 
				+}
			
 
				+
			
 
				+ static int
			
 
				+#ifdef KR_headers
			
 
				+ne_d(s,p) char *s,**p;
			
 
				+#else
			
 
				+ne_d(const char *s, const char **p)
			
 
				+#endif
			
 
				+{	int n,x,sign=0;
			
 
				+	struct syl *sp;
			
 
				+	switch(*s)
			
 
				+	{
			
 
				+	default:
			
 
				+		return(0);
			
 
				+	case ':': (void) op_gen(COLON,0,0,0); break;
			
 
				+	case '$':
			
 
				+		(void) op_gen(NONL, 0, 0, 0); break;
			
 
				+	case 'B':
			
 
				+	case 'b':
			
 
				+		if(*++s=='z' || *s == 'Z') (void) op_gen(BZ,0,0,0);
			
 
				+		else (void) op_gen(BN,0,0,0);
			
 
				+		break;
			
 
				+	case 'S':
			
 
				+	case 's':
			
 
				+		if(*(s+1)=='s' || *(s+1) == 'S')
			
 
				+		{	x=SS;
			
 
				+			s++;
			
 
				+		}
			
 
				+		else if(*(s+1)=='p' || *(s+1) == 'P')
			
 
				+		{	x=SP;
			
 
				+			s++;
			
 
				+		}
			
 
				+		else x=S;
			
 
				+		(void) op_gen(x,0,0,0);
			
 
				+		break;
			
 
				+	case '/': (void) op_gen(SLASH,0,0,0); break;
			
 
				+	case '-': sign=1;
			
 
				+	case '+':	s++;	/*OUTRAGEOUS CODING TRICK*/
			
 
				+	case '0': case '1': case '2': case '3': case '4':
			
 
				+	case '5': case '6': case '7': case '8': case '9':
			
 
				+		if (!(s=gt_num(s,&n,0))) {
			
 
				+ bad:			*p = 0;
			
 
				+			return 1;
			
 
				+			}
			
 
				+		switch(*s)
			
 
				+		{
			
 
				+		default:
			
 
				+			return(0);
			
 
				+		case 'P':
			
 
				+		case 'p': if(sign) n= -n; (void) op_gen(P,n,0,0); break;
			
 
				+		case 'X':
			
 
				+		case 'x': (void) op_gen(X,n,0,0); break;
			
 
				+		case 'H':
			
 
				+		case 'h':
			
 
				+			sp = &f__syl[op_gen(H,n,0,0)];
			
 
				+			sp->p2.s = (char*)s + 1;
			
 
				+			s+=n;
			
 
				+			break;
			
 
				+		}
			
 
				+		break;
			
 
				+	case GLITCH:
			
 
				+	case '"':
			
 
				+	case '\'':
			
 
				+		sp = &f__syl[op_gen(APOS,0,0,0)];
			
 
				+		sp->p2.s = (char*)s;
			
 
				+		if((*p = ap_end(s)) == NULL)
			
 
				+			return(0);
			
 
				+		return(1);
			
 
				+	case 'T':
			
 
				+	case 't':
			
 
				+		if(*(s+1)=='l' || *(s+1) == 'L')
			
 
				+		{	x=TL;
			
 
				+			s++;
			
 
				+		}
			
 
				+		else if(*(s+1)=='r'|| *(s+1) == 'R')
			
 
				+		{	x=TR;
			
 
				+			s++;
			
 
				+		}
			
 
				+		else x=T;
			
 
				+		if (!(s=gt_num(s+1,&n,0)))
			
 
				+			goto bad;
			
 
				+		s--;
			
 
				+		(void) op_gen(x,n,0,0);
			
 
				+		break;
			
 
				+	case 'X':
			
 
				+	case 'x': (void) op_gen(X,1,0,0); break;
			
 
				+	case 'P':
			
 
				+	case 'p': (void) op_gen(P,1,0,0); break;
			
 
				+	}
			
 
				+	s++;
			
 
				+	*p=s;
			
 
				+	return(1);
			
 
				+}
			
 
				+
			
 
				+ static int
			
 
				+#ifdef KR_headers
			
 
				+e_d(s,p) char *s,**p;
			
 
				+#else
			
 
				+e_d(const char *s, const char **p)
			
 
				+#endif
			
 
				+{	int i,im,n,w,d,e,found=0,x=0;
			
 
				+	Const char *sv=s;
			
 
				+	s=gt_num(s,&n,1);
			
 
				+	(void) op_gen(STACK,n,0,0);
			
 
				+	switch(*s++)
			
 
				+	{
			
 
				+	default: break;
			
 
				+	case 'E':
			
 
				+	case 'e':	x=1;
			
 
				+	case 'G':
			
 
				+	case 'g':
			
 
				+		found=1;
			
 
				+		if (!(s=gt_num(s,&w,0))) {
			
 
				+ bad:
			
 
				+			*p = 0;
			
 
				+			return 1;
			
 
				+			}
			
 
				+		if(w==0) break;
			
 
				+		if(*s=='.') {
			
 
				+			if (!(s=gt_num(s+1,&d,0)))
			
 
				+				goto bad;
			
 
				+			}
			
 
				+		else d=0;
			
 
				+		if(*s!='E' && *s != 'e')
			
 
				+			(void) op_gen(x==1?E:G,w,d,0);	/* default is Ew.dE2 */
			
 
				+		else {
			
 
				+			if (!(s=gt_num(s+1,&e,0)))
			
 
				+				goto bad;
			
 
				+			(void) op_gen(x==1?EE:GE,w,d,e);
			
 
				+			}
			
 
				+		break;
			
 
				+	case 'O':
			
 
				+	case 'o':
			
 
				+		i = O;
			
 
				+		im = OM;
			
 
				+		goto finish_I;
			
 
				+	case 'Z':
			
 
				+	case 'z':
			
 
				+		i = Z;
			
 
				+		im = ZM;
			
 
				+		goto finish_I;
			
 
				+	case 'L':
			
 
				+	case 'l':
			
 
				+		found=1;
			
 
				+		if (!(s=gt_num(s,&w,0)))
			
 
				+			goto bad;
			
 
				+		if(w==0) break;
			
 
				+		(void) op_gen(L,w,0,0);
			
 
				+		break;
			
 
				+	case 'A':
			
 
				+	case 'a':
			
 
				+		found=1;
			
 
				+		skip(s);
			
 
				+		if(*s>='0' && *s<='9')
			
 
				+		{	s=gt_num(s,&w,1);
			
 
				+			if(w==0) break;
			
 
				+			(void) op_gen(AW,w,0,0);
			
 
				+			break;
			
 
				+		}
			
 
				+		(void) op_gen(A,0,0,0);
			
 
				+		break;
			
 
				+	case 'F':
			
 
				+	case 'f':
			
 
				+		if (!(s=gt_num(s,&w,0)))
			
 
				+			goto bad;
			
 
				+		found=1;
			
 
				+		if(w==0) break;
			
 
				+		if(*s=='.') {
			
 
				+			if (!(s=gt_num(s+1,&d,0)))
			
 
				+				goto bad;
			
 
				+			}
			
 
				+		else d=0;
			
 
				+		(void) op_gen(F,w,d,0);
			
 
				+		break;
			
 
				+	case 'D':
			
 
				+	case 'd':
			
 
				+		found=1;
			
 
				+		if (!(s=gt_num(s,&w,0)))
			
 
				+			goto bad;
			
 
				+		if(w==0) break;
			
 
				+		if(*s=='.') {
			
 
				+			if (!(s=gt_num(s+1,&d,0)))
			
 
				+				goto bad;
			
 
				+			}
			
 
				+		else d=0;
			
 
				+		(void) op_gen(D,w,d,0);
			
 
				+		break;
			
 
				+	case 'I':
			
 
				+	case 'i':
			
 
				+		i = I;
			
 
				+		im = IM;
			
 
				+ finish_I:
			
 
				+		if (!(s=gt_num(s,&w,0)))
			
 
				+			goto bad;
			
 
				+		found=1;
			
 
				+		if(w==0) break;
			
 
				+		if(*s!='.')
			
 
				+		{	(void) op_gen(i,w,0,0);
			
 
				+			break;
			
 
				+		}
			
 
				+		if (!(s=gt_num(s+1,&d,0)))
			
 
				+			goto bad;
			
 
				+		(void) op_gen(im,w,d,0);
			
 
				+		break;
			
 
				+	}
			
 
				+	if(found==0)
			
 
				+	{	f__pc--; /*unSTACK*/
			
 
				+		*p=sv;
			
 
				+		return(0);
			
 
				+	}
			
 
				+	*p=s;
			
 
				+	return(1);
			
 
				+}
			
 
				+ static
			
 
				+#ifdef KR_headers
			
 
				+char *i_tem(s) char *s;
			
 
				+#else
			
 
				+const char *i_tem(const char *s)
			
 
				+#endif
			
 
				+{	const char *t;
			
 
				+	int n,curloc;
			
 
				+	if(*s==')') return(s);
			
 
				+	if(ne_d(s,&t)) return(t);
			
 
				+	if(e_d(s,&t)) return(t);
			
 
				+	s=gt_num(s,&n,1);
			
 
				+	if((curloc=op_gen(STACK,n,0,0))<0) return(NULL);
			
 
				+	return(f_s(s,curloc));
			
 
				+}
			
 
				+
			
 
				+ static
			
 
				+#ifdef KR_headers
			
 
				+char *f_list(s) char *s;
			
 
				+#else
			
 
				+const char *f_list(const char *s)
			
 
				+#endif
			
 
				+{
			
 
				+	for(;*s!=0;)
			
 
				+	{	skip(s);
			
 
				+		if((s=i_tem(s))==NULL) return(NULL);
			
 
				+		skip(s);
			
 
				+		if(*s==',') s++;
			
 
				+		else if(*s==')')
			
 
				+		{	if(--f__parenlvl==0)
			
 
				+			{
			
 
				+				(void) op_gen(REVERT,f__revloc,0,0);
			
 
				+				return(++s);
			
 
				+			}
			
 
				+			(void) op_gen(GOTO,0,0,0);
			
 
				+			return(++s);
			
 
				+		}
			
 
				+	}
			
 
				+	return(NULL);
			
 
				+}
			
 
				+
			
 
				+ int
			
 
				+#ifdef KR_headers
			
 
				+pars_f(s) char *s;
			
 
				+#else
			
 
				+pars_f(const char *s)
			
 
				+#endif
			
 
				+{
			
 
				+	f__parenlvl=f__revloc=f__pc=0;
			
 
				+	if(f_s(s,0) == NULL)
			
 
				+	{
			
 
				+		return(-1);
			
 
				+	}
			
 
				+	return(0);
			
 
				+}
			
 
				+#define STKSZ 10
			
 
				+int f__cnt[STKSZ],f__ret[STKSZ],f__cp,f__rp;
			
 
				+flag f__workdone, f__nonl;
			
 
				+
			
 
				+ static int
			
 
				+#ifdef KR_headers
			
 
				+type_f(n)
			
 
				+#else
			
 
				+type_f(int n)
			
 
				+#endif
			
 
				+{
			
 
				+	switch(n)
			
 
				+	{
			
 
				+	default:
			
 
				+		return(n);
			
 
				+	case RET1:
			
 
				+		return(RET1);
			
 
				+	case REVERT: return(REVERT);
			
 
				+	case GOTO: return(GOTO);
			
 
				+	case STACK: return(STACK);
			
 
				+	case X:
			
 
				+	case SLASH:
			
 
				+	case APOS: case H:
			
 
				+	case T: case TL: case TR:
			
 
				+		return(NED);
			
 
				+	case F:
			
 
				+	case I:
			
 
				+	case IM:
			
 
				+	case A: case AW:
			
 
				+	case O: case OM:
			
 
				+	case L:
			
 
				+	case E: case EE: case D:
			
 
				+	case G: case GE:
			
 
				+	case Z: case ZM:
			
 
				+		return(ED);
			
 
				+	}
			
 
				+}
			
 
				+#ifdef KR_headers
			
 
				+integer do_fio(number,ptr,len) ftnint *number; ftnlen len; char *ptr;
			
 
				+#else
			
 
				+integer do_fio(ftnint *number, char *ptr, ftnlen len)
			
 
				+#endif
			
 
				+{	struct syl *p;
			
 
				+	int n,i;
			
 
				+	for(i=0;i<*number;i++,ptr+=len)
			
 
				+	{
			
 
				+loop:	switch(type_f((p= &f__syl[f__pc])->op))
			
 
				+	{
			
 
				+	default:
			
 
				+		fprintf(stderr,"unknown code in do_fio: %d\n%s\n",
			
 
				+			p->op,f__fmtbuf);
			
 
				+		err(f__elist->cierr,100,"do_fio");
			
 
				+	case NED:
			
 
				+		if((*f__doned)(p))
			
 
				+		{	f__pc++;
			
 
				+			goto loop;
			
 
				+		}
			
 
				+		f__pc++;
			
 
				+		continue;
			
 
				+	case ED:
			
 
				+		if(f__cnt[f__cp]<=0)
			
 
				+		{	f__cp--;
			
 
				+			f__pc++;
			
 
				+			goto loop;
			
 
				+		}
			
 
				+		if(ptr==NULL)
			
 
				+			return((*f__doend)());
			
 
				+		f__cnt[f__cp]--;
			
 
				+		f__workdone=1;
			
 
				+		if((n=(*f__doed)(p,ptr,len))>0)
			
 
				+			errfl(f__elist->cierr,errno,"fmt");
			
 
				+		if(n<0)
			
 
				+			err(f__elist->ciend,(EOF),"fmt");
			
 
				+		continue;
			
 
				+	case STACK:
			
 
				+		f__cnt[++f__cp]=p->p1;
			
 
				+		f__pc++;
			
 
				+		goto loop;
			
 
				+	case RET1:
			
 
				+		f__ret[++f__rp]=p->p1;
			
 
				+		f__pc++;
			
 
				+		goto loop;
			
 
				+	case GOTO:
			
 
				+		if(--f__cnt[f__cp]<=0)
			
 
				+		{	f__cp--;
			
 
				+			f__rp--;
			
 
				+			f__pc++;
			
 
				+			goto loop;
			
 
				+		}
			
 
				+		f__pc=1+f__ret[f__rp--];
			
 
				+		goto loop;
			
 
				+	case REVERT:
			
 
				+		f__rp=f__cp=0;
			
 
				+		f__pc = p->p1;
			
 
				+		if(ptr==NULL)
			
 
				+			return((*f__doend)());
			
 
				+		if(!f__workdone) return(0);
			
 
				+		if((n=(*f__dorevert)()) != 0) return(n);
			
 
				+		goto loop;
			
 
				+	case COLON:
			
 
				+		if(ptr==NULL)
			
 
				+			return((*f__doend)());
			
 
				+		f__pc++;
			
 
				+		goto loop;
			
 
				+	case NONL:
			
 
				+		f__nonl = 1;
			
 
				+		f__pc++;
			
 
				+		goto loop;
			
 
				+	case S:
			
 
				+	case SS:
			
 
				+		f__cplus=0;
			
 
				+		f__pc++;
			
 
				+		goto loop;
			
 
				+	case SP:
			
 
				+		f__cplus = 1;
			
 
				+		f__pc++;
			
 
				+		goto loop;
			
 
				+	case P:	f__scale=p->p1;
			
 
				+		f__pc++;
			
 
				+		goto loop;
			
 
				+	case BN:
			
 
				+		f__cblank=0;
			
 
				+		f__pc++;
			
 
				+		goto loop;
			
 
				+	case BZ:
			
 
				+		f__cblank=1;
			
 
				+		f__pc++;
			
 
				+		goto loop;
			
 
				+	}
			
 
				+	}
			
 
				+	return(0);
			
 
				+}
			
 
				+
			
 
				+ int
			
 
				+en_fio(Void)
			
 
				+{	ftnint one=1;
			
 
				+	return(do_fio(&one,(char *)NULL,(ftnint)0));
			
 
				+}
			
 
				+
			
 
				+ VOID
			
 
				+fmt_bg(Void)
			
 
				+{
			
 
				+	f__workdone=f__cp=f__rp=f__pc=f__cursor=0;
			
 
				+	f__cnt[0]=f__ret[0]=0;
			
 
				+}
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
--- a/min-dgels/additional/fmt.h
+++ b/min-dgels/additional/fmt.h
@@ -0,0 +1,105 @@
 
				+struct syl
			
 
				+{	int op;
			
 
				+	int p1;
			
 
				+	union { int i[2]; char *s;} p2;
			
 
				+	};
			
 
				+#define RET1 1
			
 
				+#define REVERT 2
			
 
				+#define GOTO 3
			
 
				+#define X 4
			
 
				+#define SLASH 5
			
 
				+#define STACK 6
			
 
				+#define I 7
			
 
				+#define ED 8
			
 
				+#define NED 9
			
 
				+#define IM 10
			
 
				+#define APOS 11
			
 
				+#define H 12
			
 
				+#define TL 13
			
 
				+#define TR 14
			
 
				+#define T 15
			
 
				+#define COLON 16
			
 
				+#define S 17
			
 
				+#define SP 18
			
 
				+#define SS 19
			
 
				+#define P 20
			
 
				+#define BN 21
			
 
				+#define BZ 22
			
 
				+#define F 23
			
 
				+#define E 24
			
 
				+#define EE 25
			
 
				+#define D 26
			
 
				+#define G 27
			
 
				+#define GE 28
			
 
				+#define L 29
			
 
				+#define A 30
			
 
				+#define AW 31
			
 
				+#define O 32
			
 
				+#define NONL 33
			
 
				+#define OM 34
			
 
				+#define Z 35
			
 
				+#define ZM 36
			
 
				+typedef union
			
 
				+{	real pf;
			
 
				+	doublereal pd;
			
 
				+} ufloat;
			
 
				+typedef union
			
 
				+{	short is;
			
 
				+#ifndef KR_headers
			
 
				+	signed
			
 
				+#endif
			
 
				+		char ic;
			
 
				+	integer il;
			
 
				+#ifdef Allow_TYQUAD
			
 
				+	longint ili;
			
 
				+#endif
			
 
				+} Uint;
			
 
				+#ifdef KR_headers
			
 
				+extern int (*f__doed)(),(*f__doned)();
			
 
				+extern int (*f__dorevert)();
			
 
				+extern int rd_ed(),rd_ned();
			
 
				+extern int w_ed(),w_ned();
			
 
				+extern int signbit_f2c();
			
 
				+extern char *f__fmtbuf;
			
 
				+#else
			
 
				+#ifdef __cplusplus
			
 
				+extern "C" {
			
 
				+#define Cextern extern "C"
			
 
				+#else
			
 
				+#define Cextern extern
			
 
				+#endif
			
 
				+extern const char *f__fmtbuf;
			
 
				+extern int (*f__doed)(struct syl*, char*, ftnlen),(*f__doned)(struct syl*);
			
 
				+extern int (*f__dorevert)(void);
			
 
				+extern void fmt_bg(void);
			
 
				+extern int pars_f(const char*);
			
 
				+extern int rd_ed(struct syl*, char*, ftnlen),rd_ned(struct syl*);
			
 
				+extern int signbit_f2c(double*);
			
 
				+extern int w_ed(struct syl*, char*, ftnlen),w_ned(struct syl*);
			
 
				+extern int wrt_E(ufloat*, int, int, int, ftnlen);
			
 
				+extern int wrt_F(ufloat*, int, int, ftnlen);
			
 
				+extern int wrt_L(Uint*, int, ftnlen);
			
 
				+#endif
			
 
				+extern int f__pc,f__parenlvl,f__revloc;
			
 
				+extern flag f__cblank,f__cplus,f__workdone, f__nonl;
			
 
				+extern int f__scale;
			
 
				+#ifdef __cplusplus
			
 
				+	}
			
 
				+#endif
			
 
				+#define GET(x) if((x=(*f__getn)())<0) return(x)
			
 
				+#define VAL(x) (x!='\n'?x:' ')
			
 
				+#define PUT(x) (*f__putn)(x)
			
 
				+
			
 
				+#undef TYQUAD
			
 
				+#ifndef Allow_TYQUAD
			
 
				+#undef longint
			
 
				+#define longint long
			
 
				+#else
			
 
				+#define TYQUAD 14
			
 
				+#endif
			
 
				+
			
 
				+#ifdef KR_headers
			
 
				+extern char *f__icvt();
			
 
				+#else
			
 
				+Cextern char *f__icvt(longint, int*, int*, int);
			
 
				+#endif
			
--- a/min-dgels/additional/ieeeck.c
+++ b/min-dgels/additional/ieeeck.c
@@ -0,0 +1,166 @@
 
				+/* ieeeck.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+integer ieeeck_(integer *ispec, real *zero, real *one)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    integer ret_val;
			
 
				+
			
 
				+    /* Local variables */
			
 
				+    real nan1, nan2, nan3, nan4, nan5, nan6, neginf, posinf, negzro, newzro;
			
 
				+
			
 
				+
			
 
				+/*  -- LAPACK auxiliary routine (version 3.2) -- */
			
 
				+/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
			
 
				+/*     November 2006 */
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*  IEEECK is called from the ILAENV to verify that Infinity and */
			
 
				+/*  possibly NaN arithmetic is safe (i.e. will not trap). */
			
 
				+
			
 
				+/*  Arguments */
			
 
				+/*  ========= */
			
 
				+
			
 
				+/*  ISPEC   (input) INTEGER */
			
 
				+/*          Specifies whether to test just for inifinity arithmetic */
			
 
				+/*          or whether to test for infinity and NaN arithmetic. */
			
 
				+/*          = 0: Verify infinity arithmetic only. */
			
 
				+/*          = 1: Verify infinity and NaN arithmetic. */
			
 
				+
			
 
				+/*  ZERO    (input) REAL */
			
 
				+/*          Must contain the value 0.0 */
			
 
				+/*          This is passed to prevent the compiler from optimizing */
			
 
				+/*          away this code. */
			
 
				+
			
 
				+/*  ONE     (input) REAL */
			
 
				+/*          Must contain the value 1.0 */
			
 
				+/*          This is passed to prevent the compiler from optimizing */
			
 
				+/*          away this code. */
			
 
				+
			
 
				+/*  RETURN VALUE:  INTEGER */
			
 
				+/*          = 0:  Arithmetic failed to produce the correct answers */
			
 
				+/*          = 1:  Arithmetic produced the correct answers */
			
 
				+
			
 
				+/*     .. Local Scalars .. */
			
 
				+/*     .. */
			
 
				+/*     .. Executable Statements .. */
			
 
				+    ret_val = 1;
			
 
				+
			
 
				+    posinf = *one / *zero;
			
 
				+    if (posinf <= *one) {
			
 
				+	ret_val = 0;
			
 
				+	return ret_val;
			
 
				+    }
			
 
				+
			
 
				+    neginf = -(*one) / *zero;
			
 
				+    if (neginf >= *zero) {
			
 
				+	ret_val = 0;
			
 
				+	return ret_val;
			
 
				+    }
			
 
				+
			
 
				+    negzro = *one / (neginf + *one);
			
 
				+    if (negzro != *zero) {
			
 
				+	ret_val = 0;
			
 
				+	return ret_val;
			
 
				+    }
			
 
				+
			
 
				+    neginf = *one / negzro;
			
 
				+    if (neginf >= *zero) {
			
 
				+	ret_val = 0;
			
 
				+	return ret_val;
			
 
				+    }
			
 
				+
			
 
				+    newzro = negzro + *zero;
			
 
				+    if (newzro != *zero) {
			
 
				+	ret_val = 0;
			
 
				+	return ret_val;
			
 
				+    }
			
 
				+
			
 
				+    posinf = *one / newzro;
			
 
				+    if (posinf <= *one) {
			
 
				+	ret_val = 0;
			
 
				+	return ret_val;
			
 
				+    }
			
 
				+
			
 
				+    neginf *= posinf;
			
 
				+    if (neginf >= *zero) {
			
 
				+	ret_val = 0;
			
 
				+	return ret_val;
			
 
				+    }
			
 
				+
			
 
				+    posinf *= posinf;
			
 
				+    if (posinf <= *one) {
			
 
				+	ret_val = 0;
			
 
				+	return ret_val;
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+/*     Return if we were only asked to check infinity arithmetic */
			
 
				+
			
 
				+    if (*ispec == 0) {
			
 
				+	return ret_val;
			
 
				+    }
			
 
				+
			
 
				+    nan1 = posinf + neginf;
			
 
				+
			
 
				+    nan2 = posinf / neginf;
			
 
				+
			
 
				+    nan3 = posinf / posinf;
			
 
				+
			
 
				+    nan4 = posinf * *zero;
			
 
				+
			
 
				+    nan5 = neginf * negzro;
			
 
				+
			
 
				+    nan6 = nan5 * 0.f;
			
 
				+
			
 
				+    if (nan1 == nan1) {
			
 
				+	ret_val = 0;
			
 
				+	return ret_val;
			
 
				+    }
			
 
				+
			
 
				+    if (nan2 == nan2) {
			
 
				+	ret_val = 0;
			
 
				+	return ret_val;
			
 
				+    }
			
 
				+
			
 
				+    if (nan3 == nan3) {
			
 
				+	ret_val = 0;
			
 
				+	return ret_val;
			
 
				+    }
			
 
				+
			
 
				+    if (nan4 == nan4) {
			
 
				+	ret_val = 0;
			
 
				+	return ret_val;
			
 
				+    }
			
 
				+
			
 
				+    if (nan5 == nan5) {
			
 
				+	ret_val = 0;
			
 
				+	return ret_val;
			
 
				+    }
			
 
				+
			
 
				+    if (nan6 == nan6) {
			
 
				+	ret_val = 0;
			
 
				+	return ret_val;
			
 
				+    }
			
 
				+
			
 
				+    return ret_val;
			
 
				+} /* ieeeck_ */
			
--- a/min-dgels/additional/iladlc.c
+++ b/min-dgels/additional/iladlc.c
@@ -0,0 +1,88 @@
 
				+/* iladlc.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+integer iladlc_(integer *m, integer *n, doublereal *a, integer *lda)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    integer a_dim1, a_offset, ret_val, i__1;
			
 
				+
			
 
				+    /* Local variables */
			
 
				+    integer i__;
			
 
				+
			
 
				+
			
 
				+/*  -- LAPACK auxiliary routine (version 3.2.1)                        -- */
			
 
				+
			
 
				+/*  -- April 2009                                                      -- */
			
 
				+
			
 
				+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
			
 
				+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+/*     .. Array Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*  ILADLC scans A for its last non-zero column. */
			
 
				+
			
 
				+/*  Arguments */
			
 
				+/*  ========= */
			
 
				+
			
 
				+/*  M       (input) INTEGER */
			
 
				+/*          The number of rows of the matrix A. */
			
 
				+
			
 
				+/*  N       (input) INTEGER */
			
 
				+/*          The number of columns of the matrix A. */
			
 
				+
			
 
				+/*  A       (input) DOUBLE PRECISION array, dimension (LDA,N) */
			
 
				+/*          The m by n matrix A. */
			
 
				+
			
 
				+/*  LDA     (input) INTEGER */
			
 
				+/*          The leading dimension of the array A. LDA >= max(1,M). */
			
 
				+
			
 
				+/*  ===================================================================== */
			
 
				+
			
 
				+/*     .. Parameters .. */
			
 
				+/*     .. */
			
 
				+/*     .. Local Scalars .. */
			
 
				+/*     .. */
			
 
				+/*     .. Executable Statements .. */
			
 
				+
			
 
				+/*     Quick test for the common case where one corner is non-zero. */
			
 
				+    /* Parameter adjustments */
			
 
				+    a_dim1 = *lda;
			
 
				+    a_offset = 1 + a_dim1;
			
 
				+    a -= a_offset;
			
 
				+
			
 
				+    /* Function Body */
			
 
				+    if (*n == 0) {
			
 
				+	ret_val = *n;
			
 
				+    } else if (a[*n * a_dim1 + 1] != 0. || a[*m + *n * a_dim1] != 0.) {
			
 
				+	ret_val = *n;
			
 
				+    } else {
			
 
				+/*     Now scan each column from the end, returning with the first non-zero. */
			
 
				+	for (ret_val = *n; ret_val >= 1; --ret_val) {
			
 
				+	    i__1 = *m;
			
 
				+	    for (i__ = 1; i__ <= i__1; ++i__) {
			
 
				+		if (a[i__ + ret_val * a_dim1] != 0.) {
			
 
				+		    return ret_val;
			
 
				+		}
			
 
				+	    }
			
 
				+	}
			
 
				+    }
			
 
				+    return ret_val;
			
 
				+} /* iladlc_ */
			
--- a/min-dgels/additional/iladlr.c
+++ b/min-dgels/additional/iladlr.c
@@ -0,0 +1,90 @@
 
				+/* iladlr.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+integer iladlr_(integer *m, integer *n, doublereal *a, integer *lda)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    integer a_dim1, a_offset, ret_val, i__1;
			
 
				+
			
 
				+    /* Local variables */
			
 
				+    integer i__, j;
			
 
				+
			
 
				+
			
 
				+/*  -- LAPACK auxiliary routine (version 3.2.1)                        -- */
			
 
				+
			
 
				+/*  -- April 2009                                                      -- */
			
 
				+
			
 
				+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
			
 
				+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+/*     .. Array Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*  ILADLR scans A for its last non-zero row. */
			
 
				+
			
 
				+/*  Arguments */
			
 
				+/*  ========= */
			
 
				+
			
 
				+/*  M       (input) INTEGER */
			
 
				+/*          The number of rows of the matrix A. */
			
 
				+
			
 
				+/*  N       (input) INTEGER */
			
 
				+/*          The number of columns of the matrix A. */
			
 
				+
			
 
				+/*  A       (input) DOUBLE PRECISION array, dimension (LDA,N) */
			
 
				+/*          The m by n matrix A. */
			
 
				+
			
 
				+/*  LDA     (input) INTEGER */
			
 
				+/*          The leading dimension of the array A. LDA >= max(1,M). */
			
 
				+
			
 
				+/*  ===================================================================== */
			
 
				+
			
 
				+/*     .. Parameters .. */
			
 
				+/*     .. */
			
 
				+/*     .. Local Scalars .. */
			
 
				+/*     .. */
			
 
				+/*     .. Executable Statements .. */
			
 
				+
			
 
				+/*     Quick test for the common case where one corner is non-zero. */
			
 
				+    /* Parameter adjustments */
			
 
				+    a_dim1 = *lda;
			
 
				+    a_offset = 1 + a_dim1;
			
 
				+    a -= a_offset;
			
 
				+
			
 
				+    /* Function Body */
			
 
				+    if (*m == 0) {
			
 
				+	ret_val = *m;
			
 
				+    } else if (a[*m + a_dim1] != 0. || a[*m + *n * a_dim1] != 0.) {
			
 
				+	ret_val = *m;
			
 
				+    } else {
			
 
				+/*     Scan up each column tracking the last zero row seen. */
			
 
				+	ret_val = 0;
			
 
				+	i__1 = *n;
			
 
				+	for (j = 1; j <= i__1; ++j) {
			
 
				+	    for (i__ = *m; i__ >= 1; --i__) {
			
 
				+		if (a[i__ + j * a_dim1] != 0.) {
			
 
				+		    break;
			
 
				+		}
			
 
				+	    }
			
 
				+	    ret_val = max(ret_val,i__);
			
 
				+	}
			
 
				+    }
			
 
				+    return ret_val;
			
 
				+} /* iladlr_ */
			
--- a/min-dgels/additional/ilaenv.c
+++ b/min-dgels/additional/ilaenv.c
@@ -0,0 +1,654 @@
 
				+/* ilaenv.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+#include "string.h"
			
 
				+
			
 
				+/* Table of constant values */
			
 
				+
			
 
				+static integer c__1 = 1;
			
 
				+static real c_b163 = 0.f;
			
 
				+static real c_b164 = 1.f;
			
 
				+static integer c__0 = 0;
			
 
				+
			
 
				+integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, 
			
 
				+	integer *n2, integer *n3, integer *n4)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    integer ret_val;
			
 
				+
			
 
				+    /* Builtin functions */
			
 
				+    /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen);
			
 
				+    integer s_cmp(char *, char *, ftnlen, ftnlen);
			
 
				+
			
 
				+    /* Local variables */
			
 
				+    integer i__;
			
 
				+    char c1[1], c2[1], c3[1], c4[1];
			
 
				+    integer ic, nb, iz, nx;
			
 
				+    logical cname;
			
 
				+    integer nbmin;
			
 
				+    logical sname;
			
 
				+    extern integer ieeeck_(integer *, real *, real *);
			
 
				+    char subnam[1];
			
 
				+    extern integer iparmq_(integer *, char *, char *, integer *, integer *, 
			
 
				+	    integer *, integer *);
			
 
				+
			
 
				+    ftnlen name_len, opts_len;
			
 
				+
			
 
				+    name_len = strlen (name__);
			
 
				+    opts_len = strlen (opts);
			
 
				+
			
 
				+/*  -- LAPACK auxiliary routine (version 3.2) -- */
			
 
				+/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
			
 
				+/*     January 2007 */
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*  ILAENV is called from the LAPACK routines to choose problem-dependent */
			
 
				+/*  parameters for the local environment.  See ISPEC for a description of */
			
 
				+/*  the parameters. */
			
 
				+
			
 
				+/*  ILAENV returns an INTEGER */
			
 
				+/*  if ILAENV >= 0: ILAENV returns the value of the parameter specified by ISPEC */
			
 
				+/*  if ILAENV < 0:  if ILAENV = -k, the k-th argument had an illegal value. */
			
 
				+
			
 
				+/*  This version provides a set of parameters which should give good, */
			
 
				+/*  but not optimal, performance on many of the currently available */
			
 
				+/*  computers.  Users are encouraged to modify this subroutine to set */
			
 
				+/*  the tuning parameters for their particular machine using the option */
			
 
				+/*  and problem size information in the arguments. */
			
 
				+
			
 
				+/*  This routine will not function correctly if it is converted to all */
			
 
				+/*  lower case.  Converting it to all upper case is allowed. */
			
 
				+
			
 
				+/*  Arguments */
			
 
				+/*  ========= */
			
 
				+
			
 
				+/*  ISPEC   (input) INTEGER */
			
 
				+/*          Specifies the parameter to be returned as the value of */
			
 
				+/*          ILAENV. */
			
 
				+/*          = 1: the optimal blocksize; if this value is 1, an unblocked */
			
 
				+/*               algorithm will give the best performance. */
			
 
				+/*          = 2: the minimum block size for which the block routine */
			
 
				+/*               should be used; if the usable block size is less than */
			
 
				+/*               this value, an unblocked routine should be used. */
			
 
				+/*          = 3: the crossover point (in a block routine, for N less */
			
 
				+/*               than this value, an unblocked routine should be used) */
			
 
				+/*          = 4: the number of shifts, used in the nonsymmetric */
			
 
				+/*               eigenvalue routines (DEPRECATED) */
			
 
				+/*          = 5: the minimum column dimension for blocking to be used; */
			
 
				+/*               rectangular blocks must have dimension at least k by m, */
			
 
				+/*               where k is given by ILAENV(2,...) and m by ILAENV(5,...) */
			
 
				+/*          = 6: the crossover point for the SVD (when reducing an m by n */
			
 
				+/*               matrix to bidiagonal form, if max(m,n)/min(m,n) exceeds */
			
 
				+/*               this value, a QR factorization is used first to reduce */
			
 
				+/*               the matrix to a triangular form.) */
			
 
				+/*          = 7: the number of processors */
			
 
				+/*          = 8: the crossover point for the multishift QR method */
			
 
				+/*               for nonsymmetric eigenvalue problems (DEPRECATED) */
			
 
				+/*          = 9: maximum size of the subproblems at the bottom of the */
			
 
				+/*               computation tree in the divide-and-conquer algorithm */
			
 
				+/*               (used by xGELSD and xGESDD) */
			
 
				+/*          =10: ieee NaN arithmetic can be trusted not to trap */
			
 
				+/*          =11: infinity arithmetic can be trusted not to trap */
			
 
				+/*          12 <= ISPEC <= 16: */
			
 
				+/*               xHSEQR or one of its subroutines, */
			
 
				+/*               see IPARMQ for detailed explanation */
			
 
				+
			
 
				+/*  NAME    (input) CHARACTER*(*) */
			
 
				+/*          The name of the calling subroutine, in either upper case or */
			
 
				+/*          lower case. */
			
 
				+
			
 
				+/*  OPTS    (input) CHARACTER*(*) */
			
 
				+/*          The character options to the subroutine NAME, concatenated */
			
 
				+/*          into a single character string.  For example, UPLO = 'U', */
			
 
				+/*          TRANS = 'T', and DIAG = 'N' for a triangular routine would */
			
 
				+/*          be specified as OPTS = 'UTN'. */
			
 
				+
			
 
				+/*  N1      (input) INTEGER */
			
 
				+/*  N2      (input) INTEGER */
			
 
				+/*  N3      (input) INTEGER */
			
 
				+/*  N4      (input) INTEGER */
			
 
				+/*          Problem dimensions for the subroutine NAME; these may not all */
			
 
				+/*          be required. */
			
 
				+
			
 
				+/*  Further Details */
			
 
				+/*  =============== */
			
 
				+
			
 
				+/*  The following conventions have been used when calling ILAENV from the */
			
 
				+/*  LAPACK routines: */
			
 
				+/*  1)  OPTS is a concatenation of all of the character options to */
			
 
				+/*      subroutine NAME, in the same order that they appear in the */
			
 
				+/*      argument list for NAME, even if they are not used in determining */
			
 
				+/*      the value of the parameter specified by ISPEC. */
			
 
				+/*  2)  The problem dimensions N1, N2, N3, N4 are specified in the order */
			
 
				+/*      that they appear in the argument list for NAME.  N1 is used */
			
 
				+/*      first, N2 second, and so on, and unused problem dimensions are */
			
 
				+/*      passed a value of -1. */
			
 
				+/*  3)  The parameter value returned by ILAENV is checked for validity in */
			
 
				+/*      the calling subroutine.  For example, ILAENV is used to retrieve */
			
 
				+/*      the optimal blocksize for STRTRI as follows: */
			
 
				+
			
 
				+/*      NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 ) */
			
 
				+/*      IF( NB.LE.1 ) NB = MAX( 1, N ) */
			
 
				+
			
 
				+/*  ===================================================================== */
			
 
				+
			
 
				+/*     .. Local Scalars .. */
			
 
				+/*     .. */
			
 
				+/*     .. Intrinsic Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. External Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. Executable Statements .. */
			
 
				+
			
 
				+    switch (*ispec) {
			
 
				+	case 1:  goto L10;
			
 
				+	case 2:  goto L10;
			
 
				+	case 3:  goto L10;
			
 
				+	case 4:  goto L80;
			
 
				+	case 5:  goto L90;
			
 
				+	case 6:  goto L100;
			
 
				+	case 7:  goto L110;
			
 
				+	case 8:  goto L120;
			
 
				+	case 9:  goto L130;
			
 
				+	case 10:  goto L140;
			
 
				+	case 11:  goto L150;
			
 
				+	case 12:  goto L160;
			
 
				+	case 13:  goto L160;
			
 
				+	case 14:  goto L160;
			
 
				+	case 15:  goto L160;
			
 
				+	case 16:  goto L160;
			
 
				+    }
			
 
				+
			
 
				+/*     Invalid value for ISPEC */
			
 
				+
			
 
				+    ret_val = -1;
			
 
				+    return ret_val;
			
 
				+
			
 
				+L10:
			
 
				+
			
 
				+/*     Convert NAME to upper case if the first character is lower case. */
			
 
				+
			
 
				+    ret_val = 1;
			
 
				+    s_copy(subnam, name__, (ftnlen)1, name_len);
			
 
				+    ic = *(unsigned char *)subnam;
			
 
				+    iz = 'Z';
			
 
				+    if (iz == 90 || iz == 122) {
			
 
				+
			
 
				+/*        ASCII character set */
			
 
				+
			
 
				+	if (ic >= 97 && ic <= 122) {
			
 
				+	    *(unsigned char *)subnam = (char) (ic - 32);
			
 
				+	    for (i__ = 2; i__ <= 6; ++i__) {
			
 
				+		ic = *(unsigned char *)&subnam[i__ - 1];
			
 
				+		if (ic >= 97 && ic <= 122) {
			
 
				+		    *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32);
			
 
				+		}
			
 
				+/* L20: */
			
 
				+	    }
			
 
				+	}
			
 
				+
			
 
				+    } else if (iz == 233 || iz == 169) {
			
 
				+
			
 
				+/*        EBCDIC character set */
			
 
				+
			
 
				+	if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >= 162 && 
			
 
				+		ic <= 169) {
			
 
				+	    *(unsigned char *)subnam = (char) (ic + 64);
			
 
				+	    for (i__ = 2; i__ <= 6; ++i__) {
			
 
				+		ic = *(unsigned char *)&subnam[i__ - 1];
			
 
				+		if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >= 
			
 
				+			162 && ic <= 169) {
			
 
				+		    *(unsigned char *)&subnam[i__ - 1] = (char) (ic + 64);
			
 
				+		}
			
 
				+/* L30: */
			
 
				+	    }
			
 
				+	}
			
 
				+
			
 
				+    } else if (iz == 218 || iz == 250) {
			
 
				+
			
 
				+/*        Prime machines:  ASCII+128 */
			
 
				+
			
 
				+	if (ic >= 225 && ic <= 250) {
			
 
				+	    *(unsigned char *)subnam = (char) (ic - 32);
			
 
				+	    for (i__ = 2; i__ <= 6; ++i__) {
			
 
				+		ic = *(unsigned char *)&subnam[i__ - 1];
			
 
				+		if (ic >= 225 && ic <= 250) {
			
 
				+		    *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32);
			
 
				+		}
			
 
				+/* L40: */
			
 
				+	    }
			
 
				+	}
			
 
				+    }
			
 
				+
			
 
				+    *(unsigned char *)c1 = *(unsigned char *)subnam;
			
 
				+    sname = *(unsigned char *)c1 == 'S' || *(unsigned char *)c1 == 'D';
			
 
				+    cname = *(unsigned char *)c1 == 'C' || *(unsigned char *)c1 == 'Z';
			
 
				+    if (! (cname || sname)) {
			
 
				+	return ret_val;
			
 
				+    }
			
 
				+    s_copy(c2, subnam + 1, (ftnlen)1, (ftnlen)2);
			
 
				+    s_copy(c3, subnam + 3, (ftnlen)1, (ftnlen)3);
			
 
				+    s_copy(c4, c3 + 1, (ftnlen)1, (ftnlen)2);
			
 
				+
			
 
				+    switch (*ispec) {
			
 
				+	case 1:  goto L50;
			
 
				+	case 2:  goto L60;
			
 
				+	case 3:  goto L70;
			
 
				+    }
			
 
				+
			
 
				+L50:
			
 
				+
			
 
				+/*     ISPEC = 1:  block size */
			
 
				+
			
 
				+/*     In these examples, separate code is provided for setting NB for */
			
 
				+/*     real and complex.  We assume that NB will take the same value in */
			
 
				+/*     single or double precision. */
			
 
				+
			
 
				+    nb = 1;
			
 
				+
			
 
				+    if (s_cmp(c2, "GE", (ftnlen)1, (ftnlen)2) == 0) {
			
 
				+	if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) {
			
 
				+	    if (sname) {
			
 
				+		nb = 64;
			
 
				+	    } else {
			
 
				+		nb = 64;
			
 
				+	    }
			
 
				+	} else if (s_cmp(c3, "QRF", (ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, 
			
 
				+		"RQF", (ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)
			
 
				+		1, (ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)1, (ftnlen)3) 
			
 
				+		== 0) {
			
 
				+	    if (sname) {
			
 
				+		nb = 32;
			
 
				+	    } else {
			
 
				+		nb = 32;
			
 
				+	    }
			
 
				+	} else if (s_cmp(c3, "HRD", (ftnlen)1, (ftnlen)3) == 0) {
			
 
				+	    if (sname) {
			
 
				+		nb = 32;
			
 
				+	    } else {
			
 
				+		nb = 32;
			
 
				+	    }
			
 
				+	} else if (s_cmp(c3, "BRD", (ftnlen)1, (ftnlen)3) == 0) {
			
 
				+	    if (sname) {
			
 
				+		nb = 32;
			
 
				+	    } else {
			
 
				+		nb = 32;
			
 
				+	    }
			
 
				+	} else if (s_cmp(c3, "TRI", (ftnlen)1, (ftnlen)3) == 0) {
			
 
				+	    if (sname) {
			
 
				+		nb = 64;
			
 
				+	    } else {
			
 
				+		nb = 64;
			
 
				+	    }
			
 
				+	}
			
 
				+    } else if (s_cmp(c2, "PO", (ftnlen)1, (ftnlen)2) == 0) {
			
 
				+	if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) {
			
 
				+	    if (sname) {
			
 
				+		nb = 64;
			
 
				+	    } else {
			
 
				+		nb = 64;
			
 
				+	    }
			
 
				+	}
			
 
				+    } else if (s_cmp(c2, "SY", (ftnlen)1, (ftnlen)2) == 0) {
			
 
				+	if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) {
			
 
				+	    if (sname) {
			
 
				+		nb = 64;
			
 
				+	    } else {
			
 
				+		nb = 64;
			
 
				+	    }
			
 
				+	} else if (sname && s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) {
			
 
				+	    nb = 32;
			
 
				+	} else if (sname && s_cmp(c3, "GST", (ftnlen)1, (ftnlen)3) == 0) {
			
 
				+	    nb = 64;
			
 
				+	}
			
 
				+    } else if (cname && s_cmp(c2, "HE", (ftnlen)1, (ftnlen)2) == 0) {
			
 
				+	if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) {
			
 
				+	    nb = 64;
			
 
				+	} else if (s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) {
			
 
				+	    nb = 32;
			
 
				+	} else if (s_cmp(c3, "GST", (ftnlen)1, (ftnlen)3) == 0) {
			
 
				+	    nb = 64;
			
 
				+	}
			
 
				+    } else if (sname && s_cmp(c2, "OR", (ftnlen)1, (ftnlen)2) == 0) {
			
 
				+	if (*(unsigned char *)c3 == 'G') {
			
 
				+	    if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 
			
 
				+		    (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, (
			
 
				+		    ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) ==
			
 
				+		     0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(
			
 
				+		    c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
			
 
				+		    ftnlen)1, (ftnlen)2) == 0) {
			
 
				+		nb = 32;
			
 
				+	    }
			
 
				+	} else if (*(unsigned char *)c3 == 'M') {
			
 
				+	    if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 
			
 
				+		    (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, (
			
 
				+		    ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) ==
			
 
				+		     0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(
			
 
				+		    c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
			
 
				+		    ftnlen)1, (ftnlen)2) == 0) {
			
 
				+		nb = 32;
			
 
				+	    }
			
 
				+	}
			
 
				+    } else if (cname && s_cmp(c2, "UN", (ftnlen)1, (ftnlen)2) == 0) {
			
 
				+	if (*(unsigned char *)c3 == 'G') {
			
 
				+	    if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 
			
 
				+		    (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, (
			
 
				+		    ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) ==
			
 
				+		     0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(
			
 
				+		    c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
			
 
				+		    ftnlen)1, (ftnlen)2) == 0) {
			
 
				+		nb = 32;
			
 
				+	    }
			
 
				+	} else if (*(unsigned char *)c3 == 'M') {
			
 
				+	    if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 
			
 
				+		    (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, (
			
 
				+		    ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) ==
			
 
				+		     0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(
			
 
				+		    c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
			
 
				+		    ftnlen)1, (ftnlen)2) == 0) {
			
 
				+		nb = 32;
			
 
				+	    }
			
 
				+	}
			
 
				+    } else if (s_cmp(c2, "GB", (ftnlen)1, (ftnlen)2) == 0) {
			
 
				+	if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) {
			
 
				+	    if (sname) {
			
 
				+		if (*n4 <= 64) {
			
 
				+		    nb = 1;
			
 
				+		} else {
			
 
				+		    nb = 32;
			
 
				+		}
			
 
				+	    } else {
			
 
				+		if (*n4 <= 64) {
			
 
				+		    nb = 1;
			
 
				+		} else {
			
 
				+		    nb = 32;
			
 
				+		}
			
 
				+	    }
			
 
				+	}
			
 
				+    } else if (s_cmp(c2, "PB", (ftnlen)1, (ftnlen)2) == 0) {
			
 
				+	if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) {
			
 
				+	    if (sname) {
			
 
				+		if (*n2 <= 64) {
			
 
				+		    nb = 1;
			
 
				+		} else {
			
 
				+		    nb = 32;
			
 
				+		}
			
 
				+	    } else {
			
 
				+		if (*n2 <= 64) {
			
 
				+		    nb = 1;
			
 
				+		} else {
			
 
				+		    nb = 32;
			
 
				+		}
			
 
				+	    }
			
 
				+	}
			
 
				+    } else if (s_cmp(c2, "TR", (ftnlen)1, (ftnlen)2) == 0) {
			
 
				+	if (s_cmp(c3, "TRI", (ftnlen)1, (ftnlen)3) == 0) {
			
 
				+	    if (sname) {
			
 
				+		nb = 64;
			
 
				+	    } else {
			
 
				+		nb = 64;
			
 
				+	    }
			
 
				+	}
			
 
				+    } else if (s_cmp(c2, "LA", (ftnlen)1, (ftnlen)2) == 0) {
			
 
				+	if (s_cmp(c3, "UUM", (ftnlen)1, (ftnlen)3) == 0) {
			
 
				+	    if (sname) {
			
 
				+		nb = 64;
			
 
				+	    } else {
			
 
				+		nb = 64;
			
 
				+	    }
			
 
				+	}
			
 
				+    } else if (sname && s_cmp(c2, "ST", (ftnlen)1, (ftnlen)2) == 0) {
			
 
				+	if (s_cmp(c3, "EBZ", (ftnlen)1, (ftnlen)3) == 0) {
			
 
				+	    nb = 1;
			
 
				+	}
			
 
				+    }
			
 
				+    ret_val = nb;
			
 
				+    return ret_val;
			
 
				+
			
 
				+L60:
			
 
				+
			
 
				+/*     ISPEC = 2:  minimum block size */
			
 
				+
			
 
				+    nbmin = 2;
			
 
				+    if (s_cmp(c2, "GE", (ftnlen)1, (ftnlen)2) == 0) {
			
 
				+	if (s_cmp(c3, "QRF", (ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "RQF", (
			
 
				+		ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)1, (
			
 
				+		ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)1, (ftnlen)3) == 0)
			
 
				+		 {
			
 
				+	    if (sname) {
			
 
				+		nbmin = 2;
			
 
				+	    } else {
			
 
				+		nbmin = 2;
			
 
				+	    }
			
 
				+	} else if (s_cmp(c3, "HRD", (ftnlen)1, (ftnlen)3) == 0) {
			
 
				+	    if (sname) {
			
 
				+		nbmin = 2;
			
 
				+	    } else {
			
 
				+		nbmin = 2;
			
 
				+	    }
			
 
				+	} else if (s_cmp(c3, "BRD", (ftnlen)1, (ftnlen)3) == 0) {
			
 
				+	    if (sname) {
			
 
				+		nbmin = 2;
			
 
				+	    } else {
			
 
				+		nbmin = 2;
			
 
				+	    }
			
 
				+	} else if (s_cmp(c3, "TRI", (ftnlen)1, (ftnlen)3) == 0) {
			
 
				+	    if (sname) {
			
 
				+		nbmin = 2;
			
 
				+	    } else {
			
 
				+		nbmin = 2;
			
 
				+	    }
			
 
				+	}
			
 
				+    } else if (s_cmp(c2, "SY", (ftnlen)1, (ftnlen)2) == 0) {
			
 
				+	if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) {
			
 
				+	    if (sname) {
			
 
				+		nbmin = 8;
			
 
				+	    } else {
			
 
				+		nbmin = 8;
			
 
				+	    }
			
 
				+	} else if (sname && s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) {
			
 
				+	    nbmin = 2;
			
 
				+	}
			
 
				+    } else if (cname && s_cmp(c2, "HE", (ftnlen)1, (ftnlen)2) == 0) {
			
 
				+	if (s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) {
			
 
				+	    nbmin = 2;
			
 
				+	}
			
 
				+    } else if (sname && s_cmp(c2, "OR", (ftnlen)1, (ftnlen)2) == 0) {
			
 
				+	if (*(unsigned char *)c3 == 'G') {
			
 
				+	    if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 
			
 
				+		    (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, (
			
 
				+		    ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) ==
			
 
				+		     0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(
			
 
				+		    c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
			
 
				+		    ftnlen)1, (ftnlen)2) == 0) {
			
 
				+		nbmin = 2;
			
 
				+	    }
			
 
				+	} else if (*(unsigned char *)c3 == 'M') {
			
 
				+	    if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 
			
 
				+		    (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, (
			
 
				+		    ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) ==
			
 
				+		     0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(
			
 
				+		    c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
			
 
				+		    ftnlen)1, (ftnlen)2) == 0) {
			
 
				+		nbmin = 2;
			
 
				+	    }
			
 
				+	}
			
 
				+    } else if (cname && s_cmp(c2, "UN", (ftnlen)1, (ftnlen)2) == 0) {
			
 
				+	if (*(unsigned char *)c3 == 'G') {
			
 
				+	    if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 
			
 
				+		    (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, (
			
 
				+		    ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) ==
			
 
				+		     0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(
			
 
				+		    c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
			
 
				+		    ftnlen)1, (ftnlen)2) == 0) {
			
 
				+		nbmin = 2;
			
 
				+	    }
			
 
				+	} else if (*(unsigned char *)c3 == 'M') {
			
 
				+	    if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 
			
 
				+		    (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, (
			
 
				+		    ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) ==
			
 
				+		     0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(
			
 
				+		    c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
			
 
				+		    ftnlen)1, (ftnlen)2) == 0) {
			
 
				+		nbmin = 2;
			
 
				+	    }
			
 
				+	}
			
 
				+    }
			
 
				+    ret_val = nbmin;
			
 
				+    return ret_val;
			
 
				+
			
 
				+L70:
			
 
				+
			
 
				+/*     ISPEC = 3:  crossover point */
			
 
				+
			
 
				+    nx = 0;
			
 
				+    if (s_cmp(c2, "GE", (ftnlen)1, (ftnlen)2) == 0) {
			
 
				+	if (s_cmp(c3, "QRF", (ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "RQF", (
			
 
				+		ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)1, (
			
 
				+		ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)1, (ftnlen)3) == 0)
			
 
				+		 {
			
 
				+	    if (sname) {
			
 
				+		nx = 128;
			
 
				+	    } else {
			
 
				+		nx = 128;
			
 
				+	    }
			
 
				+	} else if (s_cmp(c3, "HRD", (ftnlen)1, (ftnlen)3) == 0) {
			
 
				+	    if (sname) {
			
 
				+		nx = 128;
			
 
				+	    } else {
			
 
				+		nx = 128;
			
 
				+	    }
			
 
				+	} else if (s_cmp(c3, "BRD", (ftnlen)1, (ftnlen)3) == 0) {
			
 
				+	    if (sname) {
			
 
				+		nx = 128;
			
 
				+	    } else {
			
 
				+		nx = 128;
			
 
				+	    }
			
 
				+	}
			
 
				+    } else if (s_cmp(c2, "SY", (ftnlen)1, (ftnlen)2) == 0) {
			
 
				+	if (sname && s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) {
			
 
				+	    nx = 32;
			
 
				+	}
			
 
				+    } else if (cname && s_cmp(c2, "HE", (ftnlen)1, (ftnlen)2) == 0) {
			
 
				+	if (s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) {
			
 
				+	    nx = 32;
			
 
				+	}
			
 
				+    } else if (sname && s_cmp(c2, "OR", (ftnlen)1, (ftnlen)2) == 0) {
			
 
				+	if (*(unsigned char *)c3 == 'G') {
			
 
				+	    if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 
			
 
				+		    (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, (
			
 
				+		    ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) ==
			
 
				+		     0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(
			
 
				+		    c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
			
 
				+		    ftnlen)1, (ftnlen)2) == 0) {
			
 
				+		nx = 128;
			
 
				+	    }
			
 
				+	}
			
 
				+    } else if (cname && s_cmp(c2, "UN", (ftnlen)1, (ftnlen)2) == 0) {
			
 
				+	if (*(unsigned char *)c3 == 'G') {
			
 
				+	    if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 
			
 
				+		    (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, (
			
 
				+		    ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) ==
			
 
				+		     0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(
			
 
				+		    c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
			
 
				+		    ftnlen)1, (ftnlen)2) == 0) {
			
 
				+		nx = 128;
			
 
				+	    }
			
 
				+	}
			
 
				+    }
			
 
				+    ret_val = nx;
			
 
				+    return ret_val;
			
 
				+
			
 
				+L80:
			
 
				+
			
 
				+/*     ISPEC = 4:  number of shifts (used by xHSEQR) */
			
 
				+
			
 
				+    ret_val = 6;
			
 
				+    return ret_val;
			
 
				+
			
 
				+L90:
			
 
				+
			
 
				+/*     ISPEC = 5:  minimum column dimension (not used) */
			
 
				+
			
 
				+    ret_val = 2;
			
 
				+    return ret_val;
			
 
				+
			
 
				+L100:
			
 
				+
			
 
				+/*     ISPEC = 6:  crossover point for SVD (used by xGELSS and xGESVD) */
			
 
				+
			
 
				+    ret_val = (integer) ((real) min(*n1,*n2) * 1.6f);
			
 
				+    return ret_val;
			
 
				+
			
 
				+L110:
			
 
				+
			
 
				+/*     ISPEC = 7:  number of processors (not used) */
			
 
				+
			
 
				+    ret_val = 1;
			
 
				+    return ret_val;
			
 
				+
			
 
				+L120:
			
 
				+
			
 
				+/*     ISPEC = 8:  crossover point for multishift (used by xHSEQR) */
			
 
				+
			
 
				+    ret_val = 50;
			
 
				+    return ret_val;
			
 
				+
			
 
				+L130:
			
 
				+
			
 
				+/*     ISPEC = 9:  maximum size of the subproblems at the bottom of the */
			
 
				+/*                 computation tree in the divide-and-conquer algorithm */
			
 
				+/*                 (used by xGELSD and xGESDD) */
			
 
				+
			
 
				+    ret_val = 25;
			
 
				+    return ret_val;
			
 
				+
			
 
				+L140:
			
 
				+
			
 
				+/*     ISPEC = 10: ieee NaN arithmetic can be trusted not to trap */
			
 
				+
			
 
				+/*     ILAENV = 0 */
			
 
				+    ret_val = 1;
			
 
				+    if (ret_val == 1) {
			
 
				+	ret_val = ieeeck_(&c__1, &c_b163, &c_b164);
			
 
				+    }
			
 
				+    return ret_val;
			
 
				+
			
 
				+L150:
			
 
				+
			
 
				+/*     ISPEC = 11: infinity arithmetic can be trusted not to trap */
			
 
				+
			
 
				+/*     ILAENV = 0 */
			
 
				+    ret_val = 1;
			
 
				+    if (ret_val == 1) {
			
 
				+	ret_val = ieeeck_(&c__0, &c_b163, &c_b164);
			
 
				+    }
			
 
				+    return ret_val;
			
 
				+
			
 
				+L160:
			
 
				+
			
 
				+/*     12 <= ISPEC <= 16: xHSEQR or one of its subroutines. */
			
 
				+
			
 
				+    ret_val = iparmq_(ispec, name__, opts, n1, n2, n3, n4)
			
 
				+	    ;
			
 
				+    return ret_val;
			
 
				+
			
 
				+/*     End of ILAENV */
			
 
				+
			
 
				+} /* ilaenv_ */
			
--- a/min-dgels/additional/iparmq.c
+++ b/min-dgels/additional/iparmq.c
@@ -0,0 +1,282 @@
 
				+/* iparmq.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+integer iparmq_(integer *ispec, char *name__, char *opts, integer *n, integer 
			
 
				+	*ilo, integer *ihi, integer *lwork)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    integer ret_val, i__1, i__2;
			
 
				+    real r__1;
			
 
				+
			
 
				+    /* Builtin functions */
			
 
				+    double log(doublereal);
			
 
				+    integer i_nint(real *);
			
 
				+
			
 
				+    /* Local variables */
			
 
				+    integer nh, ns;
			
 
				+
			
 
				+
			
 
				+/*  -- LAPACK auxiliary routine (version 3.2) -- */
			
 
				+/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
			
 
				+/*     November 2006 */
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*       This program sets problem and machine dependent parameters */
			
 
				+/*       useful for xHSEQR and its subroutines. It is called whenever */
			
 
				+/*       ILAENV is called with 12 <= ISPEC <= 16 */
			
 
				+
			
 
				+/*  Arguments */
			
 
				+/*  ========= */
			
 
				+
			
 
				+/*       ISPEC  (input) integer scalar */
			
 
				+/*              ISPEC specifies which tunable parameter IPARMQ should */
			
 
				+/*              return. */
			
 
				+
			
 
				+/*              ISPEC=12: (INMIN)  Matrices of order nmin or less */
			
 
				+/*                        are sent directly to xLAHQR, the implicit */
			
 
				+/*                        double shift QR algorithm.  NMIN must be */
			
 
				+/*                        at least 11. */
			
 
				+
			
 
				+/*              ISPEC=13: (INWIN)  Size of the deflation window. */
			
 
				+/*                        This is best set greater than or equal to */
			
 
				+/*                        the number of simultaneous shifts NS. */
			
 
				+/*                        Larger matrices benefit from larger deflation */
			
 
				+/*                        windows. */
			
 
				+
			
 
				+/*              ISPEC=14: (INIBL) Determines when to stop nibbling and */
			
 
				+/*                        invest in an (expensive) multi-shift QR sweep. */
			
 
				+/*                        If the aggressive early deflation subroutine */
			
 
				+/*                        finds LD converged eigenvalues from an order */
			
 
				+/*                        NW deflation window and LD.GT.(NW*NIBBLE)/100, */
			
 
				+/*                        then the next QR sweep is skipped and early */
			
 
				+/*                        deflation is applied immediately to the */
			
 
				+/*                        remaining active diagonal block.  Setting */
			
 
				+/*                        IPARMQ(ISPEC=14) = 0 causes TTQRE to skip a */
			
 
				+/*                        multi-shift QR sweep whenever early deflation */
			
 
				+/*                        finds a converged eigenvalue.  Setting */
			
 
				+/*                        IPARMQ(ISPEC=14) greater than or equal to 100 */
			
 
				+/*                        prevents TTQRE from skipping a multi-shift */
			
 
				+/*                        QR sweep. */
			
 
				+
			
 
				+/*              ISPEC=15: (NSHFTS) The number of simultaneous shifts in */
			
 
				+/*                        a multi-shift QR iteration. */
			
 
				+
			
 
				+/*              ISPEC=16: (IACC22) IPARMQ is set to 0, 1 or 2 with the */
			
 
				+/*                        following meanings. */
			
 
				+/*                        0:  During the multi-shift QR sweep, */
			
 
				+/*                            xLAQR5 does not accumulate reflections and */
			
 
				+/*                            does not use matrix-matrix multiply to */
			
 
				+/*                            update the far-from-diagonal matrix */
			
 
				+/*                            entries. */
			
 
				+/*                        1:  During the multi-shift QR sweep, */
			
 
				+/*                            xLAQR5 and/or xLAQRaccumulates reflections and uses */
			
 
				+/*                            matrix-matrix multiply to update the */
			
 
				+/*                            far-from-diagonal matrix entries. */
			
 
				+/*                        2:  During the multi-shift QR sweep. */
			
 
				+/*                            xLAQR5 accumulates reflections and takes */
			
 
				+/*                            advantage of 2-by-2 block structure during */
			
 
				+/*                            matrix-matrix multiplies. */
			
 
				+/*                        (If xTRMM is slower than xGEMM, then */
			
 
				+/*                        IPARMQ(ISPEC=16)=1 may be more efficient than */
			
 
				+/*                        IPARMQ(ISPEC=16)=2 despite the greater level of */
			
 
				+/*                        arithmetic work implied by the latter choice.) */
			
 
				+
			
 
				+/*       NAME    (input) character string */
			
 
				+/*               Name of the calling subroutine */
			
 
				+
			
 
				+/*       OPTS    (input) character string */
			
 
				+/*               This is a concatenation of the string arguments to */
			
 
				+/*               TTQRE. */
			
 
				+
			
 
				+/*       N       (input) integer scalar */
			
 
				+/*               N is the order of the Hessenberg matrix H. */
			
 
				+
			
 
				+/*       ILO     (input) INTEGER */
			
 
				+/*       IHI     (input) INTEGER */
			
 
				+/*               It is assumed that H is already upper triangular */
			
 
				+/*               in rows and columns 1:ILO-1 and IHI+1:N. */
			
 
				+
			
 
				+/*       LWORK   (input) integer scalar */
			
 
				+/*               The amount of workspace available. */
			
 
				+
			
 
				+/*  Further Details */
			
 
				+/*  =============== */
			
 
				+
			
 
				+/*       Little is known about how best to choose these parameters. */
			
 
				+/*       It is possible to use different values of the parameters */
			
 
				+/*       for each of CHSEQR, DHSEQR, SHSEQR and ZHSEQR. */
			
 
				+
			
 
				+/*       It is probably best to choose different parameters for */
			
 
				+/*       different matrices and different parameters at different */
			
 
				+/*       times during the iteration, but this has not been */
			
 
				+/*       implemented --- yet. */
			
 
				+
			
 
				+
			
 
				+/*       The best choices of most of the parameters depend */
			
 
				+/*       in an ill-understood way on the relative execution */
			
 
				+/*       rate of xLAQR3 and xLAQR5 and on the nature of each */
			
 
				+/*       particular eigenvalue problem.  Experiment may be the */
			
 
				+/*       only practical way to determine which choices are most */
			
 
				+/*       effective. */
			
 
				+
			
 
				+/*       Following is a list of default values supplied by IPARMQ. */
			
 
				+/*       These defaults may be adjusted in order to attain better */
			
 
				+/*       performance in any particular computational environment. */
			
 
				+
			
 
				+/*       IPARMQ(ISPEC=12) The xLAHQR vs xLAQR0 crossover point. */
			
 
				+/*                        Default: 75. (Must be at least 11.) */
			
 
				+
			
 
				+/*       IPARMQ(ISPEC=13) Recommended deflation window size. */
			
 
				+/*                        This depends on ILO, IHI and NS, the */
			
 
				+/*                        number of simultaneous shifts returned */
			
 
				+/*                        by IPARMQ(ISPEC=15).  The default for */
			
 
				+/*                        (IHI-ILO+1).LE.500 is NS.  The default */
			
 
				+/*                        for (IHI-ILO+1).GT.500 is 3*NS/2. */
			
 
				+
			
 
				+/*       IPARMQ(ISPEC=14) Nibble crossover point.  Default: 14. */
			
 
				+
			
 
				+/*       IPARMQ(ISPEC=15) Number of simultaneous shifts, NS. */
			
 
				+/*                        a multi-shift QR iteration. */
			
 
				+
			
 
				+/*                        If IHI-ILO+1 is ... */
			
 
				+
			
 
				+/*                        greater than      ...but less    ... the */
			
 
				+/*                        or equal to ...      than        default is */
			
 
				+
			
 
				+/*                                0               30       NS =   2+ */
			
 
				+/*                               30               60       NS =   4+ */
			
 
				+/*                               60              150       NS =  10 */
			
 
				+/*                              150              590       NS =  ** */
			
 
				+/*                              590             3000       NS =  64 */
			
 
				+/*                             3000             6000       NS = 128 */
			
 
				+/*                             6000             infinity   NS = 256 */
			
 
				+
			
 
				+/*                    (+)  By default matrices of this order are */
			
 
				+/*                         passed to the implicit double shift routine */
			
 
				+/*                         xLAHQR.  See IPARMQ(ISPEC=12) above.   These */
			
 
				+/*                         values of NS are used only in case of a rare */
			
 
				+/*                         xLAHQR failure. */
			
 
				+
			
 
				+/*                    (**) The asterisks (**) indicate an ad-hoc */
			
 
				+/*                         function increasing from 10 to 64. */
			
 
				+
			
 
				+/*       IPARMQ(ISPEC=16) Select structured matrix multiply. */
			
 
				+/*                        (See ISPEC=16 above for details.) */
			
 
				+/*                        Default: 3. */
			
 
				+
			
 
				+/*     ================================================================ */
			
 
				+/*     .. Parameters .. */
			
 
				+/*     .. */
			
 
				+/*     .. Local Scalars .. */
			
 
				+/*     .. */
			
 
				+/*     .. Intrinsic Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. Executable Statements .. */
			
 
				+    if (*ispec == 15 || *ispec == 13 || *ispec == 16) {
			
 
				+
			
 
				+/*        ==== Set the number simultaneous shifts ==== */
			
 
				+
			
 
				+	nh = *ihi - *ilo + 1;
			
 
				+	ns = 2;
			
 
				+	if (nh >= 30) {
			
 
				+	    ns = 4;
			
 
				+	}
			
 
				+	if (nh >= 60) {
			
 
				+	    ns = 10;
			
 
				+	}
			
 
				+	if (nh >= 150) {
			
 
				+/* Computing MAX */
			
 
				+	    r__1 = log((real) nh) / log(2.f);
			
 
				+	    i__1 = 10, i__2 = nh / i_nint(&r__1);
			
 
				+	    ns = max(i__1,i__2);
			
 
				+	}
			
 
				+	if (nh >= 590) {
			
 
				+	    ns = 64;
			
 
				+	}
			
 
				+	if (nh >= 3000) {
			
 
				+	    ns = 128;
			
 
				+	}
			
 
				+	if (nh >= 6000) {
			
 
				+	    ns = 256;
			
 
				+	}
			
 
				+/* Computing MAX */
			
 
				+	i__1 = 2, i__2 = ns - ns % 2;
			
 
				+	ns = max(i__1,i__2);
			
 
				+    }
			
 
				+
			
 
				+    if (*ispec == 12) {
			
 
				+
			
 
				+
			
 
				+/*        ===== Matrices of order smaller than NMIN get sent */
			
 
				+/*        .     to xLAHQR, the classic double shift algorithm. */
			
 
				+/*        .     This must be at least 11. ==== */
			
 
				+
			
 
				+	ret_val = 75;
			
 
				+
			
 
				+    } else if (*ispec == 14) {
			
 
				+
			
 
				+/*        ==== INIBL: skip a multi-shift qr iteration and */
			
 
				+/*        .    whenever aggressive early deflation finds */
			
 
				+/*        .    at least (NIBBLE*(window size)/100) deflations. ==== */
			
 
				+
			
 
				+	ret_val = 14;
			
 
				+
			
 
				+    } else if (*ispec == 15) {
			
 
				+
			
 
				+/*        ==== NSHFTS: The number of simultaneous shifts ===== */
			
 
				+
			
 
				+	ret_val = ns;
			
 
				+
			
 
				+    } else if (*ispec == 13) {
			
 
				+
			
 
				+/*        ==== NW: deflation window size.  ==== */
			
 
				+
			
 
				+	if (nh <= 500) {
			
 
				+	    ret_val = ns;
			
 
				+	} else {
			
 
				+	    ret_val = ns * 3 / 2;
			
 
				+	}
			
 
				+
			
 
				+    } else if (*ispec == 16) {
			
 
				+
			
 
				+/*        ==== IACC22: Whether to accumulate reflections */
			
 
				+/*        .     before updating the far-from-diagonal elements */
			
 
				+/*        .     and whether to use 2-by-2 block structure while */
			
 
				+/*        .     doing it.  A small amount of work could be saved */
			
 
				+/*        .     by making this choice dependent also upon the */
			
 
				+/*        .     NH=IHI-ILO+1. */
			
 
				+
			
 
				+	ret_val = 0;
			
 
				+	if (ns >= 14) {
			
 
				+	    ret_val = 1;
			
 
				+	}
			
 
				+	if (ns >= 14) {
			
 
				+	    ret_val = 2;
			
 
				+	}
			
 
				+
			
 
				+    } else {
			
 
				+/*        ===== invalid value of ispec ===== */
			
 
				+	ret_val = -1;
			
 
				+
			
 
				+    }
			
 
				+
			
 
				+/*     ==== End of IPARMQ ==== */
			
 
				+
			
 
				+    return ret_val;
			
 
				+} /* iparmq_ */
			
--- a/min-dgels/additional/lsame.c
+++ b/min-dgels/additional/lsame.c
@@ -0,0 +1,117 @@
 
				+/* lsame.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+logical lsame_(char *ca, char *cb)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    logical ret_val;
			
 
				+
			
 
				+    /* Local variables */
			
 
				+    integer inta, intb, zcode;
			
 
				+
			
 
				+
			
 
				+/*  -- LAPACK auxiliary routine (version 3.2) -- */
			
 
				+/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
			
 
				+/*     November 2006 */
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*  LSAME returns .TRUE. if CA is the same letter as CB regardless of */
			
 
				+/*  case. */
			
 
				+
			
 
				+/*  Arguments */
			
 
				+/*  ========= */
			
 
				+
			
 
				+/*  CA      (input) CHARACTER*1 */
			
 
				+/*  CB      (input) CHARACTER*1 */
			
 
				+/*          CA and CB specify the single characters to be compared. */
			
 
				+
			
 
				+/* ===================================================================== */
			
 
				+
			
 
				+/*     .. Intrinsic Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. Local Scalars .. */
			
 
				+/*     .. */
			
 
				+/*     .. Executable Statements .. */
			
 
				+
			
 
				+/*     Test if the characters are equal */
			
 
				+
			
 
				+    ret_val = *(unsigned char *)ca == *(unsigned char *)cb;
			
 
				+    if (ret_val) {
			
 
				+	return ret_val;
			
 
				+    }
			
 
				+
			
 
				+/*     Now test for equivalence if both characters are alphabetic. */
			
 
				+
			
 
				+    zcode = 'Z';
			
 
				+
			
 
				+/*     Use 'Z' rather than 'A' so that ASCII can be detected on Prime */
			
 
				+/*     machines, on which ICHAR returns a value with bit 8 set. */
			
 
				+/*     ICHAR('A') on Prime machines returns 193 which is the same as */
			
 
				+/*     ICHAR('A') on an EBCDIC machine. */
			
 
				+
			
 
				+    inta = *(unsigned char *)ca;
			
 
				+    intb = *(unsigned char *)cb;
			
 
				+
			
 
				+    if (zcode == 90 || zcode == 122) {
			
 
				+
			
 
				+/*        ASCII is assumed - ZCODE is the ASCII code of either lower or */
			
 
				+/*        upper case 'Z'. */
			
 
				+
			
 
				+	if (inta >= 97 && inta <= 122) {
			
 
				+	    inta += -32;
			
 
				+	}
			
 
				+	if (intb >= 97 && intb <= 122) {
			
 
				+	    intb += -32;
			
 
				+	}
			
 
				+
			
 
				+    } else if (zcode == 233 || zcode == 169) {
			
 
				+
			
 
				+/*        EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or */
			
 
				+/*        upper case 'Z'. */
			
 
				+
			
 
				+	if (inta >= 129 && inta <= 137 || inta >= 145 && inta <= 153 || inta 
			
 
				+		>= 162 && inta <= 169) {
			
 
				+	    inta += 64;
			
 
				+	}
			
 
				+	if (intb >= 129 && intb <= 137 || intb >= 145 && intb <= 153 || intb 
			
 
				+		>= 162 && intb <= 169) {
			
 
				+	    intb += 64;
			
 
				+	}
			
 
				+
			
 
				+    } else if (zcode == 218 || zcode == 250) {
			
 
				+
			
 
				+/*        ASCII is assumed, on Prime machines - ZCODE is the ASCII code */
			
 
				+/*        plus 128 of either lower or upper case 'Z'. */
			
 
				+
			
 
				+	if (inta >= 225 && inta <= 250) {
			
 
				+	    inta += -32;
			
 
				+	}
			
 
				+	if (intb >= 225 && intb <= 250) {
			
 
				+	    intb += -32;
			
 
				+	}
			
 
				+    }
			
 
				+    ret_val = inta == intb;
			
 
				+
			
 
				+/*     RETURN */
			
 
				+
			
 
				+/*     End of LSAME */
			
 
				+
			
 
				+    return ret_val;
			
 
				+} /* lsame_ */
			
--- a/min-dgels/additional/mindgels.h
+++ b/min-dgels/additional/mindgels.h
@@ -0,0 +1,8 @@
 
				+#ifndef DGELS_H 
			
 
				+#define DGELS_H
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+
			
 
				+int dgels_(char *trans, integer *m, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *work, integer *lwork, integer *info);
			
 
				+
			
 
				+#endif
			
--- a/min-dgels/additional/pow_di.c
+++ b/min-dgels/additional/pow_di.c
@@ -0,0 +1,41 @@
 
				+#include "f2c.h"
			
 
				+#ifdef __cplusplus
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+#ifdef KR_headers
			
 
				+double pow_di(ap, bp) doublereal *ap; integer *bp;
			
 
				+#else
			
 
				+double pow_di(doublereal *ap, integer *bp)
			
 
				+#endif
			
 
				+{
			
 
				+double pow, x;
			
 
				+integer n;
			
 
				+unsigned long u;
			
 
				+
			
 
				+pow = 1;
			
 
				+x = *ap;
			
 
				+n = *bp;
			
 
				+
			
 
				+if(n != 0)
			
 
				+	{
			
 
				+	if(n < 0)
			
 
				+		{
			
 
				+		n = -n;
			
 
				+		x = 1/x;
			
 
				+		}
			
 
				+	for(u = n; ; )
			
 
				+		{
			
 
				+		if(u & 01)
			
 
				+			pow *= x;
			
 
				+		if(u >>= 1)
			
 
				+			x *= x;
			
 
				+		else
			
 
				+			break;
			
 
				+		}
			
 
				+	}
			
 
				+return(pow);
			
 
				+}
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
--- a/min-dgels/additional/s_cat.c
+++ b/min-dgels/additional/s_cat.c
@@ -0,0 +1,86 @@
 
				+/* Unless compiled with -DNO_OVERWRITE, this variant of s_cat allows the
			
 
				+ * target of a concatenation to appear on its right-hand side (contrary
			
 
				+ * to the Fortran 77 Standard, but in accordance with Fortran 90).
			
 
				+ */
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#ifndef NO_OVERWRITE
			
 
				+#include "stdio.h"
			
 
				+#undef abs
			
 
				+#ifdef KR_headers
			
 
				+ extern char *F77_aloc();
			
 
				+ extern void free();
			
 
				+ extern void exit_();
			
 
				+#else
			
 
				+#undef min
			
 
				+#undef max
			
 
				+#include "stdlib.h"
			
 
				+extern
			
 
				+#ifdef __cplusplus
			
 
				+	"C"
			
 
				+#endif
			
 
				+	char *F77_aloc(ftnlen, const char*);
			
 
				+#endif
			
 
				+#include "string.h"
			
 
				+#endif /* NO_OVERWRITE */
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+ VOID
			
 
				+#ifdef KR_headers
			
 
				+s_cat(lp, rpp, rnp, np, ll) char *lp, *rpp[]; ftnint rnp[], *np; ftnlen ll;
			
 
				+#else
			
 
				+s_cat(char *lp, char *rpp[], ftnint rnp[], ftnint *np, ftnlen ll)
			
 
				+#endif
			
 
				+{
			
 
				+	ftnlen i, nc;
			
 
				+	char *rp;
			
 
				+	ftnlen n = *np;
			
 
				+#ifndef NO_OVERWRITE
			
 
				+	ftnlen L, m;
			
 
				+	char *lp0, *lp1;
			
 
				+
			
 
				+	lp0 = 0;
			
 
				+	lp1 = lp;
			
 
				+	L = ll;
			
 
				+	i = 0;
			
 
				+	while(i < n) {
			
 
				+		rp = rpp[i];
			
 
				+		m = rnp[i++];
			
 
				+		if (rp >= lp1 || rp + m <= lp) {
			
 
				+			if ((L -= m) <= 0) {
			
 
				+				n = i;
			
 
				+				break;
			
 
				+				}
			
 
				+			lp1 += m;
			
 
				+			continue;
			
 
				+			}
			
 
				+		lp0 = lp;
			
 
				+		lp = lp1 = F77_aloc(L = ll, "s_cat");
			
 
				+		break;
			
 
				+		}
			
 
				+	lp1 = lp;
			
 
				+#endif /* NO_OVERWRITE */
			
 
				+	for(i = 0 ; i < n ; ++i) {
			
 
				+		nc = ll;
			
 
				+		if(rnp[i] < nc)
			
 
				+			nc = rnp[i];
			
 
				+		ll -= nc;
			
 
				+		rp = rpp[i];
			
 
				+		while(--nc >= 0)
			
 
				+			*lp++ = *rp++;
			
 
				+		}
			
 
				+	while(--ll >= 0)
			
 
				+		*lp++ = ' ';
			
 
				+#ifndef NO_OVERWRITE
			
 
				+	if (lp0) {
			
 
				+		memcpy(lp0, lp1, L);
			
 
				+		free(lp1);
			
 
				+		}
			
 
				+#endif
			
 
				+	}
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
--- a/min-dgels/additional/sysdep1.h
+++ b/min-dgels/additional/sysdep1.h
@@ -0,0 +1,66 @@
 
				+#ifndef SYSDEP_H_INCLUDED
			
 
				+#define SYSDEP_H_INCLUDED
			
 
				+#undef USE_LARGEFILE
			
 
				+#ifndef NO_LONG_LONG
			
 
				+
			
 
				+#ifdef __sun__
			
 
				+#define USE_LARGEFILE
			
 
				+#define OFF_T off64_t
			
 
				+#endif
			
 
				+
			
 
				+#ifdef __linux__
			
 
				+#define USE_LARGEFILE
			
 
				+#define OFF_T __off64_t
			
 
				+#endif
			
 
				+
			
 
				+#ifdef _AIX43
			
 
				+#define _LARGE_FILES
			
 
				+#define _LARGE_FILE_API
			
 
				+#define USE_LARGEFILE
			
 
				+#endif /*_AIX43*/
			
 
				+
			
 
				+#ifdef __hpux
			
 
				+#define _FILE64
			
 
				+#define _LARGEFILE64_SOURCE
			
 
				+#define USE_LARGEFILE
			
 
				+#endif /*__hpux*/
			
 
				+
			
 
				+#ifdef __sgi
			
 
				+#define USE_LARGEFILE
			
 
				+#endif /*__sgi*/
			
 
				+
			
 
				+#ifdef __FreeBSD__
			
 
				+#define OFF_T off_t
			
 
				+#define FSEEK fseeko
			
 
				+#define FTELL ftello
			
 
				+#endif
			
 
				+
			
 
				+#ifdef USE_LARGEFILE
			
 
				+#ifndef OFF_T
			
 
				+#define OFF_T off64_t
			
 
				+#endif
			
 
				+#define _LARGEFILE_SOURCE
			
 
				+#define _LARGEFILE64_SOURCE
			
 
				+#include <sys/types.h>
			
 
				+#include <sys/stat.h>
			
 
				+#define FOPEN fopen64
			
 
				+#define FREOPEN freopen64
			
 
				+#define FSEEK fseeko64
			
 
				+#define FSTAT fstat64
			
 
				+#define FTELL ftello64
			
 
				+#define FTRUNCATE ftruncate64
			
 
				+#define STAT stat64
			
 
				+#define STAT_ST stat64
			
 
				+#endif /*USE_LARGEFILE*/
			
 
				+#endif /*NO_LONG_LONG*/
			
 
				+
			
 
				+#ifndef NON_UNIX_STDIO
			
 
				+#ifndef USE_LARGEFILE
			
 
				+#define _INCLUDE_POSIX_SOURCE	/* for HP-UX */
			
 
				+#define _INCLUDE_XOPEN_SOURCE	/* for HP-UX */
			
 
				+#include "sys/types.h"
			
 
				+#include "sys/stat.h"
			
 
				+#endif
			
 
				+#endif
			
 
				+
			
 
				+#endif /*SYSDEP_H_INCLUDED*/
			
--- a/min-dgels/additional/wsfe.c
+++ b/min-dgels/additional/wsfe.c
@@ -0,0 +1,78 @@
 
				+/*write sequential formatted external*/
			
 
				+#include "f2c.h"
			
 
				+#include "fio.h"
			
 
				+#include "fmt.h"
			
 
				+#ifdef __cplusplus
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+ int
			
 
				+x_wSL(Void)
			
 
				+{
			
 
				+	int n = f__putbuf('\n');
			
 
				+	f__hiwater = f__recpos = f__cursor = 0;
			
 
				+	return(n == 0);
			
 
				+}
			
 
				+
			
 
				+ static int
			
 
				+xw_end(Void)
			
 
				+{
			
 
				+	int n;
			
 
				+
			
 
				+	if(f__nonl) {
			
 
				+		f__putbuf(n = 0);
			
 
				+		fflush(f__cf);
			
 
				+		}
			
 
				+	else
			
 
				+		n = f__putbuf('\n');
			
 
				+	f__hiwater = f__recpos = f__cursor = 0;
			
 
				+	return n;
			
 
				+}
			
 
				+
			
 
				+ static int
			
 
				+xw_rev(Void)
			
 
				+{
			
 
				+	int n = 0;
			
 
				+	if(f__workdone) {
			
 
				+		n = f__putbuf('\n');
			
 
				+		f__workdone = 0;
			
 
				+		}
			
 
				+	f__hiwater = f__recpos = f__cursor = 0;
			
 
				+	return n;
			
 
				+}
			
 
				+
			
 
				+#ifdef KR_headers
			
 
				+integer s_wsfe(a) cilist *a;	/*start*/
			
 
				+#else
			
 
				+integer s_wsfe(cilist *a)	/*start*/
			
 
				+#endif
			
 
				+{	int n;
			
 
				+	if(!f__init) f_init();
			
 
				+	f__reading=0;
			
 
				+	f__sequential=1;
			
 
				+	f__formatted=1;
			
 
				+	f__external=1;
			
 
				+	if(n=c_sfe(a)) return(n);
			
 
				+	f__elist=a;
			
 
				+	f__hiwater = f__cursor=f__recpos=0;
			
 
				+	f__nonl = 0;
			
 
				+	f__scale=0;
			
 
				+	f__fmtbuf=a->cifmt;
			
 
				+	f__cf=f__curunit->ufd;
			
 
				+	if(pars_f(f__fmtbuf)<0) err(a->cierr,100,"startio");
			
 
				+	f__putn= x_putc;
			
 
				+	f__doed= w_ed;
			
 
				+	f__doned= w_ned;
			
 
				+	f__doend=xw_end;
			
 
				+	f__dorevert=xw_rev;
			
 
				+	f__donewrec=x_wSL;
			
 
				+	fmt_bg();
			
 
				+	f__cplus=0;
			
 
				+	f__cblank=f__curunit->ublnk;
			
 
				+	if(f__curunit->uwrt != 1 && f__nowwriting(f__curunit))
			
 
				+		err(a->cierr,errno,"write start");
			
 
				+	return(0);
			
 
				+}
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
--- a/min-dgels/additional/xerbla.c
+++ b/min-dgels/additional/xerbla.c
@@ -0,0 +1,65 @@
 
				+/* xerbla.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+#include "stdio.h"
			
 
				+
			
 
				+/* Table of constant values */
			
 
				+
			
 
				+static integer c__1 = 1;
			
 
				+
			
 
				+/* Subroutine */ int xerbla_(char *srname, integer *info)
			
 
				+{
			
 
				+    
			
 
				+
			
 
				+/*  -- LAPACK auxiliary routine (version 3.2) -- */
			
 
				+/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
			
 
				+/*     November 2006 */
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*  XERBLA  is an error handler for the LAPACK routines. */
			
 
				+/*  It is called by an LAPACK routine if an input parameter has an */
			
 
				+/*  invalid value.  A message is printed and execution stops. */
			
 
				+
			
 
				+/*  Installers may consider modifying the STOP statement in order to */
			
 
				+/*  call system-specific exception-handling facilities. */
			
 
				+
			
 
				+/*  Arguments */
			
 
				+/*  ========= */
			
 
				+
			
 
				+/*  SRNAME  (input) CHARACTER*(*) */
			
 
				+/*          The name of the routine which called XERBLA. */
			
 
				+
			
 
				+/*  INFO    (input) INTEGER */
			
 
				+/*          The position of the invalid parameter in the parameter list */
			
 
				+/*          of the calling routine. */
			
 
				+
			
 
				+/* ===================================================================== */
			
 
				+
			
 
				+/*     .. Intrinsic Functions .. */
			
 
				+/*     .. */
			
 
				+/*     .. Executable Statements .. */
			
 
				+
			
 
				+    printf("** On entry to %s, parameter number %ld had an illegal value\n",
			
 
				+		srname, *info);
			
 
				+
			
 
				+
			
 
				+/*     End of XERBLA */
			
 
				+
			
 
				+    return 0;
			
 
				+} /* xerbla_ */
			
--- a/min-dgels/base/BLAS/SRC/Makefile
+++ b/min-dgels/base/BLAS/SRC/Makefile
@@ -0,0 +1,115 @@
 
				+TOPDIR=../..
			
 
				+include $(TOPDIR)/make.inc
			
 
				+
			
 
				+#######################################################################
			
 
				+#  This is the makefile to create a library for the BLAS.
			
 
				+#  The files are grouped as follows:
			
 
				+#
			
 
				+#       SBLAS1 -- Single precision real BLAS routines
			
 
				+#       CBLAS1 -- Single precision complex BLAS routines
			
 
				+#       DBLAS1 -- Double precision real BLAS routines
			
 
				+#       ZBLAS1 -- Double precision complex BLAS routines
			
 
				+#
			
 
				+#       CB1AUX -- Real BLAS routines called by complex routines
			
 
				+#       ZB1AUX -- D.P. real BLAS routines called by d.p. complex
			
 
				+#                 routines
			
 
				+#
			
 
				+#      ALLBLAS -- Auxiliary routines for Level 2 and 3 BLAS
			
 
				+#
			
 
				+#       SBLAS2 -- Single precision real BLAS2 routines
			
 
				+#       CBLAS2 -- Single precision complex BLAS2 routines
			
 
				+#       DBLAS2 -- Double precision real BLAS2 routines
			
 
				+#       ZBLAS2 -- Double precision complex BLAS2 routines
			
 
				+#
			
 
				+#       SBLAS3 -- Single precision real BLAS3 routines
			
 
				+#       CBLAS3 -- Single precision complex BLAS3 routines
			
 
				+#       DBLAS3 -- Double precision real BLAS3 routines
			
 
				+#       ZBLAS3 -- Double precision complex BLAS3 routines
			
 
				+#
			
 
				+#  The library can be set up to include routines for any combination
			
 
				+#  of the four precisions.  To create or add to the library, enter make
			
 
				+#  followed by one or more of the precisions desired.  Some examples:
			
 
				+#       make single
			
 
				+#       make single complex
			
 
				+#       make single double complex complex16
			
 
				+#  Note that these commands are not safe for parallel builds.
			
 
				+#
			
 
				+#  Alternatively, the commands
			
 
				+#       make all
			
 
				+#  or
			
 
				+#       make
			
 
				+#  without any arguments creates a library of all four precisions.
			
 
				+#  The name of the library is held in BLASLIB, which is set in the
			
 
				+#  top-level make.inc
			
 
				+#
			
 
				+#  To remove the object files after the library is created, enter
			
 
				+#       make clean
			
 
				+#  To force the source files to be recompiled, enter, for example,
			
 
				+#       make single FRC=FRC
			
 
				+#
			
 
				+#---------------------------------------------------------------------
			
 
				+#
			
 
				+#  Edward Anderson, University of Tennessee
			
 
				+#  March 26, 1990
			
 
				+#  Susan Ostrouchov, Last updated September 30, 1994
			
 
				+#  ejr, May 2006.
			
 
				+#
			
 
				+#######################################################################
			
 
				+
			
 
				+all: $(BLASLIB)
			
 
				+ 
			
 
				+#---------------------------------------------------------
			
 
				+#  Comment out the next 6 definitions if you already have
			
 
				+#  the Level 1 BLAS.
			
 
				+#---------------------------------------------------------
			
 
				+
			
 
				+DBLAS1 = idamax.o dasum.o daxpy.o dcopy.o ddot.o dnrm2.o \
			
 
				+	drot.o drotg.o dscal.o dsdot.o dswap.o drotmg.o drotm.o
			
 
				+$(DBLAS1): $(FRC)
			
 
				+
			
 
				+#---------------------------------------------------------------------
			
 
				+#  The following line defines auxiliary routines needed by both the
			
 
				+#  Level 2 and Level 3 BLAS.  Comment it out only if you already have
			
 
				+#  both the Level 2 and 3 BLAS.
			
 
				+#---------------------------------------------------------------------
			
 
				+
			
 
				+ALLBLAS  = lsame.o xerbla.o xerbla_array.o
			
 
				+$(ALLBLAS) : $(FRC)
			
 
				+
			
 
				+#---------------------------------------------------------
			
 
				+#  Comment out the next 4 definitions if you already have
			
 
				+#  the Level 2 BLAS.
			
 
				+#---------------------------------------------------------
			
 
				+
			
 
				+DBLAS2 = dgemv.o dgbmv.o dsymv.o dsbmv.o dspmv.o \
			
 
				+	dtrmv.o dtbmv.o dtpmv.o dtrsv.o dtbsv.o dtpsv.o \
			
 
				+	dger.o dsyr.o dspr.o dsyr2.o dspr2.o
			
 
				+$(DBLAS2): $(FRC)
			
 
				+
			
 
				+#---------------------------------------------------------
			
 
				+#  Comment out the next 4 definitions if you already have
			
 
				+#  the Level 3 BLAS.
			
 
				+#---------------------------------------------------------
			
 
				+
			
 
				+DBLAS3 = dgemm.o dsymm.o dsyrk.o dsyr2k.o dtrmm.o dtrsm.o
			
 
				+$(DBLAS3): $(FRC)
			
 
				+
			
 
				+ALLOBJ= $(DBLAS1) $(DBLAS2) $(DBLAS3) $(ALLBLAS)
			
 
				+
			
 
				+$(BLASLIB): $(ALLOBJ)
			
 
				+	$(ARCH) $(ARCHFLAGS) $@ $(ALLOBJ)
			
 
				+	$(RANLIB) $@
			
 
				+
			
 
				+double: $(DBLAS1) $(ALLBLAS) $(DBLAS2) $(DBLAS3)
			
 
				+	$(ARCH) $(ARCHFLAGS) $(BLASLIB) $(DBLAS1) $(ALLBLAS) \
			
 
				+	$(DBLAS2) $(DBLAS3)
			
 
				+	$(RANLIB) $(BLASLIB)
			
 
				+
			
 
				+FRC:
			
 
				+	@FRC=$(FRC)
			
 
				+
			
 
				+clean:
			
 
				+	rm -f *.o
			
 
				+
			
 
				+.c.o: 
			
 
				+	$(CC) $(CFLAGS) -c $< -o $@
			
--- a/min-dgels/base/BLAS/SRC/dasum.c
+++ b/min-dgels/base/BLAS/SRC/dasum.c
@@ -0,0 +1,101 @@
 
				+/* dasum.f -- translated by f2c (version 20061008).
			
 
				+   You must link the resulting object file with libf2c:
			
 
				+	on Microsoft Windows system, link with libf2c.lib;
			
 
				+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
			
 
				+	or, if you install libf2c.a in a standard place, with -lf2c -lm
			
 
				+	-- in that order, at the end of the command line, as in
			
 
				+		cc *.o -lf2c -lm
			
 
				+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
			
 
				+
			
 
				+		http://www.netlib.org/f2c/libf2c.zip
			
 
				+*/
			
 
				+
			
 
				+#include "f2c.h"
			
 
				+#include "blaswrap.h"
			
 
				+
			
 
				+doublereal dasum_(integer *n, doublereal *dx, integer *incx)
			
 
				+{
			
 
				+    /* System generated locals */
			
 
				+    integer i__1, i__2;
			
 
				+    doublereal ret_val, d__1, d__2, d__3, d__4, d__5, d__6;
			
 
				+
			
 
				+    /* Local variables */
			
 
				+    integer i__, m, mp1;
			
 
				+    doublereal dtemp;
			
 
				+    integer nincx;
			
 
				+
			
 
				+/*     .. Scalar Arguments .. */
			
 
				+/*     .. */
			
 
				+/*     .. Array Arguments .. */
			
 
				+/*     .. */
			
 
				+
			
 
				+/*  Purpose */
			
 
				+/*  ======= */
			
 
				+
			
 
				+/*     takes the sum of the absolute values. */
			
 
				+/*     jack dongarra, linpack, 3/11/78. */
			
 
				+/*     modified 3/93 to return if incx .le. 0. */
			
 
				+/*     modified 12/3/93, array(1) declarations changed to array(*) */
			
 
				+
			
 
				+
			
 
				+/*     .. Local Scalars .. */
			
 
				+/*     .. */
			
 
				+/*     .. Intrinsic Functions .. */
			
 
				+/*     .. */
			
 
				+    /* Parameter adjustments */
			
 
				+    --dx;
			
 
				+
			
 
				+    /* Function Body */
			
 
				+    ret_val = 0.;
			
 
				+    dtemp = 0.;
			
 
				+    if (*n <= 0 || *incx <= 0) {
			
 
				+	return ret_val;
			
 
				+    }
			
 
				+    if (*incx == 1) {
			
 
				+	goto L20;
			
 
				+    }
			
 
				+
			
 
				+/*        code for increment not equal to 1 */
			
 
				+
			
 
				+    nincx = *n * *incx;
			
 
				+    i__1 = nincx;
			
 
				+    i__2 = *incx;
			
 
				+    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
			
 
				+	dtemp += (d__1 = dx[i__], abs(d__1));
			
 
				+/* L10: */
			
 
				+    }
			
 
				+    ret_val = dtemp;
			
 
				+    return ret_val;
			
 
				+
			
 
				+/*        code for increment equal to 1 */
			
 
				+
			
 
				+
			
 
				+/*        clean-up loop */
			
 
				+
			
 
				+L20:
			
 
				+    m = *n % 6;
			
 
				+    if (m == 0) {
			
 
				+	goto L40;
			
 
				+    }
			
 
				+    i__2 = m;
			
 
				+    for (i__ = 1; i__ <= i__2; ++i__) {
			
 
				+	dtemp += (d__1 = dx[i__], abs(d__1));
			
 
				+/* L30: */
			
 
				+    }
			
 
				+    if (*n < 6) {
			
 
				+	goto L60;
			
 
				+    }
			
 
				+L40:
			
 
				+    mp1 = m + 1;
			
 
				+    i__2 = *n;
			
 
				+    for (i__ = mp1; i__ <= i__2; i__ += 6) {
			
 
				+	dtemp = dtemp + (d__1 = dx[i__], abs(d__1)) + (d__2 = dx[i__ + 1], 
			
 
				+		abs(d__2)) + (d__3 = dx[i__ + 2], abs(d__3)) + (d__4 = dx[i__ 
			
 
				+		+ 3], abs(d__4)) + (d__5 = dx[i__ + 4], abs(d__5)) + (d__6 = 
			
 
				+		dx[i__ + 5], abs(d__6));
			
 
				+/* L50: */
			
 
				+    }
			
 
				+L60:
			
 
				+    ret_val = dtemp;
			
 
				+    return ret_val;
			
 
				+} /* dasum_ */
			
--- a/min-dgels/base/BLAS/SRC/daxpy.c
+++ b/min-dgels/base/BLAS/SRC/daxpy.c