Parcourir la source

merge trunk@9396:9447

Nathalie Furmento il y a 12 ans
Parent
commit
69c30703a4

+ 5 - 4
ChangeLog

@@ -134,10 +134,6 @@ Small features:
   * New configure option --enable-mpi-progression-hook to enable the
     activity polling method for StarPU-MPI.
   * Permit to disable sequential consistency for a given task.
-  * New batch files to run StarPU applications with Microsoft Visual C
-  * Add examples/release/Makefile to test StarPU examples against an
-    installed version of StarPU. That can also be used to test
-    examples using a previous API.
 
 Changes:
   * Fix the block filter functions.
@@ -184,6 +180,11 @@ Changes:
     priorities
   * starpu_scheduler.h is no longer automatically included by
     starpu.h, it has to be manually included when needed
+  * New batch files to run StarPU applications with Microsoft Visual C
+  * Add examples/release/Makefile to test StarPU examples against an
+    installed version of StarPU. That can also be used to test
+    examples using a previous API.
+  * Tutorial is installed in ${docdir}/tutorial
 
 Small changes:
   * STARPU_NCPU should now be used instead of STARPU_NCPUS. STARPU_NCPUS is

+ 7 - 7
configure.ac

@@ -1855,14 +1855,14 @@ AC_CONFIG_COMMANDS([executable-scripts], [
 
 # Create links to ICD files in build/socl/vendors directory. SOCL will use this
 # directory as the OCL_ICD_VENDORS directory
-if test -d /etc/OpenCL/vendors; then
-    for icd in /etc/OpenCL/vendors/*.icd ; do
-        if test "$(basename $icd)" != "socl.icd" ; then
-            new_icd=$(basename $icd)
- 	    AC_CONFIG_LINKS([socl/vendors/$new_icd:$icd])
+for icd in /etc/OpenCL/vendors/*.icd ; do
+	if test -f $icd ; then
+	        if test "$(basename $icd)" != "socl.icd" ; then
+        		new_icd=$(basename $icd)
+			AC_CONFIG_LINKS([socl/vendors/$new_icd:$icd])
+		fi
         fi
-    done
-fi
+done
 
 AC_CONFIG_FILES(tests/regression/regression.sh tests/regression/profiles tests/regression/profiles.build.only)
 AC_CONFIG_HEADER(src/common/config.h include/starpu_config.h gcc-plugin/include/starpu-gcc/config.h starpu-top/config.h)

+ 26 - 1
doc/Makefile.am

@@ -43,7 +43,32 @@ starpu_TEXINFOS = 		\
 
 MAINTAINERCLEANFILES = starpu.pdf starpu.html
 
-EXTRA_DIST = starpu.css
+EXTRA_DIST = starpu.css				\
+	tutorial/README				\
+	tutorial/Makefile			\
+	tutorial/hello_world.c			\
+	tutorial/hello_world_plugin.c		\
+	tutorial/vector_scal.c			\
+	tutorial/vector_scal_cpu.c		\
+	tutorial/vector_scal_cuda.cu		\
+	tutorial/vector_scal_opencl.c		\
+	tutorial/vector_scal_opencl_kernel.cl	\
+	tutorial/vector_scal_plugin.c		\
+	tutorial/vector_scal_plugin_cuda.cu
+
+starpu_tutorial_dir	=	$(docdir)/tutorial
+starpu_tutorial__DATA	=			\
+	tutorial/README				\
+	tutorial/Makefile			\
+	tutorial/hello_world.c			\
+	tutorial/hello_world_plugin.c		\
+	tutorial/vector_scal.c			\
+	tutorial/vector_scal_cpu.c		\
+	tutorial/vector_scal_cuda.cu		\
+	tutorial/vector_scal_opencl.c		\
+	tutorial/vector_scal_opencl_kernel.cl	\
+	tutorial/vector_scal_plugin.c		\
+	tutorial/vector_scal_plugin_cuda.cu
 
 dist_pdf_DATA = starpu.pdf
 dist_html_DATA = starpu.html

+ 3 - 2
doc/chapters/mpi-support.texi

@@ -30,7 +30,7 @@ according to the task graph and an application-provided distribution.
 @node Simple Example
 @section Simple Example
 
-The flags required to compile or link against the MPI layer are then
+The flags required to compile or link against the MPI layer are
 accessible with the following commands:
 
 @example
@@ -38,7 +38,8 @@ $ pkg-config --cflags starpumpi-1.0  # options for the compiler
 $ pkg-config --libs starpumpi-1.0    # options for the linker
 @end example
 
-Also pass the @code{--static} option if the application is to be linked statically.
+You also need to pass the @code{--static} option if the application is to
+be linked statically.
 
 @cartouche
 @smallexample

+ 15 - 1
doc/tutorial/Makefile

@@ -24,7 +24,7 @@ HAS_OPENCL	=	$(shell pkg-config --libs starpu-1.1 |grep -i opencl)
 %.o: %.cu
 	nvcc $(CFLAGS) $< -c
 
-TARGETS = hello_world vector_scal hello_world_plugin
+TARGETS = hello_world vector_scal hello_world_plugin vector_scal_plugin
 
 all: $(TARGETS)
 
@@ -45,5 +45,19 @@ vector_scal: $(VECTOR_SCAL_PREREQUISITES)
 hello_world_plugin: hello_world_plugin.c
 	$(CC) $(CFLAGS) -fplugin=`pkg-config starpu-1.1 --variable=gccplugin` $(LDFLAGS) $^ -o $@
 
+VECTOR_SCAL_PLUGIN_PREREQUISITES	=	vector_scal_plugin.o
+ifneq ($(strip $(HAS_CUDA)),)
+VECTOR_SCAL_PLUGIN_PREREQUISITES	+=	vector_scal_plugin_cuda.o
+VECTOR_SCAL_PLUGIN_COMPILER		=	$(NVCC)
+else
+VECTOR_SCAL_PLUGIN_COMPILER		=	$(CC)
+endif
+
+vector_scal_plugin.o: vector_scal_plugin.c
+	$(CC) -c $(CFLAGS) -fplugin=`pkg-config starpu-1.1 --variable=gccplugin` $^ -o $@
+
+vector_scal_plugin: $(VECTOR_SCAL_PLUGIN_PREREQUISITES)
+	$(CC) -fplugin=`pkg-config starpu-1.1 --variable=gccplugin` $(LDFLAGS) $(VECTOR_SCAL_PLUGIN_PREREQUISITES) -o $@
+
 clean:
 	rm -f $(TARGETS) *.o

+ 4 - 0
doc/tutorial/README

@@ -47,3 +47,7 @@ Instructions on how to compile and run StarPU examples
 % make hello_world_plugin
 % ./hello_world_plugin
 
+% make vector_scal_plugin
+% STARPU_NCPU=0 STARPU_NCUDA=0 ./vector_scal_plugin
+% STARPU_NCPU=0 STARPU_NOPENCL=0 ./vector_scal_plugin
+% STARPU_NOPENCL=0 STARPU_NCUDA=0 ./vector_scal_plugin

+ 4 - 5
doc/tutorial/vector_scal_opencl_kernel.cl

@@ -1,7 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010, 2011  Université de Bordeaux 1
- * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2013  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -15,11 +14,11 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
-__kernel void vector_mult_opencl(__global float* val, int nx, float factor)
+__kernel void vector_mult_opencl(unsigned int nx, __global float* val, float factor)
 {
         const int i = get_global_id(0);
-        if (i < nx) {
+        if (i < nx)
+	{
                 val[i] *= factor;
         }
 }
-

+ 194 - 0
doc/tutorial/vector_scal_plugin.c

@@ -0,0 +1,194 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 Institut National de Recherche en Informatique et Automatique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/* This example showcases features of the StarPU GCC plug-in.  It defines a
+   "vector scaling" task with multiple CPU implementations, an OpenCL
+   implementation, and a CUDA implementation.
+
+   Compiling it without `-fplugin=starpu.so' yields valid sequential code.  */
+
+#include <math.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+
+/* Declare and define the standard CPU implementation.  */
+
+static void vector_scal (unsigned int size, float vector[size], float factor)
+  __attribute__ ((task));
+
+/* The CPU implementation.  */
+static void
+vector_scal (unsigned int size, float vector[size], float factor)
+{
+  unsigned int i;
+  for (i = 0; i < size; i++)
+    vector[i] *= factor;
+}
+
+
+#if defined STARPU_GCC_PLUGIN && defined __SSE__
+/* The SSE-capable CPU implementation.  */
+
+#include <xmmintrin.h>
+
+static void vector_scal_sse (unsigned int size, float vector[size], float factor)
+  __attribute__ ((task_implementation ("cpu", vector_scal)));
+
+static void
+vector_scal_sse (unsigned int size, float vector[size], float factor)
+{
+  unsigned int n_iterations = size / 4;
+
+  __m128 *VECTOR = (__m128 *) vector;
+  __m128 _FACTOR __attribute__ ((aligned (16)));
+  _FACTOR = _mm_set1_ps (factor);
+
+  unsigned int i;
+  for (i = 0; i < n_iterations; i++)
+    VECTOR[i] = _mm_mul_ps (_FACTOR, VECTOR[i]);
+
+  unsigned int remainder = size % 4;
+  if (remainder != 0)
+    {
+      unsigned int start = 4 * n_iterations;
+      for (i = start; i < start + remainder; ++i)
+	vector[i] = factor * vector[i];
+    }
+}
+#endif /* __SSE__ */
+
+
+/* Declaration and definition of the OpenCL implementation.  */
+
+#if defined STARPU_GCC_PLUGIN && defined STARPU_USE_OPENCL
+
+#include <starpu_opencl.h>
+
+/* The OpenCL programs, loaded from `main'.  */
+static struct starpu_opencl_program cl_programs;
+
+static void vector_scal_opencl (unsigned int size, float vector[size], float factor)
+  __attribute__ ((task_implementation ("opencl", vector_scal)));
+
+static void
+vector_scal_opencl (unsigned int size, float vector[size], float factor)
+{
+  int id, devid, err;
+  cl_kernel kernel;
+  cl_command_queue queue;
+  cl_event event;
+
+  cl_mem val = (cl_mem) vector;
+
+  id = starpu_worker_get_id ();
+  devid = starpu_worker_get_devid (id);
+
+  /* Prepare to invoke the kernel.  In the future, this will be largely
+     automated.  */
+  err = starpu_opencl_load_kernel (&kernel, &queue, &cl_programs,
+				   "vector_mult_opencl", devid);
+  if (err != CL_SUCCESS)
+    STARPU_OPENCL_REPORT_ERROR (err);
+
+  err = clSetKernelArg (kernel, 0, sizeof (size), &size);
+  err |= clSetKernelArg (kernel, 1, sizeof (val), &val);
+  err |= clSetKernelArg (kernel, 2, sizeof (factor), &factor);
+  if (err)
+    STARPU_OPENCL_REPORT_ERROR (err);
+
+  size_t global = size, local = 1;
+  err = clEnqueueNDRangeKernel (queue, kernel, 1, NULL, &global, &local, 0,
+				NULL, &event);
+  if (err != CL_SUCCESS)
+    STARPU_OPENCL_REPORT_ERROR (err);
+
+  clFinish (queue);
+  starpu_opencl_collect_stats (event);
+  clReleaseEvent (event);
+
+  starpu_opencl_release_kernel (kernel);
+}
+
+#endif
+
+
+#ifdef STARPU_USE_CUDA
+
+/* Declaration of the CUDA implementation.  The definition itself is in the
+   `.cu' file itself.  */
+
+extern void vector_scal_cuda (unsigned int size, float vector[size], float factor)
+  __attribute__ ((task_implementation ("cuda", vector_scal)));
+
+#endif
+
+
+#define EPSILON 1e-3
+static bool
+check (unsigned int size, float vector[size], float factor)
+{
+  unsigned int i;
+
+  for (i = 0; i < size; i++)
+    {
+      if (fabs(vector[i] - i * factor) > i*factor*EPSILON)
+        {
+          fprintf(stderr, "%.2f != %.2f\n", vector[i], i*factor);
+          return false;
+        }
+    }
+  fprintf(stderr, "computation is correct\n");
+  return true;
+}
+
+
+int
+main (void)
+{
+  bool valid;
+
+#pragma starpu initialize
+
+#if defined STARPU_GCC_PLUGIN && defined STARPU_USE_OPENCL
+  starpu_opencl_load_opencl_from_file ("vector_scal_opencl_kernel.cl",
+				       &cl_programs, "");
+#endif
+
+#define NX     0x100000
+#define FACTOR 3.14
+
+  {
+    float vector[NX] __attribute__ ((heap_allocated, registered));
+
+    unsigned int i;
+    for (i = 0; i < NX; i++)
+      vector[i] = (float) i;
+
+    vector_scal (NX, vector, FACTOR);
+
+#pragma starpu wait
+#pragma starpu acquire vector
+    valid = check (NX, vector, FACTOR);
+#pragma starpu release vector
+
+  } /* VECTOR is automatically freed here.  */
+
+#pragma starpu shutdown
+
+  return valid ? EXIT_SUCCESS : EXIT_FAILURE;
+}

+ 23 - 5
gcc-plugin/examples/vector_scal/vector_scal_opencl_kernel.cl

@@ -16,11 +16,29 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
-__kernel void vector_mult_opencl(unsigned int nx, __global float* val, float factor)
+/* CUDA implementation of the `vector_scal' task.  */
+
+#include <starpu.h>
+#include <starpu_cuda.h>
+#include <stdlib.h>
+
+static __global__ void
+vector_mult_cuda (unsigned int n, float *val, float factor)
 {
-        const int i = get_global_id(0);
-        if (i < nx) {
-                val[i] *= factor;
-        }
+  unsigned i = blockIdx.x * blockDim.x + threadIdx.x;
+
+  if (i < n)
+    val[i] *= factor;
 }
 
+extern "C" void
+vector_scal_cuda (unsigned int size, float vector[], float factor)
+{
+  unsigned threads_per_block = 64;
+  unsigned nblocks = (size + threads_per_block - 1) / threads_per_block;
+
+  vector_mult_cuda <<< nblocks, threads_per_block, 0,
+       starpu_cuda_get_local_stream () >>> (size, vector, factor);
+
+  cudaStreamSynchronize (starpu_cuda_get_local_stream ());
+}

+ 52 - 22
gcc-plugin/examples/Makefile.am

@@ -13,15 +13,44 @@
 #
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 
+TESTS	=
+
+examplebindir = $(libdir)/starpu/plugin
+
 if STARPU_USE_CPU
 
-noinst_PROGRAMS =				\
-  hello-starpu matrix-mult stencil5 vector_scal/vector_scal
+if STARPU_HAVE_WINDOWS
+check_PROGRAMS	=	$(TESTS)
+else
+check_PROGRAMS	=	$(LOADER) $(TESTS)
+endif
+
+if !STARPU_HAVE_WINDOWS
+## test loader program
+LOADER			=	loader
+loader_CPPFLAGS =  $(AM_CFLAGS) $(AM_CPPFLAGS) -I$(top_builddir)/src/
+LOADER_BIN		=	$(abs_top_builddir)/examples/$(LOADER)
+loader_SOURCES		=	../../tests/loader.c
+
+if STARPU_HAVE_AM111
+TESTS_ENVIRONMENT	=	top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)"
+LOG_COMPILER		=	$(LOADER_BIN)
+else
+TESTS_ENVIRONMENT	=	top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" $(LOADER_BIN)
+endif
+
+endif
+
+examplebin_PROGRAMS =			\
+	hello-starpu 			\
+	matrix-mult			\
+	stencil5			\
+	vector_scal/vector_scal
 
-if !NO_BLAS_LIB
-noinst_PROGRAMS +=				\
-  cholesky/cholesky
-endif !NO_BLAS_LIB
+#if !NO_BLAS_LIB
+#examplebin_PROGRAMS +=				\
+#  cholesky/cholesky
+#endif !NO_BLAS_LIB
 endif STARPU_USE_CPU
 
 AM_LDFLAGS = $(top_builddir)/src/@LIBSTARPU_LINK@
@@ -38,22 +67,20 @@ AM_CFLAGS =							\
   -fplugin-arg-starpu-verbose					\
   -Wall
 
-noinst_HEADERS =				\
-  cholesky/cholesky.h				\
-  cholesky/cholesky_kernels.h
-
-EXTRA_DIST = vector_scal/vector_scal_opencl_kernel.cl
+#noinst_HEADERS =				\
+#  cholesky/cholesky.h				\
+#  cholesky/cholesky_kernels.h
 
-if !NO_BLAS_LIB
-cholesky_cholesky_SOURCES	=		\
-	cholesky/cholesky.c		\
-	cholesky/cholesky_models.c	\
-	cholesky/cholesky_kernels.c	\
-	$(top_srcdir)/examples/common/blas.c
-
-cholesky_cholesky_LDADD	=	\
-	$(STARPU_BLAS_LDFLAGS)
-endif
+#if !NO_BLAS_LIB
+#cholesky_cholesky_SOURCES	=		\
+#	cholesky/cholesky.c		\
+#	cholesky/cholesky_models.c	\
+#	cholesky/cholesky_kernels.c	\
+#	$(top_srcdir)/examples/common/blas.c
+#
+#cholesky_cholesky_LDADD	=	\
+#	$(STARPU_BLAS_LDFLAGS)
+#endif
 
 vector_scal_vector_scal_SOURCES = vector_scal/vector_scal.c
 
@@ -67,10 +94,13 @@ vector_scal_vector_scal_SOURCES += vector_scal/vector_scal_cuda.cu
 
 else !STARPU_USE_CUDA
 
-EXTRA_DIST += vector_scal/vector_scal_cuda.cu
+EXTRA_DIST = vector_scal/vector_scal_cuda.cu
 
 endif
 
+TESTS += $(examplebin_PROGRAMS)
+
+
 showcheck:
 	-cat $(TEST_LOGS) /dev/null
 	for i in $(SUBDIRS) ; do \

+ 13 - 8
gcc-plugin/examples/matrix-mult.c

@@ -137,13 +137,18 @@ main (int argc, char **argv)
 
   if (argc < 4)
     {
-      fprintf (stderr, "Usage: %s NLOOPS MATRIX-SIZE NSLICES\n", argv[0]);
-      return EXIT_FAILURE;
+      fprintf (stderr, "Using default values.\nCorrect usage: %s NLOOPS MATRIX-SIZE NSLICES\n", argv[0]);
+      mloop = nloop = 10;
+      zdim = ydim = xdim = 16;
+      nslicesz = nslicesy = nslicesx = 4;
+    }
+  else
+    {
+      mloop = nloop = atoi (argv[1]);
+      zdim = ydim = xdim = atoi (argv[2]);
+      nslicesz = nslicesy = nslicesx = atoi (argv[3]);
     }
 
-  mloop = nloop = atoi (argv[1]);
-  zdim = ydim = xdim = atoi (argv[2]);
-  nslicesz = nslicesy = nslicesx = atoi (argv[3]);
   bxdim = xdim / nslicesx;
   bydim = ydim / nslicesy;
   bzdim = zdim / nslicesz;
@@ -166,9 +171,9 @@ main (int argc, char **argv)
 
   gettimeofday (&start_all, NULL);
 
-  float A[zdim * ydim] __heap;
-  float B[xdim * zdim] __heap;
-  float C[xdim * ydim] __heap;
+  float A[zdim * ydim];
+  float B[xdim * zdim];
+  float C[xdim * ydim];
 
   srand (time (NULL));
   for (i = 0; i < zdim * ydim; i++)

+ 10 - 4
gcc-plugin/examples/stencil5.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2013  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -34,9 +34,15 @@ static void stencil5_cpu(float *xy, const float *xm1y, const float *xp1y, const
 	*xy = (*xy + *xm1y + *xp1y + *xym1 + *xyp1) / 5;
 }
 
-#define NITER_DEF 20000
-#define X         10
-#define Y         10
+#ifdef STARPU_QUICK_CHECK
+#  define NITER_DEF	5
+#  define X         	3
+#  define Y         	3
+#else
+#  define NITER_DEF	500
+#  define X         	20
+#  define Y         	20
+#endif
 
 int display = 0;
 int niter = NITER_DEF;

+ 8 - 7
gcc-plugin/examples/vector_scal/vector_scal.c

@@ -105,8 +105,8 @@ vector_scal_opencl (unsigned int size, float vector[size], float factor)
   if (err != CL_SUCCESS)
     STARPU_OPENCL_REPORT_ERROR (err);
 
-  err = clSetKernelArg (kernel, 0, sizeof (val), &val);
-  err |= clSetKernelArg (kernel, 1, sizeof (size), &size);
+  err = clSetKernelArg (kernel, 0, sizeof (size), &size);
+  err |= clSetKernelArg (kernel, 1, sizeof (val), &val);
   err |= clSetKernelArg (kernel, 2, sizeof (factor), &factor);
   if (err)
     STARPU_OPENCL_REPORT_ERROR (err);
@@ -140,9 +140,9 @@ extern void vector_scal_cuda (unsigned int size, float vector[size], float facto
 
 #define EPSILON 1e-3
 static bool
-check (size_t size, float vector[size], float factor)
+check (unsigned int size, float vector[size], float factor)
 {
-  size_t i;
+  unsigned int i;
 
   for (i = 0; i < size; i++)
     {
@@ -164,7 +164,7 @@ main (void)
 #pragma starpu initialize
 
 #if defined STARPU_GCC_PLUGIN && defined STARPU_USE_OPENCL
-  starpu_opencl_load_opencl_from_file ("vector_scal_opencl_kernel.cl",
+  starpu_opencl_load_opencl_from_file ("examples/basic_examples/vector_scal_opencl_kernel.cl",
 				       &cl_programs, "");
 #endif
 
@@ -174,15 +174,16 @@ main (void)
   {
     float vector[NX] __attribute__ ((heap_allocated, registered));
 
-    size_t i;
+    unsigned int i;
     for (i = 0; i < NX; i++)
       vector[i] = (float) i;
 
     vector_scal (NX, vector, FACTOR);
 
 #pragma starpu wait
-
+#pragma starpu acquire vector
     valid = check (NX, vector, FACTOR);
+#pragma starpu release vector
 
   } /* VECTOR is automatically freed here.  */
 

+ 1 - 1
gcc-plugin/examples/vector_scal/vector_scal_cuda.cu

@@ -32,7 +32,7 @@ vector_mult_cuda (unsigned int n, float *val, float factor)
 }
 
 extern "C" void
-vector_scal_cuda (size_t size, float vector[], float factor)
+vector_scal_cuda (unsigned int size, float vector[], float factor)
 {
   unsigned threads_per_block = 64;
   unsigned nblocks = (size + threads_per_block - 1) / threads_per_block;

+ 14 - 3
sc_hypervisor/include/sc_hypervisor_lp.h

@@ -32,9 +32,7 @@ extern "C"
 #include <glpk.h>
 #endif //STARPU_HAVE_GLPK_H
 
-/* returns 1/tmax, and computes in table res the nr of workers needed by each context st the system ends up in the smallest tmax*/
-double sc_hypervisor_lp_compute_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_workers, double velocity[nsched_ctxs][ntypes_of_workers], double flops[nsched_ctxs], 
-				    double res[nsched_ctxs][ntypes_of_workers], int total_nw[ntypes_of_workers]);
+struct sc_hypervisor_policy_task_pool; 
 
 /* returns tmax, and computes in table res the nr of workers needed by each context st the system ends up in the smallest tmax*/
 double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_workers, double res[nsched_ctxs][ntypes_of_workers], int total_nw[ntypes_of_workers]);
@@ -63,6 +61,19 @@ unsigned sc_hypervisor_lp_execute_dichotomy(int ns, int nw, double w_in_s[ns][nw
 					    double (*lp_estimated_distrib_func)(int ns, int nw, double draft_w_in_s[ns][nw], 
 									     unsigned is_integer, double tmax, void *specifc_data));
 
+#ifdef STARPU_HAVE_GLPK_H
+/* linear program that returns 1/tmax, and computes in table res the nr of workers needed by each context such that
+   the system ends up in the smallest tmax*/
+double sc_hypervisor_lp_simulate_distrib_flops(int nsched_ctxs, int ntypes_of_workers, double velocity[nsched_ctxs][ntypes_of_workers], 
+					       double flops[nsched_ctxs], double res[nsched_ctxs][ntypes_of_workers], int total_nw[ntypes_of_workers]);
+
+/* linear program that simulates a distribution of tasks that minimises the execution time of the tasks in the pool */
+double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_in_s[ns][nw], double tasks[nw][nt],
+					       double times[nw][nt], unsigned is_integer, double tmax, int *in_sched_ctxs,
+					       struct sc_hypervisor_policy_task_pool *tmp_task_pools);
+
+#endif // STARPU_HAVE_GLPK_H
+
 #ifdef __cplusplus
 }
 #endif

+ 4 - 0
sc_hypervisor/include/sc_hypervisor_policy.h

@@ -52,6 +52,9 @@ void sc_hypervisor_policy_remove_task_from_pool(struct starpu_task *task, uint32
 /* clone a task wrapper linked list */
 struct sc_hypervisor_policy_task_pool* sc_hypervisor_policy_clone_task_pool(struct sc_hypervisor_policy_task_pool *tp);
 
+/* get the execution time of the submitted tasks out of starpu's calibration files */
+void sc_hypervisor_get_tasks_times(int nw, int nt, double times[nw][nt], int *workers, unsigned size_ctxs, struct sc_hypervisor_policy_task_pool *task_pools);
+
 /* find the context with the lowest priority in order to move some workers */
 unsigned sc_hypervisor_find_lowest_prio_sched_ctx(unsigned req_sched_ctx, int nworkers_to_move);
 
@@ -97,6 +100,7 @@ int sc_hypervisor_has_velocity_gap_btw_ctxs(void);
 /* get the list of workers grouped by type */
 void sc_hypervisor_group_workers_by_type(int *workers, int nworkers, int ntypes_of_workers, int total_nw[ntypes_of_workers]);
 
+
 #ifdef __cplusplus
 }
 #endif

+ 1 - 0
sc_hypervisor/src/Makefile.am

@@ -27,6 +27,7 @@ libsc_hypervisor_la_SOURCES = 				\
 	policies_utils/policy_tools.c			\
 	policies_utils/task_pool.c			\
 	policies_utils/lp_tools.c			\
+	policies_utils/lp_programs.c			\
 	policies_utils/dichotomy.c			\
 	hypervisor_policies/idle_policy.c		\
 	hypervisor_policies/app_driven_policy.c		\

+ 24 - 276
sc_hypervisor/src/hypervisor_policies/teft_lp_policy.c

@@ -34,53 +34,7 @@ struct teft_lp_data
 	unsigned size_ctxs;
 };
 
-static void _get_tasks_times(int nw, int nt, double times[nw][nt], int *workers, unsigned size_ctxs)
-{
-        struct sc_hypervisor_policy_task_pool *tp;
-        int w, t;
-        for (w = 0; w < nw; w++)
-        {
-                for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
-                {
-			int worker = workers == NULL ? w : workers[w];
-                        enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(worker);
-                        double length = starpu_history_based_expected_perf(tp->cl->model, arch, tp->footprint);
-
-                        if (isnan(length))
-                                times[w][t] = NAN;
-			else
-			{
-                                times[w][t] = length / 1000.;
-
-				double transfer_time = 0.0;
-				enum starpu_archtype arch = starpu_worker_get_type(worker);
-				if(arch == STARPU_CUDA_WORKER)
-				{
-					unsigned worker_in_ctx = starpu_sched_ctx_contains_worker(worker, tp->sched_ctx_id);
-					if(!worker_in_ctx && !size_ctxs)
-					{
-						double transfer_velocity = starpu_get_bandwidth_RAM_CUDA(worker);
-						transfer_time +=  (tp->footprint / transfer_velocity) / 1000. ;
-					}
-					double latency = starpu_get_latency_RAM_CUDA(worker);
-					transfer_time += latency/1000.;
-
-				}
-//				printf("%d/%d %s x %d time = %lf transfer_time = %lf\n", w, tp->sched_ctx_id, tp->cl->model->symbol, tp->n, times[w][t], transfer_time);
-				times[w][t] += transfer_time;
-			}
-                }
-        }
-}
-
-
-
-/*
- * GNU Linear Programming Kit backend
- */
-#ifdef STARPU_HAVE_GLPK_H
-#include <glpk.h>
-static double _glp_resolve(int ns, int nw, double final_w_in_s[ns][nw], 
+static double _compute_workers_distrib(int ns, int nw, double final_w_in_s[ns][nw], 
 			   unsigned is_integer, double tmax, void *specific_data)
 {
 	struct teft_lp_data *sd = (struct teft_lp_data *)specific_data;
@@ -91,237 +45,23 @@ static double _glp_resolve(int ns, int nw, double final_w_in_s[ns][nw],
 	int *workers = sd->workers;
 	struct sc_hypervisor_policy_task_pool *tmp_task_pools = sd->tmp_task_pools;
 	unsigned size_ctxs = sd->size_ctxs;
-	
-	double w_in_s[ns][nw];
-	double tasks[nw][nt];
-	
+		
 	if(tmp_task_pools == NULL)
 		return 0.0;
-	struct sc_hypervisor_policy_task_pool * tp;
-	int t, w, s;
-	glp_prob *lp;
-
-	lp = glp_create_prob();
-	glp_set_prob_name(lp, "StarPU theoretical bound");
-	glp_set_obj_dir(lp, GLP_MAX);
-	glp_set_obj_name(lp, "total execution time");
-
-	{
-		double times[nw][nt];
-		int ne = nt * nw /* worker execution time */
-			+ nw * ns
-			+ nw * (nt + ns)
-			+ 1; /* glp dumbness */
-		int n = 1;
-		int ia[ne], ja[ne];
-		double ar[ne];
-
-		_get_tasks_times(nw, nt, times, workers, size_ctxs);
-
-		/* Variables: number of tasks i assigned to worker j, and tmax */
-		glp_add_cols(lp, nw*nt+ns*nw);
-#define colnum(w, t) ((t)*nw+(w)+1)
-		for(s = 0; s < ns; s++)
-			for(w = 0; w < nw; w++)
-				glp_set_obj_coef(lp, nw*nt+s*nw+w+1, 1.);
-
-		for (w = 0; w < nw; w++)
-			for (t = 0; t < nt; t++)
-			{
-				char name[32];
-				snprintf(name, sizeof(name), "w%dt%dn", w, t);
-				glp_set_col_name(lp, colnum(w, t), name);
-/* 				if (integer) */
-/*                                 { */
-/*                                         glp_set_col_kind(lp, colnum(w, t), GLP_IV); */
-/* 					glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0, 0); */
-/*                                 } */
-/* 				else */
-					glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0.0, 0.0);
-			}
-		for(s = 0; s < ns; s++)
-			for(w = 0; w < nw; w++)
-			{
-				char name[32];
-				snprintf(name, sizeof(name), "w%ds%dn", w, s);
-				glp_set_col_name(lp, nw*nt+s*nw+w+1, name);
-				if (is_integer)
-                                {
-                                        glp_set_col_kind(lp, nw*nt+s*nw+w+1, GLP_IV);
-                                        glp_set_col_bnds(lp, nw*nt+s*nw+w+1, GLP_DB, 0, 1);
-                                }
-                                else
-					glp_set_col_bnds(lp, nw*nt+s*nw+w+1, GLP_DB, 0.0, 1.0);
-			}
-
-		int *sched_ctxs = in_sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : in_sched_ctxs;
-
-		int curr_row_idx = 0;
-		/* Total worker execution time */
-		glp_add_rows(lp, nw*ns);
-		for (t = 0; t < nt; t++)
-		{
-			int someone = 0;
-			for (w = 0; w < nw; w++)
-				if (!isnan(times[w][t]))
-					someone = 1;
-			if (!someone)
-			{
-				/* This task does not have any performance model at all, abort */
-				printf("NO PERF MODELS\n");
-				glp_delete_prob(lp);
-				return 0.0;
-			}
-		}
-		/*sum(t[t][w]*n[t][w]) < x[s][w]*tmax */
-		for(s = 0; s < ns; s++)
-		{
-			for (w = 0; w < nw; w++)
-			{
-				char name[32], title[64];
-				starpu_worker_get_name(w, name, sizeof(name));
-				snprintf(title, sizeof(title), "worker %s", name);
-				glp_set_row_name(lp, curr_row_idx+s*nw+w+1, title);
-				for (t = 0, tp = tmp_task_pools; tp; t++, tp = tp->next)
-				{
-					if((int)tp->sched_ctx_id == sched_ctxs[s])
-					{
-						ia[n] = curr_row_idx+s*nw+w+1;
-						ja[n] = colnum(w, t);
-						if (isnan(times[w][t]))
-							ar[n] = 1000000000.;
-						else
-							ar[n] = times[w][t];
-						n++;
-					}
-				}
-				/* x[s][w] = 1 | 0 */
-				ia[n] = curr_row_idx+s*nw+w+1;
-				ja[n] = nw*nt+s*nw+w+1;
-				ar[n] = (-1) * tmax;
-				n++;
-				glp_set_row_bnds(lp, curr_row_idx+s*nw+w+1, GLP_UP, 0.0, 0.0);
-			}
-		}
-
-		curr_row_idx += nw*ns;
-
-		/* Total task completion */
-		glp_add_rows(lp, nt);
-		for (t = 0, tp = tmp_task_pools; tp; t++, tp = tp->next)
-		{
-			char name[32], title[64];
-			starpu_worker_get_name(w, name, sizeof(name));
-			snprintf(title, sizeof(title), "task %s key %x", tp->cl->name, (unsigned) tp->footprint);
-			glp_set_row_name(lp, curr_row_idx+t+1, title);
-			for (w = 0; w < nw; w++)
-			{
-				ia[n] = curr_row_idx+t+1;
-				ja[n] = colnum(w, t);
-				ar[n] = 1;
-				n++;
-			}
-			glp_set_row_bnds(lp, curr_row_idx+t+1, GLP_FX, tp->n, tp->n);
-		}
-
-		curr_row_idx += nt;
-
-		/* sum(x[s][i]) = 1 */
-		glp_add_rows(lp, nw);
-		for (w = 0; w < nw; w++)
-		{
-			char name[32], title[64];
-			starpu_worker_get_name(w, name, sizeof(name));
-			snprintf(title, sizeof(title), "w%x", w);
-			glp_set_row_name(lp, curr_row_idx+w+1, title);
-			for(s = 0; s < ns; s++)
-			{
-				ia[n] = curr_row_idx+w+1;
-				ja[n] = nw*nt+s*nw+w+1;
-				ar[n] = 1;
-				n++;
-			}
-			if(is_integer)
-                                glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1, 1);
-			else
-				glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1.0, 1.0);
-		}
-		if(n != ne)
-			printf("ns= %d nw = %d nt = %d n = %d ne = %d\n", ns, nw, nt, n, ne);
-		STARPU_ASSERT(n == ne);
-
-		glp_load_matrix(lp, ne-1, ia, ja, ar);
-	}
-
-	glp_smcp parm;
-	glp_init_smcp(&parm);
-	parm.msg_lev = GLP_MSG_OFF;
-	int ret = glp_simplex(lp, &parm);
-
-/* 	char str[50]; */
-/* 	sprintf(str, "outpu_lp_%g", tmax); */
-
-/* 	glp_print_sol(lp, str); */
-
-	if (ret)
-	{
-		printf("error in simplex\n");
-		glp_delete_prob(lp);
-		lp = NULL;
-		return 0.0;
-	}
 
-	int stat = glp_get_prim_stat(lp);
-	/* if we don't have a solution return */
-	if(stat == GLP_NOFEAS)
-	{
-		glp_delete_prob(lp);
-//		printf("no_sol in tmax = %lf\n", tmax);
-		lp = NULL;
-		return 0.0;
-	}
-
-
-	if (is_integer)
-        {
-                glp_iocp iocp;
-                glp_init_iocp(&iocp);
-                iocp.msg_lev = GLP_MSG_OFF;
-		glp_intopt(lp, &iocp);
-		int stat = glp_mip_status(lp);
-		/* if we don't have a solution return */
-		if(stat == GLP_NOFEAS)
-		{
-//			printf("no int sol in tmax = %lf\n", tmax);
-			glp_delete_prob(lp);
-			lp = NULL;
-			return 0.0;
-		}
-	}
-
-	double res = glp_get_obj_val(lp);
-	for (w = 0; w < nw; w++)
-		for (t = 0; t < nt; t++)
-/* 			if (integer) */
-/* 				tasks[w][t] = (double)glp_mip_col_val(lp, colnum(w, t)); */
-/*                         else */
-				tasks[w][t] = glp_get_col_prim(lp, colnum(w, t));
+	double w_in_s[ns][nw];
+	double tasks[nw][nt];
+	double times[nw][nt];
 	
-//	printf("for tmax %lf\n", tmax);
-	for(s = 0; s < ns; s++)
-		for(w = 0; w < nw; w++)
-		{
-			if (is_integer)
-				w_in_s[s][w] = (double)glp_mip_col_val(lp, nw*nt+s*nw+w+1);
-                        else
-				w_in_s[s][w] = glp_get_col_prim(lp, nw*nt+s*nw+w+1);
-//			printf("w_in_s[%d][%d]=%lf\n", s, w, w_in_s[s][w]);
-		}
-//	printf("\n");
+	sc_hypervisor_get_tasks_times(nw, nt, times, workers, size_ctxs, task_pools);
 
-	glp_delete_prob(lp);
+	double res = 0.0;
+#ifdef STARPU_HAVE_GLPK_H
+	res = sc_hypervisor_lp_simulate_distrib_tasks(ns, nw, nt, w_in_s, tasks, times, is_integer, tmax, in_sched_ctxs, tmp_task_pools);
+#endif //STARPU_HAVE_GLPK_H
 	if(res != 0.0)
 	{
+		int s, w, t;
 		for(s = 0; s < ns; s++)
 			for(w = 0; w < nw; w++)
 				final_w_in_s[s][w] = w_in_s[s][w];
@@ -344,7 +84,12 @@ static void _size_ctxs(int *sched_ctxs, int nsched_ctxs , int *workers, int nwor
 		nt++;
 
 	double w_in_s[ns][nw];
-	double tasks[nw][nt];
+//	double tasks[nw][nt];
+	double **tasks=(double**)malloc(nw*sizeof(double*));
+	int i;
+	for(i = 0; i < nw; i++)
+		tasks[i] = (double*)malloc(nt*sizeof(double));
+
 
 	struct teft_lp_data specific_data;
 	specific_data.nt = nt;
@@ -362,12 +107,17 @@ static void _size_ctxs(int *sched_ctxs, int nsched_ctxs , int *workers, int nwor
 	double tmin = smallest_tmax;
 
 	unsigned found_sol = sc_hypervisor_lp_execute_dichotomy(ns, nw, w_in_s, 1, (void*)&specific_data, 
-								tmin, tmax, smallest_tmax, _glp_resolve);
+								tmin, tmax, smallest_tmax, _compute_workers_distrib);
 
 	starpu_pthread_mutex_unlock(&mutex);
 	/* if we did find at least one solution redistribute the resources */
 	if(found_sol)
 		sc_hypervisor_lp_place_resources_in_ctx(ns, nw, w_in_s, sched_ctxs, workers, 1);
+	
+	for(i = 0; i < nw; i++)
+		free(tasks[i]);
+	free(tasks);
+
 }
 
 static void size_if_required()
@@ -460,7 +210,7 @@ static void teft_lp_handle_poped_task(unsigned sched_ctx, int worker, struct sta
 			double tmax = possible_tmax * ns;
 			double tmin = smallest_tmax;
 			unsigned found_sol = sc_hypervisor_lp_execute_dichotomy(ns, nw, w_in_s, 1, (void*)&specific_data, 
-								tmin, tmax, smallest_tmax, _glp_resolve);
+								tmin, tmax, smallest_tmax, _compute_workers_distrib);
 //			starpu_pthread_mutex_unlock(&mutex);
 
 			/* if we did find at least one solution redistribute the resources */
@@ -507,5 +257,3 @@ struct sc_hypervisor_policy teft_lp_policy = {
 	.custom = 0,
 	.name = "teft_lp"
 };
-
-#endif /* STARPU_HAVE_GLPK_H */

+ 450 - 0
sc_hypervisor/src/policies_utils/lp_programs.c

@@ -0,0 +1,450 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011 - 2013  INRIA
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+/*
+ * GNU Linear Programming Kit backend
+ */
+
+#include "sc_hypervisor_policy.h"
+#include "sc_hypervisor_lp.h"
+
+#ifdef STARPU_HAVE_GLPK_H
+
+double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_in_s[ns][nw], double tasks[nw][nt],
+					       double times[nw][nt], unsigned is_integer, double tmax, int *in_sched_ctxs,
+					       struct sc_hypervisor_policy_task_pool *tmp_task_pools)
+{
+	struct sc_hypervisor_policy_task_pool * tp;
+	int t, w, s;
+	glp_prob *lp;
+
+	lp = glp_create_prob();
+	glp_set_prob_name(lp, "StarPU theoretical bound");
+	glp_set_obj_dir(lp, GLP_MAX);
+	glp_set_obj_name(lp, "total execution time");
+
+	{
+		int ne = nt * nw /* worker execution time */
+			+ nw * ns
+			+ nw * (nt + ns)
+			+ 1; /* glp dumbness */
+		int n = 1;
+		int ia[ne], ja[ne];
+		double ar[ne];
+
+		/* Variables: number of tasks i assigned to worker j, and tmax */
+		glp_add_cols(lp, nw*nt+ns*nw);
+#define colnum(w, t) ((t)*nw+(w)+1)
+		for(s = 0; s < ns; s++)
+			for(w = 0; w < nw; w++)
+				glp_set_obj_coef(lp, nw*nt+s*nw+w+1, 1.);
+
+		for (w = 0; w < nw; w++)
+			for (t = 0; t < nt; t++)
+			{
+				char name[32];
+				snprintf(name, sizeof(name), "w%dt%dn", w, t);
+				glp_set_col_name(lp, colnum(w, t), name);
+/* 				if (integer) */
+/*                                 { */
+/*                                         glp_set_col_kind(lp, colnum(w, t), GLP_IV); */
+/* 					glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0, 0); */
+/*                                 } */
+/* 				else */
+					glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0.0, 0.0);
+			}
+		for(s = 0; s < ns; s++)
+			for(w = 0; w < nw; w++)
+			{
+				char name[32];
+				snprintf(name, sizeof(name), "w%ds%dn", w, s);
+				glp_set_col_name(lp, nw*nt+s*nw+w+1, name);
+				if (is_integer)
+                                {
+                                        glp_set_col_kind(lp, nw*nt+s*nw+w+1, GLP_IV);
+                                        glp_set_col_bnds(lp, nw*nt+s*nw+w+1, GLP_DB, 0, 1);
+                                }
+                                else
+					glp_set_col_bnds(lp, nw*nt+s*nw+w+1, GLP_DB, 0.0, 1.0);
+			}
+
+		int *sched_ctxs = in_sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : in_sched_ctxs;
+
+		int curr_row_idx = 0;
+		/* Total worker execution time */
+		glp_add_rows(lp, nw*ns);
+		for (t = 0; t < nt; t++)
+		{
+			int someone = 0;
+			for (w = 0; w < nw; w++)
+				if (!isnan(times[w][t]))
+					someone = 1;
+			if (!someone)
+			{
+				/* This task does not have any performance model at all, abort */
+				printf("NO PERF MODELS\n");
+				glp_delete_prob(lp);
+				return 0.0;
+			}
+		}
+		/*sum(t[t][w]*n[t][w]) < x[s][w]*tmax */
+		for(s = 0; s < ns; s++)
+		{
+			for (w = 0; w < nw; w++)
+			{
+				char name[32], title[64];
+				starpu_worker_get_name(w, name, sizeof(name));
+				snprintf(title, sizeof(title), "worker %s", name);
+				glp_set_row_name(lp, curr_row_idx+s*nw+w+1, title);
+				for (t = 0, tp = tmp_task_pools; tp; t++, tp = tp->next)
+				{
+					if((int)tp->sched_ctx_id == sched_ctxs[s])
+					{
+						ia[n] = curr_row_idx+s*nw+w+1;
+						ja[n] = colnum(w, t);
+						if (isnan(times[w][t]))
+							ar[n] = 1000000000.;
+						else
+							ar[n] = times[w][t];
+						n++;
+					}
+				}
+				/* x[s][w] = 1 | 0 */
+				ia[n] = curr_row_idx+s*nw+w+1;
+				ja[n] = nw*nt+s*nw+w+1;
+				ar[n] = (-1) * tmax;
+				n++;
+				glp_set_row_bnds(lp, curr_row_idx+s*nw+w+1, GLP_UP, 0.0, 0.0);
+			}
+		}
+
+		curr_row_idx += nw*ns;
+
+		/* Total task completion */
+		glp_add_rows(lp, nt);
+		for (t = 0, tp = tmp_task_pools; tp; t++, tp = tp->next)
+		{
+			char name[32], title[64];
+			starpu_worker_get_name(w, name, sizeof(name));
+			snprintf(title, sizeof(title), "task %s key %x", tp->cl->name, (unsigned) tp->footprint);
+			glp_set_row_name(lp, curr_row_idx+t+1, title);
+			for (w = 0; w < nw; w++)
+			{
+				ia[n] = curr_row_idx+t+1;
+				ja[n] = colnum(w, t);
+				ar[n] = 1;
+				n++;
+			}
+			glp_set_row_bnds(lp, curr_row_idx+t+1, GLP_FX, tp->n, tp->n);
+		}
+
+		curr_row_idx += nt;
+
+		/* sum(x[s][i]) = 1 */
+		glp_add_rows(lp, nw);
+		for (w = 0; w < nw; w++)
+		{
+			char name[32], title[64];
+			starpu_worker_get_name(w, name, sizeof(name));
+			snprintf(title, sizeof(title), "w%x", w);
+			glp_set_row_name(lp, curr_row_idx+w+1, title);
+			for(s = 0; s < ns; s++)
+			{
+				ia[n] = curr_row_idx+w+1;
+				ja[n] = nw*nt+s*nw+w+1;
+				ar[n] = 1;
+				n++;
+			}
+			if(is_integer)
+                                glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1, 1);
+			else
+				glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1.0, 1.0);
+		}
+		if(n != ne)
+			printf("ns= %d nw = %d nt = %d n = %d ne = %d\n", ns, nw, nt, n, ne);
+		STARPU_ASSERT(n == ne);
+
+		glp_load_matrix(lp, ne-1, ia, ja, ar);
+	}
+
+	glp_smcp parm;
+	glp_init_smcp(&parm);
+	parm.msg_lev = GLP_MSG_OFF;
+	int ret = glp_simplex(lp, &parm);
+
+/* 	char str[50]; */
+/* 	sprintf(str, "outpu_lp_%g", tmax); */
+
+/* 	glp_print_sol(lp, str); */
+
+	if (ret)
+	{
+		printf("error in simplex\n");
+		glp_delete_prob(lp);
+		lp = NULL;
+		return 0.0;
+	}
+
+	int stat = glp_get_prim_stat(lp);
+	/* if we don't have a solution return */
+	if(stat == GLP_NOFEAS)
+	{
+		glp_delete_prob(lp);
+//		printf("no_sol in tmax = %lf\n", tmax);
+		lp = NULL;
+		return 0.0;
+	}
+
+
+	if (is_integer)
+        {
+                glp_iocp iocp;
+                glp_init_iocp(&iocp);
+                iocp.msg_lev = GLP_MSG_OFF;
+		glp_intopt(lp, &iocp);
+		int stat = glp_mip_status(lp);
+		/* if we don't have a solution return */
+		if(stat == GLP_NOFEAS)
+		{
+//			printf("no int sol in tmax = %lf\n", tmax);
+			glp_delete_prob(lp);
+			lp = NULL;
+			return 0.0;
+		}
+	}
+
+	double res = glp_get_obj_val(lp);
+	for (w = 0; w < nw; w++)
+		for (t = 0; t < nt; t++)
+/* 			if (integer) */
+/* 				tasks[w][t] = (double)glp_mip_col_val(lp, colnum(w, t)); */
+/*                         else */
+				tasks[w][t] = glp_get_col_prim(lp, colnum(w, t));
+	
+//	printf("for tmax %lf\n", tmax);
+	for(s = 0; s < ns; s++)
+		for(w = 0; w < nw; w++)
+		{
+			if (is_integer)
+				w_in_s[s][w] = (double)glp_mip_col_val(lp, nw*nt+s*nw+w+1);
+                        else
+				w_in_s[s][w] = glp_get_col_prim(lp, nw*nt+s*nw+w+1);
+//			printf("w_in_s[%d][%d]=%lf\n", s, w, w_in_s[s][w]);
+		}
+//	printf("\n");
+
+	glp_delete_prob(lp);
+	return res;
+}
+
+double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw], double flops[ns], double res[ns][nw], int  total_nw[nw])
+{
+	int integer = 1;
+	int s, w;
+	glp_prob *lp;
+
+	int ne =
+		(ns*nw+1)*(ns+nw)
+		+ 1; /* glp dumbness */
+	int n = 1;
+	int ia[ne], ja[ne];
+	double ar[ne];
+
+	lp = glp_create_prob();
+
+	glp_set_prob_name(lp, "sample");
+	glp_set_obj_dir(lp, GLP_MAX);
+        glp_set_obj_name(lp, "max speed");
+
+	/* we add nw*ns columns one for each type of worker in each context
+	   and another column corresponding to the 1/tmax bound (bc 1/tmax is a variable too)*/
+	glp_add_cols(lp, nw*ns+1);
+
+	for(s = 0; s < ns; s++)
+	{
+		for(w = 0; w < nw; w++)
+		{
+			char name[32];
+			snprintf(name, sizeof(name), "worker%dctx%d", w, s);
+			glp_set_col_name(lp, n, name);
+			if (integer)
+			{
+				glp_set_col_kind(lp, n, GLP_IV);
+				glp_set_col_bnds(lp, n, GLP_LO, 0, 0);
+			}
+			else
+				glp_set_col_bnds(lp, n, GLP_LO, 0.0, 0.0);
+			n++;
+		}
+	}
+
+	/*1/tmax should belong to the interval [0.0;1.0]*/
+	glp_set_col_name(lp, n, "vmax");
+	glp_set_col_bnds(lp, n, GLP_DB, 0.0, 1.0);
+	/* Z = 1/tmax -> 1/tmax structural variable, nCPUs & nGPUs in ctx are auxiliar variables */
+	glp_set_obj_coef(lp, n, 1.0);
+
+	n = 1;
+	/* one row corresponds to one ctx*/
+	glp_add_rows(lp, ns);
+
+	for(s = 0; s < ns; s++)
+	{
+		char name[32];
+		snprintf(name, sizeof(name), "ctx%d", s);
+		glp_set_row_name(lp, s+1, name);
+		glp_set_row_bnds(lp, s+1, GLP_LO, 0., 0.);
+
+		for(w = 0; w < nw; w++)
+		{
+			int s2;
+			for(s2 = 0; s2 < ns; s2++)
+			{
+				if(s2 == s)
+				{
+					ia[n] = s+1;
+					ja[n] = w + nw*s2 + 1;
+					ar[n] = v[s][w];
+//					printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
+				}
+				else
+				{
+					ia[n] = s+1;
+					ja[n] = w + nw*s2 + 1;
+					ar[n] = 0.0;
+//					printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
+				}
+				n++;
+			}
+		}
+		/* 1/tmax */
+		ia[n] = s+1;
+		ja[n] = ns*nw+1;
+		ar[n] = (-1) * flops[s];
+//		printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
+		n++;
+	}
+
+	/*we add another linear constraint : sum(all cpus) = 9 and sum(all gpus) = 3 */
+	glp_add_rows(lp, nw);
+
+	for(w = 0; w < nw; w++)
+	{
+		char name[32];
+		snprintf(name, sizeof(name), "w%d", w);
+		glp_set_row_name(lp, ns+w+1, name);
+		for(s = 0; s < ns; s++)
+		{
+			int w2;
+			for(w2 = 0; w2 < nw; w2++)
+			{
+				if(w2 == w)
+				{
+					ia[n] = ns+w+1;
+					ja[n] = w2+s*nw + 1;
+					ar[n] = 1.0;
+//					printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
+				}
+				else
+				{
+					ia[n] = ns+w+1;
+					ja[n] = w2+s*nw + 1;
+					ar[n] = 0.0;
+//					printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
+				}
+				n++;
+			}
+		}
+		/* 1/tmax */
+		ia[n] = ns+w+1;
+		ja[n] = ns*nw+1;
+		ar[n] = 0.0;
+//		printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
+		n++;
+
+		/*sum(all gpus) = 3*/
+		if(w == 0)
+			glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[0], total_nw[0]);
+
+		/*sum(all cpus) = 9*/
+		if(w == 1)
+			glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[1], total_nw[1]);
+	}
+
+	STARPU_ASSERT(n == ne);
+
+	glp_load_matrix(lp, ne-1, ia, ja, ar);
+
+	glp_smcp parm;
+	glp_init_smcp(&parm);
+	parm.msg_lev = GLP_MSG_OFF;
+	int ret = glp_simplex(lp, &parm);
+	if (ret)
+        {
+                printf("error in simplex\n");
+		glp_delete_prob(lp);
+                lp = NULL;
+                return 0.0;
+        }
+
+	int stat = glp_get_prim_stat(lp);
+        /* if we don't have a solution return */
+        if(stat == GLP_NOFEAS)
+        {
+                glp_delete_prob(lp);
+//              printf("no_sol in tmax = %lf\n", tmax);                                                                                                                                                             
+                lp = NULL;
+                return 0.0;
+        }
+
+
+	if (integer)
+        {
+                glp_iocp iocp;
+                glp_init_iocp(&iocp);
+                iocp.msg_lev = GLP_MSG_OFF;
+                glp_intopt(lp, &iocp);
+                int stat = glp_mip_status(lp);
+                /* if we don't have a solution return */
+                if(stat == GLP_NOFEAS)
+                {
+//                      printf("no int sol in tmax = %lf\n", tmax);                                                                                                                                                 
+                        glp_delete_prob(lp);
+                        lp = NULL;
+                        return 0.0;
+                }
+        }
+
+	double vmax = glp_get_obj_val(lp);
+
+	n = 1;
+	for(s = 0; s < ns; s++)
+	{
+		for(w = 0; w < nw; w++)
+		{
+			if (integer)
+                                res[s][w] = (double)glp_mip_col_val(lp, n);
+			else
+				res[s][w] = glp_get_col_prim(lp, n);
+//			printf("%d/%d: res %lf flops = %lf v = %lf\n", w,s, res[s][w], flops[s], v[s][w]);
+			n++;
+		}
+	}
+
+	glp_delete_prob(lp);
+	return vmax;
+}
+
+#endif // STARPU_HAVE_GLPK_H

+ 1 - 198
sc_hypervisor/src/policies_utils/lp_tools.c

@@ -21,203 +21,6 @@
 
 #ifdef STARPU_HAVE_GLPK_H
 
-double sc_hypervisor_lp_compute_nworkers_per_ctx(int ns, int nw, double v[ns][nw], double flops[ns], double res[ns][nw], int  total_nw[nw])
-{
-	int integer = 1;
-	int s, w;
-	glp_prob *lp;
-
-	int ne =
-		(ns*nw+1)*(ns+nw)
-		+ 1; /* glp dumbness */
-	int n = 1;
-	int ia[ne], ja[ne];
-	double ar[ne];
-
-	lp = glp_create_prob();
-
-	glp_set_prob_name(lp, "sample");
-	glp_set_obj_dir(lp, GLP_MAX);
-        glp_set_obj_name(lp, "max speed");
-
-	/* we add nw*ns columns one for each type of worker in each context
-	   and another column corresponding to the 1/tmax bound (bc 1/tmax is a variable too)*/
-	glp_add_cols(lp, nw*ns+1);
-
-	for(s = 0; s < ns; s++)
-	{
-		for(w = 0; w < nw; w++)
-		{
-			char name[32];
-			snprintf(name, sizeof(name), "worker%dctx%d", w, s);
-			glp_set_col_name(lp, n, name);
-			if (integer)
-			{
-				glp_set_col_kind(lp, n, GLP_IV);
-				glp_set_col_bnds(lp, n, GLP_LO, 0, 0);
-			}
-			else
-				glp_set_col_bnds(lp, n, GLP_LO, 0.0, 0.0);
-			n++;
-		}
-	}
-
-	/*1/tmax should belong to the interval [0.0;1.0]*/
-	glp_set_col_name(lp, n, "vmax");
-	glp_set_col_bnds(lp, n, GLP_DB, 0.0, 1.0);
-	/* Z = 1/tmax -> 1/tmax structural variable, nCPUs & nGPUs in ctx are auxiliar variables */
-	glp_set_obj_coef(lp, n, 1.0);
-
-	n = 1;
-	/* one row corresponds to one ctx*/
-	glp_add_rows(lp, ns);
-
-	for(s = 0; s < ns; s++)
-	{
-		char name[32];
-		snprintf(name, sizeof(name), "ctx%d", s);
-		glp_set_row_name(lp, s+1, name);
-		glp_set_row_bnds(lp, s+1, GLP_LO, 0., 0.);
-
-		for(w = 0; w < nw; w++)
-		{
-			int s2;
-			for(s2 = 0; s2 < ns; s2++)
-			{
-				if(s2 == s)
-				{
-					ia[n] = s+1;
-					ja[n] = w + nw*s2 + 1;
-					ar[n] = v[s][w];
-//					printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
-				}
-				else
-				{
-					ia[n] = s+1;
-					ja[n] = w + nw*s2 + 1;
-					ar[n] = 0.0;
-//					printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
-				}
-				n++;
-			}
-		}
-		/* 1/tmax */
-		ia[n] = s+1;
-		ja[n] = ns*nw+1;
-		ar[n] = (-1) * flops[s];
-//		printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
-		n++;
-	}
-
-	/*we add another linear constraint : sum(all cpus) = 9 and sum(all gpus) = 3 */
-	glp_add_rows(lp, nw);
-
-	for(w = 0; w < nw; w++)
-	{
-		char name[32];
-		snprintf(name, sizeof(name), "w%d", w);
-		glp_set_row_name(lp, ns+w+1, name);
-		for(s = 0; s < ns; s++)
-		{
-			int w2;
-			for(w2 = 0; w2 < nw; w2++)
-			{
-				if(w2 == w)
-				{
-					ia[n] = ns+w+1;
-					ja[n] = w2+s*nw + 1;
-					ar[n] = 1.0;
-//					printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
-				}
-				else
-				{
-					ia[n] = ns+w+1;
-					ja[n] = w2+s*nw + 1;
-					ar[n] = 0.0;
-//					printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
-				}
-				n++;
-			}
-		}
-		/* 1/tmax */
-		ia[n] = ns+w+1;
-		ja[n] = ns*nw+1;
-		ar[n] = 0.0;
-//		printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
-		n++;
-
-		/*sum(all gpus) = 3*/
-		if(w == 0)
-			glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[0], total_nw[0]);
-
-		/*sum(all cpus) = 9*/
-		if(w == 1)
-			glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[1], total_nw[1]);
-	}
-
-	STARPU_ASSERT(n == ne);
-
-	glp_load_matrix(lp, ne-1, ia, ja, ar);
-
-	glp_smcp parm;
-	glp_init_smcp(&parm);
-	parm.msg_lev = GLP_MSG_OFF;
-	int ret = glp_simplex(lp, &parm);
-	if (ret)
-        {
-                printf("error in simplex\n");
-		glp_delete_prob(lp);
-                lp = NULL;
-                return 0.0;
-        }
-
-	int stat = glp_get_prim_stat(lp);
-        /* if we don't have a solution return */
-        if(stat == GLP_NOFEAS)
-        {
-                glp_delete_prob(lp);
-//              printf("no_sol in tmax = %lf\n", tmax);                                                                                                                                                             
-                lp = NULL;
-                return 0.0;
-        }
-
-
-	if (integer)
-        {
-                glp_iocp iocp;
-                glp_init_iocp(&iocp);
-                iocp.msg_lev = GLP_MSG_OFF;
-                glp_intopt(lp, &iocp);
-                int stat = glp_mip_status(lp);
-                /* if we don't have a solution return */
-                if(stat == GLP_NOFEAS)
-                {
-//                      printf("no int sol in tmax = %lf\n", tmax);                                                                                                                                                 
-                        glp_delete_prob(lp);
-                        lp = NULL;
-                        return 0.0;
-                }
-        }
-
-	double vmax = glp_get_obj_val(lp);
-
-	n = 1;
-	for(s = 0; s < ns; s++)
-	{
-		for(w = 0; w < nw; w++)
-		{
-			if (integer)
-                                res[s][w] = (double)glp_mip_col_val(lp, n);
-			else
-				res[s][w] = glp_get_col_prim(lp, n);
-//			printf("%d/%d: res %lf flops = %lf v = %lf\n", w,s, res[s][w], flops[s], v[s][w]);
-			n++;
-		}
-	}
-
-	glp_delete_prob(lp);
-	return vmax;
-}
 
 #endif //STARPU_HAVE_GLPK_H
 
@@ -249,7 +52,7 @@ double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_work
 //		printf("%d: flops %lf\n", sched_ctxs[i], flops[i]);
 	}
 
-	return 1/sc_hypervisor_lp_compute_nworkers_per_ctx(nsched_ctxs, ntypes_of_workers, v, flops, res, total_nw);
+	return 1/sc_hypervisor_lp_simulate_distrib_flops(nsched_ctxs, ntypes_of_workers, v, flops, res, total_nw);
 #else//STARPU_HAVE_GLPK_H
 	return 0.0;
 #endif//STARPU_HAVE_GLPK_H

+ 40 - 0
sc_hypervisor/src/policies_utils/policy_tools.c

@@ -588,3 +588,43 @@ void sc_hypervisor_group_workers_by_type(int *workers, int nworkers, int ntypes_
 			total_nw[0]++;
 	}
 }
+
+void sc_hypervisor_get_tasks_times(int nw, int nt, double times[nw][nt], int *workers, unsigned size_ctxs, struct sc_hypervisor_policy_task_pool *task_pools)
+{
+        struct sc_hypervisor_policy_task_pool *tp;
+        int w, t;
+        for (w = 0; w < nw; w++)
+        {
+                for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
+                {
+			int worker = workers == NULL ? w : workers[w];
+                        enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(worker);
+                        double length = starpu_history_based_expected_perf(tp->cl->model, arch, tp->footprint);
+
+                        if (isnan(length))
+                                times[w][t] = NAN;
+			else
+			{
+                                times[w][t] = length / 1000.;
+
+				double transfer_time = 0.0;
+				enum starpu_archtype arch = starpu_worker_get_type(worker);
+				if(arch == STARPU_CUDA_WORKER)
+				{
+					unsigned worker_in_ctx = starpu_sched_ctx_contains_worker(worker, tp->sched_ctx_id);
+					if(!worker_in_ctx && !size_ctxs)
+					{
+						double transfer_velocity = starpu_get_bandwidth_RAM_CUDA(worker);
+						transfer_time +=  (tp->footprint / transfer_velocity) / 1000. ;
+					}
+					double latency = starpu_get_latency_RAM_CUDA(worker);
+					transfer_time += latency/1000.;
+
+				}
+//				printf("%d/%d %s x %d time = %lf transfer_time = %lf\n", w, tp->sched_ctx_id, tp->cl->model->symbol, tp->n, times[w][t], transfer_time);
+				times[w][t] += transfer_time;
+			}
+                }
+        }
+}
+

+ 2 - 6
src/sched_policies/eager_central_policy.c

@@ -113,13 +113,9 @@ static struct starpu_task *pop_every_task_eager_policy(unsigned sched_ctx_id)
 	struct _starpu_eager_center_policy_data *data = (struct _starpu_eager_center_policy_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
 	int workerid = starpu_worker_get_id();
 	
-	starpu_pthread_mutex_t *sched_mutex;
-	starpu_pthread_cond_t *sched_cond;
-	starpu_worker_get_sched_condition(workerid, &sched_mutex, &sched_cond);
-	
-	_STARPU_PTHREAD_MUTEX_LOCK(sched_mutex);
+	_STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex);
 	struct starpu_task* task = _starpu_fifo_pop_every_task(data->fifo, workerid);
-	_STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
+	_STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex);
 	return task;
 }