Parcourir la source

merge trunk@9396:9447

Nathalie Furmento il y a 12 ans
Parent
commit
69c30703a4

+ 5 - 4
ChangeLog

@@ -134,10 +134,6 @@ Small features:
   * New configure option --enable-mpi-progression-hook to enable the
     activity polling method for StarPU-MPI.
   * Permit to disable sequential consistency for a given task.
-  * New batch files to run StarPU applications with Microsoft Visual C
-  * Add examples/release/Makefile to test StarPU examples against an
-    installed version of StarPU. That can also be used to test
-    examples using a previous API.
 
 Changes:
   * Fix the block filter functions.
@@ -184,6 +180,11 @@ Changes:
     priorities
   * starpu_scheduler.h is no longer automatically included by
     starpu.h, it has to be manually included when needed
+  * New batch files to run StarPU applications with Microsoft Visual C
+  * Add examples/release/Makefile to test StarPU examples against an
+    installed version of StarPU. That can also be used to test
+    examples using a previous API.
+  * Tutorial is installed in ${docdir}/tutorial
 
 Small changes:
   * STARPU_NCPU should now be used instead of STARPU_NCPUS. STARPU_NCPUS is

+ 7 - 7
configure.ac

@@ -1855,14 +1855,14 @@ AC_CONFIG_COMMANDS([executable-scripts], [
 
 # Create links to ICD files in build/socl/vendors directory. SOCL will use this
 # directory as the OCL_ICD_VENDORS directory
-if test -d /etc/OpenCL/vendors; then
-    for icd in /etc/OpenCL/vendors/*.icd ; do
-        if test "$(basename $icd)" != "socl.icd" ; then
-            new_icd=$(basename $icd)
- 	    AC_CONFIG_LINKS([socl/vendors/$new_icd:$icd])
+for icd in /etc/OpenCL/vendors/*.icd ; do
+	if test -f $icd ; then
+	        if test "$(basename $icd)" != "socl.icd" ; then
+        		new_icd=$(basename $icd)
+			AC_CONFIG_LINKS([socl/vendors/$new_icd:$icd])
+		fi
         fi
-    done
-fi
+done
 
 AC_CONFIG_FILES(tests/regression/regression.sh tests/regression/profiles tests/regression/profiles.build.only)
 AC_CONFIG_HEADER(src/common/config.h include/starpu_config.h gcc-plugin/include/starpu-gcc/config.h starpu-top/config.h)

+ 26 - 1
doc/Makefile.am

@@ -43,7 +43,32 @@ starpu_TEXINFOS = 		\
 
 MAINTAINERCLEANFILES = starpu.pdf starpu.html
 
-EXTRA_DIST = starpu.css
+EXTRA_DIST = starpu.css				\
+	tutorial/README				\
+	tutorial/Makefile			\
+	tutorial/hello_world.c			\
+	tutorial/hello_world_plugin.c		\
+	tutorial/vector_scal.c			\
+	tutorial/vector_scal_cpu.c		\
+	tutorial/vector_scal_cuda.cu		\
+	tutorial/vector_scal_opencl.c		\
+	tutorial/vector_scal_opencl_kernel.cl	\
+	tutorial/vector_scal_plugin.c		\
+	tutorial/vector_scal_plugin_cuda.cu
+
+starpu_tutorial_dir	=	$(docdir)/tutorial
+starpu_tutorial__DATA	=			\
+	tutorial/README				\
+	tutorial/Makefile			\
+	tutorial/hello_world.c			\
+	tutorial/hello_world_plugin.c		\
+	tutorial/vector_scal.c			\
+	tutorial/vector_scal_cpu.c		\
+	tutorial/vector_scal_cuda.cu		\
+	tutorial/vector_scal_opencl.c		\
+	tutorial/vector_scal_opencl_kernel.cl	\
+	tutorial/vector_scal_plugin.c		\
+	tutorial/vector_scal_plugin_cuda.cu
 
 dist_pdf_DATA = starpu.pdf
 dist_html_DATA = starpu.html

+ 3 - 2
doc/chapters/mpi-support.texi

@@ -30,7 +30,7 @@ according to the task graph and an application-provided distribution.
 @node Simple Example
 @section Simple Example
 
-The flags required to compile or link against the MPI layer are then
+The flags required to compile or link against the MPI layer are
 accessible with the following commands:
 
 @example
@@ -38,7 +38,8 @@ $ pkg-config --cflags starpumpi-1.0  # options for the compiler
 $ pkg-config --libs starpumpi-1.0    # options for the linker
 @end example
 
-Also pass the @code{--static} option if the application is to be linked statically.
+You also need to pass the @code{--static} option if the application is to
+be linked statically.
 
 @cartouche
 @smallexample

+ 15 - 1
doc/tutorial/Makefile

@@ -24,7 +24,7 @@ HAS_OPENCL	=	$(shell pkg-config --libs starpu-1.1 |grep -i opencl)
 %.o: %.cu
 	nvcc $(CFLAGS) $< -c
 
-TARGETS = hello_world vector_scal hello_world_plugin
+TARGETS = hello_world vector_scal hello_world_plugin vector_scal_plugin
 
 all: $(TARGETS)
 
@@ -45,5 +45,19 @@ vector_scal: $(VECTOR_SCAL_PREREQUISITES)
 hello_world_plugin: hello_world_plugin.c
 	$(CC) $(CFLAGS) -fplugin=`pkg-config starpu-1.1 --variable=gccplugin` $(LDFLAGS) $^ -o $@
 
+VECTOR_SCAL_PLUGIN_PREREQUISITES	=	vector_scal_plugin.o
+ifneq ($(strip $(HAS_CUDA)),)
+VECTOR_SCAL_PLUGIN_PREREQUISITES	+=	vector_scal_plugin_cuda.o
+VECTOR_SCAL_PLUGIN_COMPILER		=	$(NVCC)
+else
+VECTOR_SCAL_PLUGIN_COMPILER		=	$(CC)
+endif
+
+vector_scal_plugin.o: vector_scal_plugin.c
+	$(CC) -c $(CFLAGS) -fplugin=`pkg-config starpu-1.1 --variable=gccplugin` $^ -o $@
+
+vector_scal_plugin: $(VECTOR_SCAL_PLUGIN_PREREQUISITES)
+	$(CC) -fplugin=`pkg-config starpu-1.1 --variable=gccplugin` $(LDFLAGS) $(VECTOR_SCAL_PLUGIN_PREREQUISITES) -o $@
+
 clean:
 	rm -f $(TARGETS) *.o

+ 4 - 0
doc/tutorial/README

@@ -47,3 +47,7 @@ Instructions on how to compile and run StarPU examples
 % make hello_world_plugin
 % ./hello_world_plugin
 
+% make vector_scal_plugin
+% STARPU_NCPU=0 STARPU_NCUDA=0 ./vector_scal_plugin
+% STARPU_NCPU=0 STARPU_NOPENCL=0 ./vector_scal_plugin
+% STARPU_NOPENCL=0 STARPU_NCUDA=0 ./vector_scal_plugin

+ 4 - 5
doc/tutorial/vector_scal_opencl_kernel.cl

@@ -1,7 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010, 2011  Université de Bordeaux 1
- * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2013  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -15,11 +14,11 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
-__kernel void vector_mult_opencl(__global float* val, int nx, float factor)
+__kernel void vector_mult_opencl(unsigned int nx, __global float* val, float factor)
 {
         const int i = get_global_id(0);
-        if (i < nx) {
+        if (i < nx)
+	{
                 val[i] *= factor;
         }
 }
-

+ 194 - 0
doc/tutorial/vector_scal_plugin.c

@@ -0,0 +1,194 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012 Institut National de Recherche en Informatique et Automatique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/* This example showcases features of the StarPU GCC plug-in.  It defines a
+   "vector scaling" task with multiple CPU implementations, an OpenCL
+   implementation, and a CUDA implementation.
+
+   Compiling it without `-fplugin=starpu.so' yields valid sequential code.  */
+
+#include <math.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+
+/* Declare and define the standard CPU implementation.  */
+
+static void vector_scal (unsigned int size, float vector[size], float factor)
+  __attribute__ ((task));
+
+/* The CPU implementation.  */
+static void
+vector_scal (unsigned int size, float vector[size], float factor)
+{
+  unsigned int i;
+  for (i = 0; i < size; i++)
+    vector[i] *= factor;
+}
+
+
+#if defined STARPU_GCC_PLUGIN && defined __SSE__
+/* The SSE-capable CPU implementation.  */
+
+#include <xmmintrin.h>
+
+static void vector_scal_sse (unsigned int size, float vector[size], float factor)
+  __attribute__ ((task_implementation ("cpu", vector_scal)));
+
+static void
+vector_scal_sse (unsigned int size, float vector[size], float factor)
+{
+  unsigned int n_iterations = size / 4;
+
+  __m128 *VECTOR = (__m128 *) vector;
+  __m128 _FACTOR __attribute__ ((aligned (16)));
+  _FACTOR = _mm_set1_ps (factor);
+
+  unsigned int i;
+  for (i = 0; i < n_iterations; i++)
+    VECTOR[i] = _mm_mul_ps (_FACTOR, VECTOR[i]);
+
+  unsigned int remainder = size % 4;
+  if (remainder != 0)
+    {
+      unsigned int start = 4 * n_iterations;
+      for (i = start; i < start + remainder; ++i)
+	vector[i] = factor * vector[i];
+    }
+}
+#endif /* __SSE__ */
+
+
+/* Declaration and definition of the OpenCL implementation.  */
+
+#if defined STARPU_GCC_PLUGIN && defined STARPU_USE_OPENCL
+
+#include <starpu_opencl.h>
+
+/* The OpenCL programs, loaded from `main'.  */
+static struct starpu_opencl_program cl_programs;
+
+static void vector_scal_opencl (unsigned int size, float vector[size], float factor)
+  __attribute__ ((task_implementation ("opencl", vector_scal)));
+
+static void
+vector_scal_opencl (unsigned int size, float vector[size], float factor)
+{
+  int id, devid, err;
+  cl_kernel kernel;
+  cl_command_queue queue;
+  cl_event event;
+
+  cl_mem val = (cl_mem) vector;
+
+  id = starpu_worker_get_id ();
+  devid = starpu_worker_get_devid (id);
+
+  /* Prepare to invoke the kernel.  In the future, this will be largely
+     automated.  */
+  err = starpu_opencl_load_kernel (&kernel, &queue, &cl_programs,
+				   "vector_mult_opencl", devid);
+  if (err != CL_SUCCESS)
+    STARPU_OPENCL_REPORT_ERROR (err);
+
+  err = clSetKernelArg (kernel, 0, sizeof (size), &size);
+  err |= clSetKernelArg (kernel, 1, sizeof (val), &val);
+  err |= clSetKernelArg (kernel, 2, sizeof (factor), &factor);
+  if (err)
+    STARPU_OPENCL_REPORT_ERROR (err);
+
+  size_t global = size, local = 1;
+  err = clEnqueueNDRangeKernel (queue, kernel, 1, NULL, &global, &local, 0,
+				NULL, &event);
+  if (err != CL_SUCCESS)
+    STARPU_OPENCL_REPORT_ERROR (err);
+
+  clFinish (queue);
+  starpu_opencl_collect_stats (event);
+  clReleaseEvent (event);
+
+  starpu_opencl_release_kernel (kernel);
+}
+
+#endif
+
+
+#ifdef STARPU_USE_CUDA
+
+/* Declaration of the CUDA implementation.  The definition itself is in the
+   `.cu' file itself.  */
+
+extern void vector_scal_cuda (unsigned int size, float vector[size], float factor)
+  __attribute__ ((task_implementation ("cuda", vector_scal)));
+
+#endif
+
+
+#define EPSILON 1e-3
+static bool
+check (unsigned int size, float vector[size], float factor)
+{
+  unsigned int i;
+
+  for (i = 0; i < size; i++)
+    {
+      if (fabs(vector[i] - i * factor) > i*factor*EPSILON)
+        {
+          fprintf(stderr, "%.2f != %.2f\n", vector[i], i*factor);
+          return false;
+        }
+    }
+  fprintf(stderr, "computation is correct\n");
+  return true;
+}
+
+
+int
+main (void)
+{
+  bool valid;
+
+#pragma starpu initialize
+
+#if defined STARPU_GCC_PLUGIN && defined STARPU_USE_OPENCL
+  starpu_opencl_load_opencl_from_file ("vector_scal_opencl_kernel.cl",
+				       &cl_programs, "");
+#endif
+
+#define NX     0x100000
+#define FACTOR 3.14
+
+  {
+    float vector[NX] __attribute__ ((heap_allocated, registered));
+
+    unsigned int i;
+    for (i = 0; i < NX; i++)
+      vector[i] = (float) i;
+
+    vector_scal (NX, vector, FACTOR);
+
+#pragma starpu wait
+#pragma starpu acquire vector
+    valid = check (NX, vector, FACTOR);
+#pragma starpu release vector
+
+  } /* VECTOR is automatically freed here.  */
+
+#pragma starpu shutdown
+
+  return valid ? EXIT_SUCCESS : EXIT_FAILURE;
+}

+ 23 - 5
gcc-plugin/examples/vector_scal/vector_scal_opencl_kernel.cl

@@ -16,11 +16,29 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
-__kernel void vector_mult_opencl(unsigned int nx, __global float* val, float factor)
+/* CUDA implementation of the `vector_scal' task.  */
+
+#include <starpu.h>
+#include <starpu_cuda.h>
+#include <stdlib.h>
+
+static __global__ void
+vector_mult_cuda (unsigned int n, float *val, float factor)
 {
-        const int i = get_global_id(0);
-        if (i < nx) {
-                val[i] *= factor;
-        }
+  unsigned i = blockIdx.x * blockDim.x + threadIdx.x;
+
+  if (i < n)
+    val[i] *= factor;
 }
 
+extern "C" void
+vector_scal_cuda (unsigned int size, float vector[], float factor)
+{
+  unsigned threads_per_block = 64;
+  unsigned nblocks = (size + threads_per_block - 1) / threads_per_block;
+
+  vector_mult_cuda <<< nblocks, threads_per_block, 0,
+       starpu_cuda_get_local_stream () >>> (size, vector, factor);
+
+  cudaStreamSynchronize (starpu_cuda_get_local_stream ());
+}

+ 52 - 22
gcc-plugin/examples/Makefile.am

@@ -13,15 +13,44 @@
 #
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 
+TESTS	=
+
+examplebindir = $(libdir)/starpu/plugin
+
 if STARPU_USE_CPU
 
-noinst_PROGRAMS =				\
-  hello-starpu matrix-mult stencil5 vector_scal/vector_scal
+if STARPU_HAVE_WINDOWS
+check_PROGRAMS	=	$(TESTS)
+else
+check_PROGRAMS	=	$(LOADER) $(TESTS)
+endif
+
+if !STARPU_HAVE_WINDOWS
+## test loader program
+LOADER			=	loader
+loader_CPPFLAGS =  $(AM_CFLAGS) $(AM_CPPFLAGS) -I$(top_builddir)/src/
+LOADER_BIN		=	$(abs_top_builddir)/examples/$(LOADER)
+loader_SOURCES		=	../../tests/loader.c
+
+if STARPU_HAVE_AM111
+TESTS_ENVIRONMENT	=	top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)"
+LOG_COMPILER		=	$(LOADER_BIN)
+else
+TESTS_ENVIRONMENT	=	top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" $(LOADER_BIN)
+endif
+
+endif
+
+examplebin_PROGRAMS =			\
+	hello-starpu 			\
+	matrix-mult			\
+	stencil5			\
+	vector_scal/vector_scal
 
-if !NO_BLAS_LIB
-noinst_PROGRAMS +=				\
-  cholesky/cholesky
-endif !NO_BLAS_LIB
+#if !NO_BLAS_LIB
+#examplebin_PROGRAMS +=				\
+#  cholesky/cholesky
+#endif !NO_BLAS_LIB
 endif STARPU_USE_CPU
 
 AM_LDFLAGS = $(top_builddir)/src/@LIBSTARPU_LINK@
@@ -38,22 +67,20 @@ AM_CFLAGS =							\
   -fplugin-arg-starpu-verbose					\
   -Wall
 
-noinst_HEADERS =				\
-  cholesky/cholesky.h				\
-  cholesky/cholesky_kernels.h
-
-EXTRA_DIST = vector_scal/vector_scal_opencl_kernel.cl
+#noinst_HEADERS =				\
+#  cholesky/cholesky.h				\
+#  cholesky/cholesky_kernels.h
 
-if !NO_BLAS_LIB
-cholesky_cholesky_SOURCES	=		\
-	cholesky/cholesky.c		\
-	cholesky/cholesky_models.c	\
-	cholesky/cholesky_kernels.c	\
-	$(top_srcdir)/examples/common/blas.c
-
-cholesky_cholesky_LDADD	=	\
-	$(STARPU_BLAS_LDFLAGS)
-endif
+#if !NO_BLAS_LIB
+#cholesky_cholesky_SOURCES	=		\
+#	cholesky/cholesky.c		\
+#	cholesky/cholesky_models.c	\
+#	cholesky/cholesky_kernels.c	\
+#	$(top_srcdir)/examples/common/blas.c
+#
+#cholesky_cholesky_LDADD	=	\
+#	$(STARPU_BLAS_LDFLAGS)
+#endif
 
 vector_scal_vector_scal_SOURCES = vector_scal/vector_scal.c
 
@@ -67,10 +94,13 @@ vector_scal_vector_scal_SOURCES += vector_scal/vector_scal_cuda.cu
 
 else !STARPU_USE_CUDA
 
-EXTRA_DIST += vector_scal/vector_scal_cuda.cu
+EXTRA_DIST = vector_scal/vector_scal_cuda.cu
 
 endif
 
+TESTS += $(examplebin_PROGRAMS)
+
+
 showcheck:
 	-cat $(TEST_LOGS) /dev/null
 	for i in $(SUBDIRS) ; do \

+ 13 - 8
gcc-plugin/examples/matrix-mult.c

@@ -137,13 +137,18 @@ main (int argc, char **argv)
 
   if (argc < 4)
     {
-      fprintf (stderr, "Usage: %s NLOOPS MATRIX-SIZE NSLICES\n", argv[0]);
-      return EXIT_FAILURE;
+      fprintf (stderr, "Using default values.\nCorrect usage: %s NLOOPS MATRIX-SIZE NSLICES\n", argv[0]);
+      mloop = nloop = 10;
+      zdim = ydim = xdim = 16;
+      nslicesz = nslicesy = nslicesx = 4;
+    }
+  else
+    {
+      mloop = nloop = atoi (argv[1]);
+      zdim = ydim = xdim = atoi (argv[2]);
+      nslicesz = nslicesy = nslicesx = atoi (argv[3]);
     }
 
-  mloop = nloop = atoi (argv[1]);
-  zdim = ydim = xdim = atoi (argv[2]);
-  nslicesz = nslicesy = nslicesx = atoi (argv[3]);
   bxdim = xdim / nslicesx;
   bydim = ydim / nslicesy;
   bzdim = zdim / nslicesz;
@@ -166,9 +171,9 @@ main (int argc, char **argv)
 
   gettimeofday (&start_all, NULL);
 
-  float A[zdim * ydim] __heap;
-  float B[xdim * zdim] __heap;
-  float C[xdim * ydim] __heap;
+  float A[zdim * ydim];
+  float B[xdim * zdim];
+  float C[xdim * ydim];
 
   srand (time (NULL));
   for (i = 0; i < zdim * ydim; i++)

+ 10 - 4
gcc-plugin/examples/stencil5.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2013  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -34,9 +34,15 @@ static void stencil5_cpu(float *xy, const float *xm1y, const float *xp1y, const
 	*xy = (*xy + *xm1y + *xp1y + *xym1 + *xyp1) / 5;
 }
 
-#define NITER_DEF 20000
-#define X         10
-#define Y         10
+#ifdef STARPU_QUICK_CHECK
+#  define NITER_DEF	5
+#  define X         	3
+#  define Y         	3
+#else
+#  define NITER_DEF	500
+#  define X         	20
+#  define Y         	20
+#endif
 
 int display = 0;
 int niter = NITER_DEF;

+ 8 - 7
gcc-plugin/examples/vector_scal/vector_scal.c

@@ -105,8 +105,8 @@ vector_scal_opencl (unsigned int size, float vector[size], float factor)
   if (err != CL_SUCCESS)
     STARPU_OPENCL_REPORT_ERROR (err);
 
-  err = clSetKernelArg (kernel, 0, sizeof (val), &val);
-  err |= clSetKernelArg (kernel, 1, sizeof (size), &size);
+  err = clSetKernelArg (kernel, 0, sizeof (size), &size);
+  err |= clSetKernelArg (kernel, 1, sizeof (val), &val);
   err |= clSetKernelArg (kernel, 2, sizeof (factor), &factor);
   if (err)
     STARPU_OPENCL_REPORT_ERROR (err);
@@ -140,9 +140,9 @@ extern void vector_scal_cuda (unsigned int size, float vector[size], float facto
 
 #define EPSILON 1e-3
 static bool
-check (size_t size, float vector[size], float factor)
+check (unsigned int size, float vector[size], float factor)
 {
-  size_t i;
+  unsigned int i;
 
   for (i = 0; i < size; i++)
     {
@@ -164,7 +164,7 @@ main (void)
 #pragma starpu initialize
 
 #if defined STARPU_GCC_PLUGIN && defined STARPU_USE_OPENCL
-  starpu_opencl_load_opencl_from_file ("vector_scal_opencl_kernel.cl",
+  starpu_opencl_load_opencl_from_file ("examples/basic_examples/vector_scal_opencl_kernel.cl",
 				       &cl_programs, "");
 #endif
 
@@ -174,15 +174,16 @@ main (void)
   {
     float vector[NX] __attribute__ ((heap_allocated, registered));
 
-    size_t i;
+    unsigned int i;
     for (i = 0; i < NX; i++)
       vector[i] = (float) i;
 
     vector_scal (NX, vector, FACTOR);
 
 #pragma starpu wait
-
+#pragma starpu acquire vector
     valid = check (NX, vector, FACTOR);
+#pragma starpu release vector
 
   } /* VECTOR is automatically freed here.  */
 

+ 1 - 1
gcc-plugin/examples/vector_scal/vector_scal_cuda.cu

@@ -32,7 +32,7 @@ vector_mult_cuda (unsigned int n, float *val, float factor)
 }
 
 extern "C" void
-vector_scal_cuda (size_t size, float vector[], float factor)
+vector_scal_cuda (unsigned int size, float vector[], float factor)
 {
   unsigned threads_per_block = 64;
   unsigned nblocks = (size + threads_per_block - 1) / threads_per_block;

+ 14 - 3
sc_hypervisor/include/sc_hypervisor_lp.h

@@ -32,9 +32,7 @@ extern "C"
 #include <glpk.h>
 #endif //STARPU_HAVE_GLPK_H
 
-/* returns 1/tmax, and computes in table res the nr of workers needed by each context st the system ends up in the smallest tmax*/
-double sc_hypervisor_lp_compute_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_workers, double velocity[nsched_ctxs][ntypes_of_workers], double flops[nsched_ctxs], 
-				    double res[nsched_ctxs][ntypes_of_workers], int total_nw[ntypes_of_workers]);
+struct sc_hypervisor_policy_task_pool; 
 
 /* returns tmax, and computes in table res the nr of workers needed by each context st the system ends up in the smallest tmax*/
 double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_workers, double res[nsched_ctxs][ntypes_of_workers], int total_nw[ntypes_of_workers]);
@@ -63,6 +61,19 @@ unsigned sc_hypervisor_lp_execute_dichotomy(int ns, int nw, double w_in_s[ns][nw
 					    double (*lp_estimated_distrib_func)(int ns, int nw, double draft_w_in_s[ns][nw], 
 									     unsigned is_integer, double tmax, void *specifc_data));
 
+#ifdef STARPU_HAVE_GLPK_H
+/* linear program that returns 1/tmax, and computes in table res the nr of workers needed by each context such that
+   the system ends up in the smallest tmax*/
+double sc_hypervisor_lp_simulate_distrib_flops(int nsched_ctxs, int ntypes_of_workers, double velocity[nsched_ctxs][ntypes_of_workers], 
+					       double flops[nsched_ctxs], double res[nsched_ctxs][ntypes_of_workers], int total_nw[ntypes_of_workers]);
+
+/* linear program that simulates a distribution of tasks that minimises the execution time of the tasks in the pool */
+double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_in_s[ns][nw], double tasks[nw][nt],
+					       double times[nw][nt], unsigned is_integer, double tmax, int *in_sched_ctxs,
+					       struct sc_hypervisor_policy_task_pool *tmp_task_pools);
+
+#endif // STARPU_HAVE_GLPK_H
+
 #ifdef __cplusplus
 }
 #endif

+ 4 - 0
sc_hypervisor/include/sc_hypervisor_policy.h

@@ -52,6 +52,9 @@ void sc_hypervisor_policy_remove_task_from_pool(struct starpu_task *task, uint32
 /* clone a task wrapper linked list */
 struct sc_hypervisor_policy_task_pool* sc_hypervisor_policy_clone_task_pool(struct sc_hypervisor_policy_task_pool *tp);
 
+/* get the execution time of the submitted tasks out of starpu's calibration files */
+void sc_hypervisor_get_tasks_times(int nw, int nt, double times[nw][nt], int *workers, unsigned size_ctxs, struct sc_hypervisor_policy_task_pool *task_pools);
+
 /* find the context with the lowest priority in order to move some workers */
 unsigned sc_hypervisor_find_lowest_prio_sched_ctx(unsigned req_sched_ctx, int nworkers_to_move);
 
@@ -97,6 +100,7 @@ int sc_hypervisor_has_velocity_gap_btw_ctxs(void);
 /* get the list of workers grouped by type */
 void sc_hypervisor_group_workers_by_type(int *workers, int nworkers, int ntypes_of_workers, int total_nw[ntypes_of_workers]);
 
+
 #ifdef __cplusplus
 }
 #endif

+ 1 - 0
sc_hypervisor/src/Makefile.am

@@ -27,6 +27,7 @@ libsc_hypervisor_la_SOURCES = 				\
 	policies_utils/policy_tools.c			\
 	policies_utils/task_pool.c			\
 	policies_utils/lp_tools.c			\
+	policies_utils/lp_programs.c			\
 	policies_utils/dichotomy.c			\
 	hypervisor_policies/idle_policy.c		\
 	hypervisor_policies/app_driven_policy.c		\

+ 24 - 276
sc_hypervisor/src/hypervisor_policies/teft_lp_policy.c

@@ -34,53 +34,7 @@ struct teft_lp_data
 	unsigned size_ctxs;
 };
 
-static void _get_tasks_times(int nw, int nt, double times[nw][nt], int *workers, unsigned size_ctxs)
-{
-        struct sc_hypervisor_policy_task_pool *tp;
-        int w, t;
-        for (w = 0; w < nw; w++)
-        {
-                for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
-                {
-			int worker = workers == NULL ? w : workers[w];
-                        enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(worker);
-                        double length = starpu_history_based_expected_perf(tp->cl->model, arch, tp->footprint);
-
-                        if (isnan(length))
-                                times[w][t] = NAN;
-			else
-			{
-                                times[w][t] = length / 1000.;
-
-				double transfer_time = 0.0;
-				enum starpu_archtype arch = starpu_worker_get_type(worker);
-				if(arch == STARPU_CUDA_WORKER)
-				{
-					unsigned worker_in_ctx = starpu_sched_ctx_contains_worker(worker, tp->sched_ctx_id);
-					if(!worker_in_ctx && !size_ctxs)
-					{
-						double transfer_velocity = starpu_get_bandwidth_RAM_CUDA(worker);
-						transfer_time +=  (tp->footprint / transfer_velocity) / 1000. ;
-					}
-					double latency = starpu_get_latency_RAM_CUDA(worker);
-					transfer_time += latency/1000.;
-
-				}
-//				printf("%d/%d %s x %d time = %lf transfer_time = %lf\n", w, tp->sched_ctx_id, tp->cl->model->symbol, tp->n, times[w][t], transfer_time);
-				times[w][t] += transfer_time;
-			}
-                }
-        }
-}
-
-
-
-/*
- * GNU Linear Programming Kit backend
- */
-#ifdef STARPU_HAVE_GLPK_H
-#include <glpk.h>
-static double _glp_resolve(int ns, int nw, double final_w_in_s[ns][nw], 
+static double _compute_workers_distrib(int ns, int nw, double final_w_in_s[ns][nw], 
 			   unsigned is_integer, double tmax, void *specific_data)
 {
 	struct teft_lp_data *sd = (struct teft_lp_data *)specific_data;
@@ -91,237 +45,23 @@ static double _glp_resolve(int ns, int nw, double final_w_in_s[ns][nw],
 	int *workers = sd->workers;
 	struct sc_hypervisor_policy_task_pool *tmp_task_pools = sd->tmp_task_pools;
 	unsigned size_ctxs = sd->size_ctxs;
-	
-	double w_in_s[ns][nw];
-	double tasks[nw][nt];
-	
+		
 	if(tmp_task_pools == NULL)
 		return 0.0;
-	struct sc_hypervisor_policy_task_pool * tp;
-	int t, w, s;
-	glp_prob *lp;
-
-	lp = glp_create_prob();
-	glp_set_prob_name(lp, "StarPU theoretical bound");
-	glp_set_obj_dir(lp, GLP_MAX);
-	glp_set_obj_name(lp, "total execution time");
-
-	{
-		double times[nw][nt];
-		int ne = nt * nw /* worker execution time */
-			+ nw * ns
-			+ nw * (nt + ns)
-			+ 1; /* glp dumbness */
-		int n = 1;
-		int ia[ne], ja[ne];
-		double ar[ne];
-
-		_get_tasks_times(nw, nt, times, workers, size_ctxs);
-
-		/* Variables: number of tasks i assigned to worker j, and tmax */
-		glp_add_cols(lp, nw*nt+ns*nw);
-#define colnum(w, t) ((t)*nw+(w)+1)
-		for(s = 0; s < ns; s++)
-			for(w = 0; w < nw; w++)
-				glp_set_obj_coef(lp, nw*nt+s*nw+w+1, 1.);
-
-		for (w = 0; w < nw; w++)
-			for (t = 0; t < nt; t++)
-			{
-				char name[32];
-				snprintf(name, sizeof(name), "w%dt%dn", w, t);
-				glp_set_col_name(lp, colnum(w, t), name);
-/* 				if (integer) */
-/*                                 { */
-/*                                         glp_set_col_kind(lp, colnum(w, t), GLP_IV); */
-/* 					glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0, 0); */
-/*                                 } */
-/* 				else */
-					glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0.0, 0.0);
-			}
-		for(s = 0; s < ns; s++)
-			for(w = 0; w < nw; w++)
-			{
-				char name[32];
-				snprintf(name, sizeof(name), "w%ds%dn", w, s);
-				glp_set_col_name(lp, nw*nt+s*nw+w+1, name);
-				if (is_integer)
-                                {
-                                        glp_set_col_kind(lp, nw*nt+s*nw+w+1, GLP_IV);
-                                        glp_set_col_bnds(lp, nw*nt+s*nw+w+1, GLP_DB, 0, 1);
-                                }
-                                else
-					glp_set_col_bnds(lp, nw*nt+s*nw+w+1, GLP_DB, 0.0, 1.0);
-			}
-
-		int *sched_ctxs = in_sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : in_sched_ctxs;
-
-		int curr_row_idx = 0;
-		/* Total worker execution time */
-		glp_add_rows(lp, nw*ns);
-		for (t = 0; t < nt; t++)
-		{
-			int someone = 0;
-			for (w = 0; w < nw; w++)
-				if (!isnan(times[w][t]))
-					someone = 1;
-			if (!someone)
-			{
-				/* This task does not have any performance model at all, abort */
-				printf("NO PERF MODELS\n");
-				glp_delete_prob(lp);
-				return 0.0;
-			}
-		}
-		/*sum(t[t][w]*n[t][w]) < x[s][w]*tmax */
-		for(s = 0; s < ns; s++)
-		{
-			for (w = 0; w < nw; w++)
-			{
-				char name[32], title[64];
-				starpu_worker_get_name(w, name, sizeof(name));
-				snprintf(title, sizeof(title), "worker %s", name);
-				glp_set_row_name(lp, curr_row_idx+s*nw+w+1, title);
-				for (t = 0, tp = tmp_task_pools; tp; t++, tp = tp->next)
-				{
-					if((int)tp->sched_ctx_id == sched_ctxs[s])
-					{
-						ia[n] = curr_row_idx+s*nw+w+1;
-						ja[n] = colnum(w, t);
-						if (isnan(times[w][t]))
-							ar[n] = 1000000000.;
-						else
-							ar[n] = times[w][t];
-						n++;
-					}
-				}
-				/* x[s][w] = 1 | 0 */
-				ia[n] = curr_row_idx+s*nw+w+1;
-				ja[n] = nw*nt+s*nw+w+1;
-				ar[n] = (-1) * tmax;
-				n++;
-				glp_set_row_bnds(lp, curr_row_idx+s*nw+w+1, GLP_UP, 0.0, 0.0);
-			}
-		}
-
-		curr_row_idx += nw*ns;
-
-		/* Total task completion */
-		glp_add_rows(lp, nt);
-		for (t = 0, tp = tmp_task_pools; tp; t++, tp = tp->next)
-		{
-			char name[32], title[64];
-			starpu_worker_get_name(w, name, sizeof(name));
-			snprintf(title, sizeof(title), "task %s key %x", tp->cl->name, (unsigned) tp->footprint);
-			glp_set_row_name(lp, curr_row_idx+t+1, title);
-			for (w = 0; w < nw; w++)
-			{
-				ia[n] = curr_row_idx+t+1;
-				ja[n] = colnum(w, t);
-				ar[n] = 1;
-				n++;
-			}
-			glp_set_row_bnds(lp, curr_row_idx+t+1, GLP_FX, tp->n, tp->n);
-		}
-
-		curr_row_idx += nt;
-
-		/* sum(x[s][i]) = 1 */
-		glp_add_rows(lp, nw);
-		for (w = 0; w < nw; w++)
-		{
-			char name[32], title[64];
-			starpu_worker_get_name(w, name, sizeof(name));
-			snprintf(title, sizeof(title), "w%x", w);
-			glp_set_row_name(lp, curr_row_idx+w+1, title);
-			for(s = 0; s < ns; s++)
-			{
-				ia[n] = curr_row_idx+w+1;
-				ja[n] = nw*nt+s*nw+w+1;
-				ar[n] = 1;
-				n++;
-			}
-			if(is_integer)
-                                glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1, 1);
-			else
-				glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1.0, 1.0);
-		}
-		if(n != ne)
-			printf("ns= %d nw = %d nt = %d n = %d ne = %d\n", ns, nw, nt, n, ne);
-		STARPU_ASSERT(n == ne);
-
-		glp_load_matrix(lp, ne-1, ia, ja, ar);
-	}
-
-	glp_smcp parm;
-	glp_init_smcp(&parm);
-	parm.msg_lev = GLP_MSG_OFF;
-	int ret = glp_simplex(lp, &parm);
-
-/* 	char str[50]; */
-/* 	sprintf(str, "outpu_lp_%g", tmax); */
-
-/* 	glp_print_sol(lp, str); */
-
-	if (ret)
-	{
-		printf("error in simplex\n");
-		glp_delete_prob(lp);
-		lp = NULL;
-		return 0.0;
-	}
 
-	int stat = glp_get_prim_stat(lp);
-	/* if we don't have a solution return */
-	if(stat == GLP_NOFEAS)
-	{
-		glp_delete_prob(lp);
-//		printf("no_sol in tmax = %lf\n", tmax);
-		lp = NULL;
-		return 0.0;
-	}
-
-
-	if (is_integer)
-        {
-                glp_iocp iocp;
-                glp_init_iocp(&iocp);
-                iocp.msg_lev = GLP_MSG_OFF;
-		glp_intopt(lp, &iocp);
-		int stat = glp_mip_status(lp);
-		/* if we don't have a solution return */
-		if(stat == GLP_NOFEAS)
-		{
-//			printf("no int sol in tmax = %lf\n", tmax);
-			glp_delete_prob(lp);
-			lp = NULL;
-			return 0.0;
-		}
-	}
-
-	double res = glp_get_obj_val(lp);
-	for (w = 0; w < nw; w++)
-		for (t = 0; t < nt; t++)
-/* 			if (integer) */
-/* 				tasks[w][t] = (double)glp_mip_col_val(lp, colnum(w, t)); */
-/*                         else */
-				tasks[w][t] = glp_get_col_prim(lp, colnum(w, t));
+	double w_in_s[ns][nw];
+	double tasks[nw][nt];
+	double times[nw][nt];
 	
-//	printf("for tmax %lf\n", tmax);
-	for(s = 0; s < ns; s++)
-		for(w = 0; w < nw; w++)
-		{
-			if (is_integer)
-				w_in_s[s][w] = (double)glp_mip_col_val(lp, nw*nt+s*nw+w+1);
-                        else
-				w_in_s[s][w] = glp_get_col_prim(lp, nw*nt+s*nw+w+1);
-//			printf("w_in_s[%d][%d]=%lf\n", s, w, w_in_s[s][w]);
-		}
-//	printf("\n");
+	sc_hypervisor_get_tasks_times(nw, nt, times, workers, size_ctxs, task_pools);
 
-	glp_delete_prob(lp);
+	double res = 0.0;
+#ifdef STARPU_HAVE_GLPK_H
+	res = sc_hypervisor_lp_simulate_distrib_tasks(ns, nw, nt, w_in_s, tasks, times, is_integer, tmax, in_sched_ctxs, tmp_task_pools);
+#endif //STARPU_HAVE_GLPK_H
 	if(res != 0.0)
 	{
+		int s, w, t;
 		for(s = 0; s < ns; s++)
 			for(w = 0; w < nw; w++)
 				final_w_in_s[s][w] = w_in_s[s][w];
@@ -344,7 +84,12 @@ static void _size_ctxs(int *sched_ctxs, int nsched_ctxs , int *workers, int nwor
 		nt++;
 
 	double w_in_s[ns][nw];
-	double tasks[nw][nt];
+//	double tasks[nw][nt];
+	double **tasks=(double**)malloc(nw*sizeof(double*));
+	int i;
+	for(i = 0; i < nw; i++)
+		tasks[i] = (double*)malloc(nt*sizeof(double));
+
 
 	struct teft_lp_data specific_data;
 	specific_data.nt = nt;
@@ -362,12 +107,17 @@ static void _size_ctxs(int *sched_ctxs, int nsched_ctxs , int *workers, int nwor
 	double tmin = smallest_tmax;
 
 	unsigned found_sol = sc_hypervisor_lp_execute_dichotomy(ns, nw, w_in_s, 1, (void*)&specific_data, 
-								tmin, tmax, smallest_tmax, _glp_resolve);
+								tmin, tmax, smallest_tmax, _compute_workers_distrib);
 
 	starpu_pthread_mutex_unlock(&mutex);
 	/* if we did find at least one solution redistribute the resources */
 	if(found_sol)
 		sc_hypervisor_lp_place_resources_in_ctx(ns, nw, w_in_s, sched_ctxs, workers, 1);
+	
+	for(i = 0; i < nw; i++)
+		free(tasks[i]);
+	free(tasks);
+
 }
 
 static void size_if_required()
@@ -460,7 +210,7 @@ static void teft_lp_handle_poped_task(unsigned sched_ctx, int worker, struct sta
 			double tmax = possible_tmax * ns;
 			double tmin = smallest_tmax;
 			unsigned found_sol = sc_hypervisor_lp_execute_dichotomy(ns, nw, w_in_s, 1, (void*)&specific_data, 
-								tmin, tmax, smallest_tmax, _glp_resolve);
+								tmin, tmax, smallest_tmax, _compute_workers_distrib);
 //			starpu_pthread_mutex_unlock(&mutex);
 
 			/* if we did find at least one solution redistribute the resources */
@@ -507,5 +257,3 @@ struct sc_hypervisor_policy teft_lp_policy = {
 	.custom = 0,
 	.name = "teft_lp"
 };
-
-#endif /* STARPU_HAVE_GLPK_H */

+ 450 - 0
sc_hypervisor/src/policies_utils/lp_programs.c

@@ -0,0 +1,450 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011 - 2013  INRIA
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+/*
+ * GNU Linear Programming Kit backend
+ */
+
+#include "sc_hypervisor_policy.h"
+#include "sc_hypervisor_lp.h"
+
+#ifdef STARPU_HAVE_GLPK_H
+
+double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_in_s[ns][nw], double tasks[nw][nt],
+					       double times[nw][nt], unsigned is_integer, double tmax, int *in_sched_ctxs,
+					       struct sc_hypervisor_policy_task_pool *tmp_task_pools)
+{
+	struct sc_hypervisor_policy_task_pool * tp;
+	int t, w, s;
+	glp_prob *lp;
+
+	lp = glp_create_prob();
+	glp_set_prob_name(lp, "StarPU theoretical bound");
+	glp_set_obj_dir(lp, GLP_MAX);
+	glp_set_obj_name(lp, "total execution time");
+
+	{
+		int ne = nt * nw /* worker execution time */
+			+ nw * ns
+			+ nw * (nt + ns)
+			+ 1; /* glp dumbness */
+		int n = 1;
+		int ia[ne], ja[ne];
+		double ar[ne];
+
+		/* Variables: number of tasks i assigned to worker j, and tmax */
+		glp_add_cols(lp, nw*nt+ns*nw);
+#define colnum(w, t) ((t)*nw+(w)+1)
+		for(s = 0; s < ns; s++)
+			for(w = 0; w < nw; w++)
+				glp_set_obj_coef(lp, nw*nt+s*nw+w+1, 1.);
+
+		for (w = 0; w < nw; w++)
+			for (t = 0; t < nt; t++)
+			{
+				char name[32];
+				snprintf(name, sizeof(name), "w%dt%dn", w, t);
+				glp_set_col_name(lp, colnum(w, t), name);
+/* 				if (integer) */
+/*                                 { */
+/*                                         glp_set_col_kind(lp, colnum(w, t), GLP_IV); */
+/* 					glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0, 0); */
+/*                                 } */
+/* 				else */
+					glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0.0, 0.0);
+			}
+		for(s = 0; s < ns; s++)
+			for(w = 0; w < nw; w++)
+			{
+				char name[32];
+				snprintf(name, sizeof(name), "w%ds%dn", w, s);
+				glp_set_col_name(lp, nw*nt+s*nw+w+1, name);
+				if (is_integer)
+                                {
+                                        glp_set_col_kind(lp, nw*nt+s*nw+w+1, GLP_IV);
+                                        glp_set_col_bnds(lp, nw*nt+s*nw+w+1, GLP_DB, 0, 1);
+                                }
+                                else
+					glp_set_col_bnds(lp, nw*nt+s*nw+w+1, GLP_DB, 0.0, 1.0);
+			}
+
+		int *sched_ctxs = in_sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : in_sched_ctxs;
+
+		int curr_row_idx = 0;
+		/* Total worker execution time */
+		glp_add_rows(lp, nw*ns);
+		for (t = 0; t < nt; t++)
+		{
+			int someone = 0;
+			for (w = 0; w < nw; w++)
+				if (!isnan(times[w][t]))
+					someone = 1;
+			if (!someone)
+			{
+				/* This task does not have any performance model at all, abort */
+				printf("NO PERF MODELS\n");
+				glp_delete_prob(lp);
+				return 0.0;
+			}
+		}
+		/*sum(t[t][w]*n[t][w]) < x[s][w]*tmax */
+		for(s = 0; s < ns; s++)
+		{
+			for (w = 0; w < nw; w++)
+			{
+				char name[32], title[64];
+				starpu_worker_get_name(w, name, sizeof(name));
+				snprintf(title, sizeof(title), "worker %s", name);
+				glp_set_row_name(lp, curr_row_idx+s*nw+w+1, title);
+				for (t = 0, tp = tmp_task_pools; tp; t++, tp = tp->next)
+				{
+					if((int)tp->sched_ctx_id == sched_ctxs[s])
+					{
+						ia[n] = curr_row_idx+s*nw+w+1;
+						ja[n] = colnum(w, t);
+						if (isnan(times[w][t]))
+							ar[n] = 1000000000.;
+						else
+							ar[n] = times[w][t];
+						n++;
+					}
+				}
+				/* x[s][w] = 1 | 0 */
+				ia[n] = curr_row_idx+s*nw+w+1;
+				ja[n] = nw*nt+s*nw+w+1;
+				ar[n] = (-1) * tmax;
+				n++;
+				glp_set_row_bnds(lp, curr_row_idx+s*nw+w+1, GLP_UP, 0.0, 0.0);
+			}
+		}
+
+		curr_row_idx += nw*ns;
+
+		/* Total task completion */
+		glp_add_rows(lp, nt);
+		for (t = 0, tp = tmp_task_pools; tp; t++, tp = tp->next)
+		{
+			char name[32], title[64];
+			starpu_worker_get_name(w, name, sizeof(name));
+			snprintf(title, sizeof(title), "task %s key %x", tp->cl->name, (unsigned) tp->footprint);
+			glp_set_row_name(lp, curr_row_idx+t+1, title);
+			for (w = 0; w < nw; w++)
+			{
+				ia[n] = curr_row_idx+t+1;
+				ja[n] = colnum(w, t);
+				ar[n] = 1;
+				n++;
+			}
+			glp_set_row_bnds(lp, curr_row_idx+t+1, GLP_FX, tp->n, tp->n);
+		}
+
+		curr_row_idx += nt;
+
+		/* sum(x[s][i]) = 1 */
+		glp_add_rows(lp, nw);
+		for (w = 0; w < nw; w++)
+		{
+			char name[32], title[64];
+			starpu_worker_get_name(w, name, sizeof(name));
+			snprintf(title, sizeof(title), "w%x", w);
+			glp_set_row_name(lp, curr_row_idx+w+1, title);
+			for(s = 0; s < ns; s++)
+			{
+				ia[n] = curr_row_idx+w+1;
+				ja[n] = nw*nt+s*nw+w+1;
+				ar[n] = 1;
+				n++;
+			}
+			if(is_integer)
+                                glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1, 1);
+			else
+				glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1.0, 1.0);
+		}
+		if(n != ne)
+			printf("ns= %d nw = %d nt = %d n = %d ne = %d\n", ns, nw, nt, n, ne);
+		STARPU_ASSERT(n == ne);
+
+		glp_load_matrix(lp, ne-1, ia, ja, ar);
+	}
+
+	glp_smcp parm;
+	glp_init_smcp(&parm);
+	parm.msg_lev = GLP_MSG_OFF;
+	int ret = glp_simplex(lp, &parm);
+
+/* 	char str[50]; */
+/* 	sprintf(str, "outpu_lp_%g", tmax); */
+
+/* 	glp_print_sol(lp, str); */
+
+	if (ret)
+	{
+		printf("error in simplex\n");
+		glp_delete_prob(lp);
+		lp = NULL;
+		return 0.0;
+	}
+
+	int stat = glp_get_prim_stat(lp);
+	/* if we don't have a solution return */
+	if(stat == GLP_NOFEAS)
+	{
+		glp_delete_prob(lp);
+//		printf("no_sol in tmax = %lf\n", tmax);
+		lp = NULL;
+		return 0.0;
+	}
+
+
+	if (is_integer)
+        {
+                glp_iocp iocp;
+                glp_init_iocp(&iocp);
+                iocp.msg_lev = GLP_MSG_OFF;
+		glp_intopt(lp, &iocp);
+		int stat = glp_mip_status(lp);
+		/* if we don't have a solution return */
+		if(stat == GLP_NOFEAS)
+		{
+//			printf("no int sol in tmax = %lf\n", tmax);
+			glp_delete_prob(lp);
+			lp = NULL;
+			return 0.0;
+		}
+	}
+
+	double res = glp_get_obj_val(lp);
+	for (w = 0; w < nw; w++)
+		for (t = 0; t < nt; t++)
+/* 			if (integer) */
+/* 				tasks[w][t] = (double)glp_mip_col_val(lp, colnum(w, t)); */
+/*                         else */
+				tasks[w][t] = glp_get_col_prim(lp, colnum(w, t));
+	
+//	printf("for tmax %lf\n", tmax);
+	for(s = 0; s < ns; s++)
+		for(w = 0; w < nw; w++)
+		{
+			if (is_integer)
+				w_in_s[s][w] = (double)glp_mip_col_val(lp, nw*nt+s*nw+w+1);
+                        else
+				w_in_s[s][w] = glp_get_col_prim(lp, nw*nt+s*nw+w+1);
+//			printf("w_in_s[%d][%d]=%lf\n", s, w, w_in_s[s][w]);
+		}
+//	printf("\n");
+
+	glp_delete_prob(lp);
+	return res;
+}
+
+double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw], double flops[ns], double res[ns][nw], int  total_nw[nw])
+{
+	int integer = 1;
+	int s, w;
+	glp_prob *lp;
+
+	int ne =
+		(ns*nw+1)*(ns+nw)
+		+ 1; /* glp dumbness */
+	int n = 1;
+	int ia[ne], ja[ne];
+	double ar[ne];
+
+	lp = glp_create_prob();
+
+	glp_set_prob_name(lp, "sample");
+	glp_set_obj_dir(lp, GLP_MAX);
+        glp_set_obj_name(lp, "max speed");
+
+	/* we add nw*ns columns one for each type of worker in each context
+	   and another column corresponding to the 1/tmax bound (bc 1/tmax is a variable too)*/
+	glp_add_cols(lp, nw*ns+1);
+
+	for(s = 0; s < ns; s++)
+	{
+		for(w = 0; w < nw; w++)
+		{
+			char name[32];
+			snprintf(name, sizeof(name), "worker%dctx%d", w, s);
+			glp_set_col_name(lp, n, name);
+			if (integer)
+			{
+				glp_set_col_kind(lp, n, GLP_IV);
+				glp_set_col_bnds(lp, n, GLP_LO, 0, 0);
+			}
+			else
+				glp_set_col_bnds(lp, n, GLP_LO, 0.0, 0.0);
+			n++;
+		}
+	}
+
+	/*1/tmax should belong to the interval [0.0;1.0]*/
+	glp_set_col_name(lp, n, "vmax");
+	glp_set_col_bnds(lp, n, GLP_DB, 0.0, 1.0);
+	/* Z = 1/tmax -> 1/tmax structural variable, nCPUs & nGPUs in ctx are auxiliar variables */
+	glp_set_obj_coef(lp, n, 1.0);
+
+	n = 1;
+	/* one row corresponds to one ctx*/
+	glp_add_rows(lp, ns);
+
+	for(s = 0; s < ns; s++)
+	{
+		char name[32];
+		snprintf(name, sizeof(name), "ctx%d", s);
+		glp_set_row_name(lp, s+1, name);
+		glp_set_row_bnds(lp, s+1, GLP_LO, 0., 0.);
+
+		for(w = 0; w < nw; w++)
+		{
+			int s2;
+			for(s2 = 0; s2 < ns; s2++)
+			{
+				if(s2 == s)
+				{
+					ia[n] = s+1;
+					ja[n] = w + nw*s2 + 1;
+					ar[n] = v[s][w];
+//					printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
+				}
+				else
+				{
+					ia[n] = s+1;
+					ja[n] = w + nw*s2 + 1;
+					ar[n] = 0.0;
+//					printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
+				}
+				n++;
+			}
+		}
+		/* 1/tmax */
+		ia[n] = s+1;
+		ja[n] = ns*nw+1;
+		ar[n] = (-1) * flops[s];
+//		printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
+		n++;
+	}
+
+	/*we add another linear constraint : sum(all cpus) = 9 and sum(all gpus) = 3 */
+	glp_add_rows(lp, nw);
+
+	for(w = 0; w < nw; w++)
+	{
+		char name[32];
+		snprintf(name, sizeof(name), "w%d", w);
+		glp_set_row_name(lp, ns+w+1, name);
+		for(s = 0; s < ns; s++)
+		{
+			int w2;
+			for(w2 = 0; w2 < nw; w2++)
+			{
+				if(w2 == w)
+				{
+					ia[n] = ns+w+1;
+					ja[n] = w2+s*nw + 1;
+					ar[n] = 1.0;
+//					printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
+				}
+				else
+				{
+					ia[n] = ns+w+1;
+					ja[n] = w2+s*nw + 1;
+					ar[n] = 0.0;
+//					printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
+				}
+				n++;
+			}
+		}
+		/* 1/tmax */
+		ia[n] = ns+w+1;
+		ja[n] = ns*nw+1;
+		ar[n] = 0.0;
+//		printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
+		n++;
+
+		/*sum(all gpus) = 3*/
+		if(w == 0)
+			glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[0], total_nw[0]);
+
+		/*sum(all cpus) = 9*/
+		if(w == 1)
+			glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[1], total_nw[1]);
+	}
+
+	STARPU_ASSERT(n == ne);
+
+	glp_load_matrix(lp, ne-1, ia, ja, ar);
+
+	glp_smcp parm;
+	glp_init_smcp(&parm);
+	parm.msg_lev = GLP_MSG_OFF;
+	int ret = glp_simplex(lp, &parm);
+	if (ret)
+        {
+                printf("error in simplex\n");
+		glp_delete_prob(lp);
+                lp = NULL;
+                return 0.0;
+        }
+
+	int stat = glp_get_prim_stat(lp);
+        /* if we don't have a solution return */
+        if(stat == GLP_NOFEAS)
+        {
+                glp_delete_prob(lp);
+//              printf("no_sol in tmax = %lf\n", tmax);                                                                                                                                                             
+                lp = NULL;
+                return 0.0;
+        }
+
+
+	if (integer)
+        {
+                glp_iocp iocp;
+                glp_init_iocp(&iocp);
+                iocp.msg_lev = GLP_MSG_OFF;
+                glp_intopt(lp, &iocp);
+                int stat = glp_mip_status(lp);
+                /* if we don't have a solution return */
+                if(stat == GLP_NOFEAS)
+                {
+//                      printf("no int sol in tmax = %lf\n", tmax);                                                                                                                                                 
+                        glp_delete_prob(lp);
+                        lp = NULL;
+                        return 0.0;
+                }
+        }
+
+	double vmax = glp_get_obj_val(lp);
+
+	n = 1;
+	for(s = 0; s < ns; s++)
+	{
+		for(w = 0; w < nw; w++)
+		{
+			if (integer)
+                                res[s][w] = (double)glp_mip_col_val(lp, n);
+			else
+				res[s][w] = glp_get_col_prim(lp, n);
+//			printf("%d/%d: res %lf flops = %lf v = %lf\n", w,s, res[s][w], flops[s], v[s][w]);
+			n++;
+		}
+	}
+
+	glp_delete_prob(lp);
+	return vmax;
+}
+
+#endif // STARPU_HAVE_GLPK_H

+ 1 - 198
sc_hypervisor/src/policies_utils/lp_tools.c

@@ -21,203 +21,6 @@
 
 #ifdef STARPU_HAVE_GLPK_H
 
-double sc_hypervisor_lp_compute_nworkers_per_ctx(int ns, int nw, double v[ns][nw], double flops[ns], double res[ns][nw], int  total_nw[nw])
-{
-	int integer = 1;
-	int s, w;
-	glp_prob *lp;
-
-	int ne =
-		(ns*nw+1)*(ns+nw)
-		+ 1; /* glp dumbness */
-	int n = 1;
-	int ia[ne], ja[ne];
-	double ar[ne];
-
-	lp = glp_create_prob();
-
-	glp_set_prob_name(lp, "sample");
-	glp_set_obj_dir(lp, GLP_MAX);
-        glp_set_obj_name(lp, "max speed");
-
-	/* we add nw*ns columns one for each type of worker in each context
-	   and another column corresponding to the 1/tmax bound (bc 1/tmax is a variable too)*/
-	glp_add_cols(lp, nw*ns+1);
-
-	for(s = 0; s < ns; s++)
-	{
-		for(w = 0; w < nw; w++)
-		{
-			char name[32];
-			snprintf(name, sizeof(name), "worker%dctx%d", w, s);
-			glp_set_col_name(lp, n, name);
-			if (integer)
-			{
-				glp_set_col_kind(lp, n, GLP_IV);
-				glp_set_col_bnds(lp, n, GLP_LO, 0, 0);
-			}
-			else
-				glp_set_col_bnds(lp, n, GLP_LO, 0.0, 0.0);
-			n++;
-		}
-	}
-
-	/*1/tmax should belong to the interval [0.0;1.0]*/
-	glp_set_col_name(lp, n, "vmax");
-	glp_set_col_bnds(lp, n, GLP_DB, 0.0, 1.0);
-	/* Z = 1/tmax -> 1/tmax structural variable, nCPUs & nGPUs in ctx are auxiliar variables */
-	glp_set_obj_coef(lp, n, 1.0);
-
-	n = 1;
-	/* one row corresponds to one ctx*/
-	glp_add_rows(lp, ns);
-
-	for(s = 0; s < ns; s++)
-	{
-		char name[32];
-		snprintf(name, sizeof(name), "ctx%d", s);
-		glp_set_row_name(lp, s+1, name);
-		glp_set_row_bnds(lp, s+1, GLP_LO, 0., 0.);
-
-		for(w = 0; w < nw; w++)
-		{
-			int s2;
-			for(s2 = 0; s2 < ns; s2++)
-			{
-				if(s2 == s)
-				{
-					ia[n] = s+1;
-					ja[n] = w + nw*s2 + 1;
-					ar[n] = v[s][w];
-//					printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
-				}
-				else
-				{
-					ia[n] = s+1;
-					ja[n] = w + nw*s2 + 1;
-					ar[n] = 0.0;
-//					printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
-				}
-				n++;
-			}
-		}
-		/* 1/tmax */
-		ia[n] = s+1;
-		ja[n] = ns*nw+1;
-		ar[n] = (-1) * flops[s];
-//		printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
-		n++;
-	}
-
-	/*we add another linear constraint : sum(all cpus) = 9 and sum(all gpus) = 3 */
-	glp_add_rows(lp, nw);
-
-	for(w = 0; w < nw; w++)
-	{
-		char name[32];
-		snprintf(name, sizeof(name), "w%d", w);
-		glp_set_row_name(lp, ns+w+1, name);
-		for(s = 0; s < ns; s++)
-		{
-			int w2;
-			for(w2 = 0; w2 < nw; w2++)
-			{
-				if(w2 == w)
-				{
-					ia[n] = ns+w+1;
-					ja[n] = w2+s*nw + 1;
-					ar[n] = 1.0;
-//					printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
-				}
-				else
-				{
-					ia[n] = ns+w+1;
-					ja[n] = w2+s*nw + 1;
-					ar[n] = 0.0;
-//					printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
-				}
-				n++;
-			}
-		}
-		/* 1/tmax */
-		ia[n] = ns+w+1;
-		ja[n] = ns*nw+1;
-		ar[n] = 0.0;
-//		printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]);
-		n++;
-
-		/*sum(all gpus) = 3*/
-		if(w == 0)
-			glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[0], total_nw[0]);
-
-		/*sum(all cpus) = 9*/
-		if(w == 1)
-			glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[1], total_nw[1]);
-	}
-
-	STARPU_ASSERT(n == ne);
-
-	glp_load_matrix(lp, ne-1, ia, ja, ar);
-
-	glp_smcp parm;
-	glp_init_smcp(&parm);
-	parm.msg_lev = GLP_MSG_OFF;
-	int ret = glp_simplex(lp, &parm);
-	if (ret)
-        {
-                printf("error in simplex\n");
-		glp_delete_prob(lp);
-                lp = NULL;
-                return 0.0;
-        }
-
-	int stat = glp_get_prim_stat(lp);
-        /* if we don't have a solution return */
-        if(stat == GLP_NOFEAS)
-        {
-                glp_delete_prob(lp);
-//              printf("no_sol in tmax = %lf\n", tmax);                                                                                                                                                             
-                lp = NULL;
-                return 0.0;
-        }
-
-
-	if (integer)
-        {
-                glp_iocp iocp;
-                glp_init_iocp(&iocp);
-                iocp.msg_lev = GLP_MSG_OFF;
-                glp_intopt(lp, &iocp);
-                int stat = glp_mip_status(lp);
-                /* if we don't have a solution return */
-                if(stat == GLP_NOFEAS)
-                {
-//                      printf("no int sol in tmax = %lf\n", tmax);                                                                                                                                                 
-                        glp_delete_prob(lp);
-                        lp = NULL;
-                        return 0.0;
-                }
-        }
-
-	double vmax = glp_get_obj_val(lp);
-
-	n = 1;
-	for(s = 0; s < ns; s++)
-	{
-		for(w = 0; w < nw; w++)
-		{
-			if (integer)
-                                res[s][w] = (double)glp_mip_col_val(lp, n);
-			else
-				res[s][w] = glp_get_col_prim(lp, n);
-//			printf("%d/%d: res %lf flops = %lf v = %lf\n", w,s, res[s][w], flops[s], v[s][w]);
-			n++;
-		}
-	}
-
-	glp_delete_prob(lp);
-	return vmax;
-}
 
 #endif //STARPU_HAVE_GLPK_H
 
@@ -249,7 +52,7 @@ double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_work
 //		printf("%d: flops %lf\n", sched_ctxs[i], flops[i]);
 	}
 
-	return 1/sc_hypervisor_lp_compute_nworkers_per_ctx(nsched_ctxs, ntypes_of_workers, v, flops, res, total_nw);
+	return 1/sc_hypervisor_lp_simulate_distrib_flops(nsched_ctxs, ntypes_of_workers, v, flops, res, total_nw);
 #else//STARPU_HAVE_GLPK_H
 	return 0.0;
 #endif//STARPU_HAVE_GLPK_H

+ 40 - 0
sc_hypervisor/src/policies_utils/policy_tools.c

@@ -588,3 +588,43 @@ void sc_hypervisor_group_workers_by_type(int *workers, int nworkers, int ntypes_
 			total_nw[0]++;
 	}
 }
+
+void sc_hypervisor_get_tasks_times(int nw, int nt, double times[nw][nt], int *workers, unsigned size_ctxs, struct sc_hypervisor_policy_task_pool *task_pools)
+{
+        struct sc_hypervisor_policy_task_pool *tp;
+        int w, t;
+        for (w = 0; w < nw; w++)
+        {
+                for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
+                {
+			int worker = workers == NULL ? w : workers[w];
+                        enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(worker);
+                        double length = starpu_history_based_expected_perf(tp->cl->model, arch, tp->footprint);
+
+                        if (isnan(length))
+                                times[w][t] = NAN;
+			else
+			{
+                                times[w][t] = length / 1000.;
+
+				double transfer_time = 0.0;
+				enum starpu_archtype arch = starpu_worker_get_type(worker);
+				if(arch == STARPU_CUDA_WORKER)
+				{
+					unsigned worker_in_ctx = starpu_sched_ctx_contains_worker(worker, tp->sched_ctx_id);
+					if(!worker_in_ctx && !size_ctxs)
+					{
+						double transfer_velocity = starpu_get_bandwidth_RAM_CUDA(worker);
+						transfer_time +=  (tp->footprint / transfer_velocity) / 1000. ;
+					}
+					double latency = starpu_get_latency_RAM_CUDA(worker);
+					transfer_time += latency/1000.;
+
+				}
+//				printf("%d/%d %s x %d time = %lf transfer_time = %lf\n", w, tp->sched_ctx_id, tp->cl->model->symbol, tp->n, times[w][t], transfer_time);
+				times[w][t] += transfer_time;
+			}
+                }
+        }
+}
+

+ 2 - 6
src/sched_policies/eager_central_policy.c

@@ -113,13 +113,9 @@ static struct starpu_task *pop_every_task_eager_policy(unsigned sched_ctx_id)
 	struct _starpu_eager_center_policy_data *data = (struct _starpu_eager_center_policy_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
 	int workerid = starpu_worker_get_id();
 	
-	starpu_pthread_mutex_t *sched_mutex;
-	starpu_pthread_cond_t *sched_cond;
-	starpu_worker_get_sched_condition(workerid, &sched_mutex, &sched_cond);
-	
-	_STARPU_PTHREAD_MUTEX_LOCK(sched_mutex);
+	_STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex);
 	struct starpu_task* task = _starpu_fifo_pop_every_task(data->fifo, workerid);
-	_STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
+	_STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex);
 	return task;
 }