6 years ago · db1951c1f2
--- a/ChangeLog
+++ b/ChangeLog
@@ -19,10 +19,21 @@
 
																 StarPU 1.4.0 (svn revision xxxx)
															
 
																 ==============================================
															
 
																 New features:
															
 
																+  * Fault tolerance support with starpu_task_ft_failed().
															
 
																 StarPU 1.3.2 (git revision xxx)
															
 
																 ==============================================
															
 
																+Small changes:
															
 
																+  * Improve OpenMP support to detect the environment is valid before
															
 
																+    launching OpenMP
															
 
																+  * Delete old code (drivers gordon, scc, starpu-top, and plugin gcc)
															
 
																+    and update authors file accordingly
															
 
																+  * Add Heteroprio documentation (including a simple example)
															
 
																+  * Add a progression hook, to be called when workers are idle, which
															
 
																+    is used in the NewMadeleine implementation of StarPU-MPI to ensure
															
 
																+    communications progress.
															
 
																+
															
 
																 StarPU 1.3.1 (git revision 01949488b4f8e6fe26d2c200293b8aae5876b038)
															
 
																 ==============================================
															
--- a/Makefile.am
+++ b/Makefile.am
@@ -120,6 +120,7 @@ noinst_HEADERS = \
 
																 	include/pthread_win32/semaphore.h
															
 
																 if STARPU_DEVEL
															
 
																+all-local:
															
 
																 	@if $(GREP) -r sys/time.h $$( find $(srcdir)/examples $(srcdir)/tests $(srcdir)/src $(srcdir)/mpi/src $(srcdir)/include -name \*.[ch] -a \! -name starpu_util.h -a \! -name timer.h -a \! -name loader.c ) ; \
															
 
																 	then \
															
 
																 		echo "Please do not include sys/time, it is not available on Windows, include starpu_util.h and use starpu_timing_now() instead" ; \
															
--- a/configure.ac
+++ b/configure.ac
@@ -789,6 +789,7 @@ AC_SEARCH_LIBS([sqrt],[m],,AC_MSG_ERROR([math library unavailable]))
 
																 AC_HAVE_LIBRARY([ws2_32])
															
 
																 AC_CHECK_FUNCS([sysconf])
															
 
																 AC_CHECK_FUNCS([getrlimit])
															
 
																+AC_CHECK_FUNCS([scandir])
															
 
																 AC_CHECK_FUNC([pthread_spin_lock], have_pthread_spin_lock=yes, have_pthread_spin_lock=no)
															
 
																 if test x$have_pthread_spin_lock = xyes; then
															
--- a/doc/doxygen/Makefile.am
+++ b/doc/doxygen/Makefile.am
@@ -2,7 +2,7 @@
 
																 #
															
 
																 # Copyright (C) 2013-2018                                Inria
															
 
																 # Copyright (C) 2010-2019                                CNRS
															
 
																-# Copyright (C) 2009,2011,2013,2014,2017                 Université de Bordeaux
															
 
																+# Copyright (C) 2009,2011,2013,2014,2017,2019            Université de Bordeaux
															
 
																 #
															
 
																 # StarPU is free software; you can redistribute it and/or modify
															
 
																 # it under the terms of the GNU Lesser General Public License as published by
															
@@ -72,6 +72,7 @@ chapters =	\
 
																 	chapters/390_faq.doxy		\
															
 
																 	chapters/401_out_of_core.doxy		\
															
 
																 	chapters/410_mpi_support.doxy		\
															
 
																+	chapters/415_fault_tolerance.doxy	\
															
 
																 	chapters/420_fft_support.doxy		\
															
 
																 	chapters/430_mic_support.doxy		\
															
 
																 	chapters/450_native_fortran_support.doxy		\
															
--- a/doc/doxygen/chapters/000_introduction.doxy
+++ b/doc/doxygen/chapters/000_introduction.doxy
@@ -2,7 +2,7 @@
 
																  *
															
 
																  * Copyright (C) 2010-2019                                CNRS
															
 
																  * Copyright (C) 2011-2013,2016                           Inria
															
 
																- * Copyright (C) 2009-2011,2014,2016                      Université de Bordeaux
															
 
																+ * Copyright (C) 2009-2011,2014,2016,2019                 Université de Bordeaux
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -290,6 +290,7 @@ The documentation chapters include
 
																 <ul>
															
 
																 <li> \ref OutOfCore
															
 
																 <li> \ref MPISupport
															
 
																+<li> \ref FaultTolerance
															
 
																 <li> \ref FFTSupport
															
 
																 <li> \ref MICSupport
															
 
																 <li> \ref NativeFortranSupport
															
--- a/doc/doxygen/chapters/101_building.doxy
+++ b/doc/doxygen/chapters/101_building.doxy
@@ -209,7 +209,7 @@ $ export LD_LIBRARY_PATH=$STARPU_PATH/lib:$LD_LIBRARY_PATH
 
																 And it is useful to get access to the StarPU tools:
															
 
																 \verbatim
															
 
																-$ PATH=$PATH:$STARPU_PATH/bin
															
 
																+$ export PATH=$PATH:$STARPU_PATH/bin
															
 
																 \endverbatim
															
 
																 It is then useful to check that StarPU executes correctly and finds your hardware:
															
--- a/doc/doxygen/chapters/380_offline_performance_tools.doxy
+++ b/doc/doxygen/chapters/380_offline_performance_tools.doxy
@@ -60,18 +60,7 @@ StarPU can use the FxT library (see
 
																 https://savannah.nongnu.org/projects/fkt/) to generate traces
															
 
																 with a limited runtime overhead.
															
 
																-You can either get a tarball:
															
 
																-
															
 
																-\verbatim
															
 
																-$ wget http://download.savannah.gnu.org/releases/fkt/fxt-0.2.11.tar.gz
															
 
																-\endverbatim
															
 
																-
															
 
																-or use the FxT library from CVS (autotools are required):
															
 
																-
															
 
																-\verbatim
															
 
																-$ cvs -d :pserver:anonymous\@cvs.sv.gnu.org:/sources/fkt co FxT
															
 
																-$ ./bootstrap
															
 
																-\endverbatim
															
 
																+You can get a tarball from http://download.savannah.gnu.org/releases/fkt/
															
 
																 Compiling and installing the FxT library in the <c>$FXTDIR</c> path is
															
 
																 done following the standard procedure:
															
--- a/doc/doxygen/chapters/415_fault_tolerance.doxy
+++ b/doc/doxygen/chapters/415_fault_tolerance.doxy
@@ -0,0 +1,48 @@
 
																+/* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																+ *
															
 
																+ * Copyright (C) 2019                                     Université de Bordeaux
															
 
																+ *
															
 
																+ * StarPU is free software; you can redistribute it and/or modify
															
 
																+ * it under the terms of the GNU Lesser General Public License as published by
															
 
																+ * the Free Software Foundation; either version 2.1 of the License, or (at
															
 
																+ * your option) any later version.
															
 
																+ *
															
 
																+ * StarPU is distributed in the hope that it will be useful, but
															
 
																+ * WITHOUT ANY WARRANTY; without even the implied warranty of
															
 
																+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
															
 
																+ *
															
 
																+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
															
 
																+ */
															
 
																+
															
 
																+/*! \page FaultTolerance Fault Tolerance
															
 
																+
															
 
																+\section Introduction Introduction
															
 
																+
															
 
																+Due to e.g. hardware error, some tasks may fail, or even complete nodes may
															
 
																+fail.  For now, StarPU provides some support for failure of tasks.
															
 
																+
															
 
																+\section TaskRetry Retrying tasks
															
 
																+
															
 
																+In case a task implementation notices that it failed to compute properly, it can
															
 
																+call starpu_task_ft_failed() to notify StarPU of the failure.
															
 
																+
															
 
																+<c>tests/fault-tolerance/retry.c</c> is an example of coping with such failure:
															
 
																+the principle is that when submitting the task, one sets its prologue callback
															
 
																+to starpu_task_ft_prologue(). That prologue will turn the task into a meta
															
 
																+task which will manage the repeated submission of try-tasks to perform the
															
 
																+computation until one of the computations succeeds.
															
 
																+
															
 
																+By default, try-tasks will be just retried until one of them succeeds (i.e. the
															
 
																+task implementation does not call starpu_task_ft_failed()). One can change the
															
 
																+behavior by passing a <c>check_ft</c> function as prologue parameter,
															
 
																+which will be called at the end of the try-task attempt. It can look at
															
 
																+<c>starpu_task_get_current()->failed</c> to determine whether the try-task
															
 
																+succeeded, in which case it can call starpu_task_ft_success() on the meta-task to
															
 
																+notify success, or if it failed, in which case it can call
															
 
																+starpu_task_ft_create_retry() to create another try-task, and submit it
															
 
																+with starpu_task_submit_nodeps().
															
 
																+
															
 
																+This can however only work if the task inputs are not modified, and is thus not
															
 
																+supported for tasks with data access mode ::STARPU_RW.
															
 
																+
															
 
																+*/
															
--- a/doc/doxygen/refman.tex
+++ b/doc/doxygen/refman.tex
@@ -2,7 +2,7 @@
 
																 %
															
 
																 % Copyright (C) 2013-2016,2018                           Inria
															
 
																 % Copyright (C) 2013-2019                                CNRS
															
 
																-% Copyright (C) 2014,2018                                Université de Bordeaux
															
 
																+% Copyright (C) 2014,2018-2019                                Université de Bordeaux
															
 
																 % Copyright (C) 2013                                     Simon Archipoff
															
 
																 %
															
 
																 % StarPU is free software; you can redistribute it and/or modify
															
@@ -154,6 +154,11 @@ Documentation License”.
 
																 \hypertarget{MPISupport}{}
															
 
																 \input{MPISupport}
															
 
																+\chapter{Fault Tolerance}
															
 
																+\label{FaultTolerance}
															
 
																+\hypertarget{FaultTolerance}{}
															
 
																+\input{FaultTolerance}
															
 
																+
															
 
																 \chapter{FFT Support}
															
 
																 \label{FFTSupport}
															
 
																 \hypertarget{FFTSupport}{}
															
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -304,6 +304,10 @@ if !NO_BLAS_LIB
 
																 STARPU_EXAMPLES +=				\
															
 
																 	mult/sgemm 				\
															
 
																 	mult/dgemm				\
															
 
																+	lu/lu_example_float			\
															
 
																+	lu/lu_example_double			\
															
 
																+	lu/lu_implicit_example_float		\
															
 
																+	lu/lu_implicit_example_double		\
															
 
																 	cholesky/cholesky_tag			\
															
 
																 	cholesky/cholesky_tile_tag		\
															
 
																 	cholesky/cholesky_implicit		\
															
@@ -313,10 +317,6 @@ if !STARPU_SIMGRID
 
																 STARPU_EXAMPLES +=				\
															
 
																 	axpy/axpy				\
															
 
																 	cholesky/cholesky_grain_tag		\
															
 
																-	lu/lu_example_float			\
															
 
																-	lu/lu_example_double			\
															
 
																-	lu/lu_implicit_example_float		\
															
 
																-	lu/lu_implicit_example_double		\
															
 
																 	heat/heat				\
															
 
																 	cg/cg					\
															
 
																 	pipeline/pipeline
															
--- a/examples/lu/lu_example.c
+++ b/examples/lu/lu_example.c
@@ -1,6 +1,6 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2009-2017                                Université de Bordeaux
															
 
																+ * Copyright (C) 2009-2017,2019                           Université de Bordeaux
															
 
																  * Copyright (C) 2011,2012                                Inria
															
 
																  * Copyright (C) 2010-2013,2015-2018                      CNRS
															
 
																  *
															
@@ -176,7 +176,11 @@ void copy_matrix_into_blocks(void)
 
																 static void init_matrix(void)
															
 
																 {
															
 
																 	/* allocate matrix */
															
 
																+#ifdef STARPU_SIMGRID
															
 
																+	A = (void*) 1;
															
 
																+#else
															
 
																 	starpu_malloc_flags((void **)&A, (size_t)size*size*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED);
															
 
																+#endif
															
 
																 	STARPU_ASSERT(A);
															
 
																 	starpu_srand48((long int)time(NULL));
															
@@ -435,10 +439,10 @@ int main(int argc, char **argv)
 
																 		check_result();
															
 
																 	}
															
 
																-#endif
															
 
																 	if (pivot)
															
 
																 		free(ipiv);
															
 
																+#endif
															
 
																 	starpu_free_flags(A, (size_t)size*size*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED);
															
--- a/examples/lu/xlu_implicit_pivot.c
+++ b/examples/lu/xlu_implicit_pivot.c
@@ -1,6 +1,6 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2010-2015,2017,2018                      Université de Bordeaux
															
 
																+ * Copyright (C) 2010-2015,2017,2018-2019                 Université de Bordeaux
															
 
																  * Copyright (C) 2013                                     Inria
															
 
																  * Copyright (C) 2010                                     Mehdi Juhoor
															
 
																  * Copyright (C) 2010-2013,2015-2019                      CNRS
															
@@ -367,5 +367,6 @@ int STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, uns
 
																 		starpu_data_unregister(dataAp[bi+nblocks*bj]);
															
 
																 	}
															
 
																 	free(dataAp);
															
 
																+	free(piv_description);
															
 
																 	return ret;
															
 
																 }
															
--- a/examples/native_fortran/nf_example.f90
+++ b/examples/native_fortran/nf_example.f90
@@ -2,7 +2,7 @@
 
																 !
															
 
																 ! Copyright (C) 2017                                     CNRS
															
 
																 ! Copyright (C) 2015,2016                                Inria
															
 
																-! Copyright (C) 2016                                     Université de Bordeaux
															
 
																+! Copyright (C) 2016,2019                                Université de Bordeaux
															
 
																 ! Copyright (C) 2015                                     ONERA
															
 
																 !
															
 
																 ! StarPU is free software; you can redistribute it and/or modify
															
@@ -36,6 +36,7 @@ PROGRAM f90_example
 
																   INTEGER(KIND=C_INT)            :: starpu_maj,starpu_min,starpu_rev
															
 
																   INTEGER(KIND=C_INT)            :: neq,ng,nb,it,it_tot
															
 
																   REAL(KIND=C_DOUBLE)            :: r, coeff2
															
 
																+  REAL(KIND=C_DOUBLE),TARGET     :: flops
															
 
																   TYPE(C_PTR) :: cl_loop_element = C_NULL_PTR ! loop codelet
															
 
																   TYPE(C_PTR) :: cl_copy_element = C_NULL_PTR ! copy codelet
															
@@ -95,11 +96,13 @@ PROGRAM f90_example
 
																      ! compute new dro for each element
															
 
																      DO i = 1,Nelt
															
 
																         elt => mesh%elt(i)
															
 
																+        flops = elt%Ng * ( (elt%Np * numpar%Neq_max * 2) + 1 + elt%Np * numpar%Neq_max)
															
 
																         CALL fstarpu_insert_task((/ cl_loop_element,    &
															
 
																                 FSTARPU_VALUE, c_loc(numpar%coeff), FSTARPU_SZ_C_DOUBLE, &
															
 
																                 FSTARPU_R, elt%ro_h,                 &
															
 
																                 FSTARPU_RW, elt%dro_h,                &
															
 
																                 FSTARPU_R, elt%basis_h,              &
															
 
																+                FSTARPU_FLOPS, c_loc(flops),         &
															
 
																                 C_NULL_PTR /))
															
 
																      ENDDO
															
 
																      ! sync (if needed by the algorithm)
															
--- a/include/starpu_profiling.h
+++ b/include/starpu_profiling.h
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2010-2014,2016,2017                      Université de Bordeaux
															
 
																- * Copyright (C) 2010,2011,2013,2015,2017,2019                 CNRS
															
 
																+ * Copyright (C) 2010-2014,2016,2017,2019                 Université de Bordeaux
															
 
																+ * Copyright (C) 2010,2011,2013,2015,2017,2019            CNRS
															
 
																  * Copyright (C) 2016                                     Inria
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
@@ -118,6 +118,8 @@ struct starpu_profiling_worker_info
 
																 	/** Energy consumed by the worker, in Joules */
															
 
																 	double energy_consumed;
															
 
																+	/* TODO: add wasted time due to failed tasks */
															
 
																+
															
 
																 	double flops;
															
 
																 };
															
--- a/include/starpu_stdlib.h
+++ b/include/starpu_stdlib.h
@@ -242,6 +242,7 @@ void starpu_memory_deallocate(unsigned node, size_t size);
 
																 void starpu_memory_wait_available(unsigned node, size_t size);
															
 
																 void starpu_sleep(float nb_sec);
															
 
																+void starpu_usleep(float nb_micro_sec);
															
 
																 /** @} */
															
--- a/include/starpu_task.h
+++ b/include/starpu_task.h
@@ -919,6 +919,13 @@ struct starpu_task
 
																 	unsigned no_submitorder:1;
															
 
																 	/**
															
 
																+	   Whether this task has failed and will thus have to be retried
															
 
																+
															
 
																+	   Set by StarPU.
															
 
																+	*/
															
 
																+	unsigned failed:1;
															
 
																+
															
 
																+	/**
															
 
																 	   Whether the scheduler has pushed the task on some queue
															
 
																 	   Set by StarPU.
															
@@ -1348,6 +1355,15 @@ void starpu_task_destroy(struct starpu_task *task);
 
																 int starpu_task_submit(struct starpu_task *task) STARPU_WARN_UNUSED_RESULT;
															
 
																 /**
															
 
																+   Submit \p task to StarPU with dependency bypass.
															
 
																+
															
 
																+   This can only be called on behalf of another task which has already taken the
															
 
																+   proper dependencies, e.g. this task is just an attempt of doing the actual
															
 
																+   computation of that task.
															
 
																+*/
															
 
																+int starpu_task_submit_nodeps(struct starpu_task *task) STARPU_WARN_UNUSED_RESULT;
															
 
																+
															
 
																+/**
															
 
																    Submit \p task to the context \p sched_ctx_id. By default,
															
 
																    starpu_task_submit() submits the task to a global context that is
															
 
																    created automatically by StarPU.
															
@@ -1504,6 +1520,57 @@ unsigned starpu_task_get_implementation(struct starpu_task *task);
 
																  */
															
 
																 void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t *deps, void (*callback)(void *), void *callback_arg);
															
 
																+
															
 
																+
															
 
																+
															
 
																+/**
															
 
																+   Function to be used as a prologue callback to enable fault tolerance for the
															
 
																+   task. This prologue will create a try-task, i.e. a duplicate of the task,
															
 
																+   which will do the actual computation.
															
 
																+
															
 
																+   The prologue argument can be set to a check_ft function that will be
															
 
																+   called on termination of the duplicate, which can check the result of the
															
 
																+   task, and either confirm success, or resubmit another attempt.
															
 
																+   If it is not set, the default implementation is to just resubmit a new
															
 
																+   try-task.
															
 
																+ */
															
 
																+void starpu_task_ft_prologue(void *check_ft);
															
 
																+
															
 
																+
															
 
																+/**
															
 
																+   Create a try-task for a \p meta_task, given a \p template_task task
															
 
																+   template. The meta task can be passed as template on the first call, but
															
 
																+   since it is mangled by starpu_task_ft_create_retry(), further calls
															
 
																+   (typically made by the check_ft callback) need to be passed the previous
															
 
																+   try-task as template task.
															
 
																+
															
 
																+   \p check_ft is similar to the prologue argument of
															
 
																+   starpu_task_ft_prologue(), and is typically set to the very function calling
															
 
																+   starpu_task_ft_create_retry().
															
 
																+
															
 
																+   The try-task is returned, and can be modified (e.g. to change scheduling
															
 
																+   parameters) before being submitted with starpu_task_submit_nodeps().
															
 
																+ */
															
 
																+struct starpu_task * starpu_task_ft_create_retry(const struct starpu_task *meta_task, const struct starpu_task *template_task, void (*check_ft)(void*));
															
 
																+
															
 
																+/**
															
 
																+   Record that this task failed, and should thus be retried.
															
 
																+   This is usually called from the task codelet function itself, after checking
															
 
																+   the result and noticing that the computation went wrong, and thus the task
															
 
																+   should be retried. The performance of this task execution will not be
															
 
																+   recorded for performance models.
															
 
																+
															
 
																+   This can only be called for a task whose data access modes are either
															
 
																+   STARPU_R or STARPU_W.
															
 
																+ */
															
 
																+void starpu_task_ft_failed(struct starpu_task *task);
															
 
																+
															
 
																+/**
															
 
																+   Notify that the try-task was successful and thus the meta-task was
															
 
																+   successful.
															
 
																+ */
															
 
																+void starpu_task_ft_success(struct starpu_task *meta_task);
															
 
																+
															
 
																 /** @} */
															
 
																 #ifdef __cplusplus
															
--- a/src/common/utils.c
+++ b/src/common/utils.c
@@ -549,6 +549,19 @@ void starpu_sleep(float nb_sec)
 
																 #endif
															
 
																 }
															
 
																+void starpu_usleep(float nb_micro_sec)
															
 
																+{
															
 
																+#ifdef STARPU_SIMGRID
															
 
																+	MSG_process_sleep(nb_micro_sec / 1000000);
															
 
																+#elif defined(STARPU_HAVE_WINDOWS)
															
 
																+	Sleep(nb_micro_sec / 1000);
															
 
																+#elif HAVE_UNISTD_H
															
 
																+	usleep(nb_micro_sec);
															
 
																+#else
															
 
																+#error no implementation of usleep
															
 
																+#endif
															
 
																+}
															
 
																+
															
 
																 char *starpu_getenv(const char *str)
															
 
																 {
															
 
																 #ifndef STARPU_SIMGRID
															
--- a/src/core/dependencies/data_concurrency.c
+++ b/src/core/dependencies/data_concurrency.c
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																  * Copyright (C) 2013,2015-2017                           Inria
															
 
																- * Copyright (C) 2009-2015,2017,2018                      Université de Bordeaux
															
 
																+ * Copyright (C) 2009-2015,2017,2018-2019                      Université de Bordeaux
															
 
																  * Copyright (C) 2010-2013,2015,2017,2018,2019            CNRS
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
@@ -241,6 +241,60 @@ static unsigned _starpu_attempt_to_submit_data_request(unsigned request_from_cod
 
																 }
															
 
																+/* Take a data, without waiting for it to be available (it is assumed to be).
															
 
																+ * This is typically used for nodeps tasks, for which a previous task has already
															
 
																+ * waited for the proper conditions, and we just need to take another reference
															
 
																+ * for overall reference coherency. */
															
 
																+/* No lock is held, this acquires and releases the handle header lock */
															
 
																+static void _starpu_take_data(unsigned request_from_codelet,
															
 
																+						       starpu_data_handle_t handle, enum starpu_data_access_mode mode,
															
 
																+						       struct _starpu_job *j)
															
 
																+{
															
 
																+	STARPU_ASSERT_MSG(!handle->arbiter, "TODO");
															
 
																+
															
 
																+	/* Do not care about some flags */
															
 
																+	mode &= ~STARPU_COMMUTE;
															
 
																+	mode &= ~STARPU_SSEND;
															
 
																+	mode &= ~STARPU_LOCALITY;
															
 
																+	if (mode == STARPU_RW)
															
 
																+		mode = STARPU_W;
															
 
																+
															
 
																+	/* Take the lock protecting the header. We try to do some progression
															
 
																+	 * in case this is called from a worker, otherwise we just wait for the
															
 
																+	 * lock to be available. */
															
 
																+	if (request_from_codelet)
															
 
																+	{
															
 
																+		int cpt = 0;
															
 
																+		while (cpt < STARPU_SPIN_MAXTRY && _starpu_spin_trylock(&handle->header_lock))
															
 
																+		{
															
 
																+			cpt++;
															
 
																+			_starpu_datawizard_progress(0);
															
 
																+		}
															
 
																+		if (cpt == STARPU_SPIN_MAXTRY)
															
 
																+			_starpu_spin_lock(&handle->header_lock);
															
 
																+	}
															
 
																+	else
															
 
																+	{
															
 
																+		_starpu_spin_lock(&handle->header_lock);
															
 
																+	}
															
 
																+
															
 
																+	/* If we are currently performing a reduction, we freeze any request
															
 
																+	 * that is not explicitly a reduction task. */
															
 
																+	unsigned is_a_reduction_task = (request_from_codelet && j && j->reduction_task);
															
 
																+
															
 
																+	STARPU_ASSERT_MSG(!is_a_reduction_task, "TODO");
															
 
																+
															
 
																+	enum starpu_data_access_mode previous_mode = handle->current_mode;
															
 
																+
															
 
																+	STARPU_ASSERT_MSG(mode == previous_mode, "mode was %d, but requested %d", previous_mode, mode);
															
 
																+
															
 
																+	handle->refcnt++;
															
 
																+	handle->busy_count++;
															
 
																+
															
 
																+	_starpu_spin_unlock(&handle->header_lock);
															
 
																+}
															
 
																+
															
 
																+
															
 
																 /* No lock is held */
															
 
																 unsigned _starpu_attempt_to_submit_data_request_from_apps(starpu_data_handle_t handle, enum starpu_data_access_mode mode,
															
 
																 							  void (*callback)(void *), void *argcb)
															
@@ -260,7 +314,7 @@ static unsigned attempt_to_submit_data_request_from_job(struct _starpu_job *j, u
 
																 	return _starpu_attempt_to_submit_data_request(1, handle, mode, NULL, NULL, j, buffer_index);
															
 
																 }
															
 
																-/* Acquire all data of the given job, one by one in handle pointer value order
															
 
																+/* Try to acquire all data of the given job, one by one in handle pointer value order
															
 
																  */
															
 
																 /* No lock is held */
															
 
																 static unsigned _submit_job_enforce_data_deps(struct _starpu_job *j, unsigned start_buffer_index)
															
@@ -301,6 +355,50 @@ static unsigned _submit_job_enforce_data_deps(struct _starpu_job *j, unsigned st
 
																 	return 0;
															
 
																 }
															
 
																+static void take_data_from_job(struct _starpu_job *j, unsigned buffer_index)
															
 
																+{
															
 
																+	/* Note that we do not access j->task->handles, but j->ordered_buffers
															
 
																+	 * which is a sorted copy of it. */
															
 
																+	struct _starpu_data_descr *buffer = &(_STARPU_JOB_GET_ORDERED_BUFFERS(j)[buffer_index]);
															
 
																+	starpu_data_handle_t handle = buffer->handle;
															
 
																+	enum starpu_data_access_mode mode = buffer->mode & ~STARPU_COMMUTE;
															
 
																+
															
 
																+	_starpu_take_data(1, handle, mode, j);
															
 
																+}
															
 
																+
															
 
																+/* Immediately acquire all data of the given job, one by one in handle pointer value order
															
 
																+ */
															
 
																+/* No lock is held */
															
 
																+static void _submit_job_take_data_deps(struct _starpu_job *j, unsigned start_buffer_index)
															
 
																+{
															
 
																+	unsigned buf;
															
 
																+
															
 
																+	unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(j->task);
															
 
																+	for (buf = start_buffer_index; buf < nbuffers; buf++)
															
 
																+	{
															
 
																+		starpu_data_handle_t handle = _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, buf);
															
 
																+		if (buf)
															
 
																+		{
															
 
																+			starpu_data_handle_t handle_m1 = _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, buf-1);
															
 
																+			if (handle_m1 == handle)
															
 
																+				/* We have already requested this data, skip it. This
															
 
																+				 * depends on ordering putting writes before reads, see
															
 
																+				 * _starpu_compar_handles.  */
															
 
																+				continue;
															
 
																+		}
															
 
																+
															
 
																+		if(handle->arbiter)
															
 
																+		{
															
 
																+			/* We arrived on an arbitered data, we stop and proceed
															
 
																+			 * with the arbiter second step.  */
															
 
																+			STARPU_ASSERT_MSG(0, "TODO");
															
 
																+			//_starpu_submit_job_take_arbitered_deps(j, buf, nbuffers);
															
 
																+		}
															
 
																+
															
 
																+                take_data_from_job(j, buf);
															
 
																+	}
															
 
																+}
															
 
																+
															
 
																 /* This is called when the tag+task dependencies are to be finished releasing.  */
															
 
																 void _starpu_enforce_data_deps_notify_job_ready_soon(struct _starpu_job *j, _starpu_notify_job_start_data *data)
															
 
																 {
															
@@ -363,7 +461,7 @@ void _starpu_job_set_ordered_buffers(struct _starpu_job *j)
 
																 }
															
 
																 /* Sort the data used by the given job by handle pointer value order, and
															
 
																- * acquire them in that order */
															
 
																+ * try to acquire them in that order */
															
 
																 /* No  lock is held */
															
 
																 unsigned _starpu_submit_job_enforce_data_deps(struct _starpu_job *j)
															
 
																 {
															
@@ -391,6 +489,19 @@ static unsigned unlock_one_requester(struct _starpu_data_requester *r)
 
																 		return 0;
															
 
																 }
															
 
																+/* Sort the data used by the given job by handle pointer value order, and
															
 
																+ * immediately acquire them in that order */
															
 
																+/* No  lock is held */
															
 
																+void _starpu_submit_job_take_data_deps(struct _starpu_job *j)
															
 
																+{
															
 
																+	struct starpu_codelet *cl = j->task->cl;
															
 
																+
															
 
																+	if ((cl == NULL) || (STARPU_TASK_GET_NBUFFERS(j->task) == 0))
															
 
																+		return;
															
 
																+
															
 
																+	_submit_job_take_data_deps(j, 0);
															
 
																+}
															
 
																+
															
 
																 /* This is called when a task is finished with a piece of data
															
 
																  * (or on starpu_data_release)
															
 
																  *
															
--- a/src/core/dependencies/data_concurrency.h
+++ b/src/core/dependencies/data_concurrency.h
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																  * Copyright (C) 2015                                     Inria
															
 
																- * Copyright (C) 2009-2012,2014,2015,2018                 Université de Bordeaux
															
 
																+ * Copyright (C) 2009-2012,2014,2015,2018-2019                 Université de Bordeaux
															
 
																  * Copyright (C) 2010,2011,2013,2015,2017                 CNRS
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
@@ -25,6 +25,7 @@ void _starpu_job_set_ordered_buffers(struct _starpu_job *j);
 
																 unsigned _starpu_submit_job_enforce_data_deps(struct _starpu_job *j);
															
 
																 void _starpu_submit_job_enforce_arbitered_deps(struct _starpu_job *j, unsigned buf, unsigned nbuffers);
															
 
																+void _starpu_submit_job_take_data_deps(struct _starpu_job *j);
															
 
																 void _starpu_enforce_data_deps_notify_job_ready_soon(struct _starpu_job *j, _starpu_notify_job_start_data *data);
															
 
																 int _starpu_notify_data_dependencies(starpu_data_handle_t handle);
															
--- a/src/core/jobs.c
+++ b/src/core/jobs.c
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																  * Copyright (C) 2011-2017                                Inria
															
 
																- * Copyright (C) 2008-2018                                Université de Bordeaux
															
 
																+ * Copyright (C) 2008-2019                                Université de Bordeaux
															
 
																  * Copyright (C) 2010-2019                                CNRS
															
 
																  * Copyright (C) 2013                                     Thibaut Lambert
															
 
																  * Copyright (C) 2011                                     Télécom-SudParis
															
@@ -519,7 +519,7 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 
																 			{
															
 
																 				/* We reuse the same job structure */
															
 
																 				task->status = STARPU_TASK_BLOCKED;
															
 
																-				int ret = _starpu_submit_job(j);
															
 
																+				int ret = _starpu_submit_job(j, 0);
															
 
																 				STARPU_ASSERT(!ret);
															
 
																 			}
															
 
																 #ifdef STARPU_OPENMP
															
@@ -700,6 +700,20 @@ unsigned _starpu_reenforce_task_deps_and_schedule(struct _starpu_job *j)
 
																 }
															
 
																 #endif
															
 
																+unsigned _starpu_take_deps_and_schedule(struct _starpu_job *j)
															
 
																+{
															
 
																+	unsigned ret;
															
 
																+	STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
															
 
																+
															
 
																+	/* Take references */
															
 
																+	_starpu_submit_job_take_data_deps(j);
															
 
																+
															
 
																+	/* And immediately push task */
															
 
																+	ret = _starpu_push_task(j);
															
 
																+
															
 
																+	return ret;
															
 
																+}
															
 
																+
															
 
																 /* This is called when a tag or task dependency is to be released.  */
															
 
																 void _starpu_enforce_deps_notify_job_ready_soon(struct _starpu_job *j, _starpu_notify_job_start_data *data, int tag)
															
 
																 {
															
--- a/src/core/jobs.h
+++ b/src/core/jobs.h
@@ -1,6 +1,6 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2008-2018                                Université de Bordeaux
															
 
																+ * Copyright (C) 2008-2019                                Université de Bordeaux
															
 
																  * Copyright (C) 2011,2014                                Inria
															
 
																  * Copyright (C) 2010,2011,2013-2015,2017,2018,2019       CNRS
															
 
																  * Copyright (C) 2013                                     Thibaut Lambert
															
@@ -255,6 +255,7 @@ unsigned _starpu_enforce_deps_starting_from_task(struct _starpu_job *j);
 
																 /* When waking up a continuation, we only enforce new task dependencies */
															
 
																 unsigned _starpu_reenforce_task_deps_and_schedule(struct _starpu_job *j);
															
 
																 #endif
															
 
																+unsigned _starpu_take_deps_and_schedule(struct _starpu_job *j);
															
 
																 void _starpu_enforce_deps_notify_job_ready_soon(struct _starpu_job *j, _starpu_notify_job_start_data *data, int tag);
															
 
																 /* Called at the submission of the job */
															
--- a/src/core/perfmodel/perfmodel_history.c
+++ b/src/core/perfmodel/perfmodel_history.c
@@ -1408,7 +1408,7 @@ void starpu_perfmodel_directory(FILE *output)
 
																  * the performance model files */
															
 
																 int starpu_perfmodel_list(FILE *output)
															
 
																 {
															
 
																-#if !defined(_WIN32) || defined(__MINGW32__) || defined(__CYGWIN__)
															
 
																+#ifdef HAVE_SCANDIR
															
 
																         char *path;
															
 
																 	struct dirent **list;
															
 
																 	int n;
															
--- a/src/core/task.c
+++ b/src/core/task.c
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																  * Copyright (C) 2011-2019                                Inria
															
 
																- * Copyright (C) 2009-2018                                Université de Bordeaux
															
 
																+ * Copyright (C) 2009-2019                                Université de Bordeaux
															
 
																  * Copyright (C) 2017                                     Erwan Leria
															
 
																  * Copyright (C) 2010-2019                                CNRS
															
 
																  * Copyright (C) 2013                                     Thibaut Lambert
															
@@ -478,7 +478,7 @@ int _starpu_task_test_termination(struct starpu_task *task)
 
																 /* NB in case we have a regenerable task, it is possible that the job was
															
 
																  * already counted. */
															
 
																-int _starpu_submit_job(struct _starpu_job *j)
															
 
																+int _starpu_submit_job(struct _starpu_job *j, int nodeps)
															
 
																 {
															
 
																 	struct starpu_task *task = j->task;
															
 
																 	int ret;
															
@@ -552,15 +552,22 @@ int _starpu_submit_job(struct _starpu_job *j)
 
																 	}
															
 
																 #endif
															
 
																-#ifdef STARPU_OPENMP
															
 
																-	if (continuation)
															
 
																+	if (nodeps)
															
 
																 	{
															
 
																-		ret = _starpu_reenforce_task_deps_and_schedule(j);
															
 
																+		ret = _starpu_take_deps_and_schedule(j);
															
 
																 	}
															
 
																 	else
															
 
																-#endif
															
 
																 	{
															
 
																-		ret = _starpu_enforce_deps_and_schedule(j);
															
 
																+#ifdef STARPU_OPENMP
															
 
																+		if (continuation)
															
 
																+		{
															
 
																+			ret = _starpu_reenforce_task_deps_and_schedule(j);
															
 
																+		}
															
 
																+		else
															
 
																+#endif
															
 
																+		{
															
 
																+			ret = _starpu_enforce_deps_and_schedule(j);
															
 
																+		}
															
 
																 	}
															
 
																 	_STARPU_LOG_OUT();
															
@@ -810,7 +817,7 @@ static int _starpu_task_submit_head(struct starpu_task *task)
 
																 }
															
 
																 /* application should submit new tasks to StarPU through this function */
															
 
																-int starpu_task_submit(struct starpu_task *task)
															
 
																+int _starpu_task_submit(struct starpu_task *task, int nodeps)
															
 
																 {
															
 
																 	_STARPU_LOG_IN();
															
 
																 	STARPU_ASSERT(task);
															
@@ -826,6 +833,7 @@ int starpu_task_submit(struct starpu_task *task)
 
																 	}
															
 
																 	unsigned is_sync = task->synchronous;
															
 
																 	starpu_task_bundle_t bundle = task->bundle;
															
 
																+	STARPU_ASSERT_MSG(!(nodeps && bundle), "not supported\n");
															
 
																 	/* internally, StarPU manipulates a struct _starpu_job * which is a wrapper around a
															
 
																 	* task structure, it is possible that this job structure was already
															
 
																 	* allocated. */
															
@@ -854,6 +862,7 @@ int starpu_task_submit(struct starpu_task *task)
 
																 			_starpu_perf_counter_update_per_codelet_sample(task->cl);
															
 
																 		}
															
 
																 	}
															
 
																+	STARPU_ASSERT_MSG(!(nodeps && continuation), "not supported\n");
															
 
																 	if (!j->internal)
															
 
																 	{
															
@@ -889,7 +898,8 @@ int starpu_task_submit(struct starpu_task *task)
 
																 	if (task->cl && !continuation)
															
 
																 	{
															
 
																 		_starpu_job_set_ordered_buffers(j);
															
 
																-		_starpu_detect_implicit_data_deps(task);
															
 
																+		if (!nodeps)
															
 
																+			_starpu_detect_implicit_data_deps(task);
															
 
																 	}
															
 
																 	if (bundle)
															
@@ -930,7 +940,7 @@ int starpu_task_submit(struct starpu_task *task)
 
																 	if (profiling)
															
 
																 		_starpu_clock_gettime(&info->submit_time);
															
 
																-	ret = _starpu_submit_job(j);
															
 
																+	ret = _starpu_submit_job(j, nodeps);
															
 
																 #ifdef STARPU_SIMGRID
															
 
																 	if (_starpu_simgrid_task_submit_cost())
															
 
																 		MSG_process_sleep(0.000001);
															
@@ -949,6 +959,11 @@ int starpu_task_submit(struct starpu_task *task)
 
																 	return ret;
															
 
																 }
															
 
																+int starpu_task_submit(struct starpu_task *task)
															
 
																+{
															
 
																+	return _starpu_task_submit(task, 0);
															
 
																+}
															
 
																+
															
 
																 int _starpu_task_submit_internally(struct starpu_task *task)
															
 
																 {
															
 
																 	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
															
@@ -965,27 +980,9 @@ int starpu_task_submit_to_ctx(struct starpu_task *task, unsigned sched_ctx_id)
 
																 /* The StarPU core can submit tasks directly to the scheduler or a worker,
															
 
																  * skipping dependencies completely (when it knows what it is doing).  */
															
 
																-int _starpu_task_submit_nodeps(struct starpu_task *task)
															
 
																+int starpu_task_submit_nodeps(struct starpu_task *task)
															
 
																 {
															
 
																-	int ret = _starpu_task_submit_head(task);
															
 
																-	STARPU_ASSERT(ret == 0);
															
 
																-
															
 
																-	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
															
 
																-
															
 
																-	_starpu_increment_nsubmitted_tasks_of_sched_ctx(j->task->sched_ctx);
															
 
																-	_starpu_sched_task_submit(task);
															
 
																-
															
 
																-	STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
															
 
																-	_starpu_handle_job_submission(j);
															
 
																-	_starpu_increment_nready_tasks_of_sched_ctx(j->task->sched_ctx, j->task->flops, j->task);
															
 
																-	if (task->cl)
															
 
																-		/* This would be done by data dependencies checking */
															
 
																-		_starpu_job_set_ordered_buffers(j);
															
 
																-	STARPU_ASSERT(task->status == STARPU_TASK_BLOCKED);
															
 
																-	task->status = STARPU_TASK_READY;
															
 
																-	STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
															
 
																-
															
 
																-	return _starpu_push_task(j);
															
 
																+	return _starpu_task_submit(task, 1);
															
 
																 }
															
 
																 /*
															
@@ -1570,3 +1567,102 @@ void _starpu_watchdog_shutdown(void)
 
																 	STARPU_PTHREAD_JOIN(watchdog_thread, NULL);
															
 
																 }
															
 
																+
															
 
																+static void _starpu_ft_check_support(const struct starpu_task *task)
															
 
																+{
															
 
																+	unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
															
 
																+	unsigned i;
															
 
																+
															
 
																+	for (i = 0; i < nbuffers; i++)
															
 
																+	{
															
 
																+		enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(task, i);
															
 
																+		STARPU_ASSERT_MSG (mode == STARPU_R || mode == STARPU_W,
															
 
																+				"starpu_task_failed is only supported for tasks with access modes STARPU_R and STARPU_W");
															
 
																+	}
															
 
																+}
															
 
																+
															
 
																+struct starpu_task *starpu_task_ft_create_retry
															
 
																+(const struct starpu_task *meta_task, const struct starpu_task *template_task, void (*check_ft)(void *))
															
 
																+{
															
 
																+	/* Create a new task to actually perform the computation */
															
 
																+	struct starpu_task *new_task = starpu_task_create();
															
 
																+
															
 
																+	*new_task = *template_task;
															
 
																+	new_task->prologue_callback_func = NULL;
															
 
																+	/* XXX: cl_arg needs to be duplicated */
															
 
																+	STARPU_ASSERT_MSG(!meta_task->cl_arg_free || !meta_task->cl_arg, "not supported yet");
															
 
																+	STARPU_ASSERT_MSG(!meta_task->callback_func, "not supported");
															
 
																+	new_task->callback_func = check_ft;
															
 
																+	new_task->callback_arg = (void*) meta_task;
															
 
																+	new_task->callback_arg_free = 0;
															
 
																+	new_task->prologue_callback_arg_free = 0;
															
 
																+	STARPU_ASSERT_MSG(!new_task->prologue_callback_pop_arg_free, "not supported");
															
 
																+	new_task->use_tag = 0;
															
 
																+	new_task->synchronous = 0;
															
 
																+	new_task->destroy = 1;
															
 
																+	new_task->regenerate = 0;
															
 
																+	new_task->no_submitorder = 1;
															
 
																+	new_task->failed = 0;
															
 
																+	new_task->status = STARPU_TASK_INVALID;
															
 
																+	new_task->profiling_info = NULL;
															
 
																+	new_task->prev = NULL;
															
 
																+	new_task->next = NULL;
															
 
																+	new_task->starpu_private = NULL;
															
 
																+	new_task->omp_task = NULL;
															
 
																+
															
 
																+	return new_task;
															
 
																+}
															
 
																+
															
 
																+static void _starpu_default_check_ft(void *arg)
															
 
																+{
															
 
																+	struct starpu_task *meta_task = arg;
															
 
																+	struct starpu_task *current_task = starpu_task_get_current();
															
 
																+	struct starpu_task *new_task;
															
 
																+	int ret;
															
 
																+
															
 
																+	if (!current_task->failed)
															
 
																+	{
															
 
																+		starpu_task_ft_success(meta_task);
															
 
																+		return;
															
 
																+	}
															
 
																+
															
 
																+	new_task = starpu_task_ft_create_retry
															
 
																+(meta_task, current_task, _starpu_default_check_ft);
															
 
																+
															
 
																+	ret = starpu_task_submit_nodeps(new_task);
															
 
																+	STARPU_ASSERT(!ret);
															
 
																+}
															
 
																+
															
 
																+void starpu_task_ft_prologue(void *arg)
															
 
																+{
															
 
																+	struct starpu_task *meta_task = starpu_task_get_current();
															
 
																+	struct starpu_task *new_task;
															
 
																+	void (*check_ft)(void*) = arg;
															
 
																+	int ret;
															
 
																+
															
 
																+	if (!check_ft)
															
 
																+		check_ft = _starpu_default_check_ft;
															
 
																+
															
 
																+	/* Create a task which will do the actual computation */
															
 
																+	new_task = starpu_task_ft_create_retry
															
 
																+(meta_task, meta_task, check_ft);
															
 
																+
															
 
																+	ret = starpu_task_submit_nodeps(new_task);
															
 
																+	STARPU_ASSERT(!ret);
															
 
																+
															
 
																+	/* Make the parent task wait until the result is correct */
															
 
																+	starpu_task_end_dep_add(meta_task, 1);
															
 
																+	meta_task->where = STARPU_NOWHERE;
															
 
																+}
															
 
																+
															
 
																+void starpu_task_ft_failed(struct starpu_task *task)
															
 
																+{
															
 
																+	_starpu_ft_check_support(task);
															
 
																+
															
 
																+	task->failed = 1;
															
 
																+}
															
 
																+
															
 
																+void starpu_task_ft_success(struct starpu_task *meta_task)
															
 
																+{
															
 
																+	starpu_task_end_dep_release(meta_task);
															
 
																+}
															
--- a/src/core/task.h
+++ b/src/core/task.h
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																  * Copyright (C) 2011-2014,2016,2017                      Inria
															
 
																- * Copyright (C) 2009-2018                                Université de Bordeaux
															
 
																+ * Copyright (C) 2009-2019                                Université de Bordeaux
															
 
																  * Copyright (C) 2010-2017, 2019                          CNRS
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
@@ -39,11 +39,7 @@ void _starpu_task_init(void);
 
																 void _starpu_task_deinit(void);
															
 
																 void _starpu_set_current_task(struct starpu_task *task);
															
 
																-/* NB the second argument makes it possible to count regenerable tasks only
															
 
																- * once. */
															
 
																-int _starpu_submit_job(struct _starpu_job *j);
															
 
																-
															
 
																-int _starpu_task_submit_nodeps(struct starpu_task *task);
															
 
																+int _starpu_submit_job(struct _starpu_job *j, int nodeps);
															
 
																 void _starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[], int check);
															
--- a/src/core/topology.c
+++ b/src/core/topology.c
@@ -353,32 +353,38 @@ struct _starpu_worker *_starpu_get_worker_from_driver(struct starpu_driver *d)
 
																 	for (workerid = 0; workerid < nworkers; workerid++)
															
 
																 	{
															
 
																-		struct _starpu_worker *worker;
															
 
																-		worker = _starpu_get_worker_struct(workerid);
															
 
																-
															
 
																 		if (starpu_worker_get_type(workerid) == d->type)
															
 
																 		{
															
 
																+			struct _starpu_worker *worker;
															
 
																+			worker = _starpu_get_worker_struct(workerid);
															
 
																 			switch (d->type)
															
 
																 			{
															
 
																+#ifdef STARPU_USE_CPU
															
 
																 			case STARPU_CPU_WORKER:
															
 
																-			{
															
 
																-				if (worker->driver.id.cpu_id == d->id.cpu_id)
															
 
																+				if (worker->devid == d->id.cpu_id)
															
 
																 					return worker;
															
 
																 				break;
															
 
																-			}
															
 
																+#endif
															
 
																+#ifdef STARPU_USE_OPENCL
															
 
																 			case STARPU_OPENCL_WORKER:
															
 
																 			{
															
 
																-				if (worker->driver.id.opencl_id == d->id.opencl_id)
															
 
																+				cl_device_id device;
															
 
																+				starpu_opencl_get_device(worker->devid, &device);
															
 
																+				if (device == d->id.opencl_id)
															
 
																 					return worker;
															
 
																 				break;
															
 
																 			}
															
 
																+#endif
															
 
																+#ifdef STARPU_USE_CUDA
															
 
																 			case STARPU_CUDA_WORKER:
															
 
																 			{
															
 
																-				if (worker->driver.id.cuda_id == d->id.cuda_id)
															
 
																+				if (worker->devid == d->id.cuda_id)
															
 
																 					return worker;
															
 
																 				break;
															
 
																 			}
															
 
																+#endif
															
 
																+
															
 
																 			default:
															
 
																 				(void) worker;
															
 
																 				_STARPU_DEBUG("Invalid device type\n");
															
--- a/src/core/workers.c
+++ b/src/core/workers.c
@@ -810,18 +810,18 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 
																 		_starpu_init_worker_queue(workerarg);
															
 
																-		struct starpu_driver *driver = &(workerarg->driver);
															
 
																-		driver->type = workerarg->arch;
															
 
																+		struct starpu_driver driver;
															
 
																+		driver.type = workerarg->arch;
															
 
																 		switch (workerarg->arch)
															
 
																 		{
															
 
																 #if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID)
															
 
																 			case STARPU_CPU_WORKER:
															
 
																-				driver->id.cpu_id = devid;
															
 
																+			{
															
 
																+				driver.id.cpu_id = devid;
															
 
																 				workerarg->driver_ops = &_starpu_driver_cpu_ops;
															
 
																 				workerarg->wait_for_worker_initialization = 1;
															
 
																-				workerarg->may_launch_driver = _starpu_may_launch_driver(&pconfig->conf, driver);
															
 
																-				if (workerarg->may_launch_driver)
															
 
																+				if (_starpu_may_launch_driver(&pconfig->conf, &driver))
															
 
																 				{
															
 
																 					STARPU_PTHREAD_CREATE_ON(
															
 
																 						"CPU",
															
@@ -836,11 +836,13 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 
																 					workerarg->run_by_starpu = 0;
															
 
																 				}
															
 
																 				break;
															
 
																+			}
															
 
																 #endif
															
 
																 #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
															
 
																 			case STARPU_CUDA_WORKER:
															
 
																-				driver->id.cuda_id = devid;
															
 
																+			{
															
 
																+				driver.id.cuda_id = devid;
															
 
																 				workerarg->driver_ops = &_starpu_driver_cuda_ops;
															
 
																 				struct _starpu_worker_set *worker_set = workerarg->set;
															
@@ -852,9 +854,8 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 
																 				worker_set->set_is_initialized = 0;
															
 
																 				worker_set->wait_for_set_initialization = 1;
															
 
																 				workerarg->wait_for_worker_initialization = 0;
															
 
																-				workerarg->may_launch_driver = _starpu_may_launch_driver(&pconfig->conf, driver);
															
 
																-				if (workerarg->may_launch_driver)
															
 
																+				if (_starpu_may_launch_driver(&pconfig->conf, &driver))
															
 
																 				{
															
 
																 					STARPU_PTHREAD_CREATE_ON(
															
 
																 						"CUDA",
															
@@ -869,17 +870,18 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 
																 					workerarg->run_by_starpu = 0;
															
 
																 				}
															
 
																 				break;
															
 
																+			}
															
 
																 #endif
															
 
																 #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
															
 
																 			case STARPU_OPENCL_WORKER:
															
 
																+			{
															
 
																 #ifndef STARPU_SIMGRID
															
 
																-				starpu_opencl_get_device(devid, &driver->id.opencl_id);
															
 
																+				starpu_opencl_get_device(devid, &driver.id.opencl_id);
															
 
																 				workerarg->driver_ops = &_starpu_driver_opencl_ops;
															
 
																 				workerarg->wait_for_worker_initialization = 1;
															
 
																-				workerarg->may_launch_driver = _starpu_may_launch_driver(&pconfig->conf, driver);
															
 
																-				if (workerarg->may_launch_driver)
															
 
																+				if (_starpu_may_launch_driver(&pconfig->conf, &driver))
															
 
																 				{
															
 
																 					STARPU_PTHREAD_CREATE_ON(
															
 
																 						"OpenCL",
															
@@ -895,10 +897,12 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 
																 				}
															
 
																 #endif
															
 
																 				break;
															
 
																+			}
															
 
																 #endif
															
 
																 #ifdef STARPU_USE_MIC
															
 
																 			case STARPU_MIC_WORKER:
															
 
																+			{
															
 
																 				/* We spawn only one thread
															
 
																 				 * per MIC device, which will control all MIC
															
 
																 				 * workers of this device. (by using a worker set). */
															
@@ -919,10 +923,12 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 
																 						_starpu_simgrid_get_host_by_worker(workerarg));
															
 
																 				break;
															
 
																+			}
															
 
																 #endif /* STARPU_USE_MIC */
															
 
																 #ifdef STARPU_USE_MPI_MASTER_SLAVE
															
 
																 			case STARPU_MPI_MS_WORKER:
															
 
																+			{
															
 
																 				/* We spawn only one thread
															
 
																 				 * per MPI device, which will control all MPI
															
 
																 				 * workers of this device. (by using a worker set). */
															
@@ -950,6 +956,7 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 
																 #endif /* STARPU_MPI_MASTER_SLAVE_MULTIPLE_THREAD */
															
 
																 				break;
															
 
																+			}
															
 
																 #endif /* STARPU_USE_MPI_MASTER_SLAVE */
															
 
																 			default:
															
@@ -1741,24 +1748,43 @@ unsigned _starpu_worker_can_block(unsigned memnode STARPU_ATTRIBUTE_UNUSED, stru
 
																 	if (worker->state_changing_ctx_notice)
															
 
																 		return 0;
															
 
																-	if (worker->driver.type == STARPU_CPU_WORKER || worker->driver.type == STARPU_CUDA_WORKER || worker->driver.type == STARPU_OPENCL_WORKER)
															
 
																+	unsigned can_block = 1;
															
 
																+
															
 
																+	struct starpu_driver driver;
															
 
																+	driver.type = worker->arch;
															
 
																+	switch (driver.type)
															
 
																 	{
															
 
																-		if (worker->may_launch_driver == 0)
															
 
																-			return 0;
															
 
																+	case STARPU_CPU_WORKER:
															
 
																+		driver.id.cpu_id = worker->devid;
															
 
																+		break;
															
 
																+	case STARPU_CUDA_WORKER:
															
 
																+		driver.id.cuda_id = worker->devid;
															
 
																+		break;
															
 
																+#ifdef STARPU_USE_OPENCL
															
 
																+	case STARPU_OPENCL_WORKER:
															
 
																+		starpu_opencl_get_device(worker->devid, &driver.id.opencl_id);
															
 
																+		break;
															
 
																+#endif
															
 
																+	default:
															
 
																+		goto always_launch;
															
 
																 	}
															
 
																-	else
															
 
																-	{
															
 
																+	if (!_starpu_may_launch_driver(&_starpu_config.conf, &driver))
															
 
																+		return 0;
															
 
																+
															
 
																+always_launch:
															
 
																+
															
 
																 #ifndef STARPU_SIMGRID
															
 
																-		if (!_starpu_check_that_no_data_request_exists(memnode))
															
 
																-			return 0;
															
 
																+	if (!_starpu_check_that_no_data_request_exists(memnode))
															
 
																+		can_block = 0;
															
 
																 #endif
															
 
																-		if (!_starpu_machine_is_running())
															
 
																-			return 0;
															
 
																-		if (!_starpu_execute_registered_progression_hooks())
															
 
																-			return 0;
															
 
																-	}
															
 
																-	return 1;
															
 
																+	if (!_starpu_machine_is_running())
															
 
																+		can_block = 0;
															
 
																+
															
 
																+	if (!_starpu_execute_registered_progression_hooks())
															
 
																+		can_block = 0;
															
 
																+
															
 
																+	return can_block;
															
 
																 #endif
															
 
																 }
															
--- a/src/core/workers.h
+++ b/src/core/workers.h
@@ -155,8 +155,6 @@ LIST_TYPE(_starpu_worker,
 
																 	char short_name[32];
															
 
																 	unsigned run_by_starpu; /**< Is this run by StarPU or directly by the application ? */
															
 
																 	struct _starpu_driver_ops *driver_ops;
															
 
																-	struct starpu_driver driver;
															
 
																-	unsigned may_launch_driver;
															
 
																 	struct _starpu_sched_ctx_list *sched_ctx_list;
															
 
																 	int tmp_sched_ctx;
															
--- a/src/datawizard/filters.h
+++ b/src/datawizard/filters.h
@@ -26,12 +26,6 @@
 
																 #include <starpu.h>
															
 
																 #include <common/config.h>
															
 
																-void
															
 
																-_starpu_filter_nparts_compute_chunk_size_and_offset(unsigned n, unsigned nparts,
															
 
																-					     size_t elemsize, unsigned id,
															
 
																-					     unsigned ld, unsigned *chunk_size,
															
 
																-					     size_t *offset);
															
 
																-
															
 
																 /* submit asynchronous unpartitioning / partitioning to make target active read-only or read-write */
															
 
																 void _starpu_data_partition_access_submit(starpu_data_handle_t target, int write);
															
--- a/src/datawizard/interfaces/block_filters.c
+++ b/src/datawizard/interfaces/block_filters.c
@@ -1,8 +1,8 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																  * Copyright (C) 2012                                     Inria
															
 
																- * Copyright (C) 2010,2011,2013,2015,2017                 CNRS
															
 
																- * Copyright (C) 2011-2014,2016, 2019                           Université de Bordeaux
															
 
																+ * Copyright (C) 2010,2011,2013,2015,2017,2019            CNRS
															
 
																+ * Copyright (C) 2011-2014,2016, 2019                     Université de Bordeaux
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -21,7 +21,7 @@
 
																 #include <datawizard/filters.h>
															
 
																 void starpu_block_filter_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f,
															
 
																-                                    unsigned id, unsigned nparts)
															
 
																+			       unsigned id, unsigned nparts)
															
 
																 {
															
 
																         struct starpu_block_interface *block_father = (struct starpu_block_interface *) father_interface;
															
 
																         struct starpu_block_interface *block_child = (struct starpu_block_interface *) child_interface;
															
@@ -31,12 +31,11 @@ void starpu_block_filter_block(void *father_interface, void *child_interface, ST
 
																         uint32_t nz = block_father->nz;
															
 
																 	size_t elemsize = block_father->elemsize;
															
 
																-	STARPU_ASSERT_MSG(nparts <= nx, "%u parts for %u elements", nparts, nx);
															
 
																+	STARPU_ASSERT_MSG(nparts <= nx, "cannot split %u elements in %u parts", nx, nparts);
															
 
																 	uint32_t chunk_size;
															
 
																 	size_t offset;
															
 
																-	starpu_filter_nparts_compute_chunk_size_and_offset(nx, nparts, elemsize, id, 1,
															
 
																-				       &chunk_size, &offset);
															
 
																+	starpu_filter_nparts_compute_chunk_size_and_offset(nx, nparts, elemsize, id, 1, &chunk_size, &offset);
															
 
																 	STARPU_ASSERT_MSG(block_father->id == STARPU_BLOCK_INTERFACE_ID, "%s can only be applied on a block data", __func__);
															
 
																 	block_child->id = block_father->id;
															
@@ -57,7 +56,7 @@ void starpu_block_filter_block(void *father_interface, void *child_interface, ST
 
																 }
															
 
																 void starpu_block_filter_block_shadow(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f,
															
 
																-                                    unsigned id, unsigned nparts)
															
 
																+				      unsigned id, unsigned nparts)
															
 
																 {
															
 
																         struct starpu_block_interface *block_father = (struct starpu_block_interface *) father_interface;
															
 
																         struct starpu_block_interface *block_child = (struct starpu_block_interface *) child_interface;
															
@@ -70,13 +69,11 @@ void starpu_block_filter_block_shadow(void *father_interface, void *child_interf
 
																         uint32_t nz = block_father->nz;
															
 
																 	size_t elemsize = block_father->elemsize;
															
 
																-	STARPU_ASSERT_MSG(nparts <= nx, "%u parts for %u elements", nparts, nx);
															
 
																+	STARPU_ASSERT_MSG(nparts <= nx, "cannot split %u elements in %u parts", nx, nparts);
															
 
																 	uint32_t child_nx;
															
 
																 	size_t offset;
															
 
																-	starpu_filter_nparts_compute_chunk_size_and_offset(nx, nparts, elemsize, id, 1,
															
 
																-						     &child_nx, &offset);
															
 
																-	
															
 
																+	starpu_filter_nparts_compute_chunk_size_and_offset(nx, nparts, elemsize, id, 1, &child_nx, &offset);
															
 
																 	STARPU_ASSERT_MSG(block_father->id == STARPU_BLOCK_INTERFACE_ID, "%s can only be applied on a block data", __func__);
															
 
																 	block_child->id = block_father->id;
															
@@ -97,7 +94,7 @@ void starpu_block_filter_block_shadow(void *father_interface, void *child_interf
 
																 }
															
 
																 void starpu_block_filter_vertical_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f,
															
 
																-                                    unsigned id, unsigned nparts)
															
 
																+					unsigned id, unsigned nparts)
															
 
																 {
															
 
																         struct starpu_block_interface *block_father = (struct starpu_block_interface *) father_interface;
															
 
																         struct starpu_block_interface *block_child = (struct starpu_block_interface *) child_interface;
															
@@ -107,12 +104,11 @@ void starpu_block_filter_vertical_block(void *father_interface, void *child_inte
 
																         uint32_t nz = block_father->nz;
															
 
																 	size_t elemsize = block_father->elemsize;
															
 
																-	STARPU_ASSERT_MSG(nparts <= ny, "%u parts for %u elements", nparts, ny);
															
 
																+	STARPU_ASSERT_MSG(nparts <= ny, "cannot split %u elements in %u parts", ny, nparts);
															
 
																 	uint32_t child_ny;
															
 
																 	size_t offset;
															
 
																-	starpu_filter_nparts_compute_chunk_size_and_offset(ny, nparts, elemsize, id, block_father->ldy,
															
 
																-				       &child_ny, &offset);
															
 
																+	starpu_filter_nparts_compute_chunk_size_and_offset(ny, nparts, elemsize, id, block_father->ldy, &child_ny, &offset);
															
 
																 	STARPU_ASSERT_MSG(block_father->id == STARPU_BLOCK_INTERFACE_ID, "%s can only be applied on a block data", __func__);
															
 
																 	block_child->id = block_father->id;
															
@@ -133,7 +129,7 @@ void starpu_block_filter_vertical_block(void *father_interface, void *child_inte
 
																 }
															
 
																 void starpu_block_filter_vertical_block_shadow(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f,
															
 
																-                                    unsigned id, unsigned nparts)
															
 
																+					       unsigned id, unsigned nparts)
															
 
																 {
															
 
																         struct starpu_block_interface *block_father = (struct starpu_block_interface *) father_interface;
															
 
																         struct starpu_block_interface *block_child = (struct starpu_block_interface *) child_interface;
															
@@ -146,14 +142,12 @@ void starpu_block_filter_vertical_block_shadow(void *father_interface, void *chi
 
																         uint32_t nz = block_father->nz;
															
 
																 	size_t elemsize = block_father->elemsize;
															
 
																-	STARPU_ASSERT_MSG(nparts <= ny, "%u parts for %u elements", nparts, ny);
															
 
																+	STARPU_ASSERT_MSG(nparts <= ny, "cannot split %u elements in %u parts", ny, nparts);
															
 
																 	uint32_t child_ny;
															
 
																 	size_t offset;
															
 
																-	starpu_filter_nparts_compute_chunk_size_and_offset(ny, nparts, elemsize, id,
															
 
																-						     block_father->ldy,
															
 
																-						     &child_ny, &offset);
															
 
																+	starpu_filter_nparts_compute_chunk_size_and_offset(ny, nparts, elemsize, id, block_father->ldy, &child_ny, &offset);
															
 
																 	STARPU_ASSERT_MSG(block_father->id == STARPU_BLOCK_INTERFACE_ID, "%s can only be applied on a block data", __func__);
															
 
																 	block_child->id = block_father->id;
															
@@ -174,7 +168,7 @@ void starpu_block_filter_vertical_block_shadow(void *father_interface, void *chi
 
																 }
															
 
																 void starpu_block_filter_depth_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f,
															
 
																-                                    unsigned id, unsigned nparts)
															
 
																+				     unsigned id, unsigned nparts)
															
 
																 {
															
 
																         struct starpu_block_interface *block_father = (struct starpu_block_interface *) father_interface;
															
 
																         struct starpu_block_interface *block_child = (struct starpu_block_interface *) child_interface;
															
@@ -184,7 +178,7 @@ void starpu_block_filter_depth_block(void *father_interface, void *child_interfa
 
																         uint32_t nz = block_father->nz;
															
 
																 	size_t elemsize = block_father->elemsize;
															
 
																-	STARPU_ASSERT_MSG(nparts <= nz, "%u parts for %u elements", nparts, nz);
															
 
																+	STARPU_ASSERT_MSG(nparts <= nz, "cannot split %u elements in %u parts", nz, nparts);
															
 
																 	uint32_t child_nz;
															
 
																 	size_t offset;
															
@@ -211,7 +205,7 @@ void starpu_block_filter_depth_block(void *father_interface, void *child_interfa
 
																 }
															
 
																 void starpu_block_filter_depth_block_shadow(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f,
															
 
																-                                    unsigned id, unsigned nparts)
															
 
																+					    unsigned id, unsigned nparts)
															
 
																 {
															
 
																         struct starpu_block_interface *block_father = (struct starpu_block_interface *) father_interface;
															
 
																         struct starpu_block_interface *block_child = (struct starpu_block_interface *) child_interface;
															
@@ -224,14 +218,12 @@ void starpu_block_filter_depth_block_shadow(void *father_interface, void *child_
 
																         uint32_t nz = block_father->nz - 2 * shadow_size;
															
 
																 	size_t elemsize = block_father->elemsize;
															
 
																-	STARPU_ASSERT_MSG(nparts <= nz, "%u parts for %u elements", nparts, nz);
															
 
																+	STARPU_ASSERT_MSG(nparts <= nz, "cannot split %u elements into %u parts", nz, nparts);
															
 
																 	uint32_t child_nz;
															
 
																 	size_t offset;
															
 
																-	starpu_filter_nparts_compute_chunk_size_and_offset(nz, nparts, elemsize, id,
															
 
																-						     block_father->ldz,
															
 
																-						     &child_nz, &offset);
															
 
																+	starpu_filter_nparts_compute_chunk_size_and_offset(nz, nparts, elemsize, id, block_father->ldz, &child_nz, &offset);
															
 
																 	STARPU_ASSERT_MSG(block_father->id == STARPU_BLOCK_INTERFACE_ID, "%s can only be applied on a block data", __func__);
															
 
																 	block_child->id = block_father->id;
															
--- a/src/datawizard/interfaces/matrix_filters.c
+++ b/src/datawizard/interfaces/matrix_filters.c
@@ -3,7 +3,7 @@
 
																  * Copyright (C) 2008-2014,2016,2019                      Université de Bordeaux
															
 
																  * Copyright (C) 2012                                     Inria
															
 
																  * Copyright (C) 2010                                     Mehdi Juhoor
															
 
																- * Copyright (C) 2010,2011,2013,2015,2017                 CNRS
															
 
																+ * Copyright (C) 2010,2011,2013,2015,2017,2019            CNRS
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -33,13 +33,12 @@ void starpu_matrix_filter_block(void *father_interface, void *child_interface, S
 
																 	uint32_t ny = matrix_father->ny;
															
 
																 	size_t elemsize = matrix_father->elemsize;
															
 
																-	STARPU_ASSERT_MSG(nchunks <= nx, "%u parts for %u elements", nchunks, nx);
															
 
																+	STARPU_ASSERT_MSG(nchunks <= nx, "cannot split %u elements in %u parts", nx, nchunks);
															
 
																 	uint32_t child_nx;
															
 
																 	size_t offset;
															
 
																-	starpu_filter_nparts_compute_chunk_size_and_offset(nx, nchunks, elemsize, id, 1,
															
 
																-						     &child_nx, &offset);
															
 
																+	starpu_filter_nparts_compute_chunk_size_and_offset(nx, nchunks, elemsize, id, 1, &child_nx, &offset);
															
 
																 	STARPU_ASSERT_MSG(matrix_father->id == STARPU_MATRIX_INTERFACE_ID, "%s can only be applied on a matrix data", __func__);
															
@@ -77,13 +76,12 @@ void starpu_matrix_filter_block_shadow(void *father_interface, void *child_inter
 
																 	uint32_t ny = matrix_father->ny;
															
 
																 	size_t elemsize = matrix_father->elemsize;
															
 
																-	STARPU_ASSERT_MSG(nchunks <= nx, "%u parts for %u elements", nchunks, nx);
															
 
																+	STARPU_ASSERT_MSG(nchunks <= nx, "cannot split %u elements in %u parts", nx, nchunks);
															
 
																 	uint32_t child_nx;
															
 
																 	size_t offset;
															
 
																-	starpu_filter_nparts_compute_chunk_size_and_offset(nx, nchunks, elemsize, id, 1,
															
 
																-						     &child_nx, &offset);
															
 
																+	starpu_filter_nparts_compute_chunk_size_and_offset(nx, nchunks, elemsize, id, 1, &child_nx, &offset);
															
 
																 	child_nx += 2 * shadow_size;
															
@@ -117,14 +115,12 @@ void starpu_matrix_filter_vertical_block(void *father_interface, void *child_int
 
																 	uint32_t ny = matrix_father->ny;
															
 
																 	size_t elemsize = matrix_father->elemsize;
															
 
																-	STARPU_ASSERT_MSG(nchunks <= ny, "%u parts for %u elements", nchunks, ny);
															
 
																+	STARPU_ASSERT_MSG(nchunks <= ny, "cannot split %u elements in %u parts", ny, nchunks);
															
 
																 	uint32_t child_ny;
															
 
																 	size_t offset;
															
 
																-	starpu_filter_nparts_compute_chunk_size_and_offset(ny, nchunks, elemsize, id,
															
 
																-						     matrix_father->ld,
															
 
																-						     &child_ny, &offset);
															
 
																+	starpu_filter_nparts_compute_chunk_size_and_offset(ny, nchunks, elemsize, id, matrix_father->ld, &child_ny, &offset);
															
 
																 	STARPU_ASSERT_MSG(matrix_father->id == STARPU_MATRIX_INTERFACE_ID, "%s can only be applied on a matrix data", __func__);
															
 
																 	matrix_child->id = matrix_father->id;
															
@@ -157,14 +153,12 @@ void starpu_matrix_filter_vertical_block_shadow(void *father_interface, void *ch
 
																 	uint32_t ny = matrix_father->ny - 2 * shadow_size;
															
 
																 	size_t elemsize = matrix_father->elemsize;
															
 
																-	STARPU_ASSERT_MSG(nchunks <= ny, "%u parts for %u elements", nchunks, ny);
															
 
																+	STARPU_ASSERT_MSG(nchunks <= ny, "cannot split %u elements in %u parts", ny, nchunks);
															
 
																 	uint32_t child_ny;
															
 
																 	size_t offset;
															
 
																-	starpu_filter_nparts_compute_chunk_size_and_offset(ny, nchunks, elemsize, id,
															
 
																-						     matrix_father->ld,
															
 
																-						     &child_ny, &offset);
															
 
																+	starpu_filter_nparts_compute_chunk_size_and_offset(ny, nchunks, elemsize, id, matrix_father->ld, &child_ny, &offset);
															
 
																 	child_ny += 2 * shadow_size;
															
 
																 	STARPU_ASSERT_MSG(matrix_father->id == STARPU_MATRIX_INTERFACE_ID, "%s can only be applied on a matrix data", __func__);
															
@@ -172,7 +166,7 @@ void starpu_matrix_filter_vertical_block_shadow(void *father_interface, void *ch
 
																 	matrix_child->nx = nx;
															
 
																 	matrix_child->ny = child_ny;
															
 
																 	matrix_child->elemsize = elemsize;
															
 
																-	STARPU_ASSERT_MSG(matrix_father->allocsize == matrix_father->nx * matrix_father->ny * matrix_father->elemsize, "partitioning matrix with non-trivial allocsize not supported yet, patch welcome");
															
 
																+	STARPU_ASSERT_MSG(matrix_father->allocsize == matrix_father->nx * matrix_father->ny * matrix_father->elemsize, "partitioning matrix with non-trivial allocsize not supported yet, patch welcomed");
															
 
																 	matrix_child->allocsize = matrix_child->nx * matrix_child->ny * elemsize;
															
 
																 	/* is the information on this node valid ? */
															
--- a/src/datawizard/interfaces/vector_filters.c
+++ b/src/datawizard/interfaces/vector_filters.c
@@ -3,7 +3,7 @@
 
																  * Copyright (C) 2008-2014,2016,2017,2019                 Université de Bordeaux
															
 
																  * Copyright (C) 2012                                     Inria
															
 
																  * Copyright (C) 2010                                     Mehdi Juhoor
															
 
																- * Copyright (C) 2010,2011,2013,2015-2017                 CNRS
															
 
																+ * Copyright (C) 2010,2011,2013,2015-2017,2019            CNRS
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -29,12 +29,11 @@ void starpu_vector_filter_block(void *father_interface, void *child_interface, S
 
																 	uint32_t nx = vector_father->nx;
															
 
																 	size_t elemsize = vector_father->elemsize;
															
 
																-	STARPU_ASSERT_MSG(nchunks <= nx, "%u parts for %u elements", nchunks, nx);
															
 
																+	STARPU_ASSERT_MSG(nchunks <= nx, "cannot split %u elements in %u parts", nx, nchunks);
															
 
																 	uint32_t child_nx;
															
 
																 	size_t offset;
															
 
																-	starpu_filter_nparts_compute_chunk_size_and_offset(nx, nchunks, elemsize, id, 1,
															
 
																-						     &child_nx, &offset);
															
 
																+	starpu_filter_nparts_compute_chunk_size_and_offset(nx, nchunks, elemsize, id, 1, &child_nx, &offset);
															
 
																 	STARPU_ASSERT_MSG(vector_father->id == STARPU_VECTOR_INTERFACE_ID, "%s can only be applied on a vector data", __func__);
															
 
																 	vector_child->id = vector_father->id;
															
@@ -64,12 +63,11 @@ void starpu_vector_filter_block_shadow(void *father_interface, void *child_inter
 
																 	uint32_t nx = vector_father->nx - 2 * shadow_size;
															
 
																 	size_t elemsize = vector_father->elemsize;
															
 
																-	STARPU_ASSERT_MSG(nchunks <= nx, "%u parts for %u elements", nchunks, nx);
															
 
																+	STARPU_ASSERT_MSG(nchunks <= nx, "cannot split %u elements in %u parts", nx, nchunks);
															
 
																 	uint32_t child_nx;
															
 
																 	size_t offset;
															
 
																-	starpu_filter_nparts_compute_chunk_size_and_offset(nx, nchunks, elemsize, id, 1,
															
 
																-						     &child_nx, &offset);
															
 
																+	starpu_filter_nparts_compute_chunk_size_and_offset(nx, nchunks, elemsize, id, 1, &child_nx, &offset);
															
 
																 	child_nx += 2*shadow_size;
															
 
																 	STARPU_ASSERT_MSG(vector_father->id == STARPU_VECTOR_INTERFACE_ID, "%s can only be applied on a vector data", __func__);
															
@@ -155,7 +153,7 @@ void starpu_vector_filter_list_long(void *father_interface, void *child_interfac
 
																 	vector_child->id = vector_father->id;
															
 
																 	vector_child->nx = chunk_size;
															
 
																 	vector_child->elemsize = elemsize;
															
 
																-	STARPU_ASSERT_MSG(vector_father->allocsize == vector_father->nx * vector_father->elemsize, "partitioning vector with non-trival allocsize not supported yet, patch welcome");
															
 
																+	STARPU_ASSERT_MSG(vector_father->allocsize == vector_father->nx * vector_father->elemsize, "partitioning vector with non-trival allocsize not supported yet, patch welcomed");
															
 
																 	vector_child->allocsize = vector_child->nx * elemsize;
															
 
																 	if (vector_father->dev_handle)
															
@@ -188,7 +186,7 @@ void starpu_vector_filter_list(void *father_interface, void *child_interface, st
 
																 	vector_child->id = vector_father->id;
															
 
																 	vector_child->nx = chunk_size;
															
 
																 	vector_child->elemsize = elemsize;
															
 
																-	STARPU_ASSERT_MSG(vector_father->allocsize == vector_father->nx * vector_father->elemsize, "partitioning vector with non-trival allocsize not supported yet, patch welcome");
															
 
																+	STARPU_ASSERT_MSG(vector_father->allocsize == vector_father->nx * vector_father->elemsize, "partitioning vector with non-trival allocsize not supported yet, patch welcomed");
															
 
																 	vector_child->allocsize = vector_child->nx * elemsize;
															
 
																 	if (vector_father->dev_handle)
															
--- a/src/debug/traces/starpu_fxt.c
+++ b/src/debug/traces/starpu_fxt.c
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																  * Copyright (C) 2011-2017                                Inria
															
 
																- * Copyright (C) 2009-2018                                Université de Bordeaux
															
 
																+ * Copyright (C) 2009-2019                                Université de Bordeaux
															
 
																  * Copyright (C) 2013                                     Joris Pablo
															
 
																  * Copyright (C) 2017,2018                                Federal University of Rio Grande do Sul (UFRGS)
															
 
																  * Copyright (C) 2011-2019                                CNRS
															
@@ -1609,7 +1609,7 @@ static void handle_codelet_details(struct fxt_ev_64 *ev, struct starpu_fxt_optio
 
																 		int i;
															
 
																 		for (i = 0; i < last_codelet_parameter[worker] && i < MAX_PARAMETERS; i++)
															
 
																 		{
															
 
																-			eaten += snprintf(parameters + eaten, sizeof(parameters) - eaten - 1, "%s%s", i?"_":"", last_codelet_parameter_description[worker][i]);
															
 
																+			eaten += snprintf(parameters + eaten, sizeof(parameters) - eaten - 1, "%s%s", i?" ":"", last_codelet_parameter_description[worker][i]);
															
 
																 		}
															
 
																 	}
															
 
																 	parameters[sizeof(parameters)-1] = 0;
															
@@ -1641,6 +1641,12 @@ static void handle_codelet_details(struct fxt_ev_64 *ev, struct starpu_fxt_optio
 
																 		char *prefix = options->file_prefix;
															
 
																 		unsigned sched_ctx = ev->param[0];
															
 
																+		/* Paje won't like spaces, replace with underscores */
															
 
																+		char *c;
															
 
																+		for (c = parameters; *c; c++)
															
 
																+			if (*c == ' ')
															
 
																+				*c = '_';
															
 
																+
															
 
																 		worker_set_detailed_state(last_codelet_start[worker], prefix, worker, _starpu_last_codelet_symbol[worker], ev->param[1], parameters, ev->param[2], ev->param[4], job_id, ((double) task->kflops) / 1000000, X, Y, Z, task->iterations[0], task->iterations[1], options);
															
 
																 		if (sched_ctx != 0)
															
 
																 		{
															
@@ -4478,6 +4484,7 @@ struct parse_task
 
																 {
															
 
																 	unsigned exec_time;
															
 
																 	unsigned data_total;
															
 
																+	unsigned workerid;
															
 
																 	char *codelet_name;
															
 
																 };
															
@@ -4515,7 +4522,7 @@ static void write_task(struct parse_task pt)
 
																 		fprintf(codelet_list, "%s\n", codelet_name);
															
 
																 	}
															
 
																 	double time = pt.exec_time * NANO_SEC_TO_MILI_SEC;
															
 
																-	fprintf(kernel->file, "%lf %u\n", time, pt.data_total);
															
 
																+	fprintf(kernel->file, "%lf %u %u\n", time, pt.data_total, pt.workerid);
															
 
																 }
															
 
																 void starpu_fxt_write_data_trace(char *filename_in)
															
@@ -4570,6 +4577,7 @@ void starpu_fxt_write_data_trace(char *filename_in)
 
																 		case _STARPU_FUT_START_CODELET_BODY:
															
 
																 			workerid = ev.param[2];
															
 
																+			tasks[workerid].workerid = (unsigned)workerid;
															
 
																 			tasks[workerid].exec_time = ev.time;
															
 
																 			has_name = ev.param[4];
															
 
																 			tasks[workerid].codelet_name = strdup(has_name ? get_fxt_string(&ev, 5): "unknown");
															
--- a/src/drivers/driver_common/driver_common.c
+++ b/src/drivers/driver_common/driver_common.c
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																  * Copyright (C) 2011-2017                                Inria
															
 
																- * Copyright (C) 2010-2018                                Université de Bordeaux
															
 
																+ * Copyright (C) 2010-2019                                Université de Bordeaux
															
 
																  * Copyright (C) 2010-2017, 2019                          CNRS
															
 
																  * Copyright (C) 2013                                     Thibaut Lambert
															
 
																  * Copyright (C) 2011                                     Télécom-SudParis
															
@@ -268,9 +268,12 @@ void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_wo
 
																 				do_update_time_model = 1;
															
 
																 			}
															
 
																 #else
															
 
																-			const unsigned do_update_time_model = 1;
															
 
																+			unsigned do_update_time_model = 1;
															
 
																 			const double time_consumed = measured;
															
 
																 #endif
															
 
																+			if (j->task->failed)
															
 
																+				/* Do not record perfmodel for failed tasks, they may terminate earlier */
															
 
																+				do_update_time_model = 0;
															
 
																 			if (do_update_time_model)
															
 
																 			{
															
 
																 				_starpu_update_perfmodel_history(j, j->task->cl->model, perf_arch, worker->devid, time_consumed, j->nimpl);
															
@@ -301,9 +304,12 @@ void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_wo
 
																 		}
															
 
																 #else
															
 
																 		const double energy_consumed = profiling_info->energy_consumed;
															
 
																-		const unsigned do_update_energy_model = 1;
															
 
																+		unsigned do_update_energy_model = 1;
															
 
																 #endif
															
 
																+		if (j->task->failed)
															
 
																+			/* Do not record perfmodel for failed tasks, they may terminate earlier */
															
 
																+			do_update_energy_model = 0;
															
 
																 		if (do_update_energy_model)
															
 
																 		{
															
 
																 			_starpu_update_perfmodel_history(j, j->task->cl->energy_model, perf_arch, worker->devid, energy_consumed, j->nimpl);
															
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -327,6 +327,7 @@ myPROGRAMS +=				\
 
																 	disk/mem_reclaim			\
															
 
																 	errorcheck/invalid_blocking_calls	\
															
 
																 	errorcheck/workers_cpuid		\
															
 
																+	fault-tolerance/retry			\
															
 
																 	helper/starpu_data_cpy			\
															
 
																 	helper/starpu_create_sync_task		\
															
 
																 	microbenchs/async_tasks_overhead	\
															
--- a/tests/fault-tolerance/retry.c
+++ b/tests/fault-tolerance/retry.c
@@ -0,0 +1,123 @@
 
																+/* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																+ *
															
 
																+ * Copyright (C) 2011-2013,2015,2017                      CNRS
															
 
																+ * Copyright (C) 2017                                     Inria
															
 
																+ * Copyright (C) 2019                                     Université de Bordeaux
															
 
																+ *
															
 
																+ * StarPU is free software; you can redistribute it and/or modify
															
 
																+ * it under the terms of the GNU Lesser General Public License as published by
															
 
																+ * the Free Software Foundation; either version 2.1 of the License, or (at
															
 
																+ * your option) any later version.
															
 
																+ *
															
 
																+ * StarPU is distributed in the hope that it will be useful, but
															
 
																+ * WITHOUT ANY WARRANTY; without even the implied warranty of
															
 
																+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
															
 
																+ *
															
 
																+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
															
 
																+ */
															
 
																+
															
 
																+/*
															
 
																+ * This tests the fault tolerance interface: it submits a task which repeatedly
															
 
																+ * fails until it eventually succeeds
															
 
																+ */
															
 
																+
															
 
																+#include <starpu.h>
															
 
																+#include "../helper.h"
															
 
																+
															
 
																+/* This task fakes some repeated errors: it reports a failure on its first 10 executions */
															
 
																+static int retry; /* number of failures faked so far */
															
 
																+void cpu_increment(void *descr[], void *arg)
															
 
																+{
															
 
																+	(void)arg; /* unused */
															
 
																+	unsigned *var = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); /* first buffer: the value to increment */
															
 
																+	unsigned *var2 = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[1]); /* second buffer: receives the result */
															
 
																+	FPRINTF(stderr,"computing\n");
															
 
																+	*var2 = *var + 1; /* computed on every execution, including the failing ones */
															
 
																+	if (retry < 10)
															
 
																+	{
															
 
																+		FPRINTF(stderr,"failing\n");
															
 
																+		retry++;
															
 
																+		/* Fake failure */
															
 
																+		starpu_task_ft_failed(starpu_task_get_current());
															
 
																+	}
															
 
																+	else
															
 
																+		FPRINTF(stderr,"succeed\n");
															
 
																+}
															
 
																+
															
 
																+static struct starpu_codelet my_codelet = /* codelet running cpu_increment on two buffers */
															
 
																+{
															
 
																+	.cpu_funcs = {cpu_increment},
															
 
																+	.cpu_funcs_name = {"cpu_increment"},
															
 
																+	.modes = { STARPU_R, STARPU_W }, /* buffer 0 is read, buffer 1 is written */
															
 
																+	.nbuffers = 2
															
 
																+};
															
 
																+
															
 
																+/* This implements the retry strategy; arg is the meta task to release on success
															
 
																+ * (Identical to the default implementation: just retry) */
															
 
																+static void check_ft(void *arg)
															
 
																+{
															
 
																+	struct starpu_task *meta_task = arg; /* released below once a try did not fail */
															
 
																+	struct starpu_task *current_task = starpu_task_get_current(); /* the task whose outcome is checked */
															
 
																+	struct starpu_task *new_task;
															
 
																+	int ret;
															
 
																+
															
 
																+	if (!current_task->failed)
															
 
																+	{
															
 
																+		FPRINTF(stderr,"didn't fail, release main task\n");
															
 
																+		starpu_task_ft_success(meta_task);
															
 
																+		return; /* nothing to retry */
															
 
																+	}
															
 
																+
															
 
																+	FPRINTF(stderr,"failed, try again\n");
															
 
																+
															
 
																+	new_task = starpu_task_ft_create_retry(meta_task, current_task, check_ft); /* check_ft is passed again, presumably so the new try gets checked too -- TODO confirm */
															
 
																+
															
 
																+	/* Here we could e.g. force the task to use only a CPU implementation
															
 
																+	 * known to be failsafe */
															
 
																+
															
 
																+	ret = starpu_task_submit_nodeps(new_task);
															
 
																+	STARPU_ASSERT(!ret); /* a non-zero return from the submission is not tolerated */
															
 
																+}
															
 
																+
															
 
																+int main(void)
															
 
																+{
															
 
																+	int x = 12; /* task input, checked at the end to still be 12 */
															
 
																+	int y = 1; /* task output, checked at the end to be 13 */
															
 
																+        starpu_data_handle_t h_x, h_y;
															
 
																+	int ret, ret1;
															
 
																+
															
 
																+	ret = starpu_init(NULL);
															
 
																+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
															
 
																+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
															
 
																+
															
 
																+	starpu_variable_data_register(&h_x, STARPU_MAIN_RAM, (uintptr_t)&x, sizeof(x));
															
 
																+	starpu_variable_data_register(&h_y, STARPU_MAIN_RAM, (uintptr_t)&y, sizeof(y));
															
 
																+
															
 
																+	retry = 0; /* no failure faked yet */
															
 
																+	ret1 = starpu_task_insert(&my_codelet,
															
 
																+				  STARPU_PROLOGUE_CALLBACK, starpu_task_ft_prologue,
															
 
																+				  STARPU_PROLOGUE_CALLBACK_ARG, check_ft, /* the retry strategy defined above */
															
 
																+				  STARPU_R, h_x,
															
 
																+				  STARPU_W, h_y,
															
 
																+				  0);
															
 
																+	if (ret1 != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret1, "starpu_task_insert"); /* -ENODEV is not treated as an error here */
															
 
																+	starpu_task_wait_for_all();
															
 
																+
															
 
																+	starpu_data_unregister(h_x);
															
 
																+	starpu_data_unregister(h_y);
															
 
																+
															
 
																+	starpu_shutdown();
															
 
																+
															
 
																+	if (x != 12)
															
 
																+		ret = 1; /* the read-only input was modified */
															
 
																+	FPRINTF(stderr, "Value x = %d (expected 12)\n", x);
															
 
																+
															
 
																+	if (ret1 != -ENODEV) /* y is only checked when the insertion did not return -ENODEV */
															
 
																+	{
															
 
																+		if (y != 13)
															
 
																+			ret = 1; /* the result is not x + 1 */
															
 
																+		FPRINTF(stderr, "Value y = %d (expected 13)\n", y);
															
 
																+	}
															
 
																+
															
 
																+	STARPU_RETURN(ret);
															
 
																+}
															
--- a/tests/perfmodels/regression_based.c
+++ b/tests/perfmodels/regression_based.c
@@ -2,7 +2,7 @@
 
																  *
															
 
																  * Copyright (C) 2011,2012,2014                           Inria
															
 
																  * Copyright (C) 2011-2016,2019                           Université de Bordeaux
															
 
																- * Copyright (C) 2011-2017                                CNRS
															
 
																+ * Copyright (C) 2011-2017, 2019                          CNRS
															
 
																  * Copyright (C) 2011                                     Télécom-SudParis
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
@@ -70,7 +70,7 @@ void memset_cpu(void *descr[], void *arg)
 
																 	int *ptr = (int *)STARPU_VECTOR_GET_PTR(descr[0]);
															
 
																 	unsigned n = STARPU_VECTOR_GET_NX(descr[0]);
															
 
																-	usleep(10);
															
 
																+	starpu_usleep(10);
															
 
																 	memset(ptr, 42, n * sizeof(*ptr));
															
 
																 }
															
--- a/tools/Makefile.am
+++ b/tools/Makefile.am
@@ -42,6 +42,18 @@ dist_pkgdata_perfmodels_sampling_bus_DATA = \
 
																 	perfmodels/sampling/bus/attila.latency	\
															
 
																 	perfmodels/sampling/bus/attila.platform.xml	\
															
 
																 	perfmodels/sampling/bus/attila.platform.v4.xml	\
															
 
																+	perfmodels/sampling/bus/hannibal.affinity	\
															
 
																+	perfmodels/sampling/bus/hannibal.bandwidth	\
															
 
																+	perfmodels/sampling/bus/hannibal.config	\
															
 
																+	perfmodels/sampling/bus/hannibal.latency	\
															
 
																+	perfmodels/sampling/bus/hannibal.platform.xml	\
															
 
																+	perfmodels/sampling/bus/hannibal.platform.v4.xml	\
															
 
																+	perfmodels/sampling/bus/hannibal-pitch.affinity	\
															
 
																+	perfmodels/sampling/bus/hannibal-pitch.bandwidth	\
															
 
																+	perfmodels/sampling/bus/hannibal-pitch.config	\
															
 
																+	perfmodels/sampling/bus/hannibal-pitch.latency	\
															
 
																+	perfmodels/sampling/bus/hannibal-pitch.platform.xml	\
															
 
																+	perfmodels/sampling/bus/hannibal-pitch.platform.v4.xml	\
															
 
																 	perfmodels/sampling/bus/idgraf.affinity	\
															
 
																 	perfmodels/sampling/bus/idgraf.bandwidth	\
															
 
																 	perfmodels/sampling/bus/idgraf.config	\
															
@@ -79,6 +91,20 @@ dist_pkgdata_perfmodels_sampling_codelets_DATA = \
 
																 	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_21.attila	\
															
 
																 	perfmodels/sampling/codelets/45/starpu_dlu_lu_model_22.attila	\
															
 
																 	perfmodels/sampling/codelets/45/overlap_sleep_1024_24.attila	\
															
 
																+	perfmodels/sampling/codelets/45/chol_model_11.hannibal	\
															
 
																+	perfmodels/sampling/codelets/45/chol_model_21.hannibal	\
															
 
																+	perfmodels/sampling/codelets/45/chol_model_22.hannibal	\
															
 
																+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_11.hannibal	\
															
 
																+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_12.hannibal	\
															
 
																+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_21.hannibal	\
															
 
																+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_22.hannibal	\
															
 
																+	perfmodels/sampling/codelets/45/chol_model_11.hannibal-pitch	\
															
 
																+	perfmodels/sampling/codelets/45/chol_model_21.hannibal-pitch	\
															
 
																+	perfmodels/sampling/codelets/45/chol_model_22.hannibal-pitch	\
															
 
																+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_11.hannibal-pitch	\
															
 
																+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_12.hannibal-pitch	\
															
 
																+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_21.hannibal-pitch	\
															
 
																+	perfmodels/sampling/codelets/45/starpu_slu_lu_model_22.hannibal-pitch	\
															
 
																 	perfmodels/sampling/codelets/45/chol_model_11.idgraf	\
															
 
																 	perfmodels/sampling/codelets/45/chol_model_21.idgraf	\
															
 
																 	perfmodels/sampling/codelets/45/chol_model_22.idgraf	\
															
--- a/tools/dev/valgrind/hdf5.suppr
+++ b/tools/dev/valgrind/hdf5.suppr
@@ -1,7 +1,7 @@
 
																 # StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																 #
															
 
																 # Copyright (C) 2017                                     CNRS
															
 
																-# Copyright (C) 2017                                     Université de Bordeaux
															
 
																+# Copyright (C) 2017, 2019                               Université de Bordeaux
															
 
																 #
															
 
																 # StarPU is free software; you can redistribute it and/or modify
															
 
																 # it under the terms of the GNU Lesser General Public License as published by
															
@@ -20,7 +20,6 @@
 
																    Memcheck:Leak
															
 
																    match-leak-kinds: reachable
															
 
																    ...
															
 
																-   fun:H5FL_reg_malloc
															
 
																    fun:H5E_get_stack
															
 
																    ...
															
 
																 }
															
@@ -34,3 +33,12 @@
 
																    ...
															
 
																 }
															
 
																+{
															
 
																+   thread-specific value
															
 
																+   Memcheck:Leak
															
 
																+   match-leak-kinds: reachable
															
 
																+   ...
															
 
																+   fun:H5CX_push
															
 
																+   ...
															
 
																+}
															
 
																+
															
--- a/tools/perfmodels/sampling/bus/hannibal-pitch.affinity
+++ b/tools/perfmodels/sampling/bus/hannibal-pitch.affinity
@@ -0,0 +1 @@
 
																+hannibal.affinity
															
--- a/tools/perfmodels/sampling/bus/hannibal-pitch.bandwidth
+++ b/tools/perfmodels/sampling/bus/hannibal-pitch.bandwidth
@@ -0,0 +1 @@
 
																+hannibal.bandwidth
															
--- a/tools/perfmodels/sampling/bus/hannibal-pitch.config
+++ b/tools/perfmodels/sampling/bus/hannibal-pitch.config
@@ -0,0 +1 @@
 
																+hannibal.config
															
--- a/tools/perfmodels/sampling/bus/hannibal-pitch.latency
+++ b/tools/perfmodels/sampling/bus/hannibal-pitch.latency
@@ -0,0 +1 @@
 
																+hannibal.latency
															
--- a/tools/perfmodels/sampling/bus/hannibal-pitch.platform.v4.xml
+++ b/tools/perfmodels/sampling/bus/hannibal-pitch.platform.v4.xml
@@ -0,0 +1,70 @@
 
																+<?xml version="1.0"?>
															
 
																+ <!DOCTYPE platform SYSTEM "http://simgrid.gforge.inria.fr/simgrid/simgrid.dtd">
															
 
																+ <platform version="4">
															
 
																+ <config id="General">
															
 
																+   <prop id="network/TCP-gamma" value="-1"></prop>
															
 
																+   <prop id="network/latency-factor" value="1"></prop>
															
 
																+   <prop id="network/bandwidth-factor" value="1"></prop>
															
 
																+ </config>
															
 
																+ <AS  id="AS0"  routing="Full">
															
 
																+   <host id="MAIN" speed="1f"/>
															
 
																+   <host id="CPU0" speed="2000000000f"/>
															
 
																+   <host id="CPU1" speed="2000000000f"/>
															
 
																+   <host id="CPU2" speed="2000000000f"/>
															
 
																+   <host id="CPU3" speed="2000000000f"/>
															
 
																+   <host id="CPU4" speed="2000000000f"/>
															
 
																+   <host id="CPU5" speed="2000000000f"/>
															
 
																+   <host id="CPU6" speed="2000000000f"/>
															
 
																+   <host id="CPU7" speed="2000000000f"/>
															
 
																+   <host id="CUDA0" speed="2000000000f">
															
 
																+     <prop id="memsize" value="3145728000"/>
															
 
																+     <prop id="memcpy_peer" value="0"/>
															
 
																+   </host>
															
 
																+   <host id="CUDA1" speed="2000000000f">
															
 
																+     <prop id="memsize" value="3145728000"/>
															
 
																+     <prop id="memcpy_peer" value="0"/>
															
 
																+   </host>
															
 
																+   <host id="CUDA2" speed="2000000000f">
															
 
																+     <prop id="memsize" value="3145728000"/>
															
 
																+     <prop id="memcpy_peer" value="0"/>
															
 
																+   </host>
															
 
																+   <host id="OpenCL0" speed="2000000000f">
															
 
																+     <prop id="memsize" value="3145728000"/>
															
 
																+   </host>
															
 
																+   <host id="OpenCL1" speed="2000000000f">
															
 
																+     <prop id="memsize" value="3145728000"/>
															
 
																+   </host>
															
 
																+   <host id="OpenCL2" speed="2000000000f">
															
 
																+     <prop id="memsize" value="3145728000"/>
															
 
																+   </host>
															
 
																+
															
 
																+   <host id="RAM" speed="1f"/>
															
 
																+
															
 
																+   <link id="Share" bandwidth="5988779905.433726Bps" latency="0.000000s"/>
															
 
																+
															
 
																+   <link id="RAM-CUDA0" bandwidth="1653658596.433726Bps" latency="0.000012s"/>
															
 
																+   <link id="CUDA0-RAM" bandwidth="993981963.299022Bps" latency="0.000012s"/>
															
 
																+   <link id="RAM-CUDA1" bandwidth="869707794.319062Bps" latency="0.000013s"/>
															
 
																+   <link id="CUDA1-RAM" bandwidth="925610046.160954Bps" latency="0.000013s"/>
															
 
																+   <link id="RAM-CUDA2" bandwidth="1653711631.023217Bps" latency="0.000012s"/>
															
 
																+   <link id="CUDA2-RAM" bandwidth="981498659.805904Bps" latency="0.000013s"/>
															
 
																+   <link id="RAM-OpenCL0" bandwidth="3975378655.154796Bps" latency="0.000020s"/>
															
 
																+   <link id="OpenCL0-RAM" bandwidth="2937163571.508681Bps" latency="0.000064s"/>
															
 
																+   <link id="RAM-OpenCL1" bandwidth="2636838726.154693Bps" latency="0.000020s"/>
															
 
																+   <link id="OpenCL1-RAM" bandwidth="2610203570.688437Bps" latency="0.000036s"/>
															
 
																+   <link id="RAM-OpenCL2" bandwidth="3992447566.540525Bps" latency="0.000020s"/>
															
 
																+   <link id="OpenCL2-RAM" bandwidth="2812550617.128727Bps" latency="0.000037s"/>
															
 
																+   <route src="RAM" dst="CUDA0" symmetrical="NO"><link_ctn id="RAM-CUDA0"/><link_ctn id="Share"/></route>
															
 
																+   <route src="CUDA0" dst="RAM" symmetrical="NO"><link_ctn id="CUDA0-RAM"/><link_ctn id="Share"/></route>
															
 
																+   <route src="RAM" dst="CUDA1" symmetrical="NO"><link_ctn id="RAM-CUDA1"/><link_ctn id="Share"/></route>
															
 
																+   <route src="CUDA1" dst="RAM" symmetrical="NO"><link_ctn id="CUDA1-RAM"/><link_ctn id="Share"/></route>
															
 
																+   <route src="RAM" dst="CUDA2" symmetrical="NO"><link_ctn id="RAM-CUDA2"/><link_ctn id="Share"/></route>
															
 
																+   <route src="CUDA2" dst="RAM" symmetrical="NO"><link_ctn id="CUDA2-RAM"/><link_ctn id="Share"/></route>
															
 
																+   <route src="RAM" dst="OpenCL0" symmetrical="NO"><link_ctn id="RAM-OpenCL0"/><link_ctn id="Share"/></route>
															
 
																+   <route src="OpenCL0" dst="RAM" symmetrical="NO"><link_ctn id="OpenCL0-RAM"/><link_ctn id="Share"/></route>
															
 
																+   <route src="RAM" dst="OpenCL1" symmetrical="NO"><link_ctn id="RAM-OpenCL1"/><link_ctn id="Share"/></route>
															
 
																+   <route src="OpenCL1" dst="RAM" symmetrical="NO"><link_ctn id="OpenCL1-RAM"/><link_ctn id="Share"/></route>
															
 
																+   <route src="RAM" dst="OpenCL2" symmetrical="NO"><link_ctn id="RAM-OpenCL2"/><link_ctn id="Share"/></route>
															
 
																+   <route src="OpenCL2" dst="RAM" symmetrical="NO"><link_ctn id="OpenCL2-RAM"/><link_ctn id="Share"/></route>
															
 
																+ </AS>
															
 
																+ </platform>
															
--- a/tools/perfmodels/sampling/bus/hannibal-pitch.platform.xml
+++ b/tools/perfmodels/sampling/bus/hannibal-pitch.platform.xml
@@ -0,0 +1,70 @@
 
																+<?xml version="1.0"?>
															
 
																+ <!DOCTYPE platform SYSTEM "http://simgrid.gforge.inria.fr/simgrid.dtd">
															
 
																+ <platform version="3">
															
 
																+ <config id="General">
															
 
																+   <prop id="network/TCP_gamma" value="-1"></prop>
															
 
																+   <prop id="network/latency_factor" value="1"></prop>
															
 
																+   <prop id="network/bandwidth_factor" value="1"></prop>
															
 
																+ </config>
															
 
																+ <AS  id="AS0"  routing="Full">
															
 
																+   <host id="MAIN" power="1"/>
															
 
																+   <host id="CPU0" power="2000000000"/>
															
 
																+   <host id="CPU1" power="2000000000"/>
															
 
																+   <host id="CPU2" power="2000000000"/>
															
 
																+   <host id="CPU3" power="2000000000"/>
															
 
																+   <host id="CPU4" power="2000000000"/>
															
 
																+   <host id="CPU5" power="2000000000"/>
															
 
																+   <host id="CPU6" power="2000000000"/>
															
 
																+   <host id="CPU7" power="2000000000"/>
															
 
																+   <host id="CUDA0" power="2000000000">
															
 
																+     <prop id="memsize" value="3145728000"/>
															
 
																+     <prop id="memcpy_peer" value="0"/>
															
 
																+   </host>
															
 
																+   <host id="CUDA1" power="2000000000">
															
 
																+     <prop id="memsize" value="3145728000"/>
															
 
																+     <prop id="memcpy_peer" value="0"/>
															
 
																+   </host>
															
 
																+   <host id="CUDA2" power="2000000000">
															
 
																+     <prop id="memsize" value="3145728000"/>
															
 
																+     <prop id="memcpy_peer" value="0"/>
															
 
																+   </host>
															
 
																+   <host id="OpenCL0" power="2000000000">
															
 
																+     <prop id="memsize" value="3145728000"/>
															
 
																+   </host>
															
 
																+   <host id="OpenCL1" power="2000000000">
															
 
																+     <prop id="memsize" value="3145728000"/>
															
 
																+   </host>
															
 
																+   <host id="OpenCL2" power="2000000000">
															
 
																+     <prop id="memsize" value="3145728000"/>
															
 
																+   </host>
															
 
																+
															
 
																+   <host id="RAM" power="1"/>
															
 
																+
															
 
																+   <link id="Share" bandwidth="5988779905.433726" latency="0.000000"/>
															
 
																+   
															
 
																+   <link id="RAM-CUDA0" bandwidth="1653658596.433726" latency="0.000012"/>
															
 
																+   <link id="CUDA0-RAM" bandwidth="993981963.299022" latency="0.000012"/>
															
 
																+   <link id="RAM-CUDA1" bandwidth="869707794.319062" latency="0.000013"/>
															
 
																+   <link id="CUDA1-RAM" bandwidth="925610046.160954" latency="0.000013"/>
															
 
																+   <link id="RAM-CUDA2" bandwidth="1653711631.023217" latency="0.000012"/>
															
 
																+   <link id="CUDA2-RAM" bandwidth="981498659.805904" latency="0.000013"/>
															
 
																+   <link id="RAM-OpenCL0" bandwidth="3975378655.154796" latency="0.000020"/>
															
 
																+   <link id="OpenCL0-RAM" bandwidth="2937163571.508681" latency="0.000064"/>
															
 
																+   <link id="RAM-OpenCL1" bandwidth="2636838726.154693" latency="0.000020"/>
															
 
																+   <link id="OpenCL1-RAM" bandwidth="2610203570.688437" latency="0.000036"/>
															
 
																+   <link id="RAM-OpenCL2" bandwidth="3992447566.540525" latency="0.000020"/>
															
 
																+   <link id="OpenCL2-RAM" bandwidth="2812550617.128727" latency="0.000037"/>
															
 
																+   <route src="RAM" dst="CUDA0" symmetrical="NO"><link_ctn id="RAM-CUDA0"/><link_ctn id="Share"/></route>
															
 
																+   <route src="CUDA0" dst="RAM" symmetrical="NO"><link_ctn id="CUDA0-RAM"/><link_ctn id="Share"/></route>
															
 
																+   <route src="RAM" dst="CUDA1" symmetrical="NO"><link_ctn id="RAM-CUDA1"/><link_ctn id="Share"/></route>
															
 
																+   <route src="CUDA1" dst="RAM" symmetrical="NO"><link_ctn id="CUDA1-RAM"/><link_ctn id="Share"/></route>
															
 
																+   <route src="RAM" dst="CUDA2" symmetrical="NO"><link_ctn id="RAM-CUDA2"/><link_ctn id="Share"/></route>
															
 
																+   <route src="CUDA2" dst="RAM" symmetrical="NO"><link_ctn id="CUDA2-RAM"/><link_ctn id="Share"/></route>
															
 
																+   <route src="RAM" dst="OpenCL0" symmetrical="NO"><link_ctn id="RAM-OpenCL0"/><link_ctn id="Share"/></route>
															
 
																+   <route src="OpenCL0" dst="RAM" symmetrical="NO"><link_ctn id="OpenCL0-RAM"/><link_ctn id="Share"/></route>
															
 
																+   <route src="RAM" dst="OpenCL1" symmetrical="NO"><link_ctn id="RAM-OpenCL1"/><link_ctn id="Share"/></route>
															
 
																+   <route src="OpenCL1" dst="RAM" symmetrical="NO"><link_ctn id="OpenCL1-RAM"/><link_ctn id="Share"/></route>
															
 
																+   <route src="RAM" dst="OpenCL2" symmetrical="NO"><link_ctn id="RAM-OpenCL2"/><link_ctn id="Share"/></route>
															
 
																+   <route src="OpenCL2" dst="RAM" symmetrical="NO"><link_ctn id="OpenCL2-RAM"/><link_ctn id="Share"/></route>
															
 
																+ </AS>
															
 
																+ </platform>
															
--- a/tools/perfmodels/sampling/bus/hannibal.affinity
+++ b/tools/perfmodels/sampling/bus/hannibal.affinity
@@ -0,0 +1,7 @@
 
																+# GPU	CPU0	CPU1	CPU2	CPU3	CPU4	CPU5	CPU6	CPU7	
															
 
																+0	0	1	2	3	4	5	6	7	
															
 
																+1	4	5	6	7	0	1	2	3	
															
 
																+2	4	5	6	7	0	1	2	3	
															
 
																+0	0	1	2	3	4	5	6	7	
															
 
																+1	4	5	6	7	0	1	2	3	
															
 
																+2	4	5	6	7	0	1	2	3	
															
--- a/tools/perfmodels/sampling/bus/hannibal.bandwidth
+++ b/tools/perfmodels/sampling/bus/hannibal.bandwidth
@@ -0,0 +1,17 @@
 
																+# to 0		to 1		to 2		to 3		to 4		to 5		to 6		to 7		to 8		to 9		to 10		to 11		to 12		to 13		to 14		to 15		
															
 
																+0.000000	5988.779905	3149.675860	5988.971975	3975.378655	2636.838726	3992.447567	nan	nan	nan	nan	nan	nan	nan	nan	nan
															
 
																+3599.738919	0.000000	1679.850942	2248.345554	1889.122528	1521.977521	1892.968372	nan	nan	nan	nan	nan	nan	nan	nan	nan
															
 
																+3352.127736	2149.165370	0.000000	2149.190105	1818.623736	1475.884075	1822.187624	nan	nan	nan	nan	nan	nan	nan	nan	nan
															
 
																+3554.530216	2230.599117	1669.939421	0.000000	1876.596887	1513.836926	1880.391850	nan	nan	nan	nan	nan	nan	nan	nan	nan
															
 
																+2937.163572	1970.662958	1519.854976	1970.683755	0.000000	1389.455231	1692.226493	nan	nan	nan	nan	nan	nan	nan	nan	nan
															
 
																+2610.203571	1817.881699	1427.338068	1817.899396	1575.646193	0.000000	1578.320689	nan	nan	nan	nan	nan	nan	nan	nan	nan
															
 
																+2812.550617	1913.772761	1485.791058	1913.792375	1647.181820	1360.930908	0.000000	nan	nan	nan	nan	nan	nan	nan	nan	nan
															
 
																+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
															
 
																+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
															
 
																+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
															
 
																+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
															
 
																+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
															
 
																+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
															
 
																+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
															
 
																+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
															
 
																+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
															
--- a/tools/perfmodels/sampling/bus/hannibal.config
+++ b/tools/perfmodels/sampling/bus/hannibal.config
@@ -0,0 +1,4 @@
 
																+# Current configuration
															
 
																+8 # Number of CPUs
															
 
																+3 # Number of CUDA devices
															
 
																+3 # Number of OpenCL devices
															
--- a/tools/perfmodels/sampling/bus/hannibal.latency
+++ b/tools/perfmodels/sampling/bus/hannibal.latency
@@ -0,0 +1,17 @@
 
																+# to 0		to 1		to 2		to 3		to 4		to 5		to 6		to 7		to 8		to 9		to 10		to 11		to 12		to 13		to 14		to 15		
															
 
																+0.000000	12.460938	12.570312	12.468750	20.000000	20.328125	19.593750	nan	nan	nan	nan	nan	nan	nan	nan	nan
															
 
																+12.476562	0.000000	25.046875	24.945312	32.476562	32.804688	32.070312	nan	nan	nan	nan	nan	nan	nan	nan	nan
															
 
																+12.593750	25.054688	0.000000	25.062500	32.593750	32.921875	32.187500	nan	nan	nan	nan	nan	nan	nan	nan	nan
															
 
																+12.539062	25.000000	25.109375	0.000000	32.539062	32.867188	32.132812	nan	nan	nan	nan	nan	nan	nan	nan	nan
															
 
																+63.601562	76.062500	76.171875	76.070312	0.000000	83.929688	83.195312	nan	nan	nan	nan	nan	nan	nan	nan	nan
															
 
																+35.992188	48.453125	48.562500	48.460938	55.992188	0.000000	55.585938	nan	nan	nan	nan	nan	nan	nan	nan	nan
															
 
																+36.765625	49.226562	49.335938	49.234375	56.765625	57.093750	0.000000	nan	nan	nan	nan	nan	nan	nan	nan	nan
															
 
																+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
															
 
																+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
															
 
																+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
															
 
																+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
															
 
																+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
															
 
																+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
															
 
																+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
															
 
																+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
															
 
																+nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan	nan
															
--- a/tools/perfmodels/sampling/bus/hannibal.platform.v4.xml
+++ b/tools/perfmodels/sampling/bus/hannibal.platform.v4.xml
@@ -0,0 +1,70 @@
 
																+<?xml version="1.0"?>
															
 
																+ <!DOCTYPE platform SYSTEM "http://simgrid.gforge.inria.fr/simgrid/simgrid.dtd">
															
 
																+ <platform version="4">
															
 
																+ <config id="General">
															
 
																+   <prop id="network/TCP-gamma" value="-1"></prop>
															
 
																+   <prop id="network/latency-factor" value="1"></prop>
															
 
																+   <prop id="network/bandwidth-factor" value="1"></prop>
															
 
																+ </config>
															
 
																+ <AS  id="AS0"  routing="Full">
															
 
																+   <host id="MAIN" speed="1f"/>
															
 
																+   <host id="CPU0" speed="2000000000f"/>
															
 
																+   <host id="CPU1" speed="2000000000f"/>
															
 
																+   <host id="CPU2" speed="2000000000f"/>
															
 
																+   <host id="CPU3" speed="2000000000f"/>
															
 
																+   <host id="CPU4" speed="2000000000f"/>
															
 
																+   <host id="CPU5" speed="2000000000f"/>
															
 
																+   <host id="CPU6" speed="2000000000f"/>
															
 
																+   <host id="CPU7" speed="2000000000f"/>
															
 
																+   <host id="CUDA0" speed="2000000000f">
															
 
																+     <prop id="memsize" value="3145728000"/>
															
 
																+     <prop id="memcpy_peer" value="0"/>
															
 
																+   </host>
															
 
																+   <host id="CUDA1" speed="2000000000f">
															
 
																+     <prop id="memsize" value="3145728000"/>
															
 
																+     <prop id="memcpy_peer" value="0"/>
															
 
																+   </host>
															
 
																+   <host id="CUDA2" speed="2000000000f">
															
 
																+     <prop id="memsize" value="3145728000"/>
															
 
																+     <prop id="memcpy_peer" value="0"/>
															
 
																+   </host>
															
 
																+   <host id="OpenCL0" speed="2000000000f">
															
 
																+     <prop id="memsize" value="3145728000"/>
															
 
																+   </host>
															
 
																+   <host id="OpenCL1" speed="2000000000f">
															
 
																+     <prop id="memsize" value="3145728000"/>
															
 
																+   </host>
															
 
																+   <host id="OpenCL2" speed="2000000000f">
															
 
																+     <prop id="memsize" value="3145728000"/>
															
 
																+   </host>
															
 
																+
															
 
																+   <host id="RAM" speed="1f"/>
															
 
																+
															
 
																+   <link id="Share" bandwidth="5988971975.023217Bps" latency="0.000000s"/>
															
 
																+
															
 
																+   <link id="RAM-CUDA0" bandwidth="5988779905.433726Bps" latency="0.000012s"/>
															
 
																+   <link id="CUDA0-RAM" bandwidth="3599738919.299022Bps" latency="0.000012s"/>
															
 
																+   <link id="RAM-CUDA1" bandwidth="3149675860.319062Bps" latency="0.000013s"/>
															
 
																+   <link id="CUDA1-RAM" bandwidth="3352127736.160954Bps" latency="0.000013s"/>
															
 
																+   <link id="RAM-CUDA2" bandwidth="5988971975.023217Bps" latency="0.000012s"/>
															
 
																+   <link id="CUDA2-RAM" bandwidth="3554530215.805904Bps" latency="0.000013s"/>
															
 
																+   <link id="RAM-OpenCL0" bandwidth="3975378655.154796Bps" latency="0.000020s"/>
															
 
																+   <link id="OpenCL0-RAM" bandwidth="2937163571.508681Bps" latency="0.000064s"/>
															
 
																+   <link id="RAM-OpenCL1" bandwidth="2636838726.154693Bps" latency="0.000020s"/>
															
 
																+   <link id="OpenCL1-RAM" bandwidth="2610203570.688437Bps" latency="0.000036s"/>
															
 
																+   <link id="RAM-OpenCL2" bandwidth="3992447566.540525Bps" latency="0.000020s"/>
															
 
																+   <link id="OpenCL2-RAM" bandwidth="2812550617.128727Bps" latency="0.000037s"/>
															
 
																+   <route src="RAM" dst="CUDA0" symmetrical="NO"><link_ctn id="RAM-CUDA0"/><link_ctn id="Share"/></route>
															
 
																+   <route src="CUDA0" dst="RAM" symmetrical="NO"><link_ctn id="CUDA0-RAM"/><link_ctn id="Share"/></route>
															
 
																+   <route src="RAM" dst="CUDA1" symmetrical="NO"><link_ctn id="RAM-CUDA1"/><link_ctn id="Share"/></route>
															
 
																+   <route src="CUDA1" dst="RAM" symmetrical="NO"><link_ctn id="CUDA1-RAM"/><link_ctn id="Share"/></route>
															
 
																+   <route src="RAM" dst="CUDA2" symmetrical="NO"><link_ctn id="RAM-CUDA2"/><link_ctn id="Share"/></route>
															
 
																+   <route src="CUDA2" dst="RAM" symmetrical="NO"><link_ctn id="CUDA2-RAM"/><link_ctn id="Share"/></route>
															
 
																+   <route src="RAM" dst="OpenCL0" symmetrical="NO"><link_ctn id="RAM-OpenCL0"/><link_ctn id="Share"/></route>
															
 
																+   <route src="OpenCL0" dst="RAM" symmetrical="NO"><link_ctn id="OpenCL0-RAM"/><link_ctn id="Share"/></route>
															
 
																+   <route src="RAM" dst="OpenCL1" symmetrical="NO"><link_ctn id="RAM-OpenCL1"/><link_ctn id="Share"/></route>
															
 
																+   <route src="OpenCL1" dst="RAM" symmetrical="NO"><link_ctn id="OpenCL1-RAM"/><link_ctn id="Share"/></route>
															
 
																+   <route src="RAM" dst="OpenCL2" symmetrical="NO"><link_ctn id="RAM-OpenCL2"/><link_ctn id="Share"/></route>
															
 
																+   <route src="OpenCL2" dst="RAM" symmetrical="NO"><link_ctn id="OpenCL2-RAM"/><link_ctn id="Share"/></route>
															
 
																+ </AS>
															
 
																+ </platform>
															
--- a/tools/perfmodels/sampling/bus/hannibal.platform.xml
+++ b/tools/perfmodels/sampling/bus/hannibal.platform.xml
@@ -0,0 +1,70 @@
 
																+<?xml version="1.0"?>
															
 
																+ <!DOCTYPE platform SYSTEM "http://simgrid.gforge.inria.fr/simgrid.dtd">
															
 
																+ <platform version="3">
															
 
																+ <config id="General">
															
 
																+   <prop id="network/TCP_gamma" value="-1"></prop>
															
 
																+   <prop id="network/latency_factor" value="1"></prop>
															
 
																+   <prop id="network/bandwidth_factor" value="1"></prop>
															
 
																+ </config>
															
 
																+ <AS  id="AS0"  routing="Full">
															
 
																+   <host id="MAIN" power="1"/>
															
 
																+   <host id="CPU0" power="2000000000"/>
															
 
																+   <host id="CPU1" power="2000000000"/>
															
 
																+   <host id="CPU2" power="2000000000"/>
															
 
																+   <host id="CPU3" power="2000000000"/>
															
 
																+   <host id="CPU4" power="2000000000"/>
															
 
																+   <host id="CPU5" power="2000000000"/>
															
 
																+   <host id="CPU6" power="2000000000"/>
															
 
																+   <host id="CPU7" power="2000000000"/>
															
 
																+   <host id="CUDA0" power="2000000000">
															
 
																+     <prop id="memsize" value="3145728000"/>
															
 
																+     <prop id="memcpy_peer" value="0"/>
															
 
																+   </host>
															
 
																+   <host id="CUDA1" power="2000000000">
															
 
																+     <prop id="memsize" value="3145728000"/>
															
 
																+     <prop id="memcpy_peer" value="0"/>
															
 
																+   </host>
															
 
																+   <host id="CUDA2" power="2000000000">
															
 
																+     <prop id="memsize" value="3145728000"/>
															
 
																+     <prop id="memcpy_peer" value="0"/>
															
 
																+   </host>
															
 
																+   <host id="OpenCL0" power="2000000000">
															
 
																+     <prop id="memsize" value="3145728000"/>
															
 
																+   </host>
															
 
																+   <host id="OpenCL1" power="2000000000">
															
 
																+     <prop id="memsize" value="3145728000"/>
															
 
																+   </host>
															
 
																+   <host id="OpenCL2" power="2000000000">
															
 
																+     <prop id="memsize" value="3145728000"/>
															
 
																+   </host>
															
 
																+
															
 
																+   <host id="RAM" power="1"/>
															
 
																+
															
 
																+   <link id="Share" bandwidth="5988971975.023217" latency="0.000000"/>
															
 
																+
															
 
																+   <link id="RAM-CUDA0" bandwidth="5988779905.433726" latency="0.000012"/>
															
 
																+   <link id="CUDA0-RAM" bandwidth="3599738919.299022" latency="0.000012"/>
															
 
																+   <link id="RAM-CUDA1" bandwidth="3149675860.319062" latency="0.000013"/>
															
 
																+   <link id="CUDA1-RAM" bandwidth="3352127736.160954" latency="0.000013"/>
															
 
																+   <link id="RAM-CUDA2" bandwidth="5988971975.023217" latency="0.000012"/>
															
 
																+   <link id="CUDA2-RAM" bandwidth="3554530215.805904" latency="0.000013"/>
															
 
																+   <link id="RAM-OpenCL0" bandwidth="3975378655.154796" latency="0.000020"/>
															
 
																+   <link id="OpenCL0-RAM" bandwidth="2937163571.508681" latency="0.000064"/>
															
 
																+   <link id="RAM-OpenCL1" bandwidth="2636838726.154693" latency="0.000020"/>
															
 
																+   <link id="OpenCL1-RAM" bandwidth="2610203570.688437" latency="0.000036"/>
															
 
																+   <link id="RAM-OpenCL2" bandwidth="3992447566.540525" latency="0.000020"/>
															
 
																+   <link id="OpenCL2-RAM" bandwidth="2812550617.128727" latency="0.000037"/>
															
 
																+   <route src="RAM" dst="CUDA0" symmetrical="NO"><link_ctn id="RAM-CUDA0"/><link_ctn id="Share"/></route>
															
 
																+   <route src="CUDA0" dst="RAM" symmetrical="NO"><link_ctn id="CUDA0-RAM"/><link_ctn id="Share"/></route>
															
 
																+   <route src="RAM" dst="CUDA1" symmetrical="NO"><link_ctn id="RAM-CUDA1"/><link_ctn id="Share"/></route>
															
 
																+   <route src="CUDA1" dst="RAM" symmetrical="NO"><link_ctn id="CUDA1-RAM"/><link_ctn id="Share"/></route>
															
 
																+   <route src="RAM" dst="CUDA2" symmetrical="NO"><link_ctn id="RAM-CUDA2"/><link_ctn id="Share"/></route>
															
 
																+   <route src="CUDA2" dst="RAM" symmetrical="NO"><link_ctn id="CUDA2-RAM"/><link_ctn id="Share"/></route>
															
 
																+   <route src="RAM" dst="OpenCL0" symmetrical="NO"><link_ctn id="RAM-OpenCL0"/><link_ctn id="Share"/></route>
															
 
																+   <route src="OpenCL0" dst="RAM" symmetrical="NO"><link_ctn id="OpenCL0-RAM"/><link_ctn id="Share"/></route>
															
 
																+   <route src="RAM" dst="OpenCL1" symmetrical="NO"><link_ctn id="RAM-OpenCL1"/><link_ctn id="Share"/></route>
															
 
																+   <route src="OpenCL1" dst="RAM" symmetrical="NO"><link_ctn id="OpenCL1-RAM"/><link_ctn id="Share"/></route>
															
 
																+   <route src="RAM" dst="OpenCL2" symmetrical="NO"><link_ctn id="RAM-OpenCL2"/><link_ctn id="Share"/></route>
															
 
																+   <route src="OpenCL2" dst="RAM" symmetrical="NO"><link_ctn id="OpenCL2-RAM"/><link_ctn id="Share"/></route>
															
 
																+ </AS>
															
 
																+ </platform>
															
--- a/tools/perfmodels/sampling/codelets/45/chol_model_11.hannibal
+++ b/tools/perfmodels/sampling/codelets/45/chol_model_11.hannibal
@@ -0,0 +1,104 @@
 
																+##################
															
 
																+#	Performance	Model	Version
															
 
																+45
															
 
																+
															
 
																+####################
															
 
																+# COMBs
															
 
																+# number of combinations
															
 
																+3
															
 
																+####################
															
 
																+# COMB_1
															
 
																+# number of types devices
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device id 
															
 
																+0
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# number of cores 
															
 
																+1
															
 
																+##########
															
 
																+# number of implementations
															
 
																+1
															
 
																+#####
															
 
																+# Model for cuda0_impl0 (Comb1)
															
 
																+#	number	of	entries
															
 
																+1
															
 
																+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
															
 
																+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
															
 
																+#	a	b	c
															
 
																+nan	nan	nan
															
 
																+# not multiple-regression-base
															
 
																+0
															
 
																+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
															
 
																+617e5fe6	3686400	0	1.701016e+05	7.229737e+03	4.082438e+06	6.956835e+11	24
															
 
																+
															
 
																+####################
															
 
																+# COMB_2
															
 
																+# number of types devices
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device id 
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# number of cores 
															
 
																+1
															
 
																+##########
															
 
																+# number of implementations
															
 
																+1
															
 
																+#####
															
 
																+# Model for cuda1_impl0 (Comb2)
															
 
																+#	number	of	entries
															
 
																+1
															
 
																+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
															
 
																+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
															
 
																+#	a	b	c
															
 
																+nan	nan	nan
															
 
																+# not multiple-regression-base
															
 
																+0
															
 
																+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
															
 
																+617e5fe6	3686400	0	1.188776e+05	9.331204e+02	2.113643e+08	2.512803e+13	1778
															
 
																+
															
 
																+####################
															
 
																+# COMB_3
															
 
																+# number of types devices
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device id 
															
 
																+2
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# number of cores 
															
 
																+1
															
 
																+##########
															
 
																+# number of implementations
															
 
																+1
															
 
																+#####
															
 
																+# Model for cuda2_impl0 (Comb3)
															
 
																+#	number	of	entries
															
 
																+1
															
 
																+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
															
 
																+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
															
 
																+#	a	b	c
															
 
																+nan	nan	nan
															
 
																+# not multiple-regression-base
															
 
																+0
															
 
																+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
															
 
																+617e5fe6	3686400	0	1.205438e+05	2.044578e+03	2.189075e+08	2.639552e+13	1816
															
 
																+
															
--- a/tools/perfmodels/sampling/codelets/45/chol_model_11.hannibal-pitch
+++ b/tools/perfmodels/sampling/codelets/45/chol_model_11.hannibal-pitch
@@ -0,0 +1 @@
 
																+chol_model_11.hannibal
															
--- a/tools/perfmodels/sampling/codelets/45/chol_model_21.hannibal
+++ b/tools/perfmodels/sampling/codelets/45/chol_model_21.hannibal
@@ -0,0 +1,104 @@
 
																+##################
															
 
																+#	Performance	Model	Version
															
 
																+45
															
 
																+
															
 
																+####################
															
 
																+# COMBs
															
 
																+# number of combinations
															
 
																+3
															
 
																+####################
															
 
																+# COMB_1
															
 
																+# number of types devices
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device id 
															
 
																+0
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# number of cores 
															
 
																+1
															
 
																+##########
															
 
																+# number of implementations
															
 
																+1
															
 
																+#####
															
 
																+# Model for cuda0_impl0 (Comb1)
															
 
																+#	number	of	entries
															
 
																+1
															
 
																+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
															
 
																+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
															
 
																+#	a	b	c
															
 
																+nan	nan	nan
															
 
																+# not multiple-regression-base
															
 
																+0
															
 
																+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
															
 
																+ff82dda0	7372800	8.856576e+08	1.551780e+04	9.258624e+03	5.415867e+08	1.139602e+13	34901
															
 
																+
															
 
																+####################
															
 
																+# COMB_2
															
 
																+# number of types devices
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device id 
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# number of cores 
															
 
																+1
															
 
																+##########
															
 
																+# number of implementations
															
 
																+1
															
 
																+#####
															
 
																+# Model for cuda1_impl0 (Comb2)
															
 
																+#	number	of	entries
															
 
																+1
															
 
																+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
															
 
																+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
															
 
																+#	a	b	c
															
 
																+nan	nan	nan
															
 
																+# not multiple-regression-base
															
 
																+0
															
 
																+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
															
 
																+ff82dda0	7372800	8.856576e+08	1.787309e+04	1.121893e+04	5.782658e+08	1.440761e+13	32354
															
 
																+
															
 
																+####################
															
 
																+# COMB_3
															
 
																+# number of types devices
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device id 
															
 
																+2
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# number of cores 
															
 
																+1
															
 
																+##########
															
 
																+# number of implementations
															
 
																+1
															
 
																+#####
															
 
																+# Model for cuda2_impl0 (Comb3)
															
 
																+#	number	of	entries
															
 
																+1
															
 
																+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
															
 
																+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
															
 
																+#	a	b	c
															
 
																+nan	nan	nan
															
 
																+# not multiple-regression-base
															
 
																+0
															
 
																+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
															
 
																+ff82dda0	7372800	8.856576e+08	1.675795e+04	1.012077e+04	5.931309e+08	1.356507e+13	35394
															
 
																+
															
--- a/tools/perfmodels/sampling/codelets/45/chol_model_21.hannibal-pitch
+++ b/tools/perfmodels/sampling/codelets/45/chol_model_21.hannibal-pitch
@@ -0,0 +1 @@
 
																+chol_model_21.hannibal
															
--- a/tools/perfmodels/sampling/codelets/45/chol_model_22.hannibal
+++ b/tools/perfmodels/sampling/codelets/45/chol_model_22.hannibal
@@ -0,0 +1,104 @@
 
																+##################
															
 
																+#	Performance	Model	Version
															
 
																+45
															
 
																+
															
 
																+####################
															
 
																+# COMBs
															
 
																+# number of combinations
															
 
																+3
															
 
																+####################
															
 
																+# COMB_1
															
 
																+# number of types devices
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device id 
															
 
																+0
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# number of cores 
															
 
																+1
															
 
																+##########
															
 
																+# number of implementations
															
 
																+1
															
 
																+#####
															
 
																+# Model for cuda0_impl0 (Comb1)
															
 
																+#	number	of	entries
															
 
																+1
															
 
																+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
															
 
																+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
															
 
																+#	a	b	c
															
 
																+nan	nan	nan
															
 
																+# not multiple-regression-base
															
 
																+0
															
 
																+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
															
 
																+24c84a50	11059200	1.769472e+09	5.763709e+03	3.768350e+03	4.501024e+09	3.703209e+13	780925
															
 
																+
															
 
																+####################
															
 
																+# COMB_2
															
 
																+# number of types devices
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device id 
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# number of cores 
															
 
																+1
															
 
																+##########
															
 
																+# number of implementations
															
 
																+1
															
 
																+#####
															
 
																+# Model for cuda1_impl0 (Comb2)
															
 
																+#	number	of	entries
															
 
																+1
															
 
																+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
															
 
																+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
															
 
																+#	a	b	c
															
 
																+nan	nan	nan
															
 
																+# not multiple-regression-base
															
 
																+0
															
 
																+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
															
 
																+24c84a50	11059200	1.769472e+09	5.889910e+03	4.485232e+03	4.352661e+09	4.050353e+13	739003
															
 
																+
															
 
																+####################
															
 
																+# COMB_3
															
 
																+# number of types devices
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device id 
															
 
																+2
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# number of cores 
															
 
																+1
															
 
																+##########
															
 
																+# number of implementations
															
 
																+1
															
 
																+#####
															
 
																+# Model for cuda2_impl0 (Comb3)
															
 
																+#	number	of	entries
															
 
																+1
															
 
																+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
															
 
																+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
															
 
																+#	a	b	c
															
 
																+nan	nan	nan
															
 
																+# not multiple-regression-base
															
 
																+0
															
 
																+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
															
 
																+24c84a50	11059200	1.769472e+09	5.782569e+03	3.939612e+03	4.412291e+09	3.735706e+13	763033
															
 
																+
															
--- a/tools/perfmodels/sampling/codelets/45/chol_model_22.hannibal-pitch
+++ b/tools/perfmodels/sampling/codelets/45/chol_model_22.hannibal-pitch
@@ -0,0 +1 @@
 
																+chol_model_22.hannibal
															
--- a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_11.hannibal
+++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_11.hannibal
@@ -0,0 +1,104 @@
 
																+##################
															
 
																+#	Performance	Model	Version
															
 
																+45
															
 
																+
															
 
																+####################
															
 
																+# COMBs
															
 
																+# number of combinations
															
 
																+3
															
 
																+####################
															
 
																+# COMB_1
															
 
																+# number of types devices
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device id 
															
 
																+0
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# number of cores 
															
 
																+1
															
 
																+##########
															
 
																+# number of implementations
															
 
																+1
															
 
																+#####
															
 
																+# Model for cuda0_impl0 (Comb1)
															
 
																+#	number	of	entries
															
 
																+1
															
 
																+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
															
 
																+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
															
 
																+#	a	b	c
															
 
																+nan	nan	nan
															
 
																+# not multiple-regression-base
															
 
																+0
															
 
																+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
															
 
																+617e5fe6	3686400	0.000000e+00	1.250229e+05	4.416720e+03	1.500275e+06	1.878028e+11	12
															
 
																+
															
 
																+####################
															
 
																+# COMB_2
															
 
																+# number of types devices
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device id 
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# number of cores 
															
 
																+1
															
 
																+##########
															
 
																+# number of implementations
															
 
																+1
															
 
																+#####
															
 
																+# Model for cuda1_impl0 (Comb2)
															
 
																+#	number	of	entries
															
 
																+1
															
 
																+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
															
 
																+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
															
 
																+#	a	b	c
															
 
																+nan	nan	nan
															
 
																+# not multiple-regression-base
															
 
																+0
															
 
																+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
															
 
																+617e5fe6	3686400	0.000000e+00	8.424585e+04	1.140908e+03	4.802014e+07	4.046239e+12	570
															
 
																+
															
 
																+####################
															
 
																+# COMB_3
															
 
																+# number of types devices
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device id 
															
 
																+2
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# number of cores 
															
 
																+1
															
 
																+##########
															
 
																+# number of implementations
															
 
																+1
															
 
																+#####
															
 
																+# Model for cuda2_impl0 (Comb3)
															
 
																+#	number	of	entries
															
 
																+1
															
 
																+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
															
 
																+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
															
 
																+#	a	b	c
															
 
																+nan	nan	nan
															
 
																+# not multiple-regression-base
															
 
																+0
															
 
																+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
															
 
																+617e5fe6	3686400	0.000000e+00	8.331807e+04	6.460292e+02	5.782274e+07	4.817969e+12	694
															
 
																+
															
--- a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_11.hannibal-pitch
+++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_11.hannibal-pitch
@@ -0,0 +1 @@
 
																+starpu_slu_lu_model_11.hannibal
															
--- a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_12.hannibal
+++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_12.hannibal
@@ -0,0 +1,104 @@
 
																+##################
															
 
																+#	Performance	Model	Version
															
 
																+45
															
 
																+
															
 
																+####################
															
 
																+# COMBs
															
 
																+# number of combinations
															
 
																+3
															
 
																+####################
															
 
																+# COMB_1
															
 
																+# number of types devices
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device id 
															
 
																+0
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# number of cores 
															
 
																+1
															
 
																+##########
															
 
																+# number of implementations
															
 
																+1
															
 
																+#####
															
 
																+# Model for cuda0_impl0 (Comb1)
															
 
																+#	number	of	entries
															
 
																+1
															
 
																+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
															
 
																+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
															
 
																+#	a	b	c
															
 
																+nan	nan	nan
															
 
																+# not multiple-regression-base
															
 
																+0
															
 
																+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
															
 
																+ff82dda0	7372800	0.000000e+00	1.072902e+04	3.731292e+03	7.780684e+07	9.357572e+11	7252
															
 
																+
															
 
																+####################
															
 
																+# COMB_2
															
 
																+# number of types devices
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device id 
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# number of cores 
															
 
																+1
															
 
																+##########
															
 
																+# number of implementations
															
 
																+1
															
 
																+#####
															
 
																+# Model for cuda1_impl0 (Comb2)
															
 
																+#	number	of	entries
															
 
																+1
															
 
																+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
															
 
																+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
															
 
																+#	a	b	c
															
 
																+nan	nan	nan
															
 
																+# not multiple-regression-base
															
 
																+0
															
 
																+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
															
 
																+ff82dda0	7372800	0.000000e+00	1.250147e+04	5.489974e+03	7.944684e+07	1.184741e+12	6355
															
 
																+
															
 
																+####################
															
 
																+# COMB_3
															
 
																+# number of types devices
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device id 
															
 
																+2
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# number of cores 
															
 
																+1
															
 
																+##########
															
 
																+# number of implementations
															
 
																+1
															
 
																+#####
															
 
																+# Model for cuda2_impl0 (Comb3)
															
 
																+#	number	of	entries
															
 
																+1
															
 
																+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
															
 
																+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
															
 
																+#	a	b	c
															
 
																+nan	nan	nan
															
 
																+# not multiple-regression-base
															
 
																+0
															
 
																+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
															
 
																+ff82dda0	7372800	0.000000e+00	1.131230e+04	4.120480e+03	8.165221e+07	1.046224e+12	7218
															
 
																+
															
--- a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_12.hannibal-pitch
+++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_12.hannibal-pitch
@@ -0,0 +1 @@
 
																+starpu_slu_lu_model_12.hannibal
															
--- a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_21.hannibal
+++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_21.hannibal
@@ -0,0 +1,104 @@
 
																+##################
															
 
																+#	Performance	Model	Version
															
 
																+45
															
 
																+
															
 
																+####################
															
 
																+# COMBs
															
 
																+# number of combinations
															
 
																+3
															
 
																+####################
															
 
																+# COMB_1
															
 
																+# number of types devices
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device id 
															
 
																+0
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# number of cores 
															
 
																+1
															
 
																+##########
															
 
																+# number of implementations
															
 
																+1
															
 
																+#####
															
 
																+# Model for cuda0_impl0 (Comb1)
															
 
																+#	number	of	entries
															
 
																+1
															
 
																+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
															
 
																+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
															
 
																+#	a	b	c
															
 
																+nan	nan	nan
															
 
																+# not multiple-regression-base
															
 
																+0
															
 
																+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
															
 
																+ff82dda0	7372800	0.000000e+00	1.103789e+04	3.664518e+03	7.889881e+07	9.668643e+11	7148
															
 
																+
															
 
																+####################
															
 
																+# COMB_2
															
 
																+# number of types devices
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device id 
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# number of cores 
															
 
																+1
															
 
																+##########
															
 
																+# number of implementations
															
 
																+1
															
 
																+#####
															
 
																+# Model for cuda1_impl0 (Comb2)
															
 
																+#	number	of	entries
															
 
																+1
															
 
																+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
															
 
																+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
															
 
																+#	a	b	c
															
 
																+nan	nan	nan
															
 
																+# not multiple-regression-base
															
 
																+0
															
 
																+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
															
 
																+ff82dda0	7372800	0.000000e+00	1.284524e+04	5.462619e+03	8.441889e+07	1.280490e+12	6572
															
 
																+
															
 
																+####################
															
 
																+# COMB_3
															
 
																+# number of types devices
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device id 
															
 
																+2
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# number of cores 
															
 
																+1
															
 
																+##########
															
 
																+# number of implementations
															
 
																+1
															
 
																+#####
															
 
																+# Model for cuda2_impl0 (Comb3)
															
 
																+#	number	of	entries
															
 
																+1
															
 
																+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
															
 
																+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
															
 
																+#	a	b	c
															
 
																+nan	nan	nan
															
 
																+# not multiple-regression-base
															
 
																+0
															
 
																+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
															
 
																+ff82dda0	7372800	0.000000e+00	1.171798e+04	4.121992e+03	8.325626e+07	1.096315e+12	7105
															
 
																+
															
--- a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_21.hannibal-pitch
+++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_21.hannibal-pitch
@@ -0,0 +1 @@
 
																+starpu_slu_lu_model_21.hannibal
															
--- a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_22.hannibal
+++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_22.hannibal
@@ -0,0 +1,104 @@
 
																+##################
															
 
																+#	Performance	Model	Version
															
 
																+45
															
 
																+
															
 
																+####################
															
 
																+# COMBs
															
 
																+# number of combinations
															
 
																+3
															
 
																+####################
															
 
																+# COMB_1
															
 
																+# number of types devices
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device id 
															
 
																+0
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# number of cores 
															
 
																+1
															
 
																+##########
															
 
																+# number of implementations
															
 
																+1
															
 
																+#####
															
 
																+# Model for cuda0_impl0 (Comb1)
															
 
																+#	number	of	entries
															
 
																+1
															
 
																+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
															
 
																+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
															
 
																+#	a	b	c
															
 
																+nan	nan	nan
															
 
																+# not multiple-regression-base
															
 
																+0
															
 
																+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
															
 
																+24c84a50	11059200	0.000000e+00	5.116253e+03	1.361494e+03	9.170526e+08	5.024130e+12	179243
															
 
																+
															
 
																+####################
															
 
																+# COMB_2
															
 
																+# number of types devices
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device id 
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# number of cores 
															
 
																+1
															
 
																+##########
															
 
																+# number of implementations
															
 
																+1
															
 
																+#####
															
 
																+# Model for cuda1_impl0 (Comb2)
															
 
																+#	number	of	entries
															
 
																+1
															
 
																+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
															
 
																+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
															
 
																+#	a	b	c
															
 
																+nan	nan	nan
															
 
																+# not multiple-regression-base
															
 
																+0
															
 
																+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
															
 
																+24c84a50	11059200	0.000000e+00	5.228920e+03	1.967478e+03	8.761527e+08	5.229949e+12	167559
															
 
																+
															
 
																+####################
															
 
																+# COMB_3
															
 
																+# number of types devices
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3)
															
 
																+1
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# device id 
															
 
																+2
															
 
																+####################
															
 
																+# DEV_0
															
 
																+# number of cores 
															
 
																+1
															
 
																+##########
															
 
																+# number of implementations
															
 
																+1
															
 
																+#####
															
 
																+# Model for cuda2_impl0 (Comb3)
															
 
																+#	number	of	entries
															
 
																+1
															
 
																+#	sumlnx	sumlnx2	sumlny	sumlnxlny	alpha	beta	n	minx	maxx
															
 
																+0.000000e+00	0.000000e+00	0.000000e+00	0.000000e+00	nan	nan	0	0	0
															
 
																+#	a	b	c
															
 
																+nan	nan	nan
															
 
																+# not multiple-regression-base
															
 
																+0
															
 
																+#	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
															
 
																+24c84a50	11059200	0.000000e+00	5.131691e+03	1.494139e+03	8.920059e+08	4.965550e+12	173823
															
 
																+
															
--- a/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_22.hannibal-pitch
+++ b/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_22.hannibal-pitch
@@ -0,0 +1 @@
 
																+starpu_slu_lu_model_22.hannibal
															
--- a/tools/starpu_fxt_data_trace.c
+++ b/tools/starpu_fxt_data_trace.c
@@ -2,7 +2,7 @@
 
																  *
															
 
																  * Copyright (C) 2013                                     Joris Pablo
															
 
																  * Copyright (C) 2014,2015,2017                           CNRS
															
 
																- * Copyright (C) 2011-2014,2016                           Université de Bordeaux
															
 
																+ * Copyright (C) 2011-2014,2016,2019                      Université de Bordeaux
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -19,6 +19,7 @@
 
																 #include <stdio.h>
															
 
																 #include <starpu.h>
															
 
																 #include <string.h>
															
 
																+#include <sys/stat.h>
															
 
																 #include <common/config.h>
															
 
																 #define PROGNAME "starpu_fxt_data_trace"
															
@@ -75,7 +76,8 @@ static void write_gp(int argc, char **argv)
 
																 		exit(-1);
															
 
																 	}
															
 
																 	char codelet_name[MAX_LINE_SIZE];
															
 
																-	FILE *plt = fopen("data_trace.gp", "w+");
															
 
																+	const char *file_name = "data_trace.gp";
															
 
																+	FILE *plt = fopen(file_name, "w+");
															
 
																 	if(!plt)
															
 
																 	{
															
 
																 		perror("Error while creating data_trace.gp:");
															
@@ -129,7 +131,6 @@ static void write_gp(int argc, char **argv)
 
																 	}
															
 
																 	fprintf(plt, "\n");
															
 
																-	fprintf(stdout, "Gnuplot file <data_trace.gp> has been successfully created.\n");
															
 
																 	if(fclose(codelet_list))
															
 
																 	{
															
 
																 		perror("close failed :");
															
@@ -141,6 +142,23 @@ static void write_gp(int argc, char **argv)
 
																 		perror("close failed :");
															
 
																 		exit(-1);
															
 
																 	}
															
 
																+
															
 
																+	struct stat sb;
															
 
																+	int ret = stat(file_name, &sb);
															
 
																+	if (ret)
															
 
																+	{
															
 
																+		perror("stat");
															
 
																+		STARPU_ABORT();
															
 
																+	}
															
 
																+
															
 
																+	/* Make the gnuplot script executable for the owner */
															
 
																+	ret = chmod(file_name, sb.st_mode|S_IXUSR);
															
 
																+	if (ret)
															
 
																+	{
															
 
																+		perror("chmod");
															
 
																+		STARPU_ABORT();
															
 
																+	}
															
 
																+	fprintf(stdout, "Gnuplot file <data_trace.gp> has been successfully created.\n");
															
 
																 }
															
 
																 int main(int argc, char **argv)
															
--- a/tools/starpu_replay.c
+++ b/tools/starpu_replay.c
@@ -268,7 +268,10 @@ static void arrays_managing(int mode)
 
																 /* Check if a handle hasn't been registered yet */
															
 
																 static void variable_data_register_check(size_t * array_of_size, int nb_handles)
															
 
																 {
															
 
																-	int h;
															
 
																+	int h, i;
															
 
																+	starpu_data_handle_t orig_handles[nb_handles];
															
 
																+
															
 
																+	ARRAY_DUP(handles_ptr, orig_handles, nb_handles);
															
 
																 	for (h = 0 ; h < nb_handles ; h++)
															
 
																 	{
															
@@ -276,16 +279,29 @@ static void variable_data_register_check(size_t * array_of_size, int nb_handles)
 
																 		{
															
 
																 			struct handle * handles_cell;
															
 
																-			_STARPU_MALLOC(handles_cell, sizeof(*handles_cell));
															
 
																-			STARPU_ASSERT(handles_cell != NULL);
															
 
																+			for (i = 0; i < h; i++)
															
 
																+			{
															
 
																+				/* Maybe we just registered it in this very h loop */
															
 
																+				if (handles_ptr[h] == orig_handles[i])
															
 
																+				{
															
 
																+					handles_ptr[h] = handles_ptr[i];
															
 
																+					break;
															
 
																+				}
															
 
																+			}
															
 
																+
															
 
																+			if (i == h)
															
 
																+			{
															
 
																+				_STARPU_MALLOC(handles_cell, sizeof(*handles_cell));
															
 
																+				STARPU_ASSERT(handles_cell != NULL);
															
 
																-			handles_cell->handle = handles_ptr[h]; /* Get the hidden key (initial handle from the file) to store it as a key*/
															
 
																+				handles_cell->handle = handles_ptr[h]; /* Get the hidden key (initial handle from the file) to store it as a key*/
															
 
																-			starpu_variable_data_register(handles_ptr+h, STARPU_MAIN_RAM, (uintptr_t) 1, array_of_size[h]);
															
 
																+				starpu_variable_data_register(handles_ptr+h, STARPU_MAIN_RAM, (uintptr_t) 1, array_of_size[h]);
															
 
																-			handles_cell->mem_ptr = handles_ptr[h]; /* Store the new value of the handle into the hash table */
															
 
																+				handles_cell->mem_ptr = handles_ptr[h]; /* Store the new value of the handle into the hash table */
															
 
																-			HASH_ADD(hh, handles_hash, handle, sizeof(handles_ptr[h]), handles_cell);
															
 
																+				HASH_ADD(hh, handles_hash, handle, sizeof(handles_ptr[h]), handles_cell);
															
 
																+			}
															
 
																 		}
															
 
																 	}
															
 
																 }
															
@@ -532,6 +548,7 @@ int main(int argc, char **argv)
 
																 	reset();
															
 
																 	double start = starpu_timing_now();
															
 
																+	int linenum = 0;
															
 
																 	while(1)
															
 
																 	{
															
@@ -571,6 +588,8 @@ int main(int argc, char **argv)
 
																 			s_allocated *= 2;
															
 
																 		}
															
 
																+		linenum++;
															
 
																+
															
 
																 		if (ln == s)
															
 
																 		{
															
 
																 			/* Empty line, do task */
															
@@ -652,6 +671,7 @@ int main(int argc, char **argv)
 
																 							fprintf(stderr, "[starpu][Warning] Error loading perfmodel symbol %s\n", model);
															
 
																 							fprintf(stderr, "[starpu][Warning] Taking only measurements from the given execution, and forcing execution on worker %d\n", workerid);
															
 
																+							starpu_perfmodel_unload_model(&realmodel->perfmodel);
															
 
																 							free(realmodel->model_name);
															
 
																 							free(realmodel);
															
 
																 							realmodel = NULL;
															
@@ -659,6 +679,9 @@ int main(int argc, char **argv)
 
																 					}
															
 
																+					struct starpu_perfmodel_arch *arch = starpu_worker_get_perf_archtype(workerid, 0);
															
 
																+
															
 
																+					unsigned comb = starpu_perfmodel_arch_comb_add(arch->ndevices, arch->devices);
															
 
																 					unsigned narch = starpu_perfmodel_get_narch_combs();
															
 
																 					struct task_arg *arg;
															
@@ -669,9 +692,6 @@ int main(int argc, char **argv)
 
																 					if (realmodel == NULL)
															
 
																 					{
															
 
																-						struct starpu_perfmodel_arch *arch = starpu_worker_get_perf_archtype(workerid, 0);
															
 
																-
															
 
																-						unsigned comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices);
															
 
																 						/* Erf, do without perfmodel, for execution there */
															
 
																 						task->task.workerid = workerid;
															
 
																 						task->task.execute_on_a_specific_worker = 1;
															
@@ -795,19 +815,19 @@ int main(int argc, char **argv)
 
																 		}
															
 
																 		else if (TEST("Parameters"))
															
 
																 		{
															
 
																-			/* Parameters line format is PARAM1_PARAM2_(...)PARAMi_(...)PARAMn */
															
 
																+			/* Parameters line format is PARAM1 PARAM2 (...)PARAMi (...)PARAMn */
															
 
																 			char * param_str = s + 12;
															
 
																 			int count = 0;
															
 
																 			for (i = 0 ; param_str[i] != '\n'; i++)
															
 
																 			{
															
 
																-				if (param_str[i] == '_') /* Checking the number of '_' (underscore), assuming that the file is not corrupted */
															
 
																+				if (param_str[i] == ' ') /* Checking the number of ' ' (space), assuming that the file is not corrupted */
															
 
																 				{
															
 
																 					count++;
															
 
																 				}
															
 
																 			}
															
 
																-			nb_parameters = count + 1; /* There is one underscore per paramater execept for the last one, that's why we have to add +1 (dirty programming) */
															
 
																+			nb_parameters = count + 1; /* There is one space per parameter except for the last one, that's why we have to add +1 (dirty programming) */
															
 
																 			/* This part of the algorithm will determine if it needs static or dynamic arrays */
															
 
																 			alloc_mode = set_alloc_mode(nb_parameters);
															
@@ -820,30 +840,28 @@ int main(int argc, char **argv)
 
																 			const char *delim = " ";
															
 
																 			char *token = strtok(buffer, delim);
															
 
																-			while (token != NULL)
															
 
																+			for (i = 0 ; i < nb_parameters ; i++)
															
 
																 			{
															
 
																-				for (i = 0 ; i < nb_parameters ; i++)
															
 
																-				{
															
 
																-					struct handle *handles_cell; /* A cell of the hash table for the handles */
															
 
																-					starpu_data_handle_t  handle_value = (starpu_data_handle_t) strtol(token, NULL, 16); /* Get the ith handle on the line (in the file) */
															
 
																-
															
 
																-					HASH_FIND(hh, handles_hash, &handle_value, sizeof(handle_value), handles_cell); /* Find if the handle_value was already registered as a key in the hash table */
															
 
																+				STARPU_ASSERT(token);
															
 
																+				struct handle *handles_cell; /* A cell of the hash table for the handles */
															
 
																+				starpu_data_handle_t  handle_value = (starpu_data_handle_t) strtol(token, NULL, 16); /* Get the ith handle on the line (in the file) */
															
 
																-					/* If it wasn't, then add it to the hash table */
															
 
																-					if (handles_cell == NULL)
															
 
																-					{
															
 
																-						/* Hide the initial handle from the file into the handles array to find it when necessary */
															
 
																-						handles_ptr[i] = handle_value;
															
 
																-						reg_signal[i] = 1;
															
 
																-					}
															
 
																-					else
															
 
																-					{
															
 
																-						handles_ptr[i] = handles_cell->mem_ptr;
															
 
																-						reg_signal[i] = 0;
															
 
																-					}
															
 
																+				HASH_FIND(hh, handles_hash, &handle_value, sizeof(handle_value), handles_cell); /* Find if the handle_value was already registered as a key in the hash table */
															
 
																-					token = strtok(NULL, delim);
															
 
																+				/* If it wasn't, then add it to the hash table */
															
 
																+				if (handles_cell == NULL)
															
 
																+				{
															
 
																+					/* Hide the initial handle from the file into the handles array to find it when necessary */
															
 
																+					handles_ptr[i] = handle_value;
															
 
																+					reg_signal[i] = 1;
															
 
																 				}
															
 
																+				else
															
 
																+				{
															
 
																+					handles_ptr[i] = handles_cell->mem_ptr;
															
 
																+					reg_signal[i] = 0;
															
 
																+				}
															
 
																+
															
 
																+				token = strtok(NULL, delim);
															
 
																 			}
															
 
																 		}
															
 
																 		else if (TEST("Modes"))
															
@@ -853,7 +871,7 @@ int main(int argc, char **argv)
 
																 			const char * delim = " ";
															
 
																 			char * token = strtok(buffer, delim);
															
 
																-			while (token != NULL)
															
 
																+			while (token != NULL && mode_i < nb_parameters)
															
 
																 			{
															
 
																 				/* Subject to the names of starpu modes enumerator are not modified */
															
 
																 				if (!strncmp(token, "RW", 2))
															
@@ -888,7 +906,7 @@ int main(int argc, char **argv)
 
																 			_STARPU_MALLOC(sizes_set, nb_parameters * sizeof(size_t));
															
 
																-			while (token != NULL)
															
 
																+			while (token != NULL && k < nb_parameters)
															
 
																 			{
															
 
																 				sizes_set[k] = strtol(token, NULL, 10);
															
 
																 				token = strtok(NULL, delim);
															
@@ -969,9 +987,9 @@ eof:
 
																         }
															
 
																 	starpu_shutdown();
															
 
																-
															
 
																 	return 0;
															
 
																 enodev:
															
 
																+	starpu_shutdown();
															
 
																 	return 77;
															
 
																 }