
Merge branch 'master' into knobs

Nathalie Furmento, 6 years ago
parent
commit
85adf47b98
100 changed files with 1431 additions and 632 deletions
  1. + 6 - 0  ChangeLog
  2. + 8 - 2  configure.ac
  3. + 1 - 0  doc/doxygen/Makefile.am
  4. + 8 - 4  doc/doxygen/chapters/320_scheduling.doxy
  5. + 5 - 0  doc/doxygen/chapters/520_files.doxy
  6. + 48 - 14  doc/doxygen/dev/checkDoc.sh
  7. + 0 - 28  doc/doxygen/dev/sc_funcs.cocci
  8. + 23 - 28  doc/doxygen/dev/starpu_check_refs.sh
  9. + 63 - 35  doc/doxygen/dev/starpu_check_undocumented.sh
  10. + 0 - 28  doc/doxygen/dev/starpu_funcs.cocci
  11. + 9 - 1  doc/doxygen/doxygen-config.cfg.in
  12. + 1 - 0  doc/doxygen/doxygen.cfg
  13. + 6 - 0  doc/doxygen/refman.tex
  14. + 1 - 1  examples/cholesky/cholesky.sh
  15. + 18 - 0  mpi/examples/Makefile.am
  16. + 2 - 1  mpi/examples/complex/mpi_complex.c
  17. + 0 - 5  mpi/examples/filters/filter.c
  18. + 11 - 4  mpi/examples/mpi_lu/plu_implicit_example.c
  19. + 10 - 9  mpi/examples/native_fortran/nf_mm.f90
  20. + 248 - 0  mpi/examples/native_fortran/nf_mm_task_build.f90
  21. + 2 - 1  mpi/examples/user_datatype/user_datatype.c
  22. + 16 - 23  mpi/include/fstarpu_mpi_mod.f90
  23. + 5 - 1  mpi/src/Makefile.am
  24. + 2 - 2  mpi/src/load_balancer/policy/load_heat_propagation.c
  25. + 3 - 1  mpi/src/mpi/starpu_mpi_comm.h
  26. + 1 - 0  mpi/src/mpi/starpu_mpi_early_data.c
  27. + 115 - 114  mpi/src/mpi/starpu_mpi_mpi.c
  28. + 117 - 0  mpi/src/mpi/starpu_mpi_mpi_backend.c
  29. + 80 - 0  mpi/src/mpi/starpu_mpi_mpi_backend.h
  30. + 3 - 3  mpi/src/mpi/starpu_mpi_tag.c
  31. + 32 - 32  mpi/src/nmad/starpu_mpi_nmad.c
  32. + 87 - 0  mpi/src/nmad/starpu_mpi_nmad_backend.c
  33. + 51 - 0  mpi/src/nmad/starpu_mpi_nmad_backend.h
  34. + 4 - 21  mpi/src/starpu_mpi.c
  35. + 4 - 20  mpi/src/starpu_mpi_init.c
  36. + 42 - 83  mpi/src/starpu_mpi_private.h
  37. + 5 - 50  mpi/src/starpu_mpi_req.c
  38. + 19 - 23  mpi/src/starpu_mpi_task_insert_fortran.c
  39. + 3 - 3  mpi/tests/attr.c
  40. + 8 - 3  mpi/tests/block_interface.c
  41. + 7 - 3  mpi/tests/block_interface_pinned.c
  42. + 1 - 1  src/common/fxt.c
  43. + 3 - 2  src/core/jobs.c
  44. + 7 - 4  src/core/perfmodel/perfmodel_history.c
  45. + 1 - 0  src/core/sched_policy.c
  46. + 1 - 0  src/core/sched_policy.h
  47. + 17 - 22  src/core/topology.c
  48. + 18 - 0  src/sched_policies/deque_modeling_policy_data_aware.c
  49. + 0 - 1  tests/datawizard/interfaces/test_interfaces.c
  50. + 1 - 1  tests/datawizard/variable_size.c
  51. + 1 - 1  tests/fault-tolerance/retry.c
  52. + 36 - 21  tests/loader.c
  53. + 2 - 1  tests/main/bind.c
  54. + 2 - 2  tests/microbenchs/parallel_dependent_homogeneous_tasks_data.sh
  55. + 2 - 2  tests/microbenchs/parallel_independent_heterogeneous_tasks_data.sh
  56. + 2 - 2  tests/microbenchs/parallel_independent_homogeneous_tasks_data.sh
  57. + 2 - 2  tests/microbenchs/parallel_redux_heterogeneous_tasks_data.sh
  58. + 2 - 2  tests/microbenchs/parallel_redux_homogeneous_tasks_data.sh
  59. + 2 - 2  tests/parallel_tasks/parallel_kernels_trivial.c
  60. + 15 - 15  tools/Makefile.am
  61. + 104 - 1  tools/dev/valgrind/libc.suppr
  62. + 10 - 1  tools/dev/valgrind/libnuma.suppr
  63. + 84 - 0  tools/dev/valgrind/nvidia.suppr
  64. + 6 - 1  tools/dev/valgrind/pthread.suppr
  65. + 2 - 2  tools/dev/valgrind/valgrind.sh
  66. + 3 - 3  tools/perfmodels/sampling/codelets/45/chol_model_11.hannibal
  67. + 1 - 0  tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_11_atlas.attila
  68. + 1 - 0  tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_11_atlas.idgraf
  69. + 1 - 0  tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_11_atlas.mirage
  70. + 1 - 0  tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_11_atlas.sirocco
  71. + 1 - 0  tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_11_goto.attila
  72. + 1 - 0  tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_11_goto.idgraf
  73. + 1 - 0  tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_11_goto.mirage
  74. + 1 - 0  tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_11_goto.sirocco
  75. + 1 - 0  tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_11_openblas.attila
  76. + 1 - 0  tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_11_openblas.idgraf
  77. + 1 - 0  tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_11_openblas.mirage
  78. + 1 - 0  tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_11_openblas.sirocco
  79. + 1 - 0  tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12_atlas.attila
  80. + 1 - 0  tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12_atlas.idgraf
  81. + 1 - 0  tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12_atlas.mirage
  82. + 1 - 0  tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12_atlas.sirocco
  83. + 1 - 0  tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12_goto.attila
  84. + 1 - 0  tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12_goto.idgraf
  85. + 1 - 0  tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12_goto.mirage
  86. + 1 - 0  tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12_goto.sirocco
  87. + 1 - 0  tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12_openblas.attila
  88. + 1 - 0  tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12_openblas.idgraf
  89. + 1 - 0  tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12_openblas.mirage
  90. + 1 - 0  tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12_openblas.sirocco
  91. + 1 - 0  tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_21_atlas.attila
  92. + 1 - 0  tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_21_atlas.idgraf
  93. + 1 - 0  tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_21_atlas.mirage
  94. + 1 - 0  tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_21_atlas.sirocco
  95. + 1 - 0  tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_21_goto.attila
  96. + 1 - 0  tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_21_goto.idgraf
  97. + 1 - 0  tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_21_goto.mirage
  98. + 1 - 0  tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_21_goto.sirocco
  99. + 1 - 0  tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_21_openblas.attila
  100. + 0 - 0  tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_21_openblas.idgraf

+ 6 - 0
ChangeLog

@@ -42,6 +42,12 @@ New features:
     fields of starpu_task to 0.
   * starpufft: Support 3D.
 
+Changes:
+  * Modification in the Native Fortran interface of the functions
+    fstarpu_mpi_task_insert, fstarpu_mpi_task_build and
+    fstarpu_mpi_task_post_build so that they take a single array parameter
+    holding the MPI communicator, the codelet and the various parameters for
+    the task.
+
 Small features:
   * New starpu_task_insert() and alike parameter STARPU_TASK_WORKERIDS
     allowing to set the fields starpu_task::workerids_len and
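
As the ChangeLog entry above describes, the Native Fortran wrappers fstarpu_mpi_task_insert, fstarpu_mpi_task_build and fstarpu_mpi_task_post_build now receive everything through one C_NULL_PTR-terminated array of c_ptr (communicator first, then the codelet, then the access-mode/handle pairs), mirroring the variadic C function they bind to. For reference, here is a minimal C-side sketch of that argument sequence; it is not part of this commit, and the codelet, kernel, vector and tag used here are purely illustrative.

/* Minimal sketch (not from this commit): the (comm, codelet, mode/handle...,
 * 0) sequence of starpu_mpi_task_insert(), which the revised Fortran wrappers
 * now receive as a single C_NULL_PTR-terminated array of c_ptr. */
#include <stdint.h>
#include <starpu.h>
#include <starpu_mpi.h>

static void scal_cpu(void *buffers[], void *arg)
{
	(void) arg;
	struct starpu_vector_interface *v = buffers[0];
	float *x = (float *) STARPU_VECTOR_GET_PTR(v);
	unsigned i, n = STARPU_VECTOR_GET_NX(v);
	for (i = 0; i < n; i++)
		x[i] *= 2.0f;
}

static struct starpu_codelet scal_cl =
{
	.cpu_funcs = { scal_cpu },
	.nbuffers = 1,
	.modes = { STARPU_RW },
};

int main(int argc, char **argv)
{
	float vec[16] = { 1.0f };
	starpu_data_handle_t handle;
	int ret;

	if (starpu_init(NULL) != 0)
		return 77;
	if (starpu_mpi_init(&argc, &argv, 1) != 0)
		return 1;

	starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t) vec, 16, sizeof(vec[0]));
	starpu_mpi_data_register(handle, 42, 0);

	/* communicator, codelet, then mode/handle pairs, terminated by 0 */
	ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &scal_cl,
				     STARPU_RW, handle,
				     0);

	starpu_task_wait_for_all();
	starpu_data_unregister(handle);
	starpu_mpi_shutdown();
	starpu_shutdown();
	return ret ? 1 : 0;
}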

+ 8 - 2
configure.ac

@@ -1365,7 +1365,7 @@ if test x$enable_cuda = xyes; then
 		])
 	    ],
 	    [
-	    AC_MSG_ERROR([NVML found, but nvml.h could not be compiled])
+	    AC_MSG_WARN([NVML found, but nvml.h could not be compiled])
 	    have_valid_nvml="no"
 	    ]
 	)
@@ -2970,9 +2970,15 @@ AC_SUBST(BLAS_LIB,$blas_lib)
 #			 Multiple linear regression			      #
 #                                                                             #
 ###############################################################################
+if test x$enable_simgrid = xyes ; then
+	# There is no need for building mlr models in simgrid mode
+	default_enable_mlr=no
+else
+	default_enable_mlr=yes
+fi
 AC_ARG_ENABLE(mlr, [AS_HELP_STRING([--disable-mlr],
 			[Disable multiple linear regression models])],
-			enable_mlr=$enableval, enable_mlr=yes)
+			enable_mlr=$enableval, enable_mlr=$default_enable_mlr)
 
 AC_MSG_CHECKING(whether multiple linear regression models are disabled)
 if test x$enable_mlr = xyes -a "$starpu_windows" != "yes" ; then

+ 1 - 0
doc/doxygen/Makefile.am

@@ -198,6 +198,7 @@ dox_inputs = $(DOX_CONFIG) 				\
 	$(top_srcdir)/include/starpu_bound.h		\
 	$(top_srcdir)/include/starpu_clusters.h		\
 	$(top_srcdir)/include/starpu_cublas.h		\
+	$(top_srcdir)/include/starpu_cublas_v2.h	\
 	$(top_srcdir)/include/starpu_cusparse.h		\
 	$(top_srcdir)/include/starpu_cuda.h		\
 	$(top_srcdir)/include/starpu_data_filters.h	\

+ 8 - 4
doc/doxygen/chapters/320_scheduling.doxy

@@ -100,14 +100,18 @@ become available, without taking priorities into account.
 The <b>dmda</b> (deque model data aware) scheduler is similar to dm, but it also takes
 into account data transfer time.
 
+The <b>dmdap</b> (deque model data aware prio) scheduler is similar to dmda,
+except that it sorts tasks by priority order, which allows to become even closer
+to HEFT by respecting priorities after having made the scheduling decision (but
+it still schedules tasks in the order they become available).
+
 The <b>dmdar</b> (deque model data aware ready) scheduler is similar to dmda,
 but it also privileges tasks whose data buffers are already available
 on the target device.
 
-The <b>dmdas</b> (deque model data aware sorted) scheduler is similar to dmdar,
-except that it sorts tasks by priority order, which allows to become even closer
-to HEFT by respecting priorities after having made the scheduling decision (but
-it still schedules tasks in the order they become available).
+The <b>dmdas</b> (deque model data aware sorted) scheduler combines dmdap and dmdar: it sorts tasks by priority order,
+but for a given priority it will privilege tasks whose data buffers are already
+available on the target device.
 
 The <b>dmdasd</b> (deque model data aware sorted decision) scheduler is similar
 to dmdas, except that when scheduling a task, it takes into account its priority
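
The policies documented above are selected by name at initialization time; a minimal C sketch follows (it is not part of this commit, and the program body is hypothetical). The same choice can be made without recompiling through the STARPU_SCHED environment variable, e.g. STARPU_SCHED=dmdas.

/* Minimal sketch (not from this commit) of selecting one of the deque-model
 * schedulers described above by name. */
#include <starpu.h>

int main(void)
{
	struct starpu_conf conf;

	starpu_conf_init(&conf);
	conf.sched_policy_name = "dmdas";  /* or "dmda", "dmdap", "dmdar", "dmdasd", ... */

	if (starpu_init(&conf) != 0)
		return 77;

	/* ... submit tasks here; starpu_task::priority is what the
	 * priority-aware policies (dmdap, dmdas, dmdasd) sort on ... */

	starpu_task_wait_for_all();
	starpu_shutdown();
	return 0;
}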

+ 5 - 0
doc/doxygen/chapters/520_files.doxy

@@ -25,6 +25,7 @@
 \file starpu_bound.h
 \file starpu_clusters.h
 \file starpu_cublas.h
+\file starpu_cublas_v2.h
 \file starpu_cusparse.h
 \file starpu_cuda.h
 \file starpu_data_filters.h
@@ -36,7 +37,10 @@
 \file starpu_expert.h
 \file starpu_fxt.h
 \file starpu_hash.h
+\file starpu_helper.h
+\file starpu_heteroprio.h
 \file starpu_mic.h
+\file starpu_mpi_ms.h
 \file starpu_mod.f90
 \file starpu_opencl.h
 \file starpu_openmp.h
@@ -54,6 +58,7 @@
 \file starpu_stdlib.h
 \file starpu_task_bundle.h
 \file starpu_task.h
+\file starpu_task_dep.h
 \file starpu_task_list.h
 \file starpu_task_util.h
 \file starpu_thread.h

+ 48 - 14
doc/doxygen/dev/checkDoc.sh

@@ -1,7 +1,7 @@
 #!/bin/bash
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2013,2014,2016,2017                      CNRS
+# Copyright (C) 2013,2014,2016,2017,2019                      CNRS
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
@@ -16,21 +16,55 @@
 #
 dirname=$(dirname $0)
 
-x=$(grep ingroup $dirname/../chapters/api/*.doxy $dirname/../chapters/api/sc_hypervisor/*.doxy |awk -F':' '{print $2}'| awk 'NF != 2')
-if test -n "$x" ; then
-    echo Errors on group definitions
-    echo $x
-fi
-
-echo
+DIRS="$dirname/../../../include $dirname/../../../mpi/include $dirname/../../../starpurm/include $dirname/../../../sc_hypervisor/include"
 echo "Defined groups"
-grep ingroup $dirname/../chapters/api/*.doxy $dirname/../chapters/api/sc_hypervisor/*.doxy|awk -F':' '{print $2}'| awk 'NF == 2'|sort|uniq
+groups=""
+for d in $DIRS
+do
+    echo Checking $d
+    gg=$(grep -rs defgroup $d | awk '{print $3}')
+    echo $gg
+    groups=$(echo $groups $gg)
+done
+for g in $groups
+do
+    gg=$(echo $g | sed 's/_/__/g')
+    x=$(grep $gg $dirname/../refman.tex)
+    if test -z "$x"
+    then
+	echo "Error. Group $g not included in refman.tex"
+    fi
+done
 echo
 
-for f in $dirname/../../../build/doc/doxygen/latex/*tex ; do
-    x=$(grep $(basename $f .tex) $dirname/../refman.tex)
-    if test -z "$x" ; then
-	echo Error. $f not included in refman.tex
-    fi
+for d in $DIRS
+do
+    for f in $(find $d -name "*.h")
+    do
+	ff=$(echo $f  | awk -F'/' '{print $NF}')
+	x=$(grep $ff $dirname/../doxygen-config.cfg.in)
+	if test -z "$x"
+	then
+	    echo Error. $f not included in doxygen-config.cfg.in
+	fi
+	x=$(grep $ff $dirname/../chapters/520_files.doxy)
+	if test -z "$x"
+	then
+	    echo Error. $f not included in 520_files.doxy
+	fi
+    done
+done
+echo
+
+for p in starpu sc__hypervisor
+do
+    for f in $dirname/../../../build/doc/doxygen/latex/${p}*tex
+    do
+	x=$(grep $(basename $f .tex) $dirname/../refman.tex)
+	if test -z "$x"
+	then
+	    echo Error. $f not included in refman.tex
+	fi
+    done
 done
 

+ 0 - 28
doc/doxygen/dev/sc_funcs.cocci

@@ -1,28 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2014,2015,2017                           CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-@scfunc@
-position p;
-type t;
-identifier f =~ "sc";
-@@
-
-t f@p( ... );
-
-@ script:python @
-p << scfunc.p;
-f << scfunc.f;
-@@
-print "%s,%s:%s" % (f,p[0].file,p[0].line)

+ 23 - 28
doc/doxygen/dev/starpu_check_refs.sh

@@ -1,7 +1,7 @@
 #!/bin/bash
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2016-2018                                CNRS
+# Copyright (C) 2016-2019                                CNRS
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
@@ -20,10 +20,6 @@ greencolor=$(tput setaf 2)
 
 dirname=$(dirname $0)
 
-STARPU_H_FILES=$(find $dirname/../../../include $dirname/../../../mpi/include -name '*.h')
-SC_H_FILES=$(find $dirname/../../../sc_hypervisor/include -name '*.h')
-SRC="$dirname/../../../src $dirname/../../../mpi/src $dirname/../../../sc_hypervisor/src"
-
 #grep --exclude-dir=.git --binary-files=without-match -rsF "\ref" $dirname/../chapters|grep -v "\\ref [a-zA-Z]"
 #echo continue && read
 
@@ -32,36 +28,35 @@ GREP="grep --exclude-dir=.git --binary-files=without-match -rsF"
 REFS=$($GREP "\ref" $dirname/../chapters| tr ':' '\012' | tr '.' '\012'  | tr ',' '\012'  | tr '(' '\012' | tr ')' '\012' | tr ' ' '\012'|grep -F '\ref' -A1 | grep -v '^--$' | sed 's/\\ref/=\\ref/' | tr '\012' ':' | tr '=' '\012' | sort | uniq)
 find $dirname/../chapters -name "*doxy" -exec cat {} \; > /tmp/DOXYGEN_$$
 cat $dirname/../refman.tex >> /tmp/DOXYGEN_$$
+find $dirname/../../../include -name "*h" -exec cat {} \; >> /tmp/DOXYGEN_$$
+find $dirname/../../../starpurm/include -name "*h" -exec cat {} \; >> /tmp/DOXYGEN_$$
+find $dirname/../../../mpi/include -name "*h" -exec cat {} \; >> /tmp/DOXYGEN_$$
+find $dirname/../../../sc_hypervisor/include -name "*h" -exec cat {} \; >> /tmp/DOXYGEN_$$
+
+stcolor=$(tput sgr0)
+redcolor=$(tput setaf 1)
+greencolor=$(tput setaf 2)
 
 for r in $REFS
 do
     ref=$(echo $r | sed 's/\\ref:\(.*\):/\1/')
-    n=$($GREP -crs "section $ref" /tmp/DOXYGEN_$$)
-    if test $n -eq 0
+    if test -n "$ref"
     then
-	n=$($GREP -crs "anchor $ref" /tmp/DOXYGEN_$$)
-	if test $n -eq 0
-	then
-	    n=$($GREP -crs "ingroup $ref" /tmp/DOXYGEN_$$)
-	    if test $n -eq 0
+	#echo "ref $ref"
+	for keyword in "section " "anchor " "ingroup " "defgroup " "def " "struct " "label{"
+	do
+	    n=$($GREP -crs "${keyword}${ref}" /tmp/DOXYGEN_$$)
+	    if test $n -ne 0
 	    then
-		n=$($GREP -crs "def $ref" /tmp/DOXYGEN_$$)
-		if test $n -eq 0
-		then
-		    n=$($GREP -crs "struct $ref" /tmp/DOXYGEN_$$)
-		    if test $n -eq 0
-		    then
-			if test $n -eq 0
-			then
-			    n=$($GREP -crs "label{$ref" /tmp/DOXYGEN_$$)
-			    if test $n -eq 0
-			    then
-				echo $ref missing
-			    fi
-			fi
-		    fi
-		fi
+		break
 	    fi
+	done
+	if test $n -eq 0
+	then
+	    echo "${redcolor}$ref${stcolor} is missing"
+	else
+	    true
+	    #echo "${greencolor}$ref${stcolor} is ok"
 	fi
     fi
 done

+ 63 - 35
doc/doxygen/dev/starpu_check_undocumented.sh

@@ -1,7 +1,7 @@
 #!/bin/bash
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2011-2018                                CNRS
+# Copyright (C) 2011-2019                                CNRS
 # Copyright (C) 2011                                     Inria
 #
 # StarPU is free software; you can redistribute it and/or modify
@@ -15,8 +15,6 @@
 #
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 #
-# Note: expects Coccinelle's spatch command n the PATH
-# See: http://coccinelle.lip6.fr/
 
 stcolor=$(tput sgr0)
 redcolor=$(tput setaf 1)
@@ -40,52 +38,81 @@ else
     fi
 fi
 
-if [ "$1" == "--func" ] || [ "$1" == "" ] ; then
-    starpu_functions=$(spatch -very_quiet -sp_file $dirname/starpu_funcs.cocci $STARPU_H_FILES)
-    sc_functions=$(spatch -very_quiet -sp_file $dirname/sc_funcs.cocci $SC_H_FILES)
-    for func in $starpu_functions $sc_functions ; do
-	fname=$(echo $func|awk -F ',' '{print $1}')
-	location=$(echo $func|awk -F ',' '{print $2}')
-	x=$(grep "$fname(" $dirname/../chapters/api/*.doxy | grep "\\fn")
-	if test "$x" == "" ; then
-	    echo "function ${redcolor}${fname}${stcolor} at location ${redcolor}$location${stcolor} is not (or incorrectly) documented"
-	    #	else
-	    #		echo "function ${greencolor}${fname}${stcolor} at location $location is correctly documented"
-	fi
+ok()
+{
+    type=$1
+    name=$2
+    echo "$type ${greencolor}${name}${stcolor} is (maybe correctly) documented"
+}
+
+ko()
+{
+    type=$1
+    name=$2
+    echo "$type ${redcolor}${name}${stcolor} is not (or incorrectly) documented"
+}
+
+if [ "$1" == "--func" ] || [ "$1" == "" ]
+then
+    for f in $STARPU_H_FILES $SC_H_FILES
+    do
+	grep "(" $f | grep ';' | grep starpu | grep '^[a-z]' | grep -v typedef | grep -v '(\*' | while read line
+	do
+	    x=$(grep -F -B1 "$line" $f | head -1)
+	    fname=$(echo $line | awk -F'(' '{print $1}' | awk '{print $NF}' | tr -d '*')
+	    if test "$x" == '*/'
+	    then
+		ok function $fname
+	    else
+		#echo $line
+		ko function $fname
+	    fi
+	done
     done
-    echo
 fi
 
 if [ "$1" == "--struct" ] || [ "$1" == "" ] ; then
-    starpu_structs=$(grep "struct starpu" $STARPU_H_FILES | grep -v "[;|,|(|)]" | awk '{print $2}')
-    sc_structs=$(grep "struct sc" $SC_H_FILES | grep -v "[;|,|(|)]" | awk '{print $2}')
-    for struct in $starpu_structs $sc_structs ; do
-	x=$(grep -F "\\struct $struct" $dirname/../chapters/api/*.doxy)
-	if test "$x" == "" ; then
-	    echo "struct ${redcolor}${struct}${stcolor} is not (or incorrectly) documented"
+    starpu=$(grep "^struct starpu_[a-z_]*$" $STARPU_H_FILES | awk '{print $NF}')
+    sc=$(grep "^struct sc_[a-z_]*$" $SC_H_FILES | awk '{print $NF}')
+    for o in $starpu $sc ; do
+	hfile=$(grep -l "^struct ${o}$" $STARPU_H_FILES $SC_H_FILES)
+	x=$(grep -B1 "^struct ${o}$" $hfile | head -1)
+	if test "$x" == '*/'
+	then
+	    ok "struct" ${o}
+	else
+	    ko "struct" ${o}
 	fi
     done
     echo
 fi
 
 if [ "$1" == "--enum" ] || [ "$1" == "" ] ; then
-    starpu_enums=$(grep "enum starpu" $STARPU_H_FILES | grep -v "[;|,|(|)]" | awk '{print $2}')
-    sc_enums=$(grep "enum starpu" $SC_H_FILES | grep -v "[;|,|(|)]" | awk '{print $2}')
-    for enum in $starpu_enums $sc_enums ; do
-	x=$(grep -F "\\enum $enum" $dirname/../chapters/api/*.doxy)
-	if test "$x" == "" ; then
-	    echo "enum ${redcolor}${enum}${stcolor} is not (or incorrectly) documented"
+    starpu=$(grep "^enum starpu_[a-z_]*$" $STARPU_H_FILES | awk '{print $NF}')
+    sc=$(grep "^enum sc_[a-z_]*$" $SC_H_FILES | awk '{print $NF}')
+    for o in $starpu $sc ; do
+	hfile=$(grep -l "^enum ${o}$" $STARPU_H_FILES $SC_H_FILES)
+	x=$(grep -B1 "^enum ${o}$" $hfile | head -1)
+	if test "$x" == '*/'
+	then
+	    ok "enum" ${o}
+	else
+	    ko "enum" ${o}
 	fi
     done
     echo
 fi
 
 if [ "$1" == "--macro" ] || [ "$1" == "" ] ; then
-    macros=$(grep "define\b" $STARPU_H_FILES $SC_H_FILES |grep -v deprecated|grep "#" | grep -v "__" | sed 's/#[ ]*/#/g' | awk '{print $2}' | awk -F'(' '{print $1}' | sort|uniq)
-    for macro in $macros ; do
-	x=$(grep -F "\\def $macro" $dirname/../chapters/api/*.doxy)
-	if test "$x" == "" ; then
-	    echo "macro ${redcolor}${macro}${stcolor} is not (or incorrectly) documented"
+    macros=$(grep "define\b" $STARPU_H_FILES $SC_H_FILES |grep -v deprecated|grep "#" | grep -v "__" | sed 's/#[ ]*/#/g' | awk '{print $2}' | awk -F'(' '{print $1}' | grep -i starpu | sort|uniq)
+    for o in $macros ; do
+	hfile=$(grep -l "define\b ${o}" $STARPU_H_FILES $SC_H_FILES)
+	x=$(grep -B1 "define\b ${o}" $hfile | head -1)
+	if test "$x" == '*/'
+	then
+	    ok "define" ${o}
+	else
+	    ko "define" ${o}
 	fi
     done
     echo
@@ -96,8 +123,9 @@ if [ "$1" == "--var" ] || [ "$1" == "" ] ; then
     for variable in $variables ; do
 	x=$(grep "$variable" $dirname/../chapters/501_environment_variables.doxy | grep "\\anchor")
 	if test "$x" == "" ; then
-	    echo "variable ${redcolor}${variable}${stcolor} is not (or incorrectly) documented"
+	    ko "variable" $variable
+	else
+	    ok "variable" $variable
 	fi
     done
 fi
-

+ 0 - 28
doc/doxygen/dev/starpu_funcs.cocci

@@ -1,28 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2013,2015,2017                           CNRS
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-@starpufunc@
-position p;
-type t;
-identifier f =~ "starpu";
-@@
-
-t f@p( ... );
-
-@ script:python @
-p << starpufunc.p;
-f << starpufunc.f;
-@@
-print "%s,%s:%s" % (f,p[0].file,p[0].line)

+ 9 - 1
doc/doxygen/doxygen-config.cfg.in

@@ -26,6 +26,7 @@ INPUT                  = @top_srcdir@/doc/doxygen/chapters \
 	 		 @top_srcdir@/include/starpu_clusters.h \
 			 @top_srcdir@/include/starpu_cusparse.h \
 			 @top_srcdir@/include/starpu_cublas.h \
+			 @top_srcdir@/include/starpu_cublas_v2.h \
 			 @top_srcdir@/include/starpu_cuda.h \
 			 @top_srcdir@/include/starpu_data_filters.h \
 			 @top_srcdir@/include/starpu_data.h \
@@ -39,6 +40,7 @@ INPUT                  = @top_srcdir@/doc/doxygen/chapters \
 			 @top_srcdir@/include/starpu_hash.h \
 			 @top_srcdir@/include/starpu_helper.h \
 			 @top_srcdir@/include/starpu_mic.h \
+			 @top_srcdir@/include/starpu_mpi_ms.h \
 			 @top_srcdir@/include/starpu_mod.f90 \
 			 @top_srcdir@/include/starpu_opencl.h \
 			 @top_srcdir@/include/starpu_openmp.h \
@@ -71,7 +73,13 @@ INPUT                  = @top_srcdir@/doc/doxygen/chapters \
 			 @top_srcdir@/mpi/include/fstarpu_mpi_mod.f90 \
 			 @top_srcdir@/starpufft/include/starpufft.h \
 			 @top_srcdir@/sc_hypervisor/include \
-			 @top_srcdir@/starpurm/include/starpurm.h
+			 @top_srcdir@/sc_hypervisor/include/sc_hypervisor_config.h \
+			 @top_srcdir@/sc_hypervisor/include/sc_hypervisor_policy.h \
+			 @top_srcdir@/sc_hypervisor/include/sc_hypervisor_lp.h  \
+			 @top_srcdir@/sc_hypervisor/include/sc_hypervisor.h \
+			 @top_srcdir@/sc_hypervisor/include/sc_hypervisor_monitoring.h \
+			 @top_srcdir@/starpurm/include/starpurm.h \
+			 @top_srcdir@/include/schedulers/starpu_heteroprio.h
 
 EXAMPLE_PATH           = @top_srcdir@/doc/doxygen \
 		       	 @top_srcdir@/doc/doxygen/chapters \

+ 1 - 0
doc/doxygen/doxygen.cfg

@@ -1627,6 +1627,7 @@ PREDEFINED             = STARPU_USE_OPENCL=1 \
 			 STARPU_MKL=1 \
 			 STARPU_WORKER_CALLBACKS=1 \
 			 STARPU_HAVE_GLPK_H=1 \
+			 STARPU_USE_MPI_MASTER_SLAVE=1 \
                          __GCC__
 
 # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then

+ 6 - 0
doc/doxygen/refman.tex

@@ -262,6 +262,9 @@ Documentation License”.
 \input{group__API__Modularized__Scheduler}
 \input{group__API__Clustering__Machine}
 \input{group__API__Interop__Support}
+\input{group__API__Master__Slave}
+\input{group__API__Random__Functions}
+\input{group__API__Sink}
 
 \chapter{File Index}
 \input{files}
@@ -276,6 +279,7 @@ Documentation License”.
 \input{starpu__clusters_8h}
 \input{starpu__config_8h}
 \input{starpu__cublas_8h}
+\input{starpu__cublas__v2_8h}
 \input{starpu__cusparse_8h}
 \input{starpu__cuda_8h}
 \input{starpu__data_8h}
@@ -287,6 +291,8 @@ Documentation License”.
 \input{starpu__expert_8h}
 \input{starpu__fxt_8h}
 \input{starpu__hash_8h}
+\input{starpu__helper_8h}
+\input{starpu__heteroprio_8h}
 \input{starpu__mic_8h}
 \input{starpu__mod_8f90}
 \input{starpu__mpi_8h}

+ 1 - 1
examples/cholesky/cholesky.sh

@@ -17,7 +17,7 @@
 
 ROOT=${0%.sh}
 #[ -n "$STARPU_SCHEDS" ] || STARPU_SCHEDS=`$(dirname $0)/../../tools/starpu_sched_display`
-[ -n "$STARPU_SCHEDS" ] || STARPU_SCHEDS="dmdas modular-heft modular-heft-prio dmdar dmda dmdasd prio lws"
+[ -n "$STARPU_SCHEDS" ] || STARPU_SCHEDS="dmdas modular-heft modular-heft-prio dmdap dmdar dmda dmdasd prio lws"
 [ -n "$STARPU_HOSTNAME" ] || export STARPU_HOSTNAME=mirage
 unset MALLOC_PERTURB_
 

+ 18 - 0
mpi/examples/Makefile.am

@@ -140,6 +140,10 @@ examplebin_PROGRAMS += 			\
 	mpi_lu/plu_outofcore_example_float	\
 	mpi_lu/plu_outofcore_example_double
 
+starpu_mpi_EXAMPLES	+=	\
+	mpi_lu/plu_implicit_example_float	\
+	mpi_lu/plu_implicit_example_double
+
 mpi_lu_plu_example_float_LDADD =	\
 	$(STARPU_LIBNUMA_LDFLAGS)				\
 	$(STARPU_BLAS_LDFLAGS) -lm
@@ -279,6 +283,7 @@ if BUILD_EXAMPLES
 if !STARPU_SANITIZE
 examplebin_PROGRAMS +=		\
 	native_fortran/nf_mm	\
+	native_fortran/nf_mm_task_build	\
 	native_fortran/nf_basic_ring
 
 native_fortran_nf_mm_SOURCES	=			\
@@ -290,6 +295,15 @@ native_fortran_nf_mm_SOURCES	=			\
 native_fortran_nf_mm_LDADD =					\
 	-lm
 
+native_fortran_nf_mm_task_build_SOURCES	=			\
+	native_fortran/nf_mm_cl.f90			\
+	$(top_srcdir)/mpi/include/fstarpu_mpi_mod.f90	\
+	$(top_srcdir)/include/fstarpu_mod.f90		\
+	native_fortran/nf_mm_task_build.f90
+
+native_fortran_nf_mm_task_build_LDADD =					\
+	-lm
+
 native_fortran_nf_basic_ring_SOURCES	=			\
 	$(top_srcdir)/mpi/include/fstarpu_mpi_mod.f90	\
 	$(top_srcdir)/include/fstarpu_mod.f90		\
@@ -301,6 +315,7 @@ native_fortran_nf_basic_ring_LDADD =					\
 if !STARPU_SIMGRID
 starpu_mpi_EXAMPLES +=				\
 	native_fortran/nf_mm			\
+	native_fortran/nf_mm_task_build		\
 	native_fortran/nf_basic_ring
 endif
 endif
@@ -396,6 +411,9 @@ nf_mm_cl.o: $(top_srcdir)/mpi/examples/native_fortran/nf_mm_cl.f90 fstarpu_mpi_m
 nf_mm.o: $(top_srcdir)/mpi/examples/native_fortran/nf_mm.f90 nf_mm_cl.mod fstarpu_mpi_mod.mod fstarpu_mod.mod
 	$(AM_V_FC)$(FC) $(native_fortran_nf_mm_FCFLAGS) $(FCFLAGS) -c -o $@ `test -f 'native_fortran/nf_mm.f90' || echo '$(srcdir)/'`native_fortran/nf_mm.f90
 
+nf_mm_task_build.o: $(top_srcdir)/mpi/examples/native_fortran/nf_mm_task_build.f90 nf_mm_cl.mod fstarpu_mpi_mod.mod fstarpu_mod.mod
+	$(AM_V_FC)$(FC) $(native_fortran_nf_mm__task_build_FCFLAGS) $(FCFLAGS) -c -o $@ `test -f 'native_fortran/nf_mm_task_build.f90' || echo '$(srcdir)/'`native_fortran/nf_mm_task_build.f90
+
 nf_basic_ring.o: $(top_srcdir)/mpi/examples/native_fortran/nf_basic_ring.f90 fstarpu_mpi_mod.mod fstarpu_mod.mod
 	$(AM_V_FC)$(FC) $(native_fortran_nf_basic_ring_FCFLAGS) $(FCFLAGS) -c -o $@ `test -f 'native_fortran/nf_basic_ring.f90' || echo '$(srcdir)/'`native_fortran/nf_basic_ring.f90
 endif

+ 2 - 1
mpi/examples/complex/mpi_complex.c

@@ -112,11 +112,12 @@ int main(int argc, char **argv)
 	}
 	else if (rank == 1)
 	{
+		MPI_Status status;
 		starpu_data_handle_t xhandle;
 		double xreal = 14.0;
 		double ximaginary = 18.0;
 		starpu_complex_data_register(&xhandle, STARPU_MAIN_RAM, &xreal, &ximaginary, 1);
-		starpu_mpi_recv(xhandle, 0, 10, MPI_COMM_WORLD, NULL);
+		starpu_mpi_recv(xhandle, 0, 10, MPI_COMM_WORLD, &status);
 		starpu_data_unregister(xhandle);
 		FPRINTF(stderr, "[received] real %f imaginary %f\n", xreal, ximaginary);
 		STARPU_ASSERT_MSG(xreal == 4 && ximaginary == 8, "Incorrect received value\n");

+ 0 - 5
mpi/examples/filters/filter.c

@@ -168,9 +168,4 @@ int main(int argc, char **argv)
 	starpu_mpi_shutdown();
 
 	return ok;
-
-enodev:
-	FPRINTF(stderr, "WARNING: No one can execute this task\n");
-	starpu_shutdown();
-	return 77;
 }

+ 11 - 4
mpi/examples/mpi_lu/plu_implicit_example.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2018                                CNRS
+ * Copyright (C) 2010-2019                                CNRS
  * Copyright (C) 2010,2011,2013-2015,2017,2018            Université de Bordeaux
  * Copyright (C) 2013                                     Thibaut Lambert
  * Copyright (C) 2012,2013                                Inria
@@ -34,8 +34,8 @@
 static unsigned long size = 4096;
 static unsigned nblocks = 16;
 static unsigned check = 0;
-static int p = 1;
-static int q = 1;
+static int p = -1;
+static int q = -1;
 static unsigned display = 0;
 static unsigned no_prio = 0;
 
@@ -252,7 +252,14 @@ int main(int argc, char **argv)
 	starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank);
 	starpu_mpi_comm_size(MPI_COMM_WORLD, &world_size);
 
-	STARPU_ASSERT(p*q == world_size);
+	if (p == -1 && q==-1)
+	{
+		fprintf(stderr, "Setting default values for p and q\n");
+		p = (q % 2 == 0) ? 2 : 1;
+		q = world_size / p;
+
+	}
+	STARPU_ASSERT_MSG(p*q == world_size, "p=%d, q=%d, world_size=%d\n", p, q, world_size);
 
 	starpu_cublas_init();
 

+ 10 - 9
mpi/examples/native_fortran/nf_mm.f90

@@ -1,6 +1,6 @@
 ! StarPU --- Runtime system for heterogeneous multicore architectures.
 !
-! Copyright (C) 2017                                     CNRS
+! Copyright (C) 2017, 2019                               CNRS
 ! Copyright (C) 2016                                     Inria
 ! Copyright (C) 2016                                     Université de Bordeaux
 !
@@ -23,7 +23,8 @@ program nf_mm
         implicit none
 
         logical, parameter :: verbose = .false.
-        integer(c_int) :: comm_rank, comm_size, comm_world
+        integer(c_int) :: comm_size, comm_rank
+        integer(c_int), target :: comm_world
         integer(c_int) :: N = 16, BS = 4, NB
         real(kind=c_double),allocatable,target :: A(:,:), B(:,:), C(:,:)
         type(c_ptr),allocatable :: dh_A(:), dh_B(:), dh_C(:,:)
@@ -166,13 +167,13 @@ program nf_mm
         end do
 
         do b_col=1,NB
-        do b_row=1,NB
-                ret = fstarpu_mpi_task_insert(comm_world, (/ cl_mm, &
-                        FSTARPU_R,  dh_A(b_row), &
-                        FSTARPU_R,  dh_B(b_col), &
-                        FSTARPU_RW, dh_C(b_row,b_col), &
-                        C_NULL_PTR /))
-        end do
+           do b_row=1,NB
+              call fstarpu_mpi_task_insert((/ c_loc(comm_world), cl_mm, &
+                   FSTARPU_R,  dh_A(b_row), &
+                   FSTARPU_R,  dh_B(b_col), &
+                   FSTARPU_RW, dh_C(b_row,b_col), &
+                   C_NULL_PTR /))
+           end do
         end do
 
         call fstarpu_task_wait_for_all()

+ 248 - 0
mpi/examples/native_fortran/nf_mm_task_build.f90

@@ -0,0 +1,248 @@
+! StarPU --- Runtime system for heterogeneous multicore architectures.
+!
+! Copyright (C) 2017, 2019                               CNRS
+! Copyright (C) 2016                                     Inria
+! Copyright (C) 2016                                     Université de Bordeaux
+!
+! StarPU is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at
+! your option) any later version.
+!
+! StarPU is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+!
+! See the GNU Lesser General Public License in COPYING.LGPL for more details.
+!
+program nf_mm
+        use iso_c_binding       ! C interfacing module
+        use fstarpu_mod         ! StarPU interfacing module
+        use fstarpu_mpi_mod     ! StarPU-MPI interfacing module
+        use nf_mm_cl
+        implicit none
+
+        logical, parameter :: verbose = .false.
+        integer(c_int) :: comm_size, comm_rank
+        integer(c_int), target :: comm_world
+        integer(c_int) :: N = 16, BS = 4, NB
+        real(kind=c_double),allocatable,target :: A(:,:), B(:,:), C(:,:)
+        type(c_ptr),allocatable :: dh_A(:), dh_B(:), dh_C(:,:)
+        type(c_ptr) :: cl_mm
+        type(c_ptr) :: task
+        integer(c_int) :: ncpu
+        integer(c_int) :: ret
+        integer(c_int) :: row, col
+        integer(c_int) :: b_row, b_col
+        integer(c_int) :: mr, tag, rank
+
+        ret = fstarpu_init(C_NULL_PTR)
+        if (ret == -19) then
+                stop 77
+        else if (ret /= 0) then
+                stop 1
+        end if
+
+        ret = fstarpu_mpi_init(1)
+        print *,"fstarpu_mpi_init status:", ret
+        if (ret /= 0) then
+                stop 1
+        end if
+
+        ! stop there if no CPU worker available
+        ncpu = fstarpu_cpu_worker_get_count()
+        if (ncpu == 0) then
+                call fstarpu_shutdown()
+                stop 77
+        end if
+
+        comm_world = fstarpu_mpi_world_comm()
+        comm_size = fstarpu_mpi_world_size()
+        comm_rank = fstarpu_mpi_world_rank()
+
+        if (comm_size < 2) then
+                call fstarpu_shutdown()
+                ret = fstarpu_mpi_shutdown()
+                stop 77
+        end if
+
+        ! TODO: process app's argc/argv
+        NB = N/BS
+
+        ! allocate and initialize codelet
+        cl_mm = fstarpu_codelet_allocate()
+        call fstarpu_codelet_set_name(cl_mm, c_char_"nf_mm_cl"//c_null_char)
+        call fstarpu_codelet_add_cpu_func(cl_mm, C_FUNLOC(cl_cpu_mult))
+        call fstarpu_codelet_add_buffer(cl_mm, FSTARPU_R)
+        call fstarpu_codelet_add_buffer(cl_mm, FSTARPU_R)
+        call fstarpu_codelet_add_buffer(cl_mm, FSTARPU_RW)
+
+        ! allocate matrices
+        if (comm_rank == 0) then
+                allocate(A(N,N))
+                allocate(B(N,N))
+                allocate(C(N,N))
+        end if
+
+        ! init matrices
+        if (comm_rank == 0) then
+                do col=1,N
+                do row=1,N
+                if (row == col) then
+                        A(row,col) = 2
+                else
+                        A(row,col) = 0
+                end if
+                B(row,col) = row*N+col
+                C(row,col) = 0
+                end do
+                end do
+
+                if (verbose) then
+                        print *,"A"
+                        call mat_disp(A)
+                        print *,"B"
+                        call mat_disp(B)
+                        print *,"C"
+                        call mat_disp(C)
+                end if
+        end if
+
+        ! allocate data handles
+        allocate(dh_A(NB))
+        allocate(dh_B(NB))
+        allocate(dh_C(NB,NB))
+
+        ! register matrices
+        if (comm_rank == 0) then
+                mr = 0 ! TODO: use STARPU_MAIN_RAM constant
+        else
+                mr = -1
+        end if
+        tag = 0
+
+        do b_row=1,NB
+                if (comm_rank == 0) then
+                        call fstarpu_matrix_data_register(dh_A(b_row), mr, &
+                                c_loc( A(1+(b_row-1)*BS,1) ), N, BS, N, c_sizeof(A(1,1)))
+                else
+                        call fstarpu_matrix_data_register(dh_A(b_row), mr, &
+                                c_null_ptr, N, BS, N, c_sizeof(A(1,1)))
+                end if
+                call fstarpu_mpi_data_register(dh_A(b_row), tag, 0)
+                tag = tag+1
+        end do
+
+        do b_col=1,NB
+                if (comm_rank == 0) then
+                        call fstarpu_matrix_data_register(dh_B(b_col), mr, &
+                                c_loc( B(1,1+(b_col-1)*BS) ), N, N, BS, c_sizeof(B(1,1)))
+                else
+                        call fstarpu_matrix_data_register(dh_B(b_col), mr, &
+                                c_null_ptr, N, N, BS, c_sizeof(B(1,1)))
+                end if
+                call fstarpu_mpi_data_register(dh_B(b_col), tag, 0)
+                tag = tag+1
+        end do
+
+        do b_col=1,NB
+        do b_row=1,NB
+                if (comm_rank == 0) then
+                        call fstarpu_matrix_data_register(dh_C(b_row,b_col), mr, &
+                                c_loc( C(1+(b_row-1)*BS,1+(b_col-1)*BS) ), N, BS, BS, c_sizeof(C(1,1)))
+                else
+                        call fstarpu_matrix_data_register(dh_C(b_row,b_col), mr, &
+                                c_null_ptr, N, BS, BS, c_sizeof(C(1,1)))
+                end if
+                call fstarpu_mpi_data_register(dh_C(b_row,b_col), tag, 0)
+                tag = tag+1
+        end do
+        end do
+
+        ! distribute matrix C
+        do b_col=1,NB
+        do b_row=1,NB
+        rank = modulo(b_row+b_col, comm_size)
+        call fstarpu_mpi_data_migrate(comm_world, dh_c(b_row,b_col), rank)
+        end do
+        end do
+
+        do b_col=1,NB
+           do b_row=1,NB
+              task = fstarpu_mpi_task_build((/ c_loc(comm_world), cl_mm, &
+                   				FSTARPU_R,  dh_A(b_row), &
+                                                FSTARPU_R,  dh_B(b_col), &
+                                                FSTARPU_RW, dh_C(b_row,b_col), &
+                                                C_NULL_PTR /))
+              if (c_associated(task)) then
+                 ret = fstarpu_task_submit(task)
+              endif
+              call fstarpu_mpi_task_post_build((/ c_loc(comm_world), cl_mm, &
+                   				FSTARPU_R,  dh_A(b_row), &
+                                                FSTARPU_R,  dh_B(b_col), &
+                                                FSTARPU_RW, dh_C(b_row,b_col), &
+                                                C_NULL_PTR /))
+           end do
+        end do
+
+        call fstarpu_task_wait_for_all()
+
+        ! undistribute matrix C
+        do b_col=1,NB
+        do b_row=1,NB
+        call fstarpu_mpi_data_migrate(comm_world, dh_c(b_row,b_col), 0)
+        end do
+        end do
+
+        ! unregister matrices
+        do b_row=1,NB
+                call fstarpu_data_unregister(dh_A(b_row))
+        end do
+
+        do b_col=1,NB
+                call fstarpu_data_unregister(dh_B(b_col))
+        end do
+
+        do b_col=1,NB
+        do b_row=1,NB
+                call fstarpu_data_unregister(dh_C(b_row,b_col))
+        end do
+        end do
+
+        ! check result
+        if (comm_rank == 0) then
+                if (verbose) then
+                        print *,"final C"
+                        call mat_disp(C)
+                end if
+
+                do col=1,N
+                do row=1,N
+                if (abs(C(row,col) - 2*(row*N+col)) > 1.0) then
+                        print *, "check failed"
+                        stop 1
+                end if
+                end do
+                end do
+        end if
+
+        ! free handles
+        deallocate(dh_A)
+        deallocate(dh_B)
+        deallocate(dh_C)
+
+        ! free matrices
+        if (comm_rank == 0) then
+                deallocate(A)
+                deallocate(B)
+                deallocate(C)
+        end if
+        call fstarpu_codelet_free(cl_mm)
+        call fstarpu_shutdown()
+
+        ret = fstarpu_mpi_shutdown()
+        print *,"fstarpu_mpi_shutdown status:", ret
+        if (ret /= 0) then
+                stop 1
+        end if
+end program nf_mm

+ 2 - 1
mpi/examples/user_datatype/user_datatype.c

@@ -92,10 +92,11 @@ int main(int argc, char **argv)
 	}
 	else if (rank == 1)
 	{
+		MPI_Status status;
 		struct starpu_my_data myx = {.d = 11 , .c = 'a'};
 		starpu_data_handle_t handlex;
 		starpu_my_data_register(&handlex, STARPU_MAIN_RAM, &myx);
-		starpu_mpi_recv(handlex, 0, 10, MPI_COMM_WORLD, NULL);
+		starpu_mpi_recv(handlex, 0, 10, MPI_COMM_WORLD, &status);
 		starpu_data_unregister(handlex);
 		FPRINTF(stderr, "[starpu mpi] myx.d=%d myx.c=%c\n", myx.d, myx.c);
 		STARPU_ASSERT_MSG(myx.d == 98 && myx.c == 'z', "Incorrect received value\n");

+ 16 - 23
mpi/include/fstarpu_mpi_mod.f90

@@ -1,6 +1,6 @@
 ! StarPU --- Runtime system for heterogeneous multicore architectures.
 !
-! Copyright (C) 2017                                     CNRS
+! Copyright (C) 2017,2019                                CNRS
 ! Copyright (C) 2016                                     Inria
 ! Copyright (C) 2016,2017                                Université de Bordeaux
 !
@@ -247,34 +247,27 @@ module fstarpu_mpi_mod
                 end function fstarpu_mpi_shutdown
 
                 ! struct starpu_task *starpu_mpi_task_build(MPI_Comm comm, struct starpu_codelet *codelet, ...);
-                function fstarpu_mpi_task_build(mpi_comm,arglist) bind(C)
-                        use iso_c_binding, only: c_ptr,c_int
+                function fstarpu_mpi_task_build(arglist) bind(C)
+                        use iso_c_binding, only: c_ptr
                         type(c_ptr) :: fstarpu_mpi_task_build
-                        integer(c_int), value, intent(in) :: mpi_comm
-                        type(c_ptr), dimension(:), intent(in) :: arglist
+                        type(c_ptr), dimension(*), intent(in) :: arglist
                 end function fstarpu_mpi_task_build
 
                 ! int starpu_mpi_task_post_build(MPI_Comm comm, struct starpu_codelet *codelet, ...);
-                function fstarpu_mpi_task_post_build(mpi_comm,arglist) bind(C)
-                        use iso_c_binding, only: c_ptr,c_int
-                        integer(c_int) :: fstarpu_mpi_task_post_build
-                        integer(c_int), value, intent(in) :: mpi_comm
-                        type(c_ptr), dimension(:), intent(in) :: arglist
-                end function fstarpu_mpi_task_post_build
+                subroutine fstarpu_mpi_task_post_build(arglist) bind(C)
+                        use iso_c_binding, only: c_ptr
+                        type(c_ptr), dimension(*), intent(in) :: arglist
+                end subroutine fstarpu_mpi_task_post_build
 
                 ! int starpu_mpi_task_insert(MPI_Comm comm, struct starpu_codelet *codelet, ...);
-                function fstarpu_mpi_task_insert(mpi_comm,arglist) bind(C)
-                        use iso_c_binding, only: c_ptr,c_int
-                        integer(c_int) :: fstarpu_mpi_task_insert
-                        integer(c_int), value, intent(in) :: mpi_comm
-                        type(c_ptr), dimension(:), intent(in) :: arglist
-                end function fstarpu_mpi_task_insert
-                function fstarpu_mpi_insert_task(mpi_comm,arglist) bind(C,name="fstarpu_mpi_task_insert")
-                        use iso_c_binding, only: c_ptr,c_int
-                        integer(c_int) :: fstarpu_mpi_insert_task
-                        integer(c_int), value, intent(in) :: mpi_comm
-                        type(c_ptr), dimension(:), intent(in) :: arglist
-                end function fstarpu_mpi_insert_task
+                subroutine fstarpu_mpi_task_insert(arglist) bind(C)
+                        use iso_c_binding, only: c_ptr
+                        type(c_ptr), dimension(*), intent(in) :: arglist
+                end subroutine fstarpu_mpi_task_insert
+                subroutine fstarpu_mpi_insert_task(arglist) bind(C,name="fstarpu_mpi_task_insert")
+                        use iso_c_binding, only: c_ptr
+                        type(c_ptr), dimension(*), intent(in) :: arglist
+                end subroutine fstarpu_mpi_insert_task
 
                 ! void starpu_mpi_get_data_on_node(MPI_Comm comm, starpu_data_handle_t data_handle, int node);
                 subroutine fstarpu_mpi_get_data_on_node(mpi_comm,dh,node) bind(C)

+ 5 - 1
mpi/src/Makefile.am

@@ -1,7 +1,7 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 # Copyright (C) 2012,2016                                Inria
-# Copyright (C) 2010-2018                                CNRS
+# Copyright (C) 2010-2019                                CNRS
 # Copyright (C) 2009-2014,2018                           Université de Bordeaux
 #
 # StarPU is free software; you can redistribute it and/or modify
@@ -73,6 +73,8 @@ noinst_HEADERS =					\
 	mpi/starpu_mpi_comm.h				\
 	mpi/starpu_mpi_tag.h				\
 	mpi/starpu_mpi_driver.h				\
+	mpi/starpu_mpi_mpi_backend.h			\
+	nmad/starpu_mpi_nmad_backend.h			\
 	load_balancer/policy/data_movements_interface.h	\
 	load_balancer/policy/load_data_interface.h	\
 	load_balancer/policy/load_balancer_policy.h
@@ -94,7 +96,9 @@ libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_SOURCES =	\
 	starpu_mpi_task_insert_fortran.c		\
 	starpu_mpi_init.c				\
 	nmad/starpu_mpi_nmad.c				\
+	nmad/starpu_mpi_nmad_backend.c			\
 	mpi/starpu_mpi_mpi.c				\
+	mpi/starpu_mpi_mpi_backend.c			\
 	mpi/starpu_mpi_early_data.c			\
 	mpi/starpu_mpi_early_request.c			\
 	mpi/starpu_mpi_sync_data.c			\

+ 2 - 2
mpi/src/load_balancer/policy/load_heat_propagation.c

@@ -290,12 +290,12 @@ static void update_data_ranks()
 				//        fprintf(stderr,"Bring back data %p (tag %d) from node %d on node %d\n", handle, (data_movements_get_tags_table(data_movements_handles[i]))[j], starpu_mpi_data_get_rank(handle), my_rank);
 				//}
 
-				_STARPU_DEBUG("Call of starpu_mpi_get_data_on_node(%"PRIi64"d,%d) on node %d\n", starpu_mpi_data_get_tag(handle), dst_rank, my_rank);
+				_STARPU_DEBUG("Call of starpu_mpi_get_data_on_node(%"PRIi64",%d) on node %d\n", starpu_mpi_data_get_tag(handle), dst_rank, my_rank);
 
 				/* Migrate the data handle */
 				starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, handle, dst_rank, NULL, NULL);
 
-				_STARPU_DEBUG("New rank (%d) of data %"PRIi64"d upgraded on node %d\n", dst_rank, starpu_mpi_data_get_tag(handle), my_rank);
+				_STARPU_DEBUG("New rank (%d) of data %"PRIi64" upgraded on node %d\n", dst_rank, starpu_mpi_data_get_tag(handle), my_rank);
 				starpu_mpi_data_set_rank_comm(handle, dst_rank, MPI_COMM_WORLD);
 			}
 		}

+ 3 - 1
mpi/src/mpi/starpu_mpi_comm.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2015-2017                                CNRS
+ * Copyright (C) 2015-2017, 2019                          CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -23,6 +23,8 @@
 
 #ifdef STARPU_USE_MPI_MPI
 
+#include <mpi/starpu_mpi_mpi_backend.h>
+
 #ifdef __cplusplus
 extern "C"
 {

+ 1 - 0
mpi/src/mpi/starpu_mpi_early_data.c

@@ -18,6 +18,7 @@
 #include <stdlib.h>
 #include <starpu_mpi.h>
 #include <mpi/starpu_mpi_early_data.h>
+#include <mpi/starpu_mpi_mpi_backend.h>
 #include <starpu_mpi_private.h>
 #include <common/uthash.h>
 

+ 115 - 114
mpi/src/mpi/starpu_mpi_mpi.c

@@ -166,7 +166,7 @@ void _starpu_mpi_submit_ready_request(void *arg)
 
 	_STARPU_MPI_INC_POSTED_REQUESTS(-1);
 
-	_STARPU_MPI_DEBUG(0, "new req %p srcdst %d tag %"PRIi64"d and type %s %d\n", req, req->node_tag.rank, req->node_tag.data_tag, _starpu_mpi_request_type(req->request_type), req->is_internal_req);
+	_STARPU_MPI_DEBUG(0, "new req %p srcdst %d tag %"PRIi64" and type %s %d\n", req, req->node_tag.rank, req->node_tag.data_tag, _starpu_mpi_request_type(req->request_type), req->backend->is_internal_req);
 
 	STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex);
 
@@ -178,7 +178,7 @@ void _starpu_mpi_submit_ready_request(void *arg)
 		 * pointer associated to the data_handle, and push it into the
 		 * ready_requests list, so as the real MPI request can be submitted
 		 * before the next submission of the envelope-catching request. */
-		if (req->is_internal_req)
+		if (req->backend->is_internal_req)
 		{
 			_starpu_mpi_datatype_allocate(req->data_handle, req);
 			if (req->registered_datatype == 1)
@@ -192,7 +192,7 @@ void _starpu_mpi_submit_ready_request(void *arg)
 				_STARPU_MPI_MALLOC(req->ptr, req->count);
 			}
 
-			_STARPU_MPI_DEBUG(3, "Pushing internal starpu_mpi_irecv request %p type %s tag %"PRIi64"d src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n",
+			_STARPU_MPI_DEBUG(3, "Pushing internal starpu_mpi_irecv request %p type %s tag %"PRIi64" src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n",
 					  req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, req->ptr,
 					  req->datatype_name, (int)req->count, req->registered_datatype);
 			_starpu_mpi_req_list_push_front(&ready_recv_requests, req);
@@ -200,10 +200,10 @@ void _starpu_mpi_submit_ready_request(void *arg)
 
 			/* inform the starpu mpi thread that the request has been pushed in the ready_requests list */
 			STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex);
-			STARPU_PTHREAD_MUTEX_LOCK(&req->posted_mutex);
+			STARPU_PTHREAD_MUTEX_LOCK(&req->backend->posted_mutex);
 			req->posted = 1;
-			STARPU_PTHREAD_COND_BROADCAST(&req->posted_cond);
-			STARPU_PTHREAD_MUTEX_UNLOCK(&req->posted_mutex);
+			STARPU_PTHREAD_COND_BROADCAST(&req->backend->posted_cond);
+			STARPU_PTHREAD_MUTEX_UNLOCK(&req->backend->posted_mutex);
 			STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex);
 		}
 		else
@@ -224,11 +224,11 @@ void _starpu_mpi_submit_ready_request(void *arg)
 				STARPU_PTHREAD_MUTEX_UNLOCK(&(early_data_handle->req_mutex));
 				STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex);
 
-				_STARPU_MPI_DEBUG(3, "The RECV request %p with tag %"PRIi64"d has already been received, copying previously received data into handle's pointer..\n", req, req->node_tag.data_tag);
+				_STARPU_MPI_DEBUG(3, "The RECV request %p with tag %"PRIi64" has already been received, copying previously received data into handle's pointer..\n", req, req->node_tag.data_tag);
 				STARPU_ASSERT(req->data_handle != early_data_handle->handle);
 
-				req->internal_req = early_data_handle->req;
-				req->early_data_handle = early_data_handle;
+				req->backend->internal_req = early_data_handle->req;
+				req->backend->early_data_handle = early_data_handle;
 
 				struct _starpu_mpi_early_data_cb_args *cb_args;
 				_STARPU_MPI_MALLOC(cb_args, sizeof(struct _starpu_mpi_early_data_cb_args));
@@ -246,7 +246,7 @@ void _starpu_mpi_submit_ready_request(void *arg)
 			else
 			{
 				struct _starpu_mpi_req *sync_req = _starpu_mpi_sync_data_find(req->node_tag.data_tag, req->node_tag.rank, req->node_tag.comm);
-				_STARPU_MPI_DEBUG(3, "----------> Looking for sync data for tag %"PRIi64"d and src %d = %p\n", req->node_tag.data_tag, req->node_tag.rank, sync_req);
+				_STARPU_MPI_DEBUG(3, "----------> Looking for sync data for tag %"PRIi64" and src %d = %p\n", req->node_tag.data_tag, req->node_tag.rank, sync_req);
 				if (sync_req)
 				{
 					req->sync = 1;
@@ -268,7 +268,7 @@ void _starpu_mpi_submit_ready_request(void *arg)
 				}
 				else
 				{
-					_STARPU_MPI_DEBUG(3, "Adding the pending receive request %p (srcdst %d tag %"PRIi64"d) into the request hashmap\n", req, req->node_tag.rank, req->node_tag.data_tag);
+					_STARPU_MPI_DEBUG(3, "Adding the pending receive request %p (srcdst %d tag %"PRIi64") into the request hashmap\n", req, req->node_tag.rank, req->node_tag.data_tag);
 					_starpu_mpi_early_request_enqueue(req);
 				}
 			}
@@ -281,7 +281,7 @@ void _starpu_mpi_submit_ready_request(void *arg)
 		else
 			_starpu_mpi_req_list_push_front(&ready_recv_requests, req);
 		_STARPU_MPI_INC_READY_REQUESTS(+1);
-		_STARPU_MPI_DEBUG(3, "Pushing new request %p type %s tag %"PRIi64"d src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n",
+		_STARPU_MPI_DEBUG(3, "Pushing new request %p type %s tag %"PRIi64" src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n",
 				  req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, req->ptr,
 				  req->datatype_name, (int)req->count, req->registered_datatype);
 	}
@@ -359,7 +359,7 @@ static void _starpu_mpi_isend_data_func(struct _starpu_mpi_req *req)
 {
 	_STARPU_MPI_LOG_IN();
 
-	_STARPU_MPI_DEBUG(0, "post MPI isend request %p type %s tag %"PRIi64"d src %d data %p datasize %ld ptr %p datatype '%s' count %d registered_datatype %d sync %d\n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, starpu_data_get_size(req->data_handle), req->ptr, req->datatype_name, (int)req->count, req->registered_datatype, req->sync);
+	_STARPU_MPI_DEBUG(0, "post MPI isend request %p type %s tag %"PRIi64" src %d data %p datasize %ld ptr %p datatype '%s' count %d registered_datatype %d sync %d\n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, starpu_data_get_size(req->data_handle), req->ptr, req->datatype_name, (int)req->count, req->registered_datatype, req->sync);
 
 	_starpu_mpi_comm_amounts_inc(req->node_tag.comm, req->node_tag.rank, req->datatype, req->count);
 
@@ -368,27 +368,27 @@ static void _starpu_mpi_isend_data_func(struct _starpu_mpi_req *req)
 	if (req->sync == 0)
 	{
 		_STARPU_MPI_COMM_TO_DEBUG(req, req->count, req->datatype, req->node_tag.rank, _STARPU_MPI_TAG_DATA, req->node_tag.data_tag, req->node_tag.comm);
-		req->ret = MPI_Isend(req->ptr, req->count, req->datatype, req->node_tag.rank, _STARPU_MPI_TAG_DATA, req->node_tag.comm, &req->data_request);
+		req->ret = MPI_Isend(req->ptr, req->count, req->datatype, req->node_tag.rank, _STARPU_MPI_TAG_DATA, req->node_tag.comm, &req->backend->data_request);
 		STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Isend returning %s", _starpu_mpi_get_mpi_error_code(req->ret));
 	}
 	else
 	{
 		_STARPU_MPI_COMM_TO_DEBUG(req, req->count, req->datatype, req->node_tag.rank, _STARPU_MPI_TAG_SYNC_DATA, req->node_tag.data_tag, req->node_tag.comm);
-		req->ret = MPI_Issend(req->ptr, req->count, req->datatype, req->node_tag.rank, _STARPU_MPI_TAG_SYNC_DATA, req->node_tag.comm, &req->data_request);
+		req->ret = MPI_Issend(req->ptr, req->count, req->datatype, req->node_tag.rank, _STARPU_MPI_TAG_SYNC_DATA, req->node_tag.comm, &req->backend->data_request);
 		STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Issend returning %s", _starpu_mpi_get_mpi_error_code(req->ret));
 	}
 
 #ifdef STARPU_SIMGRID
-	_starpu_mpi_simgrid_wait_req(&req->data_request, &req->status_store, &req->queue, &req->done);
+	_starpu_mpi_simgrid_wait_req(&req->backend->data_request, &req->status_store, &req->queue, &req->done);
 #endif
 
 	_STARPU_MPI_TRACE_ISEND_SUBMIT_END(req->node_tag.rank, req->node_tag.data_tag, starpu_data_get_size(req->data_handle), req->pre_sync_jobid);
 
 	/* somebody is perhaps waiting for the MPI request to be posted */
-	STARPU_PTHREAD_MUTEX_LOCK(&req->req_mutex);
+	STARPU_PTHREAD_MUTEX_LOCK(&req->backend->req_mutex);
 	req->submitted = 1;
-	STARPU_PTHREAD_COND_BROADCAST(&req->req_cond);
-	STARPU_PTHREAD_MUTEX_UNLOCK(&req->req_mutex);
+	STARPU_PTHREAD_COND_BROADCAST(&req->backend->req_cond);
+	STARPU_PTHREAD_MUTEX_UNLOCK(&req->backend->req_mutex);
 
 	_starpu_mpi_handle_detached_request(req);
 
@@ -399,54 +399,55 @@ void _starpu_mpi_isend_size_func(struct _starpu_mpi_req *req)
 {
 	_starpu_mpi_datatype_allocate(req->data_handle, req);
 
-	_STARPU_MPI_CALLOC(req->envelope, 1,sizeof(struct _starpu_mpi_envelope));
-	req->envelope->mode = _STARPU_MPI_ENVELOPE_DATA;
-	req->envelope->data_tag = req->node_tag.data_tag;
-	req->envelope->sync = req->sync;
+	_STARPU_MPI_CALLOC(req->backend->envelope, 1,sizeof(struct _starpu_mpi_envelope));
+	req->backend->envelope->mode = _STARPU_MPI_ENVELOPE_DATA;
+	req->backend->envelope->data_tag = req->node_tag.data_tag;
+	req->backend->envelope->sync = req->sync;
 
 	if (req->registered_datatype == 1)
 	{
-		int size;
+		int size, ret;
 		req->count = 1;
 		req->ptr = starpu_data_handle_to_pointer(req->data_handle, STARPU_MAIN_RAM);
 
 		MPI_Type_size(req->datatype, &size);
-		req->envelope->size = (starpu_ssize_t)req->count * size;
+		req->backend->envelope->size = (starpu_ssize_t)req->count * size;
 		_STARPU_MPI_DEBUG(20, "Post MPI isend count (%ld) datatype_size %ld request to %d\n",req->count,starpu_data_get_size(req->data_handle), req->node_tag.rank);
-		_STARPU_MPI_COMM_TO_DEBUG(req->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.rank, _STARPU_MPI_TAG_ENVELOPE, req->envelope->data_tag, req->node_tag.comm);
-		MPI_Isend(req->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.rank, _STARPU_MPI_TAG_ENVELOPE, req->node_tag.comm, &req->size_req);
+		_STARPU_MPI_COMM_TO_DEBUG(req->backend->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.rank, _STARPU_MPI_TAG_ENVELOPE, req->backend->envelope->data_tag, req->node_tag.comm);
+		ret = MPI_Isend(req->backend->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.rank, _STARPU_MPI_TAG_ENVELOPE, req->node_tag.comm, &req->backend->size_req);
+		STARPU_MPI_ASSERT_MSG(ret == MPI_SUCCESS, "when sending envelope, MPI_Isend returning %s", _starpu_mpi_get_mpi_error_code(ret));
 	}
 	else
 	{
 		int ret;
 
  		// Do not pack the data, just try to find out the size
-		starpu_data_pack(req->data_handle, NULL, &(req->envelope->size));
+		starpu_data_pack(req->data_handle, NULL, &(req->backend->envelope->size));
 
-		if (req->envelope->size != -1)
+		if (req->backend->envelope->size != -1)
  		{
  			// We already know the size of the data, let's send it to overlap with the packing of the data
-			_STARPU_MPI_DEBUG(20, "Sending size %ld (%ld %s) to node %d (first call to pack)\n", req->envelope->size, sizeof(req->count), "MPI_BYTE", req->node_tag.rank);
-			req->count = req->envelope->size;
-			_STARPU_MPI_COMM_TO_DEBUG(req->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.rank, _STARPU_MPI_TAG_ENVELOPE, req->envelope->data_tag, req->node_tag.comm);
-			ret = MPI_Isend(req->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.rank, _STARPU_MPI_TAG_ENVELOPE, req->node_tag.comm, &req->size_req);
+			_STARPU_MPI_DEBUG(20, "Sending size %ld (%ld %s) to node %d (first call to pack)\n", req->backend->envelope->size, sizeof(req->count), "MPI_BYTE", req->node_tag.rank);
+			req->count = req->backend->envelope->size;
+			_STARPU_MPI_COMM_TO_DEBUG(req->backend->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.rank, _STARPU_MPI_TAG_ENVELOPE, req->backend->envelope->data_tag, req->node_tag.comm);
+			ret = MPI_Isend(req->backend->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.rank, _STARPU_MPI_TAG_ENVELOPE, req->node_tag.comm, &req->backend->size_req);
 			STARPU_MPI_ASSERT_MSG(ret == MPI_SUCCESS, "when sending size, MPI_Isend returning %s", _starpu_mpi_get_mpi_error_code(ret));
  		}
 
  		// Pack the data
  		starpu_data_pack(req->data_handle, &req->ptr, &req->count);
-		if (req->envelope->size == -1)
+		if (req->backend->envelope->size == -1)
  		{
  			// We know the size now, let's send it
-			_STARPU_MPI_DEBUG(20, "Sending size %ld (%ld %s) to node %d (second call to pack)\n", req->envelope->size, sizeof(req->count), "MPI_BYTE", req->node_tag.rank);
-			_STARPU_MPI_COMM_TO_DEBUG(req->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.rank, _STARPU_MPI_TAG_ENVELOPE, req->envelope->data_tag, req->node_tag.comm);
-			ret = MPI_Isend(req->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.rank, _STARPU_MPI_TAG_ENVELOPE, req->node_tag.comm, &req->size_req);
+			_STARPU_MPI_DEBUG(20, "Sending size %ld (%ld %s) to node %d (second call to pack)\n", req->backend->envelope->size, sizeof(req->count), "MPI_BYTE", req->node_tag.rank);
+			_STARPU_MPI_COMM_TO_DEBUG(req->backend->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.rank, _STARPU_MPI_TAG_ENVELOPE, req->backend->envelope->data_tag, req->node_tag.comm);
+			ret = MPI_Isend(req->backend->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.rank, _STARPU_MPI_TAG_ENVELOPE, req->node_tag.comm, &req->backend->size_req);
 			STARPU_MPI_ASSERT_MSG(ret == MPI_SUCCESS, "when sending size, MPI_Isend returning %s", _starpu_mpi_get_mpi_error_code(ret));
  		}
  		else
  		{
  			// We check that the size returned by the 2 calls to pack is the same
-			STARPU_MPI_ASSERT_MSG(req->count == req->envelope->size, "Calls to pack_data returned different sizes %ld != %ld", req->count, req->envelope->size);
+			STARPU_MPI_ASSERT_MSG(req->count == req->backend->envelope->size, "Calls to pack_data returned different sizes %ld != %ld", req->count, req->backend->envelope->size);
  		}
 		// We can send the data now
 	}
@@ -473,7 +474,7 @@ void _starpu_mpi_irecv_size_func(struct _starpu_mpi_req *req)
 {
 	_STARPU_MPI_LOG_IN();
 
-	_STARPU_MPI_DEBUG(0, "post MPI irecv request %p type %s tag %"PRIi64"d src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype);
+	_STARPU_MPI_DEBUG(0, "post MPI irecv request %p type %s tag %"PRIi64" src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype);
 
 	_STARPU_MPI_TRACE_IRECV_SUBMIT_BEGIN(req->node_tag.rank, req->node_tag.data_tag);
 
@@ -494,14 +495,14 @@ void _starpu_mpi_irecv_size_func(struct _starpu_mpi_req *req)
 	if (req->sync)
 	{
 		_STARPU_MPI_COMM_FROM_DEBUG(req, req->count, req->datatype, req->node_tag.rank, _STARPU_MPI_TAG_SYNC_DATA, req->node_tag.data_tag, req->node_tag.comm);
-		req->ret = MPI_Irecv(req->ptr, req->count, req->datatype, req->node_tag.rank, _STARPU_MPI_TAG_SYNC_DATA, req->node_tag.comm, &req->data_request);
+		req->ret = MPI_Irecv(req->ptr, req->count, req->datatype, req->node_tag.rank, _STARPU_MPI_TAG_SYNC_DATA, req->node_tag.comm, &req->backend->data_request);
 	}
 	else
 	{
 		_STARPU_MPI_COMM_FROM_DEBUG(req, req->count, req->datatype, req->node_tag.rank, _STARPU_MPI_TAG_DATA, req->node_tag.data_tag, req->node_tag.comm);
-		req->ret = MPI_Irecv(req->ptr, req->count, req->datatype, req->node_tag.rank, _STARPU_MPI_TAG_DATA, req->node_tag.comm, &req->data_request);
+		req->ret = MPI_Irecv(req->ptr, req->count, req->datatype, req->node_tag.rank, _STARPU_MPI_TAG_DATA, req->node_tag.comm, &req->backend->data_request);
 #ifdef STARPU_SIMGRID
-		_starpu_mpi_simgrid_wait_req(&req->data_request, &req->status_store, &req->queue, &req->done);
+		_starpu_mpi_simgrid_wait_req(&req->backend->data_request, &req->status_store, &req->queue, &req->done);
 #endif
 	}
 	STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_IRecv returning %s", _starpu_mpi_get_mpi_error_code(req->ret));
@@ -509,10 +510,10 @@ void _starpu_mpi_irecv_size_func(struct _starpu_mpi_req *req)
 	_STARPU_MPI_TRACE_IRECV_SUBMIT_END(req->node_tag.rank, req->node_tag.data_tag);
 
 	/* somebody is perhaps waiting for the MPI request to be posted */
-	STARPU_PTHREAD_MUTEX_LOCK(&req->req_mutex);
+	STARPU_PTHREAD_MUTEX_LOCK(&req->backend->req_mutex);
 	req->submitted = 1;
-	STARPU_PTHREAD_COND_BROADCAST(&req->req_cond);
-	STARPU_PTHREAD_MUTEX_UNLOCK(&req->req_mutex);
+	STARPU_PTHREAD_COND_BROADCAST(&req->backend->req_cond);
+	STARPU_PTHREAD_MUTEX_UNLOCK(&req->backend->req_mutex);
 
 	_starpu_mpi_handle_detached_request(req);
 
@@ -529,16 +530,16 @@ void _starpu_mpi_wait_func(struct _starpu_mpi_req *waiting_req)
 {
 	_STARPU_MPI_LOG_IN();
 	/* Which is the mpi request we are waiting for ? */
-	struct _starpu_mpi_req *req = waiting_req->other_request;
+	struct _starpu_mpi_req *req = waiting_req->backend->other_request;
 
 	_STARPU_MPI_TRACE_UWAIT_BEGIN(req->node_tag.rank, req->node_tag.data_tag);
-	if (req->data_request != MPI_REQUEST_NULL)
+	if (req->backend->data_request != MPI_REQUEST_NULL)
 	{
 		// TODO: Fix for STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
 		STARPU_MPI_ASSERT_MSG(0, "Implement this in STARPU_SIMGRID");
 #endif
-		req->ret = MPI_Wait(&req->data_request, waiting_req->status);
+		req->ret = MPI_Wait(&req->backend->data_request, waiting_req->status);
 		STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Wait returning %s", _starpu_mpi_get_mpi_error_code(req->ret));
 	}
 	_STARPU_MPI_TRACE_UWAIT_END(req->node_tag.rank, req->node_tag.data_tag);
@@ -558,34 +559,34 @@ int _starpu_mpi_wait(starpu_mpi_req *public_req, MPI_Status *status)
 
 	/* We cannot try to complete a MPI request that was not actually posted
 	 * to MPI yet. */
-	STARPU_PTHREAD_MUTEX_LOCK(&(req->req_mutex));
+	STARPU_PTHREAD_MUTEX_LOCK(&(req->backend->req_mutex));
 	while (!(req->submitted))
-		STARPU_PTHREAD_COND_WAIT(&(req->req_cond), &(req->req_mutex));
-	STARPU_PTHREAD_MUTEX_UNLOCK(&(req->req_mutex));
+		STARPU_PTHREAD_COND_WAIT(&(req->backend->req_cond), &(req->backend->req_mutex));
+	STARPU_PTHREAD_MUTEX_UNLOCK(&(req->backend->req_mutex));
 
 	/* Initialize the request structure */
 	 _starpu_mpi_request_init(&waiting_req);
 	waiting_req->prio = INT_MAX;
 	waiting_req->status = status;
-	waiting_req->other_request = req;
+	waiting_req->backend->other_request = req;
 	waiting_req->func = _starpu_mpi_wait_func;
 	waiting_req->request_type = WAIT_REQ;
 
 	_starpu_mpi_submit_ready_request_inc(waiting_req);
 
 	/* We wait for the MPI request to finish */
-	STARPU_PTHREAD_MUTEX_LOCK(&req->req_mutex);
+	STARPU_PTHREAD_MUTEX_LOCK(&req->backend->req_mutex);
 	while (!req->completed)
-		STARPU_PTHREAD_COND_WAIT(&req->req_cond, &req->req_mutex);
-	STARPU_PTHREAD_MUTEX_UNLOCK(&req->req_mutex);
+		STARPU_PTHREAD_COND_WAIT(&req->backend->req_cond, &req->backend->req_mutex);
+	STARPU_PTHREAD_MUTEX_UNLOCK(&req->backend->req_mutex);
 
 	ret = req->ret;
 
 	/* The internal request structure was automatically allocated */
 	*public_req = NULL;
-	if (req->internal_req)
+	if (req->backend->internal_req)
 	{
-		_starpu_mpi_request_destroy(req->internal_req);
+		_starpu_mpi_request_destroy(req->backend->internal_req);
 	}
 	_starpu_mpi_request_destroy(req);
 	_starpu_mpi_request_destroy(waiting_req);
@@ -604,9 +605,9 @@ void _starpu_mpi_test_func(struct _starpu_mpi_req *testing_req)
 {
 	_STARPU_MPI_LOG_IN();
 	/* Which is the mpi request we are testing for ? */
-	struct _starpu_mpi_req *req = testing_req->other_request;
+	struct _starpu_mpi_req *req = testing_req->backend->other_request;
 
-	_STARPU_MPI_DEBUG(0, "Test request %p type %s tag %"PRIi64"d src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n",
+	_STARPU_MPI_DEBUG(0, "Test request %p type %s tag %"PRIi64" src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n",
 			  req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, req->ptr,
 			  req->datatype_name, (int)req->count, req->registered_datatype);
 
@@ -616,7 +617,7 @@ void _starpu_mpi_test_func(struct _starpu_mpi_req *testing_req)
 	req->ret = _starpu_mpi_simgrid_mpi_test(&req->done, testing_req->flag);
 	memcpy(testing_req->status, &req->status_store, sizeof(*testing_req->status));
 #else
-	req->ret = MPI_Test(&req->data_request, testing_req->flag, testing_req->status);
+	req->ret = MPI_Test(&req->backend->data_request, testing_req->flag, testing_req->status);
 #endif
 
 	STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Test returning %s", _starpu_mpi_get_mpi_error_code(req->ret));
@@ -629,10 +630,10 @@ void _starpu_mpi_test_func(struct _starpu_mpi_req *testing_req)
 		_starpu_mpi_handle_request_termination(req);
 	}
 
-	STARPU_PTHREAD_MUTEX_LOCK(&testing_req->req_mutex);
+	STARPU_PTHREAD_MUTEX_LOCK(&testing_req->backend->req_mutex);
 	testing_req->completed = 1;
-	STARPU_PTHREAD_COND_SIGNAL(&testing_req->req_cond);
-	STARPU_PTHREAD_MUTEX_UNLOCK(&testing_req->req_mutex);
+	STARPU_PTHREAD_COND_SIGNAL(&testing_req->backend->req_cond);
+	STARPU_PTHREAD_MUTEX_UNLOCK(&testing_req->backend->req_mutex);
 	_STARPU_MPI_LOG_OUT();
 }
 
@@ -647,9 +648,9 @@ int _starpu_mpi_test(starpu_mpi_req *public_req, int *flag, MPI_Status *status)
 
 	STARPU_MPI_ASSERT_MSG(!req->detached, "MPI_Test cannot be called on a detached request");
 
-	STARPU_PTHREAD_MUTEX_LOCK(&req->req_mutex);
+	STARPU_PTHREAD_MUTEX_LOCK(&req->backend->req_mutex);
 	unsigned submitted = req->submitted;
-	STARPU_PTHREAD_MUTEX_UNLOCK(&req->req_mutex);
+	STARPU_PTHREAD_MUTEX_UNLOCK(&req->backend->req_mutex);
 
 	if (submitted)
 	{
@@ -660,7 +661,7 @@ int _starpu_mpi_test(starpu_mpi_req *public_req, int *flag, MPI_Status *status)
 		testing_req->prio = INT_MAX;
 		testing_req->flag = flag;
 		testing_req->status = status;
-		testing_req->other_request = req;
+		testing_req->backend->other_request = req;
 		testing_req->func = _starpu_mpi_test_func;
 		testing_req->completed = 0;
 		testing_req->request_type = TEST_REQ;
@@ -668,10 +669,10 @@ int _starpu_mpi_test(starpu_mpi_req *public_req, int *flag, MPI_Status *status)
 		_starpu_mpi_submit_ready_request_inc(testing_req);
 
 		/* We wait for the test request to finish */
-		STARPU_PTHREAD_MUTEX_LOCK(&(testing_req->req_mutex));
+		STARPU_PTHREAD_MUTEX_LOCK(&(testing_req->backend->req_mutex));
 		while (!(testing_req->completed))
-			STARPU_PTHREAD_COND_WAIT(&(testing_req->req_cond), &(testing_req->req_mutex));
-		STARPU_PTHREAD_MUTEX_UNLOCK(&(testing_req->req_mutex));
+			STARPU_PTHREAD_COND_WAIT(&(testing_req->backend->req_cond), &(testing_req->backend->req_mutex));
+		STARPU_PTHREAD_MUTEX_UNLOCK(&(testing_req->backend->req_mutex));
 
 		ret = testing_req->ret;
 
@@ -681,9 +682,9 @@ int _starpu_mpi_test(starpu_mpi_req *public_req, int *flag, MPI_Status *status)
 			 * request structure which was automatically allocated
 			 * */
 			*public_req = NULL;
-			if (req->internal_req)
+			if (req->backend->internal_req)
 			{
-				_starpu_mpi_request_destroy(req->internal_req);
+				_starpu_mpi_request_destroy(req->backend->internal_req);
 			}
 			_starpu_mpi_request_destroy(req);
 		}
@@ -758,10 +759,10 @@ int _starpu_mpi_barrier(MPI_Comm comm)
 	_starpu_mpi_submit_ready_request(barrier_req);
 
 	/* We wait for the MPI request to finish */
-	STARPU_PTHREAD_MUTEX_LOCK(&barrier_req->req_mutex);
+	STARPU_PTHREAD_MUTEX_LOCK(&barrier_req->backend->req_mutex);
 	while (!barrier_req->completed)
-		STARPU_PTHREAD_COND_WAIT(&barrier_req->req_cond, &barrier_req->req_mutex);
-	STARPU_PTHREAD_MUTEX_UNLOCK(&barrier_req->req_mutex);
+		STARPU_PTHREAD_COND_WAIT(&barrier_req->backend->req_cond, &barrier_req->backend->req_mutex);
+	STARPU_PTHREAD_MUTEX_UNLOCK(&barrier_req->backend->req_mutex);
 
 	_starpu_mpi_request_destroy(barrier_req);
 	_STARPU_MPI_LOG_OUT();
@@ -795,14 +796,14 @@ static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req)
 {
 	_STARPU_MPI_LOG_IN();
 
-	_STARPU_MPI_DEBUG(2, "complete MPI request %p type %s tag %"PRIi64"d src %d data %p ptr %p datatype '%s' count %d registered_datatype %d internal_req %p\n",
+	_STARPU_MPI_DEBUG(2, "complete MPI request %p type %s tag %"PRIi64" src %d data %p ptr %p datatype '%s' count %d registered_datatype %d internal_req %p\n",
 			  req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle, req->ptr,
-			  req->datatype_name, (int)req->count, req->registered_datatype, req->internal_req);
+			  req->datatype_name, (int)req->count, req->registered_datatype, req->backend->internal_req);
 
-	if (req->internal_req)
+	if (req->backend->internal_req)
 	{
-		free(req->early_data_handle);
-		req->early_data_handle = NULL;
+		free(req->backend->early_data_handle);
+		req->backend->early_data_handle = NULL;
 	}
 	else
 	{
@@ -816,7 +817,7 @@ static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req)
 					// has completed, as MPI can re-order messages, let's call
 					// MPI_Wait to make sure data have been sent
 					int ret;
-					ret = MPI_Wait(&req->size_req, MPI_STATUS_IGNORE);
+					ret = MPI_Wait(&req->backend->size_req, MPI_STATUS_IGNORE);
 					STARPU_MPI_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Wait returning %s", _starpu_mpi_get_mpi_error_code(ret));
 					free(req->ptr);
 					req->ptr = NULL;
@@ -838,10 +839,10 @@ static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req)
 
 	_starpu_mpi_release_req_data(req);
 
-	if (req->envelope)
+	if (req->backend->envelope)
 	{
-		free(req->envelope);
-		req->envelope = NULL;
+		free(req->backend->envelope);
+		req->backend->envelope = NULL;
 	}
 
 	/* Execute the specified callback, if any */
@@ -850,10 +851,10 @@ static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req)
 
 	/* tell anyone potentially waiting on the request that it is
 	 * terminated now */
-	STARPU_PTHREAD_MUTEX_LOCK(&req->req_mutex);
+	STARPU_PTHREAD_MUTEX_LOCK(&req->backend->req_mutex);
 	req->completed = 1;
-	STARPU_PTHREAD_COND_BROADCAST(&req->req_cond);
-	STARPU_PTHREAD_MUTEX_UNLOCK(&req->req_mutex);
+	STARPU_PTHREAD_COND_BROADCAST(&req->backend->req_cond);
+	STARPU_PTHREAD_MUTEX_UNLOCK(&req->backend->req_mutex);
 	_STARPU_MPI_LOG_OUT();
 }
 
@@ -902,18 +903,18 @@ static void _starpu_mpi_early_data_cb(void* arg)
 		if (args->req->detached)
 		{
 			/* have the internal request destroyed now or when completed */
-			STARPU_PTHREAD_MUTEX_LOCK(&args->req->internal_req->req_mutex);
-			if (args->req->internal_req->to_destroy)
+			STARPU_PTHREAD_MUTEX_LOCK(&args->req->backend->internal_req->backend->req_mutex);
+			if (args->req->backend->internal_req->backend->to_destroy)
 			{
 				/* The request completed first, can now destroy it */
-				STARPU_PTHREAD_MUTEX_UNLOCK(&args->req->internal_req->req_mutex);
-				_starpu_mpi_request_destroy(args->req->internal_req);
+				STARPU_PTHREAD_MUTEX_UNLOCK(&args->req->backend->internal_req->backend->req_mutex);
+				_starpu_mpi_request_destroy(args->req->backend->internal_req);
 			}
 			else
 			{
 				/* The request didn't complete yet, tell it to destroy it when it completes */
-				args->req->internal_req->to_destroy = 1;
-				STARPU_PTHREAD_MUTEX_UNLOCK(&args->req->internal_req->req_mutex);
+				args->req->backend->internal_req->backend->to_destroy = 1;
+				STARPU_PTHREAD_MUTEX_UNLOCK(&args->req->backend->internal_req->backend->req_mutex);
 			}
 			_starpu_mpi_handle_request_termination(args->req);
 			_starpu_mpi_request_destroy(args->req);
@@ -924,11 +925,11 @@ static void _starpu_mpi_early_data_cb(void* arg)
 			// be handled when calling starpu_mpi_wait
 			// We store in the application request the internal MPI
 			// request so that it can be used by starpu_mpi_wait
-			args->req->data_request = args->req->internal_req->data_request;
-			STARPU_PTHREAD_MUTEX_LOCK(&args->req->req_mutex);
+			args->req->backend->data_request = args->req->backend->internal_req->backend->data_request;
+			STARPU_PTHREAD_MUTEX_LOCK(&args->req->backend->req_mutex);
 			args->req->submitted = 1;
-			STARPU_PTHREAD_COND_BROADCAST(&args->req->req_cond);
-			STARPU_PTHREAD_MUTEX_UNLOCK(&args->req->req_mutex);
+			STARPU_PTHREAD_COND_BROADCAST(&args->req->backend->req_cond);
+			STARPU_PTHREAD_MUTEX_UNLOCK(&args->req->backend->req_mutex);
 		}
 	}
 
@@ -958,12 +959,12 @@ static void _starpu_mpi_test_detached_requests(void)
 		STARPU_PTHREAD_MUTEX_UNLOCK(&detached_requests_mutex);
 
 		_STARPU_MPI_TRACE_TEST_BEGIN(req->node_tag.rank, req->node_tag.data_tag);
-		//_STARPU_MPI_DEBUG(3, "Test detached request %p - mpitag %"PRIi64"d - TYPE %s %d\n", &req->data_request, req->node_tag.data_tag, _starpu_mpi_request_type(req->request_type), req->node_tag.rank);
+		//_STARPU_MPI_DEBUG(3, "Test detached request %p - mpitag %"PRIi64" - TYPE %s %d\n", &req->backend->data_request, req->node_tag.data_tag, _starpu_mpi_request_type(req->request_type), req->node_tag.rank);
 #ifdef STARPU_SIMGRID
 		req->ret = _starpu_mpi_simgrid_mpi_test(&req->done, &flag);
 #else
-		STARPU_MPI_ASSERT_MSG(req->data_request != MPI_REQUEST_NULL, "Cannot test completion of the request MPI_REQUEST_NULL");
-		req->ret = MPI_Test(&req->data_request, &flag, MPI_STATUS_IGNORE);
+		STARPU_MPI_ASSERT_MSG(req->backend->data_request != MPI_REQUEST_NULL, "Cannot test completion of the request MPI_REQUEST_NULL");
+		req->ret = MPI_Test(&req->backend->data_request, &flag, MPI_STATUS_IGNORE);
 #endif
 
 		STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Test returning %s", _starpu_mpi_get_mpi_error_code(req->ret));
@@ -990,19 +991,19 @@ static void _starpu_mpi_test_detached_requests(void)
 
 			_STARPU_MPI_TRACE_COMPLETE_END(req->request_type, req->node_tag.rank, req->node_tag.data_tag);
 
-			STARPU_PTHREAD_MUTEX_LOCK(&req->req_mutex);
+			STARPU_PTHREAD_MUTEX_LOCK(&req->backend->req_mutex);
 			/* We don't want to free internal non-detached
 			   requests, we need to get their MPI request before
 			   destroying them */
-			if (req->is_internal_req && !req->to_destroy)
+			if (req->backend->is_internal_req && !req->backend->to_destroy)
 			{
 				/* We have completed the request, let the application request destroy it */
-				req->to_destroy = 1;
-				STARPU_PTHREAD_MUTEX_UNLOCK(&req->req_mutex);
+				req->backend->to_destroy = 1;
+				STARPU_PTHREAD_MUTEX_UNLOCK(&req->backend->req_mutex);
 			}
 			else
 			{
-				STARPU_PTHREAD_MUTEX_UNLOCK(&req->req_mutex);
+				STARPU_PTHREAD_MUTEX_UNLOCK(&req->backend->req_mutex);
 				_starpu_mpi_request_destroy(req);
 			}
 
@@ -1044,7 +1045,7 @@ static void _starpu_mpi_handle_ready_request(struct _starpu_mpi_req *req)
 	STARPU_MPI_ASSERT_MSG(req, "Invalid request");
 
 	/* submit the request to MPI */
-	_STARPU_MPI_DEBUG(2, "Handling new request %p type %s tag %"PRIi64"d src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n",
+	_STARPU_MPI_DEBUG(2, "Handling new request %p type %s tag %"PRIi64" src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n",
 			  req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.rank, req->data_handle,
 			  req->ptr, req->datatype_name, (int)req->count, req->registered_datatype);
 	req->func(req);
@@ -1054,7 +1055,7 @@ static void _starpu_mpi_handle_ready_request(struct _starpu_mpi_req *req)
 
 static void _starpu_mpi_receive_early_data(struct _starpu_mpi_envelope *envelope, MPI_Status status, MPI_Comm comm)
 {
-	_STARPU_MPI_DEBUG(20, "Request with tag %"PRIi64"d and source %d not found, creating a early_data_handle to receive incoming data..\n", envelope->data_tag, status.MPI_SOURCE);
+	_STARPU_MPI_DEBUG(20, "Request with tag %"PRIi64" and source %d not found, creating a early_data_handle to receive incoming data..\n", envelope->data_tag, status.MPI_SOURCE);
 	_STARPU_MPI_DEBUG(20, "Request sync %d\n", envelope->sync);
 
 	struct _starpu_mpi_early_data_handle* early_data_handle = _starpu_mpi_early_data_create(envelope, status.MPI_SOURCE, comm);
@@ -1084,7 +1085,7 @@ static void _starpu_mpi_receive_early_data(struct _starpu_mpi_envelope *envelope
 		//_starpu_mpi_early_data_add(early_data_handle);
 	}
 
-	_STARPU_MPI_DEBUG(20, "Posting internal detached irecv on early_data_handle with tag %"PRIi64"d from comm %ld src %d ..\n",
+	_STARPU_MPI_DEBUG(20, "Posting internal detached irecv on early_data_handle with tag %"PRIi64" from comm %ld src %d ..\n",
 			  early_data_handle->node_tag.data_tag, (long int)comm, status.MPI_SOURCE);
 	STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex);
 	early_data_handle->req = _starpu_mpi_irecv_common(early_data_handle->handle, status.MPI_SOURCE,
@@ -1095,10 +1096,10 @@ static void _starpu_mpi_receive_early_data(struct _starpu_mpi_envelope *envelope
 	// We wait until the request is pushed in the
 	// ready_request list
 	STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex);
-	STARPU_PTHREAD_MUTEX_LOCK(&(early_data_handle->req->posted_mutex));
+	STARPU_PTHREAD_MUTEX_LOCK(&(early_data_handle->req->backend->posted_mutex));
 	while (!(early_data_handle->req->posted))
-		STARPU_PTHREAD_COND_WAIT(&(early_data_handle->req->posted_cond), &(early_data_handle->req->posted_mutex));
-	STARPU_PTHREAD_MUTEX_UNLOCK(&(early_data_handle->req->posted_mutex));
+		STARPU_PTHREAD_COND_WAIT(&(early_data_handle->req->backend->posted_cond), &(early_data_handle->req->backend->posted_mutex));
+	STARPU_PTHREAD_MUTEX_UNLOCK(&(early_data_handle->req->backend->posted_mutex));
 
 #ifdef STARPU_DEVEL
 #warning check if req_ready is still necessary
@@ -1305,7 +1306,7 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 				{
 					struct _starpu_mpi_req *_sync_req = _starpu_mpi_sync_data_find(envelope->data_tag, envelope_status.MPI_SOURCE, envelope_comm);
 					_STARPU_MPI_DEBUG(20, "Sending data with tag %"PRIi64" to node %d\n", _sync_req->node_tag.data_tag, envelope_status.MPI_SOURCE);
-					STARPU_MPI_ASSERT_MSG(envelope->data_tag == _sync_req->node_tag.data_tag, "Tag mismatch (envelope %"PRIi64"d != req %"PRIi64"d)\n",
+					STARPU_MPI_ASSERT_MSG(envelope->data_tag == _sync_req->node_tag.data_tag, "Tag mismatch (envelope %"PRIi64" != req %"PRIi64")\n",
 							      envelope->data_tag, _sync_req->node_tag.data_tag);
 					STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex);
 					_starpu_mpi_isend_data_func(_sync_req);
@@ -1313,7 +1314,7 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 				}
 				else
 				{
-					_STARPU_MPI_DEBUG(3, "Searching for application request with tag %"PRIi64"d and source %d (size %ld)\n", envelope->data_tag, envelope_status.MPI_SOURCE, envelope->size);
+					_STARPU_MPI_DEBUG(3, "Searching for application request with tag %"PRIi64" and source %d (size %ld)\n", envelope->data_tag, envelope_status.MPI_SOURCE, envelope->size);
 
 					struct _starpu_mpi_req *early_request = _starpu_mpi_early_request_dequeue(envelope->data_tag, envelope_status.MPI_SOURCE, envelope_comm);
 
@@ -1344,7 +1345,7 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 							new_req->callback_arg = NULL;
 							new_req->func = _starpu_mpi_irecv_size_func;
 							new_req->sequential_consistency = 1;
-							new_req->is_internal_req = 0; // ????
+							new_req->backend->is_internal_req = 0; // ????
 							new_req->count = envelope->size;
 							_starpu_mpi_sync_data_add(new_req);
 						}
@@ -1360,7 +1361,7 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 					 * _starpu_mpi_handle_ready_request. */
 					else
 					{
-						_STARPU_MPI_DEBUG(2000, "A matching application request has been found for the incoming data with tag %"PRIi64"d\n", envelope->data_tag);
+						_STARPU_MPI_DEBUG(2000, "A matching application request has been found for the incoming data with tag %"PRIi64"\n", envelope->data_tag);
 						_STARPU_MPI_DEBUG(2000, "Request sync %d\n", envelope->sync);
 
 						early_request->sync = envelope->sync;
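
Note: a change that recurs throughout the hunks above and below is dropping a stray 'd' after the PRIi64 conversion specifier in debug messages. PRIi64 already expands to a complete conversion (for instance "lld"), so the extra 'd' was printed verbatim after every tag value. A minimal standalone illustration (not StarPU code):

#include <inttypes.h>
#include <stdio.h>

int main(void)
{
	int64_t tag = 42;
	/* Wrong: PRIi64 already expands to the whole conversion (e.g. "lld"),
	 * so the trailing 'd' is printed literally -> "tag 42d" */
	printf("tag %"PRIi64"d\n", tag);
	/* Fixed form used by this commit: prints "tag 42" */
	printf("tag %"PRIi64"\n", tag);
	return 0;
}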

+ 117 - 0
mpi/src/mpi/starpu_mpi_mpi_backend.c

@@ -0,0 +1,117 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2017                                     Inria
+ * Copyright (C) 2010-2015,2017,2018,2019                 CNRS
+ * Copyright (C) 2009-2014,2017,2018-2019                 Université de Bordeaux
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <stdlib.h>
+#include <starpu_config.h>
+#include <starpu_mpi.h>
+#include <starpu_mpi_private.h>
+
+#ifdef STARPU_USE_MPI_MPI
+
+#include <mpi/starpu_mpi_mpi_backend.h>
+#include <mpi/starpu_mpi_tag.h>
+#include <mpi/starpu_mpi_comm.h>
+#include <mpi/starpu_mpi_comm.h>
+#include <mpi/starpu_mpi_tag.h>
+#include <mpi/starpu_mpi_driver.h>
+
+void _starpu_mpi_mpi_backend_init(struct starpu_conf *conf)
+{
+	_starpu_mpi_driver_init(conf);
+}
+
+void _starpu_mpi_mpi_backend_shutdown(void)
+{
+	_starpu_mpi_tag_shutdown();
+	_starpu_mpi_comm_shutdown();
+	_starpu_mpi_driver_shutdown();
+}
+
+int _starpu_mpi_mpi_backend_reserve_core(void)
+{
+	return (starpu_get_env_number_default("STARPU_MPI_DRIVER_CALL_FREQUENCY", 0) <= 0);
+}
+
+void _starpu_mpi_mpi_backend_request_init(struct _starpu_mpi_req *req)
+{
+	_STARPU_MPI_CALLOC(req->backend, 1, sizeof(struct _starpu_mpi_req_backend));
+
+	req->backend->data_request = 0;
+
+	STARPU_PTHREAD_MUTEX_INIT(&req->backend->req_mutex, NULL);
+	STARPU_PTHREAD_COND_INIT(&req->backend->req_cond, NULL);
+	STARPU_PTHREAD_MUTEX_INIT(&req->backend->posted_mutex, NULL);
+	STARPU_PTHREAD_COND_INIT(&req->backend->posted_cond, NULL);
+
+	req->backend->other_request = NULL;
+
+	req->backend->size_req = 0;
+	req->backend->internal_req = NULL;
+	req->backend->is_internal_req = 0;
+	req->backend->to_destroy = 1;
+	req->backend->early_data_handle = NULL;
+	req->backend->envelope = NULL;
+}
+
+void _starpu_mpi_mpi_backend_request_fill(struct _starpu_mpi_req *req, MPI_Comm comm, int is_internal_req)
+{
+	_starpu_mpi_comm_register(comm);
+
+	req->backend->is_internal_req = is_internal_req;
+	/* For internal requests, we wait for both the request completion and the matching application request completion */
+	req->backend->to_destroy = !is_internal_req;
+}
+
+void _starpu_mpi_mpi_backend_request_destroy(struct _starpu_mpi_req *req)
+{
+	STARPU_PTHREAD_MUTEX_DESTROY(&req->backend->req_mutex);
+	STARPU_PTHREAD_COND_DESTROY(&req->backend->req_cond);
+	STARPU_PTHREAD_MUTEX_DESTROY(&req->backend->posted_mutex);
+	STARPU_PTHREAD_COND_DESTROY(&req->backend->posted_cond);
+	free(req->backend);
+}
+
+void _starpu_mpi_mpi_backend_data_clear(starpu_data_handle_t data_handle)
+{
+	_starpu_mpi_tag_data_release(data_handle);
+}
+
+void _starpu_mpi_mpi_backend_data_register(starpu_data_handle_t data_handle, starpu_mpi_tag_t data_tag)
+{
+	_starpu_mpi_tag_data_register(data_handle, data_tag);
+}
+
+void _starpu_mpi_mpi_backend_comm_register(MPI_Comm comm)
+{
+	_starpu_mpi_comm_register(comm);
+}
+
+struct _starpu_mpi_backend _mpi_backend =
+{
+ 	._starpu_mpi_backend_init = _starpu_mpi_mpi_backend_init,
+ 	._starpu_mpi_backend_shutdown = _starpu_mpi_mpi_backend_shutdown,
+	._starpu_mpi_backend_reserve_core = _starpu_mpi_mpi_backend_reserve_core,
+	._starpu_mpi_backend_request_init = _starpu_mpi_mpi_backend_request_init,
+	._starpu_mpi_backend_request_fill = _starpu_mpi_mpi_backend_request_fill,
+	._starpu_mpi_backend_request_destroy = _starpu_mpi_mpi_backend_request_destroy,
+	._starpu_mpi_backend_data_clear = _starpu_mpi_mpi_backend_data_clear,
+	._starpu_mpi_backend_data_register = _starpu_mpi_mpi_backend_data_register,
+	._starpu_mpi_backend_comm_register = _starpu_mpi_mpi_backend_comm_register
+};
+
+#endif /* STARPU_USE_MPI_MPI*/
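
Note: the file above plugs the plain-MPI implementation into the _mpi_backend table of function pointers, which the generic StarPU-MPI code calls through instead of inlining #ifdef STARPU_USE_MPI_MPI blocks (see the starpu_mpi.c and starpu_mpi_init.c hunks further down). A reduced sketch of the pattern with simplified, hypothetical names — the real struct _starpu_mpi_backend declaration is not shown in this diff and has more members:

/* Sketch only: simplified stand-in for the backend descriptor pattern. */
struct backend_ops
{
	void (*init)(void);
	void (*shutdown)(void);
	int  (*reserve_core)(void);
};

static void my_init(void)         { /* backend-specific setup */ }
static void my_shutdown(void)     { /* backend-specific teardown */ }
static int  my_reserve_core(void) { return 1; }

/* Each backend source file defines this single symbol under its own
 * compile-time guard, so exactly one definition is linked in. */
struct backend_ops ops =
{
	.init         = my_init,
	.shutdown     = my_shutdown,
	.reserve_core = my_reserve_core,
};

/* Generic code then dispatches without any #ifdef: */
void generic_init(void)
{
	ops.init();
	if (ops.reserve_core())
	{
		/* reserve a CPU core for the progression thread */
	}
}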

+ 80 - 0
mpi/src/mpi/starpu_mpi_mpi_backend.h

@@ -0,0 +1,80 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2017                                     Inria
+ * Copyright (C) 2010-2015,2017,2018,2019                 CNRS
+ * Copyright (C) 2009-2014,2017,2018-2019                 Université de Bordeaux
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef __STARPU_MPI_MPI_BACKEND_H__
+#define __STARPU_MPI_MPI_BACKEND_H__
+
+#include <common/config.h>
+#include <common/uthash.h>
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#ifdef STARPU_USE_MPI_MPI
+
+extern int _starpu_mpi_tag;
+#define _STARPU_MPI_TAG_ENVELOPE  _starpu_mpi_tag
+#define _STARPU_MPI_TAG_DATA      _starpu_mpi_tag+1
+#define _STARPU_MPI_TAG_SYNC_DATA _starpu_mpi_tag+2
+
+enum _starpu_envelope_mode
+{
+	_STARPU_MPI_ENVELOPE_DATA=0,
+	_STARPU_MPI_ENVELOPE_SYNC_READY=1
+};
+
+struct _starpu_mpi_envelope
+{
+	enum _starpu_envelope_mode mode;
+	starpu_ssize_t size;
+	starpu_mpi_tag_t data_tag;
+	unsigned sync;
+};
+
+struct _starpu_mpi_req_backend
+{
+	MPI_Request data_request;
+
+	starpu_pthread_mutex_t req_mutex;
+	starpu_pthread_cond_t req_cond;
+	starpu_pthread_mutex_t posted_mutex;
+	starpu_pthread_cond_t posted_cond;
+	/* In the case of a Wait/Test request, we are going to post a request
+	 * to test the completion of another request */
+	struct _starpu_mpi_req *other_request;
+
+	MPI_Request size_req;
+
+	struct _starpu_mpi_envelope* envelope;
+
+	unsigned is_internal_req:1;
+	unsigned to_destroy:1;
+	struct _starpu_mpi_req *internal_req;
+	struct _starpu_mpi_early_data_handle *early_data_handle;
+     	UT_hash_handle hh;
+};
+
+#endif // STARPU_USE_MPI_MPI
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // __STARPU_MPI_MPI_BACKEND_H__
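
Note: with this header, every field that only the plain-MPI backend needs per request (the MPI_Request handles, the request and posted mutexes/conditions, the envelope, the early-data bookkeeping) lives behind req->backend, and struct _starpu_mpi_req keeps only backend-agnostic state. A minimal sketch of that ownership split, with hypothetical simplified names; in the real code the allocation happens in _starpu_mpi_mpi_backend_request_init() shown above:

#include <stdlib.h>

/* Sketch only: stand-ins for the real StarPU structures. */
struct req_backend { int data_request; /* stands in for MPI_Request, mutexes, envelope, ... */ };
struct req         { struct req_backend *backend; /* plus the backend-agnostic fields */ };

/* The backend's request_init hook allocates the backend-specific part... */
void backend_request_init(struct req *r)
{
	r->backend = calloc(1, sizeof(*r->backend));
}

/* ...and its request_destroy hook releases it. */
void backend_request_destroy(struct req *r)
{
	free(r->backend);
	r->backend = NULL;
}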

+ 3 - 3
mpi/src/mpi/starpu_mpi_tag.c

@@ -89,7 +89,7 @@ void _starpu_mpi_tag_data_register(starpu_data_handle_t handle, starpu_mpi_tag_t
 	STARPU_ASSERT_MSG(!(_starpu_mpi_tag_get_data_handle_from_tag(data_tag)),
 			  "There is already a data handle %p registered with the tag %ld\n", _starpu_mpi_tag_get_data_handle_from_tag(data_tag), data_tag);
 
-	_STARPU_MPI_DEBUG(42, "Adding handle %p with tag %"PRIi64"d in hashtable\n", handle, data_tag);
+	_STARPU_MPI_DEBUG(42, "Adding handle %p with tag %"PRIi64" in hashtable\n", handle, data_tag);
 
 	entry->handle = handle;
 	entry->data_tag = data_tag;
@@ -108,7 +108,7 @@ int _starpu_mpi_tag_data_release(starpu_data_handle_t handle)
 {
 	starpu_mpi_tag_t data_tag = starpu_mpi_data_get_tag(handle);
 
-	_STARPU_MPI_DEBUG(42, "Removing handle %p with tag %"PRIi64"d from hashtable\n", handle, data_tag);
+	_STARPU_MPI_DEBUG(42, "Removing handle %p with tag %"PRIi64" from hashtable\n", handle, data_tag);
 
 	if (data_tag != -1)
 	{
@@ -116,7 +116,7 @@ int _starpu_mpi_tag_data_release(starpu_data_handle_t handle)
 
 		_starpu_spin_lock(&registered_tag_handles_lock);
 		HASH_FIND(hh, registered_tag_handles, &(((struct _starpu_mpi_data *)(handle->mpi_data))->node_tag.data_tag), sizeof(tag_entry->data_tag), tag_entry);
-		STARPU_ASSERT_MSG((tag_entry != NULL),"Data handle %p with tag %"PRIi64"d isn't in the hashmap !", handle, data_tag);
+		STARPU_ASSERT_MSG((tag_entry != NULL),"Data handle %p with tag %"PRIi64" isn't in the hashmap !", handle, data_tag);
 
 		HASH_DEL(registered_tag_handles, tag_entry);
 

+ 32 - 32
mpi/src/nmad/starpu_mpi_nmad.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2017                                     Inria
- * Copyright (C) 2010-2015,2017,2018                      CNRS
+ * Copyright (C) 2010-2015,2017,2018,2019                 CNRS
  * Copyright (C) 2009-2014,2017,2018-2019                 Université de Bordeaux
  * Copyright (C) 2017                                     Guillaume Beauchamp
  *
@@ -38,7 +38,7 @@
 
 #include <nm_sendrecv_interface.h>
 #include <nm_mpi_nmad.h>
-
+#include "starpu_mpi_nmad_backend.h"
 
 static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req,nm_sr_event_t event);
 #ifdef STARPU_VERBOSE
@@ -96,18 +96,18 @@ static void _starpu_mpi_isend_data_func(struct _starpu_mpi_req *req)
 
 	struct nm_data_s data;
 	nm_mpi_nmad_data_get(&data, (void*)req->ptr, req->datatype, req->count);
-	nm_sr_send_init(req->session, &(req->data_request));
-	nm_sr_send_pack_data(req->session, &(req->data_request), &data);
-	nm_sr_send_set_priority(req->session, &req->data_request, req->prio);
+	nm_sr_send_init(req->backend->session, &(req->backend->data_request));
+	nm_sr_send_pack_data(req->backend->session, &(req->backend->data_request), &data);
+	nm_sr_send_set_priority(req->backend->session, &req->backend->data_request, req->prio);
 
 	if (req->sync == 0)
 	{
-		req->ret = nm_sr_send_isend(req->session, &(req->data_request), req->gate, req->node_tag.data_tag);
+		req->ret = nm_sr_send_isend(req->backend->session, &(req->backend->data_request), req->backend->gate, req->node_tag.data_tag);
 		STARPU_ASSERT_MSG(req->ret == NM_ESUCCESS, "MPI_Isend returning %d", req->ret);
 	}
 	else
 	{
-		req->ret = nm_sr_send_issend(req->session, &(req->data_request), req->gate, req->node_tag.data_tag);
+		req->ret = nm_sr_send_issend(req->backend->session, &(req->backend->data_request), req->backend->gate, req->node_tag.data_tag);
 		STARPU_ASSERT_MSG(req->ret == NM_ESUCCESS, "MPI_Issend returning %d", req->ret);
 	}
 
@@ -124,7 +124,7 @@ void _starpu_mpi_isend_size_func(struct _starpu_mpi_req *req)
 
 	if (req->registered_datatype == 1)
 	{
-		req->waited = 1;
+		req->backend->waited = 1;
 		req->count = 1;
 		req->ptr = starpu_data_handle_to_pointer(req->data_handle, STARPU_MAIN_RAM);
 	}
@@ -132,7 +132,7 @@ void _starpu_mpi_isend_size_func(struct _starpu_mpi_req *req)
 	{
 		starpu_ssize_t psize = -1;
 		int ret;
-		req->waited =2;
+		req->backend->waited =2;
 
 		// Do not pack the data, just try to find out the size
 		starpu_data_pack(req->data_handle, NULL, &psize);
@@ -142,10 +142,10 @@ void _starpu_mpi_isend_size_func(struct _starpu_mpi_req *req)
 			// We already know the size of the data, let's send it to overlap with the packing of the data
 			_STARPU_MPI_DEBUG(20, "Sending size %ld (%ld %s) to node %d (first call to pack)\n", psize, sizeof(req->count), "MPI_BYTE", req->node_tag.rank);
 			req->count = psize;
-			//ret = nm_sr_isend(nm_mpi_communicator_get_session(p_req->p_comm),nm_mpi_communicator_get_gate(p_comm,req->srcdst), req->mpi_tag,&req->count, sizeof(req->count), &req->size_req);
-			ret = nm_sr_isend(req->session,req->gate, req->node_tag.data_tag,&req->count, sizeof(req->count), &req->size_req);
+			//ret = nm_sr_isend(nm_mpi_communicator_get_session(p_req->p_comm),nm_mpi_communicator_get_gate(p_comm,req->srcdst), req->mpi_tag,&req->count, sizeof(req->count), &req->backend->size_req);
+			ret = nm_sr_isend(req->backend->session,req->backend->gate, req->node_tag.data_tag,&req->count, sizeof(req->count), &req->backend->size_req);
 
-			//	ret = MPI_Isend(&req->count, sizeof(req->count), MPI_BYTE, req->srcdst, req->mpi_tag, req->comm, &req->size_req);
+			//	ret = MPI_Isend(&req->count, sizeof(req->count), MPI_BYTE, req->srcdst, req->mpi_tag, req->comm, &req->backend->size_req);
 			STARPU_ASSERT_MSG(ret == NM_ESUCCESS, "when sending size, nm_sr_isend returning %d", ret);
 		}
 
@@ -155,7 +155,7 @@ void _starpu_mpi_isend_size_func(struct _starpu_mpi_req *req)
 		{
 			// We know the size now, let's send it
 			_STARPU_MPI_DEBUG(1, "Sending size %ld (%ld %s) with tag %ld to node %d (second call to pack)\n", req->count, sizeof(req->count), "MPI_BYTE", req->node_tag.data_tag, req->node_tag.rank);
-			ret = nm_sr_isend(req->session,req->gate, req->node_tag.data_tag,&req->count, sizeof(req->count), &req->size_req);
+			ret = nm_sr_isend(req->backend->session,req->backend->gate, req->node_tag.data_tag,&req->count, sizeof(req->count), &req->backend->size_req);
 			STARPU_ASSERT_MSG(ret == NM_ESUCCESS, "when sending size, nm_sr_isend returning %d", ret);
 		}
 		else
@@ -186,9 +186,9 @@ static void _starpu_mpi_irecv_data_func(struct _starpu_mpi_req *req)
 	//req->ret = MPI_Irecv(req->ptr, req->count, req->datatype, req->srcdst, req->mpi_tag, req->comm, &req->request);
 	struct nm_data_s data;
 	nm_mpi_nmad_data_get(&data, (void*)req->ptr, req->datatype, req->count);
-	nm_sr_recv_init(req->session, &(req->data_request));
-	nm_sr_recv_unpack_data(req->session, &(req->data_request), &data);
-	nm_sr_recv_irecv(req->session, &(req->data_request), req->gate, req->node_tag.data_tag, NM_TAG_MASK_FULL);
+	nm_sr_recv_init(req->backend->session, &(req->backend->data_request));
+	nm_sr_recv_unpack_data(req->backend->session, &(req->backend->data_request), &data);
+	nm_sr_recv_irecv(req->backend->session, &(req->backend->data_request), req->backend->gate, req->node_tag.data_tag, NM_TAG_MASK_FULL);
 
 	_STARPU_MPI_TRACE_IRECV_SUBMIT_END(req->node_tag.rank, req->node_tag.data_tag);
 
@@ -259,9 +259,9 @@ int _starpu_mpi_wait(starpu_mpi_req *public_req, MPI_Status *status)
 
 	/* we must do a test_locked to avoid a race condition:
 	 * without it, req_cond could still be in use and could not be freed */
-	while (!req->completed || ! piom_cond_test_locked(&(req->req_cond),REQ_FINALIZED))
+	while (!req->completed || ! piom_cond_test_locked(&(req->backend->req_cond),REQ_FINALIZED))
 	{
-		piom_cond_wait(&(req->req_cond),REQ_FINALIZED);
+		piom_cond_wait(&(req->backend->req_cond),REQ_FINALIZED);
 	}
 
 	if (status!=MPI_STATUS_IGNORE)
@@ -292,7 +292,7 @@ int _starpu_mpi_test(starpu_mpi_req *public_req, int *flag, MPI_Status *status)
 
 	/* we must do a test_locked to avoid a race condition:
 	 * without it, req_cond could still be in use and could not be freed */
-	*flag = req->completed && piom_cond_test_locked(&(req->req_cond),REQ_FINALIZED);
+	*flag = req->completed && piom_cond_test_locked(&(req->backend->req_cond),REQ_FINALIZED);
 	if (*flag && status!=MPI_STATUS_IGNORE)
 		_starpu_mpi_req_status(req,status);
 
@@ -358,17 +358,17 @@ static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req,n
 	{
 		if (req->registered_datatype == 0)
 		{
-			if(req->waited == 1)
+			if(req->backend->waited == 1)
 			        nm_mpi_nmad_data_release(req->datatype);
 			if (req->request_type == SEND_REQ)
 			{
-				req->waited--;
+				req->backend->waited--;
 				// We need to make sure the communication for sending the size
 				// has completed, as MPI can re-order messages, let's count
 				// received messages.
 				// FIXME: concurrent access.
 				STARPU_ASSERT_MSG(event == NM_SR_EVENT_FINALIZED, "Callback with event %d", event);
-				if(req->waited>0)
+				if(req->backend->waited>0)
 					return;
 
 			}
@@ -411,7 +411,7 @@ static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req,n
 			/* tell anyone potentially waiting on the request that it is
 			 * terminated now (should be done after the callback)*/
 			req->completed = 1;
-			piom_cond_signal(&req->req_cond, REQ_FINALIZED);
+			piom_cond_signal(&req->backend->req_cond, REQ_FINALIZED);
 		}
 		int pending_remaining = STARPU_ATOMIC_ADD(&pending_request, -1);
 		if (!running && !pending_remaining)
@@ -427,16 +427,16 @@ void _starpu_mpi_handle_request_termination_callback(nm_sr_event_t event, const
 
 static void _starpu_mpi_handle_pending_request(struct _starpu_mpi_req *req)
 {
-	if(req->request_type == SEND_REQ && req->waited>1)
+	if(req->request_type == SEND_REQ && req->backend->waited>1)
 	{
-		nm_sr_request_set_ref(&(req->size_req), req);
-		nm_sr_request_monitor(req->session, &(req->size_req), NM_SR_EVENT_FINALIZED,_starpu_mpi_handle_request_termination_callback);
+		nm_sr_request_set_ref(&(req->backend->size_req), req);
+		nm_sr_request_monitor(req->backend->session, &(req->backend->size_req), NM_SR_EVENT_FINALIZED,_starpu_mpi_handle_request_termination_callback);
 	}
 	/* the if must be before, because the first callback can directly free
-	* a detached request (the second callback free if req->waited>1). */
-	nm_sr_request_set_ref(&(req->data_request), req);
+	* a detached request (the second callback free if req->backend->waited>1). */
+	nm_sr_request_set_ref(&(req->backend->data_request), req);
 
-	nm_sr_request_monitor(req->session, &(req->data_request), NM_SR_EVENT_FINALIZED,_starpu_mpi_handle_request_termination_callback);
+	nm_sr_request_monitor(req->backend->session, &(req->backend->data_request), NM_SR_EVENT_FINALIZED,_starpu_mpi_handle_request_termination_callback);
 }
 
 void _starpu_mpi_coop_sends_build_tree(struct _starpu_mpi_coop_sends *coop_sends)
@@ -572,7 +572,7 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 		else
 		{
 			c->req->completed=1;
-			piom_cond_signal(&(c->req->req_cond), REQ_FINALIZED);
+			piom_cond_signal(&(c->req->backend->req_cond), REQ_FINALIZED);
 		}
 		STARPU_ATOMIC_ADD( &pending_request, -1);
 		/* we signal that the request is completed.*/
@@ -685,12 +685,12 @@ int _starpu_mpi_progress_init(struct _starpu_mpi_argc_argv *argc_argv)
 			(strcmp(s_idle_hooks, "HOOK")   == 0) ? PIOM_POLL_POINT_HOOK :
 			0;
 	}
-	
+
 	if(polling_point_prog)
 	{
 		starpu_progression_hook_register((unsigned (*)(void *))&piom_ltask_schedule, (void *)&polling_point_prog);
 	}
-	
+
 	if(polling_point_idle)
 	{
 		starpu_idle_hook_register((unsigned (*)(void *))&piom_ltask_schedule, (void *)&polling_point_idle);

+ 87 - 0
mpi/src/nmad/starpu_mpi_nmad_backend.c

@@ -0,0 +1,87 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2017                                     Inria
+ * Copyright (C) 2010-2015,2017,2018,2019                 CNRS
+ * Copyright (C) 2009-2014,2017,2018-2019                 Université de Bordeaux
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <stdlib.h>
+#include "starpu_mpi_nmad_backend.h"
+#include <starpu_mpi_private.h>
+
+#ifdef STARPU_USE_MPI_NMAD
+
+void _starpu_mpi_nmad_backend_init(struct starpu_conf *conf)
+{
+	(void)conf;
+	/* strat_prio is preferred for StarPU instead of default strat_aggreg */
+	setenv("NMAD_STRATEGY", "prio", 0 /* do not overwrite user-supplied value, if set */);
+}
+
+void _starpu_mpi_nmad_backend_shutdown(void)
+{
+}
+
+int _starpu_mpi_nmad_backend_reserve_core(void)
+{
+	return 1;
+}
+
+void _starpu_mpi_nmad_backend_request_init(struct _starpu_mpi_req *req)
+{
+	_STARPU_MPI_CALLOC(req->backend, 1, sizeof(struct _starpu_mpi_req_backend));
+	piom_cond_init(&req->backend->req_cond, 0);
+}
+
+void _starpu_mpi_nmad_backend_request_fill(struct _starpu_mpi_req *req, MPI_Comm comm, int is_internal_req)
+{
+	nm_mpi_nmad_dest(&req->backend->session, &req->backend->gate, comm, req->node_tag.rank);
+}
+
+void _starpu_mpi_nmad_backend_request_destroy(struct _starpu_mpi_req *req)
+{
+	piom_cond_destroy(&(req->backend->req_cond));
+	free(req->backend);
+}
+
+void _starpu_mpi_nmad_backend_data_clear(starpu_data_handle_t data_handle)
+{
+	(void)data_handle;
+}
+
+void _starpu_mpi_nmad_backend_data_register(starpu_data_handle_t data_handle, starpu_mpi_tag_t data_tag)
+{
+	(void)data_handle;
+	(void)data_tag;
+}
+
+void _starpu_mpi_nmad_backend_comm_register(MPI_Comm comm)
+{
+	(void)comm;
+}
+
+struct _starpu_mpi_backend _mpi_backend =
+{
+ 	._starpu_mpi_backend_init = _starpu_mpi_nmad_backend_init,
+ 	._starpu_mpi_backend_shutdown = _starpu_mpi_nmad_backend_shutdown,
+	._starpu_mpi_backend_reserve_core = _starpu_mpi_nmad_backend_reserve_core,
+	._starpu_mpi_backend_request_init = _starpu_mpi_nmad_backend_request_init,
+	._starpu_mpi_backend_request_fill = _starpu_mpi_nmad_backend_request_fill,
+	._starpu_mpi_backend_request_destroy = _starpu_mpi_nmad_backend_request_destroy,
+	._starpu_mpi_backend_data_clear = _starpu_mpi_nmad_backend_data_clear,
+	._starpu_mpi_backend_data_register = _starpu_mpi_nmad_backend_data_register,
+	._starpu_mpi_backend_comm_register = _starpu_mpi_nmad_backend_comm_register
+};
+
+#endif /* STARPU_USE_MPI_NMAD*/
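
Note: this file defines the same _mpi_backend descriptor as mpi/src/mpi/starpu_mpi_mpi_backend.c above, but guarded by STARPU_USE_MPI_NMAD instead of STARPU_USE_MPI_MPI, and it also takes over the NMAD_STRATEGY default that previously lived in starpu_mpi_init.c (removed in a hunk below). Assuming configure enables only one of the two backends, exactly one definition of the symbol is linked in:

/* Sketch of the link-time selection implied by the two guards: */
#if defined(STARPU_USE_MPI_MPI)
  /* _mpi_backend comes from mpi/src/mpi/starpu_mpi_mpi_backend.c   */
#elif defined(STARPU_USE_MPI_NMAD)
  /* _mpi_backend comes from mpi/src/nmad/starpu_mpi_nmad_backend.c */
#endif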

+ 51 - 0
mpi/src/nmad/starpu_mpi_nmad_backend.h

@@ -0,0 +1,51 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2017                                     Inria
+ * Copyright (C) 2010-2015,2017,2018,2019                 CNRS
+ * Copyright (C) 2009-2014,2017,2018-2019                 Université de Bordeaux
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef __STARPU_MPI_NMAD_BACKEND_H__
+#define __STARPU_MPI_NMAD_BACKEND_H__
+
+#include <common/config.h>
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#ifdef STARPU_USE_MPI_NMAD
+
+#include <nm_sendrecv_interface.h>
+#include <nm_session_interface.h>
+#include <nm_mpi_nmad.h>
+
+struct _starpu_mpi_req_backend
+{
+	nm_gate_t gate;
+	nm_session_t session;
+	nm_sr_request_t data_request;
+	int waited;
+	piom_cond_t req_cond;
+	nm_sr_request_t size_req;
+};
+
+#endif // STARPU_USE_MPI_NMAD
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // __STARPU_MPI_NMAD_BACKEND_H__

+ 4 - 21
mpi/src/starpu_mpi.c

@@ -36,11 +36,6 @@
 #include <core/topology.h>
 #include <core/workers.h>
 
-#if defined(STARPU_USE_MPI_MPI)
-#include <mpi/starpu_mpi_comm.h>
-#include <mpi/starpu_mpi_tag.h>
-#endif
-
 static void _starpu_mpi_isend_irecv_common(struct _starpu_mpi_req *req, enum starpu_data_access_mode mode, int sequential_consistency)
 {
 	/* Asynchronously request StarPU to fetch the data in main memory: when
@@ -49,10 +44,7 @@ static void _starpu_mpi_isend_irecv_common(struct _starpu_mpi_req *req, enum sta
 	starpu_data_acquire_on_node_cb_sequential_consistency_sync_jobids(req->data_handle, STARPU_MAIN_RAM, mode, _starpu_mpi_submit_ready_request, (void *)req, sequential_consistency, 1, &req->pre_sync_jobid, &req->post_sync_jobid);
 }
 
-static struct _starpu_mpi_req *_starpu_mpi_isend_common(starpu_data_handle_t data_handle,
-							int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm,
-							unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *arg,
-							int sequential_consistency)
+static struct _starpu_mpi_req *_starpu_mpi_isend_common(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *arg, int sequential_consistency)
 {
 	if (_starpu_mpi_fake_world_size != -1)
 	{
@@ -66,9 +58,7 @@ static struct _starpu_mpi_req *_starpu_mpi_isend_common(starpu_data_handle_t dat
 	enum starpu_data_access_mode mode = STARPU_R;
 #endif
 
-	struct _starpu_mpi_req *req = _starpu_mpi_request_fill(
-	                                      data_handle, dest, data_tag, comm, detached, sync, prio, callback, arg, SEND_REQ, _starpu_mpi_isend_size_func,
-					      sequential_consistency, 0, 0);
+	struct _starpu_mpi_req *req = _starpu_mpi_request_fill(data_handle, dest, data_tag, comm, detached, sync, prio, callback, arg, SEND_REQ, _starpu_mpi_isend_size_func, sequential_consistency, 0, 0);
 	_starpu_mpi_req_willpost(req);
 
 	if (_starpu_mpi_use_coop_sends && detached == 1 && sync == 0 && callback == NULL)
@@ -253,9 +243,7 @@ int starpu_mpi_barrier(MPI_Comm comm)
 
 void _starpu_mpi_data_clear(starpu_data_handle_t data_handle)
 {
-#if defined(STARPU_USE_MPI_MPI)
-	_starpu_mpi_tag_data_release(data_handle);
-#endif
+	_mpi_backend._starpu_mpi_backend_data_clear(data_handle);
 	_starpu_mpi_cache_data_clear(data_handle);
 	free(data_handle->mpi_data);
 	data_handle->mpi_data = NULL;
@@ -289,9 +277,7 @@ void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, starpu_mpi_
 
 	if (data_tag != -1)
 	{
-#if defined(STARPU_USE_MPI_MPI)
-		_starpu_mpi_tag_data_register(data_handle, data_tag);
-#endif
+		_mpi_backend._starpu_mpi_backend_data_register(data_handle, data_tag);
 		mpi_data->node_tag.data_tag = data_tag;
 	}
 	if (rank != -1)
@@ -299,9 +285,6 @@ void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, starpu_mpi_
 		_STARPU_MPI_TRACE_DATA_SET_RANK(data_handle, rank);
 		mpi_data->node_tag.rank = rank;
 		mpi_data->node_tag.comm = comm;
-#if defined(STARPU_USE_MPI_MPI)
-		_starpu_mpi_comm_register(comm);
-#endif
 	}
 }
 

+ 4 - 20
mpi/src/starpu_mpi_init.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2016,2017                                Inria
- * Copyright (C) 2010-2018                                CNRS
+ * Copyright (C) 2010-2019                                CNRS
  * Copyright (C) 2009-2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -32,12 +32,6 @@
 #include <core/simgrid.h>
 #include <core/task.h>
 
-#if defined(STARPU_USE_MPI_MPI)
-#include <mpi/starpu_mpi_comm.h>
-#include <mpi/starpu_mpi_tag.h>
-#include <mpi/starpu_mpi_driver.h>
-#endif
-
 #ifdef STARPU_SIMGRID
 static int _mpi_world_size;
 static int _mpi_world_rank;
@@ -75,10 +69,6 @@ void _starpu_mpi_do_initialize(struct _starpu_mpi_argc_argv *argc_argv)
 	{
 		STARPU_ASSERT_MSG(argc_argv->comm == MPI_COMM_WORLD, "It does not make sense to ask StarPU-MPI to initialize MPI while a non-world communicator was given");
 		int thread_support;
-#ifdef STARPU_USE_MPI_NMAD
-		/* strat_prio is preferred for StarPU instead of default strat_aggreg */
-		setenv("NMAD_STRATEGY", "prio", 0 /* do not overwrite user-supplied value, if set */);
-#endif /* STARPU_USE_MPI_NMAD */
 		_STARPU_DEBUG("Calling MPI_Init_thread\n");
 		if (MPI_Init_thread(argc_argv->argc, argc_argv->argv, MPI_THREAD_SERIALIZED, &thread_support) != MPI_SUCCESS)
 		{
@@ -189,11 +179,9 @@ int starpu_mpi_init_conf(int *argc, char ***argv, int initialize_mpi, MPI_Comm c
 		conf = &localconf;
 	}
 
-#if defined(STARPU_USE_MPI_MPI)
-	_starpu_mpi_driver_init(conf);
+	_mpi_backend._starpu_mpi_backend_init(conf);
 
-	if (starpu_get_env_number_default("STARPU_MPI_DRIVER_CALL_FREQUENCY", 0) <= 0)
-#endif
+	if (_mpi_backend._starpu_mpi_backend_reserve_core())
 	{
 		/* Reserve a core for our progression thread */
 		if (conf->reserve_ncpus == -1)
@@ -227,11 +215,7 @@ int starpu_mpi_shutdown(void)
 	_starpu_mpi_comm_amounts_display(stderr, rank);
 	_starpu_mpi_comm_amounts_shutdown();
 	_starpu_mpi_cache_shutdown(world_size);
-#if defined(STARPU_USE_MPI_MPI)
-	_starpu_mpi_tag_shutdown();
-	_starpu_mpi_comm_shutdown();
-	_starpu_mpi_driver_shutdown();
-#endif
+
 	if (_mpi_initialized_starpu)
 		starpu_shutdown();
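
Note: the initialization path now asks the backend whether a CPU core must be reserved for the MPI progression thread: the plain-MPI backend answers yes unless STARPU_MPI_DRIVER_CALL_FREQUENCY is set to a positive value (see _starpu_mpi_mpi_backend_reserve_core() above), while the nmad backend always answers yes. A hedged usage sketch, assuming the environment variable keeps its existing meaning of letting the workers drive MPI progression and that passing a NULL conf is still accepted:

#include <stdlib.h>
#include <starpu_mpi.h>

int main(int argc, char **argv)
{
	/* With the plain-MPI backend, a positive value makes the backend's
	 * reserve_core hook return 0, so no CPU core is set aside for the
	 * progression thread.  The value 10 is only for illustration. */
	setenv("STARPU_MPI_DRIVER_CALL_FREQUENCY", "10", 1);

	if (starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL) != 0)
		return 1;

	/* ... submit tasks and communications ... */

	starpu_mpi_shutdown();
	return 0;
}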
 

+ 42 - 83
mpi/src/starpu_mpi_private.h

@@ -28,11 +28,6 @@
 #include <common/prio_list.h>
 #include <common/starpu_spinlock.h>
 #include <core/simgrid.h>
-#if defined(STARPU_USE_MPI_NMAD)
-#include <pioman.h>
-#include <nm_sendrecv_interface.h>
-#include <nm_session_interface.h>
-#endif
 
 #ifdef __cplusplus
 extern "C"
@@ -52,7 +47,7 @@ struct _starpu_simgrid_mpi_req
 };
 
 int _starpu_mpi_simgrid_mpi_test(unsigned *done, int *flag);
-void _starpu_mpi_simgrid_wait_req(MPI_Request *request, 	MPI_Status *status, starpu_pthread_queue_t *queue, unsigned *done);
+void _starpu_mpi_simgrid_wait_req(MPI_Request *request, MPI_Status *status, starpu_pthread_queue_t *queue, unsigned *done);
 #endif
 
 extern int _starpu_debug_rank;
@@ -73,7 +68,7 @@ extern int _starpu_mpi_use_coop_sends;
 void _starpu_mpi_env_init(void);
 
 #ifdef STARPU_NO_ASSERT
-#  define STARPU_MPI_ASSERT_MSG(x, msg, ...)	do { if (0) { (void) (x); }} while(0)
+#  define STARPU_MPI_ASSERT_MSG(x, msg, ...) do { if (0) { (void) (x); }} while(0)
 #else
 #  if defined(__CUDACC__) && defined(STARPU_HAVE_WINDOWS)
 int _starpu_debug_rank;
@@ -107,32 +102,32 @@ int _starpu_debug_rank;
 
 #ifdef STARPU_MPI_VERBOSE
 #  define _STARPU_MPI_COMM_DEBUG(ptr, count, datatype, node, tag, utag, comm, way) \
-	do								\
-	{							\
-	     	if (_starpu_mpi_comm_debug)			\
-		{					\
-     			int __size;			\
-			char _comm_name[128];		\
-			int _comm_name_len;		\
-			int _rank;			    \
+	do \
+	{ \
+	     	if (_starpu_mpi_comm_debug) \
+		{ \
+     			int __size; \
+			char _comm_name[128]; \
+			int _comm_name_len; \
+			int _rank; \
 			starpu_mpi_comm_rank(comm, &_rank); \
-			MPI_Type_size(datatype, &__size);		\
+			MPI_Type_size(datatype, &__size); \
 			MPI_Comm_get_name(comm, _comm_name, &_comm_name_len); \
 			fprintf(stderr, "[%d][starpu_mpi] :%d:%s:%d:%d:%ld:%s:%p:%ld:%d:%s:%d\n", _rank, _rank, way, node, tag, utag, _comm_name, ptr, count, __size, __starpu_func__ , __LINE__); \
-			fflush(stderr);					\
-		}							\
+			fflush(stderr);	\
+		} \
 	} while(0);
-#  define _STARPU_MPI_COMM_TO_DEBUG(ptr, count, datatype, dest, tag, utag, comm) 	    _STARPU_MPI_COMM_DEBUG(ptr, count, datatype, dest, tag, utag, comm, "-->")
+#  define _STARPU_MPI_COMM_TO_DEBUG(ptr, count, datatype, dest, tag, utag, comm) _STARPU_MPI_COMM_DEBUG(ptr, count, datatype, dest, tag, utag, comm, "-->")
 #  define _STARPU_MPI_COMM_FROM_DEBUG(ptr, count, datatype, source, tag, utag, comm)  _STARPU_MPI_COMM_DEBUG(ptr, count, datatype, source, tag, utag, comm, "<--")
 #  define _STARPU_MPI_DEBUG(level, fmt, ...) \
 	do \
 	{								\
 		if (!_starpu_silent && _starpu_debug_level_min <= level && level <= _starpu_debug_level_max)	\
-		{							\
+		{ \
 			if (_starpu_debug_rank == -1) starpu_mpi_comm_rank(MPI_COMM_WORLD, &_starpu_debug_rank); \
 			fprintf(stderr, "%*s[%d][starpu_mpi][%s:%d] " fmt , (_starpu_debug_rank+1)*4, "", _starpu_debug_rank, __starpu_func__ , __LINE__,## __VA_ARGS__); \
 			fflush(stderr); \
-		}			\
+		} \
 	} while(0);
 #else
 #  define _STARPU_MPI_COMM_DEBUG(ptr, count, datatype, node, tag, utag, comm, way)  do { } while(0)
@@ -163,24 +158,6 @@ int _starpu_debug_rank;
 #  define _STARPU_MPI_LOG_OUT()
 #endif
 
-#if defined(STARPU_USE_MPI_MPI)
-extern int _starpu_mpi_tag;
-#define _STARPU_MPI_TAG_ENVELOPE  _starpu_mpi_tag
-#define _STARPU_MPI_TAG_DATA      _starpu_mpi_tag+1
-#define _STARPU_MPI_TAG_SYNC_DATA _starpu_mpi_tag+2
-
-#define _STARPU_MPI_ENVELOPE_DATA       0
-#define _STARPU_MPI_ENVELOPE_SYNC_READY 1
-
-struct _starpu_mpi_envelope
-{
-	int mode;
-	starpu_ssize_t size;
-	starpu_mpi_tag_t data_tag;
-	unsigned sync;
-};
-#endif /* STARPU_USE_MPI_MPI */
-
 enum _starpu_mpi_request_type
 {
 	SEND_REQ=0,
@@ -229,6 +206,7 @@ struct _starpu_mpi_data
 
 struct _starpu_mpi_data *_starpu_mpi_data_get(starpu_data_handle_t data_handle);
 
+struct _starpu_mpi_req_backend;
 struct _starpu_mpi_req;
 LIST_TYPE(_starpu_mpi_req,
 	/* description of the data at StarPU level */
@@ -243,22 +221,13 @@ LIST_TYPE(_starpu_mpi_req,
 	starpu_ssize_t count;
 	int registered_datatype;
 
+	struct _starpu_mpi_req_backend *backend;
+
 	/* who are we talking to ? */
 	struct _starpu_mpi_node_tag node_tag;
-#if defined(STARPU_USE_MPI_NMAD)
-	nm_gate_t gate;
-	nm_session_t session;
-#endif
-
 	void (*func)(struct _starpu_mpi_req *);
 
 	MPI_Status *status;
-#if defined(STARPU_USE_MPI_NMAD)
-	nm_sr_request_t data_request;
-	int waited;
-#elif defined(STARPU_USE_MPI_MPI)
-	MPI_Request data_request;
-#endif
 	struct _starpu_mpi_req_multilist_coop_sends coop_sends;
 	struct _starpu_mpi_coop_sends *coop_sends_head;
 
@@ -266,17 +235,6 @@ LIST_TYPE(_starpu_mpi_req,
 	unsigned sync;
 
 	int ret;
-#if defined(STARPU_USE_MPI_NMAD)
-	piom_cond_t req_cond;
-#elif defined(STARPU_USE_MPI_MPI)
-	starpu_pthread_mutex_t req_mutex;
-	starpu_pthread_cond_t req_cond;
-	starpu_pthread_mutex_t posted_mutex;
-	starpu_pthread_cond_t posted_cond;
-	/* In the case of a Wait/Test request, we are going to post a request
-	 * to test the completion of another request */
-	struct _starpu_mpi_req *other_request;
-#endif
 
 	enum _starpu_mpi_request_type request_type; /* 0 send, 1 recv */
 
@@ -290,21 +248,6 @@ LIST_TYPE(_starpu_mpi_req,
 	void (*callback)(void *);
 
         /* in the case of user-defined datatypes, we need to send the size of the data */
-#if defined(STARPU_USE_MPI_NMAD)
-	nm_sr_request_t size_req;
-#elif defined(STARPU_USE_MPI_MPI)
-	MPI_Request size_req;
-#endif
-
-#if defined(STARPU_USE_MPI_MPI)
-	struct _starpu_mpi_envelope* envelope;
-
-	unsigned is_internal_req:1;
-	unsigned to_destroy:1;
-	struct _starpu_mpi_req *internal_req;
-	struct _starpu_mpi_early_data_handle *early_data_handle;
-     	UT_hash_handle hh;
-#endif
 
 	int sequential_consistency;
 
@@ -346,13 +289,12 @@ void _starpu_mpi_submit_coop_sends(struct _starpu_mpi_coop_sends *coop_sends, in
 void _starpu_mpi_submit_ready_request_inc(struct _starpu_mpi_req *req);
 void _starpu_mpi_request_init(struct _starpu_mpi_req **req);
 struct _starpu_mpi_req * _starpu_mpi_request_fill(starpu_data_handle_t data_handle,
-						       int srcdst, starpu_mpi_tag_t data_tag, MPI_Comm comm,
-						       unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *arg,
-						       enum _starpu_mpi_request_type request_type, void (*func)(struct _starpu_mpi_req *),
-						       int sequential_consistency,
-						       int is_internal_req,
-						       starpu_ssize_t count);
-
+						  int srcdst, starpu_mpi_tag_t data_tag, MPI_Comm comm,
+						  unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *arg,
+						  enum _starpu_mpi_request_type request_type, void (*func)(struct _starpu_mpi_req *),
+						  int sequential_consistency,
+						  int is_internal_req,
+						  starpu_ssize_t count);
 
 void _starpu_mpi_request_destroy(struct _starpu_mpi_req *req);
 void _starpu_mpi_isend_size_func(struct _starpu_mpi_req *req);
@@ -380,6 +322,23 @@ void _starpu_mpi_wait_for_initialization();
 #endif
 void _starpu_mpi_data_flush(starpu_data_handle_t data_handle);
 
+/*
+ * Specific functions to backend implementation
+ */
+struct _starpu_mpi_backend
+{
+	void (*_starpu_mpi_backend_init)(struct starpu_conf *conf);
+	void (*_starpu_mpi_backend_shutdown)(void);
+	int (*_starpu_mpi_backend_reserve_core)(void);
+	void (*_starpu_mpi_backend_request_init)(struct _starpu_mpi_req *req);
+	void (*_starpu_mpi_backend_request_fill)(struct _starpu_mpi_req *req, MPI_Comm comm, int is_internal_req);
+	void (*_starpu_mpi_backend_request_destroy)(struct _starpu_mpi_req *req);
+	void (*_starpu_mpi_backend_data_clear)(starpu_data_handle_t data_handle);
+	void (*_starpu_mpi_backend_data_register)(starpu_data_handle_t data_handle, starpu_mpi_tag_t data_tag);
+	void (*_starpu_mpi_backend_comm_register)(MPI_Comm comm);
+};
+
+extern struct _starpu_mpi_backend _mpi_backend;
 #ifdef __cplusplus
 }
 #endif
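
Note: the generic StarPU-MPI code (starpu_mpi_init.c above, starpu_mpi_req.c below) now dispatches through the _mpi_backend vtable declared here, e.g. _mpi_backend._starpu_mpi_backend_init(conf), instead of per-transport #ifdef blocks. A minimal sketch of how a transport backend might populate the vtable, assuming the field names shown in the struct above; every "example_" identifier is invented for illustration only, the real MPI and nmad backends provide their own implementations.

/* Sketch only: hypothetical backend registration inside the StarPU-MPI source tree. */
#include <starpu.h>
#include "starpu_mpi_private.h"

static void example_backend_init(struct starpu_conf *conf)
{
	(void) conf;
	/* transport-specific global setup (tags, driver hooks, ...) */
}

static void example_backend_shutdown(void)
{
	/* undo whatever example_backend_init() set up */
}

static int example_backend_reserve_core(void)
{
	/* return non-zero when the progression thread needs its own core,
	 * as tested in starpu_mpi_init_conf() above */
	return 1;
}

static void example_backend_request_init(struct _starpu_mpi_req *req)
{
	(void) req;
	/* allocate and initialize req->backend */
}

struct _starpu_mpi_backend _mpi_backend =
{
	._starpu_mpi_backend_init         = example_backend_init,
	._starpu_mpi_backend_shutdown     = example_backend_shutdown,
	._starpu_mpi_backend_reserve_core = example_backend_reserve_core,
	._starpu_mpi_backend_request_init = example_backend_request_init,
	/* the remaining hooks (request_fill, request_destroy, data_clear,
	 * data_register, comm_register) are filled in the same way */
};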

+ 5 - 50
mpi/src/starpu_mpi_req.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2018                                CNRS
+ * Copyright (C) 2010-2019                                CNRS
  * Copyright (C) 2009-2019                                Université de Bordeaux
  * Copyright (C) 2012,2013,2016,2017                      Inria
  * Copyright (C) 2017                                     Guillaume Beauchamp
@@ -19,13 +19,6 @@
 
 #include <starpu.h>
 #include <starpu_mpi_private.h>
-#if defined(STARPU_USE_MPI_MPI)
-#include <mpi/starpu_mpi_comm.h>
-#endif
-#if defined(STARPU_USE_MPI_NMAD)
-#include <pioman.h>
-#include <nm_mpi_nmad.h>
-#endif
 
 void _starpu_mpi_request_init(struct _starpu_mpi_req **req)
 {
@@ -48,21 +41,10 @@ void _starpu_mpi_request_init(struct _starpu_mpi_req **req)
 	(*req)->func = NULL;
 
 	(*req)->status = NULL;
-#ifdef STARPU_USE_MPI_MPI
-	(*req)->data_request = 0;
-#endif
 	(*req)->flag = NULL;
 	_starpu_mpi_req_multilist_init_coop_sends(*req);
 
 	(*req)->ret = -1;
-#ifdef STARPU_USE_MPI_NMAD
-	piom_cond_init(&((*req)->req_cond), 0);
-#elif defined(STARPU_USE_MPI_MPI)
-	STARPU_PTHREAD_MUTEX_INIT(&((*req)->req_mutex), NULL);
-	STARPU_PTHREAD_COND_INIT(&((*req)->req_cond), NULL);
-	STARPU_PTHREAD_MUTEX_INIT(&((*req)->posted_mutex), NULL);
-	STARPU_PTHREAD_COND_INIT(&((*req)->posted_cond), NULL);
-#endif
 
 	(*req)->request_type = UNKNOWN_REQ;
 
@@ -70,23 +52,11 @@ void _starpu_mpi_request_init(struct _starpu_mpi_req **req)
 	(*req)->completed = 0;
 	(*req)->posted = 0;
 
-#ifdef STARPU_USE_MPI_MPI
-	(*req)->other_request = NULL;
-#endif
-
 	(*req)->sync = 0;
 	(*req)->detached = -1;
 	(*req)->callback = NULL;
 	(*req)->callback_arg = NULL;
 
-#ifdef STARPU_USE_MPI_MPI
-	(*req)->size_req = 0;
-	(*req)->internal_req = NULL;
-	(*req)->is_internal_req = 0;
-	(*req)->to_destroy = 1;
-	(*req)->early_data_handle = NULL;
-	(*req)->envelope = NULL;
-#endif
 	(*req)->sequential_consistency = 1;
 	(*req)->pre_sync_jobid = -1;
 	(*req)->post_sync_jobid = -1;
@@ -96,6 +66,7 @@ void _starpu_mpi_request_init(struct _starpu_mpi_req **req)
 	starpu_pthread_queue_register(&_starpu_mpi_thread_wait, &((*req)->queue));
 	(*req)->done = 0;
 #endif
+	_mpi_backend._starpu_mpi_backend_request_init(*req);
 }
 
 struct _starpu_mpi_req *_starpu_mpi_request_fill(starpu_data_handle_t data_handle,
@@ -108,10 +79,6 @@ struct _starpu_mpi_req *_starpu_mpi_request_fill(starpu_data_handle_t data_handl
 {
 	struct _starpu_mpi_req *req;
 
-#ifdef STARPU_USE_MPI_MPI
-	_starpu_mpi_comm_register(comm);
-#endif
-
 	/* Initialize the request structure */
 	_starpu_mpi_request_init(&req);
 	req->request_type = request_type;
@@ -128,30 +95,18 @@ struct _starpu_mpi_req *_starpu_mpi_request_fill(starpu_data_handle_t data_handl
 	req->callback_arg = arg;
 	req->func = func;
 	req->sequential_consistency = sequential_consistency;
-#ifdef STARPU_USE_MPI_NMAD
-	nm_mpi_nmad_dest(&req->session, &req->gate, comm, req->node_tag.rank);
-#elif defined(STARPU_USE_MPI_MPI)
-	req->is_internal_req = is_internal_req;
-	/* For internal requests, we wait for both the request completion and the matching application request completion */
-	req->to_destroy = !is_internal_req;
 	req->count = count;
-#endif
+
+	_mpi_backend._starpu_mpi_backend_request_fill(req, comm, is_internal_req);
 
 	return req;
 }
 
 void _starpu_mpi_request_destroy(struct _starpu_mpi_req *req)
 {
-#ifdef STARPU_USE_MPI_NMAD
-	piom_cond_destroy(&(req->req_cond));
-#elif defined(STARPU_USE_MPI_MPI)
-	STARPU_PTHREAD_MUTEX_DESTROY(&req->req_mutex);
-	STARPU_PTHREAD_COND_DESTROY(&req->req_cond);
-	STARPU_PTHREAD_MUTEX_DESTROY(&req->posted_mutex);
-	STARPU_PTHREAD_COND_DESTROY(&req->posted_cond);
+	_mpi_backend._starpu_mpi_backend_request_destroy(req);
 	free(req->datatype_name);
 	req->datatype_name = NULL;
-#endif
 #ifdef STARPU_SIMGRID
 	starpu_pthread_queue_unregister(&_starpu_mpi_thread_wait, &req->queue);
 	starpu_pthread_queue_destroy(&req->queue);

+ 19 - 23
mpi/src/starpu_mpi_task_insert_fortran.c

@@ -488,26 +488,26 @@ int _fstarpu_mpi_task_insert_v(MPI_Comm comm, struct starpu_codelet *codelet, vo
 	return _starpu_mpi_task_postbuild_v(comm, xrank, do_execute, descrs, nb_data, prio);
 }
 
-int fstarpu_mpi_task_insert(MPI_Fint comm, void ***_arglist)
+void fstarpu_mpi_task_insert(void **arglist)
 {
-	void **arglist = *_arglist;
-	struct starpu_codelet *codelet = arglist[0];
+	MPI_Fint comm = *((MPI_Fint *)arglist[0]);
+	struct starpu_codelet *codelet = arglist[1];
 	if (codelet == NULL)
 	{
 		STARPU_ABORT_MSG("task without codelet");
 	}
-	int ret;
 
-	ret = _fstarpu_mpi_task_insert_v(MPI_Comm_f2c(comm), codelet, arglist+1);
-	return ret;
+	int ret;
+	ret = _fstarpu_mpi_task_insert_v(MPI_Comm_f2c(comm), codelet, arglist+2);
+	STARPU_ASSERT(ret >= 0);
 }
 
 /* fstarpu_mpi_insert_task: aliased to fstarpu_mpi_task_insert in fstarpu_mpi_mod.f90 */
 
-struct starpu_task *fstarpu_mpi_task_build(MPI_Fint comm, void ***_arglist)
+struct starpu_task *fstarpu_mpi_task_build(void **arglist)
 {
-	void **arglist = *_arglist;
-	struct starpu_codelet *codelet = arglist[0];
+	MPI_Fint comm = *((MPI_Fint *)arglist[0]);
+	struct starpu_codelet *codelet = arglist[1];
 	if (codelet == NULL)
 	{
 		STARPU_ABORT_MSG("task without codelet");
@@ -515,38 +515,34 @@ struct starpu_task *fstarpu_mpi_task_build(MPI_Fint comm, void ***_arglist)
 	struct starpu_task *task;
 	int ret;
 
-	ret = _fstarpu_mpi_task_build_v(MPI_Comm_f2c(comm), codelet, &task, NULL, NULL, NULL, NULL, arglist+1);
+	ret = _fstarpu_mpi_task_build_v(MPI_Comm_f2c(comm), codelet, &task, NULL, NULL, NULL, NULL, arglist+2);
 	STARPU_ASSERT(ret >= 0);
 	return (ret > 0) ? NULL : task;
 }
 
-int fstarpu_mpi_task_post_build(MPI_Fint _comm, void ***_arglist)
+void fstarpu_mpi_task_post_build(void **arglist)
 {
-	void **arglist = *_arglist;
-	struct starpu_codelet *codelet = arglist[0];
+	MPI_Fint comm = *((MPI_Fint *)arglist[0]);
+	struct starpu_codelet *codelet = arglist[1];
 	if (codelet == NULL)
 	{
 		STARPU_ABORT_MSG("task without codelet");
 	}
-	MPI_Comm comm = MPI_Comm_f2c(_comm);
 	int xrank, do_execute;
 	int ret, me, nb_nodes;
 	struct starpu_data_descr *descrs;
 	int nb_data;
 	int prio;
 
-	starpu_mpi_comm_rank(comm, &me);
-	starpu_mpi_comm_size(comm, &nb_nodes);
+	starpu_mpi_comm_rank(MPI_Comm_f2c(comm), &me);
+	starpu_mpi_comm_size(MPI_Comm_f2c(comm), &nb_nodes);
 
 	/* Find out whether we are to execute the data because we own the data to be written to. */
-	ret = _fstarpu_mpi_task_decode_v(codelet, me, nb_nodes, &xrank, &do_execute, &descrs, &nb_data, &prio, arglist);
-	if (ret < 0)
-		return ret;
+	ret = _fstarpu_mpi_task_decode_v(codelet, me, nb_nodes, &xrank, &do_execute, &descrs, &nb_data, &prio, arglist+2);
+	STARPU_ASSERT(ret >= 0);
 
-	return _starpu_mpi_task_postbuild_v(comm, xrank, do_execute, descrs, nb_data, prio);
+	ret = _starpu_mpi_task_postbuild_v(MPI_Comm_f2c(comm), xrank, do_execute, descrs, nb_data, prio);
+	STARPU_ASSERT(ret >= 0);
 }
 
 #endif /* HAVE_MPI_COMM_F2C */
-
-
-
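
Note: these Fortran entry points now take a single void ** argument list in which the communicator travels as element 0 and the codelet as element 1, and they abort on error instead of returning a status. A hypothetical C-side sketch of the layout the Fortran module is assumed to build; in practice the list comes from fstarpu_mpi_mod.f90, and the NULL-terminated argument convention of the native Fortran interface is assumed here.

extern struct starpu_codelet my_cl;                  /* assumed defined elsewhere */
MPI_Fint comm_f = MPI_Comm_c2f(MPI_COMM_WORLD);      /* Fortran communicator handle */
void *arglist[] =
{
	&comm_f,   /* arglist[0]: pointer to the MPI_Fint communicator */
	&my_cl,    /* arglist[1]: the codelet */
	NULL       /* further task arguments would precede the terminator */
};
fstarpu_mpi_task_insert(arglist);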

+ 3 - 3
mpi/tests/attr.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2017                                     Inria
- * Copyright (C) 2017,2018                                CNRS
+ * Copyright (C) 2017,2018,2019                           CNRS
  * Copyright (C) 2017,2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -33,8 +33,8 @@ int main(int argc STARPU_ATTRIBUTE_UNUSED, char *argv[] STARPU_ATTRIBUTE_UNUSED)
 	STARPU_ASSERT_MSG(flag == 1, "starpu_mpi_comm_get_attr was called with valid argument\n");
 
 	rvalue = *value;
-	FPRINTF(stderr, "Value: %"PRIi64"d\n", *value);
-	FPRINTF(stderr, "Value: %"PRIi64"d\n", rvalue);
+	FPRINTF(stderr, "Value: %"PRIi64"\n", *value);
+	FPRINTF(stderr, "Value: %"PRIi64"\n", rvalue);
 
 	return 0;
 }

+ 8 - 3
mpi/tests/block_interface.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2009-2011,2014,2015,2017,2018            Université de Bordeaux
  * Copyright (C) 2013                                     Inria
- * Copyright (C) 2010-2012,2014,2015,2017                 CNRS
+ * Copyright (C) 2010-2012,2014,2015,2017,2019            CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -20,7 +20,11 @@
 #include <stdlib.h>
 #include "helper.h"
 
-#define NITER	2048
+#ifdef STARPU_QUICK_CHECK
+#  define NITER	16
+#else
+#  define NITER	2048
+#endif
 
 #define BIGSIZE	128
 #define SIZE	64
@@ -44,7 +48,8 @@ int main(int argc, char **argv)
 			FPRINTF(stderr, "We need at least 2 processes.\n");
 
 		starpu_mpi_shutdown();
-		MPI_Finalize();
+		if (!mpi_init)
+			MPI_Finalize();
 		return STARPU_TEST_SKIPPED;
 	}
 

+ 7 - 3
mpi/tests/block_interface_pinned.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2009-2011,2014,2015,2017,2018            Université de Bordeaux
  * Copyright (C) 2013                                     Inria
- * Copyright (C) 2010-2012,2015,2017                      CNRS
+ * Copyright (C) 2010-2012,2015,2017,2019                 CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -20,9 +20,13 @@
 #include <stdlib.h>
 #include "helper.h"
 
-#define NITER	2048
+#ifdef STARPU_QUICK_CHECK
+#  define NITER	16
+#else
+#  define NITER	2048
+#endif
 
-#define BIGSIZE	64
+#define BIGSIZE	128
 #define SIZE	64
 
 int main(int argc, char **argv)

+ 1 - 1
src/common/fxt.c

@@ -123,7 +123,7 @@ void starpu_fxt_autostart_profiling(int autostart)
 	if (autostart)
 		initial_key_mask = FUT_KEYMASKALL;
 	else
-		initial_key_mask = FUT_KEYMASK0;
+		initial_key_mask = 0;
 }
 
 void starpu_fxt_start_profiling()

+ 3 - 2
src/core/jobs.c

@@ -466,6 +466,7 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 	unsigned destroy = task->destroy;
 	unsigned detach = task->detach;
 	unsigned regenerate = task->regenerate;
+	unsigned synchronous = task->synchronous;
 
 	/* we do not desallocate the job structure if some is going to
 	 * wait after the task */
@@ -501,9 +502,9 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 	/* A continuation is not much different from a regenerated task. */
 	if (regenerate || continuation)
 	{
-		STARPU_ASSERT_MSG((detach && !destroy && !task->synchronous)
+		STARPU_ASSERT_MSG((detach && !destroy && !synchronous)
 				|| continuation
-				, "Regenerated task must be detached (was %u), and not have detroy=1 (was %u) or synchronous=1 (was %u)", detach, destroy, task->synchronous);
+				, "Regenerated task must be detached (was %u), and not have destroy=1 (was %u) or synchronous=1 (was %u)", detach, destroy, synchronous);
 		STARPU_AYU_ADDTASK(j->job_id, j->exclude_from_dag?NULL:task);
 
 		{

+ 7 - 4
src/core/perfmodel/perfmodel_history.c

@@ -540,7 +540,7 @@ static void scan_history_entry(FILE *f, const char *path, struct starpu_perfmode
 
 	if (entry)
 	{
-		STARPU_ASSERT_MSG(flops >=0, "Negative flops %lf in performance model file %s", flops, path);
+		STARPU_ASSERT_MSG(isnan(flops) || flops >=0, "Negative flops %lf in performance model file %s", flops, path);
 		STARPU_ASSERT_MSG(mean >=0, "Negative mean %lf in performance model file %s", mean, path);
 		STARPU_ASSERT_MSG(deviation >=0, "Negative deviation %lf in performance model file %s", deviation, path);
 		STARPU_ASSERT_MSG(sum >=0, "Negative sum %lf in performance model file %s", sum, path);
@@ -993,7 +993,7 @@ static void dump_per_arch_model_xml(FILE *f, struct starpu_perfmodel *model, int
 
 	per_arch_model = &model->state->per_arch[comb][impl];
 	/* count the number of elements in the lists */
-	struct starpu_perfmodel_history_list *ptr = NULL;
+	struct starpu_perfmodel_history_list *ptr;
 
 	dump_reg_model_xml(f, model, comb, impl);
 
@@ -1938,13 +1938,16 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 					entry->deviation = sqrt((fabs(entry->sum2 - (entry->sum*entry->sum)/n))/n);
 				}
 
-				if (j->task->flops != 0.)
+				if (j->task->flops != 0. && !isnan(entry->flops))
 				{
 					if (entry->flops == 0.)
 						entry->flops = j->task->flops;
-					else if (((entry->flops - j->task->flops) / entry->flops) > 0.00001)
+					else if ((fabs(entry->flops - j->task->flops) / entry->flops) > 0.00001)
+					{
 						/* Incoherent flops! forget about trying to record flops */
+						_STARPU_DISP("Incoherent flops in model %s: %f vs previous %f, stopping recording flops\n", model->symbol, j->task->flops, entry->flops);
 						entry->flops = NAN;
+					}
 				}
 			}
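
Note: the coherence test above uses a relative tolerance of 1e-5, now computed with fabs(). For instance, an entry first recorded with 2.9537e+08 flops that is later fed a task declaring 2.96e+08 flops differs by about 0.2%, so entry->flops is set to NAN, the new diagnostic is printed and recording stops; scan_history_entry() above was relaxed so that such a NaN value is accepted when the performance model file is read back.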
 

+ 1 - 0
src/core/sched_policy.c

@@ -77,6 +77,7 @@ static struct starpu_sched_policy *predefined_policies[] =
 	&_starpu_sched_ws_policy,
 	&_starpu_sched_dm_policy,
 	&_starpu_sched_dmda_policy,
+	&_starpu_sched_dmda_prio_policy,
 	&_starpu_sched_dmda_ready_policy,
 	&_starpu_sched_dmda_sorted_policy,
 	&_starpu_sched_dmda_sorted_decision_policy,

+ 1 - 0
src/core/sched_policy.h

@@ -83,6 +83,7 @@ extern struct starpu_sched_policy _starpu_sched_prio_policy;
 extern struct starpu_sched_policy _starpu_sched_random_policy;
 extern struct starpu_sched_policy _starpu_sched_dm_policy;
 extern struct starpu_sched_policy _starpu_sched_dmda_policy;
+extern struct starpu_sched_policy _starpu_sched_dmda_prio_policy;
 extern struct starpu_sched_policy _starpu_sched_dmda_ready_policy;
 extern struct starpu_sched_policy _starpu_sched_dmda_sorted_policy;
 extern struct starpu_sched_policy _starpu_sched_dmda_sorted_decision_policy;

+ 17 - 22
src/core/topology.c

@@ -1447,6 +1447,7 @@ static unsigned _starpu_topology_count_ngpus(hwloc_obj_t obj)
 static int _starpu_init_machine_config(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED)
 {
 	int i;
+
 	for (i = 0; i < STARPU_NMAXWORKERS; i++)
 	{
 		config->workers[i].workerid = i;
@@ -1749,34 +1750,28 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config, in
 			int nth_per_core = starpu_get_env_number_default("STARPU_NTHREADS_PER_CORE", 1);
 			avail_cpus *= nth_per_core;
 
-			if (config->conf.reserve_ncpus > 0)
-			{
-				if (avail_cpus < config->conf.reserve_ncpus)
-				{
-					_STARPU_DISP("Warning: %d CPU cores were requested to be reserved, but only %ld were available,\n", config->conf.reserve_ncpus, avail_cpus);
-					avail_cpus = 0;
-				}
-				else
-				{
-					avail_cpus -= config->conf.reserve_ncpus;
-				}
-			}
-
 			ncpu = avail_cpus;
-			if (ncpu > STARPU_MAXCPUS)
-			{
-				_STARPU_DISP("Warning: %d CPU cores detected. Only %d enabled. Use configure option --enable-maxcpus=xxx to update the maximum value of supported CPU devices.\n", ncpu, STARPU_MAXCPUS);
-				ncpu = STARPU_MAXCPUS;
-			}
 		}
-		else
+
+		if (ncpu > STARPU_MAXCPUS)
+		{
+			_STARPU_DISP("Warning: %d CPU cores requested. Only %d enabled. Use configure option --enable-maxcpus=xxx to update the maximum value of supported CPU devices.\n", ncpu, STARPU_MAXCPUS);
+			ncpu = STARPU_MAXCPUS;
+		}
+
+		if (config->conf.reserve_ncpus > 0)
 		{
-			if (ncpu > STARPU_MAXCPUS)
+			if (ncpu < config->conf.reserve_ncpus)
 			{
-				_STARPU_DISP("Warning: %d CPU cores requested. Only %d enabled. Use configure option --enable-maxcpus=xxx to update the maximum value of supported CPU devices.\n", ncpu, STARPU_MAXCPUS);
-				ncpu = STARPU_MAXCPUS;
+				_STARPU_DISP("Warning: %d CPU cores were requested to be reserved, but only %d were available.\n", config->conf.reserve_ncpus, ncpu);
+				ncpu = 0;
+			}
+			else
+			{
+				ncpu -= config->conf.reserve_ncpus;
 			}
 		}
+
 	}
 
 	topology->ncpus = ncpu;
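
Note: with the checks reordered, the STARPU_MAXCPUS clamp is applied before the reservation, and the reservation is honoured regardless of whether the CPU count was autodetected. With hypothetical figures: on a machine exposing 72 hardware threads, built with STARPU_MAXCPUS=64 and with reserve_ncpus=2, the previous code computed 72-2=70 and then clamped to 64 workers, so the reservation was absorbed by the clamp; the new code clamps to 64 first and then reserves, leaving 62 workers plus the reserved cores.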

+ 18 - 0
src/sched_policies/deque_modeling_policy_data_aware.c

@@ -1320,6 +1320,24 @@ struct starpu_sched_policy _starpu_sched_dmda_policy =
 	.worker_type = STARPU_WORKER_LIST,
 };
 
+struct starpu_sched_policy _starpu_sched_dmda_prio_policy =
+{
+	.init_sched = initialize_dmda_sorted_policy,
+	.deinit_sched = deinitialize_dmda_policy,
+	.add_workers = dmda_add_workers ,
+	.remove_workers = dmda_remove_workers,
+	.push_task = dmda_push_sorted_task,
+	.simulate_push_task = dmda_simulate_push_sorted_task,
+	.push_task_notify = dmda_push_task_notify,
+	.pop_task = dmda_pop_task,
+	.pre_exec_hook = dmda_pre_exec_hook,
+	.post_exec_hook = dmda_post_exec_hook,
+	.pop_every_task = dmda_pop_every_task,
+	.policy_name = "dmdap",
+	.policy_description = "data-aware performance model (priority)",
+	.worker_type = STARPU_WORKER_LIST,
+};
+
 struct starpu_sched_policy _starpu_sched_dmda_sorted_policy =
 {
 	.init_sched = initialize_dmda_sorted_policy,
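
Note: as the policy table above shows, the new dmdap variant pushes into priority-sorted queues (initialize_dmda_sorted_policy, dmda_push_sorted_task) while keeping the plain dmda pop path. A minimal sketch of selecting it programmatically; it can equivalently be requested at run time with STARPU_SCHED=dmdap.

#include <starpu.h>

int main(void)
{
	struct starpu_conf conf;
	starpu_conf_init(&conf);
	conf.sched_policy_name = "dmdap";  /* data-aware performance model (priority) */

	if (starpu_init(&conf) != 0)
		return 1;

	/* ... submit tasks here, optionally setting task->priority ... */

	starpu_shutdown();
	return 0;
}

The microbenchmark scripts below add dmdap to their XSUCCESS lists accordingly.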

+ 0 - 1
tests/datawizard/interfaces/test_interfaces.c

@@ -168,7 +168,6 @@ static int create_task(struct starpu_task **taskp, enum starpu_worker_archtype t
 		n_mics = starpu_worker_get_ids_by_type(STARPU_MIC_WORKER, mic_workers, STARPU_MAXMICDEVS);
 	}
 
-	int workerid=0;
 	int *workers;
 	static struct starpu_codelet cl;
 	starpu_codelet_init(&cl);

+ 1 - 1
tests/datawizard/variable_size.c

@@ -296,7 +296,7 @@ int main(void)
 	setenv("STARPU_LIMIT_CPU_MEM", LIMIT, 1);
 	setenv("STARPU_DISK_SWAP", s, 0);
 	setenv("STARPU_DISK_SWAP_SIZE", "100000", 1);
-#ifdef STARPU_LINUX_SYS
+#if 0 //def STARPU_LINUX_SYS
 	setenv("STARPU_DISK_SWAP_BACKEND", "unistd_o_direct", 0);
 #else
 	setenv("STARPU_DISK_SWAP_BACKEND", "unistd", 0);

+ 1 - 1
tests/fault-tolerance/retry.c

@@ -47,7 +47,7 @@ void cpu_increment(void *descr[], void *arg)
 static struct starpu_codelet my_codelet =
 {
 	.cpu_funcs = {cpu_increment},
-	.cpu_funcs_name = {"cpu_increment"},
+	//.cpu_funcs_name = {"cpu_increment"},
 	.modes = { STARPU_R, STARPU_W },
 	.nbuffers = 2
 };

+ 36 - 21
tests/loader.c

@@ -223,10 +223,41 @@ int main(int argc, char *argv[])
 	struct timeval start;
 	struct timeval end;
 	double timing;
+	int x=1;
 
 	test_args = NULL;
 	timeout = 0;
-	test_name = argv[1];
+
+	if (argv[x] && strcmp(argv[x], "-t") == 0)
+	{
+		timeout = strtol(argv[x+1], NULL, 10);
+		x += 2;
+	}
+	else if (getenv("STARPU_TIMEOUT_ENV"))
+	{
+		/* get user-defined iter_max value */
+		timeout = strtol(getenv("STARPU_TIMEOUT_ENV"), NULL, 10);
+	}
+	if (timeout <= 0)
+		timeout = DEFAULT_TIMEOUT;
+
+#ifdef STARPU_USE_MPI_MASTER_SLAVE
+	/* compare values between the 2 values of timeout */
+	if (getenv("MPIEXEC_TIMEOUT"))
+	{
+		int mpiexec_timeout = strtol(getenv("MPIEXEC_TIMEOUT"), NULL, 10);
+		if (mpiexec_timeout != timeout)
+			fprintf(stderr, "[warning] MPIEXEC_TIMEOUT and STARPU_TIMEOUT_ENV values are different (%d and %d). The behavior may be different than expected !\n", mpiexec_timeout, timeout);
+	}
+#endif
+
+	if (argv[x] && strcmp(argv[x], "-p") == 0)
+	{
+		test_name = malloc(strlen(argv[x+1]) + 1 + strlen(argv[x+2]) + 1);
+		sprintf(test_name, "%s/%s", argv[x+1], argv[x+2]);
+	}
+	else
+		test_name = argv[x];
 
 	if (!test_name)
 	{
@@ -234,6 +265,10 @@ int main(int argc, char *argv[])
 		exit(EXIT_FAILURE);
 	}
 
+	if (strstr(test_name, "tasks_size_overhead_scheds.sh") || strstr(test_name, "schedulers.sh"))
+		/* This extensively tests various schedulers, let it run longer */
+		timeout *= 10;
+
 	if (strstr(test_name, "spmv/dw_block_spmv"))
 	{
 		test_args = (char *) calloc(150, sizeof(char));
@@ -254,26 +289,6 @@ int main(int argc, char *argv[])
 	if (launcher_args)
 		launcher_args=strdup(launcher_args);
 
-	/* get user-defined iter_max value */
-	if (getenv("STARPU_TIMEOUT_ENV"))
-		timeout = strtol(getenv("STARPU_TIMEOUT_ENV"), NULL, 10);
-	if (timeout <= 0)
-		timeout = DEFAULT_TIMEOUT;
-
-#ifdef STARPU_USE_MPI_MASTER_SLAVE
-	/* compare values between the 2 values of timeout */
-	if (getenv("MPIEXEC_TIMEOUT"))
-	{
-		int mpiexec_timeout = strtol(getenv("MPIEXEC_TIMEOUT"), NULL, 10);
-		if (mpiexec_timeout != timeout)
-			fprintf(stderr, "[warning] MPIEXEC_TIMEOUT and STARPU_TIMEOUT_ENV values are different (%d and %d). The behavior may be different than expected !\n", mpiexec_timeout, timeout);
-	}
-#endif
-
-	if (strstr(test_name, "tasks_size_overhead_scheds.sh") || strstr(test_name, "schedulers.sh"))
-		/* This extensively tests various schedulers, let it run longer */
-		timeout *= 10;
-
 	setenv("STARPU_OPENCL_PROGRAM_DIR", STARPU_SRC_DIR, 1);
 
 	/* set SIGALARM handler */
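
Note: with this parsing the loader accepts an invocation such as "loader -t 600 -p <dir> <test>" (hypothetical values): -t overrides STARPU_TIMEOUT_ENV, -p assembles the test path as "<dir>/<test>", and the comparison with MPIEXEC_TIMEOUT is now performed once the timeout value is final.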

+ 2 - 1
tests/main/bind.c

@@ -52,7 +52,8 @@ int main(void)
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 	/* Make sure StarPU uses two core less */
-	STARPU_ASSERT(starpu_worker_get_count_by_type(STARPU_CPU_WORKER) == ncpus-2);
+	STARPU_ASSERT_MSG(starpu_worker_get_count_by_type(STARPU_CPU_WORKER) == ncpus-2, "Expected %d CPUs, got %d\n", ncpus-2, starpu_worker_get_count_by_type(STARPU_CPU_WORKER));
+	FPRINTF(stderr, "CPUS: %d as expected\n", starpu_worker_get_count_by_type(STARPU_CPU_WORKER));
 
 	/* Check we can grab a whole core */
 	active_bindid = starpu_get_next_bindid(STARPU_THREAD_ACTIVE, NULL, 0);

+ 2 - 2
tests/microbenchs/parallel_dependent_homogeneous_tasks_data.sh

@@ -2,7 +2,7 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 # Copyright (C) 2016,2017                                CNRS
-# Copyright (C) 2016,2017                                Université de Bordeaux
+# Copyright (C) 2016,2017,2019                           Université de Bordeaux
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
@@ -17,6 +17,6 @@
 #
 source $(dirname $0)/microbench.sh
 
-XSUCCESS="dmda dmdar dmdas dmdasd"
+XSUCCESS="dmda dmdap dmdar dmdas dmdasd"
 
 test_scheds parallel_dependent_homogeneous_tasks_data

+ 2 - 2
tests/microbenchs/parallel_independent_heterogeneous_tasks_data.sh

@@ -2,7 +2,7 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 # Copyright (C) 2016,2017                                CNRS
-# Copyright (C) 2016,2017                                Université de Bordeaux
+# Copyright (C) 2016,2017,2019                           Université de Bordeaux
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
@@ -17,6 +17,6 @@
 #
 source $(dirname $0)/microbench.sh
 
-XSUCCESS="dmda dmdar dmdas dmdasd pheft"
+XSUCCESS="dmda dmdap dmdar dmdas dmdasd pheft"
 
 test_scheds parallel_independent_heterogeneous_tasks_data

+ 2 - 2
tests/microbenchs/parallel_independent_homogeneous_tasks_data.sh

@@ -2,7 +2,7 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 # Copyright (C) 2016,2017                                CNRS
-# Copyright (C) 2016,2017                                Université de Bordeaux
+# Copyright (C) 2016,2017,2019                           Université de Bordeaux
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
@@ -17,6 +17,6 @@
 #
 source $(dirname $0)/microbench.sh
 
-XSUCCESS="dmda dmdar dmdas dmdasd pheft"
+XSUCCESS="dmda dmdap dmdar dmdas dmdasd pheft"
 
 test_scheds parallel_independent_homogeneous_tasks_data

+ 2 - 2
tests/microbenchs/parallel_redux_heterogeneous_tasks_data.sh

@@ -2,7 +2,7 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 # Copyright (C) 2016,2017                                CNRS
-# Copyright (C) 2016,2017                                Université de Bordeaux
+# Copyright (C) 2016,2017,2019                           Université de Bordeaux
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
@@ -17,6 +17,6 @@
 #
 source $(dirname $0)/microbench.sh
 
-XSUCCESS="dmda dmdar dmdas dmdasd pheft"
+XSUCCESS="dmda dmdap dmdar dmdas dmdasd pheft"
 
 test_scheds parallel_independent_heterogeneous_tasks_data

+ 2 - 2
tests/microbenchs/parallel_redux_homogeneous_tasks_data.sh

@@ -2,7 +2,7 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 # Copyright (C) 2016,2017                                CNRS
-# Copyright (C) 2016,2017                                Université de Bordeaux
+# Copyright (C) 2016,2017,2019                           Université de Bordeaux
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
@@ -17,6 +17,6 @@
 #
 source $(dirname $0)/microbench.sh
 
-XSUCCESS="dmda dmdar dmdas dmdasd pheft"
+XSUCCESS="dmda dmdap dmdar dmdas dmdasd pheft"
 
 test_scheds parallel_independent_homogeneous_tasks_data

+ 2 - 2
tests/parallel_tasks/parallel_kernels_trivial.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2010-2016,2018                           Université de Bordeaux
  * Copyright (C) 2012,2013                                Inria
- * Copyright (C) 2010-2013,2015,2017                      CNRS
+ * Copyright (C) 2010-2013,2015,2017,2019                 CNRS
  * Copyright (C) 2013                                     Thibaut Lambert
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -68,7 +68,7 @@ static struct starpu_codelet cl_seq =
 {
 	.cpu_funcs = {codelet_null},
 	.cuda_funcs = {codelet_null},
-	.cpu_funcs_name = {"codelet_null_seq"},
+	.cpu_funcs_name = {"codelet_null"},
         .opencl_funcs = {codelet_null},
 	.model = &model,
 	.nbuffers = 1,

+ 15 - 15
tools/Makefile.am

@@ -328,43 +328,43 @@ endif
 
 if STARPU_HAVE_HELP2MAN
 starpu_calibrate_bus.1: starpu_calibrate_bus$(EXEEXT)
-	help2man --no-discard-stderr -N --output=$@ ./$<
+	help2man --no-discard-stderr -N -n "Force StarPU bus calibration" --output=$@ ./$<
 starpu_machine_display.1: starpu_machine_display$(EXEEXT)
-	help2man --no-discard-stderr -N --output=$@ ./$<
+	help2man --no-discard-stderr -N -n "Display StarPU machine information" --output=$@ ./$<
 starpu_perfmodel_display.1: starpu_perfmodel_display$(EXEEXT)
-	help2man --no-discard-stderr -N --output=$@ ./$<
+	help2man --no-discard-stderr -N -n "Display StarPU performance model" --output=$@ ./$<
 starpu_perfmodel_plot.1: starpu_perfmodel_plot$(EXEEXT)
-	help2man --no-discard-stderr -N --output=$@ ./$<
+	help2man --no-discard-stderr -N -n "Plot StarPU performance model" --output=$@ ./$<
 starpu_tasks_rec_complete.1: starpu_tasks_rec_complete$(EXEEXT)
-	help2man --no-discard-stderr -N --output=$@ ./$<
+	help2man --no-discard-stderr -N -n "Complete StarPU tasks.rec file" --output=$@ ./$<
 starpu_lp2paje.1: starpu_lp2paje$(EXEEXT)
-	help2man --no-discard-stderr -N --output=$@ ./$<
+	help2man --no-discard-stderr -N -n "Convert lp StarPU schedule into Paje format" --output=$@ ./$<
 starpu_workers_activity.1: starpu_workers_activity
 	chmod +x $<
-	help2man --no-discard-stderr -N --output=$@ ./$<
+	help2man --no-discard-stderr -N -n "Display StarPU workers activity" --output=$@ ./$<
 starpu_codelet_profile.1: starpu_codelet_profile
 	chmod +x $<
-	help2man --no-discard-stderr -N --output=$@ ./$<
+	help2man --no-discard-stderr -N -n "Draw StarPU codelet profile" --output=$@ ./$<
 starpu_codelet_histo_profile.1: starpu_codelet_histo_profile
 	chmod +x $<
-	help2man --no-discard-stderr -N --output=$@ ./$<
+	help2man --no-discard-stderr -N -n "Draw StarPU codelet histogram" --output=$@ ./$<
 starpu_mpi_comm_matrix.1: starpu_mpi_comm_matrix.py
 	chmod +x $<
-	help2man --no-discard-stderr -N --output=$@ ./$<
+	help2man --no-discard-stderr -N -n "Draw StarPU MPI communications matrix" --output=$@ ./$<
 starpu_paje_draw_histogram.1: starpu_paje_draw_histogram
 	chmod +x $<
-	help2man --no-discard-stderr -N --output=$@ ./$<
+	help2man --no-discard-stderr -N -n "Draw StarPU trace histogram" --output=$@ ./$<
 starpu_paje_state_stats.1: starpu_paje_state_stats
 	chmod +x $<
-	help2man --no-discard-stderr -N --output=$@ ./$<
+	help2man --no-discard-stderr -N -n "Print statistics from StarPU trace" --output=$@ ./$<
 
 if STARPU_USE_FXT
 starpu_fxt_tool.1: starpu_fxt_tool$(EXEEXT)
-	help2man --no-discard-stderr -N --output=$@ ./$<
+	help2man --no-discard-stderr -N -n "Convert raw StarPU FxT trace to various traces" --output=$@ ./$<
 starpu_fxt_stats.1: starpu_fxt_stats$(EXEEXT)
-	help2man --no-discard-stderr -N --output=$@ ./$<
+	help2man --no-discard-stderr -N -n "Print statistics from raw StarPU FxT trace" --output=$@ ./$<
 starpu_fxt_data_trace.1: starpu_fxt_data_trace$(EXEEXT)
-	help2man --no-discard-stderr -N --output=$@ ./$<
+	help2man --no-discard-stderr -N -n "Print data trace from raw StarPU FxT trace" --output=$@ ./$<
 endif
 
 dist_man1_MANS =\

+ 104 - 1
tools/dev/valgrind/libc.suppr

@@ -1,6 +1,6 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2014,2016-2018                           CNRS
+# Copyright (C) 2014,2016-2019                           CNRS
 # Copyright (C) 2014-2016,2019                           Université de Bordeaux
 #
 # StarPU is free software; you can redistribute it and/or modify
@@ -156,3 +156,106 @@
    fun:_dl_init
    obj:/lib/x86_64-linux-gnu/ld-2.26.so
 }
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Value8
+   obj:/lib/x86_64-linux-gnu/libc-2.28.so
+   ...
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Cond
+   obj:/lib/x86_64-linux-gnu/libc-2.28.so
+   ...
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Cond
+   obj:/lib/x86_64-linux-gnu/libc-2.28.so
+   ...
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Value8
+   obj:/lib/x86_64-linux-gnu/ld-2.28.so
+   obj:*
+   obj:*
+   obj:*
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Cond
+   obj:/lib/x86_64-linux-gnu/ld-2.28.so
+   obj:*
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Value8
+   obj:/lib/x86_64-linux-gnu/ld-2.28.so
+   obj:*
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Param
+   openat(filename)
+   obj:/lib/x86_64-linux-gnu/libc-2.28.so
+   obj:*
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Value8
+   obj:/lib/x86_64-linux-gnu/ld-2.28.so
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Cond
+   obj:/lib/x86_64-linux-gnu/ld-2.28.so
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Param
+   openat(filename)
+   obj:/lib/x86_64-linux-gnu/ld-2.28.so
+   ...
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Cond
+   obj:/lib/x86_64-linux-gnu/libdl-2.28.so
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Param
+   read(count)
+   obj:/lib/x86_64-linux-gnu/libc-2.28.so
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Param
+   read(buf)
+   obj:/lib/x86_64-linux-gnu/libc-2.28.so
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Param
+   lseek(offset)
+   obj:/lib/x86_64-linux-gnu/libc-2.28.so
+   obj:/lib/x86_64-linux-gnu/libc-2.28.so
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Cond
+   obj:/usr/lib/x86_64-linux-gnu/libltdl.so.7.3.1
+   obj:*
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Value8
+   obj:/usr/lib/x86_64-linux-gnu/libltdl.so.7.3.1
+   obj:*
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Cond
+   obj:/usr/lib/x86_64-linux-gnu/libltdl.so.7.3.1
+}

+ 10 - 1
tools/dev/valgrind/libnuma.suppr

@@ -1,7 +1,7 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 # Copyright (C) 2012                                     Inria
-# Copyright (C) 2012,2017                                CNRS
+# Copyright (C) 2012,2017,2019                           CNRS
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
@@ -30,3 +30,12 @@
    fun:numa_node_size64
    ...
 }
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Cond
+   obj:/usr/lib/x86_64-linux-gnu/libnuma.so.1.0.0
+   obj:/usr/lib/x86_64-linux-gnu/libnuma.so.1.0.0
+   obj:*
+   obj:/usr/lib/x86_64-linux-gnu/libquadmath.so.0.0.0
+   obj:*
+}

+ 84 - 0
tools/dev/valgrind/nvidia.suppr

@@ -0,0 +1,84 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2019                                     CNRS
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Cond
+   obj:/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-opencl.so.390.87
+   ...
+   obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0
+   obj:/lib/x86_64-linux-gnu/ld-2.28.so
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Value8
+   obj:/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-opencl.so.390.87
+   ...
+   obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0
+   obj:/lib/x86_64-linux-gnu/ld-2.28.so
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Param
+   readlink(path)
+   obj:/lib/x86_64-linux-gnu/libc-2.28.so
+   obj:/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-opencl.so.390.87
+   obj:/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-opencl.so.390.87
+   obj:/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-opencl.so.390.87
+   obj:/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-opencl.so.390.87
+   obj:/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-opencl.so.390.87
+   obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0
+   obj:/lib/x86_64-linux-gnu/ld-2.28.so
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Param
+   lstat(file_name)
+   obj:/lib/x86_64-linux-gnu/libc-2.28.so
+   obj:/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-opencl.so.390.87
+   obj:/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-opencl.so.390.87
+   obj:/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-opencl.so.390.87
+   obj:/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-opencl.so.390.87
+   obj:/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-opencl.so.390.87
+   obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0
+   obj:/lib/x86_64-linux-gnu/ld-2.28.so
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Value8
+   obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0
+   obj:/lib/x86_64-linux-gnu/ld-2.28.so
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Cond
+   obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0
+   obj:/lib/x86_64-linux-gnu/ld-2.28.so
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Value8
+   obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0
+   obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0
+   obj:/lib/x86_64-linux-gnu/ld-2.28.so
+}
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Cond
+   obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0
+   obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0
+   obj:/lib/x86_64-linux-gnu/ld-2.28.so
+}

+ 6 - 1
tools/dev/valgrind/pthread.suppr

@@ -1,7 +1,7 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 # Copyright (C) 2012                                     Inria
-# Copyright (C) 2012,2017                                CNRS
+# Copyright (C) 2012,2017,2019                           CNRS
 # Copyright (C) 2013                                     Université de Bordeaux
 #
 # StarPU is free software; you can redistribute it and/or modify
@@ -39,3 +39,8 @@
    fun:pthread_mutex_destroy
    ...
 }
+{
+   <insert_a_suppression_name_here>
+   Memcheck:Value8
+   obj:/lib/x86_64-linux-gnu/libpthread-2.28.so
+}

+ 2 - 2
tools/dev/valgrind/valgrind.sh

@@ -1,7 +1,7 @@
 #!/bin/bash
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2016,2017                                CNRS
+# Copyright (C) 2016,2017,2019                           CNRS
 # Copyright (C) 2017                                     Université de Bordeaux
 #
 # StarPU is free software; you can redistribute it and/or modify
@@ -31,4 +31,4 @@ else
 fi
 SUPPRESSIONS=$(for f in $(dirname $0)/*.suppr /usr/share/hwloc/hwloc-valgrind.supp; do if test -f $f ; then echo "--suppressions=$f" ; fi ; done)
 
-$RUN --num-callers=42 --gen-suppressions=all $SUPPRESSIONS $*
+$RUN --num-callers=42 --error-limit=no --gen-suppressions=all $SUPPRESSIONS $*

+ 3 - 3
tools/perfmodels/sampling/codelets/45/chol_model_11.hannibal

@@ -36,7 +36,7 @@ nan	nan	nan
 # not multiple-regression-base
 0
 #	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
-617e5fe6	3686400	0	1.701016e+05	7.229737e+03	4.082438e+06	6.956835e+11	24
+617e5fe6	3686400	2.953730e+08   	1.701016e+05	7.229737e+03	4.082438e+06	6.956835e+11	24
 
 ####################
 # COMB_2
@@ -68,7 +68,7 @@ nan	nan	nan
 # not multiple-regression-base
 0
 #	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
-617e5fe6	3686400	0	1.188776e+05	9.331204e+02	2.113643e+08	2.512803e+13	1778
+617e5fe6	3686400	2.953730e+08   	1.188776e+05	9.331204e+02	2.113643e+08	2.512803e+13	1778
 
 ####################
 # COMB_3
@@ -100,5 +100,5 @@ nan	nan	nan
 # not multiple-regression-base
 0
 #	hash	size	flops	mean	(us)	dev	(us)	sum	sum2	n
-617e5fe6	3686400	0	1.205438e+05	2.044578e+03	2.189075e+08	2.639552e+13	1816
+617e5fe6	3686400	2.953730e+08   	1.205438e+05	2.044578e+03	2.189075e+08	2.639552e+13	1816
 

+ 1 - 0
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_11_atlas.attila

@@ -0,0 +1 @@
+starpu_dlu_lu_model_11.attila

+ 1 - 0
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_11_atlas.idgraf

@@ -0,0 +1 @@
+starpu_dlu_lu_model_11.idgraf

+ 1 - 0
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_11_atlas.mirage

@@ -0,0 +1 @@
+starpu_dlu_lu_model_11.mirage

+ 1 - 0
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_11_atlas.sirocco

@@ -0,0 +1 @@
+starpu_dlu_lu_model_11.sirocco

+ 1 - 0
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_11_goto.attila

@@ -0,0 +1 @@
+starpu_dlu_lu_model_11.attila

+ 1 - 0
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_11_goto.idgraf

@@ -0,0 +1 @@
+starpu_dlu_lu_model_11.idgraf

+ 1 - 0
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_11_goto.mirage

@@ -0,0 +1 @@
+starpu_dlu_lu_model_11.mirage

+ 1 - 0
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_11_goto.sirocco

@@ -0,0 +1 @@
+starpu_dlu_lu_model_11.sirocco

+ 1 - 0
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_11_openblas.attila

@@ -0,0 +1 @@
+starpu_dlu_lu_model_11.attila

+ 1 - 0
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_11_openblas.idgraf

@@ -0,0 +1 @@
+starpu_dlu_lu_model_11.idgraf

+ 1 - 0
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_11_openblas.mirage

@@ -0,0 +1 @@
+starpu_dlu_lu_model_11.mirage

+ 1 - 0
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_11_openblas.sirocco

@@ -0,0 +1 @@
+starpu_dlu_lu_model_11.sirocco

+ 1 - 0
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12_atlas.attila

@@ -0,0 +1 @@
+starpu_dlu_lu_model_12.attila

+ 1 - 0
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12_atlas.idgraf

@@ -0,0 +1 @@
+starpu_dlu_lu_model_12.idgraf

+ 1 - 0
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12_atlas.mirage

@@ -0,0 +1 @@
+starpu_dlu_lu_model_12.mirage

+ 1 - 0
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12_atlas.sirocco

@@ -0,0 +1 @@
+starpu_dlu_lu_model_12.sirocco

+ 1 - 0
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12_goto.attila

@@ -0,0 +1 @@
+starpu_dlu_lu_model_12.attila

+ 1 - 0
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12_goto.idgraf

@@ -0,0 +1 @@
+starpu_dlu_lu_model_12.idgraf

+ 1 - 0
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12_goto.mirage

@@ -0,0 +1 @@
+starpu_dlu_lu_model_12.mirage

+ 1 - 0
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12_goto.sirocco

@@ -0,0 +1 @@
+starpu_dlu_lu_model_12.sirocco

+ 1 - 0
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12_openblas.attila

@@ -0,0 +1 @@
+starpu_dlu_lu_model_12.attila

+ 1 - 0
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12_openblas.idgraf

@@ -0,0 +1 @@
+starpu_dlu_lu_model_12.idgraf

+ 1 - 0
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12_openblas.mirage

@@ -0,0 +1 @@
+starpu_dlu_lu_model_12.mirage

+ 1 - 0
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_12_openblas.sirocco

@@ -0,0 +1 @@
+starpu_dlu_lu_model_12.sirocco

+ 1 - 0
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_21_atlas.attila

@@ -0,0 +1 @@
+starpu_dlu_lu_model_21.attila

+ 1 - 0
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_21_atlas.idgraf

@@ -0,0 +1 @@
+starpu_dlu_lu_model_21.idgraf

+ 1 - 0
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_21_atlas.mirage

@@ -0,0 +1 @@
+starpu_dlu_lu_model_21.mirage

+ 1 - 0
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_21_atlas.sirocco

@@ -0,0 +1 @@
+starpu_dlu_lu_model_21.sirocco

+ 1 - 0
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_21_goto.attila

@@ -0,0 +1 @@
+starpu_dlu_lu_model_21.attila

+ 1 - 0
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_21_goto.idgraf

@@ -0,0 +1 @@
+starpu_dlu_lu_model_21.idgraf

+ 1 - 0
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_21_goto.mirage

@@ -0,0 +1 @@
+starpu_dlu_lu_model_21.mirage

+ 1 - 0
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_21_goto.sirocco

@@ -0,0 +1 @@
+starpu_dlu_lu_model_21.sirocco

+ 1 - 0
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_21_openblas.attila

@@ -0,0 +1 @@
+starpu_dlu_lu_model_21.attila

+ 0 - 0
tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_21_openblas.idgraf


Some files were not shown because too many files changed in this diff