5 years ago · 371313943f
--- a/bin/starpu_calibrate_bus
+++ b/bin/starpu_calibrate_bus
--- a/bin/starpu_codelet_histo_profile
+++ b/bin/starpu_codelet_histo_profile
@@ -0,0 +1,100 @@
 
				+#!/bin/sh
			
 
				+# StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+#
			
 
				+# Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+#
			
 
				+# StarPU is free software; you can redistribute it and/or modify
			
 
				+# it under the terms of the GNU Lesser General Public License as published by
			
 
				+# the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+# your option) any later version.
			
 
				+#
			
 
				+# StarPU is distributed in the hope that it will be useful, but
			
 
				+# WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+#
			
 
				+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+#
			
 
				+PROGNAME=$0
			
 
				+
			
 
				+# Print the help text on stdout and exit with status 1.
				+usage()
				+{
				+	cat << EOF
				+Offline tool to draw codelet profile histogram over a traced execution
				+
				+Usage: $PROGNAME distrib.data
				+
				+Options:
				+	-h, --help          display this help and exit
				+	-v, --version       output version information and exit
				+
				+Report bugs to <starpu-devel@lists.gforge.inria.fr>
				+EOF
				+	exit 1
				+}
			
 
				+
			
 
				+if [ "$1" = "-v" ] || [ "$1" = "--version" ] ; then
			
 
				+    echo "$PROGNAME (StarPU) 1.3.99"
			
 
				+    exit 0
			
 
				+fi
			
 
				+
			
 
				+if [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$1" = "" ] ; then
			
 
				+    usage
			
 
				+fi
			
 
				+
			
 
				+create_histograms()
			
 
				+{
			
 
				+
			
 
				+inputfile=$1
			
 
				+
			
 
				+R --no-save > /dev/null << EOF
			
 
				+
			
 
				+handle_hash <- function (codelet, arch, hash)
			
 
				+{
			
 
				+
			
 
				+mytable <- table
			
 
				+mytable <- mytable[mytable[,1]==codelet,]
			
 
				+mytable <- mytable[mytable[,2]==arch,]
			
 
				+mytable <- mytable[mytable[,4]==hash,]
			
 
				+
			
 
				+val <- mytable[,5]
			
 
				+
			
 
				+
			
 
				+# there is certainly a better way to do this !
			
 
				+size <- unique(mytable[,3])
			
 
				+
			
 
				+pdf(paste("$inputfile", codelet, arch, hash, size, "pdf", sep="."));
			
 
				+
			
 
				+try ( { h <- hist(val[val > quantile(val,0.01) & val<quantile(val,0.99)], col="red", breaks=50, density=10) } )
			
 
				+
			
 
				+dev.off()
			
 
				+
			
 
				+}
			
 
				+
			
 
				+table <- read.table("$inputfile")
			
 
				+
			
 
				+codeletlist <- unique(table[,1])
			
 
				+
			
 
				+for (codelet in codeletlist)
			
 
				+{
			
 
				+	archlist <- unique(table[table[,1]==codelet,2])
			
 
				+
			
 
				+	for (arch in archlist)
			
 
				+	{
			
 
				+		hashlist <- unique(table[table[,2]==arch,4])
			
 
				+
			
 
				+		for (hash in hashlist)
			
 
				+		{
			
 
				+			print(codelet)
			
 
				+			print(arch)
			
 
				+			print(hash)
			
 
				+			handle_hash(codelet, arch, hash)
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+EOF
			
 
				+
			
 
				+}
			
 
				+
			
 
				+# Process every trace file named on the command line. The quotes keep each
				+# argument intact even when it contains whitespace or glob characters.
				+for inputfile in "$@"
				+do
				+	create_histograms "$inputfile"
				+done
			
--- a/bin/starpu_codelet_profile
+++ b/bin/starpu_codelet_profile
@@ -0,0 +1,81 @@
 
				+#!/bin/bash
			
 
				+# StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+#
			
 
				+# Copyright (C) 2008-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+#
			
 
				+# StarPU is free software; you can redistribute it and/or modify
			
 
				+# it under the terms of the GNU Lesser General Public License as published by
			
 
				+# the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+# your option) any later version.
			
 
				+#
			
 
				+# StarPU is distributed in the hope that it will be useful, but
			
 
				+# WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+#
			
 
				+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+#
			
 
				+PROGNAME=$0
			
 
				+
			
 
				+# Print the help text on stdout and exit with status 1.
				+usage()
				+{
				+	cat << EOF
				+Offline tool to draw codelet profile over a traced execution
				+
				+Usage: $PROGNAME distrib.data codelet_name
				+
				+Options:
				+	-h, --help          display this help and exit
				+	-v, --version       output version information and exit
				+
				+Report bugs to <starpu-devel@lists.gforge.inria.fr>
				+EOF
				+	exit 1
				+}
			
 
				+
			
 
				+if [ "$1" = "-v" ] || [ "$1" = "--version" ] ; then
			
 
				+    echo "$PROGNAME (StarPU) 1.3.99"
			
 
				+    exit 0
			
 
				+fi
			
 
				+
			
 
				+if [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$2" = "" ] ; then
			
 
				+    usage
			
 
				+fi
			
 
				+
			
 
				+inputfile=$1
			
 
				+codelet_name=$2
			
 
				+
			
 
				+archlist=`< $inputfile grep "^$codelet_name	" | cut -f 2 | sort | uniq | xargs` 
			
 
				+
			
 
				+# extract subfiles from the history file
			
 
				+for arch in $archlist
			
 
				+do
			
 
				+		echo "Arch $arch"
			
 
				+		grep "^$codelet_name	$arch" $inputfile > $inputfile.$arch
			
 
				+done
			
 
				+
			
 
				+# create the gnuplot file
			
 
				+
			
 
				+gpfile=$inputfile.gp
			
 
				+
			
 
				+echo "#!/usr/bin/gnuplot -persist" 		> $gpfile
			
 
				+echo "set term postscript eps enhanced color" 	>> $gpfile
			
 
				+echo "set logscale x"				>> $gpfile 
			
 
				+echo "set logscale y"				>> $gpfile 
			
 
				+echo "set output \"$inputfile.eps\""		>> $gpfile
			
 
				+echo "set key top left"				>> $gpfile
			
 
				+echo "set xlabel \"Total data size\""		>> $gpfile
			
 
				+echo "set ylabel \"Execution time (ms)\""	>> $gpfile
			
 
				+
			
 
				+echo -n "plot	" 				>> $gpfile
			
 
				+
			
 
				+first=1
			
 
				+
			
 
				+for arch in $archlist
			
 
				+do
			
 
				+		if [ $first = 0 ] 
			
 
				+		then
			
 
				+			echo -n "  , " >> $gpfile
			
 
				+		else
			
 
				+			first=0
			
 
				+		fi
			
 
				+
			
 
				+		echo -n " \"$inputfile.$arch\" using 3:5  title \"${codelet_name//_/\\\\_} arch $arch\"" >> $gpfile
			
 
				+done
			
--- a/bin/starpu_env
+++ b/bin/starpu_env
@@ -0,0 +1,52 @@
 
				+# StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+#
			
 
				+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+#
			
 
				+# StarPU is free software; you can redistribute it and/or modify
			
 
				+# it under the terms of the GNU Lesser General Public License as published by
			
 
				+# the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+# your option) any later version.
			
 
				+#
			
 
				+# StarPU is distributed in the hope that it will be useful, but
			
 
				+# WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+#
			
 
				+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+#
			
 
				+
			
 
				+PROGNAME=starpu_env
			
 
				+
			
 
				+# Print the help text on stdout. There is no exit here: the help itself says
				+# the file is meant to be used with "source".
				+usage()
				+{
				+    cat << EOF
				+Tool to set StarPU environment variables
				+
				+Usage: source $PROGNAME
				+
				+
				+Options:
				+	-h, --help          display this help and exit
				+	-v, --version       output version information and exit
				+
				+Report bugs to <starpu-devel@lists.gforge.inria.fr>
				+EOF
				+}
			
 
				+
			
 
				+# Handle -v/--version and -h/--help; otherwise prepend the StarPU installation
				+# directories to the search paths of the current shell.
				+if [ "$1" = "-v" ] || [ "$1" = "--version" ]
				+then
				+    echo "$PROGNAME (StarPU) 1.3.99"
				+elif [ "$1" = "-h" ] || [ "$1" = "--help" ]
				+then
				+    usage
				+else
				+    # NOTE(review): the installation prefix is hard-coded here; presumably it
				+    # is substituted when the file is generated -- confirm.
				+    starpu_prefix=$(realpath /home/mmakni/starpu)
				+    if test -f "$starpu_prefix/bin/starpu_machine_display" && test -f "$starpu_prefix/lib/pkgconfig/libstarpu.pc"
				+    then
				+	echo "Setting StarPU environment for $starpu_prefix"
				+	# Append the previous value only when it is non-empty: a trailing ':'
				+	# would add an empty element, which the dynamic loader and the shell
				+	# interpret as the current directory.
				+	export PKG_CONFIG_PATH="$starpu_prefix/lib/pkgconfig${PKG_CONFIG_PATH:+:$PKG_CONFIG_PATH}"
				+	export LD_LIBRARY_PATH="$starpu_prefix/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
				+	export PATH="$starpu_prefix/bin${PATH:+:$PATH}"
				+	# The trailing ':' is kept for MANPATH: with man-db it asks for the
				+	# default manual path to be appended, so system pages stay reachable.
				+	export MANPATH="$starpu_prefix/share/man:$MANPATH"
				+    else
				+	echo "[Error] $starpu_prefix is not a valid StarPU installation directory"
				+    fi
				+fi
			
 
				+
			
--- a/bin/starpu_fxt_number_events_to_names.py
+++ b/bin/starpu_fxt_number_events_to_names.py
@@ -0,0 +1,248 @@
 
				+#!/usr/bin/env python3
			
 
				+# -*- coding: utf-8 -*-
			
 
				+# StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+#
			
 
				+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+#
			
 
				+# StarPU is free software; you can redistribute it and/or modify
			
 
				+# it under the terms of the GNU Lesser General Public License as published by
			
 
				+# the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+# your option) any later version.
			
 
				+#
			
 
				+# StarPU is distributed in the hope that it will be useful, but
			
 
				+# WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+#
			
 
				+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+#
			
 
				+
			
 
				+import sys
			
 
				+
			
 
				+"""
			
 
				+STARPU_FXT_EVENT_DEFINES is generated by configure and is the output of
			
 
				+the following command:
			
 
				+grep -E "#define\s+_STARPU_(MPI_)?FUT_" src/common/fxt.h mpi/src/starpu_mpi_fxt.h | grep 0x | grep -v 0x1 | cut -d : -f 2
			
 
				+"""
			
 
				+
			
 
				+fxt_codes_raw = """
			
 
				+#define _STARPU_FUT_WORKER_INIT_START	0x5100
			
 
				+#define _STARPU_FUT_WORKER_INIT_END	0x5101
			
 
				+#define	_STARPU_FUT_START_CODELET_BODY	0x5102
			
 
				+#define	_STARPU_FUT_END_CODELET_BODY	0x5103
			
 
				+#define _STARPU_FUT_JOB_PUSH		0x5104
			
 
				+#define _STARPU_FUT_JOB_POP		0x5105
			
 
				+#define _STARPU_FUT_UPDATE_TASK_CNT	0x5106
			
 
				+#define _STARPU_FUT_START_FETCH_INPUT_ON_TID	0x5107
			
 
				+#define _STARPU_FUT_END_FETCH_INPUT_ON_TID	0x5108
			
 
				+#define _STARPU_FUT_START_PUSH_OUTPUT_ON_TID	0x5109
			
 
				+#define _STARPU_FUT_END_PUSH_OUTPUT_ON_TID	0x5110
			
 
				+#define _STARPU_FUT_TAG		0x5111
			
 
				+#define _STARPU_FUT_TAG_DEPS	0x5112
			
 
				+#define _STARPU_FUT_TASK_DEPS		0x5113
			
 
				+#define _STARPU_FUT_DATA_COPY		0x5114
			
 
				+#define _STARPU_FUT_WORK_STEALING	0x5115
			
 
				+#define _STARPU_FUT_WORKER_DEINIT_START	0x5116
			
 
				+#define _STARPU_FUT_WORKER_DEINIT_END	0x5117
			
 
				+#define _STARPU_FUT_WORKER_SLEEP_START	0x5118
			
 
				+#define _STARPU_FUT_WORKER_SLEEP_END	0x5119
			
 
				+#define _STARPU_FUT_TASK_SUBMIT		0x511a
			
 
				+#define _STARPU_FUT_CODELET_DATA_HANDLE	0x511b
			
 
				+#define _STARPU_FUT_MODEL_NAME		0x511c
			
 
				+#define _STARPU_FUT_DATA_NAME		0x511d
			
 
				+#define _STARPU_FUT_DATA_COORDINATES	0x511e
			
 
				+#define _STARPU_FUT_HANDLE_DATA_UNREGISTER	0x511f
			
 
				+#define _STARPU_FUT_USER_DEFINED_START	0x5120
			
 
				+#define _STARPU_FUT_USER_DEFINED_END	0x5121
			
 
				+#define	_STARPU_FUT_NEW_MEM_NODE	0x5122
			
 
				+#define	_STARPU_FUT_START_CALLBACK	0x5123
			
 
				+#define	_STARPU_FUT_END_CALLBACK	0x5124
			
 
				+#define	_STARPU_FUT_TASK_DONE		0x5125
			
 
				+#define	_STARPU_FUT_TAG_DONE		0x5126
			
 
				+#define	_STARPU_FUT_START_ALLOC		0x5127
			
 
				+#define	_STARPU_FUT_END_ALLOC		0x5128
			
 
				+#define	_STARPU_FUT_START_ALLOC_REUSE	0x5129
			
 
				+#define	_STARPU_FUT_END_ALLOC_REUSE	0x5130
			
 
				+#define	_STARPU_FUT_USED_MEM	0x512a
			
 
				+#define _STARPU_FUT_TASK_NAME	0x512b
			
 
				+#define _STARPU_FUT_DATA_WONT_USE	0x512c
			
 
				+#define _STARPU_FUT_TASK_COLOR	0x512d
			
 
				+#define _STARPU_FUT_DATA_DOING_WONT_USE	0x512e
			
 
				+#define	_STARPU_FUT_START_MEMRECLAIM	0x5131
			
 
				+#define	_STARPU_FUT_END_MEMRECLAIM	0x5132
			
 
				+#define	_STARPU_FUT_START_DRIVER_COPY	0x5133
			
 
				+#define	_STARPU_FUT_END_DRIVER_COPY	0x5134
			
 
				+#define	_STARPU_FUT_START_DRIVER_COPY_ASYNC	0x5135
			
 
				+#define	_STARPU_FUT_END_DRIVER_COPY_ASYNC	0x5136
			
 
				+#define	_STARPU_FUT_START_PROGRESS_ON_TID	0x5137
			
 
				+#define	_STARPU_FUT_END_PROGRESS_ON_TID		0x5138
			
 
				+#define _STARPU_FUT_USER_EVENT		0x5139
			
 
				+#define _STARPU_FUT_SET_PROFILING	0x513a
			
 
				+#define _STARPU_FUT_TASK_WAIT_FOR_ALL	0x513b
			
 
				+#define _STARPU_FUT_EVENT		0x513c
			
 
				+#define _STARPU_FUT_THREAD_EVENT	0x513d
			
 
				+#define	_STARPU_FUT_CODELET_DETAILS	0x513e
			
 
				+#define	_STARPU_FUT_CODELET_DATA	0x513f
			
 
				+#define _STARPU_FUT_LOCKING_MUTEX	0x5140
			
 
				+#define _STARPU_FUT_MUTEX_LOCKED	0x5141
			
 
				+#define _STARPU_FUT_UNLOCKING_MUTEX	0x5142
			
 
				+#define _STARPU_FUT_MUTEX_UNLOCKED	0x5143
			
 
				+#define _STARPU_FUT_TRYLOCK_MUTEX	0x5144
			
 
				+#define _STARPU_FUT_RDLOCKING_RWLOCK	0x5145
			
 
				+#define _STARPU_FUT_RWLOCK_RDLOCKED	0x5146
			
 
				+#define _STARPU_FUT_WRLOCKING_RWLOCK	0x5147
			
 
				+#define _STARPU_FUT_RWLOCK_WRLOCKED	0x5148
			
 
				+#define _STARPU_FUT_UNLOCKING_RWLOCK	0x5149
			
 
				+#define _STARPU_FUT_RWLOCK_UNLOCKED	0x514a
			
 
				+#define _STARPU_FUT_LOCKING_SPINLOCK	0x514b
			
 
				+#define _STARPU_FUT_SPINLOCK_LOCKED	0x514c
			
 
				+#define _STARPU_FUT_UNLOCKING_SPINLOCK	0x514d
			
 
				+#define _STARPU_FUT_SPINLOCK_UNLOCKED	0x514e
			
 
				+#define _STARPU_FUT_TRYLOCK_SPINLOCK	0x514f
			
 
				+#define _STARPU_FUT_COND_WAIT_BEGIN	0x5150
			
 
				+#define _STARPU_FUT_COND_WAIT_END	0x5151
			
 
				+#define _STARPU_FUT_MEMORY_FULL		0x5152
			
 
				+#define _STARPU_FUT_DATA_LOAD 		0x5153
			
 
				+#define _STARPU_FUT_START_UNPARTITION_ON_TID 0x5154
			
 
				+#define _STARPU_FUT_END_UNPARTITION_ON_TID 0x5155
			
 
				+#define	_STARPU_FUT_START_FREE		0x5156
			
 
				+#define	_STARPU_FUT_END_FREE		0x5157
			
 
				+#define	_STARPU_FUT_START_WRITEBACK	0x5158
			
 
				+#define	_STARPU_FUT_END_WRITEBACK	0x5159
			
 
				+#define _STARPU_FUT_SCHED_COMPONENT_PUSH_PRIO 	0x515a
			
 
				+#define _STARPU_FUT_SCHED_COMPONENT_POP_PRIO 	0x515b
			
 
				+#define	_STARPU_FUT_START_WRITEBACK_ASYNC	0x515c
			
 
				+#define	_STARPU_FUT_END_WRITEBACK_ASYNC		0x515d
			
 
				+#define	_STARPU_FUT_HYPERVISOR_BEGIN    0x5160
			
 
				+#define	_STARPU_FUT_HYPERVISOR_END	0x5161
			
 
				+#define _STARPU_FUT_BARRIER_WAIT_BEGIN		0x5162
			
 
				+#define _STARPU_FUT_BARRIER_WAIT_END		0x5163
			
 
				+#define _STARPU_FUT_WORKER_SCHEDULING_START	0x5164
			
 
				+#define _STARPU_FUT_WORKER_SCHEDULING_END	0x5165
			
 
				+#define _STARPU_FUT_WORKER_SCHEDULING_PUSH	0x5166
			
 
				+#define _STARPU_FUT_WORKER_SCHEDULING_POP	0x5167
			
 
				+#define	_STARPU_FUT_START_EXECUTING	0x5168
			
 
				+#define	_STARPU_FUT_END_EXECUTING	0x5169
			
 
				+#define _STARPU_FUT_SCHED_COMPONENT_NEW		0x516a
			
 
				+#define _STARPU_FUT_SCHED_COMPONENT_CONNECT	0x516b
			
 
				+#define _STARPU_FUT_SCHED_COMPONENT_PUSH	0x516c
			
 
				+#define _STARPU_FUT_SCHED_COMPONENT_PULL	0x516d
			
 
				+#define _STARPU_FUT_TASK_SUBMIT_START	0x516e
			
 
				+#define _STARPU_FUT_TASK_SUBMIT_END	0x516f
			
 
				+#define _STARPU_FUT_TASK_BUILD_START	0x5170
			
 
				+#define _STARPU_FUT_TASK_BUILD_END	0x5171
			
 
				+#define _STARPU_FUT_TASK_MPI_DECODE_START	0x5172
			
 
				+#define _STARPU_FUT_TASK_MPI_DECODE_END		0x5173
			
 
				+#define _STARPU_FUT_TASK_MPI_PRE_START	0x5174
			
 
				+#define _STARPU_FUT_TASK_MPI_PRE_END	0x5175
			
 
				+#define _STARPU_FUT_TASK_MPI_POST_START	0x5176
			
 
				+#define _STARPU_FUT_TASK_MPI_POST_END	0x5177
			
 
				+#define _STARPU_FUT_TASK_WAIT_START	0x5178
			
 
				+#define _STARPU_FUT_TASK_WAIT_END	0x5179
			
 
				+#define _STARPU_FUT_TASK_WAIT_FOR_ALL_START	0x517a
			
 
				+#define _STARPU_FUT_TASK_WAIT_FOR_ALL_END	0x517b
			
 
				+#define _STARPU_FUT_HANDLE_DATA_REGISTER	0x517c
			
 
				+#define _STARPU_FUT_START_FETCH_INPUT	0x517e
			
 
				+#define _STARPU_FUT_END_FETCH_INPUT	0x517f
			
 
				+#define _STARPU_FUT_TASK_THROTTLE_START	0x5180
			
 
				+#define _STARPU_FUT_TASK_THROTTLE_END	0x5181
			
 
				+#define _STARPU_FUT_DATA_STATE_INVALID 0x5182
			
 
				+#define _STARPU_FUT_DATA_STATE_OWNER      0x5183
			
 
				+#define _STARPU_FUT_DATA_STATE_SHARED     0x5184
			
 
				+#define _STARPU_FUT_DATA_REQUEST_CREATED   0x5185
			
 
				+#define _STARPU_FUT_PAPI_TASK_EVENT_VALUE   0x5186
			
 
				+#define _STARPU_MPI_FUT_START				0x5201
			
 
				+#define _STARPU_MPI_FUT_STOP				0x5202
			
 
				+#define _STARPU_MPI_FUT_BARRIER				0x5203
			
 
				+#define _STARPU_MPI_FUT_ISEND_SUBMIT_BEGIN		0x5204
			
 
				+#define _STARPU_MPI_FUT_ISEND_SUBMIT_END		0x5205
			
 
				+#define _STARPU_MPI_FUT_IRECV_SUBMIT_BEGIN		0x5206
			
 
				+#define _STARPU_MPI_FUT_IRECV_SUBMIT_END		0x5207
			
 
				+#define _STARPU_MPI_FUT_ISEND_COMPLETE_BEGIN		0x5208
			
 
				+#define _STARPU_MPI_FUT_ISEND_COMPLETE_END		0x5209
			
 
				+#define _STARPU_MPI_FUT_DATA_SET_RANK			0x521a
			
 
				+#define _STARPU_MPI_FUT_IRECV_TERMINATED		0x521b
			
 
				+#define _STARPU_MPI_FUT_ISEND_TERMINATED		0x521c
			
 
				+#define _STARPU_MPI_FUT_TESTING_DETACHED_BEGIN		0x521d
			
 
				+#define _STARPU_MPI_FUT_TESTING_DETACHED_END		0x521e
			
 
				+#define _STARPU_MPI_FUT_TEST_BEGIN			0x521f
			
 
				+#define _STARPU_MPI_FUT_TEST_END			0x5220
			
 
				+#define _STARPU_MPI_FUT_IRECV_COMPLETE_BEGIN		0x520a
			
 
				+#define _STARPU_MPI_FUT_IRECV_COMPLETE_END		0x520b
			
 
				+#define _STARPU_MPI_FUT_SLEEP_BEGIN			0x520c
			
 
				+#define _STARPU_MPI_FUT_SLEEP_END			0x520d
			
 
				+#define _STARPU_MPI_FUT_DTESTING_BEGIN			0x520e
			
 
				+#define _STARPU_MPI_FUT_DTESTING_END			0x520f
			
 
				+#define _STARPU_MPI_FUT_UTESTING_BEGIN			0x5210
			
 
				+#define _STARPU_MPI_FUT_UTESTING_END			0x5211
			
 
				+#define _STARPU_MPI_FUT_UWAIT_BEGIN			0x5212
			
 
				+#define _STARPU_MPI_FUT_UWAIT_END			0x5213
			
 
				+#define _STARPU_MPI_FUT_POLLING_BEGIN			0x5214
			
 
				+#define _STARPU_MPI_FUT_POLLING_END			0x5215
			
 
				+#define _STARPU_MPI_FUT_DRIVER_RUN_BEGIN		0x5216
			
 
				+#define _STARPU_MPI_FUT_DRIVER_RUN_END			0x5217
			
 
				+#define _STARPU_MPI_FUT_DATA_SET_TAG			0x5218
			
 
				+"""
			
 
				+
			
 
				+PROGNAME=sys.argv[0]
			
 
				+
			
 
				+number_events_path = None
			
 
				+
			
 
				+def usage():
			
 
				+    print("Convert event keys in number_events.data to event names")
			
 
				+    print("")
			
 
				+    print("Usage: %s <number_events.data path>" % PROGNAME)
			
 
				+    print("")
			
 
				+    print("Options:")
			
 
				+    print("	-h, --help          display this help and exit")
			
 
				+    print("	-v, --version       output version information and exit")
			
 
				+    print("")
			
 
				+    print("Report bugs to <starpu-devel@lists.gforge.inria.fr>")
			
 
				+    sys.exit(1)
			
 
				+
			
 
				+
			
 
				+if len(sys.argv) == 2:
			
 
				+    if sys.argv[1] == '-v' or sys.argv[1] == '--version':
			
 
				+        print("%s (StarPU) 1.3.99" % PROGNAME)
			
 
				+        sys.exit(0)
			
 
				+    elif sys.argv[1] == '-h' or sys.argv[1] == '--help':
			
 
				+        usage()
			
 
				+    else:
			
 
				+        number_events_path = sys.argv[1]
			
 
				+else:
			
 
				+    usage()
			
 
				+
			
 
				+def man():
			
 
				+    print("Sepecify file containing event stats")
			
 
				+    sys.exit(1)
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+# Process fxt_code_raw content to ease the conversion:
			
 
				+fxt_codes = dict()
			
 
				+for line in fxt_codes_raw.split("\n"):
			
 
				+    elements = line.split()
			
 
				+
			
 
				+    if len(elements) == 3:
			
 
				+        key = int(elements[2][2:], 16)
			
 
				+        assert(key not in fxt_codes)
			
 
				+
			
 
				+        fxt_codes[key] = elements[1]
			
 
				+
			
 
				+
			
 
				+# Convert content of the file:
			
 
				+nb_events = 0
			
 
				+
			
 
				+with open(number_events_path, 'r') as f:
			
 
				+    for line in f:
			
 
				+        elements = line.split()
			
 
				+        if len(elements) == 2:
			
 
				+            key = int(elements[0][2:], 16)
			
 
				+            nb = int(elements[1])
			
 
				+            nb_events += nb
			
 
				+            if key in fxt_codes:
			
 
				+                print("%12d    %s" % (nb, fxt_codes[key]))
			
 
				+            else:
			
 
				+                print("%12d    %s" % (nb, elements[0]))
			
 
				+
			
 
				+print("       TOTAL:   %d" % nb_events)
			
--- a/bin/starpu_lp2paje
+++ b/bin/starpu_lp2paje
--- a/bin/starpu_machine_display
+++ b/bin/starpu_machine_display
--- a/bin/starpu_mlr_analysis
+++ b/bin/starpu_mlr_analysis
@@ -0,0 +1,92 @@
 
				+#!/bin/bash
			
 
				+# StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+#
			
 
				+# Copyright (C) 2016-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+#
			
 
				+# StarPU is free software; you can redistribute it and/or modify
			
 
				+# it under the terms of the GNU Lesser General Public License as published by
			
 
				+# the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+# your option) any later version.
			
 
				+#
			
 
				+# StarPU is distributed in the hope that it will be useful, but
			
 
				+# WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+#
			
 
				+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+#
			
 
				+# Script for giving statistical analysis of the paje trace
			
 
				+
			
 
				+set -e # fail fast
			
 
				+
			
 
				+# File names
			
 
				+SOURCE_DIR=/home/mmakni/starpu/build_starpu/../tools
			
 
				+
			
 
				+outputfile="mlr_analysis.html"
			
 
				+analysis_script="$SOURCE_DIR/starpu_mlr_analysis.Rmd"
			
 
				+
			
 
				+# Command line arguments
			
 
				+inputfile=""
			
 
				+
			
 
				+# Print the help text on stdout; the caller decides the exit status.
				+help_script()
				+{
				+    printf '%s\n' \
				+        "Give an example of the trace analysis for computing multiple linear regression model" \
				+        "" \
				+        "Options:" \
				+        "   -h      Show this message" \
				+        "" \
				+        "Examples:" \
				+        "$0 .starpu/sampling/codelets/tmp/test_mlr.out" \
				+        "$0 " \
				+        "" \
				+        "Report bugs to <starpu-devel@lists.gforge.inria.fr>"
				+}
			
 
				+
			
 
				+# PROGNAME is not assigned anywhere in this script, so the version line uses
				+# the invocation name directly instead of printing an empty program name.
				+if [ "$1" = "--version" ] ; then
				+    echo "$0 (StarPU) 1.3.99"
				+    exit 0
				+fi
				+
				+if [ "$1" = "-h" ] || [ "$1" = "--help" ] ; then
				+    help_script
				+    exit 0
				+fi
			
 
				+
			
 
				+while getopts "h" opt; do
			
 
				+    case $opt in
			
 
				+    \?)
			
 
				+      echo "Invalid option: -$OPTARG"
			
 
				+      help_script
			
 
				+      exit 3
			
 
				+      ;;
			
 
				+  esac
			
 
				+done
			
 
				+
			
 
				+# Reading files that need to be analyzed
			
 
				+shift $((OPTIND - 1))
			
 
				+inputfile=$1
			
 
				+if [[ $# < 1 ]]; then
			
 
				+    inputfile="$SOURCE_DIR/perfmodels/sampling/codelets/tmp/mlr_init.out"
			
 
				+else
			
 
				+# Error if there is more than one input file
			
 
				+    if [[ $# > 1 ]]; then
			
 
				+	echo "Error!"
			
 
				+	help_script
			
 
				+	exit 2
			
 
				+    fi
			
 
				+fi
			
 
				+
			
 
				+if [ ! -s $inputfile ]
			
 
				+    then
			
 
				+	echo "Error: file $inputfile does not exist!"
			
 
				+	exit 5
			
 
				+fi
			
 
				+
			
 
				+#####################################
			
 
				+# Running analysis file to get actual results
			
 
				+in="$(cd "$(dirname "$inputfile")"; pwd)/$(basename "$inputfile")"
			
 
				+
			
 
				+Rscript -e "library(knitr); input_trace = '$in' ; outputhtml='$outputfile';\
			
 
				+            outputRmd = gsub('.html\$','.Rmd',outputhtml);\
			
 
				+            knit('$analysis_script',output=outputRmd); knitr::knit2html(outputRmd)"
			
--- a/bin/starpu_mlr_analysis.Rmd
+++ b/bin/starpu_mlr_analysis.Rmd
@@ -0,0 +1,256 @@
 
				+# StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+#
			
 
				+# Copyright (C) 2016-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+#
			
 
				+# StarPU is free software; you can redistribute it and/or modify
			
 
				+# it under the terms of the GNU Lesser General Public License as published by
			
 
				+# the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+# your option) any later version.
			
 
				+#
			
 
				+# StarPU is distributed in the hope that it will be useful, but
			
 
				+# WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+#
			
 
				+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+#
			
 
				+```{r Setup, echo=FALSE}
			
 
				+opts_chunk$set(echo=FALSE)
			
 
				+```
			
 
				+
			
 
				+```{r Load_R_files_and_functions}
			
 
				+# Print C code that fills the "combinations" table of a codelet model from a
				+# fitted regression.
				+#
				+# reg:     fitted model; its rank, terms and summary are read
				+# codelet: name of the C codelet variable used in the generated statements
				+#
				+# The code is written to the standard output with cat(); nothing is returned.
				+print_codelet <- function(reg,codelet){
				+   cat(paste("/* ############################################ */", "\n"))
				+   # The comment opened on the next line is closed after the R-squared line;
				+   # closing it here as well left the two following lines outside any C
				+   # comment and made the generated code invalid.
				+   cat(paste("/*\t Automatically generated code", "\n"))
				+   cat(paste("\t Check for potential errors and be sure parameter value are written in good order (alphabetical one by default)", "\n"))
				+   cat(paste("\t Adjusted R-squared: ", summary(reg)$adj.r.squared, "*/\n\n"))
				+
				+   # One combination per fitted term, not counting the intercept.
				+   ncomb <- reg$rank - 1
				+   cat(paste("\t ", codelet, ".model->ncombinations = ", ncomb, ";\n", sep=""))
				+
				+   cat(paste("\t ", codelet, ".model->combinations = (unsigned **) malloc(", codelet, ".model->ncombinations*sizeof(unsigned *))", ";\n\n", sep=""))
				+
				+   cat(paste("\t if (", codelet, ".model->combinations)", "\n", "\t {\n", sep=""))
				+   cat(paste("\t   for (unsigned i = 0; i < ", codelet, ".model->ncombinations; i++)", "\n", "\t   {\n", sep=""))
				+   cat(paste("\t     ", codelet, ".model->combinations[i] = (unsigned *) malloc(", codelet, ".model->nparameters*sizeof(unsigned))", ";\n", "\t   }\n", "\t }\n\n", sep=""))
				+
				+   # Computing combinations
				+   df <- data.frame(attr(reg$terms, "factors"))
				+   df <- df/2
				+   df$Params <- row.names(df)
				+   # The first row is dropped (presumably the response variable -- confirm).
				+   df <-df[c(2:nrow(df)),]
				+
				+   options(warn=-1)
				+   for(i in (1:nrow(df)))
				+   {
				+     name <- df[i,]$Params
				+     # Only rows of the form I(<param>^<exponent>) are rewritten. The former
				+     # pattern "I\\(*" matched every name containing an "I".
				+     if (grepl("I\\((.*?)\\^(.*?)\\)", name))
				+     {
				+        exp <- as.numeric(gsub("(.*?)\\^(.*?)\\)", "\\2", name))
				+        df[i,] <- as.numeric(df[i,]) * exp
				+        df[i,]$Params <- as.character(gsub("I\\((.*?)\\^(.*?)\\)", "\\1", name))
				+     }
				+   }
				+   # Merge the rows that now refer to the same parameter.
				+   df <- aggregate(. ~ Params, transform(df, Params), sum)
				+   options(warn=0)
				+
				+   # Column 1 holds the parameter names; each remaining column is one
				+   # combination and each row one parameter.
				+   for(j in (2:length(df)))
				+   {
				+     for(i in (1:nrow(df)))
				+     {
				+       cat(paste("\t ", codelet, ".model->combinations[", j-2, "][", i-1, "] = ", as.numeric(df[i,j]), ";\n", sep=""))
				+     }
				+   }
				+
				+   cat(paste("/* ############################################ */", "\n"))
				+}
			
 
				+
			
 
				+df<-read.csv(input_trace, header=TRUE)
			
 
				+
			
 
				+opts_chunk$set(echo=TRUE)
			
 
				+```
			
 
				+
			
 
				+# Multiple Linear Regression Model Example
			
 
				+
			
 
				+## Introduction
			
 
				+
			
 
				+This document demonstrates the type of the analysis needed to compute
			
 
				+the multiple linear regression model of the task. It relies on the
			
 
				+input data benchmarked by the StarPU (or any other tool, but following
			
 
				+the same format). The input data used in this example is generated by
			
 
				+the task "mlr_init", from the "examples/mlr/mlr.c".
			
 
				+
			
 
				+This document can be used as a template for the analysis of any other
			
 
				+task.
			
 
				+
			
 
				+### How to compile
			
 
				+
			
 
				+    ./starpu_mlr_analysis .starpu/sampling/codelets/tmp/mlr_init.out
			
 
				+
			
 
				+### Software dependencies
			
 
				+
			
 
				+In order to run the analysis you need to have R installed:
			
 
				+
			
 
				+    sudo apt-get install r-base
			
 
				+
			
 
				+In order to compile this document, you need *knitr* (although you can
			
 
				+perfectly only use the R code from this document without knitr). If
			
 
				+you decided that you want to generate this document, then start R
			
 
				+(e.g., from terminal) and install knitr package:
			
 
				+
			
 
				+    R> install.packages("knitr")
			
 
				+
			
 
				+No additional R packages are needed.
			
 
				+
			
 
				+## First glimpse at the data
			
 
				+
			
 
				+First, we show the relations between all parameters in a single plot.
			
 
				+
			
 
				+```{r InitPlot}
			
 
				+plot(df)
			
 
				+```
			
 
				+
			
 
				+For this example, all three parameters M, N, K have some influence,
			
 
				+but their relation is not easy to understand.
			
 
				+
			
 
				+In general, this type of plot can typically show if there are
			
 
				+outliers. It can also show if there is a group of parameters which are
			
 
				+mutually perfectly correlated, in which case only one parameter from
			
 
				+the group should be kept for the further analysis. Additionally, plot
			
 
				+can show the parameters that have a constant value, and since these
			
 
				+cannot have an influence on the model, they should also be ignored.
			
 
				+
			
 
				+However, making conclusions based solely on the visual analysis can be
			
 
				+treacherous and it is better to rely on the statistical tools. The
			
 
				+multiple linear regression methods used in the following sections will
			
 
				+also be able to detect and ignore these irrelevant
			
 
				+parameters. Therefore, this initial visual look should only be used to
			
 
				+get a basic idea about the model, but all the parameters should be
			
 
				+kept for now.
			
 
				+
			
 
				+## Initial model
			
 
				+
			
 
				+At this point, an initial model is computed, using all the parameters,
			
 
				+but not taking into account their exponents or the relations between
			
 
				+them.
			
 
				+
			
 
				+```{r Model1}
			
 
				+model1 <- lm(data=df, Duration ~ M+N+K)
			
 
				+summary(model1)
			
 
				+```
			
 
				+
			
 
				+For each parameter and the constant in the first column, an estimation
			
 
				+of the corresponding coefficient is provided along with the 95%
			
 
				+confidence interval. If there are any parameters with NA value, which
			
 
				+suggests that the parameters are correlated to another parameter or
			
 
				+that their value is constant, these parameters should not be used in
			
 
				+the following model computations. The stars in the last column
			
 
				+indicate the significance of each parameter. However, having maximum
			
 
				+three stars for each parameter does not necessarily mean that the
			
 
				+model is perfect and we should always inspect the adjusted R^2 value
			
 
				+(the closer it is to 1, the better the model is). Users who are not
			
 
				+familiar with multiple linear regression analysis and R tools should
			
 
				+refer to the R documentation. Some explanations are also provided
			
 
				+in the following article https://hal.inria.fr/hal-01180272.
			
 
				+
			
 
				+In this example, all parameters M, N, K are very important. However,
			
 
				+it is not clear if there are some relations between them or if some of
			
 
				+these parameters should be used with an exponent. Moreover, adjusted
			
 
				+R^2 value is not extremely high and we hope we can get a better
			
 
				+one. Thus, we proceed to the more advanced analysis.
			
 
				+
			
 
				+## Refining the model
			
 
				+
			
 
				+Now, we can look for relations between the parameters. Note that
			
 
				+trying all the possible combinations for the cases with a huge number
			
 
				+of parameters can be prohibitively long. Thus, it may be better to first
			
 
				+get rid of the parameters which seem to have very small influence
			
 
				+(typically the ones with no stars from the table in the previous
			
 
				+section).
			
 
				+
			
 
				+```{r Model2}
			
 
				+model2 <- lm(data=df, Duration ~ M*N*K)
			
 
				+summary(model2)
			
 
				+```
			
 
				+
			
 
				+This model is more accurate, as the R^2 value increased. We can also
			
 
				+try some of these parameters with the exponents.
			
 
				+
			
 
				+```{r Model3}
			
 
				+model3 <- lm(data=df, Duration ~ I(M^2)+I(M^3)+I(N^2)+I(N^3)+I(K^2)+I(K^3))
			
 
				+summary(model3)
			
 
				+```
			
 
				+
			
 
				+It seems like some parameters are important. Now we combine these and
			
 
				+try to find the optimal combination (here we go directly to the final
			
 
				+solution, although this process typically takes several iterations of
			
 
				+trying different combinations).
			
 
				+
			
 
				+```{r Model4}
			
 
				+model4 <- lm(data=df, Duration ~ I(M^2):N+I(N^3):K)
			
 
				+summary(model4)
			
 
				+```
			
 
				+
			
 
				+This seems to be the most accurate model, with a high R^2 value. We
			
 
				+can proceed to its validation.
			
 
				+
			
 
				+## Validation
			
 
				+
			
 
				+Once the model has been computed, we should validate it. Apart from
			
 
				+the low adjusted R^2 value, the model weakness can also be observed
			
 
				+even better when inspecting the residuals. The results on the two
			
 
				+following plots (and thus the accuracy of the model) will greatly
			
 
				+depend on the measurements variability and the design of experiments.
			
 
				+
			
 
				+```{r Validation}
			
 
				+par(mfrow=c(1,2))
			
 
				+plot(model4, which=c(1:2))
			
 
				+```
			
 
				+
			
 
				+Generally speaking, if there are some structures on the left plot,
			
 
				+this can indicate that there are certain phenomena not explained by
			
 
				+the model. Many points on the same horizontal line represent
			
 
				+repetitive occurrences of the task with the same parameter values,
			
 
				+which is typical for a single experiment run with a homogeneous
			
 
				+data. The fact that there is some variability is common, as executing
			
 
				+exactly the same code on a real machine will always have slightly
			
 
				+different duration. However, having a huge variability means that the
			
 
				+benchmarks were very noisy, thus deriving accurate models from them
			
 
				+will be hard.
			
 
				+
			
 
				+Plot on the right may show that the residuals do not follow the normal
			
 
				+distribution. Therefore, such a model would overall have a limited
			
 
				+predictive power.
			
 
				+
			
 
				+If we are not satisfied with the accuracy of the observed models, we
			
 
				+should go back to the previous section and try to find a better
			
 
				+one. In some cases, the benchmarked data is just too noisy or the
			
 
				+choice of the parameters is not appropriate, and thus the experiments
			
 
				+should be redesigned and rerun.
			
 
				+
			
 
				+When we are finally satisfied with the model accuracy, we should
			
 
				+modify our task code, so that StarPU knows which parameters
			
 
				+combinations are used in the model.
			
 
				+
			
 
				+## Generating C code
			
 
				+
			
 
				+Depending on the way the task codelet is programmed, this section may
			
 
				+be somehow useful. This is a simple helper to generate C code for the
			
 
				+parameters combinations and it should be copied to the task
			
 
				+description in the application. The function generating the code is
			
 
				+not so robust, so make sure that the generated code correctly
			
 
				+corresponds to the computed model (e.g., parameters are considered in the
			
 
				+alphabetical order).
			
 
				+
			
 
				+```{r Code}
			
 
				+print_codelet(model4, "mlr_cl")
			
 
				+```
			
 
				+
			
 
				+## Conclusion
			
 
				+
			
 
				+We have computed the model for our benchmarked data using multiple
			
 
				+linear regression. After encoding this model into the task code,
			
 
				+StarPU will be able to automatically compute the coefficients and use
			
 
				+the model to predict task duration.
			
--- a/bin/starpu_mpi_comm_matrix.py
+++ b/bin/starpu_mpi_comm_matrix.py
@@ -0,0 +1,116 @@
 
				+#!/usr/bin/env python3
			
 
				+# coding=utf-8
			
 
				+#
			
 
				+# StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+#
			
 
				+# Copyright (C) 2019-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+#
			
 
				+# StarPU is free software; you can redistribute it and/or modify
			
 
				+# it under the terms of the GNU Lesser General Public License as published by
			
 
				+# the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+# your option) any later version.
			
 
				+#
			
 
				+# StarPU is distributed in the hope that it will be useful, but
			
 
				+# WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+#
			
 
				+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+#
			
 
				+
			
 
				+import sys
			
 
				+import re
			
 
				+import os
			
 
				+
			
 
				+PROGNAME=sys.argv[0]
			
 
				+
			
 
				+def usage():
			
 
				+    print("Offline tool to draw a communication matrix")
			
 
				+    print("")
			
 
				+    print("Usage: %s <output_execution>" % PROGNAME)
			
 
				+    print("")
			
 
				+    print("Options:")
			
 
				+    print("	-h, --help          display this help and exit")
			
 
				+    print("	-v, --version       output version information and exit")
			
 
				+    print("	-png                produce plots in png format (default is pdf)")
			
 
				+    print("")
			
 
				+    print("Report bugs to <starpu-devel@lists.gforge.inria.fr>")
			
 
				+    sys.exit(1)
			
 
				+
			
 
				+if len(sys.argv) >= 2:
			
 
				+    if sys.argv[1] == '-v' or sys.argv[1] == '--version':
			
 
				+        print("%s (StarPU) 1.3.99" % PROGNAME)
			
 
				+        sys.exit(0)
			
 
				+    if sys.argv[1] == '-h' or sys.argv[1] == '--help':
			
 
				+        usage()
			
 
				+if (len(sys.argv) == 1):
			
 
				+    usage()
			
 
				+
			
 
				+if len(sys.argv) >= 2 and sys.argv[1] == '-png':
			
 
				+    outputformat='png'
			
 
				+    outputext='png'
			
 
				+    outputfile=sys.argv[2]
			
 
				+else:
			
 
				+    outputformat='pdf color'
			
 
				+    outputext='pdf'
			
 
				+    outputfile=sys.argv[1]
			
 
				+
			
 
				+# find the number of nodes
			
 
				+nodes=0
			
 
				+file = open(outputfile, "r")
			
 
				+for line in file.readlines():
			
 
				+    match = re.search('\TOTAL', line)
			
 
				+    if match:
			
 
				+        (node,stuff)=line.split(sep="[")[2].split("]")
			
 
				+        if (int(node) > nodes):
			
 
				+            nodes=int(node)
			
 
				+file.close()
			
 
				+nodes=nodes+1
			
 
				+
			
 
				+# extract volume of comm and bandwidth between all pair of nodes
			
 
				+volumes = [[0 for x in range(nodes)] for y in range(nodes)]
			
 
				+bandwidth = [[0 for x in range(nodes)] for y in range(nodes)]
			
 
				+file = open(outputfile, "r")
			
 
				+for line in file.readlines():
			
 
				+    match = re.search('\[starpu_comm_stats]', line)
			
 
				+    if match:
			
 
				+        match = re.search('TOTAL', line)
			
 
				+        if not match:
			
 
				+            (head,volB,B,volMB,MB,bwB,B,bwMB,MB) = line.split()
			
 
				+            (src,dst)=head.split(sep="[")[2].split(sep="]")[0].split(sep=":")
			
 
				+            volumes[int(src)][int(dst)] = float(volB)
			
 
				+            bandwidth[int(src)][int(dst)] = float(bwB)
			
 
				+file.close()
			
 
				+
			
 
				+def writeData(filename, nodes, data):
			
 
				+    ofile=open(filename, "w")
			
 
				+    for dst in range(nodes):
			
 
				+        for src in range(nodes):
			
 
				+            ofile.write("%f "% data[src][dst])
			
 
				+        ofile.write("\n")
			
 
				+    ofile.close()
			
 
				+
			
 
				+def generateGnuplotScript(filename, datafilename, outputfile, nodes):
			
 
				+    ofile=open(filename, "w")
			
 
				+    srctics=""
			
 
				+    dsttics=""
			
 
				+    for node in range(nodes-1):
			
 
				+        srctics += "\"src%d\" %d, " % (node, node)
			
 
				+        dsttics += "\"dst%d\" %d, " % (node, node)
			
 
				+    ofile.write("set term %s\n" % outputformat)
			
 
				+    ofile.write("set output \"%s.%s\"\n" % (outputfile, outputext))
			
 
				+    ofile.write("set view map scale 1\nset style data lines\n")
			
 
				+    ofile.write("set palette model RGB defined ( 0 'white', 100 'black' )\n")
			
 
				+    ofile.write("set xtics (%s\"src%d\" %d)\n" % (srctics, nodes-1, nodes-1))
			
 
				+    ofile.write("set ytics (%s\"dst%d\" %d)\n" % (dsttics, nodes-1, nodes-1))
			
 
				+    ofile.write("plot '%s' matrix with image\n" % datafilename)
			
 
				+    ofile.close()
			
 
				+
			
 
				+# generate gnuplot volume data and script file
			
 
				+writeData(outputfile+"_volume.data", nodes, volumes)
			
 
				+generateGnuplotScript(outputfile+"_volume.gp", outputfile+"_volume.data", outputfile+"_volume_heatmap", nodes)
			
 
				+os.system("gnuplot " + outputfile+"_volume.gp")
			
 
				+
			
 
				+# generate gnuplot bandwidth data and script file
			
 
				+writeData(outputfile+"_bw.data", nodes, bandwidth)
			
 
				+generateGnuplotScript(outputfile+"_bw.gp", outputfile+"_bw.data", outputfile+"_bw_heatmap", nodes)
			
 
				+os.system("gnuplot " + outputfile+"_bw.gp")
			
--- a/bin/starpu_paje_draw_histogram
+++ b/bin/starpu_paje_draw_histogram
@@ -0,0 +1,144 @@
 
				+#!/bin/bash
			
 
				+# StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+#
			
 
				+# Copyright (C) 2014-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+# Copyright (C) 2014       Université Joseph Fourier
			
 
				+#
			
 
				+# StarPU is free software; you can redistribute it and/or modify
			
 
				+# it under the terms of the GNU Lesser General Public License as published by
			
 
				+# the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+# your option) any later version.
			
 
				+#
			
 
				+# StarPU is distributed in the hope that it will be useful, but
			
 
				+# WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+#
			
 
				+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+#
			
 
				+# Script for giving statistical analysis of the paje trace
			
 
				+
			
 
				+set -e # fail fast
			
 
				+
			
 
				+# File names
				+basename="$PWD"
				+# The R script is looked up next to this script; every expansion is quoted
				+# so that an installation path containing spaces still resolves.
				+r_script="$(dirname "$(which "$0")")/starpu_paje_draw_histogram.R"
				+r_input=""
			
 
				+
			
 
				+# Command line arguments
			
 
				+range="0:-1"
			
 
				+name="All"
			
 
				+verbose=0
			
 
				+inputfiles=""
			
 
				+
			
 
				+help_script()
			
 
				+{
			
 
				+cat << EOF
			
 
				+Give statistical analysis of the paje trace
			
 
				+
			
 
				+$0 [ options ] paje.trace [paje.trace2 ...]
			
 
				+
			
 
				+Options:
			
 
				+   -r      To fix range x1:x2 ("-1" for infinity)
			
 
				+   -n      To choose a certain state
			
 
				+   -v      Print output to command line
			
 
				+   -h      Show this message
			
 
				+
			
 
				+Examples:
			
 
				+
			
 
				+$0 -n chol_model_22 example.native.trace
			
 
				+
			
 
				+$0 -r 100:300 -n FetchingInput,Overhead -v example.native.trace example.simgrid.trace
			
 
				+
			
 
				+Report bugs to <starpu-devel@lists.gforge.inria.fr>
			
 
				+EOF
			
 
				+}
			
 
				+
			
 
				+# Early handling of the long options, which the getopts loop does not know.
				+if [ "$1" = "--version" ] ; then
				+    # PROGNAME is never assigned in this script, so the version line used to
				+    # start with an empty name; fall back to the invoked script name.
				+    echo "${PROGNAME:-$0} (StarPU) 1.3.99"
				+    exit 0
				+fi
				+
				+# No argument at all is treated like a request for help.
				+if [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$1" = "" ] ; then
				+    help_script
				+    exit 0
				+fi
			
 
				+
			
 
				+while getopts "r:n:vh" opt; do
			
 
				+  case $opt in
			
 
				+     r)
			
 
				+      range="$OPTARG"
			
 
				+      ;;
			
 
				+     n)
			
 
				+      name="$OPTARG"
			
 
				+      ;;
			
 
				+     v)
			
 
				+      verbose=1
			
 
				+      ;;
			
 
				+     h)
			
 
				+      help_script
			
 
				+      exit 4
			
 
				+      ;;
			
 
				+    \?)
			
 
				+      echo "Invalid option: -$OPTARG"
			
 
				+      help_script
			
 
				+      exit 3
			
 
				+      ;;
			
 
				+  esac
			
 
				+done
			
 
				+
			
 
				+# Reading files that need to be analyzed
			
 
				+shift $((OPTIND - 1))
			
 
				+inputfiles=$@
			
 
				+if [[ $# < 1 ]]; then
			
 
				+    echo "Error!"
			
 
				+    help_script
			
 
				+    exit 2
			
 
				+fi
			
 
				+
			
 
				+# Getting range
				+# Split "x1:x2" at the colon.  No eval: the value comes straight from the
				+# -r option and must never be executed as shell code.
				+range1=$(printf '%s\n' "$range" | cut -d: -f1)
				+range2=$(printf '%s\n' "$range" | cut -d: -f2)
			
 
				+
			
 
				+#####################################
				+# Transforming input files into .csv
				+# $inputfiles is left unquoted on purpose: it is a space-separated list.
				+for file in $inputfiles; do
				+    # -s is true only for an existing file of non-zero size
				+    if [ ! -s "$file" ]
				+	then
				+	echo "Error: file $file does not exist or is empty!"
				+	exit 5
				+    fi
				+# Sorting traces
				+    # Lines starting with '%' or with a word 0..7 are kept in their original
				+    # order in start.trace; all other lines are sorted on their second field.
				+    grep -e '^\(\(%\)\|\(\(0\|1\|2\|3\|4\|5\|6\|7\)\>\)\)' "$file" > start.trace
				+    grep -e '^\(\(%\)\|\(\(0\|1\|2\|3\|4\|5\|6\|7\)\>\)\)' -v "$file" > end.trace
				+    sort -s -V --key=2,2 end.trace > endSorted.trace
				+    if grep -q start_profiling endSorted.trace
				+    then
				+	echo Using start_profiling/stop_profiling trace selection.
				+	sed -ne '/start_profiling/,/stop_profiling/p' < endSorted.trace > endSorted2.trace
				+    else
				+	cp endSorted.trace endSorted2.trace
				+    fi
				+    cat start.trace endSorted2.trace > outputSorted.trace
				+
				+# Transferring to .csv
				+    pj_dump -n outputSorted.trace > "$file.csv"
				+    # keep only the lines starting with "State"
				+    perl -i -ne 'print if /^State/' "$file.csv"
				+
				+    # Plain concatenation: the former 'eval echo' re-evaluated the file
				+    # name as shell code.
				+    r_input="${r_input:+$r_input }$file.csv"
				+done
			
 
				+
			
 
				+#####################################
				+# Running R file to get actual results
				+# Scalars are quoted so an empty or space-containing value cannot shift the
				+# positional arguments read by the R script; $r_input stays unquoted
				+# because it is a space-separated list of .csv files.
				+Rscript "$r_script" "$range1" "$range2" "$name" $r_input
				+
				+# Directly opening .pdf result
				+if [[ $verbose == 1 ]]; then
				+    evince Rplots.pdf
				+fi
			
 
				+
			
 
				+# Cleanup: delete temporary files
			
 
				+rm -f outputSorted.trace
			
 
				+rm -f start.trace
			
 
				+rm -f end.trace
			
 
				+rm -f endSorted.trace
			
 
				+rm -f endSorted2.trace
			
--- a/bin/starpu_paje_draw_histogram.R
+++ b/bin/starpu_paje_draw_histogram.R
@@ -0,0 +1,125 @@
 
				+# StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+#
			
 
				+# Copyright (C) 2014-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+# Copyright (C) 2014       Université Joseph Fourier
			
 
				+#
			
 
				+# StarPU is free software; you can redistribute it and/or modify
			
 
				+# it under the terms of the GNU Lesser General Public License as published by
			
 
				+# the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+# your option) any later version.
			
 
				+#
			
 
				+# StarPU is distributed in the hope that it will be useful, but
			
 
				+# WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+#
			
 
				+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+#
			
 
				+
			
 
				+# R script that is giving statistical analysis of the paje trace
			
 
				+
			
 
				+# Can be called from the command line with:
			
 
				+# Rscript $this_script $range1 $range2 $name $inputfiles
			
 
				+
			
 
				+# Package containing ddply function
			
 
				+library(plyr)
			
 
				+library(ggplot2)
			
 
				+library(data.table)
			
 
				+
			
 
				+# Function for reading .csv file
			
 
				+# Read the header-less CSV `file`, keep the ResourceId/Start/End/Duration/
				+# Value columns and tag every row with the file name in Origin.  Short
				+# state codes are expanded, only the states listed in the global `name`
				+# are kept (unless it is "All"), Start is shifted so that the earliest kept
				+# state begins at 0, and the rows with Start >= range1 and End <= range2
				+# are returned.
				+read_df <- function(file,range1,range2) {
				+  trace <- read.csv(file, header=FALSE, strip.white=TRUE)
				+  names(trace) <- c("Nature","ResourceId","Type","Start","End","Duration", "Depth", "Value")
				+  unused <- c("Nature","Type", "Depth")
				+  trace <- trace[!(names(trace) %in% unused)]
				+  trace$Origin <- file
				+
				+# Changing names if needed: short code on the left, full name on the right
				+  full_names <- c("F"="Freeing", "A"="Allocating", "W"="WritingBack",
				+                  "No"="Nothing", "I"="Initializing", "D"="Deinitializing",
				+                  "Fi"="FetchingInput", "Po"="PushingOutput", "C"="Callback",
				+                  "B"="Overhead", "Sl"="Sleeping", "P"="Progressing",
				+                  "U"="Unpartitioning", "Ar"="AllocatingReuse",
				+                  "R"="Reclaiming", "Co"="DriverCopy", "CoA"="DriverCopyAsync")
				+  trace$Value <- as.character(trace$Value)
				+  coded <- trace$Value %in% names(full_names)
				+  trace$Value[coded] <- unname(full_names[trace$Value[coded]])
				+
				+# Considering only the states with a given name
				+  if (name != "All")
				+    trace <- trace[trace$Value %in% name[[1]],]
				+
				+# Aligning to begin time from 0
				+  trace$Start <- trace$Start - min(trace$Start)
				+  trace$End <- trace$Start+trace$Duration
				+
				+# Taking only the states inside a given range
				+  in_range <- trace$Start>=range1 & trace$End<=range2
				+  trace[in_range,]
				+}
			
 
				+
			
 
				+#########################################
			
 
				+#########################################
			
 
				+# Main
			
 
				+#########################################
			
 
				+# Reading command line arguments
				+# Positional order: range1 range2 name file1 [file2 ...]
				+args <- commandArgs(trailingOnly = TRUE)
				+range1 <- as.numeric(args[1])
				+range2 <- as.numeric(args[2])
				+# -1 stands for infinity
				+if (range1 == -1) range1 <- Inf
				+if (range2 == -1) range2 <- Inf
				+# list holding the vector of requested state names
				+name <- strsplit(args[3], ",")
				+
				+# Reading first file
				+df <- read_df(args[4],range1,range2)
				+
				+# Reading every further input file and appending its rows
				+for (extra in args[-(1:4)]) {
				+  df <- rbindlist(list(df, read_df(extra,range1,range2)))
				+}
				+
				+# Error: if there is no results for a given range and state
				+if (nrow(df) == 0) stop("Result is empty!")
			
 
				+
			
 
				+# Plotting histograms
				+# One histogram of Duration per facet; the bin width is 1/30 of the overall
				+# Duration span and bars are coloured by their count.
				+plot <- ggplot(df, aes(x=Duration)) + geom_histogram(aes(y=..count.., fill=..count..),binwidth = diff(range(df$Duration))/30)
				+# Facets: one row per input file (Origin), one column per state (Value),
				+# each row with its own y scale.
				+plot <- plot + theme_bw()  + scale_fill_gradient(high = "#132B43", low = "#56B1F7") + ggtitle("Histograms for state distribution") + ylab("Count") + xlab("Time [ms]") + theme(legend.position="none") + facet_grid(Origin~Value,scales = "free_y")
				+
				+# Adding text for total duration
				+# ad: computed data of the histogram layer; al: panel layout table.
				+# NOTE(review): the $panel$layout accessor presumably depends on the
				+# installed ggplot2 version -- confirm against the version in use.
				+ad<-ggplot_build(plot)$data[[1]]
				+al<-ggplot_build(plot)$panel$layout
				+ad<-merge(ad,al)
				+# Label position per facet row: 70% of the largest x, 90% of the largest y
				+anno1 <- ddply(ad, .(ROW), summarise, x = max(x)*0.7, y = max(y)*0.9)
				+anno1<-merge(anno1,al)
				+# Total duration per (Origin, Value), truncated to an integer
				+anno2 <- ddply(df, .(Origin,Value), summarise, tot=as.integer(sum(Duration)))
				+# NOTE(review): assumes the row order of anno2 matches the PANEL numbering
				+# of the layout -- confirm.
				+anno2$PANEL <- row.names(anno2)
				+anno2$lab <- sprintf("Total duration: \n%ims",anno2$tot)
				+anno <- merge(anno1,anno2)
				+plot <- plot + geom_text(data = anno, aes(x=x, y=y, label=lab, colour="red"))
				+
				+# Printing plot
				+plot
				+
				+# End
				+write("Done producing a histogram plot. Open Rplots.pdf located in this folder to see the results", stdout())
			
--- a/bin/starpu_paje_state_stats
+++ b/bin/starpu_paje_state_stats
@@ -0,0 +1,145 @@
 
				+#!/bin/bash
			
 
				+# StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+#
			
 
				+# Copyright (C) 2014-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+# Copyright (C) 2014       Université Joseph Fourier
			
 
				+#
			
 
				+# StarPU is free software; you can redistribute it and/or modify
			
 
				+# it under the terms of the GNU Lesser General Public License as published by
			
 
				+# the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+# your option) any later version.
			
 
				+#
			
 
				+# StarPU is distributed in the hope that it will be useful, but
			
 
				+# WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+#
			
 
				+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+#
			
 
				+# Script for giving statistical analysis of the paje trace
			
 
				+
			
 
				+set -e # fail fast
			
 
				+
			
 
				+# File names
				+basename="$PWD"
				+outputfile="starpu_paje_state_stats.csv"
				+# The R script is looked up next to this script; every expansion is quoted
				+# so that an installation path containing spaces still resolves.
				+r_script="$(dirname "$(which "$0")")/starpu_paje_state_stats.R"
				+r_input=""
			
 
				+
			
 
				+# Command line arguments
			
 
				+range="0:-1"
			
 
				+name="All"
			
 
				+verbose=0
			
 
				+inputfiles=""
			
 
				+
			
 
				+# Print the usage text (synopsis, options, examples) on standard output.
				+# The here-document delimiter is unquoted, so $0 expands to the name the
				+# script was invoked with.  The function only prints; callers choose the
				+# exit status.
				+help_script()
				+{
				+cat << EOF
				+Give statistical analysis of the paje trace
				+
				+$0 [ options ] paje.trace [paje.trace2 ...]
				+
				+Options:
				+   -r      To fix range x1:x2 ("-1" for infinity)
				+   -n      To choose a certain state
				+   -v      Print output to command line
				+   -h      Show this message
				+
				+Examples:
				+
				+$0 example.native.trace
				+
				+$0 -r 100:300 -n FetchingInput -v example.native.trace example.simgrid.trace
				+
				+Report bugs to <starpu-devel@lists.gforge.inria.fr>
				+EOF
				+}
			
 
				+
			
 
				+# Early handling of the long options, which the getopts loop does not know.
				+if [ "$1" = "--version" ] ; then
				+    # PROGNAME is never assigned in this script, so the version line used to
				+    # start with an empty name; fall back to the invoked script name.
				+    echo "${PROGNAME:-$0} (StarPU) 1.3.99"
				+    exit 0
				+fi
				+
				+# No argument at all is treated like a request for help.
				+if [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$1" = "" ] ; then
				+    help_script
				+    exit 0
				+fi
			
 
				+
			
 
				+while getopts "r:n:vh" opt; do
			
 
				+  case $opt in
			
 
				+     r)
			
 
				+      range="$OPTARG"
			
 
				+      ;;
			
 
				+     n)
			
 
				+      name="$OPTARG"
			
 
				+      ;;
			
 
				+     v)
			
 
				+      verbose=1
			
 
				+      ;;
			
 
				+     h)
			
 
				+      help_script
			
 
				+      exit 4
			
 
				+      ;;
			
 
				+    \?)
			
 
				+      echo "Invalid option: -$OPTARG"
			
 
				+      help_script
			
 
				+      exit 3
			
 
				+      ;;
			
 
				+  esac
			
 
				+done
			
 
				+
			
 
				+# Reading files that need to be analyzed
			
 
				+shift $((OPTIND - 1))
			
 
				+inputfiles=$@
			
 
				+if [[ $# < 1 ]]; then
			
 
				+    echo "Error!"
			
 
				+    help_script
			
 
				+    exit 2
			
 
				+fi
			
 
				+
			
 
				+# Getting range
				+# Split "x1:x2" at the colon.  No eval: the value comes straight from the
				+# -r option and must never be executed as shell code.
				+range1=$(printf '%s\n' "$range" | cut -d: -f1)
				+range2=$(printf '%s\n' "$range" | cut -d: -f2)
			
 
				+
			
 
				+#####################################
				+# Transforming input files into .csv
				+# $inputfiles is left unquoted on purpose: it is a space-separated list.
				+for file in $inputfiles; do
				+    # -s is true only for an existing file of non-zero size
				+    if [ ! -s "$file" ]
				+	then
				+	echo "Error: file $file does not exist or is empty!"
				+	exit 5
				+    fi
				+# Sorting traces
				+    # Lines starting with '%' or with a word 0..7 are kept in their original
				+    # order in start.trace; all other lines are sorted on their second field.
				+    grep -e '^\(\(%\)\|\(\(0\|1\|2\|3\|4\|5\|6\|7\)\>\)\)' "$file" > start.trace
				+    grep -e '^\(\(%\)\|\(\(0\|1\|2\|3\|4\|5\|6\|7\)\>\)\)' -v "$file" > end.trace
				+    sort -s -V --key=2,2 end.trace > endSorted.trace
				+    if grep -q start_profiling endSorted.trace
				+    then
				+	echo Using start_profiling/stop_profiling trace selection.
				+	sed -ne '/start_profiling/,/stop_profiling/p' < endSorted.trace > endSorted2.trace
				+    else
				+	cp endSorted.trace endSorted2.trace
				+    fi
				+    cat start.trace endSorted2.trace > outputSorted.trace
				+
				+# Transferring to .csv
				+    pj_dump -n outputSorted.trace > "$file.csv"
				+    # keep only the lines starting with "State"
				+    perl -i -ne 'print if /^State/' "$file.csv"
				+
				+    # Plain concatenation: the former 'eval echo' re-evaluated the file
				+    # name as shell code.
				+    r_input="${r_input:+$r_input }$file.csv"
				+done
			
 
				+
			
 
				+#####################################
				+# Running R file to get actual results
				+# Scalars are quoted so an empty or space-containing value cannot shift the
				+# positional arguments read by the R script; $r_input stays unquoted
				+# because it is a space-separated list of .csv files.
				+Rscript "$r_script" "$range1" "$range2" "$name" "$outputfile" $r_input
				+
				+# If verbose then write results to stdout
				+if [[ $verbose == 1 ]]; then
				+    column -s, -t "$outputfile"
				+fi
			
 
				+
			
 
				+# Cleanup: delete temporary files
			
 
				+rm -f outputSorted.trace
			
 
				+rm -f start.trace
			
 
				+rm -f end.trace
			
 
				+rm -f endSorted.trace
			
 
				+rm -f endSorted2.trace
			
--- a/bin/starpu_paje_state_stats.R
+++ b/bin/starpu_paje_state_stats.R
@@ -0,0 +1,125 @@
 
				+# StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+#
			
 
				+# Copyright (C) 2014-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+# Copyright (C) 2014       Université Joseph Fourier
			
 
				+#
			
 
				+# StarPU is free software; you can redistribute it and/or modify
			
 
				+# it under the terms of the GNU Lesser General Public License as published by
			
 
				+# the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+# your option) any later version.
			
 
				+#
			
 
				+# StarPU is distributed in the hope that it will be useful, but
			
 
				+# WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+#
			
 
				+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+#
			
 
				+
			
 
				+# R script that is giving statistical analysis of the paje trace
			
 
				+
			
 
				+# Can be called from the command line with:
			
 
				+# Rscript $this_script $range1 $range2 $name $outputfile $inputfiles
			
 
				+
			
 
				+# Package containing ddply function
			
 
				+library(plyr)
			
 
				+
			
 
				+# Function for reading .csv file
			
 
				+# Read the header-less CSV `file` and keep the ResourceId/Start/End/
				+# Duration/Value columns.  Short state codes are expanded, only the states
				+# listed in the global `name` are kept (unless it is "All"), Start is
				+# shifted so that the earliest kept state begins at 0, and the rows with
				+# Start >= range1 and End <= range2 are returned.
				+read_df <- function(file,range1,range2) {
				+  trace <- read.csv(file, header=FALSE, strip.white=TRUE)
				+  names(trace) <- c("Nature","ResourceId","Type","Start","End","Duration", "Depth", "Value")
				+  unused <- c("Nature","Type", "Depth")
				+  trace <- trace[!(names(trace) %in% unused)]
				+
				+# Changing names if needed: short code on the left, full name on the right
				+  full_names <- c("F"="Freeing", "A"="Allocating", "W"="WritingBack",
				+                  "No"="Nothing", "I"="Initializing", "D"="Deinitializing",
				+                  "Fi"="FetchingInput", "Po"="PushingOutput", "C"="Callback",
				+                  "B"="Overhead", "Sl"="Sleeping", "P"="Progressing",
				+                  "U"="Unpartitioning", "Ar"="AllocatingReuse",
				+                  "R"="Reclaiming", "Co"="DriverCopy", "CoA"="DriverCopyAsync",
				+                  "Su"="SubmittingTask")
				+  trace$Value <- as.character(trace$Value)
				+  coded <- trace$Value %in% names(full_names)
				+  trace$Value[coded] <- unname(full_names[trace$Value[coded]])
				+
				+# Considering only the states with a given name
				+  if (name != "All")
				+    trace <- trace[trace$Value %in% name[[1]],]
				+
				+# Aligning to begin time from 0
				+  trace$Start <- trace$Start - min(trace$Start)
				+  trace$End <- trace$Start+trace$Duration
				+
				+# Taking only the states inside a given range
				+  in_range <- trace$Start>=range1 & trace$End<=range2
				+  trace[in_range,]
				+}
			
 
				+
			
 
				+#########################################
			
 
				+#########################################
			
 
				+# Main
			
 
				+#########################################
			
 
				+# Reading command line arguments
				+# Positional order: range1 range2 name outputfile file1 [file2 ...]
				+args <- commandArgs(trailingOnly = TRUE)
				+range1<-as.numeric(args[1])
				+# -1 stands for infinity
				+if (range1==-1)
				+  range1<-Inf
				+range2<-as.numeric(args[2])
				+if (range2==-1)
				+  range2<-Inf
				+# list holding the vector of requested state names
				+name<-strsplit(args[3], ",")
				+outputfile<-args[4]
				+
				+# Reading first file
				+filename<-args[5]
				+df<-read_df(filename,range1,range2)
				+
				+# Getting summary of the first file
				+# One row per state: number of events and summed duration; the two value
				+# columns are then renamed after the input file.
				+dfout<-ddply(df, c("Value"), summarize, Events_ = length(as.numeric(Duration)), Duration_ = sum(as.numeric(Duration)))
				+names(dfout)<-c("Value",sprintf("Events_%s",filename),sprintf("Duration_%s",filename))
				+
				+i=6
				+while (i <= length(args))
				+  {
				+# Reading next input file
				+    filename<-args[i]
				+    df<-read_df(filename,range1,range2)
				+
				+# Getting summary of the next file
				+    dp<-ddply(df, c("Value"), summarize, Events_ = length(as.numeric(Duration)), Duration_ = sum(as.numeric(Duration)))
				+    names(dp)<-c("Value",sprintf("Events_%s",filename),sprintf("Duration_%s",filename))
				+
				+# Merging results into one single data frame
				+# all=TRUE keeps the states that appear in only one of the two tables
				+    if (nrow(dp)>0)
				+      {
				+        if (nrow(dfout)>0)
				+          dfout<-merge(dfout,dp, by = "Value", all=TRUE)
				+        else
				+          dfout<-dp
				+      }
				+    
				+    i <- i+1
				+  }
				+
				+# Cosmetics: change NA to 0
				+dfout[is.na(dfout)] <- 0
				+
				+# Error: if there is no results for a given range and state
				+if (nrow(dfout)==0)
				+  stop("Result is empty!")
				+
				+# Write results into the new .csv file
				+# Fields are separated by ", " and the row names are omitted.
				+write.table(dfout, file=outputfile, row.names=FALSE, sep = ", ")
			
 
				+
			
 
				+
			
--- a/bin/starpu_paje_summary
+++ b/bin/starpu_paje_summary
@@ -0,0 +1,109 @@
 
				+#!/bin/bash
			
 
				+# StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+#
			
 
				+# Copyright (C) 2014-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+# Copyright (C) 2014       Université Joseph Fourier
			
 
				+#
			
 
				+# StarPU is free software; you can redistribute it and/or modify
			
 
				+# it under the terms of the GNU Lesser General Public License as published by
			
 
				+# the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+# your option) any later version.
			
 
				+#
			
 
				+# StarPU is distributed in the hope that it will be useful, but
			
 
				+# WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+#
			
 
				+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+#
			
 
				+# Script for giving statistical analysis of the paje trace
			
 
				+
			
 
				+set -e # fail fast
			
 
				+
			
 
				+# File names
			
 
				+SOURCE_DIR=/home/mmakni/starpu/build_starpu/../tools
			
 
				+
			
 
				+outputfile="summary.html"
			
 
				+analysis_script="$SOURCE_DIR/starpu_paje_summary.Rmd"
			
 
				+analysis_input=""
			
 
				+
			
 
				+# Command line arguments
			
 
				+inputfiles=""
			
 
				+
			
 
				+help_script()
			
 
				+{
			
 
				+cat << EOF
			
 
				+Give statistical analysis of the paje trace
			
 
				+
			
 
				+Options:
			
 
				+   -h      Show this message
			
 
				+
			
 
				+Examples:
			
 
				+$0 example.native.trace
			
 
				+$0 example.native.trace example.simgrid.trace
			
 
				+
			
 
				+Report bugs to <starpu-devel@lists.gforge.inria.fr>
			
 
				+EOF
			
 
				+}
			
 
				+
			
 
				+if [ "$1" = "--version" ] ; then
			
 
				+    echo "$PROGNAME (StarPU) 1.3.99"
			
 
				+    exit 0
			
 
				+fi
			
 
				+
			
 
				+if [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$1" = "" ] ; then
			
 
				+    help_script
			
 
				+    exit 0
			
 
				+fi
			
 
				+
			
 
				+while getopts "h" opt; do
			
 
				+  case $opt in
			
 
				+    \?)
			
 
				+      echo "Invalid option: -$OPTARG"
			
 
				+      help_script
			
 
				+      exit 3
			
 
				+      ;;
			
 
				+  esac
			
 
				+done
			
 
				+
			
 
				+# Reading files that need to be analyzed
			
 
				+shift $((OPTIND - 1))
			
 
				+inputfiles=$@
			
 
				+# Error if there is no input files specified
			
 
				+if [[ $# < 1 ]]; then
			
 
				+    echo "Error!"
			
 
				+    help_script
			
 
				+    exit 2
			
 
				+fi
			
 
				+
			
 
				+#####################################
			
 
				+# Transforming input files into .csv
			
 
				+for file in $inputfiles; do
			
 
				+    if [ ! -s $file ]
			
 
				+	then
			
 
				+	echo "Error: file $file does not exist!"
			
 
				+	exit 5
			
 
				+    fi
			
 
				+# Sorting traces
			
 
				+    grep -e '^\(\(%\)\|\(\(0\|1\|2\|3\|4\|5\|6\|7\|9\)\>\)\)' $file > start.trace
			
 
				+    grep -e '^\(\(%\)\|\(\(0\|1\|2\|3\|4\|5\|6\|7\|9\|18\|19\)\>\)\)' -v  $file > end.trace
			
 
				+    sort -s -V --key=2,2 end.trace > endSorted.trace
			
 
				+    cat start.trace endSorted.trace > outputSorted.trace
			
 
				+
			
 
				+# Transferring to .csv
			
 
				+    pj_dump -n outputSorted.trace > $file.csv
			
 
				+    perl -i -ne 'print if /^State/' $file.csv
			
 
				+done
			
 
				+
			
 
				+analysis_input=`echo \"$inputfiles".csv\"" | sed 's/  */.csv", "/g'`
			
 
				+
			
 
				+#####################################
			
 
				+# Running analysis file to get actual results
			
 
				+Rscript -e "library(knitr); input_traces = c($analysis_input) ; outputhtml='$outputfile';\
			
 
				+            outputRmd = gsub('.html\$','.Rmd',outputhtml);\
			
 
				+            knit('$analysis_script',output=outputRmd); knitr::knit2html(outputRmd)"
			
 
				+
			
 
				+# Cleanup: delete temporary files
			
 
				+rm -f outputSorted.trace
			
 
				+rm -f start.trace
			
 
				+rm -f end.trace
			
 
				+rm -f endSorted.trace
			
--- a/bin/starpu_paje_summary.Rmd
+++ b/bin/starpu_paje_summary.Rmd
@@ -0,0 +1,299 @@
 
				+# StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+#
			
 
				+# Copyright (C) 2014-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+#
			
 
				+# StarPU is free software; you can redistribute it and/or modify
			
 
				+# it under the terms of the GNU Lesser General Public License as published by
			
 
				+# the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+# your option) any later version.
			
 
				+#
			
 
				+# StarPU is distributed in the hope that it will be useful, but
			
 
				+# WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+#
			
 
				+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+#
			
 
				+<div id="table-of-contents">
			
 
				+<h2>Table of Contents</h2>
			
 
				+<div id="text-table-of-contents">
			
 
				+<ul>
			
 
				+<li><a href="#sec-1">1. Introduction</a>
			
 
				+<ul>
			
 
				+<li>
			
 
				+<ul>
			
 
				+<li><a href="#sec-1-0-1">1.0.1. How to compile</a></li>
			
 
				+<li><a href="#sec-1-0-2">1.0.2. Software dependencies</a></li>
			
 
				+</ul>
			
 
				+</li>
			
 
				+</ul>
			
 
				+</li>
			
 
				+<li><a href="#sec-2">2. Gantt Charts of the whole Trace</a></li>
			
 
				+<li><a href="#sec-3">3. Table Summary</a></li>
			
 
				+<li><a href="#sec-4">4. State Duration during the Execution Time</a></li>
			
 
				+<li><a href="#sec-5">5. Distribution Histograms</a></li>
			
 
				+</ul>
			
 
				+</div>
			
 
				+</div>
			
 
				+```{r Setup, echo=FALSE}
			
 
				+opts_chunk$set(echo=FALSE)
			
 
				+```
			
 
				+
			
 
				+
			
 
				+
			
 
				+```{r Install_R_libraries}
			
 
				+InstalledPackage <- function(package)
			
 
				+{
			
 
				+    available <- suppressMessages(suppressWarnings(sapply(package, require, quietly = TRUE, character.only = TRUE, warn.conflicts = FALSE)))
			
 
				+    missing <- package[!available]
			
 
				+    if (length(missing) > 0) return(FALSE)
			
 
				+    return(TRUE)
			
 
				+}
			
 
				+
			
 
				+CRANChoosen <- function()
			
 
				+{
			
 
				+    return(getOption("repos")["CRAN"] != "@CRAN@")
			
 
				+}
			
 
				+
			
 
				+UsePackage <- function(package, defaultCRANmirror = "http://cran.at.r-project.org")
			
 
				+{
			
 
				+    if(!InstalledPackage(package))
			
 
				+    {
			
 
				+	if(!CRANChoosen())
			
 
				+	{
			
 
				+	    chooseCRANmirror()
			
 
				+	    if(!CRANChoosen())
			
 
				+	    {
			
 
				+		options(repos = c(CRAN = defaultCRANmirror))
			
 
				+	    }
			
 
				+	}
			
 
				+
			
 
				+	suppressMessages(suppressWarnings(install.packages(package)))
			
 
				+	if(!InstalledPackage(package)) return(FALSE)
			
 
				+    }
			
 
				+    return(TRUE)
			
 
				+}
			
 
				+
			
 
				+# Now install desired libraries
			
 
				+libraries <- c("ggplot2", "plyr", "data.table", "RColorBrewer")
			
 
				+for(libr in libraries)
			
 
				+{
			
 
				+    if(!UsePackage(libr))
			
 
				+    {
			
 
				+	stop("Error!", libr)
			
 
				+    }
			
 
				+}
			
 
				+```
			
 
				+
			
 
				+```{r Load_R_files}
			
 
				+# Load ggplot and plyr just for the following cases
			
 
				+   library(ggplot2)
			
 
				+   library(plyr)
			
 
				+   library(data.table)
			
 
				+   library(RColorBrewer)
			
 
				+
			
 
				+# Defining non-computation states:
			
 
				+def_states<-c("Initializing","Deinitializing","Overhead","Nothing","Sleeping","Freeing","Allocating","WritingBack","FetchingInput","PushingOutput","Callback","Progressing","Unpartitioning","AllocatingReuse","Reclaiming","DriverCopy","DriverCopyAsync","Scheduling","Executing")
			
 
				+
			
 
				+# Function for reading .csv file
			
 
				+read_df <- function(file,range1,range2) {
			
 
				+  df<-read.csv(file, header=FALSE, strip.white=TRUE)
			
 
				+  names(df) <- c("Nature","ResourceId","Type","Start","End","Duration", "Depth", "Value")
			
 
				+  df = df[!(names(df) %in% c("Nature","Type", "Depth"))]
			
 
				+  df$Origin<-as.factor(as.character(file))
			
 
				+
			
 
				+# Changing names if needed:
			
 
				+  df$Value <- as.character(df$Value)
			
 
				+  df$Value <- ifelse(df$Value == "F", "Freeing", as.character(df$Value))
			
 
				+  df$Value <- ifelse(df$Value == "A", "Allocating", as.character(df$Value))
			
 
				+  df$Value <- ifelse(df$Value == "W", "WritingBack", as.character(df$Value))
			
 
				+  df$Value <- ifelse(df$Value == "No", "Nothing", as.character(df$Value))
			
 
				+  df$Value <- ifelse(df$Value == "I", "Initializing", as.character(df$Value))
			
 
				+  df$Value <- ifelse(df$Value == "D", "Deinitializing", as.character(df$Value))
			
 
				+  df$Value <- ifelse(df$Value == "Fi", "FetchingInput", as.character(df$Value))
			
 
				+  df$Value <- ifelse(df$Value == "Po", "PushingOutput", as.character(df$Value))
			
 
				+  df$Value <- ifelse(df$Value == "C", "Callback", as.character(df$Value))
			
 
				+  df$Value <- ifelse(df$Value == "B", "Overhead", as.character(df$Value))
			
 
				+  df$Value <- ifelse(df$Value == "Sc", "Scheduling", as.character(df$Value))
			
 
				+  df$Value <- ifelse(df$Value == "E", "Executing", as.character(df$Value))
			
 
				+  df$Value <- ifelse(df$Value == "Sl", "Sleeping", as.character(df$Value))
			
 
				+  df$Value <- ifelse(df$Value == "P", "Progressing", as.character(df$Value))
			
 
				+  df$Value <- ifelse(df$Value == "U", "Unpartitioning", as.character(df$Value))
			
 
				+  df$Value <- ifelse(df$Value == "Ar", "AllocatingReuse", as.character(df$Value))
			
 
				+  df$Value <- ifelse(df$Value == "R", "Reclaiming", as.character(df$Value))
			
 
				+  df$Value <- ifelse(df$Value == "Co", "DriverCopy", as.character(df$Value))
			
 
				+  df$Value <- ifelse(df$Value == "CoA", "DriverCopyAsync", as.character(df$Value))
			
 
				+
			
 
				+# Small cleanup
			
 
				+df$Start<-round(df$Start,digit=1)
			
 
				+df$End<-round(df$End,digit=1)
			
 
				+df$ResourceId<-as.factor(df$ResourceId)
			
 
				+df$Value<-as.factor(df$Value)
			
 
				+
			
 
				+# Start from zero
			
 
				+  m <- min(df$Start)
			
 
				+  df$Start <- df$Start - m
			
 
				+  df$End <- df$Start+df$Duration
			
 
				+
			
 
				+# Return data frame
			
 
				+  df
			
 
				+}
			
 
				+```
			
 
				+
			
 
				+```{r Load_traces}
			
 
				+df<-data.frame()
			
 
				+if( !exists("input_traces") )
			
 
				+  input_traces<-c("example.native.trace.csv", "example.simgrid.trace.csv")
			
 
				+
			
 
				+for (i in 1:length(input_traces)){
			
 
				+  dfs<-read_df(input_traces[i])
			
 
				+  df<-rbindlist(list(df,dfs))
			
 
				+}
			
 
				+
			
 
				+# Color palettes
			
 
				+colourCount = length(unique(df$Value))
			
 
				+getPalette = colorRampPalette(brewer.pal(9, "Set1"))
			
 
				+
			
 
				+# Order of Value so we can have good colors
			
 
				+ker_states<-as.character(unique(df[!(df$Value %in% def_states),Value]))
			
 
				+ordered_states<-append(sort(ker_states), def_states)
			
 
				+df$Value <- factor(df$Value, levels=ordered_states)
			
 
				+
			
 
				+# Order of ResourceId so we can have y-axis
			
 
				+df$ResourceId <- factor(df$ResourceId, levels=sort(as.character(unique(df$ResourceId))))
			
 
				+```
			
 
				+
			
 
				+# Introduction
			
 
				+
			
 
				+This document presents a basic analysis of multiple StarPU
			
 
				+traces. First, paje *traces* will be transferred into *.csv* files and
			
 
				+then we analyze them with **R**. This summary is a first step that
			
 
				+should help researchers verify their hypothesis or find problematic
			
 
				+areas that require more exhaustive investigation.
			
 
				+
			
 
				+Be cautious, as the following results are only a brief analysis of
			
 
				+the traces and many important phenomena could still be hidden. Also,
			
 
				+be very careful when comparing different states or traces. Even
			
 
				+though some large discrepancies can be irrelevant, in other cases
			
 
				+even the smallest differences can be essential in understanding what
			
 
				+exactly happened during the StarPU execution.
			
 
				+
			
 
				+### How to compile
			
 
				+
			
 
				+    starpu_paje_summary example.native.trace example.simgrid.trace
			
 
				+
			
 
				+### Software dependencies
			
 
				+
			
 
				+In order to run this analysis you need to have R installed:
			
 
				+
			
 
				+    sudo apt-get install r-base
			
 
				+
			
 
				+The easiest way to transform *paje* traces generated by StarPU into *.csv* files is to use the *pj_dump* program from pajeng (<https://github.com/schnorr/pajeng>), so we encourage users to install it.
			
 
				+
			
 
				+When R is installed, one will need to start R (e.g., from terminal) and install *knitr* package:
			
 
				+
			
 
				+    R> install.packages("knitr")
			
 
				+
			
 
				+Additional R packages used in this analysis (*ggplot2, plyr, data.table, RColorBrewer*) will be installed automatically when the document is compiled for the first time. If there is any trouble, install them by hand directly from R (the same way as *knitr*)
			
 
				+
			
 
				+# Gantt Charts of the whole Trace
			
 
				+
			
 
				+First, we show a simple gantt chart of every trace. X-axis is a
			
 
				+simple timeline of the execution, *Resources* on y-axis correspond
			
 
				+to different CPUs/GPUs that were used and finally different colors
			
 
				+represent different *States* of the application.
			
 
				+
			
 
				+This kind of figure can often point to idle time or
			
 
				+synchronization problems. A small disadvantage is that in most cases
			
 
				+there are too many states, thus it is impossible to display them all
			
 
				+on a single plot without aggregation. Therefore for any strange
			
 
				+behavior at a certain part of the trace, we strongly suggest to zoom
			
 
				+on the interval it occurred.
			
 
				+
			
 
				+```{r Gantt1}
			
 
				+ggplot(df,aes(x=Start,xend=End, y=factor(ResourceId), yend=factor(ResourceId),color=Value)) +
			
 
				+ theme_bw() + scale_color_manual(name="State",values=getPalette(colourCount)) +
			
 
				+ geom_segment(size=8) + ylab("Resource") + xlab("Time [ms]") +
			
 
				+ facet_wrap(~Origin,ncol=1,scale="free_y")
			
 
				+```
			
 
				+
			
 
				+Second, we will concentrate only on computation kernel states, to
			
 
				+get rid of visualization artifacts that can be introduced by other
			
 
				+(sometimes irrelevant) states. Normally, this plot should not be too
			
 
				+different from the previous one.
			
 
				+
			
 
				+```{r Gantt2}
			
 
				+# Select only computation kernels
			
 
				+ df1 <- df[!(df$Value %in% c("Initializing","Deinitializing","Overhead","Nothing","Sleeping","Freeing","Allocating","WritingBack","FetchingInput","PushingOutput","Callback","Progressing","Unpartitioning","AllocatingReuse","Reclaiming","DriverCopy","DriverCopyAsync","Scheduling","Executing")),]
			
 
				+
			
 
				+# Start from zero
			
 
				+  m <- min(df1$Start)
			
 
				+  df1$Start <- df1$Start - m
			
 
				+  df1$End <- df1$Start+df1$Duration
			
 
				+
			
 
				+# Plot
			
 
				+ ggplot(df1,aes(x=Start,xend=End, y=factor(ResourceId), yend=factor(ResourceId),color=Value)) +
			
 
				+  theme_bw() + scale_color_manual(name="State",values=getPalette(colourCount)) +
			
 
				+  geom_segment(size=8) + ylab("Resource") + xlab("Time [ms]") +
			
 
				+  facet_wrap(~Origin,ncol=1,scale="free_y")
			
 
				+```
			
 
				+
			
 
				+# Table Summary
			
 
				+
			
 
				+Here we present how much time application spent in each state
			
 
				+(OverallDuration), how many times it was in that state (Count),
			
 
				+mean and median values of duration (Mean and Median), and finally
			
 
				+what is a standard deviation (StandDev).
			
 
				+
			
 
				+General information provided by this table can sometimes give an
			
 
				+idea to application experts which parts of code are not working as
			
 
				+desired. Be aware that this kind of table hides many important
			
 
				+things, such as outliers, multiple modes, etc.
			
 
				+
			
 
				+```{r Table}
			
 
				+options(width=120)
			
 
				+ddply(df,.(Value,Origin), summarize, OverallDuration=sum(Duration), Count=length(Duration), Mean=mean(Duration), Median=median(Duration), StandDev=sd(Duration))
			
 
				+```
			
 
				+
			
 
				+# State Duration during the Execution Time
			
 
				+
			
 
				+Now, we show how duration of each state was changing during the
			
 
				+execution. This can display a general behavior of a state; show if
			
 
				+there are outliers or multiple modes; are some events occurring in
			
 
				+groups, etc. It can also suggest a strange behavior of a state
			
 
				+during a certain time interval, which should be later investigated
			
 
				+more carefully.
			
 
				+
			
 
				+  However, since each event is represented by a single point (and
			
 
				+there is no "alpha" factor), those events that happen almost
			
 
				+simultaneously are overplotted. Therefore density of events along
			
 
				+execution time may not be easy to read.
			
 
				+
			
 
				+```{r Dur}
			
 
				+ggplot(df,aes(x=Start,y=Duration)) + geom_point(aes(color=Value)) + theme_bw() + scale_color_manual(name="State",values=getPalette(colourCount)) + ggtitle("State Duration during the Execution Time") + theme(legend.position="none") + ylab("Duration [ms]") + xlab("Time [ms]") + facet_grid(Value~Origin, scale="free_y")
			
 
				+```
			
 
				+
			
 
				+# Distribution Histograms
			
 
				+
			
 
				+Finally, we show a distribution of *Duration* for each state in form
			
 
				+of histograms. X-axis is partitioned into bins with equidistant time
			
 
				+intervals in milliseconds, while y-axis represents the number of
			
 
				+occurrences inside such intervals for a certain state. Note that for
			
 
				+the first plot y-axis is not fixed, meaning that the scale changes
			
 
				+from one row to another. This plot allows us not only to see what
			
 
				+was the most frequent duration of a state, but also to compare
			
 
				+duration between different states.
			
 
				+
			
 
				+```{r Hist1}
			
 
				+ggplot(df, aes(x=Duration)) + geom_histogram(aes(y=..count..,fill=factor(Value)),binwidth = diff(range(df$Duration))/30) + theme_bw() + scale_fill_manual(name="State",values=getPalette(colourCount)) + ggtitle("Histograms for State Distribution") + ylab("Count") + xlab("Duration [ms]") + theme(legend.position="none") + facet_grid(Value~Origin,scales = "free_y")
			
 
				+```
			
 
				+
			
 
				+Similar to the previous figure, only now traces are shown vertically
			
 
				+instead of horizontally. Note that for this plot x-axis is not fixed,
			
 
				+meaning that the scale changes from one column to another. This plot
			
 
				+allows to compare frequency of different states and in case of
			
 
				+multiple traces to easily compare duration distribution for each
			
 
				+state.
			
 
				+
			
 
				+```{r Hist2}
			
 
				+ggplot(df, aes(x=Duration)) + geom_histogram(aes(y=..count..,fill=factor(Value)),binwidth = diff(range(df$Duration))/30) + theme_bw() + scale_fill_manual(name="State",values=getPalette(colourCount)) + ggtitle("Histograms for State Distribution") + ylab("Count") + xlab("Duration [ms]") + theme(legend.position="none") + facet_grid(Origin~Value,scales = "free_x")
			
 
				+```
			
--- a/bin/starpu_perfmodel_display
+++ b/bin/starpu_perfmodel_display
--- a/bin/starpu_perfmodel_plot
+++ b/bin/starpu_perfmodel_plot
--- a/bin/starpu_perfmodel_recdump
+++ b/bin/starpu_perfmodel_recdump
--- a/bin/starpu_sched_display
+++ b/bin/starpu_sched_display
--- a/bin/starpu_send_recv_data_use.py
+++ b/bin/starpu_send_recv_data_use.py
@@ -0,0 +1,138 @@
 
				+#!/usr/bin/env python3
			
 
				+# coding=utf-8
			
 
				+#
			
 
				+# StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+#
			
 
				+# Copyright (C) 2019-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+#
			
 
				+# StarPU is free software; you can redistribute it and/or modify
			
 
				+# it under the terms of the GNU Lesser General Public License as published by
			
 
				+# the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+# your option) any later version.
			
 
				+#
			
 
				+# StarPU is distributed in the hope that it will be useful, but
			
 
				+# WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+#
			
 
				+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+#
			
 
				+
			
 
				+import sys
			
 
				+PROGNAME = sys.argv[0]
			
 
				+
			
 
				+def usage():
			
 
				+    print("Offline tool to draw graph showing elapsed time between sent or received data and their use by tasks")
			
 
				+    print("")
			
 
				+    print("Usage: %s <folder containing comms.rec and tasks.rec files>" % PROGNAME)
			
 
				+
			
 
				+if len(sys.argv) != 2:
			
 
				+    usage()
			
 
				+    sys.exit(1)
			
 
				+
			
 
				+import re
			
 
				+import numpy as np
			
 
				+import matplotlib.pyplot as plt
			
 
				+from matplotlib.gridspec import GridSpec
			
 
				+import os
			
 
				+
			
 
				+def convert_rec_file(filename):
			
 
				+    lines = []
			
 
				+    item = dict()
			
 
				+
			
 
				+    with open(filename, "r") as f:
			
 
				+        for l in f.readlines():
			
 
				+            if l == "\n":
			
 
				+                lines.append(item)
			
 
				+                item = dict()
			
 
				+            else:
			
 
				+                ls = l.split(":")
			
 
				+                key = ls[0].lower()
			
 
				+                value = ls[1].strip()
			
 
				+
			
 
				+                if key in item:
			
 
				+                    print("Warning: duplicated key '" + key + "'")
			
 
				+                else:
			
 
				+                    if re.match('^\d+$', value) != None:
			
 
				+                        item[key] = int(value)
			
 
				+                    elif re.match("^\d+\.\d+$", value) != None:
			
 
				+                        item[key] = float(value)
			
 
				+                    else:
			
 
				+                        item[key] = value
			
 
				+
			
 
				+    return lines
			
 
				+
			
 
				+working_directory = sys.argv[1]
			
 
				+
			
 
				+comms = convert_rec_file(os.path.join(working_directory, "comms.rec"))
			
 
				+tasks = [t for t in convert_rec_file(os.path.join(working_directory, "tasks.rec")) if "control" not in t and "starttime" in t]
			
 
				+
			
 
				+if len(tasks) == 0:
			
 
				+    print("There is no task using data after communication.")
			
 
				+    sys.exit(0)
			
 
				+
			
 
				+
			
 
				+def plot_graph(comm_time_key, match, filename, title, xlabel):
			
 
				+    delays = []
			
 
				+    workers = dict()
			
 
				+    nb = 0
			
 
				+    durations = []
			
 
				+    min_time = 0.
			
 
				+    max_time = 0.
			
 
				+
			
 
				+    for c in comms:
			
 
				+        t_matched = None
			
 
				+        for t in tasks:
			
 
				+            if match(t, c):
			
 
				+                t_matched = t
			
 
				+                break
			
 
				+
			
 
				+        if t_matched is not None:
			
 
				+            worker = str(t_matched['mpirank']) + "-" + str(t_matched['workerid'])
			
 
				+            if worker not in workers:
			
 
				+                workers[worker] = []
			
 
				+
			
 
				+            eps = t["starttime"] - c[comm_time_key]
			
 
				+            assert(eps > 0)
			
 
				+            durations.append(eps)
			
 
				+            workers[worker].append((c[comm_time_key], eps))
			
 
				+
			
 
				+            if min_time == 0 or c[comm_time_key] < min_time:
			
 
				+                min_time = c[comm_time_key]
			
 
				+            if max_time == 0 or c[comm_time_key] > max_time:
			
 
				+                max_time = c[comm_time_key]
			
 
				+
			
 
				+            nb += 1
			
 
				+
			
 
				+
			
 
				+    fig = plt.figure(constrained_layout=True)
			
 
				+
			
 
				+    gs = GridSpec(2, 2, figure=fig)
			
 
				+    axs = [fig.add_subplot(gs[0, :-1]), fig.add_subplot(gs[1, :-1]), fig.add_subplot(gs[0:, -1])]
			
 
				+    i = 0
			
 
				+    for y, x in workers.items():
			
 
				+        # print(y, x)
			
 
				+        axs[0].broken_barh(x, [i*10, 8], facecolors=(0.1, 0.2, 0.5, 0.2))
			
 
				+        i += 1
			
 
				+
			
 
				+    i = 0
			
 
				+    for y, x in workers.items():
			
 
				+        for xx in x:
			
 
				+            axs[1].broken_barh([xx], [i, 1])
			
 
				+            i += 1
			
 
				+
			
 
				+    axs[0].set_yticks([i*10+4 for i in range(len(workers))])
			
 
				+    axs[0].set_yticklabels(list(workers))
			
 
				+    axs[0].set(xlabel="Time (ms) - Duration: " + str(max_time - min_time) + "ms", ylabel="Worker [mpi]-[*pu]", title=title)
			
 
				+
			
 
				+    if len(durations) != 0:
			
 
				+        axs[2].hist(durations, bins=np.logspace(np.log10(1), np.log10(max(durations)), 50), rwidth=0.8)
			
 
				+        axs[2].set_xscale("log")
			
 
				+        axs[2].set(xlabel=xlabel, ylabel="Number of occurences", title="Histogramm")
			
 
				+
			
 
				+    fig.set_size_inches(15, 9)
			
 
				+
			
 
				+    plt.savefig(os.path.join(working_directory, filename), dpi=100)
			
 
				+    plt.show()
			
 
				+
			
 
				+plot_graph("recvtime", lambda t, c: (t["mpirank"] == c["dst"] and t["starttime"] >= c["recvtime"] and str(c["recvhandle"]) in t["handles"]), "recv_use.png", "Elapsed time between recv and use (ms)", "Time between data reception and its use by a task")
			
 
				+plot_graph("sendtime", lambda t, c: (t["mpirank"] == c["src"] and t["starttime"] >= c["sendtime"] and str(c["sendhandle"]) in t["handles"]), "send_use.png", "Elapsed time between send and use (ms)", "Time between data sending and its use by a task")
			
--- a/bin/starpu_tasks_rec_complete
+++ b/bin/starpu_tasks_rec_complete
--- a/bin/starpu_temanejo2.sh
+++ b/bin/starpu_temanejo2.sh
@@ -0,0 +1,27 @@
 
				+#!/bin/bash
			
 
				+# StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+#
			
 
				+# Copyright (C) 2016-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+#
			
 
				+# StarPU is free software; you can redistribute it and/or modify
			
 
				+# it under the terms of the GNU Lesser General Public License as published by
			
 
				+# the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+# your option) any later version.
			
 
				+#
			
 
				+# StarPU is distributed in the hope that it will be useful, but
			
 
				+# WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+#
			
 
				+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+#
			
 
				+d=${AYUDAME2_INSTALL_DIR?}
			
 
				+cmd=${1?"usage: $0 <cmd> [args*]"}
			
 
				+shift
			
 
				+if test ! -r ayudame.cfg; then
			
 
				+	echo "warning: no 'ayudame.cfg' file found in current working directory, an example is available in <STARPU_INSTALL_DIR>/share/starpu/ayudame.cfg"
			
 
				+fi
			
 
				+PATH=$d/bin:$PATH
			
 
				+LD_LIBRARY_PATH=$d/lib:$LD_LIBRARY_PATH
			
 
				+PYTHONPATH=$d/lib/python2.7/site-packages:$PYTHONPATH
			
 
				+export PATH LD_LIBRARY_PATH PYTHONPATH
			
 
				+$d/bin/Temanejo2 -p 8888 -d 8889 -P $d/lib/libayudame.so -L $d/lib -A $cmd "$@"
			
--- a/bin/starpu_trace_state_stats.py
+++ b/bin/starpu_trace_state_stats.py
@@ -0,0 +1,395 @@
 
				+#!/usr/bin/env python3
			
 
				+# coding=utf-8
			
 
				+#
			
 
				+# StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+#
			
 
				+# Copyright (C) 2016-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+#
			
 
				+# StarPU is free software; you can redistribute it and/or modify
			
 
				+# it under the terms of the GNU Lesser General Public License as published by
			
 
				+# the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+# your option) any later version.
			
 
				+#
			
 
				+# StarPU is distributed in the hope that it will be useful, but
			
 
				+# WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+#
			
 
				+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+#
			
 
				+
			
 
				+##
			
 
				+# This script parses the generated trace.rec file and reports statistics about
			
 
				+# the number of different events/tasks and their durations. The report is
			
 
				+# similar to the starpu_paje_state_stats.in script, except that this one
			
 
				+# doesn't need R and pj_dump (from the pajeng repository), and it is also much
			
 
				+# faster.
			
 
				+##
			
 
				+
			
 
				+import getopt
			
 
				+import os
			
 
				+import sys
			
 
				+
			
 
				+class Event():
			
 
				+    def __init__(self, type, name, category, start_time):
			
 
				+        self._type = type
			
 
				+        self._name = name
			
 
				+        self._category = category
			
 
				+        self._start_time = start_time
			
 
				+
			
 
				+class EventStats():
			
 
				+    def __init__(self, name, duration_time, category, count = 1):
			
 
				+        self._name = name
			
 
				+        self._duration_time = duration_time
			
 
				+        self._category = category
			
 
				+        self._count = count
			
 
				+
			
 
				+    def aggregate(self, duration_time):
			
 
				+        self._duration_time += duration_time
			
 
				+        self._count += 1
			
 
				+
			
 
				+    def show(self):
			
 
				+        if not self._name == None and not self._category == None:
			
 
				+            print("\"" + self._name + "\"," + str(self._count) + ",\"" + self._category + "\"," + str(round(self._duration_time, 6)))
			
 
				+
			
 
				+class Worker():
			
 
				+    def __init__(self, id):
			
 
				+        self._id        = id
			
 
				+        self._events    = []
			
 
				+        self._stats     = []
			
 
				+        self._stack     = []
			
 
				+        self._current_state = None
			
 
				+
			
 
				+    def get_event_stats(self, name):
			
 
				+        for stat in self._stats:
			
 
				+            if stat._name == name:
			
 
				+                return stat
			
 
				+        return None
			
 
				+
			
 
				+    def add_event(self, type, name, category, start_time):
			
 
				+        self._events.append(Event(type, name, category, start_time))
			
 
				+
			
 
				+    def add_event_to_stats(self, curr_event):
			
 
				+        if curr_event._type == "PushState":
			
 
				+            self._stack.append(curr_event)
			
 
				+            return # Will look later to find a PopState event.
			
 
				+        elif curr_event._type == "PopState":
			
 
				+            if len(self._stack) == 0:
			
 
				+                print("warning: PopState without a PushState, probably a trace with start/stop profiling")
			
 
				+                self._current_state = None
			
 
				+                return
			
 
				+            next_event = curr_event
			
 
				+            curr_event = self._stack.pop()
			
 
				+        elif curr_event._type == "SetState":
			
 
				+            if self._current_state == None:
			
 
				+                # First SetState event found
			
 
				+                self._current_state = curr_event
			
 
				+                return
			
 
				+            saved_state = curr_event
			
 
				+            next_event = curr_event
			
 
				+            curr_event = self._current_state
			
 
				+            self._current_state = saved_state
			
 
				+        else:
			
 
				+            sys.exit("ERROR: Invalid event type!")
			
 
				+
			
 
				+        # Compute duration with the next event.
			
 
				+        a = curr_event._start_time
			
 
				+        b = next_event._start_time
			
 
				+
			
 
				+        # Add the event to the list of stats.
			
 
				+        for i in range(len(self._stats)):
			
 
				+            if self._stats[i]._name == curr_event._name:
			
 
				+                self._stats[i].aggregate(b - a)
			
 
				+                return
			
 
				+        self._stats.append(EventStats(curr_event._name, b - a,
			
 
				+                                      curr_event._category))
			
 
				+
			
 
				+    def calc_stats(self, start_profiling_times, stop_profiling_times):
			
 
				+        num_events = len(self._events)
			
 
				+        use_start_stop = len(start_profiling_times) != 0
			
 
				+        for i in range(0, num_events):
			
 
				+            event = self._events[i]
			
 
				+            if i > 0 and self._events[i-1]._name == "Deinitializing":
			
 
				+                # Drop all events after the Deinitializing event is found
			
 
				+                # because they do not make sense.
			
 
				+                break
			
 
				+
			
 
				+            if not use_start_stop:
			
 
				+                self.add_event_to_stats(event)
			
 
				+                continue
			
 
				+
			
 
				+            # Check if the event is inbetween start/stop profiling events
			
 
				+            for t in range(len(start_profiling_times)):
			
 
				+                if (event._start_time > start_profiling_times[t] and
			
 
				+                    event._start_time < stop_profiling_times[t]):
			
 
				+                    self.add_event_to_stats(event)
			
 
				+                    break
			
 
				+
			
 
				+        if not use_start_stop:
			
 
				+            return
			
 
				+
			
 
				+        # Special case for SetState events which need a next one for computing
			
 
				+        # the duration.
			
 
				+        curr_event = self._events[-1]
			
 
				+        if curr_event._type == "SetState":
			
 
				+            for i in range(len(start_profiling_times)):
			
 
				+                if (curr_event._start_time > start_profiling_times[i] and
			
 
				+                    curr_event._start_time < stop_profiling_times[i]):
			
 
				+                    curr_event = Event(curr_event._type, curr_event._name,
			
 
				+                                       curr_event._category,
			
 
				+                                       stop_profiling_times[i])
			
 
				+            self.add_event_to_stats(curr_event)
			
 
				+
			
 
				+def read_blocks(input_file):
			
 
				+    empty_lines = 0
			
 
				+    first_line = 1
			
 
				+    blocks = []
			
 
				+    for line in open(input_file):
			
 
				+        if first_line:
			
 
				+            blocks.append([])
			
 
				+            blocks[-1].append(line)
			
 
				+            first_line = 0
			
 
				+
			
 
				+        # Check for empty lines
			
 
				+        if not line or line[0] == '\n':
			
 
				+            # If 1st one: new block
			
 
				+            if empty_lines == 0:
			
 
				+                blocks.append([])
			
 
				+            empty_lines += 1
			
 
				+        else:
			
 
				+            # Non empty line: add line in current(last) block
			
 
				+            empty_lines = 0
			
 
				+            blocks[-1].append(line)
			
 
				+    return blocks
			
 
				+
			
 
				+def read_field(field, index):
			
 
				+    return field[index+1:-1]
			
 
				+
			
 
				+def insert_worker_event(workers, prog_events, block):
			
 
				+    worker_id = -1
			
 
				+    name = None
			
 
				+    start_time = 0.0
			
 
				+    category = None
			
 
				+
			
 
				+    for line in block:
			
 
				+        key   = line[:2]
			
 
				+        value = read_field(line, 2)
			
 
				+        if key == "E:": # EventType
			
 
				+            event_type = value
			
 
				+        elif key == "C:": # Category
			
 
				+            category = value
			
 
				+        elif key == "W:": # WorkerId
			
 
				+            worker_id = int(value)
			
 
				+        elif key == "N:": # Name
			
 
				+            name = value
			
 
				+        elif key == "S:": # StartTime
			
 
				+            start_time = float(value)
			
 
				+
			
 
				+    # Program events don't belong to workers, they are globals.
			
 
				+    if category == "Program":
			
 
				+        prog_events.append(Event(event_type, name, category, start_time))
			
 
				+        return
			
 
				+
			
 
				+    for worker in workers:
			
 
				+        if worker._id == worker_id:
			
 
				+            worker.add_event(event_type, name, category, start_time)
			
 
				+            return
			
 
				+    worker = Worker(worker_id)
			
 
				+    worker.add_event(event_type, name, category, start_time)
			
 
				+    workers.append(worker)
			
 
				+
			
 
				+def calc_times(stats):
			
 
				+    tr = 0.0 # Runtime
			
 
				+    tt = 0.0 # Task
			
 
				+    ti = 0.0 # Idle
			
 
				+    ts = 0.0 # Scheduling
			
 
				+    for stat in stats:
			
 
				+        if stat._category == None:
			
 
				+            continue
			
 
				+        if stat._category == "Runtime":
			
 
				+            if stat._name == "Scheduling":
			
 
				+                # Scheduling time is part of runtime but we want to have
			
 
				+                # it separately.
			
 
				+                ts += stat._duration_time
			
 
				+            else:
			
 
				+                tr += stat._duration_time
			
 
				+        elif stat._category == "Task":
			
 
				+            tt += stat._duration_time
			
 
				+        elif stat._category == "Other":
			
 
				+            ti += stat._duration_time
			
 
				+        else:
			
 
				+            print("WARNING: Unknown category '" + stat._category + "'!")
			
 
				+    return ti, tr, tt, ts
			
 
				+
			
 
				+def save_times(ti, tr, tt, ts):
			
 
				+    f = open("times.csv", "w+")
			
 
				+    f.write("\"Time\",\"Duration\"\n")
			
 
				+    f.write("\"Runtime\"," + str(tr) + "\n")
			
 
				+    f.write("\"Task\"," + str(tt) + "\n")
			
 
				+    f.write("\"Idle\"," + str(ti) + "\n")
			
 
				+    f.write("\"Scheduling\"," + str(ts) + "\n")
			
 
				+    f.close()
			
 
				+
			
 
				+def calc_et(tt_1, tt_p):
			
 
				+    """ Compute the task efficiency (et). This measures the exploitation of
			
 
				+    data locality. """
			
 
				+    return tt_1 / tt_p
			
 
				+
			
 
				+def calc_es(tt_p, ts_p):
			
 
				+    """ Compute the scheduling efficiency (es). This measures time spent in
			
 
				+    the runtime scheduler. """
			
 
				+    return tt_p / (tt_p + ts_p)
			
 
				+
			
 
				+def calc_er(tt_p, tr_p, ts_p):
			
 
				+    """ Compute the runtime efficiency (er). This measures how the runtime
			
 
				+    overhead affects performance."""
			
 
				+    return (tt_p + ts_p) / (tt_p + tr_p + ts_p)
			
 
				+
			
 
				+def calc_ep(tt_p, tr_p, ti_p, ts_p):
			
 
				+    """ Compute the pipeline efficiency (et). This measures how much
			
 
				+    concurrency is available and how well it's exploited. """
			
 
				+    return (tt_p + tr_p + ts_p) / (tt_p + tr_p + ti_p + ts_p)
			
 
				+
			
 
				+def calc_e(et, er, ep, es):
			
 
				+    """ Compute the parallel efficiency. """
			
 
				+    return et * er * ep * es
			
 
				+
			
 
				+def save_efficiencies(e, ep, er, et, es):
			
 
				+    f = open("efficiencies.csv", "w+")
			
 
				+    f.write("\"Efficiency\",\"Value\"\n")
			
 
				+    f.write("\"Parallel\"," + str(e) + "\n")
			
 
				+    f.write("\"Task\"," + str(et) + "\n")
			
 
				+    f.write("\"Runtime\"," + str(er) + "\n")
			
 
				+    f.write("\"Scheduling\"," + str(es) + "\n")
			
 
				+    f.write("\"Pipeline\"," + str(ep) + "\n")
			
 
				+    f.close()
			
 
				+
			
 
				+def usage():
			
 
				+    print("USAGE:")
			
 
				+    print("starpu_trace_state_stats.py [ -te -s=<time> ] <trace.rec>")
			
 
				+    print("")
			
 
				+    print("OPTIONS:")
			
 
				+    print(" -t or --time            Compute and dump times to times.csv")
			
 
				+    print("")
			
 
				+    print(" -e or --efficiency      Compute and dump efficiencies to efficiencies.csv")
			
 
				+    print("")
			
 
				+    print(" -s or --seq_task_time   Used to compute task efficiency between sequential and parallel times")
			
 
				+    print("                         (if not set, task efficiency will be 1.0)")
			
 
				+    print("")
			
 
				+    print("EXAMPLES:")
			
 
				+    print("# Compute event statistics and report them to stdout:")
			
 
				+    print("python starpu_trace_state_stats.py trace.rec")
			
 
				+    print("")
			
 
				+    print("# Compute event stats, times and efficiencies:")
			
 
				+    print("python starpu_trace_state_stats.py -te trace.rec")
			
 
				+    print("")
			
 
				+    print("# Compute correct task efficiency with the sequential task time:")
			
 
				+    print("python starpu_trace_state_stats.py -s=60093.950614 trace.rec")
			
 
				+
			
 
				+def main():
			
 
				+    try:
			
 
				+        opts, args = getopt.getopt(sys.argv[1:], "hets:",
			
 
				+                                   ["help", "time", "efficiency", "seq_task_time="])
			
 
				+    except getopt.GetoptError as err:
			
 
				+        usage()
			
 
				+        sys.exit(1)
			
 
				+
			
 
				+    dump_time = False
			
 
				+    dump_efficiency = False
			
 
				+    tt_1 = 0.0
			
 
				+
			
 
				+    for o, a in opts:
			
 
				+        if o in ("-h", "--help"):
			
 
				+            usage()
			
 
				+            sys.exit()
			
 
				+        elif o in ("-t", "--time"):
			
 
				+            dump_time = True
			
 
				+        elif o in ("-e", "--efficiency"):
			
 
				+            dump_efficiency = True
			
 
				+        elif o in ("-s", "--seq_task_time"):
			
 
				+            tt_1 = float(a)
			
 
				+
			
 
				+    if len(args) < 1:
			
 
				+        usage()
			
 
				+        sys.exit()
			
 
				+    recfile = args[0]
			
 
				+
			
 
				+    if not os.path.isfile(recfile):
			
 
				+        sys.exit("File does not exist!")
			
 
				+
			
 
				+    # Declare a list for all workers.
			
 
				+    workers = []
			
 
				+
			
 
				+    # Declare a list for program events
			
 
				+    prog_events = []
			
 
				+
			
 
				+    # Read the recutils file format per blocks.
			
 
				+    blocks = read_blocks(recfile)
			
 
				+    for block in blocks:
			
 
				+        if not len(block) == 0:
			
 
				+            first_line = block[0]
			
 
				+            if first_line[:2] == "E:":
			
 
				+                insert_worker_event(workers, prog_events, block)
			
 
				+
			
 
				+    # Find allowed range times between start/stop profiling events.
			
 
				+    start_profiling_times = []
			
 
				+    stop_profiling_times = []
			
 
				+    for prog_event in prog_events:
			
 
				+        if prog_event._name == "start_profiling":
			
 
				+            start_profiling_times.append(prog_event._start_time)
			
 
				+        if prog_event._name == "stop_profiling":
			
 
				+            stop_profiling_times.append(prog_event._start_time)
			
 
				+
			
 
				+    if len(start_profiling_times) != len(stop_profiling_times):
			
 
				+        sys.exit("Mismatch number of start/stop profiling events!")
			
 
				+
			
 
				+    # Compute worker statistics.
			
 
				+    stats = []
			
 
				+    for worker in workers:
			
 
				+        worker.calc_stats(start_profiling_times, stop_profiling_times)
			
 
				+        for stat in worker._stats:
			
 
				+            found = False
			
 
				+            for s in stats:
			
 
				+                if stat._name == s._name:
			
 
				+                    found = True
			
 
				+                    break
			
 
				+            if not found == True:
			
 
				+                stats.append(EventStats(stat._name, 0.0, stat._category, 0))
			
 
				+
			
 
				+    # Compute global statistics for all workers.
			
 
				+    for i in range(0, len(workers)):
			
 
				+        for stat in stats:
			
 
				+            s = workers[i].get_event_stats(stat._name)
			
 
				+            if not s == None:
			
 
				+                # A task might not be executed on all workers.
			
 
				+                stat._duration_time += s._duration_time
			
 
				+                stat._count += s._count
			
 
				+
			
 
				+    # Output statistics.
			
 
				+    print("\"Name\",\"Count\",\"Type\",\"Duration\"")
			
 
				+    for stat in stats:
			
 
				+        stat.show()
			
 
				+
			
 
				+    # Compute runtime, task, idle, scheduling times and dump them to times.csv
			
 
				+    ti_p = tr_p = tt_p = ts_p = 0.0
			
 
				+    if dump_time == True:
			
 
				+        ti_p, tr_p, tt_p, ts_p = calc_times(stats)
			
 
				+        save_times(ti_p, tr_p, tt_p, ts_p)
			
 
				+
			
 
				+    # Compute runtime, task, idle efficiencies and dump them to
			
 
				+    # efficiencies.csv.
			
 
				+    if dump_efficiency == True or not tt_1 == 0.0:
			
 
				+        if dump_time == False:
			
 
				+            ti_p, tr_p, tt_p, ts_p = calc_times(stats)
			
 
				+        if tt_1 == 0.0:
			
 
				+            sys.stderr.write("WARNING: Task efficiency will be 1.0 because -s is not set!\n")
			
 
				+            tt_1 = tt_p
			
 
				+
			
 
				+        # Compute efficiencies.
			
 
				+        et = round(calc_et(tt_1, tt_p), 6)
			
 
				+        es = round(calc_es(tt_p, ts_p), 6)
			
 
				+        er = round(calc_er(tt_p, tr_p, ts_p), 6)
			
 
				+        ep = round(calc_ep(tt_p, tr_p, ti_p, ts_p), 6)
			
 
				+        e  = round(calc_e(et, er, ep, es), 6)
			
 
				+        save_efficiencies(e, ep, er, et, es)
			
 
				+if __name__ == "__main__":
			
 
				+    main()
			
--- a/bin/starpu_workers_activity
+++ b/bin/starpu_workers_activity
@@ -0,0 +1,182 @@
 
				+#!/bin/sh
			
 
				+# StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+#
			
 
				+# Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+#
			
 
				+# StarPU is free software; you can redistribute it and/or modify
			
 
				+# it under the terms of the GNU Lesser General Public License as published by
			
 
				+# the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+# your option) any later version.
			
 
				+#
			
 
				+# StarPU is distributed in the hope that it will be useful, but
			
 
				+# WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+#
			
 
				+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+#
			
 
				+PROGNAME=$0
			
 
				+
			
 
				+usage()
			
 
				+{
			
 
				+    echo "Offline tool to display the activity of the workers during the execution."
			
 
				+    echo ""
			
 
				+    echo "  The starpu_fxt_tool utility now generates a file named 'activity.data' which"
			
 
				+    echo "  can be processed by this script to generate a plot named activity.eps"
			
 
				+    echo ""
			
 
				+    echo "  Typical usage:"
			
 
				+    echo "     ./starpu_fxt_tool -i /tmp/prof_file_foo"
			
 
				+    echo "     $PROGNAME activity.data"
			
 
				+    echo ""
			
 
				+    echo "Options:"
			
 
				+    echo "	-h, --help          display this help and exit"
			
 
				+    echo "	-v, --version       output version information and exit"
			
 
				+    echo ""
			
 
				+    echo "Report bugs to <starpu-devel@lists.gforge.inria.fr>"
			
 
				+    exit 0
			
 
				+}
			
 
				+
			
 
				+if [ "$1" = "-v" ] || [ "$1" = "--version" ] ; then
			
 
				+    echo "$PROGNAME (StarPU) 1.3.99"
			
 
				+    exit 0
			
 
				+fi
			
 
				+
			
 
				+if [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$1" = "" ] ; then
			
 
				+    usage
			
 
				+fi
			
 
				+
			
 
				+if [ ! -f $1 ] ; then
			
 
				+    echo "Error. File <$1> not found"
			
 
				+    echo ""
			
 
				+    usage
			
 
				+fi
			
 
				+
			
 
				+# The input file must be generated by the starpu_fxt_tool command
			
 
				+inputfile_with_counters=$1
			
 
				+
			
 
				+# We extract the counters out of the input file
			
 
				+inputfile=.$inputfile_with_counters.activity
			
 
				+inputfile_cnt_ready=.$1.cnt_ready
			
 
				+inputfile_cnt_submitted=.$1.cnt_submitted
			
 
				+set_profiling_list=.$1.set_profiling_list
			
 
				+names=.$1.names
			
 
				+
			
 
				+grep "^set_profiling" $inputfile_with_counters > $set_profiling_list
			
 
				+grep "0$" $set_profiling_list | cut -f 2 | sort -n > $set_profiling_list.disable
			
 
				+grep "1$" $set_profiling_list | cut -f 2 | sort -n > $set_profiling_list.enable
			
 
				+
			
 
				+grep "^name" $inputfile_with_counters > $names
			
 
				+
			
 
				+grep -v "^cnt" $inputfile_with_counters | grep -v "^set_profiling" | grep -v "^name" > $inputfile
			
 
				+grep "^cnt_ready" $inputfile_with_counters > $inputfile_cnt_ready
			
 
				+grep "^cnt_submitted" $inputfile_with_counters > $inputfile_cnt_submitted
			
 
				+
			
 
				+# Count the number of workers in the trace
			
 
				+workers=`cut -f1 $inputfile | sort -n | uniq`
			
 
				+nworkers=`cut -f1 $inputfile | sort -n | uniq|wc -l`
			
 
				+
			
 
				+# size of the entire graph
			
 
				+width=1.5
			
 
				+heigth=0.40
			
 
				+total_heigth=$(echo "$heigth + ($heigth * $nworkers)"|bc -l)
			
 
				+
			
 
				+# In case 3 arguments are provided, the 2nd (resp. 3rd) indicates the start
			
 
				+# (resp. the end) of the interval to be displayed.
			
 
				+if [ $# -ge 3 ]; then
			
 
				+starttime=$2
			
 
				+endtime=$3
			
 
				+else
			
 
				+#if profiling is explicitely enabled (resp. disabled) at some point, we set the
			
 
				+# default start (rest. end) point when we enable (resp. disable) profiling for
			
 
				+# the first time.
			
 
				+profiling_enable_cnt=`wc -l $set_profiling_list.enable|sed -e "s/\(.*\) .*/\1/"`
			
 
				+if [ $profiling_enable_cnt -ge 1 ]; then
			
 
				+starttime=`head -1 $set_profiling_list.enable`
			
 
				+else
			
 
				+starttime=$(cut -f 2 $inputfile |sort -n|head -1)
			
 
				+fi
			
 
				+
			
 
				+# TODO test if last disable > first enable
			
 
				+
			
 
				+profiling_disable_cnt=$(wc -l $set_profiling_list.disable|sed -e "s/\(.*\) .*/\1/")
			
 
				+if [ $profiling_disable_cnt -ge 1 ]; then
			
 
				+endtime=`tail -1 $set_profiling_list.disable`
			
 
				+else
			
 
				+endtime=$(cut -f 2 $inputfile |sort -n|tail -1)
			
 
				+fi
			
 
				+
			
 
				+# The values in the file are in ms, we display seconds
			
 
				+starttime=$(echo "$starttime * 0.001 "| bc -l)
			
 
				+endtime=$(echo "$endtime * 0.001 "| bc -l)
			
 
				+
			
 
				+fi
			
 
				+
			
 
				+echo "START $starttime END $endtime"
			
 
				+
			
 
				+# Gnuplot header
			
 
				+cat > gnuplotcmd << EOF
			
 
				+set term postscript eps enhanced color
			
 
				+set output "activity.eps"
			
 
				+set xrange [$starttime:$endtime]
			
 
				+set size $width,$total_heigth
			
 
				+set multiplot;
			
 
				+
			
 
				+set origin 0.0,0.0;
			
 
				+set size $width,$heigth;
			
 
				+
			
 
				+set logscale y
			
 
				+
			
 
				+plot "$inputfile_cnt_submitted" using (\$2/1000):3 with filledcurves lt rgb "#999999" title "submitted",\
			
 
				+	"$inputfile_cnt_ready" using (\$2/1000):3 with filledcurves lt rgb "#000000" title "ready"
			
 
				+
			
 
				+set nologscale y
			
 
				+
			
 
				+EOF
			
 
				+
			
 
				+cnt=0
			
 
				+for worker in $workers
			
 
				+do
			
 
				+	grep "^$worker\s" $inputfile > .tmp.$worker
			
 
				+
			
 
				+	starty=$(echo "$heigth + ($heigth * $cnt)"|bc -l)
			
 
				+
			
 
				+cat >> gnuplotcmd << EOF
			
 
				+
			
 
				+set origin 0.0,$starty;
			
 
				+set size $width,$heigth;
			
 
				+
			
 
				+set key off
			
 
				+
			
 
				+set yrange [0:100]
			
 
				+
			
 
				+set ylabel "$(cut -f2- $names |grep "^$worker$" | cut -f2)"
			
 
				+
			
 
				+plot ".tmp.$worker" using (\$2/1000):(100) with filledcurves y1=0.0 lt rgb "#000000" notitle,\
			
 
				+	 ".tmp.$worker" using (\$2/1000):((100*(\$4+\$5))/\$3) with filledcurves y1=0.0 lt rgb "#ff0000" notitle,\
			
 
				+	 ".tmp.$worker" using (\$2/1000):((100*\$4)/\$3) with filledcurves y1=0.0 lt rgb "#00ff00" notitle
			
 
				+EOF
			
 
				+
			
 
				+	cnt=$(($cnt+1))	
			
 
				+done
			
 
				+
			
 
				+
			
 
				+cat >> gnuplotcmd << EOF
			
 
				+unset multiplot
			
 
				+EOF
			
 
				+
			
 
				+gnuplot < gnuplotcmd
			
 
				+
			
 
				+rm gnuplotcmd
			
 
				+rm $inputfile
			
 
				+rm $inputfile_cnt_ready
			
 
				+rm $inputfile_cnt_submitted
			
 
				+
			
 
				+rm $set_profiling_list
			
 
				+rm $set_profiling_list.enable
			
 
				+rm $set_profiling_list.disable
			
 
				+
			
 
				+#rm $names
			
 
				+
			
 
				+for worker in $workers
			
 
				+do
			
 
				+	rm .tmp.$worker
			
 
				+done
			
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -18,11 +18,11 @@
 
				 #
			
 
				 include $(top_srcdir)/starpu.mk
			
 
				 
			
 
				-AM_CFLAGS = $(MAGMA_CFLAGS) $(HWLOC_CFLAGS) -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(GLOBAL_AM_CFLAGS) -Wno-unused
			
 
				-AM_CXXFLAGS = $(MAGMA_CFLAGS) $(HWLOC_CFLAGS) -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(GLOBAL_AM_CXXFLAGS) -Wno-unused
			
 
				-LIBS = $(top_builddir)/src/@LIBSTARPU_LINK@ $(MAGMA_LIBS) $(HWLOC_LIBS) @LIBS@ $(FXT_LIBS)
			
 
				-AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include
			
 
				-AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(STARPU_COI_LDFLAGS) $(STARPU_SCIF_LDFLAGS)
			
 
				+AM_CFLAGS = $(MAGMA_CFLAGS) $(HWLOC_CFLAGS) -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(GLOBAL_AM_CFLAGS) -Wno-unused -lpapi
			
 
				+AM_CXXFLAGS = $(MAGMA_CFLAGS) $(HWLOC_CFLAGS) -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(GLOBAL_AM_CXXFLAGS) -Wno-unused -lpapi
			
 
				+LIBS = $(top_builddir)/src/@LIBSTARPU_LINK@ $(MAGMA_LIBS) $(HWLOC_LIBS) @LIBS@ $(FXT_LIBS) -lpapi
			
 
				+AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include -lpapi
			
 
				+AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(STARPU_COI_LDFLAGS) $(STARPU_SCIF_LDFLAGS) -lpapi
			
 
				 
			
 
				 SUBDIRS = stencil
			
 
				 
			
--- a/examples/stencil/Makefile.am
+++ b/examples/stencil/Makefile.am
@@ -15,10 +15,10 @@
 
				 #
			
 
				 include $(top_srcdir)/starpu.mk
			
 
				 
			
 
				-AM_CFLAGS = $(HWLOC_CFLAGS) $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(GLOBAL_AM_CFLAGS) -Wno-unused
			
 
				-LIBS = $(top_builddir)/src/@LIBSTARPU_LINK@ $(HWLOC_LIBS) @LIBS@ $(FXT_LIBS)
			
 
				-AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include
			
 
				-AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(STARPU_COI_LDFLAGS) $(STARPU_SCIF_LDFLAGS)
			
 
				+AM_CFLAGS = $(HWLOC_CFLAGS) $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(GLOBAL_AM_CFLAGS) -Wno-unused -lpapi
			
 
				+LIBS = $(top_builddir)/src/@LIBSTARPU_LINK@ $(HWLOC_LIBS) @LIBS@ $(FXT_LIBS) -lpapi
			
 
				+AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include -lpapi
			
 
				+AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(STARPU_COI_LDFLAGS) $(STARPU_SCIF_LDFLAGS) -lpapi
			
 
				 
			
 
				 if STARPU_USE_MPI
			
 
				 LIBS += $(top_builddir)/mpi/src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
			
--- a/include/starpu/1.3/fstarpu_mod.f90
+++ b/include/starpu/1.3/fstarpu_mod.f90
--- a/include/starpu/1.3/starpu.h
+++ b/include/starpu/1.3/starpu.h
@@ -0,0 +1,603 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_H__
			
 
				+#define __STARPU_H__
			
 
				+
			
 
				+#include <stdlib.h>
			
 
				+
			
 
				+#ifndef _MSC_VER
			
 
				+#include <stdint.h>
			
 
				+#else
			
 
				+#include <windows.h>
			
 
				+typedef unsigned char uint8_t;
			
 
				+typedef unsigned short uint16_t;
			
 
				+typedef unsigned int uint32_t;
			
 
				+typedef unsigned long long uint64_t;
			
 
				+typedef UINT_PTR uintptr_t;
			
 
				+typedef char int8_t;
			
 
				+typedef short int16_t;
			
 
				+typedef int int32_t;
			
 
				+typedef long long int64_t;
			
 
				+typedef INT_PTR intptr_t;
			
 
				+#endif
			
 
				+
			
 
				+#include <starpu_config.h>
			
 
				+
			
 
				+#ifdef STARPU_HAVE_WINDOWS
			
 
				+#include <windows.h>
			
 
				+#endif
			
 
				+
			
 
				+#if defined(STARPU_USE_OPENCL) && !defined(__CUDACC__)
			
 
				+#include <starpu_opencl.h>
			
 
				+#endif
			
 
				+
			
 
				+#include <starpu_thread.h>
			
 
				+#include <starpu_thread_util.h>
			
 
				+#include <starpu_util.h>
			
 
				+#include <starpu_data.h>
			
 
				+#include <starpu_helper.h>
			
 
				+#include <starpu_disk.h>
			
 
				+#include <starpu_data_interfaces.h>
			
 
				+#include <starpu_data_filters.h>
			
 
				+#include <starpu_stdlib.h>
			
 
				+#include <starpu_task_bundle.h>
			
 
				+#include <starpu_task_dep.h>
			
 
				+#include <starpu_task.h>
			
 
				+#include <starpu_worker.h>
			
 
				+#include <starpu_perfmodel.h>
			
 
				+#include <starpu_worker.h>
			
 
				+#ifndef BUILDING_STARPU
			
 
				+#include <starpu_task_list.h>
			
 
				+#endif
			
 
				+#include <starpu_task_util.h>
			
 
				+#include <starpu_scheduler.h>
			
 
				+#include <starpu_sched_ctx.h>
			
 
				+#include <starpu_expert.h>
			
 
				+#include <starpu_rand.h>
			
 
				+#include <starpu_cuda.h>
			
 
				+#include <starpu_cublas.h>
			
 
				+#include <starpu_cusparse.h>
			
 
				+#include <starpu_bound.h>
			
 
				+#include <starpu_hash.h>
			
 
				+#include <starpu_profiling.h>
			
 
				+#include <starpu_fxt.h>
			
 
				+#include <starpu_driver.h>
			
 
				+#include <starpu_tree.h>
			
 
				+#include <starpu_openmp.h>
			
 
				+#include <starpu_simgrid_wrap.h>
			
 
				+#include <starpu_bitmap.h>
			
 
				+#include <starpu_clusters.h>
			
 
				+#include <starpu_perf_monitoring.h>
			
 
				+#include <starpu_perf_steering.h>
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   @defgroup API_Initialization_and_Termination Initialization and Termination
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   Structure passed to the starpu_init() function to configure StarPU.
			
 
				+   It has to be initialized with starpu_conf_init(). When the default
			
 
				+   value is used, StarPU automatically selects the number of
			
 
				+   processing units and takes the default scheduling policy. The
			
 
				+   environment variables overwrite the equivalent parameters.
			
 
				+*/
			
 
				+struct starpu_conf
			
 
				+{
			
 
				+	/**
			
 
				+	   @private
			
 
				+	   Will be initialized by starpu_conf_init(). Should not be
			
 
				+	   set by hand.
			
 
				+	*/
			
 
				+	int magic;
			
 
				+
			
 
				+	/**
			
 
				+	   @private
			
 
				+	   Tell starpu_init() if MPI will be initialized later.
			
 
				+	*/
			
 
				+	int will_use_mpi;
			
 
				+
			
 
				+	/**
			
 
				+	   Name of the scheduling policy. This can also be specified
			
 
				+	   with the environment variable \ref STARPU_SCHED. (default =
			
 
				+	   <c>NULL</c>).
			
 
				+	*/
			
 
				+	const char *sched_policy_name;
			
 
				+
			
 
				+	/**
			
 
				+	   Definition of the scheduling policy. This field is ignored
			
 
				+	   if starpu_conf::sched_policy_name is set.
			
 
				+	   (default = <c>NULL</c>)
			
 
				+	*/
			
 
				+	struct starpu_sched_policy *sched_policy;
			
 
				+	void (*sched_policy_init)(unsigned);
			
 
				+
			
 
				+	/**
			
 
				+	   For all parameters specified in this structure that can
			
 
				+	   also be set with environment variables, by default,
			
 
				+	   StarPU chooses the value of the environment variable
			
 
				+	   against the value set in starpu_conf. Setting the parameter
			
 
				+	   starpu_conf::precedence_over_environment_variables to 1 gives precedence
			
 
				+	   to the value set in the structure over the environment
			
 
				+	   variable.
			
 
				+	 */
			
 
				+	int precedence_over_environment_variables;
			
 
				+
			
 
				+	/**
			
 
				+	   Number of CPU cores that StarPU can use. This can also be
			
 
				+	   specified with the environment variable \ref STARPU_NCPU.
			
 
				+	   (default = -1)
			
 
				+	*/
			
 
				+	int ncpus;
			
 
				+
			
 
				+	/**
			
 
				+	   Number of CPU cores that StarPU should leave aside. They can then
			
 
				+	   be used by application threads, by calling starpu_get_next_bindid() to
			
 
				+	   get their ID, and starpu_bind_thread_on() to bind the current thread to them.
			
 
				+	  */
			
 
				+	int reserve_ncpus;
			
 
				+
			
 
				+	/**
			
 
				+	   Number of CUDA devices that StarPU can use. This can also
			
 
				+	   be specified with the environment variable \ref
			
 
				+	   STARPU_NCUDA.
			
 
				+	   (default = -1)
			
 
				+	*/
			
 
				+	int ncuda;
			
 
				+
			
 
				+	/**
			
 
				+	   Number of OpenCL devices that StarPU can use. This can also
			
 
				+	   be specified with the environment variable \ref
			
 
				+	   STARPU_NOPENCL.
			
 
				+	   (default = -1)
			
 
				+	*/
			
 
				+	int nopencl;
			
 
				+
			
 
				+	/**
			
 
				+	   Number of MIC devices that StarPU can use. This can also be
			
 
				+	   specified with the environment variable \ref STARPU_NMIC.
			
 
				+	   (default = -1)
			
 
				+	*/
			
 
				+	int nmic;
			
 
				+
			
 
				+	/**
			
 
				+	   Number of MPI Master Slave devices that StarPU can use.
			
 
				+	   This can also be specified with the environment variable
			
 
				+	   \ref STARPU_NMPI_MS.
			
 
				+	   (default = -1)
			
 
				+	*/
			
 
				+        int nmpi_ms;
			
 
				+
			
 
				+	/**
			
 
				+	   If this flag is set, the starpu_conf::workers_bindid array
			
 
				+	   indicates where the different workers are bound, otherwise
			
 
				+	   StarPU automatically selects where to bind the different
			
 
				+	   workers. This can also be specified with the environment
			
 
				+	   variable \ref STARPU_WORKERS_CPUID.
			
 
				+	   (default = 0)
			
 
				+	*/
			
 
				+	unsigned use_explicit_workers_bindid;
			
 
				+	/**
			
 
				+	   If the starpu_conf::use_explicit_workers_bindid flag is
			
 
				+	   set, this array indicates where to bind the different
			
 
				+	   workers. The i-th entry of the starpu_conf::workers_bindid
			
 
				+	   indicates the logical identifier of the processor which
			
 
				+	   should execute the i-th worker. Note that the logical
			
 
				+	   ordering of the CPUs is either determined by the OS, or
			
 
				+	   provided by the hwloc library in case it is available.
			
 
				+	*/
			
 
				+	unsigned workers_bindid[STARPU_NMAXWORKERS];
			
 
				+
			
 
				+	/**
			
 
				+	   If this flag is set, the CUDA workers will be attached to
			
 
				+	   the CUDA devices specified in the
			
 
				+	   starpu_conf::workers_cuda_gpuid array. Otherwise, StarPU
			
 
				+	   assigns the CUDA devices in a round-robin fashion. This can
			
 
				+	   also be specified with the environment variable \ref
			
 
				+	   STARPU_WORKERS_CUDAID.
			
 
				+	   (default = 0)
			
 
				+	*/
			
 
				+	unsigned use_explicit_workers_cuda_gpuid;
			
 
				+	/**
			
 
				+	   If the starpu_conf::use_explicit_workers_cuda_gpuid flag is
			
 
				+	   set, this array contains the logical identifiers of the
			
 
				+	   CUDA devices (as used by \c cudaGetDevice()).
			
 
				+	*/
			
 
				+	unsigned workers_cuda_gpuid[STARPU_NMAXWORKERS];
			
 
				+
			
 
				+	/**
			
 
				+	   If this flag is set, the OpenCL workers will be attached to
			
 
				+	   the OpenCL devices specified in the
			
 
				+	   starpu_conf::workers_opencl_gpuid array. Otherwise, StarPU
			
 
				+	   assigns the OpenCL devices in a round-robin fashion. This
			
 
				+	   can also be specified with the environment variable \ref
			
 
				+	   STARPU_WORKERS_OPENCLID.
			
 
				+	   (default = 0)
			
 
				+	*/
			
 
				+	unsigned use_explicit_workers_opencl_gpuid;
			
 
				+	/**
			
 
				+	   If the starpu_conf::use_explicit_workers_opencl_gpuid flag
			
 
				+	   is set, this array contains the logical identifiers of the
			
 
				+	   OpenCL devices to be used.
			
 
				+	*/
			
 
				+	unsigned workers_opencl_gpuid[STARPU_NMAXWORKERS];
			
 
				+
			
 
				+	/**
			
 
				+	   If this flag is set, the MIC workers will be attached to
			
 
				+	   the MIC devices specified in the array
			
 
				+	   starpu_conf::workers_mic_deviceid. Otherwise, StarPU
			
 
				+	   assigns the MIC devices in a round-robin fashion. This can
			
 
				+	   also be specified with the environment variable \ref
			
 
				+	   STARPU_WORKERS_MICID.
			
 
				+	   (default = 0)
			
 
				+	*/
			
 
				+	unsigned use_explicit_workers_mic_deviceid;
			
 
				+	/**
			
 
				+	   If the flag starpu_conf::use_explicit_workers_mic_deviceid
			
 
				+	   is set, the array contains the logical identifiers of the
			
 
				+	   MIC devices to be used.
			
 
				+	*/
			
 
				+	unsigned workers_mic_deviceid[STARPU_NMAXWORKERS];
			
 
				+
			
 
				+	/**
			
 
				+	   If this flag is set, the MPI Master Slave workers will be
			
 
				+	   attached to the MPI Master Slave devices specified in the
			
 
				+	   array starpu_conf::workers_mpi_ms_deviceid. Otherwise,
			
 
				+	   StarPU assigns the MPI Master Slave devices in a
			
 
				+	   round-robin fashion.
			
 
				+	   (default = 0)
			
 
				+	*/
			
 
				+	unsigned use_explicit_workers_mpi_ms_deviceid;
			
 
				+	/**
			
 
				+	   If the flag
			
 
				+	   starpu_conf::use_explicit_workers_mpi_ms_deviceid is set,
			
 
				+	   the array contains the logical identifiers of the MPI
			
 
				+	   Master Slave devices to be used.
			
 
				+	*/
			
 
				+	unsigned workers_mpi_ms_deviceid[STARPU_NMAXWORKERS];
			
 
				+
			
 
				+	/**
			
 
				+	   If this flag is set, StarPU will recalibrate the bus.  If
			
 
				+	   this value is equal to -1, the default value is used. This
			
 
				+	   can also be specified with the environment variable \ref
			
 
				+	   STARPU_BUS_CALIBRATE.
			
 
				+	   (default = 0)
			
 
				+	*/
			
 
				+	int bus_calibrate;
			
 
				+	/**
			
 
				+	   If this flag is set, StarPU will calibrate the performance
			
 
				+	   models when executing tasks. If this value is equal to -1,
			
 
				+	   the default value is used. If the value is equal to 1, it
			
 
				+	   will force continuing calibration. If the value is equal to
			
 
				+	   2, the existing performance models will be overwritten.
			
 
				+	   This can also be specified with the environment variable
			
 
				+	   \ref STARPU_CALIBRATE.
			
 
				+	   (default = 0)
			
 
				+	*/
			
 
				+	int calibrate;
			
 
				+
			
 
				+	/**
			
 
				+	   By default, StarPU executes parallel tasks concurrently.
			
 
				+	   Some parallel libraries (e.g. most OpenMP implementations)
			
 
				+	   however do not support concurrent calls to parallel code.
			
 
				+	   In such case, setting this flag makes StarPU only start one
			
 
				+	   parallel task at a time (but other CPU and GPU tasks are
			
 
				+	   not affected and can be run concurrently). The parallel
			
 
				+	   task scheduler will however still try varying combined
			
 
				+	   worker sizes to look for the most efficient ones.
			
 
				+	   This can also be specified with the environment variable
			
 
				+	   \ref STARPU_SINGLE_COMBINED_WORKER.
			
 
				+	   (default = 0)
			
 
				+	*/
			
 
				+	int single_combined_worker;
			
 
				+
			
 
				+	/**
			
 
				+	   Path to the kernel to execute on the MIC device, compiled
			
 
				+	   for MIC architecture. When set to <c>NULL</c>, StarPU
			
 
				+	   automatically looks next to the host program location.
			
 
				+	   (default = <c>NULL</c>)
			
 
				+	*/
			
 
				+	char *mic_sink_program_path;
			
 
				+
			
 
				+	/**
			
 
				+	   This flag should be set to 1 to disable asynchronous copies
			
 
				+	   between CPUs and all accelerators.
			
 
				+	   The AMD implementation of OpenCL is known to fail when
			
 
				+	   copying data asynchronously. When using this
			
 
				+	   implementation, it is therefore necessary to disable
			
 
				+	   asynchronous data transfers.
			
 
				+	   This can also be specified with the environment variable
			
 
				+	   \ref STARPU_DISABLE_ASYNCHRONOUS_COPY.
			
 
				+	   This can also be specified at compilation time by giving to
			
 
				+	   the configure script the option \ref
			
 
				+	   disable-asynchronous-copy "--disable-asynchronous-copy".
			
 
				+	   (default = 0)
			
 
				+	*/
			
 
				+	int disable_asynchronous_copy;
			
 
				+	/**
			
 
				+	   This flag should be set to 1 to disable asynchronous copies
			
 
				+	   between CPUs and CUDA accelerators.
			
 
				+	   This can also be specified with the environment variable
			
 
				+	   \ref STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY.
			
 
				+	   This can also be specified at compilation time by giving to
			
 
				+	   the configure script the option \ref
			
 
				+	   disable-asynchronous-cuda-copy
			
 
				+	   "--disable-asynchronous-cuda-copy".
			
 
				+	   (default = 0)
			
 
				+	*/
			
 
				+	int disable_asynchronous_cuda_copy;
			
 
				+	/**
			
 
				+	   This flag should be set to 1 to disable asynchronous copies
			
 
				+	   between CPUs and OpenCL accelerators.
			
 
				+	   The AMD implementation of OpenCL is known to fail when
			
 
				+	   copying data asynchronously. When using this
			
 
				+	   implementation, it is therefore necessary to disable
			
 
				+	   asynchronous data transfers.
			
 
				+	   This can also be specified with the environment variable
			
 
				+	   \ref STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY.
			
 
				+	   This can also be specified at compilation time by giving to
			
 
				+	   the configure script the option \ref
			
 
				+	   disable-asynchronous-opencl-copy
			
 
				+	   "--disable-asynchronous-opencl-copy".
			
 
				+	   (default = 0)
			
 
				+	*/
			
 
				+	int disable_asynchronous_opencl_copy;
			
 
				+	/**
			
 
				+	   This flag should be set to 1 to disable asynchronous copies
			
 
				+	   between CPUs and MIC accelerators.
			
 
				+	   This can also be specified with the environment variable
			
 
				+	   \ref STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY.
			
 
				+	   This can also be specified at compilation time by giving to
			
 
				+	   the configure script the option \ref
			
 
				+	   disable-asynchronous-mic-copy
			
 
				+	   "--disable-asynchronous-mic-copy".
			
 
				+	   (default = 0).
			
 
				+	*/
			
 
				+	int disable_asynchronous_mic_copy;
			
 
				+	/**
			
 
				+	   This flag should be set to 1 to disable asynchronous copies
			
 
				+	   between CPUs and MPI Master Slave devices.
			
 
				+	   This can also be specified with the environment variable
			
 
				+	   \ref STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY.
			
 
				+	   This can also be specified at compilation time by giving to
			
 
				+	   the configure script the option \ref
			
 
				+	   disable-asynchronous-mpi-master-slave-copy
			
 
				+	   "--disable-asynchronous-mpi-master-slave-copy".
			
 
				+	   (default = 0).
			
 
				+	*/
			
 
				+	int disable_asynchronous_mpi_ms_copy;
			
 
				+
			
 
				+	/**
			
 
				+	   Enable CUDA/OpenGL interoperation on these CUDA devices.
			
 
				+	   This can be set to an array of CUDA device identifiers for
			
 
				+	   which \c cudaGLSetGLDevice() should be called instead of \c
			
 
				+	   cudaSetDevice(). Its size is specified by the
			
 
				+	   starpu_conf::n_cuda_opengl_interoperability field below
			
 
				+	   (default = <c>NULL</c>)
			
 
				+	*/
			
 
				+	unsigned *cuda_opengl_interoperability;
			
 
				+	/**
			
 
				+	   Size of the array starpu_conf::cuda_opengl_interoperability
			
 
				+	*/
			
 
				+	unsigned n_cuda_opengl_interoperability;
			
 
				+
			
 
				+	/**
			
 
				+	   Array of drivers that should not be launched by StarPU. The
			
 
				+	   application will run in one of its own threads.
			
 
				+	   (default = <c>NULL</c>)
			
 
				+	*/
			
 
				+	struct starpu_driver *not_launched_drivers;
			
 
				+	/**
			
 
				+	   The number of StarPU drivers that should not be launched by
			
 
				+	   StarPU, i.e number of elements of the array
			
 
				+	   starpu_conf::not_launched_drivers.
			
 
				+	   (default = 0)
			
 
				+	*/
			
 
				+	unsigned n_not_launched_drivers;
			
 
				+
			
 
				+	/**
			
 
				+	   Specify the buffer size used for FxT tracing. Starting from
			
 
				+	   FxT version 0.2.12, the buffer will automatically be
			
 
				+	   flushed when it fills in, but it may still be interesting
			
 
				+	   to specify a bigger value to avoid any flushing (which
			
 
				+	   would disturb the trace).
			
 
				+	*/
			
 
				+	uint64_t trace_buffer_size;
			
 
				+	int global_sched_ctx_min_priority;
			
 
				+	int global_sched_ctx_max_priority;
			
 
				+
			
 
				+#ifdef STARPU_WORKER_CALLBACKS
			
 
				+	void (*callback_worker_going_to_sleep)(unsigned workerid);
			
 
				+	void (*callback_worker_waking_up)(unsigned workerid);
			
 
				+#endif
			
 
				+
			
 
				+	/**
			
 
				+	   Specify if StarPU should catch SIGINT, SIGSEGV and SIGTRAP
			
 
				+	   signals to make sure final actions (e.g dumping FxT trace
			
 
				+	   files) are done even though the application has crashed. By
			
 
				+	   default (value = \c 1), signals are caught. It should be
			
 
				+	   disabled on systems which already catch these signals for
			
 
				+	   their own needs (e.g JVM)
			
 
				+	   This can also be specified with the environment variable
			
 
				+	   \ref STARPU_CATCH_SIGNALS
			
 
				+	 */
			
 
				+	int catch_signals;
			
 
				+
			
 
				+	/**
			
 
				+	   Specify whether StarPU should automatically start to collect
			
 
				+	   performance counters after initialization
			
 
				+	 */
			
 
				+	unsigned start_perf_counter_collection;
			
 
				+
			
 
				+	/**
			
 
				+	   Minimum spinning backoff of drivers. Default value: \c 1
			
 
				+	 */
			
 
				+	unsigned driver_spinning_backoff_min;
			
 
				+
			
 
				+	/**
			
 
				+	   Maximum spinning backoff of drivers. Default value: \c 32
			
 
				+	 */
			
 
				+	unsigned driver_spinning_backoff_max;
			
 
				+};
			
 
				+
			
 
				+/**
			
 
				+   Initialize the \p conf structure with the default values. In case
			
 
				+   some configuration parameters are already specified through
			
 
				+   environment variables, starpu_conf_init() initializes the fields of
			
 
				+   \p conf according to the environment variables.
			
 
				+   For instance if \ref STARPU_CALIBRATE is set, its value is put in
			
 
				+   the field starpu_conf::calibrate of \p conf.
			
 
				+   Upon successful completion, this function returns 0. Otherwise,
			
 
				+   <c>-EINVAL</c> indicates that the argument was <c>NULL</c>.
			
 
				+*/
			
 
				+int starpu_conf_init(struct starpu_conf *conf);
			
 
				+
			
 
				+/**
			
 
				+   StarPU initialization method, must be called prior to any other
			
 
				+   StarPU call. It is possible to specify StarPU’s configuration (e.g.
			
 
				+   scheduling policy, number of cores, ...) by passing a
			
 
				+   non-<c>NULL</c> \p conf. Default configuration is used if \p conf
			
 
				+   is <c>NULL</c>. Upon successful completion, this function returns
			
 
				+   0. Otherwise, <c>-ENODEV</c> indicates that no worker was available
			
 
				+   (and thus StarPU was not initialized).
			
 
				+*/
			
 
				+int starpu_init(struct starpu_conf *conf) STARPU_WARN_UNUSED_RESULT;
			
 
				+
			
 
				+/**
			
 
				+   Similar to starpu_init(), but also take the \p argc and \p argv as
			
 
				+   defined by the application.
			
 
				+   Do not call starpu_init() and starpu_initialize() in the same
			
 
				+   program.
			
 
				+*/
			
 
				+int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv);
			
 
				+
			
 
				+/**
			
 
				+   Return 1 if StarPU is already initialized.
			
 
				+*/
			
 
				+int starpu_is_initialized(void);
			
 
				+
			
 
				+/**
			
 
				+   Wait for starpu_init() call to finish.
			
 
				+*/
			
 
				+void starpu_wait_initialized(void);
			
 
				+
			
 
				+/**
			
 
				+   StarPU termination method, must be called at the end of the
			
 
				+   application: statistics and other post-mortem debugging information
			
 
				+   are not guaranteed to be available until this method has been
			
 
				+   called.
			
 
				+*/
			
 
				+void starpu_shutdown(void);
			
 
				+
			
 
				+/**
			
 
				+   Suspend the processing of new tasks by workers. It can be used in a
			
 
				+   program where StarPU is used during only a part of the execution.
			
 
				+   Without this call, the workers continue to poll for new tasks in a
			
 
				+   tight loop, wasting CPU time. The symmetric call to starpu_resume()
			
 
				+   should be used to unfreeze the workers.
			
 
				+*/
			
 
				+void starpu_pause(void);
			
 
				+/**
			
 
				+   Symmetrical call to starpu_pause(), used to resume the workers
			
 
				+   polling for new tasks.
			
 
				+*/
			
 
				+void starpu_resume(void);
			
 
				+
			
 
				+/**
			
 
				+   Value to be passed to starpu_get_next_bindid() and
			
 
				+   starpu_bind_thread_on() when binding a thread which will
			
 
				+   significantly eat CPU time, and should thus have its own dedicated
			
 
				+   CPU.
			
 
				+*/
			
 
				+#define STARPU_THREAD_ACTIVE (1 << 0)
			
 
				+
			
 
				+/**
			
 
				+   Return a PU binding ID which can be used to bind threads with
			
 
				+   starpu_bind_thread_on(). \p flags can be set to
			
 
				+   STARPU_THREAD_ACTIVE or 0. When \p npreferred is set to non-zero,
			
 
				+   \p preferred is an array of size \p npreferred in which a
			
 
				+   preference of PU binding IDs can be set. By default StarPU will
			
 
				+   return the first PU available for binding.
			
 
				+*/
			
 
				+unsigned starpu_get_next_bindid(unsigned flags, unsigned *preferred, unsigned npreferred);
			
 
				+
			
 
				+/**
			
 
				+   Bind the calling thread on the given \p cpuid (which should have
			
 
				+   been obtained with starpu_get_next_bindid()).
			
 
				+
			
 
				+   Return -1 if a thread was already bound to this PU (but binding
			
 
				+   will still have been done, and a warning will have been printed),
			
 
				+   so the caller can tell the user how to avoid the issue.
			
 
				+
			
 
				+   \p name should be set to a unique string so that different calls
			
 
				+	   with the same name for the same cpuid do not produce a warning.
			
 
				+*/
			
 
				+int starpu_bind_thread_on(int cpuid, unsigned flags, const char *name);
			
 
				+
			
 
				+/**
			
 
				+   Print a description of the topology on \p f.
			
 
				+*/
			
 
				+void starpu_topology_print(FILE *f);
			
 
				+
			
 
				+/**
			
 
				+   Return 1 if asynchronous data transfers between CPU and
			
 
				+   accelerators are disabled.
			
 
				+*/
			
 
				+int starpu_asynchronous_copy_disabled(void);
			
 
				+
			
 
				+/**
			
 
				+   Return 1 if asynchronous data transfers between CPU and CUDA
			
 
				+   accelerators are disabled.
			
 
				+*/
			
 
				+int starpu_asynchronous_cuda_copy_disabled(void);
			
 
				+
			
 
				+/**
			
 
				+   Return 1 if asynchronous data transfers between CPU and OpenCL
			
 
				+   accelerators are disabled.
			
 
				+*/
			
 
				+int starpu_asynchronous_opencl_copy_disabled(void);
			
 
				+
			
 
				+/**
			
 
				+   Return 1 if asynchronous data transfers between CPU and MIC devices
			
 
				+   are disabled.
			
 
				+*/
			
 
				+int starpu_asynchronous_mic_copy_disabled(void);
			
 
				+
			
 
				+/**
			
 
				+   Return 1 if asynchronous data transfers between CPU and MPI Slave
			
 
				+   devices are disabled.
			
 
				+*/
			
 
				+int starpu_asynchronous_mpi_ms_copy_disabled(void);
			
 
				+
			
 
				+void starpu_display_stats(void);
			
 
				+
			
 
				+void starpu_get_version(int *major, int *minor, int *release);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#include "starpu_deprecated_api.h"
			
 
				+
			
 
				+#endif /* __STARPU_H__ */
			
--- a/include/starpu/1.3/starpu_bitmap.h
+++ b/include/starpu/1.3/starpu_bitmap.h
@@ -0,0 +1,301 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2013-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ * Copyright (C) 2013       Simon Archipoff
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_BITMAP_H__
			
 
				+#define __STARPU_BITMAP_H__
			
 
				+
			
 
				+#include <starpu_util.h>
			
 
				+#include <starpu_config.h>
			
 
				+
			
 
				+#include <string.h>
			
 
				+#include <stdlib.h>
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   @defgroup API_Bitmap Bitmap
			
 
				+   @brief This is the interface for the bitmap utilities provided by StarPU.
			
 
				+   @{
			
 
				+ */
			
 
				+#ifndef _STARPU_LONG_BIT
			
 
				+#define _STARPU_LONG_BIT ((int)(sizeof(unsigned long) * 8))
			
 
				+#endif
			
 
				+
			
 
				+/* Number of unsigned long words needed to hold STARPU_NMAXWORKERS bits.
			
 
				+ * Fully parenthesized so that the macro stays a single operand when it is
			
 
				+ * used inside a larger expression (e.g. multiplied by an element size). */
			
 
				+#define _STARPU_BITMAP_SIZE (((STARPU_NMAXWORKERS - 1)/_STARPU_LONG_BIT) + 1)
			
 
				+
			
 
				+/** create an empty starpu_bitmap */
			
 
				+static inline struct starpu_bitmap *starpu_bitmap_create(void) STARPU_ATTRIBUTE_MALLOC;
			
 
				+/** zero a starpu_bitmap */
			
 
				+static inline void starpu_bitmap_init(struct starpu_bitmap *b);
			
 
				+/** free \p b */
			
 
				+static inline void starpu_bitmap_destroy(struct starpu_bitmap *b);
			
 
				+
			
 
				+/** set bit \p e in \p b */
			
 
				+static inline void starpu_bitmap_set(struct starpu_bitmap *b, int e);
			
 
				+/** unset bit \p e in \p b */
			
 
				+static inline void starpu_bitmap_unset(struct starpu_bitmap *b, int e);
			
 
				+/** unset all bits in \p b */
			
 
				+static inline void starpu_bitmap_unset_all(struct starpu_bitmap *b);
			
 
				+
			
 
				+/** return true iff bit \p e is set in \p b */
			
 
				+static inline int starpu_bitmap_get(struct starpu_bitmap *b, int e);
			
 
				+/** Basically compute \c starpu_bitmap_unset_all(\p a) ; \p a = \p b & \p c; */
			
 
				+static inline void starpu_bitmap_unset_and(struct starpu_bitmap *a, struct starpu_bitmap *b, struct starpu_bitmap *c);
			
 
				+/** Basically compute \p a |= \p b */
			
 
				+static inline void starpu_bitmap_or(struct starpu_bitmap *a, struct starpu_bitmap *b);
			
 
				+/** return 1 iff \p e is set in \p b1 AND \p e is set in \p b2 */
			
 
				+static inline int starpu_bitmap_and_get(struct starpu_bitmap *b1, struct starpu_bitmap *b2, int e);
			
 
				+/** return the number of set bits in \p b */
			
 
				+static inline int starpu_bitmap_cardinal(struct starpu_bitmap *b);
			
 
				+
			
 
				+/** return the index of the first set bit of \p b, -1 if none */
			
 
				+static inline int starpu_bitmap_first(struct starpu_bitmap *b);
			
 
				+/** return the position of the last set bit of \p b, -1 if none */
			
 
				+static inline int starpu_bitmap_last(struct starpu_bitmap *b);
			
 
				+/** return the position of set bit right after \p e in \p b, -1 if none */
			
 
				+static inline int starpu_bitmap_next(struct starpu_bitmap *b, int e);
			
 
				+/** todo */
			
 
				+static inline int starpu_bitmap_has_next(struct starpu_bitmap *b, int e);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/* Fixed-size bit set: an array of words plus a count of the bits that are set. */
			
 
				+struct starpu_bitmap
			
 
				+{
			
 
				+	/* bit e is stored in bits[e/_STARPU_LONG_BIT], at position e%_STARPU_LONG_BIT */
			
 
				+	unsigned long bits[_STARPU_BITMAP_SIZE];
			
 
				+	/* number of set bits; updated by starpu_bitmap_set(), starpu_bitmap_unset(),
			
 
				+	 * starpu_bitmap_unset_and() and starpu_bitmap_or() */
			
 
				+	int cardinal;
			
 
				+};
			
 
				+
			
 
				+/* Consistency check used inside STARPU_ASSERT() by the mutating helpers.
			
 
				+ * With _STARPU_DEBUG_BITMAP defined, it walks the set bits of b with
			
 
				+ * starpu_bitmap_first()/starpu_bitmap_next() and returns 1 only when exactly
			
 
				+ * b->cardinal bits are found, 0 otherwise. Without it, the check is the
			
 
				+ * constant 1. */
			
 
				+#ifdef _STARPU_DEBUG_BITMAP
			
 
				+static int _starpu_check_bitmap(struct starpu_bitmap *b)
			
 
				+{
			
 
				+	int card = b->cardinal;
			
 
				+	int i = starpu_bitmap_first(b);
			
 
				+	int j;
			
 
				+	/* consume one set bit per expected element */
			
 
				+	for(j = 0; j < card; j++)
			
 
				+	{
			
 
				+		/* ran out of set bits before reaching cardinal */
			
 
				+		if(i == -1)
			
 
				+			return 0;
			
 
				+		int tmp = starpu_bitmap_next(b,i);
			
 
				+		/* the iteration did not advance */
			
 
				+		if(tmp == i)
			
 
				+			return 0;
			
 
				+		i = tmp;
			
 
				+	}
			
 
				+	/* more set bits remain than cardinal accounts for */
			
 
				+	if(i != -1)
			
 
				+		return 0;
			
 
				+	return 1;
			
 
				+}
			
 
				+#else
			
 
				+#define _starpu_check_bitmap(b) 1
			
 
				+#endif
			
 
				+
			
 
				+/* Return the number of bits set in e, using the compiler builtin when the
			
 
				+ * GCC version test below succeeds and a shift-and-add loop otherwise. */
			
 
				+static int _starpu_count_bit_static(unsigned long e)
			
 
				+{
			
 
				+#if (__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__) >= 4)
			
 
				+	return __builtin_popcountl(e);
			
 
				+#else
			
 
				+	int count;
			
 
				+	/* add the lowest bit, then shift it out, until no bit is left */
			
 
				+	for(count = 0; e != 0; e >>= 1)
			
 
				+		count += e & 1;
			
 
				+	return count;
			
 
				+#endif
			
 
				+}
			
 
				+
			
 
				+/* Allocate a bitmap with calloc(), so every word and the cardinal start at
			
 
				+ * zero. The calloc() result is returned as is, including NULL on failure. */
			
 
				+static inline struct starpu_bitmap *starpu_bitmap_create()
			
 
				+{
			
 
				+	struct starpu_bitmap *fresh = (struct starpu_bitmap *) calloc(1, sizeof(*fresh));
			
 
				+	return fresh;
			
 
				+}
			
 
				+
			
 
				+/* Zero the whole structure pointed to by b: all words and the cardinal. */
			
 
				+static inline void starpu_bitmap_init(struct starpu_bitmap *b)
			
 
				+{
			
 
				+	memset(b, 0, sizeof(struct starpu_bitmap));
			
 
				+}
			
 
				+
			
 
				+/* Release b with free(). */
			
 
				+static inline void starpu_bitmap_destroy(struct starpu_bitmap * b)
			
 
				+{
			
 
				+	free(b);
			
 
				+}
			
 
				+
			
 
				+/* Set bit e in b. The cardinal only grows when the bit was previously
			
 
				+ * clear; setting an already-set bit is a no-op. */
			
 
				+static inline void starpu_bitmap_set(struct starpu_bitmap * b, int e)
			
 
				+{
			
 
				+	/* already present: neither the word nor the count changes */
			
 
				+	if(starpu_bitmap_get(b, e))
			
 
				+		return;
			
 
				+	b->cardinal += 1;
			
 
				+	STARPU_ASSERT(e/_STARPU_LONG_BIT < _STARPU_BITMAP_SIZE);
			
 
				+	b->bits[e/_STARPU_LONG_BIT] |= (1ul << (e%_STARPU_LONG_BIT));
			
 
				+	STARPU_ASSERT(_starpu_check_bitmap(b));
			
 
				+}
			
 
				+/*
				+ * Remove element e from bitmap b and keep b->cardinal in sync.
				+ * Nothing happens when e is not present.  An e that does not fit in
				+ * the bitmap triggers the assertion, and is ignored when assertions
				+ * are compiled out.
				+ */
				+static inline void starpu_bitmap_unset(struct starpu_bitmap *b, int e)
				+{
				+	STARPU_ASSERT(e/_STARPU_LONG_BIT < _STARPU_BITMAP_SIZE);
				+	/* The range check must use >= (index _STARPU_BITMAP_SIZE is already
				+	 * outside bits[]) and must run before cardinal is modified. */
				+	if(e < 0 || e / _STARPU_LONG_BIT >= _STARPU_BITMAP_SIZE)
				+		return;
				+	if(!starpu_bitmap_get(b, e))
				+		return;
				+	b->cardinal--;
				+	b->bits[e/_STARPU_LONG_BIT] &= ~(1ul << (e%_STARPU_LONG_BIT));
				+	STARPU_ASSERT(_starpu_check_bitmap(b));
				+}
			
 
				+
			
 
				+/*
				+ * Remove every element from bitmap b.
				+ * Both the bit array and the cached element count are cleared.
				+ */
				+static inline void starpu_bitmap_unset_all(struct starpu_bitmap * b)
				+{
				+	memset(b->bits, 0, sizeof(b->bits));
				+	/* cardinal caches the number of set bits; leaving it untouched made
				+	 * starpu_bitmap_cardinal() and starpu_bitmap_last() see a non-empty
				+	 * bitmap after the bits were wiped. */
				+	b->cardinal = 0;
				+}
			
 
				+
			
 
				+/*
				+ * Store in a the intersection of b and c (a = b & c, word by word)
				+ * and recompute the cardinal of a from the resulting words.
				+ */
				+static inline void starpu_bitmap_unset_and(struct starpu_bitmap * a, struct starpu_bitmap * b, struct starpu_bitmap * c)
				+{
				+	int word;
				+	int count = 0;
				+
				+	for(word = 0; word < _STARPU_BITMAP_SIZE; word++)
				+	{
				+		unsigned long common = b->bits[word] & c->bits[word];
				+
				+		a->bits[word] = common;
				+		count += _starpu_count_bit_static(common);
				+	}
				+	a->cardinal = count;
				+}
			
 
				+
			
 
				+/*
				+ * Return 1 when element e is present in bitmap b, 0 otherwise.
				+ * A position past the end of the bitmap triggers the assertion and,
				+ * when assertions are compiled out, is reported as absent.
				+ */
				+static inline int starpu_bitmap_get(struct starpu_bitmap * b, int e)
				+{
				+	int word = e / _STARPU_LONG_BIT;
				+
				+	STARPU_ASSERT(word < _STARPU_BITMAP_SIZE);
				+	if(word >= _STARPU_BITMAP_SIZE)
				+		return 0;
				+	if(b->bits[word] & (1ul << (e%_STARPU_LONG_BIT)))
				+		return 1;
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Merge b into a (a |= b, word by word) and recompute the cardinal
				+ * of a from the resulting words.  b is left unchanged.
				+ */
				+static inline void starpu_bitmap_or(struct starpu_bitmap * a, struct starpu_bitmap * b)
				+{
				+	int word;
				+	int count = 0;
				+
				+	for(word = 0; word < _STARPU_BITMAP_SIZE; word++)
				+	{
				+		a->bits[word] = a->bits[word] | b->bits[word];
				+		count += _starpu_count_bit_static(a->bits[word]);
				+	}
				+	a->cardinal = count;
				+}
			
 
				+
			
 
				+
			
 
				+/*
				+ * Return 1 when element e is present in both b1 and b2, 0 otherwise.
				+ * b2 is only consulted when e is present in b1.
				+ */
				+static inline int starpu_bitmap_and_get(struct starpu_bitmap * b1, struct starpu_bitmap * b2, int e)
				+{
				+	if(!starpu_bitmap_get(b1,e))
				+		return 0;
				+	return starpu_bitmap_get(b2,e) ? 1 : 0;
				+}
			
 
				+
			
 
				+/* Return the element count cached in b->cardinal. */
				+static inline int starpu_bitmap_cardinal(struct starpu_bitmap * b)
				+{
				+	int count = b->cardinal;
				+
				+	return count;
				+}
			
 
				+
			
 
				+
			
 
				+/*
				+ * Return the 0-based position of the least significant set bit of ms.
				+ * ms must not be 0 (asserted).
				+ */
				+static inline int _starpu_get_first_bit_rank(unsigned long ms)
				+{
				+	STARPU_ASSERT(ms != 0);
				+#if (__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))
				+	/* ffsl() numbers bits from 1, hence the -1. */
				+	return __builtin_ffsl(ms) - 1;
				+#else
				+	unsigned long probe;
				+	int rank = 0;
				+
				+	/* Slide a single-bit mask upwards until it meets a set bit. */
				+	for(probe = 1ul; (probe & ms) == 0; probe <<= 1)
				+		rank++;
				+	return rank;
				+#endif
				+}
			
 
				+
			
 
				+/*
				+ * Return the 0-based position of the most significant set bit of l,
				+ * using the same numbering as _starpu_get_first_bit_rank().
				+ * l must not be 0 (asserted).
				+ */
				+static inline int _starpu_get_last_bit_rank(unsigned long l)
				+{
				+	STARPU_ASSERT(l != 0);
				+#if (__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))
				+	/* clzl() counts the zero bits above the highest set bit, so the
				+	 * 0-based rank is (width - 1 - clzl).  Without the -1 the result
				+	 * pointed one position above the actual bit. */
				+	return (int) (8*sizeof(l)) - 1 - __builtin_clzl(l);
				+#else
				+	int ibit = _STARPU_LONG_BIT - 1;
				+	/* Scan downwards for the first set bit.  The former condition
				+	 * "(!(1ul << ibit)) & l" was always 0, so the loop never ran. */
				+	while(ibit >= 0 && !((1ul << ibit) & l))
				+		ibit--;
				+	STARPU_ASSERT(ibit >= 0);
				+	return ibit;
				+#endif
				+}
			
 
				+
			
 
				+/*
				+ * Return the smallest element present in bitmap b, or -1 when no
				+ * bit is set.
				+ */
				+static inline int starpu_bitmap_first(struct starpu_bitmap * b)
				+{
				+	int word;
				+
				+	for(word = 0; word < _STARPU_BITMAP_SIZE; word++)
				+	{
				+		unsigned long content = b->bits[word];
				+
				+		/* The first non-zero word holds the smallest element. */
				+		if(content)
				+			return (word * _STARPU_LONG_BIT) + _starpu_get_first_bit_rank(content);
				+	}
				+	return -1;
				+}
			
 
				+
			
 
				+/*
				+ * Return 1 when bitmap b contains an element strictly greater than e,
				+ * 0 otherwise.
				+ */
				+static inline int starpu_bitmap_has_next(struct starpu_bitmap * b, int e)
				+{
				+	int nb_long = (e+1) / _STARPU_LONG_BIT;
				+	int nb_bit = (e+1) % _STARPU_LONG_BIT;
				+	unsigned long mask;
				+
				+	/* When e is the last position of the bitmap, e+1 falls in word
				+	 * _STARPU_BITMAP_SIZE, which is outside bits[]: nothing can follow. */
				+	if(nb_long >= _STARPU_BITMAP_SIZE)
				+		return 0;
				+	/* Keep only the bits at positions >= e+1 inside the current word. */
				+	mask = (~0ul) << nb_bit;
				+	if(b->bits[nb_long] & mask)
				+		return 1;
				+	/* Otherwise any non-zero later word holds a greater element. */
				+	for(nb_long++; nb_long < _STARPU_BITMAP_SIZE; nb_long++)
				+		if(b->bits[nb_long])
				+			return 1;
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Return the position of the highest element of bitmap b, computed
				+ * from the highest non-zero word and _starpu_get_last_bit_rank().
				+ * Return -1 when the bitmap is empty.
				+ */
				+static inline int starpu_bitmap_last(struct starpu_bitmap * b)
				+{
				+	int ilong;
				+
				+	if(b->cardinal == 0)
				+		return -1;
				+	/* Look for the highest word that has at least one bit set. */
				+	ilong = _STARPU_BITMAP_SIZE - 1;
				+	while(ilong >= 0 && !b->bits[ilong])
				+		ilong--;
				+	/* A non-zero cardinal with no bit set means the count is stale. */
				+	STARPU_ASSERT(ilong >= 0);
				+	/* With assertions compiled out, do not index bits[-1]. */
				+	if(ilong < 0)
				+		return -1;
				+	return ilong * _STARPU_LONG_BIT + _starpu_get_last_bit_rank(b->bits[ilong]);
				+}
			
 
				+
			
 
				+/*
				+ * Return the smallest element of bitmap b that is strictly greater
				+ * than e, or -1 when there is none.
				+ */
				+static inline int starpu_bitmap_next(struct starpu_bitmap *b, int e)
				+{
				+	int word = e / _STARPU_LONG_BIT;
				+	int offset = e % _STARPU_LONG_BIT;
				+
				+	/* Unless e is the top bit of its word, look at the bits above it
				+	 * in that same word first. */
				+	if(offset != (_STARPU_LONG_BIT - 1))
				+	{
				+		unsigned long above = (~0ul << (offset + 1)) & b->bits[word];
				+
				+		if(above)
				+		{
				+			int rank = _starpu_get_first_bit_rank(above);
				+			STARPU_ASSERT(rank >= 0 && rank < _STARPU_LONG_BIT);
				+			return (word * _STARPU_LONG_BIT) + rank;
				+		}
				+	}
				+
				+	/* Then fall back to the first set bit of any following word. */
				+	for(word++; word < _STARPU_BITMAP_SIZE; word++)
				+	{
				+		if(b->bits[word])
				+			return word * _STARPU_LONG_BIT + _starpu_get_first_bit_rank(b->bits[word]);
				+	}
				+	return -1;
				+}
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif /* __STARPU_BITMAP_H__ */
			
--- a/include/starpu/1.3/starpu_bound.h
+++ b/include/starpu/1.3/starpu_bound.h
@@ -0,0 +1,82 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_BOUND_H__
			
 
				+#define __STARPU_BOUND_H__
			
 
				+
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   @defgroup API_Theoretical_Lower_Bound_on_Execution_Time Theoretical Lower Bound on Execution Time
			
 
				+   @brief Compute theoretical upper computation efficiency bound corresponding to some actual execution.
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   Start recording tasks (resets stats). \p deps tells whether
			
 
				+   dependencies should be recorded too (this is quite expensive)
			
 
				+*/
			
 
				+void starpu_bound_start(int deps, int prio);
			
 
				+
			
 
				+/**
			
 
				+   Stop recording tasks
			
 
				+*/
			
 
				+void starpu_bound_stop(void);
			
 
				+
			
 
				+/**
			
 
				+   Emit the DAG that was recorded on \p output.
			
 
				+*/
			
 
				+void starpu_bound_print_dot(FILE *output);
			
 
				+
			
 
				+/**
			
 
				+   Get theoretical upper bound (in ms) (needs glpk support detected by
			
 
				+   configure script). It returns 0 if some performance models are not
			
 
				+   calibrated.
			
 
				+*/
			
 
				+void starpu_bound_compute(double *res, double *integer_res, int integer);
			
 
				+
			
 
				+/**
			
 
				+   Emit the Linear Programming system on \p output for the recorded
			
 
				+   tasks, in the lp format
			
 
				+*/
			
 
				+void starpu_bound_print_lp(FILE *output);
			
 
				+
			
 
				+/**
			
 
				+   Emit the Linear Programming system on \p output for the recorded
			
 
				+   tasks, in the mps format
			
 
				+*/
			
 
				+void starpu_bound_print_mps(FILE *output);
			
 
				+
			
 
				+/**
			
 
				+   Emit on \p output the statistics of actual execution vs theoretical
			
 
				+   upper bound. \p integer selects between integer solving
			
 
				+   (which takes a long time but is correct) and relaxed solving
			
 
				+   (which provides an approximate solution).
			
 
				+*/
			
 
				+void starpu_bound_print(FILE *output, int integer);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif /* __STARPU_BOUND_H__ */
			
--- a/include/starpu/1.3/starpu_clusters.h
+++ b/include/starpu/1.3/starpu_clusters.h
@@ -0,0 +1,140 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2015-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_CLUSTERS_UTIL_H__
			
 
				+#define __STARPU_CLUSTERS_UTIL_H__
			
 
				+
			
 
				+#include <starpu_config.h>
			
 
				+
			
 
				+#ifdef STARPU_CLUSTER
			
 
				+#ifdef STARPU_HAVE_HWLOC
			
 
				+
			
 
				+#include <hwloc.h>
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   @defgroup API_Clustering_Machine Clustering Machine
			
 
				+   @{
			
 
				+ */
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_cluster_machine
			
 
				+ */
			
 
				+#define STARPU_CLUSTER_MIN_NB			(1<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_cluster_machine
			
 
				+ */
			
 
				+#define STARPU_CLUSTER_MAX_NB			(2<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_cluster_machine
			
 
				+ */
			
 
				+#define STARPU_CLUSTER_NB			(3<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_cluster_machine
			
 
				+ */
			
 
				+#define STARPU_CLUSTER_PREFERE_MIN		(4<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_cluster_machine
			
 
				+ */
			
 
				+#define STARPU_CLUSTER_KEEP_HOMOGENEOUS		(5<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_cluster_machine
			
 
				+ */
			
 
				+#define STARPU_CLUSTER_POLICY_NAME		(6<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_cluster_machine
			
 
				+ */
			
 
				+#define STARPU_CLUSTER_POLICY_STRUCT		(7<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_cluster_machine
			
 
				+ */
			
 
				+#define STARPU_CLUSTER_CREATE_FUNC		(8<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_cluster_machine
			
 
				+ */
			
 
				+#define STARPU_CLUSTER_CREATE_FUNC_ARG		(9<<STARPU_MODE_SHIFT)
			
 
				+/**
			
 
				+   Used when calling starpu_cluster_machine
			
 
				+ */
			
 
				+#define STARPU_CLUSTER_TYPE			(10<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_cluster_machine
			
 
				+ */
			
 
				+#define STARPU_CLUSTER_AWAKE_WORKERS		(11<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_cluster_machine
			
 
				+ */
			
 
				+#define STARPU_CLUSTER_PARTITION_ONE		(12<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_cluster_machine
			
 
				+ */
			
 
				+#define STARPU_CLUSTER_NEW			(13<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_cluster_machine
			
 
				+ */
			
 
				+#define STARPU_CLUSTER_NCORES			(14<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   These represent the default available functions to enforce cluster
			
 
				+   use by the sub-runtime
			
 
				+*/
			
 
				+enum starpu_cluster_types
			
 
				+{
			
 
				+	STARPU_CLUSTER_OPENMP, /**< todo */
			
 
				+	STARPU_CLUSTER_INTEL_OPENMP_MKL,  /**< todo */
			
 
				+	/* The next enumerator only exists when STARPU_MKL is defined. */
				+#ifdef STARPU_MKL
			
 
				+	STARPU_CLUSTER_GNU_OPENMP_MKL,  /**< todo */
			
 
				+#endif
			
 
				+};
			
 
				+
			
 
				+struct starpu_cluster_machine;
			
 
				+
			
 
				+struct starpu_cluster_machine* starpu_cluster_machine(hwloc_obj_type_t cluster_level, ...);
			
 
				+int starpu_uncluster_machine(struct starpu_cluster_machine* clusters);
			
 
				+int starpu_cluster_print(struct starpu_cluster_machine* clusters);
			
 
				+
			
 
				+/* Prologue functions */
			
 
				+void starpu_openmp_prologue(void*);
			
 
				+#define starpu_intel_openmp_mkl_prologue starpu_openmp_prologue
			
 
				+#ifdef STARPU_MKL
			
 
				+void starpu_gnu_openmp_mkl_prologue(void*);
			
 
				+#endif /* STARPU_MKL */
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+#endif
			
 
				+#endif
			
 
				+
			
 
				+#endif /* __STARPU_CLUSTERS_UTIL_H__ */
			
--- a/include/starpu/1.3/starpu_config.h
+++ b/include/starpu/1.3/starpu_config.h
@@ -0,0 +1,321 @@
 
				+/* include/starpu_config.h.  Generated from starpu_config.h.in by configure.  */
			
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ * Copyright (C) 2020       Federal University of Rio Grande do Sul (UFRGS)
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+/*
			
 
				+ * This is the public config.h file, installed along libstarpu.
			
 
				+ *
			
 
				+ * It should only contain the build-time #defines which have an effect on the
			
 
				+ * API & ABI.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_CONFIG_PUBLIC_H__
			
 
				+#define __STARPU_CONFIG_PUBLIC_H__
			
 
				+
			
 
				+/**
			
 
				+   Define the major version of StarPU. This is the version used when
			
 
				+   compiling the application.
			
 
				+   @ingroup API_Versioning
			
 
				+*/
			
 
				+#define STARPU_MAJOR_VERSION 1
			
 
				+
			
 
				+/**
			
 
				+   @ingroup API_Versioning
			
 
				+   Define the minor version of StarPU. This is the version used when
			
 
				+   compiling the application.
			
 
				+*/
			
 
				+#define STARPU_MINOR_VERSION 3
			
 
				+
			
 
				+/**
			
 
				+   Define the release version of StarPU. This is the version used when
			
 
				+   compiling the application.
			
 
				+   @ingroup API_Versioning
			
 
				+*/
			
 
				+#define STARPU_RELEASE_VERSION 99
			
 
				+
			
 
				+#define STARPU_USE_CPU 1
			
 
				+
			
 
				+/**
			
 
				+   Defined when StarPU has been installed with
			
 
				+   CUDA support. It should be used in your code to detect the
			
 
				+   availability of CUDA.
			
 
				+   @ingroup API_CUDA_Extensions
			
 
				+*/
			
 
				+#define STARPU_USE_CUDA 1
			
 
				+
			
 
				+/**
			
 
				+   Defined when StarPU has been installed with OpenCL support. It
			
 
				+   should be used in your code to detect the availability of OpenCL as
			
 
				+   shown in \ref FullSourceCodeVectorScal.
			
 
				+   @ingroup API_OpenCL_Extensions
			
 
				+*/
			
 
				+#define STARPU_USE_OPENCL 1
			
 
				+
			
 
				+/**
			
 
				+   Defined when StarPU has been installed with MIC support. It should
			
 
				+   be used in your code to detect the availability of MIC.
			
 
				+   @ingroup API_MIC_Extensions
			
 
				+*/
			
 
				+/* #undef STARPU_USE_MIC */
			
 
				+
			
 
				+/**
			
 
				+   Defined when StarPU has been installed with MPI Master Slave
			
 
				+   support. It should be used in your code to detect the availability
			
 
				+   of MPI Master Slave.
			
 
				+   @ingroup API_MPI_Support
			
 
				+*/
			
 
				+/* #undef STARPU_USE_MPI_MASTER_SLAVE */
			
 
				+
			
 
				+/**
			
 
				+   Defined when StarPU has been installed with OpenMP Runtime support.
			
 
				+   It should be used in your code to detect the availability of the
			
 
				+   runtime support for OpenMP.
			
 
				+   @ingroup API_OpenMP_Runtime_Support
			
 
				+*/
			
 
				+#define STARPU_OPENMP 1
			
 
				+
			
 
				+/* #undef STARPU_CLUSTER */
			
 
				+
			
 
				+/* #undef STARPU_SIMGRID */
			
 
				+/* #undef STARPU_SIMGRID_MC */
			
 
				+/* #undef STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT */
			
 
				+/* #undef STARPU_HAVE_SIMGRID_MSG_H */
			
 
				+/* #undef STARPU_HAVE_MSG_MSG_H */
			
 
				+/* #undef STARPU_HAVE_SIMGRID_ACTOR_H */
			
 
				+/* #undef STARPU_HAVE_SIMGRID_SEMAPHORE_H */
			
 
				+/* #undef STARPU_HAVE_SIMGRID_MUTEX_H */
			
 
				+/* #undef STARPU_HAVE_SIMGRID_COND_H */
			
 
				+/* #undef STARPU_HAVE_SIMGRID_BARRIER_H */
			
 
				+/* #undef STARPU_HAVE_XBT_SYNCHRO_H */
			
 
				+#define STARPU_HAVE_VALGRIND_H 1
			
 
				+#define STARPU_HAVE_MEMCHECK_H 1
			
 
				+/* #undef STARPU_VALGRIND_FULL */
			
 
				+/* #undef STARPU_SANITIZE_LEAK */
			
 
				+#define STARPU_NON_BLOCKING_DRIVERS 1
			
 
				+/* workers must call callbacks on sleep/wake-up */
			
 
				+/* #undef STARPU_WORKER_CALLBACKS */
			
 
				+
			
 
				+/* #undef STARPU_HAVE_ICC */
			
 
				+
			
 
				+/**
			
 
				+   Defined when StarPU has been installed with MPI support. It should
			
 
				+   be used in your code to detect the availability of MPI.
			
 
				+   @ingroup API_MPI_Support
			
 
				+*/
			
 
				+/* #undef STARPU_USE_MPI */
			
 
				+/* #undef STARPU_USE_MPI_MPI */
			
 
				+/* #undef STARPU_USE_MPI_NMAD */
			
 
				+
			
 
				+/* #undef STARPU_ATLAS */
			
 
				+/* #undef STARPU_GOTO */
			
 
				+/* #undef STARPU_OPENBLAS */
			
 
				+/* #undef STARPU_MKL */
			
 
				+/* #undef STARPU_ARMPL */
			
 
				+/* #undef STARPU_SYSTEM_BLAS */
			
 
				+/* #undef STARPU_HAVE_CBLAS_H */
			
 
				+
			
 
				+/**
			
 
				+   Define the directory in which the OpenCL codelets of the
			
 
				+   applications provided with StarPU have been installed.
			
 
				+   @ingroup API_OpenCL_Extensions
			
 
				+*/
			
 
				+/* #undef STARPU_OPENCL_DATADIR */
			
 
				+/* #undef STARPU_HAVE_MAGMA */
			
 
				+
			
 
				+/* #undef STARPU_OPENGL_RENDER */
			
 
				+/* #undef STARPU_USE_GTK */
			
 
				+#define STARPU_HAVE_X11 1
			
 
				+/* #undef STARPU_PAPI */
			
 
				+
			
 
				+#define STARPU_HAVE_POSIX_MEMALIGN 1
			
 
				+
			
 
				+#define STARPU_HAVE_MEMALIGN 1
			
 
				+
			
 
				+#define STARPU_HAVE_MALLOC_H 1
			
 
				+
			
 
				+#define STARPU_HAVE_SYNC_BOOL_COMPARE_AND_SWAP 1
			
 
				+#define STARPU_HAVE_SYNC_VAL_COMPARE_AND_SWAP 1
			
 
				+#define STARPU_HAVE_SYNC_FETCH_AND_ADD 1
			
 
				+#define STARPU_HAVE_SYNC_FETCH_AND_OR 1
			
 
				+#define STARPU_HAVE_SYNC_LOCK_TEST_AND_SET 1
			
 
				+#define STARPU_HAVE_SYNC_SYNCHRONIZE 1
			
 
				+
			
 
				+/* #undef STARPU_DEVEL */
			
 
				+/* #undef STARPU_MODEL_DEBUG */
			
 
				+/* #undef STARPU_NO_ASSERT */
			
 
				+/* #undef STARPU_DEBUG */
			
 
				+/* #undef STARPU_VERBOSE */
			
 
				+#define STARPU_GDB_PATH "/usr/bin/gdb"
			
 
				+
			
 
				+#define STARPU_HAVE_FFTW 1
			
 
				+#define STARPU_HAVE_FFTWF 1
			
 
				+#define STARPU_HAVE_FFTWL 1
			
 
				+#define STARPU_HAVE_CUFFTDOUBLECOMPLEX 1
			
 
				+
			
 
				+#define STARPU_HAVE_CURAND 1
			
 
				+
			
 
				+/**
			
 
				+   Define the maximum number of memory nodes managed by StarPU. The
			
 
				+   default value can be modified at configure by using the option \ref
			
 
				+   enable-maxnodes "--enable-maxnodes". Reducing it considerably
			
 
				+   reduces the memory used by StarPU data structures.
			
 
				+   @ingroup API_Workers_Properties
			
 
				+*/
			
 
				+#define STARPU_MAXNODES 16
			
 
				+
			
 
				+/**
			
 
				+   Define the maximum number of buffers that tasks will be able to
			
 
				+   take as parameters. The default value is 8, it can be changed by
			
 
				+   using the configure option \ref enable-maxbuffers
			
 
				+   "--enable-maxbuffers".
			
 
				+   @ingroup API_Codelet_And_Tasks
			
 
				+*/
			
 
				+#define STARPU_NMAXBUFS 8
			
 
				+
			
 
				+/**
			
 
				+   Define the maximum number of fxt mpi files that can be read when
			
 
				+   generating traces. The default value is 64, it can be changed by
			
 
				+   using the configure option \ref enable-fxt-max-files
			
 
				+   "--enable-fxt-max-files".
			
 
				+   @ingroup API_MPI_Support
			
 
				+*/
			
 
				+#define STARPU_FXT_MAX_FILES 64
			
 
				+
			
 
				+/**
			
 
				+   Define the maximum number of CPU workers managed by StarPU. The
			
 
				+   default value can be modified at configure by using the option \ref
			
 
				+   enable-maxcpus "--enable-maxcpus".
			
 
				+   @ingroup API_Workers_Properties
			
 
				+*/
			
 
				+#define STARPU_MAXCPUS 64
			
 
				+
			
 
				+/**
			
 
				+   Define the maximum number of NUMA nodes managed by StarPU. The
			
 
				+   default value can be modified at configure by using the option \ref
			
 
				+   enable-maxnumanodes "--enable-maxnumanodes".
			
 
				+   @ingroup API_Workers_Properties
			
 
				+*/
			
 
				+#define STARPU_MAXNUMANODES 2
			
 
				+
			
 
				+/**
			
 
				+ * Define the maximum number of CUDA devices that are supported by StarPU.
			
 
				+ * @ingroup API_CUDA_Extensions
			
 
				+ */
			
 
				+#define STARPU_MAXCUDADEVS 4
			
 
				+
			
 
				+/**
			
 
				+   Define the maximum number of OpenCL devices that are supported by
			
 
				+   StarPU.
			
 
				+   @ingroup API_OpenCL_Extensions
			
 
				+*/
			
 
				+#define STARPU_MAXOPENCLDEVS 8
			
 
				+
			
 
				+/**
			
 
				+   Define the maximum number of MIC devices that are supported by
			
 
				+   StarPU.
			
 
				+   @ingroup API_MIC_Extensions
			
 
				+*/
			
 
				+#define STARPU_MAXMICDEVS 0
			
 
				+
			
 
				+/**
			
 
				+   Define the maximum number of workers managed by StarPU.
			
 
				+   @ingroup API_Workers_Properties
			
 
				+*/
			
 
				+#define STARPU_NMAXWORKERS 80
			
 
				+
			
 
				+/**
			
 
				+   Define the maximum number of scheduling contexts managed by StarPU.
			
 
				+   The default value can be modified at configure by using the option
			
 
				+   \ref enable-max-sched-ctxs "--enable-max-sched-ctxs".
			
 
				+   @ingroup API_Scheduling_Policy
			
 
				+*/
			
 
				+#define STARPU_NMAX_SCHED_CTXS 10
			
 
				+
			
 
				+/**
			
 
				+   Define the maximum number of implementations per architecture. The
			
 
				+   default value can be modified at configure by using the option \ref
			
 
				+   enable-maximplementations "--enable-maximplementations".
			
 
				+   @ingroup API_Scheduling_Policy
			
 
				+*/
			
 
				+#define STARPU_MAXIMPLEMENTATIONS 4
			
 
				+
			
 
				+#define STARPU_MAXMPKERNELS 10
			
 
				+/* #undef STARPU_USE_SC_HYPERVISOR */
			
 
				+/* #undef STARPU_SC_HYPERVISOR_DEBUG */
			
 
				+/* #undef STARPU_HAVE_GLPK_H */
			
 
				+
			
 
				+#define STARPU_HAVE_CUDA_MEMCPY_PEER 1
			
 
				+#define STARPU_HAVE_LIBNUMA 1
			
 
				+
			
 
				+/* #undef STARPU_HAVE_WINDOWS */
			
 
				+#define STARPU_LINUX_SYS 1
			
 
				+#define STARPU_HAVE_SETENV 1
			
 
				+#define STARPU_HAVE_UNSETENV 1
			
 
				+#define STARPU_HAVE_UNISTD_H 1
			
 
				+#define STARPU_HAVE_HDF5 1
			
 
				+
			
 
				+/* #undef STARPU_USE_FXT */
			
 
				+/* #undef STARPU_FXT_LOCK_TRACES */
			
 
				+
			
 
				+#ifdef _MSC_VER
			
 
				+typedef long starpu_ssize_t;
			
 
				+#define __starpu_func__ __FUNCTION__
			
 
				+#else
			
 
				+#  include <sys/types.h>
			
 
				+typedef ssize_t starpu_ssize_t;
			
 
				+#define __starpu_func__ __func__
			
 
				+#endif
			
 
				+
			
 
				+#if defined(c_plusplus) || defined(__cplusplus)
			
 
				+/* inline is part of C++ */
			
 
				+#  define __starpu_inline inline
			
 
				+#elif defined(_MSC_VER) || defined(__HP_cc)
			
 
				+#  define __starpu_inline __inline
			
 
				+#else
			
 
				+#  define __starpu_inline __inline__
			
 
				+#endif
			
 
				+
			
 
				+/* #undef STARPU_QUICK_CHECK */
			
 
				+/* #undef STARPU_LONG_CHECK */
			
 
				+#define STARPU_USE_DRAND48 1
			
 
				+#define STARPU_USE_ERAND48_R 1
			
 
				+#define STARPU_HAVE_NEARBYINTF 1
			
 
				+#define STARPU_HAVE_RINTF 1
			
 
				+
			
 
				+#define STARPU_HAVE_HWLOC 1
			
 
				+#define STARPU_HAVE_PTHREAD_SPIN_LOCK 1
			
 
				+#define STARPU_HAVE_PTHREAD_BARRIER 1
			
 
				+#define STARPU_HAVE_PTHREAD_SETNAME_NP 1
			
 
				+#define STARPU_HAVE_STRUCT_TIMESPEC 1
			
 
				+#define STARPU_PTHREAD_MUTEX_INITIALIZER_ZERO 1
			
 
				+#define STARPU_PTHREAD_COND_INITIALIZER_ZERO 1
			
 
				+#define STARPU_PTHREAD_RWLOCK_INITIALIZER_ZERO 1
			
 
				+
			
 
				+/* This is only for building examples */
			
 
				+#define STARPU_HAVE_HELGRIND_H 1
			
 
				+
			
 
				+/* Enable Fortran to C MPI interface */
			
 
				+/* #undef HAVE_MPI_COMM_F2C */
			
 
				+
			
 
				+/* #undef STARPU_HAVE_DARWIN */
			
 
				+
			
 
				+#define STARPU_HAVE_CXX11 1
			
 
				+#define STARPU_HAVE_STRERROR_R 1
			
 
				+#define STARPU_HAVE_STATEMENT_EXPRESSIONS 1
			
 
				+/* #undef STARPU_PERF_MODEL_DIR */
			
 
				+
			
 
				+#endif
			
--- a/include/starpu/1.3/starpu_cublas.h
+++ b/include/starpu/1.3/starpu_cublas.h
@@ -0,0 +1,61 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_CUBLAS_H__
			
 
				+#define __STARPU_CUBLAS_H__
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   @ingroup API_CUDA_Extensions
			
 
				+   @{
			
 
				+ */
			
 
				+
			
 
				+/**
			
 
				+   Initialize CUBLAS on every CUDA device. The
			
 
				+   CUBLAS library must be initialized prior to any CUBLAS call. Calling
			
 
				+   starpu_cublas_init() will initialize CUBLAS on every CUDA device
			
 
				+   controlled by StarPU. This call blocks until CUBLAS has been properly
			
 
				+   initialized on every device.
			
 
				+*/
			
 
				+void starpu_cublas_init(void);
			
 
				+
			
 
				+/**
			
 
				+   Set the proper CUBLAS stream for CUBLAS v1. This must be called
			
 
				+   from the CUDA codelet before calling CUBLAS v1 kernels, so that
			
 
				+   they are queued on the proper CUDA stream. When using one thread
			
 
				+   per CUDA worker, this function does not do anything since the
			
 
				+   CUBLAS stream does not change, and is set once by
			
 
				+   starpu_cublas_init().
			
 
				+*/
			
 
				+void starpu_cublas_set_stream(void);
			
 
				+
			
 
				+/**
			
 
				+   Synchronously deinitialize the CUBLAS library on
			
 
				+   every CUDA device.
			
 
				+*/
			
 
				+void starpu_cublas_shutdown(void);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif /* __STARPU_CUBLAS_H__ */
			
--- a/include/starpu/1.3/starpu_cublas_v2.h
+++ b/include/starpu/1.3/starpu_cublas_v2.h
@@ -0,0 +1,49 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_CUBLAS_V2_H__
			
 
				+#define __STARPU_CUBLAS_V2_H__
			
 
				+
			
 
				+#if defined STARPU_USE_CUDA && !defined STARPU_DONT_INCLUDE_CUDA_HEADERS
			
 
				+
			
 
				+#include <cublas_v2.h>
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   @ingroup API_CUDA_Extensions
			
 
				+   @{
			
 
				+ */
			
 
				+
			
 
				+/**
			
 
				+   Return the CUBLAS handle to be used to queue CUBLAS kernels. It
			
 
				+   is properly initialized and configured for multistream by
			
 
				+   starpu_cublas_init().
			
 
				+*/
			
 
				+cublasHandle_t starpu_cublas_get_local_handle(void);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif
			
 
				+
			
 
				+#endif /* __STARPU_CUBLAS_V2_H__ */
			
--- a/include/starpu/1.3/starpu_cuda.h
+++ b/include/starpu/1.3/starpu_cuda.h
@@ -0,0 +1,140 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_CUDA_H__
			
 
				+#define __STARPU_CUDA_H__
			
 
				+
			
 
				+#include <starpu_config.h>
			
 
				+
			
 
				+#if defined STARPU_USE_CUDA && !defined STARPU_DONT_INCLUDE_CUDA_HEADERS
			
 
				+#include <cuda.h>
			
 
				+#include <cuda_runtime.h>
			
 
				+#include <cuda_runtime_api.h>
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   @defgroup API_CUDA_Extensions CUDA Extensions
			
 
				+   @{
			
 
				+ */
			
 
				+
			
 
				+/**
			
 
				+   Report a CUBLAS error.
			
 
				+*/
			
 
				+void starpu_cublas_report_error(const char *func, const char *file, int line, int status);
			
 
				+
			
 
				+/**
			
 
				+   Call starpu_cublas_report_error(), passing the current function, file and line position.
			
 
				+*/
			
 
				+#define STARPU_CUBLAS_REPORT_ERROR(status) starpu_cublas_report_error(__starpu_func__, __FILE__, __LINE__, status)
			
 
				+
			
 
				+/**
			
 
				+   Report a CUDA error.
			
 
				+*/
			
 
				+void starpu_cuda_report_error(const char *func, const char *file, int line, cudaError_t status);
			
 
				+
			
 
				+/**
			
 
				+   Call starpu_cuda_report_error(), passing the current function, file and line position.
			
 
				+*/
			
 
				+#define STARPU_CUDA_REPORT_ERROR(status) starpu_cuda_report_error(__starpu_func__, __FILE__, __LINE__, status)
			
 
				+
			
 
				+/**
			
 
				+   Return the current worker’s CUDA stream. StarPU provides a stream
			
 
				+   for every CUDA device controlled by StarPU. This function is only
			
 
				+   provided for convenience so that programmers can easily use
			
 
				+   asynchronous operations within codelets without having to create a
			
 
				+   stream by hand. Note that the application is not forced to use the
			
 
				+   stream provided by starpu_cuda_get_local_stream() and may also
			
 
				+   create its own streams. Synchronizing with
			
 
				+   <c>cudaDeviceSynchronize()</c> is allowed, but will reduce the
			
 
				+   likelihood of having all transfers overlapped.
			
 
				+*/
			
 
				+cudaStream_t starpu_cuda_get_local_stream(void);
			
 
				+
			
 
				+/**
			
 
				+   Return a pointer to device properties for worker \p workerid
			
 
				+   (assumed to be a CUDA worker).
			
 
				+*/
			
 
				+const struct cudaDeviceProp *starpu_cuda_get_device_properties(unsigned workerid);
			
 
				+
			
 
				+/**
			
 
				+   Copy \p ssize bytes from the pointer \p src_ptr on \p src_node
			
 
				+   to the pointer \p dst_ptr on \p dst_node. The function first tries to
			
 
				+   copy the data asynchronously (unless \p stream is <c>NULL</c>). If the
			
 
				+   asynchronous copy fails or if \p stream is <c>NULL</c>, it copies the
			
 
				+   data synchronously. The function returns <c>-EAGAIN</c> if the
			
 
				+   asynchronous launch was successful. It returns 0 if the synchronous
			
 
				+   copy was successful, or fails otherwise.
			
 
				+*/
			
 
				+int starpu_cuda_copy_async_sync(void *src_ptr, unsigned src_node, void *dst_ptr, unsigned dst_node, size_t ssize, cudaStream_t stream, enum cudaMemcpyKind kind);
			
 
				+
			
 
				+/**
			
 
				+   Copy \p numblocks blocks of \p blocksize bytes from the pointer \p src_ptr on
			
 
				+   \p src_node to the pointer \p dst_ptr on \p dst_node.
			
 
				+
			
 
				+   The blocks start at addresses which are ld_src (resp. ld_dst) bytes apart in
			
 
				+   the source (resp. destination) interface.
			
 
				+
			
 
				+   The function first tries to copy the data asynchronously (unless \p stream is
			
 
				+   <c>NULL</c>). If the asynchronous copy fails or if \p stream is <c>NULL</c>,
			
 
				+   it copies the data synchronously. The function returns <c>-EAGAIN</c> if the
			
 
				+   asynchronous launch was successful. It returns 0 if the synchronous copy was
			
 
				+   successful, or fails otherwise.
			
 
				+*/
			
 
				+int starpu_cuda_copy2d_async_sync(void *src_ptr, unsigned src_node, void *dst_ptr, unsigned dst_node,
			
 
				+				  size_t blocksize,
			
 
				+				  size_t numblocks, size_t ld_src, size_t ld_dst,
			
 
				+				  cudaStream_t stream, enum cudaMemcpyKind kind);
			
 
				+
			
 
				+/**
			
 
				+   Copy \p numblocks_1 * \p numblocks_2 blocks of \p blocksize bytes from the
			
 
				+   pointer \p src_ptr on \p src_node to the pointer \p dst_ptr on \p dst_node.
			
 
				+
			
 
				+   The blocks are grouped by \p numblocks_1 blocks whose start addresses are
			
 
				+   ld1_src (resp. ld1_dst) bytes apart in the source (resp. destination)
			
 
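				+   interface. There are \p numblocks_2 such groups, whose start addresses are ld2_src (resp. ld2_dst) bytes apart.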
			
 
				+
			
 
				+   The function first tries to copy the data asynchronously (unless \p stream is
			
 
				+   <c>NULL</c>). If the asynchronous copy fails or if \p stream is <c>NULL</c>,
			
 
				+   it copies the data synchronously. The function returns <c>-EAGAIN</c> if the
			
 
				+   asynchronous launch was successful. It returns 0 if the synchronous copy was
			
 
				+   successful, or fails otherwise.
			
 
				+*/
			
 
				+int starpu_cuda_copy3d_async_sync(void *src_ptr, unsigned src_node, void *dst_ptr, unsigned dst_node,
			
 
				+				  size_t blocksize,
			
 
				+				  size_t numblocks_1, size_t ld1_src, size_t ld1_dst,
			
 
				+				  size_t numblocks_2, size_t ld2_src, size_t ld2_dst,
			
 
				+				  cudaStream_t stream, enum cudaMemcpyKind kind);
			
 
				+
			
 
				+/**
			
 
				+   Call <c>cudaSetDevice(\p devid)</c> or <c>cudaGLSetGLDevice(\p devid)</c>,
			
 
				+   according to whether \p devid is listed in the field
			
 
				+   starpu_conf::cuda_opengl_interoperability.
			
 
				+*/
			
 
				+void starpu_cuda_set_device(unsigned devid);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif /* STARPU_USE_CUDA && !STARPU_DONT_INCLUDE_CUDA_HEADERS */
			
 
				+
			
 
				+#endif /* __STARPU_CUDA_H__ */
			
--- a/include/starpu/1.3/starpu_cusparse.h
+++ b/include/starpu/1.3/starpu_cusparse.h
@@ -0,0 +1,62 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_CUSPARSE_H__
			
 
				+#define __STARPU_CUSPARSE_H__
			
 
				+
			
 
				+#if defined STARPU_USE_CUDA && !defined STARPU_DONT_INCLUDE_CUDA_HEADERS
			
 
				+#include <cusparse.h>
			
 
				+#endif
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   @ingroup API_CUDA_Extensions
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   Initialize CUSPARSE on every CUDA device
			
 
				+   controlled by StarPU. This call blocks until CUSPARSE has been properly
			
 
				+   initialized on every device.
			
 
				+*/
			
 
				+void starpu_cusparse_init(void);
			
 
				+
			
 
				+/**
			
 
				+   Synchronously deinitialize the CUSPARSE library on
			
 
				+   every CUDA device.
			
 
				+*/
			
 
				+void starpu_cusparse_shutdown(void);
			
 
				+
			
 
				+#if defined STARPU_USE_CUDA && !defined STARPU_DONT_INCLUDE_CUDA_HEADERS
			
 
				+/**
			
 
				+   Return the CUSPARSE handle to be used to queue CUSPARSE
			
 
				+   kernels. It is properly initialized and configured for multistream by
			
 
				+   starpu_cusparse_init().
			
 
				+*/
			
 
				+cusparseHandle_t starpu_cusparse_get_local_handle(void);
			
 
				+#endif
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif /* __STARPU_CUSPARSE_H__ */
			
--- a/include/starpu/1.3/starpu_data.h
+++ b/include/starpu/1.3/starpu_data.h
@@ -0,0 +1,547 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_DATA_H__
			
 
				+#define __STARPU_DATA_H__
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   @defgroup API_Data_Management Data Management
			
 
				+   @brief Data management facilities provided by StarPU. We show how
			
 
				+   to use existing data interfaces in \ref API_Data_Interfaces, but
			
 
				+   developers can design their own data interfaces if required.
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+struct _starpu_data_state;
			
 
				+/**
			
 
				+   StarPU uses ::starpu_data_handle_t as an opaque handle to manage a
			
 
				+   piece of data. Once a piece of data has been registered to StarPU,
			
 
				+   it is associated to a ::starpu_data_handle_t which keeps track of
			
 
				+   the state of the piece of data over the entire machine, so that we
			
 
				+   can maintain data consistency and locate data replicates for
			
 
				+   instance.
			
 
				+*/
			
 
				+typedef struct _starpu_data_state* starpu_data_handle_t;
			
 
				+
			
 
				+/**
			
 
				+   Describe a StarPU data access mode
			
 
				+
			
 
				+   Note: when adding a flag here, update
			
 
				+   _starpu_detect_implicit_data_deps_with_handle
			
 
				+
			
 
				+   Note: other STARPU_* values in include/starpu_task_util.h
			
 
				+ */
			
 
				+enum starpu_data_access_mode
			
 
				+{
			
 
				+	STARPU_NONE=0, /**< todo */
			
 
				+	STARPU_R=(1<<0), /**< read-only mode */
			
 
				+	STARPU_W=(1<<1), /**< write-only mode */
			
 
				+	STARPU_RW=(STARPU_R|STARPU_W), /**< read-write mode. Equivalent to ::STARPU_R|::STARPU_W  */
			
 
				+	STARPU_SCRATCH=(1<<2), /**< A temporary buffer is allocated
			
 
				+				  for the task, but StarPU does not
			
 
				+				  enforce data consistency---i.e. each
			
 
				+				  device has its own buffer,
			
 
				+				  independently from each other (even
			
 
				+				  for CPUs), and no data transfer is
			
 
				+				  ever performed. This is useful for
			
 
				+				  temporary variables to avoid
			
 
				+				  allocating/freeing buffers inside
			
 
				+				  each task. Currently, no behavior is
			
 
				+				  defined concerning the relation with
			
 
				+				  the ::STARPU_R and ::STARPU_W modes
			
 
				+				  and the value provided at
			
 
				+				  registration --- i.e., the value of
			
 
				+				  the scratch buffer is undefined at
			
 
				+				  entry of the codelet function.  It
			
 
				+				  is being considered for future
			
 
				+				  extensions at least to define the
			
 
				+				  initial value.  For now, data to be
			
 
				+				  used in ::STARPU_SCRATCH mode should
			
 
				+				  be registered with node -1 and a
			
 
				+				  <c>NULL</c> pointer, since the value
			
 
				+				  of the provided buffer is simply
			
 
				+				  ignored for now.
			
 
				+			       */
			
 
				+	STARPU_REDUX=(1<<3), /**< todo */
			
 
				+	STARPU_COMMUTE=(1<<4), /**<  ::STARPU_COMMUTE can be passed
			
 
				+				  along ::STARPU_W or ::STARPU_RW to
			
 
				+				  express that StarPU can let tasks
			
 
				+				  commute, which is useful e.g. when
			
 
				+				  bringing a contribution into some
			
 
				+				  data, which can be done in any order
			
 
				+				  (but still require sequential
			
 
				+				  consistency against reads or
			
 
				+				  non-commutative writes).
			
 
				+			       */
			
 
				+	STARPU_SSEND=(1<<5), /**< used in starpu_mpi_insert_task() to
			
 
				+				specify the data has to be sent using
			
 
				+				a synchronous and non-blocking mode
			
 
				+				(see starpu_mpi_issend())
			
 
				+			     */
			
 
				+	STARPU_LOCALITY=(1<<6), /**< used to tell the scheduler which
			
 
				+				   data is the most important for the
			
 
				+				   task, and should thus be used to
			
 
				+				   try to group tasks on the same core
			
 
				+				   or cache, etc. For now only the ws
			
 
				+				   and lws schedulers take this flag
			
 
				+				   into account, and only when rebuilt
			
 
				+				   with \c USE_LOCALITY flag defined in
			
 
				+				   the
			
 
				+				   src/sched_policies/work_stealing_policy.c
			
 
				+				   source code.
			
 
				+				*/
			
 
				+	STARPU_ACCESS_MODE_MAX=(1<<7) /**< todo */
			
 
				+};
			
 
				+
			
 
				+struct starpu_data_interface_ops;
			
 
				+
			
 
				+/** Set the name of the data, to be shown in various profiling tools. */
			
 
				+void starpu_data_set_name(starpu_data_handle_t handle, const char *name);
			
 
				+
			
 
				+/**
			
 
				+   Set the coordinates of the data, to be shown in various profiling
			
 
				+   tools. \p dimensions is the size of the \p dims array. This can be
			
 
				+   for instance the tile coordinates within a big matrix.
			
 
				+*/
			
 
				+void starpu_data_set_coordinates_array(starpu_data_handle_t handle, unsigned dimensions, int dims[]);
			
 
				+
			
 
				+/**
			
 
				+   Set the coordinates of the data, to be shown in various profiling
			
 
				+   tools. \p dimensions is the number of subsequent \c int parameters.
			
 
				+   This can be for instance the tile coordinates within a big matrix.
			
 
				+*/
			
 
				+void starpu_data_set_coordinates(starpu_data_handle_t handle, unsigned dimensions, ...);
			
 
				+
			
 
				+/**
			
 
				+   Get the coordinates of the data, as set by a previous call to
			
 
				+   starpu_data_set_coordinates_array() or starpu_data_set_coordinates().
			
 
				+   \p dimensions is the size of the \p dims array.
			
 
				+   This returns the actual number of returned coordinates.
			
 
				+*/
			
 
				+unsigned starpu_data_get_coordinates_array(starpu_data_handle_t handle, unsigned dimensions, int dims[]);
			
 
				+
			
 
				+/**
			
 
				+   Unregister a data \p handle from StarPU. If the data was
			
 
				+   automatically allocated by StarPU because the home node was -1, all
			
 
				+   automatically allocated buffers are freed. Otherwise, a valid copy
			
 
				+   of the data is put back into the home node in the buffer that was
			
 
				+   initially registered. Using a data handle that has been
			
 
				+   unregistered from StarPU results in an undefined behaviour. In case
			
 
				+   we do not need to update the value of the data in the home node, we
			
 
				+   can use the function starpu_data_unregister_no_coherency() instead.
			
 
				+*/
			
 
				+void starpu_data_unregister(starpu_data_handle_t handle);
			
 
				+
			
 
				+/**
			
 
				+    Similar to starpu_data_unregister(), except that StarPU does not
			
 
				+    put back a valid copy into the home node, in the buffer that was
			
 
				+    initially registered.
			
 
				+*/
			
 
				+void starpu_data_unregister_no_coherency(starpu_data_handle_t handle);
			
 
				+
			
 
				+/**
			
 
				+   Destroy the data \p handle once it is no longer needed by any
			
 
				+   submitted task. No coherency is provided.
			
 
				+
			
 
				+   It is not safe to call starpu_data_unregister_submit() on a handle that
			
 
				+   comes from the registration of a non-NULL application home buffer, since the
			
 
				+   moment when the unregistration will happen is unknown to the
			
 
				+   application. Only calling starpu_shutdown() allows to be sure that the data
			
 
				+   was really unregistered.
			
 
				+*/
			
 
				+void starpu_data_unregister_submit(starpu_data_handle_t handle);
			
 
				+
			
 
				+/**
			
 
				+   Destroy all replicates of the data \p handle immediately. After
			
 
				+   data invalidation, the first access to \p handle must be performed
			
 
				+   in ::STARPU_W mode. Accessing an invalidated data in ::STARPU_R
			
 
				+   mode results in undefined behaviour.
			
 
				+*/
			
 
				+void starpu_data_invalidate(starpu_data_handle_t handle);
			
 
				+
			
 
				+/**
			
 
				+   Submit invalidation of the data \p handle after completion of
			
 
				+   previously submitted tasks.
			
 
				+*/
			
 
				+void starpu_data_invalidate_submit(starpu_data_handle_t handle);
			
 
				+
			
 
				+/**
			
 
				+   Specify that the data \p handle can be discarded without impacting
			
 
				+   the application.
			
 
				+*/
			
 
				+void starpu_data_advise_as_important(starpu_data_handle_t handle, unsigned is_important);
			
 
				+
			
 
				+/**
			
 
				+   @name Access registered data from the application
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   This macro can be used to acquire data, but not require it to be
			
 
				+   available on a given node, only enforce R/W dependencies. This can
			
 
				+   for instance be used to wait for tasks which produce the data, but
			
 
				+   without requesting a fetch to the main memory.
			
 
				+*/
			
 
				+#define STARPU_ACQUIRE_NO_NODE -1
			
 
				+
			
 
				+/**
			
 
				+   Similar to ::STARPU_ACQUIRE_NO_NODE, but will lock the data on all
			
 
				+   nodes, preventing them from being evicted for instance. This is
			
 
				+   mostly useful inside StarPU only.
			
 
				+*/
			
 
				+#define STARPU_ACQUIRE_NO_NODE_LOCK_ALL -2
			
 
				+
			
 
				+/**
			
 
				+   The application must call this function prior to accessing
			
 
				+   registered data from main memory outside tasks. StarPU ensures that
			
 
				+   the application will get an up-to-date copy of \p handle in main
			
 
				+   memory located where the data was originally registered, and that
			
 
				+   all concurrent accesses (e.g. from tasks) will be consistent with
			
 
				+   the access mode specified with \p mode. starpu_data_release() must
			
 
				+   be called once the application no longer needs to access the piece
			
 
				+   of data. Note that implicit data dependencies are also enforced by
			
 
				+   starpu_data_acquire(), i.e. starpu_data_acquire() will wait for all
			
 
				+   tasks scheduled to work on the data, unless they have been disabled
			
 
				+   explicitly by calling
			
 
				+   starpu_data_set_default_sequential_consistency_flag() or
			
 
				+   starpu_data_set_sequential_consistency_flag().
			
 
				+   starpu_data_acquire() is a blocking call, so that it cannot be
			
 
				+   called from tasks or from their callbacks (in that case,
			
 
				+   starpu_data_acquire() returns <c>-EDEADLK</c>). Upon successful
			
 
				+   completion, this function returns 0.
			
 
				+*/
			
 
				+int starpu_data_acquire(starpu_data_handle_t handle, enum starpu_data_access_mode mode);
			
 
				+
			
 
				+/**
			
 
				+   Similar to starpu_data_acquire(), except that the data will be
			
 
				+   available on the given memory node instead of main memory.
			
 
				+   ::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can
			
 
				+   be used instead of an explicit node number.
			
 
				+*/
			
 
				+int starpu_data_acquire_on_node(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode);
			
 
				+
			
 
				+/**
			
 
				+   Asynchronous equivalent of starpu_data_acquire(). When the data
			
 
				+   specified in \p handle is available in the access \p mode, the \p
			
 
				+   callback function is executed. The application may access
			
 
				+   the requested data during the execution of \p callback. The \p callback
			
 
				+   function must call starpu_data_release() once the application no longer
			
 
				+   needs to access the piece of data. Note that implicit data
			
 
				+   dependencies are also enforced by starpu_data_acquire_cb() in case they
			
 
				+   are not disabled. Contrary to starpu_data_acquire(), this function is
			
 
				+   non-blocking and may be called from task callbacks. Upon successful
			
 
				+   completion, this function returns 0.
			
 
				+*/
			
 
				+int starpu_data_acquire_cb(starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg);
			
 
				+
			
 
				+/**
			
 
				+   Similar to starpu_data_acquire_cb(), except that the
			
 
				+   data will be available on the given memory node instead of main
			
 
				+   memory.
			
 
				+   ::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can be
			
 
				+   used instead of an explicit node number.
			
 
				+*/
			
 
				+int starpu_data_acquire_on_node_cb(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg);
			
 
				+
			
 
				+/**
			
 
				+   Similar to starpu_data_acquire_cb() with the possibility of
			
 
				+   enabling or disabling data dependencies.
			
 
				+   When the data specified in \p handle is available in the access
			
 
				+   \p mode, the \p callback function is executed. The application may access
			
 
				+   the requested data during the execution of this \p callback. The \p callback
			
 
				+   function must call starpu_data_release() once the application no longer
			
 
				+   needs to access the piece of data. Note that implicit data
			
 
				+   dependencies are also enforced by starpu_data_acquire_cb_sequential_consistency() in case they
			
 
				+   are not disabled specifically for the given \p handle or by the parameter \p sequential_consistency.
			
 
				+   Similarly to starpu_data_acquire_cb(), this function is
			
 
				+   non-blocking and may be called from task callbacks. Upon successful
			
 
				+   completion, this function returns 0.
			
 
				+*/
			
 
				+int starpu_data_acquire_cb_sequential_consistency(starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency);
			
 
				+
			
 
				+/**
			
 
				+   Similar to starpu_data_acquire_cb_sequential_consistency(), except that the
			
 
				+   data will be available on the given memory node instead of main
			
 
				+   memory.
			
 
				+   ::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can be used instead of an
			
 
				+   explicit node number.
			
 
				+*/
			
 
				+int starpu_data_acquire_on_node_cb_sequential_consistency(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency);
			
 
				+
			
 
				+int starpu_data_acquire_on_node_cb_sequential_consistency_quick(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency, int quick);
			
 
				+
			
 
				+/**
			
 
				+   Similar to starpu_data_acquire_on_node_cb_sequential_consistency(),
			
 
				+   except that the \e pre_sync_jobid and \e post_sync_jobid parameters can be used
			
 
				+   to retrieve the jobid of the synchronization tasks. \e pre_sync_jobid happens
			
 
				+   just before the acquisition, and \e post_sync_jobid happens just after the
			
 
				+   release.
			
 
				+*/
			
 
				+int starpu_data_acquire_on_node_cb_sequential_consistency_sync_jobids(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency, int quick, long *pre_sync_jobid, long *post_sync_jobid);
			
 
				+
			
 
				+/**
			
 
				+   The application can call this function instead of starpu_data_acquire() so as to
			
 
				+   acquire the data like starpu_data_acquire(), but only if all
			
 
				+   previously-submitted tasks have completed, in which case starpu_data_acquire_try()
			
 
				+   returns 0. StarPU will have ensured that the application will get an up-to-date
			
 
				+   copy of \p handle in main memory located where the data was originally
			
 
				+   registered. starpu_data_release() must be called once the application no longer
			
 
				+   needs to access the piece of data.
			
 
				+*/
			
 
				+int starpu_data_acquire_try(starpu_data_handle_t handle, enum starpu_data_access_mode mode);
			
 
				+
			
 
				+/**
			
 
				+   Similar to starpu_data_acquire_try(), except that the
			
 
				+   data will be available on the given memory node instead of main
			
 
				+   memory.
			
 
				+   ::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can be used instead of an
			
 
				+   explicit node number.
			
 
				+*/
			
 
				+int starpu_data_acquire_on_node_try(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode);
			
 
				+
			
 
				+#ifdef __GNUC__
			
 
				+
			
 
				+/**
			
 
				+   STARPU_DATA_ACQUIRE_CB() is the same as starpu_data_acquire_cb(),
			
 
				+   except that the code to be executed in a callback is directly provided
			
 
				+   as a macro parameter, and the data \p handle is automatically released
			
 
				+   after it. This makes it easy to execute code which depends on the
			
 
				+   value of some registered data. This is non-blocking too and may be
			
 
				+   called from task callbacks. It relies on the GCC nested functions extension.
			
 
				+*/
			
 
				+#  define STARPU_DATA_ACQUIRE_CB(handle, mode, code) do \
			
 
				+	{						\
			
 
				+		void callback(void *arg)		\
			
 
				+		{					\
			
 
				+			code;				\
			
 
				+			starpu_data_release(handle);  	\
			
 
				+		}			      		\
			
 
				+		starpu_data_acquire_cb(handle, mode, callback, NULL);	\
			
 
				+	}						\
			
 
				+	while(0)
			
 
				+#endif /* __GNUC__ */
			
 
				+
			
 
				+/**
			
 
				+   Release the piece of data acquired by the
			
 
				+   application either by starpu_data_acquire() or by
			
 
				+   starpu_data_acquire_cb().
			
 
				+*/
			
 
				+void starpu_data_release(starpu_data_handle_t handle);
			
 
				+
			
 
				+/**
			
 
				+   Similar to starpu_data_release(), except that the data
			
 
				+   will be available on the given memory \p node instead of main memory.
			
 
				+   The \p node parameter must be exactly the same as the corresponding \c
			
 
				+   starpu_data_acquire_on_node* call.
			
 
				+*/
			
 
				+void starpu_data_release_on_node(starpu_data_handle_t handle, int node);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   This is an arbiter, which implements an advanced but centralized
			
 
				+   management of concurrent data accesses, see \ref
			
 
				+   ConcurrentDataAccess for the details.
			
 
				+*/
			
 
				+typedef struct starpu_arbiter *starpu_arbiter_t;
			
 
				+
			
 
				+/**
			
 
				+   Create a data access arbiter, see \ref ConcurrentDataAccess for the
			
 
				+   details
			
 
				+*/
			
 
				+starpu_arbiter_t starpu_arbiter_create(void) STARPU_ATTRIBUTE_MALLOC;
			
 
				+
			
 
				+/**
			
 
				+   Make access to \p handle managed by \p arbiter
			
 
				+*/
			
 
				+void starpu_data_assign_arbiter(starpu_data_handle_t handle, starpu_arbiter_t arbiter);
			
 
				+
			
 
				+/**
			
 
				+   Destroy the \p arbiter . This must only be called after all data
			
 
				+   assigned to it have been unregistered.
			
 
				+*/
			
 
				+void starpu_arbiter_destroy(starpu_arbiter_t arbiter);
			
 
				+
			
 
				+/**
			
 
				+   Explicitly ask StarPU to allocate room for a piece of data on
			
 
				+   the specified memory \p node.
			
 
				+*/
			
 
				+int starpu_data_request_allocation(starpu_data_handle_t handle, unsigned node);
			
 
				+
			
 
				+/**
			
 
				+   Issue a fetch request for the data \p handle to \p node, i.e.
			
 
				+   requests that the data be replicated to the given node as soon as possible, so that it is
			
 
				+   available there for tasks. If \p async is 0, the call will
			
 
				+   block until the transfer is achieved, else the call will return immediately,
			
 
				+   after having just queued the request. In the latter case, the request will
			
 
				+   asynchronously wait for the completion of any task writing on the
			
 
				+   data.
			
 
				+*/
			
 
				+int starpu_data_fetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async);
			
 
				+
			
 
				+/**
			
 
				+   Issue a prefetch request for the data \p handle to \p node, i.e.
			
 
				+   requests that the data be replicated to \p node when there is room for it, so that it is
			
 
				+   available there for tasks. If \p async is 0, the call will
			
 
				+   block until the transfer is achieved, else the call will return immediately,
			
 
				+   after having just queued the request. In the latter case, the request will
			
 
				+   asynchronously wait for the completion of any task writing on the
			
 
				+   data.
			
 
				+*/
			
 
				+int starpu_data_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async);
			
 
				+
			
 
				+int starpu_data_prefetch_on_node_prio(starpu_data_handle_t handle, unsigned node, unsigned async, int prio);
			
 
				+
			
 
				+/**
			
 
				+   Issue an idle prefetch request for the data \p handle to \p node, i.e.
			
 
				+   requests that the data be replicated to \p node, so that it is
			
 
				+   available there for tasks, but only when the bus is really idle. If \p async is 0, the call will
			
 
				+   block until the transfer is achieved, else the call will return immediately,
			
 
				+   after having just queued the request. In the latter case, the request will
			
 
				+   asynchronously wait for the completion of any task writing on the data.
			
 
				+*/
			
 
				+int starpu_data_idle_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async);
			
 
				+int starpu_data_idle_prefetch_on_node_prio(starpu_data_handle_t handle, unsigned node, unsigned async, int prio);
			
 
				+
			
 
				+/**
			
 
				+   Check whether a valid copy of \p handle is currently available on
			
 
				+   memory node \p node.
			
 
				+*/
			
 
				+unsigned starpu_data_is_on_node(starpu_data_handle_t handle, unsigned node);
			
 
				+
			
 
				+/**
			
 
				+   Advise StarPU that \p handle will not be used in the near future, and is
			
 
				+   thus a good candidate for eviction from GPUs. StarPU will thus write its value
			
 
				+   back to its home node when the bus is idle, and select this data in priority
			
 
				+   for eviction when memory gets low.
			
 
				+*/
			
 
				+void starpu_data_wont_use(starpu_data_handle_t handle);
			
 
				+
			
 
				+/**
			
 
				+   Set the write-through mask of the data \p handle (and
			
 
				+   its children), i.e. a bitmask of nodes where the data should be always
			
 
				+   replicated after modification. It also prevents the data from being
			
 
				+   evicted from these nodes when memory gets scarce. When the data is
			
 
				+   modified, it is automatically transferred into those memory nodes. For
			
 
				+   instance a <c>1<<0</c> write-through mask means that the CUDA workers
			
 
				+   will commit their changes in main memory (node 0).
			
 
				+*/
			
 
				+void starpu_data_set_wt_mask(starpu_data_handle_t handle, uint32_t wt_mask);
			
 
				+
			
 
				+/**
			
 
				+   @name Implicit Data Dependencies
			
 
				+   In this section, we describe how StarPU makes it possible to
			
 
				+   insert implicit task dependencies in order to enforce sequential data
			
 
				+   consistency. When this data consistency is enabled on a specific data
			
 
				+   handle, any data access will appear as sequentially consistent from
			
 
				+   the application. For instance, if the application submits two tasks
			
 
				+   that access the same piece of data in read-only mode, and then a third
			
 
				+   task that accesses it in write mode, dependencies will be added between
			
 
				+   the two first tasks and the third one. Implicit data dependencies are
			
 
				+   also inserted in the case of data accesses from the application.
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   Set the data consistency mode associated to a data handle. The
			
 
				+   consistency mode set using this function has the priority over the
			
 
				+   default mode which can be set with
			
 
				+   starpu_data_set_default_sequential_consistency_flag().
			
 
				+*/
			
 
				+void starpu_data_set_sequential_consistency_flag(starpu_data_handle_t handle, unsigned flag);
			
 
				+
			
 
				+/**
			
 
				+   Get the data consistency mode associated to the data handle \p handle
			
 
				+*/
			
 
				+unsigned starpu_data_get_sequential_consistency_flag(starpu_data_handle_t handle);
			
 
				+
			
 
				+/**
			
 
				+   Return the default sequential consistency flag
			
 
				+*/
			
 
				+unsigned starpu_data_get_default_sequential_consistency_flag(void);
			
 
				+
			
 
				+/**
			
 
				+   Set the default sequential consistency flag. If a non-zero
			
 
				+   value is passed, a sequential data consistency will be enforced for
			
 
				+   all handles registered after this function call, otherwise it is
			
 
				+   disabled. By default, StarPU enables sequential data consistency. It
			
 
				+   is also possible to select the data consistency mode of a specific
			
 
				+   data handle with the function
			
 
				+   starpu_data_set_sequential_consistency_flag().
			
 
				+*/
			
 
				+void starpu_data_set_default_sequential_consistency_flag(unsigned flag);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   Set whether this data should be eligible to be evicted to disk
			
 
				+   storage (1) or not (0). The default is 1.
			
 
				+*/
			
 
				+void starpu_data_set_ooc_flag(starpu_data_handle_t handle, unsigned flag);
			
 
				+/**
			
 
				+   Get whether this data was set to be eligible to be evicted to disk
			
 
				+   storage (1) or not (0).
			
 
				+*/
			
 
				+unsigned starpu_data_get_ooc_flag(starpu_data_handle_t handle);
			
 
				+
			
 
				+/**
			
 
				+   Query the status of \p handle on the specified \p memory_node.
			
 
				+*/
			
 
				+void starpu_data_query_status(starpu_data_handle_t handle, int memory_node, int *is_allocated, int *is_valid, int *is_requested);
			
 
				+
			
 
				+struct starpu_codelet;
			
 
				+
			
 
				+/**
			
 
				+   Set the codelets to be used for \p handle when it is accessed in the
			
 
				+   mode ::STARPU_REDUX. Per-worker buffers will be initialized with
			
 
				+   the codelet \p init_cl, and reduction between per-worker buffers will be
			
 
				+   done with the codelet \p redux_cl.
			
 
				+*/
			
 
				+void starpu_data_set_reduction_methods(starpu_data_handle_t handle, struct starpu_codelet *redux_cl, struct starpu_codelet *init_cl);
			
 
				+
			
 
				+struct starpu_data_interface_ops* starpu_data_get_interface_ops(starpu_data_handle_t handle);
			
 
				+
			
 
				+unsigned starpu_data_test_if_allocated_on_node(starpu_data_handle_t handle, unsigned memory_node);
			
 
				+
			
 
				+void starpu_memchunk_tidy(unsigned memory_node);
			
 
				+
			
 
				+/**
			
 
				+   Set the field \c user_data for the \p handle to \p user_data . It can
			
 
				+   then be retrieved with starpu_data_get_user_data(). \p user_data can be any
			
 
				+   application-defined value, for instance a pointer to an object-oriented
			
 
				+   container for the data.
			
 
				+*/
			
 
				+void starpu_data_set_user_data(starpu_data_handle_t handle, void* user_data);
			
 
				+
			
 
				+/**
			
 
				+   Retrieve the field \c user_data previously set for the \p handle.
			
 
				+*/
			
 
				+void *starpu_data_get_user_data(starpu_data_handle_t handle);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif /* __STARPU_DATA_H__ */
			
--- a/include/starpu/1.3/starpu_data_filters.h
+++ b/include/starpu/1.3/starpu_data_filters.h
@@ -0,0 +1,542 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ * Copyright (C) 2010       Mehdi Juhoor
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_DATA_FILTERS_H__
			
 
				+#define __STARPU_DATA_FILTERS_H__
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#include <stdarg.h>
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   @defgroup API_Data_Partition Data Partition
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+struct starpu_data_interface_ops;
			
 
				+
			
 
				+/**
			
 
				+   Describe a data partitioning operation, to be given to starpu_data_partition()
			
 
				+*/
			
 
				+struct starpu_data_filter
			
 
				+{
			
 
				+	/**
			
 
				+	   Fill the \p child_interface structure with interface information
			
 
				+	   for the \p id -th child of the parent \p father_interface (among
			
 
				+	   \p nparts). The \p filter structure is provided, allowing to inspect the
			
 
				+	   starpu_data_filter::filter_arg and starpu_data_filter::filter_arg_ptr
			
 
				+	   parameters.
			
 
				+	   The details of what needs to be filled in \p child_interface vary according
			
 
				+	   to the data interface, but generally speaking:
			
 
				+	   <ul>
			
 
				+	   <li> <c>id</c> is usually just copied over from the father,
			
 
				+	   when the sub data has the same structure as the father,
			
 
				+	   e.g. a subvector is a vector, a submatrix is a matrix, etc.
			
 
				+	   This is however not the case for instance when dividing a
			
 
				+	   BCSR matrix into its dense blocks, which then are matrices.
			
 
				+	   </li>
			
 
				+	   <li> <c>nx</c>, <c>ny</c> and alike are usually divided by
			
 
				+	   the number of subdata, depending how the subdivision is
			
 
				+	   done (e.g. nx division vs ny division for vertical matrix
			
 
				+	   division vs horizontal matrix division). </li>
			
 
				+	   <li> <c>ld</c> for matrix interfaces are usually just
			
 
				+	   copied over: the leading dimension (ld) usually does not
			
 
				+	   change. </li>
			
 
				+	   <li> <c>elemsize</c> is usually just copied over. </li>
			
 
				+	   <li> <c>ptr</c>, the pointer to the data, has to be
			
 
				+	   computed according to \p id and the father's <c>ptr</c>, so
			
 
				+	   as to point to the start of the sub data. This should
			
 
				+	   however be done only if the father has <c>ptr</c> different
			
 
				+	   from NULL: in the OpenCL case notably, the
			
 
				+	   <c>dev_handle</c> and <c>offset</c> fields are used
			
 
				+	   instead. </li>
			
 
				+	   <li> <c>dev_handle</c> should be just copied over from the
			
 
				+	   parent. </li>
			
 
				+	   <li> <c>offset</c> has to be computed according to \p id and
			
 
				+	   the father's <c>offset</c>, so as to provide the offset of
			
 
				+	   the start of the sub data. This is notably used for the
			
 
				+	   OpenCL case. </li>
			
 
				+	   </ul>
			
 
				+	*/
			
 
				+	void (*filter_func)(void *father_interface, void *child_interface, struct starpu_data_filter *, unsigned id, unsigned nparts);
			
 
				+	unsigned nchildren; /**< Number of parts to partition the data into. */
			
 
				+	/**
			
 
				+	   Return the number of children. This can be used instead of
			
 
				+	   starpu_data_filter::nchildren when the number of children depends
			
 
				+	   on the actual data (e.g. the number of blocks in a sparse
			
 
				+	   matrix).
			
 
				+	*/
			
 
				+	unsigned (*get_nchildren)(struct starpu_data_filter *, starpu_data_handle_t initial_handle);
			
 
				+	/**
			
 
				+	   When children use different data interfaces,
			
 
				+	   return which interface is used by child number \p id.
			
 
				+	*/
			
 
				+	struct starpu_data_interface_ops *(*get_child_ops)(struct starpu_data_filter *, unsigned id);
			
 
				+	unsigned filter_arg; /**< Additional parameter for the filter function */
			
 
				+	/**
			
 
				+	   Additional pointer parameter for
			
 
				+	   the filter function, such as the
			
 
				+	   sizes of the different parts. */
			
 
				+	void *filter_arg_ptr;
			
 
				+};
			
 
				+
			
 
				+/**
			
 
				+   @name Basic API
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   Request the partitioning of \p initial_handle into several subdata
			
 
				+   according to the filter \p f.
			
 
				+
			
 
				+   Here is an example of how to use the function.
			
 
				+   \code{.c}
			
 
				+   struct starpu_data_filter f =
			
 
				+   {
			
 
				+     .filter_func = starpu_matrix_filter_block,
			
 
				+     .nchildren = nslicesx
			
 
				+   };
			
 
				+   starpu_data_partition(A_handle, &f);
			
 
				+    \endcode
			
 
				+*/
			
 
				+void starpu_data_partition(starpu_data_handle_t initial_handle, struct starpu_data_filter *f);
			
 
				+
			
 
				+/**
			
 
				+  Unapply the filter which has been applied to \p root_data, thus
			
 
				+  unpartitioning the data. The pieces of data are collected back into
			
 
				+  one big piece in the \p gathering_node (usually ::STARPU_MAIN_RAM).
			
 
				+  Tasks working on the partitioned data will be waited for
			
 
				+  by starpu_data_unpartition().
			
 
				+
			
 
				+  Here is an example of how to use the function.
			
 
				+  \code{.c}
			
 
				+  starpu_data_unpartition(A_handle, STARPU_MAIN_RAM);
			
 
				+  \endcode
			
 
				+*/
			
 
				+void starpu_data_unpartition(starpu_data_handle_t root_data, unsigned gathering_node);
			
 
				+
			
 
				+/**
			
 
				+   Return the \p i -th child of the given \p handle, which must have
			
 
				+   been partitioned beforehand.
			
 
				+*/
			
 
				+starpu_data_handle_t starpu_data_get_child(starpu_data_handle_t handle, unsigned i);
			
 
				+
			
 
				+/**
			
 
				+   Return the number of children \p handle has been partitioned into.
			
 
				+*/
			
 
				+int starpu_data_get_nb_children(starpu_data_handle_t handle);
			
 
				+
			
 
				+/**
			
 
				+   After partitioning a StarPU data by applying a filter,
			
 
				+   starpu_data_get_sub_data() can be used to get handles for each of the
			
 
				+   data portions. \p root_data is the parent data that was partitioned.
			
 
				+   \p depth is the number of filters to traverse (in case several filters
			
 
				+   have been applied, to e.g. partition in row blocks, and then in column
			
 
				+   blocks), and the subsequent parameters are the indexes. The function
			
 
				+   returns a handle to the subdata.
			
 
				+
			
 
				+   Here is an example of how to use the function.
			
 
				+   \code{.c}
			
 
				+   h = starpu_data_get_sub_data(A_handle, 1, taskx);
			
 
				+   \endcode
			
 
				+*/
			
 
				+starpu_data_handle_t starpu_data_get_sub_data(starpu_data_handle_t root_data, unsigned depth, ... );
			
 
				+
			
 
				+/**
			
 
				+   Similar to starpu_data_get_sub_data() but use a \c va_list for the
			
 
				+   parameter list.
			
 
				+*/
			
 
				+starpu_data_handle_t starpu_data_vget_sub_data(starpu_data_handle_t root_data, unsigned depth, va_list pa);
			
 
				+
			
 
				+/**
			
 
				+   Apply \p nfilters filters to the handle designated by \p
			
 
				+   root_data recursively. \p nfilters pointers to variables of the
			
 
				+   type starpu_data_filter should be given.
			
 
				+*/
			
 
				+void starpu_data_map_filters(starpu_data_handle_t root_data, unsigned nfilters, ...);
			
 
				+
			
 
				+/**
			
 
				+   Apply \p nfilters filters to the handle designated by
			
 
				+   \p root_data recursively. Use a \c va_list of pointers to
			
 
				+   variables of the type starpu_data_filter.
			
 
				+*/
			
 
				+void starpu_data_vmap_filters(starpu_data_handle_t root_data, unsigned nfilters, va_list pa);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Asynchronous API
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   Plan to partition \p initial_handle into several subdata according to
			
 
				+   the filter \p f.
			
 
				+   The handles are returned into the \p children array, which has to be
			
 
				+   the same size as the number of parts described in \p f. These handles
			
 
				+   are not immediately usable, starpu_data_partition_submit() has to be
			
 
				+   called to submit the actual partitioning.
			
 
				+
			
 
				+   Here is an example of how to use the function:
			
 
				+   \code{.c}
			
 
				+   starpu_data_handle_t children[nslicesx];
			
 
				+   struct starpu_data_filter f =
			
 
				+   {
			
 
				+     .filter_func = starpu_matrix_filter_block,
			
 
				+     .nchildren = nslicesx
			
 
				+     };
			
 
				+     starpu_data_partition_plan(A_handle, &f, children);
			
 
				+\endcode
			
 
				+*/
			
 
				+void starpu_data_partition_plan(starpu_data_handle_t initial_handle, struct starpu_data_filter *f, starpu_data_handle_t *children);
			
 
				+
			
 
				+/**
			
 
				+   Submit the actual partitioning of \p initial_handle into the \p nparts
			
 
				+   \p children handles. This call is asynchronous, it only submits that the
			
 
				+   partitioning should be done, so that the \p children handles can now be used to
			
 
				+   submit tasks, and \p initial_handle can not be used to submit tasks any more (to
			
 
				+   guarantee coherency).
			
 
				+   For instance,
			
 
				+   \code{.c}
			
 
				+   starpu_data_partition_submit(A_handle, nslicesx, children);
			
 
				+   \endcode
			
 
				+*/
			
 
				+void starpu_data_partition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children);
			
 
				+
			
 
				+/**
			
 
				+   Similar to starpu_data_partition_submit(), but do not invalidate \p
			
 
				+   initial_handle. This allows to continue using it, but the application has to be
			
 
				+   careful not to write to \p initial_handle or \p children handles, only read from
			
 
				+   them, since the coherency is otherwise not guaranteed.  This thus allows to
			
 
				+   submit various tasks which concurrently read from various partitions of the data.
			
 
				+
			
 
				+   When the application wants to write to \p initial_handle again, it should call
			
 
				+   starpu_data_unpartition_submit(), which will properly add dependencies between the
			
 
				+   reads on the \p children and the writes to be submitted.
			
 
				+
			
 
				+   If instead the application wants to write to \p children handles, it should
			
 
				+   call starpu_data_partition_readwrite_upgrade_submit(), which will correctly add
			
 
				+   dependencies between the reads on the \p initial_handle and the writes to be
			
 
				+   submitted.
			
 
				+*/
			
 
				+void starpu_data_partition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children);
			
 
				+
			
 
				+/**
			
 
				+   Assume that a partitioning of \p initial_handle has already been submitted
			
 
				+   in readonly mode through starpu_data_partition_readonly_submit(), and will upgrade
			
 
				+   that partitioning into read-write mode for the \p children, by invalidating \p
			
 
				+   initial_handle, and adding the necessary dependencies.
			
 
				+*/
			
 
				+void starpu_data_partition_readwrite_upgrade_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children);
			
 
				+
			
 
				+/**
			
 
				+   Assuming that \p initial_handle is partitioned into \p children,
			
 
				+   submit an unpartitioning of \p initial_handle, i.e. submit a
			
 
				+   gathering of the pieces on the requested \p gathering_node memory
			
 
				+   node, and submit an invalidation of the children.
			
 
				+ */
			
 
				+void starpu_data_unpartition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node);
			
 
				+
			
 
				+void starpu_data_unpartition_submit_r(starpu_data_handle_t initial_handle, int gathering_node);
			
 
				+
			
 
				+/**
			
 
				+   Similar to starpu_data_partition_submit(), but do not invalidate \p
			
 
				+   initial_handle. This allows to continue using it, but the application has to be
			
 
				+   careful not to write to \p initial_handle or \p children handles, only read from
			
 
				+   them, since the coherency is otherwise not guaranteed.  This thus allows to
			
 
				+   submit various tasks which concurrently read from various
			
 
				+   partitions of the data.
			
 
				+*/
			
 
				+void starpu_data_unpartition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node);
			
 
				+
			
 
				+/**
			
 
				+   Clear the partition planning established between \p root_data and
			
 
				+   \p children with starpu_data_partition_plan(). This will notably
			
 
				+   submit an unregister of all the \p children, which can thus not be
			
 
				+   used any more afterwards.
			
 
				+*/
			
 
				+void starpu_data_partition_clean(starpu_data_handle_t root_data, unsigned nparts, starpu_data_handle_t *children);
			
 
				+
			
 
				+/**
			
 
				+   Similar to starpu_data_unpartition_submit_sequential_consistency()
			
 
				+   but allow to specify a callback function for the unpartitioning task
			
 
				+*/
			
 
				+void starpu_data_unpartition_submit_sequential_consistency_cb(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gather_node, int sequential_consistency, void (*callback_func)(void *), void *callback_arg);
			
 
				+
			
 
				+/**
			
 
				+   Similar to starpu_data_partition_submit() but also allow to specify
			
 
				+   the coherency to be used for the main data \p initial_handle
			
 
				+   through the parameter \p sequential_consistency.
			
 
				+*/
			
 
				+void starpu_data_partition_submit_sequential_consistency(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int sequential_consistency);
			
 
				+
			
 
				+/**
			
 
				+   Similar to starpu_data_unpartition_submit() but also allow to specify
			
 
				+   the coherency to be used for the main data \p initial_handle
			
 
				+   through the parameter \p sequential_consistency.
			
 
				+*/
			
 
				+void starpu_data_unpartition_submit_sequential_consistency(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node, int sequential_consistency);
			
 
				+
			
 
				+/**
			
 
				+   Disable the automatic partitioning of the data \p handle for which
			
 
				+   an asynchronous plan has previously been submitted
			
 
				+*/
			
 
				+void starpu_data_partition_not_automatic(starpu_data_handle_t handle);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Predefined BCSR Filter Functions
			
 
				+   Predefined partitioning functions for BCSR data. Examples on how to
			
 
				+   use them are shown in \ref PartitioningData.
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   Partition a block-sparse matrix into dense matrices.
			
 
				+   starpu_data_filter::get_child_ops needs to be set to
			
 
				+   starpu_bcsr_filter_canonical_block_child_ops()
			
 
				+*/
			
 
				+void starpu_bcsr_filter_canonical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				+/**
			
 
				+   Return the child_ops of the partition obtained with starpu_bcsr_filter_canonical_block().
			
 
				+*/
			
 
				+struct starpu_data_interface_ops *starpu_bcsr_filter_canonical_block_child_ops(struct starpu_data_filter *f, unsigned child);
			
 
				+
			
 
				+/**
			
 
				+   Partition a block-sparse matrix into block-sparse matrices.
			
 
				+
			
 
				+   The split is done along the leading dimension, i.e. along adjacent nnz blocks.
			
 
				+*/
			
 
				+void starpu_bcsr_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Predefined CSR Filter Functions
			
 
				+   Predefined partitioning functions for CSR data. Examples on how to
			
 
				+   use them are shown in \ref PartitioningData.
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   Partition a CSR sparse matrix into vertical CSR sparse matrices.
			
 
				+*/
			
 
				+void starpu_csr_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Predefined Matrix Filter Functions
			
 
				+   Predefined partitioning functions for matrix
			
 
				+   data. Examples on how to use them are shown in \ref
			
 
				+   PartitioningData.
			
 
				+   Note: this is using the C element order which is row-major, i.e. elements
			
 
				+   with consecutive x coordinates are consecutive in memory.
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   Partition a dense Matrix along the x dimension, thus getting (x/\p
			
 
				+   nparts ,y) matrices. If \p nparts does not divide x, the last
			
 
				+   submatrix contains the remainder.
			
 
				+*/
			
 
				+void starpu_matrix_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				+
			
 
				+/**
			
 
				+   Partition a dense Matrix along the x dimension, with a
			
 
				+   shadow border <c>filter_arg_ptr</c>, thus getting ((x-2*shadow)/\p
			
 
				+   nparts +2*shadow,y) matrices. If \p nparts does not divide x-2*shadow,
			
 
				+   the last submatrix contains the remainder.
			
 
				+
			
 
				+   <b>IMPORTANT</b>: This can
			
 
				+   only be used for read-only access, as no coherency is enforced for the
			
 
				+   shadowed parts. A usage example is available in
			
 
				+   examples/filters/shadow2d.c
			
 
				+*/
			
 
				+void starpu_matrix_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				+
			
 
				+/**
			
 
				+   Partition a dense Matrix along the y dimension, thus getting
			
 
				+   (x,y/\p nparts) matrices. If \p nparts does not divide y, the last
			
 
				+   submatrix contains the remainder.
			
 
				+*/
			
 
				+void starpu_matrix_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				+
			
 
				+/**
			
 
				+   Partition a dense Matrix along the y dimension, with a
			
 
				+   shadow border <c>filter_arg_ptr</c>, thus getting
			
 
				+   (x,(y-2*shadow)/\p nparts +2*shadow) matrices. If \p nparts does not
			
 
				+   divide y-2*shadow, the last submatrix contains the remainder.
			
 
				+
			
 
				+   <b>IMPORTANT</b>: This can only be used for read-only access, as no
			
 
				+   coherency is enforced for the shadowed parts. A usage example is
			
 
				+   available in examples/filters/shadow2d.c
			
 
				+*/
			
 
				+void starpu_matrix_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Predefined Vector Filter Functions
			
 
				+   Predefined partitioning functions for vector
			
 
				+   data. Examples on how to use them are shown in \ref
			
 
				+   PartitioningData.
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   Return in \p child_interface the \p id th element of the vector
			
 
				+   represented by \p father_interface once partitioned in \p nparts chunks of
			
 
				+   equal size.
			
 
				+*/
			
 
				+void starpu_vector_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				+
			
 
				+/**
			
 
				+   Return in \p child_interface the \p id th element of the vector
			
 
				+   represented by \p father_interface once partitioned in \p nparts chunks of
			
 
				+   equal size with a shadow border <c>filter_arg_ptr</c>, thus getting a vector
			
 
				+   of size <c>(n-2*shadow)/nparts+2*shadow</c>. The <c>filter_arg_ptr</c> field
			
 
				+   of \p f must be the shadow size cast into \c void*.
			
 
				+
			
 
				+   <b>IMPORTANT</b>: This can only be used for read-only access, as no coherency is
			
 
				+   enforced for the shadowed parts. A usage example is available in
			
 
				+   examples/filters/shadow.c
			
 
				+*/
			
 
				+void starpu_vector_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				+
			
 
				+/**
			
 
				+   Return in \p child_interface the \p id th element of the vector
			
 
				+   represented by \p father_interface once partitioned into \p nparts chunks
			
 
				+   according to the <c>filter_arg_ptr</c> field of \p f. The
			
 
				+   <c>filter_arg_ptr</c> field must point to an array of \p nparts long
			
 
				+   elements, each of which specifies the number of elements in each chunk
			
 
				+   of the partition.
			
 
				+*/
			
 
				+void starpu_vector_filter_list_long(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				+
			
 
				+/**
			
 
				+  Return in \p child_interface the \p id th element of the vector
			
 
				+  represented by \p father_interface once partitioned into \p nparts chunks
			
 
				+  according to the <c>filter_arg_ptr</c> field of \p f. The
			
 
				+  <c>filter_arg_ptr</c> field must point to an array of \p nparts uint32_t
			
 
				+  elements, each of which specifies the number of elements in each chunk
			
 
				+  of the partition.
			
 
				+*/
			
 
				+void starpu_vector_filter_list(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				+
			
 
				+/**
			
 
				+   Return in \p child_interface the \p id th element of the vector
			
 
				+   represented by \p father_interface once partitioned in <c>2</c> chunks of
			
 
				+   equal size, ignoring nparts. Thus, \p id must be <c>0</c> or <c>1</c>.
			
 
				+*/
			
 
				+void starpu_vector_filter_divide_in_2(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Predefined Block Filter Functions
			
 
				+   Predefined partitioning functions for block data. Examples on how
			
 
				+   to use them are shown in \ref PartitioningData. An example is
			
 
				+   available in \c examples/filters/shadow3d.c
			
 
				+   Note: this is using the C element order which is row-major, i.e. elements
			
 
				+   with consecutive x coordinates are consecutive in memory.
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+  Partition a block along the X dimension, thus getting
			
 
				+  (x/\p nparts ,y,z) 3D matrices. If \p nparts does not divide x, the last
			
 
				+  submatrix contains the remainder.
			
 
				+ */
			
 
				+void starpu_block_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				+
			
 
				+/**
			
 
				+   Partition a block along the X dimension, with a
			
 
				+   shadow border <c>filter_arg_ptr</c>, thus getting
			
 
				+   ((x-2*shadow)/\p nparts +2*shadow,y,z) blocks. If \p nparts does not
			
 
				+   divide x, the last submatrix contains the remainder.
			
 
				+
			
 
				+   <b>IMPORTANT</b>:
			
 
				+   This can only be used for read-only access, as no coherency is
			
 
				+   enforced for the shadowed parts.
			
 
				+*/
			
 
				+void starpu_block_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				+
			
 
				+/**
			
 
				+   Partition a block along the Y dimension, thus getting
			
 
				+   (x,y/\p nparts ,z) blocks. If \p nparts does not divide y, the last
			
 
				+   submatrix contains the remainder.
			
 
				+ */
			
 
				+void starpu_block_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				+
			
 
				+/**
			
 
				+   Partition a block along the Y dimension, with a
			
 
				+   shadow border <c>filter_arg_ptr</c>, thus getting
			
 
				+   (x,(y-2*shadow)/\p nparts +2*shadow,z) 3D matrices. If \p nparts does not
			
 
				+   divide y, the last submatrix contains the remainder.
			
 
				+
			
 
				+   <b>IMPORTANT</b>:
			
 
				+   This can only be used for read-only access, as no coherency is
			
 
				+   enforced for the shadowed parts.
			
 
				+*/
			
 
				+void starpu_block_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				+
			
 
				+/**
			
 
				+   Partition a block along the Z dimension, thus getting
			
 
				+   (x,y,z/\p nparts) blocks. If \p nparts does not divide z, the last
			
 
				+   submatrix contains the remainder.
			
 
				+*/
			
 
				+void starpu_block_filter_depth_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				+
			
 
				+/**
			
 
				+   Partition a block along the Z dimension, with a
			
 
				+   shadow border <c>filter_arg_ptr</c>, thus getting
			
 
				+   (x,y,(z-2*shadow)/\p nparts +2*shadow) blocks. If \p nparts does not
			
 
				+   divide z, the last submatrix contains the remainder.
			
 
				+
			
 
				+   <b>IMPORTANT</b>:
			
 
				+   This can only be used for read-only access, as no coherency is
			
 
				+   enforced for the shadowed parts.
			
 
				+*/
			
 
				+void starpu_block_filter_depth_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts);
			
 
				+
			
 
				+/**
			
 
				+   Given an integer \p n, \p nparts the number of parts it must be divided in, \p id the
			
 
				+   part currently considered, determines the \p chunk_size and the \p offset, taking
			
 
				+   into account the size of the elements stored in the data structure \p elemsize
			
 
				+   and \p ld, the leading dimension, which is most often 1.
			
 
				+ */
			
 
				+void
			
 
				+starpu_filter_nparts_compute_chunk_size_and_offset(unsigned n, unsigned nparts,
			
 
				+					     size_t elemsize, unsigned id,
			
 
				+					     unsigned ld, unsigned *chunk_size,
			
 
				+					     size_t *offset);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif
			
--- a/include/starpu/1.3/starpu_data_interfaces.h
+++ b/include/starpu/1.3/starpu_data_interfaces.h
--- a/include/starpu/1.3/starpu_deprecated_api.h
+++ b/include/starpu/1.3/starpu_deprecated_api.h
@@ -0,0 +1,123 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_DEPRECATED_API_H__
			
 
				+#define __STARPU_DEPRECATED_API_H__
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+#if defined(STARPU_USE_DEPRECATED_API) || defined(STARPU_USE_DEPRECATED_ONE_ZERO_API)
			
 
				+#warning Your application is using deprecated types. You may want to update to use the latest API, by using tools/dev/rename.sh.
			
 
				+#endif /* defined(STARPU_USE_DEPRECATED_API) || defined(STARPU_USE_DEPRECATED_ONE_ZERO_API) */
			
 
				+
			
 
				+#define starpu_permodel_history_based_expected_perf	starpu_perfmodel_history_based_expected_perf
			
 
				+
			
 
				+#ifdef STARPU_USE_DEPRECATED_ONE_ZERO_API
			
 
				+
			
 
				+#define starpu_allocate_buffer_on_node	starpu_malloc_on_node
			
 
				+#define starpu_free_buffer_on_node	starpu_free_on_node
			
 
				+#define starpu_helper_cublas_init	starpu_cublas_init
			
 
				+#define starpu_helper_cublas_shutdown	starpu_cublas_shutdown
			
 
				+
			
 
				+#define starpu_canonical_block_filter_bcsr	starpu_bcsr_filter_canonical_block
			
 
				+#define starpu_vertical_block_filter_func_csr	starpu_csr_filter_vertical_block
			
 
				+
			
 
				+#define starpu_block_filter_func			starpu_matrix_filter_block
			
 
				+#define starpu_block_shadow_filter_func			starpu_matrix_filter_block_shadow
			
 
				+#define starpu_vertical_block_filter_func		starpu_matrix_filter_vertical_block
			
 
				+#define starpu_vertical_block_shadow_filter_func	starpu_matrix_filter_vertical_block_shadow
			
 
				+
			
 
				+#define starpu_block_filter_func_vector		starpu_vector_filter_block
			
 
				+#define starpu_block_shadow_filter_func_vector	starpu_vector_filter_block_shadow
			
 
				+#define starpu_vector_list_filter_func		starpu_vector_filter_list
			
 
				+#define starpu_vector_divide_in_2_filter_func	starpu_vector_filter_divide_in_2
			
 
				+
			
 
				+#define starpu_block_filter_func_block			starpu_block_filter_block
			
 
				+#define starpu_block_shadow_filter_func_block		starpu_block_filter_block_shadow
			
 
				+#define starpu_vertical_block_filter_func_block		starpu_block_filter_vertical_block
			
 
				+#define starpu_vertical_block_shadow_filter_func_block	starpu_block_filter_vertical_block_shadow
			
 
				+#define starpu_depth_block_filter_func_block		starpu_block_filter_depth_block
			
 
				+#define starpu_depth_block_shadow_filter_func_block	starpu_block_filter_depth_block_shadow
			
 
				+
			
 
				+#define starpu_display_codelet_stats		starpu_codelet_display_stats
			
 
				+
			
 
				+#define starpu_access_mode				starpu_data_access_mode
			
 
				+#define starpu_buffer_descr				starpu_data_descr
			
 
				+#define starpu_memory_display_stats			starpu_data_display_memory_stats
			
 
				+#define starpu_handle_to_pointer			starpu_data_handle_to_pointer
			
 
				+#define starpu_handle_get_local_ptr			starpu_data_get_local_ptr
			
 
				+#define starpu_crc32_be_n				starpu_hash_crc32c_be_n
			
 
				+#define starpu_crc32_be					starpu_hash_crc32c_be
			
 
				+#define starpu_crc32_string				starpu_hash_crc32c_string
			
 
				+#define starpu_perf_archtype				starpu_perfmodel_archtype
			
 
				+#define starpu_history_based_expected_perf		starpu_perfmodel_history_based_expected_perf
			
 
				+#define starpu_task_profiling_info			starpu_profiling_task_info
			
 
				+#define starpu_worker_profiling_info			starpu_profiling_worker_info
			
 
				+#define starpu_bus_profiling_info			starpu_profiling_bus_info
			
 
				+#define starpu_set_profiling_id				starpu_profiling_set_id
			
 
				+#define starpu_worker_get_profiling_info		starpu_profiling_worker_get_info
			
 
				+#define starpu_bus_profiling_helper_display_summary	starpu_profiling_bus_helper_display_summary
			
 
				+#define starpu_worker_profiling_helper_display_summary	starpu_profiling_worker_helper_display_summary
			
 
				+#define starpu_archtype					starpu_worker_archtype
			
 
				+
			
 
				+#define starpu_handle_get_interface_id		starpu_data_get_interface_id
			
 
				+#define starpu_handle_get_size			starpu_data_get_size
			
 
				+#define starpu_handle_pack_data			starpu_data_pack
			
 
				+#define starpu_handle_unpack_data		starpu_data_unpack
			
 
				+
			
 
				+#endif /* STARPU_USE_DEPRECATED_ONE_ZERO_API */
			
 
				+
			
 
				+#ifdef STARPU_USE_DEPRECATED_API
			
 
				+typedef starpu_data_handle_t starpu_data_handle;
			
 
				+typedef struct starpu_block_interface starpu_block_interface_t;
			
 
				+typedef struct starpu_matrix_interface starpu_matrix_interface_t;
			
 
				+typedef struct starpu_vector_interface starpu_vector_interface_t;
			
 
				+typedef struct starpu_variable_interface starpu_variable_interface_t;
			
 
				+typedef struct starpu_csr_interface starpu_csr_interface_t;
			
 
				+typedef struct starpu_bcsr_interface starpu_bcsr_interface_t;
			
 
				+typedef struct starpu_multiformat_interface starpu_multiformat_interface_t;
			
 
				+#define starpu_machine_topology_s starpu_machine_topology
			
 
				+#define starpu_htbl32_node_s starpu_htbl32_node
			
 
				+#define starpu_history_list_t starpu_history_list
			
 
				+#define starpu_buffer_descr_t starpu_buffer_descr
			
 
				+#define starpu_regression_model_t starpu_regression_model
			
 
				+#define starpu_per_arch_perfmodel_t starpu_per_arch_perfmodel
			
 
				+#define starpu_perfmodel_t starpu_perfmodel
			
 
				+#define starpu_sched_policy_s starpu_sched_policy
			
 
				+#define starpu_data_interface_ops_t starpu_data_interface_ops
			
 
				+
			
 
				+typedef struct starpu_buffer_descr starpu_buffer_descr;
			
 
				+typedef struct starpu_codelet starpu_codelet;
			
 
				+typedef struct starpu_codelet starpu_codelet_t;
			
 
				+typedef enum starpu_access_mode starpu_access_mode;
			
 
				+
			
 
				+#define starpu_print_bus_bandwidth     starpu_bus_print_bandwidth
			
 
				+#define starpu_get_handle_interface_id starpu_handle_get_interface_id
			
 
				+#define starpu_get_current_task        starpu_task_get_current
			
 
				+#define starpu_unpack_cl_args          starpu_codelet_unpack_args
			
 
				+#define starpu_pack_cl_args   	       starpu_codelet_pack_args
			
 
				+#define starpu_task_deinit	       starpu_task_clean
			
 
				+
			
 
				+#endif /* STARPU_USE_DEPRECATED_API */
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif /* __STARPU_DEPRECATED_API_H__ */
			
--- a/include/starpu/1.3/starpu_disk.h
+++ b/include/starpu/1.3/starpu_disk.h
@@ -0,0 +1,220 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2013-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ * Copyright (C) 2013       Corentin Salingue
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_DISK_H__
			
 
				+#define __STARPU_DISK_H__
			
 
				+
			
 
				+#include <sys/types.h>
			
 
				+#include <starpu_config.h>
			
 
				+
			
 
				+/**
			
 
				+   @defgroup API_Out_Of_Core Out Of Core
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   Set of functions to manipulate data on disk.
			
 
				+*/
			
 
				+struct starpu_disk_ops
			
 
				+{
			
 
				+	/**
			
 
				+	   Connect a disk memory at location \p parameter with size \p size, and return a
			
 
				+	   base as void*, which will be passed by StarPU to all other methods.
			
 
				+	*/
			
 
				+	void *  (*plug)   (void *parameter, starpu_ssize_t size);
			
 
				+	/**
			
 
				+	   Disconnect a disk memory \p base.
			
 
				+	*/
			
 
				+	void    (*unplug) (void *base);
			
 
				+
			
 
				+	/**
			
 
				+	   Measure the bandwidth and the latency for the disk \p node and save it. Returns
			
 
				+	   1 if it could measure it.
			
 
				+	*/
			
 
				+	int    (*bandwidth)    (unsigned node, void *base);
			
 
				+
			
 
				+	/**
			
 
				+	   Create a new location for data of size \p size. Return an opaque object pointer.
			
 
				+	*/
			
 
				+	void *  (*alloc)  (void *base, size_t size);
			
 
				+
			
 
				+	/**
			
 
				+	   Free a data \p obj previously allocated with starpu_disk_ops::alloc.
			
 
				+	*/
			
 
				+	void    (*free)   (void *base, void *obj, size_t size);
			
 
				+
			
 
				+	/**
			
 
				+	   Open an existing location of data, at a specific position \p pos dependent on the backend.
			
 
				+	*/
			
 
				+	void *  (*open)   (void *base, void *pos, size_t size);
			
 
				+	/**
			
 
				+	   Close, without deleting it, a location of data \p obj.
			
 
				+	*/
			
 
				+	void    (*close)  (void *base, void *obj, size_t size);
			
 
				+
			
 
				+	/**
			
 
				+	   Read \p size bytes of data from \p obj in \p base, at offset \p offset, and put
			
 
				+	   into \p buf. Return the actual number of read bytes.
			
 
				+	*/
			
 
				+	int     (*read)   (void *base, void *obj, void *buf, off_t offset, size_t size);
			
 
				+	/**
			
 
				+	   Write \p size bytes of data to \p obj in \p base, at offset \p offset, from \p buf. Return 0 on success.
			
 
				+	*/
			
 
				+	int     (*write)  (void *base, void *obj, const void *buf, off_t offset, size_t size);
			
 
				+
			
 
				+	/**
			
 
				+	   Read all data from \p obj of \p base, from offset 0. Returns it in an allocated buffer \p ptr, of size \p size
			
 
				+	*/
			
 
				+	int	(*full_read)    (void * base, void * obj, void ** ptr, size_t * size, unsigned dst_node);
			
 
				+	/**
			
 
				+	   Write data in \p ptr to \p obj of \p base, from offset 0, and truncate \p obj to
			
 
				+	   \p size, so that a \c full_read will get it.
			
 
				+	*/
			
 
				+	int 	(*full_write)   (void * base, void * obj, void * ptr, size_t size);
			
 
				+
			
 
				+	/**
			
 
				+	   Asynchronously write \p size bytes of data to \p obj in \p base, at offset \p
			
 
				+	   offset, from \p buf. Return a void* pointer that StarPU will pass to \c
			
 
				+	   xxx_request methods for testing for the completion.
			
 
				+	*/
			
 
				+	void *  (*async_write)  (void *base, void *obj, void *buf, off_t offset, size_t size);
			
 
				+	/**
			
 
				+	   Asynchronously read \p size bytes of data from \p obj in \p base, at offset \p
			
 
				+	   offset, and put into \p buf. Return a void* pointer that StarPU will pass to \c
			
 
				+	   xxx_request methods for testing for the completion.
			
 
				+	*/
			
 
				+	void *  (*async_read)   (void *base, void *obj, void *buf, off_t offset, size_t size);
			
 
				+
			
 
				+	/**
			
 
				+	   Read all data from \p obj of \p base, from offset 0. Return it in an allocated buffer \p ptr, of size \p size
			
 
				+	*/
			
 
				+	void *	(*async_full_read)    (void * base, void * obj, void ** ptr, size_t * size, unsigned dst_node);
			
 
				+	/**
			
 
				+	   Write data in \p ptr to \p obj of \p base, from offset 0, and truncate \p obj to
			
 
				+	   \p size, so that a starpu_disk_ops::full_read will get it.
			
 
				+	*/
			
 
				+	void *	(*async_full_write)   (void * base, void * obj, void * ptr, size_t size);
			
 
				+
			
 
				+	/**
			
 
				+	   Copy from offset \p offset_src of disk object \p obj_src in \p base_src to
			
 
				+	   offset \p offset_dst of disk object \p obj_dst in \p base_dst. Return a void*
			
 
				+	   pointer that StarPU will pass to \c xxx_request methods for testing for the
			
 
				+	   completion.
			
 
				+	*/
			
 
				+	void *  (*copy)   (void *base_src, void* obj_src, off_t offset_src,  void *base_dst, void* obj_dst, off_t offset_dst, size_t size);
			
 
				+
			
 
				+	/**
			
 
				+	   Wait for completion of request \p async_channel returned by a previous
			
 
				+	   asynchronous read, write or copy.
			
 
				+	*/
			
 
				+	void   (*wait_request) (void * async_channel);
			
 
				+	/**
			
 
				+	   Test for completion of request \p async_channel returned by a previous
			
 
				+	   asynchronous read, write or copy. Return 1 on completion, 0 otherwise.
			
 
				+	*/
			
 
				+	int    (*test_request) (void * async_channel);
			
 
				+
			
 
				+	/**
			
 
				+	   Free the request allocated by a previous asynchronous read, write or copy.
			
 
				+	*/
			
 
				+	void   (*free_request)(void * async_channel);
			
 
				+
			
 
				+	/* TODO: readv, writev, read2d, write2d, etc. */
			
 
				+};
			
 
				+
			
 
				+/**
			
 
				+   Use the stdio library (fwrite, fread...) to read/write on disk.
			
 
				+
			
 
				+   <strong>Warning: It creates one file per allocation !</strong>
			
 
				+
			
 
				+   Does not support asynchronous transfers.
			
 
				+*/
			
 
				+extern struct starpu_disk_ops starpu_disk_stdio_ops;
			
 
				+
			
 
				+/**
			
 
				+   Use the HDF5 library.
			
 
				+
			
 
				+   <strong>It doesn't support multiple opening from different processes. </strong>
			
 
				+
			
 
				+   You may only allow one process to write in the HDF5 file.
			
 
				+
			
 
				+   <strong>If HDF5 library is not compiled with --thread-safe you can't open more than one HDF5 file at the same time. </strong>
			
 
				+*/
			
 
				+extern struct starpu_disk_ops starpu_disk_hdf5_ops;
			
 
				+
			
 
				+/**
			
 
				+   Use the unistd library (write, read...) to read/write on disk.
			
 
				+
			
 
				+   <strong>Warning: It creates one file per allocation !</strong>
			
 
				+*/
			
 
				+extern struct starpu_disk_ops starpu_disk_unistd_ops;
			
 
				+
			
 
				+/**
			
 
				+   Use the unistd library (write, read...) to read/write on disk with the O_DIRECT flag.
			
 
				+
			
 
				+   <strong>Warning: It creates one file per allocation !</strong>
			
 
				+
			
 
				+   Only available on Linux systems.
			
 
				+*/
			
 
				+extern struct starpu_disk_ops starpu_disk_unistd_o_direct_ops;
			
 
				+
			
 
				+/**
			
 
				+   Use the leveldb created by Google. More information at https://code.google.com/p/leveldb/
			
 
				+   Does not support asynchronous transfers.
			
 
				+*/
			
 
				+extern struct starpu_disk_ops starpu_disk_leveldb_ops;
			
 
				+
			
 
				+/**
			
 
				+   Close an existing data opened with starpu_disk_open().
			
 
				+*/
			
 
				+void starpu_disk_close(unsigned node, void *obj, size_t size);
			
 
				+
			
 
				+/**
			
 
				+   Open an existing file memory in a disk node. \p size is the size of
			
 
				+   the file. \p pos is the specific position dependent on the backend,
			
 
				+   given to the \c open  method of the disk operations. Return an
			
 
				+   opaque object pointer.
			
 
				+*/
			
 
				+void *starpu_disk_open(unsigned node, void *pos, size_t size);
			
 
				+
			
 
				+/**
			
 
				+   Register a disk memory node with a set of functions to manipulate
			
 
				+   data. The \c plug member of \p func will be passed \p parameter,
			
 
				+   and return a \c base which will be passed to all \p func methods.
			
 
				+   <br />
			
 
				+   SUCCESS: return the disk node. <br />
			
 
				+   FAIL: return an error code. <br />
			
 
				+   \p size must be at least \ref STARPU_DISK_SIZE_MIN bytes ! \p size
			
 
				+   being negative means infinite size.
			
 
				+*/
			
 
				+int starpu_disk_register(struct starpu_disk_ops *func, void *parameter, starpu_ssize_t size);
			
 
				+
			
 
				+/**
			
 
				+   Minimum size of a registered disk. The size of a disk is the last
			
 
				+   parameter of the function starpu_disk_register().
			
 
				+*/
			
 
				+#define STARPU_DISK_SIZE_MIN (16*1024*1024)
			
 
				+
			
 
				+/**
			
 
				+   Contain the node number of the disk swap, if set up through the
			
 
				+   \ref STARPU_DISK_SWAP variable.
			
 
				+*/
			
 
				+extern int starpu_disk_swap_node;
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+#endif /* __STARPU_DISK_H__ */
			
--- a/include/starpu/1.3/starpu_driver.h
+++ b/include/starpu/1.3/starpu_driver.h
@@ -0,0 +1,101 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_DRIVER_H__
			
 
				+#define __STARPU_DRIVER_H__
			
 
				+
			
 
				+#include <starpu_config.h>
			
 
				+#if defined(STARPU_USE_OPENCL) && !defined(__CUDACC__)
			
 
				+#include <starpu_opencl.h>
			
 
				+#endif
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   @defgroup API_Running_Drivers Running Drivers
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   structure for a driver
			
 
				+*/
			
 
				+struct starpu_driver
			
 
				+{
			
 
				+	/**
			
 
				+	    Type of the driver. Only ::STARPU_CPU_WORKER, ::STARPU_CUDA_WORKER
			
 
				+	    and ::STARPU_OPENCL_WORKER are currently supported.
			
 
				+	*/
			
 
				+	enum starpu_worker_archtype type;
			
 
				+	/**
			
 
				+	   Identifier of the driver.
			
 
				+	*/
			
 
				+	union
			
 
				+	{
			
 
				+		unsigned cpu_id;
			
 
				+		unsigned cuda_id;
			
 
				+#if defined(STARPU_USE_OPENCL) && !defined(__CUDACC__)
			
 
				+		cl_device_id opencl_id;
			
 
				+#else
			
 
				+		unsigned opencl_id;
			
 
				+#endif
			
 
				+	} id;
			
 
				+};
			
 
				+
			
 
				+/**
			
 
				+   Initialize the given driver, run it until it receives a request to
			
 
				+   terminate, deinitialize it and return 0 on success. Return
			
 
				+   <c>-EINVAL</c> if starpu_driver::type is not a valid StarPU device type
			
 
				+   (::STARPU_CPU_WORKER, ::STARPU_CUDA_WORKER or ::STARPU_OPENCL_WORKER).
			
 
				+
			
 
				+   This is the same as using the following functions: calling
			
 
				+   starpu_driver_init(), then calling starpu_driver_run_once() in a loop,
			
 
				+   and finally starpu_driver_deinit().
			
 
				+*/
			
 
				+int starpu_driver_run(struct starpu_driver *d);
			
 
				+
			
 
				+/**
			
 
				+   Notify all running drivers that they should terminate.
			
 
				+*/
			
 
				+void starpu_drivers_request_termination(void);
			
 
				+
			
 
				+/**
			
 
				+   Initialize the given driver. Return 0 on success, <c>-EINVAL</c>
			
 
				+   if starpu_driver::type is not a valid ::starpu_worker_archtype.
			
 
				+*/
			
 
				+int starpu_driver_init(struct starpu_driver *d);
			
 
				+
			
 
				+/**
			
 
				+   Run the driver once, then return 0 on success, <c>-EINVAL</c> if
			
 
				+   starpu_driver::type is not a valid ::starpu_worker_archtype.
			
 
				+*/
			
 
				+int starpu_driver_run_once(struct starpu_driver *d);
			
 
				+
			
 
				+/**
			
 
				+   Deinitialize the given driver. Return 0 on success, <c>-EINVAL</c> if
			
 
				+   starpu_driver::type is not a valid ::starpu_worker_archtype.
			
 
				+*/
			
 
				+int starpu_driver_deinit(struct starpu_driver *d);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif /* __STARPU_DRIVER_H__ */
			
--- a/include/starpu/1.3/starpu_expert.h
+++ b/include/starpu/1.3/starpu_expert.h
@@ -0,0 +1,55 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_EXPERT_H__
			
 
				+#define __STARPU_EXPERT_H__
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   @defgroup API_Expert_Mode Expert Mode
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   Wake all the workers, so they can inspect data requests and task
			
 
				+   submissions again.
			
 
				+*/
			
 
				+void starpu_wake_all_blocked_workers(void);
			
 
				+
			
 
				+/**
			
 
				+   Register a progression hook, to be called when workers are idle.
			
 
				+*/
			
 
				+int starpu_progression_hook_register(unsigned (*func)(void *arg), void *arg);
			
 
				+
			
 
				+/**
			
 
				+   Unregister a given progression hook.
			
 
				+*/
			
 
				+void starpu_progression_hook_deregister(int hook_id);
			
 
				+
			
 
				+int starpu_idle_hook_register(unsigned (*func)(void *arg), void *arg);
			
 
				+void starpu_idle_hook_deregister(int hook_id);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif /* __STARPU_EXPERT_H__ */
			
--- a/include/starpu/1.3/starpu_fxt.h
+++ b/include/starpu/1.3/starpu_fxt.h
@@ -0,0 +1,161 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ * Copyright (C) 2013       Joris Pablo
			
 
				+ * Copyright (C) 2013       Thibaut Lambert
			
 
				+ * Copyright (C) 2020       Federal University of Rio Grande do Sul (UFRGS)
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_FXT_H__
			
 
				+#define __STARPU_FXT_H__
			
 
				+
			
 
				+#include <starpu_config.h>
			
 
				+#include <starpu_perfmodel.h>
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   @defgroup API_FxT_Support FxT Support
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+struct starpu_fxt_codelet_event
			
 
				+{
			
 
				+	char symbol[256];
			
 
				+	int workerid;
			
 
				+	char perfmodel_archname[256];
			
 
				+	uint32_t hash;
			
 
				+	size_t size;
			
 
				+	float time;
			
 
				+};
			
 
				+
			
 
				+struct starpu_fxt_options
			
 
				+{
			
 
				+	unsigned per_task_colour;
			
 
				+	unsigned no_events;
			
 
				+	unsigned no_counter;
			
 
				+	unsigned no_bus;
			
 
				+	unsigned no_flops;
			
 
				+	unsigned ninputfiles;
			
 
				+	unsigned no_smooth;
			
 
				+	unsigned no_acquire;
			
 
				+	unsigned memory_states;
			
 
				+	unsigned internal;
			
 
				+	unsigned label_deps;
			
 
				+	char *filenames[STARPU_FXT_MAX_FILES];
			
 
				+	char *out_paje_path;
			
 
				+	char *distrib_time_path;
			
 
				+	char *activity_path;
			
 
				+	char *sched_tasks_path;
			
 
				+	char *dag_path;
			
 
				+	char *tasks_path;
			
 
				+	char *data_path;
			
 
				+	char *papi_path;
			
 
				+	char *comms_path;
			
 
				+	char *number_events_path;
			
 
				+	char *anim_path;
			
 
				+	char *states_path;
			
 
				+
			
 
				+	/**
			
 
				+	   In case we are going to gather multiple traces (e.g in the case of
			
 
				+	   MPI processes), we may need to prefix the name of the containers.
			
 
				+	*/
			
 
				+	char *file_prefix;
			
 
				+	/**
			
 
				+	   In case we are going to gather multiple traces (e.g in the case of
			
 
				+	   MPI processes), we may need to prefix the name of the containers.
			
 
				+	*/
			
 
				+	uint64_t file_offset;
			
 
				+	/**
			
 
				+	   In case we are going to gather multiple traces (e.g in the case of
			
 
				+	   MPI processes), we may need to prefix the name of the containers.
			
 
				+	*/
			
 
				+	int file_rank;
			
 
				+
			
 
				+	/**
			
 
				+	   Output parameters
			
 
				+	*/
			
 
				+	char worker_names[STARPU_NMAXWORKERS][256];
			
 
				+	/**
			
 
				+	   Output parameters
			
 
				+	*/
			
 
				+	struct starpu_perfmodel_arch worker_archtypes[STARPU_NMAXWORKERS];
			
 
				+	/**
			
 
				+	   Output parameters
			
 
				+	*/
			
 
				+	int nworkers;
			
 
				+
			
 
				+	/**
			
 
				+	   In case we want to dump the list of codelets to an external tool
			
 
				+	*/
			
 
				+	struct starpu_fxt_codelet_event **dumped_codelets;
			
 
				+	/**
			
 
				+	   In case we want to dump the list of codelets to an external tool
			
 
				+	*/
			
 
				+	long dumped_codelets_count;
			
 
				+};
			
 
				+
			
 
				+void starpu_fxt_options_init(struct starpu_fxt_options *options);
			
 
				+void starpu_fxt_generate_trace(struct starpu_fxt_options *options);
			
 
				+
			
 
				+/**
			
 
				+   Determine whether profiling should be started by starpu_init(), or only when
			
 
				+   starpu_fxt_start_profiling() is called. \p autostart should be 1 to do so, or 0 to
			
 
				+   prevent it.
			
 
				+*/
			
 
				+void starpu_fxt_autostart_profiling(int autostart);
			
 
				+
			
 
				+/**
			
 
				+   Start recording the trace. The trace is by default started from
			
 
				+   starpu_init() call, but can be paused by using
			
 
				+   starpu_fxt_stop_profiling(), in which case
			
 
				+   starpu_fxt_start_profiling() should be called to resume recording
			
 
				+   events.
			
 
				+*/
			
 
				+void starpu_fxt_start_profiling(void);
			
 
				+
			
 
				+/**
			
 
				+   Stop recording the trace. The trace is by default stopped when calling
			
 
				+   starpu_shutdown(). starpu_fxt_stop_profiling() can however be used to
			
 
				+   stop it earlier. starpu_fxt_start_profiling() can then be called to
			
 
				+   start recording it again, etc.
			
 
				+*/
			
 
				+void starpu_fxt_stop_profiling(void);
			
 
				+void starpu_fxt_write_data_trace(char *filename_in);
			
 
				+
			
 
				+/**
			
 
				+    Wrapper to get value of env variable STARPU_FXT_TRACE
			
 
				+*/
			
 
				+int starpu_fxt_is_enabled(void);
			
 
				+
			
 
				+/**
			
 
				+   Add an event in the execution trace if FxT is enabled.
			
 
				+*/
			
 
				+void starpu_fxt_trace_user_event(unsigned long code);
			
 
				+
			
 
				+/**
			
 
				+   Add a string event in the execution trace if FxT is enabled.
			
 
				+*/
			
 
				+void starpu_fxt_trace_user_event_string(const char *s);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif /* __STARPU_FXT_H__ */
			
--- a/include/starpu/1.3/starpu_hash.h
+++ b/include/starpu/1.3/starpu_hash.h
@@ -0,0 +1,63 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_HASH_H__
			
 
				+#define __STARPU_HASH_H__
			
 
				+
			
 
				+#include <stdint.h>
			
 
				+#include <stddef.h>
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   @ingroup API_Data_Interfaces
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   Compute the CRC of a byte buffer seeded by the \p inputcrc
			
 
				+   <em>current state</em>. The return value should be considered as the new
			
 
				+   <em>current state</em> for future CRC computation. This is used for computing
			
 
				+   data size footprint.
			
 
				+*/
			
 
				+uint32_t starpu_hash_crc32c_be_n(const void *input, size_t n, uint32_t inputcrc);
			
 
				+
			
 
				+/**
			
 
				+   Compute the CRC of a 32bit number seeded by the \p inputcrc
			
 
				+   <em>current state</em>. The return value should be considered as the new
			
 
				+   <em>current state</em> for future CRC computation. This is used for computing
			
 
				+   data size footprint.
			
 
				+*/
			
 
				+uint32_t starpu_hash_crc32c_be(uint32_t input, uint32_t inputcrc);
			
 
				+
			
 
				+/**
			
 
				+   Compute the CRC of a string seeded by the \p inputcrc <em>current
			
 
				+   state</em>. The return value should be considered as the new <em>current
			
 
				+   state</em> for future CRC computation. This is used for computing data
			
 
				+   size footprint.
			
 
				+*/
			
 
				+uint32_t starpu_hash_crc32c_string(const char *str, uint32_t inputcrc);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif /* __STARPU_HASH_H__ */
			
--- a/include/starpu/1.3/starpu_helper.h
+++ b/include/starpu/1.3/starpu_helper.h
@@ -0,0 +1,230 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2008-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_HELPER_H__
			
 
				+#define __STARPU_HELPER_H__
			
 
				+
			
 
				+#include <stdio.h>
			
 
				+#include <starpu.h>
			
 
				+
			
 
				+#ifdef STARPU_HAVE_HWLOC
			
 
				+#include <hwloc.h>
			
 
				+#endif
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   @defgroup API_Miscellaneous_Helpers Miscellaneous Helpers
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   Return the min of the two parameters.
			
 
				+*/
			
 
				+#define STARPU_MIN(a,b)	((a)<(b)?(a):(b))
			
 
				+/**
			
 
				+   Return the max of the two parameters.
			
 
				+*/
			
 
				+#define STARPU_MAX(a,b)	((a)<(b)?(b):(a))
			
 
				+
			
 
				+/**
			
 
				+   Define a value which can be used to mark pointers as invalid
			
 
				+   values.
			
 
				+*/
			
 
				+#define STARPU_POISON_PTR	((void *)0xdeadbeef)
			
 
				+
			
 
				+extern int _starpu_silent;
			
 
				+
			
 
				+char *starpu_getenv(const char *str);
			
 
				+
			
 
				+/**
			
 
				+   If the environment variable \c str is defined and its value is contained in the array \c strings, return the array position.
			
 
				+   Raise an error if the environment variable \c str is defined with a value not in \c strings.
			
 
				+   Return \c defvalue if the environment variable \c str is not defined.
			
 
				+ */
			
 
				+int starpu_get_env_string_var_default(const char *str, const char *strings[], int defvalue);
			
 
				+
			
 
				+/**
			
 
				+   If the environment variable \c str is defined with a well-defined size value, return the value as a size in bytes. Expected size qualifiers are b, B, k, K, m, M, g, G. The default qualifier is K.
			
 
				+   If the environment variable \c str is not defined or is empty, return \c defval.
			
 
				+   Raise an error if the value of the environment variable \c str is not well-defined.
			
 
				+ */
			
 
				+int starpu_get_env_size_default(const char *str, int defval);
			
 
				+
			
 
				+/**
			
 
				+   Return the integer value of the environment variable named \p str.
			
 
				+   Return -1 if the variable does not exist. Raise an error if its value is
			
 
				+   not an integer or is negative.
			
 
				+*/
			
 
				+static __starpu_inline int starpu_get_env_number(const char *str)
			
 
				+{
			
 
				+	char *strval;
			
 
				+
			
 
				+	strval = starpu_getenv(str);
			
 
				+	if (strval)
			
 
				+	{
			
 
				+		/* the env variable was actually set */
			
 
				+		long int val;
			
 
				+		char *pcheck;
			
 
				+
			
 
				+		val = strtol(strval, &pcheck, 10);
			
 
				+		if (*pcheck)
			
 
				+		{
			
 
				+			fprintf(stderr,"The %s environment variable must contain an integer\n", str);
			
 
				+			STARPU_ABORT();
			
 
				+		}
			
 
				+
			
 
				+		/* fprintf(stderr, "ENV %s WAS %d\n", str, val); */
			
 
				+		STARPU_ASSERT_MSG(val >= 0, "The value for the environment variable '%s' cannot be negative", str);
			
 
				+		return (int)val;
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		/* there is no such env variable */
			
 
				+		/* fprintf("There was no %s ENV\n", str); */
			
 
				+		return -1;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+static __starpu_inline int starpu_get_env_number_default(const char *str, int defval)
			
 
				+{
			
 
				+	int ret = starpu_get_env_number(str);
			
 
				+	if (ret == -1)
			
 
				+		ret = defval;
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				+static __starpu_inline float starpu_get_env_float_default(const char *str, float defval)
			
 
				+{
			
 
				+	char *strval;
			
 
				+
			
 
				+	strval = starpu_getenv(str);
			
 
				+	if (strval)
			
 
				+	{
			
 
				+		/* the env variable was actually set */
			
 
				+		float val;
			
 
				+		char *pcheck;
			
 
				+
			
 
				+		val = strtof(strval, &pcheck);
			
 
				+		if (*pcheck)
			
 
				+		{
			
 
				+			fprintf(stderr,"The %s environment variable must contain a float\n", str);
			
 
				+			STARPU_ABORT();
			
 
				+		}
			
 
				+
			
 
				+		/* fprintf(stderr, "ENV %s WAS %f\n", str, val); */
			
 
				+		return val;
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		/* there is no such env variable */
			
 
				+		/* fprintf("There was no %s ENV\n", str); */
			
 
				+		return defval;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+   Execute the given function \p func on a subset of workers. When
			
 
				+   calling this method, the offloaded function \p func is executed by
			
 
				+   every StarPU worker that is eligible to execute the function. The
			
 
				+   argument \p arg is passed to the offloaded function. The argument
			
 
				+   \p where specifies on which types of processing units the function
			
 
				+   should be executed.
			
 
				+   Similarly to the field starpu_codelet::where, it is possible to
			
 
				+   specify that the function should be executed on every CUDA device
			
 
				+   and every CPU by passing ::STARPU_CPU|::STARPU_CUDA. This function
			
 
				+   blocks until \p func has been executed on every appropriate
			
 
				+   processing unit, and thus may not be called from a callback
			
 
				+   function for instance.
			
 
				+*/
			
 
				+void starpu_execute_on_each_worker(void (*func)(void *), void *arg, uint32_t where);
			
 
				+
			
 
				+/**
			
 
				+   Same as starpu_execute_on_each_worker(), except that the task name
			
 
				+   is specified in the argument \p name.
			
 
				+*/
			
 
				+void starpu_execute_on_each_worker_ex(void (*func)(void *), void *arg, uint32_t where, const char *name);
			
 
				+
			
 
				+/**
			
 
				+   Call \p func(\p arg) on every worker in the \p workers array. \p
			
 
				+   num_workers indicates the number of workers in this array.  This
			
 
				+   function is synchronous, but the different workers may execute the
			
 
				+   function in parallel.
			
 
				+*/
			
 
				+void starpu_execute_on_specific_workers(void (*func)(void*), void *arg, unsigned num_workers, unsigned *workers, const char *name);
			
 
				+
			
 
				+/**
			
 
				+   Return the current date in micro-seconds.
			
 
				+*/
			
 
				+double starpu_timing_now(void);
			
 
				+
			
 
				+/**
			
 
				+   Copy the content of \p src_handle into \p dst_handle. The parameter \p
			
 
				+   asynchronous indicates whether the function should block or not. In
			
 
				+   the case of an asynchronous call, it is possible to synchronize with
			
 
				+   the termination of this operation either by the means of implicit
			
 
				+   dependencies (if enabled) or by calling starpu_task_wait_for_all(). If
			
 
				+   \p callback_func is not <c>NULL</c>, this callback function is executed after
			
 
				+   the handle has been copied, and it is given the pointer \p
			
 
				+   callback_arg as argument.
			
 
				+*/
			
 
				+int starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle, int asynchronous, void (*callback_func)(void*), void *callback_arg);
			
 
				+
			
 
				+/**
			
 
				+   Create a copy of \p src_handle, and return a new handle in \p dst_handle,
			
 
				+   which is to be used only for read accesses. This allows StarPU to optimize it
			
 
				+   by not actually copying the data whenever possible.
			
 
				+   The parameter \p asynchronous indicates whether the function should block
			
 
				+   or not. In the case of an asynchronous call, it is possible to synchronize
			
 
				+   with the termination of this operation either by the means of implicit
			
 
				+   dependencies (if enabled) or by calling starpu_task_wait_for_all(). If
			
 
				+   \p callback_func is not <c>NULL</c>, this callback function is executed after
			
 
				+   the handle has been copied, and it is given the pointer \p
			
 
				+   callback_arg as argument.
			
 
				+*/
			
 
				+int starpu_data_dup_ro(starpu_data_handle_t *dst_handle, starpu_data_handle_t src_handle, int asynchronous, void (*callback_func)(void*), void *callback_arg);
			
 
				+
			
 
				+/**
			
 
				+   Call hwloc-ps to display binding of each process and thread running on
			
 
				+   the machine.<br>
			
 
				+   Use the environment variable \ref STARPU_DISPLAY_BINDINGS to automatically
			
 
				+   call this function at the beginning of the execution of StarPU.
			
 
				+*/
			
 
				+void starpu_display_bindings(void);
			
 
				+
			
 
				+/**
			
 
				+   If \c hwloc is used, convert the given \p logical_index of a PU to the OS
			
 
				+   index of this PU. If \c hwloc is not used, return \p logical_index.
			
 
				+*/
			
 
				+int starpu_get_pu_os_index(unsigned logical_index);
			
 
				+
			
 
				+#ifdef STARPU_HAVE_HWLOC
			
 
				+/**
			
 
				+   Get the hwloc topology used by StarPU. One can use this pointer to get
			
 
				+   information about topology, but not to change settings related to topology.
			
 
				+*/
			
 
				+hwloc_topology_t starpu_get_hwloc_topology(void);
			
 
				+#endif
			
 
				+/** @} */
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif // __STARPU_HELPER_H__
			
--- a/include/starpu/1.3/starpu_heteroprio.h
+++ b/include/starpu/1.3/starpu_heteroprio.h
@@ -0,0 +1,76 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2015-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_SCHEDULER_HETEROPRIO_H__
			
 
				+#define __STARPU_SCHEDULER_HETEROPRIO_H__
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+#define STARPU_HETEROPRIO_MAX_PRIO 100
			
 
				+/* #define STARPU_NB_TYPES 3 */
			
 
				+/* #define STARPU_CPU_IDX 0 */
			
 
				+/* #define STARPU_CUDA_IDX 1 */
			
 
				+/* #define STARPU_OPENCL_IDX 2 */
			
 
				+
			
 
				+#define STARPU_HETEROPRIO_MAX_PREFETCH 2
			
 
				+#if STARPU_HETEROPRIO_MAX_PREFETCH <= 0
			
 
				+#error STARPU_HETEROPRIO_MAX_PREFETCH == 1 means no prefetch so STARPU_HETEROPRIO_MAX_PREFETCH must >= 1
			
 
				+#endif
			
 
				+
			
 
				+enum starpu_heteroprio_types
			
 
				+{
			
 
				+// First will be zero
			
 
				+	STARPU_CPU_IDX, // = 0
			
 
				+	STARPU_CUDA_IDX,
			
 
				+	STARPU_OPENCL_IDX,
			
 
				+	STARPU_MIC_IDX,
			
 
				+	STARPU_MPI_MS_IDX,
			
 
				+// This will be the number of archs
			
 
				+	STARPU_NB_TYPES
			
 
				+};
			
 
				+
			
 
				+static const unsigned starpu_heteroprio_types_to_arch[STARPU_NB_TYPES+1] =
			
 
				+{
			
 
				+	STARPU_CPU,
			
 
				+	STARPU_CUDA,
			
 
				+	STARPU_OPENCL,
			
 
				+	STARPU_MIC,
			
 
				+        STARPU_MPI_MS,
			
 
				+	0
			
 
				+};
			
 
				+
			
 
				+/** Tell how many priorities there are for a given arch */
			
 
				+void starpu_heteroprio_set_nb_prios(unsigned sched_ctx_id, enum starpu_heteroprio_types arch, unsigned max_prio);
			
 
				+
			
 
				+/** Set the mapping for a given arch prio=>bucket */
			
 
				+void starpu_heteroprio_set_mapping(unsigned sched_ctx_id, enum starpu_heteroprio_types arch, unsigned source_prio, unsigned dest_bucket_id);
			
 
				+
			
 
				+/** Tell which arch is the fastest for the tasks of a bucket (optional) */
			
 
				+void starpu_heteroprio_set_faster_arch(unsigned sched_ctx_id, enum starpu_heteroprio_types arch, unsigned bucket_id);
			
 
				+
			
 
				+/** Tell how slow an arch is for the tasks of a bucket (optional) */
			
 
				+void starpu_heteroprio_set_arch_slow_factor(unsigned sched_ctx_id, enum starpu_heteroprio_types arch, unsigned bucket_id, float slow_factor);
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif /* __STARPU_SCHEDULER_HETEROPRIO_H__ */
			
--- a/include/starpu/1.3/starpu_mic.h
+++ b/include/starpu/1.3/starpu_mic.h
@@ -0,0 +1,61 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_MIC_H__
			
 
				+#define __STARPU_MIC_H__
			
 
				+
			
 
				+#include <starpu_config.h>
			
 
				+
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   @defgroup API_MIC_Extensions MIC Extensions
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   Type for MIC function symbols
			
 
				+*/
			
 
				+typedef void *starpu_mic_func_symbol_t;
			
 
				+
			
 
				+/**
			
 
				+   Initiate a lookup on each MIC device to find the address of the
			
 
				+   function named \p func_name, store it in the global array kernels
			
 
				+   and return the index in the array through \p symbol.
			
 
				+*/
			
 
				+int starpu_mic_register_kernel(starpu_mic_func_symbol_t *symbol, const char *func_name);
			
 
				+
			
 
				+/**
			
 
				+   If successful, return the pointer to the function defined by \p symbol on
			
 
				+   the device linked to the called device. This can for instance be used
			
 
				+   in a starpu_mic_func_t implementation.
			
 
				+*/
			
 
				+starpu_mic_kernel_t starpu_mic_get_kernel(starpu_mic_func_symbol_t symbol);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif /* STARPU_USE_MIC */
			
 
				+
			
 
				+#endif /* __STARPU_MIC_H__ */
			
--- a/include/starpu/1.3/starpu_mod.f90
+++ b/include/starpu/1.3/starpu_mod.f90
@@ -0,0 +1,151 @@
 
				+! StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+!
			
 
				+! Copyright (C) 2015-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+!
			
 
				+! StarPU is free software; you can redistribute it and/or modify
			
 
				+! it under the terms of the GNU Lesser General Public License as published by
			
 
				+! the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+! your option) any later version.
			
 
				+!
			
 
				+! StarPU is distributed in the hope that it will be useful, but
			
 
				+! WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+!
			
 
				+! See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+!
			
 
				+MODULE starpu_mod
			
 
				+  ! == starpu.h ==
			
 
				+
			
 
				+  ! starpu_conf_init
			
 
				+  INTERFACE
			
 
				+     SUBROUTINE starpu_conf_init(conf) BIND(C)
			
 
				+       USE iso_c_binding
			
 
				+       TYPE(C_PTR), VALUE :: conf
			
 
				+     END SUBROUTINE starpu_conf_init
			
 
				+  END INTERFACE
			
 
				+
			
 
				+  ! starpu_init
			
 
				+  INTERFACE
			
 
				+     FUNCTION starpu_init(conf) BIND(C)
			
 
				+       USE iso_c_binding
			
 
				+       TYPE(C_PTR), VALUE :: conf
			
 
				+       INTEGER(KIND=C_INT) :: starpu_init
			
 
				+     END FUNCTION starpu_init
			
 
				+  END INTERFACE
			
 
				+
			
 
				+  ! starpu_initialize
			
 
				+
			
 
				+  ! starpu_pause
			
 
				+  INTERFACE
			
 
				+     SUBROUTINE starpu_pause() BIND(C)
			
 
				+       USE iso_c_binding
			
 
				+     END SUBROUTINE starpu_pause
			
 
				+  END INTERFACE
			
 
				+
			
 
				+  ! starpu_resume
			
 
				+  INTERFACE
			
 
				+     SUBROUTINE starpu_resume() BIND(C)
			
 
				+       USE iso_c_binding
			
 
				+     END SUBROUTINE starpu_resume
			
 
				+  END INTERFACE
			
 
				+
			
 
				+  ! starpu_shutdown
			
 
				+  INTERFACE
			
 
				+     SUBROUTINE starpu_shutdown() BIND(C)
			
 
				+       USE iso_c_binding
			
 
				+     END SUBROUTINE starpu_shutdown
			
 
				+  END INTERFACE
			
 
				+
			
 
				+  ! starpu_topology_print
			
 
				+
			
 
				+  ! starpu_asynchronous_copy_disabled
			
 
				+  INTERFACE
			
 
				+     SUBROUTINE starpu_asynchronous_copy_disabled() BIND(C)
			
 
				+       USE iso_c_binding
			
 
				+     END SUBROUTINE starpu_asynchronous_copy_disabled
			
 
				+  END INTERFACE
			
 
				+
			
 
				+  ! starpu_asynchronous_cuda_copy_disabled
			
 
				+  INTERFACE
			
 
				+     SUBROUTINE starpu_asynchronous_cuda_copy_disabled() BIND(C)
			
 
				+       USE iso_c_binding
			
 
				+     END SUBROUTINE starpu_asynchronous_cuda_copy_disabled
			
 
				+  END INTERFACE
			
 
				+
			
 
				+  ! starpu_asynchronous_opencl_copy_disabled
			
 
				+  INTERFACE
			
 
				+     SUBROUTINE starpu_asynchronous_opencl_copy_disabled() BIND(C)
			
 
				+       USE iso_c_binding
			
 
				+     END SUBROUTINE starpu_asynchronous_opencl_copy_disabled
			
 
				+  END INTERFACE
			
 
				+
			
 
				+  ! starpu_asynchronous_mic_copy_disabled
			
 
				+  INTERFACE
			
 
				+     SUBROUTINE starpu_asynchronous_mic_copy_disabled() BIND(C)
			
 
				+       USE iso_c_binding
			
 
				+     END SUBROUTINE starpu_asynchronous_mic_copy_disabled
			
 
				+  END INTERFACE
			
 
				+
			
 
				+  ! starpu_display_stats
			
 
				+  INTERFACE
			
 
				+     SUBROUTINE starpu_display_stats() BIND(C)
			
 
				+       USE iso_c_binding
			
 
				+     END SUBROUTINE starpu_display_stats
			
 
				+  END INTERFACE
			
 
				+
			
 
				+  ! starpu_get_version
			
 
				+  INTERFACE
			
 
				+     SUBROUTINE starpu_get_version(major,minor,release) BIND(C)
			
 
				+       USE iso_c_binding
			
 
				+       INTEGER(KIND=C_INT), INTENT(OUT) :: major,minor,release
			
 
				+     END SUBROUTINE starpu_get_version
			
 
				+  END INTERFACE
			
 
				+
			
 
				+  ! starpu_cpu_worker_get_count
			
 
				+  INTERFACE
			
 
				+     FUNCTION starpu_cpu_worker_get_count() BIND(C)
			
 
				+       USE iso_c_binding
			
 
				+       INTEGER(KIND=C_INT)              :: starpu_cpu_worker_get_count
			
 
				+     END FUNCTION starpu_cpu_worker_get_count
			
 
				+  END INTERFACE
			
 
				+
			
 
				+  ! == starpu_task.h ==
			
 
				+
			
 
				+  ! starpu_tag_declare_deps
			
 
				+  ! starpu_tag_declare_deps_array
			
 
				+  ! starpu_task_declare_deps_array
			
 
				+  ! starpu_tag_wait
			
 
				+  ! starpu_tag_wait_array
			
 
				+  ! starpu_tag_notify_from_apps
			
 
				+  ! starpu_tag_restart
			
 
				+  ! starpu_tag_remove
			
 
				+  ! starpu_task_init
			
 
				+  ! starpu_task_clean
			
 
				+  ! starpu_task_create
			
 
				+  ! starpu_task_destroy
			
 
				+  ! starpu_task_submit
			
 
				+  ! starpu_task_submit_to_ctx
			
 
				+  ! starpu_task_finished
			
 
				+  ! starpu_task_wait
			
 
				+  ! starpu_task_wait_for_all
			
 
				+  INTERFACE
			
 
				+     SUBROUTINE starpu_task_wait_for_all() BIND(C)
			
 
				+       USE iso_c_binding
			
 
				+     END SUBROUTINE starpu_task_wait_for_all
			
 
				+  END INTERFACE
			
 
				+  ! starpu_task_wait_for_n_submitted
			
 
				+  ! starpu_task_wait_for_all_in_ctx
			
 
				+  ! starpu_task_wait_for_n_submitted_in_ctx
			
 
				+  ! starpu_task_wait_for_no_ready
			
 
				+  ! starpu_task_nready
			
 
				+  ! starpu_task_nsubmitted
			
 
				+  ! starpu_codelet_init
			
 
				+  ! starpu_codelet_display_stats
			
 
				+  ! starpu_task_get_current
			
 
				+  ! starpu_parallel_task_barrier_init
			
 
				+  ! starpu_parallel_task_barrier_init_n
			
 
				+  ! starpu_task_dup
			
 
				+  ! starpu_task_set_implementation
			
 
				+  ! starpu_task_get_implementation
			
 
				+
			
 
				+END MODULE starpu_mod
			
--- a/include/starpu/1.3/starpu_mpi_ms.h
+++ b/include/starpu/1.3/starpu_mpi_ms.h
@@ -0,0 +1,48 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2016-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_MPI_MS_H__
			
 
				+#define __STARPU_MPI_MS_H__
			
 
				+
			
 
				+#include <starpu_config.h>
			
 
				+
			
 
				+#ifdef STARPU_USE_MPI_MASTER_SLAVE
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   @defgroup API_Master_Slave Master Slave Extension
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+typedef void *starpu_mpi_ms_func_symbol_t;
			
 
				+
			
 
				+int starpu_mpi_ms_register_kernel(starpu_mpi_ms_func_symbol_t *symbol, const char *func_name);
			
 
				+
			
 
				+starpu_mpi_ms_kernel_t starpu_mpi_ms_get_kernel(starpu_mpi_ms_func_symbol_t symbol);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif /* STARPU_USE_MPI_MASTER_SLAVE */
			
 
				+
			
 
				+#endif /* __STARPU_MPI_MS_H__ */
			
--- a/include/starpu/1.3/starpu_opencl.h
+++ b/include/starpu/1.3/starpu_opencl.h
@@ -0,0 +1,337 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_OPENCL_H__
			
 
				+#define __STARPU_OPENCL_H__
			
 
				+
			
 
				+#include <starpu_config.h>
			
 
				+#ifdef STARPU_USE_OPENCL
			
 
				+#ifndef CL_TARGET_OPENCL_VERSION
			
 
				+#define CL_TARGET_OPENCL_VERSION 100
			
 
				+#endif
			
 
				+#ifdef __APPLE__
			
 
				+#include <OpenCL/cl.h>
			
 
				+#else
			
 
				+#include <CL/cl.h>
			
 
				+#endif
			
 
				+#include <assert.h>
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   @defgroup API_OpenCL_Extensions OpenCL Extensions
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   Store the OpenCL programs as compiled for the different OpenCL
			
 
				+   devices.
			
 
				+*/
			
 
				+struct starpu_opencl_program
			
 
				+{
			
 
				+	/** Store each program for each OpenCL device. */
			
 
				+	cl_program programs[STARPU_MAXOPENCLDEVS];
			
 
				+};
			
 
				+
			
 
				+/**
			
 
				+   @name Writing OpenCL kernels
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   Return the OpenCL context of the device designated by \p devid
			
 
				+   in \p context.
			
 
				+*/
			
 
				+void starpu_opencl_get_context(int devid, cl_context *context);
			
 
				+
			
 
				+/**
			
 
				+   Return the cl_device_id corresponding to \p devid in \p device.
			
 
				+*/
			
 
				+void starpu_opencl_get_device(int devid, cl_device_id *device);
			
 
				+
			
 
				+/**
			
 
				+   Return the command queue of the device designated by \p devid
			
 
				+   into \p queue.
			
 
				+*/
			
 
				+void starpu_opencl_get_queue(int devid, cl_command_queue *queue);
			
 
				+
			
 
				+/**
			
 
				+   Return the context of the current worker.
			
 
				+*/
			
 
				+void starpu_opencl_get_current_context(cl_context *context);
			
 
				+
			
 
				+/**
			
 
				+   Return the computation kernel command queue of the current
			
 
				+   worker.
			
 
				+*/
			
 
				+void starpu_opencl_get_current_queue(cl_command_queue *queue);
			
 
				+
			
 
				+/**
			
 
				+   Set the arguments of a given kernel. The list of arguments
			
 
				+   must be given as <c>(size_t size_of_the_argument, cl_mem *
			
 
				+   pointer_to_the_argument)</c>. The last argument must be 0. Return the
			
 
				+   number of arguments that were successfully set. In case of failure,
			
 
				+   return the id of the argument that could not be set and \p err is set to
			
 
				+   the error returned by OpenCL. Otherwise, return the number of
			
 
				+   arguments that were set.
			
 
				+
			
 
				+   Here is an example:
			
 
				+   \code{.c}
			
 
				+   int n;
			
 
				+   cl_int err;
			
 
				+   cl_kernel kernel;
			
 
				+   n = starpu_opencl_set_kernel_args(&err, &kernel, sizeof(foo), &foo, sizeof(bar), &bar, 0);
			
 
				+   if (n != 2) fprintf(stderr, "Error : %d\n", err);
			
 
				+   \endcode
			
 
				+*/
			
 
				+int starpu_opencl_set_kernel_args(cl_int *err, cl_kernel *kernel, ...);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Compiling OpenCL kernels
			
 
				+   Source codes for OpenCL kernels can be stored in a file or in a
			
 
				+   string. StarPU provides functions to build the program executable for
			
 
				+   each available OpenCL device as a cl_program object. This program
			
 
				+   executable can then be loaded within a specific queue as explained in
			
 
				+   the next section. These are only helpers; applications can also fill a
			
 
				+   starpu_opencl_program array by hand for more advanced use (e.g.
			
 
				+   different programs on the different OpenCL devices, for relocation
			
 
				+   purpose for instance).
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   Store the contents of the file \p source_file_name in the buffer
			
 
				+   \p opencl_program_source. The file \p source_file_name can be located in the
			
 
				+   current directory, or in the directory specified by the environment
			
 
				+   variable \ref STARPU_OPENCL_PROGRAM_DIR, or
			
 
				+   in the directory <c>share/starpu/opencl</c> of the installation
			
 
				+   directory of StarPU, or in the source directory of StarPU. When the
			
 
				+   file is found, \p located_file_name is the full name of the file as it
			
 
				+   has been located on the system, \p located_dir_name the directory
			
 
				+   where it has been located. Otherwise, they are both set to the empty
			
 
				+   string.
			
 
				+*/
			
 
				+void starpu_opencl_load_program_source(const char *source_file_name, char *located_file_name, char *located_dir_name, char *opencl_program_source);
			
 
				+
			
 
				+/**
			
 
				+   Similar to function starpu_opencl_load_program_source() but
			
 
				+   allocate the buffers \p located_file_name, \p located_dir_name and
			
 
				+   \p opencl_program_source.
			
 
				+*/
			
 
				+void starpu_opencl_load_program_source_malloc(const char *source_file_name, char **located_file_name, char **located_dir_name, char **opencl_program_source);
			
 
				+
			
 
				+/**
			
 
				+   Compile the OpenCL kernel stored in the file \p source_file_name
			
 
				+   with the given options \p build_options and store the result in the
			
 
				+   directory <c>$STARPU_HOME/.starpu/opencl</c> with the same filename as
			
 
				+   \p source_file_name. The compilation is done for every OpenCL device,
			
 
				+   and the filename is suffixed with the vendor id and the device id of
			
 
				+   the OpenCL device.
			
 
				+*/
			
 
				+int starpu_opencl_compile_opencl_from_file(const char *source_file_name, const char *build_options);
			
 
				+
			
 
				+/**
			
 
				+   Compile the OpenCL kernel in the string \p opencl_program_source
			
 
				+   with the given options \p build_options and store the result in the
			
 
				+   directory <c>$STARPU_HOME/.starpu/opencl</c> with the filename \p
			
 
				+   file_name. The compilation is done for every OpenCL device, and the
			
 
				+   filename is suffixed with the vendor id and the device id of the
			
 
				+   OpenCL device.
			
 
				+*/
			
 
				+int starpu_opencl_compile_opencl_from_string(const char *opencl_program_source, const char *file_name, const char *build_options);
			
 
				+
			
 
				+/**
			
 
				+   Compile the binary OpenCL kernel identified with \p kernel_id.
			
 
				+   For every OpenCL device, the binary OpenCL kernel will be loaded from
			
 
				+   the file
			
 
				+   <c>$STARPU_HOME/.starpu/opencl/\<kernel_id\>.\<device_type\>.vendor_id_\<vendor_id\>_device_id_\<device_id\></c>.
			
 
				+*/
			
 
				+int starpu_opencl_load_binary_opencl(const char *kernel_id, struct starpu_opencl_program *opencl_programs);
			
 
				+
			
 
				+/**
			
 
				+   Compile an OpenCL source code stored in a file.
			
 
				+*/
			
 
				+int starpu_opencl_load_opencl_from_file(const char *source_file_name, struct starpu_opencl_program *opencl_programs, const char *build_options);
			
 
				+/**
			
 
				+   Compile an OpenCL source code stored in a string.
			
 
				+ */
			
 
				+int starpu_opencl_load_opencl_from_string(const char *opencl_program_source, struct starpu_opencl_program *opencl_programs, const char *build_options);
			
 
				+
			
 
				+/**
			
 
				+   Unload an OpenCL compiled code.
			
 
				+*/
			
 
				+int starpu_opencl_unload_opencl(struct starpu_opencl_program *opencl_programs);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Loading OpenCL kernels
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   Create a kernel \p kernel for device \p devid, on its computation
			
 
				+   command queue returned in \p queue, using program \p opencl_programs
			
 
				+   and name \p kernel_name.
			
 
				+*/
			
 
				+int starpu_opencl_load_kernel(cl_kernel *kernel, cl_command_queue *queue, struct starpu_opencl_program *opencl_programs, const char *kernel_name, int devid);
			
 
				+
			
 
				+/**
			
 
				+   Release the given \p kernel, to be called after kernel execution.
			
 
				+*/
			
 
				+int starpu_opencl_release_kernel(cl_kernel kernel);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name OpenCL Statistics
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   Collect statistics on a kernel execution.
			
 
				+   After termination of the kernels, the OpenCL codelet should call this
			
 
				+   function with the event returned by \c clEnqueueNDRangeKernel(), to
			
 
				+   let StarPU collect statistics about the kernel execution (used cycles,
			
 
				+   consumed energy).
			
 
				+*/
			
 
				+int starpu_opencl_collect_stats(cl_event event);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name OpenCL Utilities
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   Return the error message in English corresponding to \p status, an OpenCL
			
 
				+   error code.
			
 
				+*/
			
 
				+const char *starpu_opencl_error_string(cl_int status);
			
 
				+
			
 
				+/**
			
 
				+   Given a valid error status, print the corresponding error message on
			
 
				+   \c stdout, along with the function name \p func, the filename
			
 
				+   \p file, the line number \p line and the message \p msg.
			
 
				+*/
			
 
				+void starpu_opencl_display_error(const char *func, const char *file, int line, const char *msg, cl_int status);
			
 
				+
			
 
				+/**
			
 
				+   Call the function starpu_opencl_display_error() with the error
			
 
				+   \p status, the current function name, current file and line number,
			
 
				+   and an empty message.
			
 
				+*/
			
 
				+#define STARPU_OPENCL_DISPLAY_ERROR(status) starpu_opencl_display_error(__starpu_func__, __FILE__, __LINE__, NULL, status)
			
 
				+
			
 
				+/**
			
 
				+   Call the function starpu_opencl_display_error() and abort through assert(0); no abort happens when assertions are disabled with NDEBUG.
			
 
				+*/
			
 
				+static __starpu_inline void starpu_opencl_report_error(const char *func, const char *file, int line, const char *msg, cl_int status)
			
 
				+{
			
 
				+	starpu_opencl_display_error(func, file, line, msg, status);
			
 
				+	assert(0);
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+   Call the function starpu_opencl_report_error() with the error \p
			
 
				+   status, the current function name, current file and line number,
			
 
				+   and an empty message.
			
 
				+*/
			
 
				+#define STARPU_OPENCL_REPORT_ERROR(status) starpu_opencl_report_error(__starpu_func__, __FILE__, __LINE__, NULL, status)
			
 
				+
			
 
				+/**
			
 
				+   Call the function starpu_opencl_report_error() with \p msg
			
 
				+   and \p status, the current function name, current file and line number.
			
 
				+*/
			
 
				+#define STARPU_OPENCL_REPORT_ERROR_WITH_MSG(msg, status) starpu_opencl_report_error(__starpu_func__, __FILE__, __LINE__, msg, status)
			
 
				+
			
 
				+/**
			
 
				+   Allocate \p size bytes of memory, stored in \p addr. \p flags must be a valid
			
 
				+   combination of \c cl_mem_flags values.
			
 
				+*/
			
 
				+cl_int starpu_opencl_allocate_memory(int devid, cl_mem *addr, size_t size, cl_mem_flags flags);
			
 
				+
			
 
				+/**
			
 
				+   Copy \p size bytes from the given \p ptr on RAM \p src_node to the
			
 
				+   given \p buffer on OpenCL \p dst_node. \p offset is the offset, in
			
 
				+   bytes, in \p buffer. If \p event is <c>NULL</c>, the copy is
			
 
				+   synchronous, i.e. the queue is synchronised before returning. If not
			
 
				+   <c>NULL</c>, \p event can be used after the call to wait for this
			
 
				+   particular copy to complete. This function returns <c>CL_SUCCESS</c>
			
 
				+   if the copy was successful, or a valid OpenCL error code otherwise.
			
 
				+   The integer pointed to by \p ret is set to <c>-EAGAIN</c> if the
			
 
				+   asynchronous launch was successful, or to 0 if \p event was
			
 
				+   <c>NULL</c>.
			
 
				+*/
			
 
				+cl_int starpu_opencl_copy_ram_to_opencl(void *ptr, unsigned src_node, cl_mem buffer, unsigned dst_node, size_t size, size_t offset, cl_event *event, int *ret);
			
 
				+
			
 
				+/**
			
 
				+   Copy \p size bytes asynchronously from the given \p buffer on OpenCL
			
 
				+   \p src_node to the given \p ptr on RAM \p dst_node. \p offset is the
			
 
				+   offset, in bytes, in \p buffer. If \p event is <c>NULL</c>, the copy
			
 
				+   is synchronous, i.e. the queue is synchronised before returning. If not
			
 
				+   <c>NULL</c>, \p event can be used after the call to wait for this
			
 
				+   particular copy to complete. This function returns <c>CL_SUCCESS</c>
			
 
				+   if the copy was successful, or a valid OpenCL error code otherwise.
			
 
				+   The integer pointed to by \p ret is set to <c>-EAGAIN</c> if the
			
 
				+   asynchronous launch was successful, or to 0 if \p event was
			
 
				+   <c>NULL</c>.
			
 
				+*/
			
 
				+cl_int starpu_opencl_copy_opencl_to_ram(cl_mem buffer, unsigned src_node, void *ptr, unsigned dst_node, size_t size, size_t offset, cl_event *event, int *ret);
			
 
				+
			
 
				+/**
			
 
				+   Copy \p size bytes asynchronously from byte offset \p src_offset of \p
			
 
				+   src on OpenCL \p src_node to byte offset \p dst_offset of \p dst on
			
 
				+   OpenCL \p dst_node. If \p event is <c>NULL</c>, the copy is
			
 
				+   synchronous, i.e. the queue is synchronised before returning. If not
			
 
				+   <c>NULL</c>, \p event can be used after the call to wait for this
			
 
				+   particular copy to complete. This function returns <c>CL_SUCCESS</c>
			
 
				+   if the copy was successful, or a valid OpenCL error code otherwise.
			
 
				+   The integer pointed to by \p ret is set to <c>-EAGAIN</c> if the
			
 
				+   asynchronous launch was successful, or to 0 if \p event was
			
 
				+   <c>NULL</c>.
			
 
				+*/
			
 
				+cl_int starpu_opencl_copy_opencl_to_opencl(cl_mem src, unsigned src_node, size_t src_offset, cl_mem dst, unsigned dst_node, size_t dst_offset, size_t size, cl_event *event, int *ret);
			
 
				+
			
 
				+/**
			
 
				+   Copy \p size bytes from byte offset \p src_offset of \p src on \p
			
 
				+   src_node to byte offset \p dst_offset of \p dst on \p dst_node. If \p
			
 
				+   event is <c>NULL</c>, the copy is synchronous, i.e. the queue is
			
 
				+   synchronised before returning. If not <c>NULL</c>, \p event can be
			
 
				+   used after the call to wait for this particular copy to complete. The
			
 
				+   function returns <c>-EAGAIN</c> if the asynchronous launch was
			
 
				+   successful. It returns 0 if the synchronous copy was successful, or
			
 
				+   fails otherwise.
			
 
				+*/
			
 
				+cl_int starpu_opencl_copy_async_sync(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, cl_event *event);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif /* STARPU_USE_OPENCL */
			
 
				+
			
 
				+#endif /* __STARPU_OPENCL_H__ */
			
--- a/include/starpu/1.3/starpu_openmp.h
+++ b/include/starpu/1.3/starpu_openmp.h
--- a/include/starpu/1.3/starpu_perf_monitoring.h
+++ b/include/starpu/1.3/starpu_perf_monitoring.h
@@ -0,0 +1,212 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2019-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_PERF_MONITORING_H__
			
 
				+#define __STARPU_PERF_MONITORING_H__
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   @defgroup API_Perf_Monitoring Performance Monitoring Counters
			
 
				+   @brief This section describes the interface to access performance monitoring counters.
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   @name API
			
 
				+   \anchor PM_API
			
 
				+   @{
			
 
				+*/
			
 
				+/**
			
 
				+   Enum of all possible performance counter scopes.
			
 
				+ */
			
 
				+enum starpu_perf_counter_scope
			
 
				+{
			
 
				+	starpu_perf_counter_scope_undefined     = 0, /**< undefined scope */
			
 
				+	starpu_perf_counter_scope_global        = 2, /**< global scope */
			
 
				+	starpu_perf_counter_scope_per_worker    = 4, /**< per-worker scope */
			
 
				+	starpu_perf_counter_scope_per_codelet   = 6  /**< per-codelet scope */
			
 
				+};
			
 
				+
			
 
				+/**
			
 
				+  Enum of all possible performance counter value types.
			
 
				+ */
			
 
				+enum starpu_perf_counter_type
			
 
				+{
			
 
				+	starpu_perf_counter_type_undefined = 0, /**< undefined value type */
			
 
				+	starpu_perf_counter_type_int32     = 1, /**< signed 32-bit integer value */
			
 
				+	starpu_perf_counter_type_int64     = 2, /**< signed 64-bit integer value */
			
 
				+	starpu_perf_counter_type_float     = 3, /**< 32-bit single precision floating-point value */
			
 
				+	starpu_perf_counter_type_double    = 4  /**< 64-bit double precision floating-point value */
			
 
				+};
			
 
				+
			
 
				+struct starpu_perf_counter_listener;
			
 
				+struct starpu_perf_counter_sample;
			
 
				+struct starpu_perf_counter_set;
			
 
				+
			
 
				+/**
			
 
				+  Start collecting performance counter values. Takes no argument.
			
 
				+  */
			
 
				+void starpu_perf_counter_collection_start(void);
			
 
				+/**
			
 
				+  Stop collecting performance counter values. Takes no argument.
			
 
				+  */
			
 
				+void starpu_perf_counter_collection_stop(void);
			
 
				+
			
 
				+/**
			
 
				+  Translate scope name constant string to scope id.
			
 
				+  */
			
 
				+int starpu_perf_counter_scope_name_to_id(const char *name);
			
 
				+/**
			
 
				+  Translate scope id to scope name constant string.
			
 
				+  */
			
 
				+const char *starpu_perf_counter_scope_id_to_name(enum starpu_perf_counter_scope scope);
			
 
				+
			
 
				+/**
			
 
				+  Translate type name constant string to type id.
			
 
				+  */
			
 
				+int starpu_perf_counter_type_name_to_id(const char *name);
			
 
				+/**
			
 
				+  Translate type id to type name constant string.
			
 
				+  */
			
 
				+const char *starpu_perf_counter_type_id_to_name(enum starpu_perf_counter_type type);
			
 
				+
			
 
				+/**
			
 
				+  Return the number of performance counters for the given scope.
			
 
				+  */
			
 
				+int starpu_perf_counter_nb(enum starpu_perf_counter_scope scope);
			
 
				+/**
			
 
				+  Translate a performance counter name to its id.
			
 
				+  */
			
 
				+int starpu_perf_counter_name_to_id(enum starpu_perf_counter_scope scope, const char *name);
			
 
				+/**
			
 
				+  Translate a performance counter rank in its scope to its counter id.
			
 
				+  */
			
 
				+int starpu_perf_counter_nth_to_id(enum starpu_perf_counter_scope scope, int nth);
			
 
				+/**
			
 
				+  Translate a counter id to its name constant string.
			
 
				+  */
			
 
				+const char *starpu_perf_counter_id_to_name(int id);
			
 
				+/**
			
 
				+  Return the counter's type id.
			
 
				+  */
			
 
				+int starpu_perf_counter_get_type_id(int id);
			
 
				+/**
			
 
				+  Return the counter's help string.
			
 
				+  */
			
 
				+const char *starpu_perf_counter_get_help_string(int id);
			
 
				+
			
 
				+/**
			
 
				+  Display the list of counters defined in the given scope.
			
 
				+  */
			
 
				+void starpu_perf_counter_list_avail(enum starpu_perf_counter_scope scope);
			
 
				+/**
			
 
				+  Display the list of counters defined in all scopes.
			
 
				+  */
			
 
				+void starpu_perf_counter_list_all_avail(void);
			
 
				+
			
 
				+/**
			
 
				+  Allocate a new performance counter set.
			
 
				+  */
			
 
				+struct starpu_perf_counter_set *starpu_perf_counter_set_alloc(enum starpu_perf_counter_scope scope);
			
 
				+/**
			
 
				+  Free a performance counter set.
			
 
				+  */
			
 
				+void starpu_perf_counter_set_free(struct starpu_perf_counter_set *set);
			
 
				+
			
 
				+/**
			
 
				+  Enable a given counter in the set.
			
 
				+  */
			
 
				+void starpu_perf_counter_set_enable_id(struct starpu_perf_counter_set *set, int id);
			
 
				+/**
			
 
				+  Disable a given counter in the set.
			
 
				+  */
			
 
				+void starpu_perf_counter_set_disable_id(struct starpu_perf_counter_set *set, int id);
			
 
				+
			
 
				+/**
			
 
				+  Initialize a new performance counter listener.
			
 
				+  */
			
 
				+struct starpu_perf_counter_listener *starpu_perf_counter_listener_init(struct starpu_perf_counter_set *set, void (*callback)(struct starpu_perf_counter_listener *listener, struct starpu_perf_counter_sample *sample, void *context), void *user_arg);
			
 
				+/**
			
 
				+  End a performance counter listener.
			
 
				+  */
			
 
				+void starpu_perf_counter_listener_exit(struct starpu_perf_counter_listener *listener);
			
 
				+
			
 
				+/**
			
 
				+  Set a listener for the global scope.
			
 
				+  */
			
 
				+void starpu_perf_counter_set_global_listener(struct starpu_perf_counter_listener *listener);
			
 
				+/**
			
 
				+  Set a listener for the per_worker scope on a given worker.
			
 
				+  */
			
 
				+void starpu_perf_counter_set_per_worker_listener(unsigned workerid, struct starpu_perf_counter_listener *listener);
			
 
				+/**
			
 
				+  Set a common listener for all workers.
			
 
				+  */
			
 
				+void starpu_perf_counter_set_all_per_worker_listeners(struct starpu_perf_counter_listener *listener);
			
 
				+/**
			
 
				+  Set a per_codelet listener for a codelet.
			
 
				+  */
			
 
				+void starpu_perf_counter_set_per_codelet_listener(struct starpu_codelet *cl, struct starpu_perf_counter_listener *listener);
			
 
				+
			
 
				+/**
			
 
				+  Unset the global listener. Takes no argument.
			
 
				+  */
			
 
				+void starpu_perf_counter_unset_global_listener(void);
			
 
				+/**
			
 
				+  Unset the per_worker listener.
			
 
				+  */
			
 
				+void starpu_perf_counter_unset_per_worker_listener(unsigned workerid);
			
 
				+/**
			
 
				+  Unset all per_worker listeners.
			
 
				+  */
			
 
				+void starpu_perf_counter_unset_all_per_worker_listeners(void);
			
 
				+/**
			
 
				+  Unset a per_codelet listener.
			
 
				+  */
			
 
				+void starpu_perf_counter_unset_per_codelet_listener(struct starpu_codelet *cl);
			
 
				+
			
 
				+/**
			
 
				+  Read an int32 counter value from a sample.
			
 
				+  */
			
 
				+int32_t starpu_perf_counter_sample_get_int32_value(struct starpu_perf_counter_sample *sample, const int counter_id);
			
 
				+/**
			
 
				+  Read an int64 counter value from a sample.
			
 
				+  */
			
 
				+int64_t starpu_perf_counter_sample_get_int64_value(struct starpu_perf_counter_sample *sample, const int counter_id);
			
 
				+/**
			
 
				+  Read a float counter value from a sample.
			
 
				+  */
			
 
				+float starpu_perf_counter_sample_get_float_value(struct starpu_perf_counter_sample *sample, const int counter_id);
			
 
				+/**
			
 
				+  Read a double counter value from a sample.
			
 
				+  */
			
 
				+double starpu_perf_counter_sample_get_double_value(struct starpu_perf_counter_sample *sample, const int counter_id);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif /* __STARPU_PERF_MONITORING_H__ */
			
--- a/include/starpu/1.3/starpu_perf_steering.h
+++ b/include/starpu/1.3/starpu_perf_steering.h
@@ -0,0 +1,225 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2019-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_PERF_STEERING_H__
			
 
				+#define __STARPU_PERF_STEERING_H__
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   @defgroup API_Perf_Steering Performance Steering Knobs
			
 
				+   @brief This section describes the interface to access performance steering knobs.
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   @name API
			
 
				+   \anchor PM_API
			
 
				+   @{
			
 
				+*/
			
 
				+/**
			
 
				+   Enum of all possible performance knob scopes.
			
 
				+ */
			
 
				+enum starpu_perf_knob_scope
			
 
				+{
			
 
				+	starpu_perf_knob_scope_undefined     = 0, /**< undefined scope */
			
 
				+	starpu_perf_knob_scope_global        = 1, /**< global scope */
			
 
				+	starpu_perf_knob_scope_per_worker    = 3, /**< per-worker scope */
			
 
				+	starpu_perf_knob_scope_per_scheduler = 5  /**< per-scheduler scope */
			
 
				+};
			
 
				+
			
 
				+/**
			
 
				+  Enum of all possible performance knob value types.
			
 
				+ */
			
 
				+enum starpu_perf_knob_type
			
 
				+{
			
 
				+	starpu_perf_knob_type_undefined = 0, /**< undefined value type */
			
 
				+	starpu_perf_knob_type_int32     = 1, /**< signed 32-bit integer value */
			
 
				+	starpu_perf_knob_type_int64     = 2, /**< signed 64-bit integer value */
			
 
				+	starpu_perf_knob_type_float     = 3, /**< 32-bit single precision floating-point value */
			
 
				+	starpu_perf_knob_type_double    = 4  /**< 64-bit double precision floating-point value */
			
 
				+};
			
 
				+
			
 
				+/**
			
 
				+  Translate scope name constant string to scope id.
			
 
				+  */
			
 
				+int starpu_perf_knob_scope_name_to_id(const char *name);
			
 
				+/**
			
 
				+  Translate scope id to scope name constant string.
			
 
				+  */
			
 
				+const char *starpu_perf_knob_scope_id_to_name(enum starpu_perf_knob_scope scope);
			
 
				+
			
 
				+/**
			
 
				+  Translate type name constant string to type id.
			
 
				+  */
			
 
				+int starpu_perf_knob_type_name_to_id(const char *name);
			
 
				+/**
			
 
				+  Translate type id to type name constant string.
			
 
				+  */
			
 
				+const char *starpu_perf_knob_type_id_to_name(enum starpu_perf_knob_type type);
			
 
				+
			
 
				+/**
			
 
				+  Return the number of performance steering knobs for the given scope.
			
 
				+  */
			
 
				+int starpu_perf_knob_nb(enum starpu_perf_knob_scope scope);
			
 
				+/**
			
 
				+  Translate a performance knob name to its id.
			
 
				+  */
			
 
				+int starpu_perf_knob_name_to_id(enum starpu_perf_knob_scope scope, const char *name);
			
 
				+/**
			
 
				+  Translate a performance knob rank in its scope to its knob id.
			
 
				+  */
			
 
				+int starpu_perf_knob_nth_to_id(enum starpu_perf_knob_scope scope, int nth);
			
 
				+/**
			
 
				+  Translate a knob id to its name constant string.
			
 
				+  */
			
 
				+const char *starpu_perf_knob_id_to_name(int id);
			
 
				+/**
			
 
				+  Return the knob's type id.
			
 
				+  */
			
 
				+int starpu_perf_knob_get_type_id(int id);
			
 
				+/**
			
 
				+  Return the knob's help string.
			
 
				+  */
			
 
				+const char *starpu_perf_knob_get_help_string(int id);
			
 
				+
			
 
				+/**
			
 
				+  Display the list of knobs defined in the given scope.
			
 
				+  */
			
 
				+void starpu_perf_knob_list_avail(enum starpu_perf_knob_scope scope);
			
 
				+/**
			
 
				+  Display the list of knobs defined in all scopes.
			
 
				+  */
			
 
				+void starpu_perf_knob_list_all_avail(void);
			
 
				+
			
 
				+/**
			
 
				+  Get int32 knob value for Global scope.
			
 
				+  */
			
 
				+int32_t starpu_perf_knob_get_global_int32_value (const int knob_id);
			
 
				+/**
			
 
				+  Get int64 knob value for Global scope.
			
 
				+  */
			
 
				+int64_t starpu_perf_knob_get_global_int64_value (const int knob_id);
			
 
				+/**
			
 
				+  Get float knob value for Global scope.
			
 
				+  */
			
 
				+float   starpu_perf_knob_get_global_float_value (const int knob_id);
			
 
				+/**
			
 
				+  Get double knob value for Global scope.
			
 
				+  */
			
 
				+double  starpu_perf_knob_get_global_double_value(const int knob_id);
			
 
				+
			
 
				+/**
			
 
				+  Set int32 knob value for Global scope.
			
 
				+  */
			
 
				+void starpu_perf_knob_set_global_int32_value (const int knob_id, int32_t new_value);
			
 
				+/**
			
 
				+  Set int64 knob value for Global scope.
			
 
				+  */
			
 
				+void starpu_perf_knob_set_global_int64_value (const int knob_id, int64_t new_value);
			
 
				+/**
			
 
				+  Set float knob value for Global scope.
			
 
				+  */
			
 
				+void starpu_perf_knob_set_global_float_value (const int knob_id, float   new_value);
			
 
				+/**
			
 
				+  Set double knob value for Global scope.
			
 
				+  */
			
 
				+void starpu_perf_knob_set_global_double_value(const int knob_id, double  new_value);
			
 
				+
			
 
				+
			
 
				+/**
			
 
				+ Get int32 value for Per_worker scope.
			
 
				+  */
			
 
				+int32_t starpu_perf_knob_get_per_worker_int32_value (const int knob_id, unsigned workerid);
			
 
				+/**
			
 
				+ Get int64 value for Per_worker scope.
			
 
				+  */
			
 
				+int64_t starpu_perf_knob_get_per_worker_int64_value (const int knob_id, unsigned workerid);
			
 
				+/**
			
 
				+ Get float value for Per_worker scope.
			
 
				+  */
			
 
				+float   starpu_perf_knob_get_per_worker_float_value (const int knob_id, unsigned workerid);
			
 
				+/**
			
 
				+ Get double value for Per_worker scope.
			
 
				+  */
			
 
				+double  starpu_perf_knob_get_per_worker_double_value(const int knob_id, unsigned workerid);
			
 
				+
			
 
				+/**
			
 
				+ Set int32 value for Per_worker scope.
			
 
				+  */
			
 
				+void starpu_perf_knob_set_per_worker_int32_value (const int knob_id, unsigned workerid, int32_t new_value);
			
 
				+/**
			
 
				+ Set int64 value for Per_worker scope.
			
 
				+  */
			
 
				+void starpu_perf_knob_set_per_worker_int64_value (const int knob_id, unsigned workerid, int64_t new_value);
			
 
				+/**
			
 
				+ Set float value for Per_worker scope.
			
 
				+  */
			
 
				+void starpu_perf_knob_set_per_worker_float_value (const int knob_id, unsigned workerid, float   new_value);
			
 
				+/**
			
 
				+ Set double value for Per_worker scope.
			
 
				+  */
			
 
				+void starpu_perf_knob_set_per_worker_double_value(const int knob_id, unsigned workerid, double  new_value);
			
 
				+
			
 
				+
			
 
				+/**
			
 
				+ Get int32 value for per_scheduler scope.
			
 
				+  */
			
 
				+int32_t starpu_perf_knob_get_per_scheduler_int32_value (const int knob_id, const char * sched_policy_name);
			
 
				+/**
			
 
				+ Get int64 value for per_scheduler scope.
			
 
				+  */
			
 
				+int64_t starpu_perf_knob_get_per_scheduler_int64_value (const int knob_id, const char * sched_policy_name);
			
 
				+/**
			
 
				+ Get float value for per_scheduler scope.
			
 
				+  */
			
 
				+float   starpu_perf_knob_get_per_scheduler_float_value (const int knob_id, const char * sched_policy_name);
			
 
				+/**
			
 
				+ Get double value for per_scheduler scope.
			
 
				+  */
			
 
				+double  starpu_perf_knob_get_per_scheduler_double_value(const int knob_id, const char * sched_policy_name);
			
 
				+
			
 
				+/**
			
 
				+ Set int32 value for per_scheduler scope.
			
 
				+  */
			
 
				+void starpu_perf_knob_set_per_scheduler_int32_value (const int knob_id, const char * sched_policy_name, int32_t new_value);
			
 
				+/**
			
 
				+ Set int64 value for per_scheduler scope.
			
 
				+  */
			
 
				+void starpu_perf_knob_set_per_scheduler_int64_value (const int knob_id, const char * sched_policy_name, int64_t new_value);
			
 
				+/**
			
 
				+ Set float value for per_scheduler scope.
			
 
				+  */
			
 
				+void starpu_perf_knob_set_per_scheduler_float_value (const int knob_id, const char * sched_policy_name, float   new_value);
			
 
				+/**
			
 
				+ Set double value for per_scheduler scope.
			
 
				+  */
			
 
				+void starpu_perf_knob_set_per_scheduler_double_value(const int knob_id, const char * sched_policy_name, double  new_value);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif /* __STARPU_PERF_STEERING_H__ */
			
--- a/include/starpu/1.3/starpu_perfmodel.h
+++ b/include/starpu/1.3/starpu_perfmodel.h
@@ -0,0 +1,460 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ * Copyright (C) 2011       Télécom-SudParis
			
 
				+ * Copyright (C) 2013       Thibaut Lambert
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_PERFMODEL_H__
			
 
				+#define __STARPU_PERFMODEL_H__
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   @defgroup API_Performance_Model Performance Model
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+struct starpu_task;
			
 
				+struct starpu_data_descr;
			
 
				+
			
 
				+#define STARPU_NARCH STARPU_ANY_WORKER
			
 
				+
			
 
				+/**
			
 
				+   Description of one device: its worker architecture type, its identifier and its degree of parallelism.
			
 
				+*/
			
 
				+struct starpu_perfmodel_device
			
 
				+{
			
 
				+	enum starpu_worker_archtype type; /**< type of the device */
			
 
				+	int devid;                        /**< identifier of the precise device */
			
 
				+	int ncores;                       /**< number of executions in parallel, minus 1 */
			
 
				+};
			
 
				+
			
 
				+/**
			
 
				+   Description of an architecture as a set of devices: \c devices lists them and \c ndevices gives their number.
			
 
				+*/
			
 
				+struct starpu_perfmodel_arch
			
 
				+{
			
 
				+	int ndevices;                            /**< number of the devices for the given arch */
			
 
				+	struct starpu_perfmodel_device *devices; /**< list of the devices for the given arch */
			
 
				+};
			
 
				+
			
 
				+
			
 
				+struct starpu_perfmodel_history_entry
			
 
				+{
			
 
				+	double mean;        /**< mean_n = 1/n sum */
			
 
				+	double deviation;   /**< n dev_n = sum2 - 1/n (sum)^2 */
			
 
				+	double sum;         /**< sum of samples (in µs) */
			
 
				+	double sum2;        /**< sum of samples^2 */
			
 
				+	unsigned nsample;   /**< number of samples */
			
 
				+	unsigned nerror;
			
 
				+	uint32_t footprint; /**< data footprint */
			
 
				+	size_t size;        /**< in bytes */
			
 
				+	double flops;       /**< Provided by the application */
			
 
				+
			
 
				+	double duration;
			
 
				+	starpu_tag_t tag;
			
 
				+	double *parameters;
			
 
				+};
			
 
				+
			
 
				+struct starpu_perfmodel_history_list
			
 
				+{
			
 
				+	struct starpu_perfmodel_history_list *next;
			
 
				+	struct starpu_perfmodel_history_entry *entry;
			
 
				+};
			
 
				+
			
 
				+/**
			
 
				+   Accumulated sums and fitted coefficients used by the regression-based performance models.
			
 
				+*/
			
 
				+struct starpu_perfmodel_regression_model
			
 
				+{
			
 
				+	double sumlny;          /**< sum of ln(measured) */
			
 
				+
			
 
				+	double sumlnx;          /**< sum of ln(size) */
			
 
				+	double sumlnx2;         /**< sum of ln(size)^2 */
			
 
				+
			
 
				+	unsigned long minx;     /**< minimum size */
			
 
				+	unsigned long maxx;     /**< maximum size */
			
 
				+
			
 
				+	double sumlnxlny;       /**< sum of ln(size)*ln(measured) */
			
 
				+
			
 
				+	double alpha;           /**< estimated = alpha * size ^ beta */
			
 
				+	double beta;            /**< estimated = alpha * size ^ beta */
			
 
				+	unsigned valid;         /**< whether the linear regression model is valid (i.e. enough measures) */
			
 
				+
			
 
				+	double a;               /**< estimated = a size ^b + c */
			
 
				+	double b;               /**< estimated = a size ^b + c */
			
 
				+	double c;               /**< estimated = a size ^b + c */
			
 
				+	unsigned nl_valid;      /**< whether the non-linear regression model is valid (i.e. enough measures) */
			
 
				+
			
 
				+	unsigned nsample;       /**< number of sample values for non-linear regression */
			
 
				+
			
 
				+	double *coeff;          /**< list of computed coefficients for multiple linear regression model */
			
 
				+	unsigned ncoeff;        /**< number of coefficients for multiple linear regression model */
			
 
				+	unsigned multi_valid;   /**< whether the multiple linear regression model is valid */
			
 
				+};
			
 
				+
			
 
				+struct starpu_perfmodel_history_table;
			
 
				+
			
 
				+#define starpu_per_arch_perfmodel starpu_perfmodel_per_arch STARPU_DEPRECATED
			
 
				+
			
 
				+typedef double (*starpu_perfmodel_per_arch_cost_function)(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl);
			
 
				+typedef size_t (*starpu_perfmodel_per_arch_size_base)(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl);
			
 
				+
			
 
				+/**
			
 
				+   information about the performance model of a given arch.
			
 
				+*/
			
 
				+struct starpu_perfmodel_per_arch
			
 
				+{
			
 
				+	/**
			
 
				+	   Used by ::STARPU_PER_ARCH, must point to functions which take a
			
 
				+	   task, the target arch and implementation number (as mere
			
 
				+	   convenience, since the array is already indexed by these), and
			
 
				+	   must return a task duration estimation in micro-seconds.
			
 
				+	*/
			
 
				+	starpu_perfmodel_per_arch_cost_function cost_function;
			
 
				+	/**
			
 
				+	   Same as in structure starpu_perfmodel, but per-arch, in case it
			
 
				+	   depends on the architecture-specific implementation.
			
 
				+	*/
			
 
				+	starpu_perfmodel_per_arch_size_base size_base;
			
 
				+
			
 
				+	/**
			
 
				+	   \private
			
 
				+	   The history of performance measurements.
			
 
				+	*/
			
 
				+	struct starpu_perfmodel_history_table *history;
			
 
				+	/**
			
 
				+	   \private
			
 
				+	   Used by ::STARPU_HISTORY_BASED, ::STARPU_NL_REGRESSION_BASED and
			
 
				+	   ::STARPU_MULTIPLE_REGRESSION_BASED, records all execution history
			
 
				+	   measures.
			
 
				+	*/
			
 
				+	struct starpu_perfmodel_history_list *list;
			
 
				+	/**
			
 
				+	   \private
			
 
				+	   Used by ::STARPU_REGRESSION_BASED, ::STARPU_NL_REGRESSION_BASED
			
 
				+	   and ::STARPU_MULTIPLE_REGRESSION_BASED, contains the estimated
			
 
				+	   factors of the regression.
			
 
				+	*/
			
 
				+	struct starpu_perfmodel_regression_model regression;
			
 
				+
			
 
				+	char debug_path[256];
			
 
				+};
			
 
				+
			
 
				+/**
			
 
				+   Type of a performance model, selecting how task durations are estimated.
			
 
				+*/
			
 
				+enum starpu_perfmodel_type
			
 
				+{
			
 
				+        STARPU_PERFMODEL_INVALID=0,
			
 
				+	STARPU_PER_WORKER,                /**< Application-provided per-worker cost model function */
			
 
				+	STARPU_PER_ARCH,                  /**< Application-provided per-arch cost model function */
			
 
				+	STARPU_COMMON,                    /**< Application-provided common cost model function, with per-arch factor */
			
 
				+	STARPU_HISTORY_BASED,             /**< Automatic history-based cost model */
			
 
				+	STARPU_REGRESSION_BASED,          /**< Automatic linear regression-based cost model  (alpha * size ^ beta) */
			
 
				+	STARPU_NL_REGRESSION_BASED,       /**< Automatic non-linear regression-based cost model (a * size ^ b + c) */
			
 
				+	STARPU_MULTIPLE_REGRESSION_BASED  /**< Automatic multiple linear regression-based cost model. Application
			
 
				+					     provides parameters, their combinations and exponents. */
			
 
				+};
			
 
				+
			
 
				+struct _starpu_perfmodel_state;
			
 
				+typedef struct _starpu_perfmodel_state* starpu_perfmodel_state_t;
			
 
				+
			
 
				+/**
			
 
				+   Contain all information about a performance model. At least the
			
 
				+   type and symbol fields have to be filled when defining a performance
			
 
				+   model for a codelet. For compatibility, make sure to initialize the
			
 
				+   whole structure to zero, either by using explicit memset, or by
			
 
				+   letting the compiler implicitly do it in e.g. static storage case. If
			
 
				+   not provided, other fields have to be zero.
			
 
				+*/
			
 
				+struct starpu_perfmodel
			
 
				+{
			
 
				+	/**
			
 
				+	   type of performance model
			
 
				+	   <ul>
			
 
				+	   <li>
			
 
				+	   ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED,
			
 
				+	   ::STARPU_NL_REGRESSION_BASED: No other field needs to be
			
 
				+	   provided, this is purely history-based.
			
 
				+	   </li>
			
 
				+	   <li>
			
 
				+	   ::STARPU_MULTIPLE_REGRESSION_BASED: Need to provide fields
			
 
				+	   starpu_perfmodel::nparameters (number of different parameters),
			
 
				+	   starpu_perfmodel::ncombinations (number of parameters
			
 
				+	   combinations-tuples) and table starpu_perfmodel::combinations
			
 
				+	   which defines exponents of the equation. Function cl_perf_func
			
 
				+	   also needs to define how to extract parameters from the task.
			
 
				+	   </li>
			
 
				+	   <li>
			
 
				+	   ::STARPU_PER_ARCH: either field
			
 
				+	   starpu_perfmodel::arch_cost_function has to be filled with a
			
 
				+	   function that returns the cost in micro-seconds on the arch given
			
 
				+	   as parameter, or field starpu_perfmodel::per_arch has to be filled
			
 
				+	   with functions which return the cost in micro-seconds.
			
 
				+	   </li>
			
 
				+	   <li>
			
 
				+	   ::STARPU_COMMON: field starpu_perfmodel::cost_function has to be
			
 
				+	   filled with a function that returns the cost in micro-seconds on a
			
 
				+	   CPU, timing on other archs will be determined by multiplying by an
			
 
				+	   arch-specific factor.
			
 
				+	   </li>
			
 
				+	   </ul>
			
 
				+	*/
			
 
				+	enum starpu_perfmodel_type type;
			
 
				+
			
 
				+	/**
			
 
				+	   Used by ::STARPU_COMMON. Take a task and implementation number,
			
 
				+	   and must return a task duration estimation in micro-seconds.
			
 
				+	*/
			
 
				+	double (*cost_function)(struct starpu_task *, unsigned nimpl);
			
 
				+	/**
			
 
				+	   Used by ::STARPU_PER_ARCH. Take a task, an arch and implementation
			
 
				+	   number, and must return a task duration estimation in
			
 
				+	   micro-seconds on that arch.
			
 
				+	*/
			
 
				+	double (*arch_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch * arch, unsigned nimpl);
			
 
				+	/**
			
 
				+	   Used by ::STARPU_PER_WORKER. Take a task, a worker id and implementation
			
 
				+	   number, and must return a task duration estimation in
			
 
				+	   micro-seconds on that worker.
			
 
				+	*/
			
 
				+	double (*worker_cost_function)(struct starpu_task *, unsigned workerid, unsigned nimpl);
			
 
				+
			
 
				+	/**
			
 
				+	   Used by ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED and
			
 
				+	   ::STARPU_NL_REGRESSION_BASED. If not <c>NULL</c>, take a task and
			
 
				+	   implementation number, and return the size to be used as index to
			
 
				+	   distinguish histories and as a base for regressions.
			
 
				+	*/
			
 
				+	size_t (*size_base)(struct starpu_task *, unsigned nimpl);
			
 
				+	/**
			
 
				+	   Used by ::STARPU_HISTORY_BASED. If not <c>NULL</c>, take a task
			
 
				+	   and return the footprint to be used as index to distinguish
			
 
				+	   histories. The default is to use the starpu_task_data_footprint()
			
 
				+	   function.
			
 
				+	*/
			
 
				+	uint32_t (*footprint)(struct starpu_task *);
			
 
				+
			
 
				+	/**
			
 
				+	   symbol name for the performance model, which will be used as file
			
 
				+	   name to store the model. It must be set otherwise the model will
			
 
				+	   be ignored.
			
 
				+	*/
			
 
				+	const char *symbol;
			
 
				+
			
 
				+	/**
			
 
				+	   \private
			
 
				+	   Whether the performance model is already loaded from the disk.
			
 
				+	*/
			
 
				+	unsigned is_loaded;
			
 
				+	/**
			
 
				+	   \private
			
 
				+	*/
			
 
				+	unsigned benchmarking;
			
 
				+	/**
			
 
				+	   \private
			
 
				+	*/
			
 
				+	unsigned is_init;
			
 
				+
			
 
				+	void (*parameters)(struct starpu_task * task, double *parameters);
			
 
				+	/**
			
 
				+	   \private
			
 
				+	   Names of parameters used for multiple linear regression models (M,
			
 
				+	   N, K)
			
 
				+	*/
			
 
				+	const char **parameters_names;
			
 
				+	/**
			
 
				+	   \private
			
 
				+	   Number of parameters used for multiple linear regression models
			
 
				+	*/
			
 
				+	unsigned nparameters;
			
 
				+	/**
			
 
				+	   \private
			
 
				+	   Table of combinations of parameters (and the exponents) used for
			
 
				+	   multiple linear regression models
			
 
				+	*/
			
 
				+	unsigned **combinations;
			
 
				+	/**
			
 
				+	   \private
			
 
				+	   Number of combination of parameters used for multiple linear
			
 
				+	   regression models
			
 
				+	*/
			
 
				+	unsigned ncombinations;
			
 
				+	/**
			
 
				+	   \private
			
 
				+	*/
			
 
				+	starpu_perfmodel_state_t state;
			
 
				+};
			
 
				+
			
 
				+/**
			
 
				+   Initialize the \p model performance model structure. This is automatically
			
 
				+   called when e.g. submitting a task using a codelet using this performance model.
			
 
				+*/
			
 
				+void starpu_perfmodel_init(struct starpu_perfmodel *model);
			
 
				+
			
 
				+/**
			
 
				+   Load the performance model found in the file named \p filename. \p model has to be
			
 
				+   completely zero, and will be filled with the information stored in the given file.
			
 
				+*/
			
 
				+int starpu_perfmodel_load_file(const char *filename, struct starpu_perfmodel *model);
			
 
				+
			
 
				+/**
			
 
				+   Load a given performance model. \p model has to be
			
 
				+   completely zero, and will be filled with the information stored in
			
 
				+   <c>$STARPU_HOME/.starpu</c>. The function is intended to be used by
			
 
				+   external tools that want to read the performance model files.
			
 
				+*/
			
 
				+
			
 
				+int starpu_perfmodel_load_symbol(const char *symbol, struct starpu_perfmodel *model);
			
 
				+
			
 
				+/**
			
 
				+   Unload \p model which has been previously loaded
			
 
				+   through the function starpu_perfmodel_load_symbol()
			
 
				+*/
			
 
				+int starpu_perfmodel_unload_model(struct starpu_perfmodel *model);
			
 
				+
			
 
				+/**
			
 
				+  Fills \p path (supposed to be \p maxlen long) with the full path to the
			
 
				+  performance model file for symbol \p symbol.  This path can later on be used
			
 
				+  for instance with starpu_perfmodel_load_file() .
			
 
				+*/
			
 
				+void starpu_perfmodel_get_model_path(const char *symbol, char *path, size_t maxlen);
			
 
				+
			
 
				+/**
			
 
				+  Dump performance model \p model to output stream \p output, in XML format.
			
 
				+*/
			
 
				+void starpu_perfmodel_dump_xml(FILE *output, struct starpu_perfmodel *model);
			
 
				+
			
 
				+/**
			
 
				+   Free internal memory used for sampling
			
 
				+   management. It should only be called by an application which is not
			
 
				+   calling starpu_shutdown() as this function already calls it. See for
			
 
				+   example <c>tools/starpu_perfmodel_display.c</c>.
			
 
				+*/
			
 
				+void starpu_perfmodel_free_sampling(void);
			
 
				+
			
 
				+/**
			
 
				+   Return the architecture type of the worker \p workerid.
			
 
				+*/
			
 
				+struct starpu_perfmodel_arch *starpu_worker_get_perf_archtype(int workerid, unsigned sched_ctx_id);
			
 
				+
			
 
				+int starpu_perfmodel_get_narch_combs(void);
			
 
				+int starpu_perfmodel_arch_comb_add(int ndevices, struct starpu_perfmodel_device* devices);
			
 
				+int starpu_perfmodel_arch_comb_get(int ndevices, struct starpu_perfmodel_device *devices);
			
 
				+struct starpu_perfmodel_arch *starpu_perfmodel_arch_comb_fetch(int comb);
			
 
				+
			
 
				+struct starpu_perfmodel_per_arch *starpu_perfmodel_get_model_per_arch(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, unsigned impl);
			
 
				+struct starpu_perfmodel_per_arch *starpu_perfmodel_get_model_per_devices(struct starpu_perfmodel *model, int impl, ...);
			
 
				+
			
 
				+int starpu_perfmodel_set_per_devices_cost_function(struct starpu_perfmodel *model, int impl, starpu_perfmodel_per_arch_cost_function func, ...);
			
 
				+int starpu_perfmodel_set_per_devices_size_base(struct starpu_perfmodel *model, int impl, starpu_perfmodel_per_arch_size_base func, ...);
			
 
				+
			
 
				+/**
			
 
				+   Return the path to the debugging information for the performance model.
			
 
				+*/
			
 
				+void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, char *path, size_t maxlen, unsigned nimpl);
			
 
				+
			
 
				+char* starpu_perfmodel_get_archtype_name(enum starpu_worker_archtype archtype);
			
 
				+
			
 
				+/**
			
 
				+   Return the architecture name for \p arch
			
 
				+*/
			
 
				+void starpu_perfmodel_get_arch_name(struct starpu_perfmodel_arch *arch, char *archname, size_t maxlen, unsigned nimpl);
			
 
				+
			
 
				+/**
			
 
				+   Return the estimated time of a task with the given model and the given footprint.
			
 
				+*/
			
 
				+double starpu_perfmodel_history_based_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, uint32_t footprint);
			
 
				+
			
 
				+/**
			
 
				+   If starpu_init() is not used, starpu_perfmodel_initialize() should be called before calling starpu_perfmodel_* functions.
			
 
				+*/
			
 
				+void starpu_perfmodel_initialize(void);
			
 
				+
			
 
				+/**
			
 
				+   Print a list of all performance models on \p output
			
 
				+*/
			
 
				+int starpu_perfmodel_list(FILE *output);
			
 
				+
			
 
				+void starpu_perfmodel_print(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, unsigned nimpl, char *parameter, uint32_t *footprint, FILE *output);
			
 
				+int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char *parameter, uint32_t *footprint, FILE *output);
			
 
				+int starpu_perfmodel_print_estimations(struct starpu_perfmodel *model, uint32_t footprint, FILE *output);
			
 
				+
			
 
				+int starpu_perfmodel_list_combs(FILE *output, struct starpu_perfmodel *model);
			
 
				+
			
 
				+/**
			
 
				+   Feed the performance model \p model with an explicit
			
 
				+   measurement \p measured (in µs), in addition to measurements done by StarPU
			
 
				+   itself. This can be useful when the application already has an
			
 
				+   existing set of measurements done in good conditions, that StarPU
			
 
				+   could benefit from instead of doing on-line measurements. An example
			
 
				+   of use can be seen in \ref PerformanceModelExample.
			
 
				+*/
			
 
				+void starpu_perfmodel_update_history(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned cpuid, unsigned nimpl, double measured);
			
 
				+
			
 
				+/**
			
 
				+   Print the directory name storing performance models on \p output
			
 
				+*/
			
 
				+void starpu_perfmodel_directory(FILE *output);
			
 
				+
			
 
				+/**
			
 
				+   Print a matrix of bus bandwidths on \p f.
			
 
				+*/
			
 
				+void starpu_bus_print_bandwidth(FILE *f);
			
 
				+
			
 
				+/**
			
 
				+   Print the affinity devices on \p f.
			
 
				+*/
			
 
				+void starpu_bus_print_affinity(FILE *f);
			
 
				+
			
 
				+/**
			
 
				+   Print on \p f the name of the files containing the matrix of bus bandwidths, the affinity devices and the latency.
			
 
				+*/
			
 
				+void starpu_bus_print_filenames(FILE *f);
			
 
				+
			
 
				+/**
			
 
				+   Return the bandwidth of data transfer between two memory nodes
			
 
				+*/
			
 
				+double starpu_transfer_bandwidth(unsigned src_node, unsigned dst_node);
			
 
				+
			
 
				+/**
			
 
				+   Return the latency of data transfer between two memory nodes
			
 
				+*/
			
 
				+double starpu_transfer_latency(unsigned src_node, unsigned dst_node);
			
 
				+
			
 
				+/**
			
 
				+   Return the estimated time to transfer a given size between two memory nodes.
			
 
				+*/
			
 
				+double starpu_transfer_predict(unsigned src_node, unsigned dst_node, size_t size);
			
 
				+
			
 
				+/**
			
 
				+   Performance model which just always returns 1µs.
			
 
				+*/
			
 
				+extern struct starpu_perfmodel starpu_perfmodel_nop;
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif /* __STARPU_PERFMODEL_H__ */
			
--- a/include/starpu/1.3/starpu_profiling.h
+++ b/include/starpu/1.3/starpu_profiling.h
@@ -0,0 +1,327 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ * Copyright (C) 2020       Federal University of Rio Grande do Sul (UFRGS)
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_PROFILING_H__
			
 
				+#define __STARPU_PROFILING_H__
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#include <errno.h>
			
 
				+#include <time.h>
			
 
				+
			
 
				+#include <starpu_config.h>
			
 
				+
			
 
				+#ifdef STARPU_PAPI
			
 
				+#include <papi.h>
			
 
				+#endif
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   @defgroup API_Profiling Profiling
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   Used when calling the function starpu_profiling_status_set() to disable profiling.
			
 
				+*/
			
 
				+#define STARPU_PROFILING_DISABLE	0
			
 
				+/**
			
 
				+   Used when calling the function starpu_profiling_status_set() to enable profiling.
			
 
				+*/
			
 
				+#define STARPU_PROFILING_ENABLE		1
			
 
				+
			
 
				+/**
			
 
				+   Information about the execution of a task. It is accessible from
			
 
				+   the field starpu_task::profiling_info if profiling was enabled.
			
 
				+ */
			
 
				+struct starpu_profiling_task_info
			
 
				+{
			
 
				+	/** Date of task submission (relative to the initialization of StarPU). */
			
 
				+	struct timespec submit_time;
			
 
				+
			
 
				+	/** Time when the task was submitted to the scheduler. */
			
 
				+	struct timespec push_start_time;
			
 
				+	/** Time when the scheduler finished with the task submission. */
			
 
				+	struct timespec push_end_time;
			
 
				+	/** Time when the scheduler started to be requested for a task, and eventually gave that task. */
			
 
				+	struct timespec pop_start_time;
			
 
				+	/** Time when the scheduler finished providing the task for execution. */
			
 
				+	struct timespec pop_end_time;
			
 
				+
			
 
				+	/** Time when the worker started fetching input data. */
			
 
				+	struct timespec acquire_data_start_time;
			
 
				+	/** Time when the worker finished fetching input data. */
			
 
				+	struct timespec acquire_data_end_time;
			
 
				+
			
 
				+	/** Date of task execution beginning (relative to the initialization of StarPU). */
			
 
				+	struct timespec start_time;
			
 
				+	/** Date of task execution termination (relative to the initialization of StarPU). */
			
 
				+	struct timespec end_time;
			
 
				+
			
 
				+	/** Time when the worker started releasing data. */
			
 
				+	struct timespec release_data_start_time;
			
 
				+	/** Time when the worker finished releasing data. */
			
 
				+	struct timespec release_data_end_time;
			
 
				+
			
 
				+	/** Time when the worker started the application callback for the task. */
			
 
				+	struct timespec callback_start_time;
			
 
				+	/** Time when the worker finished the application callback for the task. */
			
 
				+	struct timespec callback_end_time;
			
 
				+
			
 
				+	/* TODO add expected length, expected start/end ? */
			
 
				+
			
 
				+	/** Identifier of the worker which has executed the task. */
			
 
				+	int workerid;
			
 
				+
			
 
				+	/** Number of cycles used by the task, only available in the MoviSim */
			
 
				+	uint64_t used_cycles;
			
 
				+	/** Number of cycles stalled within the task, only available in the MoviSim */
			
 
				+	uint64_t stall_cycles;
			
 
				+	/** Energy consumed by the task, in Joules */
			
 
				+	double energy_consumed;
			
 
				+
			
 
				+#ifdef STARPU_PAPI
			
 
				+	/** PAPI Events **/
			
 
				+	long long int papi_values[PAPI_MAX_HWCTRS];
			
 
				+	int papi_event_set;
			
 
				+#endif
			
 
				+
			
 
				+};
			
 
				+
			
 
				+/**
			
 
				+   Profiling information associated to a worker. The timing is
			
 
				+   provided since the previous call to
			
 
				+   starpu_profiling_worker_get_info()
			
 
				+*/
			
 
				+struct starpu_profiling_worker_info
			
 
				+{
			
 
				+	/** Starting date for the reported profiling measurements. */
			
 
				+	struct timespec start_time;
			
 
				+	/** Duration of the profiling measurement interval. */
			
 
				+	struct timespec total_time;
			
 
				+	/** Time spent by the worker to execute tasks during the profiling measurement interval. */
			
 
				+	struct timespec executing_time;
			
 
				+	/** Time spent idling by the worker during the profiling measurement interval. */
			
 
				+	struct timespec sleeping_time;
			
 
				+	/** Number of tasks executed by the worker during the profiling measurement interval. */
			
 
				+	int executed_tasks;
			
 
				+
			
 
				+	/** Number of cycles used by the worker, only available in the MoviSim */
			
 
				+	uint64_t used_cycles;
			
 
				+	/** Number of cycles stalled within the worker, only available in the MoviSim */
			
 
				+	uint64_t stall_cycles;
			
 
				+	/** Energy consumed by the worker, in Joules */
			
 
				+	double energy_consumed;
			
 
				+
			
 
				+	/* TODO: add wasted time due to failed tasks */
			
 
				+
			
 
				+	double flops;
			
 
				+};
			
 
				+
			
 
				+struct starpu_profiling_bus_info
			
 
				+{
			
 
				+	/** Time of bus profiling startup. */
			
 
				+	struct timespec start_time;
			
 
				+	/** Total time of bus profiling. */
			
 
				+	struct timespec total_time;
			
 
				+	/** Number of bytes transferred during profiling. */
			
 
				+	int long long transferred_bytes;
			
 
				+	/** Number of transfers during profiling. */
			
 
				+	int transfer_count;
			
 
				+};
			
 
				+
			
 
				+/**
			
 
				+   Reset performance counters and enable profiling if the
			
 
				+   environment variable \ref STARPU_PROFILING is set to a positive value.
			
 
				+*/
			
 
				+void starpu_profiling_init(void);
			
 
				+
			
 
				+/**
			
 
				+   Set the ID used for profiling trace filename. Has to be called before starpu_init().
			
 
				+*/
			
 
				+void starpu_profiling_set_id(int new_id);
			
 
				+
			
 
				+/**
			
 
				+   Set the profiling status. Profiling is activated
			
 
				+   by passing \ref STARPU_PROFILING_ENABLE in \p status. Passing
			
 
				+   \ref STARPU_PROFILING_DISABLE disables profiling. Calling this function
			
 
				+   resets all profiling measurements. When profiling is enabled, the
			
 
				+   field starpu_task::profiling_info points to a valid structure
			
 
				+   starpu_profiling_task_info containing information about the execution
			
 
				+   of the task. Negative return values indicate an error, otherwise the
			
 
				+   previous status is returned.
			
 
				+*/
			
 
				+int starpu_profiling_status_set(int status);
			
 
				+
			
 
				+/**
			
 
				+   Return the current profiling status or a negative value in case
			
 
				+   there was an error.
			
 
				+*/
			
 
				+int starpu_profiling_status_get(void);
			
 
				+
			
 
				+#ifdef BUILDING_STARPU
			
 
				+#include <common/utils.h>
			
 
				+#ifdef __GNUC__
			
 
				+extern int _starpu_profiling;
			
 
				+#define starpu_profiling_status_get() ({ \
			
 
				+	int __ret; \
			
 
				+	ANNOTATE_HAPPENS_AFTER(&_starpu_profiling); \
			
 
				+	__ret = _starpu_profiling; \
			
 
				+	ANNOTATE_HAPPENS_BEFORE(&_starpu_profiling); \
			
 
				+	__ret; \
			
 
				+})
			
 
				+#endif
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   Get the profiling info associated to the worker identified by
			
 
				+   \p workerid, and reset the profiling measurements. If the argument \p
			
 
				+   worker_info is <c>NULL</c>, only reset the counters associated to worker
			
 
				+   \p workerid. Upon successful completion, this function returns 0.
			
 
				+   Otherwise, a negative value is returned.
			
 
				+*/
			
 
				+int starpu_profiling_worker_get_info(int workerid, struct starpu_profiling_worker_info *worker_info);
			
 
				+
			
 
				+/**
			
 
				+   Return the number of buses in the machine
			
 
				+*/
			
 
				+int starpu_bus_get_count(void);
			
 
				+
			
 
				+/**
			
 
				+   Return the identifier of the bus between \p src and \p dst
			
 
				+*/
			
 
				+int starpu_bus_get_id(int src, int dst);
			
 
				+
			
 
				+/**
			
 
				+   Return the source point of bus \p busid
			
 
				+*/
			
 
				+int starpu_bus_get_src(int busid);
			
 
				+
			
 
				+/**
			
 
				+   Return the destination point of bus \p busid
			
 
				+*/
			
 
				+int starpu_bus_get_dst(int busid);
			
 
				+void starpu_bus_set_direct(int busid, int direct);
			
 
				+int starpu_bus_get_direct(int busid);
			
 
				+void starpu_bus_set_ngpus(int busid, int ngpus);
			
 
				+int starpu_bus_get_ngpus(int busid);
			
 
				+
			
 
				+/**
			
 
				+   See _starpu_profiling_bus_helper_display_summary in src/profiling/profiling_helpers.c for a usage example.
			
 
				+   Note that calling starpu_bus_get_profiling_info() resets the counters to zero.
			
 
				+*/
			
 
				+int starpu_bus_get_profiling_info(int busid, struct starpu_profiling_bus_info *bus_info);
			
 
				+
			
 
				+/* Some helper functions to manipulate profiling API output */
			
 
				+/* Reset timespec: set both the tv_sec and tv_nsec fields of tsp to zero */
			
 
				+static __starpu_inline void starpu_timespec_clear(struct timespec *tsp)
			
 
				+{
			
 
				+	tsp->tv_sec = 0;
			
 
				+	tsp->tv_nsec = 0;
			
 
				+}
			
 
				+
			
 
				+#define STARPU_NS_PER_S 1000000000
			
 
				+
			
 
				+/* Computes result = a + b. Only one carry from tv_nsec into tv_sec is applied, so a and b should each have tv_nsec below STARPU_NS_PER_S */
			
 
				+static __starpu_inline void starpu_timespec_add(struct timespec *a,
			
 
				+						struct timespec *b,
			
 
				+						struct timespec *result)
			
 
				+{
			
 
				+	result->tv_sec = a->tv_sec + b->tv_sec;
			
 
				+	result->tv_nsec = a->tv_nsec + b->tv_nsec;
			
 
				+
			
 
				+	if (result->tv_nsec >= STARPU_NS_PER_S)
			
 
				+	{
			
 
				+		++(result)->tv_sec;
			
 
				+		result->tv_nsec -= STARPU_NS_PER_S;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/* Computes result += a. Only one carry from tv_nsec into tv_sec is applied, so both operands should have tv_nsec below STARPU_NS_PER_S */
			
 
				+static __starpu_inline void starpu_timespec_accumulate(struct timespec *result,
			
 
				+						       struct timespec *a)
			
 
				+{
			
 
				+	result->tv_sec += a->tv_sec;
			
 
				+	result->tv_nsec += a->tv_nsec;
			
 
				+
			
 
				+	if (result->tv_nsec >= STARPU_NS_PER_S)
			
 
				+	{
			
 
				+		++(result)->tv_sec;
			
 
				+		result->tv_nsec -= STARPU_NS_PER_S;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/* Computes result = a - b, borrowing one second from tv_sec when the nanosecond difference is negative */
			
 
				+static __starpu_inline void starpu_timespec_sub(const struct timespec *a,
			
 
				+						const struct timespec *b,
			
 
				+						struct timespec *result)
			
 
				+{
			
 
				+	result->tv_sec = a->tv_sec - b->tv_sec;
			
 
				+	result->tv_nsec = a->tv_nsec - b->tv_nsec;
			
 
				+
			
 
				+	if ((result)->tv_nsec < 0)
			
 
				+	{
			
 
				+		--(result)->tv_sec;
			
 
				+		result->tv_nsec += STARPU_NS_PER_S;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+#define starpu_timespec_cmp(a, b, CMP)                          \
			
 
				+	(((a)->tv_sec == (b)->tv_sec) ? ((a)->tv_nsec CMP (b)->tv_nsec) : ((a)->tv_sec CMP (b)->tv_sec))
			
 
				+
			
 
				+/**
			
 
				+   Return the time elapsed between \p start and \p end in microseconds.
			
 
				+*/
			
 
				+double starpu_timing_timespec_delay_us(struct timespec *start, struct timespec *end);
			
 
				+
			
 
				+/**
			
 
				+   Convert the given timespec \p ts into microseconds
			
 
				+*/
			
 
				+double starpu_timing_timespec_to_us(struct timespec *ts);
			
 
				+
			
 
				+/**
			
 
				+   Display statistics about the bus on \c stderr if the environment
			
 
				+   variable \ref STARPU_BUS_STATS is defined. The function is called
			
 
				+   automatically by starpu_shutdown().
			
 
				+*/
			
 
				+void starpu_profiling_bus_helper_display_summary(void);
			
 
				+
			
 
				+/**
			
 
				+   Display statistics about the workers on \c stderr if the
			
 
				+   environment variable \ref STARPU_WORKER_STATS is defined. The function is
			
 
				+   called automatically by starpu_shutdown().
			
 
				+*/
			
 
				+void starpu_profiling_worker_helper_display_summary(void);
			
 
				+
			
 
				+/**
			
 
				+   Display statistics about the current data handles registered
			
 
				+   within StarPU. StarPU must have been configured with the configure
			
 
				+   option \ref enable-memory-stats "--enable-memory-stats" (see \ref
			
 
				+   MemoryFeedback). This function takes no argument.
			
 
				+*/
			
 
				+void starpu_data_display_memory_stats(void);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif /* __STARPU_PROFILING_H__ */
			
--- a/include/starpu/1.3/starpu_rand.h
+++ b/include/starpu/1.3/starpu_rand.h
@@ -0,0 +1,74 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_RAND_H__
			
 
				+#define __STARPU_RAND_H__
			
 
				+
			
 
				+#include <stdlib.h>
			
 
				+#include <starpu_config.h>
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   @defgroup API_Random_Functions Random Functions
			
 
				+   @{
			
 
				+ */
			
 
				+
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+/* In simgrid mode, force using seed 0 by default to get reproducible behavior by default */
			
 
				+#define starpu_seed(seed)				starpu_get_env_number_default("STARPU_RAND_SEED", 0)
			
 
				+#else
			
 
				+#define starpu_seed(seed)				starpu_get_env_number_default("STARPU_RAND_SEED", (seed))
			
 
				+#endif
			
 
				+
			
 
				+#ifdef STARPU_USE_DRAND48
			
 
				+#  define starpu_srand48(seed)				srand48(starpu_seed(seed))
			
 
				+#  define starpu_drand48()				drand48()
			
 
				+#  define starpu_lrand48()				lrand48()
			
 
				+#  define starpu_erand48(xsubi)				erand48(xsubi)
			
 
				+#  ifdef STARPU_USE_ERAND48_R
			
 
				+typedef struct drand48_data starpu_drand48_data;
			
 
				+#    define starpu_srand48_r(seed, buffer)		srand48_r(starpu_seed(seed), buffer)
			
 
				+#    define starpu_drand48_r(buffer, result)		drand48_r(buffer, result)
			
 
				+#    define starpu_lrand48_r(buffer, result)		lrand48_r(buffer, result)
			
 
				+#    define starpu_erand48_r(xsubi, buffer, result)	erand48_r(xsubi, buffer, result)
			
 
				+#else
			
 
				+typedef int starpu_drand48_data;
			
 
				+#    define starpu_srand48_r(seed, buffer)		srand48(starpu_seed(seed))
			
 
				+#    define starpu_drand48_r(buffer, result)		do {*(result) = drand48(); } while (0)
			
 
				+#    define starpu_lrand48_r(buffer, result)		do {*(result) = lrand48(); } while (0)
			
 
				+#    define starpu_erand48_r(xsubi, buffer, result)	do {(void) buffer; *(result) = erand48(xsubi); } while (0)
			
 
				+#  endif
			
 
				+#else
			
 
				+typedef int starpu_drand48_data;
			
 
				+#  define starpu_srand48(seed)				srand(starpu_seed(seed))
			
 
				+#  define starpu_drand48()				((double)(rand()) / RAND_MAX) /* parenthesized so the macro is safe inside larger expressions */
			
 
				+#  define starpu_lrand48() 				rand()
			
 
				+#  define starpu_erand48(xsubi)				starpu_drand48()
			
 
				+#  define starpu_srand48_r(seed, buffer) 		srand(starpu_seed(seed))
			
 
				+#  define starpu_erand48_r(xsubi, buffer, result)	do {(void) xsubi; (void) buffer; *(result) = ((double)(rand()) / RAND_MAX);} while (0)
			
 
				+#endif
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif /* __STARPU_RAND_H__ */
			
--- a/include/starpu/1.3/starpu_sched_component.h
+++ b/include/starpu/1.3/starpu_sched_component.h
@@ -0,0 +1,838 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2013-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ * Copyright (C) 2013       Simon Archipoff
			
 
				+ * Copyright (C) 2017       Arthur Chevalier
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_SCHED_COMPONENT_H__
			
 
				+#define __STARPU_SCHED_COMPONENT_H__
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+
			
 
				+#ifdef STARPU_HAVE_HWLOC
			
 
				+#include <hwloc.h>
			
 
				+#endif
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   @defgroup API_Modularized_Scheduler Modularized Scheduler Interface
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   flags for starpu_sched_component::properties
			
 
				+*/
			
 
				+enum starpu_sched_component_properties
			
 
				+{
			
 
				+	/** indicate that all workers have the same starpu_worker_archtype */
			
 
				+	STARPU_SCHED_COMPONENT_HOMOGENEOUS = (1<<0),
			
 
				+	/** indicate that all workers have the same memory component */
			
 
				+	STARPU_SCHED_COMPONENT_SINGLE_MEMORY_NODE = (1<<1)
			
 
				+};
			
 
				+
			
 
				+/**
			
 
				+   indicate if component is homogeneous
			
 
				+*/
			
 
				+#define STARPU_SCHED_COMPONENT_IS_HOMOGENEOUS(component) ((component)->properties & STARPU_SCHED_COMPONENT_HOMOGENEOUS)
			
 
				+
			
 
				+/**
			
 
				+   indicate if all workers have the same memory component
			
 
				+*/
			
 
				+#define STARPU_SCHED_COMPONENT_IS_SINGLE_MEMORY_NODE(component) ((component)->properties & STARPU_SCHED_COMPONENT_SINGLE_MEMORY_NODE)
			
 
				+
			
 
				+/**
			
 
				+   Structure for a scheduler module.  A scheduler is a
			
 
				+   tree-like structure of them, some parts of scheduler can be shared by
			
 
				+   several contexts to perform some local optimisations, so, for all
			
 
				+   components, a list of parents is defined by \c sched_ctx_id. They
			
 
				+   embed their specialised methods in a pseudo object-style, so calls are
			
 
				+   like <c>component->push_task(component,task)</c>
			
 
				+*/
			
 
				+struct starpu_sched_component
			
 
				+{
			
 
				+	/** The tree containing the component*/
			
 
				+	struct starpu_sched_tree *tree;
			
 
				+	/** set of underlying workers */
			
 
				+	struct starpu_bitmap workers;
			
 
				+	/**
			
 
				+	   subset of starpu_sched_component::workers that is currently available in the context
			
 
				+	   The push method should take this value into account, it is set with:
			
 
				+	   component->workers UNION tree->workers UNION
			
 
				+	   component->children[i]->workers_in_ctx iff there exists x such that component->children[i]->parents[x] == component
			
 
				+	*/
			
 
				+	struct starpu_bitmap workers_in_ctx;
			
 
				+	/** private data */
			
 
				+	void *data;
			
 
				+	char *name;
			
 
				+	/** number of component's children */
			
 
				+	unsigned nchildren;
			
 
				+	/** vector of component's children */
			
 
				+	struct starpu_sched_component **children;
			
 
				+	/** number of component's parents */
			
 
				+	unsigned nparents;
			
 
				+	/** vector of component's parents */
			
 
				+	struct starpu_sched_component **parents;
			
 
				+
			
 
				+	/** add a child to component */
			
 
				+	void (*add_child)(struct starpu_sched_component *component, struct starpu_sched_component *child);
			
 
				+	/** remove a child from component */
			
 
				+	void (*remove_child)(struct starpu_sched_component *component, struct starpu_sched_component *child);
			
 
				+	void (*add_parent)(struct starpu_sched_component *component, struct starpu_sched_component *parent);
			
 
				+	void (*remove_parent)(struct starpu_sched_component *component, struct starpu_sched_component *parent);
			
 
				+
			
 
				+	/**
			
 
				+	   push a task in the scheduler module. this function is called to
			
 
				+	   push a task on component subtree, this can either perform a
			
 
				+	   recursive call on a child or store the task in the component,
			
 
				+	   then it will be returned by a further pull_task call.
			
 
				+	   the caller must ensure that component is able to execute task.
			
 
				+	   This method must either return 0 if the task was properly stored or
			
 
				+	   passed over to a child component, or return a value different from 0 if the
			
 
				+	   task could not be consumed (e.g. the queue is full).
			
 
				+	*/
			
 
				+	int (*push_task)(struct starpu_sched_component *, struct starpu_task *);
			
 
				+
			
 
				+	/**
			
 
				+	   pop a task from the scheduler module. this function is called by workers to get a task from their
			
 
				+	   parents. this function should first return a locally stored task
			
 
				+	   or perform a recursive call on the parents.
			
 
				+	   the task returned by this function should be executable by the caller
			
 
				+	*/
			
 
				+	struct starpu_task *(*pull_task)(struct starpu_sched_component *from, struct starpu_sched_component *to);
			
 
				+
			
 
				+	/**
			
 
				+	   This function is called by a component which implements a queue,
			
 
				+	   allowing it to signify to its parents that an empty slot is
			
 
				+	   available in its queue. This should return 1 if some tasks could be pushed
			
 
				+	   The basic implementation of this function
			
 
				+	   is a recursive call to its parents, the user has to specify a
			
 
				+	   personally-made function to catch those calls.
			
 
				+	*/
			
 
				+	int (*can_push)(struct starpu_sched_component *from, struct starpu_sched_component *to);
			
 
				+
			
 
				+	/**
			
 
				+	   This function allows a component to wake up a worker. It is
			
 
				+	   currently called by component which implements a queue, to
			
 
				+	   signify to its children that a task has been pushed in its local
			
 
				+	   queue, and is available to be popped by a worker, for example.
			
 
				+	   This should return 1 if some container or worker could (or will) pull
			
 
				+	   some tasks.
			
 
				+	   The basic implementation of this function is a recursive call to
			
 
				+	   its children, until at least one worker has been woken up.
			
 
				+	*/
			
 
				+	int (*can_pull)(struct starpu_sched_component *component);
			
 
				+
			
 
				+	int (*notify)(struct starpu_sched_component* component, int message_ID, void* arg);
			
 
				+
			
 
				+	/**
			
 
				+	   heuristic to compute load of scheduler module. Basically the number of tasks divided by the sum
			
 
				+	   of relative speedups of workers available in context.
			
 
				+	   estimated_load(component) = sum(estimated_load(component_children)) + nb_local_tasks / average(relative_speedup(underlying_worker))
			
 
				+	*/
			
 
				+	double (*estimated_load)(struct starpu_sched_component *component);
			
 
				+	/**
			
 
				+	   return the time when a worker will enter in starvation. This function is relevant only if the task->predicted
			
 
				+	   member has been set.
			
 
				+	*/
			
 
				+	double (*estimated_end)(struct starpu_sched_component *component);
			
 
				+
			
 
				+	/**
			
 
				+	   called by starpu_sched_component_destroy. Should free data allocated during creation
			
 
				+	*/
			
 
				+	void (*deinit_data)(struct starpu_sched_component *component);
			
 
				+
			
 
				+	/**
			
 
				+	   this function is called for each component when workers are added or removed from a context
			
 
				+	*/
			
 
				+	void (*notify_change_workers)(struct starpu_sched_component *component);
			
 
				+	int properties;
			
 
				+
			
 
				+#ifdef STARPU_HAVE_HWLOC
			
 
				+	/**
			
 
				+	   the hwloc object associated to scheduler module. points to the
			
 
				+	   part of topology that is bound to this component, e.g. a NUMA
			
 
				+	   node for a ws component that would balance load between
			
 
				+	   underlying sockets
			
 
				+	*/
			
 
				+	hwloc_obj_t obj;
			
 
				+#else
			
 
				+	void *obj;
			
 
				+#endif
			
 
				+};
			
 
				+
			
 
				+/**
			
 
				+   The actual scheduler
			
 
				+*/
			
 
				+struct starpu_sched_tree
			
 
				+{
			
 
				+	/**
			
 
				+	   entry module of the scheduler
			
 
				+	*/
			
 
				+	struct starpu_sched_component *root;
			
 
				+	/**
			
 
				+	   set of workers available in this context, this value is used to mask workers in modules
			
 
				+	*/
			
 
				+	struct starpu_bitmap workers;
			
 
				+	/**
			
 
				+	   context id of the scheduler
			
 
				+	*/
			
 
				+	unsigned sched_ctx_id;
			
 
				+	/**
			
 
				+	   lock used to protect the scheduler, it is taken in read mode when pushing a task and in write mode for adding or
			
 
				+	   removing workers
			
 
				+	*/
			
 
				+	starpu_pthread_mutex_t lock;
			
 
				+};
			
 
				+
			
 
				+void starpu_initialize_prio_center_policy(unsigned sched_ctx_id);
			
 
				+
			
 
				+/**
			
 
				+   @name Scheduling Tree API
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   create an empty initialized starpu_sched_tree
			
 
				+*/
			
 
				+struct starpu_sched_tree *starpu_sched_tree_create(unsigned sched_ctx_id) STARPU_ATTRIBUTE_MALLOC;
			
 
				+/**
			
 
				+   destroy tree and free all non shared components in it.
			
 
				+*/
			
 
				+void starpu_sched_tree_destroy(struct starpu_sched_tree *tree);
			
 
				+struct starpu_sched_tree *starpu_sched_tree_get(unsigned sched_ctx_id);
			
 
				+/**
			
 
				+   recursively set all starpu_sched_component::workers, do not take into account shared parts (except workers).
			
 
				+*/
			
 
				+void starpu_sched_tree_update_workers(struct starpu_sched_tree *t);
			
 
				+/**
			
 
				+   recursively set all starpu_sched_component::workers_in_ctx, do not take into account shared parts (except workers)
			
 
				+*/
			
 
				+void starpu_sched_tree_update_workers_in_ctx(struct starpu_sched_tree *t);
			
 
				+/**
			
 
				+   compatibility with starpu_sched_policy interface
			
 
				+*/
			
 
				+int starpu_sched_tree_push_task(struct starpu_task *task);
			
 
				+/**
			
 
				+   compatibility with starpu_sched_policy interface
			
 
				+*/
			
 
				+struct starpu_task *starpu_sched_tree_pop_task(unsigned sched_ctx);
			
 
				+
			
 
				+/**
			
 
				+   Push a task to a component. This is a helper for <c>component->push_task(component, task)</c> plus tracing.
			
 
				+*/
			
 
				+int starpu_sched_component_push_task(struct starpu_sched_component *from, struct starpu_sched_component *to, struct starpu_task *task);
			
 
				+
			
 
				+/**
			
 
				+   Pull a task from a component. This is a helper for <c>component->pull_task(component)</c> plus tracing.
			
 
				+*/
			
 
				+struct starpu_task *starpu_sched_component_pull_task(struct starpu_sched_component *from, struct starpu_sched_component *to);
			
 
				+
			
 
				+struct starpu_task* starpu_sched_component_pump_to(struct starpu_sched_component *component, struct starpu_sched_component *to, int* success);
			
 
				+struct starpu_task* starpu_sched_component_pump_downstream(struct starpu_sched_component *component, int* success);
			
 
				+int starpu_sched_component_send_can_push_to_parents(struct starpu_sched_component * component);
			
 
				+/**
			
 
				+   compatibility with starpu_sched_policy interface
			
 
				+*/
			
 
				+void starpu_sched_tree_add_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers);
			
 
				+/**
			
 
				+   compatibility with starpu_sched_policy interface
			
 
				+*/
			
 
				+void starpu_sched_tree_remove_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers);
			
 
				+
			
 
				+/**
			
 
				+   Attach component \p child to parent \p parent. Some component may accept only one child, others accept several (e.g. MCT)
			
 
				+*/
			
 
				+void starpu_sched_component_connect(struct starpu_sched_component *parent, struct starpu_sched_component *child);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Generic Scheduling Component API
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+typedef struct starpu_sched_component * (*starpu_sched_component_create_t)(struct starpu_sched_tree *tree, void *data);
			
 
				+
			
 
				+/**
			
 
				+   allocate and initialize component fields with default values:
			
 
				+   .pull_task makes a recursive call on the parents
			
 
				+   .estimated_load compute relative speedup and tasks in sub tree
			
 
				+   .estimated_end return the minimum of recursive call on children
			
 
				+   .add_child is starpu_sched_component_add_child
			
 
				+   .remove_child is starpu_sched_component_remove_child
			
 
				+   .notify_change_workers does nothing
			
 
				+   .deinit_data does nothing
			
 
				+*/
			
 
				+struct starpu_sched_component *starpu_sched_component_create(struct starpu_sched_tree *tree, const char *name) STARPU_ATTRIBUTE_MALLOC;
			
 
				+
			
 
				+/**
			
 
				+   free data allocated by starpu_sched_component_create and call component->deinit_data(component)
			
 
				+   set to <c>NULL</c> the member starpu_sched_component::parents[sched_ctx_id] of all children if it is equal to \p component
			
 
				+*/
			
 
				+
			
 
				+void starpu_sched_component_destroy(struct starpu_sched_component *component);
			
 
				+/**
			
 
				+   recursively destroy non shared parts of a \p component 's tree
			
 
				+*/
			
 
				+void starpu_sched_component_destroy_rec(struct starpu_sched_component *component);
			
 
				+
			
 
				+void starpu_sched_component_add_child(struct starpu_sched_component* component, struct starpu_sched_component * child);
			
 
				+
			
 
				+/**
			
 
				+   return true iff \p component can execute \p task, this function takes into account the workers available in the scheduling context
			
 
				+*/
			
 
				+int starpu_sched_component_can_execute_task(struct starpu_sched_component *component, struct starpu_task *task);
			
 
				+
			
 
				+/**
			
 
				+   return a non-zero value if \p component can execute \p task.
			
 
				+   write the execution prediction length for the best implementation of the best worker available and write this at \p length address.
			
 
				+   this result is more relevant if \p component is homogeneous (see STARPU_SCHED_COMPONENT_IS_HOMOGENEOUS).
			
 
				+   if a worker needs to be calibrated for an implementation, nan is set to \p length.
			
 
				+*/
			
 
				+int STARPU_WARN_UNUSED_RESULT starpu_sched_component_execute_preds(struct starpu_sched_component *component, struct starpu_task *task, double *length);
			
 
				+
			
 
				+/**
			
 
				+   return the average time to transfer \p task data to underlying \p component workers.
			
 
				+*/
			
 
				+double starpu_sched_component_transfer_length(struct starpu_sched_component *component, struct starpu_task *task);
			
 
				+
			
 
				+void starpu_sched_component_prefetch_on_node(struct starpu_sched_component *component, struct starpu_task *task);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Worker Component API
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   return the struct starpu_sched_component corresponding to \p workerid. Undefined if \p workerid is not a valid workerid
			
 
				+*/
			
 
				+struct starpu_sched_component *starpu_sched_component_worker_get(unsigned sched_ctx, int workerid);
			
 
				+struct starpu_sched_component *starpu_sched_component_worker_new(unsigned sched_ctx, int workerid);
			
 
				+
			
 
				+/**
			
 
				+   Create a combined worker that pushes tasks in parallel to workers \p workers (size \p nworkers).
			
 
				+*/
			
 
				+struct starpu_sched_component *starpu_sched_component_parallel_worker_create(struct starpu_sched_tree *tree, unsigned nworkers, unsigned *workers);
			
 
				+
			
 
				+/**
			
 
				+   return the workerid of \p worker_component, undefined if starpu_sched_component_is_worker(worker_component) == 0
			
 
				+*/
			
 
				+int starpu_sched_component_worker_get_workerid(struct starpu_sched_component *worker_component);
			
 
				+
			
 
				+/**
			
 
				+   return true iff \p component is a worker component
			
 
				+*/
			
 
				+int starpu_sched_component_is_worker(struct starpu_sched_component *component);
			
 
				+
			
 
				+/**
			
 
				+   return true iff \p component is a simple worker component
			
 
				+*/
			
 
				+int starpu_sched_component_is_simple_worker(struct starpu_sched_component *component);
			
 
				+
			
 
				+/**
			
 
				+   return true iff \p component is a combined worker component
			
 
				+*/
			
 
				+int starpu_sched_component_is_combined_worker(struct starpu_sched_component *component);
			
 
				+
			
 
				+/**
			
 
				+   compatibility with starpu_sched_policy interface
			
 
				+   update predictions for workers
			
 
				+*/
			
 
				+void starpu_sched_component_worker_pre_exec_hook(struct starpu_task *task, unsigned sched_ctx_id);
			
 
				+
			
 
				+/**
			
 
				+   compatibility with starpu_sched_policy interface
			
 
				+*/
			
 
				+void starpu_sched_component_worker_post_exec_hook(struct starpu_task *task, unsigned sched_ctx_id);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Flow-control Fifo Component API
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   default function for the pull component method, just call pull of parents until one of them returns a task
			
 
				+*/
			
 
				+struct starpu_task * starpu_sched_component_parents_pull_task(struct starpu_sched_component * component, struct starpu_sched_component * to);
			
 
				+
			
 
				+/**
			
 
				+   default function for the can_push component method, just call can_push of parents until one of them returns non-zero
			
 
				+*/
			
 
				+int starpu_sched_component_can_push(struct starpu_sched_component * component, struct starpu_sched_component * to);
			
 
				+
			
 
				+/**
			
 
				+   default function for the can_pull component method, just call can_pull of children until one of them returns non-zero
			
 
				+*/
			
 
				+int starpu_sched_component_can_pull(struct starpu_sched_component * component);
			
 
				+
			
 
				+/**
			
 
				+   function for the can_pull component method, call can_pull of all children
			
 
				+*/
			
 
				+int starpu_sched_component_can_pull_all(struct starpu_sched_component * component);
			
 
				+
			
 
				+/**
			
 
				+   default function for the estimated_load component method, just sum up the loads
			
 
				+   of the children of the component.
			
 
				+*/
			
 
				+double starpu_sched_component_estimated_load(struct starpu_sched_component * component);
			
 
				+
			
 
				+/**
			
 
				+   function that can be used for the estimated_end component method, compute the minimum completion time of the children.
			
 
				+*/
			
 
				+double starpu_sched_component_estimated_end_min(struct starpu_sched_component * component);
			
 
				+
			
 
				+/**
			
 
				+   function that can be used for the estimated_end component method, compute
			
 
				+   the minimum completion time of the children, and add to it an estimation of how
			
 
				+   existing queued work, plus the exp_len work, can be completed. This is typically
			
 
				+   used instead of starpu_sched_component_estimated_end_min when the component
			
 
				+   contains a queue of tasks, which thus needs to be added to the estimations.
			
 
				+*/
			
 
				+double starpu_sched_component_estimated_end_min_add(struct starpu_sched_component * component, double exp_len);
			
 
				+
			
 
				+/**
			
 
				+   default function for the estimated_end component method, compute the average completion time of the children.
			
 
				+*/
			
 
				+double starpu_sched_component_estimated_end_average(struct starpu_sched_component * component);
			
 
				+
			
 
				+struct starpu_sched_component_fifo_data
			
 
				+{
			
 
				+	unsigned ntasks_threshold;
			
 
				+	double exp_len_threshold;
			
 
				+	int ready;
			
 
				+	int exp;
			
 
				+};
			
 
				+
			
 
				+/**
			
 
				+   Return a struct starpu_sched_component with a fifo. A stable sort is performed according to tasks priorities.
			
 
				+   A push_task call on this component does not perform recursive calls, underlying components will have to call pull_task to get it.
			
 
				+   starpu_sched_component::estimated_end function computes the estimated length by dividing the sequential length by the number of underlying workers.
			
 
				+*/
			
 
				+struct starpu_sched_component *starpu_sched_component_fifo_create(struct starpu_sched_tree *tree, struct starpu_sched_component_fifo_data *fifo_data) STARPU_ATTRIBUTE_MALLOC;
			
 
				+
			
 
				+/**
			
 
				+   return true iff \p component is a fifo component
			
 
				+*/
			
 
				+int starpu_sched_component_is_fifo(struct starpu_sched_component *component);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Flow-control Prio Component API
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+struct starpu_sched_component_prio_data
			
 
				+{
			
 
				+	unsigned ntasks_threshold;
			
 
				+	double exp_len_threshold;
			
 
				+	int ready;
			
 
				+	int exp;
			
 
				+};
			
 
				+struct starpu_sched_component *starpu_sched_component_prio_create(struct starpu_sched_tree *tree, struct starpu_sched_component_prio_data *prio_data) STARPU_ATTRIBUTE_MALLOC;
			
 
				+int starpu_sched_component_is_prio(struct starpu_sched_component *component);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Resource-mapping Work-Stealing Component API
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   return a component that performs work stealing scheduling. Tasks are pushed in a round robin way. estimated_end returns the average of the expected length of the fifos, starting at the average of the expected_end of its children. When a worker has to steal a task, it steals a task in a round robin way, and gets the last pushed task of the highest priority.
			
 
				+*/
			
 
				+struct starpu_sched_component *starpu_sched_component_work_stealing_create(struct starpu_sched_tree *tree, void *arg) STARPU_ATTRIBUTE_MALLOC;
			
 
				+
			
 
				+/**
			
 
				+   return true iff \p component is a work stealing component
			
 
				+ */
			
 
				+int starpu_sched_component_is_work_stealing(struct starpu_sched_component *component);
			
 
				+
			
 
				+/**
			
 
				+   undefined if there is no work stealing component in the scheduler. If any, \p task is pushed in a default way if the caller is the application, and in the caller's fifo if it is a worker.
			
 
				+*/
			
 
				+int starpu_sched_tree_work_stealing_push_task(struct starpu_task *task);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Resource-mapping Random Component API
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   create a component that performs random scheduling
			
 
				+*/
			
 
				+struct starpu_sched_component *starpu_sched_component_random_create(struct starpu_sched_tree *tree, void *arg) STARPU_ATTRIBUTE_MALLOC;
			
 
				+
			
 
				+/**
			
 
				+   return true iff \p component is a random component
			
 
				+*/
			
 
				+int starpu_sched_component_is_random(struct starpu_sched_component *);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Resource-mapping Eager Component API
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+struct starpu_sched_component *starpu_sched_component_eager_create(struct starpu_sched_tree *tree, void *arg) STARPU_ATTRIBUTE_MALLOC;
			
 
				+int starpu_sched_component_is_eager(struct starpu_sched_component *);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Resource-mapping Eager Prio Component API
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+struct starpu_sched_component *starpu_sched_component_eager_prio_create(struct starpu_sched_tree *tree, void *arg) STARPU_ATTRIBUTE_MALLOC;
			
 
				+int starpu_sched_component_is_eager_prio(struct starpu_sched_component *);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Resource-mapping Eager-Calibration Component API
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+struct starpu_sched_component *starpu_sched_component_eager_calibration_create(struct starpu_sched_tree *tree, void *arg) STARPU_ATTRIBUTE_MALLOC;
			
 
				+int starpu_sched_component_is_eager_calibration(struct starpu_sched_component *);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Resource-mapping MCT Component API
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+struct starpu_sched_component_mct_data
			
 
				+{
			
 
				+	double alpha;
			
 
				+	double beta;
			
 
				+	double _gamma;
			
 
				+	double idle_power;
			
 
				+};
			
 
				+
			
 
				+/**
			
 
				+   create a component with mct_data parameters. the mct component doesn't
			
 
				+   do anything but pushing tasks on no_perf_model_component and
			
 
				+   calibrating_component
			
 
				+*/
			
 
				+struct starpu_sched_component *starpu_sched_component_mct_create(struct starpu_sched_tree *tree, struct starpu_sched_component_mct_data *mct_data) STARPU_ATTRIBUTE_MALLOC;
			
 
				+
			
 
				+int starpu_sched_component_is_mct(struct starpu_sched_component *component);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Resource-mapping Heft Component API
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+struct starpu_sched_component *starpu_sched_component_heft_create(struct starpu_sched_tree *tree, struct starpu_sched_component_mct_data *mct_data) STARPU_ATTRIBUTE_MALLOC;
			
 
				+int starpu_sched_component_is_heft(struct starpu_sched_component *component);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Resource-mapping Heteroprio Component API
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+struct starpu_sched_component_heteroprio_data
			
 
				+{
			
 
				+	struct starpu_sched_component_mct_data *mct;
			
 
				+	unsigned batch;
			
 
				+};
			
 
				+
			
 
				+struct starpu_sched_component * starpu_sched_component_heteroprio_create(struct starpu_sched_tree *tree, struct starpu_sched_component_heteroprio_data * params) STARPU_ATTRIBUTE_MALLOC;
			
 
				+int starpu_sched_component_is_heteroprio(struct starpu_sched_component *component);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Special-purpose Best_Implementation Component API
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   Select the implementation that offers the shortest computation length for the first worker that can execute the task.
			
 
				+   Or an implementation that needs to be calibrated.
			
 
				+   Also set starpu_task::predicted and starpu_task::predicted_transfer for memory component of the first suitable workerid.
			
 
				+   If starpu_sched_component::push_task method is called and starpu_sched_component::nchildren > 1 the result is undefined.
			
 
				+*/
			
 
				+struct starpu_sched_component *starpu_sched_component_best_implementation_create(struct starpu_sched_tree *tree, void *arg) STARPU_ATTRIBUTE_MALLOC;
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Special-purpose Perfmodel_Select Component API
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+struct starpu_sched_component_perfmodel_select_data
			
 
				+{
			
 
				+	struct starpu_sched_component *calibrator_component;
			
 
				+	struct starpu_sched_component *no_perfmodel_component;
			
 
				+	struct starpu_sched_component *perfmodel_component;
			
 
				+};
			
 
				+struct starpu_sched_component *starpu_sched_component_perfmodel_select_create(struct starpu_sched_tree *tree, struct starpu_sched_component_perfmodel_select_data *perfmodel_select_data) STARPU_ATTRIBUTE_MALLOC;
			
 
				+int starpu_sched_component_is_perfmodel_select(struct starpu_sched_component *component);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Staged pull Component API
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+struct starpu_sched_component * starpu_sched_component_stage_create(struct starpu_sched_tree *tree, void *arg) STARPU_ATTRIBUTE_MALLOC;
			
 
				+int starpu_sched_component_is_stage(struct starpu_sched_component *component);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name User-choice push Component API
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+struct starpu_sched_component * starpu_sched_component_userchoice_create(struct starpu_sched_tree *tree, void *arg) STARPU_ATTRIBUTE_MALLOC;
			
 
				+int starpu_sched_component_is_userchoice(struct starpu_sched_component *component);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Recipe Component API
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   parameters for starpu_sched_component_composed_component_create
			
 
				+*/
			
 
				+struct starpu_sched_component_composed_recipe;
			
 
				+
			
 
				+/**
			
 
				+   return an empty recipe for a composed component, it should not be used without modification
			
 
				+*/
			
 
				+struct starpu_sched_component_composed_recipe *starpu_sched_component_composed_recipe_create(void) STARPU_ATTRIBUTE_MALLOC;
			
 
				+
			
 
				+/**
			
 
				+   return a recipe to build a composed component with a \p create_component
			
 
				+*/
			
 
				+struct starpu_sched_component_composed_recipe *starpu_sched_component_composed_recipe_create_singleton(struct starpu_sched_component *(*create_component)(struct starpu_sched_tree *tree, void *arg), void *arg) STARPU_ATTRIBUTE_MALLOC;
			
 
				+
			
 
				+/**
			
 
				+   add \p create_component under all previous components in recipe
			
 
				+*/
			
 
				+void starpu_sched_component_composed_recipe_add(struct starpu_sched_component_composed_recipe *recipe, struct starpu_sched_component *(*create_component)(struct starpu_sched_tree *tree, void *arg), void *arg);
			
 
				+
			
 
				+/**
			
 
				+   destroy composed_sched_component, this should be done after starpu_sched_component_composed_component_create was called
			
 
				+*/
			
 
				+void starpu_sched_component_composed_recipe_destroy(struct starpu_sched_component_composed_recipe *);
			
 
				+
			
 
				+/**
			
 
				+   create a component that behaves as if all components of the recipe were linked, except that you cannot use the starpu_sched_component_is_foo functions
			
 
				+   if the recipe contains a single create_foo arg_foo pair, create_foo(arg_foo) is returned instead of a composed component
			
 
				+*/
			
 
				+struct starpu_sched_component *starpu_sched_component_composed_component_create(struct starpu_sched_tree *tree, struct starpu_sched_component_composed_recipe *recipe) STARPU_ATTRIBUTE_MALLOC;
			
 
				+
			
 
				+#ifdef STARPU_HAVE_HWLOC
			
 
				+/**
			
 
				+   Define how to build a scheduler according to topology. Each level (except for hwloc_machine_composed_sched_component) can be <c>NULL</c>, then
			
 
				+   the level is just skipped. Bugs everywhere, do not rely on.
			
 
				+*/
			
 
				+struct starpu_sched_component_specs
			
 
				+{
			
 
				+	/**
			
 
				+	   the composed component to put on the top of the scheduler
			
 
				+	   this member must not be <c>NULL</c> as it is the root of the topology
			
 
				+	*/
			
 
				+	struct starpu_sched_component_composed_recipe *hwloc_machine_composed_sched_component;
			
 
				+	/**
			
 
				+	   the composed component to put for each memory component
			
 
				+	*/
			
 
				+	struct starpu_sched_component_composed_recipe *hwloc_component_composed_sched_component;
			
 
				+	/**
			
 
				+	   the composed component to put for each socket
			
 
				+	*/
			
 
				+	struct starpu_sched_component_composed_recipe *hwloc_socket_composed_sched_component;
			
 
				+	/**
			
 
				+	   the composed component to put for each cache
			
 
				+	*/
			
 
				+	struct starpu_sched_component_composed_recipe *hwloc_cache_composed_sched_component;
			
 
				+
			
 
				+	/**
			
 
				+	   a function that returns a starpu_sched_component_composed_recipe to put on top of a worker of type \p archtype.
			
 
				+	   <c>NULL</c> is a valid return value, then no component will be added on top
			
 
				+	*/
			
 
				+	struct starpu_sched_component_composed_recipe *(*worker_composed_sched_component)(enum starpu_worker_archtype archtype);
			
 
				+	/**
			
 
				+	   this flag is a dirty hack because of the poor expressivity of this interface. As an example, if you want to build
			
 
				+	   a heft component with a fifo component per numa component, and you also have GPUs, if this flag is set, GPUs will share those fifos.
			
 
				+	   If this flag is not set, a new fifo will be built for each of them (if they have the same starpu_perf_arch and the same
			
 
				+	   numa component it will be shared). It indicates whether heterogeneous workers should be brothers or cousins, for example whether a gpu and a cpu should share their numa node or not
			
 
				+	*/
			
 
				+	int mix_heterogeneous_workers;
			
 
				+};
			
 
				+
			
 
				+
			
 
				+/**
			
 
				+   build a scheduler for \p sched_ctx_id according to \p s and the hwloc topology of the machine.
			
 
				+*/
			
 
				+struct starpu_sched_tree *starpu_sched_component_make_scheduler(unsigned sched_ctx_id, struct starpu_sched_component_specs s);
			
 
				+#endif /* STARPU_HAVE_HWLOC */
			
 
				+
			
 
				+/**
			
 
				+   @name Basic API
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+#define STARPU_SCHED_SIMPLE_DECIDE_MASK		(3<<0)
			
 
				+
			
 
				+/**
			
 
				+   Request to create downstream queues per worker, i.e. the scheduling decision-making component will choose exactly which workers tasks should go to.
			
 
				+*/
			
 
				+#define STARPU_SCHED_SIMPLE_DECIDE_WORKERS	(1<<0)
			
 
				+
			
 
				+/**
			
 
				+   Request to create downstream queues per memory nodes, i.e. the scheduling decision-making component will choose which memory node tasks will go to.
			
 
				+*/
			
 
				+#define STARPU_SCHED_SIMPLE_DECIDE_MEMNODES	(2<<0)
			
 
				+
			
 
				+/**
			
 
				+   Request to create downstream queues per computation arch, i.e. the scheduling decision-making component will choose whether tasks go to CPUs, or CUDA, or OpenCL, etc.
			
 
				+*/
			
 
				+#define STARPU_SCHED_SIMPLE_DECIDE_ARCHS	(3<<0)
			
 
				+
			
 
				+/**
			
 
				+   Request to create the scheduling decision-making component even if there is only one available choice. This is useful for instance when the decision-making component will store tasks itself (and not use STARPU_SCHED_SIMPLE_FIFO_ABOVE) to decide in which order tasks should be passed below.
			
 
				+*/
			
 
				+#define STARPU_SCHED_SIMPLE_DECIDE_ALWAYS	(1<<3)
			
 
				+
			
 
				+/**
			
 
				+   Request to add a perfmodel selector above the scheduling decision-making component. That way, only tasks with a calibrated performance model will be given to the component, other tasks will go to an eager branch that will distribute tasks so that their performance models will get calibrated.
			
 
				+   In other words, this is needed when using a component which needs performance models for tasks.
			
 
				+*/
			
 
				+#define STARPU_SCHED_SIMPLE_PERFMODEL		(1<<4)
			
 
				+
			
 
				+/**
			
 
				+   Request that a component be added just above workers, that chooses the best task implementation.
			
 
				+*/
			
 
				+#define STARPU_SCHED_SIMPLE_IMPL		(1<<5)
			
 
				+
			
 
				+/**
			
 
				+   Request to create a fifo above the scheduling decision-making component, otherwise tasks will be pushed directly to the component.
			
 
				+
			
 
				+   This is useful to store tasks if there is a fifo below which limits the number of tasks to be scheduled in advance. The scheduling decision-making component can also store tasks itself, in which case this flag is not useful.
			
 
				+*/
			
 
				+#define STARPU_SCHED_SIMPLE_FIFO_ABOVE		(1<<6)
			
 
				+
			
 
				+/**
			
 
				+   Request that the fifo above be sorted by priorities
			
 
				+*/
			
 
				+#define STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO	(1<<7)
			
 
				+
			
 
				+/**
			
 
				+   Request to create fifos below the scheduling decision-making component, otherwise tasks will be pulled directly from workers.
			
 
				+
			
 
				+   This is useful to be able to schedule a (tunable) small number of tasks in advance only.
			
 
				+*/
			
 
				+#define STARPU_SCHED_SIMPLE_FIFOS_BELOW		(1<<8)
			
 
				+
			
 
				+/**
			
 
				+   Request that the fifos below be sorted by priorities
			
 
				+*/
			
 
				+#define STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO	(1<<9)
			
 
				+
			
 
				+/**
			
 
				+   Request that the fifos below be pulled rather ready tasks
			
 
				+*/
			
 
				+#define STARPU_SCHED_SIMPLE_FIFOS_BELOW_READY	(1<<10)
			
 
				+
			
 
				+/**
			
 
				+   Request that work between workers using the same fifo below be distributed using a work stealing component.
			
 
				+*/
			
 
				+#define STARPU_SCHED_SIMPLE_WS_BELOW		(1<<11)
			
 
				+
			
 
				+/**
			
 
				+   Request to not only choose between simple workers, but also choose between combined workers.
			
 
				+*/
			
 
				+#define STARPU_SCHED_SIMPLE_COMBINED_WORKERS	(1<<12)
			
 
				+
			
 
				+/**
			
 
				+   Request that the fifos below keep track of expected duration, start and end time of their elements
			
 
				+*/
			
 
				+#define STARPU_SCHED_SIMPLE_FIFOS_BELOW_EXP	(1<<13)
			
 
				+
			
 
				+/**
			
 
				+   Create a simple modular scheduler tree around a scheduling decision-making
			
 
				+   component \p component. The details of what should be built around \p component
			
 
				+   are described by \p flags. The different STARPU_SCHED_SIMPLE_DECIDE_* flags are
			
 
				+   mutually exclusive. \p data is passed to the \p create_decision_component
			
 
				+   function when creating the decision component.
			
 
				+*/
			
 
				+void starpu_sched_component_initialize_simple_scheduler(starpu_sched_component_create_t create_decision_component, void *data, unsigned flags, unsigned sched_ctx_id);
			
 
				+
			
 
				+/**
			
 
				+   Create a simple modular scheduler tree around several scheduling decision-making
			
 
				+   components. The parameters are similar to
			
 
				+   starpu_sched_component_initialize_simple_scheduler, but per scheduling decision, for instance:
			
 
				+
			
 
				+   starpu_sched_component_initialize_simple_schedulers(sched_ctx_id, 2,
			
 
				+     create1, data1, flags1,
			
 
				+     create2, data2, flags2);
			
 
				+
			
 
				+   The different flags parameters must be coherent: same decision flags. They
			
 
				+   must not include the perfmodel flag (not supported yet).
			
 
				+*/
			
 
				+void starpu_sched_component_initialize_simple_schedulers(unsigned sched_ctx_id, unsigned ndecisions, ...);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+#define STARPU_COMPONENT_MUTEX_LOCK(m) \
			
 
				+do \
			
 
				+{ \
			
 
				+	const int _relaxed_state = starpu_worker_get_relax_state(); \
			
 
				+	if (!_relaxed_state) \
			
 
				+		starpu_worker_relax_on(); \
			
 
				+	STARPU_PTHREAD_MUTEX_LOCK((m)); \
			
 
				+	if (!_relaxed_state) \
			
 
				+		starpu_worker_relax_off(); \
			
 
				+} \
			
 
				+while(0)
			
 
				+
			
 
				+#define STARPU_COMPONENT_MUTEX_TRYLOCK(m) STARPU_PTHREAD_MUTEX_TRYLOCK((m))
			
 
				+
			
 
				+#define STARPU_COMPONENT_MUTEX_UNLOCK(m) STARPU_PTHREAD_MUTEX_UNLOCK((m))
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif /* __STARPU_SCHED_COMPONENT_H__ */
			
--- a/include/starpu/1.3/starpu_sched_ctx.h
+++ b/include/starpu/1.3/starpu_sched_ctx.h
@@ -0,0 +1,438 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ * Copyright (C) 2016       Uppsala University
			
 
				+ * Copyright (C) 2017       Arthur Chevalier
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_SCHED_CTX_H__
			
 
				+#define __STARPU_SCHED_CTX_H__
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   @defgroup API_Scheduling_Contexts Scheduling Contexts
			
 
				+   @brief StarPU permits on one hand grouping workers in combined
			
 
				+   workers in order to execute a parallel task and on the other hand
			
 
				+   grouping tasks in bundles that will be executed by a single
			
 
				+   specified worker.
			
 
				+   In contrast when we group workers in scheduling contexts we submit
			
 
				+   starpu tasks to them and we schedule them with the policy assigned
			
 
				+   to the context. Scheduling contexts can be created, deleted and
			
 
				+   modified dynamically.
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   @name Scheduling Contexts Basic API
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_sched_ctx_create() to specify a
			
 
				+   name for a scheduling policy
			
 
				+*/
			
 
				+#define STARPU_SCHED_CTX_POLICY_NAME		 (1<<16)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_sched_ctx_create() to specify a
			
 
				+   pointer to a scheduling policy
			
 
				+*/
			
 
				+#define STARPU_SCHED_CTX_POLICY_STRUCT		 (2<<16)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_sched_ctx_create() to specify a
			
 
				+   minimum scheduler priority value.
			
 
				+*/
			
 
				+#define STARPU_SCHED_CTX_POLICY_MIN_PRIO	 (3<<16)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_sched_ctx_create() to specify a
			
 
				+   maximum scheduler priority value.
			
 
				+*/
			
 
				+#define STARPU_SCHED_CTX_POLICY_MAX_PRIO	 (4<<16)
			
 
				+
			
 
				+#define STARPU_SCHED_CTX_HIERARCHY_LEVEL         (5<<16)
			
 
				+#define STARPU_SCHED_CTX_NESTED                  (6<<16)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_sched_ctx_create() to specify ???
			
 
				+*/
			
 
				+#define STARPU_SCHED_CTX_AWAKE_WORKERS           (7<<16)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_sched_ctx_create() to specify a
			
 
				+   function pointer allowing to initialize the scheduling policy.
			
 
				+*/
			
 
				+#define STARPU_SCHED_CTX_POLICY_INIT             (8<<16)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_sched_ctx_create() to specify a
			
 
				+   pointer to some user data related to the context being created.
			
 
				+*/
			
 
				+#define STARPU_SCHED_CTX_USER_DATA               (9<<16)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_sched_ctx_create() in order to create a
			
 
				+   context on the NVIDIA GPU to specify the number of SMs the context
			
 
				+   should have
			
 
				+*/
			
 
				+#define STARPU_SCHED_CTX_CUDA_NSMS               (10<<16)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_sched_ctx_create() to specify
			
 
				+   a list of sub contexts of the current context.
			
 
				+*/
			
 
				+#define STARPU_SCHED_CTX_SUB_CTXS                (11<<16)
			
 
				+
			
 
				+/**
			
 
				+   Create a scheduling context with the given parameters
			
 
				+   (see below) and assign the workers in \p workerids_ctx to execute the
			
 
				+   tasks submitted to it. The return value represents the identifier of
			
 
				+   the context that has just been created. It will be further used to
			
 
				+   indicate the context the tasks will be submitted to. The return value
			
 
				+   should be at most ::STARPU_NMAX_SCHED_CTXS.
			
 
				+
			
 
				+   The arguments following the name of the scheduling context can be of
			
 
				+   the following types:
			
 
				+   <ul>
			
 
				+   <li> ::STARPU_SCHED_CTX_POLICY_NAME, followed by the name of a
			
 
				+   predefined scheduling policy. Use an empty string to create the
			
 
				+   context with the default scheduling policy.
			
 
				+   </li>
			
 
				+   <li> ::STARPU_SCHED_CTX_POLICY_STRUCT, followed by a pointer to a
			
 
				+   custom scheduling policy (struct starpu_sched_policy *)
			
 
				+   </li>
			
 
				+   <li> ::STARPU_SCHED_CTX_POLICY_MIN_PRIO, followed by an integer
			
 
				+   representing the minimum priority value to be defined for the
			
 
				+   scheduling policy.
			
 
				+   </li>
			
 
				+   <li> ::STARPU_SCHED_CTX_POLICY_MAX_PRIO, followed by an integer
			
 
				+   representing the maximum priority value to be defined for the
			
 
				+   scheduling policy.
			
 
				+   </li>
			
 
				+   <li> ::STARPU_SCHED_CTX_POLICY_INIT, followed by a function pointer
			
 
				+   (i.e. void init_sched(unsigned)) allowing to initialize the scheduling policy.
			
 
				+   </li>
			
 
				+   <li> ::STARPU_SCHED_CTX_USER_DATA, followed by a pointer
			
 
				+   to a custom user data structure, to be retrieved by \ref starpu_sched_ctx_get_user_data().
			
 
				+   </li>
			
 
				+   </ul>
			
 
				+*/
			
 
				+unsigned starpu_sched_ctx_create(int *workerids_ctx, int nworkers_ctx, const char *sched_ctx_name, ...);
			
 
				+
			
 
				+/**
			
 
				+   Create a context indicating an approximate interval of resources
			
 
				+*/
			
 
				+unsigned starpu_sched_ctx_create_inside_interval(const char *policy_name, const char *sched_ctx_name, int min_ncpus, int max_ncpus, int min_ngpus, int max_ngpus, unsigned allow_overlap);
			
 
				+
			
 
				+/**
			
 
				+   Execute the callback whenever the last task of the context finished
			
 
				+   executing, it is called with the parameters \p sched_ctx and any
			
 
				+   other parameter needed by the application (packed in \p args)
			
 
				+*/
			
 
				+void starpu_sched_ctx_register_close_callback(unsigned sched_ctx_id, void (*close_callback)(unsigned sched_ctx_id, void* args), void *args);
			
 
				+
			
 
				+/**
			
 
				+   Add dynamically the workers in \p workerids_ctx to the context \p
			
 
				+   sched_ctx_id. The last argument cannot be greater than
			
 
				+   ::STARPU_NMAX_SCHED_CTXS.
			
 
				+*/
			
 
				+void starpu_sched_ctx_add_workers(int *workerids_ctx, unsigned nworkers_ctx, unsigned sched_ctx_id);
			
 
				+
			
 
				+/**
			
 
				+   Remove the workers in \p workerids_ctx from the context
			
 
				+   \p sched_ctx_id. The last argument cannot be greater than
			
 
				+   ::STARPU_NMAX_SCHED_CTXS.
			
 
				+*/
			
 
				+void starpu_sched_ctx_remove_workers(int *workerids_ctx, unsigned nworkers_ctx, unsigned sched_ctx_id);
			
 
				+
			
 
				+/**
			
 
				+   Print on the file \p f the worker names belonging to the context \p
			
 
				+   sched_ctx_id
			
 
				+*/
			
 
				+void starpu_sched_ctx_display_workers(unsigned sched_ctx_id, FILE *f);
			
 
				+
			
 
				+/**
			
 
				+   Delete scheduling context \p sched_ctx_id and transfer remaining
			
 
				+   workers to the inheritor scheduling context.
			
 
				+*/
			
 
				+void starpu_sched_ctx_delete(unsigned sched_ctx_id);
			
 
				+
			
 
				+/**
			
 
				+   Indicate that the context \p inheritor will inherit the resources
			
 
				+   of the context \p sched_ctx_id when \p sched_ctx_id will be
			
 
				+   deleted.
			
 
				+*/
			
 
				+void starpu_sched_ctx_set_inheritor(unsigned sched_ctx_id, unsigned inheritor);
			
 
				+
			
 
				+unsigned starpu_sched_ctx_get_inheritor(unsigned sched_ctx_id);
			
 
				+
			
 
				+unsigned starpu_sched_ctx_get_hierarchy_level(unsigned sched_ctx_id);
			
 
				+
			
 
				+/**
			
 
				+   Set the scheduling context the subsequent tasks will be submitted
			
 
				+   to
			
 
				+*/
			
 
				+void starpu_sched_ctx_set_context(unsigned *sched_ctx_id);
			
 
				+
			
 
				+/**
			
 
				+   Return the scheduling context the tasks are currently submitted to,
			
 
				+   or ::STARPU_NMAX_SCHED_CTXS if no default context has been defined
			
 
				+   by calling the function starpu_sched_ctx_set_context().
			
 
				+*/
			
 
				+unsigned starpu_sched_ctx_get_context(void);
			
 
				+
			
 
				+/**
			
 
				+   Stop submitting tasks from the empty context list until the next
			
 
				+   time the context has time to check the empty context list
			
 
				+*/
			
 
				+void starpu_sched_ctx_stop_task_submission(void);
			
 
				+
			
 
				+/**
			
 
				+   Indicate to StarPU that the application finished submitting to this
			
 
				+   context in order to move the workers to the inheritor as soon as
			
 
				+   possible.
			
 
				+*/
			
 
				+void starpu_sched_ctx_finished_submit(unsigned sched_ctx_id);
			
 
				+
			
 
				+/**
			
 
				+   Return the list of workers in the array \p workerids, the return
			
 
				+   value is the number of workers. The user should free the \p
			
 
				+   workerids table after finishing using it (it is allocated inside
			
 
				+   the function with the proper size)
			
 
				+*/
			
 
				+unsigned starpu_sched_ctx_get_workers_list(unsigned sched_ctx_id, int **workerids);
			
 
				+
			
 
				+/**
			
 
				+   Return the list of workers in the array \p workerids, the return
			
 
				+   value is the number of workers. This list is provided in raw order,
			
 
				+   i.e. not sorted by tree or list order, and the user should not free
			
 
				+   the \p workerids table. This function is thus much less costly than
			
 
				+   starpu_sched_ctx_get_workers_list().
			
 
				+*/
			
 
				+unsigned starpu_sched_ctx_get_workers_list_raw(unsigned sched_ctx_id, int **workerids);
			
 
				+
			
 
				+/**
			
 
				+   Return the number of workers managed by the specified context
			
 
				+   (Usually needed to verify if it manages any workers or if it should
			
 
				+   be blocked)
			
 
				+*/
			
 
				+unsigned starpu_sched_ctx_get_nworkers(unsigned sched_ctx_id);
			
 
				+
			
 
				+/**
			
 
				+   Return the number of workers shared by two contexts.
			
 
				+*/
			
 
				+unsigned starpu_sched_ctx_get_nshared_workers(unsigned sched_ctx_id, unsigned sched_ctx_id2);
			
 
				+
			
 
				+/**
			
 
				+   Return 1 if the worker belongs to the context and 0 otherwise
			
 
				+*/
			
 
				+unsigned starpu_sched_ctx_contains_worker(int workerid, unsigned sched_ctx_id);
			
 
				+
			
 
				+unsigned starpu_sched_ctx_contains_type_of_worker(enum starpu_worker_archtype arch, unsigned sched_ctx_id);
			
 
				+
			
 
				+/**
			
 
				+   Return the workerid if the worker belongs to the context and -1 otherwise.
			
 
				+   If the thread calling this function is not a worker the function returns -1
			
 
				+   as it calls the function starpu_worker_get_id().
			
 
				+*/
			
 
				+unsigned starpu_sched_ctx_worker_get_id(unsigned sched_ctx_id);
			
 
				+
			
 
				+unsigned starpu_sched_ctx_get_ctx_for_task(struct starpu_task *task);
			
 
				+
			
 
				+/**
			
 
				+   Check if a worker is shared between several contexts
			
 
				+*/
			
 
				+unsigned starpu_sched_ctx_overlapping_ctxs_on_worker(int workerid);
			
 
				+
			
 
				+/**
			
 
				+   Return the user data pointer associated to the scheduling context.
			
 
				+*/
			
 
				+void *starpu_sched_ctx_get_user_data(unsigned sched_ctx_id);
			
 
				+
			
 
				+void starpu_sched_ctx_set_user_data(unsigned sched_ctx_id, void* user_data);
			
 
				+
			
 
				+/**
			
 
				+   Allocate the scheduling policy data (private information of the
			
 
				+   scheduler like queues, variables, additional condition variables)
			
 
				+   for the context
			
 
				+*/
			
 
				+void starpu_sched_ctx_set_policy_data(unsigned sched_ctx_id, void *policy_data);
			
 
				+
			
 
				+/**
			
 
				+   Return the scheduling policy data (private information of the
			
 
				+   scheduler) previously assigned to the context.
			
 
				+*/
			
 
				+void *starpu_sched_ctx_get_policy_data(unsigned sched_ctx_id);
			
 
				+
			
 
				+struct starpu_sched_policy *starpu_sched_ctx_get_sched_policy(unsigned sched_ctx_id);
			
 
				+
			
 
				+/**
			
 
				+   Execute any parallel code on the workers of the sched_ctx (workers
			
 
				+   are blocked)
			
 
				+*/
			
 
				+void *starpu_sched_ctx_exec_parallel_code(void* (*func)(void*), void *param, unsigned sched_ctx_id);
			
 
				+
			
 
				+int starpu_sched_ctx_get_nready_tasks(unsigned sched_ctx_id);
			
 
				+
			
 
				+double starpu_sched_ctx_get_nready_flops(unsigned sched_ctx_id);
			
 
				+
			
 
				+void starpu_sched_ctx_list_task_counters_increment(unsigned sched_ctx_id, int workerid);
			
 
				+
			
 
				+void starpu_sched_ctx_list_task_counters_decrement(unsigned sched_ctx_id, int workerid);
			
 
				+
			
 
				+void starpu_sched_ctx_list_task_counters_reset(unsigned sched_ctx_id, int workerid);
			
 
				+
			
 
				+void starpu_sched_ctx_list_task_counters_increment_all_ctx_locked(struct starpu_task *task, unsigned sched_ctx_id);
			
 
				+
			
 
				+void starpu_sched_ctx_list_task_counters_decrement_all_ctx_locked(struct starpu_task *task, unsigned sched_ctx_id);
			
 
				+
			
 
				+void starpu_sched_ctx_list_task_counters_reset_all(struct starpu_task *task, unsigned sched_ctx_id);
			
 
				+
			
 
				+void starpu_sched_ctx_set_priority(int *workers, int nworkers, unsigned sched_ctx_id, unsigned priority);
			
 
				+
			
 
				+unsigned starpu_sched_ctx_get_priority(int worker, unsigned sched_ctx_id);
			
 
				+
			
 
				+void starpu_sched_ctx_get_available_cpuids(unsigned sched_ctx_id, int **cpuids, int *ncpuids);
			
 
				+
			
 
				+void starpu_sched_ctx_bind_current_thread_to_cpuid(unsigned cpuid);
			
 
				+
			
 
				+int starpu_sched_ctx_book_workers_for_task(unsigned sched_ctx_id, int *workerids, int nworkers);
			
 
				+
			
 
				+void starpu_sched_ctx_unbook_workers_for_task(unsigned sched_ctx_id, int master);
			
 
				+
			
 
				+/**
			
 
				+   Return the first context (child of sched_ctx_id) where the workerid
			
 
				+   is master
			
 
				+ */
			
 
				+unsigned starpu_sched_ctx_worker_is_master_for_child_ctx(int workerid, unsigned sched_ctx_id);
			
 
				+
			
 
				+/**
			
 
				+   Return the context id of masterid if it is the master of a context. If
			
 
				+   not, return ::STARPU_NMAX_SCHED_CTXS.
			
 
				+*/
			
 
				+unsigned starpu_sched_ctx_master_get_context(int masterid);
			
 
				+
			
 
				+void starpu_sched_ctx_revert_task_counters_ctx_locked(unsigned sched_ctx_id, double flops);
			
 
				+
			
 
				+void starpu_sched_ctx_move_task_to_ctx_locked(struct starpu_task *task, unsigned sched_ctx, unsigned with_repush);
			
 
				+
			
 
				+int starpu_sched_ctx_get_worker_rank(unsigned sched_ctx_id);
			
 
				+
			
 
				+void (*starpu_sched_ctx_get_sched_policy_init(unsigned sched_ctx_id))(unsigned);
			
 
				+
			
 
				+unsigned starpu_sched_ctx_has_starpu_scheduler(unsigned sched_ctx_id, unsigned *awake_workers);
			
 
				+
			
 
				+int starpu_sched_ctx_get_stream_worker(unsigned sub_ctx);
			
 
				+int starpu_sched_ctx_get_nsms(unsigned sched_ctx);
			
 
				+void starpu_sched_ctx_get_sms_interval(int stream_workerid, int *start, int *end);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Scheduling Context Priorities
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   Return the current minimum priority level supported by the
			
 
				+   scheduling policy of the given scheduler context.
			
 
				+*/
			
 
				+int starpu_sched_ctx_get_min_priority(unsigned sched_ctx_id);
			
 
				+
			
 
				+/**
			
 
				+   Return the current maximum priority level supported by the
			
 
				+   scheduling policy of the given scheduler context.
			
 
				+*/
			
 
				+int starpu_sched_ctx_get_max_priority(unsigned sched_ctx_id);
			
 
				+
			
 
				+/**
			
 
				+   Define the minimum task priority level supported by the scheduling
			
 
				+   policy of the given scheduler context. The default minimum priority
			
 
				+   level is the same as the default priority level which is 0 by
			
 
				+   convention. The application may access that value by calling the
			
 
				+   function starpu_sched_ctx_get_min_priority(). This function should
			
 
				+   only be called from the initialization method of the scheduling
			
 
				+   policy, and should not be used directly from the application.
			
 
				+*/
			
 
				+int starpu_sched_ctx_set_min_priority(unsigned sched_ctx_id, int min_prio);
			
 
				+
			
 
				+/**
			
 
				+   Define the maximum priority level supported by the scheduling
			
 
				+   policy of the given scheduler context. The default maximum priority
			
 
				+   level is 1. The application may access that value by calling the
			
 
				+   starpu_sched_ctx_get_max_priority() function. This function should
			
 
				+   only be called from the initialization method of the scheduling
			
 
				+   policy, and should not be used directly from the application.
			
 
				+*/
			
 
				+int starpu_sched_ctx_set_max_priority(unsigned sched_ctx_id, int max_prio);
			
 
				+
			
 
				+int starpu_sched_ctx_min_priority_is_set(unsigned sched_ctx_id);
			
 
				+
			
 
				+int starpu_sched_ctx_max_priority_is_set(unsigned sched_ctx_id);
			
 
				+
			
 
				+/**
			
 
				+   Provided for legacy reasons.
			
 
				+*/
			
 
				+#define STARPU_MIN_PRIO		(starpu_sched_get_min_priority())
			
 
				+
			
 
				+/**
			
 
				+   Provided for legacy reasons.
			
 
				+*/
			
 
				+#define STARPU_MAX_PRIO		(starpu_sched_get_max_priority())
			
 
				+
			
 
				+/**
			
 
				+   By convention, the default priority level should be 0 so that we
			
 
				+   can statically allocate tasks with a default priority.
			
 
				+*/
			
 
				+#define STARPU_DEFAULT_PRIO	0
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @name Scheduling Context Worker Collection
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   Create a worker collection of the type indicated by the last
			
 
				+   parameter for the context specified through the first parameter.
			
 
				+*/
			
 
				+struct starpu_worker_collection *starpu_sched_ctx_create_worker_collection(unsigned sched_ctx_id, enum starpu_worker_collection_type type) STARPU_ATTRIBUTE_MALLOC;
			
 
				+
			
 
				+/**
			
 
				+   Delete the worker collection of the specified scheduling context
			
 
				+*/
			
 
				+void starpu_sched_ctx_delete_worker_collection(unsigned sched_ctx_id);
			
 
				+
			
 
				+/**
			
 
				+   Return the worker collection managed by the indicated context
			
 
				+*/
			
 
				+struct starpu_worker_collection *starpu_sched_ctx_get_worker_collection(unsigned sched_ctx_id);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif /* __STARPU_SCHED_CTX_H__ */
			
--- a/include/starpu/1.3/starpu_sched_ctx_hypervisor.h
+++ b/include/starpu/1.3/starpu_sched_ctx_hypervisor.h
@@ -0,0 +1,109 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_SCHED_CTX_HYPERVISOR_H__
			
 
				+#define __STARPU_SCHED_CTX_HYPERVISOR_H__
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   @ingroup API_Scheduling_Contexts
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   @name Scheduling Context Link with Hypervisor
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   Performance counters used by StarPU to tell the hypervisor
			
 
				+   how the application and the resources are executing.
			
 
				+ */
			
 
				+struct starpu_sched_ctx_performance_counters
			
 
				+{
			
 
				+	/**
			
 
				+	   Inform the hypervisor for how long a worker has been idle
			
 
				+	   in the specified context
			
 
				+	*/
			
 
				+	void (*notify_idle_cycle)(unsigned sched_ctx_id, int worker, double idle_time);
			
 
				+
			
 
				+	/**
			
 
				+	   Inform the hypervisor that a task executing a specified
			
 
				+	   number of instructions has been popped from the worker
			
 
				+	*/
			
 
				+	void (*notify_poped_task)(unsigned sched_ctx_id, int worker);
			
 
				+
			
 
				+	/**
			
 
				+	   Notify the hypervisor that a task has been scheduled on
			
 
				+	   the queue of the worker corresponding to the specified
			
 
				+	   context
			
 
				+	*/
			
 
				+	void (*notify_pushed_task)(unsigned sched_ctx_id, int worker);
			
 
				+
			
 
				+	/**
			
 
				+	   Notify the hypervisor that a task has just been executed
			
 
				+	*/
			
 
				+	void (*notify_post_exec_task)(struct starpu_task *task, size_t data_size, uint32_t footprint, int hypervisor_tag, double flops);
			
 
				+
			
 
				+	/**
			
 
				+	   Notify the hypervisor that a task has just been submitted
			
 
				+	*/
			
 
				+	void (*notify_submitted_job)(struct starpu_task *task, uint32_t footprint, size_t data_size);
			
 
				+
			
 
				+	void (*notify_empty_ctx)(unsigned sched_ctx_id, struct starpu_task *task);
			
 
				+
			
 
				+	/**
			
 
				+	   Notify the hypervisor that the context was deleted
			
 
				+	*/
			
 
				+	void (*notify_delete_context)(unsigned sched_ctx);
			
 
				+};
			
 
				+
			
 
				+/**
			
 
				+   Indicate to StarPU the pointer to the performance counters
			
 
				+*/
			
 
				+void starpu_sched_ctx_set_perf_counters(unsigned sched_ctx_id, void *perf_counters);
			
 
				+
			
 
				+/**
			
 
				+   Callback that lets the scheduling policy tell the hypervisor that a
			
 
				+   task was pushed on a worker
			
 
				+*/
			
 
				+void starpu_sched_ctx_call_pushed_task_cb(int workerid, unsigned sched_ctx_id);
			
 
				+
			
 
				+/**
			
 
				+   Allow the hypervisor to let starpu know it's initialised
			
 
				+*/
			
 
				+void starpu_sched_ctx_notify_hypervisor_exists(void);
			
 
				+
			
 
				+/**
			
 
				+   Ask StarPU whether it has been informed that the hypervisor is initialised
			
 
				+*/
			
 
				+unsigned starpu_sched_ctx_check_if_hypervisor_exists(void);
			
 
				+
			
 
				+void starpu_sched_ctx_update_start_resizing_sample(unsigned sched_ctx_id, double start_sample);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif /* __STARPU_SCHED_CTX_HYPERVISOR_H__ */
			
--- a/include/starpu/1.3/starpu_scheduler.h
+++ b/include/starpu/1.3/starpu_scheduler.h
@@ -0,0 +1,481 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ * Copyright (C) 2011       Télécom-SudParis
			
 
				+ * Copyright (C) 2013       Thibaut Lambert
			
 
				+ * Copyright (C) 2016       Uppsala University
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_SCHEDULER_H__
			
 
				+#define __STARPU_SCHEDULER_H__
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   @defgroup API_Scheduling_Policy Scheduling Policy
			
 
				+   @brief TODO. While StarPU comes with a variety of scheduling
			
 
				+   policies (see \ref TaskSchedulingPolicy), it may sometimes be
			
 
				+   desirable to implement custom policies to address specific
			
 
				+   problems. The API described below allows users to write their own
			
 
				+   scheduling policy.
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+struct starpu_task;
			
 
				+
			
 
				+/**
			
 
				+   Contain all the methods that implement a scheduling policy. An
			
 
				+   application may specify which scheduling strategy to use in the field
			
 
				+   starpu_conf::sched_policy passed to the function starpu_init().
			
 
				+
			
 
				+   For each task going through the scheduler, the following methods
			
 
				+   get called in the given order:
			
 
				+
			
 
				+   <ul>
			
 
				+   <li>starpu_sched_policy::submit_hook when the task is
			
 
				+   submitted</li>
			
 
				+   <li>starpu_sched_policy::push_task when the task becomes ready. The
			
 
				+   scheduler is here <b>given</b> the task</li>
			
 
				+   <li>starpu_sched_policy::pop_task when the worker is idle. The
			
 
				+   scheduler here <b>gives</b> back the task to the core. It must not
			
 
				+   access this task any more</li>
			
 
				+   <li>starpu_sched_policy::pre_exec_hook right before the worker
			
 
				+   actually starts the task computation (after transferring any
			
 
				+   missing data).</li>
			
 
				+   <li>starpu_sched_policy::post_exec_hook right after the worker
			
 
				+   actually completes the task computation.</li>
			
 
				+   </ul>
			
 
				+
			
 
				+   For each task not going through the scheduler (because
			
 
				+   starpu_task::execute_on_a_specific_worker was set), these get
			
 
				+   called:
			
 
				+
			
 
				+   <ul>
			
 
				+   <li>starpu_sched_policy::submit_hook when the task is
			
 
				+   submitted</li>
			
 
				+   <li>starpu_sched_policy::push_task_notify when the task becomes
			
 
				+   ready. This is just a notification, the scheduler does not have to
			
 
				+   do anything about the task.</li>
			
 
				+   <li>starpu_sched_policy::pre_exec_hook right before the worker
			
 
				+   actually starts the task computation (after transferring any
			
 
				+   missing data).</li>
			
 
				+   <li>starpu_sched_policy::post_exec_hook right after the worker
			
 
				+   actually completes the task computation.</li>
			
 
				+   </ul>
			
 
				+*/
			
 
				+struct starpu_sched_policy
			
 
				+{
			
 
				+	/**
			
 
				+	   Initialize the scheduling policy, called before any other
			
 
				+	   method.
			
 
				+	*/
			
 
				+	void (*init_sched)(unsigned sched_ctx_id);
			
 
				+	/**
			
 
				+	   Cleanup the scheduling policy
			
 
				+	*/
			
 
				+	void (*deinit_sched)(unsigned sched_ctx_id);
			
 
				+
			
 
				+	/**
			
 
				+	   Insert a task into the scheduler, called when the task
			
 
				+	   becomes ready for execution. This must call
			
 
				+	   starpu_push_task_end() once it has effectively pushed the
			
 
				+	   task to a queue (to note the time when this was done in the
			
 
				+	   task), but before releasing mutexes (so that the task
			
 
				+	   hasn't been already taken by a worker).
			
 
				+	*/
			
 
				+	int (*push_task)(struct starpu_task *);
			
 
				+
			
 
				+	double (*simulate_push_task)(struct starpu_task *);
			
 
				+
			
 
				+	/**
			
 
				+	   Notify the scheduler that a task was pushed on a given
			
 
				+	   worker. This method is called when a task that was
			
 
				+	   explicitly assigned to a worker becomes ready and is about
			
 
				+	   to be executed by the worker. This method therefore permits
			
 
				+	   to keep the state of the scheduler coherent even when
			
 
				+	   StarPU bypasses the scheduling strategy.
			
 
				+
			
 
				+	   Note: to get an estimation of the task duration, \p perf_workerid
			
 
				+	   needs to be used rather than \p workerid, for the case of parallel
			
 
				+	   tasks.
			
 
				+	*/
			
 
				+	void (*push_task_notify)(struct starpu_task *, int workerid, int perf_workerid, unsigned sched_ctx_id);
			
 
				+
			
 
				+	/**
			
 
				+	   Get a task from the scheduler.
			
 
				+	   If this method returns NULL, the worker will start
			
 
				+	   sleeping. If later on some tasks are pushed for this worker,
			
 
				+	   starpu_wake_worker() must be called to wake the worker so
			
 
				+	   it can call the pop_task() method again.
			
 
				+	   The mutex associated to the worker is already taken when
			
 
				+	   this method is called. This method may release it (e.g. for
			
 
				+	   scalability reasons when doing work stealing), but it must
			
 
				+	   acquire it again before taking the decision whether to
			
 
				+	   return a task or NULL, so the atomicity of deciding to
			
 
				+	   return NULL and making the worker actually sleep is
			
 
				+	   preserved. Otherwise in simgrid or blocking driver mode the
			
 
				+	   worker might start sleeping while a task has just been
			
 
				+	   pushed for it.
			
 
				+	   If this method is defined as <c>NULL</c>, the worker will
			
 
				+	   only execute tasks from its local queue. In this case, the
			
 
				+	   push_task method should use the starpu_push_local_task
			
 
				+	   method to assign tasks to the different workers.
			
 
				+	*/
			
 
				+	struct starpu_task *(*pop_task)(unsigned sched_ctx_id);
			
 
				+
			
 
				+	/**
			
 
				+	   Remove all available tasks from the scheduler (tasks are
			
 
				+	   chained by means of the fields starpu_task::prev and
			
 
				+	   starpu_task::next). The mutex associated to the worker is
			
 
				+	   already taken when this method is called. This is currently
			
 
				+	   not used and can be discarded.
			
 
				+	*/
			
 
				+	struct starpu_task *(*pop_every_task)(unsigned sched_ctx_id);
			
 
				+
			
 
				+	/**
			
 
				+	   Optional field. This method is called when a task is
			
 
				+	   submitted.
			
 
				+	*/
			
 
				+	void (*submit_hook)(struct starpu_task *task);
			
 
				+
			
 
				+	/**
			
 
				+	   Optional field. This method is called every time a task is
			
 
				+	   starting.
			
 
				+	*/
			
 
				+	void (*pre_exec_hook)(struct starpu_task *, unsigned sched_ctx_id);
			
 
				+
			
 
				+	/**
			
 
				+	   Optional field. This method is called every time a task has
			
 
				+	   been executed.
			
 
				+	*/
			
 
				+	void (*post_exec_hook)(struct starpu_task *, unsigned sched_ctx_id);
			
 
				+
			
 
				+	/**
			
 
				+	   Optional field. This method is called when it is a good
			
 
				+	   time to start scheduling tasks. This is notably called when
			
 
				+	   the application calls starpu_task_wait_for_all() or
			
 
				+	   starpu_do_schedule() explicitly.
			
 
				+	*/
			
 
				+	void (*do_schedule)(unsigned sched_ctx_id);
			
 
				+
			
 
				+	/**
			
 
				+	   Initialize scheduling structures corresponding to each
			
 
				+	   worker used by the policy.
			
 
				+	*/
			
 
				+	void (*add_workers)(unsigned sched_ctx_id, int *workerids, unsigned nworkers);
			
 
				+
			
 
				+	/**
			
 
				+	   Deinitialize scheduling structures corresponding to each
			
 
				+	   worker used by the policy.
			
 
				+	*/
			
 
				+	void (*remove_workers)(unsigned sched_ctx_id, int *workerids, unsigned nworkers);
			
 
				+
			
 
				+	/** Whether this scheduling policy does data prefetching, and thus the
			
 
				+	    core should not try to do it opportunistically.
			
 
				+	*/
			
 
				+	int prefetches;
			
 
				+
			
 
				+	/**
			
 
				+	   Optional field. Name of the policy.
			
 
				+	*/
			
 
				+	const char *policy_name;
			
 
				+
			
 
				+	/**
			
 
				+	   Optional field. Human readable description of the policy.
			
 
				+	*/
			
 
				+	const char *policy_description;
			
 
				+
			
 
				+	enum starpu_worker_collection_type worker_type;
			
 
				+};
			
 
				+
			
 
				+/**
			
 
				+   Return a <c>NULL</c>-terminated array of all the predefined
			
 
				+   scheduling policies.
			
 
				+*/
			
 
				+struct starpu_sched_policy **starpu_sched_get_predefined_policies();
			
 
				+
			
 
				+/**
			
 
				+   When there is no available task for a worker, StarPU blocks this
			
 
				+   worker on a condition variable. This function specifies which
			
 
				+   condition variable (and the associated mutex) should be used to
			
 
				+   block (and to wake up) a worker. Note that multiple workers may use
			
 
				+   the same condition variable. For instance, in the case of a
			
 
				+   scheduling strategy with a single task queue, the same condition
			
 
				+   variable would be used to block and wake up all workers.
			
 
				+*/
			
 
				+void starpu_worker_get_sched_condition(int workerid, starpu_pthread_mutex_t **sched_mutex, starpu_pthread_cond_t **sched_cond);
			
 
				+
			
 
				+unsigned long starpu_task_get_job_id(struct starpu_task *task);
			
 
				+
			
 
				+/**
			
 
				+   TODO: check if this is correct
			
 
				+   Return the current minimum priority level supported by the scheduling
			
 
				+   policy
			
 
				+*/
			
 
				+int starpu_sched_get_min_priority(void);
			
 
				+
			
 
				+/**
			
 
				+   TODO: check if this is correct
			
 
				+   Return the current maximum priority level supported by the
			
 
				+   scheduling policy
			
 
				+*/
			
 
				+int starpu_sched_get_max_priority(void);
			
 
				+
			
 
				+/**
			
 
				+   TODO: check if this is correct
			
 
				+   Define the minimum task priority level supported by the scheduling
			
 
				+   policy. The default minimum priority level is the same as the
			
 
				+   default priority level which is 0 by convention. The application
			
 
				+   may access that value by calling the function
			
 
				+   starpu_sched_get_min_priority(). This function should only be
			
 
				+   called from the initialization method of the scheduling policy, and
			
 
				+   should not be used directly from the application.
			
 
				+*/
			
 
				+int starpu_sched_set_min_priority(int min_prio);
			
 
				+
			
 
				+/**
			
 
				+   TODO: check if this is correct
			
 
				+   Define the maximum priority level supported by the scheduling
			
 
				+   policy. The default maximum priority level is 1. The application
			
 
				+   may access that value by calling the function
			
 
				+   starpu_sched_get_max_priority(). This function should only be
			
 
				+   called from the initialization method of the scheduling policy, and
			
 
				+   should not be used directly from the application.
			
 
				+*/
			
 
				+int starpu_sched_set_max_priority(int max_prio);
			
 
				+
			
 
				+/**
			
 
				+   Check if the worker specified by workerid can execute the codelet.
			
 
				+   Schedulers need to call it before assigning a task to a worker,
			
 
				+   otherwise the task may fail to execute.
			
 
				+*/
			
 
				+int starpu_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl);
			
 
				+
			
 
				+/**
			
 
				+   Check if the worker specified by workerid can execute the codelet
			
 
				+   and return which implementation numbers can be used.
			
 
				+   Schedulers need to call it before assigning a task to a worker,
			
 
				+   otherwise the task may fail to execute.
			
 
				+   This should be preferred rather than calling
			
 
				+   starpu_worker_can_execute_task() for each and every implementation.
			
 
				+   It can also be used with <c>impl_mask == NULL</c> to check for at
			
 
				+   least one implementation without determining which.
			
 
				+*/
			
 
				+int starpu_worker_can_execute_task_impl(unsigned workerid, struct starpu_task *task, unsigned *impl_mask);
			
 
				+
			
 
				+/**
			
 
				+   Check if the worker specified by workerid can execute the codelet
			
 
				+   and return the first implementation which can be used.
			
 
				+   Schedulers need to call it before assigning a task to a worker,
			
 
				+   otherwise the task may fail to execute. This should be preferred
			
 
				+   rather than calling starpu_worker_can_execute_task() for
			
 
				+   each and every implementation. It can also be used with
			
 
				+   <c>impl_mask == NULL</c> to check for at least one implementation
			
 
				+   without determining which.
			
 
				+*/
			
 
				+int starpu_worker_can_execute_task_first_impl(unsigned workerid, struct starpu_task *task, unsigned *nimpl);
			
 
				+
			
 
				+/**
			
 
				+   The scheduling policy may put tasks directly into a worker’s local
			
 
				+   queue so that it is not always necessary to create its own queue
			
 
				+   when the local queue is sufficient. If \p back is not 0, \p task is
			
 
				+   put at the back of the queue where the worker will pop tasks first.
			
 
				+   Setting \p back to 0 therefore ensures a FIFO ordering.
			
 
				+*/
			
 
				+int starpu_push_local_task(int workerid, struct starpu_task *task, int back);
			
 
				+
			
 
				+/**
			
 
				+   Must be called by a scheduler to notify that the given
			
 
				+   task has just been pushed.
			
 
				+*/
			
 
				+int starpu_push_task_end(struct starpu_task *task);
			
 
				+
			
 
				+/**
			
 
				+   Whether \ref STARPU_PREFETCH was set
			
 
				+*/
			
 
				+int starpu_get_prefetch_flag(void);
			
 
				+
			
 
				+/**
			
 
				+   Prefetch data for a given \p task on a given \p node with a given
			
 
				+   priority
			
 
				+*/
			
 
				+int starpu_prefetch_task_input_on_node_prio(struct starpu_task *task, unsigned node, int prio);
			
 
				+
			
 
				+/**
			
 
				+   Prefetch data for a given \p task on a given \p node
			
 
				+*/
			
 
				+int starpu_prefetch_task_input_on_node(struct starpu_task *task, unsigned node);
			
 
				+
			
 
				+/**
			
 
				+   Prefetch data for a given \p task on a given \p node when the bus is
			
 
				+   idle with a given priority
			
 
				+*/
			
 
				+int starpu_idle_prefetch_task_input_on_node_prio(struct starpu_task *task, unsigned node, int prio);
			
 
				+
			
 
				+/**
			
 
				+   Prefetch data for a given \p task on a given \p node when the bus is
			
 
				+   idle
			
 
				+*/
			
 
				+int starpu_idle_prefetch_task_input_on_node(struct starpu_task *task, unsigned node);
			
 
				+
			
 
				+/**
			
 
				+   Prefetch data for a given \p task on a given \p worker with a given
			
 
				+   priority
			
 
				+*/
			
 
				+int starpu_prefetch_task_input_for_prio(struct starpu_task *task, unsigned worker, int prio);
			
 
				+
			
 
				+/**
			
 
				+   Prefetch data for a given \p task on a given \p worker
			
 
				+*/
			
 
				+int starpu_prefetch_task_input_for(struct starpu_task *task, unsigned worker);
			
 
				+
			
 
				+/**
			
 
				+   Prefetch data for a given \p task on a given \p worker when the bus
			
 
				+   is idle with a given priority
			
 
				+*/
			
 
				+int starpu_idle_prefetch_task_input_for_prio(struct starpu_task *task, unsigned worker, int prio);
			
 
				+
			
 
				+/**
			
 
				+   Prefetch data for a given \p task on a given \p worker when the bus
			
 
				+   is idle
			
 
				+*/
			
 
				+int starpu_idle_prefetch_task_input_for(struct starpu_task *task, unsigned worker);
			
 
				+
			
 
				+/**
			
 
				+   Return the footprint for a given task, taking into account
			
 
				+   user-provided perfmodel footprint or size_base functions.
			
 
				+*/
			
 
				+uint32_t starpu_task_footprint(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl);
			
 
				+
			
 
				+/**
			
 
				+   Return the raw footprint for the data of a given task (without
			
 
				+   taking into account user-provided functions).
			
 
				+*/
			
 
				+uint32_t starpu_task_data_footprint(struct starpu_task *task);
			
 
				+
			
 
				+/**
			
 
				+   Return expected task duration in micro-seconds.
			
 
				+*/
			
 
				+double starpu_task_expected_length(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl);
			
 
				+
			
 
				+/**
			
 
				+   Same as starpu_task_expected_length() but for a precise worker.
			
 
				+*/
			
 
				+double starpu_task_worker_expected_length(struct starpu_task *task, unsigned workerid, unsigned sched_ctx_id, unsigned nimpl);
			
 
				+
			
 
				+/**
			
 
				+   Return an estimated speedup factor relative to CPU speed
			
 
				+*/
			
 
				+double starpu_worker_get_relative_speedup(struct starpu_perfmodel_arch *perf_arch);
			
 
				+
			
 
				+/**
			
 
				+   Return expected data transfer time in micro-seconds for the given \p
			
 
				+   memory_node. Prefer using starpu_task_expected_data_transfer_time_for() which is
			
 
				+   more precise.
			
 
				+*/
			
 
				+double starpu_task_expected_data_transfer_time(unsigned memory_node, struct starpu_task *task);
			
 
				+
			
 
				+/**
			
 
				+   Return expected data transfer time in micro-seconds for the given
			
 
				+   \p worker.
			
 
				+*/
			
 
				+double starpu_task_expected_data_transfer_time_for(struct starpu_task *task, unsigned worker);
			
 
				+
			
 
				+/**
			
 
				+   Predict the transfer time (in micro-seconds) to move \p handle to a
			
 
				+   memory node
			
 
				+*/
			
 
				+double starpu_data_expected_transfer_time(starpu_data_handle_t handle, unsigned memory_node, enum starpu_data_access_mode mode);
			
 
				+
			
 
				+/**
			
 
				+   Return expected energy consumption in J
			
 
				+*/
			
 
				+double starpu_task_expected_energy(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl);
			
 
				+
			
 
				+/**
			
 
				+   Same as starpu_task_expected_energy() but for a precise worker
			
 
				+*/
			
 
				+double starpu_task_worker_expected_energy(struct starpu_task *task, unsigned workerid, unsigned sched_ctx_id, unsigned nimpl);
			
 
				+
			
 
				+/**
			
 
				+   Return expected conversion time in ms (multiformat interface only)
			
 
				+*/
			
 
				+double starpu_task_expected_conversion_time(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl);
			
 
				+
			
 
				+typedef void (*starpu_notify_ready_soon_func)(void *data, struct starpu_task *task, double delay);
			
 
				+
			
 
				+/**
			
 
				+   Register a callback to be called when it is determined that a task
			
 
				+   will be ready an estimated amount of time from now, because its
			
 
				+   last dependency has just started and we know how long it will take.
			
 
				+*/
			
 
				+void starpu_task_notify_ready_soon_register(starpu_notify_ready_soon_func f, void *data);
			
 
				+
			
 
				+/**
			
 
				+   The scheduling policy indicates if the worker may pop tasks from
			
 
				+   the list of other workers or if there is a central list with tasks
			
 
				+   for all the workers
			
 
				+*/
			
 
				+void starpu_sched_ctx_worker_shares_tasks_lists(int workerid, int sched_ctx_id);
			
 
				+
			
 
				+void starpu_sched_task_break(struct starpu_task *task);
			
 
				+
			
 
				+/**
			
 
				+   @name Worker operations
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   Wake up \p workerid while temporarily entering the current worker
			
 
				+   relax state if needed during the waiting process. Return 1 if \p
			
 
				+   workerid has been woken up or its state_keep_awake flag has been
			
 
				+   set to \c 1, and \c 0 otherwise (if \p workerid was not in the
			
 
				+   STATE_SLEEPING or in the STATE_SCHEDULING).
			
 
				+*/
			
 
				+int starpu_wake_worker_relax(int workerid);
			
 
				+
			
 
				+/**
			
 
				+   Must be called to wake up a worker that is sleeping on the cond.
			
 
				+   Return 0 whenever the worker is not in a sleeping state or has the
			
 
				+   state_keep_awake flag on.
			
 
				+*/
			
 
				+int starpu_wake_worker_no_relax(int workerid);
			
 
				+
			
 
				+/**
			
 
				+   Version of starpu_wake_worker_no_relax() which assumes that the
			
 
				+   sched mutex is locked
			
 
				+*/
			
 
				+int starpu_wake_worker_locked(int workerid);
			
 
				+
			
 
				+/**
			
 
				+   Light version of starpu_wake_worker_relax() which, when possible,
			
 
				+   speculatively sets keep_awake on the target worker without waiting
			
 
				+   for the worker to enter the relax state.
			
 
				+*/
			
 
				+int starpu_wake_worker_relax_light(int workerid);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif /* __STARPU_SCHEDULER_H__ */
			
--- a/include/starpu/1.3/starpu_simgrid_wrap.h
+++ b/include/starpu/1.3/starpu_simgrid_wrap.h
@@ -0,0 +1,28 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2014-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_SIMGRID_WRAP_H__
			
 
				+#define __STARPU_SIMGRID_WRAP_H__
			
 
				+
			
 
				+#include <starpu_config.h>
			
 
				+
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+#ifndef main
			
 
				+#define main starpu_main
			
 
				+#endif
			
 
				+#endif
			
 
				+
			
 
				+#endif /* __STARPU_SIMGRID_WRAP_H__ */
			
--- a/include/starpu/1.3/starpu_sink.h
+++ b/include/starpu/1.3/starpu_sink.h
@@ -0,0 +1,29 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_SINK_H__
			
 
				+#define __STARPU_SINK_H__
			
 
				+
			
 
				+/**
			
 
				+   @defgroup API_Sink Sink
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+void starpu_sink_common_worker(int argc, char **argv);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+#endif /* __STARPU_SINK_H__ */
			
--- a/include/starpu/1.3/starpu_stdlib.h
+++ b/include/starpu/1.3/starpu_stdlib.h
@@ -0,0 +1,274 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_STDLIB_H__
			
 
				+#define __STARPU_STDLIB_H__
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   @defgroup API_Standard_Memory_Library Standard Memory Library
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   Value passed to the function starpu_malloc_flags() to indicate the
			
 
				+   memory allocation should be pinned.
			
 
				+*/
			
 
				+#define STARPU_MALLOC_PINNED	((1ULL)<<1)
			
 
				+
			
 
				+/**
			
 
				+   Value passed to the function starpu_malloc_flags() to indicate the
			
 
				+   memory allocation should be in the limit defined by the environment
			
 
				+   variables \ref STARPU_LIMIT_CUDA_devid_MEM, \ref
			
 
				+   STARPU_LIMIT_CUDA_MEM, \ref STARPU_LIMIT_OPENCL_devid_MEM, \ref
			
 
				+   STARPU_LIMIT_OPENCL_MEM and \ref STARPU_LIMIT_CPU_MEM (see Section
			
 
				+   \ref HowToLimitMemoryPerNode).
			
 
				+   If no memory is available, it tries to reclaim memory from StarPU.
			
 
				+   Memory allocated this way needs to be freed by calling the function
			
 
				+   starpu_free_flags() with the same flag.
			
 
				+*/
			
 
				+#define STARPU_MALLOC_COUNT	((1ULL)<<2)
			
 
				+
			
 
				+/**
			
 
				+   Value passed to the function starpu_malloc_flags() along
			
 
				+   ::STARPU_MALLOC_COUNT to indicate that while the memory allocation
			
 
				+   should be kept in the limits defined for ::STARPU_MALLOC_COUNT, no
			
 
				+   reclaiming should be performed by starpu_malloc_flags() itself,
			
 
				+   thus potentially overflowing the memory node a bit. StarPU will
			
 
				+   reclaim memory after next task termination, according to the \ref
			
 
				+   STARPU_MINIMUM_AVAILABLE_MEM, \ref STARPU_TARGET_AVAILABLE_MEM,
			
 
				+   \ref STARPU_MINIMUM_CLEAN_BUFFERS, and \ref
			
 
				+   STARPU_TARGET_CLEAN_BUFFERS environment variables. If
			
 
				+   ::STARPU_MEMORY_WAIT is set, no overflowing will happen,
			
 
				+   starpu_malloc_flags() will wait for other eviction mechanisms to
			
 
				+   release enough memory.
			
 
				+*/
			
 
				+#define STARPU_MALLOC_NORECLAIM	((1ULL)<<3)
			
 
				+
			
 
				+/**
			
 
				+   Value passed to starpu_memory_allocate() to specify that the
			
 
				+   function should wait for the requested amount of memory to become
			
 
				+   available, and atomically allocate it.
			
 
				+*/
			
 
				+#define STARPU_MEMORY_WAIT	((1ULL)<<4)
			
 
				+
			
 
				+/**
			
 
				+   Value passed to starpu_memory_allocate() to specify that the
			
 
				+   function should allocate the amount of memory, even if that means
			
 
				+   overflowing the total size of the memory node.
			
 
				+*/
			
 
				+#define STARPU_MEMORY_OVERFLOW	((1ULL)<<5)
			
 
				+
			
 
				+/**
			
 
				+   Value passed to the function starpu_malloc_flags() to indicate that
			
 
				+   when StarPU is using simgrid, the allocation can be "folded", i.e.
			
 
				+   a memory area is allocated, but its content is actually a replicate
			
 
				+   of the same memory area, to avoid having to actually allocate that
			
 
				+   much memory. This thus allows to have a memory area that does not
			
 
				+   actually consume memory, which one can read from and write to
			
 
				+   normally, but get bogus values.
			
 
				+*/
			
 
				+#define STARPU_MALLOC_SIMULATION_FOLDED	((1ULL)<<6)
			
 
				+
			
 
				+/**
			
 
				+   @deprecated
			
 
				+   Equivalent to starpu_malloc(). This macro is provided to avoid
			
 
				+   breaking old codes.
			
 
				+*/
			
 
				+#define starpu_data_malloc_pinned_if_possible	starpu_malloc
			
 
				+
			
 
				+/**
			
 
				+   @deprecated
			
 
				+   Equivalent to starpu_free(). This macro is provided to avoid
			
 
				+   breaking old codes.
			
 
				+*/
			
 
				+#define starpu_data_free_pinned_if_possible	starpu_free
			
 
				+
			
 
				+/**
			
 
				+   Set an alignment constraint for starpu_malloc() allocations. \p
			
 
				+   align must be a power of two. This is for instance called
			
 
				+   automatically by the OpenCL driver to specify its own alignment
			
 
				+   constraints.
			
 
				+*/
			
 
				+void starpu_malloc_set_align(size_t align);
			
 
				+
			
 
				+/**
			
 
				+   Allocate data of the given size \p dim in main memory, and return
			
 
				+   the pointer to the allocated data through \p A. It will also try to
			
 
				+   pin it in CUDA or OpenCL, so that data transfers from this buffer
			
 
				+   can be asynchronous, and thus permit data transfer and computation
			
 
				+   overlapping. The allocated buffer must be freed thanks to the
			
 
				+   starpu_free() function.
			
 
				+*/
			
 
				+int starpu_malloc(void **A, size_t dim);
			
 
				+
			
 
				+/**
			
 
				+   Free memory which has previously been allocated with
			
 
				+   starpu_malloc().
			
 
				+*/
			
 
				+int starpu_free(void *A);
			
 
				+
			
 
				+/**
			
 
				+   Perform a memory allocation based on the constraints defined by the
			
 
				+   given flag.
			
 
				+*/
			
 
				+int starpu_malloc_flags(void **A, size_t dim, int flags);
			
 
				+
			
 
				+/**
			
 
				+   Free memory by specifying its size. The given flags should be
			
 
				+   consistent with the ones given to starpu_malloc_flags() when
			
 
				+   allocating the memory.
			
 
				+*/
			
 
				+int starpu_free_flags(void *A, size_t dim, int flags);
			
 
				+
			
 
				+typedef int (*starpu_malloc_hook)(unsigned dst_node, void **A, size_t dim, int flags);
			
 
				+typedef int (*starpu_free_hook)(unsigned dst_node, void *A, size_t dim, int flags);
			
 
				+
			
 
				+/**
			
 
				+   Set allocation functions to be used by StarPU. By default, StarPU
			
 
				+   will use \c malloc() (or \c cudaHostAlloc() if CUDA GPUs are used)
			
 
				+   for all its data handle allocations. The application can specify
			
 
				+   another allocation primitive by calling this. The malloc_hook
			
 
				+   should pass the allocated pointer through the \c A parameter, and
			
 
				+   return 0 on success. On allocation failure, it should return
			
 
				+   -ENOMEM. The \c flags parameter contains ::STARPU_MALLOC_PINNED if
			
 
				+   the memory should be pinned by the hook for GPU transfer
			
 
				+   efficiency. The hook can use starpu_memory_pin() to achieve this.
			
 
				+   The \c dst_node parameter is the starpu memory node, one can
			
 
				+   convert it to an hwloc logical id with
			
 
				+   starpu_memory_nodes_numa_id_to_hwloclogid() or to an OS NUMA number
			
 
				+   with starpu_memory_nodes_numa_devid_to_id().
			
 
				+*/
			
 
				+void starpu_malloc_set_hooks(starpu_malloc_hook malloc_hook, starpu_free_hook free_hook);
			
 
				+
			
 
				+/**
			
 
				+   Pin the given memory area, so that CPU-GPU transfers can be done
			
 
				+   asynchronously with DMAs. The memory must be unpinned with
			
 
				+   starpu_memory_unpin() before being freed. Return 0 on success, -1
			
 
				+   on error.
			
 
				+*/
			
 
				+int starpu_memory_pin(void *addr, size_t size);
			
 
				+
			
 
				+/**
			
 
				+   Unpin the given memory area previously pinned with
			
 
				+   starpu_memory_pin(). Return 0 on success, -1 on error.
			
 
				+*/
			
 
				+int starpu_memory_unpin(void *addr, size_t size);
			
 
				+
			
 
				+/**
			
 
				+   If a memory limit is defined on the given node (see Section \ref
			
 
				+   HowToLimitMemoryPerNode), return the amount of total memory on the
			
 
				+   node. Otherwise return -1.
			
 
				+*/
			
 
				+starpu_ssize_t starpu_memory_get_total(unsigned node);
			
 
				+
			
 
				+/**
			
 
				+   If a memory limit is defined on the given node (see Section \ref
			
 
				+   HowToLimitMemoryPerNode), return the amount of available memory on
			
 
				+   the node. Otherwise return -1.
			
 
				+*/
			
 
				+starpu_ssize_t starpu_memory_get_available(unsigned node);
			
 
				+
			
 
				+/**
			
 
				+   Return the amount of total memory on all memory nodes for which a
			
 
				+   memory limit is defined (see Section \ref HowToLimitMemoryPerNode).
			
 
				+*/
			
 
				+starpu_ssize_t starpu_memory_get_total_all_nodes(void);
			
 
				+
			
 
				+/**
			
 
				+   Return the amount of available memory on all memory nodes for which
			
 
				+   a memory limit is defined (see Section \ref
			
 
				+   HowToLimitMemoryPerNode).
			
 
				+*/
			
 
				+starpu_ssize_t starpu_memory_get_available_all_nodes(void);
			
 
				+
			
 
				+/**
			
 
				+   If a memory limit is defined on the given node (see Section \ref
			
 
				+   HowToLimitMemoryPerNode), try to allocate some of it. This does not
			
 
				+   actually allocate memory, but only accounts for it. This can be
			
 
				+   useful when the application allocates data another way, but wants
			
 
				+   StarPU to be aware of the allocation size e.g. for memory
			
 
				+   reclaiming.
			
 
				+   By default, return <c>-ENOMEM</c> if there is not enough room on
			
 
				+   the given node. \p flags can be either ::STARPU_MEMORY_WAIT or
			
 
				+   ::STARPU_MEMORY_OVERFLOW to change this.
			
 
				+*/
			
 
				+int starpu_memory_allocate(unsigned node, size_t size, int flags);
			
 
				+
			
 
				+/**
			
 
				+   If a memory limit is defined on the given node (see Section \ref
			
 
				+   HowToLimitMemoryPerNode), free some of it. This does not actually
			
 
				+   free memory, but only accounts for it, like
			
 
				+   starpu_memory_allocate(). The amount does not have to be exactly
			
 
				+   the same as what was passed to starpu_memory_allocate(), only the
			
 
				+   eventual amount needs to be the same, i.e. one call to
			
 
				+   starpu_memory_allocate() can be followed by several calls to
			
 
				+   starpu_memory_deallocate() to declare the deallocation piece by
			
 
				+   piece.
			
 
				+*/
			
 
				+void starpu_memory_deallocate(unsigned node, size_t size);
			
 
				+
			
 
				+/**
			
 
				+   If a memory limit is defined on the given node (see Section \ref
			
 
				+   HowToLimitMemoryPerNode), this will wait for \p size bytes to
			
 
				+   become available on \p node. Of course, since another thread may be
			
 
				+   allocating memory concurrently, this does not necessarily mean that
			
 
				+   this amount will be actually available, just that it was reached.
			
 
				+   To atomically wait for some amount of memory and reserve it,
			
 
				+   starpu_memory_allocate() should be used with the
			
 
				+   ::STARPU_MEMORY_WAIT flag.
			
 
				+*/
			
 
				+void starpu_memory_wait_available(unsigned node, size_t size);
			
 
				+
			
 
				+/**
			
 
				+   Sleep for the given \p nb_sec seconds.
			
 
				+   In simgrid mode, this only sleeps within virtual time.
			
 
				+  */
			
 
				+void starpu_sleep(float nb_sec);
			
 
				+
			
 
				+/**
			
 
				+   Sleep for the given \p nb_micro_sec micro-seconds.
			
 
				+   In simgrid mode, this only sleeps within virtual time.
			
 
				+  */
			
 
				+void starpu_usleep(float nb_micro_sec);
			
 
				+
			
 
				+/**
			
 
				+   Account for \p joules J being used.
			
 
				+   This is supported in simgrid mode, to record how much energy was used, and will
			
 
				+   show up in further calls to starpu_energy_used().
			
 
				+  */
			
 
				+void starpu_energy_use(float joules);
			
 
				+
			
 
				+/**
			
 
				+   Return the amount of energy having been used in J.
			
 
				+   This accounts for the amounts passed to starpu_energy_use(), but also the static
			
 
				+   energy use set by the \ref STARPU_IDLE_POWER environment variable.
			
 
				+  */
			
 
				+double starpu_energy_used(void);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif /* __STARPU_STDLIB_H__ */
			
--- a/include/starpu/1.3/starpu_task.h
+++ b/include/starpu/1.3/starpu_task.h
--- a/include/starpu/1.3/starpu_task_bundle.h
+++ b/include/starpu/1.3/starpu_task_bundle.h
@@ -0,0 +1,97 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ * Copyright (C) 2011       Télécom-SudParis
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_TASK_BUNDLE_H__
			
 
				+#define __STARPU_TASK_BUNDLE_H__
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   @defgroup API_Task_Bundles Task Bundles
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+struct starpu_task;
			
 
				+struct starpu_perfmodel_arch;
			
 
				+
			
 
				+/**
			
 
				+   Opaque structure describing a list of tasks that should be
			
 
				+   scheduled on the same worker whenever it’s possible. It must be
			
 
				+   considered as a hint given to the scheduler as there is no
			
 
				+   guarantee that they will be executed on the same worker.
			
 
				+*/
			
 
				+typedef struct _starpu_task_bundle *starpu_task_bundle_t;
			
 
				+
			
 
				+/**
			
 
				+   Factory function creating and initializing \p bundle, when the call
			
 
				+   returns, memory needed is allocated and \p bundle is ready to use.
			
 
				+*/
			
 
				+void starpu_task_bundle_create(starpu_task_bundle_t *bundle);
			
 
				+
			
 
				+/**
			
 
				+   Insert \p task in \p bundle. Until \p task is removed from \p
			
 
				+   bundle its expected length and data transfer time will be
			
 
				+   considered along those of the other tasks of bundle. This function
			
 
				+   must not be called if \p bundle is already closed and/or \p task is
			
 
				+   already submitted. On success, it returns 0. There are two cases of
			
 
				+   error : if \p bundle is already closed it returns <c>-EPERM</c>, if
			
 
				+   \p task was already submitted it returns <c>-EINVAL</c>.
			
 
				+*/
			
 
				+int starpu_task_bundle_insert(starpu_task_bundle_t bundle, struct starpu_task *task);
			
 
				+
			
 
				+/**
			
 
				+   Remove \p task from \p bundle. Of course \p task must have been
			
 
				+   previously inserted in \p bundle. This function must not be called
			
 
				+   if \p bundle is already closed and/or \p task is already submitted.
			
 
				+   Doing so would result in undefined behaviour. On success, it
			
 
				+   returns 0. If \p bundle is already closed it returns
			
 
				+   <c>-ENOENT</c>.
			
 
				+*/
			
 
				+int starpu_task_bundle_remove(starpu_task_bundle_t bundle, struct starpu_task *task);
			
 
				+
			
 
				+/**
			
 
				+   Inform the runtime that the user will not modify \p bundle anymore,
			
 
				+   it means no more inserting or removing task. Thus the runtime can
			
 
				+   destroy it when possible.
			
 
				+*/
			
 
				+void starpu_task_bundle_close(starpu_task_bundle_t bundle);
			
 
				+
			
 
				+/**
			
 
				+   Return the expected duration of \p bundle in micro-seconds.
			
 
				+*/
			
 
				+double starpu_task_bundle_expected_length(starpu_task_bundle_t bundle, struct starpu_perfmodel_arch *arch, unsigned nimpl);
			
 
				+
			
 
				+/**
			
 
				+   Return the time (in micro-seconds) expected to transfer all data used within \p bundle.
			
 
				+*/
			
 
				+double starpu_task_bundle_expected_data_transfer_time(starpu_task_bundle_t bundle, unsigned memory_node);
			
 
				+
			
 
				+/**
			
 
				+   Return the expected energy consumption of \p bundle in J.
			
 
				+*/
			
 
				+double starpu_task_bundle_expected_energy(starpu_task_bundle_t bundle, struct starpu_perfmodel_arch *arch, unsigned nimpl);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif /* __STARPU_TASK_BUNDLE_H__ */
			
--- a/include/starpu/1.3/starpu_task_dep.h
+++ b/include/starpu/1.3/starpu_task_dep.h
@@ -0,0 +1,221 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ * Copyright (C) 2011       Télécom-SudParis
			
 
				+ * Copyright (C) 2016       Uppsala University
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_TASK_DEP_H__
			
 
				+#define __STARPU_TASK_DEP_H__
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   @defgroup API_Explicit_Dependencies Explicit Dependencies
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   Declare task dependencies between a \p task and an array of tasks
			
 
				+   of length \p ndeps. This function must be called prior to the
			
 
				+   submission of the task, but it may be called after the submission or
			
 
				+   the execution of the tasks in the array, provided the tasks are
			
 
				+   still valid (i.e. they were not automatically destroyed). Calling
			
 
				+   this function on a task that was already submitted or with an entry
			
 
				+   of \p task_array that is no longer a valid task results in an
			
 
				+   undefined behaviour. If \p ndeps is 0, no dependency is added. It
			
 
				+   is possible to call starpu_task_declare_deps_array() several times
			
 
				+   on the same task, in this case, the dependencies are added. It is
			
 
				+   possible to have redundancy in the task dependencies.
			
 
				+*/
			
 
				+void starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[]);
			
 
				+
			
 
				+/**
			
 
				+   Declare task dependencies between a \p task and a series of \p
			
 
				+   ndeps tasks, similarly to starpu_task_declare_deps_array(), but the
			
 
				+   tasks are passed after \p ndeps, which indicates how many tasks \p
			
 
				+   task shall be made to depend on. If \p ndeps is 0, no dependency is
			
 
				+   added.
			
 
				+*/
			
 
				+void starpu_task_declare_deps(struct starpu_task *task, unsigned ndeps, ...);
			
 
				+
			
 
				+/**
			
 
				+   Declare task end dependencies between a \p task and an array of
			
 
				+   tasks of length \p ndeps. \p task will appear as terminated not
			
 
				+   only when \p task is terminated, but also when the tasks of \p
			
 
				+   task_array have terminated. This function must be called prior to
			
 
				+   the termination of the task, but it may be called after the submission
			
 
				+   or the execution of the tasks in the array, provided the tasks are
			
 
				+   still valid (i.e. they were not automatically destroyed). Calling
			
 
				+   this function on a task that was already terminated or with an
			
 
				+   entry of \p task_array that is no longer a valid task results in an
			
 
				+   undefined behaviour. If \p ndeps is 0, no dependency is added. It
			
 
				+   is possible to call starpu_task_declare_end_deps_array() several
			
 
				+   times on the same task, in this case, the dependencies are added.
			
 
				+   It is currently not implemented to have redundancy in the task
			
 
				+   dependencies.
			
 
				+*/
			
 
				+void starpu_task_declare_end_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[]);
			
 
				+
			
 
				+/**
			
 
				+   Declare task end dependencies between a \p task and a series of \p
			
 
				+   ndeps tasks, similarly to starpu_task_declare_end_deps_array(), but
			
 
				+   the tasks are passed after \p ndeps, which indicates how many tasks
			
 
				+   \p task 's termination shall be made to depend on. If \p ndeps is
			
 
				+   0, no dependency is added.
			
 
				+*/
			
 
				+void starpu_task_declare_end_deps(struct starpu_task *task, unsigned ndeps, ...);
			
 
				+
			
 
				+/**
			
 
				+   Fill \p task_array with the list of tasks which are direct children
			
 
				+   of \p task. \p ndeps is the size of \p task_array.  This function
			
 
				+   returns the number of direct children. \p task_array can be set to
			
 
				+   <c>NULL</c> if \p ndeps is 0, which allows to compute the number of
			
 
				+   children before allocating an array to store them. This function
			
 
				+   can only be called if \p task has not completed yet, otherwise the
			
 
				+   results are undefined. The result may also be outdated if some
			
 
				+   additional dependency has been added in the meanwhile.
			
 
				+*/
			
 
				+int starpu_task_get_task_succs(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[]);
			
 
				+
			
 
				+/**
			
 
				+   Behave like starpu_task_get_task_succs(), except that it only
			
 
				+   reports tasks which will go through the scheduler, thus avoiding
			
 
				+   tasks with no codelet, or with explicit placement.
			
 
				+*/
			
 
				+int starpu_task_get_task_scheduled_succs(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[]);
			
 
				+
			
 
				+/**
			
 
				+   Add \p nb_deps end dependencies to the task \p t. This means the
			
 
				+   task will not terminate until the required number of calls to the
			
 
				+   function starpu_task_end_dep_release() has been made.
			
 
				+*/
			
 
				+void starpu_task_end_dep_add(struct starpu_task *t, int nb_deps);
			
 
				+
			
 
				+/**
			
 
				+   Unlock 1 end dependency to the task \p t. This function must be
			
 
				+   called after starpu_task_end_dep_add().
			
 
				+*/
			
 
				+void starpu_task_end_dep_release(struct starpu_task *t);
			
 
				+
			
 
				+/**
			
 
				+   Define a task logical identifier. It is possible to associate a task
			
 
				+   with a unique <em>tag</em> chosen by the application, and to
			
 
				+   express dependencies between tasks by the means of those tags. To
			
 
				+   do so, fill the field starpu_task::tag_id with a tag number (can be
			
 
				+   arbitrary) and set the field starpu_task::use_tag to 1. If
			
 
				+   starpu_tag_declare_deps() is called with this tag number, the task
			
 
				+   will not be started until the tasks which hold the declared
			
 
				+   dependency tags are completed.
			
 
				+*/
			
 
				+typedef uint64_t starpu_tag_t;
			
 
				+
			
 
				+/**
			
 
				+   Specify the dependencies of the task identified by tag \p id. The
			
 
				+   first argument specifies the tag which is configured, the second
			
 
				+   argument gives the number of tag(s) on which \p id depends. The
			
 
				+   following arguments are the tags which have to be terminated to
			
 
				+   unlock the task. This function must be called before the associated
			
 
				+   task is submitted to StarPU with starpu_task_submit().
			
 
				+
			
 
				+   <b>WARNING! Use with caution</b>. Because of the variable arity of
			
 
				+   starpu_tag_declare_deps(), note that the last arguments must be of
			
 
				+   type ::starpu_tag_t : constant values typically need to be
			
 
				+   explicitly cast. Otherwise, due to integer sizes and argument
			
 
				+   passing on the stack, the C compiler might consider the tag
			
 
				+   <c>0x200000003</c> instead of <c>0x2</c> and <c>0x3</c> when
			
 
				+   calling <c>starpu_tag_declare_deps(0x1, 2, 0x2, 0x3)</c>. Using the
			
 
				+   starpu_tag_declare_deps_array() function avoids this hazard.
			
 
				+
			
 
				+   \code{.c}
			
 
				+   //  Tag 0x1 depends on tags 0x32 and 0x52
			
 
				+   starpu_tag_declare_deps((starpu_tag_t)0x1, 2, (starpu_tag_t)0x32, (starpu_tag_t)0x52);
			
 
				+   \endcode
			
 
				+*/
			
 
				+void starpu_tag_declare_deps(starpu_tag_t id, unsigned ndeps, ...);
			
 
				+
			
 
				+/**
			
 
				+   Similar to starpu_tag_declare_deps(), except that it does not take
			
 
				+   a variable number of arguments but an \p array of tags of size \p
			
 
				+   ndeps.
			
 
				+
			
 
				+   \code{.c}
			
 
				+   // Tag 0x1 depends on tags 0x32 and 0x52
			
 
				+   starpu_tag_t tag_array[2] = {0x32, 0x52};
			
 
				+   starpu_tag_declare_deps_array((starpu_tag_t)0x1, 2, tag_array);
			
 
				+   \endcode
			
 
				+*/
			
 
				+void starpu_tag_declare_deps_array(starpu_tag_t id, unsigned ndeps, starpu_tag_t *array);
			
 
				+
			
 
				+/**
			
 
				+   Block until the task associated to tag \p id has been executed.
			
 
				+   This is a blocking call which must therefore not be called within
			
 
				+   tasks or callbacks, but only from the application directly. It is
			
 
				+   possible to synchronize with the same tag multiple times, as long
			
 
				+   as the starpu_tag_remove() function is not called. Note that it is
			
 
				+   still possible to synchronize with a tag associated to a task for
			
 
				+   which the structure starpu_task was freed (e.g. if the field
			
 
				+   starpu_task::destroy was enabled).
			
 
				+*/
			
 
				+int starpu_tag_wait(starpu_tag_t id);
			
 
				+
			
 
				+/**
			
 
				+   Similar to starpu_tag_wait() except that it blocks until all the \p
			
 
				+   ntags tags contained in the array \p id are terminated.
			
 
				+*/
			
 
				+int starpu_tag_wait_array(unsigned ntags, starpu_tag_t *id);
			
 
				+
			
 
				+/**
			
 
				+   Clear the <em>already notified</em> status of a tag which is not
			
 
				+   associated with a task. Before that, calling
			
 
				+   starpu_tag_notify_from_apps() again will not notify the successors.
			
 
				+   After that, the next call to starpu_tag_notify_from_apps() will
			
 
				+   notify the successors.
			
 
				+*/
			
 
				+void starpu_tag_restart(starpu_tag_t id);
			
 
				+
			
 
				+/**
			
 
				+   Release the resources associated to tag \p id. It can be called
			
 
				+   once the corresponding task has been executed and when there is no
			
 
				+   other tag that depends on this tag anymore.
			
 
				+*/
			
 
				+void starpu_tag_remove(starpu_tag_t id);
			
 
				+
			
 
				+/**
			
 
				+   Explicitly unlock tag \p id. It may be useful in the case of
			
 
				+   applications which execute part of their computation outside StarPU
			
 
				+   tasks (e.g. third-party libraries). It is also provided as a
			
 
				+   convenient tool for the programmer, for instance to entirely
			
 
				+   construct the task DAG before actually giving StarPU the
			
 
				+   opportunity to execute the tasks. When called several times on the
			
 
				+   same tag, notification will be done only on first call, thus
			
 
				+   implementing "OR" dependencies, until the tag is restarted using
			
 
				+   starpu_tag_restart().
			
 
				+*/
			
 
				+void starpu_tag_notify_from_apps(starpu_tag_t id);
			
 
				+
			
 
				+struct starpu_task *starpu_tag_get_task(starpu_tag_t id);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif /* __STARPU_TASK_DEP_H__ */
			
--- a/include/starpu/1.3/starpu_task_list.h
+++ b/include/starpu/1.3/starpu_task_list.h
@@ -0,0 +1,135 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_TASK_LIST_H__
			
 
				+#define __STARPU_TASK_LIST_H__
			
 
				+
			
 
				+#include <starpu_task.h>
			
 
				+#include <starpu_util.h>
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   @defgroup API_Task_Lists Task Lists
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/* NOTE: this needs to have at least the same size as lists in src/common/list.h */
			
 
				+#ifdef BUILDING_STARPU
			
 
				+#define STARPU_TASK_LIST_INLINE extern inline
			
 
				+#else
			
 
				+/**
			
 
				+   Store a double-chained list of tasks
			
 
				+*/
			
 
				+struct starpu_task_list
			
 
				+{
			
 
				+	struct starpu_task *head;  /**< head of the list */
			
 
				+	struct starpu_task *tail;  /**< tail of the list */
			
 
				+};
			
 
				+#define STARPU_TASK_LIST_INLINE extern
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   Initialize a list structure
			
 
				+*/
			
 
				+STARPU_TASK_LIST_INLINE
			
 
				+void starpu_task_list_init(struct starpu_task_list *list);
			
 
				+
			
 
				+/**
			
 
				+   Push \p task at the front of \p list
			
 
				+*/
			
 
				+STARPU_TASK_LIST_INLINE
			
 
				+void starpu_task_list_push_front(struct starpu_task_list *list, struct starpu_task *task);
			
 
				+
			
 
				+/**
			
 
				+   Push \p task at the back of \p list
			
 
				+*/
			
 
				+STARPU_TASK_LIST_INLINE
			
 
				+void starpu_task_list_push_back(struct starpu_task_list *list, struct starpu_task *task);
			
 
				+
			
 
				+/**
			
 
				+   Get the front of \p list (without removing it)
			
 
				+*/
			
 
				+STARPU_TASK_LIST_INLINE
			
 
				+struct starpu_task *starpu_task_list_front(const struct starpu_task_list *list);
			
 
				+
			
 
				+/**
			
 
				+   Get the back of \p list (without removing it)
			
 
				+*/
			
 
				+STARPU_TASK_LIST_INLINE
			
 
				+struct starpu_task *starpu_task_list_back(const struct starpu_task_list *list);
			
 
				+
			
 
				+/**
			
 
				+   Test if \p list is empty
			
 
				+*/
			
 
				+STARPU_TASK_LIST_INLINE
			
 
				+int starpu_task_list_empty(const struct starpu_task_list *list);
			
 
				+
			
 
				+/**
			
 
				+   Remove \p task from \p list
			
 
				+*/
			
 
				+STARPU_TASK_LIST_INLINE
			
 
				+void starpu_task_list_erase(struct starpu_task_list *list, struct starpu_task *task);
			
 
				+
			
 
				+/**
			
 
				+   Remove the element at the front of \p list
			
 
				+*/
			
 
				+STARPU_TASK_LIST_INLINE
			
 
				+struct starpu_task *starpu_task_list_pop_front(struct starpu_task_list *list);
			
 
				+
			
 
				+/**
			
 
				+   Remove the element at the back of \p list
			
 
				+*/
			
 
				+STARPU_TASK_LIST_INLINE
			
 
				+struct starpu_task *starpu_task_list_pop_back(struct starpu_task_list *list);
			
 
				+
			
 
				+/**
			
 
				+   Get the first task of \p list.
			
 
				+*/
			
 
				+STARPU_TASK_LIST_INLINE
			
 
				+struct starpu_task *starpu_task_list_begin(const struct starpu_task_list *list);
			
 
				+
			
 
				+/**
			
 
				+   Get the end of \p list.
			
 
				+*/
			
 
				+STARPU_TASK_LIST_INLINE
			
 
				+struct starpu_task *starpu_task_list_end(const struct starpu_task_list *list STARPU_ATTRIBUTE_UNUSED);
			
 
				+
			
 
				+/**
			
 
				+   Get the next task of \p list. This is not erase-safe.
			
 
				+*/
			
 
				+STARPU_TASK_LIST_INLINE
			
 
				+struct starpu_task *starpu_task_list_next(const struct starpu_task *task);
			
 
				+
			
 
				+/**
			
 
				+   Test whether the given task \p look is contained in the \p list.
			
 
				+*/
			
 
				+STARPU_TASK_LIST_INLINE
			
 
				+int starpu_task_list_ismember(const struct starpu_task_list *list, const struct starpu_task *look);
			
 
				+
			
 
				+STARPU_TASK_LIST_INLINE
			
 
				+void starpu_task_list_move(struct starpu_task_list *ldst, struct starpu_task_list *lsrc);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif /* __STARPU_TASK_LIST_H__ */
			
--- a/include/starpu/1.3/starpu_task_util.h
+++ b/include/starpu/1.3/starpu_task_util.h
@@ -0,0 +1,493 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_TASK_UTIL_H__
			
 
				+#define __STARPU_TASK_UTIL_H__
			
 
				+
			
 
				+#include <stdio.h>
			
 
				+#include <stdlib.h>
			
 
				+#include <string.h>
			
 
				+#include <assert.h>
			
 
				+#include <starpu.h>
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   @defgroup API_Insert_Task Task Insert Utility
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/* NOTE: when adding a value here, please make sure to update both
			
 
				+ * src/util/starpu_task_insert_utils.c (in two places) and
			
 
				+ * mpi/src/starpu_mpi_task_insert.c and mpi/src/starpu_mpi_task_insert_fortran.c */
			
 
				+
			
 
				+#define STARPU_MODE_SHIFT	17
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert(), must be followed by a
			
 
				+   pointer to a constant value and the size of the constant
			
 
				+ */
			
 
				+#define STARPU_VALUE		 (1<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert(), must be followed by a
			
 
				+   pointer to a callback function
			
 
				+*/
			
 
				+#define STARPU_CALLBACK		 (2<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert(), must be followed by two
			
 
				+   pointers: one to a callback function, and the other to be given as
			
 
				+   an argument to the callback function; this is equivalent to using
			
 
				+   both ::STARPU_CALLBACK and ::STARPU_CALLBACK_ARG.
			
 
				+*/
			
 
				+#define STARPU_CALLBACK_WITH_ARG (3<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert(), must be followed by a
			
 
				+   pointer to be given as an argument to the callback function
			
 
				+*/
			
 
				+#define STARPU_CALLBACK_ARG	 (4<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert(), must
			
 
				+   be followed by an integer defining a priority level
			
 
				+*/
			
 
				+#define STARPU_PRIORITY		 (5<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   \ingroup API_MPI_Support
			
 
				+   Used when calling starpu_mpi_task_insert(), must be followed by a
			
 
				+   integer value which specifies the node on which to execute the
			
 
				+   codelet.
			
 
				+ */
			
 
				+#define STARPU_EXECUTE_ON_NODE	 (6<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   \ingroup API_MPI_Support
			
 
				+   Used when calling starpu_mpi_task_insert(), must be followed by a
			
 
				+   data handle to specify that the node owning the given data will
			
 
				+   execute the codelet.
			
 
				+*/
			
 
				+#define STARPU_EXECUTE_ON_DATA	 (7<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert(), must be followed by an array of
			
 
				+   handles and the number of elements in the array (as int). This is equivalent
			
 
				+   to passing the handles as separate parameters with STARPU_R/W/RW.
			
 
				+*/
			
 
				+#define STARPU_DATA_ARRAY        (8<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert(), must be followed by an array of
			
 
				+   struct starpu_data_descr and the number of elements in the array (as int).
			
 
				+   This is equivalent to passing the handles with the corresponding modes.
			
 
				+*/
			
 
				+#define STARPU_DATA_MODE_ARRAY   (9<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert(), must be followed by a tag.
			
 
				+*/
			
 
				+#define STARPU_TAG               (10<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert(), must be followed by a tag.
			
 
				+*/
			
 
				+#define STARPU_HYPERVISOR_TAG	 (11<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert(), must be followed by an
			
 
				+   amount of floating point operations, as a double. Users <b>MUST</b>
			
 
				+   explicitly cast into double, otherwise parameter passing will not
			
 
				+   work.
			
 
				+*/
			
 
				+#define STARPU_FLOPS	         (12<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert(), must be followed by the id
			
 
				+   of the scheduling context to which to submit the task to.
			
 
				+*/
			
 
				+#define STARPU_SCHED_CTX	 (13<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert(), must be followed by a
			
 
				+   pointer to a prologue callback function
			
 
				+*/
			
 
				+#define STARPU_PROLOGUE_CALLBACK   (14<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert(), must be followed by a
			
 
				+   pointer to be given as an argument to the prologue callback
			
 
				+   function
			
 
				+*/
			
 
				+#define STARPU_PROLOGUE_CALLBACK_ARG (15<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert(), must be followed by a
			
 
				+   pointer to a prologue callback pop function
			
 
				+*/
			
 
				+#define STARPU_PROLOGUE_CALLBACK_POP   (16<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert(), must be followed by a
			
 
				+   pointer to be given as an argument to the prologue callback pop
			
 
				+   function
			
 
				+*/
			
 
				+#define STARPU_PROLOGUE_CALLBACK_POP_ARG (17<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert(), must be followed by an
			
 
				+   integer value specifying the worker on which to execute the task
			
 
				+   (as specified by starpu_task::execute_on_a_specific_worker)
			
 
				+*/
			
 
				+#define STARPU_EXECUTE_ON_WORKER (18<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert(), must be followed by an
			
 
				+   unsigned long long value specifying the mask of workers on which to execute
			
 
				+   the task (as specified by starpu_task::where)
			
 
				+*/
			
 
				+#define STARPU_EXECUTE_WHERE     (19<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert(), must be followed by a tag
			
 
				+   stored in starpu_task::tag_id. Leave starpu_task::use_tag as 0.
			
 
				+*/
			
 
				+#define STARPU_TAG_ONLY          (20<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert(), must be followed by an unsigned
			
 
				+   stored in starpu_task::possibly_parallel.
			
 
				+*/
			
 
				+#define STARPU_POSSIBLY_PARALLEL    (21<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert(), must be
			
 
				+   followed by an integer value specifying the worker order in which
			
 
				+   to execute the tasks (as specified by starpu_task::workerorder)
			
 
				+*/
			
 
				+#define STARPU_WORKER_ORDER      (22<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   \ingroup API_MPI_Support
			
 
				+   Used when calling starpu_mpi_task_insert(), must be followed by an
			
 
				+   identifier to a node selection policy. This is needed when several
			
 
				+   nodes own data in ::STARPU_W mode.
			
 
				+*/
			
 
				+#define STARPU_NODE_SELECTION_POLICY (23<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert(), must be followed by a
			
 
				+   char * stored in starpu_task::name.
			
 
				+*/
			
 
				+#define STARPU_NAME		 (24<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert(), must be followed by a
			
 
				+   memory buffer containing the arguments to be given to the task, and
			
 
				+   by the size of the arguments. The memory buffer should be the
			
 
				+   result of a previous call to starpu_codelet_pack_args(), and will
			
 
				+   be freed (i.e. starpu_task::cl_arg_free will be set to 1)
			
 
				+*/
			
 
				+#define STARPU_CL_ARGS		(25<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert(), similarly to
			
 
				+   ::STARPU_CL_ARGS, must be followed by a memory buffer containing
			
 
				+   the arguments to be given to the task, and by the size of the
			
 
				+   arguments. The memory buffer should be the result of a previous
			
 
				+   call to starpu_codelet_pack_args(), and will NOT be freed (i.e.
			
 
				+   starpu_task::cl_arg_free will be set to 0)
			
 
				+*/
			
 
				+#define STARPU_CL_ARGS_NFREE	(26<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert(), must be followed by a
			
 
				+   number of tasks as int, and an array containing these tasks. The
			
 
				+   function starpu_task_declare_deps_array() will be called with the
			
 
				+   given values.
			
 
				+*/
			
 
				+#define STARPU_TASK_DEPS_ARRAY	(27<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert(), must be followed by an
			
 
				+   integer representing a color
			
 
				+*/
			
 
				+#define STARPU_TASK_COLOR       (28<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert(), must be followed by an
			
 
				+   array of characters representing the sequential consistency for
			
 
				+   each buffer of the task.
			
 
				+*/
			
 
				+#define STARPU_HANDLES_SEQUENTIAL_CONSISTENCY (29<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert(), must be followed by an
			
 
				+   integer stating if the task is synchronous or not
			
 
				+*/
			
 
				+#define STARPU_TASK_SYNCHRONOUS (30<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert(), must be followed by a
			
 
				+   number of tasks as int, and an array containing these tasks. The
			
 
				+   function starpu_task_declare_end_deps_array() will be called with
			
 
				+   the given values.
			
 
				+*/
			
 
				+#define STARPU_TASK_END_DEPS_ARRAY	(31<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert(), must be followed by an
			
 
				+   integer which will be given to starpu_task_end_dep_add()
			
 
				+*/
			
 
				+#define STARPU_TASK_END_DEP	(32<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert(), must be followed by an
			
 
				+   unsigned being a number of workers, and an array of bits whose size
			
 
				+   is the number of workers, the array indicates the set of workers
			
 
				+   which are allowed to execute the task.
			
 
				+*/
			
 
				+#define STARPU_TASK_WORKERIDS (33<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert(), must be followed by an
			
 
				+   unsigned which sets the sequential consistency for the data
			
 
				+   parameters of the task.
			
 
				+*/
			
 
				+#define STARPU_SEQUENTIAL_CONSISTENCY (34<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert() and alike, must be followed
			
 
				+   by a pointer to a struct starpu_profiling_task_info
			
 
				+ */
			
 
				+#define STARPU_TASK_PROFILING_INFO (35<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert() and alike, must be followed
			
 
				+   by an unsigned specifying not to allocate a submitorder id for the task
			
 
				+ */
			
 
				+#define STARPU_TASK_NO_SUBMITORDER (36<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert(), similarly to
			
 
				+   ::STARPU_CALLBACK_ARG, must be followed by a pointer to be given as
			
 
				+   an argument to the callback function, the argument will not be
			
 
				+   freed, i.e. starpu_task::callback_arg_free will be set to 0
			
 
				+*/
			
 
				+#define STARPU_CALLBACK_ARG_NFREE	 (37<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert(), similarly to
			
 
				+   ::STARPU_CALLBACK_WITH_ARG, must be followed by two pointers: one
			
 
				+   to a callback function, and the other to be given as an argument to
			
 
				+   the callback function; this is equivalent to using both
			
 
				+   ::STARPU_CALLBACK and ::STARPU_CALLBACK_ARG_NFREE.
			
 
				+*/
			
 
				+#define STARPU_CALLBACK_WITH_ARG_NFREE	 (38<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert(), similarly to
			
 
				+   ::STARPU_PROLOGUE_CALLBACK_ARG, must be followed by a
			
 
				+   pointer to be given as an argument to the prologue callback
			
 
				+   function, the argument will not be
			
 
				+   freed, i.e. starpu_task::prologue_callback_arg_free will be set to 0
			
 
				+*/
			
 
				+#define STARPU_PROLOGUE_CALLBACK_ARG_NFREE (39<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert(), similarly to
			
 
				+   ::STARPU_PROLOGUE_CALLBACK_POP_ARG, must be followed by a pointer
			
 
				+   to be given as an argument to the prologue callback pop function,
			
 
				+   the argument will not be freed, i.e.
			
 
				+   starpu_task::prologue_callback_pop_arg_free will be set to 0
			
 
				+*/
			
 
				+#define STARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE (40<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Used when calling starpu_task_insert() and alike, must be followed
			
 
				+   by a void* specifying the value to be set in the sched_data field of the
			
 
				+   task.
			
 
				+ */
			
 
				+#define STARPU_TASK_SCHED_DATA (41<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+#define STARPU_SHIFTED_MODE_MAX (42<<STARPU_MODE_SHIFT)
			
 
				+
			
 
				+/**
			
 
				+   Set the given \p task corresponding to \p cl with the following arguments.
			
 
				+   The argument list must be zero-terminated. The arguments
			
 
				+   following the codelet are the same as the ones for the function
			
 
				+   starpu_task_insert().
			
 
				+   If some arguments of type ::STARPU_VALUE are given, the parameter
			
 
				+   starpu_task::cl_arg_free will be set to 1.
			
 
				+*/
			
 
				+int starpu_task_set(struct starpu_task *task, struct starpu_codelet *cl, ...);
			
 
				+
			
 
				+/**
			
 
				+   Create a task corresponding to \p cl with the following arguments.
			
 
				+   The argument list must be zero-terminated. The arguments
			
 
				+   following the codelet are the same as the ones for the function
			
 
				+   starpu_task_insert().
			
 
				+   If some arguments of type ::STARPU_VALUE are given, the parameter
			
 
				+   starpu_task::cl_arg_free will be set to 1.
			
 
				+*/
			
 
				+struct starpu_task *starpu_task_build(struct starpu_codelet *cl, ...);
			
 
				+
			
 
				+/**
			
 
				+   Create and submit a task corresponding to \p cl with the following
			
 
				+   given arguments. The argument list must be zero-terminated.
			
 
				+
			
 
				+   The arguments following the codelet can be of the following types:
			
 
				+   <ul>
			
 
				+   <li> ::STARPU_R, ::STARPU_W, ::STARPU_RW, ::STARPU_SCRATCH,
			
 
				+   ::STARPU_REDUX an access mode followed by a data handle;
			
 
				+   <li> ::STARPU_DATA_ARRAY followed by an array of data handles and
			
 
				+   its number of elements;
			
 
				+   <li> ::STARPU_DATA_MODE_ARRAY followed by an array of struct
			
 
				+   starpu_data_descr, i.e. data handles with their associated access
			
 
				+   modes, and its number of elements;
			
 
				+   <li> ::STARPU_EXECUTE_ON_WORKER, ::STARPU_WORKER_ORDER followed by
			
 
				+   an integer value specifying the worker on which to execute the task
			
 
				+   (as specified by starpu_task::execute_on_a_specific_worker)
			
 
				+   <li> the specific values ::STARPU_VALUE, ::STARPU_CALLBACK,
			
 
				+   ::STARPU_CALLBACK_ARG, ::STARPU_CALLBACK_WITH_ARG,
			
 
				+   ::STARPU_PRIORITY, ::STARPU_TAG, ::STARPU_TAG_ONLY, ::STARPU_FLOPS,
			
 
				+   ::STARPU_SCHED_CTX, ::STARPU_CL_ARGS, ::STARPU_CL_ARGS_NFREE,
			
 
				+   ::STARPU_TASK_DEPS_ARRAY, ::STARPU_TASK_COLOR,
			
 
				+   ::STARPU_HANDLES_SEQUENTIAL_CONSISTENCY, ::STARPU_TASK_SYNCHRONOUS,
			
 
				+   ::STARPU_TASK_END_DEP followed by the appropriate objects as
			
 
				+   defined elsewhere.
			
 
				+   </ul>
			
 
				+
			
 
				+   When using ::STARPU_DATA_ARRAY, the access mode of the data handles
			
 
				+   is not defined, it will be taken from the codelet
			
 
				+   starpu_codelet::modes or starpu_codelet::dyn_modes field. One
			
 
				+   should use ::STARPU_DATA_MODE_ARRAY to define the data handles
			
 
				+   along with the access modes.
			
 
				+
			
 
				+   Parameters to be passed to the codelet implementation are defined
			
 
				+   through the type ::STARPU_VALUE. The function
			
 
				+   starpu_codelet_unpack_args() must be called within the codelet implementation to retrieve them.
			
 
				+*/
			
 
				+int starpu_task_insert(struct starpu_codelet *cl, ...);
			
 
				+
			
 
				+/**
			
 
				+   Similar to starpu_task_insert(). Kept to avoid breaking old codes.
			
 
				+*/
			
 
				+int starpu_insert_task(struct starpu_codelet *cl, ...);
			
 
				+
			
 
				+/**
			
 
				+   Assuming that there are already \p current_buffer data handles
			
 
				+   passed to the task, and if *allocated_buffers is not 0, the
			
 
				+   <c>task->dyn_handles</c> array has size \p *allocated_buffers, this
			
 
				+   function makes room for \p room other data handles, allocating or
			
 
				+   reallocating <c>task->dyn_handles</c> as necessary and updating \p
			
 
				+   *allocated_buffers accordingly. One can thus start with
			
 
				+   *allocated_buffers equal to 0 and current_buffer equal to 0, then
			
 
				+   make room by calling this function, then store handles with
			
 
				+   STARPU_TASK_SET_HANDLE(), make room again with this function, store
			
 
				+   yet more handles, etc.
			
 
				+*/
			
 
				+void starpu_task_insert_data_make_room(struct starpu_codelet *cl, struct starpu_task *task, int *allocated_buffers, int current_buffer, int room);
			
 
				+
			
 
				+/**
			
 
				+   Store data handle \p handle into task \p task with mode \p
			
 
				+   arg_type, updating \p *allocated_buffers and \p *current_buffer
			
 
				+   accordingly.
			
 
				+*/
			
 
				+void starpu_task_insert_data_process_arg(struct starpu_codelet *cl, struct starpu_task *task, int *allocated_buffers, int *current_buffer, int arg_type, starpu_data_handle_t handle);
			
 
				+
			
 
				+/**
			
 
				+   Store \p nb_handles data handles \p handles into task \p task,
			
 
				+   updating \p *allocated_buffers and \p *current_buffer accordingly.
			
 
				+*/
			
 
				+void starpu_task_insert_data_process_array_arg(struct starpu_codelet *cl, struct starpu_task *task, int *allocated_buffers, int *current_buffer, int nb_handles, starpu_data_handle_t *handles);
			
 
				+
			
 
				+/**
			
 
				+   Store \p nb_descrs data handles described by \p descrs into task \p
			
 
				+   task, updating \p *allocated_buffers and \p *current_buffer
			
 
				+   accordingly.
			
 
				+*/
			
 
				+void starpu_task_insert_data_process_mode_array_arg(struct starpu_codelet *cl, struct starpu_task *task, int *allocated_buffers, int *current_buffer, int nb_descrs, struct starpu_data_descr *descrs);
			
 
				+
			
 
				+/**
			
 
				+   Pack arguments of type ::STARPU_VALUE into a buffer which can be
			
 
				+   given to a codelet and later unpacked with the function
			
 
				+   starpu_codelet_unpack_args().
			
 
				+
			
 
				+   Instead of calling starpu_codelet_pack_args(), one can also call
			
 
				+   starpu_codelet_pack_arg_init(), then starpu_codelet_pack_arg() for
			
 
				+   each data, then starpu_codelet_pack_arg_fini().
			
 
				+*/
			
 
				+void starpu_codelet_pack_args(void **arg_buffer, size_t *arg_buffer_size, ...);
			
 
				+
			
 
				+struct starpu_codelet_pack_arg_data
			
 
				+{
			
 
				+	char *arg_buffer;
			
 
				+	size_t arg_buffer_size;
			
 
				+	size_t current_offset;
			
 
				+	int nargs;
			
 
				+};
			
 
				+
			
 
				+/**
			
 
				+   Initialize struct starpu_codelet_pack_arg_data before calling
			
 
				+   starpu_codelet_pack_arg() and starpu_codelet_pack_arg_fini(). This
			
 
				+   will simply initialize the content of the structure.
			
 
				+*/
			
 
				+void starpu_codelet_pack_arg_init(struct starpu_codelet_pack_arg_data *state);
			
 
				+
			
 
				+/**
			
 
				+   Pack one argument into struct starpu_codelet_pack_arg_data \p state.
			
 
				+   That structure has to be initialized before with
			
 
				+   starpu_codelet_pack_arg_init(), and after all
			
 
				+   starpu_codelet_pack_arg() calls performed,
			
 
				+   starpu_codelet_pack_arg_fini() has to be used to get the \p cl_arg
			
 
				+   and \p cl_arg_size to be put in the task.
			
 
				+*/
			
 
				+void starpu_codelet_pack_arg(struct starpu_codelet_pack_arg_data *state, const void *ptr, size_t ptr_size);
			
 
				+
			
 
				+/**
			
 
				+   Finish packing data, after calling starpu_codelet_pack_arg_init()
			
 
				+   once and starpu_codelet_pack_arg() several times.
			
 
				+*/
			
 
				+void starpu_codelet_pack_arg_fini(struct starpu_codelet_pack_arg_data *state, void **cl_arg, size_t *cl_arg_size);
			
 
				+
			
 
				+/**
			
 
				+   Retrieve the arguments of type ::STARPU_VALUE associated to a
			
 
				+   task automatically created using the function starpu_task_insert(). If
			
 
				+   any parameter's value is 0, unpacking will stop there and ignore the remaining
			
 
				+   parameters.
			
 
				+*/
			
 
				+void starpu_codelet_unpack_args(void *cl_arg, ...);
			
 
				+
			
 
				+/**
			
 
				+   Similar to starpu_codelet_unpack_args(), but if any parameter is 0,
			
 
				+   copy the part of \p cl_arg that has not been read in \p buffer
			
 
				+   which can then be used in a later call to one of the unpack
			
 
				+   functions.
			
 
				+*/
			
 
				+void starpu_codelet_unpack_args_and_copyleft(void *cl_arg, void *buffer, size_t buffer_size, ...);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif /* __STARPU_TASK_UTIL_H__ */
			
--- a/include/starpu/1.3/starpu_thread.h
+++ b/include/starpu/1.3/starpu_thread.h
@@ -0,0 +1,503 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+// The documentation for this file is in doc/doxygen/chapters/api/threads.doxy
			
 
				+
			
 
				+#ifndef __STARPU_THREAD_H__
			
 
				+#define __STARPU_THREAD_H__
			
 
				+
			
 
				+#include <starpu_config.h>
			
 
				+#include <starpu_util.h>
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+#include <pthread.h>
			
 
				+#ifdef STARPU_HAVE_SIMGRID_MUTEX_H
			
 
				+#include <simgrid/mutex.h>
			
 
				+#include <simgrid/cond.h>
			
 
				+#elif defined(STARPU_HAVE_XBT_SYNCHRO_H)
			
 
				+#include <xbt/synchro.h>
			
 
				+#else
			
 
				+#include <xbt/synchro_core.h>
			
 
				+#endif
			
 
				+#ifdef STARPU_HAVE_SIMGRID_ACTOR_H
			
 
				+#include <simgrid/actor.h>
			
 
				+#endif
			
 
				+#ifdef STARPU_HAVE_SIMGRID_SEMAPHORE_H
			
 
				+#include <simgrid/semaphore.h>
			
 
				+#endif
			
 
				+#ifdef STARPU_HAVE_SIMGRID_MUTEX_H
			
 
				+#include <simgrid/mutex.h>
			
 
				+#endif
			
 
				+#ifdef STARPU_HAVE_SIMGRID_COND_H
			
 
				+#include <simgrid/cond.h>
			
 
				+#endif
			
 
				+#ifdef STARPU_HAVE_SIMGRID_BARRIER_H
			
 
				+#include <simgrid/barrier.h>
			
 
				+#endif
			
 
				+#ifdef STARPU_HAVE_SIMGRID_HOST_H
			
 
				+#include <simgrid/host.h>
			
 
				+#endif
			
 
				+#ifdef STARPU_HAVE_SIMGRID_MSG_H
			
 
				+#include <simgrid/msg.h>
			
 
				+#elif defined(STARPU_HAVE_MSG_MSG_H)
			
 
				+#include <msg/msg.h>
			
 
				+#endif
			
 
				+#elif !defined(_MSC_VER) || defined(BUILDING_STARPU)
			
 
				+#include <pthread.h>
			
 
				+#include <semaphore.h>
			
 
				+#endif
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+/*
			
 
				+ * Encapsulation of the pthread_create function.
			
 
				+ */
			
 
				+
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+
			
 
				+#ifdef STARPU_HAVE_SIMGRID_ACTOR_H
			
 
				+typedef sg_actor_t starpu_pthread_t;
			
 
				+#else
			
 
				+typedef msg_process_t starpu_pthread_t;
			
 
				+#endif
			
 
				+typedef struct {
			
 
				+	size_t stacksize;
			
 
				+} starpu_pthread_attr_t;
			
 
				+
			
 
				+#ifdef STARPU_HAVE_SIMGRID_ACTOR_H
			
 
				+typedef sg_host_t starpu_sg_host_t;
			
 
				+#else
			
 
				+typedef msg_host_t starpu_sg_host_t;
			
 
				+#endif
			
 
				+int starpu_pthread_equal(starpu_pthread_t t1, starpu_pthread_t t2);
			
 
				+starpu_pthread_t starpu_pthread_self(void);
			
 
				+int starpu_pthread_create_on(const char *name, starpu_pthread_t *thread, const starpu_pthread_attr_t *attr, void *(*start_routine) (void *), void *arg, starpu_sg_host_t host);
			
 
				+int starpu_pthread_create(starpu_pthread_t *thread, const starpu_pthread_attr_t *attr, void *(*start_routine) (void *), void *arg);
			
 
				+starpu_pthread_t _starpu_simgrid_actor_create(const char *name, xbt_main_func_t code, starpu_sg_host_t host, int argc, char *argv[]);
			
 
				+int starpu_pthread_join(starpu_pthread_t thread, void **retval);
			
 
				+int starpu_pthread_exit(void *retval) STARPU_ATTRIBUTE_NORETURN;
			
 
				+int starpu_pthread_attr_init(starpu_pthread_attr_t *attr);
			
 
				+int starpu_pthread_attr_destroy(starpu_pthread_attr_t *attr);
			
 
				+int starpu_pthread_attr_setdetachstate(starpu_pthread_attr_t *attr, int detachstate);
			
 
				+int starpu_pthread_attr_setstacksize(starpu_pthread_attr_t *attr, size_t stacksize);
			
 
				+
			
 
				+#elif !defined(_MSC_VER) || defined(BUILDING_STARPU) /* STARPU_SIMGRID */
			
 
				+
			
 
				+typedef pthread_t starpu_pthread_t;
			
 
				+typedef pthread_attr_t starpu_pthread_attr_t;
			
 
				+
			
 
				+#define starpu_pthread_equal pthread_equal
			
 
				+#define starpu_pthread_self pthread_self
			
 
				+#define starpu_pthread_create pthread_create
			
 
				+#define starpu_pthread_create_on(name, thread, attr, routine, arg, where) starpu_pthread_create(thread, attr, routine, arg)
			
 
				+#define starpu_pthread_join pthread_join
			
 
				+#define starpu_pthread_exit pthread_exit
			
 
				+#define starpu_pthread_attr_init pthread_attr_init
			
 
				+#define starpu_pthread_attr_destroy pthread_attr_destroy
			
 
				+#define starpu_pthread_attr_setdetachstate pthread_attr_setdetachstate
			
 
				+#define starpu_pthread_attr_setstacksize pthread_attr_setstacksize
			
 
				+
			
 
				+#endif /* STARPU_SIMGRID, _MSC_VER */
			
 
				+
			
 
				+#ifdef STARPU_HAVE_PTHREAD_SETNAME_NP
			
 
				+#ifdef STARPU_HAVE_DARWIN
			
 
				+#define starpu_pthread_setname(name) pthread_setname_np(name)
			
 
				+#else
			
 
				+#define starpu_pthread_setname(name) pthread_setname_np(pthread_self(), name)
			
 
				+#endif
			
 
				+#else
			
 
				+#define starpu_pthread_setname(name)
			
 
				+#endif
			
 
				+
			
 
				+/*
			
 
				+ * Encapsulation of the pthread_mutex_* functions.
			
 
				+ */
			
 
				+
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+#ifdef STARPU_HAVE_SIMGRID_MUTEX_H
			
 
				+typedef sg_mutex_t starpu_pthread_mutex_t;
			
 
				+#else
			
 
				+typedef xbt_mutex_t starpu_pthread_mutex_t;
			
 
				+#endif
			
 
				+typedef int starpu_pthread_mutexattr_t;
			
 
				+
			
 
				+#define STARPU_PTHREAD_MUTEX_INITIALIZER NULL
			
 
				+
			
 
				+int starpu_pthread_mutex_init(starpu_pthread_mutex_t *mutex, const starpu_pthread_mutexattr_t *mutexattr);
			
 
				+int starpu_pthread_mutex_destroy(starpu_pthread_mutex_t *mutex);
			
 
				+int starpu_pthread_mutex_lock(starpu_pthread_mutex_t *mutex);
			
 
				+int starpu_pthread_mutex_unlock(starpu_pthread_mutex_t *mutex);
			
 
				+int starpu_pthread_mutex_trylock(starpu_pthread_mutex_t *mutex);
			
 
				+int starpu_pthread_mutexattr_gettype(const starpu_pthread_mutexattr_t *attr, int *type);
			
 
				+int starpu_pthread_mutexattr_settype(starpu_pthread_mutexattr_t *attr, int type);
			
 
				+int starpu_pthread_mutexattr_destroy(starpu_pthread_mutexattr_t *attr);
			
 
				+int starpu_pthread_mutexattr_init(starpu_pthread_mutexattr_t *attr);
			
 
				+
			
 
				+#elif !defined(_MSC_VER) || defined(BUILDING_STARPU) /* !STARPU_SIMGRID */
			
 
				+
			
 
				+typedef pthread_mutex_t starpu_pthread_mutex_t;
			
 
				+typedef pthread_mutexattr_t starpu_pthread_mutexattr_t;
			
 
				+
			
 
				+#define starpu_pthread_mutex_init pthread_mutex_init
			
 
				+#define starpu_pthread_mutex_destroy pthread_mutex_destroy
			
 
				+#define starpu_pthread_mutexattr_gettype pthread_mutexattr_gettype
			
 
				+#define starpu_pthread_mutexattr_settype pthread_mutexattr_settype
			
 
				+#define starpu_pthread_mutexattr_destroy pthread_mutexattr_destroy
			
 
				+#define starpu_pthread_mutexattr_init pthread_mutexattr_init
			
 
				+
			
 
				+#ifdef STARPU_FXT_LOCK_TRACES
			
 
				+int starpu_pthread_mutex_lock(starpu_pthread_mutex_t *mutex);
			
 
				+int starpu_pthread_mutex_unlock(starpu_pthread_mutex_t *mutex);
			
 
				+int starpu_pthread_mutex_trylock(starpu_pthread_mutex_t *mutex);
			
 
				+#else
			
 
				+#define starpu_pthread_mutex_lock pthread_mutex_lock
			
 
				+#define starpu_pthread_mutex_unlock pthread_mutex_unlock
			
 
				+#define starpu_pthread_mutex_trylock pthread_mutex_trylock
			
 
				+#endif
			
 
				+
			
 
				+#define STARPU_PTHREAD_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
			
 
				+
			
 
				+#endif /* STARPU_SIMGRID, _MSC_VER */
			
 
				+
			
 
				+#if !defined(_MSC_VER) || defined(BUILDING_STARPU)
			
 
				+int starpu_pthread_mutex_lock_sched(starpu_pthread_mutex_t *mutex);
			
 
				+int starpu_pthread_mutex_unlock_sched(starpu_pthread_mutex_t *mutex);
			
 
				+int starpu_pthread_mutex_trylock_sched(starpu_pthread_mutex_t *mutex);
			
 
				+void starpu_pthread_mutex_check_sched(starpu_pthread_mutex_t *mutex, char *file, int line);
			
 
				+#endif
			
 
				+
			
 
				+/*
			
 
				+ * Encapsulation of the pthread_key_* functions.
			
 
				+ */
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+
			
 
				+typedef int starpu_pthread_key_t;
			
 
				+int starpu_pthread_key_create(starpu_pthread_key_t *key, void (*destr_function) (void *));
			
 
				+int starpu_pthread_key_delete(starpu_pthread_key_t key);
			
 
				+int starpu_pthread_setspecific(starpu_pthread_key_t key, const void *pointer);
			
 
				+void *starpu_pthread_getspecific(starpu_pthread_key_t key);
			
 
				+
			
 
				+#elif !defined(_MSC_VER) || defined(BUILDING_STARPU) /* !STARPU_SIMGRID */
			
 
				+
			
 
				+typedef pthread_key_t starpu_pthread_key_t;
			
 
				+
			
 
				+#define starpu_pthread_key_create pthread_key_create
			
 
				+#define starpu_pthread_key_delete pthread_key_delete
			
 
				+#define starpu_pthread_setspecific pthread_setspecific
			
 
				+#define starpu_pthread_getspecific pthread_getspecific
			
 
				+
			
 
				+#endif /* STARPU_SIMGRID, _MSC_VER */
			
 
				+
			
 
				+/*
			
 
				+ * Encapsulation of the pthread_cond_* functions.
			
 
				+ */
			
 
				+
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+
			
 
				+#ifdef STARPU_HAVE_SIMGRID_COND_H
			
 
				+typedef sg_cond_t starpu_pthread_cond_t;
			
 
				+#else
			
 
				+typedef xbt_cond_t starpu_pthread_cond_t;
			
 
				+#endif
			
 
				+typedef int starpu_pthread_condattr_t;
			
 
				+#define STARPU_PTHREAD_COND_INITIALIZER NULL
			
 
				+
			
 
				+int starpu_pthread_cond_init(starpu_pthread_cond_t *cond, starpu_pthread_condattr_t *cond_attr);
			
 
				+int starpu_pthread_cond_signal(starpu_pthread_cond_t *cond);
			
 
				+int starpu_pthread_cond_broadcast(starpu_pthread_cond_t *cond);
			
 
				+int starpu_pthread_cond_wait(starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex);
			
 
				+int starpu_pthread_cond_timedwait(starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex, const struct timespec *abstime);
			
 
				+int starpu_pthread_cond_destroy(starpu_pthread_cond_t *cond);
			
 
				+
			
 
				+#elif !defined(_MSC_VER) || defined(BUILDING_STARPU) /* !STARPU_SIMGRID */
			
 
				+
			
 
				+typedef pthread_cond_t starpu_pthread_cond_t;
			
 
				+typedef pthread_condattr_t starpu_pthread_condattr_t;
			
 
				+#define STARPU_PTHREAD_COND_INITIALIZER PTHREAD_COND_INITIALIZER
			
 
				+
			
 
				+#define starpu_pthread_cond_init pthread_cond_init
			
 
				+#define starpu_pthread_cond_signal pthread_cond_signal
			
 
				+#define starpu_pthread_cond_broadcast pthread_cond_broadcast
			
 
				+
			
 
				+#ifdef STARPU_FXT_LOCK_TRACES
			
 
				+int starpu_pthread_cond_wait(starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex);
			
 
				+#else
			
 
				+#define starpu_pthread_cond_wait pthread_cond_wait
			
 
				+#endif
			
 
				+
			
 
				+#define starpu_pthread_cond_timedwait pthread_cond_timedwait
			
 
				+#define starpu_pthread_cond_destroy pthread_cond_destroy
			
 
				+
			
 
				+#endif /* STARPU_SIMGRID, _MSC_VER */
			
 
				+
			
 
				+/*
			
 
				+ * Encapsulation of the pthread_rwlock_* functions.
			
 
				+ */
			
 
				+
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+
			
 
				+#ifdef STARPU_HAVE_SIMGRID_MUTEX_H
			
 
				+typedef sg_mutex_t starpu_pthread_rwlock_t;
			
 
				+#else
			
 
				+typedef xbt_mutex_t starpu_pthread_rwlock_t;
			
 
				+#endif
			
 
				+typedef int starpu_pthread_rwlockattr_t;
			
 
				+
			
 
				+int starpu_pthread_rwlock_init(starpu_pthread_rwlock_t *rwlock, const starpu_pthread_rwlockattr_t *attr);
			
 
				+int starpu_pthread_rwlock_destroy(starpu_pthread_rwlock_t *rwlock);
			
 
				+int starpu_pthread_rwlock_rdlock(starpu_pthread_rwlock_t *rwlock);
			
 
				+int starpu_pthread_rwlock_tryrdlock(starpu_pthread_rwlock_t *rwlock);
			
 
				+int starpu_pthread_rwlock_wrlock(starpu_pthread_rwlock_t *rwlock);
			
 
				+int starpu_pthread_rwlock_trywrlock(starpu_pthread_rwlock_t *rwlock);
			
 
				+int starpu_pthread_rwlock_unlock(starpu_pthread_rwlock_t *rwlock);
			
 
				+
			
 
				+#elif !defined(_MSC_VER) || defined(BUILDING_STARPU) /* !STARPU_SIMGRID */
			
 
				+
			
 
				+typedef pthread_rwlock_t starpu_pthread_rwlock_t;
			
 
				+typedef pthread_rwlockattr_t starpu_pthread_rwlockattr_t;
			
 
				+
			
 
				+#define starpu_pthread_rwlock_init pthread_rwlock_init
			
 
				+#define starpu_pthread_rwlock_destroy pthread_rwlock_destroy
			
 
				+
			
 
				+#ifdef STARPU_FXT_LOCK_TRACES
			
 
				+int starpu_pthread_rwlock_rdlock(starpu_pthread_rwlock_t *rwlock);
			
 
				+int starpu_pthread_rwlock_tryrdlock(starpu_pthread_rwlock_t *rwlock);
			
 
				+int starpu_pthread_rwlock_wrlock(starpu_pthread_rwlock_t *rwlock);
			
 
				+int starpu_pthread_rwlock_trywrlock(starpu_pthread_rwlock_t *rwlock);
			
 
				+int starpu_pthread_rwlock_unlock(starpu_pthread_rwlock_t *rwlock);
			
 
				+#else
			
 
				+#define starpu_pthread_rwlock_rdlock pthread_rwlock_rdlock
			
 
				+#define starpu_pthread_rwlock_tryrdlock pthread_rwlock_tryrdlock
			
 
				+#define starpu_pthread_rwlock_wrlock pthread_rwlock_wrlock
			
 
				+#define starpu_pthread_rwlock_trywrlock pthread_rwlock_trywrlock
			
 
				+#define starpu_pthread_rwlock_unlock pthread_rwlock_unlock
			
 
				+#endif
			
 
				+
			
 
				+#endif /* STARPU_SIMGRID, _MSC_VER */
			
 
				+
			
 
				+/*
			
 
				+ * Encapsulation of the pthread_barrier_* functions.
			
 
				+ */
			
 
				+
			
 
				+#if defined(STARPU_SIMGRID) || (!defined(STARPU_HAVE_PTHREAD_BARRIER) && (!defined(_MSC_VER) || defined(BUILDING_STARPU)))
			
 
				+
			
 
				+#if defined(STARPU_SIMGRID) && (defined(STARPU_HAVE_SIMGRID_BARRIER_H) || defined(STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT) || defined(xbt_barrier_init))
			
 
				+#ifdef STARPU_HAVE_SIMGRID_BARRIER_H
			
 
				+typedef sg_bar_t starpu_pthread_barrier_t;
			
 
				+#else
			
 
				+typedef xbt_bar_t starpu_pthread_barrier_t;
			
 
				+#endif
			
 
				+typedef int starpu_pthread_barrierattr_t;
			
 
				+#ifdef SG_BARRIER_SERIAL_THREAD
			
 
				+#  define STARPU_PTHREAD_BARRIER_SERIAL_THREAD SG_BARRIER_SERIAL_THREAD
			
 
				+#else
			
 
				+#  define STARPU_PTHREAD_BARRIER_SERIAL_THREAD -1
			
 
				+#endif
			
 
				+#else
			
 
				+typedef struct {
			
 
				+	starpu_pthread_mutex_t mutex;
			
 
				+	starpu_pthread_cond_t cond;
			
 
				+	starpu_pthread_cond_t cond_destroy;
			
 
				+	unsigned count;
			
 
				+	unsigned done;
			
 
				+	unsigned busy;
			
 
				+} starpu_pthread_barrier_t;
			
 
				+typedef int starpu_pthread_barrierattr_t;
			
 
				+#define STARPU_PTHREAD_BARRIER_SERIAL_THREAD -1
			
 
				+#endif
			
 
				+
			
 
				+int starpu_pthread_barrier_init(starpu_pthread_barrier_t *barrier, const starpu_pthread_barrierattr_t *attr, unsigned count);
			
 
				+int starpu_pthread_barrier_destroy(starpu_pthread_barrier_t *barrier);
			
 
				+int starpu_pthread_barrier_wait(starpu_pthread_barrier_t *barrier);
			
 
				+
			
 
				+#elif !defined(_MSC_VER) /* STARPU_SIMGRID, !STARPU_HAVE_PTHREAD_BARRIER */
			
 
				+
			
 
				+typedef pthread_barrier_t starpu_pthread_barrier_t;
			
 
				+typedef pthread_barrierattr_t starpu_pthread_barrierattr_t;
			
 
				+
			
 
				+#define starpu_pthread_barrier_init pthread_barrier_init
			
 
				+#define starpu_pthread_barrier_destroy pthread_barrier_destroy
			
 
				+
			
 
				+#ifdef STARPU_FXT_LOCK_TRACES
			
 
				+int starpu_pthread_barrier_wait(starpu_pthread_barrier_t *barrier);
			
 
				+#else
			
 
				+#define starpu_pthread_barrier_wait pthread_barrier_wait
			
 
				+#endif
			
 
				+#define STARPU_PTHREAD_BARRIER_SERIAL_THREAD PTHREAD_BARRIER_SERIAL_THREAD
			
 
				+
			
 
				+#endif /* STARPU_SIMGRID, !STARPU_HAVE_PTHREAD_BARRIER, _MSC_VER */
			
 
				+
			
 
				+/*
			
 
				+ * Encapsulation of the pthread_spin_* functions.
			
 
				+ */
			
 
				+
			
 
				+#if defined(STARPU_SIMGRID) || (defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG)) || !defined(STARPU_HAVE_PTHREAD_SPIN_LOCK)
			
 
				+
			
 
				+typedef struct
			
 
				+{
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+	int taken;
			
 
				+#elif defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG)
			
 
				+	unsigned taken STARPU_ATTRIBUTE_ALIGNED(16);
			
 
				+#else /* we only have a trivial implementation yet ! */
			
 
				+	uint32_t taken STARPU_ATTRIBUTE_ALIGNED(16);
			
 
				+#endif
			
 
				+} starpu_pthread_spinlock_t;
			
 
				+
			
 
				+int starpu_pthread_spin_init(starpu_pthread_spinlock_t *lock, int pshared);
			
 
				+int starpu_pthread_spin_destroy(starpu_pthread_spinlock_t *lock);
			
 
				+int starpu_pthread_spin_lock(starpu_pthread_spinlock_t *lock);
			
 
				+int starpu_pthread_spin_trylock(starpu_pthread_spinlock_t *lock);
			
 
				+int starpu_pthread_spin_unlock(starpu_pthread_spinlock_t *lock);
			
 
				+
			
 
				+#elif !defined(_MSC_VER) /* !( defined(STARPU_SIMGRID) || !defined(STARPU_HAVE_PTHREAD_SPIN_LOCK)) */
			
 
				+
			
 
				+typedef pthread_spinlock_t starpu_pthread_spinlock_t;
			
 
				+#define starpu_pthread_spin_init pthread_spin_init
			
 
				+#define starpu_pthread_spin_destroy pthread_spin_destroy
			
 
				+#define starpu_pthread_spin_lock pthread_spin_lock
			
 
				+#define starpu_pthread_spin_trylock pthread_spin_trylock
			
 
				+#define starpu_pthread_spin_unlock pthread_spin_unlock
			
 
				+
			
 
				+#endif /* !( defined(STARPU_SIMGRID) || !defined(STARPU_HAVE_PTHREAD_SPIN_LOCK)) */
			
 
				+
			
 
				+/*
			
 
				+ * Other needed pthread definitions
			
 
				+ */
			
 
				+
			
 
				+#if defined(_MSC_VER) && !defined(BUILDING_STARPU)
			
 
				+typedef void* starpu_pthread_rwlock_t;
			
 
				+typedef void* starpu_pthread_mutex_t;
			
 
				+typedef void* starpu_pthread_cond_t;
			
 
				+typedef void* starpu_pthread_barrier_t;
			
 
				+#endif /* _MSC_VER */
			
 
				+
			
 
				+/*
			
 
				+ * Simgrid-specific register/wait synchronization
			
 
				+ *
			
 
				+ * Producers create a "queue" object, and when they have produced something,
			
 
				+ * they call either queue_signal or queue_broadcast in order to wake either one
			
 
				+ * or all consumers waiting on the queue.
			
 
				+ *
			
 
				+ * starpu_pthread_queue_init(&global_queue1->queue);
			
 
				+ * while (1) {
			
 
				+ * 	element = compute();
			
 
				+ * 	push(element, global_queue1);
			
 
				+ * 	starpu_pthread_queue_signal(&global_queue1->queue);
			
 
				+ * }
			
 
				+ * starpu_pthread_queue_destroy(&global_queue1->queue);
			
 
				+ *
			
 
				+ * Consumers create a "wait" object, then queue_register on as many queues as they
			
 
				+ * want. In their consumption loop, they wait_reset, then test for availability
			
 
				+ * on all producers, and if none was available, call wait_wait to actually wait
			
 
				+ * for producers. On termination, consumers have to queue_unregister before
			
 
				+ * destroying the "wait" object:
			
 
				+ *
			
 
				+ * starpu_pthread_wait_t wait;
			
 
				+ *
			
 
				+ * starpu_pthread_wait_init(&wait);
			
 
				+ * starpu_pthread_queue_register(&wait, &global_queue1->queue);
			
 
				+ * starpu_pthread_queue_register(&wait, &global_queue2->queue);
			
 
				+ *
			
 
				+ * while (1) {
			
 
				+ * 	int sleep = 1;
			
 
				+ * 	starpu_pthread_wait_reset(&wait);
			
 
				+ * 	if (global_queue1->navailable)
			
 
				+ * 	{
			
 
				+ * 		work(global_queue1);
			
 
				+ * 		sleep = 0;
			
 
				+ * 	}
			
 
				+ * 	if (global_queue2->navailable)
			
 
				+ * 	{
			
 
				+ * 		work(global_queue2);
			
 
				+ * 		sleep = 0;
			
 
				+ * 	}
			
 
				+ * 	if (sleep)
			
 
				+ * 		starpu_pthread_wait_wait(&wait);
			
 
				+ * }
			
 
				+ * starpu_pthread_queue_unregister(&wait, &global_queue1->queue);
			
 
				+ * starpu_pthread_queue_unregister(&wait, &global_queue2->queue);
			
 
				+ * starpu_pthread_wait_destroy(&wait);
			
 
				+ */
			
 
				+
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+typedef struct
			
 
				+{
			
 
				+	starpu_pthread_mutex_t mutex;
			
 
				+	starpu_pthread_cond_t cond;
			
 
				+	unsigned block;
			
 
				+} starpu_pthread_wait_t;
			
 
				+
			
 
				+typedef struct
			
 
				+{
			
 
				+	starpu_pthread_mutex_t mutex;
			
 
				+	starpu_pthread_wait_t **queue;
			
 
				+	unsigned allocqueue;
			
 
				+	unsigned nqueue;
			
 
				+} starpu_pthread_queue_t;
			
 
				+
			
 
				+int starpu_pthread_queue_init(starpu_pthread_queue_t *q);
			
 
				+int starpu_pthread_queue_signal(starpu_pthread_queue_t *q);
			
 
				+int starpu_pthread_queue_broadcast(starpu_pthread_queue_t *q);
			
 
				+int starpu_pthread_queue_destroy(starpu_pthread_queue_t *q);
			
 
				+
			
 
				+int starpu_pthread_wait_init(starpu_pthread_wait_t *w);
			
 
				+int starpu_pthread_queue_register(starpu_pthread_wait_t *w, starpu_pthread_queue_t *q);
			
 
				+int starpu_pthread_queue_unregister(starpu_pthread_wait_t *w, starpu_pthread_queue_t *q);
			
 
				+int starpu_pthread_wait_reset(starpu_pthread_wait_t *w);
			
 
				+int starpu_pthread_wait_wait(starpu_pthread_wait_t *w);
			
 
				+int starpu_pthread_wait_timedwait(starpu_pthread_wait_t *w, const struct timespec *abstime);
			
 
				+int starpu_pthread_wait_destroy(starpu_pthread_wait_t *w);
			
 
				+#endif
			
 
				+
			
 
				+/*
			
 
				+ * Encapsulation of the semaphore functions.
			
 
				+ */
			
 
				+
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+
			
 
				+#ifdef STARPU_HAVE_SIMGRID_SEMAPHORE_H
			
 
				+typedef sg_sem_t starpu_sem_t;
			
 
				+#else
			
 
				+typedef msg_sem_t starpu_sem_t;
			
 
				+#endif
			
 
				+int starpu_sem_destroy(starpu_sem_t *);
			
 
				+int starpu_sem_getvalue(starpu_sem_t *, int *);
			
 
				+int starpu_sem_init(starpu_sem_t *, int, unsigned);
			
 
				+int starpu_sem_post(starpu_sem_t *);
			
 
				+int starpu_sem_trywait(starpu_sem_t *);
			
 
				+int starpu_sem_wait(starpu_sem_t *);
			
 
				+
			
 
				+#elif !defined(_MSC_VER) || defined(BUILDING_STARPU) /* !STARPU_SIMGRID */
			
 
				+
			
 
				+typedef sem_t starpu_sem_t;
			
 
				+#define starpu_sem_destroy sem_destroy
			
 
				+#define starpu_sem_getvalue sem_getvalue
			
 
				+#define starpu_sem_init sem_init
			
 
				+#define starpu_sem_post sem_post
			
 
				+int starpu_sem_trywait(starpu_sem_t *);
			
 
				+int starpu_sem_wait(starpu_sem_t *);
			
 
				+
			
 
				+#endif
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif /* __STARPU_THREAD_H__ */
			
--- a/include/starpu/1.3/starpu_thread_util.h
+++ b/include/starpu/1.3/starpu_thread_util.h
@@ -0,0 +1,434 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+// The documentation for this file is in doc/doxygen/chapters/api/threads.doxy
			
 
				+
			
 
				+#ifndef __STARPU_THREAD_UTIL_H__
			
 
				+#define __STARPU_THREAD_UTIL_H__
			
 
				+
			
 
				+#include <starpu_util.h>
			
 
				+#include <starpu_thread.h>
			
 
				+#include <errno.h>
			
 
				+
			
 
				+#if !(defined(_MSC_VER) && !defined(BUILDING_STARPU))
			
 
				+/*
			
 
				+ * Encapsulation of the starpu_pthread_create_* functions.
			
 
				+ */
			
 
				+
			
 
				+#define STARPU_PTHREAD_CREATE_ON(name, thread, attr, routine, arg, where) do {		    		\
			
 
				+	int p_ret =  starpu_pthread_create_on((name), (thread), (attr), (routine), (arg), (where)); 	\
			
 
				+	if (STARPU_UNLIKELY(p_ret != 0)) {								\
			
 
				+		fprintf(stderr,										\
			
 
				+			"%s:%d starpu_pthread_create_on: %s\n",						\
			
 
				+			__FILE__, __LINE__, strerror(p_ret));						\
			
 
				+		STARPU_ABORT();										\
			
 
				+	}												\
			
 
				+} while (0)
			
 
				+
			
 
				+#define STARPU_PTHREAD_CREATE(thread, attr, routine, arg) do {		    	\
			
 
				+	int p_ret =  starpu_pthread_create((thread), (attr), (routine), (arg)); \
			
 
				+	if (STARPU_UNLIKELY(p_ret != 0)) {					\
			
 
				+		fprintf(stderr,							\
			
 
				+			"%s:%d starpu_pthread_create: %s\n",			\
			
 
				+			__FILE__, __LINE__, strerror(p_ret));			\
			
 
				+		STARPU_ABORT();							\
			
 
				+	}									\
			
 
				+} while (0)
			
 
				+
			
 
				+#define STARPU_PTHREAD_JOIN(thread, retval) do {		    	\
			
 
				+	int p_ret =  starpu_pthread_join((thread), (retval)); \
			
 
				+	if (STARPU_UNLIKELY(p_ret != 0)) {					\
			
 
				+		fprintf(stderr,							\
			
 
				+			"%s:%d starpu_pthread_join: %s\n",			\
			
 
				+			__FILE__, __LINE__, strerror(p_ret));			\
			
 
				+		STARPU_ABORT();							\
			
 
				+	}									\
			
 
				+} while (0)
			
 
				+
			
 
				+/*
			
 
				+ * Encapsulation of the starpu_pthread_mutex_* functions.
			
 
				+ */
			
 
				+
			
 
				+#define _STARPU_PTHREAD_MUTEX_INIT(mutex, attr) do {                           \
			
 
				+	int p_ret = starpu_pthread_mutex_init((mutex), (attr));                \
			
 
				+	if (STARPU_UNLIKELY(p_ret)) {                                          \
			
 
				+		fprintf(stderr,                                                \
			
 
				+			"%s:%d starpu_pthread_mutex_init: %s\n",               \
			
 
				+			__FILE__, __LINE__, strerror(p_ret));                  \
			
 
				+		STARPU_ABORT();                                                \
			
 
				+	}                                                                      \
			
 
				+} while (0)
			
 
				+
			
 
				+#ifdef STARPU_PTHREAD_MUTEX_INITIALIZER_ZERO
			
 
				+#define STARPU_PTHREAD_MUTEX_INIT(mutex, attr) do {                            \
			
 
				+	if (!attr)                                                             \
			
 
				+		memset(mutex, 0, sizeof(*mutex));                              \
			
 
				+	else                                                                   \
			
 
				+		_STARPU_PTHREAD_MUTEX_INIT(mutex, attr);                       \
			
 
				+} while (0)
			
 
				+#define STARPU_PTHREAD_MUTEX_INIT0(mutex, attr) do {                           \
			
 
				+	if (attr)                                                              \
			
 
				+		_STARPU_PTHREAD_MUTEX_INIT(mutex, attr);                       \
			
 
				+} while (0)
			
 
				+#else
			
 
				+#define STARPU_PTHREAD_MUTEX_INIT(mutex, attr) _STARPU_PTHREAD_MUTEX_INIT(mutex, attr)
			
 
				+#define STARPU_PTHREAD_MUTEX_INIT0(mutex, attr) _STARPU_PTHREAD_MUTEX_INIT(mutex, attr)
			
 
				+#endif
			
 
				+
			
 
				+#define STARPU_PTHREAD_MUTEX_DESTROY(mutex) do {                              \
			
 
				+	int p_ret = starpu_pthread_mutex_destroy(mutex);                       \
			
 
				+	if (STARPU_UNLIKELY(p_ret)) {                                          \
			
 
				+		fprintf(stderr,                                                \
			
 
				+			"%s:%d starpu_pthread_mutex_destroy: %s\n",            \
			
 
				+			__FILE__, __LINE__, strerror(p_ret));                  \
			
 
				+		STARPU_ABORT();                                                \
			
 
				+	}                                                                      \
			
 
				+} while(0)
			
 
				+
			
 
				+#ifdef STARPU_DEBUG
			
 
				+#define _STARPU_CHECK_NOT_SCHED_MUTEX(mutex, file, line) \
			
 
				+	starpu_pthread_mutex_check_sched((mutex), file, line)
			
 
				+#else
			
 
				+#define _STARPU_CHECK_NOT_SCHED_MUTEX(mutex, file, line)
			
 
				+#endif
			
 
				+
			
 
				+#define STARPU_PTHREAD_MUTEX_LOCK(mutex) do {				      \
			
 
				+	int p_ret = starpu_pthread_mutex_lock(mutex);			      \
			
 
				+	if (STARPU_UNLIKELY(p_ret)) {                                          \
			
 
				+		fprintf(stderr,                                                \
			
 
				+			"%s:%d starpu_pthread_mutex_lock: %s\n",               \
			
 
				+			__FILE__, __LINE__, strerror(p_ret));                  \
			
 
				+		STARPU_ABORT();                                                \
			
 
				+	}                                                                      \
			
 
				+	_STARPU_CHECK_NOT_SCHED_MUTEX(mutex, __FILE__, __LINE__);                                  \
			
 
				+} while (0)
			
 
				+
			
 
				+#define STARPU_PTHREAD_MUTEX_LOCK_SCHED(mutex) do {			      \
			
 
				+	int p_ret = starpu_pthread_mutex_lock_sched(mutex);		      \
			
 
				+	if (STARPU_UNLIKELY(p_ret)) {                                          \
			
 
				+		fprintf(stderr,                                                \
			
 
				+			"%s:%d starpu_pthread_mutex_lock_sched: %s\n",         \
			
 
				+			__FILE__, __LINE__, strerror(p_ret));                  \
			
 
				+		STARPU_ABORT();                                                \
			
 
				+	}                                                                      \
			
 
				+} while (0)
			
 
				+
			
 
				+#define STARPU_PTHREAD_MUTEX_TRYLOCK(mutex) \
			
 
				+	_starpu_pthread_mutex_trylock(mutex, __FILE__, __LINE__)
			
 
				+static STARPU_INLINE
			
 
				+int _starpu_pthread_mutex_trylock(starpu_pthread_mutex_t *mutex, char *file, int line)
			
 
				+{
			
 
				+	int p_ret = starpu_pthread_mutex_trylock(mutex);
			
 
				+	if (STARPU_UNLIKELY(p_ret != 0 && p_ret != EBUSY)) {
			
 
				+		fprintf(stderr,
			
 
				+			"%s:%d starpu_pthread_mutex_trylock: %s\n",
			
 
				+			file, line, strerror(p_ret));
			
 
				+		STARPU_ABORT();
			
 
				+	}
			
 
				+	_STARPU_CHECK_NOT_SCHED_MUTEX(mutex, file, line);
			
 
				+	return p_ret;
			
 
				+}
			
 
				+
			
 
				+#define STARPU_PTHREAD_MUTEX_TRYLOCK_SCHED(mutex) \
			
 
				+	_starpu_pthread_mutex_trylock_sched(mutex, __FILE__, __LINE__)
			
 
				+static STARPU_INLINE
			
 
				+int _starpu_pthread_mutex_trylock_sched(starpu_pthread_mutex_t *mutex, char *file, int line)
			
 
				+{
			
 
				+	int p_ret = starpu_pthread_mutex_trylock_sched(mutex);
			
 
				+	if (STARPU_UNLIKELY(p_ret != 0 && p_ret != EBUSY)) {
			
 
				+		fprintf(stderr,
			
 
				+			"%s:%d starpu_pthread_mutex_trylock_sched: %s\n",
			
 
				+			file, line, strerror(p_ret));
			
 
				+		STARPU_ABORT();
			
 
				+	}
			
 
				+	return p_ret;
			
 
				+}
			
 
				+
			
 
				+#define STARPU_PTHREAD_MUTEX_UNLOCK(mutex) do {                               \
			
 
				+	_STARPU_CHECK_NOT_SCHED_MUTEX(mutex, __FILE__, __LINE__);              \
			
 
				+	int p_ret = starpu_pthread_mutex_unlock(mutex);                        \
			
 
				+	if (STARPU_UNLIKELY(p_ret)) {                                          \
			
 
				+		fprintf(stderr,                                                \
			
 
				+			"%s:%d starpu_pthread_mutex_unlock: %s\n",             \
			
 
				+			__FILE__, __LINE__, strerror(p_ret));                  \
			
 
				+		STARPU_ABORT();                                                \
			
 
				+	}                                                                      \
			
 
				+} while (0)
			
 
				+
			
 
				+#define STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(mutex) do {                          \
			
 
				+	int p_ret = starpu_pthread_mutex_unlock_sched(mutex);                  \
			
 
				+	if (STARPU_UNLIKELY(p_ret)) {                                          \
			
 
				+		fprintf(stderr,                                                \
			
 
				+			"%s:%d starpu_pthread_mutex_unlock_sched: %s\n",       \
			
 
				+			__FILE__, __LINE__, strerror(p_ret));                  \
			
 
				+		STARPU_ABORT();                                                \
			
 
				+	}                                                                      \
			
 
				+} while (0)
			
 
				+
			
 
				+/*
			
 
				+ * Encapsulation of the starpu_pthread_key_* functions.
			
 
				+ */
			
 
				+#define STARPU_PTHREAD_KEY_CREATE(key, destr) do {                            \
			
 
				+	int p_ret = starpu_pthread_key_create((key), (destr));	               \
			
 
				+	if (STARPU_UNLIKELY(p_ret != 0)) {                                     \
			
 
				+		fprintf(stderr,                                                \
			
 
				+			"%s:%d starpu_pthread_key_create: %s\n",               \
			
 
				+			__FILE__, __LINE__, strerror(p_ret));                  \
			
 
				+	}                                                                      \
			
 
				+} while (0)
			
 
				+
			
 
				+#define STARPU_PTHREAD_KEY_DELETE(key) do {                                   \
			
 
				+	int p_ret = starpu_pthread_key_delete((key));	                       \
			
 
				+	if (STARPU_UNLIKELY(p_ret != 0)) {                                     \
			
 
				+		fprintf(stderr,                                                \
			
 
				+			"%s:%d starpu_pthread_key_delete: %s\n",               \
			
 
				+			__FILE__, __LINE__, strerror(p_ret));                  \
			
 
				+	}                                                                      \
			
 
				+} while (0)
			
 
				+
			
 
				+#define STARPU_PTHREAD_SETSPECIFIC(key, ptr) do {                             \
			
 
				+	int p_ret = starpu_pthread_setspecific((key), (ptr));	               \
			
 
				+	if (STARPU_UNLIKELY(p_ret != 0)) {                                     \
			
 
				+		fprintf(stderr,                                                \
			
 
				+			"%s:%d starpu_pthread_setspecific: %s\n",              \
			
 
				+			__FILE__, __LINE__, strerror(p_ret));                  \
			
 
				+	}                                                                      \
			
 
				+} while (0)
			
 
				+
			
 
				+#define STARPU_PTHREAD_GETSPECIFIC(key) starpu_pthread_getspecific((key))
			
 
				+
			
 
				+/*
			
 
				+ * Encapsulation of the starpu_pthread_rwlock_* functions.
			
 
				+ */
			
 
				+#define _STARPU_PTHREAD_RWLOCK_INIT(rwlock, attr) do {                         \
			
 
				+	int p_ret = starpu_pthread_rwlock_init((rwlock), (attr));              \
			
 
				+	if (STARPU_UNLIKELY(p_ret)) {                                          \
			
 
				+		fprintf(stderr,                                                \
			
 
				+			"%s:%d starpu_pthread_rwlock_init: %s\n",              \
			
 
				+			__FILE__, __LINE__, strerror(p_ret));                  \
			
 
				+		STARPU_ABORT();                                                \
			
 
				+	}                                                                      \
			
 
				+} while (0)
			
 
				+
			
 
				+#ifdef STARPU_PTHREAD_RWLOCK_INITIALIZER_ZERO
			
 
				+#define STARPU_PTHREAD_RWLOCK_INIT(rwlock, attr) do {                            \
			
 
				+	if (!(attr)) /* no attributes: an all-zero object is a valid lock */   \
			
 
				+		memset((rwlock), 0, sizeof(*(rwlock)));                          \
			
 
				+	else                                                                   \
			
 
				+		_STARPU_PTHREAD_RWLOCK_INIT((rwlock), (attr));                   \
			
 
				+} while (0)
			
 
				+#define STARPU_PTHREAD_RWLOCK_INIT0(rwlock, attr) do {                           \
			
 
				+	if ((attr)) /* without attributes nothing is done here */              \
			
 
				+		_STARPU_PTHREAD_RWLOCK_INIT((rwlock), (attr));                   \
			
 
				+} while (0)
			
 
				+#else
			
 
				+#define STARPU_PTHREAD_RWLOCK_INIT(rwlock, attr) _STARPU_PTHREAD_RWLOCK_INIT(rwlock, attr)
			
 
				+#define STARPU_PTHREAD_RWLOCK_INIT0(rwlock, attr) _STARPU_PTHREAD_RWLOCK_INIT(rwlock, attr)
			
 
				+#endif
			
 
				+
			
 
				+#define STARPU_PTHREAD_RWLOCK_RDLOCK(rwlock) do {                              \
			
 
				+	int p_ret = starpu_pthread_rwlock_rdlock(rwlock);                      \
			
 
				+	if (STARPU_UNLIKELY(p_ret)) {                                          \
			
 
				+		fprintf(stderr,                                                \
			
 
				+			"%s:%d starpu_pthread_rwlock_rdlock: %s\n",            \
			
 
				+			__FILE__, __LINE__, strerror(p_ret));                  \
			
 
				+		STARPU_ABORT();                                                \
			
 
				+	}                                                                      \
			
 
				+} while (0)
			
 
				+
			
 
				+#define STARPU_PTHREAD_RWLOCK_TRYRDLOCK(rwlock) \
			
 
				+	_starpu_pthread_rwlock_tryrdlock(rwlock, __FILE__, __LINE__)
			
 
				+static STARPU_INLINE
			
 
				+int _starpu_pthread_rwlock_tryrdlock(starpu_pthread_rwlock_t *rwlock, const char *file, int line)
			
 
				+{
			
 
				+	int p_ret = starpu_pthread_rwlock_tryrdlock(rwlock);
			
 
				+	if (STARPU_UNLIKELY(p_ret != 0 && p_ret != EBUSY)) { /* EBUSY is not fatal: it is returned to the caller */
			
 
				+		fprintf(stderr,
			
 
				+			"%s:%d starpu_pthread_rwlock_tryrdlock: %s\n",
			
 
				+			file, line, strerror(p_ret));
			
 
				+		STARPU_ABORT();
			
 
				+	}
			
 
				+	return p_ret;
			
 
				+}
			
 
				+
			
 
				+#define STARPU_PTHREAD_RWLOCK_WRLOCK(rwlock) do {                              \
			
 
				+	int p_ret = starpu_pthread_rwlock_wrlock(rwlock);                      \
			
 
				+	if (STARPU_UNLIKELY(p_ret)) {                                          \
			
 
				+		fprintf(stderr,                                                \
			
 
				+			"%s:%d starpu_pthread_rwlock_wrlock: %s\n",            \
			
 
				+			__FILE__, __LINE__, strerror(p_ret));                  \
			
 
				+		STARPU_ABORT();                                                \
			
 
				+	}                                                                      \
			
 
				+} while (0)
			
 
				+
			
 
				+#define STARPU_PTHREAD_RWLOCK_TRYWRLOCK(rwlock) \
			
 
				+	_starpu_pthread_rwlock_trywrlock(rwlock, __FILE__, __LINE__)
			
 
				+static STARPU_INLINE
			
 
				+int _starpu_pthread_rwlock_trywrlock(starpu_pthread_rwlock_t *rwlock, const char *file, int line)
			
 
				+{
			
 
				+	int p_ret = starpu_pthread_rwlock_trywrlock(rwlock);
			
 
				+	if (STARPU_UNLIKELY(p_ret != 0 && p_ret != EBUSY)) { /* EBUSY is not fatal: it is returned to the caller */
			
 
				+		fprintf(stderr,
			
 
				+			"%s:%d starpu_pthread_rwlock_trywrlock: %s\n",
			
 
				+			file, line, strerror(p_ret));
			
 
				+		STARPU_ABORT();
			
 
				+	}
			
 
				+	return p_ret;
			
 
				+}
			
 
				+
			
 
				+#define STARPU_PTHREAD_RWLOCK_UNLOCK(rwlock) do {                              \
			
 
				+	int p_ret = starpu_pthread_rwlock_unlock(rwlock);                      \
			
 
				+	if (STARPU_UNLIKELY(p_ret)) {                                          \
			
 
				+		fprintf(stderr,                                                \
			
 
				+			"%s:%d starpu_pthread_rwlock_unlock: %s\n",            \
			
 
				+			__FILE__, __LINE__, strerror(p_ret));                  \
			
 
				+		STARPU_ABORT();                                                \
			
 
				+	}                                                                      \
			
 
				+} while (0)
			
 
				+
			
 
				+#define STARPU_PTHREAD_RWLOCK_DESTROY(rwlock) do {                            \
			
 
				+	int p_ret = starpu_pthread_rwlock_destroy(rwlock);                     \
			
 
				+	if (STARPU_UNLIKELY(p_ret)) {                                          \
			
 
				+		fprintf(stderr,                                                \
			
 
				+			"%s:%d starpu_pthread_rwlock_destroy: %s\n",           \
			
 
				+			__FILE__, __LINE__, strerror(p_ret));                  \
			
 
				+		STARPU_ABORT();                                                \
			
 
				+	}                                                                      \
			
 
				+} while (0)
			
 
				+
			
 
				+/*
			
 
				+ * Encapsulation of the starpu_pthread_cond_* functions.
			
 
				+ */
			
 
				+#define _STARPU_PTHREAD_COND_INIT(cond, attr) do {                             \
			
 
				+	int p_ret = starpu_pthread_cond_init((cond), (attr));                  \
			
 
				+	if (STARPU_UNLIKELY(p_ret)) {                                          \
			
 
				+		fprintf(stderr,                                                \
			
 
				+			"%s:%d starpu_pthread_cond_init: %s\n",                \
			
 
				+			__FILE__, __LINE__, strerror(p_ret));                  \
			
 
				+		STARPU_ABORT();                                                \
			
 
				+	}                                                                      \
			
 
				+} while (0)
			
 
				+
			
 
				+#ifdef STARPU_PTHREAD_COND_INITIALIZER_ZERO
			
 
				+#define STARPU_PTHREAD_COND_INIT(cond, attr) do {                            \
			
 
				+	if (!(attr)) /* no attributes: an all-zero object is a valid cond */   \
			
 
				+		memset((cond), 0, sizeof(*(cond)));                          \
			
 
				+	else                                                                   \
			
 
				+		_STARPU_PTHREAD_COND_INIT((cond), (attr));                   \
			
 
				+} while (0)
			
 
				+#define STARPU_PTHREAD_COND_INIT0(cond, attr) do {                           \
			
 
				+	if ((attr)) /* without attributes nothing is done here */              \
			
 
				+		_STARPU_PTHREAD_COND_INIT((cond), (attr));                   \
			
 
				+} while (0)
			
 
				+#else
			
 
				+#define STARPU_PTHREAD_COND_INIT(cond, attr) _STARPU_PTHREAD_COND_INIT(cond, attr)
			
 
				+#define STARPU_PTHREAD_COND_INIT0(cond, attr) _STARPU_PTHREAD_COND_INIT(cond, attr)
			
 
				+#endif
			
 
				+
			
 
				+#define STARPU_PTHREAD_COND_DESTROY(cond) do {                                \
			
 
				+	int p_ret = starpu_pthread_cond_destroy(cond);                         \
			
 
				+	if (STARPU_UNLIKELY(p_ret)) {                                          \
			
 
				+		fprintf(stderr,                                                \
			
 
				+			"%s:%d starpu_pthread_cond_destroy: %s\n",             \
			
 
				+			__FILE__, __LINE__, strerror(p_ret));                  \
			
 
				+			STARPU_ABORT();                                        \
			
 
				+	}                                                                      \
			
 
				+} while (0)
			
 
				+
			
 
				+#define STARPU_PTHREAD_COND_SIGNAL(cond) do {                                 \
			
 
				+	int p_ret = starpu_pthread_cond_signal(cond);                          \
			
 
				+	if (STARPU_UNLIKELY(p_ret)) {                                          \
			
 
				+		fprintf(stderr,                                                \
			
 
				+			"%s:%d starpu_pthread_cond_signal: %s\n",              \
			
 
				+			__FILE__, __LINE__, strerror(p_ret));                  \
			
 
				+		STARPU_ABORT();                                                \
			
 
				+	}                                                                      \
			
 
				+} while (0)
			
 
				+
			
 
				+#define STARPU_PTHREAD_COND_BROADCAST(cond) do {                              \
			
 
				+	int p_ret = starpu_pthread_cond_broadcast(cond);                       \
			
 
				+	if (STARPU_UNLIKELY(p_ret)) {                                          \
			
 
				+		fprintf(stderr,                                                \
			
 
				+			"%s:%d starpu_pthread_cond_broadcast: %s\n",           \
			
 
				+			__FILE__, __LINE__, strerror(p_ret));                  \
			
 
				+		STARPU_ABORT();                                                \
			
 
				+	}                                                                      \
			
 
				+} while (0)
			
 
				+
			
 
				+#define STARPU_PTHREAD_COND_WAIT(cond, mutex) do {                            \
			
 
				+	int p_ret = starpu_pthread_cond_wait((cond), (mutex));                 \
			
 
				+	if (STARPU_UNLIKELY(p_ret)) {                                          \
			
 
				+		fprintf(stderr,                                                \
			
 
				+			"%s:%d starpu_pthread_cond_wait: %s\n",                \
			
 
				+			__FILE__, __LINE__, strerror(p_ret));                  \
			
 
				+		STARPU_ABORT();                                                \
			
 
				+	}                                                                      \
			
 
				+} while (0)
			
 
				+
			
 
				+/* pthread_cond_timedwait not yet available on windows, but we don't run simgrid there anyway */
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+#define STARPU_PTHREAD_COND_TIMEDWAIT(cond, mutex, abstime) \
			
 
				+	_starpu_pthread_cond_timedwait(cond, mutex, abstime, __FILE__, __LINE__)
			
 
				+static STARPU_INLINE
			
 
				+int _starpu_pthread_cond_timedwait(starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex, const struct timespec *abstime, const char *file, int line)
			
 
				+{
			
 
				+	int p_ret = starpu_pthread_cond_timedwait(cond, mutex, abstime);
			
 
				+	if (STARPU_UNLIKELY(p_ret != 0 && p_ret != ETIMEDOUT)) { /* ETIMEDOUT is not fatal: it is returned to the caller */
			
 
				+		fprintf(stderr,
			
 
				+			"%s:%d starpu_pthread_cond_timedwait: %s\n",
			
 
				+			file, line, strerror(p_ret));
			
 
				+		STARPU_ABORT();
			
 
				+	}
			
 
				+	return p_ret;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+/*
			
 
				+ * Encapsulation of the starpu_pthread_barrier_* functions.
			
 
				+ */
			
 
				+
			
 
				+#define STARPU_PTHREAD_BARRIER_INIT(barrier, attr, count) do {                \
			
 
				+	int p_ret = starpu_pthread_barrier_init((barrier), (attr), (count));          \
			
 
				+	if (STARPU_UNLIKELY(p_ret)) {                                          \
			
 
				+		fprintf(stderr,                                                \
			
 
				+			"%s:%d starpu_pthread_barrier_init: %s\n",                    \
			
 
				+			__FILE__, __LINE__, strerror(p_ret));                  \
			
 
				+		STARPU_ABORT();                                                \
			
 
				+	}                                                                      \
			
 
				+} while (0)
			
 
				+
			
 
				+#define STARPU_PTHREAD_BARRIER_DESTROY(barrier) do {                          \
			
 
				+	int p_ret = starpu_pthread_barrier_destroy((barrier));                        \
			
 
				+	if (STARPU_UNLIKELY(p_ret)) {                                          \
			
 
				+		fprintf(stderr,                                                \
			
 
				+			"%s:%d starpu_pthread_barrier_destroy: %s\n",                 \
			
 
				+			__FILE__, __LINE__, strerror(p_ret));                  \
			
 
				+		STARPU_ABORT();                                                \
			
 
				+	}                                                                      \
			
 
				+} while (0)
			
 
				+
			
 
				+#define STARPU_PTHREAD_BARRIER_WAIT(barrier) do {                             	\
			
 
				+	int p_ret = starpu_pthread_barrier_wait((barrier));				\
			
 
				+	if (STARPU_UNLIKELY(!((p_ret == 0) || (p_ret == STARPU_PTHREAD_BARRIER_SERIAL_THREAD)))) { \
			
 
				+		fprintf(stderr,                                                \
			
 
				+			"%s:%d starpu_pthread_barrier_wait: %s\n",                    \
			
 
				+			__FILE__, __LINE__, strerror(p_ret));                  \
			
 
				+			STARPU_ABORT();                                        \
			
 
				+	}                                                                      \
			
 
				+} while (0)
			
 
				+#endif /* _MSC_VER */
			
 
				+
			
 
				+#endif /* __STARPU_THREAD_UTIL_H__ */
			
--- a/include/starpu/1.3/starpu_tree.h
+++ b/include/starpu/1.3/starpu_tree.h
@@ -0,0 +1,58 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_TREE_H__
			
 
				+#define __STARPU_TREE_H__
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   @defgroup API_Tree Tree
			
 
				+   @brief This section describes the tree facilities provided by StarPU.
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+struct starpu_tree
			
 
				+{
			
 
				+	struct starpu_tree *nodes;
			
 
				+	struct starpu_tree *father;
			
 
				+	int arity;
			
 
				+	int id;
			
 
				+	int level;
			
 
				+	int is_pu;
			
 
				+};
			
 
				+
			
 
				+void starpu_tree_reset_visited(struct starpu_tree *tree, char *visited);
			
 
				+
			
 
				+void starpu_tree_prepare_children(unsigned arity, struct starpu_tree *father);
			
 
				+void starpu_tree_insert(struct starpu_tree *tree, int id, int level, int is_pu, int arity, struct starpu_tree *father);
			
 
				+
			
 
				+struct starpu_tree *starpu_tree_get(struct starpu_tree *tree, int id);
			
 
				+
			
 
				+struct starpu_tree *starpu_tree_get_neighbour(struct starpu_tree *tree, struct starpu_tree *node, char *visited, char *present);
			
 
				+
			
 
				+void starpu_tree_free(struct starpu_tree *tree);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif /* __STARPU_TREE_H__ */
			
--- a/include/starpu/1.3/starpu_util.h
+++ b/include/starpu/1.3/starpu_util.h
@@ -0,0 +1,648 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2008-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_UTIL_H__
			
 
				+#define __STARPU_UTIL_H__
			
 
				+
			
 
				+#include <stdio.h>
			
 
				+#include <stdlib.h>
			
 
				+#include <stdint.h>
			
 
				+#include <string.h>
			
 
				+#include <assert.h>
			
 
				+
			
 
				+#include <starpu_config.h>
			
 
				+
			
 
				+#ifdef __GLIBC__
			
 
				+#include <execinfo.h>
			
 
				+#endif
			
 
				+
			
 
				+#ifdef STARPU_SIMGRID_MC
			
 
				+#include <simgrid/modelchecker.h>
			
 
				+#endif
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   @defgroup API_Toolbox Toolbox
			
 
				+   @brief The following macros make GCC extensions portable, so
			
 
				+   that the code can be compiled with any C compiler.
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   Return true (non-zero) if GCC version \p maj.\p min or later is
			
 
				+   being used (macro taken from glibc.)
			
 
				+*/
			
 
				+#if defined __GNUC__ && defined __GNUC_MINOR__
			
 
				+# define STARPU_GNUC_PREREQ(maj, min) \
			
 
				+	((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
			
 
				+#else
			
 
				+# define STARPU_GNUC_PREREQ(maj, min) 0
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   When building with a GNU C Compiler, allow programmers to mark an
			
 
				+   expression as unlikely.
			
 
				+*/
			
 
				+#ifdef __GNUC__
			
 
				+#  define STARPU_UNLIKELY(expr)          (__builtin_expect(!!(expr),0))
			
 
				+#else
			
 
				+#  define STARPU_UNLIKELY(expr)          (expr)
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   When building with a GNU C Compiler, allow programmers to mark an
			
 
				+   expression as likely.
			
 
				+*/
			
 
				+#ifdef __GNUC__
			
 
				+#  define STARPU_LIKELY(expr)            (__builtin_expect(!!(expr),1))
			
 
				+#else
			
 
				+#  define STARPU_LIKELY(expr)            (expr)
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   When building with a GNU C Compiler, defined to __attribute__((unused))
			
 
				+*/
			
 
				+#ifdef __GNUC__
			
 
				+#  define STARPU_ATTRIBUTE_UNUSED                  __attribute__((unused))
			
 
				+#else
			
 
				+#  define STARPU_ATTRIBUTE_UNUSED
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   When building with a GNU C Compiler, defined to __attribute__((noreturn))
			
 
				+*/
			
 
				+#ifdef __GNUC__
			
 
				+#  define STARPU_ATTRIBUTE_NORETURN                  __attribute__((noreturn))
			
 
				+#else
			
 
				+#  define STARPU_ATTRIBUTE_NORETURN
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   When building with a GNU C Compiler, defined to __attribute__((visibility ("internal")))
			
 
				+*/
			
 
				+#ifdef __GNUC__
			
 
				+#  define STARPU_ATTRIBUTE_INTERNAL      __attribute__ ((visibility ("internal")))
			
 
				+#else
			
 
				+#  define STARPU_ATTRIBUTE_INTERNAL
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   When building with a GNU C Compiler, defined to __attribute__((malloc))
			
 
				+*/
			
 
				+#ifdef __GNUC__
			
 
				+#  define STARPU_ATTRIBUTE_MALLOC                  __attribute__((malloc))
			
 
				+#else
			
 
				+#  define STARPU_ATTRIBUTE_MALLOC
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   When building with a GNU C Compiler, defined to __attribute__((warn_unused_result))
			
 
				+*/
			
 
				+#ifdef __GNUC__
			
 
				+#  define STARPU_ATTRIBUTE_WARN_UNUSED_RESULT      __attribute__((warn_unused_result))
			
 
				+#else
			
 
				+#  define STARPU_ATTRIBUTE_WARN_UNUSED_RESULT
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   When building with a GNU C Compiler, defined to __attribute__((pure))
			
 
				+*/
			
 
				+#ifdef __GNUC__
			
 
				+#  define STARPU_ATTRIBUTE_PURE                    __attribute__((pure))
			
 
				+#else
			
 
				+#  define STARPU_ATTRIBUTE_PURE
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   When building with a GNU C Compiler, defined to __attribute__((aligned(size)))
			
 
				+*/
			
 
				+#ifdef __GNUC__
			
 
				+#  define STARPU_ATTRIBUTE_ALIGNED(size)           __attribute__((aligned(size)))
			
 
				+#else
			
 
				+#  define STARPU_ATTRIBUTE_ALIGNED(size)
			
 
				+#endif
			
 
				+
			
 
				+#ifdef __GNUC__
			
 
				+#  define STARPU_ATTRIBUTE_FORMAT(type, string, first)                  __attribute__((format(type, string, first)))
			
 
				+#else
			
 
				+#  define STARPU_ATTRIBUTE_FORMAT(type, string, first)
			
 
				+#endif
			
 
				+
			
 
				+/* Note that if we're compiling C++, then just use the "inline"
			
 
				+   keyword, since it's part of C++ */
			
 
				+#if defined(c_plusplus) || defined(__cplusplus)
			
 
				+#  define STARPU_INLINE inline
			
 
				+#elif defined(_MSC_VER) || defined(__HP_cc)
			
 
				+#  define STARPU_INLINE __inline
			
 
				+#else
			
 
				+#  define STARPU_INLINE __inline__
			
 
				+#endif
			
 
				+
			
 
				+#if STARPU_GNUC_PREREQ(4, 3)
			
 
				+#  define STARPU_ATTRIBUTE_CALLOC_SIZE(num,size)   __attribute__((alloc_size(num,size)))
			
 
				+#  define STARPU_ATTRIBUTE_ALLOC_SIZE(size)        __attribute__((alloc_size(size)))
			
 
				+#else
			
 
				+#  define STARPU_ATTRIBUTE_CALLOC_SIZE(num,size)
			
 
				+#  define STARPU_ATTRIBUTE_ALLOC_SIZE(size)
			
 
				+#endif
			
 
				+
			
 
				+#if STARPU_GNUC_PREREQ(3, 1) && !defined(BUILDING_STARPU) && !defined(STARPU_USE_DEPRECATED_API) && !defined(STARPU_USE_DEPRECATED_ONE_ZERO_API)
			
 
				+#define STARPU_DEPRECATED  __attribute__((__deprecated__))
			
 
				+#else
			
 
				+#define STARPU_DEPRECATED
			
 
				+#endif /* STARPU_GNUC_PREREQ(3, 1) && no deprecated-API opt-in macro defined */
			
 
				+
			
 
				+#if STARPU_GNUC_PREREQ(3,3)
			
 
				+#define STARPU_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__))
			
 
				+#else
			
 
				+#define STARPU_WARN_UNUSED_RESULT
			
 
				+#endif /* STARPU_GNUC_PREREQ(3,3) */
			
 
				+
			
 
				+#define STARPU_BACKTRACE_LENGTH	32
			
 
				+#ifdef __GLIBC__
			
 
				+#  define STARPU_DUMP_BACKTRACE() do { \
			
 
				+	void *__ptrs[STARPU_BACKTRACE_LENGTH]; \
			
 
				+	int __n = backtrace(__ptrs, STARPU_BACKTRACE_LENGTH); \
			
 
				+	backtrace_symbols_fd(__ptrs, __n, 2); \
			
 
				+} while (0)
			
 
				+#else
			
 
				+#  define STARPU_DUMP_BACKTRACE() do { } while (0)
			
 
				+#endif
			
 
				+
			
 
				+#ifdef STARPU_SIMGRID_MC
			
 
				+#define STARPU_SIMGRID_ASSERT(x) MC_assert(!!(x))
			
 
				+#else
			
 
				+#define STARPU_SIMGRID_ASSERT(x)
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   Unless StarPU has been configured with the option \ref enable-fast
			
 
				+   "--enable-fast", this macro will abort if the expression \p x is false.
			
 
				+*/
			
 
				+#ifdef STARPU_NO_ASSERT
			
 
				+#define STARPU_ASSERT(x)		do { if (0) { (void) (x); } } while(0)
			
 
				+#else
			
 
				+#  if defined(__CUDACC__) || defined(STARPU_HAVE_WINDOWS)
			
 
				+#    define STARPU_ASSERT(x)		do { if (STARPU_UNLIKELY(!(x))) { STARPU_DUMP_BACKTRACE(); STARPU_SIMGRID_ASSERT(x); *(int*)NULL = 0; } } while(0)
			
 
				+#  else
			
 
				+#    define STARPU_ASSERT(x)		do { if (STARPU_UNLIKELY(!(x))) { STARPU_DUMP_BACKTRACE(); STARPU_SIMGRID_ASSERT(x); assert(x); } } while (0)
			
 
				+#  endif
			
 
				+#endif
			
 
				+
			
 
				+#ifdef STARPU_NO_ASSERT
			
 
				+#define STARPU_ASSERT_ACCESSIBLE(x)	do { if (0) { (void) (x); } } while(0)
			
 
				+#else
			
 
				+#define STARPU_ASSERT_ACCESSIBLE(ptr)	do { volatile char __c STARPU_ATTRIBUTE_UNUSED = *(char*) (ptr); } while(0)
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   Unless StarPU has been configured with the option \ref enable-fast
			
 
				+   "--enable-fast", this macro will abort if the expression \p x is false.
			
 
				+   The string \p msg will be displayed.
			
 
				+*/
			
 
				+#ifdef STARPU_NO_ASSERT
			
 
				+#define STARPU_ASSERT_MSG(x, msg, ...)	do { if (0) { (void) (x); (void) msg; } } while(0)
			
 
				+#else
			
 
				+#  if defined(__CUDACC__) || defined(STARPU_HAVE_WINDOWS)
			
 
				+#    define STARPU_ASSERT_MSG(x, msg, ...)	do { if (STARPU_UNLIKELY(!(x))) { STARPU_DUMP_BACKTRACE(); fprintf(stderr, "\n[starpu][%s][assert failure] " msg "\n\n", __starpu_func__, ## __VA_ARGS__); STARPU_SIMGRID_ASSERT(x); *(int*)NULL = 0; }} while(0)
			
 
				+#  else
			
 
				+#    define STARPU_ASSERT_MSG(x, msg, ...)	do { if (STARPU_UNLIKELY(!(x))) { STARPU_DUMP_BACKTRACE(); fprintf(stderr, "\n[starpu][%s][assert failure] " msg "\n\n", __starpu_func__, ## __VA_ARGS__); STARPU_SIMGRID_ASSERT(x); assert(x); } } while(0)
			
 
				+#  endif
			
 
				+#endif
			
 
				+
			
 
				+#ifdef __APPLE_CC__
			
 
				+#  ifdef __clang_analyzer__
			
 
				+#    define _starpu_abort() exit(42)
			
 
				+#  else
			
 
				+#    define _starpu_abort() *(volatile int*)NULL = 0
			
 
				+#  endif
			
 
				+#else
			
 
				+#  define _starpu_abort() abort()
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   Abort the program.
			
 
				+*/
			
 
				+#define STARPU_ABORT() do {                                          \
			
 
				+	STARPU_DUMP_BACKTRACE();                                     \
			
 
				+        fprintf(stderr, "[starpu][abort][%s()@%s:%d]\n", __starpu_func__, __FILE__, __LINE__); \
			
 
				+	_starpu_abort();				\
			
 
				+} while(0)
			
 
				+
			
 
				+/**
			
 
				+   Print the string '[starpu][abort][name of the calling function:name
			
 
				+   of the file:line in the file]' followed by the given string \p msg
			
 
				+   and abort the program
			
 
				+*/
			
 
				+#define STARPU_ABORT_MSG(msg, ...) do {					\
			
 
				+	STARPU_DUMP_BACKTRACE();                                        \
			
 
				+	fprintf(stderr, "[starpu][abort][%s()@%s:%d] " msg "\n", __starpu_func__, __FILE__, __LINE__, ## __VA_ARGS__); \
			
 
				+	_starpu_abort();				\
			
 
				+} while(0)
			
 
				+
			
 
				+#if defined(STARPU_HAVE_STRERROR_R)
			
 
				+#if (! defined(__GLIBC__) || !__GLIBC__) || ((_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600) && (! defined(_GNU_SOURCE)))
			
 
				+/* XSI-compliant version of strerror_r returns an int */
			
 
				+#       define starpu_strerror_r(errnum, buf, buflen) \
			
 
				+	do \
			
 
				+	{ \
			
 
				+		int _ret = strerror_r((errnum), (buf), (buflen)); \
			
 
				+		STARPU_ASSERT(_ret == 0); \
			
 
				+	} \
			
 
				+	while (0)
			
 
				+#else
			
 
				+/* GNU-specific version of strerror_r returns a char * */
			
 
				+#       define starpu_strerror_r(errnum, buf, buflen) \
			
 
				+	do \
			
 
				+	{ \
			
 
				+		char * const _user_buf = (buf); \
			
 
				+		const size_t _user_buflen = (buflen); \
			
 
				+		/* the GNU-specific behaviour when 'buf' == NULL cannot be emulated with the XSI-compliant version */ \
			
 
				+		STARPU_ASSERT(_user_buf != NULL); /* test the captured copy so that 'buf' is evaluated only once */ \
			
 
				+		char * _tmp_buf = strerror_r((errnum), _user_buf, _user_buflen); \
			
 
				+		if (_tmp_buf != _user_buf) \
			
 
				+		{ \
			
 
				+			if (_user_buflen > 0) \
			
 
				+			{ \
			
 
				+				strncpy(_user_buf, _tmp_buf, _user_buflen-1); \
			
 
				+				_user_buf[_user_buflen-1] = '\0'; \
			
 
				+			} \
			
 
				+		} \
			
 
				+	} \
			
 
				+	while (0)
			
 
				+#endif /* strerror_r ABI version */
			
 
				+#endif  /* STARPU_HAVE_STRERROR_R */
			
 
				+
			
 
				+/**
			
 
				+   Abort the program (after displaying \p message) if \p err has a
			
 
				+   value which is not 0.
			
 
				+*/
			
 
				+#if defined(STARPU_HAVE_STRERROR_R)
			
 
				+#  define STARPU_CHECK_RETURN_VALUE(err, message, ...) {if (STARPU_UNLIKELY((err) != 0)) { \
			
 
				+			char xmessage[256]; starpu_strerror_r(-(err), xmessage, 256); /* negate the whole argument expression */ \
			
 
				+			fprintf(stderr, "[starpu] Unexpected value: <%d:%s> returned for " message "\n", (err), xmessage, ## __VA_ARGS__); \
			
 
				+			STARPU_ABORT(); }}
			
 
				+#else
			
 
				+#  define STARPU_CHECK_RETURN_VALUE(err, message, ...) {if (STARPU_UNLIKELY((err) != 0)) { \
			
 
				+			fprintf(stderr, "[starpu] Unexpected value: <%d> returned for " message "\n", (err), ## __VA_ARGS__); \
			
 
				+			STARPU_ABORT(); }}
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   Abort the program (after displaying \p message) if \p err is
			
 
				+   different from \p value.
			
 
				+*/
			
 
				+#if defined(STARPU_HAVE_STRERROR_R)
			
 
				+#  define STARPU_CHECK_RETURN_VALUE_IS(err, value, message, ...) {if (STARPU_UNLIKELY((err) != (value))) { \
			
 
				+			char xmessage[256]; starpu_strerror_r(-(err), xmessage, 256); \
			
 
				+			fprintf(stderr, "[starpu] Unexpected value: <%d!=%d:%s> returned for " message "\n", (err), (value), xmessage, ## __VA_ARGS__); \
			
 
				+			STARPU_ABORT(); }}
			
 
				+#else
			
 
				+#  define STARPU_CHECK_RETURN_VALUE_IS(err, value, message, ...) {if (STARPU_UNLIKELY((err) != (value))) { \
			
 
				+			fprintf(stderr, "[starpu] Unexpected value: <%d != %d> returned for " message "\n", (err), (value), ## __VA_ARGS__); \
			
 
				+			STARPU_ABORT(); }}
			
 
				+#endif
			
 
				+
			
 
				+/* Note: do not use _starpu_cmpxchg / _starpu_xchg / _starpu_cmpxchgl /
			
 
				+ * _starpu_xchgl / _starpu_cmpxchg64 / _starpu_xchg64, which are only
			
 
				+ * assembly-hand-written fallbacks used when building with an old gcc.
			
 
				+ * Rather use STARPU_VAL_COMPARE_AND_SWAP and STARPU_VAL_EXCHANGE available on
			
 
				+ * all platforms with a recent-enough gcc */
			
 
				+
			
 
				+#if defined(__i386__) || defined(__x86_64__)
			
 
				+static __starpu_inline unsigned _starpu_cmpxchg(unsigned *ptr, unsigned old, unsigned next)
			
 
				+{
			
 
				+	__asm__ __volatile__("lock cmpxchgl %2,%1": "+a" (old), "+m" (*ptr) : "q" (next) : "memory");
			
 
				+	return old;
			
 
				+}
			
 
				+#define STARPU_HAVE_CMPXCHG
			
 
				+static __starpu_inline unsigned _starpu_xchg(unsigned *ptr, unsigned next)
			
 
				+{
			
 
				+	/* Note: xchg is always locked already */
			
 
				+	__asm__ __volatile__("xchgl %1,%0": "+m" (*ptr), "+q" (next) : : "memory");
			
 
				+	return next;
			
 
				+}
			
 
				+#define STARPU_HAVE_XCHG
			
 
				+
			
 
				+static __starpu_inline uint32_t _starpu_cmpxchg32(uint32_t *ptr, uint32_t old, uint32_t next)
			
 
				+{
			
 
				+	__asm__ __volatile__("lock cmpxchgl %2,%1": "+a" (old), "+m" (*ptr) : "q" (next) : "memory");
			
 
				+	return old;
			
 
				+}
			
 
				+#define STARPU_HAVE_CMPXCHG32
			
 
				+static __starpu_inline uint32_t _starpu_xchg32(uint32_t *ptr, uint32_t next)
			
 
				+{
			
 
				+	/* Note: xchg is always locked already */
			
 
				+	__asm__ __volatile__("xchgl %1,%0": "+m" (*ptr), "+q" (next) : : "memory");
			
 
				+	return next;
			
 
				+}
			
 
				+#define STARPU_HAVE_XCHG32
			
 
				+
			
 
				+#if defined(__i386__)
			
 
				+static __starpu_inline unsigned long _starpu_cmpxchgl(unsigned long *ptr, unsigned long old, unsigned long next)
			
 
				+{
			
 
				+	__asm__ __volatile__("lock cmpxchgl %2,%1": "+a" (old), "+m" (*ptr) : "q" (next) : "memory");
			
 
				+	return old;
			
 
				+}
			
 
				+#define STARPU_HAVE_CMPXCHGL
			
 
				+static __starpu_inline unsigned long _starpu_xchgl(unsigned long *ptr, unsigned long next)
			
 
				+{
			
 
				+	/* Note: xchg is always locked already */
			
 
				+	__asm__ __volatile__("xchgl %1,%0": "+m" (*ptr), "+q" (next) : : "memory");
			
 
				+	return next;
			
 
				+}
			
 
				+#define STARPU_HAVE_XCHGL
			
 
				+#endif
			
 
				+
			
 
				+#if defined(__x86_64__)
			
 
				+static __starpu_inline unsigned long _starpu_cmpxchgl(unsigned long *ptr, unsigned long old, unsigned long next)
			
 
				+{
			
 
				+	__asm__ __volatile__("lock cmpxchgq %2,%1": "+a" (old), "+m" (*ptr) : "q" (next) : "memory");
			
 
				+	return old;
			
 
				+}
			
 
				+#define STARPU_HAVE_CMPXCHGL
			
 
				+static __starpu_inline unsigned long _starpu_xchgl(unsigned long *ptr, unsigned long next)
			
 
				+{
			
 
				+	/* Note: xchg is always locked already */
			
 
				+	__asm__ __volatile__("xchgq %1,%0": "+m" (*ptr), "+q" (next) : : "memory");
			
 
				+	return next;
			
 
				+}
			
 
				+#define STARPU_HAVE_XCHGL
			
 
				+#endif
			
 
				+
			
 
				+#if defined(__i386__)
			
 
				+static __starpu_inline uint64_t _starpu_cmpxchg64(uint64_t *ptr, uint64_t old, uint64_t next)
			
 
				+{
			
 
				+	uint32_t next_hi = next >> 32;
			
 
				+	uint32_t next_lo = next & 0xfffffffful;
			
 
				+	__asm__ __volatile__("lock cmpxchg8b %1": "+A" (old), "+m" (*ptr) : "c" (next_hi), "b" (next_lo) : "memory");
			
 
				+	return old;
			
 
				+}
			
 
				+#define STARPU_HAVE_CMPXCHG64
			
 
				+#endif
			
 
				+
			
 
				+#if defined(__x86_64__)
			
 
				+static __starpu_inline uint64_t _starpu_cmpxchg64(uint64_t *ptr, uint64_t old, uint64_t next)
			
 
				+{
			
 
				+	__asm__ __volatile__("lock cmpxchgq %2,%1": "+a" (old), "+m" (*ptr) : "q" (next) : "memory");
			
 
				+	return old;
			
 
				+}
			
 
				+#define STARPU_HAVE_CMPXCHG64
			
 
				+static __starpu_inline uint64_t _starpu_xchg64(uint64_t *ptr, uint64_t next)
			
 
				+{
			
 
				+	/* Note: xchg is always locked already */
			
 
				+	__asm__ __volatile__("xchgq %1,%0": "+m" (*ptr), "+q" (next) : : "memory");
			
 
				+	return next;
			
 
				+}
			
 
				+#define STARPU_HAVE_XCHG64
			
 
				+#endif
			
 
				+
			
 
				+#endif
			
 
				+
			
 
				+#define STARPU_ATOMIC_SOMETHING(name,expr) \
			
 
				+static __starpu_inline unsigned starpu_atomic_##name(unsigned *ptr, unsigned value) \
			
 
				+{ \
			
 
				+	unsigned old, next; \
			
 
				+	while (1) \
			
 
				+	{ \
			
 
				+		old = *ptr; \
			
 
				+		next = expr; \
			
 
				+		if (_starpu_cmpxchg(ptr, old, next) == old) \
			
 
				+			break; \
			
 
				+	}; \
			
 
				+	return expr; \
			
 
				+}
			
 
				+#define STARPU_ATOMIC_SOMETHINGL(name,expr) \
			
 
				+static __starpu_inline unsigned long starpu_atomic_##name##l(unsigned long *ptr, unsigned long value) \
			
 
				+{ \
			
 
				+	unsigned long old, next; \
			
 
				+	while (1) \
			
 
				+	{ \
			
 
				+		old = *ptr; \
			
 
				+		next = expr; \
			
 
				+		if (_starpu_cmpxchgl(ptr, old, next) == old) \
			
 
				+			break; \
			
 
				+	}; \
			
 
				+	return expr; \
			
 
				+}
			
 
				+#define STARPU_ATOMIC_SOMETHING64(name,expr) \
			
 
				+static __starpu_inline uint64_t starpu_atomic_##name##64(uint64_t *ptr, uint64_t value) \
			
 
				+{ \
			
 
				+	uint64_t old, next; \
			
 
				+	while (1) \
			
 
				+	{ \
			
 
				+		old = *ptr; \
			
 
				+		next = expr; \
			
 
				+		if (_starpu_cmpxchg64(ptr, old, next) == old) \
			
 
				+			break; \
			
 
				+	}; \
			
 
				+	return expr; \
			
 
				+}
			
 
				+
			
 
				+/* Returns the new value */
			
 
				+#ifdef STARPU_HAVE_SYNC_FETCH_AND_ADD
			
 
				+#define STARPU_ATOMIC_ADD(ptr, value)  (__sync_fetch_and_add ((ptr), (value)) + (value))
			
 
				+#define STARPU_ATOMIC_ADDL(ptr, value)  (__sync_fetch_and_add ((ptr), (value)) + (value))
			
 
				+#define STARPU_ATOMIC_ADD64(ptr, value)  (__sync_fetch_and_add ((ptr), (value)) + (value))
			
 
				+#else
			
 
				+#if defined(STARPU_HAVE_CMPXCHG)
			
 
				+STARPU_ATOMIC_SOMETHING(add, old + value)
			
 
				+#define STARPU_ATOMIC_ADD(ptr, value) starpu_atomic_add(ptr, value)
			
 
				+#endif
			
 
				+#if defined(STARPU_HAVE_CMPXCHGL)
			
 
				+STARPU_ATOMIC_SOMETHINGL(add, old + value)
			
 
				+#define STARPU_ATOMIC_ADDL(ptr, value) starpu_atomic_addl(ptr, value)
			
 
				+#endif
			
 
				+#if defined(STARPU_HAVE_CMPXCHG64)
			
 
				+STARPU_ATOMIC_SOMETHING64(add, old + value)
			
 
				+#define STARPU_ATOMIC_ADD64(ptr, value) starpu_atomic_add64(ptr, value)
			
 
				+#endif
			
 
				+#endif
			
 
				+
			
 
				+#ifdef STARPU_HAVE_SYNC_FETCH_AND_OR
			
 
				+#define STARPU_ATOMIC_OR(ptr, value)  (__sync_fetch_and_or ((ptr), (value)))
			
 
				+#define STARPU_ATOMIC_ORL(ptr, value)  (__sync_fetch_and_or ((ptr), (value)))
			
 
				+#define STARPU_ATOMIC_OR64(ptr, value)  (__sync_fetch_and_or ((ptr), (value)))
			
 
				+#else
			
 
				+#if defined(STARPU_HAVE_CMPXCHG)
			
 
				+STARPU_ATOMIC_SOMETHING(or, old | value)
			
 
				+#define STARPU_ATOMIC_OR(ptr, value) starpu_atomic_or(ptr, value)
			
 
				+#endif
			
 
				+#if defined(STARPU_HAVE_CMPXCHGL)
			
 
				+STARPU_ATOMIC_SOMETHINGL(or, old | value)
			
 
				+#define STARPU_ATOMIC_ORL(ptr, value) starpu_atomic_orl(ptr, value)
			
 
				+#endif
			
 
				+#if defined(STARPU_HAVE_CMPXCHG64)
			
 
				+STARPU_ATOMIC_SOMETHING64(or, old | value)
			
 
				+#define STARPU_ATOMIC_OR64(ptr, value) starpu_atomic_or64(ptr, value)
			
 
				+#endif
			
 
				+#endif
			
 
				+
			
 
				+#ifdef STARPU_HAVE_SYNC_BOOL_COMPARE_AND_SWAP
			
 
				+#define STARPU_BOOL_COMPARE_AND_SWAP(ptr, old, value)  (__sync_bool_compare_and_swap ((ptr), (old), (value)))
			
 
				+#define STARPU_BOOL_COMPARE_AND_SWAP32(ptr, old, value) STARPU_BOOL_COMPARE_AND_SWAP(ptr, old, value)
			
 
				+#define STARPU_BOOL_COMPARE_AND_SWAP64(ptr, old, value) STARPU_BOOL_COMPARE_AND_SWAP(ptr, old, value)
			
 
				+#else
			
 
				+#ifdef STARPU_HAVE_CMPXCHG
			
 
				+#define STARPU_BOOL_COMPARE_AND_SWAP(ptr, old, value) (_starpu_cmpxchg((ptr), (old), (value)) == (old))
			
 
				+#endif
			
 
				+#ifdef STARPU_HAVE_CMPXCHG32
			
 
				+#define STARPU_BOOL_COMPARE_AND_SWAP32(ptr, old, value) (_starpu_cmpxchg32((ptr), (old), (value)) == (old))
			
 
				+#endif
			
 
				+#ifdef STARPU_HAVE_CMPXCHG64
			
 
				+#define STARPU_BOOL_COMPARE_AND_SWAP64(ptr, old, value) (_starpu_cmpxchg64((ptr), (old), (value)) == (old))
			
 
				+#endif
			
 
				+#endif
			
 
				+
			
 
				+#ifdef STARPU_HAVE_SYNC_VAL_COMPARE_AND_SWAP
			
 
				+#define STARPU_VAL_COMPARE_AND_SWAP(ptr, old, value)  (__sync_val_compare_and_swap ((ptr), (old), (value)))
			
 
				+#define STARPU_VAL_COMPARE_AND_SWAP32(ptr, old, value) STARPU_VAL_COMPARE_AND_SWAP(ptr, old, value)
			
 
				+#define STARPU_VAL_COMPARE_AND_SWAP64(ptr, old, value) STARPU_VAL_COMPARE_AND_SWAP(ptr, old, value)
			
 
				+#else
			
 
				+#ifdef STARPU_HAVE_CMPXCHG
			
 
				+#define STARPU_VAL_COMPARE_AND_SWAP(ptr, old, value) (_starpu_cmpxchg((ptr), (old), (value)))
			
 
				+#endif
			
 
				+#ifdef STARPU_HAVE_CMPXCHG32
			
 
				+#define STARPU_VAL_COMPARE_AND_SWAP32(ptr, old, value) (_starpu_cmpxchg32((ptr), (old), (value)))
			
 
				+#endif
			
 
				+#ifdef STARPU_HAVE_CMPXCHG64
			
 
				+#define STARPU_VAL_COMPARE_AND_SWAP64(ptr, old, value) (_starpu_cmpxchg64((ptr), (old), (value)))
			
 
				+#endif
			
 
				+#endif
			
 
				+
			
 
				+#ifdef STARPU_HAVE_ATOMIC_EXCHANGE_N
			
 
				+#define STARPU_VAL_EXCHANGE(ptr, value) (__atomic_exchange_n((ptr), (value), __ATOMIC_SEQ_CST))
			
 
				+#define STARPU_VAL_EXCHANGEL(ptr, value) STARPU_VAL_EXCHANGE((ptr), (value))
			
 
				+#define STARPU_VAL_EXCHANGE32(ptr, value) STARPU_VAL_EXCHANGE((ptr), (value))
			
 
				+#define STARPU_VAL_EXCHANGE64(ptr, value) STARPU_VAL_EXCHANGE((ptr), (value))
			
 
				+#else
			
 
				+#ifdef STARPU_HAVE_XCHG
			
 
				+#define STARPU_VAL_EXCHANGE(ptr, value) (_starpu_xchg((ptr), (value)))
			
 
				+#endif
			
 
				+#ifdef STARPU_HAVE_XCHGL
			
 
				+#define STARPU_VAL_EXCHANGEL(ptr, value) (_starpu_xchgl((ptr), (value)))
			
 
				+#endif
			
 
				+#ifdef STARPU_HAVE_XCHG32
			
 
				+#define STARPU_VAL_EXCHANGE32(ptr, value) (_starpu_xchg32((ptr), (value)))
			
 
				+#endif
			
 
				+#ifdef STARPU_HAVE_XCHG64
			
 
				+#define STARPU_VAL_EXCHANGE64(ptr, value) (_starpu_xchg64((ptr), (value)))
			
 
				+#endif
			
 
				+#endif
			
 
				+
			
 
				+/* Returns the previous value */
			
 
				+#ifdef STARPU_HAVE_SYNC_LOCK_TEST_AND_SET
			
 
				+#define STARPU_TEST_AND_SET(ptr, value) (__sync_lock_test_and_set ((ptr), (value)))
			
 
				+#define STARPU_RELEASE(ptr) (__sync_lock_release ((ptr)))
			
 
				+#elif defined(STARPU_HAVE_XCHG)
			
 
				+#define STARPU_TEST_AND_SET(ptr, value) (_starpu_xchg((ptr), (value)))
			
 
				+#define STARPU_RELEASE(ptr) (_starpu_xchg((ptr), 0))
			
 
				+#endif
			
 
				+
			
 
				+#ifdef STARPU_HAVE_SYNC_SYNCHRONIZE
			
 
				+#define STARPU_SYNCHRONIZE() __sync_synchronize()
			
 
				+#elif defined(__i386__)
			
 
				+#define STARPU_SYNCHRONIZE() __asm__ __volatile__("lock; addl $0,0(%%esp)" ::: "memory")
			
 
				+#elif defined(__KNC__) || defined(__KNF__)
			
 
				+#define STARPU_SYNCHRONIZE() __asm__ __volatile__("lock; addl $0,0(%%rsp)" ::: "memory")
			
 
				+#elif defined(__x86_64__)
			
 
				+#define STARPU_SYNCHRONIZE() __asm__ __volatile__("mfence" ::: "memory")
			
 
				+#elif defined(__ppc__) || defined(__ppc64__)
			
 
				+#define STARPU_SYNCHRONIZE() __asm__ __volatile__("sync" ::: "memory")
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   This macro can be used as a read memory barrier, i.e. to order memory reads.
			
 
				+*/
			
 
				+#if defined(__i386__)
			
 
				+#define STARPU_RMB() __asm__ __volatile__("lock; addl $0,0(%%esp)" ::: "memory")
			
 
				+#elif defined(__KNC__) || defined(__KNF__)
			
 
				+#define STARPU_RMB() __asm__ __volatile__("lock; addl $0,0(%%rsp)" ::: "memory")
			
 
				+#elif defined(__x86_64__)
			
 
				+#define STARPU_RMB() __asm__ __volatile__("lfence" ::: "memory")
			
 
				+#elif defined(__ppc__) || defined(__ppc64__)
			
 
				+#define STARPU_RMB() __asm__ __volatile__("sync" ::: "memory")
			
 
				+#else
			
 
				+#define STARPU_RMB() STARPU_SYNCHRONIZE()
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   This macro can be used as a write memory barrier, i.e. to order memory writes.
			
 
				+*/
			
 
				+#if defined(__i386__)
			
 
				+#define STARPU_WMB() __asm__ __volatile__("lock; addl $0,0(%%esp)" ::: "memory")
			
 
				+#elif defined(__KNC__) || defined(__KNF__)
			
 
				+#define STARPU_WMB() __asm__ __volatile__("lock; addl $0,0(%%rsp)" ::: "memory")
			
 
				+#elif defined(__x86_64__)
			
 
				+#define STARPU_WMB() __asm__ __volatile__("sfence" ::: "memory")
			
 
				+#elif defined(__ppc__) || defined(__ppc64__)
			
 
				+#define STARPU_WMB() __asm__ __volatile__("sync" ::: "memory")
			
 
				+#else
			
 
				+#define STARPU_WMB() STARPU_SYNCHRONIZE()
			
 
				+#endif
			
 
				+
			
 
				+#if defined(__i386__) || defined(__x86_64__)
			
 
				+#define STARPU_CACHELINE_SIZE 64
			
 
				+#elif defined(__ppc__) || defined(__ppc64__) || defined(__ia64__)
			
 
				+#define STARPU_CACHELINE_SIZE 128
			
 
				+#elif defined(__s390__) || defined(__s390x__)
			
 
				+#define STARPU_CACHELINE_SIZE 256
			
 
				+#else
			
 
				+/* Conservative default */
			
 
				+#define STARPU_CACHELINE_SIZE 1024
			
 
				+#endif
			
 
				+
			
 
				+#ifdef _WIN32
			
 
				+/* Try to fetch the system definition of timespec */
			
 
				+#include <sys/types.h>
			
 
				+#include <sys/stat.h>
			
 
				+#ifdef HAVE_UNISTD_H
			
 
				+#include <unistd.h>
			
 
				+#endif
			
 
				+#include <time.h>
			
 
				+#if !defined(_MSC_VER) || defined(BUILDING_STARPU)
			
 
				+#include <pthread.h>
			
 
				+#endif
			
 
				+#if !defined(STARPU_HAVE_STRUCT_TIMESPEC) || (defined(_MSC_VER) && _MSC_VER < 1900)
			
 
				+/* If it didn't get defined in the standard places, then define it ourself */
			
 
				+#ifndef STARPU_TIMESPEC_DEFINED
			
 
				+#define STARPU_TIMESPEC_DEFINED 1
			
 
				+struct timespec
			
 
				+{
			
 
				+     time_t  tv_sec;  /* Seconds */
			
 
				+     long    tv_nsec; /* Nanoseconds */
			
 
				+};
			
 
				+#endif /* STARPU_TIMESPEC_DEFINED */
			
 
				+#endif /* STARPU_HAVE_STRUCT_TIMESPEC */
			
 
				+/* Fetch gettimeofday on mingw/cygwin */
			
 
				+#if defined(__MINGW32__) || defined(__CYGWIN__)
			
 
				+#include <sys/time.h>
			
 
				+#endif
			
 
				+#else
			
 
				+#include <sys/time.h>
			
 
				+#endif /* _WIN32 */
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif /* __STARPU_UTIL_H__ */
			
--- a/include/starpu/1.3/starpu_worker.h
+++ b/include/starpu/1.3/starpu_worker.h
@@ -0,0 +1,532 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ * Copyright (C) 2013       Thibaut Lambert
			
 
				+ * Copyright (C) 2016       Uppsala University
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __STARPU_WORKER_H__
			
 
				+#define __STARPU_WORKER_H__
			
 
				+
			
 
				+#include <stdlib.h>
			
 
				+#include <starpu_config.h>
			
 
				+#include <starpu_thread.h>
			
 
				+#include <starpu_task.h>
			
 
				+
			
 
				+#ifdef STARPU_HAVE_HWLOC
			
 
				+#include <hwloc.h>
			
 
				+#endif
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+/**
			
 
				+   @defgroup API_Workers_Properties Workers’ Properties
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+  Memory node Type
			
 
				+*/
			
 
				+enum starpu_node_kind
			
 
				+{
			
 
				+	STARPU_UNUSED=0,
			
 
				+	STARPU_CPU_RAM=1,
			
 
				+	STARPU_CUDA_RAM=2,
			
 
				+	STARPU_OPENCL_RAM=3,
			
 
				+	STARPU_DISK_RAM=4,
			
 
				+	STARPU_MIC_RAM=5,
			
 
				+	STARPU_MPI_MS_RAM=6
			
 
				+};
			
 
				+
			
 
				+/**
			
 
				+   Worker Architecture Type
			
 
				+
			
 
				+   The value 4 which was used by the driver SCC is no longer used as
			
 
				+   renumbering workers would make unusable old performance model
			
 
				+   files.
			
 
				+*/
			
 
				+enum starpu_worker_archtype
			
 
				+{
			
 
				+	STARPU_CPU_WORKER=0,        /**< CPU core */
			
 
				+	STARPU_CUDA_WORKER=1,       /**< NVIDIA CUDA device */
			
 
				+	STARPU_OPENCL_WORKER=2,     /**< OpenCL device */
			
 
				+	STARPU_MIC_WORKER=3,        /**< Intel MIC device */
			
 
				+	STARPU_MPI_MS_WORKER=5,     /**< MPI Slave device */
			
 
				+	STARPU_ANY_WORKER=6         /**< any worker, used in the hypervisor */
			
 
				+};
			
 
				+
			
 
				+/**
			
 
				+   Structure needed to iterate on the collection
			
 
				+*/
			
 
				+struct starpu_sched_ctx_iterator
			
 
				+{
			
 
				+	/**
			
 
				+	   The index of the current worker in the collection, needed
			
 
				+	   when iterating on the collection.
			
 
				+	*/
			
 
				+	int cursor;
			
 
				+	void *value;
			
 
				+	void *possible_value;
			
 
				+	char visited[STARPU_NMAXWORKERS];
			
 
				+	int possibly_parallel;
			
 
				+};
			
 
				+
			
 
				+/**
			
 
				+   Types of structures the worker collection can implement
			
 
				+*/
			
 
				+enum starpu_worker_collection_type
			
 
				+{
			
 
				+	STARPU_WORKER_TREE,  /**< The collection is a tree */
			
 
				+	STARPU_WORKER_LIST   /**< The collection is an array */
			
 
				+};
			
 
				+
			
 
				+/**
			
 
				+   A scheduling context manages a collection of workers that can be
			
 
				+   memorized using different data structures. Thus, a generic
			
 
				+   structure is available in order to simplify the choice of its type.
			
 
				+   Only the list data structure is available but further data
			
 
				+   structures(like tree) implementations are foreseen.
			
 
				+*/
			
 
				+struct starpu_worker_collection
			
 
				+{
			
 
				+	/**
			
 
				+	   The workerids managed by the collection
			
 
				+	*/
			
 
				+	int *workerids;
			
 
				+	void *collection_private;
			
 
				+	/**
			
 
				+	   The number of workers in the collection
			
 
				+	*/
			
 
				+	unsigned nworkers;
			
 
				+	void *unblocked_workers;
			
 
				+	unsigned nunblocked_workers;
			
 
				+	void *masters;
			
 
				+	unsigned nmasters;
			
 
				+	char present[STARPU_NMAXWORKERS];
			
 
				+	char is_unblocked[STARPU_NMAXWORKERS];
			
 
				+	char is_master[STARPU_NMAXWORKERS];
			
 
				+	/**
			
 
				+	   The type of structure
			
 
				+	*/
			
 
				+	enum starpu_worker_collection_type type;
			
 
				+	/**
			
 
				+	   Check if there is another element in collection
			
 
				+	*/
			
 
				+	unsigned (*has_next)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it);
			
 
				+	/**
			
 
				+	   Return the next element in the collection
			
 
				+	*/
			
 
				+	int (*get_next)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it);
			
 
				+	/**
			
 
				+	   Add a new element in the collection
			
 
				+	*/
			
 
				+	int (*add)(struct starpu_worker_collection *workers, int worker);
			
 
				+	/**
			
 
				+	   Remove an element from the collection
			
 
				+	*/
			
 
				+	int (*remove)(struct starpu_worker_collection *workers, int worker);
			
 
				+	/**
			
 
				+	   Initialize the collection
			
 
				+	*/
			
 
				+	void (*init)(struct starpu_worker_collection *workers);
			
 
				+	/**
			
 
				+	   Deinitialize the collection
			
 
				+	*/
			
 
				+	void (*deinit)(struct starpu_worker_collection *workers);
			
 
				+	/**
			
 
				+	   Initialize the cursor if there is one
			
 
				+	*/
			
 
				+	void (*init_iterator)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it);
			
 
				+	void (*init_iterator_for_parallel_tasks)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it, struct starpu_task *task);
			
 
				+};
			
 
				+
			
 
				+extern struct starpu_worker_collection worker_list;
			
 
				+extern struct starpu_worker_collection worker_tree;
			
 
				+
			
 
				+/**
			
 
				+   Return the number of workers (i.e. processing units executing
			
 
				+   StarPU tasks). The return value should be at most \ref
			
 
				+   STARPU_NMAXWORKERS.
			
 
				+*/
			
 
				+unsigned starpu_worker_get_count(void);
			
 
				+
			
 
				+/**
			
 
				+   Return the number of CPUs controlled by StarPU. The return value
			
 
				+   should be at most \ref STARPU_MAXCPUS.
			
 
				+*/
			
 
				+unsigned starpu_cpu_worker_get_count(void);
			
 
				+
			
 
				+/**
			
 
				+   Return the number of CUDA devices controlled by StarPU. The return
			
 
				+   value should be at most \ref STARPU_MAXCUDADEVS.
			
 
				+*/
			
 
				+unsigned starpu_cuda_worker_get_count(void);
			
 
				+
			
 
				+/**
			
 
				+   Return the number of OpenCL devices controlled by StarPU. The
			
 
				+   return value should be at most \ref STARPU_MAXOPENCLDEVS.
			
 
				+*/
			
 
				+unsigned starpu_opencl_worker_get_count(void);
			
 
				+
			
 
				+/**
			
 
				+   Return the number of MIC workers controlled by StarPU.
			
 
				+*/
			
 
				+unsigned starpu_mic_worker_get_count(void);
			
 
				+
			
 
				+/**
			
 
				+   Return the number of MPI Master Slave workers controlled by StarPU.
			
 
				+*/
			
 
				+unsigned starpu_mpi_ms_worker_get_count(void);
			
 
				+
			
 
				+/**
			
 
				+   Return the number of MIC devices controlled by StarPU. The return
			
 
				+   value should be at most \ref STARPU_MAXMICDEVS.
			
 
				+*/
			
 
				+unsigned starpu_mic_device_get_count(void);
			
 
				+
			
 
				+/**
			
 
				+   Return the identifier of the current worker, i.e the one associated
			
 
				+   to the calling thread. The return value is either \c -1 if the
			
 
				+   current context is not a StarPU worker (i.e. when called from the
			
 
				+   application outside a task or a callback), or an integer between \c
			
 
				+   0 and starpu_worker_get_count() - \c 1.
			
 
				+*/
			
 
				+int starpu_worker_get_id(void);
			
 
				+
			
 
				+unsigned _starpu_worker_get_id_check(const char *f, int l);
			
 
				+
			
 
				+/**
			
 
				+   Similar to starpu_worker_get_id(), but abort when called from
			
 
				+   outside a worker (i.e. when starpu_worker_get_id() would return \c
			
 
				+   -1).
			
 
				+*/
			
 
				+unsigned starpu_worker_get_id_check(void);
			
 
				+
			
 
				+#define starpu_worker_get_id_check() _starpu_worker_get_id_check(__FILE__, __LINE__)
			
 
				+int starpu_worker_get_bindid(int workerid);
			
 
				+
			
 
				+void starpu_sched_find_all_worker_combinations(void);
			
 
				+
			
 
				+/**
			
 
				+   Return the type of processing unit associated to the worker \p id.
			
 
				+   The worker identifier is a value returned by the function
			
 
				+   starpu_worker_get_id(). The return value indicates the
			
 
				+   architecture of the worker: ::STARPU_CPU_WORKER for a CPU core,
			
 
				+   ::STARPU_CUDA_WORKER for a CUDA device, and ::STARPU_OPENCL_WORKER
			
 
				+   for an OpenCL device. The return value for an invalid identifier is
			
 
				+   unspecified.
			
 
				+*/
			
 
				+enum starpu_worker_archtype starpu_worker_get_type(int id);
			
 
				+
			
 
				+/**
			
 
				+   Return the number of workers of \p type. A non-negative value is
			
 
				+   returned in case of success; <c>-EINVAL</c> is returned if
			
 
				+   \p type is not valid.
			
 
				+*/
			
 
				+int starpu_worker_get_count_by_type(enum starpu_worker_archtype type);
			
 
				+
			
 
				+/**
			
 
				+   Get the list of identifiers of workers of \p type. Fill the array
			
 
				+   \p workerids with the identifiers of the workers. The argument
			
 
				+   \p maxsize indicates the size of the array \p workerids. The return
			
 
				+   value gives the number of identifiers that were put in the array.
			
 
				+   <c>-ERANGE</c> is returned if \p maxsize is lower than the number
			
 
				+   of workers with the appropriate type: in that case, the array is
			
 
				+   filled with the \p maxsize first elements. To avoid such overflows,
			
 
				+   the value of maxsize can be chosen by the means of the function
			
 
				+   starpu_worker_get_count_by_type(), or by passing a value greater or
			
 
				+   equal to \ref STARPU_NMAXWORKERS.
			
 
				+*/
			
 
				+unsigned starpu_worker_get_ids_by_type(enum starpu_worker_archtype type, int *workerids, unsigned maxsize);
			
 
				+
			
 
				+/**
			
 
				+   Return the identifier of the \p num -th worker that has the
			
 
				+   specified \p type. If there is no such worker, -1 is returned.
			
 
				+*/
			
 
				+int starpu_worker_get_by_type(enum starpu_worker_archtype type, int num);
			
 
				+
			
 
				+/**
			
 
				+   Return the identifier of the worker that has the specified \p type
			
 
				+   and device id \p devid (which may not be the n-th, if some devices
			
 
				+   are skipped for instance). If there is no such worker, \c -1 is
			
 
				+   returned.
			
 
				+*/
			
 
				+int starpu_worker_get_by_devid(enum starpu_worker_archtype type, int devid);
			
 
				+
			
 
				+/**
			
 
				+   Get the name of the worker \p id. StarPU associates a unique human
			
 
				+   readable string to each processing unit. This function copies at
			
 
				+   most the \p maxlen first bytes of the unique string associated to
			
 
				+   the worker \p id into the \p dst buffer. The caller is responsible
			
 
				+   for ensuring that \p dst is a valid pointer to a buffer of \p
			
 
				+   maxlen bytes at least. Calling this function on an invalid
			
 
				+   identifier results in an unspecified behaviour.
			
 
				+*/
			
 
				+void starpu_worker_get_name(int id, char *dst, size_t maxlen);
			
 
				+
			
 
				+/**
			
 
				+   Display on \p output the list (if any) of all the workers of the
			
 
				+   given \p type.
			
 
				+*/
			
 
				+void starpu_worker_display_names(FILE *output, enum starpu_worker_archtype type);
			
 
				+
			
 
				+/**
			
 
				+   Return the device id of the worker \p id. The worker should be
			
 
				+   identified with the value returned by the starpu_worker_get_id()
			
 
				+   function. In the case of a CUDA worker, this device identifier is
			
 
				+   the logical device identifier exposed by CUDA (used by the function
			
 
				+   \c cudaGetDevice() for instance). The device identifier of a CPU
			
 
				+   worker is the logical identifier of the core on which the worker
			
 
				+   was bound; this identifier is either provided by the OS or by the
			
 
				+   library <c>hwloc</c> in case it is available.
			
 
				+*/
			
 
				+int starpu_worker_get_devid(int id);
			
 
				+
			
 
				+int starpu_worker_get_mp_nodeid(int id);
			
 
				+
			
 
				+struct starpu_tree* starpu_workers_get_tree(void);
			
 
				+
			
 
				+unsigned starpu_worker_get_sched_ctx_list(int worker, unsigned **sched_ctx);
			
 
				+
			
 
				+unsigned starpu_worker_is_blocked_in_parallel(int workerid);
			
 
				+
			
 
				+unsigned starpu_worker_is_slave_somewhere(int workerid);
			
 
				+
			
 
				+/**
			
 
				+   Return worker \p type as a string.
			
 
				+*/
			
 
				+char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type);
			
 
				+
			
 
				+int starpu_bindid_get_workerids(int bindid, int **workerids);
			
 
				+
			
 
				+int starpu_worker_get_devids(enum starpu_worker_archtype type, int *devids, int num);
			
 
				+
			
 
				+int starpu_worker_get_stream_workerids(unsigned devid, int *workerids, enum starpu_worker_archtype type);
			
 
				+
			
 
				+unsigned starpu_worker_get_sched_ctx_id_stream(unsigned stream_workerid);
			
 
				+
			
 
				+#ifdef STARPU_HAVE_HWLOC
			
 
				+/**
			
 
				+   If StarPU was compiled with \c hwloc support, return a duplicate of
			
 
				+   the \c hwloc cpuset associated with the worker \p workerid. The
			
 
				+   returned cpuset is obtained from a \c hwloc_bitmap_dup() function
			
 
				+   call. It must be freed by the caller using \c hwloc_bitmap_free().
			
 
				+*/
			
 
				+hwloc_cpuset_t starpu_worker_get_hwloc_cpuset(int workerid);
			
 
				+/**
			
 
				+   If StarPU was compiled with \c hwloc support, return the \c hwloc
			
 
				+   object corresponding to  the worker \p workerid.
			
 
				+*/
			
 
				+hwloc_obj_t starpu_worker_get_hwloc_obj(int workerid);
			
 
				+#endif
			
 
				+
			
 
				+int starpu_memory_node_get_devid(unsigned node);
			
 
				+
			
 
				+/**
			
 
				+   Return the memory node associated to the current worker
			
 
				+*/
			
 
				+unsigned starpu_worker_get_local_memory_node(void);
			
 
				+
			
 
				+/**
			
 
				+   Return the identifier of the memory node associated to the worker
			
 
				+   identified by \p workerid.
			
 
				+*/
			
 
				+unsigned starpu_worker_get_memory_node(unsigned workerid);
			
 
				+
			
 
				+unsigned starpu_memory_nodes_get_count(void);
			
 
				+int starpu_memory_node_get_name(unsigned node, char *name, size_t size);
			
 
				+int starpu_memory_nodes_get_numa_count(void);
			
 
				+
			
 
				+/**
			
 
				+   Return the identifier of the memory node associated to the NUMA
			
 
				+   node identified by \p osid by the Operating System.
			
 
				+*/
			
 
				+int starpu_memory_nodes_numa_id_to_devid(int osid);
			
 
				+
			
 
				+/**
			
 
				+   Return the Operating System identifier of the memory node whose
			
 
				+   StarPU identifier is \p id.
			
 
				+*/
			
 
				+int starpu_memory_nodes_numa_devid_to_id(unsigned id);
			
 
				+
			
 
				+/**
			
 
				+   Return the type of \p node as defined by ::starpu_node_kind. For
			
 
				+   example, when defining a new data interface, this function should
			
 
				+   be used in the allocation function to determine on which device the
			
 
				+   memory needs to be allocated.
			
 
				+*/
			
 
				+enum starpu_node_kind starpu_node_get_kind(unsigned node);
			
 
				+
			
 
				+/**
			
 
				+   @name Scheduling operations
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   Return \c !0 if current worker has a scheduling operation in
			
 
				+   progress, and \c 0 otherwise.
			
 
				+*/
			
 
				+int starpu_worker_sched_op_pending(void);
			
 
				+
			
 
				+/**
			
 
				+   Allow other threads and workers to temporarily observe the current
			
 
				+   worker state, even though it is performing a scheduling operation.
			
 
				+   Must be called by a worker before performing a potentially blocking
			
 
				+   call such as acquiring a mutex other than its own sched_mutex. This
			
 
				+   function increases \c state_relax_refcnt from the current worker.
			
 
				+   No more than <c>UINT_MAX-1</c> nested starpu_worker_relax_on()
			
 
				+   calls should be performed on the same worker. This function is
			
 
				+   automatically called by starpu_worker_lock() to relax the caller
			
 
				+   worker state while attempting to lock the target worker.
			
 
				+*/
			
 
				+void starpu_worker_relax_on(void);
			
 
				+
			
 
				+/**
			
 
				+   Must be called after a potentially blocking call is complete, to
			
 
				+   restore the relax state in place before the corresponding
			
 
				+   starpu_worker_relax_on(). Decreases \c state_relax_refcnt. Calls to
			
 
				+   starpu_worker_relax_on() and starpu_worker_relax_off() must be
			
 
				+   properly paired. This function is automatically called by
			
 
				+   starpu_worker_unlock() after the target worker has been unlocked.
			
 
				+*/
			
 
				+void starpu_worker_relax_off(void);
			
 
				+
			
 
				+/**
			
 
				+   Return \c !0 if the current worker \c state_relax_refcnt!=0 and \c
			
 
				+   0 otherwise.
			
 
				+*/
			
 
				+int starpu_worker_get_relax_state(void);
			
 
				+
			
 
				+/**
			
 
				+   Acquire the sched mutex of \p workerid. If the caller is a worker,
			
 
				+   distinct from \p workerid, the caller worker automatically enters a
			
 
				+   relax state while acquiring the target worker lock.
			
 
				+*/
			
 
				+void starpu_worker_lock(int workerid);
			
 
				+
			
 
				+/**
			
 
				+   Attempt to acquire the sched mutex of \p workerid. Returns \c 0 if
			
 
				+   successful, \c !0 if \p workerid sched mutex is held or the
			
 
				+   corresponding worker is not in a relax state. If the caller is a
			
 
				+   worker, distinct from \p workerid, the caller worker automatically
			
 
				+   enters relax state if successfully acquiring the target worker lock.
			
 
				+*/
			
 
				+int starpu_worker_trylock(int workerid);
			
 
				+
			
 
				+/**
			
 
				+   Release the previously acquired sched mutex of \p workerid. Restore
			
 
				+   the relax state of the caller worker if needed.
			
 
				+*/
			
 
				+void starpu_worker_unlock(int workerid);
			
 
				+
			
 
				+/**
			
 
				+   Acquire the current worker sched mutex.
			
 
				+*/
			
 
				+void starpu_worker_lock_self(void);
			
 
				+
			
 
				+/**
			
 
				+   Release the current worker sched mutex.
			
 
				+*/
			
 
				+void starpu_worker_unlock_self(void);
			
 
				+
			
 
				+#ifdef STARPU_WORKER_CALLBACKS
			
 
				+/**
			
 
				+   If StarPU was compiled with blocking drivers support and worker
			
 
				+   callbacks support enabled, allow specifying an external resource
			
 
				+   manager callback to be notified about workers going to sleep.
			
 
				+*/
			
 
				+void starpu_worker_set_going_to_sleep_callback(void (*callback)(unsigned workerid));
			
 
				+
			
 
				+/**
			
 
				+   If StarPU was compiled with blocking drivers support and worker
			
 
				+   callbacks support enabled, allow specifying an external resource
			
 
				+   manager callback to be notified about workers waking up.
			
 
				+*/
			
 
				+void starpu_worker_set_waking_up_callback(void (*callback)(unsigned workerid));
			
 
				+#endif
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+/**
			
 
				+   @defgroup API_Parallel_Tasks Parallel Tasks
			
 
				+   @{
			
 
				+*/
			
 
				+
			
 
				+/**
			
 
				+   Return the number of different combined workers.
			
 
				+*/
			
 
				+unsigned starpu_combined_worker_get_count(void);
			
 
				+unsigned starpu_worker_is_combined_worker(int id);
			
 
				+
			
 
				+/**
			
 
				+   Return the identifier of the current combined worker.
			
 
				+*/
			
 
				+int starpu_combined_worker_get_id(void);
			
 
				+
			
 
				+/**
			
 
				+   Return the size of the current combined worker, i.e. the total
			
 
				+   number of CPUs running the same task in the case of ::STARPU_SPMD
			
 
				+   parallel tasks, or the total number of threads that the task is
			
 
				+   allowed to start in the case of ::STARPU_FORKJOIN parallel tasks.
			
 
				+*/
			
 
				+int starpu_combined_worker_get_size(void);
			
 
				+
			
 
				+/**
			
 
				+   Return the rank of the current thread within the combined worker.
			
 
				+   Can only be used in ::STARPU_SPMD parallel tasks, to know which
			
 
				+   part of the task to work on.
			
 
				+*/
			
 
				+int starpu_combined_worker_get_rank(void);
			
 
				+
			
 
				+/**
			
 
				+   Register a new combined worker and get its identifier
			
 
				+*/
			
 
				+int starpu_combined_worker_assign_workerid(int nworkers, int workerid_array[]);
			
 
				+
			
 
				+/**
			
 
				+   Get the description of a combined worker
			
 
				+*/
			
 
				+int starpu_combined_worker_get_description(int workerid, int *worker_size, int **combined_workerid);
			
 
				+
			
 
				+/**
			
 
				+   Variant of starpu_worker_can_execute_task() compatible with
			
 
				+   combined workers
			
 
				+*/
			
 
				+int starpu_combined_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl);
			
 
				+
			
 
				+/**
			
 
				+   Initialise the barrier for the parallel task, and dispatch the task
			
 
				+   between the different workers of the given combined worker.
			
 
				+ */
			
 
				+void starpu_parallel_task_barrier_init(struct starpu_task *task, int workerid);
			
 
				+
			
 
				+/**
			
 
				+   Initialise the barrier for the parallel task, to be pushed to \p
			
 
				+   worker_size workers (without having to specify a given combined
			
 
				+   worker).
			
 
				+*/
			
 
				+void starpu_parallel_task_barrier_init_n(struct starpu_task *task, int worker_size);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif /* __STARPU_WORKER_H__ */
			
--- a/include/starpu/1.3/starpufft.h
+++ b/include/starpu/1.3/starpufft.h
@@ -0,0 +1,71 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2009-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+// The documentation for this file is in doc/doxygen/chapters/api/fft_support.doxy
			
 
				+
			
 
				+#ifndef __STARPU_FFT_H__
			
 
				+#define __STARPU_FFT_H__
			
 
				+
			
 
				+#include <stdio.h>
			
 
				+#include <complex.h>
			
 
				+#include <starpu.h>
			
 
				+#ifdef STARPU_USE_CUDA
			
 
				+#include <cufft.h>
			
 
				+#define STARPU_CUFFT_REPORT_ERROR(status) STARPUFFT(report_error)(__starpu_func__, __FILE__, __LINE__, status)
			
 
				+#endif /* STARPU_USE_CUDA */
			
 
				+
			
 
				+#define STARPUFFT_FORWARD -1
			
 
				+#define STARPUFFT_INVERSE 1
			
 
				+
			
 
				+#define __STARPUFFT(name) starpufft_##name
			
 
				+#define __STARPUFFTF(name) starpufftf_##name
			
 
				+#define __STARPUFFTL(name) starpufftl_##name
			
 
				+
			
 
				+#define __STARPUFFT_INTERFACE(starpufft,real) \
			
 
				+typedef real _Complex starpufft(complex); \
			
 
				+\
			
 
				+typedef struct starpufft(plan) *starpufft(plan); \
			
 
				+\
			
 
				+starpufft(plan) starpufft(plan_dft_1d)(int n, int sign, unsigned flags); \
			
 
				+starpufft(plan) starpufft(plan_dft_2d)(int n, int m, int sign, unsigned flags); \
			
 
				+starpufft(plan) starpufft(plan_dft_3d)(int n, int m, int p, int sign, unsigned flags); \
			
 
				+starpufft(plan) starpufft(plan_dft_r2c_1d)(int n, unsigned flags); \
			
 
				+starpufft(plan) starpufft(plan_dft_c2r_1d)(int n, unsigned flags); \
			
 
				+\
			
 
				+void *starpufft(malloc)(size_t n); \
			
 
				+void starpufft(free)(void *p); \
			
 
				+\
			
 
				+int starpufft(execute)(starpufft(plan) p, void *in, void *out); \
			
 
				+struct starpu_task *starpufft(start)(starpufft(plan) p, void *in, void *out); \
			
 
				+\
			
 
				+int starpufft(execute_handle)(starpufft(plan) p, starpu_data_handle_t in, starpu_data_handle_t out); \
			
 
				+struct starpu_task *starpufft(start_handle)(starpufft(plan) p, starpu_data_handle_t in, starpu_data_handle_t out); \
			
 
				+\
			
 
				+void starpufft(cleanup)(starpufft(plan) p); \
			
 
				+void starpufft(destroy_plan)(starpufft(plan) p); \
			
 
				+\
			
 
				+void starpufft(startstats)(void); \
			
 
				+void starpufft(stopstats)(void); \
			
 
				+void starpufft(showstats)(FILE *out);
			
 
				+
			
 
				+__STARPUFFT_INTERFACE(__STARPUFFT, double)
			
 
				+__STARPUFFT_INTERFACE(__STARPUFFTF, float)
			
 
				+__STARPUFFT_INTERFACE(__STARPUFFTL, long double)
			
 
				+
			
 
				+/* Internal use */
			
 
				+extern int starpufft_last_plan_number;
			
 
				+
			
 
				+#endif // __STARPU_FFT_H__
			
--- a/include/starpu_perfmodel.h
+++ b/include/starpu_perfmodel.h
@@ -312,6 +312,20 @@ struct starpu_perfmodel
 
				 void starpu_perfmodel_init(struct starpu_perfmodel *model);
			
 
				 
			
 
				 /**
			
 
				+   starpu_energy_start - start counting hardware events in an event set
			
 
				+*/
			
 
				+
			
 
				+int starpu_energy_start();
			
 
				+
			
 
				+/**
			
 
				+   starpu_energy_stop - stop counting hardware events in an event set
			
 
				+   \values -- an array to hold the counter values of the counting events
			
 
				+   \EventSet -- an integer handle for a PAPI event set as created by papi_create_eventset()
			
 
				+*/
			
 
				+
			
 
				+int starpu_energy_stop(struct starpu_perfmodel *model, struct starpu_task *task, unsigned ntasks);
			
 
				+
			
 
				+/**
			
 
				    Load the performance model found in the file named \p filename. \p model has to be
			
 
				    completely zero, and will be filled with the information stored in the given file.
			
 
				 */
			
--- a/lib/libsocl-1.3.a
+++ b/lib/libsocl-1.3.a
--- a/lib/libsocl-1.3.so
+++ b/lib/libsocl-1.3.so
@@ -0,0 +1 @@
 
				+libsocl-1.3.so.0.0.0
			
--- a/lib/libsocl-1.3.so.0
+++ b/lib/libsocl-1.3.so.0
@@ -0,0 +1 @@
 
				+libsocl-1.3.so.0.0.0
			
--- a/lib/libsocl-1.3.so.0.0.0
+++ b/lib/libsocl-1.3.so.0.0.0
--- a/lib/libstarpu-1.3.a
+++ b/lib/libstarpu-1.3.a
--- a/lib/libstarpu-1.3.so
+++ b/lib/libstarpu-1.3.so
@@ -0,0 +1 @@
 
				+libstarpu-1.3.so.0.0.0
			
--- a/lib/libstarpu-1.3.so.0
+++ b/lib/libstarpu-1.3.so.0
@@ -0,0 +1 @@
 
				+libstarpu-1.3.so.0.0.0
			
--- a/lib/libstarpu-1.3.so.0.0.0
+++ b/lib/libstarpu-1.3.so.0.0.0
--- a/lib/libstarpufft-1.3.a
+++ b/lib/libstarpufft-1.3.a
--- a/lib/libstarpufft-1.3.so
+++ b/lib/libstarpufft-1.3.so
@@ -0,0 +1 @@
 
				+libstarpufft-1.3.so.0.0.0
			
--- a/lib/libstarpufft-1.3.so.0
+++ b/lib/libstarpufft-1.3.so.0
@@ -0,0 +1 @@
 
				+libstarpufft-1.3.so.0.0.0
			
--- a/lib/libstarpufft-1.3.so.0.0.0
+++ b/lib/libstarpufft-1.3.so.0.0.0
--- a/lib/starpu/examples/add_vectors
+++ b/lib/starpu/examples/add_vectors
--- a/lib/starpu/examples/add_vectors_cpp11
+++ b/lib/starpu/examples/add_vectors_cpp11
--- a/lib/starpu/examples/add_vectors_interface
+++ b/lib/starpu/examples/add_vectors_interface
--- a/lib/starpu/examples/async_tasks_data_overhead.sh
+++ b/lib/starpu/examples/async_tasks_data_overhead.sh
@@ -0,0 +1,19 @@
 
				+#!/bin/bash
			
 
				+# StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+#
			
 
				+# Copyright (C) 2020       Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
			
 
				+#
			
 
				+# StarPU is free software; you can redistribute it and/or modify
			
 
				+# it under the terms of the GNU Lesser General Public License as published by
			
 
				+# the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+# your option) any later version.
			
 
				+#
			
 
				+# StarPU is distributed in the hope that it will be useful, but
			
 
				+# WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+#
			
 
				+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+#
			
 
				+ROOT=${0%.sh}
			
 
				+ROOT=${ROOT/tasks_data_overhead/tasks_overhead}
			
 
				+exec $STARPU_LAUNCH $ROOT -b 1 "$@"
			
--- a/lib/starpu/examples/async_tasks_overhead
+++ b/lib/starpu/examples/async_tasks_overhead
--- a/lib/starpu/examples/bcsr_data_interface
+++ b/lib/starpu/examples/bcsr_data_interface
--- a/lib/starpu/examples/binary
+++ b/lib/starpu/examples/binary
--- a/lib/starpu/examples/block
+++ b/lib/starpu/examples/block
--- a/lib/starpu/examples/block_data_interface
+++ b/lib/starpu/examples/block_data_interface
--- a/lib/starpu/examples/callback
+++ b/lib/starpu/examples/callback
--- a/lib/starpu/examples/complex
+++ b/lib/starpu/examples/complex