4 anni fa · 3d07c863ba
--- a/AUTHORS
+++ b/AUTHORS
@@ -11,6 +11,7 @@ Collin Nicolas, Inria, <nicolas.collin@inria.fr>
 
				 Danjean Vincent, University Grenoble Alpes, <Vincent.Danjean@ens-lyon.org>
			
 
				 Denis Alexandre, Inria, <alexandre.denis@inria.fr>
			
 
				 Eyraud-Dubois Lionel, Inria, <lionel.eyraud-dubois@inria.fr>
			
 
				+Flint Clément, Inria, <clement.flint@inria.fr>
			
 
				 Furmento Nathalie, CNRS, <nathalie.furmento@labri.fr>
			
 
				 Guermouche Amina, Télécom SudParis, <amina.guermouche@inria.fr>
			
 
				 Guilbaud Adrien, Inria, <adrien.guilbaud@inria.fr>
			
@@ -30,6 +31,7 @@ Nakov Stojce, Inria, <stojce.nakov@inria.fr>
 
				 Namyst Raymond, Université de Bordeaux, <raymond.namyst@labri.fr>
			
 
				 Nesi Lucas Leandro, Federal University of Rio Grande do Sul (UFRGS), <llnesi@inf.ufrgs.br>
			
 
				 Pablo Joris, Inria, <joris.pablo@orange.fr>
			
 
				+Paillat Ludovic, Inria, <ludovic.paillat@inria.fr>
			
 
				 Pasqualinotto Damien, Université de Bordeaux, <dam.pasqualinotto@wanadoo.fr>
			
 
				 Pinto Vinicius Garcia, <vgpinto@inf.ufrgs.br>
			
 
				 Pitoiset Samuel, Inria, <samuel.pitoiset@inria.fr>
			
--- a/configure.ac
+++ b/configure.ac
@@ -2042,6 +2042,21 @@ AM_CONDITIONAL([STARPU_USE_AYUDAME2], [test "x$enable_ayudame2" = "xyes"])
 
				 STARPU_FXT_EVENT_DEFINES="`grep -E '#define\s+_STARPU_(MPI_)?FUT_' ${srcdir}/src/common/fxt.h ${srcdir}/mpi/src/starpu_mpi_fxt.h | grep 0x | grep -v 0x1 | cut -d : -f 2`"
			
 
				 AC_SUBST([STARPU_FXT_EVENT_DEFINES])
			
 
				 
			
 
				+# Heteroprio works better if it can store information based on the program's name
			
 
				+AC_MSG_CHECKING(whether the target supports program_invocation_short_name)
			
 
				+AC_LINK_IFELSE([AC_LANG_SOURCE(
			
 
				+	[
			
 
				+		#include <stdio.h>
			
 
				+		#include <errno.h>
			
 
				+		int main() {
			
 
				+			printf("%s\n", program_invocation_short_name);
			
 
				+			return 0;
			
 
				+		}
			
 
				+	])],
			
 
				+	[AC_DEFINE([STARPU_HAVE_PROGRAM_INVOCATION_SHORT_NAME], [1], [variable program_invocation_short_name is available]) AC_MSG_RESULT(yes)],
			
 
				+	AC_MSG_RESULT(no)
			
 
				+)
			
 
				+
			
 
				 ###############################################################################
			
 
				 #                                                                             #
			
 
				 #                  Miscellaneous options for StarPU                           #
			
--- a/doc/doxygen/chapters/320_scheduling.doxy
+++ b/doc/doxygen/chapters/320_scheduling.doxy
@@ -374,7 +374,11 @@ The tasks are stored inside buckets, where each bucket corresponds to a priority
 
				 worker uses an indirect access array to know the order in which it should access the buckets. Moreover,
			
 
				 all the tasks inside a bucket must be compatible with all the processing units that may access it (at least).
			
 
				 
			
 
				-As an example, see the following code where we have 5 types of tasks.
			
 
				+These priorities are now automatically assigned by Heteroprio in auto calibration mode using heuristics.
			
 
				+If you want to set these priorities manually, you can change \ref STARPU_HETEROPRIO_USE_AUTO_CALIBRATION
			
 
				+and follow the example below.
			
 
				+
			
 
				+In this example code, we have 5 types of tasks.
			
 
				 CPU workers can compute all of them, but CUDA workers can only execute
			
 
				 tasks of types 0 and 1, and are expected to go 20 and 30 times
			
 
				 faster than the CPU, respectively.
			
@@ -431,4 +435,41 @@ So, in the given example, the priority of a task will be between 0 and 4 include
 
				 However, tasks of priorities 0-1 must provide CPU and CUDA kernels, and
			
 
				 tasks of priorities 2-4 must provide CPU kernels (at least).
			
 
				 
			
 
				+\subsection LAHeteroprio Using locality aware Heteroprio
			
 
				+
			
 
				+Heteroprio supports a mode where locality is evaluated to guide the distribution
			
 
				+of the tasks (see https://peerj.com/articles/cs-190.pdf).
			
 
				+Currently, this mode is available using the dedicated function or an environment variable
			
 
				+\ref STARPU_HETEROPRIO_USE_LA, and can be configured using environment variables.
			
 
				+\code{.c}
			
 
				+void starpu_heteroprio_set_use_locality(unsigned sched_ctx_id, unsigned use_locality);
			
 
				+\endcode
			
 
				+
			
 
				+In this mode, multiple strategies are available to determine which memory node's workers are the most qualified for executing a specific task. This strategy can be set with \ref STARPU_LAHETEROPRIO_PUSH
			
 
				+and available strategies are:
			
 
				+- WORKER: the worker which pushed the task is preferred for the execution.
			
 
				+- LcS: the node with the shortest data transfer time (estimated by StarPU) is the most qualified
			
 
				+- LS_SDH: the node with the smallest data amount to be transferred will be preferred.
			
 
				+- LS_SDH2: similar to LS_SDH, but data in write access is counted in a quadratic manner to give them more importance.
			
 
				+- LS_SDHB: similar to LS_SDH, but data in write access is balanced with a coefficient (its value is set to 1000) and
			
 
				+for the same amount of data, the one with fewer pieces of data to be transferred will be preferred.
			
 
				+- LC_SMWB: similar to LS_SDH, but the amount of data in write access gets multiplied by a coefficient which gets closer to 2
			
 
				+as the amount of data in read access gets larger than the data in write access.
			
 
				+- AUTO: the default strategy; it selects the best strategy and changes it at runtime to improve performance
			
 
				+
			
 
				+Other environment variables to configure LAHeteroprio are documented in \ref ConfiguringLaHeteroprio
			
 
				+
			
 
				+\subsection AutoHeteroprio Using Heteroprio in auto-calibration mode
			
 
				+
			
 
				+In this mode, Heteroprio saves data about each program execution, in order to improve future ones.
			
 
				+By default, these files are stored in the folder used by perfmodel, but this can be changed using the
			
 
				+\ref STARPU_HETEROPRIO_DATA_DIR environment variable. You can also specify the data filename directly using
			
 
				+\ref STARPU_HETEROPRIO_DATA_FILE.
			
 
				+
			
 
				+Additionally, to assign priorities to tasks, Heteroprio needs a way to detect that some tasks are similar.
			
 
				+By default, Heteroprio looks for tasks with the same perfmodel, or with the same codelet's name if no perfmodel was assigned.
			
 
				+This behavior can be changed to only consider the codelet's name by setting
			
 
				+\ref STARPU_HETEROPRIO_CODELET_GROUPING_STRATEGY to <c>1</c>
			
 
				+
			
 
				+Other environment variables to configure AutoHeteroprio are documented in \ref ConfiguringAutoHeteroprio
			
 
				 */
			
--- a/doc/doxygen/chapters/501_environment_variables.doxy
+++ b/doc/doxygen/chapters/501_environment_variables.doxy
@@ -622,6 +622,123 @@ Specify which PAPI events should be recorded in the trace (\ref PapiCounters).
 
				 
			
 
				 </dl>
			
 
				 
			
 
				+\section ConfiguringHeteroprio Configuring The Heteroprio Scheduler
			
 
				+
			
 
				+\subsection ConfiguringLaHeteroprio Configuring LAHeteroprio
			
 
				+<dl>
			
 
				+
			
 
				+<dt>STARPU_HETEROPRIO_USE_LA</dt>
			
 
				+<dd>
			
 
				+\anchor STARPU_HETEROPRIO_USE_LA
			
 
				+\addindex __env__STARPU_HETEROPRIO_USE_LA
			
 
				+Enable the locality aware mode of Heteroprio which guides the distribution of tasks to workers
			
 
				+in order to reduce the data transfers between memory nodes.
			
 
				+</dd>
			
 
				+
			
 
				+<dt>STARPU_LAHETEROPRIO_PUSH</dt>
			
 
				+<dd>
			
 
				+\anchor STARPU_LAHETEROPRIO_PUSH
			
 
				+\addindex __env__STARPU_LAHETEROPRIO_PUSH
			
 
				+Choose between the different push strategies for locality aware Heteroprio: 
			
 
				+WORKER, LcS, LS_SDH, LS_SDH2, LS_SDHB, LC_SMWB, AUTO (by default: AUTO). These are detailed in
			
 
				+\ref LAHeteroprio
			
 
				+</dd>
			
 
				+
			
 
				+<dt>STARPU_LAHETEROPRIO_S_[ARCH]</dt>
			
 
				+<dd>
			
 
				+\anchor STARPU_LAHETEROPRIO_S_[ARCH]
			
 
				+\addindex __env__STARPU_LAHETEROPRIO_S_arch
			
 
				+Specify the number of memory nodes contained in an affinity group. An affinity
			
 
				+group will be composed of the closest memory nodes to a worker of a given architecture,
			
 
				+and this worker will look for tasks available inside these memory nodes, before
			
 
				+considering stealing tasks outside this group.
			
 
				+ARCH can be CPU, CUDA, OPENCL, MICC, SCC, MPI_MS, etc.
			
 
				+</dd>
			
 
				+
			
 
				+<dt>STARPU_LAHETEROPRIO_PRIO_STEP_[ARCH]</dt>
			
 
				+<dd>
			
 
				+\anchor STARPU_LAHETEROPRIO_PRIO_STEP_[ARCH]
			
 
				+\addindex __env__STARPU_LAHETEROPRIO_PRIO_STEP_arch
			
 
				+Specify the number of buckets in the local memory node in which a worker will look for
			
 
				+available tasks, before this worker starts looking for tasks in other memory nodes' buckets.
			
 
				+ARCH indicates that this number is specific to a given arch which can be:
			
 
				+CPU, CUDA, OPENCL, MICC, SCC, MPI_MS, etc.
			
 
				+</dd>
			
 
				+
			
 
				+</dl>
			
 
				+
			
 
				+\subsection ConfiguringAutoHeteroprio Configuring AutoHeteroprio
			
 
				+<dl>
			
 
				+
			
 
				+<dt>STARPU_HETEROPRIO_USE_AUTO_CALIBRATION</dt>
			
 
				+<dd>
			
 
				+\anchor STARPU_HETEROPRIO_USE_AUTO_CALIBRATION
			
 
				+\addindex __env__STARPU_HETEROPRIO_USE_AUTO_CALIBRATION
			
 
				+Enable the auto calibration mode of Heteroprio which assigns priorities to tasks automatically
			
 
				+</dd>
			
 
				+
			
 
				+<dt>STARPU_HETEROPRIO_DATA_DIR</dt>
			
 
				+<dd>
			
 
				+\anchor STARPU_HETEROPRIO_DATA_DIR
			
 
				+\addindex __env__STARPU_HETEROPRIO_DATA_DIR
			
 
				+Specify the path of the directory where Heteroprio stores data about program executions.
			
 
				+By default, these are stored in the same directory used by perfmodel.
			
 
				+</dd>
			
 
				+
			
 
				+<dt>STARPU_HETEROPRIO_DATA_FILE</dt>
			
 
				+<dd>
			
 
				+\anchor STARPU_HETEROPRIO_DATA_FILE
			
 
				+\addindex __env__STARPU_HETEROPRIO_DATA_FILE
			
 
				+Specify the filename where Heteroprio will save data about the current program's execution.
			
 
				+</dd>
			
 
				+
			
 
				+<dt>STARPU_HETEROPRIO_CODELET_GROUPING_STRATEGY</dt>
			
 
				+<dd>
			
 
				+\anchor STARPU_HETEROPRIO_CODELET_GROUPING_STRATEGY
			
 
				+\addindex __env__STARPU_HETEROPRIO_CODELET_GROUPING_STRATEGY
			
 
				+Choose how Heteroprio groups similar tasks. It can be <c>0</c> to group
			
 
				+the tasks with the same perfmodel or the same codelet's name if no perfmodel was assigned.
			
 
				+Or, it could be <c>1</c> to group the tasks only by codelet's name.
			
 
				+</dd>
			
 
				+
			
 
				+<dt>STARPU_AUTOHETEROPRIO_PRINT_DATA_ON_UPDATE</dt>
			
 
				+<dd>
			
 
				+\anchor STARPU_AUTOHETEROPRIO_PRINT_DATA_ON_UPDATE
			
 
				+\addindex __env__STARPU_AUTOHETEROPRIO_PRINT_DATA_ON_UPDATE
			
 
				+Enable the printing of priorities' data every time they get updated.
			
 
				+</dd>
			
 
				+
			
 
				+<dt>STARPU_AUTOHETEROPRIO_PRINT_AFTER_ORDERING</dt>
			
 
				+<dd>
			
 
				+\anchor STARPU_AUTOHETEROPRIO_PRINT_AFTER_ORDERING
			
 
				+\addindex __env__STARPU_AUTOHETEROPRIO_PRINT_AFTER_ORDERING
			
 
				+Enable the printing of priorities' order for each architecture every time there's a reordering.
			
 
				+</dd>
			
 
				+
			
 
				+<dt>STARPU_AUTOHETEROPRIO_PRIORITY_ORDERING_POLICY</dt>
			
 
				+<dd>
			
 
				+\anchor STARPU_AUTOHETEROPRIO_PRIORITY_ORDERING_POLICY
			
 
				+\addindex __env__STARPU_AUTOHETEROPRIO_PRIORITY_ORDERING_POLICY
			
 
				+Specify the heuristic which will be used to assign priorities automatically.
			
 
				+It should be an integer between 0 and 27.
			
 
				+</dd>
			
 
				+
			
 
				+<dt>STARPU_AUTOHETEROPRIO_ORDERING_INTERVAL</dt>
			
 
				+<dd>
			
 
				+\anchor STARPU_AUTOHETEROPRIO_ORDERING_INTERVAL
			
 
				+\addindex __env__STARPU_AUTOHETEROPRIO_ORDERING_INTERVAL
			
 
				+Specify the period (in number of tasks pushed) between priority reordering operations.
			
 
				+</dd>
			
 
				+
			
 
				+<dt>STARPU_AUTOHETEROPRIO_FREEZE_GATHERING</dt>
			
 
				+<dd>
			
 
				+\anchor STARPU_AUTOHETEROPRIO_FREEZE_GATHERING
			
 
				+\addindex __env__STARPU_AUTOHETEROPRIO_FREEZE_GATHERING
			
 
				+Disable data gathering from task executions.
			
 
				+</dd>
			
 
				+
			
 
				+</dl>
			
 
				+
			
 
				 \section Extensions Extensions
			
 
				 
			
 
				 <dl>
			
--- a/doc/doxygen/refman.tex
+++ b/doc/doxygen/refman.tex
@@ -263,6 +263,7 @@ Documentation License”.
 
				 \input{group__API__Master__Slave}
			
 
				 \input{group__API__Random__Functions}
			
 
				 \input{group__API__Sink}
			
 
				+\input{group__API__HeteroPrio}
			
 
				 
			
 
				 \chapter{File Index}
			
 
				 \input{files}
			
--- a/include/schedulers/starpu_heteroprio.h
+++ b/include/schedulers/starpu_heteroprio.h
@@ -24,13 +24,86 @@ extern "C"
 
				 {
			
 
				 #endif
			
 
				 
			
 
				-#define STARPU_HETEROPRIO_MAX_PRIO 100
			
 
				+/**
			
 
				+   @defgroup API_HeteroPrio Heteroprio Scheduler
			
 
				+   @brief This is the interface for the heteroprio scheduler
			
 
				+   @{
			
 
				+ */
			
 
				 
			
 
				 #define STARPU_HETEROPRIO_MAX_PREFETCH 2
			
 
				 #if STARPU_HETEROPRIO_MAX_PREFETCH <= 0
			
 
				 #error STARPU_HETEROPRIO_MAX_PREFETCH == 1 means no prefetch so STARPU_HETEROPRIO_MAX_PREFETCH must >= 1
			
 
				 #endif
			
 
				 
			
 
				+#define STARPU_AUTOHETEROPRIO_PRIORITY_ORDERING_POLICY_COUNT 28
			
 
				+
			
 
				+enum starpu_autoheteroprio_priority_ordering_policy
			
 
				+{
			
 
				+	STARPU_HETEROPRIO_NOD_TIME_COMBINATION, // 0
			
 
				+	STARPU_HETEROPRIO_BEST_NODS_SCORE,
			
 
				+	STARPU_HETEROPRIO_BEST_NODS,
			
 
				+	STARPU_HETEROPRIO_URT_PURE,
			
 
				+	STARPU_HETEROPRIO_URT,
			
 
				+	STARPU_HETEROPRIO_URT_2, // 5
			
 
				+	STARPU_HETEROPRIO_URT_DOT_DIFF_PURE,
			
 
				+	STARPU_HETEROPRIO_URT_DOT_DIFF_PURE_2,
			
 
				+	STARPU_HETEROPRIO_URT_DOT_REL_DIFF_PURE,
			
 
				+	STARPU_HETEROPRIO_URT_DOT_REL_DIFF_PURE_2,
			
 
				+	STARPU_HETEROPRIO_URT_DOT_DIFF_2, // 10
			
 
				+	STARPU_HETEROPRIO_URT_DOT_DIFF_3,
			
 
				+	STARPU_HETEROPRIO_URT_DOT_DIFF_4,
			
 
				+	STARPU_HETEROPRIO_URT_DOT_DIFF_5,
			
 
				+	STARPU_HETEROPRIO_URT_DOT_DIFF_6,
			
 
				+	STARPU_HETEROPRIO_URT_DOT_DIFF_7, // 15
			
 
				+	STARPU_HETEROPRIO_URT_DOT_DIFF_8,
			
 
				+	STARPU_HETEROPRIO_URT_DOT_DIFF_9,
			
 
				+	STARPU_HETEROPRIO_URT_DOT_DIFF_10,
			
 
				+	STARPU_HETEROPRIO_URT_DOT_DIFF_11,
			
 
				+	STARPU_HETEROPRIO_URTS_PER_SECONDS, // 20
			
 
				+	STARPU_HETEROPRIO_URTS_PER_SECONDS_2,
			
 
				+	STARPU_HETEROPRIO_URTS_PER_SECONDS_DIFF,
			
 
				+	STARPU_HETEROPRIO_URTS_TIME_RELEASED_DIFF,
			
 
				+	STARPU_HETEROPRIO_URTS_TIME_COMBINATION,
			
 
				+	STARPU_HETEROPRIO_NODS_PER_SECOND,
			
 
				+	STARPU_HETEROPRIO_NODS_TIME_RELEASED,
			
 
				+	STARPU_HETEROPRIO_NODS_TIME_RELEASED_DIFF
			
 
				+};
			
 
				+
			
 
				+static const char starpu_autoheteroprio_priority_ordering_policy_names[STARPU_AUTOHETEROPRIO_PRIORITY_ORDERING_POLICY_COUNT][64] = 
			
 
				+{
			
 
				+	"STARPU_HETEROPRIO_NOD_TIME_COMBINATION",
			
 
				+	"STARPU_HETEROPRIO_BEST_NODS_SCORE",
			
 
				+	"STARPU_HETEROPRIO_BEST_NODS",
			
 
				+	"STARPU_HETEROPRIO_URT_PURE",
			
 
				+	"STARPU_HETEROPRIO_URT",
			
 
				+	"STARPU_HETEROPRIO_URT_2",
			
 
				+	"STARPU_HETEROPRIO_URT_DOT_DIFF_PURE",
			
 
				+	"STARPU_HETEROPRIO_URT_DOT_DIFF_PURE_2",
			
 
				+	"STARPU_HETEROPRIO_URT_DOT_REL_DIFF_PURE",
			
 
				+	"STARPU_HETEROPRIO_URT_DOT_REL_DIFF_PURE_2",
			
 
				+	"STARPU_HETEROPRIO_URT_DOT_DIFF_2",
			
 
				+	"STARPU_HETEROPRIO_URT_DOT_DIFF_3",
			
 
				+	"STARPU_HETEROPRIO_URT_DOT_DIFF_4",
			
 
				+	"STARPU_HETEROPRIO_URT_DOT_DIFF_5",
			
 
				+	"STARPU_HETEROPRIO_URT_DOT_DIFF_6",
			
 
				+	"STARPU_HETEROPRIO_URT_DOT_DIFF_7",
			
 
				+	"STARPU_HETEROPRIO_URT_DOT_DIFF_8",
			
 
				+	"STARPU_HETEROPRIO_URT_DOT_DIFF_9",
			
 
				+	"STARPU_HETEROPRIO_URT_DOT_DIFF_10",
			
 
				+	"STARPU_HETEROPRIO_URT_DOT_DIFF_11",
			
 
				+	"STARPU_HETEROPRIO_URTS_PER_SECONDS",
			
 
				+	"STARPU_HETEROPRIO_URTS_PER_SECONDS_2",
			
 
				+	"STARPU_HETEROPRIO_URTS_PER_SECONDS_DIFF",
			
 
				+	"STARPU_HETEROPRIO_URTS_TIME_RELEASED_DIFF",
			
 
				+	"STARPU_HETEROPRIO_URTS_TIME_COMBINATION",
			
 
				+	"STARPU_HETEROPRIO_NODS_PER_SECOND",
			
 
				+	"STARPU_HETEROPRIO_NODS_TIME_RELEASED",
			
 
				+	"STARPU_HETEROPRIO_NODS_TIME_RELEASED_DIFF"
			
 
				+};
			
 
				+
			
 
				+/** Set if heteroprio should use data locality or not */
			
 
				+void starpu_heteroprio_set_use_locality(unsigned sched_ctx_id, unsigned use_locality);
			
 
				+
			
 
				 /** Tell how many prio there are for a given arch */
			
 
				 void starpu_heteroprio_set_nb_prios(unsigned sched_ctx_id, enum starpu_worker_archtype arch, unsigned max_prio);
			
 
				 
			
@@ -40,11 +113,20 @@ void starpu_heteroprio_set_mapping(unsigned sched_ctx_id, enum starpu_worker_arc
 
				 /** Tell which arch is the fastest for the tasks of a bucket (optional) */
			
 
				 void starpu_heteroprio_set_faster_arch(unsigned sched_ctx_id, enum starpu_worker_archtype arch, unsigned bucket_id);
			
 
				 
			
 
				-/** Tell how slow is a arch for the tasks of a bucket (optional) */ 
			
 
				+/** Tell how slow an arch is for the tasks of a bucket (optional) */
			
 
				 void starpu_heteroprio_set_arch_slow_factor(unsigned sched_ctx_id, enum starpu_worker_archtype arch, unsigned bucket_id, float slow_factor);
			
 
				 
			
 
				+/** One memory node will be one wgroup */
			
 
				+void starpu_heteroprio_map_wgroup_memory_nodes(unsigned sched_ctx_id);
			
 
				+
			
 
				+/** Print the current setup groups */
			
 
				+void starpu_heteroprio_print_wgroups(FILE *stream, unsigned sched_ctx_id);
			
 
				+
			
 
				+/** @} */
			
 
				+
			
 
				 #ifdef __cplusplus
			
 
				 }
			
 
				 #endif
			
 
				 
			
 
				 #endif /* __STARPU_SCHEDULER_HETEROPRIO_H__ */
			
 
				+
			
--- a/include/starpu_worker.h
+++ b/include/starpu_worker.h
@@ -158,6 +158,16 @@ extern struct starpu_worker_collection starpu_worker_list;
 
				 extern struct starpu_worker_collection starpu_worker_tree;
			
 
				 
			
 
				 /**
			
 
				+   Return true if type matches one of StarPU's defined worker architectures
			
 
				+*/
			
 
				+unsigned starpu_worker_archtype_is_valid(enum starpu_worker_archtype type);
			
 
				+
			
 
				+/**
			
 
				+   Convert a mask of architectures to a worker archtype
			
 
				+*/
			
 
				+enum starpu_worker_archtype starpu_arch_mask_to_worker_archtype(unsigned mask);
			
 
				+
			
 
				+/**
			
 
				    Return the number of workers (i.e. processing units executing
			
 
				    StarPU tasks). The return value should be at most \ref
			
 
				    STARPU_NMAXWORKERS.
			
@@ -257,6 +267,11 @@ int starpu_worker_get_by_type(enum starpu_worker_archtype type, int num);
 
				 int starpu_worker_get_by_devid(enum starpu_worker_archtype type, int devid);
			
 
				 
			
 
				 /**
			
 
				+   Return true if task has a codelet for this worker type
			
 
				+*/
			
 
				+unsigned starpu_worker_type_can_execute_task(enum starpu_worker_archtype worker_type, const struct starpu_task *task);
			
 
				+
			
 
				+/**
			
 
				    Get the name of the worker \p id. StarPU associates a unique human
			
 
				    readable string to each processing unit. This function copies at
			
 
				    most the \p maxlen first bytes of the unique string associated to
			
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -102,6 +102,7 @@ noinst_HEADERS = 						\
 
				 	core/detect_combined_workers.h				\
			
 
				 	sched_policies/helper_mct.h				\
			
 
				 	sched_policies/fifo_queues.h				\
			
 
				+	sched_policies/heteroprio.h				\
			
 
				 	datawizard/node_ops.h					\
			
 
				 	datawizard/footprint.h					\
			
 
				 	datawizard/datawizard.h					\
			
--- a/src/core/perfmodel/perfmodel.c
+++ b/src/core/perfmodel/perfmodel.c
@@ -541,6 +541,12 @@ void _starpu_set_perf_model_dirs()
 
				 	snprintf(_perf_model_dir_debug, _PERF_MODEL_DIR_MAXLEN, "%s/debug/", _perf_model_dir);
			
 
				 }
			
 
				 
			
 
				+/* Return the base performance-model directory string (_perf_model_dir), */
			
 
				+/* after calling _starpu_create_sampling_directory_if_needed(). */
			
 
				+/* NOTE(review): the returned pointer is the internal buffer itself, not a copy; presumably callers must neither free nor modify it -- confirm. */
			
 
				+char *_starpu_get_perf_model_dir()
			
 
				+{
			
 
				+	_starpu_create_sampling_directory_if_needed();
			
 
				+	return _perf_model_dir;
			
 
				+}
			
 
				+
			
 
				 char *_starpu_get_perf_model_dir_codelet()
			
 
				 {
			
 
				 	_starpu_create_sampling_directory_if_needed();
			
--- a/src/core/perfmodel/perfmodel.h
+++ b/src/core/perfmodel/perfmodel.h
@@ -65,6 +65,7 @@ struct starpu_perfmodel_arch;
 
				 
			
 
				 extern unsigned _starpu_calibration_minimum;
			
 
				 
			
 
				+char *_starpu_get_perf_model_dir();
			
 
				 char *_starpu_get_perf_model_dir_codelet() STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
			
 
				 char *_starpu_get_perf_model_dir_bus();
			
 
				 char *_starpu_get_perf_model_dir_debug();
			
--- a/src/core/workers.c
+++ b/src/core/workers.c
@@ -208,6 +208,7 @@ struct _starpu_driver_info starpu_driver_info[STARPU_NARCH];
 
				 
			
 
				 void _starpu_driver_info_register(enum starpu_worker_archtype archtype, const struct _starpu_driver_info *info)
			
 
				 {
			
 
				+	STARPU_ASSERT(archtype >= 0 && archtype < STARPU_NARCH);
			
 
				 	starpu_driver_info[archtype] = *info;
			
 
				 }
			
 
				 
			
@@ -2002,6 +2003,24 @@ void starpu_shutdown(void)
 
				 #endif
			
 
				 }
			
 
				 
			
 
				+/* Tell whether type lies within [0, STARPU_NARCH) and its starpu_driver_info entry has a non-NULL name_upper. */
			
 
				+/* Returns 1 when both conditions hold, 0 otherwise. */
			
 
				+unsigned starpu_worker_archtype_is_valid(enum starpu_worker_archtype type)
			
 
				+{
			
 
				+	/* Reject out-of-range values first, so the table below is never indexed out of bounds */
			
 
				+	if (!(type >= 0 && type < STARPU_NARCH))
			
 
				+		return 0;
			
 
				+
			
 
				+	return starpu_driver_info[type].name_upper != NULL;
			
 
				+}
			
 
				+
			
 
				+/* Convert an architecture mask having exactly one bit set into the matching worker archtype. */
			
 
				+/* Bit (n+1) of the mask maps to archtype n: ffs() gives the 1-based position of the lowest set bit, hence the -2 offset. */
			
 
				+/* STARPU_ASSERT checks that the mask has a single bit set and that the result is a valid worker architecture. */
			
 
				+enum starpu_worker_archtype starpu_arch_mask_to_worker_archtype(unsigned mask)
			
 
				+{
			
 
				+	STARPU_ASSERT(mask && !(mask & (mask-1))); // ensures that exactly one bit of the mask is set
			
 
				+
			
 
				+	// Keep the index in a signed int until it is range-checked: whether an enum type is signed is
			
 
				+	// implementation-defined, so the ">= 0" test is only meaningful on a signed value (mask == 1 yields -1)
			
 
				+	int archindex = ffs(mask)-2; // ffs(mask) is the 1-based index of the lowest set bit
			
 
				+
			
 
				+	STARPU_ASSERT(archindex >= 0 && archindex < STARPU_NARCH); // index is non-negative and lower than the number of archs
			
 
				+
			
 
				+	enum starpu_worker_archtype worker_type = (enum starpu_worker_archtype) archindex;
			
 
				+
			
 
				+	STARPU_ASSERT(starpu_worker_archtype_is_valid(worker_type)); // worker_type is a valid worker architecture
			
 
				+
			
 
				+	return worker_type;
			
 
				+}
			
 
				+
			
 
				 #undef starpu_worker_get_count
			
 
				 unsigned starpu_worker_get_count(void)
			
 
				 {
			
@@ -2356,6 +2375,23 @@ int starpu_worker_get_devids(enum starpu_worker_archtype type, int *devids, int
 
				 	return ndevids;
			
 
				 }
			
 
				 
			
 
				+/* Return non-zero if the codelet of task provides a first implementation (index 0) for worker_type. */
			
 
				+/* Only CPU, CUDA, OpenCL and MPI master-slave worker types are examined; any other type yields 0. */
			
 
				+/* A NULL task, or a task without codelet (task->cl == NULL), has nothing to run and yields 0 as well. */
			
 
				+unsigned starpu_worker_type_can_execute_task(enum starpu_worker_archtype worker_type, const struct starpu_task *task)
			
 
				+{
			
 
				+	/* Guard against codelet-less tasks instead of dereferencing a NULL pointer below */
			
 
				+	if (task == NULL || task->cl == NULL)
			
 
				+		return 0;
			
 
				+
			
 
				+	switch(worker_type)
			
 
				+	{
			
 
				+		case STARPU_CPU_WORKER:
			
 
				+			return task->cl->cpu_funcs[0] != NULL;
			
 
				+		case STARPU_CUDA_WORKER:
			
 
				+			return task->cl->cuda_funcs[0] != NULL;
			
 
				+		case STARPU_OPENCL_WORKER:
			
 
				+			return task->cl->opencl_funcs[0] != NULL;
			
 
				+		case STARPU_MPI_MS_WORKER:
			
 
				+			return task->cl->mpi_ms_funcs[0] != NULL;
			
 
				+		default:
			
 
				+			/* Worker types with no function array checked here are reported as unable to run the task */
			
 
				+			return 0;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				 void starpu_worker_get_name(int id, char *dst, size_t maxlen)
			
 
				 {
			
 
				 	char *name = _starpu_config.workers[id].name;
			
@@ -2547,6 +2583,7 @@ unsigned starpu_worker_get_sched_ctx_list(int workerid, unsigned **sched_ctxs)
 
				 
			
 
				 const char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type)
			
 
				 {
			
 
				+	STARPU_ASSERT(type >= 0 && type < STARPU_NARCH);
			
 
				 	const char *ret = starpu_driver_info[type].name_upper;
			
 
				 	if (!ret)
			
 
				 		ret = "unknown";
			
@@ -2555,6 +2592,7 @@ const char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type)
 
				 
			
 
				 const char *starpu_worker_get_type_as_env_var(enum starpu_worker_archtype type)
			
 
				 {
			
 
				+	STARPU_ASSERT(type >= 0 && type < STARPU_NARCH);
			
 
				 	const char *ret = starpu_driver_info[type].name_var;
			
 
				 	if (!ret)
			
 
				 		ret = "UNKNOWN";
			
@@ -2768,6 +2806,7 @@ void starpu_worker_set_waking_up_callback(void (*callback)(unsigned workerid))
 
				 
			
 
				 enum starpu_node_kind starpu_worker_get_memory_node_kind(enum starpu_worker_archtype type)
			
 
				 {
			
 
				+	STARPU_ASSERT(type >= 0 && type < STARPU_NARCH);
			
 
				 	enum starpu_node_kind kind = starpu_driver_info[type].memory_kind;
			
 
				 	STARPU_ASSERT_MSG(kind != (enum starpu_node_kind) -1, "no memory for archtype %d", type);
			
 
				 	return kind;
			
--- a/src/profiling/profiling.c
+++ b/src/profiling/profiling.c
@@ -441,7 +441,7 @@ int starpu_profiling_worker_get_info(int workerid, struct starpu_profiling_worke
 
				 		info->executed_tasks = worker_info[workerid].executed_tasks;
			
 
				 	}
			
 
				 
			
 
				-	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_get_worker_struct(workerid)->sched_mutex);
			
 
				+	STARPU_PTHREAD_MUTEX_LOCK_SCHED(&_starpu_get_worker_struct(workerid)->sched_mutex);
			
 
				 	STARPU_PTHREAD_MUTEX_LOCK(&worker_info_mutex[workerid]);
			
 
				 
			
 
				 	if (info)
			
@@ -476,7 +476,7 @@ int starpu_profiling_worker_get_info(int workerid, struct starpu_profiling_worke
 
				 	_starpu_worker_reset_profiling_info_with_lock(workerid);
			
 
				 
			
 
				 	STARPU_PTHREAD_MUTEX_UNLOCK(&worker_info_mutex[workerid]);
			
 
				-	STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_get_worker_struct(workerid)->sched_mutex);
			
 
				+	STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&_starpu_get_worker_struct(workerid)->sched_mutex);
			
 
				 
			
 
				 	return 0;
			
 
				 }
			
--- a/src/sched_policies/heteroprio.c
+++ b/src/sched_policies/heteroprio.c
--- a/src/sched_policies/heteroprio.h
+++ b/src/sched_policies/heteroprio.h
@@ -0,0 +1,28 @@
 
				+#ifndef __SCHED_HETEROPRIO_H__
			
 
				+#define __SCHED_HETEROPRIO_H__
			
 
				+
			
 
				+#include <schedulers/starpu_heteroprio.h>
			
 
				+
			
 
				+#define CODELET_MAX_NAME_LENGTH 32
			
 
				+#define HETEROPRIO_MAX_PRIO 100
			
 
				+#define LAHETEROPRIO_MAX_WORKER_GROUPS 10
			
 
				+
			
 
				+#define AUTOHETEROPRIO_NO_NAME "NO_NAME"
			
 
				+
			
 
				+// will tend to ignore tasks older than this when measuring values such as NOD, execution time, etc.
			
 
				+// i.e. if there are more than AUTOHETEROPRIO_RELEVANT_TASK_LIFE tasks of the same type
			
 
				+#define AUTOHETEROPRIO_RELEVANT_TASK_LIFE 256
			
 
				+
			
 
				+#define AUTOHETEROPRIO_RELEVANT_SAMPLE_SIZE 16
			
 
				+
			
 
				+#define AUTOHETEROPRIO_EXTREMELY_LONG_TIME 999999999999999.0
			
 
				+#define AUTOHETEROPRIO_LONG_TIME 100000000.0
			
 
				+#define AUTOHETEROPRIO_FAIR_TIME 1000.0
			
 
				+
			
 
				+#define AUTOHETEROPRIO_DEFAULT_TASK_TIME AUTOHETEROPRIO_FAIR_TIME
			
 
				+
			
 
				+// at the end of the execution, if the sum of all worker profiling times is greater than this, the times will be compressed so that no time exceeds this one
			
 
				+// (probably in us)
			
 
				+#define AUTOHETEROPRIO_MAX_WORKER_PROFILING_TIME 1000000000.0
			
 
				+
			
 
				+#endif // __SCHED_HETEROPRIO_H__