4 years ago · 3d07c863ba
--- a/AUTHORS
+++ b/AUTHORS
@@ -11,6 +11,7 @@ Collin Nicolas, Inria, <nicolas.collin@inria.fr>
 
																 Danjean Vincent, University Grenoble Alpes, <Vincent.Danjean@ens-lyon.org>
															
 
																 Denis Alexandre, Inria, <alexandre.denis@inria.fr>
															
 
																 Eyraud-Dubois Lionel, Inria, <lionel.eyraud-dubois@inria.fr>
															
 
																+Flint Clément, Inria, <clement.flint@inria.fr>
															
 
																 Furmento Nathalie, CNRS, <nathalie.furmento@labri.fr>
															
 
																 Guermouche Amina, Télécom SudParis, <amina.guermouche@inria.fr>
															
 
																 Guilbaud Adrien, Inria, <adrien.guilbaud@inria.fr>
															
@@ -30,6 +31,7 @@ Nakov Stojce, Inria, <stojce.nakov@inria.fr>
 
																 Namyst Raymond, Université de Bordeaux, <raymond.namyst@labri.fr>
															
 
																 Nesi Lucas Leandro, Federal University of Rio Grande do Sul (UFRGS), <llnesi@inf.ufrgs.br>
															
 
																 Pablo Joris, Inria, <joris.pablo@orange.fr>
															
 
																+Paillat Ludovic, Inria, <ludovic.paillat@inria.fr>
															
 
																 Pasqualinotto Damien, Université de Bordeaux, <dam.pasqualinotto@wanadoo.fr>
															
 
																 Pinto Vinicius Garcia, <vgpinto@inf.ufrgs.br>
															
 
																 Pitoiset Samuel, Inria, <samuel.pitoiset@inria.fr>
															
--- a/configure.ac
+++ b/configure.ac
@@ -2042,6 +2042,21 @@ AM_CONDITIONAL([STARPU_USE_AYUDAME2], [test "x$enable_ayudame2" = "xyes"])
 
																 STARPU_FXT_EVENT_DEFINES="`grep -E '#define\s+_STARPU_(MPI_)?FUT_' ${srcdir}/src/common/fxt.h ${srcdir}/mpi/src/starpu_mpi_fxt.h | grep 0x | grep -v 0x1 | cut -d : -f 2`"
															
 
																 AC_SUBST([STARPU_FXT_EVENT_DEFINES])
															
 
																+# Heteroprio works better if it can store information based on the program's name. NOTE(review): glibc declares program_invocation_short_name in <errno.h> only when _GNU_SOURCE is defined - confirm confdefs.h provides it, otherwise this check always answers no
															
 
																+AC_MSG_CHECKING(whether the target supports program_invocation_short_name)
															
 
																+AC_LINK_IFELSE([AC_LANG_SOURCE(
															
 
																+	[
															
 
																+		#include <stdio.h>
															
 
																+		#include <errno.h>
															
 
																+		int main() {
															
 
																+			printf("%s\n", program_invocation_short_name);
															
 
																+			return 0;
															
 
																+		}
															
 
																+	])],
															
 
																+	[AC_DEFINE([STARPU_HAVE_PROGRAM_INVOCATION_SHORT_NAME], [1], [variable program_invocation_short_name is available]) AC_MSG_RESULT(yes)],
															
 
																+	AC_MSG_RESULT(no)
															
 
																+)
															
 
																+
															
 
																 ###############################################################################
															
 
																 #                                                                             #
															
 
																 #                  Miscellaneous options for StarPU                           #
															
--- a/doc/doxygen/chapters/320_scheduling.doxy
+++ b/doc/doxygen/chapters/320_scheduling.doxy
@@ -374,7 +374,11 @@ The tasks are stored inside buckets, where each bucket corresponds to a priority
 
																 worker uses an indirect access array to know the order in which it should access the buckets. Moreover,
															
 
																 all the tasks inside a bucket must be compatible with all the processing units that may access it (at least).
															
 
																-As an example, see the following code where we have 5 types of tasks.
															
 
																+These priorities are now automatically assigned by Heteroprio in auto calibration mode using heuristics.
															
 
																+If you want to set these priorities manually, you can change \ref STARPU_HETEROPRIO_USE_AUTO_CALIBRATION
															
 
																+and follow the example below.
															
 
																+
															
 
																+In this example code, we have 5 types of tasks.
															
 
																 CPU workers can compute all of them, but CUDA workers can only execute
															
 
																 tasks of types 0 and 1, and is expected to go 20 and 30 time
															
 
																 faster than the CPU, respectively.
															
@@ -431,4 +435,41 @@ So, in the given example, the priority of a task will be between 0 and 4 include
 
																 However, tasks of priorities 0-1 must provide CPU and CUDA kernels, and
															
 
																 tasks of priorities 2-4 must provide CPU kernels (at least).
															
 
																+\subsection LAHeteroprio Using locality aware Heteroprio
															
 
																+
															
 
																+Heteroprio supports a mode where locality is evaluated to guide the distribution
															
 
																+of the tasks (see https://peerj.com/articles/cs-190.pdf).
															
 
																+Currently, this mode is available using the dedicated function or an environment variable
															
 
																+\ref STARPU_HETEROPRIO_USE_LA, and can be configured using environment variables.
															
 
																+\code{.c}
															
 
																+void starpu_heteroprio_set_use_locality(unsigned sched_ctx_id, unsigned use_locality);
															
 
																+\endcode
															
 
																+
															
 
																+In this mode, multiple strategies are available to determine which memory node's workers are the most qualified for executing a specific task. This strategy can be set with \ref STARPU_LAHETEROPRIO_PUSH
															
 
																+and available strategies are:
															
 
																+- WORKER: the worker which pushed the task is preferred for the execution.
															
 
																+- LcS: the node with the shortest data transfer time (estimated by StarPU) is the most qualified.
															
 
																+- LS_SDH: the node with the smallest data amount to be transferred will be preferred.
															
 
																+- LS_SDH2: similar to LS_SDH, but data in write access is counted in a quadratic manner to give them more importance.
															
 
																+- LS_SDHB: similar to LS_SDH, but data in write access is balanced with a coefficient (its value is set to 1000) and
															
 
																+for the same amount of data, the one with fewer pieces of data to be transferred will be preferred.
															
 
																+- LC_SMWB: similar to LS_SDH, but the amount of data in write access gets multiplied by a coefficient which gets closer to 2
															
 
																+as the amount of data in read access gets larger than the data in write access.
															
 
																+- AUTO: the default strategy; it selects the best strategy and changes it at runtime to improve performance.
															
 
																+
															
 
																+Other environment variables to configure LAHeteroprio are documented in \ref ConfiguringLaHeteroprio
															
 
																+
															
 
																+\subsection AutoHeteroprio Using Heteroprio in auto-calibration mode
															
 
																+
															
 
																+In this mode, Heteroprio saves data about each program execution, in order to improve future ones.
															
 
																+By default, these files are stored in the folder used by perfmodel, but this can be changed using the
															
 
																+\ref STARPU_HETEROPRIO_DATA_DIR environment variable. You can also specify the data filename directly using
															
 
																+\ref STARPU_HETEROPRIO_DATA_FILE.
															
 
																+
															
 
																+Additionally, to assign priorities to tasks, Heteroprio needs a way to detect that some tasks are similar.
															
 
																+By default, Heteroprio looks for tasks with the same perfmodel, or with the same codelet's name if no perfmodel was assigned.
															
 
																+This behavior can be changed to only consider the codelet's name by setting
															
 
																+\ref STARPU_HETEROPRIO_CODELET_GROUPING_STRATEGY to <c>1</c>.
															
 
																+
															
 
																+Other environment variables to configure AutoHeteroprio are documented in \ref ConfiguringAutoHeteroprio
															
 
																 */
															
--- a/doc/doxygen/chapters/501_environment_variables.doxy
+++ b/doc/doxygen/chapters/501_environment_variables.doxy
@@ -622,6 +622,123 @@ Specify which PAPI events should be recorded in the trace (\ref PapiCounters).
 
																 </dl>
															
 
																+\section ConfiguringHeteroprio Configuring The Heteroprio Scheduler
															
 
																+
															
 
																+\subsection ConfiguringLaHeteroprio Configuring LAHeteroprio
															
 
																+<dl>
															
 
																+
															
 
																+<dt>STARPU_HETEROPRIO_USE_LA</dt>
															
 
																+<dd>
															
 
																+\anchor STARPU_HETEROPRIO_USE_LA
															
 
																+\addindex __env__STARPU_HETEROPRIO_USE_LA
															
 
																+Enable the locality aware mode of Heteroprio which guides the distribution of tasks to workers
															
 
																+in order to reduce the data transfers between memory nodes.
															
 
																+</dd>
															
 
																+
															
 
																+<dt>STARPU_LAHETEROPRIO_PUSH</dt>
															
 
																+<dd>
															
 
																+\anchor STARPU_LAHETEROPRIO_PUSH
															
 
																+\addindex __env__STARPU_LAHETEROPRIO_PUSH
															
 
																+Choose between the different push strategies for locality aware Heteroprio: 
															
 
																+WORKER, LcS, LS_SDH, LS_SDH2, LS_SDHB, LC_SMWB, AUTO (by default: AUTO). These are detailed in
															
 
																+\ref LAHeteroprio
															
 
																+</dd>
															
 
																+
															
 
																+<dt>STARPU_LAHETEROPRIO_S_[ARCH]</dt>
															
 
																+<dd>
															
 
																+\anchor STARPU_LAHETEROPRIO_S_[ARCH]
															
 
																+\addindex __env__STARPU_LAHETEROPRIO_S_arch
															
 
																+Specify the number of memory nodes contained in an affinity group. An affinity
															
 
																+group will be composed of the closest memory nodes to a worker of a given architecture,
															
 
																+and this worker will look for tasks available inside these memory nodes, before
															
 
																+considering stealing tasks outside this group.
															
 
																+ARCH can be CPU, CUDA, OPENCL, MICC, SCC, MPI_MS, etc.
															
 
																+</dd>
															
 
																+
															
 
																+<dt>STARPU_LAHETEROPRIO_PRIO_STEP_[ARCH]</dt>
															
 
																+<dd>
															
 
																+\anchor STARPU_LAHETEROPRIO_PRIO_STEP_[ARCH]
															
 
																+\addindex __env__STARPU_LAHETEROPRIO_PRIO_STEP_arch
															
 
																+Specify the number of buckets in the local memory node in which a worker will look for
															
 
																+available tasks, before this worker starts looking for tasks in other memory nodes' buckets.
															
 
																+ARCH indicates that this number is specific to a given arch which can be:
															
 
																+CPU, CUDA, OPENCL, MICC, SCC, MPI_MS, etc.
															
 
																+</dd>
															
 
																+
															
 
																+</dl>
															
 
																+
															
 
																+\subsection ConfiguringAutoHeteroprio Configuring AutoHeteroprio
															
 
																+<dl>
															
 
																+
															
 
																+<dt>STARPU_HETEROPRIO_USE_AUTO_CALIBRATION</dt>
															
 
																+<dd>
															
 
																+\anchor STARPU_HETEROPRIO_USE_AUTO_CALIBRATION
															
 
																+\addindex __env__STARPU_HETEROPRIO_USE_AUTO_CALIBRATION
															
 
																+Enable the auto calibration mode of Heteroprio which assigns priorities to tasks automatically.
															
 
																+</dd>
															
 
																+
															
 
																+<dt>STARPU_HETEROPRIO_DATA_DIR</dt>
															
 
																+<dd>
															
 
																+\anchor STARPU_HETEROPRIO_DATA_DIR
															
 
																+\addindex __env__STARPU_HETEROPRIO_DATA_DIR
															
 
																+Specify the path of the directory where Heteroprio stores data about program executions.
															
 
																+By default, these are stored in the same directory used by perfmodel.
															
 
																+</dd>
															
 
																+
															
 
																+<dt>STARPU_HETEROPRIO_DATA_FILE</dt>
															
 
																+<dd>
															
 
																+\anchor STARPU_HETEROPRIO_DATA_FILE
															
 
																+\addindex __env__STARPU_HETEROPRIO_DATA_FILE
															
 
																+Specify the filename where Heteroprio will save data about the current program's execution.
															
 
																+</dd>
															
 
																+
															
 
																+<dt>STARPU_HETEROPRIO_CODELET_GROUPING_STRATEGY</dt>
															
 
																+<dd>
															
 
																+\anchor STARPU_HETEROPRIO_CODELET_GROUPING_STRATEGY
															
 
																+\addindex __env__STARPU_HETEROPRIO_CODELET_GROUPING_STRATEGY
															
 
																+Choose how Heteroprio groups similar tasks. It can be <c>0</c> to group
															
 
																+the tasks with the same perfmodel or the same codelet's name if no perfmodel was assigned.
															
 
																+It can also be <c>1</c> to group the tasks only by codelet's name.
															
 
																+</dd>
															
 
																+
															
 
																+<dt>STARPU_AUTOHETEROPRIO_PRINT_DATA_ON_UPDATE</dt>
															
 
																+<dd>
															
 
																+\anchor STARPU_AUTOHETEROPRIO_PRINT_DATA_ON_UPDATE
															
 
																+\addindex __env__STARPU_AUTOHETEROPRIO_PRINT_DATA_ON_UPDATE
															
 
																+Enable the printing of priorities' data every time they get updated.
															
 
																+</dd>
															
 
																+
															
 
																+<dt>STARPU_AUTOHETEROPRIO_PRINT_AFTER_ORDERING</dt>
															
 
																+<dd>
															
 
																+\anchor STARPU_AUTOHETEROPRIO_PRINT_AFTER_ORDERING
															
 
																+\addindex __env__STARPU_AUTOHETEROPRIO_PRINT_AFTER_ORDERING
															
 
																+Enable the printing of priorities' order for each architecture every time there's a reordering.
															
 
																+</dd>
															
 
																+
															
 
																+<dt>STARPU_AUTOHETEROPRIO_PRIORITY_ORDERING_POLICY</dt>
															
 
																+<dd>
															
 
																+\anchor STARPU_AUTOHETEROPRIO_PRIORITY_ORDERING_POLICY
															
 
																+\addindex __env__STARPU_AUTOHETEROPRIO_PRIORITY_ORDERING_POLICY
															
 
																+Specify the heuristic which will be used to assign priorities automatically.
															
 
																+It should be an integer between 0 and 27.
															
 
																+</dd>
															
 
																+
															
 
																+<dt>STARPU_AUTOHETEROPRIO_ORDERING_INTERVAL</dt>
															
 
																+<dd>
															
 
																+\anchor STARPU_AUTOHETEROPRIO_ORDERING_INTERVAL
															
 
																+\addindex __env__STARPU_AUTOHETEROPRIO_ORDERING_INTERVAL
															
 
																+Specify the period (in number of tasks pushed) between priority reordering operations.
															
 
																+</dd>
															
 
																+
															
 
																+<dt>STARPU_AUTOHETEROPRIO_FREEZE_GATHERING</dt>
															
 
																+<dd>
															
 
																+\anchor STARPU_AUTOHETEROPRIO_FREEZE_GATHERING
															
 
																+\addindex __env__STARPU_AUTOHETEROPRIO_FREEZE_GATHERING
															
 
																+Disable data gathering from task executions.
															
 
																+</dd>
															
 
																+
															
 
																+</dl>
															
 
																+
															
 
																 \section Extensions Extensions
															
 
																 <dl>
															
--- a/doc/doxygen/refman.tex
+++ b/doc/doxygen/refman.tex
@@ -263,6 +263,7 @@ Documentation License”.
 
																 \input{group__API__Master__Slave}
															
 
																 \input{group__API__Random__Functions}
															
 
																 \input{group__API__Sink}
															
 
																+\input{group__API__HeteroPrio}
															
 
																 \chapter{File Index}
															
 
																 \input{files}
															
--- a/include/schedulers/starpu_heteroprio.h
+++ b/include/schedulers/starpu_heteroprio.h
@@ -24,13 +24,86 @@ extern "C"
 
																 {
															
 
																 #endif
															
 
																-#define STARPU_HETEROPRIO_MAX_PRIO 100
															
 
																+/**
															
 
																+   @defgroup API_HeteroPrio Heteroprio Scheduler
															
 
																+   @brief This is the interface for the heteroprio scheduler
															
 
																+   @{
															
 
																+ */
															
 
																 #define STARPU_HETEROPRIO_MAX_PREFETCH 2
															
 
																 #if STARPU_HETEROPRIO_MAX_PREFETCH <= 0
															
 
																 #error STARPU_HETEROPRIO_MAX_PREFETCH == 1 means no prefetch so STARPU_HETEROPRIO_MAX_PREFETCH must >= 1
															
 
																 #endif
															
 
																+#define STARPU_AUTOHETEROPRIO_PRIORITY_ORDERING_POLICY_COUNT 28
															
 
																+
															
 
																+enum starpu_autoheteroprio_priority_ordering_policy // 28 enumerators, implicitly numbered 0..27 (same count as STARPU_AUTOHETEROPRIO_PRIORITY_ORDERING_POLICY_COUNT)
															
 
																+{
															
 
																+	STARPU_HETEROPRIO_NOD_TIME_COMBINATION, // 0
															
 
																+	STARPU_HETEROPRIO_BEST_NODS_SCORE,
															
 
																+	STARPU_HETEROPRIO_BEST_NODS,
															
 
																+	STARPU_HETEROPRIO_URT_PURE,
															
 
																+	STARPU_HETEROPRIO_URT,
															
 
																+	STARPU_HETEROPRIO_URT_2, // 5
															
 
																+	STARPU_HETEROPRIO_URT_DOT_DIFF_PURE,
															
 
																+	STARPU_HETEROPRIO_URT_DOT_DIFF_PURE_2,
															
 
																+	STARPU_HETEROPRIO_URT_DOT_REL_DIFF_PURE,
															
 
																+	STARPU_HETEROPRIO_URT_DOT_REL_DIFF_PURE_2,
															
 
																+	STARPU_HETEROPRIO_URT_DOT_DIFF_2, // 10
															
 
																+	STARPU_HETEROPRIO_URT_DOT_DIFF_3,
															
 
																+	STARPU_HETEROPRIO_URT_DOT_DIFF_4,
															
 
																+	STARPU_HETEROPRIO_URT_DOT_DIFF_5,
															
 
																+	STARPU_HETEROPRIO_URT_DOT_DIFF_6,
															
 
																+	STARPU_HETEROPRIO_URT_DOT_DIFF_7, // 15
															
 
																+	STARPU_HETEROPRIO_URT_DOT_DIFF_8,
															
 
																+	STARPU_HETEROPRIO_URT_DOT_DIFF_9,
															
 
																+	STARPU_HETEROPRIO_URT_DOT_DIFF_10,
															
 
																+	STARPU_HETEROPRIO_URT_DOT_DIFF_11,
															
 
																+	STARPU_HETEROPRIO_URTS_PER_SECONDS, // 20
															
 
																+	STARPU_HETEROPRIO_URTS_PER_SECONDS_2,
															
 
																+	STARPU_HETEROPRIO_URTS_PER_SECONDS_DIFF,
															
 
																+	STARPU_HETEROPRIO_URTS_TIME_RELEASED_DIFF,
															
 
																+	STARPU_HETEROPRIO_URTS_TIME_COMBINATION,
															
 
																+	STARPU_HETEROPRIO_NODS_PER_SECOND, // 25
															
 
																+	STARPU_HETEROPRIO_NODS_TIME_RELEASED,
															
 
																+	STARPU_HETEROPRIO_NODS_TIME_RELEASED_DIFF // 27, last policy
															
 
																+};
															
 
																+
															
 
																+static const char starpu_autoheteroprio_priority_ordering_policy_names[STARPU_AUTOHETEROPRIO_PRIORITY_ORDERING_POLICY_COUNT][64] = // one string per enumerator of enum starpu_autoheteroprio_priority_ordering_policy, in the same order; NOTE(review): being static in a header, every including file gets its own copy
															
 
																+{
															
 
																+	"STARPU_HETEROPRIO_NOD_TIME_COMBINATION",
															
 
																+	"STARPU_HETEROPRIO_BEST_NODS_SCORE",
															
 
																+	"STARPU_HETEROPRIO_BEST_NODS",
															
 
																+	"STARPU_HETEROPRIO_URT_PURE",
															
 
																+	"STARPU_HETEROPRIO_URT",
															
 
																+	"STARPU_HETEROPRIO_URT_2",
															
 
																+	"STARPU_HETEROPRIO_URT_DOT_DIFF_PURE",
															
 
																+	"STARPU_HETEROPRIO_URT_DOT_DIFF_PURE_2",
															
 
																+	"STARPU_HETEROPRIO_URT_DOT_REL_DIFF_PURE",
															
 
																+	"STARPU_HETEROPRIO_URT_DOT_REL_DIFF_PURE_2",
															
 
																+	"STARPU_HETEROPRIO_URT_DOT_DIFF_2",
															
 
																+	"STARPU_HETEROPRIO_URT_DOT_DIFF_3",
															
 
																+	"STARPU_HETEROPRIO_URT_DOT_DIFF_4",
															
 
																+	"STARPU_HETEROPRIO_URT_DOT_DIFF_5",
															
 
																+	"STARPU_HETEROPRIO_URT_DOT_DIFF_6",
															
 
																+	"STARPU_HETEROPRIO_URT_DOT_DIFF_7",
															
 
																+	"STARPU_HETEROPRIO_URT_DOT_DIFF_8",
															
 
																+	"STARPU_HETEROPRIO_URT_DOT_DIFF_9",
															
 
																+	"STARPU_HETEROPRIO_URT_DOT_DIFF_10",
															
 
																+	"STARPU_HETEROPRIO_URT_DOT_DIFF_11",
															
 
																+	"STARPU_HETEROPRIO_URTS_PER_SECONDS",
															
 
																+	"STARPU_HETEROPRIO_URTS_PER_SECONDS_2",
															
 
																+	"STARPU_HETEROPRIO_URTS_PER_SECONDS_DIFF",
															
 
																+	"STARPU_HETEROPRIO_URTS_TIME_RELEASED_DIFF",
															
 
																+	"STARPU_HETEROPRIO_URTS_TIME_COMBINATION",
															
 
																+	"STARPU_HETEROPRIO_NODS_PER_SECOND",
															
 
																+	"STARPU_HETEROPRIO_NODS_TIME_RELEASED",
															
 
																+	"STARPU_HETEROPRIO_NODS_TIME_RELEASED_DIFF"
															
 
																+};
															
 
																+
															
 
																+/** Set if heteroprio should use data locality or not */
															
 
																+void starpu_heteroprio_set_use_locality(unsigned sched_ctx_id, unsigned use_locality);
															
 
																+
															
 
																 /** Tell how many prio there are for a given arch */
															
 
																 void starpu_heteroprio_set_nb_prios(unsigned sched_ctx_id, enum starpu_worker_archtype arch, unsigned max_prio);
															
@@ -40,11 +113,20 @@ void starpu_heteroprio_set_mapping(unsigned sched_ctx_id, enum starpu_worker_arc
 
																 /** Tell which arch is the faster for the tasks of a bucket (optional) */
															
 
																 void starpu_heteroprio_set_faster_arch(unsigned sched_ctx_id, enum starpu_worker_archtype arch, unsigned bucket_id);
															
 
																-/** Tell how slow is a arch for the tasks of a bucket (optional) */ 
															
 
																+/** Tell how slow is a arch for the tasks of a bucket (optional) */
															
 
																 void starpu_heteroprio_set_arch_slow_factor(unsigned sched_ctx_id, enum starpu_worker_archtype arch, unsigned bucket_id, float slow_factor);
															
 
																+/** One memory node will be one wgroup */
															
 
																+void starpu_heteroprio_map_wgroup_memory_nodes(unsigned sched_ctx_id);
															
 
																+
															
 
																+/** Print the current setup groups */
															
 
																+void starpu_heteroprio_print_wgroups(FILE *stream, unsigned sched_ctx_id);
															
 
																+
															
 
																+/** @} */
															
 
																+
															
 
																 #ifdef __cplusplus
															
 
																 }
															
 
																 #endif
															
 
																 #endif /* __STARPU_SCHEDULER_HETEROPRIO_H__ */
															
 
																+
															
--- a/include/starpu_worker.h
+++ b/include/starpu_worker.h
@@ -158,6 +158,16 @@ extern struct starpu_worker_collection starpu_worker_list;
 
																 extern struct starpu_worker_collection starpu_worker_tree;
															
 
																 /**
															
 
																+   Return true if type matches one of StarPU's defined worker architectures
															
 
																+*/
															
 
																+unsigned starpu_worker_archtype_is_valid(enum starpu_worker_archtype type);
															
 
																+
															
 
																+/**
															
 
																+   Convert a mask of architectures to a worker archtype
															
 
																+*/
															
 
																+enum starpu_worker_archtype starpu_arch_mask_to_worker_archtype(unsigned mask);
															
 
																+
															
 
																+/**
															
 
																    Return the number of workers (i.e. processing units executing
															
 
																    StarPU tasks). The return value should be at most \ref
															
 
																    STARPU_NMAXWORKERS.
															
@@ -257,6 +267,11 @@ int starpu_worker_get_by_type(enum starpu_worker_archtype type, int num);
 
																 int starpu_worker_get_by_devid(enum starpu_worker_archtype type, int devid);
															
 
																 /**
															
 
																+   Return true if task has a codelet for this worker type
															
 
																+*/
															
 
																+unsigned starpu_worker_type_can_execute_task(enum starpu_worker_archtype worker_type, const struct starpu_task *task);
															
 
																+
															
 
																+/**
															
 
																    Get the name of the worker \p id. StarPU associates a unique human
															
 
																    readable string to each processing unit. This function copies at
															
 
																    most the \p maxlen first bytes of the unique string associated to
															
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -102,6 +102,7 @@ noinst_HEADERS = 						\
 
																 	core/detect_combined_workers.h				\
															
 
																 	sched_policies/helper_mct.h				\
															
 
																 	sched_policies/fifo_queues.h				\
															
 
																+	sched_policies/heteroprio.h				\
															
 
																 	datawizard/node_ops.h					\
															
 
																 	datawizard/footprint.h					\
															
 
																 	datawizard/datawizard.h					\
															
--- a/src/core/perfmodel/perfmodel.c
+++ b/src/core/perfmodel/perfmodel.c
@@ -541,6 +541,12 @@ void _starpu_set_perf_model_dirs()
 
																 	snprintf(_perf_model_dir_debug, _PERF_MODEL_DIR_MAXLEN, "%s/debug/", _perf_model_dir);
															
 
																 }
															
 
																+char *_starpu_get_perf_model_dir() // accessor for the file-level _perf_model_dir path
															
 
																+{
															
 
																+	_starpu_create_sampling_directory_if_needed(); // same preliminary call as in _starpu_get_perf_model_dir_codelet()
															
 
																+	return _perf_model_dir; // the path from which _perf_model_dir_debug ("%s/debug/") is derived
															
 
																+}
															
 
																+
															
 
																 char *_starpu_get_perf_model_dir_codelet()
															
 
																 {
															
 
																 	_starpu_create_sampling_directory_if_needed();
															
--- a/src/core/perfmodel/perfmodel.h
+++ b/src/core/perfmodel/perfmodel.h
@@ -65,6 +65,7 @@ struct starpu_perfmodel_arch;
 
																 extern unsigned _starpu_calibration_minimum;
															
 
																+char *_starpu_get_perf_model_dir();
															
 
																 char *_starpu_get_perf_model_dir_codelet() STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
															
 
																 char *_starpu_get_perf_model_dir_bus();
															
 
																 char *_starpu_get_perf_model_dir_debug();
															
--- a/src/core/workers.c
+++ b/src/core/workers.c
@@ -208,6 +208,7 @@ struct _starpu_driver_info starpu_driver_info[STARPU_NARCH];
 
																 void _starpu_driver_info_register(enum starpu_worker_archtype archtype, const struct _starpu_driver_info *info)
															
 
																 {
															
 
																+	STARPU_ASSERT(archtype >= 0 && archtype < STARPU_NARCH);
															
 
																 	starpu_driver_info[archtype] = *info;
															
 
																 }
															
@@ -2002,6 +2003,24 @@ void starpu_shutdown(void)
 
																 #endif
															
 
																 }
															
 
																+unsigned starpu_worker_archtype_is_valid(enum starpu_worker_archtype type) // non-zero when type indexes a usable starpu_driver_info[] entry
															
 
																+{
															
 
																+	return (type >= 0 && type < STARPU_NARCH) // range check first: starpu_driver_info[] has STARPU_NARCH entries
															
 
																+		&& (starpu_driver_info[type].name_upper != NULL); // then require a name_upper (presumably set through _starpu_driver_info_register() - confirm)
															
 
																+}
															
 
																+
															
 
																+enum starpu_worker_archtype starpu_arch_mask_to_worker_archtype(unsigned mask)
															
 
																+{
															
 
																+	STARPU_ASSERT(mask && !(mask & (mask-1))); // exactly one bit must be set: mask is non-zero and clearing its lowest set bit leaves nothing
															
 
																+
															
 
																+	enum starpu_worker_archtype worker_type = ffs(mask)-2; // ffs(mask) is the 1-based index of the least significant set bit; NOTE(review): the -2 offset presumably mirrors how arch masks are built from archtypes - confirm
															
 
																+
															
 
																+	STARPU_ASSERT(worker_type >= 0 && worker_type < STARPU_NARCH); // worker_type is non-negative and smaller than the number of architectures
															
 
																+	STARPU_ASSERT(starpu_worker_archtype_is_valid(worker_type)); // worker_type designates a valid worker architecture
															
 
																+
															
 
																+	return worker_type;
															
 
																+}
															
 
																+
															
 
																 #undef starpu_worker_get_count
															
 
																 unsigned starpu_worker_get_count(void)
															
 
																 {
															
@@ -2356,6 +2375,23 @@ int starpu_worker_get_devids(enum starpu_worker_archtype type, int *devids, int
 
																 	return ndevids;
															
 
																 }
															
 
																+unsigned starpu_worker_type_can_execute_task(enum starpu_worker_archtype worker_type, const struct starpu_task *task) // returns non-zero when the task's codelet provides a function for worker_type, 0 otherwise
															
 
																+{
															
 
																+	switch(worker_type) // NOTE(review): every case dereferences task->cl without a NULL check -- confirm callers never pass a task without a codelet
															
 
																+	{
															
 
																+		case STARPU_CPU_WORKER:
															
 
																+			return task->cl->cpu_funcs[0] != NULL; // only slot 0 of the function array is inspected, here and in the cases below
															
 
																+		case STARPU_CUDA_WORKER:
															
 
																+			return task->cl->cuda_funcs[0] != NULL;
															
 
																+		case STARPU_OPENCL_WORKER:
															
 
																+			return task->cl->opencl_funcs[0] != NULL;
															
 
																+		case STARPU_MPI_MS_WORKER:
															
 
																+			return task->cl->mpi_ms_funcs[0] != NULL;
															
 
																+		default:
															
 
																+			return 0; // any archtype not listed above is reported as unable to execute the task
															
 
																+	}
															
 
																+}
															
 
																+
															
 
																 void starpu_worker_get_name(int id, char *dst, size_t maxlen)
															
 
																 {
															
 
																 	char *name = _starpu_config.workers[id].name;
															
@@ -2547,6 +2583,7 @@ unsigned starpu_worker_get_sched_ctx_list(int workerid, unsigned **sched_ctxs)
 
																 const char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type)
															
 
																 {
															
 
																+	STARPU_ASSERT(type >= 0 && type < STARPU_NARCH);
															
 
																 	const char *ret = starpu_driver_info[type].name_upper;
															
 
																 	if (!ret)
															
 
																 		ret = "unknown";
															
@@ -2555,6 +2592,7 @@ const char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type)
 
																 const char *starpu_worker_get_type_as_env_var(enum starpu_worker_archtype type)
															
 
																 {
															
 
																+	STARPU_ASSERT(type >= 0 && type < STARPU_NARCH);
															
 
																 	const char *ret = starpu_driver_info[type].name_var;
															
 
																 	if (!ret)
															
 
																 		ret = "UNKNOWN";
															
@@ -2768,6 +2806,7 @@ void starpu_worker_set_waking_up_callback(void (*callback)(unsigned workerid))
 
																 enum starpu_node_kind starpu_worker_get_memory_node_kind(enum starpu_worker_archtype type)
															
 
																 {
															
 
																+	STARPU_ASSERT(type >= 0 && type < STARPU_NARCH);
															
 
																 	enum starpu_node_kind kind = starpu_driver_info[type].memory_kind;
															
 
																 	STARPU_ASSERT_MSG(kind != (enum starpu_node_kind) -1, "no memory for archtype %d", type);
															
 
																 	return kind;
															
--- a/src/profiling/profiling.c
+++ b/src/profiling/profiling.c
@@ -441,7 +441,7 @@ int starpu_profiling_worker_get_info(int workerid, struct starpu_profiling_worke
 
																 		info->executed_tasks = worker_info[workerid].executed_tasks;
															
 
																 	}
															
 
																-	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_get_worker_struct(workerid)->sched_mutex);
															
 
																+	STARPU_PTHREAD_MUTEX_LOCK_SCHED(&_starpu_get_worker_struct(workerid)->sched_mutex);
															
 
																 	STARPU_PTHREAD_MUTEX_LOCK(&worker_info_mutex[workerid]);
															
 
																 	if (info)
															
@@ -476,7 +476,7 @@ int starpu_profiling_worker_get_info(int workerid, struct starpu_profiling_worke
 
																 	_starpu_worker_reset_profiling_info_with_lock(workerid);
															
 
																 	STARPU_PTHREAD_MUTEX_UNLOCK(&worker_info_mutex[workerid]);
															
 
																-	STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_get_worker_struct(workerid)->sched_mutex);
															
 
																+	STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&_starpu_get_worker_struct(workerid)->sched_mutex);
															
 
																 	return 0;
															
 
																 }
															
--- a/src/sched_policies/heteroprio.c
+++ b/src/sched_policies/heteroprio.c
--- a/src/sched_policies/heteroprio.h
+++ b/src/sched_policies/heteroprio.h
@@ -0,0 +1,28 @@
 
																+#ifndef __SCHED_HETEROPRIO_H__
															
 
																+#define __SCHED_HETEROPRIO_H__
															
 
																+
															
 
																+#include <schedulers/starpu_heteroprio.h>
															
 
																+
															
 
																+#define CODELET_MAX_NAME_LENGTH 32
															
 
																+#define HETEROPRIO_MAX_PRIO 100
															
 
																+#define LAHETEROPRIO_MAX_WORKER_GROUPS 10
															
 
																+
															
 
																+#define AUTOHETEROPRIO_NO_NAME "NO_NAME"
															
 
																+
															
 
																+// Measurements such as NOD, execution time, etc. will tend to ignore tasks older than this,
															
 
																+// i.e. when there are more than AUTOHETEROPRIO_RELEVANT_TASK_LIFE tasks of the same type.
															
 
																+#define AUTOHETEROPRIO_RELEVANT_TASK_LIFE 256
															
 
																+
															
 
																+#define AUTOHETEROPRIO_RELEVANT_SAMPLE_SIZE 16
															
 
																+
															
 
																+#define AUTOHETEROPRIO_EXTREMELY_LONG_TIME 999999999999999.0
															
 
																+#define AUTOHETEROPRIO_LONG_TIME 100000000.0
															
 
																+#define AUTOHETEROPRIO_FAIR_TIME 1000.0
															
 
																+
															
 
																+#define AUTOHETEROPRIO_DEFAULT_TASK_TIME AUTOHETEROPRIO_FAIR_TIME
															
 
																+
															
 
																+// At the end of the execution, if the sum of all worker profiling times is greater than this, the times will be compressed so that no time exceeds this value
															
 
																+// (presumably in microseconds)
															
 
																+#define AUTOHETEROPRIO_MAX_WORKER_PROFILING_TIME 1000000000.0
															
 
																+
															
 
																+#endif // __SCHED_HETEROPRIO_H__