Browse Source

Update for the auto heteroprio scheduler

Nathalie Furmento 4 years ago
parent
commit
3d07c863ba

+ 2 - 0
AUTHORS

@@ -11,6 +11,7 @@ Collin Nicolas, Inria, <nicolas.collin@inria.fr>
 Danjean Vincent, University Grenoble Alpes, <Vincent.Danjean@ens-lyon.org>
 Danjean Vincent, University Grenoble Alpes, <Vincent.Danjean@ens-lyon.org>
 Denis Alexandre, Inria, <alexandre.denis@inria.fr>
 Denis Alexandre, Inria, <alexandre.denis@inria.fr>
 Eyraud-Dubois Lionel, Inria, <lionel.eyraud-dubois@inria.fr>
 Eyraud-Dubois Lionel, Inria, <lionel.eyraud-dubois@inria.fr>
+Flint Clément, Inria, <clement.flint@inria.fr>
 Furmento Nathalie, CNRS, <nathalie.furmento@labri.fr>
 Furmento Nathalie, CNRS, <nathalie.furmento@labri.fr>
 Guermouche Amina, Télécom SudParis, <amina.guermouche@inria.fr>
 Guermouche Amina, Télécom SudParis, <amina.guermouche@inria.fr>
 Guilbaud Adrien, Inria, <adrien.guilbaud@inria.fr>
 Guilbaud Adrien, Inria, <adrien.guilbaud@inria.fr>
@@ -30,6 +31,7 @@ Nakov Stojce, Inria, <stojce.nakov@inria.fr>
 Namyst Raymond, Université de Bordeaux, <raymond.namyst@labri.fr>
 Namyst Raymond, Université de Bordeaux, <raymond.namyst@labri.fr>
 Nesi Lucas Leandro, Federal University of Rio Grande do Sul (UFRGS), <llnesi@inf.ufrgs.br>
 Nesi Lucas Leandro, Federal University of Rio Grande do Sul (UFRGS), <llnesi@inf.ufrgs.br>
 Pablo Joris, Inria, <joris.pablo@orange.fr>
 Pablo Joris, Inria, <joris.pablo@orange.fr>
+Paillat Ludovic, Inria, <ludovic.paillat@inria.fr>
 Pasqualinotto Damien, Université de Bordeaux, <dam.pasqualinotto@wanadoo.fr>
 Pasqualinotto Damien, Université de Bordeaux, <dam.pasqualinotto@wanadoo.fr>
 Pinto Vinicius Garcia, <vgpinto@inf.ufrgs.br>
 Pinto Vinicius Garcia, <vgpinto@inf.ufrgs.br>
 Pitoiset Samuel, Inria, <samuel.pitoiset@inria.fr>
 Pitoiset Samuel, Inria, <samuel.pitoiset@inria.fr>

+ 15 - 0
configure.ac

@@ -2042,6 +2042,21 @@ AM_CONDITIONAL([STARPU_USE_AYUDAME2], [test "x$enable_ayudame2" = "xyes"])
 STARPU_FXT_EVENT_DEFINES="`grep -E '#define\s+_STARPU_(MPI_)?FUT_' ${srcdir}/src/common/fxt.h ${srcdir}/mpi/src/starpu_mpi_fxt.h | grep 0x | grep -v 0x1 | cut -d : -f 2`"
 STARPU_FXT_EVENT_DEFINES="`grep -E '#define\s+_STARPU_(MPI_)?FUT_' ${srcdir}/src/common/fxt.h ${srcdir}/mpi/src/starpu_mpi_fxt.h | grep 0x | grep -v 0x1 | cut -d : -f 2`"
 AC_SUBST([STARPU_FXT_EVENT_DEFINES])
 AC_SUBST([STARPU_FXT_EVENT_DEFINES])
 
 
+# Heteroprio works better if it can store information based on the program's name
+AC_MSG_CHECKING(whether the target supports program_invocation_short_name)
+AC_LINK_IFELSE([AC_LANG_SOURCE(
+	[
+		#include <stdio.h>
+		#include <errno.h>
+		int main() {
+			printf("%s\n", program_invocation_short_name);
+			return 0;
+		}
+	])],
+	[AC_DEFINE([STARPU_HAVE_PROGRAM_INVOCATION_SHORT_NAME], [1], [variable program_invocation_short_name is available]) AC_MSG_RESULT(yes)],
+	AC_MSG_RESULT(no)
+)
+
 ###############################################################################
 ###############################################################################
 #                                                                             #
 #                                                                             #
 #                  Miscellaneous options for StarPU                           #
 #                  Miscellaneous options for StarPU                           #

+ 42 - 1
doc/doxygen/chapters/320_scheduling.doxy

@@ -374,7 +374,11 @@ The tasks are stored inside buckets, where each bucket corresponds to a priority
 worker uses an indirect access array to know the order in which it should access the buckets. Moreover,
 worker uses an indirect access array to know the order in which it should access the buckets. Moreover,
 all the tasks inside a bucket must be compatible with all the processing units that may access it (at least).
 all the tasks inside a bucket must be compatible with all the processing units that may access it (at least).
 
 
-As an example, see the following code where we have 5 types of tasks.
+These priorities are now automatically assigned by Heteroprio in auto calibration mode using heuristics.
+If you want to set these priorities manually, you can change \ref STARPU_HETEROPRIO_USE_AUTO_CALIBRATION
+and follow the example below.
+
+In this example code, we have 5 types of tasks.
 CPU workers can compute all of them, but CUDA workers can only execute
 CPU workers can compute all of them, but CUDA workers can only execute
 tasks of types 0 and 1, and is expected to go 20 and 30 time
 tasks of types 0 and 1, and is expected to go 20 and 30 time
 faster than the CPU, respectively.
 faster than the CPU, respectively.
@@ -431,4 +435,41 @@ So, in the given example, the priority of a task will be between 0 and 4 include
 However, tasks of priorities 0-1 must provide CPU and CUDA kernels, and
 However, tasks of priorities 0-1 must provide CPU and CUDA kernels, and
 tasks of priorities 2-4 must provide CPU kernels (at least).
 tasks of priorities 2-4 must provide CPU kernels (at least).
 
 
+\subsection LAHeteroprio Using locality aware Heteroprio
+
+Heteroprio supports a mode where locality is evaluated to guide the distribution
+of the tasks (see https://peerj.com/articles/cs-190.pdf).
+Currently, this mode is available using the dedicated function or an environment variable
+\ref STARPU_HETEROPRIO_USE_LA, and can be configured using environment variables.
+\code{.c}
+void starpu_heteroprio_set_use_locality(unsigned sched_ctx_id, unsigned use_locality);
+\endcode
+
+In this mode, multiple strategies are available to determine which memory node's workers are the most qualified for executing a specific task. This strategy can be set with \ref STARPU_LAHETEROPRIO_PUSH
+and available strategies are:
+- WORKER: the worker which pushed the task is preferred for the execution.
+- LcS: the node with the shortest data transfer time (estimated by StarPU) is the most qualified.
+- LS_SDH: the node with the smallest data amount to be transferred will be preferred.
+- LS_SDH2: similar to LS_SDH, but data in write access is counted in a quadratic manner to give them more importance.
+- LS_SDHB: similar to LS_SDH, but data in write access is balanced with a coefficient (its value is set to 1000) and
+for the same amount of data, the one with less pieces of data to be transferred will be preferred.
+- LC_SMWB: similar to LS_SDH, but the amount of data in write access gets multiplied by a coefficient which gets closer to 2
+as the amount of data in read access gets larger than the data in write access.
+- AUTO: the default strategy; it selects the best strategy and changes it at runtime to improve performance.
+
+Other environment variables to configure LAHeteroprio are documented in \ref ConfiguringLaHeteroprio
+
+\subsection AutoHeteroprio Using Heteroprio in auto-calibration mode
+
+In this mode, Heteroprio saves data about each program execution, in order to improve future ones.
+By default, these files are stored in the folder used by perfmodel, but this can be changed using the
+\ref STARPU_HETEROPRIO_DATA_DIR environment variable. You can also specify the data filename directly using
+\ref STARPU_HETEROPRIO_DATA_FILE.
+
+Additionally, to assign priorities to tasks, Heteroprio needs a way to detect that some tasks are similar.
+By default, Heteroprio looks for tasks with the same perfmodel, or with the same codelet's name if no perfmodel was assigned.
+This behavior can be changed to only consider the codelet's name by setting
+\ref STARPU_HETEROPRIO_CODELET_GROUPING_STRATEGY to <c>1</c>
+
+Other environment variables to configure AutoHeteroprio are documented in \ref ConfiguringAutoHeteroprio
 */
 */

+ 117 - 0
doc/doxygen/chapters/501_environment_variables.doxy

@@ -622,6 +622,123 @@ Specify which PAPI events should be recorded in the trace (\ref PapiCounters).
 
 
 </dl>
 </dl>
 
 
+\section ConfiguringHeteroprio Configuring The Heteroprio Scheduler
+
+\subsection ConfiguringLaHeteroprio Configuring LAHeteroprio
+<dl>
+
+<dt>STARPU_HETEROPRIO_USE_LA</dt>
+<dd>
+\anchor STARPU_HETEROPRIO_USE_LA
+\addindex __env__STARPU_HETEROPRIO_USE_LA
+Enable the locality aware mode of Heteroprio which guides the distribution of tasks to workers
+in order to reduce the data transfers between memory nodes.
+</dd>
+
+<dt>STARPU_LAHETEROPRIO_PUSH</dt>
+<dd>
+\anchor STARPU_LAHETEROPRIO_PUSH
+\addindex __env__STARPU_LAHETEROPRIO_PUSH
+Choose between the different push strategies for locality aware Heteroprio: 
+WORKER, LcS, LS_SDH, LS_SDH2, LS_SDHB, LC_SMWB, AUTO (by default: AUTO). These are detailed in
+\ref LAHeteroprio
+</dd>
+
+<dt>STARPU_LAHETEROPRIO_S_[ARCH]</dt>
+<dd>
+\anchor STARPU_LAHETEROPRIO_S_[ARCH]
+\addindex __env__STARPU_LAHETEROPRIO_S_arch
+Specify the number of memory nodes contained in an affinity group. An affinity
+group will be composed of the closest memory nodes to a worker of a given architecture,
+and this worker will look for tasks available inside these memory nodes, before
+considering stealing tasks outside this group.
+ARCH can be CPU, CUDA, OPENCL, MICC, SCC, MPI_MS, etc.
+</dd>
+
+<dt>STARPU_LAHETEROPRIO_PRIO_STEP_[ARCH]</dt>
+<dd>
+\anchor STARPU_LAHETEROPRIO_PRIO_STEP_[ARCH]
+\addindex __env__STARPU_LAHETEROPRIO_PRIO_STEP_arch
+Specify the number of buckets in the local memory node in which a worker will look for
+available tasks, before this worker starts looking for tasks in other memory nodes' buckets.
+ARCH indicates that this number is specific to a given arch which can be:
+CPU, CUDA, OPENCL, MICC, SCC, MPI_MS, etc.
+</dd>
+
+</dl>
+
+\subsection ConfiguringAutoHeteroprio Configuring AutoHeteroprio
+<dl>
+
+<dt>STARPU_HETEROPRIO_USE_AUTO_CALIBRATION</dt>
+<dd>
+\anchor STARPU_HETEROPRIO_USE_AUTO_CALIBRATION
+\addindex __env__STARPU_HETEROPRIO_USE_AUTO_CALIBRATION
+Enable the auto calibration mode of Heteroprio which assigns priorities to tasks automatically.
+</dd>
+
+<dt>STARPU_HETEROPRIO_DATA_DIR</dt>
+<dd>
+\anchor STARPU_HETEROPRIO_DATA_DIR
+\addindex __env__STARPU_HETEROPRIO_DATA_DIR
+Specify the path of the directory where Heteroprio stores data about program executions.
+By default, these are stored in the same directory used by perfmodel.
+</dd>
+
+<dt>STARPU_HETEROPRIO_DATA_FILE</dt>
+<dd>
+\anchor STARPU_HETEROPRIO_DATA_FILE
+\addindex __env__STARPU_HETEROPRIO_DATA_FILE
+Specify the filename where Heteroprio will save data about the current program's execution.
+</dd>
+
+<dt>STARPU_HETEROPRIO_CODELET_GROUPING_STRATEGY</dt>
+<dd>
+\anchor STARPU_HETEROPRIO_CODELET_GROUPING_STRATEGY
+\addindex __env__STARPU_HETEROPRIO_CODELET_GROUPING_STRATEGY
+Choose how Heteroprio groups similar tasks. It can be <c>0</c> to group
+the tasks with the same perfmodel or the same codelet's name if no perfmodel was assigned.
+Or, it could be <c>1</c> to group the tasks only by codelet's name.
+</dd>
+
+<dt>STARPU_AUTOHETEROPRIO_PRINT_DATA_ON_UPDATE</dt>
+<dd>
+\anchor STARPU_AUTOHETEROPRIO_PRINT_DATA_ON_UPDATE
+\addindex __env__STARPU_AUTOHETEROPRIO_PRINT_DATA_ON_UPDATE
+Enable the printing of priorities' data every time they get updated.
+</dd>
+
+<dt>STARPU_AUTOHETEROPRIO_PRINT_AFTER_ORDERING</dt>
+<dd>
+\anchor STARPU_AUTOHETEROPRIO_PRINT_AFTER_ORDERING
+\addindex __env__STARPU_AUTOHETEROPRIO_PRINT_AFTER_ORDERING
+Enable the printing of priorities' order for each architecture every time there's a reordering.
+</dd>
+
+<dt>STARPU_AUTOHETEROPRIO_PRIORITY_ORDERING_POLICY</dt>
+<dd>
+\anchor STARPU_AUTOHETEROPRIO_PRIORITY_ORDERING_POLICY
+\addindex __env__STARPU_AUTOHETEROPRIO_PRIORITY_ORDERING_POLICY
+Specify the heuristic which will be used to assign priorities automatically.
+It should be an integer between 0 and 27.
+</dd>
+
+<dt>STARPU_AUTOHETEROPRIO_ORDERING_INTERVAL</dt>
+<dd>
+\anchor STARPU_AUTOHETEROPRIO_ORDERING_INTERVAL
+\addindex __env__STARPU_AUTOHETEROPRIO_ORDERING_INTERVAL
+Specify the period (in number of tasks pushed), between priorities reordering operations.
+</dd>
+
+<dt>STARPU_AUTOHETEROPRIO_FREEZE_GATHERING</dt>
+<dd>
+\anchor STARPU_AUTOHETEROPRIO_FREEZE_GATHERING
+\addindex __env__STARPU_AUTOHETEROPRIO_FREEZE_GATHERING
+Disable data gathering from task executions.
+</dd>
+
+</dl>
+
 \section Extensions Extensions
 \section Extensions Extensions
 
 
 <dl>
 <dl>

+ 1 - 0
doc/doxygen/refman.tex

@@ -263,6 +263,7 @@ Documentation License”.
 \input{group__API__Master__Slave}
 \input{group__API__Master__Slave}
 \input{group__API__Random__Functions}
 \input{group__API__Random__Functions}
 \input{group__API__Sink}
 \input{group__API__Sink}
+\input{group__API__HeteroPrio}
 
 
 \chapter{File Index}
 \chapter{File Index}
 \input{files}
 \input{files}

+ 84 - 2
include/schedulers/starpu_heteroprio.h

@@ -24,13 +24,86 @@ extern "C"
 {
 {
 #endif
 #endif
 
 
-#define STARPU_HETEROPRIO_MAX_PRIO 100
+/**
+   @defgroup API_HeteroPrio Heteroprio Scheduler
+   @brief This is the interface for the heteroprio scheduler
+   @{
+ */
 
 
 #define STARPU_HETEROPRIO_MAX_PREFETCH 2
 #define STARPU_HETEROPRIO_MAX_PREFETCH 2
 #if STARPU_HETEROPRIO_MAX_PREFETCH <= 0
 #if STARPU_HETEROPRIO_MAX_PREFETCH <= 0
 #error STARPU_HETEROPRIO_MAX_PREFETCH == 1 means no prefetch so STARPU_HETEROPRIO_MAX_PREFETCH must >= 1
 #error STARPU_HETEROPRIO_MAX_PREFETCH == 1 means no prefetch so STARPU_HETEROPRIO_MAX_PREFETCH must >= 1
 #endif
 #endif
 
 
+#define STARPU_AUTOHETEROPRIO_PRIORITY_ORDERING_POLICY_COUNT 28
+
+enum starpu_autoheteroprio_priority_ordering_policy // priority ordering policies; implicit values 0..27, i.e. STARPU_AUTOHETEROPRIO_PRIORITY_ORDERING_POLICY_COUNT entries
+{
+	STARPU_HETEROPRIO_NOD_TIME_COMBINATION, // 0
+	STARPU_HETEROPRIO_BEST_NODS_SCORE,
+	STARPU_HETEROPRIO_BEST_NODS,
+	STARPU_HETEROPRIO_URT_PURE,
+	STARPU_HETEROPRIO_URT,
+	STARPU_HETEROPRIO_URT_2, // 5
+	STARPU_HETEROPRIO_URT_DOT_DIFF_PURE,
+	STARPU_HETEROPRIO_URT_DOT_DIFF_PURE_2,
+	STARPU_HETEROPRIO_URT_DOT_REL_DIFF_PURE,
+	STARPU_HETEROPRIO_URT_DOT_REL_DIFF_PURE_2,
+	STARPU_HETEROPRIO_URT_DOT_DIFF_2, // 10
+	STARPU_HETEROPRIO_URT_DOT_DIFF_3,
+	STARPU_HETEROPRIO_URT_DOT_DIFF_4,
+	STARPU_HETEROPRIO_URT_DOT_DIFF_5,
+	STARPU_HETEROPRIO_URT_DOT_DIFF_6,
+	STARPU_HETEROPRIO_URT_DOT_DIFF_7, // 15
+	STARPU_HETEROPRIO_URT_DOT_DIFF_8,
+	STARPU_HETEROPRIO_URT_DOT_DIFF_9,
+	STARPU_HETEROPRIO_URT_DOT_DIFF_10,
+	STARPU_HETEROPRIO_URT_DOT_DIFF_11,
+	STARPU_HETEROPRIO_URTS_PER_SECONDS, // 20
+	STARPU_HETEROPRIO_URTS_PER_SECONDS_2,
+	STARPU_HETEROPRIO_URTS_PER_SECONDS_DIFF,
+	STARPU_HETEROPRIO_URTS_TIME_RELEASED_DIFF,
+	STARPU_HETEROPRIO_URTS_TIME_COMBINATION,
+	STARPU_HETEROPRIO_NODS_PER_SECOND, // 25
+	STARPU_HETEROPRIO_NODS_TIME_RELEASED,
+	STARPU_HETEROPRIO_NODS_TIME_RELEASED_DIFF // 27 (last entry; keep the count macro in sync when adding policies)
+};
+
+static const char starpu_autoheteroprio_priority_ordering_policy_names[STARPU_AUTOHETEROPRIO_PRIORITY_ORDERING_POLICY_COUNT][64] = // printable name of each policy, listed in the same order as enum starpu_autoheteroprio_priority_ordering_policy so the enum value can be used as index
+{ // NOTE(review): being a static definition in a header, every file including it gets its own copy of this table
+	"STARPU_HETEROPRIO_NOD_TIME_COMBINATION",
+	"STARPU_HETEROPRIO_BEST_NODS_SCORE",
+	"STARPU_HETEROPRIO_BEST_NODS",
+	"STARPU_HETEROPRIO_URT_PURE",
+	"STARPU_HETEROPRIO_URT",
+	"STARPU_HETEROPRIO_URT_2",
+	"STARPU_HETEROPRIO_URT_DOT_DIFF_PURE",
+	"STARPU_HETEROPRIO_URT_DOT_DIFF_PURE_2",
+	"STARPU_HETEROPRIO_URT_DOT_REL_DIFF_PURE",
+	"STARPU_HETEROPRIO_URT_DOT_REL_DIFF_PURE_2",
+	"STARPU_HETEROPRIO_URT_DOT_DIFF_2",
+	"STARPU_HETEROPRIO_URT_DOT_DIFF_3",
+	"STARPU_HETEROPRIO_URT_DOT_DIFF_4",
+	"STARPU_HETEROPRIO_URT_DOT_DIFF_5",
+	"STARPU_HETEROPRIO_URT_DOT_DIFF_6",
+	"STARPU_HETEROPRIO_URT_DOT_DIFF_7",
+	"STARPU_HETEROPRIO_URT_DOT_DIFF_8",
+	"STARPU_HETEROPRIO_URT_DOT_DIFF_9",
+	"STARPU_HETEROPRIO_URT_DOT_DIFF_10",
+	"STARPU_HETEROPRIO_URT_DOT_DIFF_11",
+	"STARPU_HETEROPRIO_URTS_PER_SECONDS",
+	"STARPU_HETEROPRIO_URTS_PER_SECONDS_2",
+	"STARPU_HETEROPRIO_URTS_PER_SECONDS_DIFF",
+	"STARPU_HETEROPRIO_URTS_TIME_RELEASED_DIFF",
+	"STARPU_HETEROPRIO_URTS_TIME_COMBINATION",
+	"STARPU_HETEROPRIO_NODS_PER_SECOND",
+	"STARPU_HETEROPRIO_NODS_TIME_RELEASED",
+	"STARPU_HETEROPRIO_NODS_TIME_RELEASED_DIFF"
+};
+
+/** Set if heteroprio should use data locality or not */
+void starpu_heteroprio_set_use_locality(unsigned sched_ctx_id, unsigned use_locality);
+
 /** Tell how many prio there are for a given arch */
 /** Tell how many prio there are for a given arch */
 void starpu_heteroprio_set_nb_prios(unsigned sched_ctx_id, enum starpu_worker_archtype arch, unsigned max_prio);
 void starpu_heteroprio_set_nb_prios(unsigned sched_ctx_id, enum starpu_worker_archtype arch, unsigned max_prio);
 
 
@@ -40,11 +113,20 @@ void starpu_heteroprio_set_mapping(unsigned sched_ctx_id, enum starpu_worker_arc
 /** Tell which arch is the faster for the tasks of a bucket (optional) */
 /** Tell which arch is the faster for the tasks of a bucket (optional) */
 void starpu_heteroprio_set_faster_arch(unsigned sched_ctx_id, enum starpu_worker_archtype arch, unsigned bucket_id);
 void starpu_heteroprio_set_faster_arch(unsigned sched_ctx_id, enum starpu_worker_archtype arch, unsigned bucket_id);
 
 
-/** Tell how slow is a arch for the tasks of a bucket (optional) */ 
+/** Tell how slow an arch is for the tasks of a bucket (optional) */
 void starpu_heteroprio_set_arch_slow_factor(unsigned sched_ctx_id, enum starpu_worker_archtype arch, unsigned bucket_id, float slow_factor);
 void starpu_heteroprio_set_arch_slow_factor(unsigned sched_ctx_id, enum starpu_worker_archtype arch, unsigned bucket_id, float slow_factor);
 
 
+/** One memory node will be one wgroup */
+void starpu_heteroprio_map_wgroup_memory_nodes(unsigned sched_ctx_id);
+
+/** Print the current setup groups */
+void starpu_heteroprio_print_wgroups(FILE *stream, unsigned sched_ctx_id);
+
+/** @} */
+
 #ifdef __cplusplus
 #ifdef __cplusplus
 }
 }
 #endif
 #endif
 
 
 #endif /* __STARPU_SCHEDULER_HETEROPRIO_H__ */
 #endif /* __STARPU_SCHEDULER_HETEROPRIO_H__ */
+

+ 15 - 0
include/starpu_worker.h

@@ -158,6 +158,16 @@ extern struct starpu_worker_collection starpu_worker_list;
 extern struct starpu_worker_collection starpu_worker_tree;
 extern struct starpu_worker_collection starpu_worker_tree;
 
 
 /**
 /**
+   Return true if type matches one of StarPU's defined worker architectures
+*/
+unsigned starpu_worker_archtype_is_valid(enum starpu_worker_archtype type);
+
+/**
+   Convert a mask of architectures to a worker archtype
+*/
+enum starpu_worker_archtype starpu_arch_mask_to_worker_archtype(unsigned mask);
+
+/**
    Return the number of workers (i.e. processing units executing
    Return the number of workers (i.e. processing units executing
    StarPU tasks). The return value should be at most \ref
    StarPU tasks). The return value should be at most \ref
    STARPU_NMAXWORKERS.
    STARPU_NMAXWORKERS.
@@ -257,6 +267,11 @@ int starpu_worker_get_by_type(enum starpu_worker_archtype type, int num);
 int starpu_worker_get_by_devid(enum starpu_worker_archtype type, int devid);
 int starpu_worker_get_by_devid(enum starpu_worker_archtype type, int devid);
 
 
 /**
 /**
+   Return true if task has a codelet for this worker type
+*/
+unsigned starpu_worker_type_can_execute_task(enum starpu_worker_archtype worker_type, const struct starpu_task *task);
+
+/**
    Get the name of the worker \p id. StarPU associates a unique human
    Get the name of the worker \p id. StarPU associates a unique human
    readable string to each processing unit. This function copies at
    readable string to each processing unit. This function copies at
    most the \p maxlen first bytes of the unique string associated to
    most the \p maxlen first bytes of the unique string associated to

+ 1 - 0
src/Makefile.am

@@ -102,6 +102,7 @@ noinst_HEADERS = 						\
 	core/detect_combined_workers.h				\
 	core/detect_combined_workers.h				\
 	sched_policies/helper_mct.h				\
 	sched_policies/helper_mct.h				\
 	sched_policies/fifo_queues.h				\
 	sched_policies/fifo_queues.h				\
+	sched_policies/heteroprio.h				\
 	datawizard/node_ops.h					\
 	datawizard/node_ops.h					\
 	datawizard/footprint.h					\
 	datawizard/footprint.h					\
 	datawizard/datawizard.h					\
 	datawizard/datawizard.h					\

+ 6 - 0
src/core/perfmodel/perfmodel.c

@@ -541,6 +541,12 @@ void _starpu_set_perf_model_dirs()
 	snprintf(_perf_model_dir_debug, _PERF_MODEL_DIR_MAXLEN, "%s/debug/", _perf_model_dir);
 	snprintf(_perf_model_dir_debug, _PERF_MODEL_DIR_MAXLEN, "%s/debug/", _perf_model_dir);
 }
 }
 
 
+char *_starpu_get_perf_model_dir() // returns the file-level _perf_model_dir string itself (not a copy), so callers must not free it
+{
+	_starpu_create_sampling_directory_if_needed(); // presumably makes sure the directory exists before its path is handed out -- confirm in the helper
+	return _perf_model_dir;
+}
+
 char *_starpu_get_perf_model_dir_codelet()
 char *_starpu_get_perf_model_dir_codelet()
 {
 {
 	_starpu_create_sampling_directory_if_needed();
 	_starpu_create_sampling_directory_if_needed();

+ 1 - 0
src/core/perfmodel/perfmodel.h

@@ -65,6 +65,7 @@ struct starpu_perfmodel_arch;
 
 
 extern unsigned _starpu_calibration_minimum;
 extern unsigned _starpu_calibration_minimum;
 
 
+char *_starpu_get_perf_model_dir();
 char *_starpu_get_perf_model_dir_codelet() STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
 char *_starpu_get_perf_model_dir_codelet() STARPU_ATTRIBUTE_VISIBILITY_DEFAULT;
 char *_starpu_get_perf_model_dir_bus();
 char *_starpu_get_perf_model_dir_bus();
 char *_starpu_get_perf_model_dir_debug();
 char *_starpu_get_perf_model_dir_debug();

+ 39 - 0
src/core/workers.c

@@ -208,6 +208,7 @@ struct _starpu_driver_info starpu_driver_info[STARPU_NARCH];
 
 
 void _starpu_driver_info_register(enum starpu_worker_archtype archtype, const struct _starpu_driver_info *info)
 void _starpu_driver_info_register(enum starpu_worker_archtype archtype, const struct _starpu_driver_info *info)
 {
 {
+	STARPU_ASSERT(archtype >= 0 && archtype < STARPU_NARCH);
 	starpu_driver_info[archtype] = *info;
 	starpu_driver_info[archtype] = *info;
 }
 }
 
 
@@ -2002,6 +2003,24 @@ void starpu_shutdown(void)
 #endif
 #endif
 }
 }
 
 
+unsigned starpu_worker_archtype_is_valid(enum starpu_worker_archtype type)
+{
+	/* Reject out-of-range values first so that the driver table is never
+	 * indexed outside [0, STARPU_NARCH). */
+	if (type < 0 || type >= STARPU_NARCH)
+		return 0;
+
+	/* An in-range entry counts as valid only when its upper-case name is set. */
+	return starpu_driver_info[type].name_upper != NULL;
+}
+
+enum starpu_worker_archtype starpu_arch_mask_to_worker_archtype(unsigned mask)
+{
+	/* The mask must be non-zero and have exactly one bit set. */
+	STARPU_ASSERT(mask && !(mask & (mask-1)));
+
+	/* ffs() returns the 1-based position of the least significant set bit;
+	 * the archtype value is that position minus 2. */
+	int lowest_bit_position = ffs(mask);
+	enum starpu_worker_archtype worker_type = lowest_bit_position - 2;
+
+	/* The result must lie inside the architecture range... */
+	STARPU_ASSERT(worker_type >= 0 && worker_type < STARPU_NARCH);
+	/* ...and must designate a valid worker architecture. */
+	STARPU_ASSERT(starpu_worker_archtype_is_valid(worker_type));
+
+	return worker_type;
+}
+
 #undef starpu_worker_get_count
 #undef starpu_worker_get_count
 unsigned starpu_worker_get_count(void)
 unsigned starpu_worker_get_count(void)
 {
 {
@@ -2356,6 +2375,23 @@ int starpu_worker_get_devids(enum starpu_worker_archtype type, int *devids, int
 	return ndevids;
 	return ndevids;
 }
 }
 
 
+/* Tell whether task provides an implementation for the given worker type.
+ *
+ * Only the first entry of the codelet's per-architecture function array is
+ * inspected. Returns non-zero when that entry is set, and 0 for worker types
+ * not handled here or when the task has no codelet at all. */
+unsigned starpu_worker_type_can_execute_task(enum starpu_worker_archtype worker_type, const struct starpu_task *task)
+{
+	/* A task without a codelet (task->cl == NULL) has no implementation for
+	 * any worker type: report 0 instead of dereferencing a NULL pointer. */
+	if (task == NULL || task->cl == NULL)
+		return 0;
+
+	switch(worker_type)
+	{
+		case STARPU_CPU_WORKER:
+			return task->cl->cpu_funcs[0] != NULL;
+		case STARPU_CUDA_WORKER:
+			return task->cl->cuda_funcs[0] != NULL;
+		case STARPU_OPENCL_WORKER:
+			return task->cl->opencl_funcs[0] != NULL;
+		case STARPU_MPI_MS_WORKER:
+			return task->cl->mpi_ms_funcs[0] != NULL;
+		default:
+			/* Worker types without a dedicated function array are
+			 * reported as unable to execute the task. */
+			return 0;
+	}
+}
+
 void starpu_worker_get_name(int id, char *dst, size_t maxlen)
 void starpu_worker_get_name(int id, char *dst, size_t maxlen)
 {
 {
 	char *name = _starpu_config.workers[id].name;
 	char *name = _starpu_config.workers[id].name;
@@ -2547,6 +2583,7 @@ unsigned starpu_worker_get_sched_ctx_list(int workerid, unsigned **sched_ctxs)
 
 
 const char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type)
 const char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type)
 {
 {
+	STARPU_ASSERT(type >= 0 && type < STARPU_NARCH);
 	const char *ret = starpu_driver_info[type].name_upper;
 	const char *ret = starpu_driver_info[type].name_upper;
 	if (!ret)
 	if (!ret)
 		ret = "unknown";
 		ret = "unknown";
@@ -2555,6 +2592,7 @@ const char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type)
 
 
 const char *starpu_worker_get_type_as_env_var(enum starpu_worker_archtype type)
 const char *starpu_worker_get_type_as_env_var(enum starpu_worker_archtype type)
 {
 {
+	STARPU_ASSERT(type >= 0 && type < STARPU_NARCH);
 	const char *ret = starpu_driver_info[type].name_var;
 	const char *ret = starpu_driver_info[type].name_var;
 	if (!ret)
 	if (!ret)
 		ret = "UNKNOWN";
 		ret = "UNKNOWN";
@@ -2768,6 +2806,7 @@ void starpu_worker_set_waking_up_callback(void (*callback)(unsigned workerid))
 
 
 enum starpu_node_kind starpu_worker_get_memory_node_kind(enum starpu_worker_archtype type)
 enum starpu_node_kind starpu_worker_get_memory_node_kind(enum starpu_worker_archtype type)
 {
 {
+	STARPU_ASSERT(type >= 0 && type < STARPU_NARCH);
 	enum starpu_node_kind kind = starpu_driver_info[type].memory_kind;
 	enum starpu_node_kind kind = starpu_driver_info[type].memory_kind;
 	STARPU_ASSERT_MSG(kind != (enum starpu_node_kind) -1, "no memory for archtype %d", type);
 	STARPU_ASSERT_MSG(kind != (enum starpu_node_kind) -1, "no memory for archtype %d", type);
 	return kind;
 	return kind;

+ 2 - 2
src/profiling/profiling.c

@@ -441,7 +441,7 @@ int starpu_profiling_worker_get_info(int workerid, struct starpu_profiling_worke
 		info->executed_tasks = worker_info[workerid].executed_tasks;
 		info->executed_tasks = worker_info[workerid].executed_tasks;
 	}
 	}
 
 
-	STARPU_PTHREAD_MUTEX_LOCK(&_starpu_get_worker_struct(workerid)->sched_mutex);
+	STARPU_PTHREAD_MUTEX_LOCK_SCHED(&_starpu_get_worker_struct(workerid)->sched_mutex);
 	STARPU_PTHREAD_MUTEX_LOCK(&worker_info_mutex[workerid]);
 	STARPU_PTHREAD_MUTEX_LOCK(&worker_info_mutex[workerid]);
 
 
 	if (info)
 	if (info)
@@ -476,7 +476,7 @@ int starpu_profiling_worker_get_info(int workerid, struct starpu_profiling_worke
 	_starpu_worker_reset_profiling_info_with_lock(workerid);
 	_starpu_worker_reset_profiling_info_with_lock(workerid);
 
 
 	STARPU_PTHREAD_MUTEX_UNLOCK(&worker_info_mutex[workerid]);
 	STARPU_PTHREAD_MUTEX_UNLOCK(&worker_info_mutex[workerid]);
-	STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_get_worker_struct(workerid)->sched_mutex);
+	STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&_starpu_get_worker_struct(workerid)->sched_mutex);
 
 
 	return 0;
 	return 0;
 }
 }

File diff suppressed because it is too large
+ 3441 - 247
src/sched_policies/heteroprio.c


+ 28 - 0
src/sched_policies/heteroprio.h

@@ -0,0 +1,28 @@
+#ifndef __SCHED_HETEROPRIO_H__
+#define __SCHED_HETEROPRIO_H__
+
+#include <schedulers/starpu_heteroprio.h>
+
+#define CODELET_MAX_NAME_LENGTH 32
+#define HETEROPRIO_MAX_PRIO 100
+#define LAHETEROPRIO_MAX_WORKER_GROUPS 10
+
+#define AUTOHETEROPRIO_NO_NAME "NO_NAME"
+
+// will tend to ignore tasks older than this when measuring values such as NOD, execution time, etc.
+// i.e. if there are more than AUTOHETEROPRIO_RELEVANT_TASK_LIFE tasks of the same type
+#define AUTOHETEROPRIO_RELEVANT_TASK_LIFE 256
+
+#define AUTOHETEROPRIO_RELEVANT_SAMPLE_SIZE 16
+
+#define AUTOHETEROPRIO_EXTREMELY_LONG_TIME 999999999999999.0
+#define AUTOHETEROPRIO_LONG_TIME 100000000.0
+#define AUTOHETEROPRIO_FAIR_TIME 1000.0
+
+#define AUTOHETEROPRIO_DEFAULT_TASK_TIME AUTOHETEROPRIO_FAIR_TIME
+
+// at the end of the execution, if the sum of all worker profiling times is superior to this, the times will be compressed so that no time exceeds this one
+// (probably in us)
+#define AUTOHETEROPRIO_MAX_WORKER_PROFILING_TIME 1000000000.0
+
+#endif // __SCHED_HETEROPRIO_H__