|
@@ -18,10 +18,22 @@
|
|
#include <starpu_scheduler.h>
|
|
#include <starpu_scheduler.h>
|
|
#include <float.h>
|
|
#include <float.h>
|
|
|
|
|
|
|
|
+/* Alpha, Beta and Gamma are heft-specific values, which allow the
|
|
|
|
+ * user to set more precisely the weight of each computing value.
|
|
|
|
+ * Beta, for example, controls the weight of communications between
|
|
|
|
+ * memories for the computation of the best node to choose.
|
|
|
|
+ */
|
|
|
|
+/* The two thresholds concern the prio nodes, which contain queues
|
|
|
|
+ * that can handle the priority of StarPU tasks. You can tune your
|
|
|
|
+ * scheduling by benchmarking those values and choosing which one is the
|
|
|
|
+ * best for your current application.
|
|
|
|
+ * The current value of the ntasks_threshold is the best we found
|
|
|
|
+ * so far across several types of applications (cholesky, LU, stencil).
|
|
|
|
+ */
|
|
#define _STARPU_SCHED_ALPHA_DEFAULT 1.0
|
|
#define _STARPU_SCHED_ALPHA_DEFAULT 1.0
|
|
#define _STARPU_SCHED_BETA_DEFAULT 1.0
|
|
#define _STARPU_SCHED_BETA_DEFAULT 1.0
|
|
#define _STARPU_SCHED_GAMMA_DEFAULT 1000.0
|
|
#define _STARPU_SCHED_GAMMA_DEFAULT 1000.0
|
|
-#define _STARPU_SCHED_NTASKS_THRESHOLD_DEFAULT 150000
|
|
|
|
|
|
+#define _STARPU_SCHED_NTASKS_THRESHOLD_DEFAULT 15000000
|
|
#define _STARPU_SCHED_EXP_LEN_THRESHOLD_DEFAULT 1000000000.0
|
|
#define _STARPU_SCHED_EXP_LEN_THRESHOLD_DEFAULT 1000000000.0
|
|
static double alpha = _STARPU_SCHED_ALPHA_DEFAULT;
|
|
static double alpha = _STARPU_SCHED_ALPHA_DEFAULT;
|
|
static double beta = _STARPU_SCHED_BETA_DEFAULT;
|
|
static double beta = _STARPU_SCHED_BETA_DEFAULT;
|
|
@@ -56,7 +68,7 @@ static void param_modified(struct starpu_top_param* d)
|
|
}
|
|
}
|
|
#endif /* !STARPU_USE_TOP */
|
|
#endif /* !STARPU_USE_TOP */
|
|
|
|
|
|
-static void initialize_dmda_center_policy(unsigned sched_ctx_id)
|
|
|
|
|
|
+static void initialize_heft_center_policy(unsigned sched_ctx_id)
|
|
{
|
|
{
|
|
starpu_sched_ctx_create_worker_collection(sched_ctx_id, STARPU_WORKER_LIST);
|
|
starpu_sched_ctx_create_worker_collection(sched_ctx_id, STARPU_WORKER_LIST);
|
|
const char *strval_alpha = getenv("STARPU_SCHED_ALPHA");
|
|
const char *strval_alpha = getenv("STARPU_SCHED_ALPHA");
|
|
@@ -84,23 +96,55 @@ static void initialize_dmda_center_policy(unsigned sched_ctx_id)
|
|
exp_len_threshold = atof(strval_exp_len_threshold);
|
|
exp_len_threshold = atof(strval_exp_len_threshold);
|
|
|
|
|
|
#ifdef STARPU_USE_TOP
|
|
#ifdef STARPU_USE_TOP
|
|
- starpu_top_register_parameter_float("DMDA_ALPHA", &alpha,
|
|
|
|
|
|
+ starpu_top_register_parameter_float("HEFT_ALPHA", &alpha,
|
|
alpha_minimum, alpha_maximum, param_modified);
|
|
alpha_minimum, alpha_maximum, param_modified);
|
|
- starpu_top_register_parameter_float("DMDA_BETA", &beta,
|
|
|
|
|
|
+ starpu_top_register_parameter_float("HEFT_BETA", &beta,
|
|
beta_minimum, beta_maximum, param_modified);
|
|
beta_minimum, beta_maximum, param_modified);
|
|
- starpu_top_register_parameter_float("DMDA_GAMMA", &_gamma,
|
|
|
|
|
|
+ starpu_top_register_parameter_float("HEFT_GAMMA", &_gamma,
|
|
gamma_minimum, gamma_maximum, param_modified);
|
|
gamma_minimum, gamma_maximum, param_modified);
|
|
- starpu_top_register_parameter_float("DMDA_IDLE_POWER", &idle_power,
|
|
|
|
|
|
+ starpu_top_register_parameter_float("HEFT_IDLE_POWER", &idle_power,
|
|
idle_power_minimum, idle_power_maximum, param_modified);
|
|
idle_power_minimum, idle_power_maximum, param_modified);
|
|
#endif /* !STARPU_USE_TOP */
|
|
#endif /* !STARPU_USE_TOP */
|
|
|
|
|
|
|
|
|
|
|
|
+/* The scheduling strategy looks like this:
|
|
|
|
+ *
|
|
|
|
+ * |
|
|
|
|
+ * window_node
|
|
|
|
+ * |
|
|
|
|
+ * perfmodel_node <--push-- perfmodel_select_node --push--> eager_node
|
|
|
|
+ * | |
|
|
|
|
+ * | |
|
|
|
|
+ * >----------------------------------------------------<
|
|
|
|
+ * | |
|
|
|
|
+ * best_impl_node best_impl_node
|
|
|
|
+ * | |
|
|
|
|
+ * prio_node prio_node
|
|
|
|
+ * | |
|
|
|
|
+ * worker_node worker_node
|
|
|
|
+ *
|
|
|
|
+ * The window contains the tasks that failed to be pushed, so that when the prio_nodes reclaim
|
|
|
|
+ * tasks by calling room to their father (classically, just after a successful pop has
|
|
|
|
+ * been made by its associated worker_node), this call goes up to the window_node which
|
|
|
|
+ * pops a task from its local queue and tries to schedule it by pushing it to the
|
|
|
|
+ * perfmodel_select_node (the decision node).
|
|
|
|
+ * The decision node takes care of the scheduling of tasks which are not
|
|
|
|
+ * calibrated, or tasks which don't have a performance model, because the scheduling
|
|
|
|
+ * architecture of this scheduler for tasks with no performance model is exactly
|
|
|
|
+ * the same as the tree-prio scheduler.
|
|
|
|
+ * Tasks with a perfmodel are pushed to the perfmodel_node, which takes care of the
|
|
|
|
+ * scheduling of those tasks on the correct worker_node.
|
|
|
|
+ * Finally, the task will be pushed to the prio_node which is the direct
|
|
|
|
+ * father in the tree of the worker_node the task has been scheduled on. This
|
|
|
|
+ * node will push the task on its local queue if neither of the two thresholds
|
|
|
|
+ * has been reached for it, or send a push_error signal to its father.
|
|
|
|
+ */
|
|
struct starpu_sched_tree * t = starpu_sched_tree_create(sched_ctx_id);
|
|
struct starpu_sched_tree * t = starpu_sched_tree_create(sched_ctx_id);
|
|
|
|
|
|
struct starpu_sched_node * window_node = starpu_sched_node_prio_create(NULL);
|
|
struct starpu_sched_node * window_node = starpu_sched_node_prio_create(NULL);
|
|
t->root = window_node;
|
|
t->root = window_node;
|
|
|
|
|
|
- struct starpu_heft_data heft_data =
|
|
|
|
|
|
+ struct starpu_mct_data mct_data =
|
|
{
|
|
{
|
|
.alpha = alpha,
|
|
.alpha = alpha,
|
|
.beta = beta,
|
|
.beta = beta,
|
|
@@ -108,18 +152,20 @@ static void initialize_dmda_center_policy(unsigned sched_ctx_id)
|
|
.idle_power = idle_power,
|
|
.idle_power = idle_power,
|
|
};
|
|
};
|
|
|
|
|
|
- struct starpu_sched_node * heft_node = starpu_sched_node_heft_create(&heft_data);
|
|
|
|
|
|
+ struct starpu_sched_node * perfmodel_node = starpu_sched_node_mct_create(&mct_data);
|
|
|
|
+ struct starpu_sched_node * no_perfmodel_node = starpu_sched_node_eager_create(NULL);
|
|
|
|
+ struct starpu_sched_node * calibrator_node = starpu_sched_node_eager_create(NULL);
|
|
|
|
|
|
- struct starpu_calibrator_data calibrator_data =
|
|
|
|
|
|
+ struct starpu_perfmodel_select_data perfmodel_select_data =
|
|
{
|
|
{
|
|
- .no_perf_model_node_create = starpu_sched_node_prio_create,
|
|
|
|
- .arg_no_perf_model = NULL,
|
|
|
|
- .next_node = heft_node,
|
|
|
|
|
|
+ .calibrator_node = calibrator_node,
|
|
|
|
+ .no_perfmodel_node = no_perfmodel_node,
|
|
|
|
+ .perfmodel_node = perfmodel_node,
|
|
};
|
|
};
|
|
|
|
|
|
- struct starpu_sched_node * calibrator_node = starpu_sched_node_calibrator_create(&calibrator_data);
|
|
|
|
- window_node->add_child(window_node, calibrator_node);
|
|
|
|
- starpu_sched_node_set_father(calibrator_node, window_node, sched_ctx_id);
|
|
|
|
|
|
+ struct starpu_sched_node * perfmodel_select_node = starpu_sched_node_perfmodel_select_create(&perfmodel_select_data);
|
|
|
|
+ window_node->add_child(window_node, perfmodel_select_node);
|
|
|
|
+ starpu_sched_node_set_father(perfmodel_select_node, window_node, sched_ctx_id);
|
|
|
|
|
|
struct starpu_prio_data prio_data =
|
|
struct starpu_prio_data prio_data =
|
|
{
|
|
{
|
|
@@ -141,25 +187,25 @@ static void initialize_dmda_center_policy(unsigned sched_ctx_id)
|
|
impl_node->add_child(impl_node, prio);
|
|
impl_node->add_child(impl_node, prio);
|
|
starpu_sched_node_set_father(prio, impl_node, sched_ctx_id);
|
|
starpu_sched_node_set_father(prio, impl_node, sched_ctx_id);
|
|
|
|
|
|
- calibrator_node->add_child(calibrator_node, impl_node);
|
|
|
|
- starpu_sched_node_set_father(impl_node, calibrator_node, sched_ctx_id);
|
|
|
|
|
|
+ perfmodel_select_node->add_child(perfmodel_select_node, impl_node);
|
|
|
|
+ starpu_sched_node_set_father(impl_node, perfmodel_select_node, sched_ctx_id);
|
|
}
|
|
}
|
|
|
|
|
|
starpu_sched_tree_update_workers(t);
|
|
starpu_sched_tree_update_workers(t);
|
|
starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)t);
|
|
starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)t);
|
|
}
|
|
}
|
|
|
|
|
|
-static void deinitialize_dmda_center_policy(unsigned sched_ctx_id)
|
|
|
|
|
|
+static void deinitialize_heft_center_policy(unsigned sched_ctx_id)
|
|
{
|
|
{
|
|
struct starpu_sched_tree *t = (struct starpu_sched_tree*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
|
|
struct starpu_sched_tree *t = (struct starpu_sched_tree*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
|
|
starpu_sched_tree_destroy(t);
|
|
starpu_sched_tree_destroy(t);
|
|
starpu_sched_ctx_delete_worker_collection(sched_ctx_id);
|
|
starpu_sched_ctx_delete_worker_collection(sched_ctx_id);
|
|
}
|
|
}
|
|
|
|
|
|
-struct starpu_sched_policy _starpu_sched_tree_dmda_policy =
|
|
|
|
|
|
+struct starpu_sched_policy _starpu_sched_tree_heft_policy =
|
|
{
|
|
{
|
|
- .init_sched = initialize_dmda_center_policy,
|
|
|
|
- .deinit_sched = deinitialize_dmda_center_policy,
|
|
|
|
|
|
+ .init_sched = initialize_heft_center_policy,
|
|
|
|
+ .deinit_sched = deinitialize_heft_center_policy,
|
|
.add_workers = starpu_sched_tree_add_workers,
|
|
.add_workers = starpu_sched_tree_add_workers,
|
|
.remove_workers = starpu_sched_tree_remove_workers,
|
|
.remove_workers = starpu_sched_tree_remove_workers,
|
|
.push_task = starpu_sched_tree_push_task,
|
|
.push_task = starpu_sched_tree_push_task,
|
|
@@ -167,6 +213,6 @@ struct starpu_sched_policy _starpu_sched_tree_dmda_policy =
|
|
.pre_exec_hook = starpu_sched_node_worker_pre_exec_hook,
|
|
.pre_exec_hook = starpu_sched_node_worker_pre_exec_hook,
|
|
.post_exec_hook = starpu_sched_node_worker_post_exec_hook,
|
|
.post_exec_hook = starpu_sched_node_worker_post_exec_hook,
|
|
.pop_every_task = NULL,
|
|
.pop_every_task = NULL,
|
|
- .policy_name = "tree-dmda",
|
|
|
|
- .policy_description = "dmda tree policy"
|
|
|
|
|
|
+ .policy_name = "tree-heft",
|
|
|
|
+ .policy_description = "heft tree policy"
|
|
};
|
|
};
|