/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013 INRIA * Copyright (C) 2013 Simon Archipoff * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include /* The two thresholds concern the prio nodes, which contain queues * that can handle the priority of StarPU tasks. You can tune your * scheduling by benchmarking those values and choosing which one is the * best for your current application. * The current value of the ntasks_threshold is the best we found * so far across several types of applications (cholesky, LU, stencil). 
*/ #define _STARPU_SCHED_NTASKS_THRESHOLD_DEFAULT 30 #define _STARPU_SCHED_EXP_LEN_THRESHOLD_DEFAULT 1000000000.0 static void initialize_heft_center_policy(unsigned sched_ctx_id) { starpu_sched_ctx_create_worker_collection(sched_ctx_id, STARPU_WORKER_LIST); unsigned ntasks_threshold = _STARPU_SCHED_NTASKS_THRESHOLD_DEFAULT; double exp_len_threshold = _STARPU_SCHED_EXP_LEN_THRESHOLD_DEFAULT; const char *strval_ntasks_threshold = getenv("STARPU_NTASKS_THRESHOLD"); if (strval_ntasks_threshold) ntasks_threshold = atof(strval_ntasks_threshold); const char *strval_exp_len_threshold = getenv("STARPU_EXP_LEN_THRESHOLD"); if (strval_exp_len_threshold) exp_len_threshold = atof(strval_exp_len_threshold); /* The scheduling strategy look like this : * * | * window_node * | * perfmodel_node <--push-- perfmodel_select_node --push--> eager_node * | | * | | * >----------------------------------------------------< * | | * best_impl_node best_impl_node * | | * prio_node prio_node * | | * worker_node worker_node * * A window contain the tasks that failed to be pushed, so as when the prio_nodes reclaim * tasks by calling room to their father (classically, just after a successful pop have * been made by its associated worker_node), this call goes up to the window_node which * pops a task from its local queue and try to schedule it by pushing it to the * decision_node. * The decision node takes care of the scheduling of tasks which are not * calibrated, or tasks which don't have a performance model, because the scheduling * architecture of this scheduler for tasks with no performance model is exactly * the same as the tree-prio scheduler. * Tasks with a perfmodel are pushed to the perfmodel_node, which takes care of the * scheduling of those tasks on the correct worker_node. * Finally, the task will be pushed to the prio_node which is the direct * father in the tree of the worker_node the task has been scheduled on. 
This * node will push the task on its local queue if no one of the two thresholds * have been reached for it, or send a push_error signal to its father. */ struct starpu_sched_tree * t = starpu_sched_tree_create(sched_ctx_id); struct starpu_sched_node * window_node = starpu_sched_node_prio_create(NULL); t->root = window_node; struct starpu_sched_node * perfmodel_node = starpu_sched_node_mct_create(NULL); struct starpu_sched_node * no_perfmodel_node = starpu_sched_node_eager_create(NULL); struct starpu_sched_node * calibrator_node = starpu_sched_node_eager_create(NULL); struct starpu_perfmodel_select_data perfmodel_select_data = { .calibrator_node = calibrator_node, .no_perfmodel_node = no_perfmodel_node, .perfmodel_node = perfmodel_node, }; struct starpu_sched_node * perfmodel_select_node = starpu_sched_node_perfmodel_select_create(&perfmodel_select_data); window_node->add_child(window_node, perfmodel_select_node); starpu_sched_node_set_father(perfmodel_select_node, window_node, sched_ctx_id); struct starpu_prio_data prio_data = { .ntasks_threshold = ntasks_threshold, .exp_len_threshold = exp_len_threshold, }; unsigned i; for(i = 0; i < starpu_worker_get_count() + starpu_combined_worker_get_count(); i++) { struct starpu_sched_node * worker_node = starpu_sched_node_worker_get(i); STARPU_ASSERT(worker_node); struct starpu_sched_node * prio = starpu_sched_node_prio_create(&prio_data); prio->add_child(prio, worker_node); starpu_sched_node_set_father(worker_node, prio, sched_ctx_id); struct starpu_sched_node * impl_node = starpu_sched_node_best_implementation_create(NULL); impl_node->add_child(impl_node, prio); starpu_sched_node_set_father(prio, impl_node, sched_ctx_id); perfmodel_select_node->add_child(perfmodel_select_node, impl_node); starpu_sched_node_set_father(impl_node, perfmodel_select_node, sched_ctx_id); } starpu_sched_tree_update_workers(t); starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)t); } static void deinitialize_heft_center_policy(unsigned 
sched_ctx_id) { struct starpu_sched_tree *t = (struct starpu_sched_tree*)starpu_sched_ctx_get_policy_data(sched_ctx_id); starpu_sched_tree_destroy(t); starpu_sched_ctx_delete_worker_collection(sched_ctx_id); } struct starpu_sched_policy _starpu_sched_tree_heft_policy = { .init_sched = initialize_heft_center_policy, .deinit_sched = deinitialize_heft_center_policy, .add_workers = starpu_sched_tree_add_workers, .remove_workers = starpu_sched_tree_remove_workers, .push_task = starpu_sched_tree_push_task, .pop_task = starpu_sched_tree_pop_task, .pre_exec_hook = starpu_sched_node_worker_pre_exec_hook, .post_exec_hook = starpu_sched_node_worker_post_exec_hook, .pop_every_task = NULL, .policy_name = "tree-heft", .policy_description = "heft tree policy" };