/* StarPU --- Runtime system for heterogeneous multicore architectures.
 *
 * Copyright (C) 2011-2021  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
 * Copyright (C) 2016       Uppsala University
 *
 * StarPU is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * StarPU is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 */

#ifndef __SCHED_CONTEXT_H__
#define __SCHED_CONTEXT_H__

/** @file */

#include <starpu.h>
#include <starpu_sched_ctx.h>
#include <starpu_sched_ctx_hypervisor.h>
#include <starpu_scheduler.h>
#include <common/config.h>
#include <common/barrier_counter.h>
#include <profiling/profiling.h>
#include <semaphore.h>
#include <core/jobs.h>
#include <core/perfmodel/perfmodel.h>
#include "sched_ctx_list.h"

#ifdef STARPU_HAVE_HWLOC
#include <hwloc.h>
#endif

#pragma GCC visibility push(hidden)

#define NO_RESIZE -1
#define REQ_RESIZE 0
#define DO_RESIZE 1

#define STARPU_GLOBAL_SCHED_CTX 0
#define STARPU_NMAXSMS 13

struct _starpu_sched_ctx
{
	/** id of the context, as used in user mode */
	unsigned id;

	/** boolean indicating whether this scheduling context will be
	    considered for scheduling (1) or not (0) */
	unsigned do_schedule;

	/** name of the context */
	const char *name;

	/** scheduling policy of the context */
	struct starpu_sched_policy *sched_policy;

	/** data necessary for the policy */
	void *policy_data;

	/** pointer for application use */
	void *user_data;

	struct starpu_worker_collection *workers;

	/** we keep an initial sched_ctx which we never delete */
	unsigned is_initial_sched;

	/** wait for the tasks submitted to the context to be executed */
	struct _starpu_barrier_counter tasks_barrier;

	/** wait for the ready tasks of the context to be executed */
	struct _starpu_barrier_counter ready_tasks_barrier;

	/** amount of ready flops in the context */
	double ready_flops;

	/** iteration number, as advertised by the application */
	long iterations[2];
	int iteration_level;

	/** ready tasks that could not be pushed because the context had no workers */
	struct starpu_task_list empty_ctx_tasks;

	/** ready tasks that could not be pushed because the window of tasks was already full */
	struct starpu_task_list waiting_tasks;

	/** min number of CPUs to execute */
	int min_ncpus;

	/** max number of CPUs to execute */
	int max_ncpus;

	/** min number of GPUs to execute */
	int min_ngpus;

	/** max number of GPUs to execute */
	int max_ngpus;

	/** context to which resources are handed over when this context is deleted */
	unsigned inheritor;

	/** indicates whether the application has finished submitting tasks to this context */
	unsigned finished_submit;

	/** By default we have a binary notion of priority: a task is either a
	 * priority task (level 1) or it is not (level 0). */
	int min_priority;
	int max_priority;
	int min_priority_is_set;
	int max_priority_is_set;

	/** hwloc tree structure of workers */
#ifdef STARPU_HAVE_HWLOC
	hwloc_bitmap_t hwloc_workers_set;
#endif

#ifdef STARPU_USE_SC_HYPERVISOR
	/** a structure containing a series of performance counters determining the resize procedure */
	struct starpu_sched_ctx_performance_counters *perf_counters;
#endif //STARPU_USE_SC_HYPERVISOR

	/** callback called when the context has finished executing its submitted tasks */
	void (*close_callback)(unsigned sched_ctx_id, void *args);
	void *close_args;

	/** value placing the context in its hierarchy */
	unsigned hierarchy_level;

	/** if we execute non-StarPU code inside the context, we have a single
	    master worker that stays awake; otherwise main_master is -1 */
	int main_master;

	/** context nesting the current context */
	unsigned nesting_sched_ctx;

	/** performance model for the device combination of the context */
	struct starpu_perfmodel_arch perf_arch;

	/** for parallel workers, says whether the context is viewed as
	    sequential or not; this is a helper for the prologue code */
	unsigned parallel_view;

	/** for contexts without a policy: flag indicating whether we want the
	    threads to go to sleep, in order to replace them with other
	    threads, or to leave them awake and use them in the parallel code */
	unsigned awake_workers;

	/** callback function called when initializing the scheduler */
	void (*callback_sched)(unsigned);

	int sub_ctxs[STARPU_NMAXWORKERS];
	int nsub_ctxs;

	/** number of SMs assigned to this context when partitioning GPUs */
	int nsms;
	int sms_start_idx;
	int sms_end_idx;

	int stream_worker;

	starpu_pthread_rwlock_t rwlock;
	starpu_pthread_t lock_write_owner;
};
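/** The fields above are normally filled in through the public sched_ctx API
 * rather than written directly. A minimal usage sketch (the worker ids and
 * the "dmda" policy name are illustrative, not prescribed):
 * \code{.c}
 * int workerids[2] = {0, 1};
 * unsigned ctx = starpu_sched_ctx_create(workerids, 2, "my_ctx",
 *                                        STARPU_SCHED_CTX_POLICY_NAME, "dmda",
 *                                        0);
 * starpu_sched_ctx_set_inheritor(ctx, STARPU_GLOBAL_SCHED_CTX);
 * // ... submit tasks to ctx ...
 * starpu_sched_ctx_delete(ctx);
 * \endcode
 */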
/** per-worker list of deferred ctx_change operations */
LIST_TYPE(_starpu_ctx_change,
	int sched_ctx_id;
	int op;
	int nworkers_to_notify;
	int *workerids_to_notify;
	int nworkers_to_change;
	int *workerids_to_change;
);

struct _starpu_machine_config;

/** init the sched_ctx_id of all contexts */
void _starpu_init_all_sched_ctxs(struct _starpu_machine_config *config);

/** allocate all structures belonging to a context */
struct _starpu_sched_ctx *_starpu_create_sched_ctx(struct starpu_sched_policy *policy, int *workerid, int nworkerids, unsigned is_init_sched, const char *sched_name,
						   int min_prio_set, int min_prio,
						   int max_prio_set, int max_prio,
						   unsigned awake_workers,
						   void (*sched_policy_callback)(unsigned),
						   void *user_data,
						   int nsub_ctxs, int *sub_ctxs, int nsms);

/** delete all sched_ctxs */
void _starpu_delete_all_sched_ctxs(void);

/** This function waits until all the tasks that were already submitted to a specific
 * context have been executed. */
int _starpu_wait_for_all_tasks_of_sched_ctx(unsigned sched_ctx_id);
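/** For reference, the public counterpart of the wait above is
 * starpu_task_wait_for_all_in_ctx(). A short sketch, assuming ctx is a valid
 * context id and task points to an initialized struct starpu_task:
 * \code{.c}
 * task->sched_ctx = ctx;                 // attach the task to the context
 * starpu_task_submit(task);              // increments the context's submitted-task counter
 * starpu_task_wait_for_all_in_ctx(ctx);  // returns once the counter falls back to 0
 * \endcode
 */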
/** This function waits until at most n tasks remain submitted. */
int _starpu_wait_for_n_submitted_tasks_of_sched_ctx(unsigned sched_ctx_id, unsigned n);

/** In order to implement starpu_wait_for_all_tasks_of_ctx, we keep track of the number of
 * tasks currently submitted to the context */
void _starpu_decrement_nsubmitted_tasks_of_sched_ctx(unsigned sched_ctx_id);
void _starpu_increment_nsubmitted_tasks_of_sched_ctx(unsigned sched_ctx_id);
int _starpu_get_nsubmitted_tasks_of_sched_ctx(unsigned sched_ctx_id);
int _starpu_check_nsubmitted_tasks_of_sched_ctx(unsigned sched_ctx_id);

void _starpu_decrement_nready_tasks_of_sched_ctx(unsigned sched_ctx_id, double ready_flops);
unsigned _starpu_increment_nready_tasks_of_sched_ctx(unsigned sched_ctx_id, double ready_flops, struct starpu_task *task);
int _starpu_wait_for_no_ready_of_sched_ctx(unsigned sched_ctx_id);

/** Return the corresponding index of the workerid in the ctx table */
int _starpu_get_index_in_ctx_of_workerid(unsigned sched_ctx, unsigned workerid);

/** Get the mutex corresponding to the global workerid */
starpu_pthread_mutex_t *_starpu_get_sched_mutex(struct _starpu_sched_ctx *sched_ctx, int worker);

/** Get the workers belonging to a certain context; returns the number of workers.
    Take care: no mutex is taken, so the list of workers might not be up to date. */
int _starpu_get_workers_of_sched_ctx(unsigned sched_ctx_id, int *pus, enum starpu_worker_archtype arch);

/** Let the worker know it does not belong to the context and that it should stop popping from it */
void _starpu_worker_gets_out_of_ctx(unsigned sched_ctx_id, struct _starpu_worker *worker);

/** Check if the worker belongs to another sched_ctx */
unsigned _starpu_worker_belongs_to_a_sched_ctx(int workerid, unsigned sched_ctx_id);

/** Indicates whether this worker should go to sleep or not (if it is the last
    one awake in a context, it had better stay awake) */
unsigned _starpu_sched_ctx_last_worker_awake(struct _starpu_worker *worker);

/** If starpu_sched_ctx_set_context() has been called, returns the context
 * id set by its last call, or the id of the initial context */
unsigned _starpu_sched_ctx_get_current_context(void);

/** Verify that some worker can execute a certain task */
int _starpu_workers_able_to_execute_task(struct starpu_task *task, struct _starpu_sched_ctx *sched_ctx);

void _starpu_fetch_tasks_from_empty_ctx_list(struct _starpu_sched_ctx *sched_ctx);

unsigned _starpu_sched_ctx_allow_hypervisor(unsigned sched_ctx_id);

struct starpu_perfmodel_arch *_starpu_sched_ctx_get_perf_archtype(unsigned sched_ctx);

#ifdef STARPU_USE_SC_HYPERVISOR
/** Notifies the hypervisor that a task was popped from the workers' list */
void _starpu_sched_ctx_post_exec_task_cb(int workerid, struct starpu_task *task, size_t data_size, uint32_t footprint);
#endif //STARPU_USE_SC_HYPERVISOR

void starpu_sched_ctx_add_combined_workers(int *combined_workers_to_add, unsigned n_combined_workers_to_add, unsigned sched_ctx_id);

/** If the worker is the master of a parallel context, and the job is meant to
 * be executed on this parallel context, return a pointer to the context */
struct _starpu_sched_ctx *__starpu_sched_ctx_get_sched_ctx_for_worker_and_job(struct _starpu_worker *worker, struct _starpu_job *j);

#define _starpu_sched_ctx_get_sched_ctx_for_worker_and_job(w,j) \
	(_starpu_get_nsched_ctxs() <= 1 ? \
	 _starpu_get_sched_ctx_struct(0) : __starpu_sched_ctx_get_sched_ctx_for_worker_and_job((w),(j)))
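/** The helpers below implement a write-lock ownership check: the thread taking
 * the write lock records itself in lock_write_owner, which lets
 * STARPU_SCHED_CTX_CHECK_LOCK assert that the caller really holds the lock.
 * A sketch of the intended usage pattern (apply_ctx_change() is a hypothetical
 * stand-in for any operation requiring the write lock):
 * \code{.c}
 * _starpu_sched_ctx_lock_write(sched_ctx_id);
 * apply_ctx_change(sched_ctx_id);  // may safely call STARPU_SCHED_CTX_CHECK_LOCK()
 * _starpu_sched_ctx_unlock_write(sched_ctx_id);
 * \endcode
 */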
static inline struct _starpu_sched_ctx *_starpu_get_sched_ctx_struct(unsigned id);

static inline int _starpu_sched_ctx_check_write_locked(unsigned sched_ctx_id)
{
	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
	return starpu_pthread_equal(sched_ctx->lock_write_owner, starpu_pthread_self());
}

#define STARPU_SCHED_CTX_CHECK_LOCK(sched_ctx_id) STARPU_ASSERT(_starpu_sched_ctx_check_write_locked((sched_ctx_id)))

static inline void _starpu_sched_ctx_lock_write(unsigned sched_ctx_id)
{
	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
	STARPU_HG_DISABLE_CHECKING(sched_ctx->lock_write_owner);
	STARPU_ASSERT(!starpu_pthread_equal(sched_ctx->lock_write_owner, starpu_pthread_self()));
	STARPU_HG_ENABLE_CHECKING(sched_ctx->lock_write_owner);
	STARPU_PTHREAD_RWLOCK_WRLOCK(&sched_ctx->rwlock);
	sched_ctx->lock_write_owner = starpu_pthread_self();
}

static inline void _starpu_sched_ctx_unlock_write(unsigned sched_ctx_id)
{
	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
	STARPU_HG_DISABLE_CHECKING(sched_ctx->lock_write_owner);
	STARPU_ASSERT(starpu_pthread_equal(sched_ctx->lock_write_owner, starpu_pthread_self()));
	memset(&sched_ctx->lock_write_owner, 0, sizeof(sched_ctx->lock_write_owner));
	STARPU_HG_ENABLE_CHECKING(sched_ctx->lock_write_owner);
	STARPU_PTHREAD_RWLOCK_UNLOCK(&sched_ctx->rwlock);
}

static inline void _starpu_sched_ctx_lock_read(unsigned sched_ctx_id)
{
	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
	STARPU_HG_DISABLE_CHECKING(sched_ctx->lock_write_owner);
	STARPU_ASSERT(!starpu_pthread_equal(sched_ctx->lock_write_owner, starpu_pthread_self()));
	STARPU_HG_ENABLE_CHECKING(sched_ctx->lock_write_owner);
	STARPU_PTHREAD_RWLOCK_RDLOCK(&sched_ctx->rwlock);
}

static inline void _starpu_sched_ctx_unlock_read(unsigned sched_ctx_id)
{
	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
	STARPU_HG_DISABLE_CHECKING(sched_ctx->lock_write_owner);
	STARPU_ASSERT(!starpu_pthread_equal(sched_ctx->lock_write_owner, starpu_pthread_self()));
	STARPU_HG_ENABLE_CHECKING(sched_ctx->lock_write_owner);
	STARPU_PTHREAD_RWLOCK_UNLOCK(&sched_ctx->rwlock);
}

static inline unsigned _starpu_sched_ctx_worker_is_master_for_child_ctx(unsigned sched_ctx_id, unsigned workerid, struct starpu_task *task)
{
	unsigned child_sched_ctx = starpu_sched_ctx_worker_is_master_for_child_ctx(workerid, sched_ctx_id);
	if (child_sched_ctx != STARPU_NMAX_SCHED_CTXS)
	{
		starpu_sched_ctx_move_task_to_ctx_locked(task, child_sched_ctx, 1);
		starpu_sched_ctx_revert_task_counters_ctx_locked(sched_ctx_id, task->flops);
		return 1;
	}
	return 0;
}

/** Go through the list of deferred ctx_change operations of the current worker
 * and apply any operation found, until the list is empty */
void _starpu_worker_apply_deferred_ctx_changes(void);

#pragma GCC visibility pop

#endif // __SCHED_CONTEXT_H__