123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526 |
- /* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010-2012 INRIA
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
- #include <sched_ctx_hypervisor.h>
- #include <pthread.h>
- static int _compute_priority(unsigned sched_ctx)
- {
- struct policy_config *config = sched_ctx_hypervisor_get_config(sched_ctx);
- int total_priority = 0;
- struct starpu_sched_ctx_worker_collection *workers = starpu_get_worker_collection_of_sched_ctx(sched_ctx);
- int worker;
- if(workers->init_cursor)
- workers->init_cursor(workers);
- while(workers->has_next(workers))
- {
- worker = workers->get_next(workers);
- total_priority += config->priority[worker];
- }
- if(workers->init_cursor)
- workers->deinit_cursor(workers);
- return total_priority;
- }
- static unsigned _find_poor_sched_ctx(unsigned req_sched_ctx, int nworkers_to_move)
- {
- int i;
- int highest_priority = -1;
- int current_priority = 0;
- unsigned sched_ctx = STARPU_NMAX_SCHED_CTXS;
- int *sched_ctxs = sched_ctx_hypervisor_get_sched_ctxs();
- int nsched_ctxs = sched_ctx_hypervisor_get_nsched_ctxs();
- struct policy_config *config = NULL;
- for(i = 0; i < nsched_ctxs; i++)
- {
- if(sched_ctxs[i] != STARPU_NMAX_SCHED_CTXS && sched_ctxs[i] != req_sched_ctx)
- {
- unsigned nworkers = starpu_get_nworkers_of_sched_ctx(sched_ctxs[i]);
- config = sched_ctx_hypervisor_get_config(sched_ctxs[i]);
- if((nworkers + nworkers_to_move) <= config->max_nworkers)
- {
- current_priority = _compute_priority(sched_ctxs[i]);
- if (highest_priority < current_priority)
- {
- highest_priority = current_priority;
- sched_ctx = sched_ctxs[i];
- }
- }
- }
- }
- return sched_ctx;
- }
- int* _get_first_workers(unsigned sched_ctx, unsigned *nworkers, enum starpu_archtype arch)
- {
- struct policy_config *config = sched_ctx_hypervisor_get_config(sched_ctx);
- int *curr_workers = (int*)malloc((*nworkers) * sizeof(int));
- int i;
- for(i = 0; i < *nworkers; i++)
- curr_workers[i] = -1;
- struct starpu_sched_ctx_worker_collection *workers = starpu_get_worker_collection_of_sched_ctx(sched_ctx);
- int index;
- int worker;
- int considered = 0;
- if(workers->init_cursor)
- workers->init_cursor(workers);
- for(index = 0; index < *nworkers; index++)
- {
- while(workers->has_next(workers))
- {
- considered = 0;
- worker = workers->get_next(workers);
- enum starpu_archtype curr_arch = starpu_worker_get_type(worker);
- if(arch == 0 || curr_arch == arch)
- {
- if(!config->fixed_workers[worker])
- {
- for(i = 0; i < index; i++)
- {
- if(curr_workers[i] == worker)
- {
- considered = 1;
- break;
- }
- }
- if(!considered)
- {
- /* the first iteration*/
- if(curr_workers[index] < 0)
- curr_workers[index] = worker;
- /* small priority worker is the first to leave the ctx*/
- else if(config->priority[worker] <
- config->priority[curr_workers[index]])
- curr_workers[index] = worker;
- /* if we don't consider priorities check for the workers
- with the biggest idle time */
- else if(config->priority[worker] ==
- config->priority[curr_workers[index]])
- {
- double worker_idle_time = sched_ctx_hypervisor_get_idle_time(sched_ctx, worker);
- double curr_worker_idle_time = sched_ctx_hypervisor_get_idle_time(sched_ctx, curr_workers[index]);
- if(worker_idle_time > curr_worker_idle_time)
- curr_workers[index] = worker;
- }
- }
- }
- }
- }
- if(curr_workers[index] < 0)
- {
- *nworkers = index;
- break;
- }
- }
- if(workers->init_cursor)
- workers->deinit_cursor(workers);
- return curr_workers;
- }
- static unsigned _get_potential_nworkers(struct policy_config *config, unsigned sched_ctx, enum starpu_archtype arch)
- {
- struct starpu_sched_ctx_worker_collection *workers = starpu_get_worker_collection_of_sched_ctx(sched_ctx);
- unsigned potential_workers = 0;
- int worker;
- if(workers->init_cursor)
- workers->init_cursor(workers);
- while(workers->has_next(workers))
- {
- worker = workers->get_next(workers);
- enum starpu_archtype curr_arch = starpu_worker_get_type(worker);
- if(arch == 0 || curr_arch == arch)
- {
- if(!config->fixed_workers[worker])
- potential_workers++;
- }
- }
- if(workers->init_cursor)
- workers->deinit_cursor(workers);
- return potential_workers;
- }
- static unsigned _get_nworkers_to_move(unsigned req_sched_ctx)
- {
- struct policy_config *config = sched_ctx_hypervisor_get_config(req_sched_ctx);
- unsigned nworkers = starpu_get_nworkers_of_sched_ctx(req_sched_ctx);
- unsigned nworkers_to_move = 0;
- unsigned potential_moving_workers = _get_potential_nworkers(config, req_sched_ctx, 0);
- if(potential_moving_workers > 0)
- {
- if(potential_moving_workers <= config->min_nworkers)
- /* if we have to give more than min better give it all */
- /* => empty ctx will block until having the required workers */
- nworkers_to_move = potential_moving_workers;
- else if(potential_moving_workers > config->max_nworkers)
- {
- if((potential_moving_workers - config->granularity) > config->max_nworkers)
- nworkers_to_move = config->granularity;
- else
- nworkers_to_move = potential_moving_workers - config->max_nworkers;
- }
- else if(potential_moving_workers > config->granularity)
- {
- if((nworkers - config->granularity) > config->min_nworkers)
- nworkers_to_move = config->granularity;
- else
- nworkers_to_move = potential_moving_workers - config->min_nworkers;
- }
- else
- {
- int nfixed_workers = nworkers - potential_moving_workers;
- if(nfixed_workers >= config->min_nworkers)
- nworkers_to_move = potential_moving_workers;
- else
- nworkers_to_move = potential_moving_workers - (config->min_nworkers - nfixed_workers);
- }
- if((nworkers - nworkers_to_move) > config->max_nworkers)
- nworkers_to_move = nworkers - config->max_nworkers;
- }
- return nworkers_to_move;
- }
- static unsigned _simple_resize(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, unsigned force_resize)
- {
- int ret = 1;
- if(force_resize)
- pthread_mutex_lock(&act_hypervisor_mutex);
- else
- ret = pthread_mutex_trylock(&act_hypervisor_mutex);
- if(ret != EBUSY)
- {
- unsigned nworkers_to_move = _get_nworkers_to_move(sender_sched_ctx);
- if(nworkers_to_move > 0)
- {
- unsigned poor_sched_ctx = STARPU_NMAX_SCHED_CTXS;
- if(receiver_sched_ctx == STARPU_NMAX_SCHED_CTXS)
- poor_sched_ctx = _find_poor_sched_ctx(sender_sched_ctx, nworkers_to_move);
- else
- {
- poor_sched_ctx = receiver_sched_ctx;
- struct policy_config *config = sched_ctx_hypervisor_get_config(poor_sched_ctx);
- unsigned nworkers = starpu_get_nworkers_of_sched_ctx(poor_sched_ctx);
- unsigned nshared_workers = starpu_get_nshared_workers(sender_sched_ctx, poor_sched_ctx);
- if((nworkers+nworkers_to_move-nshared_workers) > config->max_nworkers)
- nworkers_to_move = nworkers > config->max_nworkers ? 0 : (config->max_nworkers - nworkers+nshared_workers);
- if(nworkers_to_move == 0) poor_sched_ctx = STARPU_NMAX_SCHED_CTXS;
- }
- if(poor_sched_ctx != STARPU_NMAX_SCHED_CTXS)
- {
- int *workers_to_move = _get_first_workers(sender_sched_ctx, &nworkers_to_move, 0);
- sched_ctx_hypervisor_move_workers(sender_sched_ctx, poor_sched_ctx, workers_to_move, nworkers_to_move);
- struct policy_config *new_config = sched_ctx_hypervisor_get_config(poor_sched_ctx);
- int i;
- for(i = 0; i < nworkers_to_move; i++)
- new_config->max_idle[workers_to_move[i]] = new_config->max_idle[workers_to_move[i]] !=MAX_IDLE_TIME ? new_config->max_idle[workers_to_move[i]] : new_config->new_workers_max_idle;
- free(workers_to_move);
- }
- }
- pthread_mutex_unlock(&act_hypervisor_mutex);
- return 1;
- }
- return 0;
- }
- static int* _get_workers_to_move(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, int *nworkers)
- {
- int *workers = NULL;
- double v_receiver = sched_ctx_hypervisor_get_ctx_velocity(receiver_sched_ctx);
- double receiver_remainig_flops = sched_ctx_hypervisor_get_flops_left(receiver_sched_ctx);
- double sender_exp_end = sched_ctx_hypervisor_get_exp_end(sender_sched_ctx);
- double sender_v_cpu = sched_ctx_hypervisor_get_cpu_velocity(sender_sched_ctx);
- // double v_gcpu = sched_ctx_hypervisor_get_gpu_velocity(sender_sched_ctx);
- double v_for_rctx = (receiver_remainig_flops/(sender_exp_end - starpu_timing_now())) - v_receiver;
- // v_for_rctx /= 2;
- int nworkers_needed = v_for_rctx/sender_v_cpu;
- /* printf("%d->%d: v_rec %lf v %lf v_cpu %lf w_needed %d \n", sender_sched_ctx, receiver_sched_ctx, */
- /* v_receiver, v_for_rctx, sender_v_cpu, nworkers_needed); */
- if(nworkers_needed > 0)
- {
- struct policy_config *sender_config = sched_ctx_hypervisor_get_config(sender_sched_ctx);
- unsigned potential_moving_cpus = _get_potential_nworkers(sender_config, sender_sched_ctx, STARPU_CPU_WORKER);
- unsigned potential_moving_gpus = _get_potential_nworkers(sender_config, sender_sched_ctx, STARPU_CUDA_WORKER);
- unsigned sender_nworkers = starpu_get_nworkers_of_sched_ctx(sender_sched_ctx);
- struct policy_config *config = sched_ctx_hypervisor_get_config(receiver_sched_ctx);
- unsigned nworkers_ctx = starpu_get_nworkers_of_sched_ctx(receiver_sched_ctx);
- if(nworkers_needed < (potential_moving_cpus + 5 * potential_moving_gpus))
- {
- if((sender_nworkers - nworkers_needed) >= sender_config->min_nworkers)
- {
- if((nworkers_ctx + nworkers_needed) > config->max_nworkers)
- nworkers_needed = nworkers_ctx > config->max_nworkers ? 0 : (config->max_nworkers - nworkers_ctx);
- if(nworkers_needed > 0)
- {
- int ngpus = nworkers_needed / 5;
- int *gpus;
- gpus = _get_first_workers(sender_sched_ctx, &ngpus, STARPU_CUDA_WORKER);
- int ncpus = nworkers_needed - ngpus;
- int *cpus;
- cpus = _get_first_workers(sender_sched_ctx, &ncpus, STARPU_CPU_WORKER);
- workers = (int*)malloc(nworkers_needed*sizeof(int));
- int i;
- for(i = 0; i < ngpus; i++)
- workers[(*nworkers)++] = gpus[i];
- for(i = 0; i < ncpus; i++)
- workers[(*nworkers)++] = cpus[i];
- free(gpus);
- free(cpus);
- }
- }
- }
- else
- {
- int nworkers_to_move = _get_nworkers_to_move(sender_sched_ctx);
- if(sender_nworkers - nworkers_to_move >= sender_config->min_nworkers)
- {
- unsigned nshared_workers = starpu_get_nshared_workers(sender_sched_ctx, receiver_sched_ctx);
- if((nworkers_ctx + nworkers_to_move - nshared_workers) > config->max_nworkers)
- nworkers_to_move = nworkers_ctx > config->max_nworkers ? 0 : (config->max_nworkers - nworkers_ctx + nshared_workers);
- if(nworkers_to_move > 0)
- {
- workers = _get_first_workers(sender_sched_ctx, &nworkers_to_move, 0);
- *nworkers = nworkers_to_move;
- }
- }
- }
- }
- return workers;
- }
- static unsigned _simple_resize2(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, unsigned force_resize)
- {
- int ret = 1;
- if(force_resize)
- pthread_mutex_lock(&act_hypervisor_mutex);
- else
- ret = pthread_mutex_trylock(&act_hypervisor_mutex);
- if(ret != EBUSY)
- {
- int nworkers_to_move = 0;
- int *workers_to_move = _get_workers_to_move(sender_sched_ctx, receiver_sched_ctx, &nworkers_to_move);
- if(nworkers_to_move > 0)
- {
- sched_ctx_hypervisor_move_workers(sender_sched_ctx, receiver_sched_ctx, workers_to_move, nworkers_to_move);
- struct policy_config *new_config = sched_ctx_hypervisor_get_config(receiver_sched_ctx);
- int i;
- for(i = 0; i < nworkers_to_move; i++)
- new_config->max_idle[workers_to_move[i]] = new_config->max_idle[workers_to_move[i]] !=MAX_IDLE_TIME ? new_config->max_idle[workers_to_move[i]] : new_config->new_workers_max_idle;
- free(workers_to_move);
- }
- pthread_mutex_unlock(&act_hypervisor_mutex);
- return 1;
- }
- return 0;
- }
- static unsigned simple_resize(unsigned sender_sched_ctx)
- {
- return _simple_resize(sender_sched_ctx, STARPU_NMAX_SCHED_CTXS, 1);
- }
- static void simple_manage_idle_time(unsigned req_sched_ctx, int worker, double idle_time)
- {
- struct policy_config *config = sched_ctx_hypervisor_get_config(req_sched_ctx);
- if(config != NULL && idle_time > config->max_idle[worker])
- simple_resize(req_sched_ctx);
- return;
- }
/* Return the context with the smallest expected end time, as reported by
 * sched_ctx_hypervisor_get_exp_end(), or -1 when no context has an estimate.
 *
 * An expected end of -1.0 is treated as "no estimate" and such contexts are
 * skipped.  The scan no longer seeds the minimum from the first context:
 * doing so read sched_ctxs[0] even for an empty list, and when that first
 * context had no estimate the running minimum was -1.0, so no other context
 * could ever be selected.  On a tie the first context wins.
 */
int _find_fastest_sched_ctx()
{
	int *sched_ctxs = sched_ctx_hypervisor_get_sched_ctxs();
	int nsched_ctxs = sched_ctx_hypervisor_get_nsched_ctxs();

	int fastest_sched_ctx = -1;
	double fastest_exp_end = -1.0;
	int i;

	for(i = 0; i < nsched_ctxs; i++)
	{
		double curr_exp_end = sched_ctx_hypervisor_get_exp_end(sched_ctxs[i]);

		/* no estimate for this context */
		if(curr_exp_end == -1.0)
			continue;

		if(fastest_sched_ctx == -1 || curr_exp_end < fastest_exp_end)
		{
			fastest_exp_end = curr_exp_end;
			fastest_sched_ctx = sched_ctxs[i];
		}
	}

	return fastest_sched_ctx;
}
/* Return the context with the largest expected end time, as reported by
 * sched_ctx_hypervisor_get_exp_end().  The first context whose expected end
 * is -1.0 is returned immediately.  Returns -1 when the list is empty. */
int _find_slowest_sched_ctx()
{
	int *ctx_ids = sched_ctx_hypervisor_get_sched_ctxs();
	int nctxs = sched_ctx_hypervisor_get_nsched_ctxs();

	int slowest = -1;
	double latest_end = -1.0;
	int idx;

	for(idx = 0; idx < nctxs; idx++)
	{
		double exp_end = sched_ctx_hypervisor_get_exp_end(ctx_ids[idx]);

		/* if it hasn't started because of no resources give it priority */
		if(exp_end == -1.0)
			return ctx_ids[idx];

		if(exp_end > latest_end)
		{
			latest_end = exp_end;
			slowest = ctx_ids[idx];
		}
	}

	return slowest;
}
/* Same search as _find_slowest_sched_ctx() but ignoring sched_ctx itself:
 * return the other context with the largest expected end time, or the first
 * other context whose expected end is -1.0.  Returns -1 when there is no
 * other context. */
int _find_slowest_available_sched_ctx(unsigned sched_ctx)
{
	int *ctx_ids = sched_ctx_hypervisor_get_sched_ctxs();
	int nctxs = sched_ctx_hypervisor_get_nsched_ctxs();

	int slowest = -1;
	double latest_end = -1.0;
	int idx;

	for(idx = 0; idx < nctxs; idx++)
	{
		/* the caller's own context is not a candidate */
		if(ctx_ids[idx] == sched_ctx)
			continue;

		double exp_end = sched_ctx_hypervisor_get_exp_end(ctx_ids[idx]);

		/* if it hasn't started because of no resources give it priority */
		if(exp_end == -1.0)
			return ctx_ids[idx];

		if(exp_end > latest_end)
		{
			latest_end = exp_end;
			slowest = ctx_ids[idx];
		}
	}

	return slowest;
}
- static void simple_manage_gflops_rate(unsigned sched_ctx)
- {
- double exp_end = sched_ctx_hypervisor_get_exp_end(sched_ctx);
- double flops_left_pct = sched_ctx_hypervisor_get_flops_left_pct(sched_ctx);
- if(flops_left_pct == 0.0f)
- {
- int slowest_sched_ctx = _find_slowest_available_sched_ctx(sched_ctx);
- if(slowest_sched_ctx != -1)
- {
- double slowest_flops_left_pct = sched_ctx_hypervisor_get_flops_left_pct(slowest_sched_ctx);
- printf("ctx %d finished & gives away the res to %d; slow_left %lf\n", sched_ctx, slowest_sched_ctx, slowest_flops_left_pct);
- if(slowest_flops_left_pct != 0.0f)
- {
- struct policy_config* config = sched_ctx_hypervisor_get_config(sched_ctx);
- config->min_nworkers = 0;
- config->max_nworkers = 0;
- _simple_resize(sched_ctx, slowest_sched_ctx, 1);
- sched_ctx_hypervisor_stop_resize(slowest_sched_ctx);
- }
- }
- }
- int fastest_sched_ctx = _find_fastest_sched_ctx();
- int slowest_sched_ctx = _find_slowest_sched_ctx();
- if(fastest_sched_ctx != -1 && slowest_sched_ctx != -1 && fastest_sched_ctx != slowest_sched_ctx)
- {
- double fastest_exp_end = sched_ctx_hypervisor_get_exp_end(fastest_sched_ctx);
- double slowest_exp_end = sched_ctx_hypervisor_get_exp_end(slowest_sched_ctx);
- double fastest_bef_res_exp_end = sched_ctx_hypervisor_get_bef_res_exp_end(fastest_sched_ctx);
- double slowest_bef_res_exp_end = sched_ctx_hypervisor_get_bef_res_exp_end(slowest_sched_ctx);
- // (fastest_bef_res_exp_end < slowest_bef_res_exp_end ||
- // fastest_bef_res_exp_end == 0.0 || slowest_bef_res_exp_end == 0)))
- if((slowest_exp_end == -1.0 && fastest_exp_end != -1.0) || ((fastest_exp_end + (fastest_exp_end*0.5)) < slowest_exp_end ))
- {
- double fast_flops_left_pct = sched_ctx_hypervisor_get_flops_left_pct(fastest_sched_ctx);
- if(fast_flops_left_pct < 0.8)
- _simple_resize(fastest_sched_ctx, slowest_sched_ctx, 0);
- }
- }
- }
- struct hypervisor_policy idle_policy =
- {
- .manage_idle_time = simple_manage_idle_time,
- .manage_gflops_rate = simple_manage_gflops_rate,
- .resize = simple_resize,
- };
- struct hypervisor_policy app_driven_policy =
- {
- .manage_idle_time = simple_manage_idle_time,
- .manage_gflops_rate = simple_manage_gflops_rate,
- .resize = simple_resize,
- };
- struct hypervisor_policy gflops_rate_policy =
- {
- .manage_idle_time = simple_manage_idle_time,
- .manage_gflops_rate = simple_manage_gflops_rate,
- .resize = simple_resize,
- };
|