/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2020 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "sc_hypervisor_lp.h" #include "sc_hypervisor_policy.h" #include #include static double _glp_resolve(int ns, int nw, double speed[ns][nw], double w_in_s[ns][nw], unsigned integer); static unsigned _compute_max_speed(int ns, int nw, double w_in_s[ns][nw], unsigned *in_sched_ctxs, int *workers) { double speed[ns][nw]; unsigned *sched_ctxs = in_sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : in_sched_ctxs; int w,s; struct sc_hypervisor_wrapper* sc_w = NULL; for(s = 0; s < ns; s++) { sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]); for(w = 0; w < nw; w++) { w_in_s[s][w] = 0.0; int worker = workers == NULL ? w : workers[w]; enum starpu_worker_archtype arch = starpu_worker_get_type(worker); speed[s][w] = sc_hypervisor_get_speed(sc_w, arch); } } struct timeval start_time; struct timeval end_time; gettimeofday(&start_time, NULL); double res = _glp_resolve(ns, nw, speed, w_in_s, 1); gettimeofday(&end_time, NULL); long diff_s = end_time.tv_sec - start_time.tv_sec; long diff_us = end_time.tv_usec - start_time.tv_usec; __attribute__((unused)) float timing = (float)(diff_s*1000000 + diff_us)/1000; if(res > 0.0) return 1; return 0; } /* * GNU Linear Programming Kit backend */ #ifdef STARPU_HAVE_GLPK_H #include static double _glp_resolve(int ns, int nw, double speed[ns][nw], double w_in_s[ns][nw], unsigned integer) { int w = 0, s = 0; glp_prob *lp; lp = glp_create_prob(); glp_set_prob_name(lp, "StarPU theoretical bound"); glp_set_obj_dir(lp, GLP_MAX); glp_set_obj_name(lp, "total speed"); { int ne = 2 * ns * nw /* worker execution time */ + 1 + 1 ; /* glp dumbness */ int n = 1; int ia[ne], ja[ne]; double ar[ne]; /* Variables: x[s][w] the acknwoledgment that the worker w belongs to the context s */ glp_add_cols(lp, nw*ns + 1); for(s = 0; s < ns; s++) for(w = 0; w < nw; w++) { char name[32]; snprintf(name, sizeof(name), "w%ds%dn", w, s); glp_set_col_name(lp, s*nw+w+1, name); if (integer) { glp_set_col_kind(lp, s*nw+w+1, GLP_IV); glp_set_col_bnds(lp, s*nw+w+1, GLP_DB, 0, 1); } else glp_set_col_bnds(lp, s*nw+w+1, GLP_DB, 0.0, 1.0); } /* vmax should be positif */ /* Z = vmax structural variable, x[s][w] are auxiliar variables */ glp_set_col_name(lp, nw*ns+1, "vmax"); glp_set_col_bnds(lp, nw*ns+1, GLP_LO, 0.0, 0.0); glp_set_obj_coef(lp, nw*ns+1, 1.); int curr_row_idx = 0; /* Total worker speed */ glp_add_rows(lp, 1); /*sum(x[s][w]*speed[s][w]) >= vmax */ char name[32], title[64]; starpu_worker_get_name(w, name, sizeof(name)); snprintf(title, sizeof(title), "worker %s", name); glp_set_row_name(lp, curr_row_idx + 1, title); for(s = 0; s < ns; s++) { for (w = 0; w < nw; w++) { /* x[s][w] */ ia[n] = curr_row_idx + 1; ja[n] = s*nw+w+1; ar[n] = speed[s][w]; n++; } } /* vmax */ ia[n] = curr_row_idx + 1; ja[n] = nw*ns+1; ar[n] = (-1); n++; glp_set_row_bnds(lp, curr_row_idx + 1, GLP_LO, 0.0, 0.0); curr_row_idx += 1 ; /* sum(x[s][w]) = 1 */ glp_add_rows(lp, nw); for (w = 0; w < nw; w++) { char name[32], title[64]; starpu_worker_get_name(w, name, sizeof(name)); snprintf(title, sizeof(title), "w%x", w); glp_set_row_name(lp, curr_row_idx+w+1, title); for(s = 0; s < ns; s++) { ia[n] = curr_row_idx+w+1; ja[n] = s*nw+w+1; ar[n] = 1; n++; } if(integer) glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1, 1); else glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1.0, 1.0); } if(n != ne) printf("ns= %d nw = %d n = %d ne = %d\n", ns, nw, n, ne); STARPU_ASSERT(n == ne); glp_load_matrix(lp, ne-1, ia, ja, ar); } glp_smcp parm; glp_init_smcp(&parm); parm.msg_lev = GLP_MSG_OFF; int ret = glp_simplex(lp, &parm); if (ret) { glp_delete_prob(lp); lp = NULL; return 0.0; } if (integer) { glp_iocp iocp; glp_init_iocp(&iocp); iocp.msg_lev = GLP_MSG_OFF; glp_intopt(lp, &iocp); int stat = glp_mip_status(lp); /* if we don't have a solution return */ if(stat == GLP_NOFEAS) { glp_delete_prob(lp); lp = NULL; return 0.0; } } int stat = glp_get_prim_stat(lp); /* if we don't have a solution return */ if(stat == GLP_NOFEAS) { glp_delete_prob(lp); lp = NULL; printf("No sol!!!\n"); return 0.0; } double res = glp_get_obj_val(lp); for(s = 0; s < ns; s++) for(w = 0; w < nw; w++) { if (integer) w_in_s[s][w] = (double)glp_mip_col_val(lp, s*nw+w+1); else w_in_s[s][w] = glp_get_col_prim(lp, s*nw+w+1); } glp_delete_prob(lp); return res; } static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int nworkers) { int ns = sched_ctxs == NULL ? sc_hypervisor_get_nsched_ctxs() : nsched_ctxs; int nw = workers == NULL ? (int)starpu_worker_get_count() : nworkers; /* Number of different workers */ sched_ctxs = sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : sched_ctxs; double w_in_s[ns][nw]; unsigned found_sol = _compute_max_speed(ns, nw, w_in_s, sched_ctxs, workers); /* if we did find at least one solution redistribute the resources */ if(found_sol) { struct types_of_workers *tw = sc_hypervisor_get_types_of_workers(workers, nw); int w, s; double nworkers_per_ctx[ns][tw->nw]; int nworkers_per_ctx_rounded[ns][tw->nw]; for(s = 0; s < ns; s++) { for(w = 0; w < nw; w++) { nworkers_per_ctx[s][w] = 0.0; nworkers_per_ctx_rounded[s][w] = 0; } } for(s = 0; s < ns; s++) { for(w = 0; w < nw; w++) { enum starpu_worker_archtype arch = starpu_worker_get_type(w); int idx = sc_hypervisor_get_index_for_arch(STARPU_CUDA_WORKER, tw); nworkers_per_ctx[s][idx] += w_in_s[s][w]; if(arch == STARPU_CUDA_WORKER) { if(w_in_s[s][w] >= 0.3) nworkers_per_ctx_rounded[s][idx]++; } else { int idx = sc_hypervisor_get_index_for_arch(STARPU_CPU_WORKER, tw); nworkers_per_ctx[s][idx] += w_in_s[s][w]; if(w_in_s[s][w] > 0.5) nworkers_per_ctx_rounded[s][idx]++; } } } /* for(s = 0; s < ns; s++) */ /* printf("%d: cpus = %lf gpus = %lf cpus_round = %d gpus_round = %d\n", s, nworkers[s][1], nworkers[s][0], */ /* nworkers_rounded[s][1], nworkers_rounded[s][0]); */ sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, tw->nw, nworkers_per_ctx_rounded, nworkers_per_ctx, sched_ctxs, tw); free(tw); } } static void throughput_lp_handle_poped_task(__attribute__((unused))unsigned sched_ctx, __attribute__((unused))int worker, __attribute__((unused))struct starpu_task *task, __attribute__((unused))uint32_t footprint) { int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); if(ret != EBUSY) { unsigned criteria = sc_hypervisor_get_resize_criteria(); if(criteria != SC_NOTHING && criteria == SC_SPEED) { if(sc_hypervisor_check_speed_gap_btw_ctxs(NULL, -1, NULL, -1)) { _try_resizing(NULL, -1, NULL, -1); } } STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); } } static void throughput_lp_handle_idle_cycle(unsigned sched_ctx, int worker) { int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); if(ret != EBUSY) { unsigned criteria = sc_hypervisor_get_resize_criteria(); if(criteria != SC_NOTHING && criteria == SC_IDLE) { if(sc_hypervisor_check_idle(sched_ctx, worker)) { _try_resizing(NULL, -1, NULL, -1); // sc_hypervisor_move_workers(sched_ctx, 3 - sched_ctx, &worker, 1, 1); } } STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); } } static void throughput_lp_resize_ctxs(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int nworkers) { int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); if(ret != EBUSY) { _try_resizing(sched_ctxs, nsched_ctxs, workers, nworkers); STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); } } static void throughput_lp_end_ctx(__attribute__((unused))unsigned sched_ctx) { /* struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx); */ /* int worker; */ /* for(worker = 0; worker < 12; worker++) */ /* printf("%d/%d: speed %lf\n", worker, sched_ctx, sc_w->ref_speed[worker]); */ return; } struct sc_hypervisor_policy throughput_lp_policy = { .size_ctxs = NULL, .resize_ctxs = throughput_lp_resize_ctxs, .handle_poped_task = throughput_lp_handle_poped_task, .handle_pushed_task = NULL, .handle_idle_cycle = throughput_lp_handle_idle_cycle, .handle_idle_end = NULL, .handle_post_exec_hook = NULL, .handle_submitted_job = NULL, .end_ctx = throughput_lp_end_ctx, .init_worker = NULL, .custom = 0, .name = "throughput_lp" }; #endif /* STARPU_HAVE_GLPK_H */