/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2012 INRIA * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "lp_tools.h" #ifdef HAVE_GLPK_H static double _glp_get_nworkers_per_ctx(int ns, int nw, double v[ns][nw], double flops[ns], double res[ns][nw], int total_nw[nw]) { int s, w; glp_prob *lp; int ne = (ns*nw+1)*(ns+nw) + 1; /* glp dumbness */ int n = 1; int ia[ne], ja[ne]; double ar[ne]; lp = glp_create_prob(); glp_set_prob_name(lp, "sample"); glp_set_obj_dir(lp, GLP_MAX); glp_set_obj_name(lp, "max speed"); /* we add nw*ns columns one for each type of worker in each context and another column corresponding to the 1/tmax bound (bc 1/tmax is a variable too)*/ glp_add_cols(lp, nw*ns+1); for(s = 0; s < ns; s++) { for(w = 0; w < nw; w++) { char name[32]; snprintf(name, sizeof(name), "worker%dctx%d", w, s); glp_set_col_name(lp, n, name); glp_set_col_bnds(lp, n, GLP_LO, 0.3, 0.0); n++; } } /*1/tmax should belong to the interval [0.0;1.0]*/ glp_set_col_name(lp, n, "vmax"); glp_set_col_bnds(lp, n, GLP_DB, 0.0, 1.0); /* Z = 1/tmax -> 1/tmax structural variable, nCPUs & nGPUs in ctx are auxiliar variables */ glp_set_obj_coef(lp, n, 1.0); n = 1; /* one row corresponds to one ctx*/ glp_add_rows(lp, ns); for(s = 0; s < ns; s++) { char name[32]; snprintf(name, sizeof(name), "ctx%d", s); glp_set_row_name(lp, s+1, name); glp_set_row_bnds(lp, s+1, GLP_LO, 0., 0.); for(w = 0; w < nw; w++) { int s2; for(s2 = 0; s2 < ns; s2++) { if(s2 == s) { ia[n] = s+1; ja[n] = w + nw*s2 + 1; ar[n] = v[s][w]; // printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]); } else { ia[n] = s+1; ja[n] = w + nw*s2 + 1; ar[n] = 0.0; // printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]); } n++; } } /* 1/tmax */ ia[n] = s+1; ja[n] = ns*nw+1; ar[n] = (-1) * flops[s]; // printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]); n++; } /*we add another linear constraint : sum(all cpus) = 9 and sum(all gpus) = 3 */ glp_add_rows(lp, nw); for(w = 0; w < nw; w++) { char name[32]; snprintf(name, sizeof(name), "w%d", w); glp_set_row_name(lp, ns+w+1, name); for(s = 0; s < ns; s++) { int w2; for(w2 = 0; w2 < nw; w2++) { if(w2 == w) { ia[n] = ns+w+1; ja[n] = w2+s*nw + 1; ar[n] = 1.0; // printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]); } else { ia[n] = ns+w+1; ja[n] = w2+s*nw + 1; ar[n] = 0.0; // printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]); } n++; } } /* 1/tmax */ ia[n] = ns+w+1; ja[n] = ns*nw+1; ar[n] = 0.0; // printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]); n++; /*sum(all gpus) = 3*/ if(w == 0) glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[0], total_nw[0]); /*sum(all cpus) = 9*/ if(w == 1) glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[1], total_nw[1]); } STARPU_ASSERT(n == ne); glp_load_matrix(lp, ne-1, ia, ja, ar); glp_smcp parm; glp_init_smcp(&parm); parm.msg_lev = GLP_MSG_OFF; glp_simplex(lp, &parm); double vmax = glp_get_obj_val(lp); n = 1; for(s = 0; s < ns; s++) { for(w = 0; w < nw; w++) { res[s][w] = glp_get_col_prim(lp, n); n++; } } glp_delete_prob(lp); return vmax; } #endif //HAVE_GLPK_H double _lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_workers, double res[nsched_ctxs][ntypes_of_workers], int total_nw[ntypes_of_workers]) { int *sched_ctxs = sched_ctx_hypervisor_get_sched_ctxs(); #ifdef HAVE_GLPK_H double v[nsched_ctxs][ntypes_of_workers]; double flops[nsched_ctxs]; #endif int i = 0; struct sched_ctx_wrapper* sc_w; for(i = 0; i < nsched_ctxs; i++) { sc_w = sched_ctx_hypervisor_get_wrapper(sched_ctxs[i]); #ifdef HAVE_GLPK_H v[i][0] = 200.0;//_get_velocity_per_worker_type(sc_w, STARPU_CUDA_WORKER); v[i][1] = 20.0;//_get_velocity_per_worker_type(sc_w, STARPU_CPU_WORKER); flops[i] = sc_w->remaining_flops/1000000000; //sc_w->total_flops/1000000000; /* in gflops*/ // printf("%d: flops %lf\n", sched_ctxs[i], flops[i]); #endif } #ifdef HAVE_GLPK_H return 1/_glp_get_nworkers_per_ctx(nsched_ctxs, ntypes_of_workers, v, flops, res, total_nw); #else return 0.0; #endif } double _lp_get_tmax(int nw, int *workers) { int ntypes_of_workers = 2; int total_nw[ntypes_of_workers]; _get_total_nw(workers, nw, 2, total_nw); int nsched_ctxs = sched_ctx_hypervisor_get_nsched_ctxs(); double res[nsched_ctxs][ntypes_of_workers]; return _lp_get_nworkers_per_ctx(nsched_ctxs, ntypes_of_workers, res, total_nw) * 1000; } void _lp_round_double_to_int(int ns, int nw, double res[ns][nw], int res_rounded[ns][nw]) { int s, w; double left_res[nw]; for(w = 0; w < nw; w++) left_res[nw] = 0.0; for(s = 0; s < ns; s++) { for(w = 0; w < nw; w++) { int x = floor(res[s][w]); double x_double = (double)x; double diff = res[s][w] - x_double; if(diff != 0.0) { if(diff > 0.5) { if(left_res[w] != 0.0) { if((diff + left_res[w]) > 0.5) { res_rounded[s][w] = x + 1; left_res[w] = (-1.0) * (x_double + 1.0 - (res[s][w] + left_res[w])); } else { res_rounded[s][w] = x; left_res[w] = (-1.0) * (diff + left_res[w]); } } else { res_rounded[s][w] = x + 1; left_res[w] = (-1.0) * (x_double + 1.0 - res[s][w]); } } else { if((diff + left_res[w]) > 0.5) { res_rounded[s][w] = x + 1; left_res[w] = (-1.0) * (x_double + 1.0 - (res[s][w] + left_res[w])); } else { res_rounded[s][w] = x; left_res[w] = diff; } } } } } } void _lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rounded[ns][nw], double res[ns][nw]) { int *sched_ctxs = sched_ctx_hypervisor_get_sched_ctxs(); int s, s2, w; for(s = 0; s < ns; s++) { for(w = 0; w < nw; w++) { enum starpu_archtype arch; if(w == 0) arch = STARPU_CUDA_WORKER; if(w == 1) arch = STARPU_CPU_WORKER; int workers_move[STARPU_NMAXWORKERS]; int nw_move = 0; int workers_add[STARPU_NMAXWORKERS]; int nw_add = 0; if(w == 1) { int nworkers_ctx = get_nworkers_ctx(sched_ctxs[s], arch); if(nworkers_ctx > res_rounded[s][w]) { int nworkers_to_move = nworkers_ctx - res_rounded[s][w]; int *workers_to_move = _get_first_workers(sched_ctxs[s], &nworkers_to_move, arch); int i; for(i = 0; i < nworkers_to_move; i++) workers_move[nw_move++] = workers_to_move[i]; free(workers_to_move); } } else { double nworkers_ctx = get_nworkers_ctx(sched_ctxs[s], arch) * 1.0; if(nworkers_ctx > res[s][w]) { double nworkers_to_move = nworkers_ctx - res[s][w]; int x = floor(nworkers_to_move); double x_double = (double)x; double diff = nworkers_to_move - x_double; if(diff == 0.0) { int *workers_to_move = _get_first_workers(sched_ctxs[s], &x, arch); if(x > 0) { int i; for(i = 0; i < x; i++) workers_move[nw_move++] = workers_to_move[i]; } free(workers_to_move); } else { x+=1; int *workers_to_move = _get_first_workers(sched_ctxs[s], &x, arch); if(x > 0) { int i; for(i = 0; i < x-1; i++) workers_move[nw_move++] = workers_to_move[i]; if(diff > 0.8) workers_move[nw_move++] = workers_to_move[x-1]; else if(diff > 0.3) workers_add[nw_add++] = workers_to_move[x-1]; } free(workers_to_move); } } } for(s2 = 0; s2 < ns; s2++) { if(sched_ctxs[s2] != sched_ctxs[s]) { double nworkers_ctx2 = get_nworkers_ctx(sched_ctxs[s2], arch) * 1.0; if((res[s2][w] - nworkers_ctx2) >= 0.0 && nw_move > 0) { sched_ctx_hypervisor_move_workers(sched_ctxs[s], sched_ctxs[s2], workers_move, nw_move, 0); nw_move = 0; break; } if((res[s2][w] - nworkers_ctx2) >= 0.0 && (res[s2][w] - nworkers_ctx2) <= (double)nw_add && nw_add > 0) { sched_ctx_hypervisor_add_workers_to_sched_ctx(workers_add, nw_add, sched_ctxs[s2]); nw_add = 0; break; } } } if(nw_move > 0) sched_ctx_hypervisor_remove_workers_from_sched_ctx(workers_move, nw_move, sched_ctxs[s], 0); } } } void _lp_distribute_resources_in_ctxs(int* sched_ctxs, int ns, int nw, int res_rounded[ns][nw], double res[ns][nw], int *workers, int nworkers) { int current_nworkers = workers == NULL ? starpu_worker_get_count() : nworkers; int *current_sched_ctxs = sched_ctxs == NULL ? sched_ctx_hypervisor_get_sched_ctxs() : sched_ctxs; int s, w; for(s = 0; s < ns; s++) { for(w = 0; w < nw; w++) { enum starpu_archtype arch; if(w == 0) arch = STARPU_CUDA_WORKER; if(w == 1) arch = STARPU_CPU_WORKER; if(w == 1) { int nworkers_to_add = res_rounded[s][w]; int *workers_to_add = _get_first_workers_in_list(workers, current_nworkers, &nworkers_to_add, arch); if(nworkers_to_add > 0) { sched_ctx_hypervisor_add_workers_to_sched_ctx(workers_to_add, nworkers_to_add, current_sched_ctxs[s]); sched_ctx_hypervisor_start_resize(current_sched_ctxs[s]); struct policy_config *new_config = sched_ctx_hypervisor_get_config(current_sched_ctxs[s]); int i; for(i = 0; i < nworkers_to_add; i++) new_config->max_idle[workers_to_add[i]] = new_config->max_idle[workers_to_add[i]] != MAX_IDLE_TIME ? new_config->max_idle[workers_to_add[i]] : new_config->new_workers_max_idle; } free(workers_to_add); } else { double nworkers_to_add = res[s][w]; int x = floor(nworkers_to_add); double x_double = (double)x; double diff = nworkers_to_add - x_double; if(diff == 0.0) { int *workers_to_add = _get_first_workers_in_list(workers, current_nworkers, &x, arch); if(x > 0) { sched_ctx_hypervisor_add_workers_to_sched_ctx(workers_to_add, x, current_sched_ctxs[s]); sched_ctx_hypervisor_start_resize(current_sched_ctxs[s]); } free(workers_to_add); } else { x+=1; int *workers_to_add = _get_first_workers_in_list(workers, current_nworkers, &x, arch); if(x > 0) { if(diff >= 0.3) sched_ctx_hypervisor_add_workers_to_sched_ctx(workers_to_add, x, current_sched_ctxs[s]); else sched_ctx_hypervisor_add_workers_to_sched_ctx(workers_to_add, x-1, current_sched_ctxs[s]); sched_ctx_hypervisor_start_resize(current_sched_ctxs[s]); } free(workers_to_add); } } } sched_ctx_hypervisor_stop_resize(current_sched_ctxs[s]); } }