exa2pro
/
starpu-max


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505
							/* StarPU --- Runtime system for heterogeneous multicore architectures.
 *
 * Copyright (C) 2010-2012  INRIA
 *
 * StarPU is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * StarPU is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 */

#include <math.h>
#include "sc_hypervisor_lp.h"
#include "sc_hypervisor_policy.h"
#include <starpu_config.h>

#ifdef STARPU_HAVE_GLPK_H


#endif //STARPU_HAVE_GLPK_H

double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_workers, double res[nsched_ctxs][ntypes_of_workers], int total_nw[ntypes_of_workers])
{
	int *sched_ctxs = sc_hypervisor_get_sched_ctxs();
#ifdef STARPU_HAVE_GLPK_H
	double v[nsched_ctxs][ntypes_of_workers];
	double flops[nsched_ctxs];

	int i = 0;
	struct sc_hypervisor_wrapper* sc_w;
	for(i = 0; i < nsched_ctxs; i++)
	{
		sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]);
#ifdef STARPU_USE_CUDA
		int ncuda = starpu_worker_get_count_by_type(STARPU_CUDA_WORKER);
		if(ncuda != 0)
		{
			v[i][0] = sc_hypervisor_get_velocity(sc_w, STARPU_CUDA_WORKER);
			v[i][1] = sc_hypervisor_get_velocity(sc_w, STARPU_CPU_WORKER);
		}
		else
			v[i][0] = sc_hypervisor_get_velocity(sc_w, STARPU_CPU_WORKER);
#else
		v[i][0] = sc_hypervisor_get_velocity(sc_w, STARPU_CPU_WORKER);
#endif // STARPU_USE_CUDA
		flops[i] = sc_w->remaining_flops/1000000000; //sc_w->total_flops/1000000000; /* in gflops*/
//		printf("%d: flops %lf\n", sched_ctxs[i], flops[i]);
	}

	return 1/sc_hypervisor_lp_simulate_distrib_flops(nsched_ctxs, ntypes_of_workers, v, flops, res, total_nw);
#else//STARPU_HAVE_GLPK_H
	return 0.0;
#endif//STARPU_HAVE_GLPK_H
}

double sc_hypervisor_lp_get_tmax(int nw, int *workers)
{
	int ntypes_of_workers = 2;
	int total_nw[ntypes_of_workers];
	sc_hypervisor_group_workers_by_type(workers, nw, 2, total_nw);

	int nsched_ctxs = sc_hypervisor_get_nsched_ctxs();

	double res[nsched_ctxs][ntypes_of_workers];
	return sc_hypervisor_lp_get_nworkers_per_ctx(nsched_ctxs, ntypes_of_workers, res, total_nw) * 1000;
}

void sc_hypervisor_lp_round_double_to_int(int ns, int nw, double res[ns][nw], int res_rounded[ns][nw])
{
	int s, w;
	double left_res[nw];
	for(w = 0; w < nw; w++)
		left_res[nw] = 0.0;
	for(s = 0; s < ns; s++)
	{
		for(w = 0; w < nw; w++)
		{
			int x = floor(res[s][w]);
			double x_double = (double)x;
			double diff = res[s][w] - x_double;

			if(diff != 0.0)
			{
				if(diff > 0.5)
				{
					if(left_res[w] != 0.0)
					{
						if((diff + left_res[w]) > 0.5)
						{
							res_rounded[s][w] = x + 1;
							left_res[w] = (-1.0) * (x_double + 1.0 - (res[s][w] + left_res[w]));
						}
						else
						{
							res_rounded[s][w] = x;
							left_res[w] = (-1.0) * (diff + left_res[w]);
						}
					}
					else
					{
						res_rounded[s][w] = x + 1;
						left_res[w] = (-1.0) * (x_double + 1.0 - res[s][w]);
					}

				}
				else
				{
					if((diff + left_res[w]) > 0.5)
					{
						res_rounded[s][w] = x + 1;
						left_res[w] = (-1.0) * (x_double + 1.0 - (res[s][w] + left_res[w]));
					}
					else
					{
						res_rounded[s][w] = x;
						left_res[w] = diff;
					}
				}
			}
			else 
				res_rounded[s][w] = x;
		}
	}
}

void _lp_find_workers_to_give_away(int nw, int ns, unsigned sched_ctx, int sched_ctx_idx, 
				  int tmp_nw_move[nw], int tmp_workers_move[nw][STARPU_NMAXWORKERS], 
				  int tmp_nw_add[nw], int tmp_workers_add[nw][STARPU_NMAXWORKERS],
				  int res_rounded[ns][nw], double res[ns][nw])
{
	int w;
	for(w = 0; w < nw; w++)
	{
		enum starpu_archtype arch = STARPU_ANY_WORKER;
		if(w == 0) arch = STARPU_CUDA_WORKER;
		if(w == 1) arch = STARPU_CPU_WORKER;
		
		
		if(w == 1)
		{
			int nworkers_ctx = sc_hypervisor_get_nworkers_ctx(sched_ctx, arch);
			if(nworkers_ctx > res_rounded[sched_ctx_idx][w])
			{
				int nworkers_to_move = nworkers_ctx - res_rounded[sched_ctx_idx][w];
				int *workers_to_move = sc_hypervisor_get_idlest_workers(sched_ctx, &nworkers_to_move, arch);
				int i;
				for(i = 0; i < nworkers_to_move; i++)
					tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i];
				free(workers_to_move);
			}
		}
		else
		{
			double nworkers_ctx = sc_hypervisor_get_nworkers_ctx(sched_ctx, arch) * 1.0;
			if(nworkers_ctx > res[sched_ctx_idx][w])
			{
				double nworkers_to_move = nworkers_ctx - res[sched_ctx_idx][w];
				int x = floor(nworkers_to_move);
				double x_double = (double)x;
				double diff = nworkers_to_move - x_double;
				if(diff == 0.0)
				{
					int *workers_to_move = sc_hypervisor_get_idlest_workers(sched_ctx, &x, arch);
					if(x > 0)
					{
						int i;
						for(i = 0; i < x; i++)
							tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i];
						
					}
					free(workers_to_move);
				}
				else
				{
					x+=1;
					int *workers_to_move = sc_hypervisor_get_idlest_workers(sched_ctx, &x, arch);
					if(x > 0)
					{
						int i;
						for(i = 0; i < x-1; i++)
							tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i];
						
						if(diff > 0.8)
							tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[x-1];
						else
							if(diff > 0.3)
								tmp_workers_add[w][tmp_nw_add[w]++] = workers_to_move[x-1];
						
					}
					free(workers_to_move);
				}
			}
		}
	}
}

void _lp_find_workers_to_accept(int nw, int ns, unsigned sched_ctx, int sched_ctx_idx, 
				int tmp_nw_move[nw], int tmp_workers_move[nw][STARPU_NMAXWORKERS], 
				int tmp_nw_add[nw], int tmp_workers_add[nw][STARPU_NMAXWORKERS],
				int *nw_move, int workers_move[STARPU_NMAXWORKERS], 
				int *nw_add, int workers_add[STARPU_NMAXWORKERS],
				int res_rounded[ns][nw], double res[ns][nw])
{
	int w;
	int j = 0, k = 0;
	for(w = 0; w < nw; w++)
	{
		enum starpu_archtype arch = STARPU_ANY_WORKER;
		if(w == 0) arch = STARPU_CUDA_WORKER;
		if(w == 1) arch = STARPU_CPU_WORKER;
		
		int nw_ctx2 = sc_hypervisor_get_nworkers_ctx(sched_ctx, arch);
		int nw_needed = res_rounded[sched_ctx_idx][w] - nw_ctx2;
		
		if( nw_needed > 0 && tmp_nw_move[w] > 0)
		{
			*nw_move += nw_needed >= tmp_nw_move[w] ? tmp_nw_move[w] : nw_needed;
			int i = 0;
			for(i = 0; i < STARPU_NMAXWORKERS; i++)
			{
				if(tmp_workers_move[w][i] != -1)
				{
					workers_move[j++] = tmp_workers_move[w][i];
					tmp_workers_move[w][i] = -1;
					if(j == *nw_move)
						break;
				}
			}
			tmp_nw_move[w] -=  *nw_move;
		}
		
		
		double needed = res[sched_ctx_idx][w] - (nw_ctx2 * 1.0);
		int x = floor(needed);
		double x_double = (double)x;
		double diff = needed - x_double;
		if(diff > 0.3 && tmp_nw_add[w] > 0)
		{
			*nw_add = tmp_nw_add[w];
			int i = 0;
			for(i = 0; i < STARPU_NMAXWORKERS; i++)
			{
				if(tmp_workers_add[w][i] != -1)
				{
					workers_add[k++] = tmp_workers_add[w][i];
					tmp_workers_add[w][i] = -1;
					if(k == *nw_add)
						break;
				}
			}
			tmp_nw_add[w] -=  *nw_add;
		}
	}
}

void _lp_find_workers_to_remove(int nw, int tmp_nw_move[nw], int tmp_workers_move[nw][STARPU_NMAXWORKERS], 
				int *nw_move, int workers_move[STARPU_NMAXWORKERS])
{
	int w;
	for(w = 0; w < nw; w++)
	{
		if(tmp_nw_move[w] > 0)
		{
			*nw_move += tmp_nw_move[w];
			int i = 0, j = 0;
			for(i = 0; i < STARPU_NMAXWORKERS; i++)
			{
				if(tmp_workers_move[w][i] != -1)
				{
					workers_move[j++] = tmp_workers_move[w][i];
					tmp_workers_move[w][i] = -1;
					if(j == *nw_move)
						break;
				}
			}
			
		}
	}
}

void sc_hypervisor_lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rounded[ns][nw], double res[ns][nw])
{
	int *sched_ctxs = sc_hypervisor_get_sched_ctxs();
	int s, s2, w;
	for(s = 0; s < ns; s++)
	{
		int tmp_workers_move[nw][STARPU_NMAXWORKERS];
		int tmp_nw_move[nw];

		int tmp_workers_add[nw][STARPU_NMAXWORKERS];
		int tmp_nw_add[nw];
		

		for(w = 0; w < nw; w++)		
		{
			tmp_nw_move[w] = 0;
			tmp_nw_add[w] = 0;
			int i;
			for(i = 0; i < STARPU_NMAXWORKERS; i++)
			{
				tmp_workers_move[w][i] = -1;
				tmp_workers_add[w][i] = -1;
			}
		}

		/* find workers that ctx s has to give away */
		_lp_find_workers_to_give_away(nw, ns, sched_ctxs[s], s, 
					      tmp_nw_move, tmp_workers_move, 
					      tmp_nw_add, tmp_workers_add, res_rounded, res);

		for(s2 = 0; s2 < ns; s2++)
		{
			if(sched_ctxs[s2] != sched_ctxs[s])
			{
				/* find workers that ctx s2 wants to accept from ctx s 
				   the rest of it will probably accepted by another ctx */
				int workers_move[STARPU_NMAXWORKERS];
				int nw_move = 0;
				
				int workers_add[STARPU_NMAXWORKERS];
				int nw_add = 0;
				

				_lp_find_workers_to_accept(nw, ns, sched_ctxs[s2], s2, 
							   tmp_nw_move, tmp_workers_move, 
							   tmp_nw_add, tmp_workers_add,
							   &nw_move, workers_move, 
							   &nw_add, workers_add,
							   res_rounded, res);
				
				if(nw_move > 0)
				{
					sc_hypervisor_move_workers(sched_ctxs[s], sched_ctxs[s2], workers_move, nw_move, 0);
					nw_move = 0;
				}

				if(nw_add > 0)
				{
					sc_hypervisor_add_workers_to_sched_ctx(workers_add, nw_add, sched_ctxs[s2]);
					nw_add = 0;
				}
			}
		}

		/* if there are workers that weren't accepted by anyone but ctx s wants
		   to get rid of them just remove them from ctx s */
		int workers_move[STARPU_NMAXWORKERS];
		int nw_move = 0;
				
		_lp_find_workers_to_remove(nw, tmp_nw_move, tmp_workers_move, 
					   &nw_move, workers_move);
		if(nw_move > 0)
			sc_hypervisor_remove_workers_from_sched_ctx(workers_move, nw_move, sched_ctxs[s], 0);
	}
}

void sc_hypervisor_lp_distribute_resources_in_ctxs(int* sched_ctxs, int ns, int nw, int res_rounded[ns][nw], double res[ns][nw], int *workers, int nworkers)
{
	unsigned current_nworkers = workers == NULL ? starpu_worker_get_count() : (unsigned)nworkers;
	int s, w;
	int start[nw];
	for(w = 0; w < nw; w++)
		start[w] = 0;
	for(s = 0; s < ns; s++)
	{
		int workers_add[STARPU_NMAXWORKERS];
                int nw_add = 0;
		
		for(w = 0; w < nw; w++)
		{
			enum starpu_archtype arch;

#ifdef STARPU_USE_CUDA
			int ncuda = starpu_worker_get_count_by_type(STARPU_CUDA_WORKER);
			if(ncuda != 0)
			{
				if(w == 0) arch = STARPU_CUDA_WORKER;
				if(w == 1) arch = STARPU_CPU_WORKER;
			}
			else
				if(w == 0) arch = STARPU_CPU_WORKER;
#else
			if(w == 0) arch = STARPU_CPU_WORKER;
#endif //STARPU_USE_CUDA
			if(w == 1)
			{
				int nworkers_to_add = res_rounded[s][w];
				int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, current_nworkers, &nworkers_to_add, arch);
				int i;
				for(i = 0; i < nworkers_to_add; i++)
					workers_add[nw_add++] = workers_to_add[i];
				free(workers_to_add);
			}
			
			else
			{
				double nworkers_to_add = res[s][w];
				int x = floor(nworkers_to_add);
				double x_double = (double)x;
				double diff = nworkers_to_add - x_double;
				if(diff == 0.0)
				{
					int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, current_nworkers, &x, arch);
					int i;
					for(i = 0; i < x; i++)
						workers_add[nw_add++] = workers_to_add[i];
					free(workers_to_add);
				}
				else
				{
					x+=1;
					int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, current_nworkers, &x, arch);
					int i;
					if(diff >= 0.3)
						for(i = 0; i < x; i++)
							workers_add[nw_add++] = workers_to_add[i];
					else
						for(i = 0; i < x-1; i++)
							workers_add[nw_add++] = workers_to_add[i];

					free(workers_to_add);
				}
			}
		}
		if(nw_add > 0)
		{
			sc_hypervisor_add_workers_to_sched_ctx(workers_add, nw_add, sched_ctxs[s]);
			sc_hypervisor_start_resize(sched_ctxs[s]);
		}

//		sc_hypervisor_stop_resize(current_sched_ctxs[s]);
	}
}

/* nw = all the workers (either in a list or on all machine) */
void sc_hypervisor_lp_place_resources_in_ctx(int ns, int nw, double w_in_s[ns][nw], int *sched_ctxs_input, int *workers_input, unsigned do_size)
{
	int w, s;
	double nworkers[ns][2];
	int nworkers_rounded[ns][2];
	for(s = 0; s < ns; s++)
	{
		nworkers[s][0] = 0.0;
		nworkers[s][1] = 0.0;
		nworkers_rounded[s][0] = 0;
		nworkers_rounded[s][1] = 0;
		
	}
	
	for(s = 0; s < ns; s++)
	{
		for(w = 0; w < nw; w++)
		{
			enum starpu_archtype arch = starpu_worker_get_type(w);
			
			if(arch == STARPU_CUDA_WORKER)
			{
				nworkers[s][0] += w_in_s[s][w];
				if(w_in_s[s][w] >= 0.3)
					nworkers_rounded[s][0]++;
			}
			else
			{
				nworkers[s][1] += w_in_s[s][w];
				if(w_in_s[s][w] > 0.5)
					nworkers_rounded[s][1]++;
			}
		}
	}
	
	if(!do_size)
		sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, 2, nworkers_rounded, nworkers);
	else
	{
		int *current_sched_ctxs = sched_ctxs_input == NULL ? sc_hypervisor_get_sched_ctxs() : sched_ctxs_input;

		unsigned has_workers = 0;
		for(s = 0; s < ns; s++)
		{
			int nworkers_ctx = sc_hypervisor_get_nworkers_ctx(current_sched_ctxs[s], 
										 STARPU_ANY_WORKER);
			if(nworkers_ctx != 0)
			{
				has_workers = 1;
				break;
			}
		}
		if(has_workers)
			sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, 2, nworkers_rounded, nworkers);
		else
			sc_hypervisor_lp_distribute_resources_in_ctxs(current_sched_ctxs, ns, 2, nworkers_rounded, nworkers, workers_input, nw);
	}
	return;
}

double sc_hypervisor_lp_find_tmax(double t1, double t2)
{
	return t1 + ((t2 - t1)/2);
}