/* work_stealing_policy.c */
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2010-2011 Université de Bordeaux 1
  4. * Copyright (C) 2010, 2011 Centre National de la Recherche Scientifique
  5. * Copyright (C) 2011, 2012 INRIA
  6. *
  7. * StarPU is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation; either version 2.1 of the License, or (at
  10. * your option) any later version.
  11. *
  12. * StarPU is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. *
  16. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  17. */
  18. /* Work stealing policy */
  19. #include <float.h>
  20. #include <core/workers.h>
  21. #include <sched_policies/deque_queues.h>
/* Per-scheduling-context private state of the work stealing policy. */
typedef struct{
	/* One deque per worker, indexed by workerid (allocated in
	 * initialize_ws_policy, populated in ws_add_workers). */
	struct _starpu_deque_jobq **queue_array;
	/* NOTE(review): rr_worker is never referenced in this file — presumably
	 * a leftover; confirm before removing. */
	unsigned rr_worker;
	/* keep track of the work performed from the beginning of the algorithm to make
	 * better decisions about which queue to select when stealing or deferring work
	 */
	unsigned performed_total;
	/* Single mutex/condition shared by every worker of the context
	 * (see ws_add_workers / ws_push_task). */
	pthread_mutex_t sched_mutex;
	pthread_cond_t sched_cond;
	/* Round-robin cursors for victim selection (pop) and queue
	 * selection (push). */
	unsigned last_pop_worker;
	unsigned last_push_worker;
} work_stealing_data;
#ifdef USE_OVERLOAD
/**
 * Minimum number of tasks we wait to be processed before we start making
 * assumptions about which worker the computation would be faster on
 * (see select_victim_overload / select_worker_overload).
 */
static int calibration_value = 0;
#endif /* USE_OVERLOAD */
  41. /**
  42. * Return a worker from which a task can be stolen.
  43. * Selecting a worker is done in a round-robin fashion, unless
  44. * the worker previously selected doesn't own any task,
  45. * then we return the first non-empty worker.
  46. */
  47. static unsigned select_victim_round_robin(unsigned sched_ctx_id)
  48. {
  49. work_stealing_data *ws = (work_stealing_data*)starpu_get_sched_ctx_policy_data(sched_ctx_id);
  50. unsigned worker = ws->last_pop_worker;
  51. unsigned nworkers = starpu_get_nworkers_of_sched_ctx(sched_ctx_id);
  52. /* If the worker's queue is empty, let's try
  53. * the next ones */
  54. while (!ws->queue_array[worker]->njobs)
  55. {
  56. worker = (worker + 1) % nworkers;
  57. if (worker == ws->last_pop_worker)
  58. {
  59. /* We got back to the first worker,
  60. * don't go in infinite loop */
  61. break;
  62. }
  63. }
  64. ws->last_pop_worker = (worker + 1) % nworkers;
  65. return worker;
  66. }
  67. /**
  68. * Return a worker to whom add a task.
  69. * Selecting a worker is done in a round-robin fashion.
  70. */
  71. static unsigned select_worker_round_robin(unsigned sched_ctx_id)
  72. {
  73. work_stealing_data *ws = (work_stealing_data*)starpu_get_sched_ctx_policy_data(sched_ctx_id);
  74. unsigned worker = ws->last_push_worker;
  75. unsigned nworkers = starpu_get_nworkers_of_sched_ctx(sched_ctx_id);
  76. ws->last_push_worker = (ws->last_push_worker + 1) % nworkers;
  77. return worker;
  78. }
  79. #ifdef USE_OVERLOAD
  80. /**
  81. * Return a ratio helpful to determine whether a worker is suitable to steal
  82. * tasks from or to put some tasks in its queue.
  83. *
  84. * \return a ratio with a positive or negative value, describing the current state of the worker :
  85. * a smaller value implies a faster worker with an relatively emptier queue : more suitable to put tasks in
  86. * a bigger value implies a slower worker with an reletively more replete queue : more suitable to steal tasks from
  87. */
  88. static float overload_metric(unsigned sched_ctx_id, unsigned id)
  89. {
  90. work_stealing_data *ws = (work_stealing_data*)starpu_get_sched_ctx_policy_data(sched_ctx_id);
  91. float execution_ratio = 0.0f;
  92. float current_ratio = 0.0f;
  93. int nprocessed = _starpu_get_deque_nprocessed(ws->queue_array[id]);
  94. unsigned njobs = _starpu_get_deque_njobs(ws->queue_array[id]);
  95. /* Did we get enough information ? */
  96. if (performed_total > 0 && nprocessed > 0)
  97. {
  98. /* How fast or slow is the worker compared to the other workers */
  99. execution_ratio = (float) nprocessed / performed_total;
  100. /* How replete is its queue */
  101. current_ratio = (float) njobs / nprocessed;
  102. }
  103. else
  104. {
  105. return 0.0f;
  106. }
  107. return (current_ratio - execution_ratio);
  108. }
  109. /**
  110. * Return the most suitable worker from which a task can be stolen.
  111. * The number of previously processed tasks, total and local,
  112. * and the number of tasks currently awaiting to be processed
  113. * by the tasks are taken into account to select the most suitable
  114. * worker to steal task from.
  115. */
  116. static unsigned select_victim_overload(unsigned sched_ctx_id)
  117. {
  118. unsigned worker;
  119. float worker_ratio;
  120. unsigned best_worker = 0;
  121. float best_ratio = FLT_MIN;
  122. /* Don't try to play smart until we get
  123. * enough informations. */
  124. if (performed_total < calibration_value)
  125. return select_victim_round_robin(sched_ctx_id);
  126. struct worker_collection *workers = starpu_get_worker_collection_of_sched_ctx(sched_ctx_id);
  127. if(workers->init_cursor)
  128. workers->init_cursor(workers);
  129. while(workers->has_next(workers))
  130. {
  131. worker = workers->get_next(workers);
  132. worker_ratio = overload_metric(sched_ctx_id, worker);
  133. if (worker_ratio > best_ratio)
  134. {
  135. best_worker = worker;
  136. best_ratio = worker_ratio;
  137. }
  138. }
  139. if(workers->init_cursor)
  140. workers->deinit_cursor(workers);
  141. return best_worker;
  142. }
  143. /**
  144. * Return the most suitable worker to whom add a task.
  145. * The number of previously processed tasks, total and local,
  146. * and the number of tasks currently awaiting to be processed
  147. * by the tasks are taken into account to select the most suitable
  148. * worker to add a task to.
  149. */
  150. static unsigned select_worker_overload(unsigned sched_ctx_id)
  151. {
  152. unsigned worker;
  153. float worker_ratio;
  154. unsigned best_worker = 0;
  155. float best_ratio = FLT_MAX;
  156. /* Don't try to play smart until we get
  157. * enough informations. */
  158. if (performed_total < calibration_value)
  159. return select_worker_round_robin(sched_ctx_id);
  160. struct worker_collection *workers = starpu_get_worker_collection_of_sched_ctx(sched_ctx_id);
  161. if(workers->init_cursor)
  162. workers->init_cursor(workers);
  163. while(workers->has_next(workers))
  164. {
  165. worker = workers->get_next(workers);
  166. worker_ratio = overload_metric(sched_ctx_id, worker);
  167. if (worker_ratio < best_ratio)
  168. {
  169. best_worker = worker;
  170. best_ratio = worker_ratio;
  171. }
  172. }
  173. if(workers->init_cursor)
  174. workers->deinit_cursor(workers);
  175. return best_worker;
  176. }
  177. #endif /* USE_OVERLOAD */
/**
 * Return a worker from which a task can be stolen.
 * This is a phony function used to call the right
 * function depending on the value of USE_OVERLOAD.
 */
static inline unsigned select_victim(unsigned sched_ctx_id)
{
#ifdef USE_OVERLOAD
	return select_victim_overload(sched_ctx_id);
#else
	return select_victim_round_robin(sched_ctx_id);
#endif /* USE_OVERLOAD */
}
/**
 * Return a worker to whom to add a task.
 * This is a phony function used to call the right
 * function depending on the value of USE_OVERLOAD.
 * (The original comment said "from which a task can be stolen" —
 * a copy-paste of select_victim's description.)
 */
static inline unsigned select_worker(unsigned sched_ctx_id)
{
#ifdef USE_OVERLOAD
	return select_worker_overload(sched_ctx_id);
#else
	return select_worker_round_robin(sched_ctx_id);
#endif /* USE_OVERLOAD */
}
  204. #ifdef STARPU_DEVEL
  205. #warning TODO rewrite ... this will not scale at all now
  206. #endif
/**
 * Pop a task for the calling worker: serve its own deque first, and
 * fall back to stealing from a victim chosen by select_victim().
 * Returns NULL when neither the local nor the victim queue yields a task.
 * NOTE(review): no lock is taken here — presumably the caller holds the
 * context's scheduling mutex; confirm against the StarPU core.
 */
static struct starpu_task *ws_pop_task(unsigned sched_ctx_id)
{
	work_stealing_data *ws = (work_stealing_data*)starpu_get_sched_ctx_policy_data(sched_ctx_id);

	struct starpu_task *task;
	struct _starpu_deque_jobq *q;

	/* Must be called from a worker thread (id != -1). */
	int workerid = starpu_worker_get_id();

	STARPU_ASSERT(workerid != -1);

	q = ws->queue_array[workerid];

	task = _starpu_deque_pop_task(q, workerid);
	if (task)
	{
		/* there was a local task */
		ws->performed_total++;
		q->nprocessed++;
		q->njobs--;
		return task;
	}

	/* we need to steal someone's job */
	unsigned victim = select_victim(sched_ctx_id);
	struct _starpu_deque_jobq *victimq = ws->queue_array[victim];

	task = _starpu_deque_pop_task(victimq, workerid);
	if (task)
	{
		_STARPU_TRACE_WORK_STEALING(q, workerid);
		ws->performed_total++;

		/* Beware : we have to increase the number of processed tasks of
		 * the stealer, not the victim ! */
		q->nprocessed++;
		victimq->njobs--;
	}

	return task;
}
/**
 * Push a task into the queue of the calling worker, or — when called from
 * the main thread — into the queue of the worker chosen by select_worker().
 * Returns 0 on success, -1 when the context currently has no workers.
 * Lock order: changing_ctx_mutex first, then ws->sched_mutex.
 */
int ws_push_task(struct starpu_task *task)
{
	unsigned sched_ctx_id = task->sched_ctx;
	work_stealing_data *ws = (work_stealing_data*)starpu_get_sched_ctx_policy_data(sched_ctx_id);

	struct _starpu_deque_jobq *deque_queue;
	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
	int workerid = starpu_worker_get_id();

	pthread_mutex_t *changing_ctx_mutex = starpu_get_changing_ctx_mutex(sched_ctx_id);
	unsigned nworkers;
	int ret_val = -1;

	/* if the context has no workers return */
	_STARPU_PTHREAD_MUTEX_LOCK(changing_ctx_mutex);
	nworkers = starpu_get_nworkers_of_sched_ctx(sched_ctx_id);
	if(nworkers == 0)
	{
		_STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
		return ret_val;
	}

	_STARPU_PTHREAD_MUTEX_LOCK(&ws->sched_mutex);

	/* If the current thread is not a worker but
	 * the main thread (-1), we find the better one to
	 * put task on its queue */
	if (workerid == -1)
		workerid = select_worker(sched_ctx_id);

	deque_queue = ws->queue_array[workerid];

	_STARPU_TRACE_JOB_PUSH(task, 0);
	_starpu_job_list_push_back(deque_queue->jobq, j);
	deque_queue->njobs++;

	/* Wake up a worker possibly sleeping on the shared condition. */
	_STARPU_PTHREAD_COND_SIGNAL(&ws->sched_cond);
	_STARPU_PTHREAD_MUTEX_UNLOCK(&ws->sched_mutex);
	_STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);

	return 0;
}
  272. static void ws_add_workers(unsigned sched_ctx_id, int *workerids,unsigned nworkers)
  273. {
  274. work_stealing_data *ws = (work_stealing_data*)starpu_get_sched_ctx_policy_data(sched_ctx_id);
  275. unsigned i;
  276. int workerid;
  277. for (i = 0; i < nworkers; i++)
  278. {
  279. workerid = workerids[i];
  280. ws->queue_array[workerid] = _starpu_create_deque();
  281. /**
  282. * The first WS_POP_TASK will increase NPROCESSED though no task was actually performed yet,
  283. * we need to initialize it at -1.
  284. */
  285. ws->queue_array[workerid]->nprocessed = -1;
  286. ws->queue_array[workerid]->njobs = 0;
  287. starpu_worker_set_sched_condition(sched_ctx_id, workerid, &ws->sched_mutex, &ws->sched_cond);
  288. }
  289. }
  290. static void ws_remove_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers)
  291. {
  292. work_stealing_data *ws = (work_stealing_data*)starpu_get_sched_ctx_policy_data(sched_ctx_id);
  293. unsigned i;
  294. int workerid;
  295. for (i = 0; i < nworkers; i++)
  296. {
  297. workerid = workerids[i];
  298. _starpu_destroy_deque(ws->queue_array[workerid]);
  299. starpu_worker_set_sched_condition(sched_ctx_id, workerid, NULL, NULL);
  300. }
  301. }
  302. static void initialize_ws_policy(unsigned sched_ctx_id)
  303. {
  304. starpu_create_worker_collection_for_sched_ctx(sched_ctx_id, WORKER_LIST);
  305. work_stealing_data *ws = (work_stealing_data*)malloc(sizeof(work_stealing_data));
  306. starpu_set_sched_ctx_policy_data(sched_ctx_id, (void*)ws);
  307. ws->last_pop_worker = 0;
  308. ws->last_push_worker = 0;
  309. /**
  310. * The first WS_POP_TASK will increase PERFORMED_TOTAL though no task was actually performed yet,
  311. * we need to initialize it at -1.
  312. */
  313. ws->performed_total = -1;
  314. ws->queue_array = (struct _starpu_deque_jobq**)malloc(STARPU_NMAXWORKERS*sizeof(struct _starpu_deque_jobq*));
  315. _STARPU_PTHREAD_MUTEX_INIT(&ws->sched_mutex, NULL);
  316. _STARPU_PTHREAD_COND_INIT(&ws->sched_cond, NULL);
  317. }
  318. static void deinit_ws_policy(unsigned sched_ctx_id)
  319. {
  320. work_stealing_data *ws = (work_stealing_data*)starpu_get_sched_ctx_policy_data(sched_ctx_id);
  321. free(ws->queue_array);
  322. _STARPU_PTHREAD_MUTEX_DESTROY(&ws->sched_mutex);
  323. _STARPU_PTHREAD_COND_DESTROY(&ws->sched_cond);
  324. free(ws);
  325. starpu_delete_worker_collection_for_sched_ctx(sched_ctx_id);
  326. }
/* Exported descriptor of the work stealing ("ws") scheduling policy.
 * Hooks left NULL are simply not used by this policy. */
struct starpu_sched_policy _starpu_sched_ws_policy =
{
	.init_sched = initialize_ws_policy,
	.deinit_sched = deinit_ws_policy,
	.add_workers = ws_add_workers,
	.remove_workers = ws_remove_workers,
	.push_task = ws_push_task,
	.pop_task = ws_pop_task,
	.pre_exec_hook = NULL,
	.post_exec_hook = NULL,
	.pop_every_task = NULL,
	.policy_name = "ws",
	.policy_description = "work stealing"
};