/* exa2pro / starpu-max */
							/*
 * StarPU
 * Copyright (C) INRIA 2008-2009 (see AUTHORS file)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 */

#include <unistd.h>
#include <sys/stat.h>
#include <unistd.h>
#include <errno.h>
#include <common/config.h>
#include <core/perfmodel/perfmodel.h>
#include <core/jobs.h>
#include <core/workers.h>
#include <pthread.h>
#include <datawizard/datawizard.h>
#include <core/perfmodel/regression.h>
#include <common/config.h>

/*
 * History based model
 */


/*
 * Record a history entry in both per-architecture containers: a new link is
 * pushed at the head of *list, and the entry is inserted into the hash table
 * *history_ptr under entry->footprint.
 *
 * The entry is not copied: both containers reference the caller's object.
 * The insertion asserts that no entry with the same footprint was already
 * present in the hash table.
 */
static void insert_history_entry(struct starpu_history_entry_t *entry, struct starpu_history_list_t **list, struct starpu_htbl32_node_s **history_ptr)
{
	struct starpu_history_list_t *link;
	struct starpu_history_entry_t *old;

	link = malloc(sizeof(struct starpu_history_list_t));
	/* fail loudly on allocation failure instead of dereferencing NULL below */
	STARPU_ASSERT(link);

	link->next = *list;
	link->entry = entry;
	*list = link;

	old = htbl_insert_32(history_ptr, entry->footprint, entry);
	/* that may fail in case there is some concurrency issue */
	STARPU_ASSERT(old == NULL);
}


static void dump_reg_model(FILE *f, struct starpu_regression_model_t *reg_model)
{
	fprintf(f, "%le\t%le\t%le\t%le\t%le\t%le\t%d\n", reg_model->sumlnx, reg_model->sumlnx2, reg_model->sumlny, reg_model->sumlnxlny, reg_model->alpha, reg_model->beta, reg_model->nsample);
}

/*
 * Read back one line written by dump_reg_model() into reg_model.
 * Asserts that all seven fields were parsed.
 */
static void scan_reg_model(FILE *f, struct starpu_regression_model_t *reg_model)
{
	int nfields = fscanf(f, "%le\t%le\t%le\t%le\t%le\t%le\t%d\n",
			&reg_model->sumlnx,
			&reg_model->sumlnx2,
			&reg_model->sumlny,
			&reg_model->sumlnxlny,
			&reg_model->alpha,
			&reg_model->beta,
			&reg_model->nsample);

	STARPU_ASSERT(nfields == 7);
}


static void dump_history_entry(FILE *f, struct starpu_history_entry_t *entry)
{
	fprintf(f, "%x\t%zu\t%le\t%le\t%le\t%le\t%d\n", entry->footprint, entry->size, entry->mean, entry->deviation, entry->sum, entry->sum2, entry->nsample);
}

/*
 * Read back one line written by dump_history_entry() into entry.
 * Asserts that all seven fields were parsed.
 */
static void scan_history_entry(FILE *f, struct starpu_history_entry_t *entry)
{
	int nfields = fscanf(f, "%x\t%zu\t%le\t%le\t%le\t%le\t%d\n",
			&entry->footprint,
			&entry->size,
			&entry->mean,
			&entry->deviation,
			&entry->sum,
			&entry->sum2,
			&entry->nsample);

	STARPU_ASSERT(nfields == 7);
}

static void parse_per_arch_model_file(FILE *f, struct starpu_per_arch_perfmodel_t *per_arch_model, unsigned scan_history)
{
	unsigned nentries;

	int res = fscanf(f, "%d\n", &nentries);
	STARPU_ASSERT(res == 1);

	scan_reg_model(f, &per_arch_model->regression);

	res = fscanf(f, "%le\t%le\t%le\n", 
		&per_arch_model->regression.a,
		&per_arch_model->regression.b,
		&per_arch_model->regression.c);
	STARPU_ASSERT(res == 3);

	if (isnan(per_arch_model->regression.a)||isnan(per_arch_model->regression.b)||isnan(per_arch_model->regression.c))
	{
		per_arch_model->regression.valid = 0;
	}
	else {
		per_arch_model->regression.valid = 1;
	}

	if (!scan_history)
		return;

	/* parse core entries */
	unsigned i;
	for (i = 0; i < nentries; i++) {
		struct starpu_history_entry_t *entry = malloc(sizeof(struct starpu_history_entry_t));
		STARPU_ASSERT(entry);

		scan_history_entry(f, entry);
		
		/* insert the entry in the hashtable and the list structures  */
		insert_history_entry(entry, &per_arch_model->list, &per_arch_model->history);
	}
}

static void parse_model_file(FILE *f, struct starpu_perfmodel_t *model, unsigned scan_history)
{
	parse_per_arch_model_file(f, &model->per_arch[STARPU_CORE_DEFAULT], scan_history);
	parse_per_arch_model_file(f, &model->per_arch[STARPU_CUDA_DEFAULT], scan_history);
	parse_per_arch_model_file(f, &model->per_arch[STARPU_GORDON_DEFAULT], scan_history);
}

/*
 * Write one per-architecture section of a model file to f:
 * the number of history entries, the regression line (see dump_reg_model),
 * the a/b/c coefficients obtained from regression_non_linear_power() on the
 * history list, and then one line per history entry.
 */
static void dump_per_arch_model_file(FILE *f, struct starpu_per_arch_perfmodel_t *per_arch_model)
{
	struct starpu_history_list_t *ptr;
	unsigned nentries = 0;

	/* count the number of elements in the list */
	for (ptr = per_arch_model->list; ptr; ptr = ptr->next)
		nentries++;

	/* header (nentries is unsigned, hence %u) */
	fprintf(f, "%u\n", nentries);

	dump_reg_model(f, &per_arch_model->regression);

	double a,b,c;
	regression_non_linear_power(per_arch_model->list, &a, &b, &c);
	fprintf(f, "%le\t%le\t%le\n", a, b, c);

	/* one line per history entry, in list order */
	for (ptr = per_arch_model->list; ptr; ptr = ptr->next)
		dump_history_entry(f, ptr->entry);
}

static void dump_model_file(FILE *f, struct starpu_perfmodel_t *model)
{
	dump_per_arch_model_file(f, &model->per_arch[STARPU_CORE_DEFAULT]);
	dump_per_arch_model_file(f, &model->per_arch[STARPU_CUDA_DEFAULT]);
	dump_per_arch_model_file(f, &model->per_arch[STARPU_GORDON_DEFAULT]);
}

/* Reset a per-architecture model to an empty history (no list, no hash table). */
static void initialize_per_arch_model(struct starpu_per_arch_perfmodel_t *per_arch_model)
{
	per_arch_model->list = NULL;
	per_arch_model->history = NULL;
}

static void initialize_model(struct starpu_perfmodel_t *model)
{
	initialize_per_arch_model(&model->per_arch[STARPU_CORE_DEFAULT]);
	initialize_per_arch_model(&model->per_arch[STARPU_CUDA_DEFAULT]);
	initialize_per_arch_model(&model->per_arch[STARPU_GORDON_DEFAULT]);
}

/* Head of the linked list of models added by register_model(); walked by
 * dump_registered_models() to save every model. */
static struct starpu_model_list_t *registered_models = NULL;
//static unsigned debug_modelid = 0;

#ifdef MODEL_DEBUG
static void get_model_debug_path(struct starpu_perfmodel_t *model, const char *arch, char *path, size_t maxlen)
{
	strncpy(path, PERF_MODEL_DIR, maxlen);
	strncat(path, model->symbol, maxlen);
	
	char hostname[32];
	gethostname(hostname, 32);
	strncat(path, ".", maxlen);
	strncat(path, hostname, maxlen);
	strncat(path, ".", maxlen);
	strncat(path, arch, maxlen);
	strncat(path, ".debug", maxlen);
}
#endif


/*
 * Add a model at the head of the list of registered models, so that
 * dump_registered_models() can later save it.
 *
 * When MODEL_DEBUG is defined, the per-architecture debug files (cuda, core,
 * gordon) of the model are also opened in append mode.
 */
void register_model(struct starpu_perfmodel_t *model)
{
	/* add the model to a linked list */
	struct starpu_model_list_t *node = malloc(sizeof(struct starpu_model_list_t));
	/* fail loudly on allocation failure instead of dereferencing NULL below */
	STARPU_ASSERT(node);

	node->model = model;
	//model->debug_modelid = debug_modelid++;

	/* put this model at the beginning of the list */
	node->next = registered_models;
	registered_models = node;

#ifdef MODEL_DEBUG
	char debugpath[256];
	get_model_debug_path(model, "cuda", debugpath, sizeof(debugpath));
	model->per_arch[STARPU_CUDA_DEFAULT].debug_file = fopen(debugpath, "a+");
	STARPU_ASSERT(model->per_arch[STARPU_CUDA_DEFAULT].debug_file);

	get_model_debug_path(model, "core", debugpath, sizeof(debugpath));
	model->per_arch[STARPU_CORE_DEFAULT].debug_file = fopen(debugpath, "a+");
	STARPU_ASSERT(model->per_arch[STARPU_CORE_DEFAULT].debug_file);

	get_model_debug_path(model, "gordon", debugpath, sizeof(debugpath));
	model->per_arch[STARPU_GORDON_DEFAULT].debug_file = fopen(debugpath, "a+");
	STARPU_ASSERT(model->per_arch[STARPU_GORDON_DEFAULT].debug_file);
#endif

	return;
}

/*
 * Build the path of the file holding a model:
 *   PERF_MODEL_DIR symbol "." hostname
 *
 * At most maxlen bytes (terminator included) are written to path; a longer
 * result is truncated.
 */
static void get_model_path(struct starpu_perfmodel_t *model, char *path, size_t maxlen)
{
	char hostname[32] = "";

	gethostname(hostname, sizeof(hostname));
	/* gethostname() may leave the buffer unterminated when the name is truncated */
	hostname[sizeof(hostname) - 1] = '\0';

	/* snprintf bounds the whole result by maxlen and always terminates it,
	 * unlike strncat whose size argument only limits the appended part */
	snprintf(path, maxlen, "%s%s.%s", PERF_MODEL_DIR, model->symbol, hostname);
}

/*
 * Write a model to its file (see get_model_path), replacing any previous
 * content. The model and its symbol must be non-NULL, and the file must be
 * openable for writing; all three are asserted.
 */
void save_history_based_model(struct starpu_perfmodel_t *model)
{
	char path[256];
	FILE *out;

	STARPU_ASSERT(model);
	STARPU_ASSERT(model->symbol);

	/* TODO checks */

	/* filename = $PERF_MODEL_DIR/symbol.hostname */
	get_model_path(model, path, sizeof(path));

#ifdef VERBOSE
	fprintf(stderr, "Opening performance model file %s for model %s\n", path, model->symbol);
#endif

	/* overwrite existing file, or create it */
	out = fopen(path, "w+");
	STARPU_ASSERT(out);

	dump_model_file(out, model);

	fclose(out);

	/* NOTE(review): this guard is spelled DEBUG_MODEL while the rest of the
	 * file tests MODEL_DEBUG, and the fields below differ from the
	 * per_arch[].debug_file used elsewhere; confirm whether it is still live. */
#ifdef DEBUG_MODEL
	fclose(model->gordon_debug_file);
	fclose(model->cuda_debug_file);
	fclose(model->core_debug_file);
#endif
}

/*
 * Save every registered model to its file, walking the list from its head.
 * The list nodes themselves are not released.
 */
void dump_registered_models(void)
{
	struct starpu_model_list_t *cur;

#ifdef VERBOSE
	fprintf(stderr, "DUMP MODELS !\n");
#endif

	for (cur = registered_models; cur; cur = cur->next) {
		save_history_based_model(cur->model);

		/* XXX free node */
	}
}

/* Set to 1 by load_history_based_model() once the performance model
 * directory has been created or checked, so that this is not redone for
 * later loads. */
static int directory_existence_was_tested = 0;

/*
 * Make sure PERF_MODEL_DIR exists and is a directory.
 *
 * The directory is created with owner-only permissions. If creation fails,
 * the only accepted reason is that the path already exists, in which case it
 * is checked to be a directory. Any other situation trips an assertion.
 */
static void create_sampling_directory_if_needed(void)
{
	/* Testing if a directory exists and creating it otherwise 
	   may not be safe: it is possible that the permission are
	   changed in between. Instead, we create it and check if
	   it already existed before */
	int ret;
	ret = mkdir(PERF_MODEL_DIR, S_IRWXU);
	if (ret == -1)
	{
		STARPU_ASSERT(errno == EEXIST);

		/* make sure that it is actually a directory */
		struct stat sb;
		ret = stat(PERF_MODEL_DIR, &sb);
		/* sb is only meaningful when stat() succeeded */
		STARPU_ASSERT(ret == 0);
		STARPU_ASSERT(S_ISDIR(sb.st_mode));
	}
}

/*
 * Load a performance model, once.
 *
 * The caller that atomically moves model->is_loaded from
 * STARPU_PERFMODEL_NOT_LOADED to STARPU_PERFMODEL_LOADING performs the load;
 * any other caller spins until is_loaded becomes STARPU_PERFMODEL_LOADED and
 * then returns.
 *
 * The loading caller initialises and takes model->model_mutex, makes sure
 * the model directory exists, registers the model, then either parses the
 * existing model file or, when there is none, initialises an empty model.
 * scan_history is forwarded to parse_model_file(). Finally
 * model->benchmarking is set according to the CALIBRATE environment number
 * and the model is marked as loaded.
 *
 * NOTE(review): when the file exists, initialize_model() is not called, so
 * the parser appends to whatever per_arch[].list / .history already hold;
 * presumably the model is zero-initialised by its owner -- confirm.
 */
void load_history_based_model(struct starpu_perfmodel_t *model, unsigned scan_history)
{
	STARPU_ASSERT(model);
	STARPU_ASSERT(model->symbol);

	/* non-zero only for the single caller that wins the state transition */
	unsigned have_to_load;
	have_to_load = __sync_bool_compare_and_swap (&model->is_loaded, 
				STARPU_PERFMODEL_NOT_LOADED,
				STARPU_PERFMODEL_LOADING);
	if (!have_to_load)
	{
		/* someone is already loading the model, we wait until it's finished */
		while (model->is_loaded != STARPU_PERFMODEL_LOADED)
		{
			__sync_synchronize();
		}
		return;
	}
	
	//init_mutex(&model->model_mutex);
	/* the lock is created here, by the loading caller only */
	pthread_spin_init(&model->model_mutex, 0);
	pthread_spin_lock(&model->model_mutex);

		/* make sure the performance model directory exists (or create it) */
		if (!directory_existence_was_tested)
		{
			create_sampling_directory_if_needed();
			directory_existence_was_tested = 1;
		}

		/*
		 * We need to keep track of all the model that were opened so that we can 
		 * possibly update them at runtime termination ...
		 */
		register_model(model);
	
		char path[256];
		get_model_path(model, path, 256);
	
#ifdef VERBOSE
		fprintf(stderr, "Opening performance model file %s for model %s\n", path, model->symbol);
#endif
	
		/* try to open an existing file and load it */
		int res;
		res = access(path, F_OK); 
		if (res == 0) {
		//	fprintf(stderr, "File exists !\n");
	
			FILE *f;
			f = fopen(path, "r");
			STARPU_ASSERT(f);
	
			parse_model_file(f, model, scan_history);
	
			fclose(f);
		}
		else {
			//fprintf(stderr, "File does not exists !\n");
			/* no file yet: start from an empty history */
			initialize_model(model);
		}
	
	
		/* presumably -1 means that CALIBRATE is not set -- confirm against
		 * starpu_get_env_number() */
		if (starpu_get_env_number("CALIBRATE") != -1)
		{
			fprintf(stderr, "CALIBRATE model %s\n", model->symbol);
			model->benchmarking = 1;
		}
		else {
			model->benchmarking = 0;
		}
	
		/* releases the callers spinning on is_loaded above */
		model->is_loaded = STARPU_PERFMODEL_LOADED;

	pthread_spin_unlock(&model->model_mutex);
}

/* This function is intended to be used by external tools that should read the
 * performance model files */
/*
 * Load the model file associated with symbol into model, history included.
 *
 * model->symbol is set to symbol. Returns 0 when the file was parsed, and 1
 * (after printing a message on stderr) when no model file exists for this
 * symbol.
 */
int starpu_load_history_debug(const char *symbol, struct starpu_perfmodel_t *model)
{
	model->symbol = symbol;

	/* where is the file if it exists ? */
	char path[256];
	get_model_path(model, path, sizeof(path));

//	fprintf(stderr, "get_model_path -> %s\n", path);

	/* does it exist ? */
	int res;
	res = access(path, F_OK);
	if (res) {
		fprintf(stderr, "There is no performance model for symbol %s\n", symbol);
		return 1;
	}

	FILE *f = fopen(path, "r");
	STARPU_ASSERT(f);

	parse_model_file(f, model, 1);

	/* the stream is not needed once parsed: do not leak it */
	fclose(f);

	return 0;
}

/*
 * Write into the buffer *path (of maxlen bytes) the path of the debug file
 * of model for architecture arch.
 *
 * path itself must be non-NULL (asserted). For an architecture other than
 * core, cuda or gordon, *path is set to NULL and nothing is written.
 */
void starpu_perfmodel_debugfilepath(struct starpu_perfmodel_t *model,
		enum starpu_perf_archtype arch, char **path, size_t maxlen)
{
	const char *archname = NULL;

	STARPU_ASSERT(path);

	if (arch == STARPU_CORE_DEFAULT)
		archname = "core";
	else if (arch == STARPU_CUDA_DEFAULT)
		archname = "cuda";
	else if (arch == STARPU_GORDON_DEFAULT)
		archname = "gordon";

	if (!archname) {
		/* unknown architecture */
		*path = NULL;
		return;
	}

	get_model_debug_path(model, archname, *path, maxlen);
}

/*
 * Predict the length of job j on architecture arch from the regression
 * a * size^b + c, where size is the data size of the job.
 *
 * The model is loaded (without its history) if that was not done yet.
 * Returns -1.0 when the regression of this architecture is not valid.
 */
double regression_based_job_expected_length(struct starpu_perfmodel_t *model, enum starpu_perf_archtype arch, struct job_s *j)
{
	size_t size = job_get_data_size(j);
	struct starpu_regression_model_t *regmodel;

	if (STARPU_UNLIKELY(model->is_loaded != STARPU_PERFMODEL_LOADED))
		load_history_based_model(model, 0);

	regmodel = &model->per_arch[arch].regression;

	if (!regmodel->valid)
		return -1.0;

	return regmodel->a*pow(size, regmodel->b) + regmodel->c;
}

/*
 * Predict the length of job j on architecture arch from the history: the
 * mean recorded for the footprint of the job.
 *
 * The model is loaded (with its history) and the footprint of the job is
 * computed if that was not done yet. Returns -1.0 when the architecture has
 * no history or no entry for this footprint.
 *
 * The history root and the entry are both read while holding
 * model->model_mutex, since update_perfmodel_history() modifies them under
 * that same lock.
 */
double history_based_job_expected_length(struct starpu_perfmodel_t *model, enum starpu_perf_archtype arch, struct job_s *j)
{
	double expected = -1.0;
	struct starpu_per_arch_perfmodel_t *per_arch_model;
	struct starpu_history_entry_t *entry;
	struct starpu_htbl32_node_s *history;

	if (STARPU_UNLIKELY(model->is_loaded != STARPU_PERFMODEL_LOADED))
		load_history_based_model(model, 1);

	if (STARPU_UNLIKELY(!j->footprint_is_computed))
		compute_buffers_footprint(j);
		
	uint32_t key = j->footprint;

	per_arch_model = &model->per_arch[arch];

	pthread_spin_lock(&model->model_mutex);
	history = per_arch_model->history;
	if (history)
	{
		entry = htbl_search_32(history, key);
		if (entry)
			expected = entry->mean;
	}
	pthread_spin_unlock(&model->model_mutex);

	return expected;
}

/*
 * Feed a measured execution time back into the model of the codelet of job j.
 *
 * Nothing is done when the codelet has no model. For HISTORY_BASED and
 * REGRESSION_BASED models, under model->model_mutex:
 *  - the history entry matching the footprint of the job is created (first
 *    sample) or its sum, sum2, nsample, mean and deviation are updated;
 *  - the log-log regression accumulators of the architecture are updated and
 *    alpha/beta recomputed.
 *
 * When MODEL_DEBUG is defined, one line describing the sample and the
 * buffers of the task is appended to the debug file of the architecture.
 *
 * cpuid is only used for that debug line.
 */
void update_perfmodel_history(job_t j, enum starpu_perf_archtype arch, unsigned cpuid, double measured)
{
	struct starpu_perfmodel_t *model = j->task->cl->model;

	if (model)
	{
		struct starpu_per_arch_perfmodel_t *per_arch_model = &model->per_arch[arch];

		if (model->type == HISTORY_BASED || model->type == REGRESSION_BASED)
		{
			/* the footprint is the history key: make sure it was really
			 * computed (the regression-based prediction path does not do it) */
			if (STARPU_UNLIKELY(!j->footprint_is_computed))
				compute_buffers_footprint(j);

			uint32_t key = j->footprint;
			struct starpu_history_entry_t *entry;

			struct starpu_htbl32_node_s **history_ptr;
			struct starpu_regression_model_t *reg_model;

			struct starpu_history_list_t **list;

			history_ptr = &per_arch_model->history;
			reg_model = &per_arch_model->regression;
			list = &per_arch_model->list;

			pthread_spin_lock(&model->model_mutex);
	
				/* the root of the table is read under the lock, as
				 * insertions may change it */
				entry = htbl_search_32(*history_ptr, key);
	
				if (!entry)
				{
					/* this is the first entry with such a footprint */
					entry = malloc(sizeof(struct starpu_history_entry_t));
					STARPU_ASSERT(entry);
						entry->mean = measured;
						entry->sum = measured;
	
						entry->deviation = 0.0;
						entry->sum2 = measured*measured;
	
						entry->size = job_get_data_size(j);
	
						entry->footprint = key;
						entry->nsample = 1;
	
					insert_history_entry(entry, list, history_ptr);
	
				}
				else {
					/* there is already some entry with the same footprint */
					entry->sum += measured;
					entry->sum2 += measured*measured;
					entry->nsample++;
	
					unsigned n = entry->nsample;
					entry->mean = entry->sum / n;
					entry->deviation = sqrt((entry->sum2 - (entry->sum*entry->sum)/n)/n);
				}
			
				STARPU_ASSERT(entry);
			
			/* update the regression model as well */
			double logy, logx;
			logx = logl(entry->size);
			logy = logl(measured);

			reg_model->sumlnx += logx;
			reg_model->sumlnx2 += logx*logx;
			reg_model->sumlny += logy;
			reg_model->sumlnxlny += logx*logy;
			reg_model->nsample++;

			unsigned n = reg_model->nsample;
			
			/* NOTE(review): denom is 0 as long as all the samples have the
			 * same size (e.g. a single sample), which makes beta and alpha
			 * non-finite until sizes differ. */
			double num = (n*reg_model->sumlnxlny - reg_model->sumlnx*reg_model->sumlny);
			double denom = (n*reg_model->sumlnx2 - reg_model->sumlnx*reg_model->sumlnx);

			reg_model->beta = num/denom;
			reg_model->alpha = expl((reg_model->sumlny - reg_model->beta*reg_model->sumlnx)/n);
			
			pthread_spin_unlock(&model->model_mutex);
		}

#ifdef MODEL_DEBUG
		FILE * debug_file = per_arch_model->debug_file;

		pthread_spin_lock(&model->model_mutex);

		STARPU_ASSERT(j->footprint_is_computed);

		/* job_get_data_size() yields a size_t and cpuid is unsigned */
		fprintf(debug_file, "%x\t%zu\t%lf\t%lf\t%u\t\t", j->footprint, job_get_data_size(j), measured, j->predicted, cpuid);
		unsigned i;
			
		struct starpu_task *task = j->task;
		for (i = 0; i < task->cl->nbuffers; i++)
		{
			data_state *state = task->buffers[i].handle;

			STARPU_ASSERT(state->ops);
			STARPU_ASSERT(state->ops->display);
			state->ops->display(state, debug_file);
		}
		fprintf(debug_file, "\n");	


		pthread_spin_unlock(&model->model_mutex);
#endif
	}
}