Browse Source

Rewrite the tool to display performance models based on regression.

Cédric Augonnet 14 years ago
parent
commit
0e31e57c71

+ 23 - 1
include/starpu_util.h

@@ -23,6 +23,7 @@
 #include <string.h>
 #include <assert.h>
 #include <starpu_config.h>
+#include <starpu_perfmodel.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -161,9 +162,17 @@ static __inline int starpu_get_env_number(const char *str)
 void starpu_trace_user_event(unsigned long code);
 
 #define STARPU_FXT_MAX_FILES	64
+
+struct starpu_fxt_codelet_event { /* one record per end-of-codelet event, filled by handle_end_codelet_body() when options->dumped_codelets is set */
+	char symbol[256]; /* name of the codelet (copied from last_codelet_symbol[], truncated to fit) */
+	int workerid; /* worker index the event was attributed to */
+	uint32_t hash; /* codelet hash carried by the event (ev->param[2]) */
+	size_t size; /* codelet size carried by the event (ev->param[1]) */
+	float time; /* end timestamp minus the worker's last codelet start timestamp */
+};
+
 struct starpu_fxt_options {
 	unsigned per_task_colour;
-	unsigned generate_distrib;
 	unsigned no_counter;
 	unsigned no_bus;
 	unsigned ninputfiles;
@@ -171,12 +180,25 @@ struct starpu_fxt_options {
 	char *out_paje_path;
 	char *distrib_time_path;
 	char *activity_path;
+	char *dag_path;
 
 	/* In case we are going to gather multiple traces (eg in the case of
 	 * MPI processes), we may need to prefix the name of the containers. */
 	char *file_prefix;
 	uint64_t file_offset;
 	int file_rank;
+
+	/*
+	 *	Output parameters
+	 */
+
+	char worker_names[STARPU_NMAXWORKERS][256]; 
+	enum starpu_perf_archtype worker_archtypes[STARPU_NMAXWORKERS];
+	int nworkers;
+
+	/* In case we want to dump the list of codelets to an external tool */
+	struct starpu_fxt_codelet_event **dumped_codelets;
+	long dumped_codelets_count;
 };
 
 void starpu_fxt_options_init(struct starpu_fxt_options *options);

+ 27 - 8
src/core/perfmodel/perfmodel.c

@@ -72,14 +72,6 @@ static double per_arch_task_expected_perf(struct starpu_perfmodel_t *model, enum
 	double exp = -1.0;
 	double (*per_arch_cost_model)(struct starpu_buffer_descr_t *);
 	
-	if (!model->is_loaded)
-	{
-		model->benchmarking = _starpu_get_calibrate_flag();
-		
-		_starpu_register_model(model);
-		model->is_loaded = 1;
-	}
-
 	per_arch_cost_model = model->per_arch[arch].cost_model;
 
 	if (per_arch_cost_model)
@@ -134,6 +126,33 @@ static double common_task_expected_perf(struct starpu_perfmodel_t *model, enum s
 	return -1.0;
 }
 
+void _starpu_load_perfmodel(struct starpu_perfmodel_t *model)
+{ /* Load and register `model` the first time it is seen; later calls return early through is_loaded. */
+	if (!model || model->is_loaded) /* no model, or already loaded: nothing to do */
+		return;
+
+	switch (model->type) {
+		case STARPU_PER_ARCH:
+		case STARPU_COMMON:
+			break; /* no history or regression data is loaded for these types */
+
+		case STARPU_HISTORY_BASED:
+			_starpu_load_history_based_model(model, 1); /* scan_history = 1 */
+			break;
+
+		case STARPU_NL_REGRESSION_BASED:
+		case STARPU_REGRESSION_BASED:
+			_starpu_load_history_based_model(model, 0); /* scan_history = 0 */
+			break;
+
+		default:
+			STARPU_ABORT(); /* unknown model type */
+	}
+
+	_starpu_register_model(model); /* NOTE(review): is_loaded is tested and set here without a visible lock -- confirm that concurrent starpu_task_submit() callers cannot register twice */
+	model->is_loaded = 1;
+}
+
 static double starpu_model_expected_perf(struct starpu_task *task, struct starpu_perfmodel_t *model, enum starpu_perf_archtype arch)
 {
 	if (model) {

+ 2 - 0
src/core/perfmodel/perfmodel.h

@@ -93,6 +93,8 @@ void _starpu_get_perf_model_dir_debug(char *path, size_t maxlen);
 
 double _starpu_history_based_job_expected_perf(struct starpu_perfmodel_t *model, enum starpu_perf_archtype arch, struct starpu_job_s *j);
 void _starpu_register_model(struct starpu_perfmodel_t *model);
+void _starpu_load_history_based_model(struct starpu_perfmodel_t *model, unsigned scan_history);
+void _starpu_load_perfmodel(struct starpu_perfmodel_t *model);
 void _starpu_initialize_registered_performance_models(void);
 void _starpu_deinitialize_registered_performance_models(void);
 

+ 1 - 7
src/core/perfmodel/perfmodel_history.c

@@ -392,7 +392,7 @@ void _starpu_deinitialize_registered_performance_models(void)
  * was loaded or not (this is very likely to have been already loaded). If the
  * model was not loaded yet, we take the lock in write mode, and if the model
  * is still not loaded once we have the lock, we do load it.  */
-static void load_history_based_model(struct starpu_perfmodel_t *model, unsigned scan_history)
+void _starpu_load_history_based_model(struct starpu_perfmodel_t *model, unsigned scan_history)
 {
 
 	STARPU_ASSERT(model);
@@ -608,8 +608,6 @@ double _starpu_regression_based_job_expected_perf(struct starpu_perfmodel_t *mod
 	size_t size = _starpu_job_get_data_size(j);
 	struct starpu_regression_model_t *regmodel;
 
-	load_history_based_model(model, 0);
-
 	regmodel = &model->per_arch[arch].regression;
 
 	if (regmodel->valid)
@@ -624,8 +622,6 @@ double _starpu_non_linear_regression_based_job_expected_perf(struct starpu_perfm
 	size_t size = _starpu_job_get_data_size(j);
 	struct starpu_regression_model_t *regmodel;
 
-	load_history_based_model(model, 0);
-
 	regmodel = &model->per_arch[arch].regression;
 
 	if (regmodel->nl_valid)
@@ -641,8 +637,6 @@ double _starpu_history_based_job_expected_perf(struct starpu_perfmodel_t *model,
 	struct starpu_history_entry_t *entry;
 	struct starpu_htbl32_node_s *history;
 
-	load_history_based_model(model, 1);
-
 	uint32_t key = _starpu_compute_buffers_footprint(j);
 
 	per_arch_model = &model->per_arch[arch];

+ 5 - 1
src/core/task.c

@@ -250,6 +250,9 @@ int starpu_task_submit(struct starpu_task *task)
                 }
 
 		_starpu_detect_implicit_data_deps(task);
+
+		if (task->cl->model)
+			_starpu_load_perfmodel(task->cl->model);
 	}
 
 	/* If profiling is activated, we allocate a structure to store the
@@ -262,7 +265,8 @@ int starpu_task_submit(struct starpu_task *task)
 	/* The task is considered as block until we are sure there remains not
 	 * dependency. */
 	task->status = STARPU_TASK_BLOCKED;
-	
+
+
 	if (profiling)
 		starpu_clock_gettime(&info->submit_time);
 

+ 126 - 37
src/debug/traces/starpu_fxt.c

@@ -175,7 +175,8 @@ static void update_accumulated_time(int worker, double sleep_time, double exec_t
 	double elapsed = current_timestamp - last_activity_flush_timestamp[worker];
 	if (forceflush || (elapsed > ACTIVITY_PERIOD))
 	{		
-		fprintf(activity_file, "%d\t%lf\t%lf\t%lf\t%lf\n", worker, current_timestamp, elapsed, accumulated_exec_time[worker], accumulated_sleep_time[worker]);
+		if (activity_file)
+			fprintf(activity_file, "%d\t%lf\t%lf\t%lf\t%lf\n", worker, current_timestamp, elapsed, accumulated_exec_time[worker], accumulated_sleep_time[worker]);
 
 		/* reset the accumulated times */
 		last_activity_flush_timestamp[worker] = current_timestamp;
@@ -192,10 +193,13 @@ static void handle_new_mem_node(struct fxt_ev_64 *ev, struct starpu_fxt_options
 {
 	char *prefix = options->file_prefix;
 
-	fprintf(out_paje_file, "7       %f	%"PRIu64"      Mn      %sp	%sMEMNODE%"PRIu64"\n", get_event_time_stamp(ev, options), ev->param[0], prefix, options->file_prefix, ev->param[0]);
-
-	if (!options->no_bus)
-		fprintf(out_paje_file, "13       %f bw %sMEMNODE%"PRIu64" 0.0\n", 0.0f, prefix, ev->param[0]);
+	if (out_paje_file)
+	{
+		fprintf(out_paje_file, "7       %f	%"PRIu64"      Mn      %sp	%sMEMNODE%"PRIu64"\n", get_event_time_stamp(ev, options), ev->param[0], prefix, options->file_prefix, ev->param[0]);
+	
+		if (!options->no_bus)
+			fprintf(out_paje_file, "13       %f bw %sMEMNODE%"PRIu64" 0.0\n", 0.0f, prefix, ev->param[0]);
+	}
 }
 
 static void handle_worker_init_start(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
@@ -207,6 +211,7 @@ static void handle_worker_init_start(struct fxt_ev_64 *ev, struct starpu_fxt_opt
 	*/
 	char *prefix = options->file_prefix;
 
+	if (out_paje_file)
 	fprintf(out_paje_file, "7       %f	%s%"PRIu64"      T      %sMEMNODE%"PRIu64"       %s%"PRIu64"\n",
 		get_event_time_stamp(ev, options), prefix, ev->param[3], prefix, ev->param[2], prefix, ev->param[3]);
 
@@ -214,6 +219,7 @@ static void handle_worker_init_start(struct fxt_ev_64 *ev, struct starpu_fxt_opt
 	int workerid = register_worker_id(ev->param[3]);
 
 	char *kindstr = "";
+	enum starpu_perf_archtype archtype = 0;
 
 	switch (ev->param[0]) {
 		case STARPU_FUT_APPS_KEY:
@@ -223,30 +229,39 @@ static void handle_worker_init_start(struct fxt_ev_64 *ev, struct starpu_fxt_opt
 		case STARPU_FUT_CPU_KEY:
 			set_next_cpu_worker_color(workerid);
 			kindstr = "cpu";
+			archtype = STARPU_CPU_DEFAULT;
 			break;
 		case STARPU_FUT_CUDA_KEY:
 			set_next_cuda_worker_color(workerid);
 			kindstr = "cuda";
+			archtype = STARPU_CUDA_DEFAULT + devid;
 			break;
 		case STARPU_FUT_OPENCL_KEY:
 			set_next_opencl_worker_color(workerid);
 			kindstr = "opencl";
+			archtype = STARPU_OPENCL_DEFAULT + devid;
 			break;
 		default:
 			STARPU_ABORT();
 	}
 
 	/* start initialization */
+	if (out_paje_file)
 	fprintf(out_paje_file, "10       %f     S      %s%"PRIu64"      I\n",
 			get_event_time_stamp(ev, options), prefix, ev->param[3]);
 
+	if (activity_file)
 	fprintf(activity_file, "name\t%d\t%s %d\n", workerid, kindstr, devid);
+
+	snprintf(options->worker_names[workerid], 256, "%s %d", kindstr, devid);
+	options->worker_archtypes[workerid] = archtype;
 }
 
 static void handle_worker_init_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
 {
 	char *prefix = options->file_prefix;
 
+	if (out_paje_file)
 	fprintf(out_paje_file, "10       %f     S      %s%"PRIu64"      B\n",
 			get_event_time_stamp(ev, options), prefix, ev->param[0]);
 
@@ -261,6 +276,7 @@ static void handle_worker_deinit_start(struct fxt_ev_64 *ev, struct starpu_fxt_o
 {
 	char *prefix = options->file_prefix;
 
+	if (out_paje_file)
 	fprintf(out_paje_file, "10       %f     S      %s%"PRIu64"      D\n",
 			get_event_time_stamp(ev, options), prefix, ev->param[0]);
 }
@@ -269,6 +285,7 @@ static void handle_worker_deinit_end(struct fxt_ev_64 *ev, struct starpu_fxt_opt
 {
 	char *prefix = options->file_prefix;
 
+	if (out_paje_file)
 	fprintf(out_paje_file, "8       %f	%s%"PRIu64"	T\n",
 			get_event_time_stamp(ev, options), prefix, ev->param[1]);
 }
@@ -316,6 +333,7 @@ static void create_paje_state_if_not_found(char *name, struct starpu_fxt_options
 	}
 
 	/* create the Paje state */
+	if (out_paje_file)
 	fprintf(out_paje_file, "6       %s       S       %s \"%f %f %f\" \n", name, name, red, green, blue);
 }
 
@@ -339,9 +357,13 @@ static void handle_start_codelet_body(struct fxt_ev_64 *ev, struct starpu_fxt_op
 
 	create_paje_state_if_not_found(name, options);
 
+	if (out_paje_file)
 	fprintf(out_paje_file, "10       %f	S      %s%"PRIu64"      %s\n", start_codelet_time, prefix, ev->param[1], name);
 }
 
+static long dumped_codelets_count;
+static struct starpu_fxt_codelet_event *dumped_codelets;
+
 static void handle_end_codelet_body(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
 {
 	int worker;
@@ -355,15 +377,28 @@ static void handle_end_codelet_body(struct fxt_ev_64 *ev, struct starpu_fxt_opti
 	size_t codelet_size = ev->param[1];
 	uint32_t codelet_hash = ev->param[2];
 
+	if (out_paje_file)
 	fprintf(out_paje_file, "10       %f	S      %s%"PRIu64"      B\n", end_codelet_time, prefix, ev->param[3]);
 
 	float codelet_length = (end_codelet_time - last_codelet_start[worker]);
 
 	update_accumulated_time(worker, 0.0, codelet_length, end_codelet_time, 0);
 	
-	if (options->generate_distrib)
+	if (distrib_time)
 	fprintf(distrib_time, "%s\t%s%d\t%ld\t%"PRIx64"\t%f\n", last_codelet_symbol[worker],
 				prefix, worker, codelet_size, codelet_hash, codelet_length);
+
+	if (options->dumped_codelets)
+	{
+		dumped_codelets_count++;
+		dumped_codelets = realloc(dumped_codelets, dumped_codelets_count*sizeof(struct starpu_fxt_codelet_event));
+
+		snprintf(dumped_codelets[dumped_codelets_count - 1].symbol, 256, "%s", last_codelet_symbol[worker]);
+		dumped_codelets[dumped_codelets_count - 1].workerid = worker;
+		dumped_codelets[dumped_codelets_count - 1].size = codelet_size;
+		dumped_codelets[dumped_codelets_count - 1].hash = codelet_hash;
+		dumped_codelets[dumped_codelets_count - 1].time = codelet_length;
+	}
 }
 
 static void handle_user_event(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
@@ -376,9 +411,11 @@ static void handle_user_event(struct fxt_ev_64 *ev, struct starpu_fxt_options *o
 	worker = find_worker_id(ev->param[1]);
 	if (worker < 0)
 	{
+		if (out_paje_file)
 		fprintf(out_paje_file, "9       %f     event      %sp      %lu\n", get_event_time_stamp(ev, options), prefix, code);
 	}
 	else {
+		if (out_paje_file)
 		fprintf(out_paje_file, "9       %f     event      %s%"PRIu64"      %lu\n", get_event_time_stamp(ev, options), prefix, ev->param[1], code);
 	}
 }
@@ -387,7 +424,10 @@ static void handle_start_callback(struct fxt_ev_64 *ev, struct starpu_fxt_option
 {
 	int worker;
 	worker = find_worker_id(ev->param[1]);
-	if (worker < 0) return;
+	if (worker < 0)
+		return;
+
+	if (out_paje_file)
 	fprintf(out_paje_file, "10       %f	S      %s%"PRIu64"      C\n", get_event_time_stamp(ev, options), options->file_prefix, ev->param[1] );
 }
 
@@ -395,7 +435,10 @@ static void handle_end_callback(struct fxt_ev_64 *ev, struct starpu_fxt_options
 {
 	int worker;
 	worker = find_worker_id(ev->param[1]);
-	if (worker < 0) return;
+	if (worker < 0)
+		return;
+
+	if (out_paje_file)
 	fprintf(out_paje_file, "10       %f	S      %s%"PRIu64"      B\n", get_event_time_stamp(ev, options), options->file_prefix, ev->param[1] );
 }
 
@@ -403,8 +446,10 @@ static void handle_worker_status(struct fxt_ev_64 *ev, struct starpu_fxt_options
 {
 	int worker;
 	worker = find_worker_id(ev->param[1]);
-	if (worker < 0) return;
+	if (worker < 0)
+		return;
 
+	if (out_paje_file)
 	fprintf(out_paje_file, "10       %f	S      %s%"PRIu64"      %s\n",
 				get_event_time_stamp(ev, options), options->file_prefix, ev->param[1], newstatus);
 }
@@ -420,6 +465,7 @@ static void handle_start_sleep(struct fxt_ev_64 *ev, struct starpu_fxt_options *
 	float start_sleep_time = get_event_time_stamp(ev, options);
 	last_sleep_start[worker] = start_sleep_time;
 
+	if (out_paje_file)
 	fprintf(out_paje_file, "10       %f	S      %s%"PRIu64"      Sl\n",
 				get_event_time_stamp(ev, options), options->file_prefix, ev->param[0]);
 }
@@ -432,6 +478,7 @@ static void handle_end_sleep(struct fxt_ev_64 *ev, struct starpu_fxt_options *op
 
 	float end_sleep_timestamp = get_event_time_stamp(ev, options);
 
+	if (out_paje_file)
 	fprintf(out_paje_file, "10       %f	S      %s%"PRIu64"      B\n",
 				end_sleep_timestamp, options->file_prefix, ev->param[0]);
 
@@ -455,8 +502,11 @@ static void handle_start_driver_copy(struct fxt_ev_64 *ev, struct starpu_fxt_opt
 
 	if (!options->no_bus)
 	{
-		fprintf(out_paje_file, "10       %f     MS      %sMEMNODE%u      Co\n", get_event_time_stamp(ev, options), prefix, dst);
-		fprintf(out_paje_file, "18       %f	L      %sp	%u	%sMEMNODE%u	com_%u\n", get_event_time_stamp(ev, options), prefix, size, prefix, src, comid);
+		if (out_paje_file)
+		{
+			fprintf(out_paje_file, "10       %f     MS      %sMEMNODE%u      Co\n", get_event_time_stamp(ev, options), prefix, dst);
+			fprintf(out_paje_file, "18       %f	L      %sp	%u	%sMEMNODE%u	com_%u\n", get_event_time_stamp(ev, options), prefix, size, prefix, src, comid);
+		}
 
 		/* create a structure to store the start of the communication, this will be matched later */
 		communication_t com = communication_new();
@@ -482,8 +532,11 @@ static void handle_end_driver_copy(struct fxt_ev_64 *ev, struct starpu_fxt_optio
 
 	if (!options->no_bus)
 	{
-		fprintf(out_paje_file, "10       %f     MS      %sMEMNODE%u      No\n", get_event_time_stamp(ev, options), prefix, dst);
-		fprintf(out_paje_file, "19       %f	L      %sp	%u	%sMEMNODE%u	com_%u\n", get_event_time_stamp(ev, options), prefix, size, prefix, dst, comid);
+		if (out_paje_file)
+		{
+			fprintf(out_paje_file, "10       %f     MS      %sMEMNODE%u      No\n", get_event_time_stamp(ev, options), prefix, dst);
+			fprintf(out_paje_file, "19       %f	L      %sp	%u	%sMEMNODE%u	com_%u\n", get_event_time_stamp(ev, options), prefix, size, prefix, dst, comid);
+		}
 
 		/* look for a data transfer to match */
 		communication_itor_t itor;
@@ -517,6 +570,7 @@ static void handle_memnode_event(struct fxt_ev_64 *ev, struct starpu_fxt_options
 {
 	unsigned memnode = ev->param[0];
 
+	if (out_paje_file)
 	fprintf(out_paje_file, "10       %f     MS      %sMEMNODE%u      %s\n",
 		get_event_time_stamp(ev, options), options->file_prefix, memnode, eventstr);
 }
@@ -532,10 +586,10 @@ static void handle_job_push(struct fxt_ev_64 *ev, struct starpu_fxt_options *opt
 
 	curq_size++;
 
-	if (!options->no_counter)
+	if (!options->no_counter && out_paje_file)
 		fprintf(out_paje_file, "13       %f ntask %ssched %f\n", current_timestamp, options->file_prefix, (float)curq_size);
 
-
+	if (activity_file)
 	fprintf(activity_file, "cnt_ready\t%lf\t%d\n", current_timestamp, curq_size);
 }
 
@@ -545,9 +599,10 @@ static void handle_job_pop(struct fxt_ev_64 *ev, struct starpu_fxt_options *opti
 
 	curq_size--;
 
-	if (!options->no_counter)
+	if (!options->no_counter && out_paje_file)
 		fprintf(out_paje_file, "13       %f ntask %ssched %f\n", current_timestamp, options->file_prefix, (float)curq_size);
 
+	if (activity_file)
 	fprintf(activity_file, "cnt_ready\t%lf\t%d\n", current_timestamp, curq_size);
 }
 
@@ -556,6 +611,7 @@ void handle_update_task_cnt(struct fxt_ev_64 *ev, struct starpu_fxt_options *opt
 {
 	float current_timestamp = get_event_time_stamp(ev, options);
 	unsigned long nsubmitted = ev->param[0]; 
+	if (activity_file)
 	fprintf(activity_file, "cnt_submitted\t%lf\t%lu\n", current_timestamp, nsubmitted);
 }
 
@@ -643,6 +699,7 @@ static void handle_mpi_barrier(struct fxt_ev_64 *ev, struct starpu_fxt_options *
 	STARPU_ASSERT(rank == options->file_rank);
 
 	/* Add an event in the trace */
+	if (out_paje_file)
 	fprintf(out_paje_file, "9       %f     event      %sp      %d\n", get_event_time_stamp(ev, options), options->file_prefix, rank);
 }
 
@@ -669,6 +726,7 @@ static void handle_set_profiling(struct fxt_ev_64 *ev, struct starpu_fxt_options
 {
 	int status = ev->param[0];
 
+	if (activity_file)
 	fprintf(activity_file, "set_profiling\t%lf\t%d\n", get_event_time_stamp(ev, options), status);
 }
 
@@ -691,10 +749,12 @@ void _starpu_fxt_display_bandwidth(struct starpu_fxt_options *options)
 		itor = communication_list_next(itor))
 	{
 		current_bandwidth += itor->bandwidth;
+		if (out_paje_file)
 		fprintf(out_paje_file, "13  %f bw %sMEMNODE0 %f\n",
 				itor->comm_start, prefix, current_bandwidth);
 
 		current_bandwidth_per_node[itor->node] +=  itor->bandwidth;
+		if (out_paje_file)
 		fprintf(out_paje_file, "13  %f bw %sMEMNODE%u %f\n",
 				itor->comm_start, prefix, itor->node, current_bandwidth_per_node[itor->node]);
 	}
@@ -735,12 +795,15 @@ void starpu_fxt_parse_new_file(char *filename_in, struct starpu_fxt_options *opt
 
 	/* TODO starttime ...*/
 	/* create the "program" container */
-	fprintf(out_paje_file, "7      0.0 %sp      P      MPIroot       program%s \n", prefix, prefix);
-	/* create a variable with the number of tasks */
-	if (!options->no_counter)
+	if (out_paje_file)
 	{
-		fprintf(out_paje_file, "7     %f    %ssched   Sc    %sp     scheduler \n", 0.0, prefix, prefix);
-		fprintf(out_paje_file, "13    0.0    ntask %ssched 0.0\n", prefix);
+		fprintf(out_paje_file, "7      0.0 %sp      P      MPIroot       program%s \n", prefix, prefix);
+		/* create a variable with the number of tasks */
+		if (!options->no_counter)
+		{
+			fprintf(out_paje_file, "7     %f    %ssched   Sc    %sp     scheduler \n", 0.0, prefix, prefix);
+			fprintf(out_paje_file, "13    0.0    ntask %ssched 0.0\n", prefix);
+		}
 	}
 
 	struct fxt_ev_64 ev;
@@ -932,61 +995,86 @@ void starpu_fxt_parse_new_file(char *filename_in, struct starpu_fxt_options *opt
 void starpu_fxt_options_init(struct starpu_fxt_options *options)
 {
 	options->per_task_colour = 0;
-	options->generate_distrib = 0;
 	options->no_counter = 0;
 	options->no_bus = 0;
 	options->ninputfiles = 0;
 	options->out_paje_path = "paje.trace";
+	options->dag_path = "dag.dot"; /* default DAG output file; starpu_fxt_dag_init() skips DAG output when this is NULL */
 	options->distrib_time_path = "distrib.data";
+	options->dumped_codelets = NULL; /* NULL: handle_end_codelet_body() does not collect codelet events */
 	options->activity_path = "activity.data";
-
 }
 
 static
 void starpu_fxt_distrib_file_init(struct starpu_fxt_options *options)
 {
-	if (options->generate_distrib)
+	dumped_codelets_count = 0; /* start a fresh codelet dump for this run */
+	dumped_codelets = NULL;
+
+	if (options->distrib_time_path) /* a NULL path disables the distribution file */
+	{ /* NOTE: an fopen() failure is not reported; distrib_time is then NULL and the output is skipped */
 		distrib_time = fopen(options->distrib_time_path, "w+");
+	}
+	else {
+		distrib_time = NULL; /* handle_end_codelet_body() tests distrib_time before printing */
+	}
 }
 
 static
 void starpu_fxt_distrib_file_close(struct starpu_fxt_options *options)
 {
-	if (options->generate_distrib)
+	if (distrib_time) /* close only if the file was actually opened */
 		fclose(distrib_time);
+
+	if (options->dumped_codelets) /* the caller asked for the codelet list: publish the array and its length */
+	{
+		*options->dumped_codelets = dumped_codelets; /* no free() of this array is visible here -- presumably the caller owns it from now on; confirm */
+		options->dumped_codelets_count = dumped_codelets_count;
+	}
 }
 
 static
 void starpu_fxt_activity_file_init(struct starpu_fxt_options *options)
 {
-	activity_file = fopen(options->activity_path, "w+");
+	if (options->activity_path) /* a NULL path disables the activity file */
+		activity_file = fopen(options->activity_path, "w+"); /* the result is not checked; a failed open leaves activity_file NULL */
+	else
+		activity_file = NULL; /* activity writers test activity_file before printing */
 }
 
 static
 void starpu_fxt_activity_file_close(void)
 {
-	fclose(activity_file);
+	if (activity_file) /* nothing to close when no activity file was opened */
+		fclose(activity_file);
 }
 
 static
 void starpu_fxt_paje_file_init(struct starpu_fxt_options *options)
 {
 	/* create a new file */
-	out_paje_file = fopen(options->out_paje_path, "w+");
-	if (!out_paje_file)
+	if (options->out_paje_path) /* a NULL path disables Paje output */
 	{
-		fprintf(stderr,"error while opening %s\n", options->out_paje_path);
-		perror("fopen");
-		exit(1);
-	}
+		out_paje_file = fopen(options->out_paje_path, "w+");
+		if (!out_paje_file) /* a requested Paje file that cannot be opened is fatal */
+		{
+			fprintf(stderr,"error while opening %s\n", options->out_paje_path);
+			perror("fopen");
+			exit(1);
+		}
 
-	starpu_fxt_write_paje_header(out_paje_file);
+		starpu_fxt_write_paje_header(out_paje_file); /* the header is written right after the file is created */
+	}
+	else {
+		out_paje_file = NULL; /* Paje writers test out_paje_file before printing */
+	}
 }
 
 static
 void starpu_fxt_paje_file_close(void)
 {
-	fclose(out_paje_file);
+	if (out_paje_file) /* NULL when Paje output was disabled */
+		fclose(out_paje_file);
 }
 
 static uint64_t starpu_fxt_find_start_time(char *filename_in)
@@ -1025,8 +1113,7 @@ static uint64_t starpu_fxt_find_start_time(char *filename_in)
 
 void starpu_fxt_generate_trace(struct starpu_fxt_options *options)
 {
-	starpu_fxt_dag_init("dag.dot");
-
+	starpu_fxt_dag_init(options->dag_path);
 	starpu_fxt_distrib_file_init(options);
 	starpu_fxt_activity_file_init(options);
 
@@ -1153,5 +1240,7 @@ void starpu_fxt_generate_trace(struct starpu_fxt_options *options)
 	starpu_fxt_distrib_file_close(options);
 
 	starpu_fxt_dag_terminate();
+
+	options->nworkers = nworkers;
 }
 #endif // STARPU_USE_FXT

+ 16 - 1
src/debug/traces/starpu_fxt_dag.c

@@ -27,6 +27,12 @@ static unsigned cluster_cnt;
 
 void starpu_fxt_dag_init(char *out_path)
 {
+	if (!out_path)
+	{
+		out_file = NULL;
+		return;
+	}
+
 	/* create a new file */
 	out_file = fopen(out_path, "w+");
 	if (!out_file) {
@@ -47,6 +53,9 @@ void starpu_fxt_dag_init(char *out_path)
 
 void starpu_fxt_dag_terminate(void)
 {
+	if (!out_file)
+		return;
+
 	/* Close the last cluster */
 	fprintf(out_file, "}\n");
 	/* Close the graph */
@@ -56,29 +65,35 @@ void starpu_fxt_dag_terminate(void)
 
 void starpu_fxt_dag_add_tag_deps(uint64_t child, uint64_t father)
 {
+	if (out_file) /* out_file is NULL when starpu_fxt_dag_init() was given no path; the guard covers only the fprintf below */
 	fprintf(out_file, "\t \"tag_%llx\"->\"tag_%llx\"\n", 
 		(unsigned long long)father, (unsigned long long)child);
 }
 
 void starpu_fxt_dag_add_task_deps(unsigned long dep_prev, unsigned long dep_succ)
 {
+	if (out_file) /* skip the edge when DAG output is disabled (out_file == NULL) */
 	fprintf(out_file, "\t \"task_%lx\"->\"task_%lx\"\n", dep_prev, dep_succ);
 } 
 
 void starpu_fxt_dag_set_tag_done(uint64_t tag, const char *color)
 {
-
+	if (out_file) /* skip the node when DAG output is disabled (out_file == NULL) */
 	fprintf(out_file, "\t \"tag_%llx\" [ style=filled, label=\"\", color=\"%s\"]\n", 
 		(unsigned long long)tag, color);
 }
 
 void starpu_fxt_dag_set_task_done(unsigned long job_id, const char *label, const char *color)
 {
+	if (out_file) /* skip the node when DAG output is disabled (out_file == NULL) */
 	fprintf(out_file, "\t \"task_%lx\" [ style=filled, label=\"%s\", color=\"%s\"]\n", job_id, label, color);
 }
 
 void starpu_fxt_dag_add_sync_point(void)
 {
+	if (!out_file)
+		return;
+
 	/* Close the previous cluster */
 	fprintf(out_file, "}\n");
 

+ 5 - 2
src/debug/traces/starpu_fxt_mpi.c

@@ -209,8 +209,11 @@ static void display_all_transfers_from_trace(FILE *out_paje_file, int src)
 
 			unsigned long id = mpi_com_id++;
 			/* TODO replace 0 by a MPI program ? */
-			fprintf(out_paje_file, "18	%f	MPIL	MPIroot   %ld	mpi_%d_p	mpicom_%lu\n", start_date, size, /* XXX */src, id);
-			fprintf(out_paje_file, "19	%f	MPIL	MPIroot	  %ld	mpi_%d_p	mpicom_%lu\n", end_date, size, /* XXX */dst, id);
+			if (out_paje_file)
+			{
+				fprintf(out_paje_file, "18	%f	MPIL	MPIroot   %ld	mpi_%d_p	mpicom_%lu\n", start_date, size, /* XXX */src, id);
+				fprintf(out_paje_file, "19	%f	MPIL	MPIroot	  %ld	mpi_%d_p	mpicom_%lu\n", end_date, size, /* XXX */dst, id);
+			}
 		}
 		else
 		{

+ 1 - 1
src/drivers/driver_common/driver_common.c

@@ -36,7 +36,7 @@ void _starpu_driver_update_job_feedback(starpu_job_t j, struct starpu_worker_s *
 	int profiling = starpu_profiling_status_get();
 	int updated = 0;
 
-	if (cl->model && cl->model->benchmarking)
+	if (cl->model && _starpu_get_calibrate_flag())
 		calibrate_model = 1;
 
 	if (profiling_info || calibrate_model)

+ 19 - 19
tests/perfmodels/non_linear_regression_based.c

@@ -57,15 +57,19 @@ static void test_memset(int nelems)
 
 	starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, nelems, sizeof(int));
 
-	struct starpu_task *task = starpu_task_create();
-
-	task->cl = &memset_cl;
-	task->buffers[0].handle = handle;
-	task->buffers[0].mode = STARPU_W;
-	task->synchronous = 1;
-
-	int ret = starpu_task_submit(task);
-	assert(!ret);
+	int nloops = 200;
+	int loop;
+	for (loop = 0; loop < nloops; loop++)
+	{
+		struct starpu_task *task = starpu_task_create();
+	
+		task->cl = &memset_cl;
+		task->buffers[0].handle = handle;
+		task->buffers[0].mode = STARPU_W;
+	
+		int ret = starpu_task_submit(task);
+		assert(!ret);
+	} 
 
 	starpu_data_unregister(handle);
 }
@@ -75,21 +79,17 @@ int main(int argc, char **argv)
 	struct starpu_conf conf;
 	starpu_conf_init(&conf);
 
-	conf.sched_policy_name = "dm";
+	conf.sched_policy_name = "greedy";
 	conf.calibrate = 1;
 
 	starpu_init(&conf);
 
-	int nloops = 32;
-	int loop, slog;
-	for (loop = 0; loop < nloops; loop++)
+	int slog;
+	for (slog = 8; slog < 25; slog++)
 	{
-		for (slog = 8; slog < 25; slog++)
-		{
-			int size = 1 << slog;
-			test_memset(size);
-		}
-	} 
+		int size = 1 << slog;
+		test_memset(size);
+	}
 
 	starpu_shutdown();
 

+ 2 - 2
tests/perfmodels/regression_based.c

@@ -71,7 +71,7 @@ static starpu_codelet nl_memset_cl =
 
 static void test_memset(int nelems, starpu_codelet *codelet)
 {
-	int nloops = 20;
+	int nloops = 100;
 	int loop;
 	starpu_data_handle handle;
 
@@ -96,7 +96,7 @@ int main(int argc, char **argv)
 	struct starpu_conf conf;
 	starpu_conf_init(&conf);
 
-	conf.sched_policy_name = "dm";
+	conf.sched_policy_name = "greedy";
 	conf.calibrate = 1;
 
 	starpu_init(&conf);

+ 3 - 1
tools/Makefile.am

@@ -44,8 +44,10 @@ endif
 bin_PROGRAMS +=	starpu_perfmodel_display
 starpu_perfmodel_display_SOURCES = starpu_perfmodel_display.c
 
-bin_PROGRAMS += starpu_machine_display
+bin_PROGRAMS +=	starpu_regression_display
+starpu_regression_display_SOURCES = starpu_regression_display.c
 
+bin_PROGRAMS += starpu_machine_display
 starpu_machine_display_SOURCES = starpu_machine_display.c
 
 noinst_PROGRAMS =	cbc2paje lp2paje

+ 0 - 6
tools/starpu_fxt_tool.c

@@ -62,12 +62,6 @@ static void parse_args(int argc, char **argv)
 			continue;
 		}
 
-		if (strcmp(argv[i], "-d") == 0) {
-			options.generate_distrib = 1;
-			reading_input_filenames = 0;
-			continue;
-		}
-
 		if (strcmp(argv[i], "-h") == 0) {
 		        fprintf(stderr, "Usage : %s [-c] [-no-counter] [-no-bus] [-i input_filename] [-o output_filename]\n", argv[0]);
 			fprintf(stderr, "\t-c: use a different colour for every type of task.\n");

+ 290 - 0
tools/starpu_regression_display.c

@@ -0,0 +1,290 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011  Université de Bordeaux 1
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <assert.h>
+#include <unistd.h>
+#include <stdio.h>
+
+#include <starpu.h>
+#include <starpu_perfmodel.h>
+#include <core/perfmodel/perfmodel.h> // we need to browse the list associated to history-based models
+
+#ifdef __MINGW32__
+#include <windows.h>
+#endif
+
+static struct starpu_perfmodel_t model; /* performance model of `symbol', filled by starpu_load_history_debug() in main() */
+
+/* Symbol of the codelet to display (mandatory, set with -s) */
+static char *symbol = NULL;
+/* Architecture to display, set with -a (NULL = all) */
+static char *arch = NULL;
+
+static struct starpu_fxt_codelet_event *dumped_codelets; /* events read by dump_data_file(); options.dumped_codelets points here */
+static long dumped_codelets_count; /* NOTE(review): never referenced in this file, options.dumped_codelets_count is read instead */
+
+static struct starpu_fxt_options options; /* trace options: inputs set by parse_args(), passed to starpu_fxt_generate_trace() */
+
+static int archtype_is_found[STARPU_NARCH_VARIATIONS]; /* set to 1 by dump_data_file() for each archtype that ran `symbol' */
+static long dumped_per_archtype_count[STARPU_NARCH_VARIATIONS]; /* NOTE(review): not referenced anywhere in this file */
+
+static char data_file_name[256]; /* "starpu_<symbol>.data", built in main() */
+static char gnuplot_file_name[256]; /* "starpu_<symbol>.gp", built in main() */
+
+/* Print the command-line help on stderr; argv[0] is shown as the name of the
+ * program. */
+static void usage(char **argv)
+{
+	const char *program = argv[0];
+
+	fprintf(stderr, "Usage: %s [ options ]\n", program);
+
+	/* The rest of the help does not depend on any argument. */
+	fputs("\n"
+	      "One must specify a symbol with the -s option\n"
+	      "Options:\n"
+	      "   -s <symbol>         specify the symbol\n"
+	      "   -i <Fxt files>      input FxT files generated by StarPU\n"
+	      "   -a <arch>           specify the architecture (e.g. cpu, cpu:k, cuda, gordon)\n"
+	      "\n",
+	      stderr);
+}
+
+/* Return the value that follows the option found at argv[*i] and advance *i
+ * onto that value. If the command line ends right after the option, report
+ * the error, print the usage and exit instead of reading past the arguments. */
+static char *option_value(int argc, char **argv, int *i)
+{
+	if (*i + 1 >= argc)
+	{
+		fprintf(stderr, "Option %s requires an argument\n", argv[*i]);
+		usage(argv);
+		exit(-1);
+	}
+
+	return argv[++(*i)];
+}
+
+/* Append one input trace to options.filenames. The array is presumably
+ * declared with STARPU_FXT_MAX_FILES entries (the macro sits next to the
+ * structure in starpu_util.h -- confirm), so refuse to go beyond that. */
+static void add_input_file(char *filename)
+{
+	if (options.ninputfiles >= STARPU_FXT_MAX_FILES)
+	{
+		fprintf(stderr, "Too many input files (at most %d are supported)\n", STARPU_FXT_MAX_FILES);
+		exit(-1);
+	}
+
+	options.filenames[options.ninputfiles++] = filename;
+}
+
+/*
+ * Parse the command line into the file-level settings:
+ *   -s <symbol>   codelet symbol, stored in `symbol'
+ *   -i <files>    one or more FxT traces, appended to options.filenames
+ *   -a <arch>     architecture filter, stored in `arch'
+ *   -h            print the usage and exit
+ * Arguments matching none of these are taken as additional input files while
+ * a -i list is being read, and are ignored otherwise.
+ */
+static void parse_args(int argc, char **argv)
+{
+	/* Default options */
+	starpu_fxt_options_init(&options);
+
+	/* Clear every output path: this tool only consumes the codelet events
+	 * (presumably a NULL path disables the matching output of
+	 * starpu_fxt_generate_trace -- confirm). */
+	options.out_paje_path = NULL;
+	options.activity_path = NULL;
+	options.distrib_time_path = NULL;
+	options.dag_path = NULL;
+
+	/* Where the list of codelet events must be handed back to us */
+	options.dumped_codelets = &dumped_codelets;
+
+	/* We want to support arguments such as "-i trace_*" */
+	unsigned reading_input_filenames = 0;
+
+	int i;
+	for (i = 1; i < argc; i++)
+	{
+		if (strcmp(argv[i], "-s") == 0)
+		{
+			symbol = option_value(argc, argv, &i);
+			/* Another option ends the list of input files, as in
+			 * starpu_fxt_tool. */
+			reading_input_filenames = 0;
+			continue;
+		}
+
+		if (strcmp(argv[i], "-i") == 0)
+		{
+			add_input_file(option_value(argc, argv, &i));
+			reading_input_filenames = 1;
+			continue;
+		}
+
+		if (strcmp(argv[i], "-a") == 0)
+		{
+			arch = option_value(argc, argv, &i);
+			reading_input_filenames = 0;
+			continue;
+		}
+
+		if (strcmp(argv[i], "-h") == 0)
+		{
+			usage(argv);
+			exit(-1);
+		}
+
+		/* If the reading_input_filenames flag is set, and that the
+		 * argument does not match an option, we assume this may be
+		 * another filename */
+		if (reading_input_filenames)
+		{
+			add_input_file(argv[i]);
+			continue;
+		}
+	}
+}
+
+/*
+ * Append the plot entries of one architecture to the gnuplot "plot" command
+ * being written into gnuplot_file.
+ *
+ * Nothing is written if dump_data_file() did not find any measurement for
+ * `arch'. Otherwise the entries are, in order:
+ *   - the measurements: the lines of the data file whose first column is
+ *     `arch' (selected with grep), plotting column 2 against column 3;
+ *   - the curve alpha * x^beta when regression.valid is set;
+ *   - the curve a * x^b + c when regression.nl_valid is set.
+ * The coefficients of the curves are also reported on stderr.
+ *
+ * `first' tells whether this is the first entry of the plot command: every
+ * later entry is preceded by a comma and a line continuation. It is cleared
+ * as soon as an entry has been written.
+ */
+static void display_perf_model(FILE *gnuplot_file, struct starpu_perfmodel_t *model, enum starpu_perf_archtype arch, int *first)
+{
+	if (!archtype_is_found[arch])
+		return;
+
+	if (*first)
+	{
+		*first = 0;
+	}
+	else {
+		fprintf(gnuplot_file, ",\\\n\t");
+	}
+
+	char arch_name[256];
+	starpu_perfmodel_get_arch_name(arch, arch_name, sizeof(arch_name));
+
+	fprintf(gnuplot_file, "\"< grep -w \\^%d %s\" using 2:3 title \"%s\"", arch, data_file_name, arch_name);
+
+	struct starpu_per_arch_perfmodel_t *arch_model = &model->per_arch[arch];
+
+	/* The time coefficients are scaled by 0.001 both on stderr and in the
+	 * script (presumably a time unit conversion -- confirm).
+	 *
+	 * They are written into the script with %e rather than %f: %f only keeps
+	 * 6 decimals, which turns a small coefficient into 0 and flattens the
+	 * curve. */
+
+	/* Only display the regression model if we could actually build a model */
+	if (arch_model->regression.valid)
+	{
+		fprintf(stderr, "\tLinear: y = alpha size ^ beta\n");
+		fprintf(stderr, "\t\talpha = %le\n", arch_model->regression.alpha * 0.001);
+		fprintf(stderr, "\t\tbeta = %le\n", arch_model->regression.beta);
+
+		fprintf(gnuplot_file, ",\\\n\t");
+		fprintf(gnuplot_file, "0.001 * %e * x ** %e title \"Linear Regression %s\"",
+			arch_model->regression.alpha, arch_model->regression.beta, arch_name);
+	}
+
+	if (arch_model->regression.nl_valid)
+	{
+		fprintf(stderr, "\tNon-Linear: y = a size ^b + c\n");
+		fprintf(stderr, "\t\ta = %le\n", arch_model->regression.a * 0.001);
+		fprintf(stderr, "\t\tb = %le\n", arch_model->regression.b);
+		fprintf(stderr, "\t\tc = %le\n", arch_model->regression.c * 0.001);
+
+		fprintf(gnuplot_file, ",\\\n\t");
+		fprintf(gnuplot_file, "0.001 * %e * x ** %e + 0.001 * %e title \"Non-Linear Regression %s\"",
+			arch_model->regression.a, arch_model->regression.b,  arch_model->regression.c, arch_name);
+	}
+}
+
+/*
+ * Write the measurements of the requested codelet into data_file, one line
+ * per event: "<archtype>\t<size>\t<time>".
+ *
+ * Only the events whose symbol equals `symbol' are written. As a side effect,
+ * archtype_is_found[] is reset and then set to 1 for every archtype that
+ * appears in the file, which is what display_perf_model() relies on.
+ */
+static void dump_data_file(FILE *data_file)
+{
+	memset(archtype_is_found, 0, sizeof(archtype_is_found));
+
+	/* The count is a long: use the same type for the index. */
+	long i;
+	for (i = 0; i < options.dumped_codelets_count; i++)
+	{
+		struct starpu_fxt_codelet_event *event = &dumped_codelets[i];
+
+		/* Dump only if the symbol matches user's request */
+		if (strcmp(event->symbol, symbol) != 0)
+			continue;
+
+		/* The worker id comes from the trace: do not index
+		 * options.worker_archtypes[] with a value it cannot hold. */
+		int workerid = event->workerid;
+		if (workerid < 0 || workerid >= STARPU_NMAXWORKERS)
+			continue;
+
+		/* Likewise for archtype_is_found[]. */
+		enum starpu_perf_archtype archtype = options.worker_archtypes[workerid];
+		if ((unsigned) archtype >= STARPU_NARCH_VARIATIONS)
+			continue;
+
+		archtype_is_found[archtype] = 1;
+
+		/* Convert the size to double rather than float, which cannot
+		 * represent every integer above 2^24. */
+		fprintf(data_file, "%d\t%f\t%f\n", archtype, (double) event->size, event->time);
+	}
+}
+
+/*
+ * Write the gnuplot script into gnuplot_file: a preamble (EPS output named
+ * "regression_<symbol>.eps", title, axis labels, log scales) followed by a
+ * single "plot" command whose entries are added by display_perf_model().
+ *
+ * The architectures that are plotted depend on the -a option (`arch'):
+ *   NULL      every architecture variation
+ *   cpu       STARPU_CPU_DEFAULT
+ *   cpu:k     combined worker of k CPUs, 1 <= k <= STARPU_NMAXCPUS
+ *   cuda      every CUDA device
+ *   cuda_n    CUDA device n, 0 <= n < STARPU_MAXCUDADEVS
+ *   gordon    STARPU_GORDON_DEFAULT
+ * Any other value, or an out-of-range k or n, is reported on stderr and
+ * terminates the program with exit(-1).
+ */
+static void display_selected_models(FILE *gnuplot_file, struct starpu_perfmodel_t *model)
+{
+	fprintf(gnuplot_file, "#!/usr/bin/gnuplot -persist\n");
+	fprintf(gnuplot_file, "\n");
+	fprintf(gnuplot_file, "set term postscript eps enhanced color\n");
+	fprintf(gnuplot_file, "set output \"regression_%s.eps\"\n", symbol);
+	fprintf(gnuplot_file, "set title \"Model for codelet %s\"\n", symbol);
+	fprintf(gnuplot_file, "set xlabel \"Size\"\n");
+	fprintf(gnuplot_file, "set ylabel \"Time\"\n");
+	fprintf(gnuplot_file, "\n");
+	fprintf(gnuplot_file, "set logscale x\n");
+	fprintf(gnuplot_file, "set logscale y\n");
+	fprintf(gnuplot_file, "\n");
+
+	/* Tells display_perf_model() whether a separator is needed before the
+	 * entry it adds to the plot command. */
+	int first = 1;
+	fprintf(gnuplot_file, "plot\t");
+
+	if (arch == NULL)
+	{
+		/* display all architectures */
+		unsigned archid;
+		for (archid = 0; archid < STARPU_NARCH_VARIATIONS; archid++)
+			display_perf_model(gnuplot_file, model, archid, &first);
+	}
+	else {
+		if (strcmp(arch, "cpu") == 0) {
+			display_perf_model(gnuplot_file, model, STARPU_CPU_DEFAULT, &first);
+			return;
+		}
+
+		int k;
+		if (sscanf(arch, "cpu:%d", &k) == 1)
+		{
+			/* For combined CPU workers */
+			if ((k < 1) || (k > STARPU_NMAXCPUS))
+			{
+				fprintf(stderr, "Invalid CPU size\n");
+				exit(-1);
+			}
+
+			display_perf_model(gnuplot_file, model, STARPU_CPU_DEFAULT + k - 1, &first);
+			return;
+		}
+
+		if (strcmp(arch, "cuda") == 0) {
+			unsigned archid;
+			for (archid = STARPU_CUDA_DEFAULT; archid < STARPU_CUDA_DEFAULT + STARPU_MAXCUDADEVS; archid++)
+				display_perf_model(gnuplot_file, model, archid, &first);
+			return;
+		}
+
+		/* There must be a cleaner way ! */
+		int gpuid;
+		if (sscanf(arch, "cuda_%d", &gpuid) == 1)
+		{
+			/* The id comes straight from the command line: an
+			 * out-of-range value would make display_perf_model()
+			 * index its tables out of bounds. */
+			if ((gpuid < 0) || (gpuid >= STARPU_MAXCUDADEVS))
+			{
+				fprintf(stderr, "Invalid CUDA device id\n");
+				exit(-1);
+			}
+
+			display_perf_model(gnuplot_file, model, STARPU_CUDA_DEFAULT + gpuid, &first);
+			return;
+		}
+
+		if (strcmp(arch, "gordon") == 0) {
+			display_perf_model(gnuplot_file, model, STARPU_GORDON_DEFAULT, &first);
+			return;
+		}
+
+		fprintf(stderr, "Unknown architecture requested, aborting.\n");
+		exit(-1);
+	}
+}
+
+/*
+ * Entry point: load the performance model of the codelet given with -s, read
+ * the FxT traces given with -i, then write
+ *   starpu_<symbol>.data   the measurements found in the traces
+ *   starpu_<symbol>.gp     the gnuplot script plotting them with the model
+ * Returns 0 on success, and 1 if no symbol was given, if the model could not
+ * be loaded or if one of the output files could not be written.
+ */
+int main(int argc, char **argv)
+{
+#ifdef __MINGW32__
+	/* NOTE(review): presumably required before StarPU uses Winsock -- confirm */
+	WSADATA wsadata;
+	WSAStartup(MAKEWORD(1,0), &wsadata);
+#endif
+
+	parse_args(argc, argv);
+
+	/* We need at least a symbol name */
+	if (!symbol)
+	{
+		fprintf(stderr, "No symbol was specified\n");
+		return 1;
+	}
+
+	/* Load the performance model associated to the symbol */
+	int ret = starpu_load_history_debug(symbol, &model);
+	if (ret == 1)
+	{
+		fprintf(stderr, "The performance model could not be loaded\n");
+		return 1;
+	}
+
+	/* Fills dumped_codelets and the output fields of options, which
+	 * dump_data_file() reads. */
+	starpu_fxt_generate_trace(&options);
+
+	snprintf(data_file_name, sizeof(data_file_name), "starpu_%s.data", symbol);
+	snprintf(gnuplot_file_name, sizeof(gnuplot_file_name), "starpu_%s.gp", symbol);
+
+	/* Failing to create an output file is a runtime error, not a broken
+	 * invariant: report it instead of asserting, so that it is still
+	 * handled when assertions are compiled out. */
+	FILE *data_file = fopen(data_file_name, "w+");
+	if (!data_file)
+	{
+		perror(data_file_name);
+		return 1;
+	}
+	dump_data_file(data_file);
+	/* fclose flushes the buffered data, so it is where a write error shows up */
+	if (fclose(data_file) != 0)
+	{
+		perror(data_file_name);
+		return 1;
+	}
+
+	/* The data file must be dumped first: this is what tells which
+	 * architectures have to appear in the script. */
+	FILE *gnuplot_file = fopen(gnuplot_file_name, "w+");
+	if (!gnuplot_file)
+	{
+		perror(gnuplot_file_name);
+		return 1;
+	}
+	display_selected_models(gnuplot_file, &model);
+	if (fclose(gnuplot_file) != 0)
+	{
+		perror(gnuplot_file_name);
+		return 1;
+	}
+
+	return 0;
+}