Browse Source

mic (perfmodels): merge trunk

Thibaud Lambert 11 years ago
parent
commit
3afcf39929

+ 3 - 0
ChangeLog

@@ -49,6 +49,9 @@ Small features:
     to enable or disable sequential consistency
     to enable or disable sequential consistency
   * New configure option --enable-fxt-lock which enables additional
   * New configure option --enable-fxt-lock which enables additional
     trace events focused on locks behaviour during the execution
     trace events focused on locks behaviour during the execution
+  * New function starpu_perfmodel_directory() to print the directory
+    storing performance models. Available through the new option -d of
+    the tool starpu_perfmodel_display
 
 
 Changes:
 Changes:
   * Fix of the livelock issue discovered while executing applications
   * Fix of the livelock issue discovered while executing applications

+ 4 - 0
doc/doxygen/chapters/api/performance_model.doxy

@@ -235,6 +235,10 @@ returns the architecture type of a given worker.
 \ingroup API_Performance_Model
 \ingroup API_Performance_Model
 prints a list of all performance models on \p output
 prints a list of all performance models on \p output
 
 
+\fn void starpu_perfmodel_directory(FILE *output)
+\ingroup API_Performance_Model
+prints the name of the directory storing performance models on \p output
+
 \fn void starpu_perfmodel_print(struct starpu_perfmodel *model, enum starpu_perfmodel_archtype arch, unsigned nimpl, char *parameter, uint32_t *footprint, FILE *output)
 \fn void starpu_perfmodel_print(struct starpu_perfmodel *model, enum starpu_perfmodel_archtype arch, unsigned nimpl, char *parameter, uint32_t *footprint, FILE *output)
 \ingroup API_Performance_Model
 \ingroup API_Performance_Model
 todo
 todo

+ 6 - 0
doc/doxygen/chapters/api/scheduling_contexts.doxy

@@ -129,6 +129,12 @@ blocked)
 \ingroup API_Scheduling_Contexts
 \ingroup API_Scheduling_Contexts
 Return 1 if the worker belongs to the context and 0 otherwise
 Return 1 if the worker belongs to the context and 0 otherwise
 
 
+\fn unsigned starpu_sched_ctx_worker_get_id(unsigned sched_ctx_id)
+\ingroup API_Scheduling_Contexts
+Return the workerid if the calling worker belongs to the context and -1 otherwise.
+If the thread calling this function is not a worker, the function returns -1,
+as it calls the function \ref starpu_worker_get_id()
+
 \fn unsigned starpu_sched_ctx_overlapping_ctxs_on_worker(int workerid)
 \fn unsigned starpu_sched_ctx_overlapping_ctxs_on_worker(int workerid)
 \ingroup API_Scheduling_Contexts
 \ingroup API_Scheduling_Contexts
 Check if a worker is shared between several contexts
 Check if a worker is shared between several contexts

+ 5 - 0
doc/doxygen/chapters/api/scheduling_policy.doxy

@@ -162,4 +162,9 @@ Whether \ref STARPU_PREFETCH was set
 \ingroup API_Scheduling_Policy
 \ingroup API_Scheduling_Policy
 Prefetch data for a given task on a given node
 Prefetch data for a given task on a given node
 
 
+\fn void starpu_sched_ctx_worker_shares_tasks_lists (int workerid, int sched_ctx_id)
+\ingroup API_Scheduling_Policy
+The scheduling policy indicates whether the worker may pop tasks from the lists of other workers
+or whether there is a central list with tasks for all the workers
+
 */
 */

+ 8 - 1
doc/doxygen/chapters/environment_variables.doxy

@@ -599,8 +599,15 @@ the speed of the other contexts, but only by the the value that a context should
 By default the values of the speed of the workers is printed during the execution
 By default the values of the speed of the workers is printed during the execution
 of the application. If the value 1 is given to this environment variable this printing
 of the application. If the value 1 is given to this environment variable this printing
 is not done.
 is not done.
-
 </dd>
 </dd>
 
 
+<dt>SC_HYPERVISOR_LAZY_RESIZE</dt>
+<dd>
+\anchor SC_HYPERVISOR_LAZY_RESIZE
+\addindex __env__SC_HYPERVISOR_LAZY_RESIZE
+By default the hypervisor resizes the contexts in a lazy way, that is, workers are first added to a new context
+before being removed from the previous one. Once these workers are clearly taken into account
+in the new context (a task was popped there), they are removed from the previous one. However, if the application
+wants the change in the distribution of workers to take effect right away, this variable should be set to 0.
 </dl>
 </dl>
 */
 */

+ 2 - 2
examples/sched_ctx/sched_ctx.c

@@ -76,8 +76,8 @@ int main(int argc, char **argv)
 #endif
 #endif
 
 
 	/*create contexts however you want*/
 	/*create contexts however you want*/
-	unsigned sched_ctx1 = starpu_sched_ctx_create("dmda", procs1, nprocs1, "ctx1");
-	unsigned sched_ctx2 = starpu_sched_ctx_create("dmda", procs2, nprocs2, "ctx2");
+	unsigned sched_ctx1 = starpu_sched_ctx_create("eager", procs1, nprocs1, "ctx1");
+	unsigned sched_ctx2 = starpu_sched_ctx_create("eager", procs2, nprocs2, "ctx2");
 
 
 	/*indicate what to do with the resources when context 2 finishes (it depends on your application)*/
 	/*indicate what to do with the resources when context 2 finishes (it depends on your application)*/
 	starpu_sched_ctx_set_inheritor(sched_ctx2, sched_ctx1);
 	starpu_sched_ctx_set_inheritor(sched_ctx2, sched_ctx1);

+ 1 - 0
include/starpu_perfmodel.h

@@ -149,6 +149,7 @@ void starpu_perfmodel_print(struct starpu_perfmodel *model, struct starpu_perfmo
 int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char *parameter, uint32_t *footprint, FILE *output);
 int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char *parameter, uint32_t *footprint, FILE *output);
 
 
 void starpu_perfmodel_update_history(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch * arch, unsigned cpuid, unsigned nimpl, double measured);
 void starpu_perfmodel_update_history(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch * arch, unsigned cpuid, unsigned nimpl, double measured);
+void starpu_perfmodel_directory(FILE *output);
 
 
 void starpu_bus_print_bandwidth(FILE *f);
 void starpu_bus_print_bandwidth(FILE *f);
 void starpu_bus_print_affinity(FILE *f);
 void starpu_bus_print_affinity(FILE *f);

+ 2 - 0
include/starpu_sched_ctx.h

@@ -60,6 +60,8 @@ unsigned starpu_sched_ctx_contains_worker(int workerid, unsigned sched_ctx_id);
 
 
 unsigned starpu_sched_ctx_contains_type_of_worker(enum starpu_worker_archtype arch, unsigned sched_ctx_id);
 unsigned starpu_sched_ctx_contains_type_of_worker(enum starpu_worker_archtype arch, unsigned sched_ctx_id);
 
 
+unsigned starpu_sched_ctx_worker_get_id(unsigned sched_ctx_id);
+
 unsigned starpu_sched_ctx_overlapping_ctxs_on_worker(int workerid);
 unsigned starpu_sched_ctx_overlapping_ctxs_on_worker(int workerid);
 
 
 int starpu_sched_get_min_priority(void);
 int starpu_sched_get_min_priority(void);

+ 4 - 0
include/starpu_scheduler.h

@@ -51,6 +51,10 @@ struct starpu_sched_policy **starpu_sched_get_predefined_policies();
 
 
 void starpu_worker_get_sched_condition(int workerid, starpu_pthread_mutex_t **sched_mutex, starpu_pthread_cond_t **sched_cond);
 void starpu_worker_get_sched_condition(int workerid, starpu_pthread_mutex_t **sched_mutex, starpu_pthread_cond_t **sched_cond);
 
 
+/* This function must be called to wake up a worker that is sleeping on the cond.
+ * It returns 1 if the sleeping worker was signalled, and 0 if the worker was not in a sleeping state */
+int starpu_wakeup_worker(int workerid, starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex);
+
 int starpu_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl);
 int starpu_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl);
 
 
 int starpu_push_local_task(int workerid, struct starpu_task *task, int back);
 int starpu_push_local_task(int workerid, struct starpu_task *task, int back);

+ 1 - 0
mpi/examples/mpi_lu/plu_outofcore_example.c

@@ -17,6 +17,7 @@
 
 
 #include <stdlib.h>
 #include <stdlib.h>
 #include <stdio.h>
 #include <stdio.h>
+#include <unistd.h>
 #include <string.h>
 #include <string.h>
 #include <time.h>
 #include <time.h>
 #include <math.h>
 #include <math.h>

+ 3 - 3
sc_hypervisor/src/policies_utils/lp_tools.c

@@ -347,7 +347,7 @@ void sc_hypervisor_lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rou
 				
 				
 				if(nw_move > 0)
 				if(nw_move > 0)
 				{
 				{
-					sc_hypervisor_move_workers(sched_ctxs[s], sched_ctxs[s2], workers_move, nw_move, 0);
+					sc_hypervisor_move_workers(sched_ctxs[s], sched_ctxs[s2], workers_move, nw_move, !(_sc_hypervisor_use_lazy_resize()));
 					nw_move = 0;
 					nw_move = 0;
 				}
 				}
 
 
@@ -367,7 +367,7 @@ void sc_hypervisor_lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rou
 		_lp_find_workers_to_remove(nw, tmp_nw_move, tmp_workers_move, 
 		_lp_find_workers_to_remove(nw, tmp_nw_move, tmp_workers_move, 
 					   &nw_move, workers_move);
 					   &nw_move, workers_move);
 		if(nw_move > 0)
 		if(nw_move > 0)
-			sc_hypervisor_remove_workers_from_sched_ctx(workers_move, nw_move, sched_ctxs[s], 0);
+			sc_hypervisor_remove_workers_from_sched_ctx(workers_move, nw_move, sched_ctxs[s], !(_sc_hypervisor_use_lazy_resize()));
 	}
 	}
 }
 }
 int _lp_get_unwanted_workers(int *workers_add, int nw_add, unsigned sched_ctx, int *workers_remove)
 int _lp_get_unwanted_workers(int *workers_add, int nw_add, unsigned sched_ctx, int *workers_remove)
@@ -475,7 +475,7 @@ void sc_hypervisor_lp_distribute_resources_in_ctxs(unsigned* sched_ctxs, int ns,
 			sc_hypervisor_add_workers_to_sched_ctx(workers_add, nw_add, sched_ctxs[s]);
 			sc_hypervisor_add_workers_to_sched_ctx(workers_add, nw_add, sched_ctxs[s]);
 			int workers_remove[STARPU_NMAXWORKERS];
 			int workers_remove[STARPU_NMAXWORKERS];
 			int nw_remove = _lp_get_unwanted_workers(workers_add, nw_add, sched_ctxs[s], workers_remove);
 			int nw_remove = _lp_get_unwanted_workers(workers_add, nw_add, sched_ctxs[s], workers_remove);
-			sc_hypervisor_remove_workers_from_sched_ctx(workers_remove, nw_remove, sched_ctxs[s], 0);
+			sc_hypervisor_remove_workers_from_sched_ctx(workers_remove, nw_remove, sched_ctxs[s], !(_sc_hypervisor_use_lazy_resize()));
 			sc_hypervisor_start_resize(sched_ctxs[s]);
 			sc_hypervisor_start_resize(sched_ctxs[s]);
 		}
 		}
 
 

+ 6 - 0
sc_hypervisor/src/sc_hypervisor.c

@@ -953,6 +953,12 @@ int sc_hypervisor_get_nsched_ctxs()
 	return ns;
 	return ns;
 }
 }
 
 
+int _sc_hypervisor_use_lazy_resize(void)
+{
+	char* lazy = getenv("SC_HYPERVISOR_LAZY_RESIZE");
+	return lazy ? atof(lazy)  : 1;
+}
+
 void sc_hypervisor_save_size_req(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
 void sc_hypervisor_save_size_req(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
 {
 {
 	hypervisor.sr = (struct size_request*)malloc(sizeof(struct size_request));
 	hypervisor.sr = (struct size_request*)malloc(sizeof(struct size_request));

+ 3 - 0
sc_hypervisor/src/sc_hypervisor_intern.h

@@ -120,4 +120,7 @@ void _remove_config(unsigned sched_ctx);
 double _get_max_speed_gap();
 double _get_max_speed_gap();
 
 
 double _get_optimal_v(unsigned sched_ctx);
 double _get_optimal_v(unsigned sched_ctx);
+
 void _set_optimal_v(unsigned sched_ctx, double optimal_v);
 void _set_optimal_v(unsigned sched_ctx, double optimal_v);
+
+int _sc_hypervisor_use_lazy_resize(void);

+ 3 - 1
src/core/errorcheck.h

@@ -35,7 +35,9 @@ enum _starpu_worker_status
 	/* during the execution of the callback */
 	/* during the execution of the callback */
 	STATUS_CALLBACK,
 	STATUS_CALLBACK,
 	/* while sleeping because there is nothing to do */
 	/* while sleeping because there is nothing to do */
-	STATUS_SLEEPING
+	STATUS_SLEEPING,
+	/* while a sleeping worker is about to wake up (to avoid waking up the same worker twice) */
+	STATUS_WAKING_UP
 };
 };
 
 
 /* Specify what the local worker is currently doing (eg. executing a callback).
 /* Specify what the local worker is currently doing (eg. executing a callback).

+ 8 - 1
src/core/perfmodel/perfmodel_history.c

@@ -519,7 +519,7 @@ static void dump_model_file(FILE *f, struct starpu_perfmodel *model)
 			arch.devid = devid;
 			arch.devid = devid;
 			fprintf(f, "###############\n");
 			fprintf(f, "###############\n");
 			fprintf(f, "# %s_%u\n", name, devid); 
 			fprintf(f, "# %s_%u\n", name, devid); 
-			fprintf(f, "# number of workers on %s_%d devices\n", name, devid);
+			fprintf(f, "# number of workers on device %s_%d\n", name, devid);
 			if(ncore != NULL)
 			if(ncore != NULL)
 				fprintf(f, "%u\n", ncore[devid]);
 				fprintf(f, "%u\n", ncore[devid]);
 			else
 			else
@@ -1028,6 +1028,13 @@ void _starpu_load_history_based_model(struct starpu_perfmodel *model, unsigned s
 	STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
 	STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
 }
 }
 
 
+void starpu_perfmodel_directory(FILE *output)
+{
+	char perf_model_dir[256];
+	_starpu_get_perf_model_dir(perf_model_dir, 256);
+	fprintf(output, "directory: <%s>\n", perf_model_dir);
+}
+
 /* This function is intended to be used by external tools that should read
 /* This function is intended to be used by external tools that should read
  * the performance model files */
  * the performance model files */
 int starpu_perfmodel_list(FILE *output)
 int starpu_perfmodel_list(FILE *output)

+ 0 - 5
src/core/perfmodel/perfmodel_print.c

@@ -236,12 +236,7 @@ int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char
 			{
 			{
 				perf_arch.devid = devid;
 				perf_arch.devid = devid;
 				for (implid = 0; implid <STARPU_MAXIMPLEMENTATIONS; implid ++)
 				for (implid = 0; implid <STARPU_MAXIMPLEMENTATIONS; implid ++)
-				{
-					char archname[32];
-					starpu_perfmodel_get_arch_name(&perf_arch, archname, 32, implid);
-					fprintf(output, "performance model for %s\n", archname);
 					starpu_perfmodel_print(model, &perf_arch, implid, parameter, footprint, output);
 					starpu_perfmodel_print(model, &perf_arch, implid, parameter, footprint, output);
-				}
 			}
 			}
 			return 0;
 			return 0;
 		}
 		}

+ 8 - 0
src/core/sched_ctx.c

@@ -1018,6 +1018,14 @@ unsigned _starpu_worker_belongs_to_a_sched_ctx(int workerid, unsigned sched_ctx_
 	}
 	}
 	return 0;
 	return 0;
 }
 }
+unsigned starpu_sched_ctx_worker_get_id(unsigned sched_ctx_id)
+{
+	int workerid = starpu_worker_get_id();
+	if(workerid != -1)
+		if(starpu_sched_ctx_contains_worker(workerid, sched_ctx_id))
+			return workerid;
+	return -1;
+}
 		 
 		 
 unsigned starpu_sched_ctx_overlapping_ctxs_on_worker(int workerid)
 unsigned starpu_sched_ctx_overlapping_ctxs_on_worker(int workerid)
 {
 {

+ 15 - 3
src/core/sched_policy.c

@@ -543,8 +543,8 @@ struct _starpu_sched_ctx* _get_next_sched_ctx_to_pop_into(struct _starpu_worker
 	for (l = worker->sched_ctx_list; l; l = l->next)
 	for (l = worker->sched_ctx_list; l; l = l->next)
 	{
 	{
 		sched_ctx = _starpu_get_sched_ctx_struct(l->sched_ctx);
 		sched_ctx = _starpu_get_sched_ctx_struct(l->sched_ctx);
-		if(worker->removed_from_ctx[sched_ctx->id])
-			return sched_ctx;
+/* 		if(worker->removed_from_ctx[sched_ctx->id]) */
+/* 			return sched_ctx; */
 		if(sched_ctx->pop_counter[worker->workerid] < worker->nsched_ctxs &&
 		if(sched_ctx->pop_counter[worker->workerid] < worker->nsched_ctxs &&
 		   smallest_counter > sched_ctx->pop_counter[worker->workerid])
 		   smallest_counter > sched_ctx->pop_counter[worker->workerid])
 		{
 		{
@@ -587,8 +587,14 @@ pick:
 	if(!task)
 	if(!task)
 	{		
 	{		
 		struct _starpu_sched_ctx *sched_ctx ;
 		struct _starpu_sched_ctx *sched_ctx ;
+#ifndef STARPU_NON_BLOCKING_DRIVERS
+		int been_here[STARPU_NMAX_SCHED_CTXS];
+		int i;
+		for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
+			been_here[i] = 0;
 
 
-		if(!task)
+		while(!task)
+#endif
 		{
 		{
 			if(worker->nsched_ctxs == 1)
 			if(worker->nsched_ctxs == 1)
 				sched_ctx = _starpu_get_initial_sched_ctx();
 				sched_ctx = _starpu_get_initial_sched_ctx();
@@ -608,7 +614,13 @@ pick:
 				_starpu_worker_gets_out_of_ctx(sched_ctx->id, worker);
 				_starpu_worker_gets_out_of_ctx(sched_ctx->id, worker);
 				worker->removed_from_ctx[sched_ctx->id] = 0;
 				worker->removed_from_ctx[sched_ctx->id] = 0;
 			}
 			}
+#ifndef STARPU_NON_BLOCKING_DRIVERS
+			if((!task && sched_ctx->pop_counter[worker->workerid] == 0 && been_here[sched_ctx->id]) || worker->nsched_ctxs == 1)
+				break;
 
 
+
+			been_here[sched_ctx->id] = 1;
+#endif
 			sched_ctx->pop_counter[worker->workerid]++;
 			sched_ctx->pop_counter[worker->workerid]++;
 		}
 		}
 	  }
 	  }

+ 15 - 0
src/core/workers.c

@@ -1523,6 +1523,21 @@ void starpu_worker_get_sched_condition(int workerid, starpu_pthread_mutex_t **sc
 	*sched_mutex = &config.workers[workerid].sched_mutex;
 	*sched_mutex = &config.workers[workerid].sched_mutex;
 }
 }
 
 
+int starpu_wakeup_worker(int workerid, starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex)
+{
+	int success = 0;
+	STARPU_PTHREAD_MUTEX_LOCK(mutex);
+	if (config.workers[workerid].status == STATUS_SLEEPING)
+	{
+		config.workers[workerid].status = STATUS_WAKING_UP;
+		STARPU_PTHREAD_COND_SIGNAL(cond);
+		success = 1;
+	}
+	STARPU_PTHREAD_MUTEX_UNLOCK(mutex);
+	return success;
+}
+
+
 int starpu_worker_get_nids_by_type(enum starpu_worker_archtype type, int *workerids, int maxsize)
 int starpu_worker_get_nids_by_type(enum starpu_worker_archtype type, int *workerids, int maxsize)
 {
 {
 	unsigned nworkers = starpu_worker_get_count();
 	unsigned nworkers = starpu_worker_get_count();

+ 4 - 4
src/drivers/driver_common/driver_common.c

@@ -150,11 +150,11 @@ void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_wo
 	}
 	}
 }
 }
 
 
-
-
 static void _starpu_worker_set_status_sleeping(int workerid)
 static void _starpu_worker_set_status_sleeping(int workerid)
 {
 {
-	if (_starpu_worker_get_status(workerid) != STATUS_SLEEPING)
+	if ( _starpu_worker_get_status(workerid) == STATUS_WAKING_UP)
+		_starpu_worker_set_status(workerid, STATUS_SLEEPING);
+	else if (_starpu_worker_get_status(workerid) != STATUS_SLEEPING)
 	{
 	{
 		_STARPU_TRACE_WORKER_SLEEP_START;
 		_STARPU_TRACE_WORKER_SLEEP_START;
 		_starpu_worker_restart_sleeping(workerid);
 		_starpu_worker_restart_sleeping(workerid);
@@ -165,7 +165,7 @@ static void _starpu_worker_set_status_sleeping(int workerid)
 
 
 static void _starpu_worker_set_status_wakeup(int workerid)
 static void _starpu_worker_set_status_wakeup(int workerid)
 {
 {
-	if (_starpu_worker_get_status(workerid) == STATUS_SLEEPING)
+	if (_starpu_worker_get_status(workerid) == STATUS_SLEEPING || _starpu_worker_get_status(workerid) == STATUS_WAKING_UP)
 	{
 	{
 		_STARPU_TRACE_WORKER_SLEEP_END;
 		_STARPU_TRACE_WORKER_SLEEP_END;
 		_starpu_worker_stop_sleeping(workerid);
 		_starpu_worker_stop_sleeping(workerid);

+ 3 - 3
src/sched_policies/eager_central_policy.c

@@ -89,9 +89,9 @@ static int push_task_eager_policy(struct starpu_task *task)
 		starpu_pthread_mutex_t *sched_mutex;
 		starpu_pthread_mutex_t *sched_mutex;
 		starpu_pthread_cond_t *sched_cond;
 		starpu_pthread_cond_t *sched_cond;
 		starpu_worker_get_sched_condition(worker, &sched_mutex, &sched_cond);
 		starpu_worker_get_sched_condition(worker, &sched_mutex, &sched_cond);
-		STARPU_PTHREAD_MUTEX_LOCK(sched_mutex);
-		STARPU_PTHREAD_COND_SIGNAL(sched_cond);
-		STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
+
+		if (starpu_wakeup_worker(worker, sched_cond, sched_mutex))
+		    break; // wake up a single worker
 	}
 	}
 
 
 	return ret_val;
 	return ret_val;

+ 16 - 3
tools/starpu_perfmodel_display.c

@@ -32,6 +32,8 @@
 
 
 /* display all available models */
 /* display all available models */
 static int plist = 0;
 static int plist = 0;
+/* display directory */
+static int pdirectory = 0;
 /* what kernel ? */
 /* what kernel ? */
 static char *psymbol = NULL;
 static char *psymbol = NULL;
 /* what parameter should be displayed ? (NULL = all) */
 /* what parameter should be displayed ? (NULL = all) */
@@ -54,6 +56,7 @@ static void usage()
         fprintf(stderr, "   -p <parameter>      specify the parameter (e.g. a, b, c, mean, stddev)\n");
         fprintf(stderr, "   -p <parameter>      specify the parameter (e.g. a, b, c, mean, stddev)\n");
         fprintf(stderr, "   -a <arch>           specify the architecture (e.g. cpu, cpu:k, cuda)\n");
         fprintf(stderr, "   -a <arch>           specify the architecture (e.g. cpu, cpu:k, cuda)\n");
 	fprintf(stderr, "   -f <footprint>      display the history-based model for the specified footprint\n");
 	fprintf(stderr, "   -f <footprint>      display the history-based model for the specified footprint\n");
+	fprintf(stderr, "   -d                  display the directory storing performance models\n");
 	fprintf(stderr, "   -h, --help          display this help and exit\n");
 	fprintf(stderr, "   -h, --help          display this help and exit\n");
 	fprintf(stderr, "   -v, --version       output version information and exit\n\n");
 	fprintf(stderr, "   -v, --version       output version information and exit\n\n");
         fprintf(stderr, "Reports bugs to <"PACKAGE_BUGREPORT">.");
         fprintf(stderr, "Reports bugs to <"PACKAGE_BUGREPORT">.");
@@ -71,6 +74,7 @@ static void parse_args(int argc, char **argv)
 		{"help",      no_argument,       NULL, 'h'},
 		{"help",      no_argument,       NULL, 'h'},
 		/* XXX Would be cleaner to set a flag */
 		/* XXX Would be cleaner to set a flag */
 		{"list",      no_argument,       NULL, 'l'},
 		{"list",      no_argument,       NULL, 'l'},
+		{"dir",       no_argument,       NULL, 'd'},
 		{"parameter", required_argument, NULL, 'p'},
 		{"parameter", required_argument, NULL, 'p'},
 		{"symbol",    required_argument, NULL, 's'},
 		{"symbol",    required_argument, NULL, 's'},
 		{"version",   no_argument,       NULL, 'v'},
 		{"version",   no_argument,       NULL, 'v'},
@@ -78,7 +82,7 @@ static void parse_args(int argc, char **argv)
 	};
 	};
 
 
 	int option_index;
 	int option_index;
-	while ((c = getopt_long(argc, argv, "ls:p:a:f:h", long_options, &option_index)) != -1)
+	while ((c = getopt_long(argc, argv, "dls:p:a:f:h", long_options, &option_index)) != -1)
 	{
 	{
 		switch (c)
 		switch (c)
 		{
 		{
@@ -108,6 +112,11 @@ static void parse_args(int argc, char **argv)
 			sscanf(optarg, "%08x", &pspecific_footprint);
 			sscanf(optarg, "%08x", &pspecific_footprint);
 			break;
 			break;
 
 
+		case 'd':
+			/* directory */
+			pdirectory = 1;
+			break;
+
 		case 'h':
 		case 'h':
 			usage();
 			usage();
 			exit(EXIT_SUCCESS);
 			exit(EXIT_SUCCESS);
@@ -122,7 +131,7 @@ static void parse_args(int argc, char **argv)
 		}
 		}
 	}
 	}
 
 
-	if (!psymbol && !plist)
+	if (!psymbol && !plist && !pdirectory)
 	{
 	{
 		fprintf(stderr, "Incorrect usage, aborting\n");
 		fprintf(stderr, "Incorrect usage, aborting\n");
                 usage();
                 usage();
@@ -144,7 +153,11 @@ int main(int argc, char **argv)
 	{
 	{
                 starpu_perfmodel_list(stdout);
                 starpu_perfmodel_list(stdout);
         }
         }
-        else
+        else if (pdirectory)
+	{
+		starpu_perfmodel_directory(stdout);
+	}
+	else
 	{
 	{
 		struct starpu_perfmodel model;
 		struct starpu_perfmodel model;
                 int ret = starpu_perfmodel_load_symbol(psymbol, &model);
                 int ret = starpu_perfmodel_load_symbol(psymbol, &model);