Thibaud Lambert лет назад: 11
Родитель
Сommit
3afcf39929

+ 3 - 0
ChangeLog

@@ -49,6 +49,9 @@ Small features:
     to enable or disable sequential consistency
   * New configure option --enable-fxt-lock which enables additional
     trace events focused on locks behaviour during the execution
+  * New function starpu_perfmodel_directory() to print the directory
+    storing performance models. Available through the new option -d of
+    the tool starpu_perfmodel_display
 
 Changes:
   * Fix of the livelock issue discovered while executing applications

+ 4 - 0
doc/doxygen/chapters/api/performance_model.doxy

@@ -235,6 +235,10 @@ returns the architecture type of a given worker.
 \ingroup API_Performance_Model
 prints a list of all performance models on \p output
 
+\fn void starpu_perfmodel_directory(FILE *output)
+\ingroup API_Performance_Model
+prints the name of the directory storing performance models on \p output
+
 \fn void starpu_perfmodel_print(struct starpu_perfmodel *model, enum starpu_perfmodel_archtype arch, unsigned nimpl, char *parameter, uint32_t *footprint, FILE *output)
 \ingroup API_Performance_Model
 todo

+ 6 - 0
doc/doxygen/chapters/api/scheduling_contexts.doxy

@@ -129,6 +129,12 @@ blocked)
 \ingroup API_Scheduling_Contexts
 Return 1 if the worker belongs to the context and 0 otherwise
 
+\fn unsigned starpu_sched_ctx_worker_get_id(unsigned sched_ctx_id)
+\ingroup API_Scheduling_Contexts
+Return the id of the calling worker if it belongs to the context and -1 otherwise
+(the return type is unsigned, so -1 is seen by the caller as the largest unsigned value).
+If the calling thread is not a worker, \ref starpu_worker_get_id() returns -1 and so does this function
+
 \fn unsigned starpu_sched_ctx_overlapping_ctxs_on_worker(int workerid)
 \ingroup API_Scheduling_Contexts
 Check if a worker is shared between several contexts

+ 5 - 0
doc/doxygen/chapters/api/scheduling_policy.doxy

@@ -162,4 +162,9 @@ Whether \ref STARPU_PREFETCH was set
 \ingroup API_Scheduling_Policy
 Prefetch data for a given task on a given node
 
+\fn void starpu_sched_ctx_worker_shares_tasks_lists (int workerid, int sched_ctx_id)
+\ingroup API_Scheduling_Policy
+The scheduling policy indicates whether the worker may pop tasks from the lists of other workers
+or whether there is a central list of tasks for all the workers
+
 */

+ 8 - 1
doc/doxygen/chapters/environment_variables.doxy

@@ -599,8 +599,15 @@ the speed of the other contexts, but only by the the value that a context should
 By default the values of the speed of the workers is printed during the execution
 of the application. If the value 1 is given to this environment variable this printing
 is not done.
-
 </dd>
 
+<dt>SC_HYPERVISOR_LAZY_RESIZE</dt>
+<dd>
+\anchor SC_HYPERVISOR_LAZY_RESIZE
+\addindex __env__SC_HYPERVISOR_LAZY_RESIZE
+By default the hypervisor resizes the contexts in a lazy way, that is, workers are first added to the new context
+before being removed from the previous one. Once these workers are clearly taken into account
+in the new context (a task was popped there), they are removed from the previous one. However, if the application
+needs the change in the distribution of workers to take effect right away, this variable should be set to 0.
 </dl>
 */

+ 2 - 2
examples/sched_ctx/sched_ctx.c

@@ -76,8 +76,8 @@ int main(int argc, char **argv)
 #endif
 
 	/*create contexts however you want*/
-	unsigned sched_ctx1 = starpu_sched_ctx_create("dmda", procs1, nprocs1, "ctx1");
-	unsigned sched_ctx2 = starpu_sched_ctx_create("dmda", procs2, nprocs2, "ctx2");
+	unsigned sched_ctx1 = starpu_sched_ctx_create("eager", procs1, nprocs1, "ctx1");
+	unsigned sched_ctx2 = starpu_sched_ctx_create("eager", procs2, nprocs2, "ctx2");
 
 	/*indicate what to do with the resources when context 2 finishes (it depends on your application)*/
 	starpu_sched_ctx_set_inheritor(sched_ctx2, sched_ctx1);

+ 1 - 0
include/starpu_perfmodel.h

@@ -149,6 +149,7 @@ void starpu_perfmodel_print(struct starpu_perfmodel *model, struct starpu_perfmo
 int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char *parameter, uint32_t *footprint, FILE *output);
 
 void starpu_perfmodel_update_history(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch * arch, unsigned cpuid, unsigned nimpl, double measured);
+void starpu_perfmodel_directory(FILE *output);
 
 void starpu_bus_print_bandwidth(FILE *f);
 void starpu_bus_print_affinity(FILE *f);

+ 2 - 0
include/starpu_sched_ctx.h

@@ -60,6 +60,8 @@ unsigned starpu_sched_ctx_contains_worker(int workerid, unsigned sched_ctx_id);
 
 unsigned starpu_sched_ctx_contains_type_of_worker(enum starpu_worker_archtype arch, unsigned sched_ctx_id);
 
+unsigned starpu_sched_ctx_worker_get_id(unsigned sched_ctx_id);
+
 unsigned starpu_sched_ctx_overlapping_ctxs_on_worker(int workerid);
 
 int starpu_sched_get_min_priority(void);

+ 4 - 0
include/starpu_scheduler.h

@@ -51,6 +51,10 @@ struct starpu_sched_policy **starpu_sched_get_predefined_policies();
 
 void starpu_worker_get_sched_condition(int workerid, starpu_pthread_mutex_t **sched_mutex, starpu_pthread_cond_t **sched_cond);
 
+/* This function must be called to wake up a worker that is sleeping on the cond.
+ * It returns 1 if the worker was sleeping and has been signalled, and 0 whenever the worker is not in a sleeping state */
+int starpu_wakeup_worker(int workerid, starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex);
+
 int starpu_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl);
 
 int starpu_push_local_task(int workerid, struct starpu_task *task, int back);

+ 1 - 0
mpi/examples/mpi_lu/plu_outofcore_example.c

@@ -17,6 +17,7 @@
 
 #include <stdlib.h>
 #include <stdio.h>
+#include <unistd.h>
 #include <string.h>
 #include <time.h>
 #include <math.h>

+ 3 - 3
sc_hypervisor/src/policies_utils/lp_tools.c

@@ -347,7 +347,7 @@ void sc_hypervisor_lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rou
 				
 				if(nw_move > 0)
 				{
-					sc_hypervisor_move_workers(sched_ctxs[s], sched_ctxs[s2], workers_move, nw_move, 0);
+					sc_hypervisor_move_workers(sched_ctxs[s], sched_ctxs[s2], workers_move, nw_move, !(_sc_hypervisor_use_lazy_resize()));
 					nw_move = 0;
 				}
 
@@ -367,7 +367,7 @@ void sc_hypervisor_lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rou
 		_lp_find_workers_to_remove(nw, tmp_nw_move, tmp_workers_move, 
 					   &nw_move, workers_move);
 		if(nw_move > 0)
-			sc_hypervisor_remove_workers_from_sched_ctx(workers_move, nw_move, sched_ctxs[s], 0);
+			sc_hypervisor_remove_workers_from_sched_ctx(workers_move, nw_move, sched_ctxs[s], !(_sc_hypervisor_use_lazy_resize()));
 	}
 }
 int _lp_get_unwanted_workers(int *workers_add, int nw_add, unsigned sched_ctx, int *workers_remove)
@@ -475,7 +475,7 @@ void sc_hypervisor_lp_distribute_resources_in_ctxs(unsigned* sched_ctxs, int ns,
 			sc_hypervisor_add_workers_to_sched_ctx(workers_add, nw_add, sched_ctxs[s]);
 			int workers_remove[STARPU_NMAXWORKERS];
 			int nw_remove = _lp_get_unwanted_workers(workers_add, nw_add, sched_ctxs[s], workers_remove);
-			sc_hypervisor_remove_workers_from_sched_ctx(workers_remove, nw_remove, sched_ctxs[s], 0);
+			sc_hypervisor_remove_workers_from_sched_ctx(workers_remove, nw_remove, sched_ctxs[s], !(_sc_hypervisor_use_lazy_resize()));
 			sc_hypervisor_start_resize(sched_ctxs[s]);
 		}
 

+ 6 - 0
sc_hypervisor/src/sc_hypervisor.c

@@ -953,6 +953,12 @@ int sc_hypervisor_get_nsched_ctxs()
 	return ns;
 }
 
+int _sc_hypervisor_use_lazy_resize(void)
+{
+	char* lazy = getenv("SC_HYPERVISOR_LAZY_RESIZE");
+	return lazy ? atof(lazy)  : 1;
+}
+
 void sc_hypervisor_save_size_req(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers)
 {
 	hypervisor.sr = (struct size_request*)malloc(sizeof(struct size_request));

+ 3 - 0
sc_hypervisor/src/sc_hypervisor_intern.h

@@ -120,4 +120,7 @@ void _remove_config(unsigned sched_ctx);
 double _get_max_speed_gap();
 
 double _get_optimal_v(unsigned sched_ctx);
+
 void _set_optimal_v(unsigned sched_ctx, double optimal_v);
+
+int _sc_hypervisor_use_lazy_resize(void);

+ 3 - 1
src/core/errorcheck.h

@@ -35,7 +35,9 @@ enum _starpu_worker_status
 	/* during the execution of the callback */
 	STATUS_CALLBACK,
 	/* while sleeping because there is nothing to do */
-	STATUS_SLEEPING
+	STATUS_SLEEPING,
+	/* while a sleeping worker is about to wake up (to avoid waking up the same worker twice) */
+	STATUS_WAKING_UP
 };
 
 /* Specify what the local worker is currently doing (eg. executing a callback).

+ 8 - 1
src/core/perfmodel/perfmodel_history.c

@@ -519,7 +519,7 @@ static void dump_model_file(FILE *f, struct starpu_perfmodel *model)
 			arch.devid = devid;
 			fprintf(f, "###############\n");
 			fprintf(f, "# %s_%u\n", name, devid); 
-			fprintf(f, "# number of workers on %s_%d devices\n", name, devid);
+			fprintf(f, "# number of workers on device %s_%d\n", name, devid);
 			if(ncore != NULL)
 				fprintf(f, "%u\n", ncore[devid]);
 			else
@@ -1028,6 +1028,13 @@ void _starpu_load_history_based_model(struct starpu_perfmodel *model, unsigned s
 	STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
 }
 
+void starpu_perfmodel_directory(FILE *output)
+{
+	char perf_model_dir[256];
+	_starpu_get_perf_model_dir(perf_model_dir, 256);
+	fprintf(output, "directory: <%s>\n", perf_model_dir);
+}
+
 /* This function is intended to be used by external tools that should read
  * the performance model files */
 int starpu_perfmodel_list(FILE *output)

+ 0 - 5
src/core/perfmodel/perfmodel_print.c

@@ -236,12 +236,7 @@ int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char
 			{
 				perf_arch.devid = devid;
 				for (implid = 0; implid <STARPU_MAXIMPLEMENTATIONS; implid ++)
-				{
-					char archname[32];
-					starpu_perfmodel_get_arch_name(&perf_arch, archname, 32, implid);
-					fprintf(output, "performance model for %s\n", archname);
 					starpu_perfmodel_print(model, &perf_arch, implid, parameter, footprint, output);
-				}
 			}
 			return 0;
 		}

+ 8 - 0
src/core/sched_ctx.c

@@ -1018,6 +1018,14 @@ unsigned _starpu_worker_belongs_to_a_sched_ctx(int workerid, unsigned sched_ctx_
 	}
 	return 0;
 }
+unsigned starpu_sched_ctx_worker_get_id(unsigned sched_ctx_id)
+{
+	int workerid = starpu_worker_get_id();
+	if(workerid != -1)
+		if(starpu_sched_ctx_contains_worker(workerid, sched_ctx_id))
+			return workerid;
+	return -1;
+}
 		 
 unsigned starpu_sched_ctx_overlapping_ctxs_on_worker(int workerid)
 {

+ 15 - 3
src/core/sched_policy.c

@@ -543,8 +543,8 @@ struct _starpu_sched_ctx* _get_next_sched_ctx_to_pop_into(struct _starpu_worker
 	for (l = worker->sched_ctx_list; l; l = l->next)
 	{
 		sched_ctx = _starpu_get_sched_ctx_struct(l->sched_ctx);
-		if(worker->removed_from_ctx[sched_ctx->id])
-			return sched_ctx;
+/* 		if(worker->removed_from_ctx[sched_ctx->id]) */
+/* 			return sched_ctx; */
 		if(sched_ctx->pop_counter[worker->workerid] < worker->nsched_ctxs &&
 		   smallest_counter > sched_ctx->pop_counter[worker->workerid])
 		{
@@ -587,8 +587,14 @@ pick:
 	if(!task)
 	{		
 		struct _starpu_sched_ctx *sched_ctx ;
+#ifndef STARPU_NON_BLOCKING_DRIVERS
+		int been_here[STARPU_NMAX_SCHED_CTXS];
+		int i;
+		for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
+			been_here[i] = 0;
 
-		if(!task)
+		while(!task)
+#endif
 		{
 			if(worker->nsched_ctxs == 1)
 				sched_ctx = _starpu_get_initial_sched_ctx();
@@ -608,7 +614,13 @@ pick:
 				_starpu_worker_gets_out_of_ctx(sched_ctx->id, worker);
 				worker->removed_from_ctx[sched_ctx->id] = 0;
 			}
+#ifndef STARPU_NON_BLOCKING_DRIVERS
+			if((!task && sched_ctx->pop_counter[worker->workerid] == 0 && been_here[sched_ctx->id]) || worker->nsched_ctxs == 1)
+				break;
 
+
+			been_here[sched_ctx->id] = 1;
+#endif
 			sched_ctx->pop_counter[worker->workerid]++;
 		}
 	  }

+ 15 - 0
src/core/workers.c

@@ -1523,6 +1523,21 @@ void starpu_worker_get_sched_condition(int workerid, starpu_pthread_mutex_t **sc
 	*sched_mutex = &config.workers[workerid].sched_mutex;
 }
 
+int starpu_wakeup_worker(int workerid, starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex)
+{
+	int success = 0;
+	STARPU_PTHREAD_MUTEX_LOCK(mutex);
+	if (config.workers[workerid].status == STATUS_SLEEPING)
+	{
+		config.workers[workerid].status = STATUS_WAKING_UP;
+		STARPU_PTHREAD_COND_SIGNAL(cond);
+		success = 1;
+	}
+	STARPU_PTHREAD_MUTEX_UNLOCK(mutex);
+	return success;
+}
+
+
 int starpu_worker_get_nids_by_type(enum starpu_worker_archtype type, int *workerids, int maxsize)
 {
 	unsigned nworkers = starpu_worker_get_count();

+ 4 - 4
src/drivers/driver_common/driver_common.c

@@ -150,11 +150,11 @@ void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_wo
 	}
 }
 
-
-
 static void _starpu_worker_set_status_sleeping(int workerid)
 {
-	if (_starpu_worker_get_status(workerid) != STATUS_SLEEPING)
+	if ( _starpu_worker_get_status(workerid) == STATUS_WAKING_UP)
+		_starpu_worker_set_status(workerid, STATUS_SLEEPING);
+	else if (_starpu_worker_get_status(workerid) != STATUS_SLEEPING)
 	{
 		_STARPU_TRACE_WORKER_SLEEP_START;
 		_starpu_worker_restart_sleeping(workerid);
@@ -165,7 +165,7 @@ static void _starpu_worker_set_status_sleeping(int workerid)
 
 static void _starpu_worker_set_status_wakeup(int workerid)
 {
-	if (_starpu_worker_get_status(workerid) == STATUS_SLEEPING)
+	if (_starpu_worker_get_status(workerid) == STATUS_SLEEPING || _starpu_worker_get_status(workerid) == STATUS_WAKING_UP)
 	{
 		_STARPU_TRACE_WORKER_SLEEP_END;
 		_starpu_worker_stop_sleeping(workerid);

+ 3 - 3
src/sched_policies/eager_central_policy.c

@@ -89,9 +89,9 @@ static int push_task_eager_policy(struct starpu_task *task)
 		starpu_pthread_mutex_t *sched_mutex;
 		starpu_pthread_cond_t *sched_cond;
 		starpu_worker_get_sched_condition(worker, &sched_mutex, &sched_cond);
-		STARPU_PTHREAD_MUTEX_LOCK(sched_mutex);
-		STARPU_PTHREAD_COND_SIGNAL(sched_cond);
-		STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
+
+		if (starpu_wakeup_worker(worker, sched_cond, sched_mutex))
+		    break; // wake up a single worker
 	}
 
 	return ret_val;

+ 16 - 3
tools/starpu_perfmodel_display.c

@@ -32,6 +32,8 @@
 
 /* display all available models */
 static int plist = 0;
+/* display directory */
+static int pdirectory = 0;
 /* what kernel ? */
 static char *psymbol = NULL;
 /* what parameter should be displayed ? (NULL = all) */
@@ -54,6 +56,7 @@ static void usage()
         fprintf(stderr, "   -p <parameter>      specify the parameter (e.g. a, b, c, mean, stddev)\n");
         fprintf(stderr, "   -a <arch>           specify the architecture (e.g. cpu, cpu:k, cuda)\n");
 	fprintf(stderr, "   -f <footprint>      display the history-based model for the specified footprint\n");
+	fprintf(stderr, "   -d                  display the directory storing performance models\n");
 	fprintf(stderr, "   -h, --help          display this help and exit\n");
 	fprintf(stderr, "   -v, --version       output version information and exit\n\n");
         fprintf(stderr, "Reports bugs to <"PACKAGE_BUGREPORT">.");
@@ -71,6 +74,7 @@ static void parse_args(int argc, char **argv)
 		{"help",      no_argument,       NULL, 'h'},
 		/* XXX Would be cleaner to set a flag */
 		{"list",      no_argument,       NULL, 'l'},
+		{"dir",       no_argument,       NULL, 'd'},
 		{"parameter", required_argument, NULL, 'p'},
 		{"symbol",    required_argument, NULL, 's'},
 		{"version",   no_argument,       NULL, 'v'},
@@ -78,7 +82,7 @@ static void parse_args(int argc, char **argv)
 	};
 
 	int option_index;
-	while ((c = getopt_long(argc, argv, "ls:p:a:f:h", long_options, &option_index)) != -1)
+	while ((c = getopt_long(argc, argv, "dls:p:a:f:h", long_options, &option_index)) != -1)
 	{
 		switch (c)
 		{
@@ -108,6 +112,11 @@ static void parse_args(int argc, char **argv)
 			sscanf(optarg, "%08x", &pspecific_footprint);
 			break;
 
+		case 'd':
+			/* directory */
+			pdirectory = 1;
+			break;
+
 		case 'h':
 			usage();
 			exit(EXIT_SUCCESS);
@@ -122,7 +131,7 @@ static void parse_args(int argc, char **argv)
 		}
 	}
 
-	if (!psymbol && !plist)
+	if (!psymbol && !plist && !pdirectory)
 	{
 		fprintf(stderr, "Incorrect usage, aborting\n");
                 usage();
@@ -144,7 +153,11 @@ int main(int argc, char **argv)
 	{
                 starpu_perfmodel_list(stdout);
         }
-        else
+        else if (pdirectory)
+	{
+		starpu_perfmodel_directory(stdout);
+	}
+	else
 	{
 		struct starpu_perfmodel model;
                 int ret = starpu_perfmodel_load_symbol(psymbol, &model);