12 lat temu · 631853b887
--- a/sc_hypervisor/include/sc_hypervisor_policy.h
+++ b/sc_hypervisor/include/sc_hypervisor_policy.h
@@ -94,12 +94,11 @@ double sc_hypervisor_get_velocity_per_worker_type(struct sc_hypervisor_wrapper*
 
				 /* compute the velocity of a type of worker in a context depending on its history */ 
			
 
				 double sc_hypervisor_get_ref_velocity_per_worker_type(struct sc_hypervisor_wrapper* sc_w, enum starpu_worker_archtype arch);
			
 
				 
			
 
				-/* check if there are contexts a lot more delayed than others */
			
 
				-int sc_hypervisor_has_velocity_gap_btw_ctxs(void);
			
 
				-
			
 
				 /* get the list of workers grouped by type */
			
 
				 void sc_hypervisor_group_workers_by_type(int *workers, int nworkers, int ntypes_of_workers, int total_nw[ntypes_of_workers]);
			
 
				 
			
 
				+/* check if we trigger resizing or not */
			
 
				+unsigned sc_hypervisor_criteria_fulfilled(unsigned sched_ctx, int worker);
			
 
				 
			
 
				 #ifdef __cplusplus
			
 
				 }
			
--- a/sc_hypervisor/src/hypervisor_policies/debit_lp_policy.c
+++ b/sc_hypervisor/src/hypervisor_policies/debit_lp_policy.c
@@ -232,7 +232,7 @@ static void debit_lp_handle_poped_task(unsigned sched_ctx, int worker, struct st
 
				 	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
			
 
				 	if(ret != EBUSY)
			
 
				 	{
			
 
				-		if(sc_hypervisor_has_velocity_gap_btw_ctxs())
			
 
				+		if(sc_hypervisor_criteria_fulfilled(sched_ctx, worker))
			
 
				 		{
			
 
				 			int ns = sc_hypervisor_get_nsched_ctxs();
			
 
				 			int nw = starpu_worker_get_count(); /* Number of different workers */
			
--- a/sc_hypervisor/src/hypervisor_policies/feft_lp_policy.c
+++ b/sc_hypervisor/src/hypervisor_policies/feft_lp_policy.c
@@ -21,7 +21,7 @@
 
				 #ifdef STARPU_HAVE_GLPK_H
			
 
				 static void feft_lp_handle_poped_task(unsigned sched_ctx, int worker, struct starpu_task *task, uint32_t footprint)
			
 
				 {
			
 
				-	if(sc_hypervisor_has_velocity_gap_btw_ctxs())
			
 
				+	if(sc_hypervisor_criteria_fulfilled(sched_ctx, worker))
			
 
				 	{
			
 
				 		int nsched_ctxs = sc_hypervisor_get_nsched_ctxs();
			
 
				 
			
--- a/sc_hypervisor/src/hypervisor_policies/idle_policy.c
+++ b/sc_hypervisor/src/hypervisor_policies/idle_policy.c
@@ -30,9 +30,7 @@ unsigned worker_belong_to_other_sched_ctx(unsigned sched_ctx, int worker)
 
				 
			
 
				 void idle_handle_idle_cycle(unsigned sched_ctx, int worker)
			
 
				 {
			
 
				-	struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx);
			
 
				-	struct sc_hypervisor_policy_config *config = sc_w->config;
			
 
				-	if(config != NULL &&  sc_w->current_idle_time[worker] > config->max_idle[worker])
			
 
				+	if(sc_hypervisor_criteria_fulfilled(sched_ctx, worker))
			
 
				 	{
			
 
				 		if(worker_belong_to_other_sched_ctx(sched_ctx, worker))
			
 
				 			sc_hypervisor_remove_workers_from_sched_ctx(&worker, 1, sched_ctx, 1);
			
--- a/sc_hypervisor/src/hypervisor_policies/ispeed_lp_policy.c
+++ b/sc_hypervisor/src/hypervisor_policies/ispeed_lp_policy.c
@@ -334,7 +334,7 @@ static void ispeed_lp_handle_poped_task(unsigned sched_ctx, int worker, struct s
 
				 	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
			
 
				 	if(ret != EBUSY)
			
 
				 	{
			
 
				-		if(sc_hypervisor_has_velocity_gap_btw_ctxs())
			
 
				+		if(sc_hypervisor_criteria_fulfilled(sched_ctx, worker))
			
 
				 		{
			
 
				 			int ns = sc_hypervisor_get_nsched_ctxs();
			
 
				 			int nw = starpu_worker_get_count(); /* Number of different workers */
			
--- a/sc_hypervisor/src/hypervisor_policies/ispeed_policy.c
+++ b/sc_hypervisor/src/hypervisor_policies/ispeed_policy.c
@@ -146,7 +146,7 @@ static void ispeed_handle_poped_task(unsigned sched_ctx, int worker, struct star
 
				 	int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex);
			
 
				 	if(ret != EBUSY)
			
 
				 	{
			
 
				-		if(sc_hypervisor_has_velocity_gap_btw_ctxs())
			
 
				+		if(sc_hypervisor_criteria_fulfilled(sched_ctx, worker))
			
 
				 		{
			
 
				 			unsigned fastest_sched_ctx = _get_fastest_sched_ctx();
			
 
				 			unsigned slowest_sched_ctx = _get_slowest_sched_ctx();
			
--- a/sc_hypervisor/src/hypervisor_policies/teft_lp_policy.c
+++ b/sc_hypervisor/src/hypervisor_policies/teft_lp_policy.c
@@ -168,7 +168,7 @@ static void teft_lp_handle_poped_task(unsigned sched_ctx, int worker, struct sta
 
				 			return;
			
 
				 		}
			
 
				 
			
 
				-		if(sc_hypervisor_has_velocity_gap_btw_ctxs())
			
 
				+		if(sc_hypervisor_criteria_fulfilled(sched_ctx, worker))
			
 
				 		{
			
 
				 			int ns = sc_hypervisor_get_nsched_ctxs();
			
 
				 			int nw = starpu_worker_get_count(); /* Number of different workers */
			
--- a/sc_hypervisor/src/policies_utils/policy_tools.c
+++ b/sc_hypervisor/src/policies_utils/policy_tools.c
@@ -16,7 +16,7 @@
 
				 
			
 
				 
			
 
				 #include "sc_hypervisor_policy.h"
			
 
				-
			
 
				+#include "sc_hypervisor_intern.h"
			
 
				 static int _compute_priority(unsigned sched_ctx)
			
 
				 {
			
 
				 	struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctx);
			
@@ -526,7 +526,7 @@ double sc_hypervisor_get_velocity_per_worker_type(struct sc_hypervisor_wrapper*
 
				 
			
 
				 
			
 
				 /* check if there is a big velocity gap between the contexts */
			
 
				-int sc_hypervisor_has_velocity_gap_btw_ctxs()
			
 
				+unsigned _check_velocity_gap_btw_ctxs()
			
 
				 {
			
 
				 	int *sched_ctxs = sc_hypervisor_get_sched_ctxs();
			
 
				 	int nsched_ctxs = sc_hypervisor_get_nsched_ctxs();
			
@@ -554,7 +554,7 @@ int sc_hypervisor_has_velocity_gap_btw_ctxs()
 
				 					{
			
 
				 						double gap = ctx_v < other_ctx_v ? other_ctx_v / ctx_v : ctx_v / other_ctx_v ;
			
 
				 //						if(gap > 1.5)
			
 
				-						if(gap > 3.0)
			
 
				+						if(gap > _get_max_velocity_gap())
			
 
				 							return 1;
			
 
				 					}
			
 
				 				}
			
@@ -628,3 +628,25 @@ void sc_hypervisor_get_tasks_times(int nw, int nt, double times[nw][nt], int *wo
 
				         }
			
 
				 }
			
 
				 
			
 
				+static unsigned _check_idle(unsigned sched_ctx, int worker)
			
 
				+{
			
 
				+	struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx);
			
 
				+	struct sc_hypervisor_policy_config *config = sc_w->config;
			
 
				+	if(config != NULL &&  sc_w->current_idle_time[worker] > config->max_idle[worker])
			
 
				+		return 1;
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+unsigned sc_hypervisor_criteria_fulfilled(unsigned sched_ctx, int worker)
			
 
				+{
			
 
				+	unsigned criteria = _get_resize_criteria();
			
 
				+	if(criteria != SC_NOTHING)
			
 
				+	{
			
 
				+		if(criteria == SC_IDLE)
			
 
				+			return _check_idle(sched_ctx, worker);
			
 
				+		else
			
 
				+			return _check_velocity_gap_btw_ctxs();
			
 
				+	}
			
 
				+	else
			
 
				+		return 0;
			
 
				+}
			
--- a/sc_hypervisor/src/sc_hypervisor.c
+++ b/sc_hypervisor/src/sc_hypervisor.c
@@ -133,6 +133,11 @@ struct starpu_sched_ctx_performance_counters* sc_hypervisor_init(struct sc_hyper
 
				 {
			
 
				 	hypervisor.min_tasks = 0;
			
 
				 	hypervisor.nsched_ctxs = 0;
			
 
				+	char* vel_gap = getenv("MAX_VELOCITY_GAP");
			
 
				+	hypervisor.max_velocity_gap = vel_gap ? atof(vel_gap) : SC_VELOCITY_MAX_GAP_DEFAULT;
			
 
				+	char* crit =  getenv("HYPERVISOR_TRIGGER_RESIZE");
			
 
				+	hypervisor.resize_criteria = !crit ? SC_NOTHING : (strcmp(crit,"idle") == 0 ? SC_IDLE : (strcmp(crit,"speed") == 0 ? SC_SPEED : SC_NOTHING)); /* getenv() yields NULL when HYPERVISOR_TRIGGER_RESIZE is unset: never hand that to strcmp(), just disable the trigger */
			
 
				+
			
 
				 	starpu_pthread_mutex_init(&act_hypervisor_mutex, NULL);
			
 
				 	hypervisor.start_executing_time = starpu_timing_now();
			
 
				 	int i;
			
@@ -210,21 +215,24 @@ void sc_hypervisor_start_resize(unsigned sched_ctx)
 
				 
			
 
				 static void _print_current_time()
			
 
				 {
			
 
				-	double curr_time = starpu_timing_now();
			
 
				-	double elapsed_time = (curr_time - hypervisor.start_executing_time) / 1000000.0; /* in seconds */
			
 
				-	fprintf(stdout, "Time: %lf\n", elapsed_time);
			
 
				-	int i;
			
 
				-	for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
			
 
				+	if(!getenv("HYPERVISOR_STOP_PRINT"))
			
 
				 	{
			
 
				-		if(hypervisor.sched_ctxs[i] != STARPU_NMAX_SCHED_CTXS)
			
 
				+		double curr_time = starpu_timing_now();
			
 
				+		double elapsed_time = (curr_time - hypervisor.start_executing_time) / 1000000.0; /* in seconds */
			
 
				+		fprintf(stdout, "Time: %lf\n", elapsed_time);
			
 
				+		int i;
			
 
				+		for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
			
 
				 		{
			
 
				-			struct sc_hypervisor_wrapper *sc_w = &hypervisor.sched_ctx_w[hypervisor.sched_ctxs[i]];
			
 
				-
			
 
				-			double cpu_speed = sc_hypervisor_get_velocity(sc_w, STARPU_CPU_WORKER);
			
 
				-			double cuda_speed = sc_hypervisor_get_velocity(sc_w, STARPU_CUDA_WORKER);
			
 
				-			int ncpus = sc_hypervisor_get_nworkers_ctx(sc_w->sched_ctx, STARPU_CPU_WORKER);
			
 
				-			int ncuda = sc_hypervisor_get_nworkers_ctx(sc_w->sched_ctx, STARPU_CUDA_WORKER);
			
 
				-			fprintf(stdout, "%d: cpu_v = %lf cuda_v = %lf ncpus = %d ncuda = %d\n", hypervisor.sched_ctxs[i], cpu_speed, cuda_speed, ncpus, ncuda);
			
 
				+			if(hypervisor.sched_ctxs[i] != STARPU_NMAX_SCHED_CTXS)
			
 
				+			{
			
 
				+				struct sc_hypervisor_wrapper *sc_w = &hypervisor.sched_ctx_w[hypervisor.sched_ctxs[i]];
			
 
				+				
			
 
				+				double cpu_speed = sc_hypervisor_get_velocity(sc_w, STARPU_CPU_WORKER);
			
 
				+				double cuda_speed = sc_hypervisor_get_velocity(sc_w, STARPU_CUDA_WORKER);
			
 
				+				int ncpus = sc_hypervisor_get_nworkers_ctx(sc_w->sched_ctx, STARPU_CPU_WORKER);
			
 
				+				int ncuda = sc_hypervisor_get_nworkers_ctx(sc_w->sched_ctx, STARPU_CUDA_WORKER);
			
 
				+				fprintf(stdout, "%d: cpu_v = %lf cuda_v = %lf ncpus = %d ncuda = %d\n", hypervisor.sched_ctxs[i], cpu_speed, cuda_speed, ncpus, ncuda);
			
 
				+			}
			
 
				 		}
			
 
				 	}
			
 
				 	return;
			
@@ -364,6 +372,16 @@ static double _get_best_total_elapsed_flops(struct sc_hypervisor_wrapper* sc_w,
 
				 	return ret_val;
			
 
				 }
			
 
				 
			
 
				+double _get_max_velocity_gap()
			
 
				+{
			
 
				+	return hypervisor.max_velocity_gap;
			
 
				+}
			
 
				+
			
 
				+unsigned _get_resize_criteria()
			
 
				+{
			
 
				+	return hypervisor.resize_criteria;
			
 
				+}
			
 
				+
			
 
				 /* compute an average value of the cpu/cuda velocity */
			
 
				 double sc_hypervisorsc_hypervisor_get_velocity_per_worker_type(struct sc_hypervisor_wrapper* sc_w, enum starpu_worker_archtype arch)
			
 
				 {
			
--- a/sc_hypervisor/src/sc_hypervisor_intern.h
+++ b/sc_hypervisor/src/sc_hypervisor_intern.h
@@ -16,6 +16,11 @@
 
				 
			
 
				 #include <sc_hypervisor.h>
			
 
				 #include <common/uthash.h>
			
 
				+
			
 
				+#define SC_VELOCITY_MAX_GAP_DEFAULT 50
			
 
				+#define SC_NOTHING 0
			
 
				+#define SC_IDLE 1
			
 
				+#define SC_SPEED 2
			
 
				 struct size_request
			
 
				 {
			
 
				 	int *workers;
			
@@ -74,6 +79,12 @@ struct sc_hypervisor
 
				 
			
 
				 	/* time when the hypervisor started */
			
 
				 	double start_executing_time;
			
 
				+
			
 
				+	/* max velocity diff btw ctx before triggering resizing */
			
 
				+	double max_velocity_gap;
			
 
				+	
			
 
				+	/* criteria to trigger resizing */
			
 
				+	unsigned resize_criteria;
			
 
				 };
			
 
				 
			
 
				 struct sc_hypervisor_adjustment
			
@@ -88,3 +99,7 @@ struct sc_hypervisor hypervisor;
 
				 void _add_config(unsigned sched_ctx);
			
 
				 
			
 
				 void _remove_config(unsigned sched_ctx);
			
 
				+
			
 
				+double _get_max_velocity_gap();
			
 
				+
			
 
				+unsigned _get_resize_criteria();
			
--- a/src/common/utils.c
+++ b/src/common/utils.c
@@ -134,7 +134,7 @@ char *_starpu_get_home_path(void)
 
				 		static int warn;
			
 
				 		if (!warn) {
			
 
				 			warn = 1;
			
 
				-			_STARPU_DISP("couldn't find a home place to put starpu data, using /tmp\n");
			
 
				+			_STARPU_DISP("couldn't find a $STARPU_HOME place to put .starpu data, using /tmp\n");
			
 
				 		}
			
 
				 		path = "/tmp";
			
 
				 	}
			
--- a/src/core/combined_workers.c
+++ b/src/core/combined_workers.c
@@ -125,12 +125,12 @@ int starpu_combined_worker_assign_workerid(int nworkers, int workerid_array[])
 
				 #ifdef CPU_OR
			
 
				 		CPU_OR(&combined_worker->cpu_set,
			
 
				 			&combined_worker->cpu_set,
			
 
				-			&config->workers[id].initial_cpu_set);
			
 
				+			&config->workers[id].cpu_set);
			
 
				 #else
			
 
				 		int j;
			
 
				 		for (j = 0; j < CPU_SETSIZE; j++)
			
 
				 		{
			
 
				-			if (CPU_ISSET(j, &config->workers[id].initial_cpu_set))
			
 
				+			if (CPU_ISSET(j, &config->workers[id].cpu_set))
			
 
				 				CPU_SET(j, &combined_worker->cpu_set);
			
 
				 		}
			
 
				 #endif
			
@@ -139,7 +139,7 @@ int starpu_combined_worker_assign_workerid(int nworkers, int workerid_array[])
 
				 #ifdef STARPU_HAVE_HWLOC
			
 
				 		hwloc_bitmap_or(combined_worker->hwloc_cpu_set,
			
 
				 				combined_worker->hwloc_cpu_set,
			
 
				-				config->workers[id].initial_hwloc_cpu_set);
			
 
				+				config->workers[id].hwloc_cpu_set);
			
 
				 #endif
			
 
				 #endif
			
 
				 	}
			
--- a/src/core/dependencies/tags.c
+++ b/src/core/dependencies/tags.c
@@ -288,7 +288,7 @@ void starpu_tag_restart(starpu_tag_t id)
 
				 	struct _starpu_tag *tag = gettag_struct(id);
			
 
				 
			
 
				 	_starpu_spin_lock(&tag->lock);
			
 
				-	STARPU_ASSERT_MSG(tag->state == STARPU_DONE, "Only completed tags can be restarted (was %d)", tag->state);
			
 
				+	STARPU_ASSERT_MSG(tag->state == STARPU_DONE, "Only completed tags can be restarted (%llu was %d)", (unsigned long long) id, tag->state);
			
 
				 	tag->state = STARPU_BLOCKED;
			
 
				 	_starpu_spin_unlock(&tag->lock);
			
 
				 }
			
--- a/src/core/jobs.c
+++ b/src/core/jobs.c
@@ -313,7 +313,7 @@ static unsigned _starpu_not_all_tag_deps_are_fulfilled(struct _starpu_job *j)
 
				 	struct _starpu_cg_list *tag_successors = &tag->tag_successors;
			
 
				 
			
 
				 	_starpu_spin_lock(&tag->lock);
			
 
				-	STARPU_ASSERT_MSG(tag->is_assigned == 1 || !tag_successors->ndeps, "a tag can be assigned only one task to wake");
			
 
				+	STARPU_ASSERT_MSG(tag->is_assigned == 1 || !tag_successors->ndeps, "a tag can be assigned only one task to wake (%llu had %u assigned tasks, and %u successors)", (unsigned long long) tag->id, tag->is_assigned, tag_successors->ndeps);
			
 
				 
			
 
				 	if (tag_successors->ndeps != tag_successors->ndeps_completed)
			
 
				 	{
			
--- a/src/core/perfmodel/perfmodel_history.c
+++ b/src/core/perfmodel/perfmodel_history.c
@@ -963,7 +963,7 @@ int starpu_perfmodel_list(FILE *output)
 
				         }
			
 
				         else
			
 
				 	{
			
 
				-		_STARPU_DISP("Could not open the perfmodel directory <%s>\n", path);
			
 
				+		_STARPU_DISP("Could not open the perfmodel directory <%s>: %s\n", path, strerror(errno)); /* second %s consumes the strerror() text, which was previously passed but never printed */
			
 
				         }
			
 
				 	return 0;
			
 
				 }
			
--- a/src/core/sched_ctx.c
+++ b/src/core/sched_ctx.c
@@ -234,7 +234,7 @@ static void _starpu_sched_ctx_create_hwloc_tree(struct _starpu_sched_ctx *sched_
 
				 		{
			
 
				 			hwloc_bitmap_or(sched_ctx->hwloc_workers_set,
			
 
				 					sched_ctx->hwloc_workers_set,
			
 
				-					config->workers[worker].initial_hwloc_cpu_set);
			
 
				+					config->workers[worker].hwloc_cpu_set);
			
 
				 		}
			
 
				 
			
 
				 	}
			
@@ -578,7 +578,7 @@ static void _starpu_check_workers(int *workerids, int nworkers)
 
				 	for(i = 0; i < nworkers; i++)
			
 
				 	{
			
 
				 		/* take care the user does not ask for a resource that does not exist */
			
 
				-		STARPU_ASSERT_MSG(workerids[i] >= 0 &&  workerids[i] <= nworkers_conf, "workerid = %d", workerids[i]);
			
 
				+		STARPU_ASSERT_MSG(workerids[i] >= 0 &&  workerids[i] <= nworkers_conf, "requested to add workerid = %d, but that is beyond the range 0 to %d", workerids[i], nworkers_conf);
			
 
				 	}
			
 
				 }
			
 
				 
			
--- a/src/core/simgrid.c
+++ b/src/core/simgrid.c
@@ -111,7 +111,7 @@ void _starpu_simgrid_execute_job(struct _starpu_job *j, enum starpu_perfmodel_ar
 
				 	{
			
 
				 		length = starpu_task_expected_length(task, perf_arch, j->nimpl);
			
 
				 		STARPU_ASSERT_MSG(!_STARPU_IS_ZERO(length) && !isnan(length),
			
 
				-			"Codelet %s does not have a perfmodel, or is not calibrated enough",
			
 
				+				"Codelet %s does not have a perfmodel, or is not calibrated enough, please re-run in non-simgrid mode until it is calibrated",
			
 
				 			_starpu_job_get_model_name(j));
			
 
				 	}
			
 
				 
			
--- a/src/core/task.c
+++ b/src/core/task.c
@@ -363,7 +363,7 @@ void _starpu_task_check_deprecated_fields(struct starpu_task *task)
 
				 	if (task->cl)
			
 
				 	{
			
 
				 		unsigned i;
			
 
				-		for(i=0; i<task->cl->nbuffers ; i++)
			
 
				+		for(i=0; i<STARPU_MIN(task->cl->nbuffers, STARPU_NMAXBUFS) ; i++)
			
 
				 		{
			
 
				 			if (task->buffers[i].handle && task->handles[i])
			
 
				 			{
			
@@ -427,7 +427,7 @@ int starpu_task_submit(struct starpu_task *task)
 
				 
			
 
				 		/* Check buffers */
			
 
				 		if (task->dyn_handles == NULL)
			
 
				-			STARPU_ASSERT_MSG(task->cl->nbuffers <= STARPU_NMAXBUFS, "Codelet %p has too many buffers (%d vs max %d)", task->cl, task->cl->nbuffers, STARPU_NMAXBUFS);
			
 
				+			STARPU_ASSERT_MSG(task->cl->nbuffers <= STARPU_NMAXBUFS, "Codelet %p has too many buffers (%d vs max %d). Either use --enable-maxbuffers configure option to increase the max, or use dyn_handles instead of handles.", task->cl, task->cl->nbuffers, STARPU_NMAXBUFS);
			
 
				 
			
 
				 		if (task->dyn_handles)
			
 
				 		{
			
@@ -438,7 +438,7 @@ int starpu_task_submit(struct starpu_task *task)
 
				 		{
			
 
				 			starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
			
 
				 			/* Make sure handles are not partitioned */
			
 
				-			STARPU_ASSERT_MSG(handle->nchildren == 0, "only unpartitioned data can be used in a task");
			
 
				+			STARPU_ASSERT_MSG(handle->nchildren == 0, "only unpartitioned data (or the pieces of a partitioned data) can be used in a task");
			
 
				 			/* Provide the home interface for now if any,
			
 
				 			 * for can_execute hooks */
			
 
				 			if (handle->home_node != -1)
			
--- a/src/core/topology.c
+++ b/src/core/topology.c
@@ -1278,10 +1278,8 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_c
 
				 
			
 
				 #ifdef __GLIBC__
			
 
				 		/* Save the initial cpuset */
			
 
				-		CPU_ZERO(&workerarg->initial_cpu_set);
			
 
				-		CPU_SET(workerarg->bindid, &workerarg->initial_cpu_set);
			
 
				-		CPU_ZERO(&workerarg->current_cpu_set);
			
 
				-		CPU_SET(workerarg->bindid, &workerarg->current_cpu_set);
			
 
				+		CPU_ZERO(&workerarg->cpu_set);
			
 
				+		CPU_SET(workerarg->bindid, &workerarg->cpu_set);
			
 
				 #endif /* __GLIBC__ */
			
 
				 
			
 
				 #ifdef STARPU_HAVE_HWLOC
			
@@ -1295,9 +1293,7 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_c
 
				 		worker_obj->userdata = &config->workers[worker];
			
 
				 
			
 
				 		/* Clear the cpu set and set the cpu */
			
 
				-		workerarg->initial_hwloc_cpu_set =
			
 
				-			hwloc_bitmap_dup (worker_obj->cpuset);
			
 
				-		workerarg->current_hwloc_cpu_set =
			
 
				+		workerarg->hwloc_cpu_set =
			
 
				 			hwloc_bitmap_dup (worker_obj->cpuset);
			
 
				 #endif
			
 
				 	}
			
@@ -1340,8 +1336,7 @@ _starpu_destroy_topology (
 
				 	{
			
 
				 #ifdef STARPU_HAVE_HWLOC
			
 
				 		struct _starpu_worker *workerarg = &config->workers[worker];
			
 
				-		hwloc_bitmap_free(workerarg->initial_hwloc_cpu_set);
			
 
				-		hwloc_bitmap_free(workerarg->current_hwloc_cpu_set);
			
 
				+		hwloc_bitmap_free(workerarg->hwloc_cpu_set);
			
 
				 #endif
			
 
				 	}
			
 
				 
			
--- a/src/core/workers.c
+++ b/src/core/workers.c
@@ -396,7 +396,7 @@ void _starpu_worker_init(struct _starpu_worker *worker, unsigned fut_key)
 
				 
			
 
				         _STARPU_DEBUG("worker %d is ready on logical cpu %d\n", devid, worker->bindid);
			
 
				 #ifdef STARPU_HAVE_HWLOC
			
 
				-	_STARPU_DEBUG("worker %d cpuset start at %d\n", devid, hwloc_bitmap_first(worker->initial_hwloc_cpu_set));
			
 
				+	_STARPU_DEBUG("worker %d cpuset start at %d\n", devid, hwloc_bitmap_first(worker->hwloc_cpu_set));
			
 
				 #endif
			
 
				 
			
 
				 	_starpu_memory_node_set_local_key(&worker->memory_node);
			
--- a/src/core/workers.h
+++ b/src/core/workers.h
@@ -106,12 +106,10 @@ struct _starpu_worker
 
				 	unsigned parallel_sect;
			
 
				 
			
 
				 #ifdef __GLIBC__
			
 
				-	cpu_set_t initial_cpu_set;
			
 
				-	cpu_set_t current_cpu_set;
			
 
				+	cpu_set_t cpu_set;
			
 
				 #endif /* __GLIBC__ */
			
 
				 #ifdef STARPU_HAVE_HWLOC
			
 
				-	hwloc_bitmap_t initial_hwloc_cpu_set;
			
 
				-	hwloc_bitmap_t current_hwloc_cpu_set;
			
 
				+	hwloc_bitmap_t hwloc_cpu_set;
			
 
				 #endif
			
 
				 };
			
 
				 
			
--- a/src/datawizard/data_request.c
+++ b/src/datawizard/data_request.c
@@ -162,7 +162,7 @@ int _starpu_wait_data_request_completion(struct _starpu_data_request *r, unsigne
 
				 
			
 
				 	retval = r->retval;
			
 
				 	if (retval)
			
 
				-		_STARPU_DISP("REQUEST %p COMPLETED (retval %d) !\n", r, r->retval);
			
 
				+		_STARPU_DISP("REQUEST %p completed with retval %d!\n", r, r->retval);
			
 
				 
			
 
				 
			
 
				 	r->refcnt--;
			
--- a/src/datawizard/filters.c
+++ b/src/datawizard/filters.c
@@ -75,7 +75,8 @@ int starpu_data_get_nb_children(starpu_data_handle_t handle)
 
				 
			
 
				 starpu_data_handle_t starpu_data_get_child(starpu_data_handle_t handle, unsigned i)
			
 
				 {
			
 
				-	STARPU_ASSERT_MSG(i < handle->nchildren, "Invalid child index %u, maximum %u", i, handle->nchildren);
			
 
				+	STARPU_ASSERT_MSG(handle->nchildren != 0, "Data %p has to be partitioned before accessing children", handle);
			
 
				+	STARPU_ASSERT_MSG(i < handle->nchildren, "Invalid child index %u in handle %p, maximum %u", i, handle, handle->nchildren);
			
 
				 	return &handle->children[i];
			
 
				 }
			
 
				 
			
@@ -104,8 +105,8 @@ starpu_data_handle_t starpu_data_vget_sub_data(starpu_data_handle_t root_handle,
 
				 		unsigned next_child;
			
 
				 		next_child = va_arg(pa, unsigned);
			
 
				 
			
 
				-		STARPU_ASSERT_MSG(current_handle->nchildren != 0, "Data has to be partitioned before accessing children");
			
 
				-		STARPU_ASSERT_MSG(next_child < current_handle->nchildren, "Bogus child number");
			
 
				+		STARPU_ASSERT_MSG(current_handle->nchildren != 0, "Data %p has to be partitioned before accessing children", current_handle);
			
 
				+		STARPU_ASSERT_MSG(next_child < current_handle->nchildren, "Bogus child number %u, data %p only has %u children", next_child, current_handle, current_handle->nchildren);
			
 
				 
			
 
				 		current_handle = &current_handle->children[next_child];
			
 
				 	}
			
@@ -122,7 +123,7 @@ void starpu_data_partition(starpu_data_handle_t initial_handle, struct starpu_da
 
				 	/* first take care to properly lock the data header */
			
 
				 	_starpu_spin_lock(&initial_handle->header_lock);
			
 
				 
			
 
				-	STARPU_ASSERT_MSG(initial_handle->nchildren == 0, "there should not be mutiple filters applied on the same data");
			
 
				+	STARPU_ASSERT_MSG(initial_handle->nchildren == 0, "there should not be mutiple filters applied on the same data %p, futher filtering has to be done on children", initial_handle);
			
 
				 
			
 
				 	/* how many parts ? */
			
 
				 	if (f->get_nchildren)
			
@@ -130,7 +131,7 @@ void starpu_data_partition(starpu_data_handle_t initial_handle, struct starpu_da
 
				 	else
			
 
				 	  nparts = f->nchildren;
			
 
				 
			
 
				-	STARPU_ASSERT_MSG(nparts > 0, "Partitioning in 0 piece does not make sense");
			
 
				+	STARPU_ASSERT_MSG(nparts > 0, "Partitioning data %p in 0 piece does not make sense", initial_handle);
			
 
				 
			
 
				 	/* allocate the children */
			
 
				 	starpu_data_create_children(initial_handle, nparts, f);
			
@@ -277,7 +278,7 @@ void starpu_data_unpartition(starpu_data_handle_t root_handle, unsigned gatherin
 
				 
			
 
				 	_starpu_spin_lock(&root_handle->header_lock);
			
 
				 
			
 
				-	STARPU_ASSERT_MSG(root_handle->nchildren != 0, "data is not partitioned");
			
 
				+	STARPU_ASSERT_MSG(root_handle->nchildren != 0, "data %p is not partitioned, can not unpartition it", root_handle);
			
 
				 
			
 
				 	/* first take all the children lock (in order !) */
			
 
				 	for (child = 0; child < root_handle->nchildren; child++)
			
--- a/src/datawizard/interfaces/data_interface.c
+++ b/src/datawizard/interfaces/data_interface.c
@@ -421,7 +421,7 @@ int starpu_data_set_tag(starpu_data_handle_t handle, int tag)
 
				 	entry = (struct handle_tag_entry *) malloc(sizeof(*entry));
			
 
				 	STARPU_ASSERT(entry != NULL);
			
 
				 
			
 
				-	STARPU_ASSERT_MSG(!(starpu_data_get_data_handle_from_tag(tag)),"A data handle with tag %d had already been registered.\n",tag);
			
 
				+	STARPU_ASSERT_MSG(!(starpu_data_get_data_handle_from_tag(tag)),"data handle %p already has tag %d\n", starpu_data_get_data_handle_from_tag(tag), tag);
			
 
				 
			
 
				 	entry->tag = tag;
			
 
				 	entry->handle = handle;
			
@@ -442,7 +442,7 @@ int starpu_data_release_tag(starpu_data_handle_t handle)
 
				 	{
			
 
				 		_starpu_spin_lock(&registered_tag_handles_lock);
			
 
				 		HASH_FIND_INT(registered_tag_handles, &handle->tag, tag_entry);
			
 
				-		STARPU_ASSERT_MSG((tag_entry != NULL),"Handle %p with tag %d isn't in the hashmap !",handle,handle->tag);
			
 
				+		STARPU_ASSERT_MSG((tag_entry != NULL),"Data handle %p with tag %d isn't in the hashmap !",handle,handle->tag);
			
 
				 
			
 
				 		HASH_DEL(registered_tag_handles, tag_entry);
			
 
				 		free(tag_entry);
			
@@ -559,7 +559,7 @@ static void _starpu_data_unregister_fetch_data_callback(void *_arg)
 
				 static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned coherent)
			
 
				 {
			
 
				 	STARPU_ASSERT(handle);
			
 
				-	STARPU_ASSERT_MSG(handle->nchildren == 0, "data needs to be unpartitioned before unregistration");
			
 
				+	STARPU_ASSERT_MSG(handle->nchildren == 0, "data %p needs to be unpartitioned before unregistration", handle);
			
 
				 
			
 
				 	if (coherent)
			
 
				 	{
			
@@ -736,7 +736,7 @@ static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned cohere
 
				 
			
 
				 void starpu_data_unregister(starpu_data_handle_t handle)
			
 
				 {
			
 
				-	STARPU_ASSERT_MSG(!handle->lazy_unregister, "data must not be unregistered twice");
			
 
				+	STARPU_ASSERT_MSG(!handle->lazy_unregister, "data %p can not be unregistered twice", handle);
			
 
				 	_starpu_data_unregister(handle, 1);
			
 
				 }
			
 
				 
			
@@ -748,7 +748,7 @@ void starpu_data_unregister_no_coherency(starpu_data_handle_t handle)
 
				 void starpu_data_unregister_submit(starpu_data_handle_t handle)
			
 
				 {
			
 
				 	_starpu_spin_lock(&handle->header_lock);
			
 
				-	STARPU_ASSERT_MSG(!handle->lazy_unregister, "data must not be unregistered twice");
			
 
				+	STARPU_ASSERT_MSG(!handle->lazy_unregister, "data %p can not be unregistered twice", handle);
			
 
				 	handle->lazy_unregister = 1;
			
 
				 	_starpu_spin_unlock(&handle->header_lock);
			
 
				 	_starpu_data_unregister(handle, 0);
			
--- a/src/datawizard/malloc.c
+++ b/src/datawizard/malloc.c
@@ -27,7 +27,7 @@ static size_t _malloc_align = sizeof(void*);
 
				 
			
 
				 void starpu_malloc_set_align(size_t align)
			
 
				 {
			
 
				-	STARPU_ASSERT_MSG(!(align & (align - 1)), "Alignment given to starpu_malloc_set_align must be a power of two");
			
 
				+	STARPU_ASSERT_MSG(!(align & (align - 1)), "Alignment given to starpu_malloc_set_align (%lu) must be a power of two", (unsigned long) align);
			
 
				 	if (_malloc_align < align)
			
 
				 		_malloc_align = align;
			
 
				 }
			
--- a/src/datawizard/memalloc.c
+++ b/src/datawizard/memalloc.c
@@ -135,6 +135,8 @@ static void transfer_subtree_to_node(starpu_data_handle_t handle, unsigned src_n
 
				 	unsigned cnt;
			
 
				 	int ret;
			
 
				 
			
 
				+	STARPU_ASSERT(dst_node != src_node);
			
 
				+
			
 
				 	if (handle->nchildren == 0)
			
 
				 	{
			
 
				 		struct _starpu_data_replicate *src_replicate = &handle->per_node[src_node];
			
@@ -210,6 +212,23 @@ static void transfer_subtree_to_node(starpu_data_handle_t handle, unsigned src_n
 
				 	}
			
 
				 }
			
 
				 
			
 
				+static void notify_handle_children(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned node)
			
 
				+{
			
 
				+	unsigned child;
			
 
				+
			
 
				+	replicate->allocated = 0;
			
 
				+
			
 
				+	/* XXX why do we need that ? */
			
 
				+	replicate->automatically_allocated = 0;
			
 
				+
			
 
				+	for (child = 0; child < handle->nchildren; child++)
			
 
				+	{
			
 
				+		/* Notify children that their buffer has been deallocated too */
			
 
				+		starpu_data_handle_t child_handle = starpu_data_get_child(handle, child);
			
 
				+		notify_handle_children(child_handle, &child_handle->per_node[node], node);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				 static size_t free_memory_on_node(struct _starpu_mem_chunk *mc, unsigned node)
			
 
				 {
			
 
				 	size_t freed = 0;
			
@@ -244,12 +263,7 @@ static size_t free_memory_on_node(struct _starpu_mem_chunk *mc, unsigned node)
 
				 		mc->ops->free_data_on_node(mc->chunk_interface, node);
			
 
				 
			
 
				 		if (handle)
			
 
				-		{
			
 
				-			replicate->allocated = 0;
			
 
				-
			
 
				-			/* XXX why do we need that ? */
			
 
				-			replicate->automatically_allocated = 0;
			
 
				-		}
			
 
				+			notify_handle_children(handle, replicate, node);
			
 
				 
			
 
				 		freed = mc->size;
			
 
				 
			
@@ -298,6 +312,10 @@ static size_t try_to_free_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node)
 
				 	if (handle->wt_mask & (1<<node))
			
 
				 		return 0;
			
 
				 
			
 
				+	/* This data was registered from this node, we will not be able to drop it anyway */
			
 
				+	if ((int) node == handle->home_node)
			
 
				+		return 0;
			
 
				+
			
 
				 	/* REDUX memchunk */
			
 
				 	if (mc->relaxed_coherency == 2)
			
 
				 	{
			
@@ -332,26 +350,35 @@ static size_t try_to_free_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node)
 
				 		/* check if they are all "free" */
			
 
				 		if (may_free_subtree(handle, node))
			
 
				 		{
			
 
				-			STARPU_ASSERT(handle->per_node[node].refcnt == 0);
			
 
				+			int target = -1;
			
 
				 
			
 
				-#ifdef STARPU_MEMORY_STATS
			
 
				-			if (handle->per_node[node].state == STARPU_OWNER)
			
 
				-				_starpu_memory_handle_stats_invalidated(handle, node);
			
 
				-			/* else XXX Considering only owner to invalidate */
			
 
				-#endif
			
 
				+			/* XXX Considering only owner to invalidate */
			
 
				+
			
 
				+			STARPU_ASSERT(handle->per_node[node].refcnt == 0);
			
 
				 
			
 
				 			/* in case there was nobody using that buffer, throw it
			
 
				-			 * away after writing it back to main memory if we can*/
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+			 * away after writing it back to main memory */
			
 
				+			if (handle->home_node != -1)
			
 
				+				target = handle->home_node;
			
 
				+			else
			
 
				+				/* NULL-registered data, push to RAM if it's not what we are flushing */
			
 
				+				if (node != 0)
			
 
				+					target = 0;
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				 
			
 
				 			size_t size_handle = _starpu_data_get_size(handle);
			
 
				 
			
 
				 			if (_starpu_memory_manager_test_allocate_size_(size_handle, STARPU_MAIN_RAM) == 1)
			
 
				 			{
			
 
				-				transfer_subtree_to_node(handle, node, STARPU_MAIN_RAM);
			
 
				-
			
 
				-#ifdef STARPU_MEMORY_STATS
			
 
				-				_starpu_memory_handle_stats_loaded_owner(handle, STARPU_MAIN_RAM);
			
 
				-#endif
			
 
				+				target = STARPU_MAIN_RAM;
			
 
				 			}
			
 
				 			else
			
 
				 			{	
			
@@ -360,7 +387,6 @@ static size_t try_to_free_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node)
 
				 				unsigned nnodes = starpu_memory_nodes_get_count();
			
 
				 				unsigned int i;
			
 
				 				double time_disk = 0;
			
 
				-				unsigned disk = 0;
			
 
				 				
			
 
				 				for (i = 0; i < nnodes; i++)
			
 
				 				{
			
@@ -369,28 +395,28 @@ static size_t try_to_free_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node)
 
				 						/* only time can change between disk <-> main_ram 
			
 
				 						 * and not between main_ram <-> worker if we compare diks*/
			
 
				 						double time_tmp = _starpu_predict_transfer_time(i, STARPU_MAIN_RAM, size_handle);
			
 
				-						if (disk == 0 || time_disk > time_tmp)
			
 
				+						if (target == -1 || time_disk > time_tmp)
			
 
				 						{
			
 
				-							disk = i;
			
 
				+							target = i;
			
 
				 							time_disk = time_tmp;
			
 
				 						}	
			
 
				 					}
			
 
				 				}
			
 
				+			}      
			
 
				 
			
 
				-				STARPU_ASSERT_MSG(disk != 0, "MEMORY FULL");
			
 
				-
			
 
				-				/* transfer */
			
 
				-				transfer_subtree_to_node(handle, node, disk);
			
 
				 
			
 
				+			if (target != -1)
			
 
				+			{
			
 
				+				transfer_subtree_to_node(handle, node, target);
			
 
				 #ifdef STARPU_MEMORY_STATS
			
 
				-				_starpu_memory_handle_stats_loaded_owner(handle, disk);
			
 
				-#endif				
			
 
				-				
			
 
				-			}      
			
 
				-			STARPU_ASSERT(handle->per_node[node].refcnt == 0);
			
 
				+				_starpu_memory_handle_stats_loaded_owner(handle, target);
			
 
				+#endif
			
 
				 
			
 
				-			/* now the actual buffer may be freed */
			
 
				-			freed = do_free_mem_chunk(mc, node);
			
 
				+				STARPU_ASSERT(handle->per_node[node].refcnt == 0);
			
 
				+
			
 
				+				/* now the actual buffer may be freed */
			
 
				+				freed = do_free_mem_chunk(mc, node);
			
 
				+			}
			
 
				 		}
			
 
				 
			
 
				 		/* unlock the leafs */
			
--- a/src/datawizard/reduction.c
+++ b/src/datawizard/reduction.c
@@ -250,8 +250,8 @@ void _starpu_data_end_reduction_mode(starpu_data_handle_t handle)
 
				 					if (!(STARPU_CODELET_GET_MODE(redux_task->cl, 1)))
			
 
				 						STARPU_CODELET_SET_MODE(redux_task->cl, STARPU_R, 1);
			
 
				 
			
 
				-					STARPU_ASSERT_MSG(STARPU_CODELET_GET_MODE(redux_task->cl, 0) == STARPU_RW, "First parameter of reduction codelet has to be RW");
			
 
				-					STARPU_ASSERT_MSG(STARPU_CODELET_GET_MODE(redux_task->cl, 1) == STARPU_R, "Second parameter of reduction codelet has to be R");
			
 
				+					STARPU_ASSERT_MSG(STARPU_CODELET_GET_MODE(redux_task->cl, 0) == STARPU_RW, "First parameter of reduction codelet %p has to be RW", redux_task->cl);
			
 
				+					STARPU_ASSERT_MSG(STARPU_CODELET_GET_MODE(redux_task->cl, 1) == STARPU_R, "Second parameter of reduction codelet %p has to be R", redux_task->cl);
			
 
				 
			
 
				 					STARPU_TASK_SET_HANDLE(redux_task, replicate_array[i], 0);
			
 
				 					STARPU_TASK_SET_HANDLE(redux_task, replicate_array[i+step], 1);
			
@@ -309,7 +309,7 @@ void _starpu_data_end_reduction_mode(starpu_data_handle_t handle)
 
				 
			
 
				 			if (!(STARPU_CODELET_GET_MODE(redux_task->cl, 0)))
			
 
				 				STARPU_CODELET_SET_MODE(redux_task->cl, STARPU_W, 0);
			
 
				-			STARPU_ASSERT_MSG(STARPU_CODELET_GET_MODE(redux_task->cl, 0) == STARPU_W, "Parameter of initialization codelet has to be W");
			
 
				+			STARPU_ASSERT_MSG(STARPU_CODELET_GET_MODE(redux_task->cl, 0) == STARPU_W, "Parameter of initialization codelet %p has to be W", redux_task->cl);
			
 
				 
			
 
				 			STARPU_TASK_SET_HANDLE(redux_task, handle, 0);
			
 
				 
			
@@ -338,8 +338,8 @@ void _starpu_data_end_reduction_mode(starpu_data_handle_t handle)
 
				 			if (!(STARPU_CODELET_GET_MODE(redux_task->cl, 1)))
			
 
				 				STARPU_CODELET_SET_MODE(redux_task->cl, STARPU_R, 1);
			
 
				 
			
 
				-			STARPU_ASSERT_MSG(STARPU_CODELET_GET_MODE(redux_task->cl, 0) == STARPU_RW, "First parameter of reduction codelet has to be RW");
			
 
				-			STARPU_ASSERT_MSG(STARPU_CODELET_GET_MODE(redux_task->cl, 1) == STARPU_R, "Second parameter of reduction codelet has to be R");
			
 
				+			STARPU_ASSERT_MSG(STARPU_CODELET_GET_MODE(redux_task->cl, 0) == STARPU_RW, "First parameter of reduction codelet %p has to be RW", redux_task->cl);
			
 
				+			STARPU_ASSERT_MSG(STARPU_CODELET_GET_MODE(redux_task->cl, 1) == STARPU_R, "Second parameter of reduction codelet %p has to be R", redux_task->cl);
			
 
				 
			
 
				 			STARPU_TASK_SET_HANDLE(redux_task, handle, 0);
			
 
				 			STARPU_TASK_SET_HANDLE(redux_task, replicate_array[replicate], 1);
			
--- a/src/datawizard/user_interactions.c
+++ b/src/datawizard/user_interactions.c
@@ -118,7 +118,7 @@ int starpu_data_acquire_on_node_cb(starpu_data_handle_t handle, unsigned node,
 
				 			   enum starpu_data_access_mode mode, void (*callback)(void *), void *arg)
			
 
				 {
			
 
				 	STARPU_ASSERT(handle);
			
 
				-	STARPU_ASSERT_MSG(handle->nchildren == 0, "Acquiring a partitioned data is not possible");
			
 
				+	STARPU_ASSERT_MSG(handle->nchildren == 0, "Acquiring a partitioned data (%p) is not possible", handle);
			
 
				         _STARPU_LOG_IN();
			
 
				 
			
 
				 	struct user_interaction_wrapper *wrapper = (struct user_interaction_wrapper *) malloc(sizeof(struct user_interaction_wrapper));
			
--- a/src/debug/traces/starpu_fxt.c
+++ b/src/debug/traces/starpu_fxt.c
@@ -158,7 +158,7 @@ static void register_worker_id(unsigned long tid, int workerid)
 
				 
			
 
				 	HASH_FIND(hh, worker_ids, &tid, sizeof(tid), entry);
			
 
				 
			
 
				-	STARPU_ASSERT_MSG(workerid < STARPU_NMAXWORKERS, "Too many workers in this trace, please increase the maximum number of CPUs and GPUs to the same value as was used for execution");
			
 
				+	STARPU_ASSERT_MSG(workerid < STARPU_NMAXWORKERS, "Too many workers in this trace, please increase in ./configure invocation the maximum number of CPUs and GPUs to the same value as was used for execution");
			
 
				 
			
 
				 	/* only register a thread once */
			
 
				 	STARPU_ASSERT(entry == NULL);
			
--- a/src/drivers/cpu/driver_cpu.c
+++ b/src/drivers/cpu/driver_cpu.c
@@ -68,7 +68,7 @@ _starpu_cpu_discover_devices(struct _starpu_machine_config *config)
 
				 
			
 
				 	if (config->cpu_depth == HWLOC_TYPE_DEPTH_UNKNOWN) {
			
 
				 		/* unknown, using logical procesors as fallback */
			
 
				-		_STARPU_DISP("Warning: OS did not report CPU cores. Assuming there is only one thread per core.\n");
			
 
				+		_STARPU_DISP("Warning: The OS did not report CPU cores. Assuming there is only one hardware thread per core.\n");
			
 
				 		config->cpu_depth = hwloc_get_type_depth(topology->hwtopology,
			
 
				 							 HWLOC_OBJ_PU);
			
 
				 	}
			
--- a/src/drivers/cuda/driver_cuda.c
+++ b/src/drivers/cuda/driver_cuda.c
@@ -481,7 +481,7 @@ int _starpu_cuda_driver_run_once(struct starpu_driver *d)
 
				 		switch (res)
			
 
				 		{
			
 
				 			case -EAGAIN:
			
 
				-				_STARPU_DISP("ouch, put the codelet %p back ... \n", j);
			
 
				+				_STARPU_DISP("ouch, CUDA could not actually run task %p, putting it back...\n", task);
			
 
				 				_starpu_push_task_to_workers(task);
			
 
				 				STARPU_ABORT();
			
 
				 			default:
			
--- a/src/drivers/mic/driver_mic_source.c
+++ b/src/drivers/mic/driver_mic_source.c
@@ -685,7 +685,7 @@ void *_starpu_mic_src_worker(void *arg)
 
				 			switch (res)
			
 
				 			{
			
 
				 				case -EAGAIN:
			
 
				-					_STARPU_DISP("ouch, put the codelet %p back ... \n", j);
			
 
				+					_STARPU_DISP("ouch, Xeon Phi could not actually run task %p, putting it back...\n", task);
			
 
				 					_starpu_push_task_to_workers(task);
			
 
				 					STARPU_ABORT();
			
 
				 					continue;
			
--- a/src/drivers/mp_common/source_common.c
+++ b/src/drivers/mp_common/source_common.c
@@ -65,7 +65,7 @@ int _starpu_src_common_lookup(struct _starpu_mp_node *node,
 
				 						&arg_size);
			
 
				 
			
 
				 	if (answer == STARPU_ERROR_LOOKUP) {
			
 
				-		_STARPU_DISP("Error looking up %s\n", func_name);
			
 
				+		_STARPU_DISP("Error looking up symbol %s\n", func_name);
			
 
				 		return -ESPIPE;
			
 
				 	}
			
 
				 
			
--- a/src/drivers/opencl/driver_opencl.c
+++ b/src/drivers/opencl/driver_opencl.c
@@ -677,7 +677,7 @@ int _starpu_opencl_driver_run_once(struct starpu_driver *d)
 
				 		switch (res)
			
 
				 		{
			
 
				 			case -EAGAIN:
			
 
				-				_STARPU_DISP("ouch, put the codelet %p back ... \n", j);
			
 
				+				_STARPU_DISP("ouch, OpenCL could not actually run task %p, putting it back...\n", task);
			
 
				 				_starpu_push_task_to_workers(task);
			
 
				 				STARPU_ABORT();
			
 
				 				return 0;
			
--- a/src/drivers/opencl/driver_opencl_utils.c
+++ b/src/drivers/opencl/driver_opencl_utils.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
			
 
				- * Copyright (C) 2010-2012  Université de Bordeaux 1
			
 
				+ * Copyright (C) 2010-2013  Université de Bordeaux 1
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -271,7 +271,7 @@ int _starpu_opencl_compile_or_load_opencl_from_string(const char *opencl_program
 
				 		// Create the compute program from the source buffer
			
 
				 		program = clCreateProgramWithSource(context, 1, (const char **) &opencl_program_source, NULL, &err);
			
 
				 		if (!program || err != CL_SUCCESS) {
			
 
				-			_STARPU_DISP("Error: Failed to load program source!\n");
			
 
				+			_STARPU_DISP("Error: Failed to load program source with options %s!\n", build_options);
			
 
				 			return EXIT_FAILURE;
			
 
				 		}
			
 
				 
			
--- a/src/drivers/scc/driver_scc_source.c
+++ b/src/drivers/scc/driver_scc_source.c
@@ -387,7 +387,7 @@ void *_starpu_scc_src_worker(void *arg)
 
				 			switch (res)
			
 
				 			{
			
 
				 				case -EAGAIN:
			
 
				-					_STARPU_DISP("ouch, put the codelet %p back ... \n", j);
			
 
				+					_STARPU_DISP("ouch, SCC could not actually run task %p, putting it back...\n", task);
			
 
				 					_starpu_push_task_to_workers(task);
			
 
				 					STARPU_ABORT();
			
 
				 					continue;
			
--- a/src/sched_policies/deque_modeling_policy_data_aware.c
+++ b/src/sched_policies/deque_modeling_policy_data_aware.c
@@ -212,7 +212,7 @@ static struct starpu_task *dmda_pop_task(unsigned sched_ctx_id)
 
				 	int workerid = starpu_worker_get_id();
			
 
				 	struct _starpu_fifo_taskq *fifo = dt->queue_array[workerid];
			
 
				 
			
 
				-	STARPU_ASSERT_MSG(fifo, "worker %d does not belong to ctx %d anymore \n", workerid, sched_ctx_id);
			
 
				+	STARPU_ASSERT_MSG(fifo, "worker %d does not belong to ctx %d anymore.\n", workerid, sched_ctx_id);
			
 
				 
			
 
				 	task = _starpu_fifo_pop_local_task(fifo);
			
 
				 	if (task)