13 年之前 · 8de7758807
--- a/src/core/combined_workers.c
+++ b/src/core/combined_workers.c
@@ -57,7 +57,7 @@ int starpu_combined_worker_assign_workerid(int nworkers, int workerid_array[])
 
																 	int new_workerid;
															
 
																 	/* Return the number of actual workers. */
															
 
																-	struct starpu_machine_config_s *config = _starpu_get_machine_config();
															
 
																+	struct _starpu_machine_config *config = _starpu_get_machine_config();
															
 
																 	int basic_worker_count = (int)config->topology.nworkers;
															
 
																 	int combined_worker_id = (int)config->topology.ncombinedworkers;
															
@@ -95,7 +95,7 @@ int starpu_combined_worker_assign_workerid(int nworkers, int workerid_array[])
 
																 	fprintf(stderr, "into worker %d\n", new_workerid);
															
 
																 #endif
															
 
																-	struct starpu_combined_worker_s *combined_worker =
															
 
																+	struct _starpu_combined_worker *combined_worker =
															
 
																 		&config->combined_workers[combined_worker_id];
															
 
																 	combined_worker->worker_size = nworkers;
															
@@ -132,7 +132,8 @@ int starpu_combined_worker_assign_workerid(int nworkers, int workerid_array[])
 
																 			&config->workers[id].initial_cpu_set);
															
 
																 #else
															
 
																 		int j;
															
 
																-		for (j = 0; j < CPU_SETSIZE; j++) {
															
 
																+		for (j = 0; j < CPU_SETSIZE; j++)
															
 
																+		{
															
 
																 			if (CPU_ISSET(j, &config->workers[id].initial_cpu_set))
															
 
																 				CPU_SET(j, &combined_worker->cpu_set);
															
 
																 		}
															
@@ -153,7 +154,7 @@ int starpu_combined_worker_assign_workerid(int nworkers, int workerid_array[])
 
																 int starpu_combined_worker_get_description(int workerid, int *worker_size, int **combined_workerid)
															
 
																 {
															
 
																 	/* Check that this is the id of a combined worker */
															
 
																-	struct starpu_combined_worker_s *worker;
															
 
																+	struct _starpu_combined_worker *worker;
															
 
																 	worker = _starpu_get_combined_worker_struct(workerid);
															
 
																 	STARPU_ASSERT(worker);
															
--- a/src/core/debug.c
+++ b/src/core/debug.c
@@ -25,6 +25,7 @@ static pthread_mutex_t logfile_mutex = PTHREAD_MUTEX_INITIALIZER;
 
																 static FILE *logfile;
															
 
																 #endif
															
 
																+/* Tell gdb whether FXT is compiled in or not */
															
 
																 int _starpu_use_fxt
															
 
																 #ifdef STARPU_USE_FXT
															
 
																 	= 1
															
@@ -36,7 +37,7 @@ void _starpu_open_debug_logfile(void)
 
																 #ifdef STARPU_VERBOSE
															
 
																 	/* what is  the name of the file ? default = "starpu.log" */
															
 
																 	char *logfile_name;
															
 
																-	
															
 
																+
															
 
																 	logfile_name = getenv("STARPU_LOGFILENAME");
															
 
																 	if (!logfile_name)
															
 
																 	{
															
@@ -60,9 +61,9 @@ void _starpu_print_to_logfile(const char *format STARPU_ATTRIBUTE_UNUSED, ...)
 
																 #ifdef STARPU_VERBOSE
															
 
																 	va_list args;
															
 
																 	va_start(args, format);
															
 
																-	PTHREAD_MUTEX_LOCK(&logfile_mutex);
															
 
																+	_STARPU_PTHREAD_MUTEX_LOCK(&logfile_mutex);
															
 
																 	vfprintf(logfile, format, args);
															
 
																-	PTHREAD_MUTEX_UNLOCK(&logfile_mutex);
															
 
																+	_STARPU_PTHREAD_MUTEX_UNLOCK(&logfile_mutex);
															
 
																 	va_end( args );
															
 
																 #endif
															
 
																 }
															
--- a/src/core/debug.h
+++ b/src/core/debug.h
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																  * Copyright (C) 2009-2011  Université de Bordeaux 1
															
 
																- * Copyright (C) 2010  Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
--- a/src/core/dependencies/cg.c
+++ b/src/core/dependencies/cg.c
@@ -1,7 +1,8 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2010-2011  Université de Bordeaux 1
															
 
																- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2010-2012  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2012 inria
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -19,32 +20,35 @@
 
																 #include <common/config.h>
															
 
																 #include <common/utils.h>
															
 
																 #include <core/jobs.h>
															
 
																+#include <core/task.h>
															
 
																 #include <core/dependencies/cg.h>
															
 
																 #include <core/dependencies/tags.h>
															
 
																-void _starpu_cg_list_init(struct starpu_cg_list_s *list)
															
 
																+void _starpu_cg_list_init(struct _starpu_cg_list *list)
															
 
																 {
															
 
																-	list->nsuccs = 0;
															
 
																+	_starpu_spin_init(&list->lock);
															
 
																 	list->ndeps = 0;
															
 
																 	list->ndeps_completed = 0;
															
 
																+	list->terminated = 0;
															
 
																+
															
 
																+	list->nsuccs = 0;
															
 
																 #ifdef STARPU_DYNAMIC_DEPS_SIZE
															
 
																 	/* this is a small initial default value ... may be changed */
															
 
																 	list->succ_list_size = 0;
															
 
																-	list->succ =
															
 
																-		(struct starpu_cg_s **) realloc(NULL, list->succ_list_size*sizeof(struct starpu_cg_s *));
															
 
																+	list->succ = NULL;
															
 
																 #endif
															
 
																 }
															
 
																-void _starpu_cg_list_deinit(struct starpu_cg_list_s *list)
															
 
																+void _starpu_cg_list_deinit(struct _starpu_cg_list *list)
															
 
																 {
															
 
																 	unsigned id;
															
 
																 	for (id = 0; id < list->nsuccs; id++)
															
 
																 	{
															
 
																-		starpu_cg_t *cg = list->succ[id];
															
 
																+		struct _starpu_cg *cg = list->succ[id];
															
 
																 		/* We remove the reference on the completion group, and free it
															
 
																-		 * if there is no more reference. */		
															
 
																+		 * if there is no more reference. */
															
 
																 		unsigned ntags = STARPU_ATOMIC_ADD(&cg->ntags, -1);
															
 
																 		if (ntags == 0)
															
 
																 			free(list->succ[id]);
															
@@ -53,12 +57,19 @@ void _starpu_cg_list_deinit(struct starpu_cg_list_s *list)
 
																 #ifdef STARPU_DYNAMIC_DEPS_SIZE
															
 
																 	free(list->succ);
															
 
																 #endif
															
 
																+	_starpu_spin_destroy(&list->lock);
															
 
																 }
															
 
																-void _starpu_add_successor_to_cg_list(struct starpu_cg_list_s *successors, starpu_cg_t *cg)
															
 
																+/* Returns whether the completion was already terminated, and caller should
															
 
																+ * thus immediately proceed. */
															
 
																+int _starpu_add_successor_to_cg_list(struct _starpu_cg_list *successors, struct _starpu_cg *cg)
															
 
																 {
															
 
																+	int ret;
															
 
																 	STARPU_ASSERT(cg);
															
 
																+	_starpu_spin_lock(&successors->lock);
															
 
																+	ret = successors->terminated;
															
 
																+
															
 
																 	/* where should that cg should be put in the array ? */
															
 
																 	unsigned index = STARPU_ATOMIC_ADD(&successors->nsuccs, 1) - 1;
															
@@ -72,50 +83,58 @@ void _starpu_add_successor_to_cg_list(struct starpu_cg_list_s *successors, starp
 
																 			successors->succ_list_size = 4;
															
 
																 		/* NB: this is thread safe as the tag->lock is taken */
															
 
																-		successors->succ = (struct starpu_cg_s **) realloc(successors->succ, 
															
 
																-			successors->succ_list_size*sizeof(struct starpu_cg_s *));
															
 
																+		successors->succ = (struct _starpu_cg **) realloc(successors->succ,
															
 
																+			successors->succ_list_size*sizeof(struct _starpu_cg *));
															
 
																 	}
															
 
																 #else
															
 
																 	STARPU_ASSERT(index < STARPU_NMAXDEPS);
															
 
																 #endif
															
 
																 	successors->succ[index] = cg;
															
 
																+	_starpu_spin_unlock(&successors->lock);
															
 
																+
															
 
																+	return ret;
															
 
																 }
															
 
																-void _starpu_notify_cg(starpu_cg_t *cg)
															
 
																+/* Note: in case of a tag, it must be already locked */
															
 
																+void _starpu_notify_cg(struct _starpu_cg *cg)
															
 
																 {
															
 
																 	STARPU_ASSERT(cg);
															
 
																 	unsigned remaining = STARPU_ATOMIC_ADD(&cg->remaining, -1);
															
 
																-	if (remaining == 0) {
															
 
																+	if (remaining == 0)
															
 
																+	{
															
 
																 		cg->remaining = cg->ntags;
															
 
																-		struct starpu_tag_s *tag;
															
 
																-		struct starpu_cg_list_s *tag_successors, *job_successors;
															
 
																-		starpu_job_t j;
															
 
																+		struct _starpu_tag *tag;
															
 
																+		struct _starpu_cg_list *tag_successors, *job_successors;
															
 
																+		struct _starpu_job *j;
															
 
																 		/* the group is now completed */
															
 
																-		switch (cg->cg_type) {
															
 
																-			case STARPU_CG_APPS: {
															
 
																+		switch (cg->cg_type)
															
 
																+		{
															
 
																+			case STARPU_CG_APPS:
															
 
																+			{
															
 
																 				/* this is a cg for an application waiting on a set of
															
 
																-	 			 * tags, wake the thread */
															
 
																-				PTHREAD_MUTEX_LOCK(&cg->succ.succ_apps.cg_mutex);
															
 
																+				 * tags, wake the thread */
															
 
																+				_STARPU_PTHREAD_MUTEX_LOCK(&cg->succ.succ_apps.cg_mutex);
															
 
																 				cg->succ.succ_apps.completed = 1;
															
 
																-				PTHREAD_COND_SIGNAL(&cg->succ.succ_apps.cg_cond);
															
 
																-				PTHREAD_MUTEX_UNLOCK(&cg->succ.succ_apps.cg_mutex);
															
 
																+				_STARPU_PTHREAD_COND_SIGNAL(&cg->succ.succ_apps.cg_cond);
															
 
																+				_STARPU_PTHREAD_MUTEX_UNLOCK(&cg->succ.succ_apps.cg_mutex);
															
 
																 				break;
															
 
																 			}
															
 
																-			case STARPU_CG_TAG: {
															
 
																+			case STARPU_CG_TAG:
															
 
																+			{
															
 
																 				tag = cg->succ.tag;
															
 
																 				tag_successors = &tag->tag_successors;
															
 
																-	
															
 
																+
															
 
																 				tag_successors->ndeps_completed++;
															
 
																-#ifdef STARPU_DEVEL
															
 
																-#warning FIXME: who locks this?
															
 
																-#endif
															
 
																+				/* Note: the tag is already locked by the
															
 
																+				 * caller. */
															
 
																 				if ((tag->state == STARPU_BLOCKED) &&
															
 
																-					(tag_successors->ndeps == tag_successors->ndeps_completed)) {
															
 
																+					(tag_successors->ndeps == tag_successors->ndeps_completed))
															
 
																+				{
															
 
																 					/* reset the counter so that we can reuse the completion group */
															
 
																 					tag_successors->ndeps_completed = 0;
															
 
																 					_starpu_tag_set_ready(tag);
															
@@ -123,21 +142,29 @@ void _starpu_notify_cg(starpu_cg_t *cg)
 
																 				break;
															
 
																 			}
															
 
																- 		        case STARPU_CG_TASK: {
															
 
																+ 		        case STARPU_CG_TASK:
															
 
																+			{
															
 
																 				j = cg->succ.job;
															
 
																+				_STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
															
 
																+
															
 
																 				job_successors = &j->job_successors;
															
 
																 				unsigned ndeps_completed =
															
 
																 					STARPU_ATOMIC_ADD(&job_successors->ndeps_completed, 1);
															
 
																-				if (job_successors->ndeps == ndeps_completed)
															
 
																+				/* Need to atomically test submitted and check
															
 
																+				 * dependencies, since this is concurrent with
															
 
																+				 * _starpu_submit_job */
															
 
																+				if (j->submitted && job_successors->ndeps == ndeps_completed)
															
 
																 				{
															
 
																 					/* Note that this also ensures that tag deps are
															
 
																 					 * fulfilled. This counter is reseted only when the
															
 
																 					 * dependencies are are all fulfilled) */
															
 
																-					_starpu_enforce_deps_and_schedule(j, 1);
															
 
																-				}
															
 
																+					_starpu_enforce_deps_and_schedule(j);
															
 
																+				} else
															
 
																+					_STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
															
 
																+
															
 
																 				break;
															
 
																 			}
															
@@ -148,21 +175,33 @@ void _starpu_notify_cg(starpu_cg_t *cg)
 
																 	}
															
 
																 }
															
 
																-void _starpu_notify_cg_list(struct starpu_cg_list_s *successors)
															
 
																+/* Caller just has to promise that the list will not disappear.
															
 
																+ * _starpu_notify_cg_list protects the list itself.
															
 
																+ * No job lock should be held, since we might want to immediately call the callback of an empty task.
															
 
																+ */
															
 
																+void _starpu_notify_cg_list(struct _starpu_cg_list *successors)
															
 
																 {
															
 
																-	unsigned nsuccs;
															
 
																 	unsigned succ;
															
 
																-	nsuccs = successors->nsuccs;
															
 
																-
															
 
																-	for (succ = 0; succ < nsuccs; succ++)
															
 
																+	_starpu_spin_lock(&successors->lock);
															
 
																+	successors->terminated = 1;
															
 
																+	/* Note: some thread might be concurrently adding other items */
															
 
																+	for (succ = 0; succ < successors->nsuccs; succ++)
															
 
																 	{
															
 
																-		struct starpu_cg_s *cg = successors->succ[succ];
															
 
																+		struct _starpu_cg *cg = successors->succ[succ];
															
 
																 		STARPU_ASSERT(cg);
															
 
																+		unsigned cg_type = cg->cg_type;
															
 
																-		struct starpu_tag_s *cgtag = NULL;
															
 
																+		if (cg_type == STARPU_CG_APPS)
															
 
																+		{
															
 
																+			/* Remove the temporary ref to the cg */
															
 
																+			memmove(&successors->succ[succ], &successors->succ[succ+1], (successors->nsuccs-(succ+1)) * sizeof(successors->succ[succ]));
															
 
																+			succ--;
															
 
																+			successors->nsuccs--;
															
 
																+		}
															
 
																+		_starpu_spin_unlock(&successors->lock);
															
 
																-		unsigned cg_type = cg->cg_type;
															
 
																+		struct _starpu_tag *cgtag = NULL;
															
 
																 		if (cg_type == STARPU_CG_TAG)
															
 
																 		{
															
@@ -171,43 +210,12 @@ void _starpu_notify_cg_list(struct starpu_cg_list_s *successors)
 
																 			_starpu_spin_lock(&cgtag->lock);
															
 
																 		}
															
 
																-		if (cg_type == STARPU_CG_TASK)
															
 
																-		{
															
 
																-			starpu_job_t j = cg->succ.job;
															
 
																-			PTHREAD_MUTEX_LOCK(&j->sync_mutex);
															
 
																-		}			
															
 
																-
															
 
																 		_starpu_notify_cg(cg);
															
 
																-		if (cg_type == STARPU_CG_TASK)
															
 
																-		{
															
 
																-			starpu_job_t j = cg->succ.job;
															
 
																-			
															
 
																-			/* In case this task was immediately terminated, since
															
 
																-			 * _starpu_notify_cg_list already hold the sync_mutex
															
 
																-			 * lock, it is its reponsability to destroy the task if
															
 
																-			 * needed. */
															
 
																-			unsigned must_destroy_task = 0;
															
 
																-			struct starpu_task *task = j->task;
															
 
																-
															
 
																-			if (j->submitted && (j->terminated > 0) && task->destroy && task->detach)
															
 
																-				must_destroy_task = 1;
															
 
																-
															
 
																-			PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
															
 
																-
															
 
																-			if (must_destroy_task)
															
 
																-				starpu_task_destroy(task);
															
 
																-		}			
															
 
																-
															
 
																-		if (cg_type == STARPU_CG_APPS) {
															
 
																-			/* Remove the temporary ref to the cg */
															
 
																-			memmove(&successors->succ[succ], &successors->succ[succ+1], (nsuccs-(succ+1)) * sizeof(successors->succ[succ]));
															
 
																-			succ--;
															
 
																-			nsuccs--;
															
 
																-			successors->nsuccs--;
															
 
																-		}
															
 
																-
															
 
																 		if (cg_type == STARPU_CG_TAG)
															
 
																 			_starpu_spin_unlock(&cgtag->lock);
															
 
																+
															
 
																+		_starpu_spin_lock(&successors->lock);
															
 
																 	}
															
 
																+	_starpu_spin_unlock(&successors->lock);
															
 
																 }
															
--- a/src/core/dependencies/cg.h
+++ b/src/core/dependencies/cg.h
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2010  Université de Bordeaux 1
															
 
																- * Copyright (C) 2010  Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2010, 2012  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -31,54 +31,74 @@
 
																 #define STARPU_NMAXDEPS	256
															
 
																 #endif
															
 
																-/* Completion Group list */
															
 
																-struct starpu_cg_list_s {
															
 
																-	unsigned nsuccs; /* how many successors ? */
															
 
																+struct _starpu_job;
															
 
																+
															
 
																+/* Completion Group list, records both the number of expected notifications
															
 
																+ * before the completion can start, and the list of successors when the
															
 
																+ * completion is finished. */
															
 
																+struct _starpu_cg_list
															
 
																+{
															
 
																+	/* Protects atomicity of the list and the terminated flag */
															
 
																+	struct _starpu_spinlock lock;
															
 
																+
															
 
																+	/* Number of notifications to be waited for */
															
 
																 	unsigned ndeps; /* how many deps ? */
															
 
																 	unsigned ndeps_completed; /* how many deps are done ? */
															
 
																+
															
 
																+	/* Whether the completion is finished. */
															
 
																+	unsigned terminated;
															
 
																+
															
 
																+	/* List of successors */
															
 
																+	unsigned nsuccs; /* how many successors ? */
															
 
																 #ifdef STARPU_DYNAMIC_DEPS_SIZE
															
 
																 	unsigned succ_list_size;
															
 
																-	struct starpu_cg_s **succ;
															
 
																+	struct _starpu_cg **succ;
															
 
																 #else
															
 
																-	struct starpu_cg_s *succ[STARPU_NMAXDEPS];
															
 
																+	struct _starpu_cg *succ[STARPU_NMAXDEPS];
															
 
																 #endif
															
 
																 };
															
 
																-#define STARPU_CG_APPS	(1<<0)
															
 
																-#define STARPU_CG_TAG	(1<<1)
															
 
																-#define STARPU_CG_TASK	(1<<2)
															
 
																+enum _starpu_cg_type
															
 
																+{
															
 
																+	STARPU_CG_APPS=(1<<0),
															
 
																+	STARPU_CG_TAG=(1<<1),
															
 
																+	STARPU_CG_TASK=(1<<2)
															
 
																+};
															
 
																 /* Completion Group */
															
 
																-typedef struct starpu_cg_s {
															
 
																+struct _starpu_cg
															
 
																+{
															
 
																 	unsigned ntags; /* number of tags depended on */
															
 
																 	unsigned remaining; /* number of remaining tags */
															
 
																-	unsigned cg_type; /* STARPU_CG_APPS or STARPU_CG_TAG or STARPU_CG_TASK */
															
 
																+	enum _starpu_cg_type cg_type;
															
 
																-	union {
															
 
																+	union
															
 
																+	{
															
 
																 		/* STARPU_CG_TAG */
															
 
																-		struct starpu_tag_s *tag;
															
 
																+		struct _starpu_tag *tag;
															
 
																 		/* STARPU_CG_TASK */
															
 
																-		struct starpu_job_s *job;
															
 
																+		struct _starpu_job *job;
															
 
																 		/* STARPU_CG_APPS */
															
 
																 		/* in case this completion group is related to an application,
															
 
																 		 * we have to explicitely wake the waiting thread instead of
															
 
																 		 * reschedule the corresponding task */
															
 
																-		struct {
															
 
																+		struct
															
 
																+		{
															
 
																 			unsigned completed;
															
 
																 			pthread_mutex_t cg_mutex;
															
 
																 			pthread_cond_t cg_cond;
															
 
																 		} succ_apps;
															
 
																 	} succ;
															
 
																-} starpu_cg_t;
															
 
																-
															
 
																-void _starpu_cg_list_init(struct starpu_cg_list_s *list);
															
 
																-void _starpu_cg_list_deinit(struct starpu_cg_list_s *list);
															
 
																-void _starpu_add_successor_to_cg_list(struct starpu_cg_list_s *successors, starpu_cg_t *cg);
															
 
																-void _starpu_notify_cg(starpu_cg_t *cg);
															
 
																-void _starpu_notify_cg_list(struct starpu_cg_list_s *successors);
															
 
																-void _starpu_notify_task_dependencies(struct starpu_job_s *j);
															
 
																+};
															
 
																+
															
 
																+void _starpu_cg_list_init(struct _starpu_cg_list *list);
															
 
																+void _starpu_cg_list_deinit(struct _starpu_cg_list *list);
															
 
																+int _starpu_add_successor_to_cg_list(struct _starpu_cg_list *successors, struct _starpu_cg *cg);
															
 
																+void _starpu_notify_cg(struct _starpu_cg *cg);
															
 
																+void _starpu_notify_cg_list(struct _starpu_cg_list *successors);
															
 
																+void _starpu_notify_task_dependencies(struct _starpu_job *j);
															
 
																 #endif // __CG_H__
															
--- a/src/core/dependencies/data_concurrency.c
+++ b/src/core/dependencies/data_concurrency.c
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2010  Université de Bordeaux 1
															
 
																- * Copyright (C) 2010  Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2010-2012  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -31,44 +31,45 @@
 
																  */
															
 
																 /* the header lock must be taken by the caller */
															
 
																-static starpu_data_requester_t may_unlock_data_req_list_head(starpu_data_handle handle)
															
 
																+static struct _starpu_data_requester *may_unlock_data_req_list_head(starpu_data_handle_t handle)
															
 
																 {
															
 
																-	starpu_data_requester_list_t req_list;
															
 
																+	struct _starpu_data_requester_list *req_list;
															
 
																 	if (handle->reduction_refcnt > 0)
															
 
																 	{
															
 
																 		req_list = handle->reduction_req_list;
															
 
																 	}
															
 
																-	else {
															
 
																-		if (starpu_data_requester_list_empty(handle->reduction_req_list))
															
 
																+	else
															
 
																+	{
															
 
																+		if (_starpu_data_requester_list_empty(handle->reduction_req_list))
															
 
																 			req_list = handle->req_list;
															
 
																 		else
															
 
																 			req_list = handle->reduction_req_list;
															
 
																 	}
															
 
																 	/* if there is no one to unlock ... */
															
 
																-	if (starpu_data_requester_list_empty(req_list))
															
 
																+	if (_starpu_data_requester_list_empty(req_list))
															
 
																 		return NULL;
															
 
																 	/* if there is no reference to the data anymore, we can use it */
															
 
																 	if (handle->refcnt == 0)
															
 
																-		return starpu_data_requester_list_pop_front(req_list);
															
 
																+		return _starpu_data_requester_list_pop_front(req_list);
															
 
																 	if (handle->current_mode == STARPU_W)
															
 
																 		return NULL;
															
 
																 	/* data->current_mode == STARPU_R, so we can process more readers */
															
 
																-	starpu_data_requester_t r = starpu_data_requester_list_front(req_list);
															
 
																+	struct _starpu_data_requester *r = _starpu_data_requester_list_front(req_list);
															
 
																-	starpu_access_mode r_mode = r->mode;
															
 
																+	enum starpu_access_mode r_mode = r->mode;
															
 
																 	if (r_mode == STARPU_RW)
															
 
																 		r_mode = STARPU_W;
															
 
																-	
															
 
																+
															
 
																 	/* If this is a STARPU_R, STARPU_SCRATCH or STARPU_REDUX type of
															
 
																 	 * access, we only proceed if the cuurrent mode is the same as the
															
 
																 	 * requested mode. */
															
 
																 	if (r_mode == handle->current_mode)
															
 
																-		return starpu_data_requester_list_pop_front(req_list);
															
 
																+		return _starpu_data_requester_list_pop_front(req_list);
															
 
																 	else
															
 
																 		return NULL;
															
 
																 }
															
@@ -78,9 +79,9 @@ static starpu_data_requester_t may_unlock_data_req_list_head(starpu_data_handle
 
																  * with the current mode, the request is put in the per-handle list of
															
 
																  * "requesters", and this function returns 1. */
															
 
																 static unsigned _starpu_attempt_to_submit_data_request(unsigned request_from_codelet,
															
 
																-					starpu_data_handle handle, starpu_access_mode mode,
															
 
																-					void (*callback)(void *), void *argcb,
															
 
																-					starpu_job_t j, unsigned buffer_index)
															
 
																+						       starpu_data_handle_t handle, enum starpu_access_mode mode,
															
 
																+						       void (*callback)(void *), void *argcb,
															
 
																+						       struct _starpu_job *j, unsigned buffer_index)
															
 
																 {
															
 
																 	if (mode == STARPU_RW)
															
 
																 		mode = STARPU_W;
															
@@ -93,7 +94,8 @@ static unsigned _starpu_attempt_to_submit_data_request(unsigned request_from_cod
 
																 		while (_starpu_spin_trylock(&handle->header_lock))
															
 
																 			_starpu_datawizard_progress(_starpu_get_local_memory_node(), 0);
															
 
																 	}
															
 
																-	else {
															
 
																+	else
															
 
																+	{
															
 
																 		_starpu_spin_lock(&handle->header_lock);
															
 
																 	}
															
@@ -115,7 +117,7 @@ static unsigned _starpu_attempt_to_submit_data_request(unsigned request_from_cod
 
																 	 * current one, we can proceed. */
															
 
																 	unsigned put_in_list = 1;
															
 
																-	starpu_access_mode previous_mode = handle->current_mode;
															
 
																+	enum starpu_access_mode previous_mode = handle->current_mode;
															
 
																 	if (!frozen && ((handle->refcnt == 0) || (!(mode == STARPU_W) && (handle->current_mode == mode))))
															
 
																 	{
															
@@ -125,13 +127,14 @@ static unsigned _starpu_attempt_to_submit_data_request(unsigned request_from_cod
 
																 		if ((handle->reduction_refcnt == 0) && (previous_mode == STARPU_REDUX) && (mode != STARPU_REDUX))
															
 
																 		{
															
 
																-			starpu_data_end_reduction_mode(handle);
															
 
																+			_starpu_data_end_reduction_mode(handle);
															
 
																 			/* Since we need to perform a mode change, we freeze
															
 
																 			 * the request if needed. */
															
 
																 			put_in_list = (handle->reduction_refcnt > 0);
															
 
																 		}
															
 
																-		else {
															
 
																+		else
															
 
																+		{
															
 
																 			put_in_list = 0;
															
 
																 		}
															
 
																 	}
															
@@ -140,32 +143,35 @@ static unsigned _starpu_attempt_to_submit_data_request(unsigned request_from_cod
 
																 	{
															
 
																 		/* there cannot be multiple writers or a new writer
															
 
																 		 * while the data is in read mode */
															
 
																-		
															
 
																+
															
 
																+		handle->busy_count++;
															
 
																 		/* enqueue the request */
															
 
																-		starpu_data_requester_t r = starpu_data_requester_new();
															
 
																-			r->mode = mode;
															
 
																-			r->is_requested_by_codelet = request_from_codelet;
															
 
																-			r->j = j;
															
 
																-			r->buffer_index = buffer_index;
															
 
																-			r->ready_data_callback = callback;
															
 
																-			r->argcb = argcb;
															
 
																+		struct _starpu_data_requester *r = _starpu_data_requester_new();
															
 
																+		r->mode = mode;
															
 
																+		r->is_requested_by_codelet = request_from_codelet;
															
 
																+		r->j = j;
															
 
																+		r->buffer_index = buffer_index;
															
 
																+		r->ready_data_callback = callback;
															
 
																+		r->argcb = argcb;
															
 
																 		/* We put the requester in a specific list if this is a reduction task */
															
 
																-		starpu_data_requester_list_t req_list =
															
 
																+		struct _starpu_data_requester_list *req_list =
															
 
																 			is_a_reduction_task?handle->reduction_req_list:handle->req_list;
															
 
																-		starpu_data_requester_list_push_back(req_list, r);
															
 
																+		_starpu_data_requester_list_push_back(req_list, r);
															
 
																 		/* failed */
															
 
																 		put_in_list = 1;
															
 
																 	}
															
 
																-	else {
															
 
																+	else
															
 
																+	{
															
 
																 		handle->refcnt++;
															
 
																+		handle->busy_count++;
															
 
																 		handle->current_mode = mode;
															
 
																 		if ((mode == STARPU_REDUX) && (previous_mode != STARPU_REDUX))
															
 
																-			starpu_data_start_reduction_mode(handle);
															
 
																+			_starpu_data_start_reduction_mode(handle);
															
 
																 		/* success */
															
 
																 		put_in_list = 0;
															
@@ -176,32 +182,37 @@ static unsigned _starpu_attempt_to_submit_data_request(unsigned request_from_cod
 
																 }
															
 
																-
															
 
																-unsigned _starpu_attempt_to_submit_data_request_from_apps(starpu_data_handle handle, starpu_access_mode mode,
															
 
																-						void (*callback)(void *), void *argcb)
															
 
																+unsigned _starpu_attempt_to_submit_data_request_from_apps(starpu_data_handle_t handle, enum starpu_access_mode mode,
															
 
																+							  void (*callback)(void *), void *argcb)
															
 
																 {
															
 
																 	return _starpu_attempt_to_submit_data_request(0, handle, mode, callback, argcb, NULL, 0);
															
 
																 }
															
 
																-static unsigned attempt_to_submit_data_request_from_job(starpu_job_t j, unsigned buffer_index)
															
 
																+static unsigned attempt_to_submit_data_request_from_job(struct _starpu_job *j, unsigned buffer_index)
															
 
																 {
															
 
																-	/* Note that we do not access j->task->buffers, but j->ordered_buffers
															
 
																+	/* Note that we do not access j->task->handles, but j->ordered_buffers
															
 
																 	 * which is a sorted copy of it. */
															
 
																-	starpu_data_handle handle = j->ordered_buffers[buffer_index].handle;
															
 
																-	starpu_access_mode mode = j->ordered_buffers[buffer_index].mode;
															
 
																+	starpu_data_handle_t handle = j->ordered_buffers[buffer_index].handle;
															
 
																+	enum starpu_access_mode mode = j->ordered_buffers[buffer_index].mode;
															
 
																 	return _starpu_attempt_to_submit_data_request(1, handle, mode, NULL, NULL, j, buffer_index);
															
 
																-
															
 
																 }
															
 
																-static unsigned _submit_job_enforce_data_deps(starpu_job_t j, unsigned start_buffer_index)
															
 
																+static unsigned _submit_job_enforce_data_deps(struct _starpu_job *j, unsigned start_buffer_index)
															
 
																 {
															
 
																 	unsigned buf;
															
 
																 	unsigned nbuffers = j->task->cl->nbuffers;
															
 
																 	for (buf = start_buffer_index; buf < nbuffers; buf++)
															
 
																 	{
															
 
																-                if (attempt_to_submit_data_request_from_job(j, buf)) {
															
 
																+		if (buf && j->ordered_buffers[buf-1].handle == j->ordered_buffers[buf].handle)
															
 
																+			/* We have already requested this data, skip it. This
															
 
																+			 * depends on ordering putting writes before reads, see
															
 
																+			 * _starpu_compar_handles.  */
															
 
																+			continue;
															
 
																+
															
 
																+                if (attempt_to_submit_data_request_from_job(j, buf))
															
 
																+		{
															
 
																                         j->task->status = STARPU_TASK_BLOCKED_ON_DATA;
															
 
																 			return 1;
															
 
																                 }
															
@@ -214,9 +225,9 @@ static unsigned _submit_job_enforce_data_deps(starpu_job_t j, unsigned start_buf
 
																    with concurrent data-access at the same time in the scheduling engine (eg.
															
 
																    there can be 2 tasks reading a piece of data, but there cannot be one
															
 
																    reading and another writing) */
															
 
																-unsigned _starpu_submit_job_enforce_data_deps(starpu_job_t j)
															
 
																+unsigned _starpu_submit_job_enforce_data_deps(struct _starpu_job *j)
															
 
																 {
															
 
																-	struct starpu_codelet_t *cl = j->task->cl;
															
 
																+	struct starpu_codelet *cl = j->task->cl;
															
 
																 	if ((cl == NULL) || (cl->nbuffers == 0))
															
 
																 		return 0;
															
@@ -224,41 +235,49 @@ unsigned _starpu_submit_job_enforce_data_deps(starpu_job_t j)
 
																 	/* Compute an ordered list of the different pieces of data so that we
															
 
																 	 * grab them according to a total order, thus avoiding a deadlock
															
 
																 	 * condition */
															
 
																-	memcpy(j->ordered_buffers, j->task->buffers, cl->nbuffers*sizeof(starpu_buffer_descr));
															
 
																+	unsigned i;
															
 
																+	for (i=0 ; i<cl->nbuffers ; i++)
															
 
																+	{
															
 
																+		j->ordered_buffers[i].handle = j->task->handles[i];
															
 
																+		j->ordered_buffers[i].mode = j->task->cl->modes[i];
															
 
																+	}
															
 
																+
															
 
																 	_starpu_sort_task_handles(j->ordered_buffers, cl->nbuffers);
															
 
																 	return _submit_job_enforce_data_deps(j, 0);
															
 
																 }
															
 
																-static unsigned unlock_one_requester(starpu_data_requester_t r)
															
 
																+static unsigned unlock_one_requester(struct _starpu_data_requester *r)
															
 
																 {
															
 
																-	starpu_job_t j = r->j;
															
 
																+	struct _starpu_job *j = r->j;
															
 
																 	unsigned nbuffers = j->task->cl->nbuffers;
															
 
																 	unsigned buffer_index = r->buffer_index;
															
 
																 	if (buffer_index + 1 < nbuffers)
															
 
																-	{
															
 
																 		/* not all buffers are protected yet */
															
 
																 		return _submit_job_enforce_data_deps(j, buffer_index + 1);
															
 
																-	}
															
 
																 	else
															
 
																 		return 0;
															
 
																 }
															
 
																 /* The header lock must already be taken by the caller */
															
 
																-void _starpu_notify_data_dependencies(starpu_data_handle handle)
															
 
																+void _starpu_notify_data_dependencies(starpu_data_handle_t handle)
															
 
																 {
															
 
																 	/* A data access has finished so we remove a reference. */
															
 
																 	STARPU_ASSERT(handle->refcnt > 0);
															
 
																 	handle->refcnt--;
															
 
																+	STARPU_ASSERT(handle->busy_count > 0);
															
 
																+	handle->busy_count--;
															
 
																+	_starpu_data_check_not_busy(handle);
															
 
																 	/* The handle has been destroyed in between (eg. this was a temporary
															
 
																 	 * handle created for a reduction.) */
															
 
																 	if (handle->lazy_unregister && handle->refcnt == 0)
															
 
																 	{
															
 
																+		_starpu_spin_unlock(&handle->header_lock);
															
 
																 		starpu_data_unregister_no_coherency(handle);
															
 
																 		/* Warning: in case we unregister the handle, we must be sure
															
 
																-		 * that the application will not try to unlock the header after
															
 
																+		 * that the caller will not try to unlock the header after
															
 
																 		 * !*/
															
 
																 		return;
															
 
																 	}
															
@@ -270,28 +289,28 @@ void _starpu_notify_data_dependencies(starpu_data_handle handle)
 
																 		//fprintf(stderr, "NOTIFY REDUCTION TASK RED REFCNT %d\n", handle->reduction_refcnt);
															
 
																 		handle->reduction_refcnt--;
															
 
																 		if (handle->reduction_refcnt == 0)
															
 
																-			starpu_data_end_reduction_mode_terminate(handle);
															
 
																+			_starpu_data_end_reduction_mode_terminate(handle);
															
 
																 	}
															
 
																-
															
 
																-	starpu_data_requester_t r;
															
 
																+	struct _starpu_data_requester *r;
															
 
																 	while ((r = may_unlock_data_req_list_head(handle)))
															
 
																 	{
															
 
																 		/* STARPU_RW accesses are treated as STARPU_W */
															
 
																-		starpu_access_mode r_mode = r->mode;
															
 
																+		enum starpu_access_mode r_mode = r->mode;
															
 
																 		if (r_mode == STARPU_RW)
															
 
																 			r_mode = STARPU_W;
															
 
																 		int put_in_list = 1;
															
 
																 		if ((handle->reduction_refcnt == 0) && (handle->current_mode == STARPU_REDUX) && (r_mode != STARPU_REDUX))
															
 
																 		{
															
 
																-			starpu_data_end_reduction_mode(handle);
															
 
																+			_starpu_data_end_reduction_mode(handle);
															
 
																 			/* Since we need to perform a mode change, we freeze
															
 
																 			 * the request if needed. */
															
 
																 			put_in_list = (handle->reduction_refcnt > 0);
															
 
																 		}
															
 
																-		else {
															
 
																+		else
															
 
																+		{
															
 
																 			put_in_list = 0;
															
 
																 		}
															
@@ -299,14 +318,16 @@ void _starpu_notify_data_dependencies(starpu_data_handle handle)
 
																 		{
															
 
																 			/* We need to put the request back because we must
															
 
																 			 * perform a reduction before. */
															
 
																-			starpu_data_requester_list_push_front(handle->req_list, r);
															
 
																+			_starpu_data_requester_list_push_front(handle->req_list, r);
															
 
																 		}
															
 
																-		else {
															
 
																+		else
															
 
																+		{
															
 
																 			/* The data is now attributed to that request so we put a
															
 
																 			 * reference on it. */
															
 
																 			handle->refcnt++;
															
 
																-		
															
 
																-			starpu_access_mode previous_mode = handle->current_mode;
															
 
																+			handle->busy_count++;
															
 
																+
															
 
																+			enum starpu_access_mode previous_mode = handle->current_mode;
															
 
																 			handle->current_mode = r_mode;
															
 
																 			/* In case we enter in a reduction mode, we invalidate all per
															
@@ -314,14 +335,14 @@ void _starpu_notify_data_dependencies(starpu_data_handle handle)
 
																 			 * kept intact because we'll reduce a valid copy of the
															
 
																 			 * "per-node replicate" with the per-worker replicates .*/
															
 
																 			if ((r_mode == STARPU_REDUX) && (previous_mode != STARPU_REDUX))
															
 
																-				starpu_data_start_reduction_mode(handle);
															
 
																+				_starpu_data_start_reduction_mode(handle);
															
 
																 			_starpu_spin_unlock(&handle->header_lock);
															
 
																 			if (r->is_requested_by_codelet)
															
 
																 			{
															
 
																 				if (!unlock_one_requester(r))
															
 
																-					_starpu_push_task(r->j, 0);
															
 
																+					_starpu_push_task(r->j);
															
 
																 			}
															
 
																 			else
															
 
																 			{
															
@@ -331,9 +352,12 @@ void _starpu_notify_data_dependencies(starpu_data_handle handle)
 
																 				r->ready_data_callback(r->argcb);
															
 
																 			}
															
 
																-			starpu_data_requester_delete(r);
															
 
																-			
															
 
																+			_starpu_data_requester_delete(r);
															
 
																+
															
 
																 			_starpu_spin_lock(&handle->header_lock);
															
 
																+			STARPU_ASSERT(handle->busy_count > 0);
															
 
																+			handle->busy_count--;
															
 
																+			_starpu_data_check_not_busy(handle);
															
 
																 		}
															
 
																 	}
															
 
																 }
															
--- a/src/core/dependencies/data_concurrency.h
+++ b/src/core/dependencies/data_concurrency.h
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																  * Copyright (C) 2010  Université de Bordeaux 1
															
 
																- * Copyright (C) 2010  Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -20,13 +20,13 @@
 
																 #include <core/jobs.h>
															
 
																-unsigned _starpu_submit_job_enforce_data_deps(starpu_job_t j);
															
 
																+unsigned _starpu_submit_job_enforce_data_deps(struct _starpu_job *j);
															
 
																-void _starpu_notify_data_dependencies(starpu_data_handle handle);
															
 
																+void _starpu_notify_data_dependencies(starpu_data_handle_t handle);
															
 
																-unsigned _starpu_attempt_to_submit_data_request_from_apps(starpu_data_handle handle,
															
 
																-		starpu_access_mode mode,
															
 
																-		void (*callback)(void *), void *argcb);
															
 
																+unsigned _starpu_attempt_to_submit_data_request_from_apps(starpu_data_handle_t handle,
															
 
																+							  enum starpu_access_mode mode,
															
 
																+							  void (*callback)(void *), void *argcb);
															
 
																 #endif // __DATA_CONCURRENCY_H__
															
--- a/src/core/dependencies/dependencies.c
+++ b/src/core/dependencies/dependencies.c
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2010  Université de Bordeaux 1
															
 
																- * Copyright (C) 2010  Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2010, 2012  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -24,15 +24,15 @@
 
																 #include <core/sched_policy.h>
															
 
																 #include <core/dependencies/data_concurrency.h>
															
 
																-/* We assume that j->sync_mutex is taken by the caller */
															
 
																-void _starpu_notify_dependencies(struct starpu_job_s *j)
															
 
																+/* We assume that the job will not disappear under our hands */
															
 
																+void _starpu_notify_dependencies(struct _starpu_job *j)
															
 
																 {
															
 
																 	STARPU_ASSERT(j);
															
 
																 	STARPU_ASSERT(j->task);
															
 
																 	/* unlock tasks depending on that task */
															
 
																 	_starpu_notify_task_dependencies(j);
															
 
																-	
															
 
																+
															
 
																 	/* unlock tags depending on that task */
															
 
																 	if (j->task->use_tag)
															
 
																 		_starpu_notify_tag_dependencies(j->tag);
															
--- a/src/core/dependencies/htable.c
+++ b/src/core/dependencies/htable.c
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2009, 2010  Université de Bordeaux 1
															
 
																- * Copyright (C) 2010  Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2009-2011  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -18,31 +18,30 @@
 
																 #include <core/dependencies/htable.h>
															
 
																 #include <string.h>
															
 
																-void *_starpu_htbl_search_tag(starpu_htbl_node_t *htbl, starpu_tag_t tag)
															
 
																+void *_starpu_htbl_search_tag(struct _starpu_htbl_node *htbl, starpu_tag_t tag)
															
 
																 {
															
 
																 	unsigned currentbit;
															
 
																-	starpu_htbl_node_t *current_htbl = htbl;
															
 
																+	struct _starpu_htbl_node *current_htbl = htbl;
															
 
																-	/* 000000000001111 with STARPU_HTBL_NODE_SIZE 1's */
															
 
																-	starpu_tag_t mask = (1<<STARPU_HTBL_NODE_SIZE)-1;
															
 
																+	/* 000000000001111 with _STARPU_HTBL_NODE_SIZE 1's */
															
 
																+	starpu_tag_t mask = (1<<_STARPU_HTBL_NODE_SIZE)-1;
															
 
																-	for(currentbit = 0; currentbit < STARPU_TAG_SIZE; currentbit+=STARPU_HTBL_NODE_SIZE)
															
 
																+	for(currentbit = 0; currentbit < _STARPU_TAG_SIZE; currentbit+=_STARPU_HTBL_NODE_SIZE)
															
 
																 	{
															
 
																-	
															
 
																 	//	printf("search : current bit = %d \n", currentbit);
															
 
																 		if (STARPU_UNLIKELY(current_htbl == NULL))
															
 
																 			return NULL;
															
 
																-		/* 0000000000001111 
															
 
																+		/* 0000000000001111
															
 
																 		 *     | currentbit
															
 
																 		 * 0000111100000000 = offloaded_mask
															
 
																 		 *         |last_currentbit
															
 
																 		 * */
															
 
																-		unsigned last_currentbit = 
															
 
																-			STARPU_TAG_SIZE - (currentbit + STARPU_HTBL_NODE_SIZE);
															
 
																+		unsigned last_currentbit =
															
 
																+			_STARPU_TAG_SIZE - (currentbit + _STARPU_HTBL_NODE_SIZE);
															
 
																 		starpu_tag_t offloaded_mask = mask << last_currentbit;
															
 
																-		unsigned current_index = 
															
 
																+		unsigned current_index =
															
 
																 			(tag & (offloaded_mask)) >> (last_currentbit);
															
 
																 		current_htbl = current_htbl->children[current_index];
															
@@ -55,49 +54,48 @@ void *_starpu_htbl_search_tag(starpu_htbl_node_t *htbl, starpu_tag_t tag)
 
																  * returns the previous value of the tag, or NULL else
															
 
																  */
															
 
																-void *_starpu_htbl_insert_tag(starpu_htbl_node_t **htbl, starpu_tag_t tag, void *entry)
															
 
																+void *_starpu_htbl_insert_tag(struct _starpu_htbl_node **htbl, starpu_tag_t tag, void *entry)
															
 
																 {
															
 
																-
															
 
																 	unsigned currentbit;
															
 
																-	starpu_htbl_node_t **current_htbl_ptr = htbl;
															
 
																-	starpu_htbl_node_t *previous_htbl_ptr = NULL;
															
 
																+	struct _starpu_htbl_node **current_htbl_ptr = htbl;
															
 
																+	struct _starpu_htbl_node *previous_htbl_ptr = NULL;
															
 
																-	/* 000000000001111 with STARPU_HTBL_NODE_SIZE 1's */
															
 
																-	starpu_tag_t mask = (1<<STARPU_HTBL_NODE_SIZE)-1;
															
 
																+	/* 000000000001111 with _STARPU_HTBL_NODE_SIZE 1's */
															
 
																+	starpu_tag_t mask = (1<<_STARPU_HTBL_NODE_SIZE)-1;
															
 
																-	for(currentbit = 0; currentbit < STARPU_TAG_SIZE; currentbit+=STARPU_HTBL_NODE_SIZE)
															
 
																+	for(currentbit = 0; currentbit < _STARPU_TAG_SIZE; currentbit+=_STARPU_HTBL_NODE_SIZE)
															
 
																 	{
															
 
																-		if (*current_htbl_ptr == NULL) {
															
 
																+		if (*current_htbl_ptr == NULL)
															
 
																+		{
															
 
																 			/* TODO pad to change that 1 into 16 ? */
															
 
																-			*current_htbl_ptr = (starpu_htbl_node_t *) calloc(1, sizeof(starpu_htbl_node_t));
															
 
																-			assert(*current_htbl_ptr);
															
 
																+			*current_htbl_ptr = (struct _starpu_htbl_node *) calloc(1, sizeof(struct _starpu_htbl_node));
															
 
																+			STARPU_ASSERT(*current_htbl_ptr);
															
 
																 			if (previous_htbl_ptr)
															
 
																 				previous_htbl_ptr->nentries++;
															
 
																 		}
															
 
																-		/* 0000000000001111 
															
 
																+		/* 0000000000001111
															
 
																 		 *     | currentbit
															
 
																 		 * 0000111100000000 = offloaded_mask
															
 
																 		 *         |last_currentbit
															
 
																 		 * */
															
 
																-		unsigned last_currentbit = 
															
 
																-			STARPU_TAG_SIZE - (currentbit + STARPU_HTBL_NODE_SIZE);
															
 
																+		unsigned last_currentbit =
															
 
																+			_STARPU_TAG_SIZE - (currentbit + _STARPU_HTBL_NODE_SIZE);
															
 
																 		starpu_tag_t offloaded_mask = mask << last_currentbit;
															
 
																-		unsigned current_index = 
															
 
																+		unsigned current_index =
															
 
																 			(tag & (offloaded_mask)) >> (last_currentbit);
															
 
																 		previous_htbl_ptr = *current_htbl_ptr;
															
 
																-		current_htbl_ptr = 
															
 
																+		current_htbl_ptr =
															
 
																 			&((*current_htbl_ptr)->children[current_index]);
															
 
																-
															
 
																 	}
															
 
																-	/* current_htbl either contains NULL or a previous entry 
															
 
																+	/* current_htbl either contains NULL or a previous entry
															
 
																 	 * we overwrite it anyway */
															
 
																 	void *old_entry = *current_htbl_ptr;
															
 
																-	*current_htbl_ptr = (starpu_htbl_node_t *) entry;
															
 
																+	*current_htbl_ptr = (struct _starpu_htbl_node *) entry;
															
 
																 	if (!old_entry)
															
 
																 		previous_htbl_ptr->nentries++;
															
@@ -106,43 +104,48 @@ void *_starpu_htbl_insert_tag(starpu_htbl_node_t **htbl, starpu_tag_t tag, void
 
																 }
															
 
																 /* returns the entry corresponding to the tag and remove it from the htbl */
															
 
																-void *_starpu_htbl_remove_tag(starpu_htbl_node_t *htbl, starpu_tag_t tag)
															
 
																+void *_starpu_htbl_remove_tag(struct _starpu_htbl_node **htbl, starpu_tag_t tag)
															
 
																 {
															
 
																 	/* NB : if the entry is "NULL", we assume this means it is not present XXX */
															
 
																 	unsigned currentbit;
															
 
																-	starpu_htbl_node_t *current_htbl_ptr = htbl;
															
 
																+	struct _starpu_htbl_node **current_htbl_ptr_parent = htbl;
															
 
																+	struct _starpu_htbl_node *current_htbl_ptr = *current_htbl_ptr_parent;
															
 
																 	/* remember the path to the tag */
															
 
																-	starpu_htbl_node_t *path[(STARPU_TAG_SIZE + STARPU_HTBL_NODE_SIZE - 1)/(STARPU_HTBL_NODE_SIZE)];
															
 
																+	struct _starpu_htbl_node *path[(_STARPU_TAG_SIZE + _STARPU_HTBL_NODE_SIZE - 1)/(_STARPU_HTBL_NODE_SIZE)];
															
 
																+	struct _starpu_htbl_node **path_parent[(_STARPU_TAG_SIZE + _STARPU_HTBL_NODE_SIZE - 1)/(_STARPU_HTBL_NODE_SIZE)];
															
 
																-	/* 000000000001111 with STARPU_HTBL_NODE_SIZE 1's */
															
 
																-	starpu_tag_t mask = (1<<STARPU_HTBL_NODE_SIZE)-1;
															
 
																+	/* 000000000001111 with _STARPU_HTBL_NODE_SIZE 1's */
															
 
																+	starpu_tag_t mask = (1<<_STARPU_HTBL_NODE_SIZE)-1;
															
 
																 	int level, maxlevel;
															
 
																 	unsigned tag_is_present = 1;
															
 
																-	for(currentbit = 0, level = 0; currentbit < STARPU_TAG_SIZE; currentbit+=STARPU_HTBL_NODE_SIZE, level++)
															
 
																+	for(currentbit = 0, level = 0; currentbit < _STARPU_TAG_SIZE; currentbit+=_STARPU_HTBL_NODE_SIZE, level++)
															
 
																 	{
															
 
																+		path_parent[level] = current_htbl_ptr_parent;
															
 
																 		path[level] = current_htbl_ptr;
															
 
																-		if (STARPU_UNLIKELY(!current_htbl_ptr)) {
															
 
																+		if (STARPU_UNLIKELY(!current_htbl_ptr))
															
 
																+		{
															
 
																 			tag_is_present = 0;
															
 
																 			break;
															
 
																 		}
															
 
																-		/* 0000000000001111 
															
 
																+		/* 0000000000001111
															
 
																 		 *     | currentbit
															
 
																 		 * 0000111100000000 = offloaded_mask
															
 
																 		 *         |last_currentbit
															
 
																 		 * */
															
 
																-		unsigned last_currentbit = 
															
 
																-			STARPU_TAG_SIZE - (currentbit + STARPU_HTBL_NODE_SIZE);
															
 
																+		unsigned last_currentbit =
															
 
																+			_STARPU_TAG_SIZE - (currentbit + _STARPU_HTBL_NODE_SIZE);
															
 
																 		starpu_tag_t offloaded_mask = mask << last_currentbit;
															
 
																-		unsigned current_index = 
															
 
																+		unsigned current_index =
															
 
																 			(tag & (offloaded_mask)) >> (last_currentbit);
															
 
																-		
															
 
																-		current_htbl_ptr = 
															
 
																-			current_htbl_ptr->children[current_index];
															
 
																+
															
 
																+		current_htbl_ptr_parent = 
															
 
																+			&current_htbl_ptr->children[current_index];
															
 
																+		current_htbl_ptr = *current_htbl_ptr_parent;
															
 
																 	}
															
 
																 	maxlevel = level;
															
@@ -151,8 +154,9 @@ void *_starpu_htbl_remove_tag(starpu_htbl_node_t *htbl, starpu_tag_t tag)
 
																 	void *old_entry = current_htbl_ptr;
															
 
																-	if (tag_is_present) {
															
 
																-		/* the tag was in the htbl, so we have to unroll the search 
															
 
																+	if (tag_is_present)
															
 
																+	{
															
 
																+		/* the tag was in the htbl, so we have to unroll the search
															
 
																  		 * to remove possibly useless htbl (internal) nodes */
															
 
																 		for (level = maxlevel - 1; level >= 0; level--)
															
 
																 		{
															
@@ -166,10 +170,28 @@ void *_starpu_htbl_remove_tag(starpu_htbl_node_t *htbl, starpu_tag_t tag)
 
																 				break;
															
 
																 			/* we remove this node */
															
 
																-			free(path[level]);
															
 
																+			//free(path[level]);
															
 
																+			*(path_parent[level]) = NULL;
															
 
																 		}
															
 
																 	}
															
 
																 	/* we return the entry if there was one */
															
 
																 	return old_entry;
															
 
																 }
															
 
																+
															
 
																+void _starpu_htbl_clear_tags(struct _starpu_htbl_node **htbl, unsigned level, void (*free_entry)(void *))
															
 
																+{
															
 
																+	unsigned i;
															
 
																+	struct _starpu_htbl_node *tbl = *htbl;
															
 
																+
															
 
																+	if (!tbl)
															
 
																+		return;
															
 
																+
															
 
																+	if (level * _STARPU_HTBL_NODE_SIZE < _STARPU_TAG_SIZE) {
															
 
																+		for (i = 0; i < 1<<_STARPU_HTBL_NODE_SIZE; i++)
															
 
																+			_starpu_htbl_clear_tags(&tbl->children[i], level + 1, free_entry);
															
 
																+		free(tbl);
															
 
																+	} else
															
 
																+		free_entry(tbl);
															
 
																+	*htbl = NULL;
															
 
																+}
															
--- a/src/core/dependencies/htable.h
+++ b/src/core/dependencies/htable.h
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2009, 2010  Université de Bordeaux 1
															
 
																- * Copyright (C) 2010  Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2009-2011  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -28,16 +28,18 @@
 
																 #include <assert.h>
															
 
																 #include <core/dependencies/tags.h>
															
 
																-#define STARPU_HTBL_NODE_SIZE	16
															
 
																+#define _STARPU_HTBL_NODE_SIZE	16
															
 
																-typedef struct starpu_htbl_node_s {
															
 
																+struct _starpu_htbl_node
															
 
																+{
															
 
																 	unsigned nentries;
															
 
																-	struct starpu_htbl_node_s *children[1<<STARPU_HTBL_NODE_SIZE];
															
 
																-} starpu_htbl_node_t;
															
 
																+	struct _starpu_htbl_node *children[1<<_STARPU_HTBL_NODE_SIZE];
															
 
																+};
															
 
																-void *_starpu_htbl_search_tag(starpu_htbl_node_t *htbl, starpu_tag_t tag);
															
 
																-void *_starpu_htbl_insert_tag(starpu_htbl_node_t **htbl, starpu_tag_t tag, void *entry);
															
 
																-void *_starpu_htbl_remove_tag(starpu_htbl_node_t *htbl, starpu_tag_t tag);
															
 
																+void *_starpu_htbl_search_tag(struct _starpu_htbl_node *htbl, starpu_tag_t tag);
															
 
																+void *_starpu_htbl_insert_tag(struct _starpu_htbl_node **htbl, starpu_tag_t tag, void *entry);
															
 
																+void *_starpu_htbl_remove_tag(struct _starpu_htbl_node **htbl, starpu_tag_t tag);
															
 
																+void _starpu_htbl_clear_tags(struct _starpu_htbl_node **htbl, unsigned level, void (*free_entry)(void*));
															
 
																 #endif
															
--- a/src/core/dependencies/implicit_data_deps.c
+++ b/src/core/dependencies/implicit_data_deps.c
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2010-2011  Université de Bordeaux 1
															
 
																- * Copyright (C) 2010  Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2010-2012  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -28,21 +28,21 @@
 
																 #endif
															
 
																 /* Read after Write (RAW) or Read after Read (RAR) */
															
 
																-static void _starpu_add_reader_after_writer(starpu_data_handle handle, struct starpu_task *pre_sync_task, struct starpu_task *post_sync_task)
															
 
																+static void _starpu_add_reader_after_writer(starpu_data_handle_t handle, struct starpu_task *pre_sync_task, struct starpu_task *post_sync_task)
															
 
																 {
															
 
																 	/* Add this task to the list of readers */
															
 
																-	struct starpu_task_wrapper_list *link = (struct starpu_task_wrapper_list *) malloc(sizeof(struct starpu_task_wrapper_list));
															
 
																+	struct _starpu_task_wrapper_list *link = (struct _starpu_task_wrapper_list *) malloc(sizeof(struct _starpu_task_wrapper_list));
															
 
																 	link->task = post_sync_task;
															
 
																 	link->next = handle->last_submitted_readers;
															
 
																 	handle->last_submitted_readers = link;
															
 
																 	/* This task depends on the previous writer if any */
															
 
																-	if (handle->last_submitted_writer)
															
 
																+	if (handle->last_submitted_writer && handle->last_submitted_writer != post_sync_task)
															
 
																 	{
															
 
																 		_STARPU_DEP_DEBUG("RAW %p\n", handle);
															
 
																 		struct starpu_task *task_array[1] = {handle->last_submitted_writer};
															
 
																 		_STARPU_DEP_DEBUG("dep %p -> %p\n", handle->last_submitted_writer, pre_sync_task);
															
 
																-		starpu_task_declare_deps_array(pre_sync_task, 1, task_array);
															
 
																+		_starpu_task_declare_deps_array(pre_sync_task, 1, task_array, 0);
															
 
																 	}
															
 
																         else
															
 
																         {
															
@@ -58,57 +58,67 @@ static void _starpu_add_reader_after_writer(starpu_data_handle handle, struct st
 
																 #endif
															
 
																 		handle->last_submitted_ghost_writer_id_is_valid)
															
 
																 	{
															
 
																-		starpu_job_t pre_sync_job = _starpu_get_job_associated_to_task(pre_sync_task);
															
 
																-		STARPU_TRACE_GHOST_TASK_DEPS(handle->last_submitted_ghost_writer_id, pre_sync_job->job_id);
															
 
																+		struct _starpu_job *pre_sync_job = _starpu_get_job_associated_to_task(pre_sync_task);
															
 
																+		_STARPU_TRACE_GHOST_TASK_DEPS(handle->last_submitted_ghost_writer_id, pre_sync_job->job_id);
															
 
																 		_starpu_bound_job_id_dep(pre_sync_job, handle->last_submitted_ghost_writer_id);
															
 
																 		_STARPU_DEP_DEBUG("dep ID%lu -> %p\n", handle->last_submitted_ghost_writer_id, pre_sync_task);
															
 
																 	}
															
 
																+
															
 
																+	if (!pre_sync_task->cl)
															
 
																+		_starpu_get_job_associated_to_task(pre_sync_task)->implicit_dep_handle = handle;
															
 
																 }
															
 
																 /* Write after Read (WAR) */
															
 
																-static void _starpu_add_writer_after_readers(starpu_data_handle handle, struct starpu_task *pre_sync_task, struct starpu_task *post_sync_task)
															
 
																+static void _starpu_add_writer_after_readers(starpu_data_handle_t handle, struct starpu_task *pre_sync_task, struct starpu_task *post_sync_task)
															
 
																 {
															
 
																 	/* Count the readers */
															
 
																 	unsigned nreaders = 0;
															
 
																-	struct starpu_task_wrapper_list *l;
															
 
																+	struct _starpu_task_wrapper_list *l;
															
 
																 	l = handle->last_submitted_readers;
															
 
																 	while (l)
															
 
																 	{
															
 
																-		nreaders++;
															
 
																+		if (l->task != post_sync_task)
															
 
																+			nreaders++;
															
 
																 		l = l->next;
															
 
																 	}
															
 
																 	_STARPU_DEP_DEBUG("%d readers\n", nreaders);
															
 
																-	/* Put all tasks in the list into task_array */
															
 
																-	struct starpu_task *task_array[nreaders];
															
 
																-	unsigned i = 0;
															
 
																-	l = handle->last_submitted_readers;
															
 
																-	while (l)
															
 
																+	if (nreaders > 0)
															
 
																 	{
															
 
																-		STARPU_ASSERT(l->task);
															
 
																-		task_array[i++] = l->task;
															
 
																-		_STARPU_DEP_DEBUG("dep %p -> %p\n", l->task, pre_sync_task);
															
 
																+		/* Put all tasks in the list into task_array */
															
 
																+		struct starpu_task *task_array[nreaders];
															
 
																+		unsigned i = 0;
															
 
																+		l = handle->last_submitted_readers;
															
 
																+		while (l)
															
 
																+		{
															
 
																+			STARPU_ASSERT(l->task);
															
 
																+			if (l->task != post_sync_task) {
															
 
																+				task_array[i++] = l->task;
															
 
																+				_STARPU_DEP_DEBUG("dep %p -> %p\n", l->task, pre_sync_task);
															
 
																+			}
															
 
																-		struct starpu_task_wrapper_list *prev = l;
															
 
																-		l = l->next;
															
 
																-		free(prev);
															
 
																+			struct _starpu_task_wrapper_list *prev = l;
															
 
																+			l = l->next;
															
 
																+			free(prev);
															
 
																+		}
															
 
																+		_starpu_task_declare_deps_array(pre_sync_task, nreaders, task_array, 0);
															
 
																 	}
															
 
																 #ifndef STARPU_USE_FXT
															
 
																 	if (_starpu_bound_recording)
															
 
																 #endif
															
 
																 	{
															
 
																 		/* Declare all dependencies with ghost readers */
															
 
																-		starpu_job_t pre_sync_job = _starpu_get_job_associated_to_task(pre_sync_task);
															
 
																+		struct _starpu_job *pre_sync_job = _starpu_get_job_associated_to_task(pre_sync_task);
															
 
																-		struct starpu_jobid_list *ghost_readers_id = handle->last_submitted_ghost_readers_id;
															
 
																+		struct _starpu_jobid_list *ghost_readers_id = handle->last_submitted_ghost_readers_id;
															
 
																 		while (ghost_readers_id)
															
 
																 		{
															
 
																 			unsigned long id = ghost_readers_id->id;
															
 
																-			STARPU_TRACE_GHOST_TASK_DEPS(id, pre_sync_job->job_id);
															
 
																+			_STARPU_TRACE_GHOST_TASK_DEPS(id, pre_sync_job->job_id);
															
 
																 			_starpu_bound_job_id_dep(pre_sync_job, id);
															
 
																 			_STARPU_DEP_DEBUG("dep ID%lu -> %p\n", id, pre_sync_task);
															
 
																-			struct starpu_jobid_list *prev = ghost_readers_id;
															
 
																+			struct _starpu_jobid_list *prev = ghost_readers_id;
															
 
																 			ghost_readers_id = ghost_readers_id->next;
															
 
																 			free(prev);
															
 
																 		}
															
@@ -118,17 +128,19 @@ static void _starpu_add_writer_after_readers(starpu_data_handle handle, struct s
 
																 	handle->last_submitted_readers = NULL;
															
 
																 	handle->last_submitted_writer = post_sync_task;
															
 
																-	starpu_task_declare_deps_array(pre_sync_task, nreaders, task_array);
															
 
																+	if (!post_sync_task->cl)
															
 
																+		_starpu_get_job_associated_to_task(post_sync_task)->implicit_dep_handle = handle;
															
 
																 }
															
 
																+
															
 
																 /* Write after Write (WAW) */
															
 
																-static void _starpu_add_writer_after_writer(starpu_data_handle handle, struct starpu_task *pre_sync_task, struct starpu_task *post_sync_task)
															
 
																+static void _starpu_add_writer_after_writer(starpu_data_handle_t handle, struct starpu_task *pre_sync_task, struct starpu_task *post_sync_task)
															
 
																 {
															
 
																 	/* (Read) Write */
															
 
																 	/* This task depends on the previous writer */
															
 
																-	if (handle->last_submitted_writer)
															
 
																+	if (handle->last_submitted_writer && handle->last_submitted_writer != post_sync_task)
															
 
																 	{
															
 
																 		struct starpu_task *task_array[1] = {handle->last_submitted_writer};
															
 
																-		starpu_task_declare_deps_array(pre_sync_task, 1, task_array);
															
 
																+		_starpu_task_declare_deps_array(pre_sync_task, 1, task_array, 0);
															
 
																 		_STARPU_DEP_DEBUG("dep %p -> %p\n", handle->last_submitted_writer, pre_sync_task);
															
 
																 	}
															
 
																         else
															
@@ -145,8 +157,8 @@ static void _starpu_add_writer_after_writer(starpu_data_handle handle, struct st
 
																 	{
															
 
																 		if (handle->last_submitted_ghost_writer_id_is_valid)
															
 
																 		{
															
 
																-			starpu_job_t pre_sync_job = _starpu_get_job_associated_to_task(pre_sync_task);
															
 
																-			STARPU_TRACE_GHOST_TASK_DEPS(handle->last_submitted_ghost_writer_id, pre_sync_job->job_id);
															
 
																+			struct _starpu_job *pre_sync_job = _starpu_get_job_associated_to_task(pre_sync_task);
															
 
																+			_STARPU_TRACE_GHOST_TASK_DEPS(handle->last_submitted_ghost_writer_id, pre_sync_job->job_id);
															
 
																 			_starpu_bound_job_id_dep(pre_sync_job, handle->last_submitted_ghost_writer_id);
															
 
																 			_STARPU_DEP_DEBUG("dep ID%lu -> %p\n", handle->last_submitted_ghost_writer_id, pre_sync_task);
															
 
																 			handle->last_submitted_ghost_writer_id_is_valid = 0;
															
@@ -158,21 +170,11 @@ static void _starpu_add_writer_after_writer(starpu_data_handle handle, struct st
 
																 	}
															
 
																 	handle->last_submitted_writer = post_sync_task;
															
 
																-}
															
 
																-static void disable_last_writer_callback(void *cl_arg)
															
 
																-{
															
 
																-	starpu_data_handle handle = (starpu_data_handle) cl_arg;
															
 
																-	
															
 
																-	/* NB: we don't take the handle->sequential_consistency_mutex mutex
															
 
																-	 * because the empty task that is used for synchronization is going to
															
 
																-	 * be unlock in the context of a call to
															
 
																-	 * _starpu_detect_implicit_data_deps_with_handle. It will therefore
															
 
																-	 * already have been locked. */
															
 
																-	handle->last_submitted_writer = NULL;
															
 
																+	if (!post_sync_task->cl)
															
 
																+		_starpu_get_job_associated_to_task(post_sync_task)->implicit_dep_handle = handle;
															
 
																 }
															
 
																-
															
 
																 /* This function adds the implicit task dependencies introduced by data
															
 
																  * sequential consistency. Two tasks are provided: pre_sync and post_sync which
															
 
																  * respectively indicates which task is going to depend on the previous deps
															
@@ -180,24 +182,26 @@ static void disable_last_writer_callback(void *cl_arg)
 
																  * introduced by a task submission, both tasks are just the submitted task, but
															
 
																  * in the case of user interactions with the DSM, these may be different tasks.
															
 
																  * */
															
 
																-/* NB : handle->sequential_consistency_mutex must be hold by the caller */
															
 
																-void _starpu_detect_implicit_data_deps_with_handle(struct starpu_task *pre_sync_task, struct starpu_task *post_sync_task,
															
 
																-						starpu_data_handle handle, starpu_access_mode mode)
															
 
																+/* NB : handle->sequential_consistency_mutex must be hold by the caller;
															
 
																+ * returns a task, to be submitted after releasing that mutex. */
															
 
																+struct starpu_task *_starpu_detect_implicit_data_deps_with_handle(struct starpu_task *pre_sync_task, struct starpu_task *post_sync_task,
															
 
																+						   starpu_data_handle_t handle, enum starpu_access_mode mode)
															
 
																 {
															
 
																+	struct starpu_task *task = NULL;
															
 
																+
															
 
																 	STARPU_ASSERT(!(mode & STARPU_SCRATCH));
															
 
																         _STARPU_LOG_IN();
															
 
																 	if (handle->sequential_consistency)
															
 
																 	{
															
 
																-		starpu_job_t pre_sync_job = _starpu_get_job_associated_to_task(pre_sync_task);
															
 
																-		starpu_job_t post_sync_job = _starpu_get_job_associated_to_task(post_sync_task);
															
 
																+		struct _starpu_job *pre_sync_job = _starpu_get_job_associated_to_task(pre_sync_task);
															
 
																+		struct _starpu_job *post_sync_job = _starpu_get_job_associated_to_task(post_sync_task);
															
 
																 		/* Skip tasks that are associated to a reduction phase so that
															
 
																 		 * they do not interfere with the application. */
															
 
																 		if (pre_sync_job->reduction_task || post_sync_job->reduction_task)
															
 
																-			return;
															
 
																-	
															
 
																-	
															
 
																+			return NULL;
															
 
																+
															
 
																 		_STARPU_DEP_DEBUG("Tasks %p %p\n", pre_sync_task, post_sync_task);
															
 
																 		/* In case we are generating the DAG, we add an implicit
															
 
																 		 * dependency between the pre and the post sync tasks in case
															
@@ -208,12 +212,12 @@ void _starpu_detect_implicit_data_deps_with_handle(struct starpu_task *pre_sync_
 
																 #endif
															
 
																 		)
															
 
																 		{
															
 
																-			STARPU_TRACE_GHOST_TASK_DEPS(pre_sync_job->job_id, post_sync_job->job_id);
															
 
																+			_STARPU_TRACE_GHOST_TASK_DEPS(pre_sync_job->job_id, post_sync_job->job_id);
															
 
																 			_starpu_bound_task_dep(post_sync_job, pre_sync_job);
															
 
																 		}
															
 
																-		starpu_access_mode previous_mode = handle->last_submitted_mode;
															
 
																-	
															
 
																+		enum starpu_access_mode previous_mode = handle->last_submitted_mode;
															
 
																+
															
 
																 		if (mode & STARPU_W)
															
 
																 		{
															
 
																 			_STARPU_DEP_DEBUG("W %p\n", handle);
															
@@ -222,17 +226,17 @@ void _starpu_detect_implicit_data_deps_with_handle(struct starpu_task *pre_sync_
 
																 				_STARPU_DEP_DEBUG("WAW %p\n", handle);
															
 
																 				_starpu_add_writer_after_writer(handle, pre_sync_task, post_sync_task);
															
 
																 			}
															
 
																-			else {
															
 
																+			else
															
 
																+			{
															
 
																 				/* The task submitted previously were in read-only
															
 
																 				 * mode: this task must depend on all those read-only
															
 
																 				 * tasks and we get rid of the list of readers */
															
 
																-			
															
 
																 				_STARPU_DEP_DEBUG("WAR %p\n", handle);
															
 
																 				_starpu_add_writer_after_readers(handle, pre_sync_task, post_sync_task);
															
 
																 			}
															
 
																-	
															
 
																 		}
															
 
																-		else {
															
 
																+		else
															
 
																+		{
															
 
																 			_STARPU_DEP_DEBUG("R %p %d -> %d\n", handle, previous_mode, mode);
															
 
																 			/* Add a reader, after a writer or a reader. */
															
 
																 			STARPU_ASSERT(pre_sync_task);
															
@@ -253,23 +257,20 @@ void _starpu_detect_implicit_data_deps_with_handle(struct starpu_task *pre_sync_
 
																 				new_sync_task = starpu_task_create();
															
 
																 				STARPU_ASSERT(new_sync_task);
															
 
																 				new_sync_task->cl = NULL;
															
 
																-				new_sync_task->callback_func = disable_last_writer_callback;
															
 
																-				new_sync_task->callback_arg = handle;
															
 
																 #ifdef STARPU_USE_FXT
															
 
																 				_starpu_get_job_associated_to_task(new_sync_task)->model_name = "sync_task_redux";
															
 
																 #endif
															
 
																 				_starpu_add_writer_after_readers(handle, new_sync_task, new_sync_task);
															
 
																-				_starpu_task_submit_internal(new_sync_task);
															
 
																+				task = new_sync_task;
															
 
																 			}
															
 
																-	
															
 
																 			_starpu_add_reader_after_writer(handle, pre_sync_task, post_sync_task);
															
 
																 		}
															
 
																-	
															
 
																 		handle->last_submitted_mode = mode;
															
 
																 	}
															
 
																         _STARPU_LOG_OUT();
															
 
																+	return task;
															
 
																 }
															
 
																 /* Create the implicit dependencies for a newly submitted task */
															
@@ -280,7 +281,7 @@ void _starpu_detect_implicit_data_deps(struct starpu_task *task)
 
																 	/* We don't want to enforce a sequential consistency for tasks that are
															
 
																 	 * not visible to the application. */
															
 
																-	starpu_job_t j = _starpu_get_job_associated_to_task(task);
															
 
																+	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
															
 
																 	if (j->reduction_task)
															
 
																 		return;
															
@@ -289,16 +290,21 @@ void _starpu_detect_implicit_data_deps(struct starpu_task *task)
 
																 	unsigned buffer;
															
 
																 	for (buffer = 0; buffer < nbuffers; buffer++)
															
 
																 	{
															
 
																-		starpu_data_handle handle = task->buffers[buffer].handle;
															
 
																-		starpu_access_mode mode = task->buffers[buffer].mode;
															
 
																+		starpu_data_handle_t handle = task->handles[buffer];
															
 
																+		enum starpu_access_mode mode = task->cl->modes[buffer];
															
 
																+		struct starpu_task *new_task;
															
 
																 		/* Scratch memory does not introduce any deps */
															
 
																 		if (mode & STARPU_SCRATCH)
															
 
																 			continue;
															
 
																-		PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex);
															
 
																-		_starpu_detect_implicit_data_deps_with_handle(task, task, handle, mode);
															
 
																-		PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
															
 
																+		_STARPU_PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex);
															
 
																+		new_task = _starpu_detect_implicit_data_deps_with_handle(task, task, handle, mode);
															
 
																+		_STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
															
 
																+		if (new_task) {
															
 
																+			int ret = starpu_task_submit_internal(new_task);
															
 
																+			STARPU_ASSERT(!ret);
															
 
																+		}
															
 
																 	}
															
 
																         _STARPU_LOG_OUT();
															
 
																 }
															
@@ -311,9 +317,10 @@ void _starpu_detect_implicit_data_deps(struct starpu_task *task)
 
																  * sequence, f(Ar) g(Ar) h(Aw), we expect to have h depend on both f and g, but
															
 
																  * if h is submitted after the termination of f or g, StarPU will not create a
															
 
																  * dependency as this is not needed anymore. */
															
 
																-void _starpu_release_data_enforce_sequential_consistency(struct starpu_task *task, starpu_data_handle handle)
															
 
																+/* the sequential_consistency_mutex of the handle has to be already held */
															
 
																+void _starpu_release_data_enforce_sequential_consistency(struct starpu_task *task, starpu_data_handle_t handle)
															
 
																 {
															
 
																-	PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex);
															
 
																+	_STARPU_PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex);
															
 
																 	if (handle->sequential_consistency)
															
 
																 	{
															
@@ -323,19 +330,18 @@ void _starpu_release_data_enforce_sequential_consistency(struct starpu_task *tas
 
																 		if (task == handle->last_submitted_writer)
															
 
																 		{
															
 
																 			handle->last_submitted_writer = NULL;
															
 
																-			
															
 
																+
															
 
																 #ifndef STARPU_USE_FXT
															
 
																 			if (_starpu_bound_recording)
															
 
																 #endif
															
 
																 			{
															
 
																 				/* Save the previous writer as the ghost last writer */
															
 
																 				handle->last_submitted_ghost_writer_id_is_valid = 1;
															
 
																-				starpu_job_t ghost_job = _starpu_get_job_associated_to_task(task);
															
 
																+				struct _starpu_job *ghost_job = _starpu_get_job_associated_to_task(task);
															
 
																 				handle->last_submitted_ghost_writer_id = ghost_job->job_id;
															
 
																 			}
															
 
																-			
															
 
																 		}
															
 
																-		
															
 
																+
															
 
																 		/* XXX can a task be both the last writer associated to a data
															
 
																 		 * and be in its list of readers ? If not, we should not go
															
 
																 		 * through the entire list once we have detected it was the
															
@@ -343,12 +349,15 @@ void _starpu_release_data_enforce_sequential_consistency(struct starpu_task *tas
 
																 		/* Same if this is one of the readers: we go through the list
															
 
																 		 * of readers and remove the task if it is found. */
															
 
																-		struct starpu_task_wrapper_list *l;
															
 
																+		struct _starpu_task_wrapper_list *l;
															
 
																 		l = handle->last_submitted_readers;
															
 
																-		struct starpu_task_wrapper_list *prev = NULL;
															
 
																+		struct _starpu_task_wrapper_list *prev = NULL;
															
 
																+#ifdef STARPU_DEVEL
															
 
																+#warning TODO: use double-linked list to make finding ourself fast
															
 
																+#endif
															
 
																 		while (l)
															
 
																 		{
															
 
																-			struct starpu_task_wrapper_list *next = l->next;
															
 
																+			struct _starpu_task_wrapper_list *next = l->next;
															
 
																 			if (l->task == task)
															
 
																 			{
															
@@ -360,11 +369,11 @@ void _starpu_release_data_enforce_sequential_consistency(struct starpu_task *tas
 
																 #endif
															
 
																 				{
															
 
																 					/* Save the job id of the reader task in the ghost reader linked list list */
															
 
																-					starpu_job_t ghost_reader_job = _starpu_get_job_associated_to_task(task);
															
 
																-					struct starpu_jobid_list *link = (struct starpu_jobid_list *) malloc(sizeof(struct starpu_jobid_list));
															
 
																+					struct _starpu_job *ghost_reader_job = _starpu_get_job_associated_to_task(task);
															
 
																+					struct _starpu_jobid_list *link = (struct _starpu_jobid_list *) malloc(sizeof(struct _starpu_jobid_list));
															
 
																 					STARPU_ASSERT(link);
															
 
																 					link->next = handle->last_submitted_ghost_readers_id;
															
 
																-					link->id = ghost_reader_job->job_id; 
															
 
																+					link->id = ghost_reader_job->job_id;
															
 
																 					handle->last_submitted_ghost_readers_id = link;
															
 
																 				}
															
@@ -372,7 +381,8 @@ void _starpu_release_data_enforce_sequential_consistency(struct starpu_task *tas
 
																 				{
															
 
																 					prev->next = next;
															
 
																 				}
															
 
																-				else {
															
 
																+				else
															
 
																+				{
															
 
																 					/* This is the first element of the list */
															
 
																 					handle->last_submitted_readers = next;
															
 
																 				}
															
@@ -383,7 +393,8 @@ void _starpu_release_data_enforce_sequential_consistency(struct starpu_task *tas
 
																 				 * as soon as we find the task. TODO: check how
															
 
																 				 * duplicate dependencies are treated. */
															
 
																 			}
															
 
																-			else {
															
 
																+			else
															
 
																+			{
															
 
																 				prev = l;
															
 
																 			}
															
@@ -391,34 +402,34 @@ void _starpu_release_data_enforce_sequential_consistency(struct starpu_task *tas
 
																 		}
															
 
																 	}
															
 
																-	PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
															
 
																+	_STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
															
 
																 }
															
 
																-void _starpu_add_post_sync_tasks(struct starpu_task *post_sync_task, starpu_data_handle handle)
															
 
																+void _starpu_add_post_sync_tasks(struct starpu_task *post_sync_task, starpu_data_handle_t handle)
															
 
																 {
															
 
																         _STARPU_LOG_IN();
															
 
																-	PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex);
															
 
																+	_STARPU_PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex);
															
 
																 	if (handle->sequential_consistency)
															
 
																 	{
															
 
																 		handle->post_sync_tasks_cnt++;
															
 
																-		struct starpu_task_wrapper_list *link = (struct starpu_task_wrapper_list *) malloc(sizeof(struct starpu_task_wrapper_list));
															
 
																+		struct _starpu_task_wrapper_list *link = (struct _starpu_task_wrapper_list *) malloc(sizeof(struct _starpu_task_wrapper_list));
															
 
																 		link->task = post_sync_task;
															
 
																 		link->next = handle->post_sync_tasks;
															
 
																-		handle->post_sync_tasks = link;		
															
 
																+		handle->post_sync_tasks = link;
															
 
																 	}
															
 
																-	PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
															
 
																+	_STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
															
 
																         _STARPU_LOG_OUT();
															
 
																 }
															
 
																-void _starpu_unlock_post_sync_tasks(starpu_data_handle handle)
															
 
																+void _starpu_unlock_post_sync_tasks(starpu_data_handle_t handle)
															
 
																 {
															
 
																-	struct starpu_task_wrapper_list *post_sync_tasks = NULL;
															
 
																+	struct _starpu_task_wrapper_list *post_sync_tasks = NULL;
															
 
																 	unsigned do_submit_tasks = 0;
															
 
																-	PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex);
															
 
																+	_STARPU_PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex);
															
 
																 	if (handle->sequential_consistency)
															
 
																 	{
															
@@ -431,36 +442,38 @@ void _starpu_unlock_post_sync_tasks(starpu_data_handle handle)
 
																 			post_sync_tasks = handle->post_sync_tasks;
															
 
																 			handle->post_sync_tasks = NULL;
															
 
																 		}
															
 
																-
															
 
																 	}
															
 
																-	PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
															
 
																+	_STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
															
 
																 	if (do_submit_tasks)
															
 
																 	{
															
 
																-		struct starpu_task_wrapper_list *link = post_sync_tasks;
															
 
																+		struct _starpu_task_wrapper_list *link = post_sync_tasks;
															
 
																-		while (link) {
															
 
																+		while (link)
															
 
																+		{
															
 
																 			/* There is no need to depend on that task now, since it was already unlocked */
															
 
																 			_starpu_release_data_enforce_sequential_consistency(link->task, handle);
															
 
																-			int ret = _starpu_task_submit_internal(link->task);
															
 
																+			int ret = starpu_task_submit_internal(link->task);
															
 
																 			STARPU_ASSERT(!ret);
															
 
																+			struct _starpu_task_wrapper_list *tmp = link;
															
 
																 			link = link->next;
															
 
																+			free(tmp);
															
 
																 		}
															
 
																 	}
															
 
																 }
															
 
																 /* If sequential consistency mode is enabled, this function blocks until the
															
 
																  * handle is available in the requested access mode. */
															
 
																-int _starpu_data_wait_until_available(starpu_data_handle handle, starpu_access_mode mode)
															
 
																+int _starpu_data_wait_until_available(starpu_data_handle_t handle, enum starpu_access_mode mode)
															
 
																 {
															
 
																 	/* If sequential consistency is enabled, wait until data is available */
															
 
																-	PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex);
															
 
																+	_STARPU_PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex);
															
 
																 	int sequential_consistency = handle->sequential_consistency;
															
 
																 	if (sequential_consistency)
															
 
																 	{
															
 
																-		struct starpu_task *sync_task;
															
 
																+		struct starpu_task *sync_task, *new_task;
															
 
																 		sync_task = starpu_task_create();
															
 
																 		sync_task->detach = 0;
															
 
																 		sync_task->destroy = 1;
															
@@ -470,16 +483,22 @@ int _starpu_data_wait_until_available(starpu_data_handle handle, starpu_access_m
 
																 		/* It is not really a RW access, but we want to make sure that
															
 
																 		 * all previous accesses are done */
															
 
																-		_starpu_detect_implicit_data_deps_with_handle(sync_task, sync_task, handle, mode);
															
 
																-		PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
															
 
																+		new_task = _starpu_detect_implicit_data_deps_with_handle(sync_task, sync_task, handle, mode);
															
 
																+		_STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
															
 
																+
															
 
																+		if (new_task) {
															
 
																+			int ret = starpu_task_submit_internal(new_task);
															
 
																+			STARPU_ASSERT(!ret);
															
 
																+		}
															
 
																 		/* TODO detect if this is superflous */
															
 
																-		int ret = _starpu_task_submit_internal(sync_task);
															
 
																+		int ret = starpu_task_submit_internal(sync_task);
															
 
																 		STARPU_ASSERT(!ret);
															
 
																 		starpu_task_wait(sync_task);
															
 
																 	}
															
 
																-	else {
															
 
																-		PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
															
 
																+	else
															
 
																+	{
															
 
																+		_STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
															
 
																 	}
															
 
																 	return 0;
															
--- a/src/core/dependencies/implicit_data_deps.h
+++ b/src/core/dependencies/implicit_data_deps.h
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2010  Université de Bordeaux 1
															
 
																- * Copyright (C) 2010  Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2010, 2012  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -21,16 +21,16 @@
 
																 #include <starpu.h>
															
 
																 #include <common/config.h>
															
 
																-void _starpu_detect_implicit_data_deps_with_handle(struct starpu_task *pre_sync_task, struct starpu_task *post_sync_task,
															
 
																-						starpu_data_handle handle, starpu_access_mode mode);
															
 
																+struct starpu_task *_starpu_detect_implicit_data_deps_with_handle(struct starpu_task *pre_sync_task, struct starpu_task *post_sync_task,
															
 
																+						   starpu_data_handle_t handle, enum starpu_access_mode mode);
															
 
																 void _starpu_detect_implicit_data_deps(struct starpu_task *task);
															
 
																-void _starpu_release_data_enforce_sequential_consistency(struct starpu_task *task, starpu_data_handle handle);
															
 
																+void _starpu_release_data_enforce_sequential_consistency(struct starpu_task *task, starpu_data_handle_t handle);
															
 
																-void _starpu_add_post_sync_tasks(struct starpu_task *post_sync_task, starpu_data_handle handle);
															
 
																-void _starpu_unlock_post_sync_tasks(starpu_data_handle handle);
															
 
																+void _starpu_add_post_sync_tasks(struct starpu_task *post_sync_task, starpu_data_handle_t handle);
															
 
																+void _starpu_unlock_post_sync_tasks(starpu_data_handle_t handle);
															
 
																 /* This function blocks until the handle is available in the requested mode */
															
 
																-int _starpu_data_wait_until_available(starpu_data_handle handle, starpu_access_mode mode);
															
 
																+int _starpu_data_wait_until_available(starpu_data_handle_t handle, enum starpu_access_mode mode);
															
 
																 #endif // __IMPLICIT_DATA_DEPS_H__
															
--- a/src/core/dependencies/tags.c
+++ b/src/core/dependencies/tags.c
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2009, 2010, 2011  Université de Bordeaux 1
															
 
																- * Copyright (C) 2010  Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2009-2012  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -25,12 +25,12 @@
 
																 #include <core/dependencies/data_concurrency.h>
															
 
																 #include <profiling/bound.h>
															
 
																-static starpu_htbl_node_t *tag_htbl = NULL;
															
 
																+static struct _starpu_htbl_node *tag_htbl = NULL;
															
 
																 static pthread_rwlock_t tag_global_rwlock = PTHREAD_RWLOCK_INITIALIZER;
															
 
																-static starpu_cg_t *create_cg_apps(unsigned ntags)
															
 
																+static struct _starpu_cg *create_cg_apps(unsigned ntags)
															
 
																 {
															
 
																-	starpu_cg_t *cg = (starpu_cg_t *) malloc(sizeof(starpu_cg_t));
															
 
																+	struct _starpu_cg *cg = (struct _starpu_cg *) malloc(sizeof(struct _starpu_cg));
															
 
																 	STARPU_ASSERT(cg);
															
 
																 	cg->ntags = ntags;
															
@@ -38,16 +38,15 @@ static starpu_cg_t *create_cg_apps(unsigned ntags)
 
																 	cg->cg_type = STARPU_CG_APPS;
															
 
																 	cg->succ.succ_apps.completed = 0;
															
 
																-	PTHREAD_MUTEX_INIT(&cg->succ.succ_apps.cg_mutex, NULL);
															
 
																-	PTHREAD_COND_INIT(&cg->succ.succ_apps.cg_cond, NULL);
															
 
																+	_STARPU_PTHREAD_MUTEX_INIT(&cg->succ.succ_apps.cg_mutex, NULL);
															
 
																+	_STARPU_PTHREAD_COND_INIT(&cg->succ.succ_apps.cg_cond, NULL);
															
 
																 	return cg;
															
 
																 }
															
 
																-
															
 
																-static starpu_cg_t *create_cg_tag(unsigned ntags, struct starpu_tag_s *tag)
															
 
																+static struct _starpu_cg *create_cg_tag(unsigned ntags, struct _starpu_tag *tag)
															
 
																 {
															
 
																-	starpu_cg_t *cg = (starpu_cg_t *) malloc(sizeof(starpu_cg_t));
															
 
																+	struct _starpu_cg *cg = (struct _starpu_cg *) malloc(sizeof(struct _starpu_cg));
															
 
																 	STARPU_ASSERT(cg);
															
 
																 	cg->ntags = ntags;
															
@@ -60,10 +59,10 @@ static starpu_cg_t *create_cg_tag(unsigned ntags, struct starpu_tag_s *tag)
 
																 	return cg;
															
 
																 }
															
 
																-static struct starpu_tag_s *_starpu_tag_init(starpu_tag_t id)
															
 
																+static struct _starpu_tag *_starpu_tag_init(starpu_tag_t id)
															
 
																 {
															
 
																-	struct starpu_tag_s *tag;
															
 
																-	tag = (struct starpu_tag_s *) malloc(sizeof(struct starpu_tag_s));
															
 
																+	struct _starpu_tag *tag;
															
 
																+	tag = (struct _starpu_tag *) malloc(sizeof(struct _starpu_tag));
															
 
																 	STARPU_ASSERT(tag);
															
 
																 	tag->job = NULL;
															
@@ -80,15 +79,9 @@ static struct starpu_tag_s *_starpu_tag_init(starpu_tag_t id)
 
																 	return tag;
															
 
																 }
															
 
																-void starpu_tag_remove(starpu_tag_t id)
															
 
																+static void _starpu_tag_free(void *_tag)
															
 
																 {
															
 
																-	struct starpu_tag_s *tag;
															
 
																-
															
 
																-	pthread_rwlock_wrlock(&tag_global_rwlock);
															
 
																-
															
 
																-	tag = (struct starpu_tag_s *) _starpu_htbl_remove_tag(tag_htbl, id);
															
 
																-
															
 
																-	pthread_rwlock_unlock(&tag_global_rwlock);
															
 
																+	struct _starpu_tag *tag = (struct _starpu_tag *) _tag;
															
 
																 	if (tag) {
															
 
																 		_starpu_spin_lock(&tag->lock);
															
@@ -98,7 +91,7 @@ void starpu_tag_remove(starpu_tag_t id)
 
																 		for (succ = 0; succ < nsuccs; succ++)
															
 
																 		{
															
 
																-			struct starpu_cg_s *cg = tag->tag_successors.succ[succ];
															
 
																+			struct _starpu_cg *cg = tag->tag_successors.succ[succ];
															
 
																 			unsigned ntags = STARPU_ATOMIC_ADD(&cg->ntags, -1);
															
 
																 			unsigned remaining __attribute__ ((unused)) = STARPU_ATOMIC_ADD(&cg->remaining, -1);
															
@@ -113,20 +106,43 @@ void starpu_tag_remove(starpu_tag_t id)
 
																 #endif
															
 
																 		_starpu_spin_unlock(&tag->lock);
															
 
																+
															
 
																+		free(tag);
															
 
																 	}
															
 
																+}
															
 
																+
															
 
																+void starpu_tag_remove(starpu_tag_t id)
															
 
																+{
															
 
																+	struct _starpu_tag *tag;
															
 
																+
															
 
																+	_STARPU_PTHREAD_RWLOCK_WRLOCK(&tag_global_rwlock);
															
 
																+
															
 
																+	tag = (struct _starpu_tag *) _starpu_htbl_remove_tag(&tag_htbl, id);
															
 
																-	free(tag);
															
 
																+	_STARPU_PTHREAD_RWLOCK_UNLOCK(&tag_global_rwlock);
															
 
																+
															
 
																+	_starpu_tag_free(tag);
															
 
																+}
															
 
																+
															
 
																+void _starpu_tag_clear(void)
															
 
																+{
															
 
																+	_STARPU_PTHREAD_RWLOCK_WRLOCK(&tag_global_rwlock);
															
 
																+
															
 
																+	_starpu_htbl_clear_tags(&tag_htbl, 0, _starpu_tag_free);
															
 
																+
															
 
																+	_STARPU_PTHREAD_RWLOCK_UNLOCK(&tag_global_rwlock);
															
 
																 }
															
 
																-static struct starpu_tag_s *gettag_struct(starpu_tag_t id)
															
 
																+static struct _starpu_tag *gettag_struct(starpu_tag_t id)
															
 
																 {
															
 
																-	pthread_rwlock_wrlock(&tag_global_rwlock);
															
 
																+	_STARPU_PTHREAD_RWLOCK_WRLOCK(&tag_global_rwlock);
															
 
																 	/* search if the tag is already declared or not */
															
 
																-	struct starpu_tag_s *tag;
															
 
																-	tag = (struct starpu_tag_s *) _starpu_htbl_search_tag(tag_htbl, id);
															
 
																+	struct _starpu_tag *tag;
															
 
																+	tag = (struct _starpu_tag *) _starpu_htbl_search_tag(tag_htbl, id);
															
 
																-	if (tag == NULL) {
															
 
																+	if (tag == NULL)
															
 
																+	{
															
 
																 		/* the tag does not exist yet : create an entry */
															
 
																 		tag = _starpu_tag_init(id);
															
@@ -136,18 +152,18 @@ static struct starpu_tag_s *gettag_struct(starpu_tag_t id)
 
																 		STARPU_ASSERT(old == NULL);
															
 
																 	}
															
 
																-	pthread_rwlock_unlock(&tag_global_rwlock);
															
 
																+	_STARPU_PTHREAD_RWLOCK_UNLOCK(&tag_global_rwlock);
															
 
																 	return tag;
															
 
																 }
															
 
																 /* lock should be taken */
															
 
																-void _starpu_tag_set_ready(struct starpu_tag_s *tag)
															
 
																+void _starpu_tag_set_ready(struct _starpu_tag *tag)
															
 
																 {
															
 
																 	/* mark this tag as ready to run */
															
 
																 	tag->state = STARPU_READY;
															
 
																 	/* declare it to the scheduler ! */
															
 
																-	struct starpu_job_s *j = tag->job;
															
 
																+	struct _starpu_job *j = tag->job;
															
 
																 	/* In case the task job is going to be scheduled immediately, and if
															
 
																 	 * the task is "empty", calling _starpu_push_task would directly try to enforce
															
@@ -155,35 +171,37 @@ void _starpu_tag_set_ready(struct starpu_tag_s *tag)
 
																 	 * lock again, resulting in a deadlock. */
															
 
																 	_starpu_spin_unlock(&tag->lock);
															
 
																-	PTHREAD_MUTEX_LOCK(&j->sync_mutex);
															
 
																-
															
 
																 	/* enforce data dependencies */
															
 
																-	_starpu_enforce_deps_starting_from_task(j, 1);
															
 
																-
															
 
																-	PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
															
 
																+	_starpu_enforce_deps_starting_from_task(j);
															
 
																 	_starpu_spin_lock(&tag->lock);
															
 
																 }
															
 
																 /* the lock must be taken ! */
															
 
																-static void _starpu_tag_add_succ(struct starpu_tag_s *tag, starpu_cg_t *cg)
															
 
																+static void _starpu_tag_add_succ(struct _starpu_tag *tag, struct _starpu_cg *cg)
															
 
																 {
															
 
																 	STARPU_ASSERT(tag);
															
 
																 	_starpu_add_successor_to_cg_list(&tag->tag_successors, cg);
															
 
																-	if (tag->state == STARPU_DONE) {
															
 
																+	if (tag->state == STARPU_DONE)
															
 
																+	{
															
 
																 		/* the tag was already completed sooner */
															
 
																 		_starpu_notify_cg(cg);
															
 
																 	}
															
 
																 }
															
 
																-void _starpu_notify_tag_dependencies(struct starpu_tag_s *tag)
															
 
																+void _starpu_notify_tag_dependencies(struct _starpu_tag *tag)
															
 
																 {
															
 
																 	_starpu_spin_lock(&tag->lock);
															
 
																+	if (tag->state == STARPU_DONE) {
															
 
																+		_starpu_spin_unlock(&tag->lock);
															
 
																+		return;
															
 
																+	}
															
 
																+
															
 
																 	tag->state = STARPU_DONE;
															
 
																-	STARPU_TRACE_TAG_DONE(tag);
															
 
																+	_STARPU_TRACE_TAG_DONE(tag);
															
 
																 	_starpu_notify_cg_list(&tag->tag_successors);
															
@@ -192,20 +210,20 @@ void _starpu_notify_tag_dependencies(struct starpu_tag_s *tag)
 
																 void starpu_tag_notify_from_apps(starpu_tag_t id)
															
 
																 {
															
 
																-	struct starpu_tag_s *tag = gettag_struct(id);
															
 
																+	struct _starpu_tag *tag = gettag_struct(id);
															
 
																 	_starpu_notify_tag_dependencies(tag);
															
 
																 }
															
 
																-void _starpu_tag_declare(starpu_tag_t id, struct starpu_job_s *job)
															
 
																+void _starpu_tag_declare(starpu_tag_t id, struct _starpu_job *job)
															
 
																 {
															
 
																-	STARPU_TRACE_TAG(id, job);
															
 
																+	_STARPU_TRACE_TAG(id, job);
															
 
																 	job->task->use_tag = 1;
															
 
																-	
															
 
																-	struct starpu_tag_s *tag= gettag_struct(id);
															
 
																+
															
 
																+	struct _starpu_tag *tag= gettag_struct(id);
															
 
																 	tag->job = job;
															
 
																 	tag->is_assigned = 1;
															
 
																-	
															
 
																+
															
 
																 	job->tag = tag;
															
 
																 	/* the tag is now associated to a job */
															
@@ -219,65 +237,65 @@ void starpu_tag_declare_deps_array(starpu_tag_t id, unsigned ndeps, starpu_tag_t
 
																 	unsigned i;
															
 
																 	/* create the associated completion group */
															
 
																-	struct starpu_tag_s *tag_child = gettag_struct(id);
															
 
																+	struct _starpu_tag *tag_child = gettag_struct(id);
															
 
																 	_starpu_spin_lock(&tag_child->lock);
															
 
																-
															
 
																-	starpu_cg_t *cg = create_cg_tag(ndeps, tag_child);
															
 
																+	struct _starpu_cg *cg = create_cg_tag(ndeps, tag_child);
															
 
																+	_starpu_spin_unlock(&tag_child->lock);
															
 
																 	STARPU_ASSERT(ndeps != 0);
															
 
																-	
															
 
																+
															
 
																 	for (i = 0; i < ndeps; i++)
															
 
																 	{
															
 
																 		starpu_tag_t dep_id = array[i];
															
 
																-		
															
 
																+
															
 
																 		/* id depends on dep_id
															
 
																 		 * so cg should be among dep_id's successors*/
															
 
																-		STARPU_TRACE_TAG_DEPS(id, dep_id);
															
 
																+		_STARPU_TRACE_TAG_DEPS(id, dep_id);
															
 
																 		_starpu_bound_tag_dep(id, dep_id);
															
 
																-		struct starpu_tag_s *tag_dep = gettag_struct(dep_id);
															
 
																+		struct _starpu_tag *tag_dep = gettag_struct(dep_id);
															
 
																 		STARPU_ASSERT(tag_dep != tag_child);
															
 
																 		_starpu_spin_lock(&tag_dep->lock);
															
 
																+		_starpu_spin_lock(&tag_child->lock);
															
 
																 		_starpu_tag_add_succ(tag_dep, cg);
															
 
																+		_starpu_spin_unlock(&tag_child->lock);
															
 
																 		_starpu_spin_unlock(&tag_dep->lock);
															
 
																 	}
															
 
																-
															
 
																-	_starpu_spin_unlock(&tag_child->lock);
															
 
																 }
															
 
																 void starpu_tag_declare_deps(starpu_tag_t id, unsigned ndeps, ...)
															
 
																 {
															
 
																 	unsigned i;
															
 
																-	
															
 
																+
															
 
																 	/* create the associated completion group */
															
 
																-	struct starpu_tag_s *tag_child = gettag_struct(id);
															
 
																+	struct _starpu_tag *tag_child = gettag_struct(id);
															
 
																 	_starpu_spin_lock(&tag_child->lock);
															
 
																-
															
 
																-	starpu_cg_t *cg = create_cg_tag(ndeps, tag_child);
															
 
																+	struct _starpu_cg *cg = create_cg_tag(ndeps, tag_child);
															
 
																+	_starpu_spin_unlock(&tag_child->lock);
															
 
																 	STARPU_ASSERT(ndeps != 0);
															
 
																-	
															
 
																+
															
 
																 	va_list pa;
															
 
																 	va_start(pa, ndeps);
															
 
																 	for (i = 0; i < ndeps; i++)
															
 
																 	{
															
 
																 		starpu_tag_t dep_id;
															
 
																 		dep_id = va_arg(pa, starpu_tag_t);
															
 
																-	
															
 
																+
															
 
																 		/* id depends on dep_id
															
 
																 		 * so cg should be among dep_id's successors*/
															
 
																-		STARPU_TRACE_TAG_DEPS(id, dep_id);
															
 
																+		_STARPU_TRACE_TAG_DEPS(id, dep_id);
															
 
																 		_starpu_bound_tag_dep(id, dep_id);
															
 
																-		struct starpu_tag_s *tag_dep = gettag_struct(dep_id);
															
 
																+		struct _starpu_tag *tag_dep = gettag_struct(dep_id);
															
 
																 		STARPU_ASSERT(tag_dep != tag_child);
															
 
																 		_starpu_spin_lock(&tag_dep->lock);
															
 
																+		_starpu_spin_lock(&tag_child->lock);
															
 
																 		_starpu_tag_add_succ(tag_dep, cg);
															
 
																+		_starpu_spin_unlock(&tag_child->lock);
															
 
																 		_starpu_spin_unlock(&tag_dep->lock);
															
 
																 	}
															
 
																 	va_end(pa);
															
 
																-
															
 
																-	_starpu_spin_unlock(&tag_child->lock);
															
 
																 }
															
 
																 /* this function may be called by the application (outside callbacks !) */
															
@@ -286,12 +304,13 @@ int starpu_tag_wait_array(unsigned ntags, starpu_tag_t *id)
 
																 	unsigned i;
															
 
																 	unsigned current;
															
 
																-	struct starpu_tag_s *tag_array[ntags];
															
 
																+	struct _starpu_tag *tag_array[ntags];
															
 
																 	_STARPU_LOG_IN();
															
 
																 	/* It is forbidden to block within callbacks or codelets */
															
 
																-	if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls())) {
															
 
																+	if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls()))
															
 
																+	{
															
 
																 		_STARPU_LOG_OUT_TAG("edeadlk");
															
 
																 		return -EDEADLK;
															
 
																 	}
															
@@ -299,8 +318,8 @@ int starpu_tag_wait_array(unsigned ntags, starpu_tag_t *id)
 
																 	/* only wait the tags that are not done yet */
															
 
																 	for (i = 0, current = 0; i < ntags; i++)
															
 
																 	{
															
 
																-		struct starpu_tag_s *tag = gettag_struct(id[i]);
															
 
																-		
															
 
																+		struct _starpu_tag *tag = gettag_struct(id[i]);
															
 
																+
															
 
																 		_starpu_spin_lock(&tag->lock);
															
 
																 		if (tag->state == STARPU_DONE)
															
@@ -321,9 +340,9 @@ int starpu_tag_wait_array(unsigned ntags, starpu_tag_t *id)
 
																 		_STARPU_LOG_OUT_TAG("all deps are already fulfilled");
															
 
																 		return 0;
															
 
																 	}
															
 
																-	
															
 
																+
															
 
																 	/* there is at least one task that is not finished */
															
 
																-	starpu_cg_t *cg = create_cg_apps(current);
															
 
																+	struct _starpu_cg *cg = create_cg_apps(current);
															
 
																 	for (i = 0; i < current; i++)
															
 
																 	{
															
@@ -331,15 +350,15 @@ int starpu_tag_wait_array(unsigned ntags, starpu_tag_t *id)
 
																 		_starpu_spin_unlock(&tag_array[i]->lock);
															
 
																 	}
															
 
																-	PTHREAD_MUTEX_LOCK(&cg->succ.succ_apps.cg_mutex);
															
 
																+	_STARPU_PTHREAD_MUTEX_LOCK(&cg->succ.succ_apps.cg_mutex);
															
 
																 	while (!cg->succ.succ_apps.completed)
															
 
																-		PTHREAD_COND_WAIT(&cg->succ.succ_apps.cg_cond, &cg->succ.succ_apps.cg_mutex);
															
 
																+		_STARPU_PTHREAD_COND_WAIT(&cg->succ.succ_apps.cg_cond, &cg->succ.succ_apps.cg_mutex);
															
 
																-	PTHREAD_MUTEX_UNLOCK(&cg->succ.succ_apps.cg_mutex);
															
 
																+	_STARPU_PTHREAD_MUTEX_UNLOCK(&cg->succ.succ_apps.cg_mutex);
															
 
																-	PTHREAD_MUTEX_DESTROY(&cg->succ.succ_apps.cg_mutex);
															
 
																-	PTHREAD_COND_DESTROY(&cg->succ.succ_apps.cg_cond);
															
 
																+	_STARPU_PTHREAD_MUTEX_DESTROY(&cg->succ.succ_apps.cg_mutex);
															
 
																+	_STARPU_PTHREAD_COND_DESTROY(&cg->succ.succ_apps.cg_cond);
															
 
																 	free(cg);
															
--- a/src/core/dependencies/tags.h
+++ b/src/core/dependencies/tags.h
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2009, 2010  Université de Bordeaux 1
															
 
																- * Copyright (C) 2010  Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2009-2011  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -23,9 +23,10 @@
 
																 #include <common/starpu_spinlock.h>
															
 
																 #include <core/dependencies/cg.h>
															
 
																-#define STARPU_TAG_SIZE        (sizeof(starpu_tag_t)*8)
															
 
																+#define _STARPU_TAG_SIZE        (sizeof(starpu_tag_t)*8)
															
 
																-typedef enum {
															
 
																+enum _starpu_tag_state
															
 
																+{
															
 
																 	/* this tag is not declared by any task */
															
 
																 	STARPU_INVALID_STATE,
															
 
																 	/* _starpu_tag_declare was called to associate the tag to a task */
															
@@ -40,31 +41,34 @@ typedef enum {
 
																 //	STARPU_SCHEDULED,
															
 
																 	/* the task has been performed */
															
 
																 	STARPU_DONE
															
 
																-} starpu_tag_state;
															
 
																+};
															
 
																-struct starpu_job_s;
															
 
																+struct _starpu_job;
															
 
																-struct starpu_tag_s {
															
 
																-	starpu_spinlock_t lock;
															
 
																+struct _starpu_tag
															
 
																+{
															
 
																+	/* Lock for this structure. Locking order is in dependency order: a tag
															
 
																+	 * must not be locked before locking a tag it depends on */
															
 
																+	struct _starpu_spinlock lock;
															
 
																 	starpu_tag_t id; /* an identifier for the task */
															
 
																-	starpu_tag_state state;
															
 
																+	enum _starpu_tag_state state;
															
 
																-	struct starpu_cg_list_s tag_successors;
															
 
																+	struct _starpu_cg_list tag_successors;
															
 
																-	struct starpu_job_s *job; /* which job is associated to the tag if any ? */
															
 
																+	struct _starpu_job *job; /* which job is associated to the tag if any ? */
															
 
																 	unsigned is_assigned;
															
 
																 	unsigned is_submitted;
															
 
																 };
															
 
																-void starpu_tag_declare_deps(starpu_tag_t id, unsigned ndeps, ...);
															
 
																+void _starpu_notify_dependencies(struct _starpu_job *j);
															
 
																+void _starpu_notify_tag_dependencies(struct _starpu_tag *tag);
															
 
																-void _starpu_notify_dependencies(struct starpu_job_s *j);
															
 
																-void _starpu_notify_tag_dependencies(struct starpu_tag_s *tag);
															
 
																+void _starpu_tag_declare(starpu_tag_t id, struct _starpu_job *job);
															
 
																+void _starpu_tag_set_ready(struct _starpu_tag *tag);
															
 
																-void _starpu_tag_declare(starpu_tag_t id, struct starpu_job_s *job);
															
 
																-void _starpu_tag_set_ready(struct starpu_tag_s *tag);
															
 
																+unsigned _starpu_submit_job_enforce_task_deps(struct _starpu_job *j);
															
 
																-unsigned _starpu_submit_job_enforce_task_deps(struct starpu_job_s *j);
															
 
																+void _starpu_tag_clear(void);
															
 
																 #endif // __TAGS_H__
															
--- a/src/core/dependencies/task_deps.c
+++ b/src/core/dependencies/task_deps.c
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2010  Université de Bordeaux 1
															
 
																- * Copyright (C) 2010  Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2010-2012  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -26,9 +26,9 @@
 
																 #include <core/dependencies/data_concurrency.h>
															
 
																 #include <profiling/bound.h>
															
 
																-static starpu_cg_t *create_cg_task(unsigned ntags, starpu_job_t j)
															
 
																+static struct _starpu_cg *create_cg_task(unsigned ntags, struct _starpu_job *j)
															
 
																 {
															
 
																-	starpu_cg_t *cg = (starpu_cg_t *) malloc(sizeof(starpu_cg_t));
															
 
																+	struct _starpu_cg *cg = (struct _starpu_cg *) malloc(sizeof(struct _starpu_cg));
															
 
																 	STARPU_ASSERT(cg);
															
 
																 	cg->ntags = ntags;
															
@@ -41,55 +41,59 @@ static starpu_cg_t *create_cg_task(unsigned ntags, starpu_job_t j)
 
																 	return cg;
															
 
																 }
															
 
																-/* the job lock must be taken */
															
 
																-static void _starpu_task_add_succ(starpu_job_t j, starpu_cg_t *cg)
															
 
																+static void _starpu_task_add_succ(struct _starpu_job *j, struct _starpu_cg *cg)
															
 
																 {
															
 
																 	STARPU_ASSERT(j);
															
 
																-	_starpu_add_successor_to_cg_list(&j->job_successors, cg);
															
 
																-
															
 
																-	if (j->terminated) {
															
 
																+	if (_starpu_add_successor_to_cg_list(&j->job_successors, cg))
															
 
																 		/* the task was already completed sooner */
															
 
																 		_starpu_notify_cg(cg);
															
 
																-	}
															
 
																 }
															
 
																-void _starpu_notify_task_dependencies(starpu_job_t j)
															
 
																+void _starpu_notify_task_dependencies(struct _starpu_job *j)
															
 
																 {
															
 
																 	_starpu_notify_cg_list(&j->job_successors);
															
 
																 }
															
 
																 /* task depends on the tasks in task array */
															
 
																-void starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[])
															
 
																+void _starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[], int check)
															
 
																 {
															
 
																 	if (ndeps == 0)
															
 
																 		return;
															
 
																-	starpu_job_t job;
															
 
																+	struct _starpu_job *job;
															
 
																 	job = _starpu_get_job_associated_to_task(task);
															
 
																-	PTHREAD_MUTEX_LOCK(&job->sync_mutex);
															
 
																+	if (check)
															
 
																+		STARPU_ASSERT_MSG(!job->submitted || !task->destroy || task->detach, "Task dependencies have to be set before submission");
															
 
																+	else
															
 
																+		STARPU_ASSERT_MSG(job->terminated <= 1, "Task dependencies have to be set before termination");
															
 
																-	starpu_cg_t *cg = create_cg_task(ndeps, job);
															
 
																+	struct _starpu_cg *cg = create_cg_task(ndeps, job);
															
 
																 	unsigned i;
															
 
																 	for (i = 0; i < ndeps; i++)
															
 
																 	{
															
 
																 		struct starpu_task *dep_task = task_array[i];
															
 
																-		starpu_job_t dep_job;
															
 
																+		struct _starpu_job *dep_job;
															
 
																 		dep_job = _starpu_get_job_associated_to_task(dep_task);
															
 
																-		STARPU_ASSERT(dep_job != job);
															
 
																-		STARPU_TRACE_TASK_DEPS(dep_job, job);
															
 
																+		STARPU_ASSERT_MSG(dep_job != job, "A task must not depend on itself.");
															
 
																+		if (check)
															
 
																+			STARPU_ASSERT_MSG(!dep_job->submitted || !dep_job->task->destroy || dep_job->task->detach, "Task dependencies have to be set before submission");
															
 
																+		else
															
 
																+			STARPU_ASSERT_MSG(dep_job->terminated <= 1, "Task dependencies have to be set before termination");
															
 
																+
															
 
																+		_STARPU_TRACE_TASK_DEPS(dep_job, job);
															
 
																 		_starpu_bound_task_dep(job, dep_job);
															
 
																-		PTHREAD_MUTEX_LOCK(&dep_job->sync_mutex);
															
 
																 		_starpu_task_add_succ(dep_job, cg);
															
 
																-		PTHREAD_MUTEX_UNLOCK(&dep_job->sync_mutex);
															
 
																 	}
															
 
																+}
															
 
																-	
															
 
																-	PTHREAD_MUTEX_UNLOCK(&job->sync_mutex);
															
 
																+void starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[])
															
 
																+{
															
 
																+	_starpu_task_declare_deps_array(task, ndeps, task_array, 1);
															
 
																 }
															
--- a/src/core/errorcheck.c
+++ b/src/core/errorcheck.c
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																  * Copyright (C) 2009, 2010  Université de Bordeaux 1
															
 
																- * Copyright (C) 2010  Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -18,9 +18,9 @@
 
																 #include <core/errorcheck.h>
															
 
																 #include <core/workers.h>
															
 
																-void _starpu_set_local_worker_status(starpu_worker_status st)
															
 
																+void _starpu_set_local_worker_status(enum _starpu_worker_status st)
															
 
																 {
															
 
																-	struct starpu_worker_s *worker = _starpu_get_local_worker_key();
															
 
																+	struct _starpu_worker *worker = _starpu_get_local_worker_key();
															
 
																 	/* It is possible that we call this function from the application (and
															
 
																 	 * thereforce outside a worker), for instance if we are executing the
															
@@ -29,9 +29,9 @@ void _starpu_set_local_worker_status(starpu_worker_status st)
 
																 		worker->status = st;
															
 
																 }
															
 
																-starpu_worker_status _starpu_get_local_worker_status(void)
															
 
																+enum _starpu_worker_status _starpu_get_local_worker_status(void)
															
 
																 {
															
 
																-	struct starpu_worker_s *worker = _starpu_get_local_worker_key();
															
 
																+	struct _starpu_worker *worker = _starpu_get_local_worker_key();
															
 
																 	if (STARPU_UNLIKELY(!worker))
															
 
																 		return STATUS_INVALID;
															
@@ -42,7 +42,7 @@ starpu_worker_status _starpu_get_local_worker_status(void)
 
																  * execution of a task. */
															
 
																 unsigned _starpu_worker_may_perform_blocking_calls(void)
															
 
																 {
															
 
																-	starpu_worker_status st = _starpu_get_local_worker_status();
															
 
																+	enum _starpu_worker_status st = _starpu_get_local_worker_status();
															
 
																 	return ( !(st == STATUS_CALLBACK) && !(st == STATUS_EXECUTING));
															
 
																 }
															
--- a/src/core/errorcheck.h
+++ b/src/core/errorcheck.h
@@ -1,8 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																  * Copyright (C) 2009, 2010  Université de Bordeaux 1
															
 
																- * Copyright (C) 2010  Centre National de la Recherche Scientifique
															
 
																- * Copyright (C) 2011  INRIA
															
 
																+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -22,7 +21,8 @@
 
																 #include <starpu.h>
															
 
																 /* This type describes in which state a worker may be. */
															
 
																-typedef enum {
															
 
																+enum _starpu_worker_status
															
 
																+{
															
 
																 	/* invalid status (for instance if we request the status of some thread
															
 
																 	 * that is not controlled by StarPU */
															
 
																 	STATUS_INVALID,
															
@@ -35,20 +35,16 @@ typedef enum {
 
																 	/* during the execution of the callback */
															
 
																 	STATUS_CALLBACK,
															
 
																 	/* while sleeping because there is nothing to do */
															
 
																-	STATUS_SLEEPING,
															
 
																-	/* changing ctx because a new one was create */
															
 
																-	STATUS_CHANGING_CTX,
															
 
																-	/* after having done join */
															
 
																-	STATUS_JOINED
															
 
																-} starpu_worker_status;
															
 
																+	STATUS_SLEEPING
															
 
																+};
															
 
																 /* Specify what the local worker is currently doing (eg. executing a callback).
															
 
																  * This permits to detect if this is legal to do a blocking call for instance.
															
 
																  * */
															
 
																-void _starpu_set_local_worker_status(starpu_worker_status st);
															
 
																+void _starpu_set_local_worker_status(enum _starpu_worker_status st);
															
 
																 /* Indicate what type of operation the worker is currently doing. */
															
 
																-starpu_worker_status _starpu_get_local_worker_status(void);
															
 
																+enum _starpu_worker_status _starpu_get_local_worker_status(void);
															
 
																 /* It is forbidden to do blocking calls during some operations such as callback
															
 
																  * or during the execution of a task. This function indicates whether it is
															
--- a/src/core/jobs.c
+++ b/src/core/jobs.c
@@ -1,8 +1,9 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2009, 2010, 2011  Université de Bordeaux 1
															
 
																- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2009-2012  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
															
 
																  * Copyright (C) 2011  Télécom-SudParis
															
 
																+ * Copyright (C) 2011  INRIA
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -26,23 +27,28 @@
 
																 #include <profiling/profiling.h>
															
 
																 #include <profiling/bound.h>
															
 
																 #include <starpu_top.h>
															
 
																+#include <top/starpu_top_core.h>
															
 
																-size_t _starpu_job_get_data_size(starpu_job_t j)
															
 
																+size_t _starpu_job_get_data_size(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, unsigned nimpl, struct _starpu_job *j)
															
 
																 {
															
 
																-	size_t size = 0;
															
 
																-
															
 
																 	struct starpu_task *task = j->task;
															
 
																-	unsigned nbuffers = task->cl->nbuffers;
															
 
																-
															
 
																-	unsigned buffer;
															
 
																-	for (buffer = 0; buffer < nbuffers; buffer++)
															
 
																-	{
															
 
																-		starpu_data_handle handle = task->buffers[buffer].handle;
															
 
																-		size += _starpu_data_get_size(handle);
															
 
																+	if (model && model->per_arch[arch][nimpl].size_base) {
															
 
																+		return model->per_arch[arch][nimpl].size_base(task, arch, nimpl);
															
 
																+	} else if (model && model->size_base) {
															
 
																+		return model->size_base(task, nimpl);
															
 
																+	} else {
															
 
																+		unsigned nbuffers = task->cl->nbuffers;
															
 
																+		size_t size = 0;
															
 
																+
															
 
																+		unsigned buffer;
															
 
																+		for (buffer = 0; buffer < nbuffers; buffer++)
															
 
																+		{
															
 
																+			starpu_data_handle_t handle = task->handles[buffer];
															
 
																+			size += _starpu_data_get_size(handle);
															
 
																+		}
															
 
																+		return size;
															
 
																 	}
															
 
																-
															
 
																-	return size;
															
 
																 }
															
 
																 /* we need to identify each task to generate the DAG. */
															
@@ -50,18 +56,18 @@ static unsigned job_cnt = 0;
 
																 void _starpu_exclude_task_from_dag(struct starpu_task *task)
															
 
																 {
															
 
																-	starpu_job_t j = _starpu_get_job_associated_to_task(task);
															
 
																+	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
															
 
																 	j->exclude_from_dag = 1;
															
 
																 }
															
 
																-/* create an internal starpu_job_t structure to encapsulate the task */
															
 
																-starpu_job_t __attribute__((malloc)) _starpu_job_create(struct starpu_task *task)
															
 
																+/* create an internal struct _starpu_job structure to encapsulate the task */
															
 
																+struct _starpu_job* __attribute__((malloc)) _starpu_job_create(struct starpu_task *task)
															
 
																 {
															
 
																-	starpu_job_t job;
															
 
																+	struct _starpu_job *job;
															
 
																         _STARPU_LOG_IN();
															
 
																-	job = starpu_job_new();
															
 
																+	job = _starpu_job_new();
															
 
																 	job->nimpl =0; /* best implementation */
															
 
																 	job->task = task;
															
@@ -71,7 +77,7 @@ starpu_job_t __attribute__((malloc)) _starpu_job_create(struct starpu_task *task
 
																 	job->terminated = 0;
															
 
																 #ifndef STARPU_USE_FXT
															
 
																-	if (_starpu_bound_recording || starpu_top_status_get())
															
 
																+	if (_starpu_bound_recording || _starpu_top_status_get())
															
 
																 #endif
															
 
																 		job->job_id = STARPU_ATOMIC_ADD(&job_cnt, 1);
															
 
																 #ifdef STARPU_USE_FXT
															
@@ -84,8 +90,10 @@ starpu_job_t __attribute__((malloc)) _starpu_job_create(struct starpu_task *task
 
																 	_starpu_cg_list_init(&job->job_successors);
															
 
																-	PTHREAD_MUTEX_INIT(&job->sync_mutex, NULL);
															
 
																-	PTHREAD_COND_INIT(&job->sync_cond, NULL);
															
 
																+	job->implicit_dep_handle = NULL;
															
 
																+
															
 
																+	_STARPU_PTHREAD_MUTEX_INIT(&job->sync_mutex, NULL);
															
 
																+	_STARPU_PTHREAD_COND_INIT(&job->sync_cond, NULL);
															
 
																 	job->bound_task = NULL;
															
@@ -99,29 +107,33 @@ starpu_job_t __attribute__((malloc)) _starpu_job_create(struct starpu_task *task
 
																 	return job;
															
 
																 }
															
 
																-void _starpu_job_destroy(starpu_job_t j)
															
 
																+void _starpu_job_destroy(struct _starpu_job *j)
															
 
																 {
															
 
																-	PTHREAD_COND_DESTROY(&j->sync_cond);
															
 
																-	PTHREAD_MUTEX_DESTROY(&j->sync_mutex);
															
 
																+	/* Wait for any code that was still working on the job (and was
															
 
																+	 * probably our waker) */
															
 
																+	_STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
															
 
																+	_STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
															
 
																+	_STARPU_PTHREAD_COND_DESTROY(&j->sync_cond);
															
 
																+	_STARPU_PTHREAD_MUTEX_DESTROY(&j->sync_mutex);
															
 
																 	if (j->task_size > 1)
															
 
																 	{
															
 
																-		PTHREAD_BARRIER_DESTROY(&j->before_work_barrier);
															
 
																-		PTHREAD_BARRIER_DESTROY(&j->after_work_barrier);
															
 
																+		_STARPU_PTHREAD_BARRIER_DESTROY(&j->before_work_barrier);
															
 
																+		_STARPU_PTHREAD_BARRIER_DESTROY(&j->after_work_barrier);
															
 
																 	}
															
 
																 	_starpu_cg_list_deinit(&j->job_successors);
															
 
																-	starpu_job_delete(j);
															
 
																+	_starpu_job_delete(j);
															
 
																 }
															
 
																-void _starpu_wait_job(starpu_job_t j)
															
 
																+void _starpu_wait_job(struct _starpu_job *j)
															
 
																 {
															
 
																 	STARPU_ASSERT(j->task);
															
 
																 	STARPU_ASSERT(!j->task->detach);
															
 
																         _STARPU_LOG_IN();
															
 
																-	PTHREAD_MUTEX_LOCK(&j->sync_mutex);
															
 
																+	_STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
															
 
																 	/* We wait for the flag to have a value of 2 which means that both the
															
 
																 	 * codelet's implementation and its callback have been executed. That
															
@@ -129,25 +141,20 @@ void _starpu_wait_job(starpu_job_t j)
 
																 	 * executed (so that we cannot destroy the task while it is still being
															
 
																 	 * manipulated by the driver). */
															
 
																 	while (j->terminated != 2)
															
 
																-		PTHREAD_COND_WAIT(&j->sync_cond, &j->sync_mutex);
															
 
																+		_STARPU_PTHREAD_COND_WAIT(&j->sync_cond, &j->sync_mutex);
															
 
																-	PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
															
 
																+	_STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
															
 
																         _STARPU_LOG_OUT();
															
 
																 }
															
 
																-void _starpu_handle_job_termination(starpu_job_t j, unsigned job_is_already_locked, int workerid)
															
 
																+void _starpu_handle_job_termination(struct _starpu_job *j, int workerid)
															
 
																 {
															
 
																 	struct starpu_task *task = j->task;
															
 
																 	unsigned sched_ctx = task->sched_ctx;
															
 
																-	
															
 
																-	if (!job_is_already_locked)
															
 
																-		PTHREAD_MUTEX_LOCK(&j->sync_mutex);
															
 
																-	task->status = STARPU_TASK_FINISHED;
															
 
																+	_STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
															
 
																-	/* in case there are dependencies, wake up the proper tasks */
															
 
																-	j->submitted = 0;
															
 
																-	_starpu_notify_dependencies(j);
															
 
																+	task->status = STARPU_TASK_FINISHED;
															
 
																 	/* We must have set the j->terminated flag early, so that it is
															
 
																 	 * possible to express task dependencies within the callback
															
@@ -155,46 +162,54 @@ void _starpu_handle_job_termination(starpu_job_t j, unsigned job_is_already_lock
 
																 	 * the callback is not done yet. */
															
 
																 	j->terminated = 1;
															
 
																-	if (!job_is_already_locked)
															
 
																-		PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
															
 
																+	_STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
															
 
																+
															
 
																+	/* Task does not have a cl, but has explicit data dependencies, we need
															
 
																+	 * to tell them that we will not exist any more before notifying the
															
 
																+	 * tasks waiting for us */
															
 
																+	if (j->implicit_dep_handle)
															
 
																+		_starpu_release_data_enforce_sequential_consistency(j->task, j->implicit_dep_handle);
															
 
																-	/* the callback is executed after the dependencies so that we may remove the tag 
															
 
																+	/* in case there are dependencies, wake up the proper tasks */
															
 
																+	_starpu_notify_dependencies(j);
															
 
																+
															
 
																+	/* the callback is executed after the dependencies so that we may remove the tag
															
 
																  	 * of the task itself */
															
 
																 	if (task->callback_func)
															
 
																 	{
															
 
																 		int profiling = starpu_profiling_status_get();
															
 
																 		if (profiling && task->profiling_info)
															
 
																-			starpu_clock_gettime(&task->profiling_info->callback_start_time);
															
 
																+			_starpu_clock_gettime(&task->profiling_info->callback_start_time);
															
 
																 		/* so that we can check whether we are doing blocking calls
															
 
																 		 * within the callback */
															
 
																 		_starpu_set_local_worker_status(STATUS_CALLBACK);
															
 
																-		
															
 
																-		
															
 
																+
															
 
																+
															
 
																 		/* Perhaps we have nested callbacks (eg. with chains of empty
															
 
																 		 * tasks). So we store the current task and we will restore it
															
 
																 		 * later. */
															
 
																-		struct starpu_task *current_task = starpu_get_current_task();
															
 
																+		struct starpu_task *current_task = starpu_task_get_current();
															
 
																 		_starpu_set_current_task(task);
															
 
																-		STARPU_TRACE_START_CALLBACK(j);
															
 
																+		_STARPU_TRACE_START_CALLBACK(j);
															
 
																 		task->callback_func(task->callback_arg);
															
 
																-		STARPU_TRACE_END_CALLBACK(j);
															
 
																-		
															
 
																+		_STARPU_TRACE_END_CALLBACK(j);
															
 
																+
															
 
																 		_starpu_set_current_task(current_task);
															
 
																 		_starpu_set_local_worker_status(STATUS_UNKNOWN);
															
 
																 		if (profiling && task->profiling_info)
															
 
																-			starpu_clock_gettime(&task->profiling_info->callback_end_time);
															
 
																+			_starpu_clock_gettime(&task->profiling_info->callback_end_time);
															
 
																 	}
															
 
																 	/* control task should not execute post_exec_hook */
															
 
																 	if(task->cl != NULL && !task->control_task)
															
 
																 	  _starpu_sched_post_exec_hook(task);
															
 
																-	STARPU_TRACE_TASK_DONE(j);
															
 
																+	_STARPU_TRACE_TASK_DONE(j);
															
 
																 	/* NB: we do not save those values before the callback, in case the
															
 
																 	 * application changes some parameters eventually (eg. a task may not
															
@@ -203,27 +218,24 @@ void _starpu_handle_job_termination(starpu_job_t j, unsigned job_is_already_lock
 
																 	int detach = task->detach;
															
 
																 	int regenerate = task->regenerate;
															
 
																-	if (!detach)
															
 
																+	/* we do not desallocate the job structure if some is going to
															
 
																+	 * wait after the task */
															
 
																+	_STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
															
 
																+	/* A value of 2 is put to specify that not only the codelet but
															
 
																+	 * also the callback were executed. */
															
 
																+	j->terminated = 2;
															
 
																+	_STARPU_PTHREAD_COND_BROADCAST(&j->sync_cond);
															
 
																+
															
 
																+	_STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
															
 
																+
															
 
																+	if (detach)
															
 
																 	{
															
 
																-		/* we do not desallocate the job structure if some is going to
															
 
																-		 * wait after the task */
															
 
																-		if (!job_is_already_locked)
															
 
																-			PTHREAD_MUTEX_LOCK(&j->sync_mutex);
															
 
																-		/* A value of 2 is put to specify that not only the codelet but
															
 
																-		 * also the callback were executed. */
															
 
																-		j->terminated = 2;
															
 
																-		PTHREAD_COND_BROADCAST(&j->sync_cond);
															
 
																-
															
 
																-		if (!job_is_already_locked)
															
 
																-			PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
															
 
																-	}
															
 
																-	else {
															
 
																 		/* no one is going to synchronize with that task so we release
															
 
																 		 * the data structures now. In case the job was already locked
															
 
																 		 * by the caller, it is its responsability to destroy the task.
															
 
																 		 * */
															
 
																-		if (!job_is_already_locked && destroy)
															
 
																-			starpu_task_destroy(task);
															
 
																+		if (destroy)
															
 
																+			_starpu_task_destroy(task);
															
 
																 	}
															
 
																 	if (regenerate)
															
@@ -233,10 +245,9 @@ void _starpu_handle_job_termination(starpu_job_t j, unsigned job_is_already_lock
 
																 		/* We reuse the same job structure */
															
 
																 		int ret = _starpu_submit_job(j, 1);
															
 
																 		STARPU_ASSERT(!ret);
															
 
																-	}	
															
 
																-	else {
															
 
																-		_starpu_decrement_nsubmitted_tasks();
															
 
																 	}
															
 
																+	_starpu_decrement_nsubmitted_tasks();
															
 
																+	_starpu_decrement_nready_tasks();
															
 
																 	_starpu_decrement_nsubmitted_tasks_of_sched_ctx(sched_ctx);
															
@@ -244,9 +255,9 @@ void _starpu_handle_job_termination(starpu_job_t j, unsigned job_is_already_lock
 
																 		_starpu_decrement_nsubmitted_tasks_of_worker(workerid);
															
 
																 }
															
 
																-/* This function is called when a new task is submitted to StarPU 
															
 
																+/* This function is called when a new task is submitted to StarPU
															
 
																  * it returns 1 if the tag deps are not fulfilled, 0 otherwise */
															
 
																-static unsigned _starpu_not_all_tag_deps_are_fulfilled(starpu_job_t j)
															
 
																+static unsigned _starpu_not_all_tag_deps_are_fulfilled(struct _starpu_job *j)
															
 
																 {
															
 
																 	unsigned ret;
															
@@ -256,9 +267,9 @@ static unsigned _starpu_not_all_tag_deps_are_fulfilled(starpu_job_t j)
 
																 		return 0;
															
 
																 	}
															
 
																-	struct starpu_tag_s *tag = j->tag;
															
 
																+	struct _starpu_tag *tag = j->tag;
															
 
																-	struct starpu_cg_list_s *tag_successors = &tag->tag_successors;
															
 
																+	struct _starpu_cg_list *tag_successors = &tag->tag_successors;
															
 
																 	_starpu_spin_lock(&tag->lock);
															
@@ -268,7 +279,8 @@ static unsigned _starpu_not_all_tag_deps_are_fulfilled(starpu_job_t j)
 
																                 j->task->status = STARPU_TASK_BLOCKED_ON_TAG;
															
 
																 		ret = 1;
															
 
																 	}
															
 
																-	else {
															
 
																+	else
															
 
																+	{
															
 
																 		/* existing deps (if any) are fulfilled */
															
 
																 		tag->state = STARPU_READY;
															
 
																 		/* already prepare for next run */
															
@@ -280,97 +292,95 @@ static unsigned _starpu_not_all_tag_deps_are_fulfilled(starpu_job_t j)
 
																 	return ret;
															
 
																 }
															
 
																-#ifdef STARPU_DEVEL
															
 
																-#warning TODO remove the job_is_already_locked parameter
															
 
																-#endif
															
 
																-static unsigned _starpu_not_all_task_deps_are_fulfilled(starpu_job_t j, unsigned job_is_already_locked)
															
 
																+static unsigned _starpu_not_all_task_deps_are_fulfilled(struct _starpu_job *j)
															
 
																 {
															
 
																 	unsigned ret;
															
 
																-	struct starpu_cg_list_s *job_successors = &j->job_successors;
															
 
																-
															
 
																-	if (!job_is_already_locked)
															
 
																-		PTHREAD_MUTEX_LOCK(&j->sync_mutex);	
															
 
																+	struct _starpu_cg_list *job_successors = &j->job_successors;
															
 
																 	if (!j->submitted || (job_successors->ndeps != job_successors->ndeps_completed))
															
 
																 	{
															
 
																                 j->task->status = STARPU_TASK_BLOCKED_ON_TASK;
															
 
																 		ret = 1;
															
 
																 	}
															
 
																-	else {
															
 
																+	else
															
 
																+	{
															
 
																 		/* existing deps (if any) are fulfilled */
															
 
																 		/* already prepare for next run */
															
 
																 		job_successors->ndeps_completed = 0;
															
 
																 		ret = 0;
															
 
																 	}
															
 
																-	if (!job_is_already_locked)
															
 
																-		PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
															
 
																-
															
 
																 	return ret;
															
 
																 }
															
 
																-
															
 
																-
															
 
																 /*
															
 
																  *	In order, we enforce tag, task and data dependencies. The task is
															
 
																  *	passed to the scheduler only once all these constraints are fulfilled.
															
 
																+ *
															
 
																+ *	The job mutex has to be taken for atomicity with task submission, and
															
 
																+ *	is released here.
															
 
																  */
															
 
																-#ifdef STARPU_DEVEL
															
 
																-#warning TODO remove the job_is_already_locked parameter
															
 
																-#endif
															
 
																-unsigned _starpu_enforce_deps_and_schedule(starpu_job_t j, unsigned job_is_already_locked)
															
 
																+unsigned _starpu_enforce_deps_and_schedule(struct _starpu_job *j)
															
 
																 {
															
 
																 	unsigned ret;
															
 
																         _STARPU_LOG_IN();
															
 
																 	/* enfore tag dependencies */
															
 
																-	if (_starpu_not_all_tag_deps_are_fulfilled(j)) {
															
 
																+	if (_starpu_not_all_tag_deps_are_fulfilled(j))
															
 
																+	{
															
 
																+		_STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
															
 
																                 _STARPU_LOG_OUT_TAG("not_all_tag_deps_are_fulfilled");
															
 
																 		return 0;
															
 
																         }
															
 
																 	/* enfore task dependencies */
															
 
																-	if (_starpu_not_all_task_deps_are_fulfilled(j, job_is_already_locked)) {
															
 
																+	if (_starpu_not_all_task_deps_are_fulfilled(j))
															
 
																+	{
															
 
																+		_STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
															
 
																                 _STARPU_LOG_OUT_TAG("not_all_task_deps_are_fulfilled");
															
 
																 		return 0;
															
 
																         }
															
 
																+	_STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
															
 
																 	/* enforce data dependencies */
															
 
																-	if (_starpu_submit_job_enforce_data_deps(j)) {
															
 
																+	if (_starpu_submit_job_enforce_data_deps(j))
															
 
																+	{
															
 
																                 _STARPU_LOG_OUT_TAG("enforce_data_deps");
															
 
																 		return 0;
															
 
																         }
															
 
																-	ret = _starpu_push_task(j, job_is_already_locked);
															
 
																+	ret = _starpu_push_task(j);
															
 
																         _STARPU_LOG_OUT();
															
 
																 	return ret;
															
 
																 }
															
 
																 /* Tag deps are already fulfilled */
															
 
																-#ifdef STARPU_DEVEL
															
 
																-#warning TODO remove the job_is_already_locked parameter
															
 
																-#endif
															
 
																-unsigned _starpu_enforce_deps_starting_from_task(starpu_job_t j, unsigned job_is_already_locked)
															
 
																+unsigned _starpu_enforce_deps_starting_from_task(struct _starpu_job *j)
															
 
																 {
															
 
																 	unsigned ret;
															
 
																+	_STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
															
 
																 	/* enfore task dependencies */
															
 
																-	if (_starpu_not_all_task_deps_are_fulfilled(j, job_is_already_locked))
															
 
																+	if (_starpu_not_all_task_deps_are_fulfilled(j))
															
 
																+	{
															
 
																+		_STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
															
 
																 		return 0;
															
 
																+	}
															
 
																+	_STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
															
 
																 	/* enforce data dependencies */
															
 
																 	if (_starpu_submit_job_enforce_data_deps(j))
															
 
																 		return 0;
															
 
																-	ret = _starpu_push_task(j, job_is_already_locked);
															
 
																+	ret = _starpu_push_task(j);
															
 
																 	return ret;
															
 
																 }
															
 
																 /* This function must be called with worker->sched_mutex taken */
															
 
																-struct starpu_task *_starpu_pop_local_task(struct starpu_worker_s *worker)
															
 
																+struct starpu_task *_starpu_pop_local_task(struct _starpu_worker *worker)
															
 
																 {
															
 
																 	struct starpu_task *task = NULL;
															
@@ -380,27 +390,27 @@ struct starpu_task *_starpu_pop_local_task(struct starpu_worker_s *worker)
 
																 	return task;
															
 
																 }
															
 
																-int _starpu_push_local_task(struct starpu_worker_s *worker, struct starpu_task *task, int back)
															
 
																+int _starpu_push_local_task(struct _starpu_worker *worker, struct starpu_task *task, int back)
															
 
																 {
															
 
																 	/* Check that the worker is able to execute the task ! */
															
 
																 	STARPU_ASSERT(task && task->cl);
															
 
																 	if (STARPU_UNLIKELY(!(worker->worker_mask & task->cl->where)))
															
 
																 		return -ENODEV;
															
 
																-	PTHREAD_MUTEX_LOCK(&worker->sched_mutex);
															
 
																+	_STARPU_PTHREAD_MUTEX_LOCK(worker->sched_mutex);
															
 
																 	if (back)
															
 
																 		starpu_task_list_push_back(&worker->local_tasks, task);
															
 
																 	else
															
 
																 		starpu_task_list_push_front(&worker->local_tasks, task);
															
 
																-	PTHREAD_COND_BROADCAST(&worker->sched_cond);
															
 
																-	PTHREAD_MUTEX_UNLOCK(&worker->sched_mutex);
															
 
																+	_STARPU_PTHREAD_COND_BROADCAST(worker->sched_cond);
															
 
																+	_STARPU_PTHREAD_MUTEX_UNLOCK(worker->sched_mutex);
															
 
																 	return 0;
															
 
																 }
															
 
																-const char *_starpu_get_model_name(starpu_job_t j)
															
 
																+const char *_starpu_get_model_name(struct _starpu_job *j)
															
 
																 {
															
 
																 	if (!j)
															
 
																 		return NULL;
															
@@ -410,7 +420,8 @@ const char *_starpu_get_model_name(starpu_job_t j)
 
																             && task->cl->model
															
 
																             && task->cl->model->symbol)
															
 
																                 return task->cl->model->symbol;
															
 
																-        else {
															
 
																+        else
															
 
																+	{
															
 
																 #ifdef STARPU_USE_FXT
															
 
																                 return j->model_name;
															
 
																 #else
															
--- a/src/core/jobs.h
+++ b/src/core/jobs.h
@@ -1,8 +1,9 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2009, 2010  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2009-2012  Université de Bordeaux 1
															
 
																  * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
															
 
																  * Copyright (C) 2011  Télécom-SudParis
															
 
																+ * Copyright (C) 2011  INRIA
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -42,20 +43,19 @@
 
																 #include <cuda.h>
															
 
																 #endif
															
 
																-struct starpu_worker_s;
															
 
																+struct _starpu_worker;
															
 
																 /* codelet function */
															
 
																-typedef void (*cl_func)(void **, void *);
															
 
																-typedef void (*callback)(void *);
															
 
																+typedef void (*_starpu_cl_func_t)(void **, void *);
															
 
																-#define STARPU_CPU_MAY_PERFORM(j)	((j)->task->cl->where & STARPU_CPU)
															
 
																-#define STARPU_CUDA_MAY_PERFORM(j)      ((j)->task->cl->where & STARPU_CUDA)
															
 
																-#define STARPU_SPU_MAY_PERFORM(j)	((j)->task->cl->where & STARPU_SPU)
															
 
																-#define STARPU_GORDON_MAY_PERFORM(j)	((j)->task->cl->where & STARPU_GORDON)
															
 
																-#define STARPU_OPENCL_MAY_PERFORM(j)	((j)->task->cl->where & STARPU_OPENCL)
															
 
																+#define _STARPU_CPU_MAY_PERFORM(j)	((j)->task->cl->where & STARPU_CPU)
															
 
																+#define _STARPU_CUDA_MAY_PERFORM(j)      ((j)->task->cl->where & STARPU_CUDA)
															
 
																+#define _STARPU_SPU_MAY_PERFORM(j)	((j)->task->cl->where & STARPU_SPU)
															
 
																+#define _STARPU_GORDON_MAY_PERFORM(j)	((j)->task->cl->where & STARPU_GORDON)
															
 
																+#define _STARPU_OPENCL_MAY_PERFORM(j)	((j)->task->cl->where & STARPU_OPENCL)
															
 
																 /* A job is the internal representation of a task. */
															
 
																-LIST_TYPE(starpu_job,
															
 
																+LIST_TYPE(_starpu_job,
															
 
																 	/* The implementation associated to the job */
															
 
																 	unsigned nimpl;
															
@@ -71,15 +71,20 @@ LIST_TYPE(starpu_job,
 
																 	/* To avoid deadlocks, we reorder the different buffers accessed to by
															
 
																 	 * the task so that we always grab the rw-lock associated to the
															
 
																 	 * handles in the same order. */
															
 
																-	struct starpu_buffer_descr_t ordered_buffers[STARPU_NMAXBUFS];
															
 
																-	
															
 
																+	struct starpu_buffer_descr ordered_buffers[STARPU_NMAXBUFS];
															
 
																+
															
 
																 	/* If a tag is associated to the job, this points to the internal data
															
 
																 	 * structure that describes the tag status. */
															
 
																-	struct starpu_tag_s *tag;
															
 
																+	struct _starpu_tag *tag;
															
 
																 	/* Maintain a list of all the completion groups that depend on the job.
															
 
																 	 * */
															
 
																-	struct starpu_cg_list_s job_successors;
															
 
																+	struct _starpu_cg_list job_successors;
															
 
																+
															
 
																+	/* For tasks with cl==NULL but submitted with explicit data dependency,
															
 
																+	 * the handle for this dependency, so as to remove the task from the
															
 
																+	 * last_writer/readers */
															
 
																+	starpu_data_handle_t implicit_dep_handle;
															
 
																 	/* The value of the footprint that identifies the job may be stored in
															
 
																 	 * this structure. */
															
@@ -128,43 +133,43 @@ LIST_TYPE(starpu_job,
 
																 	/* Parallel workers may have to synchronize before/after the execution of a parallel task. */
															
 
																 	pthread_barrier_t before_work_barrier;
															
 
																 	pthread_barrier_t after_work_barrier;
															
 
																-);
															
 
																+)
															
 
																-/* Create an internal starpu_job_t structure to encapsulate the task. */
															
 
																-starpu_job_t __attribute__((malloc)) _starpu_job_create(struct starpu_task *task);
															
 
																+/* Create an internal struct _starpu_job *structure to encapsulate the task. */
															
 
																+struct _starpu_job* __attribute__((malloc)) _starpu_job_create(struct starpu_task *task);
															
 
																 /* Destroy the data structure associated to the job structure */
															
 
																-void _starpu_job_destroy(starpu_job_t j);
															
 
																+void _starpu_job_destroy(struct _starpu_job *j);
															
 
																 /* Wait for the termination of the job */
															
 
																-void _starpu_wait_job(starpu_job_t j);
															
 
																+void _starpu_wait_job(struct _starpu_job *j);
															
 
																 /* Specify that the task should not appear in the DAG generated by debug tools. */
															
 
																 void _starpu_exclude_task_from_dag(struct starpu_task *task);
															
 
																 /* try to submit job j, enqueue it if it's not schedulable yet */
															
 
																-unsigned _starpu_enforce_deps_and_schedule(starpu_job_t j, unsigned job_is_already_locked);
															
 
																-unsigned _starpu_enforce_deps_starting_from_task(starpu_job_t j, unsigned job_is_already_locked);
															
 
																+unsigned _starpu_enforce_deps_and_schedule(struct _starpu_job *j);
															
 
																+unsigned _starpu_enforce_deps_starting_from_task(struct _starpu_job *j);
															
 
																 /* This function must be called after the execution of a job, this triggers all
															
 
																  * job's dependencies and perform the callback function if any. */
															
 
																-void _starpu_handle_job_termination(starpu_job_t j, unsigned job_is_already_locked, int workerid);
															
 
																+void _starpu_handle_job_termination(struct _starpu_job *j, int workerid);
															
 
																 /* Get the sum of the size of the data accessed by the job. */
															
 
																-size_t _starpu_job_get_data_size(starpu_job_t j);
															
 
																+size_t _starpu_job_get_data_size(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, unsigned nimpl, struct _starpu_job *j);
															
 
																 /* Get a task from the local pool of tasks that were explicitly attributed to
															
 
																  * that worker. */
															
 
																-struct starpu_task *_starpu_pop_local_task(struct starpu_worker_s *worker);
															
 
																+struct starpu_task *_starpu_pop_local_task(struct _starpu_worker *worker);
															
 
																 /* Put a task into the pool of tasks that are explicitly attributed to the
															
 
																  * specified worker. If "back" is set, the task is put at the back of the list.
															
 
																  * Considering the tasks are popped from the back, this value should be 0 to
															
 
																  * enforce a FIFO ordering. */
															
 
																-int _starpu_push_local_task(struct starpu_worker_s *worker, struct starpu_task *task, int back);
															
 
																+int _starpu_push_local_task(struct _starpu_worker *worker, struct starpu_task *task, int back);
															
 
																 /* Returns the symbol associated to that job if any. */
															
 
																-const char *_starpu_get_model_name(starpu_job_t j);
															
 
																+const char *_starpu_get_model_name(struct _starpu_job *j);
															
 
																 #endif // __JOBS_H__
															
--- a/src/core/perfmodel/perfmodel.c
+++ b/src/core/perfmodel/perfmodel.c
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2009, 2010, 2011  Université de Bordeaux 1
															
 
																- * Copyright (C) 2010  Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2009-2012  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
															
 
																  * Copyright (C) 2011  Télécom-SudParis
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
@@ -30,7 +30,7 @@
 
																 #ifdef STARPU_HAVE_WINDOWS
															
 
																 #include <windows.h>
															
 
																 #endif
															
 
																-		
															
 
																+
															
 
																 /* This flag indicates whether performance models should be calibrated or not.
															
 
																  *	0: models need not be calibrated
															
 
																  *	1: models must be calibrated
															
@@ -50,7 +50,7 @@ unsigned _starpu_get_calibrate_flag(void)
 
																 enum starpu_perf_archtype starpu_worker_get_perf_archtype(int workerid)
															
 
																 {
															
 
																-	struct starpu_machine_config_s *config = _starpu_get_machine_config();
															
 
																+	struct _starpu_machine_config *config = _starpu_get_machine_config();
															
 
																 	/* This workerid may either be a basic worker or a combined worker */
															
 
																 	unsigned nworkers = config->topology.nworkers;
															
@@ -68,14 +68,18 @@ enum starpu_perf_archtype starpu_worker_get_perf_archtype(int workerid)
 
																  * PER ARCH model
															
 
																  */
															
 
																-static double per_arch_task_expected_perf(struct starpu_perfmodel_t *model, enum starpu_perf_archtype arch, struct starpu_task *task, unsigned nimpl)
															
 
																+static double per_arch_task_expected_perf(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, struct starpu_task *task, unsigned nimpl)
															
 
																 {
															
 
																-	double exp = -1.0;
															
 
																-	double (*per_arch_cost_model)(struct starpu_buffer_descr_t *);
															
 
																-	
															
 
																+	double exp = NAN;
															
 
																+	double (*per_arch_cost_function)(struct starpu_task *task, enum starpu_perf_archtype arch, unsigned nimpl);
															
 
																+	double (*per_arch_cost_model)(struct starpu_buffer_descr *);
															
 
																+
															
 
																+	per_arch_cost_function = model->per_arch[arch][nimpl].cost_function;
															
 
																 	per_arch_cost_model = model->per_arch[arch][nimpl].cost_model;
															
 
																-	if (per_arch_cost_model)
															
 
																+	if (per_arch_cost_function)
															
 
																+		exp = per_arch_cost_function(task, arch, nimpl);
															
 
																+	else if (per_arch_cost_model)
															
 
																 		exp = per_arch_cost_model(task->buffers);
															
 
																 	return exp;
															
@@ -89,50 +93,67 @@ double starpu_worker_get_relative_speedup(enum starpu_perf_archtype perf_archtyp
 
																 {
															
 
																 	if (perf_archtype < STARPU_CUDA_DEFAULT)
															
 
																 	{
															
 
																-		return STARPU_CPU_ALPHA * (perf_archtype + 1);
															
 
																+		return _STARPU_CPU_ALPHA * (perf_archtype + 1);
															
 
																 	}
															
 
																 	else if (perf_archtype < STARPU_OPENCL_DEFAULT)
															
 
																 	{
															
 
																-		return STARPU_CUDA_ALPHA;
															
 
																+		return _STARPU_CUDA_ALPHA;
															
 
																 	}
															
 
																 	else if (perf_archtype < STARPU_GORDON_DEFAULT)
															
 
																 	{
															
 
																-		return STARPU_OPENCL_ALPHA;
															
 
																+		return _STARPU_OPENCL_ALPHA;
															
 
																 	}
															
 
																-	else if (perf_archtype < STARPU_NARCH_VARIATIONS) {
															
 
																+	else if (perf_archtype < STARPU_NARCH_VARIATIONS)
															
 
																+	{
															
 
																 		/* Gordon value */
															
 
																-		return STARPU_GORDON_ALPHA;
															
 
																+		return _STARPU_GORDON_ALPHA;
															
 
																 	}
															
 
																 	STARPU_ABORT();
															
 
																 	/* Never reached ! */
															
 
																-	return -1.0;
															
 
																+	return NAN;
															
 
																 }
															
 
																-static double common_task_expected_perf(struct starpu_perfmodel_t *model, enum starpu_perf_archtype arch, struct starpu_task *task)
															
 
																+static double common_task_expected_perf(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, struct starpu_task *task, unsigned nimpl)
															
 
																 {
															
 
																 	double exp;
															
 
																 	double alpha;
															
 
																-	if (model->cost_model) {
															
 
																+	if (model->cost_function)
															
 
																+	{
															
 
																+		exp = model->cost_function(task, nimpl);
															
 
																+		alpha = starpu_worker_get_relative_speedup(arch);
															
 
																+
															
 
																+		STARPU_ASSERT(!_STARPU_IS_ZERO(alpha));
															
 
																+
															
 
																+		return (exp/alpha);
															
 
																+	}
															
 
																+	else if (model->cost_model)
															
 
																+	{
															
 
																 		exp = model->cost_model(task->buffers);
															
 
																 		alpha = starpu_worker_get_relative_speedup(arch);
															
 
																-		STARPU_ASSERT(alpha != 0.0f);
															
 
																+		STARPU_ASSERT(!_STARPU_IS_ZERO(alpha));
															
 
																 		return (exp/alpha);
															
 
																 	}
															
 
																-	return -1.0;
															
 
																+	return NAN;
															
 
																 }
															
 
																-void _starpu_load_perfmodel(struct starpu_perfmodel_t *model)
															
 
																+void _starpu_load_perfmodel(struct starpu_perfmodel *model)
															
 
																 {
															
 
																 	if (!model || model->is_loaded)
															
 
																 		return;
															
 
																-	switch (model->type) {
															
 
																+	int load_model = _starpu_register_model(model);
															
 
																+
															
 
																+	if (!load_model)
															
 
																+		return;
															
 
																+
															
 
																+	switch (model->type)
															
 
																+	{
															
 
																 		case STARPU_PER_ARCH:
															
 
																 		case STARPU_COMMON:
															
 
																 			break;
															
@@ -150,20 +171,21 @@ void _starpu_load_perfmodel(struct starpu_perfmodel_t *model)
 
																 			STARPU_ABORT();
															
 
																 	}
															
 
																-	_starpu_register_model(model);
															
 
																 	model->is_loaded = 1;
															
 
																 }
															
 
																-static double starpu_model_expected_perf(struct starpu_task *task, struct starpu_perfmodel_t *model, enum starpu_perf_archtype arch,  unsigned nimpl)
															
 
																+static double starpu_model_expected_perf(struct starpu_task *task, struct starpu_perfmodel *model, enum starpu_perf_archtype arch,  unsigned nimpl)
															
 
																 {
															
 
																-	if (model) {
															
 
																-		starpu_job_t j = _starpu_get_job_associated_to_task(task);
															
 
																-		switch (model->type) {
															
 
																+	if (model)
															
 
																+	{
															
 
																+		struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
															
 
																+		switch (model->type)
															
 
																+		{
															
 
																 			case STARPU_PER_ARCH:
															
 
																 				return per_arch_task_expected_perf(model, arch, task, nimpl);
															
 
																 			case STARPU_COMMON:
															
 
																-				return common_task_expected_perf(model, arch, task);
															
 
																+				return common_task_expected_perf(model, arch, task, nimpl);
															
 
																 			case STARPU_HISTORY_BASED:
															
@@ -178,7 +200,7 @@ static double starpu_model_expected_perf(struct starpu_task *task, struct starpu
 
																 			default:
															
 
																 				STARPU_ABORT();
															
 
																-		};
															
 
																+		}
															
 
																 	}
															
 
																 	/* no model was found */
															
@@ -196,13 +218,89 @@ double starpu_task_expected_power(struct starpu_task *task, enum starpu_perf_arc
 
																 	return starpu_model_expected_perf(task, task->cl->power_model, arch, nimpl);
															
 
																 }
															
 
																+double starpu_task_expected_conversion_time(struct starpu_task *task,
															
 
																+					    enum starpu_perf_archtype arch,
															
 
																+					    unsigned nimpl)
															
 
																+{
															
 
																+	unsigned i;
															
 
																+	int err;
															
 
																+	double sum = 0.0;
															
 
																+	unsigned int node, cpu_node;
															
 
																+
															
 
																+	/* We need to get one node per archtype. This is kinda ugly,
															
 
																+	 * but it does the job.
															
 
																+	 * XXX : Should we return 0 if there are no devices ?
															
 
																+	 * (err != 1 && err != -ERANGE)
															
 
																+	 */
															
 
																+#ifdef STARPU_USE_CPU
															
 
																+	int cpu_worker;
															
 
																+	err = starpu_worker_get_ids_by_type(STARPU_CPU_WORKER,
															
 
																+					    &cpu_worker, 1);
															
 
																+	if (err != 1 && err != -ERANGE)
															
 
																+		return 0.0;
															
 
																+	cpu_node = starpu_worker_get_memory_node(cpu_worker);
															
 
																+#endif
															
 
																+#ifdef STARPU_USE_CUDA
															
 
																+	int cuda_worker, cuda_node;
															
 
																+	err = starpu_worker_get_ids_by_type(STARPU_CUDA_WORKER,
															
 
																+					    &cuda_worker, 1);
															
 
																+	if (err != 1 && err != -ERANGE)
															
 
																+		return 0.0;
															
 
																+	cuda_node = starpu_worker_get_memory_node(cuda_worker);
															
 
																+#endif
															
 
																+#ifdef STARPU_USE_OPENCL
															
 
																+	int opencl_worker, opencl_node;
															
 
																+	err = starpu_worker_get_ids_by_type(STARPU_OPENCL_WORKER,
															
 
																+					    &opencl_worker, 1);
															
 
																+	if (err != 1 && err != -ERANGE)
															
 
																+		return 0.0;
															
 
																+
															
 
																+	opencl_node = starpu_worker_get_memory_node(opencl_worker);
															
 
																+#endif
															
 
																+
															
 
																+	for (i = 0; i < task->cl->nbuffers; i++)
															
 
																+	{
															
 
																+		starpu_data_handle_t handle;
															
 
																+		struct starpu_task *conversion_task;
															
 
																+
															
 
																+		handle = task->handles[i];
															
 
																+		if (!_starpu_data_is_multiformat_handle(handle))
															
 
																+			continue;
															
 
																+
															
 
																+		if (arch < STARPU_CUDA_DEFAULT)
															
 
																+			node = cpu_node;
															
 
																+#ifdef STARPU_USE_CUDA
															
 
																+		else if (arch >= STARPU_CUDA_DEFAULT && arch < STARPU_OPENCL_DEFAULT)
															
 
																+			node = cuda_node;
															
 
																+#endif
															
 
																+#ifdef STARPU_USE_OPENCL
															
 
																+		else if (arch >= STARPU_OPENCL_DEFAULT && arch < STARPU_GORDON_DEFAULT)
															
 
																+			node = opencl_node;
															
 
																+#endif
															
 
																+		else
															
 
																+			STARPU_ASSERT(0);
															
 
																+
															
 
																+		if (!_starpu_handle_needs_conversion_task(handle, node))
															
 
																+			continue;
															
 
																+
															
 
																+		conversion_task = _starpu_create_conversion_task(handle, node);
															
 
																+		sum += starpu_task_expected_length(conversion_task, arch, nimpl);
															
 
																+		handle->refcnt--;
															
 
																+		handle->busy_count--;
															
 
																+		starpu_task_deinit(conversion_task);
															
 
																+		free(conversion_task);
															
 
																+	}
															
 
																+
															
 
																+	return sum;
															
 
																+}
															
 
																+
															
 
																 /* Predict the transfer time (in µs) to move a handle to a memory node */
															
 
																-double starpu_data_expected_transfer_time(starpu_data_handle handle, unsigned memory_node, starpu_access_mode mode)
															
 
																+double starpu_data_expected_transfer_time(starpu_data_handle_t handle, unsigned memory_node, enum starpu_access_mode mode)
															
 
																 {
															
 
																 	/* If we don't need to read the content of the handle */
															
 
																 	if (!(mode & STARPU_R))
															
 
																 		return 0.0;
															
 
																-	
															
 
																+
															
 
																 	if (_starpu_is_data_present_or_requested(handle, memory_node))
															
 
																 		return 0.0;
															
@@ -215,7 +313,7 @@ double starpu_data_expected_transfer_time(starpu_data_handle handle, unsigned me
 
																 	if (size == 0)
															
 
																 		return 0.0;
															
 
																-	uint32_t src_node = _starpu_select_src_node(handle);
															
 
																+	uint32_t src_node = _starpu_select_src_node(handle, memory_node);
															
 
																 	return _starpu_predict_transfer_time(src_node, memory_node, size);
															
 
																 }
															
@@ -229,8 +327,8 @@ double starpu_task_expected_data_transfer_time(uint32_t memory_node, struct star
 
																 	for (buffer = 0; buffer < nbuffers; buffer++)
															
 
																 	{
															
 
																-		starpu_data_handle handle = task->buffers[buffer].handle;
															
 
																-		starpu_access_mode mode = task->buffers[buffer].mode;
															
 
																+		starpu_data_handle_t handle = task->handles[buffer];
															
 
																+		enum starpu_access_mode mode = task->cl->modes[buffer];
															
 
																 		penalty += starpu_data_expected_transfer_time(handle, memory_node, mode);
															
 
																 	}
															
@@ -238,6 +336,119 @@ double starpu_task_expected_data_transfer_time(uint32_t memory_node, struct star
 
																 	return penalty;
															
 
																 }
															
 
																+/* Return the expected duration of the entire task bundle in µs */
															
 
																+double _starpu_task_bundle_expected_length(starpu_task_bundle_t bundle, enum starpu_perf_archtype arch, unsigned nimpl)
															
 
																+{
															
 
																+	double expected_length = 0.0;
															
 
																+
															
 
																+	/* We expect the length of the bundle to be the sum of the different tasks' lengths. */
															
 
																+	_STARPU_PTHREAD_MUTEX_LOCK(&bundle->mutex);
															
 
																+
															
 
																+	struct _starpu_task_bundle_entry *entry;
															
 
																+	entry = bundle->list;
															
 
																+
															
 
																+	while (entry)
															
 
																+	{
															
 
																+		double task_length = starpu_task_expected_length(entry->task, arch, nimpl);
															
 
																+
															
 
																+		/* In case the task is not calibrated, we consider the task
															
 
																+		 * ends immediately. */
															
 
																+		if (task_length > 0.0)
															
 
																+			expected_length += task_length;
															
 
																+
															
 
																+		entry = entry->next;
															
 
																+	}
															
 
																+
															
 
																+	_STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex);
															
 
																+
															
 
																+	return expected_length;
															
 
																+}
															
 
																+
															
 
																+/* Return the expected power consumption of the entire task bundle in J */
															
 
																+double _starpu_task_bundle_expected_power(starpu_task_bundle_t bundle, enum starpu_perf_archtype arch, unsigned nimpl)
															
 
																+{
															
 
																+	double expected_power = 0.0;
															
 
																+
															
 
																+	/* We expect total consumption of the bundle to be the sum of the different tasks' consumption. */
															
 
																+	_STARPU_PTHREAD_MUTEX_LOCK(&bundle->mutex);
															
 
																+
															
 
																+	struct _starpu_task_bundle_entry *entry;
															
 
																+	entry = bundle->list;
															
 
																+
															
 
																+	while (entry)
															
 
																+	{
															
 
																+		double task_power = starpu_task_expected_power(entry->task, arch, nimpl);
															
 
																+
															
 
																+		/* In case the task is not calibrated, we consider the task
															
 
																+		 * consumes no power. */
															
 
																+		if (task_power > 0.0)
															
 
																+			expected_power += task_power;
															
 
																+
															
 
																+		entry = entry->next;
															
 
																+	}
															
 
																+
															
 
																+	_STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex);
															
 
																+
															
 
																+	return expected_power;
															
 
																+}
															
 
																+
															
 
																+/* Return the time (in µs) expected to transfer all data used within the bundle */
															
 
																+double _starpu_task_bundle_expected_data_transfer_time(starpu_task_bundle_t bundle, unsigned memory_node)
															
 
																+{
															
 
																+	_STARPU_PTHREAD_MUTEX_LOCK(&bundle->mutex);
															
 
																+
															
 
																+	struct _starpu_handle_list *handles = NULL;
															
 
																+
															
 
																+	/* We list all the handles that are accessed within the bundle. */
															
 
																+
															
 
																+	/* For each task in the bundle */
															
 
																+	struct _starpu_task_bundle_entry *entry = bundle->list;
															
 
																+	while (entry)
															
 
																+	{
															
 
																+		struct starpu_task *task = entry->task;
															
 
																+
															
 
																+		if (task->cl)
															
 
																+		{
															
 
																+			unsigned b;
															
 
																+			for (b = 0; b < task->cl->nbuffers; b++)
															
 
																+			{
															
 
																+				starpu_data_handle_t handle = task->handles[b];
															
 
																+				enum starpu_access_mode mode = task->cl->modes[b];
															
 
																+
															
 
																+				if (!(mode & STARPU_R))
															
 
																+					continue;
															
 
																+
															
 
																+				/* Insert the handle in the sorted list in case
															
 
																+				 * it's not already in that list. */
															
 
																+				_insertion_handle_sorted(&handles, handle, mode);
															
 
																+			}
															
 
																+		}
															
 
																+
															
 
																+		entry = entry->next;
															
 
																+	}
															
 
																+
															
 
																+	_STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex);
															
 
																+
															
 
																+	/* Compute the sum of data transfer time, and destroy the list */
															
 
																+
															
 
																+	double total_exp = 0.0;
															
 
																+
															
 
																+	while (handles)
															
 
																+	{
															
 
																+		struct _starpu_handle_list *current = handles;
															
 
																+		handles = handles->next;
															
 
																+
															
 
																+		double exp;
															
 
																+		exp = starpu_data_expected_transfer_time(current->handle, memory_node, current->mode);
															
 
																+
															
 
																+		total_exp += exp;
															
 
																+
															
 
																+		free(current);
															
 
																+	}
															
 
																+
															
 
																+	return total_exp;
															
 
																+}
															
 
																+
															
 
																 static int directory_existence_was_tested = 0;
															
 
																 void _starpu_get_perf_model_dir(char *path, size_t maxlen)
															
@@ -246,13 +457,15 @@ void _starpu_get_perf_model_dir(char *path, size_t maxlen)
 
																 	/* use the directory specified at configure time */
															
 
																 	snprintf(path, maxlen, "%s", STARPU_PERF_MODEL_DIR);
															
 
																 #else
															
 
																-	/* by default, we use $HOME/.starpu/sampling */
															
 
																-	const char *home_path = getenv("HOME");
															
 
																+	const char *home_path = getenv("XDG_CACHE_HOME");
															
 
																+	if (!home_path)
															
 
																+		home_path = getenv("STARPU_HOME");
															
 
																+	if (!home_path)
															
 
																+		home_path = getenv("HOME");
															
 
																 	if (!home_path)
															
 
																 		home_path = getenv("USERPROFILE");
															
 
																-	if (!home_path) {
															
 
																+	if (!home_path)
															
 
																 		_STARPU_ERROR("couldn't find a home place to put starpu data\n");
															
 
																-	}
															
 
																 	snprintf(path, maxlen, "%s/.starpu/sampling/", home_path);
															
 
																 #endif
															
 
																 }
															
@@ -285,8 +498,8 @@ void _starpu_create_sampling_directory_if_needed(void)
 
																 		/* The performance of the codelets are stored in
															
 
																 		 * $STARPU_PERF_MODEL_DIR/codelets/ while those of the bus are stored in
															
 
																 		 * $STARPU_PERF_MODEL_DIR/bus/ so that we don't have name collisions */
															
 
																-		
															
 
																-		/* Testing if a directory exists and creating it otherwise 
															
 
																+
															
 
																+		/* Testing if a directory exists and creating it otherwise
															
 
																 		   may not be safe: it is possible that the permission are
															
 
																 		   changed in between. Instead, we create it and check if
															
 
																 		   it already existed before */
															
@@ -295,14 +508,21 @@ void _starpu_create_sampling_directory_if_needed(void)
 
																 		if (ret == -1)
															
 
																 		{
															
 
																-			STARPU_ASSERT(errno == EEXIST);
															
 
																-	
															
 
																+			if (errno != EEXIST) {
															
 
																+				fprintf(stderr,"Error making starpu directory %s:\n", perf_model_dir);
															
 
																+				perror("mkdir");
															
 
																+				STARPU_ASSERT(0);
															
 
																+			}
															
 
																+
															
 
																 			/* make sure that it is actually a directory */
															
 
																 			struct stat sb;
															
 
																 			stat(perf_model_dir, &sb);
															
 
																-			STARPU_ASSERT(S_ISDIR(sb.st_mode));
															
 
																+			if (!S_ISDIR(sb.st_mode)) {
															
 
																+				fprintf(stderr,"Error: %s is not a directory:\n", perf_model_dir);
															
 
																+				STARPU_ASSERT(0);
															
 
																+			}
															
 
																 		}
															
 
																-	
															
 
																+
															
 
																 		/* Per-task performance models */
															
 
																 		char perf_model_dir_codelets[256];
															
 
																 		_starpu_get_perf_model_dir_codelets(perf_model_dir_codelets, 256);
															
@@ -310,14 +530,22 @@ void _starpu_create_sampling_directory_if_needed(void)
 
																 		ret = _starpu_mkpath(perf_model_dir_codelets, S_IRWXU);
															
 
																 		if (ret == -1)
															
 
																 		{
															
 
																-			STARPU_ASSERT(errno == EEXIST);
															
 
																-	
															
 
																+			if (errno != EEXIST) {
															
 
																+				fprintf(stderr,"Error making starpu directory %s:\n", perf_model_dir);
															
 
																+				perror("mkdir");
															
 
																+				STARPU_ASSERT(0);
															
 
																+			}
															
 
																+
															
 
																+
															
 
																 			/* make sure that it is actually a directory */
															
 
																 			struct stat sb;
															
 
																 			stat(perf_model_dir_codelets, &sb);
															
 
																-			STARPU_ASSERT(S_ISDIR(sb.st_mode));
															
 
																+			if (!S_ISDIR(sb.st_mode)) {
															
 
																+				fprintf(stderr,"Error: %s is not a directory:\n", perf_model_dir);
															
 
																+				STARPU_ASSERT(0);
															
 
																+			}
															
 
																 		}
															
 
																-	
															
 
																+
															
 
																 		/* Performance of the memory subsystem */
															
 
																 		char perf_model_dir_bus[256];
															
 
																 		_starpu_get_perf_model_dir_bus(perf_model_dir_bus, 256);
															
@@ -325,14 +553,21 @@ void _starpu_create_sampling_directory_if_needed(void)
 
																 		ret = _starpu_mkpath(perf_model_dir_bus, S_IRWXU);
															
 
																 		if (ret == -1)
															
 
																 		{
															
 
																-			STARPU_ASSERT(errno == EEXIST);
															
 
																-	
															
 
																+			if (errno != EEXIST) {
															
 
																+				fprintf(stderr,"Error making starpu directory %s:\n", perf_model_dir);
															
 
																+				perror("mkdir");
															
 
																+				STARPU_ASSERT(0);
															
 
																+			}
															
 
																+
															
 
																 			/* make sure that it is actually a directory */
															
 
																 			struct stat sb;
															
 
																 			stat(perf_model_dir_bus, &sb);
															
 
																-			STARPU_ASSERT(S_ISDIR(sb.st_mode));
															
 
																+			if (!S_ISDIR(sb.st_mode)) {
															
 
																+				fprintf(stderr,"Error: %s is not a directory:\n", perf_model_dir);
															
 
																+				STARPU_ASSERT(0);
															
 
																+			}
															
 
																 		}
															
 
																-	
															
 
																+
															
 
																 		/* Performance debug measurements */
															
 
																 		char perf_model_dir_debug[256];
															
 
																 		_starpu_get_perf_model_dir_debug(perf_model_dir_debug, 256);
															
@@ -340,14 +575,22 @@ void _starpu_create_sampling_directory_if_needed(void)
 
																 		ret = _starpu_mkpath(perf_model_dir_debug, S_IRWXU);
															
 
																 		if (ret == -1)
															
 
																 		{
															
 
																-			STARPU_ASSERT(errno == EEXIST);
															
 
																-	
															
 
																+			if (errno != EEXIST) {
															
 
																+				fprintf(stderr,"Error making starpu directory %s:\n", perf_model_dir);
															
 
																+				perror("mkdir");
															
 
																+				STARPU_ASSERT(0);
															
 
																+			}
															
 
																+
															
 
																+
															
 
																 			/* make sure that it is actually a directory */
															
 
																 			struct stat sb;
															
 
																 			stat(perf_model_dir_debug, &sb);
															
 
																-			STARPU_ASSERT(S_ISDIR(sb.st_mode));
															
 
																+			if (!S_ISDIR(sb.st_mode)) {
															
 
																+				fprintf(stderr,"Error: %s is not a directory:\n", perf_model_dir);
															
 
																+				STARPU_ASSERT(0);
															
 
																+			}
															
 
																 		}
															
 
																-	
															
 
																+
															
 
																 		directory_existence_was_tested = 1;
															
 
																 	}
															
 
																 }
															
--- a/src/core/perfmodel/perfmodel.h
+++ b/src/core/perfmodel/perfmodel.h
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																  * Copyright (C) 2009, 2010, 2011  Université de Bordeaux 1
															
 
																- * Copyright (C) 2010  Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
															
 
																  * Copyright (C) 2011  Télécom-SudParis
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
@@ -24,67 +24,22 @@
 
																 #include <starpu_perfmodel.h>
															
 
																 //#include <core/jobs.h>
															
 
																 #include <common/htable32.h>
															
 
																+#include <core/task_bundle.h>
															
 
																 //#include <core/workers.h>
															
 
																 #include <pthread.h>
															
 
																 #include <stdio.h>
															
 
																-struct starpu_buffer_descr_t;
															
 
																-struct starpu_jobq_s;
															
 
																-struct starpu_job_s;
															
 
																+struct starpu_buffer_descr;
															
 
																+struct _starpu_job;
															
 
																 enum starpu_perf_archtype;
															
 
																-struct starpu_history_entry_t {
															
 
																-	//double measured;
															
 
																-	
															
 
																-	/* mean_n = 1/n sum */
															
 
																-	double mean;
															
 
																-
															
 
																-	/* n dev_n = sum2 - 1/n (sum)^2 */
															
 
																-	double deviation;
															
 
																-
															
 
																-	/* sum of samples */
															
 
																-	double sum;
															
 
																-
															
 
																-	/* sum of samples^2 */
															
 
																-	double sum2;
															
 
																-
															
 
																-//	/* sum of ln(measured) */
															
 
																-//	double sumlny;
															
 
																-//
															
 
																-//	/* sum of ln(size) */
															
 
																-//	double sumlnx;
															
 
																-//	double sumlnx2;
															
 
																-//
															
 
																-//	/* sum of ln(size) ln(measured) */
															
 
																-//	double sumlnxlny;
															
 
																-//
															
 
																-	unsigned nsample;
															
 
																-
															
 
																-	uint32_t footprint;
															
 
																-#ifdef STARPU_HAVE_WINDOWS
															
 
																-	unsigned size; /* in bytes */
															
 
																-#else
															
 
																-	size_t size; /* in bytes */
															
 
																-#endif
															
 
																-};
															
 
																-
															
 
																-struct starpu_history_list_t {
															
 
																-	struct starpu_history_list_t *next;
															
 
																-	struct starpu_history_entry_t *entry;
															
 
																-};
															
 
																-
															
 
																-struct starpu_model_list_t {
															
 
																-	struct starpu_model_list_t *next;
															
 
																-	struct starpu_perfmodel_t *model;
															
 
																-};
															
 
																-
															
 
																-//
															
 
																 ///* File format */
															
 
																-//struct model_file_format {
															
 
																+//struct model_file_format
															
 
																+// {
															
 
																 //	unsigned ncore_entries;
															
 
																 //	unsigned ncuda_entries;
															
 
																 //	/* contains core entries, then cuda ones */
															
 
																-//	struct starpu_history_entry_t entries[];
															
 
																+//	struct starpu_history_entry entries[];
															
 
																 //}
															
 
																 void _starpu_get_perf_model_dir(char *path, size_t maxlen);
															
@@ -92,18 +47,18 @@ void _starpu_get_perf_model_dir_codelets(char *path, size_t maxlen);
 
																 void _starpu_get_perf_model_dir_bus(char *path, size_t maxlen);
															
 
																 void _starpu_get_perf_model_dir_debug(char *path, size_t maxlen);
															
 
																-double _starpu_history_based_job_expected_perf(struct starpu_perfmodel_t *model, enum starpu_perf_archtype arch, struct starpu_job_s *j, unsigned nimpl);
															
 
																-void _starpu_register_model(struct starpu_perfmodel_t *model);
															
 
																-void _starpu_load_history_based_model(struct starpu_perfmodel_t *model, unsigned scan_history);
															
 
																-void _starpu_load_perfmodel(struct starpu_perfmodel_t *model);
															
 
																+double _starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, enum starpu_perf_archtype arch, struct _starpu_job *j, unsigned nimpl);
															
 
																+int _starpu_register_model(struct starpu_perfmodel *model);
															
 
																+void _starpu_load_history_based_model(struct starpu_perfmodel *model, unsigned scan_history);
															
 
																+void _starpu_load_perfmodel(struct starpu_perfmodel *model);
															
 
																 void _starpu_initialize_registered_performance_models(void);
															
 
																 void _starpu_deinitialize_registered_performance_models(void);
															
 
																-double _starpu_regression_based_job_expected_perf(struct starpu_perfmodel_t *model,
															
 
																-					enum starpu_perf_archtype arch, struct starpu_job_s *j, unsigned nimpl);
															
 
																-double _starpu_non_linear_regression_based_job_expected_perf(struct starpu_perfmodel_t *model,
															
 
																-					enum starpu_perf_archtype arch, struct starpu_job_s *j, unsigned nimpl);
															
 
																-void _starpu_update_perfmodel_history(struct starpu_job_s *j, struct starpu_perfmodel_t *model, enum starpu_perf_archtype arch,
															
 
																+double _starpu_regression_based_job_expected_perf(struct starpu_perfmodel *model,
															
 
																+					enum starpu_perf_archtype arch, struct _starpu_job *j, unsigned nimpl);
															
 
																+double _starpu_non_linear_regression_based_job_expected_perf(struct starpu_perfmodel *model,
															
 
																+					enum starpu_perf_archtype arch, struct _starpu_job *j, unsigned nimpl);
															
 
																+void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfmodel *model, enum starpu_perf_archtype arch,
															
 
																 				unsigned cpuid, double measured, unsigned nimpl);
															
 
																 void _starpu_create_sampling_directory_if_needed(void);
															
@@ -111,6 +66,13 @@ void _starpu_create_sampling_directory_if_needed(void);
 
																 void _starpu_load_bus_performance_files(void);
															
 
																 double _starpu_predict_transfer_time(unsigned src_node, unsigned dst_node, size_t size);
															
 
																+/* Return the expected duration of the entire task bundle in µs. */
															
 
																+double _starpu_task_bundle_expected_length(starpu_task_bundle_t bundle, enum starpu_perf_archtype arch, unsigned nimpl);
															
 
																+/* Return the time (in µs) expected to transfer all data used within the bundle */
															
 
																+double _starpu_task_bundle_expected_data_transfer_time(starpu_task_bundle_t bundle, unsigned memory_node);
															
 
																+/* Return the expected power consumption of the entire task bundle in J. */
															
 
																+double _starpu_task_bundle_expected_power(starpu_task_bundle_t bundle, enum starpu_perf_archtype arch, unsigned nimpl);
															
 
																+
															
 
																 void _starpu_set_calibrate_flag(unsigned val);
															
 
																 unsigned _starpu_get_calibrate_flag(void);
															
--- a/src/core/perfmodel/perfmodel_bus.c
+++ b/src/core/perfmodel/perfmodel_bus.c
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																  * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
															
 
																- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -46,14 +46,16 @@
 
																 #define MAXCPUS	32
															
 
																-struct dev_timing {
															
 
																+/* timing is in µs per byte (i.e. slowness, inverse of bandwidth) */
															
 
																+struct dev_timing
															
 
																+{
															
 
																 	int cpu_id;
															
 
																 	double timing_htod;
															
 
																 	double timing_dtoh;
															
 
																 };
															
 
																-static double bandwidth_matrix[STARPU_MAXNODES][STARPU_MAXNODES] = {{-1.0}};
															
 
																-static double latency_matrix[STARPU_MAXNODES][STARPU_MAXNODES] = {{ -1.0}};
															
 
																+static double bandwidth_matrix[STARPU_MAXNODES][STARPU_MAXNODES] = {{NAN}};
															
 
																+static double latency_matrix[STARPU_MAXNODES][STARPU_MAXNODES] = {{NAN}};
															
 
																 static unsigned was_benchmarked = 0;
															
 
																 static unsigned ncpus = 0;
															
 
																 static int ncuda = 0;
															
@@ -65,15 +67,16 @@ static int nopencl = 0;
 
																 static int cuda_affinity_matrix[STARPU_MAXCUDADEVS][MAXCPUS];
															
 
																 static double cudadev_timing_htod[STARPU_MAXNODES] = {0.0};
															
 
																 static double cudadev_timing_dtoh[STARPU_MAXNODES] = {0.0};
															
 
																+#ifdef HAVE_CUDA_MEMCPY_PEER
															
 
																+static double cudadev_timing_dtod[STARPU_MAXNODES][STARPU_MAXNODES] = {{0.0}};
															
 
																+#endif
															
 
																 static struct dev_timing cudadev_timing_per_cpu[STARPU_MAXNODES*MAXCPUS];
															
 
																-static size_t cuda_size = SIZE;
															
 
																 #endif
															
 
																 #ifdef STARPU_USE_OPENCL
															
 
																 static int opencl_affinity_matrix[STARPU_MAXOPENCLDEVS][MAXCPUS];
															
 
																 static double opencldev_timing_htod[STARPU_MAXNODES] = {0.0};
															
 
																 static double opencldev_timing_dtoh[STARPU_MAXNODES] = {0.0};
															
 
																 static struct dev_timing opencldev_timing_per_cpu[STARPU_MAXNODES*MAXCPUS];
															
 
																-static size_t opencl_size = SIZE;
															
 
																 #endif
															
 
																 #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
															
@@ -85,8 +88,9 @@ static hwloc_topology_t hwtopology;
 
																 #ifdef STARPU_USE_CUDA
															
 
																 static void measure_bandwidth_between_host_and_dev_on_cpu_with_cuda(int dev, int cpu, struct dev_timing *dev_timing_per_cpu)
															
 
																 {
															
 
																-	struct starpu_machine_config_s *config = _starpu_get_machine_config();
															
 
																+	struct _starpu_machine_config *config = _starpu_get_machine_config();
															
 
																 	_starpu_bind_thread_on_cpu(config, cpu);
															
 
																+	size_t size = SIZE;
															
 
																 	/* Initiliaze CUDA context on the device */
															
 
																 	cudaSetDevice(dev);
															
@@ -105,34 +109,31 @@ static void measure_bandwidth_between_host_and_dev_on_cpu_with_cuda(int dev, int
 
																 	cudaError_t cures;
															
 
																 	cures = cudaGetDeviceProperties(&prop, dev);
															
 
																 	if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures);
															
 
																-        if (cuda_size > prop.totalGlobalMem/4) cuda_size = prop.totalGlobalMem/4;
															
 
																+        if (size > prop.totalGlobalMem/4) size = prop.totalGlobalMem/4;
															
 
																 	/* Allocate a buffer on the device */
															
 
																 	unsigned char *d_buffer;
															
 
																-	cudaMalloc((void **)&d_buffer, cuda_size);
															
 
																-	assert(d_buffer);
															
 
																+	cudaMalloc((void **)&d_buffer, size);
															
 
																+	STARPU_ASSERT(d_buffer);
															
 
																 	/* hack to avoid third party libs to rebind threads */
															
 
																 	_starpu_bind_thread_on_cpu(config, cpu);
															
 
																-
															
 
																 	/* Allocate a buffer on the host */
															
 
																 	unsigned char *h_buffer;
															
 
																-	cudaHostAlloc((void **)&h_buffer, cuda_size, 0);
															
 
																-	assert(h_buffer);
															
 
																+	cures = cudaHostAlloc((void **)&h_buffer, size, 0);
															
 
																+	STARPU_ASSERT(cures == cudaSuccess);
															
 
																 	/* hack to avoid third party libs to rebind threads */
															
 
																 	_starpu_bind_thread_on_cpu(config, cpu);
															
 
																-
															
 
																 	/* Fill them */
															
 
																-	memset(h_buffer, 0, cuda_size);
															
 
																-	cudaMemset(d_buffer, 0, cuda_size);
															
 
																+	memset(h_buffer, 0, size);
															
 
																+	cudaMemset(d_buffer, 0, size);
															
 
																 	/* hack to avoid third party libs to rebind threads */
															
 
																 	_starpu_bind_thread_on_cpu(config, cpu);
															
 
																-
															
 
																 	unsigned iter;
															
 
																 	double timing;
															
 
																 	struct timeval start;
															
@@ -142,25 +143,25 @@ static void measure_bandwidth_between_host_and_dev_on_cpu_with_cuda(int dev, int
 
																 	gettimeofday(&start, NULL);
															
 
																 	for (iter = 0; iter < NITER; iter++)
															
 
																 	{
															
 
																-		cudaMemcpy(d_buffer, h_buffer, cuda_size, cudaMemcpyHostToDevice);
															
 
																+		cudaMemcpy(d_buffer, h_buffer, size, cudaMemcpyHostToDevice);
															
 
																 		cudaThreadSynchronize();
															
 
																 	}
															
 
																 	gettimeofday(&end, NULL);
															
 
																 	timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
															
 
																-	dev_timing_per_cpu[(dev+1)*MAXCPUS+cpu].timing_htod = timing/NITER;
															
 
																+	dev_timing_per_cpu[(dev+1)*MAXCPUS+cpu].timing_htod = timing/NITER/size;
															
 
																 	/* Measure download bandwidth */
															
 
																 	gettimeofday(&start, NULL);
															
 
																 	for (iter = 0; iter < NITER; iter++)
															
 
																 	{
															
 
																-		cudaMemcpy(h_buffer, d_buffer, cuda_size, cudaMemcpyDeviceToHost);
															
 
																+		cudaMemcpy(h_buffer, d_buffer, size, cudaMemcpyDeviceToHost);
															
 
																 		cudaThreadSynchronize();
															
 
																 	}
															
 
																 	gettimeofday(&end, NULL);
															
 
																 	timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
															
 
																-	dev_timing_per_cpu[(dev+1)*MAXCPUS+cpu].timing_dtoh = timing/NITER;
															
 
																+	dev_timing_per_cpu[(dev+1)*MAXCPUS+cpu].timing_dtoh = timing/NITER/size;
															
 
																 	/* Free buffers */
															
 
																 	cudaFreeHost(h_buffer);
															
@@ -168,6 +169,65 @@ static void measure_bandwidth_between_host_and_dev_on_cpu_with_cuda(int dev, int
 
																 	cudaThreadExit();
															
 
																 }
															
 
																+
															
 
																+#ifdef HAVE_CUDA_MEMCPY_PEER
															
 
																+static void measure_bandwidth_between_dev_and_dev_cuda(int src, int dst)
															
 
																+{
															
 
																+	size_t size = SIZE;
															
 
																+
															
 
																+        /* Get the maximum size which can be allocated on the device */
															
 
																+	struct cudaDeviceProp prop;
															
 
																+	cudaError_t cures;
															
 
																+	cures = cudaGetDeviceProperties(&prop, src);
															
 
																+	if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures);
															
 
																+        if (size > prop.totalGlobalMem/4) size = prop.totalGlobalMem/4;
															
 
																+	cures = cudaGetDeviceProperties(&prop, dst);
															
 
																+	if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures);
															
 
																+        if (size > prop.totalGlobalMem/4) size = prop.totalGlobalMem/4;
															
 
																+
															
 
																+	/* Initiliaze CUDA context on the source */
															
 
																+	cudaSetDevice(src);
															
 
																+
															
 
																+	/* Allocate a buffer on the device */
															
 
																+	unsigned char *s_buffer;
															
 
																+	cudaMalloc((void **)&s_buffer, size);
															
 
																+	STARPU_ASSERT(s_buffer);
															
 
																+	cudaMemset(s_buffer, 0, size);
															
 
																+
															
 
																+	/* Initiliaze CUDA context on the destination */
															
 
																+	cudaSetDevice(dst);
															
 
																+
															
 
																+	/* Allocate a buffer on the device */
															
 
																+	unsigned char *d_buffer;
															
 
																+	cudaMalloc((void **)&d_buffer, size);
															
 
																+	STARPU_ASSERT(d_buffer);
															
 
																+	cudaMemset(d_buffer, 0, size);
															
 
																+
															
 
																+	unsigned iter;
															
 
																+	double timing;
															
 
																+	struct timeval start;
															
 
																+	struct timeval end;
															
 
																+
															
 
																+	/* Measure upload bandwidth */
															
 
																+	gettimeofday(&start, NULL);
															
 
																+	for (iter = 0; iter < NITER; iter++)
															
 
																+	{
															
 
																+		cudaMemcpyPeer(d_buffer, dst, s_buffer, src, size);
															
 
																+		cudaThreadSynchronize();
															
 
																+	}
															
 
																+	gettimeofday(&end, NULL);
															
 
																+	timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
															
 
																+
															
 
																+	cudadev_timing_dtod[src+1][dst+1] = timing/NITER/size;
															
 
																+
															
 
																+	/* Free buffers */
															
 
																+	cudaFree(d_buffer);
															
 
																+	cudaSetDevice(src);
															
 
																+	cudaFree(s_buffer);
															
 
																+
															
 
																+	cudaThreadExit();
															
 
																+}
															
 
																+#endif
															
 
																 #endif
															
 
																 #ifdef STARPU_USE_OPENCL
															
@@ -176,8 +236,9 @@ static void measure_bandwidth_between_host_and_dev_on_cpu_with_opencl(int dev, i
 
																         cl_context context;
															
 
																         cl_command_queue queue;
															
 
																         cl_int err=0;
															
 
																+	size_t size = SIZE;
															
 
																-        struct starpu_machine_config_s *config = _starpu_get_machine_config();
															
 
																+        struct _starpu_machine_config *config = _starpu_get_machine_config();
															
 
																 	_starpu_bind_thread_on_cpu(config, cpu);
															
 
																 	/* Initialize OpenCL context on the device */
															
@@ -191,28 +252,28 @@ static void measure_bandwidth_between_host_and_dev_on_cpu_with_opencl(int dev, i
 
																         starpu_opencl_get_device(dev, &device);
															
 
																 	err = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(maxMemAllocSize), &maxMemAllocSize, NULL);
															
 
																         if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
															
 
																-        if (opencl_size > (size_t)maxMemAllocSize/4) opencl_size = maxMemAllocSize/4;
															
 
																+        if (size > (size_t)maxMemAllocSize/4) size = maxMemAllocSize/4;
															
 
																 	/* hack to avoid third party libs to rebind threads */
															
 
																 	_starpu_bind_thread_on_cpu(config, cpu);
															
 
																 	/* Allocate a buffer on the device */
															
 
																 	cl_mem d_buffer;
															
 
																-	d_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, opencl_size, NULL, &err);
															
 
																+	d_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, size, NULL, &err);
															
 
																 	if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
															
 
																 	/* hack to avoid third party libs to rebind threads */
															
 
																 	_starpu_bind_thread_on_cpu(config, cpu);
															
 
																         /* Allocate a buffer on the host */
															
 
																 	unsigned char *h_buffer;
															
 
																-        h_buffer = malloc(opencl_size);
															
 
																-	assert(h_buffer);
															
 
																+        h_buffer = (unsigned char *)malloc(size);
															
 
																+	STARPU_ASSERT(h_buffer);
															
 
																 	/* hack to avoid third party libs to rebind threads */
															
 
																 	_starpu_bind_thread_on_cpu(config, cpu);
															
 
																         /* Fill them */
															
 
																-	memset(h_buffer, 0, opencl_size);
															
 
																-        err = clEnqueueWriteBuffer(queue, d_buffer, CL_TRUE, 0, opencl_size, h_buffer, 0, NULL, NULL);
															
 
																+	memset(h_buffer, 0, size);
															
 
																+        err = clEnqueueWriteBuffer(queue, d_buffer, CL_TRUE, 0, size, h_buffer, 0, NULL, NULL);
															
 
																         if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
															
 
																 	/* hack to avoid third party libs to rebind threads */
															
 
																 	_starpu_bind_thread_on_cpu(config, cpu);
															
@@ -226,25 +287,25 @@ static void measure_bandwidth_between_host_and_dev_on_cpu_with_opencl(int dev, i
 
																 	gettimeofday(&start, NULL);
															
 
																 	for (iter = 0; iter < NITER; iter++)
															
 
																 	{
															
 
																-                err = clEnqueueWriteBuffer(queue, d_buffer, CL_TRUE, 0, opencl_size, h_buffer, 0, NULL, NULL);
															
 
																+                err = clEnqueueWriteBuffer(queue, d_buffer, CL_TRUE, 0, size, h_buffer, 0, NULL, NULL);
															
 
																                 if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
															
 
																 	}
															
 
																 	gettimeofday(&end, NULL);
															
 
																 	timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
															
 
																-	dev_timing_per_cpu[(dev+1)*MAXCPUS+cpu].timing_htod = timing/NITER;
															
 
																+	dev_timing_per_cpu[(dev+1)*MAXCPUS+cpu].timing_htod = timing/NITER/size;
															
 
																 	/* Measure download bandwidth */
															
 
																 	gettimeofday(&start, NULL);
															
 
																 	for (iter = 0; iter < NITER; iter++)
															
 
																 	{
															
 
																-                err = clEnqueueReadBuffer(queue, d_buffer, CL_TRUE, 0, opencl_size, h_buffer, 0, NULL, NULL);
															
 
																+                err = clEnqueueReadBuffer(queue, d_buffer, CL_TRUE, 0, size, h_buffer, 0, NULL, NULL);
															
 
																                 if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
															
 
																 	}
															
 
																 	gettimeofday(&end, NULL);
															
 
																 	timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
															
 
																-	dev_timing_per_cpu[(dev+1)*MAXCPUS+cpu].timing_dtoh = timing/NITER;
															
 
																+	dev_timing_per_cpu[(dev+1)*MAXCPUS+cpu].timing_dtoh = timing/NITER/size;
															
 
																 	/* Free buffers */
															
 
																 	clReleaseMemObject(d_buffer);
															
@@ -258,8 +319,8 @@ static void measure_bandwidth_between_host_and_dev_on_cpu_with_opencl(int dev, i
 
																 /* NB: we want to sort the bandwidth by DECREASING order */
															
 
																 static int compar_dev_timing(const void *left_dev_timing, const void *right_dev_timing)
															
 
																 {
															
 
																-	const struct dev_timing *left = left_dev_timing;
															
 
																-	const struct dev_timing *right = right_dev_timing;
															
 
																+	const struct dev_timing *left = (const struct dev_timing *)left_dev_timing;
															
 
																+	const struct dev_timing *right = (const struct dev_timing *)right_dev_timing;
															
 
																 	double left_dtoh = left->timing_dtoh;
															
 
																 	double left_htod = left->timing_htod;
															
@@ -291,7 +352,7 @@ static int find_numa_node(hwloc_obj_t obj)
 
																 	STARPU_ASSERT(current->depth == HWLOC_OBJ_NODE);
															
 
																-	return current->logical_index; 
															
 
																+	return current->logical_index;
															
 
																 }
															
 
																 #endif
															
@@ -308,12 +369,24 @@ static void measure_bandwidth_between_cpus_and_dev(int dev, struct dev_timing *d
 
																 	/* If no NUMA node was found, we assume that we have a single memory
															
 
																 	 * bank. */
															
 
																 	const unsigned no_node_obj_was_found = (nnuma_nodes == 0);
															
 
																-	
															
 
																-	unsigned is_available_per_numa_node[nnuma_nodes];
															
 
																-	double dev_timing_htod_per_numa_node[nnuma_nodes];
															
 
																-	double dev_timing_dtoh_per_numa_node[nnuma_nodes];
															
 
																-	memset(is_available_per_numa_node, 0, nnuma_nodes*sizeof(unsigned));
															
 
																+	unsigned *is_available_per_numa_node = NULL;
															
 
																+	double *dev_timing_htod_per_numa_node = NULL;
															
 
																+	double *dev_timing_dtoh_per_numa_node = NULL;
															
 
																+
															
 
																+	if (!no_node_obj_was_found)
															
 
																+	{
															
 
																+		is_available_per_numa_node = (unsigned *)malloc(nnuma_nodes * sizeof(unsigned));
															
 
																+		STARPU_ASSERT(is_available_per_numa_node);
															
 
																+
															
 
																+		dev_timing_htod_per_numa_node = (double *)malloc(nnuma_nodes * sizeof(double));
															
 
																+		STARPU_ASSERT(dev_timing_htod_per_numa_node);
															
 
																+
															
 
																+		dev_timing_dtoh_per_numa_node = (double *)malloc(nnuma_nodes * sizeof(double));
															
 
																+		STARPU_ASSERT(dev_timing_dtoh_per_numa_node);
															
 
																+
															
 
																+		memset(is_available_per_numa_node, 0, nnuma_nodes*sizeof(unsigned));
															
 
																+	}
															
 
																 #endif
															
 
																 	unsigned cpu;
															
@@ -327,9 +400,9 @@ static void measure_bandwidth_between_cpus_and_dev(int dev, struct dev_timing *d
 
																 		if (!no_node_obj_was_found)
															
 
																 		{
															
 
																 			hwloc_obj_t obj = hwloc_get_obj_by_depth(hwtopology, cpu_depth, cpu);
															
 
																-	
															
 
																+
															
 
																 			numa_id = find_numa_node(obj);
															
 
																-	
															
 
																+
															
 
																 			if (is_available_per_numa_node[numa_id])
															
 
																 			{
															
 
																 				/* We reuse the previous numbers for that NUMA node */
															
@@ -364,6 +437,15 @@ static void measure_bandwidth_between_cpus_and_dev(int dev, struct dev_timing *d
 
																 		}
															
 
																 #endif
															
 
																         }
															
 
																+
															
 
																+#ifdef STARPU_HAVE_HWLOC
															
 
																+	if (!no_node_obj_was_found)
															
 
																+	{
															
 
																+		free(is_available_per_numa_node);
															
 
																+		free(dev_timing_htod_per_numa_node);
															
 
																+		free(dev_timing_dtoh_per_numa_node);
															
 
																+	}
															
 
																+#endif /* STARPU_HAVE_HWLOC */
															
 
																 }
															
 
																 static void measure_bandwidth_between_host_and_dev(int dev, double *dev_timing_htod, double *dev_timing_dtoh,
															
@@ -386,7 +468,7 @@ static void measure_bandwidth_between_host_and_dev(int dev, double *dev_timing_h
 
																 		double bandwidth_sum2 = bandwidth_dtoh*bandwidth_dtoh + bandwidth_htod*bandwidth_htod;
															
 
																-		_STARPU_DISP("BANDWIDTH GPU %d CPU %u - htod %lf - dtoh %lf - %lf\n", dev, current_cpu, bandwidth_htod, bandwidth_dtoh, sqrt(bandwidth_sum2));
															
 
																+		_STARPU_DISP("BANDWIDTH GPU %d CPU %u - htod %f - dtoh %f - %f\n", dev, current_cpu, bandwidth_htod, bandwidth_dtoh, sqrt(bandwidth_sum2));
															
 
																 	}
															
 
																 	unsigned best_cpu = dev_timing_per_cpu[(dev+1)*MAXCPUS+0].cpu_id;
															
@@ -405,6 +487,9 @@ static void benchmark_all_gpu_devices(void)
 
																 {
															
 
																 #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
															
 
																 	int i;
															
 
																+#ifdef HAVE_CUDA_MEMCPY_PEER
															
 
																+	int j;
															
 
																+#endif
															
 
																 	_STARPU_DEBUG("Benchmarking the speed of the bus\n");
															
@@ -428,21 +513,33 @@ static void benchmark_all_gpu_devices(void)
 
																 #warning Missing binding support, StarPU will not be able to properly benchmark NUMA topology
															
 
																 #endif
															
 
																-	struct starpu_machine_config_s *config = _starpu_get_machine_config();
															
 
																+	struct _starpu_machine_config *config = _starpu_get_machine_config();
															
 
																 	ncpus = _starpu_topology_get_nhwcpu(config);
															
 
																 #ifdef STARPU_USE_CUDA
															
 
																-        cudaGetDeviceCount(&ncuda);
															
 
																+	ncuda = _starpu_get_cuda_device_count();
															
 
																 	for (i = 0; i < ncuda; i++)
															
 
																 	{
															
 
																+		fprintf(stderr," CUDA %d...", i);
															
 
																 		/* measure bandwidth between Host and Device i */
															
 
																 		measure_bandwidth_between_host_and_dev(i, cudadev_timing_htod, cudadev_timing_dtoh, cudadev_timing_per_cpu, 'C');
															
 
																 	}
															
 
																+#ifdef HAVE_CUDA_MEMCPY_PEER
															
 
																+	for (i = 0; i < ncuda; i++)
															
 
																+		for (j = 0; j < ncuda; j++)
															
 
																+			if (i != j)
															
 
																+			{
															
 
																+				fprintf(stderr," CUDA %d -> %d...", i, j);
															
 
																+				/* measure bandwidth between Host and Device i */
															
 
																+				measure_bandwidth_between_dev_and_dev_cuda(i, j);
															
 
																+			}
															
 
																+#endif
															
 
																 #endif
															
 
																 #ifdef STARPU_USE_OPENCL
															
 
																         nopencl = _starpu_opencl_get_device_count();
															
 
																 	for (i = 0; i < nopencl; i++)
															
 
																 	{
															
 
																+		fprintf(stderr," OpenCL %d...", i);
															
 
																 		/* measure bandwith between Host and Device i */
															
 
																 		measure_bandwidth_between_host_and_dev(i, opencldev_timing_htod, opencldev_timing_dtoh, opencldev_timing_per_cpu, 'O');
															
 
																 	}
															
@@ -477,7 +574,7 @@ static void get_bus_path(const char *type, char *path, size_t maxlen)
 
																 	char hostname[32];
															
 
																 	char *forced_hostname = getenv("STARPU_HOSTNAME");
															
 
																 	if (forced_hostname && forced_hostname[0])
															
 
																-		snprintf(hostname, sizeof(hostname), forced_hostname);
															
 
																+		snprintf(hostname, sizeof(hostname), "%s", forced_hostname);
															
 
																 	else
															
 
																 		gethostname(hostname, sizeof(hostname));
															
 
																 	strncat(path, ".", maxlen);
															
@@ -495,6 +592,7 @@ static void get_affinity_path(char *path, size_t maxlen)
 
																 static void load_bus_affinity_file_content(void)
															
 
																 {
															
 
																+#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
															
 
																 	FILE *f;
															
 
																 	char path[256];
															
@@ -503,13 +601,12 @@ static void load_bus_affinity_file_content(void)
 
																 	f = fopen(path, "r");
															
 
																 	STARPU_ASSERT(f);
															
 
																-#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
															
 
																-	struct starpu_machine_config_s *config = _starpu_get_machine_config();
															
 
																+	struct _starpu_machine_config *config = _starpu_get_machine_config();
															
 
																 	ncpus = _starpu_topology_get_nhwcpu(config);
															
 
																         int gpu;
															
 
																 #ifdef STARPU_USE_CUDA
															
 
																-        cudaGetDeviceCount(&ncuda);
															
 
																+	ncuda = _starpu_get_cuda_device_count();
															
 
																 	for (gpu = 0; gpu < ncuda; gpu++)
															
 
																 	{
															
 
																 		int ret;
															
@@ -532,7 +629,7 @@ static void load_bus_affinity_file_content(void)
 
																 		ret = fscanf(f, "\n");
															
 
																 		STARPU_ASSERT(ret == 0);
															
 
																 	}
															
 
																-#endif
															
 
																+#endif /* !STARPU_USE_CUDA */
															
 
																 #ifdef STARPU_USE_OPENCL
															
 
																         nopencl = _starpu_opencl_get_device_count();
															
 
																 	for (gpu = 0; gpu < nopencl; gpu++)
															
@@ -557,21 +654,21 @@ static void load_bus_affinity_file_content(void)
 
																 		ret = fscanf(f, "\n");
															
 
																 		STARPU_ASSERT(ret == 0);
															
 
																 	}
															
 
																-#endif
															
 
																-#endif
															
 
																+#endif /* !STARPU_USE_OPENCL */
															
 
																 	fclose(f);
															
 
																+#endif /* !(STARPU_USE_CUDA_ || STARPU_USE_OPENCL */
															
 
																+
															
 
																 }
															
 
																 static void write_bus_affinity_file_content(void)
															
 
																 {
															
 
																-	FILE *f;
															
 
																-
															
 
																 	STARPU_ASSERT(was_benchmarked);
															
 
																+#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
															
 
																+	FILE *f;
															
 
																 	char path[256];
															
 
																 	get_affinity_path(path, 256);
															
 
																-
															
 
																 	f = fopen(path, "w+");
															
 
																 	if (!f)
															
 
																 	{
															
@@ -581,7 +678,6 @@ static void write_bus_affinity_file_content(void)
 
																 		STARPU_ABORT();
															
 
																 	}
															
 
																-#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
															
 
																 	unsigned cpu;
															
 
																         int gpu;
															
@@ -689,12 +785,14 @@ static int load_bus_latency_file_content(void)
 
																 			double latency;
															
 
																 			n = fscanf(f, "%lf", &latency);
															
 
																-			if (n != 1) {
															
 
																+			if (n != 1)
															
 
																+			{
															
 
																 				fclose(f);
															
 
																 				return 0;
															
 
																 			}
															
 
																 			n = getc(f);
															
 
																-			if (n != '\t') {
															
 
																+			if (n != '\t')
															
 
																+			{
															
 
																 				fclose(f);
															
 
																 				return 0;
															
 
																 			}
															
@@ -703,7 +801,8 @@ static int load_bus_latency_file_content(void)
 
																 		}
															
 
																 		n = getc(f);
															
 
																-		if (n != '\n') {
															
 
																+		if (n != '\n')
															
 
																+		{
															
 
																 			fclose(f);
															
 
																 			return 0;
															
 
																 		}
															
@@ -750,17 +849,19 @@ static void write_bus_latency_file_content(void)
 
																 			if ((src > maxnode) || (dst > maxnode))
															
 
																 			{
															
 
																 				/* convention */
															
 
																-				latency = -1.0;
															
 
																+				latency = NAN;
															
 
																 			}
															
 
																 			else if (src == dst)
															
 
																 			{
															
 
																 				latency = 0.0;
															
 
																 			}
															
 
																-			else {
															
 
																+			else
															
 
																+			{
															
 
																+				/* µs */
															
 
																                                 latency = ((src && dst)?2000.0:500.0);
															
 
																 			}
															
 
																-			fprintf(f, "%lf\t", latency);
															
 
																+			fprintf(f, "%f\t", latency);
															
 
																 		}
															
 
																 		fprintf(f, "\n");
															
@@ -828,13 +929,15 @@ static int load_bus_bandwidth_file_content(void)
 
																 			double bandwidth;
															
 
																 			n = fscanf(f, "%lf", &bandwidth);
															
 
																-			if (n != 1) {
															
 
																+			if (n != 1)
															
 
																+			{
															
 
																 				fprintf(stderr,"didn't get a number\n");
															
 
																 				fclose(f);
															
 
																 				return 0;
															
 
																 			}
															
 
																 			n = getc(f);
															
 
																-			if (n != '\t') {
															
 
																+			if (n != '\t')
															
 
																+			{
															
 
																 				fclose(f);
															
 
																 				return 0;
															
 
																 			}
															
@@ -843,7 +946,8 @@ static int load_bus_bandwidth_file_content(void)
 
																 		}
															
 
																 		n = getc(f);
															
 
																-		if (n != '\n') {
															
 
																+		if (n != '\n')
															
 
																+		{
															
 
																 			fclose(f);
															
 
																 			return 0;
															
 
																 		}
															
@@ -883,36 +987,43 @@ static void write_bus_bandwidth_file_content(void)
 
																 			if ((src > maxnode) || (dst > maxnode))
															
 
																 			{
															
 
																-				bandwidth = -1.0;
															
 
																+				bandwidth = NAN;
															
 
																 			}
															
 
																 #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
															
 
																 			else if (src != dst)
															
 
																 			{
															
 
																-                                double time_src_to_ram=0.0, time_ram_to_dst=0.0;
															
 
																-                                double timing;
															
 
																-                                /* Bandwidth = (SIZE)/(time i -> ram + time ram -> j)*/
															
 
																+				double slowness = 0.0;
															
 
																+				/* Total bandwidth is the harmonic mean of bandwidths */
															
 
																 #ifdef STARPU_USE_CUDA
															
 
																-				time_src_to_ram = (src==0)?0.0:cudadev_timing_dtoh[src];
															
 
																-                                time_ram_to_dst = (dst==0)?0.0:cudadev_timing_htod[dst];
															
 
																-				timing =time_src_to_ram + time_ram_to_dst;
															
 
																-				bandwidth = 1.0*cuda_size/timing;
															
 
																+#ifdef HAVE_CUDA_MEMCPY_PEER
															
 
																+				if (src && src <= ncuda && dst && dst <= ncuda)
															
 
																+					/* Direct GPU-GPU transfert */
															
 
																+					slowness = cudadev_timing_dtod[src][dst];
															
 
																+				else
															
 
																+#endif
															
 
																+				{
															
 
																+					if (src && src <= ncuda)
															
 
																+						slowness += cudadev_timing_dtoh[src];
															
 
																+					if (dst && dst <= ncuda)
															
 
																+						slowness += cudadev_timing_htod[dst];
															
 
																+				}
															
 
																 #endif
															
 
																 #ifdef STARPU_USE_OPENCL
															
 
																-                                if (src > ncuda)
															
 
																-                                        time_src_to_ram = (src==0)?0.0:opencldev_timing_dtoh[src-ncuda];
															
 
																-                                if (dst > ncuda)
															
 
																-                                        time_ram_to_dst = (dst==0)?0.0:opencldev_timing_htod[dst-ncuda];
															
 
																-				timing =time_src_to_ram + time_ram_to_dst;
															
 
																-				bandwidth = 1.0*opencl_size/timing;
															
 
																+				if (src > ncuda)
															
 
																+					slowness += opencldev_timing_dtoh[src-ncuda];
															
 
																+				if (dst > ncuda)
															
 
																+					slowness += opencldev_timing_htod[dst-ncuda];
															
 
																 #endif
															
 
																+				bandwidth = 1.0/slowness;
															
 
																 			}
															
 
																 #endif
															
 
																-			else {
															
 
																+			else
															
 
																+			{
															
 
																 			        /* convention */
															
 
																 			        bandwidth = 0.0;
															
 
																 			}
															
 
																-			fprintf(f, "%lf\t", bandwidth);
															
 
																+			fprintf(f, "%f\t", bandwidth);
															
 
																 		}
															
 
																 		fprintf(f, "\n");
															
@@ -921,37 +1032,38 @@ static void write_bus_bandwidth_file_content(void)
 
																 	fclose(f);
															
 
																 }
															
 
																-void starpu_print_bus_bandwidth(FILE *f)
															
 
																+void starpu_bus_print_bandwidth(FILE *f)
															
 
																 {
															
 
																-  int src, dst, maxnode;
															
 
																+	int src, dst, maxnode;
															
 
																-  maxnode = ncuda;
															
 
																+        maxnode = ncuda;
															
 
																 #ifdef STARPU_USE_OPENCL
															
 
																-  maxnode += nopencl;
															
 
																+        maxnode += nopencl;
															
 
																 #endif
															
 
																-  fprintf(f, "from\t");
															
 
																-  fprintf(f, "to RAM\t\t");
															
 
																-  for (dst = 0; dst < ncuda; dst++)
															
 
																-    fprintf(f, "to CUDA %d\t", dst);
															
 
																-  for (dst = 0; dst < nopencl; dst++)
															
 
																-    fprintf(f, "to OpenCL %d\t", dst);
															
 
																-  fprintf(f, "\n");
															
 
																-
															
 
																-  for (src = 0; src <= maxnode; src++)
															
 
																-    {
															
 
																-      if (!src)
															
 
																-	fprintf(f, "RAM\t");
															
 
																-      else if (src <= ncuda)
															
 
																-	fprintf(f, "CUDA %d\t", src-1);
															
 
																-      else
															
 
																-	fprintf(f, "OpenCL%d\t", src-ncuda-1);
															
 
																-      for (dst = 0; dst <= maxnode; dst++)
															
 
																-	fprintf(f, "%f\t", bandwidth_matrix[src][dst]);
															
 
																-
															
 
																-      fprintf(f, "\n");
															
 
																-    }
															
 
																+	fprintf(f, "from\t");
															
 
																+	fprintf(f, "to RAM\t\t");
															
 
																+	for (dst = 0; dst < ncuda; dst++)
															
 
																+		fprintf(f, "to CUDA %d\t", dst);
															
 
																+	for (dst = 0; dst < nopencl; dst++)
															
 
																+		fprintf(f, "to OpenCL %d\t", dst);
															
 
																+	fprintf(f, "\n");
															
 
																+
															
 
																+	for (src = 0; src <= maxnode; src++)
															
 
																+	{
															
 
																+		if (!src)
															
 
																+			fprintf(f, "RAM\t");
															
 
																+		else if (src <= ncuda)
															
 
																+			fprintf(f, "CUDA %d\t", src-1);
															
 
																+		else
															
 
																+			fprintf(f, "OpenCL%d\t", src-ncuda-1);
															
 
																+		for (dst = 0; dst <= maxnode; dst++)
															
 
																+			fprintf(f, "%f\t", bandwidth_matrix[src][dst]);
															
 
																+
															
 
																+		fprintf(f, "\n");
															
 
																+	}
															
 
																 }
															
 
																+
															
 
																 static void generate_bus_bandwidth_file(void)
															
 
																 {
															
 
																 	if (!was_benchmarked)
															
@@ -990,16 +1102,18 @@ static void check_bus_config_file()
 
																         get_config_path(path, 256);
															
 
																         res = access(path, F_OK);
															
 
																-        if (res) {
															
 
																+        if (res)
															
 
																+	{
															
 
																 		fprintf(stderr, "No performance model for the bus, calibrating...");
															
 
																 		starpu_force_bus_sampling();
															
 
																 		fprintf(stderr, "done\n");
															
 
																         }
															
 
																-        else {
															
 
																+        else
															
 
																+	{
															
 
																                 FILE *f;
															
 
																                 int ret, read_cuda, read_opencl;
															
 
																                 unsigned read_cpus;
															
 
																-                struct starpu_machine_config_s *config = _starpu_get_machine_config();
															
 
																+                struct _starpu_machine_config *config = _starpu_get_machine_config();
															
 
																                 // Loading configuration from file
															
 
																                 f = fopen(path, "r");
															
@@ -1019,24 +1133,27 @@ static void check_bus_config_file()
 
																                 // Loading current configuration
															
 
																                 ncpus = _starpu_topology_get_nhwcpu(config);
															
 
																 #ifdef STARPU_USE_CUDA
															
 
																-                cudaGetDeviceCount(&ncuda);
															
 
																+		ncuda = _starpu_get_cuda_device_count();
															
 
																 #endif
															
 
																 #ifdef STARPU_USE_OPENCL
															
 
																                 nopencl = _starpu_opencl_get_device_count();
															
 
																 #endif
															
 
																                 // Checking if both configurations match
															
 
																-                if (read_cpus != ncpus) {
															
 
																+                if (read_cpus != ncpus)
															
 
																+		{
															
 
																 			fprintf(stderr, "Current configuration does not match the bus performance model (CPUS: (stored) %u != (current) %u), recalibrating...", read_cpus, ncpus);
															
 
																                         starpu_force_bus_sampling();
															
 
																 			fprintf(stderr, "done\n");
															
 
																                 }
															
 
																-                else if (read_cuda != ncuda) {
															
 
																+                else if (read_cuda != ncuda)
															
 
																+		{
															
 
																                         fprintf(stderr, "Current configuration does not match the bus performance model (CUDA: (stored) %d != (current) %d), recalibrating...", read_cuda, ncuda);
															
 
																                         starpu_force_bus_sampling();
															
 
																 			fprintf(stderr, "done\n");
															
 
																                 }
															
 
																-                else if (read_opencl != nopencl) {
															
 
																+                else if (read_opencl != nopencl)
															
 
																+		{
															
 
																                         fprintf(stderr, "Current configuration does not match the bus performance model (OpenCL: (stored) %d != (current) %d), recalibrating...", read_opencl, nopencl);
															
 
																                         starpu_force_bus_sampling();
															
 
																 			fprintf(stderr, "done\n");
															
@@ -1094,11 +1211,12 @@ void _starpu_load_bus_performance_files(void)
 
																 	load_bus_bandwidth_file();
															
 
																 }
															
 
																+/* (in µs) */
															
 
																 double _starpu_predict_transfer_time(unsigned src_node, unsigned dst_node, size_t size)
															
 
																 {
															
 
																 	double bandwidth = bandwidth_matrix[src_node][dst_node];
															
 
																 	double latency = latency_matrix[src_node][dst_node];
															
 
																-	struct starpu_machine_topology_s *topology = &_starpu_get_machine_config()->topology;
															
 
																+	struct starpu_machine_topology *topology = &_starpu_get_machine_config()->topology;
															
 
																 	return latency + (size/bandwidth)*2*(topology->ncudagpus+topology->nopenclgpus);
															
 
																 }
															
--- a/src/core/perfmodel/perfmodel_history.c
+++ b/src/core/perfmodel/perfmodel_history.c
--- a/src/core/perfmodel/regression.c
+++ b/src/core/perfmodel/regression.c
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																  * Copyright (C) 2009, 2010, 2011  Université de Bordeaux 1
															
 
																- * Copyright (C) 2010  Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -112,12 +112,13 @@ static double test_r(double c, unsigned n, unsigned *x, double *y)
 
																 	return r;
															
 
																 }
															
 
																-static unsigned find_list_size(struct starpu_history_list_t *list_history)
															
 
																+static unsigned find_list_size(struct starpu_history_list *list_history)
															
 
																 {
															
 
																 	unsigned cnt = 0;
															
 
																-	struct starpu_history_list_t *ptr = list_history;
															
 
																-	while (ptr) {
															
 
																+	struct starpu_history_list *ptr = list_history;
															
 
																+	while (ptr)
															
 
																+	{
															
 
																 		cnt++;
															
 
																 		ptr = ptr->next;
															
 
																 	}
															
@@ -138,12 +139,13 @@ static double find_list_min(double *y, unsigned n)
 
																 	return min;
															
 
																 }
															
 
																-static void dump_list(unsigned *x, double *y, struct starpu_history_list_t *list_history)
															
 
																+static void dump_list(unsigned *x, double *y, struct starpu_history_list *list_history)
															
 
																 {
															
 
																-	struct starpu_history_list_t *ptr = list_history;
															
 
																+	struct starpu_history_list *ptr = list_history;
															
 
																 	unsigned i = 0;
															
 
																-	while (ptr) {
															
 
																+	while (ptr)
															
 
																+	{
															
 
																 		x[i] = ptr->entry->size;
															
 
																 		y[i] = ptr->entry->mean;
															
@@ -153,11 +155,11 @@ static void dump_list(unsigned *x, double *y, struct starpu_history_list_t *list
 
																 }
															
 
																-/* y = ax^b + c 
															
 
																+/* y = ax^b + c
															
 
																  * 	return 0 if success, -1 otherwise
															
 
																  * 	if success, a, b and c are modified
															
 
																  * */
															
 
																-int _starpu_regression_non_linear_power(struct starpu_history_list_t *ptr, double *a, double *b, double *c)
															
 
																+int _starpu_regression_non_linear_power(struct starpu_history_list *ptr, double *a, double *b, double *c)
															
 
																 {
															
 
																 	unsigned n = find_list_size(ptr);
															
@@ -171,7 +173,7 @@ int _starpu_regression_non_linear_power(struct starpu_history_list_t *ptr, doubl
 
																 	double cmin = 0.0;
															
 
																 	double cmax = find_list_min(y, n);
															
 
																-	
															
 
																+
															
 
																 	unsigned iter;
															
 
																 	double err = 100000.0;
															
@@ -180,7 +182,7 @@ int _starpu_regression_non_linear_power(struct starpu_history_list_t *ptr, doubl
 
																 	{
															
 
																 		double c1, c2;
															
 
																 		double r1, r2;
															
 
																-		
															
 
																+
															
 
																 		double radius = 0.01;
															
 
																 		c1 = cmin + (0.5-radius)*(cmax - cmin);
															
@@ -197,23 +199,21 @@ int _starpu_regression_non_linear_power(struct starpu_history_list_t *ptr, doubl
 
																 		{
															
 
																 			cmax = (cmin + cmax)/2;
															
 
																 		}
															
 
																-		else {
															
 
																+		else
															
 
																+		{
															
 
																 			/* 2 is better */
															
 
																 			cmin = (cmin + cmax)/2;
															
 
																 		}
															
 
																 		if (fabs(err - STARPU_MIN(err1, err2)) < EPS)
															
 
																-		{
															
 
																-			err = STARPU_MIN(err1, err2);
															
 
																 			break;
															
 
																-		}
															
 
																 		err = STARPU_MIN(err1, err2);
															
 
																 	}
															
 
																 	*c = (cmin + cmax)/2;
															
 
																-	*b = compute_b(*c, n, x, y); 
															
 
																+	*b = compute_b(*c, n, x, y);
															
 
																 	*a = exp(compute_a(*c, *b, n, x, y));
															
 
																 	free(x);
															
@@ -221,4 +221,3 @@ int _starpu_regression_non_linear_power(struct starpu_history_list_t *ptr, doubl
 
																 	return 0;
															
 
																 }
															
 
																-
															
--- a/src/core/perfmodel/regression.h
+++ b/src/core/perfmodel/regression.h
@@ -24,6 +24,6 @@
 
																 #include <core/perfmodel/perfmodel.h>
															
 
																 #include <starpu.h>
															
 
																-int _starpu_regression_non_linear_power(struct starpu_history_list_t *ptr, double *a, double *b, double *c);
															
 
																+int _starpu_regression_non_linear_power(struct starpu_history_list *ptr, double *a, double *b, double *c);
															
 
																 #endif // __REGRESSION_H__ 
															
--- a/src/core/progress_hook.c
+++ b/src/core/progress_hook.c
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																  * Copyright (C) 2010  Université de Bordeaux 1
															
 
																- * Copyright (C) 2010  Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -21,7 +21,8 @@
 
																 #define NMAXHOOKS	16
															
 
																-struct progression_hook {
															
 
																+struct progression_hook
															
 
																+{
															
 
																 	unsigned (*func)(void *arg);
															
 
																 	void *arg;
															
 
																 	unsigned active;
															
@@ -36,7 +37,7 @@ static int active_hook_cnt = 0;
 
																 int starpu_progression_hook_register(unsigned (*func)(void *arg), void *arg)
															
 
																 {
															
 
																 	int hook;
															
 
																-	PTHREAD_RWLOCK_WRLOCK(&progression_hook_rwlock);
															
 
																+	_STARPU_PTHREAD_RWLOCK_WRLOCK(&progression_hook_rwlock);
															
 
																 	for (hook = 0; hook < NMAXHOOKS; hook++)
															
 
																 	{
															
 
																 		if (!hooks[hook].active)
															
@@ -47,13 +48,13 @@ int starpu_progression_hook_register(unsigned (*func)(void *arg), void *arg)
 
																 			hooks[hook].active = 1;
															
 
																 			active_hook_cnt++;
															
 
																-			PTHREAD_RWLOCK_UNLOCK(&progression_hook_rwlock);
															
 
																-			
															
 
																+			_STARPU_PTHREAD_RWLOCK_UNLOCK(&progression_hook_rwlock);
															
 
																+
															
 
																 			return hook;
															
 
																 		}
															
 
																 	}
															
 
																-	PTHREAD_RWLOCK_UNLOCK(&progression_hook_rwlock);
															
 
																+	_STARPU_PTHREAD_RWLOCK_UNLOCK(&progression_hook_rwlock);
															
 
																 	starpu_wake_all_blocked_workers();
															
@@ -63,22 +64,22 @@ int starpu_progression_hook_register(unsigned (*func)(void *arg), void *arg)
 
																 void starpu_progression_hook_deregister(int hook_id)
															
 
																 {
															
 
																-	PTHREAD_RWLOCK_WRLOCK(&progression_hook_rwlock);
															
 
																+	_STARPU_PTHREAD_RWLOCK_WRLOCK(&progression_hook_rwlock);
															
 
																 	if (hooks[hook_id].active)
															
 
																 		active_hook_cnt--;
															
 
																 	hooks[hook_id].active = 0;
															
 
																-	PTHREAD_RWLOCK_UNLOCK(&progression_hook_rwlock);
															
 
																+	_STARPU_PTHREAD_RWLOCK_UNLOCK(&progression_hook_rwlock);
															
 
																 }
															
 
																 unsigned _starpu_execute_registered_progression_hooks(void)
															
 
																 {
															
 
																 	/* If there is no hook registered, we short-cut loop. */
															
 
																-	PTHREAD_RWLOCK_RDLOCK(&progression_hook_rwlock);
															
 
																+	_STARPU_PTHREAD_RWLOCK_RDLOCK(&progression_hook_rwlock);
															
 
																 	int no_hook = (active_hook_cnt == 0);
															
 
																-	PTHREAD_RWLOCK_UNLOCK(&progression_hook_rwlock);
															
 
																+	_STARPU_PTHREAD_RWLOCK_UNLOCK(&progression_hook_rwlock);
															
 
																 	if (no_hook)
															
 
																 		return 1;
															
@@ -92,9 +93,9 @@ unsigned _starpu_execute_registered_progression_hooks(void)
 
																 	{
															
 
																 		unsigned active;
															
 
																-		PTHREAD_RWLOCK_RDLOCK(&progression_hook_rwlock);
															
 
																+		_STARPU_PTHREAD_RWLOCK_RDLOCK(&progression_hook_rwlock);
															
 
																 		active = hooks[hook].active;
															
 
																-		PTHREAD_RWLOCK_UNLOCK(&progression_hook_rwlock);
															
 
																+		_STARPU_PTHREAD_RWLOCK_UNLOCK(&progression_hook_rwlock);
															
 
																 		unsigned may_block_hook = 1;
															
--- a/src/core/sched_policy.c
+++ b/src/core/sched_policy.c
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2010-2011  Université de Bordeaux 1
															
 
																- * Copyright (C) 2010-2011  Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2010-2012  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2010-2012  Centre National de la Recherche Scientifique
															
 
																  * Copyright (C) 2011  INRIA
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
@@ -36,17 +36,17 @@ int starpu_get_prefetch_flag(void)
 
																  *	Predefined policies
															
 
																  */
															
 
																-/* extern struct starpu_sched_policy_s _starpu_sched_ws_policy; */
															
 
																-/* extern struct starpu_sched_policy_s _starpu_sched_prio_policy; */
															
 
																-/* extern struct starpu_sched_policy_s _starpu_sched_random_policy; */
															
 
																-/* extern struct starpu_sched_policy_s _starpu_sched_dm_policy; */
															
 
																-/* extern struct starpu_sched_policy_s _starpu_sched_dmda_policy; */
															
 
																-/* extern struct starpu_sched_policy_s _starpu_sched_dmda_ready_policy; */
															
 
																-/* extern struct starpu_sched_policy_s _starpu_sched_dmda_sorted_policy; */
															
 
																-/* extern struct starpu_sched_policy_s _starpu_sched_eager_policy; */
															
 
																-/* extern struct starpu_sched_policy_s _starpu_sched_parallel_heft_policy; */
															
 
																-/* extern struct starpu_sched_policy_s _starpu_sched_pgreedy_policy; */
															
 
																-extern struct starpu_sched_policy_s heft_policy;
															
 
																+/* extern struct starpu_sched_policy _starpu_sched_ws_policy; */
															
 
																+/* extern struct starpu_sched_policy _starpu_sched_prio_policy; */
															
 
																+/* extern struct starpu_sched_policy _starpu_sched_random_policy; */
															
 
																+/* extern struct starpu_sched_policy _starpu_sched_dm_policy; */
															
 
																+/* extern struct starpu_sched_policy _starpu_sched_dmda_policy; */
															
 
																+/* extern struct starpu_sched_policy _starpu_sched_dmda_ready_policy; */
															
 
																+/* extern struct starpu_sched_policy _starpu_sched_dmda_sorted_policy; */
															
 
																+/* extern struct starpu_sched_policy _starpu_sched_eager_policy; */
															
 
																+/* extern struct starpu_sched_policy _starpu_sched_parallel_heft_policy; */
															
 
																+/* extern struct starpu_sched_policy _starpu_sched_pgreedy_policy; */
															
 
																+extern struct starpu_sched_policy heft_policy;
															
 
																 static struct starpu_sched_policy_s *predefined_policies[] = {
															
 
																 	/* &_starpu_sched_ws_policy, */
															
@@ -62,7 +62,7 @@ static struct starpu_sched_policy_s *predefined_policies[] = {
 
																 	/* &_starpu_sched_pgreedy_policy */
															
 
																 };
															
 
																-struct starpu_sched_policy_s *_starpu_get_sched_policy(struct starpu_sched_ctx *sched_ctx)
															
 
																+struct starpu_sched_policy *_starpu_get_sched_policy(struct starpu_sched_ctx *sched_ctx)
															
 
																 {
															
 
																 	return sched_ctx->sched_policy;
															
 
																 }
															
@@ -71,7 +71,7 @@ struct starpu_sched_policy_s *_starpu_get_sched_policy(struct starpu_sched_ctx *
 
																  *	Methods to initialize the scheduling policy
															
 
																  */
															
 
																-static void load_sched_policy(struct starpu_sched_policy_s *sched_policy, struct starpu_sched_ctx *sched_ctx)
															
 
																+static void load_sched_policy(struct starpu_sched_policy *sched_policy, struct starpu_sched_ctx *sched_ctx)
															
 
																 {
															
 
																 	STARPU_ASSERT(sched_policy);
															
@@ -91,6 +91,7 @@ static void load_sched_policy(struct starpu_sched_policy_s *sched_policy, struct
 
																 	policy->deinit_sched = sched_policy->deinit_sched;
															
 
																 	policy->push_task = sched_policy->push_task;
															
 
																 	policy->pop_task = sched_policy->pop_task;
															
 
																+	policy->pre_exec_hook = sched_policy->pre_exec_hook;
															
 
																 	policy->post_exec_hook = sched_policy->post_exec_hook;
															
 
																 	policy->pop_every_task = sched_policy->pop_every_task;
															
 
																 	policy->push_task_notify = sched_policy->push_task_notify;
															
@@ -99,20 +100,20 @@ static void load_sched_policy(struct starpu_sched_policy_s *sched_policy, struct
 
																 	policy->remove_workers = sched_policy->remove_workers;
															
 
																 }
															
 
																-static struct starpu_sched_policy_s *find_sched_policy_from_name(const char *policy_name)
															
 
																+static struct starpu_sched_policy *find_sched_policy_from_name(const char *policy_name)
															
 
																 {
															
 
																-
															
 
																 	if (!policy_name)
															
 
																 		return NULL;
															
 
																 	unsigned i;
															
 
																 	for (i = 0; i < sizeof(predefined_policies)/sizeof(predefined_policies[0]); i++)
															
 
																 	{
															
 
																-		struct starpu_sched_policy_s *p;
															
 
																+		struct starpu_sched_policy *p;
															
 
																 		p = predefined_policies[i];
															
 
																 		if (p->policy_name)
															
 
																 		{
															
 
																-			if (strcmp(policy_name, p->policy_name) == 0) {
															
 
																+			if (strcmp(policy_name, p->policy_name) == 0)
															
 
																+			{
															
 
																 				/* we found a policy with the requested name */
															
 
																 				return p;
															
 
																 			}
															
@@ -127,23 +128,24 @@ static struct starpu_sched_policy_s *find_sched_policy_from_name(const char *pol
 
																 static void display_sched_help_message(void)
															
 
																 {
															
 
																 	const char *sched_env = getenv("STARPU_SCHED");
															
 
																-	if (sched_env && (strcmp(sched_env, "help") == 0)) {
															
 
																+	if (sched_env && (strcmp(sched_env, "help") == 0))
															
 
																+	{
															
 
																 		fprintf(stderr, "STARPU_SCHED can be either of\n");
															
 
																 		/* display the description of all predefined policies */
															
 
																 		unsigned i;
															
 
																 		for (i = 0; i < sizeof(predefined_policies)/sizeof(predefined_policies[0]); i++)
															
 
																 		{
															
 
																-			struct starpu_sched_policy_s *p;
															
 
																+			struct starpu_sched_policy *p;
															
 
																 			p = predefined_policies[i];
															
 
																 			fprintf(stderr, "%s\t-> %s\n", p->policy_name, p->policy_description);
															
 
																 		}
															
 
																 	 }
															
 
																 }
															
 
																-static struct starpu_sched_policy_s *select_sched_policy(struct starpu_machine_config_s *config, const char *policy_name)
															
 
																+static struct starpu_sched_policy *select_sched_policy(struct _starpu_machine_config *config)
															
 
																 {
															
 
																-	struct starpu_sched_policy_s *selected_policy = NULL;
															
 
																+	struct starpu_sched_policy *selected_policy = NULL;
															
 
																 	struct starpu_conf *user_conf = config->user_conf;
															
 
																 	/* First, we check whether the application explicitely gave a scheduling policy or not */
															
@@ -152,19 +154,12 @@ static struct starpu_sched_policy_s *select_sched_policy(struct starpu_machine_c
 
																 	/* Otherwise, we look if the application specified the name of a policy to load */
															
 
																 	const char *sched_pol_name;
															
 
																-	if (user_conf && (user_conf->sched_policy_name))
															
 
																-	{
															
 
																+	sched_pol_name = getenv("STARPU_SCHED");
															
 
																+	if (sched_pol_name == NULL && user_conf && user_conf->sched_policy_name)
															
 
																 		sched_pol_name = user_conf->sched_policy_name;
															
 
																-	}
															
 
																-	else {
															
 
																-		sched_pol_name = getenv("STARPU_SCHED");
															
 
																-	}
															
 
																 	if (sched_pol_name)
															
 
																 		selected_policy = find_sched_policy_from_name(sched_pol_name);
															
 
																-	else
															
 
																-		if(policy_name)
															
 
																-			selected_policy = find_sched_policy_from_name(policy_name);
															
 
																 	/* Perhaps there was no policy that matched the name */
															
 
																 	if (selected_policy)
															
@@ -175,7 +170,7 @@ static struct starpu_sched_policy_s *select_sched_policy(struct starpu_machine_c
 
																 	return &heft_policy;
															
 
																 }
															
 
																-void _starpu_init_sched_policy(struct starpu_machine_config_s *config, struct starpu_sched_ctx *sched_ctx, const char *policy_name)
															
 
																+void _starpu_init_sched_policy(struct starpu_machine_config *config, struct starpu_sched_ctx *sched_ctx)
															
 
																 {
															
 
																 	/* Perhaps we have to display some help */
															
 
																 	display_sched_help_message();
															
@@ -187,19 +182,16 @@ void _starpu_init_sched_policy(struct starpu_machine_config_s *config, struct st
 
																 	/* By default, we don't calibrate */
															
 
																 	unsigned do_calibrate = 0;
															
 
																-	if (config->user_conf && (config->user_conf->calibrate != -1))
															
 
																-	{
															
 
																-		do_calibrate = config->user_conf->calibrate;
															
 
																-	}
															
 
																-	else {
															
 
																-		int res = starpu_get_env_number("STARPU_CALIBRATE");
															
 
																-		do_calibrate =  (res < 0)?0:(unsigned)res;
															
 
																-	}
															
 
																+	int res = starpu_get_env_number("STARPU_CALIBRATE");
															
 
																+	if (res == -1 && config->user_conf)
															
 
																+		res = config->user_conf->calibrate;
															
 
																+
															
 
																+	do_calibrate = (res < 0)?0:(unsigned)res;
															
 
																 	_starpu_set_calibrate_flag(do_calibrate);
															
 
																-	struct starpu_sched_policy_s *selected_policy;
															
 
																-	selected_policy = select_sched_policy(config, policy_name);
															
 
																+	struct starpu_sched_policy *selected_policy;
															
 
																+	selected_policy = select_sched_policy(config);
															
 
																 	load_sched_policy(selected_policy, sched_ctx);
															
@@ -208,7 +200,7 @@ void _starpu_init_sched_policy(struct starpu_machine_config_s *config, struct st
 
																 void _starpu_deinit_sched_policy(struct starpu_sched_ctx *sched_ctx)
															
 
																 {
															
 
																-        struct starpu_sched_policy_s *policy = sched_ctx->sched_policy;
															
 
																+	struct starpu_sched_policy_s *policy = sched_ctx->sched_policy;
															
 
																 	if (policy->deinit_sched)
															
 
																 		policy->deinit_sched(sched_ctx->id);
															
 
																 }
															
@@ -224,9 +216,9 @@ static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int wo
 
																 	/* Is this a basic worker or a combined worker ? */
															
 
																 	int is_basic_worker = (workerid < nbasic_workers);
															
 
																-	unsigned memory_node; 
															
 
																-	struct starpu_worker_s *worker = NULL;
															
 
																-	struct starpu_combined_worker_s *combined_worker = NULL;
															
 
																+	unsigned memory_node;
															
 
																+	struct _starpu_worker *worker = NULL;
															
 
																+	struct _starpu_combined_worker *combined_worker = NULL;
															
 
																 	if (is_basic_worker)
															
 
																 	{
															
@@ -254,9 +246,34 @@ static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int wo
 
																 	if (is_basic_worker)
															
 
																 	{
															
 
																+		unsigned node = starpu_worker_get_memory_node(workerid);
															
 
																+		if (_starpu_task_uses_multiformat_handles(task))
															
 
																+		{
															
 
																+			unsigned i;
															
 
																+			for (i = 0; i < task->cl->nbuffers; i++)
															
 
																+			{
															
 
																+				struct starpu_task *conversion_task;
															
 
																+				starpu_data_handle_t handle;
															
 
																+
															
 
																+				handle = task->handles[i];
															
 
																+				if (!_starpu_handle_needs_conversion_task(handle, node))
															
 
																+					continue;
															
 
																+
															
 
																+				conversion_task = _starpu_create_conversion_task(handle, node);
															
 
																+				conversion_task->mf_skip = 1;
															
 
																+				conversion_task->execute_on_a_specific_worker = 1;
															
 
																+				conversion_task->workerid = workerid;
															
 
																+				_starpu_task_submit_conversion_task(conversion_task, workerid);
															
 
																+				//_STARPU_DEBUG("Pushing a conversion task\n");
															
 
																+			}
															
 
																+
															
 
																+			for (i = 0; i < task->cl->nbuffers; i++)
															
 
																+				task->handles[i]->mf_node = node;
															
 
																+		}
															
 
																 		return _starpu_push_local_task(worker, task, 0);
															
 
																 	}
															
 
																-	else {
															
 
																+	else
															
 
																+	{
															
 
																 		/* This is a combined worker so we create task aliases */
															
 
																 		int worker_size = combined_worker->worker_size;
															
 
																 		int *combined_workerid = combined_worker->combined_workerid;
															
@@ -264,13 +281,13 @@ static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int wo
 
																 		int ret = 0;
															
 
																 		int i;
															
 
																-		starpu_job_t j = _starpu_get_job_associated_to_task(task);
															
 
																+		struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
															
 
																 		j->task_size = worker_size;
															
 
																 		j->combined_workerid = workerid;
															
 
																 		j->active_task_alias_count = 0;
															
 
																-		PTHREAD_BARRIER_INIT(&j->before_work_barrier, NULL, worker_size);
															
 
																-		PTHREAD_BARRIER_INIT(&j->after_work_barrier, NULL, worker_size);
															
 
																+		_STARPU_PTHREAD_BARRIER_INIT(&j->before_work_barrier, NULL, worker_size);
															
 
																+		_STARPU_PTHREAD_BARRIER_INIT(&j->after_work_barrier, NULL, worker_size);
															
 
																 		for (i = 0; i < worker_size; i++)
															
 
																 		{
															
@@ -304,7 +321,7 @@ static int _starpu_nworkers_able_to_execute_task(struct starpu_task *task, struc
 
																 }
															
 
																 /* the generic interface that call the proper underlying implementation */
															
 
																-int _starpu_push_task(starpu_job_t j, unsigned job_is_already_locked)
															
 
																+int _starpu_push_task(struct _starpu_job *j)
															
 
																 {
															
 
																 	struct starpu_task *task = j->task;
															
 
																 	struct starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx);
															
@@ -340,6 +357,7 @@ int _starpu_push_task(starpu_job_t j, unsigned job_is_already_locked)
 
																         _STARPU_LOG_IN();
															
 
																+	_starpu_increment_nready_tasks();
															
 
																 	task->status = STARPU_TASK_READY;
															
 
																 	_starpu_profiling_set_task_push_start_time(task);
															
@@ -348,7 +366,7 @@ int _starpu_push_task(starpu_job_t j, unsigned job_is_already_locked)
 
																 	 * corresponding dependencies */
															
 
																 	if (task->cl == NULL)
															
 
																 	{
															
 
																-		_starpu_handle_job_termination(j, job_is_already_locked, -1);
															
 
																+		_starpu_handle_job_termination(j, -1);
															
 
																                 _STARPU_LOG_OUT_TAG("handle_job_termination");
															
 
																 		return 0;
															
 
																 	}
															
@@ -358,7 +376,7 @@ int _starpu_push_task(starpu_job_t j, unsigned job_is_already_locked)
 
																 	{
															
 
																 		ret = _starpu_push_task_on_specific_worker(task, task->workerid);
															
 
																 	}
															
 
																-	else 
															
 
																+	else
															
 
																 	{
															
 
																 		STARPU_ASSERT(sched_ctx->sched_policy->push_task);
															
@@ -366,7 +384,7 @@ int _starpu_push_task(starpu_job_t j, unsigned job_is_already_locked)
 
																 		if(ret == -1)
															
 
																 		{
															
 
																 			printf("repush task \n");
															
 
																-			ret = _starpu_push_task(j, job_is_already_locked);
															
 
																+			ret = _starpu_push_task(j);
															
 
																 		}
															
 
																 	}
															
@@ -376,21 +394,88 @@ int _starpu_push_task(starpu_job_t j, unsigned job_is_already_locked)
 
																         return ret;
															
 
																 }
															
 
																-struct starpu_task *_starpu_pop_task(struct starpu_worker_s *worker)
															
 
																+/*
															
 
																+ * Given a handle that needs to be converted in order to be used on the given
															
 
																+ * node, returns a task that takes care of the conversion.
															
 
																+ */
															
 
																+struct starpu_task *_starpu_create_conversion_task(starpu_data_handle_t handle,
															
 
																+						   unsigned int node)
															
 
																+{
															
 
																+	struct starpu_task *conversion_task;
															
 
																+	struct starpu_multiformat_interface *format_interface;
															
 
																+	enum starpu_node_kind node_kind;
															
 
																+
															
 
																+	conversion_task = starpu_task_create();
															
 
																+	conversion_task->synchronous = 0;
															
 
																+	conversion_task->handles[0] = handle;
															
 
																+
															
 
																+	/* The node does not really matter here */
															
 
																+	format_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, 0);
															
 
																+	node_kind = starpu_node_get_kind(node);
															
 
																+
															
 
																+	handle->refcnt++;
															
 
																+	handle->busy_count++;
															
 
																+
															
 
																+	struct starpu_multiformat_data_interface_ops *mf_ops;
															
 
																+	mf_ops = (struct starpu_multiformat_data_interface_ops *) handle->ops->get_mf_ops(format_interface);
															
 
																+	switch(node_kind)
															
 
																+	{
															
 
																+	case STARPU_CPU_RAM:
															
 
																+		switch (starpu_node_get_kind(handle->mf_node))
															
 
																+		{
															
 
																+		case STARPU_CPU_RAM:
															
 
																+			STARPU_ASSERT(0);
															
 
																+#ifdef STARPU_USE_CUDA
															
 
																+		case STARPU_CUDA_RAM:
															
 
																+			conversion_task->cl = mf_ops->cuda_to_cpu_cl;
															
 
																+			break;
															
 
																+#endif
															
 
																+#ifdef STARPU_USE_OPENCL
															
 
																+		case STARPU_OPENCL_RAM:
															
 
																+			conversion_task->cl = mf_ops->opencl_to_cpu_cl;
															
 
																+			break;
															
 
																+#endif
															
 
																+		default:
															
 
																+			fprintf(stderr, "Oops : %u\n", handle->mf_node);
															
 
																+			STARPU_ASSERT(0);
															
 
																+		}
															
 
																+		break;
															
 
																+#ifdef STARPU_USE_CUDA
															
 
																+	case STARPU_CUDA_RAM:
															
 
																+		conversion_task->cl = mf_ops->cpu_to_cuda_cl;
															
 
																+		break;
															
 
																+#endif
															
 
																+#ifdef STARPU_USE_OPENCL
															
 
																+	case STARPU_OPENCL_RAM:
															
 
																+		conversion_task->cl = mf_ops->cpu_to_opencl_cl;
															
 
																+		break;
															
 
																+#endif
															
 
																+	case STARPU_SPU_LS: /* Not supported */
															
 
																+	default:
															
 
																+		STARPU_ASSERT(0);
															
 
																+	}
															
 
																+
															
 
																+	conversion_task->cl->modes[0] = STARPU_RW;
															
 
																+	return conversion_task;
															
 
																+}
															
 
																+
															
 
																+struct starpu_task *_starpu_pop_task(struct _starpu_worker *worker)
															
 
																 {
															
 
																 	struct starpu_task *task;
															
 
																+	int worker_id;
															
 
																+	unsigned node;
															
 
																 	/* We can't tell in advance which task will be picked up, so we measure
															
 
																 	 * a timestamp, and will attribute it afterwards to the task. */
															
 
																 	int profiling = starpu_profiling_status_get();
															
 
																 	struct timespec pop_start_time;
															
 
																 	if (profiling)
															
 
																-		starpu_clock_gettime(&pop_start_time);
															
 
																-	
															
 
																-	PTHREAD_MUTEX_LOCK(&worker->sched_mutex);
															
 
																+		_starpu_clock_gettime(&pop_start_time);
															
 
																+pick:
															
 
																+	_STARPU_PTHREAD_MUTEX_LOCK(&worker->sched_mutex);
															
 
																 	/* perhaps there is some local task to be executed first */
															
 
																 	task = _starpu_pop_local_task(worker);
															
 
																-	PTHREAD_MUTEX_UNLOCK(&worker->sched_mutex);
															
 
																+	_STARPU_PTHREAD_MUTEX_UNLOCK(&worker->sched_mutex);
															
 
																 	/* get tasks from the stacks of the strategy */
															
@@ -409,21 +494,68 @@ struct starpu_task *_starpu_pop_task(struct starpu_worker_s *worker)
 
																 				sched_ctx_mutex = _starpu_get_sched_mutex(sched_ctx, worker->workerid);
															
 
																 				if(sched_ctx_mutex != NULL)
															
 
																 				{
															
 
																-					PTHREAD_MUTEX_LOCK(sched_ctx_mutex);
															
 
																+					_STARPU_PTHREAD_MUTEX_LOCK(sched_ctx_mutex);
															
 
																 					if (sched_ctx->sched_policy && sched_ctx->sched_policy->pop_task)
															
 
																 					{
															
 
																 						task = sched_ctx->sched_policy->pop_task();
															
 
																-						PTHREAD_MUTEX_UNLOCK(sched_ctx_mutex);
															
 
																+						_STARPU_PTHREAD_MUTEX_UNLOCK(sched_ctx_mutex);
															
 
																 						break;
															
 
																 					}
															
 
																-					PTHREAD_MUTEX_UNLOCK(sched_ctx_mutex);
															
 
																+					_STARPU_PTHREAD_MUTEX_UNLOCK(sched_ctx_mutex);
															
 
																 				}
															
 
																 			}
															
 
																 		}
															
 
																 	  }
															
 
																-	/* Note that we may get a NULL task in case the scheduler was unlocked
															
 
																-	 * for some reason. */
															
 
																+	if (!task)
															
 
																+		goto profiling;
															
 
																+
															
 
																+	/* Make sure we do not bother with all the multiformat-specific code if 
															
 
																+	 * it is not necessary. */
															
 
																+	if (!_starpu_task_uses_multiformat_handles(task))
															
 
																+		goto profiling;
															
 
																+
															
 
																+
															
 
																+	/* This is either a conversion task, or a regular task for which the
															
 
																+	 * conversion tasks have already been created and submitted */
															
 
																+	if (task->mf_skip)
															
 
																+		goto profiling;
															
 
																+
															
 
																+	worker_id = starpu_worker_get_id();
															
 
																+	if (!starpu_worker_can_execute_task(worker_id, task, 0))
															
 
																+		return task;
															
 
																+
															
 
																+	node = starpu_worker_get_memory_node(worker_id);
															
 
																+
															
 
																+	/*
															
 
																+	 * We do have a task that uses multiformat handles. Let's create the 
															
 
																+	 * required conversion tasks.
															
 
																+	 */
															
 
																+	unsigned i;
															
 
																+	for (i = 0; i < task->cl->nbuffers; i++)
															
 
																+	{
															
 
																+		struct starpu_task *conversion_task;
															
 
																+		starpu_data_handle_t handle;
															
 
																+
															
 
																+		handle = task->handles[i];
															
 
																+		if (!_starpu_handle_needs_conversion_task(handle, node))
															
 
																+			continue;
															
 
																+		conversion_task = _starpu_create_conversion_task(handle, node);
															
 
																+		conversion_task->mf_skip = 1;
															
 
																+		conversion_task->execute_on_a_specific_worker = 1;
															
 
																+		conversion_task->workerid = worker_id;
															
 
																+		/*
															
 
																+		 * Next tasks will need to know where these handles have gone.
															
 
																+		 */
															
 
																+		handle->mf_node = node;
															
 
																+		_starpu_task_submit_conversion_task(conversion_task, worker_id);
															
 
																+	}
															
 
																+
															
 
																+	task->mf_skip = 1;
															
 
																+	starpu_task_list_push_front(&worker->local_tasks, task);
															
 
																+	goto pick;
															
 
																+
															
 
																+profiling:
															
 
																 	if (profiling && task)
															
 
																 	{
															
 
																 		struct starpu_task_profiling_info *profiling_info;
															
@@ -436,7 +568,7 @@ struct starpu_task *_starpu_pop_task(struct starpu_worker_s *worker)
 
																 		{
															
 
																 			memcpy(&profiling_info->pop_start_time,
															
 
																 				&pop_start_time, sizeof(struct timespec));
															
 
																-			starpu_clock_gettime(&profiling_info->pop_end_time);
															
 
																+			_starpu_clock_gettime(&profiling_info->pop_end_time);
															
 
																 		}
															
 
																 	}
															
@@ -473,6 +605,13 @@ struct starpu_task *_starpu_pop_every_task(struct starpu_sched_ctx *sched_ctx)
 
																 	return sched_ctx->sched_policy->pop_every_task();
															
 
																 }
															
 
																+void _starpu_sched_pre_exec_hook(struct starpu_task *task)
															
 
																+{
															
 
																+	struct starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx);
															
 
																+	if (sched_ctx->sched_policy->pre_exec_hook)
															
 
																+		sched_ctx->sched_policy->pre_exec_hook(task);
															
 
																+}
															
 
																+
															
 
																 void _starpu_sched_post_exec_hook(struct starpu_task *task)
															
 
																 {
															
 
																 	struct starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx);
															
@@ -489,20 +628,21 @@ void _starpu_sched_post_exec_hook(struct starpu_task *task)
 
																 void _starpu_wait_on_sched_event(void)
															
 
																 {
															
 
																- 	struct starpu_worker_s *worker = _starpu_get_local_worker_key();
															
 
																+	struct _starpu_worker *worker = _starpu_get_local_worker_key();
															
 
																-	PTHREAD_MUTEX_LOCK(&worker->sched_mutex);
															
 
																+	_STARPU_PTHREAD_MUTEX_LOCK(worker->sched_mutex);
															
 
																 	_starpu_handle_all_pending_node_data_requests(worker->memory_node);
															
 
																 	if (_starpu_machine_is_running())
															
 
																 	{
															
 
																 #ifndef STARPU_NON_BLOCKING_DRIVERS
															
 
																-		pthread_cond_wait(&worker->sched_cond, &worker->sched_mutex);
															
 
																+		_STARPU_PTHREAD_COND_WAIT(worker->sched_cond,
															
 
																+					  worker->sched_mutex);
															
 
																 #endif
															
 
																 	}
															
 
																-	PTHREAD_MUTEX_UNLOCK(&worker->sched_mutex);
															
 
																+	_STARPU_PTHREAD_MUTEX_UNLOCK(worker->sched_mutex);
															
 
																 }
															
 
																 /* The scheduling policy may put tasks directly into a worker's local queue so
															
@@ -512,9 +652,8 @@ void _starpu_wait_on_sched_event(void)
 
																  * a FIFO ordering. */
															
 
																 int starpu_push_local_task(int workerid, struct starpu_task *task, int back)
															
 
																 {
															
 
																-	struct starpu_worker_s *worker = _starpu_get_worker_struct(workerid);
															
 
																+	struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
															
 
																 	return _starpu_push_local_task(worker, task, back);
															
 
																 }
															
 
																-
															
--- a/src/core/sched_policy.h
+++ b/src/core/sched_policy.h
@@ -1,6 +1,6 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2010  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2010, 2012  Université de Bordeaux 1
															
 
																  * Copyright (C) 2011  INRIA
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
@@ -23,21 +23,26 @@
 
																 #include <core/sched_ctx.h>
															
 
																 #include <starpu_scheduler.h>
															
 
																-struct starpu_machine_config_s;
															
 
																-struct starpu_sched_policy_s *_starpu_get_sched_policy( struct starpu_sched_ctx *sched_ctx);
															
 
																+struct starpu_machine_config;
															
 
																+struct starpu_sched_policy *_starpu_get_sched_policy( struct starpu_sched_ctx *sched_ctx);
															
 
																-void _starpu_init_sched_policy(struct starpu_machine_config_s *config, 
															
 
																-			       struct starpu_sched_ctx *sched_ctx, const char *policy_name);
															
 
																+void _starpu_init_sched_policy(struct starpu_machine_config *config, 
															
 
																+			       struct starpu_sched_ctx *sched_ctx);
															
 
																 void _starpu_deinit_sched_policy(struct starpu_sched_ctx *sched_ctx);
															
 
																-int _starpu_push_task(starpu_job_t task, unsigned job_is_already_locked);
															
 
																+int _starpu_push_task(struct _starpu_job *task);
															
 
																 /* pop a task that can be executed on the worker */
															
 
																-struct starpu_task *_starpu_pop_task(struct starpu_worker_s *worker);
															
 
																+struct starpu_task *_starpu_pop_task(struct _starpu_worker *worker);
															
 
																 /* pop every task that can be executed on the worker */
															
 
																 struct starpu_task *_starpu_pop_every_task(struct starpu_sched_ctx *sched_ctx);
															
 
																 void _starpu_sched_post_exec_hook(struct starpu_task *task);
															
 
																 void _starpu_wait_on_sched_event(void);
															
 
																+struct starpu_task *_starpu_create_conversion_task(starpu_data_handle_t handle,
															
 
																+						   unsigned int node);
															
 
																+
															
 
																+void _starpu_sched_pre_exec_hook(struct starpu_task *task);
															
 
																+
															
 
																 #endif // __SCHED_POLICY_H__
															
--- a/src/core/task.c
+++ b/src/core/task.c
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2009, 2010, 2011  Université de Bordeaux 1
															
 
																- * Copyright (C) 2010  Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2009-2012  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
															
 
																  * Copyright (C) 2011  Télécom-SudParis
															
 
																  * Copyright (C) 2011  INRIA
															
 
																  *
															
@@ -19,22 +19,24 @@
 
																 #include <starpu.h>
															
 
																 #include <starpu_profiling.h>
															
 
																-#include <starpu_task_bundle.h>
															
 
																 #include <core/workers.h>
															
 
																 #include <core/sched_ctx.h>
															
 
																 #include <core/jobs.h>
															
 
																 #include <core/task.h>
															
 
																+#include <core/task_bundle.h>
															
 
																 #include <common/config.h>
															
 
																 #include <common/utils.h>
															
 
																 #include <profiling/profiling.h>
															
 
																 #include <profiling/bound.h>
															
 
																+#include <math.h>
															
 
																+#include <string.h>
															
 
																 /* XXX this should be reinitialized when StarPU is shutdown (or we should make
															
 
																  * sure that no task remains !) */
															
 
																 /* TODO we could make this hierarchical to avoid contention ? */
															
 
																 static pthread_cond_t submitted_cond = PTHREAD_COND_INITIALIZER;
															
 
																 static pthread_mutex_t submitted_mutex = PTHREAD_MUTEX_INITIALIZER;
															
 
																-static long int nsubmitted = 0;
															
 
																+static long int nsubmitted = 0, nready = 0;
															
 
																 static void _starpu_increment_nsubmitted_tasks(void);
															
@@ -76,10 +78,11 @@ void starpu_task_init(struct starpu_task *task)
 
																 	task->profiling_info = NULL;
															
 
																-	task->predicted = -1.0;
															
 
																+	task->predicted = NAN;
															
 
																+	task->predicted_transfer = NAN;
															
 
																 	task->starpu_private = NULL;
															
 
																-
															
 
																+	task->magic = 42;
															
 
																 	task->sched_ctx = _starpu_get_initial_sched_ctx()->id;
															
 
																 	task->control_task = 0;
															
@@ -103,19 +106,11 @@ void starpu_task_deinit(struct starpu_task *task)
 
																 	}
															
 
																 	/* If case the task is (still) part of a bundle */
															
 
																-	struct starpu_task_bundle *bundle = task->bundle;
															
 
																+	starpu_task_bundle_t bundle = task->bundle;
															
 
																 	if (bundle)
															
 
																-	{
															
 
																-		PTHREAD_MUTEX_LOCK(&bundle->mutex);
															
 
																-		int ret = starpu_task_bundle_remove(bundle, task);
															
 
																-
															
 
																-		/* Perhaps the bundle was destroyed when removing the last
															
 
																-		 * entry */
															
 
																-		if (ret != 1)
															
 
																-			PTHREAD_MUTEX_UNLOCK(&bundle->mutex);
															
 
																-	}
															
 
																+		starpu_task_bundle_remove(bundle, task);
															
 
																-	starpu_job_t j = (struct starpu_job_s *)task->starpu_private;
															
 
																+	struct _starpu_job *j = (struct _starpu_job *)task->starpu_private;
															
 
																 	if (j)
															
 
																 		_starpu_job_destroy(j);
															
@@ -140,100 +135,220 @@ struct starpu_task * __attribute__((malloc)) starpu_task_create(void)
 
																  * called automatically after the execution of a task by setting the "destroy"
															
 
																  * flag of the starpu_task structure (default behaviour). Calling this function
															
 
																  * on a statically allocated task results in an undefined behaviour. */
															
 
																-void starpu_task_destroy(struct starpu_task *task)
															
 
																+void _starpu_task_destroy(struct starpu_task *task)
															
 
																 {
															
 
																-	STARPU_ASSERT(task);
															
 
																    /* If starpu_task_destroy is called in a callback, we just set the destroy
															
 
																       flag. The task will be destroyed after the callback returns */
															
 
																-   if (task == starpu_get_current_task()
															
 
																-       && _starpu_get_local_worker_status() == STATUS_CALLBACK) {
															
 
																+   if (task == starpu_task_get_current()
															
 
																+       && _starpu_get_local_worker_status() == STATUS_CALLBACK)
															
 
																+   {
															
 
																-      task->destroy = 1;
															
 
																+	   task->destroy = 1;
															
 
																-   } else {
															
 
																-
															
 
																-      starpu_task_deinit(task);
															
 
																-
															
 
																-      /* TODO handle the case of task with detach = 1 and destroy = 1 */
															
 
																-      /* TODO handle the case of non terminated tasks -> return -EINVAL */
															
 
																-	
															
 
																-      free(task);
															
 
																+   }
															
 
																+   else
															
 
																+   {
															
 
																+	   starpu_task_deinit(task);
															
 
																+	   /* TODO handle the case of task with detach = 1 and destroy = 1 */
															
 
																+	   /* TODO handle the case of non terminated tasks -> return -EINVAL */
															
 
																+	   free(task);
															
 
																    }
															
 
																 }
															
 
																+void starpu_task_destroy(struct starpu_task *task)
															
 
																+{
															
 
																+	STARPU_ASSERT(task);
															
 
																+	STARPU_ASSERT_MSG(!task->destroy || !task->detach, "starpu_task_destroy must not be called for task with destroy = 1 and detach = 1");
															
 
																+	_starpu_task_destroy(task);
															
 
																+}
															
 
																+
															
 
																 int starpu_task_wait(struct starpu_task *task)
															
 
																 {
															
 
																         _STARPU_LOG_IN();
															
 
																 	STARPU_ASSERT(task);
															
 
																-	if (task->detach || task->synchronous) {
															
 
																+	STARPU_ASSERT_MSG(!task->detach, "starpu_task_wait can only be called on tasks with detach = 0");
															
 
																+
															
 
																+	if (task->detach || task->synchronous)
															
 
																+	{
															
 
																 		_STARPU_DEBUG("Task is detached or asynchronous. Waiting returns immediately\n");
															
 
																 		_STARPU_LOG_OUT_TAG("einval");
															
 
																 		return -EINVAL;
															
 
																 	}
															
 
																-	if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls())) {
															
 
																+	if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls()))
															
 
																+	{
															
 
																 		_STARPU_LOG_OUT_TAG("edeadlk");
															
 
																 		return -EDEADLK;
															
 
																 	}
															
 
																-	starpu_job_t j = (struct starpu_job_s *)task->starpu_private;
															
 
																+	struct _starpu_job *j = (struct _starpu_job *)task->starpu_private;
															
 
																 	_starpu_wait_job(j);
															
 
																 	/* as this is a synchronous task, the liberation of the job
															
 
																 	   structure was deferred */
															
 
																 	if (task->destroy)
															
 
																-		free(task);
															
 
																+		_starpu_task_destroy(task);
															
 
																         _STARPU_LOG_OUT();
															
 
																 	return 0;
															
 
																 }
															
 
																-starpu_job_t _starpu_get_job_associated_to_task(struct starpu_task *task)
															
 
																+struct _starpu_job *_starpu_get_job_associated_to_task(struct starpu_task *task)
															
 
																 {
															
 
																 	STARPU_ASSERT(task);
															
 
																 	if (!task->starpu_private)
															
 
																 	{
															
 
																-		starpu_job_t j = _starpu_job_create(task);
															
 
																+		struct _starpu_job *j = _starpu_job_create(task);
															
 
																 		task->starpu_private = j;
															
 
																 	}
															
 
																-	return (struct starpu_job_s *)task->starpu_private;
															
 
																+	return (struct _starpu_job *)task->starpu_private;
															
 
																 }
															
 
																 /* NB in case we have a regenerable task, it is possible that the job was
															
 
																  * already counted. */
															
 
																-int _starpu_submit_job(starpu_job_t j, unsigned do_not_increment_nsubmitted)
															
 
																+int _starpu_submit_job(struct _starpu_job *j)
															
 
																 {
															
 
																         _STARPU_LOG_IN();
															
 
																 	/* notify bound computation of a new task */
															
 
																 	_starpu_bound_record(j);
															
 
																-	j->terminated = 0;
															
 
																+	_starpu_increment_nsubmitted_tasks();
															
 
																+	_starpu_increment_nsubmitted_tasks_of_sched_ctx(j->task->sched_ctx);
															
 
																-	if (!do_not_increment_nsubmitted){
															
 
																-		_starpu_increment_nsubmitted_tasks();
															
 
																-		_starpu_increment_nsubmitted_tasks_of_sched_ctx(j->task->sched_ctx);
															
 
																-	}
															
 
																+	_STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
															
 
																-	PTHREAD_MUTEX_LOCK(&j->sync_mutex);
															
 
																-	
															
 
																+	/* Need to atomically set submitted to 1 and check dependencies, since
															
 
																+	 * this is concurrent with _starpu_notify_cg */
															
 
																+	j->terminated = 0;
															
 
																 	j->submitted = 1;
															
 
																-       
															
 
																-	int ret = _starpu_enforce_deps_and_schedule(j, 1);
															
 
																+	int ret = _starpu_enforce_deps_and_schedule(j);
															
 
																 	PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
															
 
																         _STARPU_LOG_OUT();
															
 
																         return ret;
															
 
																 }
															
 
																+void _starpu_codelet_check_deprecated_fields(struct starpu_codelet *cl)
															
 
																+{
															
 
																+	if (!cl)
															
 
																+		return;
															
 
																+
															
 
																+	int is_where_unset = cl->where == 0;
															
 
																+
															
 
																+	/* Check deprecated and unset fields (where, <device>_func,
															
 
																+ 	 * <device>_funcs) */
															
 
																+
															
 
																+	/* CPU */
															
 
																+	if (cl->cpu_func && cl->cpu_func != STARPU_MULTIPLE_CPU_IMPLEMENTATIONS && cl->cpu_funcs[0])
															
 
																+	{
															
 
																+		fprintf(stderr, "[warning] [struct starpu_codelet] both cpu_func and cpu_funcs are set. Ignoring cpu_func.\n");
															
 
																+		cl->cpu_func = STARPU_MULTIPLE_CPU_IMPLEMENTATIONS;
															
 
																+	}
															
 
																+	if (cl->cpu_func && cl->cpu_func != STARPU_MULTIPLE_CPU_IMPLEMENTATIONS)
															
 
																+	{
															
 
																+		cl->cpu_funcs[0] = cl->cpu_func;
															
 
																+		cl->cpu_func = STARPU_MULTIPLE_CPU_IMPLEMENTATIONS;
															
 
																+	}
															
 
																+	if (cl->cpu_funcs[0] && cl->cpu_func == 0)
															
 
																+	{
															
 
																+		cl->cpu_func = STARPU_MULTIPLE_CPU_IMPLEMENTATIONS;
															
 
																+	}
															
 
																+	if (cl->cpu_funcs[0] && is_where_unset)
															
 
																+	{
															
 
																+		cl->where |= STARPU_CPU;
															
 
																+	}
															
 
																+
															
 
																+	/* CUDA */
															
 
																+	if (cl->cuda_func && cl->cuda_func != STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS && cl->cuda_funcs[0])
															
 
																+	{
															
 
																+		fprintf(stderr, "[warning] [struct starpu_codelet] both cuda_func and cuda_funcs are set. Ignoring cuda_func.\n");
															
 
																+		cl->cuda_func = STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS;
															
 
																+	}
															
 
																+	if (cl->cuda_func && cl->cuda_func != STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS)
															
 
																+	{
															
 
																+		cl->cuda_funcs[0] = cl->cuda_func;
															
 
																+		cl->cuda_func = STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS;
															
 
																+	}
															
 
																+	if (cl->cuda_funcs[0] && cl->cuda_func == 0)
															
 
																+	{
															
 
																+		cl->cuda_func = STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS;
															
 
																+	}
															
 
																+	if (cl->cuda_funcs[0] && is_where_unset)
															
 
																+	{
															
 
																+		cl->where |= STARPU_CUDA;
															
 
																+	}
															
 
																+
															
 
																+	/* OpenCL */
															
 
																+	if (cl->opencl_func && cl->opencl_func != STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS && cl->opencl_funcs[0])
															
 
																+	{
															
 
																+		fprintf(stderr, "[warning] [struct starpu_codelet] both opencl_func and opencl_funcs are set. Ignoring opencl_func.\n");
															
 
																+		cl->opencl_func = STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS;
															
 
																+	}
															
 
																+	if (cl->opencl_func && cl->opencl_func != STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS)
															
 
																+	{
															
 
																+		cl->opencl_funcs[0] = cl->opencl_func;
															
 
																+		cl->opencl_func = STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS;
															
 
																+	}
															
 
																+	if (cl->opencl_funcs[0] && cl->opencl_func == 0)
															
 
																+	{
															
 
																+		cl->opencl_func = STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS;
															
 
																+	}
															
 
																+	if (cl->opencl_funcs[0] && is_where_unset)
															
 
																+	{
															
 
																+		cl->where |= STARPU_OPENCL;
															
 
																+	}
															
 
																+
															
 
																+	/* Gordon */
															
 
																+	if (cl->gordon_func && cl->gordon_func != STARPU_MULTIPLE_GORDON_IMPLEMENTATIONS)
															
 
																+	{
															
 
																+		cl->gordon_funcs[0] = cl->gordon_func;
															
 
																+		cl->gordon_func = STARPU_MULTIPLE_GORDON_IMPLEMENTATIONS;
															
 
																+	}
															
 
																+	if (cl->gordon_funcs[0] && cl->gordon_func == 0)
															
 
																+	{
															
 
																+		cl->gordon_func = STARPU_MULTIPLE_GORDON_IMPLEMENTATIONS;
															
 
																+	}
															
 
																+	if (cl->gordon_funcs[0] && is_where_unset)
															
 
																+	{
															
 
																+		cl->where = STARPU_GORDON;
															
 
																+	}
															
 
																+}
															
 
																+
															
 
																+void _starpu_task_check_deprecated_fields(struct starpu_task *task)
															
 
																+{
															
 
																+	if (task->cl)
															
 
																+	{
															
 
																+		unsigned i;
															
 
																+		for(i=0; i<task->cl->nbuffers ; i++)
															
 
																+		{
															
 
																+			if (task->buffers[i].handle && task->handles[i])
															
 
																+			{
															
 
																+				fprintf(stderr, "[warning][struct starpu_task] task->buffers[%u] and task->handles[%u] both set. Ignoring task->buffers[%u] ?\n", i, i, i);
															
 
																+				STARPU_ASSERT(task->buffers[i].mode == task->cl->modes[i]);
															
 
																+				STARPU_ABORT();
															
 
																+			}
															
 
																+			if (task->buffers[i].handle)
															
 
																+			{
															
 
																+				task->handles[i] = task->buffers[i].handle;
															
 
																+				task->cl->modes[i] = task->buffers[i].mode;
															
 
																+			}
															
 
																+			task->buffers[i].handle = NULL;
															
 
																+			task->buffers[i].mode = STARPU_NONE;
															
 
																+		}
															
 
																+	}
															
 
																+}
															
 
																+
															
 
																 /* application should submit new tasks to StarPU through this function */
															
 
																 int starpu_task_submit(struct starpu_task *task)
															
 
																 {
															
 
																+	STARPU_ASSERT(task);
															
 
																+	STARPU_ASSERT(task->magic == 42);
															
 
																 	unsigned nsched_ctxs = _starpu_get_nsched_ctxs();
															
 
																 	task->sched_ctx = (nsched_ctxs == 1 || task->control_task) ? 
															
@@ -246,7 +361,8 @@ int starpu_task_submit(struct starpu_task *task)
 
																 	{
															
 
																 		/* Perhaps it is not possible to submit a synchronous
															
 
																 		 * (blocking) task */
															
 
																-                if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls())) {
															
 
																+                if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls()))
															
 
																+		{
															
 
																                         _STARPU_LOG_OUT_TAG("EDEADLK");
															
 
																 			return -EDEADLK;
															
 
																                 }
															
@@ -254,26 +370,33 @@ int starpu_task_submit(struct starpu_task *task)
 
																 		task->detach = 0;
															
 
																 	}
															
 
																-	STARPU_ASSERT(task);
															
 
																+	_starpu_task_check_deprecated_fields(task);
															
 
																+	_starpu_codelet_check_deprecated_fields(task->cl);
															
 
																 	if (task->cl)
															
 
																 	{
															
 
																-		uint32_t where = task->cl->where;
															
 
																 		unsigned i;
															
 
																-		if (!_starpu_worker_exists(where)) {
															
 
																+
															
 
																+		/* Check the type of worker(s) required by the task exist */
															
 
																+		if (!_starpu_worker_exists(task))
															
 
																+		{
															
 
																                         _STARPU_LOG_OUT_TAG("ENODEV");
															
 
																 			return -ENODEV;
															
 
																                 }
															
 
																-		assert(task->cl->nbuffers <= STARPU_NMAXBUFS);
															
 
																-		for (i = 0; i < task->cl->nbuffers; i++) {
															
 
																+
															
 
																+		/* Check buffers */
															
 
																+		STARPU_ASSERT(task->cl->nbuffers <= STARPU_NMAXBUFS);
															
 
																+		for (i = 0; i < task->cl->nbuffers; i++)
															
 
																+		{
															
 
																 			/* Make sure handles are not partitioned */
															
 
																-			assert(task->buffers[i].handle->nchildren == 0);
															
 
																+			STARPU_ASSERT(task->handles[i]->nchildren == 0);
															
 
																 		}
															
 
																 		/* In case we require that a task should be explicitely
															
 
																 		 * executed on a specific worker, we make sure that the worker
															
 
																 		 * is able to execute this task.  */
															
 
																-		if (task->execute_on_a_specific_worker && !starpu_combined_worker_may_execute_task(task->workerid, task, 0)) {
															
 
																+		if (task->execute_on_a_specific_worker && !starpu_combined_worker_can_execute_task(task->workerid, task, 0))
															
 
																+		{
															
 
																                         _STARPU_LOG_OUT_TAG("ENODEV");
															
 
																 			return -ENODEV;
															
 
																                 }
															
@@ -300,17 +423,21 @@ int starpu_task_submit(struct starpu_task *task)
 
																 	if (profiling)
															
 
																-		starpu_clock_gettime(&info->submit_time);
															
 
																+		_starpu_clock_gettime(&info->submit_time);
															
 
																-	/* internally, StarPU manipulates a starpu_job_t which is a wrapper around a
															
 
																+	/* internally, StarPU manipulates a struct _starpu_job * which is a wrapper around a
															
 
																 	* task structure, it is possible that this job structure was already
															
 
																 	* allocated, for instance to enforce task depenencies. */
															
 
																-	starpu_job_t j = _starpu_get_job_associated_to_task(task);
															
 
																+	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
															
 
																-	ret = _starpu_submit_job(j, 0);
															
 
																+	ret = _starpu_submit_job(j);
															
 
																 	if (is_sync)
															
 
																+	{
															
 
																 		_starpu_wait_job(j);
															
 
																+		if (task->destroy)
															
 
																+		     _starpu_task_destroy(task);
															
 
																+	}
															
 
																         _STARPU_LOG_OUT();
															
 
																 	return ret;
															
@@ -322,16 +449,115 @@ int _starpu_task_submit_internal(struct starpu_task *task)
 
																 	return starpu_task_submit(task);
															
 
																 }
															
 
																-void starpu_display_codelet_stats(struct starpu_codelet_t *cl)
															
 
																+/* The StarPU core can submit tasks directly to the scheduler or a worker,
															
 
																+ * skipping dependencies completely (when it knows what it is doing).  */
															
 
																+int _starpu_task_submit_nodeps(struct starpu_task *task)
															
 
																+{
															
 
																+	_starpu_task_check_deprecated_fields(task);
															
 
																+	_starpu_codelet_check_deprecated_fields(task->cl);
															
 
																+
															
 
																+	if (task->cl)
															
 
																+	{
															
 
																+		if (task->cl->model)
															
 
																+			_starpu_load_perfmodel(task->cl->model);
															
 
																+
															
 
																+		if (task->cl->power_model)
															
 
																+			_starpu_load_perfmodel(task->cl->power_model);
															
 
																+	}
															
 
																+
															
 
																+	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
															
 
																+	_starpu_increment_nsubmitted_tasks();
															
 
																+
															
 
																+	_STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
															
 
																+
															
 
																+	j->submitted = 1;
															
 
																+
															
 
																+	if (task->cl)
															
 
																+	{
															
 
																+		/* This would be done by data dependencies checking */
															
 
																+		unsigned i;
															
 
																+		for (i=0 ; i<task->cl->nbuffers ; i++)
															
 
																+		{
															
 
																+			j->ordered_buffers[i].handle = j->task->handles[i];
															
 
																+			j->ordered_buffers[i].mode = j->task->cl->modes[i];
															
 
																+		}
															
 
																+	}
															
 
																+
															
 
																+	_STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
															
 
																+
															
 
																+	return _starpu_push_task(j);
															
 
																+}
															
 
																+
															
 
																+/*
															
 
																+ * worker->sched_mutex must be locked when calling this function.
															
 
																+ */
															
 
																+int _starpu_task_submit_conversion_task(struct starpu_task *task,
															
 
																+					unsigned int workerid)
															
 
																+{
															
 
																+	STARPU_ASSERT(task->cl);
															
 
																+	STARPU_ASSERT(task->execute_on_a_specific_worker);
															
 
																+
															
 
																+	_starpu_task_check_deprecated_fields(task);
															
 
																+	_starpu_codelet_check_deprecated_fields(task->cl);
															
 
																+
															
 
																+	/* We should factorize that */
															
 
																+	if (task->cl->model)
															
 
																+		_starpu_load_perfmodel(task->cl->model);
															
 
																+
															
 
																+	if (task->cl->power_model)
															
 
																+		_starpu_load_perfmodel(task->cl->power_model);
															
 
																+
															
 
																+	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
															
 
																+	_starpu_increment_nsubmitted_tasks();
															
 
																+	_STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
															
 
																+	j->submitted = 1;
															
 
																+	_starpu_increment_nready_tasks();
															
 
																+
															
 
																+	unsigned i;
															
 
																+	for (i=0 ; i<task->cl->nbuffers ; i++)
															
 
																+	{
															
 
																+		j->ordered_buffers[i].handle = j->task->handles[i];
															
 
																+		j->ordered_buffers[i].mode = j->task->cl->modes[i];
															
 
																+	}
															
 
																+
															
 
																+        _STARPU_LOG_IN();
															
 
																+
															
 
																+	task->status = STARPU_TASK_READY;
															
 
																+	_starpu_profiling_set_task_push_start_time(task);
															
 
																+
															
 
																+	unsigned node = starpu_worker_get_memory_node(workerid);
															
 
																+	if (starpu_get_prefetch_flag())
															
 
																+		starpu_prefetch_task_input_on_node(task, node);
															
 
																+
															
 
																+	struct _starpu_worker *worker;
															
 
																+	worker = _starpu_get_worker_struct(workerid);
															
 
																+	starpu_task_list_push_front(&worker->local_tasks, task);
															
 
																+
															
 
																+	_starpu_profiling_set_task_push_end_time(task);
															
 
																+
															
 
																+        _STARPU_LOG_OUT();
															
 
																+	_STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
															
 
																+
															
 
																+	return 0;
															
 
																+}
															
 
																+
															
 
																+void starpu_codelet_init(struct starpu_codelet *cl)
															
 
																+{
															
 
																+	memset(cl, 0, sizeof(struct starpu_codelet));
															
 
																+}
															
 
																+
															
 
																+void starpu_display_codelet_stats(struct starpu_codelet *cl)
															
 
																 {
															
 
																 	unsigned worker;
															
 
																 	unsigned nworkers = starpu_worker_get_count();
															
 
																-	if (cl->model && cl->model->symbol)
															
 
																+	if (cl->name)
															
 
																+		fprintf(stderr, "Statistics for codelet %s\n", cl->name);
															
 
																+	else if (cl->model && cl->model->symbol)
															
 
																 		fprintf(stderr, "Statistics for codelet %s\n", cl->model->symbol);
															
 
																 	unsigned long total = 0;
															
 
																-	
															
 
																+
															
 
																 	for (worker = 0; worker < nworkers; worker++)
															
 
																 		total += cl->per_worker_stats[worker];
															
@@ -355,42 +581,72 @@ int starpu_task_wait_for_all(void)
 
																 	unsigned sched_ctx = nsched_ctxs == 1 ? 0 : starpu_get_sched_ctx();
															
 
																 	starpu_wait_for_all_tasks_of_sched_ctx(sched_ctx);
															
 
																-/* 	if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls())) */
															
 
																-/* 		return -EDEADLK; */
															
 
																+	return 0;
															
 
																+}
															
 
																-/* 	PTHREAD_MUTEX_LOCK(&submitted_mutex); */
															
 
																+/*
															
 
																+ * We wait until there is no ready task any more (i.e. StarPU will not be able
															
 
																+ * to progress any more).
															
 
																+ */
															
 
																+int starpu_task_wait_for_no_ready(void)
															
 
																+{
															
 
																+	if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls()))
															
 
																+		return -EDEADLK;
															
 
																-/* 	STARPU_TRACE_TASK_WAIT_FOR_ALL; */
															
 
																+	_STARPU_PTHREAD_MUTEX_LOCK(&submitted_mutex);
															
 
																+
															
 
																+	_STARPU_TRACE_TASK_WAIT_FOR_ALL;
															
 
																+
															
 
																+	while (nready > 0)
															
 
																+		_STARPU_PTHREAD_COND_WAIT(&submitted_cond, &submitted_mutex);
															
 
																+
															
 
																+	_STARPU_PTHREAD_MUTEX_UNLOCK(&submitted_mutex);
															
 
																-/* 	while (nsubmitted > 0) */
															
 
																-/* 		PTHREAD_COND_WAIT(&submitted_cond, &submitted_mutex); */
															
 
																-	
															
 
																-/* 	PTHREAD_MUTEX_UNLOCK(&submitted_mutex); */
															
 
																 	return 0;
															
 
																 }
															
 
																 void _starpu_decrement_nsubmitted_tasks(void)
															
 
																 {
															
 
																-	PTHREAD_MUTEX_LOCK(&submitted_mutex);
															
 
																+	_STARPU_PTHREAD_MUTEX_LOCK(&submitted_mutex);
															
 
																 	if (--nsubmitted == 0)
															
 
																-		PTHREAD_COND_BROADCAST(&submitted_cond);
															
 
																+		_STARPU_PTHREAD_COND_BROADCAST(&submitted_cond);
															
 
																-	STARPU_TRACE_UPDATE_TASK_CNT(nsubmitted);
															
 
																+	_STARPU_TRACE_UPDATE_TASK_CNT(nsubmitted);
															
 
																-	PTHREAD_MUTEX_UNLOCK(&submitted_mutex);
															
 
																+	_STARPU_PTHREAD_MUTEX_UNLOCK(&submitted_mutex);
															
 
																 }
															
 
																 static void _starpu_increment_nsubmitted_tasks(void)
															
 
																 {
															
 
																-	PTHREAD_MUTEX_LOCK(&submitted_mutex);
															
 
																+	_STARPU_PTHREAD_MUTEX_LOCK(&submitted_mutex);
															
 
																 	nsubmitted++;
															
 
																-	STARPU_TRACE_UPDATE_TASK_CNT(nsubmitted);
															
 
																+	_STARPU_TRACE_UPDATE_TASK_CNT(nsubmitted);
															
 
																+
															
 
																+	_STARPU_PTHREAD_MUTEX_UNLOCK(&submitted_mutex);
															
 
																+}
															
 
																+
															
 
																+void _starpu_increment_nready_tasks(void)
															
 
																+{
															
 
																+	_STARPU_PTHREAD_MUTEX_LOCK(&submitted_mutex);
															
 
																+
															
 
																+	nready++;
															
 
																+
															
 
																+	_STARPU_PTHREAD_MUTEX_UNLOCK(&submitted_mutex);
															
 
																+}
															
 
																+
															
 
																+void _starpu_decrement_nready_tasks(void)
															
 
																+{
															
 
																+	_STARPU_PTHREAD_MUTEX_LOCK(&submitted_mutex);
															
 
																+
															
 
																+	if (--nready == 0)
															
 
																+		_STARPU_PTHREAD_COND_BROADCAST(&submitted_cond);
															
 
																+
															
 
																+	_STARPU_PTHREAD_MUTEX_UNLOCK(&submitted_mutex);
															
 
																-	PTHREAD_MUTEX_UNLOCK(&submitted_mutex);
															
 
																 }
															
 
																 void _starpu_initialize_current_task_key(void)
															
@@ -401,7 +657,7 @@ void _starpu_initialize_current_task_key(void)
 
																 /* Return the task currently executed by the worker, or NULL if this is called
															
 
																  * either from a thread that is not a task or simply because there is no task
															
 
																  * being executed at the moment. */
															
 
																-struct starpu_task *starpu_get_current_task(void)
															
 
																+struct starpu_task *starpu_task_get_current(void)
															
 
																 {
															
 
																 	return (struct starpu_task *) pthread_getspecific(current_task_key);
															
 
																 }
															
@@ -410,3 +666,95 @@ void _starpu_set_current_task(struct starpu_task *task)
 
																 {
															
 
																 	pthread_setspecific(current_task_key, task);
															
 
																 }
															
 
																+
															
 
																+/*
															
 
																+ * Returns 0 if tasks does not use any multiformat handle, 1 otherwise.
															
 
																+ */
															
 
																+int
															
 
																+_starpu_task_uses_multiformat_handles(struct starpu_task *task)
															
 
																+{
															
 
																+	unsigned i;
															
 
																+	for (i = 0; i < task->cl->nbuffers; i++)
															
 
																+	{
															
 
																+		if (_starpu_data_is_multiformat_handle(task->handles[i]))
															
 
																+			return 1;
															
 
																+	}
															
 
																+
															
 
																+	return 0;
															
 
																+}
															
 
																+
															
 
																+/*
															
 
																+ * Checks whether the given handle needs to be converted in order to be used on
															
 
																+ * the node given as the second argument.
															
 
																+ */
															
 
																+int
															
 
																+_starpu_handle_needs_conversion_task(starpu_data_handle_t handle,
															
 
																+				     unsigned int node)
															
 
																+{
															
 
																+	enum starpu_node_kind node_kind;
															
 
																+
															
 
																+	node_kind = starpu_node_get_kind(node);
															
 
																+
															
 
																+	/*
															
 
																+	 * Here, we assume that CUDA devices and OpenCL devices use the 
															
 
																+	 * same data structure. A conversion is only needed when moving 
															
 
																+	 * data from a CPU to a GPU, or the other way around.
															
 
																+	 */
															
 
																+	switch (node_kind)
															
 
																+	{
															
 
																+		case STARPU_CPU_RAM:
															
 
																+			switch(starpu_node_get_kind(handle->mf_node))
															
 
																+			{
															
 
																+				case STARPU_CPU_RAM:
															
 
																+					return 0;
															
 
																+				case STARPU_CUDA_RAM:      /* Fall through */
															
 
																+				case STARPU_OPENCL_RAM:
															
 
																+					return 1;
															
 
																+				case STARPU_SPU_LS: /* Not supported */
															
 
																+				default:
															
 
																+					STARPU_ASSERT(0);
															
 
																+			}
															
 
																+			break;
															
 
																+		case STARPU_CUDA_RAM:    /* Fall through */
															
 
																+		case STARPU_OPENCL_RAM:
															
 
																+			switch(starpu_node_get_kind(handle->mf_node))
															
 
																+			{
															
 
																+				case STARPU_CPU_RAM:
															
 
																+					return 1;
															
 
																+				case STARPU_CUDA_RAM:
															
 
																+				case STARPU_OPENCL_RAM:
															
 
																+					return 0;
															
 
																+				case STARPU_SPU_LS: /* Not supported */
															
 
																+				default:
															
 
																+					STARPU_ASSERT(0);
															
 
																+			}
															
 
																+			break;
															
 
																+		case STARPU_SPU_LS:            /* Not supported */
															
 
																+		default:
															
 
																+			STARPU_ASSERT(0);
															
 
																+	}
															
 
																+}
															
 
																+
															
 
																+starpu_cpu_func_t _starpu_task_get_cpu_nth_implementation(struct starpu_codelet *cl, unsigned nimpl)
															
 
																+{
															
 
																+	STARPU_ASSERT(cl->cpu_func == STARPU_MULTIPLE_CPU_IMPLEMENTATIONS);
															
 
																+	return cl->cpu_funcs[nimpl];
															
 
																+}
															
 
																+
															
 
																+starpu_cuda_func_t _starpu_task_get_cuda_nth_implementation(struct starpu_codelet *cl, unsigned nimpl)
															
 
																+{
															
 
																+	STARPU_ASSERT(cl->cuda_func == STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS);
															
 
																+	return cl->cuda_funcs[nimpl];
															
 
																+}
															
 
																+
															
 
																+starpu_opencl_func_t _starpu_task_get_opencl_nth_implementation(struct starpu_codelet *cl, unsigned nimpl)
															
 
																+{
															
 
																+	STARPU_ASSERT(cl->opencl_func == STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS);
															
 
																+	return cl->opencl_funcs[nimpl];
															
 
																+}
															
 
																+
															
 
																+starpu_gordon_func_t _starpu_task_get_gordon_nth_implementation(struct starpu_codelet *cl, unsigned nimpl)
															
 
																+{
															
 
																+	STARPU_ASSERT(cl->gordon_func == STARPU_MULTIPLE_GORDON_IMPLEMENTATIONS);
															
 
																+	return cl->gordon_funcs[nimpl];
															
 
																+}
															
--- a/src/core/task.h
+++ b/src/core/task.h
@@ -1,7 +1,8 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2009, 2010  Université de Bordeaux 1
															
 
																- * Copyright (C) 2010  Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2009-2012  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2011 INRIA
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -22,9 +23,16 @@
 
																 #include <common/config.h>
															
 
																 #include <core/jobs.h>
															
 
																+/* Internal version of starpu_task_destroy: don't check task->destroy flag */
															
 
																+void _starpu_task_destroy(struct starpu_task *task);
															
 
																+
															
 
																 /* In order to implement starpu_task_wait_for_all, we keep track of the number of
															
 
																  * task currently submitted */
															
 
																 void _starpu_decrement_nsubmitted_tasks(void);
															
 
																+/* In order to implement starpu_task_wait_for_no_ready, we keep track of the number of
															
 
																+ * task currently ready */
															
 
																+void _starpu_increment_nready_tasks(void);
															
 
																+void _starpu_decrement_nready_tasks(void);
															
 
																 /* A pthread key is used to store the task currently executed on the thread.
															
 
																  * _starpu_initialize_current_task_key initializes this pthread key and
															
@@ -34,15 +42,35 @@ void _starpu_set_current_task(struct starpu_task *task);
 
																 /* NB the second argument makes it possible to count regenerable tasks only
															
 
																  * once. */
															
 
																-int _starpu_submit_job(starpu_job_t j, unsigned do_not_increment_nsubmitted);
															
 
																+int _starpu_submit_job(struct _starpu_job *j);
															
 
																+
															
 
																+int _starpu_task_submit_nodeps(struct starpu_task *task);
															
 
																+
															
 
																+void _starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[], int check);
															
 
																 /* Returns the job structure (which is the internal data structure associated
															
 
																  * to a task). */
															
 
																-starpu_job_t _starpu_get_job_associated_to_task(struct starpu_task *task);
															
 
																+struct _starpu_job *_starpu_get_job_associated_to_task(struct starpu_task *task);
															
 
																 struct starpu_task *_starpu_create_task_alias(struct starpu_task *task);
															
 
																 /* Submits starpu internal tasks to the initial context */
															
 
																 int _starpu_task_submit_internal(struct starpu_task *task);
															
 
																+int _starpu_handle_needs_conversion_task(starpu_data_handle_t handle,
															
 
																+					 unsigned int node);
															
 
																+
															
 
																+int _starpu_task_uses_multiformat_handles(struct starpu_task *task);
															
 
																+
															
 
																+int _starpu_task_submit_conversion_task(struct starpu_task *task,
															
 
																+					unsigned int workerid);
															
 
																+
															
 
																+void _starpu_task_check_deprecated_fields(struct starpu_task *task);
															
 
																+void _starpu_codelet_check_deprecated_fields(struct starpu_codelet *cl);
															
 
																+
															
 
																+starpu_cpu_func_t _starpu_task_get_cpu_nth_implementation(struct starpu_codelet *cl, unsigned nimpl);
															
 
																+starpu_cuda_func_t _starpu_task_get_cuda_nth_implementation(struct starpu_codelet *cl, unsigned nimpl);
															
 
																+starpu_opencl_func_t _starpu_task_get_opencl_nth_implementation(struct starpu_codelet *cl, unsigned nimpl);
															
 
																+starpu_gordon_func_t _starpu_task_get_gordon_nth_implementation(struct starpu_codelet *cl, unsigned nimpl);
															
 
																+
															
 
																 #endif // __CORE_TASK_H__
															
--- a/src/core/task_bundle.c
+++ b/src/core/task_bundle.c
@@ -2,6 +2,7 @@
 
																  *
															
 
																  * Copyright (C) 2011  Université de Bordeaux 1
															
 
																  * Copyright (C) 2011  Télécom-SudParis
															
 
																+ * Copyright (C) 2012  Inria
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -17,69 +18,50 @@
 
																 #include <starpu.h>
															
 
																 #include <starpu_task_bundle.h>
															
 
																+#include <core/task_bundle.h>
															
 
																 #include <starpu_scheduler.h>
															
 
																 #include <common/config.h>
															
 
																 #include <common/utils.h>
															
 
																 #include <common/list.h>
															
 
																 /* Initialize a task bundle */
															
 
																-void starpu_task_bundle_init(struct starpu_task_bundle *bundle)
															
 
																+void starpu_task_bundle_create(starpu_task_bundle_t *bundle)
															
 
																 {
															
 
																-	STARPU_ASSERT(bundle);
															
 
																+	*bundle = (starpu_task_bundle_t) malloc(sizeof(struct _starpu_task_bundle));
															
 
																+	STARPU_ASSERT(*bundle);
															
 
																-	PTHREAD_MUTEX_INIT(&bundle->mutex, NULL);
															
 
																-	bundle->closed = 0;
															
 
																+	_STARPU_PTHREAD_MUTEX_INIT(&(*bundle)->mutex, NULL);
															
 
																+	/* Of course at the beginning a bundle is open,
															
 
																+	 * user can insert and remove tasks from it */
															
 
																+	(*bundle)->closed = 0;
															
 
																 	/* Start with an empty list */
															
 
																-	bundle->previous_workerid = -1;
															
 
																-	bundle->list = NULL;
															
 
																+	(*bundle)->list = NULL;
															
 
																-	/* By default, bundle are destroyed */
															
 
																-	bundle->destroy = 1;
															
 
																-
															
 
																-}
															
 
																-
															
 
																-/* Deinitialize a bundle. In case the destroy flag is set, the bundle structure
															
 
																- * is freed too. */
															
 
																-void starpu_task_bundle_deinit(struct starpu_task_bundle *bundle)
															
 
																-{
															
 
																-	/* Remove all entries from the bundle (which is likely to be empty) */
															
 
																-	while (bundle->list)
															
 
																-	{
															
 
																-		struct starpu_task_bundle_entry *entry = bundle->list;
															
 
																-		bundle->list = bundle->list->next;
															
 
																-		free(entry);
															
 
																-	}
															
 
																-
															
 
																-	PTHREAD_MUTEX_DESTROY(&bundle->mutex);
															
 
																-
															
 
																-	if (bundle->destroy)
															
 
																-		free(bundle);
															
 
																 }
															
 
																-/* Insert a task into a bundle. */
															
 
																-int starpu_task_bundle_insert(struct starpu_task_bundle *bundle, struct starpu_task *task)
															
 
																+int starpu_task_bundle_insert(starpu_task_bundle_t bundle, struct starpu_task *task)
															
 
																 {
															
 
																-	PTHREAD_MUTEX_LOCK(&bundle->mutex);
															
 
																+	_STARPU_PTHREAD_MUTEX_LOCK(&bundle->mutex);
															
 
																 	if (bundle->closed)
															
 
																 	{
															
 
																-		/* The bundle is closed, we cannot add tasks anymore */
															
 
																-		PTHREAD_MUTEX_UNLOCK(&bundle->mutex);
															
 
																+		/* The bundle is closed, we cannot add task anymore */
															
 
																+		_STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex);
															
 
																 		return -EPERM;
															
 
																 	}
															
 
																 	if (task->status != STARPU_TASK_INVALID)
															
 
																 	{
															
 
																-		/* the task has already been submitted, it's too late to put it
															
 
																+		/* The task has already been submitted, it's too late to put it
															
 
																 		 * into a bundle now. */
															
 
																-		PTHREAD_MUTEX_UNLOCK(&bundle->mutex);
															
 
																+		_STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex);
															
 
																 		return -EINVAL;
															
 
																 	}
															
 
																 	/* Insert a task at the end of the bundle */
															
 
																-	struct starpu_task_bundle_entry *entry;
															
 
																-	entry = (struct starpu_task_bundle_entry *) malloc(sizeof(struct starpu_task_bundle_entry));
															
 
																+	struct _starpu_task_bundle_entry *entry;
															
 
																+	entry = (struct _starpu_task_bundle_entry *) malloc(sizeof(struct _starpu_task_bundle_entry));
															
 
																 	STARPU_ASSERT(entry);
															
 
																 	entry->task = task;
															
 
																 	entry->next = NULL;
															
@@ -88,8 +70,9 @@ int starpu_task_bundle_insert(struct starpu_task_bundle *bundle, struct starpu_t
 
																 	{
															
 
																 		bundle->list = entry;
															
 
																 	}
															
 
																-	else {
															
 
																-		struct starpu_task_bundle_entry *item;
															
 
																+	else
															
 
																+	{
															
 
																+		struct _starpu_task_bundle_entry *item;
															
 
																 		item = bundle->list;
															
 
																 		while (item->next)
															
 
																 			item = item->next;
															
@@ -97,24 +80,28 @@ int starpu_task_bundle_insert(struct starpu_task_bundle *bundle, struct starpu_t
 
																 		item->next = entry;
															
 
																 	}
															
 
																+	/* Mark the task as belonging to the bundle */
															
 
																 	task->bundle = bundle;
															
 
																-	PTHREAD_MUTEX_UNLOCK(&bundle->mutex);
															
 
																+	_STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex);
															
 
																 	return 0;
															
 
																 }
															
 
																-/* Remove a task from a bundle. This method must be called with bundle->mutex
															
 
																- * hold. This function returns 0 if the task was found, -ENOENT if the element
															
 
																- * was not found, 1 if the element is found and if the list was deinitialized
															
 
																- * because it became empty. */
															
 
																-int starpu_task_bundle_remove(struct starpu_task_bundle *bundle, struct starpu_task *task)
															
 
																+int starpu_task_bundle_remove(starpu_task_bundle_t bundle, struct starpu_task *task)
															
 
																 {
															
 
																-	struct starpu_task_bundle_entry *item;
															
 
																+	struct _starpu_task_bundle_entry *item;
															
 
																+
															
 
																+	_STARPU_PTHREAD_MUTEX_LOCK(&bundle->mutex);
															
 
																 	item = bundle->list;
															
 
																+	/* List is empty, there is no way the task
															
 
																+	 * belongs to it */
															
 
																 	if (!item)
															
 
																+	{
															
 
																+		_STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex);
															
 
																 		return -ENOENT;
															
 
																+	}
															
 
																 	STARPU_ASSERT(task->bundle == bundle);
															
 
																 	task->bundle = NULL;
															
@@ -128,23 +115,27 @@ int starpu_task_bundle_remove(struct starpu_task_bundle *bundle, struct starpu_t
 
																 		/* If the list is now empty, deinitialize the bundle */
															
 
																 		if (bundle->closed && bundle->list == NULL)
															
 
																 		{
															
 
																-			PTHREAD_MUTEX_UNLOCK(&bundle->mutex);
															
 
																-			starpu_task_bundle_deinit(bundle);
															
 
																-			return 1;
															
 
																+			_STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex);
															
 
																+			_starpu_task_bundle_destroy(bundle);
															
 
																+			return 0;
															
 
																 		}
															
 
																+		_STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex);
															
 
																 		return 0;
															
 
																 	}
															
 
																+	/* Go through the list until we find the right task,
															
 
																+	 * then we delete it */
															
 
																 	while (item->next)
															
 
																 	{
															
 
																-		struct starpu_task_bundle_entry *next;
															
 
																+		struct _starpu_task_bundle_entry *next;
															
 
																 		next = item->next;
															
 
																 		if (next->task == task)
															
 
																 		{
															
 
																 			/* Remove the next element */
															
 
																 			item->next = next->next;
															
 
																+			_STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex);
															
 
																 			free(next);
															
 
																 			return 0;
															
 
																 		}
															
@@ -152,101 +143,61 @@ int starpu_task_bundle_remove(struct starpu_task_bundle *bundle, struct starpu_t
 
																 		item = next;
															
 
																 	}
															
 
																+	_STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex);
															
 
																+
															
 
																 	/* We could not find the task in the bundle */
															
 
																 	return -ENOENT;
															
 
																 }
															
 
																-/* Close a bundle. No task can be added to a closed bundle. A closed bundle
															
 
																- * automatically gets deinitialized when it becomes empty. */
															
 
																-void starpu_task_bundle_close(struct starpu_task_bundle *bundle)
															
 
																+/* Close a bundle. No task can be added to a closed bundle. Tasks can still be
															
 
																+ * removed from a closed bundle. A closed bundle automatically gets
															
 
																+ * deinitialized when it becomes empty. A closed bundle cannot be reopened. */
															
 
																+void starpu_task_bundle_close(starpu_task_bundle_t bundle)
															
 
																 {
															
 
																-	PTHREAD_MUTEX_LOCK(&bundle->mutex);
															
 
																+	_STARPU_PTHREAD_MUTEX_LOCK(&bundle->mutex);
															
 
																-	/* If the bundle is already empty, we deinitialize it now. */
															
 
																+	/* If the bundle is already empty, we deinitialize it now as the
															
 
																+	 * user closed it and thus doesn't intend to insert new tasks in it. */
															
 
																 	if (bundle->list == NULL)
															
 
																 	{
															
 
																-		PTHREAD_MUTEX_UNLOCK(&bundle->mutex);
															
 
																-		starpu_task_bundle_deinit(bundle);
															
 
																+		_STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex);
															
 
																+		_starpu_task_bundle_destroy(bundle);
															
 
																 		return;
															
 
																 	}
															
 
																 	/* Mark the bundle as closed */
															
 
																 	bundle->closed = 1;
															
 
																-	PTHREAD_MUTEX_UNLOCK(&bundle->mutex);
															
 
																+	_STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex);
															
 
																 }
															
 
																-/* Return the expected duration of the entire task bundle in µs */
															
 
																-double starpu_task_bundle_expected_length(struct starpu_task_bundle *bundle,  enum starpu_perf_archtype arch, unsigned nimpl)
															
 
																+void _starpu_task_bundle_destroy(starpu_task_bundle_t bundle)
															
 
																 {
															
 
																-	double expected_length = 0.0;
															
 
																-
															
 
																-	/* We expect the length of the bundle the be the sum of the different tasks length. */
															
 
																-	PTHREAD_MUTEX_LOCK(&bundle->mutex);
															
 
																-
															
 
																-	struct starpu_task_bundle_entry *entry;
															
 
																-	entry = bundle->list;
															
 
																-
															
 
																-	while (entry) {
															
 
																-		double task_length = starpu_task_expected_length(entry->task, arch, nimpl);
															
 
																-
															
 
																-		/* In case the task is not calibrated, we consider the task
															
 
																-		 * ends immediately. */
															
 
																-		if (task_length > 0.0)
															
 
																-			expected_length += task_length;
															
 
																-
															
 
																-		entry = entry->next;
															
 
																+	/* Remove all entries from the bundle (which is likely to be empty) */
															
 
																+	while (bundle->list)
															
 
																+	{
															
 
																+		struct _starpu_task_bundle_entry *entry = bundle->list;
															
 
																+		bundle->list = bundle->list->next;
															
 
																+		free(entry);
															
 
																 	}
															
 
																-	
															
 
																-	PTHREAD_MUTEX_UNLOCK(&bundle->mutex);
															
 
																-	return expected_length;
															
 
																-}
															
 
																-
															
 
																-/* Return the expected power consumption of the entire task bundle in J */
															
 
																-double starpu_task_bundle_expected_power(struct starpu_task_bundle *bundle,  enum starpu_perf_archtype arch, unsigned nimpl)
															
 
																-{
															
 
																-	double expected_power = 0.0;
															
 
																+	_STARPU_PTHREAD_MUTEX_DESTROY(&bundle->mutex);
															
 
																-	/* We expect total consumption of the bundle the be the sum of the different tasks consumption. */
															
 
																-	PTHREAD_MUTEX_LOCK(&bundle->mutex);
															
 
																-
															
 
																-	struct starpu_task_bundle_entry *entry;
															
 
																-	entry = bundle->list;
															
 
																-
															
 
																-	while (entry) {
															
 
																-		double task_power = starpu_task_expected_power(entry->task, arch, nimpl);
															
 
																-
															
 
																-		/* In case the task is not calibrated, we consider the task
															
 
																-		 * ends immediately. */
															
 
																-		if (task_power > 0.0)
															
 
																-			expected_power += task_power;
															
 
																-
															
 
																-		entry = entry->next;
															
 
																-	}
															
 
																-	
															
 
																-	PTHREAD_MUTEX_UNLOCK(&bundle->mutex);
															
 
																-
															
 
																-	return expected_power;
															
 
																+	free(bundle);
															
 
																 }
															
 
																-struct handle_list {
															
 
																-	starpu_data_handle handle;
															
 
																-	starpu_access_mode mode;
															
 
																-	struct handle_list *next;
															
 
																-};
															
 
																-
															
 
																-static void insertion_handle_sorted(struct handle_list **listp, starpu_data_handle handle, starpu_access_mode mode)
															
 
																+void _insertion_handle_sorted(struct _starpu_handle_list **listp, starpu_data_handle_t handle, enum starpu_access_mode mode)
															
 
																 {
															
 
																 	STARPU_ASSERT(listp);
															
 
																-	struct handle_list *list = *listp;
															
 
																+	struct _starpu_handle_list *list = *listp;
															
 
																+	/* If the list is empty or the handle's address is the smallest in the
															
 
																+	 * list, we insert it as first element */
															
 
																 	if (!list || list->handle > handle)
															
 
																 	{
															
 
																-		/* We insert the first element of the list */
															
 
																-		struct handle_list *link = (struct handle_list *) malloc(sizeof(struct handle_list));
															
 
																+		struct _starpu_handle_list *link = (struct _starpu_handle_list *) malloc(sizeof(struct _starpu_handle_list));
															
 
																 		STARPU_ASSERT(link);
															
 
																 		link->handle = handle;
															
 
																 		link->mode = mode;
															
@@ -255,25 +206,26 @@ static void insertion_handle_sorted(struct handle_list **listp, starpu_data_hand
 
																 		return;
															
 
																 	}
															
 
																-	/* Look for the element or a place to insert it. */
															
 
																-	struct handle_list *prev = list;
															
 
																+	struct _starpu_handle_list *prev = list;
															
 
																-	while (list && (handle > list->handle))
															
 
																+	/* Look for the same handle if already present in the list.
															
 
																+	 * Else place it right before the smallest following handle */
															
 
																+	while (list && (handle >= list->handle))
															
 
																 	{
															
 
																 		prev = list;
															
 
																 		list = list->next;
															
 
																 	}
															
 
																-	/* The element should be in prev or not in the list */
															
 
																-
															
 
																 	if (prev->handle == handle)
															
 
																 	{
															
 
																-		/* The handle is already in the list */
															
 
																-		prev->mode |= mode;
															
 
																+		/* The handle is already in the list, so merge both access modes */
															
 
																+		prev->mode = (enum starpu_access_mode) ((int) prev->mode | (int) mode);
															
 
																 	}
															
 
																-	else {
															
 
																-		/* The handle was not in the list, we insert it after prev */
															
 
																-		struct handle_list *link = (struct handle_list *) malloc(sizeof(struct handle_list));
															
 
																+	else
															
 
																+	{
															
 
																+		/* The handle was not in the list, we insert it after 'prev', thus right before
															
 
																+		 * 'list' which is the smallest following handle */
															
 
																+		struct _starpu_handle_list *link = (struct _starpu_handle_list *) malloc(sizeof(struct _starpu_handle_list));
															
 
																 		STARPU_ASSERT(link);
															
 
																 		link->handle = handle;
															
 
																 		link->mode = mode;
															
@@ -281,59 +233,3 @@ static void insertion_handle_sorted(struct handle_list **listp, starpu_data_hand
 
																 		prev->next = link;
															
 
																 	}
															
 
																 }
															
 
																-
															
 
																-/* Return the time (in µs) expected to transfer all data used within the bundle */
															
 
																-double starpu_task_bundle_expected_data_transfer_time(struct starpu_task_bundle *bundle, unsigned memory_node)
															
 
																-{
															
 
																-	PTHREAD_MUTEX_LOCK(&bundle->mutex);
															
 
																-
															
 
																-	struct handle_list *handles = NULL;
															
 
																-
															
 
																-	/* We list all the handle that are accessed within the bundle. */
															
 
																-
															
 
																-	/* For each task in the bundle */
															
 
																-	struct starpu_task_bundle_entry *entry = bundle->list;
															
 
																-	while (entry) {
															
 
																-		struct starpu_task *task = entry->task;
															
 
																-
															
 
																-		if (task->cl)
															
 
																-		{
															
 
																-			unsigned b;
															
 
																-			for (b = 0; b < task->cl->nbuffers; b++)
															
 
																-			{
															
 
																-				starpu_data_handle handle = task->buffers[b].handle;
															
 
																-				starpu_access_mode mode = task->buffers[b].mode;
															
 
																-
															
 
																-				if (!(mode & STARPU_R))
															
 
																-					continue;
															
 
																-
															
 
																-				/* Insert the handle in the sorted list in case
															
 
																-				 * it's not already in that list. */
															
 
																-				insertion_handle_sorted(&handles, handle, mode);
															
 
																-			}
															
 
																-		}
															
 
																-
															
 
																-		entry = entry->next;
															
 
																-	}
															
 
																-
															
 
																-	/* Compute the sum of data transfer time, and destroy the list */
															
 
																-
															
 
																-	double total_exp = 0.0;
															
 
																-
															
 
																-	while (handles)
															
 
																-	{
															
 
																-		struct handle_list *current = handles;
															
 
																-		handles = handles->next;
															
 
																-
															
 
																-		double exp;
															
 
																-		exp = starpu_data_expected_transfer_time(current->handle, memory_node, current->mode);
															
 
																-
															
 
																-		total_exp += exp;
															
 
																-
															
 
																-		free(current);
															
 
																-	}
															
 
																-
															
 
																-	PTHREAD_MUTEX_UNLOCK(&bundle->mutex);
															
 
																-
															
 
																-	return total_exp;
															
 
																-}
															
--- a/src/core/task_bundle.h
+++ b/src/core/task_bundle.h
@@ -0,0 +1,142 @@
 
																+/* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																+ *
															
 
																+ * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2012 Inria
															
 
																+ *
															
 
																+ * StarPU is free software; you can redistribute it and/or modify
															
 
																+ * it under the terms of the GNU Lesser General Public License as published by
															
 
																+ * the Free Software Foundation; either version 2.1 of the License, or (at
															
 
																+ * your option) any later version.
															
 
																+ *
															
 
																+ * StarPU is distributed in the hope that it will be useful, but
															
 
																+ * WITHOUT ANY WARRANTY; without even the implied warranty of
															
 
																+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
															
 
																+ *
															
 
																+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
															
 
																+ */
															
 
																+
															
 
																+#ifndef __CORE_TASK_BUNDLE_H__
															
 
																+#define __CORE_TASK_BUNDLE_H__
															
 
																+
															
 
																+#if ! defined(_MSC_VER)
															
 
																+#  include <pthread.h>
															
 
																+#endif
															
 
																+
															
 
																+/* struct _starpu_task_bundle_entry
															
 
																+ * ================================
															
 
																+ * Purpose
															
 
																+ * =======
															
 
																+ * Structure used to describe a linked list containing tasks in _starpu_task_bundle.
															
 
																+ *
															
 
																+ * Fields
															
 
																+ * ======
															
 
																+ * task			Pointer to the task structure.
															
 
																+ *
															
 
																+ * next			Pointer to the next element in the linked list.
															
 
																+ */
															
 
																+
															
 
																+struct _starpu_task_bundle_entry
															
 
																+{
															
 
																+	struct starpu_task *task;
															
 
																+	struct _starpu_task_bundle_entry *next;
															
 
																+};
															
 
																+
															
 
																+/* struct _starpu_task_bundle
															
 
																+ * ==========================
															
 
																+ * Purpose
															
 
																+ * =======
															
 
																+ * Structure describing a list of tasks that should be scheduled on the same
															
 
																+ * worker whenever it's possible.
															
 
																+ * It must be considered as a hint given to the scheduler as there is no guarantee that
															
 
																+ * they will be executed on the same worker.
															
 
																+ *
															
 
																+ * Fields
															
 
																+ * ======
															
 
																+ * mutex		Mutex protecting the structure.
															
 
																+ *
															
 
																+ * list			Linked list of tasks included in the bundle.
															
 
																+ *
															
 
																+ * closed		Used to know if the user is still willing to
															
 
																+ * 			add tasks to the bundle (removal stays possible). Especially useful for
															
 
																+ * 			the runtime to know whether it is safe to destroy a bundle.
															
 
																+ */
															
 
																+
															
 
																+struct _starpu_task_bundle
															
 
																+{
															
 
																+	/* Mutex protecting the bundle */
															
 
																+#if defined(_MSC_VER)
															
 
																+	void *mutex;
															
 
																+#else
															
 
																+	pthread_mutex_t mutex;
															
 
																+#endif
															
 
																+
															
 
																+	struct _starpu_task_bundle_entry *list;
															
 
																+
															
 
																+	int closed;
															
 
																+};
															
 
																+
															
 
																+/* struct _starpu_handle_list
															
 
																+ * ==========================
															
 
																+ * Purpose
															
 
																+ * =======
															
 
																+ * Structure describing a list of handles sorted by address to speed-up
															
 
																+ * when looking for an element.
															
 
																+ * The list cannot contain duplicate handles.
															
 
																+ *
															
 
																+ * Fields
															
 
																+ * ======
															
 
																+ * handle		Pointer to the handle structure.
															
 
																+ *
															
 
																+ * mode			Total access mode over the whole bundle.
															
 
																+ *
															
 
																+ * next			Pointer to the next element in the linked list.
															
 
																+ */
															
 
																+
															
 
																+struct _starpu_handle_list
															
 
																+{
															
 
																+	starpu_data_handle_t handle;
															
 
																+	enum starpu_access_mode mode;
															
 
																+	struct _starpu_handle_list *next;
															
 
																+};
															
 
																+
															
 
																+/* _starpu_task_bundle_destroy
															
 
																+ * ==========================
															
 
																+ * Purpose
															
 
																+ * =======
															
 
																+ * Destroy and deinitialize a bundle,
															
 
																+ * memory previously allocated is freed.
															
 
																+ *
															
 
																+ * Arguments
															
 
																+ * =========
															
 
																+ * bundle		(input)
															
 
																+ * 			Bundle to destroy.
															
 
																+ */
															
 
																+void _starpu_task_bundle_destroy(starpu_task_bundle_t bundle);
															
 
																+
															
 
																+/* _insertion_handle_sorted
															
 
																+ * ========================
															
 
																+ * Purpose
															
 
																+ * =======
															
 
																+ * Insert a handle in a _starpu_handle_list, elements are sorted
															
 
																+ * in increasing order, considering their physical address.
															
 
																+ * As the list doesn't accept duplicate elements, a handle with the
															
 
																+ * same address as a handle contained in the list is not inserted, but
															
 
																+ * its access mode is merged with the one of the latter.
															
 
																+ *
															
 
																+ * Arguments
															
 
																+ * =========
															
 
																+ * listp		(input, output)
															
 
																+ * 			Pointer to the first element of the list.
															
 
																+ * 			In the case of an empty list or an inserted handle with small address,
															
 
																+ * 			it should have changed when the call returns.
															
 
																+ *
															
 
																+ * handle		(input)
															
 
																+ * 			Handle to insert in the list.
															
 
																+ *
															
 
																+ * mode			(input)
															
 
																+ * 			Access mode of the handle.
															
 
																+ */
															
 
																+void _insertion_handle_sorted(struct _starpu_handle_list **listp, starpu_data_handle_t handle, enum starpu_access_mode mode);
															
 
																+
															
 
																+#endif // __CORE_TASK_BUNDLE_H__
															
--- a/src/core/topology.c
+++ b/src/core/topology.c
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
															
 
																- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2009, 2010-2012  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
															
 
																  * Copyright (C) 2011  INRIA
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
@@ -23,7 +23,7 @@
 
																 #include <core/debug.h>
															
 
																 #include <core/topology.h>
															
 
																 #include <drivers/cuda/driver_cuda.h>
															
 
																-#include <common/hash.h>
															
 
																+#include <starpu_hash.h>
															
 
																 #include <profiling/profiling.h>
															
 
																 #ifdef STARPU_HAVE_HWLOC
															
@@ -43,18 +43,17 @@
 
																 #define hwloc_bitmap_singlify hwloc_cpuset_singlify
															
 
																 #endif
															
 
																-		
															
 
																 static unsigned topology_is_initialized = 0;
															
 
																-static void _starpu_initialize_workers_bindid(struct starpu_machine_config_s *config);
															
 
																+static void _starpu_initialize_workers_bindid(struct _starpu_machine_config *config);
															
 
																 #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
															
 
																 #  ifdef STARPU_USE_CUDA
															
 
																-static void _starpu_initialize_workers_cuda_gpuid(struct starpu_machine_config_s *config);
															
 
																-static struct starpu_htbl32_node_s *devices_using_cuda = NULL;
															
 
																+static void _starpu_initialize_workers_cuda_gpuid(struct _starpu_machine_config *config);
															
 
																+static struct starpu_htbl32_node *devices_using_cuda = NULL;
															
 
																 #  endif
															
 
																 #  ifdef STARPU_USE_OPENCL
															
 
																-static void _starpu_initialize_workers_opencl_gpuid(struct starpu_machine_config_s *config);
															
 
																+static void _starpu_initialize_workers_opencl_gpuid(struct _starpu_machine_config *config);
															
 
																 #  endif
															
 
																 static void _starpu_initialize_workers_gpuid(int use_explicit_workers_gpuid, int *explicit_workers_gpuid,
															
 
																                                              int *current, int *workers_gpuid, const char *varname, unsigned nhwgpus);
															
@@ -66,9 +65,9 @@ static unsigned may_bind_automatically = 0;
 
																  */
															
 
																 #ifdef STARPU_USE_CUDA
															
 
																-static void _starpu_initialize_workers_cuda_gpuid(struct starpu_machine_config_s *config)
															
 
																+static void _starpu_initialize_workers_cuda_gpuid(struct _starpu_machine_config *config)
															
 
																 {
															
 
																-	struct starpu_machine_topology_s *topology = &config->topology;
															
 
																+	struct starpu_machine_topology *topology = &config->topology;
															
 
																         _starpu_initialize_workers_gpuid(config->user_conf==NULL?0:config->user_conf->use_explicit_workers_cuda_gpuid,
															
 
																                                          config->user_conf==NULL?NULL:(int *)config->user_conf->workers_cuda_gpuid,
															
@@ -78,9 +77,9 @@ static void _starpu_initialize_workers_cuda_gpuid(struct starpu_machine_config_s
 
																 #endif
															
 
																 #ifdef STARPU_USE_OPENCL
															
 
																-static void _starpu_initialize_workers_opencl_gpuid(struct starpu_machine_config_s *config)
															
 
																+static void _starpu_initialize_workers_opencl_gpuid(struct _starpu_machine_config *config)
															
 
																 {
															
 
																-	struct starpu_machine_topology_s *topology = &config->topology;
															
 
																+	struct starpu_machine_topology *topology = &config->topology;
															
 
																         _starpu_initialize_workers_gpuid(config->user_conf==NULL?0:config->user_conf->use_explicit_workers_opencl_gpuid,
															
 
																                                          config->user_conf==NULL?NULL:(int *)config->user_conf->workers_opencl_gpuid,
															
@@ -93,9 +92,11 @@ static void _starpu_initialize_workers_opencl_gpuid(struct starpu_machine_config
 
																                 unsigned tmp[STARPU_NMAXWORKERS];
															
 
																                 unsigned nb=0;
															
 
																                 int i;
															
 
																-                for(i=0 ; i<STARPU_NMAXWORKERS ; i++) {
															
 
																-                        uint32_t key = _starpu_crc32_be(config->topology.workers_opencl_gpuid[i], 0);
															
 
																-                        if (_starpu_htbl_search_32(devices_using_cuda, key) == NULL) {
															
 
																+                for(i=0 ; i<STARPU_NMAXWORKERS ; i++)
															
 
																+		{
															
 
																+                        uint32_t key = starpu_crc32_be(config->topology.workers_opencl_gpuid[i], 0);
															
 
																+                        if (_starpu_htbl_search_32(devices_using_cuda, key) == NULL)
															
 
																+			{
															
 
																                                 tmp[nb] = topology->workers_opencl_gpuid[i];
															
 
																                                 nb++;
															
 
																                         }
															
@@ -106,14 +107,16 @@ static void _starpu_initialize_workers_opencl_gpuid(struct starpu_machine_config
 
																 #endif /* STARPU_USE_CUDA */
															
 
																         {
															
 
																                 // Detect identical devices
															
 
																-                struct starpu_htbl32_node_s *devices_already_used = NULL;
															
 
																+                struct starpu_htbl32_node *devices_already_used = NULL;
															
 
																                 unsigned tmp[STARPU_NMAXWORKERS];
															
 
																                 unsigned nb=0;
															
 
																                 int i;
															
 
																-                for(i=0 ; i<STARPU_NMAXWORKERS ; i++) {
															
 
																-                        uint32_t key = _starpu_crc32_be(topology->workers_opencl_gpuid[i], 0);
															
 
																-                        if (_starpu_htbl_search_32(devices_already_used, key) == NULL) {
															
 
																+                for(i=0 ; i<STARPU_NMAXWORKERS ; i++)
															
 
																+		{
															
 
																+                        uint32_t key = starpu_crc32_be(topology->workers_opencl_gpuid[i], 0);
															
 
																+                        if (_starpu_htbl_search_32(devices_already_used, key) == NULL)
															
 
																+			{
															
 
																                                 _starpu_htbl_insert_32(&devices_already_used, key, config);
															
 
																                                 tmp[nb] = topology->workers_opencl_gpuid[i];
															
 
																                                 nb ++;
															
@@ -143,14 +146,7 @@ static void _starpu_initialize_workers_gpuid(int use_explicit_workers_gpuid, int
 
																 	 * cpus. */
															
 
																 	/* what do we use, explicit value, env. variable, or round-robin ? */
															
 
																-	if (use_explicit_workers_gpuid)
															
 
																-	{
															
 
																-		/* we use the explicit value from the user */
															
 
																-		memcpy(workers_gpuid,
															
 
																-                       explicit_workers_gpuid,
															
 
																-                       STARPU_NMAXWORKERS*sizeof(unsigned));
															
 
																-	}
															
 
																-	else if ((strval = getenv(varname)))
															
 
																+	if ((strval = getenv(varname)))
															
 
																 	{
															
 
																 		/* STARPU_WORKERS_CUDAID certainly contains less entries than
															
 
																 		 * STARPU_NMAXWORKERS, so we reuse its entries in a round robin
															
@@ -162,7 +158,8 @@ static void _starpu_initialize_workers_gpuid(int use_explicit_workers_gpuid, int
 
																 		/* we use the content of the STARPU_WORKERS_CUDAID env. variable */
															
 
																 		for (i = 0; i < STARPU_NMAXWORKERS; i++)
															
 
																 		{
															
 
																-			if (!wrap) {
															
 
																+			if (!wrap)
															
 
																+			{
															
 
																 				long int val;
															
 
																 				val = strtol(strval, &endptr, 10);
															
 
																 				if (endptr != strval)
															
@@ -170,22 +167,31 @@ static void _starpu_initialize_workers_gpuid(int use_explicit_workers_gpuid, int
 
																 					workers_gpuid[i] = (unsigned)val;
															
 
																 					strval = endptr;
															
 
																 				}
															
 
																-				else {
															
 
																+				else
															
 
																+				{
															
 
																 					/* there must be at least one entry */
															
 
																 					STARPU_ASSERT(i != 0);
															
 
																 					number_of_entries = i;
															
 
																-	
															
 
																+
															
 
																 					/* there is no more values in the string */
															
 
																 					wrap = 1;
															
 
																 					workers_gpuid[i] = workers_gpuid[0];
															
 
																 				}
															
 
																 			}
															
 
																-			else {
															
 
																+			else
															
 
																+			{
															
 
																 				workers_gpuid[i] = workers_gpuid[i % number_of_entries];
															
 
																 			}
															
 
																 		}
															
 
																 	}
															
 
																+	else if (use_explicit_workers_gpuid)
															
 
																+	{
															
 
																+		/* we use the explicit value from the user */
															
 
																+		memcpy(workers_gpuid,
															
 
																+                       explicit_workers_gpuid,
															
 
																+                       STARPU_NMAXWORKERS*sizeof(unsigned));
															
 
																+	}
															
 
																 	else
															
 
																 	{
															
 
																 		/* by default, we take a round robin policy */
															
@@ -200,7 +206,7 @@ static void _starpu_initialize_workers_gpuid(int use_explicit_workers_gpuid, int
 
																 #endif
															
 
																 #ifdef STARPU_USE_CUDA
															
 
																-static inline int _starpu_get_next_cuda_gpuid(struct starpu_machine_config_s *config)
															
 
																+static inline int _starpu_get_next_cuda_gpuid(struct _starpu_machine_config *config)
															
 
																 {
															
 
																 	unsigned i = ((config->current_cuda_gpuid++) % config->topology.ncudagpus);
															
@@ -209,7 +215,7 @@ static inline int _starpu_get_next_cuda_gpuid(struct starpu_machine_config_s *co
 
																 #endif
															
 
																 #ifdef STARPU_USE_OPENCL
															
 
																-static inline int _starpu_get_next_opencl_gpuid(struct starpu_machine_config_s *config)
															
 
																+static inline int _starpu_get_next_opencl_gpuid(struct _starpu_machine_config *config)
															
 
																 {
															
 
																 	unsigned i = ((config->current_opencl_gpuid++) % config->topology.nopenclgpus);
															
@@ -217,9 +223,9 @@ static inline int _starpu_get_next_opencl_gpuid(struct starpu_machine_config_s *
 
																 }
															
 
																 #endif
															
 
																-static void _starpu_init_topology(struct starpu_machine_config_s *config)
															
 
																+static void _starpu_init_topology(struct _starpu_machine_config *config)
															
 
																 {
															
 
																-	struct starpu_machine_topology_s *topology = &config->topology;
															
 
																+	struct starpu_machine_topology *topology = &config->topology;
															
 
																 	if (!topology_is_initialized)
															
 
																 	{
															
@@ -260,24 +266,21 @@ static void _starpu_init_topology(struct starpu_machine_config_s *config)
 
																 	}
															
 
																 }
															
 
																-unsigned _starpu_topology_get_nhwcpu(struct starpu_machine_config_s *config)
															
 
																+unsigned _starpu_topology_get_nhwcpu(struct _starpu_machine_config *config)
															
 
																 {
															
 
																 	_starpu_init_topology(config);
															
 
																-	
															
 
																+
															
 
																 	return config->topology.nhwcpus;
															
 
																 }
															
 
																-static int _starpu_init_machine_config(struct starpu_machine_config_s *config,
															
 
																+static int _starpu_init_machine_config(struct _starpu_machine_config *config,
															
 
																 				struct starpu_conf *user_conf)
															
 
																 {
															
 
																-	int explicitval STARPU_ATTRIBUTE_UNUSED;
															
 
																-	unsigned use_accelerator = 0;
															
 
																-
															
 
																 	int i;
															
 
																 	for (i = 0; i < STARPU_NMAXWORKERS; i++)
															
 
																 		config->workers[i].workerid = i;
															
 
																-	struct starpu_machine_topology_s *topology = &config->topology;
															
 
																+	struct starpu_machine_topology *topology = &config->topology;
															
 
																 	topology->nworkers = 0;
															
 
																 	topology->ncombinedworkers = 0;
															
@@ -287,36 +290,49 @@ static int _starpu_init_machine_config(struct starpu_machine_config_s *config,
 
																 	_starpu_initialize_workers_bindid(config);
															
 
																 #ifdef STARPU_USE_CUDA
															
 
																-	if (user_conf && (user_conf->ncuda == 0))
															
 
																+	int ncuda = -1;
															
 
																+	ncuda = starpu_get_env_number("STARPU_NCUDA");
															
 
																+
															
 
																+	/* STARPU_NCUDA is not set. Did the user specify anything ? */
															
 
																+	if (ncuda == -1 && user_conf)
															
 
																+		ncuda = user_conf->ncuda;
															
 
																+
															
 
																+	
															
 
																+	if (ncuda != 0)
															
 
																 	{
															
 
																-		/* the user explicitely disabled CUDA */
															
 
																-		topology->ncudagpus = 0;
															
 
																-	}
															
 
																-	else {
															
 
																-		/* we need to initialize CUDA early to count the number of devices */
															
 
																+		/* The user did not disable CUDA. We need to initialize CUDA
															
 
																+ 		 * early to count the number of devices */
															
 
																 		_starpu_init_cuda();
															
 
																-		if (user_conf && (user_conf->ncuda != -1))
															
 
																+		if (ncuda == -1)
															
 
																 		{
															
 
																-			explicitval = user_conf->ncuda;
															
 
																-		}
															
 
																-		else {
															
 
																-			explicitval = starpu_get_env_number("STARPU_NCUDA");
															
 
																+			/* Nothing was specified, so let's choose ! */
															
 
																+			ncuda = STARPU_MIN(_starpu_get_cuda_device_count(), STARPU_MAXCUDADEVS);
															
 
																 		}
															
 
																+		else
															
 
																+		{
															
 
																+			/* Let's make sure this value is OK. */
															
 
																+			if (ncuda > STARPU_MAXCUDADEVS)
															
 
																+			{
															
 
																+				fprintf(stderr,
															
 
																+					"# Warning: %d CUDA devices requested. Only %d enabled. Use configure option --enable-maxcudadev=xxx to update the maximum value of supported CUDA devices.\n",
															
 
																+					ncuda, STARPU_MAXCUDADEVS);
															
 
																+				ncuda = STARPU_MAXCUDADEVS;
															
 
																+			}
															
 
																-		if (explicitval < 0) {
															
 
																-			config->topology.ncudagpus =
															
 
																-				STARPU_MIN(_starpu_get_cuda_device_count(), STARPU_MAXCUDADEVS);
															
 
																-		} else {
															
 
																-			/* use the specified value */
															
 
																-			topology->ncudagpus = (unsigned)explicitval;
															
 
																-			STARPU_ASSERT(topology->ncudagpus <= STARPU_MAXCUDADEVS);
															
 
																+			if ((unsigned) ncuda > _starpu_get_cuda_device_count())
															
 
																+			{
															
 
																+				fprintf(stderr,
															
 
																+					"# Warning: %d CUDA devices requested. Only %d available.\n",
															
 
																+					ncuda, _starpu_get_cuda_device_count());
															
 
																+				ncuda = _starpu_get_cuda_device_count();
															
 
																+			}
															
 
																 		}
															
 
																-		STARPU_ASSERT(config->topology.ncudagpus + config->topology.nworkers <= STARPU_NMAXWORKERS);
															
 
																 	}
															
 
																-	if (topology->ncudagpus > 0)
															
 
																-		use_accelerator = 1;
															
 
																+	/* Now we know how many CUDA devices will be used */
															
 
																+	topology->ncudagpus = ncuda;
															
 
																+	STARPU_ASSERT(topology->ncudagpus <= STARPU_MAXCUDADEVS);
															
 
																 	_starpu_initialize_workers_cuda_gpuid(config);
															
@@ -325,14 +341,14 @@ static int _starpu_init_machine_config(struct starpu_machine_config_s *config,
 
																 	{
															
 
																 		config->workers[topology->nworkers + cudagpu].arch = STARPU_CUDA_WORKER;
															
 
																 		int devid = _starpu_get_next_cuda_gpuid(config);
															
 
																-		enum starpu_perf_archtype arch = STARPU_CUDA_DEFAULT + devid;
															
 
																+		enum starpu_perf_archtype arch = (enum starpu_perf_archtype)((int)STARPU_CUDA_DEFAULT + devid);
															
 
																 		config->workers[topology->nworkers + cudagpu].devid = devid;
															
 
																-		config->workers[topology->nworkers + cudagpu].perf_arch = arch; 
															
 
																+		config->workers[topology->nworkers + cudagpu].perf_arch = arch;
															
 
																 		config->workers[topology->nworkers + cudagpu].worker_mask = STARPU_CUDA;
															
 
																 		_starpu_init_sched_ctx_for_worker(config->workers[topology->nworkers + cudagpu].workerid);
															
 
																 		config->worker_mask |= STARPU_CUDA;
															
 
																-                uint32_t key = _starpu_crc32_be(devid, 0);
															
 
																+                uint32_t key = starpu_crc32_be(devid, 0);
															
 
																                 _starpu_htbl_insert_32(&devices_using_cuda, key, config);
															
 
																         }
															
@@ -340,46 +356,49 @@ static int _starpu_init_machine_config(struct starpu_machine_config_s *config,
 
																 #endif
															
 
																 #ifdef STARPU_USE_OPENCL
															
 
																-	if (user_conf && (user_conf->nopencl == 0))
															
 
																+	int nopencl;
															
 
																+	nopencl = starpu_get_env_number("STARPU_NOPENCL");
															
 
																+
															
 
																+	/* STARPU_NOPENCL is not set. Did the user specify anything ? */
															
 
																+	if (nopencl == -1 && user_conf)
															
 
																+		nopencl = user_conf->nopencl;
															
 
																+
															
 
																+	if (nopencl != 0)
															
 
																 	{
															
 
																-		/* the user explicitely disabled OpenCL */
															
 
																-		topology->nopenclgpus = 0;
															
 
																-	}
															
 
																-	else {
															
 
																-		/* we need to initialize OpenCL early to count the number of devices */
															
 
																-		int nb_devices;
															
 
																+		/* The user did not disable OPENCL. We need to initialize OpenCL
															
 
																+ 		 * early to count the number of devices */
															
 
																 		_starpu_opencl_init();
															
 
																+		int nb_devices;
															
 
																 		nb_devices = STARPU_MIN(_starpu_opencl_get_device_count(), STARPU_MAXOPENCLDEVS);
															
 
																-		if (user_conf && (user_conf->nopencl != -1))
															
 
																+		if (nopencl == -1)
															
 
																 		{
															
 
																-			explicitval = user_conf->nopencl;
															
 
																+			/* Nothing was specified, so let's choose ! */
															
 
																+			nopencl = nb_devices;
															
 
																 		}
															
 
																-		else {
															
 
																-			explicitval = starpu_get_env_number("STARPU_NOPENCL");
															
 
																-		}
															
 
																-
															
 
																-
															
 
																-		if (explicitval < 0) {
															
 
																-			topology->nopenclgpus = nb_devices;
															
 
																-		}
															
 
																-		else {
															
 
																-			if (explicitval > nb_devices) {
															
 
																+		else
															
 
																+		{
															
 
																+			/* Let's make sure this value is OK. */
															
 
																+			if (nopencl > nb_devices)
															
 
																+			{
															
 
																 				/* The user requires more OpenCL devices than there is available */
															
 
																-				topology->nopenclgpus = nb_devices;
															
 
																+				fprintf(stderr,
															
 
																+					"# Warning: %d OpenCL devices requested. Only %d available.\n",
															
 
																+					nopencl, nb_devices);
															
 
																+					topology->nopenclgpus = nb_devices;
															
 
																 			}
															
 
																-			else {
															
 
																-				/* use the specified value */
															
 
																-				topology->nopenclgpus = (unsigned)explicitval;
															
 
																+			if (nopencl > STARPU_MAXOPENCLDEVS)
															
 
																+			{
															
 
																+				fprintf(stderr,
															
 
																+					"# Warning: %d OpenCL devices requested. Only %d enabled. Use configure option --enable-maxopencldev=xxx to update the maximum value of supported OpenCL devices.\n",
															
 
																+					nopencl, STARPU_MAXOPENCLDEVS);
															
 
																+				nopencl = STARPU_MAXOPENCLDEVS;
															
 
																 			}
															
 
																-			STARPU_ASSERT(topology->nopenclgpus <= STARPU_MAXOPENCLDEVS);
															
 
																 		}
															
 
																-		STARPU_ASSERT(topology->nopenclgpus + topology->nworkers <= STARPU_NMAXWORKERS);
															
 
																 	}
															
 
																-	if (topology->nopenclgpus > 0)
															
 
																-		use_accelerator = 1;
															
 
																-	// TODO: use_accelerator pour les OpenCL?
															
 
																+	topology->nopenclgpus = nopencl;
															
 
																+	STARPU_ASSERT(topology->nopenclgpus + topology->nworkers <= STARPU_NMAXWORKERS);
															
 
																 	_starpu_initialize_workers_opencl_gpuid(config);
															
@@ -387,14 +406,15 @@ static int _starpu_init_machine_config(struct starpu_machine_config_s *config,
 
																 	for (openclgpu = 0; openclgpu < topology->nopenclgpus; openclgpu++)
															
 
																 	{
															
 
																 		int devid = _starpu_get_next_opencl_gpuid(config);
															
 
																-		if (devid == -1) { // There is no more devices left
															
 
																+		if (devid == -1)
															
 
																+		{ // There is no more devices left
															
 
																 			topology->nopenclgpus = openclgpu;
															
 
																 			break;
															
 
																 		}
															
 
																 		config->workers[topology->nworkers + openclgpu].arch = STARPU_OPENCL_WORKER;
															
 
																-		enum starpu_perf_archtype arch = STARPU_OPENCL_DEFAULT + devid;
															
 
																+		enum starpu_perf_archtype arch = (enum starpu_perf_archtype)((int)STARPU_OPENCL_DEFAULT + devid);
															
 
																 		config->workers[topology->nworkers + openclgpu].devid = devid;
															
 
																-		config->workers[topology->nworkers + openclgpu].perf_arch = arch; 
															
 
																+		config->workers[topology->nworkers + openclgpu].perf_arch = arch;
															
 
																 		config->workers[topology->nworkers + openclgpu].worker_mask = STARPU_OPENCL;
															
 
																 		_starpu_init_sched_ctx_for_worker(config->workers[topology->nworkers + openclgpu].workerid);
															
 
																 		config->worker_mask |= STARPU_OPENCL;
															
@@ -402,26 +422,37 @@ static int _starpu_init_machine_config(struct starpu_machine_config_s *config,
 
																 	topology->nworkers += topology->nopenclgpus;
															
 
																 #endif
															
 
																-	
															
 
																+
															
 
																 #ifdef STARPU_USE_GORDON
															
 
																-	if (user_conf && (user_conf->ncuda != -1)) {
															
 
																-		explicitval = user_conf->ncuda;
															
 
																-	}
															
 
																-	else {
															
 
																-		explicitval = starpu_get_env_number("STARPU_NGORDON");
															
 
																-	}
															
 
																+	int ngordon;
															
 
																+	ngordon = starpu_get_env_number("STARPU_NGORDON");
															
 
																+
															
 
																+	/* STARPU_NGORDON is not set. Did the user specify anything ? */
															
 
																+	if (ngordon == -1 && user_conf)
															
 
																+		ngordon = user_conf->ngordon;
															
 
																-	if (explicitval < 0) {
															
 
																-		topology->ngordon_spus = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, -1);
															
 
																-	} else {
															
 
																-		/* use the specified value */
															
 
																-		topology->ngordon_spus = (unsigned)explicitval;
															
 
																-		STARPU_ASSERT(topology->ngordon_spus <= NMAXGORDONSPUS);
															
 
																+	if (ngordon != 0)
															
 
																+	{
															
 
																+		if (ngordon == -1)
															
 
																+		{
															
 
																+			/* Nothing was specified, so let's choose ! */
															
 
																+			ngordon = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, -1);
															
 
																+		}
															
 
																+		else
															
 
																+		{
															
 
																+			STARPU_ASSERT(ngordon <= NMAXGORDONSPUS);
															
 
																+			if (ngordon > STARPU_MAXGORDONSPUS);
															
 
																+			{
															
 
																+				fprintf(stderr,
															
 
																+					"# Warning: %d Gordon CPUs devices requested. Only %d supported\n",
															
 
																+					ngordon, NMAXGORDONSPUS);
															
 
																+				ngordon = NMAXGORDONSPUS;
															
 
																+			}
															
 
																+		}
															
 
																 	}
															
 
																-	STARPU_ASSERT(topology->ngordon_spus + topology->nworkers <= STARPU_NMAXWORKERS);
															
 
																-	if (topology->ngordon_spus > 0)
															
 
																-		use_accelerator = 1;
															
 
																+	topology->ngordon_spus = ngordon;
															
 
																+	STARPU_ASSERT(topology->ngordon_spus + topology->nworkers <= STARPU_NMAXWORKERS);
															
 
																 	unsigned spu;
															
 
																 	for (spu = 0; spu < config->ngordon_spus; spu++)
															
@@ -441,24 +472,37 @@ static int _starpu_init_machine_config(struct starpu_machine_config_s *config,
 
																 /* we put the CPU section after the accelerator : in case there was an
															
 
																  * accelerator found, we devote one cpu */
															
 
																 #ifdef STARPU_USE_CPU
															
 
																-	if (user_conf && (user_conf->ncpus != -1)) {
															
 
																-		explicitval = user_conf->ncpus;
															
 
																-	}
															
 
																-	else {
															
 
																-		explicitval = starpu_get_env_number("STARPU_NCPUS");
															
 
																-	}
															
 
																+	int ncpu;
															
 
																+	ncpu = starpu_get_env_number("STARPU_NCPUS");
															
 
																-	if (explicitval < 0) {
															
 
																-		unsigned already_busy_cpus = (topology->ngordon_spus?1:0) + topology->ncudagpus + topology->nopenclgpus;
															
 
																-		long avail_cpus = topology->nhwcpus - (use_accelerator?already_busy_cpus:0);
															
 
																-		if (avail_cpus < 0)
															
 
																-			avail_cpus = 0;
															
 
																-		topology->ncpus = STARPU_MIN(avail_cpus, STARPU_MAXCPUS);
															
 
																-	} else {
															
 
																-		/* use the specified value */
															
 
																-		topology->ncpus = (unsigned)explicitval;
															
 
																-		STARPU_ASSERT(topology->ncpus <= STARPU_MAXCPUS);
															
 
																+	/* STARPU_NCPUS is not set. Did the user specify anything ? */
															
 
																+	if (ncpu == -1 && user_conf)
															
 
																+		ncpu = user_conf->ncpus;
															
 
																+
															
 
																+	if (ncpu != 0)
															
 
																+	{
															
 
																+		if (ncpu == -1)
															
 
																+		{
															
 
																+			unsigned already_busy_cpus = (topology->ngordon_spus?1:0) + topology->ncudagpus + topology->nopenclgpus;
															
 
																+			long avail_cpus = topology->nhwcpus - already_busy_cpus;
															
 
																+			if (avail_cpus < 0)
															
 
																+				avail_cpus = 0;
															
 
																+			ncpu = STARPU_MIN(avail_cpus, STARPU_MAXCPUS);
															
 
																+		}
															
 
																+		else
															
 
																+		{
															
 
																+			if (ncpu > STARPU_MAXCPUS)
															
 
																+			{
															
 
																+				fprintf(stderr,
															
 
																+					"# Warning: %d CPU devices requested. Only %d enabled. Use configure option --enable-maxcpus=xxx to update the maximum value of supported CPU devices.\n",
															
 
																+					ncpu, STARPU_MAXCPUS);
															
 
																+				ncpu = STARPU_MAXCPUS;
															
 
																+			}
															
 
																+		}
															
 
																 	}
															
 
																+
															
 
																+
															
 
																+	topology->ncpus = ncpu;
															
 
																 	STARPU_ASSERT(topology->ncpus + topology->nworkers <= STARPU_NMAXWORKERS);
															
 
																 	unsigned cpu;
															
@@ -487,12 +531,12 @@ static int _starpu_init_machine_config(struct starpu_machine_config_s *config,
 
																 /*
															
 
																  * Bind workers on the different processors
															
 
																  */
															
 
																-static void _starpu_initialize_workers_bindid(struct starpu_machine_config_s *config)
															
 
																+static void _starpu_initialize_workers_bindid(struct _starpu_machine_config *config)
															
 
																 {
															
 
																 	char *strval;
															
 
																 	unsigned i;
															
 
																-	struct starpu_machine_topology_s *topology = &config->topology;
															
 
																+	struct starpu_machine_topology *topology = &config->topology;
															
 
																 	config->current_bindid = 0;
															
@@ -504,14 +548,7 @@ static void _starpu_initialize_workers_bindid(struct starpu_machine_config_s *co
 
																 	 * cpus. */
															
 
																 	/* what do we use, explicit value, env. variable, or round-robin ? */
															
 
																-	if (config->user_conf && config->user_conf->use_explicit_workers_bindid)
															
 
																-	{
															
 
																-		/* we use the explicit value from the user */
															
 
																-		memcpy(topology->workers_bindid,
															
 
																-			config->user_conf->workers_bindid,
															
 
																-			STARPU_NMAXWORKERS*sizeof(unsigned));
															
 
																-	}
															
 
																-	else if ((strval = getenv("STARPU_WORKERS_CPUID")))
															
 
																+	if ((strval = getenv("STARPU_WORKERS_CPUID")))
															
 
																 	{
															
 
																 		/* STARPU_WORKERS_CPUID certainly contains less entries than
															
 
																 		 * STARPU_NMAXWORKERS, so we reuse its entries in a round robin
															
@@ -523,7 +560,8 @@ static void _starpu_initialize_workers_bindid(struct starpu_machine_config_s *co
 
																 		/* we use the content of the STARPU_WORKERS_CUDAID env. variable */
															
 
																 		for (i = 0; i < STARPU_NMAXWORKERS; i++)
															
 
																 		{
															
 
																-			if (!wrap) {
															
 
																+			if (!wrap)
															
 
																+			{
															
 
																 				long int val;
															
 
																 				val = strtol(strval, &endptr, 10);
															
 
																 				if (endptr != strval)
															
@@ -531,7 +569,8 @@ static void _starpu_initialize_workers_bindid(struct starpu_machine_config_s *co
 
																 					topology->workers_bindid[i] = (unsigned)(val % topology->nhwcpus);
															
 
																 					strval = endptr;
															
 
																 				}
															
 
																-				else {
															
 
																+				else
															
 
																+				{
															
 
																 					/* there must be at least one entry */
															
 
																 					STARPU_ASSERT(i != 0);
															
 
																 					number_of_entries = i;
															
@@ -542,11 +581,19 @@ static void _starpu_initialize_workers_bindid(struct starpu_machine_config_s *co
 
																 					topology->workers_bindid[i] = topology->workers_bindid[0];
															
 
																 				}
															
 
																 			}
															
 
																-			else {
															
 
																+			else
															
 
																+			{
															
 
																 				topology->workers_bindid[i] = topology->workers_bindid[i % number_of_entries];
															
 
																 			}
															
 
																 		}
															
 
																 	}
															
 
																+	else if (config->user_conf && config->user_conf->use_explicit_workers_bindid)
															
 
																+	{
															
 
																+		/* we use the explicit value from the user */
															
 
																+		memcpy(topology->workers_bindid,
															
 
																+			config->user_conf->workers_bindid,
															
 
																+			STARPU_NMAXWORKERS*sizeof(unsigned));
															
 
																+	}
															
 
																 	else
															
 
																 	{
															
 
																 		/* by default, we take a round robin policy */
															
@@ -559,10 +606,10 @@ static void _starpu_initialize_workers_bindid(struct starpu_machine_config_s *co
 
																  * worker. In case a list of preferred cpus was specified, we look for a an
															
 
																  * available cpu among the list if possible, otherwise a round-robin policy is
															
 
																  * used. */
															
 
																-static inline int _starpu_get_next_bindid(struct starpu_machine_config_s *config,
															
 
																+static inline int _starpu_get_next_bindid(struct _starpu_machine_config *config,
															
 
																 				int *preferred_binding, int npreferred)
															
 
																 {
															
 
																-	struct starpu_machine_topology_s *topology = &config->topology;
															
 
																+	struct starpu_machine_topology *topology = &config->topology;
															
 
																 	unsigned found = 0;
															
 
																 	int current_preferred;
															
@@ -600,23 +647,32 @@ static inline int _starpu_get_next_bindid(struct starpu_machine_config_s *config
 
																 	return (int)topology->workers_bindid[i];
															
 
																 }
															
 
																-void _starpu_bind_thread_on_cpu(struct starpu_machine_config_s *config STARPU_ATTRIBUTE_UNUSED, unsigned cpuid)
															
 
																+void _starpu_bind_thread_on_cpu(struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED, unsigned cpuid)
															
 
																 {
															
 
																+	if (starpu_get_env_number("STARPU_WORKERS_NOBIND") > 0)
															
 
																+		return;
															
 
																 #ifdef STARPU_HAVE_HWLOC
															
 
																-	int ret;
															
 
																+	const struct hwloc_topology_support *support;
															
 
																+
															
 
																 	_starpu_init_topology(config);
															
 
																-	hwloc_obj_t obj = hwloc_get_obj_by_depth(config->topology.hwtopology, config->cpu_depth, cpuid);
															
 
																-	hwloc_cpuset_t set = obj->cpuset;
															
 
																-	hwloc_bitmap_singlify(set);
															
 
																-	ret = hwloc_set_cpubind(config->topology.hwtopology, set, HWLOC_CPUBIND_THREAD);
															
 
																-	if (ret)
															
 
																+	support = hwloc_topology_get_support(config->topology.hwtopology);
															
 
																+	if (support->cpubind->set_thisthread_cpubind)
															
 
																 	{
															
 
																-		perror("binding thread");
															
 
																-		STARPU_ABORT();
															
 
																+		hwloc_obj_t obj = hwloc_get_obj_by_depth(config->topology.hwtopology, config->cpu_depth, cpuid);
															
 
																+		hwloc_cpuset_t set = obj->cpuset;
															
 
																+		int ret;
															
 
																+
															
 
																+		hwloc_bitmap_singlify(set);
															
 
																+		ret = hwloc_set_cpubind(config->topology.hwtopology, set, HWLOC_CPUBIND_THREAD);
															
 
																+		if (ret)
															
 
																+		{
															
 
																+			perror("binding thread");
															
 
																+			STARPU_ABORT();
															
 
																+		}
															
 
																 	}
															
 
																-#elif defined(HAVE_PTHREAD_SETAFFINITY_NP)
															
 
																+#elif defined(HAVE_PTHREAD_SETAFFINITY_NP) && defined(__linux__)
															
 
																 	int ret;
															
 
																 	/* fix the thread on the correct cpu */
															
 
																 	cpu_set_t aff_mask;
															
@@ -634,7 +690,8 @@ void _starpu_bind_thread_on_cpu(struct starpu_machine_config_s *config STARPU_AT
 
																 #elif defined(__MINGW32__) || defined(__CYGWIN__)
															
 
																 	DWORD mask = 1 << cpuid;
															
 
																-	if (!SetThreadAffinityMask(GetCurrentThread(), mask)) {
															
 
																+	if (!SetThreadAffinityMask(GetCurrentThread(), mask))
															
 
																+	{
															
 
																 		fprintf(stderr,"SetThreadMaskAffinity(%lx) failed\n", mask);
															
 
																 		STARPU_ABORT();
															
 
																 	}
															
@@ -664,23 +721,26 @@ static void _starpu_init_workers_binding(struct starpu_machine_config_s *config)
 
																 	{
															
 
																 		unsigned memory_node = -1;
															
 
																 		unsigned is_a_set_of_accelerators = 0;
															
 
																-		struct starpu_worker_s *workerarg = &config->workers[worker];
															
 
																+		struct _starpu_worker *workerarg = &config->workers[worker];
															
 
																 		/* Perhaps the worker has some "favourite" bindings  */
															
 
																 		int *preferred_binding = NULL;
															
 
																 		int npreferred = 0;
															
 
																-		
															
 
																+
															
 
																 		/* select the memory node that contains worker's memory */
															
 
																-		switch (workerarg->arch) {
															
 
																+		switch (workerarg->arch)
															
 
																+		{
															
 
																 			case STARPU_CPU_WORKER:
															
 
																 			/* "dedicate" a cpu cpu to that worker */
															
 
																 				is_a_set_of_accelerators = 0;
															
 
																 				memory_node = ram_memory_node;
															
 
																+				_starpu_memory_node_worker_add(ram_memory_node);
															
 
																 				break;
															
 
																 #ifdef STARPU_USE_GORDON
															
 
																 			case STARPU_GORDON_WORKER:
															
 
																 				is_a_set_of_accelerators = 1;
															
 
																 				memory_node = ram_memory_node;
															
 
																+				_starpu_memory_node_worker_add(ram_memory_node);
															
 
																 				break;
															
 
																 #endif
															
 
																 #ifdef STARPU_USE_CUDA
															
@@ -693,9 +753,23 @@ static void _starpu_init_workers_binding(struct starpu_machine_config_s *config)
 
																 				}
															
 
																 				is_a_set_of_accelerators = 0;
															
 
																 				memory_node = _starpu_register_memory_node(STARPU_CUDA_RAM, workerarg->devid);
															
 
																+				_starpu_memory_node_worker_add(memory_node);
															
 
																 				_starpu_register_bus(0, memory_node);
															
 
																 				_starpu_register_bus(memory_node, 0);
															
 
																+#ifdef HAVE_CUDA_MEMCPY_PEER
															
 
																+				unsigned worker2;
															
 
																+				for (worker2 = 0; worker2 < worker; worker2++)
															
 
																+				{
															
 
																+					struct _starpu_worker *workerarg = &config->workers[worker];
															
 
																+					if (workerarg->arch == STARPU_CUDA_WORKER)
															
 
																+					{
															
 
																+						unsigned memory_node2 = starpu_worker_get_memory_node(worker2);
															
 
																+						_starpu_register_bus(memory_node2, memory_node);
															
 
																+						_starpu_register_bus(memory_node, memory_node2);
															
 
																+					}
															
 
																+				}
															
 
																+#endif
															
 
																 				break;
															
 
																 #endif
															
@@ -709,6 +783,7 @@ static void _starpu_init_workers_binding(struct starpu_machine_config_s *config)
 
																 				}
															
 
																 				is_a_set_of_accelerators = 0;
															
 
																 				memory_node = _starpu_register_memory_node(STARPU_OPENCL_RAM, workerarg->devid);
															
 
																+				_starpu_memory_node_worker_add(memory_node);
															
 
																 				_starpu_register_bus(0, memory_node);
															
 
																 				_starpu_register_bus(memory_node, 0);
															
 
																 				break;
															
@@ -718,13 +793,15 @@ static void _starpu_init_workers_binding(struct starpu_machine_config_s *config)
 
																 				STARPU_ABORT();
															
 
																 		}
															
 
																-		if (is_a_set_of_accelerators) {
															
 
																+		if (is_a_set_of_accelerators)
															
 
																+		{
															
 
																 			if (accelerator_bindid == -1)
															
 
																 				accelerator_bindid = _starpu_get_next_bindid(config, preferred_binding, npreferred);
															
 
																 			workerarg->bindid = accelerator_bindid;
															
 
																 		}
															
 
																-		else {
															
 
																+		else
															
 
																+		{
															
 
																 			workerarg->bindid = _starpu_get_next_bindid(config, preferred_binding, npreferred);
															
 
																 		}
															
@@ -755,7 +832,7 @@ static void _starpu_init_workers_binding(struct starpu_machine_config_s *config)
 
																 }
															
 
																-int _starpu_build_topology(struct starpu_machine_config_s *config)
															
 
																+int _starpu_build_topology(struct _starpu_machine_config *config)
															
 
																 {
															
 
																 	int ret;
															
@@ -773,7 +850,7 @@ int _starpu_build_topology(struct starpu_machine_config_s *config)
 
																 	return 0;
															
 
																 }
															
 
																-void _starpu_destroy_topology(struct starpu_machine_config_s *config __attribute__ ((unused)))
															
 
																+void _starpu_destroy_topology(struct _starpu_machine_config *config __attribute__ ((unused)))
															
 
																 {
															
 
																 	/* cleanup StarPU internal data structures */
															
 
																 	_starpu_deinit_memory_nodes();
															
@@ -782,7 +859,7 @@ void _starpu_destroy_topology(struct starpu_machine_config_s *config __attribute
 
																 	for (worker = 0; worker < config->topology.nworkers; worker++)
															
 
																 	{
															
 
																 #ifdef STARPU_HAVE_HWLOC
															
 
																-		struct starpu_worker_s *workerarg = &config->workers[worker];
															
 
																+		struct _starpu_worker *workerarg = &config->workers[worker];
															
 
																 		hwloc_bitmap_free(workerarg->initial_hwloc_cpu_set);
															
 
																 		hwloc_bitmap_free(workerarg->current_hwloc_cpu_set);
															
 
																 #endif
															
--- a/src/core/topology.h
+++ b/src/core/topology.h
@@ -1,6 +1,6 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2009, 2010  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2009-2010, 2012  Université de Bordeaux 1
															
 
																  * Copyright (C) 2010  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
@@ -24,20 +24,24 @@
 
																 #include <common/fxt.h>
															
 
																 /* TODO actually move this struct into this header */
															
 
																-struct starpu_machine_config_s;
															
 
																+struct _starpu_machine_config;
															
 
																 /* Detect the number of memory nodes and where to bind the different workers. */
															
 
																-int _starpu_build_topology(struct starpu_machine_config_s *config);
															
 
																+int _starpu_build_topology(struct _starpu_machine_config *config);
															
 
																 /* Destroy all resources used to store the topology of the machine. */
															
 
																-void _starpu_destroy_topology(struct starpu_machine_config_s *config);
															
 
																+void _starpu_destroy_topology(struct _starpu_machine_config *config);
															
 
																 /* returns the number of physical cpus */
															
 
																-unsigned _starpu_topology_get_nhwcpu(struct starpu_machine_config_s *config);
															
 
																+unsigned _starpu_topology_get_nhwcpu(struct _starpu_machine_config *config);
															
 
																 /* Bind the current thread on the CPU logically identified by "cpuid". The
															
 
																  * logical ordering of the processors is either that of hwloc (if available),
															
 
																  * or the ordering exposed by the OS. */
															
 
																-void _starpu_bind_thread_on_cpu(struct starpu_machine_config_s *config, unsigned cpuid);
															
 
																+void _starpu_bind_thread_on_cpu(struct _starpu_machine_config *config, unsigned cpuid);
															
 
																+
															
 
																+struct _starpu_combined_worker;
															
 
																+/* Bind the current thread on the set of CPUs for the given combined worker. */
															
 
																+void _starpu_bind_thread_on_cpus(struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED, struct _starpu_combined_worker *combined_worker);
															
 
																 #endif // __TOPOLOGY_H__
															
--- a/src/core/workers.c
+++ b/src/core/workers.c
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2009, 2010, 2011  Université de Bordeaux 1
															
 
																- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2009-2012  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
															
 
																  * Copyright (C) 2010, 2011  Institut National de Recherche en Informatique et Automatique
															
 
																  * Copyright (C) 2011  Télécom-SudParis
															
 
																  * Copyright (C) 2011  INRIA
															
@@ -35,90 +35,140 @@
 
																 /* acquire/release semantic for concurrent initialization/de-initialization */
															
 
																 static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER;
															
 
																 static pthread_cond_t init_cond = PTHREAD_COND_INITIALIZER;
															
 
																-
															
 
																-static int init_count;
															
 
																+static int init_count = 0;
															
 
																 static enum { UNINITIALIZED, CHANGING, INITIALIZED } initialized = UNINITIALIZED;
															
 
																 static pthread_key_t worker_key;
															
 
																-static struct starpu_machine_config_s config;
															
 
																+static struct _starpu_machine_config config;
															
 
																+
															
 
																+int _starpu_is_initialized(void)
															
 
																+{
															
 
																+	return initialized == INITIALIZED;
															
 
																+}
															
 
																-struct starpu_machine_config_s *_starpu_get_machine_config(void)
															
 
																+struct _starpu_machine_config *_starpu_get_machine_config(void)
															
 
																 {
															
 
																 	return &config;
															
 
																 }
															
 
																+/* Makes sure that at least one of the workers of type <arch> can execute
															
 
																+ * <task>*/
															
 
																+static uint32_t _starpu_worker_exists_and_can_execute(struct starpu_task *task,
															
 
																+						      enum starpu_archtype arch)
															
 
																+{
															
 
																+	int i;
															
 
																+	int nworkers = starpu_worker_get_count_by_type(arch);
															
 
																+	int workers[nworkers];
															
 
																+	STARPU_ASSERT(nworkers != -EINVAL);
															
 
																+	(void) starpu_worker_get_ids_by_type(arch, workers, nworkers);
															
 
																+	for (i = 0; i < nworkers; i++)
															
 
																+		if (task->cl->can_execute(workers[i], task, 0))
															
 
																+			return 1;
															
 
																+	return 0;
															
 
																+}
															
 
																+
															
 
																 /* in case a task is submitted, we may check whether there exists a worker
															
 
																    that may execute the task or not */
															
 
																-
															
 
																-uint32_t _starpu_worker_exists(uint32_t task_mask)
															
 
																+uint32_t _starpu_worker_exists(struct starpu_task *task)
															
 
																 {
															
 
																-	return (task_mask & config.worker_mask);
															
 
																-} 
															
 
																+	if (!(task->cl->where & config.worker_mask))
															
 
																+		return 0;
															
 
																+
															
 
																+	if (!task->cl->can_execute)
															
 
																+		return 1;
															
 
																-uint32_t _starpu_may_submit_cuda_task(void)
															
 
																+#ifdef STARPU_USE_CPU
															
 
																+	if ((task->cl->where & STARPU_CPU) &&
															
 
																+	    _starpu_worker_exists_and_can_execute(task, STARPU_CPU_WORKER))
															
 
																+		return 1;
															
 
																+#endif
															
 
																+#ifdef STARPU_USE_CUDA
															
 
																+	if ((task->cl->where & STARPU_CUDA) &&
															
 
																+	    _starpu_worker_exists_and_can_execute(task, STARPU_CUDA_WORKER))
															
 
																+		return 1;
															
 
																+#endif
															
 
																+#ifdef STARPU_USE_OPENCL
															
 
																+	if ((task->cl->where & STARPU_OPENCL) &&
															
 
																+	    _starpu_worker_exists_and_can_execute(task, STARPU_OPENCL_WORKER))
															
 
																+		return 1;
															
 
																+#endif
															
 
																+	return 0;
															
 
																+}
															
 
																+
															
 
																+uint32_t _starpu_can_submit_cuda_task(void)
															
 
																 {
															
 
																 	return (STARPU_CUDA & config.worker_mask);
															
 
																 }
															
 
																-uint32_t _starpu_may_submit_cpu_task(void)
															
 
																+uint32_t _starpu_can_submit_cpu_task(void)
															
 
																 {
															
 
																 	return (STARPU_CPU & config.worker_mask);
															
 
																 }
															
 
																-uint32_t _starpu_may_submit_opencl_task(void)
															
 
																+uint32_t _starpu_can_submit_opencl_task(void)
															
 
																 {
															
 
																 	return (STARPU_OPENCL & config.worker_mask);
															
 
																 }
															
 
																-static int _starpu_may_use_nth_implementation(enum starpu_archtype arch, struct starpu_codelet_t *cl, unsigned nimpl)
															
 
																+static int _starpu_can_use_nth_implementation(enum starpu_archtype arch, struct starpu_codelet *cl, unsigned nimpl)
															
 
																 {
															
 
																-	switch(arch) {
															
 
																+	switch(arch)
															
 
																+	{
															
 
																 	case STARPU_CPU_WORKER:
															
 
																-		return !(cl->cpu_func == STARPU_MULTIPLE_CPU_IMPLEMENTATIONS &&
															
 
																-			cl->cpu_funcs[nimpl] == NULL);
															
 
																+	{
															
 
																+		starpu_cpu_func_t func = _starpu_task_get_cpu_nth_implementation(cl, nimpl);
															
 
																+		return func != NULL;
															
 
																+	}
															
 
																 	case STARPU_CUDA_WORKER:
															
 
																-		return !(cl->cuda_func == STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS &&
															
 
																-			cl->cuda_funcs[nimpl] == NULL);
															
 
																+	{
															
 
																+		starpu_cuda_func_t func = _starpu_task_get_cuda_nth_implementation(cl, nimpl);
															
 
																+		return func != NULL;
															
 
																+	}
															
 
																 	case STARPU_OPENCL_WORKER:
															
 
																-		return !(cl->opencl_func == STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS &&
															
 
																-			cl->opencl_funcs[nimpl] == NULL);
															
 
																+	{
															
 
																+		starpu_opencl_func_t func = _starpu_task_get_opencl_nth_implementation(cl, nimpl);
															
 
																+		return func != NULL;
															
 
																+	}
															
 
																 	case STARPU_GORDON_WORKER:
															
 
																-		return !(cl->gordon_func == STARPU_MULTIPLE_GORDON_IMPLEMENTATIONS &&
															
 
																-			cl->gordon_funcs[nimpl] == 0);
															
 
																+	{
															
 
																+		starpu_gordon_func_t func = _starpu_task_get_gordon_nth_implementation(cl, nimpl);
															
 
																+		return func != 0;
															
 
																+	}
															
 
																 	default:
															
 
																-		return 0;
															
 
																+		STARPU_ASSERT_MSG(0, "Unknown arch type");
															
 
																 	}
															
 
																+	return 0;
															
 
																 }
															
 
																-int starpu_worker_may_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl)
															
 
																+int starpu_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl)
															
 
																 {
															
 
																 	/* TODO: check that the task operand sizes will fit on that device */
															
 
																-	/* TODO: call application-provided function for various cases like
															
 
																-	 * double support, shared memory size limit, etc. */
															
 
																-	return !!((task->cl->where & config.workers[workerid].worker_mask) &&
															
 
																-		_starpu_may_use_nth_implementation(config.workers[workerid].arch, task->cl, nimpl));
															
 
																+	return (task->cl->where & config.workers[workerid].worker_mask) &&
															
 
																+		_starpu_can_use_nth_implementation(config.workers[workerid].arch, task->cl, nimpl) &&
															
 
																+		(!task->cl->can_execute || task->cl->can_execute(workerid, task, nimpl));
															
 
																 }
															
 
																-int starpu_combined_worker_may_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl)
															
 
																+int starpu_combined_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl)
															
 
																 {
															
 
																 	/* TODO: check that the task operand sizes will fit on that device */
															
 
																 	/* TODO: call application-provided function for various cases like
															
 
																 	 * double support, shared memory size limit, etc. */
															
 
																-	struct starpu_codelet_t *cl = task->cl;
															
 
																+	struct starpu_codelet *cl = task->cl;
															
 
																 	unsigned nworkers = config.topology.nworkers;
															
 
																 	/* Is this a parallel worker ? */
															
 
																 	if (workerid < nworkers)
															
 
																 	{
															
 
																 		return !!((task->cl->where & config.workers[workerid].worker_mask) &&
															
 
																-				_starpu_may_use_nth_implementation(config.workers[workerid].arch, task->cl, nimpl));
															
 
																+				_starpu_can_use_nth_implementation(config.workers[workerid].arch, task->cl, nimpl));
															
 
																 	}
															
 
																-	else {
															
 
																+	else
															
 
																+	{
															
 
																 		if ((cl->type == STARPU_SPMD) || (cl->type == STARPU_FORKJOIN))
															
 
																 		{
															
 
																 			/* TODO we should add other types of constraints */
															
@@ -126,7 +176,7 @@ int starpu_combined_worker_may_execute_task(unsigned workerid, struct starpu_tas
 
																 			/* Is the worker larger than requested ? */
															
 
																 			int worker_size = (int)config.combined_workers[workerid - nworkers].worker_size;
															
 
																 			return !!((worker_size <= task->cl->max_parallelism) &&
															
 
																-				_starpu_may_use_nth_implementation(config.workers[workerid].arch, task->cl, nimpl));
															
 
																+				_starpu_can_use_nth_implementation(config.workers[workerid].arch, task->cl, nimpl));
															
 
																 		}
															
 
																 		else
															
 
																 		{
															
@@ -141,11 +191,11 @@ int starpu_combined_worker_may_execute_task(unsigned workerid, struct starpu_tas
 
																  */
															
 
																 #ifdef STARPU_USE_GORDON
															
 
																-static unsigned gordon_inited = 0;	
															
 
																-static struct starpu_worker_set_s gordon_worker_set;
															
 
																+static unsigned gordon_inited = 0;
															
 
																+static struct _starpu_worker_set gordon_worker_set;
															
 
																 #endif
															
 
																-static void _starpu_init_worker_queue(struct starpu_worker_s *workerarg)
															
 
																+static void _starpu_init_worker_queue(struct _starpu_worker *workerarg)
															
 
																 {
															
 
																 	pthread_cond_t *cond = &workerarg->sched_cond;
															
 
																 	pthread_mutex_t *mutex = &workerarg->sched_mutex;
															
@@ -155,7 +205,7 @@ static void _starpu_init_worker_queue(struct starpu_worker_s *workerarg)
 
																 	_starpu_memory_node_register_condition(cond, mutex, memory_node);
															
 
																 }
															
 
																-static void _starpu_launch_drivers(struct starpu_machine_config_s *config)
															
 
																+static void _starpu_launch_drivers(struct _starpu_machine_config *config)
															
 
																 {
															
 
																 	config->running = 1;
															
@@ -167,14 +217,14 @@ static void _starpu_launch_drivers(struct starpu_machine_config_s *config)
 
																 	unsigned worker;
															
 
																 	for (worker = 0; worker < nworkers; worker++)
															
 
																 	{
															
 
																-		struct starpu_worker_s *workerarg = &config->workers[worker];
															
 
																+		struct _starpu_worker *workerarg = &config->workers[worker];
															
 
																 		workerarg->config = config;
															
 
																 		_starpu_barrier_counter_init(&workerarg->tasks_barrier, 0);
															
 
																-		PTHREAD_MUTEX_INIT(&workerarg->mutex, NULL);
															
 
																-		PTHREAD_COND_INIT(&workerarg->ready_cond, NULL);
															
 
																+		_STARPU_PTHREAD_MUTEX_INIT(&workerarg->mutex, NULL);
															
 
																+		_STARPU_PTHREAD_COND_INIT(&workerarg->ready_cond, NULL);
															
 
																 		workerarg->worker_size = 1;
															
 
																 		workerarg->combined_workerid = workerarg->workerid;
															
@@ -184,30 +234,31 @@ static void _starpu_launch_drivers(struct starpu_machine_config_s *config)
 
																 		/* we have a single local list */
															
 
																 		/* afterwards there would be a mutex + cond for the list of each strategy */
															
 
																-		PTHREAD_MUTEX_INIT(&workerarg->sched_mutex, NULL);
															
 
																-		PTHREAD_COND_INIT(&workerarg->sched_cond, NULL);
															
 
																+		_STARPU_PTHREAD_MUTEX_INIT(&workerarg->sched_mutex, NULL);
															
 
																+		_STARPU_PTHREAD_COND_INIT(&workerarg->sched_cond, NULL);
															
 
																 		/* if some codelet's termination cannot be handled directly :
															
 
																 		 * for instance in the Gordon driver, Gordon tasks' callbacks
															
 
																 		 * may be executed by another thread than that of the Gordon
															
 
																 		 * driver so that we cannot call the push_codelet_output method
															
 
																 		 * directly */
															
 
																-		workerarg->terminated_jobs = starpu_job_list_new();
															
 
																+		workerarg->terminated_jobs = _starpu_job_list_new();
															
 
																 		starpu_task_list_init(&workerarg->local_tasks);
															
 
																-	
															
 
																+
															
 
																 		workerarg->status = STATUS_INITIALIZING;
															
 
																 		_STARPU_DEBUG("initialising worker %u\n", worker);
															
 
																 		_starpu_init_worker_queue(workerarg);
															
 
																-		switch (workerarg->arch) {
															
 
																+		switch (workerarg->arch)
															
 
																+		{
															
 
																 #ifdef STARPU_USE_CPU
															
 
																 			case STARPU_CPU_WORKER:
															
 
																 				workerarg->set = NULL;
															
 
																 				workerarg->worker_is_initialized = 0;
															
 
																-				pthread_create(&workerarg->worker_thread, 
															
 
																+				pthread_create(&workerarg->worker_thread,
															
 
																 						NULL, _starpu_cpu_worker, workerarg);
															
 
																 				break;
															
 
																 #endif
															
@@ -215,7 +266,7 @@ static void _starpu_launch_drivers(struct starpu_machine_config_s *config)
 
																 			case STARPU_CUDA_WORKER:
															
 
																 				workerarg->set = NULL;
															
 
																 				workerarg->worker_is_initialized = 0;
															
 
																-				pthread_create(&workerarg->worker_thread, 
															
 
																+				pthread_create(&workerarg->worker_thread,
															
 
																 						NULL, _starpu_cuda_worker, workerarg);
															
 
																 				break;
															
@@ -224,34 +275,34 @@ static void _starpu_launch_drivers(struct starpu_machine_config_s *config)
 
																 			case STARPU_OPENCL_WORKER:
															
 
																 				workerarg->set = NULL;
															
 
																 				workerarg->worker_is_initialized = 0;
															
 
																-				pthread_create(&workerarg->worker_thread, 
															
 
																+				pthread_create(&workerarg->worker_thread,
															
 
																 						NULL, _starpu_opencl_worker, workerarg);
															
 
																 				break;
															
 
																 #endif
															
 
																 #ifdef STARPU_USE_GORDON
															
 
																 			case STARPU_GORDON_WORKER:
															
 
																-				/* we will only launch gordon once, but it will handle 
															
 
																+				/* we will only launch gordon once, but it will handle
															
 
																 				 * the different SPU workers */
															
 
																 				if (!gordon_inited)
															
 
																 				{
															
 
																-					gordon_worker_set.nworkers = config->ngordon_spus; 
															
 
																+					gordon_worker_set.nworkers = config->ngordon_spus;
															
 
																 					gordon_worker_set.workers = &config->workers[worker];
															
 
																 					gordon_worker_set.set_is_initialized = 0;
															
 
																-					pthread_create(&gordon_worker_set.worker_thread, NULL, 
															
 
																+					pthread_create(&gordon_worker_set.worker_thread, NULL,
															
 
																 							_starpu_gordon_worker, &gordon_worker_set);
															
 
																-					PTHREAD_MUTEX_LOCK(&gordon_worker_set.mutex);
															
 
																+					_STARPU_PTHREAD_MUTEX_LOCK(&gordon_worker_set.mutex);
															
 
																 					while (!gordon_worker_set.set_is_initialized)
															
 
																-						PTHREAD_COND_WAIT(&gordon_worker_set.ready_cond,
															
 
																+						_STARPU_PTHREAD_COND_WAIT(&gordon_worker_set.ready_cond,
															
 
																 									&gordon_worker_set.mutex);
															
 
																-					PTHREAD_MUTEX_UNLOCK(&gordon_worker_set.mutex);
															
 
																+					_STARPU_PTHREAD_MUTEX_UNLOCK(&gordon_worker_set.mutex);
															
 
																 					gordon_inited = 1;
															
 
																 				}
															
 
																-				
															
 
																+
															
 
																 				workerarg->set = &gordon_worker_set;
															
 
																 				gordon_worker_set.joined = 0;
															
 
																 				workerarg->worker_is_running = 1;
															
@@ -265,16 +316,17 @@ static void _starpu_launch_drivers(struct starpu_machine_config_s *config)
 
																 	for (worker = 0; worker < nworkers; worker++)
															
 
																 	{
															
 
																-		struct starpu_worker_s *workerarg = &config->workers[worker];
															
 
																+		struct _starpu_worker *workerarg = &config->workers[worker];
															
 
																-		switch (workerarg->arch) {
															
 
																+		switch (workerarg->arch)
															
 
																+		{
															
 
																 			case STARPU_CPU_WORKER:
															
 
																 			case STARPU_CUDA_WORKER:
															
 
																-			case STARPU_OPENCL_WORKER:			  
															
 
																-				PTHREAD_MUTEX_LOCK(&workerarg->mutex);
															
 
																+			case STARPU_OPENCL_WORKER:
															
 
																+				_STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
															
 
																 				while (!workerarg->worker_is_initialized)
															
 
																-					PTHREAD_COND_WAIT(&workerarg->ready_cond, &workerarg->mutex);
															
 
																-				PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
															
 
																+					_STARPU_PTHREAD_COND_WAIT(&workerarg->ready_cond, &workerarg->mutex);
															
 
																+				_STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
															
 
																 				break;
															
 
																 #ifdef STARPU_USE_GORDON
															
 
																 			case STARPU_GORDON_WORKER:
															
@@ -289,14 +341,14 @@ static void _starpu_launch_drivers(struct starpu_machine_config_s *config)
 
																 }
															
 
																-void _starpu_set_local_worker_key(struct starpu_worker_s *worker)
															
 
																+void _starpu_set_local_worker_key(struct _starpu_worker *worker)
															
 
																 {
															
 
																 	pthread_setspecific(worker_key, worker);
															
 
																 }
															
 
																-struct starpu_worker_s *_starpu_get_local_worker_key(void)
															
 
																+struct _starpu_worker *_starpu_get_local_worker_key(void)
															
 
																 {
															
 
																-	return (struct starpu_worker_s *) pthread_getspecific(worker_key);
															
 
																+	return (struct _starpu_worker *) pthread_getspecific(worker_key);
															
 
																 }
															
 
																 /* Initialize the starpu_conf with default values */
															
@@ -329,19 +381,49 @@ int starpu_init(struct starpu_conf *user_conf)
 
																 {
															
 
																 	int ret;
															
 
																-	PTHREAD_MUTEX_LOCK(&init_mutex);
															
 
																+#ifdef __GNUC__
															
 
																+#ifndef __OPTIMIZE__
															
 
																+	if (!getenv("STARPU_SILENT")) fprintf(stderr,"Warning: StarPU was configured with --enable-debug (-O0), and is thus not optimized\n");
															
 
																+#endif
															
 
																+#endif
															
 
																+#if 0
															
 
																+#ifndef STARPU_NO_ASSERT
															
 
																+	if (!getenv("STARPU_SILENT")) fprintf(stderr,"Warning: StarPU was configured without --enable-fast\n");
															
 
																+#endif
															
 
																+#endif
															
 
																+#ifdef STARPU_MEMORY_STATUS
															
 
																+	if (!getenv("STARPU_SILENT")) fprintf(stderr,"Warning: StarPU was configured with --enable-memory-status, which slows down a bit\n");
															
 
																+#endif
															
 
																+#ifdef STARPU_VERBOSE
															
 
																+	if (!getenv("STARPU_SILENT")) fprintf(stderr,"Warning: StarPU was configured with --enable-verbose, which slows down a bit\n");
															
 
																+#endif
															
 
																+#ifdef STARPU_USE_FXT
															
 
																+	if (!getenv("STARPU_SILENT")) fprintf(stderr,"Warning: StarPU was configured with --with-fxt, which slows down a bit\n");
															
 
																+#endif
															
 
																+#ifdef STARPU_PERF_DEBUG
															
 
																+	if (!getenv("STARPU_SILENT")) fprintf(stderr,"Warning: StarPU was configured with --enable-perf-debug, which slows down a bit\n");
															
 
																+#endif
															
 
																+#ifdef STARPU_MODEL_DEBUG
															
 
																+	if (!getenv("STARPU_SILENT")) fprintf(stderr,"Warning: StarPU was configured with --enable-model-debug, which slows down a bit\n");
															
 
																+#endif
															
 
																+#ifdef STARPU_DATA_STATS
															
 
																+	if (!getenv("STARPU_SILENT")) fprintf(stderr,"Warning: StarPU was configured with --enable-stats, which slows down a bit\n");
															
 
																+#endif
															
 
																+
															
 
																+	_STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
															
 
																 	while (initialized == CHANGING)
															
 
																 		/* Wait for the other one changing it */
															
 
																-		PTHREAD_COND_WAIT(&init_cond, &init_mutex);
															
 
																+		_STARPU_PTHREAD_COND_WAIT(&init_cond, &init_mutex);
															
 
																 	init_count++;
															
 
																-	if (initialized == INITIALIZED) {
															
 
																-	  /* He initialized it, don't do it again, and let the others get the mutex */
															
 
																-	  PTHREAD_MUTEX_UNLOCK(&init_mutex);
															
 
																-	  return 0;
															
 
																-	  }
															
 
																+	if (initialized == INITIALIZED)
															
 
																+	{
															
 
																+		/* He initialized it, don't do it again, and let the others get the mutex */
															
 
																+		_STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
															
 
																+		return 0;
															
 
																+	}
															
 
																 	/* initialized == UNINITIALIZED */
															
 
																 	initialized = CHANGING;
															
 
																-	PTHREAD_MUTEX_UNLOCK(&init_mutex);
															
 
																+	_STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
															
 
																 #ifdef __MINGW32__
															
 
																 	WSADATA wsadata;
															
@@ -351,7 +433,7 @@ int starpu_init(struct starpu_conf *user_conf)
 
																 	srand(2008);
															
 
																 #ifdef STARPU_USE_FXT
															
 
																-		_starpu_start_fxt_profiling();
															
 
																+	_starpu_start_fxt_profiling();
															
 
																 #endif
															
 
																 	_starpu_open_debug_logfile();
															
@@ -370,21 +452,21 @@ int starpu_init(struct starpu_conf *user_conf)
 
																 	_starpu_init_all_sched_ctxs(&config);
															
 
																 	ret = _starpu_build_topology(&config);
															
 
																-	if (ret) {
															
 
																-		PTHREAD_MUTEX_LOCK(&init_mutex);
															
 
																+	if (ret)
															
 
																+	{
															
 
																+		_STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
															
 
																 		init_count--;
															
 
																 		initialized = UNINITIALIZED;
															
 
																 		/* Let somebody else try to do it */
															
 
																-		PTHREAD_COND_SIGNAL(&init_cond);
															
 
																-		PTHREAD_MUTEX_UNLOCK(&init_mutex);
															
 
																+		_STARPU_PTHREAD_COND_SIGNAL(&init_cond);
															
 
																+		_STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
															
 
																 		return ret;
															
 
																 	}
															
 
																 	/* We need to store the current task handled by the different
															
 
																 	 * threads */
															
 
																-	_starpu_initialize_current_task_key();	
															
 
																+	_starpu_initialize_current_task_key();
															
 
																-	/* initialize the scheduling policy */
															
 
																 	struct starpu_sched_ctx *sched_ctx;
															
 
																 	if(user_conf == NULL)
															
@@ -398,20 +480,21 @@ int starpu_init(struct starpu_conf *user_conf)
 
																 	/* Launch "basic" workers (ie. non-combined workers) */
															
 
																 	_starpu_launch_drivers(&config);
															
 
																-	PTHREAD_MUTEX_LOCK(&init_mutex);
															
 
																+	_STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
															
 
																 	initialized = INITIALIZED;
															
 
																 	/* Tell everybody that we initialized */
															
 
																-	PTHREAD_COND_BROADCAST(&init_cond);
															
 
																-	PTHREAD_MUTEX_UNLOCK(&init_mutex);
															
 
																+	_STARPU_PTHREAD_COND_BROADCAST(&init_cond);
															
 
																+	_STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
															
 
																+	_STARPU_DEBUG("Initialisation finished\n");
															
 
																 	return 0;
															
 
																 }
															
 
																 /*
															
 
																- * Handle runtime termination 
															
 
																+ * Handle runtime termination
															
 
																  */
															
 
																-static void _starpu_terminate_workers(struct starpu_machine_config_s *config)
															
 
																+static void _starpu_terminate_workers(struct _starpu_machine_config *config)
															
 
																 {
															
 
																 	int status STARPU_ATTRIBUTE_UNUSED;
															
 
																 	unsigned workerid;
															
@@ -419,21 +502,24 @@ static void _starpu_terminate_workers(struct starpu_machine_config_s *config)
 
																 	for (workerid = 0; workerid < config->topology.nworkers; workerid++)
															
 
																 	{
															
 
																 		starpu_wake_all_blocked_workers();
															
 
																-		
															
 
																+
															
 
																 		_STARPU_DEBUG("wait for worker %u\n", workerid);
															
 
																-		struct starpu_worker_set_s *set = config->workers[workerid].set;
															
 
																-		struct starpu_worker_s *worker = &config->workers[workerid];
															
 
																+		struct _starpu_worker_set *set = config->workers[workerid].set;
															
 
																+		struct _starpu_worker *worker = &config->workers[workerid];
															
 
																 		/* in case StarPU termination code is called from a callback,
															
 
																  		 * we have to check if pthread_self() is the worker itself */
															
 
																-		if (set){ 
															
 
																-			if (!set->joined) {
															
 
																+		if (set)
															
 
																+		{
															
 
																+			if (!set->joined)
															
 
																+			{
															
 
																 				if (!pthread_equal(pthread_self(), set->worker_thread))
															
 
																 				{
															
 
																 					status = pthread_join(set->worker_thread, NULL);
															
 
																 #ifdef STARPU_VERBOSE
															
 
																-					if (status) {
															
 
																+					if (status)
															
 
																+					{
															
 
																 						_STARPU_DEBUG("pthread_join -> %d\n", status);
															
 
																                                         }
															
 
																 #endif
															
@@ -442,12 +528,14 @@ static void _starpu_terminate_workers(struct starpu_machine_config_s *config)
 
																 				set->joined = 1;
															
 
																 			}
															
 
																 		}
															
 
																-		else {
															
 
																+		else
															
 
																+		{
															
 
																 			if (!pthread_equal(pthread_self(), worker->worker_thread))
															
 
																 			{
															
 
																 				status = pthread_join(worker->worker_thread, NULL);
															
 
																 #ifdef STARPU_VERBOSE
															
 
																-				if (status) {
															
 
																+				if (status)
															
 
																+				{
															
 
																 					_STARPU_DEBUG("pthread_join -> %d\n", status);
															
 
																                                 }
															
 
																 #endif
															
@@ -455,12 +543,14 @@ static void _starpu_terminate_workers(struct starpu_machine_config_s *config)
 
																 		}
															
 
																 		STARPU_ASSERT(starpu_task_list_empty(&worker->local_tasks));
															
 
																-		starpu_job_list_delete(worker->terminated_jobs);
															
 
																+		_starpu_job_list_delete(worker->terminated_jobs);
															
 
																 	}
															
 
																 }
															
 
																 unsigned _starpu_machine_is_running(void)
															
 
																 {
															
 
																+	/* running is just protected by a memory barrier */
															
 
																+	STARPU_SYNCHRONIZE();
															
 
																 	return config.running;
															
 
																 }
															
@@ -484,26 +574,32 @@ unsigned _starpu_worker_can_block(unsigned memnode STARPU_ATTRIBUTE_UNUSED)
 
																 #endif
															
 
																 }
															
 
																-static void _starpu_kill_all_workers(struct starpu_machine_config_s *config)
															
 
																+static void _starpu_kill_all_workers(struct _starpu_machine_config *config)
															
 
																 {
															
 
																 	/* set the flag which will tell workers to stop */
															
 
																 	config->running = 0;
															
 
																+	/* running is just protected by a memory barrier */
															
 
																+	STARPU_SYNCHRONIZE();
															
 
																 	starpu_wake_all_blocked_workers();
															
 
																 }
															
 
																 void starpu_shutdown(void)
															
 
																 {
															
 
																 	const char *stats;
															
 
																-	PTHREAD_MUTEX_LOCK(&init_mutex);
															
 
																+	_STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
															
 
																 	init_count--;
															
 
																-	if (init_count){
															
 
																-		PTHREAD_MUTEX_UNLOCK(&init_mutex);
															
 
																-		/* Still somebody needing StarPU, don't deinitialize */
															
 
																+	if (init_count)
															
 
																+	{
															
 
																+		_STARPU_DEBUG("Still somebody needing StarPU, don't deinitialize\n");
															
 
																+		_STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
															
 
																 		return;
															
 
																 	}
															
 
																+
															
 
																 	/* We're last */
															
 
																 	initialized = CHANGING;
															
 
																-	PTHREAD_MUTEX_UNLOCK(&init_mutex);
															
 
																+	_STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
															
 
																+
															
 
																+	starpu_task_wait_for_no_ready();
															
 
																 	_starpu_display_msi_stats();
															
 
																 	_starpu_display_alloc_cache_stats();
															
@@ -511,6 +607,11 @@ void starpu_shutdown(void)
 
																 	/* tell all workers to shutdown */
															
 
																 	_starpu_kill_all_workers(&config);
															
 
																+#ifdef STARPU_MEMORY_STATUS
															
 
																+	if ((stats = getenv("STARPU_MEMORY_STATS")) && atoi(stats))
															
 
																+		_starpu_display_data_stats();
															
 
																+#endif
															
 
																+
															
 
																 #ifdef STARPU_DATA_STATS
															
 
																 	_starpu_display_comm_amounts();
															
 
																 #endif
															
@@ -535,13 +636,18 @@ void starpu_shutdown(void)
 
																 	_starpu_data_interface_shutdown();
															
 
																+	/* Drop all remaining tags */
															
 
																+	_starpu_tag_clear();
															
 
																+
															
 
																 	_starpu_close_debug_logfile();
															
 
																-	PTHREAD_MUTEX_LOCK(&init_mutex);
															
 
																+	_STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
															
 
																 	initialized = UNINITIALIZED;
															
 
																 	/* Let someone else that wants to initialize it again do it */
															
 
																-	PTHREAD_COND_SIGNAL(&init_cond);
															
 
																-	PTHREAD_MUTEX_UNLOCK(&init_mutex);
															
 
																+	_STARPU_PTHREAD_COND_SIGNAL(&init_cond);
															
 
																+	_STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
															
 
																+
															
 
																+	_STARPU_DEBUG("Shutdown finished\n");
															
 
																 }
															
 
																 unsigned starpu_worker_get_count(void)
															
@@ -602,14 +708,15 @@ unsigned starpu_spu_worker_get_count(void)
 
																  * that is not controlled by StarPU, starpu_worker_get_id returns -1. */
															
 
																 int starpu_worker_get_id(void)
															
 
																 {
															
 
																-	struct starpu_worker_s * worker;
															
 
																+	struct _starpu_worker * worker;
															
 
																 	worker = _starpu_get_local_worker_key();
															
 
																 	if (worker)
															
 
																 	{
															
 
																 		return worker->workerid;
															
 
																 	}
															
 
																-	else {
															
 
																+	else
															
 
																+	{
															
 
																 		/* there is no worker associated to that thread, perhaps it is
															
 
																 		 * a thread from the application or this is some SPU worker */
															
 
																 		return -1;
															
@@ -618,14 +725,15 @@ int starpu_worker_get_id(void)
 
																 int starpu_combined_worker_get_id(void)
															
 
																 {
															
 
																-	struct starpu_worker_s *worker;
															
 
																+	struct _starpu_worker *worker;
															
 
																 	worker = _starpu_get_local_worker_key();
															
 
																 	if (worker)
															
 
																 	{
															
 
																 		return worker->combined_workerid;
															
 
																 	}
															
 
																-	else {
															
 
																+	else
															
 
																+	{
															
 
																 		/* there is no worker associated to that thread, perhaps it is
															
 
																 		 * a thread from the application or this is some SPU worker */
															
 
																 		return -1;
															
@@ -634,14 +742,15 @@ int starpu_combined_worker_get_id(void)
 
																 int starpu_combined_worker_get_size(void)
															
 
																 {
															
 
																-	struct starpu_worker_s *worker;
															
 
																+	struct _starpu_worker *worker;
															
 
																 	worker = _starpu_get_local_worker_key();
															
 
																 	if (worker)
															
 
																 	{
															
 
																 		return worker->worker_size;
															
 
																 	}
															
 
																-	else {
															
 
																+	else
															
 
																+	{
															
 
																 		/* there is no worker associated to that thread, perhaps it is
															
 
																 		 * a thread from the application or this is some SPU worker */
															
 
																 		return -1;
															
@@ -650,14 +759,15 @@ int starpu_combined_worker_get_size(void)
 
																 int starpu_combined_worker_get_rank(void)
															
 
																 {
															
 
																-	struct starpu_worker_s *worker;
															
 
																+	struct _starpu_worker *worker;
															
 
																 	worker = _starpu_get_local_worker_key();
															
 
																 	if (worker)
															
 
																 	{
															
 
																 		return worker->current_rank;
															
 
																 	}
															
 
																-	else {
															
 
																+	else
															
 
																+	{
															
 
																 		/* there is no worker associated to that thread, perhaps it is
															
 
																 		 * a thread from the application or this is some SPU worker */
															
 
																 		return -1;
															
@@ -669,18 +779,12 @@ int starpu_worker_get_devid(int id)
 
																 	return config.workers[id].devid;
															
 
																 }
															
 
																-struct starpu_worker_s *_starpu_get_worker_struct(unsigned id)
															
 
																+struct _starpu_worker *_starpu_get_worker_struct(unsigned id)
															
 
																 {
															
 
																 	return &config.workers[id];
															
 
																 }
															
 
																-struct starpu_sched_ctx *_starpu_get_sched_ctx_struct(unsigned id)
															
 
																-{
															
 
																-	STARPU_ASSERT(id >= 0 && id <= STARPU_NMAX_SCHED_CTXS);
															
 
																-	return &config.sched_ctxs[id];
															
 
																-}
															
 
																-
															
 
																-struct starpu_combined_worker_s *_starpu_get_combined_worker_struct(unsigned id)
															
 
																+struct _starpu_combined_worker *_starpu_get_combined_worker_struct(unsigned id)
															
 
																 {
															
 
																 	unsigned basic_worker_count = starpu_worker_get_count();
															
@@ -723,14 +827,14 @@ void starpu_worker_get_name(int id, char *dst, size_t maxlen)
 
																 }
															
 
																 /* Retrieve the status which indicates what the worker is currently doing. */
															
 
																-starpu_worker_status _starpu_worker_get_status(int workerid)
															
 
																+enum _starpu_worker_status _starpu_worker_get_status(int workerid)
															
 
																 {
															
 
																 	return config.workers[workerid].status;
															
 
																 }
															
 
																 /* Change the status of the worker which indicates what the worker is currently
															
 
																  * doing (eg. executing a callback). */
															
 
																-void _starpu_worker_set_status(int workerid, starpu_worker_status status)
															
 
																+void _starpu_worker_set_status(int workerid, enum _starpu_worker_status status)
															
 
																 {
															
 
																 	config.workers[workerid].status = status;
															
 
																 }
															
--- a/src/core/workers.h
+++ b/src/core/workers.h
@@ -1,6 +1,6 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2009-2012  Université de Bordeaux 1
															
 
																  * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
															
 
																  * Copyright (C) 2011  INRIA
															
 
																  *
															
@@ -54,15 +54,16 @@
 
																 #include <starpu_parameters.h>
															
 
																-struct starpu_worker_s {
															
 
																-	struct starpu_machine_config_s *config;
															
 
																+struct _starpu_worker
															
 
																+{
															
 
																+	struct _starpu_machine_config *config;
															
 
																         pthread_mutex_t mutex;
															
 
																 	enum starpu_archtype arch; /* what is the type of worker ? */
															
 
																 	uint32_t worker_mask; /* what is the type of worker ? */
															
 
																 	enum starpu_perf_archtype perf_arch; /* in case there are different models of the same arch */
															
 
																 	pthread_t worker_thread; /* the thread which runs the worker */
															
 
																 	int devid; /* which cpu/gpu/etc is controlled by the worker ? */
															
 
																-	int bindid; /* which cpu is the driver bound to ? */
															
 
																+	int bindid; /* which cpu is the driver bound to ? (logical index) */
															
 
																 	int workerid; /* uniquely identify the worker among all processing units types */
															
 
																 	int combined_workerid; /* combined worker currently using this worker */
															
 
																 	int current_rank; /* current rank in case the worker is used in a parallel fashion */
															
@@ -72,11 +73,12 @@ struct starpu_worker_s {
 
																 	pthread_cond_t sched_cond; /* condition variable used when the worker waits for tasks. */
															
 
																 	pthread_mutex_t sched_mutex; /* mutex protecting sched_cond */
															
 
																 	struct starpu_task_list local_tasks; /* this queue contains tasks that have been explicitly submitted to that queue */
															
 
																-	struct starpu_worker_set_s *set; /* in case this worker belongs to a set */
															
 
																-	struct starpu_job_list_s *terminated_jobs; /* list of pending jobs which were executed */
															
 
																+	struct starpu_task *current_task; /* task currently executed by this worker */
															
 
																+	struct _starpu_worker_set *set; /* in case this worker belongs to a set */
															
 
																+	struct _starpu_job_list *terminated_jobs; /* list of pending jobs which were executed */
															
 
																 	unsigned worker_is_running;
															
 
																 	unsigned worker_is_initialized;
															
 
																-	starpu_worker_status status; /* what is the worker doing now ? (eg. CALLBACK) */
															
 
																+	enum _starpu_worker_status status; /* what is the worker doing now ? (eg. CALLBACK) */
															
 
																 	char name[48];
															
 
																 	char short_name[10];
															
@@ -96,7 +98,8 @@ struct starpu_worker_s {
 
																 #endif
															
 
																 };
															
 
																-struct starpu_combined_worker_s {
															
 
																+struct _starpu_combined_worker
															
 
																+{
															
 
																 	enum starpu_perf_archtype perf_arch; /* in case there are different models of the same arch */
															
 
																 	uint32_t worker_mask; /* what is the type of workers ? */
															
 
																 	int worker_size;
															
@@ -111,22 +114,23 @@ struct starpu_combined_worker_s {
 
																 #endif
															
 
																 };
															
 
																-/* in case a single CPU worker may control multiple 
															
 
																+/* in case a single CPU worker may control multiple
															
 
																  * accelerators (eg. Gordon for n SPUs) */
															
 
																-struct starpu_worker_set_s {
															
 
																+struct _starpu_worker_set
															
 
																+{
															
 
																         pthread_mutex_t mutex;
															
 
																 	pthread_t worker_thread; /* the thread which runs the worker */
															
 
																 	unsigned nworkers;
															
 
																 	unsigned joined; /* only one thread may call pthread_join*/
															
 
																 	void *retval;
															
 
																-	struct starpu_worker_s *workers;
															
 
																+	struct _starpu_worker *workers;
															
 
																         pthread_cond_t ready_cond; /* indicate when the set is ready */
															
 
																 	unsigned set_is_initialized;
															
 
																 };
															
 
																-struct starpu_machine_config_s {
															
 
																-
															
 
																-	struct starpu_machine_topology_s topology;
															
 
																+struct _starpu_machine_config
															
 
																+{
															
 
																+	struct starpu_machine_topology topology;
															
 
																 #ifdef STARPU_HAVE_HWLOC
															
 
																 	int cpu_depth;
															
@@ -134,20 +138,20 @@ struct starpu_machine_config_s {
 
																 	/* Where to bind workers ? */
															
 
																 	int current_bindid;
															
 
																-	
															
 
																+
															
 
																 	/* Which GPU(s) do we use for CUDA ? */
															
 
																 	int current_cuda_gpuid;
															
 
																 	/* Which GPU(s) do we use for OpenCL ? */
															
 
																 	int current_opencl_gpuid;
															
 
																-	
															
 
																+
															
 
																 	/* Basic workers : each of these workers is running its own driver and
															
 
																 	 * can be combined with other basic workers. */
															
 
																-	struct starpu_worker_s workers[STARPU_NMAXWORKERS];
															
 
																+	struct _starpu_worker workers[STARPU_NMAXWORKERS];
															
 
																 	/* Combined workers: these workers are a combination of basic workers
															
 
																 	 * that can run parallel tasks together. */
															
 
																-	struct starpu_combined_worker_s combined_workers[STARPU_NMAX_COMBINEDWORKERS];
															
 
																+	struct _starpu_combined_worker combined_workers[STARPU_NMAX_COMBINEDWORKERS];
															
 
																 	/* This bitmask indicates which kinds of worker are available. For
															
 
																 	 * instance it is possible to test if there is a CUDA worker with
															
@@ -169,16 +173,16 @@ struct starpu_machine_config_s {
 
																 unsigned _starpu_machine_is_running(void);
															
 
																 /* Check if there is a worker that may execute the task. */
															
 
																-uint32_t _starpu_worker_exists(uint32_t task_mask);
															
 
																+uint32_t _starpu_worker_exists(struct starpu_task *);
															
 
																 /* Is there a worker that can execute CUDA code ? */
															
 
																-uint32_t _starpu_may_submit_cuda_task(void);
															
 
																+uint32_t _starpu_can_submit_cuda_task(void);
															
 
																 /* Is there a worker that can execute CPU code ? */
															
 
																-uint32_t _starpu_may_submit_cpu_task(void);
															
 
																+uint32_t _starpu_can_submit_cpu_task(void);
															
 
																 /* Is there a worker that can execute OpenCL code ? */
															
 
																-uint32_t _starpu_may_submit_opencl_task(void);
															
 
																+uint32_t _starpu_can_submit_opencl_task(void);
															
 
																 /* Check whether there is anything that the worker should do instead of
															
 
																  * sleeping (waiting on something to happen). */
															
@@ -189,36 +193,37 @@ unsigned _starpu_worker_can_block(unsigned memnode);
 
																  * */
															
 
																 void _starpu_block_worker(int workerid, pthread_cond_t *cond, pthread_mutex_t *mutex);
															
 
																-/* The starpu_worker_s structure describes all the state of a StarPU worker.
															
 
																+/* The _starpu_worker structure describes all the state of a StarPU worker.
															
 
																  * This function sets the pthread key which stores a pointer to this structure.
															
 
																  * */
															
 
																-void _starpu_set_local_worker_key(struct starpu_worker_s *worker);
															
 
																+void _starpu_set_local_worker_key(struct _starpu_worker *worker);
															
 
																-/* Returns the starpu_worker_s structure that describes the state of the
															
 
																+/* Returns the _starpu_worker structure that describes the state of the
															
 
																  * current worker. */
															
 
																-struct starpu_worker_s *_starpu_get_local_worker_key(void);
															
 
																+struct _starpu_worker *_starpu_get_local_worker_key(void);
															
 
																-/* Returns the starpu_worker_s structure that describes the state of the
															
 
																+/* Returns the _starpu_worker structure that describes the state of the
															
 
																  * specified worker. */
															
 
																-struct starpu_worker_s *_starpu_get_worker_struct(unsigned id);
															
 
																+struct _starpu_worker *_starpu_get_worker_struct(unsigned id);
															
 
																 /* Returns the starpu_sched_ctx structure that describes the state of the 
															
 
																  * specified ctx */
															
 
																 struct starpu_sched_ctx *_starpu_get_sched_ctx_struct(unsigned id);
															
 
																+struct _starpu_combined_worker *_starpu_get_combined_worker_struct(unsigned id);
															
 
																-struct starpu_combined_worker_s *_starpu_get_combined_worker_struct(unsigned id);
															
 
																+int _starpu_is_initialized(void);
															
 
																 /* Returns the structure that describes the overall machine configuration (eg.
															
 
																  * all workers and topology). */
															
 
																-struct starpu_machine_config_s *_starpu_get_machine_config(void);
															
 
																+struct _starpu_machine_config *_starpu_get_machine_config(void);
															
 
																 /* Retrieve the status which indicates what the worker is currently doing. */
															
 
																-starpu_worker_status _starpu_worker_get_status(int workerid);
															
 
																+enum _starpu_worker_status _starpu_worker_get_status(int workerid);
															
 
																 /* Change the status of the worker which indicates what the worker is currently
															
 
																  * doing (eg. executing a callback). */
															
 
																-void _starpu_worker_set_status(int workerid, starpu_worker_status status);
															
 
																+void _starpu_worker_set_status(int workerid, enum _starpu_worker_status status);
															
 
																 /* TODO move */
															
 
																 unsigned _starpu_execute_registered_progression_hooks(void);