14 years ago · 3ad31951c8
--- a/src/common/barrier.c
+++ b/src/common/barrier.c
@@ -31,12 +31,13 @@ int _starpu_barrier_init(struct _starpu_barrier *barrier, int count)
 
				 static
			
 
				 int _starpu_barrier_test(struct _starpu_barrier *barrier)
			
 
				 {
			
 
				-    /*
			
 
				-     * Check whether any threads are known to be waiting; report
			
 
				-     * "BUSY" if so.
			
 
				-     */
			
 
				+	/*
			
 
				+	 * Check whether any threads are known to be waiting; report
			
 
				+	 * "BUSY" if so.
			
 
				+	 */
			
 
				         _STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex_exit);
			
 
				-        if (barrier->reached_exit != barrier->count) {
			
 
				+        if (barrier->reached_exit != barrier->count)
			
 
				+	{
			
 
				                 _STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex_exit);
			
 
				                 return EBUSY;
			
 
				         }
			
@@ -47,7 +48,8 @@ int _starpu_barrier_test(struct _starpu_barrier *barrier)
 
				 int _starpu_barrier_destroy(struct _starpu_barrier *barrier)
			
 
				 {
			
 
				 	int ret = _starpu_barrier_test(barrier);
			
 
				-	while (ret == EBUSY) {
			
 
				+	while (ret == EBUSY)
			
 
				+	{
			
 
				 		ret = _starpu_barrier_test(barrier);
			
 
				 	}
			
 
				 	_STARPU_DEBUG("reached_exit %d\n", barrier->reached_exit);
			
--- a/src/common/barrier.h
+++ b/src/common/barrier.h
@@ -19,7 +19,8 @@
 
				 
			
 
				 #include <pthread.h>
			
 
				 
			
 
				-struct _starpu_barrier {
			
 
				+struct _starpu_barrier
			
 
				+{
			
 
				 	int count;
			
 
				 	int reached_start;
			
 
				 	int reached_exit;
			
--- a/src/common/fxt.c
+++ b/src/common/fxt.c
@@ -27,17 +27,17 @@
 
				 #ifdef STARPU_HAVE_WINDOWS
			
 
				 #include <windows.h>
			
 
				 #endif
			
 
				-		
			
 
				-#define PROF_BUFFER_SIZE  (8*1024*1024)
			
 
				 
			
 
				-static char PROF_FILE_USER[128];
			
 
				-static int fxt_started = 0;
			
 
				+#define _STARPU_PROF_BUFFER_SIZE  (8*1024*1024)
			
 
				 
			
 
				-static int written = 0;
			
 
				+static char _STARPU_PROF_FILE_USER[128];
			
 
				+static int _starpu_fxt_started = 0;
			
 
				 
			
 
				-static int id;
			
 
				+static int _starpu_written = 0;
			
 
				 
			
 
				-static void _profile_set_tracefile(void *last, ...)
			
 
				+static int _starpu_id;
			
 
				+
			
 
				+static void _starpu_profile_set_tracefile(void *last, ...)
			
 
				 {
			
 
				 	va_list vl;
			
 
				 	char *user;
			
@@ -47,7 +47,7 @@ static void _profile_set_tracefile(void *last, ...)
 
				 			fxt_prefix = "/tmp/";
			
 
				 
			
 
				 	va_start(vl, last);
			
 
				-	vsprintf(PROF_FILE_USER, fxt_prefix, vl);
			
 
				+	vsprintf(_STARPU_PROF_FILE_USER, fxt_prefix, vl);
			
 
				 	va_end(vl);
			
 
				 
			
 
				 	user = getenv("USER");
			
@@ -55,31 +55,34 @@ static void _profile_set_tracefile(void *last, ...)
 
				 		user = "";
			
 
				 
			
 
				 	char suffix[128];
			
 
				-	snprintf(suffix, 128, "prof_file_%s_%d", user, id);
			
 
				+	snprintf(suffix, 128, "prof_file_%s_%d", user, _starpu_id);
			
 
				 
			
 
				-	strcat(PROF_FILE_USER, suffix);
			
 
				+	strcat(_STARPU_PROF_FILE_USER, suffix);
			
 
				 }
			
 
				 
			
 
				-void starpu_set_profiling_id(int new_id) {
			
 
				+void starpu_set_profiling_id(int new_id)
			
 
				+{
			
 
				         _STARPU_DEBUG("Set id to <%d>\n", new_id);
			
 
				-	id = new_id;
			
 
				-        _profile_set_tracefile(NULL);
			
 
				+	_starpu_id = new_id;
			
 
				+        _starpu_profile_set_tracefile(NULL);
			
 
				 }
			
 
				 
			
 
				 void _starpu_start_fxt_profiling(void)
			
 
				 {
			
 
				 	unsigned threadid;
			
 
				 
			
 
				-	if (!fxt_started) {
			
 
				-		fxt_started = 1;
			
 
				-		_profile_set_tracefile(NULL);
			
 
				+	if (!_starpu_fxt_started)
			
 
				+	{
			
 
				+		_starpu_fxt_started = 1;
			
 
				+		_starpu_profile_set_tracefile(NULL);
			
 
				 	}
			
 
				 
			
 
				 	threadid = syscall(SYS_gettid);
			
 
				 
			
 
				 	atexit(_starpu_stop_fxt_profiling);
			
 
				 
			
 
				-	if(fut_setup(PROF_BUFFER_SIZE, FUT_KEYMASKALL, threadid) < 0) {
			
 
				+	if (fut_setup(_STARPU_PROF_BUFFER_SIZE, FUT_KEYMASKALL, threadid) < 0)
			
 
				+	{
			
 
				 		perror("fut_setup");
			
 
				 		STARPU_ABORT();
			
 
				 	}
			
@@ -89,7 +92,7 @@ void _starpu_start_fxt_profiling(void)
 
				 	return;
			
 
				 }
			
 
				 
			
 
				-static void generate_paje_trace(char *input_fxt_filename, char *output_paje_filename)
			
 
				+static void _starpu_generate_paje_trace(char *input_fxt_filename, char *output_paje_filename)
			
 
				 {
			
 
				 	/* We take default options */
			
 
				 	struct starpu_fxt_options options;
			
@@ -108,19 +111,19 @@ static void generate_paje_trace(char *input_fxt_filename, char *output_paje_file
 
				 
			
 
				 void _starpu_stop_fxt_profiling(void)
			
 
				 {
			
 
				-	if (!written)
			
 
				+	if (!_starpu_written)
			
 
				 	{
			
 
				 #ifdef STARPU_VERBOSE
			
 
				 	        char hostname[128];
			
 
				 		gethostname(hostname, 128);
			
 
				-		fprintf(stderr, "Writing FxT traces into file %s:%s\n", hostname, PROF_FILE_USER);
			
 
				+		fprintf(stderr, "Writing FxT traces into file %s:%s\n", hostname, _STARPU_PROF_FILE_USER);
			
 
				 #endif
			
 
				-		fut_endup(PROF_FILE_USER);
			
 
				+		fut_endup(_STARPU_PROF_FILE_USER);
			
 
				 
			
 
				 		/* Should we generate a Paje trace directly ? */
			
 
				 		int generate_trace = starpu_get_env_number("STARPU_GENERATE_TRACE");
			
 
				 		if (generate_trace == 1)
			
 
				-			generate_paje_trace(PROF_FILE_USER, "paje.trace");
			
 
				+			_starpu_generate_paje_trace(_STARPU_PROF_FILE_USER, "paje.trace");
			
 
				 
			
 
				 		int ret = fut_done();
			
 
				 		if (ret < 0)
			
@@ -130,7 +133,7 @@ void _starpu_stop_fxt_profiling(void)
 
				 			fprintf(stderr, "Warning: the FxT trace could not be generated properly\n");
			
 
				 		}
			
 
				 
			
 
				-		written = 1;
			
 
				+		_starpu_written = 1;
			
 
				 	}
			
 
				 }
			
 
				 
			
--- a/src/common/hash.c
+++ b/src/common/hash.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2009, 2010  Université de Bordeaux 1
			
 
				- * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -19,16 +19,16 @@
 
				 #include <stdlib.h>
			
 
				 #include <string.h>
			
 
				 
			
 
				-#define CRC32C_POLY_BE 0x1EDC6F41
			
 
				+#define _STARPU_CRC32C_POLY_BE 0x1EDC6F41
			
 
				 
			
 
				-static inline uint32_t __attribute__ ((pure)) crc32_be_8(uint8_t inputbyte, uint32_t inputcrc)
			
 
				+static inline uint32_t __attribute__ ((pure)) _starpu_crc32_be_8(uint8_t inputbyte, uint32_t inputcrc)
			
 
				 {
			
 
				 	unsigned i;
			
 
				 	uint32_t crc;
			
 
				 
			
 
				 	crc = inputcrc ^ (inputbyte << 24);
			
 
				 	for (i = 0; i < 8; i++)
			
 
				-		crc = (crc << 1) ^ ((crc & 0x80000000) ? CRC32C_POLY_BE : 0);
			
 
				+		crc = (crc << 1) ^ ((crc & 0x80000000) ? _STARPU_CRC32C_POLY_BE : 0);
			
 
				 
			
 
				 	return crc;
			
 
				 }
			
@@ -39,10 +39,10 @@ uint32_t _starpu_crc32_be(uint32_t input, uint32_t inputcrc)
 
				 
			
 
				 	uint32_t crc = inputcrc;
			
 
				 
			
 
				-	crc = crc32_be_8(p[0], crc);
			
 
				-	crc = crc32_be_8(p[1], crc);
			
 
				-	crc = crc32_be_8(p[2], crc);
			
 
				-	crc = crc32_be_8(p[3], crc);
			
 
				+	crc = _starpu_crc32_be_8(p[0], crc);
			
 
				+	crc = _starpu_crc32_be_8(p[1], crc);
			
 
				+	crc = _starpu_crc32_be_8(p[2], crc);
			
 
				+	crc = _starpu_crc32_be_8(p[3], crc);
			
 
				 
			
 
				 	return crc;
			
 
				 }
			
@@ -56,7 +56,7 @@ uint32_t _starpu_crc32_string(char *str, uint32_t inputcrc)
 
				 	unsigned i;
			
 
				 	for (i = 0; i < len; i++)
			
 
				 	{
			
 
				-		hash = crc32_be_8((uint8_t)str[i], hash);
			
 
				+		hash = _starpu_crc32_be_8((uint8_t)str[i], hash);
			
 
				 	}
			
 
				 
			
 
				 	return hash;
			
--- a/src/common/htable32.c
+++ b/src/common/htable32.c
@@ -33,21 +33,20 @@ void *_starpu_htbl_search_32(struct starpu_htbl32_node *htbl, uint32_t key)
 
				 
			
 
				 	for(currentbit = 0; currentbit < keysize; currentbit+=_STARPU_HTBL32_NODE_SIZE)
			
 
				 	{
			
 
				-	
			
 
				-	//	printf("search : current bit = %d \n", currentbit);
			
 
				+		//	printf("search : current bit = %d \n", currentbit);
			
 
				 		if (STARPU_UNLIKELY(current_htbl == NULL))
			
 
				 			return NULL;
			
 
				 
			
 
				-		/* 0000000000001111 
			
 
				+		/* 0000000000001111
			
 
				 		 *     | currentbit
			
 
				 		 * 0000111100000000 = offloaded_mask
			
 
				 		 *         |last_currentbit
			
 
				 		 * */
			
 
				 
			
 
				-		unsigned last_currentbit = 
			
 
				+		unsigned last_currentbit =
			
 
				 			keysize - (currentbit + _STARPU_HTBL32_NODE_SIZE);
			
 
				 		uint32_t offloaded_mask = mask << last_currentbit;
			
 
				-		unsigned current_index = 
			
 
				+		unsigned current_index =
			
 
				 			(key & (offloaded_mask)) >> (last_currentbit);
			
 
				 
			
 
				 		current_htbl = current_htbl->children[current_index];
			
@@ -73,29 +72,30 @@ void *_starpu_htbl_insert_32(struct starpu_htbl32_node **htbl, uint32_t key, voi
 
				 	for(currentbit = 0; currentbit < keysize; currentbit+=_STARPU_HTBL32_NODE_SIZE)
			
 
				 	{
			
 
				 		//printf("insert : current bit = %d \n", currentbit);
			
 
				-		if (*current_htbl_ptr == NULL) {
			
 
				+		if (*current_htbl_ptr == NULL)
			
 
				+		{
			
 
				 			/* TODO pad to change that 1 into 16 ? */
			
 
				 			*current_htbl_ptr = (struct starpu_htbl32_node*)calloc(sizeof(struct starpu_htbl32_node), 1);
			
 
				 			assert(*current_htbl_ptr);
			
 
				 		}
			
 
				 
			
 
				-		/* 0000000000001111 
			
 
				+		/* 0000000000001111
			
 
				 		 *     | currentbit
			
 
				 		 * 0000111100000000 = offloaded_mask
			
 
				 		 *         |last_currentbit
			
 
				 		 * */
			
 
				 
			
 
				-		unsigned last_currentbit = 
			
 
				+		unsigned last_currentbit =
			
 
				 			keysize - (currentbit + _STARPU_HTBL32_NODE_SIZE);
			
 
				 		uint32_t offloaded_mask = mask << last_currentbit;
			
 
				-		unsigned current_index = 
			
 
				+		unsigned current_index =
			
 
				 			(key & (offloaded_mask)) >> (last_currentbit);
			
 
				 
			
 
				-		current_htbl_ptr = 
			
 
				+		current_htbl_ptr =
			
 
				 			&((*current_htbl_ptr)->children[current_index]);
			
 
				 	}
			
 
				 
			
 
				-	/* current_htbl either contains NULL or a previous entry 
			
 
				+	/* current_htbl either contains NULL or a previous entry
			
 
				 	 * we overwrite it anyway */
			
 
				 	void *old_entry = *current_htbl_ptr;
			
 
				 	*current_htbl_ptr = (struct starpu_htbl32_node *) entry;
			
--- a/src/common/rwlock.c
+++ b/src/common/rwlock.c
@@ -17,16 +17,18 @@
 
				 
			
 
				 /**
			
 
				  * A dummy implementation of a rw_lock using spinlocks ...
			
 
				- */ 
			
 
				+ */
			
 
				 
			
 
				 #include "rwlock.h"
			
 
				 
			
 
				 static void _starpu_take_busy_lock(struct _starpu_rw_lock *lock)
			
 
				 {
			
 
				 	uint32_t prev;
			
 
				-	do {
			
 
				+	do
			
 
				+	{
			
 
				 		prev = STARPU_TEST_AND_SET(&lock->busy, 1);
			
 
				-	} while (prev);
			
 
				+	}
			
 
				+	while (prev);
			
 
				 }
			
 
				 
			
 
				 static void _starpu_release_busy_lock(struct _starpu_rw_lock *lock)
			
@@ -47,14 +49,15 @@ void _starpu_init_rw_lock(struct _starpu_rw_lock *lock)
 
				 int _starpu_take_rw_lock_write_try(struct _starpu_rw_lock *lock)
			
 
				 {
			
 
				 	_starpu_take_busy_lock(lock);
			
 
				-	
			
 
				+
			
 
				 	if (lock->readercnt > 0 || lock->writer)
			
 
				 	{
			
 
				 		/* fail to take the lock */
			
 
				 		_starpu_release_busy_lock(lock);
			
 
				 		return -1;
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		STARPU_ASSERT(lock->readercnt == 0);
			
 
				 		STARPU_ASSERT(lock->writer == 0);
			
 
				 
			
@@ -75,7 +78,8 @@ int _starpu_take_rw_lock_read_try(struct _starpu_rw_lock *lock)
 
				 		_starpu_release_busy_lock(lock);
			
 
				 		return -1;
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		STARPU_ASSERT(lock->writer == 0);
			
 
				 
			
 
				 		/* no one is writing */
			
@@ -91,29 +95,33 @@ int _starpu_take_rw_lock_read_try(struct _starpu_rw_lock *lock)
 
				 
			
 
				 void _starpu_take_rw_lock_write(struct _starpu_rw_lock *lock)
			
 
				 {
			
 
				-	do {
			
 
				+	do
			
 
				+	{
			
 
				 		_starpu_take_busy_lock(lock);
			
 
				-		
			
 
				+
			
 
				 		if (lock->readercnt > 0 || lock->writer)
			
 
				 		{
			
 
				 			/* fail to take the lock */
			
 
				 			_starpu_release_busy_lock(lock);
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			STARPU_ASSERT(lock->readercnt == 0);
			
 
				 			STARPU_ASSERT(lock->writer == 0);
			
 
				-	
			
 
				+
			
 
				 			/* no one was writing or reading */
			
 
				 			lock->writer = 1;
			
 
				 			_starpu_release_busy_lock(lock);
			
 
				 			return;
			
 
				 		}
			
 
				-	} while (1);
			
 
				+	}
			
 
				+	while (1);
			
 
				 }
			
 
				 
			
 
				 void _starpu_take_rw_lock_read(struct _starpu_rw_lock *lock)
			
 
				 {
			
 
				-	do {
			
 
				+	do
			
 
				+	{
			
 
				 		_starpu_take_busy_lock(lock);
			
 
				 
			
 
				 		if (lock->writer)
			
@@ -121,7 +129,8 @@ void _starpu_take_rw_lock_read(struct _starpu_rw_lock *lock)
 
				 			/* there is a writer ... */
			
 
				 			_starpu_release_busy_lock(lock);
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			STARPU_ASSERT(lock->writer == 0);
			
 
				 
			
 
				 			/* no one is writing */
			
@@ -131,19 +140,21 @@ void _starpu_take_rw_lock_read(struct _starpu_rw_lock *lock)
 
				 
			
 
				 			return;
			
 
				 		}
			
 
				-	} while (1);
			
 
				+	}
			
 
				+	while (1);
			
 
				 }
			
 
				 
			
 
				 void _starpu_release_rw_lock(struct _starpu_rw_lock *lock)
			
 
				 {
			
 
				 	_starpu_take_busy_lock(lock);
			
 
				 	/* either writer or reader (exactly one !) */
			
 
				-	if (lock->writer) 
			
 
				+	if (lock->writer)
			
 
				 	{
			
 
				 		STARPU_ASSERT(lock->readercnt == 0);
			
 
				 		lock->writer = 0;
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		/* reading mode */
			
 
				 		STARPU_ASSERT(lock->writer == 0);
			
 
				 		lock->readercnt--;
			
--- a/src/common/starpu_spinlock.c
+++ b/src/common/starpu_spinlock.c
@@ -74,9 +74,11 @@ int _starpu_spin_lock(struct _starpu_spinlock *lock)
 
				 	return ret;
			
 
				 #else
			
 
				 	uint32_t prev;
			
 
				-	do {
			
 
				+	do
			
 
				+	{
			
 
				 		prev = STARPU_TEST_AND_SET(&lock->taken, 1);
			
 
				-	} while (prev);
			
 
				+	}
			
 
				+	while (prev);
			
 
				 	return 0;
			
 
				 #endif
			
 
				 #endif
			
--- a/src/common/starpu_spinlock.h
+++ b/src/common/starpu_spinlock.h
@@ -23,7 +23,8 @@
 
				 #include <common/utils.h>
			
 
				 #include <common/config.h>
			
 
				 
			
 
				-struct _starpu_spinlock {
			
 
				+struct _starpu_spinlock
			
 
				+{
			
 
				 #ifdef STARPU_SPINLOCK_CHECK
			
 
				 	pthread_mutexattr_t errcheck_attr;
			
 
				 	pthread_mutex_t errcheck_lock;
			
--- a/src/common/timing.c
+++ b/src/common/timing.c
@@ -34,26 +34,31 @@
 
				 #endif
			
 
				 #endif
			
 
				 
			
 
				-static struct timespec reference_start_time_ts;
			
 
				+static struct timespec _starpu_reference_start_time_ts;
			
 
				 
			
 
				 /* Modern CPUs' clocks are usually not synchronized so we use a monotonic clock
			
 
				  * to have consistent timing measurements. The CLOCK_MONOTONIC_RAW clock is not
			
 
				  * subject to NTP adjustments, but is not available on all systems (in that
			
 
				  * case we use the CLOCK_MONOTONIC clock instead). */
			
 
				-static void _starpu_clock_readtime(struct timespec *ts) {
			
 
				+static void _starpu_clock_readtime(struct timespec *ts)
			
 
				+{
			
 
				 #ifdef CLOCK_MONOTONIC_RAW
			
 
				 	static int raw_supported = 0;
			
 
				-	switch (raw_supported) {
			
 
				+	switch (raw_supported)
			
 
				+	{
			
 
				 	case -1:
			
 
				 		break;
			
 
				 	case 1:
			
 
				 		clock_gettime(CLOCK_MONOTONIC_RAW, ts);
			
 
				 		return;
			
 
				 	case 0:
			
 
				-		if (clock_gettime(CLOCK_MONOTONIC_RAW, ts)) {
			
 
				+		if (clock_gettime(CLOCK_MONOTONIC_RAW, ts))
			
 
				+		{
			
 
				 			raw_supported = -1;
			
 
				 			break;
			
 
				-		} else {
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				 			raw_supported = 1;
			
 
				 			return;
			
 
				 		}
			
@@ -64,7 +69,7 @@ static void _starpu_clock_readtime(struct timespec *ts) {
 
				 
			
 
				 void _starpu_timing_init(void)
			
 
				 {
			
 
				-	_starpu_clock_gettime(&reference_start_time_ts);
			
 
				+	_starpu_clock_gettime(&_starpu_reference_start_time_ts);
			
 
				 }
			
 
				 
			
 
				 void _starpu_clock_gettime(struct timespec *ts)
			
@@ -75,13 +80,13 @@ void _starpu_clock_gettime(struct timespec *ts)
 
				 	_starpu_clock_readtime(&absolute_ts);
			
 
				 
			
 
				 	/* Compute the relative time since initialization */
			
 
				-	starpu_timespec_sub(&absolute_ts, &reference_start_time_ts, ts);
			
 
				+	starpu_timespec_sub(&absolute_ts, &_starpu_reference_start_time_ts, ts);
			
 
				 }
			
 
				 
			
 
				 #else // !HAVE_CLOCK_GETTIME
			
 
				 
			
 
				 #if defined(__i386__) || defined(__pentium__) || defined(__pentiumpro__) || defined(__i586__) || defined(__i686__) || defined(__k6__) || defined(__k7__) || defined(__x86_64__)
			
 
				-typedef union starpu_u_tick
			
 
				+union starpu_u_tick
			
 
				 {
			
 
				   uint64_t tick;
			
 
				 
			
@@ -91,64 +96,64 @@ typedef union starpu_u_tick
 
				     uint32_t high;
			
 
				   }
			
 
				   sub;
			
 
				-} starpu_tick_t;
			
 
				+};
			
 
				 
			
 
				 #define STARPU_GET_TICK(t) __asm__ volatile("rdtsc" : "=a" ((t).sub.low), "=d" ((t).sub.high))
			
 
				 #define STARPU_TICK_RAW_DIFF(t1, t2) ((t2).tick - (t1).tick)
			
 
				 #define STARPU_TICK_DIFF(t1, t2) (STARPU_TICK_RAW_DIFF(t1, t2) - residual)
			
 
				 
			
 
				-static starpu_tick_t reference_start_tick;
			
 
				-static double scale = 0.0;
			
 
				-static unsigned long long residual = 0;
			
 
				+static union starpu_u_tick _starpu_reference_start_tick;
			
 
				+static double _starpu_scale = 0.0;
			
 
				+static unsigned long long _starpu_residual = 0;
			
 
				 
			
 
				-static int inited = 0;
			
 
				+static int _starpu_inited = 0;
			
 
				 
			
 
				 void _starpu_timing_init(void)
			
 
				 {
			
 
				-  static starpu_tick_t t1, t2;
			
 
				+  static union starpu_u_tick t1, t2;
			
 
				   int i;
			
 
				 
			
 
				-  if (inited) return;
			
 
				+  if (_starpu_inited) return;
			
 
				+
			
 
				+  _starpu_residual = (unsigned long long)1 << 63;
			
 
				 
			
 
				-  residual = (unsigned long long)1 << 63;
			
 
				-  
			
 
				   for(i = 0; i < 20; i++)
			
 
				     {
			
 
				       STARPU_GET_TICK(t1);
			
 
				       STARPU_GET_TICK(t2);
			
 
				-      residual = STARPU_MIN(residual, STARPU_TICK_RAW_DIFF(t1, t2));
			
 
				+      _starpu_residual = STARPU_MIN(_starpu_residual, STARPU_TICK_RAW_DIFF(t1, t2));
			
 
				     }
			
 
				-  
			
 
				+
			
 
				   {
			
 
				     struct timeval tv1,tv2;
			
 
				-    
			
 
				+
			
 
				     STARPU_GET_TICK(t1);
			
 
				     gettimeofday(&tv1,0);
			
 
				     usleep(500000);
			
 
				     STARPU_GET_TICK(t2);
			
 
				     gettimeofday(&tv2,0);
			
 
				-    scale = ((tv2.tv_sec*1e6 + tv2.tv_usec) -
			
 
				-	     (tv1.tv_sec*1e6 + tv1.tv_usec)) / 
			
 
				+    _starpu_scale = ((tv2.tv_sec*1e6 + tv2.tv_usec) -
			
 
				+		     (tv1.tv_sec*1e6 + tv1.tv_usec)) /
			
 
				       (double)(STARPU_TICK_DIFF(t1, t2));
			
 
				   }
			
 
				 
			
 
				-  STARPU_GET_TICK(reference_start_tick);
			
 
				+  STARPU_GET_TICK(_starpu_reference_start_tick);
			
 
				 
			
 
				-  inited = 1;
			
 
				+  _starpu_inited = 1;
			
 
				 }
			
 
				 
			
 
				 void _starpu_clock_gettime(struct timespec *ts)
			
 
				 {
			
 
				-	starpu_tick_t tick_now;
			
 
				+	union starpu_u_tick tick_now;
			
 
				 
			
 
				 	STARPU_GET_TICK(tick_now);
			
 
				 
			
 
				-	uint64_t elapsed_ticks = STARPU_TICK_DIFF(reference_start_tick, tick_now);
			
 
				+	uint64_t elapsed_ticks = STARPU_TICK_DIFF(_starpu_reference_start_tick, tick_now);
			
 
				 
			
 
				 	/* We convert this number into nano-seconds so that we can fill the
			
 
				 	 * timespec structure. */
			
 
				-	uint64_t elapsed_ns = (uint64_t)(((double)elapsed_ticks)*(scale*1000.0));
			
 
				-	
			
 
				+	uint64_t elapsed_ns = (uint64_t)(((double)elapsed_ticks)*(_starpu_scale*1000.0));
			
 
				+
			
 
				 	long tv_nsec = (elapsed_ns % 1000000000);
			
 
				 	time_t tv_sec = (elapsed_ns / 1000000000);
			
 
				 
			
@@ -173,7 +178,7 @@ void _starpu_clock_gettime(struct timespec *ts)
 
				 double starpu_timing_timespec_delay_us(struct timespec *start, struct timespec *end)
			
 
				 {
			
 
				 	struct timespec diff;
			
 
				-	
			
 
				+
			
 
				 	starpu_timespec_sub(end, start, &diff);
			
 
				 
			
 
				 	double us = (diff.tv_sec*1e6) + (diff.tv_nsec*1e-3);
			
--- a/src/common/utils.c
+++ b/src/common/utils.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2010  Université de Bordeaux 1
			
 
				- * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -59,9 +59,9 @@ int _starpu_mkpath(const char *s, mode_t mode)
 
				 
			
 
				 	if ((mkdir(path, mode) == -1) && (errno != EEXIST))
			
 
				 		rv = -1;
			
 
				-	else 
			
 
				+	else
			
 
				 		rv = 0;
			
 
				-	
			
 
				+
			
 
				 out:
			
 
				 	if (up)
			
 
				 		free(up);
			
--- a/src/core/combined_workers.c
+++ b/src/core/combined_workers.c
@@ -132,7 +132,8 @@ int starpu_combined_worker_assign_workerid(int nworkers, int workerid_array[])
 
				 			&config->workers[id].initial_cpu_set);
			
 
				 #else
			
 
				 		int j;
			
 
				-		for (j = 0; j < CPU_SETSIZE; j++) {
			
 
				+		for (j = 0; j < CPU_SETSIZE; j++)
			
 
				+		{
			
 
				 			if (CPU_ISSET(j, &config->workers[id].initial_cpu_set))
			
 
				 				CPU_SET(j, &combined_worker->cpu_set);
			
 
				 		}
			
--- a/src/core/debug.c
+++ b/src/core/debug.c
@@ -25,6 +25,7 @@ static pthread_mutex_t logfile_mutex = PTHREAD_MUTEX_INITIALIZER;
 
				 static FILE *logfile;
			
 
				 #endif
			
 
				 
			
 
				+/* Tell gdb whether FXT is compiled in or not */
			
 
				 int _starpu_use_fxt
			
 
				 #ifdef STARPU_USE_FXT
			
 
				 	= 1
			
@@ -36,7 +37,7 @@ void _starpu_open_debug_logfile(void)
 
				 #ifdef STARPU_VERBOSE
			
 
				 	/* what is  the name of the file ? default = "starpu.log" */
			
 
				 	char *logfile_name;
			
 
				-	
			
 
				+
			
 
				 	logfile_name = getenv("STARPU_LOGFILENAME");
			
 
				 	if (!logfile_name)
			
 
				 	{
			
--- a/src/core/debug.h
+++ b/src/core/debug.h
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2009-2011  Université de Bordeaux 1
			
 
				- * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
--- a/src/core/dependencies/cg.c
+++ b/src/core/dependencies/cg.c
@@ -44,7 +44,7 @@ void _starpu_cg_list_deinit(struct _starpu_cg_list *list)
 
				 		struct _starpu_cg *cg = list->succ[id];
			
 
				 
			
 
				 		/* We remove the reference on the completion group, and free it
			
 
				-		 * if there is no more reference. */		
			
 
				+		 * if there is no more reference. */
			
 
				 		unsigned ntags = STARPU_ATOMIC_ADD(&cg->ntags, -1);
			
 
				 		if (ntags == 0)
			
 
				 			free(list->succ[id]);
			
@@ -72,7 +72,7 @@ void _starpu_add_successor_to_cg_list(struct _starpu_cg_list *successors, struct
 
				 			successors->succ_list_size = 4;
			
 
				 
			
 
				 		/* NB: this is thread safe as the tag->lock is taken */
			
 
				-		successors->succ = (struct _starpu_cg **) realloc(successors->succ, 
			
 
				+		successors->succ = (struct _starpu_cg **) realloc(successors->succ,
			
 
				 			successors->succ_list_size*sizeof(struct _starpu_cg *));
			
 
				 	}
			
 
				 #else
			
@@ -86,7 +86,8 @@ void _starpu_notify_cg(struct _starpu_cg *cg)
 
				 	STARPU_ASSERT(cg);
			
 
				 	unsigned remaining = STARPU_ATOMIC_ADD(&cg->remaining, -1);
			
 
				 
			
 
				-	if (remaining == 0) {
			
 
				+	if (remaining == 0)
			
 
				+	{
			
 
				 		cg->remaining = cg->ntags;
			
 
				 
			
 
				 		struct _starpu_tag *tag;
			
@@ -94,8 +95,10 @@ void _starpu_notify_cg(struct _starpu_cg *cg)
 
				 		struct _starpu_job *j;
			
 
				 
			
 
				 		/* the group is now completed */
			
 
				-		switch (cg->cg_type) {
			
 
				-			case STARPU_CG_APPS: {
			
 
				+		switch (cg->cg_type)
			
 
				+		{
			
 
				+			case STARPU_CG_APPS:
			
 
				+			{
			
 
				 				/* this is a cg for an application waiting on a set of
			
 
				 	 			 * tags, wake the thread */
			
 
				 				_STARPU_PTHREAD_MUTEX_LOCK(&cg->succ.succ_apps.cg_mutex);
			
@@ -105,17 +108,19 @@ void _starpu_notify_cg(struct _starpu_cg *cg)
 
				 				break;
			
 
				 			}
			
 
				 
			
 
				-			case STARPU_CG_TAG: {
			
 
				+			case STARPU_CG_TAG:
			
 
				+			{
			
 
				 				tag = cg->succ.tag;
			
 
				 				tag_successors = &tag->tag_successors;
			
 
				-	
			
 
				+
			
 
				 				tag_successors->ndeps_completed++;
			
 
				 
			
 
				 #ifdef STARPU_DEVEL
			
 
				 #warning FIXME: who locks this?
			
 
				 #endif
			
 
				 				if ((tag->state == STARPU_BLOCKED) &&
			
 
				-					(tag_successors->ndeps == tag_successors->ndeps_completed)) {
			
 
				+					(tag_successors->ndeps == tag_successors->ndeps_completed))
			
 
				+				{
			
 
				 					/* reset the counter so that we can reuse the completion group */
			
 
				 					tag_successors->ndeps_completed = 0;
			
 
				 					_starpu_tag_set_ready(tag);
			
@@ -123,7 +128,8 @@ void _starpu_notify_cg(struct _starpu_cg *cg)
 
				 				break;
			
 
				 			}
			
 
				 
			
 
				- 		        case STARPU_CG_TASK: {
			
 
				+ 		        case STARPU_CG_TASK:
			
 
				+			{
			
 
				 				j = cg->succ.job;
			
 
				 
			
 
				 				job_successors = &j->job_successors;
			
@@ -175,14 +181,14 @@ void _starpu_notify_cg_list(struct _starpu_cg_list *successors)
 
				 		{
			
 
				 			struct _starpu_job *j = cg->succ.job;
			
 
				 			_STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
			
 
				-		}			
			
 
				+		}
			
 
				 
			
 
				 		_starpu_notify_cg(cg);
			
 
				 
			
 
				 		if (cg_type == STARPU_CG_TASK)
			
 
				 		{
			
 
				 			struct _starpu_job *j = cg->succ.job;
			
 
				-			
			
 
				+
			
 
				 			/* In case this task was immediately terminated, since
			
 
				 			 * _starpu_notify_cg_list already holds the sync_mutex
			
 
				 			 * lock, it is its responsibility to destroy the task if
			
@@ -197,9 +203,10 @@ void _starpu_notify_cg_list(struct _starpu_cg_list *successors)
 
				 
			
 
				 			if (must_destroy_task)
			
 
				 				starpu_task_destroy(task);
			
 
				-		}			
			
 
				+		}
			
 
				 
			
 
				-		if (cg_type == STARPU_CG_APPS) {
			
 
				+		if (cg_type == STARPU_CG_APPS)
			
 
				+		{
			
 
				 			/* Remove the temporary ref to the cg */
			
 
				 			memmove(&successors->succ[succ], &successors->succ[succ+1], (nsuccs-(succ+1)) * sizeof(successors->succ[succ]));
			
 
				 			succ--;
			
--- a/src/core/dependencies/cg.h
+++ b/src/core/dependencies/cg.h
@@ -34,7 +34,8 @@
 
				 struct _starpu_job;
			
 
				 
			
 
				 /* Completion Group list */
			
 
				-struct _starpu_cg_list {
			
 
				+struct _starpu_cg_list
			
 
				+{
			
 
				 	unsigned nsuccs; /* how many successors ? */
			
 
				 	unsigned ndeps; /* how many deps ? */
			
 
				 	unsigned ndeps_completed; /* how many deps are done ? */
			
@@ -46,20 +47,23 @@ struct _starpu_cg_list {
 
				 #endif
			
 
				 };
			
 
				 
			
 
				-enum _starpu_cg_type {
			
 
				+enum _starpu_cg_type
			
 
				+{
			
 
				 	STARPU_CG_APPS=(1<<0),
			
 
				 	STARPU_CG_TAG=(1<<1),
			
 
				 	STARPU_CG_TASK=(1<<2)
			
 
				 };
			
 
				 
			
 
				 /* Completion Group */
			
 
				-struct _starpu_cg {
			
 
				+struct _starpu_cg
			
 
				+{
			
 
				 	unsigned ntags; /* number of tags depended on */
			
 
				 	unsigned remaining; /* number of remaining tags */
			
 
				 
			
 
				 	enum _starpu_cg_type cg_type;
			
 
				 
			
 
				-	union {
			
 
				+	union
			
 
				+	{
			
 
				 		/* STARPU_CG_TAG */
			
 
				 		struct _starpu_tag *tag;
			
 
				 
			
@@ -70,7 +74,8 @@ struct _starpu_cg {
 
				 		/* in case this completion group is related to an application,
			
 
				 		 * we have to explicitly wake the waiting thread instead of
			
 
				 		 * rescheduling the corresponding task */
			
 
				-		struct {
			
 
				+		struct
			
 
				+		{
			
 
				 			unsigned completed;
			
 
				 			pthread_mutex_t cg_mutex;
			
 
				 			pthread_cond_t cg_cond;
			
--- a/src/core/dependencies/data_concurrency.c
+++ b/src/core/dependencies/data_concurrency.c
@@ -39,7 +39,8 @@ static struct _starpu_data_requester *may_unlock_data_req_list_head(starpu_data_
 
				 	{
			
 
				 		req_list = handle->reduction_req_list;
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		if (_starpu_data_requester_list_empty(handle->reduction_req_list))
			
 
				 			req_list = handle->req_list;
			
 
				 		else
			
@@ -63,7 +64,7 @@ static struct _starpu_data_requester *may_unlock_data_req_list_head(starpu_data_
 
				 	enum starpu_access_mode r_mode = r->mode;
			
 
				 	if (r_mode == STARPU_RW)
			
 
				 		r_mode = STARPU_W;
			
 
				-	
			
 
				+
			
 
				 	/* If this is a STARPU_R, STARPU_SCRATCH or STARPU_REDUX type of
			
 
				 	 * access, we only proceed if the current mode is the same as the
			
 
				 	 * requested mode. */
			
@@ -93,7 +94,8 @@ static unsigned _starpu_attempt_to_submit_data_request(unsigned request_from_cod
 
				 		while (_starpu_spin_trylock(&handle->header_lock))
			
 
				 			_starpu_datawizard_progress(_starpu_get_local_memory_node(), 0);
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		_starpu_spin_lock(&handle->header_lock);
			
 
				 	}
			
 
				 
			
@@ -131,7 +133,8 @@ static unsigned _starpu_attempt_to_submit_data_request(unsigned request_from_cod
 
				 			 * the request if needed. */
			
 
				 			put_in_list = (handle->reduction_refcnt > 0);
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			put_in_list = 0;
			
 
				 		}
			
 
				 	}
			
@@ -140,16 +143,16 @@ static unsigned _starpu_attempt_to_submit_data_request(unsigned request_from_cod
 
				 	{
			
 
				 		/* there cannot be multiple writers or a new writer
			
 
				 		 * while the data is in read mode */
			
 
				-		
			
 
				+
			
 
				 		handle->busy_count++;
			
 
				 		/* enqueue the request */
			
 
				 		struct _starpu_data_requester *r = _starpu_data_requester_new();
			
 
				-			r->mode = mode;
			
 
				-			r->is_requested_by_codelet = request_from_codelet;
			
 
				-			r->j = j;
			
 
				-			r->buffer_index = buffer_index;
			
 
				-			r->ready_data_callback = callback;
			
 
				-			r->argcb = argcb;
			
 
				+		r->mode = mode;
			
 
				+		r->is_requested_by_codelet = request_from_codelet;
			
 
				+		r->j = j;
			
 
				+		r->buffer_index = buffer_index;
			
 
				+		r->ready_data_callback = callback;
			
 
				+		r->argcb = argcb;
			
 
				 
			
 
				 		/* We put the requester in a specific list if this is a reduction task */
			
 
				 		struct _starpu_data_requester_list *req_list =
			
@@ -160,7 +163,8 @@ static unsigned _starpu_attempt_to_submit_data_request(unsigned request_from_cod
 
				 		/* failed */
			
 
				 		put_in_list = 1;
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		handle->refcnt++;
			
 
				 		handle->busy_count++;
			
 
				 
			
@@ -178,9 +182,8 @@ static unsigned _starpu_attempt_to_submit_data_request(unsigned request_from_cod
 
				 
			
 
				 }
			
 
				 
			
 
				-
			
 
				 unsigned _starpu_attempt_to_submit_data_request_from_apps(starpu_data_handle_t handle, enum starpu_access_mode mode,
			
 
				-						void (*callback)(void *), void *argcb)
			
 
				+							  void (*callback)(void *), void *argcb)
			
 
				 {
			
 
				 	return _starpu_attempt_to_submit_data_request(0, handle, mode, callback, argcb, NULL, 0);
			
 
				 }
			
@@ -193,7 +196,6 @@ static unsigned attempt_to_submit_data_request_from_job(struct _starpu_job *j, u
 
				 	enum starpu_access_mode mode = j->ordered_buffers[buffer_index].mode;
			
 
				 
			
 
				 	return _starpu_attempt_to_submit_data_request(1, handle, mode, NULL, NULL, j, buffer_index);
			
 
				-
			
 
				 }
			
 
				 
			
 
				 static unsigned _submit_job_enforce_data_deps(struct _starpu_job *j, unsigned start_buffer_index)
			
@@ -203,7 +205,8 @@ static unsigned _submit_job_enforce_data_deps(struct _starpu_job *j, unsigned st
 
				 	unsigned nbuffers = j->task->cl->nbuffers;
			
 
				 	for (buf = start_buffer_index; buf < nbuffers; buf++)
			
 
				 	{
			
 
				-                if (attempt_to_submit_data_request_from_job(j, buf)) {
			
 
				+                if (attempt_to_submit_data_request_from_job(j, buf))
			
 
				+		{
			
 
				                         j->task->status = STARPU_TASK_BLOCKED_ON_DATA;
			
 
				 			return 1;
			
 
				                 }
			
@@ -239,10 +242,8 @@ static unsigned unlock_one_requester(struct _starpu_data_requester *r)
 
				 	unsigned buffer_index = r->buffer_index;
			
 
				 
			
 
				 	if (buffer_index + 1 < nbuffers)
			
 
				-	{
			
 
				 		/* not all buffers are protected yet */
			
 
				 		return _submit_job_enforce_data_deps(j, buffer_index + 1);
			
 
				-	}
			
 
				 	else
			
 
				 		return 0;
			
 
				 }
			
@@ -279,7 +280,6 @@ void _starpu_notify_data_dependencies(starpu_data_handle_t handle)
 
				 			_starpu_data_end_reduction_mode_terminate(handle);
			
 
				 	}
			
 
				 
			
 
				-
			
 
				 	struct _starpu_data_requester *r;
			
 
				 	while ((r = may_unlock_data_req_list_head(handle)))
			
 
				 	{
			
@@ -297,7 +297,8 @@ void _starpu_notify_data_dependencies(starpu_data_handle_t handle)
 
				 			 * the request if needed. */
			
 
				 			put_in_list = (handle->reduction_refcnt > 0);
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			put_in_list = 0;
			
 
				 		}
			
 
				 
			
@@ -307,12 +308,13 @@ void _starpu_notify_data_dependencies(starpu_data_handle_t handle)
 
				 			 * perform a reduction before. */
			
 
				 			_starpu_data_requester_list_push_front(handle->req_list, r);
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			/* The data is now attributed to that request so we put a
			
 
				 			 * reference on it. */
			
 
				 			handle->refcnt++;
			
 
				 			handle->busy_count++;
			
 
				-		
			
 
				+
			
 
				 			enum starpu_access_mode previous_mode = handle->current_mode;
			
 
				 			handle->current_mode = r_mode;
			
 
				 
			
@@ -339,7 +341,7 @@ void _starpu_notify_data_dependencies(starpu_data_handle_t handle)
 
				 			}
			
 
				 
			
 
				 			_starpu_data_requester_delete(r);
			
 
				-			
			
 
				+
			
 
				 			_starpu_spin_lock(&handle->header_lock);
			
 
				 			STARPU_ASSERT(handle->busy_count > 0);
			
 
				 			handle->busy_count--;
			
--- a/src/core/dependencies/dependencies.c
+++ b/src/core/dependencies/dependencies.c
@@ -32,7 +32,7 @@ void _starpu_notify_dependencies(struct _starpu_job *j)
 
				 
			
 
				 	/* unlock tasks depending on that task */
			
 
				 	_starpu_notify_task_dependencies(j);
			
 
				-	
			
 
				+
			
 
				 	/* unlock tags depending on that task */
			
 
				 	if (j->task->use_tag)
			
 
				 		_starpu_notify_tag_dependencies(j->tag);
			
--- a/src/core/dependencies/htable.c
+++ b/src/core/dependencies/htable.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2009, 2010  Université de Bordeaux 1
			
 
				- * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -28,21 +28,20 @@ void *_starpu_htbl_search_tag(struct _starpu_htbl_node *htbl, starpu_tag_t tag)
 
				 
			
 
				 	for(currentbit = 0; currentbit < _STARPU_TAG_SIZE; currentbit+=_STARPU_HTBL_NODE_SIZE)
			
 
				 	{
			
 
				-	
			
 
				 	//	printf("search : current bit = %d \n", currentbit);
			
 
				 		if (STARPU_UNLIKELY(current_htbl == NULL))
			
 
				 			return NULL;
			
 
				 
			
 
				-		/* 0000000000001111 
			
 
				+		/* 0000000000001111
			
 
				 		 *     | currentbit
			
 
				 		 * 0000111100000000 = offloaded_mask
			
 
				 		 *         |last_currentbit
			
 
				 		 * */
			
 
				 
			
 
				-		unsigned last_currentbit = 
			
 
				+		unsigned last_currentbit =
			
 
				 			_STARPU_TAG_SIZE - (currentbit + _STARPU_HTBL_NODE_SIZE);
			
 
				 		starpu_tag_t offloaded_mask = mask << last_currentbit;
			
 
				-		unsigned current_index = 
			
 
				+		unsigned current_index =
			
 
				 			(tag & (offloaded_mask)) >> (last_currentbit);
			
 
				 
			
 
				 		current_htbl = current_htbl->children[current_index];
			
@@ -57,7 +56,6 @@ void *_starpu_htbl_search_tag(struct _starpu_htbl_node *htbl, starpu_tag_t tag)
 
				 
			
 
				 void *_starpu_htbl_insert_tag(struct _starpu_htbl_node **htbl, starpu_tag_t tag, void *entry)
			
 
				 {
			
 
				-
			
 
				 	unsigned currentbit;
			
 
				 	struct _starpu_htbl_node **current_htbl_ptr = htbl;
			
 
				 	struct _starpu_htbl_node *previous_htbl_ptr = NULL;
			
@@ -67,7 +65,8 @@ void *_starpu_htbl_insert_tag(struct _starpu_htbl_node **htbl, starpu_tag_t tag,
 
				 
			
 
				 	for(currentbit = 0; currentbit < _STARPU_TAG_SIZE; currentbit+=_STARPU_HTBL_NODE_SIZE)
			
 
				 	{
			
 
				-		if (*current_htbl_ptr == NULL) {
			
 
				+		if (*current_htbl_ptr == NULL)
			
 
				+		{
			
 
				 			/* TODO pad to change that 1 into 16 ? */
			
 
				 			*current_htbl_ptr = (struct _starpu_htbl_node *) calloc(1, sizeof(struct _starpu_htbl_node));
			
 
				 			assert(*current_htbl_ptr);
			
@@ -76,25 +75,24 @@ void *_starpu_htbl_insert_tag(struct _starpu_htbl_node **htbl, starpu_tag_t tag,
 
				 				previous_htbl_ptr->nentries++;
			
 
				 		}
			
 
				 
			
 
				-		/* 0000000000001111 
			
 
				+		/* 0000000000001111
			
 
				 		 *     | currentbit
			
 
				 		 * 0000111100000000 = offloaded_mask
			
 
				 		 *         |last_currentbit
			
 
				 		 * */
			
 
				 
			
 
				-		unsigned last_currentbit = 
			
 
				+		unsigned last_currentbit =
			
 
				 			_STARPU_TAG_SIZE - (currentbit + _STARPU_HTBL_NODE_SIZE);
			
 
				 		starpu_tag_t offloaded_mask = mask << last_currentbit;
			
 
				-		unsigned current_index = 
			
 
				+		unsigned current_index =
			
 
				 			(tag & (offloaded_mask)) >> (last_currentbit);
			
 
				 
			
 
				 		previous_htbl_ptr = *current_htbl_ptr;
			
 
				-		current_htbl_ptr = 
			
 
				+		current_htbl_ptr =
			
 
				 			&((*current_htbl_ptr)->children[current_index]);
			
 
				-
			
 
				 	}
			
 
				 
			
 
				-	/* current_htbl either contains NULL or a previous entry 
			
 
				+	/* current_htbl either contains NULL or a previous entry
			
 
				 	 * we overwrite it anyway */
			
 
				 	void *old_entry = *current_htbl_ptr;
			
 
				 	*current_htbl_ptr = (struct _starpu_htbl_node *) entry;
			
@@ -124,24 +122,25 @@ void *_starpu_htbl_remove_tag(struct _starpu_htbl_node *htbl, starpu_tag_t tag)
 
				 	{
			
 
				 		path[level] = current_htbl_ptr;
			
 
				 
			
 
				-		if (STARPU_UNLIKELY(!current_htbl_ptr)) {
			
 
				+		if (STARPU_UNLIKELY(!current_htbl_ptr))
			
 
				+		{
			
 
				 			tag_is_present = 0;
			
 
				 			break;
			
 
				 		}
			
 
				 
			
 
				-		/* 0000000000001111 
			
 
				+		/* 0000000000001111
			
 
				 		 *     | currentbit
			
 
				 		 * 0000111100000000 = offloaded_mask
			
 
				 		 *         |last_currentbit
			
 
				 		 * */
			
 
				 
			
 
				-		unsigned last_currentbit = 
			
 
				+		unsigned last_currentbit =
			
 
				 			_STARPU_TAG_SIZE - (currentbit + _STARPU_HTBL_NODE_SIZE);
			
 
				 		starpu_tag_t offloaded_mask = mask << last_currentbit;
			
 
				-		unsigned current_index = 
			
 
				+		unsigned current_index =
			
 
				 			(tag & (offloaded_mask)) >> (last_currentbit);
			
 
				-		
			
 
				-		current_htbl_ptr = 
			
 
				+
			
 
				+		current_htbl_ptr =
			
 
				 			current_htbl_ptr->children[current_index];
			
 
				 	}
			
 
				 
			
@@ -151,8 +150,9 @@ void *_starpu_htbl_remove_tag(struct _starpu_htbl_node *htbl, starpu_tag_t tag)
 
				 
			
 
				 	void *old_entry = current_htbl_ptr;
			
 
				 
			
 
				-	if (tag_is_present) {
			
 
				-		/* the tag was in the htbl, so we have to unroll the search 
			
 
				+	if (tag_is_present)
			
 
				+	{
			
 
				+		/* the tag was in the htbl, so we have to unroll the search
			
 
				  		 * to remove possibly useless htbl (internal) nodes */
			
 
				 		for (level = maxlevel - 1; level >= 0; level--)
			
 
				 		{
			
--- a/src/core/dependencies/htable.h
+++ b/src/core/dependencies/htable.h
@@ -30,7 +30,8 @@
 
				 
			
 
				 #define _STARPU_HTBL_NODE_SIZE	16
			
 
				 
			
 
				-struct _starpu_htbl_node {
			
 
				+struct _starpu_htbl_node
			
 
				+{
			
 
				 	unsigned nentries;
			
 
				 	struct _starpu_htbl_node *children[1<<_STARPU_HTBL_NODE_SIZE];
			
 
				 };
			
--- a/src/core/dependencies/implicit_data_deps.c
+++ b/src/core/dependencies/implicit_data_deps.c
@@ -163,7 +163,7 @@ static void _starpu_add_writer_after_writer(starpu_data_handle_t handle, struct
 
				 static void disable_last_writer_callback(void *cl_arg)
			
 
				 {
			
 
				 	starpu_data_handle_t handle = (starpu_data_handle_t) cl_arg;
			
 
				-	
			
 
				+
			
 
				 	/* NB: we don't take the handle->sequential_consistency_mutex mutex
			
 
				 	 * because the empty task that is used for synchronization is going to
			
 
				 	 * be unlock in the context of a call to
			
@@ -172,7 +172,6 @@ static void disable_last_writer_callback(void *cl_arg)
 
				 	handle->last_submitted_writer = NULL;
			
 
				 }
			
 
				 
			
 
				-
			
 
				 /* This function adds the implicit task dependencies introduced by data
			
 
				  * sequential consistency. Two tasks are provided: pre_sync and post_sync which
			
 
				  * respectively indicates which task is going to depend on the previous deps
			
@@ -196,8 +195,7 @@ void _starpu_detect_implicit_data_deps_with_handle(struct starpu_task *pre_sync_
 
				 		 * they do not interfere with the application. */
			
 
				 		if (pre_sync_job->reduction_task || post_sync_job->reduction_task)
			
 
				 			return;
			
 
				-	
			
 
				-	
			
 
				+
			
 
				 		_STARPU_DEP_DEBUG("Tasks %p %p\n", pre_sync_task, post_sync_task);
			
 
				 		/* In case we are generating the DAG, we add an implicit
			
 
				 		 * dependency between the pre and the post sync tasks in case
			
@@ -213,7 +211,7 @@ void _starpu_detect_implicit_data_deps_with_handle(struct starpu_task *pre_sync_
 
				 		}
			
 
				 
			
 
				 		enum starpu_access_mode previous_mode = handle->last_submitted_mode;
			
 
				-	
			
 
				+
			
 
				 		if (mode & STARPU_W)
			
 
				 		{
			
 
				 			_STARPU_DEP_DEBUG("W %p\n", handle);
			
@@ -222,17 +220,17 @@ void _starpu_detect_implicit_data_deps_with_handle(struct starpu_task *pre_sync_
 
				 				_STARPU_DEP_DEBUG("WAW %p\n", handle);
			
 
				 				_starpu_add_writer_after_writer(handle, pre_sync_task, post_sync_task);
			
 
				 			}
			
 
				-			else {
			
 
				+			else
			
 
				+			{
			
 
				 				/* The task submitted previously were in read-only
			
 
				 				 * mode: this task must depend on all those read-only
			
 
				 				 * tasks and we get rid of the list of readers */
			
 
				-			
			
 
				 				_STARPU_DEP_DEBUG("WAR %p\n", handle);
			
 
				 				_starpu_add_writer_after_readers(handle, pre_sync_task, post_sync_task);
			
 
				 			}
			
 
				-	
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			_STARPU_DEP_DEBUG("R %p %d -> %d\n", handle, previous_mode, mode);
			
 
				 			/* Add a reader, after a writer or a reader. */
			
 
				 			STARPU_ASSERT(pre_sync_task);
			
@@ -263,10 +261,8 @@ void _starpu_detect_implicit_data_deps_with_handle(struct starpu_task *pre_sync_
 
				 
			
 
				 				starpu_task_submit(new_sync_task);
			
 
				 			}
			
 
				-	
			
 
				 			_starpu_add_reader_after_writer(handle, pre_sync_task, post_sync_task);
			
 
				 		}
			
 
				-	
			
 
				 		handle->last_submitted_mode = mode;
			
 
				 	}
			
 
				         _STARPU_LOG_OUT();
			
@@ -323,7 +319,7 @@ void _starpu_release_data_enforce_sequential_consistency(struct starpu_task *tas
 
				 		if (task == handle->last_submitted_writer)
			
 
				 		{
			
 
				 			handle->last_submitted_writer = NULL;
			
 
				-			
			
 
				+
			
 
				 #ifndef STARPU_USE_FXT
			
 
				 			if (_starpu_bound_recording)
			
 
				 #endif
			
@@ -333,9 +329,8 @@ void _starpu_release_data_enforce_sequential_consistency(struct starpu_task *tas
 
				 				struct _starpu_job *ghost_job = _starpu_get_job_associated_to_task(task);
			
 
				 				handle->last_submitted_ghost_writer_id = ghost_job->job_id;
			
 
				 			}
			
 
				-			
			
 
				 		}
			
 
				-		
			
 
				+
			
 
				 		/* XXX can a task be both the last writer associated to a data
			
 
				 		 * and be in its list of readers ? If not, we should not go
			
 
				 		 * through the entire list once we have detected it was the
			
@@ -364,7 +359,7 @@ void _starpu_release_data_enforce_sequential_consistency(struct starpu_task *tas
 
				 					struct _starpu_jobid_list *link = (struct _starpu_jobid_list *) malloc(sizeof(struct _starpu_jobid_list));
			
 
				 					STARPU_ASSERT(link);
			
 
				 					link->next = handle->last_submitted_ghost_readers_id;
			
 
				-					link->id = ghost_reader_job->job_id; 
			
 
				+					link->id = ghost_reader_job->job_id;
			
 
				 					handle->last_submitted_ghost_readers_id = link;
			
 
				 				}
			
 
				 
			
@@ -372,7 +367,8 @@ void _starpu_release_data_enforce_sequential_consistency(struct starpu_task *tas
 
				 				{
			
 
				 					prev->next = next;
			
 
				 				}
			
 
				-				else {
			
 
				+				else
			
 
				+				{
			
 
				 					/* This is the first element of the list */
			
 
				 					handle->last_submitted_readers = next;
			
 
				 				}
			
@@ -383,7 +379,8 @@ void _starpu_release_data_enforce_sequential_consistency(struct starpu_task *tas
 
				 				 * as soon as we find the task. TODO: check how
			
 
				 				 * duplicate dependencies are treated. */
			
 
				 			}
			
 
				-			else {
			
 
				+			else
			
 
				+			{
			
 
				 				prev = l;
			
 
				 			}
			
 
				 
			
@@ -406,7 +403,7 @@ void _starpu_add_post_sync_tasks(struct starpu_task *post_sync_task, starpu_data
 
				 		struct _starpu_task_wrapper_list *link = (struct _starpu_task_wrapper_list *) malloc(sizeof(struct _starpu_task_wrapper_list));
			
 
				 		link->task = post_sync_task;
			
 
				 		link->next = handle->post_sync_tasks;
			
 
				-		handle->post_sync_tasks = link;		
			
 
				+		handle->post_sync_tasks = link;
			
 
				 	}
			
 
				 
			
 
				 	_STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
			
@@ -431,7 +428,6 @@ void _starpu_unlock_post_sync_tasks(starpu_data_handle_t handle)
 
				 			post_sync_tasks = handle->post_sync_tasks;
			
 
				 			handle->post_sync_tasks = NULL;
			
 
				 		}
			
 
				-
			
 
				 	}
			
 
				 
			
 
				 	_STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
			
@@ -440,7 +436,8 @@ void _starpu_unlock_post_sync_tasks(starpu_data_handle_t handle)
 
				 	{
			
 
				 		struct _starpu_task_wrapper_list *link = post_sync_tasks;
			
 
				 
			
 
				-		while (link) {
			
 
				+		while (link)
			
 
				+		{
			
 
				 			/* There is no need to depend on that task now, since it was already unlocked */
			
 
				 			_starpu_release_data_enforce_sequential_consistency(link->task, handle);
			
 
				 
			
@@ -478,7 +475,8 @@ int _starpu_data_wait_until_available(starpu_data_handle_t handle, enum starpu_a
 
				 		STARPU_ASSERT(!ret);
			
 
				 		starpu_task_wait(sync_task);
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		_STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
			
 
				 	}
			
 
				 
			
--- a/src/core/dependencies/tags.c
+++ b/src/core/dependencies/tags.c
@@ -44,7 +44,6 @@ static struct _starpu_cg *create_cg_apps(unsigned ntags)
 
				 	return cg;
			
 
				 }
			
 
				 
			
 
				-
			
 
				 static struct _starpu_cg *create_cg_tag(unsigned ntags, struct _starpu_tag *tag)
			
 
				 {
			
 
				 	struct _starpu_cg *cg = (struct _starpu_cg *) malloc(sizeof(struct _starpu_cg));
			
@@ -90,7 +89,8 @@ void starpu_tag_remove(starpu_tag_t id)
 
				 
			
 
				 	pthread_rwlock_unlock(&tag_global_rwlock);
			
 
				 
			
 
				-	if (tag) {
			
 
				+	if (tag)
			
 
				+	{
			
 
				 		_starpu_spin_lock(&tag->lock);
			
 
				 
			
 
				 		unsigned nsuccs = tag->tag_successors.nsuccs;
			
@@ -126,7 +126,8 @@ static struct _starpu_tag *gettag_struct(starpu_tag_t id)
 
				 	struct _starpu_tag *tag;
			
 
				 	tag = (struct _starpu_tag *) _starpu_htbl_search_tag(tag_htbl, id);
			
 
				 
			
 
				-	if (tag == NULL) {
			
 
				+	if (tag == NULL)
			
 
				+	{
			
 
				 		/* the tag does not exist yet : create an entry */
			
 
				 		tag = _starpu_tag_init(id);
			
 
				 
			
@@ -172,7 +173,8 @@ static void _starpu_tag_add_succ(struct _starpu_tag *tag, struct _starpu_cg *cg)
 
				 
			
 
				 	_starpu_add_successor_to_cg_list(&tag->tag_successors, cg);
			
 
				 
			
 
				-	if (tag->state == STARPU_DONE) {
			
 
				+	if (tag->state == STARPU_DONE)
			
 
				+	{
			
 
				 		/* the tag was already completed sooner */
			
 
				 		_starpu_notify_cg(cg);
			
 
				 	}
			
@@ -201,11 +203,11 @@ void _starpu_tag_declare(starpu_tag_t id, struct _starpu_job *job)
 
				 {
			
 
				 	_STARPU_TRACE_TAG(id, job);
			
 
				 	job->task->use_tag = 1;
			
 
				-	
			
 
				+
			
 
				 	struct _starpu_tag *tag= gettag_struct(id);
			
 
				 	tag->job = job;
			
 
				 	tag->is_assigned = 1;
			
 
				-	
			
 
				+
			
 
				 	job->tag = tag;
			
 
				 
			
 
				 	/* the tag is now associated to a job */
			
@@ -226,11 +228,11 @@ void starpu_tag_declare_deps_array(starpu_tag_t id, unsigned ndeps, starpu_tag_t
 
				 	struct _starpu_cg *cg = create_cg_tag(ndeps, tag_child);
			
 
				 
			
 
				 	STARPU_ASSERT(ndeps != 0);
			
 
				-	
			
 
				+
			
 
				 	for (i = 0; i < ndeps; i++)
			
 
				 	{
			
 
				 		starpu_tag_t dep_id = array[i];
			
 
				-		
			
 
				+
			
 
				 		/* id depends on dep_id
			
 
				 		 * so cg should be among dep_id's successors*/
			
 
				 		_STARPU_TRACE_TAG_DEPS(id, dep_id);
			
@@ -248,7 +250,7 @@ void starpu_tag_declare_deps_array(starpu_tag_t id, unsigned ndeps, starpu_tag_t
 
				 void starpu_tag_declare_deps(starpu_tag_t id, unsigned ndeps, ...)
			
 
				 {
			
 
				 	unsigned i;
			
 
				-	
			
 
				+
			
 
				 	/* create the associated completion group */
			
 
				 	struct _starpu_tag *tag_child = gettag_struct(id);
			
 
				 
			
@@ -257,14 +259,14 @@ void starpu_tag_declare_deps(starpu_tag_t id, unsigned ndeps, ...)
 
				 	struct _starpu_cg *cg = create_cg_tag(ndeps, tag_child);
			
 
				 
			
 
				 	STARPU_ASSERT(ndeps != 0);
			
 
				-	
			
 
				+
			
 
				 	va_list pa;
			
 
				 	va_start(pa, ndeps);
			
 
				 	for (i = 0; i < ndeps; i++)
			
 
				 	{
			
 
				 		starpu_tag_t dep_id;
			
 
				 		dep_id = va_arg(pa, starpu_tag_t);
			
 
				-	
			
 
				+
			
 
				 		/* id depends on dep_id
			
 
				 		 * so cg should be among dep_id's successors*/
			
 
				 		_STARPU_TRACE_TAG_DEPS(id, dep_id);
			
@@ -291,7 +293,8 @@ int starpu_tag_wait_array(unsigned ntags, starpu_tag_t *id)
 
				 	_STARPU_LOG_IN();
			
 
				 
			
 
				 	/* It is forbidden to block within callbacks or codelets */
			
 
				-	if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls())) {
			
 
				+	if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls()))
			
 
				+	{
			
 
				 		_STARPU_LOG_OUT_TAG("edeadlk");
			
 
				 		return -EDEADLK;
			
 
				 	}
			
@@ -300,7 +303,7 @@ int starpu_tag_wait_array(unsigned ntags, starpu_tag_t *id)
 
				 	for (i = 0, current = 0; i < ntags; i++)
			
 
				 	{
			
 
				 		struct _starpu_tag *tag = gettag_struct(id[i]);
			
 
				-		
			
 
				+
			
 
				 		_starpu_spin_lock(&tag->lock);
			
 
				 
			
 
				 		if (tag->state == STARPU_DONE)
			
@@ -321,7 +324,7 @@ int starpu_tag_wait_array(unsigned ntags, starpu_tag_t *id)
 
				 		_STARPU_LOG_OUT_TAG("all deps are already fulfilled");
			
 
				 		return 0;
			
 
				 	}
			
 
				-	
			
 
				+
			
 
				 	/* there is at least one task that is not finished */
			
 
				 	struct _starpu_cg *cg = create_cg_apps(current);
			
 
				 
			
--- a/src/core/dependencies/tags.h
+++ b/src/core/dependencies/tags.h
@@ -25,7 +25,8 @@
 
				 
			
 
				 #define _STARPU_TAG_SIZE        (sizeof(starpu_tag_t)*8)
			
 
				 
			
 
				-enum _starpu_tag_state {
			
 
				+enum _starpu_tag_state
			
 
				+{
			
 
				 	/* this tag is not declared by any task */
			
 
				 	STARPU_INVALID_STATE,
			
 
				 	/* _starpu_tag_declare was called to associate the tag to a task */
			
@@ -44,7 +45,8 @@ enum _starpu_tag_state {
 
				 
			
 
				 struct _starpu_job;
			
 
				 
			
 
				-struct _starpu_tag {
			
 
				+struct _starpu_tag
			
 
				+{
			
 
				 	struct _starpu_spinlock lock;
			
 
				 	starpu_tag_t id; /* an identifier for the task */
			
 
				 	enum _starpu_tag_state state;
			
--- a/src/core/dependencies/task_deps.c
+++ b/src/core/dependencies/task_deps.c
@@ -48,7 +48,8 @@ static void _starpu_task_add_succ(struct _starpu_job *j, struct _starpu_cg *cg)
 
				 
			
 
				 	_starpu_add_successor_to_cg_list(&j->job_successors, cg);
			
 
				 
			
 
				-	if (j->terminated) {
			
 
				+	if (j->terminated)
			
 
				+	{
			
 
				 		/* the task was already completed sooner */
			
 
				 		_starpu_notify_cg(cg);
			
 
				 	}
			
@@ -90,6 +91,5 @@ void starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, st
 
				 		_STARPU_PTHREAD_MUTEX_UNLOCK(&dep_job->sync_mutex);
			
 
				 	}
			
 
				 
			
 
				-	
			
 
				 	_STARPU_PTHREAD_MUTEX_UNLOCK(&job->sync_mutex);
			
 
				 }
			
--- a/src/core/errorcheck.h
+++ b/src/core/errorcheck.h
@@ -21,7 +21,8 @@
 
				 #include <starpu.h>
			
 
				 
			
 
				 /* This type describes in which state a worker may be. */
			
 
				-enum _starpu_worker_status {
			
 
				+enum _starpu_worker_status
			
 
				+{
			
 
				 	/* invalid status (for instance if we request the status of some thread
			
 
				 	 * that is not controlled by StarPU */
			
 
				 	STATUS_INVALID,
			
--- a/src/core/jobs.c
+++ b/src/core/jobs.c
@@ -157,7 +157,7 @@ void _starpu_handle_job_termination(struct _starpu_job *j, unsigned job_is_alrea
 
				 	if (!job_is_already_locked)
			
 
				 		_STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
			
 
				 
			
 
				-	/* the callback is executed after the dependencies so that we may remove the tag 
			
 
				+	/* the callback is executed after the dependencies so that we may remove the tag
			
 
				  	 * of the task itself */
			
 
				 	if (task->callback_func)
			
 
				 	{
			
@@ -168,8 +168,8 @@ void _starpu_handle_job_termination(struct _starpu_job *j, unsigned job_is_alrea
 
				 		/* so that we can check whether we are doing blocking calls
			
 
				 		 * within the callback */
			
 
				 		_starpu_set_local_worker_status(STATUS_CALLBACK);
			
 
				-		
			
 
				-		
			
 
				+
			
 
				+
			
 
				 		/* Perhaps we have nested callbacks (eg. with chains of empty
			
 
				 		 * tasks). So we store the current task and we will restore it
			
 
				 		 * later. */
			
@@ -180,7 +180,7 @@ void _starpu_handle_job_termination(struct _starpu_job *j, unsigned job_is_alrea
 
				 		_STARPU_TRACE_START_CALLBACK(j);
			
 
				 		task->callback_func(task->callback_arg);
			
 
				 		_STARPU_TRACE_END_CALLBACK(j);
			
 
				-		
			
 
				+
			
 
				 		_starpu_set_current_task(current_task);
			
 
				 
			
 
				 		_starpu_set_local_worker_status(STATUS_UNKNOWN);
			
@@ -214,7 +214,8 @@ void _starpu_handle_job_termination(struct _starpu_job *j, unsigned job_is_alrea
 
				 		if (!job_is_already_locked)
			
 
				 			_STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		/* no one is going to synchronize with that task so we release
			
 
				 		 * the data structures now. In case the job was already locked
			
 
				 		 * by the caller, it is its responsability to destroy the task.
			
@@ -235,7 +236,7 @@ void _starpu_handle_job_termination(struct _starpu_job *j, unsigned job_is_alrea
 
				 	_starpu_decrement_nready_tasks();
			
 
				 }
			
 
				 
			
 
				-/* This function is called when a new task is submitted to StarPU 
			
 
				+/* This function is called when a new task is submitted to StarPU
			
 
				  * it returns 1 if the tag deps are not fulfilled, 0 otherwise */
			
 
				 static unsigned _starpu_not_all_tag_deps_are_fulfilled(struct _starpu_job *j)
			
 
				 {
			
@@ -259,7 +260,8 @@ static unsigned _starpu_not_all_tag_deps_are_fulfilled(struct _starpu_job *j)
 
				                 j->task->status = STARPU_TASK_BLOCKED_ON_TAG;
			
 
				 		ret = 1;
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		/* existing deps (if any) are fulfilled */
			
 
				 		tag->state = STARPU_READY;
			
 
				 		/* already prepare for next run */
			
@@ -281,14 +283,15 @@ static unsigned _starpu_not_all_task_deps_are_fulfilled(struct _starpu_job *j, u
 
				 	struct _starpu_cg_list *job_successors = &j->job_successors;
			
 
				 
			
 
				 	if (!job_is_already_locked)
			
 
				-		_STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);	
			
 
				+		_STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
			
 
				 
			
 
				 	if (!j->submitted || (job_successors->ndeps != job_successors->ndeps_completed))
			
 
				 	{
			
 
				                 j->task->status = STARPU_TASK_BLOCKED_ON_TASK;
			
 
				 		ret = 1;
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		/* existing deps (if any) are fulfilled */
			
 
				 		/* already prepare for next run */
			
 
				 		job_successors->ndeps_completed = 0;
			
@@ -301,8 +304,6 @@ static unsigned _starpu_not_all_task_deps_are_fulfilled(struct _starpu_job *j, u
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-
			
 
				-
			
 
				 /*
			
 
				  *	In order, we enforce tag, task and data dependencies. The task is
			
 
				  *	passed to the scheduler only once all these constraints are fulfilled.
			
@@ -316,19 +317,22 @@ unsigned _starpu_enforce_deps_and_schedule(struct _starpu_job *j, unsigned job_i
 
				         _STARPU_LOG_IN();
			
 
				 
			
 
				 	/* enfore tag dependencies */
			
 
				-	if (_starpu_not_all_tag_deps_are_fulfilled(j)) {
			
 
				+	if (_starpu_not_all_tag_deps_are_fulfilled(j))
			
 
				+	{
			
 
				                 _STARPU_LOG_OUT_TAG("not_all_tag_deps_are_fulfilled");
			
 
				 		return 0;
			
 
				         }
			
 
				 
			
 
				 	/* enfore task dependencies */
			
 
				-	if (_starpu_not_all_task_deps_are_fulfilled(j, job_is_already_locked)) {
			
 
				+	if (_starpu_not_all_task_deps_are_fulfilled(j, job_is_already_locked))
			
 
				+	{
			
 
				                 _STARPU_LOG_OUT_TAG("not_all_task_deps_are_fulfilled");
			
 
				 		return 0;
			
 
				         }
			
 
				 
			
 
				 	/* enforce data dependencies */
			
 
				-	if (_starpu_submit_job_enforce_data_deps(j)) {
			
 
				+	if (_starpu_submit_job_enforce_data_deps(j))
			
 
				+	{
			
 
				                 _STARPU_LOG_OUT_TAG("enforce_data_deps");
			
 
				 		return 0;
			
 
				         }
			
@@ -403,7 +407,8 @@ const char *_starpu_get_model_name(struct _starpu_job *j)
 
				             && task->cl->model
			
 
				             && task->cl->model->symbol)
			
 
				                 return task->cl->model->symbol;
			
 
				-        else {
			
 
				+        else
			
 
				+	{
			
 
				 #ifdef STARPU_USE_FXT
			
 
				                 return j->model_name;
			
 
				 #else
			
--- a/src/core/perfmodel/perfmodel.c
+++ b/src/core/perfmodel/perfmodel.c
@@ -30,7 +30,7 @@
 
				 #ifdef STARPU_HAVE_WINDOWS
			
 
				 #include <windows.h>
			
 
				 #endif
			
 
				-		
			
 
				+
			
 
				 /* This flag indicates whether performance models should be calibrated or not.
			
 
				  *	0: models need not be calibrated
			
 
				  *	1: models must be calibrated
			
@@ -72,7 +72,7 @@ static double per_arch_task_expected_perf(struct starpu_perfmodel *model, enum s
 
				 {
			
 
				 	double exp = -1.0;
			
 
				 	double (*per_arch_cost_model)(struct starpu_buffer_descr *);
			
 
				-	
			
 
				+
			
 
				 	per_arch_cost_model = model->per_arch[arch][nimpl].cost_model;
			
 
				 
			
 
				 	if (per_arch_cost_model)
			
@@ -99,7 +99,8 @@ double starpu_worker_get_relative_speedup(enum starpu_perf_archtype perf_archtyp
 
				 	{
			
 
				 		return _STARPU_OPENCL_ALPHA;
			
 
				 	}
			
 
				-	else if (perf_archtype < STARPU_NARCH_VARIATIONS) {
			
 
				+	else if (perf_archtype < STARPU_NARCH_VARIATIONS)
			
 
				+	{
			
 
				 		/* Gordon value */
			
 
				 		return _STARPU_GORDON_ALPHA;
			
 
				 	}
			
@@ -115,7 +116,8 @@ static double common_task_expected_perf(struct starpu_perfmodel *model, enum sta
 
				 	double exp;
			
 
				 	double alpha;
			
 
				 
			
 
				-	if (model->cost_model) {
			
 
				+	if (model->cost_model)
			
 
				+	{
			
 
				 		exp = model->cost_model(task->buffers);
			
 
				 		alpha = starpu_worker_get_relative_speedup(arch);
			
 
				 
			
@@ -137,7 +139,8 @@ void _starpu_load_perfmodel(struct starpu_perfmodel *model)
 
				 	if (!load_model)
			
 
				 		return;
			
 
				 
			
 
				-	switch (model->type) {
			
 
				+	switch (model->type)
			
 
				+	{
			
 
				 		case STARPU_PER_ARCH:
			
 
				 		case STARPU_COMMON:
			
 
				 			break;
			
@@ -160,9 +163,11 @@ void _starpu_load_perfmodel(struct starpu_perfmodel *model)
 
				 
			
 
				 static double starpu_model_expected_perf(struct starpu_task *task, struct starpu_perfmodel *model, enum starpu_perf_archtype arch,  unsigned nimpl)
			
 
				 {
			
 
				-	if (model) {
			
 
				+	if (model)
			
 
				+	{
			
 
				 		struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
			
 
				-		switch (model->type) {
			
 
				+		switch (model->type)
			
 
				+		{
			
 
				 			case STARPU_PER_ARCH:
			
 
				 
			
 
				 				return per_arch_task_expected_perf(model, arch, task, nimpl);
			
@@ -182,7 +187,7 @@ static double starpu_model_expected_perf(struct starpu_task *task, struct starpu
 
				 
			
 
				 			default:
			
 
				 				STARPU_ABORT();
			
 
				-		};
			
 
				+		}
			
 
				 	}
			
 
				 
			
 
				 	/* no model was found */
			
@@ -211,7 +216,7 @@ double starpu_data_expected_transfer_time(starpu_data_handle_t handle, unsigned
 
				 	/* If we don't need to read the content of the handle */
			
 
				 	if (!(mode & STARPU_R))
			
 
				 		return 0.0;
			
 
				-	
			
 
				+
			
 
				 	if (_starpu_is_data_present_or_requested(handle, memory_node))
			
 
				 		return 0.0;
			
 
				 
			
@@ -259,9 +264,8 @@ void _starpu_get_perf_model_dir(char *path, size_t maxlen)
 
				 	const char *home_path = getenv("HOME");
			
 
				 	if (!home_path)
			
 
				 		home_path = getenv("USERPROFILE");
			
 
				-	if (!home_path) {
			
 
				+	if (!home_path)
			
 
				 		_STARPU_ERROR("couldn't find a home place to put starpu data\n");
			
 
				-	}
			
 
				 	snprintf(path, maxlen, "%s/.starpu/sampling/", home_path);
			
 
				 #endif
			
 
				 }
			
@@ -294,8 +298,8 @@ void _starpu_create_sampling_directory_if_needed(void)
 
				 		/* The performance of the codelets are stored in
			
 
				 		 * $STARPU_PERF_MODEL_DIR/codelets/ while those of the bus are stored in
			
 
				 		 * $STARPU_PERF_MODEL_DIR/bus/ so that we don't have name collisions */
			
 
				-		
			
 
				-		/* Testing if a directory exists and creating it otherwise 
			
 
				+
			
 
				+		/* Testing if a directory exists and creating it otherwise
			
 
				 		   may not be safe: it is possible that the permission are
			
 
				 		   changed in between. Instead, we create it and check if
			
 
				 		   it already existed before */
			
@@ -305,13 +309,13 @@ void _starpu_create_sampling_directory_if_needed(void)
 
				 		if (ret == -1)
			
 
				 		{
			
 
				 			STARPU_ASSERT(errno == EEXIST);
			
 
				-	
			
 
				+
			
 
				 			/* make sure that it is actually a directory */
			
 
				 			struct stat sb;
			
 
				 			stat(perf_model_dir, &sb);
			
 
				 			STARPU_ASSERT(S_ISDIR(sb.st_mode));
			
 
				 		}
			
 
				-	
			
 
				+
			
 
				 		/* Per-task performance models */
			
 
				 		char perf_model_dir_codelets[256];
			
 
				 		_starpu_get_perf_model_dir_codelets(perf_model_dir_codelets, 256);
			
@@ -320,13 +324,13 @@ void _starpu_create_sampling_directory_if_needed(void)
 
				 		if (ret == -1)
			
 
				 		{
			
 
				 			STARPU_ASSERT(errno == EEXIST);
			
 
				-	
			
 
				+
			
 
				 			/* make sure that it is actually a directory */
			
 
				 			struct stat sb;
			
 
				 			stat(perf_model_dir_codelets, &sb);
			
 
				 			STARPU_ASSERT(S_ISDIR(sb.st_mode));
			
 
				 		}
			
 
				-	
			
 
				+
			
 
				 		/* Performance of the memory subsystem */
			
 
				 		char perf_model_dir_bus[256];
			
 
				 		_starpu_get_perf_model_dir_bus(perf_model_dir_bus, 256);
			
@@ -335,13 +339,13 @@ void _starpu_create_sampling_directory_if_needed(void)
 
				 		if (ret == -1)
			
 
				 		{
			
 
				 			STARPU_ASSERT(errno == EEXIST);
			
 
				-	
			
 
				+
			
 
				 			/* make sure that it is actually a directory */
			
 
				 			struct stat sb;
			
 
				 			stat(perf_model_dir_bus, &sb);
			
 
				 			STARPU_ASSERT(S_ISDIR(sb.st_mode));
			
 
				 		}
			
 
				-	
			
 
				+
			
 
				 		/* Performance debug measurements */
			
 
				 		char perf_model_dir_debug[256];
			
 
				 		_starpu_get_perf_model_dir_debug(perf_model_dir_debug, 256);
			
@@ -350,13 +354,13 @@ void _starpu_create_sampling_directory_if_needed(void)
 
				 		if (ret == -1)
			
 
				 		{
			
 
				 			STARPU_ASSERT(errno == EEXIST);
			
 
				-	
			
 
				+
			
 
				 			/* make sure that it is actually a directory */
			
 
				 			struct stat sb;
			
 
				 			stat(perf_model_dir_debug, &sb);
			
 
				 			STARPU_ASSERT(S_ISDIR(sb.st_mode));
			
 
				 		}
			
 
				-	
			
 
				+
			
 
				 		directory_existence_was_tested = 1;
			
 
				 	}
			
 
				 }
			
--- a/src/core/perfmodel/perfmodel.h
+++ b/src/core/perfmodel/perfmodel.h
@@ -33,7 +33,8 @@ struct _starpu_job;
 
				 enum starpu_perf_archtype;
			
 
				 
			
 
				 ///* File format */
			
 
				-//struct model_file_format {
			
 
				+//struct model_file_format
			
 
				+// {
			
 
				 //	unsigned ncore_entries;
			
 
				 //	unsigned ncuda_entries;
			
 
				 //	/* contains core entries, then cuda ones */
			
--- a/src/core/perfmodel/perfmodel_bus.c
+++ b/src/core/perfmodel/perfmodel_bus.c
@@ -47,7 +47,8 @@
 
				 #define MAXCPUS	32
			
 
				 
			
 
				 /* timing is in µs per byte (i.e. slowness, inverse of bandwidth) */
			
 
				-struct dev_timing {
			
 
				+struct dev_timing
			
 
				+{
			
 
				 	int cpu_id;
			
 
				 	double timing_htod;
			
 
				 	double timing_dtoh;
			
@@ -785,12 +786,14 @@ static int load_bus_latency_file_content(void)
 
				 			double latency;
			
 
				 
			
 
				 			n = fscanf(f, "%lf", &latency);
			
 
				-			if (n != 1) {
			
 
				+			if (n != 1)
			
 
				+			{
			
 
				 				fclose(f);
			
 
				 				return 0;
			
 
				 			}
			
 
				 			n = getc(f);
			
 
				-			if (n != '\t') {
			
 
				+			if (n != '\t')
			
 
				+			{
			
 
				 				fclose(f);
			
 
				 				return 0;
			
 
				 			}
			
@@ -799,7 +802,8 @@ static int load_bus_latency_file_content(void)
 
				 		}
			
 
				 
			
 
				 		n = getc(f);
			
 
				-		if (n != '\n') {
			
 
				+		if (n != '\n')
			
 
				+		{
			
 
				 			fclose(f);
			
 
				 			return 0;
			
 
				 		}
			
@@ -852,7 +856,8 @@ static void write_bus_latency_file_content(void)
 
				 			{
			
 
				 				latency = 0.0;
			
 
				 			}
			
 
				-			else {
			
 
				+			else
			
 
				+			{
			
 
				 				/* µs */
			
 
				                                 latency = ((src && dst)?2000.0:500.0);
			
 
				 			}
			
@@ -925,13 +930,15 @@ static int load_bus_bandwidth_file_content(void)
 
				 			double bandwidth;
			
 
				 
			
 
				 			n = fscanf(f, "%lf", &bandwidth);
			
 
				-			if (n != 1) {
			
 
				+			if (n != 1)
			
 
				+			{
			
 
				 				fprintf(stderr,"didn't get a number\n");
			
 
				 				fclose(f);
			
 
				 				return 0;
			
 
				 			}
			
 
				 			n = getc(f);
			
 
				-			if (n != '\t') {
			
 
				+			if (n != '\t')
			
 
				+			{
			
 
				 				fclose(f);
			
 
				 				return 0;
			
 
				 			}
			
@@ -940,7 +947,8 @@ static int load_bus_bandwidth_file_content(void)
 
				 		}
			
 
				 
			
 
				 		n = getc(f);
			
 
				-		if (n != '\n') {
			
 
				+		if (n != '\n')
			
 
				+		{
			
 
				 			fclose(f);
			
 
				 			return 0;
			
 
				 		}
			
@@ -1010,7 +1018,8 @@ static void write_bus_bandwidth_file_content(void)
 
				 				bandwidth = 1.0/slowness;
			
 
				 			}
			
 
				 #endif
			
 
				-			else {
			
 
				+			else
			
 
				+			{
			
 
				 			        /* convention */
			
 
				 			        bandwidth = 0.0;
			
 
				 			}
			
@@ -1094,12 +1103,14 @@ static void check_bus_config_file()
 
				 
			
 
				         get_config_path(path, 256);
			
 
				         res = access(path, F_OK);
			
 
				-        if (res) {
			
 
				+        if (res)
			
 
				+	{
			
 
				 		fprintf(stderr, "No performance model for the bus, calibrating...");
			
 
				 		starpu_force_bus_sampling();
			
 
				 		fprintf(stderr, "done\n");
			
 
				         }
			
 
				-        else {
			
 
				+        else
			
 
				+	{
			
 
				                 FILE *f;
			
 
				                 int ret, read_cuda, read_opencl;
			
 
				                 unsigned read_cpus;
			
@@ -1130,17 +1141,20 @@ static void check_bus_config_file()
 
				 #endif
			
 
				 
			
 
				                 // Checking if both configurations match
			
 
				-                if (read_cpus != ncpus) {
			
 
				+                if (read_cpus != ncpus)
			
 
				+		{
			
 
				 			fprintf(stderr, "Current configuration does not match the bus performance model (CPUS: (stored) %u != (current) %u), recalibrating...", read_cpus, ncpus);
			
 
				                         starpu_force_bus_sampling();
			
 
				 			fprintf(stderr, "done\n");
			
 
				                 }
			
 
				-                else if (read_cuda != ncuda) {
			
 
				+                else if (read_cuda != ncuda)
			
 
				+		{
			
 
				                         fprintf(stderr, "Current configuration does not match the bus performance model (CUDA: (stored) %d != (current) %d), recalibrating...", read_cuda, ncuda);
			
 
				                         starpu_force_bus_sampling();
			
 
				 			fprintf(stderr, "done\n");
			
 
				                 }
			
 
				-                else if (read_opencl != nopencl) {
			
 
				+                else if (read_opencl != nopencl)
			
 
				+		{
			
 
				                         fprintf(stderr, "Current configuration does not match the bus performance model (OpenCL: (stored) %d != (current) %d), recalibrating...", read_opencl, nopencl);
			
 
				                         starpu_force_bus_sampling();
			
 
				 			fprintf(stderr, "done\n");
			
--- a/src/core/perfmodel/perfmodel_history.c
+++ b/src/core/perfmodel/perfmodel_history.c
@@ -44,8 +44,6 @@ static struct starpu_model_list *registered_models = NULL;
 
				 /*
			
 
				  * History based model
			
 
				  */
			
 
				-
			
 
				-
			
 
				 static void insert_history_entry(struct starpu_history_entry *entry, struct starpu_history_list **list, struct starpu_htbl32_node **history_ptr)
			
 
				 {
			
 
				 	struct starpu_history_list *link;
			
@@ -61,7 +59,6 @@ static void insert_history_entry(struct starpu_history_entry *entry, struct star
 
				 	STARPU_ASSERT(old == NULL);
			
 
				 }
			
 
				 
			
 
				-
			
 
				 static void dump_reg_model(FILE *f, struct starpu_perfmodel *model, unsigned arch, unsigned nimpl)
			
 
				 {
			
 
				 	struct starpu_per_arch_perfmodel *per_arch_model;
			
@@ -193,7 +190,8 @@ static void parse_per_arch_model_file(FILE *f, struct starpu_per_arch_perfmodel
 
				 
			
 
				 	/* parse cpu entries */
			
 
				 	unsigned i;
			
 
				-	for (i = 0; i < nentries; i++) {
			
 
				+	for (i = 0; i < nentries; i++)
			
 
				+	{
			
 
				 		struct starpu_history_entry *entry = NULL;
			
 
				 		if (scan_history)
			
 
				 		{
			
@@ -202,7 +200,7 @@ static void parse_per_arch_model_file(FILE *f, struct starpu_per_arch_perfmodel
 
				 		}
			
 
				 
			
 
				 		scan_history_entry(f, entry);
			
 
				-		
			
 
				+
			
 
				 		/* insert the entry in the hashtable and the list structures  */
			
 
				 		if (scan_history)
			
 
				 			insert_history_entry(entry, &per_arch_model->list, &per_arch_model->history);
			
@@ -214,36 +212,44 @@ static void parse_arch(FILE *f, struct starpu_perfmodel *model, unsigned scan_hi
 
				 	struct starpu_per_arch_perfmodel dummy;
			
 
				 	int nimpls, implmax, skipimpl, impl;
			
 
				 	unsigned ret, arch;
			
 
				-	
			
 
				 
			
 
				-	for (arch = archmin; arch < archmax; arch++) {
			
 
				+	for (arch = archmin; arch < archmax; arch++)
			
 
				+	{
			
 
				 		_starpu_drop_comments(f);
			
 
				 		ret = fscanf(f, "%d\n", &nimpls);
			
 
				 		STARPU_ASSERT(ret == 1);
			
 
				 		implmax = STARPU_MIN(nimpls, STARPU_MAXIMPLEMENTATIONS);
			
 
				 		skipimpl = nimpls - STARPU_MAXIMPLEMENTATIONS;
			
 
				-		for (impl = 0; impl < implmax; impl++) {
			
 
				+		for (impl = 0; impl < implmax; impl++)
			
 
				+		{
			
 
				 			parse_per_arch_model_file(f, &model->per_arch[arch][impl], scan_history);
			
 
				 		}
			
 
				-		if (skipimpl > 0) {
			
 
				-			for (impl = 0; impl < skipimpl; impl++) {
			
 
				+		if (skipimpl > 0)
			
 
				+		{
			
 
				+			for (impl = 0; impl < skipimpl; impl++)
			
 
				+			{
			
 
				 				parse_per_arch_model_file(f, &dummy, 0);
			
 
				 			}
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	if (skiparch > 0) {
			
 
				+	if (skiparch > 0)
			
 
				+	{
			
 
				 		_starpu_drop_comments(f);
			
 
				 		ret = fscanf(f, "%d\n", &nimpls);
			
 
				 		STARPU_ASSERT(ret == 1);
			
 
				 		implmax = STARPU_MIN(nimpls, STARPU_MAXIMPLEMENTATIONS);
			
 
				 		skipimpl = nimpls - STARPU_MAXIMPLEMENTATIONS;
			
 
				-		for (arch = 0; arch < skiparch; arch ++) {
			
 
				-			for (impl = 0; impl < implmax; impl++) {
			
 
				+		for (arch = 0; arch < skiparch; arch ++)
			
 
				+		{
			
 
				+			for (impl = 0; impl < implmax; impl++)
			
 
				+			{
			
 
				 				parse_per_arch_model_file(f, &dummy, 0);
			
 
				 			}
			
 
				-			if (skipimpl > 0) {
			
 
				-				for (impl = 0; impl < skipimpl; impl++) {
			
 
				+			if (skipimpl > 0)
			
 
				+			{
			
 
				+				for (impl = 0; impl < skipimpl; impl++)
			
 
				+				{
			
 
				 					parse_per_arch_model_file(f, &dummy, 0);
			
 
				 				}
			
 
				 			}
			
@@ -270,9 +276,9 @@ static void parse_model_file(FILE *f, struct starpu_perfmodel *model, unsigned s
 
				 	if (narchs > 0)
			
 
				 	{
			
 
				 		parse_arch(f, model, scan_history,
			
 
				-				archmin,
			
 
				-				STARPU_MIN(narchs, STARPU_MAXCPUS),
			
 
				-				narchs - STARPU_MAXCPUS);
			
 
				+			   archmin,
			
 
				+			   STARPU_MIN(narchs, STARPU_MAXCPUS),
			
 
				+			   narchs - STARPU_MAXCPUS);
			
 
				 	}
			
 
				 
			
 
				 	/* Parsing CUDA devs */
			
@@ -284,9 +290,9 @@ static void parse_model_file(FILE *f, struct starpu_perfmodel *model, unsigned s
 
				 	if (narchs > 0)
			
 
				 	{
			
 
				 		parse_arch(f, model, scan_history,
			
 
				-				archmin,
			
 
				-				archmin + STARPU_MIN(narchs, STARPU_MAXCUDADEVS),
			
 
				-				narchs - STARPU_MAXCUDADEVS);
			
 
				+			   archmin,
			
 
				+			   archmin + STARPU_MIN(narchs, STARPU_MAXCUDADEVS),
			
 
				+			   narchs - STARPU_MAXCUDADEVS);
			
 
				 	}
			
 
				 
			
 
				 	/* Parsing OpenCL devs */
			
@@ -299,9 +305,9 @@ static void parse_model_file(FILE *f, struct starpu_perfmodel *model, unsigned s
 
				 	if (narchs > 0)
			
 
				 	{
			
 
				 		parse_arch(f, model, scan_history,
			
 
				-				archmin,
			
 
				-				archmin + STARPU_MIN(narchs, STARPU_MAXOPENCLDEVS),
			
 
				-				narchs - STARPU_MAXOPENCLDEVS);
			
 
				+			   archmin,
			
 
				+			   archmin + STARPU_MIN(narchs, STARPU_MAXOPENCLDEVS),
			
 
				+			   narchs - STARPU_MAXOPENCLDEVS);
			
 
				 	}
			
 
				 
			
 
				 	/* Parsing Gordon implementations */
			
@@ -314,9 +320,9 @@ static void parse_model_file(FILE *f, struct starpu_perfmodel *model, unsigned s
 
				 	if (narchs > 0)
			
 
				 	{
			
 
				 		parse_arch(f, model, scan_history,
			
 
				-				archmin,
			
 
				-				archmin + max_gordondevs,
			
 
				-				narchs - max_gordondevs);
			
 
				+			   archmin,
			
 
				+			   archmin + max_gordondevs,
			
 
				+			   narchs - max_gordondevs);
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -334,7 +340,8 @@ static void dump_per_arch_model_file(FILE *f, struct starpu_perfmodel *model, un
 
				 	{
			
 
				 		/* Dump the list of all entries in the history */
			
 
				 		ptr = per_arch_model->list;
			
 
				-		while(ptr) {
			
 
				+		while(ptr)
			
 
				+		{
			
 
				 			nentries++;
			
 
				 			ptr = ptr->next;
			
 
				 		}
			
@@ -353,7 +360,8 @@ static void dump_per_arch_model_file(FILE *f, struct starpu_perfmodel *model, un
 
				 	{
			
 
				 		fprintf(f, "# hash\t\tsize\t\tmean\t\tdev\t\tsum\t\tsum2\t\tn\n");
			
 
				 		ptr = per_arch_model->list;
			
 
				-		while (ptr) {
			
 
				+		while (ptr)
			
 
				+		{
			
 
				 			dump_history_entry(f, ptr->entry);
			
 
				 			ptr = ptr->next;
			
 
				 		}
			
@@ -374,7 +382,8 @@ static unsigned get_n_entries(struct starpu_perfmodel *model, unsigned arch, uns
 
				 	{
			
 
				 		/* Dump the list of all entries in the history */
			
 
				 		ptr = per_arch_model->list;
			
 
				-		while(ptr) {
			
 
				+		while(ptr)
			
 
				+		{
			
 
				 			nentries++;
			
 
				 			ptr = ptr->next;
			
 
				 		}
			
@@ -403,21 +412,25 @@ static void dump_model_file(FILE *f, struct starpu_perfmodel *model)
 
				 				break;
			
 
				 		}
			
 
				 
			
 
				-		if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED) {
			
 
				+		if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
			
 
				+		{
			
 
				 			for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
			
 
				 				if (get_n_entries(model, arch, nimpl))
			
 
				 				{
			
 
				 					number_of_archs[idx]++;
			
 
				 					break;
			
 
				 				}
			
 
				-		} else if (model->type == STARPU_REGRESSION_BASED) {
			
 
				+		}
			
 
				+		else if (model->type == STARPU_REGRESSION_BASED)
			
 
				+		{
			
 
				 			for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
			
 
				 				if (model->per_arch[arch][nimpl].regression.nsample)
			
 
				 				{
			
 
				 					number_of_archs[idx]++;
			
 
				 					break;
			
 
				 				}
			
 
				-		} else
			
 
				+		}
			
 
				+		else
			
 
				 			STARPU_ASSERT(!"Unknown history-based performance model");
			
 
				 	}
			
 
				 
			
@@ -464,15 +477,19 @@ static void dump_model_file(FILE *f, struct starpu_perfmodel *model)
 
				 		}
			
 
				 
			
 
				 		unsigned max_impl = 0;
			
 
				-		if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED) {
			
 
				+		if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
			
 
				+		{
			
 
				 			for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
			
 
				 				if (get_n_entries(model, arch, nimpl))
			
 
				 					max_impl = nimpl + 1;
			
 
				-		} else if (model->type == STARPU_REGRESSION_BASED) {
			
 
				+		}
			
 
				+		else if (model->type == STARPU_REGRESSION_BASED)
			
 
				+		{
			
 
				 			for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
			
 
				 				if (model->per_arch[arch][nimpl].regression.nsample)
			
 
				 					max_impl = nimpl + 1;
			
 
				-		} else
			
 
				+		}
			
 
				+		else
			
 
				 			STARPU_ASSERT(!"Unknown history-based performance model");
			
 
				 
			
 
				 		if (max_impl == 0)
			
@@ -514,7 +531,7 @@ static void get_model_debug_path(struct starpu_perfmodel *model, const char *arc
 
				 
			
 
				 	_starpu_get_perf_model_dir_debug(path, maxlen);
			
 
				 	strncat(path, model->symbol, maxlen);
			
 
				-	
			
 
				+
			
 
				 	char hostname[32];
			
 
				 	char *forced_hostname = getenv("STARPU_HOSTNAME");
			
 
				 	if (forced_hostname && forced_hostname[0])
			
@@ -535,7 +552,8 @@ int _starpu_register_model(struct starpu_perfmodel *model)
 
				 {
			
 
				 	/* If the model has already been loaded, there is nothing to do */
			
 
				 	_STARPU_PTHREAD_RWLOCK_RDLOCK(&registered_models_rwlock);
			
 
				-	if (model->is_loaded) {
			
 
				+	if (model->is_loaded)
			
 
				+	{
			
 
				 		_STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
			
 
				 		return 0;
			
 
				 	}
			
@@ -544,7 +562,8 @@ int _starpu_register_model(struct starpu_perfmodel *model)
 
				 	/* We have to make sure the model has not been loaded since the
			
 
				          * last time we took the lock */
			
 
				 	_STARPU_PTHREAD_RWLOCK_WRLOCK(&registered_models_rwlock);
			
 
				-	if (model->is_loaded) {
			
 
				+	if (model->is_loaded)
			
 
				+	{
			
 
				 		_STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
			
 
				 		return 0;
			
 
				 	}
			
@@ -565,8 +584,10 @@ int _starpu_register_model(struct starpu_perfmodel *model)
 
				 	unsigned arch;
			
 
				 	unsigned nimpl;
			
 
				 
			
 
				-	for (arch = 0; arch < STARPU_NARCH_VARIATIONS; arch++) {
			
 
				-		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) {
			
 
				+	for (arch = 0; arch < STARPU_NARCH_VARIATIONS; arch++)
			
 
				+	{
			
 
				+		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
			
 
				+		{
			
 
				 			char debugpath[256];
			
 
				 			starpu_perfmodel_debugfilepath(model, arch, debugpath, 256, nimpl);
			
 
				 			model->per_arch[arch][nimpl].debug_file = fopen(debugpath, "a+");
			
@@ -583,7 +604,7 @@ static void get_model_path(struct starpu_perfmodel *model, char *path, size_t ma
 
				 {
			
 
				 	_starpu_get_perf_model_dir_codelets(path, maxlen);
			
 
				 	strncat(path, model->symbol, maxlen);
			
 
				-	
			
 
				+
			
 
				 	char hostname[32];
			
 
				 	char *forced_hostname = getenv("STARPU_HOSTNAME");
			
 
				 	if (forced_hostname && forced_hostname[0])
			
@@ -626,8 +647,9 @@ static void _starpu_dump_registered_models(void)
 
				 
			
 
				 	_STARPU_DEBUG("DUMP MODELS !\n");
			
 
				 
			
 
				-	while (node) {
			
 
				-		save_history_based_model(node->model);		
			
 
				+	while (node)
			
 
				+	{
			
 
				+		save_history_based_model(node->model);
			
 
				 		node = node->next;
			
 
				 
			
 
				 		/* XXX free node */
			
@@ -657,12 +679,11 @@ void _starpu_deinitialize_registered_performance_models(void)
 
				  * is still not loaded once we have the lock, we do load it.  */
			
 
				 void _starpu_load_history_based_model(struct starpu_perfmodel *model, unsigned scan_history)
			
 
				 {
			
 
				-
			
 
				 	STARPU_ASSERT(model);
			
 
				 	STARPU_ASSERT(model->symbol);
			
 
				-	
			
 
				+
			
 
				 	int already_loaded;
			
 
				- 
			
 
				+
			
 
				 	_STARPU_PTHREAD_RWLOCK_RDLOCK(&registered_models_rwlock);
			
 
				 	already_loaded = model->is_loaded;
			
 
				 	_STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
			
@@ -681,7 +702,7 @@ void _starpu_load_history_based_model(struct starpu_perfmodel *model, unsigned s
 
				 		_STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
			
 
				 		return;
			
 
				 	}
			
 
				-	
			
 
				+
			
 
				 	_STARPU_PTHREAD_RWLOCK_INIT(&model->model_rwlock, NULL);
			
 
				 
			
 
				 	_STARPU_PTHREAD_RWLOCK_WRLOCK(&model->model_rwlock);
			
@@ -695,12 +716,13 @@ void _starpu_load_history_based_model(struct starpu_perfmodel *model, unsigned s
 
				 	_STARPU_DEBUG("Opening performance model file %s for model %s ... ", path, model->symbol);
			
 
				 
			
 
				 	unsigned calibrate_flag = _starpu_get_calibrate_flag();
			
 
				-	model->benchmarking = calibrate_flag; 
			
 
				-	
			
 
				+	model->benchmarking = calibrate_flag;
			
 
				+
			
 
				 	/* try to open an existing file and load it */
			
 
				 	int res;
			
 
				-	res = access(path, F_OK); 
			
 
				-	if (res == 0) {
			
 
				+	res = access(path, F_OK);
			
 
				+	if (res == 0)
			
 
				+	{
			
 
				 		if (calibrate_flag == 2)
			
 
				 		{
			
 
				 			/* The user specified that the performance model should
			
@@ -709,21 +731,24 @@ void _starpu_load_history_based_model(struct starpu_perfmodel *model, unsigned s
 
				                         _STARPU_DEBUG("Overwrite existing file\n");
			
 
				 			initialize_model(model);
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			/* We load the available file */
			
 
				 			_STARPU_DEBUG("File exists\n");
			
 
				 			FILE *f;
			
 
				 			f = fopen(path, "r");
			
 
				 			STARPU_ASSERT(f);
			
 
				-	
			
 
				+
			
 
				 			parse_model_file(f, model, scan_history);
			
 
				-	
			
 
				+
			
 
				 			fclose(f);
			
 
				 		}
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		_STARPU_DEBUG("File does not exists\n");
			
 
				-		if (!calibrate_flag) {
			
 
				+		if (!calibrate_flag)
			
 
				+		{
			
 
				 			_STARPU_DISP("Warning: model %s is not calibrated, forcing calibration for this run. Use the STARPU_CALIBRATE environment variable to control this.\n", model->symbol);
			
 
				 			_starpu_set_calibrate_flag(1);
			
 
				 			model->benchmarking = 1;
			
@@ -753,16 +778,19 @@ int starpu_list_models(FILE *output)
 
				 
			
 
				         strncpy(path, perf_model_dir_codelets, 256);
			
 
				         dp = opendir(path);
			
 
				-        if (dp != NULL) {
			
 
				-                while ((ep = readdir(dp))) {
			
 
				+        if (dp != NULL)
			
 
				+	{
			
 
				+                while ((ep = readdir(dp)))
			
 
				+		{
			
 
				                         if (strcmp(ep->d_name, ".") && strcmp(ep->d_name, ".."))
			
 
				                                 fprintf(output, "file: <%s>\n", ep->d_name);
			
 
				                 }
			
 
				                 closedir (dp);
			
 
				                 return 0;
			
 
				         }
			
 
				-        else {
			
 
				-                perror ("Couldn't open the directory");
			
 
				+        else
			
 
				+	{
			
 
				+                perror("Couldn't open the directory");
			
 
				                 return 1;
			
 
				         }
			
 
				 }
			
@@ -782,9 +810,11 @@ int starpu_load_history_debug(const char *symbol, struct starpu_perfmodel *model
 
				 	/* does it exist ? */
			
 
				 	int res;
			
 
				 	res = access(path, F_OK);
			
 
				-	if (res) {
			
 
				+	if (res)
			
 
				+	{
			
 
				 		char *dot = strrchr(symbol, '.');
			
 
				-		if (dot) {
			
 
				+		if (dot)
			
 
				+		{
			
 
				 			char *symbol2 = strdup(symbol);
			
 
				 			symbol2[dot-symbol] = '\0';
			
 
				 			int ret;
			
@@ -846,7 +876,7 @@ void starpu_perfmodel_get_arch_name(enum starpu_perf_archtype arch, char *archna
 
				 }
			
 
				 
			
 
				 void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model,
			
 
				-		enum starpu_perf_archtype arch, char *path, size_t maxlen, unsigned nimpl)
			
 
				+				    enum starpu_perf_archtype arch, char *path, size_t maxlen, unsigned nimpl)
			
 
				 {
			
 
				 	char archname[32];
			
 
				 	starpu_perfmodel_get_arch_name(arch, archname, 32, nimpl);
			
@@ -880,7 +910,8 @@ double _starpu_non_linear_regression_based_job_expected_perf(struct starpu_perfm
 
				 
			
 
				 	if (regmodel->nl_valid && size >= regmodel->minx * 0.9 && size <= regmodel->maxx * 1.1)
			
 
				 		exp = regmodel->a*pow((double)size, regmodel->b) + regmodel->c;
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		uint32_t key = _starpu_compute_buffers_footprint(j);
			
 
				 		struct starpu_per_arch_perfmodel *per_arch_model = &model->per_arch[arch][nimpl];
			
 
				 		struct starpu_htbl32_node *history = per_arch_model->history;
			
@@ -892,7 +923,8 @@ double _starpu_non_linear_regression_based_job_expected_perf(struct starpu_perfm
 
				 
			
 
				 		if (entry && entry->nsample >= _STARPU_CALIBRATION_MINIMUM)
			
 
				 			exp = entry->mean;
			
 
				-		else if (!model->benchmarking) {
			
 
				+		else if (!model->benchmarking)
			
 
				+		{
			
 
				 			_STARPU_DISP("Warning: model %s is not calibrated enough, forcing calibration for this run. Use the STARPU_CALIBRATE environment variable to control this.\n", model->symbol);
			
 
				 			_starpu_set_calibrate_flag(1);
			
 
				 			model->benchmarking = 1;
			
@@ -929,7 +961,8 @@ double _starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, e
 
				 		/* Not calibrated enough */
			
 
				 		exp = -1.0;
			
 
				 
			
 
				-	if (exp == -1.0 && !model->benchmarking) {
			
 
				+	if (exp == -1.0 && !model->benchmarking)
			
 
				+	{
			
 
				 		_STARPU_DISP("Warning: model %s is not calibrated enough, forcing calibration for this run. Use the STARPU_CALIBRATE environment variable to control this.\n", model->symbol);
			
 
				 		_starpu_set_calibrate_flag(1);
			
 
				 		model->benchmarking = 1;
			
@@ -948,14 +981,11 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 
				 
			
 
				 		if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
			
 
				 		{
			
 
				-			uint32_t key = _starpu_compute_buffers_footprint(j);
			
 
				 			struct starpu_history_entry *entry;
			
 
				-
			
 
				 			struct starpu_htbl32_node *history;
			
 
				 			struct starpu_htbl32_node **history_ptr;
			
 
				-
			
 
				 			struct starpu_history_list **list;
			
 
				-
			
 
				+			uint32_t key = _starpu_compute_buffers_footprint(j);
			
 
				 
			
 
				 			history = per_arch_model->history;
			
 
				 			history_ptr = &per_arch_model->history;
			
@@ -982,7 +1012,8 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 
				 				insert_history_entry(entry, list, history_ptr);
			
 
				 
			
 
				 			}
			
 
				-			else {
			
 
				+			else
			
 
				+			{
			
 
				 				/* there is already some entry with the same footprint */
			
 
				 				entry->sum += measured;
			
 
				 				entry->sum2 += measured*measured;
			
@@ -992,10 +1023,10 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 
				 				entry->mean = entry->sum / n;
			
 
				 				entry->deviation = sqrt((entry->sum2 - (entry->sum*entry->sum)/n)/n);
			
 
				 			}
			
 
				-			
			
 
				+
			
 
				 			STARPU_ASSERT(entry);
			
 
				 		}
			
 
				-			
			
 
				+
			
 
				 		if (model->type == STARPU_REGRESSION_BASED || model->type == STARPU_NL_REGRESSION_BASED)
			
 
				 		{
			
 
				 			struct starpu_regression_model *reg_model;
			
@@ -1018,7 +1049,7 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 
				 			reg_model->nsample++;
			
 
				 
			
 
				 			unsigned n = reg_model->nsample;
			
 
				-			
			
 
				+
			
 
				 			double num = (n*reg_model->sumlnxlny - reg_model->sumlnx*reg_model->sumlny);
			
 
				 			double denom = (n*reg_model->sumlnx2 - reg_model->sumlnx*reg_model->sumlnx);
			
 
				 
			
@@ -1035,12 +1066,12 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 
				 
			
 
				 		if (!j->footprint_is_computed)
			
 
				 			(void) _starpu_compute_buffers_footprint(j);
			
 
				-			
			
 
				+
			
 
				 		STARPU_ASSERT(j->footprint_is_computed);
			
 
				 
			
 
				 		fprintf(debug_file, "0x%x\t%lu\t%f\t%f\t%f\t%d\t\t", j->footprint, (unsigned long) _starpu_job_get_data_size(j), measured, task->predicted, task->predicted_transfer, cpuid);
			
 
				 		unsigned i;
			
 
				-			
			
 
				+
			
 
				 		for (i = 0; i < task->cl->nbuffers; i++)
			
 
				 		{
			
 
				 			starpu_data_handle_t handle = task->buffers[i].handle;
			
@@ -1049,10 +1080,9 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 
				 			STARPU_ASSERT(handle->ops->display);
			
 
				 			handle->ops->display(handle, debug_file);
			
 
				 		}
			
 
				-		fprintf(debug_file, "\n");	
			
 
				+		fprintf(debug_file, "\n");
			
 
				 
			
 
				 #endif
			
 
				-		
			
 
				 		_STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);
			
 
				 	}
			
 
				 }
			
--- a/src/core/perfmodel/regression.c
+++ b/src/core/perfmodel/regression.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2009, 2010, 2011  Université de Bordeaux 1
			
 
				- * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -117,7 +117,8 @@ static unsigned find_list_size(struct starpu_history_list *list_history)
 
				 	unsigned cnt = 0;
			
 
				 
			
 
				 	struct starpu_history_list *ptr = list_history;
			
 
				-	while (ptr) {
			
 
				+	while (ptr)
			
 
				+	{
			
 
				 		cnt++;
			
 
				 		ptr = ptr->next;
			
 
				 	}
			
@@ -143,7 +144,8 @@ static void dump_list(unsigned *x, double *y, struct starpu_history_list *list_h
 
				 	struct starpu_history_list *ptr = list_history;
			
 
				 	unsigned i = 0;
			
 
				 
			
 
				-	while (ptr) {
			
 
				+	while (ptr)
			
 
				+	{
			
 
				 		x[i] = ptr->entry->size;
			
 
				 		y[i] = ptr->entry->mean;
			
 
				 
			
@@ -153,7 +155,7 @@ static void dump_list(unsigned *x, double *y, struct starpu_history_list *list_h
 
				 }
			
 
				 
			
 
				 
			
 
				-/* y = ax^b + c 
			
 
				+/* y = ax^b + c
			
 
				  * 	return 0 if success, -1 otherwise
			
 
				  * 	if success, a, b and c are modified
			
 
				  * */
			
@@ -171,7 +173,7 @@ int _starpu_regression_non_linear_power(struct starpu_history_list *ptr, double
 
				 
			
 
				 	double cmin = 0.0;
			
 
				 	double cmax = find_list_min(y, n);
			
 
				-	
			
 
				+
			
 
				 	unsigned iter;
			
 
				 
			
 
				 	double err = 100000.0;
			
@@ -180,7 +182,7 @@ int _starpu_regression_non_linear_power(struct starpu_history_list *ptr, double
 
				 	{
			
 
				 		double c1, c2;
			
 
				 		double r1, r2;
			
 
				-		
			
 
				+
			
 
				 		double radius = 0.01;
			
 
				 
			
 
				 		c1 = cmin + (0.5-radius)*(cmax - cmin);
			
@@ -197,7 +199,8 @@ int _starpu_regression_non_linear_power(struct starpu_history_list *ptr, double
 
				 		{
			
 
				 			cmax = (cmin + cmax)/2;
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			/* 2 is better */
			
 
				 			cmin = (cmin + cmax)/2;
			
 
				 		}
			
@@ -210,7 +213,7 @@ int _starpu_regression_non_linear_power(struct starpu_history_list *ptr, double
 
				 
			
 
				 	*c = (cmin + cmax)/2;
			
 
				 
			
 
				-	*b = compute_b(*c, n, x, y); 
			
 
				+	*b = compute_b(*c, n, x, y);
			
 
				 	*a = exp(compute_a(*c, *b, n, x, y));
			
 
				 
			
 
				 	free(x);
			
@@ -218,4 +221,3 @@ int _starpu_regression_non_linear_power(struct starpu_history_list *ptr, double
 
				 
			
 
				 	return 0;
			
 
				 }
			
 
				-
			
--- a/src/core/progress_hook.c
+++ b/src/core/progress_hook.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2010  Université de Bordeaux 1
			
 
				- * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -21,7 +21,8 @@
 
				 
			
 
				 #define NMAXHOOKS	16
			
 
				 
			
 
				-struct progression_hook {
			
 
				+struct progression_hook
			
 
				+{
			
 
				 	unsigned (*func)(void *arg);
			
 
				 	void *arg;
			
 
				 	unsigned active;
			
@@ -48,7 +49,7 @@ int starpu_progression_hook_register(unsigned (*func)(void *arg), void *arg)
 
				 			active_hook_cnt++;
			
 
				 
			
 
				 			_STARPU_PTHREAD_RWLOCK_UNLOCK(&progression_hook_rwlock);
			
 
				-			
			
 
				+
			
 
				 			return hook;
			
 
				 		}
			
 
				 	}
			
--- a/src/core/sched_policy.c
+++ b/src/core/sched_policy.c
@@ -49,7 +49,8 @@ extern struct starpu_sched_policy _starpu_sched_parallel_heft_policy;
 
				 extern struct starpu_sched_policy _starpu_sched_pgreedy_policy;
			
 
				 extern struct starpu_sched_policy heft_policy;
			
 
				 
			
 
				-static struct starpu_sched_policy *predefined_policies[] = {
			
 
				+static struct starpu_sched_policy *predefined_policies[] =
			
 
				+{
			
 
				 	&_starpu_sched_ws_policy,
			
 
				 	&_starpu_sched_prio_policy,
			
 
				 	&_starpu_sched_dm_policy,
			
@@ -98,7 +99,6 @@ static void load_sched_policy(struct starpu_sched_policy *sched_policy)
 
				 
			
 
				 static struct starpu_sched_policy *find_sched_policy_from_name(const char *policy_name)
			
 
				 {
			
 
				-
			
 
				 	if (!policy_name)
			
 
				 		return NULL;
			
 
				 
			
@@ -109,7 +109,8 @@ static struct starpu_sched_policy *find_sched_policy_from_name(const char *polic
 
				 		p = predefined_policies[i];
			
 
				 		if (p->policy_name)
			
 
				 		{
			
 
				-			if (strcmp(policy_name, p->policy_name) == 0) {
			
 
				+			if (strcmp(policy_name, p->policy_name) == 0)
			
 
				+			{
			
 
				 				/* we found a policy with the requested name */
			
 
				 				return p;
			
 
				 			}
			
@@ -124,7 +125,8 @@ static struct starpu_sched_policy *find_sched_policy_from_name(const char *polic
 
				 static void display_sched_help_message(void)
			
 
				 {
			
 
				 	const char *sched_env = getenv("STARPU_SCHED");
			
 
				-	if (sched_env && (strcmp(sched_env, "help") == 0)) {
			
 
				+	if (sched_env && (strcmp(sched_env, "help") == 0))
			
 
				+	{
			
 
				 		fprintf(stderr, "STARPU_SCHED can be either of\n");
			
 
				 
			
 
				 		/* display the description of all predefined policies */
			
@@ -153,7 +155,8 @@ static struct starpu_sched_policy *select_sched_policy(struct _starpu_machine_co
 
				 	{
			
 
				 		sched_pol_name = user_conf->sched_policy_name;
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		sched_pol_name = getenv("STARPU_SCHED");
			
 
				 	}
			
 
				 
			
@@ -184,7 +187,8 @@ void _starpu_init_sched_policy(struct _starpu_machine_config *config)
 
				 	{
			
 
				 		do_calibrate = config->user_conf->calibrate;
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		int res = starpu_get_env_number("STARPU_CALIBRATE");
			
 
				 		do_calibrate =  (res < 0)?0:(unsigned)res;
			
 
				 	}
			
@@ -215,7 +219,7 @@ static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int wo
 
				 	/* Is this a basic worker or a combined worker ? */
			
 
				 	int is_basic_worker = (workerid < nbasic_workers);
			
 
				 
			
 
				-	unsigned memory_node; 
			
 
				+	unsigned memory_node;
			
 
				 	struct _starpu_worker *worker = NULL;
			
 
				 	struct _starpu_combined_worker *combined_worker = NULL;
			
 
				 
			
@@ -240,7 +244,8 @@ static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int wo
 
				 	{
			
 
				 		return _starpu_push_local_task(worker, task, 0);
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		/* This is a combined worker so we create task aliases */
			
 
				 		int worker_size = combined_worker->worker_size;
			
 
				 		int *combined_workerid = combined_worker->combined_workerid;
			
@@ -292,7 +297,8 @@ int _starpu_push_task(struct _starpu_job *j, unsigned job_is_already_locked)
 
				 	{
			
 
				 		ret = _starpu_push_task_on_specific_worker(task, task->workerid);
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		STARPU_ASSERT(policy.push_task);
			
 
				 		ret = policy.push_task(task);
			
 
				 	}
			
@@ -385,4 +391,3 @@ int starpu_push_local_task(int workerid, struct starpu_task *task, int back)
 
				 	return _starpu_push_local_task(worker, task, back);
			
 
				 }
			
 
				 
			
 
				-
			
--- a/src/core/task.c
+++ b/src/core/task.c
@@ -138,18 +138,18 @@ void starpu_task_destroy(struct starpu_task *task)
 
				    /* If starpu_task_destroy is called in a callback, we just set the destroy
			
 
				       flag. The task will be destroyed after the callback returns */
			
 
				    if (task == starpu_get_current_task()
			
 
				-       && _starpu_get_local_worker_status() == STATUS_CALLBACK) {
			
 
				+       && _starpu_get_local_worker_status() == STATUS_CALLBACK)
			
 
				+   {
			
 
				 
			
 
				-      task->destroy = 1;
			
 
				+	   task->destroy = 1;
			
 
				 
			
 
				-   } else {
			
 
				-
			
 
				-      starpu_task_deinit(task);
			
 
				-
			
 
				-      /* TODO handle the case of task with detach = 1 and destroy = 1 */
			
 
				-      /* TODO handle the case of non terminated tasks -> return -EINVAL */
			
 
				-	
			
 
				-      free(task);
			
 
				+   }
			
 
				+   else
			
 
				+   {
			
 
				+	   starpu_task_deinit(task);
			
 
				+	   /* TODO handle the case of task with detach = 1 and destroy = 1 */
			
 
				+	   /* TODO handle the case of non terminated tasks -> return -EINVAL */
			
 
				+	   free(task);
			
 
				    }
			
 
				 }
			
 
				 
			
@@ -158,13 +158,15 @@ int starpu_task_wait(struct starpu_task *task)
 
				         _STARPU_LOG_IN();
			
 
				 	STARPU_ASSERT(task);
			
 
				 
			
 
				-	if (task->detach || task->synchronous) {
			
 
				+	if (task->detach || task->synchronous)
			
 
				+	{
			
 
				 		_STARPU_DEBUG("Task is detached or asynchronous. Waiting returns immediately\n");
			
 
				 		_STARPU_LOG_OUT_TAG("einval");
			
 
				 		return -EINVAL;
			
 
				 	}
			
 
				 
			
 
				-	if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls())) {
			
 
				+	if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls()))
			
 
				+	{
			
 
				 		_STARPU_LOG_OUT_TAG("edeadlk");
			
 
				 		return -EDEADLK;
			
 
				 	}
			
@@ -208,7 +210,7 @@ int _starpu_submit_job(struct _starpu_job *j)
 
				 	_starpu_increment_nsubmitted_tasks();
			
 
				 
			
 
				 	_STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
			
 
				-	
			
 
				+
			
 
				 	j->submitted = 1;
			
 
				 
			
 
				 	int ret = _starpu_enforce_deps_and_schedule(j, 1);
			
@@ -232,7 +234,8 @@ int starpu_task_submit(struct starpu_task *task)
 
				 	{
			
 
				 		/* Perhaps it is not possible to submit a synchronous
			
 
				 		 * (blocking) task */
			
 
				-                if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls())) {
			
 
				+                if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls()))
			
 
				+		{
			
 
				                         _STARPU_LOG_OUT_TAG("EDEADLK");
			
 
				 			return -EDEADLK;
			
 
				                 }
			
@@ -245,12 +248,14 @@ int starpu_task_submit(struct starpu_task *task)
 
				 	{
			
 
				 		uint32_t where = task->cl->where;
			
 
				 		unsigned i;
			
 
				-		if (!_starpu_worker_exists(where)) {
			
 
				+		if (!_starpu_worker_exists(where))
			
 
				+		{
			
 
				                         _STARPU_LOG_OUT_TAG("ENODEV");
			
 
				 			return -ENODEV;
			
 
				                 }
			
 
				 		assert(task->cl->nbuffers <= STARPU_NMAXBUFS);
			
 
				-		for (i = 0; i < task->cl->nbuffers; i++) {
			
 
				+		for (i = 0; i < task->cl->nbuffers; i++)
			
 
				+		{
			
 
				 			/* Make sure handles are not partitioned */
			
 
				 			assert(task->buffers[i].handle->nchildren == 0);
			
 
				 		}
			
@@ -258,7 +263,8 @@ int starpu_task_submit(struct starpu_task *task)
 
				 		/* In case we require that a task should be explicitely
			
 
				 		 * executed on a specific worker, we make sure that the worker
			
 
				 		 * is able to execute this task.  */
			
 
				-		if (task->execute_on_a_specific_worker && !starpu_combined_worker_can_execute_task(task->workerid, task, 0)) {
			
 
				+		if (task->execute_on_a_specific_worker && !starpu_combined_worker_can_execute_task(task->workerid, task, 0))
			
 
				+		{
			
 
				                         _STARPU_LOG_OUT_TAG("ENODEV");
			
 
				 			return -ENODEV;
			
 
				                 }
			
@@ -313,7 +319,7 @@ void starpu_display_codelet_stats(struct starpu_codelet *cl)
 
				 		fprintf(stderr, "Statistics for codelet %s\n", cl->model->symbol);
			
 
				 
			
 
				 	unsigned long total = 0;
			
 
				-	
			
 
				+
			
 
				 	for (worker = 0; worker < nworkers; worker++)
			
 
				 		total += cl->per_worker_stats[worker];
			
 
				 
			
@@ -342,7 +348,7 @@ int starpu_task_wait_for_all(void)
 
				 
			
 
				 	while (nsubmitted > 0)
			
 
				 		_STARPU_PTHREAD_COND_WAIT(&submitted_cond, &submitted_mutex);
			
 
				-	
			
 
				+
			
 
				 	_STARPU_PTHREAD_MUTEX_UNLOCK(&submitted_mutex);
			
 
				 
			
 
				 	return 0;
			
@@ -363,7 +369,7 @@ int starpu_task_wait_for_no_ready(void)
 
				 
			
 
				 	while (nready > 0)
			
 
				 		_STARPU_PTHREAD_COND_WAIT(&submitted_cond, &submitted_mutex);
			
 
				-	
			
 
				+
			
 
				 	_STARPU_PTHREAD_MUTEX_UNLOCK(&submitted_mutex);
			
 
				 
			
 
				 	return 0;
			
@@ -436,16 +442,18 @@ double _starpu_task_get_conversion_time(struct starpu_task *task)
 
				 	int i;
			
 
				 	double conversion_time = 0.0;
			
 
				 
			
 
				-	for (i = 0; i < task->cl->nbuffers; i++) {
			
 
				+	for (i = 0; i < task->cl->nbuffers; i++)
			
 
				+	{
			
 
				 		starpu_data_handle_t handle = task->buffers[i].handle;
			
 
				 		enum starpu_data_interface_id id = starpu_get_handle_interface_id(handle);
			
 
				-		if (id == STARPU_MULTIFORMAT_INTERFACE_ID) {
			
 
				+		if (id == STARPU_MULTIFORMAT_INTERFACE_ID)
			
 
				+		{
			
 
				 			struct starpu_multiformat_interface *tmp;
			
 
				 			uint32_t node = starpu_worker_get_memory_node(task->workerid);
			
 
				 			tmp = starpu_data_get_interface_on_node(handle, node);
			
 
				 			conversion_time += tmp->conversion_time;
			
 
				 			/* XXX : this may not be the right place to reset this field,
			
 
				-			 * but we need to make sure the conversion time won't be counted 
			
 
				+			 * but we need to make sure the conversion time won't be counted
			
 
				                          * twice */
			
 
				 			tmp->conversion_time = 0;
			
 
				 		}
			
--- a/src/core/task_bundle.c
+++ b/src/core/task_bundle.c
@@ -88,7 +88,8 @@ int starpu_task_bundle_insert(struct starpu_task_bundle *bundle, struct starpu_t
 
				 	{
			
 
				 		bundle->list = entry;
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		struct starpu_task_bundle_entry *item;
			
 
				 		item = bundle->list;
			
 
				 		while (item->next)
			
@@ -188,7 +189,8 @@ double starpu_task_bundle_expected_length(struct starpu_task_bundle *bundle,  en
 
				 	struct starpu_task_bundle_entry *entry;
			
 
				 	entry = bundle->list;
			
 
				 
			
 
				-	while (entry) {
			
 
				+	while (entry)
			
 
				+	{
			
 
				 		double task_length = starpu_task_expected_length(entry->task, arch, nimpl);
			
 
				 
			
 
				 		/* In case the task is not calibrated, we consider the task
			
@@ -198,7 +200,7 @@ double starpu_task_bundle_expected_length(struct starpu_task_bundle *bundle,  en
 
				 
			
 
				 		entry = entry->next;
			
 
				 	}
			
 
				-	
			
 
				+
			
 
				 	_STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex);
			
 
				 
			
 
				 	return expected_length;
			
@@ -215,7 +217,8 @@ double starpu_task_bundle_expected_power(struct starpu_task_bundle *bundle,  enu
 
				 	struct starpu_task_bundle_entry *entry;
			
 
				 	entry = bundle->list;
			
 
				 
			
 
				-	while (entry) {
			
 
				+	while (entry)
			
 
				+	{
			
 
				 		double task_power = starpu_task_expected_power(entry->task, arch, nimpl);
			
 
				 
			
 
				 		/* In case the task is not calibrated, we consider the task
			
@@ -225,13 +228,14 @@ double starpu_task_bundle_expected_power(struct starpu_task_bundle *bundle,  enu
 
				 
			
 
				 		entry = entry->next;
			
 
				 	}
			
 
				-	
			
 
				+
			
 
				 	_STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex);
			
 
				 
			
 
				 	return expected_power;
			
 
				 }
			
 
				 
			
 
				-struct handle_list {
			
 
				+struct handle_list
			
 
				+{
			
 
				 	starpu_data_handle_t handle;
			
 
				 	enum starpu_access_mode mode;
			
 
				 	struct handle_list *next;
			
@@ -271,7 +275,8 @@ static void insertion_handle_sorted(struct handle_list **listp, starpu_data_hand
 
				 		/* The handle is already in the list */
			
 
				 		prev->mode |= mode;
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		/* The handle was not in the list, we insert it after prev */
			
 
				 		struct handle_list *link = (struct handle_list *) malloc(sizeof(struct handle_list));
			
 
				 		STARPU_ASSERT(link);
			
@@ -293,7 +298,8 @@ double starpu_task_bundle_expected_data_transfer_time(struct starpu_task_bundle
 
				 
			
 
				 	/* For each task in the bundle */
			
 
				 	struct starpu_task_bundle_entry *entry = bundle->list;
			
 
				-	while (entry) {
			
 
				+	while (entry)
			
 
				+	{
			
 
				 		struct starpu_task *task = entry->task;
			
 
				 
			
 
				 		if (task->cl)
			
--- a/src/core/topology.c
+++ b/src/core/topology.c
@@ -42,7 +42,6 @@
 
				 #define hwloc_bitmap_singlify hwloc_cpuset_singlify
			
 
				 #endif
			
 
				 
			
 
				-		
			
 
				 static unsigned topology_is_initialized = 0;
			
 
				 
			
 
				 static void _starpu_initialize_workers_bindid(struct _starpu_machine_config *config);
			
@@ -92,9 +91,11 @@ static void _starpu_initialize_workers_opencl_gpuid(struct _starpu_machine_confi
 
				                 unsigned tmp[STARPU_NMAXWORKERS];
			
 
				                 unsigned nb=0;
			
 
				                 int i;
			
 
				-                for(i=0 ; i<STARPU_NMAXWORKERS ; i++) {
			
 
				+                for(i=0 ; i<STARPU_NMAXWORKERS ; i++)
			
 
				+		{
			
 
				                         uint32_t key = _starpu_crc32_be(config->topology.workers_opencl_gpuid[i], 0);
			
 
				-                        if (_starpu_htbl_search_32(devices_using_cuda, key) == NULL) {
			
 
				+                        if (_starpu_htbl_search_32(devices_using_cuda, key) == NULL)
			
 
				+			{
			
 
				                                 tmp[nb] = topology->workers_opencl_gpuid[i];
			
 
				                                 nb++;
			
 
				                         }
			
@@ -110,9 +111,11 @@ static void _starpu_initialize_workers_opencl_gpuid(struct _starpu_machine_confi
 
				                 unsigned nb=0;
			
 
				                 int i;
			
 
				 
			
 
				-                for(i=0 ; i<STARPU_NMAXWORKERS ; i++) {
			
 
				+                for(i=0 ; i<STARPU_NMAXWORKERS ; i++)
			
 
				+		{
			
 
				                         uint32_t key = _starpu_crc32_be(topology->workers_opencl_gpuid[i], 0);
			
 
				-                        if (_starpu_htbl_search_32(devices_already_used, key) == NULL) {
			
 
				+                        if (_starpu_htbl_search_32(devices_already_used, key) == NULL)
			
 
				+			{
			
 
				                                 _starpu_htbl_insert_32(&devices_already_used, key, config);
			
 
				                                 tmp[nb] = topology->workers_opencl_gpuid[i];
			
 
				                                 nb ++;
			
@@ -161,7 +164,8 @@ static void _starpu_initialize_workers_gpuid(int use_explicit_workers_gpuid, int
 
				 		/* we use the content of the STARPU_WORKERS_CUDAID env. variable */
			
 
				 		for (i = 0; i < STARPU_NMAXWORKERS; i++)
			
 
				 		{
			
 
				-			if (!wrap) {
			
 
				+			if (!wrap)
			
 
				+			{
			
 
				 				long int val;
			
 
				 				val = strtol(strval, &endptr, 10);
			
 
				 				if (endptr != strval)
			
@@ -169,18 +173,20 @@ static void _starpu_initialize_workers_gpuid(int use_explicit_workers_gpuid, int
 
				 					workers_gpuid[i] = (unsigned)val;
			
 
				 					strval = endptr;
			
 
				 				}
			
 
				-				else {
			
 
				+				else
			
 
				+				{
			
 
				 					/* there must be at least one entry */
			
 
				 					STARPU_ASSERT(i != 0);
			
 
				 					number_of_entries = i;
			
 
				-	
			
 
				+
			
 
				 					/* there is no more values in the string */
			
 
				 					wrap = 1;
			
 
				 
			
 
				 					workers_gpuid[i] = workers_gpuid[0];
			
 
				 				}
			
 
				 			}
			
 
				-			else {
			
 
				+			else
			
 
				+			{
			
 
				 				workers_gpuid[i] = workers_gpuid[i % number_of_entries];
			
 
				 			}
			
 
				 		}
			
@@ -262,7 +268,7 @@ static void _starpu_init_topology(struct _starpu_machine_config *config)
 
				 unsigned _starpu_topology_get_nhwcpu(struct _starpu_machine_config *config)
			
 
				 {
			
 
				 	_starpu_init_topology(config);
			
 
				-	
			
 
				+
			
 
				 	return config->topology.nhwcpus;
			
 
				 }
			
 
				 
			
@@ -290,7 +296,8 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config,
 
				 		/* the user explicitely disabled CUDA */
			
 
				 		topology->ncudagpus = 0;
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		/* we need to initialize CUDA early to count the number of devices */
			
 
				 		_starpu_init_cuda();
			
 
				 
			
@@ -298,16 +305,21 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config,
 
				 		{
			
 
				 			explicitval = user_conf->ncuda;
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			explicitval = starpu_get_env_number("STARPU_NCUDA");
			
 
				 		}
			
 
				 
			
 
				-		if (explicitval < 0) {
			
 
				+		if (explicitval < 0)
			
 
				+		{
			
 
				 			config->topology.ncudagpus =
			
 
				 				STARPU_MIN(_starpu_get_cuda_device_count(), STARPU_MAXCUDADEVS);
			
 
				-		} else {
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				 			/* use the specified value */
			
 
				-			if (explicitval > STARPU_MAXCUDADEVS) {
			
 
				+			if (explicitval > STARPU_MAXCUDADEVS)
			
 
				+			{
			
 
				 				fprintf(stderr,"# Warning: %d CUDA devices requested. Only %d enabled. Use configure option --enable-maxcudadev=xxx to update the maximum value of supported CUDA devices.\n", explicitval, STARPU_MAXCUDADEVS);
			
 
				 				explicitval = STARPU_MAXCUDADEVS;
			
 
				 			}
			
@@ -329,7 +341,7 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config,
 
				 		int devid = _starpu_get_next_cuda_gpuid(config);
			
 
				 		enum starpu_perf_archtype arch = STARPU_CUDA_DEFAULT + devid;
			
 
				 		config->workers[topology->nworkers + cudagpu].devid = devid;
			
 
				-		config->workers[topology->nworkers + cudagpu].perf_arch = arch; 
			
 
				+		config->workers[topology->nworkers + cudagpu].perf_arch = arch;
			
 
				 		config->workers[topology->nworkers + cudagpu].worker_mask = STARPU_CUDA;
			
 
				 		config->worker_mask |= STARPU_CUDA;
			
 
				 
			
@@ -346,7 +358,8 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config,
 
				 		/* the user explicitely disabled OpenCL */
			
 
				 		topology->nopenclgpus = 0;
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		/* we need to initialize OpenCL early to count the number of devices */
			
 
				 		int nb_devices;
			
 
				 		_starpu_opencl_init();
			
@@ -356,20 +369,25 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config,
 
				 		{
			
 
				 			explicitval = user_conf->nopencl;
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			explicitval = starpu_get_env_number("STARPU_NOPENCL");
			
 
				 		}
			
 
				 
			
 
				 
			
 
				-		if (explicitval < 0) {
			
 
				+		if (explicitval < 0)
			
 
				+		{
			
 
				 			topology->nopenclgpus = nb_devices;
			
 
				 		}
			
 
				-		else {
			
 
				-			if (explicitval > nb_devices) {
			
 
				+		else
			
 
				+		{
			
 
				+			if (explicitval > nb_devices)
			
 
				+			{
			
 
				 				/* The user requires more OpenCL devices than there is available */
			
 
				 				topology->nopenclgpus = nb_devices;
			
 
				 			}
			
 
				-			else {
			
 
				+			else
			
 
				+			{
			
 
				 				/* use the specified value */
			
 
				 				topology->nopenclgpus = (unsigned)explicitval;
			
 
				 			}
			
@@ -388,32 +406,38 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config,
 
				 	for (openclgpu = 0; openclgpu < topology->nopenclgpus; openclgpu++)
			
 
				 	{
			
 
				 		int devid = _starpu_get_next_opencl_gpuid(config);
			
 
				-		if (devid == -1) { // There is no more devices left
			
 
				+		if (devid == -1)
			
 
				+		{ // There is no more devices left
			
 
				 			topology->nopenclgpus = openclgpu;
			
 
				 			break;
			
 
				 		}
			
 
				 		config->workers[topology->nworkers + openclgpu].arch = STARPU_OPENCL_WORKER;
			
 
				 		enum starpu_perf_archtype arch = STARPU_OPENCL_DEFAULT + devid;
			
 
				 		config->workers[topology->nworkers + openclgpu].devid = devid;
			
 
				-		config->workers[topology->nworkers + openclgpu].perf_arch = arch; 
			
 
				+		config->workers[topology->nworkers + openclgpu].perf_arch = arch;
			
 
				 		config->workers[topology->nworkers + openclgpu].worker_mask = STARPU_OPENCL;
			
 
				 		config->worker_mask |= STARPU_OPENCL;
			
 
				 	}
			
 
				 
			
 
				 	topology->nworkers += topology->nopenclgpus;
			
 
				 #endif
			
 
				-	
			
 
				+
			
 
				 #ifdef STARPU_USE_GORDON
			
 
				-	if (user_conf && (user_conf->ncuda != -1)) {
			
 
				+	if (user_conf && (user_conf->ncuda != -1))
			
 
				+	{
			
 
				 		explicitval = user_conf->ncuda;
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		explicitval = starpu_get_env_number("STARPU_NGORDON");
			
 
				 	}
			
 
				 
			
 
				-	if (explicitval < 0) {
			
 
				+	if (explicitval < 0)
			
 
				+	{
			
 
				 		topology->ngordon_spus = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, -1);
			
 
				-	} else {
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				 		/* use the specified value */
			
 
				 		topology->ngordon_spus = (unsigned)explicitval;
			
 
				 		STARPU_ASSERT(topology->ngordon_spus <= NMAXGORDONSPUS);
			
@@ -440,20 +464,25 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config,
 
				 /* we put the CPU section after the accelerator : in case there was an
			
 
				  * accelerator found, we devote one cpu */
			
 
				 #ifdef STARPU_USE_CPU
			
 
				-	if (user_conf && (user_conf->ncpus != -1)) {
			
 
				+	if (user_conf && (user_conf->ncpus != -1))
			
 
				+	{
			
 
				 		explicitval = user_conf->ncpus;
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		explicitval = starpu_get_env_number("STARPU_NCPUS");
			
 
				 	}
			
 
				 
			
 
				-	if (explicitval < 0) {
			
 
				+	if (explicitval < 0)
			
 
				+	{
			
 
				 		unsigned already_busy_cpus = (topology->ngordon_spus?1:0) + topology->ncudagpus + topology->nopenclgpus;
			
 
				 		long avail_cpus = topology->nhwcpus - (use_accelerator?already_busy_cpus:0);
			
 
				 		if (avail_cpus < 0)
			
 
				 			avail_cpus = 0;
			
 
				 		topology->ncpus = STARPU_MIN(avail_cpus, STARPU_MAXCPUS);
			
 
				-	} else {
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				 		/* use the specified value */
			
 
				 		topology->ncpus = (unsigned)explicitval;
			
 
				 		STARPU_ASSERT(topology->ncpus <= STARPU_MAXCPUS);
			
@@ -521,7 +550,8 @@ static void _starpu_initialize_workers_bindid(struct _starpu_machine_config *con
 
				 		/* we use the content of the STARPU_WORKERS_CUDAID env. variable */
			
 
				 		for (i = 0; i < STARPU_NMAXWORKERS; i++)
			
 
				 		{
			
 
				-			if (!wrap) {
			
 
				+			if (!wrap)
			
 
				+			{
			
 
				 				long int val;
			
 
				 				val = strtol(strval, &endptr, 10);
			
 
				 				if (endptr != strval)
			
@@ -529,7 +559,8 @@ static void _starpu_initialize_workers_bindid(struct _starpu_machine_config *con
 
				 					topology->workers_bindid[i] = (unsigned)(val % topology->nhwcpus);
			
 
				 					strval = endptr;
			
 
				 				}
			
 
				-				else {
			
 
				+				else
			
 
				+				{
			
 
				 					/* there must be at least one entry */
			
 
				 					STARPU_ASSERT(i != 0);
			
 
				 					number_of_entries = i;
			
@@ -540,7 +571,8 @@ static void _starpu_initialize_workers_bindid(struct _starpu_machine_config *con
 
				 					topology->workers_bindid[i] = topology->workers_bindid[0];
			
 
				 				}
			
 
				 			}
			
 
				-			else {
			
 
				+			else
			
 
				+			{
			
 
				 				topology->workers_bindid[i] = topology->workers_bindid[i % number_of_entries];
			
 
				 			}
			
 
				 		}
			
@@ -632,7 +664,8 @@ void _starpu_bind_thread_on_cpu(struct _starpu_machine_config *config STARPU_ATT
 
				 
			
 
				 #elif defined(__MINGW32__) || defined(__CYGWIN__)
			
 
				 	DWORD mask = 1 << cpuid;
			
 
				-	if (!SetThreadAffinityMask(GetCurrentThread(), mask)) {
			
 
				+	if (!SetThreadAffinityMask(GetCurrentThread(), mask))
			
 
				+	{
			
 
				 		fprintf(stderr,"SetThreadMaskAffinity(%lx) failed\n", mask);
			
 
				 		STARPU_ABORT();
			
 
				 	}
			
@@ -667,9 +700,10 @@ static void _starpu_init_workers_binding(struct _starpu_machine_config *config)
 
				 		/* Perhaps the worker has some "favourite" bindings  */
			
 
				 		int *preferred_binding = NULL;
			
 
				 		int npreferred = 0;
			
 
				-		
			
 
				+
			
 
				 		/* select the memory node that contains worker's memory */
			
 
				-		switch (workerarg->arch) {
			
 
				+		switch (workerarg->arch)
			
 
				+		{
			
 
				 			case STARPU_CPU_WORKER:
			
 
				 			/* "dedicate" a cpu cpu to that worker */
			
 
				 				is_a_set_of_accelerators = 0;
			
@@ -702,7 +736,8 @@ static void _starpu_init_workers_binding(struct _starpu_machine_config *config)
 
				 				for (worker2 = 0; worker2 < worker; worker2++)
			
 
				 				{
			
 
				 					struct _starpu_worker *workerarg = &config->workers[worker];
			
 
				-					if (workerarg->arch == STARPU_CUDA_WORKER) {
			
 
				+					if (workerarg->arch == STARPU_CUDA_WORKER)
			
 
				+					{
			
 
				 						unsigned memory_node2 = starpu_worker_get_memory_node(worker2);
			
 
				 						_starpu_register_bus(memory_node2, memory_node);
			
 
				 						_starpu_register_bus(memory_node, memory_node2);
			
@@ -732,13 +767,15 @@ static void _starpu_init_workers_binding(struct _starpu_machine_config *config)
 
				 				STARPU_ABORT();
			
 
				 		}
			
 
				 
			
 
				-		if (is_a_set_of_accelerators) {
			
 
				+		if (is_a_set_of_accelerators)
			
 
				+		{
			
 
				 			if (accelerator_bindid == -1)
			
 
				 				accelerator_bindid = _starpu_get_next_bindid(config, preferred_binding, npreferred);
			
 
				 
			
 
				 			workerarg->bindid = accelerator_bindid;
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			workerarg->bindid = _starpu_get_next_bindid(config, preferred_binding, npreferred);
			
 
				 		}
			
 
				 
			
--- a/src/core/workers.c
+++ b/src/core/workers.c
@@ -52,7 +52,7 @@ struct _starpu_machine_config *_starpu_get_machine_config(void)
 
				 uint32_t _starpu_worker_exists(uint32_t task_mask)
			
 
				 {
			
 
				 	return (task_mask & config.worker_mask);
			
 
				-} 
			
 
				+}
			
 
				 
			
 
				 uint32_t _starpu_can_submit_cuda_task(void)
			
 
				 {
			
@@ -71,7 +71,8 @@ uint32_t _starpu_can_submit_opencl_task(void)
 
				 
			
 
				 static int _starpu_can_use_nth_implementation(enum starpu_archtype arch, struct starpu_codelet *cl, unsigned nimpl)
			
 
				 {
			
 
				-	switch(arch) {
			
 
				+	switch(arch)
			
 
				+	{
			
 
				 	case STARPU_CPU_WORKER:
			
 
				 		if (cl->cpu_func == STARPU_MULTIPLE_CPU_IMPLEMENTATIONS)
			
 
				 			return cl->cpu_funcs[nimpl] != NULL;
			
@@ -119,7 +120,8 @@ int starpu_combined_worker_can_execute_task(unsigned workerid, struct starpu_tas
 
				 		return !!((task->cl->where & config.workers[workerid].worker_mask) &&
			
 
				 				_starpu_can_use_nth_implementation(config.workers[workerid].arch, task->cl, nimpl));
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		if ((cl->type == STARPU_SPMD) || (cl->type == STARPU_FORKJOIN))
			
 
				 		{
			
 
				 			/* TODO we should add other types of constraints */
			
@@ -142,7 +144,7 @@ int starpu_combined_worker_can_execute_task(unsigned workerid, struct starpu_tas
 
				  */
			
 
				 
			
 
				 #ifdef STARPU_USE_GORDON
			
 
				-static unsigned gordon_inited = 0;	
			
 
				+static unsigned gordon_inited = 0;
			
 
				 static struct _starpu_worker_set gordon_worker_set;
			
 
				 #endif
			
 
				 
			
@@ -187,19 +189,20 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *config)
 
				 		workerarg->terminated_jobs = _starpu_job_list_new();
			
 
				 
			
 
				 		starpu_task_list_init(&workerarg->local_tasks);
			
 
				-	
			
 
				+
			
 
				 		workerarg->status = STATUS_INITIALIZING;
			
 
				 
			
 
				 		_STARPU_DEBUG("initialising worker %u\n", worker);
			
 
				 
			
 
				 		_starpu_init_worker_queue(workerarg);
			
 
				 
			
 
				-		switch (workerarg->arch) {
			
 
				+		switch (workerarg->arch)
			
 
				+		{
			
 
				 #ifdef STARPU_USE_CPU
			
 
				 			case STARPU_CPU_WORKER:
			
 
				 				workerarg->set = NULL;
			
 
				 				workerarg->worker_is_initialized = 0;
			
 
				-				pthread_create(&workerarg->worker_thread, 
			
 
				+				pthread_create(&workerarg->worker_thread,
			
 
				 						NULL, _starpu_cpu_worker, workerarg);
			
 
				 				break;
			
 
				 #endif
			
@@ -207,7 +210,7 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *config)
 
				 			case STARPU_CUDA_WORKER:
			
 
				 				workerarg->set = NULL;
			
 
				 				workerarg->worker_is_initialized = 0;
			
 
				-				pthread_create(&workerarg->worker_thread, 
			
 
				+				pthread_create(&workerarg->worker_thread,
			
 
				 						NULL, _starpu_cuda_worker, workerarg);
			
 
				 
			
 
				 				break;
			
@@ -216,23 +219,23 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *config)
 
				 			case STARPU_OPENCL_WORKER:
			
 
				 				workerarg->set = NULL;
			
 
				 				workerarg->worker_is_initialized = 0;
			
 
				-				pthread_create(&workerarg->worker_thread, 
			
 
				+				pthread_create(&workerarg->worker_thread,
			
 
				 						NULL, _starpu_opencl_worker, workerarg);
			
 
				 
			
 
				 				break;
			
 
				 #endif
			
 
				 #ifdef STARPU_USE_GORDON
			
 
				 			case STARPU_GORDON_WORKER:
			
 
				-				/* we will only launch gordon once, but it will handle 
			
 
				+				/* we will only launch gordon once, but it will handle
			
 
				 				 * the different SPU workers */
			
 
				 				if (!gordon_inited)
			
 
				 				{
			
 
				-					gordon_worker_set.nworkers = config->ngordon_spus; 
			
 
				+					gordon_worker_set.nworkers = config->ngordon_spus;
			
 
				 					gordon_worker_set.workers = &config->workers[worker];
			
 
				 
			
 
				 					gordon_worker_set.set_is_initialized = 0;
			
 
				 
			
 
				-					pthread_create(&gordon_worker_set.worker_thread, NULL, 
			
 
				+					pthread_create(&gordon_worker_set.worker_thread, NULL,
			
 
				 							_starpu_gordon_worker, &gordon_worker_set);
			
 
				 
			
 
				 					_STARPU_PTHREAD_MUTEX_LOCK(&gordon_worker_set.mutex);
			
@@ -243,7 +246,7 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *config)
 
				 
			
 
				 					gordon_inited = 1;
			
 
				 				}
			
 
				-				
			
 
				+
			
 
				 				workerarg->set = &gordon_worker_set;
			
 
				 				gordon_worker_set.joined = 0;
			
 
				 				workerarg->worker_is_running = 1;
			
@@ -259,10 +262,11 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *config)
 
				 	{
			
 
				 		struct _starpu_worker *workerarg = &config->workers[worker];
			
 
				 
			
 
				-		switch (workerarg->arch) {
			
 
				+		switch (workerarg->arch)
			
 
				+		{
			
 
				 			case STARPU_CPU_WORKER:
			
 
				 			case STARPU_CUDA_WORKER:
			
 
				-			case STARPU_OPENCL_WORKER:			  
			
 
				+			case STARPU_OPENCL_WORKER:
			
 
				 				_STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
			
 
				 				while (!workerarg->worker_is_initialized)
			
 
				 					_STARPU_PTHREAD_COND_WAIT(&workerarg->ready_cond, &workerarg->mutex);
			
@@ -355,11 +359,12 @@ int starpu_init(struct starpu_conf *user_conf)
 
				 		/* Wait for the other one changing it */
			
 
				 		_STARPU_PTHREAD_COND_WAIT(&init_cond, &init_mutex);
			
 
				 	init_count++;
			
 
				-	if (initialized == INITIALIZED) {
			
 
				-	  /* He initialized it, don't do it again, and let the others get the mutex */
			
 
				-	  _STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
			
 
				-	  return 0;
			
 
				-	  }
			
 
				+	if (initialized == INITIALIZED)
			
 
				+	{
			
 
				+		/* He initialized it, don't do it again, and let the others get the mutex */
			
 
				+		_STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
			
 
				+		return 0;
			
 
				+	}
			
 
				 	/* initialized == UNINITIALIZED */
			
 
				 	initialized = CHANGING;
			
 
				 	_STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
			
@@ -370,11 +375,11 @@ int starpu_init(struct starpu_conf *user_conf)
 
				 #endif
			
 
				 
			
 
				 	srand(2008);
			
 
				-	
			
 
				+
			
 
				 #ifdef STARPU_USE_FXT
			
 
				 	_starpu_start_fxt_profiling();
			
 
				 #endif
			
 
				-	
			
 
				+
			
 
				 	_starpu_open_debug_logfile();
			
 
				 
			
 
				 	_starpu_data_interface_init();
			
@@ -390,7 +395,8 @@ int starpu_init(struct starpu_conf *user_conf)
 
				 	config.user_conf = user_conf;
			
 
				 
			
 
				 	ret = _starpu_build_topology(&config);
			
 
				-	if (ret) {
			
 
				+	if (ret)
			
 
				+	{
			
 
				 		_STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
			
 
				 		init_count--;
			
 
				 		initialized = UNINITIALIZED;
			
@@ -402,7 +408,7 @@ int starpu_init(struct starpu_conf *user_conf)
 
				 
			
 
				 	/* We need to store the current task handled by the different
			
 
				 	 * threads */
			
 
				-	_starpu_initialize_current_task_key();	
			
 
				+	_starpu_initialize_current_task_key();
			
 
				 
			
 
				 	/* initialize the scheduling policy */
			
 
				 	_starpu_init_sched_policy(&config);
			
@@ -423,7 +429,7 @@ int starpu_init(struct starpu_conf *user_conf)
 
				 }
			
 
				 
			
 
				 /*
			
 
				- * Handle runtime termination 
			
 
				+ * Handle runtime termination
			
 
				  */
			
 
				 
			
 
				 static void _starpu_terminate_workers(struct _starpu_machine_config *config)
			
@@ -434,7 +440,7 @@ static void _starpu_terminate_workers(struct _starpu_machine_config *config)
 
				 	for (workerid = 0; workerid < config->topology.nworkers; workerid++)
			
 
				 	{
			
 
				 		starpu_wake_all_blocked_workers();
			
 
				-		
			
 
				+
			
 
				 		_STARPU_DEBUG("wait for worker %u\n", workerid);
			
 
				 
			
 
				 		struct _starpu_worker_set *set = config->workers[workerid].set;
			
@@ -442,13 +448,16 @@ static void _starpu_terminate_workers(struct _starpu_machine_config *config)
 
				 
			
 
				 		/* in case StarPU termination code is called from a callback,
			
 
				  		 * we have to check if pthread_self() is the worker itself */
			
 
				-		if (set){ 
			
 
				-			if (!set->joined) {
			
 
				+		if (set)
			
 
				+		{
			
 
				+			if (!set->joined)
			
 
				+			{
			
 
				 				if (!pthread_equal(pthread_self(), set->worker_thread))
			
 
				 				{
			
 
				 					status = pthread_join(set->worker_thread, NULL);
			
 
				 #ifdef STARPU_VERBOSE
			
 
				-					if (status) {
			
 
				+					if (status)
			
 
				+					{
			
 
				 						_STARPU_DEBUG("pthread_join -> %d\n", status);
			
 
				                                         }
			
 
				 #endif
			
@@ -457,12 +466,14 @@ static void _starpu_terminate_workers(struct _starpu_machine_config *config)
 
				 				set->joined = 1;
			
 
				 			}
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			if (!pthread_equal(pthread_self(), worker->worker_thread))
			
 
				 			{
			
 
				 				status = pthread_join(worker->worker_thread, NULL);
			
 
				 #ifdef STARPU_VERBOSE
			
 
				-				if (status) {
			
 
				+				if (status)
			
 
				+				{
			
 
				 					_STARPU_DEBUG("pthread_join -> %d\n", status);
			
 
				                                 }
			
 
				 #endif
			
@@ -511,7 +522,8 @@ void starpu_shutdown(void)
 
				 	const char *stats;
			
 
				 	_STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
			
 
				 	init_count--;
			
 
				-	if (init_count) {
			
 
				+	if (init_count)
			
 
				+	{
			
 
				 		_STARPU_DEBUG("Still somebody needing StarPU, don't deinitialize\n");
			
 
				 		return;
			
 
				 	}
			
@@ -633,7 +645,8 @@ int starpu_worker_get_id(void)
 
				 	{
			
 
				 		return worker->workerid;
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		/* there is no worker associated to that thread, perhaps it is
			
 
				 		 * a thread from the application or this is some SPU worker */
			
 
				 		return -1;
			
@@ -649,7 +662,8 @@ int starpu_combined_worker_get_id(void)
 
				 	{
			
 
				 		return worker->combined_workerid;
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		/* there is no worker associated to that thread, perhaps it is
			
 
				 		 * a thread from the application or this is some SPU worker */
			
 
				 		return -1;
			
@@ -665,7 +679,8 @@ int starpu_combined_worker_get_size(void)
 
				 	{
			
 
				 		return worker->worker_size;
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		/* there is no worker associated to that thread, perhaps it is
			
 
				 		 * a thread from the application or this is some SPU worker */
			
 
				 		return -1;
			
@@ -681,7 +696,8 @@ int starpu_combined_worker_get_rank(void)
 
				 	{
			
 
				 		return worker->current_rank;
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		/* there is no worker associated to that thread, perhaps it is
			
 
				 		 * a thread from the application or this is some SPU worker */
			
 
				 		return -1;
			
--- a/src/core/workers.h
+++ b/src/core/workers.h
@@ -53,7 +53,8 @@
 
				 
			
 
				 #include <starpu_parameters.h>
			
 
				 
			
 
				-struct _starpu_worker {
			
 
				+struct _starpu_worker
			
 
				+{
			
 
				 	struct _starpu_machine_config *config;
			
 
				         pthread_mutex_t mutex;
			
 
				 	enum starpu_archtype arch; /* what is the type of worker ? */
			
@@ -89,7 +90,8 @@ struct _starpu_worker {
 
				 #endif
			
 
				 };
			
 
				 
			
 
				-struct _starpu_combined_worker {
			
 
				+struct _starpu_combined_worker
			
 
				+{
			
 
				 	enum starpu_perf_archtype perf_arch; /* in case there are different models of the same arch */
			
 
				 	uint32_t worker_mask; /* what is the type of workers ? */
			
 
				 	int worker_size;
			
@@ -104,9 +106,10 @@ struct _starpu_combined_worker {
 
				 #endif
			
 
				 };
			
 
				 
			
 
				-/* in case a single CPU worker may control multiple 
			
 
				+/* in case a single CPU worker may control multiple
			
 
				  * accelerators (eg. Gordon for n SPUs) */
			
 
				-struct _starpu_worker_set {
			
 
				+struct _starpu_worker_set
			
 
				+{
			
 
				         pthread_mutex_t mutex;
			
 
				 	pthread_t worker_thread; /* the thread which runs the worker */
			
 
				 	unsigned nworkers;
			
@@ -117,8 +120,8 @@ struct _starpu_worker_set {
 
				 	unsigned set_is_initialized;
			
 
				 };
			
 
				 
			
 
				-struct _starpu_machine_config {
			
 
				-
			
 
				+struct _starpu_machine_config
			
 
				+{
			
 
				 	struct starpu_machine_topology topology;
			
 
				 
			
 
				 #ifdef STARPU_HAVE_HWLOC
			
@@ -127,13 +130,13 @@ struct _starpu_machine_config {
 
				 
			
 
				 	/* Where to bind workers ? */
			
 
				 	int current_bindid;
			
 
				-	
			
 
				+
			
 
				 	/* Which GPU(s) do we use for CUDA ? */
			
 
				 	int current_cuda_gpuid;
			
 
				 
			
 
				 	/* Which GPU(s) do we use for OpenCL ? */
			
 
				 	int current_opencl_gpuid;
			
 
				-	
			
 
				+
			
 
				 	/* Basic workers : each of this worker is running its own driver and
			
 
				 	 * can be combined with other basic workers. */
			
 
				 	struct _starpu_worker workers[STARPU_NMAXWORKERS];
			
--- a/src/datawizard/coherency.c
+++ b/src/datawizard/coherency.c
@@ -40,7 +40,8 @@ uint32_t _starpu_select_src_node(starpu_data_handle_t handle, unsigned destinati
 
				 
			
 
				 	for (node = 0; node < nnodes; node++)
			
 
				 	{
			
 
				-		if (handle->per_node[node].state != STARPU_INVALID) {
			
 
				+		if (handle->per_node[node].state != STARPU_INVALID)
			
 
				+		{
			
 
				 			/* we found a copy ! */
			
 
				 			src_node_mask |= (1<<node);
			
 
				 		}
			
@@ -66,11 +67,14 @@ uint32_t _starpu_select_src_node(starpu_data_handle_t handle, unsigned destinati
 
				 				if (!link_supports_direct_transfers(handle, i, destination, &handling_node))
			
 
				 					continue;
			
 
				 
			
 
				-				if (time == 0.0) {
			
 
				+				if (time == 0.0)
			
 
				+				{
			
 
				 					/* No estimation, will have to revert to dumb strategy */
			
 
				 					cost = 0.0;
			
 
				 					break;
			
 
				-				} else if (time < cost) {
			
 
				+				}
			
 
				+				else if (time < cost)
			
 
				+				{
			
 
				 					cost = time;
			
 
				 					src_node = i;
			
 
				 				}
			
@@ -97,7 +101,7 @@ uint32_t _starpu_select_src_node(starpu_data_handle_t handle, unsigned destinati
 
				 #ifndef HAVE_CUDA_MEMCPY_PEER
			
 
				 					_starpu_get_node_kind(i) != STARPU_CUDA_RAM &&
			
 
				 #endif
			
 
				-					_starpu_get_node_kind(i) != STARPU_OPENCL_RAM)	
			
 
				+					_starpu_get_node_kind(i) != STARPU_OPENCL_RAM)
			
 
				 				break ;
			
 
				 		}
			
 
				 	}
			
@@ -123,7 +127,8 @@ void _starpu_update_data_state(starpu_data_handle_t handle,
 
				 	unsigned requesting_node = requesting_replicate->memory_node;
			
 
				 	requesting_replicate->requested[requesting_node] = 0;
			
 
				 
			
 
				-	if (mode & STARPU_W) {
			
 
				+	if (mode & STARPU_W)
			
 
				+	{
			
 
				 		/* the requesting node now has the only valid copy */
			
 
				 		uint32_t node;
			
 
				 		for (node = 0; node < nnodes; node++)
			
@@ -131,7 +136,8 @@ void _starpu_update_data_state(starpu_data_handle_t handle,
 
				 
			
 
				 		requesting_replicate->state = STARPU_OWNER;
			
 
				 	}
			
 
				-	else { /* read only */
			
 
				+	else
			
 
				+	{ /* read only */
			
 
				 		if (requesting_replicate->state != STARPU_OWNER)
			
 
				 		{
			
 
				 			/* there was at least another copy of the data */
			
@@ -227,7 +233,8 @@ static int determine_request_path(starpu_data_handle_t handle,
 
				 	unsigned handling_node;
			
 
				 	int link_is_valid = link_supports_direct_transfers(handle, src_node, dst_node, &handling_node);
			
 
				 
			
 
				-	if (!link_is_valid) {
			
 
				+	if (!link_is_valid)
			
 
				+	{
			
 
				 		/* We need an intermediate hop to implement data staging
			
 
				 		 * through main memory. */
			
 
				 		STARPU_ASSERT(max_len >= 2);
			
@@ -246,9 +253,10 @@ static int determine_request_path(starpu_data_handle_t handle,
 
				 
			
 
				 		return 2;
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		STARPU_ASSERT(max_len >= 1);
			
 
				-		
			
 
				+
			
 
				 		src_nodes[0] = src_node;
			
 
				 		dst_nodes[0] = dst_node;
			
 
				 		handling_nodes[0] = handling_node;
			
@@ -273,17 +281,18 @@ static struct _starpu_data_request *_starpu_search_existing_data_request(struct
 
				 	if (r)
			
 
				 	{
			
 
				 		_starpu_spin_lock(&r->lock);
			
 
				-                
			
 
				+
			
 
				                 /* perhaps we need to "upgrade" the request */
			
 
				-		if (is_prefetch < r->prefetch) 
			
 
				+		if (is_prefetch < r->prefetch)
			
 
				 			_starpu_update_prefetch_status(r);
			
 
				-		
			
 
				+
			
 
				 		if (mode & STARPU_R)
			
 
				 		{
			
 
				 			/* in case the exisiting request did not imply a memory
			
 
				 			 * transfer yet, we have to increment the refcnt now
			
 
				 			 * (so that the source remains valid) */
			
 
				-			if (!(r->mode & STARPU_R)) {
			
 
				+			if (!(r->mode & STARPU_R))
			
 
				+			{
			
 
				 				replicate->refcnt++;
			
 
				 				replicate->handle->busy_count++;
			
 
				 			}
			
@@ -302,21 +311,21 @@ static struct _starpu_data_request *_starpu_search_existing_data_request(struct
 
				 
			
 
				 /*
			
 
				  * This function is called when the data is needed on the local node, this
			
 
				- * returns a pointer to the local copy 
			
 
				+ * returns a pointer to the local copy
			
 
				  *
			
 
				  *			R 	STARPU_W 	STARPU_RW
			
 
				  *	Owner		OK	OK	OK
			
 
				  *	Shared		OK	1	1
			
 
				  *	Invalid		2	3	4
			
 
				  *
			
 
				- * case 1 : shared + (read)write : 
			
 
				+ * case 1 : shared + (read)write :
			
 
				  * 	no data copy but shared->Invalid/Owner
			
 
				- * case 2 : invalid + read : 
			
 
				+ * case 2 : invalid + read :
			
 
				  * 	data copy + invalid->shared + owner->shared (STARPU_ASSERT(there is a valid))
			
 
				- * case 3 : invalid + write : 
			
 
				+ * case 3 : invalid + write :
			
 
				  * 	no data copy + invalid->owner + (owner,shared)->invalid
			
 
				- * case 4 : invalid + R/STARPU_W : 
			
 
				- * 	data copy + if (STARPU_W) (invalid->owner + owner->invalid) 
			
 
				+ * case 4 : invalid + R/STARPU_W :
			
 
				+ * 	data copy + if (STARPU_W) (invalid->owner + owner->invalid)
			
 
				  * 		    else (invalid,owner->shared)
			
 
				  */
			
 
				 
			
@@ -336,16 +345,16 @@ struct _starpu_data_request *_starpu_create_request_to_fetch_data(starpu_data_ha
 
				 		/* the data is already available so we can stop */
			
 
				 		_starpu_update_data_state(handle, dst_replicate, mode);
			
 
				 		_starpu_msi_cache_hit(requesting_node);
			
 
				-		
			
 
				+
			
 
				 #ifdef STARPU_MEMORY_STATUS
			
 
				 		_starpu_handle_stats_cache_hit(handle, requesting_node);
			
 
				 
			
 
				 		/* XXX Broken ? */
			
 
				-		if (old_state == STARPU_SHARED 
			
 
				+		if (old_state == STARPU_SHARED
			
 
				 		    && dst_replicate->state == STARPU_OWNER)
			
 
				 			_starpu_handle_stats_shared_to_owner(handle, requesting_node);
			
 
				 #endif
			
 
				-		
			
 
				+
			
 
				 		_starpu_memchunk_recently_used(dst_replicate->mc, requesting_node);
			
 
				 
			
 
				 		_starpu_spin_unlock(&handle->header_lock);
			
@@ -405,19 +414,20 @@ struct _starpu_data_request *_starpu_create_request_to_fetch_data(starpu_data_ha
 
				 
			
 
				 		/* Try to reuse a request if possible */
			
 
				 		r = _starpu_search_existing_data_request(hop_dst_replicate,
			
 
				-				(mode & STARPU_R)?hop_src_node:hop_dst_node, 
			
 
				+				(mode & STARPU_R)?hop_src_node:hop_dst_node,
			
 
				 							 mode, is_prefetch);
			
 
				 
			
 
				 		reused_requests[hop] = !!r;
			
 
				 
			
 
				-		if (!r) {
			
 
				+		if (!r)
			
 
				+		{
			
 
				 			/* Create a new request if there was no request to reuse */
			
 
				 			r = _starpu_create_data_request(handle, hop_src_replicate,
			
 
				 							hop_dst_replicate, hop_handling_node,
			
 
				 							mode, ndeps, is_prefetch);
			
 
				 		}
			
 
				 
			
 
				-		requests[hop] = r; 
			
 
				+		requests[hop] = r;
			
 
				 	}
			
 
				 
			
 
				 	/* Chain these requests */
			
@@ -461,7 +471,8 @@ int _starpu_fetch_data_on_node(starpu_data_handle_t handle, struct _starpu_data_
 
				 	while (_starpu_spin_trylock(&handle->header_lock))
			
 
				 		_starpu_datawizard_progress(local_node, 1);
			
 
				 
			
 
				-	if (!is_prefetch) {
			
 
				+	if (!is_prefetch)
			
 
				+	{
			
 
				 		dst_replicate->refcnt++;
			
 
				 		dst_replicate->handle->busy_count++;
			
 
				 	}
			
@@ -475,7 +486,7 @@ int _starpu_fetch_data_on_node(starpu_data_handle_t handle, struct _starpu_data_
 
				 	 * unlocked the header. */
			
 
				 	if (!r)
			
 
				 		return 0;
			
 
				-	
			
 
				+
			
 
				 	_starpu_spin_unlock(&handle->header_lock);
			
 
				 
			
 
				 	int ret = is_prefetch?0:_starpu_wait_data_request_completion(r, 1);
			
@@ -508,7 +519,7 @@ uint32_t _starpu_data_get_footprint(starpu_data_handle_t handle)
 
				 	return handle->footprint;
			
 
				 }
			
 
				 
			
 
				-/* in case the data was accessed on a write mode, do not forget to 
			
 
				+/* in case the data was accessed on a write mode, do not forget to
			
 
				  * make it accessible again once it is possible ! */
			
 
				 void _starpu_release_data_on_node(starpu_data_handle_t handle, uint32_t default_wt_mask, struct _starpu_data_replicate *replicate)
			
 
				 {
			
@@ -553,7 +564,7 @@ static void _starpu_set_data_requested_flag_if_needed(struct _starpu_data_replic
 
				 // XXX : this is just a hint, so we don't take the lock ...
			
 
				 //	pthread_spin_lock(&handle->header_lock);
			
 
				 
			
 
				-	if (replicate->state == STARPU_INVALID) 
			
 
				+	if (replicate->state == STARPU_INVALID)
			
 
				 	{
			
 
				 		unsigned dst_node = replicate->memory_node;
			
 
				 		replicate->requested[dst_node] = 1;
			
@@ -613,7 +624,8 @@ int _starpu_fetch_task_input(struct starpu_task *task, uint32_t mask)
 
				 		{
			
 
				 			local_replicate = &handle->per_worker[workerid];
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			/* That's a "normal" buffer (R/W) */
			
 
				 			local_replicate = &handle->per_node[local_memory_node];
			
 
				 		}
			
@@ -708,7 +720,8 @@ unsigned _starpu_is_data_present_or_requested(starpu_data_handle_t handle, uint3
 
				 	{
			
 
				 		ret  = 1;
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		unsigned i;
			
 
				 		unsigned nnodes = _starpu_get_memory_nodes_count();
			
 
				 
			
--- a/src/datawizard/coherency.h
+++ b/src/datawizard/coherency.h
@@ -31,7 +31,8 @@
 
				 #include <datawizard/interfaces/data_interface.h>
			
 
				 #include <datawizard/datastats.h>
			
 
				 
			
 
				-enum _starpu_cache_state {
			
 
				+enum _starpu_cache_state
			
 
				+{
			
 
				 	STARPU_OWNER,
			
 
				 	STARPU_SHARED,
			
 
				 	STARPU_INVALID
			
@@ -55,23 +56,23 @@ LIST_TYPE(_starpu_data_replicate,
 
				 	unsigned initialized;
			
 
				 
			
 
				 	/* describes the state of the local data in term of coherency */
			
 
				-	enum _starpu_cache_state	state; 
			
 
				+	enum _starpu_cache_state	state;
			
 
				 
			
 
				 	int refcnt;
			
 
				 
			
 
				 	/* is the data locally allocated ? */
			
 
				-	uint8_t allocated; 
			
 
				+	uint8_t allocated;
			
 
				 	/* was it automatically allocated ? */
			
 
				-	/* perhaps the allocation was perform higher in the hiearchy 
			
 
				+	/* perhaps the allocation was perform higher in the hiearchy
			
 
				 	 * for now this is just translated into !automatically_allocated
			
 
				 	 * */
			
 
				 	uint8_t automatically_allocated;
			
 
				 
			
 
				         /* Pointer to memchunk for LRU strategy */
			
 
				 	struct _starpu_mem_chunk * mc;
			
 
				- 
			
 
				+
			
 
				 	/* To help the scheduling policies to make some decision, we
			
 
				-	   may keep a track of the tasks that are likely to request 
			
 
				+	   may keep a track of the tasks that are likely to request
			
 
				 	   this data on the current node.
			
 
				 	   It is the responsability of the scheduling _policy_ to set that
			
 
				 	   flag when it assigns a task to a queue, policies which do not
			
@@ -83,18 +84,21 @@ LIST_TYPE(_starpu_data_replicate,
 
				 
			
 
				 struct _starpu_data_requester_list;
			
 
				 
			
 
				-struct _starpu_jobid_list {
			
 
				+struct _starpu_jobid_list
			
 
				+{
			
 
				 	unsigned long id;
			
 
				 	struct _starpu_jobid_list *next;
			
 
				 };
			
 
				 
			
 
				 /* This structure describes a simply-linked list of task */
			
 
				-struct _starpu_task_wrapper_list {
			
 
				+struct _starpu_task_wrapper_list
			
 
				+{
			
 
				 	struct starpu_task *task;
			
 
				 	struct _starpu_task_wrapper_list *next;
			
 
				 };
			
 
				 
			
 
				-struct _starpu_data_state {
			
 
				+struct _starpu_data_state
			
 
				+{
			
 
				 	struct _starpu_data_requester_list *req_list;
			
 
				 	/* the number of requests currently in the scheduling engine (not in
			
 
				 	 * the req_list anymore), i.e. the number of holders of the
			
@@ -154,7 +158,7 @@ struct _starpu_data_state {
 
				 	/* This lock should protect any operation to enforce
			
 
				 	 * sequential_consistency */
			
 
				 	pthread_mutex_t sequential_consistency_mutex;
			
 
				-	
			
 
				+
			
 
				 	/* The last submitted task (or application data request) that declared
			
 
				 	 * it would modify the piece of data ? Any task accessing the data in a
			
 
				 	 * read-only mode should depend on that task implicitely if the
			
@@ -172,7 +176,7 @@ struct _starpu_data_state {
 
				 	unsigned last_submitted_ghost_writer_id_is_valid;
			
 
				 	unsigned long last_submitted_ghost_writer_id;
			
 
				 	struct _starpu_jobid_list *last_submitted_ghost_readers_id;
			
 
				-	
			
 
				+
			
 
				 	struct _starpu_task_wrapper_list *post_sync_tasks;
			
 
				 	unsigned post_sync_tasks_cnt;
			
 
				 
			
--- a/src/datawizard/copy_driver.c
+++ b/src/datawizard/copy_driver.c
@@ -122,7 +122,8 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
 
				 	}
			
 
				 #endif
			
 
				 
			
 
				-	switch (_STARPU_MEMORY_NODE_TUPLE(src_kind,dst_kind)) {
			
 
				+	switch (_STARPU_MEMORY_NODE_TUPLE(src_kind,dst_kind))
			
 
				+	{
			
 
				 	case _STARPU_MEMORY_NODE_TUPLE(STARPU_CPU_RAM,STARPU_CPU_RAM):
			
 
				 		/* STARPU_CPU_RAM -> STARPU_CPU_RAM */
			
 
				 		STARPU_ASSERT(copy_methods->ram_to_ram);
			
@@ -132,11 +133,13 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
 
				 	case _STARPU_MEMORY_NODE_TUPLE(STARPU_CUDA_RAM,STARPU_CPU_RAM):
			
 
				 		/* only the proper CUBLAS thread can initiate this directly ! */
			
 
				 		STARPU_ASSERT(copy_methods->cuda_to_ram);
			
 
				-		if (!req || !copy_methods->cuda_to_ram_async) {
			
 
				+		if (!req || !copy_methods->cuda_to_ram_async)
			
 
				+		{
			
 
				 			/* this is not associated to a request so it's synchronous */
			
 
				 			copy_methods->cuda_to_ram(src_interface, src_node, dst_interface, dst_node);
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			req->async_channel.type = STARPU_CUDA_RAM;
			
 
				 			cures = cudaEventCreate(&req->async_channel.event.cuda_event);
			
 
				 			if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures);
			
@@ -153,11 +156,13 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
 
				 		/* only the proper CUBLAS thread can initiate this ! */
			
 
				 		STARPU_ASSERT(_starpu_get_local_memory_node() == dst_node);
			
 
				 		STARPU_ASSERT(copy_methods->ram_to_cuda);
			
 
				-		if (!req || !copy_methods->ram_to_cuda_async) {
			
 
				+		if (!req || !copy_methods->ram_to_cuda_async)
			
 
				+		{
			
 
				 			/* this is not associated to a request so it's synchronous */
			
 
				 			copy_methods->ram_to_cuda(src_interface, src_node, dst_interface, dst_node);
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			req->async_channel.type = STARPU_CUDA_RAM;
			
 
				 			cures = cudaEventCreate(&req->async_channel.event.cuda_event);
			
 
				 			if (STARPU_UNLIKELY(cures != cudaSuccess))
			
@@ -174,12 +179,14 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
 
				 	case _STARPU_MEMORY_NODE_TUPLE(STARPU_CUDA_RAM,STARPU_CUDA_RAM):
			
 
				 		/* CUDA - CUDA transfer */
			
 
				 		STARPU_ASSERT(copy_methods->cuda_to_cuda || copy_methods->cuda_to_cuda_async);
			
 
				-		if (!req || !copy_methods->cuda_to_cuda_async) {
			
 
				+		if (!req || !copy_methods->cuda_to_cuda_async)
			
 
				+		{
			
 
				 			STARPU_ASSERT(copy_methods->cuda_to_cuda);
			
 
				 			/* this is not associated to a request so it's synchronous */
			
 
				 			copy_methods->cuda_to_cuda(src_interface, src_node, dst_interface, dst_node);
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			req->async_channel.type = STARPU_CUDA_RAM;
			
 
				 			cures = cudaEventCreate(&req->async_channel.event.cuda_event);
			
 
				 			if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures);
			
@@ -195,18 +202,22 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
 
				 #ifdef STARPU_USE_OPENCL
			
 
				 	case _STARPU_MEMORY_NODE_TUPLE(STARPU_OPENCL_RAM,STARPU_CPU_RAM):
			
 
				 		/* OpenCL -> RAM */
			
 
				-		if (_starpu_get_local_memory_node() == src_node) {
			
 
				+		if (_starpu_get_local_memory_node() == src_node)
			
 
				+		{
			
 
				 			STARPU_ASSERT(copy_methods->opencl_to_ram);
			
 
				-			if (!req || !copy_methods->opencl_to_ram_async) {
			
 
				+			if (!req || !copy_methods->opencl_to_ram_async)
			
 
				+			{
			
 
				 				/* this is not associated to a request so it's synchronous */
			
 
				 				copy_methods->opencl_to_ram(src_interface, src_node, dst_interface, dst_node);
			
 
				 			}
			
 
				-			else {
			
 
				+			else
			
 
				+			{
			
 
				 				req->async_channel.type = STARPU_OPENCL_RAM;
			
 
				 				ret = copy_methods->opencl_to_ram_async(src_interface, src_node, dst_interface, dst_node, &(req->async_channel.event.opencl_event));
			
 
				 			}
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			/* we should not have a blocking call ! */
			
 
				 			STARPU_ABORT();
			
 
				 		}
			
@@ -215,11 +226,13 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
 
				 		/* STARPU_CPU_RAM -> STARPU_OPENCL_RAM */
			
 
				 		STARPU_ASSERT(_starpu_get_local_memory_node() == dst_node);
			
 
				 		STARPU_ASSERT(copy_methods->ram_to_opencl);
			
 
				-		if (!req || !copy_methods->ram_to_opencl_async) {
			
 
				+		if (!req || !copy_methods->ram_to_opencl_async)
			
 
				+		{
			
 
				 			/* this is not associated to a request so it's synchronous */
			
 
				 			copy_methods->ram_to_opencl(src_interface, src_node, dst_interface, dst_node);
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			req->async_channel.type = STARPU_OPENCL_RAM;
			
 
				 			ret = copy_methods->ram_to_opencl_async(src_interface, src_node, dst_interface, dst_node, &(req->async_channel.event.opencl_event));
			
 
				 		}
			
@@ -266,12 +279,13 @@ int __attribute__((warn_unused_result)) _starpu_driver_copy_data_1_to_1(starpu_d
 
				 	STARPU_ASSERT(dst_replicate->allocated);
			
 
				 	STARPU_ASSERT(dst_replicate->refcnt);
			
 
				 
			
 
				-	/* if there is no need to actually read the data, 
			
 
				+	/* if there is no need to actually read the data,
			
 
				 	 * we do not perform any transfer */
			
 
				-	if (!donotread) {
			
 
				+	if (!donotread)
			
 
				+	{
			
 
				 		size_t size = _starpu_data_get_size(handle);
			
 
				 		_starpu_bus_update_profiling_info((int)src_node, (int)dst_node, size);
			
 
				-		
			
 
				+
			
 
				 #ifdef STARPU_USE_FXT
			
 
				 		com_id = STARPU_ATOMIC_ADD(&communication_cnt, 1);
			
 
				 
			
@@ -303,34 +317,35 @@ void _starpu_driver_wait_request_completion(struct _starpu_async_channel *async_
 
				 	cudaError_t cures;
			
 
				 #endif
			
 
				 
			
 
				-	switch (kind) {
			
 
				+	switch (kind)
			
 
				+	{
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-		case STARPU_CUDA_RAM:
			
 
				-			event = (*async_channel).event.cuda_event;
			
 
				+	case STARPU_CUDA_RAM:
			
 
				+		event = (*async_channel).event.cuda_event;
			
 
				 
			
 
				-			cures = cudaEventSynchronize(event);
			
 
				-			if (STARPU_UNLIKELY(cures))
			
 
				-				STARPU_CUDA_REPORT_ERROR(cures);
			
 
				+		cures = cudaEventSynchronize(event);
			
 
				+		if (STARPU_UNLIKELY(cures))
			
 
				+			STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 
			
 
				-			cures = cudaEventDestroy(event);
			
 
				-			if (STARPU_UNLIKELY(cures))
			
 
				-				STARPU_CUDA_REPORT_ERROR(cures);
			
 
				+		cures = cudaEventDestroy(event);
			
 
				+		if (STARPU_UNLIKELY(cures))
			
 
				+			STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 
			
 
				-			break;
			
 
				+		break;
			
 
				 #endif
			
 
				 #ifdef STARPU_USE_OPENCL
			
 
				-      case STARPU_OPENCL_RAM:
			
 
				-         {
			
 
				-                 if ((*async_channel).event.opencl_event == NULL) STARPU_ABORT();
			
 
				-                 cl_int err = clWaitForEvents(1, &((*async_channel).event.opencl_event));
			
 
				-                 if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
			
 
				-                 clReleaseEvent((*async_channel).event.opencl_event);
			
 
				-         }
			
 
				-         break;
			
 
				+	case STARPU_OPENCL_RAM:
			
 
				+	{
			
 
				+		if ((*async_channel).event.opencl_event == NULL) STARPU_ABORT();
			
 
				+		cl_int err = clWaitForEvents(1, &((*async_channel).event.opencl_event));
			
 
				+		if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
			
 
				+		clReleaseEvent((*async_channel).event.opencl_event);
			
 
				+	      break;
			
 
				+	}
			
 
				 #endif
			
 
				-		case STARPU_CPU_RAM:
			
 
				-		default:
			
 
				-			STARPU_ABORT();
			
 
				+	case STARPU_CPU_RAM:
			
 
				+	default:
			
 
				+		STARPU_ABORT();
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -342,36 +357,36 @@ unsigned _starpu_driver_test_request_completion(struct _starpu_async_channel *as
 
				 	cudaEvent_t event;
			
 
				 #endif
			
 
				 
			
 
				-	switch (kind) {
			
 
				+	switch (kind)
			
 
				+	{
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				-		case STARPU_CUDA_RAM:
			
 
				-			event = (*async_channel).event.cuda_event;
			
 
				-			cudaError_t cures = cudaEventQuery(event);
			
 
				-
			
 
				-			success = (cures == cudaSuccess);
			
 
				-			if (success)
			
 
				-				cudaEventDestroy(event);
			
 
				-			else if (cures != cudaErrorNotReady)
			
 
				-				STARPU_CUDA_REPORT_ERROR(cures);
			
 
				-
			
 
				-			break;
			
 
				+	case STARPU_CUDA_RAM:
			
 
				+		event = (*async_channel).event.cuda_event;
			
 
				+		cudaError_t cures = cudaEventQuery(event);
			
 
				+
			
 
				+		success = (cures == cudaSuccess);
			
 
				+		if (success)
			
 
				+			cudaEventDestroy(event);
			
 
				+		else if (cures != cudaErrorNotReady)
			
 
				+			STARPU_CUDA_REPORT_ERROR(cures);
			
 
				+		break;
			
 
				 #endif
			
 
				 #ifdef STARPU_USE_OPENCL
			
 
				-      case STARPU_OPENCL_RAM:
			
 
				-         {
			
 
				-            cl_int event_status;
			
 
				-            cl_event opencl_event = (*async_channel).event.opencl_event;
			
 
				-            if (opencl_event == NULL) STARPU_ABORT();
			
 
				-            cl_int err = clGetEventInfo(opencl_event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL);
			
 
				-            if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
			
 
				-            success = (event_status == CL_COMPLETE);
			
 
				-            break;
			
 
				-         }
			
 
				+	case STARPU_OPENCL_RAM:
			
 
				+	{
			
 
				+		cl_int event_status;
			
 
				+		cl_event opencl_event = (*async_channel).event.opencl_event;
			
 
				+		if (opencl_event == NULL) STARPU_ABORT();
			
 
				+		cl_int err = clGetEventInfo(opencl_event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL);
			
 
				+		if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
			
 
				+		success = (event_status == CL_COMPLETE);
			
 
				+		break;
			
 
				+	}
			
 
				 #endif
			
 
				-		case STARPU_CPU_RAM:
			
 
				-		default:
			
 
				-			STARPU_ABORT();
			
 
				-			success = 0;
			
 
				+	case STARPU_CPU_RAM:
			
 
				+	default:
			
 
				+		STARPU_ABORT();
			
 
				+		success = 0;
			
 
				 	}
			
 
				 
			
 
				 	return success;
			
--- a/src/datawizard/copy_driver.h
+++ b/src/datawizard/copy_driver.h
@@ -38,7 +38,8 @@ struct _starpu_data_replicate;
 
				 
			
 
				 /* this is a structure that can be queried to see whether an asynchronous
			
 
				  * transfer has terminated or not */
			
 
				-union _starpu_async_channel_event {
			
 
				+union _starpu_async_channel_event
			
 
				+{
			
 
				 	int dummy;
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 	cudaEvent_t cuda_event;
			
@@ -48,7 +49,8 @@ union _starpu_async_channel_event {
 
				 #endif
			
 
				 };
			
 
				 
			
 
				-struct _starpu_async_channel {
			
 
				+struct _starpu_async_channel
			
 
				+{
			
 
				 	union _starpu_async_channel_event event;
			
 
				 	enum _starpu_node_kind type;
			
 
				 };
			
--- a/src/datawizard/data_request.c
+++ b/src/datawizard/data_request.c
@@ -41,7 +41,7 @@ void _starpu_init_data_request_lists(void)
 
				 
			
 
				 		data_requests_pending[i] = _starpu_data_request_list_new();
			
 
				 		_STARPU_PTHREAD_MUTEX_INIT(&data_requests_pending_list_mutex[i], NULL);
			
 
				-		
			
 
				+
			
 
				 		starpu_memstrategy_drop_prefetch[i]=0;
			
 
				 	}
			
 
				 }
			
@@ -72,7 +72,8 @@ static void starpu_data_request_destroy(struct _starpu_data_request *r)
 
				 	{
			
 
				 		node = r->src_replicate->memory_node;
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		node = r->dst_replicate->memory_node;
			
 
				 	}
			
 
				 
			
@@ -119,7 +120,8 @@ struct _starpu_data_request *_starpu_create_data_request(starpu_data_handle_t ha
 
				 		src_replicate->refcnt++;
			
 
				 		handle->busy_count++;
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		unsigned dst_node = dst_replicate->memory_node;
			
 
				 		dst_replicate->request[dst_node] = r;
			
 
				 	}
			
@@ -138,7 +140,8 @@ int _starpu_wait_data_request_completion(struct _starpu_data_request *r, unsigne
 
				 
			
 
				 	uint32_t local_node = _starpu_get_local_memory_node();
			
 
				 
			
 
				-	do {
			
 
				+	do
			
 
				+	{
			
 
				 		_starpu_spin_lock(&r->lock);
			
 
				 
			
 
				 		if (r->completed)
			
@@ -152,13 +155,14 @@ int _starpu_wait_data_request_completion(struct _starpu_data_request *r, unsigne
 
				 
			
 
				 		_starpu_datawizard_progress(local_node, may_alloc);
			
 
				 
			
 
				-	} while (1);
			
 
				+	}
			
 
				+	while (1);
			
 
				 
			
 
				 
			
 
				 	retval = r->retval;
			
 
				 	if (retval)
			
 
				 		_STARPU_DISP("REQUEST %p COMPLETED (retval %d) !\n", r, r->retval);
			
 
				-		
			
 
				+
			
 
				 
			
 
				 	r->refcnt--;
			
 
				 
			
@@ -167,10 +171,10 @@ int _starpu_wait_data_request_completion(struct _starpu_data_request *r, unsigne
 
				 		do_delete = 1;
			
 
				 
			
 
				 	_starpu_spin_unlock(&r->lock);
			
 
				-	
			
 
				+
			
 
				 	if (do_delete)
			
 
				 		starpu_data_request_destroy(r);
			
 
				-	
			
 
				+
			
 
				 	return retval;
			
 
				 }
			
 
				 
			
@@ -191,9 +195,9 @@ void _starpu_post_data_request(struct _starpu_data_request *r, uint32_t handling
 
				 
			
 
				 	/* insert the request in the proper list */
			
 
				 	_STARPU_PTHREAD_MUTEX_LOCK(&data_requests_list_mutex[handling_node]);
			
 
				-	if (r->prefetch) {
			
 
				+	if (r->prefetch)
			
 
				 		_starpu_data_request_list_push_back(prefetch_requests[handling_node], r);
			
 
				-	} else
			
 
				+	else
			
 
				 		_starpu_data_request_list_push_back(data_requests[handling_node], r);
			
 
				 	_STARPU_PTHREAD_MUTEX_UNLOCK(&data_requests_list_mutex[handling_node]);
			
 
				 
			
@@ -240,12 +244,12 @@ static void starpu_handle_data_request_completion(struct _starpu_data_request *r
 
				 	{
			
 
				 		if (old_src_replicate_state == STARPU_OWNER)
			
 
				 			_starpu_handle_stats_invalidated(handle, src_replicate->memory_node);
			
 
				-		else 
			
 
				+		else
			
 
				 		{
			
 
				 			/* XXX Currently only ex-OWNER are tagged as invalidated */
			
 
				 			/* XXX Have to check all old state of every node in case a SHARED data become OWNED by the dst_replicate */
			
 
				 		}
			
 
				-		
			
 
				+
			
 
				 	}
			
 
				 	if (dst_replicate->state == STARPU_SHARED)
			
 
				 		_starpu_handle_stats_loaded_shared(handle, dst_replicate->memory_node);
			
@@ -274,7 +278,7 @@ static void starpu_handle_data_request_completion(struct _starpu_data_request *r
 
				 	}
			
 
				 
			
 
				 	r->completed = 1;
			
 
				-	
			
 
				+
			
 
				 	/* Remove a reference on the destination replicate  */
			
 
				 	STARPU_ASSERT(dst_replicate->refcnt > 0);
			
 
				 	dst_replicate->refcnt--;
			
@@ -297,12 +301,12 @@ static void starpu_handle_data_request_completion(struct _starpu_data_request *r
 
				 	/* if nobody is waiting on that request, we can get rid of it */
			
 
				 	if (r->refcnt == 0)
			
 
				 		do_delete = 1;
			
 
				-	
			
 
				+
			
 
				 	r->retval = 0;
			
 
				 
			
 
				 	/* In case there are one or multiple callbacks, we execute them now. */
			
 
				 	struct _starpu_callback_list *callbacks = r->callbacks;
			
 
				-	
			
 
				+
			
 
				 	_starpu_spin_unlock(&r->lock);
			
 
				 
			
 
				 	if (do_delete)
			
@@ -426,7 +430,8 @@ void _starpu_handle_node_data_requests(uint32_t src_node, unsigned may_alloc)
 
				 	_starpu_data_request_list_delete(local_list);
			
 
				 }
			
 
				 
			
 
				-void _starpu_handle_node_prefetch_requests(uint32_t src_node, unsigned may_alloc){
			
 
				+void _starpu_handle_node_prefetch_requests(uint32_t src_node, unsigned may_alloc)
			
 
				+{
			
 
				 	starpu_memstrategy_drop_prefetch[src_node]=0;
			
 
				 
			
 
				 	struct _starpu_data_request *r;
			
@@ -437,7 +442,7 @@ void _starpu_handle_node_prefetch_requests(uint32_t src_node, unsigned may_alloc
 
				         _STARPU_PTHREAD_MUTEX_LOCK(&data_requests_list_mutex[src_node]);
			
 
				 
			
 
				 	struct _starpu_data_request_list *local_list = prefetch_requests[src_node];
			
 
				-	
			
 
				+
			
 
				 	if (_starpu_data_request_list_empty(local_list))
			
 
				 	{
			
 
				 		/* there is no request */
			
@@ -516,24 +521,26 @@ static void _handle_pending_node_data_requests(uint32_t src_node, unsigned force
 
				 		r = _starpu_data_request_list_pop_front(local_list);
			
 
				 
			
 
				 		starpu_data_handle_t handle = r->handle;
			
 
				-		
			
 
				+
			
 
				 		_starpu_spin_lock(&handle->header_lock);
			
 
				-	
			
 
				+
			
 
				 		_starpu_spin_lock(&r->lock);
			
 
				-	
			
 
				+
			
 
				 		/* wait until the transfer is terminated */
			
 
				 		if (force)
			
 
				 		{
			
 
				 			_starpu_driver_wait_request_completion(&r->async_channel);
			
 
				 			starpu_handle_data_request_completion(r);
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			if (_starpu_driver_test_request_completion(&r->async_channel))
			
 
				 			{
			
 
				 				/* The request was completed */
			
 
				 				starpu_handle_data_request_completion(r);
			
 
				 			}
			
 
				-			else {
			
 
				+			else
			
 
				+			{
			
 
				 				/* The request was not completed, so we put it
			
 
				 				 * back again on the list of pending requests
			
 
				 				 * so that it can be handled later on. */
			
@@ -572,10 +579,11 @@ int _starpu_check_that_no_data_request_exists(uint32_t node)
 
				 }
			
 
				 
			
 
				 
			
 
				-void _starpu_update_prefetch_status(struct _starpu_data_request *r){
			
 
				+void _starpu_update_prefetch_status(struct _starpu_data_request *r)
			
 
				+{
			
 
				 	STARPU_ASSERT(r->prefetch > 0);
			
 
				 	r->prefetch=0;
			
 
				-	
			
 
				+
			
 
				 	/* We have to promote chained_request too! */
			
 
				 	unsigned chained_req;
			
 
				 	for (chained_req = 0; chained_req < r->next_req_count; chained_req++)
			
@@ -586,7 +594,7 @@ void _starpu_update_prefetch_status(struct _starpu_data_request *r){
 
				 	}
			
 
				 
			
 
				 	_STARPU_PTHREAD_MUTEX_LOCK(&data_requests_list_mutex[r->handling_node]);
			
 
				-	
			
 
				+
			
 
				 	/* The request can be in a different list (handling request or the temp list)
			
 
				 	 * we have to check that it is really in the prefetch list. */
			
 
				 	struct _starpu_data_request *r_iter;
			
@@ -594,13 +602,13 @@ void _starpu_update_prefetch_status(struct _starpu_data_request *r){
 
				 	     r_iter != _starpu_data_request_list_end(prefetch_requests[r->handling_node]);
			
 
				 	     r_iter = _starpu_data_request_list_next(r_iter))
			
 
				 	{
			
 
				-		
			
 
				+
			
 
				 		if (r==r_iter)
			
 
				 		{
			
 
				 			_starpu_data_request_list_erase(prefetch_requests[r->handling_node],r);
			
 
				 			_starpu_data_request_list_push_front(data_requests[r->handling_node],r);
			
 
				 			break;
			
 
				-		}		
			
 
				+		}
			
 
				 	}
			
 
				 	_STARPU_PTHREAD_MUTEX_UNLOCK(&data_requests_list_mutex[r->handling_node]);
			
 
				 }
			
--- a/src/datawizard/data_request.h
+++ b/src/datawizard/data_request.h
@@ -26,7 +26,8 @@
 
				 
			
 
				 struct _starpu_data_replicate;
			
 
				 
			
 
				-struct _starpu_callback_list {
			
 
				+struct _starpu_callback_list
			
 
				+{
			
 
				 	void (*callback_func)(void *);
			
 
				 	void *callback_arg;
			
 
				 	struct _starpu_callback_list *next;
			
--- a/src/datawizard/datastats.c
+++ b/src/datawizard/datastats.c
@@ -95,13 +95,13 @@ void _starpu_display_alloc_cache_stats(void)
 
				 #ifdef STARPU_DATA_STATS
			
 
				 	fprintf(stderr, "Allocation cache stats:\n");
			
 
				 	unsigned node;
			
 
				-	for (node = 0; node < STARPU_MAXNODES; node++) 
			
 
				+	for (node = 0; node < STARPU_MAXNODES; node++)
			
 
				 	{
			
 
				-		if (alloc_cnt[node]) 
			
 
				+		if (alloc_cnt[node])
			
 
				 		{
			
 
				 			fprintf(stderr, "memory node %d\n", node);
			
 
				 			fprintf(stderr, "\ttotal alloc : %u\n", alloc_cnt[node]);
			
 
				-			fprintf(stderr, "\tcached alloc: %u (%2.2f \%%)\n", 
			
 
				+			fprintf(stderr, "\tcached alloc: %u (%2.2f \%%)\n",
			
 
				 				alloc_cache_hit_cnt[node], (100.0f*alloc_cache_hit_cnt[node])/(alloc_cnt[node]));
			
 
				 		}
			
 
				 	}
			
@@ -161,12 +161,12 @@ void _starpu_display_data_stats(void)
 
				 void _starpu_display_data_handle_stats(starpu_data_handle_t handle)
			
 
				 {
			
 
				 	unsigned node;
			
 
				-	
			
 
				+
			
 
				 	fprintf(stderr, "#-----\n");
			
 
				 	fprintf(stderr, "Data : %p\n", handle);
			
 
				 	fprintf(stderr, "Size : %d\n", (int)handle->data_size);
			
 
				 	fprintf(stderr, "\n");
			
 
				-	
			
 
				+
			
 
				 	fprintf(stderr, "#--\n");
			
 
				 	fprintf(stderr, "Data access stats\n");
			
 
				 	fprintf(stderr, "/!\\ Work Underway\n");
			
--- a/src/datawizard/filters.c
+++ b/src/datawizard/filters.c
@@ -32,7 +32,8 @@ static void map_filter(starpu_data_handle_t root_handle, struct starpu_data_filt
 
				 		/* this is a leaf */
			
 
				 		starpu_data_partition(root_handle, f);
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		/* try to apply the data filter recursively */
			
 
				 		unsigned child;
			
 
				 		for (child = 0; child < root_handle->nchildren; child++)
			
@@ -94,7 +95,7 @@ starpu_data_handle_t starpu_data_vget_sub_data(starpu_data_handle_t root_handle,
 
				 	starpu_data_handle_t current_handle = root_handle;
			
 
				 
			
 
				 	/* the variable number of argument must correlate the depth in the tree */
			
 
				-	unsigned i; 
			
 
				+	unsigned i;
			
 
				 	for (i = 0; i < depth; i++)
			
 
				 	{
			
 
				 		unsigned next_child;
			
@@ -185,7 +186,7 @@ void starpu_data_partition(starpu_data_handle_t initial_handle, struct starpu_da
 
				 		unsigned node;
			
 
				 		for (node = 0; node < STARPU_MAXNODES; node++)
			
 
				 		{
			
 
				-			struct _starpu_data_replicate *initial_replicate; 
			
 
				+			struct _starpu_data_replicate *initial_replicate;
			
 
				 			struct _starpu_data_replicate *child_replicate;
			
 
				 
			
 
				 			initial_replicate = &initial_handle->per_node[node];
			
@@ -197,7 +198,7 @@ void starpu_data_partition(starpu_data_handle_t initial_handle, struct starpu_da
 
				 			child_replicate->refcnt = 0;
			
 
				 			child_replicate->memory_node = node;
			
 
				 			child_replicate->relaxed_coherency = 0;
			
 
				-			
			
 
				+
			
 
				 			/* update the interface */
			
 
				 			void *initial_interface = starpu_data_get_interface_on_node(initial_handle, node);
			
 
				 			void *child_interface = starpu_data_get_interface_on_node(child, node);
			
@@ -210,7 +211,7 @@ void starpu_data_partition(starpu_data_handle_t initial_handle, struct starpu_da
 
				 		{
			
 
				 			struct _starpu_data_replicate *child_replicate;
			
 
				 			child_replicate = &child->per_worker[worker];
			
 
				-			
			
 
				+
			
 
				 			child_replicate->state = STARPU_INVALID;
			
 
				 			child_replicate->allocated = 0;
			
 
				 			child_replicate->automatically_allocated = 0;
			
@@ -264,11 +265,11 @@ void starpu_data_unpartition(starpu_data_handle_t root_handle, uint32_t gatherin
 
				 
			
 
				 		int ret;
			
 
				 		ret = _starpu_fetch_data_on_node(child_handle, &child_handle->per_node[gathering_node], STARPU_R, 0, NULL, NULL);
			
 
				-		/* for now we pretend that the RAM is almost unlimited and that gathering 
			
 
				+		/* for now we pretend that the RAM is almost unlimited and that gathering
			
 
				 		 * data should be possible from the node that does the unpartionning ... we
			
 
				 		 * don't want to have the programming deal with memory shortage at that time,
			
 
				 		 * really */
			
 
				-		STARPU_ASSERT(ret == 0); 
			
 
				+		STARPU_ASSERT(ret == 0);
			
 
				 
			
 
				 		_starpu_data_free_interfaces(&root_handle->children[child]);
			
 
				 		_starpu_data_requester_list_delete(child_handle->req_list);
			
@@ -278,7 +279,7 @@ void starpu_data_unpartition(starpu_data_handle_t root_handle, uint32_t gatherin
 
				 	/* the gathering_node should now have a valid copy of all the children.
			
 
				 	 * For all nodes, if the node had all copies and none was locally
			
 
				 	 * allocated then the data is still valid there, else, it's invalidated
			
 
				-	 * for the gathering node, if we have some locally allocated data, we 
			
 
				+	 * for the gathering node, if we have some locally allocated data, we
			
 
				 	 * copy all the children (XXX this should not happen so we just do not
			
 
				 	 * do anything since this is transparent ?) */
			
 
				 	unsigned still_valid[STARPU_MAXNODES];
			
@@ -299,9 +300,10 @@ void starpu_data_unpartition(starpu_data_handle_t root_handle, uint32_t gatherin
 
				 		{
			
 
				 			struct _starpu_data_replicate *local = &root_handle->children[child].per_node[node];
			
 
				 
			
 
				-			if (local->state == STARPU_INVALID) {
			
 
				+			if (local->state == STARPU_INVALID)
			
 
				+			{
			
 
				 				/* One of the bits is missing */
			
 
				-				isvalid = 0; 
			
 
				+				isvalid = 0;
			
 
				 			}
			
 
				 
			
 
				 			if (local->allocated && local->automatically_allocated)
			
@@ -331,7 +333,7 @@ void starpu_data_unpartition(starpu_data_handle_t root_handle, uint32_t gatherin
 
				 
			
 
				 	for (node = 0; node < STARPU_MAXNODES; node++)
			
 
				 	{
			
 
				-		root_handle->per_node[node].state = 
			
 
				+		root_handle->per_node[node].state =
			
 
				 			still_valid[node]?newstate:STARPU_INVALID;
			
 
				 	}
			
 
				 
			
@@ -358,15 +360,15 @@ static void starpu_data_create_children(starpu_data_handle_t handle, unsigned nc
 
				 	for (child = 0; child < nchildren; child++)
			
 
				 	{
			
 
				 		starpu_data_handle_t handle_child = &handle->children[child];
			
 
				-		
			
 
				+
			
 
				 		struct starpu_data_interface_ops *ops;
			
 
				-		
			
 
				+
			
 
				 		/* what's this child's interface ? */
			
 
				 		if (f->get_child_ops)
			
 
				 		  ops = f->get_child_ops(f, child);
			
 
				 		else
			
 
				 		  ops = handle->ops;
			
 
				-		
			
 
				+
			
 
				 		handle_child->ops = ops;
			
 
				 
			
 
				 		size_t interfacesize = ops->interface_size;
			
@@ -386,7 +388,7 @@ static void starpu_data_create_children(starpu_data_handle_t handle, unsigned nc
 
				 			STARPU_ASSERT(handle_child->per_worker[worker].data_interface);
			
 
				 		}
			
 
				 	}
			
 
				-	
			
 
				+
			
 
				 	/* this handle now has children */
			
 
				 	handle->nchildren = nchildren;
			
 
				 }
			
--- a/src/datawizard/interfaces/bcsr_filters.c
+++ b/src/datawizard/interfaces/bcsr_filters.c
@@ -25,14 +25,14 @@ void starpu_canonical_block_filter_bcsr(void *father_interface, void *child_inte
 
				 	struct starpu_bcsr_interface *bcsr_father = (struct starpu_bcsr_interface *) father_interface;
			
 
				 	/* each chunk becomes a small dense matrix */
			
 
				 	struct starpu_matrix_interface *matrix_child = (struct starpu_matrix_interface *) child_interface;
			
 
				-	
			
 
				+
			
 
				 	size_t elemsize = bcsr_father->elemsize;
			
 
				 	uint32_t firstentry = bcsr_father->firstentry;
			
 
				 
			
 
				 	/* size of the tiles */
			
 
				 	uint32_t r = bcsr_father->r;
			
 
				 	uint32_t c = bcsr_father->c;
			
 
				-	
			
 
				+
			
 
				 	uint32_t ptr_offset = c*r*id*elemsize;
			
 
				 
			
 
				 	matrix_child->nx = c;
			
@@ -40,8 +40,9 @@ void starpu_canonical_block_filter_bcsr(void *father_interface, void *child_inte
 
				 	matrix_child->ld = c;
			
 
				 	matrix_child->elemsize = elemsize;
			
 
				 
			
 
				-	if (bcsr_father->nzval) {
			
 
				-	  uint8_t *nzval = (uint8_t *)(bcsr_father->nzval);
			
 
				-	  matrix_child->ptr = (uintptr_t)&nzval[firstentry + ptr_offset];
			
 
				+	if (bcsr_father->nzval)
			
 
				+	{
			
 
				+		uint8_t *nzval = (uint8_t *)(bcsr_father->nzval);
			
 
				+		matrix_child->ptr = (uintptr_t)&nzval[firstentry + ptr_offset];
			
 
				 	}
			
 
				 }
			
--- a/src/datawizard/interfaces/bcsr_interface.c
+++ b/src/datawizard/interfaces/bcsr_interface.c
@@ -42,7 +42,8 @@ static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTR
 
				 static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
			
 
				 #endif
			
 
				 
			
 
				-static const struct starpu_data_copy_methods bcsr_copy_data_methods_s = {
			
 
				+static const struct starpu_data_copy_methods bcsr_copy_data_methods_s =
			
 
				+{
			
 
				 	.ram_to_ram = copy_ram_to_ram,
			
 
				 	.ram_to_spu = NULL,
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -68,7 +69,8 @@ static int bcsr_compare(void *data_interface_a, void *data_interface_b);
 
				 static uint32_t footprint_bcsr_interface_crc32(starpu_data_handle_t handle);
			
 
				 
			
 
				 
			
 
				-static struct starpu_data_interface_ops interface_bcsr_ops = {
			
 
				+static struct starpu_data_interface_ops interface_bcsr_ops =
			
 
				+{
			
 
				 	.register_data_handle = register_bcsr_handle,
			
 
				 	.allocate_data_on_node = allocate_bcsr_buffer_on_node,
			
 
				 	.free_data_on_node = free_bcsr_buffer_on_node,
			
@@ -90,12 +92,14 @@ static void register_bcsr_handle(starpu_data_handle_t handle, uint32_t home_node
 
				 		struct starpu_bcsr_interface *local_interface = (struct starpu_bcsr_interface *)
			
 
				 			starpu_data_get_interface_on_node(handle, node);
			
 
				 
			
 
				-		if (node == home_node) {
			
 
				+		if (node == home_node)
			
 
				+		{
			
 
				 			local_interface->nzval = bcsr_interface->nzval;
			
 
				 			local_interface->colind = bcsr_interface->colind;
			
 
				 			local_interface->rowptr = bcsr_interface->rowptr;
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			local_interface->nzval = 0;
			
 
				 			local_interface->colind = NULL;
			
 
				 			local_interface->rowptr = NULL;
			
@@ -115,7 +119,8 @@ void starpu_bcsr_data_register(starpu_data_handle_t *handleptr, uint32_t home_no
 
				 		uint32_t *rowptr, uint32_t firstentry,
			
 
				 		uint32_t r, uint32_t c, size_t elemsize)
			
 
				 {
			
 
				-	struct starpu_bcsr_interface bcsr_interface = {
			
 
				+	struct starpu_bcsr_interface bcsr_interface =
			
 
				+	{
			
 
				 		.nzval = nzval,
			
 
				 		.colind = colind,
			
 
				 		.rowptr = rowptr,
			
@@ -212,7 +217,7 @@ uintptr_t starpu_bcsr_get_local_nzval(starpu_data_handle_t handle)
 
				 
			
 
				 	struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *)
			
 
				 		starpu_data_get_interface_on_node(handle, node);
			
 
				-	
			
 
				+
			
 
				 	return data_interface->nzval;
			
 
				 }
			
 
				 
			
@@ -245,7 +250,7 @@ static size_t bcsr_interface_get_size(starpu_data_handle_t handle)
 
				 	uint32_t c = starpu_bcsr_get_c(handle);
			
 
				 	size_t elemsize = starpu_bcsr_get_elemsize(handle);
			
 
				 
			
 
				-	size = nnz*r*c*elemsize + nnz*sizeof(uint32_t) + (nrow+1)*sizeof(uint32_t); 
			
 
				+	size = nnz*r*c*elemsize + nnz*sizeof(uint32_t) + (nrow+1)*sizeof(uint32_t);
			
 
				 
			
 
				 	return size;
			
 
				 }
			
@@ -272,7 +277,8 @@ static ssize_t allocate_bcsr_buffer_on_node(void *data_interface_, uint32_t dst_
 
				 
			
 
				 	enum _starpu_node_kind kind = _starpu_get_node_kind(dst_node);
			
 
				 
			
 
				-	switch(kind) {
			
 
				+	switch(kind)
			
 
				+	{
			
 
				 		case STARPU_CPU_RAM:
			
 
				 			addr_nzval = (uintptr_t)malloc(nnz*r*c*elemsize);
			
 
				 			if (!addr_nzval)
			
@@ -305,42 +311,43 @@ static ssize_t allocate_bcsr_buffer_on_node(void *data_interface_, uint32_t dst_
 
				 #endif
			
 
				 #ifdef STARPU_USE_OPENCL
			
 
				 		case STARPU_OPENCL_RAM:
			
 
				-                        {
			
 
				-                                int ret;
			
 
				-                                void *ptr;
			
 
				+		{
			
 
				+			int ret;
			
 
				+			void *ptr;
			
 
				 
			
 
				-                                ret = _starpu_opencl_allocate_memory(&ptr, nnz*r*c*elemsize, CL_MEM_READ_WRITE);
			
 
				-                                addr_nzval = (uintptr_t)ptr;
			
 
				-                                if (ret) goto fail_nzval;
			
 
				+			ret = _starpu_opencl_allocate_memory(&ptr, nnz*r*c*elemsize, CL_MEM_READ_WRITE);
			
 
				+			addr_nzval = (uintptr_t)ptr;
			
 
				+			if (ret) goto fail_nzval;
			
 
				 
			
 
				-                                ret = _starpu_opencl_allocate_memory(&ptr, nnz*sizeof(uint32_t), CL_MEM_READ_WRITE);
			
 
				-                                addr_colind = ptr;
			
 
				-				if (ret) goto fail_colind;
			
 
				+			ret = _starpu_opencl_allocate_memory(&ptr, nnz*sizeof(uint32_t), CL_MEM_READ_WRITE);
			
 
				+			addr_colind = ptr;
			
 
				+			if (ret) goto fail_colind;
			
 
				 
			
 
				-                                ret = _starpu_opencl_allocate_memory(&ptr, (nrow+1)*sizeof(uint32_t), CL_MEM_READ_WRITE);
			
 
				-                                addr_rowptr = ptr;
			
 
				-				if (ret) goto fail_rowptr;
			
 
				+			ret = _starpu_opencl_allocate_memory(&ptr, (nrow+1)*sizeof(uint32_t), CL_MEM_READ_WRITE);
			
 
				+			addr_rowptr = ptr;
			
 
				+			if (ret) goto fail_rowptr;
			
 
				 
			
 
				-                                break;
			
 
				-                        }
			
 
				+			break;
			
 
				+		}
			
 
				 #endif
			
 
				 		default:
			
 
				 			assert(0);
			
 
				 	}
			
 
				 
			
 
				 	/* allocation succeeded */
			
 
				-	allocated_memory = 
			
 
				+	allocated_memory =
			
 
				 		nnz*r*c*elemsize + nnz*sizeof(uint32_t) + (nrow+1)*sizeof(uint32_t);
			
 
				 
			
 
				 	/* update the data properly in consequence */
			
 
				 	bcsr_interface->nzval = addr_nzval;
			
 
				 	bcsr_interface->colind = addr_colind;
			
 
				 	bcsr_interface->rowptr = addr_rowptr;
			
 
				-	
			
 
				+
			
 
				 	return allocated_memory;
			
 
				 
			
 
				 fail_rowptr:
			
 
				-	switch(kind) {
			
 
				+	switch(kind)
			
 
				+	{
			
 
				 		case STARPU_CPU_RAM:
			
 
				 			free((void *)addr_colind);
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -358,7 +365,8 @@ fail_rowptr:
 
				 	}
			
 
				 
			
 
				 fail_colind:
			
 
				-	switch(kind) {
			
 
				+	switch(kind)
			
 
				+	{
			
 
				 		case STARPU_CPU_RAM:
			
 
				 			free((void *)addr_nzval);
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -386,7 +394,8 @@ static void free_bcsr_buffer_on_node(void *data_interface, uint32_t node)
 
				 	struct starpu_bcsr_interface *bcsr_interface = (struct starpu_bcsr_interface *) data_interface;
			
 
				 
			
 
				 	enum _starpu_node_kind kind = _starpu_get_node_kind(node);
			
 
				-	switch(kind) {
			
 
				+	switch(kind)
			
 
				+	{
			
 
				 		case STARPU_CPU_RAM:
			
 
				 			free((void*)bcsr_interface->nzval);
			
 
				 			free((void*)bcsr_interface->colind);
			
--- a/src/datawizard/interfaces/block_filters.c
+++ b/src/datawizard/interfaces/block_filters.c
@@ -41,7 +41,8 @@ void starpu_block_filter_func_block(void *father_interface, void *child_interfac
 
				 	block_child->nz = nz;
			
 
				 	block_child->elemsize = elemsize;
			
 
				 
			
 
				-	if (block_father->ptr) {
			
 
				+	if (block_father->ptr)
			
 
				+	{
			
 
				                 block_child->ptr = block_father->ptr + offset;
			
 
				                 block_child->ldy = block_father->ldy;
			
 
				                 block_child->ldz = block_father->ldz;
			
--- a/src/datawizard/interfaces/block_interface.c
+++ b/src/datawizard/interfaces/block_interface.c
@@ -42,7 +42,8 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARP
 
				 static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, void *_event);
			
 
				 #endif
			
 
				 
			
 
				-static const struct starpu_data_copy_methods block_copy_data_methods_s = {
			
 
				+static const struct starpu_data_copy_methods block_copy_data_methods_s =
			
 
				+{
			
 
				 	.ram_to_ram = copy_ram_to_ram,
			
 
				 	.ram_to_spu = NULL,
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -77,7 +78,8 @@ static void display_block_interface(starpu_data_handle_t handle, FILE *f);
 
				 static int convert_block_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss);
			
 
				 #endif
			
 
				 
			
 
				-static struct starpu_data_interface_ops interface_block_ops = {
			
 
				+static struct starpu_data_interface_ops interface_block_ops =
			
 
				+{
			
 
				 	.register_data_handle = register_block_handle,
			
 
				 	.allocate_data_on_node = allocate_block_buffer_on_node,
			
 
				 	.handle_to_pointer = block_handle_to_pointer,
			
@@ -89,13 +91,13 @@ static struct starpu_data_interface_ops interface_block_ops = {
 
				 #ifdef STARPU_USE_GORDON
			
 
				 	.convert_to_gordon = convert_block_to_gordon,
			
 
				 #endif
			
 
				-	.interfaceid = STARPU_BLOCK_INTERFACE_ID, 
			
 
				+	.interfaceid = STARPU_BLOCK_INTERFACE_ID,
			
 
				 	.interface_size = sizeof(struct starpu_block_interface),
			
 
				 	.display = display_block_interface
			
 
				 };
			
 
				 
			
 
				 #ifdef STARPU_USE_GORDON
			
 
				-int convert_block_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss) 
			
 
				+int convert_block_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss)
			
 
				 {
			
 
				 	/* TODO */
			
 
				 	STARPU_ABORT();
			
@@ -124,14 +126,16 @@ static void register_block_handle(starpu_data_handle_t handle, uint32_t home_nod
 
				 		struct starpu_block_interface *local_interface = (struct starpu_block_interface *)
			
 
				 			starpu_data_get_interface_on_node(handle, node);
			
 
				 
			
 
				-		if (node == home_node) {
			
 
				+		if (node == home_node)
			
 
				+		{
			
 
				 			local_interface->ptr = block_interface->ptr;
			
 
				                         local_interface->dev_handle = block_interface->dev_handle;
			
 
				                         local_interface->offset = block_interface->offset;
			
 
				 			local_interface->ldy  = block_interface->ldy;
			
 
				 			local_interface->ldz  = block_interface->ldz;
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			local_interface->ptr = 0;
			
 
				                         local_interface->dev_handle = 0;
			
 
				                         local_interface->offset = 0;
			
@@ -151,7 +155,8 @@ void starpu_block_data_register(starpu_data_handle_t *handleptr, uint32_t home_n
 
				 			uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t nx,
			
 
				 			uint32_t ny, uint32_t nz, size_t elemsize)
			
 
				 {
			
 
				-	struct starpu_block_interface block_interface = {
			
 
				+	struct starpu_block_interface block_interface =
			
 
				+	{
			
 
				 		.ptr = ptr,
			
 
				                 .dev_handle = ptr,
			
 
				                 .offset = 0,
			
@@ -205,7 +210,7 @@ static size_t block_interface_get_size(starpu_data_handle_t handle)
 
				 
			
 
				 	block_interface = (struct starpu_block_interface *) starpu_data_get_interface_on_node(handle, 0);
			
 
				 
			
 
				-	size = block_interface->nx*block_interface->ny*block_interface->nz*block_interface->elemsize; 
			
 
				+	size = block_interface->nx*block_interface->ny*block_interface->nz*block_interface->elemsize;
			
 
				 
			
 
				 	return size;
			
 
				 }
			
@@ -241,7 +246,7 @@ uint32_t starpu_block_get_local_ldy(starpu_data_handle_t handle)
 
				 	node = _starpu_get_local_memory_node();
			
 
				 
			
 
				 	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
			
 
				-	
			
 
				+
			
 
				 	struct starpu_block_interface *block_interface = (struct starpu_block_interface *)
			
 
				 		starpu_data_get_interface_on_node(handle, node);
			
 
				 
			
@@ -304,10 +309,11 @@ static ssize_t allocate_block_buffer_on_node(void *data_interface_, uint32_t dst
 
				 
			
 
				 	enum _starpu_node_kind kind = _starpu_get_node_kind(dst_node);
			
 
				 
			
 
				-	switch(kind) {
			
 
				+	switch(kind)
			
 
				+	{
			
 
				 		case STARPU_CPU_RAM:
			
 
				 			addr = (uintptr_t)malloc(nx*ny*nz*elemsize);
			
 
				-			if (!addr) 
			
 
				+			if (!addr)
			
 
				 				fail = 1;
			
 
				 
			
 
				 			break;
			
@@ -334,7 +340,8 @@ static ssize_t allocate_block_buffer_on_node(void *data_interface_, uint32_t dst
 
				                                 void *ptr;
			
 
				                                 ret = _starpu_opencl_allocate_memory(&ptr, nx*ny*nz*elemsize, CL_MEM_READ_WRITE);
			
 
				                                 addr = (uintptr_t)ptr;
			
 
				-				if (ret) {
			
 
				+				if (ret)
			
 
				+				{
			
 
				 					fail = 1;
			
 
				 				}
			
 
				 				break;
			
@@ -344,7 +351,8 @@ static ssize_t allocate_block_buffer_on_node(void *data_interface_, uint32_t dst
 
				 			assert(0);
			
 
				 	}
			
 
				 
			
 
				-	if (!fail) {
			
 
				+	if (!fail)
			
 
				+	{
			
 
				 		/* allocation succeeded */
			
 
				 		allocated_memory = nx*ny*nz*elemsize;
			
 
				 
			
@@ -354,11 +362,13 @@ static ssize_t allocate_block_buffer_on_node(void *data_interface_, uint32_t dst
 
				                 dst_block->offset = 0;
			
 
				 		dst_block->ldy = nx;
			
 
				 		dst_block->ldz = nx*ny;
			
 
				-	} else {
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				 		/* allocation failed */
			
 
				 		allocated_memory = -ENOMEM;
			
 
				 	}
			
 
				-	
			
 
				+
			
 
				 	return allocated_memory;
			
 
				 }
			
 
				 
			
@@ -371,7 +381,8 @@ static void free_block_buffer_on_node(void *data_interface, uint32_t node)
 
				 #endif
			
 
				 
			
 
				 	enum _starpu_node_kind kind = _starpu_get_node_kind(node);
			
 
				-	switch(kind) {
			
 
				+	switch(kind)
			
 
				+	{
			
 
				 		case STARPU_CPU_RAM:
			
 
				 			free((void*)block_interface->ptr);
			
 
				 			break;
			
@@ -416,7 +427,8 @@ static int copy_cuda_common(void *src_interface, unsigned src_node STARPU_ATTRIB
 
				                         if (STARPU_UNLIKELY(cures))
			
 
				                                 STARPU_CUDA_REPORT_ERROR(cures);
			
 
				                 }
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			/* Are all plans contiguous */
			
 
				                         cures = cudaMemcpy2D((char *)dst_block->ptr, dst_block->ldz*elemsize,
			
 
				                                              (char *)src_block->ptr, src_block->ldz*elemsize,
			
@@ -425,7 +437,8 @@ static int copy_cuda_common(void *src_interface, unsigned src_node STARPU_ATTRIB
 
				                                 STARPU_CUDA_REPORT_ERROR(cures);
			
 
				                 }
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		/* Default case: we transfer all lines one by one: ny*nz transfers */
			
 
				 		unsigned layer;
			
 
				 		for (layer = 0; layer < src_block->nz; layer++)
			
@@ -481,12 +494,14 @@ static int copy_cuda_async_common(void *src_interface, unsigned src_node STARPU_
 
				 
			
 
				 				ret = 0;
			
 
				 			}
			
 
				-			else {
			
 
				+			else
			
 
				+			{
			
 
				 				ret = -EAGAIN;
			
 
				 			}
			
 
				-			
			
 
				+
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			/* Are all plans contiguous */
			
 
				 			_STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
			
 
				 			cures = cudaMemcpy2DAsync((char *)dst_block->ptr, dst_block->ldz*elemsize,
			
@@ -503,12 +518,14 @@ static int copy_cuda_async_common(void *src_interface, unsigned src_node STARPU_
 
				 
			
 
				 				ret = 0;
			
 
				 			}
			
 
				-			else {
			
 
				+			else
			
 
				+			{
			
 
				 				ret = -EAGAIN;
			
 
				 			}
			
 
				 		}
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		/* Default case: we transfer all lines one by one: ny*nz transfers */
			
 
				 		unsigned layer;
			
 
				 		for (layer = 0; layer < src_block->nz; layer++)
			
@@ -609,19 +626,22 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARP
 
				                         if (STARPU_UNLIKELY(err))
			
 
				                                 STARPU_OPENCL_REPORT_ERROR(err);
			
 
				                 }
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			/* Are all plans contiguous */
			
 
				                         /* XXX non contiguous buffers are not properly supported yet. (TODO) */
			
 
				                         STARPU_ASSERT(0);
			
 
				                 }
			
 
				         }
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		/* Default case: we transfer all lines one by one: ny*nz transfers */
			
 
				 		unsigned layer;
			
 
				 		for (layer = 0; layer < src_block->nz; layer++)
			
 
				 		{
			
 
				                         unsigned j;
			
 
				-                        for(j=0 ; j<src_block->ny ; j++) {
			
 
				+                        for(j=0 ; j<src_block->ny ; j++)
			
 
				+			{
			
 
				                                 void *ptr = (void*)src_block->ptr+(layer*src_block->ldz*src_block->elemsize)+(j*src_block->ldy*src_block->elemsize);
			
 
				                                 err = _starpu_opencl_copy_ram_to_opencl(ptr, src_node, (cl_mem)dst_block->dev_handle, dst_node,
			
 
				                                                                         src_block->nx*src_block->elemsize,
			
@@ -673,20 +693,23 @@ static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARP
 
				                         if (STARPU_UNLIKELY(err))
			
 
				                                 STARPU_OPENCL_REPORT_ERROR(err);
			
 
				                 }
			
 
				-                else {
			
 
				+                else
			
 
				+		{
			
 
				 			/* Are all plans contiguous */
			
 
				                         /* XXX non contiguous buffers are not properly supported yet. (TODO) */
			
 
				                         STARPU_ASSERT(0);
			
 
				                 }
			
 
				         }
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		/* Default case: we transfer all lines one by one: ny*nz transfers */
			
 
				                 /* XXX non contiguous buffers are not properly supported yet. (TODO) */
			
 
				 		unsigned layer;
			
 
				 		for (layer = 0; layer < src_block->nz; layer++)
			
 
				 		{
			
 
				                         unsigned j;
			
 
				-                        for(j=0 ; j<src_block->ny ; j++) {
			
 
				+                        for(j=0 ; j<src_block->ny ; j++)
			
 
				+			{
			
 
				                                 void *ptr = (void *)dst_block->ptr+(layer*dst_block->ldz*dst_block->elemsize)+(j*dst_block->ldy*dst_block->elemsize);
			
 
				                                 err = _starpu_opencl_copy_opencl_to_ram((void*)src_block->dev_handle, src_node, ptr, dst_node,
			
 
				                                                                         src_block->nx*src_block->elemsize,
			
@@ -753,7 +776,7 @@ static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBU
 
				 		uint32_t src_offset = (y*ldy_src + y*z*ldz_src)*elemsize;
			
 
				 		uint32_t dst_offset = (y*ldy_dst + y*z*ldz_dst)*elemsize;
			
 
				 
			
 
				-		memcpy((void *)(ptr_dst + dst_offset), 
			
 
				+		memcpy((void *)(ptr_dst + dst_offset),
			
 
				 			(void *)(ptr_src + src_offset), nx*elemsize);
			
 
				 	}
			
 
				 
			
--- a/src/datawizard/interfaces/csr_filters.c
+++ b/src/datawizard/interfaces/csr_filters.c
@@ -35,20 +35,21 @@ void starpu_vertical_block_filter_func_csr(void *father_interface, void *child_i
 
				 
			
 
				 	uint32_t first_index = id*chunk_size - firstentry;
			
 
				 	uint32_t local_firstentry = rowptr[first_index];
			
 
				-	
			
 
				-	uint32_t child_nrow = 
			
 
				+
			
 
				+	uint32_t child_nrow =
			
 
				 	  STARPU_MIN(chunk_size, nrow - id*chunk_size);
			
 
				-	
			
 
				-	uint32_t local_nnz = rowptr[first_index + child_nrow] - rowptr[first_index]; 
			
 
				-	
			
 
				+
			
 
				+	uint32_t local_nnz = rowptr[first_index + child_nrow] - rowptr[first_index];
			
 
				+
			
 
				 	csr_child->nnz = local_nnz;
			
 
				 	csr_child->nrow = child_nrow;
			
 
				 	csr_child->firstentry = local_firstentry;
			
 
				 	csr_child->elemsize = elemsize;
			
 
				-	
			
 
				-	if (csr_father->nzval) {
			
 
				-	  csr_child->rowptr = &csr_father->rowptr[first_index];
			
 
				-	  csr_child->colind = &csr_father->colind[local_firstentry];
			
 
				-	  csr_child->nzval = csr_father->nzval + local_firstentry * elemsize;
			
 
				+
			
 
				+	if (csr_father->nzval)
			
 
				+	{
			
 
				+		csr_child->rowptr = &csr_father->rowptr[first_index];
			
 
				+		csr_child->colind = &csr_father->colind[local_firstentry];
			
 
				+		csr_child->nzval = csr_father->nzval + local_firstentry * elemsize;
			
 
				 	}
			
 
				 }
			
--- a/src/datawizard/interfaces/csr_interface.c
+++ b/src/datawizard/interfaces/csr_interface.c
@@ -42,7 +42,8 @@ static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_
 
				 static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				 #endif
			
 
				 
			
 
				-static const struct starpu_data_copy_methods csr_copy_data_methods_s = {
			
 
				+static const struct starpu_data_copy_methods csr_copy_data_methods_s =
			
 
				+{
			
 
				 	.ram_to_ram = copy_ram_to_ram,
			
 
				 	.ram_to_spu = NULL,
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -70,7 +71,8 @@ static size_t csr_interface_get_size(starpu_data_handle_t handle);
 
				 static int csr_compare(void *data_interface_a, void *data_interface_b);
			
 
				 static uint32_t footprint_csr_interface_crc32(starpu_data_handle_t handle);
			
 
				 
			
 
				-static struct starpu_data_interface_ops interface_csr_ops = {
			
 
				+static struct starpu_data_interface_ops interface_csr_ops =
			
 
				+{
			
 
				 	.register_data_handle = register_csr_handle,
			
 
				 	.allocate_data_on_node = allocate_csr_buffer_on_node,
			
 
				 	.free_data_on_node = free_csr_buffer_on_node,
			
@@ -92,11 +94,13 @@ static void register_csr_handle(starpu_data_handle_t handle, uint32_t home_node,
 
				 		struct starpu_csr_interface *local_interface = (struct starpu_csr_interface *)
			
 
				 			starpu_data_get_interface_on_node(handle, node);
			
 
				 
			
 
				-		if (node == home_node) {
			
 
				+		if (node == home_node)
			
 
				+		{
			
 
				 			local_interface->nzval = csr_interface->nzval;
			
 
				 			local_interface->colind = csr_interface->colind;
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			local_interface->nzval = 0;
			
 
				 			local_interface->colind = NULL;
			
 
				 		}
			
@@ -114,7 +118,8 @@ static void register_csr_handle(starpu_data_handle_t handle, uint32_t home_node,
 
				 void starpu_csr_data_register(starpu_data_handle_t *handleptr, uint32_t home_node,
			
 
				 		uint32_t nnz, uint32_t nrow, uintptr_t nzval, uint32_t *colind, uint32_t *rowptr, uint32_t firstentry, size_t elemsize)
			
 
				 {
			
 
				-	struct starpu_csr_interface csr_interface = {
			
 
				+	struct starpu_csr_interface csr_interface =
			
 
				+	{
			
 
				 		.nnz = nnz,
			
 
				 		.nrow = nrow,
			
 
				 		.nzval = nzval,
			
@@ -246,7 +251,8 @@ static ssize_t allocate_csr_buffer_on_node(void *data_interface_, uint32_t dst_n
 
				 
			
 
				 	enum _starpu_node_kind kind = _starpu_get_node_kind(dst_node);
			
 
				 
			
 
				-	switch(kind) {
			
 
				+	switch(kind)
			
 
				+	{
			
 
				 		case STARPU_CPU_RAM:
			
 
				 			addr_nzval = (uintptr_t)malloc(nnz*elemsize);
			
 
				 			if (!addr_nzval)
			
@@ -303,18 +309,19 @@ static ssize_t allocate_csr_buffer_on_node(void *data_interface_, uint32_t dst_n
 
				 	}
			
 
				 
			
 
				 	/* allocation succeeded */
			
 
				-	allocated_memory = 
			
 
				+	allocated_memory =
			
 
				 		nnz*elemsize + nnz*sizeof(uint32_t) + (nrow+1)*sizeof(uint32_t);
			
 
				 
			
 
				 	/* update the data properly in consequence */
			
 
				 	csr_interface->nzval = addr_nzval;
			
 
				 	csr_interface->colind = addr_colind;
			
 
				 	csr_interface->rowptr = addr_rowptr;
			
 
				-	
			
 
				+
			
 
				 	return allocated_memory;
			
 
				 
			
 
				 fail_rowptr:
			
 
				-	switch(kind) {
			
 
				+	switch(kind)
			
 
				+	{
			
 
				 		case STARPU_CPU_RAM:
			
 
				 			free((void *)addr_colind);
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -332,7 +339,8 @@ fail_rowptr:
 
				 	}
			
 
				 
			
 
				 fail_colind:
			
 
				-	switch(kind) {
			
 
				+	switch(kind)
			
 
				+	{
			
 
				 		case STARPU_CPU_RAM:
			
 
				 			free((void *)addr_nzval);
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -360,7 +368,8 @@ static void free_csr_buffer_on_node(void *data_interface, uint32_t node)
 
				 	struct starpu_csr_interface *csr_interface = (struct starpu_csr_interface *) data_interface;
			
 
				 
			
 
				 	enum _starpu_node_kind kind = _starpu_get_node_kind(node);
			
 
				-	switch(kind) {
			
 
				+	switch(kind)
			
 
				+	{
			
 
				 		case STARPU_CPU_RAM:
			
 
				 			free((void*)csr_interface->nzval);
			
 
				 			free((void*)csr_interface->colind);
			
@@ -465,13 +474,14 @@ static int copy_cuda_common_async(void *src_interface, unsigned src_node STARPU_
 
				 		if (STARPU_UNLIKELY(cures))
			
 
				 			STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 	}
			
 
				-	
			
 
				+
			
 
				 	if (synchronous_fallback)
			
 
				 	{
			
 
				 		_STARPU_TRACE_DATA_COPY(src_node, dst_node, nnz*elemsize + (nnz+nrow+1)*sizeof(uint32_t));
			
 
				 		return 0;
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		_STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
			
 
				 		return -EAGAIN;
			
 
				 	}
			
@@ -569,13 +579,14 @@ static int copy_cuda_peer_async(void *src_interface STARPU_ATTRIBUTE_UNUSED, uns
 
				 		if (STARPU_UNLIKELY(cures))
			
 
				 			STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 	}
			
 
				-	
			
 
				+
			
 
				 	if (synchronous_fallback)
			
 
				 	{
			
 
				 		_STARPU_TRACE_DATA_COPY(src_node, dst_node, nnz*elemsize + (nnz+nrow+1)*sizeof(uint32_t));
			
 
				 		return 0;
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		_STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
			
 
				 		return -EAGAIN;
			
 
				 	}
			
--- a/src/datawizard/interfaces/data_interface.c
+++ b/src/datawizard/interfaces/data_interface.c
@@ -45,7 +45,8 @@ void _starpu_data_interface_shutdown()
 
				 
			
 
				 	_starpu_spin_destroy(&registered_handles_lock);
			
 
				 
			
 
				-	HASH_ITER(hh, registered_handles, entry, tmp) {
			
 
				+	HASH_ITER(hh, registered_handles, entry, tmp)
			
 
				+	{
			
 
				 		HASH_DEL(registered_handles, entry);
			
 
				 		free(entry);
			
 
				 	}
			
@@ -89,7 +90,7 @@ starpu_data_handle_t starpu_data_lookup(const void *ptr)
 
				 	return result;
			
 
				 }
			
 
				 
			
 
				-/* 
			
 
				+/*
			
 
				  * Start monitoring a piece of data
			
 
				  */
			
 
				 
			
@@ -162,18 +163,20 @@ static void _starpu_register_new_data(starpu_data_handle_t handle,
 
				 	{
			
 
				 		struct _starpu_data_replicate *replicate;
			
 
				 		replicate = &handle->per_node[node];
			
 
				-		
			
 
				+
			
 
				 		replicate->memory_node = node;
			
 
				 		replicate->relaxed_coherency = 0;
			
 
				 		replicate->refcnt = 0;
			
 
				 
			
 
				-		if (node == home_node) {
			
 
				+		if (node == home_node)
			
 
				+		{
			
 
				 			/* this is the home node with the only valid copy */
			
 
				 			replicate->state = STARPU_OWNER;
			
 
				 			replicate->allocated = 1;
			
 
				 			replicate->automatically_allocated = 0;
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			/* the value is not available here yet */
			
 
				 			replicate->state = STARPU_INVALID;
			
 
				 			replicate->allocated = 0;
			
@@ -323,7 +326,7 @@ int starpu_data_set_tag(starpu_data_handle_t handle, int tag)
 
				         return 0;
			
 
				 }
			
 
				 
			
 
				-/* 
			
 
				+/*
			
 
				  * Stop monitoring a piece of data
			
 
				  */
			
 
				 
			
@@ -360,20 +363,22 @@ void _starpu_data_free_interfaces(starpu_data_handle_t handle)
 
				 	}
			
 
				 }
			
 
				 
			
 
				-struct _starpu_unregister_callback_arg {
			
 
				+struct _starpu_unregister_callback_arg
			
 
				+{
			
 
				 	unsigned memory_node;
			
 
				 	starpu_data_handle_t handle;
			
 
				 	unsigned terminated;
			
 
				 	pthread_mutex_t mutex;
			
 
				 	pthread_cond_t cond;
			
 
				-}; 
			
 
				+};
			
 
				 
			
 
				 /* Check whether we should tell starpu_data_unregister that the data handle is
			
 
				  * not busy any more.
			
 
				  * The header is supposed to be locked */
			
 
				 void _starpu_data_check_not_busy(starpu_data_handle_t handle)
			
 
				 {
			
 
				-	if (!handle->busy_count && handle->busy_waiting) {
			
 
				+	if (!handle->busy_count && handle->busy_waiting)
			
 
				+	{
			
 
				 		_STARPU_PTHREAD_MUTEX_LOCK(&handle->busy_mutex);
			
 
				 		_STARPU_PTHREAD_COND_BROADCAST(&handle->busy_cond);
			
 
				 		_STARPU_PTHREAD_MUTEX_UNLOCK(&handle->busy_mutex);
			
@@ -393,7 +398,7 @@ static void _starpu_data_unregister_fetch_data_callback(void *_arg)
 
				 
			
 
				 	ret = _starpu_fetch_data_on_node(handle, replicate, STARPU_R, 0, NULL, NULL);
			
 
				 	STARPU_ASSERT(!ret);
			
 
				-	
			
 
				+
			
 
				 	/* unlock the caller */
			
 
				 	_STARPU_PTHREAD_MUTEX_LOCK(&arg->mutex);
			
 
				 	arg->terminated = 1;
			
@@ -414,7 +419,7 @@ static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned cohere
 
				 
			
 
				 		/* Fetch data in the home of the data to ensure we have a valid copy
			
 
				 		 * where we registered it */
			
 
				-		int home_node = handle->home_node; 
			
 
				+		int home_node = handle->home_node;
			
 
				 		if (home_node >= 0)
			
 
				 		{
			
 
				 			struct _starpu_unregister_callback_arg arg;
			
@@ -423,7 +428,7 @@ static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned cohere
 
				 			arg.terminated = 0;
			
 
				 			_STARPU_PTHREAD_MUTEX_INIT(&arg.mutex, NULL);
			
 
				 			_STARPU_PTHREAD_COND_INIT(&arg.cond, NULL);
			
 
				-	
			
 
				+
			
 
				 			if (!_starpu_attempt_to_submit_data_request_from_apps(handle, STARPU_R,
			
 
				 					_starpu_data_unregister_fetch_data_callback, &arg))
			
 
				 			{
			
@@ -432,7 +437,8 @@ static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned cohere
 
				 				int ret = _starpu_fetch_data_on_node(handle, home_replicate, STARPU_R, 0, NULL, NULL);
			
 
				 				STARPU_ASSERT(!ret);
			
 
				 			}
			
 
				-			else {
			
 
				+			else
			
 
				+			{
			
 
				 				_STARPU_PTHREAD_MUTEX_LOCK(&arg.mutex);
			
 
				 				while (!arg.terminated)
			
 
				 					_STARPU_PTHREAD_COND_WAIT(&arg.cond, &arg.mutex);
			
@@ -441,7 +447,8 @@ static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned cohere
 
				 			_starpu_release_data_on_node(handle, 0, &handle->per_node[home_node]);
			
 
				 		}
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		/* Should we postpone the unregister operation ? */
			
 
				 		if ((handle->refcnt > 0) && handle->lazy_unregister)
			
 
				 			return;
			
@@ -467,7 +474,8 @@ static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned cohere
 
				 	{
			
 
				 		struct _starpu_data_replicate *local = &handle->per_node[node];
			
 
				 
			
 
				-		if (local->allocated && local->automatically_allocated){
			
 
				+		if (local->allocated && local->automatically_allocated)
			
 
				+		{
			
 
				 			/* free the data copy in a lazy fashion */
			
 
				 			_starpu_request_mem_chunk_removal(handle, node);
			
 
				 		}
			
@@ -502,12 +510,13 @@ void starpu_data_invalidate(starpu_data_handle_t handle)
 
				 	{
			
 
				 		struct _starpu_data_replicate *local = &handle->per_node[node];
			
 
				 
			
 
				-		if (local->allocated && local->automatically_allocated){
			
 
				+		if (local->allocated && local->automatically_allocated)
			
 
				+		{
			
 
				 			/* free the data copy in a lazy fashion */
			
 
				 			_starpu_request_mem_chunk_removal(handle, node);
			
 
				 		}
			
 
				 
			
 
				-		local->state = STARPU_INVALID; 
			
 
				+		local->state = STARPU_INVALID;
			
 
				 	}
			
 
				 
			
 
				 	_starpu_spin_unlock(&handle->header_lock);
			
--- a/src/datawizard/interfaces/matrix_filters.c
+++ b/src/datawizard/interfaces/matrix_filters.c
@@ -25,9 +25,9 @@
 
				  */
			
 
				 void starpu_block_filter_func(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks)
			
 
				 {
			
 
				-       struct starpu_matrix_interface *matrix_father = (struct starpu_matrix_interface *) father_interface;
			
 
				-       struct starpu_matrix_interface *matrix_child = (struct starpu_matrix_interface *) child_interface;
			
 
				-  
			
 
				+	struct starpu_matrix_interface *matrix_father = (struct starpu_matrix_interface *) father_interface;
			
 
				+	struct starpu_matrix_interface *matrix_child = (struct starpu_matrix_interface *) child_interface;
			
 
				+
			
 
				 	uint32_t nx = matrix_father->nx;
			
 
				 	uint32_t ny = matrix_father->ny;
			
 
				 	size_t elemsize = matrix_father->elemsize;
			
@@ -36,21 +36,22 @@ void starpu_block_filter_func(void *father_interface, void *child_interface, STA
 
				 
			
 
				 	size_t chunk_size = ((size_t)nx + nchunks - 1)/nchunks;
			
 
				 	size_t offset = (size_t)id*chunk_size*elemsize;
			
 
				-	
			
 
				-	uint32_t child_nx = 
			
 
				+
			
 
				+	uint32_t child_nx =
			
 
				 	  STARPU_MIN(chunk_size, (size_t)nx - (size_t)id*chunk_size);
			
 
				-	
			
 
				+
			
 
				 	/* update the child's interface */
			
 
				 	matrix_child->nx = child_nx;
			
 
				 	matrix_child->ny = ny;
			
 
				 	matrix_child->elemsize = elemsize;
			
 
				-	
			
 
				+
			
 
				 	/* is the information on this node valid ? */
			
 
				-	if (matrix_father->ptr) {
			
 
				-	  matrix_child->ptr = matrix_father->ptr + offset;
			
 
				-	  matrix_child->ld = matrix_father->ld;
			
 
				-	  matrix_child->dev_handle = matrix_father->dev_handle;
			
 
				-	  matrix_child->offset = matrix_father->offset + offset;
			
 
				+	if (matrix_father->ptr)
			
 
				+	{
			
 
				+		matrix_child->ptr = matrix_father->ptr + offset;
			
 
				+		matrix_child->ld = matrix_father->ld;
			
 
				+		matrix_child->dev_handle = matrix_father->dev_handle;
			
 
				+		matrix_child->offset = matrix_father->offset + offset;
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -66,7 +67,7 @@ void starpu_vertical_block_filter_func(void *father_interface, void *child_inter
 
				 	STARPU_ASSERT(nchunks <= ny);
			
 
				 
			
 
				 	size_t chunk_size = ((size_t)ny + nchunks - 1)/nchunks;
			
 
				-	size_t child_ny = 
			
 
				+	size_t child_ny =
			
 
				 	  STARPU_MIN(chunk_size, (size_t)ny - (size_t)id*chunk_size);
			
 
				 
			
 
				 	matrix_child->nx = nx;
			
@@ -74,12 +75,12 @@ void starpu_vertical_block_filter_func(void *father_interface, void *child_inter
 
				 	matrix_child->elemsize = elemsize;
			
 
				 
			
 
				 	/* is the information on this node valid ? */
			
 
				-	if (matrix_father->ptr) {
			
 
				-	  size_t offset = 
			
 
				-	    (size_t)id*chunk_size*matrix_father->ld*elemsize;
			
 
				-	  matrix_child->ptr = matrix_father->ptr + offset;
			
 
				-	  matrix_child->ld = matrix_father->ld;
			
 
				-	  matrix_child->dev_handle = matrix_father->dev_handle;
			
 
				-	  matrix_child->offset = matrix_father->offset + offset;
			
 
				+	if (matrix_father->ptr)
			
 
				+	{
			
 
				+		size_t offset = (size_t)id*chunk_size*matrix_father->ld*elemsize;
			
 
				+		matrix_child->ptr = matrix_father->ptr + offset;
			
 
				+		matrix_child->ld = matrix_father->ld;
			
 
				+		matrix_child->dev_handle = matrix_father->dev_handle;
			
 
				+		matrix_child->offset = matrix_father->offset + offset;
			
 
				 	}
			
 
				 }
			
--- a/src/datawizard/interfaces/matrix_interface.c
+++ b/src/datawizard/interfaces/matrix_interface.c
@@ -46,7 +46,8 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARP
 
				 static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, void *_event);
			
 
				 #endif
			
 
				 
			
 
				-static const struct starpu_data_copy_methods matrix_copy_data_methods_s = {
			
 
				+static const struct starpu_data_copy_methods matrix_copy_data_methods_s =
			
 
				+{
			
 
				 	.ram_to_ram = copy_ram_to_ram,
			
 
				 	.ram_to_spu = NULL,
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -80,10 +81,11 @@ static uint32_t footprint_matrix_interface_crc32(starpu_data_handle_t handle);
 
				 static int matrix_compare(void *data_interface_a, void *data_interface_b);
			
 
				 static void display_matrix_interface(starpu_data_handle_t handle, FILE *f);
			
 
				 #ifdef STARPU_USE_GORDON
			
 
				-static int convert_matrix_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss); 
			
 
				+static int convert_matrix_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss);
			
 
				 #endif
			
 
				 
			
 
				-struct starpu_data_interface_ops _starpu_interface_matrix_ops = {
			
 
				+struct starpu_data_interface_ops _starpu_interface_matrix_ops =
			
 
				+{
			
 
				 	.register_data_handle = register_matrix_handle,
			
 
				 	.allocate_data_on_node = allocate_matrix_buffer_on_node,
			
 
				 	.handle_to_pointer = matrix_handle_to_pointer,
			
@@ -95,13 +97,13 @@ struct starpu_data_interface_ops _starpu_interface_matrix_ops = {
 
				 #ifdef STARPU_USE_GORDON
			
 
				 	.convert_to_gordon = convert_matrix_to_gordon,
			
 
				 #endif
			
 
				-	.interfaceid = STARPU_MATRIX_INTERFACE_ID, 
			
 
				+	.interfaceid = STARPU_MATRIX_INTERFACE_ID,
			
 
				 	.interface_size = sizeof(struct starpu_matrix_interface),
			
 
				 	.display = display_matrix_interface
			
 
				 };
			
 
				 
			
 
				 #ifdef STARPU_USE_GORDON
			
 
				-static int convert_matrix_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss) 
			
 
				+static int convert_matrix_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss)
			
 
				 {
			
 
				 	size_t elemsize = GET_MATRIX_ELEMSIZE(interface);
			
 
				 	uint32_t nx = STARPU_MATRIX_GET_NX(interface);
			
@@ -128,13 +130,15 @@ static void register_matrix_handle(starpu_data_handle_t handle, uint32_t home_no
 
				 		struct starpu_matrix_interface *local_interface = (struct starpu_matrix_interface *)
			
 
				 			starpu_data_get_interface_on_node(handle, node);
			
 
				 
			
 
				-		if (node == home_node) {
			
 
				+		if (node == home_node)
			
 
				+		{
			
 
				 			local_interface->ptr = matrix_interface->ptr;
			
 
				                         local_interface->dev_handle = matrix_interface->dev_handle;
			
 
				                         local_interface->offset = matrix_interface->offset;
			
 
				 			local_interface->ld  = matrix_interface->ld;
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			local_interface->ptr = 0;
			
 
				 			local_interface->dev_handle = 0;
			
 
				 			local_interface->offset = 0;
			
@@ -163,7 +167,8 @@ void starpu_matrix_data_register(starpu_data_handle_t *handleptr, uint32_t home_
 
				 			uintptr_t ptr, uint32_t ld, uint32_t nx,
			
 
				 			uint32_t ny, size_t elemsize)
			
 
				 {
			
 
				-	struct starpu_matrix_interface matrix_interface = {
			
 
				+	struct starpu_matrix_interface matrix_interface =
			
 
				+	{
			
 
				 		.ptr = ptr,
			
 
				 		.ld = ld,
			
 
				 		.nx = nx,
			
@@ -206,7 +211,7 @@ static size_t matrix_interface_get_size(starpu_data_handle_t handle)
 
				 		starpu_data_get_interface_on_node(handle, 0);
			
 
				 
			
 
				 	size_t size;
			
 
				-	size = (size_t)matrix_interface->nx*matrix_interface->ny*matrix_interface->elemsize; 
			
 
				+	size = (size_t)matrix_interface->nx*matrix_interface->ny*matrix_interface->elemsize;
			
 
				 
			
 
				 	return size;
			
 
				 }
			
@@ -284,10 +289,11 @@ static ssize_t allocate_matrix_buffer_on_node(void *data_interface_, uint32_t ds
 
				 
			
 
				 	enum _starpu_node_kind kind = _starpu_get_node_kind(dst_node);
			
 
				 
			
 
				-	switch(kind) {
			
 
				+	switch(kind)
			
 
				+	{
			
 
				 		case STARPU_CPU_RAM:
			
 
				 			addr = (uintptr_t)malloc((size_t)nx*ny*elemsize);
			
 
				-			if (!addr) 
			
 
				+			if (!addr)
			
 
				 				fail = 1;
			
 
				 
			
 
				 			break;
			
@@ -298,7 +304,7 @@ static ssize_t allocate_matrix_buffer_on_node(void *data_interface_, uint32_t ds
 
				 			{
			
 
				 				if (STARPU_UNLIKELY(status != cudaErrorMemoryAllocation))
			
 
				 					 STARPU_CUDA_REPORT_ERROR(status);
			
 
				-					
			
 
				+
			
 
				 				fail = 1;
			
 
				 			}
			
 
				 
			
@@ -313,7 +319,8 @@ static ssize_t allocate_matrix_buffer_on_node(void *data_interface_, uint32_t ds
 
				                                 void *ptr;
			
 
				                                 ret = _starpu_opencl_allocate_memory(&ptr, nx*ny*elemsize, CL_MEM_READ_WRITE);
			
 
				                                 addr = (uintptr_t)ptr;
			
 
				-				if (ret) {
			
 
				+				if (ret)
			
 
				+				{
			
 
				 					fail = 1;
			
 
				 				}
			
 
				 				break;
			
@@ -323,7 +330,8 @@ static ssize_t allocate_matrix_buffer_on_node(void *data_interface_, uint32_t ds
 
				 			assert(0);
			
 
				 	}
			
 
				 
			
 
				-	if (!fail) {
			
 
				+	if (!fail)
			
 
				+	{
			
 
				 		/* allocation succeeded */
			
 
				 		allocated_memory = (size_t)nx*ny*elemsize;
			
 
				 
			
@@ -332,11 +340,13 @@ static ssize_t allocate_matrix_buffer_on_node(void *data_interface_, uint32_t ds
 
				                 matrix_interface->dev_handle = addr;
			
 
				                 matrix_interface->offset = 0;
			
 
				 		matrix_interface->ld = ld;
			
 
				-	} else {
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				 		/* allocation failed */
			
 
				 		allocated_memory = -ENOMEM;
			
 
				 	}
			
 
				-	
			
 
				+
			
 
				 	return allocated_memory;
			
 
				 }
			
 
				 
			
@@ -349,13 +359,14 @@ static void free_matrix_buffer_on_node(void *data_interface, uint32_t node)
 
				 #endif
			
 
				 
			
 
				 	enum _starpu_node_kind kind = _starpu_get_node_kind(node);
			
 
				-	switch(kind) {
			
 
				+	switch(kind)
			
 
				+	{
			
 
				 		case STARPU_CPU_RAM:
			
 
				 			free((void*)matrix_interface->ptr);
			
 
				 			break;
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 		case STARPU_CUDA_RAM:
			
 
				-			status = cudaFree((void*)matrix_interface->ptr);			
			
 
				+			status = cudaFree((void*)matrix_interface->ptr);
			
 
				 			if (STARPU_UNLIKELY(status))
			
 
				 				STARPU_CUDA_REPORT_ERROR(status);
			
 
				 
			
@@ -663,7 +674,7 @@ static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBU
 
				 		uint32_t src_offset = y*ld_src*elemsize;
			
 
				 		uint32_t dst_offset = y*ld_dst*elemsize;
			
 
				 
			
 
				-		memcpy((void *)(ptr_dst + dst_offset), 
			
 
				+		memcpy((void *)(ptr_dst + dst_offset),
			
 
				 			(void *)(ptr_src + src_offset), nx*elemsize);
			
 
				 	}
			
 
				 
			
--- a/src/datawizard/interfaces/multiformat_interface.c
+++ b/src/datawizard/interfaces/multiformat_interface.c
@@ -40,7 +40,8 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARP
 
				 static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, void *_event);
			
 
				 #endif
			
 
				 
			
 
				-static const struct starpu_data_copy_methods multiformat_copy_data_methods_s = {
			
 
				+static const struct starpu_data_copy_methods multiformat_copy_data_methods_s =
			
 
				+{
			
 
				 	.ram_to_ram = copy_ram_to_ram,
			
 
				 	.ram_to_spu = NULL,
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -74,11 +75,12 @@ static int multiformat_compare(void *data_interface_a, void *data_interface_b);
 
				 static void display_multiformat_interface(starpu_data_handle_t handle, FILE *f);
			
 
				 static uint32_t starpu_multiformat_get_nx(starpu_data_handle_t handle);
			
 
				 #ifdef STARPU_USE_GORDON
			
 
				-static int convert_multiformat_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss); 
			
 
				+static int convert_multiformat_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss);
			
 
				 #endif
			
 
				 
			
 
				 
			
 
				-static struct starpu_data_interface_ops interface_multiformat_ops = {
			
 
				+static struct starpu_data_interface_ops interface_multiformat_ops =
			
 
				+{
			
 
				 	.register_data_handle  = register_multiformat_handle,
			
 
				 	.allocate_data_on_node = allocate_multiformat_buffer_on_node,
			
 
				 	.handle_to_pointer     = multiformat_handle_to_pointer,
			
@@ -98,10 +100,11 @@ static struct starpu_data_interface_ops interface_multiformat_ops = {
 
				 static void *multiformat_handle_to_pointer(starpu_data_handle_t handle, uint32_t node)
			
 
				 {
			
 
				 	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
			
 
				-	struct starpu_multiformat_interface *multiformat_interface = 
			
 
				+	struct starpu_multiformat_interface *multiformat_interface =
			
 
				 		starpu_data_get_interface_on_node(handle, node);
			
 
				 
			
 
				-	switch(_starpu_get_node_kind(node)) {
			
 
				+	switch(_starpu_get_node_kind(node))
			
 
				+	{
			
 
				 		case STARPU_CPU_RAM:
			
 
				 			return multiformat_interface->cpu_ptr;
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -123,11 +126,13 @@ static void register_multiformat_handle(starpu_data_handle_t handle, uint32_t ho
 
				 	multiformat_interface = (struct starpu_multiformat_interface *) data_interface;
			
 
				 
			
 
				 	unsigned node;
			
 
				-	for (node = 0; node < STARPU_MAXNODES; node++) {
			
 
				+	for (node = 0; node < STARPU_MAXNODES; node++)
			
 
				+	{
			
 
				 		struct starpu_multiformat_interface *local_interface =
			
 
				 			starpu_data_get_interface_on_node(handle, node);
			
 
				 
			
 
				-		if (node == home_node) {
			
 
				+		if (node == home_node)
			
 
				+		{
			
 
				 			local_interface->cpu_ptr    = multiformat_interface->cpu_ptr;
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 			local_interface->cuda_ptr   = multiformat_interface->cuda_ptr;
			
@@ -138,7 +143,8 @@ static void register_multiformat_handle(starpu_data_handle_t handle, uint32_t ho
 
				 			local_interface->dev_handle = multiformat_interface->dev_handle;
			
 
				 			local_interface->offset     = multiformat_interface->offset;
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			local_interface->cpu_ptr    = NULL;
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 			local_interface->cuda_ptr   = NULL;
			
@@ -161,7 +167,8 @@ void starpu_multiformat_data_register(starpu_data_handle_t *handleptr,
 
				 				      uint32_t nobjects,
			
 
				 				      struct starpu_multiformat_data_interface_ops *format_ops)
			
 
				 {
			
 
				-	struct starpu_multiformat_interface multiformat = {
			
 
				+	struct starpu_multiformat_interface multiformat =
			
 
				+	{
			
 
				 		.cpu_ptr    = ptr,
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 		.cuda_ptr   = NULL,
			
@@ -169,7 +176,7 @@ void starpu_multiformat_data_register(starpu_data_handle_t *handleptr,
 
				 #ifdef STARPu_USE_OPENCL
			
 
				 		.opencl_ptr = NULL,
			
 
				 #endif
			
 
				-		.nx         = nobjects, 
			
 
				+		.nx         = nobjects,
			
 
				 		.dev_handle = (uintptr_t) ptr,
			
 
				 		.offset     = 0,
			
 
				 		.ops        = format_ops
			
@@ -231,7 +238,8 @@ static void free_multiformat_buffer_on_node(void *data_interface, uint32_t node)
 
				 	multiformat_interface = (struct starpu_multiformat_interface *) data_interface;
			
 
				 	enum _starpu_node_kind kind = _starpu_get_node_kind(node);
			
 
				 
			
 
				-	switch(kind) {
			
 
				+	switch(kind)
			
 
				+	{
			
 
				 		case STARPU_CPU_RAM:
			
 
				 			free(multiformat_interface->cpu_ptr);
			
 
				 			multiformat_interface->cpu_ptr = NULL;
			
@@ -261,14 +269,17 @@ static ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, uint32
 
				 	ssize_t allocated_memory;
			
 
				 
			
 
				 	enum _starpu_node_kind kind = _starpu_get_node_kind(dst_node);
			
 
				-	switch(kind) {
			
 
				+	switch(kind)
			
 
				+	{
			
 
				 		case STARPU_CPU_RAM:
			
 
				 			allocated_memory = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
			
 
				 			addr = (uintptr_t)malloc(allocated_memory);
			
 
				-			if (!addr) {
			
 
				+			if (!addr)
			
 
				+			{
			
 
				 				fail = 1;
			
 
				 			}
			
 
				-			else {
			
 
				+			else
			
 
				+			{
			
 
				 				multiformat_interface->cpu_ptr = (void *) addr;
			
 
				 				multiformat_interface->dev_handle = addr;
			
 
				 			}
			
@@ -278,10 +289,12 @@ static ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, uint32
 
				 			{
			
 
				 				allocated_memory = multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize;
			
 
				 				cudaError_t status = cudaMalloc((void **)&addr, allocated_memory);
			
 
				-				if (STARPU_UNLIKELY(status)) {
			
 
				+				if (STARPU_UNLIKELY(status))
			
 
				+				{
			
 
				 					STARPU_CUDA_REPORT_ERROR(status);
			
 
				 				}
			
 
				-				else {
			
 
				+				else
			
 
				+				{
			
 
				 					multiformat_interface->cuda_ptr = (void *)addr;
			
 
				 					multiformat_interface->dev_handle = addr;
			
 
				 				}
			
@@ -296,10 +309,12 @@ static ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, uint32
 
				 				allocated_memory = multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize;
			
 
				                                 ret = _starpu_opencl_allocate_memory(&ptr, allocated_memory, CL_MEM_READ_WRITE);
			
 
				                                 addr = (uintptr_t)ptr;
			
 
				-				if (ret) {
			
 
				+				if (ret)
			
 
				+				{
			
 
				 					fail = 1;
			
 
				 				}
			
 
				-				else {
			
 
				+				else
			
 
				+				{
			
 
				 					multiformat_interface->opencl_ptr = (void *)addr;
			
 
				 					multiformat_interface->dev_handle = addr;
			
 
				 
			
@@ -358,11 +373,13 @@ static int copy_cuda_common(void *src_interface, unsigned src_node,
 
				 
			
 
				 	cudaError_t status;
			
 
				 
			
 
				-	switch (kind) {
			
 
				+	switch (kind)
			
 
				+	{
			
 
				 		case cudaMemcpyHostToDevice:
			
 
				 		{
			
 
				 			size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
			
 
				-			if (src_multiformat->cuda_ptr == NULL) {
			
 
				+			if (src_multiformat->cuda_ptr == NULL)
			
 
				+			{
			
 
				 				src_multiformat->cuda_ptr = malloc(size);
			
 
				 				if (src_multiformat->cuda_ptr == NULL)
			
 
				 					return -ENOMEM;
			
@@ -376,7 +393,8 @@ static int copy_cuda_common(void *src_interface, unsigned src_node,
 
				 			dst_multiformat->conversion_time = starpu_timing_now() - tmp;
			
 
				 
			
 
				 			status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
			
 
				-			if (STARPU_UNLIKELY(status)) {
			
 
				+			if (STARPU_UNLIKELY(status))
			
 
				+			{
			
 
				 				STARPU_CUDA_REPORT_ERROR(status);
			
 
				 			}
			
 
				 			break;
			
@@ -387,12 +405,12 @@ static int copy_cuda_common(void *src_interface, unsigned src_node,
 
				 			status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
			
 
				 			if (STARPU_UNLIKELY(status))
			
 
				 				STARPU_CUDA_REPORT_ERROR(status);
			
 
				-		
			
 
				+
			
 
				 			void *buffers[1];
			
 
				 			struct starpu_codelet *cl = src_multiformat->ops->cuda_to_cpu_cl;
			
 
				 			buffers[0] = dst_interface;
			
 
				 			cl->cpu_func(buffers, NULL);
			
 
				-							  
			
 
				+
			
 
				 			break;
			
 
				 		}
			
 
				 		case cudaMemcpyDeviceToDevice:
			
@@ -431,11 +449,13 @@ static int copy_cuda_common_async(void *src_interface, unsigned src_node, void *
 
				 	size_t size;
			
 
				 	cudaError_t status;
			
 
				 
			
 
				-	switch (kind) {
			
 
				+	switch (kind)
			
 
				+	{
			
 
				 		case cudaMemcpyHostToDevice:
			
 
				 		{
			
 
				 			size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
			
 
				-			if (src_multiformat->cuda_ptr == NULL) {
			
 
				+			if (src_multiformat->cuda_ptr == NULL)
			
 
				+			{
			
 
				 				src_multiformat->cuda_ptr = malloc(size);
			
 
				 				if (src_multiformat->cuda_ptr == NULL)
			
 
				 					return -ENOMEM;
			
@@ -451,7 +471,8 @@ static int copy_cuda_common_async(void *src_interface, unsigned src_node, void *
 
				 
			
 
				 			/* Actual copy from host to device */
			
 
				 			status = cudaMemcpyAsync(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind, stream);
			
 
				-			if (STARPU_UNLIKELY(status)) {
			
 
				+			if (STARPU_UNLIKELY(status))
			
 
				+			{
			
 
				 				STARPU_CUDA_REPORT_ERROR(status);
			
 
				 			}
			
 
				 			break;
			
@@ -602,9 +623,11 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node,
 
				 
			
 
				 	size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize;
			
 
				 
			
 
				-	if (src_multiformat->opencl_ptr == NULL) {
			
 
				+	if (src_multiformat->opencl_ptr == NULL)
			
 
				+	{
			
 
				 		src_multiformat->opencl_ptr = malloc(src_multiformat->nx * src_multiformat->ops->opencl_elemsize);
			
 
				-		if (src_multiformat->opencl_ptr == NULL) {
			
 
				+		if (src_multiformat->opencl_ptr == NULL)
			
 
				+		{
			
 
				 			return -ENOMEM;
			
 
				 		}
			
 
				 	}
			
--- a/src/datawizard/interfaces/variable_interface.c
+++ b/src/datawizard/interfaces/variable_interface.c
@@ -42,7 +42,8 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node, void
 
				 static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, void *_event);
			
 
				 #endif
			
 
				 
			
 
				-static const struct starpu_data_copy_methods variable_copy_data_methods_s = {
			
 
				+static const struct starpu_data_copy_methods variable_copy_data_methods_s =
			
 
				+{
			
 
				 	.ram_to_ram = copy_ram_to_ram,
			
 
				 	.ram_to_spu = NULL,
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -75,10 +76,11 @@ static uint32_t footprint_variable_interface_crc32(starpu_data_handle_t handle);
 
				 static int variable_compare(void *data_interface_a, void *data_interface_b);
			
 
				 static void display_variable_interface(starpu_data_handle_t handle, FILE *f);
			
 
				 #ifdef STARPU_USE_GORDON
			
 
				-static int convert_variable_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss); 
			
 
				+static int convert_variable_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss);
			
 
				 #endif
			
 
				 
			
 
				-static struct starpu_data_interface_ops interface_variable_ops = {
			
 
				+static struct starpu_data_interface_ops interface_variable_ops =
			
 
				+{
			
 
				 	.register_data_handle = register_variable_handle,
			
 
				 	.allocate_data_on_node = allocate_variable_buffer_on_node,
			
 
				 	.handle_to_pointer = variable_handle_to_pointer,
			
@@ -91,7 +93,7 @@ static struct starpu_data_interface_ops interface_variable_ops = {
 
				 	.convert_to_gordon = convert_variable_to_gordon,
			
 
				 #endif
			
 
				 	.interfaceid = STARPU_VARIABLE_INTERFACE_ID,
			
 
				-	.interface_size = sizeof(struct starpu_variable_interface), 
			
 
				+	.interface_size = sizeof(struct starpu_variable_interface),
			
 
				 	.display = display_variable_interface
			
 
				 };
			
 
				 
			
@@ -110,10 +112,12 @@ static void register_variable_handle(starpu_data_handle_t handle, uint32_t home_
 
				 		struct starpu_variable_interface *local_interface = (struct starpu_variable_interface *)
			
 
				 			starpu_data_get_interface_on_node(handle, node);
			
 
				 
			
 
				-		if (node == home_node) {
			
 
				+		if (node == home_node)
			
 
				+		{
			
 
				 			local_interface->ptr = STARPU_VARIABLE_GET_PTR(data_interface);
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			local_interface->ptr = 0;
			
 
				 		}
			
 
				 
			
@@ -122,7 +126,7 @@ static void register_variable_handle(starpu_data_handle_t handle, uint32_t home_
 
				 }
			
 
				 
			
 
				 #ifdef STARPU_USE_GORDON
			
 
				-int convert_variable_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss) 
			
 
				+int convert_variable_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss)
			
 
				 {
			
 
				 	*ptr = STARPU_VARIABLE_GET_PTR(interface);
			
 
				 	(*ss).size = STARPU_VARIABLE_GET_ELEMSIZE(interface);
			
@@ -135,12 +139,13 @@ int convert_variable_to_gordon(void *data_interface, uint64_t *ptr, gordon_strid
 
				 void starpu_variable_data_register(starpu_data_handle_t *handleptr, uint32_t home_node,
			
 
				                         uintptr_t ptr, size_t elemsize)
			
 
				 {
			
 
				-	struct starpu_variable_interface variable = {
			
 
				+	struct starpu_variable_interface variable =
			
 
				+	{
			
 
				 		.ptr = ptr,
			
 
				 		.elemsize = elemsize
			
 
				-	};	
			
 
				+	};
			
 
				 
			
 
				-	starpu_data_register(handleptr, home_node, &variable, &interface_variable_ops); 
			
 
				+	starpu_data_register(handleptr, home_node, &variable, &interface_variable_ops);
			
 
				 }
			
 
				 
			
 
				 
			
@@ -156,7 +161,7 @@ static int variable_compare(void *data_interface_a, void *data_interface_b)
 
				 
			
 
				 	/* Two variables are considered compatible if they have the same size */
			
 
				 	return (variable_a->elemsize == variable_b->elemsize);
			
 
				-} 
			
 
				+}
			
 
				 
			
 
				 static void display_variable_interface(starpu_data_handle_t handle, FILE *f)
			
 
				 {
			
@@ -208,7 +213,8 @@ static ssize_t allocate_variable_buffer_on_node(void *data_interface_, uint32_t
 
				 	cudaError_t status;
			
 
				 #endif
			
 
				 
			
 
				-	switch(kind) {
			
 
				+	switch(kind)
			
 
				+	{
			
 
				 		case STARPU_CPU_RAM:
			
 
				 			addr = (uintptr_t)malloc(elemsize);
			
 
				 			if (!addr)
			
@@ -233,7 +239,8 @@ static ssize_t allocate_variable_buffer_on_node(void *data_interface_, uint32_t
 
				                                 void *ptr;
			
 
				                                 ret = _starpu_opencl_allocate_memory(&ptr, elemsize, CL_MEM_READ_WRITE);
			
 
				                                 addr = (uintptr_t)ptr;
			
 
				-				if (ret) {
			
 
				+				if (ret)
			
 
				+				{
			
 
				 					fail = 1;
			
 
				 				}
			
 
				 				break;
			
@@ -251,14 +258,15 @@ static ssize_t allocate_variable_buffer_on_node(void *data_interface_, uint32_t
 
				 
			
 
				 	/* update the data properly in consequence */
			
 
				 	variable_interface->ptr = addr;
			
 
				-	
			
 
				+
			
 
				 	return allocated_memory;
			
 
				 }
			
 
				 
			
 
				 static void free_variable_buffer_on_node(void *data_interface, uint32_t node)
			
 
				 {
			
 
				 	enum _starpu_node_kind kind = _starpu_get_node_kind(node);
			
 
				-	switch(kind) {
			
 
				+	switch(kind)
			
 
				+	{
			
 
				 		case STARPU_CPU_RAM:
			
 
				 			free((void*)STARPU_VARIABLE_GET_PTR(data_interface));
			
 
				 			break;
			
@@ -315,7 +323,8 @@ static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRI
 
				 	{
			
 
				 		return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToDevice);
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 #ifdef HAVE_CUDA_MEMCPY_PEER
			
 
				 		int src_dev = _starpu_memory_node_to_devid(src_node);
			
 
				 		int dst_dev = _starpu_memory_node_to_devid(dst_node);
			
@@ -384,7 +393,8 @@ static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node,					v
 
				 	{
			
 
				 		return copy_cuda_async_common(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyDeviceToDevice);
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 #ifdef HAVE_CUDA_MEMCPY_PEER
			
 
				 		int src_dev = _starpu_memory_node_to_devid(src_node);
			
 
				 		int dst_dev = _starpu_memory_node_to_devid(dst_node);
			
--- a/src/datawizard/interfaces/vector_filters.c
+++ b/src/datawizard/interfaces/vector_filters.c
@@ -24,7 +24,7 @@ void starpu_block_filter_func_vector(void *father_interface, void *child_interfa
 
				 {
			
 
				         struct starpu_vector_interface *vector_father = (struct starpu_vector_interface *) father_interface;
			
 
				         struct starpu_vector_interface *vector_child = (struct starpu_vector_interface *) child_interface;
			
 
				-	
			
 
				+
			
 
				 	uint32_t nx = vector_father->nx;
			
 
				 	size_t elemsize = vector_father->elemsize;
			
 
				 
			
@@ -33,16 +33,17 @@ void starpu_block_filter_func_vector(void *father_interface, void *child_interfa
 
				 	uint32_t chunk_size = (nx + nchunks - 1)/nchunks;
			
 
				 	size_t offset = id*chunk_size*elemsize;
			
 
				 
			
 
				-	uint32_t child_nx = 
			
 
				+	uint32_t child_nx =
			
 
				 	  STARPU_MIN(chunk_size, nx - id*chunk_size);
			
 
				 
			
 
				 	vector_child->nx = child_nx;
			
 
				 	vector_child->elemsize = elemsize;
			
 
				 
			
 
				-	if (vector_father->ptr) {
			
 
				-	  vector_child->ptr = vector_father->ptr + offset;
			
 
				-	  vector_child->dev_handle = vector_father->dev_handle;
			
 
				-	  vector_child->offset = vector_father->offset + offset;
			
 
				+	if (vector_father->ptr)
			
 
				+	{
			
 
				+		vector_child->ptr = vector_father->ptr + offset;
			
 
				+		vector_child->dev_handle = vector_father->dev_handle;
			
 
				+		vector_child->offset = vector_father->offset + offset;
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -51,7 +52,7 @@ void starpu_vector_divide_in_2_filter_func(void *father_interface, void *child_i
 
				 {
			
 
				         /* there cannot be more than 2 chunks */
			
 
				         STARPU_ASSERT(id < 2);
			
 
				-	
			
 
				+
			
 
				 	struct starpu_vector_interface *vector_father = (struct starpu_vector_interface *) father_interface;
			
 
				 	struct starpu_vector_interface *vector_child = (struct starpu_vector_interface *) child_interface;
			
 
				 
			
@@ -61,29 +62,31 @@ void starpu_vector_divide_in_2_filter_func(void *father_interface, void *child_i
 
				 	size_t elemsize = vector_father->elemsize;
			
 
				 
			
 
				 	STARPU_ASSERT(length_first < nx);
			
 
				-	
			
 
				+
			
 
				 	/* this is the first child */
			
 
				-	if (id == 0) {
			
 
				-	  vector_child->nx = length_first;
			
 
				-	  vector_child->elemsize = elemsize;
			
 
				-
			
 
				-	  if (vector_father->ptr) {
			
 
				-	    vector_child->ptr = vector_father->ptr;
			
 
				-	    vector_child->offset = vector_father->offset;
			
 
				-	    vector_child->dev_handle = vector_father->dev_handle;
			
 
				-	  }
			
 
				+	if (id == 0)
			
 
				+	{
			
 
				+		vector_child->nx = length_first;
			
 
				+		vector_child->elemsize = elemsize;
			
 
				+
			
 
				+		if (vector_father->ptr)
			
 
				+		{
			
 
				+			vector_child->ptr = vector_father->ptr;
			
 
				+			vector_child->offset = vector_father->offset;
			
 
				+			vector_child->dev_handle = vector_father->dev_handle;
			
 
				+		}
			
 
				 	}
			
 
				-
			
 
				-	/* the second child */
			
 
				-	else {
			
 
				-	  vector_child->nx = nx - length_first;
			
 
				-	  vector_child->elemsize = elemsize;
			
 
				-
			
 
				-	  if (vector_father->ptr) {
			
 
				-	    vector_child->ptr = vector_father->ptr + length_first*elemsize;
			
 
				-	    vector_child->offset = vector_father->offset + length_first*elemsize;
			
 
				-	    vector_child->dev_handle = vector_father->dev_handle;
			
 
				-	  }
			
 
				+	else /* the second child */
			
 
				+	{
			
 
				+		vector_child->nx = nx - length_first;
			
 
				+		vector_child->elemsize = elemsize;
			
 
				+
			
 
				+		if (vector_father->ptr)
			
 
				+		{
			
 
				+			vector_child->ptr = vector_father->ptr + length_first*elemsize;
			
 
				+			vector_child->offset = vector_father->offset + length_first*elemsize;
			
 
				+			vector_child->dev_handle = vector_father->dev_handle;
			
 
				+		}
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -103,15 +106,16 @@ void starpu_vector_list_filter_func(void *father_interface, void *child_interfac
 
				 
			
 
				 	vector_child->nx = chunk_size;
			
 
				 	vector_child->elemsize = elemsize;
			
 
				-	
			
 
				-	if (vector_father->ptr) {
			
 
				-	  /* compute the current position */
			
 
				-	  unsigned i;
			
 
				-	  for (i = 0; i < id; i++) 
			
 
				-	    current_pos += length_tab[i];
			
 
				-	  
			
 
				-	  vector_child->ptr = vector_father->ptr + current_pos*elemsize;
			
 
				-	  vector_child->offset = vector_father->offset + current_pos*elemsize;
			
 
				-	  vector_child->dev_handle = vector_father->dev_handle;
			
 
				+
			
 
				+	if (vector_father->ptr)
			
 
				+	{
			
 
				+		/* compute the current position */
			
 
				+		unsigned i;
			
 
				+		for (i = 0; i < id; i++)
			
 
				+			current_pos += length_tab[i];
			
 
				+
			
 
				+		vector_child->ptr = vector_father->ptr + current_pos*elemsize;
			
 
				+		vector_child->offset = vector_father->offset + current_pos*elemsize;
			
 
				+		vector_child->dev_handle = vector_father->dev_handle;
			
 
				 	}
			
 
				 }
			
--- a/src/datawizard/interfaces/vector_interface.c
+++ b/src/datawizard/interfaces/vector_interface.c
@@ -42,7 +42,8 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARP
 
				 static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, void *_event);
			
 
				 #endif
			
 
				 
			
 
				-static const struct starpu_data_copy_methods vector_copy_data_methods_s = {
			
 
				+static const struct starpu_data_copy_methods vector_copy_data_methods_s =
			
 
				+{
			
 
				 	.ram_to_ram = copy_ram_to_ram,
			
 
				 	.ram_to_spu = NULL,
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -75,10 +76,11 @@ static uint32_t footprint_vector_interface_crc32(starpu_data_handle_t handle);
 
				 static int vector_compare(void *data_interface_a, void *data_interface_b);
			
 
				 static void display_vector_interface(starpu_data_handle_t handle, FILE *f);
			
 
				 #ifdef STARPU_USE_GORDON
			
 
				-static int convert_vector_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss); 
			
 
				+static int convert_vector_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss);
			
 
				 #endif
			
 
				 
			
 
				-static struct starpu_data_interface_ops interface_vector_ops = {
			
 
				+static struct starpu_data_interface_ops interface_vector_ops =
			
 
				+{
			
 
				 	.register_data_handle = register_vector_handle,
			
 
				 	.allocate_data_on_node = allocate_vector_buffer_on_node,
			
 
				 	.handle_to_pointer = vector_handle_to_pointer,
			
@@ -91,7 +93,7 @@ static struct starpu_data_interface_ops interface_vector_ops = {
 
				 	.convert_to_gordon = convert_vector_to_gordon,
			
 
				 #endif
			
 
				 	.interfaceid = STARPU_VECTOR_INTERFACE_ID,
			
 
				-	.interface_size = sizeof(struct starpu_vector_interface), 
			
 
				+	.interface_size = sizeof(struct starpu_vector_interface),
			
 
				 	.display = display_vector_interface
			
 
				 };
			
 
				 
			
@@ -115,12 +117,14 @@ static void register_vector_handle(starpu_data_handle_t handle, uint32_t home_no
 
				 		struct starpu_vector_interface *local_interface = (struct starpu_vector_interface *)
			
 
				 			starpu_data_get_interface_on_node(handle, node);
			
 
				 
			
 
				-		if (node == home_node) {
			
 
				+		if (node == home_node)
			
 
				+		{
			
 
				 			local_interface->ptr = vector_interface->ptr;
			
 
				                         local_interface->dev_handle = vector_interface->dev_handle;
			
 
				                         local_interface->offset = vector_interface->offset;
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			local_interface->ptr = 0;
			
 
				                         local_interface->dev_handle = 0;
			
 
				                         local_interface->offset = 0;
			
@@ -132,10 +136,10 @@ static void register_vector_handle(starpu_data_handle_t handle, uint32_t home_no
 
				 }
			
 
				 
			
 
				 #ifdef STARPU_USE_GORDON
			
 
				-int convert_vector_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss) 
			
 
				+int convert_vector_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss)
			
 
				 {
			
 
				 	struct starpu_vector_interface *vector_interface = interface;
			
 
				-	
			
 
				+
			
 
				 	*ptr = vector_interface->ptr;
			
 
				 	(*ss).size = vector_interface->nx * vector_interface->elemsize;
			
 
				 
			
@@ -147,15 +151,16 @@ int convert_vector_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideS
 
				 void starpu_vector_data_register(starpu_data_handle_t *handleptr, uint32_t home_node,
			
 
				                         uintptr_t ptr, uint32_t nx, size_t elemsize)
			
 
				 {
			
 
				-	struct starpu_vector_interface vector = {
			
 
				+	struct starpu_vector_interface vector =
			
 
				+	{
			
 
				 		.ptr = ptr,
			
 
				 		.nx = nx,
			
 
				 		.elemsize = elemsize,
			
 
				                 .dev_handle = ptr,
			
 
				                 .offset = 0
			
 
				-	};	
			
 
				+	};
			
 
				 
			
 
				-	starpu_data_register(handleptr, home_node, &vector, &interface_vector_ops); 
			
 
				+	starpu_data_register(handleptr, home_node, &vector, &interface_vector_ops);
			
 
				 }
			
 
				 
			
 
				 
			
@@ -243,7 +248,8 @@ static ssize_t allocate_vector_buffer_on_node(void *data_interface_, uint32_t ds
 
				 	cudaError_t status;
			
 
				 #endif
			
 
				 
			
 
				-	switch(kind) {
			
 
				+	switch(kind)
			
 
				+	{
			
 
				 		case STARPU_CPU_RAM:
			
 
				 			addr = (uintptr_t)malloc(nx*elemsize);
			
 
				 			if (!addr)
			
@@ -268,7 +274,8 @@ static ssize_t allocate_vector_buffer_on_node(void *data_interface_, uint32_t ds
 
				                                 void *ptr;
			
 
				                                 ret = _starpu_opencl_allocate_memory(&ptr, nx*elemsize, CL_MEM_READ_WRITE);
			
 
				                                 addr = (uintptr_t)ptr;
			
 
				-				if (ret) {
			
 
				+				if (ret)
			
 
				+				{
			
 
				 					fail = 1;
			
 
				 				}
			
 
				 				break;
			
@@ -288,7 +295,7 @@ static ssize_t allocate_vector_buffer_on_node(void *data_interface_, uint32_t ds
 
				 	vector_interface->ptr = addr;
			
 
				         vector_interface->dev_handle = addr;
			
 
				         vector_interface->offset = 0;
			
 
				-	
			
 
				+
			
 
				 	return allocated_memory;
			
 
				 }
			
 
				 
			
@@ -301,7 +308,8 @@ static void free_vector_buffer_on_node(void *data_interface, uint32_t node)
 
				 #endif
			
 
				 
			
 
				 	enum _starpu_node_kind kind = _starpu_get_node_kind(node);
			
 
				-	switch(kind) {
			
 
				+	switch(kind)
			
 
				+	{
			
 
				 		case STARPU_CPU_RAM:
			
 
				 			free((void*)vector_interface->ptr);
			
 
				 			break;
			
@@ -395,7 +403,8 @@ static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRI
 
				 	{
			
 
				 		return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToDevice);
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 #ifdef HAVE_CUDA_MEMCPY_PEER
			
 
				 		return copy_cuda_peer_common(src_interface, src_node, dst_interface, dst_node, 0, 0);
			
 
				 #else
			
@@ -433,13 +442,14 @@ static int copy_cuda_async_common(void *src_interface, unsigned src_node STARPU_
 
				 	return -EAGAIN;
			
 
				 }
			
 
				 
			
 
				-static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node,					void *dst_interface, unsigned dst_node, cudaStream_t stream)
			
 
				+static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream)
			
 
				 {
			
 
				 	if (src_node == dst_node)
			
 
				 	{
			
 
				 		return copy_cuda_async_common(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyDeviceToDevice);
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 #ifdef HAVE_CUDA_MEMCPY_PEER
			
 
				 		return copy_cuda_peer_common(src_interface, src_node, dst_interface, dst_node, 1, stream);
			
 
				 #else
			
@@ -525,7 +535,7 @@ static int copy_opencl_to_opencl(void *src_interface, unsigned src_node STARPU_A
 
				 
			
 
				 	size_t size = src_vector->nx*src_vector->elemsize;
			
 
				 
			
 
				-	err = clEnqueueCopyBuffer(cq, (cl_mem)src_vector->dev_handle, (cl_mem)dst_vector->dev_handle, src_vector->offset, dst_vector->offset, size, 0, NULL, NULL); 
			
 
				+	err = clEnqueueCopyBuffer(cq, (cl_mem)src_vector->dev_handle, (cl_mem)dst_vector->dev_handle, src_vector->offset, dst_vector->offset, size, 0, NULL, NULL);
			
 
				         if (STARPU_UNLIKELY(err))
			
 
				                 STARPU_OPENCL_REPORT_ERROR(err);
			
 
				 
			
--- a/src/datawizard/interfaces/void_interface.c
+++ b/src/datawizard/interfaces/void_interface.c
@@ -33,7 +33,8 @@ static int dummy_cuda_copy_async(void *src_interface, unsigned src_node, void *d
 
				 static int dummy_opencl_copy_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *_event);
			
 
				 #endif
			
 
				 
			
 
				-static const struct starpu_data_copy_methods void_copy_data_methods_s = {
			
 
				+static const struct starpu_data_copy_methods void_copy_data_methods_s =
			
 
				+{
			
 
				 	.ram_to_ram = dummy_copy,
			
 
				 	.ram_to_spu = dummy_copy,
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -64,7 +65,8 @@ static uint32_t footprint_void_interface_crc32(starpu_data_handle_t handle);
 
				 static int void_compare(void *data_interface_a, void *data_interface_b);
			
 
				 static void display_void_interface(starpu_data_handle_t handle, FILE *f);
			
 
				 
			
 
				-static struct starpu_data_interface_ops interface_void_ops = {
			
 
				+static struct starpu_data_interface_ops interface_void_ops =
			
 
				+{
			
 
				 	.register_data_handle = register_void_handle,
			
 
				 	.allocate_data_on_node = allocate_void_buffer_on_node,
			
 
				 	.free_data_on_node = free_void_buffer_on_node,
			
@@ -73,7 +75,7 @@ static struct starpu_data_interface_ops interface_void_ops = {
 
				 	.footprint = footprint_void_interface_crc32,
			
 
				 	.compare = void_compare,
			
 
				 	.interfaceid = STARPU_VOID_INTERFACE_ID,
			
 
				-	.interface_size = 0, 
			
 
				+	.interface_size = 0,
			
 
				 	.display = display_void_interface
			
 
				 };
			
 
				 
			
@@ -87,7 +89,7 @@ static void register_void_handle(starpu_data_handle_t handle STARPU_ATTRIBUTE_UN
 
				 /* declare a new data with the void interface */
			
 
				 void starpu_void_data_register(starpu_data_handle_t *handleptr)
			
 
				 {
			
 
				-	starpu_data_register(handleptr, 0, NULL, &interface_void_ops); 
			
 
				+	starpu_data_register(handleptr, 0, NULL, &interface_void_ops);
			
 
				 }
			
 
				 
			
 
				 
			
--- a/src/datawizard/memalloc.c
+++ b/src/datawizard/memalloc.c
@@ -76,7 +76,8 @@ static void lock_all_subtree(starpu_data_handle_t handle)
 
				 		while (_starpu_spin_trylock(&handle->header_lock))
			
 
				 			_starpu_datawizard_progress(_starpu_get_local_memory_node(), 0);
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		/* lock all sub-subtrees children */
			
 
				 		unsigned child;
			
 
				 		for (child = 0; child < handle->nchildren; child++)
			
@@ -93,7 +94,8 @@ static void unlock_all_subtree(starpu_data_handle_t handle)
 
				 		/* this is a leaf */
			
 
				 		_starpu_spin_unlock(&handle->header_lock);
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		/* lock all sub-subtrees children
			
 
				 		 * Note that this is done in the reverse order of the
			
 
				 		 * lock_all_subtree so that we avoid deadlock */
			
@@ -143,7 +145,8 @@ static void transfer_subtree_to_node(starpu_data_handle_t handle, unsigned src_n
 
				 		struct _starpu_data_replicate *dst_replicate = &handle->per_node[dst_node];
			
 
				 
			
 
				 		/* this is a leaf */
			
 
				-		switch(src_replicate->state) {
			
 
				+		switch(src_replicate->state)
			
 
				+		{
			
 
				 		case STARPU_OWNER:
			
 
				 			/* the local node has the only copy */
			
 
				 			/* the owner is now the destination_node */
			
@@ -176,7 +179,8 @@ static void transfer_subtree_to_node(starpu_data_handle_t handle, unsigned src_n
 
				 			cnt = 0;
			
 
				 			for (i = 0; i < STARPU_MAXNODES; i++)
			
 
				 			{
			
 
				-				if (handle->per_node[i].state == STARPU_SHARED) {
			
 
				+				if (handle->per_node[i].state == STARPU_SHARED)
			
 
				+				{
			
 
				 					cnt++;
			
 
				 					last = i;
			
 
				 				}
			
@@ -194,7 +198,8 @@ static void transfer_subtree_to_node(starpu_data_handle_t handle, unsigned src_n
 
				 			break;
			
 
				 		}
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		/* lock all sub-subtrees children */
			
 
				 		unsigned child;
			
 
				 		for (child = 0; child < handle->nchildren; child++)
			
@@ -317,7 +322,8 @@ static size_t try_to_free_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node)
 
				 
			
 
				 		_starpu_spin_unlock(&handle->header_lock);
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		/* try to lock all the leafs of the subtree */
			
 
				 		lock_all_subtree(handle);
			
 
				 
			
@@ -562,7 +568,8 @@ static size_t free_potentially_in_use_mc(uint32_t node, unsigned force, size_t r
 
				 				break;
			
 
				 			#endif
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			/* We must free the memory now: note that data
			
 
				 			 * coherency is not maintained in that case ! */
			
 
				 			freed += do_free_mem_chunk(mc, node);
			
@@ -663,7 +670,8 @@ void _starpu_request_mem_chunk_removal(starpu_data_handle_t handle, unsigned nod
 
				 	{
			
 
				 		next_mc = _starpu_mem_chunk_list_next(mc);
			
 
				 
			
 
				-		if (mc->data == handle) {
			
 
				+		if (mc->data == handle)
			
 
				+		{
			
 
				 			/* we found the data */
			
 
				 			mc->data_was_deleted = 1;
			
 
				 
			
@@ -756,7 +764,8 @@ static ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, struct _s
 
				 	_STARPU_TRACE_END_ALLOC_REUSE(dst_node);
			
 
				 #endif
			
 
				 
			
 
				-	do {
			
 
				+	do
			
 
				+	{
			
 
				 		STARPU_ASSERT(handle->ops);
			
 
				 		STARPU_ASSERT(handle->ops->allocate_data_on_node);
			
 
				 
			
@@ -806,7 +815,8 @@ static ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, struct _s
 
				 			_starpu_data_check_not_busy(handle);
			
 
				 		}
			
 
				 
			
 
				-	} while((allocated_memory == -ENOMEM) && attempts++ < 2);
			
 
				+	}
			
 
				+	while((allocated_memory == -ENOMEM) && attempts++ < 2);
			
 
				 
			
 
				 	return allocated_memory;
			
 
				 }
			
--- a/src/datawizard/memalloc.h
+++ b/src/datawizard/memalloc.h
@@ -33,7 +33,7 @@ LIST_TYPE(_starpu_mem_chunk,
 
				 	size_t size;
			
 
				 
			
 
				 	uint32_t footprint;
			
 
				-	
			
 
				+
			
 
				 	/* The footprint of the data is not sufficient to determine whether two
			
 
				 	 * pieces of data have the same layout (there could be collision in the
			
 
				 	 * hash function ...) so we still keep a copy of the actual layout (ie.
			
--- a/src/datawizard/memory_nodes.c
+++ b/src/datawizard/memory_nodes.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2009-2011  Université de Bordeaux 1
			
 
				- * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -28,15 +28,16 @@ static pthread_key_t memory_node_key;
 
				 
			
 
				 void _starpu_init_memory_nodes(void)
			
 
				 {
			
 
				-	/* there is no node yet, subsequent nodes will be 
			
 
				+	/* there is no node yet, subsequent nodes will be
			
 
				 	 * added using _starpu_register_memory_node */
			
 
				 	descr.nnodes = 0;
			
 
				 
			
 
				 	pthread_key_create(&memory_node_key, NULL);
			
 
				 
			
 
				 	unsigned i;
			
 
				-	for (i = 0; i < STARPU_MAXNODES; i++) {
			
 
				-		descr.nodes[i] = STARPU_UNUSED; 
			
 
				+	for (i = 0; i < STARPU_MAXNODES; i++)
			
 
				+	{
			
 
				+		descr.nodes[i] = STARPU_UNUSED;
			
 
				 		descr.nworkers[i] = 0;
			
 
				 	}
			
 
				 
			
@@ -64,8 +65,8 @@ unsigned _starpu_get_local_memory_node(void)
 
				 {
			
 
				 	unsigned *memory_node;
			
 
				 	memory_node = (unsigned *) pthread_getspecific(memory_node_key);
			
 
				-	
			
 
				-	/* in case this is called by the programmer, we assume the RAM node 
			
 
				+
			
 
				+	/* in case this is called by the programmer, we assume the RAM node
			
 
				 	   is the appropriate memory node ... so we return 0 XXX */
			
 
				 	if (STARPU_UNLIKELY(!memory_node))
			
 
				 		return 0;
			
@@ -127,7 +128,7 @@ void _starpu_memory_node_register_condition(pthread_cond_t *cond, pthread_mutex_
 
				 {
			
 
				 	unsigned cond_id;
			
 
				 	unsigned nconds_total, nconds;
			
 
				-	
			
 
				+
			
 
				 	pthread_rwlock_wrlock(&descr.conditions_rwlock);
			
 
				 
			
 
				 	/* we only insert the queue if it's not already in the list */
			
@@ -150,7 +151,7 @@ void _starpu_memory_node_register_condition(pthread_cond_t *cond, pthread_mutex_
 
				 	descr.condition_count[nodeid]++;
			
 
				 
			
 
				 	/* do we have to add it in the global list as well ? */
			
 
				-	nconds_total = descr.total_condition_count; 
			
 
				+	nconds_total = descr.total_condition_count;
			
 
				 	for (cond_id = 0; cond_id < nconds_total; cond_id++)
			
 
				 	{
			
 
				 		if (descr.conditions_all[cond_id].cond == cond)
			
@@ -159,7 +160,7 @@ void _starpu_memory_node_register_condition(pthread_cond_t *cond, pthread_mutex_
 
				 			pthread_rwlock_unlock(&descr.conditions_rwlock);
			
 
				 			return;
			
 
				 		}
			
 
				-	} 
			
 
				+	}
			
 
				 
			
 
				 	/* it was not in the global list either */
			
 
				 	descr.conditions_all[nconds_total].cond = cond;
			
--- a/src/datawizard/memory_nodes.h
+++ b/src/datawizard/memory_nodes.h
@@ -23,7 +23,8 @@
 
				 #include <datawizard/coherency.h>
			
 
				 #include <datawizard/memalloc.h>
			
 
				 
			
 
				-enum _starpu_node_kind {
			
 
				+enum _starpu_node_kind
			
 
				+{
			
 
				 	STARPU_UNUSED     = 0x00,
			
 
				 	STARPU_CPU_RAM    = 0x01,
			
 
				 	STARPU_CUDA_RAM   = 0x02,
			
@@ -35,12 +36,14 @@ enum _starpu_node_kind {
 
				 #define _STARPU_MEMORY_NODE_TUPLE_FIRST(tuple) (tuple & 0x0F)
			
 
				 #define _STARPU_MEMORY_NODE_TUPLE_SECOND(tuple) (tuple & 0xF0)
			
 
				 
			
 
				-struct _starpu_cond_and_mutex {
			
 
				+struct _starpu_cond_and_mutex
			
 
				+{
			
 
				         pthread_cond_t *cond;
			
 
				         pthread_mutex_t *mutex;
			
 
				 };
			
 
				 
			
 
				-struct _starpu_mem_node_descr {
			
 
				+struct _starpu_mem_node_descr
			
 
				+{
			
 
				 	unsigned nnodes;
			
 
				 	enum _starpu_node_kind nodes[STARPU_MAXNODES];
			
 
				 
			
--- a/src/datawizard/reduction.c
+++ b/src/datawizard/reduction.c
@@ -50,10 +50,11 @@ void _starpu_redux_init_data_replicate(starpu_data_handle_t handle, struct _star
 
				 	STARPU_ASSERT(init_cl);
			
 
				 
			
 
				 	_starpu_cl_func init_func = NULL;
			
 
				-	
			
 
				+
			
 
				 	/* TODO Check that worker may execute the codelet */
			
 
				 
			
 
				-	switch (starpu_worker_get_type(workerid)) {
			
 
				+	switch (starpu_worker_get_type(workerid))
			
 
				+	{
			
 
				 		case STARPU_CPU_WORKER:
			
 
				 			init_func = init_cl->cpu_func;
			
 
				 			break;
			
@@ -112,7 +113,7 @@ void _starpu_data_end_reduction_mode(starpu_data_handle_t handle)
 
				 			/* Make sure the replicate is not removed */
			
 
				 			handle->per_worker[worker].refcnt++;
			
 
				 
			
 
				-			uint32_t home_node = starpu_worker_get_memory_node(worker); 
			
 
				+			uint32_t home_node = starpu_worker_get_memory_node(worker);
			
 
				 			starpu_data_register(&handle->reduction_tmp_handles[worker],
			
 
				 				home_node, handle->per_worker[worker].data_interface, handle->ops);
			
 
				 
			
@@ -120,7 +121,8 @@ void _starpu_data_end_reduction_mode(starpu_data_handle_t handle)
 
				 
			
 
				 			replicate_array[replicate_count++] = handle->reduction_tmp_handles[worker];
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			handle->reduction_tmp_handles[worker] = NULL;
			
 
				 		}
			
 
				 	}
			
@@ -133,7 +135,7 @@ void _starpu_data_end_reduction_mode(starpu_data_handle_t handle)
 
				 #endif
			
 
				 
			
 
				 //	fprintf(stderr, "REDUX REFCNT = %d\n", handle->reduction_refcnt);
			
 
				-	
			
 
				+
			
 
				 	if (replicate_count > 0)
			
 
				 	{
			
 
				 		/* Temporarily unlock the handle */
			
@@ -144,7 +146,7 @@ void _starpu_data_end_reduction_mode(starpu_data_handle_t handle)
 
				 		 * replicate */
			
 
				 		struct starpu_task *last_replicate_deps[replicate_count];
			
 
				 		memset(last_replicate_deps, 0, replicate_count*sizeof(struct starpu_task *));
			
 
				-	
			
 
				+
			
 
				 		unsigned step = 1;
			
 
				 		while (step <= replicate_count)
			
 
				 		{
			
@@ -156,42 +158,42 @@ void _starpu_data_end_reduction_mode(starpu_data_handle_t handle)
 
				 					/* Perform the reduction between replicates i
			
 
				 					 * and i+step and put the result in replicate i */
			
 
				 					struct starpu_task *redux_task = starpu_task_create();
			
 
				-		
			
 
				+
			
 
				 					redux_task->cl = handle->redux_cl;
			
 
				 					STARPU_ASSERT(redux_task->cl);
			
 
				-		
			
 
				+
			
 
				 					redux_task->buffers[0].handle = replicate_array[i];
			
 
				 					redux_task->buffers[0].mode = STARPU_RW;
			
 
				-		
			
 
				+
			
 
				 					redux_task->buffers[1].handle = replicate_array[i+step];
			
 
				 					redux_task->buffers[1].mode = STARPU_R;
			
 
				-	
			
 
				+
			
 
				 					redux_task->detach = 0;
			
 
				-	
			
 
				+
			
 
				 					int ndeps = 0;
			
 
				 					struct starpu_task *task_deps[2];
			
 
				-	
			
 
				+
			
 
				 					if (last_replicate_deps[i])
			
 
				 						task_deps[ndeps++] = last_replicate_deps[i];
			
 
				-	
			
 
				+
			
 
				 					if (last_replicate_deps[i+step])
			
 
				 						task_deps[ndeps++] = last_replicate_deps[i+step];
			
 
				-	
			
 
				+
			
 
				 					/* i depends on this task */
			
 
				 					last_replicate_deps[i] = redux_task;
			
 
				-	
			
 
				+
			
 
				 					/* we don't perform the reduction until both replicates are ready */
			
 
				-					starpu_task_declare_deps_array(redux_task, ndeps, task_deps); 
			
 
				-		
			
 
				+					starpu_task_declare_deps_array(redux_task, ndeps, task_deps);
			
 
				+
			
 
				 					int ret = starpu_task_submit(redux_task);
			
 
				 					STARPU_ASSERT(!ret);
			
 
				-		
			
 
				+
			
 
				 				}
			
 
				 			}
			
 
				 
			
 
				 			step *= 2;
			
 
				 		}
			
 
				-	
			
 
				+
			
 
				 		struct starpu_task *redux_task = starpu_task_create();
			
 
				 
			
 
				 		/* Mark these tasks so that StarPU does not block them
			
@@ -222,23 +224,23 @@ void _starpu_data_end_reduction_mode(starpu_data_handle_t handle)
 
				 		for (replicate = 0; replicate < replicate_count; replicate++)
			
 
				 		{
			
 
				 			struct starpu_task *redux_task = starpu_task_create();
			
 
				-	
			
 
				+
			
 
				 			/* Mark these tasks so that StarPU does not block them
			
 
				 			 * when they try to access the handle (normal tasks are
			
 
				 			 * data requests to that handle are frozen until the
			
 
				 			 * data is coherent again). */
			
 
				 			struct _starpu_job *j = _starpu_get_job_associated_to_task(redux_task);
			
 
				 			j->reduction_task = 1;
			
 
				-	
			
 
				+
			
 
				 			redux_task->cl = handle->redux_cl;
			
 
				 			STARPU_ASSERT(redux_task->cl);
			
 
				-	
			
 
				+
			
 
				 			redux_task->buffers[0].handle = handle;
			
 
				 			redux_task->buffers[0].mode = STARPU_RW;
			
 
				-	
			
 
				+
			
 
				 			redux_task->buffers[1].handle = replicate_array[replicate];
			
 
				 			redux_task->buffers[1].mode = STARPU_R;
			
 
				-	
			
 
				+
			
 
				 			int ret = starpu_task_submit(redux_task);
			
 
				 			STARPU_ASSERT(!ret);
			
 
				 		}
			
--- a/src/datawizard/sort_data_handles.c
+++ b/src/datawizard/sort_data_handles.c
@@ -39,7 +39,7 @@ static void find_data_path(struct _starpu_data_state *data, unsigned path[])
 
				 		path[depth - level - 1] = current->sibling_index;
			
 
				 		current = data->father_handle;
			
 
				 	}
			
 
				-} 
			
 
				+}
			
 
				 
			
 
				 static int _compar_data_paths(const unsigned pathA[], unsigned depthA,
			
 
				 				const unsigned pathB[], unsigned depthB)
			
--- a/src/datawizard/user_interactions.c
+++ b/src/datawizard/user_interactions.c
@@ -41,7 +41,8 @@ int starpu_data_request_allocation(starpu_data_handle_t handle, uint32_t node)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-struct user_interaction_wrapper {
			
 
				+struct user_interaction_wrapper
			
 
				+{
			
 
				 	starpu_data_handle_t handle;
			
 
				 	enum starpu_access_mode mode;
			
 
				 	unsigned node;
			
@@ -159,7 +160,8 @@ int starpu_data_acquire_cb(starpu_data_handle_t handle,
 
				 		int ret = starpu_task_submit(wrapper->pre_sync_task);
			
 
				 		STARPU_ASSERT(!ret);
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		_STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
			
 
				 
			
 
				 		starpu_data_acquire_cb_pre_sync_callback(wrapper);
			
@@ -183,7 +185,7 @@ static inline void _starpu_data_acquire_continuation(void *arg)
 
				 	struct _starpu_data_replicate *ram_replicate = &handle->per_node[0];
			
 
				 
			
 
				 	_starpu_fetch_data_on_node(handle, ram_replicate, wrapper->mode, 0, NULL, NULL);
			
 
				-	
			
 
				+
			
 
				 	/* continuation of starpu_data_acquire */
			
 
				 	_STARPU_PTHREAD_MUTEX_LOCK(&wrapper->lock);
			
 
				 	wrapper->finished = 1;
			
@@ -198,7 +200,8 @@ int starpu_data_acquire(starpu_data_handle_t handle, enum starpu_access_mode mod
 
				         _STARPU_LOG_IN();
			
 
				 
			
 
				 	/* unless asynchronous, it is forbidden to call this function from a callback or a codelet */
			
 
				-	if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls())) {
			
 
				+	if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls()))
			
 
				+	{
			
 
				                 _STARPU_LOG_OUT_TAG("EDEADLK");
			
 
				 		return -EDEADLK;
			
 
				         }
			
@@ -240,7 +243,8 @@ int starpu_data_acquire(starpu_data_handle_t handle, enum starpu_access_mode mod
 
				 		STARPU_ASSERT(!ret);
			
 
				 		//starpu_task_wait(wrapper.pre_sync_task);
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		_STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
			
 
				 	}
			
 
				 
			
@@ -254,7 +258,8 @@ int starpu_data_acquire(starpu_data_handle_t handle, enum starpu_access_mode mod
 
				 		int ret = _starpu_fetch_data_on_node(handle, ram_replicate, mode, 0, NULL, NULL);
			
 
				 		STARPU_ASSERT(!ret);
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		_STARPU_PTHREAD_MUTEX_LOCK(&wrapper.lock);
			
 
				 		while (!wrapper.finished)
			
 
				 			_STARPU_PTHREAD_COND_WAIT(&wrapper.cond, &wrapper.lock);
			
@@ -336,7 +341,8 @@ int _starpu_prefetch_data_on_node_with_mode(starpu_data_handle_t handle, unsigne
 
				 
			
 
				 		_starpu_spin_lock(&handle->header_lock);
			
 
				 
			
 
				-		if (!async) {
			
 
				+		if (!async)
			
 
				+		{
			
 
				 			replicate->refcnt--;
			
 
				 			STARPU_ASSERT(replicate->refcnt >= 0);
			
 
				 			STARPU_ASSERT(handle->busy_count > 0);
			
@@ -348,7 +354,8 @@ int _starpu_prefetch_data_on_node_with_mode(starpu_data_handle_t handle, unsigne
 
				 		_starpu_spin_unlock(&handle->header_lock);
			
 
				 
			
 
				 	}
			
 
				-	else if (!async) {
			
 
				+	else if (!async)
			
 
				+	{
			
 
				 		_STARPU_PTHREAD_MUTEX_LOCK(&wrapper->lock);
			
 
				 		while (!wrapper->finished)
			
 
				 			_STARPU_PTHREAD_COND_WAIT(&wrapper->cond, &wrapper->lock);
			
--- a/src/datawizard/write_back.c
+++ b/src/datawizard/write_back.c
@@ -19,7 +19,8 @@
 
				 #include <datawizard/write_back.h>
			
 
				 #include <core/dependencies/data_concurrency.h>
			
 
				 
			
 
				-static void wt_callback(void *arg) {
			
 
				+static void wt_callback(void *arg)
			
 
				+{
			
 
				 	starpu_data_handle_t handle = (starpu_data_handle_t) arg;
			
 
				 
			
 
				 	_starpu_spin_lock(&handle->header_lock);
			
@@ -27,10 +28,11 @@ static void wt_callback(void *arg) {
 
				 	_starpu_spin_unlock(&handle->header_lock);
			
 
				 }
			
 
				 
			
 
				-void _starpu_write_through_data(starpu_data_handle_t handle, uint32_t requesting_node, 
			
 
				+void _starpu_write_through_data(starpu_data_handle_t handle, uint32_t requesting_node,
			
 
				 				uint32_t write_through_mask)
			
 
				 {
			
 
				-	if ((write_through_mask & ~(1<<requesting_node)) == 0) {
			
 
				+	if ((write_through_mask & ~(1<<requesting_node)) == 0)
			
 
				+	{
			
 
				 		/* nothing will be done ... */
			
 
				 		return;
			
 
				 	}
			
@@ -39,9 +41,10 @@ void _starpu_write_through_data(starpu_data_handle_t handle, uint32_t requesting
 
				 	uint32_t node;
			
 
				 	for (node = 0; node < _starpu_get_memory_nodes_count(); node++)
			
 
				 	{
			
 
				-		if (write_through_mask & (1<<node)) {
			
 
				+		if (write_through_mask & (1<<node))
			
 
				+		{
			
 
				 			/* we need to commit the buffer on that node */
			
 
				-			if (node != requesting_node) 
			
 
				+			if (node != requesting_node)
			
 
				 			{
			
 
				 				while (_starpu_spin_trylock(&handle->header_lock))
			
 
				 					_starpu_datawizard_progress(requesting_node, 1);
			
@@ -71,7 +74,7 @@ void starpu_data_set_wt_mask(starpu_data_handle_t handle, uint32_t wt_mask)
 
				 	handle->wt_mask = wt_mask;
			
 
				 
			
 
				 	/* in case the data has some children, set their wt_mask as well */
			
 
				-	if (handle->nchildren > 0) 
			
 
				+	if (handle->nchildren > 0)
			
 
				 	{
			
 
				 		unsigned child;
			
 
				 		for (child = 0; child < handle->nchildren; child++)
			
--- a/src/datawizard/write_back.h
+++ b/src/datawizard/write_back.h
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2009, 2010  Université de Bordeaux 1
			
 
				- * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -24,7 +24,7 @@
 
				 /* If a write-through mask is associated to that data handle, this propagates
			
 
				  * the the current value of the data onto the different memory nodes in the
			
 
				  * write_through_mask. */
			
 
				-void _starpu_write_through_data(starpu_data_handle_t handle, uint32_t requesting_node, 
			
 
				+void _starpu_write_through_data(starpu_data_handle_t handle, uint32_t requesting_node,
			
 
				 					   uint32_t write_through_mask);
			
 
				 
			
 
				 #endif // __DW_WRITE_BACK_H__
			
--- a/src/debug/starpu_debug_helpers.h
+++ b/src/debug/starpu_debug_helpers.h
@@ -22,7 +22,8 @@
 
				 #include <starpu_util.h>
			
 
				 
			
 
				 #ifdef __cplusplus
			
 
				-extern "C" {
			
 
				+extern "C"
			
 
				+{
			
 
				 #endif
			
 
				 
			
 
				 /* Perform a ping pong between the two memory nodes */
			
--- a/src/debug/traces/starpu_fxt.c
+++ b/src/debug/traces/starpu_fxt.c
@@ -97,7 +97,7 @@ static struct _starpu_symbol_name_list *symbol_list;
 
				 
			
 
				 LIST_TYPE(_starpu_communication,
			
 
				 	unsigned comid;
			
 
				-	float comm_start;	
			
 
				+	float comm_start;
			
 
				 	float bandwidth;
			
 
				 	unsigned src_node;
			
 
				 	unsigned dst_node;
			
@@ -175,7 +175,7 @@ static void update_accumulated_time(int worker, double sleep_time, double exec_t
 
				 	 * point in our graph */
			
 
				 	double elapsed = current_timestamp - last_activity_flush_timestamp[worker];
			
 
				 	if (forceflush || (elapsed > ACTIVITY_PERIOD))
			
 
				-	{		
			
 
				+	{
			
 
				 		if (activity_file)
			
 
				 			fprintf(activity_file, "%d\t%f\t%f\t%f\t%f\n", worker, current_timestamp, elapsed, accumulated_exec_time[worker], accumulated_sleep_time[worker]);
			
 
				 
			
@@ -197,7 +197,7 @@ static void handle_new_mem_node(struct fxt_ev_64 *ev, struct starpu_fxt_options
 
				 	if (out_paje_file)
			
 
				 	{
			
 
				 		fprintf(out_paje_file, "7       %f	%"PRIu64"      Mn      %sp	%sMEMNODE%"PRIu64"\n", get_event_time_stamp(ev, options), ev->param[0], prefix, options->file_prefix, ev->param[0]);
			
 
				-	
			
 
				+
			
 
				 		if (!options->no_bus)
			
 
				 			fprintf(out_paje_file, "13       %f bw %sMEMNODE%"PRIu64" 0.0\n", 0.0f, prefix, ev->param[0]);
			
 
				 	}
			
@@ -205,10 +205,10 @@ static void handle_new_mem_node(struct fxt_ev_64 *ev, struct starpu_fxt_options
 
				 
			
 
				 static void handle_worker_init_start(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
			
 
				 {
			
 
				-	/* 
			
 
				+	/*
			
 
				 	   arg0 : type of worker (cuda, cpu ..)
			
 
				 	   arg1 : memory node
			
 
				-	   arg2 : thread id 
			
 
				+	   arg2 : thread id
			
 
				 	*/
			
 
				 	char *prefix = options->file_prefix;
			
 
				 
			
@@ -222,7 +222,8 @@ static void handle_worker_init_start(struct fxt_ev_64 *ev, struct starpu_fxt_opt
 
				 	char *kindstr = "";
			
 
				 	enum starpu_perf_archtype archtype = 0;
			
 
				 
			
 
				-	switch (ev->param[0]) {
			
 
				+	switch (ev->param[0])
			
 
				+	{
			
 
				 		case _STARPU_FUT_APPS_KEY:
			
 
				 			set_next_other_worker_color(workerid);
			
 
				 			kindstr = "apps";
			
@@ -326,7 +327,8 @@ static void create_paje_state_if_not_found(char *name, struct starpu_fxt_options
 
				 		green = (1.0f * hash_symbol_green) / hash_sum;
			
 
				 		blue = (1.0f * hash_symbol_blue) / hash_sum;
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		/* Use the hardcoded value for execution mode */
			
 
				 		red = 0.0f;
			
 
				 		green = 0.6f;
			
@@ -384,7 +386,7 @@ static void handle_end_codelet_body(struct fxt_ev_64 *ev, struct starpu_fxt_opti
 
				 	float codelet_length = (end_codelet_time - last_codelet_start[worker]);
			
 
				 
			
 
				 	update_accumulated_time(worker, 0.0, codelet_length, end_codelet_time, 0);
			
 
				-	
			
 
				+
			
 
				 	if (distrib_time)
			
 
				 	fprintf(distrib_time, "%s\t%s%d\t%ld\t%"PRIx32"\t%f\n", last_codelet_symbol[worker],
			
 
				 				prefix, worker, codelet_size, codelet_hash, codelet_length);
			
@@ -417,11 +419,12 @@ static void handle_user_event(struct fxt_ev_64 *ev, struct starpu_fxt_options *o
 
				 	if (worker < 0)
			
 
				 	{
			
 
				 		if (out_paje_file)
			
 
				-		fprintf(out_paje_file, "9       %f     event      %sp      %lu\n", get_event_time_stamp(ev, options), prefix, code);
			
 
				+			fprintf(out_paje_file, "9       %f     event      %sp      %lu\n", get_event_time_stamp(ev, options), prefix, code);
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		if (out_paje_file)
			
 
				-		fprintf(out_paje_file, "9       %f     event      %s%"PRIu64"      %lu\n", get_event_time_stamp(ev, options), prefix, ev->param[1], code);
			
 
				+			fprintf(out_paje_file, "9       %f     event      %s%"PRIu64"      %lu\n", get_event_time_stamp(ev, options), prefix, ev->param[1], code);
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -433,7 +436,7 @@ static void handle_start_callback(struct fxt_ev_64 *ev, struct starpu_fxt_option
 
				 		return;
			
 
				 
			
 
				 	if (out_paje_file)
			
 
				-	fprintf(out_paje_file, "10       %f	S      %s%"PRIu64"      C\n", get_event_time_stamp(ev, options), options->file_prefix, ev->param[1] );
			
 
				+		fprintf(out_paje_file, "10       %f	S      %s%"PRIu64"      C\n", get_event_time_stamp(ev, options), options->file_prefix, ev->param[1] );
			
 
				 }
			
 
				 
			
 
				 static void handle_end_callback(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
			
@@ -444,7 +447,7 @@ static void handle_end_callback(struct fxt_ev_64 *ev, struct starpu_fxt_options
 
				 		return;
			
 
				 
			
 
				 	if (out_paje_file)
			
 
				-	fprintf(out_paje_file, "10       %f	S      %s%"PRIu64"      B\n", get_event_time_stamp(ev, options), options->file_prefix, ev->param[1] );
			
 
				+		fprintf(out_paje_file, "10       %f	S      %s%"PRIu64"      B\n", get_event_time_stamp(ev, options), options->file_prefix, ev->param[1] );
			
 
				 }
			
 
				 
			
 
				 static void handle_worker_status(struct fxt_ev_64 *ev, struct starpu_fxt_options *options, const char *newstatus)
			
@@ -455,8 +458,8 @@ static void handle_worker_status(struct fxt_ev_64 *ev, struct starpu_fxt_options
 
				 		return;
			
 
				 
			
 
				 	if (out_paje_file)
			
 
				-	fprintf(out_paje_file, "10       %f	S      %s%"PRIu64"      %s\n",
			
 
				-				get_event_time_stamp(ev, options), options->file_prefix, ev->param[1], newstatus);
			
 
				+		fprintf(out_paje_file, "10       %f	S      %s%"PRIu64"      %s\n",
			
 
				+			get_event_time_stamp(ev, options), options->file_prefix, ev->param[1], newstatus);
			
 
				 }
			
 
				 
			
 
				 static double last_sleep_start[STARPU_NMAXWORKERS];
			
@@ -471,8 +474,8 @@ static void handle_start_sleep(struct fxt_ev_64 *ev, struct starpu_fxt_options *
 
				 	last_sleep_start[worker] = start_sleep_time;
			
 
				 
			
 
				 	if (out_paje_file)
			
 
				-	fprintf(out_paje_file, "10       %f	S      %s%"PRIu64"      Sl\n",
			
 
				-				get_event_time_stamp(ev, options), options->file_prefix, ev->param[0]);
			
 
				+		fprintf(out_paje_file, "10       %f	S      %s%"PRIu64"      Sl\n",
			
 
				+			get_event_time_stamp(ev, options), options->file_prefix, ev->param[0]);
			
 
				 }
			
 
				 
			
 
				 static void handle_end_sleep(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
			
@@ -484,8 +487,8 @@ static void handle_end_sleep(struct fxt_ev_64 *ev, struct starpu_fxt_options *op
 
				 	float end_sleep_timestamp = get_event_time_stamp(ev, options);
			
 
				 
			
 
				 	if (out_paje_file)
			
 
				-	fprintf(out_paje_file, "10       %f	S      %s%"PRIu64"      B\n",
			
 
				-				end_sleep_timestamp, options->file_prefix, ev->param[0]);
			
 
				+		fprintf(out_paje_file, "10       %f	S      %s%"PRIu64"      B\n",
			
 
				+			end_sleep_timestamp, options->file_prefix, ev->param[0]);
			
 
				 
			
 
				 	double sleep_length = end_sleep_timestamp - last_sleep_start[worker];
			
 
				 
			
@@ -531,7 +534,7 @@ static void handle_end_driver_copy(struct fxt_ev_64 *ev, struct starpu_fxt_optio
 
				 	unsigned dst = ev->param[1];
			
 
				 	unsigned size = ev->param[2];
			
 
				 	unsigned comid = ev->param[3];
			
 
				-	
			
 
				+
			
 
				 	char *prefix = options->file_prefix;
			
 
				 
			
 
				 	if (!options->no_bus)
			
@@ -631,14 +634,14 @@ static void handle_job_pop(struct fxt_ev_64 *ev, struct starpu_fxt_options *opti
 
				 		fprintf(out_paje_file, "13       %f ntask %ssched %f\n", current_timestamp, options->file_prefix, (float)curq_size);
			
 
				 
			
 
				 	if (activity_file)
			
 
				-	fprintf(activity_file, "cnt_ready\t%f\t%d\n", current_timestamp, curq_size);
			
 
				+		fprintf(activity_file, "cnt_ready\t%f\t%d\n", current_timestamp, curq_size);
			
 
				 }
			
 
				 
			
 
				 static
			
 
				 void handle_update_task_cnt(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
			
 
				 {
			
 
				 	float current_timestamp = get_event_time_stamp(ev, options);
			
 
				-	unsigned long nsubmitted = ev->param[0]; 
			
 
				+	unsigned long nsubmitted = ev->param[0];
			
 
				 	if (activity_file)
			
 
				 	fprintf(activity_file, "cnt_submitted\t%f\t%lu\n", current_timestamp, nsubmitted);
			
 
				 }
			
@@ -648,8 +651,8 @@ static void handle_codelet_tag_deps(struct fxt_ev_64 *ev)
 
				 	uint64_t child;
			
 
				 	uint64_t father;
			
 
				 
			
 
				-	child = ev->param[0]; 
			
 
				-	father = ev->param[1]; 
			
 
				+	child = ev->param[0];
			
 
				+	father = ev->param[1];
			
 
				 
			
 
				 	_starpu_fxt_dag_add_tag_deps(child, father);
			
 
				 }
			
@@ -676,14 +679,16 @@ static void handle_task_done(struct fxt_ev_64 *ev, struct starpu_fxt_options *op
 
				 
			
 
				 	const char *colour;
			
 
				 	char buffer[32];
			
 
				-	if (options->per_task_colour) {
			
 
				+	if (options->per_task_colour)
			
 
				+	{
			
 
				 		snprintf(buffer, 32, "#%x%x%x",
			
 
				-			get_colour_symbol_red(name)/4,
			
 
				-			get_colour_symbol_green(name)/4,
			
 
				-			get_colour_symbol_blue(name)/4);
			
 
				+			 get_colour_symbol_red(name)/4,
			
 
				+			 get_colour_symbol_green(name)/4,
			
 
				+			 get_colour_symbol_blue(name)/4);
			
 
				 		colour = &buffer[0];
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		colour= (worker < 0)?"#aaaaaa":get_worker_color(worker);
			
 
				 	}
			
 
				 
			
@@ -706,14 +711,16 @@ static void handle_tag_done(struct fxt_ev_64 *ev, struct starpu_fxt_options *opt
 
				 
			
 
				 	const char *colour;
			
 
				 	char buffer[32];
			
 
				-	if (options->per_task_colour) {
			
 
				+	if (options->per_task_colour)
			
 
				+	{
			
 
				 		snprintf(buffer, 32, "%.4f,%.4f,%.4f",
			
 
				-			get_colour_symbol_red(name)/1024.0,
			
 
				-			get_colour_symbol_green(name)/1024.0,
			
 
				-			get_colour_symbol_blue(name)/1024.0);
			
 
				+			 get_colour_symbol_red(name)/1024.0,
			
 
				+			 get_colour_symbol_green(name)/1024.0,
			
 
				+			 get_colour_symbol_blue(name)/1024.0);
			
 
				 		colour = &buffer[0];
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		colour= (worker < 0)?"0.0,0.0,0.0":get_worker_color(worker);
			
 
				 	}
			
 
				 
			
@@ -797,18 +804,20 @@ void starpu_fxt_parse_new_file(char *filename_in, struct starpu_fxt_options *opt
 
				 	/* Open the trace file */
			
 
				 	int fd_in;
			
 
				 	fd_in = open(filename_in, O_RDONLY);
			
 
				-	if (fd_in < 0) {
			
 
				+	if (fd_in < 0)
			
 
				+	{
			
 
				 	        perror("open failed :");
			
 
				 	        exit(-1);
			
 
				 	}
			
 
				 
			
 
				 	static fxt_t fut;
			
 
				 	fut = fxt_fdopen(fd_in);
			
 
				-	if (!fut) {
			
 
				+	if (!fut)
			
 
				+	{
			
 
				 	        perror("fxt_fdopen :");
			
 
				 	        exit(-1);
			
 
				 	}
			
 
				-	
			
 
				+
			
 
				 	fxt_blockev_t block;
			
 
				 	block = fxt_blockev_enter(fut);
			
 
				 
			
@@ -834,13 +843,16 @@ void starpu_fxt_parse_new_file(char *filename_in, struct starpu_fxt_options *opt
 
				 	}
			
 
				 
			
 
				 	struct fxt_ev_64 ev;
			
 
				-	while(1) {
			
 
				+	while(1)
			
 
				+	{
			
 
				 		int ret = fxt_next_ev(block, FXT_EV_TYPE_64, (struct fxt_ev *)&ev);
			
 
				-		if (ret != FXT_EV_OK) {
			
 
				+		if (ret != FXT_EV_OK)
			
 
				+		{
			
 
				 			break;
			
 
				 		}
			
 
				 
			
 
				-		switch (ev.code) {
			
 
				+		switch (ev.code)
			
 
				+		{
			
 
				 			case _STARPU_FUT_WORKER_INIT_START:
			
 
				 				handle_worker_init_start(&ev, options);
			
 
				 				break;
			
@@ -1052,7 +1064,8 @@ void starpu_fxt_distrib_file_init(struct starpu_fxt_options *options)
 
				 	{
			
 
				 		distrib_time = fopen(options->distrib_time_path, "w+");
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		distrib_time = NULL;
			
 
				 	}
			
 
				 }
			
@@ -1102,7 +1115,8 @@ void starpu_fxt_paje_file_init(struct starpu_fxt_options *options)
 
				 
			
 
				 		_starpu_fxt_write_paje_header(out_paje_file);
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		out_paje_file = NULL;
			
 
				 	}
			
 
				 }
			
@@ -1119,18 +1133,20 @@ static uint64_t starpu_fxt_find_start_time(char *filename_in)
 
				 	/* Open the trace file */
			
 
				 	int fd_in;
			
 
				 	fd_in = open(filename_in, O_RDONLY);
			
 
				-	if (fd_in < 0) {
			
 
				+	if (fd_in < 0)
			
 
				+	{
			
 
				 	        perror("open failed :");
			
 
				 	        exit(-1);
			
 
				 	}
			
 
				 
			
 
				 	static fxt_t fut;
			
 
				 	fut = fxt_fdopen(fd_in);
			
 
				-	if (!fut) {
			
 
				+	if (!fut)
			
 
				+	{
			
 
				 	        perror("fxt_fdopen :");
			
 
				 	        exit(-1);
			
 
				 	}
			
 
				-	
			
 
				+
			
 
				 	fxt_blockev_t block;
			
 
				 	block = fxt_blockev_enter(fut);
			
 
				 
			
@@ -1166,7 +1182,8 @@ void starpu_fxt_generate_trace(struct starpu_fxt_options *options)
 
				 
			
 
				 		starpu_fxt_parse_new_file(options->filenames[0], options);
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		unsigned inputfile;
			
 
				 
			
 
				 		uint64_t offsets[64];
			
@@ -1181,7 +1198,7 @@ void starpu_fxt_generate_trace(struct starpu_fxt_options *options)
 
				 		 * More generally:
			
 
				 		 *	- psi_k(x) = x - offset_k
			
 
				 		 */
			
 
				-		
			
 
				+
			
 
				 		int unique_keys[64];
			
 
				 		int rank_k[64];
			
 
				 		uint64_t start_k[64];
			
@@ -1191,13 +1208,13 @@ void starpu_fxt_generate_trace(struct starpu_fxt_options *options)
 
				 
			
 
				 		unsigned found_one_sync_point = 0;
			
 
				 		int key = 0;
			
 
				-		unsigned display_mpi = 0; 
			
 
				+		unsigned display_mpi = 0;
			
 
				 
			
 
				 		/* Compute all start_k */
			
 
				 		for (inputfile = 0; inputfile < options->ninputfiles; inputfile++)
			
 
				 		{
			
 
				 			uint64_t file_start = starpu_fxt_find_start_time(options->filenames[inputfile]);
			
 
				-			start_k[inputfile] = file_start; 
			
 
				+			start_k[inputfile] = file_start;
			
 
				 		}
			
 
				 
			
 
				 		/* Compute all sync_k if they exist */
			
@@ -1212,14 +1229,16 @@ void starpu_fxt_generate_trace(struct starpu_fxt_options *options)
 
				 				/* There was no sync point, we assume there is no offset */
			
 
				 				sync_k_exists[inputfile] = 0;
			
 
				 			}
			
 
				-			else {
			
 
				+			else
			
 
				+			{
			
 
				 				if (!found_one_sync_point)
			
 
				 				{
			
 
				 					key = unique_keys[inputfile];
			
 
				 					display_mpi = 1;
			
 
				 					found_one_sync_point = 1;
			
 
				 				}
			
 
				-				else {
			
 
				+				else
			
 
				+				{
			
 
				 					if (key != unique_keys[inputfile])
			
 
				 					{
			
 
				 						fprintf(stderr, "Warning: traces are coming from different run so we will not try to display MPI communications.\n");
			
--- a/src/debug/traces/starpu_fxt_dag.c
+++ b/src/debug/traces/starpu_fxt_dag.c
@@ -35,7 +35,8 @@ void _starpu_fxt_dag_init(char *out_path)
 
				 
			
 
				 	/* create a new file */
			
 
				 	out_file = fopen(out_path, "w+");
			
 
				-	if (!out_file) {
			
 
				+	if (!out_file)
			
 
				+	{
			
 
				 		fprintf(stderr,"error while opening %s\n", out_path);
			
 
				 		perror("fopen");
			
 
				 		exit(1);
			
@@ -66,27 +67,27 @@ void _starpu_fxt_dag_terminate(void)
 
				 void _starpu_fxt_dag_add_tag_deps(uint64_t child, uint64_t father)
			
 
				 {
			
 
				 	if (out_file)
			
 
				-	fprintf(out_file, "\t \"tag_%llx\"->\"tag_%llx\"\n", 
			
 
				-		(unsigned long long)father, (unsigned long long)child);
			
 
				+		fprintf(out_file, "\t \"tag_%llx\"->\"tag_%llx\"\n",
			
 
				+			(unsigned long long)father, (unsigned long long)child);
			
 
				 }
			
 
				 
			
 
				 void _starpu_fxt_dag_add_task_deps(unsigned long dep_prev, unsigned long dep_succ)
			
 
				 {
			
 
				 	if (out_file)
			
 
				-	fprintf(out_file, "\t \"task_%lx\"->\"task_%lx\"\n", dep_prev, dep_succ);
			
 
				-} 
			
 
				+		fprintf(out_file, "\t \"task_%lx\"->\"task_%lx\"\n", dep_prev, dep_succ);
			
 
				+}
			
 
				 
			
 
				 void _starpu_fxt_dag_set_tag_done(uint64_t tag, const char *color)
			
 
				 {
			
 
				 	if (out_file)
			
 
				-	fprintf(out_file, "\t \"tag_%llx\" [ style=filled, label=\"\", color=\"%s\"]\n", 
			
 
				-		(unsigned long long)tag, color);
			
 
				+		fprintf(out_file, "\t \"tag_%llx\" [ style=filled, label=\"\", color=\"%s\"]\n",
			
 
				+			(unsigned long long)tag, color);
			
 
				 }
			
 
				 
			
 
				 void _starpu_fxt_dag_set_task_done(unsigned long job_id, const char *label, const char *color)
			
 
				 {
			
 
				 	if (out_file)
			
 
				-	fprintf(out_file, "\t \"task_%lx\" [ style=filled, label=\"%s\", color=\"%s\"]\n", job_id, label, color);
			
 
				+		fprintf(out_file, "\t \"task_%lx\" [ style=filled, label=\"%s\", color=\"%s\"]\n", job_id, label, color);
			
 
				 }
			
 
				 
			
 
				 void _starpu_fxt_dag_add_sync_point(void)
			
--- a/src/debug/traces/starpu_fxt_mpi.c
+++ b/src/debug/traces/starpu_fxt_mpi.c
@@ -21,7 +21,8 @@
 
				 
			
 
				 #include "starpu_fxt.h"
			
 
				 
			
 
				-struct mpi_transfer {
			
 
				+struct mpi_transfer
			
 
				+{
			
 
				 	unsigned matched;
			
 
				 	int other_rank; /* src for a recv, dest for a send */
			
 
				 	int mpi_tag;
			
@@ -38,18 +39,20 @@ int _starpu_fxt_mpi_find_sync_point(char *filename_in, uint64_t *offset, int *ke
 
				 	/* Open the trace file */
			
 
				 	int fd_in;
			
 
				 	fd_in = open(filename_in, O_RDONLY);
			
 
				-	if (fd_in < 0) {
			
 
				+	if (fd_in < 0)
			
 
				+	{
			
 
				 	        perror("open failed :");
			
 
				 	        exit(-1);
			
 
				 	}
			
 
				 
			
 
				 	static fxt_t fut;
			
 
				 	fut = fxt_fdopen(fd_in);
			
 
				-	if (!fut) {
			
 
				+	if (!fut)
			
 
				+	{
			
 
				 	        perror("fxt_fdopen :");
			
 
				 	        exit(-1);
			
 
				 	}
			
 
				-	
			
 
				+
			
 
				 	fxt_blockev_t block;
			
 
				 	block = fxt_blockev_enter(fut);
			
 
				 
			
@@ -57,9 +60,11 @@ int _starpu_fxt_mpi_find_sync_point(char *filename_in, uint64_t *offset, int *ke
 
				 
			
 
				 	int func_ret = -1;
			
 
				 	unsigned found = 0;
			
 
				-	while(!found) {
			
 
				+	while(!found)
			
 
				+	{
			
 
				 		int ret = fxt_next_ev(block, FXT_EV_TYPE_64, (struct fxt_ev *)&ev);
			
 
				-		if (ret != FXT_EV_OK) {
			
 
				+		if (ret != FXT_EV_OK)
			
 
				+		{
			
 
				 			fprintf(stderr, "no more block ...\n");
			
 
				 			break;
			
 
				 		}
			
@@ -116,7 +121,8 @@ void _starpu_fxt_mpi_add_send_transfer(int src, int dst STARPU_ATTRIBUTE_UNUSED,
 
				 		{
			
 
				 			mpi_sends_list_size[src] *= 2;
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			mpi_sends_list_size[src] = 1;
			
 
				 		}
			
 
				 
			
@@ -140,7 +146,8 @@ void _starpu_fxt_mpi_add_recv_transfer(int src STARPU_ATTRIBUTE_UNUSED, int dst,
 
				 		{
			
 
				 			mpi_recvs_list_size[dst] *= 2;
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			mpi_recvs_list_size[dst] = 1;
			
 
				 		}
			
 
				 
			
@@ -174,7 +181,8 @@ struct mpi_transfer *try_to_match_send_transfer(int src STARPU_ATTRIBUTE_UNUSED,
 
				 
			
 
				 			all_previous_were_matched = 0;
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			if (all_previous_were_matched)
			
 
				 			{
			
 
				 				/* All previous transfers are already matched,
			
--- a/src/dolib.c
+++ b/src/dolib.c
@@ -20,27 +20,30 @@
 
				 #include <stdio.h>
			
 
				 #include <stdlib.h>
			
 
				 
			
 
				-int main(int argc, char *argv[]) {
			
 
				-  char *prog, *arch, *def, *name, *lib;
			
 
				-  char s[1024];
			
 
				+int main(int argc, char *argv[])
			
 
				+{
			
 
				+	char *prog, *arch, *def, *name, *lib;
			
 
				+	char s[1024];
			
 
				 
			
 
				-  if (argc != 6) {
			
 
				-    fprintf(stderr,"bad number of arguments");
			
 
				-    exit(EXIT_FAILURE);
			
 
				-  }
			
 
				+	if (argc != 6)
			
 
				+	{
			
 
				+		fprintf(stderr,"bad number of arguments");
			
 
				+		exit(EXIT_FAILURE);
			
 
				+	}
			
 
				 
			
 
				-  prog = argv[1];
			
 
				-  arch = argv[2];
			
 
				-  def = argv[3];
			
 
				-  name = argv[4];
			
 
				-  lib = argv[5];
			
 
				+	prog = argv[1];
			
 
				+	arch = argv[2];
			
 
				+	def = argv[3];
			
 
				+	name = argv[4];
			
 
				+	lib = argv[5];
			
 
				 
			
 
				-  snprintf(s, sizeof(s), "\"%s\" /machine:%s /def:%s /name:%s /out:%s",
			
 
				-      prog, arch, def, name, lib);
			
 
				-  if (system(s)) {
			
 
				-    fprintf(stderr, "%s failed\n", s);
			
 
				-    exit(EXIT_FAILURE);
			
 
				-  }
			
 
				+	snprintf(s, sizeof(s), "\"%s\" /machine:%s /def:%s /name:%s /out:%s",
			
 
				+		 prog, arch, def, name, lib);
			
 
				+	if (system(s))
			
 
				+	{
			
 
				+		fprintf(stderr, "%s failed\n", s);
			
 
				+		exit(EXIT_FAILURE);
			
 
				+	}
			
 
				 
			
 
				-  exit(EXIT_SUCCESS);
			
 
				+	exit(EXIT_SUCCESS);
			
 
				 }
			
--- a/src/drivers/cpu/driver_cpu.c
+++ b/src/drivers/cpu/driver_cpu.c
@@ -56,12 +56,14 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct _starpu_worker *cpu_
 
				 	 * execute the kernel at all. */
			
 
				 	if ((rank == 0) || (cl->type != STARPU_FORKJOIN))
			
 
				 	{
			
 
				-		if (cl->cpu_func != STARPU_MULTIPLE_CPU_IMPLEMENTATIONS) {
			
 
				+		if (cl->cpu_func != STARPU_MULTIPLE_CPU_IMPLEMENTATIONS)
			
 
				+		{
			
 
				 			_starpu_cl_func func = cl->cpu_func;
			
 
				 			STARPU_ASSERT(func);
			
 
				 			func(task->interfaces, task->cl_arg);
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			/* _STARPU_DEBUG("CPU driver : running kernel (%d)\n", j->nimpl); */
			
 
				 			_starpu_cl_func func = cl->cpu_funcs[j->nimpl];
			
 
				 			STARPU_ASSERT(func);
			
@@ -131,8 +133,8 @@ void *_starpu_cpu_worker(void *arg)
 
				 		_STARPU_PTHREAD_MUTEX_LOCK(cpu_arg->sched_mutex);
			
 
				 
			
 
				 		task = _starpu_pop_task(cpu_arg);
			
 
				-	
			
 
				-                if (!task) 
			
 
				+
			
 
				+                if (!task)
			
 
				 		{
			
 
				 			if (_starpu_worker_can_block(memnode))
			
 
				 				_starpu_block_worker(workerid, cpu_arg->sched_cond, cpu_arg->sched_mutex);
			
@@ -142,13 +144,13 @@ void *_starpu_cpu_worker(void *arg)
 
				 			continue;
			
 
				 		};
			
 
				 
			
 
				-		_STARPU_PTHREAD_MUTEX_UNLOCK(cpu_arg->sched_mutex);	
			
 
				+		_STARPU_PTHREAD_MUTEX_UNLOCK(cpu_arg->sched_mutex);
			
 
				 
			
 
				 		STARPU_ASSERT(task);
			
 
				 		j = _starpu_get_job_associated_to_task(task);
			
 
				-	
			
 
				+
			
 
				 		/* can a cpu perform that task ? */
			
 
				-		if (!_STARPU_CPU_MAY_PERFORM(j)) 
			
 
				+		if (!_STARPU_CPU_MAY_PERFORM(j))
			
 
				 		{
			
 
				 			/* put it and the end of the queue ... XXX */
			
 
				 			_starpu_push_task(j, 0);
			
@@ -158,8 +160,8 @@ void *_starpu_cpu_worker(void *arg)
 
				 		int rank = 0;
			
 
				 		int is_parallel_task = (j->task_size > 1);
			
 
				 
			
 
				-		enum starpu_perf_archtype perf_arch; 
			
 
				-	
			
 
				+		enum starpu_perf_archtype perf_arch;
			
 
				+
			
 
				 		/* Get the rank in case it is a parallel task */
			
 
				 		if (is_parallel_task)
			
 
				 		{
			
@@ -179,7 +181,8 @@ void *_starpu_cpu_worker(void *arg)
 
				 			cpu_arg->current_rank = rank;
			
 
				 			perf_arch = combined_worker->perf_arch;
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			cpu_arg->combined_workerid = cpu_arg->workerid;
			
 
				 			cpu_arg->worker_size = 1;
			
 
				 			cpu_arg->current_rank = 0;
			
@@ -192,12 +195,14 @@ void *_starpu_cpu_worker(void *arg)
 
				 
			
 
				 		_starpu_set_current_task(NULL);
			
 
				 
			
 
				-		if (res) {
			
 
				-			switch (res) {
			
 
				+		if (res)
			
 
				+		{
			
 
				+			switch (res)
			
 
				+			{
			
 
				 				case -EAGAIN:
			
 
				 					_starpu_push_task(j, 0);
			
 
				 					continue;
			
 
				-				default: 
			
 
				+				default:
			
 
				 					assert(0);
			
 
				 			}
			
 
				 		}
			
--- a/src/drivers/cuda/driver_cuda.c
+++ b/src/drivers/cuda/driver_cuda.c
@@ -63,7 +63,7 @@ static void limit_gpu_mem_if_needed(int devid)
 
				 	_STARPU_DEBUG("CUDA device %d: Wasting %ld MB / Limit %ld MB / Total %ld MB / Remains %ld MB\n",
			
 
				 			devid, (size_t)to_waste/(1024*1024), (size_t)limit, (size_t)totalGlobalMem/(1024*1024),
			
 
				 			(size_t)(totalGlobalMem - to_waste)/(1024*1024));
			
 
				-	
			
 
				+
			
 
				 	/* Allocate a large buffer to waste memory and constraint the amount of available memory. */
			
 
				 	cures = cudaMalloc((void **)&wasted_memory[devid], to_waste);
			
 
				 	if (STARPU_UNLIKELY(cures))
			
@@ -161,7 +161,8 @@ unsigned _starpu_get_cuda_device_count(void)
 
				 	if (STARPU_UNLIKELY(cures))
			
 
				 		 return 0;
			
 
				 
			
 
				-	if (cnt > STARPU_MAXCUDADEVS) {
			
 
				+	if (cnt > STARPU_MAXCUDADEVS)
			
 
				+	{
			
 
				 		fprintf(stderr, "# Warning: %d CUDA devices available. Only %d enabled. Use configure option --enable-maxcudadev=xxx to update the maximum value of supported CUDA devices.\n", cnt, STARPU_MAXCUDADEVS);
			
 
				 		cnt = STARPU_MAXCUDADEVS;
			
 
				 	}
			
@@ -191,13 +192,14 @@ static int execute_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *arg
 
				 	struct starpu_codelet *cl = task->cl;
			
 
				 	STARPU_ASSERT(cl);
			
 
				 
			
 
				-	if (cl->model && cl->model->benchmarking) 
			
 
				+	if (cl->model && cl->model->benchmarking)
			
 
				 		calibrate_model = 1;
			
 
				 
			
 
				 	ret = _starpu_fetch_task_input(task, mask);
			
 
				-	if (ret != 0) {
			
 
				+	if (ret != 0)
			
 
				+	{
			
 
				 		/* there was not enough memory, so the input of
			
 
				-		 * the codelet cannot be fetched ... put the 
			
 
				+		 * the codelet cannot be fetched ... put the
			
 
				 		 * codelet back, and try it later */
			
 
				 		return -EAGAIN;
			
 
				 	}
			
@@ -219,12 +221,14 @@ static int execute_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *arg
 
				 		STARPU_CUDA_REPORT_ERROR(cures);
			
 
				 #endif
			
 
				 
			
 
				-	if (cl->cuda_func != STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS) {
			
 
				+	if (cl->cuda_func != STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS)
			
 
				+	{
			
 
				 		_starpu_cl_func func = cl->cuda_func;
			
 
				 		STARPU_ASSERT(func);
			
 
				 		func(task->interfaces, task->cl_arg);
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		/* _STARPU_DEBUG("Cuda driver : running kernel * (%d)\n", j->nimpl); */
			
 
				 		_starpu_cl_func func = cl->cuda_funcs[j->nimpl];
			
 
				 		STARPU_ASSERT(func);
			
@@ -305,8 +309,8 @@ void *_starpu_cuda_worker(void *arg)
 
				 		_STARPU_PTHREAD_MUTEX_LOCK(args->sched_mutex);
			
 
				 
			
 
				 		task = _starpu_pop_task(args);
			
 
				-	
			
 
				-                if (task == NULL) 
			
 
				+
			
 
				+                if (task == NULL)
			
 
				 		{
			
 
				 			if (_starpu_worker_can_block(memnode))
			
 
				 				_starpu_block_worker(workerid, args->sched_cond, args->sched_mutex);
			
@@ -335,8 +339,10 @@ void *_starpu_cuda_worker(void *arg)
 
				 
			
 
				 		_starpu_set_current_task(NULL);
			
 
				 
			
 
				-		if (res) {
			
 
				-			switch (res) {
			
 
				+		if (res)
			
 
				+		{
			
 
				+			switch (res)
			
 
				+			{
			
 
				 				case -EAGAIN:
			
 
				 					_STARPU_DISP("ouch, put the codelet %p back ... \n", j);
			
 
				 					_starpu_push_task(j, 0);
			
@@ -372,7 +378,8 @@ void *_starpu_cuda_worker(void *arg)
 
				 void starpu_cublas_report_error(const char *func, const char *file, int line, cublasStatus status)
			
 
				 {
			
 
				 	char *errormsg;
			
 
				-	switch (status) {
			
 
				+	switch (status)
			
 
				+	{
			
 
				 		case CUBLAS_STATUS_SUCCESS:
			
 
				 			errormsg = "success";
			
 
				 			break;
			
--- a/src/drivers/driver_common/driver_common.c
+++ b/src/drivers/driver_common/driver_common.c
@@ -39,13 +39,14 @@ void _starpu_driver_start_job(struct _starpu_worker *args, struct _starpu_job *j
 
				 		calibrate_model = 1;
			
 
				 
			
 
				 	args->status = STATUS_EXECUTING;
			
 
				-	task->status = STARPU_TASK_RUNNING;	
			
 
				+	task->status = STARPU_TASK_RUNNING;
			
 
				 
			
 
				-	if (rank == 0) {
			
 
				+	if (rank == 0)
			
 
				+	{
			
 
				 		cl->per_worker_stats[workerid]++;
			
 
				 
			
 
				 		profiling_info = task->profiling_info;
			
 
				-	
			
 
				+
			
 
				 		if ((profiling && profiling_info) || calibrate_model || starpu_top)
			
 
				 		{
			
 
				 			_starpu_clock_gettime(codelet_start);
			
@@ -75,7 +76,8 @@ void _starpu_driver_end_job(struct _starpu_worker *args, struct _starpu_job *j,
 
				 	if (cl->model && cl->model->benchmarking)
			
 
				 		calibrate_model = 1;
			
 
				 
			
 
				-	if (rank == 0) {
			
 
				+	if (rank == 0)
			
 
				+	{
			
 
				 		if ((profiling && profiling_info) || calibrate_model || starpu_top)
			
 
				 			_starpu_clock_gettime(codelet_end);
			
 
				 	}
			
@@ -112,7 +114,7 @@ void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_wo
 
				 			memcpy(&profiling_info->end_time, codelet_end, sizeof(struct timespec));
			
 
				 
			
 
				 			profiling_info->workerid = workerid;
			
 
				-			
			
 
				+
			
 
				 			_starpu_worker_update_profiling_info_executing(workerid, &measured_ts, 1,
			
 
				 				profiling_info->used_cycles,
			
 
				 				profiling_info->stall_cycles,
			
@@ -130,11 +132,13 @@ void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_wo
 
				 	if (!updated)
			
 
				 		_starpu_worker_update_profiling_info_executing(workerid, NULL, 1, 0, 0, 0);
			
 
				 
			
 
				-	if (profiling_info && profiling_info->power_consumed && cl->power_model && cl->power_model->benchmarking) {
			
 
				+	if (profiling_info && profiling_info->power_consumed && cl->power_model && cl->power_model->benchmarking)
			
 
				+	{
			
 
				 		_starpu_update_perfmodel_history(j, j->task->cl->power_model,  perf_arch, worker_args->devid, profiling_info->power_consumed,j->nimpl);
			
 
				-		}
			
 
				+	}
			
 
				 
			
 
				-	if (j->task->cl->conversion_model) {
			
 
				+	if (j->task->cl->conversion_model)
			
 
				+	{
			
 
				 		_starpu_update_perfmodel_history(j, j->task->cl->conversion_model, perf_arch,
			
 
				 						 worker_args->devid, conversion_time, j->nimpl);
			
 
				 	}
			
--- a/src/drivers/gordon/driver_gordon.c
+++ b/src/drivers/gordon/driver_gordon.c
@@ -34,7 +34,8 @@ pthread_t progress_thread;
 
				 pthread_cond_t progress_cond;
			
 
				 pthread_mutex_t progress_mutex;
			
 
				 
			
 
				-struct gordon_task_wrapper_s {
			
 
				+struct gordon_task_wrapper_s
			
 
				+{
			
 
				 	/* who has executed that ? */
			
 
				 	struct _starpu_worker *worker;
			
 
				 
			
@@ -53,7 +54,7 @@ void *gordon_worker_progress(void *arg)
 
				 
			
 
				 	/* fix the thread on the correct cpu */
			
 
				 	struct _starpu_worker_set *gordon_set_arg = arg;
			
 
				-	unsigned prog_thread_bind_id = 
			
 
				+	unsigned prog_thread_bind_id =
			
 
				 		(gordon_set_arg->workers[0].bindid + 1)%(gordon_set_arg->config->nhwcores);
			
 
				 	_starpu_bind_thread_on_cpu(gordon_set_arg->config, prog_thread_bind_id);
			
 
				 
			
@@ -62,8 +63,9 @@ void *gordon_worker_progress(void *arg)
 
				 	_STARPU_PTHREAD_COND_SIGNAL(&progress_cond);
			
 
				 	_STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex);
			
 
				 
			
 
				-	while (1) {
			
 
				-		/* the Gordon runtime needs to make sure that we poll it 
			
 
				+	while (1)
			
 
				+	{
			
 
				+		/* the Gordon runtime needs to make sure that we poll it
			
 
				 		 * so that we handle jobs that are done */
			
 
				 
			
 
				 		/* wait for one task termination */
			
@@ -89,10 +91,11 @@ static void starpu_to_gordon_buffers(struct _starpu_job *j, struct gordon_ppu_jo
 
				 
			
 
				 	/* if it is non null, the argument buffer is considered
			
 
				  	 * as the first read-only buffer */
			
 
				-	if (task->cl_arg) {
			
 
				+	if (task->cl_arg)
			
 
				+	{
			
 
				 		gordon_job->buffers[in] = (uint64_t)task->cl_arg;
			
 
				 		gordon_job->ss[in].size = (uint32_t)task->cl_arg_size;
			
 
				-		
			
 
				+
			
 
				 		nin++; in++;
			
 
				 	}
			
 
				 
			
@@ -103,7 +106,8 @@ static void starpu_to_gordon_buffers(struct _starpu_job *j, struct gordon_ppu_jo
 
				 		struct starpu_buffer_descr *descr;
			
 
				 		descr = &task->buffers[buffer];
			
 
				 
			
 
				-		switch (descr->mode) {
			
 
				+		switch (descr->mode)
			
 
				+		{
			
 
				 			case STARPU_R:
			
 
				 				nin++;
			
 
				 				break;
			
@@ -123,7 +127,8 @@ static void starpu_to_gordon_buffers(struct _starpu_job *j, struct gordon_ppu_jo
 
				 		struct starpu_buffer_descr *descr;
			
 
				 		descr = &task->buffers[buffer];
			
 
				 
			
 
				-		switch (descr->mode) {
			
 
				+		switch (descr->mode)
			
 
				+		{
			
 
				 			case STARPU_R:
			
 
				 				gordon_buffer = in++;
			
 
				 				break;
			
@@ -150,7 +155,7 @@ static void starpu_to_gordon_buffers(struct _starpu_job *j, struct gordon_ppu_jo
 
				 	}
			
 
				 }
			
 
				 
			
 
				-/* we assume the data are already available so that the data interface fields are 
			
 
				+/* we assume the data are already available so that the data interface fields are
			
 
				  * already filled */
			
 
				 static struct gordon_task_wrapper_s *starpu_to_gordon_job(struct _starpu_job *j)
			
 
				 {
			
@@ -183,8 +188,8 @@ static void handle_terminated_job(struct _starpu_job *j)
 
				 
			
 
				 static void gordon_callback_list_func(void *arg)
			
 
				 {
			
 
				-	struct gordon_task_wrapper_s *task_wrapper = arg; 
			
 
				-	struct _starpu_job_list *wrapper_list; 
			
 
				+	struct gordon_task_wrapper_s *task_wrapper = arg;
			
 
				+	struct _starpu_job_list *wrapper_list;
			
 
				 
			
 
				 	/* we don't know who will execute that codelet : so we actually defer the
			
 
				  	 * execution of the StarPU codelet and the job termination later */
			
@@ -232,7 +237,7 @@ static void gordon_callback_list_func(void *arg)
 
				 
			
 
				 static void gordon_callback_func(void *arg)
			
 
				 {
			
 
				-	struct gordon_task_wrapper_s *task_wrapper = arg; 
			
 
				+	struct gordon_task_wrapper_s *task_wrapper = arg;
			
 
				 
			
 
				 	/* we don't know who will execute that codelet : so we actually defer the
			
 
				  	 * execution of the StarPU codelet and the job termination later */
			
@@ -254,7 +259,8 @@ int inject_task(struct _starpu_job *j, struct _starpu_worker *worker)
 
				 	struct starpu_task *task = j->task;
			
 
				 	int ret = _starpu_fetch_task_input(task, 0);
			
 
				 
			
 
				-	if (ret != 0) {
			
 
				+	if (ret != 0)
			
 
				+	{
			
 
				 		/* there was not enough memory so the codelet cannot be executed right now ... */
			
 
				 		/* push the codelet back and try another one ... */
			
 
				 		return STARPU_TRYAGAIN;
			
@@ -277,15 +283,17 @@ int inject_task_list(struct _starpu_job_list *list, struct _starpu_worker *worke
 
				 	struct _starpu_job *j;
			
 
				 
			
 
				 	// TODO !
			
 
				-//	
			
 
				+//
			
 
				 //	for (j = _starpu_job_list_begin(list); j != _starpu_job_list_end(list); j = _starpu_job_list_next(j) )
			
 
				 //	{
			
 
				-//		if (!_STARPU_GORDON_MAY_PERFORM(j)) {
			
 
				+//		if (!_STARPU_GORDON_MAY_PERFORM(j))
			
 
				+//              {
			
 
				 //			// XXX TODO
			
 
				 //			ninvalids++;
			
 
				 //			assert(0);
			
 
				 //		}
			
 
				-//		else {
			
 
				+//		else
			
 
				+//              {
			
 
				 //			nvalids++;
			
 
				 //		}
			
 
				 //	}
			
@@ -293,7 +301,7 @@ int inject_task_list(struct _starpu_job_list *list, struct _starpu_worker *worke
 
				 	nvalids = _job_list_size(list);
			
 
				 //	_STARPU_DEBUG("nvalids %d \n", nvalids);
			
 
				 
			
 
				-	
			
 
				+
			
 
				 
			
 
				 	struct gordon_task_wrapper_s *task_wrapper = malloc(sizeof(struct gordon_task_wrapper_s));
			
 
				 	gordon_job_t *gordon_jobs = gordon_alloc_jobs(nvalids, 0);
			
@@ -303,7 +311,7 @@ int inject_task_list(struct _starpu_job_list *list, struct _starpu_worker *worke
 
				 	task_wrapper->j = NULL;
			
 
				 	task_wrapper->terminated = 0;
			
 
				 	task_wrapper->worker = worker;
			
 
				-	
			
 
				+
			
 
				 	unsigned index;
			
 
				 	for (j = _starpu_job_list_begin(list), index = 0; j != _starpu_job_list_end(list); j = _starpu_job_list_next(j), index++)
			
 
				 	{
			
@@ -322,7 +330,7 @@ int inject_task_list(struct _starpu_job_list *list, struct _starpu_worker *worke
 
				 		/* we should not hardcore the memory node ... XXX */
			
 
				 		unsigned memory_node = 0;
			
 
				 		starpu_to_gordon_buffers(j, &gordon_jobs[index], memory_node);
			
 
				-		
			
 
				+
			
 
				 	}
			
 
				 
			
 
				 	gordon_pushjob(task_wrapper->gordon_job, gordon_callback_list_func, task_wrapper);
			
@@ -333,12 +341,15 @@ int inject_task_list(struct _starpu_job_list *list, struct _starpu_worker *worke
 
				 void *gordon_worker_inject(struct _starpu_worker_set *arg)
			
 
				 {
			
 
				 
			
 
				-	while(_starpu_machine_is_running()) {
			
 
				-		if (gordon_busy_enough()) {
			
 
				+	while(_starpu_machine_is_running())
			
 
				+	{
			
 
				+		if (gordon_busy_enough())
			
 
				+		{
			
 
				 			/* gordon already has enough work, wait a little TODO */
			
 
				 			_starpu_wait_on_sched_event();
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 #ifndef NOCHAIN
			
 
				 			int ret = 0;
			
 
				 #ifdef STARPU_DEVEL
			
@@ -382,7 +393,8 @@ void *gordon_worker_inject(struct _starpu_worker_set *arg)
 
				 						list->_head = it_j;
			
 
				 						it_j->_prev = NULL;
			
 
				 					}
			
 
				-					else {
			
 
				+					else
			
 
				+					{
			
 
				 						/* this is the last chunk */
			
 
				 						chunk_list = list;
			
 
				 					}
			
@@ -390,7 +402,8 @@ void *gordon_worker_inject(struct _starpu_worker_set *arg)
 
				 					ret = inject_task_list(chunk_list, &arg->workers[0]);
			
 
				 				}
			
 
				 			}
			
 
				-			else {
			
 
				+			else
			
 
				+			{
			
 
				 				_starpu_wait_on_sched_event();
			
 
				 			}
			
 
				 #else
			
@@ -398,18 +411,21 @@ void *gordon_worker_inject(struct _starpu_worker_set *arg)
 
				 			struct _starpu_job *j;
			
 
				 			j =  _starpu_pop_task();
			
 
				 	//		_STARPU_DEBUG("pop task %p\n", j);
			
 
				-			if (j) {
			
 
				-				if (_STARPU_GORDON_MAY_PERFORM(j)) {
			
 
				+			if (j)
			
 
				+			{
			
 
				+				if (_STARPU_GORDON_MAY_PERFORM(j))
			
 
				+				{
			
 
				 					/* inject that task */
			
 
				 					/* XXX we hardcore &arg->workers[0] for now */
			
 
				 					inject_task(j, &arg->workers[0]);
			
 
				 				}
			
 
				-				else {
			
 
				+				else
			
 
				+				{
			
 
				 					_starpu_push_task(j, 0);
			
 
				 				}
			
 
				 			}
			
 
				 #endif
			
 
				-			
			
 
				+
			
 
				 		}
			
 
				 	}
			
 
				 
			
@@ -423,7 +439,7 @@ void *_starpu_gordon_worker(void *arg)
 
				 	_starpu_bind_thread_on_cpu(gordon_set_arg->config, gordon_set_arg->workers[0].bindid);
			
 
				 
			
 
				 	/* TODO set_local_memory_node per SPU */
			
 
				-	gordon_init(gordon_set_arg->nworkers);	
			
 
				+	gordon_init(gordon_set_arg->nworkers);
			
 
				 
			
 
				 	/* NB: On SPUs, the worker_key is set to NULL since there is no point
			
 
				 	 * in associating the PPU thread with a specific SPU (worker) while
			
@@ -448,7 +464,7 @@ void *_starpu_gordon_worker(void *arg)
 
				 	/* launch the progression thread */
			
 
				 	_STARPU_PTHREAD_MUTEX_INIT(&progress_mutex, NULL);
			
 
				 	_STARPU_PTHREAD_COND_INIT(&progress_cond, NULL);
			
 
				-	
			
 
				+
			
 
				 	pthread_create(&progress_thread, NULL, gordon_worker_progress, gordon_set_arg);
			
 
				 
			
 
				 	/* wait for the progression thread to be ready */
			
@@ -458,7 +474,7 @@ void *_starpu_gordon_worker(void *arg)
 
				 	_STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex);
			
 
				 
			
 
				 	_STARPU_DEBUG("progress thread is running ... \n");
			
 
				-	
			
 
				+
			
 
				 	/* tell the core that gordon is ready */
			
 
				 	_STARPU_PTHREAD_MUTEX_LOCK(&gordon_set_arg->mutex);
			
 
				 	gordon_set_arg->set_is_initialized = 1;
			
--- a/src/drivers/opencl/driver_opencl.c
+++ b/src/drivers/opencl/driver_opencl.c
@@ -204,20 +204,25 @@ cl_int _starpu_opencl_copy_ram_to_opencl_async_sync(void *ptr, unsigned src_node
 
				         err = clEnqueueWriteBuffer(transfer_queues[worker->devid], buffer, blocking, offset, size, ptr, 0, NULL, event);
			
 
				         if (event)
			
 
				                 _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
			
 
				-        if (STARPU_LIKELY(err == CL_SUCCESS)) {
			
 
				+        if (STARPU_LIKELY(err == CL_SUCCESS))
			
 
				+	{
			
 
				                 *ret = (event == NULL) ? 0 : -EAGAIN;
			
 
				                 return CL_SUCCESS;
			
 
				         }
			
 
				-        else {
			
 
				-                if (event != NULL) {
			
 
				+        else
			
 
				+	{
			
 
				+                if (event != NULL)
			
 
				+		{
			
 
				                         /* The asynchronous copy has failed, try to copy synchronously */
			
 
				                         err = clEnqueueWriteBuffer(transfer_queues[worker->devid], buffer, CL_TRUE, offset, size, ptr, 0, NULL, NULL);
			
 
				                 }
			
 
				-                if (STARPU_LIKELY(err == CL_SUCCESS)) {
			
 
				+                if (STARPU_LIKELY(err == CL_SUCCESS))
			
 
				+		{
			
 
				                         *ret = 0;
			
 
				                         return CL_SUCCESS;
			
 
				                 }
			
 
				-                else {
			
 
				+                else
			
 
				+		{
			
 
				                         STARPU_OPENCL_REPORT_ERROR(err);
			
 
				                         return err;
			
 
				                 }
			
@@ -253,19 +258,23 @@ cl_int _starpu_opencl_copy_opencl_to_ram_async_sync(cl_mem buffer, unsigned src_
 
				         err = clEnqueueReadBuffer(transfer_queues[worker->devid], buffer, blocking, offset, size, ptr, 0, NULL, event);
			
 
				         if (event)
			
 
				                 _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
			
 
				-        if (STARPU_LIKELY(err == CL_SUCCESS)) {
			
 
				+        if (STARPU_LIKELY(err == CL_SUCCESS))
			
 
				+	{
			
 
				                 *ret = (event == NULL) ? 0 : -EAGAIN;
			
 
				                 return CL_SUCCESS;
			
 
				         }
			
 
				-        else {
			
 
				+        else
			
 
				+	{
			
 
				                 if (event != NULL)
			
 
				                         /* The asynchronous copy has failed, try to copy synchronously */
			
 
				                         err = clEnqueueReadBuffer(transfer_queues[worker->devid], buffer, CL_TRUE, offset, size, ptr, 0, NULL, NULL);
			
 
				-                if (STARPU_LIKELY(err == CL_SUCCESS)) {
			
 
				+                if (STARPU_LIKELY(err == CL_SUCCESS))
			
 
				+		{
			
 
				                         *ret = 0;
			
 
				                         return CL_SUCCESS;
			
 
				                 }
			
 
				-                else {
			
 
				+                else
			
 
				+		{
			
 
				                         STARPU_OPENCL_REPORT_ERROR(err);
			
 
				                         return err;
			
 
				                 }
			
@@ -334,7 +343,8 @@ cl_int _starpu_opencl_copy_rect_ram_to_opencl(void *ptr, unsigned src_node STARP
 
				 void _starpu_opencl_init(void)
			
 
				 {
			
 
				 	_STARPU_PTHREAD_MUTEX_LOCK(&big_lock);
			
 
				-        if (!init_done) {
			
 
				+        if (!init_done)
			
 
				+	{
			
 
				                 cl_platform_id platform_id[_STARPU_OPENCL_PLATFORM_MAX];
			
 
				                 cl_uint nb_platforms;
			
 
				                 cl_device_type device_type = CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR;
			
@@ -351,19 +361,23 @@ void _starpu_opencl_init(void)
 
				                 // Get devices
			
 
				                 nb_devices = 0;
			
 
				                 {
			
 
				-                        for (i=0; i<nb_platforms; i++) {
			
 
				+                        for (i=0; i<nb_platforms; i++)
			
 
				+			{
			
 
				                                 cl_uint num;
			
 
				 				int platform_valid = 1;
			
 
				 				char name[1024], vendor[1024];
			
 
				 
			
 
				 				err = clGetPlatformInfo(platform_id[i], CL_PLATFORM_NAME, 1024, name, NULL);
			
 
				-				if (err != CL_SUCCESS) {
			
 
				+				if (err != CL_SUCCESS)
			
 
				+				{
			
 
				 					STARPU_OPENCL_REPORT_ERROR_WITH_MSG("clGetPlatformInfo NAME", err);
			
 
				 					platform_valid = 0;
			
 
				 				}
			
 
				-				else {
			
 
				+				else
			
 
				+				{
			
 
				 					err = clGetPlatformInfo(platform_id[i], CL_PLATFORM_VENDOR, 1024, vendor, NULL);
			
 
				-					if (err != CL_SUCCESS) {
			
 
				+					if (err != CL_SUCCESS)
			
 
				+					{
			
 
				 						STARPU_OPENCL_REPORT_ERROR_WITH_MSG("clGetPlatformInfo VENDOR", err);
			
 
				 						platform_valid = 0;
			
 
				 					}
			
@@ -374,12 +388,15 @@ void _starpu_opencl_init(void)
 
				 				else
			
 
				 					_STARPU_DEBUG("Platform invalid\n");
			
 
				 #endif
			
 
				-				if (platform_valid) {
			
 
				+				if (platform_valid)
			
 
				+				{
			
 
				 					err = clGetDeviceIDs(platform_id[i], device_type, STARPU_MAXOPENCLDEVS-nb_devices, &devices[nb_devices], &num);
			
 
				-					if (err == CL_DEVICE_NOT_FOUND) {
			
 
				+					if (err == CL_DEVICE_NOT_FOUND)
			
 
				+					{
			
 
				 						_STARPU_DEBUG("  No devices detected on this platform\n");
			
 
				 					}
			
 
				-					else {
			
 
				+					else
			
 
				+					{
			
 
				 						if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
			
 
				 						_STARPU_DEBUG("  %d devices detected\n", num);
			
 
				 						nb_devices += num;
			
@@ -391,13 +408,15 @@ void _starpu_opencl_init(void)
 
				                 // Get location of OpenCl kernel source files
			
 
				                 _starpu_opencl_program_dir = getenv("STARPU_OPENCL_PROGRAM_DIR");
			
 
				 
			
 
				-		if (nb_devices > STARPU_MAXOPENCLDEVS) {
			
 
				+		if (nb_devices > STARPU_MAXOPENCLDEVS)
			
 
				+		{
			
 
				 			_STARPU_DISP("# Warning: %d OpenCL devices available. Only %d enabled. Use configure option --enable-maxopencldev=xxx to update the maximum value of supported OpenCL devices?\n", nb_devices, STARPU_MAXOPENCLDEVS);
			
 
				 			nb_devices = STARPU_MAXOPENCLDEVS;
			
 
				 		}
			
 
				 
			
 
				                 // initialise internal structures
			
 
				-                for(i=0 ; i<nb_devices ; i++) {
			
 
				+                for(i=0 ; i<nb_devices ; i++)
			
 
				+		{
			
 
				                         contexts[i] = NULL;
			
 
				                         queues[i] = NULL;
			
 
				                         transfer_queues[i] = NULL;
			
@@ -467,8 +486,8 @@ void *_starpu_opencl_worker(void *arg)
 
				 		_STARPU_PTHREAD_MUTEX_LOCK(args->sched_mutex);
			
 
				 
			
 
				 		task = _starpu_pop_task(args);
			
 
				-		
			
 
				-                if (task == NULL) 
			
 
				+
			
 
				+                if (task == NULL)
			
 
				 		{
			
 
				 			if (_starpu_worker_can_block(memnode))
			
 
				 				_starpu_block_worker(workerid, args->sched_cond, args->sched_mutex);
			
@@ -497,8 +516,10 @@ void *_starpu_opencl_worker(void *arg)
 
				 
			
 
				 		_starpu_set_current_task(NULL);
			
 
				 
			
 
				-                if (res) {
			
 
				-			switch (res) {
			
 
				+                if (res)
			
 
				+		{
			
 
				+			switch (res)
			
 
				+			{
			
 
				 				case -EAGAIN:
			
 
				 					_STARPU_DISP("ouch, put the codelet %p back ... \n", j);
			
 
				 					_starpu_push_task(j, 0);
			
@@ -527,7 +548,8 @@ static unsigned _starpu_opencl_get_device_name(int dev, char *name, int lname)
 
				 {
			
 
				 	int err;
			
 
				 
			
 
				-        if (!init_done) {
			
 
				+        if (!init_done)
			
 
				+	{
			
 
				                 _starpu_opencl_init();
			
 
				         }
			
 
				 
			
@@ -541,7 +563,8 @@ static unsigned _starpu_opencl_get_device_name(int dev, char *name, int lname)
 
				 
			
 
				 unsigned _starpu_opencl_get_device_count(void)
			
 
				 {
			
 
				-        if (!init_done) {
			
 
				+        if (!init_done)
			
 
				+	{
			
 
				                 _starpu_opencl_init();
			
 
				         }
			
 
				 	return nb_devices;
			
@@ -562,7 +585,8 @@ static int _starpu_opencl_execute_job(struct _starpu_job *j, struct _starpu_work
 
				 	STARPU_ASSERT(cl);
			
 
				 
			
 
				 	ret = _starpu_fetch_task_input(task, mask);
			
 
				-	if (ret != 0) {
			
 
				+	if (ret != 0)
			
 
				+	{
			
 
				 		/* there was not enough memory, so the input of
			
 
				 		 * the codelet cannot be fetched ... put the
			
 
				 		 * codelet back, and try it later */
			
@@ -572,12 +596,14 @@ static int _starpu_opencl_execute_job(struct _starpu_job *j, struct _starpu_work
 
				 
			
 
				 	_starpu_driver_start_job(args, j, &codelet_start, 0);
			
 
				 
			
 
				-	if (cl->opencl_func != STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS) {
			
 
				+	if (cl->opencl_func != STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS)
			
 
				+	{
			
 
				 		_starpu_cl_func func = cl->opencl_func;
			
 
				 		STARPU_ASSERT(func);
			
 
				 		func(task->interfaces, task->cl_arg);
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		/* _STARPU_DEBUG("OpenCL driver : running kernel (%d)\n", j->nimpl); */
			
 
				 		_starpu_cl_func func = cl->opencl_funcs[j->nimpl];
			
 
				 		STARPU_ASSERT(func);
			
--- a/src/drivers/opencl/driver_opencl_utils.c
+++ b/src/drivers/opencl/driver_opencl_utils.c
@@ -39,42 +39,50 @@ char *_starpu_opencl_program_dir;
 
				 #define _STARPU_STRINGIFY(x) _STARPU_STRINGIFY_(x)
			
 
				 
			
 
				 static
			
 
				-int _starpu_opencl_locate_file(const char *source_file_name, char *located_file_name, char *located_dir_name) {
			
 
				+int _starpu_opencl_locate_file(const char *source_file_name, char *located_file_name, char *located_dir_name)
			
 
				+{
			
 
				 	int ret = EXIT_FAILURE;
			
 
				 
			
 
				         _STARPU_DEBUG("Trying to locate <%s>\n", source_file_name);
			
 
				-        if (access(source_file_name, R_OK) == 0) {
			
 
				+        if (access(source_file_name, R_OK) == 0)
			
 
				+	{
			
 
				                 strcpy(located_file_name, source_file_name);
			
 
				 		ret = EXIT_SUCCESS;
			
 
				         }
			
 
				 
			
 
				-	if (ret == EXIT_FAILURE && _starpu_opencl_program_dir) {
			
 
				+	if (ret == EXIT_FAILURE && _starpu_opencl_program_dir)
			
 
				+	{
			
 
				 		sprintf(located_file_name, "%s/%s", _starpu_opencl_program_dir, source_file_name);
			
 
				 		_STARPU_DEBUG("Trying to locate <%s>\n", located_file_name);
			
 
				 		if (access(located_file_name, R_OK) == 0) ret = EXIT_SUCCESS;
			
 
				 	}
			
 
				 
			
 
				-	if (ret == EXIT_FAILURE) {
			
 
				+	if (ret == EXIT_FAILURE)
			
 
				+	{
			
 
				 		sprintf(located_file_name, "%s/%s", _STARPU_STRINGIFY(STARPU_OPENCL_DATADIR), source_file_name);
			
 
				 		_STARPU_DEBUG("Trying to locate <%s>\n", located_file_name);
			
 
				 		if (access(located_file_name, R_OK) == 0) ret = EXIT_SUCCESS;
			
 
				 	}
			
 
				 
			
 
				-	if (ret == EXIT_FAILURE) {
			
 
				+	if (ret == EXIT_FAILURE)
			
 
				+	{
			
 
				 		sprintf(located_file_name, "%s/%s", STARPU_SRC_DIR, source_file_name);
			
 
				 		_STARPU_DEBUG("Trying to locate <%s>\n", located_file_name);
			
 
				 		if (access(located_file_name, R_OK) == 0) ret = EXIT_SUCCESS;
			
 
				 	}
			
 
				 
			
 
				-	if (ret == EXIT_FAILURE) {
			
 
				+	if (ret == EXIT_FAILURE)
			
 
				+	{
			
 
				 		strcpy(located_file_name, "");
			
 
				 		strcpy(located_dir_name, "");
			
 
				 		_STARPU_ERROR("Cannot locate file <%s>\n", source_file_name);
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		char *last = strrchr(located_file_name, '/');
			
 
				 		if (!last) strcpy(located_dir_name, "");
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			sprintf(located_dir_name, "%s", located_file_name);
			
 
				 			located_dir_name[strlen(located_file_name)-strlen(last)+1] = '\0';
			
 
				 		}
			
@@ -96,7 +104,8 @@ cl_int starpu_opencl_load_kernel(cl_kernel *kernel, cl_command_queue *queue, str
 
				         starpu_opencl_get_queue(devid, queue);
			
 
				 
			
 
				         program = opencl_programs->programs[devid];
			
 
				-        if (!program) {
			
 
				+        if (!program)
			
 
				+	{
			
 
				                 _STARPU_DISP("Program not available\n");
			
 
				                 return CL_INVALID_PROGRAM;
			
 
				         }
			
@@ -108,7 +117,8 @@ cl_int starpu_opencl_load_kernel(cl_kernel *kernel, cl_command_queue *queue, str
 
				 	return CL_SUCCESS;
			
 
				 }
			
 
				 
			
 
				-cl_int starpu_opencl_release_kernel(cl_kernel kernel) {
			
 
				+cl_int starpu_opencl_release_kernel(cl_kernel kernel)
			
 
				+{
			
 
				 	cl_int err;
			
 
				 
			
 
				 	err = clReleaseKernel(kernel);
			
@@ -133,8 +143,9 @@ char *_starpu_opencl_load_program_source(const char *filename)
 
				         stat(filename, &statbuf);
			
 
				         source = (char *) malloc(statbuf.st_size + 1);
			
 
				 
			
 
				-        for(c=fgetc(fh), x=0 ; c != EOF ; c = fgetc(fh), x++) {
			
 
				-          source[x] = c;
			
 
				+        for(c=fgetc(fh), x=0 ; c != EOF ; c = fgetc(fh), x++)
			
 
				+	{
			
 
				+		source[x] = c;
			
 
				         }
			
 
				         source[x] = '\0';
			
 
				 
			
@@ -154,7 +165,8 @@ int starpu_opencl_load_opencl_from_string(const char *opencl_program_source, str
 
				 
			
 
				         nb_devices = _starpu_opencl_get_device_count();
			
 
				         // Iterate over each device
			
 
				-        for(dev = 0; dev < nb_devices; dev ++) {
			
 
				+        for(dev = 0; dev < nb_devices; dev ++)
			
 
				+	{
			
 
				                 cl_device_id device;
			
 
				                 cl_context   context;
			
 
				                 cl_program   program;
			
@@ -162,7 +174,8 @@ int starpu_opencl_load_opencl_from_string(const char *opencl_program_source, str
 
				 
			
 
				                 starpu_opencl_get_device(dev, &device);
			
 
				                 starpu_opencl_get_context(dev, &context);
			
 
				-                if (context == NULL) {
			
 
				+                if (context == NULL)
			
 
				+		{
			
 
				                         _STARPU_DEBUG("[%d] is not a valid OpenCL context\n", dev);
			
 
				                         continue;
			
 
				                 }
			
@@ -177,7 +190,8 @@ int starpu_opencl_load_opencl_from_string(const char *opencl_program_source, str
 
				 
			
 
				                 // Build the program executable
			
 
				                 err = clBuildProgram(program, 1, &device, build_options, NULL, NULL);
			
 
				-                if (err != CL_SUCCESS) {
			
 
				+                if (err != CL_SUCCESS)
			
 
				+		{
			
 
				                         size_t len;
			
 
				                         static char buffer[4096];
			
 
				 
			
@@ -237,7 +251,8 @@ cl_int starpu_opencl_unload_opencl(struct starpu_opencl_program *opencl_programs
 
				 
			
 
				         nb_devices = _starpu_opencl_get_device_count();
			
 
				         // Iterate over each device
			
 
				-        for(dev = 0; dev < nb_devices; dev ++) {
			
 
				+        for(dev = 0; dev < nb_devices; dev ++)
			
 
				+	{
			
 
				                 if (opencl_programs->programs[dev])
			
 
				                         clReleaseProgram(opencl_programs->programs[dev]);
			
 
				         }
			
@@ -252,7 +267,8 @@ int starpu_opencl_collect_stats(cl_event event STARPU_ATTRIBUTE_UNUSED)
 
				 #endif
			
 
				 
			
 
				 #ifdef CL_PROFILING_CLOCK_CYCLE_COUNT
			
 
				-	if (starpu_profiling_status_get() && info) {
			
 
				+	if (starpu_profiling_status_get() && info)
			
 
				+	{
			
 
				 		cl_int err;
			
 
				 		unsigned int clock_cycle_count;
			
 
				 		size_t size;
			
@@ -263,7 +279,8 @@ int starpu_opencl_collect_stats(cl_event event STARPU_ATTRIBUTE_UNUSED)
 
				 	}
			
 
				 #endif
			
 
				 #ifdef CL_PROFILING_STALL_CYCLE_COUNT
			
 
				-	if (starpu_profiling_status_get() && info) {
			
 
				+	if (starpu_profiling_status_get() && info)
			
 
				+	{
			
 
				 		cl_int err;
			
 
				 		unsigned int stall_cycle_count;
			
 
				 		size_t size;
			
@@ -275,7 +292,8 @@ int starpu_opencl_collect_stats(cl_event event STARPU_ATTRIBUTE_UNUSED)
 
				 	}
			
 
				 #endif
			
 
				 #ifdef CL_PROFILING_POWER_CONSUMED
			
 
				-	if (info && (starpu_profiling_status_get() || (task->cl && task->cl->power_model && task->cl->power_model->benchmarking))) {
			
 
				+	if (info && (starpu_profiling_status_get() || (task->cl && task->cl->power_model && task->cl->power_model->benchmarking)))
			
 
				+	{
			
 
				 		cl_int err;
			
 
				 		double power_consumed;
			
 
				 		size_t size;
			
@@ -293,7 +311,8 @@ int starpu_opencl_collect_stats(cl_event event STARPU_ATTRIBUTE_UNUSED)
 
				 void starpu_opencl_display_error(const char *func, const char *file, int line, const char* msg, cl_int status)
			
 
				 {
			
 
				 	const char *errormsg;
			
 
				-	switch (status) {
			
 
				+	switch (status)
			
 
				+	{
			
 
				 	case CL_SUCCESS:
			
 
				 		errormsg = "success";
			
 
				 		break;
			
--- a/src/profiling/bound.c
+++ b/src/profiling/bound.c
@@ -45,7 +45,8 @@
 
				  * - the total numer of tasks of a given kind is equal to the number run by the
			
 
				  *   application.
			
 
				  */
			
 
				-struct bound_task_pool {
			
 
				+struct bound_task_pool
			
 
				+{
			
 
				 	/* Which codelet has been executed */
			
 
				 	struct starpu_codelet *cl;
			
 
				 	/* Task footprint key */
			
@@ -77,7 +78,8 @@ struct bound_task_pool {
 
				 /* Note: only task-task, implicit data dependencies or task-tag dependencies
			
 
				  * are taken into account. Tags released in a callback or something like this
			
 
				  * is not taken into account, only tags associated with a task are. */
			
 
				-struct bound_task {
			
 
				+struct bound_task
			
 
				+{
			
 
				 	/* Unique ID */
			
 
				 	unsigned long id;
			
 
				 	/* Tag ID, if any */
			
@@ -100,7 +102,8 @@ struct bound_task {
 
				 	struct bound_task *next;
			
 
				 };
			
 
				 
			
 
				-struct bound_tag_dep {
			
 
				+struct bound_tag_dep
			
 
				+{
			
 
				 	starpu_tag_t tag;
			
 
				 	starpu_tag_t dep_tag;
			
 
				 	struct bound_tag_dep *next;
			
@@ -197,14 +200,18 @@ void _starpu_bound_record(struct _starpu_job *j)
 
				 
			
 
				 	_STARPU_PTHREAD_MUTEX_LOCK(&mutex);
			
 
				 	/* Re-check, this time with mutex held */
			
 
				-	if (!_starpu_bound_recording) {
			
 
				+	if (!_starpu_bound_recording)
			
 
				+	{
			
 
				 		_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
			
 
				 		return;
			
 
				 	}
			
 
				 
			
 
				-	if (recorddeps) {
			
 
				+	if (recorddeps)
			
 
				+	{
			
 
				 		new_task(j);
			
 
				-	} else {
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				 		struct bound_task_pool *tp;
			
 
				 
			
 
				 		_starpu_compute_buffers_footprint(j);
			
@@ -216,7 +223,8 @@ void _starpu_bound_record(struct _starpu_job *j)
 
				 				if (tp->cl == j->task->cl && tp->footprint == j->footprint)
			
 
				 					break;
			
 
				 
			
 
				-		if (!tp) {
			
 
				+		if (!tp)
			
 
				+		{
			
 
				 			tp = (struct bound_task_pool *) malloc(sizeof(*tp));
			
 
				 			tp->cl = j->task->cl;
			
 
				 			tp->footprint = j->footprint;
			
@@ -241,7 +249,8 @@ void _starpu_bound_tag_dep(starpu_tag_t id, starpu_tag_t dep_id)
 
				 
			
 
				 	_STARPU_PTHREAD_MUTEX_LOCK(&mutex);
			
 
				 	/* Re-check, this time with mutex held */
			
 
				-	if (!_starpu_bound_recording || !recorddeps) {
			
 
				+	if (!_starpu_bound_recording || !recorddeps)
			
 
				+	{
			
 
				 		_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
			
 
				 		return;
			
 
				 	}
			
@@ -266,7 +275,8 @@ void _starpu_bound_task_dep(struct _starpu_job *j, struct _starpu_job *dep_j)
 
				 
			
 
				 	_STARPU_PTHREAD_MUTEX_LOCK(&mutex);
			
 
				 	/* Re-check, this time with mutex held */
			
 
				-	if (!_starpu_bound_recording || !recorddeps) {
			
 
				+	if (!_starpu_bound_recording || !recorddeps)
			
 
				+	{
			
 
				 		_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
			
 
				 		return;
			
 
				 	}
			
@@ -301,14 +311,16 @@ void _starpu_bound_job_id_dep(struct _starpu_job *j, unsigned long id)
 
				 
			
 
				 	_STARPU_PTHREAD_MUTEX_LOCK(&mutex);
			
 
				 	/* Re-check, this time with mutex held */
			
 
				-	if (!_starpu_bound_recording || !recorddeps) {
			
 
				+	if (!_starpu_bound_recording || !recorddeps)
			
 
				+	{
			
 
				 		_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
			
 
				 		return;
			
 
				 	}
			
 
				 
			
 
				 	new_task(j);
			
 
				 	dep_t = find_job(id);
			
 
				-	if (!dep_t) {
			
 
				+	if (!dep_t)
			
 
				+	{
			
 
				 		fprintf(stderr,"dependency %lu not found !\n", id);
			
 
				 		_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
			
 
				 		return;
			
@@ -326,12 +338,16 @@ void starpu_bound_stop(void)
 
				 	_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
			
 
				 }
			
 
				 
			
 
				-static void _starpu_get_tasks_times(int nw, int nt, double *times) {
			
 
				+static void _starpu_get_tasks_times(int nw, int nt, double *times)
			
 
				+{
			
 
				 	struct bound_task_pool *tp;
			
 
				 	int w, t;
			
 
				-	for (w = 0; w < nw; w++) {
			
 
				-		for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
			
 
				-			struct _starpu_job j = {
			
 
				+	for (w = 0; w < nw; w++)
			
 
				+	{
			
 
				+		for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
			
 
				+		{
			
 
				+			struct _starpu_job j =
			
 
				+			{
			
 
				 				.footprint = tp->footprint,
			
 
				 				.footprint_is_computed = 1,
			
 
				 			};
			
@@ -345,9 +361,11 @@ static void _starpu_get_tasks_times(int nw, int nt, double *times) {
 
				 	}
			
 
				 }
			
 
				 
			
 
				-static int ancestor(struct bound_task *child, struct bound_task *parent) {
			
 
				+static int ancestor(struct bound_task *child, struct bound_task *parent)
			
 
				+{
			
 
				 	int i;
			
 
				-	for (i = 0; i < child->depsn; i++) {
			
 
				+	for (i = 0; i < child->depsn; i++)
			
 
				+	{
			
 
				 		if (parent == child->deps[i])
			
 
				 			return 1;
			
 
				 		if (ancestor(child->deps[i], parent))
			
@@ -356,17 +374,20 @@ static int ancestor(struct bound_task *child, struct bound_task *parent) {
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-void starpu_bound_print_dot(FILE *output) {
			
 
				+void starpu_bound_print_dot(FILE *output)
			
 
				+{
			
 
				 	struct bound_task *t;
			
 
				 	struct bound_tag_dep *td;
			
 
				 	int i;
			
 
				 
			
 
				-	if (!recorddeps) {
			
 
				+	if (!recorddeps)
			
 
				+	{
			
 
				 		fprintf(output, "Not supported\n");
			
 
				 		return;
			
 
				 	}
			
 
				 	fprintf(output, "strict digraph bounddeps {\n");
			
 
				-	for (t = tasks; t; t = t->next) {
			
 
				+	for (t = tasks; t; t = t->next)
			
 
				+	{
			
 
				 		fprintf(output, "\"t%lu\" [label=\"%lu: %s\"]\n", t->id, t->id, t->cl->model->symbol);
			
 
				 		for (i = 0; i < t->depsn; i++)
			
 
				 			fprintf(output, "\"t%lu\" -> \"t%lu\"\n", t->deps[i]->id, t->id);
			
@@ -388,20 +409,25 @@ void starpu_bound_print_lp(FILE *output)
 
				 	_STARPU_PTHREAD_MUTEX_LOCK(&mutex);
			
 
				 	nw = starpu_worker_get_count();
			
 
				 
			
 
				-	if (recorddeps) {
			
 
				+	if (recorddeps)
			
 
				+	{
			
 
				 		struct bound_task *t1, *t2;
			
 
				 		struct bound_tag_dep *td;
			
 
				 		int i;
			
 
				 
			
 
				 		nt = 0;
			
 
				-		for (t1 = tasks; t1; t1 = t1->next) {
			
 
				-			struct _starpu_job j = {
			
 
				+		for (t1 = tasks; t1; t1 = t1->next)
			
 
				+		{
			
 
				+			struct _starpu_job j =
			
 
				+			{
			
 
				 				.footprint = t1->footprint,
			
 
				 				.footprint_is_computed = 1,
			
 
				 			};
			
 
				-			for (w = 0; w < nw; w++) {
			
 
				+			for (w = 0; w < nw; w++)
			
 
				+			{
			
 
				 				enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
			
 
				-				if (t1->duration[arch] == 0.) {
			
 
				+				if (t1->duration[arch] == 0.)
			
 
				+				{
			
 
				 					double length = _starpu_history_based_job_expected_perf(t1->cl->model, arch, &j,j.nimpl);
			
 
				 					if (length == -1.0)
			
 
				 						/* Avoid problems with binary coding of doubles */
			
@@ -422,8 +448,10 @@ void starpu_bound_print_lp(FILE *output)
 
				 			fprintf(output, "c%lu <= tmax;\n", t1->id);
			
 
				 
			
 
				 		fprintf(output, "\n/* We have tasks executing on workers, exactly one worker executes each task */\n");
			
 
				-		for (t1 = tasks; t1; t1 = t1->next) {
			
 
				-			for (w = 0; w < nw; w++) {
			
 
				+		for (t1 = tasks; t1; t1 = t1->next)
			
 
				+		{
			
 
				+			for (w = 0; w < nw; w++)
			
 
				+			{
			
 
				 				enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
			
 
				 				if (t1->duration[arch] != -1.0)
			
 
				 					fprintf(output, " +t%luw%d", t1->id, w);
			
@@ -433,9 +461,11 @@ void starpu_bound_print_lp(FILE *output)
 
				 
			
 
				 		fprintf(output, "\n/* Completion time is start time plus computation time */\n");
			
 
				 		fprintf(output, "/* According to where the task is indeed executed */\n");
			
 
				-		for (t1 = tasks; t1; t1 = t1->next) {
			
 
				+		for (t1 = tasks; t1; t1 = t1->next)
			
 
				+		{
			
 
				 			fprintf(output, "/* %s %x */\tc%lu = s%lu", t1->cl->model->symbol, (unsigned) t1->footprint, t1->id, t1->id);
			
 
				-			for (w = 0; w < nw; w++) {
			
 
				+			for (w = 0; w < nw; w++)
			
 
				+			{
			
 
				 				enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
			
 
				 				if (t1->duration[arch] != -1.0)
			
 
				 					fprintf(output, " + %f t%luw%d", t1->duration[arch], t1->id, w);
			
@@ -451,7 +481,8 @@ void starpu_bound_print_lp(FILE *output)
 
				 
			
 
				 		fprintf(output, "\n/* Each tag finishes when its corresponding task finishes */");
			
 
				 		for (t1 = tasks; t1; t1 = t1->next)
			
 
				-			if (t1->use_tag) {
			
 
				+			if (t1->use_tag)
			
 
				+			{
			
 
				 				for (w = 0; w < nw; w++)
			
 
				 					fprintf(output, "c%lu = tag%lu;\n", t1->id, (unsigned long) t1->tag_id);
			
 
				 			}
			
@@ -463,13 +494,17 @@ void starpu_bound_print_lp(FILE *output)
 
				 /* TODO: factorize ancestor calls */
			
 
				 		fprintf(output, "\n/* For each task pair and each worker, if both tasks are executed by the same worker,\n");
			
 
				 		fprintf(output, "   one is started after the other's completion */\n");
			
 
				-		for (t1 = tasks; t1; t1 = t1->next) {
			
 
				+		for (t1 = tasks; t1; t1 = t1->next)
			
 
				+		{
			
 
				 			for (t2 = t1->next; t2; t2 = t2->next)
			
 
				 			{
			
 
				-				if (!ancestor(t1, t2) && !ancestor(t2, t1)) {
			
 
				-					for (w = 0; w < nw; w++) {
			
 
				+				if (!ancestor(t1, t2) && !ancestor(t2, t1))
			
 
				+				{
			
 
				+					for (w = 0; w < nw; w++)
			
 
				+					{
			
 
				 						enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
			
 
				-						if (t1->duration[arch] != -1.0) {
			
 
				+						if (t1->duration[arch] != -1.0)
			
 
				+						{
			
 
				 							fprintf(output, "s%lu - c%lu >= -3e5 + 1e5 t%luw%d + 1e5 t%luw%d + 1e5 t%luafter%lu;\n",
			
 
				 									t1->id, t2->id, t1->id, w, t2->id, w, t1->id, t2->id);
			
 
				 							fprintf(output, "s%lu - c%lu >= -2e5 + 1e5 t%luw%d + 1e5 t%luw%d - 1e5 t%luafter%lu;\n",
			
@@ -491,26 +526,32 @@ void starpu_bound_print_lp(FILE *output)
 
				 				}
			
 
				 #endif
			
 
				 
			
 
				-		if (recordprio) {
			
 
				+		if (recordprio)
			
 
				+		{
			
 
				 			fprintf(output, "\n/* For StarPU, a priority means given schedulable tasks it will consider the\n");
			
 
				 			fprintf(output, " * more prioritized first */\n");
			
 
				-			for (t1 = tasks; t1; t1 = t1->next) {
			
 
				+			for (t1 = tasks; t1; t1 = t1->next)
			
 
				+			{
			
 
				 				for (t2 = t1->next; t2; t2 = t2->next)
			
 
				 				{
			
 
				 					if (!ancestor(t1, t2) && !ancestor(t2, t1)
			
 
				-					     && t1->priority != t2->priority) {
			
 
				-						if (t1->priority > t2->priority) {
			
 
				+					     && t1->priority != t2->priority)
			
 
				+					{
			
 
				+						if (t1->priority > t2->priority)
			
 
				+						{
			
 
				 							/* Either t2 is scheduled before t1, but then it
			
 
				 							   needs to be scheduled before some t dep finishes */
			
 
				 
			
 
				 							/* One of the t1 deps to give the maximum start time for t2 */
			
 
				-							if (t1->depsn > 1) {
			
 
				+							if (t1->depsn > 1)
			
 
				+							{
			
 
				 								for (i = 0; i < t1->depsn; i++)
			
 
				 									fprintf(output, " + t%lut%lud%d", t2->id, t1->id, i);
			
 
				 								fprintf(output, " = 1;\n");
			
 
				 							}
			
 
				 
			
 
				-							for (i = 0; i < t1->depsn; i++) {
			
 
				+							for (i = 0; i < t1->depsn; i++)
			
 
				+							{
			
 
				 								fprintf(output, "c%lu - s%lu >= ", t1->deps[i]->id, t2->id);
			
 
				 								if (t1->depsn > 1)
			
 
				 									/* Only checks this when it's this dependency that is chosen */
			
@@ -524,18 +565,22 @@ void starpu_bound_print_lp(FILE *output)
 
				 
			
 
				 							/* Or t2 is scheduled after t1 is.  */
			
 
				 							fprintf(output, "s%lu - s%lu >= -1e5 t%luafter%lu;\n", t2->id, t1->id, t1->id, t2->id);
			
 
				-						} else {
			
 
				+						}
			
 
				+						else
			
 
				+						{
			
 
				 							/* Either t1 is scheduled before t2, but then it
			
 
				 							   needs to be scheduled before some t2 dep finishes */
			
 
				 
			
 
				 							/* One of the t2 deps to give the maximum start time for t1 */
			
 
				-							if (t2->depsn > 1) {
			
 
				+							if (t2->depsn > 1)
			
 
				+							{
			
 
				 								for (i = 0; i < t2->depsn; i++)
			
 
				 									fprintf(output, " + t%lut%lud%d", t1->id, t2->id, i);
			
 
				 								fprintf(output, " = 1;\n");
			
 
				 							}
			
 
				 
			
 
				-							for (i = 0; i < t2->depsn; i++) {
			
 
				+							for (i = 0; i < t2->depsn; i++)
			
 
				+							{
			
 
				 								fprintf(output, "c%lu - s%lu >= ", t2->deps[i]->id, t1->id);
			
 
				 								if (t2->depsn > 1)
			
 
				 									/* Only checks this when it's this dependency that is chosen */
			
@@ -555,14 +600,19 @@ void starpu_bound_print_lp(FILE *output)
 
				 
			
 
				 		for (t1 = tasks; t1; t1 = t1->next)
			
 
				 			for (t2 = t1->next; t2; t2 = t2->next)
			
 
				-				if (!ancestor(t1, t2) && !ancestor(t2, t1)) {
			
 
				+				if (!ancestor(t1, t2) && !ancestor(t2, t1))
			
 
				+				{
			
 
				 					fprintf(output, "bin t%luafter%lu;\n", t1->id, t2->id);
			
 
				-					if (recordprio && t1->priority != t2->priority) {
			
 
				-						if (t1->priority > t2->priority) {
			
 
				+					if (recordprio && t1->priority != t2->priority)
			
 
				+					{
			
 
				+						if (t1->priority > t2->priority)
			
 
				+						{
			
 
				 							if (t1->depsn > 1)
			
 
				 								for (i = 0; i < t1->depsn; i++)
			
 
				 									fprintf(output, "bin t%lut%lud%d;\n", t2->id, t1->id, i);
			
 
				-						} else {
			
 
				+						}
			
 
				+						else
			
 
				+						{
			
 
				 							if (t2->depsn > 1)
			
 
				 								for (i = 0; i < t2->depsn; i++)
			
 
				 									fprintf(output, "bin t%lut%lud%d;\n", t1->id, t2->id, i);
			
@@ -573,7 +623,9 @@ void starpu_bound_print_lp(FILE *output)
 
				 		for (t1 = tasks; t1; t1 = t1->next)
			
 
				 			for (w = 0; w < nw; w++)
			
 
				 				fprintf(output, "bin t%luw%d;\n", t1->id, w);
			
 
				-	} else {
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				 		struct bound_task_pool *tp;
			
 
				 		nt = 0;
			
 
				 		for (tp = task_pools; tp; tp = tp->next)
			
@@ -589,11 +641,13 @@ void starpu_bound_print_lp(FILE *output)
 
				 			fprintf(output, "min: tmax;\n\n");
			
 
				 
			
 
				 			fprintf(output, "/* Which is the maximum of all worker execution times (ms) */\n");
			
 
				-			for (w = 0; w < nw; w++) {
			
 
				+			for (w = 0; w < nw; w++)
			
 
				+			{
			
 
				 				char name[32];
			
 
				 				starpu_worker_get_name(w, name, sizeof(name));
			
 
				 				fprintf(output, "/* worker %s */\n0", name);
			
 
				-				for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
			
 
				+				for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
			
 
				+				{
			
 
				 					if (times[w*nt+t] != -1.0)
			
 
				 						fprintf(output, "\t%+f * w%dt%dn", (float) times[w*nt+t], w, t);
			
 
				 				}
			
@@ -602,7 +656,8 @@ void starpu_bound_print_lp(FILE *output)
 
				 			fprintf(output, "\n");
			
 
				 
			
 
				 			fprintf(output, "/* And we have to have computed exactly all tasks */\n");
			
 
				-			for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
			
 
				+			for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
			
 
				+			{
			
 
				 				fprintf(output, "/* task %s key %x */\n0", tp->cl->model->symbol, (unsigned) tp->footprint);
			
 
				 				for (w = 0; w < nw; w++)
			
 
				 					if (times[w*nt+t] != -1.0)
			
@@ -619,7 +674,8 @@ void starpu_bound_print_lp(FILE *output)
 
				 			fprintf(output, "/* int ");
			
 
				 			int first = 1;
			
 
				 			for (w = 0; w < nw; w++)
			
 
				-				for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
			
 
				+				for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
			
 
				+				{
			
 
				 					if (!first)
			
 
				 						fprintf(output, ",");
			
 
				 					else
			
@@ -643,7 +699,8 @@ void starpu_bound_print_mps(FILE *output)
 
				 	int nw; /* Number of different workers */
			
 
				 	int t, w;
			
 
				 
			
 
				-	if (recorddeps) {
			
 
				+	if (recorddeps)
			
 
				+	{
			
 
				 		fprintf(output, "Not supported\n");
			
 
				 		return;
			
 
				 	}
			
@@ -654,7 +711,6 @@ void starpu_bound_print_mps(FILE *output)
 
				 	nt = 0;
			
 
				 	for (tp = task_pools; tp; tp = tp->next)
			
 
				 		nt++;
			
 
				-
			
 
				 	{
			
 
				 		double times[nw*nt];
			
 
				 
			
@@ -668,7 +724,8 @@ void starpu_bound_print_mps(FILE *output)
 
				 		fprintf(output, " N  TMAX\n");
			
 
				 
			
 
				 		fprintf(output, "\n* Which is the maximum of all worker execution times (ms)\n");
			
 
				-		for (w = 0; w < nw; w++) {
			
 
				+		for (w = 0; w < nw; w++)
			
 
				+		{
			
 
				 			char name[32];
			
 
				 			starpu_worker_get_name(w, name, sizeof(name));
			
 
				 			fprintf(output, "* worker %s\n", name);
			
@@ -676,7 +733,8 @@ void starpu_bound_print_mps(FILE *output)
 
				 		}
			
 
				 
			
 
				 		fprintf(output, "\n* And we have to have computed exactly all tasks\n");
			
 
				-		for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
			
 
				+		for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
			
 
				+		{
			
 
				 			fprintf(output, "* task %s key %x\n", tp->cl->model->symbol, (unsigned) tp->footprint);
			
 
				 			fprintf(output, " E  T%d\n", t);
			
 
				 		}
			
@@ -686,7 +744,8 @@ void starpu_bound_print_mps(FILE *output)
 
				 		fprintf(output, "\n* Execution times and completion of all tasks\n");
			
 
				 		for (w = 0; w < nw; w++)
			
 
				 			for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
			
 
				-				if (times[w*nt+t] != -1.0) {
			
 
				+				if (times[w*nt+t] != -1.0)
			
 
				+				{
			
 
				 					char name[9];
			
 
				 					snprintf(name, sizeof(name), "W%dT%d", w, t);
			
 
				 					fprintf(stderr,"    %-8s  W%-7d  %12f\n", name, w, times[w*nt+t]);
			
@@ -751,7 +810,8 @@ static glp_prob *_starpu_bound_glp_resolve(int integer)
 
				 		glp_set_obj_coef(lp, nw*nt+1, 1.);
			
 
				 
			
 
				 		for (w = 0; w < nw; w++)
			
 
				-			for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
			
 
				+			for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
			
 
				+			{
			
 
				 				char name[32];
			
 
				 				snprintf(name, sizeof(name), "w%dt%dn", w, t);
			
 
				 				glp_set_col_name(lp, colnum(w, t), name);
			
@@ -763,23 +823,27 @@ static glp_prob *_starpu_bound_glp_resolve(int integer)
 
				 
			
 
				 		/* Total worker execution time */
			
 
				 		glp_add_rows(lp, nw);
			
 
				-		for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
			
 
				+		for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
			
 
				+		{
			
 
				 			int someone = 0;
			
 
				 			for (w = 0; w < nw; w++)
			
 
				 				if (times[w*nt+t] != -1.)
			
 
				 					someone = 1;
			
 
				-			if (!someone) {
			
 
				+			if (!someone)
			
 
				+			{
			
 
				 				/* This task does not have any performance model at all, abort */
			
 
				 				glp_delete_prob(lp);
			
 
				 				return NULL;
			
 
				 			}
			
 
				 		}
			
 
				-		for (w = 0; w < nw; w++) {
			
 
				+		for (w = 0; w < nw; w++)
			
 
				+		{
			
 
				 			char name[32], title[64];
			
 
				 			starpu_worker_get_name(w, name, sizeof(name));
			
 
				 			snprintf(title, sizeof(title), "worker %s", name);
			
 
				 			glp_set_row_name(lp, w+1, title);
			
 
				-			for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
			
 
				+			for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
			
 
				+			{
			
 
				 				ia[n] = w+1;
			
 
				 				ja[n] = colnum(w, t);
			
 
				 				if (times[w*nt+t] == -1.)
			
@@ -798,12 +862,14 @@ static glp_prob *_starpu_bound_glp_resolve(int integer)
 
				 
			
 
				 		/* Total task completion */
			
 
				 		glp_add_rows(lp, nt);
			
 
				-		for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
			
 
				+		for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
			
 
				+		{
			
 
				 			char name[32], title[64];
			
 
				 			starpu_worker_get_name(w, name, sizeof(name));
			
 
				 			snprintf(title, sizeof(title), "task %s key %x", tp->cl->model->symbol, (unsigned) tp->footprint);
			
 
				 			glp_set_row_name(lp, nw+t+1, title);
			
 
				-			for (w = 0; w < nw; w++) {
			
 
				+			for (w = 0; w < nw; w++)
			
 
				+			{
			
 
				 				ia[n] = nw+t+1;
			
 
				 				ja[n] = colnum(w, t);
			
 
				 				ar[n] = 1;
			
@@ -821,12 +887,14 @@ static glp_prob *_starpu_bound_glp_resolve(int integer)
 
				 	glp_init_smcp(&parm);
			
 
				 	parm.msg_lev = GLP_MSG_OFF;
			
 
				 	ret = glp_simplex(lp, &parm);
			
 
				-	if (ret) {
			
 
				+	if (ret)
			
 
				+	{
			
 
				 		glp_delete_prob(lp);
			
 
				 		lp = NULL;
			
 
				 		return NULL;
			
 
				 	}
			
 
				-	if (integer) {
			
 
				+	if (integer)
			
 
				+	{
			
 
				 		glp_iocp iocp;
			
 
				 		glp_init_iocp(&iocp);
			
 
				 		iocp.msg_lev = GLP_MSG_OFF;
			
@@ -837,16 +905,19 @@ static glp_prob *_starpu_bound_glp_resolve(int integer)
 
				 }
			
 
				 #endif /* HAVE_GLPK_H */
			
 
				 
			
 
				-void starpu_bound_print(FILE *output, int integer __attribute__ ((unused))) {
			
 
				+void starpu_bound_print(FILE *output, int integer __attribute__ ((unused)))
			
 
				+{
			
 
				 #ifdef HAVE_GLPK_H
			
 
				-	if (recorddeps) {
			
 
				+	if (recorddeps)
			
 
				+	{
			
 
				 		fprintf(output, "Not supported\n");
			
 
				 		return;
			
 
				 	}
			
 
				 
			
 
				 	_STARPU_PTHREAD_MUTEX_LOCK(&mutex);
			
 
				 	glp_prob *lp = _starpu_bound_glp_resolve(integer);
			
 
				-	if (lp) {
			
 
				+	if (lp)
			
 
				+	{
			
 
				 		struct bound_task_pool * tp;
			
 
				 		int t, w;
			
 
				 		int nw; /* Number of different workers */
			
@@ -861,7 +932,8 @@ void starpu_bound_print(FILE *output, int integer __attribute__ ((unused))) {
 
				 
			
 
				 		fprintf(output, "Theoretical minimum execution time: %f ms\n", tmax);
			
 
				 
			
 
				-		for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
			
 
				+		for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
			
 
				+		{
			
 
				 			fprintf(output, "%s key %x\n", tp->cl->model->symbol, (unsigned) tp->footprint);
			
 
				 			for (w = 0; w < nw; w++)
			
 
				 				if (integer)
			
@@ -872,7 +944,9 @@ void starpu_bound_print(FILE *output, int integer __attribute__ ((unused))) {
 
				 		}
			
 
				 
			
 
				 		glp_delete_prob(lp);
			
 
				-	} else {
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				 		fprintf(stderr, "Simplex failed\n");
			
 
				 	}
			
 
				 	_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
			
@@ -881,23 +955,27 @@ void starpu_bound_print(FILE *output, int integer __attribute__ ((unused))) {
 
				 #endif /* HAVE_GLPK_H */
			
 
				 }
			
 
				 
			
 
				-void starpu_bound_compute(double *res, double *integer_res __attribute__ ((unused)), int integer __attribute__ ((unused))) {
			
 
				+void starpu_bound_compute(double *res, double *integer_res __attribute__ ((unused)), int integer __attribute__ ((unused)))
			
 
				+{
			
 
				 #ifdef HAVE_GLPK_H
			
 
				 	double ret;
			
 
				 
			
 
				-	if (recorddeps) {
			
 
				+	if (recorddeps)
			
 
				+	{
			
 
				 		*res = 0.;
			
 
				 		return;
			
 
				 	}
			
 
				 
			
 
				 	_STARPU_PTHREAD_MUTEX_LOCK(&mutex);
			
 
				 	glp_prob *lp = _starpu_bound_glp_resolve(integer);
			
 
				-	if (lp) {
			
 
				+	if (lp)
			
 
				+	{
			
 
				 		ret = glp_get_obj_val(lp);
			
 
				 		if (integer)
			
 
				 			*integer_res = glp_mip_obj_val(lp);
			
 
				 		glp_delete_prob(lp);
			
 
				-	} else
			
 
				+	}
			
 
				+	else
			
 
				 		ret = 0.;
			
 
				 	_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
			
 
				 	*res = ret;
			
--- a/src/profiling/profiling.c
+++ b/src/profiling/profiling.c
@@ -39,7 +39,8 @@ static struct timespec executing_start_date[STARPU_NMAXWORKERS];
 
				 
			
 
				 /* Store the busid of the different (src, dst) pairs. busid_matrix[src][dst]
			
 
				  * contains the busid of (src, dst) or -1 if the bus was not registered. */
			
 
				-struct node_pair {
			
 
				+struct node_pair
			
 
				+{
			
 
				 	int src;
			
 
				 	int dst;
			
 
				 	struct starpu_bus_profiling_info *bus_info;
			
@@ -47,7 +48,7 @@ struct node_pair {
 
				 
			
 
				 static int busid_matrix[STARPU_MAXNODES][STARPU_MAXNODES];
			
 
				 static struct starpu_bus_profiling_info bus_profiling_info[STARPU_MAXNODES][STARPU_MAXNODES];
			
 
				-static struct node_pair busid_to_node_pair[STARPU_MAXNODES*STARPU_MAXNODES]; 
			
 
				+static struct node_pair busid_to_node_pair[STARPU_MAXNODES*STARPU_MAXNODES];
			
 
				 static unsigned busid_cnt = 0;
			
 
				 
			
 
				 static void _starpu_bus_reset_profiling_info(struct starpu_bus_profiling_info *bus_info);
			
@@ -148,7 +149,7 @@ static void _starpu_worker_reset_profiling_info_with_lock(int workerid)
 
				 	worker_info[workerid].used_cycles = 0;
			
 
				 	worker_info[workerid].stall_cycles = 0;
			
 
				 	worker_info[workerid].power_consumed = 0;
			
 
				-	
			
 
				+
			
 
				 	/* We detect if the worker is already sleeping or doing some
			
 
				 	 * computation */
			
 
				 	enum _starpu_worker_status status = _starpu_worker_get_status(workerid);
			
@@ -158,7 +159,8 @@ static void _starpu_worker_reset_profiling_info_with_lock(int workerid)
 
				 		worker_registered_sleeping_start[workerid] = 1;
			
 
				 		_starpu_clock_gettime(&sleeping_start_date[workerid]);
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		worker_registered_sleeping_start[workerid] = 0;
			
 
				 	}
			
 
				 
			
@@ -167,7 +169,8 @@ static void _starpu_worker_reset_profiling_info_with_lock(int workerid)
 
				 		worker_registered_executing_start[workerid] = 1;
			
 
				 		_starpu_clock_gettime(&executing_start_date[workerid]);
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		worker_registered_executing_start[workerid] = 0;
			
 
				 	}
			
 
				 }
			
@@ -184,7 +187,7 @@ void _starpu_worker_register_sleeping_start_date(int workerid, struct timespec *
 
				 	if (profiling)
			
 
				 	{
			
 
				 		_STARPU_PTHREAD_MUTEX_LOCK(&worker_info_mutex[workerid]);
			
 
				-		worker_registered_sleeping_start[workerid] = 1;	
			
 
				+		worker_registered_sleeping_start[workerid] = 1;
			
 
				 		memcpy(&sleeping_start_date[workerid], sleeping_start, sizeof(struct timespec));
			
 
				 		_STARPU_PTHREAD_MUTEX_UNLOCK(&worker_info_mutex[workerid]);
			
 
				 	}
			
@@ -195,7 +198,7 @@ void _starpu_worker_register_executing_start_date(int workerid, struct timespec
 
				 	if (profiling)
			
 
				 	{
			
 
				 		_STARPU_PTHREAD_MUTEX_LOCK(&worker_info_mutex[workerid]);
			
 
				-		worker_registered_executing_start[workerid] = 1;	
			
 
				+		worker_registered_executing_start[workerid] = 1;
			
 
				 		memcpy(&executing_start_date[workerid], executing_start, sizeof(struct timespec));
			
 
				 		_STARPU_PTHREAD_MUTEX_UNLOCK(&worker_info_mutex[workerid]);
			
 
				 	}
			
@@ -208,7 +211,7 @@ void _starpu_worker_update_profiling_info_sleeping(int workerid, struct timespec
 
				 		_STARPU_PTHREAD_MUTEX_LOCK(&worker_info_mutex[workerid]);
			
 
				 
			
 
				                 /* Perhaps that profiling was enabled while the worker was
			
 
				-                 * already blocked, so we don't measure (end - start), but 
			
 
				+                 * already blocked, so we don't measure (end - start), but
			
 
				                  * (end - max(start,worker_start)) where worker_start is the
			
 
				                  * date of the previous profiling info reset on the worker */
			
 
				 		struct timespec *worker_start = &worker_info[workerid].start_time;
			
@@ -223,7 +226,7 @@ void _starpu_worker_update_profiling_info_sleeping(int workerid, struct timespec
 
				 
			
 
				 		starpu_timespec_accumulate(&worker_info[workerid].sleeping_time, &sleeping_time);
			
 
				 
			
 
				-		worker_registered_sleeping_start[workerid] = 0;	
			
 
				+		worker_registered_sleeping_start[workerid] = 0;
			
 
				 
			
 
				 		_STARPU_PTHREAD_MUTEX_UNLOCK(&worker_info_mutex[workerid]);
			
 
				 	}
			
@@ -243,9 +246,10 @@ void _starpu_worker_update_profiling_info_executing(int workerid, struct timespe
 
				 		worker_info[workerid].stall_cycles += stall_cycles;
			
 
				 		worker_info[workerid].power_consumed += power_consumed;
			
 
				 		worker_info[workerid].executed_tasks += executed_tasks;
			
 
				-	
			
 
				+
			
 
				 		_STARPU_PTHREAD_MUTEX_UNLOCK(&worker_info_mutex[workerid]);
			
 
				-	} else /* Not thread safe, shouldn't be too much a problem */
			
 
				+	}
			
 
				+	else /* Not thread safe, shouldn't be too much a problem */
			
 
				 		worker_info[workerid].executed_tasks += executed_tasks;
			
 
				 }
			
 
				 
			
@@ -329,7 +333,7 @@ void _starpu_initialize_busid_matrix(void)
 
				 	int i, j;
			
 
				 	for (j = 0; j < STARPU_MAXNODES; j++)
			
 
				 	for (i = 0; i < STARPU_MAXNODES; i++)
			
 
				-		busid_matrix[i][j] = -1;	
			
 
				+		busid_matrix[i][j] = -1;
			
 
				 
			
 
				 	busid_cnt = 0;
			
 
				 }
			
@@ -400,7 +404,7 @@ int starpu_bus_get_profiling_info(int busid, struct starpu_bus_profiling_info *b
 
				 	_starpu_bus_reset_profiling_info(&bus_profiling_info[src_node][dst_node]);
			
 
				 
			
 
				 	return 0;
			
 
				-} 
			
 
				+}
			
 
				 
			
 
				 void _starpu_bus_update_profiling_info(int src_node, int dst_node, size_t size)
			
 
				 {
			
--- a/src/profiling/profiling_helpers.c
+++ b/src/profiling/profiling_helpers.c
@@ -30,7 +30,7 @@ void starpu_bus_profiling_helper_display_summary(void)
 
				 	for (busid = 0; busid < bus_cnt; busid++)
			
 
				 	{
			
 
				 		int src, dst;
			
 
				-	
			
 
				+
			
 
				 		src = starpu_bus_get_src(busid);
			
 
				 		dst = starpu_bus_get_dst(busid);
			
 
				 
			
@@ -67,7 +67,8 @@ void starpu_worker_profiling_helper_display_summary(void)
 
				 
			
 
				 		starpu_worker_get_name(workerid, name, sizeof(name));
			
 
				 
			
 
				-		if (profiling) {
			
 
				+		if (profiling)
			
 
				+		{
			
 
				 			double total_time = starpu_timing_timespec_to_us(&info.total_time) / 1000.;
			
 
				 			double executing_time = starpu_timing_timespec_to_us(&info.executing_time) / 1000.;
			
 
				 			double sleeping_time = starpu_timing_timespec_to_us(&info.sleeping_time) / 1000.;
			
@@ -80,16 +81,20 @@ void starpu_worker_profiling_helper_display_summary(void)
 
				 				fprintf(stderr, "\t%lu Mcy %lu Mcy stall\n", info.used_cycles/1000000, info.stall_cycles/1000000);
			
 
				 			if (info.power_consumed)
			
 
				 				fprintf(stderr, "\t%f J consumed\n", info.power_consumed);
			
 
				-		} else {
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				 			fprintf(stderr, "\t%-32s\t%d task(s)\n", name, info.executed_tasks);
			
 
				 		}
			
 
				 
			
 
				 		sum_consumed += info.power_consumed;
			
 
				 	}
			
 
				 
			
 
				-	if (profiling) {
			
 
				+	if (profiling)
			
 
				+	{
			
 
				 		const char *strval_idle_power = getenv("STARPU_IDLE_POWER");
			
 
				-		if (strval_idle_power) {
			
 
				+		if (strval_idle_power)
			
 
				+		{
			
 
				 			double idle_power = atof(strval_idle_power); /* Watt */
			
 
				 			double idle_consumption = idle_power * overall_time / 1000.; /* J */
			
 
				 
			
--- a/src/sched_policies/deque_modeling_policy_data_aware.c
+++ b/src/sched_policies/deque_modeling_policy_data_aware.c
@@ -56,7 +56,7 @@ static int count_non_ready_buffers(struct starpu_task *task, uint32_t node)
 
				 
			
 
				 		descr = &descrs[index];
			
 
				 		handle = descr->handle;
			
 
				-		
			
 
				+
			
 
				 		int is_valid;
			
 
				 		starpu_data_query_status(handle, node, NULL, &is_valid, NULL);
			
 
				 
			
@@ -74,7 +74,7 @@ static struct starpu_task *_starpu_fifo_pop_first_ready_task(struct _starpu_fifo
 
				 	if (fifo_queue->ntasks == 0)
			
 
				 		return NULL;
			
 
				 
			
 
				-	if (fifo_queue->ntasks > 0) 
			
 
				+	if (fifo_queue->ntasks > 0)
			
 
				 	{
			
 
				 		fifo_queue->ntasks--;
			
 
				 
			
@@ -105,12 +105,12 @@ static struct starpu_task *_starpu_fifo_pop_first_ready_task(struct _starpu_fifo
 
				 
			
 
				 			current = current->prev;
			
 
				 		}
			
 
				-		
			
 
				+
			
 
				 		starpu_task_list_erase(&fifo_queue->taskq, task);
			
 
				 
			
 
				 		_STARPU_TRACE_JOB_POP(task, 0);
			
 
				 	}
			
 
				-	
			
 
				+
			
 
				 	return task;
			
 
				 }
			
 
				 
			
@@ -124,9 +124,10 @@ static struct starpu_task *dmda_pop_ready_task(void)
 
				 	unsigned node = starpu_worker_get_memory_node(workerid);
			
 
				 
			
 
				 	task = _starpu_fifo_pop_first_ready_task(fifo, node);
			
 
				-	if (task) {
			
 
				+	if (task)
			
 
				+	{
			
 
				 		double model = task->predicted;
			
 
				-	
			
 
				+
			
 
				 		fifo->exp_len -= model;
			
 
				 		fifo->exp_start = starpu_timing_now() + model;
			
 
				 		fifo->exp_end = fifo->exp_start + fifo->exp_len;
			
@@ -154,9 +155,10 @@ static struct starpu_task *dmda_pop_task(void)
 
				 	struct _starpu_fifo_taskq *fifo = queue_array[workerid];
			
 
				 
			
 
				 	task = _starpu_fifo_pop_task(fifo, workerid);
			
 
				-	if (task) {
			
 
				+	if (task)
			
 
				+	{
			
 
				 		double model = task->predicted;
			
 
				-	
			
 
				+
			
 
				 		fifo->exp_len -= model;
			
 
				 		fifo->exp_start = starpu_timing_now() + model;
			
 
				 		fifo->exp_end = fifo->exp_start + fifo->exp_len;
			
@@ -195,7 +197,7 @@ static struct starpu_task *dmda_pop_every_task(void)
 
				 		fifo->exp_len -= model;
			
 
				 		fifo->exp_start = starpu_timing_now() + model;
			
 
				 		fifo->exp_end = fifo->exp_start + fifo->exp_len;
			
 
				-	
			
 
				+
			
 
				 		new_list = new_list->next;
			
 
				 	}
			
 
				 
			
@@ -218,7 +220,8 @@ int _starpu_fifo_push_sorted_task(struct _starpu_fifo_taskq *fifo_queue, pthread
 
				 		task->prev = NULL;
			
 
				 		task->next = NULL;
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		struct starpu_task *current = list->head;
			
 
				 		struct starpu_task *prev = NULL;
			
 
				 
			
@@ -239,7 +242,8 @@ int _starpu_fifo_push_sorted_task(struct _starpu_fifo_taskq *fifo_queue, pthread
 
				 			task->next = list->head;
			
 
				 			list->head = task;
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			if (current)
			
 
				 			{
			
 
				 				/* Insert between prev and current */
			
@@ -248,7 +252,8 @@ int _starpu_fifo_push_sorted_task(struct _starpu_fifo_taskq *fifo_queue, pthread
 
				 				task->next = current;
			
 
				 				current->prev = task;
			
 
				 			}
			
 
				-			else {
			
 
				+			else
			
 
				+			{
			
 
				 				/* Insert at the tail of the list */
			
 
				 				list->tail->next = task;
			
 
				 				task->next = NULL;
			
@@ -318,8 +323,10 @@ static int _dm_push_task(struct starpu_task *task, unsigned prio)
 
				 	unsigned best_impl = 0;
			
 
				 	unsigned nimpl;
			
 
				 
			
 
				-	for (worker = 0; worker < nworkers; worker++) {
			
 
				-		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) {
			
 
				+	for (worker = 0; worker < nworkers; worker++)
			
 
				+	{
			
 
				+		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
			
 
				+		{
			
 
				 			double exp_end;
			
 
				 
			
 
				 			fifo = queue_array[worker];
			
@@ -344,7 +351,8 @@ static int _dm_push_task(struct starpu_task *task, unsigned prio)
 
				 					|| (!calibrating && ntasks_end < ntasks_best_end) /* Not calibrating, take better task */
			
 
				 					|| (!calibrating && local_length == -1.0) /* Not calibrating but this worker is being calibrated */
			
 
				 					|| (calibrating && local_length == -1.0 && ntasks_end < ntasks_best_end) /* Calibrating, compete this worker with other non-calibrated */
			
 
				-					) {
			
 
				+					)
			
 
				+			{
			
 
				 				ntasks_best_end = ntasks_end;
			
 
				 				ntasks_best = worker;
			
 
				 				best_impl = nimpl;
			
@@ -377,11 +385,12 @@ static int _dm_push_task(struct starpu_task *task, unsigned prio)
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	if (unknown) {
			
 
				+	if (unknown)
			
 
				+	{
			
 
				 		best = ntasks_best;
			
 
				 		model_best = 0.0;
			
 
				 	}
			
 
				-	
			
 
				+
			
 
				 	//_STARPU_DEBUG("Scheduler dm: kernel (%u)\n", best_impl);
			
 
				 
			
 
				 	 _starpu_get_job_associated_to_task(task)->nimpl = best_impl;
			
@@ -396,7 +405,7 @@ static int _dmda_push_task(struct starpu_task *task, unsigned prio)
 
				 	struct _starpu_fifo_taskq *fifo;
			
 
				 	unsigned worker;
			
 
				 	int best = -1;
			
 
				-	
			
 
				+
			
 
				 	/* this flag is set if the corresponding worker is selected because
			
 
				 	   there is no performance prediction available yet */
			
 
				 	int forced_best = -1;
			
@@ -423,8 +432,10 @@ static int _dmda_push_task(struct starpu_task *task, unsigned prio)
 
				 	unsigned best_impl = 0;
			
 
				 	unsigned nimpl;
			
 
				 
			
 
				-	for (worker = 0; worker < nworkers; worker++) {
			
 
				-		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) {
			
 
				+	for (worker = 0; worker < nworkers; worker++)
			
 
				+	{
			
 
				+		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
			
 
				+		{
			
 
				 			fifo = queue_array[worker];
			
 
				 
			
 
				 			/* Sometimes workers didn't take the tasks as early as we expected */
			
@@ -453,7 +464,8 @@ static int _dmda_push_task(struct starpu_task *task, unsigned prio)
 
				 					|| (!calibrating && ntasks_end < ntasks_best_end) /* Not calibrating, take better task */
			
 
				 					|| (!calibrating && local_task_length[worker][nimpl] == -1.0) /* Not calibrating but this worker is being calibrated */
			
 
				 					|| (calibrating && local_task_length[worker][nimpl] == -1.0 && ntasks_end < ntasks_best_end) /* Calibrating, compete this worker with other non-calibrated */
			
 
				-					) {
			
 
				+					)
			
 
				+			{
			
 
				 				ntasks_best_end = ntasks_end;
			
 
				 				ntasks_best = worker;
			
 
				 				best_impl = nimpl;
			
@@ -493,7 +505,7 @@ static int _dmda_push_task(struct starpu_task *task, unsigned prio)
 
				 		forced_best = ntasks_best;
			
 
				 
			
 
				 	double best_fitness = -1;
			
 
				-	
			
 
				+
			
 
				 	if (forced_best == -1)
			
 
				 	{
			
 
				 		for (worker = 0; worker < nworkers; worker++)
			
@@ -504,12 +516,13 @@ static int _dmda_push_task(struct starpu_task *task, unsigned prio)
 
				 				/* no one on that queue may execute this task */
			
 
				 				continue;
			
 
				 			}
			
 
				-	
			
 
				-			fitness[worker][nimpl] = alpha*(exp_end[worker][nimpl] - best_exp_end) 
			
 
				+
			
 
				+			fitness[worker][nimpl] = alpha*(exp_end[worker][nimpl] - best_exp_end)
			
 
				 				+ beta*(local_data_penalty[worker][nimpl])
			
 
				 				+ _gamma*(local_power[worker][nimpl]);
			
 
				 
			
 
				-			if (exp_end[worker][nimpl] > max_exp_end) {
			
 
				+			if (exp_end[worker][nimpl] > max_exp_end)
			
 
				+			{
			
 
				 				/* This placement will make the computation
			
 
				 				 * longer, take into account the idle
			
 
				 				 * consumption of other cpus */
			
@@ -529,7 +542,7 @@ static int _dmda_push_task(struct starpu_task *task, unsigned prio)
 
				 	}
			
 
				 
			
 
				 	STARPU_ASSERT(forced_best != -1 || best != -1);
			
 
				-	
			
 
				+
			
 
				 	if (forced_best != -1)
			
 
				 	{
			
 
				 		/* there is no prediction available for that task
			
@@ -539,7 +552,7 @@ static int _dmda_push_task(struct starpu_task *task, unsigned prio)
 
				 		model_best = 0.0;
			
 
				 		//penality_best = 0.0;
			
 
				 	}
			
 
				-	else 
			
 
				+	else
			
 
				 	{
			
 
				 		model_best = local_task_length[best][nimpl];
			
 
				 		//penality_best = local_data_penalty[best][nimpl];
			
@@ -568,8 +581,8 @@ static int dmda_push_task(struct starpu_task *task)
 
				 	return _dmda_push_task(task, 0);
			
 
				 }
			
 
				 
			
 
				-static void initialize_dmda_policy(struct starpu_machine_topology *topology, 
			
 
				-	 __attribute__ ((unused)) struct starpu_sched_policy *_policy) 
			
 
				+static void initialize_dmda_policy(struct starpu_machine_topology *topology,
			
 
				+	 __attribute__ ((unused)) struct starpu_sched_policy *_policy)
			
 
				 {
			
 
				 	nworkers = topology->nworkers;
			
 
				 
			
@@ -593,10 +606,10 @@ static void initialize_dmda_policy(struct starpu_machine_topology *topology,
 
				 	for (workerid = 0; workerid < nworkers; workerid++)
			
 
				 	{
			
 
				 		queue_array[workerid] = _starpu_create_fifo();
			
 
				-	
			
 
				+
			
 
				 		_STARPU_PTHREAD_MUTEX_INIT(&sched_mutex[workerid], NULL);
			
 
				 		_STARPU_PTHREAD_COND_INIT(&sched_cond[workerid], NULL);
			
 
				-	
			
 
				+
			
 
				 		starpu_worker_set_sched_condition(workerid, &sched_cond[workerid], &sched_mutex[workerid]);
			
 
				 	}
			
 
				 }
			
@@ -611,8 +624,8 @@ static void initialize_dmda_sorted_policy(struct starpu_machine_topology *topolo
 
				 	starpu_sched_set_max_priority(INT_MAX);
			
 
				 }
			
 
				 
			
 
				-static void deinitialize_dmda_policy(struct starpu_machine_topology *topology, 
			
 
				-	 __attribute__ ((unused)) struct starpu_sched_policy *_policy) 
			
 
				+static void deinitialize_dmda_policy(struct starpu_machine_topology *topology,
			
 
				+	 __attribute__ ((unused)) struct starpu_sched_policy *_policy)
			
 
				 {
			
 
				 	unsigned workerid;
			
 
				 	for (workerid = 0; workerid < topology->nworkers; workerid++)
			
@@ -622,10 +635,11 @@ static void deinitialize_dmda_policy(struct starpu_machine_topology *topology,
 
				 }
			
 
				 
			
 
				 /* TODO: use post_exec_hook to fix the expected start */
			
 
				-struct starpu_sched_policy _starpu_sched_dm_policy = {
			
 
				+struct starpu_sched_policy _starpu_sched_dm_policy =
			
 
				+{
			
 
				 	.init_sched = initialize_dmda_policy,
			
 
				 	.deinit_sched = deinitialize_dmda_policy,
			
 
				-	.push_task = dm_push_task, 
			
 
				+	.push_task = dm_push_task,
			
 
				 	.pop_task = dmda_pop_task,
			
 
				 	.post_exec_hook = NULL,
			
 
				 	.pop_every_task = dmda_pop_every_task,
			
@@ -633,10 +647,11 @@ struct starpu_sched_policy _starpu_sched_dm_policy = {
 
				 	.policy_description = "performance model"
			
 
				 };
			
 
				 
			
 
				-struct starpu_sched_policy _starpu_sched_dmda_policy = {
			
 
				+struct starpu_sched_policy _starpu_sched_dmda_policy =
			
 
				+{
			
 
				 	.init_sched = initialize_dmda_policy,
			
 
				 	.deinit_sched = deinitialize_dmda_policy,
			
 
				-	.push_task = dmda_push_task, 
			
 
				+	.push_task = dmda_push_task,
			
 
				 	.pop_task = dmda_pop_task,
			
 
				 	.post_exec_hook = NULL,
			
 
				 	.pop_every_task = dmda_pop_every_task,
			
@@ -644,10 +659,11 @@ struct starpu_sched_policy _starpu_sched_dmda_policy = {
 
				 	.policy_description = "data-aware performance model"
			
 
				 };
			
 
				 
			
 
				-struct starpu_sched_policy _starpu_sched_dmda_sorted_policy = {
			
 
				+struct starpu_sched_policy _starpu_sched_dmda_sorted_policy =
			
 
				+{
			
 
				 	.init_sched = initialize_dmda_sorted_policy,
			
 
				 	.deinit_sched = deinitialize_dmda_policy,
			
 
				-	.push_task = dmda_push_sorted_task, 
			
 
				+	.push_task = dmda_push_sorted_task,
			
 
				 	.pop_task = dmda_pop_ready_task,
			
 
				 	.post_exec_hook = NULL,
			
 
				 	.pop_every_task = dmda_pop_every_task,
			
@@ -655,10 +671,11 @@ struct starpu_sched_policy _starpu_sched_dmda_sorted_policy = {
 
				 	.policy_description = "data-aware performance model (sorted)"
			
 
				 };
			
 
				 
			
 
				-struct starpu_sched_policy _starpu_sched_dmda_ready_policy = {
			
 
				+struct starpu_sched_policy _starpu_sched_dmda_ready_policy =
			
 
				+{
			
 
				 	.init_sched = initialize_dmda_policy,
			
 
				 	.deinit_sched = deinitialize_dmda_policy,
			
 
				-	.push_task = dmda_push_task, 
			
 
				+	.push_task = dmda_push_task,
			
 
				 	.pop_task = dmda_pop_ready_task,
			
 
				 	.post_exec_hook = NULL,
			
 
				 	.pop_every_task = dmda_pop_every_task,
			
--- a/src/sched_policies/deque_queues.c
+++ b/src/sched_policies/deque_queues.c
@@ -100,7 +100,8 @@ struct _starpu_job_list *_starpu_deque_pop_every_task(struct _starpu_deque_jobq
 
				 	{
			
 
				 		new_list = NULL;
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		/* there is a task */
			
 
				 		old_list = deque_queue->jobq;
			
 
				 		new_list = _starpu_job_list_new();
			
@@ -123,7 +124,7 @@ struct _starpu_job_list *_starpu_deque_pop_every_task(struct _starpu_deque_jobq
 
				 			{
			
 
				 				/* this elements can be moved into the new list */
			
 
				 				new_list_size++;
			
 
				-				
			
 
				+
			
 
				 				_starpu_job_list_erase(old_list, i);
			
 
				 				_starpu_job_list_push_back(new_list, i);
			
 
				 				i->nimpl = nimpl;
			
@@ -141,7 +142,7 @@ struct _starpu_job_list *_starpu_deque_pop_every_task(struct _starpu_deque_jobq
 
				 			deque_queue->njobs -= new_list_size;
			
 
				 		}
			
 
				 	}
			
 
				-	
			
 
				+
			
 
				 	_STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
			
 
				 
			
 
				 	return new_list;
			
--- a/src/sched_policies/deque_queues.h
+++ b/src/sched_policies/deque_queues.h
@@ -23,7 +23,8 @@
 
				 #include <common/config.h>
			
 
				 #include <core/jobs.h>
			
 
				 
			
 
				-struct _starpu_deque_jobq {
			
 
				+struct _starpu_deque_jobq
			
 
				+{
			
 
				 	/* the actual list */
			
 
				 	struct _starpu_job_list *jobq;
			
 
				 
			
--- a/src/sched_policies/detect_combined_workers.c
+++ b/src/sched_policies/detect_combined_workers.c
@@ -41,7 +41,8 @@
 
				  * workers		CPU-workers found by recursion in all the sub-trees and in this very one, represented as leaves in hwloc.
			
 
				  */
			
 
				 
			
 
				-struct _starpu_tree {
			
 
				+struct _starpu_tree
			
 
				+{
			
 
				     hwloc_obj_t obj;
			
 
				     unsigned nb_workers;
			
 
				     int *workers;
			
@@ -386,7 +387,7 @@ static void find_and_assign_combinations_with_hwloc(struct starpu_machine_topolo
 
				     struct _starpu_tree tree;
			
 
				 
			
 
				     /* Of course we start from the root */
			
 
				-    tree.obj = hwloc_get_obj_by_depth(topology->hwtopology, HWLOC_OBJ_SYSTEM, 0); 
			
 
				+    tree.obj = hwloc_get_obj_by_depth(topology->hwtopology, HWLOC_OBJ_SYSTEM, 0);
			
 
				     tree.nb_workers = 0;
			
 
				     tree.workers = (int *) malloc(topology->nhwcpus * sizeof(int));
			
 
				 
			
@@ -441,7 +442,7 @@ static void find_and_assign_combinations_without_hwloc(struct starpu_machine_top
 
				 
			
 
				 		/* We register this combination */
			
 
				 		int ret;
			
 
				-		ret = starpu_combined_worker_assign_workerid(size, workerids); 
			
 
				+		ret = starpu_combined_worker_assign_workerid(size, workerids);
			
 
				 		STARPU_ASSERT(ret >= 0);
			
 
				 	    }
			
 
				 	}
			
@@ -478,7 +479,8 @@ void _starpu_sched_find_worker_combinations(struct starpu_machine_topology *topo
 
				 
			
 
				     if ((config->user_conf && config->user_conf->single_combined_worker > 0) || starpu_get_env_number("STARPU_SINGLE_COMBINED_WORKER") > 0)
			
 
				 	combine_all_cpu_workers(topology);
			
 
				-    else {
			
 
				+    else
			
 
				+    {
			
 
				 #ifdef STARPU_HAVE_HWLOC
			
 
				 	find_and_assign_combinations_with_hwloc(topology);
			
 
				 #else
			
--- a/src/sched_policies/eager_central_policy.c
+++ b/src/sched_policies/eager_central_policy.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2010-2011  Université de Bordeaux 1
			
 
				- * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -29,8 +29,8 @@ static struct _starpu_fifo_taskq *fifo;
 
				 static pthread_cond_t sched_cond;
			
 
				 static pthread_mutex_t sched_mutex;
			
 
				 
			
 
				-static void initialize_eager_center_policy(struct starpu_machine_topology *topology, 
			
 
				-		   __attribute__ ((unused)) struct starpu_sched_policy *_policy) 
			
 
				+static void initialize_eager_center_policy(struct starpu_machine_topology *topology,
			
 
				+		   __attribute__ ((unused)) struct starpu_sched_policy *_policy)
			
 
				 {
			
 
				 	/* there is only a single queue in that trivial design */
			
 
				 	fifo = _starpu_create_fifo();
			
@@ -43,8 +43,8 @@ static void initialize_eager_center_policy(struct starpu_machine_topology *topol
 
				 		starpu_worker_set_sched_condition(workerid, &sched_cond, &sched_mutex);
			
 
				 }
			
 
				 
			
 
				-static void deinitialize_eager_center_policy(__attribute__ ((unused)) struct starpu_machine_topology *topology, 
			
 
				-		   __attribute__ ((unused)) struct starpu_sched_policy *_policy) 
			
 
				+static void deinitialize_eager_center_policy(__attribute__ ((unused)) struct starpu_machine_topology *topology,
			
 
				+		   __attribute__ ((unused)) struct starpu_sched_policy *_policy)
			
 
				 {
			
 
				 	//STARPU_ASSERT(_starpu_fifo_empty(fifo));
			
 
				 
			
@@ -67,7 +67,8 @@ static struct starpu_task *pop_task_eager_policy(void)
 
				 	return _starpu_fifo_pop_task(fifo, starpu_worker_get_id());
			
 
				 }
			
 
				 
			
 
				-struct starpu_sched_policy _starpu_sched_eager_policy = {
			
 
				+struct starpu_sched_policy _starpu_sched_eager_policy =
			
 
				+{
			
 
				 	.init_sched = initialize_eager_center_policy,
			
 
				 	.deinit_sched = deinitialize_eager_center_policy,
			
 
				 	.push_task = push_task_eager_policy,
			
--- a/src/sched_policies/eager_central_priority_policy.c
+++ b/src/sched_policies/eager_central_priority_policy.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2010-2011  Université de Bordeaux 1
			
 
				- * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -31,8 +31,9 @@
 
				 
			
 
				 #define NPRIO_LEVELS	(MAX_LEVEL - MIN_LEVEL + 1)
			
 
				 
			
 
				-struct starpu_priority_taskq_s {
			
 
				-	/* the actual lists 
			
 
				+struct starpu_priority_taskq_s
			
 
				+{
			
 
				+	/* the actual lists
			
 
				 	 *	taskq[p] is for priority [p - STARPU_MIN_PRIO] */
			
 
				 	struct starpu_task_list taskq[NPRIO_LEVELS];
			
 
				 	unsigned ntasks[NPRIO_LEVELS];
			
@@ -43,19 +44,19 @@ struct starpu_priority_taskq_s {
 
				 /* the former is the actual queue, the latter some container */
			
 
				 static struct starpu_priority_taskq_s *taskq;
			
 
				 
			
 
				-/* keep track of the total number of tasks to be scheduled to avoid infinite 
			
 
				+/* keep track of the total number of tasks to be scheduled to avoid infinite
			
 
				  * polling when there are really few tasks in the overall queue */
			
 
				 static pthread_cond_t global_sched_cond;
			
 
				 static pthread_mutex_t global_sched_mutex;
			
 
				 
			
 
				 /*
			
 
				- * Centralized queue with priorities 
			
 
				+ * Centralized queue with priorities
			
 
				  */
			
 
				 
			
 
				 static struct starpu_priority_taskq_s *_starpu_create_priority_taskq(void)
			
 
				 {
			
 
				 	struct starpu_priority_taskq_s *central_queue;
			
 
				-	
			
 
				+
			
 
				 	central_queue = (struct starpu_priority_taskq_s *) malloc(sizeof(struct starpu_priority_taskq_s));
			
 
				 	central_queue->total_ntasks = 0;
			
 
				 
			
@@ -74,8 +75,8 @@ static void _starpu_destroy_priority_taskq(struct starpu_priority_taskq_s *prior
 
				 	free(priority_queue);
			
 
				 }
			
 
				 
			
 
				-static void initialize_eager_center_priority_policy(struct starpu_machine_topology *topology, 
			
 
				-			__attribute__ ((unused))	struct starpu_sched_policy *_policy) 
			
 
				+static void initialize_eager_center_priority_policy(struct starpu_machine_topology *topology,
			
 
				+			__attribute__ ((unused))	struct starpu_sched_policy *_policy)
			
 
				 {
			
 
				 	/* In this policy, we support more than two levels of priority. */
			
 
				 	starpu_sched_set_min_priority(MIN_LEVEL);
			
@@ -93,7 +94,7 @@ static void initialize_eager_center_priority_policy(struct starpu_machine_topolo
 
				 }
			
 
				 
			
 
				 static void deinitialize_eager_center_priority_policy(struct starpu_machine_topology *topology __attribute__ ((unused)),
			
 
				-		   __attribute__ ((unused)) struct starpu_sched_policy *_policy) 
			
 
				+		   __attribute__ ((unused)) struct starpu_sched_policy *_policy)
			
 
				 {
			
 
				 	/* TODO check that there is no task left in the queue */
			
 
				 
			
@@ -107,7 +108,7 @@ static int _starpu_priority_push_task(struct starpu_task *task)
 
				 	_STARPU_PTHREAD_MUTEX_LOCK(&global_sched_mutex);
			
 
				 
			
 
				 	_STARPU_TRACE_JOB_PUSH(task, 1);
			
 
				-	
			
 
				+
			
 
				 	unsigned priolevel = task->priority - STARPU_MIN_PRIO;
			
 
				 
			
 
				 	starpu_task_list_push_front(&taskq->taskq[priolevel], task);
			
@@ -141,15 +142,18 @@ static struct starpu_task *_starpu_priority_pop_task(void)
 
				 	if (taskq->total_ntasks > 0)
			
 
				 	{
			
 
				 		unsigned priolevel = NPRIO_LEVELS - 1;
			
 
				-		do {
			
 
				-			if (taskq->ntasks[priolevel] > 0) {
			
 
				+		do
			
 
				+		{
			
 
				+			if (taskq->ntasks[priolevel] > 0)
			
 
				+			{
			
 
				 				/* there is some task that we can grab */
			
 
				 				task = starpu_task_list_pop_back(&taskq->taskq[priolevel]);
			
 
				 				taskq->ntasks[priolevel]--;
			
 
				 				taskq->total_ntasks--;
			
 
				 				_STARPU_TRACE_JOB_POP(task, 0);
			
 
				 			}
			
 
				-		} while (!task && priolevel-- > 0);
			
 
				+		}
			
 
				+		while (!task && priolevel-- > 0);
			
 
				 	}
			
 
				 	STARPU_ASSERT(starpu_worker_can_execute_task(starpu_worker_get_id(), task, 0) || !"prio does not support \"can_execute\"");
			
 
				 
			
@@ -158,7 +162,8 @@ static struct starpu_task *_starpu_priority_pop_task(void)
 
				 	return task;
			
 
				 }
			
 
				 
			
 
				-struct starpu_sched_policy _starpu_sched_prio_policy = {
			
 
				+struct starpu_sched_policy _starpu_sched_prio_policy =
			
 
				+{
			
 
				 	.init_sched = initialize_eager_center_priority_policy,
			
 
				 	.deinit_sched = deinitialize_eager_center_priority_policy,
			
 
				 	/* we always use priorities in that policy */
			
--- a/src/sched_policies/fifo_queues.c
+++ b/src/sched_policies/fifo_queues.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2010-2011  Université de Bordeaux 1
			
 
				- * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  * Copyright (C) 2011  Télécom-SudParis
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -90,7 +90,7 @@ struct starpu_task *_starpu_fifo_pop_task(struct _starpu_fifo_taskq *fifo_queue,
 
				 				return task;
			
 
				 			}
			
 
				 	}
			
 
				-	
			
 
				+
			
 
				 	return NULL;
			
 
				 }
			
 
				 
			
@@ -102,12 +102,13 @@ struct starpu_task *_starpu_fifo_pop_every_task(struct _starpu_fifo_taskq *fifo_
 
				 
			
 
				 	struct starpu_task *new_list = NULL;
			
 
				 	struct starpu_task *new_list_tail = NULL;
			
 
				-	
			
 
				+
			
 
				 	_STARPU_PTHREAD_MUTEX_LOCK(sched_mutex);
			
 
				 
			
 
				 	size = fifo_queue->ntasks;
			
 
				 
			
 
				-	if (size > 0) {
			
 
				+	if (size > 0)
			
 
				+	{
			
 
				 		old_list = &fifo_queue->taskq;
			
 
				 		unsigned new_list_size = 0;
			
 
				 
			
@@ -125,7 +126,7 @@ struct starpu_task *_starpu_fifo_pop_every_task(struct _starpu_fifo_taskq *fifo_
 
				 			{
			
 
				 				/* this elements can be moved into the new list */
			
 
				 				new_list_size++;
			
 
				-				
			
 
				+
			
 
				 				starpu_task_list_erase(old_list, task);
			
 
				 
			
 
				 				if (new_list_tail)
			
@@ -135,7 +136,8 @@ struct starpu_task *_starpu_fifo_pop_every_task(struct _starpu_fifo_taskq *fifo_
 
				 					task->next = NULL;
			
 
				 					new_list_tail = task;
			
 
				 				}
			
 
				-				else {
			
 
				+				else
			
 
				+				{
			
 
				 					new_list = task;
			
 
				 					new_list_tail = task;
			
 
				 					task->prev = NULL;
			
@@ -144,7 +146,7 @@ struct starpu_task *_starpu_fifo_pop_every_task(struct _starpu_fifo_taskq *fifo_
 
				 				_starpu_get_job_associated_to_task(task)->nimpl = nimpl;
			
 
				 				break;
			
 
				 			}
			
 
				-		
			
 
				+
			
 
				 			task = next_task;
			
 
				 		}
			
 
				 
			
--- a/src/sched_policies/fifo_queues.h
+++ b/src/sched_policies/fifo_queues.h
@@ -22,7 +22,8 @@
 
				 #include <starpu.h>
			
 
				 #include <common/config.h>
			
 
				 
			
 
				-struct _starpu_fifo_taskq {
			
 
				+struct _starpu_fifo_taskq
			
 
				+{
			
 
				 	/* the actual list */
			
 
				 	struct starpu_task_list taskq;
			
 
				 
			
--- a/src/sched_policies/heft.c
+++ b/src/sched_policies/heft.c
@@ -51,14 +51,15 @@ const float gamma_maximum=10000.0;
 
				 const float idle_power_minimum=0;
			
 
				 const float idle_power_maximum=10000.0;
			
 
				 
			
 
				-static void param_modified(struct starpu_top_param* d){
			
 
				+static void param_modified(struct starpu_top_param* d)
			
 
				+{
			
 
				 	//just to show parameter modification
			
 
				-	fprintf(stderr,"%s has been modified : alpha=%f|beta=%f|gamma=%f|idle_power=%f !\n", 
			
 
				+	fprintf(stderr,"%s has been modified : alpha=%f|beta=%f|gamma=%f|idle_power=%f !\n",
			
 
				 		d->name, alpha,beta,_gamma,idle_power);
			
 
				 }
			
 
				 
			
 
				-static void heft_init(struct starpu_machine_topology *topology, 
			
 
				-	 __attribute__ ((unused)) struct starpu_sched_policy *_policy) 
			
 
				+static void heft_init(struct starpu_machine_topology *topology,
			
 
				+	 __attribute__ ((unused)) struct starpu_sched_policy *_policy)
			
 
				 {
			
 
				 	nworkers = topology->nworkers;
			
 
				 
			
@@ -77,7 +78,7 @@ static void heft_init(struct starpu_machine_topology *topology,
 
				 	const char *strval_idle_power = getenv("STARPU_IDLE_POWER");
			
 
				 	if (strval_idle_power)
			
 
				 		idle_power = atof(strval_idle_power);
			
 
				-	
			
 
				+
			
 
				 	starpu_top_register_parameter_float("HEFT_ALPHA", &alpha, alpha_minimum,alpha_maximum,param_modified);
			
 
				 	starpu_top_register_parameter_float("HEFT_BETA", &beta, beta_minimum,beta_maximum,param_modified);
			
 
				 	starpu_top_register_parameter_float("HEFT_GAMMA", &_gamma, gamma_minimum,gamma_maximum,param_modified);
			
@@ -88,12 +89,12 @@ static void heft_init(struct starpu_machine_topology *topology,
 
				 	{
			
 
				 		exp_start[workerid] = starpu_timing_now();
			
 
				 		exp_len[workerid] = 0.0;
			
 
				-		exp_end[workerid] = exp_start[workerid]; 
			
 
				+		exp_end[workerid] = exp_start[workerid];
			
 
				 		ntasks[workerid] = 0;
			
 
				 
			
 
				 		_STARPU_PTHREAD_MUTEX_INIT(&sched_mutex[workerid], NULL);
			
 
				 		_STARPU_PTHREAD_COND_INIT(&sched_cond[workerid], NULL);
			
 
				-	
			
 
				+
			
 
				 		starpu_worker_set_sched_condition(workerid, &sched_cond[workerid], &sched_mutex[workerid]);
			
 
				 	}
			
 
				 }
			
@@ -103,7 +104,7 @@ static void heft_post_exec_hook(struct starpu_task *task)
 
				 	int workerid = starpu_worker_get_id();
			
 
				 	double model = task->predicted;
			
 
				 	double transfer_model = task->predicted_transfer;
			
 
				-	
			
 
				+
			
 
				 	/* Once we have executed the task, we can update the predicted amount
			
 
				 	 * of work. */
			
 
				 	_STARPU_PTHREAD_MUTEX_LOCK(&sched_mutex[workerid]);
			
@@ -143,11 +144,14 @@ static void heft_push_task_notify(struct starpu_task *task, int workerid)
 
				 	/* If there is no prediction available, we consider the task has a null length */
			
 
				 	if (predicted_transfer != -1.0)
			
 
				 	{
			
 
				-		if (starpu_timing_now() + predicted_transfer < exp_end[workerid]) {
			
 
				+		if (starpu_timing_now() + predicted_transfer < exp_end[workerid])
			
 
				+		{
			
 
				 			/* We may hope that the transfer will be finished by
			
 
				 			 * the start of the task. */
			
 
				 			predicted_transfer = 0;
			
 
				-		} else {
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				 			/* The transfer will not be finished by then, take the
			
 
				 			 * remainder into account */
			
 
				 			predicted_transfer = (starpu_timing_now() + predicted_transfer) - exp_end[workerid];
			
@@ -176,11 +180,14 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 
				 	exp_end[best_workerid] += predicted;
			
 
				 	exp_len[best_workerid] += predicted;
			
 
				 
			
 
				-	if (starpu_timing_now() + predicted_transfer < exp_end[best_workerid]) {
			
 
				+	if (starpu_timing_now() + predicted_transfer < exp_end[best_workerid])
			
 
				+	{
			
 
				 		/* We may hope that the transfer will be finished by
			
 
				 		 * the start of the task. */
			
 
				 		predicted_transfer = 0;
			
 
				-	} else {
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				 		/* The transfer will not be finished by then, take the
			
 
				 		 * remainder into account */
			
 
				 		predicted_transfer = (starpu_timing_now() + predicted_transfer) - exp_end[best_workerid];
			
@@ -195,7 +202,7 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 
				 	task->predicted_transfer = predicted_transfer;
			
 
				 
			
 
				 	if (starpu_top_status_get())
			
 
				-		starpu_top_task_prevision(task, best_workerid, 
			
 
				+		starpu_top_task_prevision(task, best_workerid,
			
 
				 					(unsigned long long)(exp_end[best_workerid]-predicted)/1000,
			
 
				 					(unsigned long long)exp_end[best_workerid]/1000);
			
 
				 
			
@@ -232,8 +239,10 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 
				 
			
 
				 	unsigned nimpl;
			
 
				 
			
 
				-	for (worker = 0; worker < nworkers; worker++) {
			
 
				-		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) {
			
 
				+	for (worker = 0; worker < nworkers; worker++)
			
 
				+	{
			
 
				+		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
			
 
				+		{
			
 
				 			/* Sometimes workers didn't take the tasks as early as we expected */
			
 
				 			exp_start[worker] = STARPU_MAX(exp_start[worker], starpu_timing_now());
			
 
				 			exp_end[worker][nimpl] = exp_start[worker] + exp_len[worker];
			
@@ -258,7 +267,8 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 
				 				//_STARPU_DEBUG("Scheduler heft bundle: task length (%lf) local power (%lf) worker (%u) kernel (%u) \n", local_task_length[worker],local_power[worker],worker,nimpl);
			
 
				 
			
 
				 			}
			
 
				-			else {
			
 
				+			else
			
 
				+			{
			
 
				 				local_task_length[worker][nimpl] = starpu_task_expected_length(task, perf_arch, nimpl);
			
 
				 				local_data_penalty[worker][nimpl] = starpu_task_expected_data_transfer_time(memory_node, task);
			
 
				 				local_power[worker][nimpl] = starpu_task_expected_power(task, perf_arch,nimpl);
			
@@ -276,7 +286,8 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 
				 				|| (!calibrating && ntasks_end < ntasks_best_end) /* Not calibrating, take better task */
			
 
				 				|| (!calibrating && local_task_length[worker][nimpl] == -1.0) /* Not calibrating but this worker is being calibrated */
			
 
				 				|| (calibrating && local_task_length[worker][nimpl] == -1.0 && ntasks_end < ntasks_best_end) /* Calibrating, compete this worker with other non-calibrated */
			
 
				-				) {
			
 
				+				)
			
 
				+			{
			
 
				 				ntasks_best_end = ntasks_end;
			
 
				 				ntasks_best = worker;
			
 
				 				nimpl_best = nimpl;
			
@@ -323,7 +334,7 @@ static int _heft_push_task(struct starpu_task *task, unsigned prio)
 
				 	unsigned worker, nimpl;
			
 
				 	int best = -1;
			
 
				 	int selected_impl= -1;
			
 
				-	
			
 
				+
			
 
				 	/* this flag is set if the corresponding worker is selected because
			
 
				 	   there is no performance prediction available yet */
			
 
				 	int forced_worker;
			
@@ -352,7 +363,8 @@ static int _heft_push_task(struct starpu_task *task, unsigned prio)
 
				 
			
 
				 	/* If there is no prediction available for that task with that arch we
			
 
				 	 * want to speed-up calibration time so we force this measurement */
			
 
				-	if (forced_worker != -1) {
			
 
				+	if (forced_worker != -1)
			
 
				+	{
			
 
				 		_starpu_get_job_associated_to_task(task)->nimpl = forced_impl;
			
 
				 		return push_task_on_best_worker(task, forced_worker, 0.0, 0.0, prio);
			
 
				 	}
			
@@ -362,24 +374,26 @@ static int _heft_push_task(struct starpu_task *task, unsigned prio)
 
				 	 *	trade-off between load-balacing, data locality, and energy
			
 
				 	 *	consumption.
			
 
				 	 */
			
 
				-	
			
 
				+
			
 
				 	double fitness[nworkers][STARPU_MAXIMPLEMENTATIONS];
			
 
				 	double best_fitness = -1;
			
 
				 
			
 
				 	for (worker = 0; worker < nworkers; worker++)
			
 
				 	{
			
 
				-		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) {
			
 
				+		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
			
 
				+		{
			
 
				 			if (!starpu_worker_can_execute_task(worker, task, nimpl))
			
 
				 			{
			
 
				 				/* no one on that queue may execute this task */
			
 
				 				continue;
			
 
				 			}
			
 
				 
			
 
				-			fitness[worker][nimpl] = alpha*(exp_end[worker][nimpl] - best_exp_end) 
			
 
				+			fitness[worker][nimpl] = alpha*(exp_end[worker][nimpl] - best_exp_end)
			
 
				 						+ beta*(local_data_penalty[worker][nimpl])
			
 
				 						+ _gamma*(local_power[worker][nimpl]);
			
 
				 
			
 
				-			if (exp_end[worker][nimpl] > max_exp_end) {
			
 
				+			if (exp_end[worker][nimpl] > max_exp_end)
			
 
				+			{
			
 
				 				/* This placement will make the computation
			
 
				 				 * longer, take into account the idle
			
 
				 				 * consumption of other cpus */
			
@@ -398,7 +412,7 @@ static int _heft_push_task(struct starpu_task *task, unsigned prio)
 
				 
			
 
				 	/* By now, we must have found a solution */
			
 
				 	STARPU_ASSERT(best != -1);
			
 
				-	
			
 
				+
			
 
				 	/* we should now have the best worker in variable "best" */
			
 
				 	double model_best, transfer_model_best;
			
 
				 
			
@@ -423,12 +437,13 @@ static int _heft_push_task(struct starpu_task *task, unsigned prio)
 
				 			_STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex);
			
 
				 
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		model_best = local_task_length[best][selected_impl];
			
 
				 		transfer_model_best = local_data_penalty[best][selected_impl];
			
 
				 	}
			
 
				 
			
 
				-	
			
 
				+
			
 
				 	_starpu_get_job_associated_to_task(task)->nimpl = selected_impl;
			
 
				 
			
 
				 	return push_task_on_best_worker(task, best, model_best, transfer_model_best, prio);
			
@@ -442,8 +457,8 @@ static int heft_push_task(struct starpu_task *task)
 
				 	return _heft_push_task(task, 0);
			
 
				 }
			
 
				 
			
 
				-static void heft_deinit(__attribute__ ((unused)) struct starpu_machine_topology *topology, 
			
 
				-                        __attribute__ ((unused)) struct starpu_sched_policy *_policy) 
			
 
				+static void heft_deinit(__attribute__ ((unused)) struct starpu_machine_topology *topology,
			
 
				+                        __attribute__ ((unused)) struct starpu_sched_policy *_policy)
			
 
				 {
			
 
				 	unsigned workerid;
			
 
				 	for (workerid = 0; workerid < nworkers; workerid++)
			
@@ -453,10 +468,11 @@ static void heft_deinit(__attribute__ ((unused)) struct starpu_machine_topology
 
				 	}
			
 
				 }
			
 
				 
			
 
				-struct starpu_sched_policy heft_policy = {
			
 
				+struct starpu_sched_policy heft_policy =
			
 
				+{
			
 
				 	.init_sched = heft_init,
			
 
				 	.deinit_sched = heft_deinit,
			
 
				-	.push_task = heft_push_task, 
			
 
				+	.push_task = heft_push_task,
			
 
				 	.push_task_notify = heft_push_task_notify,
			
 
				 	.pop_task = NULL,
			
 
				 	.pop_every_task = NULL,
			
--- a/src/sched_policies/parallel_greedy.c
+++ b/src/sched_policies/parallel_greedy.c
@@ -37,8 +37,8 @@ static int possible_combinations_cnt[STARPU_NMAXWORKERS];
 
				 static int possible_combinations[STARPU_NMAXWORKERS][10];
			
 
				 static int possible_combinations_size[STARPU_NMAXWORKERS][10];
			
 
				 
			
 
				-static void initialize_pgreedy_policy(struct starpu_machine_topology *topology, 
			
 
				-		   __attribute__ ((unused)) struct starpu_sched_policy *_policy) 
			
 
				+static void initialize_pgreedy_policy(struct starpu_machine_topology *topology,
			
 
				+		   __attribute__ ((unused)) struct starpu_sched_policy *_policy)
			
 
				 {
			
 
				 	/* masters pick tasks from that queue */
			
 
				 	fifo = _starpu_create_fifo();
			
@@ -65,7 +65,7 @@ static void initialize_pgreedy_policy(struct starpu_machine_topology *topology,
 
				 	}
			
 
				 
			
 
				 	unsigned i;
			
 
				-	
			
 
				+
			
 
				 	for (i = 0; i < ncombinedworkers; i++)
			
 
				 	{
			
 
				 		workerid = nworkers + i;
			
@@ -113,7 +113,8 @@ static void initialize_pgreedy_policy(struct starpu_machine_topology *topology,
 
				 			starpu_worker_set_sched_condition(workerid,
			
 
				 				&sched_cond, &sched_mutex);
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			starpu_worker_set_sched_condition(workerid,
			
 
				 				&master_sched_cond[master],
			
 
				 				&master_sched_mutex[master]);
			
@@ -128,8 +129,8 @@ static void initialize_pgreedy_policy(struct starpu_machine_topology *topology,
 
				 #endif
			
 
				 }
			
 
				 
			
 
				-static void deinitialize_pgreedy_policy(__attribute__ ((unused)) struct starpu_machine_topology *topology, 
			
 
				-		   __attribute__ ((unused)) struct starpu_sched_policy *_policy) 
			
 
				+static void deinitialize_pgreedy_policy(__attribute__ ((unused)) struct starpu_machine_topology *topology,
			
 
				+		   __attribute__ ((unused)) struct starpu_sched_policy *_policy)
			
 
				 {
			
 
				 	/* TODO check that there is no task left in the queue */
			
 
				 
			
@@ -175,14 +176,14 @@ static struct starpu_task *pop_task_pgreedy_policy(void)
 
				 					best_workerid = combined_worker;
			
 
				 				}
			
 
				 			}
			
 
				-		} 
			
 
				+		}
			
 
				 
			
 
				 		/* In case nobody can execute this task, we let the master
			
 
				 		 * worker take it anyway, so that it can discard it afterward.
			
 
				 		 * */
			
 
				 		if (best_workerid == -1)
			
 
				 			return task;
			
 
				-		
			
 
				+
			
 
				 		/* Is this a basic worker or a combined worker ? */
			
 
				 		int nbasic_workers = (int)starpu_worker_get_count();
			
 
				 		int is_basic_worker = (best_workerid < nbasic_workers);
			
@@ -192,7 +193,8 @@ static struct starpu_task *pop_task_pgreedy_policy(void)
 
				 			/* The master is alone */
			
 
				 			return task;
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			/* The master needs to dispatch the task between the
			
 
				 			 * different combined workers */
			
 
				 			struct _starpu_combined_worker *combined_worker;
			
@@ -226,13 +228,15 @@ static struct starpu_task *pop_task_pgreedy_policy(void)
 
				 			return master_alias;
			
 
				 		}
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		/* The worker is a slave */
			
 
				 		return _starpu_fifo_pop_task(local_fifo[workerid], workerid);
			
 
				 	}
			
 
				 }
			
 
				 
			
 
				-struct starpu_sched_policy _starpu_sched_pgreedy_policy = {
			
 
				+struct starpu_sched_policy _starpu_sched_pgreedy_policy =
			
 
				+{
			
 
				 	.init_sched = initialize_pgreedy_policy,
			
 
				 	.deinit_sched = deinitialize_pgreedy_policy,
			
 
				 	.push_task = push_task_pgreedy_policy,
			
--- a/src/sched_policies/parallel_heft.c
+++ b/src/sched_policies/parallel_heft.c
@@ -52,10 +52,10 @@ static void parallel_heft_post_exec_hook(struct starpu_task *task)
 
				 	int workerid = starpu_worker_get_id();
			
 
				 	double model = task->predicted;
			
 
				 	double transfer_model = task->predicted_transfer;
			
 
				-	
			
 
				+
			
 
				 	if (model < 0.0)
			
 
				 		model = 0.0;
			
 
				-	
			
 
				+
			
 
				 	/* Once we have executed the task, we can update the predicted amount
			
 
				 	 * of work. */
			
 
				 	_STARPU_PTHREAD_MUTEX_LOCK(&sched_mutex[workerid]);
			
@@ -75,7 +75,7 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 
				 	int nbasic_workers = (int)starpu_worker_get_count();
			
 
				 	int is_basic_worker = (best_workerid < nbasic_workers);
			
 
				 
			
 
				-	unsigned memory_node; 
			
 
				+	unsigned memory_node;
			
 
				 	memory_node = starpu_worker_get_memory_node(best_workerid);
			
 
				 
			
 
				 	if (starpu_get_prefetch_flag())
			
@@ -93,12 +93,13 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 
				 		worker_exp_len[best_workerid] += exp_end_predicted - worker_exp_end[best_workerid];
			
 
				 		worker_exp_end[best_workerid] = exp_end_predicted;
			
 
				 		worker_exp_start[best_workerid] = exp_end_predicted - worker_exp_len[best_workerid];
			
 
				-	
			
 
				+
			
 
				 		ntasks[best_workerid]++;
			
 
				 
			
 
				 		ret = starpu_push_local_task(best_workerid, task, prio);
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		/* This is a combined worker so we create task aliases */
			
 
				 		struct _starpu_combined_worker *combined_worker;
			
 
				 		combined_worker = _starpu_get_combined_worker_struct(best_workerid);
			
@@ -122,13 +123,13 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 
				 			alias->predicted = exp_end_predicted - worker_exp_end[local_worker];
			
 
				 			/* TODO */
			
 
				 			alias->predicted_transfer = 0;
			
 
				-	
			
 
				+
			
 
				 			worker_exp_len[local_worker] += exp_end_predicted - worker_exp_end[local_worker];
			
 
				 			worker_exp_end[local_worker] = exp_end_predicted;
			
 
				 			worker_exp_start[local_worker] = exp_end_predicted - worker_exp_len[local_worker];
			
 
				-		
			
 
				+
			
 
				 			ntasks[local_worker]++;
			
 
				-	
			
 
				+
			
 
				 			ret |= starpu_push_local_task(local_worker, alias, prio);
			
 
				 		}
			
 
				 
			
@@ -146,7 +147,8 @@ static double compute_expected_end(int workerid, double length)
 
				 		/* This is a basic worker */
			
 
				 		return worker_exp_start[workerid] + worker_exp_len[workerid] + length;
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		/* This is a combined worker, the expected end is the end for the latest worker */
			
 
				 		int worker_size;
			
 
				 		int *combined_workerid;
			
@@ -175,7 +177,8 @@ static double compute_ntasks_end(int workerid)
 
				 		/* This is a basic worker */
			
 
				 		return ntasks[workerid] / starpu_worker_get_relative_speedup(perf_arch);
			
 
				 	}
			
 
				-	else {
			
 
				+	else
			
 
				+	{
			
 
				 		/* This is a combined worker, the expected end is the end for the latest worker */
			
 
				 		int worker_size;
			
 
				 		int *combined_workerid;
			
@@ -198,7 +201,7 @@ static int _parallel_heft_push_task(struct starpu_task *task, unsigned prio)
 
				 {
			
 
				 	unsigned worker;
			
 
				 	int best = -1;
			
 
				-	
			
 
				+
			
 
				 	/* this flag is set if the corresponding worker is selected because
			
 
				 	   there is no performance prediction available yet */
			
 
				 	int forced_best = -1;
			
@@ -244,7 +247,8 @@ static int _parallel_heft_push_task(struct starpu_task *task, unsigned prio)
 
				 				skip_worker[worker][nimpl] = 1;
			
 
				 				continue;
			
 
				 			}
			
 
				-			else {
			
 
				+			else
			
 
				+			{
			
 
				 				skip_worker[worker][nimpl] = 0;
			
 
				 			}
			
 
				 
			
@@ -261,7 +265,8 @@ static int _parallel_heft_push_task(struct starpu_task *task, unsigned prio)
 
				 					|| (!calibrating && ntasks_end < ntasks_best_end) /* Not calibrating, take better task */
			
 
				 					|| (!calibrating && local_task_length[worker][nimpl] == -1.0) /* Not calibrating but this worker is being calibrated */
			
 
				 					|| (calibrating && local_task_length[worker][nimpl] == -1.0 && ntasks_end < ntasks_best_end) /* Calibrating, compete this worker with other non-calibrated */
			
 
				-					) {
			
 
				+					)
			
 
				+			{
			
 
				 				ntasks_best_end = ntasks_end;
			
 
				 				ntasks_best = worker;
			
 
				 			}
			
@@ -318,8 +323,8 @@ static int _parallel_heft_push_task(struct starpu_task *task, unsigned prio)
 
				 					/* no one on that queue may execute this task */
			
 
				 					continue;
			
 
				 				}
			
 
				-		
			
 
				-				fitness[worker][nimpl] = alpha*(local_exp_end[worker][nimpl] - best_exp_end) 
			
 
				+
			
 
				+				fitness[worker][nimpl] = alpha*(local_exp_end[worker][nimpl] - best_exp_end)
			
 
				 						+ beta*(local_data_penalty[worker][nimpl])
			
 
				 						+ _gamma*(local_power[worker][nimpl]);
			
 
				 
			
@@ -352,7 +357,7 @@ static int _parallel_heft_push_task(struct starpu_task *task, unsigned prio)
 
				 		//penality_best = 0.0;
			
 
				 		best_exp_end = local_exp_end[best][nimpl];
			
 
				 	}
			
 
				-	else 
			
 
				+	else
			
 
				 	{
			
 
				 		//penality_best = local_data_penalty[best][nimpl];
			
 
				 		best_exp_end = local_exp_end[best][nimpl];
			
@@ -373,8 +378,8 @@ static int parallel_heft_push_task(struct starpu_task *task)
 
				 	return _parallel_heft_push_task(task, 0);
			
 
				 }
			
 
				 
			
 
				-static void initialize_parallel_heft_policy(struct starpu_machine_topology *topology, 
			
 
				-	 __attribute__ ((unused)) struct starpu_sched_policy *_policy) 
			
 
				+static void initialize_parallel_heft_policy(struct starpu_machine_topology *topology,
			
 
				+	 __attribute__ ((unused)) struct starpu_sched_policy *_policy)
			
 
				 {
			
 
				 	nworkers = topology->nworkers;
			
 
				 
			
@@ -403,12 +408,12 @@ static void initialize_parallel_heft_policy(struct starpu_machine_topology *topo
 
				 	{
			
 
				 		worker_exp_start[workerid] = starpu_timing_now();
			
 
				 		worker_exp_len[workerid] = 0.0;
			
 
				-		worker_exp_end[workerid] = worker_exp_start[workerid]; 
			
 
				+		worker_exp_end[workerid] = worker_exp_start[workerid];
			
 
				 		ntasks[workerid] = 0;
			
 
				-	
			
 
				+
			
 
				 		_STARPU_PTHREAD_MUTEX_INIT(&sched_mutex[workerid], NULL);
			
 
				 		_STARPU_PTHREAD_COND_INIT(&sched_cond[workerid], NULL);
			
 
				-	
			
 
				+
			
 
				 		starpu_worker_set_sched_condition(workerid, &sched_cond[workerid], &sched_mutex[workerid]);
			
 
				 	}
			
 
				 
			
@@ -437,10 +442,11 @@ static void initialize_parallel_heft_policy(struct starpu_machine_topology *topo
 
				 }
			
 
				 
			
 
				 /* TODO: use post_exec_hook to fix the expected start */
			
 
				-struct starpu_sched_policy _starpu_sched_parallel_heft_policy = {
			
 
				+struct starpu_sched_policy _starpu_sched_parallel_heft_policy =
			
 
				+{
			
 
				 	.init_sched = initialize_parallel_heft_policy,
			
 
				 	.deinit_sched = NULL,
			
 
				-	.push_task = parallel_heft_push_task, 
			
 
				+	.push_task = parallel_heft_push_task,
			
 
				 	.pop_task = NULL,
			
 
				 	.post_exec_hook = parallel_heft_post_exec_hook,
			
 
				 	.pop_every_task = NULL,
			
--- a/src/sched_policies/random_policy.c
+++ b/src/sched_policies/random_policy.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2010-2011  Université de Bordeaux 1
			
 
				- * Copyright (C) 2010  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -49,7 +49,8 @@ static int _random_push_task(struct starpu_task *task, unsigned prio)
 
				 		enum starpu_perf_archtype perf_arch = starpu_worker_get_perf_archtype(worker);
			
 
				 		double worker_alpha = starpu_worker_get_relative_speedup(perf_arch);
			
 
				 
			
 
				-		if (alpha + worker_alpha > random && starpu_worker_can_execute_task(worker, task, 0)) {
			
 
				+		if (alpha + worker_alpha > random && starpu_worker_can_execute_task(worker, task, 0))
			
 
				+		{
			
 
				 			/* we found the worker */
			
 
				 			selected = worker;
			
 
				 			break;
			
@@ -67,8 +68,8 @@ static int random_push_task(struct starpu_task *task)
 
				 	return _random_push_task(task, !!task->priority);
			
 
				 }
			
 
				 
			
 
				-static void initialize_random_policy(struct starpu_machine_topology *topology, 
			
 
				-	 __attribute__ ((unused)) struct starpu_sched_policy *_policy) 
			
 
				+static void initialize_random_policy(struct starpu_machine_topology *topology,
			
 
				+				     __attribute__ ((unused)) struct starpu_sched_policy *_policy)
			
 
				 {
			
 
				 	starpu_srand48(time(NULL));
			
 
				 
			
@@ -79,12 +80,13 @@ static void initialize_random_policy(struct starpu_machine_topology *topology,
 
				 	{
			
 
				 		_STARPU_PTHREAD_MUTEX_INIT(&sched_mutex[workerid], NULL);
			
 
				 		_STARPU_PTHREAD_COND_INIT(&sched_cond[workerid], NULL);
			
 
				-	
			
 
				+
			
 
				 		starpu_worker_set_sched_condition(workerid, &sched_cond[workerid], &sched_mutex[workerid]);
			
 
				 	}
			
 
				 }
			
 
				 
			
 
				-struct starpu_sched_policy _starpu_sched_random_policy = {
			
 
				+struct starpu_sched_policy _starpu_sched_random_policy =
			
 
				+{
			
 
				 	.init_sched = initialize_random_policy,
			
 
				 	.deinit_sched = NULL,
			
 
				 	.push_task = random_push_task,
			
--- a/src/sched_policies/stack_queues.c
+++ b/src/sched_policies/stack_queues.c
@@ -22,7 +22,7 @@
 
				 #include <errno.h>
			
 
				 #include <common/utils.h>
			
 
				 
			
 
				-/* keep track of the total number of jobs to be scheduled to avoid infinite 
			
 
				+/* keep track of the total number of jobs to be scheduled to avoid infinite
			
 
				  * polling when there are really few jobs in the overall queue */
			
 
				 static unsigned total_number_of_jobs;
			
 
				 
			
@@ -82,23 +82,23 @@ struct _starpu_job *_starpu_stack_pop_task(struct _starpu_stack_jobq *stack_queu
 
				 		return NULL;
			
 
				 
			
 
				 	/* TODO find a task that suits workerid */
			
 
				-	if (stack_queue->njobs > 0) 
			
 
				+	if (stack_queue->njobs > 0)
			
 
				 	{
			
 
				 		/* there is a task */
			
 
				 		j = _starpu_job_list_pop_back(stack_queue->jobq);
			
 
				-	
			
 
				+
			
 
				 		STARPU_ASSERT(j);
			
 
				 		stack_queue->njobs--;
			
 
				-		
			
 
				+
			
 
				 		_STARPU_TRACE_JOB_POP(j, 0);
			
 
				 
			
 
				-		/* we are sure that we got it now, so at worst, some people thought 
			
 
				+		/* we are sure that we got it now, so at worst, some people thought
			
 
				 		 * there remained some work and will soon discover it is not true */
			
 
				 		_STARPU_PTHREAD_MUTEX_LOCK(sched_mutex);
			
 
				 		total_number_of_jobs--;
			
 
				 		_STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
			
 
				 	}
			
 
				-	
			
 
				+
			
 
				 	return j;
			
 
				 
			
 
				 }
			
--- a/src/sched_policies/stack_queues.h
+++ b/src/sched_policies/stack_queues.h
@@ -23,7 +23,8 @@
 
				 #include <common/config.h>
			
 
				 #include <core/jobs.h>
			
 
				 
			
 
				-struct _starpu_stack_jobq {
			
 
				+struct _starpu_stack_jobq
			
 
				+{
			
 
				 	/* the actual list */
			
 
				 	struct _starpu_job_list *jobq;
			
 
				 
			
--- a/src/sched_policies/work_stealing_policy.c
+++ b/src/sched_policies/work_stealing_policy.c
@@ -36,7 +36,8 @@ static unsigned performed_total = 0;
 
				 static float overload_metric(unsigned id)
			
 
				 {
			
 
				 	float execution_ratio = 0.0f;
			
 
				-	if (performed_total > 0) {
			
 
				+	if (performed_total > 0)
			
 
				+	{
			
 
				 		execution_ratio = _starpu_get_deque_nprocessed(queue_array[id])/performed_total;
			
 
				 	}
			
 
				 
			
@@ -44,10 +45,11 @@ static float overload_metric(unsigned id)
 
				 	performed_queue = _starpu_get_deque_nprocessed(queue_array[id]);
			
 
				 
			
 
				 	float current_ratio = 0.0f;
			
 
				-	if (performed_queue > 0) {
			
 
				+	if (performed_queue > 0)
			
 
				+	{
			
 
				 		current_ratio = _starpu_get_deque_njobs(queue_array[id])/performed_queue;
			
 
				 	}
			
 
				-	
			
 
				+
			
 
				 	return (current_ratio - execution_ratio);
			
 
				 }
			
 
				 
			
@@ -59,16 +61,19 @@ static struct _starpu_deque_jobq *select_victimq(void)
 
				 	unsigned attempts = nworkers;
			
 
				 
			
 
				 	unsigned worker = rr_worker;
			
 
				-	do {
			
 
				+	do
			
 
				+	{
			
 
				 		if (overload_metric(worker) > 0.0f)
			
 
				 		{
			
 
				 			q = queue_array[worker];
			
 
				 			return q;
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			worker = (worker + 1)%nworkers;
			
 
				 		}
			
 
				-	} while(attempts-- > 0);
			
 
				+	}
			
 
				+	while(attempts-- > 0);
			
 
				 
			
 
				 	/* take one anyway ... */
			
 
				 	q = queue_array[rr_worker];
			
@@ -84,16 +89,19 @@ static struct _starpu_deque_jobq *select_workerq(void)
 
				 	unsigned attempts = nworkers;
			
 
				 
			
 
				 	unsigned worker = rr_worker;
			
 
				-	do {
			
 
				+	do
			
 
				+	{
			
 
				 		if (overload_metric(worker) < 0.0f)
			
 
				 		{
			
 
				 			q = queue_array[worker];
			
 
				 			return q;
			
 
				 		}
			
 
				-		else {
			
 
				+		else
			
 
				+		{
			
 
				 			worker = (worker + 1)%nworkers;
			
 
				 		}
			
 
				-	} while(attempts-- > 0);
			
 
				+	}
			
 
				+	while(attempts-- > 0);
			
 
				 
			
 
				 	/* take one anyway ... */
			
 
				 	q = queue_array[rr_worker];
			
@@ -117,7 +125,7 @@ static struct _starpu_deque_jobq *select_victimq(void)
 
				 }
			
 
				 
			
 
				 
			
 
				-/* when anonymous threads submit tasks, 
			
 
				+/* when anonymous threads submit tasks,
			
 
				  * we need to select a queue where to dispose them */
			
 
				 static struct _starpu_deque_jobq *select_workerq(void)
			
 
				 {
			
@@ -148,19 +156,21 @@ static struct starpu_task *ws_pop_task(void)
 
				 	_STARPU_PTHREAD_MUTEX_LOCK(&global_sched_mutex);
			
 
				 
			
 
				 	task = _starpu_deque_pop_task(q, -1);
			
 
				-	if (task) {
			
 
				+	if (task)
			
 
				+	{
			
 
				 		/* there was a local task */
			
 
				 		performed_total++;
			
 
				 		_STARPU_PTHREAD_MUTEX_UNLOCK(&global_sched_mutex);
			
 
				 		return task;
			
 
				 	}
			
 
				-	
			
 
				+
			
 
				 	/* we need to steal someone's job */
			
 
				 	struct _starpu_deque_jobq *victimq;
			
 
				 	victimq = select_victimq();
			
 
				 
			
 
				 	task = _starpu_deque_pop_task(victimq, workerid);
			
 
				-	if (task) {
			
 
				+	if (task)
			
 
				+	{
			
 
				 		_STARPU_TRACE_WORK_STEALING(q, victimq);
			
 
				 		performed_total++;
			
 
				 	}
			
@@ -194,8 +204,8 @@ static int ws_push_task(struct starpu_task *task)
 
				         return 0;
			
 
				 }
			
 
				 
			
 
				-static void initialize_ws_policy(struct starpu_machine_topology *topology, 
			
 
				-				__attribute__ ((unused)) struct starpu_sched_policy *_policy) 
			
 
				+static void initialize_ws_policy(struct starpu_machine_topology *topology,
			
 
				+				__attribute__ ((unused)) struct starpu_sched_policy *_policy)
			
 
				 {
			
 
				 	nworkers = topology->nworkers;
			
 
				 	rr_worker = 0;
			
@@ -211,7 +221,8 @@ static void initialize_ws_policy(struct starpu_machine_topology *topology,
 
				 	}
			
 
				 }
			
 
				 
			
 
				-struct starpu_sched_policy _starpu_sched_ws_policy = {
			
 
				+struct starpu_sched_policy _starpu_sched_ws_policy =
			
 
				+{
			
 
				 	.init_sched = initialize_ws_policy,
			
 
				 	.deinit_sched = NULL,
			
 
				 	.push_task = ws_push_task,
			
--- a/src/top/starpu_top.c
+++ b/src/top/starpu_top.c
@@ -43,7 +43,7 @@ pthread_cond_t starpu_top_wait_for_continue_cond = PTHREAD_COND_INITIALIZER;
 
				 
			
 
				 int starpu_top_status_get()
			
 
				 {
			
 
				-  return starpu_top;
			
 
				+	return starpu_top;
			
 
				 }
			
 
				 
			
 
				 
			
@@ -82,7 +82,8 @@ void copy_data_and_param()
 
				 	}
			
 
				 }
			
 
				 
			
 
				-static void starpu_top_get_device_type(int id, char* type){
			
 
				+static void starpu_top_get_device_type(int id, char* type)
			
 
				+{
			
 
				 	enum starpu_archtype device_type=starpu_worker_get_type(id);
			
 
				 	switch (device_type)
			
 
				 	{
			
@@ -125,7 +126,8 @@ static void starpu_top_send_devices_info()
 
				 }
			
 
				 
			
 
				 
			
 
				-void starpu_top_init_and_wait(const char* server_name){
			
 
				+void starpu_top_init_and_wait(const char* server_name)
			
 
				+{
			
 
				 	starpu_top=1;
			
 
				 	sem_init(&starpu_top_wait_for_go,0,0);
			
 
				 
			
@@ -177,7 +179,8 @@ void starpu_top_init_and_wait(const char* server_name){
 
				 	_starpu_top_message_add(_starpu_top_mt,message);
			
 
				 	struct starpu_top_param * cur_param = starpu_top_first_param;
			
 
				 	printf("%s:%d sending parameters\n", __FILE__, __LINE__);
			
 
				-	while(cur_param != NULL){
			
 
				+	while(cur_param != NULL)
			
 
				+	{
			
 
				 	  _starpu_top_message_add(_starpu_top_mt,message_for_topparam_init(cur_param));
			
 
				 	  cur_param = cur_param->next;
			
 
				 	}
			
@@ -385,7 +388,6 @@ void starpu_top_enqueue_param(struct starpu_top_param* param)
 
				 	}
			
 
				 }
			
 
				 
			
 
				-
			
 
				 struct starpu_top_param* starpu_top_register_parameter_boolean(const char* param_name,
			
 
				 							       int* parameter_field,
			
 
				 							       void (*callback)(struct starpu_top_param*))
			
@@ -404,7 +406,6 @@ struct starpu_top_param* starpu_top_register_parameter_boolean(const char* param
 
				 	return param;
			
 
				 }
			
 
				 
			
 
				-
			
 
				 struct starpu_top_param* starpu_top_register_parameter_integer(const char* param_name,
			
 
				 							       int* parameter_field,
			
 
				 							       int minimum_value,
			
@@ -426,6 +427,7 @@ struct starpu_top_param* starpu_top_register_parameter_integer(const char* param
 
				 
			
 
				 	return param;
			
 
				 }
			
 
				+
			
 
				 struct starpu_top_param* starpu_top_register_parameter_float(const char* param_name,
			
 
				 							     double* parameter_field,
			
 
				 							     double minimum_value,
			
@@ -473,7 +475,8 @@ struct starpu_top_param* starpu_top_register_parameter_enum(const char* param_na
 
				 *****************UPDATE FUNC******************
			
 
				 **********************************************/
			
 
				 
			
 
				-void starpu_top_update_data_boolean(const struct starpu_top_data* data, int value) {
			
 
				+void starpu_top_update_data_boolean(const struct starpu_top_data* data, int value)
			
 
				+{
			
 
				 	if (!starpu_top_status_get())
			
 
				 		return;
			
 
				 	if(data->active)
			
@@ -488,7 +491,8 @@ void starpu_top_update_data_boolean(const struct starpu_top_data* data, int valu
 
				 	}
			
 
				 }
			
 
				 
			
 
				-void starpu_top_update_data_integer(const struct starpu_top_data* data, int value){
			
 
				+void starpu_top_update_data_integer(const struct starpu_top_data* data, int value)
			
 
				+{
			
 
				 	if (!starpu_top_status_get())
			
 
				 		return;
			
 
				 	if(data->active)
			
@@ -503,7 +507,8 @@ void starpu_top_update_data_integer(const struct starpu_top_data* data, int valu
 
				 	}
			
 
				 }
			
 
				 
			
 
				-void starpu_top_update_data_float(const struct starpu_top_data* data, double value){
			
 
				+void starpu_top_update_data_float(const struct starpu_top_data* data, double value)
			
 
				+{
			
 
				 	if (!starpu_top_status_get())
			
 
				 		return;
			
 
				 	if(data->active)
			
@@ -517,7 +522,8 @@ void starpu_top_update_data_float(const struct starpu_top_data* data, double val
 
				 	}
			
 
				 }
			
 
				 
			
 
				-void starpu_top_update_parameter(const struct starpu_top_param* param){
			
 
				+void starpu_top_update_parameter(const struct starpu_top_param* param)
			
 
				+{
			
 
				 	if (!starpu_top_status_get())
			
 
				 		return;
			
 
				 	char*message = (char *) malloc(50);
			
@@ -666,7 +672,8 @@ void starpu_top_change_data_active(char* message, int active)
 
				 	starpu_top_datas[data_id]->active = active;
			
 
				 }
			
 
				 
			
 
				-void starpu_top_change_parameter_value(const char* message){
			
 
				+void starpu_top_change_parameter_value(const char* message)
			
 
				+{
			
 
				 	const char*tmp = strstr(message, ";")+1;
			
 
				 	int param_id = atoi(tmp);
			
 
				 	struct starpu_top_param* param = starpu_top_params[param_id];
			
--- a/src/top/starpu_top_connection.h
+++ b/src/top/starpu_top_connection.h
@@ -23,7 +23,8 @@
 
				 #include <starpu_top.h>
			
 
				 
			
 
				 #ifdef __cplusplus
			
 
				-extern "C" {
			
 
				+extern "C"
			
 
				+{
			
 
				 #endif
			
 
				 
			
 
				 extern struct _starpu_top_message_queue* _starpu_top_mt;
			
--- a/src/top/starpu_top_message_queue.c
+++ b/src/top/starpu_top_message_queue.c