Pārlūkot izejas kodu

src: code cleaning

	- curly brackets on an individual line
	- delete trailing whitespace
Nathalie Furmento 14 gadi atpakaļ
vecāks
revīzija
3ad31951c8
100 mainītis faili ar 2060 papildinājumiem un 1421 dzēšanām
  1. 8 6
      src/common/barrier.c
  2. 2 1
      src/common/barrier.h
  3. 26 23
      src/common/fxt.c
  4. 9 9
      src/common/hash.c
  5. 11 11
      src/common/htable32.c
  6. 27 16
      src/common/rwlock.c
  7. 4 2
      src/common/starpu_spinlock.c
  8. 2 1
      src/common/starpu_spinlock.h
  9. 34 29
      src/common/timing.c
  10. 3 3
      src/common/utils.c
  11. 2 1
      src/core/combined_workers.c
  12. 2 1
      src/core/debug.c
  13. 1 1
      src/core/debug.h
  14. 20 13
      src/core/dependencies/cg.c
  15. 10 5
      src/core/dependencies/cg.h
  16. 25 23
      src/core/dependencies/data_concurrency.c
  17. 1 1
      src/core/dependencies/dependencies.c
  18. 21 21
      src/core/dependencies/htable.c
  19. 2 1
      src/core/dependencies/htable.h
  20. 19 21
      src/core/dependencies/implicit_data_deps.c
  21. 17 14
      src/core/dependencies/tags.c
  22. 4 2
      src/core/dependencies/tags.h
  23. 2 2
      src/core/dependencies/task_deps.c
  24. 2 1
      src/core/errorcheck.h
  25. 20 15
      src/core/jobs.c
  26. 25 21
      src/core/perfmodel/perfmodel.c
  27. 2 1
      src/core/perfmodel/perfmodel.h
  28. 28 14
      src/core/perfmodel/perfmodel_bus.c
  29. 109 79
      src/core/perfmodel/perfmodel_history.c
  30. 11 9
      src/core/perfmodel/regression.c
  31. 4 3
      src/core/progress_hook.c
  32. 15 10
      src/core/sched_policy.c
  33. 31 23
      src/core/task.c
  34. 14 8
      src/core/task_bundle.c
  35. 79 42
      src/core/topology.c
  36. 52 36
      src/core/workers.c
  37. 11 8
      src/core/workers.h
  38. 44 31
      src/datawizard/coherency.c
  39. 15 11
      src/datawizard/coherency.h
  40. 78 63
      src/datawizard/copy_driver.c
  41. 4 2
      src/datawizard/copy_driver.h
  42. 35 27
      src/datawizard/data_request.c
  43. 2 1
      src/datawizard/data_request.h
  44. 5 5
      src/datawizard/datastats.c
  45. 17 15
      src/datawizard/filters.c
  46. 6 5
      src/datawizard/interfaces/bcsr_filters.c
  47. 36 27
      src/datawizard/interfaces/bcsr_interface.c
  48. 2 1
      src/datawizard/interfaces/block_filters.c
  49. 53 30
      src/datawizard/interfaces/block_interface.c
  50. 11 10
      src/datawizard/interfaces/csr_filters.c
  51. 26 15
      src/datawizard/interfaces/csr_interface.c
  52. 26 17
      src/datawizard/interfaces/data_interface.c
  53. 21 20
      src/datawizard/interfaces/matrix_filters.c
  54. 30 19
      src/datawizard/interfaces/matrix_interface.c
  55. 51 28
      src/datawizard/interfaces/multiformat_interface.c
  56. 27 17
      src/datawizard/interfaces/variable_interface.c
  57. 42 38
      src/datawizard/interfaces/vector_filters.c
  58. 29 19
      src/datawizard/interfaces/vector_interface.c
  59. 6 4
      src/datawizard/interfaces/void_interface.c
  60. 20 10
      src/datawizard/memalloc.c
  61. 1 1
      src/datawizard/memalloc.h
  62. 10 9
      src/datawizard/memory_nodes.c
  63. 6 3
      src/datawizard/memory_nodes.h
  64. 26 24
      src/datawizard/reduction.c
  65. 1 1
      src/datawizard/sort_data_handles.c
  66. 15 8
      src/datawizard/user_interactions.c
  67. 9 6
      src/datawizard/write_back.c
  68. 2 2
      src/datawizard/write_back.h
  69. 2 1
      src/debug/starpu_debug_helpers.h
  70. 70 51
      src/debug/traces/starpu_fxt.c
  71. 9 8
      src/debug/traces/starpu_fxt_dag.c
  72. 17 9
      src/debug/traces/starpu_fxt_mpi.c
  73. 22 19
      src/dolib.c
  74. 18 13
      src/drivers/cpu/driver_cpu.c
  75. 19 12
      src/drivers/cuda/driver_cuda.c
  76. 12 8
      src/drivers/driver_common/driver_common.c
  77. 47 31
      src/drivers/gordon/driver_gordon.c
  78. 54 28
      src/drivers/opencl/driver_opencl.c
  79. 39 20
      src/drivers/opencl/driver_opencl_utils.c
  80. 153 75
      src/profiling/bound.c
  81. 17 13
      src/profiling/profiling.c
  82. 10 5
      src/profiling/profiling_helpers.c
  83. 58 41
      src/sched_policies/deque_modeling_policy_data_aware.c
  84. 4 3
      src/sched_policies/deque_queues.c
  85. 2 1
      src/sched_policies/deque_queues.h
  86. 6 4
      src/sched_policies/detect_combined_workers.c
  87. 7 6
      src/sched_policies/eager_central_policy.c
  88. 19 14
      src/sched_policies/eager_central_priority_policy.c
  89. 9 7
      src/sched_policies/fifo_queues.c
  90. 2 1
      src/sched_policies/fifo_queues.h
  91. 46 30
      src/sched_policies/heft.c
  92. 15 11
      src/sched_policies/parallel_greedy.c
  93. 29 23
      src/sched_policies/parallel_heft.c
  94. 8 6
      src/sched_policies/random_policy.c
  95. 6 6
      src/sched_policies/stack_queues.c
  96. 2 1
      src/sched_policies/stack_queues.h
  97. 27 16
      src/sched_policies/work_stealing_policy.c
  98. 18 11
      src/top/starpu_top.c
  99. 2 1
      src/top/starpu_top_connection.h
  100. 0 0
      src/top/starpu_top_message_queue.c

+ 8 - 6
src/common/barrier.c

@@ -31,12 +31,13 @@ int _starpu_barrier_init(struct _starpu_barrier *barrier, int count)
 static
 int _starpu_barrier_test(struct _starpu_barrier *barrier)
 {
-    /*
-     * Check whether any threads are known to be waiting; report
-     * "BUSY" if so.
-     */
+	/*
+	 * Check whether any threads are known to be waiting; report
+	 * "BUSY" if so.
+	 */
         _STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex_exit);
-        if (barrier->reached_exit != barrier->count) {
+        if (barrier->reached_exit != barrier->count)
+	{
                 _STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex_exit);
                 return EBUSY;
         }
@@ -47,7 +48,8 @@ int _starpu_barrier_test(struct _starpu_barrier *barrier)
 int _starpu_barrier_destroy(struct _starpu_barrier *barrier)
 {
 	int ret = _starpu_barrier_test(barrier);
-	while (ret == EBUSY) {
+	while (ret == EBUSY)
+	{
 		ret = _starpu_barrier_test(barrier);
 	}
 	_STARPU_DEBUG("reached_exit %d\n", barrier->reached_exit);

+ 2 - 1
src/common/barrier.h

@@ -19,7 +19,8 @@
 
 #include <pthread.h>
 
-struct _starpu_barrier {
+struct _starpu_barrier
+{
 	int count;
 	int reached_start;
 	int reached_exit;

+ 26 - 23
src/common/fxt.c

@@ -27,17 +27,17 @@
 #ifdef STARPU_HAVE_WINDOWS
 #include <windows.h>
 #endif
-		
-#define PROF_BUFFER_SIZE  (8*1024*1024)
 
-static char PROF_FILE_USER[128];
-static int fxt_started = 0;
+#define _STARPU_PROF_BUFFER_SIZE  (8*1024*1024)
 
-static int written = 0;
+static char _STARPU_PROF_FILE_USER[128];
+static int _starpu_fxt_started = 0;
 
-static int id;
+static int _starpu_written = 0;
 
-static void _profile_set_tracefile(void *last, ...)
+static int _starpu_id;
+
+static void _starpu_profile_set_tracefile(void *last, ...)
 {
 	va_list vl;
 	char *user;
@@ -47,7 +47,7 @@ static void _profile_set_tracefile(void *last, ...)
 			fxt_prefix = "/tmp/";
 
 	va_start(vl, last);
-	vsprintf(PROF_FILE_USER, fxt_prefix, vl);
+	vsprintf(_STARPU_PROF_FILE_USER, fxt_prefix, vl);
 	va_end(vl);
 
 	user = getenv("USER");
@@ -55,31 +55,34 @@ static void _profile_set_tracefile(void *last, ...)
 		user = "";
 
 	char suffix[128];
-	snprintf(suffix, 128, "prof_file_%s_%d", user, id);
+	snprintf(suffix, 128, "prof_file_%s_%d", user, _starpu_id);
 
-	strcat(PROF_FILE_USER, suffix);
+	strcat(_STARPU_PROF_FILE_USER, suffix);
 }
 
-void starpu_set_profiling_id(int new_id) {
+void starpu_set_profiling_id(int new_id)
+{
         _STARPU_DEBUG("Set id to <%d>\n", new_id);
-	id = new_id;
-        _profile_set_tracefile(NULL);
+	_starpu_id = new_id;
+        _starpu_profile_set_tracefile(NULL);
 }
 
 void _starpu_start_fxt_profiling(void)
 {
 	unsigned threadid;
 
-	if (!fxt_started) {
-		fxt_started = 1;
-		_profile_set_tracefile(NULL);
+	if (!_starpu_fxt_started)
+	{
+		_starpu_fxt_started = 1;
+		_starpu_profile_set_tracefile(NULL);
 	}
 
 	threadid = syscall(SYS_gettid);
 
 	atexit(_starpu_stop_fxt_profiling);
 
-	if(fut_setup(PROF_BUFFER_SIZE, FUT_KEYMASKALL, threadid) < 0) {
+	if (fut_setup(_STARPU_PROF_BUFFER_SIZE, FUT_KEYMASKALL, threadid) < 0)
+	{
 		perror("fut_setup");
 		STARPU_ABORT();
 	}
@@ -89,7 +92,7 @@ void _starpu_start_fxt_profiling(void)
 	return;
 }
 
-static void generate_paje_trace(char *input_fxt_filename, char *output_paje_filename)
+static void _starpu_generate_paje_trace(char *input_fxt_filename, char *output_paje_filename)
 {
 	/* We take default options */
 	struct starpu_fxt_options options;
@@ -108,19 +111,19 @@ static void generate_paje_trace(char *input_fxt_filename, char *output_paje_file
 
 void _starpu_stop_fxt_profiling(void)
 {
-	if (!written)
+	if (!_starpu_written)
 	{
 #ifdef STARPU_VERBOSE
 	        char hostname[128];
 		gethostname(hostname, 128);
-		fprintf(stderr, "Writing FxT traces into file %s:%s\n", hostname, PROF_FILE_USER);
+		fprintf(stderr, "Writing FxT traces into file %s:%s\n", hostname, _STARPU_PROF_FILE_USER);
 #endif
-		fut_endup(PROF_FILE_USER);
+		fut_endup(_STARPU_PROF_FILE_USER);
 
 		/* Should we generate a Paje trace directly ? */
 		int generate_trace = starpu_get_env_number("STARPU_GENERATE_TRACE");
 		if (generate_trace == 1)
-			generate_paje_trace(PROF_FILE_USER, "paje.trace");
+			_starpu_generate_paje_trace(_STARPU_PROF_FILE_USER, "paje.trace");
 
 		int ret = fut_done();
 		if (ret < 0)
@@ -130,7 +133,7 @@ void _starpu_stop_fxt_profiling(void)
 			fprintf(stderr, "Warning: the FxT trace could not be generated properly\n");
 		}
 
-		written = 1;
+		_starpu_written = 1;
 	}
 }
 

+ 9 - 9
src/common/hash.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009, 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,16 +19,16 @@
 #include <stdlib.h>
 #include <string.h>
 
-#define CRC32C_POLY_BE 0x1EDC6F41
+#define _STARPU_CRC32C_POLY_BE 0x1EDC6F41
 
-static inline uint32_t __attribute__ ((pure)) crc32_be_8(uint8_t inputbyte, uint32_t inputcrc)
+static inline uint32_t __attribute__ ((pure)) _starpu_crc32_be_8(uint8_t inputbyte, uint32_t inputcrc)
 {
 	unsigned i;
 	uint32_t crc;
 
 	crc = inputcrc ^ (inputbyte << 24);
 	for (i = 0; i < 8; i++)
-		crc = (crc << 1) ^ ((crc & 0x80000000) ? CRC32C_POLY_BE : 0);
+		crc = (crc << 1) ^ ((crc & 0x80000000) ? _STARPU_CRC32C_POLY_BE : 0);
 
 	return crc;
 }
@@ -39,10 +39,10 @@ uint32_t _starpu_crc32_be(uint32_t input, uint32_t inputcrc)
 
 	uint32_t crc = inputcrc;
 
-	crc = crc32_be_8(p[0], crc);
-	crc = crc32_be_8(p[1], crc);
-	crc = crc32_be_8(p[2], crc);
-	crc = crc32_be_8(p[3], crc);
+	crc = _starpu_crc32_be_8(p[0], crc);
+	crc = _starpu_crc32_be_8(p[1], crc);
+	crc = _starpu_crc32_be_8(p[2], crc);
+	crc = _starpu_crc32_be_8(p[3], crc);
 
 	return crc;
 }
@@ -56,7 +56,7 @@ uint32_t _starpu_crc32_string(char *str, uint32_t inputcrc)
 	unsigned i;
 	for (i = 0; i < len; i++)
 	{
-		hash = crc32_be_8((uint8_t)str[i], hash);
+		hash = _starpu_crc32_be_8((uint8_t)str[i], hash);
 	}
 
 	return hash;

+ 11 - 11
src/common/htable32.c

@@ -33,21 +33,20 @@ void *_starpu_htbl_search_32(struct starpu_htbl32_node *htbl, uint32_t key)
 
 	for(currentbit = 0; currentbit < keysize; currentbit+=_STARPU_HTBL32_NODE_SIZE)
 	{
-	
-	//	printf("search : current bit = %d \n", currentbit);
+		//	printf("search : current bit = %d \n", currentbit);
 		if (STARPU_UNLIKELY(current_htbl == NULL))
 			return NULL;
 
-		/* 0000000000001111 
+		/* 0000000000001111
 		 *     | currentbit
 		 * 0000111100000000 = offloaded_mask
 		 *         |last_currentbit
 		 * */
 
-		unsigned last_currentbit = 
+		unsigned last_currentbit =
 			keysize - (currentbit + _STARPU_HTBL32_NODE_SIZE);
 		uint32_t offloaded_mask = mask << last_currentbit;
-		unsigned current_index = 
+		unsigned current_index =
 			(key & (offloaded_mask)) >> (last_currentbit);
 
 		current_htbl = current_htbl->children[current_index];
@@ -73,29 +72,30 @@ void *_starpu_htbl_insert_32(struct starpu_htbl32_node **htbl, uint32_t key, voi
 	for(currentbit = 0; currentbit < keysize; currentbit+=_STARPU_HTBL32_NODE_SIZE)
 	{
 		//printf("insert : current bit = %d \n", currentbit);
-		if (*current_htbl_ptr == NULL) {
+		if (*current_htbl_ptr == NULL)
+		{
 			/* TODO pad to change that 1 into 16 ? */
 			*current_htbl_ptr = (struct starpu_htbl32_node*)calloc(sizeof(struct starpu_htbl32_node), 1);
 			assert(*current_htbl_ptr);
 		}
 
-		/* 0000000000001111 
+		/* 0000000000001111
 		 *     | currentbit
 		 * 0000111100000000 = offloaded_mask
 		 *         |last_currentbit
 		 * */
 
-		unsigned last_currentbit = 
+		unsigned last_currentbit =
 			keysize - (currentbit + _STARPU_HTBL32_NODE_SIZE);
 		uint32_t offloaded_mask = mask << last_currentbit;
-		unsigned current_index = 
+		unsigned current_index =
 			(key & (offloaded_mask)) >> (last_currentbit);
 
-		current_htbl_ptr = 
+		current_htbl_ptr =
 			&((*current_htbl_ptr)->children[current_index]);
 	}
 
-	/* current_htbl either contains NULL or a previous entry 
+	/* current_htbl either contains NULL or a previous entry
 	 * we overwrite it anyway */
 	void *old_entry = *current_htbl_ptr;
 	*current_htbl_ptr = (struct starpu_htbl32_node *) entry;

+ 27 - 16
src/common/rwlock.c

@@ -17,16 +17,18 @@
 
 /**
  * A dummy implementation of a rw_lock using spinlocks ...
- */ 
+ */
 
 #include "rwlock.h"
 
 static void _starpu_take_busy_lock(struct _starpu_rw_lock *lock)
 {
 	uint32_t prev;
-	do {
+	do
+	{
 		prev = STARPU_TEST_AND_SET(&lock->busy, 1);
-	} while (prev);
+	}
+	while (prev);
 }
 
 static void _starpu_release_busy_lock(struct _starpu_rw_lock *lock)
@@ -47,14 +49,15 @@ void _starpu_init_rw_lock(struct _starpu_rw_lock *lock)
 int _starpu_take_rw_lock_write_try(struct _starpu_rw_lock *lock)
 {
 	_starpu_take_busy_lock(lock);
-	
+
 	if (lock->readercnt > 0 || lock->writer)
 	{
 		/* fail to take the lock */
 		_starpu_release_busy_lock(lock);
 		return -1;
 	}
-	else {
+	else
+	{
 		STARPU_ASSERT(lock->readercnt == 0);
 		STARPU_ASSERT(lock->writer == 0);
 
@@ -75,7 +78,8 @@ int _starpu_take_rw_lock_read_try(struct _starpu_rw_lock *lock)
 		_starpu_release_busy_lock(lock);
 		return -1;
 	}
-	else {
+	else
+	{
 		STARPU_ASSERT(lock->writer == 0);
 
 		/* no one is writing */
@@ -91,29 +95,33 @@ int _starpu_take_rw_lock_read_try(struct _starpu_rw_lock *lock)
 
 void _starpu_take_rw_lock_write(struct _starpu_rw_lock *lock)
 {
-	do {
+	do
+	{
 		_starpu_take_busy_lock(lock);
-		
+
 		if (lock->readercnt > 0 || lock->writer)
 		{
 			/* fail to take the lock */
 			_starpu_release_busy_lock(lock);
 		}
-		else {
+		else
+		{
 			STARPU_ASSERT(lock->readercnt == 0);
 			STARPU_ASSERT(lock->writer == 0);
-	
+
 			/* no one was either writing nor reading */
 			lock->writer = 1;
 			_starpu_release_busy_lock(lock);
 			return;
 		}
-	} while (1);
+	}
+	while (1);
 }
 
 void _starpu_take_rw_lock_read(struct _starpu_rw_lock *lock)
 {
-	do {
+	do
+	{
 		_starpu_take_busy_lock(lock);
 
 		if (lock->writer)
@@ -121,7 +129,8 @@ void _starpu_take_rw_lock_read(struct _starpu_rw_lock *lock)
 			/* there is a writer ... */
 			_starpu_release_busy_lock(lock);
 		}
-		else {
+		else
+		{
 			STARPU_ASSERT(lock->writer == 0);
 
 			/* no one is writing */
@@ -131,19 +140,21 @@ void _starpu_take_rw_lock_read(struct _starpu_rw_lock *lock)
 
 			return;
 		}
-	} while (1);
+	}
+	while (1);
 }
 
 void _starpu_release_rw_lock(struct _starpu_rw_lock *lock)
 {
 	_starpu_take_busy_lock(lock);
 	/* either writer or reader (exactly one !) */
-	if (lock->writer) 
+	if (lock->writer)
 	{
 		STARPU_ASSERT(lock->readercnt == 0);
 		lock->writer = 0;
 	}
-	else {
+	else
+	{
 		/* reading mode */
 		STARPU_ASSERT(lock->writer == 0);
 		lock->readercnt--;

+ 4 - 2
src/common/starpu_spinlock.c

@@ -74,9 +74,11 @@ int _starpu_spin_lock(struct _starpu_spinlock *lock)
 	return ret;
 #else
 	uint32_t prev;
-	do {
+	do
+	{
 		prev = STARPU_TEST_AND_SET(&lock->taken, 1);
-	} while (prev);
+	}
+	while (prev);
 	return 0;
 #endif
 #endif

+ 2 - 1
src/common/starpu_spinlock.h

@@ -23,7 +23,8 @@
 #include <common/utils.h>
 #include <common/config.h>
 
-struct _starpu_spinlock {
+struct _starpu_spinlock
+{
 #ifdef STARPU_SPINLOCK_CHECK
 	pthread_mutexattr_t errcheck_attr;
 	pthread_mutex_t errcheck_lock;

+ 34 - 29
src/common/timing.c

@@ -34,26 +34,31 @@
 #endif
 #endif
 
-static struct timespec reference_start_time_ts;
+static struct timespec _starpu_reference_start_time_ts;
 
 /* Modern CPUs' clocks are usually not synchronized so we use a monotonic clock
  * to have consistent timing measurements. The CLOCK_MONOTONIC_RAW clock is not
  * subject to NTP adjustments, but is not available on all systems (in that
  * case we use the CLOCK_MONOTONIC clock instead). */
-static void _starpu_clock_readtime(struct timespec *ts) {
+static void _starpu_clock_readtime(struct timespec *ts)
+{
 #ifdef CLOCK_MONOTONIC_RAW
 	static int raw_supported = 0;
-	switch (raw_supported) {
+	switch (raw_supported)
+	{
 	case -1:
 		break;
 	case 1:
 		clock_gettime(CLOCK_MONOTONIC_RAW, ts);
 		return;
 	case 0:
-		if (clock_gettime(CLOCK_MONOTONIC_RAW, ts)) {
+		if (clock_gettime(CLOCK_MONOTONIC_RAW, ts))
+		{
 			raw_supported = -1;
 			break;
-		} else {
+		}
+		else
+		{
 			raw_supported = 1;
 			return;
 		}
@@ -64,7 +69,7 @@ static void _starpu_clock_readtime(struct timespec *ts) {
 
 void _starpu_timing_init(void)
 {
-	_starpu_clock_gettime(&reference_start_time_ts);
+	_starpu_clock_gettime(&_starpu_reference_start_time_ts);
 }
 
 void _starpu_clock_gettime(struct timespec *ts)
@@ -75,13 +80,13 @@ void _starpu_clock_gettime(struct timespec *ts)
 	_starpu_clock_readtime(&absolute_ts);
 
 	/* Compute the relative time since initialization */
-	starpu_timespec_sub(&absolute_ts, &reference_start_time_ts, ts);
+	starpu_timespec_sub(&absolute_ts, &_starpu_reference_start_time_ts, ts);
 }
 
 #else // !HAVE_CLOCK_GETTIME
 
 #if defined(__i386__) || defined(__pentium__) || defined(__pentiumpro__) || defined(__i586__) || defined(__i686__) || defined(__k6__) || defined(__k7__) || defined(__x86_64__)
-typedef union starpu_u_tick
+union starpu_u_tick
 {
   uint64_t tick;
 
@@ -91,64 +96,64 @@ typedef union starpu_u_tick
     uint32_t high;
   }
   sub;
-} starpu_tick_t;
+};
 
 #define STARPU_GET_TICK(t) __asm__ volatile("rdtsc" : "=a" ((t).sub.low), "=d" ((t).sub.high))
 #define STARPU_TICK_RAW_DIFF(t1, t2) ((t2).tick - (t1).tick)
 #define STARPU_TICK_DIFF(t1, t2) (STARPU_TICK_RAW_DIFF(t1, t2) - residual)
 
-static starpu_tick_t reference_start_tick;
-static double scale = 0.0;
-static unsigned long long residual = 0;
+static union starpu_u_tick _starpu_reference_start_tick;
+static double _starpu_scale = 0.0;
+static unsigned long long _starpu_residual = 0;
 
-static int inited = 0;
+static int _starpu_inited = 0;
 
 void _starpu_timing_init(void)
 {
-  static starpu_tick_t t1, t2;
+  static union starpu_u_tick t1, t2;
   int i;
 
-  if (inited) return;
+  if (_starpu_inited) return;
+
+  _starpu_residual = (unsigned long long)1 << 63;
 
-  residual = (unsigned long long)1 << 63;
-  
   for(i = 0; i < 20; i++)
     {
       STARPU_GET_TICK(t1);
       STARPU_GET_TICK(t2);
-      residual = STARPU_MIN(residual, STARPU_TICK_RAW_DIFF(t1, t2));
+      _starpu_residual = STARPU_MIN(_starpu_residual, STARPU_TICK_RAW_DIFF(t1, t2));
     }
-  
+
   {
     struct timeval tv1,tv2;
-    
+
     STARPU_GET_TICK(t1);
     gettimeofday(&tv1,0);
     usleep(500000);
     STARPU_GET_TICK(t2);
     gettimeofday(&tv2,0);
-    scale = ((tv2.tv_sec*1e6 + tv2.tv_usec) -
-	     (tv1.tv_sec*1e6 + tv1.tv_usec)) / 
+    _starpu_scale = ((tv2.tv_sec*1e6 + tv2.tv_usec) -
+		     (tv1.tv_sec*1e6 + tv1.tv_usec)) /
       (double)(STARPU_TICK_DIFF(t1, t2));
   }
 
-  STARPU_GET_TICK(reference_start_tick);
+  STARPU_GET_TICK(_starpu_reference_start_tick);
 
-  inited = 1;
+  _starpu_inited = 1;
 }
 
 void _starpu_clock_gettime(struct timespec *ts)
 {
-	starpu_tick_t tick_now;
+	union starpu_u_tick tick_now;
 
 	STARPU_GET_TICK(tick_now);
 
-	uint64_t elapsed_ticks = STARPU_TICK_DIFF(reference_start_tick, tick_now);
+	uint64_t elapsed_ticks = STARPU_TICK_DIFF(_starpu_reference_start_tick, tick_now);
 
 	/* We convert this number into nano-seconds so that we can fill the
 	 * timespec structure. */
-	uint64_t elapsed_ns = (uint64_t)(((double)elapsed_ticks)*(scale*1000.0));
-	
+	uint64_t elapsed_ns = (uint64_t)(((double)elapsed_ticks)*(_starpu_scale*1000.0));
+
 	long tv_nsec = (elapsed_ns % 1000000000);
 	time_t tv_sec = (elapsed_ns / 1000000000);
 
@@ -173,7 +178,7 @@ void _starpu_clock_gettime(struct timespec *ts)
 double starpu_timing_timespec_delay_us(struct timespec *start, struct timespec *end)
 {
 	struct timespec diff;
-	
+
 	starpu_timespec_sub(end, start, &diff);
 
 	double us = (diff.tv_sec*1e6) + (diff.tv_nsec*1e-3);

+ 3 - 3
src/common/utils.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -59,9 +59,9 @@ int _starpu_mkpath(const char *s, mode_t mode)
 
 	if ((mkdir(path, mode) == -1) && (errno != EEXIST))
 		rv = -1;
-	else 
+	else
 		rv = 0;
-	
+
 out:
 	if (up)
 		free(up);

+ 2 - 1
src/core/combined_workers.c

@@ -132,7 +132,8 @@ int starpu_combined_worker_assign_workerid(int nworkers, int workerid_array[])
 			&config->workers[id].initial_cpu_set);
 #else
 		int j;
-		for (j = 0; j < CPU_SETSIZE; j++) {
+		for (j = 0; j < CPU_SETSIZE; j++)
+		{
 			if (CPU_ISSET(j, &config->workers[id].initial_cpu_set))
 				CPU_SET(j, &combined_worker->cpu_set);
 		}

+ 2 - 1
src/core/debug.c

@@ -25,6 +25,7 @@ static pthread_mutex_t logfile_mutex = PTHREAD_MUTEX_INITIALIZER;
 static FILE *logfile;
 #endif
 
+/* Tell gdb whether FXT is compiled in or not */
 int _starpu_use_fxt
 #ifdef STARPU_USE_FXT
 	= 1
@@ -36,7 +37,7 @@ void _starpu_open_debug_logfile(void)
 #ifdef STARPU_VERBOSE
 	/* what is  the name of the file ? default = "starpu.log" */
 	char *logfile_name;
-	
+
 	logfile_name = getenv("STARPU_LOGFILENAME");
 	if (!logfile_name)
 	{

+ 1 - 1
src/core/debug.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009-2011  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by

+ 20 - 13
src/core/dependencies/cg.c

@@ -44,7 +44,7 @@ void _starpu_cg_list_deinit(struct _starpu_cg_list *list)
 		struct _starpu_cg *cg = list->succ[id];
 
 		/* We remove the reference on the completion group, and free it
-		 * if there is no more reference. */		
+		 * if there is no more reference. */
 		unsigned ntags = STARPU_ATOMIC_ADD(&cg->ntags, -1);
 		if (ntags == 0)
 			free(list->succ[id]);
@@ -72,7 +72,7 @@ void _starpu_add_successor_to_cg_list(struct _starpu_cg_list *successors, struct
 			successors->succ_list_size = 4;
 
 		/* NB: this is thread safe as the tag->lock is taken */
-		successors->succ = (struct _starpu_cg **) realloc(successors->succ, 
+		successors->succ = (struct _starpu_cg **) realloc(successors->succ,
 			successors->succ_list_size*sizeof(struct _starpu_cg *));
 	}
 #else
@@ -86,7 +86,8 @@ void _starpu_notify_cg(struct _starpu_cg *cg)
 	STARPU_ASSERT(cg);
 	unsigned remaining = STARPU_ATOMIC_ADD(&cg->remaining, -1);
 
-	if (remaining == 0) {
+	if (remaining == 0)
+	{
 		cg->remaining = cg->ntags;
 
 		struct _starpu_tag *tag;
@@ -94,8 +95,10 @@ void _starpu_notify_cg(struct _starpu_cg *cg)
 		struct _starpu_job *j;
 
 		/* the group is now completed */
-		switch (cg->cg_type) {
-			case STARPU_CG_APPS: {
+		switch (cg->cg_type)
+		{
+			case STARPU_CG_APPS:
+			{
 				/* this is a cg for an application waiting on a set of
 	 			 * tags, wake the thread */
 				_STARPU_PTHREAD_MUTEX_LOCK(&cg->succ.succ_apps.cg_mutex);
@@ -105,17 +108,19 @@ void _starpu_notify_cg(struct _starpu_cg *cg)
 				break;
 			}
 
-			case STARPU_CG_TAG: {
+			case STARPU_CG_TAG:
+			{
 				tag = cg->succ.tag;
 				tag_successors = &tag->tag_successors;
-	
+
 				tag_successors->ndeps_completed++;
 
 #ifdef STARPU_DEVEL
 #warning FIXME: who locks this?
 #endif
 				if ((tag->state == STARPU_BLOCKED) &&
-					(tag_successors->ndeps == tag_successors->ndeps_completed)) {
+					(tag_successors->ndeps == tag_successors->ndeps_completed))
+				{
 					/* reset the counter so that we can reuse the completion group */
 					tag_successors->ndeps_completed = 0;
 					_starpu_tag_set_ready(tag);
@@ -123,7 +128,8 @@ void _starpu_notify_cg(struct _starpu_cg *cg)
 				break;
 			}
 
- 		        case STARPU_CG_TASK: {
+ 		        case STARPU_CG_TASK:
+			{
 				j = cg->succ.job;
 
 				job_successors = &j->job_successors;
@@ -175,14 +181,14 @@ void _starpu_notify_cg_list(struct _starpu_cg_list *successors)
 		{
 			struct _starpu_job *j = cg->succ.job;
 			_STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
-		}			
+		}
 
 		_starpu_notify_cg(cg);
 
 		if (cg_type == STARPU_CG_TASK)
 		{
 			struct _starpu_job *j = cg->succ.job;
-			
+
 			/* In case this task was immediately terminated, since
 			 * _starpu_notify_cg_list already hold the sync_mutex
 			 * lock, it is its reponsability to destroy the task if
@@ -197,9 +203,10 @@ void _starpu_notify_cg_list(struct _starpu_cg_list *successors)
 
 			if (must_destroy_task)
 				starpu_task_destroy(task);
-		}			
+		}
 
-		if (cg_type == STARPU_CG_APPS) {
+		if (cg_type == STARPU_CG_APPS)
+		{
 			/* Remove the temporary ref to the cg */
 			memmove(&successors->succ[succ], &successors->succ[succ+1], (nsuccs-(succ+1)) * sizeof(successors->succ[succ]));
 			succ--;

+ 10 - 5
src/core/dependencies/cg.h

@@ -34,7 +34,8 @@
 struct _starpu_job;
 
 /* Completion Group list */
-struct _starpu_cg_list {
+struct _starpu_cg_list
+{
 	unsigned nsuccs; /* how many successors ? */
 	unsigned ndeps; /* how many deps ? */
 	unsigned ndeps_completed; /* how many deps are done ? */
@@ -46,20 +47,23 @@ struct _starpu_cg_list {
 #endif
 };
 
-enum _starpu_cg_type {
+enum _starpu_cg_type
+{
 	STARPU_CG_APPS=(1<<0),
 	STARPU_CG_TAG=(1<<1),
 	STARPU_CG_TASK=(1<<2)
 };
 
 /* Completion Group */
-struct _starpu_cg {
+struct _starpu_cg
+{
 	unsigned ntags; /* number of tags depended on */
 	unsigned remaining; /* number of remaining tags */
 
 	enum _starpu_cg_type cg_type;
 
-	union {
+	union
+	{
 		/* STARPU_CG_TAG */
 		struct _starpu_tag *tag;
 
@@ -70,7 +74,8 @@ struct _starpu_cg {
 		/* in case this completion group is related to an application,
 		 * we have to explicitely wake the waiting thread instead of
 		 * reschedule the corresponding task */
-		struct {
+		struct
+		{
 			unsigned completed;
 			pthread_mutex_t cg_mutex;
 			pthread_cond_t cg_cond;

+ 25 - 23
src/core/dependencies/data_concurrency.c

@@ -39,7 +39,8 @@ static struct _starpu_data_requester *may_unlock_data_req_list_head(starpu_data_
 	{
 		req_list = handle->reduction_req_list;
 	}
-	else {
+	else
+	{
 		if (_starpu_data_requester_list_empty(handle->reduction_req_list))
 			req_list = handle->req_list;
 		else
@@ -63,7 +64,7 @@ static struct _starpu_data_requester *may_unlock_data_req_list_head(starpu_data_
 	enum starpu_access_mode r_mode = r->mode;
 	if (r_mode == STARPU_RW)
 		r_mode = STARPU_W;
-	
+
 	/* If this is a STARPU_R, STARPU_SCRATCH or STARPU_REDUX type of
 	 * access, we only proceed if the cuurrent mode is the same as the
 	 * requested mode. */
@@ -93,7 +94,8 @@ static unsigned _starpu_attempt_to_submit_data_request(unsigned request_from_cod
 		while (_starpu_spin_trylock(&handle->header_lock))
 			_starpu_datawizard_progress(_starpu_get_local_memory_node(), 0);
 	}
-	else {
+	else
+	{
 		_starpu_spin_lock(&handle->header_lock);
 	}
 
@@ -131,7 +133,8 @@ static unsigned _starpu_attempt_to_submit_data_request(unsigned request_from_cod
 			 * the request if needed. */
 			put_in_list = (handle->reduction_refcnt > 0);
 		}
-		else {
+		else
+		{
 			put_in_list = 0;
 		}
 	}
@@ -140,16 +143,16 @@ static unsigned _starpu_attempt_to_submit_data_request(unsigned request_from_cod
 	{
 		/* there cannot be multiple writers or a new writer
 		 * while the data is in read mode */
-		
+
 		handle->busy_count++;
 		/* enqueue the request */
 		struct _starpu_data_requester *r = _starpu_data_requester_new();
-			r->mode = mode;
-			r->is_requested_by_codelet = request_from_codelet;
-			r->j = j;
-			r->buffer_index = buffer_index;
-			r->ready_data_callback = callback;
-			r->argcb = argcb;
+		r->mode = mode;
+		r->is_requested_by_codelet = request_from_codelet;
+		r->j = j;
+		r->buffer_index = buffer_index;
+		r->ready_data_callback = callback;
+		r->argcb = argcb;
 
 		/* We put the requester in a specific list if this is a reduction task */
 		struct _starpu_data_requester_list *req_list =
@@ -160,7 +163,8 @@ static unsigned _starpu_attempt_to_submit_data_request(unsigned request_from_cod
 		/* failed */
 		put_in_list = 1;
 	}
-	else {
+	else
+	{
 		handle->refcnt++;
 		handle->busy_count++;
 
@@ -178,9 +182,8 @@ static unsigned _starpu_attempt_to_submit_data_request(unsigned request_from_cod
 
 }
 
-
 unsigned _starpu_attempt_to_submit_data_request_from_apps(starpu_data_handle_t handle, enum starpu_access_mode mode,
-						void (*callback)(void *), void *argcb)
+							  void (*callback)(void *), void *argcb)
 {
 	return _starpu_attempt_to_submit_data_request(0, handle, mode, callback, argcb, NULL, 0);
 }
@@ -193,7 +196,6 @@ static unsigned attempt_to_submit_data_request_from_job(struct _starpu_job *j, u
 	enum starpu_access_mode mode = j->ordered_buffers[buffer_index].mode;
 
 	return _starpu_attempt_to_submit_data_request(1, handle, mode, NULL, NULL, j, buffer_index);
-
 }
 
 static unsigned _submit_job_enforce_data_deps(struct _starpu_job *j, unsigned start_buffer_index)
@@ -203,7 +205,8 @@ static unsigned _submit_job_enforce_data_deps(struct _starpu_job *j, unsigned st
 	unsigned nbuffers = j->task->cl->nbuffers;
 	for (buf = start_buffer_index; buf < nbuffers; buf++)
 	{
-                if (attempt_to_submit_data_request_from_job(j, buf)) {
+                if (attempt_to_submit_data_request_from_job(j, buf))
+		{
                         j->task->status = STARPU_TASK_BLOCKED_ON_DATA;
 			return 1;
                 }
@@ -239,10 +242,8 @@ static unsigned unlock_one_requester(struct _starpu_data_requester *r)
 	unsigned buffer_index = r->buffer_index;
 
 	if (buffer_index + 1 < nbuffers)
-	{
 		/* not all buffers are protected yet */
 		return _submit_job_enforce_data_deps(j, buffer_index + 1);
-	}
 	else
 		return 0;
 }
@@ -279,7 +280,6 @@ void _starpu_notify_data_dependencies(starpu_data_handle_t handle)
 			_starpu_data_end_reduction_mode_terminate(handle);
 	}
 
-
 	struct _starpu_data_requester *r;
 	while ((r = may_unlock_data_req_list_head(handle)))
 	{
@@ -297,7 +297,8 @@ void _starpu_notify_data_dependencies(starpu_data_handle_t handle)
 			 * the request if needed. */
 			put_in_list = (handle->reduction_refcnt > 0);
 		}
-		else {
+		else
+		{
 			put_in_list = 0;
 		}
 
@@ -307,12 +308,13 @@ void _starpu_notify_data_dependencies(starpu_data_handle_t handle)
 			 * perform a reduction before. */
 			_starpu_data_requester_list_push_front(handle->req_list, r);
 		}
-		else {
+		else
+		{
 			/* The data is now attributed to that request so we put a
 			 * reference on it. */
 			handle->refcnt++;
 			handle->busy_count++;
-		
+
 			enum starpu_access_mode previous_mode = handle->current_mode;
 			handle->current_mode = r_mode;
 
@@ -339,7 +341,7 @@ void _starpu_notify_data_dependencies(starpu_data_handle_t handle)
 			}
 
 			_starpu_data_requester_delete(r);
-			
+
 			_starpu_spin_lock(&handle->header_lock);
 			STARPU_ASSERT(handle->busy_count > 0);
 			handle->busy_count--;

+ 1 - 1
src/core/dependencies/dependencies.c

@@ -32,7 +32,7 @@ void _starpu_notify_dependencies(struct _starpu_job *j)
 
 	/* unlock tasks depending on that task */
 	_starpu_notify_task_dependencies(j);
-	
+
 	/* unlock tags depending on that task */
 	if (j->task->use_tag)
 		_starpu_notify_tag_dependencies(j->tag);

+ 21 - 21
src/core/dependencies/htable.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009, 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -28,21 +28,20 @@ void *_starpu_htbl_search_tag(struct _starpu_htbl_node *htbl, starpu_tag_t tag)
 
 	for(currentbit = 0; currentbit < _STARPU_TAG_SIZE; currentbit+=_STARPU_HTBL_NODE_SIZE)
 	{
-	
 	//	printf("search : current bit = %d \n", currentbit);
 		if (STARPU_UNLIKELY(current_htbl == NULL))
 			return NULL;
 
-		/* 0000000000001111 
+		/* 0000000000001111
 		 *     | currentbit
 		 * 0000111100000000 = offloaded_mask
 		 *         |last_currentbit
 		 * */
 
-		unsigned last_currentbit = 
+		unsigned last_currentbit =
 			_STARPU_TAG_SIZE - (currentbit + _STARPU_HTBL_NODE_SIZE);
 		starpu_tag_t offloaded_mask = mask << last_currentbit;
-		unsigned current_index = 
+		unsigned current_index =
 			(tag & (offloaded_mask)) >> (last_currentbit);
 
 		current_htbl = current_htbl->children[current_index];
@@ -57,7 +56,6 @@ void *_starpu_htbl_search_tag(struct _starpu_htbl_node *htbl, starpu_tag_t tag)
 
 void *_starpu_htbl_insert_tag(struct _starpu_htbl_node **htbl, starpu_tag_t tag, void *entry)
 {
-
 	unsigned currentbit;
 	struct _starpu_htbl_node **current_htbl_ptr = htbl;
 	struct _starpu_htbl_node *previous_htbl_ptr = NULL;
@@ -67,7 +65,8 @@ void *_starpu_htbl_insert_tag(struct _starpu_htbl_node **htbl, starpu_tag_t tag,
 
 	for(currentbit = 0; currentbit < _STARPU_TAG_SIZE; currentbit+=_STARPU_HTBL_NODE_SIZE)
 	{
-		if (*current_htbl_ptr == NULL) {
+		if (*current_htbl_ptr == NULL)
+		{
 			/* TODO pad to change that 1 into 16 ? */
 			*current_htbl_ptr = (struct _starpu_htbl_node *) calloc(1, sizeof(struct _starpu_htbl_node));
 			assert(*current_htbl_ptr);
@@ -76,25 +75,24 @@ void *_starpu_htbl_insert_tag(struct _starpu_htbl_node **htbl, starpu_tag_t tag,
 				previous_htbl_ptr->nentries++;
 		}
 
-		/* 0000000000001111 
+		/* 0000000000001111
 		 *     | currentbit
 		 * 0000111100000000 = offloaded_mask
 		 *         |last_currentbit
 		 * */
 
-		unsigned last_currentbit = 
+		unsigned last_currentbit =
 			_STARPU_TAG_SIZE - (currentbit + _STARPU_HTBL_NODE_SIZE);
 		starpu_tag_t offloaded_mask = mask << last_currentbit;
-		unsigned current_index = 
+		unsigned current_index =
 			(tag & (offloaded_mask)) >> (last_currentbit);
 
 		previous_htbl_ptr = *current_htbl_ptr;
-		current_htbl_ptr = 
+		current_htbl_ptr =
 			&((*current_htbl_ptr)->children[current_index]);
-
 	}
 
-	/* current_htbl either contains NULL or a previous entry 
+	/* current_htbl either contains NULL or a previous entry
 	 * we overwrite it anyway */
 	void *old_entry = *current_htbl_ptr;
 	*current_htbl_ptr = (struct _starpu_htbl_node *) entry;
@@ -124,24 +122,25 @@ void *_starpu_htbl_remove_tag(struct _starpu_htbl_node *htbl, starpu_tag_t tag)
 	{
 		path[level] = current_htbl_ptr;
 
-		if (STARPU_UNLIKELY(!current_htbl_ptr)) {
+		if (STARPU_UNLIKELY(!current_htbl_ptr))
+		{
 			tag_is_present = 0;
 			break;
 		}
 
-		/* 0000000000001111 
+		/* 0000000000001111
 		 *     | currentbit
 		 * 0000111100000000 = offloaded_mask
 		 *         |last_currentbit
 		 * */
 
-		unsigned last_currentbit = 
+		unsigned last_currentbit =
 			_STARPU_TAG_SIZE - (currentbit + _STARPU_HTBL_NODE_SIZE);
 		starpu_tag_t offloaded_mask = mask << last_currentbit;
-		unsigned current_index = 
+		unsigned current_index =
 			(tag & (offloaded_mask)) >> (last_currentbit);
-		
-		current_htbl_ptr = 
+
+		current_htbl_ptr =
 			current_htbl_ptr->children[current_index];
 	}
 
@@ -151,8 +150,9 @@ void *_starpu_htbl_remove_tag(struct _starpu_htbl_node *htbl, starpu_tag_t tag)
 
 	void *old_entry = current_htbl_ptr;
 
-	if (tag_is_present) {
-		/* the tag was in the htbl, so we have to unroll the search 
+	if (tag_is_present)
+	{
+		/* the tag was in the htbl, so we have to unroll the search
  		 * to remove possibly useless htbl (internal) nodes */
 		for (level = maxlevel - 1; level >= 0; level--)
 		{

+ 2 - 1
src/core/dependencies/htable.h

@@ -30,7 +30,8 @@
 
 #define _STARPU_HTBL_NODE_SIZE	16
 
-struct _starpu_htbl_node {
+struct _starpu_htbl_node
+{
 	unsigned nentries;
 	struct _starpu_htbl_node *children[1<<_STARPU_HTBL_NODE_SIZE];
 };

+ 19 - 21
src/core/dependencies/implicit_data_deps.c

@@ -163,7 +163,7 @@ static void _starpu_add_writer_after_writer(starpu_data_handle_t handle, struct
 static void disable_last_writer_callback(void *cl_arg)
 {
 	starpu_data_handle_t handle = (starpu_data_handle_t) cl_arg;
-	
+
 	/* NB: we don't take the handle->sequential_consistency_mutex mutex
 	 * because the empty task that is used for synchronization is going to
 	 * be unlocked in the context of a call to
@@ -172,7 +172,6 @@ static void disable_last_writer_callback(void *cl_arg)
 	handle->last_submitted_writer = NULL;
 }
 
-
 /* This function adds the implicit task dependencies introduced by data
  * sequential consistency. Two tasks are provided: pre_sync and post_sync which
  * respectively indicates which task is going to depend on the previous deps
@@ -196,8 +195,7 @@ void _starpu_detect_implicit_data_deps_with_handle(struct starpu_task *pre_sync_
 		 * they do not interfere with the application. */
 		if (pre_sync_job->reduction_task || post_sync_job->reduction_task)
 			return;
-	
-	
+
 		_STARPU_DEP_DEBUG("Tasks %p %p\n", pre_sync_task, post_sync_task);
 		/* In case we are generating the DAG, we add an implicit
 		 * dependency between the pre and the post sync tasks in case
@@ -213,7 +211,7 @@ void _starpu_detect_implicit_data_deps_with_handle(struct starpu_task *pre_sync_
 		}
 
 		enum starpu_access_mode previous_mode = handle->last_submitted_mode;
-	
+
 		if (mode & STARPU_W)
 		{
 			_STARPU_DEP_DEBUG("W %p\n", handle);
@@ -222,17 +220,17 @@ void _starpu_detect_implicit_data_deps_with_handle(struct starpu_task *pre_sync_
 				_STARPU_DEP_DEBUG("WAW %p\n", handle);
 				_starpu_add_writer_after_writer(handle, pre_sync_task, post_sync_task);
 			}
-			else {
+			else
+			{
 				/* The tasks submitted previously were in read-only
 				 * mode: this task must depend on all those read-only
 				 * tasks and we get rid of the list of readers */
-			
 				_STARPU_DEP_DEBUG("WAR %p\n", handle);
 				_starpu_add_writer_after_readers(handle, pre_sync_task, post_sync_task);
 			}
-	
 		}
-		else {
+		else
+		{
 			_STARPU_DEP_DEBUG("R %p %d -> %d\n", handle, previous_mode, mode);
 			/* Add a reader, after a writer or a reader. */
 			STARPU_ASSERT(pre_sync_task);
@@ -263,10 +261,8 @@ void _starpu_detect_implicit_data_deps_with_handle(struct starpu_task *pre_sync_
 
 				starpu_task_submit(new_sync_task);
 			}
-	
 			_starpu_add_reader_after_writer(handle, pre_sync_task, post_sync_task);
 		}
-	
 		handle->last_submitted_mode = mode;
 	}
         _STARPU_LOG_OUT();
@@ -323,7 +319,7 @@ void _starpu_release_data_enforce_sequential_consistency(struct starpu_task *tas
 		if (task == handle->last_submitted_writer)
 		{
 			handle->last_submitted_writer = NULL;
-			
+
 #ifndef STARPU_USE_FXT
 			if (_starpu_bound_recording)
 #endif
@@ -333,9 +329,8 @@ void _starpu_release_data_enforce_sequential_consistency(struct starpu_task *tas
 				struct _starpu_job *ghost_job = _starpu_get_job_associated_to_task(task);
 				handle->last_submitted_ghost_writer_id = ghost_job->job_id;
 			}
-			
 		}
-		
+
 		/* XXX can a task be both the last writer associated to a data
 		 * and be in its list of readers ? If not, we should not go
 		 * through the entire list once we have detected it was the
@@ -364,7 +359,7 @@ void _starpu_release_data_enforce_sequential_consistency(struct starpu_task *tas
 					struct _starpu_jobid_list *link = (struct _starpu_jobid_list *) malloc(sizeof(struct _starpu_jobid_list));
 					STARPU_ASSERT(link);
 					link->next = handle->last_submitted_ghost_readers_id;
-					link->id = ghost_reader_job->job_id; 
+					link->id = ghost_reader_job->job_id;
 					handle->last_submitted_ghost_readers_id = link;
 				}
 
@@ -372,7 +367,8 @@ void _starpu_release_data_enforce_sequential_consistency(struct starpu_task *tas
 				{
 					prev->next = next;
 				}
-				else {
+				else
+				{
 					/* This is the first element of the list */
 					handle->last_submitted_readers = next;
 				}
@@ -383,7 +379,8 @@ void _starpu_release_data_enforce_sequential_consistency(struct starpu_task *tas
 				 * as soon as we find the task. TODO: check how
 				 * duplicate dependencies are treated. */
 			}
-			else {
+			else
+			{
 				prev = l;
 			}
 
@@ -406,7 +403,7 @@ void _starpu_add_post_sync_tasks(struct starpu_task *post_sync_task, starpu_data
 		struct _starpu_task_wrapper_list *link = (struct _starpu_task_wrapper_list *) malloc(sizeof(struct _starpu_task_wrapper_list));
 		link->task = post_sync_task;
 		link->next = handle->post_sync_tasks;
-		handle->post_sync_tasks = link;		
+		handle->post_sync_tasks = link;
 	}
 
 	_STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
@@ -431,7 +428,6 @@ void _starpu_unlock_post_sync_tasks(starpu_data_handle_t handle)
 			post_sync_tasks = handle->post_sync_tasks;
 			handle->post_sync_tasks = NULL;
 		}
-
 	}
 
 	_STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
@@ -440,7 +436,8 @@ void _starpu_unlock_post_sync_tasks(starpu_data_handle_t handle)
 	{
 		struct _starpu_task_wrapper_list *link = post_sync_tasks;
 
-		while (link) {
+		while (link)
+		{
 			/* There is no need to depend on that task now, since it was already unlocked */
 			_starpu_release_data_enforce_sequential_consistency(link->task, handle);
 
@@ -478,7 +475,8 @@ int _starpu_data_wait_until_available(starpu_data_handle_t handle, enum starpu_a
 		STARPU_ASSERT(!ret);
 		starpu_task_wait(sync_task);
 	}
-	else {
+	else
+	{
 		_STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
 	}
 

+ 17 - 14
src/core/dependencies/tags.c

@@ -44,7 +44,6 @@ static struct _starpu_cg *create_cg_apps(unsigned ntags)
 	return cg;
 }
 
-
 static struct _starpu_cg *create_cg_tag(unsigned ntags, struct _starpu_tag *tag)
 {
 	struct _starpu_cg *cg = (struct _starpu_cg *) malloc(sizeof(struct _starpu_cg));
@@ -90,7 +89,8 @@ void starpu_tag_remove(starpu_tag_t id)
 
 	pthread_rwlock_unlock(&tag_global_rwlock);
 
-	if (tag) {
+	if (tag)
+	{
 		_starpu_spin_lock(&tag->lock);
 
 		unsigned nsuccs = tag->tag_successors.nsuccs;
@@ -126,7 +126,8 @@ static struct _starpu_tag *gettag_struct(starpu_tag_t id)
 	struct _starpu_tag *tag;
 	tag = (struct _starpu_tag *) _starpu_htbl_search_tag(tag_htbl, id);
 
-	if (tag == NULL) {
+	if (tag == NULL)
+	{
 		/* the tag does not exist yet : create an entry */
 		tag = _starpu_tag_init(id);
 
@@ -172,7 +173,8 @@ static void _starpu_tag_add_succ(struct _starpu_tag *tag, struct _starpu_cg *cg)
 
 	_starpu_add_successor_to_cg_list(&tag->tag_successors, cg);
 
-	if (tag->state == STARPU_DONE) {
+	if (tag->state == STARPU_DONE)
+	{
 		/* the tag was already completed sooner */
 		_starpu_notify_cg(cg);
 	}
@@ -201,11 +203,11 @@ void _starpu_tag_declare(starpu_tag_t id, struct _starpu_job *job)
 {
 	_STARPU_TRACE_TAG(id, job);
 	job->task->use_tag = 1;
-	
+
 	struct _starpu_tag *tag= gettag_struct(id);
 	tag->job = job;
 	tag->is_assigned = 1;
-	
+
 	job->tag = tag;
 
 	/* the tag is now associated to a job */
@@ -226,11 +228,11 @@ void starpu_tag_declare_deps_array(starpu_tag_t id, unsigned ndeps, starpu_tag_t
 	struct _starpu_cg *cg = create_cg_tag(ndeps, tag_child);
 
 	STARPU_ASSERT(ndeps != 0);
-	
+
 	for (i = 0; i < ndeps; i++)
 	{
 		starpu_tag_t dep_id = array[i];
-		
+
 		/* id depends on dep_id
 		 * so cg should be among dep_id's successors*/
 		_STARPU_TRACE_TAG_DEPS(id, dep_id);
@@ -248,7 +250,7 @@ void starpu_tag_declare_deps_array(starpu_tag_t id, unsigned ndeps, starpu_tag_t
 void starpu_tag_declare_deps(starpu_tag_t id, unsigned ndeps, ...)
 {
 	unsigned i;
-	
+
 	/* create the associated completion group */
 	struct _starpu_tag *tag_child = gettag_struct(id);
 
@@ -257,14 +259,14 @@ void starpu_tag_declare_deps(starpu_tag_t id, unsigned ndeps, ...)
 	struct _starpu_cg *cg = create_cg_tag(ndeps, tag_child);
 
 	STARPU_ASSERT(ndeps != 0);
-	
+
 	va_list pa;
 	va_start(pa, ndeps);
 	for (i = 0; i < ndeps; i++)
 	{
 		starpu_tag_t dep_id;
 		dep_id = va_arg(pa, starpu_tag_t);
-	
+
 		/* id depends on dep_id
 		 * so cg should be among dep_id's successors*/
 		_STARPU_TRACE_TAG_DEPS(id, dep_id);
@@ -291,7 +293,8 @@ int starpu_tag_wait_array(unsigned ntags, starpu_tag_t *id)
 	_STARPU_LOG_IN();
 
 	/* It is forbidden to block within callbacks or codelets */
-	if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls())) {
+	if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls()))
+	{
 		_STARPU_LOG_OUT_TAG("edeadlk");
 		return -EDEADLK;
 	}
@@ -300,7 +303,7 @@ int starpu_tag_wait_array(unsigned ntags, starpu_tag_t *id)
 	for (i = 0, current = 0; i < ntags; i++)
 	{
 		struct _starpu_tag *tag = gettag_struct(id[i]);
-		
+
 		_starpu_spin_lock(&tag->lock);
 
 		if (tag->state == STARPU_DONE)
@@ -321,7 +324,7 @@ int starpu_tag_wait_array(unsigned ntags, starpu_tag_t *id)
 		_STARPU_LOG_OUT_TAG("all deps are already fulfilled");
 		return 0;
 	}
-	
+
 	/* there is at least one task that is not finished */
 	struct _starpu_cg *cg = create_cg_apps(current);
 

+ 4 - 2
src/core/dependencies/tags.h

@@ -25,7 +25,8 @@
 
 #define _STARPU_TAG_SIZE        (sizeof(starpu_tag_t)*8)
 
-enum _starpu_tag_state {
+enum _starpu_tag_state
+{
 	/* this tag is not declared by any task */
 	STARPU_INVALID_STATE,
 	/* _starpu_tag_declare was called to associate the tag to a task */
@@ -44,7 +45,8 @@ enum _starpu_tag_state {
 
 struct _starpu_job;
 
-struct _starpu_tag {
+struct _starpu_tag
+{
 	struct _starpu_spinlock lock;
 	starpu_tag_t id; /* an identifier for the task */
 	enum _starpu_tag_state state;

+ 2 - 2
src/core/dependencies/task_deps.c

@@ -48,7 +48,8 @@ static void _starpu_task_add_succ(struct _starpu_job *j, struct _starpu_cg *cg)
 
 	_starpu_add_successor_to_cg_list(&j->job_successors, cg);
 
-	if (j->terminated) {
+	if (j->terminated)
+	{
 		/* the task was already completed sooner */
 		_starpu_notify_cg(cg);
 	}
@@ -90,6 +91,5 @@ void starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, st
 		_STARPU_PTHREAD_MUTEX_UNLOCK(&dep_job->sync_mutex);
 	}
 
-	
 	_STARPU_PTHREAD_MUTEX_UNLOCK(&job->sync_mutex);
 }

+ 2 - 1
src/core/errorcheck.h

@@ -21,7 +21,8 @@
 #include <starpu.h>
 
 /* This type describes in which state a worker may be. */
-enum _starpu_worker_status {
+enum _starpu_worker_status
+{
 	/* invalid status (for instance if we request the status of some thread
 	 * that is not controlled by StarPU) */
 	STATUS_INVALID,

+ 20 - 15
src/core/jobs.c

@@ -157,7 +157,7 @@ void _starpu_handle_job_termination(struct _starpu_job *j, unsigned job_is_alrea
 	if (!job_is_already_locked)
 		_STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
 
-	/* the callback is executed after the dependencies so that we may remove the tag 
+	/* the callback is executed after the dependencies so that we may remove the tag
  	 * of the task itself */
 	if (task->callback_func)
 	{
@@ -168,8 +168,8 @@ void _starpu_handle_job_termination(struct _starpu_job *j, unsigned job_is_alrea
 		/* so that we can check whether we are doing blocking calls
 		 * within the callback */
 		_starpu_set_local_worker_status(STATUS_CALLBACK);
-		
-		
+
+
 		/* Perhaps we have nested callbacks (eg. with chains of empty
 		 * tasks). So we store the current task and we will restore it
 		 * later. */
@@ -180,7 +180,7 @@ void _starpu_handle_job_termination(struct _starpu_job *j, unsigned job_is_alrea
 		_STARPU_TRACE_START_CALLBACK(j);
 		task->callback_func(task->callback_arg);
 		_STARPU_TRACE_END_CALLBACK(j);
-		
+
 		_starpu_set_current_task(current_task);
 
 		_starpu_set_local_worker_status(STATUS_UNKNOWN);
@@ -214,7 +214,8 @@ void _starpu_handle_job_termination(struct _starpu_job *j, unsigned job_is_alrea
 		if (!job_is_already_locked)
 			_STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
 	}
-	else {
+	else
+	{
 		/* no one is going to synchronize with that task so we release
 		 * the data structures now. In case the job was already locked
 		 * by the caller, it is its responsibility to destroy the task.
@@ -235,7 +236,7 @@ void _starpu_handle_job_termination(struct _starpu_job *j, unsigned job_is_alrea
 	_starpu_decrement_nready_tasks();
 }
 
-/* This function is called when a new task is submitted to StarPU 
+/* This function is called when a new task is submitted to StarPU
  * it returns 1 if the tag deps are not fulfilled, 0 otherwise */
 static unsigned _starpu_not_all_tag_deps_are_fulfilled(struct _starpu_job *j)
 {
@@ -259,7 +260,8 @@ static unsigned _starpu_not_all_tag_deps_are_fulfilled(struct _starpu_job *j)
                 j->task->status = STARPU_TASK_BLOCKED_ON_TAG;
 		ret = 1;
 	}
-	else {
+	else
+	{
 		/* existing deps (if any) are fulfilled */
 		tag->state = STARPU_READY;
 		/* already prepare for next run */
@@ -281,14 +283,15 @@ static unsigned _starpu_not_all_task_deps_are_fulfilled(struct _starpu_job *j, u
 	struct _starpu_cg_list *job_successors = &j->job_successors;
 
 	if (!job_is_already_locked)
-		_STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);	
+		_STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
 
 	if (!j->submitted || (job_successors->ndeps != job_successors->ndeps_completed))
 	{
                 j->task->status = STARPU_TASK_BLOCKED_ON_TASK;
 		ret = 1;
 	}
-	else {
+	else
+	{
 		/* existing deps (if any) are fulfilled */
 		/* already prepare for next run */
 		job_successors->ndeps_completed = 0;
@@ -301,8 +304,6 @@ static unsigned _starpu_not_all_task_deps_are_fulfilled(struct _starpu_job *j, u
 	return ret;
 }
 
-
-
 /*
  *	In order, we enforce tag, task and data dependencies. The task is
  *	passed to the scheduler only once all these constraints are fulfilled.
@@ -316,19 +317,22 @@ unsigned _starpu_enforce_deps_and_schedule(struct _starpu_job *j, unsigned job_i
         _STARPU_LOG_IN();
 
 	/* enforce tag dependencies */
-	if (_starpu_not_all_tag_deps_are_fulfilled(j)) {
+	if (_starpu_not_all_tag_deps_are_fulfilled(j))
+	{
                 _STARPU_LOG_OUT_TAG("not_all_tag_deps_are_fulfilled");
 		return 0;
         }
 
 	/* enforce task dependencies */
-	if (_starpu_not_all_task_deps_are_fulfilled(j, job_is_already_locked)) {
+	if (_starpu_not_all_task_deps_are_fulfilled(j, job_is_already_locked))
+	{
                 _STARPU_LOG_OUT_TAG("not_all_task_deps_are_fulfilled");
 		return 0;
         }
 
 	/* enforce data dependencies */
-	if (_starpu_submit_job_enforce_data_deps(j)) {
+	if (_starpu_submit_job_enforce_data_deps(j))
+	{
                 _STARPU_LOG_OUT_TAG("enforce_data_deps");
 		return 0;
         }
@@ -403,7 +407,8 @@ const char *_starpu_get_model_name(struct _starpu_job *j)
             && task->cl->model
             && task->cl->model->symbol)
                 return task->cl->model->symbol;
-        else {
+        else
+	{
 #ifdef STARPU_USE_FXT
                 return j->model_name;
 #else

+ 25 - 21
src/core/perfmodel/perfmodel.c

@@ -30,7 +30,7 @@
 #ifdef STARPU_HAVE_WINDOWS
 #include <windows.h>
 #endif
-		
+
 /* This flag indicates whether performance models should be calibrated or not.
  *	0: models need not be calibrated
  *	1: models must be calibrated
@@ -72,7 +72,7 @@ static double per_arch_task_expected_perf(struct starpu_perfmodel *model, enum s
 {
 	double exp = -1.0;
 	double (*per_arch_cost_model)(struct starpu_buffer_descr *);
-	
+
 	per_arch_cost_model = model->per_arch[arch][nimpl].cost_model;
 
 	if (per_arch_cost_model)
@@ -99,7 +99,8 @@ double starpu_worker_get_relative_speedup(enum starpu_perf_archtype perf_archtyp
 	{
 		return _STARPU_OPENCL_ALPHA;
 	}
-	else if (perf_archtype < STARPU_NARCH_VARIATIONS) {
+	else if (perf_archtype < STARPU_NARCH_VARIATIONS)
+	{
 		/* Gordon value */
 		return _STARPU_GORDON_ALPHA;
 	}
@@ -115,7 +116,8 @@ static double common_task_expected_perf(struct starpu_perfmodel *model, enum sta
 	double exp;
 	double alpha;
 
-	if (model->cost_model) {
+	if (model->cost_model)
+	{
 		exp = model->cost_model(task->buffers);
 		alpha = starpu_worker_get_relative_speedup(arch);
 
@@ -137,7 +139,8 @@ void _starpu_load_perfmodel(struct starpu_perfmodel *model)
 	if (!load_model)
 		return;
 
-	switch (model->type) {
+	switch (model->type)
+	{
 		case STARPU_PER_ARCH:
 		case STARPU_COMMON:
 			break;
@@ -160,9 +163,11 @@ void _starpu_load_perfmodel(struct starpu_perfmodel *model)
 
 static double starpu_model_expected_perf(struct starpu_task *task, struct starpu_perfmodel *model, enum starpu_perf_archtype arch,  unsigned nimpl)
 {
-	if (model) {
+	if (model)
+	{
 		struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
-		switch (model->type) {
+		switch (model->type)
+		{
 			case STARPU_PER_ARCH:
 
 				return per_arch_task_expected_perf(model, arch, task, nimpl);
@@ -182,7 +187,7 @@ static double starpu_model_expected_perf(struct starpu_task *task, struct starpu
 
 			default:
 				STARPU_ABORT();
-		};
+		}
 	}
 
 	/* no model was found */
@@ -211,7 +216,7 @@ double starpu_data_expected_transfer_time(starpu_data_handle_t handle, unsigned
 	/* If we don't need to read the content of the handle */
 	if (!(mode & STARPU_R))
 		return 0.0;
-	
+
 	if (_starpu_is_data_present_or_requested(handle, memory_node))
 		return 0.0;
 
@@ -259,9 +264,8 @@ void _starpu_get_perf_model_dir(char *path, size_t maxlen)
 	const char *home_path = getenv("HOME");
 	if (!home_path)
 		home_path = getenv("USERPROFILE");
-	if (!home_path) {
+	if (!home_path)
 		_STARPU_ERROR("couldn't find a home place to put starpu data\n");
-	}
 	snprintf(path, maxlen, "%s/.starpu/sampling/", home_path);
 #endif
 }
@@ -294,8 +298,8 @@ void _starpu_create_sampling_directory_if_needed(void)
 		/* The performance of the codelets are stored in
 		 * $STARPU_PERF_MODEL_DIR/codelets/ while those of the bus are stored in
 		 * $STARPU_PERF_MODEL_DIR/bus/ so that we don't have name collisions */
-		
-		/* Testing if a directory exists and creating it otherwise 
+
+		/* Testing if a directory exists and creating it otherwise
 		   may not be safe: it is possible that the permissions are
 		   changed in between. Instead, we create it and check if
 		   it already existed before */
@@ -305,13 +309,13 @@ void _starpu_create_sampling_directory_if_needed(void)
 		if (ret == -1)
 		{
 			STARPU_ASSERT(errno == EEXIST);
-	
+
 			/* make sure that it is actually a directory */
 			struct stat sb;
 			stat(perf_model_dir, &sb);
 			STARPU_ASSERT(S_ISDIR(sb.st_mode));
 		}
-	
+
 		/* Per-task performance models */
 		char perf_model_dir_codelets[256];
 		_starpu_get_perf_model_dir_codelets(perf_model_dir_codelets, 256);
@@ -320,13 +324,13 @@ void _starpu_create_sampling_directory_if_needed(void)
 		if (ret == -1)
 		{
 			STARPU_ASSERT(errno == EEXIST);
-	
+
 			/* make sure that it is actually a directory */
 			struct stat sb;
 			stat(perf_model_dir_codelets, &sb);
 			STARPU_ASSERT(S_ISDIR(sb.st_mode));
 		}
-	
+
 		/* Performance of the memory subsystem */
 		char perf_model_dir_bus[256];
 		_starpu_get_perf_model_dir_bus(perf_model_dir_bus, 256);
@@ -335,13 +339,13 @@ void _starpu_create_sampling_directory_if_needed(void)
 		if (ret == -1)
 		{
 			STARPU_ASSERT(errno == EEXIST);
-	
+
 			/* make sure that it is actually a directory */
 			struct stat sb;
 			stat(perf_model_dir_bus, &sb);
 			STARPU_ASSERT(S_ISDIR(sb.st_mode));
 		}
-	
+
 		/* Performance debug measurements */
 		char perf_model_dir_debug[256];
 		_starpu_get_perf_model_dir_debug(perf_model_dir_debug, 256);
@@ -350,13 +354,13 @@ void _starpu_create_sampling_directory_if_needed(void)
 		if (ret == -1)
 		{
 			STARPU_ASSERT(errno == EEXIST);
-	
+
 			/* make sure that it is actually a directory */
 			struct stat sb;
 			stat(perf_model_dir_debug, &sb);
 			STARPU_ASSERT(S_ISDIR(sb.st_mode));
 		}
-	
+
 		directory_existence_was_tested = 1;
 	}
 }

+ 2 - 1
src/core/perfmodel/perfmodel.h

@@ -33,7 +33,8 @@ struct _starpu_job;
 enum starpu_perf_archtype;
 
 ///* File format */
-//struct model_file_format {
+//struct model_file_format
+// {
 //	unsigned ncore_entries;
 //	unsigned ncuda_entries;
 //	/* contains core entries, then cuda ones */

+ 28 - 14
src/core/perfmodel/perfmodel_bus.c

@@ -47,7 +47,8 @@
 #define MAXCPUS	32
 
 /* timing is in µs per byte (i.e. slowness, inverse of bandwidth) */
-struct dev_timing {
+struct dev_timing
+{
 	int cpu_id;
 	double timing_htod;
 	double timing_dtoh;
@@ -785,12 +786,14 @@ static int load_bus_latency_file_content(void)
 			double latency;
 
 			n = fscanf(f, "%lf", &latency);
-			if (n != 1) {
+			if (n != 1)
+			{
 				fclose(f);
 				return 0;
 			}
 			n = getc(f);
-			if (n != '\t') {
+			if (n != '\t')
+			{
 				fclose(f);
 				return 0;
 			}
@@ -799,7 +802,8 @@ static int load_bus_latency_file_content(void)
 		}
 
 		n = getc(f);
-		if (n != '\n') {
+		if (n != '\n')
+		{
 			fclose(f);
 			return 0;
 		}
@@ -852,7 +856,8 @@ static void write_bus_latency_file_content(void)
 			{
 				latency = 0.0;
 			}
-			else {
+			else
+			{
 				/* µs */
                                 latency = ((src && dst)?2000.0:500.0);
 			}
@@ -925,13 +930,15 @@ static int load_bus_bandwidth_file_content(void)
 			double bandwidth;
 
 			n = fscanf(f, "%lf", &bandwidth);
-			if (n != 1) {
+			if (n != 1)
+			{
 				fprintf(stderr,"didn't get a number\n");
 				fclose(f);
 				return 0;
 			}
 			n = getc(f);
-			if (n != '\t') {
+			if (n != '\t')
+			{
 				fclose(f);
 				return 0;
 			}
@@ -940,7 +947,8 @@ static int load_bus_bandwidth_file_content(void)
 		}
 
 		n = getc(f);
-		if (n != '\n') {
+		if (n != '\n')
+		{
 			fclose(f);
 			return 0;
 		}
@@ -1010,7 +1018,8 @@ static void write_bus_bandwidth_file_content(void)
 				bandwidth = 1.0/slowness;
 			}
 #endif
-			else {
+			else
+			{
 			        /* convention */
 			        bandwidth = 0.0;
 			}
@@ -1094,12 +1103,14 @@ static void check_bus_config_file()
 
         get_config_path(path, 256);
         res = access(path, F_OK);
-        if (res) {
+        if (res)
+	{
 		fprintf(stderr, "No performance model for the bus, calibrating...");
 		starpu_force_bus_sampling();
 		fprintf(stderr, "done\n");
         }
-        else {
+        else
+	{
                 FILE *f;
                 int ret, read_cuda, read_opencl;
                 unsigned read_cpus;
@@ -1130,17 +1141,20 @@ static void check_bus_config_file()
 #endif
 
                 // Checking if both configurations match
-                if (read_cpus != ncpus) {
+                if (read_cpus != ncpus)
+		{
 			fprintf(stderr, "Current configuration does not match the bus performance model (CPUS: (stored) %u != (current) %u), recalibrating...", read_cpus, ncpus);
                         starpu_force_bus_sampling();
 			fprintf(stderr, "done\n");
                 }
-                else if (read_cuda != ncuda) {
+                else if (read_cuda != ncuda)
+		{
                         fprintf(stderr, "Current configuration does not match the bus performance model (CUDA: (stored) %d != (current) %d), recalibrating...", read_cuda, ncuda);
                         starpu_force_bus_sampling();
 			fprintf(stderr, "done\n");
                 }
-                else if (read_opencl != nopencl) {
+                else if (read_opencl != nopencl)
+		{
                         fprintf(stderr, "Current configuration does not match the bus performance model (OpenCL: (stored) %d != (current) %d), recalibrating...", read_opencl, nopencl);
                         starpu_force_bus_sampling();
 			fprintf(stderr, "done\n");

+ 109 - 79
src/core/perfmodel/perfmodel_history.c

@@ -44,8 +44,6 @@ static struct starpu_model_list *registered_models = NULL;
 /*
  * History based model
  */
-
-
 static void insert_history_entry(struct starpu_history_entry *entry, struct starpu_history_list **list, struct starpu_htbl32_node **history_ptr)
 {
 	struct starpu_history_list *link;
@@ -61,7 +59,6 @@ static void insert_history_entry(struct starpu_history_entry *entry, struct star
 	STARPU_ASSERT(old == NULL);
 }
 
-
 static void dump_reg_model(FILE *f, struct starpu_perfmodel *model, unsigned arch, unsigned nimpl)
 {
 	struct starpu_per_arch_perfmodel *per_arch_model;
@@ -193,7 +190,8 @@ static void parse_per_arch_model_file(FILE *f, struct starpu_per_arch_perfmodel
 
 	/* parse cpu entries */
 	unsigned i;
-	for (i = 0; i < nentries; i++) {
+	for (i = 0; i < nentries; i++)
+	{
 		struct starpu_history_entry *entry = NULL;
 		if (scan_history)
 		{
@@ -202,7 +200,7 @@ static void parse_per_arch_model_file(FILE *f, struct starpu_per_arch_perfmodel
 		}
 
 		scan_history_entry(f, entry);
-		
+
 		/* insert the entry in the hashtable and the list structures  */
 		if (scan_history)
 			insert_history_entry(entry, &per_arch_model->list, &per_arch_model->history);
@@ -214,36 +212,44 @@ static void parse_arch(FILE *f, struct starpu_perfmodel *model, unsigned scan_hi
 	struct starpu_per_arch_perfmodel dummy;
 	int nimpls, implmax, skipimpl, impl;
 	unsigned ret, arch;
-	
 
-	for (arch = archmin; arch < archmax; arch++) {
+	for (arch = archmin; arch < archmax; arch++)
+	{
 		_starpu_drop_comments(f);
 		ret = fscanf(f, "%d\n", &nimpls);
 		STARPU_ASSERT(ret == 1);
 		implmax = STARPU_MIN(nimpls, STARPU_MAXIMPLEMENTATIONS);
 		skipimpl = nimpls - STARPU_MAXIMPLEMENTATIONS;
-		for (impl = 0; impl < implmax; impl++) {
+		for (impl = 0; impl < implmax; impl++)
+		{
 			parse_per_arch_model_file(f, &model->per_arch[arch][impl], scan_history);
 		}
-		if (skipimpl > 0) {
-			for (impl = 0; impl < skipimpl; impl++) {
+		if (skipimpl > 0)
+		{
+			for (impl = 0; impl < skipimpl; impl++)
+			{
 				parse_per_arch_model_file(f, &dummy, 0);
 			}
 		}
 	}
 
-	if (skiparch > 0) {
+	if (skiparch > 0)
+	{
 		_starpu_drop_comments(f);
 		ret = fscanf(f, "%d\n", &nimpls);
 		STARPU_ASSERT(ret == 1);
 		implmax = STARPU_MIN(nimpls, STARPU_MAXIMPLEMENTATIONS);
 		skipimpl = nimpls - STARPU_MAXIMPLEMENTATIONS;
-		for (arch = 0; arch < skiparch; arch ++) {
-			for (impl = 0; impl < implmax; impl++) {
+		for (arch = 0; arch < skiparch; arch ++)
+		{
+			for (impl = 0; impl < implmax; impl++)
+			{
 				parse_per_arch_model_file(f, &dummy, 0);
 			}
-			if (skipimpl > 0) {
-				for (impl = 0; impl < skipimpl; impl++) {
+			if (skipimpl > 0)
+			{
+				for (impl = 0; impl < skipimpl; impl++)
+				{
 					parse_per_arch_model_file(f, &dummy, 0);
 				}
 			}
@@ -270,9 +276,9 @@ static void parse_model_file(FILE *f, struct starpu_perfmodel *model, unsigned s
 	if (narchs > 0)
 	{
 		parse_arch(f, model, scan_history,
-				archmin,
-				STARPU_MIN(narchs, STARPU_MAXCPUS),
-				narchs - STARPU_MAXCPUS);
+			   archmin,
+			   STARPU_MIN(narchs, STARPU_MAXCPUS),
+			   narchs - STARPU_MAXCPUS);
 	}
 
 	/* Parsing CUDA devs */
@@ -284,9 +290,9 @@ static void parse_model_file(FILE *f, struct starpu_perfmodel *model, unsigned s
 	if (narchs > 0)
 	{
 		parse_arch(f, model, scan_history,
-				archmin,
-				archmin + STARPU_MIN(narchs, STARPU_MAXCUDADEVS),
-				narchs - STARPU_MAXCUDADEVS);
+			   archmin,
+			   archmin + STARPU_MIN(narchs, STARPU_MAXCUDADEVS),
+			   narchs - STARPU_MAXCUDADEVS);
 	}
 
 	/* Parsing OpenCL devs */
@@ -299,9 +305,9 @@ static void parse_model_file(FILE *f, struct starpu_perfmodel *model, unsigned s
 	if (narchs > 0)
 	{
 		parse_arch(f, model, scan_history,
-				archmin,
-				archmin + STARPU_MIN(narchs, STARPU_MAXOPENCLDEVS),
-				narchs - STARPU_MAXOPENCLDEVS);
+			   archmin,
+			   archmin + STARPU_MIN(narchs, STARPU_MAXOPENCLDEVS),
+			   narchs - STARPU_MAXOPENCLDEVS);
 	}
 
 	/* Parsing Gordon implementations */
@@ -314,9 +320,9 @@ static void parse_model_file(FILE *f, struct starpu_perfmodel *model, unsigned s
 	if (narchs > 0)
 	{
 		parse_arch(f, model, scan_history,
-				archmin,
-				archmin + max_gordondevs,
-				narchs - max_gordondevs);
+			   archmin,
+			   archmin + max_gordondevs,
+			   narchs - max_gordondevs);
 	}
 }
 
@@ -334,7 +340,8 @@ static void dump_per_arch_model_file(FILE *f, struct starpu_perfmodel *model, un
 	{
 		/* Dump the list of all entries in the history */
 		ptr = per_arch_model->list;
-		while(ptr) {
+		while(ptr)
+		{
 			nentries++;
 			ptr = ptr->next;
 		}
@@ -353,7 +360,8 @@ static void dump_per_arch_model_file(FILE *f, struct starpu_perfmodel *model, un
 	{
 		fprintf(f, "# hash\t\tsize\t\tmean\t\tdev\t\tsum\t\tsum2\t\tn\n");
 		ptr = per_arch_model->list;
-		while (ptr) {
+		while (ptr)
+		{
 			dump_history_entry(f, ptr->entry);
 			ptr = ptr->next;
 		}
@@ -374,7 +382,8 @@ static unsigned get_n_entries(struct starpu_perfmodel *model, unsigned arch, uns
 	{
 		/* Dump the list of all entries in the history */
 		ptr = per_arch_model->list;
-		while(ptr) {
+		while(ptr)
+		{
 			nentries++;
 			ptr = ptr->next;
 		}
@@ -403,21 +412,25 @@ static void dump_model_file(FILE *f, struct starpu_perfmodel *model)
 				break;
 		}
 
-		if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED) {
+		if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
+		{
 			for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
 				if (get_n_entries(model, arch, nimpl))
 				{
 					number_of_archs[idx]++;
 					break;
 				}
-		} else if (model->type == STARPU_REGRESSION_BASED) {
+		}
+		else if (model->type == STARPU_REGRESSION_BASED)
+		{
 			for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
 				if (model->per_arch[arch][nimpl].regression.nsample)
 				{
 					number_of_archs[idx]++;
 					break;
 				}
-		} else
+		}
+		else
 			STARPU_ASSERT(!"Unknown history-based performance model");
 	}
 
@@ -464,15 +477,19 @@ static void dump_model_file(FILE *f, struct starpu_perfmodel *model)
 		}
 
 		unsigned max_impl = 0;
-		if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED) {
+		if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
+		{
 			for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
 				if (get_n_entries(model, arch, nimpl))
 					max_impl = nimpl + 1;
-		} else if (model->type == STARPU_REGRESSION_BASED) {
+		}
+		else if (model->type == STARPU_REGRESSION_BASED)
+		{
 			for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
 				if (model->per_arch[arch][nimpl].regression.nsample)
 					max_impl = nimpl + 1;
-		} else
+		}
+		else
 			STARPU_ASSERT(!"Unknown history-based performance model");
 
 		if (max_impl == 0)
@@ -514,7 +531,7 @@ static void get_model_debug_path(struct starpu_perfmodel *model, const char *arc
 
 	_starpu_get_perf_model_dir_debug(path, maxlen);
 	strncat(path, model->symbol, maxlen);
-	
+
 	char hostname[32];
 	char *forced_hostname = getenv("STARPU_HOSTNAME");
 	if (forced_hostname && forced_hostname[0])
@@ -535,7 +552,8 @@ int _starpu_register_model(struct starpu_perfmodel *model)
 {
 	/* If the model has already been loaded, there is nothing to do */
 	_STARPU_PTHREAD_RWLOCK_RDLOCK(&registered_models_rwlock);
-	if (model->is_loaded) {
+	if (model->is_loaded)
+	{
 		_STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
 		return 0;
 	}
@@ -544,7 +562,8 @@ int _starpu_register_model(struct starpu_perfmodel *model)
 	/* We have to make sure the model has not been loaded since the
          * last time we took the lock */
 	_STARPU_PTHREAD_RWLOCK_WRLOCK(&registered_models_rwlock);
-	if (model->is_loaded) {
+	if (model->is_loaded)
+	{
 		_STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
 		return 0;
 	}
@@ -565,8 +584,10 @@ int _starpu_register_model(struct starpu_perfmodel *model)
 	unsigned arch;
 	unsigned nimpl;
 
-	for (arch = 0; arch < STARPU_NARCH_VARIATIONS; arch++) {
-		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) {
+	for (arch = 0; arch < STARPU_NARCH_VARIATIONS; arch++)
+	{
+		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
+		{
 			char debugpath[256];
 			starpu_perfmodel_debugfilepath(model, arch, debugpath, 256, nimpl);
 			model->per_arch[arch][nimpl].debug_file = fopen(debugpath, "a+");
@@ -583,7 +604,7 @@ static void get_model_path(struct starpu_perfmodel *model, char *path, size_t ma
 {
 	_starpu_get_perf_model_dir_codelets(path, maxlen);
 	strncat(path, model->symbol, maxlen);
-	
+
 	char hostname[32];
 	char *forced_hostname = getenv("STARPU_HOSTNAME");
 	if (forced_hostname && forced_hostname[0])
@@ -626,8 +647,9 @@ static void _starpu_dump_registered_models(void)
 
 	_STARPU_DEBUG("DUMP MODELS !\n");
 
-	while (node) {
-		save_history_based_model(node->model);		
+	while (node)
+	{
+		save_history_based_model(node->model);
 		node = node->next;
 
 		/* XXX free node */
@@ -657,12 +679,11 @@ void _starpu_deinitialize_registered_performance_models(void)
  * is still not loaded once we have the lock, we do load it.  */
 void _starpu_load_history_based_model(struct starpu_perfmodel *model, unsigned scan_history)
 {
-
 	STARPU_ASSERT(model);
 	STARPU_ASSERT(model->symbol);
-	
+
 	int already_loaded;
- 
+
 	_STARPU_PTHREAD_RWLOCK_RDLOCK(&registered_models_rwlock);
 	already_loaded = model->is_loaded;
 	_STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
@@ -681,7 +702,7 @@ void _starpu_load_history_based_model(struct starpu_perfmodel *model, unsigned s
 		_STARPU_PTHREAD_RWLOCK_UNLOCK(&registered_models_rwlock);
 		return;
 	}
-	
+
 	_STARPU_PTHREAD_RWLOCK_INIT(&model->model_rwlock, NULL);
 
 	_STARPU_PTHREAD_RWLOCK_WRLOCK(&model->model_rwlock);
@@ -695,12 +716,13 @@ void _starpu_load_history_based_model(struct starpu_perfmodel *model, unsigned s
 	_STARPU_DEBUG("Opening performance model file %s for model %s ... ", path, model->symbol);
 
 	unsigned calibrate_flag = _starpu_get_calibrate_flag();
-	model->benchmarking = calibrate_flag; 
-	
+	model->benchmarking = calibrate_flag;
+
 	/* try to open an existing file and load it */
 	int res;
-	res = access(path, F_OK); 
-	if (res == 0) {
+	res = access(path, F_OK);
+	if (res == 0)
+	{
 		if (calibrate_flag == 2)
 		{
 			/* The user specified that the performance model should
@@ -709,21 +731,24 @@ void _starpu_load_history_based_model(struct starpu_perfmodel *model, unsigned s
                         _STARPU_DEBUG("Overwrite existing file\n");
 			initialize_model(model);
 		}
-		else {
+		else
+		{
 			/* We load the available file */
 			_STARPU_DEBUG("File exists\n");
 			FILE *f;
 			f = fopen(path, "r");
 			STARPU_ASSERT(f);
-	
+
 			parse_model_file(f, model, scan_history);
-	
+
 			fclose(f);
 		}
 	}
-	else {
+	else
+	{
 		_STARPU_DEBUG("File does not exists\n");
-		if (!calibrate_flag) {
+		if (!calibrate_flag)
+		{
 			_STARPU_DISP("Warning: model %s is not calibrated, forcing calibration for this run. Use the STARPU_CALIBRATE environment variable to control this.\n", model->symbol);
 			_starpu_set_calibrate_flag(1);
 			model->benchmarking = 1;
@@ -753,16 +778,19 @@ int starpu_list_models(FILE *output)
 
         strncpy(path, perf_model_dir_codelets, 256);
         dp = opendir(path);
-        if (dp != NULL) {
-                while ((ep = readdir(dp))) {
+        if (dp != NULL)
+	{
+                while ((ep = readdir(dp)))
+		{
                         if (strcmp(ep->d_name, ".") && strcmp(ep->d_name, ".."))
                                 fprintf(output, "file: <%s>\n", ep->d_name);
                 }
                 closedir (dp);
                 return 0;
         }
-        else {
-                perror ("Couldn't open the directory");
+        else
+	{
+                perror("Couldn't open the directory");
                 return 1;
         }
 }
@@ -782,9 +810,11 @@ int starpu_load_history_debug(const char *symbol, struct starpu_perfmodel *model
 	/* does it exist ? */
 	int res;
 	res = access(path, F_OK);
-	if (res) {
+	if (res)
+	{
 		char *dot = strrchr(symbol, '.');
-		if (dot) {
+		if (dot)
+		{
 			char *symbol2 = strdup(symbol);
 			symbol2[dot-symbol] = '\0';
 			int ret;
@@ -846,7 +876,7 @@ void starpu_perfmodel_get_arch_name(enum starpu_perf_archtype arch, char *archna
 }
 
 void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model,
-		enum starpu_perf_archtype arch, char *path, size_t maxlen, unsigned nimpl)
+				    enum starpu_perf_archtype arch, char *path, size_t maxlen, unsigned nimpl)
 {
 	char archname[32];
 	starpu_perfmodel_get_arch_name(arch, archname, 32, nimpl);
@@ -880,7 +910,8 @@ double _starpu_non_linear_regression_based_job_expected_perf(struct starpu_perfm
 
 	if (regmodel->nl_valid && size >= regmodel->minx * 0.9 && size <= regmodel->maxx * 1.1)
 		exp = regmodel->a*pow((double)size, regmodel->b) + regmodel->c;
-	else {
+	else
+	{
 		uint32_t key = _starpu_compute_buffers_footprint(j);
 		struct starpu_per_arch_perfmodel *per_arch_model = &model->per_arch[arch][nimpl];
 		struct starpu_htbl32_node *history = per_arch_model->history;
@@ -892,7 +923,8 @@ double _starpu_non_linear_regression_based_job_expected_perf(struct starpu_perfm
 
 		if (entry && entry->nsample >= _STARPU_CALIBRATION_MINIMUM)
 			exp = entry->mean;
-		else if (!model->benchmarking) {
+		else if (!model->benchmarking)
+		{
 			_STARPU_DISP("Warning: model %s is not calibrated enough, forcing calibration for this run. Use the STARPU_CALIBRATE environment variable to control this.\n", model->symbol);
 			_starpu_set_calibrate_flag(1);
 			model->benchmarking = 1;
@@ -929,7 +961,8 @@ double _starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, e
 		/* Not calibrated enough */
 		exp = -1.0;
 
-	if (exp == -1.0 && !model->benchmarking) {
+	if (exp == -1.0 && !model->benchmarking)
+	{
 		_STARPU_DISP("Warning: model %s is not calibrated enough, forcing calibration for this run. Use the STARPU_CALIBRATE environment variable to control this.\n", model->symbol);
 		_starpu_set_calibrate_flag(1);
 		model->benchmarking = 1;
@@ -948,14 +981,11 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 
 		if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
 		{
-			uint32_t key = _starpu_compute_buffers_footprint(j);
 			struct starpu_history_entry *entry;
-
 			struct starpu_htbl32_node *history;
 			struct starpu_htbl32_node **history_ptr;
-
 			struct starpu_history_list **list;
-
+			uint32_t key = _starpu_compute_buffers_footprint(j);
 
 			history = per_arch_model->history;
 			history_ptr = &per_arch_model->history;
@@ -982,7 +1012,8 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 				insert_history_entry(entry, list, history_ptr);
 
 			}
-			else {
+			else
+			{
 				/* there is already some entry with the same footprint */
 				entry->sum += measured;
 				entry->sum2 += measured*measured;
@@ -992,10 +1023,10 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 				entry->mean = entry->sum / n;
 				entry->deviation = sqrt((entry->sum2 - (entry->sum*entry->sum)/n)/n);
 			}
-			
+
 			STARPU_ASSERT(entry);
 		}
-			
+
 		if (model->type == STARPU_REGRESSION_BASED || model->type == STARPU_NL_REGRESSION_BASED)
 		{
 			struct starpu_regression_model *reg_model;
@@ -1018,7 +1049,7 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 			reg_model->nsample++;
 
 			unsigned n = reg_model->nsample;
-			
+
 			double num = (n*reg_model->sumlnxlny - reg_model->sumlnx*reg_model->sumlny);
 			double denom = (n*reg_model->sumlnx2 - reg_model->sumlnx*reg_model->sumlnx);
 
@@ -1035,12 +1066,12 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 
 		if (!j->footprint_is_computed)
 			(void) _starpu_compute_buffers_footprint(j);
-			
+
 		STARPU_ASSERT(j->footprint_is_computed);
 
 		fprintf(debug_file, "0x%x\t%lu\t%f\t%f\t%f\t%d\t\t", j->footprint, (unsigned long) _starpu_job_get_data_size(j), measured, task->predicted, task->predicted_transfer, cpuid);
 		unsigned i;
-			
+
 		for (i = 0; i < task->cl->nbuffers; i++)
 		{
 			starpu_data_handle_t handle = task->buffers[i].handle;
@@ -1049,10 +1080,9 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 			STARPU_ASSERT(handle->ops->display);
 			handle->ops->display(handle, debug_file);
 		}
-		fprintf(debug_file, "\n");	
+		fprintf(debug_file, "\n");
 
 #endif
-		
 		_STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);
 	}
 }

+ 11 - 9
src/core/perfmodel/regression.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009, 2010, 2011  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -117,7 +117,8 @@ static unsigned find_list_size(struct starpu_history_list *list_history)
 	unsigned cnt = 0;
 
 	struct starpu_history_list *ptr = list_history;
-	while (ptr) {
+	while (ptr)
+	{
 		cnt++;
 		ptr = ptr->next;
 	}
@@ -143,7 +144,8 @@ static void dump_list(unsigned *x, double *y, struct starpu_history_list *list_h
 	struct starpu_history_list *ptr = list_history;
 	unsigned i = 0;
 
-	while (ptr) {
+	while (ptr)
+	{
 		x[i] = ptr->entry->size;
 		y[i] = ptr->entry->mean;
 
@@ -153,7 +155,7 @@ static void dump_list(unsigned *x, double *y, struct starpu_history_list *list_h
 }
 
 
-/* y = ax^b + c 
+/* y = ax^b + c
  * 	return 0 if success, -1 otherwise
  * 	if success, a, b and c are modified
  * */
@@ -171,7 +173,7 @@ int _starpu_regression_non_linear_power(struct starpu_history_list *ptr, double
 
 	double cmin = 0.0;
 	double cmax = find_list_min(y, n);
-	
+
 	unsigned iter;
 
 	double err = 100000.0;
@@ -180,7 +182,7 @@ int _starpu_regression_non_linear_power(struct starpu_history_list *ptr, double
 	{
 		double c1, c2;
 		double r1, r2;
-		
+
 		double radius = 0.01;
 
 		c1 = cmin + (0.5-radius)*(cmax - cmin);
@@ -197,7 +199,8 @@ int _starpu_regression_non_linear_power(struct starpu_history_list *ptr, double
 		{
 			cmax = (cmin + cmax)/2;
 		}
-		else {
+		else
+		{
 			/* 2 is better */
 			cmin = (cmin + cmax)/2;
 		}
@@ -210,7 +213,7 @@ int _starpu_regression_non_linear_power(struct starpu_history_list *ptr, double
 
 	*c = (cmin + cmax)/2;
 
-	*b = compute_b(*c, n, x, y); 
+	*b = compute_b(*c, n, x, y);
 	*a = exp(compute_a(*c, *b, n, x, y));
 
 	free(x);
@@ -218,4 +221,3 @@ int _starpu_regression_non_linear_power(struct starpu_history_list *ptr, double
 
 	return 0;
 }
-

+ 4 - 3
src/core/progress_hook.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -21,7 +21,8 @@
 
 #define NMAXHOOKS	16
 
-struct progression_hook {
+struct progression_hook
+{
 	unsigned (*func)(void *arg);
 	void *arg;
 	unsigned active;
@@ -48,7 +49,7 @@ int starpu_progression_hook_register(unsigned (*func)(void *arg), void *arg)
 			active_hook_cnt++;
 
 			_STARPU_PTHREAD_RWLOCK_UNLOCK(&progression_hook_rwlock);
-			
+
 			return hook;
 		}
 	}

+ 15 - 10
src/core/sched_policy.c

@@ -49,7 +49,8 @@ extern struct starpu_sched_policy _starpu_sched_parallel_heft_policy;
 extern struct starpu_sched_policy _starpu_sched_pgreedy_policy;
 extern struct starpu_sched_policy heft_policy;
 
-static struct starpu_sched_policy *predefined_policies[] = {
+static struct starpu_sched_policy *predefined_policies[] =
+{
 	&_starpu_sched_ws_policy,
 	&_starpu_sched_prio_policy,
 	&_starpu_sched_dm_policy,
@@ -98,7 +99,6 @@ static void load_sched_policy(struct starpu_sched_policy *sched_policy)
 
 static struct starpu_sched_policy *find_sched_policy_from_name(const char *policy_name)
 {
-
 	if (!policy_name)
 		return NULL;
 
@@ -109,7 +109,8 @@ static struct starpu_sched_policy *find_sched_policy_from_name(const char *polic
 		p = predefined_policies[i];
 		if (p->policy_name)
 		{
-			if (strcmp(policy_name, p->policy_name) == 0) {
+			if (strcmp(policy_name, p->policy_name) == 0)
+			{
 				/* we found a policy with the requested name */
 				return p;
 			}
@@ -124,7 +125,8 @@ static struct starpu_sched_policy *find_sched_policy_from_name(const char *polic
 static void display_sched_help_message(void)
 {
 	const char *sched_env = getenv("STARPU_SCHED");
-	if (sched_env && (strcmp(sched_env, "help") == 0)) {
+	if (sched_env && (strcmp(sched_env, "help") == 0))
+	{
 		fprintf(stderr, "STARPU_SCHED can be either of\n");
 
 		/* display the description of all predefined policies */
@@ -153,7 +155,8 @@ static struct starpu_sched_policy *select_sched_policy(struct _starpu_machine_co
 	{
 		sched_pol_name = user_conf->sched_policy_name;
 	}
-	else {
+	else
+	{
 		sched_pol_name = getenv("STARPU_SCHED");
 	}
 
@@ -184,7 +187,8 @@ void _starpu_init_sched_policy(struct _starpu_machine_config *config)
 	{
 		do_calibrate = config->user_conf->calibrate;
 	}
-	else {
+	else
+	{
 		int res = starpu_get_env_number("STARPU_CALIBRATE");
 		do_calibrate =  (res < 0)?0:(unsigned)res;
 	}
@@ -215,7 +219,7 @@ static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int wo
 	/* Is this a basic worker or a combined worker ? */
 	int is_basic_worker = (workerid < nbasic_workers);
 
-	unsigned memory_node; 
+	unsigned memory_node;
 	struct _starpu_worker *worker = NULL;
 	struct _starpu_combined_worker *combined_worker = NULL;
 
@@ -240,7 +244,8 @@ static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int wo
 	{
 		return _starpu_push_local_task(worker, task, 0);
 	}
-	else {
+	else
+	{
 		/* This is a combined worker so we create task aliases */
 		int worker_size = combined_worker->worker_size;
 		int *combined_workerid = combined_worker->combined_workerid;
@@ -292,7 +297,8 @@ int _starpu_push_task(struct _starpu_job *j, unsigned job_is_already_locked)
 	{
 		ret = _starpu_push_task_on_specific_worker(task, task->workerid);
 	}
-	else {
+	else
+	{
 		STARPU_ASSERT(policy.push_task);
 		ret = policy.push_task(task);
 	}
@@ -385,4 +391,3 @@ int starpu_push_local_task(int workerid, struct starpu_task *task, int back)
 	return _starpu_push_local_task(worker, task, back);
 }
 
-

+ 31 - 23
src/core/task.c

@@ -138,18 +138,18 @@ void starpu_task_destroy(struct starpu_task *task)
    /* If starpu_task_destroy is called in a callback, we just set the destroy
       flag. The task will be destroyed after the callback returns */
    if (task == starpu_get_current_task()
-       && _starpu_get_local_worker_status() == STATUS_CALLBACK) {
+       && _starpu_get_local_worker_status() == STATUS_CALLBACK)
+   {
 
-      task->destroy = 1;
+	   task->destroy = 1;
 
-   } else {
-
-      starpu_task_deinit(task);
-
-      /* TODO handle the case of task with detach = 1 and destroy = 1 */
-      /* TODO handle the case of non terminated tasks -> return -EINVAL */
-	
-      free(task);
+   }
+   else
+   {
+	   starpu_task_deinit(task);
+	   /* TODO handle the case of task with detach = 1 and destroy = 1 */
+	   /* TODO handle the case of non terminated tasks -> return -EINVAL */
+	   free(task);
    }
 }
 
@@ -158,13 +158,15 @@ int starpu_task_wait(struct starpu_task *task)
         _STARPU_LOG_IN();
 	STARPU_ASSERT(task);
 
-	if (task->detach || task->synchronous) {
+	if (task->detach || task->synchronous)
+	{
 		_STARPU_DEBUG("Task is detached or asynchronous. Waiting returns immediately\n");
 		_STARPU_LOG_OUT_TAG("einval");
 		return -EINVAL;
 	}
 
-	if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls())) {
+	if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls()))
+	{
 		_STARPU_LOG_OUT_TAG("edeadlk");
 		return -EDEADLK;
 	}
@@ -208,7 +210,7 @@ int _starpu_submit_job(struct _starpu_job *j)
 	_starpu_increment_nsubmitted_tasks();
 
 	_STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
-	
+
 	j->submitted = 1;
 
 	int ret = _starpu_enforce_deps_and_schedule(j, 1);
@@ -232,7 +234,8 @@ int starpu_task_submit(struct starpu_task *task)
 	{
 		/* Perhaps it is not possible to submit a synchronous
 		 * (blocking) task */
-                if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls())) {
+                if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls()))
+		{
                         _STARPU_LOG_OUT_TAG("EDEADLK");
 			return -EDEADLK;
                 }
@@ -245,12 +248,14 @@ int starpu_task_submit(struct starpu_task *task)
 	{
 		uint32_t where = task->cl->where;
 		unsigned i;
-		if (!_starpu_worker_exists(where)) {
+		if (!_starpu_worker_exists(where))
+		{
                         _STARPU_LOG_OUT_TAG("ENODEV");
 			return -ENODEV;
                 }
 		assert(task->cl->nbuffers <= STARPU_NMAXBUFS);
-		for (i = 0; i < task->cl->nbuffers; i++) {
+		for (i = 0; i < task->cl->nbuffers; i++)
+		{
 			/* Make sure handles are not partitioned */
 			assert(task->buffers[i].handle->nchildren == 0);
 		}
@@ -258,7 +263,8 @@ int starpu_task_submit(struct starpu_task *task)
 		/* In case we require that a task should be explicitely
 		 * executed on a specific worker, we make sure that the worker
 		 * is able to execute this task.  */
-		if (task->execute_on_a_specific_worker && !starpu_combined_worker_can_execute_task(task->workerid, task, 0)) {
+		if (task->execute_on_a_specific_worker && !starpu_combined_worker_can_execute_task(task->workerid, task, 0))
+		{
                         _STARPU_LOG_OUT_TAG("ENODEV");
 			return -ENODEV;
                 }
@@ -313,7 +319,7 @@ void starpu_display_codelet_stats(struct starpu_codelet *cl)
 		fprintf(stderr, "Statistics for codelet %s\n", cl->model->symbol);
 
 	unsigned long total = 0;
-	
+
 	for (worker = 0; worker < nworkers; worker++)
 		total += cl->per_worker_stats[worker];
 
@@ -342,7 +348,7 @@ int starpu_task_wait_for_all(void)
 
 	while (nsubmitted > 0)
 		_STARPU_PTHREAD_COND_WAIT(&submitted_cond, &submitted_mutex);
-	
+
 	_STARPU_PTHREAD_MUTEX_UNLOCK(&submitted_mutex);
 
 	return 0;
@@ -363,7 +369,7 @@ int starpu_task_wait_for_no_ready(void)
 
 	while (nready > 0)
 		_STARPU_PTHREAD_COND_WAIT(&submitted_cond, &submitted_mutex);
-	
+
 	_STARPU_PTHREAD_MUTEX_UNLOCK(&submitted_mutex);
 
 	return 0;
@@ -436,16 +442,18 @@ double _starpu_task_get_conversion_time(struct starpu_task *task)
 	int i;
 	double conversion_time = 0.0;
 
-	for (i = 0; i < task->cl->nbuffers; i++) {
+	for (i = 0; i < task->cl->nbuffers; i++)
+	{
 		starpu_data_handle_t handle = task->buffers[i].handle;
 		enum starpu_data_interface_id id = starpu_get_handle_interface_id(handle);
-		if (id == STARPU_MULTIFORMAT_INTERFACE_ID) {
+		if (id == STARPU_MULTIFORMAT_INTERFACE_ID)
+		{
 			struct starpu_multiformat_interface *tmp;
 			uint32_t node = starpu_worker_get_memory_node(task->workerid);
 			tmp = starpu_data_get_interface_on_node(handle, node);
 			conversion_time += tmp->conversion_time;
 			/* XXX : this may not be the right place to reset this field,
-			 * but we need to make sure the conversion time won't be counted 
+			 * but we need to make sure the conversion time won't be counted
                          * twice */
 			tmp->conversion_time = 0;
 		}

+ 14 - 8
src/core/task_bundle.c

@@ -88,7 +88,8 @@ int starpu_task_bundle_insert(struct starpu_task_bundle *bundle, struct starpu_t
 	{
 		bundle->list = entry;
 	}
-	else {
+	else
+	{
 		struct starpu_task_bundle_entry *item;
 		item = bundle->list;
 		while (item->next)
@@ -188,7 +189,8 @@ double starpu_task_bundle_expected_length(struct starpu_task_bundle *bundle,  en
 	struct starpu_task_bundle_entry *entry;
 	entry = bundle->list;
 
-	while (entry) {
+	while (entry)
+	{
 		double task_length = starpu_task_expected_length(entry->task, arch, nimpl);
 
 		/* In case the task is not calibrated, we consider the task
@@ -198,7 +200,7 @@ double starpu_task_bundle_expected_length(struct starpu_task_bundle *bundle,  en
 
 		entry = entry->next;
 	}
-	
+
 	_STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex);
 
 	return expected_length;
@@ -215,7 +217,8 @@ double starpu_task_bundle_expected_power(struct starpu_task_bundle *bundle,  enu
 	struct starpu_task_bundle_entry *entry;
 	entry = bundle->list;
 
-	while (entry) {
+	while (entry)
+	{
 		double task_power = starpu_task_expected_power(entry->task, arch, nimpl);
 
 		/* In case the task is not calibrated, we consider the task
@@ -225,13 +228,14 @@ double starpu_task_bundle_expected_power(struct starpu_task_bundle *bundle,  enu
 
 		entry = entry->next;
 	}
-	
+
 	_STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex);
 
 	return expected_power;
 }
 
-struct handle_list {
+struct handle_list
+{
 	starpu_data_handle_t handle;
 	enum starpu_access_mode mode;
 	struct handle_list *next;
@@ -271,7 +275,8 @@ static void insertion_handle_sorted(struct handle_list **listp, starpu_data_hand
 		/* The handle is already in the list */
 		prev->mode |= mode;
 	}
-	else {
+	else
+	{
 		/* The handle was not in the list, we insert it after prev */
 		struct handle_list *link = (struct handle_list *) malloc(sizeof(struct handle_list));
 		STARPU_ASSERT(link);
@@ -293,7 +298,8 @@ double starpu_task_bundle_expected_data_transfer_time(struct starpu_task_bundle
 
 	/* For each task in the bundle */
 	struct starpu_task_bundle_entry *entry = bundle->list;
-	while (entry) {
+	while (entry)
+	{
 		struct starpu_task *task = entry->task;
 
 		if (task->cl)

+ 79 - 42
src/core/topology.c

@@ -42,7 +42,6 @@
 #define hwloc_bitmap_singlify hwloc_cpuset_singlify
 #endif
 
-		
 static unsigned topology_is_initialized = 0;
 
 static void _starpu_initialize_workers_bindid(struct _starpu_machine_config *config);
@@ -92,9 +91,11 @@ static void _starpu_initialize_workers_opencl_gpuid(struct _starpu_machine_confi
                 unsigned tmp[STARPU_NMAXWORKERS];
                 unsigned nb=0;
                 int i;
-                for(i=0 ; i<STARPU_NMAXWORKERS ; i++) {
+                for(i=0 ; i<STARPU_NMAXWORKERS ; i++)
+		{
                         uint32_t key = _starpu_crc32_be(config->topology.workers_opencl_gpuid[i], 0);
-                        if (_starpu_htbl_search_32(devices_using_cuda, key) == NULL) {
+                        if (_starpu_htbl_search_32(devices_using_cuda, key) == NULL)
+			{
                                 tmp[nb] = topology->workers_opencl_gpuid[i];
                                 nb++;
                         }
@@ -110,9 +111,11 @@ static void _starpu_initialize_workers_opencl_gpuid(struct _starpu_machine_confi
                 unsigned nb=0;
                 int i;
 
-                for(i=0 ; i<STARPU_NMAXWORKERS ; i++) {
+                for(i=0 ; i<STARPU_NMAXWORKERS ; i++)
+		{
                         uint32_t key = _starpu_crc32_be(topology->workers_opencl_gpuid[i], 0);
-                        if (_starpu_htbl_search_32(devices_already_used, key) == NULL) {
+                        if (_starpu_htbl_search_32(devices_already_used, key) == NULL)
+			{
                                 _starpu_htbl_insert_32(&devices_already_used, key, config);
                                 tmp[nb] = topology->workers_opencl_gpuid[i];
                                 nb ++;
@@ -161,7 +164,8 @@ static void _starpu_initialize_workers_gpuid(int use_explicit_workers_gpuid, int
 		/* we use the content of the STARPU_WORKERS_CUDAID env. variable */
 		for (i = 0; i < STARPU_NMAXWORKERS; i++)
 		{
-			if (!wrap) {
+			if (!wrap)
+			{
 				long int val;
 				val = strtol(strval, &endptr, 10);
 				if (endptr != strval)
@@ -169,18 +173,20 @@ static void _starpu_initialize_workers_gpuid(int use_explicit_workers_gpuid, int
 					workers_gpuid[i] = (unsigned)val;
 					strval = endptr;
 				}
-				else {
+				else
+				{
 					/* there must be at least one entry */
 					STARPU_ASSERT(i != 0);
 					number_of_entries = i;
-	
+
 					/* there is no more values in the string */
 					wrap = 1;
 
 					workers_gpuid[i] = workers_gpuid[0];
 				}
 			}
-			else {
+			else
+			{
 				workers_gpuid[i] = workers_gpuid[i % number_of_entries];
 			}
 		}
@@ -262,7 +268,7 @@ static void _starpu_init_topology(struct _starpu_machine_config *config)
 unsigned _starpu_topology_get_nhwcpu(struct _starpu_machine_config *config)
 {
 	_starpu_init_topology(config);
-	
+
 	return config->topology.nhwcpus;
 }
 
@@ -290,7 +296,8 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config,
 		/* the user explicitely disabled CUDA */
 		topology->ncudagpus = 0;
 	}
-	else {
+	else
+	{
 		/* we need to initialize CUDA early to count the number of devices */
 		_starpu_init_cuda();
 
@@ -298,16 +305,21 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config,
 		{
 			explicitval = user_conf->ncuda;
 		}
-		else {
+		else
+		{
 			explicitval = starpu_get_env_number("STARPU_NCUDA");
 		}
 
-		if (explicitval < 0) {
+		if (explicitval < 0)
+		{
 			config->topology.ncudagpus =
 				STARPU_MIN(_starpu_get_cuda_device_count(), STARPU_MAXCUDADEVS);
-		} else {
+		}
+		else
+		{
 			/* use the specified value */
-			if (explicitval > STARPU_MAXCUDADEVS) {
+			if (explicitval > STARPU_MAXCUDADEVS)
+			{
 				fprintf(stderr,"# Warning: %d CUDA devices requested. Only %d enabled. Use configure option --enable-maxcudadev=xxx to update the maximum value of supported CUDA devices.\n", explicitval, STARPU_MAXCUDADEVS);
 				explicitval = STARPU_MAXCUDADEVS;
 			}
@@ -329,7 +341,7 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config,
 		int devid = _starpu_get_next_cuda_gpuid(config);
 		enum starpu_perf_archtype arch = STARPU_CUDA_DEFAULT + devid;
 		config->workers[topology->nworkers + cudagpu].devid = devid;
-		config->workers[topology->nworkers + cudagpu].perf_arch = arch; 
+		config->workers[topology->nworkers + cudagpu].perf_arch = arch;
 		config->workers[topology->nworkers + cudagpu].worker_mask = STARPU_CUDA;
 		config->worker_mask |= STARPU_CUDA;
 
@@ -346,7 +358,8 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config,
 		/* the user explicitely disabled OpenCL */
 		topology->nopenclgpus = 0;
 	}
-	else {
+	else
+	{
 		/* we need to initialize OpenCL early to count the number of devices */
 		int nb_devices;
 		_starpu_opencl_init();
@@ -356,20 +369,25 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config,
 		{
 			explicitval = user_conf->nopencl;
 		}
-		else {
+		else
+		{
 			explicitval = starpu_get_env_number("STARPU_NOPENCL");
 		}
 
 
-		if (explicitval < 0) {
+		if (explicitval < 0)
+		{
 			topology->nopenclgpus = nb_devices;
 		}
-		else {
-			if (explicitval > nb_devices) {
+		else
+		{
+			if (explicitval > nb_devices)
+			{
 				/* The user requires more OpenCL devices than there is available */
 				topology->nopenclgpus = nb_devices;
 			}
-			else {
+			else
+			{
 				/* use the specified value */
 				topology->nopenclgpus = (unsigned)explicitval;
 			}
@@ -388,32 +406,38 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config,
 	for (openclgpu = 0; openclgpu < topology->nopenclgpus; openclgpu++)
 	{
 		int devid = _starpu_get_next_opencl_gpuid(config);
-		if (devid == -1) { // There is no more devices left
+		if (devid == -1)
+		{ // There is no more devices left
 			topology->nopenclgpus = openclgpu;
 			break;
 		}
 		config->workers[topology->nworkers + openclgpu].arch = STARPU_OPENCL_WORKER;
 		enum starpu_perf_archtype arch = STARPU_OPENCL_DEFAULT + devid;
 		config->workers[topology->nworkers + openclgpu].devid = devid;
-		config->workers[topology->nworkers + openclgpu].perf_arch = arch; 
+		config->workers[topology->nworkers + openclgpu].perf_arch = arch;
 		config->workers[topology->nworkers + openclgpu].worker_mask = STARPU_OPENCL;
 		config->worker_mask |= STARPU_OPENCL;
 	}
 
 	topology->nworkers += topology->nopenclgpus;
 #endif
-	
+
 #ifdef STARPU_USE_GORDON
-	if (user_conf && (user_conf->ncuda != -1)) {
+	if (user_conf && (user_conf->ncuda != -1))
+	{
 		explicitval = user_conf->ncuda;
 	}
-	else {
+	else
+	{
 		explicitval = starpu_get_env_number("STARPU_NGORDON");
 	}
 
-	if (explicitval < 0) {
+	if (explicitval < 0)
+	{
 		topology->ngordon_spus = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, -1);
-	} else {
+	}
+	else
+	{
 		/* use the specified value */
 		topology->ngordon_spus = (unsigned)explicitval;
 		STARPU_ASSERT(topology->ngordon_spus <= NMAXGORDONSPUS);
@@ -440,20 +464,25 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config,
 /* we put the CPU section after the accelerator : in case there was an
  * accelerator found, we devote one cpu */
 #ifdef STARPU_USE_CPU
-	if (user_conf && (user_conf->ncpus != -1)) {
+	if (user_conf && (user_conf->ncpus != -1))
+	{
 		explicitval = user_conf->ncpus;
 	}
-	else {
+	else
+	{
 		explicitval = starpu_get_env_number("STARPU_NCPUS");
 	}
 
-	if (explicitval < 0) {
+	if (explicitval < 0)
+	{
 		unsigned already_busy_cpus = (topology->ngordon_spus?1:0) + topology->ncudagpus + topology->nopenclgpus;
 		long avail_cpus = topology->nhwcpus - (use_accelerator?already_busy_cpus:0);
 		if (avail_cpus < 0)
 			avail_cpus = 0;
 		topology->ncpus = STARPU_MIN(avail_cpus, STARPU_MAXCPUS);
-	} else {
+	}
+	else
+	{
 		/* use the specified value */
 		topology->ncpus = (unsigned)explicitval;
 		STARPU_ASSERT(topology->ncpus <= STARPU_MAXCPUS);
@@ -521,7 +550,8 @@ static void _starpu_initialize_workers_bindid(struct _starpu_machine_config *con
 		/* we use the content of the STARPU_WORKERS_CUDAID env. variable */
 		for (i = 0; i < STARPU_NMAXWORKERS; i++)
 		{
-			if (!wrap) {
+			if (!wrap)
+			{
 				long int val;
 				val = strtol(strval, &endptr, 10);
 				if (endptr != strval)
@@ -529,7 +559,8 @@ static void _starpu_initialize_workers_bindid(struct _starpu_machine_config *con
 					topology->workers_bindid[i] = (unsigned)(val % topology->nhwcpus);
 					strval = endptr;
 				}
-				else {
+				else
+				{
 					/* there must be at least one entry */
 					STARPU_ASSERT(i != 0);
 					number_of_entries = i;
@@ -540,7 +571,8 @@ static void _starpu_initialize_workers_bindid(struct _starpu_machine_config *con
 					topology->workers_bindid[i] = topology->workers_bindid[0];
 				}
 			}
-			else {
+			else
+			{
 				topology->workers_bindid[i] = topology->workers_bindid[i % number_of_entries];
 			}
 		}
@@ -632,7 +664,8 @@ void _starpu_bind_thread_on_cpu(struct _starpu_machine_config *config STARPU_ATT
 
 #elif defined(__MINGW32__) || defined(__CYGWIN__)
 	DWORD mask = 1 << cpuid;
-	if (!SetThreadAffinityMask(GetCurrentThread(), mask)) {
+	if (!SetThreadAffinityMask(GetCurrentThread(), mask))
+	{
 		fprintf(stderr,"SetThreadMaskAffinity(%lx) failed\n", mask);
 		STARPU_ABORT();
 	}
@@ -667,9 +700,10 @@ static void _starpu_init_workers_binding(struct _starpu_machine_config *config)
 		/* Perhaps the worker has some "favourite" bindings  */
 		int *preferred_binding = NULL;
 		int npreferred = 0;
-		
+
 		/* select the memory node that contains worker's memory */
-		switch (workerarg->arch) {
+		switch (workerarg->arch)
+		{
 			case STARPU_CPU_WORKER:
 			/* "dedicate" a cpu cpu to that worker */
 				is_a_set_of_accelerators = 0;
@@ -702,7 +736,8 @@ static void _starpu_init_workers_binding(struct _starpu_machine_config *config)
 				for (worker2 = 0; worker2 < worker; worker2++)
 				{
 					struct _starpu_worker *workerarg = &config->workers[worker];
-					if (workerarg->arch == STARPU_CUDA_WORKER) {
+					if (workerarg->arch == STARPU_CUDA_WORKER)
+					{
 						unsigned memory_node2 = starpu_worker_get_memory_node(worker2);
 						_starpu_register_bus(memory_node2, memory_node);
 						_starpu_register_bus(memory_node, memory_node2);
@@ -732,13 +767,15 @@ static void _starpu_init_workers_binding(struct _starpu_machine_config *config)
 				STARPU_ABORT();
 		}
 
-		if (is_a_set_of_accelerators) {
+		if (is_a_set_of_accelerators)
+		{
 			if (accelerator_bindid == -1)
 				accelerator_bindid = _starpu_get_next_bindid(config, preferred_binding, npreferred);
 
 			workerarg->bindid = accelerator_bindid;
 		}
-		else {
+		else
+		{
 			workerarg->bindid = _starpu_get_next_bindid(config, preferred_binding, npreferred);
 		}
 

+ 52 - 36
src/core/workers.c

@@ -52,7 +52,7 @@ struct _starpu_machine_config *_starpu_get_machine_config(void)
 uint32_t _starpu_worker_exists(uint32_t task_mask)
 {
 	return (task_mask & config.worker_mask);
-} 
+}
 
 uint32_t _starpu_can_submit_cuda_task(void)
 {
@@ -71,7 +71,8 @@ uint32_t _starpu_can_submit_opencl_task(void)
 
 static int _starpu_can_use_nth_implementation(enum starpu_archtype arch, struct starpu_codelet *cl, unsigned nimpl)
 {
-	switch(arch) {
+	switch(arch)
+	{
 	case STARPU_CPU_WORKER:
 		if (cl->cpu_func == STARPU_MULTIPLE_CPU_IMPLEMENTATIONS)
 			return cl->cpu_funcs[nimpl] != NULL;
@@ -119,7 +120,8 @@ int starpu_combined_worker_can_execute_task(unsigned workerid, struct starpu_tas
 		return !!((task->cl->where & config.workers[workerid].worker_mask) &&
 				_starpu_can_use_nth_implementation(config.workers[workerid].arch, task->cl, nimpl));
 	}
-	else {
+	else
+	{
 		if ((cl->type == STARPU_SPMD) || (cl->type == STARPU_FORKJOIN))
 		{
 			/* TODO we should add other types of constraints */
@@ -142,7 +144,7 @@ int starpu_combined_worker_can_execute_task(unsigned workerid, struct starpu_tas
  */
 
 #ifdef STARPU_USE_GORDON
-static unsigned gordon_inited = 0;	
+static unsigned gordon_inited = 0;
 static struct _starpu_worker_set gordon_worker_set;
 #endif
 
@@ -187,19 +189,20 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *config)
 		workerarg->terminated_jobs = _starpu_job_list_new();
 
 		starpu_task_list_init(&workerarg->local_tasks);
-	
+
 		workerarg->status = STATUS_INITIALIZING;
 
 		_STARPU_DEBUG("initialising worker %u\n", worker);
 
 		_starpu_init_worker_queue(workerarg);
 
-		switch (workerarg->arch) {
+		switch (workerarg->arch)
+		{
 #ifdef STARPU_USE_CPU
 			case STARPU_CPU_WORKER:
 				workerarg->set = NULL;
 				workerarg->worker_is_initialized = 0;
-				pthread_create(&workerarg->worker_thread, 
+				pthread_create(&workerarg->worker_thread,
 						NULL, _starpu_cpu_worker, workerarg);
 				break;
 #endif
@@ -207,7 +210,7 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *config)
 			case STARPU_CUDA_WORKER:
 				workerarg->set = NULL;
 				workerarg->worker_is_initialized = 0;
-				pthread_create(&workerarg->worker_thread, 
+				pthread_create(&workerarg->worker_thread,
 						NULL, _starpu_cuda_worker, workerarg);
 
 				break;
@@ -216,23 +219,23 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *config)
 			case STARPU_OPENCL_WORKER:
 				workerarg->set = NULL;
 				workerarg->worker_is_initialized = 0;
-				pthread_create(&workerarg->worker_thread, 
+				pthread_create(&workerarg->worker_thread,
 						NULL, _starpu_opencl_worker, workerarg);
 
 				break;
 #endif
 #ifdef STARPU_USE_GORDON
 			case STARPU_GORDON_WORKER:
-				/* we will only launch gordon once, but it will handle 
+				/* we will only launch gordon once, but it will handle
 				 * the different SPU workers */
 				if (!gordon_inited)
 				{
-					gordon_worker_set.nworkers = config->ngordon_spus; 
+					gordon_worker_set.nworkers = config->ngordon_spus;
 					gordon_worker_set.workers = &config->workers[worker];
 
 					gordon_worker_set.set_is_initialized = 0;
 
-					pthread_create(&gordon_worker_set.worker_thread, NULL, 
+					pthread_create(&gordon_worker_set.worker_thread, NULL,
 							_starpu_gordon_worker, &gordon_worker_set);
 
 					_STARPU_PTHREAD_MUTEX_LOCK(&gordon_worker_set.mutex);
@@ -243,7 +246,7 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *config)
 
 					gordon_inited = 1;
 				}
-				
+
 				workerarg->set = &gordon_worker_set;
 				gordon_worker_set.joined = 0;
 				workerarg->worker_is_running = 1;
@@ -259,10 +262,11 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *config)
 	{
 		struct _starpu_worker *workerarg = &config->workers[worker];
 
-		switch (workerarg->arch) {
+		switch (workerarg->arch)
+		{
 			case STARPU_CPU_WORKER:
 			case STARPU_CUDA_WORKER:
-			case STARPU_OPENCL_WORKER:			  
+			case STARPU_OPENCL_WORKER:
 				_STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
 				while (!workerarg->worker_is_initialized)
 					_STARPU_PTHREAD_COND_WAIT(&workerarg->ready_cond, &workerarg->mutex);
@@ -355,11 +359,12 @@ int starpu_init(struct starpu_conf *user_conf)
 		/* Wait for the other one changing it */
 		_STARPU_PTHREAD_COND_WAIT(&init_cond, &init_mutex);
 	init_count++;
-	if (initialized == INITIALIZED) {
-	  /* He initialized it, don't do it again, and let the others get the mutex */
-	  _STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
-	  return 0;
-	  }
+	if (initialized == INITIALIZED)
+	{
+		/* He initialized it, don't do it again, and let the others get the mutex */
+		_STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
+		return 0;
+	}
 	/* initialized == UNINITIALIZED */
 	initialized = CHANGING;
 	_STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
@@ -370,11 +375,11 @@ int starpu_init(struct starpu_conf *user_conf)
 #endif
 
 	srand(2008);
-	
+
 #ifdef STARPU_USE_FXT
 	_starpu_start_fxt_profiling();
 #endif
-	
+
 	_starpu_open_debug_logfile();
 
 	_starpu_data_interface_init();
@@ -390,7 +395,8 @@ int starpu_init(struct starpu_conf *user_conf)
 	config.user_conf = user_conf;
 
 	ret = _starpu_build_topology(&config);
-	if (ret) {
+	if (ret)
+	{
 		_STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
 		init_count--;
 		initialized = UNINITIALIZED;
@@ -402,7 +408,7 @@ int starpu_init(struct starpu_conf *user_conf)
 
 	/* We need to store the current task handled by the different
 	 * threads */
-	_starpu_initialize_current_task_key();	
+	_starpu_initialize_current_task_key();
 
 	/* initialize the scheduling policy */
 	_starpu_init_sched_policy(&config);
@@ -423,7 +429,7 @@ int starpu_init(struct starpu_conf *user_conf)
 }
 
 /*
- * Handle runtime termination 
+ * Handle runtime termination
  */
 
 static void _starpu_terminate_workers(struct _starpu_machine_config *config)
@@ -434,7 +440,7 @@ static void _starpu_terminate_workers(struct _starpu_machine_config *config)
 	for (workerid = 0; workerid < config->topology.nworkers; workerid++)
 	{
 		starpu_wake_all_blocked_workers();
-		
+
 		_STARPU_DEBUG("wait for worker %u\n", workerid);
 
 		struct _starpu_worker_set *set = config->workers[workerid].set;
@@ -442,13 +448,16 @@ static void _starpu_terminate_workers(struct _starpu_machine_config *config)
 
 		/* in case StarPU termination code is called from a callback,
  		 * we have to check if pthread_self() is the worker itself */
-		if (set){ 
-			if (!set->joined) {
+		if (set)
+		{
+			if (!set->joined)
+			{
 				if (!pthread_equal(pthread_self(), set->worker_thread))
 				{
 					status = pthread_join(set->worker_thread, NULL);
 #ifdef STARPU_VERBOSE
-					if (status) {
+					if (status)
+					{
 						_STARPU_DEBUG("pthread_join -> %d\n", status);
                                         }
 #endif
@@ -457,12 +466,14 @@ static void _starpu_terminate_workers(struct _starpu_machine_config *config)
 				set->joined = 1;
 			}
 		}
-		else {
+		else
+		{
 			if (!pthread_equal(pthread_self(), worker->worker_thread))
 			{
 				status = pthread_join(worker->worker_thread, NULL);
 #ifdef STARPU_VERBOSE
-				if (status) {
+				if (status)
+				{
 					_STARPU_DEBUG("pthread_join -> %d\n", status);
                                 }
 #endif
@@ -511,7 +522,8 @@ void starpu_shutdown(void)
 	const char *stats;
 	_STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
 	init_count--;
-	if (init_count) {
+	if (init_count)
+	{
 		_STARPU_DEBUG("Still somebody needing StarPU, don't deinitialize\n");
 		return;
 	}
@@ -633,7 +645,8 @@ int starpu_worker_get_id(void)
 	{
 		return worker->workerid;
 	}
-	else {
+	else
+	{
 		/* there is no worker associated to that thread, perhaps it is
 		 * a thread from the application or this is some SPU worker */
 		return -1;
@@ -649,7 +662,8 @@ int starpu_combined_worker_get_id(void)
 	{
 		return worker->combined_workerid;
 	}
-	else {
+	else
+	{
 		/* there is no worker associated to that thread, perhaps it is
 		 * a thread from the application or this is some SPU worker */
 		return -1;
@@ -665,7 +679,8 @@ int starpu_combined_worker_get_size(void)
 	{
 		return worker->worker_size;
 	}
-	else {
+	else
+	{
 		/* there is no worker associated to that thread, perhaps it is
 		 * a thread from the application or this is some SPU worker */
 		return -1;
@@ -681,7 +696,8 @@ int starpu_combined_worker_get_rank(void)
 	{
 		return worker->current_rank;
 	}
-	else {
+	else
+	{
 		/* there is no worker associated to that thread, perhaps it is
 		 * a thread from the application or this is some SPU worker */
 		return -1;

+ 11 - 8
src/core/workers.h

@@ -53,7 +53,8 @@
 
 #include <starpu_parameters.h>
 
-struct _starpu_worker {
+struct _starpu_worker
+{
 	struct _starpu_machine_config *config;
         pthread_mutex_t mutex;
 	enum starpu_archtype arch; /* what is the type of worker ? */
@@ -89,7 +90,8 @@ struct _starpu_worker {
 #endif
 };
 
-struct _starpu_combined_worker {
+struct _starpu_combined_worker
+{
 	enum starpu_perf_archtype perf_arch; /* in case there are different models of the same arch */
 	uint32_t worker_mask; /* what is the type of workers ? */
 	int worker_size;
@@ -104,9 +106,10 @@ struct _starpu_combined_worker {
 #endif
 };
 
-/* in case a single CPU worker may control multiple 
+/* in case a single CPU worker may control multiple
  * accelerators (eg. Gordon for n SPUs) */
-struct _starpu_worker_set {
+struct _starpu_worker_set
+{
         pthread_mutex_t mutex;
 	pthread_t worker_thread; /* the thread which runs the worker */
 	unsigned nworkers;
@@ -117,8 +120,8 @@ struct _starpu_worker_set {
 	unsigned set_is_initialized;
 };
 
-struct _starpu_machine_config {
-
+struct _starpu_machine_config
+{
 	struct starpu_machine_topology topology;
 
 #ifdef STARPU_HAVE_HWLOC
@@ -127,13 +130,13 @@ struct _starpu_machine_config {
 
 	/* Where to bind workers ? */
 	int current_bindid;
-	
+
 	/* Which GPU(s) do we use for CUDA ? */
 	int current_cuda_gpuid;
 
 	/* Which GPU(s) do we use for OpenCL ? */
 	int current_opencl_gpuid;
-	
+
 	/* Basic workers : each of this worker is running its own driver and
 	 * can be combined with other basic workers. */
 	struct _starpu_worker workers[STARPU_NMAXWORKERS];

+ 44 - 31
src/datawizard/coherency.c

@@ -40,7 +40,8 @@ uint32_t _starpu_select_src_node(starpu_data_handle_t handle, unsigned destinati
 
 	for (node = 0; node < nnodes; node++)
 	{
-		if (handle->per_node[node].state != STARPU_INVALID) {
+		if (handle->per_node[node].state != STARPU_INVALID)
+		{
 			/* we found a copy ! */
 			src_node_mask |= (1<<node);
 		}
@@ -66,11 +67,14 @@ uint32_t _starpu_select_src_node(starpu_data_handle_t handle, unsigned destinati
 				if (!link_supports_direct_transfers(handle, i, destination, &handling_node))
 					continue;
 
-				if (time == 0.0) {
+				if (time == 0.0)
+				{
 					/* No estimation, will have to revert to dumb strategy */
 					cost = 0.0;
 					break;
-				} else if (time < cost) {
+				}
+				else if (time < cost)
+				{
 					cost = time;
 					src_node = i;
 				}
@@ -97,7 +101,7 @@ uint32_t _starpu_select_src_node(starpu_data_handle_t handle, unsigned destinati
 #ifndef HAVE_CUDA_MEMCPY_PEER
 					_starpu_get_node_kind(i) != STARPU_CUDA_RAM &&
 #endif
-					_starpu_get_node_kind(i) != STARPU_OPENCL_RAM)	
+					_starpu_get_node_kind(i) != STARPU_OPENCL_RAM)
 				break ;
 		}
 	}
@@ -123,7 +127,8 @@ void _starpu_update_data_state(starpu_data_handle_t handle,
 	unsigned requesting_node = requesting_replicate->memory_node;
 	requesting_replicate->requested[requesting_node] = 0;
 
-	if (mode & STARPU_W) {
+	if (mode & STARPU_W)
+	{
 		/* the requesting node now has the only valid copy */
 		uint32_t node;
 		for (node = 0; node < nnodes; node++)
@@ -131,7 +136,8 @@ void _starpu_update_data_state(starpu_data_handle_t handle,
 
 		requesting_replicate->state = STARPU_OWNER;
 	}
-	else { /* read only */
+	else
+	{ /* read only */
 		if (requesting_replicate->state != STARPU_OWNER)
 		{
 			/* there was at least another copy of the data */
@@ -227,7 +233,8 @@ static int determine_request_path(starpu_data_handle_t handle,
 	unsigned handling_node;
 	int link_is_valid = link_supports_direct_transfers(handle, src_node, dst_node, &handling_node);
 
-	if (!link_is_valid) {
+	if (!link_is_valid)
+	{
 		/* We need an intermediate hop to implement data staging
 		 * through main memory. */
 		STARPU_ASSERT(max_len >= 2);
@@ -246,9 +253,10 @@ static int determine_request_path(starpu_data_handle_t handle,
 
 		return 2;
 	}
-	else {
+	else
+	{
 		STARPU_ASSERT(max_len >= 1);
-		
+
 		src_nodes[0] = src_node;
 		dst_nodes[0] = dst_node;
 		handling_nodes[0] = handling_node;
@@ -273,17 +281,18 @@ static struct _starpu_data_request *_starpu_search_existing_data_request(struct
 	if (r)
 	{
 		_starpu_spin_lock(&r->lock);
-                
+
                 /* perhaps we need to "upgrade" the request */
-		if (is_prefetch < r->prefetch) 
+		if (is_prefetch < r->prefetch)
 			_starpu_update_prefetch_status(r);
-		
+
 		if (mode & STARPU_R)
 		{
 			/* in case the exisiting request did not imply a memory
 			 * transfer yet, we have to increment the refcnt now
 			 * (so that the source remains valid) */
-			if (!(r->mode & STARPU_R)) {
+			if (!(r->mode & STARPU_R))
+			{
 				replicate->refcnt++;
 				replicate->handle->busy_count++;
 			}
@@ -302,21 +311,21 @@ static struct _starpu_data_request *_starpu_search_existing_data_request(struct
 
 /*
  * This function is called when the data is needed on the local node, this
- * returns a pointer to the local copy 
+ * returns a pointer to the local copy
  *
  *			R 	STARPU_W 	STARPU_RW
  *	Owner		OK	OK	OK
  *	Shared		OK	1	1
  *	Invalid		2	3	4
  *
- * case 1 : shared + (read)write : 
+ * case 1 : shared + (read)write :
  * 	no data copy but shared->Invalid/Owner
- * case 2 : invalid + read : 
+ * case 2 : invalid + read :
  * 	data copy + invalid->shared + owner->shared (STARPU_ASSERT(there is a valid))
- * case 3 : invalid + write : 
+ * case 3 : invalid + write :
  * 	no data copy + invalid->owner + (owner,shared)->invalid
- * case 4 : invalid + R/STARPU_W : 
- * 	data copy + if (STARPU_W) (invalid->owner + owner->invalid) 
+ * case 4 : invalid + R/STARPU_W :
+ * 	data copy + if (STARPU_W) (invalid->owner + owner->invalid)
  * 		    else (invalid,owner->shared)
  */
 
@@ -336,16 +345,16 @@ struct _starpu_data_request *_starpu_create_request_to_fetch_data(starpu_data_ha
 		/* the data is already available so we can stop */
 		_starpu_update_data_state(handle, dst_replicate, mode);
 		_starpu_msi_cache_hit(requesting_node);
-		
+
 #ifdef STARPU_MEMORY_STATUS
 		_starpu_handle_stats_cache_hit(handle, requesting_node);
 
 		/* XXX Broken ? */
-		if (old_state == STARPU_SHARED 
+		if (old_state == STARPU_SHARED
 		    && dst_replicate->state == STARPU_OWNER)
 			_starpu_handle_stats_shared_to_owner(handle, requesting_node);
 #endif
-		
+
 		_starpu_memchunk_recently_used(dst_replicate->mc, requesting_node);
 
 		_starpu_spin_unlock(&handle->header_lock);
@@ -405,19 +414,20 @@ struct _starpu_data_request *_starpu_create_request_to_fetch_data(starpu_data_ha
 
 		/* Try to reuse a request if possible */
 		r = _starpu_search_existing_data_request(hop_dst_replicate,
-				(mode & STARPU_R)?hop_src_node:hop_dst_node, 
+				(mode & STARPU_R)?hop_src_node:hop_dst_node,
 							 mode, is_prefetch);
 
 		reused_requests[hop] = !!r;
 
-		if (!r) {
+		if (!r)
+		{
 			/* Create a new request if there was no request to reuse */
 			r = _starpu_create_data_request(handle, hop_src_replicate,
 							hop_dst_replicate, hop_handling_node,
 							mode, ndeps, is_prefetch);
 		}
 
-		requests[hop] = r; 
+		requests[hop] = r;
 	}
 
 	/* Chain these requests */
@@ -461,7 +471,8 @@ int _starpu_fetch_data_on_node(starpu_data_handle_t handle, struct _starpu_data_
 	while (_starpu_spin_trylock(&handle->header_lock))
 		_starpu_datawizard_progress(local_node, 1);
 
-	if (!is_prefetch) {
+	if (!is_prefetch)
+	{
 		dst_replicate->refcnt++;
 		dst_replicate->handle->busy_count++;
 	}
@@ -475,7 +486,7 @@ int _starpu_fetch_data_on_node(starpu_data_handle_t handle, struct _starpu_data_
 	 * unlocked the header. */
 	if (!r)
 		return 0;
-	
+
 	_starpu_spin_unlock(&handle->header_lock);
 
 	int ret = is_prefetch?0:_starpu_wait_data_request_completion(r, 1);
@@ -508,7 +519,7 @@ uint32_t _starpu_data_get_footprint(starpu_data_handle_t handle)
 	return handle->footprint;
 }
 
-/* in case the data was accessed on a write mode, do not forget to 
+/* in case the data was accessed on a write mode, do not forget to
  * make it accessible again once it is possible ! */
 void _starpu_release_data_on_node(starpu_data_handle_t handle, uint32_t default_wt_mask, struct _starpu_data_replicate *replicate)
 {
@@ -553,7 +564,7 @@ static void _starpu_set_data_requested_flag_if_needed(struct _starpu_data_replic
 // XXX : this is just a hint, so we don't take the lock ...
 //	pthread_spin_lock(&handle->header_lock);
 
-	if (replicate->state == STARPU_INVALID) 
+	if (replicate->state == STARPU_INVALID)
 	{
 		unsigned dst_node = replicate->memory_node;
 		replicate->requested[dst_node] = 1;
@@ -613,7 +624,8 @@ int _starpu_fetch_task_input(struct starpu_task *task, uint32_t mask)
 		{
 			local_replicate = &handle->per_worker[workerid];
 		}
-		else {
+		else
+		{
 			/* That's a "normal" buffer (R/W) */
 			local_replicate = &handle->per_node[local_memory_node];
 		}
@@ -708,7 +720,8 @@ unsigned _starpu_is_data_present_or_requested(starpu_data_handle_t handle, uint3
 	{
 		ret  = 1;
 	}
-	else {
+	else
+	{
 		unsigned i;
 		unsigned nnodes = _starpu_get_memory_nodes_count();
 

+ 15 - 11
src/datawizard/coherency.h

@@ -31,7 +31,8 @@
 #include <datawizard/interfaces/data_interface.h>
 #include <datawizard/datastats.h>
 
-enum _starpu_cache_state {
+enum _starpu_cache_state
+{
 	STARPU_OWNER,
 	STARPU_SHARED,
 	STARPU_INVALID
@@ -55,23 +56,23 @@ LIST_TYPE(_starpu_data_replicate,
 	unsigned initialized;
 
 	/* describes the state of the local data in term of coherency */
-	enum _starpu_cache_state	state; 
+	enum _starpu_cache_state	state;
 
 	int refcnt;
 
 	/* is the data locally allocated ? */
-	uint8_t allocated; 
+	uint8_t allocated;
 	/* was it automatically allocated ? */
-	/* perhaps the allocation was perform higher in the hiearchy 
+	/* perhaps the allocation was perform higher in the hiearchy
 	 * for now this is just translated into !automatically_allocated
 	 * */
 	uint8_t automatically_allocated;
 
         /* Pointer to memchunk for LRU strategy */
 	struct _starpu_mem_chunk * mc;
- 
+
 	/* To help the scheduling policies to make some decision, we
-	   may keep a track of the tasks that are likely to request 
+	   may keep a track of the tasks that are likely to request
 	   this data on the current node.
 	   It is the responsability of the scheduling _policy_ to set that
 	   flag when it assigns a task to a queue, policies which do not
@@ -83,18 +84,21 @@ LIST_TYPE(_starpu_data_replicate,
 
 struct _starpu_data_requester_list;
 
-struct _starpu_jobid_list {
+struct _starpu_jobid_list
+{
 	unsigned long id;
 	struct _starpu_jobid_list *next;
 };
 
 /* This structure describes a simply-linked list of task */
-struct _starpu_task_wrapper_list {
+struct _starpu_task_wrapper_list
+{
 	struct starpu_task *task;
 	struct _starpu_task_wrapper_list *next;
 };
 
-struct _starpu_data_state {
+struct _starpu_data_state
+{
 	struct _starpu_data_requester_list *req_list;
 	/* the number of requests currently in the scheduling engine (not in
 	 * the req_list anymore), i.e. the number of holders of the
@@ -154,7 +158,7 @@ struct _starpu_data_state {
 	/* This lock should protect any operation to enforce
 	 * sequential_consistency */
 	pthread_mutex_t sequential_consistency_mutex;
-	
+
 	/* The last submitted task (or application data request) that declared
 	 * it would modify the piece of data ? Any task accessing the data in a
 	 * read-only mode should depend on that task implicitely if the
@@ -172,7 +176,7 @@ struct _starpu_data_state {
 	unsigned last_submitted_ghost_writer_id_is_valid;
 	unsigned long last_submitted_ghost_writer_id;
 	struct _starpu_jobid_list *last_submitted_ghost_readers_id;
-	
+
 	struct _starpu_task_wrapper_list *post_sync_tasks;
 	unsigned post_sync_tasks_cnt;
 

+ 78 - 63
src/datawizard/copy_driver.c

@@ -122,7 +122,8 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
 	}
 #endif
 
-	switch (_STARPU_MEMORY_NODE_TUPLE(src_kind,dst_kind)) {
+	switch (_STARPU_MEMORY_NODE_TUPLE(src_kind,dst_kind))
+	{
 	case _STARPU_MEMORY_NODE_TUPLE(STARPU_CPU_RAM,STARPU_CPU_RAM):
 		/* STARPU_CPU_RAM -> STARPU_CPU_RAM */
 		STARPU_ASSERT(copy_methods->ram_to_ram);
@@ -132,11 +133,13 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
 	case _STARPU_MEMORY_NODE_TUPLE(STARPU_CUDA_RAM,STARPU_CPU_RAM):
 		/* only the proper CUBLAS thread can initiate this directly ! */
 		STARPU_ASSERT(copy_methods->cuda_to_ram);
-		if (!req || !copy_methods->cuda_to_ram_async) {
+		if (!req || !copy_methods->cuda_to_ram_async)
+		{
 			/* this is not associated to a request so it's synchronous */
 			copy_methods->cuda_to_ram(src_interface, src_node, dst_interface, dst_node);
 		}
-		else {
+		else
+		{
 			req->async_channel.type = STARPU_CUDA_RAM;
 			cures = cudaEventCreate(&req->async_channel.event.cuda_event);
 			if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures);
@@ -153,11 +156,13 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
 		/* only the proper CUBLAS thread can initiate this ! */
 		STARPU_ASSERT(_starpu_get_local_memory_node() == dst_node);
 		STARPU_ASSERT(copy_methods->ram_to_cuda);
-		if (!req || !copy_methods->ram_to_cuda_async) {
+		if (!req || !copy_methods->ram_to_cuda_async)
+		{
 			/* this is not associated to a request so it's synchronous */
 			copy_methods->ram_to_cuda(src_interface, src_node, dst_interface, dst_node);
 		}
-		else {
+		else
+		{
 			req->async_channel.type = STARPU_CUDA_RAM;
 			cures = cudaEventCreate(&req->async_channel.event.cuda_event);
 			if (STARPU_UNLIKELY(cures != cudaSuccess))
@@ -174,12 +179,14 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
 	case _STARPU_MEMORY_NODE_TUPLE(STARPU_CUDA_RAM,STARPU_CUDA_RAM):
 		/* CUDA - CUDA transfer */
 		STARPU_ASSERT(copy_methods->cuda_to_cuda || copy_methods->cuda_to_cuda_async);
-		if (!req || !copy_methods->cuda_to_cuda_async) {
+		if (!req || !copy_methods->cuda_to_cuda_async)
+		{
 			STARPU_ASSERT(copy_methods->cuda_to_cuda);
 			/* this is not associated to a request so it's synchronous */
 			copy_methods->cuda_to_cuda(src_interface, src_node, dst_interface, dst_node);
 		}
-		else {
+		else
+		{
 			req->async_channel.type = STARPU_CUDA_RAM;
 			cures = cudaEventCreate(&req->async_channel.event.cuda_event);
 			if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures);
@@ -195,18 +202,22 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
 #ifdef STARPU_USE_OPENCL
 	case _STARPU_MEMORY_NODE_TUPLE(STARPU_OPENCL_RAM,STARPU_CPU_RAM):
 		/* OpenCL -> RAM */
-		if (_starpu_get_local_memory_node() == src_node) {
+		if (_starpu_get_local_memory_node() == src_node)
+		{
 			STARPU_ASSERT(copy_methods->opencl_to_ram);
-			if (!req || !copy_methods->opencl_to_ram_async) {
+			if (!req || !copy_methods->opencl_to_ram_async)
+			{
 				/* this is not associated to a request so it's synchronous */
 				copy_methods->opencl_to_ram(src_interface, src_node, dst_interface, dst_node);
 			}
-			else {
+			else
+			{
 				req->async_channel.type = STARPU_OPENCL_RAM;
 				ret = copy_methods->opencl_to_ram_async(src_interface, src_node, dst_interface, dst_node, &(req->async_channel.event.opencl_event));
 			}
 		}
-		else {
+		else
+		{
 			/* we should not have a blocking call ! */
 			STARPU_ABORT();
 		}
@@ -215,11 +226,13 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
 		/* STARPU_CPU_RAM -> STARPU_OPENCL_RAM */
 		STARPU_ASSERT(_starpu_get_local_memory_node() == dst_node);
 		STARPU_ASSERT(copy_methods->ram_to_opencl);
-		if (!req || !copy_methods->ram_to_opencl_async) {
+		if (!req || !copy_methods->ram_to_opencl_async)
+		{
 			/* this is not associated to a request so it's synchronous */
 			copy_methods->ram_to_opencl(src_interface, src_node, dst_interface, dst_node);
 		}
-		else {
+		else
+		{
 			req->async_channel.type = STARPU_OPENCL_RAM;
 			ret = copy_methods->ram_to_opencl_async(src_interface, src_node, dst_interface, dst_node, &(req->async_channel.event.opencl_event));
 		}
@@ -266,12 +279,13 @@ int __attribute__((warn_unused_result)) _starpu_driver_copy_data_1_to_1(starpu_d
 	STARPU_ASSERT(dst_replicate->allocated);
 	STARPU_ASSERT(dst_replicate->refcnt);
 
-	/* if there is no need to actually read the data, 
+	/* if there is no need to actually read the data,
 	 * we do not perform any transfer */
-	if (!donotread) {
+	if (!donotread)
+	{
 		size_t size = _starpu_data_get_size(handle);
 		_starpu_bus_update_profiling_info((int)src_node, (int)dst_node, size);
-		
+
 #ifdef STARPU_USE_FXT
 		com_id = STARPU_ATOMIC_ADD(&communication_cnt, 1);
 
@@ -303,34 +317,35 @@ void _starpu_driver_wait_request_completion(struct _starpu_async_channel *async_
 	cudaError_t cures;
 #endif
 
-	switch (kind) {
+	switch (kind)
+	{
 #ifdef STARPU_USE_CUDA
-		case STARPU_CUDA_RAM:
-			event = (*async_channel).event.cuda_event;
+	case STARPU_CUDA_RAM:
+		event = (*async_channel).event.cuda_event;
 
-			cures = cudaEventSynchronize(event);
-			if (STARPU_UNLIKELY(cures))
-				STARPU_CUDA_REPORT_ERROR(cures);
+		cures = cudaEventSynchronize(event);
+		if (STARPU_UNLIKELY(cures))
+			STARPU_CUDA_REPORT_ERROR(cures);
 
-			cures = cudaEventDestroy(event);
-			if (STARPU_UNLIKELY(cures))
-				STARPU_CUDA_REPORT_ERROR(cures);
+		cures = cudaEventDestroy(event);
+		if (STARPU_UNLIKELY(cures))
+			STARPU_CUDA_REPORT_ERROR(cures);
 
-			break;
+		break;
 #endif
 #ifdef STARPU_USE_OPENCL
-      case STARPU_OPENCL_RAM:
-         {
-                 if ((*async_channel).event.opencl_event == NULL) STARPU_ABORT();
-                 cl_int err = clWaitForEvents(1, &((*async_channel).event.opencl_event));
-                 if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
-                 clReleaseEvent((*async_channel).event.opencl_event);
-         }
-         break;
+	case STARPU_OPENCL_RAM:
+	{
+		if ((*async_channel).event.opencl_event == NULL) STARPU_ABORT();
+		cl_int err = clWaitForEvents(1, &((*async_channel).event.opencl_event));
+		if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
+		clReleaseEvent((*async_channel).event.opencl_event);
+	      break;
+	}
 #endif
-		case STARPU_CPU_RAM:
-		default:
-			STARPU_ABORT();
+	case STARPU_CPU_RAM:
+	default:
+		STARPU_ABORT();
 	}
 }
 
@@ -342,36 +357,36 @@ unsigned _starpu_driver_test_request_completion(struct _starpu_async_channel *as
 	cudaEvent_t event;
 #endif
 
-	switch (kind) {
+	switch (kind)
+	{
 #ifdef STARPU_USE_CUDA
-		case STARPU_CUDA_RAM:
-			event = (*async_channel).event.cuda_event;
-			cudaError_t cures = cudaEventQuery(event);
-
-			success = (cures == cudaSuccess);
-			if (success)
-				cudaEventDestroy(event);
-			else if (cures != cudaErrorNotReady)
-				STARPU_CUDA_REPORT_ERROR(cures);
-
-			break;
+	case STARPU_CUDA_RAM:
+		event = (*async_channel).event.cuda_event;
+		cudaError_t cures = cudaEventQuery(event);
+
+		success = (cures == cudaSuccess);
+		if (success)
+			cudaEventDestroy(event);
+		else if (cures != cudaErrorNotReady)
+			STARPU_CUDA_REPORT_ERROR(cures);
+		break;
 #endif
 #ifdef STARPU_USE_OPENCL
-      case STARPU_OPENCL_RAM:
-         {
-            cl_int event_status;
-            cl_event opencl_event = (*async_channel).event.opencl_event;
-            if (opencl_event == NULL) STARPU_ABORT();
-            cl_int err = clGetEventInfo(opencl_event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL);
-            if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
-            success = (event_status == CL_COMPLETE);
-            break;
-         }
+	case STARPU_OPENCL_RAM:
+	{
+		cl_int event_status;
+		cl_event opencl_event = (*async_channel).event.opencl_event;
+		if (opencl_event == NULL) STARPU_ABORT();
+		cl_int err = clGetEventInfo(opencl_event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL);
+		if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
+		success = (event_status == CL_COMPLETE);
+		break;
+	}
 #endif
-		case STARPU_CPU_RAM:
-		default:
-			STARPU_ABORT();
-			success = 0;
+	case STARPU_CPU_RAM:
+	default:
+		STARPU_ABORT();
+		success = 0;
 	}
 
 	return success;

+ 4 - 2
src/datawizard/copy_driver.h

@@ -38,7 +38,8 @@ struct _starpu_data_replicate;
 
 /* this is a structure that can be queried to see whether an asynchronous
  * transfer has terminated or not */
-union _starpu_async_channel_event {
+union _starpu_async_channel_event
+{
 	int dummy;
 #ifdef STARPU_USE_CUDA
 	cudaEvent_t cuda_event;
@@ -48,7 +49,8 @@ union _starpu_async_channel_event {
 #endif
 };
 
-struct _starpu_async_channel {
+struct _starpu_async_channel
+{
 	union _starpu_async_channel_event event;
 	enum _starpu_node_kind type;
 };

+ 35 - 27
src/datawizard/data_request.c

@@ -41,7 +41,7 @@ void _starpu_init_data_request_lists(void)
 
 		data_requests_pending[i] = _starpu_data_request_list_new();
 		_STARPU_PTHREAD_MUTEX_INIT(&data_requests_pending_list_mutex[i], NULL);
-		
+
 		starpu_memstrategy_drop_prefetch[i]=0;
 	}
 }
@@ -72,7 +72,8 @@ static void starpu_data_request_destroy(struct _starpu_data_request *r)
 	{
 		node = r->src_replicate->memory_node;
 	}
-	else {
+	else
+	{
 		node = r->dst_replicate->memory_node;
 	}
 
@@ -119,7 +120,8 @@ struct _starpu_data_request *_starpu_create_data_request(starpu_data_handle_t ha
 		src_replicate->refcnt++;
 		handle->busy_count++;
 	}
-	else {
+	else
+	{
 		unsigned dst_node = dst_replicate->memory_node;
 		dst_replicate->request[dst_node] = r;
 	}
@@ -138,7 +140,8 @@ int _starpu_wait_data_request_completion(struct _starpu_data_request *r, unsigne
 
 	uint32_t local_node = _starpu_get_local_memory_node();
 
-	do {
+	do
+	{
 		_starpu_spin_lock(&r->lock);
 
 		if (r->completed)
@@ -152,13 +155,14 @@ int _starpu_wait_data_request_completion(struct _starpu_data_request *r, unsigne
 
 		_starpu_datawizard_progress(local_node, may_alloc);
 
-	} while (1);
+	}
+	while (1);
 
 
 	retval = r->retval;
 	if (retval)
 		_STARPU_DISP("REQUEST %p COMPLETED (retval %d) !\n", r, r->retval);
-		
+
 
 	r->refcnt--;
 
@@ -167,10 +171,10 @@ int _starpu_wait_data_request_completion(struct _starpu_data_request *r, unsigne
 		do_delete = 1;
 
 	_starpu_spin_unlock(&r->lock);
-	
+
 	if (do_delete)
 		starpu_data_request_destroy(r);
-	
+
 	return retval;
 }
 
@@ -191,9 +195,9 @@ void _starpu_post_data_request(struct _starpu_data_request *r, uint32_t handling
 
 	/* insert the request in the proper list */
 	_STARPU_PTHREAD_MUTEX_LOCK(&data_requests_list_mutex[handling_node]);
-	if (r->prefetch) {
+	if (r->prefetch)
 		_starpu_data_request_list_push_back(prefetch_requests[handling_node], r);
-	} else
+	else
 		_starpu_data_request_list_push_back(data_requests[handling_node], r);
 	_STARPU_PTHREAD_MUTEX_UNLOCK(&data_requests_list_mutex[handling_node]);
 
@@ -240,12 +244,12 @@ static void starpu_handle_data_request_completion(struct _starpu_data_request *r
 	{
 		if (old_src_replicate_state == STARPU_OWNER)
 			_starpu_handle_stats_invalidated(handle, src_replicate->memory_node);
-		else 
+		else
 		{
 			/* XXX Currently only ex-OWNER are tagged as invalidated */
 			/* XXX Have to check all old state of every node in case a SHARED data become OWNED by the dst_replicate */
 		}
-		
+
 	}
 	if (dst_replicate->state == STARPU_SHARED)
 		_starpu_handle_stats_loaded_shared(handle, dst_replicate->memory_node);
@@ -274,7 +278,7 @@ static void starpu_handle_data_request_completion(struct _starpu_data_request *r
 	}
 
 	r->completed = 1;
-	
+
 	/* Remove a reference on the destination replicate  */
 	STARPU_ASSERT(dst_replicate->refcnt > 0);
 	dst_replicate->refcnt--;
@@ -297,12 +301,12 @@ static void starpu_handle_data_request_completion(struct _starpu_data_request *r
 	/* if nobody is waiting on that request, we can get rid of it */
 	if (r->refcnt == 0)
 		do_delete = 1;
-	
+
 	r->retval = 0;
 
 	/* In case there are one or multiple callbacks, we execute them now. */
 	struct _starpu_callback_list *callbacks = r->callbacks;
-	
+
 	_starpu_spin_unlock(&r->lock);
 
 	if (do_delete)
@@ -426,7 +430,8 @@ void _starpu_handle_node_data_requests(uint32_t src_node, unsigned may_alloc)
 	_starpu_data_request_list_delete(local_list);
 }
 
-void _starpu_handle_node_prefetch_requests(uint32_t src_node, unsigned may_alloc){
+void _starpu_handle_node_prefetch_requests(uint32_t src_node, unsigned may_alloc)
+{
 	starpu_memstrategy_drop_prefetch[src_node]=0;
 
 	struct _starpu_data_request *r;
@@ -437,7 +442,7 @@ void _starpu_handle_node_prefetch_requests(uint32_t src_node, unsigned may_alloc
         _STARPU_PTHREAD_MUTEX_LOCK(&data_requests_list_mutex[src_node]);
 
 	struct _starpu_data_request_list *local_list = prefetch_requests[src_node];
-	
+
 	if (_starpu_data_request_list_empty(local_list))
 	{
 		/* there is no request */
@@ -516,24 +521,26 @@ static void _handle_pending_node_data_requests(uint32_t src_node, unsigned force
 		r = _starpu_data_request_list_pop_front(local_list);
 
 		starpu_data_handle_t handle = r->handle;
-		
+
 		_starpu_spin_lock(&handle->header_lock);
-	
+
 		_starpu_spin_lock(&r->lock);
-	
+
 		/* wait until the transfer is terminated */
 		if (force)
 		{
 			_starpu_driver_wait_request_completion(&r->async_channel);
 			starpu_handle_data_request_completion(r);
 		}
-		else {
+		else
+		{
 			if (_starpu_driver_test_request_completion(&r->async_channel))
 			{
 				/* The request was completed */
 				starpu_handle_data_request_completion(r);
 			}
-			else {
+			else
+			{
 				/* The request was not completed, so we put it
 				 * back again on the list of pending requests
 				 * so that it can be handled later on. */
@@ -572,10 +579,11 @@ int _starpu_check_that_no_data_request_exists(uint32_t node)
 }
 
 
-void _starpu_update_prefetch_status(struct _starpu_data_request *r){
+void _starpu_update_prefetch_status(struct _starpu_data_request *r)
+{
 	STARPU_ASSERT(r->prefetch > 0);
 	r->prefetch=0;
-	
+
 	/* We have to promote chained_request too! */
 	unsigned chained_req;
 	for (chained_req = 0; chained_req < r->next_req_count; chained_req++)
@@ -586,7 +594,7 @@ void _starpu_update_prefetch_status(struct _starpu_data_request *r){
 	}
 
 	_STARPU_PTHREAD_MUTEX_LOCK(&data_requests_list_mutex[r->handling_node]);
-	
+
 	/* The request can be in a different list (handling request or the temp list)
 	 * we have to check that it is really in the prefetch list. */
 	struct _starpu_data_request *r_iter;
@@ -594,13 +602,13 @@ void _starpu_update_prefetch_status(struct _starpu_data_request *r){
 	     r_iter != _starpu_data_request_list_end(prefetch_requests[r->handling_node]);
 	     r_iter = _starpu_data_request_list_next(r_iter))
 	{
-		
+
 		if (r==r_iter)
 		{
 			_starpu_data_request_list_erase(prefetch_requests[r->handling_node],r);
 			_starpu_data_request_list_push_front(data_requests[r->handling_node],r);
 			break;
-		}		
+		}
 	}
 	_STARPU_PTHREAD_MUTEX_UNLOCK(&data_requests_list_mutex[r->handling_node]);
 }

+ 2 - 1
src/datawizard/data_request.h

@@ -26,7 +26,8 @@
 
 struct _starpu_data_replicate;
 
-struct _starpu_callback_list {
+struct _starpu_callback_list
+{
 	void (*callback_func)(void *);
 	void *callback_arg;
 	struct _starpu_callback_list *next;

+ 5 - 5
src/datawizard/datastats.c

@@ -95,13 +95,13 @@ void _starpu_display_alloc_cache_stats(void)
 #ifdef STARPU_DATA_STATS
 	fprintf(stderr, "Allocation cache stats:\n");
 	unsigned node;
-	for (node = 0; node < STARPU_MAXNODES; node++) 
+	for (node = 0; node < STARPU_MAXNODES; node++)
 	{
-		if (alloc_cnt[node]) 
+		if (alloc_cnt[node])
 		{
 			fprintf(stderr, "memory node %d\n", node);
 			fprintf(stderr, "\ttotal alloc : %u\n", alloc_cnt[node]);
-			fprintf(stderr, "\tcached alloc: %u (%2.2f \%%)\n", 
+			fprintf(stderr, "\tcached alloc: %u (%2.2f \%%)\n",
 				alloc_cache_hit_cnt[node], (100.0f*alloc_cache_hit_cnt[node])/(alloc_cnt[node]));
 		}
 	}
@@ -161,12 +161,12 @@ void _starpu_display_data_stats(void)
 void _starpu_display_data_handle_stats(starpu_data_handle_t handle)
 {
 	unsigned node;
-	
+
 	fprintf(stderr, "#-----\n");
 	fprintf(stderr, "Data : %p\n", handle);
 	fprintf(stderr, "Size : %d\n", (int)handle->data_size);
 	fprintf(stderr, "\n");
-	
+
 	fprintf(stderr, "#--\n");
 	fprintf(stderr, "Data access stats\n");
 	fprintf(stderr, "/!\\ Work Underway\n");

+ 17 - 15
src/datawizard/filters.c

@@ -32,7 +32,8 @@ static void map_filter(starpu_data_handle_t root_handle, struct starpu_data_filt
 		/* this is a leaf */
 		starpu_data_partition(root_handle, f);
 	}
-	else {
+	else
+	{
 		/* try to apply the data filter recursively */
 		unsigned child;
 		for (child = 0; child < root_handle->nchildren; child++)
@@ -94,7 +95,7 @@ starpu_data_handle_t starpu_data_vget_sub_data(starpu_data_handle_t root_handle,
 	starpu_data_handle_t current_handle = root_handle;
 
 	/* the variable number of argument must correlate the depth in the tree */
-	unsigned i; 
+	unsigned i;
 	for (i = 0; i < depth; i++)
 	{
 		unsigned next_child;
@@ -185,7 +186,7 @@ void starpu_data_partition(starpu_data_handle_t initial_handle, struct starpu_da
 		unsigned node;
 		for (node = 0; node < STARPU_MAXNODES; node++)
 		{
-			struct _starpu_data_replicate *initial_replicate; 
+			struct _starpu_data_replicate *initial_replicate;
 			struct _starpu_data_replicate *child_replicate;
 
 			initial_replicate = &initial_handle->per_node[node];
@@ -197,7 +198,7 @@ void starpu_data_partition(starpu_data_handle_t initial_handle, struct starpu_da
 			child_replicate->refcnt = 0;
 			child_replicate->memory_node = node;
 			child_replicate->relaxed_coherency = 0;
-			
+
 			/* update the interface */
 			void *initial_interface = starpu_data_get_interface_on_node(initial_handle, node);
 			void *child_interface = starpu_data_get_interface_on_node(child, node);
@@ -210,7 +211,7 @@ void starpu_data_partition(starpu_data_handle_t initial_handle, struct starpu_da
 		{
 			struct _starpu_data_replicate *child_replicate;
 			child_replicate = &child->per_worker[worker];
-			
+
 			child_replicate->state = STARPU_INVALID;
 			child_replicate->allocated = 0;
 			child_replicate->automatically_allocated = 0;
@@ -264,11 +265,11 @@ void starpu_data_unpartition(starpu_data_handle_t root_handle, uint32_t gatherin
 
 		int ret;
 		ret = _starpu_fetch_data_on_node(child_handle, &child_handle->per_node[gathering_node], STARPU_R, 0, NULL, NULL);
-		/* for now we pretend that the RAM is almost unlimited and that gathering 
+		/* for now we pretend that the RAM is almost unlimited and that gathering
 		 * data should be possible from the node that does the unpartionning ... we
 		 * don't want to have the programming deal with memory shortage at that time,
 		 * really */
-		STARPU_ASSERT(ret == 0); 
+		STARPU_ASSERT(ret == 0);
 
 		_starpu_data_free_interfaces(&root_handle->children[child]);
 		_starpu_data_requester_list_delete(child_handle->req_list);
@@ -278,7 +279,7 @@ void starpu_data_unpartition(starpu_data_handle_t root_handle, uint32_t gatherin
 	/* the gathering_node should now have a valid copy of all the children.
 	 * For all nodes, if the node had all copies and none was locally
 	 * allocated then the data is still valid there, else, it's invalidated
-	 * for the gathering node, if we have some locally allocated data, we 
+	 * for the gathering node, if we have some locally allocated data, we
 	 * copy all the children (XXX this should not happen so we just do not
 	 * do anything since this is transparent ?) */
 	unsigned still_valid[STARPU_MAXNODES];
@@ -299,9 +300,10 @@ void starpu_data_unpartition(starpu_data_handle_t root_handle, uint32_t gatherin
 		{
 			struct _starpu_data_replicate *local = &root_handle->children[child].per_node[node];
 
-			if (local->state == STARPU_INVALID) {
+			if (local->state == STARPU_INVALID)
+			{
 				/* One of the bits is missing */
-				isvalid = 0; 
+				isvalid = 0;
 			}
 
 			if (local->allocated && local->automatically_allocated)
@@ -331,7 +333,7 @@ void starpu_data_unpartition(starpu_data_handle_t root_handle, uint32_t gatherin
 
 	for (node = 0; node < STARPU_MAXNODES; node++)
 	{
-		root_handle->per_node[node].state = 
+		root_handle->per_node[node].state =
 			still_valid[node]?newstate:STARPU_INVALID;
 	}
 
@@ -358,15 +360,15 @@ static void starpu_data_create_children(starpu_data_handle_t handle, unsigned nc
 	for (child = 0; child < nchildren; child++)
 	{
 		starpu_data_handle_t handle_child = &handle->children[child];
-		
+
 		struct starpu_data_interface_ops *ops;
-		
+
 		/* what's this child's interface ? */
 		if (f->get_child_ops)
 		  ops = f->get_child_ops(f, child);
 		else
 		  ops = handle->ops;
-		
+
 		handle_child->ops = ops;
 
 		size_t interfacesize = ops->interface_size;
@@ -386,7 +388,7 @@ static void starpu_data_create_children(starpu_data_handle_t handle, unsigned nc
 			STARPU_ASSERT(handle_child->per_worker[worker].data_interface);
 		}
 	}
-	
+
 	/* this handle now has children */
 	handle->nchildren = nchildren;
 }

+ 6 - 5
src/datawizard/interfaces/bcsr_filters.c

@@ -25,14 +25,14 @@ void starpu_canonical_block_filter_bcsr(void *father_interface, void *child_inte
 	struct starpu_bcsr_interface *bcsr_father = (struct starpu_bcsr_interface *) father_interface;
 	/* each chunk becomes a small dense matrix */
 	struct starpu_matrix_interface *matrix_child = (struct starpu_matrix_interface *) child_interface;
-	
+
 	size_t elemsize = bcsr_father->elemsize;
 	uint32_t firstentry = bcsr_father->firstentry;
 
 	/* size of the tiles */
 	uint32_t r = bcsr_father->r;
 	uint32_t c = bcsr_father->c;
-	
+
 	uint32_t ptr_offset = c*r*id*elemsize;
 
 	matrix_child->nx = c;
@@ -40,8 +40,9 @@ void starpu_canonical_block_filter_bcsr(void *father_interface, void *child_inte
 	matrix_child->ld = c;
 	matrix_child->elemsize = elemsize;
 
-	if (bcsr_father->nzval) {
-	  uint8_t *nzval = (uint8_t *)(bcsr_father->nzval);
-	  matrix_child->ptr = (uintptr_t)&nzval[firstentry + ptr_offset];
+	if (bcsr_father->nzval)
+	{
+		uint8_t *nzval = (uint8_t *)(bcsr_father->nzval);
+		matrix_child->ptr = (uintptr_t)&nzval[firstentry + ptr_offset];
 	}
 }

+ 36 - 27
src/datawizard/interfaces/bcsr_interface.c

@@ -42,7 +42,8 @@ static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTR
 static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
 #endif
 
-static const struct starpu_data_copy_methods bcsr_copy_data_methods_s = {
+static const struct starpu_data_copy_methods bcsr_copy_data_methods_s =
+{
 	.ram_to_ram = copy_ram_to_ram,
 	.ram_to_spu = NULL,
 #ifdef STARPU_USE_CUDA
@@ -68,7 +69,8 @@ static int bcsr_compare(void *data_interface_a, void *data_interface_b);
 static uint32_t footprint_bcsr_interface_crc32(starpu_data_handle_t handle);
 
 
-static struct starpu_data_interface_ops interface_bcsr_ops = {
+static struct starpu_data_interface_ops interface_bcsr_ops =
+{
 	.register_data_handle = register_bcsr_handle,
 	.allocate_data_on_node = allocate_bcsr_buffer_on_node,
 	.free_data_on_node = free_bcsr_buffer_on_node,
@@ -90,12 +92,14 @@ static void register_bcsr_handle(starpu_data_handle_t handle, uint32_t home_node
 		struct starpu_bcsr_interface *local_interface = (struct starpu_bcsr_interface *)
 			starpu_data_get_interface_on_node(handle, node);
 
-		if (node == home_node) {
+		if (node == home_node)
+		{
 			local_interface->nzval = bcsr_interface->nzval;
 			local_interface->colind = bcsr_interface->colind;
 			local_interface->rowptr = bcsr_interface->rowptr;
 		}
-		else {
+		else
+		{
 			local_interface->nzval = 0;
 			local_interface->colind = NULL;
 			local_interface->rowptr = NULL;
@@ -115,7 +119,8 @@ void starpu_bcsr_data_register(starpu_data_handle_t *handleptr, uint32_t home_no
 		uint32_t *rowptr, uint32_t firstentry,
 		uint32_t r, uint32_t c, size_t elemsize)
 {
-	struct starpu_bcsr_interface bcsr_interface = {
+	struct starpu_bcsr_interface bcsr_interface =
+	{
 		.nzval = nzval,
 		.colind = colind,
 		.rowptr = rowptr,
@@ -212,7 +217,7 @@ uintptr_t starpu_bcsr_get_local_nzval(starpu_data_handle_t handle)
 
 	struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *)
 		starpu_data_get_interface_on_node(handle, node);
-	
+
 	return data_interface->nzval;
 }
 
@@ -245,7 +250,7 @@ static size_t bcsr_interface_get_size(starpu_data_handle_t handle)
 	uint32_t c = starpu_bcsr_get_c(handle);
 	size_t elemsize = starpu_bcsr_get_elemsize(handle);
 
-	size = nnz*r*c*elemsize + nnz*sizeof(uint32_t) + (nrow+1)*sizeof(uint32_t); 
+	size = nnz*r*c*elemsize + nnz*sizeof(uint32_t) + (nrow+1)*sizeof(uint32_t);
 
 	return size;
 }
@@ -272,7 +277,8 @@ static ssize_t allocate_bcsr_buffer_on_node(void *data_interface_, uint32_t dst_
 
 	enum _starpu_node_kind kind = _starpu_get_node_kind(dst_node);
 
-	switch(kind) {
+	switch(kind)
+	{
 		case STARPU_CPU_RAM:
 			addr_nzval = (uintptr_t)malloc(nnz*r*c*elemsize);
 			if (!addr_nzval)
@@ -305,42 +311,43 @@ static ssize_t allocate_bcsr_buffer_on_node(void *data_interface_, uint32_t dst_
 #endif
 #ifdef STARPU_USE_OPENCL
 		case STARPU_OPENCL_RAM:
-                        {
-                                int ret;
-                                void *ptr;
+		{
+			int ret;
+			void *ptr;
 
-                                ret = _starpu_opencl_allocate_memory(&ptr, nnz*r*c*elemsize, CL_MEM_READ_WRITE);
-                                addr_nzval = (uintptr_t)ptr;
-                                if (ret) goto fail_nzval;
+			ret = _starpu_opencl_allocate_memory(&ptr, nnz*r*c*elemsize, CL_MEM_READ_WRITE);
+			addr_nzval = (uintptr_t)ptr;
+			if (ret) goto fail_nzval;
 
-                                ret = _starpu_opencl_allocate_memory(&ptr, nnz*sizeof(uint32_t), CL_MEM_READ_WRITE);
-                                addr_colind = ptr;
-				if (ret) goto fail_colind;
+			ret = _starpu_opencl_allocate_memory(&ptr, nnz*sizeof(uint32_t), CL_MEM_READ_WRITE);
+			addr_colind = ptr;
+			if (ret) goto fail_colind;
 
-                                ret = _starpu_opencl_allocate_memory(&ptr, (nrow+1)*sizeof(uint32_t), CL_MEM_READ_WRITE);
-                                addr_rowptr = ptr;
-				if (ret) goto fail_rowptr;
+			ret = _starpu_opencl_allocate_memory(&ptr, (nrow+1)*sizeof(uint32_t), CL_MEM_READ_WRITE);
+			addr_rowptr = ptr;
+			if (ret) goto fail_rowptr;
 
-                                break;
-                        }
+			break;
+		}
 #endif
 		default:
 			assert(0);
 	}
 
 	/* allocation succeeded */
-	allocated_memory = 
+	allocated_memory =
 		nnz*r*c*elemsize + nnz*sizeof(uint32_t) + (nrow+1)*sizeof(uint32_t);
 
 	/* update the data properly in consequence */
 	bcsr_interface->nzval = addr_nzval;
 	bcsr_interface->colind = addr_colind;
 	bcsr_interface->rowptr = addr_rowptr;
-	
+
 	return allocated_memory;
 
 fail_rowptr:
-	switch(kind) {
+	switch(kind)
+	{
 		case STARPU_CPU_RAM:
 			free((void *)addr_colind);
 #ifdef STARPU_USE_CUDA
@@ -358,7 +365,8 @@ fail_rowptr:
 	}
 
 fail_colind:
-	switch(kind) {
+	switch(kind)
+	{
 		case STARPU_CPU_RAM:
 			free((void *)addr_nzval);
 #ifdef STARPU_USE_CUDA
@@ -386,7 +394,8 @@ static void free_bcsr_buffer_on_node(void *data_interface, uint32_t node)
 	struct starpu_bcsr_interface *bcsr_interface = (struct starpu_bcsr_interface *) data_interface;
 
 	enum _starpu_node_kind kind = _starpu_get_node_kind(node);
-	switch(kind) {
+	switch(kind)
+	{
 		case STARPU_CPU_RAM:
 			free((void*)bcsr_interface->nzval);
 			free((void*)bcsr_interface->colind);

+ 2 - 1
src/datawizard/interfaces/block_filters.c

@@ -41,7 +41,8 @@ void starpu_block_filter_func_block(void *father_interface, void *child_interfac
 	block_child->nz = nz;
 	block_child->elemsize = elemsize;
 
-	if (block_father->ptr) {
+	if (block_father->ptr)
+	{
                 block_child->ptr = block_father->ptr + offset;
                 block_child->ldy = block_father->ldy;
                 block_child->ldz = block_father->ldz;

+ 53 - 30
src/datawizard/interfaces/block_interface.c

@@ -42,7 +42,8 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARP
 static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, void *_event);
 #endif
 
-static const struct starpu_data_copy_methods block_copy_data_methods_s = {
+static const struct starpu_data_copy_methods block_copy_data_methods_s =
+{
 	.ram_to_ram = copy_ram_to_ram,
 	.ram_to_spu = NULL,
 #ifdef STARPU_USE_CUDA
@@ -77,7 +78,8 @@ static void display_block_interface(starpu_data_handle_t handle, FILE *f);
 static int convert_block_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss);
 #endif
 
-static struct starpu_data_interface_ops interface_block_ops = {
+static struct starpu_data_interface_ops interface_block_ops =
+{
 	.register_data_handle = register_block_handle,
 	.allocate_data_on_node = allocate_block_buffer_on_node,
 	.handle_to_pointer = block_handle_to_pointer,
@@ -89,13 +91,13 @@ static struct starpu_data_interface_ops interface_block_ops = {
 #ifdef STARPU_USE_GORDON
 	.convert_to_gordon = convert_block_to_gordon,
 #endif
-	.interfaceid = STARPU_BLOCK_INTERFACE_ID, 
+	.interfaceid = STARPU_BLOCK_INTERFACE_ID,
 	.interface_size = sizeof(struct starpu_block_interface),
 	.display = display_block_interface
 };
 
 #ifdef STARPU_USE_GORDON
-int convert_block_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss) 
+int convert_block_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss)
 {
 	/* TODO */
 	STARPU_ABORT();
@@ -124,14 +126,16 @@ static void register_block_handle(starpu_data_handle_t handle, uint32_t home_nod
 		struct starpu_block_interface *local_interface = (struct starpu_block_interface *)
 			starpu_data_get_interface_on_node(handle, node);
 
-		if (node == home_node) {
+		if (node == home_node)
+		{
 			local_interface->ptr = block_interface->ptr;
                         local_interface->dev_handle = block_interface->dev_handle;
                         local_interface->offset = block_interface->offset;
 			local_interface->ldy  = block_interface->ldy;
 			local_interface->ldz  = block_interface->ldz;
 		}
-		else {
+		else
+		{
 			local_interface->ptr = 0;
                         local_interface->dev_handle = 0;
                         local_interface->offset = 0;
@@ -151,7 +155,8 @@ void starpu_block_data_register(starpu_data_handle_t *handleptr, uint32_t home_n
 			uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t nx,
 			uint32_t ny, uint32_t nz, size_t elemsize)
 {
-	struct starpu_block_interface block_interface = {
+	struct starpu_block_interface block_interface =
+	{
 		.ptr = ptr,
                 .dev_handle = ptr,
                 .offset = 0,
@@ -205,7 +210,7 @@ static size_t block_interface_get_size(starpu_data_handle_t handle)
 
 	block_interface = (struct starpu_block_interface *) starpu_data_get_interface_on_node(handle, 0);
 
-	size = block_interface->nx*block_interface->ny*block_interface->nz*block_interface->elemsize; 
+	size = block_interface->nx*block_interface->ny*block_interface->nz*block_interface->elemsize;
 
 	return size;
 }
@@ -241,7 +246,7 @@ uint32_t starpu_block_get_local_ldy(starpu_data_handle_t handle)
 	node = _starpu_get_local_memory_node();
 
 	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
-	
+
 	struct starpu_block_interface *block_interface = (struct starpu_block_interface *)
 		starpu_data_get_interface_on_node(handle, node);
 
@@ -304,10 +309,11 @@ static ssize_t allocate_block_buffer_on_node(void *data_interface_, uint32_t dst
 
 	enum _starpu_node_kind kind = _starpu_get_node_kind(dst_node);
 
-	switch(kind) {
+	switch(kind)
+	{
 		case STARPU_CPU_RAM:
 			addr = (uintptr_t)malloc(nx*ny*nz*elemsize);
-			if (!addr) 
+			if (!addr)
 				fail = 1;
 
 			break;
@@ -334,7 +340,8 @@ static ssize_t allocate_block_buffer_on_node(void *data_interface_, uint32_t dst
                                 void *ptr;
                                 ret = _starpu_opencl_allocate_memory(&ptr, nx*ny*nz*elemsize, CL_MEM_READ_WRITE);
                                 addr = (uintptr_t)ptr;
-				if (ret) {
+				if (ret)
+				{
 					fail = 1;
 				}
 				break;
@@ -344,7 +351,8 @@ static ssize_t allocate_block_buffer_on_node(void *data_interface_, uint32_t dst
 			assert(0);
 	}
 
-	if (!fail) {
+	if (!fail)
+	{
 		/* allocation succeeded */
 		allocated_memory = nx*ny*nz*elemsize;
 
@@ -354,11 +362,13 @@ static ssize_t allocate_block_buffer_on_node(void *data_interface_, uint32_t dst
                 dst_block->offset = 0;
 		dst_block->ldy = nx;
 		dst_block->ldz = nx*ny;
-	} else {
+	}
+	else
+	{
 		/* allocation failed */
 		allocated_memory = -ENOMEM;
 	}
-	
+
 	return allocated_memory;
 }
 
@@ -371,7 +381,8 @@ static void free_block_buffer_on_node(void *data_interface, uint32_t node)
 #endif
 
 	enum _starpu_node_kind kind = _starpu_get_node_kind(node);
-	switch(kind) {
+	switch(kind)
+	{
 		case STARPU_CPU_RAM:
 			free((void*)block_interface->ptr);
 			break;
@@ -416,7 +427,8 @@ static int copy_cuda_common(void *src_interface, unsigned src_node STARPU_ATTRIB
                         if (STARPU_UNLIKELY(cures))
                                 STARPU_CUDA_REPORT_ERROR(cures);
                 }
-		else {
+		else
+		{
 			/* Are all plans contiguous */
                         cures = cudaMemcpy2D((char *)dst_block->ptr, dst_block->ldz*elemsize,
                                              (char *)src_block->ptr, src_block->ldz*elemsize,
@@ -425,7 +437,8 @@ static int copy_cuda_common(void *src_interface, unsigned src_node STARPU_ATTRIB
                                 STARPU_CUDA_REPORT_ERROR(cures);
                 }
 	}
-	else {
+	else
+	{
 		/* Default case: we transfer all lines one by one: ny*nz transfers */
 		unsigned layer;
 		for (layer = 0; layer < src_block->nz; layer++)
@@ -481,12 +494,14 @@ static int copy_cuda_async_common(void *src_interface, unsigned src_node STARPU_
 
 				ret = 0;
 			}
-			else {
+			else
+			{
 				ret = -EAGAIN;
 			}
-			
+
 		}
-		else {
+		else
+		{
 			/* Are all plans contiguous */
 			_STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
 			cures = cudaMemcpy2DAsync((char *)dst_block->ptr, dst_block->ldz*elemsize,
@@ -503,12 +518,14 @@ static int copy_cuda_async_common(void *src_interface, unsigned src_node STARPU_
 
 				ret = 0;
 			}
-			else {
+			else
+			{
 				ret = -EAGAIN;
 			}
 		}
 	}
-	else {
+	else
+	{
 		/* Default case: we transfer all lines one by one: ny*nz transfers */
 		unsigned layer;
 		for (layer = 0; layer < src_block->nz; layer++)
@@ -609,19 +626,22 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARP
                         if (STARPU_UNLIKELY(err))
                                 STARPU_OPENCL_REPORT_ERROR(err);
                 }
-		else {
+		else
+		{
 			/* Are all plans contiguous */
                         /* XXX non contiguous buffers are not properly supported yet. (TODO) */
                         STARPU_ASSERT(0);
                 }
         }
-	else {
+	else
+	{
 		/* Default case: we transfer all lines one by one: ny*nz transfers */
 		unsigned layer;
 		for (layer = 0; layer < src_block->nz; layer++)
 		{
                         unsigned j;
-                        for(j=0 ; j<src_block->ny ; j++) {
+                        for(j=0 ; j<src_block->ny ; j++)
+			{
                                 void *ptr = (void*)src_block->ptr+(layer*src_block->ldz*src_block->elemsize)+(j*src_block->ldy*src_block->elemsize);
                                 err = _starpu_opencl_copy_ram_to_opencl(ptr, src_node, (cl_mem)dst_block->dev_handle, dst_node,
                                                                         src_block->nx*src_block->elemsize,
@@ -673,20 +693,23 @@ static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARP
                         if (STARPU_UNLIKELY(err))
                                 STARPU_OPENCL_REPORT_ERROR(err);
                 }
-                else {
+                else
+		{
 			/* Are all plans contiguous */
                         /* XXX non contiguous buffers are not properly supported yet. (TODO) */
                         STARPU_ASSERT(0);
                 }
         }
-	else {
+	else
+	{
 		/* Default case: we transfer all lines one by one: ny*nz transfers */
                 /* XXX non contiguous buffers are not properly supported yet. (TODO) */
 		unsigned layer;
 		for (layer = 0; layer < src_block->nz; layer++)
 		{
                         unsigned j;
-                        for(j=0 ; j<src_block->ny ; j++) {
+                        for(j=0 ; j<src_block->ny ; j++)
+			{
                                 void *ptr = (void *)dst_block->ptr+(layer*dst_block->ldz*dst_block->elemsize)+(j*dst_block->ldy*dst_block->elemsize);
                                 err = _starpu_opencl_copy_opencl_to_ram((void*)src_block->dev_handle, src_node, ptr, dst_node,
                                                                         src_block->nx*src_block->elemsize,
@@ -753,7 +776,7 @@ static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBU
 		uint32_t src_offset = (y*ldy_src + y*z*ldz_src)*elemsize;
 		uint32_t dst_offset = (y*ldy_dst + y*z*ldz_dst)*elemsize;
 
-		memcpy((void *)(ptr_dst + dst_offset), 
+		memcpy((void *)(ptr_dst + dst_offset),
 			(void *)(ptr_src + src_offset), nx*elemsize);
 	}
 

+ 11 - 10
src/datawizard/interfaces/csr_filters.c

@@ -35,20 +35,21 @@ void starpu_vertical_block_filter_func_csr(void *father_interface, void *child_i
 
 	uint32_t first_index = id*chunk_size - firstentry;
 	uint32_t local_firstentry = rowptr[first_index];
-	
-	uint32_t child_nrow = 
+
+	uint32_t child_nrow =
 	  STARPU_MIN(chunk_size, nrow - id*chunk_size);
-	
-	uint32_t local_nnz = rowptr[first_index + child_nrow] - rowptr[first_index]; 
-	
+
+	uint32_t local_nnz = rowptr[first_index + child_nrow] - rowptr[first_index];
+
 	csr_child->nnz = local_nnz;
 	csr_child->nrow = child_nrow;
 	csr_child->firstentry = local_firstentry;
 	csr_child->elemsize = elemsize;
-	
-	if (csr_father->nzval) {
-	  csr_child->rowptr = &csr_father->rowptr[first_index];
-	  csr_child->colind = &csr_father->colind[local_firstentry];
-	  csr_child->nzval = csr_father->nzval + local_firstentry * elemsize;
+
+	if (csr_father->nzval)
+	{
+		csr_child->rowptr = &csr_father->rowptr[first_index];
+		csr_child->colind = &csr_father->colind[local_firstentry];
+		csr_child->nzval = csr_father->nzval + local_firstentry * elemsize;
 	}
 }

+ 26 - 15
src/datawizard/interfaces/csr_interface.c

@@ -42,7 +42,8 @@ static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_
 static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
 #endif
 
-static const struct starpu_data_copy_methods csr_copy_data_methods_s = {
+static const struct starpu_data_copy_methods csr_copy_data_methods_s =
+{
 	.ram_to_ram = copy_ram_to_ram,
 	.ram_to_spu = NULL,
 #ifdef STARPU_USE_CUDA
@@ -70,7 +71,8 @@ static size_t csr_interface_get_size(starpu_data_handle_t handle);
 static int csr_compare(void *data_interface_a, void *data_interface_b);
 static uint32_t footprint_csr_interface_crc32(starpu_data_handle_t handle);
 
-static struct starpu_data_interface_ops interface_csr_ops = {
+static struct starpu_data_interface_ops interface_csr_ops =
+{
 	.register_data_handle = register_csr_handle,
 	.allocate_data_on_node = allocate_csr_buffer_on_node,
 	.free_data_on_node = free_csr_buffer_on_node,
@@ -92,11 +94,13 @@ static void register_csr_handle(starpu_data_handle_t handle, uint32_t home_node,
 		struct starpu_csr_interface *local_interface = (struct starpu_csr_interface *)
 			starpu_data_get_interface_on_node(handle, node);
 
-		if (node == home_node) {
+		if (node == home_node)
+		{
 			local_interface->nzval = csr_interface->nzval;
 			local_interface->colind = csr_interface->colind;
 		}
-		else {
+		else
+		{
 			local_interface->nzval = 0;
 			local_interface->colind = NULL;
 		}
@@ -114,7 +118,8 @@ static void register_csr_handle(starpu_data_handle_t handle, uint32_t home_node,
 void starpu_csr_data_register(starpu_data_handle_t *handleptr, uint32_t home_node,
 		uint32_t nnz, uint32_t nrow, uintptr_t nzval, uint32_t *colind, uint32_t *rowptr, uint32_t firstentry, size_t elemsize)
 {
-	struct starpu_csr_interface csr_interface = {
+	struct starpu_csr_interface csr_interface =
+	{
 		.nnz = nnz,
 		.nrow = nrow,
 		.nzval = nzval,
@@ -246,7 +251,8 @@ static ssize_t allocate_csr_buffer_on_node(void *data_interface_, uint32_t dst_n
 
 	enum _starpu_node_kind kind = _starpu_get_node_kind(dst_node);
 
-	switch(kind) {
+	switch(kind)
+	{
 		case STARPU_CPU_RAM:
 			addr_nzval = (uintptr_t)malloc(nnz*elemsize);
 			if (!addr_nzval)
@@ -303,18 +309,19 @@ static ssize_t allocate_csr_buffer_on_node(void *data_interface_, uint32_t dst_n
 	}
 
 	/* allocation succeeded */
-	allocated_memory = 
+	allocated_memory =
 		nnz*elemsize + nnz*sizeof(uint32_t) + (nrow+1)*sizeof(uint32_t);
 
 	/* update the data properly in consequence */
 	csr_interface->nzval = addr_nzval;
 	csr_interface->colind = addr_colind;
 	csr_interface->rowptr = addr_rowptr;
-	
+
 	return allocated_memory;
 
 fail_rowptr:
-	switch(kind) {
+	switch(kind)
+	{
 		case STARPU_CPU_RAM:
 			free((void *)addr_colind);
 #ifdef STARPU_USE_CUDA
@@ -332,7 +339,8 @@ fail_rowptr:
 	}
 
 fail_colind:
-	switch(kind) {
+	switch(kind)
+	{
 		case STARPU_CPU_RAM:
 			free((void *)addr_nzval);
 #ifdef STARPU_USE_CUDA
@@ -360,7 +368,8 @@ static void free_csr_buffer_on_node(void *data_interface, uint32_t node)
 	struct starpu_csr_interface *csr_interface = (struct starpu_csr_interface *) data_interface;
 
 	enum _starpu_node_kind kind = _starpu_get_node_kind(node);
-	switch(kind) {
+	switch(kind)
+	{
 		case STARPU_CPU_RAM:
 			free((void*)csr_interface->nzval);
 			free((void*)csr_interface->colind);
@@ -465,13 +474,14 @@ static int copy_cuda_common_async(void *src_interface, unsigned src_node STARPU_
 		if (STARPU_UNLIKELY(cures))
 			STARPU_CUDA_REPORT_ERROR(cures);
 	}
-	
+
 	if (synchronous_fallback)
 	{
 		_STARPU_TRACE_DATA_COPY(src_node, dst_node, nnz*elemsize + (nnz+nrow+1)*sizeof(uint32_t));
 		return 0;
 	}
-	else {
+	else
+	{
 		_STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
 		return -EAGAIN;
 	}
@@ -569,13 +579,14 @@ static int copy_cuda_peer_async(void *src_interface STARPU_ATTRIBUTE_UNUSED, uns
 		if (STARPU_UNLIKELY(cures))
 			STARPU_CUDA_REPORT_ERROR(cures);
 	}
-	
+
 	if (synchronous_fallback)
 	{
 		_STARPU_TRACE_DATA_COPY(src_node, dst_node, nnz*elemsize + (nnz+nrow+1)*sizeof(uint32_t));
 		return 0;
 	}
-	else {
+	else
+	{
 		_STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
 		return -EAGAIN;
 	}

+ 26 - 17
src/datawizard/interfaces/data_interface.c

@@ -45,7 +45,8 @@ void _starpu_data_interface_shutdown()
 
 	_starpu_spin_destroy(&registered_handles_lock);
 
-	HASH_ITER(hh, registered_handles, entry, tmp) {
+	HASH_ITER(hh, registered_handles, entry, tmp)
+	{
 		HASH_DEL(registered_handles, entry);
 		free(entry);
 	}
@@ -89,7 +90,7 @@ starpu_data_handle_t starpu_data_lookup(const void *ptr)
 	return result;
 }
 
-/* 
+/*
  * Start monitoring a piece of data
  */
 
@@ -162,18 +163,20 @@ static void _starpu_register_new_data(starpu_data_handle_t handle,
 	{
 		struct _starpu_data_replicate *replicate;
 		replicate = &handle->per_node[node];
-		
+
 		replicate->memory_node = node;
 		replicate->relaxed_coherency = 0;
 		replicate->refcnt = 0;
 
-		if (node == home_node) {
+		if (node == home_node)
+		{
 			/* this is the home node with the only valid copy */
 			replicate->state = STARPU_OWNER;
 			replicate->allocated = 1;
 			replicate->automatically_allocated = 0;
 		}
-		else {
+		else
+		{
 			/* the value is not available here yet */
 			replicate->state = STARPU_INVALID;
 			replicate->allocated = 0;
@@ -323,7 +326,7 @@ int starpu_data_set_tag(starpu_data_handle_t handle, int tag)
         return 0;
 }
 
-/* 
+/*
  * Stop monitoring a piece of data
  */
 
@@ -360,20 +363,22 @@ void _starpu_data_free_interfaces(starpu_data_handle_t handle)
 	}
 }
 
-struct _starpu_unregister_callback_arg {
+struct _starpu_unregister_callback_arg
+{
 	unsigned memory_node;
 	starpu_data_handle_t handle;
 	unsigned terminated;
 	pthread_mutex_t mutex;
 	pthread_cond_t cond;
-}; 
+};
 
 /* Check whether we should tell starpu_data_unregister that the data handle is
  * not busy any more.
  * The header is supposed to be locked */
 void _starpu_data_check_not_busy(starpu_data_handle_t handle)
 {
-	if (!handle->busy_count && handle->busy_waiting) {
+	if (!handle->busy_count && handle->busy_waiting)
+	{
 		_STARPU_PTHREAD_MUTEX_LOCK(&handle->busy_mutex);
 		_STARPU_PTHREAD_COND_BROADCAST(&handle->busy_cond);
 		_STARPU_PTHREAD_MUTEX_UNLOCK(&handle->busy_mutex);
@@ -393,7 +398,7 @@ static void _starpu_data_unregister_fetch_data_callback(void *_arg)
 
 	ret = _starpu_fetch_data_on_node(handle, replicate, STARPU_R, 0, NULL, NULL);
 	STARPU_ASSERT(!ret);
-	
+
 	/* unlock the caller */
 	_STARPU_PTHREAD_MUTEX_LOCK(&arg->mutex);
 	arg->terminated = 1;
@@ -414,7 +419,7 @@ static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned cohere
 
 		/* Fetch data in the home of the data to ensure we have a valid copy
 		 * where we registered it */
-		int home_node = handle->home_node; 
+		int home_node = handle->home_node;
 		if (home_node >= 0)
 		{
 			struct _starpu_unregister_callback_arg arg;
@@ -423,7 +428,7 @@ static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned cohere
 			arg.terminated = 0;
 			_STARPU_PTHREAD_MUTEX_INIT(&arg.mutex, NULL);
 			_STARPU_PTHREAD_COND_INIT(&arg.cond, NULL);
-	
+
 			if (!_starpu_attempt_to_submit_data_request_from_apps(handle, STARPU_R,
 					_starpu_data_unregister_fetch_data_callback, &arg))
 			{
@@ -432,7 +437,8 @@ static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned cohere
 				int ret = _starpu_fetch_data_on_node(handle, home_replicate, STARPU_R, 0, NULL, NULL);
 				STARPU_ASSERT(!ret);
 			}
-			else {
+			else
+			{
 				_STARPU_PTHREAD_MUTEX_LOCK(&arg.mutex);
 				while (!arg.terminated)
 					_STARPU_PTHREAD_COND_WAIT(&arg.cond, &arg.mutex);
@@ -441,7 +447,8 @@ static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned cohere
 			_starpu_release_data_on_node(handle, 0, &handle->per_node[home_node]);
 		}
 	}
-	else {
+	else
+	{
 		/* Should we postpone the unregister operation ? */
 		if ((handle->refcnt > 0) && handle->lazy_unregister)
 			return;
@@ -467,7 +474,8 @@ static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned cohere
 	{
 		struct _starpu_data_replicate *local = &handle->per_node[node];
 
-		if (local->allocated && local->automatically_allocated){
+		if (local->allocated && local->automatically_allocated)
+		{
 			/* free the data copy in a lazy fashion */
 			_starpu_request_mem_chunk_removal(handle, node);
 		}
@@ -502,12 +510,13 @@ void starpu_data_invalidate(starpu_data_handle_t handle)
 	{
 		struct _starpu_data_replicate *local = &handle->per_node[node];
 
-		if (local->allocated && local->automatically_allocated){
+		if (local->allocated && local->automatically_allocated)
+		{
 			/* free the data copy in a lazy fashion */
 			_starpu_request_mem_chunk_removal(handle, node);
 		}
 
-		local->state = STARPU_INVALID; 
+		local->state = STARPU_INVALID;
 	}
 
 	_starpu_spin_unlock(&handle->header_lock);

+ 21 - 20
src/datawizard/interfaces/matrix_filters.c

@@ -25,9 +25,9 @@
  */
 void starpu_block_filter_func(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks)
 {
-       struct starpu_matrix_interface *matrix_father = (struct starpu_matrix_interface *) father_interface;
-       struct starpu_matrix_interface *matrix_child = (struct starpu_matrix_interface *) child_interface;
-  
+	struct starpu_matrix_interface *matrix_father = (struct starpu_matrix_interface *) father_interface;
+	struct starpu_matrix_interface *matrix_child = (struct starpu_matrix_interface *) child_interface;
+
 	uint32_t nx = matrix_father->nx;
 	uint32_t ny = matrix_father->ny;
 	size_t elemsize = matrix_father->elemsize;
@@ -36,21 +36,22 @@ void starpu_block_filter_func(void *father_interface, void *child_interface, STA
 
 	size_t chunk_size = ((size_t)nx + nchunks - 1)/nchunks;
 	size_t offset = (size_t)id*chunk_size*elemsize;
-	
-	uint32_t child_nx = 
+
+	uint32_t child_nx =
 	  STARPU_MIN(chunk_size, (size_t)nx - (size_t)id*chunk_size);
-	
+
 	/* update the child's interface */
 	matrix_child->nx = child_nx;
 	matrix_child->ny = ny;
 	matrix_child->elemsize = elemsize;
-	
+
 	/* is the information on this node valid ? */
-	if (matrix_father->ptr) {
-	  matrix_child->ptr = matrix_father->ptr + offset;
-	  matrix_child->ld = matrix_father->ld;
-	  matrix_child->dev_handle = matrix_father->dev_handle;
-	  matrix_child->offset = matrix_father->offset + offset;
+	if (matrix_father->ptr)
+	{
+		matrix_child->ptr = matrix_father->ptr + offset;
+		matrix_child->ld = matrix_father->ld;
+		matrix_child->dev_handle = matrix_father->dev_handle;
+		matrix_child->offset = matrix_father->offset + offset;
 	}
 }
 
@@ -66,7 +67,7 @@ void starpu_vertical_block_filter_func(void *father_interface, void *child_inter
 	STARPU_ASSERT(nchunks <= ny);
 
 	size_t chunk_size = ((size_t)ny + nchunks - 1)/nchunks;
-	size_t child_ny = 
+	size_t child_ny =
 	  STARPU_MIN(chunk_size, (size_t)ny - (size_t)id*chunk_size);
 
 	matrix_child->nx = nx;
@@ -74,12 +75,12 @@ void starpu_vertical_block_filter_func(void *father_interface, void *child_inter
 	matrix_child->elemsize = elemsize;
 
 	/* is the information on this node valid ? */
-	if (matrix_father->ptr) {
-	  size_t offset = 
-	    (size_t)id*chunk_size*matrix_father->ld*elemsize;
-	  matrix_child->ptr = matrix_father->ptr + offset;
-	  matrix_child->ld = matrix_father->ld;
-	  matrix_child->dev_handle = matrix_father->dev_handle;
-	  matrix_child->offset = matrix_father->offset + offset;
+	if (matrix_father->ptr)
+	{
+		size_t offset = (size_t)id*chunk_size*matrix_father->ld*elemsize;
+		matrix_child->ptr = matrix_father->ptr + offset;
+		matrix_child->ld = matrix_father->ld;
+		matrix_child->dev_handle = matrix_father->dev_handle;
+		matrix_child->offset = matrix_father->offset + offset;
 	}
 }

+ 30 - 19
src/datawizard/interfaces/matrix_interface.c

@@ -46,7 +46,8 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARP
 static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, void *_event);
 #endif
 
-static const struct starpu_data_copy_methods matrix_copy_data_methods_s = {
+static const struct starpu_data_copy_methods matrix_copy_data_methods_s =
+{
 	.ram_to_ram = copy_ram_to_ram,
 	.ram_to_spu = NULL,
 #ifdef STARPU_USE_CUDA
@@ -80,10 +81,11 @@ static uint32_t footprint_matrix_interface_crc32(starpu_data_handle_t handle);
 static int matrix_compare(void *data_interface_a, void *data_interface_b);
 static void display_matrix_interface(starpu_data_handle_t handle, FILE *f);
 #ifdef STARPU_USE_GORDON
-static int convert_matrix_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss); 
+static int convert_matrix_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss);
 #endif
 
-struct starpu_data_interface_ops _starpu_interface_matrix_ops = {
+struct starpu_data_interface_ops _starpu_interface_matrix_ops =
+{
 	.register_data_handle = register_matrix_handle,
 	.allocate_data_on_node = allocate_matrix_buffer_on_node,
 	.handle_to_pointer = matrix_handle_to_pointer,
@@ -95,13 +97,13 @@ struct starpu_data_interface_ops _starpu_interface_matrix_ops = {
 #ifdef STARPU_USE_GORDON
 	.convert_to_gordon = convert_matrix_to_gordon,
 #endif
-	.interfaceid = STARPU_MATRIX_INTERFACE_ID, 
+	.interfaceid = STARPU_MATRIX_INTERFACE_ID,
 	.interface_size = sizeof(struct starpu_matrix_interface),
 	.display = display_matrix_interface
 };
 
 #ifdef STARPU_USE_GORDON
-static int convert_matrix_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss) 
+static int convert_matrix_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss)
 {
 	size_t elemsize = GET_MATRIX_ELEMSIZE(interface);
 	uint32_t nx = STARPU_MATRIX_GET_NX(interface);
@@ -128,13 +130,15 @@ static void register_matrix_handle(starpu_data_handle_t handle, uint32_t home_no
 		struct starpu_matrix_interface *local_interface = (struct starpu_matrix_interface *)
 			starpu_data_get_interface_on_node(handle, node);
 
-		if (node == home_node) {
+		if (node == home_node)
+		{
 			local_interface->ptr = matrix_interface->ptr;
                         local_interface->dev_handle = matrix_interface->dev_handle;
                         local_interface->offset = matrix_interface->offset;
 			local_interface->ld  = matrix_interface->ld;
 		}
-		else {
+		else
+		{
 			local_interface->ptr = 0;
 			local_interface->dev_handle = 0;
 			local_interface->offset = 0;
@@ -163,7 +167,8 @@ void starpu_matrix_data_register(starpu_data_handle_t *handleptr, uint32_t home_
 			uintptr_t ptr, uint32_t ld, uint32_t nx,
 			uint32_t ny, size_t elemsize)
 {
-	struct starpu_matrix_interface matrix_interface = {
+	struct starpu_matrix_interface matrix_interface =
+	{
 		.ptr = ptr,
 		.ld = ld,
 		.nx = nx,
@@ -206,7 +211,7 @@ static size_t matrix_interface_get_size(starpu_data_handle_t handle)
 		starpu_data_get_interface_on_node(handle, 0);
 
 	size_t size;
-	size = (size_t)matrix_interface->nx*matrix_interface->ny*matrix_interface->elemsize; 
+	size = (size_t)matrix_interface->nx*matrix_interface->ny*matrix_interface->elemsize;
 
 	return size;
 }
@@ -284,10 +289,11 @@ static ssize_t allocate_matrix_buffer_on_node(void *data_interface_, uint32_t ds
 
 	enum _starpu_node_kind kind = _starpu_get_node_kind(dst_node);
 
-	switch(kind) {
+	switch(kind)
+	{
 		case STARPU_CPU_RAM:
 			addr = (uintptr_t)malloc((size_t)nx*ny*elemsize);
-			if (!addr) 
+			if (!addr)
 				fail = 1;
 
 			break;
@@ -298,7 +304,7 @@ static ssize_t allocate_matrix_buffer_on_node(void *data_interface_, uint32_t ds
 			{
 				if (STARPU_UNLIKELY(status != cudaErrorMemoryAllocation))
 					 STARPU_CUDA_REPORT_ERROR(status);
-					
+
 				fail = 1;
 			}
 
@@ -313,7 +319,8 @@ static ssize_t allocate_matrix_buffer_on_node(void *data_interface_, uint32_t ds
                                 void *ptr;
                                 ret = _starpu_opencl_allocate_memory(&ptr, nx*ny*elemsize, CL_MEM_READ_WRITE);
                                 addr = (uintptr_t)ptr;
-				if (ret) {
+				if (ret)
+				{
 					fail = 1;
 				}
 				break;
@@ -323,7 +330,8 @@ static ssize_t allocate_matrix_buffer_on_node(void *data_interface_, uint32_t ds
 			assert(0);
 	}
 
-	if (!fail) {
+	if (!fail)
+	{
 		/* allocation succeeded */
 		allocated_memory = (size_t)nx*ny*elemsize;
 
@@ -332,11 +340,13 @@ static ssize_t allocate_matrix_buffer_on_node(void *data_interface_, uint32_t ds
                 matrix_interface->dev_handle = addr;
                 matrix_interface->offset = 0;
 		matrix_interface->ld = ld;
-	} else {
+	}
+	else
+	{
 		/* allocation failed */
 		allocated_memory = -ENOMEM;
 	}
-	
+
 	return allocated_memory;
 }
 
@@ -349,13 +359,14 @@ static void free_matrix_buffer_on_node(void *data_interface, uint32_t node)
 #endif
 
 	enum _starpu_node_kind kind = _starpu_get_node_kind(node);
-	switch(kind) {
+	switch(kind)
+	{
 		case STARPU_CPU_RAM:
 			free((void*)matrix_interface->ptr);
 			break;
 #ifdef STARPU_USE_CUDA
 		case STARPU_CUDA_RAM:
-			status = cudaFree((void*)matrix_interface->ptr);			
+			status = cudaFree((void*)matrix_interface->ptr);
 			if (STARPU_UNLIKELY(status))
 				STARPU_CUDA_REPORT_ERROR(status);
 
@@ -663,7 +674,7 @@ static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBU
 		uint32_t src_offset = y*ld_src*elemsize;
 		uint32_t dst_offset = y*ld_dst*elemsize;
 
-		memcpy((void *)(ptr_dst + dst_offset), 
+		memcpy((void *)(ptr_dst + dst_offset),
 			(void *)(ptr_src + src_offset), nx*elemsize);
 	}
 

+ 51 - 28
src/datawizard/interfaces/multiformat_interface.c

@@ -40,7 +40,8 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARP
 static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, void *_event);
 #endif
 
-static const struct starpu_data_copy_methods multiformat_copy_data_methods_s = {
+static const struct starpu_data_copy_methods multiformat_copy_data_methods_s =
+{
 	.ram_to_ram = copy_ram_to_ram,
 	.ram_to_spu = NULL,
 #ifdef STARPU_USE_CUDA
@@ -74,11 +75,12 @@ static int multiformat_compare(void *data_interface_a, void *data_interface_b);
 static void display_multiformat_interface(starpu_data_handle_t handle, FILE *f);
 static uint32_t starpu_multiformat_get_nx(starpu_data_handle_t handle);
 #ifdef STARPU_USE_GORDON
-static int convert_multiformat_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss); 
+static int convert_multiformat_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss);
 #endif
 
 
-static struct starpu_data_interface_ops interface_multiformat_ops = {
+static struct starpu_data_interface_ops interface_multiformat_ops =
+{
 	.register_data_handle  = register_multiformat_handle,
 	.allocate_data_on_node = allocate_multiformat_buffer_on_node,
 	.handle_to_pointer     = multiformat_handle_to_pointer,
@@ -98,10 +100,11 @@ static struct starpu_data_interface_ops interface_multiformat_ops = {
 static void *multiformat_handle_to_pointer(starpu_data_handle_t handle, uint32_t node)
 {
 	STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
-	struct starpu_multiformat_interface *multiformat_interface = 
+	struct starpu_multiformat_interface *multiformat_interface =
 		starpu_data_get_interface_on_node(handle, node);
 
-	switch(_starpu_get_node_kind(node)) {
+	switch(_starpu_get_node_kind(node))
+	{
 		case STARPU_CPU_RAM:
 			return multiformat_interface->cpu_ptr;
 #ifdef STARPU_USE_CUDA
@@ -123,11 +126,13 @@ static void register_multiformat_handle(starpu_data_handle_t handle, uint32_t ho
 	multiformat_interface = (struct starpu_multiformat_interface *) data_interface;
 
 	unsigned node;
-	for (node = 0; node < STARPU_MAXNODES; node++) {
+	for (node = 0; node < STARPU_MAXNODES; node++)
+	{
 		struct starpu_multiformat_interface *local_interface =
 			starpu_data_get_interface_on_node(handle, node);
 
-		if (node == home_node) {
+		if (node == home_node)
+		{
 			local_interface->cpu_ptr    = multiformat_interface->cpu_ptr;
 #ifdef STARPU_USE_CUDA
 			local_interface->cuda_ptr   = multiformat_interface->cuda_ptr;
@@ -138,7 +143,8 @@ static void register_multiformat_handle(starpu_data_handle_t handle, uint32_t ho
 			local_interface->dev_handle = multiformat_interface->dev_handle;
 			local_interface->offset     = multiformat_interface->offset;
 		}
-		else {
+		else
+		{
 			local_interface->cpu_ptr    = NULL;
 #ifdef STARPU_USE_CUDA
 			local_interface->cuda_ptr   = NULL;
@@ -161,7 +167,8 @@ void starpu_multiformat_data_register(starpu_data_handle_t *handleptr,
 				      uint32_t nobjects,
 				      struct starpu_multiformat_data_interface_ops *format_ops)
 {
-	struct starpu_multiformat_interface multiformat = {
+	struct starpu_multiformat_interface multiformat =
+	{
 		.cpu_ptr    = ptr,
 #ifdef STARPU_USE_CUDA
 		.cuda_ptr   = NULL,
@@ -169,7 +176,7 @@ void starpu_multiformat_data_register(starpu_data_handle_t *handleptr,
 #ifdef STARPu_USE_OPENCL
 		.opencl_ptr = NULL,
 #endif
-		.nx         = nobjects, 
+		.nx         = nobjects,
 		.dev_handle = (uintptr_t) ptr,
 		.offset     = 0,
 		.ops        = format_ops
@@ -231,7 +238,8 @@ static void free_multiformat_buffer_on_node(void *data_interface, uint32_t node)
 	multiformat_interface = (struct starpu_multiformat_interface *) data_interface;
 	enum _starpu_node_kind kind = _starpu_get_node_kind(node);
 
-	switch(kind) {
+	switch(kind)
+	{
 		case STARPU_CPU_RAM:
 			free(multiformat_interface->cpu_ptr);
 			multiformat_interface->cpu_ptr = NULL;
@@ -261,14 +269,17 @@ static ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, uint32
 	ssize_t allocated_memory;
 
 	enum _starpu_node_kind kind = _starpu_get_node_kind(dst_node);
-	switch(kind) {
+	switch(kind)
+	{
 		case STARPU_CPU_RAM:
 			allocated_memory = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
 			addr = (uintptr_t)malloc(allocated_memory);
-			if (!addr) {
+			if (!addr)
+			{
 				fail = 1;
 			}
-			else {
+			else
+			{
 				multiformat_interface->cpu_ptr = (void *) addr;
 				multiformat_interface->dev_handle = addr;
 			}
@@ -278,10 +289,12 @@ static ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, uint32
 			{
 				allocated_memory = multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize;
 				cudaError_t status = cudaMalloc((void **)&addr, allocated_memory);
-				if (STARPU_UNLIKELY(status)) {
+				if (STARPU_UNLIKELY(status))
+				{
 					STARPU_CUDA_REPORT_ERROR(status);
 				}
-				else {
+				else
+				{
 					multiformat_interface->cuda_ptr = (void *)addr;
 					multiformat_interface->dev_handle = addr;
 				}
@@ -296,10 +309,12 @@ static ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, uint32
 				allocated_memory = multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize;
                                 ret = _starpu_opencl_allocate_memory(&ptr, allocated_memory, CL_MEM_READ_WRITE);
                                 addr = (uintptr_t)ptr;
-				if (ret) {
+				if (ret)
+				{
 					fail = 1;
 				}
-				else {
+				else
+				{
 					multiformat_interface->opencl_ptr = (void *)addr;
 					multiformat_interface->dev_handle = addr;
 
@@ -358,11 +373,13 @@ static int copy_cuda_common(void *src_interface, unsigned src_node,
 
 	cudaError_t status;
 
-	switch (kind) {
+	switch (kind)
+	{
 		case cudaMemcpyHostToDevice:
 		{
 			size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
-			if (src_multiformat->cuda_ptr == NULL) {
+			if (src_multiformat->cuda_ptr == NULL)
+			{
 				src_multiformat->cuda_ptr = malloc(size);
 				if (src_multiformat->cuda_ptr == NULL)
 					return -ENOMEM;
@@ -376,7 +393,8 @@ static int copy_cuda_common(void *src_interface, unsigned src_node,
 			dst_multiformat->conversion_time = starpu_timing_now() - tmp;
 
 			status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
-			if (STARPU_UNLIKELY(status)) {
+			if (STARPU_UNLIKELY(status))
+			{
 				STARPU_CUDA_REPORT_ERROR(status);
 			}
 			break;
@@ -387,12 +405,12 @@ static int copy_cuda_common(void *src_interface, unsigned src_node,
 			status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
 			if (STARPU_UNLIKELY(status))
 				STARPU_CUDA_REPORT_ERROR(status);
-		
+
 			void *buffers[1];
 			struct starpu_codelet *cl = src_multiformat->ops->cuda_to_cpu_cl;
 			buffers[0] = dst_interface;
 			cl->cpu_func(buffers, NULL);
-							  
+
 			break;
 		}
 		case cudaMemcpyDeviceToDevice:
@@ -431,11 +449,13 @@ static int copy_cuda_common_async(void *src_interface, unsigned src_node, void *
 	size_t size;
 	cudaError_t status;
 
-	switch (kind) {
+	switch (kind)
+	{
 		case cudaMemcpyHostToDevice:
 		{
 			size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
-			if (src_multiformat->cuda_ptr == NULL) {
+			if (src_multiformat->cuda_ptr == NULL)
+			{
 				src_multiformat->cuda_ptr = malloc(size);
 				if (src_multiformat->cuda_ptr == NULL)
 					return -ENOMEM;
@@ -451,7 +471,8 @@ static int copy_cuda_common_async(void *src_interface, unsigned src_node, void *
 
 			/* Actual copy from host to device */
 			status = cudaMemcpyAsync(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind, stream);
-			if (STARPU_UNLIKELY(status)) {
+			if (STARPU_UNLIKELY(status))
+			{
 				STARPU_CUDA_REPORT_ERROR(status);
 			}
 			break;
@@ -602,9 +623,11 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node,
 
 	size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize;
 
-	if (src_multiformat->opencl_ptr == NULL) {
+	if (src_multiformat->opencl_ptr == NULL)
+	{
 		src_multiformat->opencl_ptr = malloc(src_multiformat->nx * src_multiformat->ops->opencl_elemsize);
-		if (src_multiformat->opencl_ptr == NULL) {
+		if (src_multiformat->opencl_ptr == NULL)
+		{
 			return -ENOMEM;
 		}
 	}

+ 27 - 17
src/datawizard/interfaces/variable_interface.c

@@ -42,7 +42,8 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node, void
 static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, void *_event);
 #endif
 
-static const struct starpu_data_copy_methods variable_copy_data_methods_s = {
+static const struct starpu_data_copy_methods variable_copy_data_methods_s =
+{
 	.ram_to_ram = copy_ram_to_ram,
 	.ram_to_spu = NULL,
 #ifdef STARPU_USE_CUDA
@@ -75,10 +76,11 @@ static uint32_t footprint_variable_interface_crc32(starpu_data_handle_t handle);
 static int variable_compare(void *data_interface_a, void *data_interface_b);
 static void display_variable_interface(starpu_data_handle_t handle, FILE *f);
 #ifdef STARPU_USE_GORDON
-static int convert_variable_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss); 
+static int convert_variable_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss);
 #endif
 
-static struct starpu_data_interface_ops interface_variable_ops = {
+static struct starpu_data_interface_ops interface_variable_ops =
+{
 	.register_data_handle = register_variable_handle,
 	.allocate_data_on_node = allocate_variable_buffer_on_node,
 	.handle_to_pointer = variable_handle_to_pointer,
@@ -91,7 +93,7 @@ static struct starpu_data_interface_ops interface_variable_ops = {
 	.convert_to_gordon = convert_variable_to_gordon,
 #endif
 	.interfaceid = STARPU_VARIABLE_INTERFACE_ID,
-	.interface_size = sizeof(struct starpu_variable_interface), 
+	.interface_size = sizeof(struct starpu_variable_interface),
 	.display = display_variable_interface
 };
 
@@ -110,10 +112,12 @@ static void register_variable_handle(starpu_data_handle_t handle, uint32_t home_
 		struct starpu_variable_interface *local_interface = (struct starpu_variable_interface *)
 			starpu_data_get_interface_on_node(handle, node);
 
-		if (node == home_node) {
+		if (node == home_node)
+		{
 			local_interface->ptr = STARPU_VARIABLE_GET_PTR(data_interface);
 		}
-		else {
+		else
+		{
 			local_interface->ptr = 0;
 		}
 
@@ -122,7 +126,7 @@ static void register_variable_handle(starpu_data_handle_t handle, uint32_t home_
 }
 
 #ifdef STARPU_USE_GORDON
-int convert_variable_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss) 
+int convert_variable_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss)
 {
 	*ptr = STARPU_VARIABLE_GET_PTR(interface);
 	(*ss).size = STARPU_VARIABLE_GET_ELEMSIZE(interface);
@@ -135,12 +139,13 @@ int convert_variable_to_gordon(void *data_interface, uint64_t *ptr, gordon_strid
 void starpu_variable_data_register(starpu_data_handle_t *handleptr, uint32_t home_node,
                         uintptr_t ptr, size_t elemsize)
 {
-	struct starpu_variable_interface variable = {
+	struct starpu_variable_interface variable =
+	{
 		.ptr = ptr,
 		.elemsize = elemsize
-	};	
+	};
 
-	starpu_data_register(handleptr, home_node, &variable, &interface_variable_ops); 
+	starpu_data_register(handleptr, home_node, &variable, &interface_variable_ops);
 }
 
 
@@ -156,7 +161,7 @@ static int variable_compare(void *data_interface_a, void *data_interface_b)
 
 	/* Two variables are considered compatible if they have the same size */
 	return (variable_a->elemsize == variable_b->elemsize);
-} 
+}
 
 static void display_variable_interface(starpu_data_handle_t handle, FILE *f)
 {
@@ -208,7 +213,8 @@ static ssize_t allocate_variable_buffer_on_node(void *data_interface_, uint32_t
 	cudaError_t status;
 #endif
 
-	switch(kind) {
+	switch(kind)
+	{
 		case STARPU_CPU_RAM:
 			addr = (uintptr_t)malloc(elemsize);
 			if (!addr)
@@ -233,7 +239,8 @@ static ssize_t allocate_variable_buffer_on_node(void *data_interface_, uint32_t
                                 void *ptr;
                                 ret = _starpu_opencl_allocate_memory(&ptr, elemsize, CL_MEM_READ_WRITE);
                                 addr = (uintptr_t)ptr;
-				if (ret) {
+				if (ret)
+				{
 					fail = 1;
 				}
 				break;
@@ -251,14 +258,15 @@ static ssize_t allocate_variable_buffer_on_node(void *data_interface_, uint32_t
 
 	/* update the data properly in consequence */
 	variable_interface->ptr = addr;
-	
+
 	return allocated_memory;
 }
 
 static void free_variable_buffer_on_node(void *data_interface, uint32_t node)
 {
 	enum _starpu_node_kind kind = _starpu_get_node_kind(node);
-	switch(kind) {
+	switch(kind)
+	{
 		case STARPU_CPU_RAM:
 			free((void*)STARPU_VARIABLE_GET_PTR(data_interface));
 			break;
@@ -315,7 +323,8 @@ static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRI
 	{
 		return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToDevice);
 	}
-	else {
+	else
+	{
 #ifdef HAVE_CUDA_MEMCPY_PEER
 		int src_dev = _starpu_memory_node_to_devid(src_node);
 		int dst_dev = _starpu_memory_node_to_devid(dst_node);
@@ -384,7 +393,8 @@ static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node,					v
 	{
 		return copy_cuda_async_common(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyDeviceToDevice);
 	}
-	else {
+	else
+	{
 #ifdef HAVE_CUDA_MEMCPY_PEER
 		int src_dev = _starpu_memory_node_to_devid(src_node);
 		int dst_dev = _starpu_memory_node_to_devid(dst_node);

+ 42 - 38
src/datawizard/interfaces/vector_filters.c

@@ -24,7 +24,7 @@ void starpu_block_filter_func_vector(void *father_interface, void *child_interfa
 {
         struct starpu_vector_interface *vector_father = (struct starpu_vector_interface *) father_interface;
         struct starpu_vector_interface *vector_child = (struct starpu_vector_interface *) child_interface;
-	
+
 	uint32_t nx = vector_father->nx;
 	size_t elemsize = vector_father->elemsize;
 
@@ -33,16 +33,17 @@ void starpu_block_filter_func_vector(void *father_interface, void *child_interfa
 	uint32_t chunk_size = (nx + nchunks - 1)/nchunks;
 	size_t offset = id*chunk_size*elemsize;
 
-	uint32_t child_nx = 
+	uint32_t child_nx =
 	  STARPU_MIN(chunk_size, nx - id*chunk_size);
 
 	vector_child->nx = child_nx;
 	vector_child->elemsize = elemsize;
 
-	if (vector_father->ptr) {
-	  vector_child->ptr = vector_father->ptr + offset;
-	  vector_child->dev_handle = vector_father->dev_handle;
-	  vector_child->offset = vector_father->offset + offset;
+	if (vector_father->ptr)
+	{
+		vector_child->ptr = vector_father->ptr + offset;
+		vector_child->dev_handle = vector_father->dev_handle;
+		vector_child->offset = vector_father->offset + offset;
 	}
 }
 
@@ -51,7 +52,7 @@ void starpu_vector_divide_in_2_filter_func(void *father_interface, void *child_i
 {
         /* there cannot be more than 2 chunks */
         STARPU_ASSERT(id < 2);
-	
+
 	struct starpu_vector_interface *vector_father = (struct starpu_vector_interface *) father_interface;
 	struct starpu_vector_interface *vector_child = (struct starpu_vector_interface *) child_interface;
 
@@ -61,29 +62,31 @@ void starpu_vector_divide_in_2_filter_func(void *father_interface, void *child_i
 	size_t elemsize = vector_father->elemsize;
 
 	STARPU_ASSERT(length_first < nx);
-	
+
 	/* this is the first child */
-	if (id == 0) {
-	  vector_child->nx = length_first;
-	  vector_child->elemsize = elemsize;
-
-	  if (vector_father->ptr) {
-	    vector_child->ptr = vector_father->ptr;
-	    vector_child->offset = vector_father->offset;
-	    vector_child->dev_handle = vector_father->dev_handle;
-	  }
+	if (id == 0)
+	{
+		vector_child->nx = length_first;
+		vector_child->elemsize = elemsize;
+
+		if (vector_father->ptr)
+		{
+			vector_child->ptr = vector_father->ptr;
+			vector_child->offset = vector_father->offset;
+			vector_child->dev_handle = vector_father->dev_handle;
+		}
 	}
-
-	/* the second child */
-	else {
-	  vector_child->nx = nx - length_first;
-	  vector_child->elemsize = elemsize;
-
-	  if (vector_father->ptr) {
-	    vector_child->ptr = vector_father->ptr + length_first*elemsize;
-	    vector_child->offset = vector_father->offset + length_first*elemsize;
-	    vector_child->dev_handle = vector_father->dev_handle;
-	  }
+	else /* the second child */
+	{
+		vector_child->nx = nx - length_first;
+		vector_child->elemsize = elemsize;
+
+		if (vector_father->ptr)
+		{
+			vector_child->ptr = vector_father->ptr + length_first*elemsize;
+			vector_child->offset = vector_father->offset + length_first*elemsize;
+			vector_child->dev_handle = vector_father->dev_handle;
+		}
 	}
 }
 
@@ -103,15 +106,16 @@ void starpu_vector_list_filter_func(void *father_interface, void *child_interfac
 
 	vector_child->nx = chunk_size;
 	vector_child->elemsize = elemsize;
-	
-	if (vector_father->ptr) {
-	  /* compute the current position */
-	  unsigned i;
-	  for (i = 0; i < id; i++) 
-	    current_pos += length_tab[i];
-	  
-	  vector_child->ptr = vector_father->ptr + current_pos*elemsize;
-	  vector_child->offset = vector_father->offset + current_pos*elemsize;
-	  vector_child->dev_handle = vector_father->dev_handle;
+
+	if (vector_father->ptr)
+	{
+		/* compute the current position */
+		unsigned i;
+		for (i = 0; i < id; i++)
+			current_pos += length_tab[i];
+
+		vector_child->ptr = vector_father->ptr + current_pos*elemsize;
+		vector_child->offset = vector_father->offset + current_pos*elemsize;
+		vector_child->dev_handle = vector_father->dev_handle;
 	}
 }

+ 29 - 19
src/datawizard/interfaces/vector_interface.c

@@ -42,7 +42,8 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARP
 static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, void *_event);
 #endif
 
-static const struct starpu_data_copy_methods vector_copy_data_methods_s = {
+static const struct starpu_data_copy_methods vector_copy_data_methods_s =
+{
 	.ram_to_ram = copy_ram_to_ram,
 	.ram_to_spu = NULL,
 #ifdef STARPU_USE_CUDA
@@ -75,10 +76,11 @@ static uint32_t footprint_vector_interface_crc32(starpu_data_handle_t handle);
 static int vector_compare(void *data_interface_a, void *data_interface_b);
 static void display_vector_interface(starpu_data_handle_t handle, FILE *f);
 #ifdef STARPU_USE_GORDON
-static int convert_vector_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss); 
+static int convert_vector_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss);
 #endif
 
-static struct starpu_data_interface_ops interface_vector_ops = {
+static struct starpu_data_interface_ops interface_vector_ops =
+{
 	.register_data_handle = register_vector_handle,
 	.allocate_data_on_node = allocate_vector_buffer_on_node,
 	.handle_to_pointer = vector_handle_to_pointer,
@@ -91,7 +93,7 @@ static struct starpu_data_interface_ops interface_vector_ops = {
 	.convert_to_gordon = convert_vector_to_gordon,
 #endif
 	.interfaceid = STARPU_VECTOR_INTERFACE_ID,
-	.interface_size = sizeof(struct starpu_vector_interface), 
+	.interface_size = sizeof(struct starpu_vector_interface),
 	.display = display_vector_interface
 };
 
@@ -115,12 +117,14 @@ static void register_vector_handle(starpu_data_handle_t handle, uint32_t home_no
 		struct starpu_vector_interface *local_interface = (struct starpu_vector_interface *)
 			starpu_data_get_interface_on_node(handle, node);
 
-		if (node == home_node) {
+		if (node == home_node)
+		{
 			local_interface->ptr = vector_interface->ptr;
                         local_interface->dev_handle = vector_interface->dev_handle;
                         local_interface->offset = vector_interface->offset;
 		}
-		else {
+		else
+		{
 			local_interface->ptr = 0;
                         local_interface->dev_handle = 0;
                         local_interface->offset = 0;
@@ -132,10 +136,10 @@ static void register_vector_handle(starpu_data_handle_t handle, uint32_t home_no
 }
 
 #ifdef STARPU_USE_GORDON
-int convert_vector_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss) 
+int convert_vector_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss)
 {
 	struct starpu_vector_interface *vector_interface = interface;
-	
+
 	*ptr = vector_interface->ptr;
 	(*ss).size = vector_interface->nx * vector_interface->elemsize;
 
@@ -147,15 +151,16 @@ int convert_vector_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideS
 void starpu_vector_data_register(starpu_data_handle_t *handleptr, uint32_t home_node,
                         uintptr_t ptr, uint32_t nx, size_t elemsize)
 {
-	struct starpu_vector_interface vector = {
+	struct starpu_vector_interface vector =
+	{
 		.ptr = ptr,
 		.nx = nx,
 		.elemsize = elemsize,
                 .dev_handle = ptr,
                 .offset = 0
-	};	
+	};
 
-	starpu_data_register(handleptr, home_node, &vector, &interface_vector_ops); 
+	starpu_data_register(handleptr, home_node, &vector, &interface_vector_ops);
 }
 
 
@@ -243,7 +248,8 @@ static ssize_t allocate_vector_buffer_on_node(void *data_interface_, uint32_t ds
 	cudaError_t status;
 #endif
 
-	switch(kind) {
+	switch(kind)
+	{
 		case STARPU_CPU_RAM:
 			addr = (uintptr_t)malloc(nx*elemsize);
 			if (!addr)
@@ -268,7 +274,8 @@ static ssize_t allocate_vector_buffer_on_node(void *data_interface_, uint32_t ds
                                 void *ptr;
                                 ret = _starpu_opencl_allocate_memory(&ptr, nx*elemsize, CL_MEM_READ_WRITE);
                                 addr = (uintptr_t)ptr;
-				if (ret) {
+				if (ret)
+				{
 					fail = 1;
 				}
 				break;
@@ -288,7 +295,7 @@ static ssize_t allocate_vector_buffer_on_node(void *data_interface_, uint32_t ds
 	vector_interface->ptr = addr;
         vector_interface->dev_handle = addr;
         vector_interface->offset = 0;
-	
+
 	return allocated_memory;
 }
 
@@ -301,7 +308,8 @@ static void free_vector_buffer_on_node(void *data_interface, uint32_t node)
 #endif
 
 	enum _starpu_node_kind kind = _starpu_get_node_kind(node);
-	switch(kind) {
+	switch(kind)
+	{
 		case STARPU_CPU_RAM:
 			free((void*)vector_interface->ptr);
 			break;
@@ -395,7 +403,8 @@ static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRI
 	{
 		return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToDevice);
 	}
-	else {
+	else
+	{
 #ifdef HAVE_CUDA_MEMCPY_PEER
 		return copy_cuda_peer_common(src_interface, src_node, dst_interface, dst_node, 0, 0);
 #else
@@ -433,13 +442,14 @@ static int copy_cuda_async_common(void *src_interface, unsigned src_node STARPU_
 	return -EAGAIN;
 }
 
-static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node,					void *dst_interface, unsigned dst_node, cudaStream_t stream)
+static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream)
 {
 	if (src_node == dst_node)
 	{
 		return copy_cuda_async_common(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyDeviceToDevice);
 	}
-	else {
+	else
+	{
 #ifdef HAVE_CUDA_MEMCPY_PEER
 		return copy_cuda_peer_common(src_interface, src_node, dst_interface, dst_node, 1, stream);
 #else
@@ -525,7 +535,7 @@ static int copy_opencl_to_opencl(void *src_interface, unsigned src_node STARPU_A
 
 	size_t size = src_vector->nx*src_vector->elemsize;
 
-	err = clEnqueueCopyBuffer(cq, (cl_mem)src_vector->dev_handle, (cl_mem)dst_vector->dev_handle, src_vector->offset, dst_vector->offset, size, 0, NULL, NULL); 
+	err = clEnqueueCopyBuffer(cq, (cl_mem)src_vector->dev_handle, (cl_mem)dst_vector->dev_handle, src_vector->offset, dst_vector->offset, size, 0, NULL, NULL);
         if (STARPU_UNLIKELY(err))
                 STARPU_OPENCL_REPORT_ERROR(err);
 

+ 6 - 4
src/datawizard/interfaces/void_interface.c

@@ -33,7 +33,8 @@ static int dummy_cuda_copy_async(void *src_interface, unsigned src_node, void *d
 static int dummy_opencl_copy_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *_event);
 #endif
 
-static const struct starpu_data_copy_methods void_copy_data_methods_s = {
+static const struct starpu_data_copy_methods void_copy_data_methods_s =
+{
 	.ram_to_ram = dummy_copy,
 	.ram_to_spu = dummy_copy,
 #ifdef STARPU_USE_CUDA
@@ -64,7 +65,8 @@ static uint32_t footprint_void_interface_crc32(starpu_data_handle_t handle);
 static int void_compare(void *data_interface_a, void *data_interface_b);
 static void display_void_interface(starpu_data_handle_t handle, FILE *f);
 
-static struct starpu_data_interface_ops interface_void_ops = {
+static struct starpu_data_interface_ops interface_void_ops =
+{
 	.register_data_handle = register_void_handle,
 	.allocate_data_on_node = allocate_void_buffer_on_node,
 	.free_data_on_node = free_void_buffer_on_node,
@@ -73,7 +75,7 @@ static struct starpu_data_interface_ops interface_void_ops = {
 	.footprint = footprint_void_interface_crc32,
 	.compare = void_compare,
 	.interfaceid = STARPU_VOID_INTERFACE_ID,
-	.interface_size = 0, 
+	.interface_size = 0,
 	.display = display_void_interface
 };
 
@@ -87,7 +89,7 @@ static void register_void_handle(starpu_data_handle_t handle STARPU_ATTRIBUTE_UN
 /* declare a new data with the void interface */
 void starpu_void_data_register(starpu_data_handle_t *handleptr)
 {
-	starpu_data_register(handleptr, 0, NULL, &interface_void_ops); 
+	starpu_data_register(handleptr, 0, NULL, &interface_void_ops);
 }
 
 

+ 20 - 10
src/datawizard/memalloc.c

@@ -76,7 +76,8 @@ static void lock_all_subtree(starpu_data_handle_t handle)
 		while (_starpu_spin_trylock(&handle->header_lock))
 			_starpu_datawizard_progress(_starpu_get_local_memory_node(), 0);
 	}
-	else {
+	else
+	{
 		/* lock all sub-subtrees children */
 		unsigned child;
 		for (child = 0; child < handle->nchildren; child++)
@@ -93,7 +94,8 @@ static void unlock_all_subtree(starpu_data_handle_t handle)
 		/* this is a leaf */
 		_starpu_spin_unlock(&handle->header_lock);
 	}
-	else {
+	else
+	{
 		/* unlock all sub-subtrees children
 		 * Note that this is done in the reverse order of the
 		 * lock_all_subtree so that we avoid deadlock */
@@ -143,7 +145,8 @@ static void transfer_subtree_to_node(starpu_data_handle_t handle, unsigned src_n
 		struct _starpu_data_replicate *dst_replicate = &handle->per_node[dst_node];
 
 		/* this is a leaf */
-		switch(src_replicate->state) {
+		switch(src_replicate->state)
+		{
 		case STARPU_OWNER:
 			/* the local node has the only copy */
 			/* the owner is now the destination_node */
@@ -176,7 +179,8 @@ static void transfer_subtree_to_node(starpu_data_handle_t handle, unsigned src_n
 			cnt = 0;
 			for (i = 0; i < STARPU_MAXNODES; i++)
 			{
-				if (handle->per_node[i].state == STARPU_SHARED) {
+				if (handle->per_node[i].state == STARPU_SHARED)
+				{
 					cnt++;
 					last = i;
 				}
@@ -194,7 +198,8 @@ static void transfer_subtree_to_node(starpu_data_handle_t handle, unsigned src_n
 			break;
 		}
 	}
-	else {
+	else
+	{
 		/* lock all sub-subtrees children */
 		unsigned child;
 		for (child = 0; child < handle->nchildren; child++)
@@ -317,7 +322,8 @@ static size_t try_to_free_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node)
 
 		_starpu_spin_unlock(&handle->header_lock);
 	}
-	else {
+	else
+	{
 		/* try to lock all the leafs of the subtree */
 		lock_all_subtree(handle);
 
@@ -562,7 +568,8 @@ static size_t free_potentially_in_use_mc(uint32_t node, unsigned force, size_t r
 				break;
 			#endif
 		}
-		else {
+		else
+		{
 			/* We must free the memory now: note that data
 			 * coherency is not maintained in that case ! */
 			freed += do_free_mem_chunk(mc, node);
@@ -663,7 +670,8 @@ void _starpu_request_mem_chunk_removal(starpu_data_handle_t handle, unsigned nod
 	{
 		next_mc = _starpu_mem_chunk_list_next(mc);
 
-		if (mc->data == handle) {
+		if (mc->data == handle)
+		{
 			/* we found the data */
 			mc->data_was_deleted = 1;
 
@@ -756,7 +764,8 @@ static ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, struct _s
 	_STARPU_TRACE_END_ALLOC_REUSE(dst_node);
 #endif
 
-	do {
+	do
+	{
 		STARPU_ASSERT(handle->ops);
 		STARPU_ASSERT(handle->ops->allocate_data_on_node);
 
@@ -806,7 +815,8 @@ static ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, struct _s
 			_starpu_data_check_not_busy(handle);
 		}
 
-	} while((allocated_memory == -ENOMEM) && attempts++ < 2);
+	}
+	while((allocated_memory == -ENOMEM) && attempts++ < 2);
 
 	return allocated_memory;
 }

+ 1 - 1
src/datawizard/memalloc.h

@@ -33,7 +33,7 @@ LIST_TYPE(_starpu_mem_chunk,
 	size_t size;
 
 	uint32_t footprint;
-	
+
 	/* The footprint of the data is not sufficient to determine whether two
 	 * pieces of data have the same layout (there could be collision in the
 	 * hash function ...) so we still keep a copy of the actual layout (ie.

+ 10 - 9
src/datawizard/memory_nodes.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009-2011  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -28,15 +28,16 @@ static pthread_key_t memory_node_key;
 
 void _starpu_init_memory_nodes(void)
 {
-	/* there is no node yet, subsequent nodes will be 
+	/* there is no node yet, subsequent nodes will be
 	 * added using _starpu_register_memory_node */
 	descr.nnodes = 0;
 
 	pthread_key_create(&memory_node_key, NULL);
 
 	unsigned i;
-	for (i = 0; i < STARPU_MAXNODES; i++) {
-		descr.nodes[i] = STARPU_UNUSED; 
+	for (i = 0; i < STARPU_MAXNODES; i++)
+	{
+		descr.nodes[i] = STARPU_UNUSED;
 		descr.nworkers[i] = 0;
 	}
 
@@ -64,8 +65,8 @@ unsigned _starpu_get_local_memory_node(void)
 {
 	unsigned *memory_node;
 	memory_node = (unsigned *) pthread_getspecific(memory_node_key);
-	
-	/* in case this is called by the programmer, we assume the RAM node 
+
+	/* in case this is called by the programmer, we assume the RAM node
 	   is the appropriate memory node ... so we return 0 XXX */
 	if (STARPU_UNLIKELY(!memory_node))
 		return 0;
@@ -127,7 +128,7 @@ void _starpu_memory_node_register_condition(pthread_cond_t *cond, pthread_mutex_
 {
 	unsigned cond_id;
 	unsigned nconds_total, nconds;
-	
+
 	pthread_rwlock_wrlock(&descr.conditions_rwlock);
 
 	/* we only insert the queue if it's not already in the list */
@@ -150,7 +151,7 @@ void _starpu_memory_node_register_condition(pthread_cond_t *cond, pthread_mutex_
 	descr.condition_count[nodeid]++;
 
 	/* do we have to add it in the global list as well ? */
-	nconds_total = descr.total_condition_count; 
+	nconds_total = descr.total_condition_count;
 	for (cond_id = 0; cond_id < nconds_total; cond_id++)
 	{
 		if (descr.conditions_all[cond_id].cond == cond)
@@ -159,7 +160,7 @@ void _starpu_memory_node_register_condition(pthread_cond_t *cond, pthread_mutex_
 			pthread_rwlock_unlock(&descr.conditions_rwlock);
 			return;
 		}
-	} 
+	}
 
 	/* it was not in the global list either */
 	descr.conditions_all[nconds_total].cond = cond;

+ 6 - 3
src/datawizard/memory_nodes.h

@@ -23,7 +23,8 @@
 #include <datawizard/coherency.h>
 #include <datawizard/memalloc.h>
 
-enum _starpu_node_kind {
+enum _starpu_node_kind
+{
 	STARPU_UNUSED     = 0x00,
 	STARPU_CPU_RAM    = 0x01,
 	STARPU_CUDA_RAM   = 0x02,
@@ -35,12 +36,14 @@ enum _starpu_node_kind {
 #define _STARPU_MEMORY_NODE_TUPLE_FIRST(tuple) (tuple & 0x0F)
 #define _STARPU_MEMORY_NODE_TUPLE_SECOND(tuple) (tuple & 0xF0)
 
-struct _starpu_cond_and_mutex {
+struct _starpu_cond_and_mutex
+{
         pthread_cond_t *cond;
         pthread_mutex_t *mutex;
 };
 
-struct _starpu_mem_node_descr {
+struct _starpu_mem_node_descr
+{
 	unsigned nnodes;
 	enum _starpu_node_kind nodes[STARPU_MAXNODES];
 

+ 26 - 24
src/datawizard/reduction.c

@@ -50,10 +50,11 @@ void _starpu_redux_init_data_replicate(starpu_data_handle_t handle, struct _star
 	STARPU_ASSERT(init_cl);
 
 	_starpu_cl_func init_func = NULL;
-	
+
 	/* TODO Check that worker may execute the codelet */
 
-	switch (starpu_worker_get_type(workerid)) {
+	switch (starpu_worker_get_type(workerid))
+	{
 		case STARPU_CPU_WORKER:
 			init_func = init_cl->cpu_func;
 			break;
@@ -112,7 +113,7 @@ void _starpu_data_end_reduction_mode(starpu_data_handle_t handle)
 			/* Make sure the replicate is not removed */
 			handle->per_worker[worker].refcnt++;
 
-			uint32_t home_node = starpu_worker_get_memory_node(worker); 
+			uint32_t home_node = starpu_worker_get_memory_node(worker);
 			starpu_data_register(&handle->reduction_tmp_handles[worker],
 				home_node, handle->per_worker[worker].data_interface, handle->ops);
 
@@ -120,7 +121,8 @@ void _starpu_data_end_reduction_mode(starpu_data_handle_t handle)
 
 			replicate_array[replicate_count++] = handle->reduction_tmp_handles[worker];
 		}
-		else {
+		else
+		{
 			handle->reduction_tmp_handles[worker] = NULL;
 		}
 	}
@@ -133,7 +135,7 @@ void _starpu_data_end_reduction_mode(starpu_data_handle_t handle)
 #endif
 
 //	fprintf(stderr, "REDUX REFCNT = %d\n", handle->reduction_refcnt);
-	
+
 	if (replicate_count > 0)
 	{
 		/* Temporarily unlock the handle */
@@ -144,7 +146,7 @@ void _starpu_data_end_reduction_mode(starpu_data_handle_t handle)
 		 * replicate */
 		struct starpu_task *last_replicate_deps[replicate_count];
 		memset(last_replicate_deps, 0, replicate_count*sizeof(struct starpu_task *));
-	
+
 		unsigned step = 1;
 		while (step <= replicate_count)
 		{
@@ -156,42 +158,42 @@ void _starpu_data_end_reduction_mode(starpu_data_handle_t handle)
 					/* Perform the reduction between replicates i
 					 * and i+step and put the result in replicate i */
 					struct starpu_task *redux_task = starpu_task_create();
-		
+
 					redux_task->cl = handle->redux_cl;
 					STARPU_ASSERT(redux_task->cl);
-		
+
 					redux_task->buffers[0].handle = replicate_array[i];
 					redux_task->buffers[0].mode = STARPU_RW;
-		
+
 					redux_task->buffers[1].handle = replicate_array[i+step];
 					redux_task->buffers[1].mode = STARPU_R;
-	
+
 					redux_task->detach = 0;
-	
+
 					int ndeps = 0;
 					struct starpu_task *task_deps[2];
-	
+
 					if (last_replicate_deps[i])
 						task_deps[ndeps++] = last_replicate_deps[i];
-	
+
 					if (last_replicate_deps[i+step])
 						task_deps[ndeps++] = last_replicate_deps[i+step];
-	
+
 					/* i depends on this task */
 					last_replicate_deps[i] = redux_task;
-	
+
 					/* we don't perform the reduction until both replicates are ready */
-					starpu_task_declare_deps_array(redux_task, ndeps, task_deps); 
-		
+					starpu_task_declare_deps_array(redux_task, ndeps, task_deps);
+
 					int ret = starpu_task_submit(redux_task);
 					STARPU_ASSERT(!ret);
-		
+
 				}
 			}
 
 			step *= 2;
 		}
-	
+
 		struct starpu_task *redux_task = starpu_task_create();
 
 		/* Mark these tasks so that StarPU does not block them
@@ -222,23 +224,23 @@ void _starpu_data_end_reduction_mode(starpu_data_handle_t handle)
 		for (replicate = 0; replicate < replicate_count; replicate++)
 		{
 			struct starpu_task *redux_task = starpu_task_create();
-	
+
 			/* Mark these tasks so that StarPU does not block them
 			 * when they try to access the handle (normal tasks and
 			 * data requests to that handle are frozen until the
 			 * data is coherent again). */
 			struct _starpu_job *j = _starpu_get_job_associated_to_task(redux_task);
 			j->reduction_task = 1;
-	
+
 			redux_task->cl = handle->redux_cl;
 			STARPU_ASSERT(redux_task->cl);
-	
+
 			redux_task->buffers[0].handle = handle;
 			redux_task->buffers[0].mode = STARPU_RW;
-	
+
 			redux_task->buffers[1].handle = replicate_array[replicate];
 			redux_task->buffers[1].mode = STARPU_R;
-	
+
 			int ret = starpu_task_submit(redux_task);
 			STARPU_ASSERT(!ret);
 		}

+ 1 - 1
src/datawizard/sort_data_handles.c

@@ -39,7 +39,7 @@ static void find_data_path(struct _starpu_data_state *data, unsigned path[])
 		path[depth - level - 1] = current->sibling_index;
 		current = data->father_handle;
 	}
-} 
+}
 
 static int _compar_data_paths(const unsigned pathA[], unsigned depthA,
 				const unsigned pathB[], unsigned depthB)

+ 15 - 8
src/datawizard/user_interactions.c

@@ -41,7 +41,8 @@ int starpu_data_request_allocation(starpu_data_handle_t handle, uint32_t node)
 	return 0;
 }
 
-struct user_interaction_wrapper {
+struct user_interaction_wrapper
+{
 	starpu_data_handle_t handle;
 	enum starpu_access_mode mode;
 	unsigned node;
@@ -159,7 +160,8 @@ int starpu_data_acquire_cb(starpu_data_handle_t handle,
 		int ret = starpu_task_submit(wrapper->pre_sync_task);
 		STARPU_ASSERT(!ret);
 	}
-	else {
+	else
+	{
 		_STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
 
 		starpu_data_acquire_cb_pre_sync_callback(wrapper);
@@ -183,7 +185,7 @@ static inline void _starpu_data_acquire_continuation(void *arg)
 	struct _starpu_data_replicate *ram_replicate = &handle->per_node[0];
 
 	_starpu_fetch_data_on_node(handle, ram_replicate, wrapper->mode, 0, NULL, NULL);
-	
+
 	/* continuation of starpu_data_acquire */
 	_STARPU_PTHREAD_MUTEX_LOCK(&wrapper->lock);
 	wrapper->finished = 1;
@@ -198,7 +200,8 @@ int starpu_data_acquire(starpu_data_handle_t handle, enum starpu_access_mode mod
         _STARPU_LOG_IN();
 
 	/* unless asynchronous, it is forbidden to call this function from a callback or a codelet */
-	if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls())) {
+	if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls()))
+	{
                 _STARPU_LOG_OUT_TAG("EDEADLK");
 		return -EDEADLK;
         }
@@ -240,7 +243,8 @@ int starpu_data_acquire(starpu_data_handle_t handle, enum starpu_access_mode mod
 		STARPU_ASSERT(!ret);
 		//starpu_task_wait(wrapper.pre_sync_task);
 	}
-	else {
+	else
+	{
 		_STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
 	}
 
@@ -254,7 +258,8 @@ int starpu_data_acquire(starpu_data_handle_t handle, enum starpu_access_mode mod
 		int ret = _starpu_fetch_data_on_node(handle, ram_replicate, mode, 0, NULL, NULL);
 		STARPU_ASSERT(!ret);
 	}
-	else {
+	else
+	{
 		_STARPU_PTHREAD_MUTEX_LOCK(&wrapper.lock);
 		while (!wrapper.finished)
 			_STARPU_PTHREAD_COND_WAIT(&wrapper.cond, &wrapper.lock);
@@ -336,7 +341,8 @@ int _starpu_prefetch_data_on_node_with_mode(starpu_data_handle_t handle, unsigne
 
 		_starpu_spin_lock(&handle->header_lock);
 
-		if (!async) {
+		if (!async)
+		{
 			replicate->refcnt--;
 			STARPU_ASSERT(replicate->refcnt >= 0);
 			STARPU_ASSERT(handle->busy_count > 0);
@@ -348,7 +354,8 @@ int _starpu_prefetch_data_on_node_with_mode(starpu_data_handle_t handle, unsigne
 		_starpu_spin_unlock(&handle->header_lock);
 
 	}
-	else if (!async) {
+	else if (!async)
+	{
 		_STARPU_PTHREAD_MUTEX_LOCK(&wrapper->lock);
 		while (!wrapper->finished)
 			_STARPU_PTHREAD_COND_WAIT(&wrapper->cond, &wrapper->lock);

+ 9 - 6
src/datawizard/write_back.c

@@ -19,7 +19,8 @@
 #include <datawizard/write_back.h>
 #include <core/dependencies/data_concurrency.h>
 
-static void wt_callback(void *arg) {
+static void wt_callback(void *arg)
+{
 	starpu_data_handle_t handle = (starpu_data_handle_t) arg;
 
 	_starpu_spin_lock(&handle->header_lock);
@@ -27,10 +28,11 @@ static void wt_callback(void *arg) {
 	_starpu_spin_unlock(&handle->header_lock);
 }
 
-void _starpu_write_through_data(starpu_data_handle_t handle, uint32_t requesting_node, 
+void _starpu_write_through_data(starpu_data_handle_t handle, uint32_t requesting_node,
 				uint32_t write_through_mask)
 {
-	if ((write_through_mask & ~(1<<requesting_node)) == 0) {
+	if ((write_through_mask & ~(1<<requesting_node)) == 0)
+	{
 		/* nothing will be done ... */
 		return;
 	}
@@ -39,9 +41,10 @@ void _starpu_write_through_data(starpu_data_handle_t handle, uint32_t requesting
 	uint32_t node;
 	for (node = 0; node < _starpu_get_memory_nodes_count(); node++)
 	{
-		if (write_through_mask & (1<<node)) {
+		if (write_through_mask & (1<<node))
+		{
 			/* we need to commit the buffer on that node */
-			if (node != requesting_node) 
+			if (node != requesting_node)
 			{
 				while (_starpu_spin_trylock(&handle->header_lock))
 					_starpu_datawizard_progress(requesting_node, 1);
@@ -71,7 +74,7 @@ void starpu_data_set_wt_mask(starpu_data_handle_t handle, uint32_t wt_mask)
 	handle->wt_mask = wt_mask;
 
 	/* in case the data has some children, set their wt_mask as well */
-	if (handle->nchildren > 0) 
+	if (handle->nchildren > 0)
 	{
 		unsigned child;
 		for (child = 0; child < handle->nchildren; child++)

+ 2 - 2
src/datawizard/write_back.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009, 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -24,7 +24,7 @@
 /* If a write-through mask is associated to that data handle, this propagates
  * the current value of the data onto the different memory nodes in the
  * write_through_mask. */
-void _starpu_write_through_data(starpu_data_handle_t handle, uint32_t requesting_node, 
+void _starpu_write_through_data(starpu_data_handle_t handle, uint32_t requesting_node,
 					   uint32_t write_through_mask);
 
 #endif // __DW_WRITE_BACK_H__

+ 2 - 1
src/debug/starpu_debug_helpers.h

@@ -22,7 +22,8 @@
 #include <starpu_util.h>
 
 #ifdef __cplusplus
-extern "C" {
+extern "C"
+{
 #endif
 
 /* Perform a ping pong between the two memory nodes */

+ 70 - 51
src/debug/traces/starpu_fxt.c

@@ -97,7 +97,7 @@ static struct _starpu_symbol_name_list *symbol_list;
 
 LIST_TYPE(_starpu_communication,
 	unsigned comid;
-	float comm_start;	
+	float comm_start;
 	float bandwidth;
 	unsigned src_node;
 	unsigned dst_node;
@@ -175,7 +175,7 @@ static void update_accumulated_time(int worker, double sleep_time, double exec_t
 	 * point in our graph */
 	double elapsed = current_timestamp - last_activity_flush_timestamp[worker];
 	if (forceflush || (elapsed > ACTIVITY_PERIOD))
-	{		
+	{
 		if (activity_file)
 			fprintf(activity_file, "%d\t%f\t%f\t%f\t%f\n", worker, current_timestamp, elapsed, accumulated_exec_time[worker], accumulated_sleep_time[worker]);
 
@@ -197,7 +197,7 @@ static void handle_new_mem_node(struct fxt_ev_64 *ev, struct starpu_fxt_options
 	if (out_paje_file)
 	{
 		fprintf(out_paje_file, "7       %f	%"PRIu64"      Mn      %sp	%sMEMNODE%"PRIu64"\n", get_event_time_stamp(ev, options), ev->param[0], prefix, options->file_prefix, ev->param[0]);
-	
+
 		if (!options->no_bus)
 			fprintf(out_paje_file, "13       %f bw %sMEMNODE%"PRIu64" 0.0\n", 0.0f, prefix, ev->param[0]);
 	}
@@ -205,10 +205,10 @@ static void handle_new_mem_node(struct fxt_ev_64 *ev, struct starpu_fxt_options
 
 static void handle_worker_init_start(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
 {
-	/* 
+	/*
 	   arg0 : type of worker (cuda, cpu ..)
 	   arg1 : memory node
-	   arg2 : thread id 
+	   arg2 : thread id
 	*/
 	char *prefix = options->file_prefix;
 
@@ -222,7 +222,8 @@ static void handle_worker_init_start(struct fxt_ev_64 *ev, struct starpu_fxt_opt
 	char *kindstr = "";
 	enum starpu_perf_archtype archtype = 0;
 
-	switch (ev->param[0]) {
+	switch (ev->param[0])
+	{
 		case _STARPU_FUT_APPS_KEY:
 			set_next_other_worker_color(workerid);
 			kindstr = "apps";
@@ -326,7 +327,8 @@ static void create_paje_state_if_not_found(char *name, struct starpu_fxt_options
 		green = (1.0f * hash_symbol_green) / hash_sum;
 		blue = (1.0f * hash_symbol_blue) / hash_sum;
 	}
-	else {
+	else
+	{
 		/* Use the hardcoded value for execution mode */
 		red = 0.0f;
 		green = 0.6f;
@@ -384,7 +386,7 @@ static void handle_end_codelet_body(struct fxt_ev_64 *ev, struct starpu_fxt_opti
 	float codelet_length = (end_codelet_time - last_codelet_start[worker]);
 
 	update_accumulated_time(worker, 0.0, codelet_length, end_codelet_time, 0);
-	
+
 	if (distrib_time)
 	fprintf(distrib_time, "%s\t%s%d\t%ld\t%"PRIx32"\t%f\n", last_codelet_symbol[worker],
 				prefix, worker, codelet_size, codelet_hash, codelet_length);
@@ -417,11 +419,12 @@ static void handle_user_event(struct fxt_ev_64 *ev, struct starpu_fxt_options *o
 	if (worker < 0)
 	{
 		if (out_paje_file)
-		fprintf(out_paje_file, "9       %f     event      %sp      %lu\n", get_event_time_stamp(ev, options), prefix, code);
+			fprintf(out_paje_file, "9       %f     event      %sp      %lu\n", get_event_time_stamp(ev, options), prefix, code);
 	}
-	else {
+	else
+	{
 		if (out_paje_file)
-		fprintf(out_paje_file, "9       %f     event      %s%"PRIu64"      %lu\n", get_event_time_stamp(ev, options), prefix, ev->param[1], code);
+			fprintf(out_paje_file, "9       %f     event      %s%"PRIu64"      %lu\n", get_event_time_stamp(ev, options), prefix, ev->param[1], code);
 	}
 }
 
@@ -433,7 +436,7 @@ static void handle_start_callback(struct fxt_ev_64 *ev, struct starpu_fxt_option
 		return;
 
 	if (out_paje_file)
-	fprintf(out_paje_file, "10       %f	S      %s%"PRIu64"      C\n", get_event_time_stamp(ev, options), options->file_prefix, ev->param[1] );
+		fprintf(out_paje_file, "10       %f	S      %s%"PRIu64"      C\n", get_event_time_stamp(ev, options), options->file_prefix, ev->param[1] );
 }
 
 static void handle_end_callback(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
@@ -444,7 +447,7 @@ static void handle_end_callback(struct fxt_ev_64 *ev, struct starpu_fxt_options
 		return;
 
 	if (out_paje_file)
-	fprintf(out_paje_file, "10       %f	S      %s%"PRIu64"      B\n", get_event_time_stamp(ev, options), options->file_prefix, ev->param[1] );
+		fprintf(out_paje_file, "10       %f	S      %s%"PRIu64"      B\n", get_event_time_stamp(ev, options), options->file_prefix, ev->param[1] );
 }
 
 static void handle_worker_status(struct fxt_ev_64 *ev, struct starpu_fxt_options *options, const char *newstatus)
@@ -455,8 +458,8 @@ static void handle_worker_status(struct fxt_ev_64 *ev, struct starpu_fxt_options
 		return;
 
 	if (out_paje_file)
-	fprintf(out_paje_file, "10       %f	S      %s%"PRIu64"      %s\n",
-				get_event_time_stamp(ev, options), options->file_prefix, ev->param[1], newstatus);
+		fprintf(out_paje_file, "10       %f	S      %s%"PRIu64"      %s\n",
+			get_event_time_stamp(ev, options), options->file_prefix, ev->param[1], newstatus);
 }
 
 static double last_sleep_start[STARPU_NMAXWORKERS];
@@ -471,8 +474,8 @@ static void handle_start_sleep(struct fxt_ev_64 *ev, struct starpu_fxt_options *
 	last_sleep_start[worker] = start_sleep_time;
 
 	if (out_paje_file)
-	fprintf(out_paje_file, "10       %f	S      %s%"PRIu64"      Sl\n",
-				get_event_time_stamp(ev, options), options->file_prefix, ev->param[0]);
+		fprintf(out_paje_file, "10       %f	S      %s%"PRIu64"      Sl\n",
+			get_event_time_stamp(ev, options), options->file_prefix, ev->param[0]);
 }
 
 static void handle_end_sleep(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
@@ -484,8 +487,8 @@ static void handle_end_sleep(struct fxt_ev_64 *ev, struct starpu_fxt_options *op
 	float end_sleep_timestamp = get_event_time_stamp(ev, options);
 
 	if (out_paje_file)
-	fprintf(out_paje_file, "10       %f	S      %s%"PRIu64"      B\n",
-				end_sleep_timestamp, options->file_prefix, ev->param[0]);
+		fprintf(out_paje_file, "10       %f	S      %s%"PRIu64"      B\n",
+			end_sleep_timestamp, options->file_prefix, ev->param[0]);
 
 	double sleep_length = end_sleep_timestamp - last_sleep_start[worker];
 
@@ -531,7 +534,7 @@ static void handle_end_driver_copy(struct fxt_ev_64 *ev, struct starpu_fxt_optio
 	unsigned dst = ev->param[1];
 	unsigned size = ev->param[2];
 	unsigned comid = ev->param[3];
-	
+
 	char *prefix = options->file_prefix;
 
 	if (!options->no_bus)
@@ -631,14 +634,14 @@ static void handle_job_pop(struct fxt_ev_64 *ev, struct starpu_fxt_options *opti
 		fprintf(out_paje_file, "13       %f ntask %ssched %f\n", current_timestamp, options->file_prefix, (float)curq_size);
 
 	if (activity_file)
-	fprintf(activity_file, "cnt_ready\t%f\t%d\n", current_timestamp, curq_size);
+		fprintf(activity_file, "cnt_ready\t%f\t%d\n", current_timestamp, curq_size);
 }
 
 static
 void handle_update_task_cnt(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
 {
 	float current_timestamp = get_event_time_stamp(ev, options);
-	unsigned long nsubmitted = ev->param[0]; 
+	unsigned long nsubmitted = ev->param[0];
 	if (activity_file)
 	fprintf(activity_file, "cnt_submitted\t%f\t%lu\n", current_timestamp, nsubmitted);
 }
@@ -648,8 +651,8 @@ static void handle_codelet_tag_deps(struct fxt_ev_64 *ev)
 	uint64_t child;
 	uint64_t father;
 
-	child = ev->param[0]; 
-	father = ev->param[1]; 
+	child = ev->param[0];
+	father = ev->param[1];
 
 	_starpu_fxt_dag_add_tag_deps(child, father);
 }
@@ -676,14 +679,16 @@ static void handle_task_done(struct fxt_ev_64 *ev, struct starpu_fxt_options *op
 
 	const char *colour;
 	char buffer[32];
-	if (options->per_task_colour) {
+	if (options->per_task_colour)
+	{
 		snprintf(buffer, 32, "#%x%x%x",
-			get_colour_symbol_red(name)/4,
-			get_colour_symbol_green(name)/4,
-			get_colour_symbol_blue(name)/4);
+			 get_colour_symbol_red(name)/4,
+			 get_colour_symbol_green(name)/4,
+			 get_colour_symbol_blue(name)/4);
 		colour = &buffer[0];
 	}
-	else {
+	else
+	{
 		colour= (worker < 0)?"#aaaaaa":get_worker_color(worker);
 	}
 
@@ -706,14 +711,16 @@ static void handle_tag_done(struct fxt_ev_64 *ev, struct starpu_fxt_options *opt
 
 	const char *colour;
 	char buffer[32];
-	if (options->per_task_colour) {
+	if (options->per_task_colour)
+	{
 		snprintf(buffer, 32, "%.4f,%.4f,%.4f",
-			get_colour_symbol_red(name)/1024.0,
-			get_colour_symbol_green(name)/1024.0,
-			get_colour_symbol_blue(name)/1024.0);
+			 get_colour_symbol_red(name)/1024.0,
+			 get_colour_symbol_green(name)/1024.0,
+			 get_colour_symbol_blue(name)/1024.0);
 		colour = &buffer[0];
 	}
-	else {
+	else
+	{
 		colour= (worker < 0)?"0.0,0.0,0.0":get_worker_color(worker);
 	}
 
@@ -797,18 +804,20 @@ void starpu_fxt_parse_new_file(char *filename_in, struct starpu_fxt_options *opt
 	/* Open the trace file */
 	int fd_in;
 	fd_in = open(filename_in, O_RDONLY);
-	if (fd_in < 0) {
+	if (fd_in < 0)
+	{
 	        perror("open failed :");
 	        exit(-1);
 	}
 
 	static fxt_t fut;
 	fut = fxt_fdopen(fd_in);
-	if (!fut) {
+	if (!fut)
+	{
 	        perror("fxt_fdopen :");
 	        exit(-1);
 	}
-	
+
 	fxt_blockev_t block;
 	block = fxt_blockev_enter(fut);
 
@@ -834,13 +843,16 @@ void starpu_fxt_parse_new_file(char *filename_in, struct starpu_fxt_options *opt
 	}
 
 	struct fxt_ev_64 ev;
-	while(1) {
+	while(1)
+	{
 		int ret = fxt_next_ev(block, FXT_EV_TYPE_64, (struct fxt_ev *)&ev);
-		if (ret != FXT_EV_OK) {
+		if (ret != FXT_EV_OK)
+		{
 			break;
 		}
 
-		switch (ev.code) {
+		switch (ev.code)
+		{
 			case _STARPU_FUT_WORKER_INIT_START:
 				handle_worker_init_start(&ev, options);
 				break;
@@ -1052,7 +1064,8 @@ void starpu_fxt_distrib_file_init(struct starpu_fxt_options *options)
 	{
 		distrib_time = fopen(options->distrib_time_path, "w+");
 	}
-	else {
+	else
+	{
 		distrib_time = NULL;
 	}
 }
@@ -1102,7 +1115,8 @@ void starpu_fxt_paje_file_init(struct starpu_fxt_options *options)
 
 		_starpu_fxt_write_paje_header(out_paje_file);
 	}
-	else {
+	else
+	{
 		out_paje_file = NULL;
 	}
 }
@@ -1119,18 +1133,20 @@ static uint64_t starpu_fxt_find_start_time(char *filename_in)
 	/* Open the trace file */
 	int fd_in;
 	fd_in = open(filename_in, O_RDONLY);
-	if (fd_in < 0) {
+	if (fd_in < 0)
+	{
 	        perror("open failed :");
 	        exit(-1);
 	}
 
 	static fxt_t fut;
 	fut = fxt_fdopen(fd_in);
-	if (!fut) {
+	if (!fut)
+	{
 	        perror("fxt_fdopen :");
 	        exit(-1);
 	}
-	
+
 	fxt_blockev_t block;
 	block = fxt_blockev_enter(fut);
 
@@ -1166,7 +1182,8 @@ void starpu_fxt_generate_trace(struct starpu_fxt_options *options)
 
 		starpu_fxt_parse_new_file(options->filenames[0], options);
 	}
-	else {
+	else
+	{
 		unsigned inputfile;
 
 		uint64_t offsets[64];
@@ -1181,7 +1198,7 @@ void starpu_fxt_generate_trace(struct starpu_fxt_options *options)
 		 * More generally:
 		 *	- psi_k(x) = x - offset_k
 		 */
-		
+
 		int unique_keys[64];
 		int rank_k[64];
 		uint64_t start_k[64];
@@ -1191,13 +1208,13 @@ void starpu_fxt_generate_trace(struct starpu_fxt_options *options)
 
 		unsigned found_one_sync_point = 0;
 		int key = 0;
-		unsigned display_mpi = 0; 
+		unsigned display_mpi = 0;
 
 		/* Compute all start_k */
 		for (inputfile = 0; inputfile < options->ninputfiles; inputfile++)
 		{
 			uint64_t file_start = starpu_fxt_find_start_time(options->filenames[inputfile]);
-			start_k[inputfile] = file_start; 
+			start_k[inputfile] = file_start;
 		}
 
 		/* Compute all sync_k if they exist */
@@ -1212,14 +1229,16 @@ void starpu_fxt_generate_trace(struct starpu_fxt_options *options)
 				/* There was no sync point, we assume there is no offset */
 				sync_k_exists[inputfile] = 0;
 			}
-			else {
+			else
+			{
 				if (!found_one_sync_point)
 				{
 					key = unique_keys[inputfile];
 					display_mpi = 1;
 					found_one_sync_point = 1;
 				}
-				else {
+				else
+				{
 					if (key != unique_keys[inputfile])
 					{
 						fprintf(stderr, "Warning: traces are coming from different run so we will not try to display MPI communications.\n");

+ 9 - 8
src/debug/traces/starpu_fxt_dag.c

@@ -35,7 +35,8 @@ void _starpu_fxt_dag_init(char *out_path)
 
 	/* create a new file */
 	out_file = fopen(out_path, "w+");
-	if (!out_file) {
+	if (!out_file)
+	{
 		fprintf(stderr,"error while opening %s\n", out_path);
 		perror("fopen");
 		exit(1);
@@ -66,27 +67,27 @@ void _starpu_fxt_dag_terminate(void)
 void _starpu_fxt_dag_add_tag_deps(uint64_t child, uint64_t father)
 {
 	if (out_file)
-	fprintf(out_file, "\t \"tag_%llx\"->\"tag_%llx\"\n", 
-		(unsigned long long)father, (unsigned long long)child);
+		fprintf(out_file, "\t \"tag_%llx\"->\"tag_%llx\"\n",
+			(unsigned long long)father, (unsigned long long)child);
 }
 
 void _starpu_fxt_dag_add_task_deps(unsigned long dep_prev, unsigned long dep_succ)
 {
 	if (out_file)
-	fprintf(out_file, "\t \"task_%lx\"->\"task_%lx\"\n", dep_prev, dep_succ);
-} 
+		fprintf(out_file, "\t \"task_%lx\"->\"task_%lx\"\n", dep_prev, dep_succ);
+}
 
 void _starpu_fxt_dag_set_tag_done(uint64_t tag, const char *color)
 {
 	if (out_file)
-	fprintf(out_file, "\t \"tag_%llx\" [ style=filled, label=\"\", color=\"%s\"]\n", 
-		(unsigned long long)tag, color);
+		fprintf(out_file, "\t \"tag_%llx\" [ style=filled, label=\"\", color=\"%s\"]\n",
+			(unsigned long long)tag, color);
 }
 
 void _starpu_fxt_dag_set_task_done(unsigned long job_id, const char *label, const char *color)
 {
 	if (out_file)
-	fprintf(out_file, "\t \"task_%lx\" [ style=filled, label=\"%s\", color=\"%s\"]\n", job_id, label, color);
+		fprintf(out_file, "\t \"task_%lx\" [ style=filled, label=\"%s\", color=\"%s\"]\n", job_id, label, color);
 }
 
 void _starpu_fxt_dag_add_sync_point(void)

+ 17 - 9
src/debug/traces/starpu_fxt_mpi.c

@@ -21,7 +21,8 @@
 
 #include "starpu_fxt.h"
 
-struct mpi_transfer {
+struct mpi_transfer
+{
 	unsigned matched;
 	int other_rank; /* src for a recv, dest for a send */
 	int mpi_tag;
@@ -38,18 +39,20 @@ int _starpu_fxt_mpi_find_sync_point(char *filename_in, uint64_t *offset, int *ke
 	/* Open the trace file */
 	int fd_in;
 	fd_in = open(filename_in, O_RDONLY);
-	if (fd_in < 0) {
+	if (fd_in < 0)
+	{
 	        perror("open failed :");
 	        exit(-1);
 	}
 
 	static fxt_t fut;
 	fut = fxt_fdopen(fd_in);
-	if (!fut) {
+	if (!fut)
+	{
 	        perror("fxt_fdopen :");
 	        exit(-1);
 	}
-	
+
 	fxt_blockev_t block;
 	block = fxt_blockev_enter(fut);
 
@@ -57,9 +60,11 @@ int _starpu_fxt_mpi_find_sync_point(char *filename_in, uint64_t *offset, int *ke
 
 	int func_ret = -1;
 	unsigned found = 0;
-	while(!found) {
+	while(!found)
+	{
 		int ret = fxt_next_ev(block, FXT_EV_TYPE_64, (struct fxt_ev *)&ev);
-		if (ret != FXT_EV_OK) {
+		if (ret != FXT_EV_OK)
+		{
 			fprintf(stderr, "no more block ...\n");
 			break;
 		}
@@ -116,7 +121,8 @@ void _starpu_fxt_mpi_add_send_transfer(int src, int dst STARPU_ATTRIBUTE_UNUSED,
 		{
 			mpi_sends_list_size[src] *= 2;
 		}
-		else {
+		else
+		{
 			mpi_sends_list_size[src] = 1;
 		}
 
@@ -140,7 +146,8 @@ void _starpu_fxt_mpi_add_recv_transfer(int src STARPU_ATTRIBUTE_UNUSED, int dst,
 		{
 			mpi_recvs_list_size[dst] *= 2;
 		}
-		else {
+		else
+		{
 			mpi_recvs_list_size[dst] = 1;
 		}
 
@@ -174,7 +181,8 @@ struct mpi_transfer *try_to_match_send_transfer(int src STARPU_ATTRIBUTE_UNUSED,
 
 			all_previous_were_matched = 0;
 		}
-		else {
+		else
+		{
 			if (all_previous_were_matched)
 			{
 				/* All previous transfers are already matched,

+ 22 - 19
src/dolib.c

@@ -20,27 +20,30 @@
 #include <stdio.h>
 #include <stdlib.h>
 
-int main(int argc, char *argv[]) {
-  char *prog, *arch, *def, *name, *lib;
-  char s[1024];
+int main(int argc, char *argv[])
+{
+	char *prog, *arch, *def, *name, *lib;
+	char s[1024];
 
-  if (argc != 6) {
-    fprintf(stderr,"bad number of arguments");
-    exit(EXIT_FAILURE);
-  }
+	if (argc != 6)
+	{
+		fprintf(stderr,"bad number of arguments");
+		exit(EXIT_FAILURE);
+	}
 
-  prog = argv[1];
-  arch = argv[2];
-  def = argv[3];
-  name = argv[4];
-  lib = argv[5];
+	prog = argv[1];
+	arch = argv[2];
+	def = argv[3];
+	name = argv[4];
+	lib = argv[5];
 
-  snprintf(s, sizeof(s), "\"%s\" /machine:%s /def:%s /name:%s /out:%s",
-      prog, arch, def, name, lib);
-  if (system(s)) {
-    fprintf(stderr, "%s failed\n", s);
-    exit(EXIT_FAILURE);
-  }
+	snprintf(s, sizeof(s), "\"%s\" /machine:%s /def:%s /name:%s /out:%s",
+		 prog, arch, def, name, lib);
+	if (system(s))
+	{
+		fprintf(stderr, "%s failed\n", s);
+		exit(EXIT_FAILURE);
+	}
 
-  exit(EXIT_SUCCESS);
+	exit(EXIT_SUCCESS);
 }

+ 18 - 13
src/drivers/cpu/driver_cpu.c

@@ -56,12 +56,14 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct _starpu_worker *cpu_
 	 * execute the kernel at all. */
 	if ((rank == 0) || (cl->type != STARPU_FORKJOIN))
 	{
-		if (cl->cpu_func != STARPU_MULTIPLE_CPU_IMPLEMENTATIONS) {
+		if (cl->cpu_func != STARPU_MULTIPLE_CPU_IMPLEMENTATIONS)
+		{
 			_starpu_cl_func func = cl->cpu_func;
 			STARPU_ASSERT(func);
 			func(task->interfaces, task->cl_arg);
 		}
-		else {
+		else
+		{
 			/* _STARPU_DEBUG("CPU driver : running kernel (%d)\n", j->nimpl); */
 			_starpu_cl_func func = cl->cpu_funcs[j->nimpl];
 			STARPU_ASSERT(func);
@@ -131,8 +133,8 @@ void *_starpu_cpu_worker(void *arg)
 		_STARPU_PTHREAD_MUTEX_LOCK(cpu_arg->sched_mutex);
 
 		task = _starpu_pop_task(cpu_arg);
-	
-                if (!task) 
+
+                if (!task)
 		{
 			if (_starpu_worker_can_block(memnode))
 				_starpu_block_worker(workerid, cpu_arg->sched_cond, cpu_arg->sched_mutex);
@@ -142,13 +144,13 @@ void *_starpu_cpu_worker(void *arg)
 			continue;
 		};
 
-		_STARPU_PTHREAD_MUTEX_UNLOCK(cpu_arg->sched_mutex);	
+		_STARPU_PTHREAD_MUTEX_UNLOCK(cpu_arg->sched_mutex);
 
 		STARPU_ASSERT(task);
 		j = _starpu_get_job_associated_to_task(task);
-	
+
 		/* can a cpu perform that task ? */
-		if (!_STARPU_CPU_MAY_PERFORM(j)) 
+		if (!_STARPU_CPU_MAY_PERFORM(j))
 		{
 			/* put it and the end of the queue ... XXX */
 			_starpu_push_task(j, 0);
@@ -158,8 +160,8 @@ void *_starpu_cpu_worker(void *arg)
 		int rank = 0;
 		int is_parallel_task = (j->task_size > 1);
 
-		enum starpu_perf_archtype perf_arch; 
-	
+		enum starpu_perf_archtype perf_arch;
+
 		/* Get the rank in case it is a parallel task */
 		if (is_parallel_task)
 		{
@@ -179,7 +181,8 @@ void *_starpu_cpu_worker(void *arg)
 			cpu_arg->current_rank = rank;
 			perf_arch = combined_worker->perf_arch;
 		}
-		else {
+		else
+		{
 			cpu_arg->combined_workerid = cpu_arg->workerid;
 			cpu_arg->worker_size = 1;
 			cpu_arg->current_rank = 0;
@@ -192,12 +195,14 @@ void *_starpu_cpu_worker(void *arg)
 
 		_starpu_set_current_task(NULL);
 
-		if (res) {
-			switch (res) {
+		if (res)
+		{
+			switch (res)
+			{
 				case -EAGAIN:
 					_starpu_push_task(j, 0);
 					continue;
-				default: 
+				default:
 					assert(0);
 			}
 		}

+ 19 - 12
src/drivers/cuda/driver_cuda.c

@@ -63,7 +63,7 @@ static void limit_gpu_mem_if_needed(int devid)
 	_STARPU_DEBUG("CUDA device %d: Wasting %ld MB / Limit %ld MB / Total %ld MB / Remains %ld MB\n",
 			devid, (size_t)to_waste/(1024*1024), (size_t)limit, (size_t)totalGlobalMem/(1024*1024),
 			(size_t)(totalGlobalMem - to_waste)/(1024*1024));
-	
+
 	/* Allocate a large buffer to waste memory and constraint the amount of available memory. */
 	cures = cudaMalloc((void **)&wasted_memory[devid], to_waste);
 	if (STARPU_UNLIKELY(cures))
@@ -161,7 +161,8 @@ unsigned _starpu_get_cuda_device_count(void)
 	if (STARPU_UNLIKELY(cures))
 		 return 0;
 
-	if (cnt > STARPU_MAXCUDADEVS) {
+	if (cnt > STARPU_MAXCUDADEVS)
+	{
 		fprintf(stderr, "# Warning: %d CUDA devices available. Only %d enabled. Use configure option --enable-maxcudadev=xxx to update the maximum value of supported CUDA devices.\n", cnt, STARPU_MAXCUDADEVS);
 		cnt = STARPU_MAXCUDADEVS;
 	}
@@ -191,13 +192,14 @@ static int execute_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *arg
 	struct starpu_codelet *cl = task->cl;
 	STARPU_ASSERT(cl);
 
-	if (cl->model && cl->model->benchmarking) 
+	if (cl->model && cl->model->benchmarking)
 		calibrate_model = 1;
 
 	ret = _starpu_fetch_task_input(task, mask);
-	if (ret != 0) {
+	if (ret != 0)
+	{
 		/* there was not enough memory, so the input of
-		 * the codelet cannot be fetched ... put the 
+		 * the codelet cannot be fetched ... put the
 		 * codelet back, and try it later */
 		return -EAGAIN;
 	}
@@ -219,12 +221,14 @@ static int execute_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *arg
 		STARPU_CUDA_REPORT_ERROR(cures);
 #endif
 
-	if (cl->cuda_func != STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS) {
+	if (cl->cuda_func != STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS)
+	{
 		_starpu_cl_func func = cl->cuda_func;
 		STARPU_ASSERT(func);
 		func(task->interfaces, task->cl_arg);
 	}
-	else {
+	else
+	{
 		/* _STARPU_DEBUG("Cuda driver : running kernel * (%d)\n", j->nimpl); */
 		_starpu_cl_func func = cl->cuda_funcs[j->nimpl];
 		STARPU_ASSERT(func);
@@ -305,8 +309,8 @@ void *_starpu_cuda_worker(void *arg)
 		_STARPU_PTHREAD_MUTEX_LOCK(args->sched_mutex);
 
 		task = _starpu_pop_task(args);
-	
-                if (task == NULL) 
+
+                if (task == NULL)
 		{
 			if (_starpu_worker_can_block(memnode))
 				_starpu_block_worker(workerid, args->sched_cond, args->sched_mutex);
@@ -335,8 +339,10 @@ void *_starpu_cuda_worker(void *arg)
 
 		_starpu_set_current_task(NULL);
 
-		if (res) {
-			switch (res) {
+		if (res)
+		{
+			switch (res)
+			{
 				case -EAGAIN:
 					_STARPU_DISP("ouch, put the codelet %p back ... \n", j);
 					_starpu_push_task(j, 0);
@@ -372,7 +378,8 @@ void *_starpu_cuda_worker(void *arg)
 void starpu_cublas_report_error(const char *func, const char *file, int line, cublasStatus status)
 {
 	char *errormsg;
-	switch (status) {
+	switch (status)
+	{
 		case CUBLAS_STATUS_SUCCESS:
 			errormsg = "success";
 			break;

+ 12 - 8
src/drivers/driver_common/driver_common.c

@@ -39,13 +39,14 @@ void _starpu_driver_start_job(struct _starpu_worker *args, struct _starpu_job *j
 		calibrate_model = 1;
 
 	args->status = STATUS_EXECUTING;
-	task->status = STARPU_TASK_RUNNING;	
+	task->status = STARPU_TASK_RUNNING;
 
-	if (rank == 0) {
+	if (rank == 0)
+	{
 		cl->per_worker_stats[workerid]++;
 
 		profiling_info = task->profiling_info;
-	
+
 		if ((profiling && profiling_info) || calibrate_model || starpu_top)
 		{
 			_starpu_clock_gettime(codelet_start);
@@ -75,7 +76,8 @@ void _starpu_driver_end_job(struct _starpu_worker *args, struct _starpu_job *j,
 	if (cl->model && cl->model->benchmarking)
 		calibrate_model = 1;
 
-	if (rank == 0) {
+	if (rank == 0)
+	{
 		if ((profiling && profiling_info) || calibrate_model || starpu_top)
 			_starpu_clock_gettime(codelet_end);
 	}
@@ -112,7 +114,7 @@ void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_wo
 			memcpy(&profiling_info->end_time, codelet_end, sizeof(struct timespec));
 
 			profiling_info->workerid = workerid;
-			
+
 			_starpu_worker_update_profiling_info_executing(workerid, &measured_ts, 1,
 				profiling_info->used_cycles,
 				profiling_info->stall_cycles,
@@ -130,11 +132,13 @@ void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_wo
 	if (!updated)
 		_starpu_worker_update_profiling_info_executing(workerid, NULL, 1, 0, 0, 0);
 
-	if (profiling_info && profiling_info->power_consumed && cl->power_model && cl->power_model->benchmarking) {
+	if (profiling_info && profiling_info->power_consumed && cl->power_model && cl->power_model->benchmarking)
+	{
 		_starpu_update_perfmodel_history(j, j->task->cl->power_model,  perf_arch, worker_args->devid, profiling_info->power_consumed,j->nimpl);
-		}
+	}
 
-	if (j->task->cl->conversion_model) {
+	if (j->task->cl->conversion_model)
+	{
 		_starpu_update_perfmodel_history(j, j->task->cl->conversion_model, perf_arch,
 						 worker_args->devid, conversion_time, j->nimpl);
 	}

+ 47 - 31
src/drivers/gordon/driver_gordon.c

@@ -34,7 +34,8 @@ pthread_t progress_thread;
 pthread_cond_t progress_cond;
 pthread_mutex_t progress_mutex;
 
-struct gordon_task_wrapper_s {
+struct gordon_task_wrapper_s
+{
 	/* who has executed that ? */
 	struct _starpu_worker *worker;
 
@@ -53,7 +54,7 @@ void *gordon_worker_progress(void *arg)
 
 	/* fix the thread on the correct cpu */
 	struct _starpu_worker_set *gordon_set_arg = arg;
-	unsigned prog_thread_bind_id = 
+	unsigned prog_thread_bind_id =
 		(gordon_set_arg->workers[0].bindid + 1)%(gordon_set_arg->config->nhwcores);
 	_starpu_bind_thread_on_cpu(gordon_set_arg->config, prog_thread_bind_id);
 
@@ -62,8 +63,9 @@ void *gordon_worker_progress(void *arg)
 	_STARPU_PTHREAD_COND_SIGNAL(&progress_cond);
 	_STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex);
 
-	while (1) {
-		/* the Gordon runtime needs to make sure that we poll it 
+	while (1)
+	{
+		/* the Gordon runtime needs to make sure that we poll it
 		 * so that we handle jobs that are done */
 
 		/* wait for one task termination */
@@ -89,10 +91,11 @@ static void starpu_to_gordon_buffers(struct _starpu_job *j, struct gordon_ppu_jo
 
 	/* if it is non null, the argument buffer is considered
  	 * as the first read-only buffer */
-	if (task->cl_arg) {
+	if (task->cl_arg)
+	{
 		gordon_job->buffers[in] = (uint64_t)task->cl_arg;
 		gordon_job->ss[in].size = (uint32_t)task->cl_arg_size;
-		
+
 		nin++; in++;
 	}
 
@@ -103,7 +106,8 @@ static void starpu_to_gordon_buffers(struct _starpu_job *j, struct gordon_ppu_jo
 		struct starpu_buffer_descr *descr;
 		descr = &task->buffers[buffer];
 
-		switch (descr->mode) {
+		switch (descr->mode)
+		{
 			case STARPU_R:
 				nin++;
 				break;
@@ -123,7 +127,8 @@ static void starpu_to_gordon_buffers(struct _starpu_job *j, struct gordon_ppu_jo
 		struct starpu_buffer_descr *descr;
 		descr = &task->buffers[buffer];
 
-		switch (descr->mode) {
+		switch (descr->mode)
+		{
 			case STARPU_R:
 				gordon_buffer = in++;
 				break;
@@ -150,7 +155,7 @@ static void starpu_to_gordon_buffers(struct _starpu_job *j, struct gordon_ppu_jo
 	}
 }
 
-/* we assume the data are already available so that the data interface fields are 
+/* we assume the data are already available so that the data interface fields are
  * already filled */
 static struct gordon_task_wrapper_s *starpu_to_gordon_job(struct _starpu_job *j)
 {
@@ -183,8 +188,8 @@ static void handle_terminated_job(struct _starpu_job *j)
 
 static void gordon_callback_list_func(void *arg)
 {
-	struct gordon_task_wrapper_s *task_wrapper = arg; 
-	struct _starpu_job_list *wrapper_list; 
+	struct gordon_task_wrapper_s *task_wrapper = arg;
+	struct _starpu_job_list *wrapper_list;
 
 	/* we don't know who will execute that codelet : so we actually defer the
  	 * execution of the StarPU codelet and the job termination later */
@@ -232,7 +237,7 @@ static void gordon_callback_list_func(void *arg)
 
 static void gordon_callback_func(void *arg)
 {
-	struct gordon_task_wrapper_s *task_wrapper = arg; 
+	struct gordon_task_wrapper_s *task_wrapper = arg;
 
 	/* we don't know who will execute that codelet : so we actually defer the
  	 * execution of the StarPU codelet and the job termination later */
@@ -254,7 +259,8 @@ int inject_task(struct _starpu_job *j, struct _starpu_worker *worker)
 	struct starpu_task *task = j->task;
 	int ret = _starpu_fetch_task_input(task, 0);
 
-	if (ret != 0) {
+	if (ret != 0)
+	{
 		/* there was not enough memory so the codelet cannot be executed right now ... */
 		/* push the codelet back and try another one ... */
 		return STARPU_TRYAGAIN;
@@ -277,15 +283,17 @@ int inject_task_list(struct _starpu_job_list *list, struct _starpu_worker *worke
 	struct _starpu_job *j;
 
 	// TODO !
-//	
+//
 //	for (j = _starpu_job_list_begin(list); j != _starpu_job_list_end(list); j = _starpu_job_list_next(j) )
 //	{
-//		if (!_STARPU_GORDON_MAY_PERFORM(j)) {
+//		if (!_STARPU_GORDON_MAY_PERFORM(j))
+//              {
 //			// XXX TODO
 //			ninvalids++;
 //			assert(0);
 //		}
-//		else {
+//		else
+//              {
 //			nvalids++;
 //		}
 //	}
@@ -293,7 +301,7 @@ int inject_task_list(struct _starpu_job_list *list, struct _starpu_worker *worke
 	nvalids = _job_list_size(list);
 //	_STARPU_DEBUG("nvalids %d \n", nvalids);
 
-	
+
 
 	struct gordon_task_wrapper_s *task_wrapper = malloc(sizeof(struct gordon_task_wrapper_s));
 	gordon_job_t *gordon_jobs = gordon_alloc_jobs(nvalids, 0);
@@ -303,7 +311,7 @@ int inject_task_list(struct _starpu_job_list *list, struct _starpu_worker *worke
 	task_wrapper->j = NULL;
 	task_wrapper->terminated = 0;
 	task_wrapper->worker = worker;
-	
+
 	unsigned index;
 	for (j = _starpu_job_list_begin(list), index = 0; j != _starpu_job_list_end(list); j = _starpu_job_list_next(j), index++)
 	{
@@ -322,7 +330,7 @@ int inject_task_list(struct _starpu_job_list *list, struct _starpu_worker *worke
 		/* we should not hardcore the memory node ... XXX */
 		unsigned memory_node = 0;
 		starpu_to_gordon_buffers(j, &gordon_jobs[index], memory_node);
-		
+
 	}
 
 	gordon_pushjob(task_wrapper->gordon_job, gordon_callback_list_func, task_wrapper);
@@ -333,12 +341,15 @@ int inject_task_list(struct _starpu_job_list *list, struct _starpu_worker *worke
 void *gordon_worker_inject(struct _starpu_worker_set *arg)
 {
 
-	while(_starpu_machine_is_running()) {
-		if (gordon_busy_enough()) {
+	while(_starpu_machine_is_running())
+	{
+		if (gordon_busy_enough())
+		{
 			/* gordon already has enough work, wait a little TODO */
 			_starpu_wait_on_sched_event();
 		}
-		else {
+		else
+		{
 #ifndef NOCHAIN
 			int ret = 0;
 #ifdef STARPU_DEVEL
@@ -382,7 +393,8 @@ void *gordon_worker_inject(struct _starpu_worker_set *arg)
 						list->_head = it_j;
 						it_j->_prev = NULL;
 					}
-					else {
+					else
+					{
 						/* this is the last chunk */
 						chunk_list = list;
 					}
@@ -390,7 +402,8 @@ void *gordon_worker_inject(struct _starpu_worker_set *arg)
 					ret = inject_task_list(chunk_list, &arg->workers[0]);
 				}
 			}
-			else {
+			else
+			{
 				_starpu_wait_on_sched_event();
 			}
 #else
@@ -398,18 +411,21 @@ void *gordon_worker_inject(struct _starpu_worker_set *arg)
 			struct _starpu_job *j;
 			j =  _starpu_pop_task();
 	//		_STARPU_DEBUG("pop task %p\n", j);
-			if (j) {
-				if (_STARPU_GORDON_MAY_PERFORM(j)) {
+			if (j)
+			{
+				if (_STARPU_GORDON_MAY_PERFORM(j))
+				{
 					/* inject that task */
 					/* XXX we hardcore &arg->workers[0] for now */
 					inject_task(j, &arg->workers[0]);
 				}
-				else {
+				else
+				{
 					_starpu_push_task(j, 0);
 				}
 			}
 #endif
-			
+
 		}
 	}
 
@@ -423,7 +439,7 @@ void *_starpu_gordon_worker(void *arg)
 	_starpu_bind_thread_on_cpu(gordon_set_arg->config, gordon_set_arg->workers[0].bindid);
 
 	/* TODO set_local_memory_node per SPU */
-	gordon_init(gordon_set_arg->nworkers);	
+	gordon_init(gordon_set_arg->nworkers);
 
 	/* NB: On SPUs, the worker_key is set to NULL since there is no point
 	 * in associating the PPU thread with a specific SPU (worker) while
@@ -448,7 +464,7 @@ void *_starpu_gordon_worker(void *arg)
 	/* launch the progression thread */
 	_STARPU_PTHREAD_MUTEX_INIT(&progress_mutex, NULL);
 	_STARPU_PTHREAD_COND_INIT(&progress_cond, NULL);
-	
+
 	pthread_create(&progress_thread, NULL, gordon_worker_progress, gordon_set_arg);
 
 	/* wait for the progression thread to be ready */
@@ -458,7 +474,7 @@ void *_starpu_gordon_worker(void *arg)
 	_STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex);
 
 	_STARPU_DEBUG("progress thread is running ... \n");
-	
+
 	/* tell the core that gordon is ready */
 	_STARPU_PTHREAD_MUTEX_LOCK(&gordon_set_arg->mutex);
 	gordon_set_arg->set_is_initialized = 1;

+ 54 - 28
src/drivers/opencl/driver_opencl.c

@@ -204,20 +204,25 @@ cl_int _starpu_opencl_copy_ram_to_opencl_async_sync(void *ptr, unsigned src_node
         err = clEnqueueWriteBuffer(transfer_queues[worker->devid], buffer, blocking, offset, size, ptr, 0, NULL, event);
         if (event)
                 _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
-        if (STARPU_LIKELY(err == CL_SUCCESS)) {
+        if (STARPU_LIKELY(err == CL_SUCCESS))
+	{
                 *ret = (event == NULL) ? 0 : -EAGAIN;
                 return CL_SUCCESS;
         }
-        else {
-                if (event != NULL) {
+        else
+	{
+                if (event != NULL)
+		{
                         /* The asynchronous copy has failed, try to copy synchronously */
                         err = clEnqueueWriteBuffer(transfer_queues[worker->devid], buffer, CL_TRUE, offset, size, ptr, 0, NULL, NULL);
                 }
-                if (STARPU_LIKELY(err == CL_SUCCESS)) {
+                if (STARPU_LIKELY(err == CL_SUCCESS))
+		{
                         *ret = 0;
                         return CL_SUCCESS;
                 }
-                else {
+                else
+		{
                         STARPU_OPENCL_REPORT_ERROR(err);
                         return err;
                 }
@@ -253,19 +258,23 @@ cl_int _starpu_opencl_copy_opencl_to_ram_async_sync(cl_mem buffer, unsigned src_
         err = clEnqueueReadBuffer(transfer_queues[worker->devid], buffer, blocking, offset, size, ptr, 0, NULL, event);
         if (event)
                 _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
-        if (STARPU_LIKELY(err == CL_SUCCESS)) {
+        if (STARPU_LIKELY(err == CL_SUCCESS))
+	{
                 *ret = (event == NULL) ? 0 : -EAGAIN;
                 return CL_SUCCESS;
         }
-        else {
+        else
+	{
                 if (event != NULL)
                         /* The asynchronous copy has failed, try to copy synchronously */
                         err = clEnqueueReadBuffer(transfer_queues[worker->devid], buffer, CL_TRUE, offset, size, ptr, 0, NULL, NULL);
-                if (STARPU_LIKELY(err == CL_SUCCESS)) {
+                if (STARPU_LIKELY(err == CL_SUCCESS))
+		{
                         *ret = 0;
                         return CL_SUCCESS;
                 }
-                else {
+                else
+		{
                         STARPU_OPENCL_REPORT_ERROR(err);
                         return err;
                 }
@@ -334,7 +343,8 @@ cl_int _starpu_opencl_copy_rect_ram_to_opencl(void *ptr, unsigned src_node STARP
 void _starpu_opencl_init(void)
 {
 	_STARPU_PTHREAD_MUTEX_LOCK(&big_lock);
-        if (!init_done) {
+        if (!init_done)
+	{
                 cl_platform_id platform_id[_STARPU_OPENCL_PLATFORM_MAX];
                 cl_uint nb_platforms;
                 cl_device_type device_type = CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR;
@@ -351,19 +361,23 @@ void _starpu_opencl_init(void)
                 // Get devices
                 nb_devices = 0;
                 {
-                        for (i=0; i<nb_platforms; i++) {
+                        for (i=0; i<nb_platforms; i++)
+			{
                                 cl_uint num;
 				int platform_valid = 1;
 				char name[1024], vendor[1024];
 
 				err = clGetPlatformInfo(platform_id[i], CL_PLATFORM_NAME, 1024, name, NULL);
-				if (err != CL_SUCCESS) {
+				if (err != CL_SUCCESS)
+				{
 					STARPU_OPENCL_REPORT_ERROR_WITH_MSG("clGetPlatformInfo NAME", err);
 					platform_valid = 0;
 				}
-				else {
+				else
+				{
 					err = clGetPlatformInfo(platform_id[i], CL_PLATFORM_VENDOR, 1024, vendor, NULL);
-					if (err != CL_SUCCESS) {
+					if (err != CL_SUCCESS)
+					{
 						STARPU_OPENCL_REPORT_ERROR_WITH_MSG("clGetPlatformInfo VENDOR", err);
 						platform_valid = 0;
 					}
@@ -374,12 +388,15 @@ void _starpu_opencl_init(void)
 				else
 					_STARPU_DEBUG("Platform invalid\n");
 #endif
-				if (platform_valid) {
+				if (platform_valid)
+				{
 					err = clGetDeviceIDs(platform_id[i], device_type, STARPU_MAXOPENCLDEVS-nb_devices, &devices[nb_devices], &num);
-					if (err == CL_DEVICE_NOT_FOUND) {
+					if (err == CL_DEVICE_NOT_FOUND)
+					{
 						_STARPU_DEBUG("  No devices detected on this platform\n");
 					}
-					else {
+					else
+					{
 						if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
 						_STARPU_DEBUG("  %d devices detected\n", num);
 						nb_devices += num;
@@ -391,13 +408,15 @@ void _starpu_opencl_init(void)
                 // Get location of OpenCl kernel source files
                 _starpu_opencl_program_dir = getenv("STARPU_OPENCL_PROGRAM_DIR");
 
-		if (nb_devices > STARPU_MAXOPENCLDEVS) {
+		if (nb_devices > STARPU_MAXOPENCLDEVS)
+		{
 			_STARPU_DISP("# Warning: %d OpenCL devices available. Only %d enabled. Use configure option --enable-maxopencldev=xxx to update the maximum value of supported OpenCL devices?\n", nb_devices, STARPU_MAXOPENCLDEVS);
 			nb_devices = STARPU_MAXOPENCLDEVS;
 		}
 
                 // initialise internal structures
-                for(i=0 ; i<nb_devices ; i++) {
+                for(i=0 ; i<nb_devices ; i++)
+		{
                         contexts[i] = NULL;
                         queues[i] = NULL;
                         transfer_queues[i] = NULL;
@@ -467,8 +486,8 @@ void *_starpu_opencl_worker(void *arg)
 		_STARPU_PTHREAD_MUTEX_LOCK(args->sched_mutex);
 
 		task = _starpu_pop_task(args);
-		
-                if (task == NULL) 
+
+                if (task == NULL)
 		{
 			if (_starpu_worker_can_block(memnode))
 				_starpu_block_worker(workerid, args->sched_cond, args->sched_mutex);
@@ -497,8 +516,10 @@ void *_starpu_opencl_worker(void *arg)
 
 		_starpu_set_current_task(NULL);
 
-                if (res) {
-			switch (res) {
+                if (res)
+		{
+			switch (res)
+			{
 				case -EAGAIN:
 					_STARPU_DISP("ouch, put the codelet %p back ... \n", j);
 					_starpu_push_task(j, 0);
@@ -527,7 +548,8 @@ static unsigned _starpu_opencl_get_device_name(int dev, char *name, int lname)
 {
 	int err;
 
-        if (!init_done) {
+        if (!init_done)
+	{
                 _starpu_opencl_init();
         }
 
@@ -541,7 +563,8 @@ static unsigned _starpu_opencl_get_device_name(int dev, char *name, int lname)
 
 unsigned _starpu_opencl_get_device_count(void)
 {
-        if (!init_done) {
+        if (!init_done)
+	{
                 _starpu_opencl_init();
         }
 	return nb_devices;
@@ -562,7 +585,8 @@ static int _starpu_opencl_execute_job(struct _starpu_job *j, struct _starpu_work
 	STARPU_ASSERT(cl);
 
 	ret = _starpu_fetch_task_input(task, mask);
-	if (ret != 0) {
+	if (ret != 0)
+	{
 		/* there was not enough memory, so the input of
 		 * the codelet cannot be fetched ... put the
 		 * codelet back, and try it later */
@@ -572,12 +596,14 @@ static int _starpu_opencl_execute_job(struct _starpu_job *j, struct _starpu_work
 
 	_starpu_driver_start_job(args, j, &codelet_start, 0);
 
-	if (cl->opencl_func != STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS) {
+	if (cl->opencl_func != STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS)
+	{
 		_starpu_cl_func func = cl->opencl_func;
 		STARPU_ASSERT(func);
 		func(task->interfaces, task->cl_arg);
 	}
-	else {
+	else
+	{
 		/* _STARPU_DEBUG("OpenCL driver : running kernel (%d)\n", j->nimpl); */
 		_starpu_cl_func func = cl->opencl_funcs[j->nimpl];
 		STARPU_ASSERT(func);

+ 39 - 20
src/drivers/opencl/driver_opencl_utils.c

@@ -39,42 +39,50 @@ char *_starpu_opencl_program_dir;
 #define _STARPU_STRINGIFY(x) _STARPU_STRINGIFY_(x)
 
 static
-int _starpu_opencl_locate_file(const char *source_file_name, char *located_file_name, char *located_dir_name) {
+int _starpu_opencl_locate_file(const char *source_file_name, char *located_file_name, char *located_dir_name)
+{
 	int ret = EXIT_FAILURE;
 
         _STARPU_DEBUG("Trying to locate <%s>\n", source_file_name);
-        if (access(source_file_name, R_OK) == 0) {
+        if (access(source_file_name, R_OK) == 0)
+	{
                 strcpy(located_file_name, source_file_name);
 		ret = EXIT_SUCCESS;
         }
 
-	if (ret == EXIT_FAILURE && _starpu_opencl_program_dir) {
+	if (ret == EXIT_FAILURE && _starpu_opencl_program_dir)
+	{
 		sprintf(located_file_name, "%s/%s", _starpu_opencl_program_dir, source_file_name);
 		_STARPU_DEBUG("Trying to locate <%s>\n", located_file_name);
 		if (access(located_file_name, R_OK) == 0) ret = EXIT_SUCCESS;
 	}
 
-	if (ret == EXIT_FAILURE) {
+	if (ret == EXIT_FAILURE)
+	{
 		sprintf(located_file_name, "%s/%s", _STARPU_STRINGIFY(STARPU_OPENCL_DATADIR), source_file_name);
 		_STARPU_DEBUG("Trying to locate <%s>\n", located_file_name);
 		if (access(located_file_name, R_OK) == 0) ret = EXIT_SUCCESS;
 	}
 
-	if (ret == EXIT_FAILURE) {
+	if (ret == EXIT_FAILURE)
+	{
 		sprintf(located_file_name, "%s/%s", STARPU_SRC_DIR, source_file_name);
 		_STARPU_DEBUG("Trying to locate <%s>\n", located_file_name);
 		if (access(located_file_name, R_OK) == 0) ret = EXIT_SUCCESS;
 	}
 
-	if (ret == EXIT_FAILURE) {
+	if (ret == EXIT_FAILURE)
+	{
 		strcpy(located_file_name, "");
 		strcpy(located_dir_name, "");
 		_STARPU_ERROR("Cannot locate file <%s>\n", source_file_name);
 	}
-	else {
+	else
+	{
 		char *last = strrchr(located_file_name, '/');
 		if (!last) strcpy(located_dir_name, "");
-		else {
+		else
+		{
 			sprintf(located_dir_name, "%s", located_file_name);
 			located_dir_name[strlen(located_file_name)-strlen(last)+1] = '\0';
 		}
@@ -96,7 +104,8 @@ cl_int starpu_opencl_load_kernel(cl_kernel *kernel, cl_command_queue *queue, str
         starpu_opencl_get_queue(devid, queue);
 
         program = opencl_programs->programs[devid];
-        if (!program) {
+        if (!program)
+	{
                 _STARPU_DISP("Program not available\n");
                 return CL_INVALID_PROGRAM;
         }
@@ -108,7 +117,8 @@ cl_int starpu_opencl_load_kernel(cl_kernel *kernel, cl_command_queue *queue, str
 	return CL_SUCCESS;
 }
 
-cl_int starpu_opencl_release_kernel(cl_kernel kernel) {
+cl_int starpu_opencl_release_kernel(cl_kernel kernel)
+{
 	cl_int err;
 
 	err = clReleaseKernel(kernel);
@@ -133,8 +143,9 @@ char *_starpu_opencl_load_program_source(const char *filename)
         stat(filename, &statbuf);
         source = (char *) malloc(statbuf.st_size + 1);
 
-        for(c=fgetc(fh), x=0 ; c != EOF ; c = fgetc(fh), x++) {
-          source[x] = c;
+        for(c=fgetc(fh), x=0 ; c != EOF ; c = fgetc(fh), x++)
+	{
+		source[x] = c;
         }
         source[x] = '\0';
 
@@ -154,7 +165,8 @@ int starpu_opencl_load_opencl_from_string(const char *opencl_program_source, str
 
         nb_devices = _starpu_opencl_get_device_count();
         // Iterate over each device
-        for(dev = 0; dev < nb_devices; dev ++) {
+        for(dev = 0; dev < nb_devices; dev ++)
+	{
                 cl_device_id device;
                 cl_context   context;
                 cl_program   program;
@@ -162,7 +174,8 @@ int starpu_opencl_load_opencl_from_string(const char *opencl_program_source, str
 
                 starpu_opencl_get_device(dev, &device);
                 starpu_opencl_get_context(dev, &context);
-                if (context == NULL) {
+                if (context == NULL)
+		{
                         _STARPU_DEBUG("[%d] is not a valid OpenCL context\n", dev);
                         continue;
                 }
@@ -177,7 +190,8 @@ int starpu_opencl_load_opencl_from_string(const char *opencl_program_source, str
 
                 // Build the program executable
                 err = clBuildProgram(program, 1, &device, build_options, NULL, NULL);
-                if (err != CL_SUCCESS) {
+                if (err != CL_SUCCESS)
+		{
                         size_t len;
                         static char buffer[4096];
 
@@ -237,7 +251,8 @@ cl_int starpu_opencl_unload_opencl(struct starpu_opencl_program *opencl_programs
 
         nb_devices = _starpu_opencl_get_device_count();
         // Iterate over each device
-        for(dev = 0; dev < nb_devices; dev ++) {
+        for(dev = 0; dev < nb_devices; dev ++)
+	{
                 if (opencl_programs->programs[dev])
                         clReleaseProgram(opencl_programs->programs[dev]);
         }
@@ -252,7 +267,8 @@ int starpu_opencl_collect_stats(cl_event event STARPU_ATTRIBUTE_UNUSED)
 #endif
 
 #ifdef CL_PROFILING_CLOCK_CYCLE_COUNT
-	if (starpu_profiling_status_get() && info) {
+	if (starpu_profiling_status_get() && info)
+	{
 		cl_int err;
 		unsigned int clock_cycle_count;
 		size_t size;
@@ -263,7 +279,8 @@ int starpu_opencl_collect_stats(cl_event event STARPU_ATTRIBUTE_UNUSED)
 	}
 #endif
 #ifdef CL_PROFILING_STALL_CYCLE_COUNT
-	if (starpu_profiling_status_get() && info) {
+	if (starpu_profiling_status_get() && info)
+	{
 		cl_int err;
 		unsigned int stall_cycle_count;
 		size_t size;
@@ -275,7 +292,8 @@ int starpu_opencl_collect_stats(cl_event event STARPU_ATTRIBUTE_UNUSED)
 	}
 #endif
 #ifdef CL_PROFILING_POWER_CONSUMED
-	if (info && (starpu_profiling_status_get() || (task->cl && task->cl->power_model && task->cl->power_model->benchmarking))) {
+	if (info && (starpu_profiling_status_get() || (task->cl && task->cl->power_model && task->cl->power_model->benchmarking)))
+	{
 		cl_int err;
 		double power_consumed;
 		size_t size;
@@ -293,7 +311,8 @@ int starpu_opencl_collect_stats(cl_event event STARPU_ATTRIBUTE_UNUSED)
 void starpu_opencl_display_error(const char *func, const char *file, int line, const char* msg, cl_int status)
 {
 	const char *errormsg;
-	switch (status) {
+	switch (status)
+	{
 	case CL_SUCCESS:
 		errormsg = "success";
 		break;

+ 153 - 75
src/profiling/bound.c

@@ -45,7 +45,8 @@
  * - the total number of tasks of a given kind is equal to the number run by the
  *   application.
  */
-struct bound_task_pool {
+struct bound_task_pool
+{
 	/* Which codelet has been executed */
 	struct starpu_codelet *cl;
 	/* Task footprint key */
@@ -77,7 +78,8 @@ struct bound_task_pool {
 /* Note: only task-task, implicit data dependencies or task-tag dependencies
  * are taken into account. Tags released in a callback or something like this
  * is not taken into account, only tags associated with a task are. */
-struct bound_task {
+struct bound_task
+{
 	/* Unique ID */
 	unsigned long id;
 	/* Tag ID, if any */
@@ -100,7 +102,8 @@ struct bound_task {
 	struct bound_task *next;
 };
 
-struct bound_tag_dep {
+struct bound_tag_dep
+{
 	starpu_tag_t tag;
 	starpu_tag_t dep_tag;
 	struct bound_tag_dep *next;
@@ -197,14 +200,18 @@ void _starpu_bound_record(struct _starpu_job *j)
 
 	_STARPU_PTHREAD_MUTEX_LOCK(&mutex);
 	/* Re-check, this time with mutex held */
-	if (!_starpu_bound_recording) {
+	if (!_starpu_bound_recording)
+	{
 		_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
 		return;
 	}
 
-	if (recorddeps) {
+	if (recorddeps)
+	{
 		new_task(j);
-	} else {
+	}
+	else
+	{
 		struct bound_task_pool *tp;
 
 		_starpu_compute_buffers_footprint(j);
@@ -216,7 +223,8 @@ void _starpu_bound_record(struct _starpu_job *j)
 				if (tp->cl == j->task->cl && tp->footprint == j->footprint)
 					break;
 
-		if (!tp) {
+		if (!tp)
+		{
 			tp = (struct bound_task_pool *) malloc(sizeof(*tp));
 			tp->cl = j->task->cl;
 			tp->footprint = j->footprint;
@@ -241,7 +249,8 @@ void _starpu_bound_tag_dep(starpu_tag_t id, starpu_tag_t dep_id)
 
 	_STARPU_PTHREAD_MUTEX_LOCK(&mutex);
 	/* Re-check, this time with mutex held */
-	if (!_starpu_bound_recording || !recorddeps) {
+	if (!_starpu_bound_recording || !recorddeps)
+	{
 		_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
 		return;
 	}
@@ -266,7 +275,8 @@ void _starpu_bound_task_dep(struct _starpu_job *j, struct _starpu_job *dep_j)
 
 	_STARPU_PTHREAD_MUTEX_LOCK(&mutex);
 	/* Re-check, this time with mutex held */
-	if (!_starpu_bound_recording || !recorddeps) {
+	if (!_starpu_bound_recording || !recorddeps)
+	{
 		_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
 		return;
 	}
@@ -301,14 +311,16 @@ void _starpu_bound_job_id_dep(struct _starpu_job *j, unsigned long id)
 
 	_STARPU_PTHREAD_MUTEX_LOCK(&mutex);
 	/* Re-check, this time with mutex held */
-	if (!_starpu_bound_recording || !recorddeps) {
+	if (!_starpu_bound_recording || !recorddeps)
+	{
 		_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
 		return;
 	}
 
 	new_task(j);
 	dep_t = find_job(id);
-	if (!dep_t) {
+	if (!dep_t)
+	{
 		fprintf(stderr,"dependency %lu not found !\n", id);
 		_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
 		return;
@@ -326,12 +338,16 @@ void starpu_bound_stop(void)
 	_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
 }
 
-static void _starpu_get_tasks_times(int nw, int nt, double *times) {
+static void _starpu_get_tasks_times(int nw, int nt, double *times)
+{
 	struct bound_task_pool *tp;
 	int w, t;
-	for (w = 0; w < nw; w++) {
-		for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
-			struct _starpu_job j = {
+	for (w = 0; w < nw; w++)
+	{
+		for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
+		{
+			struct _starpu_job j =
+			{
 				.footprint = tp->footprint,
 				.footprint_is_computed = 1,
 			};
@@ -345,9 +361,11 @@ static void _starpu_get_tasks_times(int nw, int nt, double *times) {
 	}
 }
 
-static int ancestor(struct bound_task *child, struct bound_task *parent) {
+static int ancestor(struct bound_task *child, struct bound_task *parent)
+{
 	int i;
-	for (i = 0; i < child->depsn; i++) {
+	for (i = 0; i < child->depsn; i++)
+	{
 		if (parent == child->deps[i])
 			return 1;
 		if (ancestor(child->deps[i], parent))
@@ -356,17 +374,20 @@ static int ancestor(struct bound_task *child, struct bound_task *parent) {
 	return 0;
 }
 
-void starpu_bound_print_dot(FILE *output) {
+void starpu_bound_print_dot(FILE *output)
+{
 	struct bound_task *t;
 	struct bound_tag_dep *td;
 	int i;
 
-	if (!recorddeps) {
+	if (!recorddeps)
+	{
 		fprintf(output, "Not supported\n");
 		return;
 	}
 	fprintf(output, "strict digraph bounddeps {\n");
-	for (t = tasks; t; t = t->next) {
+	for (t = tasks; t; t = t->next)
+	{
 		fprintf(output, "\"t%lu\" [label=\"%lu: %s\"]\n", t->id, t->id, t->cl->model->symbol);
 		for (i = 0; i < t->depsn; i++)
 			fprintf(output, "\"t%lu\" -> \"t%lu\"\n", t->deps[i]->id, t->id);
@@ -388,20 +409,25 @@ void starpu_bound_print_lp(FILE *output)
 	_STARPU_PTHREAD_MUTEX_LOCK(&mutex);
 	nw = starpu_worker_get_count();
 
-	if (recorddeps) {
+	if (recorddeps)
+	{
 		struct bound_task *t1, *t2;
 		struct bound_tag_dep *td;
 		int i;
 
 		nt = 0;
-		for (t1 = tasks; t1; t1 = t1->next) {
-			struct _starpu_job j = {
+		for (t1 = tasks; t1; t1 = t1->next)
+		{
+			struct _starpu_job j =
+			{
 				.footprint = t1->footprint,
 				.footprint_is_computed = 1,
 			};
-			for (w = 0; w < nw; w++) {
+			for (w = 0; w < nw; w++)
+			{
 				enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
-				if (t1->duration[arch] == 0.) {
+				if (t1->duration[arch] == 0.)
+				{
 					double length = _starpu_history_based_job_expected_perf(t1->cl->model, arch, &j,j.nimpl);
 					if (length == -1.0)
 						/* Avoid problems with binary coding of doubles */
@@ -422,8 +448,10 @@ void starpu_bound_print_lp(FILE *output)
 			fprintf(output, "c%lu <= tmax;\n", t1->id);
 
 		fprintf(output, "\n/* We have tasks executing on workers, exactly one worker executes each task */\n");
-		for (t1 = tasks; t1; t1 = t1->next) {
-			for (w = 0; w < nw; w++) {
+		for (t1 = tasks; t1; t1 = t1->next)
+		{
+			for (w = 0; w < nw; w++)
+			{
 				enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
 				if (t1->duration[arch] != -1.0)
 					fprintf(output, " +t%luw%d", t1->id, w);
@@ -433,9 +461,11 @@ void starpu_bound_print_lp(FILE *output)
 
 		fprintf(output, "\n/* Completion time is start time plus computation time */\n");
 		fprintf(output, "/* According to where the task is indeed executed */\n");
-		for (t1 = tasks; t1; t1 = t1->next) {
+		for (t1 = tasks; t1; t1 = t1->next)
+		{
 			fprintf(output, "/* %s %x */\tc%lu = s%lu", t1->cl->model->symbol, (unsigned) t1->footprint, t1->id, t1->id);
-			for (w = 0; w < nw; w++) {
+			for (w = 0; w < nw; w++)
+			{
 				enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
 				if (t1->duration[arch] != -1.0)
 					fprintf(output, " + %f t%luw%d", t1->duration[arch], t1->id, w);
@@ -451,7 +481,8 @@ void starpu_bound_print_lp(FILE *output)
 
 		fprintf(output, "\n/* Each tag finishes when its corresponding task finishes */");
 		for (t1 = tasks; t1; t1 = t1->next)
-			if (t1->use_tag) {
+			if (t1->use_tag)
+			{
 				for (w = 0; w < nw; w++)
 					fprintf(output, "c%lu = tag%lu;\n", t1->id, (unsigned long) t1->tag_id);
 			}
@@ -463,13 +494,17 @@ void starpu_bound_print_lp(FILE *output)
 /* TODO: factorize ancestor calls */
 		fprintf(output, "\n/* For each task pair and each worker, if both tasks are executed by the same worker,\n");
 		fprintf(output, "   one is started after the other's completion */\n");
-		for (t1 = tasks; t1; t1 = t1->next) {
+		for (t1 = tasks; t1; t1 = t1->next)
+		{
 			for (t2 = t1->next; t2; t2 = t2->next)
 			{
-				if (!ancestor(t1, t2) && !ancestor(t2, t1)) {
-					for (w = 0; w < nw; w++) {
+				if (!ancestor(t1, t2) && !ancestor(t2, t1))
+				{
+					for (w = 0; w < nw; w++)
+					{
 						enum starpu_perf_archtype arch = starpu_worker_get_perf_archtype(w);
-						if (t1->duration[arch] != -1.0) {
+						if (t1->duration[arch] != -1.0)
+						{
 							fprintf(output, "s%lu - c%lu >= -3e5 + 1e5 t%luw%d + 1e5 t%luw%d + 1e5 t%luafter%lu;\n",
 									t1->id, t2->id, t1->id, w, t2->id, w, t1->id, t2->id);
 							fprintf(output, "s%lu - c%lu >= -2e5 + 1e5 t%luw%d + 1e5 t%luw%d - 1e5 t%luafter%lu;\n",
@@ -491,26 +526,32 @@ void starpu_bound_print_lp(FILE *output)
 				}
 #endif
 
-		if (recordprio) {
+		if (recordprio)
+		{
 			fprintf(output, "\n/* For StarPU, a priority means given schedulable tasks it will consider the\n");
 			fprintf(output, " * more prioritized first */\n");
-			for (t1 = tasks; t1; t1 = t1->next) {
+			for (t1 = tasks; t1; t1 = t1->next)
+			{
 				for (t2 = t1->next; t2; t2 = t2->next)
 				{
 					if (!ancestor(t1, t2) && !ancestor(t2, t1)
-					     && t1->priority != t2->priority) {
-						if (t1->priority > t2->priority) {
+					     && t1->priority != t2->priority)
+					{
+						if (t1->priority > t2->priority)
+						{
 							/* Either t2 is scheduled before t1, but then it
 							   needs to be scheduled before some t dep finishes */
 
 							/* One of the t1 deps to give the maximum start time for t2 */
-							if (t1->depsn > 1) {
+							if (t1->depsn > 1)
+							{
 								for (i = 0; i < t1->depsn; i++)
 									fprintf(output, " + t%lut%lud%d", t2->id, t1->id, i);
 								fprintf(output, " = 1;\n");
 							}
 
-							for (i = 0; i < t1->depsn; i++) {
+							for (i = 0; i < t1->depsn; i++)
+							{
 								fprintf(output, "c%lu - s%lu >= ", t1->deps[i]->id, t2->id);
 								if (t1->depsn > 1)
 									/* Only checks this when it's this dependency that is chosen */
@@ -524,18 +565,22 @@ void starpu_bound_print_lp(FILE *output)
 
 							/* Or t2 is scheduled after t1 is.  */
 							fprintf(output, "s%lu - s%lu >= -1e5 t%luafter%lu;\n", t2->id, t1->id, t1->id, t2->id);
-						} else {
+						}
+						else
+						{
 							/* Either t1 is scheduled before t2, but then it
 							   needs to be scheduled before some t2 dep finishes */
 
 							/* One of the t2 deps to give the maximum start time for t1 */
-							if (t2->depsn > 1) {
+							if (t2->depsn > 1)
+							{
 								for (i = 0; i < t2->depsn; i++)
 									fprintf(output, " + t%lut%lud%d", t1->id, t2->id, i);
 								fprintf(output, " = 1;\n");
 							}
 
-							for (i = 0; i < t2->depsn; i++) {
+							for (i = 0; i < t2->depsn; i++)
+							{
 								fprintf(output, "c%lu - s%lu >= ", t2->deps[i]->id, t1->id);
 								if (t2->depsn > 1)
 									/* Only checks this when it's this dependency that is chosen */
@@ -555,14 +600,19 @@ void starpu_bound_print_lp(FILE *output)
 
 		for (t1 = tasks; t1; t1 = t1->next)
 			for (t2 = t1->next; t2; t2 = t2->next)
-				if (!ancestor(t1, t2) && !ancestor(t2, t1)) {
+				if (!ancestor(t1, t2) && !ancestor(t2, t1))
+				{
 					fprintf(output, "bin t%luafter%lu;\n", t1->id, t2->id);
-					if (recordprio && t1->priority != t2->priority) {
-						if (t1->priority > t2->priority) {
+					if (recordprio && t1->priority != t2->priority)
+					{
+						if (t1->priority > t2->priority)
+						{
 							if (t1->depsn > 1)
 								for (i = 0; i < t1->depsn; i++)
 									fprintf(output, "bin t%lut%lud%d;\n", t2->id, t1->id, i);
-						} else {
+						}
+						else
+						{
 							if (t2->depsn > 1)
 								for (i = 0; i < t2->depsn; i++)
 									fprintf(output, "bin t%lut%lud%d;\n", t1->id, t2->id, i);
@@ -573,7 +623,9 @@ void starpu_bound_print_lp(FILE *output)
 		for (t1 = tasks; t1; t1 = t1->next)
 			for (w = 0; w < nw; w++)
 				fprintf(output, "bin t%luw%d;\n", t1->id, w);
-	} else {
+	}
+	else
+	{
 		struct bound_task_pool *tp;
 		nt = 0;
 		for (tp = task_pools; tp; tp = tp->next)
@@ -589,11 +641,13 @@ void starpu_bound_print_lp(FILE *output)
 			fprintf(output, "min: tmax;\n\n");
 
 			fprintf(output, "/* Which is the maximum of all worker execution times (ms) */\n");
-			for (w = 0; w < nw; w++) {
+			for (w = 0; w < nw; w++)
+			{
 				char name[32];
 				starpu_worker_get_name(w, name, sizeof(name));
 				fprintf(output, "/* worker %s */\n0", name);
-				for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
+				for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
+				{
 					if (times[w*nt+t] != -1.0)
 						fprintf(output, "\t%+f * w%dt%dn", (float) times[w*nt+t], w, t);
 				}
@@ -602,7 +656,8 @@ void starpu_bound_print_lp(FILE *output)
 			fprintf(output, "\n");
 
 			fprintf(output, "/* And we have to have computed exactly all tasks */\n");
-			for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
+			for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
+			{
 				fprintf(output, "/* task %s key %x */\n0", tp->cl->model->symbol, (unsigned) tp->footprint);
 				for (w = 0; w < nw; w++)
 					if (times[w*nt+t] != -1.0)
@@ -619,7 +674,8 @@ void starpu_bound_print_lp(FILE *output)
 			fprintf(output, "/* int ");
 			int first = 1;
 			for (w = 0; w < nw; w++)
-				for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
+				for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
+				{
 					if (!first)
 						fprintf(output, ",");
 					else
@@ -643,7 +699,8 @@ void starpu_bound_print_mps(FILE *output)
 	int nw; /* Number of different workers */
 	int t, w;
 
-	if (recorddeps) {
+	if (recorddeps)
+	{
 		fprintf(output, "Not supported\n");
 		return;
 	}
@@ -654,7 +711,6 @@ void starpu_bound_print_mps(FILE *output)
 	nt = 0;
 	for (tp = task_pools; tp; tp = tp->next)
 		nt++;
-
 	{
 		double times[nw*nt];
 
@@ -668,7 +724,8 @@ void starpu_bound_print_mps(FILE *output)
 		fprintf(output, " N  TMAX\n");
 
 		fprintf(output, "\n* Which is the maximum of all worker execution times (ms)\n");
-		for (w = 0; w < nw; w++) {
+		for (w = 0; w < nw; w++)
+		{
 			char name[32];
 			starpu_worker_get_name(w, name, sizeof(name));
 			fprintf(output, "* worker %s\n", name);
@@ -676,7 +733,8 @@ void starpu_bound_print_mps(FILE *output)
 		}
 
 		fprintf(output, "\n* And we have to have computed exactly all tasks\n");
-		for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
+		for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
+		{
 			fprintf(output, "* task %s key %x\n", tp->cl->model->symbol, (unsigned) tp->footprint);
 			fprintf(output, " E  T%d\n", t);
 		}
@@ -686,7 +744,8 @@ void starpu_bound_print_mps(FILE *output)
 		fprintf(output, "\n* Execution times and completion of all tasks\n");
 		for (w = 0; w < nw; w++)
 			for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
-				if (times[w*nt+t] != -1.0) {
+				if (times[w*nt+t] != -1.0)
+				{
 					char name[9];
 					snprintf(name, sizeof(name), "W%dT%d", w, t);
 					fprintf(stderr,"    %-8s  W%-7d  %12f\n", name, w, times[w*nt+t]);
@@ -751,7 +810,8 @@ static glp_prob *_starpu_bound_glp_resolve(int integer)
 		glp_set_obj_coef(lp, nw*nt+1, 1.);
 
 		for (w = 0; w < nw; w++)
-			for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
+			for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
+			{
 				char name[32];
 				snprintf(name, sizeof(name), "w%dt%dn", w, t);
 				glp_set_col_name(lp, colnum(w, t), name);
@@ -763,23 +823,27 @@ static glp_prob *_starpu_bound_glp_resolve(int integer)
 
 		/* Total worker execution time */
 		glp_add_rows(lp, nw);
-		for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
+		for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
+		{
 			int someone = 0;
 			for (w = 0; w < nw; w++)
 				if (times[w*nt+t] != -1.)
 					someone = 1;
-			if (!someone) {
+			if (!someone)
+			{
 				/* This task does not have any performance model at all, abort */
 				glp_delete_prob(lp);
 				return NULL;
 			}
 		}
-		for (w = 0; w < nw; w++) {
+		for (w = 0; w < nw; w++)
+		{
 			char name[32], title[64];
 			starpu_worker_get_name(w, name, sizeof(name));
 			snprintf(title, sizeof(title), "worker %s", name);
 			glp_set_row_name(lp, w+1, title);
-			for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
+			for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
+			{
 				ia[n] = w+1;
 				ja[n] = colnum(w, t);
 				if (times[w*nt+t] == -1.)
@@ -798,12 +862,14 @@ static glp_prob *_starpu_bound_glp_resolve(int integer)
 
 		/* Total task completion */
 		glp_add_rows(lp, nt);
-		for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
+		for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
+		{
 			char name[32], title[64];
 			starpu_worker_get_name(w, name, sizeof(name));
 			snprintf(title, sizeof(title), "task %s key %x", tp->cl->model->symbol, (unsigned) tp->footprint);
 			glp_set_row_name(lp, nw+t+1, title);
-			for (w = 0; w < nw; w++) {
+			for (w = 0; w < nw; w++)
+			{
 				ia[n] = nw+t+1;
 				ja[n] = colnum(w, t);
 				ar[n] = 1;
@@ -821,12 +887,14 @@ static glp_prob *_starpu_bound_glp_resolve(int integer)
 	glp_init_smcp(&parm);
 	parm.msg_lev = GLP_MSG_OFF;
 	ret = glp_simplex(lp, &parm);
-	if (ret) {
+	if (ret)
+	{
 		glp_delete_prob(lp);
 		lp = NULL;
 		return NULL;
 	}
-	if (integer) {
+	if (integer)
+	{
 		glp_iocp iocp;
 		glp_init_iocp(&iocp);
 		iocp.msg_lev = GLP_MSG_OFF;
@@ -837,16 +905,19 @@ static glp_prob *_starpu_bound_glp_resolve(int integer)
 }
 #endif /* HAVE_GLPK_H */
 
-void starpu_bound_print(FILE *output, int integer __attribute__ ((unused))) {
+void starpu_bound_print(FILE *output, int integer __attribute__ ((unused)))
+{
 #ifdef HAVE_GLPK_H
-	if (recorddeps) {
+	if (recorddeps)
+	{
 		fprintf(output, "Not supported\n");
 		return;
 	}
 
 	_STARPU_PTHREAD_MUTEX_LOCK(&mutex);
 	glp_prob *lp = _starpu_bound_glp_resolve(integer);
-	if (lp) {
+	if (lp)
+	{
 		struct bound_task_pool * tp;
 		int t, w;
 		int nw; /* Number of different workers */
@@ -861,7 +932,8 @@ void starpu_bound_print(FILE *output, int integer __attribute__ ((unused))) {
 
 		fprintf(output, "Theoretical minimum execution time: %f ms\n", tmax);
 
-		for (t = 0, tp = task_pools; tp; t++, tp = tp->next) {
+		for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
+		{
 			fprintf(output, "%s key %x\n", tp->cl->model->symbol, (unsigned) tp->footprint);
 			for (w = 0; w < nw; w++)
 				if (integer)
@@ -872,7 +944,9 @@ void starpu_bound_print(FILE *output, int integer __attribute__ ((unused))) {
 		}
 
 		glp_delete_prob(lp);
-	} else {
+	}
+	else
+	{
 		fprintf(stderr, "Simplex failed\n");
 	}
 	_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
@@ -881,23 +955,27 @@ void starpu_bound_print(FILE *output, int integer __attribute__ ((unused))) {
 #endif /* HAVE_GLPK_H */
 }
 
-void starpu_bound_compute(double *res, double *integer_res __attribute__ ((unused)), int integer __attribute__ ((unused))) {
+void starpu_bound_compute(double *res, double *integer_res __attribute__ ((unused)), int integer __attribute__ ((unused)))
+{
 #ifdef HAVE_GLPK_H
 	double ret;
 
-	if (recorddeps) {
+	if (recorddeps)
+	{
 		*res = 0.;
 		return;
 	}
 
 	_STARPU_PTHREAD_MUTEX_LOCK(&mutex);
 	glp_prob *lp = _starpu_bound_glp_resolve(integer);
-	if (lp) {
+	if (lp)
+	{
 		ret = glp_get_obj_val(lp);
 		if (integer)
 			*integer_res = glp_mip_obj_val(lp);
 		glp_delete_prob(lp);
-	} else
+	}
+	else
 		ret = 0.;
 	_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
 	*res = ret;

+ 17 - 13
src/profiling/profiling.c

@@ -39,7 +39,8 @@ static struct timespec executing_start_date[STARPU_NMAXWORKERS];
 
 /* Store the busid of the different (src, dst) pairs. busid_matrix[src][dst]
  * contains the busid of (src, dst) or -1 if the bus was not registered. */
-struct node_pair {
+struct node_pair
+{
 	int src;
 	int dst;
 	struct starpu_bus_profiling_info *bus_info;
@@ -47,7 +48,7 @@ struct node_pair {
 
 static int busid_matrix[STARPU_MAXNODES][STARPU_MAXNODES];
 static struct starpu_bus_profiling_info bus_profiling_info[STARPU_MAXNODES][STARPU_MAXNODES];
-static struct node_pair busid_to_node_pair[STARPU_MAXNODES*STARPU_MAXNODES]; 
+static struct node_pair busid_to_node_pair[STARPU_MAXNODES*STARPU_MAXNODES];
 static unsigned busid_cnt = 0;
 
 static void _starpu_bus_reset_profiling_info(struct starpu_bus_profiling_info *bus_info);
@@ -148,7 +149,7 @@ static void _starpu_worker_reset_profiling_info_with_lock(int workerid)
 	worker_info[workerid].used_cycles = 0;
 	worker_info[workerid].stall_cycles = 0;
 	worker_info[workerid].power_consumed = 0;
-	
+
 	/* We detect if the worker is already sleeping or doing some
 	 * computation */
 	enum _starpu_worker_status status = _starpu_worker_get_status(workerid);
@@ -158,7 +159,8 @@ static void _starpu_worker_reset_profiling_info_with_lock(int workerid)
 		worker_registered_sleeping_start[workerid] = 1;
 		_starpu_clock_gettime(&sleeping_start_date[workerid]);
 	}
-	else {
+	else
+	{
 		worker_registered_sleeping_start[workerid] = 0;
 	}
 
@@ -167,7 +169,8 @@ static void _starpu_worker_reset_profiling_info_with_lock(int workerid)
 		worker_registered_executing_start[workerid] = 1;
 		_starpu_clock_gettime(&executing_start_date[workerid]);
 	}
-	else {
+	else
+	{
 		worker_registered_executing_start[workerid] = 0;
 	}
 }
@@ -184,7 +187,7 @@ void _starpu_worker_register_sleeping_start_date(int workerid, struct timespec *
 	if (profiling)
 	{
 		_STARPU_PTHREAD_MUTEX_LOCK(&worker_info_mutex[workerid]);
-		worker_registered_sleeping_start[workerid] = 1;	
+		worker_registered_sleeping_start[workerid] = 1;
 		memcpy(&sleeping_start_date[workerid], sleeping_start, sizeof(struct timespec));
 		_STARPU_PTHREAD_MUTEX_UNLOCK(&worker_info_mutex[workerid]);
 	}
@@ -195,7 +198,7 @@ void _starpu_worker_register_executing_start_date(int workerid, struct timespec
 	if (profiling)
 	{
 		_STARPU_PTHREAD_MUTEX_LOCK(&worker_info_mutex[workerid]);
-		worker_registered_executing_start[workerid] = 1;	
+		worker_registered_executing_start[workerid] = 1;
 		memcpy(&executing_start_date[workerid], executing_start, sizeof(struct timespec));
 		_STARPU_PTHREAD_MUTEX_UNLOCK(&worker_info_mutex[workerid]);
 	}
@@ -208,7 +211,7 @@ void _starpu_worker_update_profiling_info_sleeping(int workerid, struct timespec
 		_STARPU_PTHREAD_MUTEX_LOCK(&worker_info_mutex[workerid]);
 
                 /* Perhaps that profiling was enabled while the worker was
-                 * already blocked, so we don't measure (end - start), but 
+                 * already blocked, so we don't measure (end - start), but
                  * (end - max(start,worker_start)) where worker_start is the
                  * date of the previous profiling info reset on the worker */
 		struct timespec *worker_start = &worker_info[workerid].start_time;
@@ -223,7 +226,7 @@ void _starpu_worker_update_profiling_info_sleeping(int workerid, struct timespec
 
 		starpu_timespec_accumulate(&worker_info[workerid].sleeping_time, &sleeping_time);
 
-		worker_registered_sleeping_start[workerid] = 0;	
+		worker_registered_sleeping_start[workerid] = 0;
 
 		_STARPU_PTHREAD_MUTEX_UNLOCK(&worker_info_mutex[workerid]);
 	}
@@ -243,9 +246,10 @@ void _starpu_worker_update_profiling_info_executing(int workerid, struct timespe
 		worker_info[workerid].stall_cycles += stall_cycles;
 		worker_info[workerid].power_consumed += power_consumed;
 		worker_info[workerid].executed_tasks += executed_tasks;
-	
+
 		_STARPU_PTHREAD_MUTEX_UNLOCK(&worker_info_mutex[workerid]);
-	} else /* Not thread safe, shouldn't be too much a problem */
+	}
+	else /* Not thread safe, shouldn't be too much a problem */
 		worker_info[workerid].executed_tasks += executed_tasks;
 }
 
@@ -329,7 +333,7 @@ void _starpu_initialize_busid_matrix(void)
 	int i, j;
 	for (j = 0; j < STARPU_MAXNODES; j++)
 	for (i = 0; i < STARPU_MAXNODES; i++)
-		busid_matrix[i][j] = -1;	
+		busid_matrix[i][j] = -1;
 
 	busid_cnt = 0;
 }
@@ -400,7 +404,7 @@ int starpu_bus_get_profiling_info(int busid, struct starpu_bus_profiling_info *b
 	_starpu_bus_reset_profiling_info(&bus_profiling_info[src_node][dst_node]);
 
 	return 0;
-} 
+}
 
 void _starpu_bus_update_profiling_info(int src_node, int dst_node, size_t size)
 {

+ 10 - 5
src/profiling/profiling_helpers.c

@@ -30,7 +30,7 @@ void starpu_bus_profiling_helper_display_summary(void)
 	for (busid = 0; busid < bus_cnt; busid++)
 	{
 		int src, dst;
-	
+
 		src = starpu_bus_get_src(busid);
 		dst = starpu_bus_get_dst(busid);
 
@@ -67,7 +67,8 @@ void starpu_worker_profiling_helper_display_summary(void)
 
 		starpu_worker_get_name(workerid, name, sizeof(name));
 
-		if (profiling) {
+		if (profiling)
+		{
 			double total_time = starpu_timing_timespec_to_us(&info.total_time) / 1000.;
 			double executing_time = starpu_timing_timespec_to_us(&info.executing_time) / 1000.;
 			double sleeping_time = starpu_timing_timespec_to_us(&info.sleeping_time) / 1000.;
@@ -80,16 +81,20 @@ void starpu_worker_profiling_helper_display_summary(void)
 				fprintf(stderr, "\t%lu Mcy %lu Mcy stall\n", info.used_cycles/1000000, info.stall_cycles/1000000);
 			if (info.power_consumed)
 				fprintf(stderr, "\t%f J consumed\n", info.power_consumed);
-		} else {
+		}
+		else
+		{
 			fprintf(stderr, "\t%-32s\t%d task(s)\n", name, info.executed_tasks);
 		}
 
 		sum_consumed += info.power_consumed;
 	}
 
-	if (profiling) {
+	if (profiling)
+	{
 		const char *strval_idle_power = getenv("STARPU_IDLE_POWER");
-		if (strval_idle_power) {
+		if (strval_idle_power)
+		{
 			double idle_power = atof(strval_idle_power); /* Watt */
 			double idle_consumption = idle_power * overall_time / 1000.; /* J */
 

+ 58 - 41
src/sched_policies/deque_modeling_policy_data_aware.c

@@ -56,7 +56,7 @@ static int count_non_ready_buffers(struct starpu_task *task, uint32_t node)
 
 		descr = &descrs[index];
 		handle = descr->handle;
-		
+
 		int is_valid;
 		starpu_data_query_status(handle, node, NULL, &is_valid, NULL);
 
@@ -74,7 +74,7 @@ static struct starpu_task *_starpu_fifo_pop_first_ready_task(struct _starpu_fifo
 	if (fifo_queue->ntasks == 0)
 		return NULL;
 
-	if (fifo_queue->ntasks > 0) 
+	if (fifo_queue->ntasks > 0)
 	{
 		fifo_queue->ntasks--;
 
@@ -105,12 +105,12 @@ static struct starpu_task *_starpu_fifo_pop_first_ready_task(struct _starpu_fifo
 
 			current = current->prev;
 		}
-		
+
 		starpu_task_list_erase(&fifo_queue->taskq, task);
 
 		_STARPU_TRACE_JOB_POP(task, 0);
 	}
-	
+
 	return task;
 }
 
@@ -124,9 +124,10 @@ static struct starpu_task *dmda_pop_ready_task(void)
 	unsigned node = starpu_worker_get_memory_node(workerid);
 
 	task = _starpu_fifo_pop_first_ready_task(fifo, node);
-	if (task) {
+	if (task)
+	{
 		double model = task->predicted;
-	
+
 		fifo->exp_len -= model;
 		fifo->exp_start = starpu_timing_now() + model;
 		fifo->exp_end = fifo->exp_start + fifo->exp_len;
@@ -154,9 +155,10 @@ static struct starpu_task *dmda_pop_task(void)
 	struct _starpu_fifo_taskq *fifo = queue_array[workerid];
 
 	task = _starpu_fifo_pop_task(fifo, workerid);
-	if (task) {
+	if (task)
+	{
 		double model = task->predicted;
-	
+
 		fifo->exp_len -= model;
 		fifo->exp_start = starpu_timing_now() + model;
 		fifo->exp_end = fifo->exp_start + fifo->exp_len;
@@ -195,7 +197,7 @@ static struct starpu_task *dmda_pop_every_task(void)
 		fifo->exp_len -= model;
 		fifo->exp_start = starpu_timing_now() + model;
 		fifo->exp_end = fifo->exp_start + fifo->exp_len;
-	
+
 		new_list = new_list->next;
 	}
 
@@ -218,7 +220,8 @@ int _starpu_fifo_push_sorted_task(struct _starpu_fifo_taskq *fifo_queue, pthread
 		task->prev = NULL;
 		task->next = NULL;
 	}
-	else {
+	else
+	{
 		struct starpu_task *current = list->head;
 		struct starpu_task *prev = NULL;
 
@@ -239,7 +242,8 @@ int _starpu_fifo_push_sorted_task(struct _starpu_fifo_taskq *fifo_queue, pthread
 			task->next = list->head;
 			list->head = task;
 		}
-		else {
+		else
+		{
 			if (current)
 			{
 				/* Insert between prev and current */
@@ -248,7 +252,8 @@ int _starpu_fifo_push_sorted_task(struct _starpu_fifo_taskq *fifo_queue, pthread
 				task->next = current;
 				current->prev = task;
 			}
-			else {
+			else
+			{
 				/* Insert at the tail of the list */
 				list->tail->next = task;
 				task->next = NULL;
@@ -318,8 +323,10 @@ static int _dm_push_task(struct starpu_task *task, unsigned prio)
 	unsigned best_impl = 0;
 	unsigned nimpl;
 
-	for (worker = 0; worker < nworkers; worker++) {
-		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) {
+	for (worker = 0; worker < nworkers; worker++)
+	{
+		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
+		{
 			double exp_end;
 
 			fifo = queue_array[worker];
@@ -344,7 +351,8 @@ static int _dm_push_task(struct starpu_task *task, unsigned prio)
 					|| (!calibrating && ntasks_end < ntasks_best_end) /* Not calibrating, take better task */
 					|| (!calibrating && local_length == -1.0) /* Not calibrating but this worker is being calibrated */
 					|| (calibrating && local_length == -1.0 && ntasks_end < ntasks_best_end) /* Calibrating, compete this worker with other non-calibrated */
-					) {
+					)
+			{
 				ntasks_best_end = ntasks_end;
 				ntasks_best = worker;
 				best_impl = nimpl;
@@ -377,11 +385,12 @@ static int _dm_push_task(struct starpu_task *task, unsigned prio)
 		}
 	}
 
-	if (unknown) {
+	if (unknown)
+	{
 		best = ntasks_best;
 		model_best = 0.0;
 	}
-	
+
 	//_STARPU_DEBUG("Scheduler dm: kernel (%u)\n", best_impl);
 
 	 _starpu_get_job_associated_to_task(task)->nimpl = best_impl;
@@ -396,7 +405,7 @@ static int _dmda_push_task(struct starpu_task *task, unsigned prio)
 	struct _starpu_fifo_taskq *fifo;
 	unsigned worker;
 	int best = -1;
-	
+
 	/* this flag is set if the corresponding worker is selected because
 	   there is no performance prediction available yet */
 	int forced_best = -1;
@@ -423,8 +432,10 @@ static int _dmda_push_task(struct starpu_task *task, unsigned prio)
 	unsigned best_impl = 0;
 	unsigned nimpl;
 
-	for (worker = 0; worker < nworkers; worker++) {
-		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) {
+	for (worker = 0; worker < nworkers; worker++)
+	{
+		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
+		{
 			fifo = queue_array[worker];
 
 			/* Sometimes workers didn't take the tasks as early as we expected */
@@ -453,7 +464,8 @@ static int _dmda_push_task(struct starpu_task *task, unsigned prio)
 					|| (!calibrating && ntasks_end < ntasks_best_end) /* Not calibrating, take better task */
 					|| (!calibrating && local_task_length[worker][nimpl] == -1.0) /* Not calibrating but this worker is being calibrated */
 					|| (calibrating && local_task_length[worker][nimpl] == -1.0 && ntasks_end < ntasks_best_end) /* Calibrating, compete this worker with other non-calibrated */
-					) {
+					)
+			{
 				ntasks_best_end = ntasks_end;
 				ntasks_best = worker;
 				best_impl = nimpl;
@@ -493,7 +505,7 @@ static int _dmda_push_task(struct starpu_task *task, unsigned prio)
 		forced_best = ntasks_best;
 
 	double best_fitness = -1;
-	
+
 	if (forced_best == -1)
 	{
 		for (worker = 0; worker < nworkers; worker++)
@@ -504,12 +516,13 @@ static int _dmda_push_task(struct starpu_task *task, unsigned prio)
 				/* no one on that queue may execute this task */
 				continue;
 			}
-	
-			fitness[worker][nimpl] = alpha*(exp_end[worker][nimpl] - best_exp_end) 
+
+			fitness[worker][nimpl] = alpha*(exp_end[worker][nimpl] - best_exp_end)
 				+ beta*(local_data_penalty[worker][nimpl])
 				+ _gamma*(local_power[worker][nimpl]);
 
-			if (exp_end[worker][nimpl] > max_exp_end) {
+			if (exp_end[worker][nimpl] > max_exp_end)
+			{
 				/* This placement will make the computation
 				 * longer, take into account the idle
 				 * consumption of other cpus */
@@ -529,7 +542,7 @@ static int _dmda_push_task(struct starpu_task *task, unsigned prio)
 	}
 
 	STARPU_ASSERT(forced_best != -1 || best != -1);
-	
+
 	if (forced_best != -1)
 	{
 		/* there is no prediction available for that task
@@ -539,7 +552,7 @@ static int _dmda_push_task(struct starpu_task *task, unsigned prio)
 		model_best = 0.0;
 		//penality_best = 0.0;
 	}
-	else 
+	else
 	{
 		model_best = local_task_length[best][nimpl];
 		//penality_best = local_data_penalty[best][nimpl];
@@ -568,8 +581,8 @@ static int dmda_push_task(struct starpu_task *task)
 	return _dmda_push_task(task, 0);
 }
 
-static void initialize_dmda_policy(struct starpu_machine_topology *topology, 
-	 __attribute__ ((unused)) struct starpu_sched_policy *_policy) 
+static void initialize_dmda_policy(struct starpu_machine_topology *topology,
+	 __attribute__ ((unused)) struct starpu_sched_policy *_policy)
 {
 	nworkers = topology->nworkers;
 
@@ -593,10 +606,10 @@ static void initialize_dmda_policy(struct starpu_machine_topology *topology,
 	for (workerid = 0; workerid < nworkers; workerid++)
 	{
 		queue_array[workerid] = _starpu_create_fifo();
-	
+
 		_STARPU_PTHREAD_MUTEX_INIT(&sched_mutex[workerid], NULL);
 		_STARPU_PTHREAD_COND_INIT(&sched_cond[workerid], NULL);
-	
+
 		starpu_worker_set_sched_condition(workerid, &sched_cond[workerid], &sched_mutex[workerid]);
 	}
 }
@@ -611,8 +624,8 @@ static void initialize_dmda_sorted_policy(struct starpu_machine_topology *topolo
 	starpu_sched_set_max_priority(INT_MAX);
 }
 
-static void deinitialize_dmda_policy(struct starpu_machine_topology *topology, 
-	 __attribute__ ((unused)) struct starpu_sched_policy *_policy) 
+static void deinitialize_dmda_policy(struct starpu_machine_topology *topology,
+	 __attribute__ ((unused)) struct starpu_sched_policy *_policy)
 {
 	unsigned workerid;
 	for (workerid = 0; workerid < topology->nworkers; workerid++)
@@ -622,10 +635,11 @@ static void deinitialize_dmda_policy(struct starpu_machine_topology *topology,
 }
 
 /* TODO: use post_exec_hook to fix the expected start */
-struct starpu_sched_policy _starpu_sched_dm_policy = {
+struct starpu_sched_policy _starpu_sched_dm_policy =
+{
 	.init_sched = initialize_dmda_policy,
 	.deinit_sched = deinitialize_dmda_policy,
-	.push_task = dm_push_task, 
+	.push_task = dm_push_task,
 	.pop_task = dmda_pop_task,
 	.post_exec_hook = NULL,
 	.pop_every_task = dmda_pop_every_task,
@@ -633,10 +647,11 @@ struct starpu_sched_policy _starpu_sched_dm_policy = {
 	.policy_description = "performance model"
 };
 
-struct starpu_sched_policy _starpu_sched_dmda_policy = {
+struct starpu_sched_policy _starpu_sched_dmda_policy =
+{
 	.init_sched = initialize_dmda_policy,
 	.deinit_sched = deinitialize_dmda_policy,
-	.push_task = dmda_push_task, 
+	.push_task = dmda_push_task,
 	.pop_task = dmda_pop_task,
 	.post_exec_hook = NULL,
 	.pop_every_task = dmda_pop_every_task,
@@ -644,10 +659,11 @@ struct starpu_sched_policy _starpu_sched_dmda_policy = {
 	.policy_description = "data-aware performance model"
 };
 
-struct starpu_sched_policy _starpu_sched_dmda_sorted_policy = {
+struct starpu_sched_policy _starpu_sched_dmda_sorted_policy =
+{
 	.init_sched = initialize_dmda_sorted_policy,
 	.deinit_sched = deinitialize_dmda_policy,
-	.push_task = dmda_push_sorted_task, 
+	.push_task = dmda_push_sorted_task,
 	.pop_task = dmda_pop_ready_task,
 	.post_exec_hook = NULL,
 	.pop_every_task = dmda_pop_every_task,
@@ -655,10 +671,11 @@ struct starpu_sched_policy _starpu_sched_dmda_sorted_policy = {
 	.policy_description = "data-aware performance model (sorted)"
 };
 
-struct starpu_sched_policy _starpu_sched_dmda_ready_policy = {
+struct starpu_sched_policy _starpu_sched_dmda_ready_policy =
+{
 	.init_sched = initialize_dmda_policy,
 	.deinit_sched = deinitialize_dmda_policy,
-	.push_task = dmda_push_task, 
+	.push_task = dmda_push_task,
 	.pop_task = dmda_pop_ready_task,
 	.post_exec_hook = NULL,
 	.pop_every_task = dmda_pop_every_task,

+ 4 - 3
src/sched_policies/deque_queues.c

@@ -100,7 +100,8 @@ struct _starpu_job_list *_starpu_deque_pop_every_task(struct _starpu_deque_jobq
 	{
 		new_list = NULL;
 	}
-	else {
+	else
+	{
 		/* there is a task */
 		old_list = deque_queue->jobq;
 		new_list = _starpu_job_list_new();
@@ -123,7 +124,7 @@ struct _starpu_job_list *_starpu_deque_pop_every_task(struct _starpu_deque_jobq
 			{
 				/* this elements can be moved into the new list */
 				new_list_size++;
-				
+
 				_starpu_job_list_erase(old_list, i);
 				_starpu_job_list_push_back(new_list, i);
 				i->nimpl = nimpl;
@@ -141,7 +142,7 @@ struct _starpu_job_list *_starpu_deque_pop_every_task(struct _starpu_deque_jobq
 			deque_queue->njobs -= new_list_size;
 		}
 	}
-	
+
 	_STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
 
 	return new_list;

+ 2 - 1
src/sched_policies/deque_queues.h

@@ -23,7 +23,8 @@
 #include <common/config.h>
 #include <core/jobs.h>
 
-struct _starpu_deque_jobq {
+struct _starpu_deque_jobq
+{
 	/* the actual list */
 	struct _starpu_job_list *jobq;
 

+ 6 - 4
src/sched_policies/detect_combined_workers.c

@@ -41,7 +41,8 @@
  * workers		CPU-workers found by recursion in all the sub-trees and in this very one, represented as leaves in hwloc.
  */
 
-struct _starpu_tree {
+struct _starpu_tree
+{
     hwloc_obj_t obj;
     unsigned nb_workers;
     int *workers;
@@ -386,7 +387,7 @@ static void find_and_assign_combinations_with_hwloc(struct starpu_machine_topolo
     struct _starpu_tree tree;
 
     /* Of course we start from the root */
-    tree.obj = hwloc_get_obj_by_depth(topology->hwtopology, HWLOC_OBJ_SYSTEM, 0); 
+    tree.obj = hwloc_get_obj_by_depth(topology->hwtopology, HWLOC_OBJ_SYSTEM, 0);
     tree.nb_workers = 0;
     tree.workers = (int *) malloc(topology->nhwcpus * sizeof(int));
 
@@ -441,7 +442,7 @@ static void find_and_assign_combinations_without_hwloc(struct starpu_machine_top
 
 		/* We register this combination */
 		int ret;
-		ret = starpu_combined_worker_assign_workerid(size, workerids); 
+		ret = starpu_combined_worker_assign_workerid(size, workerids);
 		STARPU_ASSERT(ret >= 0);
 	    }
 	}
@@ -478,7 +479,8 @@ void _starpu_sched_find_worker_combinations(struct starpu_machine_topology *topo
 
     if ((config->user_conf && config->user_conf->single_combined_worker > 0) || starpu_get_env_number("STARPU_SINGLE_COMBINED_WORKER") > 0)
 	combine_all_cpu_workers(topology);
-    else {
+    else
+    {
 #ifdef STARPU_HAVE_HWLOC
 	find_and_assign_combinations_with_hwloc(topology);
 #else

+ 7 - 6
src/sched_policies/eager_central_policy.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010-2011  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -29,8 +29,8 @@ static struct _starpu_fifo_taskq *fifo;
 static pthread_cond_t sched_cond;
 static pthread_mutex_t sched_mutex;
 
-static void initialize_eager_center_policy(struct starpu_machine_topology *topology, 
-		   __attribute__ ((unused)) struct starpu_sched_policy *_policy) 
+static void initialize_eager_center_policy(struct starpu_machine_topology *topology,
+		   __attribute__ ((unused)) struct starpu_sched_policy *_policy)
 {
 	/* there is only a single queue in that trivial design */
 	fifo = _starpu_create_fifo();
@@ -43,8 +43,8 @@ static void initialize_eager_center_policy(struct starpu_machine_topology *topol
 		starpu_worker_set_sched_condition(workerid, &sched_cond, &sched_mutex);
 }
 
-static void deinitialize_eager_center_policy(__attribute__ ((unused)) struct starpu_machine_topology *topology, 
-		   __attribute__ ((unused)) struct starpu_sched_policy *_policy) 
+static void deinitialize_eager_center_policy(__attribute__ ((unused)) struct starpu_machine_topology *topology,
+		   __attribute__ ((unused)) struct starpu_sched_policy *_policy)
 {
 	//STARPU_ASSERT(_starpu_fifo_empty(fifo));
 
@@ -67,7 +67,8 @@ static struct starpu_task *pop_task_eager_policy(void)
 	return _starpu_fifo_pop_task(fifo, starpu_worker_get_id());
 }
 
-struct starpu_sched_policy _starpu_sched_eager_policy = {
+struct starpu_sched_policy _starpu_sched_eager_policy =
+{
 	.init_sched = initialize_eager_center_policy,
 	.deinit_sched = deinitialize_eager_center_policy,
 	.push_task = push_task_eager_policy,

+ 19 - 14
src/sched_policies/eager_central_priority_policy.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010-2011  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -31,8 +31,9 @@
 
 #define NPRIO_LEVELS	(MAX_LEVEL - MIN_LEVEL + 1)
 
-struct starpu_priority_taskq_s {
-	/* the actual lists 
+struct starpu_priority_taskq_s
+{
+	/* the actual lists
 	 *	taskq[p] is for priority [p - STARPU_MIN_PRIO] */
 	struct starpu_task_list taskq[NPRIO_LEVELS];
 	unsigned ntasks[NPRIO_LEVELS];
@@ -43,19 +44,19 @@ struct starpu_priority_taskq_s {
 /* the former is the actual queue, the latter some container */
 static struct starpu_priority_taskq_s *taskq;
 
-/* keep track of the total number of tasks to be scheduled to avoid infinite 
+/* keep track of the total number of tasks to be scheduled to avoid infinite
  * polling when there are really few tasks in the overall queue */
 static pthread_cond_t global_sched_cond;
 static pthread_mutex_t global_sched_mutex;
 
 /*
- * Centralized queue with priorities 
+ * Centralized queue with priorities
  */
 
 static struct starpu_priority_taskq_s *_starpu_create_priority_taskq(void)
 {
 	struct starpu_priority_taskq_s *central_queue;
-	
+
 	central_queue = (struct starpu_priority_taskq_s *) malloc(sizeof(struct starpu_priority_taskq_s));
 	central_queue->total_ntasks = 0;
 
@@ -74,8 +75,8 @@ static void _starpu_destroy_priority_taskq(struct starpu_priority_taskq_s *prior
 	free(priority_queue);
 }
 
-static void initialize_eager_center_priority_policy(struct starpu_machine_topology *topology, 
-			__attribute__ ((unused))	struct starpu_sched_policy *_policy) 
+static void initialize_eager_center_priority_policy(struct starpu_machine_topology *topology,
+			__attribute__ ((unused))	struct starpu_sched_policy *_policy)
 {
 	/* In this policy, we support more than two levels of priority. */
 	starpu_sched_set_min_priority(MIN_LEVEL);
@@ -93,7 +94,7 @@ static void initialize_eager_center_priority_policy(struct starpu_machine_topolo
 }
 
 static void deinitialize_eager_center_priority_policy(struct starpu_machine_topology *topology __attribute__ ((unused)),
-		   __attribute__ ((unused)) struct starpu_sched_policy *_policy) 
+		   __attribute__ ((unused)) struct starpu_sched_policy *_policy)
 {
 	/* TODO check that there is no task left in the queue */
 
@@ -107,7 +108,7 @@ static int _starpu_priority_push_task(struct starpu_task *task)
 	_STARPU_PTHREAD_MUTEX_LOCK(&global_sched_mutex);
 
 	_STARPU_TRACE_JOB_PUSH(task, 1);
-	
+
 	unsigned priolevel = task->priority - STARPU_MIN_PRIO;
 
 	starpu_task_list_push_front(&taskq->taskq[priolevel], task);
@@ -141,15 +142,18 @@ static struct starpu_task *_starpu_priority_pop_task(void)
 	if (taskq->total_ntasks > 0)
 	{
 		unsigned priolevel = NPRIO_LEVELS - 1;
-		do {
-			if (taskq->ntasks[priolevel] > 0) {
+		do
+		{
+			if (taskq->ntasks[priolevel] > 0)
+			{
 				/* there is some task that we can grab */
 				task = starpu_task_list_pop_back(&taskq->taskq[priolevel]);
 				taskq->ntasks[priolevel]--;
 				taskq->total_ntasks--;
 				_STARPU_TRACE_JOB_POP(task, 0);
 			}
-		} while (!task && priolevel-- > 0);
+		}
+		while (!task && priolevel-- > 0);
 	}
 	STARPU_ASSERT(starpu_worker_can_execute_task(starpu_worker_get_id(), task, 0) || !"prio does not support \"can_execute\"");
 
@@ -158,7 +162,8 @@ static struct starpu_task *_starpu_priority_pop_task(void)
 	return task;
 }
 
-struct starpu_sched_policy _starpu_sched_prio_policy = {
+struct starpu_sched_policy _starpu_sched_prio_policy =
+{
 	.init_sched = initialize_eager_center_priority_policy,
 	.deinit_sched = deinitialize_eager_center_priority_policy,
 	/* we always use priorities in that policy */

+ 9 - 7
src/sched_policies/fifo_queues.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010-2011  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  * Copyright (C) 2011  Télécom-SudParis
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -90,7 +90,7 @@ struct starpu_task *_starpu_fifo_pop_task(struct _starpu_fifo_taskq *fifo_queue,
 				return task;
 			}
 	}
-	
+
 	return NULL;
 }
 
@@ -102,12 +102,13 @@ struct starpu_task *_starpu_fifo_pop_every_task(struct _starpu_fifo_taskq *fifo_
 
 	struct starpu_task *new_list = NULL;
 	struct starpu_task *new_list_tail = NULL;
-	
+
 	_STARPU_PTHREAD_MUTEX_LOCK(sched_mutex);
 
 	size = fifo_queue->ntasks;
 
-	if (size > 0) {
+	if (size > 0)
+	{
 		old_list = &fifo_queue->taskq;
 		unsigned new_list_size = 0;
 
@@ -125,7 +126,7 @@ struct starpu_task *_starpu_fifo_pop_every_task(struct _starpu_fifo_taskq *fifo_
 			{
 				/* this elements can be moved into the new list */
 				new_list_size++;
-				
+
 				starpu_task_list_erase(old_list, task);
 
 				if (new_list_tail)
@@ -135,7 +136,8 @@ struct starpu_task *_starpu_fifo_pop_every_task(struct _starpu_fifo_taskq *fifo_
 					task->next = NULL;
 					new_list_tail = task;
 				}
-				else {
+				else
+				{
 					new_list = task;
 					new_list_tail = task;
 					task->prev = NULL;
@@ -144,7 +146,7 @@ struct starpu_task *_starpu_fifo_pop_every_task(struct _starpu_fifo_taskq *fifo_
 				_starpu_get_job_associated_to_task(task)->nimpl = nimpl;
 				break;
 			}
-		
+
 			task = next_task;
 		}
 

+ 2 - 1
src/sched_policies/fifo_queues.h

@@ -22,7 +22,8 @@
 #include <starpu.h>
 #include <common/config.h>
 
-struct _starpu_fifo_taskq {
+struct _starpu_fifo_taskq
+{
 	/* the actual list */
 	struct starpu_task_list taskq;
 

+ 46 - 30
src/sched_policies/heft.c

@@ -51,14 +51,15 @@ const float gamma_maximum=10000.0;
 const float idle_power_minimum=0;
 const float idle_power_maximum=10000.0;
 
-static void param_modified(struct starpu_top_param* d){
+static void param_modified(struct starpu_top_param* d)
+{
 	//just to show parameter modification
-	fprintf(stderr,"%s has been modified : alpha=%f|beta=%f|gamma=%f|idle_power=%f !\n", 
+	fprintf(stderr,"%s has been modified : alpha=%f|beta=%f|gamma=%f|idle_power=%f !\n",
 		d->name, alpha,beta,_gamma,idle_power);
 }
 
-static void heft_init(struct starpu_machine_topology *topology, 
-	 __attribute__ ((unused)) struct starpu_sched_policy *_policy) 
+static void heft_init(struct starpu_machine_topology *topology,
+	 __attribute__ ((unused)) struct starpu_sched_policy *_policy)
 {
 	nworkers = topology->nworkers;
 
@@ -77,7 +78,7 @@ static void heft_init(struct starpu_machine_topology *topology,
 	const char *strval_idle_power = getenv("STARPU_IDLE_POWER");
 	if (strval_idle_power)
 		idle_power = atof(strval_idle_power);
-	
+
 	starpu_top_register_parameter_float("HEFT_ALPHA", &alpha, alpha_minimum,alpha_maximum,param_modified);
 	starpu_top_register_parameter_float("HEFT_BETA", &beta, beta_minimum,beta_maximum,param_modified);
 	starpu_top_register_parameter_float("HEFT_GAMMA", &_gamma, gamma_minimum,gamma_maximum,param_modified);
@@ -88,12 +89,12 @@ static void heft_init(struct starpu_machine_topology *topology,
 	{
 		exp_start[workerid] = starpu_timing_now();
 		exp_len[workerid] = 0.0;
-		exp_end[workerid] = exp_start[workerid]; 
+		exp_end[workerid] = exp_start[workerid];
 		ntasks[workerid] = 0;
 
 		_STARPU_PTHREAD_MUTEX_INIT(&sched_mutex[workerid], NULL);
 		_STARPU_PTHREAD_COND_INIT(&sched_cond[workerid], NULL);
-	
+
 		starpu_worker_set_sched_condition(workerid, &sched_cond[workerid], &sched_mutex[workerid]);
 	}
 }
@@ -103,7 +104,7 @@ static void heft_post_exec_hook(struct starpu_task *task)
 	int workerid = starpu_worker_get_id();
 	double model = task->predicted;
 	double transfer_model = task->predicted_transfer;
-	
+
 	/* Once we have executed the task, we can update the predicted amount
 	 * of work. */
 	_STARPU_PTHREAD_MUTEX_LOCK(&sched_mutex[workerid]);
@@ -143,11 +144,14 @@ static void heft_push_task_notify(struct starpu_task *task, int workerid)
 	/* If there is no prediction available, we consider the task has a null length */
 	if (predicted_transfer != -1.0)
 	{
-		if (starpu_timing_now() + predicted_transfer < exp_end[workerid]) {
+		if (starpu_timing_now() + predicted_transfer < exp_end[workerid])
+		{
 			/* We may hope that the transfer will be finished by
 			 * the start of the task. */
 			predicted_transfer = 0;
-		} else {
+		}
+		else
+		{
 			/* The transfer will not be finished by then, take the
 			 * remainder into account */
 			predicted_transfer = (starpu_timing_now() + predicted_transfer) - exp_end[workerid];
@@ -176,11 +180,14 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 	exp_end[best_workerid] += predicted;
 	exp_len[best_workerid] += predicted;
 
-	if (starpu_timing_now() + predicted_transfer < exp_end[best_workerid]) {
+	if (starpu_timing_now() + predicted_transfer < exp_end[best_workerid])
+	{
 		/* We may hope that the transfer will be finished by
 		 * the start of the task. */
 		predicted_transfer = 0;
-	} else {
+	}
+	else
+	{
 		/* The transfer will not be finished by then, take the
 		 * remainder into account */
 		predicted_transfer = (starpu_timing_now() + predicted_transfer) - exp_end[best_workerid];
@@ -195,7 +202,7 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 	task->predicted_transfer = predicted_transfer;
 
 	if (starpu_top_status_get())
-		starpu_top_task_prevision(task, best_workerid, 
+		starpu_top_task_prevision(task, best_workerid,
 					(unsigned long long)(exp_end[best_workerid]-predicted)/1000,
 					(unsigned long long)exp_end[best_workerid]/1000);
 
@@ -232,8 +239,10 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 
 	unsigned nimpl;
 
-	for (worker = 0; worker < nworkers; worker++) {
-		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) {
+	for (worker = 0; worker < nworkers; worker++)
+	{
+		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
+		{
 			/* Sometimes workers didn't take the tasks as early as we expected */
 			exp_start[worker] = STARPU_MAX(exp_start[worker], starpu_timing_now());
 			exp_end[worker][nimpl] = exp_start[worker] + exp_len[worker];
@@ -258,7 +267,8 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 				//_STARPU_DEBUG("Scheduler heft bundle: task length (%lf) local power (%lf) worker (%u) kernel (%u) \n", local_task_length[worker],local_power[worker],worker,nimpl);
 
 			}
-			else {
+			else
+			{
 				local_task_length[worker][nimpl] = starpu_task_expected_length(task, perf_arch, nimpl);
 				local_data_penalty[worker][nimpl] = starpu_task_expected_data_transfer_time(memory_node, task);
 				local_power[worker][nimpl] = starpu_task_expected_power(task, perf_arch,nimpl);
@@ -276,7 +286,8 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 				|| (!calibrating && ntasks_end < ntasks_best_end) /* Not calibrating, take better task */
 				|| (!calibrating && local_task_length[worker][nimpl] == -1.0) /* Not calibrating but this worker is being calibrated */
 				|| (calibrating && local_task_length[worker][nimpl] == -1.0 && ntasks_end < ntasks_best_end) /* Calibrating, compete this worker with other non-calibrated */
-				) {
+				)
+			{
 				ntasks_best_end = ntasks_end;
 				ntasks_best = worker;
 				nimpl_best = nimpl;
@@ -323,7 +334,7 @@ static int _heft_push_task(struct starpu_task *task, unsigned prio)
 	unsigned worker, nimpl;
 	int best = -1;
 	int selected_impl= -1;
-	
+
 	/* this flag is set if the corresponding worker is selected because
 	   there is no performance prediction available yet */
 	int forced_worker;
@@ -352,7 +363,8 @@ static int _heft_push_task(struct starpu_task *task, unsigned prio)
 
 	/* If there is no prediction available for that task with that arch we
 	 * want to speed-up calibration time so we force this measurement */
-	if (forced_worker != -1) {
+	if (forced_worker != -1)
+	{
 		_starpu_get_job_associated_to_task(task)->nimpl = forced_impl;
 		return push_task_on_best_worker(task, forced_worker, 0.0, 0.0, prio);
 	}
@@ -362,24 +374,26 @@ static int _heft_push_task(struct starpu_task *task, unsigned prio)
 	 *	trade-off between load-balacing, data locality, and energy
 	 *	consumption.
 	 */
-	
+
 	double fitness[nworkers][STARPU_MAXIMPLEMENTATIONS];
 	double best_fitness = -1;
 
 	for (worker = 0; worker < nworkers; worker++)
 	{
-		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) {
+		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
+		{
 			if (!starpu_worker_can_execute_task(worker, task, nimpl))
 			{
 				/* no one on that queue may execute this task */
 				continue;
 			}
 
-			fitness[worker][nimpl] = alpha*(exp_end[worker][nimpl] - best_exp_end) 
+			fitness[worker][nimpl] = alpha*(exp_end[worker][nimpl] - best_exp_end)
 						+ beta*(local_data_penalty[worker][nimpl])
 						+ _gamma*(local_power[worker][nimpl]);
 
-			if (exp_end[worker][nimpl] > max_exp_end) {
+			if (exp_end[worker][nimpl] > max_exp_end)
+			{
 				/* This placement will make the computation
 				 * longer, take into account the idle
 				 * consumption of other cpus */
@@ -398,7 +412,7 @@ static int _heft_push_task(struct starpu_task *task, unsigned prio)
 
 	/* By now, we must have found a solution */
 	STARPU_ASSERT(best != -1);
-	
+
 	/* we should now have the best worker in variable "best" */
 	double model_best, transfer_model_best;
 
@@ -423,12 +437,13 @@ static int _heft_push_task(struct starpu_task *task, unsigned prio)
 			_STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex);
 
 	}
-	else {
+	else
+	{
 		model_best = local_task_length[best][selected_impl];
 		transfer_model_best = local_data_penalty[best][selected_impl];
 	}
 
-	
+
 	_starpu_get_job_associated_to_task(task)->nimpl = selected_impl;
 
 	return push_task_on_best_worker(task, best, model_best, transfer_model_best, prio);
@@ -442,8 +457,8 @@ static int heft_push_task(struct starpu_task *task)
 	return _heft_push_task(task, 0);
 }
 
-static void heft_deinit(__attribute__ ((unused)) struct starpu_machine_topology *topology, 
-                        __attribute__ ((unused)) struct starpu_sched_policy *_policy) 
+static void heft_deinit(__attribute__ ((unused)) struct starpu_machine_topology *topology,
+                        __attribute__ ((unused)) struct starpu_sched_policy *_policy)
 {
 	unsigned workerid;
 	for (workerid = 0; workerid < nworkers; workerid++)
@@ -453,10 +468,11 @@ static void heft_deinit(__attribute__ ((unused)) struct starpu_machine_topology
 	}
 }
 
-struct starpu_sched_policy heft_policy = {
+struct starpu_sched_policy heft_policy =
+{
 	.init_sched = heft_init,
 	.deinit_sched = heft_deinit,
-	.push_task = heft_push_task, 
+	.push_task = heft_push_task,
 	.push_task_notify = heft_push_task_notify,
 	.pop_task = NULL,
 	.pop_every_task = NULL,

+ 15 - 11
src/sched_policies/parallel_greedy.c

@@ -37,8 +37,8 @@ static int possible_combinations_cnt[STARPU_NMAXWORKERS];
 static int possible_combinations[STARPU_NMAXWORKERS][10];
 static int possible_combinations_size[STARPU_NMAXWORKERS][10];
 
-static void initialize_pgreedy_policy(struct starpu_machine_topology *topology, 
-		   __attribute__ ((unused)) struct starpu_sched_policy *_policy) 
+static void initialize_pgreedy_policy(struct starpu_machine_topology *topology,
+		   __attribute__ ((unused)) struct starpu_sched_policy *_policy)
 {
 	/* masters pick tasks from that queue */
 	fifo = _starpu_create_fifo();
@@ -65,7 +65,7 @@ static void initialize_pgreedy_policy(struct starpu_machine_topology *topology,
 	}
 
 	unsigned i;
-	
+
 	for (i = 0; i < ncombinedworkers; i++)
 	{
 		workerid = nworkers + i;
@@ -113,7 +113,8 @@ static void initialize_pgreedy_policy(struct starpu_machine_topology *topology,
 			starpu_worker_set_sched_condition(workerid,
 				&sched_cond, &sched_mutex);
 		}
-		else {
+		else
+		{
 			starpu_worker_set_sched_condition(workerid,
 				&master_sched_cond[master],
 				&master_sched_mutex[master]);
@@ -128,8 +129,8 @@ static void initialize_pgreedy_policy(struct starpu_machine_topology *topology,
 #endif
 }
 
-static void deinitialize_pgreedy_policy(__attribute__ ((unused)) struct starpu_machine_topology *topology, 
-		   __attribute__ ((unused)) struct starpu_sched_policy *_policy) 
+static void deinitialize_pgreedy_policy(__attribute__ ((unused)) struct starpu_machine_topology *topology,
+		   __attribute__ ((unused)) struct starpu_sched_policy *_policy)
 {
 	/* TODO check that there is no task left in the queue */
 
@@ -175,14 +176,14 @@ static struct starpu_task *pop_task_pgreedy_policy(void)
 					best_workerid = combined_worker;
 				}
 			}
-		} 
+		}
 
 		/* In case nobody can execute this task, we let the master
 		 * worker take it anyway, so that it can discard it afterward.
 		 * */
 		if (best_workerid == -1)
 			return task;
-		
+
 		/* Is this a basic worker or a combined worker ? */
 		int nbasic_workers = (int)starpu_worker_get_count();
 		int is_basic_worker = (best_workerid < nbasic_workers);
@@ -192,7 +193,8 @@ static struct starpu_task *pop_task_pgreedy_policy(void)
 			/* The master is alone */
 			return task;
 		}
-		else {
+		else
+		{
 			/* The master needs to dispatch the task between the
 			 * different combined workers */
 			struct _starpu_combined_worker *combined_worker;
@@ -226,13 +228,15 @@ static struct starpu_task *pop_task_pgreedy_policy(void)
 			return master_alias;
 		}
 	}
-	else {
+	else
+	{
 		/* The worker is a slave */
 		return _starpu_fifo_pop_task(local_fifo[workerid], workerid);
 	}
 }
 
-struct starpu_sched_policy _starpu_sched_pgreedy_policy = {
+struct starpu_sched_policy _starpu_sched_pgreedy_policy =
+{
 	.init_sched = initialize_pgreedy_policy,
 	.deinit_sched = deinitialize_pgreedy_policy,
 	.push_task = push_task_pgreedy_policy,

+ 29 - 23
src/sched_policies/parallel_heft.c

@@ -52,10 +52,10 @@ static void parallel_heft_post_exec_hook(struct starpu_task *task)
 	int workerid = starpu_worker_get_id();
 	double model = task->predicted;
 	double transfer_model = task->predicted_transfer;
-	
+
 	if (model < 0.0)
 		model = 0.0;
-	
+
 	/* Once we have executed the task, we can update the predicted amount
 	 * of work. */
 	_STARPU_PTHREAD_MUTEX_LOCK(&sched_mutex[workerid]);
@@ -75,7 +75,7 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 	int nbasic_workers = (int)starpu_worker_get_count();
 	int is_basic_worker = (best_workerid < nbasic_workers);
 
-	unsigned memory_node; 
+	unsigned memory_node;
 	memory_node = starpu_worker_get_memory_node(best_workerid);
 
 	if (starpu_get_prefetch_flag())
@@ -93,12 +93,13 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 		worker_exp_len[best_workerid] += exp_end_predicted - worker_exp_end[best_workerid];
 		worker_exp_end[best_workerid] = exp_end_predicted;
 		worker_exp_start[best_workerid] = exp_end_predicted - worker_exp_len[best_workerid];
-	
+
 		ntasks[best_workerid]++;
 
 		ret = starpu_push_local_task(best_workerid, task, prio);
 	}
-	else {
+	else
+	{
 		/* This is a combined worker so we create task aliases */
 		struct _starpu_combined_worker *combined_worker;
 		combined_worker = _starpu_get_combined_worker_struct(best_workerid);
@@ -122,13 +123,13 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 			alias->predicted = exp_end_predicted - worker_exp_end[local_worker];
 			/* TODO */
 			alias->predicted_transfer = 0;
-	
+
 			worker_exp_len[local_worker] += exp_end_predicted - worker_exp_end[local_worker];
 			worker_exp_end[local_worker] = exp_end_predicted;
 			worker_exp_start[local_worker] = exp_end_predicted - worker_exp_len[local_worker];
-		
+
 			ntasks[local_worker]++;
-	
+
 			ret |= starpu_push_local_task(local_worker, alias, prio);
 		}
 
@@ -146,7 +147,8 @@ static double compute_expected_end(int workerid, double length)
 		/* This is a basic worker */
 		return worker_exp_start[workerid] + worker_exp_len[workerid] + length;
 	}
-	else {
+	else
+	{
 		/* This is a combined worker, the expected end is the end for the latest worker */
 		int worker_size;
 		int *combined_workerid;
@@ -175,7 +177,8 @@ static double compute_ntasks_end(int workerid)
 		/* This is a basic worker */
 		return ntasks[workerid] / starpu_worker_get_relative_speedup(perf_arch);
 	}
-	else {
+	else
+	{
 		/* This is a combined worker, the expected end is the end for the latest worker */
 		int worker_size;
 		int *combined_workerid;
@@ -198,7 +201,7 @@ static int _parallel_heft_push_task(struct starpu_task *task, unsigned prio)
 {
 	unsigned worker;
 	int best = -1;
-	
+
 	/* this flag is set if the corresponding worker is selected because
 	   there is no performance prediction available yet */
 	int forced_best = -1;
@@ -244,7 +247,8 @@ static int _parallel_heft_push_task(struct starpu_task *task, unsigned prio)
 				skip_worker[worker][nimpl] = 1;
 				continue;
 			}
-			else {
+			else
+			{
 				skip_worker[worker][nimpl] = 0;
 			}
 
@@ -261,7 +265,8 @@ static int _parallel_heft_push_task(struct starpu_task *task, unsigned prio)
 					|| (!calibrating && ntasks_end < ntasks_best_end) /* Not calibrating, take better task */
 					|| (!calibrating && local_task_length[worker][nimpl] == -1.0) /* Not calibrating but this worker is being calibrated */
 					|| (calibrating && local_task_length[worker][nimpl] == -1.0 && ntasks_end < ntasks_best_end) /* Calibrating, compete this worker with other non-calibrated */
-					) {
+					)
+			{
 				ntasks_best_end = ntasks_end;
 				ntasks_best = worker;
 			}
@@ -318,8 +323,8 @@ static int _parallel_heft_push_task(struct starpu_task *task, unsigned prio)
 					/* no one on that queue may execute this task */
 					continue;
 				}
-		
-				fitness[worker][nimpl] = alpha*(local_exp_end[worker][nimpl] - best_exp_end) 
+
+				fitness[worker][nimpl] = alpha*(local_exp_end[worker][nimpl] - best_exp_end)
 						+ beta*(local_data_penalty[worker][nimpl])
 						+ _gamma*(local_power[worker][nimpl]);
 
@@ -352,7 +357,7 @@ static int _parallel_heft_push_task(struct starpu_task *task, unsigned prio)
 		//penality_best = 0.0;
 		best_exp_end = local_exp_end[best][nimpl];
 	}
-	else 
+	else
 	{
 		//penality_best = local_data_penalty[best][nimpl];
 		best_exp_end = local_exp_end[best][nimpl];
@@ -373,8 +378,8 @@ static int parallel_heft_push_task(struct starpu_task *task)
 	return _parallel_heft_push_task(task, 0);
 }
 
-static void initialize_parallel_heft_policy(struct starpu_machine_topology *topology, 
-	 __attribute__ ((unused)) struct starpu_sched_policy *_policy) 
+static void initialize_parallel_heft_policy(struct starpu_machine_topology *topology,
+	 __attribute__ ((unused)) struct starpu_sched_policy *_policy)
 {
 	nworkers = topology->nworkers;
 
@@ -403,12 +408,12 @@ static void initialize_parallel_heft_policy(struct starpu_machine_topology *topo
 	{
 		worker_exp_start[workerid] = starpu_timing_now();
 		worker_exp_len[workerid] = 0.0;
-		worker_exp_end[workerid] = worker_exp_start[workerid]; 
+		worker_exp_end[workerid] = worker_exp_start[workerid];
 		ntasks[workerid] = 0;
-	
+
 		_STARPU_PTHREAD_MUTEX_INIT(&sched_mutex[workerid], NULL);
 		_STARPU_PTHREAD_COND_INIT(&sched_cond[workerid], NULL);
-	
+
 		starpu_worker_set_sched_condition(workerid, &sched_cond[workerid], &sched_mutex[workerid]);
 	}
 
@@ -437,10 +442,11 @@ static void initialize_parallel_heft_policy(struct starpu_machine_topology *topo
 }
 
 /* TODO: use post_exec_hook to fix the expected start */
-struct starpu_sched_policy _starpu_sched_parallel_heft_policy = {
+struct starpu_sched_policy _starpu_sched_parallel_heft_policy =
+{
 	.init_sched = initialize_parallel_heft_policy,
 	.deinit_sched = NULL,
-	.push_task = parallel_heft_push_task, 
+	.push_task = parallel_heft_push_task,
 	.pop_task = NULL,
 	.post_exec_hook = parallel_heft_post_exec_hook,
 	.pop_every_task = NULL,

+ 8 - 6
src/sched_policies/random_policy.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010-2011  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -49,7 +49,8 @@ static int _random_push_task(struct starpu_task *task, unsigned prio)
 		enum starpu_perf_archtype perf_arch = starpu_worker_get_perf_archtype(worker);
 		double worker_alpha = starpu_worker_get_relative_speedup(perf_arch);
 
-		if (alpha + worker_alpha > random && starpu_worker_can_execute_task(worker, task, 0)) {
+		if (alpha + worker_alpha > random && starpu_worker_can_execute_task(worker, task, 0))
+		{
 			/* we found the worker */
 			selected = worker;
 			break;
@@ -67,8 +68,8 @@ static int random_push_task(struct starpu_task *task)
 	return _random_push_task(task, !!task->priority);
 }
 
-static void initialize_random_policy(struct starpu_machine_topology *topology, 
-	 __attribute__ ((unused)) struct starpu_sched_policy *_policy) 
+static void initialize_random_policy(struct starpu_machine_topology *topology,
+				     __attribute__ ((unused)) struct starpu_sched_policy *_policy)
 {
 	starpu_srand48(time(NULL));
 
@@ -79,12 +80,13 @@ static void initialize_random_policy(struct starpu_machine_topology *topology,
 	{
 		_STARPU_PTHREAD_MUTEX_INIT(&sched_mutex[workerid], NULL);
 		_STARPU_PTHREAD_COND_INIT(&sched_cond[workerid], NULL);
-	
+
 		starpu_worker_set_sched_condition(workerid, &sched_cond[workerid], &sched_mutex[workerid]);
 	}
 }
 
-struct starpu_sched_policy _starpu_sched_random_policy = {
+struct starpu_sched_policy _starpu_sched_random_policy =
+{
 	.init_sched = initialize_random_policy,
 	.deinit_sched = NULL,
 	.push_task = random_push_task,

+ 6 - 6
src/sched_policies/stack_queues.c

@@ -22,7 +22,7 @@
 #include <errno.h>
 #include <common/utils.h>
 
-/* keep track of the total number of jobs to be scheduled to avoid infinite 
+/* keep track of the total number of jobs to be scheduled to avoid infinite
  * polling when there are really few jobs in the overall queue */
 static unsigned total_number_of_jobs;
 
@@ -82,23 +82,23 @@ struct _starpu_job *_starpu_stack_pop_task(struct _starpu_stack_jobq *stack_queu
 		return NULL;
 
 	/* TODO find a task that suits workerid */
-	if (stack_queue->njobs > 0) 
+	if (stack_queue->njobs > 0)
 	{
 		/* there is a task */
 		j = _starpu_job_list_pop_back(stack_queue->jobq);
-	
+
 		STARPU_ASSERT(j);
 		stack_queue->njobs--;
-		
+
 		_STARPU_TRACE_JOB_POP(j, 0);
 
-		/* we are sure that we got it now, so at worst, some people thought 
+		/* we are sure that we got it now, so at worst, some people thought
 		 * there remained some work and will soon discover it is not true */
 		_STARPU_PTHREAD_MUTEX_LOCK(sched_mutex);
 		total_number_of_jobs--;
 		_STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
 	}
-	
+
 	return j;
 
 }

+ 2 - 1
src/sched_policies/stack_queues.h

@@ -23,7 +23,8 @@
 #include <common/config.h>
 #include <core/jobs.h>
 
-struct _starpu_stack_jobq {
+struct _starpu_stack_jobq
+{
 	/* the actual list */
 	struct _starpu_job_list *jobq;
 

+ 27 - 16
src/sched_policies/work_stealing_policy.c

@@ -36,7 +36,8 @@ static unsigned performed_total = 0;
 static float overload_metric(unsigned id)
 {
 	float execution_ratio = 0.0f;
-	if (performed_total > 0) {
+	if (performed_total > 0)
+	{
 		execution_ratio = _starpu_get_deque_nprocessed(queue_array[id])/performed_total;
 	}
 
@@ -44,10 +45,11 @@ static float overload_metric(unsigned id)
 	performed_queue = _starpu_get_deque_nprocessed(queue_array[id]);
 
 	float current_ratio = 0.0f;
-	if (performed_queue > 0) {
+	if (performed_queue > 0)
+	{
 		current_ratio = _starpu_get_deque_njobs(queue_array[id])/performed_queue;
 	}
-	
+
 	return (current_ratio - execution_ratio);
 }
 
@@ -59,16 +61,19 @@ static struct _starpu_deque_jobq *select_victimq(void)
 	unsigned attempts = nworkers;
 
 	unsigned worker = rr_worker;
-	do {
+	do
+	{
 		if (overload_metric(worker) > 0.0f)
 		{
 			q = queue_array[worker];
 			return q;
 		}
-		else {
+		else
+		{
 			worker = (worker + 1)%nworkers;
 		}
-	} while(attempts-- > 0);
+	}
+	while(attempts-- > 0);
 
 	/* take one anyway ... */
 	q = queue_array[rr_worker];
@@ -84,16 +89,19 @@ static struct _starpu_deque_jobq *select_workerq(void)
 	unsigned attempts = nworkers;
 
 	unsigned worker = rr_worker;
-	do {
+	do
+	{
 		if (overload_metric(worker) < 0.0f)
 		{
 			q = queue_array[worker];
 			return q;
 		}
-		else {
+		else
+		{
 			worker = (worker + 1)%nworkers;
 		}
-	} while(attempts-- > 0);
+	}
+	while(attempts-- > 0);
 
 	/* take one anyway ... */
 	q = queue_array[rr_worker];
@@ -117,7 +125,7 @@ static struct _starpu_deque_jobq *select_victimq(void)
 }
 
 
-/* when anonymous threads submit tasks, 
+/* when anonymous threads submit tasks,
  * we need to select a queue where to dispose them */
 static struct _starpu_deque_jobq *select_workerq(void)
 {
@@ -148,19 +156,21 @@ static struct starpu_task *ws_pop_task(void)
 	_STARPU_PTHREAD_MUTEX_LOCK(&global_sched_mutex);
 
 	task = _starpu_deque_pop_task(q, -1);
-	if (task) {
+	if (task)
+	{
 		/* there was a local task */
 		performed_total++;
 		_STARPU_PTHREAD_MUTEX_UNLOCK(&global_sched_mutex);
 		return task;
 	}
-	
+
 	/* we need to steal someone's job */
 	struct _starpu_deque_jobq *victimq;
 	victimq = select_victimq();
 
 	task = _starpu_deque_pop_task(victimq, workerid);
-	if (task) {
+	if (task)
+	{
 		_STARPU_TRACE_WORK_STEALING(q, victimq);
 		performed_total++;
 	}
@@ -194,8 +204,8 @@ static int ws_push_task(struct starpu_task *task)
         return 0;
 }
 
-static void initialize_ws_policy(struct starpu_machine_topology *topology, 
-				__attribute__ ((unused)) struct starpu_sched_policy *_policy) 
+static void initialize_ws_policy(struct starpu_machine_topology *topology,
+				__attribute__ ((unused)) struct starpu_sched_policy *_policy)
 {
 	nworkers = topology->nworkers;
 	rr_worker = 0;
@@ -211,7 +221,8 @@ static void initialize_ws_policy(struct starpu_machine_topology *topology,
 	}
 }
 
-struct starpu_sched_policy _starpu_sched_ws_policy = {
+struct starpu_sched_policy _starpu_sched_ws_policy =
+{
 	.init_sched = initialize_ws_policy,
 	.deinit_sched = NULL,
 	.push_task = ws_push_task,

+ 18 - 11
src/top/starpu_top.c

@@ -43,7 +43,7 @@ pthread_cond_t starpu_top_wait_for_continue_cond = PTHREAD_COND_INITIALIZER;
 
 int starpu_top_status_get()
 {
-  return starpu_top;
+	return starpu_top;
 }
 
 
@@ -82,7 +82,8 @@ void copy_data_and_param()
 	}
 }
 
-static void starpu_top_get_device_type(int id, char* type){
+static void starpu_top_get_device_type(int id, char* type)
+{
 	enum starpu_archtype device_type=starpu_worker_get_type(id);
 	switch (device_type)
 	{
@@ -125,7 +126,8 @@ static void starpu_top_send_devices_info()
 }
 
 
-void starpu_top_init_and_wait(const char* server_name){
+void starpu_top_init_and_wait(const char* server_name)
+{
 	starpu_top=1;
 	sem_init(&starpu_top_wait_for_go,0,0);
 
@@ -177,7 +179,8 @@ void starpu_top_init_and_wait(const char* server_name){
 	_starpu_top_message_add(_starpu_top_mt,message);
 	struct starpu_top_param * cur_param = starpu_top_first_param;
 	printf("%s:%d sending parameters\n", __FILE__, __LINE__);
-	while(cur_param != NULL){
+	while(cur_param != NULL)
+	{
 	  _starpu_top_message_add(_starpu_top_mt,message_for_topparam_init(cur_param));
 	  cur_param = cur_param->next;
 	}
@@ -385,7 +388,6 @@ void starpu_top_enqueue_param(struct starpu_top_param* param)
 	}
 }
 
-
 struct starpu_top_param* starpu_top_register_parameter_boolean(const char* param_name,
 							       int* parameter_field,
 							       void (*callback)(struct starpu_top_param*))
@@ -404,7 +406,6 @@ struct starpu_top_param* starpu_top_register_parameter_boolean(const char* param
 	return param;
 }
 
-
 struct starpu_top_param* starpu_top_register_parameter_integer(const char* param_name,
 							       int* parameter_field,
 							       int minimum_value,
@@ -426,6 +427,7 @@ struct starpu_top_param* starpu_top_register_parameter_integer(const char* param
 
 	return param;
 }
+
 struct starpu_top_param* starpu_top_register_parameter_float(const char* param_name,
 							     double* parameter_field,
 							     double minimum_value,
@@ -473,7 +475,8 @@ struct starpu_top_param* starpu_top_register_parameter_enum(const char* param_na
 *****************UPDATE FUNC******************
 **********************************************/
 
-void starpu_top_update_data_boolean(const struct starpu_top_data* data, int value) {
+void starpu_top_update_data_boolean(const struct starpu_top_data* data, int value)
+{
 	if (!starpu_top_status_get())
 		return;
 	if(data->active)
@@ -488,7 +491,8 @@ void starpu_top_update_data_boolean(const struct starpu_top_data* data, int valu
 	}
 }
 
-void starpu_top_update_data_integer(const struct starpu_top_data* data, int value){
+void starpu_top_update_data_integer(const struct starpu_top_data* data, int value)
+{
 	if (!starpu_top_status_get())
 		return;
 	if(data->active)
@@ -503,7 +507,8 @@ void starpu_top_update_data_integer(const struct starpu_top_data* data, int valu
 	}
 }
 
-void starpu_top_update_data_float(const struct starpu_top_data* data, double value){
+void starpu_top_update_data_float(const struct starpu_top_data* data, double value)
+{
 	if (!starpu_top_status_get())
 		return;
 	if(data->active)
@@ -517,7 +522,8 @@ void starpu_top_update_data_float(const struct starpu_top_data* data, double val
 	}
 }
 
-void starpu_top_update_parameter(const struct starpu_top_param* param){
+void starpu_top_update_parameter(const struct starpu_top_param* param)
+{
 	if (!starpu_top_status_get())
 		return;
 	char*message = (char *) malloc(50);
@@ -666,7 +672,8 @@ void starpu_top_change_data_active(char* message, int active)
 	starpu_top_datas[data_id]->active = active;
 }
 
-void starpu_top_change_parameter_value(const char* message){
+void starpu_top_change_parameter_value(const char* message)
+{
 	const char*tmp = strstr(message, ";")+1;
 	int param_id = atoi(tmp);
 	struct starpu_top_param* param = starpu_top_params[param_id];

+ 2 - 1
src/top/starpu_top_connection.h

@@ -23,7 +23,8 @@
 #include <starpu_top.h>
 
 #ifdef __cplusplus
-extern "C" {
+extern "C"
+{
 #endif
 
 extern struct _starpu_top_message_queue* _starpu_top_mt;

+ 0 - 0
src/top/starpu_top_message_queue.c


Daži faili netika attēloti, jo izmaiņu fails ir pārāk liels