Pārlūkot izejas kodu

src: rename internal types following coding conventions

Nathalie Furmento 14 gadi atpakaļ
vecāks
revīzija
f8fd5492bd
38 mainītis faili ar 134 papildinājumiem un 135 dzēšanām
  1. 2 0
      README.dev
  2. 1 1
      doc/chapters/perf-optimization.texi
  3. 6 6
      src/common/htable32.c
  4. 2 2
      src/common/htable32.h
  5. 6 4
      src/core/dependencies/cg.h
  6. 5 5
      src/core/dependencies/data_concurrency.c
  7. 13 13
      src/core/dependencies/htable.c
  8. 2 2
      src/core/dependencies/htable.h
  9. 1 3
      src/core/dependencies/tags.h
  10. 5 5
      src/core/jobs.h
  11. 4 4
      src/core/perfmodel/perfmodel.c
  12. 3 3
      src/core/perfmodel/perfmodel_history.c
  13. 4 4
      src/datawizard/coherency.c
  14. 7 7
      src/datawizard/coherency.h
  15. 1 1
      src/datawizard/copy_driver.c
  16. 4 4
      src/datawizard/interfaces/csr_interface.c
  17. 2 2
      src/datawizard/interfaces/matrix_interface.c
  18. 2 2
      src/datawizard/interfaces/multiformat_interface.c
  19. 4 4
      src/datawizard/interfaces/variable_interface.c
  20. 2 2
      src/datawizard/interfaces/vector_interface.c
  21. 5 5
      src/datawizard/memalloc.c
  22. 1 1
      src/datawizard/memalloc.h
  23. 1 1
      src/datawizard/memory_nodes.c
  24. 1 1
      src/datawizard/memory_nodes.h
  25. 4 4
      src/datawizard/reduction.c
  26. 1 1
      src/datawizard/write_back.c
  27. 0 2
      src/debug/traces/starpu_fxt.h
  28. 1 1
      src/drivers/cpu/driver_cpu.c
  29. 1 1
      src/drivers/cuda/driver_cuda.c
  30. 2 2
      src/drivers/gordon/driver_gordon.c
  31. 3 3
      src/drivers/opencl/driver_opencl.c
  32. 2 2
      src/drivers/opencl/driver_opencl_utils.h
  33. 3 3
      src/sched_policies/deque_modeling_policy_data_aware.c
  34. 11 12
      src/sched_policies/detect_combined_workers.c
  35. 3 3
      src/sched_policies/heft.c
  36. 3 3
      src/sched_policies/parallel_heft.c
  37. 8 8
      src/starpu_parameters.h
  38. 8 8
      src/util/starpu_insert_task_utils.c

+ 2 - 0
README.dev

@@ -44,6 +44,8 @@ Coding Conventions
   or
        typedef uint64_t starpu_tag_t;
 
+- When a variable can only take a finite set of values, use an enum
+  type instead of defining macros for each of the values.
 
 
 

+ 1 - 1
doc/chapters/perf-optimization.texi

@@ -141,7 +141,7 @@ which have never been calibrated yet, and save the result in
 The models are indexed by machine name. To share the models between machines (e.g. for a homogeneous cluster), use @code{export STARPU_HOSTNAME=some_global_name}. To force continuing calibration, use
 @code{export STARPU_CALIBRATE=1} . This may be necessary if your application
 has not-so-stable performance. StarPU will force calibration (and thus ignore
-the current result) until 10 (STARPU_CALIBRATION_MINIMUM) measurements have been
+the current result) until 10 (_STARPU_CALIBRATION_MINIMUM) measurements have been
 made on each architecture, to avoid badly scheduling tasks just because the
 first measurements were not so good. Details on the current performance model status
 can be obtained from the @code{starpu_perfmodel_display} command: the @code{-l}

+ 6 - 6
src/common/htable32.c

@@ -29,9 +29,9 @@ void *_starpu_htbl_search_32(struct starpu_htbl32_node *htbl, uint32_t key)
 	struct starpu_htbl32_node *current_htbl = htbl;
 
 	/* 000000000001111 with HTBL_NODE_SIZE 1's */
-	uint32_t mask = (1<<STARPU_HTBL32_NODE_SIZE)-1;
+	uint32_t mask = (1<<_STARPU_HTBL32_NODE_SIZE)-1;
 
-	for(currentbit = 0; currentbit < keysize; currentbit+=STARPU_HTBL32_NODE_SIZE)
+	for(currentbit = 0; currentbit < keysize; currentbit+=_STARPU_HTBL32_NODE_SIZE)
 	{
 	
 	//	printf("search : current bit = %d \n", currentbit);
@@ -45,7 +45,7 @@ void *_starpu_htbl_search_32(struct starpu_htbl32_node *htbl, uint32_t key)
 		 * */
 
 		unsigned last_currentbit = 
-			keysize - (currentbit + STARPU_HTBL32_NODE_SIZE);
+			keysize - (currentbit + _STARPU_HTBL32_NODE_SIZE);
 		uint32_t offloaded_mask = mask << last_currentbit;
 		unsigned current_index = 
 			(key & (offloaded_mask)) >> (last_currentbit);
@@ -68,9 +68,9 @@ void *_starpu_htbl_insert_32(struct starpu_htbl32_node **htbl, uint32_t key, voi
 	struct starpu_htbl32_node **current_htbl_ptr = htbl;
 
 	/* 000000000001111 with HTBL_NODE_SIZE 1's */
-	uint32_t mask = (1<<STARPU_HTBL32_NODE_SIZE)-1;
+	uint32_t mask = (1<<_STARPU_HTBL32_NODE_SIZE)-1;
 
-	for(currentbit = 0; currentbit < keysize; currentbit+=STARPU_HTBL32_NODE_SIZE)
+	for(currentbit = 0; currentbit < keysize; currentbit+=_STARPU_HTBL32_NODE_SIZE)
 	{
 		//printf("insert : current bit = %d \n", currentbit);
 		if (*current_htbl_ptr == NULL) {
@@ -86,7 +86,7 @@ void *_starpu_htbl_insert_32(struct starpu_htbl32_node **htbl, uint32_t key, voi
 		 * */
 
 		unsigned last_currentbit = 
-			keysize - (currentbit + STARPU_HTBL32_NODE_SIZE);
+			keysize - (currentbit + _STARPU_HTBL32_NODE_SIZE);
 		uint32_t offloaded_mask = mask << last_currentbit;
 		unsigned current_index = 
 			(key & (offloaded_mask)) >> (last_currentbit);

+ 2 - 2
src/common/htable32.h

@@ -23,13 +23,13 @@
 #include <stdio.h>
 #include <assert.h>
 
-#define STARPU_HTBL32_NODE_SIZE	16
+#define _STARPU_HTBL32_NODE_SIZE	16
 
 /* Hierarchical table: all nodes have a 2^16 arity . */
 /* Note: this struct is used in include/starpu_perfmodel.h */
 struct starpu_htbl32_node {
 	unsigned nentries;
-	struct starpu_htbl32_node *children[1<<STARPU_HTBL32_NODE_SIZE];
+	struct starpu_htbl32_node *children[1<<_STARPU_HTBL32_NODE_SIZE];
 };
 
 /* Look for a 32bit key into the hierchical table. Returns the entry if

+ 6 - 4
src/core/dependencies/cg.h

@@ -44,16 +44,18 @@ struct _starpu_cg_list {
 #endif
 };
 
-#define STARPU_CG_APPS	(1<<0)
-#define STARPU_CG_TAG	(1<<1)
-#define STARPU_CG_TASK	(1<<2)
+enum _starpu_cg_type {
+	STARPU_CG_APPS=(1<<0),
+	STARPU_CG_TAG=(1<<1),
+	STARPU_CG_TASK=(1<<2)
+};
 
 /* Completion Group */
 struct _starpu_cg {
 	unsigned ntags; /* number of tags depended on */
 	unsigned remaining; /* number of remaining tags */
 
-	unsigned cg_type; /* STARPU_CG_APPS or STARPU_CG_TAG or STARPU_CG_TASK */
+	enum _starpu_cg_type cg_type;
 
 	union {
 		/* STARPU_CG_TAG */

+ 5 - 5
src/core/dependencies/data_concurrency.c

@@ -125,7 +125,7 @@ static unsigned _starpu_attempt_to_submit_data_request(unsigned request_from_cod
 
 		if ((handle->reduction_refcnt == 0) && (previous_mode == STARPU_REDUX) && (mode != STARPU_REDUX))
 		{
-			starpu_data_end_reduction_mode(handle);
+			_starpu_data_end_reduction_mode(handle);
 
 			/* Since we need to perform a mode change, we freeze
 			 * the request if needed. */
@@ -167,7 +167,7 @@ static unsigned _starpu_attempt_to_submit_data_request(unsigned request_from_cod
 		handle->current_mode = mode;
 
 		if ((mode == STARPU_REDUX) && (previous_mode != STARPU_REDUX))
-			starpu_data_start_reduction_mode(handle);
+			_starpu_data_start_reduction_mode(handle);
 
 		/* success */
 		put_in_list = 0;
@@ -276,7 +276,7 @@ void _starpu_notify_data_dependencies(starpu_data_handle_t handle)
 		//fprintf(stderr, "NOTIFY REDUCTION TASK RED REFCNT %d\n", handle->reduction_refcnt);
 		handle->reduction_refcnt--;
 		if (handle->reduction_refcnt == 0)
-			starpu_data_end_reduction_mode_terminate(handle);
+			_starpu_data_end_reduction_mode_terminate(handle);
 	}
 
 
@@ -291,7 +291,7 @@ void _starpu_notify_data_dependencies(starpu_data_handle_t handle)
 		int put_in_list = 1;
 		if ((handle->reduction_refcnt == 0) && (handle->current_mode == STARPU_REDUX) && (r_mode != STARPU_REDUX))
 		{
-			starpu_data_end_reduction_mode(handle);
+			_starpu_data_end_reduction_mode(handle);
 
 			/* Since we need to perform a mode change, we freeze
 			 * the request if needed. */
@@ -321,7 +321,7 @@ void _starpu_notify_data_dependencies(starpu_data_handle_t handle)
 			 * kept intact because we'll reduce a valid copy of the
 			 * "per-node replicate" with the per-worker replicates .*/
 			if ((r_mode == STARPU_REDUX) && (previous_mode != STARPU_REDUX))
-				starpu_data_start_reduction_mode(handle);
+				_starpu_data_start_reduction_mode(handle);
 
 			_starpu_spin_unlock(&handle->header_lock);
 

+ 13 - 13
src/core/dependencies/htable.c

@@ -23,10 +23,10 @@ void *_starpu_htbl_search_tag(struct _starpu_htbl_node *htbl, starpu_tag_t tag)
 	unsigned currentbit;
 	struct _starpu_htbl_node *current_htbl = htbl;
 
-	/* 000000000001111 with STARPU_HTBL_NODE_SIZE 1's */
-	starpu_tag_t mask = (1<<STARPU_HTBL_NODE_SIZE)-1;
+	/* 000000000001111 with _STARPU_HTBL_NODE_SIZE 1's */
+	starpu_tag_t mask = (1<<_STARPU_HTBL_NODE_SIZE)-1;
 
-	for(currentbit = 0; currentbit < STARPU_TAG_SIZE; currentbit+=STARPU_HTBL_NODE_SIZE)
+	for(currentbit = 0; currentbit < _STARPU_TAG_SIZE; currentbit+=_STARPU_HTBL_NODE_SIZE)
 	{
 	
 	//	printf("search : current bit = %d \n", currentbit);
@@ -40,7 +40,7 @@ void *_starpu_htbl_search_tag(struct _starpu_htbl_node *htbl, starpu_tag_t tag)
 		 * */
 
 		unsigned last_currentbit = 
-			STARPU_TAG_SIZE - (currentbit + STARPU_HTBL_NODE_SIZE);
+			_STARPU_TAG_SIZE - (currentbit + _STARPU_HTBL_NODE_SIZE);
 		starpu_tag_t offloaded_mask = mask << last_currentbit;
 		unsigned current_index = 
 			(tag & (offloaded_mask)) >> (last_currentbit);
@@ -62,10 +62,10 @@ void *_starpu_htbl_insert_tag(struct _starpu_htbl_node **htbl, starpu_tag_t tag,
 	struct _starpu_htbl_node **current_htbl_ptr = htbl;
 	struct _starpu_htbl_node *previous_htbl_ptr = NULL;
 
-	/* 000000000001111 with STARPU_HTBL_NODE_SIZE 1's */
-	starpu_tag_t mask = (1<<STARPU_HTBL_NODE_SIZE)-1;
+	/* 000000000001111 with _STARPU_HTBL_NODE_SIZE 1's */
+	starpu_tag_t mask = (1<<_STARPU_HTBL_NODE_SIZE)-1;
 
-	for(currentbit = 0; currentbit < STARPU_TAG_SIZE; currentbit+=STARPU_HTBL_NODE_SIZE)
+	for(currentbit = 0; currentbit < _STARPU_TAG_SIZE; currentbit+=_STARPU_HTBL_NODE_SIZE)
 	{
 		if (*current_htbl_ptr == NULL) {
 			/* TODO pad to change that 1 into 16 ? */
@@ -83,7 +83,7 @@ void *_starpu_htbl_insert_tag(struct _starpu_htbl_node **htbl, starpu_tag_t tag,
 		 * */
 
 		unsigned last_currentbit = 
-			STARPU_TAG_SIZE - (currentbit + STARPU_HTBL_NODE_SIZE);
+			_STARPU_TAG_SIZE - (currentbit + _STARPU_HTBL_NODE_SIZE);
 		starpu_tag_t offloaded_mask = mask << last_currentbit;
 		unsigned current_index = 
 			(tag & (offloaded_mask)) >> (last_currentbit);
@@ -113,14 +113,14 @@ void *_starpu_htbl_remove_tag(struct _starpu_htbl_node *htbl, starpu_tag_t tag)
 	struct _starpu_htbl_node *current_htbl_ptr = htbl;
 
 	/* remember the path to the tag */
-	struct _starpu_htbl_node *path[(STARPU_TAG_SIZE + STARPU_HTBL_NODE_SIZE - 1)/(STARPU_HTBL_NODE_SIZE)];
+	struct _starpu_htbl_node *path[(_STARPU_TAG_SIZE + _STARPU_HTBL_NODE_SIZE - 1)/(_STARPU_HTBL_NODE_SIZE)];
 
-	/* 000000000001111 with STARPU_HTBL_NODE_SIZE 1's */
-	starpu_tag_t mask = (1<<STARPU_HTBL_NODE_SIZE)-1;
+	/* 000000000001111 with _STARPU_HTBL_NODE_SIZE 1's */
+	starpu_tag_t mask = (1<<_STARPU_HTBL_NODE_SIZE)-1;
 	int level, maxlevel;
 	unsigned tag_is_present = 1;
 
-	for(currentbit = 0, level = 0; currentbit < STARPU_TAG_SIZE; currentbit+=STARPU_HTBL_NODE_SIZE, level++)
+	for(currentbit = 0, level = 0; currentbit < _STARPU_TAG_SIZE; currentbit+=_STARPU_HTBL_NODE_SIZE, level++)
 	{
 		path[level] = current_htbl_ptr;
 
@@ -136,7 +136,7 @@ void *_starpu_htbl_remove_tag(struct _starpu_htbl_node *htbl, starpu_tag_t tag)
 		 * */
 
 		unsigned last_currentbit = 
-			STARPU_TAG_SIZE - (currentbit + STARPU_HTBL_NODE_SIZE);
+			_STARPU_TAG_SIZE - (currentbit + _STARPU_HTBL_NODE_SIZE);
 		starpu_tag_t offloaded_mask = mask << last_currentbit;
 		unsigned current_index = 
 			(tag & (offloaded_mask)) >> (last_currentbit);

+ 2 - 2
src/core/dependencies/htable.h

@@ -28,11 +28,11 @@
 #include <assert.h>
 #include <core/dependencies/tags.h>
 
-#define STARPU_HTBL_NODE_SIZE	16
+#define _STARPU_HTBL_NODE_SIZE	16
 
 struct _starpu_htbl_node {
 	unsigned nentries;
-	struct _starpu_htbl_node *children[1<<STARPU_HTBL_NODE_SIZE];
+	struct _starpu_htbl_node *children[1<<_STARPU_HTBL_NODE_SIZE];
 };
 
 void *_starpu_htbl_search_tag(struct _starpu_htbl_node *htbl, starpu_tag_t tag);

+ 1 - 3
src/core/dependencies/tags.h

@@ -23,7 +23,7 @@
 #include <common/starpu_spinlock.h>
 #include <core/dependencies/cg.h>
 
-#define STARPU_TAG_SIZE        (sizeof(starpu_tag_t)*8)
+#define _STARPU_TAG_SIZE        (sizeof(starpu_tag_t)*8)
 
 enum _starpu_tag_state {
 	/* this tag is not declared by any task */
@@ -57,8 +57,6 @@ struct _starpu_tag {
 	unsigned is_submitted;
 };
 
-void starpu_tag_declare_deps(starpu_tag_t id, unsigned ndeps, ...);
-
 void _starpu_notify_dependencies(struct starpu_job_s *j);
 void _starpu_notify_tag_dependencies(struct _starpu_tag *tag);
 

+ 5 - 5
src/core/jobs.h

@@ -47,11 +47,11 @@ struct _starpu_worker;
 /* codelet function */
 typedef void (*_starpu_cl_func)(void **, void *);
 
-#define STARPU_CPU_MAY_PERFORM(j)	((j)->task->cl->where & STARPU_CPU)
-#define STARPU_CUDA_MAY_PERFORM(j)      ((j)->task->cl->where & STARPU_CUDA)
-#define STARPU_SPU_MAY_PERFORM(j)	((j)->task->cl->where & STARPU_SPU)
-#define STARPU_GORDON_MAY_PERFORM(j)	((j)->task->cl->where & STARPU_GORDON)
-#define STARPU_OPENCL_MAY_PERFORM(j)	((j)->task->cl->where & STARPU_OPENCL)
+#define _STARPU_CPU_MAY_PERFORM(j)	((j)->task->cl->where & STARPU_CPU)
+#define _STARPU_CUDA_MAY_PERFORM(j)      ((j)->task->cl->where & STARPU_CUDA)
+#define _STARPU_SPU_MAY_PERFORM(j)	((j)->task->cl->where & STARPU_SPU)
+#define _STARPU_GORDON_MAY_PERFORM(j)	((j)->task->cl->where & STARPU_GORDON)
+#define _STARPU_OPENCL_MAY_PERFORM(j)	((j)->task->cl->where & STARPU_OPENCL)
 
 /* A job is the internal representation of a task. */
 LIST_TYPE(starpu_job,

+ 4 - 4
src/core/perfmodel/perfmodel.c

@@ -89,19 +89,19 @@ double starpu_worker_get_relative_speedup(enum starpu_perf_archtype perf_archtyp
 {
 	if (perf_archtype < STARPU_CUDA_DEFAULT)
 	{
-		return STARPU_CPU_ALPHA * (perf_archtype + 1);
+		return _STARPU_CPU_ALPHA * (perf_archtype + 1);
 	}
 	else if (perf_archtype < STARPU_OPENCL_DEFAULT)
 	{
-		return STARPU_CUDA_ALPHA;
+		return _STARPU_CUDA_ALPHA;
 	}
 	else if (perf_archtype < STARPU_GORDON_DEFAULT)
 	{
-		return STARPU_OPENCL_ALPHA;
+		return _STARPU_OPENCL_ALPHA;
 	}
 	else if (perf_archtype < STARPU_NARCH_VARIATIONS) {
 		/* Gordon value */
-		return STARPU_GORDON_ALPHA;
+		return _STARPU_GORDON_ALPHA;
 	}
 
 	STARPU_ABORT();

+ 3 - 3
src/core/perfmodel/perfmodel_history.c

@@ -36,7 +36,7 @@
 
 /* We want more than 10% variance on X to trust regression */
 #define VALID_REGRESSION(reg_model) \
-	((reg_model)->minx < (9*(reg_model)->maxx)/10 && (reg_model)->nsample >= STARPU_CALIBRATION_MINIMUM)
+	((reg_model)->minx < (9*(reg_model)->maxx)/10 && (reg_model)->nsample >= _STARPU_CALIBRATION_MINIMUM)
 
 static pthread_rwlock_t registered_models_rwlock;
 static struct starpu_model_list *registered_models = NULL;
@@ -890,7 +890,7 @@ double _starpu_non_linear_regression_based_job_expected_perf(struct starpu_perfm
 		entry = (struct starpu_history_entry *) _starpu_htbl_search_32(history, key);
 		_STARPU_PTHREAD_RWLOCK_UNLOCK(&model->model_rwlock);
 
-		if (entry && entry->nsample >= STARPU_CALIBRATION_MINIMUM)
+		if (entry && entry->nsample >= _STARPU_CALIBRATION_MINIMUM)
 			exp = entry->mean;
 		else if (!model->benchmarking) {
 			_STARPU_DISP("Warning: model %s is not calibrated enough, forcing calibration for this run. Use the STARPU_CALIBRATE environment variable to control this.\n", model->symbol);
@@ -923,7 +923,7 @@ double _starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, e
 
 	exp = entry?entry->mean:-1.0;
 
-	if (entry && entry->nsample < STARPU_CALIBRATION_MINIMUM)
+	if (entry && entry->nsample < _STARPU_CALIBRATION_MINIMUM)
 		/* TODO: report differently if we've scheduled really enough
 		 * of that task and the scheduler should perhaps put it aside */
 		/* Not calibrated enough */

+ 4 - 4
src/datawizard/coherency.c

@@ -321,7 +321,7 @@ static starpu_data_request_t _starpu_search_existing_data_request(struct starpu_
  */
 
 /* This function is called with handle's header lock taken */
-starpu_data_request_t create_request_to_fetch_data(starpu_data_handle_t handle,
+starpu_data_request_t _starpu_create_request_to_fetch_data(starpu_data_handle_t handle,
 				struct starpu_data_replicate_s *dst_replicate,
                                 enum starpu_access_mode mode, unsigned is_prefetch,
                                 void (*callback_func)(void *), void *callback_arg)
@@ -346,7 +346,7 @@ starpu_data_request_t create_request_to_fetch_data(starpu_data_handle_t handle,
 			_starpu_handle_stats_shared_to_owner(handle, requesting_node);
 #endif
 		
-		starpu_memchunk_recently_used(dst_replicate->mc, requesting_node);
+		_starpu_memchunk_recently_used(dst_replicate->mc, requesting_node);
 
 		_starpu_spin_unlock(&handle->header_lock);
 
@@ -467,11 +467,11 @@ int _starpu_fetch_data_on_node(starpu_data_handle_t handle, struct starpu_data_r
 	}
 
 	starpu_data_request_t r;
-	r = create_request_to_fetch_data(handle, dst_replicate, mode,
+	r = _starpu_create_request_to_fetch_data(handle, dst_replicate, mode,
 					is_prefetch, callback_func, callback_arg);
 
 	/* If no request was created, the handle was already up-to-date on the
-	 * node. In this case, create_request_to_fetch_data has already
+	 * node. In this case, _starpu_create_request_to_fetch_data has already
 	 * unlocked the header. */
 	if (!r)
 		return 0;

+ 7 - 7
src/datawizard/coherency.h

@@ -247,14 +247,14 @@ unsigned starpu_data_test_if_allocated_on_node(starpu_data_handle_t handle, uint
 
 uint32_t _starpu_select_src_node(struct _starpu_data_state *state, unsigned destination);
 
-starpu_data_request_t create_request_to_fetch_data(starpu_data_handle_t handle,
-				struct starpu_data_replicate_s *dst_replicate,
-                                enum starpu_access_mode mode, unsigned is_prefetch,
-                                void (*callback_func)(void *), void *callback_arg);
+starpu_data_request_t _starpu_create_request_to_fetch_data(starpu_data_handle_t handle,
+							   struct starpu_data_replicate_s *dst_replicate,
+							   enum starpu_access_mode mode, unsigned is_prefetch,
+							   void (*callback_func)(void *), void *callback_arg);
 
 void _starpu_redux_init_data_replicate(starpu_data_handle_t handle, struct starpu_data_replicate_s *replicate, int workerid);
-void starpu_data_start_reduction_mode(starpu_data_handle_t handle);
-void starpu_data_end_reduction_mode(starpu_data_handle_t handle);
-void starpu_data_end_reduction_mode_terminate(starpu_data_handle_t handle);
+void _starpu_data_start_reduction_mode(starpu_data_handle_t handle);
+void _starpu_data_end_reduction_mode(starpu_data_handle_t handle);
+void _starpu_data_end_reduction_mode_terminate(starpu_data_handle_t handle);
 
 #endif // __COHERENCY__H__

+ 1 - 1
src/datawizard/copy_driver.c

@@ -114,7 +114,7 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle, struct starpu_d
 	if ((src_kind == STARPU_CUDA_RAM) || (dst_kind == STARPU_CUDA_RAM))
 	{
 		int node = (dst_kind == STARPU_CUDA_RAM)?dst_node:src_node;
-		cures = cudaSetDevice(starpu_memory_node_to_devid(node));
+		cures = cudaSetDevice(_starpu_memory_node_to_devid(node));
 		STARPU_ASSERT(cures == cudaSuccess);
 	}
 #endif

+ 4 - 4
src/datawizard/interfaces/csr_interface.c

@@ -487,8 +487,8 @@ static int copy_cuda_peer(void *src_interface STARPU_ATTRIBUTE_UNUSED, unsigned
 	uint32_t nrow = src_csr->nrow;
 	size_t elemsize = src_csr->elemsize;
 
-	int src_dev = starpu_memory_node_to_devid(src_node);
-	int dst_dev = starpu_memory_node_to_devid(dst_node);
+	int src_dev = _starpu_memory_node_to_devid(src_node);
+	int dst_dev = _starpu_memory_node_to_devid(dst_node);
 
 	cudaError_t cures;
 
@@ -526,8 +526,8 @@ static int copy_cuda_peer_async(void *src_interface STARPU_ATTRIBUTE_UNUSED, uns
 
 	cudaError_t cures;
 
-	int src_dev = starpu_memory_node_to_devid(src_node);
-	int dst_dev = starpu_memory_node_to_devid(dst_node);
+	int src_dev = _starpu_memory_node_to_devid(src_node);
+	int dst_dev = _starpu_memory_node_to_devid(dst_node);
 
 	int synchronous_fallback = 0;
 

+ 2 - 2
src/datawizard/interfaces/matrix_interface.c

@@ -436,8 +436,8 @@ static int copy_cuda_peer(void *src_interface, unsigned src_node STARPU_ATTRIBUT
 	size_t elemsize = src_matrix->elemsize;
 	cudaError_t cures;
 
-	int src_dev = starpu_memory_node_to_devid(src_node);
-	int dst_dev = starpu_memory_node_to_devid(dst_node);
+	int src_dev = _starpu_memory_node_to_devid(src_node);
+	int dst_dev = _starpu_memory_node_to_devid(dst_node);
 
 
 #if 0

+ 2 - 2
src/datawizard/interfaces/multiformat_interface.c

@@ -514,8 +514,8 @@ static int copy_cuda_peer_common(void *src_interface, unsigned src_node,
 
 	cudaError_t status;
 	int size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
-	int src_dev = starpu_memory_node_to_devid(src_node);
-	int dst_dev = starpu_memory_node_to_devid(dst_node);
+	int src_dev = _starpu_memory_node_to_devid(src_node);
+	int dst_dev = _starpu_memory_node_to_devid(dst_node);
 
 	if (stream)
 	{

+ 4 - 4
src/datawizard/interfaces/variable_interface.c

@@ -317,8 +317,8 @@ static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRI
 	}
 	else {
 #ifdef HAVE_CUDA_MEMCPY_PEER
-		int src_dev = starpu_memory_node_to_devid(src_node);
-		int dst_dev = starpu_memory_node_to_devid(dst_node);
+		int src_dev = _starpu_memory_node_to_devid(src_node);
+		int dst_dev = _starpu_memory_node_to_devid(dst_node);
 
 		struct starpu_variable_interface *src_variable = src_interface;
 		struct starpu_variable_interface *dst_variable = dst_interface;
@@ -386,8 +386,8 @@ static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node,					v
 	}
 	else {
 #ifdef HAVE_CUDA_MEMCPY_PEER
-		int src_dev = starpu_memory_node_to_devid(src_node);
-		int dst_dev = starpu_memory_node_to_devid(dst_node);
+		int src_dev = _starpu_memory_node_to_devid(src_node);
+		int dst_dev = _starpu_memory_node_to_devid(dst_node);
 
 		struct starpu_variable_interface *src_variable = src_interface;
 		struct starpu_variable_interface *dst_variable = dst_interface;

+ 2 - 2
src/datawizard/interfaces/vector_interface.c

@@ -351,8 +351,8 @@ static int copy_cuda_peer_common(void *src_interface, unsigned src_node,
 
 	size_t length = src_vector->nx*src_vector->elemsize;
 
-	int src_dev = starpu_memory_node_to_devid(src_node);
-	int dst_dev = starpu_memory_node_to_devid(dst_node);
+	int src_dev = _starpu_memory_node_to_devid(src_node);
+	int dst_dev = _starpu_memory_node_to_devid(dst_node);
 
 	if (is_async)
 	{

+ 5 - 5
src/datawizard/memalloc.c

@@ -241,7 +241,7 @@ static size_t free_memory_on_node(starpu_mem_chunk_t mc, uint32_t node)
 			 * proper CUDA device in case it is needed. This avoids
 			 * having to set it again in the free method of each
 			 * interface. */
-			cudaError_t err = cudaSetDevice(starpu_memory_node_to_devid(node));
+			cudaError_t err = cudaSetDevice(_starpu_memory_node_to_devid(node));
 			STARPU_ASSERT(err == cudaSuccess);
 		}
 #endif
@@ -699,7 +699,7 @@ static size_t _starpu_get_global_mem_size(int dst_node)
 #ifdef STARPU_USE_CUDA
 		case STARPU_CUDA_RAM:
 		{
-			int devid = starpu_memory_node_to_devid(dst_node);
+			int devid = _starpu_memory_node_to_devid(dst_node);
 			global_mem_size = starpu_cuda_get_global_mem_size(devid);
 			break;
 		}
@@ -707,7 +707,7 @@ static size_t _starpu_get_global_mem_size(int dst_node)
 #ifdef STARPU_USE_OPENCL
 		case STARPU_OPENCL_RAM:
 		{
-			int devid = starpu_memory_node_to_devid(dst_node);
+			int devid = _starpu_memory_node_to_devid(dst_node);
 			global_mem_size = starpu_opencl_get_global_mem_size(devid);
 			break;
 		}
@@ -770,7 +770,7 @@ static ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, struct st
 			 * proper CUDA device in case it is needed. This avoids
 			 * having to set it again in the malloc method of each
 			 * interface. */
-			cudaError_t err = cudaSetDevice(starpu_memory_node_to_devid(dst_node));
+			cudaError_t err = cudaSetDevice(_starpu_memory_node_to_devid(dst_node));
 			STARPU_ASSERT(err == cudaSuccess);
 		}
 #endif
@@ -852,7 +852,7 @@ unsigned starpu_data_test_if_allocated_on_node(starpu_data_handle_t handle, uint
 	return handle->per_node[memory_node].allocated;
 }
 
-void starpu_memchunk_recently_used(starpu_mem_chunk_t mc, unsigned node)
+void _starpu_memchunk_recently_used(starpu_mem_chunk_t mc, unsigned node)
 {
 	_STARPU_PTHREAD_RWLOCK_WRLOCK(&lru_rwlock[node]);
 	starpu_mem_chunk_lru_t mc_lru=starpu_mem_chunk_lru_new();

+ 1 - 1
src/datawizard/memalloc.h

@@ -61,7 +61,7 @@ void _starpu_deinit_mem_chunk_lists(void);
 void _starpu_request_mem_chunk_removal(starpu_data_handle_t handle, unsigned node);
 int _starpu_allocate_memory_on_node(starpu_data_handle_t handle, struct starpu_data_replicate_s *replicate, unsigned is_prefetch);
 size_t _starpu_free_all_automatically_allocated_buffers(uint32_t node);
-void starpu_memchunk_recently_used(starpu_mem_chunk_t mc, unsigned node);
+void _starpu_memchunk_recently_used(starpu_mem_chunk_t mc, unsigned node);
 
 void _starpu_display_data_stats_by_node(int node);
 #endif

+ 1 - 1
src/datawizard/memory_nodes.c

@@ -93,7 +93,7 @@ enum _starpu_node_kind _starpu_get_node_kind(uint32_t node)
 	return descr.nodes[node];
 }
 
-int starpu_memory_node_to_devid(unsigned node)
+int _starpu_memory_node_to_devid(unsigned node)
 {
 	return descr.devid[node];
 }

+ 1 - 1
src/datawizard/memory_nodes.h

@@ -75,7 +75,7 @@ unsigned _starpu_register_memory_node(enum _starpu_node_kind kind, int devid);
 void _starpu_memory_node_register_condition(pthread_cond_t *cond, pthread_mutex_t *mutex, unsigned memory_node);
 
 enum _starpu_node_kind _starpu_get_node_kind(uint32_t node);
-int starpu_memory_node_to_devid(unsigned node);
+int _starpu_memory_node_to_devid(unsigned node);
 unsigned _starpu_get_memory_nodes_count(void);
 
 struct _starpu_mem_node_descr *_starpu_get_memory_node_description(void);

+ 4 - 4
src/datawizard/reduction.c

@@ -77,7 +77,7 @@ void _starpu_redux_init_data_replicate(starpu_data_handle_t handle, struct starp
 
 /* Enable reduction mode. This function must be called with the header lock
  * taken. */
-void starpu_data_start_reduction_mode(starpu_data_handle_t handle)
+void _starpu_data_start_reduction_mode(starpu_data_handle_t handle)
 {
 	STARPU_ASSERT(handle->reduction_refcnt == 0);
 
@@ -95,7 +95,7 @@ void starpu_data_start_reduction_mode(starpu_data_handle_t handle)
 //#define NO_TREE_REDUCTION
 
 /* Force reduction. The lock should already have been taken.  */
-void starpu_data_end_reduction_mode(starpu_data_handle_t handle)
+void _starpu_data_end_reduction_mode(starpu_data_handle_t handle)
 {
 	unsigned worker;
 
@@ -249,11 +249,11 @@ void starpu_data_end_reduction_mode(starpu_data_handle_t handle)
 	}
 }
 
-void starpu_data_end_reduction_mode_terminate(starpu_data_handle_t handle)
+void _starpu_data_end_reduction_mode_terminate(starpu_data_handle_t handle)
 {
 	unsigned nworkers = starpu_worker_get_count();
 
-//	fprintf(stderr, "starpu_data_end_reduction_mode_terminate\n");
+//	fprintf(stderr, "_starpu_data_end_reduction_mode_terminate\n");
 	unsigned worker;
 	for (worker = 0; worker < nworkers; worker++)
 	{

+ 1 - 1
src/datawizard/write_back.c

@@ -54,7 +54,7 @@ void _starpu_write_through_data(starpu_data_handle_t handle, uint32_t requesting
 				handle->current_mode = STARPU_R;
 
 				starpu_data_request_t r;
-				r = create_request_to_fetch_data(handle, &handle->per_node[node],
+				r = _starpu_create_request_to_fetch_data(handle, &handle->per_node[node],
 						STARPU_R, 1, wt_callback, handle);
 
 			        /* If no request was created, the handle was already up-to-date on the

+ 0 - 2
src/debug/traces/starpu_fxt.h

@@ -37,8 +37,6 @@
 #include "../mpi/starpu_mpi_fxt.h"
 #include <starpu.h>
 
-#define FACTOR  100
-
 void _starpu_fxt_dag_init(char *dag_filename);
 void _starpu_fxt_dag_terminate(void);
 void _starpu_fxt_dag_add_tag_deps(uint64_t child, uint64_t father);

+ 1 - 1
src/drivers/cpu/driver_cpu.c

@@ -148,7 +148,7 @@ void *_starpu_cpu_worker(void *arg)
 		j = _starpu_get_job_associated_to_task(task);
 	
 		/* can a cpu perform that task ? */
-		if (!STARPU_CPU_MAY_PERFORM(j)) 
+		if (!_STARPU_CPU_MAY_PERFORM(j)) 
 		{
 			/* put it and the end of the queue ... XXX */
 			_starpu_push_task(j, 0);

+ 1 - 1
src/drivers/cuda/driver_cuda.c

@@ -323,7 +323,7 @@ void *_starpu_cuda_worker(void *arg)
 		j = _starpu_get_job_associated_to_task(task);
 
 		/* can CUDA do that task ? */
-		if (!STARPU_CUDA_MAY_PERFORM(j))
+		if (!_STARPU_CUDA_MAY_PERFORM(j))
 		{
 			/* this is neither a cuda or a cublas task */
 			_starpu_push_task(j, 0);

+ 2 - 2
src/drivers/gordon/driver_gordon.c

@@ -280,7 +280,7 @@ int inject_task_list(struct starpu_job_list_s *list, struct _starpu_worker *work
 //	
 //	for (j = starpu_job_list_begin(list); j != starpu_job_list_end(list); j = starpu_job_list_next(j) )
 //	{
-//		if (!STARPU_GORDON_MAY_PERFORM(j)) {
+//		if (!_STARPU_GORDON_MAY_PERFORM(j)) {
 //			// XXX TODO
 //			ninvalids++;
 //			assert(0);
@@ -399,7 +399,7 @@ void *gordon_worker_inject(struct _starpu_worker_set *arg)
 			j =  _starpu_pop_task();
 	//		_STARPU_DEBUG("pop task %p\n", j);
 			if (j) {
-				if (STARPU_GORDON_MAY_PERFORM(j)) {
+				if (_STARPU_GORDON_MAY_PERFORM(j)) {
 					/* inject that task */
 					/* XXX we hardcore &arg->workers[0] for now */
 					inject_task(j, &arg->workers[0]);

+ 3 - 3
src/drivers/opencl/driver_opencl.c

@@ -335,7 +335,7 @@ void _starpu_opencl_init(void)
 {
 	_STARPU_PTHREAD_MUTEX_LOCK(&big_lock);
         if (!init_done) {
-                cl_platform_id platform_id[STARPU_OPENCL_PLATFORM_MAX];
+                cl_platform_id platform_id[_STARPU_OPENCL_PLATFORM_MAX];
                 cl_uint nb_platforms;
                 cl_device_type device_type = CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR;
                 cl_int err;
@@ -344,7 +344,7 @@ void _starpu_opencl_init(void)
                 _STARPU_DEBUG("Initialising OpenCL\n");
 
                 // Get Platforms
-                err = clGetPlatformIDs(STARPU_OPENCL_PLATFORM_MAX, platform_id, &nb_platforms);
+                err = clGetPlatformIDs(_STARPU_OPENCL_PLATFORM_MAX, platform_id, &nb_platforms);
                 if (err != CL_SUCCESS) nb_platforms=0;
                 _STARPU_DEBUG("Platforms detected: %d\n", nb_platforms);
 
@@ -484,7 +484,7 @@ void *_starpu_opencl_worker(void *arg)
 		j = _starpu_get_job_associated_to_task(task);
 
 		/* can OpenCL do that task ? */
-		if (!STARPU_OPENCL_MAY_PERFORM(j))
+		if (!_STARPU_OPENCL_MAY_PERFORM(j))
 		{
 			/* this is not a OpenCL task */
 			_starpu_push_task(j, 0);

+ 2 - 2
src/drivers/opencl/driver_opencl_utils.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,6 +19,6 @@
 
 #include <config.h>
 
-#define STARPU_OPENCL_PLATFORM_MAX 4
+#define _STARPU_OPENCL_PLATFORM_MAX 4
 
 #endif /* __STARPU_OPENCL_UTILS_H__ */

+ 3 - 3
src/sched_policies/deque_modeling_policy_data_aware.c

@@ -31,9 +31,9 @@ static struct _starpu_fifo_taskq *queue_array[STARPU_NMAXWORKERS];
 static pthread_cond_t sched_cond[STARPU_NMAXWORKERS];
 static pthread_mutex_t sched_mutex[STARPU_NMAXWORKERS];
 
-static double alpha = STARPU_DEFAULT_ALPHA;
-static double beta = STARPU_DEFAULT_BETA;
-static double _gamma = STARPU_DEFAULT_GAMMA;
+static double alpha = _STARPU_DEFAULT_ALPHA;
+static double beta = _STARPU_DEFAULT_BETA;
+static double _gamma = _STARPU_DEFAULT_GAMMA;
 static double idle_power = 0.0;
 
 #ifdef STARPU_VERBOSE

+ 11 - 12
src/sched_policies/detect_combined_workers.c

@@ -24,8 +24,8 @@
 #ifdef STARPU_HAVE_HWLOC
 #include <hwloc.h>
 
-/* tree_t
- * ======
+/* struct _starpu_tree
+ * ===================
  * Purpose
  * =======
  * Structure representing a tree (which can be a sub-tree itself) whose root is an hwloc
@@ -41,12 +41,11 @@
  * workers		CPU-workers found by recursion in all the sub-trees and in this very one, represented as leaves in hwloc.
  */
 
-typedef struct tree_s{
+struct _starpu_tree {
     hwloc_obj_t obj;
     unsigned nb_workers;
     int *workers;
-} tree_t;
-
+};
 
 /* gather_trees
  * ============
@@ -67,7 +66,7 @@ typedef struct tree_s{
  *			Number of trees we want to combine (size of the array).
  */
 
-static void gather_trees(tree_t *target_tree, tree_t *source_trees, unsigned nb_source_trees)
+static void gather_trees(struct _starpu_tree *target_tree, struct _starpu_tree *source_trees, unsigned nb_source_trees)
 {
     unsigned tree_id, worker_id, index = 0;
     for(tree_id = 0; tree_id < nb_source_trees; ++tree_id)
@@ -101,7 +100,7 @@ static void gather_trees(tree_t *target_tree, tree_t *source_trees, unsigned nb_
  *			Maximum size of a combined worker.
  */
 
-static unsigned assign_multiple_trees(tree_t *trees, unsigned nb_trees, int min_size, int max_size)
+static unsigned assign_multiple_trees(struct _starpu_tree *trees, unsigned nb_trees, int min_size, int max_size)
 {
     unsigned short complete = 0;
     unsigned tree_id, tree_id2, nb_workers_tree, nb_workers_tree2, worker_id, nb_workers_total = 0, nb_workers_assigned = 0;
@@ -199,7 +198,7 @@ static unsigned assign_multiple_trees(tree_t *trees, unsigned nb_trees, int min_
  *			Maximum size of a combined worker.
  */
 
-static unsigned find_and_assign_combinations_with_hwloc_recursive(tree_t *tree, int min_size, int max_size)
+static unsigned find_and_assign_combinations_with_hwloc_recursive(struct _starpu_tree *tree, int min_size, int max_size)
 {
     unsigned subtree_id, nb_workers = 0;
 
@@ -231,7 +230,7 @@ static unsigned find_and_assign_combinations_with_hwloc_recursive(tree_t *tree,
     /* If there is only one child, we go to the next level right away */
     if (obj->arity == 1)
     {
-	tree_t subtree = *tree;
+	struct _starpu_tree subtree = *tree;
 	subtree.obj = obj->children[0];
 	nb_workers = find_and_assign_combinations_with_hwloc_recursive(&subtree, min_size, max_size);
 	tree->nb_workers = nb_workers;
@@ -242,12 +241,12 @@ static unsigned find_and_assign_combinations_with_hwloc_recursive(tree_t *tree,
      * CPU leaves that fits between min and max. */
 
     /* We allocate an array of tree structures which will contain the current node's subtrees data */
-    tree_t *subtrees = (tree_t *) malloc(obj->arity * sizeof(tree_t));
+    struct _starpu_tree *subtrees = (struct _starpu_tree *) malloc(obj->arity * sizeof(struct _starpu_tree));
 
     /* We allocate the array containing the workers of each subtree and initialize the fields left */
     for(subtree_id = 0; subtree_id < obj->arity; ++subtree_id)
     {
-	tree_t *subtree = subtrees + subtree_id;
+	struct _starpu_tree *subtree = subtrees + subtree_id;
 
 	subtree->obj = obj->children[subtree_id];
 	subtree->nb_workers = 0;
@@ -384,7 +383,7 @@ static void find_and_assign_combinations_with_hwloc(struct starpu_machine_topolo
 
     STARPU_ASSERT(min_size <= max_size);
 
-    tree_t tree;
+    struct _starpu_tree tree;
 
     /* Of course we start from the root */
     tree.obj = hwloc_get_obj_by_depth(topology->hwtopology, HWLOC_OBJ_SYSTEM, 0); 

+ 3 - 3
src/sched_policies/heft.c

@@ -32,9 +32,9 @@ static unsigned nworkers;
 static pthread_cond_t sched_cond[STARPU_NMAXWORKERS];
 static pthread_mutex_t sched_mutex[STARPU_NMAXWORKERS];
 
-static double alpha = STARPU_DEFAULT_ALPHA;
-static double beta = STARPU_DEFAULT_BETA;
-static double _gamma = STARPU_DEFAULT_GAMMA;
+static double alpha = _STARPU_DEFAULT_ALPHA;
+static double beta = _STARPU_DEFAULT_BETA;
+static double _gamma = _STARPU_DEFAULT_GAMMA;
 static double idle_power = 0.0;
 
 static double exp_start[STARPU_NMAXWORKERS]; /* of the first queued task */

+ 3 - 3
src/sched_policies/parallel_heft.c

@@ -34,9 +34,9 @@ static unsigned nworkers, ncombinedworkers;
 static pthread_cond_t sched_cond[STARPU_NMAXWORKERS];
 static pthread_mutex_t sched_mutex[STARPU_NMAXWORKERS];
 
-static double alpha = STARPU_DEFAULT_ALPHA;
-static double beta = STARPU_DEFAULT_BETA;
-static double _gamma = STARPU_DEFAULT_GAMMA;
+static double alpha = _STARPU_DEFAULT_ALPHA;
+static double beta = _STARPU_DEFAULT_BETA;
+static double _gamma = _STARPU_DEFAULT_GAMMA;
 static double idle_power = 0.0;
 
 static double worker_exp_start[STARPU_NMAXWORKERS];

+ 8 - 8
src/starpu_parameters.h

@@ -27,19 +27,19 @@
  * Here are the default values of alpha, beta, gamma
  */
 
-#define STARPU_DEFAULT_ALPHA 1.0
-#define STARPU_DEFAULT_BETA 1.0
-#define STARPU_DEFAULT_GAMMA 1000.0
+#define _STARPU_DEFAULT_ALPHA 1.0
+#define _STARPU_DEFAULT_BETA 1.0
+#define _STARPU_DEFAULT_GAMMA 1000.0
 
 /* How many executions a codelet will have to be measured before we
  * consider that calibration will provide a value good enough for scheduling */
-#define STARPU_CALIBRATION_MINIMUM 10
+#define _STARPU_CALIBRATION_MINIMUM 10
 
 /* Assumed relative performance ratios */
 /* TODO: benchmark a bit instead */
-#define STARPU_CPU_ALPHA	1.0f
-#define STARPU_CUDA_ALPHA	13.33f
-#define STARPU_OPENCL_ALPHA	12.22f
-#define STARPU_GORDON_ALPHA	6.0f /* XXX this is a random value ... */
+#define _STARPU_CPU_ALPHA	1.0f
+#define _STARPU_CUDA_ALPHA	13.33f
+#define _STARPU_OPENCL_ALPHA	12.22f
+#define _STARPU_GORDON_ALPHA	6.0f /* XXX this is a random value ... */
 
 #endif /* _STARPU_PARAMETERS_H */

+ 8 - 8
src/util/starpu_insert_task_utils.c

@@ -18,13 +18,13 @@
 #include <common/config.h>
 #include <common/utils.h>
 
-typedef void (*callback_func_t)(void *);
+typedef void (*_starpu_callback_func_t)(void *);
 
 /* Deal with callbacks. The unpack function may be called multiple times when
  * we have a parallel task, and we should not free the cl_arg parameter from
  * the callback function. */
 struct insert_task_cb_wrapper {
-	callback_func_t callback_func;
+	_starpu_callback_func_t callback_func;
 	void *callback_arg;
 	void *arg_stack;
 };
@@ -63,10 +63,10 @@ size_t _starpu_insert_task_get_arg_size(va_list varg_list)
 			arg_buffer_size += cst_size;
 		}
 		else if (arg_type==STARPU_CALLBACK) {
-			(void)va_arg(varg_list, callback_func_t);
+			(void)va_arg(varg_list, _starpu_callback_func_t);
 		}
 		else if (arg_type==STARPU_CALLBACK_WITH_ARG) {
-			va_arg(varg_list, callback_func_t);
+			va_arg(varg_list, _starpu_callback_func_t);
 			va_arg(varg_list, void *);
 		}
 		else if (arg_type==STARPU_CALLBACK_ARG) {
@@ -123,11 +123,11 @@ int _starpu_pack_cl_args(size_t arg_buffer_size, char **arg_buffer, va_list varg
 		}
 		else if (arg_type==STARPU_CALLBACK)
 		{
-			(void)va_arg(varg_list, callback_func_t);
+			(void)va_arg(varg_list, _starpu_callback_func_t);
 		}
 		else if (arg_type==STARPU_CALLBACK_WITH_ARG)
 		{
-			va_arg(varg_list, callback_func_t);
+			va_arg(varg_list, _starpu_callback_func_t);
 			va_arg(varg_list, void *);
 		}
 		else if (arg_type==STARPU_CALLBACK_ARG) {
@@ -182,14 +182,14 @@ int _starpu_insert_task_create_and_submit(char *arg_buffer, struct starpu_codele
 		else if (arg_type==STARPU_CALLBACK)
 		{
 			void (*callback_func)(void *);
-			callback_func = va_arg(varg_list, callback_func_t);
+			callback_func = va_arg(varg_list, _starpu_callback_func_t);
 			cl_arg_wrapper->callback_func = callback_func;
 		}
 		else if (arg_type==STARPU_CALLBACK_WITH_ARG)
 		{
 			void (*callback_func)(void *);
 			void *callback_arg;
-			callback_func = va_arg(varg_list, callback_func_t);
+			callback_func = va_arg(varg_list, _starpu_callback_func_t);
 			callback_arg = va_arg(varg_list, void *);
 			cl_arg_wrapper->callback_func = callback_func;
 			cl_arg_wrapper->callback_arg = callback_arg;