8 years ago · 82d0b3921a
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,6 @@
 
				 # StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				 #
			
 
				-# Copyright (C) 2009-2016  Université de Bordeaux
			
 
				+# Copyright (C) 2009-2017  Université de Bordeaux
			
 
				 # Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
			
 
				 # Copyright (C) 2014, 2016 INRIA
			
 
				 #
			
@@ -44,6 +44,8 @@ New features:
 
				   * Add starpu_data_set_user_data and starpu_data_get_user_data.
			
 
				   * Add STARPU_MPI_FAKE_SIZE and STARPU_MPI_FAKE_RANK to allow simulating
			
 
				     execution of just one MPI node.
			
 
				+  * Add STARPU_PERF_MODEL_HOMOGENEOUS_CUDA/OPENCL/MIC/SCC to share performance
			
 
				+    models between devices, making calibration much faster.
			
 
				 
			
 
				 StarPU 1.2.0 (svn revision 18521)
			
 
				 ==============================================
			
--- a/doc/doxygen/chapters/210_check_list_performance.doxy
+++ b/doc/doxygen/chapters/210_check_list_performance.doxy
@@ -246,9 +246,24 @@ to configure a performance model for the codelets of the application (see
 
				 use on-line calibration.  StarPU will automatically calibrate codelets
			
 
				 which have never been calibrated yet, and save the result in
			
 
				 <c>$STARPU_HOME/.starpu/sampling/codelets</c>.
			
 
				-The models are indexed by machine name. To share the models between
			
 
				-machines (e.g. for a homogeneous cluster), use <c>export
			
 
				-STARPU_HOSTNAME=some_global_name</c> (\ref STARPU_HOSTNAME). To force continuing calibration,
			
 
				+The models are indexed by machine name.
			
 
				+
			
 
				+By default, StarPU stores separate performance models according to the hostname
			
 
				+of the system. To avoid having to calibrate performance models for each node
			
 
				+of a homogeneous cluster for instance, the model can be shared by using
			
 
				+<c>export STARPU_HOSTNAME=some_global_name</c> (\ref STARPU_HOSTNAME), where
			
 
				+<c>some_global_name</c> is the name of the cluster for instance, which thus
			
 
				+overrides the hostname of the system.
			
 
				+
			
 
				+By default, StarPU stores separate performance models for each GPU. To avoid
			
 
				+having to calibrate performance models for each GPU of a homogeneous set of GPU
			
 
				+devices for instance, the model can be shared by setting
			
 
				+<c>export STARPU_PERF_MODEL_HOMOGENEOUS_CUDA=1</c> ,
			
 
				+<c>export STARPU_PERF_MODEL_HOMOGENEOUS_OPENCL=1</c> ,
			
 
				+<c>export STARPU_PERF_MODEL_HOMOGENEOUS_MIC=1</c> , or
			
 
				+<c>export STARPU_PERF_MODEL_HOMOGENEOUS_SCC=1</c> (depending on your device type).
			
 
				+
			
 
				+To force continuing calibration,
			
 
				 use <c>export STARPU_CALIBRATE=1</c> (\ref STARPU_CALIBRATE). This may be necessary if your application
			
 
				 has not-so-stable performance. StarPU will force calibration (and thus ignore
			
 
				 the current result) until 10 (<c>_STARPU_CALIBRATION_MINIMUM</c>) measurements have been
			
--- a/doc/doxygen/chapters/501_environment_variables.doxy
+++ b/doc/doxygen/chapters/501_environment_variables.doxy
@@ -1,7 +1,7 @@
 
				 /*
			
 
				  * This file is part of the StarPU Handbook.
			
 
				  * Copyright (C) 2009--2011  Universit@'e de Bordeaux
			
 
				- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
			
 
				+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
			
 
				  * Copyright (C) 2011, 2012, 2016 INRIA
			
 
				  * Copyright (C) 2016 Uppsala University
			
 
				  * See the file version.doxy for copying conditions.
			
@@ -622,6 +622,46 @@ This specifies the main directory in which StarPU stores its
 
				 performance model files. The default is <c>$STARPU_HOME/.starpu/sampling</c>.
			
 
				 </dd>
			
 
				 
			
 
				+<dt>STARPU_PERF_MODEL_HOMOGENEOUS_CUDA</dt>
			
 
				+<dd>
			
 
				+\anchor STARPU_PERF_MODEL_HOMOGENEOUS_CUDA
			
 
				+\addindex __env__STARPU_PERF_MODEL_HOMOGENEOUS_CUDA
			
 
				+When this is set to 1, StarPU will assume that all CUDA devices have the same
			
 
				+performance, and thus share performance models for them, thus allowing kernel
			
 
				+calibration to be much faster, since measurements only have to be done once for all
			
 
				+CUDA GPUs.
			
 
				+</dd>
			
 
				+
			
 
				+<dt>STARPU_PERF_MODEL_HOMOGENEOUS_OPENCL</dt>
			
 
				+<dd>
			
 
				+\anchor STARPU_PERF_MODEL_HOMOGENEOUS_OPENCL
			
 
				+\addindex __env__STARPU_PERF_MODEL_HOMOGENEOUS_OPENCL
			
 
				+When this is set to 1, StarPU will assume that all OPENCL devices have the same
			
 
				+performance, and thus share performance models for them, thus allowing kernel
			
 
				+calibration to be much faster, since measurements only have to be done once for all
			
 
				+OPENCL devices.
			
 
				+</dd>
			
 
				+
			
 
				+<dt>STARPU_PERF_MODEL_HOMOGENEOUS_MIC</dt>
			
 
				+<dd>
			
 
				+\anchor STARPU_PERF_MODEL_HOMOGENEOUS_MIC
			
 
				+\addindex __env__STARPU_PERF_MODEL_HOMOGENEOUS_MIC
			
 
				+When this is set to 1, StarPU will assume that all MIC devices have the same
			
 
				+performance, and thus share performance models for them, thus allowing kernel
			
 
				+calibration to be much faster, since measurements only have to be done once for all
			
 
				+MIC devices.
			
 
				+</dd>
			
 
				+
			
 
				+<dt>STARPU_PERF_MODEL_HOMOGENEOUS_SCC</dt>
			
 
				+<dd>
			
 
				+\anchor STARPU_PERF_MODEL_HOMOGENEOUS_SCC
			
 
				+\addindex __env__STARPU_PERF_MODEL_HOMOGENEOUS_SCC
			
 
				+When this is set to 1, StarPU will assume that all SCC devices have the same
			
 
				+performance, and thus share performance models for them, thus allowing kernel
			
 
				+calibration to be much faster, since measurements only have to be done once for all
			
 
				+SCC devices.
			
 
				+</dd>
			
 
				+
			
 
				 <dt>STARPU_HOSTNAME</dt>
			
 
				 <dd>
			
 
				 \anchor STARPU_HOSTNAME
			
--- a/mpi/src/starpu_mpi.c
+++ b/mpi/src/starpu_mpi.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2009, 2010-2017  Université de Bordeaux
			
 
				- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
			
 
				+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
			
 
				  * Copyright (C) 2016  Inria
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -76,6 +76,10 @@ static int running = 0;
 
				 #ifdef STARPU_SIMGRID
			
 
				 static int _mpi_world_size;
			
 
				 static int _mpi_world_rank;
			
 
				+
			
 
				+static int wait_counter;
			
 
				+static starpu_pthread_cond_t wait_counter_cond;
			
 
				+static starpu_pthread_mutex_t wait_counter_mutex;
			
 
				 #endif
			
 
				 int _starpu_mpi_fake_world_size = -1;
			
 
				 int _starpu_mpi_fake_world_rank = -1;
			
@@ -143,6 +147,12 @@ static void _starpu_mpi_request_init(struct _starpu_mpi_req **req)
 
				 	(*req)->early_data_handle = NULL;
			
 
				 	(*req)->envelope = NULL;
			
 
				 	(*req)->sequential_consistency = 1;
			
 
				+
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+	starpu_pthread_queue_init(&((*req)->queue));
			
 
				+	starpu_pthread_queue_register(&wait, &((*req)->queue));
			
 
				+	(*req)->done = 0;
			
 
				+#endif
			
 
				 }
			
 
				 
			
 
				 static void _starpu_mpi_request_destroy(struct _starpu_mpi_req *req)
			
@@ -153,6 +163,10 @@ static void _starpu_mpi_request_destroy(struct _starpu_mpi_req *req)
 
				 	STARPU_PTHREAD_COND_DESTROY(&req->posted_cond);
			
 
				 	free(req->datatype_name);
			
 
				 	req->datatype_name = NULL;
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+	starpu_pthread_queue_unregister(&wait, &req->queue);
			
 
				+	starpu_pthread_queue_destroy(&req->queue);
			
 
				+#endif
			
 
				 	free(req);
			
 
				 	req = NULL;
			
 
				 }
			
@@ -294,6 +308,9 @@ static void _starpu_mpi_submit_ready_request(void *arg)
 
				 
			
 
				 	newer_requests = 1;
			
 
				 	STARPU_PTHREAD_COND_BROADCAST(&cond_progression);
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+	starpu_pthread_queue_signal(&dontsleep);
			
 
				+#endif
			
 
				 	STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
			
 
				 	_STARPU_MPI_LOG_OUT();
			
 
				 }
			
@@ -350,6 +367,55 @@ static struct _starpu_mpi_req *_starpu_mpi_isend_irecv_common(starpu_data_handle
 
				 	return req;
			
 
				  }
			
 
				 
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+int _starpu_mpi_simgrid_mpi_test(int *done, int *flag)
			
 
				+{
			
 
				+	*flag = 0;
			
 
				+	if (*done)
			
 
				+	{
			
 
				+		starpu_pthread_queue_signal(&dontsleep);
			
 
				+		*flag = 1;
			
 
				+	}
			
 
				+	return MPI_SUCCESS;
			
 
				+}
			
 
				+static void* _starpu_mpi_simgrid_wait_req_func(void* arg)
			
 
				+{
			
 
				+	struct _starpu_simgrid_mpi_req *sim_req = arg;
			
 
				+	int ret;
			
 
				+	STARPU_PTHREAD_MUTEX_LOCK(&wait_counter_mutex);
			
 
				+	wait_counter++;
			
 
				+	STARPU_PTHREAD_MUTEX_UNLOCK(&wait_counter_mutex);
			
 
				+
			
 
				+	ret = MPI_Wait(sim_req->request, sim_req->status);
			
 
				+
			
 
				+	STARPU_MPI_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Wait returning %s", _starpu_mpi_get_mpi_error_code(ret));
			
 
				+
			
 
				+	*(sim_req->done) = 1;
			
 
				+	starpu_pthread_queue_signal(sim_req->queue);
			
 
				+
			
 
				+	free(sim_req);
			
 
				+
			
 
				+	STARPU_PTHREAD_MUTEX_LOCK(&wait_counter_mutex);
			
 
				+	if (--wait_counter == 0)
			
 
				+		STARPU_PTHREAD_COND_SIGNAL(&wait_counter_cond);
			
 
				+	STARPU_PTHREAD_MUTEX_UNLOCK(&wait_counter_mutex);
			
 
				+
			
 
				+	return NULL;
			
 
				+}
			
 
				+void _starpu_mpi_simgrid_wait_req(MPI_Request *request, MPI_Status *status, starpu_pthread_queue_t *queue, unsigned *done)
			
 
				+{
			
 
				+	struct _starpu_simgrid_mpi_req *sim_req;
			
 
				+	_STARPU_MPI_CALLOC(sim_req, 1, sizeof(struct _starpu_simgrid_mpi_req));
			
 
				+	sim_req->request = request;
			
 
				+	sim_req->status = status;
			
 
				+	sim_req->queue = queue;
			
 
				+	sim_req->done = done;
			
 
				+	*done = 0;
			
 
				+
			
 
				+	_starpu_simgrid_xbt_thread_create("wait for mpi transfer", _starpu_mpi_simgrid_wait_req_func, sim_req);
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				  /********************************************************/
			
 
				  /*                                                      */
			
 
				  /*  Send functionalities                                */
			
@@ -379,6 +445,10 @@ static void _starpu_mpi_isend_data_func(struct _starpu_mpi_req *req)
 
				 		STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Issend returning %s", _starpu_mpi_get_mpi_error_code(req->ret));
			
 
				 	}
			
 
				 
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+	_starpu_mpi_simgrid_wait_req(&req->data_request, &req->status_store, &req->queue, &req->done);
			
 
				+#endif
			
 
				+
			
 
				 	_STARPU_MPI_TRACE_ISEND_SUBMIT_END(req->node_tag.rank, req->node_tag.data_tag, 0);
			
 
				 
			
 
				 	/* somebody is perhaps waiting for the MPI request to be posted */
			
@@ -571,6 +641,9 @@ static void _starpu_mpi_irecv_data_func(struct _starpu_mpi_req *req)
 
				 	{
			
 
				 		_STARPU_MPI_COMM_FROM_DEBUG(req->count, req->datatype, req->node_tag.rank, _STARPU_MPI_TAG_DATA, req->node_tag.data_tag, req->node_tag.comm);
			
 
				 		req->ret = MPI_Irecv(req->ptr, req->count, req->datatype, req->node_tag.rank, _STARPU_MPI_TAG_DATA, req->node_tag.comm, &req->data_request);
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+		_starpu_mpi_simgrid_wait_req(&req->data_request, &req->status_store, &req->queue, &req->done);
			
 
				+#endif
			
 
				 	}
			
 
				 	STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_IRecv returning %s", _starpu_mpi_get_mpi_error_code(req->ret));
			
 
				 
			
@@ -682,6 +755,10 @@ static void _starpu_mpi_wait_func(struct _starpu_mpi_req *waiting_req)
 
				 	_STARPU_MPI_TRACE_UWAIT_BEGIN(req->node_tag.rank, req->node_tag.data_tag);
			
 
				 	if (req->data_request != MPI_REQUEST_NULL)
			
 
				 	{
			
 
				+		// TODO: Fix for STARPU_SIMGRID
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+		STARPU_MPI_ASSERT_MSG(0, "Implement this in STARPU_SIMGRID");
			
 
				+#endif
			
 
				 		req->ret = MPI_Wait(&req->data_request, waiting_req->status);
			
 
				 		STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Wait returning %s", _starpu_mpi_get_mpi_error_code(req->ret));
			
 
				 	}
			
@@ -755,7 +832,13 @@ static void _starpu_mpi_test_func(struct _starpu_mpi_req *testing_req)
 
				 
			
 
				 	_STARPU_MPI_TRACE_UTESTING_BEGIN(req->node_tag.rank, req->node_tag.data_tag);
			
 
				 
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+	req->ret = _starpu_mpi_simgrid_mpi_test(&req->done, testing_req->flag);
			
 
				+	memcpy(testing_req->status, &req->status_store, sizeof(*testing_req->status));
			
 
				+#else
			
 
				 	req->ret = MPI_Test(&req->data_request, testing_req->flag, testing_req->status);
			
 
				+#endif
			
 
				+
			
 
				 	STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Test returning %s", _starpu_mpi_get_mpi_error_code(req->ret));
			
 
				 
			
 
				 	_STARPU_MPI_TRACE_UTESTING_END(req->node_tag.rank, req->node_tag.data_tag);
			
@@ -1095,7 +1178,6 @@ static void _starpu_mpi_test_detached_requests(void)
 
				 {
			
 
				 	//_STARPU_MPI_LOG_IN();
			
 
				 	int flag;
			
 
				-	MPI_Status status;
			
 
				 	struct _starpu_mpi_req *req;
			
 
				 
			
 
				 	STARPU_PTHREAD_MUTEX_LOCK(&detached_requests_mutex);
			
@@ -1106,7 +1188,11 @@ static void _starpu_mpi_test_detached_requests(void)
 
				 		STARPU_PTHREAD_MUTEX_UNLOCK(&detached_requests_mutex);
			
 
				 
			
 
				 		//_STARPU_MPI_DEBUG(3, "Test detached request %p - mpitag %d - TYPE %s %d\n", &req->data_request, req->node_tag.data_tag, _starpu_mpi_request_type(req->request_type), req->node_tag.rank);
			
 
				-		req->ret = MPI_Test(&req->data_request, &flag, &status);
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+		req->ret = _starpu_mpi_simgrid_mpi_test(&req->done, &flag);
			
 
				+#else
			
 
				+		req->ret = MPI_Test(&req->data_request, &flag, MPI_STATUS_IGNORE);
			
 
				+#endif
			
 
				 
			
 
				 		STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Test returning %s", _starpu_mpi_get_mpi_error_code(req->ret));
			
 
				 
			
@@ -1354,6 +1440,11 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 
				 	STARPU_PTHREAD_COND_SIGNAL(&cond_progression);
			
 
				 	STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
			
 
				 
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+	starpu_pthread_wait_init(&wait);
			
 
				+	starpu_pthread_queue_init(&dontsleep);
			
 
				+	starpu_pthread_queue_register(&wait, &dontsleep);
			
 
				+#endif
			
 
				 
			
 
				 	STARPU_PTHREAD_MUTEX_LOCK(&mutex);
			
 
				 
			
@@ -1361,6 +1452,9 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 
				 
			
 
				 	while (running || posted_requests || !(_starpu_mpi_req_list_empty(ready_requests)) || !(_starpu_mpi_req_list_empty(detached_requests)))// || !(_starpu_mpi_early_request_count()) || !(_starpu_mpi_sync_data_count()))
			
 
				 	{
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+		starpu_pthread_wait_reset(&wait);
			
 
				+#endif
			
 
				 		/* shall we block ? */
			
 
				 		unsigned block = _starpu_mpi_req_list_empty(ready_requests) && _starpu_mpi_early_request_count() == 0 && _starpu_mpi_sync_data_count() == 0;
			
 
				 
			
@@ -1522,7 +1616,7 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 
				 		}
			
 
				 #ifdef STARPU_SIMGRID
			
 
				 		STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
			
 
				-		MSG_process_sleep(0.000010);
			
 
				+		starpu_pthread_wait_wait(&wait);
			
 
				 		STARPU_PTHREAD_MUTEX_LOCK(&mutex);
			
 
				 #endif
			
 
				 	}
			
@@ -1533,6 +1627,21 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 
				 		envelope_request_submitted = 0;
			
 
				 	}
			
 
				 
			
 
				+
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+	STARPU_PTHREAD_MUTEX_LOCK(&wait_counter_mutex);
			
 
				+	while (wait_counter != 0)
			
 
				+		STARPU_PTHREAD_COND_WAIT(&wait_counter_cond, &wait_counter_mutex);
			
 
				+	STARPU_PTHREAD_MUTEX_UNLOCK(&wait_counter_mutex);
			
 
				+
			
 
				+	STARPU_PTHREAD_MUTEX_DESTROY(&wait_counter_mutex);
			
 
				+	STARPU_PTHREAD_COND_DESTROY(&wait_counter_cond);
			
 
				+
			
 
				+	starpu_pthread_queue_unregister(&wait, &dontsleep);
			
 
				+	starpu_pthread_queue_destroy(&dontsleep);
			
 
				+	starpu_pthread_wait_destroy(&wait);
			
 
				+#endif
			
 
				+
			
 
				 	STARPU_MPI_ASSERT_MSG(_starpu_mpi_req_list_empty(detached_requests), "List of detached requests not empty");
			
 
				 	STARPU_MPI_ASSERT_MSG(_starpu_mpi_req_list_empty(ready_requests), "List of ready requests not empty");
			
 
				 	STARPU_MPI_ASSERT_MSG(posted_requests == 0, "Number of posted request is not zero");
			
@@ -1628,6 +1737,11 @@ int _starpu_mpi_initialize(int *argc, char ***argv, int initialize_mpi, MPI_Comm
 
				 	STARPU_PTHREAD_MUTEX_INIT(&mutex_posted_requests, NULL);
			
 
				 	_starpu_mpi_comm = starpu_getenv("STARPU_MPI_COMM") != NULL;
			
 
				 
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+	STARPU_PTHREAD_MUTEX_INIT(&wait_counter_mutex, NULL);
			
 
				+	STARPU_PTHREAD_COND_INIT(&wait_counter_cond, NULL);
			
 
				+#endif
			
 
				+
			
 
				 #ifdef STARPU_MPI_ACTIVITY
			
 
				 	hookid = starpu_progression_hook_register(_starpu_mpi_progression_hook_func, NULL);
			
 
				 	STARPU_MPI_ASSERT_MSG(hookid >= 0, "starpu_progression_hook_register failed");
			
@@ -1721,6 +1835,9 @@ int starpu_mpi_shutdown(void)
 
				 	STARPU_PTHREAD_MUTEX_LOCK(&mutex);
			
 
				 	running = 0;
			
 
				 	STARPU_PTHREAD_COND_BROADCAST(&cond_progression);
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+	starpu_pthread_queue_signal(&dontsleep);
			
 
				+#endif
			
 
				 	STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
			
 
				 
			
 
				 #ifndef STARPU_SIMGRID
			
@@ -1740,7 +1857,7 @@ int starpu_mpi_shutdown(void)
 
				 	_starpu_mpi_req_list_delete(detached_requests);
			
 
				 	_starpu_mpi_req_list_delete(ready_requests);
			
 
				 
			
 
				-	_starpu_mpi_comm_amounts_display(rank);
			
 
				+	_starpu_mpi_comm_amounts_display(stderr, rank);
			
 
				 	_starpu_mpi_comm_amounts_free();
			
 
				 	_starpu_mpi_cache_free(world_size);
			
 
				 	_starpu_mpi_tag_free();
			
@@ -1857,13 +1974,11 @@ void starpu_mpi_get_data_on_node(MPI_Comm comm, starpu_data_handle_t data_handle
 
				 	tag = starpu_mpi_data_get_tag(data_handle);
			
 
				 	if (rank == -1)
			
 
				 	{
			
 
				-		fprintf(stderr,"StarPU needs to be told the MPI rank of this data, using starpu_mpi_data_register\n");
			
 
				-		STARPU_ABORT();
			
 
				+		_STARPU_ERROR("StarPU needs to be told the MPI rank of this data, using starpu_mpi_data_register\n");
			
 
				 	}
			
 
				 	if (tag == -1)
			
 
				 	{
			
 
				-		fprintf(stderr,"StarPU needs to be told the MPI tag of this data, using starpu_mpi_data_register\n");
			
 
				-		STARPU_ABORT();
			
 
				+		_STARPU_ERROR("StarPU needs to be told the MPI tag of this data, using starpu_mpi_data_register\n");
			
 
				 	}
			
 
				 	starpu_mpi_comm_rank(comm, &me);
			
 
				 
			
@@ -1968,4 +2083,3 @@ int starpu_mpi_wait_for_all(MPI_Comm comm)
 
				 	}
			
 
				 	return 0;
			
 
				 }
			
 
				-
			
--- a/mpi/src/starpu_mpi_cache.c
+++ b/mpi/src/starpu_mpi_cache.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016  CNRS
			
 
				+ * Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
			
 
				  * Copyright (C) 2011-2016  Université de Bordeaux
			
 
				  * Copyright (C) 2014 INRIA
			
 
				  *
			
@@ -74,7 +74,7 @@ void _starpu_mpi_cache_init(MPI_Comm comm)
 
				 
			
 
				 	if (_starpu_cache_enabled == 0)
			
 
				 	{
			
 
				-		if (!_starpu_silent) fprintf(stderr,"Warning: StarPU MPI Communication cache is disabled\n");
			
 
				+		_STARPU_DISP("Warning: StarPU MPI Communication cache is disabled\n");
			
 
				 		return;
			
 
				 	}
			
 
				 
			
--- a/mpi/src/starpu_mpi_cache_stats.c
+++ b/mpi/src/starpu_mpi_cache_stats.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2014, 2015, 2016  CNRS
			
 
				+ * Copyright (C) 2014, 2015, 2016, 2017  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -33,7 +33,7 @@ void _starpu_mpi_cache_stats_init(MPI_Comm comm)
 
				 	}
			
 
				 	if (stats_enabled == 0) return;
			
 
				 
			
 
				-	if (!_starpu_silent) fprintf(stderr,"Warning: StarPU is executed with STARPU_MPI_CACHE_STATS=1, which slows down a bit\n");
			
 
				+	_STARPU_DISP("Warning: StarPU is executed with STARPU_MPI_CACHE_STATS=1, which slows down a bit\n");
			
 
				 
			
 
				 	starpu_mpi_comm_size(comm, &world_size);
			
 
				 	_STARPU_MPI_DEBUG(1, "allocating for %d nodes\n", world_size);
			
--- a/mpi/src/starpu_mpi_comm.c
+++ b/mpi/src/starpu_mpi_comm.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016  CNRS
			
 
				+ * Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
			
 
				  * Copyright (C) 2011-2016  Université de Bordeaux
			
 
				  * Copyright (C) 2014 INRIA
			
 
				  *
			
@@ -28,6 +28,12 @@ struct _starpu_mpi_comm
 
				 	struct _starpu_mpi_envelope *envelope;
			
 
				 	MPI_Request request;
			
 
				 	int posted;
			
 
				+
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+	MPI_Status status;
			
 
				+	starpu_pthread_queue_t queue;
			
 
				+	unsigned done;
			
 
				+#endif
			
 
				 };
			
 
				 struct _starpu_mpi_comm_hashtable
			
 
				 {
			
@@ -62,6 +68,10 @@ void _starpu_mpi_comm_free()
 
				 	{
			
 
				 		struct _starpu_mpi_comm *_comm = _starpu_mpi_comms[i]; // get the ith _comm;
			
 
				 		free(_comm->envelope);
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+		starpu_pthread_queue_unregister(&wait, &_comm->queue);
			
 
				+		starpu_pthread_queue_destroy(&_comm->queue);
			
 
				+#endif
			
 
				 		free(_comm);
			
 
				 	}
			
 
				 	free(_starpu_mpi_comms);
			
@@ -106,6 +116,12 @@ void _starpu_mpi_comm_register(MPI_Comm comm)
 
				 		_STARPU_MPI_MALLOC(entry, sizeof(*entry));
			
 
				 		entry->comm = comm;
			
 
				 		HASH_ADD(hh, _starpu_mpi_comms_cache, comm, sizeof(entry->comm), entry);
			
 
				+
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+		starpu_pthread_queue_init(&_comm->queue);
			
 
				+		starpu_pthread_queue_register(&wait, &_comm->queue);
			
 
				+		_comm->done = 0;
			
 
				+#endif
			
 
				 	}
			
 
				 	STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_comms_mutex);
			
 
				 }
			
@@ -123,6 +139,9 @@ void _starpu_mpi_comm_post_recv()
 
				 			_STARPU_MPI_DEBUG(3, "Posting a receive to get a data envelop on comm %d %d\n", i, _comm->comm);
			
 
				 			_STARPU_MPI_COMM_FROM_DEBUG(sizeof(struct _starpu_mpi_envelope), MPI_BYTE, MPI_ANY_SOURCE, _STARPU_MPI_TAG_ENVELOPE, _STARPU_MPI_TAG_ENVELOPE, _comm->comm);
			
 
				 			MPI_Irecv(_comm->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, MPI_ANY_SOURCE, _STARPU_MPI_TAG_ENVELOPE, _comm->comm, &_comm->request);
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+			_starpu_mpi_simgrid_wait_req(&_comm->request, &_comm->status, &_comm->queue, &_comm->done);
			
 
				+#endif
			
 
				 			_comm->posted = 1;
			
 
				 		}
			
 
				 	}
			
@@ -143,9 +162,11 @@ int _starpu_mpi_comm_test_recv(MPI_Status *status, struct _starpu_mpi_envelope *
 
				 			int flag, res;
			
 
				 			/* test whether an envelope has arrived. */
			
 
				 #ifdef STARPU_SIMGRID
			
 
				-			MSG_process_sleep(0.000001);
			
 
				-#endif
			
 
				+			res = _starpu_mpi_simgrid_mpi_test(&_comm->done, &flag);
			
 
				+			memcpy(status, &_comm->status, sizeof(*status));
			
 
				+#else
			
 
				 			res = MPI_Test(&_comm->request, &flag, status);
			
 
				+#endif
			
 
				 			STARPU_ASSERT(res == MPI_SUCCESS);
			
 
				 			if (flag)
			
 
				 			{
			
@@ -182,9 +203,13 @@ void _starpu_mpi_comm_cancel_recv()
 
				 		struct _starpu_mpi_comm *_comm = _starpu_mpi_comms[i]; // get the ith _comm;
			
 
				 		if (_comm->posted == 1)
			
 
				 		{
			
 
				-			MPI_Status status;
			
 
				 			MPI_Cancel(&_comm->request);
			
 
				-			MPI_Wait(&_comm->request, &status);
			
 
				+#ifndef STARPU_SIMGRID
			
 
				+			{
			
 
				+				MPI_Status status;
			
 
				+				MPI_Wait(&_comm->request, &status);
			
 
				+			}
			
 
				+#endif
			
 
				 			_comm->posted = 0;
			
 
				 		}
			
 
				 	}
			
--- a/mpi/src/starpu_mpi_private.h
+++ b/mpi/src/starpu_mpi_private.h
@@ -24,11 +24,28 @@
 
				 #include "starpu_mpi.h"
			
 
				 #include "starpu_mpi_fxt.h"
			
 
				 #include <common/list.h>
			
 
				+#include <core/simgrid.h>
			
 
				 
			
 
				 #ifdef __cplusplus
			
 
				 extern "C" {
			
 
				 #endif
			
 
				+	
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+starpu_pthread_wait_t wait;
			
 
				+starpu_pthread_queue_t dontsleep;
			
 
				 
			
 
				+struct _starpu_simgrid_mpi_req
			
 
				+{
			
 
				+	MPI_Request *request;
			
 
				+	MPI_Status *status;
			
 
				+	starpu_pthread_queue_t *queue;
			
 
				+	unsigned *done;
			
 
				+};
			
 
				+
			
 
				+int _starpu_mpi_simgrid_mpi_test(int *done, int *flag);
			
 
				+void _starpu_mpi_simgrid_wait_req(MPI_Request *request, 	MPI_Status *status, starpu_pthread_queue_t *queue, unsigned *done);
			
 
				+#endif
			
 
				+	
			
 
				 extern int _starpu_debug_rank;
			
 
				 char *_starpu_mpi_get_mpi_error_code(int code);
			
 
				 extern int _starpu_mpi_comm;
			
@@ -224,6 +241,13 @@ LIST_TYPE(_starpu_mpi_req,
 
				 	int sequential_consistency;
			
 
				 
			
 
				      	UT_hash_handle hh;
			
 
				+
			
 
				+#ifdef STARPU_SIMGRID
			
 
				+        MPI_Status status_store;
			
 
				+	starpu_pthread_queue_t queue;
			
 
				+	unsigned done;
			
 
				+#endif
			
 
				+	  
			
 
				 );
			
 
				 
			
 
				 struct _starpu_mpi_argc_argv
			
--- a/mpi/src/starpu_mpi_stats.c
+++ b/mpi/src/starpu_mpi_stats.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2012, 2013, 2016  CNRS
			
 
				+ * Copyright (C) 2012, 2013, 2016, 2017  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -34,7 +34,7 @@ void _starpu_mpi_comm_amounts_init(MPI_Comm comm)
 
				 
			
 
				 	if (stats_enabled == 0) return;
			
 
				 
			
 
				-	if (!_starpu_silent) fprintf(stderr,"Warning: StarPU is executed with STARPU_COMM_STATS=1, which slows down a bit\n");
			
 
				+	_STARPU_DISP("Warning: StarPU is executed with STARPU_COMM_STATS=1, which slows down a bit\n");
			
 
				 
			
 
				 	starpu_mpi_comm_size(comm, &world_size);
			
 
				 	_STARPU_MPI_DEBUG(1, "allocating for %d nodes\n", world_size);
			
@@ -68,7 +68,7 @@ void starpu_mpi_comm_amounts_retrieve(size_t *comm_amounts)
 
				 	memcpy(comm_amounts, comm_amount, world_size * sizeof(size_t));
			
 
				 }
			
 
				 
			
 
				-void _starpu_mpi_comm_amounts_display(int node)
			
 
				+void _starpu_mpi_comm_amounts_display(FILE *stream, int node)
			
 
				 {
			
 
				 	int dst;
			
 
				 	size_t sum = 0;
			
@@ -80,13 +80,13 @@ void _starpu_mpi_comm_amounts_display(int node)
 
				 		sum += comm_amount[dst];
			
 
				 	}
			
 
				 
			
 
				-	fprintf(stderr, "\n[starpu_comm_stats][%d] TOTAL:\t%f B\t%f MB\n", node, (float)sum, (float)sum/1024/1024);
			
 
				+	fprintf(stream, "\n[starpu_comm_stats][%d] TOTAL:\t%f B\t%f MB\n", node, (float)sum, (float)sum/1024/1024);
			
 
				 
			
 
				 	for (dst = 0; dst < world_size; dst++)
			
 
				 	{
			
 
				 		if (comm_amount[dst])
			
 
				 		{
			
 
				-			fprintf(stderr, "[starpu_comm_stats][%d->%d]\t%f B\t%f MB\n",
			
 
				+			fprintf(stream, "[starpu_comm_stats][%d->%d]\t%f B\t%f MB\n",
			
 
				 				node, dst, (float)comm_amount[dst], ((float)comm_amount[dst])/(1024*1024));
			
 
				 		}
			
 
				 	}
			
--- a/mpi/src/starpu_mpi_stats.h
+++ b/mpi/src/starpu_mpi_stats.h
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2012  CNRS
			
 
				+ * Copyright (C) 2012, 2017  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -17,6 +17,7 @@
 
				 #ifndef __STARPU_MPI_STATS_H__
			
 
				 #define __STARPU_MPI_STATS_H__
			
 
				 
			
 
				+#include <stdio.h>
			
 
				 #include <stdlib.h>
			
 
				 #include <mpi.h>
			
 
				 
			
@@ -27,7 +28,7 @@ extern "C" {
 
				 void _starpu_mpi_comm_amounts_init(MPI_Comm comm);
			
 
				 void _starpu_mpi_comm_amounts_free();
			
 
				 void _starpu_mpi_comm_amounts_inc(MPI_Comm comm, unsigned dst, MPI_Datatype datatype, int count);
			
 
				-void _starpu_mpi_comm_amounts_display(int node);
			
 
				+void _starpu_mpi_comm_amounts_display(FILE *stream, int node);
			
 
				 
			
 
				 #ifdef __cplusplus
			
 
				 }
			
--- a/mpi/src/starpu_mpi_task_insert.c
+++ b/mpi/src/starpu_mpi_task_insert.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016  CNRS
			
 
				+ * Copyright (C) 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
			
 
				  * Copyright (C) 2011-2016  Université de Bordeaux
			
 
				  * Copyright (C) 2014, 2016 Inria
			
 
				  *
			
@@ -84,13 +84,11 @@ void _starpu_mpi_exchange_data_before_execution(starpu_data_handle_t data, enum
 
				 		int data_tag = starpu_mpi_data_get_tag(data);
			
 
				 		if (mpi_rank == -1)
			
 
				 		{
			
 
				-			fprintf(stderr,"StarPU needs to be told the MPI rank of this data, using starpu_mpi_data_register\n");
			
 
				-			STARPU_ABORT();
			
 
				+			_STARPU_ERROR("StarPU needs to be told the MPI rank of this data, using starpu_mpi_data_register\n");
			
 
				 		}
			
 
				 		if (data_tag == -1)
			
 
				 		{
			
 
				-			fprintf(stderr,"StarPU needs to be told the MPI tag of this data, using starpu_mpi_data_register\n");
			
 
				-			STARPU_ABORT();
			
 
				+			_STARPU_ERROR("StarPU needs to be told the MPI tag of this data, using starpu_mpi_data_register\n");
			
 
				 		}
			
 
				 
			
 
				 		if (do_execute && mpi_rank != me)
			
@@ -128,13 +126,11 @@ void _starpu_mpi_exchange_data_after_execution(starpu_data_handle_t data, enum s
 
				 		int data_tag = starpu_mpi_data_get_tag(data);
			
 
				 		if(mpi_rank == -1)
			
 
				 		{
			
 
				-			fprintf(stderr,"StarPU needs to be told the MPI rank of this data, using starpu_mpi_data_register\n");
			
 
				-			STARPU_ABORT();
			
 
				+			_STARPU_ERROR("StarPU needs to be told the MPI rank of this data, using starpu_mpi_data_register\n");
			
 
				 		}
			
 
				 		if(data_tag == -1)
			
 
				 		{
			
 
				-			fprintf(stderr,"StarPU needs to be told the MPI tag of this data, using starpu_mpi_data_register\n");
			
 
				-			STARPU_ABORT();
			
 
				+			_STARPU_ERROR("StarPU needs to be told the MPI tag of this data, using starpu_mpi_data_register\n");
			
 
				 		}
			
 
				 		if (mpi_rank == me)
			
 
				 		{
			
@@ -520,11 +516,11 @@ int _starpu_mpi_task_insert_v(MPI_Comm comm, struct starpu_codelet *codelet, va_
 
				 
			
 
				 		if (STARPU_UNLIKELY(ret == -ENODEV))
			
 
				 		{
			
 
				-			fprintf(stderr, "submission of task %p wih codelet %p failed (symbol `%s') (err: ENODEV)\n",
			
 
				-				task, task->cl,
			
 
				-				(codelet == NULL) ? "none" :
			
 
				-				task->cl->name ? task->cl->name :
			
 
				-				(task->cl->model && task->cl->model->symbol)?task->cl->model->symbol:"none");
			
 
				+			_STARPU_MSG("submission of task %p wih codelet %p failed (symbol `%s') (err: ENODEV)\n",
			
 
				+				    task, task->cl,
			
 
				+				    (codelet == NULL) ? "none" :
			
 
				+				    task->cl->name ? task->cl->name :
			
 
				+				    (task->cl->model && task->cl->model->symbol)?task->cl->model->symbol:"none");
			
 
				 
			
 
				 			task->destroy = 0;
			
 
				 			starpu_task_destroy(task);
			
@@ -668,13 +664,11 @@ void starpu_mpi_redux_data(MPI_Comm comm, starpu_data_handle_t data_handle)
 
				 	tag = starpu_mpi_data_get_tag(data_handle);
			
 
				 	if (rank == -1)
			
 
				 	{
			
 
				-		fprintf(stderr,"StarPU needs to be told the MPI rank of this data, using starpu_mpi_data_register\n");
			
 
				-		STARPU_ABORT();
			
 
				+		_STARPU_ERROR("StarPU needs to be told the MPI rank of this data, using starpu_mpi_data_register\n");
			
 
				 	}
			
 
				 	if (tag == -1)
			
 
				 	{
			
 
				-		fprintf(stderr,"StarPU needs to be told the MPI tag of this data, using starpu_mpi_data_register\n");
			
 
				-		STARPU_ABORT();
			
 
				+		_STARPU_ERROR("StarPU needs to be told the MPI tag of this data, using starpu_mpi_data_register\n");
			
 
				 	}
			
 
				 
			
 
				 	starpu_mpi_comm_rank(comm, &me);
			
--- a/mpi/src/starpu_mpi_task_insert_fortran.c
+++ b/mpi/src/starpu_mpi_task_insert_fortran.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2016  CNRS
			
 
				+ * Copyright (C) 2016, 2017  CNRS
			
 
				  * Copyright (C) 2016 Inria
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -366,11 +366,11 @@ int _fstarpu_mpi_task_insert_v(MPI_Comm comm, struct starpu_codelet *codelet, vo
 
				 
			
 
				 		if (STARPU_UNLIKELY(ret == -ENODEV))
			
 
				 		{
			
 
				-			fprintf(stderr, "submission of task %p wih codelet %p failed (symbol `%s') (err: ENODEV)\n",
			
 
				-				task, task->cl,
			
 
				-				(codelet == NULL) ? "none" :
			
 
				-				task->cl->name ? task->cl->name :
			
 
				-				(task->cl->model && task->cl->model->symbol)?task->cl->model->symbol:"none");
			
 
				+			_STARPU_MSG("submission of task %p wih codelet %p failed (symbol `%s') (err: ENODEV)\n",
			
 
				+				    task, task->cl,
			
 
				+				    (codelet == NULL) ? "none" :
			
 
				+				    task->cl->name ? task->cl->name :
			
 
				+				    (task->cl->model && task->cl->model->symbol)?task->cl->model->symbol:"none");
			
 
				 
			
 
				 			task->destroy = 0;
			
 
				 			starpu_task_destroy(task);
			
--- a/src/common/fxt.c
+++ b/src/common/fxt.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2009-2016  Université de Bordeaux
			
 
				- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
			
 
				+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -199,7 +199,7 @@ void _starpu_stop_fxt_profiling(void)
 
				 #ifdef STARPU_VERBOSE
			
 
				 	        char hostname[128];
			
 
				 		gethostname(hostname, 128);
			
 
				-		fprintf(stderr, "Writing FxT traces into file %s:%s\n", hostname, _STARPU_PROF_FILE_USER);
			
 
				+		_STARPU_MSG("Writing FxT traces into file %s:%s\n", hostname, _STARPU_PROF_FILE_USER);
			
 
				 #endif
			
 
				 		fut_endup(_STARPU_PROF_FILE_USER);
			
 
				 
			
@@ -213,7 +213,7 @@ void _starpu_stop_fxt_profiling(void)
 
				 		{
			
 
				 			/* Something went wrong with the FxT trace (eg. there
			
 
				 			 * were too many events) */
			
 
				-			fprintf(stderr, "Warning: the FxT trace could not be generated properly\n");
			
 
				+			_STARPU_MSG("Warning: the FxT trace could not be generated properly\n");
			
 
				 		}
			
 
				 
			
 
				 		_starpu_written = 1;
			
--- a/src/common/thread.c
+++ b/src/common/thread.c
@@ -215,7 +215,7 @@ int starpu_pthread_setspecific(starpu_pthread_key_t key, const void *pointer)
 
				 {
			
 
				 	void **array;
			
 
				 #ifdef STARPU_SIMGRID_HAVE_SIMIX_PROCESS_GET_CODE
			
 
				-	if (SIMIX_process_get_code() == _starpu_mpi_simgrid_init)
			
 
				+	if ((SIMIX_process_get_code() == _starpu_mpi_simgrid_init) || (!strcmp(SIMIX_process_self_get_name(),"wait for mpi transfer")))
			
 
				 		/* Special-case the SMPI process */
			
 
				 		array = smpi_process_get_user_data();
			
 
				 	else
			
@@ -229,7 +229,7 @@ void* starpu_pthread_getspecific(starpu_pthread_key_t key)
 
				 {
			
 
				 	void **array;
			
 
				 #ifdef STARPU_SIMGRID_HAVE_SIMIX_PROCESS_GET_CODE
			
 
				-	if (SIMIX_process_get_code() == _starpu_mpi_simgrid_init)
			
 
				+	if ((SIMIX_process_get_code() == _starpu_mpi_simgrid_init) || (!strcmp(SIMIX_process_self_get_name(),"wait for mpi transfer")))
			
 
				 		/* Special-case the SMPI process */
			
 
				 		array = smpi_process_get_user_data();
			
 
				 	else
			
--- a/src/common/utils.c
+++ b/src/common/utils.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2010, 2012-2016  Université de Bordeaux
			
 
				- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
			
 
				+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -104,7 +104,7 @@ int _starpu_mkpath(const char *s, mode_t mode)
 
				 	{
			
 
				 		if (!S_ISDIR(sb.st_mode))
			
 
				 		{
			
 
				-			fprintf(stderr,"Error: %s is not a directory:\n", path);
			
 
				+			_STARPU_MSG("Error: %s is not a directory:\n", path);
			
 
				 			STARPU_ABORT();
			
 
				 		}
			
 
				 		/* It already exists and is a directory.  */
			
@@ -137,7 +137,7 @@ void _starpu_mkpath_and_check(const char *path, mode_t mode)
 
				 
			
 
				 	if (ret == -1 && errno != EEXIST)
			
 
				 	{
			
 
				-		fprintf(stderr,"Error making StarPU directory %s:\n", path);
			
 
				+		_STARPU_MSG("Error making StarPU directory %s:\n", path);
			
 
				 		perror("mkdir");
			
 
				 		STARPU_ABORT();
			
 
				 	}
			
--- a/src/core/perfmodel/perfmodel_history.c
+++ b/src/core/perfmodel/perfmodel_history.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2009-2016  Université de Bordeaux
			
 
				- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
			
 
				+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
			
 
				  * Copyright (C) 2011  Télécom-SudParis
			
 
				  * Copyright (C) 2016  Inria
			
 
				  *
			
@@ -50,6 +50,7 @@ static int current_arch_comb;
 
				 static int nb_arch_combs;
			
 
				 static starpu_pthread_rwlock_t arch_combs_mutex;
			
 
				 static int historymaxerror;
			
 
				+static char ignore_devid[STARPU_ANY_WORKER];
			
 
				 
			
 
				 /* How many executions a codelet will have to be measured before we
			
 
				  * consider that calibration will provide a value good enough for scheduling */
			
@@ -108,7 +109,8 @@ int _starpu_perfmodel_arch_comb_get(int ndevices, struct starpu_perfmodel_device
 
				 				for(dev2 = 0; dev2 < ndevices; dev2++)
			
 
				 				{
			
 
				 					if(arch_combs[comb]->devices[dev1].type == devices[dev2].type &&
			
 
				-					   arch_combs[comb]->devices[dev1].devid == devices[dev2].devid &&
			
 
				+					   (ignore_devid[devices[dev2].type] ||
			
 
				+					    arch_combs[comb]->devices[dev1].devid == devices[dev2].devid) &&
			
 
				 					   arch_combs[comb]->devices[dev1].ncores == devices[dev2].ncores)
			
 
				 						nfounded++;
			
 
				 				}
			
@@ -917,6 +919,11 @@ void _starpu_initialize_registered_performance_models(void)
 
				 	STARPU_PTHREAD_RWLOCK_INIT(&arch_combs_mutex, NULL);
			
 
				 	historymaxerror = starpu_get_env_number_default("STARPU_HISTORY_MAX_ERROR", STARPU_HISTORYMAXERROR);
			
 
				 	_starpu_calibration_minimum = starpu_get_env_number_default("STARPU_CALIBRATE_MINIMUM", 10);
			
 
				+	/* ignore_devid[STARPU_CPU_WORKER]; */ /* Always true for now */
			
 
				+	ignore_devid[STARPU_CUDA_WORKER] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_CUDA", 0);
			
 
				+	ignore_devid[STARPU_OPENCL_WORKER] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_OPENCL", 0);
			
 
				+	ignore_devid[STARPU_MIC_WORKER] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_MIC", 0);
			
 
				+	ignore_devid[STARPU_SCC_WORKER] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_SCC", 0);
			
 
				 }
			
 
				 
			
 
				 void _starpu_deinitialize_performance_model(struct starpu_perfmodel *model)
			
@@ -1106,7 +1113,7 @@ int starpu_perfmodel_list(FILE *output)
 
				         }
			
 
				 	return 0;
			
 
				 #else
			
 
				-	fprintf(stderr,"Listing perfmodels is not implemented on pure Windows yet\n");
			
 
				+	_STARPU_MSG("Listing perfmodels is not implemented on pure Windows yet\n");
			
 
				 	return 1;
			
 
				 #endif
			
 
				 }
			
--- a/src/core/sched_policy.c
+++ b/src/core/sched_policy.c
@@ -144,7 +144,7 @@ static struct starpu_sched_policy *find_sched_policy_from_name(const char *polic
 
				 	return NULL;
			
 
				 }
			
 
				 
			
 
				-static void display_sched_help_message(void)
			
 
				+static void display_sched_help_message(FILE *stream)
			
 
				 {
			
 
				 	const char *sched_env = starpu_getenv("STARPU_SCHED");
			
 
				 	if (sched_env && (strcmp(sched_env, "help") == 0))
			
@@ -152,13 +152,13 @@ static void display_sched_help_message(void)
 
				 		/* display the description of all predefined policies */
			
 
				 		struct starpu_sched_policy **policy;
			
 
				 
			
 
				-		fprintf(stderr, "\nThe variable STARPU_SCHED can be set to one of the following strings:\n");
			
 
				+		fprintf(stream, "\nThe variable STARPU_SCHED can be set to one of the following strings:\n");
			
 
				 		for(policy=predefined_policies ; *policy!=NULL ; policy++)
			
 
				 		{
			
 
				 			struct starpu_sched_policy *p = *policy;
			
 
				-			fprintf(stderr, "%-30s\t-> %s\n", p->policy_name, p->policy_description);
			
 
				+			fprintf(stream, "%-30s\t-> %s\n", p->policy_name, p->policy_description);
			
 
				 		}
			
 
				-		fprintf(stderr, "\n");
			
 
				+		fprintf(stream, "\n");
			
 
				 	 }
			
 
				 }
			
 
				 
			
@@ -197,7 +197,7 @@ struct starpu_sched_policy *_starpu_select_sched_policy(struct _starpu_machine_c
 
				 void _starpu_init_sched_policy(struct _starpu_machine_config *config, struct _starpu_sched_ctx *sched_ctx, struct starpu_sched_policy *selected_policy)
			
 
				 {
			
 
				 	/* Perhaps we have to display some help */
			
 
				-	display_sched_help_message();
			
 
				+	display_sched_help_message(stderr);
			
 
				 
			
 
				 	/* Prefetch is activated by default */
			
 
				 	use_prefetch = starpu_get_env_number("STARPU_PREFETCH");
			
@@ -598,7 +598,7 @@ int _starpu_push_task_to_workers(struct starpu_task *task)
 
				 
			
 
				 		if(ret == -1)
			
 
				 		{
			
 
				-			fprintf(stderr, "repush task \n");
			
 
				+			_STARPU_MSG("repush task \n");
			
 
				 			_STARPU_TRACE_JOB_POP(task, task->priority > 0);
			
 
				 			ret = _starpu_push_task_to_workers(task);
			
 
				 		}
			
@@ -1128,7 +1128,7 @@ void _starpu_print_idle_time()
 
				 	f = fopen(starpu_idle_file, "a");
			
 
				 	if (!f)
			
 
				 	{
			
 
				-		fprintf(stderr, "couldn't open %s: %s\n", starpu_idle_file, strerror(errno));
			
 
				+		_STARPU_MSG("couldn't open %s: %s\n", starpu_idle_file, strerror(errno));
			
 
				 	}
			
 
				 	else
			
 
				 	{
			
--- a/src/core/simgrid.c
+++ b/src/core/simgrid.c
@@ -31,6 +31,7 @@
 
				 
			
 
				 #ifdef STARPU_SIMGRID
			
 
				 #include <sys/resource.h>
			
 
				+#include <simgrid/simix.h>
			
 
				 
			
 
				 #pragma weak starpu_main
			
 
				 extern int starpu_main(int argc, char *argv[]);
			
@@ -803,4 +804,35 @@ void _starpu_simgrid_count_ngpus(void)
 
				 		}
			
 
				 #endif
			
 
				 }
			
 
				+
			
 
				+typedef struct{
			
 
				+  void_f_pvoid_t code;
			
 
				+  void *userparam;
			
 
				+  void *father_data;
			
 
				+} thread_data_t;
			
 
				+
			
 
				+static int _starpu_simgrid_xbt_thread_create_wrapper(int argc, char *argv[])
			
 
				+{
			
 
				+  smx_process_t self = SIMIX_process_self();
			
 
				+  thread_data_t *t = SIMIX_process_self_get_data(self);
			
 
				+  simcall_process_set_data(self, t->father_data);
			
 
				+  t->code(t->userparam);
			
 
				+  simcall_process_set_data(self, NULL);
			
 
				+  free(t);
			
 
				+  
			
 
				+  return 0;
			
 
				+}
			
 
				+
			
 
				+void _starpu_simgrid_xbt_thread_create(const char *name, void_f_pvoid_t code, void *param)
			
 
				+{
			
 
				+  thread_data_t *res = malloc(sizeof(thread_data_t));
			
 
				+  res->userparam = param;
			
 
				+  res->code = code;
			
 
				+  res->father_data = SIMIX_process_self_get_data(SIMIX_process_self());
			
 
				+
			
 
				+  simcall_process_create(name,
			
 
				+                           _starpu_simgrid_xbt_thread_create_wrapper, res,
			
 
				+                           SIMIX_host_self_get_name(), -1.0, 0, NULL,
			
 
				+                           /*props */ NULL,0);
			
 
				+}
			
 
				 #endif
			
--- a/src/core/simgrid.h
+++ b/src/core/simgrid.h
@@ -68,6 +68,8 @@ starpu_pthread_queue_t _starpu_simgrid_task_queue[STARPU_NMAXWORKERS];
 
				  * bus */
			
 
				 void _starpu_simgrid_count_ngpus(void);
			
 
				 
			
 
				+void _starpu_simgrid_xbt_thread_create(const char *name, void_f_pvoid_t code,
			
 
				+				       void *param);
			
 
				 #endif
			
 
				 
			
 
				 #endif // __SIMGRID_H__
			
--- a/src/core/task.c
+++ b/src/core/task.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2009-2016  Université de Bordeaux
			
 
				- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
			
 
				+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
			
 
				  * Copyright (C) 2011  Télécom-SudParis
			
 
				  * Copyright (C) 2011, 2014, 2016  INRIA
			
 
				  * Copyright (C) 2016  Uppsala University
			
@@ -1241,14 +1241,14 @@ static void *watchdog_func(void *arg)
 
				 		if (!config->watchdog_ok && last_nsubmitted
			
 
				 				&& last_nsubmitted == starpu_task_nsubmitted())
			
 
				 		{
			
 
				-			fprintf(stderr,"The StarPU watchdog detected that no task finished for %fs (can be configure through STARPU_WATCHDOG_TIMEOUT)\n", timeout);
			
 
				+			_STARPU_MSG("The StarPU watchdog detected that no task finished for %fs (can be configured through STARPU_WATCHDOG_TIMEOUT)\n", timeout);
			
 
				 			if (watchdog_crash)
			
 
				 			{
			
 
				-				fprintf(stderr,"Crashing the process\n");
			
 
				+				_STARPU_MSG("Crashing the process\n");
			
 
				 				raise(SIGABRT);
			
 
				 			}
			
 
				 			else
			
 
				-				fprintf(stderr,"Set the STARPU_WATCHDOG_CRASH environment variable if you want to abort the process in such a case\n");
			
 
				+				_STARPU_MSG("Set the STARPU_WATCHDOG_CRASH environment variable if you want to abort the process in such a case\n");
			
 
				 		}
			
 
				 		/* Only shout again after another period */
			
 
				 		config->watchdog_ok = 1;
			
--- a/src/core/topology.c
+++ b/src/core/topology.c
@@ -477,10 +477,10 @@ _starpu_init_mic_node (struct _starpu_machine_config *config, int mic_idx,
 
				 
			
 
				 	if (0 != mic_file_found)
			
 
				 	{
			
 
				-		fprintf(stderr, "No MIC program specified, use the environment\n"
			
 
				-			"variable STARPU_MIC_SINK_PROGRAM_NAME or the environment\n"
			
 
				-			"or the field 'starpu_conf.mic_sink_program_path'\n"
			
 
				-			"to define it.\n");
			
 
				+		_STARPU_MSG("No MIC program specified, use the environment\n"
			
 
				+			    "variable STARPU_MIC_SINK_PROGRAM_NAME or the environment\n"
			
 
				+			    "or the field 'starpu_conf.mic_sink_program_path'\n"
			
 
				+			    "to define it.\n");
			
 
				 
			
 
				 		return -1;
			
 
				 	}
			
@@ -865,9 +865,7 @@ _starpu_init_mic_config (struct _starpu_machine_config *config,
 
				 		if ((unsigned) nmiccores > topology->nhwmiccores[mic_idx])
			
 
				 		{
			
 
				 			/* The user requires more MIC cores than are available */
			
 
				-			fprintf(stderr,
			
 
				-				"# Warning: %d MIC cores requested. Only %d available.\n",
			
 
				-				nmiccores, topology->nhwmiccores[mic_idx]);
			
 
				+			_STARPU_MSG("# Warning: %d MIC cores requested. Only %d available.\n", nmiccores, topology->nhwmiccores[mic_idx]);
			
 
				 			nmiccores = topology->nhwmiccores[mic_idx];
			
 
				 		}
			
 
				 	}
			
@@ -992,17 +990,15 @@ _starpu_init_mp_config (struct _starpu_machine_config *config,
 
				              * detected mic devices. ! */
			
 
				             reqmicdevices = nhwmicdevices;
			
 
				 
			
 
				-        if (reqmicdevices != -1)
			
 
				-        {
			
 
				-            if ((unsigned) reqmicdevices > nhwmicdevices)
			
 
				-            {
			
 
				-                /* The user requires more MIC devices than there is available */
			
 
				-                fprintf(stderr,
			
 
				-                    "# Warning: %d MIC devices requested. Only %d available.\n",
			
 
				-                    reqmicdevices, nhwmicdevices);
			
 
				-                reqmicdevices = nhwmicdevices;
			
 
				-            }
			
 
				-        }
			
 
				+	if (reqmicdevices != -1)
			
 
				+	{
			
 
				+		if ((unsigned) reqmicdevices > nhwmicdevices)
			
 
				+		{
			
 
				+			/* The user requires more MIC devices than are available */
			
 
				+			_STARPU_MSG("# Warning: %d MIC devices requested. Only %d available.\n", reqmicdevices, nhwmicdevices);
			
 
				+			reqmicdevices = nhwmicdevices;
			
 
				+		}
			
 
				+	}
			
 
				 
			
 
				         topology->nmicdevices = 0;
			
 
				         unsigned i;
			
@@ -1624,7 +1620,7 @@ _starpu_bind_thread_on_cpu (
 
				 	if (ret)
			
 
				 	{
			
 
				 		const char *msg = strerror(ret);
			
 
				-		fprintf(stderr, "pthread_setaffinity_np: %s\n", msg);
			
 
				+		_STARPU_MSG("pthread_setaffinity_np: %s\n", msg);
			
 
				 		STARPU_ABORT();
			
 
				 	}
			
 
				 
			
--- a/src/core/workers.c
+++ b/src/core/workers.c
@@ -1649,8 +1649,8 @@ void starpu_shutdown(void)
 
				 	     int stats = starpu_get_env_number("STARPU_STATS");
			
 
				 	     if (stats != 0)
			
 
				 	     {
			
 
				-		  _starpu_display_msi_stats();
			
 
				-		  _starpu_display_alloc_cache_stats();
			
 
				+		  _starpu_display_msi_stats(stderr);
			
 
				+		  _starpu_display_alloc_cache_stats(stderr);
			
 
				 	     }
			
 
				 	}
			
 
				 
			
--- a/src/core/workers.h
+++ b/src/core/workers.h
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2009-2016  Université de Bordeaux
			
 
				- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016  CNRS
			
 
				+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016, 2017  CNRS
			
 
				  * Copyright (C) 2011, 2016  INRIA
			
 
				  * Copyright (C) 2016  Uppsala University
			
 
				  *
			
@@ -599,6 +599,7 @@ static inline int _starpu_worker_get_id(void)
 
				 #define starpu_worker_get_id _starpu_worker_get_id
			
 
				 
			
 
				 /* Similar behaviour to starpu_worker_get_id() but fails when called from outside a worker */
			
 
				+/* This returns an unsigned object on purpose, so that the caller is sure to get a non-negative value */
			
 
				 static inline unsigned __starpu_worker_get_id_check(const char *f, int l)
			
 
				 {
			
 
				 	(void) l;
			
--- a/src/datawizard/coherency.h
+++ b/src/datawizard/coherency.h
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2009-2016  Université de Bordeaux
			
 
				- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015  CNRS
			
 
				+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2017  CNRS
			
 
				  * Copyright (C) 2014-2016  Inria
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -271,8 +271,6 @@ struct _starpu_data_state
 
				 	void *user_data;
			
 
				 };
			
 
				 
			
 
				-void _starpu_display_msi_stats(void);
			
 
				-
			
 
				 /* This does not take a reference on the handle, the caller has to do it,
			
 
				  * e.g. through _starpu_attempt_to_submit_data_request_from_apps()
			
 
				  * detached means that the core is allowed to drop the request. The caller
			
--- a/src/datawizard/datastats.c
+++ b/src/datawizard/datastats.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2009, 2010, 2013, 2016  Université de Bordeaux
			
 
				- * Copyright (C) 2010, 2011, 2012, 2015, 2016  CNRS
			
 
				+ * Copyright (C) 2010, 2011, 2012, 2015, 2016, 2017  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -43,7 +43,7 @@ void __starpu_msi_cache_miss(unsigned node)
 
				 	miss_cnt[node]++;
			
 
				 }
			
 
				 
			
 
				-void _starpu_display_msi_stats(void)
			
 
				+void _starpu_display_msi_stats(FILE *stream)
			
 
				 {
			
 
				 	if (!starpu_enable_stats())
			
 
				 		return;
			
@@ -52,8 +52,8 @@ void _starpu_display_msi_stats(void)
 
				 	unsigned total_hit_cnt = 0;
			
 
				 	unsigned total_miss_cnt = 0;
			
 
				 
			
 
				-	fprintf(stderr, "\n#---------------------\n");
			
 
				-	fprintf(stderr, "MSI cache stats :\n");
			
 
				+	fprintf(stream, "\n#---------------------\n");
			
 
				+	fprintf(stream, "MSI cache stats :\n");
			
 
				 
			
 
				 	for (node = 0; node < STARPU_MAXNODES; node++)
			
 
				 	{
			
@@ -61,18 +61,18 @@ void _starpu_display_msi_stats(void)
 
				 		total_miss_cnt += miss_cnt[node];
			
 
				 	}
			
 
				 
			
 
				-	fprintf(stderr, "TOTAL MSI stats\thit %u (%2.2f %%)\tmiss %u (%2.2f %%)\n", total_hit_cnt, (100.0f*total_hit_cnt)/(total_hit_cnt+total_miss_cnt), total_miss_cnt, (100.0f*total_miss_cnt)/(total_hit_cnt+total_miss_cnt));
			
 
				+	fprintf(stream, "TOTAL MSI stats\thit %u (%2.2f %%)\tmiss %u (%2.2f %%)\n", total_hit_cnt, (100.0f*total_hit_cnt)/(total_hit_cnt+total_miss_cnt), total_miss_cnt, (100.0f*total_miss_cnt)/(total_hit_cnt+total_miss_cnt));
			
 
				 
			
 
				 	for (node = 0; node < STARPU_MAXNODES; node++)
			
 
				 	{
			
 
				 		if (hit_cnt[node]+miss_cnt[node])
			
 
				 		{
			
 
				-			fprintf(stderr, "memory node %u\n", node);
			
 
				-			fprintf(stderr, "\thit : %u (%2.2f %%)\n", hit_cnt[node], (100.0f*hit_cnt[node])/(hit_cnt[node]+miss_cnt[node]));
			
 
				-			fprintf(stderr, "\tmiss : %u (%2.2f %%)\n", miss_cnt[node], (100.0f*miss_cnt[node])/(hit_cnt[node]+miss_cnt[node]));
			
 
				+			fprintf(stream, "memory node %u\n", node);
			
 
				+			fprintf(stream, "\thit : %u (%2.2f %%)\n", hit_cnt[node], (100.0f*hit_cnt[node])/(hit_cnt[node]+miss_cnt[node]));
			
 
				+			fprintf(stream, "\tmiss : %u (%2.2f %%)\n", miss_cnt[node], (100.0f*miss_cnt[node])/(hit_cnt[node]+miss_cnt[node]));
			
 
				 		}
			
 
				 	}
			
 
				-	fprintf(stderr, "#---------------------\n");
			
 
				+	fprintf(stream, "#---------------------\n");
			
 
				 }
			
 
				 
			
 
				 /* measure the efficiency of our allocation cache */
			
@@ -91,25 +91,25 @@ void __starpu_data_allocation_inc_stats(unsigned node)
 
				 	alloc_cnt[node]++;
			
 
				 }
			
 
				 
			
 
				-void _starpu_display_alloc_cache_stats(void)
			
 
				+void _starpu_display_alloc_cache_stats(FILE *stream)
			
 
				 {
			
 
				 	if (!starpu_enable_stats())
			
 
				 		return;
			
 
				 
			
 
				-	fprintf(stderr, "\n#---------------------\n");
			
 
				-	fprintf(stderr, "Allocation cache stats:\n");
			
 
				+	fprintf(stream, "\n#---------------------\n");
			
 
				+	fprintf(stream, "Allocation cache stats:\n");
			
 
				 	unsigned node;
			
 
				 	for (node = 0; node < STARPU_MAXNODES; node++)
			
 
				 	{
			
 
				 		if (alloc_cnt[node])
			
 
				 		{
			
 
				-			fprintf(stderr, "memory node %u\n", node);
			
 
				-			fprintf(stderr, "\ttotal alloc : %u\n", alloc_cnt[node]);
			
 
				-			fprintf(stderr, "\tcached alloc: %u (%2.2f %%)\n",
			
 
				+			fprintf(stream, "memory node %u\n", node);
			
 
				+			fprintf(stream, "\ttotal alloc : %u\n", alloc_cnt[node]);
			
 
				+			fprintf(stream, "\tcached alloc: %u (%2.2f %%)\n",
			
 
				 				alloc_cache_hit_cnt[node], (100.0f*alloc_cache_hit_cnt[node])/(alloc_cnt[node]));
			
 
				 		}
			
 
				 		else
			
 
				-			fprintf(stderr, "No allocation on node %u\n", node);
			
 
				+			fprintf(stream, "No allocation on node %u\n", node);
			
 
				 	}
			
 
				-	fprintf(stderr, "#---------------------\n");
			
 
				+	fprintf(stream, "#---------------------\n");
			
 
				 }
			
--- a/src/datawizard/datastats.h
+++ b/src/datawizard/datastats.h
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2009, 2010, 2016  Université de Bordeaux
			
 
				- * Copyright (C) 2010, 2011, 2012, 2015  CNRS
			
 
				+ * Copyright (C) 2010, 2011, 2012, 2015, 2017  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -45,7 +45,7 @@ void __starpu_msi_cache_miss(unsigned node);
 
				 		__starpu_msi_cache_miss(node); \
			
 
				 } while (0)
			
 
				 
			
 
				-void _starpu_display_msi_stats(void);
			
 
				+void _starpu_display_msi_stats(FILE *stream);
			
 
				 
			
 
				 void __starpu_allocation_cache_hit(unsigned node STARPU_ATTRIBUTE_UNUSED);
			
 
				 void __starpu_data_allocation_inc_stats(unsigned node STARPU_ATTRIBUTE_UNUSED);
			
@@ -60,6 +60,6 @@ void __starpu_data_allocation_inc_stats(unsigned node STARPU_ATTRIBUTE_UNUSED);
 
				 		__starpu_data_allocation_inc_stats(node); \
			
 
				 } while (0)
			
 
				 
			
 
				-void _starpu_display_alloc_cache_stats(void);
			
 
				+void _starpu_display_alloc_cache_stats(FILE *stream);
			
 
				 
			
 
				 #endif // __DATASTATS_H__
			
--- a/src/datawizard/memalloc.c
+++ b/src/datawizard/memalloc.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2009-2017  Université de Bordeaux
			
 
				- * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
			
 
				+ * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
			
 
				  * Copyright (C) 2016  Inria
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -492,7 +492,7 @@ static size_t try_to_free_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node)
 
				 
			
 
				 			/* in case there was nobody using that buffer, throw it
			
 
				 			 * away after writing it back to main memory */
			
 
				-			
			
 
				+
			
 
				 			/* choose the best target */
			
 
				 			target = choose_target(handle, node);
			
 
				 
			
@@ -767,7 +767,6 @@ restart:
 
				 
			
 
				 		if (mc->data->is_not_important && (mc->footprint == footprint))
			
 
				 		{
			
 
				-//			fprintf(stderr, "found a candidate ...\n");
			
 
				 			/* Note: this may unlock mc_list! */
			
 
				 			success = try_to_reuse_mem_chunk(mc, node, replicate, 1);
			
 
				 		}
			
@@ -1518,7 +1517,7 @@ void _starpu_memchunk_dirty(struct _starpu_mem_chunk *mc, unsigned node)
 
				 }
			
 
				 
			
 
				 #ifdef STARPU_MEMORY_STATS
			
 
				-void _starpu_memory_display_stats_by_node(int node)
			
 
				+void _starpu_memory_display_stats_by_node(FILE *stream, int node)
			
 
				 {
			
 
				 	_starpu_spin_lock(&mc_lock[node]);
			
 
				 
			
@@ -1526,38 +1525,42 @@ void _starpu_memory_display_stats_by_node(int node)
 
				 	{
			
 
				 		struct _starpu_mem_chunk *mc;
			
 
				 
			
 
				-		fprintf(stderr, "#-------\n");
			
 
				-		fprintf(stderr, "Data on Node #%d\n",node);
			
 
				+		fprintf(stream, "#-------\n");
			
 
				+		fprintf(stream, "Data on Node #%d\n",node);
			
 
				 
			
 
				 		for (mc = _starpu_mem_chunk_list_begin(&mc_list[node]);
			
 
				 		     mc != _starpu_mem_chunk_list_end(&mc_list[node]);
			
 
				 		     mc = _starpu_mem_chunk_list_next(mc))
			
 
				 		{
			
 
				 			if (mc->automatically_allocated == 0)
			
 
				-				_starpu_memory_display_handle_stats(mc->data);
			
 
				+				_starpu_memory_display_handle_stats(stream, mc->data);
			
 
				 		}
			
 
				 
			
 
				 	}
			
 
				 
			
 
				 	_starpu_spin_unlock(&mc_lock[node]);
			
 
				 }
			
 
				-#endif
			
 
				 
			
 
				-void starpu_data_display_memory_stats(void)
			
 
				+void _starpu_data_display_memory_stats(FILE *stream)
			
 
				 {
			
 
				-#ifdef STARPU_MEMORY_STATS
			
 
				 	unsigned node;
			
 
				 
			
 
				-	fprintf(stderr, "\n#---------------------\n");
			
 
				-	fprintf(stderr, "Memory stats :\n");
			
 
				+	fprintf(stream, "\n#---------------------\n");
			
 
				+	fprintf(stream, "Memory stats :\n");
			
 
				 	for (node = 0; node < STARPU_MAXNODES; node++)
			
 
				 	{
			
 
				-	     _starpu_memory_display_stats_by_node(node);
			
 
				+		_starpu_memory_display_stats_by_node(stream, node);
			
 
				 	}
			
 
				-	fprintf(stderr, "\n#---------------------\n");
			
 
				-#endif
			
 
				+	fprintf(stream, "\n#---------------------\n");
			
 
				 }
			
 
				+#endif
			
 
				 
			
 
				+void starpu_data_display_memory_stats(void)
			
 
				+{
			
 
				+#ifdef STARPU_MEMORY_STATS
			
 
				+	_starpu_data_display_memory_stats(stderr);
			
 
				+#endif
			
 
				+}
			
 
				 
			
 
				 static int
			
 
				 get_better_disk_can_accept_size(starpu_data_handle_t handle, unsigned node)
			
@@ -1566,7 +1569,7 @@ get_better_disk_can_accept_size(starpu_data_handle_t handle, unsigned node)
 
				 	unsigned nnodes = starpu_memory_nodes_get_count();
			
 
				 	unsigned int i;
			
 
				 	double time_disk = 0;
			
 
				-				
			
 
				+
			
 
				 	for (i = 0; i < nnodes; i++)
			
 
				 	{
			
 
				 		if (starpu_node_get_kind(i) == STARPU_DISK_RAM && i != node &&
			
@@ -1576,7 +1579,7 @@ get_better_disk_can_accept_size(starpu_data_handle_t handle, unsigned node)
 
				 			/* if we can write on the disk */
			
 
				 			if (_starpu_get_disk_flag(i) != STARPU_DISK_NO_RECLAIM)
			
 
				 			{
			
 
				-				/* only time can change between disk <-> main_ram 
			
 
				+				/* only time can change between disk <-> main_ram
			
 
				 				 * and not between main_ram <-> worker if we compare diks*/
			
 
				 				double time_tmp = starpu_transfer_predict(i, STARPU_MAIN_RAM, _starpu_data_get_size(handle));
			
 
				 				if (target == -1 || time_disk > time_tmp)
			
@@ -1600,7 +1603,7 @@ choose_target(starpu_data_handle_t handle, unsigned node)
 
				 		/* try to push on RAM if we can before to push on disk */
			
 
				 		if(starpu_node_get_kind(handle->home_node) == STARPU_DISK_RAM && node != STARPU_MAIN_RAM)
			
 
				 		{
			
 
				-			if (handle->per_node[STARPU_MAIN_RAM].allocated || 
			
 
				+			if (handle->per_node[STARPU_MAIN_RAM].allocated ||
			
 
				 			    _starpu_memory_manager_test_allocate_size(STARPU_MAIN_RAM, size_handle) == 1)
			
 
				 			{
			
 
				 				target = STARPU_MAIN_RAM;
			
@@ -1612,7 +1615,7 @@ choose_target(starpu_data_handle_t handle, unsigned node)
 
				 
			
 
				 		}
			
 
				           	/* others memory nodes */
			
 
				-		else 
			
 
				+		else
			
 
				 		{
			
 
				 			target = handle->home_node;
			
 
				 		}
			
@@ -1626,7 +1629,7 @@ choose_target(starpu_data_handle_t handle, unsigned node)
 
				 		}
			
 
				 		/* node != 0 */
			
 
				 		/* try to push data to RAM if we can before to push on disk*/
			
 
				-		else if (handle->per_node[STARPU_MAIN_RAM].allocated || 
			
 
				+		else if (handle->per_node[STARPU_MAIN_RAM].allocated ||
			
 
				 			 _starpu_memory_manager_test_allocate_size(STARPU_MAIN_RAM, size_handle) == 1)
			
 
				 		{
			
 
				 			target = STARPU_MAIN_RAM;
			
--- a/src/datawizard/memstats.c
+++ b/src/datawizard/memstats.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2009, 2010, 2012  Université de Bordeaux
			
 
				- * Copyright (C) 2010, 2011, 2012, 2016  CNRS
			
 
				+ * Copyright (C) 2010, 2011, 2012, 2016, 2017  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -47,31 +47,31 @@ void _starpu_memory_stats_free(starpu_data_handle_t handle STARPU_ATTRIBUTE_UNUS
 
				 }
			
 
				 
			
 
				 #ifdef STARPU_MEMORY_STATS
			
 
				-void _starpu_memory_display_handle_stats(starpu_data_handle_t handle)
			
 
				+void _starpu_memory_display_handle_stats(FILE *stream, starpu_data_handle_t handle)
			
 
				 {
			
 
				 	unsigned node;
			
 
				 
			
 
				-	fprintf(stderr, "#-----\n");
			
 
				-	fprintf(stderr, "Data : %p\n", handle);
			
 
				-	fprintf(stderr, "Size : %d\n", (int)handle->ops->get_size(handle));
			
 
				-	fprintf(stderr, "\n");
			
 
				+	fprintf(stream, "#-----\n");
			
 
				+	fprintf(stream, "Data : %p\n", handle);
			
 
				+	fprintf(stream, "Size : %d\n", (int)handle->ops->get_size(handle));
			
 
				+	fprintf(stream, "\n");
			
 
				 
			
 
				-	fprintf(stderr, "#--\n");
			
 
				-	fprintf(stderr, "Data access stats\n");
			
 
				-	fprintf(stderr, "/!\\ Work Underway\n");
			
 
				+	fprintf(stream, "#--\n");
			
 
				+	fprintf(stream, "Data access stats\n");
			
 
				+	fprintf(stream, "/!\\ Work Underway\n");
			
 
				 	for (node = 0; node < STARPU_MAXNODES; node++)
			
 
				 	{
			
 
				 		if (handle->memory_stats->direct_access[node]+handle->memory_stats->loaded_shared[node]
			
 
				 		    +handle->memory_stats->invalidated[node]+handle->memory_stats->loaded_owner[node])
			
 
				 		{
			
 
				-			fprintf(stderr, "Node #%u\n", node);
			
 
				-			fprintf(stderr, "\tDirect access : %d\n", handle->memory_stats->direct_access[node]);
			
 
				+			fprintf(stream, "Node #%u\n", node);
			
 
				+			fprintf(stream, "\tDirect access : %d\n", handle->memory_stats->direct_access[node]);
			
 
				 			/* XXX Not Working yet. */
			
 
				 			if (handle->memory_stats->shared_to_owner[node])
			
 
				-				fprintf(stderr, "\t\tShared to Owner : %d\n", handle->memory_stats->shared_to_owner[node]);
			
 
				-			fprintf(stderr, "\tLoaded (Owner) : %d\n", handle->memory_stats->loaded_owner[node]);
			
 
				-			fprintf(stderr, "\tLoaded (Shared) : %d\n", handle->memory_stats->loaded_shared[node]);
			
 
				-			fprintf(stderr, "\tInvalidated (was Owner) : %d\n\n", handle->memory_stats->invalidated[node]);
			
 
				+				fprintf(stream, "\t\tShared to Owner : %d\n", handle->memory_stats->shared_to_owner[node]);
			
 
				+			fprintf(stream, "\tLoaded (Owner) : %d\n", handle->memory_stats->loaded_owner[node]);
			
 
				+			fprintf(stream, "\tLoaded (Shared) : %d\n", handle->memory_stats->loaded_shared[node]);
			
 
				+			fprintf(stream, "\tInvalidated (was Owner) : %d\n\n", handle->memory_stats->invalidated[node]);
			
 
				 		}
			
 
				 	}
			
 
				 }
			
--- a/src/datawizard/memstats.h
+++ b/src/datawizard/memstats.h
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2009, 2010  Université de Bordeaux
			
 
				- * Copyright (C) 2010, 2011, 2012  CNRS
			
 
				+ * Copyright (C) 2010, 2011, 2012, 2017  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -42,7 +42,7 @@ void _starpu_memory_stats_init_per_node(starpu_data_handle_t handle, unsigned no
 
				 
			
 
				 void _starpu_memory_stats_free(starpu_data_handle_t handle);
			
 
				 
			
 
				-void _starpu_memory_display_handle_stats(starpu_data_handle_t handle);
			
 
				+void _starpu_memory_display_handle_stats(FILE *stream, starpu_data_handle_t handle);
			
 
				 
			
 
				 void _starpu_memory_handle_stats_cache_hit(starpu_data_handle_t handle, unsigned node);
			
 
				 void _starpu_memory_handle_stats_loaded_shared(starpu_data_handle_t handle, unsigned node);
			
--- a/src/debug/starpu_debug_helpers.h
+++ b/src/debug/starpu_debug_helpers.h
@@ -1,6 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2010  Université de Bordeaux
			
 
				+ * Copyright (C) 2017  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -30,7 +31,7 @@ extern "C"
 
				 void _starpu_benchmark_ping_pong(starpu_data_handle_t handle, unsigned node0, unsigned node1, unsigned niter);
			
 
				 
			
 
				 /* Display the size of different data structures */
			
 
				-void _starpu_debug_display_structures_size(void);
			
 
				+void _starpu_debug_display_structures_size(FILE *stream);
			
 
				 
			
 
				 #ifdef __cplusplus
			
 
				 }
			
--- a/src/debug/structures_size.c
+++ b/src/debug/structures_size.c
@@ -1,6 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2010, 2011  Université de Bordeaux
			
 
				+ * Copyright (C) 2017        CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -21,18 +22,18 @@
 
				 #include <profiling/bound.h>
			
 
				 #include <debug/starpu_debug_helpers.h>
			
 
				 
			
 
				-void _starpu_debug_display_structures_size(void)
			
 
				+void _starpu_debug_display_structures_size(FILE *stream)
			
 
				 {
			
 
				-	fprintf(stderr, "struct starpu_task\t\t%u bytes\t(%x)\n",
			
 
				+	fprintf(stream, "struct starpu_task\t\t%u bytes\t(%x)\n",
			
 
				 			(unsigned) sizeof(struct starpu_task), (unsigned) sizeof(struct starpu_task));
			
 
				-	fprintf(stderr, "struct _starpu_job\t\t%u bytes\t(%x)\n",
			
 
				+	fprintf(stream, "struct _starpu_job\t\t%u bytes\t(%x)\n",
			
 
				 			(unsigned) sizeof(struct _starpu_job), (unsigned) sizeof(struct _starpu_job));
			
 
				-	fprintf(stderr, "struct _starpu_data_state\t%u bytes\t(%x)\n",
			
 
				+	fprintf(stream, "struct _starpu_data_state\t%u bytes\t(%x)\n",
			
 
				 			(unsigned) sizeof(struct _starpu_data_state), (unsigned) sizeof(struct _starpu_data_state));
			
 
				-	fprintf(stderr, "struct _starpu_tag\t\t%u bytes\t(%x)\n",
			
 
				+	fprintf(stream, "struct _starpu_tag\t\t%u bytes\t(%x)\n",
			
 
				 			(unsigned) sizeof(struct _starpu_tag), (unsigned) sizeof(struct _starpu_tag));
			
 
				-	fprintf(stderr, "struct _starpu_cg\t\t%u bytes\t(%x)\n",
			
 
				+	fprintf(stream, "struct _starpu_cg\t\t%u bytes\t(%x)\n",
			
 
				 			(unsigned) sizeof(struct _starpu_cg), (unsigned) sizeof(struct _starpu_cg));
			
 
				-	fprintf(stderr, "struct _starpu_worker\t\t%u bytes\t(%x)\n",
			
 
				+	fprintf(stream, "struct _starpu_worker\t\t%u bytes\t(%x)\n",
			
 
				 			(unsigned) sizeof(struct _starpu_worker), (unsigned) sizeof(struct _starpu_worker));
			
 
				 }
			
--- a/src/debug/traces/starpu_fxt.c
+++ b/src/debug/traces/starpu_fxt.c
@@ -2213,7 +2213,7 @@ static void handle_mpi_isend_submit_end(struct fxt_ev_64 *ev, struct starpu_fxt_
 
				 	{
			
 
				 		if (!mpi_warned)
			
 
				 		{
			
 
				-			fprintf(stderr,"Warning : Only one trace file is given. MPI transfers will not be displayed. Add all trace files to show them ! \n");
			
 
				+			_STARPU_MSG("Warning : Only one trace file is given. MPI transfers will not be displayed. Add all trace files to show them ! \n");
			
 
				 			mpi_warned = 1;
			
 
				 		}
			
 
				 	}
			
@@ -2276,7 +2276,7 @@ static void handle_mpi_irecv_complete_begin(struct fxt_ev_64 *ev, struct starpu_
 
				 	{
			
 
				 		if (!mpi_warned)
			
 
				 		{
			
 
				-			fprintf(stderr,"Warning : Only one trace file is given. MPI transfers will not be displayed. Add all trace files to show them ! \n");
			
 
				+			_STARPU_MSG("Warning : Only one trace file is given. MPI transfers will not be displayed. Add all trace files to show them ! \n");
			
 
				 			mpi_warned = 1;
			
 
				 		}
			
 
				 	}
			
@@ -3037,8 +3037,8 @@ void _starpu_fxt_parse_new_file(char *filename_in, struct starpu_fxt_options *op
 
				 
			
 
				 			default:
			
 
				 #ifdef STARPU_VERBOSE
			
 
				-				fprintf(stderr, "unknown event.. %x at time %llx WITH OFFSET %llx\n",
			
 
				-					(unsigned)ev.code, (long long unsigned)ev.time, (long long unsigned)(ev.time-options->file_offset));
			
 
				+				_STARPU_MSG("unknown event.. %x at time %llx WITH OFFSET %llx\n",
			
 
				+					    (unsigned)ev.code, (long long unsigned)ev.time, (long long unsigned)(ev.time-options->file_offset));
			
 
				 #endif
			
 
				 				break;
			
 
				 		}
			
@@ -3195,7 +3195,7 @@ void _starpu_fxt_paje_file_init(struct starpu_fxt_options *options)
 
				 		out_paje_file = fopen(options->out_paje_path, "w+");
			
 
				 		if (!out_paje_file)
			
 
				 		{
			
 
				-			fprintf(stderr,"error while opening %s\n", options->out_paje_path);
			
 
				+			_STARPU_MSG("error while opening %s\n", options->out_paje_path);
			
 
				 			perror("fopen");
			
 
				 			exit(1);
			
 
				 		}
			
@@ -3343,7 +3343,7 @@ void starpu_fxt_generate_trace(struct starpu_fxt_options *options)
 
				 				{
			
 
				 					if (key != unique_keys[inputfile])
			
 
				 					{
			
 
				-						fprintf(stderr, "Warning: traces are coming from different run so we will not try to display MPI communications.\n");
			
 
				+						_STARPU_MSG("Warning: traces are coming from different run so we will not try to display MPI communications.\n");
			
 
				 						display_mpi = 0;
			
 
				 					}
			
 
				 				}
			
@@ -3523,8 +3523,8 @@ void starpu_fxt_write_data_trace(char *filename_in)
 
				 
			
 
				 		default:
			
 
				 #ifdef STARPU_VERBOSE
			
 
				-			fprintf(stderr, "unknown event.. %x at time %llx WITH OFFSET %llx\n",
			
 
				-				(unsigned)ev.code, (long long unsigned)ev.time, (long long unsigned)(ev.time));
			
 
				+			_STARPU_MSG("unknown event.. %x at time %llx WITH OFFSET %llx\n",
			
 
				+				    (unsigned)ev.code, (long long unsigned)ev.time, (long long unsigned)(ev.time));
			
 
				 #endif
			
 
				 			break;
			
 
				 		}
			
--- a/src/debug/traces/starpu_fxt_dag.c
+++ b/src/debug/traces/starpu_fxt_dag.c
@@ -37,7 +37,7 @@ void _starpu_fxt_dag_init(char *out_path)
 
				 	out_file = fopen(out_path, "w+");
			
 
				 	if (!out_file)
			
 
				 	{
			
 
				-		fprintf(stderr,"error while opening %s\n", out_path);
			
 
				+		_STARPU_MSG("error while opening %s\n", out_path);
			
 
				 		perror("fopen");
			
 
				 		exit(1);
			
 
				 	}
			
--- a/src/debug/traces/starpu_fxt_mpi.c
+++ b/src/debug/traces/starpu_fxt_mpi.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2012-2013, 2016  Université Bordeaux
			
 
				- * Copyright (C) 2010, 2011, 2014, 2016  CNRS
			
 
				+ * Copyright (C) 2010, 2011, 2014, 2016, 2017  CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -70,7 +70,7 @@ int _starpu_fxt_mpi_find_sync_point(char *filename_in, uint64_t *offset, int *ke
 
				 		int ret = fxt_next_ev(block, FXT_EV_TYPE_64, (struct fxt_ev *)&ev);
			
 
				 		if (ret != FXT_EV_OK)
			
 
				 		{
			
 
				-			fprintf(stderr, "no more block ...\n");
			
 
				+			_STARPU_MSG("no more block ...\n");
			
 
				 			break;
			
 
				 		}
			
 
				 
			
--- a/src/drivers/cuda/driver_cuda.c
+++ b/src/drivers/cuda/driver_cuda.c
@@ -227,13 +227,13 @@ void starpu_cuda_set_device(unsigned devid STARPU_ATTRIBUTE_UNUSED)
 
				 #ifdef HAVE_CUDA_MEMCPY_PEER
			
 
				 	if (conf->n_cuda_opengl_interoperability)
			
 
				 	{
			
 
				-		fprintf(stderr, "OpenGL interoperability was requested, but StarPU was built with multithread GPU control support, please reconfigure with --disable-cuda-memcpy-peer but that will disable the memcpy-peer optimizations\n");
			
 
				+		_STARPU_MSG("OpenGL interoperability was requested, but StarPU was built with multithread GPU control support, please reconfigure with --disable-cuda-memcpy-peer but that will disable the memcpy-peer optimizations\n");
			
 
				 		STARPU_ABORT();
			
 
				 	}
			
 
				 #elif !defined(HAVE_CUDA_GL_INTEROP_H)
			
 
				 	if (conf->n_cuda_opengl_interoperability)
			
 
				 	{
			
 
				-		fprintf(stderr,"OpenGL interoperability was requested, but cuda_gl_interop.h could not be compiled, please make sure that OpenGL headers were available before ./configure run.");
			
 
				+		_STARPU_MSG("OpenGL interoperability was requested, but cuda_gl_interop.h could not be compiled, please make sure that OpenGL headers were available before ./configure run.");
			
 
				 		STARPU_ABORT();
			
 
				 	}
			
 
				 #else
			
@@ -324,7 +324,7 @@ static void init_device_context(unsigned devid, unsigned memnode)
 
				 	{
			
 
				 		if (cures == cudaErrorDevicesUnavailable)
			
 
				 		{
			
 
				-			fprintf(stderr,"All CUDA-capable devices are busy or unavailable\n");
			
 
				+			_STARPU_MSG("All CUDA-capable devices are busy or unavailable\n");
			
 
				 			exit(77);
			
 
				 		}
			
 
				 		STARPU_CUDA_REPORT_ERROR(cures);
			
@@ -336,7 +336,7 @@ static void init_device_context(unsigned devid, unsigned memnode)
 
				 #ifdef HAVE_CUDA_MEMCPY_PEER
			
 
				 	if (props[devid].computeMode == cudaComputeModeExclusive)
			
 
				 	{
			
 
				-		fprintf(stderr, "CUDA is in EXCLUSIVE-THREAD mode, but StarPU was built with multithread GPU control support, please either ask your administrator to use EXCLUSIVE-PROCESS mode (which should really be fine), or reconfigure with --disable-cuda-memcpy-peer but that will disable the memcpy-peer optimizations\n");
			
 
				+		_STARPU_MSG("CUDA is in EXCLUSIVE-THREAD mode, but StarPU was built with multithread GPU control support, please either ask your administrator to use EXCLUSIVE-PROCESS mode (which should really be fine), or reconfigure with --disable-cuda-memcpy-peer but that will disable the memcpy-peer optimizations\n");
			
 
				 		STARPU_ABORT();
			
 
				 	}
			
 
				 #endif
			
@@ -447,7 +447,7 @@ unsigned _starpu_get_cuda_device_count(void)
 
				 
			
 
				 	if (cnt > STARPU_MAXCUDADEVS)
			
 
				 	{
			
 
				-		fprintf(stderr, "# Warning: %d CUDA devices available. Only %d enabled. Use configure option --enable-maxcudadev=xxx to update the maximum value of supported CUDA devices.\n", cnt, STARPU_MAXCUDADEVS);
			
 
				+		_STARPU_MSG("# Warning: %d CUDA devices available. Only %d enabled. Use configure option --enable-maxcudadev=xxx to update the maximum value of supported CUDA devices.\n", cnt, STARPU_MAXCUDADEVS);
			
 
				 		cnt = STARPU_MAXCUDADEVS;
			
 
				 	}
			
 
				 	return (unsigned)cnt;
			
@@ -973,7 +973,7 @@ void starpu_cublas_report_error(const char *func, const char *file, int line, in
 
				 			errormsg = "unknown error";
			
 
				 			break;
			
 
				 	}
			
 
				-	fprintf(stderr, "oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg);
			
 
				+	_STARPU_MSG("oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg);
			
 
				 	STARPU_ABORT();
			
 
				 }
			
 
				 
			
--- a/src/drivers/scc/driver_scc_common.c
+++ b/src/drivers/scc/driver_scc_common.c
@@ -47,9 +47,9 @@ static void _starpu_scc_set_src_node_id()
 
				 		else if (RCCE_ue() == 0)
			
 
				 		{
			
 
				 			/* Only node 0 print the error message. */
			
 
				-			fprintf(stderr, "The node you specify to be the master is "
			
 
				-					"greater than the total number of nodes.\n"
			
 
				-					"Taking node 0 (core %d) by default...\n", RC_COREID[0]);
			
 
				+			_STARPU_MSG("The node you specify to be the master is "
			
 
				+				    "greater than the total number of nodes.\n"
			
 
				+				    "Taking node 0 (core %d) by default...\n", RC_COREID[0]);
			
 
				 		}
			
 
				 	}
			
 
				 
			
@@ -173,7 +173,7 @@ void _starpu_scc_common_report_rcce_error(const char *func, const char *file, co
 
				 
			
 
				 	RCCE_error_string(err_no, error_string, &error_string_length); 
			
 
				 
			
 
				-	fprintf(stderr, "RCCE error in %s (%s:%d): %s\n", func, file, line, error_string); 
			
 
				+	_STARPU_MSG("RCCE error in %s (%s:%d): %s\n", func, file, line, error_string); 
			
 
				 	STARPU_ABORT();
			
 
				 }
			
 
				 
			
--- a/src/drivers/scc/driver_scc_sink.c
+++ b/src/drivers/scc/driver_scc_sink.c
@@ -145,7 +145,7 @@ void _starpu_scc_sink_execute(const struct _starpu_mp_node *node, void *arg, int
 
				 			case STARPU_CSR_INTERFACE_ID:
			
 
				 			case STARPU_BCSR_INTERFACE_ID:
			
 
				 			case STARPU_MULTIFORMAT_INTERFACE_ID:
			
 
				-			fprintf(stderr, "Data type not supported on SCC.\n");
			
 
				+				_STARPU_MSG("Data type not supported on SCC.\n");
			
 
				 
			
 
				 			default:
			
 
				 				STARPU_ABORT();
			
--- a/src/drivers/scc/driver_scc_source.c
+++ b/src/drivers/scc/driver_scc_source.c
@@ -239,8 +239,8 @@ void _starpu_scc_set_offset_in_shared_memory(void *ptr, void **dev_handle, size_
 
				 	{
			
 
				 		if (!_starpu_scc_common_is_in_shared_memory(ptr))
			
 
				 		{
			
 
				-			fprintf(stderr, "The data (%p) you want to register does not seem to be allocated in shared memory. "
			
 
				-					"Please use starpu_malloc to do this.\n", ptr);
			
 
				+			_STARPU_MSG("The data (%p) you want to register does not seem to be allocated in shared memory. "
			
 
				+				    "Please use starpu_malloc to do this.\n", ptr);
			
 
				 			STARPU_ABORT();
			
 
				 		}
			
 
				 
			
--- a/src/profiling/bound.c
+++ b/src/profiling/bound.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010, 2011, 2012, 2013, 2016  CNRS
			
 
				+ * Copyright (C) 2010, 2011, 2012, 2013, 2016, 2017  CNRS
			
 
				  * Copyright (C) 2010-2016  Université de Bordeaux
			
 
				  * Copyright (C) 2011  Télécom-SudParis
			
 
				  *
			
@@ -397,7 +397,7 @@ void _starpu_bound_job_id_dep(starpu_data_handle_t handle, struct _starpu_job *j
 
				 	dep_t = find_job(id);
			
 
				 	if (!dep_t)
			
 
				 	{
			
 
				-		fprintf(stderr,"dependency %lu not found !\n", id);
			
 
				+		_STARPU_MSG("dependency %lu not found !\n", id);
			
 
				 		STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
			
 
				 		return;
			
 
				 	}
			
@@ -520,7 +520,7 @@ void starpu_bound_print_lp(FILE *output)
 
				 			if (t1->cl->model->type != STARPU_HISTORY_BASED &&
			
 
				 			    t1->cl->model->type != STARPU_NL_REGRESSION_BASED)
			
 
				 				/* TODO: */
			
 
				-				fprintf(stderr, "Warning: task %s uses a perf model which is neither history nor non-linear regression-based, support for such model is not implemented yet, system will not be solvable.\n", _starpu_codelet_get_model_name(t1->cl));
			
 
				+				_STARPU_MSG("Warning: task %s uses a perf model which is neither history nor non-linear regression-based, support for such model is not implemented yet, system will not be solvable.\n", _starpu_codelet_get_model_name(t1->cl));
			
 
				 
			
 
				 			struct _starpu_job j =
			
 
				 			{
			
@@ -824,7 +824,7 @@ void starpu_bound_print_lp(FILE *output)
 
				 				for (w = 0; w < nw; w++)
			
 
				 				{
			
 
				 					if (isnan(times[w*nt+t]))
			
 
				-						fprintf(stderr, "Warning: task %s has no performance measurement for worker %d.\n", _starpu_codelet_get_model_name(tp->cl), w);
			
 
				+						_STARPU_MSG("Warning: task %s has no performance measurement for worker %d.\n", _starpu_codelet_get_model_name(tp->cl), w);
			
 
				 					else
			
 
				 					{
			
 
				 						got_one = 1;
			
@@ -833,7 +833,7 @@ void starpu_bound_print_lp(FILE *output)
 
				 				}
			
 
				 				fprintf(output, " = %lu;\n", tp->n);
			
 
				 				if (!got_one)
			
 
				-					fprintf(stderr, "Warning: task %s has no performance measurement for any worker, system will not be solvable!\n", _starpu_codelet_get_model_name(tp->cl));
			
 
				+					_STARPU_MSG("Warning: task %s has no performance measurement for any worker, system will not be solvable!\n", _starpu_codelet_get_model_name(tp->cl));
			
 
				 				/* Show actual values */
			
 
				 				fprintf(output, "/*");
			
 
				 				for (w = 0; w < nw; w++)
			
@@ -1133,7 +1133,7 @@ void starpu_bound_print(FILE *output, int integer STARPU_ATTRIBUTE_UNUSED)
 
				 	}
			
 
				 	else
			
 
				 	{
			
 
				-		fprintf(stderr, "Simplex failed\n");
			
 
				+		_STARPU_MSG("Simplex failed\n");
			
 
				 	}
			
 
				 	STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
			
 
				 #else /* STARPU_HAVE_GLPK_H */
			
--- a/src/profiling/profiling_helpers.c
+++ b/src/profiling/profiling_helpers.c
@@ -31,17 +31,14 @@ static double convert_to_byte_units(float d, unsigned max_unit, unsigned *unit)
 
				 	return d;
			
 
				 }
			
 
				 
			
 
				-void starpu_profiling_bus_helper_display_summary(void)
			
 
				+void _starpu_profiling_bus_helper_display_summary(FILE *stream)
			
 
				 {
			
 
				-	const char *stats;
			
 
				 	int long long sum_transferred = 0;
			
 
				 	const char *byte_units[] = { "B", "KB", "MB", "GB", "TB" };
			
 
				 	unsigned max_unit = sizeof(byte_units) / sizeof(byte_units[0]);
			
 
				 
			
 
				-	if (!((stats = starpu_getenv("STARPU_BUS_STATS")) && atoi(stats))) return;
			
 
				-
			
 
				-	fprintf(stderr, "\n#---------------------\n");
			
 
				-	fprintf(stderr, "Data transfer stats:\n");
			
 
				+	fprintf(stream, "\n#---------------------\n");
			
 
				+	fprintf(stream, "Data transfer stats:\n");
			
 
				 
			
 
				 	int busid;
			
 
				 	int bus_cnt = starpu_bus_get_count();
			
@@ -66,10 +63,10 @@ void starpu_profiling_bus_helper_display_summary(void)
 
				 		_starpu_memory_node_get_name(src, src_name, sizeof(src_name));
			
 
				 		_starpu_memory_node_get_name(dst, dst_name, sizeof(dst_name));
			
 
				 
			
 
				-		fprintf(stderr, "\t%s -> %s", src_name, dst_name);
			
 
				-		fprintf(stderr, "\t%.2lf %s", d, byte_units[unit]);
			
 
				-		fprintf(stderr, "\t%.2lf %s/s", d / elapsed_time, byte_units[unit]);
			
 
				-		fprintf(stderr, "\t(transfers : %lld - avg %.2lf %s)\n", transfer_cnt, d / transfer_cnt, byte_units[unit]);
			
 
				+		fprintf(stream, "\t%s -> %s", src_name, dst_name);
			
 
				+		fprintf(stream, "\t%.2lf %s", d, byte_units[unit]);
			
 
				+		fprintf(stream, "\t%.2lf %s/s", d / elapsed_time, byte_units[unit]);
			
 
				+		fprintf(stream, "\t(transfers : %lld - avg %.2lf %s)\n", transfer_cnt, d / transfer_cnt, byte_units[unit]);
			
 
				 
			
 
				 		sum_transferred += transferred;
			
 
				 	}
			
@@ -77,23 +74,27 @@ void starpu_profiling_bus_helper_display_summary(void)
 
				 	unsigned unit = 0;
			
 
				 	double d = convert_to_byte_units(sum_transferred, max_unit, &unit);
			
 
				 
			
 
				-	fprintf(stderr, "Total transfers: %.2lf %s\n", d, byte_units[unit]);
			
 
				-	fprintf(stderr, "#---------------------\n");
			
 
				+	fprintf(stream, "Total transfers: %.2lf %s\n", d, byte_units[unit]);
			
 
				+	fprintf(stream, "#---------------------\n");
			
 
				 }
			
 
				 
			
 
				-void starpu_profiling_worker_helper_display_summary(void)
			
 
				+void starpu_profiling_bus_helper_display_summary(void)
			
 
				 {
			
 
				 	const char *stats;
			
 
				+	if (!((stats = starpu_getenv("STARPU_BUS_STATS")) && atoi(stats))) return;
			
 
				+	_starpu_profiling_bus_helper_display_summary(stderr);
			
 
				+}
			
 
				+
			
 
				+void _starpu_profiling_worker_helper_display_summary(FILE *stream)
			
 
				+{
			
 
				 	double sum_consumed = 0.;
			
 
				 	int profiling = starpu_profiling_status_get();
			
 
				 	double overall_time = 0;
			
 
				 	int workerid;
			
 
				 	int worker_cnt = starpu_worker_get_count();
			
 
				 
			
 
				-	if (!((stats = starpu_getenv("STARPU_WORKER_STATS")) && atoi(stats))) return;
			
 
				-
			
 
				-	fprintf(stderr, "\n#---------------------\n");
			
 
				-	fprintf(stderr, "Worker stats:\n");
			
 
				+	fprintf(stream, "\n#---------------------\n");
			
 
				+	fprintf(stream, "Worker stats:\n");
			
 
				 
			
 
				 	for (workerid = 0; workerid < worker_cnt; workerid++)
			
 
				 	{
			
@@ -103,8 +104,8 @@ void starpu_profiling_worker_helper_display_summary(void)
 
				 
			
 
				 		starpu_worker_get_name(workerid, name, sizeof(name));
			
 
				 
			
 
				-		fprintf(stderr, "%-32s\n", name);
			
 
				-		fprintf(stderr, "\t%d task(s)\n", info.executed_tasks);
			
 
				+		fprintf(stream, "%-32s\n", name);
			
 
				+		fprintf(stream, "\t%d task(s)\n", info.executed_tasks);
			
 
				 
			
 
				 		if (profiling)
			
 
				 		{
			
@@ -114,14 +115,14 @@ void starpu_profiling_worker_helper_display_summary(void)
 
				 			if (total_time > overall_time)
			
 
				 				overall_time = total_time;
			
 
				 
			
 
				-			fprintf(stderr, "\ttotal: %.2lf ms executing: %.2lf ms sleeping: %.2lf ms overhead %.2lf ms\n",
			
 
				+			fprintf(stream, "\ttotal: %.2lf ms executing: %.2lf ms sleeping: %.2lf ms overhead %.2lf ms\n",
			
 
				 				total_time, executing_time, sleeping_time, total_time - executing_time - sleeping_time);
			
 
				 			if (info.used_cycles || info.stall_cycles)
			
 
				-				fprintf(stderr, "\t%llu Mcy %llu Mcy stall\n", (unsigned long long)info.used_cycles/1000000, (unsigned long long)info.stall_cycles/1000000);
			
 
				+				fprintf(stream, "\t%llu Mcy %llu Mcy stall\n", (unsigned long long)info.used_cycles/1000000, (unsigned long long)info.stall_cycles/1000000);
			
 
				 			if (info.energy_consumed)
			
 
				-				fprintf(stderr, "\t%f J consumed\n", info.energy_consumed);
			
 
				+				fprintf(stream, "\t%f J consumed\n", info.energy_consumed);
			
 
				 			if (info.flops)
			
 
				-				fprintf(stderr, "\t%f GFlop/s\n\n", info.flops / total_time / 1000000);
			
 
				+				fprintf(stream, "\t%f GFlop/s\n\n", info.flops / total_time / 1000000);
			
 
				 		}
			
 
				 
			
 
				 		sum_consumed += info.energy_consumed;
			
@@ -135,10 +136,17 @@ void starpu_profiling_worker_helper_display_summary(void)
 
				 			double idle_power = atof(strval_idle_power); /* Watt */
			
 
				 			double idle_energy = idle_power * overall_time / 1000.; /* J */
			
 
				 
			
 
				-			fprintf(stderr, "Idle energy: %.2lf J\n", idle_energy);
			
 
				-			fprintf(stderr, "Total energy: %.2lf J\n",
			
 
				+			fprintf(stream, "Idle energy: %.2lf J\n", idle_energy);
			
 
				+			fprintf(stream, "Total energy: %.2lf J\n",
			
 
				 				sum_consumed + idle_energy);
			
 
				 		}
			
 
				 	}
			
 
				-	fprintf(stderr, "#---------------------\n");
			
 
				+	fprintf(stream, "#---------------------\n");
			
 
				+}
			
 
				+
			
 
				+void starpu_profiling_worker_helper_display_summary(void)
			
 
				+{
			
 
				+	const char *stats;
			
 
				+	if (!((stats = starpu_getenv("STARPU_WORKER_STATS")) && atoi(stats))) return;
			
 
				+	_starpu_profiling_worker_helper_display_summary(stderr);
			
 
				 }
			
--- a/src/sched_policies/deque_modeling_policy_data_aware.c
+++ b/src/sched_policies/deque_modeling_policy_data_aware.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2010-2017  Université de Bordeaux
			
 
				- * Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016  CNRS
			
 
				+ * Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016, 2017  CNRS
			
 
				  * Copyright (C) 2011  Télécom-SudParis
			
 
				  * Copyright (C) 2011-2012, 2016  INRIA
			
 
				  * Copyright (C) 2016  Uppsala University
			
@@ -111,10 +111,9 @@ static void param_modified(struct starpu_top_param* d)
 
				 #warning FIXME: get sched ctx to get alpha/beta/gamma/idle values
			
 
				 #endif
			
 
				 	/* Just to show parameter modification. */
			
 
				-	fprintf(stderr,
			
 
				-		"%s has been modified : "
			
 
				-		"alpha=%f|beta=%f|gamma=%f|idle_power=%f !\n",
			
 
				-		d->name, alpha,beta,_gamma, idle_power);
			
 
				+	_STARPU_MSG("%s has been modified : "
			
 
				+		    "alpha=%f|beta=%f|gamma=%f|idle_power=%f !\n",
			
 
				+		    d->name, alpha,beta,_gamma, idle_power);
			
 
				 }
			
 
				 #endif /* !STARPU_USE_TOP */
			
 
				 
			
--- a/src/sched_policies/helper_mct.c
+++ b/src/sched_policies/helper_mct.c
@@ -32,8 +32,7 @@
 
				 static void param_modified(struct starpu_top_param* d)
			
 
				 {
			
 
				 	/* Just to show parameter modification. */
			
 
				-	fprintf(stderr, "%s has been modified : %f\n",
			
 
				-			d->name, *(double*) d->value);
			
 
				+	_STARPU_MSG("%s has been modified : %f\n", d->name, *(double*) d->value);
			
 
				 }
			
 
				 #endif /* !STARPU_USE_TOP */
			
 
				 
			
--- a/src/sched_policies/parallel_eager.c
+++ b/src/sched_policies/parallel_eager.c
@@ -3,7 +3,7 @@
 
				  * Copyright (C) 2011-2016  Université de Bordeaux
			
 
				  * Copyright (C) 2011  Télécom-SudParis
			
 
				  * Copyright (C) 2011-2013  INRIA
			
 
				- * Copyright (C) 2016       CNRS
			
 
				+ * Copyright (C) 2016, 2017       CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -100,7 +100,7 @@ static void peager_add_workers(unsigned sched_ctx_id, int *workerids, unsigned n
 
				         {
			
 
				 		workerid = workerids[i];
			
 
				 
			
 
				-		fprintf(stderr, "MASTER of %d = %d\n", workerid, master_id[workerid]);
			
 
				+		_STARPU_MSG("MASTER of %d = %d\n", workerid, master_id[workerid]);
			
 
				 	}
			
 
				 #endif
			
 
				 }
			
--- a/src/sched_policies/scheduler_maker.c
+++ b/src/sched_policies/scheduler_maker.c
@@ -265,7 +265,7 @@ struct starpu_sched_tree * starpu_sched_component_make_scheduler(unsigned sched_
 
				 
			
 
				 	starpu_sched_tree_update_workers(tree);
			
 
				 #ifdef STARPU_DEVEL
			
 
				-	fprintf(stderr, "scheduler created :\n");
			
 
				+	_STARPU_MSG("scheduler created :\n");
			
 
				 	helper_display_scheduler(stderr, 0, tree->root);
			
 
				 #endif
			
 
				 
			
--- a/src/util/openmp_runtime_support_environment.c
+++ b/src/util/openmp_runtime_support_environment.c
@@ -236,7 +236,7 @@ static void read_wait_policy_var()
 
				 	ret = read_string_var(env, strings, &value);
			
 
				 	if (!ret)
			
 
				 	{
			
 
				-		fprintf(stderr, "StarPU: Invalid value for environment variable OMP_WAIT_POLICY\n");
			
 
				+		_STARPU_MSG("StarPU: Invalid value for environment variable OMP_WAIT_POLICY\n");
			
 
				 		return;
			
 
				 	}
			
 
				 	_initial_icv_values.wait_policy_var = value;
			
@@ -256,7 +256,7 @@ static void read_display_env_var(int *dest)
 
				 	ret = read_string_var(env, strings, &value);
			
 
				 	if (!ret)
			
 
				 	{
			
 
				-		fprintf(stderr, "StarPU: Invalid value for environment variable OMP_DISPLAY_ENV\n");
			
 
				+		_STARPU_MSG("StarPU: Invalid value for environment variable OMP_DISPLAY_ENV\n");
			
 
				 		return;
			
 
				 	}
			
 
				 
			
@@ -559,7 +559,7 @@ static void read_proc_bind_var()
 
				 
			
 
				 			if (!read_string_var(token, strings, &value))
			
 
				 			{
			
 
				-				fprintf(stderr, "StarPU: Invalid value for environment variable OMP_PROC_BIND\n");
			
 
				+				_STARPU_MSG("StarPU: Invalid value for environment variable OMP_PROC_BIND\n");
			
 
				 				break;
			
 
				 			}
			
 
				 
			
@@ -590,7 +590,7 @@ static void read_num_threads_var()
 
				 
			
 
				 			if (!read_int_var(token, &value))
			
 
				 			{
			
 
				-				fprintf(stderr, "StarPU: Invalid value for environment variable OMP_NUM_THREADS\n");
			
 
				+				_STARPU_MSG("StarPU: Invalid value for environment variable OMP_NUM_THREADS\n");
			
 
				 				break;
			
 
				 			}
			
 
				 
			
@@ -613,7 +613,7 @@ static void read_omp_int_var(const char *name, int *icv)
 
				 	ret = read_int_var(env, &value);
			
 
				 	if (!ret || value < 0)
			
 
				 	{
			
 
				-		fprintf(stderr, "StarPU: Invalid value for environment variable %s\n", name);
			
 
				+		_STARPU_MSG("StarPU: Invalid value for environment variable %s\n", name);
			
 
				 		return;
			
 
				 	}
			
 
				 	*icv = value;
			
@@ -632,7 +632,7 @@ static void read_omp_boolean_var(const char *name, int *icv)
 
				 	ret = read_string_var(env, strings, &value);
			
 
				 	if (!ret)
			
 
				 	{
			
 
				-		fprintf(stderr, "StarPU: Invalid value for environment variable %s\n", name);
			
 
				+		_STARPU_MSG("StarPU: Invalid value for environment variable %s\n", name);
			
 
				 		return;
			
 
				 	}
			
 
				 	*icv = value;
			
--- a/src/util/starpu_task_insert.c
+++ b/src/util/starpu_task_insert.c
@@ -141,11 +141,11 @@ int _starpu_task_insert_v(struct starpu_codelet *cl, va_list varg_list)
 
				 
			
 
				 	if (STARPU_UNLIKELY(ret == -ENODEV))
			
 
				 	{
			
 
				-		fprintf(stderr, "submission of task %p wih codelet %p failed (symbol `%s') (err: ENODEV)\n",
			
 
				-			task, task->cl,
			
 
				-			(cl == NULL) ? "none" :
			
 
				-			task->cl->name ? task->cl->name :
			
 
				-			(task->cl->model && task->cl->model->symbol)?task->cl->model->symbol:"none");
			
 
				+		_STARPU_MSG("submission of task %p wih codelet %p failed (symbol `%s') (err: ENODEV)\n",
			
 
				+			    task, task->cl,
			
 
				+			    (cl == NULL) ? "none" :
			
 
				+			    task->cl->name ? task->cl->name :
			
 
				+			    (task->cl->model && task->cl->model->symbol)?task->cl->model->symbol:"none");
			
 
				 
			
 
				 		task->destroy = 0;
			
 
				 		starpu_task_destroy(task);
			
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -1,7 +1,7 @@
 
				 # StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				 #
			
 
				 # Copyright (C) 2009-2017  Université de Bordeaux
			
 
				-# Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016  CNRS
			
 
				+# Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  CNRS
			
 
				 # Copyright (C) 2010, 2011, 2012  INRIA
			
 
				 #
			
 
				 # StarPU is free software; you can redistribute it and/or modify
			
@@ -157,6 +157,7 @@ myPROGRAMS +=				\
 
				 	microbenchs/display_structures_size	\
			
 
				 	microbenchs/local_pingpong		\
			
 
				 	sched_ctx/sched_ctx_list		\
			
 
				+	sched_ctx/sched_ctx_hierarchy		\
			
 
				 	perfmodels/value_nan
			
 
				 
			
 
				 if !STARPU_SIMGRID
			
@@ -579,6 +580,9 @@ endif
 
				 sched_ctx_sched_ctx_list_SOURCES =	\
			
 
				 	sched_ctx/sched_ctx_list.c
			
 
				 
			
 
				+sched_ctx_sched_ctx_hierarchy_SOURCES =	\
			
 
				+	sched_ctx/sched_ctx_hierarchy.c
			
 
				+
			
 
				 openmp_init_exit_01_SOURCES = 	\
			
 
				 	openmp/init_exit_01.c
			
 
				 
			
--- a/tests/microbenchs/display_structures_size.c
+++ b/tests/microbenchs/display_structures_size.c
@@ -1,6 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2010, 2016  Université de Bordeaux
			
 
				+ * Copyright (C) 2017        CNRS
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -23,7 +24,7 @@
 
				 
			
 
				 int main(int argc, char **argv)
			
 
				 {
			
 
				-	_starpu_debug_display_structures_size();
			
 
				+	_starpu_debug_display_structures_size(stderr);
			
 
				 
			
 
				 	return EXIT_SUCCESS;
			
 
				 }
			
--- a/tests/sched_ctx/sched_ctx_hierarchy.c
+++ b/tests/sched_ctx/sched_ctx_hierarchy.c
@@ -0,0 +1,162 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2017  CNRS
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <config.h>
			
 
				+#include <starpu.h>
			
 
				+#include "../helper.h"
			
 
				+
			
 
				+struct starpu_codelet mycodelet_bis;
			
 
				+void func_cpu_bis(void *descr[], void *_args)
			
 
				+{
			
 
				+	char msg;
			
 
				+	char worker_name[256];
			
 
				+	int worker_id = starpu_worker_get_id();
			
 
				+	int worker_id_expected;
			
 
				+	int ntasks;
			
 
				+
			
 
				+	starpu_worker_get_name(worker_id, worker_name, 256);
			
 
				+	starpu_codelet_unpack_args(_args, &msg, &ntasks, &worker_id_expected);
			
 
				+
			
 
				+	STARPU_ASSERT(worker_id == worker_id_expected);
			
 
				+
			
 
				+	FPRINTF(stderr, "[msg '%c'] [worker id %d] [worker name %s] [tasks %d]\n", msg, worker_id, worker_name, ntasks);
			
 
				+	if (ntasks > 0)
			
 
				+	{
			
 
				+		int nntasks = ntasks - 1;
			
 
				+		starpu_task_insert(&mycodelet_bis,
			
 
				+				   STARPU_VALUE, &msg, sizeof(msg),
			
 
				+				   STARPU_VALUE, &nntasks, sizeof(ntasks),
			
 
				+				   STARPU_VALUE, &worker_id, sizeof(worker_id),
			
 
				+				   0);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+struct starpu_codelet mycodelet_bis =
			
 
				+{
			
 
				+	.cpu_funcs = {func_cpu_bis},
			
 
				+	.cpu_funcs_name = {"func_cpu_bis"},
			
 
				+};
			
 
				+
			
 
				+void func_cpu(void *descr[], void *_args)
			
 
				+{
			
 
				+	char msg;
			
 
				+	char worker_name[256];
			
 
				+	int worker_id = starpu_worker_get_id();
			
 
				+	int worker_id_expected;
			
 
				+	int ntasks;
			
 
				+	unsigned sched_ctx_id;
			
 
				+	unsigned *sched_ctx_id_p;
			
 
				+
			
 
				+	starpu_worker_get_name(worker_id, worker_name, 256);
			
 
				+	starpu_codelet_unpack_args(_args, &msg, &ntasks, &sched_ctx_id, &worker_id_expected, &sched_ctx_id_p);
			
 
				+
			
 
				+	STARPU_ASSERT(worker_id == worker_id_expected);
			
 
				+
			
 
				+	*sched_ctx_id_p = sched_ctx_id;
			
 
				+	starpu_sched_ctx_set_context(sched_ctx_id_p);
			
 
				+
			
 
				+	FPRINTF(stderr, "[msg '%c'] [worker id %d] [worker name %s] [sched_ctx_id %u] [tasks %d] [buffer %p]\n", msg, worker_id, worker_name, sched_ctx_id, ntasks, sched_ctx_id_p);
			
 
				+	if (ntasks > 0)
			
 
				+	{
			
 
				+		int nntasks = ntasks - 1;
			
 
				+		starpu_task_insert(&mycodelet_bis,
			
 
				+				   STARPU_VALUE, &msg, sizeof(msg),
			
 
				+				   STARPU_VALUE, &nntasks, sizeof(nntasks),
			
 
				+				   STARPU_VALUE, &worker_id, sizeof(worker_id),
			
 
				+				   0);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+struct starpu_codelet mycodelet =
			
 
				+{
			
 
				+	.cpu_funcs = {func_cpu},
			
 
				+	.cpu_funcs_name = {"func_cpu"},
			
 
				+};
			
 
				+
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+        int i, ret;
			
 
				+	int nprocs, nprocs_per_context=1;
			
 
				+        int procs[STARPU_NMAXWORKERS];
			
 
				+	int ntasks=10;
			
 
				+	char msg[2] = "ab";
			
 
				+	unsigned *buffer[2];
			
 
				+
			
 
				+	ret = starpu_init(NULL);
			
 
				+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
			
 
				+
			
 
				+        nprocs = starpu_cpu_worker_get_count();
			
 
				+	if (nprocs < 2) goto enodev;
			
 
				+
			
 
				+	nprocs_per_context = 1;
			
 
				+	FPRINTF(stderr, "# Workers = %d -> %d worker for each sched context\n", nprocs, nprocs_per_context);
			
 
				+        starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs, nprocs);
			
 
				+
			
 
				+	unsigned sched_ctx_0 = starpu_sched_ctx_create(procs, nprocs_per_context, "ctx_0", 0);
			
 
				+	unsigned sched_ctx_1 = starpu_sched_ctx_create(&procs[nprocs_per_context], nprocs_per_context, "ctx_1", 0);
			
 
				+
			
 
				+	if (!getenv("STARPU_SSILENT"))
			
 
				+	{
			
 
				+		char name0[256];
			
 
				+		char name1[256];
			
 
				+
			
 
				+		starpu_worker_get_name(procs[0], name0, 256);
			
 
				+		starpu_worker_get_name(procs[1], name1, 256);
			
 
				+
			
 
				+		FPRINTF(stderr, "Creating first sched_ctx with %d worker [id %d name %s]\n", nprocs_per_context, procs[0], name0);
			
 
				+		FPRINTF(stderr, "Creating second sched_ctx with %d worker [id %d name %s]\n", nprocs_per_context, procs[1], name1);
			
 
				+
			
 
				+		starpu_sched_ctx_display_workers(sched_ctx_0, stderr);
			
 
				+		starpu_sched_ctx_display_workers(sched_ctx_1, stderr);
			
 
				+	}
			
 
				+
			
 
				+	buffer[0] = malloc(sizeof(unsigned));
			
 
				+	buffer[1] = malloc(sizeof(unsigned));
			
 
				+	FPRINTF(stderr, "allocating %p and %p\n", buffer[0], buffer[1]);
			
 
				+
			
 
				+	ret = starpu_task_insert(&mycodelet, STARPU_SCHED_CTX, sched_ctx_0,
			
 
				+				 STARPU_VALUE, &msg[0], sizeof(msg[0]),
			
 
				+				 STARPU_VALUE, &ntasks, sizeof(ntasks),
			
 
				+				 STARPU_VALUE, &sched_ctx_0, sizeof(sched_ctx_0),
			
 
				+				 STARPU_VALUE, &procs[0], sizeof(procs[0]),
			
 
				+				 STARPU_VALUE, &buffer[0], sizeof(buffer[0]),
			
 
				+				 0);
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
			
 
				+	ret = starpu_task_insert(&mycodelet, STARPU_SCHED_CTX, sched_ctx_1,
			
 
				+				 STARPU_VALUE, &msg[1], sizeof(msg[1]),
			
 
				+				 STARPU_VALUE, &ntasks, sizeof(ntasks),
			
 
				+				 STARPU_VALUE, &sched_ctx_1, sizeof(sched_ctx_1),
			
 
				+				 STARPU_VALUE, &procs[1], sizeof(procs[1]),
			
 
				+				 STARPU_VALUE, &buffer[1], sizeof(buffer[1]),
			
 
				+				 0);
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
			
 
				+
			
 
				+	starpu_task_wait_for_all();
			
 
				+	starpu_sched_ctx_delete(sched_ctx_0);
			
 
				+	starpu_sched_ctx_delete(sched_ctx_1);
			
 
				+	starpu_shutdown();
			
 
				+	free(buffer[0]);
			
 
				+	free(buffer[1]);
			
 
				+	return 0;
			
 
				+
			
 
				+enodev:
			
 
				+	starpu_shutdown();
			
 
				+	fprintf(stderr, "WARNING: No one can execute this task\n");
			
 
				+	/* yes, we do not perform the computation but we did detect that no one
			
 
				+ 	 * could perform the kernel, so this is not an error from StarPU */
			
 
				+	return STARPU_TEST_SKIPPED;
			
 
				+}
			
--- a/tools/cppcheck/suppressions.txt
+++ b/tools/cppcheck/suppressions.txt
@@ -28,8 +28,8 @@ unusedPrivateFunction:tests/main/combined_workers/bfs/timer.h:45
 
				 redundantAssignment:tests/main/driver_api/init_run_deinit.c
			
 
				 redundantAssignment:tests/main/driver_api/run_driver.c
			
 
				 
			
 
				-uselessAssignmentPtrArg:mpi/src/starpu_mpi.c:155
			
 
				-unreadVariable:mpi/src/starpu_mpi.c:849
			
 
				+uselessAssignmentPtrArg:mpi/src/starpu_mpi.c:171
			
 
				+unreadVariable:mpi/src/starpu_mpi.c:945
			
 
				 redundantAssignment:src/core/workers.c
			
 
				 
			
 
				 invalidPointerCast:src/core/perfmodel/perfmodel_nan.c:74
			
@@ -42,20 +42,20 @@ unusedStructMember:src/core/perfmodel/perfmodel_bus.c:65
 
				 unusedStructMember:src/core/perfmodel/perfmodel_bus.c:66
			
 
				 unusedStructMember:src/core/simgrid.c:225
			
 
				 unusedStructMember:src/core/simgrid.c:226
			
 
				+wrongPrintfScanfArgNum:src/core/simgrid.c:719
			
 
				 duplicateExpression:src/util/starpu_task_insert.c:52
			
 
				 
			
 
				 // TODO: this could be an error?
			
 
				 redundantCopy:src/core/disk_ops/disk_leveldb.cpp:192
			
 
				 
			
 
				 nullPointerRedundantCheck:src/common/rbtree.c
			
 
				-wrongPrintfScanfArgNum:src/core/simgrid.c:715
			
 
				 unreadVariable:src/datawizard/interfaces/*
			
 
				 unreadVariable:src/drivers/driver_common/driver_common.c:482
			
 
				 clarifyCondition:src/drivers/opencl/driver_opencl.c:936
			
 
				 unreadVariable:src/drivers/opencl/driver_opencl.c:767
			
 
				 clarifyCondition:src/drivers/cuda/driver_cuda.c:506
			
 
				 arithOperationsOnVoidPointer:src/drivers/scc/*
			
 
				-nullPointerRedundantCheck:src/sched_policies/deque_modeling_policy_data_aware.c:197
			
 
				+nullPointerRedundantCheck:src/sched_policies/deque_modeling_policy_data_aware.c:196
			
 
				 sizeofDereferencedVoidPointer:src/util/fstarpu.c
			
 
				 
			
 
				 allocaCalled:gcc-plugin/src/*