12 年之前 · 3b7abcc2cf
--- a/configure.ac
+++ b/configure.ac
@@ -964,7 +964,7 @@ AC_DEFINE_UNQUOTED(STARPU_MAXMICDEVS, [$nmaxmicdev],
 
				 AC_MSG_CHECKING(maximum number of MIC threads)
			
 
				 AC_ARG_ENABLE(maxmicthreads, [AS_HELP_STRING([--enable-maxmicthreads=<number>],
			
 
				 			[maximum number of MIC threads])],
			
 
				-			nmaxmicthreads=$enableval, nmaxmicthreads=960)
			
 
				+			nmaxmicthreads=$enableval, nmaxmicthreads=940)
			
 
				 AC_MSG_RESULT($nmaxmicthread)
			
 
				 
			
 
				 AC_DEFINE_UNQUOTED(STARPU_MAXMICCORES, [$nmaxmicthreads],
			
--- a/doc/doxygen/chapters/performance_feedback.doxy
+++ b/doc/doxygen/chapters/performance_feedback.doxy
@@ -10,7 +10,7 @@
 
				 
			
 
				 \section UsingTheTemanejoTaskDebugger Using The Temanejo Task Debugger
			
 
				 
			
 
				-StarPU can connect to Temanejo (see
			
 
				+StarPU can connect to Temanejo >= 1.0rc2 (see
			
 
				 http://www.hlrs.de/temanejo), to permit
			
 
				 nice visual task debugging. To do so, build Temanejo's <c>libayudame.so</c>,
			
 
				 install <c>Ayudame.h</c> to e.g. <c>/usr/local/include</c>, apply the
			
--- a/examples/stencil/stencil-tasks.c
+++ b/examples/stencil/stencil-tasks.c
@@ -221,7 +221,8 @@ static struct starpu_codelet null =
 
				 	.cpu_funcs_name = {"null_func", NULL},
			
 
				 	.cuda_funcs = {null_func, NULL},
			
 
				 	.opencl_funcs = {null_func, NULL},
			
 
				-	.nbuffers = 2
			
 
				+	.nbuffers = 2,
			
 
				+	.name = "start"
			
 
				 };
			
 
				 
			
 
				 void create_start_task(int z, int dir)
			
@@ -267,11 +268,15 @@ void create_tasks(int rank)
 
				 	}
			
 
				 
			
 
				 	for (iter = 0; iter <= niter; iter++)
			
 
				+	{
			
 
				 	for (bz = 0; bz < nbz; bz++)
			
 
				 	{
			
 
				 		if ((iter > 0) && (get_block_mpi_node(bz) == rank))
			
 
				 			create_task_update(iter, bz, rank);
			
 
				 
			
 
				+	}
			
 
				+	for (bz = 0; bz < nbz; bz++)
			
 
				+	{
			
 
				 		if (iter != niter)
			
 
				 		{
			
 
				 			if ((get_block_mpi_node(bz) == rank) || (get_block_mpi_node(bz+1) == rank))
			
@@ -281,6 +286,7 @@ void create_tasks(int rank)
 
				 				create_task_save(iter, bz, -1, rank);
			
 
				 		}
			
 
				 	}
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 /*
			
--- a/mpi/src/starpu_mpi.c
+++ b/mpi/src/starpu_mpi.c
@@ -37,7 +37,8 @@ static struct _starpu_mpi_req *_starpu_mpi_isend_common(starpu_data_handle_t dat
 
				 static struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t data_handle,
			
 
				 							int source, int mpi_tag, MPI_Comm comm,
			
 
				 							unsigned detached, void (*callback)(void *), void *arg,
			
 
				-							int sequential_consistency);
			
 
				+							int sequential_consistency, int is_internal_req,
			
 
				+							ssize_t psize);
			
 
				 static void _starpu_mpi_handle_detached_request(struct _starpu_mpi_req *req);
			
 
				 
			
 
				 /* The list of requests that have been newly submitted by the application */
			
@@ -76,59 +77,60 @@ struct _starpu_mpi_copy_handle
 
				  /*                                                      */
			
 
				  /********************************************************/
			
 
				 
			
 
				-static struct _starpu_mpi_req *_starpu_mpi_req_hashmap = NULL;
			
 
				+/** stores application requests for which data have not been received yet */
			
 
				+static struct _starpu_mpi_req *_starpu_mpi_app_req_hashmap = NULL;
			
 
				 /** stores data which have been received by MPI but have not been requested by the application */
			
 
				 static struct _starpu_mpi_copy_handle *_starpu_mpi_copy_handle_hashmap = NULL;
			
 
				 
			
 
				-static struct _starpu_mpi_req* find_req(int mpi_tag)
			
 
				+static struct _starpu_mpi_req* find_app_req(int mpi_tag)
			
 
				 {
			
 
				-	struct _starpu_mpi_req* req; // = malloc(sizeof(struct _starpu_mpi_req));
			
 
				+	struct _starpu_mpi_req* req;
			
 
				 
			
 
				-	HASH_FIND_INT(_starpu_mpi_req_hashmap, &mpi_tag, req);
			
 
				+	HASH_FIND_INT(_starpu_mpi_app_req_hashmap, &mpi_tag, req);
			
 
				 
			
 
				 	return req;
			
 
				 }
			
 
				 
			
 
				-static void add_req(struct _starpu_mpi_req *req)
			
 
				+static void add_app_req(struct _starpu_mpi_req *req)
			
 
				 {
			
 
				 	struct _starpu_mpi_req *test_req;
			
 
				 
			
 
				-	test_req = find_req(req->mpi_tag);
			
 
				+	test_req = find_app_req(req->mpi_tag);
			
 
				 
			
 
				 	if (test_req == NULL)
			
 
				 	{
			
 
				-		HASH_ADD_INT(_starpu_mpi_req_hashmap, mpi_tag, req);
			
 
				-		_STARPU_MPI_DEBUG(3, "Adding request %p with tag %d in the hashmap. \n", req, req->mpi_tag);
			
 
				+		HASH_ADD_INT(_starpu_mpi_app_req_hashmap, mpi_tag, req);
			
 
				+		_STARPU_MPI_DEBUG(3, "Adding request %p with tag %d in the application request hashmap. \n", req, req->mpi_tag);
			
 
				 	}
			
 
				 	else
			
 
				 	{
			
 
				-		_STARPU_MPI_DEBUG(3, "Error add_req : request %p with tag %d already in the hashmap. \n", req, req->mpi_tag);
			
 
				+		_STARPU_MPI_DEBUG(3, "[Error] request %p with tag %d already in the application request hashmap. \n", req, req->mpi_tag);
			
 
				 		int seq_const = starpu_data_get_sequential_consistency_flag(req->data_handle);
			
 
				 		if (seq_const &&  req->sequential_consistency)
			
 
				 		{
			
 
				-			STARPU_ASSERT_MSG(!test_req, "Error add_req : request %p with tag %d wanted to be added to the hashmap, while another request %p with the same tag is already in it. \n Sequential consistency is activated : this is not supported by StarPU.", req, req->mpi_tag, test_req);
			
 
				+			STARPU_ASSERT_MSG(!test_req, "[Error] request %p with tag %d wanted to be added to the application request hashmap, while another request %p with the same tag is already in it. \n Sequential consistency is activated : this is not supported by StarPU.", req, req->mpi_tag, test_req);
			
 
				 		}
			
 
				 		else
			
 
				 		{
			
 
				-			STARPU_ASSERT_MSG(!test_req, "Error add_req : request %p with tag %d wanted to be added to the hashmap, while another request %p with the same tag is already in it. \n Sequential consistency isn't activated for this handle : you should want to add dependencies between requests for which the sequential consistency is deactivated.", req, req->mpi_tag, test_req);
			
 
				+			STARPU_ASSERT_MSG(!test_req, "[Error] request %p with tag %d wanted to be added to the application request hashmap, while another request %p with the same tag is already in it. \n Sequential consistency isn't activated for this handle : you should want to add dependencies between requests for which the sequential consistency is deactivated.", req, req->mpi_tag, test_req);
			
 
				 		}
			
 
				 	}
			
 
				 }
			
 
				 
			
 
				-static void delete_req(struct _starpu_mpi_req *req)
			
 
				+static void delete_app_req(struct _starpu_mpi_req *req)
			
 
				 {
			
 
				 	struct _starpu_mpi_req *test_req;
			
 
				 
			
 
				-	test_req = find_req(req->mpi_tag);
			
 
				+	test_req = find_app_req(req->mpi_tag);
			
 
				 
			
 
				 	if (test_req != NULL)
			
 
				 	{
			
 
				-		HASH_DEL(_starpu_mpi_req_hashmap, req);
			
 
				-		_STARPU_MPI_DEBUG(3, "Deleting request %p with tag %d from the hashmap. \n", req, req->mpi_tag);
			
 
				+		HASH_DEL(_starpu_mpi_app_req_hashmap, req);
			
 
				+		_STARPU_MPI_DEBUG(3, "Deleting application request %p with tag %d from the application request hashmap. \n", req, req->mpi_tag);
			
 
				 	}
			
 
				 	else
			
 
				 	{
			
 
				-		_STARPU_MPI_DEBUG(3, "Warning delete_req : request %p with tag %d isn't in the hashmap. \n", req, req->mpi_tag);
			
 
				+		_STARPU_MPI_DEBUG(3, "[Warning] request %p with tag %d is NOT in the application request hashmap. \n", req, req->mpi_tag);
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -219,7 +221,7 @@ static void _starpu_mpi_request_init(struct _starpu_mpi_req *req)
 
				 	req->is_internal_req = 0;
			
 
				 	req->envelope = NULL;
			
 
				 	req->sequential_consistency = 1;
			
 
				- }
			
 
				+}
			
 
				 
			
 
				  /********************************************************/
			
 
				  /*                                                      */
			
@@ -232,8 +234,10 @@ static void _starpu_mpi_request_init(struct _starpu_mpi_req *req)
 
				 							       unsigned detached, void (*callback)(void *), void *arg,
			
 
				 							       enum _starpu_mpi_request_type request_type, void (*func)(struct _starpu_mpi_req *),
			
 
				 							       enum starpu_data_access_mode mode,
			
 
				-							       int sequential_consistency)
			
 
				- {
			
 
				+							       int sequential_consistency,
			
 
				+							       int is_internal_req,
			
 
				+							       ssize_t psize)
			
 
				+{
			
 
				 
			
 
				 	 _STARPU_MPI_LOG_IN();
			
 
				 	 struct _starpu_mpi_req *req = malloc(sizeof(struct _starpu_mpi_req));
			
@@ -253,6 +257,8 @@ static void _starpu_mpi_request_init(struct _starpu_mpi_req *req)
 
				 	 req->callback_arg = arg;
			
 
				 	 req->func = func;
			
 
				 	 req->sequential_consistency = sequential_consistency;
			
 
				+	 req->is_internal_req = is_internal_req;
			
 
				+	 req->count = psize;
			
 
				 
			
 
				 	 /* Asynchronously request StarPU to fetch the data in main memory: when
			
 
				 	  * it is available in main memory, _starpu_mpi_submit_new_mpi_request(req) is called and
			
@@ -354,7 +360,7 @@ static struct _starpu_mpi_req *_starpu_mpi_isend_common(starpu_data_handle_t dat
 
				 							unsigned detached, void (*callback)(void *), void *arg,
			
 
				 							int sequential_consistency)
			
 
				 {
			
 
				-	return _starpu_mpi_isend_irecv_common(data_handle, dest, mpi_tag, comm, detached, callback, arg, SEND_REQ, _starpu_mpi_isend_size_func, STARPU_R, sequential_consistency);
			
 
				+	return _starpu_mpi_isend_irecv_common(data_handle, dest, mpi_tag, comm, detached, callback, arg, SEND_REQ, _starpu_mpi_isend_size_func, STARPU_R, sequential_consistency, 0, 0);
			
 
				 }
			
 
				 
			
 
				 int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, int mpi_tag, MPI_Comm comm)
			
@@ -429,9 +435,9 @@ static void _starpu_mpi_irecv_data_func(struct _starpu_mpi_req *req)
 
				 	_STARPU_MPI_LOG_OUT();
			
 
				 }
			
 
				 
			
 
				-static struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, unsigned detached, void (*callback)(void *), void *arg, int sequential_consistency)
			
 
				+static struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, unsigned detached, void (*callback)(void *), void *arg, int sequential_consistency, int is_internal_req, ssize_t psize)
			
 
				 {
			
 
				-	return _starpu_mpi_isend_irecv_common(data_handle, source, mpi_tag, comm, detached, callback, arg, RECV_REQ, _starpu_mpi_irecv_data_func, STARPU_W, sequential_consistency);
			
 
				+	return _starpu_mpi_isend_irecv_common(data_handle, source, mpi_tag, comm, detached, callback, arg, RECV_REQ, _starpu_mpi_irecv_data_func, STARPU_W, sequential_consistency, is_internal_req, psize);
			
 
				 }
			
 
				 
			
 
				 int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int source, int mpi_tag, MPI_Comm comm)
			
@@ -447,7 +453,7 @@ int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *public_re
 
				 		starpu_data_set_tag(data_handle, mpi_tag);
			
 
				 
			
 
				 	struct _starpu_mpi_req *req;
			
 
				-	req = _starpu_mpi_irecv_common(data_handle, source, mpi_tag, comm, 0, NULL, NULL, 1);
			
 
				+	req = _starpu_mpi_irecv_common(data_handle, source, mpi_tag, comm, 0, NULL, NULL, 1, 0, 0);
			
 
				 
			
 
				 	STARPU_ASSERT_MSG(req, "Invalid return for _starpu_mpi_irecv_common");
			
 
				 	*public_req = req;
			
@@ -467,7 +473,7 @@ int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, int
 
				 	if (tag == -1)
			
 
				 		starpu_data_set_tag(data_handle, mpi_tag);
			
 
				 
			
 
				-	_starpu_mpi_irecv_common(data_handle, source, mpi_tag, comm, 1, callback, arg, 1);
			
 
				+	_starpu_mpi_irecv_common(data_handle, source, mpi_tag, comm, 1, callback, arg, 1, 0, 0);
			
 
				 	_STARPU_MPI_LOG_OUT();
			
 
				 	return 0;
			
 
				 }
			
@@ -475,7 +481,8 @@ int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, int
 
				 int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency)
			
 
				 {
			
 
				 	_STARPU_MPI_LOG_IN();
			
 
				-	_starpu_mpi_irecv_common(data_handle, source, mpi_tag, comm, 1, callback, arg, sequential_consistency);
			
 
				+
			
 
				+	_starpu_mpi_irecv_common(data_handle, source, mpi_tag, comm, 1, callback, arg, sequential_consistency, 0, 0);
			
 
				 
			
 
				 	_STARPU_MPI_LOG_OUT();
			
 
				 	return 0;
			
@@ -766,58 +773,54 @@ static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req)
 
				 
			
 
				 	_STARPU_MPI_LOG_IN();
			
 
				 
			
 
				-	_STARPU_MPI_DEBUG(2, "complete MPI request %p type %s tag %d src %d data %p ptr %p datatype '%s' count %d user_datatype %d \n",
			
 
				-			  req, _starpu_mpi_request_type(req->request_type), req->mpi_tag, req->srcdst, req->data_handle, req->ptr, _starpu_mpi_datatype(req->datatype), (int)req->count, req->user_datatype);
			
 
				+	_STARPU_MPI_DEBUG(2, "complete MPI request %p type %s tag %d src %d data %p ptr %p datatype '%s' count %d user_datatype %d internal_req %p\n",
			
 
				+			  req, _starpu_mpi_request_type(req->request_type), req->mpi_tag, req->srcdst, req->data_handle, req->ptr,
			
 
				+			  _starpu_mpi_datatype(req->datatype), (int)req->count, req->user_datatype, req->internal_req);
			
 
				 
			
 
				-	if (req->request_type == RECV_REQ || req->request_type == SEND_REQ)
			
 
				+	if (req->internal_req)
			
 
				 	{
			
 
				-		if (req->user_datatype == 1)
			
 
				-		{
			
 
				-			if (req->request_type == SEND_REQ)
			
 
				-			{
			
 
				-				// We need to make sure the communication for sending the size
			
 
				-				// has completed, as MPI can re-order messages, let's call
			
 
				-				// MPI_Wait to make sure data have been sent
			
 
				-				ret = MPI_Wait(&req->size_req, MPI_STATUS_IGNORE);
			
 
				-				STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Wait returning %d", ret);
			
 
				-
			
 
				-			}
			
 
				-			if (req->request_type == RECV_REQ)
			
 
				-				// req->ptr is freed by starpu_data_unpack
			
 
				-				starpu_data_unpack(req->data_handle, req->ptr, req->count);
			
 
				-			else
			
 
				-				free(req->ptr);
			
 
				-		}
			
 
				-		else
			
 
				+		struct _starpu_mpi_copy_handle *chandle = find_chandle(starpu_data_get_tag(req->data_handle));
			
 
				+		_STARPU_MPI_DEBUG(3, "Handling deleting of copy_handle structure from the hashmap..\n");
			
 
				+		delete_chandle(chandle);
			
 
				+		free(chandle);
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		if (req->request_type == RECV_REQ || req->request_type == SEND_REQ)
			
 
				 		{
			
 
				-			struct _starpu_mpi_copy_handle *chandle = find_chandle(starpu_data_get_tag(req->data_handle));
			
 
				-			if (chandle && (req->data_handle != chandle->handle))
			
 
				+			if (req->user_datatype == 1)
			
 
				 			{
			
 
				-				_STARPU_MPI_DEBUG(3, "Handling deleting of copy_handle structure from the hashmap..\n");
			
 
				-				delete_chandle(chandle);
			
 
				-				free(chandle);
			
 
				+				if (req->request_type == SEND_REQ)
			
 
				+				{
			
 
				+					// We need to make sure the communication for sending the size
			
 
				+					// has completed, as MPI can re-order messages, let's call
			
 
				+					// MPI_Wait to make sure data have been sent
			
 
				+					ret = MPI_Wait(&req->size_req, MPI_STATUS_IGNORE);
			
 
				+					STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Wait returning %d", ret);
			
 
				+					free(req->ptr);
			
 
				+				}
			
 
				+				if (req->request_type == RECV_REQ)
			
 
				+				{
			
 
				+					// req->ptr is freed by starpu_data_unpack
			
 
				+					starpu_data_unpack(req->data_handle, req->ptr, req->count);
			
 
				+				}
			
 
				 			}
			
 
				 			else
			
 
				 			{
			
 
				-				_STARPU_MPI_DEBUG(3, "NOT deleting chandle %p from hashmap (tag %d %d)\n", chandle, req->mpi_tag, starpu_data_get_tag(req->data_handle));
			
 
				 				_starpu_mpi_handle_free_datatype(req->data_handle, &req->datatype);
			
 
				 			}
			
 
				 		}
			
 
				-		starpu_data_release(req->data_handle);
			
 
				 	}
			
 
				 
			
 
				+	if (req->data_handle)
			
 
				+		starpu_data_release(req->data_handle);
			
 
				+
			
 
				 	if (req->envelope)
			
 
				 	{
			
 
				 		free(req->envelope);
			
 
				 		req->envelope = NULL;
			
 
				 	}
			
 
				 
			
 
				-	if (req->internal_req)
			
 
				-	{
			
 
				-		free(req->internal_req);
			
 
				-		req->internal_req = NULL;
			
 
				-	}
			
 
				-
			
 
				 	/* Execute the specified callback, if any */
			
 
				 	if (req->callback)
			
 
				 		req->callback(req->callback_arg);
			
@@ -869,12 +872,13 @@ static void _starpu_mpi_copy_cb(void* arg)
 
				 	starpu_data_unregister_submit(args->copy_handle);
			
 
				 
			
 
				 	_STARPU_MPI_DEBUG(3, "Done, handling request %p termination of the already received request\n",args->req);
			
 
				+	// If the request is detached, we need to call _starpu_mpi_handle_request_termination
			
 
				+	// as it will not be called automatically as the request is not in the list detached_requests
			
 
				 	if (args->req->detached)
			
 
				 		_starpu_mpi_handle_request_termination(args->req);
			
 
				 	// else: If the request is not detached its termination will
			
 
				 	// be handled when calling starpu_mpi_wait
			
 
				 
			
 
				-
			
 
				 	free(args);
			
 
				 }
			
 
				 
			
@@ -891,82 +895,78 @@ static void _starpu_mpi_submit_new_mpi_request(void *arg)
 
				 
			
 
				 	if (req->request_type == RECV_REQ)
			
 
				 	{
			
 
				-		/* test whether the receive request has already been submitted internally by StarPU-MPI*/
			
 
				-		struct _starpu_mpi_copy_handle *chandle = find_chandle(req->mpi_tag);
			
 
				-
			
 
				-		/* Case : the request has already been submitted internally by StarPU.
			
 
				-		 * We'll asynchronously ask a Read permission over the temporary handle, so as when
			
 
				-		 * the internal receive will be over, the _starpu_mpi_copy_cb function will be called to
			
 
				-		 * bring the data back to the original data handle associated to the request.*/
			
 
				-		if (chandle && (req->data_handle != chandle->handle))
			
 
				+		/* Case : the request is the internal receive request submitted by StarPU-MPI to receive
			
 
				+		 * incoming data without a matching pending receive already submitted by the application.
			
 
				+		 * We immediately allocate the pointer associated to the data_handle, and push it into
			
 
				+		 * the list of new_requests, so that the real MPI request can be submitted before the next
			
 
				+		 * submission of the envelope-catching request. */
			
 
				+		if (req->is_internal_req)
			
 
				 		{
			
 
				-			_STARPU_MPI_DEBUG(3, "The RECV request %p with tag %d has already been received, copying previously received data into handle's pointer..\n", req, req->mpi_tag);
			
 
				-
			
 
				-			req->internal_req = chandle->req;
			
 
				+			_starpu_mpi_handle_allocate_datatype(req->data_handle, &req->datatype, &req->user_datatype);
			
 
				+			if (req->user_datatype == 0)
			
 
				+			{
			
 
				+				req->count = 1;
			
 
				+				req->ptr = starpu_data_get_local_ptr(req->data_handle);
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				STARPU_ASSERT(req->count);
			
 
				+				req->ptr = malloc(req->count);
			
 
				+				STARPU_ASSERT_MSG(req->ptr, "cannot allocate message of size %ld\n", req->count);
			
 
				+			}
			
 
				 
			
 
				-			struct _starpu_mpi_copy_cb_args *cb_args = malloc(sizeof(struct _starpu_mpi_copy_cb_args));
			
 
				-			cb_args->data_handle = req->data_handle;
			
 
				-			cb_args->copy_handle = chandle->handle;
			
 
				-			cb_args->req = req;
			
 
				+			_STARPU_MPI_DEBUG(3, "Pushing internal starpu_mpi_irecv request %p type %s tag %d src %d data %p ptr %p datatype '%s' count %d user_datatype %d \n", req, _starpu_mpi_request_type(req->request_type), req->mpi_tag, req->srcdst, req->data_handle, req->ptr, _starpu_mpi_datatype(req->datatype), (int)req->count, req->user_datatype);
			
 
				+			_starpu_mpi_req_list_push_front(new_requests, req);
			
 
				 
			
 
				-			_STARPU_MPI_DEBUG(3, "Calling data_acquire_cb on starpu_mpi_copy_cb..\n");
			
 
				-			starpu_data_acquire_cb(chandle->handle,STARPU_R,_starpu_mpi_copy_cb,(void*) cb_args);
			
 
				+			/* inform the starpu mpi thread that the request has been pushed in the new_requests list */
			
 
				+			STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
			
 
				+			STARPU_PTHREAD_MUTEX_LOCK(&req->posted_mutex);
			
 
				+			req->posted = 1;
			
 
				+			STARPU_PTHREAD_COND_BROADCAST(&req->posted_cond);
			
 
				+			STARPU_PTHREAD_MUTEX_UNLOCK(&req->posted_mutex);
			
 
				+			STARPU_PTHREAD_MUTEX_LOCK(&mutex);
			
 
				 		}
			
 
				 		else
			
 
				 		{
			
 
				-			/* Case : the request is the internal receive request submitted by StarPU-MPI to receive
			
 
				-			 * incoming data without a matching pending receive already submitted by the application.
			
 
				-			 * We immediately allocate the pointer associated to the data_handle, and pushing it into
			
 
				-			 * the list of new_requests, so as the real MPI request can be submitted before the next
			
 
				-			 * submission of the envelope-catching request. */
			
 
				-			if (chandle && (req->data_handle == chandle->handle))
			
 
				+			/* test whether the receive request has already been submitted internally by StarPU-MPI*/
			
 
				+			struct _starpu_mpi_copy_handle *chandle = find_chandle(req->mpi_tag);
			
 
				+
			
 
				+			/* Case : the request has already been submitted internally by StarPU.
			
 
				+			 * We'll asynchronously request read permission on the temporary handle, so that when
			
 
				+			 * the internal receive is over, the _starpu_mpi_copy_cb function is called to
			
 
				+			 * bring the data back to the original data handle associated to the request.*/
			
 
				+			if (chandle)
			
 
				 			{
			
 
				-				_starpu_mpi_handle_allocate_datatype(req->data_handle, &req->datatype, &req->user_datatype);
			
 
				-				if (req->user_datatype == 0)
			
 
				-				{
			
 
				-					req->count = 1;
			
 
				-					req->ptr = starpu_data_get_local_ptr(req->data_handle);
			
 
				-				}
			
 
				-				else
			
 
				-				{
			
 
				-					req->count = chandle->env->psize;
			
 
				-					req->ptr = malloc(req->count);
			
 
				+				_STARPU_MPI_DEBUG(3, "The RECV request %p with tag %d has already been received, copying previously received data into handle's pointer..\n", req, req->mpi_tag);
			
 
				+				STARPU_ASSERT(req->data_handle != chandle->handle);
			
 
				 
			
 
				-					STARPU_ASSERT_MSG(req->ptr, "cannot allocate message of size %ld\n", req->count);
			
 
				-				}
			
 
				+				req->internal_req = chandle->req;
			
 
				 
			
 
				-				_STARPU_MPI_DEBUG(3, "Pushing internal starpu_mpi_irecv request %p type %s tag %d src %d data %p ptr %p datatype '%s' count %d user_datatype %d \n", req, _starpu_mpi_request_type(req->request_type), req->mpi_tag, req->srcdst, req->data_handle, req->ptr, _starpu_mpi_datatype(req->datatype), (int)req->count, req->user_datatype);
			
 
				-				_starpu_mpi_req_list_push_front(new_requests, req);
			
 
				+				struct _starpu_mpi_copy_cb_args *cb_args = malloc(sizeof(struct _starpu_mpi_copy_cb_args));
			
 
				+				cb_args->data_handle = req->data_handle;
			
 
				+				cb_args->copy_handle = chandle->handle;
			
 
				+				cb_args->req = req;
			
 
				 
			
 
				-				/* inform the starpu mpi thread that the request has beenbe pushed in the new_requests list */
			
 
				-				STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
			
 
				-				STARPU_PTHREAD_MUTEX_LOCK(&req->posted_mutex);
			
 
				-				req->posted = 1;
			
 
				-				STARPU_PTHREAD_COND_BROADCAST(&req->posted_cond);
			
 
				-				STARPU_PTHREAD_MUTEX_UNLOCK(&req->posted_mutex);
			
 
				-				STARPU_PTHREAD_MUTEX_LOCK(&mutex);
			
 
				+				_STARPU_MPI_DEBUG(3, "Calling data_acquire_cb on starpu_mpi_copy_cb..\n");
			
 
				+				starpu_data_acquire_cb(chandle->handle,STARPU_R,_starpu_mpi_copy_cb,(void*) cb_args);
			
 
				 			}
			
 
				 			/* Case : a classic receive request with no send received earlier than expected.
			
 
				 			 * We just add the pending receive request to the requests' hashmap. */
			
 
				 			else
			
 
				 			{
			
 
				-				add_req(req);
			
 
				+				add_app_req(req);
			
 
				 			}
			
 
				-
			
 
				-			newer_requests = 1;
			
 
				-			STARPU_PTHREAD_COND_BROADCAST(&cond_progression);
			
 
				 		}
			
 
				 	}
			
 
				 	else
			
 
				 	{
			
 
				 		_starpu_mpi_req_list_push_front(new_requests, req);
			
 
				-
			
 
				-		newer_requests = 1;
			
 
				 		_STARPU_MPI_DEBUG(3, "Pushing new request %p type %s tag %d src %d data %p ptr %p datatype '%s' count %d user_datatype %d \n",
			
 
				 				  req, _starpu_mpi_request_type(req->request_type), req->mpi_tag, req->srcdst, req->data_handle, req->ptr, _starpu_mpi_datatype(req->datatype), (int)req->count, req->user_datatype);
			
 
				-		STARPU_PTHREAD_COND_BROADCAST(&cond_progression);
			
 
				 	}
			
 
				 
			
 
				+	newer_requests = 1;
			
 
				+	STARPU_PTHREAD_COND_BROADCAST(&cond_progression);
			
 
				 	STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
			
 
				 	_STARPU_MPI_LOG_OUT();
			
 
				 }
			
@@ -1043,6 +1043,9 @@ static void _starpu_mpi_test_detached_requests(void)
 
				 		if (flag)
			
 
				 		{
			
 
				 			_starpu_mpi_req_list_erase(detached_requests, req);
			
 
				+#ifdef STARPU_DEVEL
			
 
				+#warning FIXME: when do we free internal requests
			
 
				+#endif
			
 
				 			if (!req->is_internal_req)
			
 
				 				free(req);
			
 
				 		}
			
@@ -1135,12 +1138,12 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 
				 	}
			
 
				 
			
 
				 	{
			
 
				-	     int rank, worldsize;
			
 
				-	     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
			
 
				-	     MPI_Comm_size(MPI_COMM_WORLD, &worldsize);
			
 
				-	     TRACE_MPI_START(rank, worldsize);
			
 
				+		int rank, worldsize;
			
 
				+		MPI_Comm_rank(MPI_COMM_WORLD, &rank);
			
 
				+		MPI_Comm_size(MPI_COMM_WORLD, &worldsize);
			
 
				+		TRACE_MPI_START(rank, worldsize);
			
 
				 #ifdef STARPU_USE_FXT
			
 
				-	     starpu_profiling_set_id(rank);
			
 
				+		starpu_profiling_set_id(rank);
			
 
				 #endif //STARPU_USE_FXT
			
 
				 	}
			
 
				 
			
@@ -1159,7 +1162,7 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 
				 	while (running || posted_requests || !(_starpu_mpi_req_list_empty(new_requests)) || !(_starpu_mpi_req_list_empty(detached_requests)))
			
 
				 	{
			
 
				 		/* shall we block ? */
			
 
				-		unsigned block = _starpu_mpi_req_list_empty(new_requests) && (HASH_COUNT(_starpu_mpi_req_hashmap) == 0);
			
 
				+		unsigned block = _starpu_mpi_req_list_empty(new_requests) && (HASH_COUNT(_starpu_mpi_app_req_hashmap) == 0);
			
 
				 
			
 
				 #ifndef STARPU_MPI_ACTIVITY
			
 
				 		STARPU_PTHREAD_MUTEX_LOCK(&detached_requests_mutex);
			
@@ -1199,7 +1202,7 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 
				 		/* If there is no currently submitted header_req submitted to catch envelopes from senders, and there is some pending receive
			
 
				 		 * requests in our side, we resubmit a header request. */
			
 
				 		MPI_Request header_req;
			
 
				-		if ((HASH_COUNT(_starpu_mpi_req_hashmap) > 0) && (header_req_submitted == 0))// && (HASH_COUNT(_starpu_mpi_copy_handle_hashmap) == 0))
			
 
				+		if ((HASH_COUNT(_starpu_mpi_app_req_hashmap) > 0) && (header_req_submitted == 0))// && (HASH_COUNT(_starpu_mpi_copy_handle_hashmap) == 0))
			
 
				 		{
			
 
				 			_STARPU_MPI_DEBUG(3, "Posting a receive to get a data envelop\n");
			
 
				 			MPI_Irecv(recv_env, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, MPI_ANY_SOURCE, _starpu_mpi_tag, MPI_COMM_WORLD, &header_req);
			
@@ -1223,9 +1226,9 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 
				 
			
 
				 			if (flag)
			
 
				 			{
			
 
				-				_STARPU_MPI_DEBUG(3, "Searching for request with tag %d (size %ld)\n", recv_env->mpi_tag, recv_env->psize);
			
 
				+				_STARPU_MPI_DEBUG(3, "Searching for application request with tag %d (size %ld)\n", recv_env->mpi_tag, recv_env->psize);
			
 
				 
			
 
				-				struct _starpu_mpi_req *found_req = find_req(recv_env->mpi_tag);
			
 
				+				struct _starpu_mpi_req *found_req = find_app_req(recv_env->mpi_tag);
			
 
				 
			
 
				 				/* Case : a data will arrive before the matching receive has been submitted in our side of the application.
			
 
				 				 * We will allow a temporary handle to store the incoming data, by submitting a starpu_mpi_irecv_detached
			
@@ -1253,8 +1256,7 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 
				 					add_chandle(chandle);
			
 
				 
			
 
				 					_STARPU_MPI_DEBUG(3, "Posting internal detached irecv on copy_handle with tag %d from src %d ..\n", chandle->mpi_tag, status.MPI_SOURCE);
			
 
				-					chandle->req = _starpu_mpi_irecv_common(chandle->handle, status.MPI_SOURCE, chandle->mpi_tag, MPI_COMM_WORLD, 1, NULL, NULL, 1);
			
 
				-					chandle->req->is_internal_req = 1;
			
 
				+					chandle->req = _starpu_mpi_irecv_common(chandle->handle, status.MPI_SOURCE, chandle->mpi_tag, MPI_COMM_WORLD, 1, NULL, NULL, 1, 1, recv_env->psize);
			
 
				 
			
 
				 					// We wait until the request is pushed in the
			
 
				 					// new_request list, that ensures that the next loop
			
@@ -1272,9 +1274,9 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 
				 				 * the data handle, then submit the corresponding receive with _starpu_mpi_handle_new_request. */
			
 
				 				else
			
 
				 				{
			
 
				-					_STARPU_MPI_DEBUG(3, "Found !\n");
			
 
				+					_STARPU_MPI_DEBUG(3, "A matching receive has been found for the incoming data with tag %d\n", recv_env->mpi_tag);
			
 
				 
			
 
				-					delete_req(found_req);
			
 
				+					delete_app_req(found_req);
			
 
				 
			
 
				 					_starpu_mpi_handle_allocate_datatype(found_req->data_handle, &found_req->datatype, &found_req->user_datatype);
			
 
				 					if (found_req->user_datatype == 0)
			
@@ -1311,8 +1313,8 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 
				 	STARPU_ASSERT_MSG(_starpu_mpi_req_list_empty(detached_requests), "List of detached requests not empty");
			
 
				 	STARPU_ASSERT_MSG(_starpu_mpi_req_list_empty(new_requests), "List of new requests not empty");
			
 
				 	STARPU_ASSERT_MSG(posted_requests == 0, "Number of posted request is not zero");
			
 
				-	STARPU_ASSERT_MSG(HASH_COUNT(_starpu_mpi_req_hashmap) == 0, "Number of receive requests left is not zero");
			
 
				-
			
 
				+	STARPU_ASSERT_MSG(HASH_COUNT(_starpu_mpi_app_req_hashmap) == 0, "Number of receive requests left is not zero");
			
 
				+	STARPU_ASSERT_MSG(HASH_COUNT(_starpu_mpi_copy_handle_hashmap) == 0, "Number of copy requests left is not zero");
			
 
				 	if (argc_argv->initialize_mpi)
			
 
				 	{
			
 
				 		_STARPU_MPI_DEBUG(3, "Calling MPI_Finalize()\n");
			
--- a/src/common/uthash.h
+++ b/src/common/uthash.h
@@ -229,7 +229,7 @@ do {
 
				 #define HASH_FIND_STR(head,findstr,out)                                          \
			
 
				     HASH_FIND(hh,head,findstr,strlen(findstr),out)
			
 
				 #define HASH_ADD_STR(head,strfield,add)                                          \
			
 
				-    HASH_ADD(hh,head,strfield,strlen(add->strfield),add)
			
 
				+    HASH_ADD(hh,head,strfield[0],strlen(add->strfield),add)
			
 
				 #define HASH_FIND_INT(head,findint,out)                                          \
			
 
				     HASH_FIND(hh,head,findint,sizeof(int),out)
			
 
				 #define HASH_ADD_INT(head,intfield,add)                                          \
			
--- a/src/core/combined_workers.c
+++ b/src/core/combined_workers.c
@@ -100,8 +100,19 @@ int starpu_combined_worker_assign_workerid(int nworkers, int workerid_array[])
 
				 		&config->combined_workers[combined_worker_id];
			
 
				 
			
 
				 	combined_worker->worker_size = nworkers;
			
 
				-	combined_worker->perf_arch = (enum starpu_perfmodel_archtype) (STARPU_CPU_DEFAULT + nworkers - 1);
			
 
				-	combined_worker->worker_mask = STARPU_CPU;
			
 
				+
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+	if(config->workers[workerid_array[0]].worker_mask == STARPU_MIC)
			
 
				+	{
			
 
				+		combined_worker->perf_arch = (enum starpu_perfmodel_archtype) (STARPU_MIC_DEFAULT + config->workers[workerid_array[0]].mp_nodeid /* *STARPU_MAXMICCPUS + nworkers - 1*/);
			
 
				+		combined_worker->worker_mask = STARPU_MIC;
			
 
				+	}
			
 
				+#endif
			
 
				+	if(config->workers[workerid_array[0]].worker_mask == STARPU_CPU)
			
 
				+	{
			
 
				+		combined_worker->perf_arch = (enum starpu_perfmodel_archtype) (STARPU_CPU_DEFAULT + nworkers - 1);
			
 
				+		combined_worker->worker_mask = STARPU_CPU;
			
 
				+	}
			
 
				 	combined_worker->count = nworkers -1;
			
 
				 	pthread_mutex_init(&combined_worker->count_mutex,NULL);
			
 
				 
			
--- a/src/core/debug.c
+++ b/src/core/debug.c
@@ -82,7 +82,7 @@ int64_t _starpu_ayudame_get_func_id(struct starpu_codelet *cl)
 
				 	unsigned i;
			
 
				 	const char *name;
			
 
				 	if (!cl)
			
 
				-		return -1;
			
 
				+		return 0;
			
 
				 	name = _starpu_codelet_get_model_name(cl);
			
 
				 	STARPU_PTHREAD_MUTEX_LOCK(&ayudame_mutex);
			
 
				 	for (i=0; i < ncodelets; i++)
			
@@ -92,7 +92,7 @@ int64_t _starpu_ayudame_get_func_id(struct starpu_codelet *cl)
 
				 				((name && codelets[i].name) && !strcmp(codelets[i].name, name))))
			
 
				 		{
			
 
				 			STARPU_PTHREAD_MUTEX_UNLOCK(&ayudame_mutex);
			
 
				-			return i;
			
 
				+			return i + 1;
			
 
				 		}
			
 
				 	}
			
 
				 	if (ncodelets == ncodelets_alloc)
			
@@ -111,8 +111,8 @@ int64_t _starpu_ayudame_get_func_id(struct starpu_codelet *cl)
 
				 		codelets[ncodelets].name = NULL;
			
 
				 	i = ncodelets++;
			
 
				 	if (name)
			
 
				-		AYU_event(AYU_REGISTERFUNCTION, i, (void*) name);
			
 
				+		AYU_event(AYU_REGISTERFUNCTION, i+1, (void*) name);
			
 
				 	STARPU_PTHREAD_MUTEX_UNLOCK(&ayudame_mutex);
			
 
				-	return i;
			
 
				+	return i + 1;
			
 
				 }
			
 
				 #endif
			
--- a/src/core/dependencies/tags.c
+++ b/src/core/dependencies/tags.c
@@ -203,7 +203,7 @@ static struct _starpu_tag *_gettag_struct(starpu_tag_t id)
 
				 #ifdef HAVE_AYUDAME_H
			
 
				 		if (AYU_event)
			
 
				 		{
			
 
				-			int64_t AYU_data[2] = {-1, 0};
			
 
				+			int64_t AYU_data[2] = {0, 0};
			
 
				 			STARPU_ASSERT(id < AYUDAME_OFFSET);
			
 
				 			AYU_event(AYU_ADDTASK, id + AYUDAME_OFFSET, AYU_data);
			
 
				 		}
			
@@ -244,7 +244,7 @@ void _starpu_tag_set_ready(struct _starpu_tag *tag)
 
				 #ifdef HAVE_AYUDAME_H
			
 
				 	if (AYU_event)
			
 
				 	{
			
 
				-		int id = -1;
			
 
				+		intptr_t id = 0;
			
 
				 		AYU_event(AYU_PRERUNTASK, tag->id + AYUDAME_OFFSET, &id);
			
 
				 		AYU_event(AYU_POSTRUNTASK, tag->id + AYUDAME_OFFSET, NULL);
			
 
				 	}
			
--- a/src/core/disk.h
+++ b/src/core/disk.h
@@ -23,6 +23,8 @@
 
				 #define STARPU_DISK_ALL 1
			
 
				 #define STARPU_DISK_NO_RECLAIM 2
			
 
				 
			
 
				+#include <datawizard/copy_driver.h>
			
 
				+
			
 
				 /* interface to manipulate memory disk */
			
 
				 void * _starpu_disk_alloc (unsigned node, size_t size);
			
 
				 
			
--- a/src/core/jobs.c
+++ b/src/core/jobs.c
@@ -284,7 +284,7 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 
				 #ifdef HAVE_AYUDAME_H
			
 
				 		if (AYU_event)
			
 
				 		{
			
 
				-			int64_t AYU_data[2] = {j->exclude_from_dag?-1:_starpu_ayudame_get_func_id(task->cl), task->priority > STARPU_MIN_PRIO};
			
 
				+			int64_t AYU_data[2] = {j->exclude_from_dag?0:_starpu_ayudame_get_func_id(task->cl), task->priority > STARPU_MIN_PRIO};
			
 
				 			AYU_event(AYU_ADDTASK, j->job_id, AYU_data);
			
 
				 		}
			
 
				 #endif
			
--- a/src/core/sched_policy.c
+++ b/src/core/sched_policy.c
@@ -326,7 +326,7 @@ int _starpu_push_task(struct _starpu_job *j)
 
				 #ifdef HAVE_AYUDAME_H
			
 
				 	if (AYU_event)
			
 
				 	{
			
 
				-		int id = -1;
			
 
				+		intptr_t id = -1;
			
 
				 		AYU_event(AYU_ADDTASKTOQUEUE, j->job_id, &id);
			
 
				 	}
			
 
				 #endif
			
--- a/src/core/workers.c
+++ b/src/core/workers.c
@@ -437,11 +437,7 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 
				 #endif
			
 
				 
			
 
				 #ifdef HAVE_AYUDAME_H
			
 
				-	if (AYU_event)
			
 
				-	{
			
 
				-		unsigned long n = nworkers;
			
 
				-		AYU_event(AYU_INIT, 0, (void*) &n);
			
 
				-	}
			
 
				+	if (AYU_event) AYU_event(AYU_INIT, 0, NULL);
			
 
				 #endif
			
 
				 	for (worker = 0; worker < nworkers; worker++)
			
 
				 	{
			
--- a/src/datawizard/data_request.c
+++ b/src/datawizard/data_request.c
@@ -358,6 +358,7 @@ static int starpu_handle_data_request(struct _starpu_data_request *r, unsigned m
 
				 	/* perform the transfer */
			
 
				 	/* the header of the data must be locked by the worker that submitted the request */
			
 
				 
			
 
				+
			
 
				 	r->retval = _starpu_driver_copy_data_1_to_1(handle, src_replicate,
			
 
				 						    dst_replicate, !(r_mode & STARPU_R), r, may_alloc);
			
 
				 
			
--- a/src/datawizard/filters.c
+++ b/src/datawizard/filters.c
@@ -258,9 +258,7 @@ void starpu_data_partition(starpu_data_handle_t initial_handle, struct starpu_da
 
				 		void *ptr;
			
 
				 		ptr = starpu_data_handle_to_pointer(child, 0);
			
 
				 		if (ptr != NULL)
			
 
				-		{
			
 
				 			_starpu_data_register_ram_pointer(child, ptr);
			
 
				-		}
			
 
				 	}
			
 
				 	/* now let the header */
			
 
				 	_starpu_spin_unlock(&initial_handle->header_lock);
			
--- a/src/datawizard/reduction.c
+++ b/src/datawizard/reduction.c
@@ -86,19 +86,16 @@ void _starpu_redux_init_data_replicate(starpu_data_handle_t handle, struct _star
 
				 #ifdef STARPU_USE_MIC
			
 
				 	if (starpu_worker_get_type(workerid) == STARPU_MIC_WORKER)
			
 
				 	{
			
 
				-		const struct _starpu_mp_node *node = _starpu_mic_src_get_actual_thread_mp_node();
			
 
				-		enum _starpu_mp_command answer;
			
 
				-		void *arg = NULL;
			
 
				-		int arg_size = 0;
			
 
				-
			
 
				-		// XXX: give the correct coreid.
			
 
				-	       _starpu_src_common_execute_kernel(node,
			
 
				-						 (void(*)(void))init_func, 0,
			
 
				+		struct _starpu_mp_node *node = _starpu_mic_src_get_actual_thread_mp_node();
			
 
				+		int devid = _starpu_get_worker_struct(workerid)->devid;
			
 
				+		void * arg;
			
 
				+		int arg_size;
			
 
				+		_starpu_src_common_execute_kernel(node,
			
 
				+						 (void(*)(void))init_func, devid,
			
 
				 						 STARPU_SEQ, 0, 0, &handle, 
			
 
				 						 &(replicate->data_interface), 1,
			
 
				 						 NULL, 0);
			
 
				-		answer = _starpu_mp_common_recv_command (node, &arg, &arg_size);
			
 
				-		STARPU_ASSERT (answer == STARPU_EXECUTION_COMPLETED);
			
 
				+		_starpu_src_common_wait_completed_execution(node,devid,&arg,&arg_size);
			
 
				 	}
			
 
				 	else
			
 
				 #endif
			
--- a/src/drivers/driver_common/driver_common.c
+++ b/src/drivers/driver_common/driver_common.c
@@ -258,7 +258,7 @@ struct starpu_task *_starpu_get_worker_task(struct _starpu_worker *args, int wor
 
				 #ifdef HAVE_AYUDAME_H
			
 
				 	if (AYU_event)
			
 
				 	{
			
 
				-		int id = workerid;
			
 
				+		intptr_t id = workerid;
			
 
				 		AYU_event(AYU_PRERUNTASK, _starpu_get_job_associated_to_task(task)->job_id, &id);
			
 
				 	}
			
 
				 #endif
			
--- a/src/drivers/mic/driver_mic_sink.c
+++ b/src/drivers/mic/driver_mic_sink.c
@@ -33,11 +33,8 @@
 
				  */
			
 
				 void _starpu_mic_sink_init(struct _starpu_mp_node *node)
			
 
				 {
			
 
				-	pthread_t thread, self;
			
 
				+	pthread_t self;
			
 
				 	cpu_set_t cpuset;
			
 
				-	pthread_attr_t attr;
			
 
				-	int i, ret;
			
 
				-	struct arg_sink_thread * arg;
			
 
				 
			
 
				 	/*Bind on the first core*/
			
 
				 	self = pthread_self();
			
@@ -53,29 +50,35 @@ void _starpu_mic_sink_init(struct _starpu_mp_node *node)
 
				 	_starpu_mic_common_accept(&node->host_sink_dt_connection.mic_endpoint,
			
 
				 									 STARPU_MIC_SOURCE_DT_PORT_NUMBER);
			
 
				 	
			
 
				-	node->is_running = 1;
			
 
				-
			
 
				 	node->nb_cores = COISysGetHardwareThreadCount() - COISysGetHardwareThreadCount() / COISysGetCoreCount();
			
 
				 	node->thread_table = malloc(sizeof(pthread_t)*node->nb_cores);
			
 
				 
			
 
				-	node->run_table = malloc(sizeof(struct mp_task *)*node->nb_cores);
			
 
				-	node->sem_run_table = malloc(sizeof(sem_t)*node->nb_cores);
			
 
				-
			
 
				-	node->barrier_list = mp_barrier_list_new();
			
 
				-	node->message_queue = mp_message_list_new();
			
 
				-	STARPU_PTHREAD_MUTEX_INIT(&node->message_queue_mutex,NULL);
			
 
				-	STARPU_PTHREAD_MUTEX_INIT(&node->barrier_mutex,NULL);
			
 
				+	//node->sink_sink_dt_connections = malloc(node->nb_mp_sinks * sizeof(union _starpu_mp_connection));
			
 
				 
			
 
				-	STARPU_PTHREAD_BARRIER_INIT(&node->init_completed_barrier, NULL, node->nb_cores+1);
			
 
				+	//for (i = 0; i < (unsigned int)node->devid; ++i)
			
 
				+	//	_starpu_mic_common_connect(&node->sink_sink_dt_connections[i].mic_endpoint,
			
 
				+	//								STARPU_TO_MIC_ID(i),
			
 
				+	//								STARPU_MIC_SINK_SINK_DT_PORT_NUMBER(node->devid, i),	
			
 
				+	//								STARPU_MIC_SINK_SINK_DT_PORT_NUMBER(i, node->devid));
			
 
				 
			
 
				+	//for (i = node->devid + 1; i < node->nb_mp_sinks; ++i)
			
 
				+	//	_starpu_mic_common_accept(&node->sink_sink_dt_connections[i].mic_endpoint,
			
 
				+	//								STARPU_MIC_SINK_SINK_DT_PORT_NUMBER(node->devid, i));
			
 
				+}
			
 
				 
			
 
				+/* Launch all workers on the mic
			
 
				+ */
			
 
				+void _starpu_mic_sink_launch_workers(struct _starpu_mp_node *node)
			
 
				+{
			
 
				+	int i, ret;
			
 
				+	struct arg_sink_thread * arg;
			
 
				+	cpu_set_t cpuset;
			
 
				+	pthread_attr_t attr;
			
 
				+	pthread_t thread;
			
 
				+	
			
 
				 	/*for each core init the mutex, the task pointer and launch the thread */
			
 
				 	for(i=0; i<node->nb_cores; i++)
			
 
				 	{
			
 
				-		node->run_table[i] = NULL;
			
 
				-
			
 
				-		sem_init(&node->sem_run_table[i],0,0);
			
 
				-
			
 
				 		//init the set
			
 
				 		CPU_ZERO(&cpuset);
			
 
				 		CPU_SET(i,&cpuset);
			
@@ -89,24 +92,12 @@ void _starpu_mic_sink_init(struct _starpu_mp_node *node)
 
				 		arg= malloc(sizeof(struct arg_sink_thread));
			
 
				 		arg->coreid = i;
			
 
				 		arg->node = node;
			
 
				-		arg->sem = &node->sem_run_table[i];
			
 
				 		
			
 
				 		ret = pthread_create(&thread, &attr, _starpu_sink_thread, arg);
			
 
				-		((pthread_t *)node->thread_table)[i] = thread;
			
 
				 		STARPU_ASSERT(ret == 0);
			
 
				+		((pthread_t *)node->thread_table)[i] = thread;
			
 
				 	}
			
 
				 
			
 
				-	//node->sink_sink_dt_connections = malloc(node->nb_mp_sinks * sizeof(union _starpu_mp_connection));
			
 
				-
			
 
				-	//for (i = 0; i < (unsigned int)node->devid; ++i)
			
 
				-	//	_starpu_mic_common_connect(&node->sink_sink_dt_connections[i].mic_endpoint,
			
 
				-	//								STARPU_TO_MIC_ID(i),
			
 
				-	//								STARPU_MIC_SINK_SINK_DT_PORT_NUMBER(node->devid, i),	
			
 
				-	//								STARPU_MIC_SINK_SINK_DT_PORT_NUMBER(i, node->devid));
			
 
				-
			
 
				-	//for (i = node->devid + 1; i < node->nb_mp_sinks; ++i)
			
 
				-	//	_starpu_mic_common_accept(&node->sink_sink_dt_connections[i].mic_endpoint,
			
 
				-	//								STARPU_MIC_SINK_SINK_DT_PORT_NUMBER(node->devid, i));
			
 
				 }
			
 
				 
			
 
				 /* Deinitialize the MIC sink, close all the connections.
			
@@ -120,19 +111,13 @@ void _starpu_mic_sink_deinit(struct _starpu_mp_node *node)
 
				 	{
			
 
				 		sem_post(&node->sem_run_table[i]);
			
 
				 		pthread_join(((pthread_t *)node->thread_table)[i],NULL);
			
 
				-		sem_destroy(&node->sem_run_table[i]);
			
 
				 	}
			
 
				 
			
 
				 	free(node->thread_table);
			
 
				-	free(node->run_table);
			
 
				-	free(node->sem_run_table);
			
 
				 
			
 
				-	mp_barrier_list_delete(node->barrier_list);
			
 
				-	mp_message_list_delete(node->message_queue);
			
 
				+	scif_close(node->host_sink_dt_connection.mic_endpoint);
			
 
				+	scif_close(node->mp_connection.mic_endpoint);
			
 
				 
			
 
				-	STARPU_PTHREAD_MUTEX_DESTROY(&node->message_queue_mutex);
			
 
				-	STARPU_PTHREAD_MUTEX_DESTROY(&node->barrier_mutex);
			
 
				-	STARPU_PTHREAD_BARRIER_DESTROY(&node->init_completed_barrier);
			
 
				 	//unsigned int i;
			
 
				 
			
 
				 	//for (i = 0; i < node->nb_mp_sinks; ++i)
			
@@ -143,14 +128,11 @@ void _starpu_mic_sink_deinit(struct _starpu_mp_node *node)
 
				 
			
 
				 	//free(node->sink_sink_dt_connections);
			
 
				 
			
 
				-	scif_close(node->host_sink_dt_connection.mic_endpoint);
			
 
				-	scif_close(node->mp_connection.mic_endpoint);
			
 
				 }
			
 
				 
			
 
				 /* Report an error which occured when using a MIC device
			
 
				  * and print this error in a human-readable style
			
 
				  */
			
 
				-
			
 
				 void _starpu_mic_sink_report_error(const char *func, const char *file, const int line, const int status)
			
 
				 {
			
 
				 	const char *errormsg = strerror(status);
			
--- a/src/drivers/mic/driver_mic_sink.h
+++ b/src/drivers/mic/driver_mic_sink.h
@@ -34,7 +34,7 @@
 
				 void _starpu_mic_sink_report_error(const char *func, const char *file, const int line, const int status);
			
 
				 
			
 
				 void _starpu_mic_sink_init(struct _starpu_mp_node *node);
			
 
				-
			
 
				+void _starpu_mic_sink_launch_workers(struct _starpu_mp_node *node);
			
 
				 void _starpu_mic_sink_deinit(struct _starpu_mp_node *node);
			
 
				 
			
 
				 void _starpu_mic_sink_allocate(const struct _starpu_mp_node *mp_node, void *arg, int arg_size);
			
--- a/src/drivers/mic/driver_mic_source.c
+++ b/src/drivers/mic/driver_mic_source.c
@@ -73,7 +73,7 @@ starpu_pthread_mutex_t nb_mic_worker_init_mutex = PTHREAD_MUTEX_INITIALIZER;
 
				 //	return config->workers[workerid].devid;
			
 
				 //}
			
 
				 
			
 
				-const struct _starpu_mp_node *_starpu_mic_src_get_actual_thread_mp_node()
			
 
				+struct _starpu_mp_node *_starpu_mic_src_get_actual_thread_mp_node()
			
 
				 {
			
 
				 	struct _starpu_worker *actual_worker = _starpu_get_local_worker_key();
			
 
				 	STARPU_ASSERT(actual_worker);
			
--- a/src/drivers/mic/driver_mic_source.h
+++ b/src/drivers/mic/driver_mic_source.h
@@ -42,7 +42,7 @@ struct _starpu_mic_async_event *event;
 
				 #define STARPU_MIC_SRC_REPORT_SCIF_ERROR(status) \
			
 
				 	_starpu_mic_src_report_scif_error(__starpu_func__, __FILE__, __LINE__, status)
			
 
				 
			
 
				-const struct _starpu_mp_node *_starpu_mic_src_get_actual_thread_mp_node();
			
 
				+struct _starpu_mp_node *_starpu_mic_src_get_actual_thread_mp_node();
			
 
				 const struct _starpu_mp_node *_starpu_mic_src_get_mp_node_from_memory_node(int memory_node);
			
 
				 
			
 
				 void(* _starpu_mic_src_get_kernel_from_job(const struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNUSED, struct _starpu_job *j))(void);
			
--- a/src/drivers/mp_common/mp_common.c
+++ b/src/drivers/mp_common/mp_common.c
@@ -53,6 +53,7 @@ _starpu_mp_common_node_create(enum _starpu_mp_node_kind node_kind,
 
				 		node->devid = peer_id;
			
 
				 
			
 
				 		node->init = _starpu_mic_src_init;
			
 
				+		node->launch_workers= NULL;
			
 
				 		node->deinit = _starpu_mic_src_deinit;
			
 
				 		node->report_error = _starpu_mic_src_report_scif_error;
			
 
				 
			
@@ -77,6 +78,7 @@ _starpu_mp_common_node_create(enum _starpu_mp_node_kind node_kind,
 
				 		node->nb_mp_sinks = atoi(getenv("NB_MIC"));
			
 
				 
			
 
				 		node->init = _starpu_mic_sink_init;
			
 
				+		node->launch_workers = _starpu_mic_sink_launch_workers;
			
 
				 		node->deinit = _starpu_mic_sink_deinit;
			
 
				 		node->report_error = _starpu_mic_sink_report_error;
			
 
				 
			
@@ -102,6 +104,7 @@ _starpu_mp_common_node_create(enum _starpu_mp_node_kind node_kind,
 
				 	{
			
 
				 		node->init = _starpu_scc_src_init;
			
 
				 		node->deinit = NULL;
			
 
				+		node->deinit = NULL;
			
 
				 		node->report_error = _starpu_scc_common_report_rcce_error;
			
 
				 				
			
 
				 		node->mp_recv_is_ready = _starpu_scc_common_recv_is_ready;
			
@@ -124,6 +127,7 @@ _starpu_mp_common_node_create(enum _starpu_mp_node_kind node_kind,
 
				 	case STARPU_SCC_SINK:
			
 
				 	{
			
 
				 		node->init = _starpu_scc_sink_init;
			
 
				+		node->launch_workers = _starpu_scc_sink_launch_workers;
			
 
				 		node->deinit = _starpu_scc_sink_deinit;
			
 
				 		node->report_error = _starpu_scc_common_report_rcce_error;
			
 
				 
			
@@ -166,15 +170,60 @@ _starpu_mp_common_node_create(enum _starpu_mp_node_kind node_kind,
 
				 	if (node->init)
			
 
				 		node->init(node);
			
 
				 
			
 
				+	node->message_queue = mp_message_list_new();
			
 
				+	STARPU_PTHREAD_MUTEX_INIT(&node->message_queue_mutex,NULL);
			
 
				+
			
 
				+	/* If the node is a sink then we must initialize some field */
			
 
				+	if(node->kind == STARPU_MIC_SINK || node->kind == STARPU_SCC_SINK)
			
 
				+	{
			
 
				+		int i;
			
 
				+		node->is_running = 1;
			
 
				+		node->run_table = malloc(sizeof(struct mp_task *)*node->nb_cores);
			
 
				+		node->sem_run_table = malloc(sizeof(sem_t)*node->nb_cores);
			
 
				+
			
 
				+		for(i=0; i<node->nb_cores; i++)
			
 
				+		{
			
 
				+			node->run_table[i] = NULL;
			
 
				+			sem_init(&node->sem_run_table[i],0,0);
			
 
				+		}
			
 
				+		node->barrier_list = mp_barrier_list_new();
			
 
				+		STARPU_PTHREAD_MUTEX_INIT(&node->barrier_mutex,NULL);
			
 
				+
			
 
				+		STARPU_PTHREAD_BARRIER_INIT(&node->init_completed_barrier, NULL, node->nb_cores+1);
			
 
				+
			
 
				+		node->launch_workers(node);
			
 
				+	}	
			
 
				+
			
 
				+
			
 
				 	return node;
			
 
				 }
			
 
				 
			
 
				 /* Deinitialize the sink structure and release the structure */
			
 
				-
			
 
				 void _starpu_mp_common_node_destroy(struct _starpu_mp_node *node)
			
 
				 {
			
 
				 	if (node->deinit)
			
 
				 		node->deinit(node);
			
 
				+		
			
 
				+	mp_message_list_delete(node->message_queue);
			
 
				+	STARPU_PTHREAD_MUTEX_DESTROY(&node->message_queue_mutex);
			
 
				+
			
 
				+	/* If the node is a sink then we must destroy some field */
			
 
				+	if(node->kind == STARPU_MIC_SINK || node->kind == STARPU_SCC_SINK)
			
 
				+	{
			
 
				+		int i;
			
 
				+		for(i=0; i<node->nb_cores; i++)
			
 
				+		{
			
 
				+			sem_destroy(&node->sem_run_table[i]);
			
 
				+		}
			
 
				+
			
 
				+		free(node->run_table);
			
 
				+		free(node->sem_run_table);
			
 
				+
			
 
				+		mp_barrier_list_delete(node->barrier_list);
			
 
				+
			
 
				+		STARPU_PTHREAD_MUTEX_DESTROY(&node->barrier_mutex);
			
 
				+		STARPU_PTHREAD_BARRIER_DESTROY(&node->init_completed_barrier);
			
 
				+	}
			
 
				 
			
 
				 	free(node->buffer);
			
 
				 
			
@@ -182,7 +231,6 @@ void _starpu_mp_common_node_destroy(struct _starpu_mp_node *node)
 
				 }
			
 
				 
			
 
				 /* Send COMMAND to RECIPIENT, along with ARG if ARG_SIZE is non-zero */
			
 
				-
			
 
				 void _starpu_mp_common_send_command(const struct _starpu_mp_node *node,
			
 
				 				    const enum _starpu_mp_command command,
			
 
				 				    void *arg, int arg_size)
			
@@ -209,7 +257,6 @@ void _starpu_mp_common_send_command(const struct _starpu_mp_node *node,
 
				  * However, the data pointed by arg shouldn't be relied on after a new call to
			
 
				  * STARPU_MP_COMMON_RECV_COMMAND as it might corrupt it.
			
 
				  */
			
 
				-
			
 
				 enum _starpu_mp_command _starpu_mp_common_recv_command(const struct _starpu_mp_node *node,
			
 
				 						       void **arg, int *arg_size)
			
 
				 {
			
--- a/src/drivers/mp_common/mp_common.h
+++ b/src/drivers/mp_common/mp_common.h
@@ -25,6 +25,8 @@
 
				 #include <common/list.h>
			
 
				 #include <common/barrier.h>
			
 
				 #include <common/thread.h>
			
 
				+#include <datawizard/interfaces/data_interface.h>
			
 
				+
			
 
				 #ifdef STARPU_USE_MP
			
 
				 
			
 
				 #ifdef STARPU_USE_MIC
			
@@ -113,7 +115,8 @@ LIST_TYPE(mp_message,
 
				 struct mp_task 
			
 
				 {
			
 
				 	void (*kernel)(void **, void *);
			
 
				-	void *interfaces[STARPU_NMAXBUFS]; 
			
 
				+	void * interfaces[STARPU_NMAXBUFS]; 
			
 
				+	unsigned nb_interfaces;
			
 
				 	void *cl_arg;
			
 
				 	unsigned coreid;
			
 
				 	enum starpu_codelet_type type;
			
@@ -194,6 +197,7 @@ struct _starpu_mp_node
 
				 
			
 
				 	/* Node general functions */
			
 
				 	void (*init)(struct _starpu_mp_node *node);
			
 
				+	void (*launch_workers)(struct _starpu_mp_node *node);
			
 
				 	void (*deinit)(struct _starpu_mp_node *node);
			
 
				 	void (*report_error)(const char *, const char *, const int, const int);
			
 
				 
			
--- a/src/drivers/mp_common/sink_common.c
+++ b/src/drivers/mp_common/sink_common.c
@@ -251,6 +251,7 @@ void _starpu_sink_common_worker(void)
 
				 	STARPU_PTHREAD_KEY_CREATE(&worker_key, NULL);
			
 
				 
			
 
				 
			
 
				+	struct _starpu_machine_config *config;
			
 
				 	while (!exit_starpu)
			
 
				 	{
			
 
				 		/* If we have received a message */
			
@@ -264,6 +265,7 @@ void _starpu_sink_common_worker(void)
 
				 					exit_starpu = 1;
			
 
				 					break;
			
 
				 				case STARPU_EXECUTE:
			
 
				+					config = _starpu_get_machine_config();
			
 
				 					node->execute(node, arg, arg_size);
			
 
				 					break;
			
 
				 				case STARPU_SINK_NBCORES:
			
@@ -305,21 +307,22 @@ void _starpu_sink_common_worker(void)
 
				 			}
			
 
				 		}
			
 
				 
			
 
				-		pthread_mutex_lock(&node->message_queue_mutex);
			
 
				+		STARPU_PTHREAD_MUTEX_LOCK(&node->message_queue_mutex);
			
 
				 		/* If the list is not empty */
			
 
				 		if(!mp_message_list_empty(node->message_queue))
			
 
				 		{
			
 
				 			/* We pop a message and send it to the host */
			
 
				 			struct mp_message * message = mp_message_list_pop_back(node->message_queue);
			
 
				-			pthread_mutex_unlock(&node->message_queue_mutex);
			
 
				+			STARPU_PTHREAD_MUTEX_UNLOCK(&node->message_queue_mutex);
			
 
				 			//_STARPU_DEBUG("telling host that we have finished the task %p sur %d.\n", task->kernel, task->coreid);
			
 
				+			config = _starpu_get_machine_config();
			
 
				 			_starpu_mp_common_send_command(node, message->type, 
			
 
				 					&message->buffer, message->size);
			
 
				 			mp_message_delete(message);
			
 
				 		}
			
 
				 		else
			
 
				 		{
			
 
				-			pthread_mutex_unlock(&node->message_queue_mutex);
			
 
				+			STARPU_PTHREAD_MUTEX_UNLOCK(&node->message_queue_mutex);
			
 
				 		}
			
 
				 	}
			
 
				 
			
@@ -376,6 +379,7 @@ static void _starpu_sink_common_erase_barrier(struct _starpu_mp_node * node, str
 
				  */
			
 
				 static void _starpu_sink_common_append_message(struct _starpu_mp_node *node, struct mp_message * message)
			
 
				 {
			
 
				+	struct _starpu_machine_config *config = _starpu_get_machine_config();
			
 
				 	STARPU_PTHREAD_MUTEX_LOCK(&node->message_queue_mutex);
			
 
				 	mp_message_list_push_front(node->message_queue,message);
			
 
				 	STARPU_PTHREAD_MUTEX_UNLOCK(&node->message_queue_mutex);
			
@@ -506,6 +510,10 @@ static void _starpu_sink_common_execute_kernel(struct _starpu_mp_node *node, int
 
				 	/* tell the sink that the execution is completed */
			
 
				 	_starpu_sink_common_execution_completed_message(node,task);
			
 
				 
			
 
				+	/*free the task*/
			
 
				+	unsigned i;
			
 
				+	for (i = 0; i < task->nb_interfaces; i++)
			
 
				+		free(task->interfaces[i]);
			
 
				 	free(task);
			
 
				 
			
 
				 }
			
@@ -518,7 +526,6 @@ void* _starpu_sink_thread(void * thread_arg)
 
				 {
			
 
				 	/* Retrieve the information from the structure */
			
 
				 	struct _starpu_mp_node *node = ((struct arg_sink_thread *)thread_arg)->node;
			
 
				-	sem_t * sem = ((struct arg_sink_thread *)thread_arg)->sem;
			
 
				 	int coreid =((struct arg_sink_thread *)thread_arg)->coreid;
			
 
				 	/* free the structure */
			
 
				 	free(thread_arg);
			
@@ -531,7 +538,7 @@ void* _starpu_sink_thread(void * thread_arg)
 
				 	while(node->is_running)
			
 
				 	{
			
 
				 		/*Wait there is a task available */
			
 
				-		sem_wait(sem);
			
 
				+		sem_wait(&node->sem_run_table[coreid]);
			
 
				 		if(node->run_table[coreid] != NULL)
			
 
				 			_starpu_sink_common_execute_kernel(node,coreid,node->run_table[coreid],worker);
			
 
				 
			
@@ -562,7 +569,7 @@ static void _starpu_sink_common_execute_thread(struct _starpu_mp_node *node, str
 
				 void _starpu_sink_common_execute(struct _starpu_mp_node *node,
			
 
				 		void *arg, int arg_size)
			
 
				 {
			
 
				-	unsigned nb_interfaces, i;
			
 
				+	unsigned i;
			
 
				 
			
 
				 	void *arg_ptr = arg;
			
 
				 	struct mp_task *task = malloc(sizeof(struct mp_task));
			
@@ -587,16 +594,19 @@ void _starpu_sink_common_execute(struct _starpu_mp_node *node,
 
				 	task->coreid = *(unsigned *) arg_ptr;
			
 
				 	arg_ptr += sizeof(task->coreid);
			
 
				 
			
 
				-	nb_interfaces = *(unsigned *) arg_ptr;
			
 
				-	arg_ptr += sizeof(nb_interfaces);
			
 
				+	task->nb_interfaces = *(unsigned *) arg_ptr;
			
 
				+	arg_ptr += sizeof(task->nb_interfaces);
			
 
				 
			
 
				 	/* The function needs an array pointing to each interface it needs
			
 
				 	 * during execution. As in sink-side there is no mean to know which
			
 
				 	 * kind of interface to expect, the array is composed of unions of
			
 
				 	 * interfaces, thus we expect the same size anyway */
			
 
				-	for (i = 0; i < nb_interfaces; i++)
			
 
				+	for (i = 0; i < task->nb_interfaces; i++)
			
 
				 	{
			
 
				-		task->interfaces[i] = arg_ptr;
			
 
				+		union _starpu_interface * interface = malloc(sizeof(union _starpu_interface));   
			
 
				+		memcpy(interface, arg_ptr, 
			
 
				+				sizeof(union _starpu_interface));
			
 
				+		task->interfaces[i] = interface;
			
 
				 		arg_ptr += sizeof(union _starpu_interface);
			
 
				 	}
			
 
				 
			
--- a/src/drivers/mp_common/sink_common.h
+++ b/src/drivers/mp_common/sink_common.h
@@ -35,7 +35,6 @@ struct _starpu_sink_topology
 
				 struct arg_sink_thread
			
 
				 {
			
 
				 	struct _starpu_mp_node *node;
			
 
				-	sem_t* sem;
			
 
				 	int coreid;
			
 
				 };
			
 
				 
			
--- a/src/drivers/mp_common/source_common.c
+++ b/src/drivers/mp_common/source_common.c
@@ -38,7 +38,8 @@ static int _starpu_src_common_finalize_job (struct _starpu_job *j, struct _starp
 
				 	struct timespec codelet_end;
			
 
				 	_starpu_driver_end_job(worker, j, worker->perf_arch, &codelet_end, 0,
			
 
				 			profiling);
			
 
				-	int count = 0;
			
 
				+	
			
 
				+	int count = worker->current_rank;
			
 
				 
			
 
				 	/* If it's a combined worker, we check if it's the last one of his combined */
			
 
				 	if(j->task_size > 1)
			
@@ -52,7 +53,6 @@ static int _starpu_src_common_finalize_job (struct _starpu_job *j, struct _starp
 
				 		pthread_mutex_unlock(&cb_worker->count_mutex);
			
 
				 	}
			
 
				 
			
 
				-	_STARPU_DEBUG("\nworkerid:%d\n",worker->workerid);
			
 
				 	/* Finalize the execution */
			
 
				 	if(count == 0)
			
 
				 	{
			
@@ -69,7 +69,7 @@ static int _starpu_src_common_finalize_job (struct _starpu_job *j, struct _starp
 
				 }
			
 
				 
			
 
				 
			
 
				-/* */
			
 
				+/* Complete the execution of the job */
			
 
				 static int _starpu_src_common_process_completed_job(struct _starpu_worker_set *workerset, void * arg, int arg_size)
			
 
				 {
			
 
				 	int coreid;
			
@@ -79,17 +79,15 @@ static int _starpu_src_common_process_completed_job(struct _starpu_worker_set *w
 
				 	coreid = *(int *) arg;
			
 
				 
			
 
				 	struct _starpu_worker *worker = &workerset->workers[coreid];
			
 
				-	struct starpu_task *task = worker->current_task;
			
 
				-	struct _starpu_job *j = _starpu_get_job_associated_to_task (task);
			
 
				+	struct _starpu_job *j = _starpu_get_job_associated_to_task(worker->current_task);
			
 
				 
			
 
				 	struct _starpu_worker * old_worker = _starpu_get_local_worker_key();
			
 
				+
			
 
				 	_starpu_set_local_worker_key(worker);
			
 
				-	
			
 
				 	_starpu_src_common_finalize_job (j, worker);
			
 
				-	worker->current_task = NULL;
			
 
				-
			
 
				 	_starpu_set_local_worker_key(old_worker);
			
 
				 
			
 
				+	worker->current_task = NULL;
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -112,51 +110,131 @@ static void _starpu_src_common_pre_exec(void * arg, int arg_size)
 
				  * return 0 if the message has not been handle (it's certainly mean that it's a synchronous message)
			
 
				  * return 1 if the message has been handle
			
 
				  */
			
 
				-static int _starpu_src_common_handle_async(const struct _starpu_mp_node *node, 
			
 
				-		void ** arg, int* arg_size, 
			
 
				-		enum _starpu_mp_command *answer)
			
 
				+static int _starpu_src_common_handle_async(const struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNUSED, 
			
 
				+		void * arg, int arg_size, 
			
 
				+		enum _starpu_mp_command answer)
			
 
				 {
			
 
				-	struct _starpu_worker_set * worker_set = _starpu_get_worker_struct(starpu_worker_get_id())->set;
			
 
				-	*answer = _starpu_mp_common_recv_command(node, arg, arg_size);
			
 
				-	switch(*answer) 
			
 
				+	struct _starpu_worker_set * worker_set=NULL; 
			
 
				+	switch(answer) 
			
 
				 	{
			
 
				 		case STARPU_EXECUTION_COMPLETED:
			
 
				-			_starpu_src_common_process_completed_job(worker_set, *arg, *arg_size);
			
 
				+			worker_set = _starpu_get_worker_struct(starpu_worker_get_id())->set;
			
 
				+			_starpu_src_common_process_completed_job(worker_set, arg, arg_size);
			
 
				 			break;
			
 
				 		case STARPU_PRE_EXECUTION:
			
 
				-			_starpu_src_common_pre_exec(*arg,*arg_size);
			
 
				+			_starpu_src_common_pre_exec(arg,arg_size);
			
 
				 			break;
			
 
				 		default:
			
 
				 			return 0;
			
 
				 			break;
			
 
				 	}
			
 
				-
			
 
				 	return 1;
			
 
				 }
			
 
				 
			
 
				 
			
 
				-/* Handle all asynchronous messages and return when a synchronous message is received */
			
 
				-static enum _starpu_mp_command _starpu_src_common_wait_command_sync(const struct _starpu_mp_node *node, 
			
 
				+static void _starpu_src_common_handle_stored_async(struct _starpu_mp_node *node)
			
 
				+{
			
 
				+	STARPU_PTHREAD_MUTEX_LOCK(&node->message_queue_mutex);
			
 
				+	/* while the list is not empty */
			
 
				+	while(!mp_message_list_empty(node->message_queue))
			
 
				+	{
			
 
				+		/* We pop a message and handle it */
			
 
				+		struct mp_message * message = mp_message_list_pop_back(node->message_queue);
			
 
				+		_starpu_src_common_handle_async(node, message->buffer, 
			
 
				+				message->size, message->type);
			
 
				+		mp_message_delete(message);
			
 
				+	}
			
 
				+	STARPU_PTHREAD_MUTEX_UNLOCK(&node->message_queue_mutex);
			
 
				+}
			
 
				+
			
 
				+/* Store a message if it is asynchronous
			
 
				+ * return 1 if the message has been stored
			
 
				+ * return 0 if the message is unknown or synchronous */
			
 
				+int _starpu_src_common_store_message(struct _starpu_mp_node *node, 
			
 
				+		void * arg, int arg_size, enum _starpu_mp_command answer)
			
 
				+{
			
 
				+	struct mp_message * message = NULL;
			
 
				+	switch(answer)
			
 
				+	{
			
 
				+		case STARPU_EXECUTION_COMPLETED:
			
 
				+		case STARPU_PRE_EXECUTION:
			
 
				+			message = mp_message_new();
			
 
				+			message->type = answer;
			
 
				+			memcpy(message->buffer, arg, arg_size); 
			
 
				+			message->size = arg_size; 
			
 
				+
			
 
				+			STARPU_PTHREAD_MUTEX_LOCK(&node->message_queue_mutex);
			
 
				+			mp_message_list_push_front(node->message_queue,message);
			
 
				+			STARPU_PTHREAD_MUTEX_UNLOCK(&node->message_queue_mutex);
			
 
				+			return 1;
			
 
				+			break;
			
 
				+		default:
			
 
				+			return 0;
			
 
				+			break;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/* Store all asynchronous messages and return when a synchronous message is received */
			
 
				+static enum _starpu_mp_command _starpu_src_common_wait_command_sync(struct _starpu_mp_node *node, 
			
 
				 		void ** arg, int* arg_size)
			
 
				 {
			
 
				 	enum _starpu_mp_command answer;
			
 
				-	while(_starpu_src_common_handle_async(node,arg,arg_size,&answer));
			
 
				+	int is_sync = 0;
			
 
				+	while(!is_sync)
			
 
				+	{
			
 
				+		answer = _starpu_mp_common_recv_command(node, arg, arg_size);
			
 
				+		if(!_starpu_src_common_store_message(node,*arg,*arg_size,answer))
			
 
				+			is_sync=1;
			
 
				+	}
			
 
				 	return answer;
			
 
				 }
			
 
				 
			
 
				 /* Handle a asynchrone message and return a error if a synchronous message is received */
			
 
				-static void _starpu_src_common_recv_async(struct _starpu_mp_node * baseworker_node)
			
 
				+static void _starpu_src_common_recv_async(struct _starpu_mp_node * node)
			
 
				 {
			
 
				 	enum _starpu_mp_command answer;
			
 
				 	void *arg;
			
 
				 	int arg_size;
			
 
				-	if(!_starpu_src_common_handle_async(baseworker_node,&arg,&arg_size,&answer))
			
 
				+	answer = _starpu_mp_common_recv_command(node, &arg, &arg_size);
			
 
				+	if(!_starpu_src_common_handle_async(node,arg,arg_size,answer))
			
 
				 	{
			
 
				 		printf("incorrect commande: unknown command or sync command");
			
 
				 		STARPU_ASSERT(0);
			
 
				 	}	
			
 
				 }
			
 
				 
			
 
				+/* Store all asynchronous messages until a completed execution message from a specific worker has been received */
			
 
				+ enum _starpu_mp_command _starpu_src_common_wait_completed_execution(struct _starpu_mp_node *node, int devid, void **arg, int * arg_size)
			
 
				+{
			
 
				+	enum _starpu_mp_command answer;
			
 
				+
			
 
				+	int completed = 0;	
			
 
				+	while(!completed)
			
 
				+	{
			
 
				+		answer = _starpu_mp_common_recv_command (node, arg, arg_size);
			
 
				+
			
 
				+		if(answer == STARPU_EXECUTION_COMPLETED)
			
 
				+		{
			
 
				+			int coreid;
			
 
				+			STARPU_ASSERT(sizeof(coreid) == *arg_size);	
			
 
				+			coreid = *(int *) *arg;
			
 
				+			if(devid == coreid)
			
 
				+				completed = 1;
			
 
				+			else
			
 
				+				if(!_starpu_src_common_store_message(node, *arg, *arg_size, answer))
			
 
				+					/* We received an unknown or synchronous message */
			
 
				+					STARPU_ASSERT(0);
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			if(!_starpu_src_common_store_message(node, *arg, *arg_size, answer))
			
 
				+				/* We received an unknown or synchronous message */
			
 
				+				STARPU_ASSERT(0);
			
 
				+		}
			
 
				+	}
			
 
				+	return answer;
			
 
				+}
			
 
				+
			
 
				 
			
 
				 /* Send a request to the sink NODE for the number of cores on it. */
			
 
				 int _starpu_src_common_sink_nbcores (const struct _starpu_mp_node *node, int *buf)
			
@@ -227,7 +305,7 @@ int _starpu_src_common_lookup(struct _starpu_mp_node *node,
 
				  * pointer.
			
 
				  * Data interfaces in task are send to the sink.
			
 
				  */
			
 
				-int _starpu_src_common_execute_kernel(const struct _starpu_mp_node *node,
			
 
				+int _starpu_src_common_execute_kernel(struct _starpu_mp_node *node,
			
 
				 		void (*kernel)(void), unsigned coreid,
			
 
				 		enum starpu_codelet_type type,
			
 
				 		int is_parallel_task, int cb_workerid,
			
@@ -288,9 +366,11 @@ int _starpu_src_common_execute_kernel(const struct _starpu_mp_node *node,
 
				 	 * executed on a sink with a different memory, whereas a codelet is
			
 
				 	 * executed on the host part for the other accelerators.
			
 
				 	 * Thus we need to send a copy of each interface on the MP device */
			
 
				+
			
 
				 	for (i = 0; i < nb_interfaces; i++)
			
 
				 	{
			
 
				 		starpu_data_handle_t handle = handles[i];
			
 
				+
			
 
				 		memcpy (buffer_ptr, interfaces[i],
			
 
				 				handle->ops->interface_size);
			
 
				 		/* The sink side has no mean to get the type of each
			
@@ -366,7 +446,7 @@ static int _starpu_src_common_execute(struct _starpu_job *j,
 
				  * allocated area ;
			
 
				  * else it returns 1 if the allocation fail.
			
 
				  */
			
 
				-int _starpu_src_common_allocate(const struct _starpu_mp_node *mp_node,
			
 
				+int _starpu_src_common_allocate(struct _starpu_mp_node *mp_node,
			
 
				 		void **addr, size_t size)
			
 
				 {
			
 
				 	enum _starpu_mp_command answer;
			
@@ -376,7 +456,7 @@ int _starpu_src_common_allocate(const struct _starpu_mp_node *mp_node,
 
				 	_starpu_mp_common_send_command(mp_node, STARPU_ALLOCATE, &size,
			
 
				 			sizeof(size));
			
 
				 
			
 
				-	answer = _starpu_mp_common_recv_command(mp_node, &arg, &arg_size);
			
 
				+	answer = _starpu_src_common_wait_command_sync(mp_node, &arg, &arg_size);
			
 
				 
			
 
				 	if (answer == STARPU_ERROR_ALLOCATE)
			
 
				 		return 1;
			
@@ -595,8 +675,7 @@ void _starpu_src_common_worker(struct _starpu_worker_set * worker_set,
 
				 		unsigned baseworkerid, 
			
 
				 		struct _starpu_mp_node * mp_node)
			
 
				 { 
			
 
				-	struct _starpu_worker * baseworker = &worker_set->workers[baseworkerid];
			
 
				-	unsigned memnode = baseworker->memory_node;
			
 
				+	unsigned memnode = worker_set->workers[0].memory_node;
			
 
				 	struct starpu_task **tasks = malloc(sizeof(struct starpu_task *)*worker_set->nworkers);
			
 
				 
			
 
				 	_starpu_src_common_send_workers(mp_node, baseworkerid, worker_set->nworkers);
			
@@ -611,6 +690,9 @@ void _starpu_src_common_worker(struct _starpu_worker_set * worker_set,
 
				 		_starpu_datawizard_progress(memnode, 1);
			
 
				 		_STARPU_TRACE_END_PROGRESS(memnode);
			
 
				 
			
 
				+		/* Handle messages which have been stored */
			
 
				+		_starpu_src_common_handle_stored_async(mp_node);
			
 
				+
			
 
				 		/* poll the device for completed jobs.*/
			
 
				 		while(mp_node->mp_recv_is_ready(mp_node))
			
 
				 			_starpu_src_common_recv_async(mp_node);
			
--- a/src/drivers/mp_common/source_common.h
+++ b/src/drivers/mp_common/source_common.h
@@ -31,6 +31,11 @@ enum _starpu_mp_command _starpu_src_common_wait_command_sync(struct _starpu_mp_n
 
				 void _starpu_src_common_recv_async(struct _starpu_worker_set *worker_set, 
			
 
				 				   struct _starpu_mp_node * baseworker_node);
			
 
				 
			
 
				+int _starpu_src_common_store_message(struct _starpu_mp_node *node, 
			
 
				+		void * arg, int arg_size, enum _starpu_mp_command answer);
			
 
				+
			
 
				+enum _starpu_mp_command _starpu_src_common_wait_completed_execution(struct _starpu_mp_node *node, int devid, void **arg, int * arg_size);
			
 
				+
			
 
				 int _starpu_src_common_sink_nbcores (const struct _starpu_mp_node *node, int *buf);
			
 
				 
			
 
				 int _starpu_src_common_lookup(const struct _starpu_mp_node *node,
			
--- a/src/drivers/scc/driver_scc_sink.c
+++ b/src/drivers/scc/driver_scc_sink.c
@@ -34,11 +34,17 @@ void _starpu_scc_sink_init(struct _starpu_mp_node *node)
 
				 	 * get nb_cores *
			
 
				 	 ****************/
			
 
				 	node->nb_cores = 1; 
			
 
				+	STARPU_ASSERT(0);
			
 
				+
			
 
				+}
			
 
				 
			
 
				+void _starpu_scc_sink_launch_workers(struct _starpu_mp_node *node)
			
 
				+{
			
 
				 	/*****************
			
 
				 	 *     TODO      *
			
 
				 	 * init thread   *
			
 
				 	 *****************/
			
 
				+	STARPU_ASSERT(0);
			
 
				 }
			
 
				 
			
 
				 void _starpu_scc_sink_deinit(struct _starpu_mp_node *node)
			
--- a/src/drivers/scc/driver_scc_sink.h
+++ b/src/drivers/scc/driver_scc_sink.h
@@ -25,6 +25,7 @@
 
				 #include <drivers/mp_common/mp_common.h>
			
 
				 
			
 
				 void _starpu_scc_sink_init(struct _starpu_mp_node *node);
			
 
				+void _starpu_scc_sink_launch_workers(struct _starpu_mp_node *node);
			
 
				 void _starpu_scc_sink_deinit(struct _starpu_mp_node *node);
			
 
				 
			
 
				 void _starpu_scc_sink_send_to_device(const struct _starpu_mp_node *node, int dst_devid, void *msg, int len);
			
--- a/src/sched_policies/deque_modeling_policy_data_aware.c
+++ b/src/sched_policies/deque_modeling_policy_data_aware.c
@@ -349,7 +349,7 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 
				 #ifdef HAVE_AYUDAME_H
			
 
				 	if (AYU_event)
			
 
				 	{
			
 
				-		int id = best_workerid;
			
 
				+		intptr_t id = best_workerid;
			
 
				 		AYU_event(AYU_ADDTASKTOQUEUE, _starpu_get_job_associated_to_task(task)->job_id, &id);
			
 
				 	}
			
 
				 #endif
			
--- a/src/sched_policies/random_policy.c
+++ b/src/sched_policies/random_policy.c
@@ -1,6 +1,6 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010-2012  Université de Bordeaux 1
			
 
				+ * Copyright (C) 2010-2013  Université de Bordeaux 1
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -89,7 +89,7 @@ static int _random_push_task(struct starpu_task *task, unsigned prio)
 
				 #ifdef HAVE_AYUDAME_H
			
 
				 	if (AYU_event)
			
 
				 	{
			
 
				-		int id = selected;
			
 
				+		intptr_t id = selected;
			
 
				 		AYU_event(AYU_ADDTASKTOQUEUE, _starpu_get_job_associated_to_task(task)->job_id, &id);
			
 
				 	}
			
 
				 #endif
			
--- a/src/sched_policies/work_stealing_policy.c
+++ b/src/sched_policies/work_stealing_policy.c
@@ -369,7 +369,7 @@ int ws_push_task(struct starpu_task *task)
 
				 #ifdef HAVE_AYUDAME_H
			
 
				 	if (AYU_event)
			
 
				 	{
			
 
				-		int id = workerid;
			
 
				+		intptr_t id = workerid;
			
 
				 		AYU_event(AYU_ADDTASKTOQUEUE, j->job_id, &id);
			
 
				 	}
			
 
				 #endif
			
--- a/tests/datawizard/interfaces/coo/coo_interface.c
+++ b/tests/datawizard/interfaces/coo/coo_interface.c
@@ -21,7 +21,7 @@
 
				 #define MATRIX_SIZE (NX*NY)
			
 
				 
			
 
				 #if defined(STARPU_USE_CPU) || defined(STAPRU_USE_MIC)
			
 
				-static void test_coo_cpu_func(void *buffers[], void *args);
			
 
				+void test_coo_cpu_func(void *buffers[], void *args);
			
 
				 #endif
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 extern void test_coo_cuda_func(void *buffers[], void *args);
			
--- a/tests/disk/disk_copy.c
+++ b/tests/disk/disk_copy.c
@@ -41,7 +41,7 @@ int main(int argc, char **argv)
 
				 	if (ret == -ENODEV) goto enodev;
			
 
				 
			
 
				 	/* register a disk */
			
 
				-	int new_dd = starpu_disk_register(&starpu_disk_unistd_ops, (void *) "/tmp", 1024*1024*200);
			
 
				+	int new_dd = starpu_disk_register(&starpu_disk_stdio_ops, (void *) "/tmp", 1024*1024*200);
			
 
				 	/* can't write on /tmp/ */
			
 
				 	if (new_dd == -ENOENT) goto enoent;
			
 
				 	
			
--- a/tests/main/starpu_init.c
+++ b/tests/main/starpu_init.c
@@ -83,6 +83,7 @@ int main(int argc, char **argv)
 
				 
			
 
				 	ret = check_cpu(-1, -1, -1, &cpu_init);
			
 
				 	if (ret) return ret;
			
 
				+	if (cpu_init == 0) return STARPU_TEST_SKIPPED;
			
 
				 
			
 
				 	if (cpu_init >= STARPU_MAXCPUS-5)
			
 
				 	{
			
--- a/tests/microbenchs/async_tasks_overhead.c
+++ b/tests/microbenchs/async_tasks_overhead.c
@@ -31,7 +31,6 @@ static double cumulated_pop = 0.0;
 
				 
			
 
				 void dummy_func(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *arg STARPU_ATTRIBUTE_UNUSED)
			
 
				 {
			
 
				-	usleep(10000);
			
 
				 }
			
 
				 
			
 
				 static struct starpu_codelet dummy_codelet =
			
--- a/tests/parallel_tasks/explicit_combined_worker.c
+++ b/tests/parallel_tasks/explicit_combined_worker.c
@@ -21,7 +21,7 @@
 
				 #include <unistd.h>
			
 
				 #include "../helper.h"
			
 
				 
			
 
				-#define N	10
			
 
				+#define N	1000
			
 
				 #define VECTORSIZE	1024
			
 
				 
			
 
				 void codelet_null(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
			
@@ -43,9 +43,9 @@ static struct starpu_codelet cl =
 
				 	.type = STARPU_FORKJOIN,
			
 
				 	.max_parallelism = INT_MAX,
			
 
				 	.cpu_funcs = {codelet_null, NULL},
			
 
				+	.cpu_funcs_name = {"codelet_null", NULL},
			
 
				 	.cuda_funcs = {codelet_null, NULL},
			
 
				         .opencl_funcs = {codelet_null, NULL},
			
 
				-	.cpu_funcs_name = {"codelet_null", NULL},
			
 
				 	.nbuffers = 1,
			
 
				 	.modes = {STARPU_R}
			
 
				 };
			
--- a/tests/parallel_tasks/parallel_kernels.c
+++ b/tests/parallel_tasks/parallel_kernels.c
@@ -50,8 +50,8 @@ static struct starpu_codelet cl =
 
				 	.max_parallelism = INT_MAX,
			
 
				 	.cpu_funcs = {codelet_null, NULL},
			
 
				 	.cuda_funcs = {codelet_null, NULL},
			
 
				-        .opencl_funcs = {codelet_null, NULL},
			
 
				 	.cpu_funcs_name = {"codelet_null", NULL},
			
 
				+        .opencl_funcs = {codelet_null, NULL},
			
 
				 	.model = &model,
			
 
				 	.nbuffers = 1,
			
 
				 	.modes = {STARPU_R}
			
@@ -66,7 +66,7 @@ int main(int argc, char **argv)
 
				 
			
 
				         struct starpu_conf conf;
			
 
				 	starpu_conf_init(&conf);
			
 
				-	conf.sched_policy_name = "peager";
			
 
				+	conf.sched_policy_name = "pheft";
			
 
				 	conf.calibrate = 1;
			
 
				 
			
 
				 	ret = starpu_init(&conf);
			
--- a/tests/parallel_tasks/parallel_kernels_spmd.c
+++ b/tests/parallel_tasks/parallel_kernels_spmd.c
@@ -21,17 +21,17 @@
 
				 #include <unistd.h>
			
 
				 #include "../helper.h"
			
 
				 
			
 
				-#define N	100
			
 
				+#define N	1000
			
 
				 #define VECTORSIZE	1024
			
 
				 
			
 
				 void codelet_null(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
			
 
				 {
			
 
				 	STARPU_SKIP_IF_VALGRIND;
			
 
				 
			
 
				-//	int worker_size = starpu_combined_worker_get_size();
			
 
				-//	STARPU_ASSERT(worker_size > 0);
			
 
				+	int worker_size = starpu_combined_worker_get_size();
			
 
				+	STARPU_ASSERT(worker_size > 0);
			
 
				 
			
 
				-//	usleep(1000/worker_size);
			
 
				+	usleep(1000/worker_size);
			
 
				 #if 0
			
 
				 	int id = starpu_worker_get_id();
			
 
				 	int combined_id = starpu_combined_worker_get_id();
			
@@ -51,9 +51,9 @@ static struct starpu_codelet cl =
 
				 	.type = STARPU_SPMD,
			
 
				 	.max_parallelism = INT_MAX,
			
 
				 	.cpu_funcs = {codelet_null, NULL},
			
 
				+	.cpu_funcs_name = {"codelet_null", NULL},
			
 
				 	.cuda_funcs = {codelet_null, NULL},
			
 
				         .opencl_funcs = {codelet_null, NULL},
			
 
				-	.cpu_funcs_name = {"codelet_null", NULL},
			
 
				 	.model = &model,
			
 
				 	.nbuffers = 1,
			
 
				 	.modes = {STARPU_R}
			
@@ -68,7 +68,7 @@ int main(int argc, char **argv)
 
				 
			
 
				         struct starpu_conf conf;
			
 
				 	starpu_conf_init(&conf);
			
 
				-	conf.sched_policy_name = "peager";
			
 
				+	conf.sched_policy_name = "pheft";
			
 
				 	conf.calibrate = 1;
			
 
				 
			
 
				 	ret = starpu_init(&conf);
			
--- a/tests/parallel_tasks/spmd_peager.c
+++ b/tests/parallel_tasks/spmd_peager.c
@@ -20,7 +20,7 @@
 
				 #include <unistd.h>
			
 
				 #include "../helper.h"
			
 
				 
			
 
				-#define N	1000	
			
 
				+#define N	1000
			
 
				 #define VECTORSIZE	1024
			
 
				 
			
 
				 starpu_data_handle_t v_handle;
			
@@ -33,10 +33,9 @@ void codelet_null(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 
				 	int worker_size = starpu_combined_worker_get_size();
			
 
				 	STARPU_ASSERT(worker_size > 0);
			
 
				 
			
 
				-	//FPRINTF(stderr, "WORKERSIZE : %d\n", worker_size);
			
 
				+//	FPRINTF(stderr, "WORKERSIZE : %d\n", worker_size);
			
 
				 
			
 
				 	usleep(1000/worker_size);
			
 
				-
			
 
				 #if 0
			
 
				 	int id = starpu_worker_get_id();
			
 
				 	int combined_id = starpu_combined_worker_get_id();
			
@@ -50,9 +49,9 @@ static struct starpu_codelet cl =
 
				 	.type = STARPU_SPMD,
			
 
				 	.max_parallelism = INT_MAX,
			
 
				 	.cpu_funcs = {codelet_null, NULL},
			
 
				+	.cpu_funcs_name = {"codelet_null", NULL},
			
 
				 	.cuda_funcs = {codelet_null, NULL},
			
 
				         .opencl_funcs = {codelet_null, NULL},
			
 
				-	.cpu_funcs_name = {"codelet_null", NULL},
			
 
				 	.nbuffers = 1,
			
 
				 	.modes = {STARPU_R}
			
 
				 };
			
--- a/tests/sched_policies/execute_all_tasks.c
+++ b/tests/sched_policies/execute_all_tasks.c
@@ -26,7 +26,7 @@
 
				 
			
 
				 #define NTASKS           8
			
 
				 
			
 
				-static void
			
 
				+void
			
 
				 dummy(void *buffers[], void *args)
			
 
				 {
			
 
				 	(void) buffers;
			
@@ -50,6 +50,7 @@ run(struct starpu_sched_policy *p)
 
				 	struct starpu_codelet cl =
			
 
				 	{
			
 
				 		.cpu_funcs    = {dummy, NULL},
			
 
				+		.cpu_funcs_name = {"dummy", NULL},
			
 
				 		.cuda_funcs   = {dummy, NULL},
			
 
				 		.opencl_funcs = {dummy, NULL},
			
 
				 		.nbuffers     = 0