瀏覽代碼

mic: Fix bugs

Thibaud Lambert 12 年之前
父節點
當前提交
3b7abcc2cf
共有 40 個文件被更改,包括 417 次插入 和 268 次删除
  1. 1 1
      configure.ac
  2. 1 1
      doc/doxygen/chapters/performance_feedback.doxy
  3. 7 1
      examples/stencil/stencil-tasks.c
  4. 129 127
      mpi/src/starpu_mpi.c
  5. 1 1
      src/common/uthash.h
  6. 13 2
      src/core/combined_workers.c
  7. 4 4
      src/core/debug.c
  8. 2 2
      src/core/dependencies/tags.c
  9. 2 0
      src/core/disk.h
  10. 1 1
      src/core/jobs.c
  11. 1 1
      src/core/sched_policy.c
  12. 1 5
      src/core/workers.c
  13. 1 0
      src/datawizard/data_request.c
  14. 0 2
      src/datawizard/filters.c
  15. 7 10
      src/datawizard/reduction.c
  16. 1 1
      src/drivers/driver_common/driver_common.c
  17. 24 42
      src/drivers/mic/driver_mic_sink.c
  18. 1 1
      src/drivers/mic/driver_mic_sink.h
  19. 1 1
      src/drivers/mic/driver_mic_source.c
  20. 1 1
      src/drivers/mic/driver_mic_source.h
  21. 50 3
      src/drivers/mp_common/mp_common.c
  22. 5 1
      src/drivers/mp_common/mp_common.h
  23. 20 10
      src/drivers/mp_common/sink_common.c
  24. 0 1
      src/drivers/mp_common/sink_common.h
  25. 109 27
      src/drivers/mp_common/source_common.c
  26. 5 0
      src/drivers/mp_common/source_common.h
  27. 6 0
      src/drivers/scc/driver_scc_sink.c
  28. 1 0
      src/drivers/scc/driver_scc_sink.h
  29. 1 1
      src/sched_policies/deque_modeling_policy_data_aware.c
  30. 2 2
      src/sched_policies/random_policy.c
  31. 1 1
      src/sched_policies/work_stealing_policy.c
  32. 1 1
      tests/datawizard/interfaces/coo/coo_interface.c
  33. 1 1
      tests/disk/disk_copy.c
  34. 1 0
      tests/main/starpu_init.c
  35. 0 1
      tests/microbenchs/async_tasks_overhead.c
  36. 2 2
      tests/parallel_tasks/explicit_combined_worker.c
  37. 2 2
      tests/parallel_tasks/parallel_kernels.c
  38. 6 6
      tests/parallel_tasks/parallel_kernels_spmd.c
  39. 3 4
      tests/parallel_tasks/spmd_peager.c
  40. 2 1
      tests/sched_policies/execute_all_tasks.c

+ 1 - 1
configure.ac

@@ -964,7 +964,7 @@ AC_DEFINE_UNQUOTED(STARPU_MAXMICDEVS, [$nmaxmicdev],
 AC_MSG_CHECKING(maximum number of MIC threads)
 AC_ARG_ENABLE(maxmicthreads, [AS_HELP_STRING([--enable-maxmicthreads=<number>],
 			[maximum number of MIC threads])],
-			nmaxmicthreads=$enableval, nmaxmicthreads=960)
+			nmaxmicthreads=$enableval, nmaxmicthreads=940)
 AC_MSG_RESULT($nmaxmicthread)
 
 AC_DEFINE_UNQUOTED(STARPU_MAXMICCORES, [$nmaxmicthreads],

+ 1 - 1
doc/doxygen/chapters/performance_feedback.doxy

@@ -10,7 +10,7 @@
 
 \section UsingTheTemanejoTaskDebugger Using The Temanejo Task Debugger
 
-StarPU can connect to Temanejo (see
+StarPU can connect to Temanejo >= 1.0rc2 (see
 http://www.hlrs.de/temanejo), to permit
 nice visual task debugging. To do so, build Temanejo's <c>libayudame.so</c>,
 install <c>Ayudame.h</c> to e.g. <c>/usr/local/include</c>, apply the

+ 7 - 1
examples/stencil/stencil-tasks.c

@@ -221,7 +221,8 @@ static struct starpu_codelet null =
 	.cpu_funcs_name = {"null_func", NULL},
 	.cuda_funcs = {null_func, NULL},
 	.opencl_funcs = {null_func, NULL},
-	.nbuffers = 2
+	.nbuffers = 2,
+	.name = "start"
 };
 
 void create_start_task(int z, int dir)
@@ -267,11 +268,15 @@ void create_tasks(int rank)
 	}
 
 	for (iter = 0; iter <= niter; iter++)
+	{
 	for (bz = 0; bz < nbz; bz++)
 	{
 		if ((iter > 0) && (get_block_mpi_node(bz) == rank))
 			create_task_update(iter, bz, rank);
 
+	}
+	for (bz = 0; bz < nbz; bz++)
+	{
 		if (iter != niter)
 		{
 			if ((get_block_mpi_node(bz) == rank) || (get_block_mpi_node(bz+1) == rank))
@@ -281,6 +286,7 @@ void create_tasks(int rank)
 				create_task_save(iter, bz, -1, rank);
 		}
 	}
+	}
 }
 
 /*

+ 129 - 127
mpi/src/starpu_mpi.c

@@ -37,7 +37,8 @@ static struct _starpu_mpi_req *_starpu_mpi_isend_common(starpu_data_handle_t dat
 static struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t data_handle,
 							int source, int mpi_tag, MPI_Comm comm,
 							unsigned detached, void (*callback)(void *), void *arg,
-							int sequential_consistency);
+							int sequential_consistency, int is_internal_req,
+							ssize_t psize);
 static void _starpu_mpi_handle_detached_request(struct _starpu_mpi_req *req);
 
 /* The list of requests that have been newly submitted by the application */
@@ -76,59 +77,60 @@ struct _starpu_mpi_copy_handle
  /*                                                      */
  /********************************************************/
 
-static struct _starpu_mpi_req *_starpu_mpi_req_hashmap = NULL;
+/** stores application requests for which data have not been received yet */
+static struct _starpu_mpi_req *_starpu_mpi_app_req_hashmap = NULL;
 /** stores data which have been received by MPI but have not been requested by the application */
 static struct _starpu_mpi_copy_handle *_starpu_mpi_copy_handle_hashmap = NULL;
 
-static struct _starpu_mpi_req* find_req(int mpi_tag)
+static struct _starpu_mpi_req* find_app_req(int mpi_tag)
 {
-	struct _starpu_mpi_req* req; // = malloc(sizeof(struct _starpu_mpi_req));
+	struct _starpu_mpi_req* req;
 
-	HASH_FIND_INT(_starpu_mpi_req_hashmap, &mpi_tag, req);
+	HASH_FIND_INT(_starpu_mpi_app_req_hashmap, &mpi_tag, req);
 
 	return req;
 }
 
-static void add_req(struct _starpu_mpi_req *req)
+static void add_app_req(struct _starpu_mpi_req *req)
 {
 	struct _starpu_mpi_req *test_req;
 
-	test_req = find_req(req->mpi_tag);
+	test_req = find_app_req(req->mpi_tag);
 
 	if (test_req == NULL)
 	{
-		HASH_ADD_INT(_starpu_mpi_req_hashmap, mpi_tag, req);
-		_STARPU_MPI_DEBUG(3, "Adding request %p with tag %d in the hashmap. \n", req, req->mpi_tag);
+		HASH_ADD_INT(_starpu_mpi_app_req_hashmap, mpi_tag, req);
+		_STARPU_MPI_DEBUG(3, "Adding request %p with tag %d in the application request hashmap. \n", req, req->mpi_tag);
 	}
 	else
 	{
-		_STARPU_MPI_DEBUG(3, "Error add_req : request %p with tag %d already in the hashmap. \n", req, req->mpi_tag);
+		_STARPU_MPI_DEBUG(3, "[Error] request %p with tag %d already in the application request hashmap. \n", req, req->mpi_tag);
 		int seq_const = starpu_data_get_sequential_consistency_flag(req->data_handle);
 		if (seq_const &&  req->sequential_consistency)
 		{
-			STARPU_ASSERT_MSG(!test_req, "Error add_req : request %p with tag %d wanted to be added to the hashmap, while another request %p with the same tag is already in it. \n Sequential consistency is activated : this is not supported by StarPU.", req, req->mpi_tag, test_req);
+			STARPU_ASSERT_MSG(!test_req, "[Error] request %p with tag %d wanted to be added to the application request hashmap, while another request %p with the same tag is already in it. \n Sequential consistency is activated : this is not supported by StarPU.", req, req->mpi_tag, test_req);
 		}
 		else
 		{
-			STARPU_ASSERT_MSG(!test_req, "Error add_req : request %p with tag %d wanted to be added to the hashmap, while another request %p with the same tag is already in it. \n Sequential consistency isn't activated for this handle : you should want to add dependencies between requests for which the sequential consistency is deactivated.", req, req->mpi_tag, test_req);
+			STARPU_ASSERT_MSG(!test_req, "[Error] request %p with tag %d wanted to be added to the application request hashmap, while another request %p with the same tag is already in it. \n Sequential consistency isn't activated for this handle : you should want to add dependencies between requests for which the sequential consistency is deactivated.", req, req->mpi_tag, test_req);
 		}
 	}
 }
 
-static void delete_req(struct _starpu_mpi_req *req)
+static void delete_app_req(struct _starpu_mpi_req *req)
 {
 	struct _starpu_mpi_req *test_req;
 
-	test_req = find_req(req->mpi_tag);
+	test_req = find_app_req(req->mpi_tag);
 
 	if (test_req != NULL)
 	{
-		HASH_DEL(_starpu_mpi_req_hashmap, req);
-		_STARPU_MPI_DEBUG(3, "Deleting request %p with tag %d from the hashmap. \n", req, req->mpi_tag);
+		HASH_DEL(_starpu_mpi_app_req_hashmap, req);
+		_STARPU_MPI_DEBUG(3, "Deleting application request %p with tag %d from the application request hashmap. \n", req, req->mpi_tag);
 	}
 	else
 	{
-		_STARPU_MPI_DEBUG(3, "Warning delete_req : request %p with tag %d isn't in the hashmap. \n", req, req->mpi_tag);
+		_STARPU_MPI_DEBUG(3, "[Warning] request %p with tag %d is NOT in the application request hashmap. \n", req, req->mpi_tag);
 	}
 }
 
@@ -219,7 +221,7 @@ static void _starpu_mpi_request_init(struct _starpu_mpi_req *req)
 	req->is_internal_req = 0;
 	req->envelope = NULL;
 	req->sequential_consistency = 1;
- }
+}
 
  /********************************************************/
  /*                                                      */
@@ -232,8 +234,10 @@ static void _starpu_mpi_request_init(struct _starpu_mpi_req *req)
 							       unsigned detached, void (*callback)(void *), void *arg,
 							       enum _starpu_mpi_request_type request_type, void (*func)(struct _starpu_mpi_req *),
 							       enum starpu_data_access_mode mode,
-							       int sequential_consistency)
- {
+							       int sequential_consistency,
+							       int is_internal_req,
+							       ssize_t psize)
+{
 
 	 _STARPU_MPI_LOG_IN();
 	 struct _starpu_mpi_req *req = malloc(sizeof(struct _starpu_mpi_req));
@@ -253,6 +257,8 @@ static void _starpu_mpi_request_init(struct _starpu_mpi_req *req)
 	 req->callback_arg = arg;
 	 req->func = func;
 	 req->sequential_consistency = sequential_consistency;
+	 req->is_internal_req = is_internal_req;
+	 req->count = psize;
 
 	 /* Asynchronously request StarPU to fetch the data in main memory: when
 	  * it is available in main memory, _starpu_mpi_submit_new_mpi_request(req) is called and
@@ -354,7 +360,7 @@ static struct _starpu_mpi_req *_starpu_mpi_isend_common(starpu_data_handle_t dat
 							unsigned detached, void (*callback)(void *), void *arg,
 							int sequential_consistency)
 {
-	return _starpu_mpi_isend_irecv_common(data_handle, dest, mpi_tag, comm, detached, callback, arg, SEND_REQ, _starpu_mpi_isend_size_func, STARPU_R, sequential_consistency);
+	return _starpu_mpi_isend_irecv_common(data_handle, dest, mpi_tag, comm, detached, callback, arg, SEND_REQ, _starpu_mpi_isend_size_func, STARPU_R, sequential_consistency, 0, 0);
 }
 
 int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, int mpi_tag, MPI_Comm comm)
@@ -429,9 +435,9 @@ static void _starpu_mpi_irecv_data_func(struct _starpu_mpi_req *req)
 	_STARPU_MPI_LOG_OUT();
 }
 
-static struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, unsigned detached, void (*callback)(void *), void *arg, int sequential_consistency)
+static struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, unsigned detached, void (*callback)(void *), void *arg, int sequential_consistency, int is_internal_req, ssize_t psize)
 {
-	return _starpu_mpi_isend_irecv_common(data_handle, source, mpi_tag, comm, detached, callback, arg, RECV_REQ, _starpu_mpi_irecv_data_func, STARPU_W, sequential_consistency);
+	return _starpu_mpi_isend_irecv_common(data_handle, source, mpi_tag, comm, detached, callback, arg, RECV_REQ, _starpu_mpi_irecv_data_func, STARPU_W, sequential_consistency, is_internal_req, psize);
 }
 
 int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int source, int mpi_tag, MPI_Comm comm)
@@ -447,7 +453,7 @@ int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *public_re
 		starpu_data_set_tag(data_handle, mpi_tag);
 
 	struct _starpu_mpi_req *req;
-	req = _starpu_mpi_irecv_common(data_handle, source, mpi_tag, comm, 0, NULL, NULL, 1);
+	req = _starpu_mpi_irecv_common(data_handle, source, mpi_tag, comm, 0, NULL, NULL, 1, 0, 0);
 
 	STARPU_ASSERT_MSG(req, "Invalid return for _starpu_mpi_irecv_common");
 	*public_req = req;
@@ -467,7 +473,7 @@ int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, int
 	if (tag == -1)
 		starpu_data_set_tag(data_handle, mpi_tag);
 
-	_starpu_mpi_irecv_common(data_handle, source, mpi_tag, comm, 1, callback, arg, 1);
+	_starpu_mpi_irecv_common(data_handle, source, mpi_tag, comm, 1, callback, arg, 1, 0, 0);
 	_STARPU_MPI_LOG_OUT();
 	return 0;
 }
@@ -475,7 +481,8 @@ int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, int
 int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency)
 {
 	_STARPU_MPI_LOG_IN();
-	_starpu_mpi_irecv_common(data_handle, source, mpi_tag, comm, 1, callback, arg, sequential_consistency);
+
+	_starpu_mpi_irecv_common(data_handle, source, mpi_tag, comm, 1, callback, arg, sequential_consistency, 0, 0);
 
 	_STARPU_MPI_LOG_OUT();
 	return 0;
@@ -766,58 +773,54 @@ static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req)
 
 	_STARPU_MPI_LOG_IN();
 
-	_STARPU_MPI_DEBUG(2, "complete MPI request %p type %s tag %d src %d data %p ptr %p datatype '%s' count %d user_datatype %d \n",
-			  req, _starpu_mpi_request_type(req->request_type), req->mpi_tag, req->srcdst, req->data_handle, req->ptr, _starpu_mpi_datatype(req->datatype), (int)req->count, req->user_datatype);
+	_STARPU_MPI_DEBUG(2, "complete MPI request %p type %s tag %d src %d data %p ptr %p datatype '%s' count %d user_datatype %d internal_req %p\n",
+			  req, _starpu_mpi_request_type(req->request_type), req->mpi_tag, req->srcdst, req->data_handle, req->ptr,
+			  _starpu_mpi_datatype(req->datatype), (int)req->count, req->user_datatype, req->internal_req);
 
-	if (req->request_type == RECV_REQ || req->request_type == SEND_REQ)
+	if (req->internal_req)
 	{
-		if (req->user_datatype == 1)
-		{
-			if (req->request_type == SEND_REQ)
-			{
-				// We need to make sure the communication for sending the size
-				// has completed, as MPI can re-order messages, let's call
-				// MPI_Wait to make sure data have been sent
-				ret = MPI_Wait(&req->size_req, MPI_STATUS_IGNORE);
-				STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Wait returning %d", ret);
-
-			}
-			if (req->request_type == RECV_REQ)
-				// req->ptr is freed by starpu_data_unpack
-				starpu_data_unpack(req->data_handle, req->ptr, req->count);
-			else
-				free(req->ptr);
-		}
-		else
+		struct _starpu_mpi_copy_handle *chandle = find_chandle(starpu_data_get_tag(req->data_handle));
+		_STARPU_MPI_DEBUG(3, "Handling deleting of copy_handle structure from the hashmap..\n");
+		delete_chandle(chandle);
+		free(chandle);
+	}
+	else
+	{
+		if (req->request_type == RECV_REQ || req->request_type == SEND_REQ)
 		{
-			struct _starpu_mpi_copy_handle *chandle = find_chandle(starpu_data_get_tag(req->data_handle));
-			if (chandle && (req->data_handle != chandle->handle))
+			if (req->user_datatype == 1)
 			{
-				_STARPU_MPI_DEBUG(3, "Handling deleting of copy_handle structure from the hashmap..\n");
-				delete_chandle(chandle);
-				free(chandle);
+				if (req->request_type == SEND_REQ)
+				{
+					// We need to make sure the communication for sending the size
+					// has completed, as MPI can re-order messages, let's call
+					// MPI_Wait to make sure data have been sent
+					ret = MPI_Wait(&req->size_req, MPI_STATUS_IGNORE);
+					STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Wait returning %d", ret);
+					free(req->ptr);
+				}
+				if (req->request_type == RECV_REQ)
+				{
+					// req->ptr is freed by starpu_data_unpack
+					starpu_data_unpack(req->data_handle, req->ptr, req->count);
+				}
 			}
 			else
 			{
-				_STARPU_MPI_DEBUG(3, "NOT deleting chandle %p from hashmap (tag %d %d)\n", chandle, req->mpi_tag, starpu_data_get_tag(req->data_handle));
 				_starpu_mpi_handle_free_datatype(req->data_handle, &req->datatype);
 			}
 		}
-		starpu_data_release(req->data_handle);
 	}
 
+	if (req->data_handle)
+		starpu_data_release(req->data_handle);
+
 	if (req->envelope)
 	{
 		free(req->envelope);
 		req->envelope = NULL;
 	}
 
-	if (req->internal_req)
-	{
-		free(req->internal_req);
-		req->internal_req = NULL;
-	}
-
 	/* Execute the specified callback, if any */
 	if (req->callback)
 		req->callback(req->callback_arg);
@@ -869,12 +872,13 @@ static void _starpu_mpi_copy_cb(void* arg)
 	starpu_data_unregister_submit(args->copy_handle);
 
 	_STARPU_MPI_DEBUG(3, "Done, handling request %p termination of the already received request\n",args->req);
+	// If the request is detached, we need to call _starpu_mpi_handle_request_termination
+	// as it will not be called automatically as the request is not in the list detached_requests
 	if (args->req->detached)
 		_starpu_mpi_handle_request_termination(args->req);
 	// else: If the request is not detached its termination will
 	// be handled when calling starpu_mpi_wait
 
-
 	free(args);
 }
 
@@ -891,82 +895,78 @@ static void _starpu_mpi_submit_new_mpi_request(void *arg)
 
 	if (req->request_type == RECV_REQ)
 	{
-		/* test whether the receive request has already been submitted internally by StarPU-MPI*/
-		struct _starpu_mpi_copy_handle *chandle = find_chandle(req->mpi_tag);
-
-		/* Case : the request has already been submitted internally by StarPU.
-		 * We'll asynchronously ask a Read permission over the temporary handle, so as when
-		 * the internal receive will be over, the _starpu_mpi_copy_cb function will be called to
-		 * bring the data back to the original data handle associated to the request.*/
-		if (chandle && (req->data_handle != chandle->handle))
+		/* Case : the request is the internal receive request submitted by StarPU-MPI to receive
+		 * incoming data without a matching pending receive already submitted by the application.
+		 * We immediately allocate the pointer associated to the data_handle, and push it into
+		 * the list of new_requests, so that the real MPI request can be submitted before the next
+		 * submission of the envelope-catching request. */
+		if (req->is_internal_req)
 		{
-			_STARPU_MPI_DEBUG(3, "The RECV request %p with tag %d has already been received, copying previously received data into handle's pointer..\n", req, req->mpi_tag);
-
-			req->internal_req = chandle->req;
+			_starpu_mpi_handle_allocate_datatype(req->data_handle, &req->datatype, &req->user_datatype);
+			if (req->user_datatype == 0)
+			{
+				req->count = 1;
+				req->ptr = starpu_data_get_local_ptr(req->data_handle);
+			}
+			else
+			{
+				STARPU_ASSERT(req->count);
+				req->ptr = malloc(req->count);
+				STARPU_ASSERT_MSG(req->ptr, "cannot allocate message of size %ld\n", req->count);
+			}
 
-			struct _starpu_mpi_copy_cb_args *cb_args = malloc(sizeof(struct _starpu_mpi_copy_cb_args));
-			cb_args->data_handle = req->data_handle;
-			cb_args->copy_handle = chandle->handle;
-			cb_args->req = req;
+			_STARPU_MPI_DEBUG(3, "Pushing internal starpu_mpi_irecv request %p type %s tag %d src %d data %p ptr %p datatype '%s' count %d user_datatype %d \n", req, _starpu_mpi_request_type(req->request_type), req->mpi_tag, req->srcdst, req->data_handle, req->ptr, _starpu_mpi_datatype(req->datatype), (int)req->count, req->user_datatype);
+			_starpu_mpi_req_list_push_front(new_requests, req);
 
-			_STARPU_MPI_DEBUG(3, "Calling data_acquire_cb on starpu_mpi_copy_cb..\n");
-			starpu_data_acquire_cb(chandle->handle,STARPU_R,_starpu_mpi_copy_cb,(void*) cb_args);
+			/* inform the starpu mpi thread that the request has been pushed in the new_requests list */
+			STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
+			STARPU_PTHREAD_MUTEX_LOCK(&req->posted_mutex);
+			req->posted = 1;
+			STARPU_PTHREAD_COND_BROADCAST(&req->posted_cond);
+			STARPU_PTHREAD_MUTEX_UNLOCK(&req->posted_mutex);
+			STARPU_PTHREAD_MUTEX_LOCK(&mutex);
 		}
 		else
 		{
-			/* Case : the request is the internal receive request submitted by StarPU-MPI to receive
-			 * incoming data without a matching pending receive already submitted by the application.
-			 * We immediately allocate the pointer associated to the data_handle, and pushing it into
-			 * the list of new_requests, so as the real MPI request can be submitted before the next
-			 * submission of the envelope-catching request. */
-			if (chandle && (req->data_handle == chandle->handle))
+			/* test whether the receive request has already been submitted internally by StarPU-MPI*/
+			struct _starpu_mpi_copy_handle *chandle = find_chandle(req->mpi_tag);
+
+			/* Case : the request has already been submitted internally by StarPU.
+			 * We asynchronously ask for Read permission over the temporary handle, so that when
+			 * the internal receive is over, the _starpu_mpi_copy_cb function is called to
+			 * bring the data back to the original data handle associated to the request.*/
+			if (chandle)
 			{
-				_starpu_mpi_handle_allocate_datatype(req->data_handle, &req->datatype, &req->user_datatype);
-				if (req->user_datatype == 0)
-				{
-					req->count = 1;
-					req->ptr = starpu_data_get_local_ptr(req->data_handle);
-				}
-				else
-				{
-					req->count = chandle->env->psize;
-					req->ptr = malloc(req->count);
+				_STARPU_MPI_DEBUG(3, "The RECV request %p with tag %d has already been received, copying previously received data into handle's pointer..\n", req, req->mpi_tag);
+				STARPU_ASSERT(req->data_handle != chandle->handle);
 
-					STARPU_ASSERT_MSG(req->ptr, "cannot allocate message of size %ld\n", req->count);
-				}
+				req->internal_req = chandle->req;
 
-				_STARPU_MPI_DEBUG(3, "Pushing internal starpu_mpi_irecv request %p type %s tag %d src %d data %p ptr %p datatype '%s' count %d user_datatype %d \n", req, _starpu_mpi_request_type(req->request_type), req->mpi_tag, req->srcdst, req->data_handle, req->ptr, _starpu_mpi_datatype(req->datatype), (int)req->count, req->user_datatype);
-				_starpu_mpi_req_list_push_front(new_requests, req);
+				struct _starpu_mpi_copy_cb_args *cb_args = malloc(sizeof(struct _starpu_mpi_copy_cb_args));
+				cb_args->data_handle = req->data_handle;
+				cb_args->copy_handle = chandle->handle;
+				cb_args->req = req;
 
-				/* inform the starpu mpi thread that the request has beenbe pushed in the new_requests list */
-				STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
-				STARPU_PTHREAD_MUTEX_LOCK(&req->posted_mutex);
-				req->posted = 1;
-				STARPU_PTHREAD_COND_BROADCAST(&req->posted_cond);
-				STARPU_PTHREAD_MUTEX_UNLOCK(&req->posted_mutex);
-				STARPU_PTHREAD_MUTEX_LOCK(&mutex);
+				_STARPU_MPI_DEBUG(3, "Calling data_acquire_cb on starpu_mpi_copy_cb..\n");
+				starpu_data_acquire_cb(chandle->handle,STARPU_R,_starpu_mpi_copy_cb,(void*) cb_args);
 			}
 			/* Case : a classic receive request with no send received earlier than expected.
 			 * We just add the pending receive request to the requests' hashmap. */
 			else
 			{
-				add_req(req);
+				add_app_req(req);
 			}
-
-			newer_requests = 1;
-			STARPU_PTHREAD_COND_BROADCAST(&cond_progression);
 		}
 	}
 	else
 	{
 		_starpu_mpi_req_list_push_front(new_requests, req);
-
-		newer_requests = 1;
 		_STARPU_MPI_DEBUG(3, "Pushing new request %p type %s tag %d src %d data %p ptr %p datatype '%s' count %d user_datatype %d \n",
 				  req, _starpu_mpi_request_type(req->request_type), req->mpi_tag, req->srcdst, req->data_handle, req->ptr, _starpu_mpi_datatype(req->datatype), (int)req->count, req->user_datatype);
-		STARPU_PTHREAD_COND_BROADCAST(&cond_progression);
 	}
 
+	newer_requests = 1;
+	STARPU_PTHREAD_COND_BROADCAST(&cond_progression);
 	STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
 	_STARPU_MPI_LOG_OUT();
 }
@@ -1043,6 +1043,9 @@ static void _starpu_mpi_test_detached_requests(void)
 		if (flag)
 		{
 			_starpu_mpi_req_list_erase(detached_requests, req);
+#ifdef STARPU_DEVEL
+#warning FIXME: when do we free internal requests
+#endif
 			if (!req->is_internal_req)
 				free(req);
 		}
@@ -1135,12 +1138,12 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 	}
 
 	{
-	     int rank, worldsize;
-	     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	     MPI_Comm_size(MPI_COMM_WORLD, &worldsize);
-	     TRACE_MPI_START(rank, worldsize);
+		int rank, worldsize;
+		MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+		MPI_Comm_size(MPI_COMM_WORLD, &worldsize);
+		TRACE_MPI_START(rank, worldsize);
 #ifdef STARPU_USE_FXT
-	     starpu_profiling_set_id(rank);
+		starpu_profiling_set_id(rank);
 #endif //STARPU_USE_FXT
 	}
 
@@ -1159,7 +1162,7 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 	while (running || posted_requests || !(_starpu_mpi_req_list_empty(new_requests)) || !(_starpu_mpi_req_list_empty(detached_requests)))
 	{
 		/* shall we block ? */
-		unsigned block = _starpu_mpi_req_list_empty(new_requests) && (HASH_COUNT(_starpu_mpi_req_hashmap) == 0);
+		unsigned block = _starpu_mpi_req_list_empty(new_requests) && (HASH_COUNT(_starpu_mpi_app_req_hashmap) == 0);
 
 #ifndef STARPU_MPI_ACTIVITY
 		STARPU_PTHREAD_MUTEX_LOCK(&detached_requests_mutex);
@@ -1199,7 +1202,7 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 		/* If there is no currently submitted header_req submitted to catch envelopes from senders, and there is some pending receive
 		 * requests in our side, we resubmit a header request. */
 		MPI_Request header_req;
-		if ((HASH_COUNT(_starpu_mpi_req_hashmap) > 0) && (header_req_submitted == 0))// && (HASH_COUNT(_starpu_mpi_copy_handle_hashmap) == 0))
+		if ((HASH_COUNT(_starpu_mpi_app_req_hashmap) > 0) && (header_req_submitted == 0))// && (HASH_COUNT(_starpu_mpi_copy_handle_hashmap) == 0))
 		{
 			_STARPU_MPI_DEBUG(3, "Posting a receive to get a data envelop\n");
 			MPI_Irecv(recv_env, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, MPI_ANY_SOURCE, _starpu_mpi_tag, MPI_COMM_WORLD, &header_req);
@@ -1223,9 +1226,9 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 
 			if (flag)
 			{
-				_STARPU_MPI_DEBUG(3, "Searching for request with tag %d (size %ld)\n", recv_env->mpi_tag, recv_env->psize);
+				_STARPU_MPI_DEBUG(3, "Searching for application request with tag %d (size %ld)\n", recv_env->mpi_tag, recv_env->psize);
 
-				struct _starpu_mpi_req *found_req = find_req(recv_env->mpi_tag);
+				struct _starpu_mpi_req *found_req = find_app_req(recv_env->mpi_tag);
 
 				/* Case : a data will arrive before the matching receive has been submitted in our side of the application.
 				 * We will allow a temporary handle to store the incoming data, by submitting a starpu_mpi_irecv_detached
@@ -1253,8 +1256,7 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 					add_chandle(chandle);
 
 					_STARPU_MPI_DEBUG(3, "Posting internal detached irecv on copy_handle with tag %d from src %d ..\n", chandle->mpi_tag, status.MPI_SOURCE);
-					chandle->req = _starpu_mpi_irecv_common(chandle->handle, status.MPI_SOURCE, chandle->mpi_tag, MPI_COMM_WORLD, 1, NULL, NULL, 1);
-					chandle->req->is_internal_req = 1;
+					chandle->req = _starpu_mpi_irecv_common(chandle->handle, status.MPI_SOURCE, chandle->mpi_tag, MPI_COMM_WORLD, 1, NULL, NULL, 1, 1, recv_env->psize);
 
 					// We wait until the request is pushed in the
 					// new_request list, that ensures that the next loop
@@ -1272,9 +1274,9 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 				 * the data handle, then submit the corresponding receive with _starpu_mpi_handle_new_request. */
 				else
 				{
-					_STARPU_MPI_DEBUG(3, "Found !\n");
+					_STARPU_MPI_DEBUG(3, "A matching receive has been found for the incoming data with tag %d\n", recv_env->mpi_tag);
 
-					delete_req(found_req);
+					delete_app_req(found_req);
 
 					_starpu_mpi_handle_allocate_datatype(found_req->data_handle, &found_req->datatype, &found_req->user_datatype);
 					if (found_req->user_datatype == 0)
@@ -1311,8 +1313,8 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 	STARPU_ASSERT_MSG(_starpu_mpi_req_list_empty(detached_requests), "List of detached requests not empty");
 	STARPU_ASSERT_MSG(_starpu_mpi_req_list_empty(new_requests), "List of new requests not empty");
 	STARPU_ASSERT_MSG(posted_requests == 0, "Number of posted request is not zero");
-	STARPU_ASSERT_MSG(HASH_COUNT(_starpu_mpi_req_hashmap) == 0, "Number of receive requests left is not zero");
-
+	STARPU_ASSERT_MSG(HASH_COUNT(_starpu_mpi_app_req_hashmap) == 0, "Number of receive requests left is not zero");
+	STARPU_ASSERT_MSG(HASH_COUNT(_starpu_mpi_copy_handle_hashmap) == 0, "Number of copy requests left is not zero");
 	if (argc_argv->initialize_mpi)
 	{
 		_STARPU_MPI_DEBUG(3, "Calling MPI_Finalize()\n");

+ 1 - 1
src/common/uthash.h

@@ -229,7 +229,7 @@ do {
 #define HASH_FIND_STR(head,findstr,out)                                          \
     HASH_FIND(hh,head,findstr,strlen(findstr),out)
 #define HASH_ADD_STR(head,strfield,add)                                          \
-    HASH_ADD(hh,head,strfield,strlen(add->strfield),add)
+    HASH_ADD(hh,head,strfield[0],strlen(add->strfield),add)
 #define HASH_FIND_INT(head,findint,out)                                          \
     HASH_FIND(hh,head,findint,sizeof(int),out)
 #define HASH_ADD_INT(head,intfield,add)                                          \

+ 13 - 2
src/core/combined_workers.c

@@ -100,8 +100,19 @@ int starpu_combined_worker_assign_workerid(int nworkers, int workerid_array[])
 		&config->combined_workers[combined_worker_id];
 
 	combined_worker->worker_size = nworkers;
-	combined_worker->perf_arch = (enum starpu_perfmodel_archtype) (STARPU_CPU_DEFAULT + nworkers - 1);
-	combined_worker->worker_mask = STARPU_CPU;
+
+#ifdef STARPU_USE_MIC
+	if(config->workers[workerid_array[0]].worker_mask == STARPU_MIC)
+	{
+		combined_worker->perf_arch = (enum starpu_perfmodel_archtype) (STARPU_MIC_DEFAULT + config->workers[workerid_array[0]].mp_nodeid /* *STARPU_MAXMICCPUS + nworkers - 1*/);
+		combined_worker->worker_mask = STARPU_MIC;
+	}
+#endif
+	if(config->workers[workerid_array[0]].worker_mask == STARPU_CPU)
+	{
+		combined_worker->perf_arch = (enum starpu_perfmodel_archtype) (STARPU_CPU_DEFAULT + nworkers - 1);
+		combined_worker->worker_mask = STARPU_CPU;
+	}
 	combined_worker->count = nworkers -1;
 	pthread_mutex_init(&combined_worker->count_mutex,NULL);
 

+ 4 - 4
src/core/debug.c

@@ -82,7 +82,7 @@ int64_t _starpu_ayudame_get_func_id(struct starpu_codelet *cl)
 	unsigned i;
 	const char *name;
 	if (!cl)
-		return -1;
+		return 0;
 	name = _starpu_codelet_get_model_name(cl);
 	STARPU_PTHREAD_MUTEX_LOCK(&ayudame_mutex);
 	for (i=0; i < ncodelets; i++)
@@ -92,7 +92,7 @@ int64_t _starpu_ayudame_get_func_id(struct starpu_codelet *cl)
 				((name && codelets[i].name) && !strcmp(codelets[i].name, name))))
 		{
 			STARPU_PTHREAD_MUTEX_UNLOCK(&ayudame_mutex);
-			return i;
+			return i + 1;
 		}
 	}
 	if (ncodelets == ncodelets_alloc)
@@ -111,8 +111,8 @@ int64_t _starpu_ayudame_get_func_id(struct starpu_codelet *cl)
 		codelets[ncodelets].name = NULL;
 	i = ncodelets++;
 	if (name)
-		AYU_event(AYU_REGISTERFUNCTION, i, (void*) name);
+		AYU_event(AYU_REGISTERFUNCTION, i+1, (void*) name);
 	STARPU_PTHREAD_MUTEX_UNLOCK(&ayudame_mutex);
-	return i;
+	return i + 1;
 }
 #endif

+ 2 - 2
src/core/dependencies/tags.c

@@ -203,7 +203,7 @@ static struct _starpu_tag *_gettag_struct(starpu_tag_t id)
 #ifdef HAVE_AYUDAME_H
 		if (AYU_event)
 		{
-			int64_t AYU_data[2] = {-1, 0};
+			int64_t AYU_data[2] = {0, 0};
 			STARPU_ASSERT(id < AYUDAME_OFFSET);
 			AYU_event(AYU_ADDTASK, id + AYUDAME_OFFSET, AYU_data);
 		}
@@ -244,7 +244,7 @@ void _starpu_tag_set_ready(struct _starpu_tag *tag)
 #ifdef HAVE_AYUDAME_H
 	if (AYU_event)
 	{
-		int id = -1;
+		intptr_t id = 0;
 		AYU_event(AYU_PRERUNTASK, tag->id + AYUDAME_OFFSET, &id);
 		AYU_event(AYU_POSTRUNTASK, tag->id + AYUDAME_OFFSET, NULL);
 	}

+ 2 - 0
src/core/disk.h

@@ -23,6 +23,8 @@
 #define STARPU_DISK_ALL 1
 #define STARPU_DISK_NO_RECLAIM 2
 
+#include <datawizard/copy_driver.h>
+
 /* interface to manipulate memory disk */
 void * _starpu_disk_alloc (unsigned node, size_t size);
 

+ 1 - 1
src/core/jobs.c

@@ -284,7 +284,7 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 #ifdef HAVE_AYUDAME_H
 		if (AYU_event)
 		{
-			int64_t AYU_data[2] = {j->exclude_from_dag?-1:_starpu_ayudame_get_func_id(task->cl), task->priority > STARPU_MIN_PRIO};
+			int64_t AYU_data[2] = {j->exclude_from_dag?0:_starpu_ayudame_get_func_id(task->cl), task->priority > STARPU_MIN_PRIO};
 			AYU_event(AYU_ADDTASK, j->job_id, AYU_data);
 		}
 #endif

+ 1 - 1
src/core/sched_policy.c

@@ -326,7 +326,7 @@ int _starpu_push_task(struct _starpu_job *j)
 #ifdef HAVE_AYUDAME_H
 	if (AYU_event)
 	{
-		int id = -1;
+		intptr_t id = -1;
 		AYU_event(AYU_ADDTASKTOQUEUE, j->job_id, &id);
 	}
 #endif

+ 1 - 5
src/core/workers.c

@@ -437,11 +437,7 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 #endif
 
 #ifdef HAVE_AYUDAME_H
-	if (AYU_event)
-	{
-		unsigned long n = nworkers;
-		AYU_event(AYU_INIT, 0, (void*) &n);
-	}
+	if (AYU_event) AYU_event(AYU_INIT, 0, NULL);
 #endif
 	for (worker = 0; worker < nworkers; worker++)
 	{

+ 1 - 0
src/datawizard/data_request.c

@@ -358,6 +358,7 @@ static int starpu_handle_data_request(struct _starpu_data_request *r, unsigned m
 	/* perform the transfer */
 	/* the header of the data must be locked by the worker that submitted the request */
 
+
 	r->retval = _starpu_driver_copy_data_1_to_1(handle, src_replicate,
 						    dst_replicate, !(r_mode & STARPU_R), r, may_alloc);
 

+ 0 - 2
src/datawizard/filters.c

@@ -258,9 +258,7 @@ void starpu_data_partition(starpu_data_handle_t initial_handle, struct starpu_da
 		void *ptr;
 		ptr = starpu_data_handle_to_pointer(child, 0);
 		if (ptr != NULL)
-		{
 			_starpu_data_register_ram_pointer(child, ptr);
-		}
 	}
 	/* now let the header */
 	_starpu_spin_unlock(&initial_handle->header_lock);

+ 7 - 10
src/datawizard/reduction.c

@@ -86,19 +86,16 @@ void _starpu_redux_init_data_replicate(starpu_data_handle_t handle, struct _star
 #ifdef STARPU_USE_MIC
 	if (starpu_worker_get_type(workerid) == STARPU_MIC_WORKER)
 	{
-		const struct _starpu_mp_node *node = _starpu_mic_src_get_actual_thread_mp_node();
-		enum _starpu_mp_command answer;
-		void *arg = NULL;
-		int arg_size = 0;
-
-		// XXX: give the correct coreid.
-	       _starpu_src_common_execute_kernel(node,
-						 (void(*)(void))init_func, 0,
+		struct _starpu_mp_node *node = _starpu_mic_src_get_actual_thread_mp_node();
+		int devid = _starpu_get_worker_struct(workerid)->devid;
+		void * arg;
+		int arg_size;
+		_starpu_src_common_execute_kernel(node,
+						 (void(*)(void))init_func, devid,
 						 STARPU_SEQ, 0, 0, &handle, 
 						 &(replicate->data_interface), 1,
 						 NULL, 0);
-		answer = _starpu_mp_common_recv_command (node, &arg, &arg_size);
-		STARPU_ASSERT (answer == STARPU_EXECUTION_COMPLETED);
+		_starpu_src_common_wait_completed_execution(node,devid,&arg,&arg_size);
 	}
 	else
 #endif

+ 1 - 1
src/drivers/driver_common/driver_common.c

@@ -258,7 +258,7 @@ struct starpu_task *_starpu_get_worker_task(struct _starpu_worker *args, int wor
 #ifdef HAVE_AYUDAME_H
 	if (AYU_event)
 	{
-		int id = workerid;
+		intptr_t id = workerid;
 		AYU_event(AYU_PRERUNTASK, _starpu_get_job_associated_to_task(task)->job_id, &id);
 	}
 #endif

+ 24 - 42
src/drivers/mic/driver_mic_sink.c

@@ -33,11 +33,8 @@
  */
 void _starpu_mic_sink_init(struct _starpu_mp_node *node)
 {
-	pthread_t thread, self;
+	pthread_t self;
 	cpu_set_t cpuset;
-	pthread_attr_t attr;
-	int i, ret;
-	struct arg_sink_thread * arg;
 
 	/*Bind on the first core*/
 	self = pthread_self();
@@ -53,29 +50,35 @@ void _starpu_mic_sink_init(struct _starpu_mp_node *node)
 	_starpu_mic_common_accept(&node->host_sink_dt_connection.mic_endpoint,
 									 STARPU_MIC_SOURCE_DT_PORT_NUMBER);
 	
-	node->is_running = 1;
-
 	node->nb_cores = COISysGetHardwareThreadCount() - COISysGetHardwareThreadCount() / COISysGetCoreCount();
 	node->thread_table = malloc(sizeof(pthread_t)*node->nb_cores);
 
-	node->run_table = malloc(sizeof(struct mp_task *)*node->nb_cores);
-	node->sem_run_table = malloc(sizeof(sem_t)*node->nb_cores);
-
-	node->barrier_list = mp_barrier_list_new();
-	node->message_queue = mp_message_list_new();
-	STARPU_PTHREAD_MUTEX_INIT(&node->message_queue_mutex,NULL);
-	STARPU_PTHREAD_MUTEX_INIT(&node->barrier_mutex,NULL);
+	//node->sink_sink_dt_connections = malloc(node->nb_mp_sinks * sizeof(union _starpu_mp_connection));
 
-	STARPU_PTHREAD_BARRIER_INIT(&node->init_completed_barrier, NULL, node->nb_cores+1);
+	//for (i = 0; i < (unsigned int)node->devid; ++i)
+	//	_starpu_mic_common_connect(&node->sink_sink_dt_connections[i].mic_endpoint,
+	//								STARPU_TO_MIC_ID(i),
+	//								STARPU_MIC_SINK_SINK_DT_PORT_NUMBER(node->devid, i),	
+	//								STARPU_MIC_SINK_SINK_DT_PORT_NUMBER(i, node->devid));
 
+	//for (i = node->devid + 1; i < node->nb_mp_sinks; ++i)
+	//	_starpu_mic_common_accept(&node->sink_sink_dt_connections[i].mic_endpoint,
+	//								STARPU_MIC_SINK_SINK_DT_PORT_NUMBER(node->devid, i));
+}
 
+/* Launch all workers on the mic
+ */
+void _starpu_mic_sink_launch_workers(struct _starpu_mp_node *node)
+{
+	int i, ret;
+	struct arg_sink_thread * arg;
+	cpu_set_t cpuset;
+	pthread_attr_t attr;
+	pthread_t thread;
+	
 	/*for each core init the mutex, the task pointer and launch the thread */
 	for(i=0; i<node->nb_cores; i++)
 	{
-		node->run_table[i] = NULL;
-
-		sem_init(&node->sem_run_table[i],0,0);
-
 		//init the set
 		CPU_ZERO(&cpuset);
 		CPU_SET(i,&cpuset);
@@ -89,24 +92,12 @@ void _starpu_mic_sink_init(struct _starpu_mp_node *node)
 		arg= malloc(sizeof(struct arg_sink_thread));
 		arg->coreid = i;
 		arg->node = node;
-		arg->sem = &node->sem_run_table[i];
 		
 		ret = pthread_create(&thread, &attr, _starpu_sink_thread, arg);
-		((pthread_t *)node->thread_table)[i] = thread;
 		STARPU_ASSERT(ret == 0);
+		((pthread_t *)node->thread_table)[i] = thread;
 	}
 
-	//node->sink_sink_dt_connections = malloc(node->nb_mp_sinks * sizeof(union _starpu_mp_connection));
-
-	//for (i = 0; i < (unsigned int)node->devid; ++i)
-	//	_starpu_mic_common_connect(&node->sink_sink_dt_connections[i].mic_endpoint,
-	//								STARPU_TO_MIC_ID(i),
-	//								STARPU_MIC_SINK_SINK_DT_PORT_NUMBER(node->devid, i),	
-	//								STARPU_MIC_SINK_SINK_DT_PORT_NUMBER(i, node->devid));
-
-	//for (i = node->devid + 1; i < node->nb_mp_sinks; ++i)
-	//	_starpu_mic_common_accept(&node->sink_sink_dt_connections[i].mic_endpoint,
-	//								STARPU_MIC_SINK_SINK_DT_PORT_NUMBER(node->devid, i));
 }
 
 /* Deinitialize the MIC sink, close all the connections.
@@ -120,19 +111,13 @@ void _starpu_mic_sink_deinit(struct _starpu_mp_node *node)
 	{
 		sem_post(&node->sem_run_table[i]);
 		pthread_join(((pthread_t *)node->thread_table)[i],NULL);
-		sem_destroy(&node->sem_run_table[i]);
 	}
 
 	free(node->thread_table);
-	free(node->run_table);
-	free(node->sem_run_table);
 
-	mp_barrier_list_delete(node->barrier_list);
-	mp_message_list_delete(node->message_queue);
+	scif_close(node->host_sink_dt_connection.mic_endpoint);
+	scif_close(node->mp_connection.mic_endpoint);
 
-	STARPU_PTHREAD_MUTEX_DESTROY(&node->message_queue_mutex);
-	STARPU_PTHREAD_MUTEX_DESTROY(&node->barrier_mutex);
-	STARPU_PTHREAD_BARRIER_DESTROY(&node->init_completed_barrier);
 	//unsigned int i;
 
 	//for (i = 0; i < node->nb_mp_sinks; ++i)
@@ -143,14 +128,11 @@ void _starpu_mic_sink_deinit(struct _starpu_mp_node *node)
 
 	//free(node->sink_sink_dt_connections);
 
-	scif_close(node->host_sink_dt_connection.mic_endpoint);
-	scif_close(node->mp_connection.mic_endpoint);
 }
 
 /* Report an error which occurred when using a MIC device
  * and print this error in a human-readable style
  */
-
 void _starpu_mic_sink_report_error(const char *func, const char *file, const int line, const int status)
 {
 	const char *errormsg = strerror(status);

+ 1 - 1
src/drivers/mic/driver_mic_sink.h

@@ -34,7 +34,7 @@
 void _starpu_mic_sink_report_error(const char *func, const char *file, const int line, const int status);
 
 void _starpu_mic_sink_init(struct _starpu_mp_node *node);
-
+void _starpu_mic_sink_launch_workers(struct _starpu_mp_node *node);
 void _starpu_mic_sink_deinit(struct _starpu_mp_node *node);
 
 void _starpu_mic_sink_allocate(const struct _starpu_mp_node *mp_node, void *arg, int arg_size);

+ 1 - 1
src/drivers/mic/driver_mic_source.c

@@ -73,7 +73,7 @@ starpu_pthread_mutex_t nb_mic_worker_init_mutex = PTHREAD_MUTEX_INITIALIZER;
 //	return config->workers[workerid].devid;
 //}
 
-const struct _starpu_mp_node *_starpu_mic_src_get_actual_thread_mp_node()
+struct _starpu_mp_node *_starpu_mic_src_get_actual_thread_mp_node()
 {
 	struct _starpu_worker *actual_worker = _starpu_get_local_worker_key();
 	STARPU_ASSERT(actual_worker);

+ 1 - 1
src/drivers/mic/driver_mic_source.h

@@ -42,7 +42,7 @@ struct _starpu_mic_async_event *event;
 #define STARPU_MIC_SRC_REPORT_SCIF_ERROR(status) \
 	_starpu_mic_src_report_scif_error(__starpu_func__, __FILE__, __LINE__, status)
 
-const struct _starpu_mp_node *_starpu_mic_src_get_actual_thread_mp_node();
+struct _starpu_mp_node *_starpu_mic_src_get_actual_thread_mp_node();
 const struct _starpu_mp_node *_starpu_mic_src_get_mp_node_from_memory_node(int memory_node);
 
 void(* _starpu_mic_src_get_kernel_from_job(const struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNUSED, struct _starpu_job *j))(void);

+ 50 - 3
src/drivers/mp_common/mp_common.c

@@ -53,6 +53,7 @@ _starpu_mp_common_node_create(enum _starpu_mp_node_kind node_kind,
 		node->devid = peer_id;
 
 		node->init = _starpu_mic_src_init;
+		node->launch_workers= NULL;
 		node->deinit = _starpu_mic_src_deinit;
 		node->report_error = _starpu_mic_src_report_scif_error;
 
@@ -77,6 +78,7 @@ _starpu_mp_common_node_create(enum _starpu_mp_node_kind node_kind,
 		node->nb_mp_sinks = atoi(getenv("NB_MIC"));
 
 		node->init = _starpu_mic_sink_init;
+		node->launch_workers = _starpu_mic_sink_launch_workers;
 		node->deinit = _starpu_mic_sink_deinit;
 		node->report_error = _starpu_mic_sink_report_error;
 
@@ -102,6 +104,7 @@ _starpu_mp_common_node_create(enum _starpu_mp_node_kind node_kind,
 	{
 		node->init = _starpu_scc_src_init;
 		node->deinit = NULL;
+		node->deinit = NULL;
 		node->report_error = _starpu_scc_common_report_rcce_error;
 				
 		node->mp_recv_is_ready = _starpu_scc_common_recv_is_ready;
@@ -124,6 +127,7 @@ _starpu_mp_common_node_create(enum _starpu_mp_node_kind node_kind,
 	case STARPU_SCC_SINK:
 	{
 		node->init = _starpu_scc_sink_init;
+		node->launch_workers = _starpu_scc_sink_launch_workers;
 		node->deinit = _starpu_scc_sink_deinit;
 		node->report_error = _starpu_scc_common_report_rcce_error;
 
@@ -166,15 +170,60 @@ _starpu_mp_common_node_create(enum _starpu_mp_node_kind node_kind,
 	if (node->init)
 		node->init(node);
 
+	node->message_queue = mp_message_list_new();
+	STARPU_PTHREAD_MUTEX_INIT(&node->message_queue_mutex,NULL);
+
+		/* If the node is a sink then we must initialize some fields */
+	if(node->kind == STARPU_MIC_SINK || node->kind == STARPU_SCC_SINK)
+	{
+		int i;
+		node->is_running = 1;
+		node->run_table = malloc(sizeof(struct mp_task *)*node->nb_cores);
+		node->sem_run_table = malloc(sizeof(sem_t)*node->nb_cores);
+
+		for(i=0; i<node->nb_cores; i++)
+		{
+			node->run_table[i] = NULL;
+			sem_init(&node->sem_run_table[i],0,0);
+		}
+		node->barrier_list = mp_barrier_list_new();
+		STARPU_PTHREAD_MUTEX_INIT(&node->barrier_mutex,NULL);
+
+		STARPU_PTHREAD_BARRIER_INIT(&node->init_completed_barrier, NULL, node->nb_cores+1);
+
+		node->launch_workers(node);
+	}	
+
+
 	return node;
 }
 
 /* Deinitialize the sink structure and release the structure */
-
 void _starpu_mp_common_node_destroy(struct _starpu_mp_node *node)
 {
 	if (node->deinit)
 		node->deinit(node);
+		
+	mp_message_list_delete(node->message_queue);
+	STARPU_PTHREAD_MUTEX_DESTROY(&node->message_queue_mutex);
+
+	/* If the node is a sink then we must destroy some fields */
+	if(node->kind == STARPU_MIC_SINK || node->kind == STARPU_SCC_SINK)
+	{
+		int i;
+		for(i=0; i<node->nb_cores; i++)
+		{
+			sem_destroy(&node->sem_run_table[i]);
+		}
+
+		free(node->run_table);
+		free(node->sem_run_table);
+
+		mp_barrier_list_delete(node->barrier_list);
+
+		STARPU_PTHREAD_MUTEX_DESTROY(&node->barrier_mutex);
+		STARPU_PTHREAD_BARRIER_DESTROY(&node->init_completed_barrier);
+	}
 
 	free(node->buffer);
 
@@ -182,7 +231,6 @@ void _starpu_mp_common_node_destroy(struct _starpu_mp_node *node)
 }
 
 /* Send COMMAND to RECIPIENT, along with ARG if ARG_SIZE is non-zero */
-
 void _starpu_mp_common_send_command(const struct _starpu_mp_node *node,
 				    const enum _starpu_mp_command command,
 				    void *arg, int arg_size)
@@ -209,7 +257,6 @@ void _starpu_mp_common_send_command(const struct _starpu_mp_node *node,
  * However, the data pointed by arg shouldn't be relied on after a new call to
  * STARPU_MP_COMMON_RECV_COMMAND as it might corrupt it.
  */
-
 enum _starpu_mp_command _starpu_mp_common_recv_command(const struct _starpu_mp_node *node,
 						       void **arg, int *arg_size)
 {

+ 5 - 1
src/drivers/mp_common/mp_common.h

@@ -25,6 +25,8 @@
 #include <common/list.h>
 #include <common/barrier.h>
 #include <common/thread.h>
+#include <datawizard/interfaces/data_interface.h>
+
 #ifdef STARPU_USE_MP
 
 #ifdef STARPU_USE_MIC
@@ -113,7 +115,8 @@ LIST_TYPE(mp_message,
 struct mp_task 
 {
 	void (*kernel)(void **, void *);
-	void *interfaces[STARPU_NMAXBUFS]; 
+	void * interfaces[STARPU_NMAXBUFS]; 
+	unsigned nb_interfaces;
 	void *cl_arg;
 	unsigned coreid;
 	enum starpu_codelet_type type;
@@ -194,6 +197,7 @@ struct _starpu_mp_node
 
 	/* Node general functions */
 	void (*init)(struct _starpu_mp_node *node);
+	void (*launch_workers)(struct _starpu_mp_node *node);
 	void (*deinit)(struct _starpu_mp_node *node);
 	void (*report_error)(const char *, const char *, const int, const int);
 

+ 20 - 10
src/drivers/mp_common/sink_common.c

@@ -251,6 +251,7 @@ void _starpu_sink_common_worker(void)
 	STARPU_PTHREAD_KEY_CREATE(&worker_key, NULL);
 
 
+	struct _starpu_machine_config *config;
 	while (!exit_starpu)
 	{
 		/* If we have received a message */
@@ -264,6 +265,7 @@ void _starpu_sink_common_worker(void)
 					exit_starpu = 1;
 					break;
 				case STARPU_EXECUTE:
+					config = _starpu_get_machine_config();
 					node->execute(node, arg, arg_size);
 					break;
 				case STARPU_SINK_NBCORES:
@@ -305,21 +307,22 @@ void _starpu_sink_common_worker(void)
 			}
 		}
 
-		pthread_mutex_lock(&node->message_queue_mutex);
+		STARPU_PTHREAD_MUTEX_LOCK(&node->message_queue_mutex);
 		/* If the list is not empty */
 		if(!mp_message_list_empty(node->message_queue))
 		{
 			/* We pop a message and send it to the host */
 			struct mp_message * message = mp_message_list_pop_back(node->message_queue);
-			pthread_mutex_unlock(&node->message_queue_mutex);
+			STARPU_PTHREAD_MUTEX_UNLOCK(&node->message_queue_mutex);
 			//_STARPU_DEBUG("telling host that we have finished the task %p sur %d.\n", task->kernel, task->coreid);
+			config = _starpu_get_machine_config();
 			_starpu_mp_common_send_command(node, message->type, 
 					&message->buffer, message->size);
 			mp_message_delete(message);
 		}
 		else
 		{
-			pthread_mutex_unlock(&node->message_queue_mutex);
+			STARPU_PTHREAD_MUTEX_UNLOCK(&node->message_queue_mutex);
 		}
 	}
 
@@ -376,6 +379,7 @@ static void _starpu_sink_common_erase_barrier(struct _starpu_mp_node * node, str
  */
 static void _starpu_sink_common_append_message(struct _starpu_mp_node *node, struct mp_message * message)
 {
+	struct _starpu_machine_config *config = _starpu_get_machine_config();
 	STARPU_PTHREAD_MUTEX_LOCK(&node->message_queue_mutex);
 	mp_message_list_push_front(node->message_queue,message);
 	STARPU_PTHREAD_MUTEX_UNLOCK(&node->message_queue_mutex);
@@ -506,6 +510,10 @@ static void _starpu_sink_common_execute_kernel(struct _starpu_mp_node *node, int
 	/* tell the sink that the execution is completed */
 	_starpu_sink_common_execution_completed_message(node,task);
 
+	/*free the task*/
+	unsigned i;
+	for (i = 0; i < task->nb_interfaces; i++)
+		free(task->interfaces[i]);
 	free(task);
 
 }
@@ -518,7 +526,6 @@ void* _starpu_sink_thread(void * thread_arg)
 {
 	/* Retrieve the information from the structure */
 	struct _starpu_mp_node *node = ((struct arg_sink_thread *)thread_arg)->node;
-	sem_t * sem = ((struct arg_sink_thread *)thread_arg)->sem;
 	int coreid =((struct arg_sink_thread *)thread_arg)->coreid;
 	/* free the structure */
 	free(thread_arg);
@@ -531,7 +538,7 @@ void* _starpu_sink_thread(void * thread_arg)
 	while(node->is_running)
 	{
 		/* Wait until a task is available */
-		sem_wait(sem);
+		sem_wait(&node->sem_run_table[coreid]);
 		if(node->run_table[coreid] != NULL)
 			_starpu_sink_common_execute_kernel(node,coreid,node->run_table[coreid],worker);
 
@@ -562,7 +569,7 @@ static void _starpu_sink_common_execute_thread(struct _starpu_mp_node *node, str
 void _starpu_sink_common_execute(struct _starpu_mp_node *node,
 		void *arg, int arg_size)
 {
-	unsigned nb_interfaces, i;
+	unsigned i;
 
 	void *arg_ptr = arg;
 	struct mp_task *task = malloc(sizeof(struct mp_task));
@@ -587,16 +594,19 @@ void _starpu_sink_common_execute(struct _starpu_mp_node *node,
 	task->coreid = *(unsigned *) arg_ptr;
 	arg_ptr += sizeof(task->coreid);
 
-	nb_interfaces = *(unsigned *) arg_ptr;
-	arg_ptr += sizeof(nb_interfaces);
+	task->nb_interfaces = *(unsigned *) arg_ptr;
+	arg_ptr += sizeof(task->nb_interfaces);
 
 	/* The function needs an array pointing to each interface it needs
 	 * during execution. As in sink-side there is no mean to know which
 	 * kind of interface to expect, the array is composed of unions of
 	 * interfaces, thus we expect the same size anyway */
-	for (i = 0; i < nb_interfaces; i++)
+	for (i = 0; i < task->nb_interfaces; i++)
 	{
-		task->interfaces[i] = arg_ptr;
+		union _starpu_interface * interface = malloc(sizeof(union _starpu_interface));   
+		memcpy(interface, arg_ptr, 
+				sizeof(union _starpu_interface));
+		task->interfaces[i] = interface;
 		arg_ptr += sizeof(union _starpu_interface);
 	}
 

+ 0 - 1
src/drivers/mp_common/sink_common.h

@@ -35,7 +35,6 @@ struct _starpu_sink_topology
 struct arg_sink_thread
 {
 	struct _starpu_mp_node *node;
-	sem_t* sem;
 	int coreid;
 };
 

+ 109 - 27
src/drivers/mp_common/source_common.c

@@ -38,7 +38,8 @@ static int _starpu_src_common_finalize_job (struct _starpu_job *j, struct _starp
 	struct timespec codelet_end;
 	_starpu_driver_end_job(worker, j, worker->perf_arch, &codelet_end, 0,
 			profiling);
-	int count = 0;
+	
+	int count = worker->current_rank;
 
 	/* If it's a combined worker, we check if it's the last one of his combined */
 	if(j->task_size > 1)
@@ -52,7 +53,6 @@ static int _starpu_src_common_finalize_job (struct _starpu_job *j, struct _starp
 		pthread_mutex_unlock(&cb_worker->count_mutex);
 	}
 
-	_STARPU_DEBUG("\nworkerid:%d\n",worker->workerid);
 	/* Finalize the execution */
 	if(count == 0)
 	{
@@ -69,7 +69,7 @@ static int _starpu_src_common_finalize_job (struct _starpu_job *j, struct _starp
 }
 
 
-/* */
+/* Complete the execution of the job */
 static int _starpu_src_common_process_completed_job(struct _starpu_worker_set *workerset, void * arg, int arg_size)
 {
 	int coreid;
@@ -79,17 +79,15 @@ static int _starpu_src_common_process_completed_job(struct _starpu_worker_set *w
 	coreid = *(int *) arg;
 
 	struct _starpu_worker *worker = &workerset->workers[coreid];
-	struct starpu_task *task = worker->current_task;
-	struct _starpu_job *j = _starpu_get_job_associated_to_task (task);
+	struct _starpu_job *j = _starpu_get_job_associated_to_task(worker->current_task);
 
 	struct _starpu_worker * old_worker = _starpu_get_local_worker_key();
+
 	_starpu_set_local_worker_key(worker);
-	
 	_starpu_src_common_finalize_job (j, worker);
-	worker->current_task = NULL;
-
 	_starpu_set_local_worker_key(old_worker);
 
+	worker->current_task = NULL;
 	return 0;
 }
 
@@ -112,51 +110,131 @@ static void _starpu_src_common_pre_exec(void * arg, int arg_size)
  * return 0 if the message has not been handled (which most likely means it is a synchronous message)
  * return 1 if the message has been handled
  */
-static int _starpu_src_common_handle_async(const struct _starpu_mp_node *node, 
-		void ** arg, int* arg_size, 
-		enum _starpu_mp_command *answer)
+static int _starpu_src_common_handle_async(const struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNUSED, 
+		void * arg, int arg_size, 
+		enum _starpu_mp_command answer)
 {
-	struct _starpu_worker_set * worker_set = _starpu_get_worker_struct(starpu_worker_get_id())->set;
-	*answer = _starpu_mp_common_recv_command(node, arg, arg_size);
-	switch(*answer) 
+	struct _starpu_worker_set * worker_set=NULL; 
+	switch(answer) 
 	{
 		case STARPU_EXECUTION_COMPLETED:
-			_starpu_src_common_process_completed_job(worker_set, *arg, *arg_size);
+			worker_set = _starpu_get_worker_struct(starpu_worker_get_id())->set;
+			_starpu_src_common_process_completed_job(worker_set, arg, arg_size);
 			break;
 		case STARPU_PRE_EXECUTION:
-			_starpu_src_common_pre_exec(*arg,*arg_size);
+			_starpu_src_common_pre_exec(arg,arg_size);
 			break;
 		default:
 			return 0;
 			break;
 	}
-
 	return 1;
 }
 
 
-/* Handle all asynchronous messages and return when a synchronous message is received */
-static enum _starpu_mp_command _starpu_src_common_wait_command_sync(const struct _starpu_mp_node *node, 
+static void _starpu_src_common_handle_stored_async(struct _starpu_mp_node *node)
+{
+	STARPU_PTHREAD_MUTEX_LOCK(&node->message_queue_mutex);
+	/* while the list is not empty */
+	while(!mp_message_list_empty(node->message_queue))
+	{
+		/* We pop a message and handle it */
+		struct mp_message * message = mp_message_list_pop_back(node->message_queue);
+		_starpu_src_common_handle_async(node, message->buffer, 
+				message->size, message->type);
+		mp_message_delete(message);
+	}
+	STARPU_PTHREAD_MUTEX_UNLOCK(&node->message_queue_mutex);
+}
+
+/* Store a message if it is asynchronous
+ * return 1 if the message has been stored
+ * return 0 if the message is unknown or synchronous */
+int _starpu_src_common_store_message(struct _starpu_mp_node *node, 
+		void * arg, int arg_size, enum _starpu_mp_command answer)
+{
+	struct mp_message * message = NULL;
+	switch(answer)
+	{
+		case STARPU_EXECUTION_COMPLETED:
+		case STARPU_PRE_EXECUTION:
+			message = mp_message_new();
+			message->type = answer;
+			memcpy(message->buffer, arg, arg_size); 
+			message->size = arg_size; 
+
+			STARPU_PTHREAD_MUTEX_LOCK(&node->message_queue_mutex);
+			mp_message_list_push_front(node->message_queue,message);
+			STARPU_PTHREAD_MUTEX_UNLOCK(&node->message_queue_mutex);
+			return 1;
+			break;
+		default:
+			return 0;
+			break;
+	}
+}
+
+/* Store all asynchronous messages and return when a synchronous message is received */
+static enum _starpu_mp_command _starpu_src_common_wait_command_sync(struct _starpu_mp_node *node, 
 		void ** arg, int* arg_size)
 {
 	enum _starpu_mp_command answer;
-	while(_starpu_src_common_handle_async(node,arg,arg_size,&answer));
+	int is_sync = 0;
+	while(!is_sync)
+	{
+		answer = _starpu_mp_common_recv_command(node, arg, arg_size);
+		if(!_starpu_src_common_store_message(node,*arg,*arg_size,answer))
+			is_sync=1;
+	}
 	return answer;
 }
 
 /* Handle an asynchronous message and report an error if a synchronous message is received */
-static void _starpu_src_common_recv_async(struct _starpu_mp_node * baseworker_node)
+static void _starpu_src_common_recv_async(struct _starpu_mp_node * node)
 {
 	enum _starpu_mp_command answer;
 	void *arg;
 	int arg_size;
-	if(!_starpu_src_common_handle_async(baseworker_node,&arg,&arg_size,&answer))
+	answer = _starpu_mp_common_recv_command(node, &arg, &arg_size);
+	if(!_starpu_src_common_handle_async(node,arg,arg_size,answer))
 	{
 		printf("incorrect commande: unknown command or sync command");
 		STARPU_ASSERT(0);
 	}	
 }
 
+/* Store all other asynchronous messages until the completed-execution message from a specific worker has been received */
+ enum _starpu_mp_command _starpu_src_common_wait_completed_execution(struct _starpu_mp_node *node, int devid, void **arg, int * arg_size)
+{
+	enum _starpu_mp_command answer;
+
+	int completed = 0;	
+	while(!completed)
+	{
+		answer = _starpu_mp_common_recv_command (node, arg, arg_size);
+
+		if(answer == STARPU_EXECUTION_COMPLETED)
+		{
+			int coreid;
+			STARPU_ASSERT(sizeof(coreid) == *arg_size);	
+			coreid = *(int *) *arg;
+			if(devid == coreid)
+				completed = 1;
+			else
+				if(!_starpu_src_common_store_message(node, *arg, *arg_size, answer))
+					/* We received an unknown or synchronous message */
+					STARPU_ASSERT(0);
+		}
+		else
+		{
+			if(!_starpu_src_common_store_message(node, *arg, *arg_size, answer))
+				/* We received an unknown or synchronous message */
+				STARPU_ASSERT(0);
+		}
+	}
+	return answer;
+}
+
 
 /* Send a request to the sink NODE for the number of cores on it. */
 int _starpu_src_common_sink_nbcores (const struct _starpu_mp_node *node, int *buf)
@@ -227,7 +305,7 @@ int _starpu_src_common_lookup(struct _starpu_mp_node *node,
  * pointer.
  * Data interfaces in task are send to the sink.
  */
-int _starpu_src_common_execute_kernel(const struct _starpu_mp_node *node,
+int _starpu_src_common_execute_kernel(struct _starpu_mp_node *node,
 		void (*kernel)(void), unsigned coreid,
 		enum starpu_codelet_type type,
 		int is_parallel_task, int cb_workerid,
@@ -288,9 +366,11 @@ int _starpu_src_common_execute_kernel(const struct _starpu_mp_node *node,
 	 * executed on a sink with a different memory, whereas a codelet is
 	 * executed on the host part for the other accelerators.
 	 * Thus we need to send a copy of each interface on the MP device */
+
 	for (i = 0; i < nb_interfaces; i++)
 	{
 		starpu_data_handle_t handle = handles[i];
+
 		memcpy (buffer_ptr, interfaces[i],
 				handle->ops->interface_size);
 		/* The sink side has no mean to get the type of each
@@ -366,7 +446,7 @@ static int _starpu_src_common_execute(struct _starpu_job *j,
  * allocated area ;
  * else it returns 1 if the allocation fail.
  */
-int _starpu_src_common_allocate(const struct _starpu_mp_node *mp_node,
+int _starpu_src_common_allocate(struct _starpu_mp_node *mp_node,
 		void **addr, size_t size)
 {
 	enum _starpu_mp_command answer;
@@ -376,7 +456,7 @@ int _starpu_src_common_allocate(const struct _starpu_mp_node *mp_node,
 	_starpu_mp_common_send_command(mp_node, STARPU_ALLOCATE, &size,
 			sizeof(size));
 
-	answer = _starpu_mp_common_recv_command(mp_node, &arg, &arg_size);
+	answer = _starpu_src_common_wait_command_sync(mp_node, &arg, &arg_size);
 
 	if (answer == STARPU_ERROR_ALLOCATE)
 		return 1;
@@ -595,8 +675,7 @@ void _starpu_src_common_worker(struct _starpu_worker_set * worker_set,
 		unsigned baseworkerid, 
 		struct _starpu_mp_node * mp_node)
 { 
-	struct _starpu_worker * baseworker = &worker_set->workers[baseworkerid];
-	unsigned memnode = baseworker->memory_node;
+	unsigned memnode = worker_set->workers[0].memory_node;
 	struct starpu_task **tasks = malloc(sizeof(struct starpu_task *)*worker_set->nworkers);
 
 	_starpu_src_common_send_workers(mp_node, baseworkerid, worker_set->nworkers);
@@ -611,6 +690,9 @@ void _starpu_src_common_worker(struct _starpu_worker_set * worker_set,
 		_starpu_datawizard_progress(memnode, 1);
 		_STARPU_TRACE_END_PROGRESS(memnode);
 
+		/* Handle the messages which have been stored */
+		_starpu_src_common_handle_stored_async(mp_node);
+
 		/* poll the device for completed jobs.*/
 		while(mp_node->mp_recv_is_ready(mp_node))
 			_starpu_src_common_recv_async(mp_node);

+ 5 - 0
src/drivers/mp_common/source_common.h

@@ -31,6 +31,11 @@ enum _starpu_mp_command _starpu_src_common_wait_command_sync(struct _starpu_mp_n
 void _starpu_src_common_recv_async(struct _starpu_worker_set *worker_set, 
 				   struct _starpu_mp_node * baseworker_node);
 
+int _starpu_src_common_store_message(struct _starpu_mp_node *node, 
+		void * arg, int arg_size, enum _starpu_mp_command answer);
+
+enum _starpu_mp_command _starpu_src_common_wait_completed_execution(struct _starpu_mp_node *node, int devid, void **arg, int * arg_size);
+
 int _starpu_src_common_sink_nbcores (const struct _starpu_mp_node *node, int *buf);
 
 int _starpu_src_common_lookup(const struct _starpu_mp_node *node,

+ 6 - 0
src/drivers/scc/driver_scc_sink.c

@@ -34,11 +34,17 @@ void _starpu_scc_sink_init(struct _starpu_mp_node *node)
 	 * get nb_cores *
 	 ****************/
 	node->nb_cores = 1; 
+	STARPU_ASSERT(0);
+
+}
 
+void _starpu_scc_sink_launch_workers(struct _starpu_mp_node *node)
+{
 	/*****************
 	 *     TODO      *
 	 * init thread   *
 	 *****************/
+	STARPU_ASSERT(0);
 }
 
 void _starpu_scc_sink_deinit(struct _starpu_mp_node *node)

+ 1 - 0
src/drivers/scc/driver_scc_sink.h

@@ -25,6 +25,7 @@
 #include <drivers/mp_common/mp_common.h>
 
 void _starpu_scc_sink_init(struct _starpu_mp_node *node);
+void _starpu_scc_sink_launch_workers(struct _starpu_mp_node *node);
 void _starpu_scc_sink_deinit(struct _starpu_mp_node *node);
 
 void _starpu_scc_sink_send_to_device(const struct _starpu_mp_node *node, int dst_devid, void *msg, int len);

+ 1 - 1
src/sched_policies/deque_modeling_policy_data_aware.c

@@ -349,7 +349,7 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 #ifdef HAVE_AYUDAME_H
 	if (AYU_event)
 	{
-		int id = best_workerid;
+		intptr_t id = best_workerid;
 		AYU_event(AYU_ADDTASKTOQUEUE, _starpu_get_job_associated_to_task(task)->job_id, &id);
 	}
 #endif

+ 2 - 2
src/sched_policies/random_policy.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2012  Université de Bordeaux 1
+ * Copyright (C) 2010-2013  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -89,7 +89,7 @@ static int _random_push_task(struct starpu_task *task, unsigned prio)
 #ifdef HAVE_AYUDAME_H
 	if (AYU_event)
 	{
-		int id = selected;
+		intptr_t id = selected;
 		AYU_event(AYU_ADDTASKTOQUEUE, _starpu_get_job_associated_to_task(task)->job_id, &id);
 	}
 #endif

+ 1 - 1
src/sched_policies/work_stealing_policy.c

@@ -369,7 +369,7 @@ int ws_push_task(struct starpu_task *task)
 #ifdef HAVE_AYUDAME_H
 	if (AYU_event)
 	{
-		int id = workerid;
+		intptr_t id = workerid;
 		AYU_event(AYU_ADDTASKTOQUEUE, j->job_id, &id);
 	}
 #endif

+ 1 - 1
tests/datawizard/interfaces/coo/coo_interface.c

@@ -21,7 +21,7 @@
 #define MATRIX_SIZE (NX*NY)
 
 #if defined(STARPU_USE_CPU) || defined(STAPRU_USE_MIC)
-static void test_coo_cpu_func(void *buffers[], void *args);
+void test_coo_cpu_func(void *buffers[], void *args);
 #endif
 #ifdef STARPU_USE_CUDA
 extern void test_coo_cuda_func(void *buffers[], void *args);

+ 1 - 1
tests/disk/disk_copy.c

@@ -41,7 +41,7 @@ int main(int argc, char **argv)
 	if (ret == -ENODEV) goto enodev;
 
 	/* register a disk */
-	int new_dd = starpu_disk_register(&starpu_disk_unistd_ops, (void *) "/tmp", 1024*1024*200);
+	int new_dd = starpu_disk_register(&starpu_disk_stdio_ops, (void *) "/tmp", 1024*1024*200);
 	/* can't write on /tmp/ */
 	if (new_dd == -ENOENT) goto enoent;
 	

+ 1 - 0
tests/main/starpu_init.c

@@ -83,6 +83,7 @@ int main(int argc, char **argv)
 
 	ret = check_cpu(-1, -1, -1, &cpu_init);
 	if (ret) return ret;
+	if (cpu_init == 0) return STARPU_TEST_SKIPPED;
 
 	if (cpu_init >= STARPU_MAXCPUS-5)
 	{

+ 0 - 1
tests/microbenchs/async_tasks_overhead.c

@@ -31,7 +31,6 @@ static double cumulated_pop = 0.0;
 
 void dummy_func(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *arg STARPU_ATTRIBUTE_UNUSED)
 {
-	usleep(10000);
 }
 
 static struct starpu_codelet dummy_codelet =

+ 2 - 2
tests/parallel_tasks/explicit_combined_worker.c

@@ -21,7 +21,7 @@
 #include <unistd.h>
 #include "../helper.h"
 
-#define N	10
+#define N	1000
 #define VECTORSIZE	1024
 
 void codelet_null(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
@@ -43,9 +43,9 @@ static struct starpu_codelet cl =
 	.type = STARPU_FORKJOIN,
 	.max_parallelism = INT_MAX,
 	.cpu_funcs = {codelet_null, NULL},
+	.cpu_funcs_name = {"codelet_null", NULL},
 	.cuda_funcs = {codelet_null, NULL},
         .opencl_funcs = {codelet_null, NULL},
-	.cpu_funcs_name = {"codelet_null", NULL},
 	.nbuffers = 1,
 	.modes = {STARPU_R}
 };

+ 2 - 2
tests/parallel_tasks/parallel_kernels.c

@@ -50,8 +50,8 @@ static struct starpu_codelet cl =
 	.max_parallelism = INT_MAX,
 	.cpu_funcs = {codelet_null, NULL},
 	.cuda_funcs = {codelet_null, NULL},
-        .opencl_funcs = {codelet_null, NULL},
 	.cpu_funcs_name = {"codelet_null", NULL},
+        .opencl_funcs = {codelet_null, NULL},
 	.model = &model,
 	.nbuffers = 1,
 	.modes = {STARPU_R}
@@ -66,7 +66,7 @@ int main(int argc, char **argv)
 
         struct starpu_conf conf;
 	starpu_conf_init(&conf);
-	conf.sched_policy_name = "peager";
+	conf.sched_policy_name = "pheft";
 	conf.calibrate = 1;
 
 	ret = starpu_init(&conf);

+ 6 - 6
tests/parallel_tasks/parallel_kernels_spmd.c

@@ -21,17 +21,17 @@
 #include <unistd.h>
 #include "../helper.h"
 
-#define N	100
+#define N	1000
 #define VECTORSIZE	1024
 
 void codelet_null(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 {
 	STARPU_SKIP_IF_VALGRIND;
 
-//	int worker_size = starpu_combined_worker_get_size();
-//	STARPU_ASSERT(worker_size > 0);
+	int worker_size = starpu_combined_worker_get_size();
+	STARPU_ASSERT(worker_size > 0);
 
-//	usleep(1000/worker_size);
+	usleep(1000/worker_size);
 #if 0
 	int id = starpu_worker_get_id();
 	int combined_id = starpu_combined_worker_get_id();
@@ -51,9 +51,9 @@ static struct starpu_codelet cl =
 	.type = STARPU_SPMD,
 	.max_parallelism = INT_MAX,
 	.cpu_funcs = {codelet_null, NULL},
+	.cpu_funcs_name = {"codelet_null", NULL},
 	.cuda_funcs = {codelet_null, NULL},
         .opencl_funcs = {codelet_null, NULL},
-	.cpu_funcs_name = {"codelet_null", NULL},
 	.model = &model,
 	.nbuffers = 1,
 	.modes = {STARPU_R}
@@ -68,7 +68,7 @@ int main(int argc, char **argv)
 
         struct starpu_conf conf;
 	starpu_conf_init(&conf);
-	conf.sched_policy_name = "peager";
+	conf.sched_policy_name = "pheft";
 	conf.calibrate = 1;
 
 	ret = starpu_init(&conf);

+ 3 - 4
tests/parallel_tasks/spmd_peager.c

@@ -20,7 +20,7 @@
 #include <unistd.h>
 #include "../helper.h"
 
-#define N	1000	
+#define N	1000
 #define VECTORSIZE	1024
 
 starpu_data_handle_t v_handle;
@@ -33,10 +33,9 @@ void codelet_null(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 	int worker_size = starpu_combined_worker_get_size();
 	STARPU_ASSERT(worker_size > 0);
 
-	//FPRINTF(stderr, "WORKERSIZE : %d\n", worker_size);
+//	FPRINTF(stderr, "WORKERSIZE : %d\n", worker_size);
 
 	usleep(1000/worker_size);
-
 #if 0
 	int id = starpu_worker_get_id();
 	int combined_id = starpu_combined_worker_get_id();
@@ -50,9 +49,9 @@ static struct starpu_codelet cl =
 	.type = STARPU_SPMD,
 	.max_parallelism = INT_MAX,
 	.cpu_funcs = {codelet_null, NULL},
+	.cpu_funcs_name = {"codelet_null", NULL},
 	.cuda_funcs = {codelet_null, NULL},
         .opencl_funcs = {codelet_null, NULL},
-	.cpu_funcs_name = {"codelet_null", NULL},
 	.nbuffers = 1,
 	.modes = {STARPU_R}
 };

+ 2 - 1
tests/sched_policies/execute_all_tasks.c

@@ -26,7 +26,7 @@
 
 #define NTASKS           8
 
-static void
+void
 dummy(void *buffers[], void *args)
 {
 	(void) buffers;
@@ -50,6 +50,7 @@ run(struct starpu_sched_policy *p)
 	struct starpu_codelet cl =
 	{
 		.cpu_funcs    = {dummy, NULL},
+		.cpu_funcs_name = {"dummy", NULL},
 		.cuda_funcs   = {dummy, NULL},
 		.opencl_funcs = {dummy, NULL},
 		.nbuffers     = 0