Samuel Thibault лет назад: 11
Родитель
Сommit
84a578d9d6

+ 6 - 0
ChangeLog

@@ -17,6 +17,12 @@
 StarPU 1.2.0 (svn revision xxxx)
 StarPU 1.2.0 (svn revision xxxx)
 ==============================================
 ==============================================
 
 
+Small features:
+  * New function starpu_sched_ctx_display_workers() to display worker
+    information belonging to a given scheduler context
+  * The option --enable-verbose can be called with
+    --enable-verbose=extra to increase the verbosity
+
 StarPU 1.1.2 (svn revision xxxx)
 StarPU 1.1.2 (svn revision xxxx)
 ==============================================
 ==============================================
 The scheduling context release
 The scheduling context release

+ 7 - 2
configure.ac

@@ -994,7 +994,8 @@ if test x$enable_simgrid = xyes ; then
 			AC_MSG_ERROR(Simgrid support needs simgrid installed)
 			AC_MSG_ERROR(Simgrid support needs simgrid installed)
 		]
 		]
 	)
 	)
-   	AC_CHECK_FUNCS([MSG_process_join])
+   	AC_CHECK_FUNCS([MSG_process_join MSG_get_as_by_name])
+	AC_CHECK_FUNCS([xbt_barrier_init], [AC_DEFINE([STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT], [1], [Define to 1 if you have the `xbt_barrier_init' function.])])
 	AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
 	AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
 		    		[[#include <msg/msg.h>]],
 		    		[[#include <msg/msg.h>]],
 				[[msg_host_t foo; ]]
 				[[msg_host_t foo; ]]
@@ -1393,12 +1394,16 @@ fi
 
 
 AC_MSG_CHECKING(whether debug messages should be displayed)
 AC_MSG_CHECKING(whether debug messages should be displayed)
 AC_ARG_ENABLE(verbose, [AS_HELP_STRING([--enable-verbose],
 AC_ARG_ENABLE(verbose, [AS_HELP_STRING([--enable-verbose],
-			[display verbose debug messages])],
+			[display verbose debug messages (--enable-verbose=extra increase the verbosity)])],
 			enable_verbose=$enableval, enable_verbose=no)
 			enable_verbose=$enableval, enable_verbose=no)
 AC_MSG_RESULT($enable_verbose)
 AC_MSG_RESULT($enable_verbose)
 if test x$enable_verbose = xyes; then
 if test x$enable_verbose = xyes; then
 	AC_DEFINE(STARPU_VERBOSE, [1], [display verbose debug messages])
 	AC_DEFINE(STARPU_VERBOSE, [1], [display verbose debug messages])
 fi
 fi
+if test x$enable_verbose = xextra; then
+	AC_DEFINE(STARPU_VERBOSE, [1], [display verbose debug messages])
+	AC_DEFINE(STARPU_EXTRA_VERBOSE, [1], [display verbose debug messages])
+fi
 
 
 AC_MSG_CHECKING(whether coverage testing should be enabled)
 AC_MSG_CHECKING(whether coverage testing should be enabled)
 AC_ARG_ENABLE(coverage, [AS_HELP_STRING([--enable-coverage],
 AC_ARG_ENABLE(coverage, [AS_HELP_STRING([--enable-coverage],

+ 1 - 1
doc/doxygen/chapters/41configure_options.doxy

@@ -42,7 +42,7 @@ Disable assertion checks, which saves computation time.
 \addindex __configure__--enable-verbose
 \addindex __configure__--enable-verbose
 Increase the verbosity of the debugging messages.  This can be disabled
 Increase the verbosity of the debugging messages.  This can be disabled
 at runtime by setting the environment variable \ref STARPU_SILENT to
 at runtime by setting the environment variable \ref STARPU_SILENT to
-any value.
+any value. <c>--enable-verbose=extra</c> increases the verbosity even further.
 
 
 \verbatim
 \verbatim
 $ STARPU_SILENT=1 ./vector_scal
 $ STARPU_SILENT=1 ./vector_scal

+ 4 - 0
doc/doxygen/chapters/api/scheduling_contexts.doxy

@@ -108,6 +108,10 @@ This function removes the workers in \p workerids_ctx from the context
 \p sched_ctx_id. The last argument cannot be greater than
 \p sched_ctx_id. The last argument cannot be greater than
 STARPU_NMAX_SCHED_CTXS.
 STARPU_NMAX_SCHED_CTXS.
 
 
+\fn void starpu_sched_ctx_display_workers(unsigned sched_ctx_id, FILE *f)
+\ingroup API_Scheduling_Contexts
+This function prints to the file \p f the names of the workers belonging to the context \p sched_ctx_id.
+
 \fn void starpu_sched_ctx_delete(unsigned sched_ctx_id)
 \fn void starpu_sched_ctx_delete(unsigned sched_ctx_id)
 \ingroup API_Scheduling_Contexts
 \ingroup API_Scheduling_Contexts
 Delete scheduling context \p sched_ctx_id and transfer remaining
 Delete scheduling context \p sched_ctx_id and transfer remaining

+ 58 - 24
examples/sched_ctx/sched_ctx.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010-2013  Université de Bordeaux 1
  * Copyright (C) 2010-2013  Université de Bordeaux 1
- * Copyright (C) 2010-2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010-2014  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -13,7 +13,7 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  *
  *
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
+ */
 
 
 #include <starpu.h>
 #include <starpu.h>
 
 
@@ -26,16 +26,20 @@
 int tasks_executed = 0;
 int tasks_executed = 0;
 starpu_pthread_mutex_t mut;
 starpu_pthread_mutex_t mut;
 
 
-static void sched_ctx_func(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *arg STARPU_ATTRIBUTE_UNUSED)
+static void sched_ctx_cpu_func(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *arg STARPU_ATTRIBUTE_UNUSED)
 {
 {
 	starpu_pthread_mutex_lock(&mut);
 	starpu_pthread_mutex_lock(&mut);
 	tasks_executed++;
 	tasks_executed++;
 	starpu_pthread_mutex_unlock(&mut);
 	starpu_pthread_mutex_unlock(&mut);
 }
 }
 
 
-static struct starpu_codelet sched_ctx_codelet =
+static void sched_ctx_cuda_func(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *arg STARPU_ATTRIBUTE_UNUSED)
 {
 {
-	.cpu_funcs = {sched_ctx_func, NULL},
+}
+
+static struct starpu_codelet sched_ctx_codelet1 =
+{
+	.cpu_funcs = {sched_ctx_cpu_func, NULL},
 	.cuda_funcs = {NULL},
 	.cuda_funcs = {NULL},
 	.opencl_funcs = {NULL},
 	.opencl_funcs = {NULL},
 	.model = NULL,
 	.model = NULL,
@@ -43,11 +47,25 @@ static struct starpu_codelet sched_ctx_codelet =
 	.name = "sched_ctx"
 	.name = "sched_ctx"
 };
 };
 
 
+static struct starpu_codelet sched_ctx_codelet2 =
+{
+	.cpu_funcs = {sched_ctx_cpu_func, NULL},
+	.cuda_funcs = {sched_ctx_cuda_func, NULL},
+	.opencl_funcs = {NULL},
+	.model = NULL,
+	.nbuffers = 0,
+	.name = "sched_ctx"
+};
+
 
 
 int main(int argc, char **argv)
 int main(int argc, char **argv)
 {
 {
 	int ntasks = NTASKS;
 	int ntasks = NTASKS;
 	int ret;
 	int ret;
+	unsigned ncuda = 0;
+	int nprocs1 = 0;
+	int nprocs2 = 0;
+	int procs1[STARPU_NMAXWORKERS], procs2[STARPU_NMAXWORKERS];
 
 
 	ret = starpu_init(NULL);
 	ret = starpu_init(NULL);
 	if (ret == -ENODEV)
 	if (ret == -ENODEV)
@@ -55,25 +73,23 @@ int main(int argc, char **argv)
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 
 	starpu_pthread_mutex_init(&mut, NULL);
 	starpu_pthread_mutex_init(&mut, NULL);
-	int nprocs1 = 1;
-	int nprocs2 = 1;
-	int procs1[STARPU_NMAXWORKERS], procs2[STARPU_NMAXWORKERS];
-	procs1[0] = 0;
-	procs2[0] = 0;
 
 
 #ifdef STARPU_USE_CPU
 #ifdef STARPU_USE_CPU
-	unsigned ncpus =  starpu_cpu_worker_get_count();
-	starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs1, ncpus);
-
-	nprocs1 = ncpus;
+	nprocs1 = starpu_cpu_worker_get_count();
+	starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs1, nprocs1);
 #endif
 #endif
+	// if there is no cpu, skip
+	if (nprocs1 == 0) goto enodev;
 
 
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
-	unsigned ncuda = starpu_cuda_worker_get_count();
-	starpu_worker_get_ids_by_type(STARPU_CUDA_WORKER, procs2, ncuda);
-
-	nprocs2 = ncuda == 0 ? 1 : ncuda;
+	ncuda = nprocs2 = starpu_cuda_worker_get_count();
+	starpu_worker_get_ids_by_type(STARPU_CUDA_WORKER, procs2, nprocs2);
 #endif
 #endif
+	if (nprocs2 == 0)
+	{
+	     nprocs2 = 1;
+	     procs2[0] = procs1[0];
+	}
 
 
 	/*create contexts however you want*/
 	/*create contexts however you want*/
 	unsigned sched_ctx1 = starpu_sched_ctx_create(procs1, nprocs1, "ctx1", STARPU_SCHED_CTX_POLICY_NAME, "eager", 0);
 	unsigned sched_ctx1 = starpu_sched_ctx_create(procs1, nprocs1, "ctx1", STARPU_SCHED_CTX_POLICY_NAME, "eager", 0);
@@ -82,17 +98,18 @@ int main(int argc, char **argv)
 	/*indicate what to do with the resources when context 2 finishes (it depends on your application)*/
 	/*indicate what to do with the resources when context 2 finishes (it depends on your application)*/
 	starpu_sched_ctx_set_inheritor(sched_ctx2, sched_ctx1);
 	starpu_sched_ctx_set_inheritor(sched_ctx2, sched_ctx1);
 
 
+	starpu_sched_ctx_display_workers(sched_ctx2, stderr);
+
 	int i;
 	int i;
 	for (i = 0; i < ntasks/2; i++)
 	for (i = 0; i < ntasks/2; i++)
 	{
 	{
 		struct starpu_task *task = starpu_task_create();
 		struct starpu_task *task = starpu_task_create();
 
 
-		task->cl = &sched_ctx_codelet;
+		task->cl = &sched_ctx_codelet1;
 		task->cl_arg = NULL;
 		task->cl_arg = NULL;
 
 
 		/*submit tasks to context*/
 		/*submit tasks to context*/
 		ret = starpu_task_submit_to_ctx(task,sched_ctx1);
 		ret = starpu_task_submit_to_ctx(task,sched_ctx1);
-
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	}
 	}
 
 
@@ -102,11 +119,27 @@ int main(int argc, char **argv)
 
 
 	starpu_sched_ctx_finished_submit(sched_ctx1);
 	starpu_sched_ctx_finished_submit(sched_ctx1);
 
 
+	/* task with no cuda impl submitted to a ctx with gpus only */
+	struct starpu_task *task2 = starpu_task_create();
+	task2->cl = &sched_ctx_codelet1;
+	task2->cl_arg = NULL;
+
+	/*submit tasks to context*/
+	ret = starpu_task_submit_to_ctx(task2,sched_ctx2);
+	if (ncuda == 0)
+	{
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	}
+	else
+	{
+		STARPU_ASSERT_MSG(ret == -ENODEV, "submit task should ret enodev when the ctx does not have the PUs needed by the task");
+	}
+
 	for (i = 0; i < ntasks/2; i++)
 	for (i = 0; i < ntasks/2; i++)
 	{
 	{
 		struct starpu_task *task = starpu_task_create();
 		struct starpu_task *task = starpu_task_create();
 
 
-		task->cl = &sched_ctx_codelet;
+		task->cl = &sched_ctx_codelet2;
 		task->cl_arg = NULL;
 		task->cl_arg = NULL;
 
 
 		ret = starpu_task_submit_to_ctx(task,sched_ctx2);
 		ret = starpu_task_submit_to_ctx(task,sched_ctx2);
@@ -121,8 +154,9 @@ int main(int argc, char **argv)
 
 
 	starpu_sched_ctx_delete(sched_ctx1);
 	starpu_sched_ctx_delete(sched_ctx1);
 	starpu_sched_ctx_delete(sched_ctx2);
 	starpu_sched_ctx_delete(sched_ctx2);
-	printf("tasks executed %d out of %d\n", tasks_executed, ntasks);
-	starpu_shutdown();
+	printf("tasks executed %d out of %d\n", tasks_executed, ntasks/2);
 
 
-	return 0;
+enodev:
+	starpu_shutdown();
+	return nprocs1 == 0 ? 77 : 0;
 }
 }

+ 8 - 4
examples/sched_ctx/sched_ctx_without_sched_policy.c

@@ -73,6 +73,7 @@ int main(int argc, char **argv)
 	tasks_executed[1] = 0;
 	tasks_executed[1] = 0;
 	int ntasks = NTASKS;
 	int ntasks = NTASKS;
 	int ret, j, k;
 	int ret, j, k;
+	unsigned ncpus = 0;
 
 
 	ret = starpu_init(NULL);
 	ret = starpu_init(NULL);
 	if (ret == -ENODEV)
 	if (ret == -ENODEV)
@@ -85,7 +86,7 @@ int main(int argc, char **argv)
 	int *procs1, *procs2;
 	int *procs1, *procs2;
 
 
 #ifdef STARPU_USE_CPU
 #ifdef STARPU_USE_CPU
-	unsigned ncpus =  starpu_cpu_worker_get_count();
+	ncpus = starpu_cpu_worker_get_count();
 	procs1 = (int*)malloc(ncpus*sizeof(int));
 	procs1 = (int*)malloc(ncpus*sizeof(int));
 	procs2 = (int*)malloc(ncpus*sizeof(int));
 	procs2 = (int*)malloc(ncpus*sizeof(int));
 	starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs1, ncpus);
 	starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs1, ncpus);
@@ -96,7 +97,7 @@ int main(int argc, char **argv)
 		nprocs2 =  ncpus-nprocs1;
 		nprocs2 =  ncpus-nprocs1;
 		k = 0;
 		k = 0;
 		for(j = nprocs1; j < nprocs1+nprocs2; j++)
 		for(j = nprocs1; j < nprocs1+nprocs2; j++)
-			procs2[k++] = j;
+			procs2[k++] = procs1[j];
 	}
 	}
 	else
 	else
 	{
 	{
@@ -113,6 +114,8 @@ int main(int argc, char **argv)
 	procs2[0] = 0;
 	procs2[0] = 0;
 #endif
 #endif
 
 
+	if (ncpus == 0) goto enodev;
+
 	/*create contexts however you want*/
 	/*create contexts however you want*/
 	unsigned sched_ctx1 = starpu_sched_ctx_create(procs1, nprocs1, "ctx1", 0);
 	unsigned sched_ctx1 = starpu_sched_ctx_create(procs1, nprocs1, "ctx1", 0);
 	unsigned sched_ctx2 = starpu_sched_ctx_create(procs2, nprocs2, "ctx2", 0);
 	unsigned sched_ctx2 = starpu_sched_ctx_create(procs2, nprocs2, "ctx2", 0);
@@ -158,7 +161,8 @@ int main(int argc, char **argv)
 	starpu_sched_ctx_delete(sched_ctx2);
 	starpu_sched_ctx_delete(sched_ctx2);
 	printf("ctx%d: tasks starpu executed %d out of %d\n", sched_ctx1, tasks_executed[0], NTASKS*NTASKS);
 	printf("ctx%d: tasks starpu executed %d out of %d\n", sched_ctx1, tasks_executed[0], NTASKS*NTASKS);
 	printf("ctx%d: tasks starpu executed %d out of %d\n", sched_ctx2, tasks_executed[1], NTASKS*NTASKS);
 	printf("ctx%d: tasks starpu executed %d out of %d\n", sched_ctx2, tasks_executed[1], NTASKS*NTASKS);
-	starpu_shutdown();
 
 
-	return 0;
+enodev:
+	starpu_shutdown();
+	return ncpus == 0 ? 77 : 0;
 }
 }

+ 6 - 1
examples/worker_collections/worker_tree_example.c

@@ -30,7 +30,12 @@ int main(int argc, char **argv)
 
 
 int main()
 int main()
 {
 {
-	starpu_init(NULL);
+	int ret;
+
+	ret = starpu_init(NULL);
+	if (ret == -ENODEV)
+		return 77;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 
 	int procs[STARPU_NMAXWORKERS];
 	int procs[STARPU_NMAXWORKERS];
 	unsigned ncpus =  starpu_cpu_worker_get_count();
 	unsigned ncpus =  starpu_cpu_worker_get_count();

+ 1 - 0
include/starpu_config.h.in

@@ -29,6 +29,7 @@
 #undef STARPU_USE_SCC
 #undef STARPU_USE_SCC
 
 
 #undef STARPU_SIMGRID
 #undef STARPU_SIMGRID
+#undef STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT
 
 
 #undef STARPU_HAVE_ICC
 #undef STARPU_HAVE_ICC
 
 

+ 2 - 0
include/starpu_sched_ctx.h

@@ -40,6 +40,8 @@ void starpu_sched_ctx_add_workers(int *workerids_ctx, int nworkers_ctx, unsigned
 
 
 void starpu_sched_ctx_remove_workers(int *workerids_ctx, int nworkers_ctx, unsigned sched_ctx_id);
 void starpu_sched_ctx_remove_workers(int *workerids_ctx, int nworkers_ctx, unsigned sched_ctx_id);
 
 
+void starpu_sched_ctx_display_workers(unsigned sched_ctx_id, FILE *f);
+
 void starpu_sched_ctx_delete(unsigned sched_ctx_id);
 void starpu_sched_ctx_delete(unsigned sched_ctx_id);
 
 
 void starpu_sched_ctx_set_inheritor(unsigned sched_ctx_id, unsigned inheritor);
 void starpu_sched_ctx_set_inheritor(unsigned sched_ctx_id, unsigned inheritor);

+ 6 - 0
include/starpu_thread.h

@@ -200,6 +200,11 @@ int starpu_pthread_rwlock_unlock(starpu_pthread_rwlock_t *rwlock);
 
 
 #if defined(STARPU_SIMGRID) || !defined(STARPU_HAVE_PTHREAD_BARRIER)
 #if defined(STARPU_SIMGRID) || !defined(STARPU_HAVE_PTHREAD_BARRIER)
 
 
+#if defined(STARPU_SIMGRID) && defined(STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT)
+typedef xbt_bar_t starpu_pthread_barrier_t;
+typedef int starpu_pthread_barrierattr_t;
+#define STARPU_PTHREAD_BARRIER_SERIAL_THREAD -1
+#else
 typedef struct {
 typedef struct {
 	starpu_pthread_mutex_t mutex;
 	starpu_pthread_mutex_t mutex;
 	starpu_pthread_cond_t cond;
 	starpu_pthread_cond_t cond;
@@ -208,6 +213,7 @@ typedef struct {
 } starpu_pthread_barrier_t;
 } starpu_pthread_barrier_t;
 typedef int starpu_pthread_barrierattr_t;
 typedef int starpu_pthread_barrierattr_t;
 #define STARPU_PTHREAD_BARRIER_SERIAL_THREAD -1
 #define STARPU_PTHREAD_BARRIER_SERIAL_THREAD -1
+#endif
 
 
 int starpu_pthread_barrier_init(starpu_pthread_barrier_t *barrier, const starpu_pthread_barrierattr_t *attr, unsigned count);
 int starpu_pthread_barrier_init(starpu_pthread_barrier_t *barrier, const starpu_pthread_barrierattr_t *attr, unsigned count);
 int starpu_pthread_barrier_destroy(starpu_pthread_barrier_t *barrier);
 int starpu_pthread_barrier_destroy(starpu_pthread_barrier_t *barrier);

+ 6 - 2
mpi/src/Makefile.am

@@ -39,7 +39,9 @@ noinst_HEADERS =					\
 	starpu_mpi_task_insert.h			\
 	starpu_mpi_task_insert.h			\
 	starpu_mpi_datatype.h				\
 	starpu_mpi_datatype.h				\
 	starpu_mpi_cache.h				\
 	starpu_mpi_cache.h				\
-	starpu_mpi_cache_stats.h
+	starpu_mpi_cache_stats.h			\
+	starpu_mpi_early_data.h				\
+	starpu_mpi_early_request.h
 
 
 libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_SOURCES =	\
 libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_SOURCES =	\
 	starpu_mpi.c					\
 	starpu_mpi.c					\
@@ -50,7 +52,9 @@ libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_SOURCES =	\
 	starpu_mpi_stats.c				\
 	starpu_mpi_stats.c				\
 	starpu_mpi_private.c				\
 	starpu_mpi_private.c				\
 	starpu_mpi_cache.c				\
 	starpu_mpi_cache.c				\
-	starpu_mpi_cache_stats.c
+	starpu_mpi_cache_stats.c			\
+	starpu_mpi_early_data.c				\
+	starpu_mpi_early_request.c
 
 
 showcheck:
 showcheck:
 	-cat /dev/null
 	-cat /dev/null

+ 138 - 348
mpi/src/starpu_mpi.c

@@ -22,6 +22,8 @@
 #include <starpu_profiling.h>
 #include <starpu_profiling.h>
 #include <starpu_mpi_stats.h>
 #include <starpu_mpi_stats.h>
 #include <starpu_mpi_task_insert.h>
 #include <starpu_mpi_task_insert.h>
+#include <starpu_mpi_early_data.h>
+#include <starpu_mpi_early_request.h>
 #include <common/config.h>
 #include <common/config.h>
 #include <common/thread.h>
 #include <common/thread.h>
 #include <datawizard/interfaces/data_interface.h>
 #include <datawizard/interfaces/data_interface.h>
@@ -65,234 +67,52 @@ static int posted_requests = 0, newer_requests, barrier_running = 0;
 
 
 #define _STARPU_MPI_INC_POSTED_REQUESTS(value) { STARPU_PTHREAD_MUTEX_LOCK(&mutex_posted_requests); posted_requests += value; STARPU_PTHREAD_MUTEX_UNLOCK(&mutex_posted_requests); }
 #define _STARPU_MPI_INC_POSTED_REQUESTS(value) { STARPU_PTHREAD_MUTEX_LOCK(&mutex_posted_requests); posted_requests += value; STARPU_PTHREAD_MUTEX_UNLOCK(&mutex_posted_requests); }
 
 
-LIST_TYPE(_starpu_mpi_copy_handle,
-	  starpu_data_handle_t handle;
-	  struct _starpu_mpi_envelope *env;
-	  struct _starpu_mpi_req *req;
-	  void *buffer;
-	  int mpi_tag;
-	  int source;
-	  int req_ready;
-	  starpu_pthread_mutex_t req_mutex;
-	  starpu_pthread_cond_t req_cond;
-);
-
-struct _starpu_mpi_copy_handle_hashlist
+static void _starpu_mpi_request_init(struct _starpu_mpi_req **req)
 {
 {
-	struct _starpu_mpi_copy_handle_list *list;
-	UT_hash_handle hh;
-	int mpi_tag;
-};
-
-/********************************************************/
-/*                                                      */
-/*  Hashmap's requests functionalities                  */
-/*                                                      */
-/********************************************************/
-
-/** stores application requests for which data have not been received yet */
-static struct _starpu_mpi_req **_starpu_mpi_app_req_hashmap = NULL;
-static int _starpu_mpi_app_req_hashmap_count = 0;
-/** stores data which have been received by MPI but have not been requested by the application */
-static struct _starpu_mpi_copy_handle_hashlist **_starpu_mpi_copy_handle_hashmap = NULL;
-static int _starpu_mpi_copy_handle_hashmap_count = 0;
-
-static struct _starpu_mpi_req* find_app_req(int mpi_tag, int source)
-{
-	struct _starpu_mpi_req* req;
-
-	HASH_FIND_INT(_starpu_mpi_app_req_hashmap[source], &mpi_tag, req);
-
-	return req;
-}
-
-static void add_app_req(struct _starpu_mpi_req *req)
-{
-	struct _starpu_mpi_req *test_req;
-
-	test_req = find_app_req(req->mpi_tag, req->srcdst);
-
-	if (test_req == NULL)
-	{
-		HASH_ADD_INT(_starpu_mpi_app_req_hashmap[req->srcdst], mpi_tag, req);
-		_starpu_mpi_app_req_hashmap_count ++;
-		_STARPU_MPI_DEBUG(3, "Adding request %p with tag %d in the application request hashmap[%d]\n", req, req->mpi_tag, req->srcdst);
-	}
-	else
-	{
-		_STARPU_MPI_DEBUG(3, "[Error] request %p with tag %d already in the application request hashmap[%d]\n", req, req->mpi_tag, req->srcdst);
-		int seq_const = starpu_data_get_sequential_consistency_flag(req->data_handle);
-		if (seq_const &&  req->sequential_consistency)
-		{
-			STARPU_ASSERT_MSG(!test_req, "[Error] request %p with tag %d wanted to be added to the application request hashmap[%d], while another request %p with the same tag is already in it. \n Sequential consistency is activated : this is not supported by StarPU.", req, req->mpi_tag, req->srcdst, test_req);
-		}
-		else
-		{
-			STARPU_ASSERT_MSG(!test_req, "[Error] request %p with tag %d wanted to be added to the application request hashmap[%d], while another request %p with the same tag is already in it. \n Sequential consistency isn't activated for this handle : you should want to add dependencies between requests for which the sequential consistency is deactivated.", req, req->mpi_tag, req->srcdst, test_req);
-		}
-	}
-}
-
-static void delete_app_req(struct _starpu_mpi_req *req)
-{
-	struct _starpu_mpi_req *test_req;
+	*req = malloc(sizeof(struct _starpu_mpi_req));
+	STARPU_ASSERT_MSG(*req, "Invalid request");
 
 
-	test_req = find_app_req(req->mpi_tag, req->srcdst);
-
-	if (test_req != NULL)
-	{
-		HASH_DEL(_starpu_mpi_app_req_hashmap[req->srcdst], req);
-		_starpu_mpi_app_req_hashmap_count --;
-		_STARPU_MPI_DEBUG(3, "Deleting application request %p with tag %d from the application request hashmap[%d]\n", req, req->mpi_tag, req->srcdst);
-	}
-	else
-	{
-		_STARPU_MPI_DEBUG(3, "[Warning] request %p with tag %d is NOT in the application request hashmap[%d]\n", req, req->mpi_tag, req->srcdst);
-	}
-}
-
-#ifdef STARPU_VERBOSE
-static void _starpu_mpi_copy_handle_display_hash(int source, int tag)
-{
-	struct _starpu_mpi_copy_handle_hashlist *hashlist;
-	HASH_FIND_INT(_starpu_mpi_copy_handle_hashmap[source], &tag, hashlist);
-
-	if (hashlist == NULL)
-	{
-		_STARPU_MPI_DEBUG(60, "Hashlist for source %d and tag %d does not exist\n", source, tag);
-	}
-	else if (_starpu_mpi_copy_handle_list_empty(hashlist->list))
-	{
-		_STARPU_MPI_DEBUG(60, "Hashlist for source %d and tag %d is empty\n", source, tag);
-	}
-	else
-	{
-		struct _starpu_mpi_copy_handle *cur;
-		for (cur = _starpu_mpi_copy_handle_list_begin(hashlist->list) ;
-		     cur != _starpu_mpi_copy_handle_list_end(hashlist->list);
-		     cur = _starpu_mpi_copy_handle_list_next(cur))
-		{
-			_STARPU_MPI_DEBUG(60, "Element for source %d and tag %d: %p\n", source, tag, cur);
-		}
-	}
-}
-#endif
-
-static struct _starpu_mpi_copy_handle *pop_chandle(int mpi_tag, int source, int delete)
-{
-	struct _starpu_mpi_copy_handle_hashlist *hashlist;
-	struct _starpu_mpi_copy_handle *chandle;
-
-	_STARPU_MPI_DEBUG(60, "Looking for chandle with tag %d in the hashmap[%d]\n", mpi_tag, source);
-	HASH_FIND_INT(_starpu_mpi_copy_handle_hashmap[source], &mpi_tag, hashlist);
-	if (hashlist == NULL)
-	{
-		chandle = NULL;
-	}
-	else
-	{
-		if (_starpu_mpi_copy_handle_list_empty(hashlist->list))
-		{
-			chandle = NULL;
-		}
-		else
-		{
-			if (delete == 1)
-			{
-				chandle = _starpu_mpi_copy_handle_list_pop_front(hashlist->list);
-			}
-			else
-			{
-				chandle = _starpu_mpi_copy_handle_list_front(hashlist->list);
-			}
-		}
-	}
-	_STARPU_MPI_DEBUG(60, "Found chandle %p with tag %d in the hashmap[%d]\n", chandle, mpi_tag, source);
-	return chandle;
-}
-
-static struct _starpu_mpi_copy_handle *find_chandle(int mpi_tag, int source)
-{
-	return pop_chandle(mpi_tag, source, 0);
-}
-
-static void add_chandle(struct _starpu_mpi_copy_handle *chandle)
-{
-	_STARPU_MPI_DEBUG(60, "Trying to add chandle %p with tag %d in the hashmap[%d]\n", chandle, chandle->mpi_tag, chandle->source);
-
-	struct _starpu_mpi_copy_handle_hashlist *hashlist;
-	HASH_FIND_INT(_starpu_mpi_copy_handle_hashmap[chandle->source], &chandle->mpi_tag, hashlist);
-	if (hashlist == NULL)
-	{
-		hashlist = malloc(sizeof(struct _starpu_mpi_copy_handle_hashlist));
-		hashlist->list = _starpu_mpi_copy_handle_list_new();
-		hashlist->mpi_tag = chandle->mpi_tag;
-		HASH_ADD_INT(_starpu_mpi_copy_handle_hashmap[chandle->source], mpi_tag, hashlist);
-	}
-	_starpu_mpi_copy_handle_list_push_back(hashlist->list, chandle);
-	_starpu_mpi_copy_handle_hashmap_count ++;
-#ifdef STARPU_VERBOSE
-	_starpu_mpi_copy_handle_display_hash(chandle->source, chandle->mpi_tag);
-#endif
-}
-
-static void delete_chandle(struct _starpu_mpi_copy_handle *chandle)
-{
-	_STARPU_MPI_DEBUG(60, "Trying to delete chandle %p with tag %d in the hashmap[%d]\n", chandle, chandle->mpi_tag, chandle->source);
-	struct _starpu_mpi_copy_handle *found = pop_chandle(chandle->mpi_tag, chandle->source, 1);
-
-	STARPU_ASSERT_MSG(found == chandle,
-			  "Error delete_chandle : chandle %p with tag %d is NOT in the hashmap[%d]\n", chandle, chandle->mpi_tag, chandle->source);
-
-	_starpu_mpi_copy_handle_hashmap_count --;
-#ifdef STARPU_VERBOSE
-	_starpu_mpi_copy_handle_display_hash(chandle->source, chandle->mpi_tag);
-#endif
-}
-
-static void _starpu_mpi_request_init(struct _starpu_mpi_req *req)
-{
 	/* Initialize the request structure */
 	/* Initialize the request structure */
-	req->data_handle = NULL;
+	(*req)->data_handle = NULL;
 
 
-	req->datatype = 0;
-	req->ptr = NULL;
-	req->count = -1;
-	req->user_datatype = -1;
+	(*req)->datatype = 0;
+	(*req)->ptr = NULL;
+	(*req)->count = -1;
+	(*req)->user_datatype = -1;
 
 
-	req->srcdst = -1;
-	req->mpi_tag = -1;
-	req->comm = 0;
+	(*req)->srcdst = -1;
+	(*req)->mpi_tag = -1;
+	(*req)->comm = 0;
 
 
-	req->func = NULL;
+	(*req)->func = NULL;
 
 
-	req->status = NULL;
-	req->request = 0;
-	req->flag = NULL;
+	(*req)->status = NULL;
+	(*req)->request = 0;
+	(*req)->flag = NULL;
 
 
-	req->ret = -1;
-	STARPU_PTHREAD_MUTEX_INIT(&req->req_mutex, NULL);
-	STARPU_PTHREAD_COND_INIT(&req->req_cond, NULL);
-	STARPU_PTHREAD_MUTEX_INIT(&req->posted_mutex, NULL);
-	STARPU_PTHREAD_COND_INIT(&req->posted_cond, NULL);
+	(*req)->ret = -1;
+	STARPU_PTHREAD_MUTEX_INIT(&((*req)->req_mutex), NULL);
+	STARPU_PTHREAD_COND_INIT(&((*req)->req_cond), NULL);
+	STARPU_PTHREAD_MUTEX_INIT(&((*req)->posted_mutex), NULL);
+	STARPU_PTHREAD_COND_INIT(&((*req)->posted_cond), NULL);
 
 
-	req->request_type = UNKNOWN_REQ;
+	(*req)->request_type = UNKNOWN_REQ;
 
 
-	req->submitted = 0;
-	req->completed = 0;
-	req->posted = 0;
+	(*req)->submitted = 0;
+	(*req)->completed = 0;
+	(*req)->posted = 0;
 
 
-	req->other_request = NULL;
+	(*req)->other_request = NULL;
 
 
-	req->detached = -1;
-	req->callback = NULL;
-	req->callback_arg = NULL;
+	(*req)->detached = -1;
+	(*req)->callback = NULL;
+	(*req)->callback_arg = NULL;
 
 
-	req->size_req = 0;
-	req->internal_req = NULL;
-	req->is_internal_req = 0;
-	req->envelope = NULL;
-	req->sequential_consistency = 1;
+	(*req)->size_req = 0;
+	(*req)->internal_req = NULL;
+	(*req)->is_internal_req = 0;
+	(*req)->envelope = NULL;
+	(*req)->sequential_consistency = 1;
 }
 }
 
 
  /********************************************************/
  /********************************************************/
@@ -310,35 +130,33 @@ static void _starpu_mpi_request_init(struct _starpu_mpi_req *req)
 							       int is_internal_req,
 							       int is_internal_req,
 							       ssize_t count)
 							       ssize_t count)
 {
 {
+	struct _starpu_mpi_req *req;
 
 
-	 _STARPU_MPI_LOG_IN();
-	 struct _starpu_mpi_req *req = malloc(sizeof(struct _starpu_mpi_req));
-	 STARPU_ASSERT_MSG(req, "Invalid request");
-
-	 _STARPU_MPI_INC_POSTED_REQUESTS(1);
-
-	 /* Initialize the request structure */
-	 _starpu_mpi_request_init(req);
-	 req->request_type = request_type;
-	 req->data_handle = data_handle;
-	 req->srcdst = srcdst;
-	 req->mpi_tag = mpi_tag;
-	 req->comm = comm;
-	 req->detached = detached;
-	 req->callback = callback;
-	 req->callback_arg = arg;
-	 req->func = func;
-	 req->sequential_consistency = sequential_consistency;
-	 req->is_internal_req = is_internal_req;
-	 req->count = count;
-
-	 /* Asynchronously request StarPU to fetch the data in main memory: when
-	  * it is available in main memory, _starpu_mpi_submit_new_mpi_request(req) is called and
-	  * the request is actually submitted */
-	 starpu_data_acquire_cb_sequential_consistency(data_handle, mode, _starpu_mpi_submit_new_mpi_request, (void *)req, sequential_consistency);
+	_STARPU_MPI_LOG_IN();
+	_STARPU_MPI_INC_POSTED_REQUESTS(1);
 
 
-	 _STARPU_MPI_LOG_OUT();
-	 return req;
+	/* Initialize the request structure */
+	_starpu_mpi_request_init(&req);
+	req->request_type = request_type;
+	req->data_handle = data_handle;
+	req->srcdst = srcdst;
+	req->mpi_tag = mpi_tag;
+	req->comm = comm;
+	req->detached = detached;
+	req->callback = callback;
+	req->callback_arg = arg;
+	req->func = func;
+	req->sequential_consistency = sequential_consistency;
+	req->is_internal_req = is_internal_req;
+	req->count = count;
+
+	/* Asynchronously request StarPU to fetch the data in main memory: when
+	 * it is available in main memory, _starpu_mpi_submit_new_mpi_request(req) is called and
+	 * the request is actually submitted */
+	starpu_data_acquire_cb_sequential_consistency(data_handle, mode, _starpu_mpi_submit_new_mpi_request, (void *)req, sequential_consistency);
+
+	_STARPU_MPI_LOG_OUT();
+	return req;
  }
  }
 
 
  /********************************************************/
  /********************************************************/
@@ -608,15 +426,11 @@ static void _starpu_mpi_wait_func(struct _starpu_mpi_req *waiting_req)
 
 
 int starpu_mpi_wait(starpu_mpi_req *public_req, MPI_Status *status)
 int starpu_mpi_wait(starpu_mpi_req *public_req, MPI_Status *status)
 {
 {
-	_STARPU_MPI_LOG_IN();
 	int ret;
 	int ret;
-
-	struct _starpu_mpi_req *waiting_req = malloc(sizeof(struct _starpu_mpi_req));
-	_starpu_mpi_request_init(waiting_req);
-	STARPU_ASSERT_MSG(waiting_req, "Allocation failed");
-
 	struct _starpu_mpi_req *req = *public_req;
 	struct _starpu_mpi_req *req = *public_req;
+	struct _starpu_mpi_req *waiting_req;
 
 
+	_STARPU_MPI_LOG_IN();
 	_STARPU_MPI_INC_POSTED_REQUESTS(1);
 	_STARPU_MPI_INC_POSTED_REQUESTS(1);
 
 
 	/* We cannot try to complete a MPI request that was not actually posted
 	/* We cannot try to complete a MPI request that was not actually posted
@@ -627,7 +441,7 @@ int starpu_mpi_wait(starpu_mpi_req *public_req, MPI_Status *status)
 	STARPU_PTHREAD_MUTEX_UNLOCK(&(req->req_mutex));
 	STARPU_PTHREAD_MUTEX_UNLOCK(&(req->req_mutex));
 
 
 	/* Initialize the request structure */
 	/* Initialize the request structure */
-	 _starpu_mpi_request_init(waiting_req);
+	 _starpu_mpi_request_init(&waiting_req);
 	waiting_req->status = status;
 	waiting_req->status = status;
 	waiting_req->other_request = req;
 	waiting_req->other_request = req;
 	waiting_req->func = _starpu_mpi_wait_func;
 	waiting_req->func = _starpu_mpi_wait_func;
@@ -704,9 +518,8 @@ int starpu_mpi_test(starpu_mpi_req *public_req, int *flag, MPI_Status *status)
 
 
 	if (submitted)
 	if (submitted)
 	{
 	{
-		struct _starpu_mpi_req *testing_req = malloc(sizeof(struct _starpu_mpi_req));
-		STARPU_ASSERT_MSG(testing_req, "allocation failed");
-		_starpu_mpi_request_init(testing_req);
+		struct _starpu_mpi_req *testing_req;
+		_starpu_mpi_request_init(&testing_req);
 
 
 		/* Initialize the request structure */
 		/* Initialize the request structure */
 		STARPU_PTHREAD_MUTEX_INIT(&(testing_req->req_mutex), NULL);
 		STARPU_PTHREAD_MUTEX_INIT(&(testing_req->req_mutex), NULL);
@@ -768,11 +581,11 @@ static void _starpu_mpi_barrier_func(struct _starpu_mpi_req *barrier_req)
 
 
 int starpu_mpi_barrier(MPI_Comm comm)
 int starpu_mpi_barrier(MPI_Comm comm)
 {
 {
-	_STARPU_MPI_LOG_IN();
 	int ret;
 	int ret;
-	struct _starpu_mpi_req *barrier_req = malloc(sizeof(struct _starpu_mpi_req));
-	STARPU_ASSERT_MSG(barrier_req, "allocation failed");
-	_starpu_mpi_request_init(barrier_req);
+	struct _starpu_mpi_req *barrier_req;
+
+	_STARPU_MPI_LOG_IN();
+	_starpu_mpi_request_init(&barrier_req);
 
 
 	/* First wait for *both* all tasks and MPI requests to finish, in case
 	/* First wait for *both* all tasks and MPI requests to finish, in case
 	 * some tasks generate MPI requests, MPI requests generate tasks, etc.
 	 * some tasks generate MPI requests, MPI requests generate tasks, etc.
@@ -855,11 +668,11 @@ static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req)
 
 
 	if (req->internal_req)
 	if (req->internal_req)
 	{
 	{
-		struct _starpu_mpi_copy_handle *chandle = find_chandle(req->mpi_tag, req->srcdst);
-		STARPU_ASSERT_MSG(chandle, "Could not find a copy data handle with the tag %d and the node %d\n", req->mpi_tag, req->srcdst);
-		_STARPU_MPI_DEBUG(3, "Handling deleting of copy_handle structure from the hashmap..\n");
-		delete_chandle(chandle);
-		free(chandle);
+		struct _starpu_mpi_early_data_handle *early_data_handle = _starpu_mpi_early_data_find(req->mpi_tag, req->srcdst);
+		STARPU_ASSERT_MSG(early_data_handle, "Could not find a copy data handle with the tag %d and the node %d\n", req->mpi_tag, req->srcdst);
+		_STARPU_MPI_DEBUG(3, "Handling deleting of early_data structure from the hashmap..\n");
+		_starpu_mpi_early_data_delete(early_data_handle);
+		free(early_data_handle);
 	}
 	}
 	else
 	else
 	{
 	{
@@ -911,17 +724,17 @@ static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req)
 	_STARPU_MPI_LOG_OUT();
 	_STARPU_MPI_LOG_OUT();
 }
 }
 
 
-struct _starpu_mpi_copy_cb_args
+struct _starpu_mpi_early_data_cb_args
 {
 {
 	starpu_data_handle_t data_handle;
 	starpu_data_handle_t data_handle;
-	starpu_data_handle_t copy_handle;
+	starpu_data_handle_t early_handle;
 	struct _starpu_mpi_req *req;
 	struct _starpu_mpi_req *req;
 	void *buffer;
 	void *buffer;
 };
 };
 
 
-static void _starpu_mpi_copy_cb(void* arg)
+static void _starpu_mpi_early_data_cb(void* arg)
 {
 {
-	struct _starpu_mpi_copy_cb_args *args = arg;
+	struct _starpu_mpi_early_data_cb_args *args = arg;
 
 
 	// We store in the application request the internal MPI
 	// We store in the application request the internal MPI
 	// request so that it can be used by starpu_mpi_wait
 	// request so that it can be used by starpu_mpi_wait
@@ -931,16 +744,16 @@ static void _starpu_mpi_copy_cb(void* arg)
 	if (args->buffer)
 	if (args->buffer)
 	{
 	{
 		/* Data has been received as a raw memory, it has to be unpacked */
 		/* Data has been received as a raw memory, it has to be unpacked */
-		struct starpu_data_interface_ops *itf_src = starpu_data_get_interface_ops(args->copy_handle);
+		struct starpu_data_interface_ops *itf_src = starpu_data_get_interface_ops(args->early_handle);
 		struct starpu_data_interface_ops *itf_dst = starpu_data_get_interface_ops(args->data_handle);
 		struct starpu_data_interface_ops *itf_dst = starpu_data_get_interface_ops(args->data_handle);
 		STARPU_ASSERT_MSG(itf_dst->unpack_data, "The data interface does not define an unpack function\n");
 		STARPU_ASSERT_MSG(itf_dst->unpack_data, "The data interface does not define an unpack function\n");
-		itf_dst->unpack_data(args->data_handle, STARPU_MAIN_RAM, args->buffer, itf_src->get_size(args->copy_handle));
+		itf_dst->unpack_data(args->data_handle, STARPU_MAIN_RAM, args->buffer, itf_src->get_size(args->early_handle));
 		free(args->buffer);
 		free(args->buffer);
 	}
 	}
 	else
 	else
 	{
 	{
-		struct starpu_data_interface_ops *itf = starpu_data_get_interface_ops(args->copy_handle);
-		void* itf_src = starpu_data_get_interface_on_node(args->copy_handle, STARPU_MAIN_RAM);
+		struct starpu_data_interface_ops *itf = starpu_data_get_interface_ops(args->early_handle);
+		void* itf_src = starpu_data_get_interface_on_node(args->early_handle, STARPU_MAIN_RAM);
 		void* itf_dst = starpu_data_get_interface_on_node(args->data_handle, STARPU_MAIN_RAM);
 		void* itf_dst = starpu_data_get_interface_on_node(args->data_handle, STARPU_MAIN_RAM);
 
 
 		if (!itf->copy_methods->ram_to_ram)
 		if (!itf->copy_methods->ram_to_ram)
@@ -955,11 +768,11 @@ static void _starpu_mpi_copy_cb(void* arg)
 		}
 		}
 	}
 	}
 
 
-	_STARPU_MPI_DEBUG(3, "Done, handling release of copy_handle..\n");
-	starpu_data_release(args->copy_handle);
+	_STARPU_MPI_DEBUG(3, "Done, handling release of early_handle..\n");
+	starpu_data_release(args->early_handle);
 
 
-	_STARPU_MPI_DEBUG(3, "Done, handling unregister of copy_handle..\n");
-	starpu_data_unregister_submit(args->copy_handle);
+	_STARPU_MPI_DEBUG(3, "Done, handling unregister of early_handle..\n");
+	starpu_data_unregister_submit(args->early_handle);
 
 
 	_STARPU_MPI_DEBUG(3, "Done, handling request %p termination of the already received request\n",args->req);
 	_STARPU_MPI_DEBUG(3, "Done, handling request %p termination of the already received request\n",args->req);
 	// If the request is detached, we need to call _starpu_mpi_handle_request_termination
 	// If the request is detached, we need to call _starpu_mpi_handle_request_termination
@@ -1019,41 +832,41 @@ static void _starpu_mpi_submit_new_mpi_request(void *arg)
 		else
 		else
 		{
 		{
 			/* test whether the receive request has already been submitted internally by StarPU-MPI*/
 			/* test whether the receive request has already been submitted internally by StarPU-MPI*/
-			struct _starpu_mpi_copy_handle *chandle = find_chandle(req->mpi_tag, req->srcdst);
+			struct _starpu_mpi_early_data_handle *early_data_handle = _starpu_mpi_early_data_find(req->mpi_tag, req->srcdst);
 
 
 			/* Case : the request has already been submitted internally by StarPU.
 			/* Case : the request has already been submitted internally by StarPU.
 			 * We'll asynchronously ask a Read permission over the temporary handle, so as when
 			 * We'll asynchronously ask a Read permission over the temporary handle, so as when
-			 * the internal receive will be over, the _starpu_mpi_copy_cb function will be called to
+			 * the internal receive will be over, the _starpu_mpi_early_data_cb function will be called to
 			 * bring the data back to the original data handle associated to the request.*/
 			 * bring the data back to the original data handle associated to the request.*/
-			if (chandle)
+			if (early_data_handle)
 			{
 			{
 				STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
 				STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
-				STARPU_PTHREAD_MUTEX_LOCK(&(chandle->req_mutex));
-				while (!(chandle->req_ready))
-					STARPU_PTHREAD_COND_WAIT(&(chandle->req_cond), &(chandle->req_mutex));
-				STARPU_PTHREAD_MUTEX_UNLOCK(&(chandle->req_mutex));
+				STARPU_PTHREAD_MUTEX_LOCK(&(early_data_handle->req_mutex));
+				while (!(early_data_handle->req_ready))
+					STARPU_PTHREAD_COND_WAIT(&(early_data_handle->req_cond), &(early_data_handle->req_mutex));
+				STARPU_PTHREAD_MUTEX_UNLOCK(&(early_data_handle->req_mutex));
 				STARPU_PTHREAD_MUTEX_LOCK(&mutex);
 				STARPU_PTHREAD_MUTEX_LOCK(&mutex);
 
 
 				_STARPU_MPI_DEBUG(3, "The RECV request %p with tag %d has already been received, copying previously received data into handle's pointer..\n", req, req->mpi_tag);
 				_STARPU_MPI_DEBUG(3, "The RECV request %p with tag %d has already been received, copying previously received data into handle's pointer..\n", req, req->mpi_tag);
-				STARPU_ASSERT(req->data_handle != chandle->handle);
+				STARPU_ASSERT(req->data_handle != early_data_handle->handle);
 
 
-				req->internal_req = chandle->req;
+				req->internal_req = early_data_handle->req;
 
 
-				struct _starpu_mpi_copy_cb_args *cb_args = malloc(sizeof(struct _starpu_mpi_copy_cb_args));
+				struct _starpu_mpi_early_data_cb_args *cb_args = malloc(sizeof(struct _starpu_mpi_early_data_cb_args));
 				cb_args->data_handle = req->data_handle;
 				cb_args->data_handle = req->data_handle;
-				cb_args->copy_handle = chandle->handle;
-				cb_args->buffer = chandle->buffer;
+				cb_args->early_handle = early_data_handle->handle;
+				cb_args->buffer = early_data_handle->buffer;
 				cb_args->req = req;
 				cb_args->req = req;
 
 
 				_STARPU_MPI_DEBUG(3, "Calling data_acquire_cb on starpu_mpi_copy_cb..\n");
 				_STARPU_MPI_DEBUG(3, "Calling data_acquire_cb on starpu_mpi_copy_cb..\n");
-				starpu_data_acquire_cb(chandle->handle,STARPU_R,_starpu_mpi_copy_cb,(void*) cb_args);
+				starpu_data_acquire_cb(early_data_handle->handle,STARPU_R,_starpu_mpi_early_data_cb,(void*) cb_args);
 			}
 			}
 			/* Case : a classic receive request with no send received earlier than expected.
 			/* Case : a classic receive request with no send received earlier than expected.
 			 * We just add the pending receive request to the requests' hashmap. */
 			 * We just add the pending receive request to the requests' hashmap. */
 			else
 			else
 			{
 			{
 				_STARPU_MPI_DEBUG(3, "Adding the pending receive request %p (srcdst %d tag %d) into the request hashmap\n", req, req->srcdst, req->mpi_tag);
 				_STARPU_MPI_DEBUG(3, "Adding the pending receive request %p (srcdst %d tag %d) into the request hashmap\n", req, req->srcdst, req->mpi_tag);
-				add_app_req(req);
+				_starpu_mpi_early_request_add(req);
 			}
 			}
 		}
 		}
 	}
 	}
@@ -1252,14 +1065,8 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 	_starpu_mpi_comm_amounts_init(MPI_COMM_WORLD);
 	_starpu_mpi_comm_amounts_init(MPI_COMM_WORLD);
 	_starpu_mpi_cache_init(MPI_COMM_WORLD);
 	_starpu_mpi_cache_init(MPI_COMM_WORLD);
 
 
-	{
-		int nb_nodes, k;
-		MPI_Comm_size(MPI_COMM_WORLD, &nb_nodes);
-		_starpu_mpi_app_req_hashmap = malloc(nb_nodes * sizeof(struct _starpu_mpi_req *));
-		for(k=0 ; k<nb_nodes ; k++) _starpu_mpi_app_req_hashmap[k] = NULL;
-		_starpu_mpi_copy_handle_hashmap = malloc(nb_nodes * sizeof(struct _starpu_mpi_copy_handle_hash_list *));
-		for(k=0 ; k<nb_nodes ; k++) _starpu_mpi_copy_handle_hashmap[k] = NULL;
-	}
+	_starpu_mpi_early_request_init(worldsize);
+	_starpu_mpi_early_data_init(worldsize);
 
 
 	/* notify the main thread that the progression thread is ready */
 	/* notify the main thread that the progression thread is ready */
 	STARPU_PTHREAD_MUTEX_LOCK(&mutex);
 	STARPU_PTHREAD_MUTEX_LOCK(&mutex);
@@ -1276,7 +1083,7 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 	while (running || posted_requests || !(_starpu_mpi_req_list_empty(new_requests)) || !(_starpu_mpi_req_list_empty(detached_requests)))
 	while (running || posted_requests || !(_starpu_mpi_req_list_empty(new_requests)) || !(_starpu_mpi_req_list_empty(detached_requests)))
 	{
 	{
 		/* shall we block ? */
 		/* shall we block ? */
-		unsigned block = _starpu_mpi_req_list_empty(new_requests) && (_starpu_mpi_app_req_hashmap_count == 0);
+		unsigned block = _starpu_mpi_req_list_empty(new_requests) && _starpu_mpi_early_request_count() == 0;
 
 
 #ifndef STARPU_MPI_ACTIVITY
 #ifndef STARPU_MPI_ACTIVITY
 		STARPU_PTHREAD_MUTEX_LOCK(&detached_requests_mutex);
 		STARPU_PTHREAD_MUTEX_LOCK(&detached_requests_mutex);
@@ -1316,7 +1123,7 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 		/* If there is no currently submitted header_req submitted to catch envelopes from senders, and there is some pending receive
 		/* If there is no currently submitted header_req submitted to catch envelopes from senders, and there is some pending receive
 		 * requests in our side, we resubmit a header request. */
 		 * requests in our side, we resubmit a header request. */
 		MPI_Request header_req;
 		MPI_Request header_req;
-		if ((_starpu_mpi_app_req_hashmap_count > 0) && (header_req_submitted == 0))// && (HASH_COUNT(_starpu_mpi_copy_handle_hashmap) == 0))
+		if ((_starpu_mpi_early_request_count() > 0) && (header_req_submitted == 0))// && (HASH_COUNT(_starpu_mpi_early_data_handle_hashmap) == 0))
 		{
 		{
 			_STARPU_MPI_DEBUG(3, "Posting a receive to get a data envelop\n");
 			_STARPU_MPI_DEBUG(3, "Posting a receive to get a data envelop\n");
 			MPI_Irecv(recv_env, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, MPI_ANY_SOURCE, _starpu_mpi_tag, MPI_COMM_WORLD, &header_req);
 			MPI_Irecv(recv_env, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, MPI_ANY_SOURCE, _starpu_mpi_tag, MPI_COMM_WORLD, &header_req);
@@ -1342,14 +1149,14 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 			{
 			{
 				_STARPU_MPI_DEBUG(3, "Searching for application request with tag %d and source %d (size %ld)\n", recv_env->mpi_tag, status.MPI_SOURCE, recv_env->size);
 				_STARPU_MPI_DEBUG(3, "Searching for application request with tag %d and source %d (size %ld)\n", recv_env->mpi_tag, status.MPI_SOURCE, recv_env->size);
 
 
-				struct _starpu_mpi_req *found_req = find_app_req(recv_env->mpi_tag, status.MPI_SOURCE);
+				struct _starpu_mpi_req *found_req = _starpu_mpi_early_request_find(recv_env->mpi_tag, status.MPI_SOURCE);
 
 
 				/* Case : a data will arrive before the matching receive has been submitted in our side of the application.
 				/* Case : a data will arrive before the matching receive has been submitted in our side of the application.
 				 * We will allow a temporary handle to store the incoming data, by submitting a starpu_mpi_irecv_detached
 				 * We will allow a temporary handle to store the incoming data, by submitting a starpu_mpi_irecv_detached
 				 * on this handle, and register this so as the StarPU-MPI layer can remember it.*/
 				 * on this handle, and register this so as the StarPU-MPI layer can remember it.*/
 				if (!found_req)
 				if (!found_req)
 				{
 				{
-					_STARPU_MPI_DEBUG(3, "Request with tag %d and source %d not found, creating a copy_handle to receive incoming data..\n", recv_env->mpi_tag, status.MPI_SOURCE);
+					_STARPU_MPI_DEBUG(3, "Request with tag %d and source %d not found, creating a early_handle to receive incoming data..\n", recv_env->mpi_tag, status.MPI_SOURCE);
 
 
 					starpu_data_handle_t data_handle = NULL;
 					starpu_data_handle_t data_handle = NULL;
 
 
@@ -1357,19 +1164,19 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 					data_handle = _starpu_data_get_data_handle_from_tag(recv_env->mpi_tag);
 					data_handle = _starpu_data_get_data_handle_from_tag(recv_env->mpi_tag);
 					STARPU_PTHREAD_MUTEX_LOCK(&mutex);
 					STARPU_PTHREAD_MUTEX_LOCK(&mutex);
 
 
-					struct _starpu_mpi_copy_handle* chandle = calloc(1, sizeof(struct _starpu_mpi_copy_handle));
-					STARPU_ASSERT(chandle);
-					STARPU_PTHREAD_MUTEX_INIT(&chandle->req_mutex, NULL);
-					STARPU_PTHREAD_COND_INIT(&chandle->req_cond, NULL);
-					chandle->mpi_tag = recv_env->mpi_tag;
-					chandle->env = recv_env;
-					chandle->source = status.MPI_SOURCE;
+					struct _starpu_mpi_early_data_handle* early_data_handle = calloc(1, sizeof(struct _starpu_mpi_early_data_handle));
+					STARPU_ASSERT(early_data_handle);
+					STARPU_PTHREAD_MUTEX_INIT(&early_data_handle->req_mutex, NULL);
+					STARPU_PTHREAD_COND_INIT(&early_data_handle->req_cond, NULL);
+					early_data_handle->mpi_tag = recv_env->mpi_tag;
+					early_data_handle->env = recv_env;
+					early_data_handle->source = status.MPI_SOURCE;
 
 
 					if (data_handle)
 					if (data_handle)
 					{
 					{
-						chandle->buffer = NULL;
-						starpu_data_register_same(&chandle->handle, data_handle);
-						add_chandle(chandle);
+						early_data_handle->buffer = NULL;
+						starpu_data_register_same(&early_data_handle->handle, data_handle);
+						_starpu_mpi_early_data_add(early_data_handle);
 					}
 					}
 					else
 					else
 					{
 					{
@@ -1377,15 +1184,17 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 						 * we are going to receive the data as a raw memory, and give it
 						 * we are going to receive the data as a raw memory, and give it
 						 * to the application when it post a receive for this tag
 						 * to the application when it post a receive for this tag
 						 */
 						 */
-						_STARPU_MPI_DEBUG(20, "Posting a receive for a data of size %d which has not yet been registered\n", (int)chandle->env->size);
-						chandle->buffer = malloc(chandle->env->size);
-						starpu_vector_data_register(&chandle->handle, STARPU_MAIN_RAM, (uintptr_t) chandle->buffer, chandle->env->size, 1);
-						add_chandle(chandle);
+						_STARPU_MPI_DEBUG(20, "Posting a receive for a data of size %d which has not yet been registered\n", (int)early_data_handle->env->size);
+						early_data_handle->buffer = malloc(early_data_handle->env->size);
+						starpu_vector_data_register(&early_data_handle->handle, STARPU_MAIN_RAM, (uintptr_t) early_data_handle->buffer, early_data_handle->env->size, 1);
+						_starpu_mpi_early_data_add(early_data_handle);
 					}
 					}
 
 
-					_STARPU_MPI_DEBUG(20, "Posting internal detached irecv on copy_handle with tag %d from src %d ..\n", chandle->mpi_tag, status.MPI_SOURCE);
+					_STARPU_MPI_DEBUG(20, "Posting internal detached irecv on early_handle with tag %d from src %d ..\n", early_data_handle->mpi_tag, status.MPI_SOURCE);
 					STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
 					STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
-					chandle->req = _starpu_mpi_irecv_common(chandle->handle, status.MPI_SOURCE, chandle->mpi_tag, MPI_COMM_WORLD, 1, NULL, NULL, 1, 1, recv_env->size);
+					early_data_handle->req = _starpu_mpi_irecv_common(early_data_handle->handle, status.MPI_SOURCE,
+											  early_data_handle->mpi_tag, MPI_COMM_WORLD, 1,
+											  NULL, NULL, 1, 1, recv_env->size);
 					STARPU_PTHREAD_MUTEX_LOCK(&mutex);
 					STARPU_PTHREAD_MUTEX_LOCK(&mutex);
 
 
 					// We wait until the request is pushed in the
 					// We wait until the request is pushed in the
@@ -1394,15 +1203,15 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 					// on the request and post the corresponding mpi_irecv,
 					// on the request and post the corresponding mpi_irecv,
 					// otherwise, it may lead to read data as envelop
 					// otherwise, it may lead to read data as envelop
 					STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
 					STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
-					STARPU_PTHREAD_MUTEX_LOCK(&(chandle->req->posted_mutex));
-					while (!(chandle->req->posted))
-					     STARPU_PTHREAD_COND_WAIT(&(chandle->req->posted_cond), &(chandle->req->posted_mutex));
-					STARPU_PTHREAD_MUTEX_UNLOCK(&(chandle->req->posted_mutex));
-
-					STARPU_PTHREAD_MUTEX_LOCK(&chandle->req_mutex);
-					chandle->req_ready = 1;
-					STARPU_PTHREAD_COND_BROADCAST(&chandle->req_cond);
-					STARPU_PTHREAD_MUTEX_UNLOCK(&chandle->req_mutex);
+					STARPU_PTHREAD_MUTEX_LOCK(&(early_data_handle->req->posted_mutex));
+					while (!(early_data_handle->req->posted))
+					     STARPU_PTHREAD_COND_WAIT(&(early_data_handle->req->posted_cond), &(early_data_handle->req->posted_mutex));
+					STARPU_PTHREAD_MUTEX_UNLOCK(&(early_data_handle->req->posted_mutex));
+
+					STARPU_PTHREAD_MUTEX_LOCK(&early_data_handle->req_mutex);
+					early_data_handle->req_ready = 1;
+					STARPU_PTHREAD_COND_BROADCAST(&early_data_handle->req_cond);
+					STARPU_PTHREAD_MUTEX_UNLOCK(&early_data_handle->req_mutex);
 					STARPU_PTHREAD_MUTEX_LOCK(&mutex);
 					STARPU_PTHREAD_MUTEX_LOCK(&mutex);
 				}
 				}
 				/* Case : a matching receive has been found for the incoming data, we handle the correct allocation of the pointer associated to
 				/* Case : a matching receive has been found for the incoming data, we handle the correct allocation of the pointer associated to
@@ -1411,7 +1220,7 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 				{
 				{
 					_STARPU_MPI_DEBUG(3, "A matching receive has been found for the incoming data with tag %d\n", recv_env->mpi_tag);
 					_STARPU_MPI_DEBUG(3, "A matching receive has been found for the incoming data with tag %d\n", recv_env->mpi_tag);
 
 
-					delete_app_req(found_req);
+					_starpu_mpi_early_request_delete(found_req);
 
 
 					_starpu_mpi_handle_allocate_datatype(found_req->data_handle, &found_req->datatype, &found_req->user_datatype);
 					_starpu_mpi_handle_allocate_datatype(found_req->data_handle, &found_req->datatype, &found_req->user_datatype);
 					if (found_req->user_datatype == 0)
 					if (found_req->user_datatype == 0)
@@ -1448,8 +1257,8 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 	STARPU_ASSERT_MSG(_starpu_mpi_req_list_empty(detached_requests), "List of detached requests not empty");
 	STARPU_ASSERT_MSG(_starpu_mpi_req_list_empty(detached_requests), "List of detached requests not empty");
 	STARPU_ASSERT_MSG(_starpu_mpi_req_list_empty(new_requests), "List of new requests not empty");
 	STARPU_ASSERT_MSG(_starpu_mpi_req_list_empty(new_requests), "List of new requests not empty");
 	STARPU_ASSERT_MSG(posted_requests == 0, "Number of posted request is not zero");
 	STARPU_ASSERT_MSG(posted_requests == 0, "Number of posted request is not zero");
-	STARPU_ASSERT_MSG(_starpu_mpi_app_req_hashmap_count == 0, "Number of receive requests left is not zero");
-	STARPU_ASSERT_MSG(_starpu_mpi_copy_handle_hashmap_count == 0, "Number of copy requests left is not zero");
+	_starpu_mpi_early_request_check_termination();
+	_starpu_mpi_early_data_check_termination();
 
 
 	if (argc_argv->initialize_mpi)
 	if (argc_argv->initialize_mpi)
 	{
 	{
@@ -1459,27 +1268,8 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 
 
 	STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
 	STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
 
 
-	{
-		int n;
-		struct _starpu_mpi_copy_handle_hashlist *hashlist;
-
-		for(n=0 ; n<worldsize; n++)
-		{
-			for(hashlist=_starpu_mpi_copy_handle_hashmap[n]; hashlist != NULL; hashlist=hashlist->hh.next)
-			{
-				_starpu_mpi_copy_handle_list_delete(hashlist->list);
-			}
-			struct _starpu_mpi_copy_handle_hashlist *current, *tmp;
-			HASH_ITER(hh, _starpu_mpi_copy_handle_hashmap[n], current, tmp)
-			{
-				HASH_DEL(_starpu_mpi_copy_handle_hashmap[n], current);
-				free(current);
-			}
-		}
-	}
-
-	free(_starpu_mpi_app_req_hashmap);
-	free(_starpu_mpi_copy_handle_hashmap);
+	_starpu_mpi_early_data_free(worldsize);
+	_starpu_mpi_early_request_free();
 	free(argc_argv);
 	free(argc_argv);
 	free(recv_env);
 	free(recv_env);
 
 

+ 54 - 5
mpi/src/starpu_mpi_cache.c

@@ -30,6 +30,8 @@ struct _starpu_data_entry
 	starpu_data_handle_t data;
 	starpu_data_handle_t data;
 };
 };
 
 
+static starpu_pthread_mutex_t *_cache_sent_mutex;
+static starpu_pthread_mutex_t *_cache_received_mutex;
 static struct _starpu_data_entry **_cache_sent_data = NULL;
 static struct _starpu_data_entry **_cache_sent_data = NULL;
 static struct _starpu_data_entry **_cache_received_data = NULL;
 static struct _starpu_data_entry **_cache_received_data = NULL;
 int _cache_enabled=1;
 int _cache_enabled=1;
@@ -53,11 +55,19 @@ void _starpu_mpi_cache_init(MPI_Comm comm)
 
 
 	MPI_Comm_size(comm, &nb_nodes);
 	MPI_Comm_size(comm, &nb_nodes);
 	_STARPU_MPI_DEBUG(2, "Initialising htable for cache\n");
 	_STARPU_MPI_DEBUG(2, "Initialising htable for cache\n");
+
 	_cache_sent_data = malloc(nb_nodes * sizeof(struct _starpu_data_entry *));
 	_cache_sent_data = malloc(nb_nodes * sizeof(struct _starpu_data_entry *));
-	for(i=0 ; i<nb_nodes ; i++) _cache_sent_data[i] = NULL;
 	_cache_received_data = malloc(nb_nodes * sizeof(struct _starpu_data_entry *));
 	_cache_received_data = malloc(nb_nodes * sizeof(struct _starpu_data_entry *));
-	for(i=0 ; i<nb_nodes ; i++) _cache_received_data[i] = NULL;
-	_starpu_mpi_cache_stats_init(comm);
+	_cache_sent_mutex = malloc(nb_nodes * sizeof(starpu_pthread_mutex_t));
+	_cache_received_mutex = malloc(nb_nodes * sizeof(starpu_pthread_mutex_t));
+
+	for(i=0 ; i<nb_nodes ; i++)
+	{
+		_cache_sent_data[i] = NULL;
+		_cache_received_data[i] = NULL;
+		STARPU_PTHREAD_MUTEX_INIT(&_cache_sent_mutex[i], NULL);
+		STARPU_PTHREAD_MUTEX_INIT(&_cache_received_mutex[i], NULL);
+	}
 }
 }
 
 
 static
 static
@@ -72,27 +82,44 @@ void _starpu_mpi_cache_empty_tables(int world_size)
 	for(i=0 ; i<world_size ; i++)
 	for(i=0 ; i<world_size ; i++)
 	{
 	{
 		struct _starpu_data_entry *entry, *tmp;
 		struct _starpu_data_entry *entry, *tmp;
+
+		STARPU_PTHREAD_MUTEX_LOCK(&_cache_sent_mutex[i]);
 		HASH_ITER(hh, _cache_sent_data[i], entry, tmp)
 		HASH_ITER(hh, _cache_sent_data[i], entry, tmp)
 		{
 		{
 			HASH_DEL(_cache_sent_data[i], entry);
 			HASH_DEL(_cache_sent_data[i], entry);
 			free(entry);
 			free(entry);
 		}
 		}
+		STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_sent_mutex[i]);
+
+		STARPU_PTHREAD_MUTEX_LOCK(&_cache_received_mutex[i]);
 		HASH_ITER(hh, _cache_received_data[i], entry, tmp)
 		HASH_ITER(hh, _cache_received_data[i], entry, tmp)
 		{
 		{
 			HASH_DEL(_cache_received_data[i], entry);
 			HASH_DEL(_cache_received_data[i], entry);
 			_starpu_mpi_cache_stats_dec(-1, i, entry->data);
 			_starpu_mpi_cache_stats_dec(-1, i, entry->data);
 			free(entry);
 			free(entry);
 		}
 		}
+		STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_received_mutex[i]);
 	}
 	}
 }
 }
 
 
 void _starpu_mpi_cache_free(int world_size)
 void _starpu_mpi_cache_free(int world_size)
 {
 {
+	int i;
+
 	if (_cache_enabled == 0) return;
 	if (_cache_enabled == 0) return;
 
 
 	_starpu_mpi_cache_empty_tables(world_size);
 	_starpu_mpi_cache_empty_tables(world_size);
 	free(_cache_sent_data);
 	free(_cache_sent_data);
 	free(_cache_received_data);
 	free(_cache_received_data);
+
+	for(i=0 ; i<world_size ; i++)
+	{
+		STARPU_PTHREAD_MUTEX_DESTROY(&_cache_sent_mutex[i]);
+		STARPU_PTHREAD_MUTEX_DESTROY(&_cache_received_mutex[i]);
+	}
+	free(_cache_sent_mutex);
+	free(_cache_received_mutex);
+
 	_starpu_mpi_cache_stats_free();
 	_starpu_mpi_cache_stats_free();
 }
 }
 
 
@@ -104,6 +131,8 @@ void _starpu_mpi_cache_flush_sent(MPI_Comm comm, starpu_data_handle_t data)
 	for(n=0 ; n<size ; n++)
 	for(n=0 ; n<size ; n++)
 	{
 	{
 		struct _starpu_data_entry *already_sent;
 		struct _starpu_data_entry *already_sent;
+
+		STARPU_PTHREAD_MUTEX_LOCK(&_cache_sent_mutex[n]);
 		HASH_FIND_PTR(_cache_sent_data[n], &data, already_sent);
 		HASH_FIND_PTR(_cache_sent_data[n], &data, already_sent);
 		if (already_sent)
 		if (already_sent)
 		{
 		{
@@ -111,6 +140,7 @@ void _starpu_mpi_cache_flush_sent(MPI_Comm comm, starpu_data_handle_t data)
 			HASH_DEL(_cache_sent_data[n], already_sent);
 			HASH_DEL(_cache_sent_data[n], already_sent);
 			free(already_sent);
 			free(already_sent);
 		}
 		}
+		STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_sent_mutex[n]);
 	}
 	}
 }
 }
 
 
@@ -119,6 +149,7 @@ void _starpu_mpi_cache_flush_recv(starpu_data_handle_t data, int me)
 	int mpi_rank = starpu_data_get_rank(data);
 	int mpi_rank = starpu_data_get_rank(data);
 	struct _starpu_data_entry *already_received;
 	struct _starpu_data_entry *already_received;
 
 
+	STARPU_PTHREAD_MUTEX_LOCK(&_cache_received_mutex[mpi_rank]);
 	HASH_FIND_PTR(_cache_received_data[mpi_rank], &data, already_received);
 	HASH_FIND_PTR(_cache_received_data[mpi_rank], &data, already_received);
 	if (already_received)
 	if (already_received)
 	{
 	{
@@ -131,6 +162,7 @@ void _starpu_mpi_cache_flush_recv(starpu_data_handle_t data, int me)
 		free(already_received);
 		free(already_received);
 		starpu_data_invalidate_submit(data);
 		starpu_data_invalidate_submit(data);
 	}
 	}
+	STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_received_mutex[mpi_rank]);
 }
 }
 
 
 void starpu_mpi_cache_flush_all_data(MPI_Comm comm)
 void starpu_mpi_cache_flush_all_data(MPI_Comm comm)
@@ -146,6 +178,8 @@ void starpu_mpi_cache_flush_all_data(MPI_Comm comm)
 	for(i=0 ; i<nb_nodes ; i++)
 	for(i=0 ; i<nb_nodes ; i++)
 	{
 	{
 		struct _starpu_data_entry *entry, *tmp;
 		struct _starpu_data_entry *entry, *tmp;
+
+		STARPU_PTHREAD_MUTEX_LOCK(&_cache_sent_mutex[i]);
 		HASH_ITER(hh, _cache_sent_data[i], entry, tmp)
 		HASH_ITER(hh, _cache_sent_data[i], entry, tmp)
 		{
 		{
 			mpi_rank = starpu_data_get_rank(entry->data);
 			mpi_rank = starpu_data_get_rank(entry->data);
@@ -154,6 +188,9 @@ void starpu_mpi_cache_flush_all_data(MPI_Comm comm)
 			HASH_DEL(_cache_sent_data[i], entry);
 			HASH_DEL(_cache_sent_data[i], entry);
 			free(entry);
 			free(entry);
 		}
 		}
+		STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_sent_mutex[i]);
+
+		STARPU_PTHREAD_MUTEX_LOCK(&_cache_received_mutex[i]);
 		HASH_ITER(hh, _cache_received_data[i], entry, tmp)
 		HASH_ITER(hh, _cache_received_data[i], entry, tmp)
 		{
 		{
 			mpi_rank = starpu_data_get_rank(entry->data);
 			mpi_rank = starpu_data_get_rank(entry->data);
@@ -163,6 +200,7 @@ void starpu_mpi_cache_flush_all_data(MPI_Comm comm)
 			_starpu_mpi_cache_stats_dec(my_rank, i, entry->data);
 			_starpu_mpi_cache_stats_dec(my_rank, i, entry->data);
 			free(entry);
 			free(entry);
 		}
 		}
+		STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_received_mutex[i]);
 	}
 	}
 }
 }
 
 
@@ -180,6 +218,7 @@ void starpu_mpi_cache_flush(MPI_Comm comm, starpu_data_handle_t data_handle)
 
 
 	for(i=0 ; i<nb_nodes ; i++)
 	for(i=0 ; i<nb_nodes ; i++)
 	{
 	{
+		STARPU_PTHREAD_MUTEX_LOCK(&_cache_sent_mutex[i]);
 		HASH_FIND_PTR(_cache_sent_data[i], &data_handle, avail);
 		HASH_FIND_PTR(_cache_sent_data[i], &data_handle, avail);
 		if (avail)
 		if (avail)
 		{
 		{
@@ -187,6 +226,9 @@ void starpu_mpi_cache_flush(MPI_Comm comm, starpu_data_handle_t data_handle)
 			HASH_DEL(_cache_sent_data[i], avail);
 			HASH_DEL(_cache_sent_data[i], avail);
 			free(avail);
 			free(avail);
 		}
 		}
+		STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_sent_mutex[i]);
+
+		STARPU_PTHREAD_MUTEX_LOCK(&_cache_received_mutex[i]);
 		HASH_FIND_PTR(_cache_received_data[i], &data_handle, avail);
 		HASH_FIND_PTR(_cache_received_data[i], &data_handle, avail);
 		if (avail)
 		if (avail)
 		{
 		{
@@ -195,6 +237,7 @@ void starpu_mpi_cache_flush(MPI_Comm comm, starpu_data_handle_t data_handle)
 			_starpu_mpi_cache_stats_dec(my_rank, i, data_handle);
 			_starpu_mpi_cache_stats_dec(my_rank, i, data_handle);
 			free(avail);
 			free(avail);
 		}
 		}
+		STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_received_mutex[i]);
 	}
 	}
 
 
 	if (mpi_rank != my_rank && mpi_rank != -1)
 	if (mpi_rank != my_rank && mpi_rank != -1)
@@ -203,9 +246,11 @@ void starpu_mpi_cache_flush(MPI_Comm comm, starpu_data_handle_t data_handle)
 
 
 void *_starpu_mpi_already_received(int src, starpu_data_handle_t data, int mpi_rank)
 void *_starpu_mpi_already_received(int src, starpu_data_handle_t data, int mpi_rank)
 {
 {
+	struct _starpu_data_entry *already_received;
+
 	if (_cache_enabled == 0) return NULL;
 	if (_cache_enabled == 0) return NULL;
 
 
-	struct _starpu_data_entry *already_received;
+	STARPU_PTHREAD_MUTEX_LOCK(&_cache_received_mutex[mpi_rank]);
 	HASH_FIND_PTR(_cache_received_data[mpi_rank], &data, already_received);
 	HASH_FIND_PTR(_cache_received_data[mpi_rank], &data, already_received);
 	if (already_received == NULL)
 	if (already_received == NULL)
 	{
 	{
@@ -218,14 +263,17 @@ void *_starpu_mpi_already_received(int src, starpu_data_handle_t data, int mpi_r
 	{
 	{
 		_STARPU_MPI_DEBUG(2, "Do not receive data %p from node %d as it is already available\n", data, mpi_rank);
 		_STARPU_MPI_DEBUG(2, "Do not receive data %p from node %d as it is already available\n", data, mpi_rank);
 	}
 	}
+	STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_received_mutex[mpi_rank]);
 	return already_received;
 	return already_received;
 }
 }
 
 
 void *_starpu_mpi_already_sent(starpu_data_handle_t data, int dest)
 void *_starpu_mpi_already_sent(starpu_data_handle_t data, int dest)
 {
 {
+	struct _starpu_data_entry *already_sent;
+
 	if (_cache_enabled == 0) return NULL;
 	if (_cache_enabled == 0) return NULL;
 
 
-	struct _starpu_data_entry *already_sent;
+	STARPU_PTHREAD_MUTEX_LOCK(&_cache_sent_mutex[dest]);
 	HASH_FIND_PTR(_cache_sent_data[dest], &data, already_sent);
 	HASH_FIND_PTR(_cache_sent_data[dest], &data, already_sent);
 	if (already_sent == NULL)
 	if (already_sent == NULL)
 	{
 	{
@@ -238,6 +286,7 @@ void *_starpu_mpi_already_sent(starpu_data_handle_t data, int dest)
 	{
 	{
 		_STARPU_MPI_DEBUG(2, "Do not send data %p to node %d as it has already been sent\n", data, dest);
 		_STARPU_MPI_DEBUG(2, "Do not send data %p to node %d as it has already been sent\n", data, dest);
 	}
 	}
+	STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_sent_mutex[dest]);
 	return already_sent;
 	return already_sent;
 }
 }
 
 

+ 168 - 0
mpi/src/starpu_mpi_early_data.c

@@ -0,0 +1,168 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009, 2010-2014  Université de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <stdlib.h>
+#include <starpu_mpi.h>
+#include <starpu_mpi_early_data.h>
+#include <starpu_mpi_private.h>
+#include <common/uthash.h>
+
+/* One bucket of a per-source hash table: the early data handles that share
+ * the same MPI tag. Handles are appended at the tail and taken from the
+ * front of the list. */
+struct _starpu_mpi_early_data_handle_hashlist
+{
+	struct _starpu_mpi_early_data_handle_list *list; /* handles queued for this tag */
+	UT_hash_handle hh; /* uthash bookkeeping */
+	int mpi_tag; /* hash key */
+};
+
+/** stores data which have been received by MPI but have not been requested by the application */
+static struct _starpu_mpi_early_data_handle_hashlist **_starpu_mpi_early_data_handle_hashmap = NULL;
+static int _starpu_mpi_early_data_handle_hashmap_count = 0;
+
+/* Allocates the early-data table: world_size hash-table heads, all initially
+ * empty. The table is later indexed by the source rank of the handles. */
+void _starpu_mpi_early_data_init(int world_size)
+{
+	int k;
+
+	/* sizeof on the pointee keeps the element size tied to the declaration */
+	_starpu_mpi_early_data_handle_hashmap = malloc(world_size * sizeof(*_starpu_mpi_early_data_handle_hashmap));
+	/* malloc(0) may legitimately return NULL, hence the world_size test */
+	STARPU_ASSERT_MSG(world_size <= 0 || _starpu_mpi_early_data_handle_hashmap != NULL, "Cannot allocate the early data hashmap");
+	for(k=0 ; k<world_size ; k++) _starpu_mpi_early_data_handle_hashmap[k] = NULL;
+}
+
+/* Asserts that every handle added with _starpu_mpi_early_data_add() has been
+ * removed again with _starpu_mpi_early_data_delete(), i.e. that the count
+ * maintained by these two functions is back to zero. */
+void _starpu_mpi_early_data_check_termination()
+{
+	STARPU_ASSERT_MSG(_starpu_mpi_early_data_handle_hashmap_count == 0, "Number of copy requests left is not zero");
+}
+
+/* Releases the early-data table: for each of the world_size per-source hash
+ * tables, deletes the handle list of every bucket and frees the bucket, then
+ * frees the table itself. */
+void _starpu_mpi_early_data_free(int world_size)
+{
+	int source;
+
+	for(source=0 ; source<world_size; source++)
+	{
+		struct _starpu_mpi_early_data_handle_hashlist *entry, *next;
+
+		/* HASH_ITER remembers the successor in 'next', so 'entry' can be unlinked and freed inside the loop */
+		HASH_ITER(hh, _starpu_mpi_early_data_handle_hashmap[source], entry, next)
+		{
+			_starpu_mpi_early_data_handle_list_delete(entry->list);
+			HASH_DEL(_starpu_mpi_early_data_handle_hashmap[source], entry);
+			free(entry);
+		}
+	}
+	free(_starpu_mpi_early_data_handle_hashmap);
+}
+
+#ifdef STARPU_VERBOSE
+/* Debug helper: reports through _STARPU_MPI_DEBUG every handle stored in the
+ * bucket of the given source and tag, or the fact that the bucket is missing
+ * or empty. */
+static void _starpu_mpi_early_data_handle_display_hash(int source, int tag)
+{
+	struct _starpu_mpi_early_data_handle_hashlist *bucket;
+	struct _starpu_mpi_early_data_handle *elt;
+
+	HASH_FIND_INT(_starpu_mpi_early_data_handle_hashmap[source], &tag, bucket);
+
+	if (bucket == NULL)
+	{
+		_STARPU_MPI_DEBUG(60, "Hashlist for source %d and tag %d does not exist\n", source, tag);
+		return;
+	}
+	if (_starpu_mpi_early_data_handle_list_empty(bucket->list))
+	{
+		_STARPU_MPI_DEBUG(60, "Hashlist for source %d and tag %d is empty\n", source, tag);
+		return;
+	}
+
+	elt = _starpu_mpi_early_data_handle_list_begin(bucket->list);
+	while (elt != _starpu_mpi_early_data_handle_list_end(bucket->list))
+	{
+		_STARPU_MPI_DEBUG(60, "Element for source %d and tag %d: %p\n", source, tag, elt);
+		elt = _starpu_mpi_early_data_handle_list_next(elt);
+	}
+}
+#endif
+
+/* Returns the first early data handle queued for (source, mpi_tag), or NULL
+ * when there is no bucket for the tag or its list is empty. When delete is 1
+ * the handle is also removed from the list; for any other value it is left
+ * in place. */
+static
+struct _starpu_mpi_early_data_handle *_starpu_mpi_early_data_pop(int mpi_tag, int source, int delete)
+{
+	struct _starpu_mpi_early_data_handle_hashlist *bucket;
+	struct _starpu_mpi_early_data_handle *front = NULL;
+
+	_STARPU_MPI_DEBUG(60, "Looking for early_data_handle with tag %d in the hashmap[%d]\n", mpi_tag, source);
+	HASH_FIND_INT(_starpu_mpi_early_data_handle_hashmap[source], &mpi_tag, bucket);
+	if (bucket != NULL && !_starpu_mpi_early_data_handle_list_empty(bucket->list))
+	{
+		front = (delete == 1)
+			? _starpu_mpi_early_data_handle_list_pop_front(bucket->list)
+			: _starpu_mpi_early_data_handle_list_front(bucket->list);
+	}
+	_STARPU_MPI_DEBUG(60, "Found early_data_handle %p with tag %d in the hashmap[%d]\n", front, mpi_tag, source);
+	return front;
+}
+
+/* Returns the first early data handle queued for (source, mpi_tag) without
+ * removing it, or NULL when there is none. */
+struct _starpu_mpi_early_data_handle *_starpu_mpi_early_data_find(int mpi_tag, int source)
+{
+	return _starpu_mpi_early_data_pop(mpi_tag, source, 0);
+}
+
+/* Queues early_data_handle at the tail of the list associated with its
+ * (source, mpi_tag) pair, creating the bucket on first use, and increments
+ * the count verified by _starpu_mpi_early_data_check_termination(). */
+void _starpu_mpi_early_data_add(struct _starpu_mpi_early_data_handle *early_data_handle)
+{
+	_STARPU_MPI_DEBUG(60, "Trying to add early_data_handle %p with tag %d in the hashmap[%d]\n", early_data_handle, early_data_handle->mpi_tag, early_data_handle->source);
+
+	struct _starpu_mpi_early_data_handle_hashlist *hashlist;
+	HASH_FIND_INT(_starpu_mpi_early_data_handle_hashmap[early_data_handle->source], &early_data_handle->mpi_tag, hashlist);
+	if (hashlist == NULL)
+	{
+		hashlist = malloc(sizeof(*hashlist));
+		/* stop with a clear message instead of dereferencing NULL just below */
+		STARPU_ASSERT_MSG(hashlist != NULL, "Cannot allocate the hashlist for tag %d", early_data_handle->mpi_tag);
+		hashlist->list = _starpu_mpi_early_data_handle_list_new();
+		hashlist->mpi_tag = early_data_handle->mpi_tag;
+		HASH_ADD_INT(_starpu_mpi_early_data_handle_hashmap[early_data_handle->source], mpi_tag, hashlist);
+	}
+	_starpu_mpi_early_data_handle_list_push_back(hashlist->list, early_data_handle);
+	_starpu_mpi_early_data_handle_hashmap_count ++;
+#ifdef STARPU_VERBOSE
+	_starpu_mpi_early_data_handle_display_hash(early_data_handle->source, early_data_handle->mpi_tag);
+#endif
+}
+
+/* Removes early_data_handle from the list of its (source, mpi_tag) pair and
+ * decrements the count of stored handles. The handle at the front of the
+ * list is the one popped, and the assertion requires it to be
+ * early_data_handle: handles of a given pair must be deleted in the order
+ * they were added. */
+void _starpu_mpi_early_data_delete(struct _starpu_mpi_early_data_handle *early_data_handle)
+{
+	_STARPU_MPI_DEBUG(60, "Trying to delete early_data_handle %p with tag %d in the hashmap[%d]\n", early_data_handle, early_data_handle->mpi_tag, early_data_handle->source);
+	struct _starpu_mpi_early_data_handle *found = _starpu_mpi_early_data_pop(early_data_handle->mpi_tag, early_data_handle->source, 1);
+
+	STARPU_ASSERT_MSG(found == early_data_handle,
+			  "[_starpu_mpi_early_data_delete][error] early_data_handle %p with tag %d is NOT in the hashmap[%d]\n", early_data_handle, early_data_handle->mpi_tag, early_data_handle->source);
+
+	_starpu_mpi_early_data_handle_hashmap_count --;
+#ifdef STARPU_VERBOSE
+	_starpu_mpi_early_data_handle_display_hash(early_data_handle->source, early_data_handle->mpi_tag);
+#endif
+}
+

+ 55 - 0
mpi/src/starpu_mpi_early_data.h

@@ -0,0 +1,55 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009, 2010-2014  Université de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef __STARPU_MPI_EARLY_DATA_H__
+#define __STARPU_MPI_EARLY_DATA_H__
+
+#include <starpu.h>
+#include <stdlib.h>
+#include <mpi.h>
+#include <common/config.h>
+#include <common/list.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Declares struct _starpu_mpi_early_data_handle together with its list
+ * helpers (presumably generated by the LIST_TYPE macro of <common/list.h>).
+ * The source and mpi_tag fields select where a handle is stored by
+ * _starpu_mpi_early_data_add(): source indexes the per-rank table and
+ * mpi_tag is the hash key inside it. */
+LIST_TYPE(_starpu_mpi_early_data_handle,
+	  starpu_data_handle_t handle;
+	  struct _starpu_mpi_envelope *env;
+	  struct _starpu_mpi_req *req;
+	  void *buffer;
+	  int mpi_tag;
+	  int source;
+	  int req_ready;
+	  starpu_pthread_mutex_t req_mutex;
+	  starpu_pthread_cond_t req_cond;
+);
+
+/* Allocates world_size empty per-source tables. */
+void _starpu_mpi_early_data_init(int world_size);
+/* Asserts that no early data handle is left in the tables. */
+void _starpu_mpi_early_data_check_termination();
+/* Frees the world_size per-source tables and their content. */
+void _starpu_mpi_early_data_free(int world_size);
+
+/* Returns the first handle stored for (source, mpi_tag) without removing it, or NULL. */
+struct _starpu_mpi_early_data_handle *_starpu_mpi_early_data_find(int mpi_tag, int source);
+/* Stores the handle under its own source and mpi_tag fields. */
+void _starpu_mpi_early_data_add(struct _starpu_mpi_early_data_handle *early_data_handle);
+/* Removes the handle; it must be the first one stored for its source and mpi_tag. */
+void _starpu_mpi_early_data_delete(struct _starpu_mpi_early_data_handle *early_data_handle);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __STARPU_MPI_EARLY_DATA_H__ */

+ 104 - 0
mpi/src/starpu_mpi_early_request.c

@@ -0,0 +1,104 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009, 2010-2014  Université de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <stdlib.h>
+#include <starpu_mpi.h>
+#include <starpu_mpi_private.h>
+#include <starpu_mpi_early_request.h>
+#include <common/uthash.h>
+
+/** stores application requests for which data have not been received yet */
+static struct _starpu_mpi_req **_starpu_mpi_app_req_hashmap = NULL;
+static int _starpu_mpi_app_req_hashmap_count = 0;
+
+/* Allocates the application-request table: world_size hash-table heads, all
+ * initially empty. The table is later indexed by the srcdst field of the
+ * requests. */
+void _starpu_mpi_early_request_init(int world_size)
+{
+	int k;
+
+	/* sizeof on the pointee keeps the element size tied to the declaration */
+	_starpu_mpi_app_req_hashmap = malloc(world_size * sizeof(*_starpu_mpi_app_req_hashmap));
+	/* malloc(0) may legitimately return NULL, hence the world_size test */
+	STARPU_ASSERT_MSG(world_size <= 0 || _starpu_mpi_app_req_hashmap != NULL, "Cannot allocate the application request hashmap");
+	for(k=0 ; k<world_size ; k++) _starpu_mpi_app_req_hashmap[k] = NULL;
+}
+
+/* Frees the table of hash heads allocated by _starpu_mpi_early_request_init().
+ * Requests still stored in the tables are not touched here. */
+void _starpu_mpi_early_request_free()
+{
+	free(_starpu_mpi_app_req_hashmap);
+}
+
+/* Returns the number of requests currently stored, as maintained by
+ * _starpu_mpi_early_request_add() and _starpu_mpi_early_request_delete(). */
+int _starpu_mpi_early_request_count()
+{
+	return _starpu_mpi_app_req_hashmap_count;
+}
+
+/* Asserts that no application request is left in the tables. */
+void _starpu_mpi_early_request_check_termination()
+{
+	STARPU_ASSERT_MSG(_starpu_mpi_early_request_count() == 0, "Number of receive requests left is not zero");
+}
+
+/* Returns the request stored for (source, mpi_tag), or NULL when the table of
+ * that source holds no request with this tag. */
+struct _starpu_mpi_req* _starpu_mpi_early_request_find(int mpi_tag, int source)
+{
+	struct _starpu_mpi_req* match = NULL;
+
+	HASH_FIND_INT(_starpu_mpi_app_req_hashmap[source], &mpi_tag, match);
+	return match;
+}
+
+/* Stores req in the table of its srcdst rank, keyed by its mpi_tag, and
+ * increments the request count. A request with the same tag already stored
+ * for that rank is reported as an error through a failing assertion whose
+ * message depends on the sequential consistency settings; req is then not
+ * added. */
+void _starpu_mpi_early_request_add(struct _starpu_mpi_req *req)
+{
+	struct _starpu_mpi_req *test_req = _starpu_mpi_early_request_find(req->mpi_tag, req->srcdst);
+
+	if (test_req != NULL)
+	{
+		_STARPU_MPI_DEBUG(3, "[Error] request %p with tag %d already in the application request hashmap[%d]\n", req, req->mpi_tag, req->srcdst);
+		/* test_req is non-NULL here, so both assertions below fail by construction */
+		if (starpu_data_get_sequential_consistency_flag(req->data_handle) && req->sequential_consistency)
+		{
+			STARPU_ASSERT_MSG(!test_req, "[Error] request %p with tag %d wanted to be added to the application request hashmap[%d], while another request %p with the same tag is already in it. \n Sequential consistency is activated : this is not supported by StarPU.", req, req->mpi_tag, req->srcdst, test_req);
+		}
+		else
+		{
+			STARPU_ASSERT_MSG(!test_req, "[Error] request %p with tag %d wanted to be added to the application request hashmap[%d], while another request %p with the same tag is already in it. \n Sequential consistency isn't activated for this handle : you should want to add dependencies between requests for which the sequential consistency is deactivated.", req, req->mpi_tag, req->srcdst, test_req);
+		}
+	}
+	else
+	{
+		HASH_ADD_INT(_starpu_mpi_app_req_hashmap[req->srcdst], mpi_tag, req);
+		_starpu_mpi_app_req_hashmap_count ++;
+		_STARPU_MPI_DEBUG(3, "Adding request %p with tag %d in the application request hashmap[%d]\n", req, req->mpi_tag, req->srcdst);
+	}
+}
+
+/* Removes req from the table of its srcdst rank and decrements the request
+ * count. Nothing is removed, and a warning is logged, when req is not the
+ * request stored for its tag. */
+void _starpu_mpi_early_request_delete(struct _starpu_mpi_req *req)
+{
+	struct _starpu_mpi_req *test_req;
+
+	test_req = _starpu_mpi_early_request_find(req->mpi_tag, req->srcdst);
+
+	/* Only unlink req when it is the stored element itself: HASH_DEL on a
+	 * request that merely shares the tag of the stored one would operate on
+	 * an element that is not in the table. */
+	if (test_req == req)
+	{
+		HASH_DEL(_starpu_mpi_app_req_hashmap[req->srcdst], req);
+		_starpu_mpi_app_req_hashmap_count --;
+		_STARPU_MPI_DEBUG(3, "Deleting application request %p with tag %d from the application request hashmap[%d]\n", req, req->mpi_tag, req->srcdst);
+	}
+	else
+	{
+		_STARPU_MPI_DEBUG(3, "[Warning] request %p with tag %d is NOT in the application request hashmap[%d]\n", req, req->mpi_tag, req->srcdst);
+	}
+}
+

+ 44 - 0
mpi/src/starpu_mpi_early_request.h

@@ -0,0 +1,44 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009, 2010-2014  Université de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef __STARPU_MPI_EARLY_REQUEST_H__
+#define __STARPU_MPI_EARLY_REQUEST_H__
+
+#include <starpu.h>
+#include <stdlib.h>
+#include <mpi.h>
+#include <common/config.h>
+#include <common/list.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Allocates world_size empty per-rank request tables. */
+void _starpu_mpi_early_request_init(int world_size);
+/* Frees the array of tables (not the requests). */
+void _starpu_mpi_early_request_free();
+/* Returns the number of requests currently stored. */
+int _starpu_mpi_early_request_count();
+/* Asserts that no request is left in the tables. */
+void _starpu_mpi_early_request_check_termination();
+
+/* Stores req under its srcdst and mpi_tag fields; a duplicate tag is an error. */
+void _starpu_mpi_early_request_add(struct _starpu_mpi_req *req);
+/* Returns the request stored for (source, mpi_tag), or NULL. */
+struct _starpu_mpi_req* _starpu_mpi_early_request_find(int mpi_tag, int source);
+/* Removes req from the table of its srcdst rank. */
+void _starpu_mpi_early_request_delete(struct _starpu_mpi_req *req);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __STARPU_MPI_EARLY_REQUEST_H__ */

+ 26 - 17
mpi/tests/mpi_earlyrecv.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2009, 2010  Université de Bordeaux 1
  * Copyright (C) 2009, 2010  Université de Bordeaux 1
- * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -21,9 +21,9 @@
 
 
 int main(int argc, char **argv)
 int main(int argc, char **argv)
 {
 {
-	int ret, rank, size, i, nb_requests;
-	starpu_data_handle_t tab_handle[3];
-	starpu_mpi_req request[3];
+	int ret, rank, size, i;
+	starpu_data_handle_t tab_handle[4];
+	starpu_mpi_req request[2] = {NULL, NULL};
 
 
 	MPI_Init(NULL, NULL);
 	MPI_Init(NULL, NULL);
 	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
 	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
@@ -43,11 +43,14 @@ int main(int argc, char **argv)
 	ret = starpu_mpi_init(NULL, NULL, 0);
 	ret = starpu_mpi_init(NULL, NULL, 0);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init");
 
 
-	for(i=0 ; i<3 ; i++)
+	for(i=0 ; i<4 ; i++)
 	{
 	{
-		starpu_variable_data_register(&tab_handle[i], STARPU_MAIN_RAM, (uintptr_t)&rank, sizeof(int));
-		starpu_data_set_tag(tab_handle[i], i);
-		request[i] = NULL;
+		if (i<3 || rank%2)
+		{
+			// all data are registered on all nodes, except the 4th one, which is not registered yet on the receiving node
+			starpu_variable_data_register(&tab_handle[i], STARPU_MAIN_RAM, (uintptr_t)&rank, sizeof(int));
+			starpu_mpi_data_register(tab_handle[i], i, rank);
+		}
 	}
 	}
 
 
 	int other_rank = rank%2 == 0 ? rank+1 : rank-1;
 	int other_rank = rank%2 == 0 ? rank+1 : rank-1;
@@ -56,23 +59,30 @@ int main(int argc, char **argv)
 
 
 	if (rank%2)
 	if (rank%2)
 	{
 	{
+		// this data will be received as an early registered data
 		starpu_mpi_isend(tab_handle[0], &request[0], other_rank, 0, MPI_COMM_WORLD);
 		starpu_mpi_isend(tab_handle[0], &request[0], other_rank, 0, MPI_COMM_WORLD);
+		// this data will be received as an early UNregistered data
+		starpu_mpi_isend(tab_handle[3], &request[1], other_rank, 3, MPI_COMM_WORLD);
+
+		starpu_mpi_send(tab_handle[1], other_rank, 1, MPI_COMM_WORLD);
 		starpu_mpi_recv(tab_handle[2], other_rank, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
 		starpu_mpi_recv(tab_handle[2], other_rank, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-		starpu_mpi_isend(tab_handle[1], &request[1], other_rank, 1, MPI_COMM_WORLD);
-		nb_requests = 2;
 	}
 	}
 	else
 	else
 	{
 	{
+		starpu_mpi_recv(tab_handle[1], other_rank, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+		starpu_mpi_send(tab_handle[2], other_rank, 2, MPI_COMM_WORLD);
+
+		// we register the data
+		starpu_variable_data_register(&tab_handle[3], STARPU_MAIN_RAM, (uintptr_t)&rank, sizeof(int));
+		starpu_mpi_data_register(tab_handle[3], 3, rank);
+		starpu_mpi_irecv(tab_handle[3], &request[1], other_rank, 3, MPI_COMM_WORLD);
 		starpu_mpi_irecv(tab_handle[0], &request[0], other_rank, 0, MPI_COMM_WORLD);
 		starpu_mpi_irecv(tab_handle[0], &request[0], other_rank, 0, MPI_COMM_WORLD);
-		starpu_mpi_irecv(tab_handle[1], &request[1], other_rank, 1, MPI_COMM_WORLD);
-		starpu_mpi_isend(tab_handle[2], &request[2], other_rank, 2, MPI_COMM_WORLD);
-		nb_requests = 3;
 	}
 	}
 
 
 	int finished=0;
 	int finished=0;
 	while (!finished)
 	while (!finished)
 	{
 	{
-		for(i=0 ; i<nb_requests ; i++)
+		for(i=0 ; i<2 ; i++)
 		{
 		{
 			if (request[i])
 			if (request[i])
 			{
 			{
@@ -83,11 +93,10 @@ int main(int argc, char **argv)
 					FPRINTF_MPI("request[%d] = %d %p\n", i, flag, request[i]);
 					FPRINTF_MPI("request[%d] = %d %p\n", i, flag, request[i]);
 			}
 			}
 		}
 		}
-		finished = request[0] == NULL;
-		for(i=1 ; i<nb_requests ; i++) finished = finished && request[i] == NULL;
+		finished = request[0] == NULL && request[1] == NULL;
 	}
 	}
 
 
-	for(i=0 ; i<3 ; i++)
+	for(i=0 ; i<4 ; i++)
 		starpu_data_unregister(tab_handle[i]);
 		starpu_data_unregister(tab_handle[i]);
 
 
 	starpu_mpi_shutdown();
 	starpu_mpi_shutdown();

+ 3 - 3
mpi/tests/mpi_earlyrecv2.c

@@ -131,7 +131,7 @@ int exchange_variable(int rank, int detached)
 	{
 	{
 		value[i]=i*rank;
 		value[i]=i*rank;
 		starpu_variable_data_register(&tab_handle[i], STARPU_MAIN_RAM, (uintptr_t)&value[i], sizeof(int));
 		starpu_variable_data_register(&tab_handle[i], STARPU_MAIN_RAM, (uintptr_t)&value[i], sizeof(int));
-		starpu_data_set_tag(tab_handle[i], i);
+		starpu_mpi_data_register(tab_handle[i], i, rank);
 	}
 	}
 	ret = exchange(rank, tab_handle, check_variable, detached);
 	ret = exchange(rank, tab_handle, check_variable, detached);
 	for(i=0 ; i<NB ; i++)
 	for(i=0 ; i<NB ; i++)
@@ -154,7 +154,7 @@ int exchange_void(int rank, int detached)
 	for(i=0 ; i<NB ; i++)
 	for(i=0 ; i<NB ; i++)
 	{
 	{
 		starpu_void_data_register(&tab_handle[i]);
 		starpu_void_data_register(&tab_handle[i]);
-		starpu_data_set_tag(tab_handle[i], i);
+		starpu_mpi_data_register(tab_handle[i], i, rank);
 	}
 	}
 	ret = exchange(rank, tab_handle, check_void, detached);
 	ret = exchange(rank, tab_handle, check_void, detached);
 	for(i=0 ; i<NB ; i++)
 	for(i=0 ; i<NB ; i++)
@@ -191,7 +191,7 @@ int exchange_complex(int rank, int detached)
 		real[i] = (i*rank)+12;
 		real[i] = (i*rank)+12;
 		imaginary[i] = (i*rank)+45;
 		imaginary[i] = (i*rank)+45;
 		starpu_complex_data_register(&handle[i], STARPU_MAIN_RAM, &real[i], &imaginary[i], 1);
 		starpu_complex_data_register(&handle[i], STARPU_MAIN_RAM, &real[i], &imaginary[i], 1);
-		starpu_data_set_tag(handle[i], i);
+		starpu_mpi_data_register(handle[i], i, rank);
 	}
 	}
 	ret = exchange(rank, handle, check_complex, detached);
 	ret = exchange(rank, handle, check_complex, detached);
 	for(i=0 ; i<NB ; i++)
 	for(i=0 ; i<NB ; i++)

+ 27 - 1
src/common/thread.c

@@ -288,9 +288,35 @@ int starpu_pthread_rwlock_unlock(starpu_pthread_rwlock_t *rwlock)
 
 
 	return p_ret;
 	return p_ret;
 }
 }
+
+#if defined(STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT)
+/* Barrier creation on top of xbt_barrier_init(): stores in *barrier the
+ * barrier created for count participants. attr is not used. Always
+ * returns 0. */
+int starpu_pthread_barrier_init(starpu_pthread_barrier_t *restrict barrier, const starpu_pthread_barrierattr_t *restrict attr, unsigned count)
+{
+	*barrier = xbt_barrier_init(count);
+	return 0;
+}
+
+/* Destroys *barrier through xbt_barrier_destroy() unless it is null.
+ * Always returns 0. */
+int starpu_pthread_barrier_destroy(starpu_pthread_barrier_t *barrier)
+{
+	if (*barrier)
+		xbt_barrier_destroy(*barrier);
+	return 0;
+}
+
+/* Waits on *barrier through xbt_barrier_wait(), with the wait bracketed by
+ * the barrier-wait trace events. Always returns 0. */
+int starpu_pthread_barrier_wait(starpu_pthread_barrier_t *barrier)
+{
+	_STARPU_TRACE_BARRIER_WAIT_BEGIN();
+
+	xbt_barrier_wait(*barrier);
+
+	_STARPU_TRACE_BARRIER_WAIT_END();
+	return 0;
+}
+#endif /* defined(STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT) */
+
 #endif /* STARPU_SIMGRID */
 #endif /* STARPU_SIMGRID */
 
 
-#if defined(STARPU_SIMGRID) || !defined(STARPU_HAVE_PTHREAD_BARRIER)
+#if (defined(STARPU_SIMGRID) && !defined(STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT)) || !defined(STARPU_HAVE_PTHREAD_BARRIER)
 int starpu_pthread_barrier_init(starpu_pthread_barrier_t *restrict barrier, const starpu_pthread_barrierattr_t *restrict attr, unsigned count)
 int starpu_pthread_barrier_init(starpu_pthread_barrier_t *restrict barrier, const starpu_pthread_barrierattr_t *restrict attr, unsigned count)
 {
 {
 	int ret = starpu_pthread_mutex_init(&barrier->mutex, NULL);
 	int ret = starpu_pthread_mutex_init(&barrier->mutex, NULL);

+ 4 - 4
src/common/utils.h

@@ -82,10 +82,10 @@
 #  define _STARPU_DEBUG(fmt, ...) do { } while (0)
 #  define _STARPU_DEBUG(fmt, ...) do { } while (0)
 #endif
 #endif
 
 
-#ifdef STARPU_VERBOSE0
-#  define _STARPU_LOG_IN()             do { if (!getenv("STARPU_SILENT")) {fprintf(stderr, STARPU_DEBUG_PREFIX"[%ld][%s] -->\n", pthread_self(), __starpu_func__ ); }} while(0)
-#  define _STARPU_LOG_OUT()            do { if (!getenv("STARPU_SILENT")) {fprintf(stderr, STARPU_DEBUG_PREFIX"[%ld][%s] <--\n", pthread_self(), __starpu_func__ ); }} while(0)
-#  define _STARPU_LOG_OUT_TAG(outtag)  do { if (!getenv("STARPU_SILENT")) {fprintf(stderr, STARPU_DEBUG_PREFIX"[%ld][%s] <-- (%s)\n", pthread_self(), __starpu_func__, outtag); }} while(0)
+#ifdef STARPU_EXTRA_VERBOSE
+#  define _STARPU_LOG_IN()             do { if (!getenv("STARPU_SILENT")) {fprintf(stderr, STARPU_DEBUG_PREFIX"[%ld][%s:%s@%d] -->\n", pthread_self(), __starpu_func__,__FILE__,  __LINE__); }} while(0)
+#  define _STARPU_LOG_OUT()            do { if (!getenv("STARPU_SILENT")) {fprintf(stderr, STARPU_DEBUG_PREFIX"[%ld][%s:%s@%d] <--\n", pthread_self(), __starpu_func__, __FILE__,  __LINE__); }} while(0)
+#  define _STARPU_LOG_OUT_TAG(outtag)  do { if (!getenv("STARPU_SILENT")) {fprintf(stderr, STARPU_DEBUG_PREFIX"[%ld][%s:%s@%d] <-- (%s)\n", pthread_self(), __starpu_func__, __FILE__, __LINE__, outtag); }} while(0)
 #else
 #else
 #  define _STARPU_LOG_IN()
 #  define _STARPU_LOG_IN()
 #  define _STARPU_LOG_OUT()
 #  define _STARPU_LOG_OUT()

+ 26 - 0
src/core/sched_ctx.c

@@ -60,7 +60,11 @@ void _starpu_worker_gets_out_of_ctx(unsigned sched_ctx_id, struct _starpu_worker
 	{
 	{
 		struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
 		struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
 		if(sched_ctx && sched_ctx->sched_policy && sched_ctx->sched_policy->remove_workers)
 		if(sched_ctx && sched_ctx->sched_policy && sched_ctx->sched_policy->remove_workers)
+		{
+			_STARPU_TRACE_WORKER_SCHEDULING_PUSH;
 			sched_ctx->sched_policy->remove_workers(sched_ctx_id, &worker->workerid, 1);
 			sched_ctx->sched_policy->remove_workers(sched_ctx_id, &worker->workerid, 1);
+			_STARPU_TRACE_WORKER_SCHEDULING_POP;
+		}
 		_starpu_sched_ctx_list_remove(&worker->sched_ctx_list, sched_ctx_id);
 		_starpu_sched_ctx_list_remove(&worker->sched_ctx_list, sched_ctx_id);
 		worker->nsched_ctxs--;
 		worker->nsched_ctxs--;
 	}
 	}
@@ -185,6 +189,7 @@ static void _starpu_add_workers_to_sched_ctx(struct _starpu_sched_ctx *sched_ctx
 	}
 	}
 	else if(sched_ctx->sched_policy->add_workers)
 	else if(sched_ctx->sched_policy->add_workers)
 	{
 	{
+		_STARPU_TRACE_WORKER_SCHEDULING_PUSH;
 		if(added_workers)
 		if(added_workers)
 		{
 		{
 			if(*n_added_workers > 0)
 			if(*n_added_workers > 0)
@@ -192,6 +197,7 @@ static void _starpu_add_workers_to_sched_ctx(struct _starpu_sched_ctx *sched_ctx
 		}
 		}
 		else
 		else
 			sched_ctx->sched_policy->add_workers(sched_ctx->id, workers_to_add, nworkers_to_add);
 			sched_ctx->sched_policy->add_workers(sched_ctx->id, workers_to_add, nworkers_to_add);
+		_STARPU_TRACE_WORKER_SCHEDULING_POP;
 	}
 	}
 	return;
 	return;
 }
 }
@@ -229,7 +235,11 @@ static void _starpu_sched_ctx_free_scheduling_data(struct _starpu_sched_ctx *sch
 	unsigned nworkers_ctx = starpu_sched_ctx_get_workers_list(sched_ctx->id, &workerids);
 	unsigned nworkers_ctx = starpu_sched_ctx_get_workers_list(sched_ctx->id, &workerids);
 
 
 	if(nworkers_ctx > 0 && sched_ctx->sched_policy->remove_workers)
 	if(nworkers_ctx > 0 && sched_ctx->sched_policy->remove_workers)
+	{
+		_STARPU_TRACE_WORKER_SCHEDULING_PUSH;
 		sched_ctx->sched_policy->remove_workers(sched_ctx->id, workerids, nworkers_ctx);
 		sched_ctx->sched_policy->remove_workers(sched_ctx->id, workerids, nworkers_ctx);
+		_STARPU_TRACE_WORKER_SCHEDULING_POP;
+	}
 
 
 	free(workerids);
 	free(workerids);
 	return;
 	return;
@@ -1157,6 +1167,22 @@ struct starpu_worker_collection* starpu_sched_ctx_create_worker_collection(unsig
 	return sched_ctx->workers;
 	return sched_ctx->workers;
 }
 }
 
 
+/* Prints to f the number of workers of the scheduling context sched_ctx_id,
+ * followed by one line per worker giving its name. */
+void starpu_sched_ctx_display_workers(unsigned sched_ctx_id, FILE *f)
+{
+	int *workerids = NULL;
+	unsigned nworkers;
+	unsigned i;
+
+	nworkers = starpu_sched_ctx_get_workers_list(sched_ctx_id, &workerids);
+	/* both values are unsigned, hence %u */
+	fprintf(f, "[sched_ctx %u]: %u worker%s\n", sched_ctx_id, nworkers, nworkers>1?"s":"");
+	for (i = 0; i < nworkers; i++)
+	{
+		char name[256];
+		starpu_worker_get_name(workerids[i], name, sizeof(name));
+		fprintf(f, "\t\t%s\n", name);
+	}
+	/* the id array returned through workerids belongs to the caller: release it */
+	free(workerids);
+}
+
 unsigned starpu_sched_ctx_get_workers_list(unsigned sched_ctx_id, int **workerids)
 unsigned starpu_sched_ctx_get_workers_list(unsigned sched_ctx_id, int **workerids)
 {
 {
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);

+ 25 - 2
src/core/sched_policy.c

@@ -185,14 +185,20 @@ void _starpu_init_sched_policy(struct _starpu_machine_config *config, struct _st
 
 
 	load_sched_policy(selected_policy, sched_ctx);
 	load_sched_policy(selected_policy, sched_ctx);
 
 
+	_STARPU_TRACE_WORKER_SCHEDULING_PUSH;
 	sched_ctx->sched_policy->init_sched(sched_ctx->id);
 	sched_ctx->sched_policy->init_sched(sched_ctx->id);
+	_STARPU_TRACE_WORKER_SCHEDULING_POP;
 }
 }
 
 
 void _starpu_deinit_sched_policy(struct _starpu_sched_ctx *sched_ctx)
 void _starpu_deinit_sched_policy(struct _starpu_sched_ctx *sched_ctx)
 {
 {
 	struct starpu_sched_policy *policy = sched_ctx->sched_policy;
 	struct starpu_sched_policy *policy = sched_ctx->sched_policy;
 	if (policy->deinit_sched)
 	if (policy->deinit_sched)
+	{
+		_STARPU_TRACE_WORKER_SCHEDULING_PUSH;
 		policy->deinit_sched(sched_ctx->id);
 		policy->deinit_sched(sched_ctx->id);
+		_STARPU_TRACE_WORKER_SCHEDULING_POP;
+	}
 }
 }
 
 
 static void _starpu_push_task_on_specific_worker_notify_sched(struct starpu_task *task, struct _starpu_worker *worker, int workerid, int perf_workerid)
 static void _starpu_push_task_on_specific_worker_notify_sched(struct starpu_task *task, struct _starpu_worker *worker, int workerid, int perf_workerid)
@@ -204,7 +210,11 @@ static void _starpu_push_task_on_specific_worker_notify_sched(struct starpu_task
         {
         {
 		sched_ctx = _starpu_get_sched_ctx_struct(l->sched_ctx);
 		sched_ctx = _starpu_get_sched_ctx_struct(l->sched_ctx);
 		if (sched_ctx->sched_policy != NULL && sched_ctx->sched_policy->push_task_notify)
 		if (sched_ctx->sched_policy != NULL && sched_ctx->sched_policy->push_task_notify)
+		{
+			_STARPU_TRACE_WORKER_SCHEDULING_PUSH;
 			sched_ctx->sched_policy->push_task_notify(task, workerid, perf_workerid, sched_ctx->id);
 			sched_ctx->sched_policy->push_task_notify(task, workerid, perf_workerid, sched_ctx->id);
+			_STARPU_TRACE_WORKER_SCHEDULING_POP;
+		}
 	}
 	}
 }
 }
 
 
@@ -878,22 +888,31 @@ profiling:
 
 
 struct starpu_task *_starpu_pop_every_task(struct _starpu_sched_ctx *sched_ctx)
 struct starpu_task *_starpu_pop_every_task(struct _starpu_sched_ctx *sched_ctx)
 {
 {
+	struct starpu_task *task = NULL;
 	if(sched_ctx->sched_policy)
 	if(sched_ctx->sched_policy)
 	{
 	{
 		STARPU_ASSERT(sched_ctx->sched_policy->pop_every_task);
 		STARPU_ASSERT(sched_ctx->sched_policy->pop_every_task);
 		
 		
 		/* TODO set profiling info */
 		/* TODO set profiling info */
 		if(sched_ctx->sched_policy->pop_every_task)
 		if(sched_ctx->sched_policy->pop_every_task)
-			return sched_ctx->sched_policy->pop_every_task(sched_ctx->id);
+		{
+			_STARPU_TRACE_WORKER_SCHEDULING_PUSH;
+			task = sched_ctx->sched_policy->pop_every_task(sched_ctx->id);
+			_STARPU_TRACE_WORKER_SCHEDULING_POP;
+		}
 	}
 	}
-	return NULL;
+	return task;
 }
 }
 
 
 void _starpu_sched_pre_exec_hook(struct starpu_task *task)
 void _starpu_sched_pre_exec_hook(struct starpu_task *task)
 {
 {
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx);
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx);
 	if (sched_ctx->sched_policy && sched_ctx->sched_policy->pre_exec_hook)
 	if (sched_ctx->sched_policy && sched_ctx->sched_policy->pre_exec_hook)
+	{
+		_STARPU_TRACE_WORKER_SCHEDULING_PUSH;
 		sched_ctx->sched_policy->pre_exec_hook(task);
 		sched_ctx->sched_policy->pre_exec_hook(task);
+		_STARPU_TRACE_WORKER_SCHEDULING_POP;
+	}
 }
 }
 
 
 void _starpu_sched_post_exec_hook(struct starpu_task *task)
 void _starpu_sched_post_exec_hook(struct starpu_task *task)
@@ -901,7 +920,11 @@ void _starpu_sched_post_exec_hook(struct starpu_task *task)
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx);
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx);
 
 
 	if (sched_ctx->sched_policy && sched_ctx->sched_policy->post_exec_hook)
 	if (sched_ctx->sched_policy && sched_ctx->sched_policy->post_exec_hook)
+	{
+		_STARPU_TRACE_WORKER_SCHEDULING_PUSH;
 		sched_ctx->sched_policy->post_exec_hook(task);
 		sched_ctx->sched_policy->post_exec_hook(task);
+		_STARPU_TRACE_WORKER_SCHEDULING_POP;
+	}
 }
 }
 
 
 void _starpu_wait_on_sched_event(void)
 void _starpu_wait_on_sched_event(void)

+ 7 - 0
src/core/simgrid.c

@@ -48,6 +48,12 @@ int do_starpu_main(int argc STARPU_ATTRIBUTE_UNUSED, char *argv[] STARPU_ATTRIBU
 	return starpu_main(args->argc, args->argv);
 	return starpu_main(args->argc, args->argv);
 }
 }
 
 
+#ifdef HAVE_MSG_GET_AS_BY_NAME
+static msg_as_t _starpu_simgrid_get_as_by_name(const char *name)
+{
+	return MSG_get_as_by_name(name);
+}
+#else /* HAVE_MSG_GET_AS_BY_NAME */
 static msg_as_t __starpu_simgrid_get_as_by_name(msg_as_t root, const char *name)
 static msg_as_t __starpu_simgrid_get_as_by_name(msg_as_t root, const char *name)
 {
 {
 	xbt_dict_t dict;
 	xbt_dict_t dict;
@@ -69,6 +75,7 @@ static msg_as_t _starpu_simgrid_get_as_by_name(const char *name)
 {
 {
 	return __starpu_simgrid_get_as_by_name(MSG_environment_get_routing_root(), name);
 	return __starpu_simgrid_get_as_by_name(MSG_environment_get_routing_root(), name);
 }
 }
+#endif /* HAVE_MSG_GET_AS_BY_NAME */
 
 
 int _starpu_simgrid_get_nbhosts(const char *prefix)
 int _starpu_simgrid_get_nbhosts(const char *prefix)
 {
 {

+ 26 - 10
src/core/workers.c

@@ -95,12 +95,17 @@ static uint32_t _starpu_worker_exists_and_can_execute(struct starpu_task *task,
 						      enum starpu_worker_archtype arch)
 						      enum starpu_worker_archtype arch)
 {
 {
 	int i;
 	int i;
-	int nworkers = starpu_worker_get_count();
-
 	_starpu_codelet_check_deprecated_fields(task->cl);
 	_starpu_codelet_check_deprecated_fields(task->cl);
+	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx);
+	struct starpu_worker_collection *workers = sched_ctx->workers;
 
 
-	for (i = 0; i < nworkers; i++)
-	{
+        struct starpu_sched_ctx_iterator it;
+        if(workers->init_iterator)
+                workers->init_iterator(workers, &it);
+
+        while(workers->has_next(workers, &it))
+        {
+                i = workers->get_next(workers, &it);
 		if (starpu_worker_get_type(i) != arch)
 		if (starpu_worker_get_type(i) != arch)
 			continue;
 			continue;
 
 
@@ -141,7 +146,10 @@ static uint32_t _starpu_worker_exists_and_can_execute(struct starpu_task *task,
 			if (!test_implementation)
 			if (!test_implementation)
 				break;
 				break;
 
 
-			if (task->cl->can_execute(i, task, impl))
+			if (task->cl->can_execute)
+				return task->cl->can_execute(i, task, impl);
+
+			if(test_implementation)
 				return 1;
 				return 1;
 		}
 		}
 	}
 	}
@@ -155,11 +163,18 @@ uint32_t _starpu_worker_exists(struct starpu_task *task)
 {
 {
 	_starpu_codelet_check_deprecated_fields(task->cl);
 	_starpu_codelet_check_deprecated_fields(task->cl);
 
 
-	if (!(task->cl->where & config.worker_mask))
-		return 0;
-
-	if (!task->cl->can_execute)
-		return 1;
+	/* If the task belongs to the init context, we can simply
+	   check the worker mask of the whole machine;
+	   otherwise we have to iterate over the workers of the ctx
+	   and verify that there exists a worker able to execute the task */
+	if(task->sched_ctx == 0)
+	{
+		if (!(task->cl->where & config.worker_mask))
+			return 0;
+		
+		if (!task->cl->can_execute)
+			return 1;
+	}
 
 
 #if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID)
 #if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID)
 	if ((task->cl->where & STARPU_CPU) &&
 	if ((task->cl->where & STARPU_CPU) &&
@@ -186,6 +201,7 @@ uint32_t _starpu_worker_exists(struct starpu_task *task)
 	    _starpu_worker_exists_and_can_execute(task, STARPU_SCC_WORKER))
 	    _starpu_worker_exists_and_can_execute(task, STARPU_SCC_WORKER))
 		return 1;
 		return 1;
 #endif
 #endif
+
 	return 0;
 	return 0;
 }
 }
 
 

+ 2 - 2
src/datawizard/malloc.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2009-2010, 2012-2014  Université de Bordeaux 1
  * Copyright (C) 2009-2010, 2012-2014  Université de Bordeaux 1
- * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -217,7 +217,7 @@ int starpu_malloc_flags(void **A, size_t dim, int flags)
 end:
 end:
 	if (ret == 0)
 	if (ret == 0)
 	{
 	{
-		STARPU_ASSERT(*A);
+		STARPU_ASSERT_MSG(*A, "Failed to allocated memory of size %ld b\n", dim);
 	}
 	}
 
 
 	return ret;
 	return ret;

+ 0 - 4
src/sched_policies/deque_modeling_policy_data_aware.c

@@ -547,10 +547,6 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 	{
 	{
 		worker = workers->get_next(workers, &it);
 		worker = workers->get_next(workers, &it);
 
 
-		if (worker >= nworkers)
-			/* This is a just-added worker, discard it */
-			continue;
-
 		struct _starpu_fifo_taskq *fifo = dt->queue_array[worker];
 		struct _starpu_fifo_taskq *fifo = dt->queue_array[worker];
 		struct starpu_perfmodel_arch* perf_arch = starpu_worker_get_perf_archtype(worker);
 		struct starpu_perfmodel_arch* perf_arch = starpu_worker_get_perf_archtype(worker);
 		unsigned memory_node = starpu_worker_get_memory_node(worker);
 		unsigned memory_node = starpu_worker_get_memory_node(worker);

+ 10 - 0
tests/main/starpu_worker_exists.c

@@ -67,21 +67,31 @@ main(int argc, char **argv)
 	task = starpu_task_create();
 	task = starpu_task_create();
 	task->cl = &cl;
 	task->cl = &cl;
 	task->destroy = 0;
 	task->destroy = 0;
+	task->sched_ctx = 0;
 
 
 	cl.can_execute = NULL;
 	cl.can_execute = NULL;
 	ret = _starpu_worker_exists(task);
 	ret = _starpu_worker_exists(task);
 	if (!ret)
 	if (!ret)
+	{
+		FPRINTF(stderr, "failure with can_execute=NULL\n");
 		return EXIT_FAILURE;
 		return EXIT_FAILURE;
+	}
 
 
 	cl.can_execute = can_always_execute;
 	cl.can_execute = can_always_execute;
 	ret = _starpu_worker_exists(task);
 	ret = _starpu_worker_exists(task);
 	if (!ret)
 	if (!ret)
+	{
+		FPRINTF(stderr, "failure with can_always_execute\n");
 		return EXIT_FAILURE;
 		return EXIT_FAILURE;
+	}
 
 
 	cl.can_execute = can_never_execute;
 	cl.can_execute = can_never_execute;
 	ret = _starpu_worker_exists(task);
 	ret = _starpu_worker_exists(task);
 	if (ret)
 	if (ret)
+	{
+		FPRINTF(stderr, "failure with can_never_execute\n");
 		return EXIT_FAILURE;
 		return EXIT_FAILURE;
+	}
 
 
 	starpu_task_destroy(task);
 	starpu_task_destroy(task);
 	starpu_shutdown();
 	starpu_shutdown();

+ 1 - 0
tests/perfmodels/regression_based.c

@@ -80,6 +80,7 @@ static struct starpu_codelet nl_memset_cl =
 {
 {
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
 	.cuda_funcs = {memset_cuda, NULL},
 	.cuda_funcs = {memset_cuda, NULL},
+	.cuda_flags = {STARPU_CUDA_ASYNC},
 #endif
 #endif
 #ifdef STARPU_USE_OPENCL
 #ifdef STARPU_USE_OPENCL
 	.opencl_funcs = {memset_opencl, NULL},
 	.opencl_funcs = {memset_opencl, NULL},