12 years ago · f722a8b4c2
--- a/ChangeLog
+++ b/ChangeLog
@@ -119,6 +119,8 @@ New features:
 
																     pthread API. It is provided with 2 implementations: a pthread one
															
 
																     and a Simgrid one. Applications using StarPU and wishing to use
															
 
																     the Simgrid StarPU features should use it.
															
 
																+  * Allow to have a dynamically allocated number of buffers per task,
															
 
																+    and so override the value defined by --enable-maxbuffers=XXX
															
 
																 Small features:
															
 
																   * Add starpu_worker_get_by_type and starpu_worker_get_by_devid
															
@@ -134,6 +136,9 @@ Small features:
 
																   * New configure option --enable-mpi-progression-hook to enable the
															
 
																     activity polling method for StarPU-MPI.
															
 
																   * Permit to disable sequential consistency for a given task.
															
 
																+  * New macro STARPU_RELEASE_VERSION
															
 
																+  * New function starpu_get_version() to return as 3 integers the
															
 
																+    release version of StarPU.
															
 
																 Changes:
															
 
																   * Fix the block filter functions.
															
--- a/configure.ac
+++ b/configure.ac
@@ -25,11 +25,14 @@ dnl Versioning.
 
																 STARPU_MAJOR_VERSION="`echo $PACKAGE_VERSION | cut -d . -f 1`"
															
 
																 STARPU_MINOR_VERSION="`echo $PACKAGE_VERSION | cut -d . -f 2`"
															
 
																+STARPU_RELEASE_VERSION="`echo $PACKAGE_VERSION | cut -d . -f 3`"
															
 
																 AC_SUBST([STARPU_MAJOR_VERSION])
															
 
																 AC_SUBST([STARPU_MINOR_VERSION])
															
 
																+AC_SUBST([STARPU_RELEASE_VERSION])
															
 
																 AC_SUBST([STARPU_EFFECTIVE_VERSION])
															
 
																 AC_DEFINE_UNQUOTED([STARPU_MAJOR_VERSION], [$STARPU_MAJOR_VERSION], [Major version number of StarPU.])
															
 
																 AC_DEFINE_UNQUOTED([STARPU_MINOR_VERSION], [$STARPU_MINOR_VERSION], [Minor version number of StarPU.])
															
 
																+AC_DEFINE_UNQUOTED([STARPU_RELEASE_VERSION], [$STARPU_RELEASE_VERSION], [Release version number of StarPU.])
															
 
																 . "$srcdir/STARPU-VERSION"
															
 
																 AC_SUBST([LIBSTARPU_INTERFACE_CURRENT])
															
@@ -264,7 +267,7 @@ AC_MSG_RESULT($max_sched_ctxs)
 
																 AC_DEFINE_UNQUOTED(STARPU_NMAX_SCHED_CTXS, [$max_sched_ctxs], [Maximum number of sched_ctxs supported])
															
 
																 AC_ARG_ENABLE([sc_hypervisor],
															
 
																-  [AS_HELP_STRING([--enable-sct-hypervisor],
															
 
																+  [AS_HELP_STRING([--enable-sc-hypervisor],
															
 
																     [enable resizing contexts (experimental)])],
															
 
																   [enable_sc_hypervisor="yes"],
															
 
																   [enable_sc_hypervisor="no"])
															
--- a/doc/chapters/advanced-examples.texi
+++ b/doc/chapters/advanced-examples.texi
@@ -23,6 +23,7 @@
 
																 * Defining a New Scheduling Policy::
															
 
																 * On-GPU rendering::
															
 
																 * Defining a New Data Interface::
															
 
																+* Setting the Data Handles for a Task::
															
 
																 * More examples::               More examples shipped with StarPU
															
 
																 @end menu
															
@@ -473,14 +474,15 @@ probably use @code{lp_solve -timeout 1 test.pl -wmps test.mps} to convert the
 
																 problem to MPS format and then use a better solver, @code{glpsol} might be
															
 
																 better than @code{lp_solve} for instance (the @code{--pcost} option may be
															
 
																 useful), but sometimes doesn't manage to converge. @code{cbc} might look
															
 
																-slower, but it is parallel. Be sure to try at least all the @code{-B} options
															
 
																-of @code{lp_solve}. For instance, we often just use
															
 
																-@code{lp_solve -cc -B1 -Bb -Bg -Bp -Bf -Br -BG -Bd -Bs -BB -Bo -Bc -Bi} , and
															
 
																-the @code{-gr} option can also be quite useful.
															
 
																+slower, but it is parallel. For @code{lp_solve}, be sure to try at least all the
															
 
																+@code{-B} options. For instance, we often just use @code{lp_solve -cc -B1 -Bb
															
 
																+-Bg -Bp -Bf -Br -BG -Bd -Bs -BB -Bo -Bc -Bi} , and the @code{-gr} option can
															
 
																+also be quite useful. The resulting schedule can be observed by using the
															
 
																+@code{starpu_lp2paje} tool, which converts it into the Paje format.
															
 
																 Data transfer time can only be taken into account when @code{deps} is set. Only
															
 
																 data transfers inferred from implicit data dependencies between tasks are taken
															
 
																-into account.
															
 
																+into account. Other data transfers are assumed to be completely overlapped.
															
 
																 Setting @code{deps} to 0 will only take into account the actual computations
															
 
																 on processing units. It however still properly takes into account the varying
															
@@ -492,9 +494,6 @@ the priorities as the StarPU scheduler would, i.e. schedule prioritized
 
																 tasks before less prioritized tasks, to check to which extend this results
															
 
																 to a less optimal solution. This increases even more computation time.
															
 
																-Note that for simplicity, all this however doesn't take into account data
															
 
																-transfers, which are assumed to be completely overlapped.
															
 
																-
															
 
																 @node Insert Task Utility
															
 
																 @section Insert Task Utility
															
@@ -1264,6 +1263,62 @@ void display_complex_codelet(void *descr[], __attribute__ ((unused)) void *_args
 
																 The whole code for this complex data interface is available in the
															
 
																 directory @code{examples/interface/}.
															
 
																+
															
 
																+@node Setting the Data Handles for a Task
															
 
																+@section Setting the Data Handles for a Task
															
 
																+
															
 
																+The number of data a task can manage is fixed by the
															
 
																+@code{STARPU_NMAXBUFS} macro, whose default value can be changed
															
 
																+through the configure option @code{--enable-maxbuffers} (see
															
 
																+@ref{--enable-maxbuffers}).
															
 
																+
															
 
																+However, it is possible to define tasks managing more data by using
															
 
																+the field @code{dyn_handles} when defining a task and the field
															
 
																+@code{dyn_modes} when defining the corresponding codelet.
															
 
																+
															
 
																+@cartouche
															
 
																+@smallexample
															
 
																+enum starpu_access_mode modes[STARPU_NMAXBUFS+1] = @{
															
 
																+	STARPU_R, STARPU_R, ...
															
 
																+@};
															
 
																+
															
 
																+struct starpu_codelet dummy_big_cl =
															
 
																+@{
															
 
																+	.cuda_funcs = @{dummy_big_kernel, NULL@},
															
 
																+	.opencl_funcs = @{dummy_big_kernel, NULL@},
															
 
																+	.cpu_funcs = @{dummy_big_kernel, NULL@},
															
 
																+	.nbuffers = STARPU_NMAXBUFS+1,
															
 
																+	.dyn_modes = modes
															
 
																+@};
															
 
																+
															
 
																+task = starpu_task_create();
															
 
																+task->cl = &dummy_big_cl;
															
 
																+task->dyn_handles = malloc(task->cl->nbuffers * sizeof(starpu_data_handle_t));
															
 
																+for(i=0 ; i<task->cl->nbuffers ; i++)
															
 
																+@{
															
 
																+	task->dyn_handles[i] = handle;
															
 
																+@}
															
 
																+starpu_task_submit(task);
															
 
																+@end smallexample
															
 
																+@end cartouche
															
 
																+
															
 
																+@cartouche
															
 
																+@smallexample
															
 
																+starpu_data_handle_t *handles = malloc(dummy_big_cl.nbuffers * sizeof(starpu_data_handle_t));
															
 
																+for(i=0 ; i<dummy_big_cl.nbuffers ; i++)
															
 
																+@{
															
 
																+	handles[i] = handle;
															
 
																+@}
															
 
																+starpu_insert_task(&dummy_big_cl,
															
 
																+        	 STARPU_VALUE, &dummy_big_cl.nbuffers, sizeof(dummy_big_cl.nbuffers),
															
 
																+		 STARPU_DATA_ARRAY, handles, dummy_big_cl.nbuffers,
															
 
																+		 0);
															
 
																+@end smallexample
															
 
																+@end cartouche
															
 
																+
															
 
																+The whole code for this example is available in the
															
 
																+file @code{examples/basic_examples/dynamic_handles.c}.
															
 
																+
															
 
																 @node More examples
															
 
																 @section More examples
															
--- a/doc/chapters/api.texi
+++ b/doc/chapters/api.texi
@@ -47,6 +47,14 @@ Define the major version of StarPU
 
																 Define the minor version of StarPU
															
 
																 @end defmac
															
 
																+@defmac STARPU_RELEASE_VERSION
															
 
																+Define the release version of StarPU
															
 
																+@end defmac
															
 
																+
															
 
																+@deftypefun void starpu_get_version (int *@var{major}, int *@var{minor}, int *@var{release})
															
 
																+Return as 3 integers the release version of StarPU.
															
 
																+@end deftypefun
															
 
																+
															
 
																 @node Initialization and Termination
															
 
																 @section Initialization and Termination
															
@@ -1898,6 +1906,17 @@ exceed @code{STARPU_NMAXBUFS}.
 
																 If unsufficient, this value can be set with the @code{--enable-maxbuffers}
															
 
																 option when configuring StarPU.
															
 
																+@item @code{enum starpu_access_mode *dyn_modes}
															
 
																+Is an array of @code{enum starpu_access_mode}. It describes the
															
 
																+required access modes to the data needed by the codelet (e.g.
															
 
																+@code{STARPU_RW}). The number of entries in this array must be
															
 
																+specified in the @code{nbuffers} field (defined above).
															
 
																+This field should be used for codelets having a number of data buffers
															
 
																+greater than @code{STARPU_NMAXBUFS} (@pxref{Setting the Data Handles
															
 
																+for a Task}).
															
 
																+When defining a codelet, one should either define this field or the
															
 
																+field @code{modes} defined above. 
															
 
																+
															
 
																 @item @code{struct starpu_perfmodel *model} (optional)
															
 
																 This is a pointer to the task duration performance model associated to this
															
 
																 codelet. This optional field is ignored when set to @code{NULL} or
															
@@ -1913,8 +1932,8 @@ involved in the parallel execution.
 
																 @item @code{unsigned long per_worker_stats[STARPU_NMAXWORKERS]} (optional)
															
 
																 Statistics collected at runtime: this is filled by StarPU and should not be
															
 
																 accessed directly, but for example by calling the
															
 
																-@code{starpu_display_codelet_stats} function (See
															
 
																-@ref{starpu_display_codelet_stats} for details).
															
 
																+@code{starpu_codelet_display_stats} function (See
															
 
																+@ref{starpu_codelet_display_stats} for details).
															
 
																 @item @code{const char *name} (optional)
															
 
																 Define the name of the codelet. This can be useful for debugging purposes.
															
@@ -1923,6 +1942,7 @@ Define the name of the codelet. This can be useful for debugging purposes.
 
																 @end deftp
															
 
																 @deftypefun void starpu_codelet_init ({struct starpu_codelet} *@var{cl})
															
 
																+@anchor{starpu_codelet_init}
															
 
																 Initialize @var{cl} with default values. Codelets should preferably be
															
 
																 initialized statically as shown in @ref{Defining a Codelet}. However
															
 
																 such a initialisation is not always possible, e.g. when using C++.
															
@@ -1983,10 +2003,25 @@ of entries in this array must be specified in the @code{nbuffers} field of the
 
																 If unsufficient, this value can be set with the @code{--enable-maxbuffers}
															
 
																 option when configuring StarPU.
															
 
																+@item @code{starpu_data_handle_t *dyn_handles}
															
 
																+Is an array of @code{starpu_data_handle_t}. It specifies the handles
															
 
																+to the different pieces of data accessed by the task. The number
															
 
																+of entries in this array must be specified in the @code{nbuffers} field of the
															
 
																+@code{struct starpu_codelet} structure.
															
 
																+This field should be used for tasks having a number of data handles
															
 
																+greater than @code{STARPU_NMAXBUFS} (@pxref{Setting the Data Handles
															
 
																+for a Task}).
															
 
																+When defining a task, one should either define this field or the
															
 
																+field @code{handles} defined above.
															
 
																+
															
 
																 @item @code{void *interfaces[STARPU_NMAXBUFS]}
															
 
																 The actual data pointers to the memory node where execution will happen, managed
															
 
																 by the DSM.
															
 
																+@item @code{void **dyn_interfaces}
															
 
																+The actual data pointers to the memory node where execution will happen, managed
															
 
																+by the DSM. Is used when the field @code{dyn_handles} is defined.
															
 
																+
															
 
																 @item @code{void *cl_arg} (optional; default: @code{NULL})
															
 
																 This pointer is passed to the codelet through the second argument
															
 
																 of the codelet implementation (e.g. @code{cpu_func} or @code{cuda_func}).
															
@@ -2134,6 +2169,37 @@ value. This is equivalent to initializing a starpu_task structure with
 
																 the @code{starpu_task_init} function defined above.
															
 
																 @end defmac
															
 
																+@defmac STARPU_TASK_GET_HANDLE ({struct starpu_task} *@var{task}, int @var{i})
															
 
																+Return the i-th data handle of the given task. If the task is defined
															
 
																+with a static or dynamic number of handles, will either return the
															
 
																+i-th element of the field @code{handles} or the i-th element of the field
															
 
																+@code{dyn_handles} (@pxref{Setting the Data Handles for a Task})
															
 
																+@end defmac
															
 
																+
															
 
																+@defmac STARPU_TASK_SET_HANDLE ({struct starpu_task} *@var{task}, starpu_data_handle_t @var{handle}, int @var{i})
															
 
																+Set the i-th data handle of the given task with the given data handle.
															
 
																+If the task is defined with a static or dynamic number of handles,
															
 
																+will either set the i-th element of the field @code{handles} or the
															
 
																+i-th element of the field @code{dyn_handles} (@pxref{Setting the Data
															
 
																+Handles for a Task})
															
 
																+@end defmac
															
 
																+
															
 
																+@defmac STARPU_CODELET_GET_MODE ({struct starpu_codelet *}@var{codelet}, int @var{i})
															
 
																+Return the access mode of the i-th data handle of the given codelet.
															
 
																+If the codelet is defined with a static or dynamic number of handles,
															
 
																+will either return the i-th element of the field @code{modes} or the
															
 
																+i-th element of the field @code{dyn_modes} (@pxref{Setting the Data
															
 
																+Handles for a Task})
															
 
																+@end defmac
															
 
																+
															
 
																+@defmac STARPU_CODELET_SET_MODE ({struct starpu_codelet *}@var{codelet}, {enum starpu_access_mode} @var{mode}, int @var{i})
															
 
																+Set the access mode of the i-th data handle of the given codelet.
															
 
																+If the codelet is defined with a static or dynamic number of handles,
															
 
																+will either set the i-th element of the field @code{modes} or the
															
 
																+i-th element of the field @code{dyn_modes} (@pxref{Setting the Data
															
 
																+Handles for a Task})
															
 
																+@end defmac
															
 
																+
															
 
																 @deftypefun {struct starpu_task *} starpu_task_create (void)
															
 
																 Allocate a task structure and initialize it with default values. Tasks
															
 
																 allocated dynamically with @code{starpu_task_create} are automatically freed when the
															
@@ -2145,6 +2211,10 @@ by the task have to be freed by calling
 
																 @code{starpu_task_destroy}.
															
 
																 @end deftypefun
															
 
																+@deftypefun {struct starpu_task *} starpu_task_dup ({struct starpu_task *}@var{task})
															
 
																+Allocate a task structure which is the exact duplicate of the given task.
															
 
																+@end deftypefun
															
 
																+
															
 
																 @deftypefun void starpu_task_clean ({struct starpu_task} *@var{task})
															
 
																 Release all the structures automatically allocated to execute @var{task}, but
															
 
																 not the task structure itself and values set by the user remain unchanged.
															
@@ -2218,8 +2288,8 @@ NULL if it is called either from a thread that is not a task or simply
 
																 because there is no task being executed at the moment.
															
 
																 @end deftypefun
															
 
																-@deftypefun void starpu_display_codelet_stats ({struct starpu_codelet} *@var{cl})
															
 
																-@anchor{starpu_display_codelet_stats}
															
 
																+@deftypefun void starpu_codelet_display_stats ({struct starpu_codelet} *@var{cl})
															
 
																+@anchor{starpu_codelet_display_stats}
															
 
																 Output on @code{stderr} some statistics on the codelet @var{cl}.
															
 
																 @end deftypefun
															
@@ -3650,6 +3720,11 @@ Get the description of a combined worker
 
																 Variant of starpu_worker_can_execute_task compatible with combined workers
															
 
																 @end deftypefun
															
 
																+@deftypefun void starpu_parallel_task_barrier_init ({struct starpu_task* }@var{task}, int @var{best_workerid})
															
 
																+Initialise the barrier for the parallel task, and dispatch the task
															
 
																+between the different combined workers
															
 
																+@end deftypefun
															
 
																+
															
 
																 @deftp {Data Type} {struct starpu_machine_topology}
															
 
																 @table @asis
															
 
																 @item @code{unsigned nworkers}
															
@@ -3776,10 +3851,6 @@ Delete the worker collection of the specified scheduling context
 
																 Return the worker collection managed by the indicated context
															
 
																 @end deftypefun
															
 
																-@deftypefun pthread_mutex_t* starpu_sched_ctx_get_changing_ctx_mutex (unsigned @var{sched_ctx_id})
															
 
																-TODO
															
 
																-@end deftypefun
															
 
																-
															
 
																 @deftypefun void starpu_sched_ctx_set_context (unsigned *@var{sched_ctx_id})
															
 
																 Set the scheduling context the subsequent tasks will be submitted to
															
 
																 @end deftypefun
															
--- a/doc/chapters/basic-examples.texi
+++ b/doc/chapters/basic-examples.texi
@@ -140,7 +140,8 @@ struct starpu_codelet cl =
 
																 A codelet is a structure that represents a computational kernel. Such a codelet
															
 
																 may contain an implementation of the same kernel on different architectures
															
 
																 (e.g. CUDA, x86, ...). For compatibility, make sure that the whole
															
 
																-structure is initialized to zero, either by using memset, or by letting the
															
 
																+structure is properly initialized to zero, either by using the
															
 
																+function starpu_codelet_init (@pxref{starpu_codelet_init}), or by letting the
															
 
																 compiler implicitly do it as examplified above.
															
 
																 The @code{nbuffers} field specifies the number of data buffers that are
															
--- a/doc/chapters/configuration.texi
+++ b/doc/chapters/configuration.texi
@@ -234,6 +234,7 @@ Enable gathering of various data statistics (@pxref{Data statistics}).
 
																 @end defvr
															
 
																 @defvr {Configure option} --enable-maxbuffers
															
 
																+@anchor{--enable-maxbuffers}
															
 
																 Define the maximum number of buffers that tasks will be able to take
															
 
																 as parameters, then available as the @code{STARPU_NMAXBUFS} macro.
															
 
																 @end defvr
															
--- a/doc/chapters/perf-optimization.texi
+++ b/doc/chapters/perf-optimization.texi
@@ -409,9 +409,10 @@ STARPU_BUS_STATS=1} and @code{export STARPU_WORKER_STATS=1} .
 
																 Due to CUDA limitations, StarPU will have a hard time overlapping its own
															
 
																 communications and the codelet computations if the application does not use a
															
 
																-dedicated CUDA stream for its computations. StarPU provides one by the use of
															
 
																-@code{starpu_cuda_get_local_stream()} which should be used by all CUDA codelet
															
 
																-operations. For instance:
															
 
																+dedicated CUDA stream for its computations instead of the default stream,
															
 
																+which synchronizes all operations of the GPU. StarPU provides one by the use
															
 
																+of @code{starpu_cuda_get_local_stream()} which can be used by all CUDA codelet
															
 
																+operations to avoid this issue. For instance:
															
 
																 @cartouche
															
 
																 @smallexample
															
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -52,7 +52,6 @@ EXTRA_DIST = 					\
 
																 	basic_examples/variable_kernels_opencl_kernel.cl	\
															
 
																 	matvecmult/matvecmult_kernel.cl				\
															
 
																 	basic_examples/block_opencl_kernel.cl			\
															
 
																-	openmp/vector_scal.c			\
															
 
																 	filters/fblock_opencl_kernel.cl		\
															
 
																 	filters/custom_mf/conversion_opencl.cl  \
															
 
																 	filters/custom_mf/custom_opencl.cl \
															
@@ -159,6 +158,7 @@ examplebin_PROGRAMS +=				\
 
																 	basic_examples/block			\
															
 
																 	basic_examples/variable			\
															
 
																 	basic_examples/multiformat              \
															
 
																+	basic_examples/dynamic_handles		\
															
 
																 	cpp/incrementer_cpp			\
															
 
																 	filters/custom_mf/custom_mf_filter      \
															
 
																 	filters/fvector				\
															
@@ -876,6 +876,18 @@ pipeline_pipeline_LDADD =		\
 
																 	$(STARPU_BLAS_LDFLAGS)
															
 
																 endif
															
 
																+##################
															
 
																+# openmp example #
															
 
																+##################
															
 
																+
															
 
																+if !STARPU_HAVE_WINDOWS
															
 
																+examplebin_PROGRAMS +=		\
															
 
																+	openmp/vector_scal_omp
															
 
																+
															
 
																+openmp_vector_scal_omp_CFLAGS = \
															
 
																+	$(AM_CFLAGS) -fopenmp
															
 
																+endif
															
 
																+
															
 
																 showcheck:
															
 
																 	-cat $(TEST_LOGS) /dev/null
															
 
																 	for i in $(SUBDIRS) ; do \
															
--- a/examples/basic_examples/dynamic_handles.c
+++ b/examples/basic_examples/dynamic_handles.c
@@ -0,0 +1,150 @@
 
																+/* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																+ *
															
 
																+ * Copyright (C) 2013  Centre National de la Recherche Scientifique
															
 
																+ *
															
 
																+ * StarPU is free software; you can redistribute it and/or modify
															
 
																+ * it under the terms of the GNU Lesser General Public License as published by
															
 
																+ * the Free Software Foundation; either version 2.1 of the License, or (at
															
 
																+ * your option) any later version.
															
 
																+ *
															
 
																+ * StarPU is distributed in the hope that it will be useful, but
															
 
																+ * WITHOUT ANY WARRANTY; without even the implied warranty of
															
 
																+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
															
 
																+ *
															
 
																+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
															
 
																+ */
															
 
																+
															
 
																+#include <starpu.h>
															
 
																+
															
 
																+#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
															
 
																+
															
 
																+static void dummy_small_kernel(void *descr[], void *cl_arg)
															
 
																+{
															
 
																+	int nb_data;
															
 
																+	int i;
															
 
																+
															
 
																+	starpu_codelet_unpack_args(cl_arg, &nb_data);
															
 
																+	assert(nb_data == 1);
															
 
																+	FPRINTF(stderr, "Number of data: %d\n", nb_data);
															
 
																+
															
 
																+	for(i=0 ; i<nb_data; i++)
															
 
																+	{
															
 
																+		int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[i]);
															
 
																+		assert(*val == 42);
															
 
																+	}
															
 
																+}
															
 
																+
															
 
																+static void dummy_big_kernel(void *descr[], void *cl_arg)
															
 
																+{
															
 
																+	int nb_data;
															
 
																+	int i;
															
 
																+
															
 
																+	starpu_codelet_unpack_args(cl_arg, &nb_data);
															
 
																+	assert(nb_data == 9);
															
 
																+	FPRINTF(stderr, "Number of data: %d\n", nb_data);
															
 
																+
															
 
																+	for(i=0 ; i<nb_data; i++)
															
 
																+	{
															
 
																+		int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[i]);
															
 
																+		assert(*val == 42);
															
 
																+	}
															
 
																+}
															
 
																+
															
 
																+static struct starpu_codelet dummy_small_cl =
															
 
																+{
															
 
																+	.cuda_funcs = {dummy_small_kernel, NULL},
															
 
																+	.opencl_funcs = {dummy_small_kernel, NULL},
															
 
																+	.cpu_funcs = {dummy_small_kernel, NULL},
															
 
																+	.modes = {STARPU_RW},
															
 
																+	.nbuffers = 1
															
 
																+};
															
 
																+
															
 
																+struct starpu_codelet dummy_big_cl =
															
 
																+{
															
 
																+	.cuda_funcs = {dummy_big_kernel, NULL},
															
 
																+	.opencl_funcs = {dummy_big_kernel, NULL},
															
 
																+	.cpu_funcs = {dummy_big_kernel, NULL},
															
 
																+	.nbuffers = STARPU_NMAXBUFS+1
															
 
																+};
															
 
																+
															
 
																+int main(int argc, char **argv)
															
 
																+{
															
 
																+	starpu_data_handle_t handle, *handles;
															
 
																+	int ret;
															
 
																+	int val=42;
															
 
																+	unsigned i;
															
 
																+	struct starpu_task *task, *task2;
															
 
																+
															
 
																+	ret = starpu_init(NULL);
															
 
																+	if (ret == -ENODEV) return 77;
															
 
																+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
															
 
																+
															
 
																+	dummy_big_cl.dyn_modes = malloc(dummy_big_cl.nbuffers * sizeof(enum starpu_access_mode));
															
 
																+	for(i=0 ; i<dummy_big_cl.nbuffers ; i++)
															
 
																+	     dummy_big_cl.dyn_modes[i] = STARPU_RW;
															
 
																+
															
 
																+	starpu_variable_data_register(&handle, 0, (uintptr_t)&val, sizeof(int));
															
 
																+
															
 
																+	task = starpu_task_create();
															
 
																+	task->synchronous = 1;
															
 
																+	task->cl = &dummy_small_cl;
															
 
																+	starpu_codelet_pack_args(&task->cl_arg, &task->cl_arg_size,
															
 
																+				 STARPU_VALUE, &(task->cl->nbuffers), sizeof(task->cl->nbuffers),
															
 
																+				 0);
															
 
																+	task->dyn_handles = malloc(sizeof(starpu_data_handle_t));
															
 
																+	task->dyn_handles[0] = handle;
															
 
																+	ret = starpu_task_submit(task);
															
 
																+	if (ret == -ENODEV) goto enodev;
															
 
																+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
															
 
																+
															
 
																+	task2 = starpu_task_create();
															
 
																+	task2->synchronous = 1;
															
 
																+	task2->cl = &dummy_big_cl;
															
 
																+	starpu_codelet_pack_args(&task2->cl_arg, &task2->cl_arg_size,
															
 
																+				 STARPU_VALUE, &task2->cl->nbuffers, sizeof(task2->cl->nbuffers),
															
 
																+				 0);
															
 
																+	task2->dyn_handles = malloc(task2->cl->nbuffers * sizeof(starpu_data_handle_t));
															
 
																+	for(i=0 ; i<task2->cl->nbuffers ; i++)
															
 
																+	{
															
 
																+		task2->dyn_handles[i] = handle;
															
 
																+	}
															
 
																+	ret = starpu_task_submit(task2);
															
 
																+	if (ret == -ENODEV) goto enodev;
															
 
																+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
															
 
																+
															
 
																+	ret = starpu_insert_task(&dummy_small_cl,
															
 
																+				 STARPU_VALUE, &dummy_small_cl.nbuffers, sizeof(dummy_small_cl.nbuffers),
															
 
																+				 STARPU_RW, handle,
															
 
																+				 0);
															
 
																+	if (ret == -ENODEV) goto enodev;
															
 
																+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_insert_task");
															
 
																+        ret = starpu_task_wait_for_all();
															
 
																+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
															
 
																+
															
 
																+	handles = malloc(dummy_big_cl.nbuffers * sizeof(starpu_data_handle_t));
															
 
																+	for(i=0 ; i<dummy_big_cl.nbuffers ; i++)
															
 
																+	{
															
 
																+		handles[i] = handle;
															
 
																+	}
															
 
																+	ret = starpu_insert_task(&dummy_big_cl,
															
 
																+				 STARPU_VALUE, &dummy_big_cl.nbuffers, sizeof(dummy_big_cl.nbuffers),
															
 
																+				 STARPU_DATA_ARRAY, handles, dummy_big_cl.nbuffers,
															
 
																+				 0);
															
 
																+	if (ret == -ENODEV) goto enodev;
															
 
																+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_insert_task");
															
 
																+        ret = starpu_task_wait_for_all();
															
 
																+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
															
 
																+	free(handles);
															
 
																+
															
 
																+	starpu_data_unregister(handle);
															
 
																+	free(dummy_big_cl.dyn_modes);
															
 
																+	starpu_shutdown();
															
 
																+
															
 
																+	return EXIT_SUCCESS;
															
 
																+
															
 
																+enodev:
															
 
																+	starpu_data_unregister(handle);
															
 
																+	free(dummy_big_cl.dyn_modes);
															
 
																+	starpu_shutdown();
															
 
																+	return 77;
															
 
																+}
															
--- a/examples/cholesky/cholesky.h
+++ b/examples/cholesky/cholesky.h
@@ -122,6 +122,7 @@ static unsigned check = 0;
 
																 static unsigned bound = 0;
															
 
																 static unsigned bound_deps = 0;
															
 
																 static unsigned bound_lp = 0;
															
 
																+static unsigned bound_mps = 0;
															
 
																 static unsigned with_ctxs = 0;
															
 
																 static unsigned with_noctxs = 0;
															
 
																 static unsigned chole1 = 0;
															
@@ -150,77 +151,83 @@ static void __attribute__((unused)) parse_args(int argc, char **argv)
 
																 		{
															
 
																 			with_ctxs = 1;
															
 
																 			break;
															
 
																-		}
															
 
																+		} else
															
 
																 		if (strcmp(argv[i], "-with_noctxs") == 0) 
															
 
																 		{
															
 
																 			with_noctxs = 1;
															
 
																 			break;
															
 
																-		}
															
 
																+		} else
															
 
																 		if (strcmp(argv[i], "-chole1") == 0) 
															
 
																 		{
															
 
																 			chole1 = 1;
															
 
																 			break;
															
 
																-		}
															
 
																+		} else
															
 
																 		if (strcmp(argv[i], "-chole2") == 0) 
															
 
																 		{
															
 
																 			chole2 = 1;
															
 
																 			break;
															
 
																-		}
															
 
																+		} else
															
 
																 		if (strcmp(argv[i], "-size") == 0)
															
 
																 		{
															
 
																 		        char *argptr;
															
 
																 			size = strtol(argv[++i], &argptr, 10);
															
 
																-		}
															
 
																+		} else
															
 
																 		if (strcmp(argv[i], "-nblocks") == 0)
															
 
																 		{
															
 
																 		        char *argptr;
															
 
																 			nblocks = strtol(argv[++i], &argptr, 10);
															
 
																-		}
															
 
																+		} else
															
 
																 		if (strcmp(argv[i], "-nbigblocks") == 0)
															
 
																 		{
															
 
																 		        char *argptr;
															
 
																 			nbigblocks = strtol(argv[++i], &argptr, 10);
															
 
																-		}
															
 
																+		} else
															
 
																 		if (strcmp(argv[i], "-no-pin") == 0)
															
 
																 		{
															
 
																 			pinned = 0;
															
 
																-		}
															
 
																+		} else
															
 
																 		if (strcmp(argv[i], "-no-prio") == 0)
															
 
																 		{
															
 
																 			noprio = 1;
															
 
																-		}
															
 
																+		} else
															
 
																 		if (strcmp(argv[i], "-bound") == 0)
															
 
																 		{
															
 
																 			bound = 1;
															
 
																-		}
															
 
																+		} else
															
 
																 		if (strcmp(argv[i], "-bound-lp") == 0)
															
 
																 		{
															
 
																 			bound_lp = 1;
															
 
																-		}
															
 
																+		} else
															
 
																+
															
 
																+		if (strcmp(argv[i], "-bound-mps") == 0)
															
 
																+		{
															
 
																+			bound_mps = 1;
															
 
																+		} else
															
 
																 		if (strcmp(argv[i], "-bound-deps") == 0)
															
 
																 		{
															
 
																 			bound_deps = 1;
															
 
																-		}
															
 
																+		} else
															
 
																 		if (strcmp(argv[i], "-check") == 0)
															
 
																 		{
															
 
																 			check = 1;
															
 
																-		}
															
 
																+		} else
															
 
																-		if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i],"--help") == 0)
															
 
																+		/* if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i],"--help") == 0) */
															
 
																 		{
															
 
																 			fprintf(stderr,"usage : %s [-size size] [-nblocks nblocks] [-no-pin] [-no-prio] [-bound] [-bound-deps] [-bound-lp] [-check]\n", argv[0]);
															
 
																 			fprintf(stderr,"Currently selected: %ux%u and %ux%u blocks\n", size, size, nblocks, nblocks);
															
 
																+			exit(0);
															
 
																 		}
															
 
																 	}
															
 
																 }
															
--- a/examples/cholesky/cholesky_implicit.c
+++ b/examples/cholesky/cholesky_implicit.c
@@ -89,7 +89,7 @@ static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 
																 	start = starpu_timing_now();
															
 
																-	if (bound)
															
 
																+	if (bound || bound_lp || bound_mps)
															
 
																 		starpu_bound_start(bound_deps, 0);
															
 
																 	/* create all the DAG nodes */
															
 
																 	for (k = 0; k < nblocks; k++)
															
@@ -140,7 +140,7 @@ static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 
																 	}
															
 
																 	starpu_task_wait_for_all();
															
 
																-	if (bound)
															
 
																+	if (bound || bound_lp || bound_mps)
															
 
																 		starpu_bound_stop();
															
 
																 	end = starpu_timing_now();
															
@@ -162,6 +162,11 @@ static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 
																 			FILE *f = fopen("cholesky.lp", "w");
															
 
																 			starpu_bound_print_lp(f);
															
 
																 		}
															
 
																+		if (bound_mps)
															
 
																+		{
															
 
																+			FILE *f = fopen("cholesky.mps", "w");
															
 
																+			starpu_bound_print_mps(f);
															
 
																+		}
															
 
																 		if (bound)
															
 
																 		{
															
 
																 			double res;
															
--- a/examples/openmp/vector_scal.c
+++ b/examples/openmp/vector_scal.c
@@ -25,7 +25,12 @@
 
																 #include <stdio.h>
															
 
																 #include <limits.h>
															
 
																+#ifdef STARPU_QUICK_CHECK
															
 
																+#define	NX	2048
															
 
																+#else
															
 
																 #define	NX	2048000
															
 
																+#endif
															
 
																+
															
 
																 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
															
 
																 void scal_cpu_func(void *buffers[], void *_args)
															
@@ -94,7 +99,8 @@ int main(int argc, char **argv)
 
																 	float factor = 1.001;
															
 
																-	for (i = 0; i < 100; i++) {
															
 
																+	for (i = 0; i < 100; i++)
															
 
																+	{
															
 
																 		struct starpu_task *task = starpu_task_create();
															
 
																 		task->cl = &cl;
															
--- a/examples/pi/pi.c
+++ b/examples/pi/pi.c
@@ -198,7 +198,7 @@ int main(int argc, char **argv)
 
																 	FPRINTF(stderr, "Total time : %f ms\n", timing/1000.0);
															
 
																 	FPRINTF(stderr, "Speed : %f GShot/s\n", total_shot_cnt/(1e3*timing));
															
 
																-	if (!getenv("STARPU_SSILENT")) starpu_display_codelet_stats(&pi_cl);
															
 
																+	if (!getenv("STARPU_SSILENT")) starpu_codelet_display_stats(&pi_cl);
															
 
																 	starpu_shutdown();
															
--- a/include/starpu.h
+++ b/include/starpu.h
@@ -153,6 +153,8 @@ int starpu_asynchronous_opencl_copy_disabled(void);
 
																 void starpu_profiling_init();
															
 
																 void starpu_display_stats();
															
 
																+void starpu_get_version(int *major, int *minor, int *release);
															
 
																+
															
 
																 #ifdef __cplusplus
															
 
																 }
															
 
																 #endif
															
--- a/include/starpu_config.h.in
+++ b/include/starpu_config.h.in
@@ -20,6 +20,7 @@
 
																 #undef STARPU_MAJOR_VERSION
															
 
																 #undef STARPU_MINOR_VERSION
															
 
																+#undef STARPU_RELEASE_VERSION
															
 
																 #undef STARPU_USE_CPU
															
 
																 #undef STARPU_USE_CUDA
															
@@ -113,4 +114,6 @@ struct timespec
 
																 #undef STARPU_HAVE_RINTF
															
 
																 #undef STARPU_USE_TOP
															
 
																+#undef STARPU_HAVE_HWLOC
															
 
																+
															
 
																 #endif
															
--- a/include/starpu_deprecated_api.h
+++ b/include/starpu_deprecated_api.h
@@ -88,6 +88,8 @@ typedef enum starpu_access_mode starpu_access_mode;
 
																 #define starpu_depth_block_filter_func_block		starpu_block_filter_depth_block
															
 
																 #define starpu_depth_block_shadow_filter_func_block	starpu_block_filter_depth_block_shadow
															
 
																+#define starpu_display_codelet_stats		starpu_codelet_display_stats
															
 
																+
															
 
																 #endif /* STARPU_USE_DEPRECATED_ONE_ZERO_API */
															
 
																 #ifdef __cplusplus
															
--- a/include/starpu_sched_ctx.h
+++ b/include/starpu_sched_ctx.h
@@ -48,9 +48,6 @@ void starpu_sched_ctx_delete(unsigned sched_ctx_id);
 
																 /* indicate which context whill inherit the resources of this context when he will be deleted */
															
 
																 void starpu_sched_ctx_set_inheritor(unsigned sched_ctx_id, unsigned inheritor);
															
 
																-/* mutex synchronising several simultaneous modifications of a context */
															
 
																-starpu_pthread_mutex_t* starpu_sched_ctx_get_changing_ctx_mutex(unsigned sched_ctx_id);
															
 
																-
															
 
																 /* indicate that the current thread is submitting only to the current context */
															
 
																 void starpu_sched_ctx_set_context(unsigned *sched_ctx_id);
															
--- a/include/starpu_task.h
+++ b/include/starpu_task.h
@@ -96,6 +96,7 @@ struct starpu_codelet
 
																 	unsigned nbuffers;
															
 
																 	/* which are the access modes for these buffers */
															
 
																 	enum starpu_access_mode modes[STARPU_NMAXBUFS];
															
 
																+	enum starpu_access_mode *dyn_modes;
															
 
																 	/* performance model of the codelet */
															
 
																 	struct starpu_perfmodel *model;
															
@@ -104,7 +105,7 @@ struct starpu_codelet
 
																 	struct starpu_perfmodel *power_model;
															
 
																 	/* statistics collected at runtime: this is filled by StarPU and should
															
 
																-	 * not be accessed directly (use the starpu_display_codelet_stats
															
 
																+	 * not be accessed directly (use the starpu_codelet_display_stats
															
 
																 	 * function instead for instance). */
															
 
																 	unsigned long per_worker_stats[STARPU_NMAXWORKERS];
															
@@ -120,6 +121,9 @@ struct starpu_task
 
																 	starpu_data_handle_t handles[STARPU_NMAXBUFS];
															
 
																 	void *interfaces[STARPU_NMAXBUFS];
															
 
																+	starpu_data_handle_t *dyn_handles;
															
 
																+	void **dyn_interfaces;
															
 
																+
															
 
																 	/* arguments not managed by the DSM are given as a buffer */
															
 
																 	void *cl_arg;
															
 
																 	/* in case the argument buffer has to be uploaded explicitely */
															
@@ -240,9 +244,17 @@ struct starpu_task
 
																 	.sched_ctx = 0,					\
															
 
																 	.hypervisor_tag = 0,				\
															
 
																 	.flops = 0.0,					\
															
 
																-		.scheduled = 0				\
															
 
																+	.scheduled = 0,					\
															
 
																+	.dyn_handles = NULL,				\
															
 
																+	.dyn_interfaces = NULL				\
															
 
																 }
															
 
																+#define STARPU_TASK_GET_HANDLE(task, i) ((task->dyn_handles) ? task->dyn_handles[i] : task->handles[i])
															
 
																+#define STARPU_TASK_SET_HANDLE(task, handle, i) do { if (task->dyn_handles) task->dyn_handles[i] = handle; else task->handles[i] = handle; } while(0)
															
 
																+
															
 
																+#define STARPU_CODELET_GET_MODE(codelet, i) ((codelet->dyn_modes) ? codelet->dyn_modes[i] : codelet->modes[i])
															
 
																+#define STARPU_CODELET_SET_MODE(codelet, mode, i) do { if (codelet->dyn_modes) codelet->dyn_modes[i] = mode; else codelet->modes[i] = mode; } while(0)
															
 
																+
															
 
																 /*
															
 
																  * handle task dependencies: it is possible to associate a task with a unique
															
 
																  * "tag" and to express dependencies between tasks by the means of those tags
															
@@ -317,10 +329,13 @@ int starpu_task_submit_to_ctx(struct starpu_task *task, unsigned sched_ctx_id);
 
																  * indicates that the waited task was either synchronous or detached. */
															
 
																 int starpu_task_wait(struct starpu_task *task) STARPU_WARN_UNUSED_RESULT;
															
 
																-/* This function waits until all the tasks that were already submitted have
															
 
																+/* This function waits until all the tasks that were already submitted 
															
 
																+ * (to the current context, or to the global one if there is none) have
															
 
																  * been executed. */
															
 
																 int starpu_task_wait_for_all(void);
															
 
																+/* This function waits until all the tasks that were already submitted to the 
															
 
																+ * context have been executed */
															
 
																 int starpu_task_wait_for_all_in_ctx(unsigned sched_ctx_id);
															
 
																 /* This function waits until there is no more ready task. */
															
@@ -331,13 +346,20 @@ int starpu_task_nsubmitted(void);
 
																 void starpu_codelet_init(struct starpu_codelet *cl);
															
 
																-void starpu_display_codelet_stats(struct starpu_codelet *cl);
															
 
																+void starpu_codelet_display_stats(struct starpu_codelet *cl);
															
 
																 /* Return the task currently executed by the worker, or NULL if this is called
															
 
																  * either from a thread that is not a task or simply because there is no task
															
 
																  * being executed at the moment. */
															
 
																 struct starpu_task *starpu_task_get_current(void);
															
 
																+/* initialise the barrier for the parallel task, so that all workers start it
															
 
																+ * at the same time */
															
 
																+void starpu_parallel_task_barrier_init(struct starpu_task* task, int workerid);
															
 
																+
															
 
																+/* duplicate the given task */
															
 
																+struct starpu_task *starpu_task_dup(struct starpu_task *task);
															
 
																+
															
 
																 #ifdef __cplusplus
															
 
																 }
															
 
																 #endif
															
--- a/include/starpu_task_util.h
+++ b/include/starpu_task_util.h
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																  * Copyright (C) 2010-2013  Université de Bordeaux 1
															
 
																- * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -57,7 +57,7 @@ void starpu_codelet_unpack_args(void *cl_arg, ...);
 
																 /* Pack arguments of type STARPU_VALUE into a buffer which can be
															
 
																  * given to a codelet and later unpacked with starpu_codelet_unpack_args */
															
 
																-void starpu_codelet_pack_args(char **arg_buffer, size_t *arg_buffer_size, ...);
															
 
																+void starpu_codelet_pack_args(void **arg_buffer, size_t *arg_buffer_size, ...);
															
 
																 #ifdef __cplusplus
															
 
																 }
															
--- a/include/starpu_top.h
+++ b/include/starpu_top.h
@@ -195,6 +195,14 @@ void starpu_top_update_data_float(const struct starpu_top_data* data,
 
																 				  double value);
															
 
																 /*
															
 
																+ * This function notifies the UI that the task has been planned to
															
 
																+ * run from start to end on a computation core
															
 
																+ */
															
 
																+void starpu_top_task_prevision(struct starpu_task *task,
															
 
																+			       int devid, unsigned long long start,
															
 
																+			       unsigned long long end);
															
 
																+
															
 
																+/*
															
 
																  * This functions are usefull in debug mode. The starpu developper doesn't need
															
 
																  * to check if the debug mode is active.
															
 
																  * This is checked by starpu_top itsefl.
															
--- a/include/starpu_worker.h
+++ b/include/starpu_worker.h
@@ -123,7 +123,6 @@ int starpu_combined_worker_get_id(void);
 
																 int starpu_combined_worker_get_size(void);
															
 
																 int starpu_combined_worker_get_rank(void);
															
 
																-
															
 
																 /* This function returns the type of worker associated to an identifier (as
															
 
																  * returned by the starpu_worker_get_id function). The returned value indicates
															
 
																  * the architecture of the worker: STARPU_CPU_WORKER for a CPU core,
															
--- a/mpi/src/starpu_mpi_insert_task.c
+++ b/mpi/src/starpu_mpi_insert_task.c
@@ -24,6 +24,7 @@
 
																 #include <common/uthash.h>
															
 
																 #include <util/starpu_insert_task_utils.h>
															
 
																 #include <datawizard/coherency.h>
															
 
																+#include <core/task.h>
															
 
																 #include <starpu_mpi_private.h>
															
@@ -369,7 +370,7 @@ int starpu_mpi_insert_task(MPI_Comm comm, struct starpu_codelet *codelet, ...)
 
																 	int me, do_execute, xrank, nb_nodes;
															
 
																 	size_t *size_on_nodes;
															
 
																 	size_t arg_buffer_size = 0;
															
 
																-	char *arg_buffer = NULL;
															
 
																+	void *arg_buffer = NULL;
															
 
																 	int dest=0, inconsistent_execute;
															
 
																 	int current_data = 0;
															
@@ -420,7 +421,7 @@ int starpu_mpi_insert_task(MPI_Comm comm, struct starpu_codelet *codelet, ...)
 
																 			int i;
															
 
																 			for(i=0 ; i<nb_handles ; i++)
															
 
																 			{
															
 
																-				enum starpu_access_mode mode = codelet->modes[current_data];
															
 
																+				enum starpu_access_mode mode = STARPU_CODELET_GET_MODE(codelet, current_data);
															
 
																 				int ret = _starpu_mpi_find_executee_node(datas[i], mode, me, &do_execute, &inconsistent_execute, &dest, size_on_nodes);
															
 
																 				if (ret == -EINVAL)
															
 
																 				{
															
@@ -531,7 +532,7 @@ int starpu_mpi_insert_task(MPI_Comm comm, struct starpu_codelet *codelet, ...)
 
																 			for(i=0 ; i<nb_handles ; i++)
															
 
																 			{
															
 
																-				_starpu_mpi_exchange_data_before_execution(datas[i], codelet->modes[current_data], me, dest, do_execute, comm);
															
 
																+				_starpu_mpi_exchange_data_before_execution(datas[i], STARPU_CODELET_GET_MODE(codelet, current_data), me, dest, do_execute, comm);
															
 
																 				current_data++;
															
 
																 			}
															
 
																 		}
															
@@ -590,12 +591,16 @@ int starpu_mpi_insert_task(MPI_Comm comm, struct starpu_codelet *codelet, ...)
 
																 		if (arg_buffer_size)
															
 
																 		{
															
 
																 			va_start(varg_list, codelet);
															
 
																-			_starpu_codelet_pack_args(arg_buffer_size, &arg_buffer, varg_list);
															
 
																+			_starpu_codelet_pack_args(&arg_buffer, arg_buffer_size, varg_list);
															
 
																 		}
															
 
																 		_STARPU_MPI_DEBUG(1, "Execution of the codelet %p (%s)\n", codelet, codelet->name);
															
 
																 		va_start(varg_list, codelet);
															
 
																 		struct starpu_task *task = starpu_task_create();
															
 
																+		if (codelet->nbuffers > STARPU_NMAXBUFS)
															
 
																+		{
															
 
																+			task->dyn_handles = malloc(codelet->nbuffers * sizeof(starpu_data_handle_t));
															
 
																+		}
															
 
																 		int ret = _starpu_insert_task_create_and_submit(arg_buffer, arg_buffer_size, codelet, &task, varg_list);
															
 
																 		STARPU_ASSERT_MSG(ret==0, "_starpu_insert_task_create_and_submit failure %d", ret);
															
 
																 	}
															
@@ -622,7 +627,7 @@ int starpu_mpi_insert_task(MPI_Comm comm, struct starpu_codelet *codelet, ...)
 
																 				for(i=0 ; i<nb_handles ; i++)
															
 
																 				{
															
 
																-					_starpu_mpi_exchange_data_after_execution(datas[i], codelet->modes[current_data], me, xrank, dest, do_execute, comm);
															
 
																+					_starpu_mpi_exchange_data_after_execution(datas[i], STARPU_CODELET_GET_MODE(codelet, current_data), me, xrank, dest, do_execute, comm);
															
 
																 					current_data++;
															
 
																 				}
															
 
																 			}
															
@@ -692,7 +697,7 @@ int starpu_mpi_insert_task(MPI_Comm comm, struct starpu_codelet *codelet, ...)
 
																 			for(i=0 ; i<nb_handles ; i++)
															
 
																 			{
															
 
																-				_starpu_mpi_clear_data_after_execution(datas[i], codelet->modes[current_data], me, do_execute, comm);
															
 
																+				_starpu_mpi_clear_data_after_execution(datas[i], STARPU_CODELET_GET_MODE(codelet, current_data), me, do_execute, comm);
															
 
																 				current_data++;
															
 
																 			}
															
 
																 		}
															
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -73,7 +73,6 @@ noinst_HEADERS = 						\
 
																 	core/debug.h						\
															
 
																 	core/errorcheck.h					\
															
 
																 	core/combined_workers.h					\
															
 
																-	core/parallel_task.h					\
															
 
																 	core/simgrid.h						\
															
 
																 	core/task_bundle.h					\
															
 
																 	sched_policies/detect_combined_workers.h		\
															
--- a/src/common/thread.h
+++ b/src/common/thread.h
@@ -70,16 +70,6 @@
 
																 	}                                                                      \
															
 
																 } while (0)
															
 
																-#define _STARPU_PTHREAD_MUTEX_TRYLOCK(mutex) do {                              \
															
 
																-	int p_ret = starpu_pthread_mutex_trylock(mutex);                       \
															
 
																-	if (STARPU_UNLIKELY(p_ret)) {                                          \
															
 
																-		fprintf(stderr,                                                \
															
 
																-			"%s:%d starpu_pthread_mutex_trylock: %s\n",            \
															
 
																-			__FILE__, __LINE__, strerror(p_ret));                  \
															
 
																-		STARPU_ABORT();                                                \
															
 
																-	}                                                                      \
															
 
																-} while (0)
															
 
																-
															
 
																 #define _STARPU_PTHREAD_MUTEX_UNLOCK(mutex) do {                               \
															
 
																 	int p_ret = starpu_pthread_mutex_unlock(mutex);                        \
															
 
																 	if (STARPU_UNLIKELY(p_ret)) {                                          \
															
--- a/src/core/combined_workers.c
+++ b/src/core/combined_workers.c
@@ -162,3 +162,4 @@ int starpu_combined_worker_get_description(int workerid, int *worker_size, int *
 
																 	return 0;
															
 
																 }
															
 
																+
															
--- a/src/core/dependencies/data_concurrency.c
+++ b/src/core/dependencies/data_concurrency.c
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																  * Copyright (C) 2010-2012  Université de Bordeaux 1
															
 
																- * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -192,8 +192,8 @@ static unsigned attempt_to_submit_data_request_from_job(struct _starpu_job *j, u
 
																 {
															
 
																 	/* Note that we do not access j->task->handles, but j->ordered_buffers
															
 
																 	 * which is a sorted copy of it. */
															
 
																-	starpu_data_handle_t handle = j->ordered_buffers[buffer_index].handle;
															
 
																-	enum starpu_access_mode mode = j->ordered_buffers[buffer_index].mode;
															
 
																+	starpu_data_handle_t handle = _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, buffer_index);
															
 
																+	enum starpu_access_mode mode = _STARPU_JOB_GET_ORDERED_BUFFER_MODE(j, buffer_index);
															
 
																 	return _starpu_attempt_to_submit_data_request(1, handle, mode, NULL, NULL, j, buffer_index);
															
 
																 }
															
@@ -205,11 +205,16 @@ static unsigned _submit_job_enforce_data_deps(struct _starpu_job *j, unsigned st
 
																 	unsigned nbuffers = j->task->cl->nbuffers;
															
 
																 	for (buf = start_buffer_index; buf < nbuffers; buf++)
															
 
																 	{
															
 
																-		if (buf && j->ordered_buffers[buf-1].handle == j->ordered_buffers[buf].handle)
															
 
																-			/* We have already requested this data, skip it. This
															
 
																-			 * depends on ordering putting writes before reads, see
															
 
																-			 * _starpu_compar_handles.  */
															
 
																-			continue;
															
 
																+		if (buf)
															
 
																+		{
															
 
																+			starpu_data_handle_t handle_m1 = _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, buf-1);
															
 
																+			starpu_data_handle_t handle = _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, buf);
															
 
																+			if (handle_m1 == handle)
															
 
																+				/* We have already requested this data, skip it. This
															
 
																+				 * depends on ordering putting writes before reads, see
															
 
																+				 * _starpu_compar_handles.  */
															
 
																+				continue;
															
 
																+		}
															
 
																                 j->task->status = STARPU_TASK_BLOCKED_ON_DATA;
															
 
																                 if (attempt_to_submit_data_request_from_job(j, buf))
															
@@ -238,11 +243,13 @@ unsigned _starpu_submit_job_enforce_data_deps(struct _starpu_job *j)
 
																 	unsigned i;
															
 
																 	for (i=0 ; i<cl->nbuffers ; i++)
															
 
																 	{
															
 
																-		j->ordered_buffers[i].handle = j->task->handles[i];
															
 
																-		j->ordered_buffers[i].mode = j->task->cl->modes[i];
															
 
																+		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(j->task, i);
															
 
																+		_STARPU_JOB_SET_ORDERED_BUFFER_HANDLE(j, handle, i);
															
 
																+		enum starpu_access_mode mode = STARPU_CODELET_GET_MODE(j->task->cl, i);
															
 
																+		_STARPU_JOB_SET_ORDERED_BUFFER_MODE(j, mode, i);
															
 
																 	}
															
 
																-	_starpu_sort_task_handles(j->ordered_buffers, cl->nbuffers);
															
 
																+	_starpu_sort_task_handles(_STARPU_JOB_GET_ORDERED_BUFFERS(j), cl->nbuffers);
															
 
																 	return _submit_job_enforce_data_deps(j, 0);
															
 
																 }
															
--- a/src/core/dependencies/implicit_data_deps.c
+++ b/src/core/dependencies/implicit_data_deps.c
@@ -336,8 +336,8 @@ void _starpu_detect_implicit_data_deps(struct starpu_task *task)
 
																 	unsigned buffer;
															
 
																 	for (buffer = 0; buffer < nbuffers; buffer++)
															
 
																 	{
															
 
																-		starpu_data_handle_t handle = task->handles[buffer];
															
 
																-		enum starpu_access_mode mode = task->cl->modes[buffer];
															
 
																+		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, buffer);
															
 
																+		enum starpu_access_mode mode = STARPU_CODELET_GET_MODE(task->cl, buffer);
															
 
																 		struct starpu_task *new_task;
															
 
																 		/* Scratch memory does not introduce any deps */
															
@@ -457,7 +457,7 @@ void _starpu_release_data_enforce_sequential_consistency(struct starpu_task *tas
 
																 void _starpu_release_task_enforce_sequential_consistency(struct _starpu_job *j)
															
 
																 {
															
 
																 	struct starpu_task *task = j->task;
															
 
																-        struct starpu_buffer_descr *descrs = j->ordered_buffers;
															
 
																+        struct starpu_buffer_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j);
															
 
																 	if (!task->cl)
															
 
																 		return;
															
--- a/src/core/jobs.c
+++ b/src/core/jobs.c
@@ -52,6 +52,9 @@ struct _starpu_job* __attribute__((malloc)) _starpu_job_create(struct starpu_tas
 
																 	 * everywhere */
															
 
																 	memset(job, 0, sizeof(*job));
															
 
																+	if (task->dyn_handles)
															
 
																+	     job->dyn_ordered_buffers = malloc(task->cl->nbuffers * sizeof(struct starpu_buffer_descr));
															
 
																+
															
 
																 	job->task = task;
															
 
																 #ifndef STARPU_USE_FXT
															
@@ -104,6 +107,11 @@ void _starpu_job_destroy(struct _starpu_job *j)
 
																 	}
															
 
																 	_starpu_cg_list_deinit(&j->job_successors);
															
 
																+	if (j->dyn_ordered_buffers)
															
 
																+	{
															
 
																+	     free(j->dyn_ordered_buffers);
															
 
																+	     j->dyn_ordered_buffers = NULL;
															
 
																+	}
															
 
																 	_starpu_job_delete(j);
															
 
																 }
															
@@ -149,8 +157,11 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 
																 	int i;
															
 
																 	size_t data_size = 0;
															
 
																 	for(i = 0; i < STARPU_NMAXBUFS; i++)
															
 
																-		if(task->handles[i] != NULL)
															
 
																-			data_size += _starpu_data_get_size(task->handles[i]);
															
 
																+	{
															
 
																+		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
															
 
																+		if (handle != NULL)
															
 
																+			data_size += _starpu_data_get_size(handle);
															
 
																+	}
															
 
																 #endif //STARPU_USE_SC_HYPERVISOR
															
 
																 	/* We release handle reference count */
															
@@ -159,7 +170,7 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 
																 		unsigned i;
															
 
																 		for (i=0; i<task->cl->nbuffers; i++)
															
 
																 		{
															
 
																-			starpu_data_handle_t handle = task->handles[i];
															
 
																+			starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
															
 
																 			_starpu_spin_lock(&handle->header_lock);
															
 
																 			handle->busy_count--;
															
 
																 			if (!_starpu_data_check_not_busy(handle))
															
--- a/src/core/jobs.h
+++ b/src/core/jobs.h
@@ -70,6 +70,7 @@ LIST_TYPE(_starpu_job,
 
																 	 * the task so that we always grab the rw-lock associated to the
															
 
																 	 * handles in the same order. */
															
 
																 	struct starpu_buffer_descr ordered_buffers[STARPU_NMAXBUFS];
															
 
																+	struct starpu_buffer_descr *dyn_ordered_buffers;
															
 
																 	/* If a tag is associated to the job, this points to the internal data
															
 
																 	 * structure that describes the tag status. */
															
@@ -172,4 +173,13 @@ struct starpu_task *_starpu_pop_local_task(struct _starpu_worker *worker);
 
																  * enforce a FIFO ordering. */
															
 
																 int _starpu_push_local_task(struct _starpu_worker *worker, struct starpu_task *task, int back);
															
 
																+#define _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(job, i) (((job)->dyn_ordered_buffers) ? (job)->dyn_ordered_buffers[(i)].handle : (job)->ordered_buffers[(i)].handle) /* handle of ordered buffer i: read from dyn_ordered_buffers when that pointer is non-NULL, else from the static ordered_buffers array; arguments are parenthesized so any expression can be passed */
															
 
																+#define _STARPU_JOB_GET_ORDERED_BUFFER_MODE(job, i) (((job)->dyn_ordered_buffers) ? (job)->dyn_ordered_buffers[(i)].mode : (job)->ordered_buffers[(i)].mode) /* access mode of ordered buffer i: read from dyn_ordered_buffers when that pointer is non-NULL, else from the static ordered_buffers array; arguments are parenthesized so any expression can be passed */
															
 
																+
															
 
																+#define _STARPU_JOB_SET_ORDERED_BUFFER_HANDLE(job, new_handle, i) do { if ((job)->dyn_ordered_buffers) (job)->dyn_ordered_buffers[(i)].handle = (new_handle); else (job)->ordered_buffers[(i)].handle = (new_handle);} while(0) /* stores new_handle as the handle of ordered buffer i (dynamic array when allocated, else static array); the parameter must not be named 'handle', otherwise the preprocessor also rewrites the .handle member name */
															
 
																+#define _STARPU_JOB_SET_ORDERED_BUFFER_MODE(job, new_mode, i) do { if ((job)->dyn_ordered_buffers) (job)->dyn_ordered_buffers[(i)].mode = (new_mode); else (job)->ordered_buffers[(i)].mode = (new_mode);} while(0) /* stores new_mode as the access mode of ordered buffer i (dynamic array when allocated, else static array); the parameter must not be named 'mode', otherwise the preprocessor also rewrites the .mode member name */
															
 
																+
															
 
																+#define _STARPU_JOB_SET_ORDERED_BUFFER(job, buffer, i) do { if ((job)->dyn_ordered_buffers) (job)->dyn_ordered_buffers[(i)] = (buffer); else (job)->ordered_buffers[(i)] = (buffer);} while(0) /* copies a whole buffer descriptor into slot i (dynamic array when allocated, else static array); arguments are parenthesized so any expression can be passed */
															
 
																+#define _STARPU_JOB_GET_ORDERED_BUFFERS(job) (((job)->dyn_ordered_buffers) ? (job)->dyn_ordered_buffers : (job)->ordered_buffers) /* pointer to the first ordered buffer descriptor (dynamic array when allocated, else static array); the whole expansion is parenthesized so it can be indexed or embedded in a larger expression */
															
 
																+
															
 
																 #endif // __JOBS_H__
															
--- a/src/core/parallel_task.c
+++ b/src/core/parallel_task.c
@@ -19,15 +19,38 @@
 
																 #include <core/jobs.h>
															
 
																 #include <core/task.h>
															
 
																 #include <common/utils.h>
															
 
																+#include <core/workers.h>
															
 
																+#include <common/barrier.h>
															
 
																-struct starpu_task *_starpu_create_task_alias(struct starpu_task *task)
															
 
																+struct starpu_task *starpu_task_dup(struct starpu_task *task)
															
 
																 {
															
 
																 	struct starpu_task *task_dup = (struct starpu_task *) malloc(sizeof(struct starpu_task));
															
 
																 	STARPU_ASSERT(task_dup);
															
 
																-	/* XXX perhaps this is a bit too much overhead and we should only copy
															
 
																+	/* TODO perhaps this is a bit too much overhead and we should only copy
															
 
																 	 * part of the structure ? */
															
 
																 	memcpy(task_dup, task, sizeof(struct starpu_task));
															
 
																 	return task_dup;
															
 
																 }
															
 
																+
															
 
																+void starpu_parallel_task_barrier_init(struct starpu_task* task, int workerid)
															
 
																+{
															
 
																+	/* The master needs to dispatch the task between the
															
 
																+	 * different combined workers */
															
 
																+	struct _starpu_combined_worker *combined_worker =  _starpu_get_combined_worker_struct(workerid);
															
 
																+	int worker_size = combined_worker->worker_size;
															
 
																+
															
 
																+	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
															
 
																+	j->task_size = worker_size;
															
 
																+	j->combined_workerid = workerid;
															
 
																+	j->active_task_alias_count = 0;
															
 
																+
															
 
																+	//fprintf(stderr, "POP -> size %d best_size %d\n", worker_size, best_size);
															
 
																+
															
 
																+	_STARPU_PTHREAD_BARRIER_INIT(&j->before_work_barrier, NULL, worker_size);
															
 
																+	_STARPU_PTHREAD_BARRIER_INIT(&j->after_work_barrier, NULL, worker_size);
															
 
																+
															
 
																+	return;
															
 
																+}
															
 
																+
															
--- a/src/core/parallel_task.h
+++ b/src/core/parallel_task.h
@@ -1,24 +0,0 @@
 
																-/* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																- *
															
 
																- * Copyright (C) 2010  Université de Bordeaux 1
															
 
																- *
															
 
																- * StarPU is free software; you can redistribute it and/or modify
															
 
																- * it under the terms of the GNU Lesser General Public License as published by
															
 
																- * the Free Software Foundation; either version 2.1 of the License, or (at
															
 
																- * your option) any later version.
															
 
																- *
															
 
																- * StarPU is distributed in the hope that it will be useful, but
															
 
																- * WITHOUT ANY WARRANTY; without even the implied warranty of
															
 
																- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
															
 
																- *
															
 
																- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
															
 
																- */
															
 
																-
															
 
																-#ifndef __PARALLEL_TASK_H__
															
 
																-#define __PARALLEL_TASK_H__
															
 
																-
															
 
																-#include <starpu.h>
															
 
																-
															
 
																-struct starpu_task *_starpu_create_task_alias(struct starpu_task *task);
															
 
																-
															
 
																-#endif /* __PARALLEL_TASK_H__ */
															
--- a/src/core/perfmodel/perfmodel.c
+++ b/src/core/perfmodel/perfmodel.c
@@ -227,7 +227,7 @@ double starpu_task_expected_conversion_time(struct starpu_task *task,
 
																 		starpu_data_handle_t handle;
															
 
																 		struct starpu_task *conversion_task;
															
 
																-		handle = task->handles[i];
															
 
																+		handle = STARPU_TASK_GET_HANDLE(task, i);
															
 
																 		if (!_starpu_data_is_multiformat_handle(handle))
															
 
																 			continue;
															
@@ -287,8 +287,8 @@ double starpu_task_expected_data_transfer_time(unsigned memory_node, struct star
 
																 	for (buffer = 0; buffer < nbuffers; buffer++)
															
 
																 	{
															
 
																-		starpu_data_handle_t handle = task->handles[buffer];
															
 
																-		enum starpu_access_mode mode = task->cl->modes[buffer];
															
 
																+		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, buffer);
															
 
																+		enum starpu_access_mode mode = STARPU_CODELET_GET_MODE(task->cl, buffer);
															
 
																 		penalty += starpu_data_expected_transfer_time(handle, memory_node, mode);
															
 
																 	}
															
@@ -375,8 +375,8 @@ double starpu_task_bundle_expected_data_transfer_time(starpu_task_bundle_t bundl
 
																 			unsigned b;
															
 
																 			for (b = 0; b < task->cl->nbuffers; b++)
															
 
																 			{
															
 
																-				starpu_data_handle_t handle = task->handles[b];
															
 
																-				enum starpu_access_mode mode = task->cl->modes[b];
															
 
																+				starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, b);
															
 
																+				enum starpu_access_mode mode = STARPU_CODELET_GET_MODE(task->cl, b);
															
 
																 				if (!(mode & STARPU_R))
															
 
																 					continue;
															
--- a/src/core/perfmodel/perfmodel_history.c
+++ b/src/core/perfmodel/perfmodel_history.c
@@ -72,7 +72,7 @@ size_t _starpu_job_get_data_size(struct starpu_perfmodel *model, enum starpu_per
 
																 		unsigned buffer;
															
 
																 		for (buffer = 0; buffer < nbuffers; buffer++)
															
 
																 		{
															
 
																-			starpu_data_handle_t handle = task->handles[buffer];
															
 
																+			starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, buffer);
															
 
																 			size += _starpu_data_get_size(handle);
															
 
																 		}
															
 
																 		return size;
															
@@ -1267,7 +1267,7 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 
																 		for (i = 0; i < task->cl->nbuffers; i++)
															
 
																 		{
															
 
																-			starpu_data_handle_t handle = task->handles[i];
															
 
																+			starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
															
 
																 			STARPU_ASSERT(handle->ops);
															
 
																 			STARPU_ASSERT(handle->ops->display);
															
--- a/src/core/sched_ctx.c
+++ b/src/core/sched_ctx.c
@@ -885,7 +885,7 @@ int starpu_get_workers_of_sched_ctx(unsigned sched_ctx_id, int *pus, enum starpu
 
																 	return npus;
															
 
																 }
															
 
																-starpu_pthread_mutex_t* starpu_sched_ctx_get_changing_ctx_mutex(unsigned sched_ctx_id)
															
 
																+starpu_pthread_mutex_t* _starpu_sched_ctx_get_changing_ctx_mutex(unsigned sched_ctx_id)
															
 
																 {
															
 
																 	return &changing_ctx_mutex[sched_ctx_id];
															
 
																 }
															
--- a/src/core/sched_ctx.h
+++ b/src/core/sched_ctx.h
@@ -144,6 +144,9 @@ void _starpu_worker_gets_out_of_ctx(unsigned sched_ctx_id, struct _starpu_worker
 
																 /* Check if the worker belongs to another sched_ctx */
															
 
																 unsigned _starpu_worker_belongs_to_a_sched_ctx(int workerid, unsigned sched_ctx_id);
															
 
																+/* mutex synchronising several simultaneous modifications of a context */
															
 
																+starpu_pthread_mutex_t* _starpu_sched_ctx_get_changing_ctx_mutex(unsigned sched_ctx_id);
															
 
																+
															
 
																 #ifdef STARPU_USE_SC_HYPERVISOR
															
 
																 /* Notifies the hypervisor that a tasks was poped from the workers' list */
															
 
																 void _starpu_sched_ctx_call_poped_task_cb(int workerid, struct starpu_task *task, size_t data_size, uint32_t footprint);
															
--- a/src/core/sched_policy.c
+++ b/src/core/sched_policy.c
@@ -23,7 +23,6 @@
 
																 #include <profiling/profiling.h>
															
 
																 #include <common/barrier.h>
															
 
																 #include <core/debug.h>
															
 
																-#include <core/parallel_task.h>
															
 
																 static int use_prefetch = 0;
															
@@ -236,7 +235,7 @@ static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int wo
 
																 				struct starpu_task *conversion_task;
															
 
																 				starpu_data_handle_t handle;
															
 
																-				handle = task->handles[i];
															
 
																+				handle = STARPU_TASK_GET_HANDLE(task, i);
															
 
																 				if (!_starpu_handle_needs_conversion_task(handle, node))
															
 
																 					continue;
															
@@ -249,7 +248,10 @@ static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int wo
 
																 			}
															
 
																 			for (i = 0; i < task->cl->nbuffers; i++)
															
 
																-				task->handles[i]->mf_node = node;
															
 
																+			{
															
 
																+				starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
															
 
																+				handle->mf_node = node;
															
 
																+			}
															
 
																 		}
															
 
																 //		if(task->sched_ctx != _starpu_get_initial_sched_ctx()->id)
															
@@ -281,7 +283,7 @@ static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int wo
 
																 		int j;
															
 
																 		for (j = 0; j < worker_size; j++)
															
 
																 		{
															
 
																-			struct starpu_task *alias = _starpu_create_task_alias(task);
															
 
																+			struct starpu_task *alias = starpu_task_dup(task);
															
 
																 			worker = _starpu_get_worker_struct(combined_workerid[j]);
															
 
																 			ret |= _starpu_push_local_task(worker, alias, 0);
															
@@ -396,7 +398,13 @@ int _starpu_push_task_to_workers(struct starpu_task *task)
 
																 	else
															
 
																 	{
															
 
																 		STARPU_ASSERT(sched_ctx->sched_policy->push_task);
															
 
																-		ret = sched_ctx->sched_policy->push_task(task);
															
 
																+		/* check out if there are any workers in the context */
															
 
																+		starpu_pthread_mutex_t *changing_ctx_mutex = _starpu_sched_ctx_get_changing_ctx_mutex(sched_ctx->id);
															
 
																+		_STARPU_PTHREAD_MUTEX_LOCK(changing_ctx_mutex);
															
 
																+		nworkers = starpu_sched_ctx_get_nworkers(sched_ctx->id);
															
 
																+		ret = nworkers == 0 ? -1 : sched_ctx->sched_policy->push_task(task);
															
 
																+		_STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
															
 
																+
															
 
																 		if(ret == -1)
															
 
																 		{
															
 
																 			fprintf(stderr, "repush task \n");
															
@@ -441,7 +449,7 @@ struct starpu_task *_starpu_create_conversion_task_for_arch(starpu_data_handle_t
 
																 	conversion_task = starpu_task_create();
															
 
																 	conversion_task->synchronous = 0;
															
 
																-	conversion_task->handles[0] = handle;
															
 
																+	STARPU_TASK_SET_HANDLE(conversion_task, handle, 0);
															
 
																 #if defined(STARPU_USE_OPENCL) || defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
															
 
																 	/* The node does not really matter here */
															
@@ -504,7 +512,7 @@ struct starpu_task *_starpu_create_conversion_task_for_arch(starpu_data_handle_t
 
																 		STARPU_ABORT();
															
 
																 	}
															
 
																-	conversion_task->cl->modes[0] = STARPU_RW;
															
 
																+	STARPU_CODELET_SET_MODE(conversion_task->cl, STARPU_RW, 0);
															
 
																 	return conversion_task;
															
 
																 }
															
@@ -657,7 +665,7 @@ pick:
 
																 		struct starpu_task *conversion_task;
															
 
																 		starpu_data_handle_t handle;
															
 
																-		handle = task->handles[i];
															
 
																+		handle = STARPU_TASK_GET_HANDLE(task, i);
															
 
																 		if (!_starpu_handle_needs_conversion_task(handle, node))
															
 
																 			continue;
															
 
																 		conversion_task = _starpu_create_conversion_task(handle, node);
															
--- a/src/core/task.c
+++ b/src/core/task.c
@@ -77,6 +77,11 @@ void starpu_task_init(struct starpu_task *task)
 
																 	task->sched_ctx = _starpu_get_initial_sched_ctx()->id;
															
 
																 	task->flops = 0.0;
															
 
																+
															
 
																+	task->scheduled = 0;
															
 
																+
															
 
																+	task->dyn_handles = NULL;
															
 
																+	task->dyn_interfaces = NULL;
															
 
																 }
															
 
																 /* Free all the ressources allocated for a task, without deallocating the task
															
@@ -99,6 +104,14 @@ void starpu_task_clean(struct starpu_task *task)
 
																 	if (bundle)
															
 
																 		starpu_task_bundle_remove(bundle, task);
															
 
																+	if (task->dyn_handles)
															
 
																+	{
															
 
																+		free(task->dyn_handles);
															
 
																+		task->dyn_handles = NULL;
															
 
																+		free(task->dyn_interfaces);
															
 
																+		task->dyn_interfaces = NULL;
															
 
																+	}
															
 
																+
															
 
																 	struct _starpu_job *j = (struct _starpu_job *)task->starpu_private;
															
 
																 	if (j)
															
@@ -229,7 +242,7 @@ int _starpu_submit_job(struct _starpu_job *j)
 
																 		unsigned i;
															
 
																 		for (i=0; i<task->cl->nbuffers; i++)
															
 
																 		{
															
 
																-			starpu_data_handle_t handle = task->handles[i];
															
 
																+			starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
															
 
																 			_starpu_spin_lock(&handle->header_lock);
															
 
																 			handle->busy_count++;
															
 
																 			_starpu_spin_unlock(&handle->header_lock);
															
@@ -393,16 +406,23 @@ int starpu_task_submit(struct starpu_task *task)
 
																 		unsigned i;
															
 
																 		/* Check buffers */
															
 
																-		STARPU_ASSERT_MSG(task->cl->nbuffers <= STARPU_NMAXBUFS, "Codelet %p has too many buffers (%d vs max %d)", task->cl, task->cl->nbuffers, STARPU_NMAXBUFS);
															
 
																+		if (task->dyn_handles == NULL)
															
 
																+			STARPU_ASSERT_MSG(task->cl->nbuffers <= STARPU_NMAXBUFS, "Codelet %p has too many buffers (%d vs max %d)", task->cl, task->cl->nbuffers, STARPU_NMAXBUFS);
															
 
																+
															
 
																+		if (task->dyn_handles)
															
 
																+		{
															
 
																+			task->dyn_interfaces = malloc(task->cl->nbuffers * sizeof(void *));
															
 
																+		}
															
 
																+
															
 
																 		for (i = 0; i < task->cl->nbuffers; i++)
															
 
																 		{
															
 
																-			starpu_data_handle_t handle = task->handles[i];
															
 
																+			starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
															
 
																 			/* Make sure handles are not partitioned */
															
 
																 			STARPU_ASSERT_MSG(handle->nchildren == 0, "only unpartitioned data can be used in a task");
															
 
																 			/* Provide the home interface for now if any,
															
 
																 			 * for can_execute hooks */
															
 
																 			if (handle->home_node != -1)
															
 
																-				task->interfaces[i] = starpu_data_get_interface_on_node(task->handles[i], handle->home_node);
															
 
																+				_STARPU_TASK_SET_INTERFACE(task, starpu_data_get_interface_on_node(handle, handle->home_node), i);
															
 
																 		}
															
 
																 		/* Check the type of worker(s) required by the task exist */
															
@@ -526,8 +546,10 @@ int _starpu_task_submit_nodeps(struct starpu_task *task)
 
																 		unsigned i;
															
 
																 		for (i=0 ; i<task->cl->nbuffers ; i++)
															
 
																 		{
															
 
																-			j->ordered_buffers[i].handle = j->task->handles[i];
															
 
																-			j->ordered_buffers[i].mode = j->task->cl->modes[i];
															
 
																+			starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(j->task, i);
															
 
																+			_STARPU_JOB_SET_ORDERED_BUFFER_HANDLE(j, handle, i);
															
 
																+			enum starpu_access_mode mode = STARPU_CODELET_GET_MODE(j->task->cl, i);
															
 
																+			_STARPU_JOB_SET_ORDERED_BUFFER_MODE(j, mode, i);
															
 
																 		}
															
 
																 	}
															
@@ -559,7 +581,7 @@ int _starpu_task_submit_conversion_task(struct starpu_task *task,
 
																 	unsigned i;
															
 
																 	for (i=0; i<task->cl->nbuffers; i++)
															
 
																 	{
															
 
																-		starpu_data_handle_t handle = task->handles[i];
															
 
																+		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
															
 
																 		_starpu_spin_lock(&handle->header_lock);
															
 
																 		handle->busy_count++;
															
 
																 		_starpu_spin_unlock(&handle->header_lock);
															
@@ -574,8 +596,10 @@ int _starpu_task_submit_conversion_task(struct starpu_task *task,
 
																 	for (i=0 ; i<task->cl->nbuffers ; i++)
															
 
																 	{
															
 
																-		j->ordered_buffers[i].handle = j->task->handles[i];
															
 
																-		j->ordered_buffers[i].mode = j->task->cl->modes[i];
															
 
																+		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(j->task, i);
															
 
																+		_STARPU_JOB_SET_ORDERED_BUFFER_HANDLE(j, handle, i);
															
 
																+		enum starpu_access_mode mode = STARPU_CODELET_GET_MODE(j->task->cl, i);
															
 
																+		_STARPU_JOB_SET_ORDERED_BUFFER_MODE(j, mode, i);
															
 
																 	}
															
 
																         _STARPU_LOG_IN();
															
@@ -604,7 +628,7 @@ void starpu_codelet_init(struct starpu_codelet *cl)
 
																 	memset(cl, 0, sizeof(struct starpu_codelet));
															
 
																 }
															
 
																-void starpu_display_codelet_stats(struct starpu_codelet *cl)
															
 
																+void starpu_codelet_display_stats(struct starpu_codelet *cl)
															
 
																 {
															
 
																 	unsigned worker;
															
 
																 	unsigned nworkers = starpu_worker_get_count();
															
@@ -811,7 +835,7 @@ _starpu_task_uses_multiformat_handles(struct starpu_task *task)
 
																 	unsigned i;
															
 
																 	for (i = 0; i < task->cl->nbuffers; i++)
															
 
																 	{
															
 
																-		if (_starpu_data_is_multiformat_handle(task->handles[i]))
															
 
																+		if (_starpu_data_is_multiformat_handle(STARPU_TASK_GET_HANDLE(task, i)))
															
 
																 			return 1;
															
 
																 	}
															
--- a/src/core/task.h
+++ b/src/core/task.h
@@ -73,4 +73,7 @@ starpu_cpu_func_t _starpu_task_get_cpu_nth_implementation(struct starpu_codelet
 
																 starpu_cuda_func_t _starpu_task_get_cuda_nth_implementation(struct starpu_codelet *cl, unsigned nimpl);
															
 
																 starpu_opencl_func_t _starpu_task_get_opencl_nth_implementation(struct starpu_codelet *cl, unsigned nimpl);
															
 
																+#define _STARPU_TASK_SET_INTERFACE(task, interface, i) do { if (task->dyn_handles) task->dyn_interfaces[i] = interface; else task->interfaces[i] = interface;} while(0)
															
 
																+#define _STARPU_TASK_GET_INTERFACES(task) ((task->dyn_handles) ? task->dyn_interfaces : task->interfaces)
															
 
																+
															
 
																 #endif // __CORE_TASK_H__
															
--- a/src/core/workers.c
+++ b/src/core/workers.c
@@ -1437,3 +1437,10 @@ starpu_driver_deinit(struct starpu_driver *d)
 
																 		return -EINVAL;
															
 
																 	}
															
 
																 }
															
 
																+
															
 
																+void starpu_get_version(int *major, int *minor, int *release)
															
 
																+{
															
 
																+	*major = STARPU_MAJOR_VERSION;
															
 
																+	*minor = STARPU_MINOR_VERSION;
															
 
																+	*release = STARPU_RELEASE_VERSION;
															
 
																+}
															
--- a/src/datawizard/coherency.c
+++ b/src/datawizard/coherency.c
@@ -22,6 +22,7 @@
 
																 #include <core/dependencies/data_concurrency.h>
															
 
																 #include <profiling/profiling.h>
															
 
																 #include <math.h>
															
 
																+#include <core/task.h>
															
 
																 static int link_supports_direct_transfers(starpu_data_handle_t handle, unsigned src_node, unsigned dst_node, unsigned *handling_node);
															
 
																 unsigned _starpu_select_src_node(starpu_data_handle_t handle, unsigned destination)
															
@@ -591,8 +592,8 @@ int starpu_prefetch_task_input_on_node(struct starpu_task *task, unsigned node)
 
																 	for (index = 0; index < nbuffers; index++)
															
 
																 	{
															
 
																-		starpu_data_handle_t handle = task->handles[index];
															
 
																-		enum starpu_access_mode mode = task->cl->modes[index];
															
 
																+		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, index);
															
 
																+		enum starpu_access_mode mode = STARPU_CODELET_GET_MODE(task->cl, index);
															
 
																 		if (mode & (STARPU_SCRATCH|STARPU_REDUX))
															
 
																 			continue;
															
@@ -624,7 +625,7 @@ int _starpu_fetch_task_input(struct _starpu_job *j, uint32_t mask)
 
																 	if (profiling && task->profiling_info)
															
 
																 		_starpu_clock_gettime(&task->profiling_info->acquire_data_start_time);
															
 
																-	struct starpu_buffer_descr *descrs = j->ordered_buffers;
															
 
																+	struct starpu_buffer_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j);
															
 
																 	unsigned nbuffers = task->cl->nbuffers;
															
 
																 	unsigned local_memory_node = _starpu_memory_node_get_local_key();
															
@@ -656,14 +657,14 @@ int _starpu_fetch_task_input(struct _starpu_job *j, uint32_t mask)
 
																 	/* Now that we have taken the data locks in locking order, fill the codelet interfaces in function order.  */
															
 
																 	for (index = 0; index < nbuffers; index++)
															
 
																 	{
															
 
																-		starpu_data_handle_t handle = task->handles[index];
															
 
																-		enum starpu_access_mode mode = task->cl->modes[index];
															
 
																+		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, index);
															
 
																+		enum starpu_access_mode mode = STARPU_CODELET_GET_MODE(task->cl, index);
															
 
																 		struct _starpu_data_replicate *local_replicate;
															
 
																 		local_replicate = get_replicate(handle, mode, workerid, local_memory_node);
															
 
																-		task->interfaces[index] = local_replicate->data_interface;
															
 
																+		_STARPU_TASK_SET_INTERFACE(task , local_replicate->data_interface, index);
															
 
																 		if (mode & STARPU_REDUX)
															
 
																 		{
															
@@ -699,7 +700,7 @@ void _starpu_push_task_output(struct _starpu_job *j, uint32_t mask)
 
																 	if (profiling && task->profiling_info)
															
 
																 		_starpu_clock_gettime(&task->profiling_info->release_data_start_time);
															
 
																-        struct starpu_buffer_descr *descrs = j->ordered_buffers;
															
 
																+        struct starpu_buffer_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j);
															
 
																         unsigned nbuffers = task->cl->nbuffers;
															
 
																 	int workerid = starpu_worker_get_id();
															
--- a/src/datawizard/filters.c
+++ b/src/datawizard/filters.c
@@ -305,7 +305,7 @@ void starpu_data_unpartition(starpu_data_handle_t root_handle, unsigned gatherin
 
																 				.nbuffers = 1
															
 
																 			};
															
 
																 			struct starpu_task *task = starpu_task_create();
															
 
																-			task->handles[0] = child_handle;
															
 
																+			STARPU_TASK_SET_HANDLE(task, child_handle, 0);
															
 
																 			task->cl = &cl;
															
 
																 			task->synchronous = 1;
															
 
																 			if (_starpu_task_submit_internally(task) != 0)
															
--- a/src/datawizard/footprint.c
+++ b/src/datawizard/footprint.c
@@ -43,7 +43,7 @@ uint32_t _starpu_compute_buffers_footprint(struct starpu_perfmodel *model, enum
 
																 	{
															
 
																 		for (buffer = 0; buffer < task->cl->nbuffers; buffer++)
															
 
																 		{
															
 
																-			starpu_data_handle_t handle = task->handles[buffer];
															
 
																+			starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, buffer);
															
 
																 			uint32_t handle_footprint = _starpu_data_get_footprint(handle);
															
--- a/src/datawizard/reduction.c
+++ b/src/datawizard/reduction.c
@@ -217,16 +217,16 @@ void _starpu_data_end_reduction_mode(starpu_data_handle_t handle)
 
																 					redux_task->cl = handle->redux_cl;
															
 
																 					STARPU_ASSERT(redux_task->cl);
															
 
																-					if (!redux_task->cl->modes[0])
															
 
																-						redux_task->cl->modes[0] = STARPU_RW;
															
 
																-					if (!redux_task->cl->modes[1])
															
 
																-						redux_task->cl->modes[1] = STARPU_R;
															
 
																+					if (!(STARPU_CODELET_GET_MODE(redux_task->cl, 0)))
															
 
																+						STARPU_CODELET_SET_MODE(redux_task->cl, STARPU_RW, 0);
															
 
																+					if (!(STARPU_CODELET_GET_MODE(redux_task->cl, 1)))
															
 
																+						STARPU_CODELET_SET_MODE(redux_task->cl, STARPU_R, 1);
															
 
																-					STARPU_ASSERT_MSG(redux_task->cl->modes[0] == STARPU_RW, "First parameter of reduction codelet has to be RW");
															
 
																-					STARPU_ASSERT_MSG(redux_task->cl->modes[1] == STARPU_R, "Second parameter of reduction codelet has to be R");
															
 
																+					STARPU_ASSERT_MSG(STARPU_CODELET_GET_MODE(redux_task->cl, 0) == STARPU_RW, "First parameter of reduction codelet has to be RW");
															
 
																+					STARPU_ASSERT_MSG(STARPU_CODELET_GET_MODE(redux_task->cl, 1) == STARPU_R, "Second parameter of reduction codelet has to be R");
															
 
																-					redux_task->handles[0] = replicate_array[i];
															
 
																-					redux_task->handles[1] = replicate_array[i+step];
															
 
																+					STARPU_TASK_SET_HANDLE(redux_task, replicate_array[i], 0);
															
 
																+					STARPU_TASK_SET_HANDLE(redux_task, replicate_array[i+step], 1);
															
 
																 					int ndeps = 0;
															
 
																 					struct starpu_task *task_deps[2];
															
@@ -278,10 +278,12 @@ void _starpu_data_end_reduction_mode(starpu_data_handle_t handle)
 
																 			redux_task->cl = handle->init_cl;
															
 
																 			STARPU_ASSERT(redux_task->cl);
															
 
																-			if (!redux_task->cl->modes[0])
															
 
																-				redux_task->cl->modes[0] = STARPU_W;
															
 
																-			STARPU_ASSERT_MSG(redux_task->cl->modes[0] == STARPU_W, "Parameter of initialization codelet has to be W");
															
 
																-			redux_task->handles[0] = handle;
															
 
																+
															
 
																+			if (!(STARPU_CODELET_GET_MODE(redux_task->cl, 0)))
															
 
																+				STARPU_CODELET_SET_MODE(redux_task->cl, STARPU_W, 0);
															
 
																+			STARPU_ASSERT_MSG(STARPU_CODELET_GET_MODE(redux_task->cl, 0) == STARPU_W, "Parameter of initialization codelet has to be W");
															
 
																+
															
 
																+			STARPU_TASK_SET_HANDLE(redux_task, handle, 0);
															
 
																 			int ret = _starpu_task_submit_internally(redux_task);
															
 
																 			STARPU_ASSERT(!ret);
															
@@ -311,8 +313,8 @@ void _starpu_data_end_reduction_mode(starpu_data_handle_t handle)
 
																 			STARPU_ASSERT_MSG(redux_task->cl->modes[0] == STARPU_RW, "First parameter of reduction codelet has to be RW");
															
 
																 			STARPU_ASSERT_MSG(redux_task->cl->modes[1] == STARPU_R, "Second parameter of reduction codelet has to be R");
															
 
																-			redux_task->handles[0] = handle;
															
 
																-			redux_task->handles[1] = replicate_array[replicate];
															
 
																+			STARPU_TASK_SET_HANDLE(redux_task, handle, 0);
															
 
																+			STARPU_TASK_SET_HANDLE(redux_task, replicate_array[replicate], 1);
															
 
																 			int ret = _starpu_task_submit_internally(redux_task);
															
 
																 			STARPU_ASSERT(!ret);
															
--- a/src/debug/traces/starpu_fxt.c
+++ b/src/debug/traces/starpu_fxt.c
@@ -197,6 +197,12 @@ static char *memnode_container_alias(char *output, int len, const char *prefix,
 
																 	return output;
															
 
																 }
															
 
																+static char *memmanager_container_alias(char *output, int len, const char *prefix, long unsigned int memnodeid)
															
 
																+{
															
 
																+	snprintf(output, len, "%smm%"PRIu64"", prefix, memnodeid);
															
 
																+	return output;
															
 
																+}
															
 
																+
															
 
																 static char *thread_container_alias(char *output, int len, const char *prefix, long unsigned int threadid)
															
 
																 {
															
 
																 	snprintf(output, len, "%st%"PRIu64"", prefix, threadid);
															
@@ -232,10 +238,10 @@ static void memnode_set_state(double time, const char *prefix, unsigned int memn
 
																 {
															
 
																 #ifdef STARPU_HAVE_POTI
															
 
																 	char container[STARPU_POTI_STR_LEN];
															
 
																-	memnode_container_alias(container, STARPU_POTI_STR_LEN, prefix, memnodeid);
															
 
																+	memmanager_container_alias(container, STARPU_POTI_STR_LEN, prefix, memnodeid);
															
 
																 	poti_SetState(time, container, "MS", name);
															
 
																 #else
															
 
																-	fprintf(out_paje_file, "10	%.9f	%smn%u	MS	%s\n", time, prefix, memnodeid, name);
															
 
																+	fprintf(out_paje_file, "10	%.9f	%smm%u	MS	%s\n", time, prefix, memnodeid, name);
															
 
																 #endif
															
 
																 }
															
@@ -280,15 +286,21 @@ static void handle_new_mem_node(struct fxt_ev_64 *ev, struct starpu_fxt_options
 
																 		/* TODO: ramkind */
															
 
																 		snprintf(new_memnode_container_name, STARPU_POTI_STR_LEN, "%sMEMNODE%"PRIu64"", prefix, ev->param[0]);
															
 
																 		poti_CreateContainer(get_event_time_stamp(ev, options), new_memnode_container_alias, "Mn", program_container, new_memnode_container_name);
															
 
																+
															
 
																+		memmanager_container_alias (new_memnode_container_alias, STARPU_POTI_STR_LEN, prefix, ev->param[0]);
															
 
																+		/* TODO: ramkind */
															
 
																+		snprintf(new_memnode_container_name, STARPU_POTI_STR_LEN, "%sMEMMANAGER%"PRIu64"", prefix, ev->param[0]);
															
 
																+		poti_CreateContainer(get_event_time_stamp(ev, options), new_memnode_container_alias, "Mm", program_container, new_memnode_container_name);
															
 
																 #else
															
 
																 		fprintf(out_paje_file, "7	%.9f	%smn%"PRIu64"	Mn	%sp	%sMEMNODE%"PRIu64"\n", get_event_time_stamp(ev, options), prefix, ev->param[0], prefix, options->file_prefix, ev->param[0]);
															
 
																+		fprintf(out_paje_file, "7	%.9f	%smm%"PRIu64"	Mm	%sp	%sMEMMANAGER%"PRIu64"\n", get_event_time_stamp(ev, options), prefix, ev->param[0], prefix, options->file_prefix, ev->param[0]);
															
 
																 #endif
															
 
																 		if (!options->no_bus)
															
 
																 #ifdef STARPU_HAVE_POTI
															
 
																 			poti_SetVariable(get_event_time_stamp(ev, options), new_memnode_container_alias, "bw", 0.0);
															
 
																 #else
															
 
																-			fprintf(out_paje_file, "13	%.9f	%smn%"PRIu64"	bw	0.0\n", 0.0f, prefix, ev->param[0]);
															
 
																+			fprintf(out_paje_file, "13	%.9f	%smm%"PRIu64"	bw	0.0\n", 0.0f, prefix, ev->param[0]);
															
 
																 #endif
															
 
																 	}
															
 
																 }
															
@@ -703,10 +715,10 @@ static void handle_start_driver_copy(struct fxt_ev_64 *ev, struct starpu_fxt_opt
 
																 			snprintf(paje_value, STARPU_POTI_STR_LEN, "%u", size);
															
 
																 			snprintf(paje_key, STARPU_POTI_STR_LEN, "com_%u", comid);
															
 
																 			program_container_alias(program_container, STARPU_POTI_STR_LEN, prefix);
															
 
																-			memnode_container_alias(src_memnode_container, STARPU_POTI_STR_LEN, prefix, src);
															
 
																+			memmanager_container_alias(src_memnode_container, STARPU_POTI_STR_LEN, prefix, src);
															
 
																 			poti_StartLink(time, program_container, "L", src_memnode_container, paje_value, paje_key);
															
 
																 #else
															
 
																-			fprintf(out_paje_file, "18	%.9f	L	%sp	%u	%smn%u	com_%u\n", time, prefix, size, prefix, src, comid);
															
 
																+			fprintf(out_paje_file, "18	%.9f	L	%sp	%u	%smm%u	com_%u\n", time, prefix, size, prefix, src, comid);
															
 
																 #endif
															
 
																 		}
															
@@ -743,10 +755,10 @@ static void handle_end_driver_copy(struct fxt_ev_64 *ev, struct starpu_fxt_optio
 
																 			snprintf(paje_value, STARPU_POTI_STR_LEN, "%u", size);
															
 
																 			snprintf(paje_key, STARPU_POTI_STR_LEN, "com_%u", comid);
															
 
																 			program_container_alias(program_container, STARPU_POTI_STR_LEN, prefix);
															
 
																-			memnode_container_alias(dst_memnode_container, STARPU_POTI_STR_LEN, prefix, dst);
															
 
																+			memmanager_container_alias(dst_memnode_container, STARPU_POTI_STR_LEN, prefix, dst);
															
 
																 			poti_EndLink(time, program_container, "L", dst_memnode_container, paje_value, paje_key);
															
 
																 #else
															
 
																-			fprintf(out_paje_file, "19	%.9f	L	%sp	%u	%smn%u	com_%u\n", time, prefix, size, prefix, dst, comid);
															
 
																+			fprintf(out_paje_file, "19	%.9f	L	%sp	%u	%smm%u	com_%u\n", time, prefix, size, prefix, dst, comid);
															
 
																 #endif
															
 
																 		}
															
@@ -1187,10 +1199,10 @@ void _starpu_fxt_display_bandwidth(struct starpu_fxt_options *options)
 
																 		{
															
 
																 #ifdef STARPU_HAVE_POTI
															
 
																 			char src_memnode_container[STARPU_POTI_STR_LEN];
															
 
																-			memnode_container_alias(src_memnode_container, STARPU_POTI_STR_LEN, prefix, itor->src_node);
															
 
																+			memmanager_container_alias(src_memnode_container, STARPU_POTI_STR_LEN, prefix, itor->src_node);
															
 
																 			poti_SetVariable(itor->comm_start, src_memnode_container, "bw", current_bandwidth_per_node[itor->src_node]);
															
 
																 #else
															
 
																-			fprintf(out_paje_file, "13	%.9f	%smn%u	bw	%f\n",
															
 
																+			fprintf(out_paje_file, "13	%.9f	%smm%u	bw	%f\n",
															
 
																 				itor->comm_start, prefix, itor->src_node, current_bandwidth_per_node[itor->src_node]);
															
 
																 #endif
															
 
																 		}
															
@@ -1200,10 +1212,10 @@ void _starpu_fxt_display_bandwidth(struct starpu_fxt_options *options)
 
																 		{
															
 
																 #ifdef STARPU_HAVE_POTI
															
 
																 			char dst_memnode_container[STARPU_POTI_STR_LEN];
															
 
																-			memnode_container_alias(dst_memnode_container, STARPU_POTI_STR_LEN, prefix, itor->dst_node);
															
 
																+			memmanager_container_alias(dst_memnode_container, STARPU_POTI_STR_LEN, prefix, itor->dst_node);
															
 
																 			poti_SetVariable(itor->comm_start, dst_memnode_container, "bw", current_bandwidth_per_node[itor->dst_node]);
															
 
																 #else
															
 
																-			fprintf(out_paje_file, "13	%.9f	%smn%u	bw	%f\n",
															
 
																+			fprintf(out_paje_file, "13	%.9f	%smm%u	bw	%f\n",
															
 
																 				itor->comm_start, prefix, itor->dst_node, current_bandwidth_per_node[itor->dst_node]);
															
 
																 #endif
															
 
																 		}
															
--- a/src/debug/traces/starpu_paje.c
+++ b/src/debug/traces/starpu_paje.c
@@ -137,13 +137,14 @@ void _starpu_fxt_write_paje_header(FILE *file)
 
																 	poti_DefineContainerType("P", "MPIP", "Program");
															
 
																 	poti_DefineContainerType("Mn", "P", "Memory Node");
															
 
																 	poti_DefineContainerType("T", "Mn", "Thread");
															
 
																+	poti_DefineContainerType("Mm", "Mn", "Memory Manager");
															
 
																 	poti_DefineContainerType("W", "T", "Worker");
															
 
																 	poti_DefineContainerType("MPICt", "T", "MPI Communication Thread");
															
 
																 	poti_DefineContainerType("Sc", "P", "Scheduler");
															
 
																 	/* Types for the memory node */
															
 
																-	poti_DefineVariableType("bw", "Mn", "Bandwidth", "0 0 0");
															
 
																-	poti_DefineStateType("MS", "Mn", "Memory Node State");
															
 
																+	poti_DefineVariableType("bw", "Mm", "Bandwidth", "0 0 0");
															
 
																+	poti_DefineStateType("MS", "Mm", "Memory Node State");
															
 
																 	poti_DefineEntityValue("A", "MS", "Allocating", ".4 .1 .0");
															
 
																 	poti_DefineEntityValue("Ar", "MS", "AllocatingReuse", ".1 .1 .8");
															
 
																 	poti_DefineEntityValue("R", "MS", "Reclaiming", ".0 .1 .4");
															
@@ -196,7 +197,7 @@ void _starpu_fxt_write_paje_header(FILE *file)
 
																 	/* Link types */
															
 
																 	poti_DefineLinkType("MPIL", "P", "MPICt", "MPICt", "Links between two MPI Communication Threads");
															
 
																-	poti_DefineLinkType("L", "P", "Mn", "Mn", "Links between two Memory Nodes");
															
 
																+	poti_DefineLinkType("L", "P", "Mm", "Mm", "Links between two Memory Managers");
															
 
																 	/* Creating the MPI Program */
															
 
																 	poti_CreateContainer(0, "MPIroot", "MPIP", "0", "root");
															
@@ -206,6 +207,7 @@ void _starpu_fxt_write_paje_header(FILE *file)
 
																 1       P      MPIP       \"Program\"                      	\n\
															
 
																 1       Mn      P       \"Memory Node\"                         \n\
															
 
																 1       T      Mn       \"Thread\"                               \n\
															
 
																+1       Mm      Mn       \"Memory Manager\"                         \n\
															
 
																 1       W      T       \"Worker\"                               \n\
															
 
																 1       MPICt   T       \"MPI Communication Thread\"              \n\
															
 
																 1       Sc       P       \"Scheduler State\"                        \n\
															
@@ -216,9 +218,9 @@ void _starpu_fxt_write_paje_header(FILE *file)
 
																 	for (i=1; i<=10; i++)
															
 
																 		fprintf(file, "3       Ctx%u      T     \"InCtx%u\"         		\n", i, i);
															
 
																 	fprintf(file, "\
															
 
																-3       MS       Mn       \"Memory Node State\"                        \n\
															
 
																+3       MS       Mm       \"Memory Node State\"                        \n\
															
 
																 4       ntask    Sc       \"Number of tasks\"                        \n\
															
 
																-4       bw      Mn       \"Bandwidth\"                        \n\
															
 
																+4       bw      Mm       \"Bandwidth\"                        \n\
															
 
																 6       I       S      Initializing       \"0.0 .7 1.0\"            \n\
															
 
																 6       D       S      Deinitializing       \"0.0 .1 .7\"            \n\
															
 
																 6       Fi       S      FetchingInput       \"1.0 .1 1.0\"            \n\
															
@@ -255,7 +257,7 @@ void _starpu_fxt_write_paje_header(FILE *file)
 
																 6       CoA      MS     DriverCopyAsync         \".1 .3 .1\"		\n\
															
 
																 6       No       MS     Nothing         \".0 .0 .0\"		\n\
															
 
																 5       MPIL     P	MPICt	MPICt   MPIL			\n\
															
 
																-5       L       P	Mn	Mn      L\n");
															
 
																+5       L       P	Mm	Mm      L\n");
															
 
																 	fprintf(file, "7      0.0 MPIroot      MPIP      0       root\n");
															
 
																 #endif
															
--- a/src/drivers/cpu/driver_cpu.c
+++ b/src/drivers/cpu/driver_cpu.c
@@ -158,7 +158,7 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct starpu_task *worker_
 
																 #ifdef STARPU_SIMGRID
															
 
																 		_starpu_simgrid_execute_job(j, perf_arch, NAN);
															
 
																 #else
															
 
																-		func(task->interfaces, task->cl_arg);
															
 
																+		func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
															
 
																 #endif
															
 
																 		if (is_parallel_task && cl->type == STARPU_FORKJOIN)
															
 
																 			/* rebind to single CPU */
															
--- a/src/drivers/cuda/driver_cuda.c
+++ b/src/drivers/cuda/driver_cuda.c
@@ -353,7 +353,7 @@ static int execute_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *arg
 
																 #ifdef STARPU_SIMGRID
															
 
																 	_starpu_simgrid_execute_job(j, args->perf_arch, NAN);
															
 
																 #else
															
 
																-	func(task->interfaces, task->cl_arg);
															
 
																+	func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
															
 
																 #endif
															
 
																 	_starpu_driver_end_job(args, j, args->perf_arch, &codelet_end, 0, profiling);
															
--- a/src/drivers/gordon/driver_gordon.c
+++ b/src/drivers/gordon/driver_gordon.c
@@ -102,7 +102,7 @@ static void starpu_to_gordon_buffers(struct _starpu_job *j, struct gordon_ppu_jo
 
																 	unsigned nbuffers = cl->nbuffers;
															
 
																 	for (buffer = 0; buffer < nbuffers; buffer++)
															
 
																 	{
															
 
																-		enum starpu_access_mode mode = cl->modes[buffer];
															
 
																+		enum starpu_access_mode mode = STARPU_CODELET_GET_MODE(cl, buffer);
															
 
																 		switch (mode)
															
 
																 		{
															
@@ -122,7 +122,7 @@ static void starpu_to_gordon_buffers(struct _starpu_job *j, struct gordon_ppu_jo
 
																 	for (buffer = 0; buffer < nbuffers; buffer++)
															
 
																 	{
															
 
																 		unsigned gordon_buffer;
															
 
																-		enum starpu_access_mode mode = cl->modes[buffer];
															
 
																+		enum starpu_access_mode mode = STARPU_CODELET_GET_MODE(cl, buffer);
															
 
																 		switch (mode)
															
 
																 		{
															
@@ -138,7 +138,7 @@ static void starpu_to_gordon_buffers(struct _starpu_job *j, struct gordon_ppu_jo
 
																 				break;
															
 
																 		}
															
 
																-		starpu_data_handle_t handle = task->handles[buffer];
															
 
																+		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, buffer);
															
 
																 		gordon_job->nalloc = 0;
															
 
																 		gordon_job->nin = nin;
															
--- a/src/drivers/opencl/driver_opencl.c
+++ b/src/drivers/opencl/driver_opencl.c
@@ -824,7 +824,7 @@ static int _starpu_opencl_execute_job(struct _starpu_job *j, struct _starpu_work
 
																 #ifdef STARPU_SIMGRID
															
 
																 	double length = NAN;
															
 
																   #ifdef STARPU_OPENCL_SIMULATOR
															
 
																-	func(task->interfaces, task->cl_arg);
															
 
																+	func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
															
 
																     #ifndef CL_PROFILING_CLOCK_CYCLE_COUNT
															
 
																       #ifdef CL_PROFILING_COMMAND_SHAVE_CYCLE_COUNT
															
 
																         #define CL_PROFILING_CLOCK_CYCLE_COUNT CL_PROFILING_COMMAND_SHAVE_CYCLE_COUNT
															
@@ -838,7 +838,7 @@ static int _starpu_opencl_execute_job(struct _starpu_job *j, struct _starpu_work
 
																   #endif
															
 
																 	_starpu_simgrid_execute_job(j, args->perf_arch, length);
															
 
																 #else
															
 
																-	func(task->interfaces, task->cl_arg);
															
 
																+	func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
															
 
																 #endif
															
 
																 	_starpu_driver_end_job(args, j, args->perf_arch, &codelet_end, 0, profiling);
															
--- a/src/profiling/bound.c
+++ b/src/profiling/bound.c
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
															
 
																- * Copyright (C) 2010-2012  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2010-2013  Université de Bordeaux 1
															
 
																  * Copyright (C) 2011  Télécom-SudParis
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
@@ -501,10 +501,16 @@ void starpu_bound_print_lp(FILE *output)
 
																 		}
															
 
																 		fprintf(output, "/* StarPU upper bound linear programming problem, to be run in lp_solve. */\n\n");
															
 
																 		fprintf(output, "/* !! This is a big system, it will be long to solve !! */\n\n");
															
 
																+
															
 
																 		fprintf(output, "/* We want to minimize total execution time (ms) */\n");
															
 
																 		fprintf(output, "min: tmax;\n\n");
															
 
																-		fprintf(output, "/* Which is the maximum of all task completion times (ms) */\n");
															
 
																+		fprintf(output, "/* Number of tasks */\n");
															
 
																+		fprintf(output, "nt = %d;\n", nt);
															
 
																+		fprintf(output, "/* Number of workers */\n");
															
 
																+		fprintf(output, "nw = %d;\n", nw);
															
 
																+
															
 
																+		fprintf(output, "/* The total execution time is the maximum of all task completion times (ms) */\n");
															
 
																 		for (t1 = tasks; t1; t1 = t1->next)
															
 
																 			fprintf(output, "c%lu <= tmax;\n", t1->id);
															
@@ -836,12 +842,12 @@ void starpu_bound_print_mps(FILE *output)
 
																 		fprintf(output, "NAME           StarPU theoretical bound\n");
															
 
																-		fprintf(output, "\nROWS\n");
															
 
																+		fprintf(output, "*\nROWS\n");
															
 
																 		fprintf(output, "* We want to minimize total execution time (ms)\n");
															
 
																 		fprintf(output, " N  TMAX\n");
															
 
																-		fprintf(output, "\n* Which is the maximum of all worker execution times (ms)\n");
															
 
																+		fprintf(output, "* Which is the maximum of all worker execution times (ms)\n");
															
 
																 		for (w = 0; w < nw; w++)
															
 
																 		{
															
 
																 			char name[32];
															
@@ -850,36 +856,36 @@ void starpu_bound_print_mps(FILE *output)
 
																 			fprintf(output, " L  W%d\n", w);
															
 
																 		}
															
 
																-		fprintf(output, "\n* And we have to have computed exactly all tasks\n");
															
 
																+		fprintf(output, "*\n* And we have to have computed exactly all tasks\n*\n");
															
 
																 		for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
															
 
																 		{
															
 
																 			fprintf(output, "* task %s key %x\n", _starpu_codelet_get_model_name(tp->cl), (unsigned) tp->footprint);
															
 
																 			fprintf(output, " E  T%d\n", t);
															
 
																 		}
															
 
																-		fprintf(output, "\nCOLUMNS\n");
															
 
																+		fprintf(output, "*\nCOLUMNS\n*\n");
															
 
																-		fprintf(output, "\n* Execution times and completion of all tasks\n");
															
 
																+		fprintf(output, "*\n* Execution times and completion of all tasks\n*\n");
															
 
																 		for (w = 0; w < nw; w++)
															
 
																 			for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
															
 
																 				if (!isnan(times[w*nt+t]))
															
 
																 				{
															
 
																 					char name[9];
															
 
																 					snprintf(name, sizeof(name), "W%dT%d", w, t);
															
 
																-					fprintf(stderr,"    %-8s  W%-7d  %12f\n", name, w, times[w*nt+t]);
															
 
																-					fprintf(stderr,"    %-8s  T%-7d  %12d\n", name, t, 1);
															
 
																+					fprintf(output,"    %-8s  W%-7d  %12f\n", name, w, times[w*nt+t]);
															
 
																+					fprintf(output,"    %-8s  T%-7d  %12d\n", name, t, 1);
															
 
																 				}
															
 
																-		fprintf(output, "\n* Total execution time\n");
															
 
																+		fprintf(output, "*\n* Total execution time\n*\n");
															
 
																 		for (w = 0; w < nw; w++)
															
 
																-			fprintf(stderr,"    TMAX      W%-2d       %12d\n", w, -1);
															
 
																-		fprintf(stderr,"    TMAX      TMAX      %12d\n", 1);
															
 
																+			fprintf(output,"    TMAX      W%-2d       %12d\n", w, -1);
															
 
																+		fprintf(output,"    TMAX      TMAX      %12d\n", 1);
															
 
																-		fprintf(output, "\nRHS\n");
															
 
																+		fprintf(output, "*\nRHS\n*\n");
															
 
																-		fprintf(output, "\n* Total number of tasks\n");
															
 
																+		fprintf(output, "*\n* Total number of tasks\n*\n");
															
 
																 		for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
															
 
																-			fprintf(stderr,"    NT%-2d      T%-7d  %12lu\n", t, t, tp->n);
															
 
																+			fprintf(output,"    NT%-2d      T%-7d  %12lu\n", t, t, tp->n);
															
 
																 		fprintf(output, "ENDATA\n");
															
 
																 	}
															
--- a/src/sched_policies/deque_modeling_policy_data_aware.c
+++ b/src/sched_policies/deque_modeling_policy_data_aware.c
@@ -27,11 +27,7 @@
 
																 #include <core/workers.h>
															
 
																 #include <sched_policies/fifo_queues.h>
															
 
																 #include <core/perfmodel/perfmodel.h>
															
 
																-#include <starpu_parameters.h>
															
 
																 #include <core/debug.h>
															
 
																-#ifdef STARPU_USE_TOP
															
 
																-#include <top/starpu_top_core.h>
															
 
																-#endif /* !STARPU_USE_TOP */
															
 
																 #ifndef DBL_MIN
															
 
																 #define DBL_MIN __DBL_MIN__
															
@@ -54,12 +50,23 @@ struct _starpu_dmda_data
 
																 	long int ready_task_cnt;
															
 
																 };
															
 
																-static double alpha = _STARPU_DEFAULT_ALPHA;
															
 
																-static double beta = _STARPU_DEFAULT_BETA;
															
 
																-static double _gamma = _STARPU_DEFAULT_GAMMA;
															
 
																 static double idle_power = 0.0;
															
 
																+/* The dmda scheduling policy uses
															
 
																+ *
															
 
																+ * alpha * T_computation + beta * T_communication + gamma * Consumption
															
 
																+ *
															
 
																+ * Here are the default values of alpha, beta, gamma
															
 
																+ */
															
 
																+
															
 
																+#define _STARPU_SCHED_ALPHA_DEFAULT 1.0
															
 
																+#define _STARPU_SCHED_BETA_DEFAULT 1.0
															
 
																+#define _STARPU_SCHED_GAMMA_DEFAULT 1000.0
															
 
																+
															
 
																 #ifdef STARPU_USE_TOP
															
 
																+static double alpha = _STARPU_SCHED_ALPHA_DEFAULT;
															
 
																+static double beta = _STARPU_SCHED_BETA_DEFAULT;
															
 
																+static double _gamma = _STARPU_SCHED_GAMMA_DEFAULT;
															
 
																 static const float alpha_minimum=0;
															
 
																 static const float alpha_maximum=10.0;
															
 
																 static const float beta_minimum=0;
															
@@ -80,7 +87,7 @@ static int count_non_ready_buffers(struct starpu_task *task, unsigned node)
 
																 	{
															
 
																 		starpu_data_handle_t handle;
															
 
																-		handle = task->handles[index];
															
 
																+		handle = STARPU_TASK_GET_HANDLE(task, index);
															
 
																 		int is_valid;
															
 
																 		starpu_data_query_status(handle, node, NULL, &is_valid, NULL);
															
@@ -281,15 +288,10 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 
																 	_STARPU_PTHREAD_MUTEX_LOCK(sched_mutex);
															
 
																-/* Sometimes workers didn't take the tasks as early as we expected */
															
 
																+        /* Sometimes workers didn't take the tasks as early as we expected */
															
 
																 	fifo->exp_start = STARPU_MAX(fifo->exp_start, starpu_timing_now());
															
 
																 	fifo->exp_end = fifo->exp_start + fifo->exp_len;
															
 
																-	if(!isnan(predicted))
															
 
																-	{
															
 
																-		fifo->exp_end += predicted;
															
 
																-		fifo->exp_len += predicted;
															
 
																-	}
															
 
																-	
															
 
																+
															
 
																 	if (starpu_timing_now() + predicted_transfer < fifo->exp_end)
															
 
																 	{
															
 
																 		/* We may hope that the transfer will be finished by
															
@@ -309,16 +311,21 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 
																 		fifo->exp_len += predicted_transfer;
															
 
																 	}
															
 
																+	if(!isnan(predicted))
															
 
																+	{
															
 
																+		fifo->exp_end += predicted;
															
 
																+		fifo->exp_len += predicted;
															
 
																+	}
															
 
																+
															
 
																 	_STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
															
 
																 	task->predicted = predicted;
															
 
																 	task->predicted_transfer = predicted_transfer;
															
 
																 #ifdef STARPU_USE_TOP
															
 
																-	if (_starpu_top_status_get())
															
 
																-		_starpu_top_task_prevision(task, best_workerid,
															
 
																-			(unsigned long long)(fifo->exp_end-predicted)/1000,
															
 
																-			(unsigned long long)fifo->exp_end/1000);
															
 
																+	starpu_top_task_prevision(task, best_workerid,
															
 
																+				  (unsigned long long)(fifo->exp_end-predicted)/1000,
															
 
																+				  (unsigned long long)fifo->exp_end/1000);
															
 
																 #endif /* !STARPU_USE_TOP */
															
 
																 	if (starpu_get_prefetch_flag())
															
@@ -388,6 +395,17 @@ static int _dm_push_task(struct starpu_task *task, unsigned prio, unsigned sched
 
																 		unsigned memory_node = starpu_worker_get_memory_node(worker);
															
 
																 		enum starpu_perf_archtype perf_arch = starpu_worker_get_perf_archtype(worker);
															
 
																+		/* Sometimes workers didn't take the tasks as early as we expected */
															
 
																+		starpu_pthread_mutex_t *sched_mutex;
															
 
																+		starpu_pthread_cond_t *sched_cond;
															
 
																+		starpu_worker_get_sched_condition(worker, &sched_mutex, &sched_cond);
															
 
																+
															
 
																+		_STARPU_PTHREAD_MUTEX_LOCK(sched_mutex);
															
 
																+		fifo->exp_start = STARPU_MAX(fifo->exp_start, starpu_timing_now());
															
 
																+		fifo->exp_end = fifo->exp_start + fifo->exp_len;
															
 
																+		_STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
															
 
																+
															
 
																+
															
 
																 		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
															
 
																 		{
															
 
																 			if (!starpu_worker_can_execute_task(worker, task, nimpl))
															
@@ -398,27 +416,40 @@ static int _dm_push_task(struct starpu_task *task, unsigned prio, unsigned sched
 
																 			}
															
 
																 			double exp_end;
															
 
																-			starpu_pthread_mutex_t *sched_mutex;
															
 
																-			starpu_pthread_cond_t *sched_cond;
															
 
																-			starpu_worker_get_sched_condition(worker, &sched_mutex, &sched_cond);
															
 
																-
															
 
																-			/* Sometimes workers didn't take the tasks as early as we expected */
															
 
																-			_STARPU_PTHREAD_MUTEX_LOCK(sched_mutex);
															
 
																-			fifo->exp_start = STARPU_MAX(fifo->exp_start, starpu_timing_now());
															
 
																-			fifo->exp_end = fifo->exp_start + fifo->exp_len;
															
 
																-			_STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
															
 
																-
															
 
																-
															
 
																 			double local_length = starpu_task_expected_length(task, perf_arch, nimpl);
															
 
																 			double local_penalty = starpu_task_expected_data_transfer_time(memory_node, task);
															
 
																 			double ntasks_end = fifo->ntasks / starpu_worker_get_relative_speedup(perf_arch);
															
 
																 			//_STARPU_DEBUG("Scheduler dm: task length (%lf) worker (%u) kernel (%u) \n", local_length,worker,nimpl);
															
 
																+			/*
															
 
																+			 * This implements a default greedy scheduler for the
															
 
																+			 * case of tasks which have no performance model, or
															
 
																+			 * whose performance model is not calibrated yet.
															
 
																+			 *
															
 
																+			 * It simply uses the number of tasks already pushed to
															
 
																+			 * the workers, divided by the relative performance of
															
 
																+			 * a CPU and of a GPU.
															
 
																+			 *
															
 
																+			 * This is always computed, but the ntasks_best
															
 
																+			 * selection is only really used if the task indeed has
															
 
																+			 * no performance model, or is not calibrated yet.
															
 
																+			 */
															
 
																 			if (ntasks_best == -1
															
 
																-			    || (!calibrating && ntasks_end < ntasks_best_end) /* Not calibrating, take better task */
															
 
																-			    || (!calibrating && isnan(local_length)) /* Not calibrating but this worker is being calibrated */
															
 
																-			    || (calibrating && isnan(local_length) && ntasks_end < ntasks_best_end) /* Calibrating, compete this worker with other non-calibrated */
															
 
																+			
															
 
																+			    /* Always compute the greedy decision, at least for
															
 
																+			     * the tasks with no performance model. */
															
 
																+			    || (!calibrating && ntasks_end < ntasks_best_end)
															
 
																+
															
 
																+			    /* The performance model of this task is not
															
 
																+			     * calibrated on this worker, try to run it there
															
 
																+			     * to calibrate it there. */
															
 
																+			    || (!calibrating && isnan(local_length))
															
 
																+
															
 
																+			    /* the performance model of this task is not
															
 
																+			     * calibrated on this worker either, rather run it
															
 
																+			     * there if this one is low on scheduled tasks. */
															
 
																+			    || (calibrating && isnan(local_length) && ntasks_end < ntasks_best_end)
															
 
																 				)
															
 
																 			{
															
 
																 				ntasks_best_end = ntasks_end;
															
@@ -509,6 +540,15 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 
																 		enum starpu_perf_archtype perf_arch = starpu_worker_get_perf_archtype(worker);
															
 
																 		unsigned memory_node = starpu_worker_get_memory_node(worker);
															
 
																+		/* Sometimes workers didn't take the tasks as early as we expected */
															
 
																+		starpu_pthread_mutex_t *sched_mutex;
															
 
																+		starpu_pthread_cond_t *sched_cond;
															
 
																+		starpu_worker_get_sched_condition(worker, &sched_mutex, &sched_cond);
															
 
																+
															
 
																+		_STARPU_PTHREAD_MUTEX_LOCK(sched_mutex);
															
 
																+		fifo->exp_start = STARPU_MAX(fifo->exp_start, starpu_timing_now());
															
 
																+		_STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
															
 
																+
															
 
																 		for(nimpl  = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
															
 
																 	 	{
															
 
																 			if (!starpu_worker_can_execute_task(worker, task, nimpl))
															
@@ -517,15 +557,7 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 
																 				continue;
															
 
																 			}
															
 
																-			/* Sometimes workers didn't take the tasks as early as we expected */
															
 
																-			starpu_pthread_mutex_t *sched_mutex;
															
 
																-			starpu_pthread_cond_t *sched_cond;
															
 
																-			starpu_worker_get_sched_condition(worker, &sched_mutex, &sched_cond);
															
 
																-
															
 
																 			STARPU_ASSERT_MSG(fifo != NULL, "worker %d ctx %d\n", worker, sched_ctx_id);
															
 
																-			_STARPU_PTHREAD_MUTEX_LOCK(sched_mutex);
															
 
																-			fifo->exp_start = STARPU_MAX(fifo->exp_start, starpu_timing_now());
															
 
																-			_STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
															
 
																 			exp_end[worker_ctx][nimpl] = fifo->exp_start + fifo->exp_len;
															
 
																 			if (exp_end[worker_ctx][nimpl] > max_exp_end)
															
 
																 				max_exp_end = exp_end[worker_ctx][nimpl];
															
@@ -551,10 +583,34 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 
																 			double ntasks_end = fifo->ntasks / starpu_worker_get_relative_speedup(perf_arch);
															
 
																+			/*
															
 
																+			 * This implements a default greedy scheduler for the
															
 
																+			 * case of tasks which have no performance model, or
															
 
																+			 * whose performance model is not calibrated yet.
															
 
																+			 *
															
 
																+			 * It simply uses the number of tasks already pushed to
															
 
																+			 * the workers, divided by the relative performance of
															
 
																+			 * a CPU and of a GPU.
															
 
																+			 *
															
 
																+			 * This is always computed, but the ntasks_best
															
 
																+			 * selection is only really used if the task indeed has
															
 
																+			 * no performance model, or is not calibrated yet.
															
 
																+			 */
															
 
																 			if (ntasks_best == -1
															
 
																-			    || (!calibrating && ntasks_end < ntasks_best_end) /* Not calibrating, take better worker */
															
 
																-			    || (!calibrating && isnan(local_task_length[worker_ctx][nimpl])) /* Not calibrating but this worker is being calibrated */
															
 
																-			    || (calibrating && isnan(local_task_length[worker_ctx][nimpl]) && ntasks_end < ntasks_best_end) /* Calibrating, compete this worker with other non-calibrated */
															
 
																+
															
 
																+			    /* Always compute the greedy decision, at least for
															
 
																+			     * the tasks with no performance model. */
															
 
																+			    || (!calibrating && ntasks_end < ntasks_best_end)
															
 
																+
															
 
																+			    /* The performance model of this task is not
															
 
																+			     * calibrated on this worker, try to run it there
															
 
																+			     * to calibrate it there. */
															
 
																+			    || (!calibrating && isnan(local_task_length[worker_ctx][nimpl]))
															
 
																+
															
 
																+			    /* the performance model of this task is not
															
 
																+			     * calibrated on this worker either, rather run it
															
 
																+			     * there if this one is low on scheduled tasks. */
															
 
																+			    || (calibrating && isnan(local_task_length[worker_ctx][nimpl]) && ntasks_end < ntasks_best_end)
															
 
																 				)
															
 
																 			{
															
 
																 				ntasks_best_end = ntasks_end;
															
@@ -722,64 +778,18 @@ static int dmda_push_sorted_task(struct starpu_task *task)
 
																 #ifdef STARPU_DEVEL
															
 
																 #warning TODO: after defining a scheduling window, use that instead of empty_ctx_tasks
															
 
																 #endif
															
 
																-	unsigned sched_ctx_id = task->sched_ctx;
															
 
																-	starpu_pthread_mutex_t *changing_ctx_mutex = starpu_sched_ctx_get_changing_ctx_mutex(sched_ctx_id);
															
 
																-	unsigned nworkers;
															
 
																-	int ret_val = -1;
															
 
																-
															
 
																-	_STARPU_PTHREAD_MUTEX_LOCK(changing_ctx_mutex);
															
 
																-	nworkers = starpu_sched_ctx_get_nworkers(sched_ctx_id);
															
 
																-	if(nworkers == 0)
															
 
																-	{
															
 
																-		_STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
															
 
																-		return ret_val;
															
 
																-	}
															
 
																-
															
 
																-	ret_val = _dmda_push_task(task, 1, sched_ctx_id);
															
 
																-	_STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
															
 
																-	return ret_val;
															
 
																-
															
 
																+	return _dmda_push_task(task, 1, task->sched_ctx);
															
 
																 }
															
 
																 static int dm_push_task(struct starpu_task *task)
															
 
																 {
															
 
																-	unsigned sched_ctx_id = task->sched_ctx;
															
 
																-	starpu_pthread_mutex_t *changing_ctx_mutex = starpu_sched_ctx_get_changing_ctx_mutex(sched_ctx_id);
															
 
																-	unsigned nworkers;
															
 
																-	int ret_val = -1;
															
 
																-
															
 
																-	_STARPU_PTHREAD_MUTEX_LOCK(changing_ctx_mutex);
															
 
																-	nworkers = starpu_sched_ctx_get_nworkers(sched_ctx_id);
															
 
																-	if(nworkers == 0)
															
 
																-	{
															
 
																-		_STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
															
 
																-		return ret_val;
															
 
																-	}
															
 
																-
															
 
																-	ret_val = _dm_push_task(task, 0, sched_ctx_id);
															
 
																-	_STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
															
 
																-	return ret_val;
															
 
																+	return _dm_push_task(task, 0, task->sched_ctx);
															
 
																 }
															
 
																 static int dmda_push_task(struct starpu_task *task)
															
 
																 {
															
 
																-	unsigned sched_ctx_id = task->sched_ctx;
															
 
																-	starpu_pthread_mutex_t *changing_ctx_mutex = starpu_sched_ctx_get_changing_ctx_mutex(sched_ctx_id);
															
 
																-	unsigned nworkers;
															
 
																-	int ret_val = -1;
															
 
																-
															
 
																-	_STARPU_PTHREAD_MUTEX_LOCK(changing_ctx_mutex);
															
 
																-	nworkers = starpu_sched_ctx_get_nworkers(sched_ctx_id);
															
 
																-	if(nworkers == 0)
															
 
																-	{
															
 
																-		_STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
															
 
																-		return ret_val;
															
 
																-	}
															
 
																-
															
 
																 	STARPU_ASSERT(task);
															
 
																-	ret_val = _dmda_push_task(task, 0, sched_ctx_id);
															
 
																-	_STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
															
 
																-	return ret_val;
															
 
																+	return _dmda_push_task(task, 0, task->sched_ctx);
															
 
																 }
															
 
																 static void dmda_add_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers)
															
@@ -820,9 +830,9 @@ static void initialize_dmda_policy(unsigned sched_ctx_id)
 
																 	starpu_sched_ctx_create_worker_collection(sched_ctx_id, STARPU_WORKER_LIST);
															
 
																 	struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)malloc(sizeof(struct _starpu_dmda_data));
															
 
																-	dt->alpha = _STARPU_DEFAULT_ALPHA;
															
 
																-	dt->beta = _STARPU_DEFAULT_BETA;
															
 
																-	dt->_gamma = _STARPU_DEFAULT_GAMMA;
															
 
																+	dt->alpha = _STARPU_SCHED_ALPHA_DEFAULT;
															
 
																+	dt->beta = _STARPU_SCHED_BETA_DEFAULT;
															
 
																+	dt->_gamma = _STARPU_SCHED_GAMMA_DEFAULT;
															
 
																 	dt->idle_power = 0.0;
															
 
																 	starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)dt);
															
@@ -851,13 +861,13 @@ static void initialize_dmda_policy(unsigned sched_ctx_id)
 
																 #ifdef STARPU_USE_TOP
															
 
																 	starpu_top_register_parameter_float("DMDA_ALPHA", &alpha,
															
 
																-		alpha_minimum, alpha_maximum, param_modified);
															
 
																+					    alpha_minimum, alpha_maximum, param_modified);
															
 
																 	starpu_top_register_parameter_float("DMDA_BETA", &beta,
															
 
																-		beta_minimum, beta_maximum, param_modified);
															
 
																+					    beta_minimum, beta_maximum, param_modified);
															
 
																 	starpu_top_register_parameter_float("DMDA_GAMMA", &_gamma,
															
 
																-		gamma_minimum, gamma_maximum, param_modified);
															
 
																+					    gamma_minimum, gamma_maximum, param_modified);
															
 
																 	starpu_top_register_parameter_float("DMDA_IDLE_POWER", &idle_power,
															
 
																-		idle_power_minimum, idle_power_maximum, param_modified);
															
 
																+					    idle_power_minimum, idle_power_maximum, param_modified);
															
 
																 #endif /* !STARPU_USE_TOP */
															
 
																 }
															
@@ -933,14 +943,6 @@ static void dmda_push_task_notify(struct starpu_task *task, int workerid, unsign
 
																 	fifo->exp_end = fifo->exp_start + fifo->exp_len;
															
 
																 	/* If there is no prediction available, we consider the task has a null length */
															
 
																-	if (!isnan(predicted))
															
 
																-	{
															
 
																-		task->predicted = predicted;
															
 
																-		fifo->exp_end += predicted;
															
 
																-		fifo->exp_len += predicted;
															
 
																-	}
															
 
																-
															
 
																-	/* If there is no prediction available, we consider the task has a null length */
															
 
																 	if (!isnan(predicted_transfer))
															
 
																 	{
															
 
																 		if (starpu_timing_now() + predicted_transfer < fifo->exp_end)
															
@@ -960,6 +962,14 @@ static void dmda_push_task_notify(struct starpu_task *task, int workerid, unsign
 
																 		fifo->exp_len += predicted_transfer;
															
 
																 	}
															
 
																+	/* If there is no prediction available, we consider the task has a null length */
															
 
																+	if (!isnan(predicted))
															
 
																+	{
															
 
																+		task->predicted = predicted;
															
 
																+		fifo->exp_end += predicted;
															
 
																+		fifo->exp_len += predicted;
															
 
																+	}
															
 
																+
															
 
																 	fifo->ntasks++;
															
 
																 	_STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
															
--- a/src/sched_policies/deque_queues.c
+++ b/src/sched_policies/deque_queues.c
@@ -19,11 +19,10 @@
 
																 /* Deque queues, ready for use by schedulers */
															
 
																 #include <starpu.h>
															
 
																-#include <common/config.h>
															
 
																-#include <core/workers.h>
															
 
																+#include <starpu_scheduler.h>
															
 
																 #include <sched_policies/deque_queues.h>
															
 
																-#include <errno.h>
															
 
																-#include <common/utils.h>
															
 
																+
															
 
																+#include <core/workers.h>
															
 
																 struct _starpu_deque_jobq *_starpu_create_deque(void)
															
 
																 {
															
--- a/src/sched_policies/deque_queues.h
+++ b/src/sched_policies/deque_queues.h
@@ -20,7 +20,6 @@
 
																 #define __DEQUE_QUEUES_H__
															
 
																 #include <starpu.h>
															
 
																-#include <common/config.h>
															
 
																 #include <core/jobs.h>
															
 
																 struct _starpu_deque_jobq
															
--- a/src/sched_policies/detect_combined_workers.c
+++ b/src/sched_policies/detect_combined_workers.c
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																  * Copyright (C) 2010-2013  Université de Bordeaux 1
															
 
																- * Copyright (C) 2011, 2012       Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2011, 2012, 2013       Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -15,7 +15,6 @@
 
																  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
															
 
																  */
															
 
																-#include <common/config.h>
															
 
																 #include <starpu.h>
															
 
																 #include <common/utils.h>
															
 
																 #include <core/workers.h>
															
--- a/src/sched_policies/eager_central_policy.c
+++ b/src/sched_policies/eager_central_policy.c
@@ -21,8 +21,9 @@
 
																  *	JOB QUEUE.
															
 
																  */
															
 
																-#include <core/workers.h>
															
 
																+#include <starpu_scheduler.h>
															
 
																 #include <sched_policies/fifo_queues.h>
															
 
																+#include <common/thread.h>
															
 
																 struct _starpu_eager_center_policy_data
															
 
																 {
															
@@ -63,18 +64,7 @@ static int push_task_eager_policy(struct starpu_task *task)
 
																  {
															
 
																 	unsigned sched_ctx_id = task->sched_ctx;
															
 
																 	struct _starpu_eager_center_policy_data *data = (struct _starpu_eager_center_policy_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
															
 
																-	starpu_pthread_mutex_t *changing_ctx_mutex = starpu_sched_ctx_get_changing_ctx_mutex(sched_ctx_id);
															
 
																-	unsigned nworkers;
															
 
																 	int ret_val = -1;
															
 
																-
															
 
																-	_STARPU_PTHREAD_MUTEX_LOCK(changing_ctx_mutex);
															
 
																-	nworkers = starpu_sched_ctx_get_nworkers(sched_ctx_id);
															
 
																-	if(nworkers == 0)
															
 
																-	{
															
 
																-		_STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
															
 
																-		return ret_val;
															
 
																-	}
															
 
																-
															
 
																 	_STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex);
															
 
																 	ret_val = _starpu_fifo_push_task(data->fifo, task);
															
@@ -82,7 +72,6 @@ static int push_task_eager_policy(struct starpu_task *task)
 
																 	starpu_push_task_end(task);
															
 
																 	_STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex);
															
 
																-
															
 
																 	/*if there are no tasks block */
															
 
																 	/* wake people waiting for a task */
															
 
																 	unsigned worker = 0;
															
@@ -103,8 +92,6 @@ static int push_task_eager_policy(struct starpu_task *task)
 
																 		_STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
															
 
																 	}
															
 
																-		
															
 
																-	_STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
															
 
																 	return ret_val;
															
 
																 }
															
--- a/src/sched_policies/eager_central_priority_policy.c
+++ b/src/sched_policies/eager_central_priority_policy.c
@@ -109,20 +109,8 @@ static int _starpu_priority_push_task(struct starpu_task *task)
 
																 	struct _starpu_eager_central_prio_data *data = (struct _starpu_eager_central_prio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
															
 
																 	struct _starpu_priority_taskq *taskq = data->taskq;
															
 
																-
															
 
																-	/* if the context has no workers return */
															
 
																-	starpu_pthread_mutex_t *changing_ctx_mutex = starpu_sched_ctx_get_changing_ctx_mutex(sched_ctx_id);
															
 
																-	unsigned nworkers;
															
 
																 	int ret_val = -1;
															
 
																-	_STARPU_PTHREAD_MUTEX_LOCK(changing_ctx_mutex);
															
 
																-	nworkers = starpu_sched_ctx_get_nworkers(sched_ctx_id);
															
 
																-	if(nworkers == 0)
															
 
																-	{
															
 
																-		_STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
															
 
																-		return ret_val;
															
 
																-	}
															
 
																-
															
 
																 	_STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex);
															
 
																 	unsigned priolevel = task->priority - STARPU_MIN_PRIO;
															
@@ -153,7 +141,6 @@ static int _starpu_priority_push_task(struct starpu_task *task)
 
																 		_STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
															
 
																 	}
															
 
																-	_STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
															
 
																 	return 0;
															
 
																 }
															
--- a/src/sched_policies/fifo_queues.h
+++ b/src/sched_policies/fifo_queues.h
@@ -20,8 +20,6 @@
 
																 #define __FIFO_QUEUES_H__
															
 
																 #include <starpu.h>
															
 
																-#include <common/config.h>
															
 
																-#include <common/utils.h>
															
 
																 struct _starpu_fifo_taskq
															
 
																 {
															
--- a/src/sched_policies/parallel_eager.c
+++ b/src/sched_policies/parallel_eager.c
@@ -15,12 +15,10 @@
 
																  *
															
 
																  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
															
 
																  */
															
 
																-
															
 
																-#include <core/workers.h>
															
 
																 #include <sched_policies/fifo_queues.h>
															
 
																-#include <common/barrier.h>
															
 
																 #include <sched_policies/detect_combined_workers.h>
															
 
																-#include <core/parallel_task.h>
															
 
																+#include <starpu_scheduler.h>
															
 
																+#include <core/workers.h>
															
 
																 struct _starpu_peager_data
															
 
																 {
															
@@ -28,12 +26,14 @@ struct _starpu_peager_data
 
																 	struct _starpu_fifo_taskq *local_fifo[STARPU_NMAXWORKERS];
															
 
																 	int master_id[STARPU_NMAXWORKERS];
															
 
																+        starpu_pthread_mutex_t policy_mutex;
															
 
																 };
															
 
																+#define STARPU_NMAXCOMBINED_WORKERS 10
															
 
																 /* XXX instead of 10, we should use some "MAX combination .."*/
															
 
																 static int possible_combinations_cnt[STARPU_NMAXWORKERS];
															
 
																-static int possible_combinations[STARPU_NMAXWORKERS][10];
															
 
																-static int possible_combinations_size[STARPU_NMAXWORKERS][10];
															
 
																+static int possible_combinations[STARPU_NMAXWORKERS][STARPU_NMAXCOMBINED_WORKERS];
															
 
																+static int possible_combinations_size[STARPU_NMAXWORKERS][STARPU_NMAXCOMBINED_WORKERS];
															
 
																 /*!!!!!!! It doesn't work with several contexts because the combined workers are constructed
															
@@ -135,6 +135,7 @@ static void initialize_peager_policy(unsigned sched_ctx_id)
 
																 	data->fifo = _starpu_create_fifo();
															
 
																 	starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)data);
															
 
																+        _STARPU_PTHREAD_MUTEX_INIT(&data->policy_mutex, NULL);
															
 
																 }
															
 
																 static void deinitialize_peager_policy(unsigned sched_ctx_id)
															
@@ -146,6 +147,7 @@ static void deinitialize_peager_policy(unsigned sched_ctx_id)
 
																 	_starpu_destroy_fifo(data->fifo);
															
 
																 	starpu_sched_ctx_delete_worker_collection(sched_ctx_id);
															
 
																+        _STARPU_PTHREAD_MUTEX_DESTROY(&data->policy_mutex);
															
 
																 	free(data);
															
 
																 }
															
@@ -153,44 +155,24 @@ static void deinitialize_peager_policy(unsigned sched_ctx_id)
 
																 static int push_task_peager_policy(struct starpu_task *task)
															
 
																 {
															
 
																 	unsigned sched_ctx_id = task->sched_ctx;
															
 
																-	starpu_pthread_mutex_t *changing_ctx_mutex = starpu_sched_ctx_get_changing_ctx_mutex(sched_ctx_id);
															
 
																-	unsigned nworkers;
															
 
																 	int ret_val = -1;
															
 
																-	/* if the context has no workers return */
															
 
																-	_STARPU_PTHREAD_MUTEX_LOCK(changing_ctx_mutex);
															
 
																-	nworkers = starpu_sched_ctx_get_nworkers(sched_ctx_id);
															
 
																-	
															
 
																-   	if(nworkers == 0)
															
 
																-	{
															
 
																-   		_STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
															
 
																-		return ret_val;
															
 
																-	}
															
 
																 	struct _starpu_peager_data *data = (struct _starpu_peager_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
															
 
																-	int worker = 0;
															
 
																-	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
															
 
																-	
															
 
																-	struct starpu_sched_ctx_iterator it;
															
 
																-	if(workers->init_iterator)
															
 
																-		workers->init_iterator(workers, &it);
															
 
																-	
															
 
																-	while(workers->has_next(workers, &it))
															
 
																-	{
															
 
																-		worker = workers->get_next(workers, &it);
															
 
																-		int master = data->master_id[worker];
															
 
																-		/* If this is not a CPU, then the worker simply grabs tasks from the fifo */
															
 
																-		if (starpu_worker_get_type(worker) != STARPU_CPU_WORKER  || master == worker)
															
 
																-		{
															
 
																-			starpu_pthread_mutex_t *sched_mutex;
															
 
																-			starpu_pthread_cond_t *sched_cond;
															
 
																-			starpu_worker_get_sched_condition(worker, &sched_mutex, &sched_cond);
															
 
																-			_STARPU_PTHREAD_MUTEX_LOCK(sched_mutex);
															
 
																-		}
															
 
																-	}
															
 
																-	
															
 
																+	_STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex);
															
 
																 	ret_val = _starpu_fifo_push_task(data->fifo, task);
															
 
																 	starpu_push_task_end(task);
															
 
																+	_STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex);
															
 
																+
															
 
																+        /*if there are no tasks block */
															
 
																+        /* wake people waiting for a task */
															
 
																+        int worker = -1;
															
 
																+        struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
															
 
																+
															
 
																+        struct starpu_sched_ctx_iterator it;
															
 
																+        if(workers->init_iterator)
															
 
																+                workers->init_iterator(workers, &it);
															
 
																+
															
 
																 	while(workers->has_next(workers, &it))
															
 
																 	{
															
@@ -202,12 +184,11 @@ static int push_task_peager_policy(struct starpu_task *task)
 
																 			starpu_pthread_mutex_t *sched_mutex;
															
 
																 			starpu_pthread_cond_t *sched_cond;
															
 
																 			starpu_worker_get_sched_condition(worker, &sched_mutex, &sched_cond);
															
 
																+			_STARPU_PTHREAD_MUTEX_LOCK(sched_mutex);
															
 
																 			_STARPU_PTHREAD_COND_SIGNAL(sched_cond);
															
 
																 			_STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
															
 
																 		}
															
 
																 	}
															
 
																-	
															
 
																-	_STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
															
 
																 	return ret_val;
															
 
																 }
															
@@ -220,14 +201,24 @@ static struct starpu_task *pop_task_peager_policy(unsigned sched_ctx_id)
 
																 	/* If this is not a CPU, then the worker simply grabs tasks from the fifo */
															
 
																 	if (starpu_worker_get_type(workerid) != STARPU_CPU_WORKER)
															
 
																-		return _starpu_fifo_pop_task(data->fifo, workerid);
															
 
																+	{
															
 
																+		struct starpu_task *task = NULL;
															
 
																+		_STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex);
															
 
																+		task = _starpu_fifo_pop_task(data->fifo, workerid);
															
 
																+		_STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex);
															
 
																+
															
 
																+		return task;
															
 
																+	}
															
 
																 	int master = data->master_id[workerid];
															
 
																 	if (master == workerid)
															
 
																 	{
															
 
																 		/* The worker is a master */
															
 
																-		struct starpu_task *task = _starpu_fifo_pop_task(data->fifo, workerid);
															
 
																+		struct starpu_task *task = NULL;
															
 
																+		_STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex);
															
 
																+		task = _starpu_fifo_pop_task(data->fifo, workerid);
															
 
																+		_STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex);
															
 
																 		if (!task)
															
 
																 			return NULL;
															
@@ -266,29 +257,17 @@ static struct starpu_task *pop_task_peager_policy(unsigned sched_ctx_id)
 
																 		}
															
 
																 		else
															
 
																 		{
															
 
																-			/* The master needs to dispatch the task between the
															
 
																-			 * different combined workers */
															
 
																-			struct _starpu_combined_worker *combined_worker;
															
 
																-			combined_worker = _starpu_get_combined_worker_struct(best_workerid);
															
 
																-			int worker_size = combined_worker->worker_size;
															
 
																-			int *combined_workerid = combined_worker->combined_workerid;
															
 
																-
															
 
																-			struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
															
 
																-			j->task_size = worker_size;
															
 
																-			j->combined_workerid = best_workerid;
															
 
																-			j->active_task_alias_count = 0;
															
 
																-
															
 
																-			//fprintf(stderr, "POP -> size %d best_size %d\n", worker_size, best_size);
															
 
																-
															
 
																-			_STARPU_PTHREAD_BARRIER_INIT(&j->before_work_barrier, NULL, worker_size);
															
 
																-			_STARPU_PTHREAD_BARRIER_INIT(&j->after_work_barrier, NULL, worker_size);
															
 
																+			starpu_parallel_task_barrier_init(task, best_workerid);
															
 
																+			int worker_size = 0;
															
 
																+			int *combined_workerid;
															
 
																+			starpu_combined_worker_get_description(best_workerid, &worker_size, &combined_workerid);
															
 
																 			/* Dispatch task aliases to the different slaves */
															
 
																 			for (i = 1; i < worker_size; i++)
															
 
																 			{
															
 
																-				struct starpu_task *alias = _starpu_create_task_alias(task);
															
 
																+				struct starpu_task *alias = starpu_task_dup(task);
															
 
																 				int local_worker = combined_workerid[i];
															
 
																-				
															
 
																+
															
 
																 				starpu_pthread_mutex_t *sched_mutex;
															
 
																 				starpu_pthread_cond_t *sched_cond;
															
 
																 				starpu_worker_get_sched_condition(local_worker, &sched_mutex, &sched_cond);
															
@@ -303,7 +282,7 @@ static struct starpu_task *pop_task_peager_policy(unsigned sched_ctx_id)
 
																 			}
															
 
																 			/* The master also manipulated an alias */
															
 
																-			struct starpu_task *master_alias = _starpu_create_task_alias(task);
															
 
																+			struct starpu_task *master_alias = starpu_task_dup(task);
															
 
																 			return master_alias;
															
 
																 		}
															
 
																 	}
															
--- a/src/sched_policies/parallel_heft.c
+++ b/src/sched_policies/parallel_heft.c
@@ -23,9 +23,7 @@
 
																 #include <core/workers.h>
															
 
																 #include <core/perfmodel/perfmodel.h>
															
 
																 #include <starpu_parameters.h>
															
 
																-#include <common/barrier.h>
															
 
																 #include <sched_policies/detect_combined_workers.h>
															
 
																-#include <core/parallel_task.h>
															
 
																 #ifndef DBL_MIN
															
 
																 #define DBL_MIN __DBL_MIN__
															
@@ -39,6 +37,14 @@
 
																 //static enum starpu_perf_archtype applicable_perf_archtypes[STARPU_NARCH_VARIATIONS];
															
 
																 //static unsigned napplicable_perf_archtypes = 0;
															
 
																+/*
															
 
																+ * Here are the default values of alpha, beta, gamma
															
 
																+ */
															
 
																+
															
 
																+#define _STARPU_SCHED_ALPHA_DEFAULT 1.0
															
 
																+#define _STARPU_SCHED_BETA_DEFAULT 1.0
															
 
																+#define _STARPU_SCHED_GAMMA_DEFAULT 1000.0
															
 
																+
															
 
																 struct _starpu_pheft_data
															
 
																 {
															
 
																 	double alpha;
															
@@ -128,33 +134,25 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 
																 	}
															
 
																 	else
															
 
																 	{
															
 
																-		/* This is a combined worker so we create task aliases */
															
 
																-		struct _starpu_combined_worker *combined_worker;
															
 
																-		combined_worker = _starpu_get_combined_worker_struct(best_workerid);
															
 
																-		int worker_size = combined_worker->worker_size;
															
 
																-		int *combined_workerid = combined_worker->combined_workerid;
															
 
																-
															
 
																-		struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
															
 
																-		j->task_size = worker_size;
															
 
																-		j->combined_workerid = best_workerid;
															
 
																-		j->active_task_alias_count = 0;
															
 
																-
															
 
																 		/* This task doesn't belong to an actual worker, it belongs
															
 
																 		 * to a combined worker and thus the scheduler doesn't care
															
 
																 		 * of its predicted values which are insignificant */
															
 
																 		task->predicted = 0;
															
 
																 		task->predicted_transfer = 0;
															
 
																-		_STARPU_PTHREAD_BARRIER_INIT(&j->before_work_barrier, NULL, worker_size);
															
 
																-		_STARPU_PTHREAD_BARRIER_INIT(&j->after_work_barrier, NULL, worker_size);
															
 
																+		starpu_parallel_task_barrier_init(task, best_workerid);
															
 
																+		int worker_size = 0;
															
 
																+		int *combined_workerid;
															
 
																+		starpu_combined_worker_get_description(best_workerid, &worker_size, &combined_workerid);
															
 
																 		/* All cpu workers must be locked at once */
															
 
																 		_STARPU_PTHREAD_MUTEX_LOCK(&hd->global_push_mutex);
															
 
																+		/* This is a combined worker so we create task aliases */
															
 
																 		int i;
															
 
																 		for (i = 0; i < worker_size; i++)
															
 
																 		{
															
 
																-			struct starpu_task *alias = _starpu_create_task_alias(task);
															
 
																+			struct starpu_task *alias = starpu_task_dup(task);
															
 
																 			int local_worker = combined_workerid[i];
															
 
																 			alias->predicted = exp_end_predicted - worker_exp_end[local_worker];
															
@@ -489,36 +487,15 @@ static int _parallel_heft_push_task(struct starpu_task *task, unsigned prio, uns
 
																 static int parallel_heft_push_task(struct starpu_task *task)
															
 
																 {
															
 
																 	unsigned sched_ctx_id = task->sched_ctx;
															
 
																-	starpu_pthread_mutex_t *changing_ctx_mutex = starpu_sched_ctx_get_changing_ctx_mutex(sched_ctx_id);
															
 
																-	unsigned nworkers;
															
 
																 	int ret_val = -1;
															
 
																 	if (task->priority == STARPU_MAX_PRIO)
															
 
																 	{
															
 
																-		_STARPU_PTHREAD_MUTEX_LOCK(changing_ctx_mutex);
															
 
																-                nworkers = starpu_sched_ctx_get_nworkers(sched_ctx_id);
															
 
																-                if(nworkers == 0)
															
 
																-                {
															
 
																-                        _STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
															
 
																-                        return ret_val;
															
 
																-                }
															
 
																-
															
 
																 		ret_val = _parallel_heft_push_task(task, 1, sched_ctx_id);
															
 
																-		_STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
															
 
																-                return ret_val;
															
 
																-        }
															
 
																-
															
 
																-
															
 
																-	_STARPU_PTHREAD_MUTEX_LOCK(changing_ctx_mutex);
															
 
																-	nworkers = starpu_sched_ctx_get_nworkers(sched_ctx_id);
															
 
																-        if(nworkers == 0)
															
 
																-	{
															
 
																-		_STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
															
 
																                 return ret_val;
															
 
																         }
															
 
																         ret_val = _parallel_heft_push_task(task, 0, sched_ctx_id);
															
 
																-	_STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
															
 
																 	return ret_val;
															
 
																 }
															
@@ -575,9 +552,9 @@ static void initialize_parallel_heft_policy(unsigned sched_ctx_id)
 
																 {
															
 
																 	starpu_sched_ctx_create_worker_collection(sched_ctx_id, STARPU_WORKER_LIST);
															
 
																 	struct _starpu_pheft_data *hd = (struct _starpu_pheft_data*)malloc(sizeof(struct _starpu_pheft_data));
															
 
																-	hd->alpha = _STARPU_DEFAULT_ALPHA;
															
 
																-	hd->beta = _STARPU_DEFAULT_BETA;
															
 
																-	hd->_gamma = _STARPU_DEFAULT_GAMMA;
															
 
																+	hd->alpha = _STARPU_SCHED_ALPHA_DEFAULT;
															
 
																+	hd->beta = _STARPU_SCHED_BETA_DEFAULT;
															
 
																+	hd->_gamma = _STARPU_SCHED_GAMMA_DEFAULT;
															
 
																 	hd->idle_power = 0.0;
															
 
																 	starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)hd);
															
--- a/src/sched_policies/random_policy.c
+++ b/src/sched_policies/random_policy.c
@@ -83,22 +83,7 @@ static int _random_push_task(struct starpu_task *task, unsigned prio)
 
																 static int random_push_task(struct starpu_task *task)
															
 
																 {
															
 
																-	unsigned sched_ctx_id = task->sched_ctx;
															
 
																-	starpu_pthread_mutex_t *changing_ctx_mutex = starpu_sched_ctx_get_changing_ctx_mutex(sched_ctx_id);
															
 
																-	unsigned nworkers;
															
 
																-        int ret_val = -1;
															
 
																-
															
 
																-        _STARPU_PTHREAD_MUTEX_LOCK(changing_ctx_mutex);
															
 
																-	nworkers = starpu_sched_ctx_get_nworkers(sched_ctx_id);
															
 
																-        if(nworkers == 0)
															
 
																-        {
															
 
																-		_STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
															
 
																-                return ret_val;
															
 
																-        }
															
 
																-
															
 
																-        ret_val = _random_push_task(task, !!task->priority);
															
 
																-        _STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
															
 
																-        return ret_val;
															
 
																+        return _random_push_task(task, !!task->priority);
															
 
																 }
															
 
																 static void initialize_random_policy(unsigned sched_ctx_id)
															
--- a/src/sched_policies/stack_queues.h
+++ b/src/sched_policies/stack_queues.h
@@ -20,7 +20,6 @@
 
																 #define __STACK_QUEUES_H__
															
 
																 #include <starpu.h>
															
 
																-#include <common/config.h>
															
 
																 #include <core/jobs.h>
															
 
																 struct _starpu_stack_jobq
															
--- a/src/sched_policies/work_stealing_policy.c
+++ b/src/sched_policies/work_stealing_policy.c
@@ -336,19 +336,6 @@ int ws_push_task(struct starpu_task *task)
 
																 	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
															
 
																 	int workerid = starpu_worker_get_id();
															
 
																-	starpu_pthread_mutex_t *changing_ctx_mutex = starpu_sched_ctx_get_changing_ctx_mutex(sched_ctx_id);
															
 
																-        unsigned nworkers;
															
 
																-        int ret_val = -1;
															
 
																-
															
 
																-	/* if the context has no workers return */
															
 
																-        _STARPU_PTHREAD_MUTEX_LOCK(changing_ctx_mutex);
															
 
																-        nworkers = starpu_sched_ctx_get_nworkers(sched_ctx_id);
															
 
																-        if(nworkers == 0)
															
 
																-        {
															
 
																-                _STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
															
 
																-                return ret_val;
															
 
																-        }
															
 
																-
															
 
																 	unsigned worker = 0;
															
 
																 	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
															
 
																 	struct starpu_sched_ctx_iterator it;
															
@@ -394,8 +381,6 @@ int ws_push_task(struct starpu_task *task)
 
																 		_STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
															
 
																 	}
															
 
																-        _STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
															
 
																-
															
 
																 	return 0;
															
 
																 }
															
--- a/src/starpu_parameters.h
+++ b/src/starpu_parameters.h
@@ -20,17 +20,6 @@
 
																 /* Parameters which are not worth being added to ./configure options, but
															
 
																  * still interesting to easily change */
															
 
																-/* The dmda scheduling policy uses
															
 
																- *
															
 
																- * alpha * T_computation + beta * T_communication + gamma * Consumption
															
 
																- *
															
 
																- * Here are the default values of alpha, beta, gamma
															
 
																- */
															
 
																-
															
 
																-#define _STARPU_DEFAULT_ALPHA 1.0
															
 
																-#define _STARPU_DEFAULT_BETA 1.0
															
 
																-#define _STARPU_DEFAULT_GAMMA 1000.0
															
 
																-
															
 
																 /* How many executions a codelet will have to be measured before we
															
 
																  * consider that calibration will provide a value good enough for scheduling */
															
 
																 #define _STARPU_CALIBRATION_MINIMUM 10
															
--- a/src/top/starpu_top_core.h
+++ b/src/top/starpu_top_core.h
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																  * Copyright (C) 2011 William Braik, Yann Courtois, Jean-Marie Couteyen, Anthony Roy
															
 
																- * Copyright (C) 2011, 2012 Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2011, 2012, 2013 Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -48,7 +48,7 @@ void __starpu_top_task_prevision_timespec(struct starpu_task *task,
 
																 					int devid,
															
 
																 					const struct timespec* start,
															
 
																 					const struct timespec* end);
															
 
																-void _starpu_top_task_prevision(struct starpu_task *task,
															
 
																+void starpu_top_task_prevision(struct starpu_task *task,
															
 
																 			       int devid, unsigned long long start,
															
 
																 			       unsigned long long end);
															
--- a/src/top/starpu_top_task.c
+++ b/src/top/starpu_top_task.c
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																  * Copyright (C) 2011 William Braik, Yann Courtois, Jean-Marie Couteyen, Anthony Roy
															
 
																- * Copyright (C) 2011 Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2011, 2013 Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -66,17 +66,20 @@ void __starpu_top_task_prevision_timespec(struct starpu_task *task,
 
																 					const struct timespec* start,
															
 
																 					const struct timespec* end)
															
 
																 {
															
 
																-	_starpu_top_task_prevision(task,
															
 
																+	starpu_top_task_prevision(task,
															
 
																 				  devid,
															
 
																 				  _starpu_top_timing_timespec_to_ms(start),
															
 
																 				  _starpu_top_timing_timespec_to_ms(end));
															
 
																 }
															
 
																-void _starpu_top_task_prevision(struct starpu_task *task,
															
 
																+void starpu_top_task_prevision(struct starpu_task *task,
															
 
																 			       int devid,
															
 
																 			       unsigned long long start,
															
 
																 			       unsigned long long end)
															
 
																 {
															
 
																+	if (!_starpu_top_status_get())
															
 
																+		return;
															
 
																+
															
 
																 	unsigned long long taskid = _starpu_get_job_associated_to_task(task)->job_id;
															
 
																 	STARPU_ASSERT(_starpu_top_status_get());
															
 
																 	struct timespec now;
															
--- a/src/util/starpu_data_cpy.c
+++ b/src/util/starpu_data_cpy.c
@@ -103,8 +103,8 @@ int _starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_h
 
																 	task->callback_func = callback_func;
															
 
																 	task->callback_arg = callback_arg;
															
 
																-	task->handles[0] = dst_handle;
															
 
																-	task->handles[1] = src_handle;
															
 
																+	STARPU_TASK_SET_HANDLE(task, dst_handle, 0);
															
 
																+	STARPU_TASK_SET_HANDLE(task, src_handle, 1);
															
 
																 	task->synchronous = !asynchronous;
															
--- a/src/util/starpu_insert_task.c
+++ b/src/util/starpu_insert_task.c
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																  * Copyright (C) 2010, 2012  Université de Bordeaux 1
															
 
																- * Copyright (C) 2011, 2012  Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2011, 2012, 2013  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -23,7 +23,7 @@
 
																 #include <stdarg.h>
															
 
																 #include <util/starpu_insert_task_utils.h>
															
 
																-void starpu_codelet_pack_args(char **arg_buffer, size_t *arg_buffer_size, ...)
															
 
																+void starpu_codelet_pack_args(void **arg_buffer, size_t *arg_buffer_size, ...)
															
 
																 {
															
 
																 	va_list varg_list;
															
@@ -32,7 +32,7 @@ void starpu_codelet_pack_args(char **arg_buffer, size_t *arg_buffer_size, ...)
 
																 	*arg_buffer_size = _starpu_insert_task_get_arg_size(varg_list);
															
 
																 	va_start(varg_list, arg_buffer_size);
															
 
																-	_starpu_codelet_pack_args(*arg_buffer_size, arg_buffer, varg_list);
															
 
																+	_starpu_codelet_pack_args(arg_buffer, *arg_buffer_size, varg_list);
															
 
																 }
															
 
																 void starpu_codelet_unpack_args(void *_cl_arg, ...)
															
@@ -66,7 +66,7 @@ void starpu_codelet_unpack_args(void *_cl_arg, ...)
 
																 int starpu_insert_task(struct starpu_codelet *cl, ...)
															
 
																 {
															
 
																 	va_list varg_list;
															
 
																-	char *arg_buffer = NULL;
															
 
																+	void *arg_buffer = NULL;
															
 
																 	/* Compute the size */
															
 
																 	size_t arg_buffer_size = 0;
															
@@ -76,11 +76,17 @@ int starpu_insert_task(struct starpu_codelet *cl, ...)
 
																 	if (arg_buffer_size)
															
 
																 	{
															
 
																 		va_start(varg_list, cl);
															
 
																-		_starpu_codelet_pack_args(arg_buffer_size, &arg_buffer, varg_list);
															
 
																+		_starpu_codelet_pack_args(&arg_buffer, arg_buffer_size, varg_list);
															
 
																 	}
															
 
																-	va_start(varg_list, cl);
															
 
																 	struct starpu_task *task = starpu_task_create();
															
 
																+
															
 
																+	if (cl && cl->nbuffers > STARPU_NMAXBUFS)
															
 
																+	{
															
 
																+		task->dyn_handles = malloc(cl->nbuffers * sizeof(starpu_data_handle_t));
															
 
																+	}
															
 
																+
															
 
																+	va_start(varg_list, cl);
															
 
																 	int ret = _starpu_insert_task_create_and_submit(arg_buffer, arg_buffer_size, cl, &task, varg_list);
															
 
																 	if (ret == -ENODEV)
															
--- a/src/util/starpu_insert_task_utils.c
+++ b/src/util/starpu_insert_task_utils.c
@@ -18,6 +18,7 @@
 
																 #include <util/starpu_insert_task_utils.h>
															
 
																 #include <common/config.h>
															
 
																 #include <common/utils.h>
															
 
																+#include <core/task.h>
															
 
																 typedef void (*_starpu_callback_func_t)(void *);
															
@@ -120,15 +121,16 @@ size_t _starpu_insert_task_get_arg_size(va_list varg_list)
 
																 	return arg_buffer_size;
															
 
																 }
															
 
																-int _starpu_codelet_pack_args(size_t arg_buffer_size, char **arg_buffer, va_list varg_list)
															
 
																+int _starpu_codelet_pack_args(void **arg_buffer, size_t arg_buffer_size, va_list varg_list)
															
 
																 {
															
 
																 	int arg_type;
															
 
																 	unsigned current_arg_offset = 0;
															
 
																 	unsigned char nargs = 0;
															
 
																+	char *_arg_buffer;
															
 
																 	/* The buffer will contain : nargs, {size, content} (x nargs)*/
															
 
																-	*arg_buffer = (char *) malloc(arg_buffer_size);
															
 
																+	_arg_buffer = malloc(arg_buffer_size);
															
 
																 	/* We will begin the buffer with the number of args (which is stored as a char) */
															
 
																 	current_arg_offset += sizeof(char);
															
@@ -150,10 +152,10 @@ int _starpu_codelet_pack_args(size_t arg_buffer_size, char **arg_buffer, va_list
 
																 			void *ptr = va_arg(varg_list, void *);
															
 
																 			size_t cst_size = va_arg(varg_list, size_t);
															
 
																-			*(size_t *)(&(*arg_buffer)[current_arg_offset]) = cst_size;
															
 
																+			*(size_t *)(&(_arg_buffer)[current_arg_offset]) = cst_size;
															
 
																 			current_arg_offset += sizeof(size_t);
															
 
																-			memcpy(&(*arg_buffer)[current_arg_offset], ptr, cst_size);
															
 
																+			memcpy(&_arg_buffer[current_arg_offset], ptr, cst_size);
															
 
																 			current_arg_offset += cst_size;
															
 
																 			nargs++;
															
@@ -205,19 +207,20 @@ int _starpu_codelet_pack_args(size_t arg_buffer_size, char **arg_buffer, va_list
 
																 	if (nargs)
															
 
																 	{
															
 
																-		(*arg_buffer)[0] = nargs;
															
 
																+		_arg_buffer[0] = nargs;
															
 
																 	}
															
 
																 	else
															
 
																 	{
															
 
																-		free(*arg_buffer);
															
 
																-		*arg_buffer = NULL;
															
 
																+		free(_arg_buffer);
															
 
																+		_arg_buffer = NULL;
															
 
																 	}
															
 
																+	*arg_buffer = _arg_buffer;
															
 
																 	va_end(varg_list);
															
 
																 	return 0;
															
 
																 }
															
 
																-int _starpu_insert_task_create_and_submit(char *arg_buffer, size_t arg_buffer_size, struct starpu_codelet *cl, struct starpu_task **task, va_list varg_list)
															
 
																+int _starpu_insert_task_create_and_submit(void *arg_buffer, size_t arg_buffer_size, struct starpu_codelet *cl, struct starpu_task **task, va_list varg_list)
															
 
																 {
															
 
																 	int arg_type;
															
 
																 	unsigned current_buffer = 0;
															
@@ -239,18 +242,20 @@ int _starpu_insert_task_create_and_submit(char *arg_buffer, size_t arg_buffer_si
 
																 			STARPU_ASSERT(cl != NULL);
															
 
																-			(*task)->handles[current_buffer] = handle;
															
 
																-			if (cl->modes[current_buffer])
															
 
																+			STARPU_TASK_SET_HANDLE((*task), handle, current_buffer);
															
 
																+			if (STARPU_CODELET_GET_MODE(cl, current_buffer))
															
 
																 			{
															
 
																-				STARPU_ASSERT_MSG(cl->modes[current_buffer] == mode, "The codelet <%s> defines the access mode %d for the buffer %d which is different from the mode %d given to starpu_insert_task\n",
															
 
																-						  cl->name, cl->modes[current_buffer], current_buffer, mode);
															
 
																+				STARPU_ASSERT_MSG(STARPU_CODELET_GET_MODE(cl, current_buffer) == mode,
															
 
																+						   "The codelet <%s> defines the access mode %d for the buffer %d which is different from the mode %d given to starpu_insert_task\n",
															
 
																+						  cl->name, STARPU_CODELET_GET_MODE(cl, current_buffer),
															
 
																+						  current_buffer, mode);
															
 
																 			}
															
 
																 			else
															
 
																 			{
															
 
																 #ifdef STARPU_DEVEL
															
 
																 #  warning shall we print a warning to the user
															
 
																 #endif
															
 
																-				cl->modes[current_buffer] = mode;
															
 
																+				STARPU_CODELET_SET_MODE(cl, mode, current_buffer);
															
 
																 			}
															
 
																 			current_buffer++;
															
@@ -264,7 +269,7 @@ int _starpu_insert_task_create_and_submit(char *arg_buffer, size_t arg_buffer_si
 
																 			int i;
															
 
																 			for(i=0 ; i<nb_handles ; i++)
															
 
																 			{
															
 
																-				(*task)->handles[current_buffer] = handles[i];
															
 
																+				STARPU_TASK_SET_HANDLE((*task), handles[i], current_buffer);
															
 
																 				current_buffer++;
															
 
																 			}
															
--- a/src/util/starpu_insert_task_utils.h
+++ b/src/util/starpu_insert_task_utils.h
@@ -1,6 +1,6 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2011, 2012  Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2011, 2012, 2013  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -22,9 +22,9 @@
 
																 #include <starpu.h>
															
 
																 size_t _starpu_insert_task_get_arg_size(va_list varg_list);
															
 
																-int _starpu_codelet_pack_args(size_t arg_buffer_size, char **arg_buffer, va_list varg_list);
															
 
																-int _starpu_insert_task_create_and_submit(char *arg_buffer, size_t arg_buffer_size, struct starpu_codelet *cl, struct starpu_task **task, va_list varg_list);
															
 
																-int _starpu_insert_task_create_and_submit_array(char *arg_buffer, size_t arg_buffer_size, struct starpu_codelet *cl, struct starpu_task **task, starpu_data_handle_t *handles, unsigned nb_handles, va_list varg_list);
															
 
																+int _starpu_codelet_pack_args(void **arg_buffer, size_t arg_buffer_size, va_list varg_list);
															
 
																+int _starpu_insert_task_create_and_submit(void *arg_buffer, size_t arg_buffer_size, struct starpu_codelet *cl, struct starpu_task **task, va_list varg_list);
															
 
																+int _starpu_insert_task_create_and_submit_array(void *arg_buffer, size_t arg_buffer_size, struct starpu_codelet *cl, struct starpu_task **task, starpu_data_handle_t *handles, unsigned nb_handles, va_list varg_list);
															
 
																 #endif // __STARPU_INSERT_TASK_UTILS_H__
															
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -208,6 +208,7 @@ noinst_PROGRAMS =				\
 
																 	parallel_tasks/explicit_combined_worker	\
															
 
																 	parallel_tasks/parallel_kernels		\
															
 
																 	parallel_tasks/parallel_kernels_spmd	\
															
 
																+	parallel_tasks/spmd_peager		\
															
 
																 	perfmodels/regression_based		\
															
 
																 	perfmodels/non_linear_regression_based	\
															
 
																 	perfmodels/feed				\
															
--- a/tests/main/insert_task.c
+++ b/tests/main/insert_task.c
@@ -1,6 +1,6 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2011, 2012  Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2011, 2012, 2013  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -93,14 +93,10 @@ int main(int argc, char **argv)
 
																 	task->cl = &mycodelet;
															
 
																 	task->handles[0] = data_handles[0];
															
 
																 	task->handles[1] = data_handles[1];
															
 
																-	char *arg_buffer;
															
 
																-	size_t arg_buffer_size;
															
 
																-	starpu_codelet_pack_args(&arg_buffer, &arg_buffer_size,
															
 
																+	starpu_codelet_pack_args(&task->cl_arg, &task->cl_arg_size,
															
 
																 			    STARPU_VALUE, &ifactor, sizeof(ifactor),
															
 
																 			    STARPU_VALUE, &ffactor, sizeof(ffactor),
															
 
																 			    0);
															
 
																-	task->cl_arg = arg_buffer;
															
 
																-	task->cl_arg_size = arg_buffer_size;
															
 
																 	ret = starpu_task_submit(task);
															
 
																 	if (ret == -ENODEV) goto enodev;
															
--- a/tools/Makefile.am
+++ b/tools/Makefile.am
@@ -86,7 +86,8 @@ bin_PROGRAMS += 			\
 
																 	starpu_perfmodel_display	\
															
 
																 	starpu_perfmodel_plot 		\
															
 
																 	starpu_calibrate_bus		\
															
 
																-	starpu_machine_display
															
 
																+	starpu_machine_display		\
															
 
																+	starpu_lp2paje
															
 
																 starpu_perfmodel_plot_CPPFLAGS = $(AM_CFLAGS) $(AM_CPPFLAGS) $(FXT_CFLAGS)
															
@@ -104,8 +105,6 @@ STARPU_TOOLS	+=			\
 
																 	starpu_perfmodel_plot
															
 
																 endif
															
 
																-noinst_PROGRAMS =	cbc2paje lp2paje
															
 
																-
															
 
																 dist_bin_SCRIPTS +=			\
															
 
																 	starpu_workers_activity		\
															
 
																 	starpu_codelet_histo_profile	\
															
@@ -129,6 +128,8 @@ starpu_perfmodel_display.1: starpu_perfmodel_display$(EXEEXT)
 
																 	help2man --no-discard-stderr -N --output=$@ ./$<
															
 
																 starpu_perfmodel_plot.1: starpu_perfmodel_plot$(EXEEXT)
															
 
																 	help2man --no-discard-stderr -N --output=$@ ./$<
															
 
																+starpu_lp2paje.1: starpu_lp2paje$(EXEEXT)
															
 
																+	help2man --no-discard-stderr -N --output=$@ ./$<
															
 
																 starpu_workers_activity.1: starpu_workers_activity$(EXEEXT)
															
 
																 	chmod +x $<
															
 
																 	help2man --no-discard-stderr -N --output=$@ ./$<
															
@@ -153,6 +154,7 @@ dist_man1_MANS =\
 
																 	starpu_machine_display.1 \
															
 
																 	starpu_perfmodel_display.1 \
															
 
																 	starpu_perfmodel_plot.1	\
															
 
																+	starpu_lp2paje.1	\
															
 
																 	starpu_workers_activity.1 \
															
 
																 	starpu_codelet_profile.1 \
															
 
																 	starpu_codelet_histo_profile.1
															
--- a/tools/cbc2paje.c
+++ b/tools/cbc2paje.c
@@ -1,156 +0,0 @@
 
																-/* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																- *
															
 
																- * Copyright (C) 2010  Université de Bordeaux 1
															
 
																- *
															
 
																- * StarPU is free software; you can redistribute it and/or modify
															
 
																- * it under the terms of the GNU Lesser General Public License as published by
															
 
																- * the Free Software Foundation; either version 2.1 of the License, or (at
															
 
																- * your option) any later version.
															
 
																- *
															
 
																- * StarPU is distributed in the hope that it will be useful, but
															
 
																- * WITHOUT ANY WARRANTY; without even the implied warranty of
															
 
																- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
															
 
																- *
															
 
																- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
															
 
																- */
															
 
																-
															
 
																-#include <assert.h>
															
 
																-#include <stdio.h>
															
 
																-#include <stdlib.h>
															
 
																-#include <string.h>
															
 
																-
															
 
																-struct task {
															
 
																-	double start;
															
 
																-	double stop;
															
 
																-	int worker;
															
 
																-};
															
 
																-
															
 
																-int main(int argc, char *argv[]) {
															
 
																-	int nw, nt;
															
 
																-	double tmax;
															
 
																-	int i, w, t, t2;
															
 
																-	int foo;
															
 
																-	double bar;
															
 
																-	unsigned long num;
															
 
																-	int b;
															
 
																-	unsigned long next = 1;
															
 
																-
															
 
																-	if (argc != 3) {
															
 
																-		fprintf(stderr,"usage: %s nb_workers nb_tasks\n", argv[0]);
															
 
																-		exit(1);
															
 
																-	}
															
 
																-	nw = atoi(argv[1]);
															
 
																-	nt = atoi(argv[2]);
															
 
																-	fprintf(stderr,"%d workers, %d tasks\n", nw, nt);
															
 
																-	assert(scanf("Optimal - objective value       %lf", &tmax) == 1);
															
 
																-	printf(
															
 
																-"%%EventDef PajeDefineContainerType 1\n"
															
 
																-"%%  Alias         string\n"
															
 
																-"%%  ContainerType string\n"
															
 
																-"%%  Name          string\n"
															
 
																-"%%EndEventDef\n"
															
 
																-"%%EventDef PajeCreateContainer     2\n"
															
 
																-"%%  Time          date\n"
															
 
																-"%%  Alias         string\n"
															
 
																-"%%  Type          string\n"
															
 
																-"%%  Container     string\n"
															
 
																-"%%  Name          string\n"
															
 
																-"%%EndEventDef\n"
															
 
																-"%%EventDef PajeDefineStateType     3\n"
															
 
																-"%%  Alias         string\n"
															
 
																-"%%  ContainerType string\n"
															
 
																-"%%  Name          string\n"
															
 
																-"%%EndEventDef\n"
															
 
																-"%%EventDef PajeDestroyContainer    4\n"
															
 
																-"%%  Time          date\n"
															
 
																-"%%  Name          string\n"
															
 
																-"%%  Type          string\n"
															
 
																-"%%EndEventDef\n"
															
 
																-"%%EventDef PajeDefineEntityValue 5\n"
															
 
																-"%%  Alias         string\n"
															
 
																-"%%  EntityType    string\n"
															
 
																-"%%  Name          string\n"
															
 
																-"%%  Color         color\n"
															
 
																-"%%EndEventDef\n"
															
 
																-"%%EventDef PajeSetState 6\n"
															
 
																-"%%  Time          date\n"
															
 
																-"%%  Type          string\n"
															
 
																-"%%  Container     string\n"
															
 
																-"%%  Value         string\n"
															
 
																-"%%EndEventDef\n"
															
 
																-"1 W 0 Worker\n"
															
 
																-);
															
 
																-	printf("3 S W \"Worker State\"\n");
															
 
																-	printf("5 S S Running \"0.0 1.0 0.0\"\n");
															
 
																-	printf("5 F S Idle \"1.0 0.0 0.0\"\n");
															
 
																-	for (i = 0; i < nw; i++)
															
 
																-		printf("2 0 W%d W 0 \"%d\"\n", i, i);
															
 
																-
															
 
																-	for (w = 0; w < nw; w++)
															
 
																-		printf("4 %f W%d W\n", tmax, w);
															
 
																-
															
 
																-	assert(scanf("%d C%d %lf %lf", &foo, &foo, &tmax, &bar) == 4);
															
 
																-	next++;
															
 
																-	{
															
 
																-		struct task task[nt];
															
 
																-		memset(&task, 0, sizeof(task));
															
 
																-		for (t = 0; t < nt; t++) {
															
 
																-			assert(scanf("%d C%d %lf %lf", &foo, &foo, &task[t].stop, &bar) == 4);
															
 
																-			next++;
															
 
																-		}
															
 
																-
															
 
																-		while (1) {
															
 
																-			assert(scanf("%d C%lu", &foo, &num) == 2);
															
 
																-			if (num >= next +
															
 
																-
															
 
																-				/* FIXME */
															
 
																-				//nw*nt
															
 
																-				8*20 + 5*16
															
 
																-
															
 
																-				) {
															
 
																-				next+= 8*20+5*16;
															
 
																-				break;
															
 
																-			}
															
 
																-			/* FIXME */
															
 
																-			if (num-next < 8*20) {
															
 
																-				t = (num - next) / nw;
															
 
																-				w = (num - next) % nw;
															
 
																-			} else {
															
 
																-				unsigned long nnum = (num-next)-8*20;
															
 
																-				t = (nnum / 5) + 20;
															
 
																-				w = (nnum % 5)+3;
															
 
																-			}
															
 
																-
															
 
																-			assert(scanf("%d %lf", &b, &bar) == 2);
															
 
																-			if (b) {
															
 
																-				task[t].worker = w;
															
 
																-				fprintf(stderr,"%lu: task %d on %d: %f\n", num, t, w, task[t].stop);
															
 
																-			}
															
 
																-		}
															
 
																-		while(1) {
															
 
																-			t = num - next;
															
 
																-			if (t > nt)
															
 
																-				break;
															
 
																-			assert(scanf("%lf %lf", &task[t].start, &bar) == 2);
															
 
																-			assert(scanf("%d C%lu", &foo, &num) == 2);
															
 
																-		}
															
 
																-
															
 
																-		for (t = 0; t < nt; t++) {
															
 
																-			printf("6 %f S W%d S\n", task[t].start, task[t].worker);
															
 
																-			printf("6 %f S W%d F\n", task[t].stop, task[t].worker);
															
 
																-		}
															
 
																-
															
 
																-		for (t = 0; t < nt; t++) {
															
 
																-			for (t2 = 0; t2 < nt; t2++) {
															
 
																-				if (t != t2 && task[t].worker == task[t2].worker) {
															
 
																-					if (!(task[t].start >= task[t2].stop
															
 
																-					    || task[t2].start >= task[t].stop)) {
															
 
																-						fprintf(stderr,"oops, %d and %d sharing worker %d !!\n", t, t2, task[t].worker);
															
 
																-					}
															
 
																-				}
															
 
																-			}
															
 
																-		}
															
 
																-	}
															
 
																-
															
 
																-	return 0;
															
 
																-}
															
--- a/tools/lp2paje.c
+++ b/tools/lp2paje.c
@@ -1,6 +1,6 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2010-2011  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2010-2011, 2013  Université de Bordeaux 1
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -14,34 +14,48 @@
 
																  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
															
 
																  */
															
 
																+#include <config.h>
															
 
																 #include <assert.h>
															
 
																 #include <stdio.h>
															
 
																 #include <stdlib.h>
															
 
																 #include <string.h>
															
 
																+#define PROGNAME "starpu_lp2paje"
															
 
																+
															
 
																 struct task {
															
 
																 	double start;
															
 
																 	double stop;
															
 
																+	int num;
															
 
																 	int worker;
															
 
																 };
															
 
																 int main(int argc, char *argv[]) {
															
 
																 	int nw, nt;
															
 
																 	double tmax;
															
 
																-	int i, w, t, t2;
															
 
																+	int i, w, ww, t, tt, t2;
															
 
																 	int foo;
															
 
																 	double bar;
															
 
																-	unsigned long num;
															
 
																-	unsigned long next = 1;
															
 
																-	if (argc != 3) {
															
 
																-		fprintf(stderr,"usage: %s nb_workers nb_tasks\n", argv[0]);
															
 
																-		exit(1);
															
 
																+	if (argc != 1) {
															
 
																+		if (strcmp(argv[1], "-v") == 0
															
 
																+		 || strcmp(argv[1], "--version") == 0)
															
 
																+		{
															
 
																+			fprintf(stderr, PROGNAME " (" PACKAGE_NAME ") " PACKAGE_VERSION "\n");
															
 
																+			exit(EXIT_SUCCESS);
															
 
																+		}
															
 
																+		fprintf(stderr, "Convert schedule optimized by lp into the Paje format\n\n");
															
 
																+		fprintf(stderr, "Usage: lp_solve file.lp | %s > paje.trace\n", PROGNAME);
															
 
																+		fprintf(stderr, "Reports bugs to <"PACKAGE_BUGREPORT">.");
															
 
																+		fprintf(stderr, "\n");
															
 
																+		exit(EXIT_SUCCESS);
															
 
																 	}
															
 
																-	nw = atoi(argv[1]);
															
 
																-	nt = atoi(argv[2]);
															
 
																-	fprintf(stderr,"%d workers, %d tasks\n", nw, nt);
															
 
																+	scanf("Suboptimal solution\n");
															
 
																 	assert(scanf("\nValue of objective function: %lf\n", &tmax) == 1);
															
 
																+
															
 
																+	assert(scanf("Actual values of the variables:\n") == 0);
															
 
																+	assert(scanf("tmax %lf\n", &tmax) == 1);
															
 
																+	assert(scanf("nt %d\n", &nt) == 1);
															
 
																+	assert(scanf("nw %d\n", &nw) == 1);
															
 
																 	printf(
															
 
																 "%%EventDef PajeDefineContainerType 1\n"
															
 
																 "%%  Alias         string\n"
															
@@ -80,7 +94,8 @@ int main(int argc, char *argv[]) {
 
																 "1 W 0 Worker\n"
															
 
																 );
															
 
																 	printf("3 S W \"Worker State\"\n");
															
 
																-	printf("5 S S Running \"0.0 1.0 0.0\"\n");
															
 
																+	for (t = 0; t < nt; t++)
															
 
																+		printf("5 R%d S Running_%d \"0.0 1.0 0.0\"\n", t, t);
															
 
																 	printf("5 F S Idle \"1.0 0.0 0.0\"\n");
															
 
																 	for (i = 0; i < nw; i++)
															
 
																 		printf("2 0 W%d W 0 \"%d\"\n", i, i);
															
@@ -88,52 +103,32 @@ int main(int argc, char *argv[]) {
 
																 	for (w = 0; w < nw; w++)
															
 
																 		printf("4 %f W%d W\n", tmax, w);
															
 
																-	assert(scanf("Actual values of the variables:\n") == 0);
															
 
																-	assert(scanf("tmax %lf\n", &tmax) == 1);
															
 
																-	next++;
															
 
																+	fprintf(stderr,"%d workers, %d tasks\n", nw, nt);
															
 
																 	{
															
 
																 		struct task task[nt];
															
 
																 		memset(&task, 0, sizeof(task));
															
 
																-		for (t = 0; t < nt; t++) {
															
 
																+		for (t = nt-1; t >= 0; t--) {
															
 
																 			assert(scanf("c%d %lf\n", &foo, &task[t].stop) == 2);
															
 
																-			next++;
															
 
																 		}
															
 
																-		num = next;
															
 
																-		while (1) {
															
 
																-			if (num >= next +
															
 
																-
															
 
																-				/* FIXME */
															
 
																-				//nw*nt
															
 
																-				8*84 + 5*49
															
 
																-
															
 
																-				) {
															
 
																-				next+= 8*84+5*49;
															
 
																-				break;
															
 
																-			}
															
 
																-			assert(scanf("t%dw%d %lf\n", &foo, &foo, &bar) == 3);
															
 
																-			/* FIXME */
															
 
																-			if (num-next < 8*84) {
															
 
																-				t = (num - next) / nw;
															
 
																-				w = (num - next) % nw;
															
 
																-			} else {
															
 
																-				unsigned long nnum = (num-next)-8*84;
															
 
																-				t = (nnum / 5) + 84;
															
 
																-				w = (nnum % 5)+3;
															
 
																-			}
															
 
																+		for (t = nt-1; t >= 0; t--)
															
 
																+			for (w = 0; w < nw; w++) {
															
 
																+				assert(scanf("t%dw%d %lf\n", &tt, &ww, &bar) == 3);
															
 
																+				assert(ww == w);
															
 
																-			if (bar > 0.5) {
															
 
																-				task[t].worker = w;
															
 
																-				fprintf(stderr,"%lu: task %d on %d: %f\n", num, t, w, task[t].stop);
															
 
																-			}
															
 
																-			num++;
															
 
																+				if (bar > 0.5) {
															
 
																+					task[t].num = tt;
															
 
																+					task[t].worker = w;
															
 
																+				}
															
 
																 		}
															
 
																-		for (t = 0; t < nt; t++) {
															
 
																-			assert(scanf("s%d %lf\n", &foo, &task[t].start) == 2);
															
 
																+		for (t = nt-1; t >= 0; t--) {
															
 
																+			assert(scanf("s%d %lf\n", &tt, &task[t].start) == 2);
															
 
																+			fprintf(stderr,"%d: task %d on %d: %f - %f\n", nt-1-t, tt, task[t].worker, task[t].start, task[t].stop);
															
 
																+			assert(tt == task[t].num);
															
 
																 		}
															
 
																 		for (t = 0; t < nt; t++) {
															
 
																-			printf("6 %f S W%d S\n", task[t].start, task[t].worker);
															
 
																+			printf("6 %f S W%d R%d\n", task[t].start, task[t].worker, t);
															
 
																 			printf("6 %f S W%d F\n", task[t].stop, task[t].worker);
															
 
																 		}
															
@@ -142,7 +137,7 @@ int main(int argc, char *argv[]) {
 
																 				if (t != t2 && task[t].worker == task[t2].worker) {
															
 
																 					if (!(task[t].start >= task[t2].stop
															
 
																 					    || task[t2].start >= task[t].stop)) {
															
 
																-						fprintf(stderr,"oops, %d and %d sharing worker %d !!\n", t, t2, task[t].worker);
															
 
																+						fprintf(stderr,"oops, %d and %d sharing worker %d !!\n", task[t].num, task[t2].num, task[t].worker);
															
 
																 					}
															
 
																 				}
															
 
																 			}