|
@@ -27,11 +27,7 @@
|
|
|
#include <core/workers.h>
|
|
|
#include <sched_policies/fifo_queues.h>
|
|
|
#include <core/perfmodel/perfmodel.h>
|
|
|
-#include <starpu_parameters.h>
|
|
|
#include <core/debug.h>
|
|
|
-#ifdef STARPU_USE_TOP
|
|
|
-#include <top/starpu_top_core.h>
|
|
|
-#endif /* !STARPU_USE_TOP */
|
|
|
|
|
|
#ifndef DBL_MIN
|
|
|
#define DBL_MIN __DBL_MIN__
|
|
@@ -54,12 +50,23 @@ struct _starpu_dmda_data
|
|
|
long int ready_task_cnt;
|
|
|
};
|
|
|
|
|
|
-static double alpha = _STARPU_DEFAULT_ALPHA;
|
|
|
-static double beta = _STARPU_DEFAULT_BETA;
|
|
|
-static double _gamma = _STARPU_DEFAULT_GAMMA;
|
|
|
static double idle_power = 0.0;
|
|
|
|
|
|
+/* The dmda scheduling policy uses
|
|
|
+ *
|
|
|
+ * alpha * T_computation + beta * T_communication + gamma * Consumption
|
|
|
+ *
|
|
|
+ * Here are the default values of alpha, beta, gamma
|
|
|
+ */
|
|
|
+
|
|
|
+#define _STARPU_SCHED_ALPHA_DEFAULT 1.0
|
|
|
+#define _STARPU_SCHED_BETA_DEFAULT 1.0
|
|
|
+#define _STARPU_SCHED_GAMMA_DEFAULT 1000.0
|
|
|
+
|
|
|
#ifdef STARPU_USE_TOP
|
|
|
+static double alpha = _STARPU_SCHED_ALPHA_DEFAULT;
|
|
|
+static double beta = _STARPU_SCHED_BETA_DEFAULT;
|
|
|
+static double _gamma = _STARPU_SCHED_GAMMA_DEFAULT;
|
|
|
static const float alpha_minimum=0;
|
|
|
static const float alpha_maximum=10.0;
|
|
|
static const float beta_minimum=0;
|
|
@@ -80,7 +87,7 @@ static int count_non_ready_buffers(struct starpu_task *task, unsigned node)
|
|
|
{
|
|
|
starpu_data_handle_t handle;
|
|
|
|
|
|
- handle = task->handles[index];
|
|
|
+ handle = STARPU_TASK_GET_HANDLE(task, index);
|
|
|
|
|
|
int is_valid;
|
|
|
starpu_data_query_status(handle, node, NULL, &is_valid, NULL);
|
|
@@ -281,15 +288,10 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
|
|
|
|
|
|
_STARPU_PTHREAD_MUTEX_LOCK(sched_mutex);
|
|
|
|
|
|
-/* Sometimes workers didn't take the tasks as early as we expected */
|
|
|
+ /* Sometimes workers didn't take the tasks as early as we expected */
|
|
|
fifo->exp_start = STARPU_MAX(fifo->exp_start, starpu_timing_now());
|
|
|
fifo->exp_end = fifo->exp_start + fifo->exp_len;
|
|
|
- if(!isnan(predicted))
|
|
|
- {
|
|
|
- fifo->exp_end += predicted;
|
|
|
- fifo->exp_len += predicted;
|
|
|
- }
|
|
|
-
|
|
|
+
|
|
|
if (starpu_timing_now() + predicted_transfer < fifo->exp_end)
|
|
|
{
|
|
|
/* We may hope that the transfer will be finished by
|
|
@@ -309,16 +311,21 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
|
|
|
fifo->exp_len += predicted_transfer;
|
|
|
}
|
|
|
|
|
|
+ if(!isnan(predicted))
|
|
|
+ {
|
|
|
+ fifo->exp_end += predicted;
|
|
|
+ fifo->exp_len += predicted;
|
|
|
+ }
|
|
|
+
|
|
|
_STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
|
|
|
|
|
|
task->predicted = predicted;
|
|
|
task->predicted_transfer = predicted_transfer;
|
|
|
|
|
|
#ifdef STARPU_USE_TOP
|
|
|
- if (_starpu_top_status_get())
|
|
|
- _starpu_top_task_prevision(task, best_workerid,
|
|
|
- (unsigned long long)(fifo->exp_end-predicted)/1000,
|
|
|
- (unsigned long long)fifo->exp_end/1000);
|
|
|
+ starpu_top_task_prevision(task, best_workerid,
|
|
|
+ (unsigned long long)(fifo->exp_end-predicted)/1000,
|
|
|
+ (unsigned long long)fifo->exp_end/1000);
|
|
|
#endif /* !STARPU_USE_TOP */
|
|
|
|
|
|
if (starpu_get_prefetch_flag())
|
|
@@ -388,6 +395,17 @@ static int _dm_push_task(struct starpu_task *task, unsigned prio, unsigned sched
|
|
|
unsigned memory_node = starpu_worker_get_memory_node(worker);
|
|
|
enum starpu_perf_archtype perf_arch = starpu_worker_get_perf_archtype(worker);
|
|
|
|
|
|
+ /* Sometimes workers didn't take the tasks as early as we expected */
|
|
|
+ starpu_pthread_mutex_t *sched_mutex;
|
|
|
+ starpu_pthread_cond_t *sched_cond;
|
|
|
+ starpu_worker_get_sched_condition(worker, &sched_mutex, &sched_cond);
|
|
|
+
|
|
|
+ _STARPU_PTHREAD_MUTEX_LOCK(sched_mutex);
|
|
|
+ fifo->exp_start = STARPU_MAX(fifo->exp_start, starpu_timing_now());
|
|
|
+ fifo->exp_end = fifo->exp_start + fifo->exp_len;
|
|
|
+ _STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
|
|
|
+
|
|
|
+
|
|
|
for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
|
|
|
{
|
|
|
if (!starpu_worker_can_execute_task(worker, task, nimpl))
|
|
@@ -398,27 +416,40 @@ static int _dm_push_task(struct starpu_task *task, unsigned prio, unsigned sched
|
|
|
}
|
|
|
|
|
|
double exp_end;
|
|
|
- starpu_pthread_mutex_t *sched_mutex;
|
|
|
- starpu_pthread_cond_t *sched_cond;
|
|
|
- starpu_worker_get_sched_condition(worker, &sched_mutex, &sched_cond);
|
|
|
-
|
|
|
- /* Sometimes workers didn't take the tasks as early as we expected */
|
|
|
- _STARPU_PTHREAD_MUTEX_LOCK(sched_mutex);
|
|
|
- fifo->exp_start = STARPU_MAX(fifo->exp_start, starpu_timing_now());
|
|
|
- fifo->exp_end = fifo->exp_start + fifo->exp_len;
|
|
|
- _STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
|
|
|
-
|
|
|
-
|
|
|
double local_length = starpu_task_expected_length(task, perf_arch, nimpl);
|
|
|
double local_penalty = starpu_task_expected_data_transfer_time(memory_node, task);
|
|
|
double ntasks_end = fifo->ntasks / starpu_worker_get_relative_speedup(perf_arch);
|
|
|
|
|
|
//_STARPU_DEBUG("Scheduler dm: task length (%lf) worker (%u) kernel (%u) \n", local_length,worker,nimpl);
|
|
|
|
|
|
+ /*
|
|
|
+ * This implements a default greedy scheduler for the
|
|
|
+ * case of tasks which have no performance model, or
|
|
|
+ * whose performance model is not calibrated yet.
|
|
|
+ *
|
|
|
+ * It simply uses the number of tasks already pushed to
|
|
|
+ * the workers, divided by the relative performance of
|
|
|
+ * a CPU and of a GPU.
|
|
|
+ *
|
|
|
+ * This is always computed, but the ntasks_best
|
|
|
+ * selection is only really used if the task indeed has
|
|
|
+ * no performance model, or is not calibrated yet.
|
|
|
+ */
|
|
|
if (ntasks_best == -1
|
|
|
- || (!calibrating && ntasks_end < ntasks_best_end) /* Not calibrating, take better task */
|
|
|
- || (!calibrating && isnan(local_length)) /* Not calibrating but this worker is being calibrated */
|
|
|
- || (calibrating && isnan(local_length) && ntasks_end < ntasks_best_end) /* Calibrating, compete this worker with other non-calibrated */
|
|
|
+
|
|
|
+ /* Always compute the greedy decision, at least for
|
|
|
+ * the tasks with no performance model. */
|
|
|
+ || (!calibrating && ntasks_end < ntasks_best_end)
|
|
|
+
|
|
|
+ /* The performance model of this task is not
|
|
|
+ * calibrated on this worker, try to run it there
|
|
|
+ * to calibrate it. */
|
|
|
+ || (!calibrating && isnan(local_length))
|
|
|
+
|
|
|
+ /* The performance model of this task is not
|
|
|
+ * calibrated on this worker either, rather run it
|
|
|
+ * there if this one is low on scheduled tasks. */
|
|
|
+ || (calibrating && isnan(local_length) && ntasks_end < ntasks_best_end)
|
|
|
)
|
|
|
{
|
|
|
ntasks_best_end = ntasks_end;
|
|
@@ -509,6 +540,15 @@ static void compute_all_performance_predictions(struct starpu_task *task,
|
|
|
enum starpu_perf_archtype perf_arch = starpu_worker_get_perf_archtype(worker);
|
|
|
unsigned memory_node = starpu_worker_get_memory_node(worker);
|
|
|
|
|
|
+ /* Sometimes workers didn't take the tasks as early as we expected */
|
|
|
+ starpu_pthread_mutex_t *sched_mutex;
|
|
|
+ starpu_pthread_cond_t *sched_cond;
|
|
|
+ starpu_worker_get_sched_condition(worker, &sched_mutex, &sched_cond);
|
|
|
+
|
|
|
+ _STARPU_PTHREAD_MUTEX_LOCK(sched_mutex);
|
|
|
+ fifo->exp_start = STARPU_MAX(fifo->exp_start, starpu_timing_now());
|
|
|
+ _STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
|
|
|
+
|
|
|
for(nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
|
|
|
{
|
|
|
if (!starpu_worker_can_execute_task(worker, task, nimpl))
|
|
@@ -517,15 +557,7 @@ static void compute_all_performance_predictions(struct starpu_task *task,
|
|
|
continue;
|
|
|
}
|
|
|
|
|
|
- /* Sometimes workers didn't take the tasks as early as we expected */
|
|
|
- starpu_pthread_mutex_t *sched_mutex;
|
|
|
- starpu_pthread_cond_t *sched_cond;
|
|
|
- starpu_worker_get_sched_condition(worker, &sched_mutex, &sched_cond);
|
|
|
-
|
|
|
STARPU_ASSERT_MSG(fifo != NULL, "worker %d ctx %d\n", worker, sched_ctx_id);
|
|
|
- _STARPU_PTHREAD_MUTEX_LOCK(sched_mutex);
|
|
|
- fifo->exp_start = STARPU_MAX(fifo->exp_start, starpu_timing_now());
|
|
|
- _STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
|
|
|
exp_end[worker_ctx][nimpl] = fifo->exp_start + fifo->exp_len;
|
|
|
if (exp_end[worker_ctx][nimpl] > max_exp_end)
|
|
|
max_exp_end = exp_end[worker_ctx][nimpl];
|
|
@@ -551,10 +583,34 @@ static void compute_all_performance_predictions(struct starpu_task *task,
|
|
|
|
|
|
double ntasks_end = fifo->ntasks / starpu_worker_get_relative_speedup(perf_arch);
|
|
|
|
|
|
+ /*
|
|
|
+ * This implements a default greedy scheduler for the
|
|
|
+ * case of tasks which have no performance model, or
|
|
|
+ * whose performance model is not calibrated yet.
|
|
|
+ *
|
|
|
+ * It simply uses the number of tasks already pushed to
|
|
|
+ * the workers, divided by the relative performance of
|
|
|
+ * a CPU and of a GPU.
|
|
|
+ *
|
|
|
+ * This is always computed, but the ntasks_best
|
|
|
+ * selection is only really used if the task indeed has
|
|
|
+ * no performance model, or is not calibrated yet.
|
|
|
+ */
|
|
|
if (ntasks_best == -1
|
|
|
- || (!calibrating && ntasks_end < ntasks_best_end) /* Not calibrating, take better worker */
|
|
|
- || (!calibrating && isnan(local_task_length[worker_ctx][nimpl])) /* Not calibrating but this worker is being calibrated */
|
|
|
- || (calibrating && isnan(local_task_length[worker_ctx][nimpl]) && ntasks_end < ntasks_best_end) /* Calibrating, compete this worker with other non-calibrated */
|
|
|
+
|
|
|
+ /* Always compute the greedy decision, at least for
|
|
|
+ * the tasks with no performance model. */
|
|
|
+ || (!calibrating && ntasks_end < ntasks_best_end)
|
|
|
+
|
|
|
+ /* The performance model of this task is not
|
|
|
+ * calibrated on this worker, try to run it there
|
|
|
+ * to calibrate it. */
|
|
|
+ || (!calibrating && isnan(local_task_length[worker_ctx][nimpl]))
|
|
|
+
|
|
|
+ /* The performance model of this task is not
|
|
|
+ * calibrated on this worker either, rather run it
|
|
|
+ * there if this one is low on scheduled tasks. */
|
|
|
+ || (calibrating && isnan(local_task_length[worker_ctx][nimpl]) && ntasks_end < ntasks_best_end)
|
|
|
)
|
|
|
{
|
|
|
ntasks_best_end = ntasks_end;
|
|
@@ -722,64 +778,18 @@ static int dmda_push_sorted_task(struct starpu_task *task)
|
|
|
#ifdef STARPU_DEVEL
|
|
|
#warning TODO: after defining a scheduling window, use that instead of empty_ctx_tasks
|
|
|
#endif
|
|
|
- unsigned sched_ctx_id = task->sched_ctx;
|
|
|
- starpu_pthread_mutex_t *changing_ctx_mutex = starpu_sched_ctx_get_changing_ctx_mutex(sched_ctx_id);
|
|
|
- unsigned nworkers;
|
|
|
- int ret_val = -1;
|
|
|
-
|
|
|
- _STARPU_PTHREAD_MUTEX_LOCK(changing_ctx_mutex);
|
|
|
- nworkers = starpu_sched_ctx_get_nworkers(sched_ctx_id);
|
|
|
- if(nworkers == 0)
|
|
|
- {
|
|
|
- _STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
|
|
|
- return ret_val;
|
|
|
- }
|
|
|
-
|
|
|
- ret_val = _dmda_push_task(task, 1, sched_ctx_id);
|
|
|
- _STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
|
|
|
- return ret_val;
|
|
|
-
|
|
|
+ return _dmda_push_task(task, 1, task->sched_ctx);
|
|
|
}
|
|
|
|
|
|
static int dm_push_task(struct starpu_task *task)
|
|
|
{
|
|
|
- unsigned sched_ctx_id = task->sched_ctx;
|
|
|
- starpu_pthread_mutex_t *changing_ctx_mutex = starpu_sched_ctx_get_changing_ctx_mutex(sched_ctx_id);
|
|
|
- unsigned nworkers;
|
|
|
- int ret_val = -1;
|
|
|
-
|
|
|
- _STARPU_PTHREAD_MUTEX_LOCK(changing_ctx_mutex);
|
|
|
- nworkers = starpu_sched_ctx_get_nworkers(sched_ctx_id);
|
|
|
- if(nworkers == 0)
|
|
|
- {
|
|
|
- _STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
|
|
|
- return ret_val;
|
|
|
- }
|
|
|
-
|
|
|
- ret_val = _dm_push_task(task, 0, sched_ctx_id);
|
|
|
- _STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
|
|
|
- return ret_val;
|
|
|
+ return _dm_push_task(task, 0, task->sched_ctx);
|
|
|
}
|
|
|
|
|
|
static int dmda_push_task(struct starpu_task *task)
|
|
|
{
|
|
|
- unsigned sched_ctx_id = task->sched_ctx;
|
|
|
- starpu_pthread_mutex_t *changing_ctx_mutex = starpu_sched_ctx_get_changing_ctx_mutex(sched_ctx_id);
|
|
|
- unsigned nworkers;
|
|
|
- int ret_val = -1;
|
|
|
-
|
|
|
- _STARPU_PTHREAD_MUTEX_LOCK(changing_ctx_mutex);
|
|
|
- nworkers = starpu_sched_ctx_get_nworkers(sched_ctx_id);
|
|
|
- if(nworkers == 0)
|
|
|
- {
|
|
|
- _STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
|
|
|
- return ret_val;
|
|
|
- }
|
|
|
-
|
|
|
STARPU_ASSERT(task);
|
|
|
- ret_val = _dmda_push_task(task, 0, sched_ctx_id);
|
|
|
- _STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
|
|
|
- return ret_val;
|
|
|
+ return _dmda_push_task(task, 0, task->sched_ctx);
|
|
|
}
|
|
|
|
|
|
static void dmda_add_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers)
|
|
@@ -820,9 +830,9 @@ static void initialize_dmda_policy(unsigned sched_ctx_id)
|
|
|
starpu_sched_ctx_create_worker_collection(sched_ctx_id, STARPU_WORKER_LIST);
|
|
|
|
|
|
struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)malloc(sizeof(struct _starpu_dmda_data));
|
|
|
- dt->alpha = _STARPU_DEFAULT_ALPHA;
|
|
|
- dt->beta = _STARPU_DEFAULT_BETA;
|
|
|
- dt->_gamma = _STARPU_DEFAULT_GAMMA;
|
|
|
+ dt->alpha = _STARPU_SCHED_ALPHA_DEFAULT;
|
|
|
+ dt->beta = _STARPU_SCHED_BETA_DEFAULT;
|
|
|
+ dt->_gamma = _STARPU_SCHED_GAMMA_DEFAULT;
|
|
|
dt->idle_power = 0.0;
|
|
|
|
|
|
starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)dt);
|
|
@@ -851,13 +861,13 @@ static void initialize_dmda_policy(unsigned sched_ctx_id)
|
|
|
|
|
|
#ifdef STARPU_USE_TOP
|
|
|
starpu_top_register_parameter_float("DMDA_ALPHA", &alpha,
|
|
|
- alpha_minimum, alpha_maximum, param_modified);
|
|
|
+ alpha_minimum, alpha_maximum, param_modified);
|
|
|
starpu_top_register_parameter_float("DMDA_BETA", &beta,
|
|
|
- beta_minimum, beta_maximum, param_modified);
|
|
|
+ beta_minimum, beta_maximum, param_modified);
|
|
|
starpu_top_register_parameter_float("DMDA_GAMMA", &_gamma,
|
|
|
- gamma_minimum, gamma_maximum, param_modified);
|
|
|
+ gamma_minimum, gamma_maximum, param_modified);
|
|
|
starpu_top_register_parameter_float("DMDA_IDLE_POWER", &idle_power,
|
|
|
- idle_power_minimum, idle_power_maximum, param_modified);
|
|
|
+ idle_power_minimum, idle_power_maximum, param_modified);
|
|
|
#endif /* !STARPU_USE_TOP */
|
|
|
}
|
|
|
|
|
@@ -933,14 +943,6 @@ static void dmda_push_task_notify(struct starpu_task *task, int workerid, unsign
|
|
|
fifo->exp_end = fifo->exp_start + fifo->exp_len;
|
|
|
|
|
|
/* If there is no prediction available, we consider the task has a null length */
|
|
|
- if (!isnan(predicted))
|
|
|
- {
|
|
|
- task->predicted = predicted;
|
|
|
- fifo->exp_end += predicted;
|
|
|
- fifo->exp_len += predicted;
|
|
|
- }
|
|
|
-
|
|
|
- /* If there is no prediction available, we consider the task has a null length */
|
|
|
if (!isnan(predicted_transfer))
|
|
|
{
|
|
|
if (starpu_timing_now() + predicted_transfer < fifo->exp_end)
|
|
@@ -960,6 +962,14 @@ static void dmda_push_task_notify(struct starpu_task *task, int workerid, unsign
|
|
|
fifo->exp_len += predicted_transfer;
|
|
|
}
|
|
|
|
|
|
+ /* If there is no prediction available, we consider the task has a null length */
|
|
|
+ if (!isnan(predicted))
|
|
|
+ {
|
|
|
+ task->predicted = predicted;
|
|
|
+ fifo->exp_end += predicted;
|
|
|
+ fifo->exp_len += predicted;
|
|
|
+ }
|
|
|
+
|
|
|
fifo->ntasks++;
|
|
|
|
|
|
_STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
|