Browse Source

Merge @9469:9571

Marc Sergent 12 years ago
parent
commit
f722a8b4c2
74 changed files with 916 additions and 731 deletions
  1. 5 0
      ChangeLog
  2. 4 1
      configure.ac
  3. 63 8
      doc/chapters/advanced-examples.texi
  4. 79 8
      doc/chapters/api.texi
  5. 2 1
      doc/chapters/basic-examples.texi
  6. 1 0
      doc/chapters/configuration.texi
  7. 4 3
      doc/chapters/perf-optimization.texi
  8. 13 1
      examples/Makefile.am
  9. 150 0
      examples/basic_examples/dynamic_handles.c
  10. 21 14
      examples/cholesky/cholesky.h
  11. 7 2
      examples/cholesky/cholesky_implicit.c
  12. 7 1
      examples/openmp/vector_scal.c
  13. 1 1
      examples/pi/pi.c
  14. 2 0
      include/starpu.h
  15. 3 0
      include/starpu_config.h.in
  16. 2 0
      include/starpu_deprecated_api.h
  17. 0 3
      include/starpu_sched_ctx.h
  18. 26 4
      include/starpu_task.h
  19. 2 2
      include/starpu_task_util.h
  20. 8 0
      include/starpu_top.h
  21. 0 1
      include/starpu_worker.h
  22. 11 6
      mpi/src/starpu_mpi_insert_task.c
  23. 0 1
      src/Makefile.am
  24. 0 10
      src/common/thread.h
  25. 1 0
      src/core/combined_workers.c
  26. 18 11
      src/core/dependencies/data_concurrency.c
  27. 3 3
      src/core/dependencies/implicit_data_deps.c
  28. 14 3
      src/core/jobs.c
  29. 10 0
      src/core/jobs.h
  30. 25 2
      src/core/parallel_task.c
  31. 0 24
      src/core/parallel_task.h
  32. 5 5
      src/core/perfmodel/perfmodel.c
  33. 2 2
      src/core/perfmodel/perfmodel_history.c
  34. 1 1
      src/core/sched_ctx.c
  35. 3 0
      src/core/sched_ctx.h
  36. 16 8
      src/core/sched_policy.c
  37. 35 11
      src/core/task.c
  38. 3 0
      src/core/task.h
  39. 7 0
      src/core/workers.c
  40. 8 7
      src/datawizard/coherency.c
  41. 1 1
      src/datawizard/filters.c
  42. 1 1
      src/datawizard/footprint.c
  43. 16 14
      src/datawizard/reduction.c
  44. 23 11
      src/debug/traces/starpu_fxt.c
  45. 8 6
      src/debug/traces/starpu_paje.c
  46. 1 1
      src/drivers/cpu/driver_cpu.c
  47. 1 1
      src/drivers/cuda/driver_cuda.c
  48. 3 3
      src/drivers/gordon/driver_gordon.c
  49. 2 2
      src/drivers/opencl/driver_opencl.c
  50. 21 15
      src/profiling/bound.c
  51. 118 108
      src/sched_policies/deque_modeling_policy_data_aware.c
  52. 3 4
      src/sched_policies/deque_queues.c
  53. 0 1
      src/sched_policies/deque_queues.h
  54. 1 2
      src/sched_policies/detect_combined_workers.c
  55. 2 15
      src/sched_policies/eager_central_policy.c
  56. 0 13
      src/sched_policies/eager_central_priority_policy.c
  57. 0 2
      src/sched_policies/fifo_queues.h
  58. 40 61
      src/sched_policies/parallel_eager.c
  59. 17 40
      src/sched_policies/parallel_heft.c
  60. 1 16
      src/sched_policies/random_policy.c
  61. 0 1
      src/sched_policies/stack_queues.h
  62. 0 15
      src/sched_policies/work_stealing_policy.c
  63. 0 11
      src/starpu_parameters.h
  64. 2 2
      src/top/starpu_top_core.h
  65. 6 3
      src/top/starpu_top_task.c
  66. 2 2
      src/util/starpu_data_cpy.c
  67. 12 6
      src/util/starpu_insert_task.c
  68. 19 14
      src/util/starpu_insert_task_utils.c
  69. 4 4
      src/util/starpu_insert_task_utils.h
  70. 1 0
      tests/Makefile.am
  71. 2 6
      tests/main/insert_task.c
  72. 5 3
      tools/Makefile.am
  73. 0 156
      tools/cbc2paje.c
  74. 42 47
      tools/lp2paje.c

+ 5 - 0
ChangeLog

@@ -119,6 +119,8 @@ New features:
     pthread API. It is provided with 2 implementations: a pthread one
     pthread API. It is provided with 2 implementations: a pthread one
     and a Simgrid one. Applications using StarPU and wishing to use
     and a Simgrid one. Applications using StarPU and wishing to use
     the Simgrid StarPU features should use it.
     the Simgrid StarPU features should use it.
+  * Allow a dynamically allocated number of buffers per task, thereby
+    overriding the value defined by --enable-maxbuffers=XXX
 
 
 Small features:
 Small features:
   * Add starpu_worker_get_by_type and starpu_worker_get_by_devid
   * Add starpu_worker_get_by_type and starpu_worker_get_by_devid
@@ -134,6 +136,9 @@ Small features:
   * New configure option --enable-mpi-progression-hook to enable the
   * New configure option --enable-mpi-progression-hook to enable the
     activity polling method for StarPU-MPI.
     activity polling method for StarPU-MPI.
   * Permit to disable sequential consistency for a given task.
   * Permit to disable sequential consistency for a given task.
+  * New macro STARPU_RELEASE_VERSION
+  * New function starpu_get_version() to return as 3 integers the
+    release version of StarPU.
 
 
 Changes:
 Changes:
   * Fix the block filter functions.
   * Fix the block filter functions.

+ 4 - 1
configure.ac

@@ -25,11 +25,14 @@ dnl Versioning.
 
 
 STARPU_MAJOR_VERSION="`echo $PACKAGE_VERSION | cut -d . -f 1`"
 STARPU_MAJOR_VERSION="`echo $PACKAGE_VERSION | cut -d . -f 1`"
 STARPU_MINOR_VERSION="`echo $PACKAGE_VERSION | cut -d . -f 2`"
 STARPU_MINOR_VERSION="`echo $PACKAGE_VERSION | cut -d . -f 2`"
+STARPU_RELEASE_VERSION="`echo $PACKAGE_VERSION | cut -d . -f 3`"
 AC_SUBST([STARPU_MAJOR_VERSION])
 AC_SUBST([STARPU_MAJOR_VERSION])
 AC_SUBST([STARPU_MINOR_VERSION])
 AC_SUBST([STARPU_MINOR_VERSION])
+AC_SUBST([STARPU_RELEASE_VERSION])
 AC_SUBST([STARPU_EFFECTIVE_VERSION])
 AC_SUBST([STARPU_EFFECTIVE_VERSION])
 AC_DEFINE_UNQUOTED([STARPU_MAJOR_VERSION], [$STARPU_MAJOR_VERSION], [Major version number of StarPU.])
 AC_DEFINE_UNQUOTED([STARPU_MAJOR_VERSION], [$STARPU_MAJOR_VERSION], [Major version number of StarPU.])
 AC_DEFINE_UNQUOTED([STARPU_MINOR_VERSION], [$STARPU_MINOR_VERSION], [Minor version number of StarPU.])
 AC_DEFINE_UNQUOTED([STARPU_MINOR_VERSION], [$STARPU_MINOR_VERSION], [Minor version number of StarPU.])
+AC_DEFINE_UNQUOTED([STARPU_RELEASE_VERSION], [$STARPU_RELEASE_VERSION], [Release version number of StarPU.])
 
 
 . "$srcdir/STARPU-VERSION"
 . "$srcdir/STARPU-VERSION"
 AC_SUBST([LIBSTARPU_INTERFACE_CURRENT])
 AC_SUBST([LIBSTARPU_INTERFACE_CURRENT])
@@ -264,7 +267,7 @@ AC_MSG_RESULT($max_sched_ctxs)
 AC_DEFINE_UNQUOTED(STARPU_NMAX_SCHED_CTXS, [$max_sched_ctxs], [Maximum number of sched_ctxs supported])
 AC_DEFINE_UNQUOTED(STARPU_NMAX_SCHED_CTXS, [$max_sched_ctxs], [Maximum number of sched_ctxs supported])
 
 
 AC_ARG_ENABLE([sc_hypervisor],
 AC_ARG_ENABLE([sc_hypervisor],
-  [AS_HELP_STRING([--enable-sct-hypervisor],
+  [AS_HELP_STRING([--enable-sc-hypervisor],
     [enable resizing contexts (experimental)])],
     [enable resizing contexts (experimental)])],
   [enable_sc_hypervisor="yes"],
   [enable_sc_hypervisor="yes"],
   [enable_sc_hypervisor="no"])
   [enable_sc_hypervisor="no"])

+ 63 - 8
doc/chapters/advanced-examples.texi

@@ -23,6 +23,7 @@
 * Defining a New Scheduling Policy::
 * Defining a New Scheduling Policy::
 * On-GPU rendering::
 * On-GPU rendering::
 * Defining a New Data Interface::
 * Defining a New Data Interface::
+* Setting the Data Handles for a Task::
 * More examples::               More examples shipped with StarPU
 * More examples::               More examples shipped with StarPU
 @end menu
 @end menu
 
 
@@ -473,14 +474,15 @@ probably use @code{lp_solve -timeout 1 test.pl -wmps test.mps} to convert the
 problem to MPS format and then use a better solver, @code{glpsol} might be
 problem to MPS format and then use a better solver, @code{glpsol} might be
 better than @code{lp_solve} for instance (the @code{--pcost} option may be
 better than @code{lp_solve} for instance (the @code{--pcost} option may be
 useful), but sometimes doesn't manage to converge. @code{cbc} might look
 useful), but sometimes doesn't manage to converge. @code{cbc} might look
-slower, but it is parallel. Be sure to try at least all the @code{-B} options
-of @code{lp_solve}. For instance, we often just use
-@code{lp_solve -cc -B1 -Bb -Bg -Bp -Bf -Br -BG -Bd -Bs -BB -Bo -Bc -Bi} , and
-the @code{-gr} option can also be quite useful.
+slower, but it is parallel. For @code{lp_solve}, be sure to try at least all the
+@code{-B} options. For instance, we often just use @code{lp_solve -cc -B1 -Bb
+-Bg -Bp -Bf -Br -BG -Bd -Bs -BB -Bo -Bc -Bi} , and the @code{-gr} option can
+also be quite useful. The resulting schedule can be observed by using the
+@code{starpu_lp2paje} tool, which converts it into the Paje format.
 
 
 Data transfer time can only be taken into account when @code{deps} is set. Only
 Data transfer time can only be taken into account when @code{deps} is set. Only
 data transfers inferred from implicit data dependencies between tasks are taken
 data transfers inferred from implicit data dependencies between tasks are taken
-into account.
+into account. Other data transfers are assumed to be completely overlapped.
 
 
 Setting @code{deps} to 0 will only take into account the actual computations
 Setting @code{deps} to 0 will only take into account the actual computations
 on processing units. It however still properly takes into account the varying
 on processing units. It however still properly takes into account the varying
@@ -492,9 +494,6 @@ the priorities as the StarPU scheduler would, i.e. schedule prioritized
 tasks before less prioritized tasks, to check to which extend this results
 tasks before less prioritized tasks, to check to which extend this results
 to a less optimal solution. This increases even more computation time.
 to a less optimal solution. This increases even more computation time.
 
 
-Note that for simplicity, all this however doesn't take into account data
-transfers, which are assumed to be completely overlapped.
-
 @node Insert Task Utility
 @node Insert Task Utility
 @section Insert Task Utility
 @section Insert Task Utility
 
 
@@ -1264,6 +1263,62 @@ void display_complex_codelet(void *descr[], __attribute__ ((unused)) void *_args
 
 
 The whole code for this complex data interface is available in the
 The whole code for this complex data interface is available in the
 directory @code{examples/interface/}.
 directory @code{examples/interface/}.
+
+@node Setting the Data Handles for a Task
+@section Setting the Data Handles for a Task
+
+The number of data handles a task can manage is fixed by the macro
+@code{STARPU_NMAXBUFS}, whose default value can be changed
+through the configure option @code{--enable-maxbuffers} (see
+@ref{--enable-maxbuffers}).
+
+However, it is possible to define tasks managing more data by using
+the field @code{dyn_handles} when defining a task and the field
+@code{dyn_modes} when defining the corresponding codelet.
+
+@cartouche
+@smallexample
+enum starpu_access_mode modes[STARPU_NMAXBUFS+1] = @{
+	STARPU_R, STARPU_R, ...
+@};
+
+struct starpu_codelet dummy_big_cl =
+@{
+	.cuda_funcs = @{dummy_big_kernel, NULL@},
+	.opencl_funcs = @{dummy_big_kernel, NULL@},
+	.cpu_funcs = @{dummy_big_kernel, NULL@},
+	.nbuffers = STARPU_NMAXBUFS+1,
+	.dyn_modes = modes
+@};
+
+task = starpu_task_create();
+task->cl = &dummy_big_cl;
+task->dyn_handles = malloc(task->cl->nbuffers * sizeof(starpu_data_handle_t));
+for(i=0 ; i<task->cl->nbuffers ; i++)
+@{
+	task->dyn_handles[i] = handle;
+@}
+starpu_task_submit(task);
+@end smallexample
+@end cartouche
+
+@cartouche
+@smallexample
+starpu_data_handle_t *handles = malloc(dummy_big_cl.nbuffers * sizeof(starpu_data_handle_t));
+for(i=0 ; i<dummy_big_cl.nbuffers ; i++)
+@{
+	handles[i] = handle;
+@}
+starpu_insert_task(&dummy_big_cl,
+        	 STARPU_VALUE, &dummy_big_cl.nbuffers, sizeof(dummy_big_cl.nbuffers),
+		 STARPU_DATA_ARRAY, handles, dummy_big_cl.nbuffers,
+		 0);
+@end smallexample
+@end cartouche
+
+The whole code for this example is available in the file
+@code{examples/basic_examples/dynamic_handles.c}.
+
 @node More examples
 @node More examples
 @section More examples
 @section More examples
 
 

+ 79 - 8
doc/chapters/api.texi

@@ -47,6 +47,14 @@ Define the major version of StarPU
 Define the minor version of StarPU
 Define the minor version of StarPU
 @end defmac
 @end defmac
 
 
+@defmac STARPU_RELEASE_VERSION
+Define the release version of StarPU
+@end defmac
+
+@deftypefun void starpu_get_version (int *@var{major}, int *@var{minor}, int *@var{release})
+Return as 3 integers the release version of StarPU.
+@end deftypefun
+
 @node Initialization and Termination
 @node Initialization and Termination
 @section Initialization and Termination
 @section Initialization and Termination
 
 
@@ -1898,6 +1906,17 @@ exceed @code{STARPU_NMAXBUFS}.
 If unsufficient, this value can be set with the @code{--enable-maxbuffers}
 If unsufficient, this value can be set with the @code{--enable-maxbuffers}
 option when configuring StarPU.
 option when configuring StarPU.
 
 
+@item @code{enum starpu_access_mode *dyn_modes}
+Is an array of @code{enum starpu_access_mode}. It describes the
+required access modes to the data needed by the codelet (e.g.
+@code{STARPU_RW}). The number of entries in this array must be
+specified in the @code{nbuffers} field (defined above).
+This field should be used for codelets having a number of data
+greater than @code{STARPU_NMAXBUFS} (@pxref{Setting the Data Handles
+for a Task}).
+When defining a codelet, one should either define this field or the
+field @code{modes} defined above. 
+
 @item @code{struct starpu_perfmodel *model} (optional)
 @item @code{struct starpu_perfmodel *model} (optional)
 This is a pointer to the task duration performance model associated to this
 This is a pointer to the task duration performance model associated to this
 codelet. This optional field is ignored when set to @code{NULL} or
 codelet. This optional field is ignored when set to @code{NULL} or
@@ -1913,8 +1932,8 @@ involved in the parallel execution.
 @item @code{unsigned long per_worker_stats[STARPU_NMAXWORKERS]} (optional)
 @item @code{unsigned long per_worker_stats[STARPU_NMAXWORKERS]} (optional)
 Statistics collected at runtime: this is filled by StarPU and should not be
 Statistics collected at runtime: this is filled by StarPU and should not be
 accessed directly, but for example by calling the
 accessed directly, but for example by calling the
-@code{starpu_display_codelet_stats} function (See
-@ref{starpu_display_codelet_stats} for details).
+@code{starpu_codelet_display_stats} function (See
+@ref{starpu_codelet_display_stats} for details).
 
 
 @item @code{const char *name} (optional)
 @item @code{const char *name} (optional)
 Define the name of the codelet. This can be useful for debugging purposes.
 Define the name of the codelet. This can be useful for debugging purposes.
@@ -1923,6 +1942,7 @@ Define the name of the codelet. This can be useful for debugging purposes.
 @end deftp
 @end deftp
 
 
 @deftypefun void starpu_codelet_init ({struct starpu_codelet} *@var{cl})
 @deftypefun void starpu_codelet_init ({struct starpu_codelet} *@var{cl})
+@anchor{starpu_codelet_init}
 Initialize @var{cl} with default values. Codelets should preferably be
 Initialize @var{cl} with default values. Codelets should preferably be
 initialized statically as shown in @ref{Defining a Codelet}. However
 initialized statically as shown in @ref{Defining a Codelet}. However
 such a initialisation is not always possible, e.g. when using C++.
 such a initialisation is not always possible, e.g. when using C++.
@@ -1983,10 +2003,25 @@ of entries in this array must be specified in the @code{nbuffers} field of the
 If unsufficient, this value can be set with the @code{--enable-maxbuffers}
 If unsufficient, this value can be set with the @code{--enable-maxbuffers}
 option when configuring StarPU.
 option when configuring StarPU.
 
 
+@item @code{starpu_data_handle_t *dyn_handles}
+Is an array of @code{starpu_data_handle_t}. It specifies the handles
+to the different pieces of data accessed by the task. The number
+of entries in this array must be specified in the @code{nbuffers} field of the
+@code{struct starpu_codelet} structure.
+This field should be used for tasks having a number of data
+greater than @code{STARPU_NMAXBUFS} (@pxref{Setting the Data Handles
+for a Task}).
+When defining a task, one should either define this field or the
+field @code{handles} defined above.
+
 @item @code{void *interfaces[STARPU_NMAXBUFS]}
 @item @code{void *interfaces[STARPU_NMAXBUFS]}
 The actual data pointers to the memory node where execution will happen, managed
 The actual data pointers to the memory node where execution will happen, managed
 by the DSM.
 by the DSM.
 
 
+@item @code{void **dyn_interfaces}
+The actual data pointers to the memory node where execution will happen, managed
+by the DSM. Is used when the field @code{dyn_handles} is defined.
+
 @item @code{void *cl_arg} (optional; default: @code{NULL})
 @item @code{void *cl_arg} (optional; default: @code{NULL})
 This pointer is passed to the codelet through the second argument
 This pointer is passed to the codelet through the second argument
 of the codelet implementation (e.g. @code{cpu_func} or @code{cuda_func}).
 of the codelet implementation (e.g. @code{cpu_func} or @code{cuda_func}).
@@ -2134,6 +2169,37 @@ value. This is equivalent to initializing a starpu_task structure with
 the @code{starpu_task_init} function defined above.
 the @code{starpu_task_init} function defined above.
 @end defmac
 @end defmac
 
 
+@defmac STARPU_TASK_GET_HANDLE ({struct starpu_task} *@var{task}, int @var{i})
+Return the i-th data handle of the given task. If the task is defined
+with a static or dynamic number of handles, will either return the
+i-th element of the field @code{handles} or the i-th element of the field
+@code{dyn_handles} (@pxref{Setting the Data Handles for a Task})
+@end defmac
+
+@defmac STARPU_TASK_SET_HANDLE ({struct starpu_task} *@var{task}, starpu_data_handle_t @var{handle}, int @var{i})
+Set the i-th data handle of the given task with the given data handle.
+If the task is defined with a static or dynamic number of handles,
+will either set the i-th element of the field @code{handles} or the
+i-th element of the field @code{dyn_handles} (@pxref{Setting the Data
+Handles for a Task})
+@end defmac
+
+@defmac STARPU_CODELET_GET_MODE ({struct starpu_codelet *}@var{codelet}, int @var{i})
+Return the access mode of the i-th data handle of the given codelet.
+If the codelet is defined with a static or dynamic number of handles,
+will either return the i-th element of the field @code{modes} or the
+i-th element of the field @code{dyn_modes} (@pxref{Setting the Data
+Handles for a Task})
+@end defmac
+
+@defmac STARPU_CODELET_SET_MODE ({struct starpu_codelet *}@var{codelet}, {enum starpu_access_mode} @var{mode}, int @var{i})
+Set the access mode of the i-th data handle of the given codelet.
+If the codelet is defined with a static or dynamic number of handles,
+will either set the i-th element of the field @code{modes} or the
+i-th element of the field @code{dyn_modes} (@pxref{Setting the Data
+Handles for a Task})
+@end defmac
+
 @deftypefun {struct starpu_task *} starpu_task_create (void)
 @deftypefun {struct starpu_task *} starpu_task_create (void)
 Allocate a task structure and initialize it with default values. Tasks
 Allocate a task structure and initialize it with default values. Tasks
 allocated dynamically with @code{starpu_task_create} are automatically freed when the
 allocated dynamically with @code{starpu_task_create} are automatically freed when the
@@ -2145,6 +2211,10 @@ by the task have to be freed by calling
 @code{starpu_task_destroy}.
 @code{starpu_task_destroy}.
 @end deftypefun
 @end deftypefun
 
 
+@deftypefun {struct starpu_task *} starpu_task_dup ({struct starpu_task *}@var{task})
+Allocate a task structure which is the exact duplicate of the given task.
+@end deftypefun
+
 @deftypefun void starpu_task_clean ({struct starpu_task} *@var{task})
 @deftypefun void starpu_task_clean ({struct starpu_task} *@var{task})
 Release all the structures automatically allocated to execute @var{task}, but
 Release all the structures automatically allocated to execute @var{task}, but
 not the task structure itself and values set by the user remain unchanged.
 not the task structure itself and values set by the user remain unchanged.
@@ -2218,8 +2288,8 @@ NULL if it is called either from a thread that is not a task or simply
 because there is no task being executed at the moment.
 because there is no task being executed at the moment.
 @end deftypefun
 @end deftypefun
 
 
-@deftypefun void starpu_display_codelet_stats ({struct starpu_codelet} *@var{cl})
-@anchor{starpu_display_codelet_stats}
+@deftypefun void starpu_codelet_display_stats ({struct starpu_codelet} *@var{cl})
+@anchor{starpu_codelet_display_stats}
 Output on @code{stderr} some statistics on the codelet @var{cl}.
 Output on @code{stderr} some statistics on the codelet @var{cl}.
 @end deftypefun
 @end deftypefun
 
 
@@ -3650,6 +3720,11 @@ Get the description of a combined worker
 Variant of starpu_worker_can_execute_task compatible with combined workers
 Variant of starpu_worker_can_execute_task compatible with combined workers
 @end deftypefun
 @end deftypefun
 
 
+@deftypefun void starpu_parallel_task_barrier_init ({struct starpu_task* }@var{task}, int @var{best_workerid})
+Initialise the barrier for the parallel task, and dispatch the task
+between the different combined workers
+@end deftypefun
+
 @deftp {Data Type} {struct starpu_machine_topology}
 @deftp {Data Type} {struct starpu_machine_topology}
 @table @asis
 @table @asis
 @item @code{unsigned nworkers}
 @item @code{unsigned nworkers}
@@ -3776,10 +3851,6 @@ Delete the worker collection of the specified scheduling context
 Return the worker collection managed by the indicated context
 Return the worker collection managed by the indicated context
 @end deftypefun
 @end deftypefun
 
 
-@deftypefun pthread_mutex_t* starpu_sched_ctx_get_changing_ctx_mutex (unsigned @var{sched_ctx_id})
-TODO
-@end deftypefun
-
 @deftypefun void starpu_sched_ctx_set_context (unsigned *@var{sched_ctx_id})
 @deftypefun void starpu_sched_ctx_set_context (unsigned *@var{sched_ctx_id})
 Set the scheduling context the subsequent tasks will be submitted to
 Set the scheduling context the subsequent tasks will be submitted to
 @end deftypefun
 @end deftypefun

+ 2 - 1
doc/chapters/basic-examples.texi

@@ -140,7 +140,8 @@ struct starpu_codelet cl =
 A codelet is a structure that represents a computational kernel. Such a codelet
 A codelet is a structure that represents a computational kernel. Such a codelet
 may contain an implementation of the same kernel on different architectures
 may contain an implementation of the same kernel on different architectures
 (e.g. CUDA, x86, ...). For compatibility, make sure that the whole
 (e.g. CUDA, x86, ...). For compatibility, make sure that the whole
-structure is initialized to zero, either by using memset, or by letting the
+structure is properly initialized to zero, either by using the
+function @code{starpu_codelet_init} (@pxref{starpu_codelet_init}), or by letting the
 compiler implicitly do it as examplified above.
 compiler implicitly do it as examplified above.
 
 
 The @code{nbuffers} field specifies the number of data buffers that are
 The @code{nbuffers} field specifies the number of data buffers that are

+ 1 - 0
doc/chapters/configuration.texi

@@ -234,6 +234,7 @@ Enable gathering of various data statistics (@pxref{Data statistics}).
 @end defvr
 @end defvr
 
 
 @defvr {Configure option} --enable-maxbuffers
 @defvr {Configure option} --enable-maxbuffers
+@anchor{--enable-maxbuffers}
 Define the maximum number of buffers that tasks will be able to take
 Define the maximum number of buffers that tasks will be able to take
 as parameters, then available as the @code{STARPU_NMAXBUFS} macro.
 as parameters, then available as the @code{STARPU_NMAXBUFS} macro.
 @end defvr
 @end defvr

+ 4 - 3
doc/chapters/perf-optimization.texi

@@ -409,9 +409,10 @@ STARPU_BUS_STATS=1} and @code{export STARPU_WORKER_STATS=1} .
 
 
 Due to CUDA limitations, StarPU will have a hard time overlapping its own
 Due to CUDA limitations, StarPU will have a hard time overlapping its own
 communications and the codelet computations if the application does not use a
 communications and the codelet computations if the application does not use a
-dedicated CUDA stream for its computations. StarPU provides one by the use of
-@code{starpu_cuda_get_local_stream()} which should be used by all CUDA codelet
-operations. For instance:
+dedicated CUDA stream for its computations instead of the default stream,
+which synchronizes all operations of the GPU. StarPU provides one by the use
+of @code{starpu_cuda_get_local_stream()} which can be used by all CUDA codelet
+operations to avoid this issue. For instance:
 
 
 @cartouche
 @cartouche
 @smallexample
 @smallexample

+ 13 - 1
examples/Makefile.am

@@ -52,7 +52,6 @@ EXTRA_DIST = 					\
 	basic_examples/variable_kernels_opencl_kernel.cl	\
 	basic_examples/variable_kernels_opencl_kernel.cl	\
 	matvecmult/matvecmult_kernel.cl				\
 	matvecmult/matvecmult_kernel.cl				\
 	basic_examples/block_opencl_kernel.cl			\
 	basic_examples/block_opencl_kernel.cl			\
-	openmp/vector_scal.c			\
 	filters/fblock_opencl_kernel.cl		\
 	filters/fblock_opencl_kernel.cl		\
 	filters/custom_mf/conversion_opencl.cl  \
 	filters/custom_mf/conversion_opencl.cl  \
 	filters/custom_mf/custom_opencl.cl \
 	filters/custom_mf/custom_opencl.cl \
@@ -159,6 +158,7 @@ examplebin_PROGRAMS +=				\
 	basic_examples/block			\
 	basic_examples/block			\
 	basic_examples/variable			\
 	basic_examples/variable			\
 	basic_examples/multiformat              \
 	basic_examples/multiformat              \
+	basic_examples/dynamic_handles		\
 	cpp/incrementer_cpp			\
 	cpp/incrementer_cpp			\
 	filters/custom_mf/custom_mf_filter      \
 	filters/custom_mf/custom_mf_filter      \
 	filters/fvector				\
 	filters/fvector				\
@@ -876,6 +876,18 @@ pipeline_pipeline_LDADD =		\
 	$(STARPU_BLAS_LDFLAGS)
 	$(STARPU_BLAS_LDFLAGS)
 endif
 endif
 
 
+##################
+# openmp example #
+##################
+
+if !STARPU_HAVE_WINDOWS
+examplebin_PROGRAMS +=		\
+	openmp/vector_scal_omp
+
+openmp_vector_scal_omp_CFLAGS = \
+	$(AM_CFLAGS) -fopenmp
+endif
+
 showcheck:
 showcheck:
 	-cat $(TEST_LOGS) /dev/null
 	-cat $(TEST_LOGS) /dev/null
 	for i in $(SUBDIRS) ; do \
 	for i in $(SUBDIRS) ; do \

+ 150 - 0
examples/basic_examples/dynamic_handles.c

@@ -0,0 +1,150 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2013  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+
+#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
+
+static void dummy_small_kernel(void *descr[], void *cl_arg)
+{
+	int nb_data;
+	int i;
+
+	starpu_codelet_unpack_args(cl_arg, &nb_data);
+	assert(nb_data == 1);
+	FPRINTF(stderr, "Number of data: %d\n", nb_data);
+
+	for(i=0 ; i<nb_data; i++)
+	{
+		int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[i]);
+		assert(*val == 42);
+	}
+}
+
+static void dummy_big_kernel(void *descr[], void *cl_arg)
+{
+	int nb_data;
+	int i;
+
+	starpu_codelet_unpack_args(cl_arg, &nb_data);
+	assert(nb_data == 9);
+	FPRINTF(stderr, "Number of data: %d\n", nb_data);
+
+	for(i=0 ; i<nb_data; i++)
+	{
+		int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[i]);
+		assert(*val == 42);
+	}
+}
+
+static struct starpu_codelet dummy_small_cl =
+{
+	.cuda_funcs = {dummy_small_kernel, NULL},
+	.opencl_funcs = {dummy_small_kernel, NULL},
+	.cpu_funcs = {dummy_small_kernel, NULL},
+	.modes = {STARPU_RW},
+	.nbuffers = 1
+};
+
+struct starpu_codelet dummy_big_cl =
+{
+	.cuda_funcs = {dummy_big_kernel, NULL},
+	.opencl_funcs = {dummy_big_kernel, NULL},
+	.cpu_funcs = {dummy_big_kernel, NULL},
+	.nbuffers = STARPU_NMAXBUFS+1
+};
+
+int main(int argc, char **argv)
+{
+	starpu_data_handle_t handle, *handles;
+	int ret;
+	int val=42;
+	unsigned i;
+	struct starpu_task *task, *task2;
+
+	ret = starpu_init(NULL);
+	if (ret == -ENODEV) return 77;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	dummy_big_cl.dyn_modes = malloc(dummy_big_cl.nbuffers * sizeof(enum starpu_access_mode));
+	for(i=0 ; i<dummy_big_cl.nbuffers ; i++)
+	     dummy_big_cl.dyn_modes[i] = STARPU_RW;
+
+	starpu_variable_data_register(&handle, 0, (uintptr_t)&val, sizeof(int));
+
+	task = starpu_task_create();
+	task->synchronous = 1;
+	task->cl = &dummy_small_cl;
+	starpu_codelet_pack_args(&task->cl_arg, &task->cl_arg_size,
+				 STARPU_VALUE, &(task->cl->nbuffers), sizeof(task->cl->nbuffers),
+				 0);
+	task->dyn_handles = malloc(sizeof(starpu_data_handle_t));
+	task->dyn_handles[0] = handle;
+	ret = starpu_task_submit(task);
+	if (ret == -ENODEV) goto enodev;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+	task2 = starpu_task_create();
+	task2->synchronous = 1;
+	task2->cl = &dummy_big_cl;
+	starpu_codelet_pack_args(&task2->cl_arg, &task2->cl_arg_size,
+				 STARPU_VALUE, &task2->cl->nbuffers, sizeof(task2->cl->nbuffers),
+				 0);
+	task2->dyn_handles = malloc(task2->cl->nbuffers * sizeof(starpu_data_handle_t));
+	for(i=0 ; i<task2->cl->nbuffers ; i++)
+	{
+		task2->dyn_handles[i] = handle;
+	}
+	ret = starpu_task_submit(task2);
+	if (ret == -ENODEV) goto enodev;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+	ret = starpu_insert_task(&dummy_small_cl,
+				 STARPU_VALUE, &dummy_small_cl.nbuffers, sizeof(dummy_small_cl.nbuffers),
+				 STARPU_RW, handle,
+				 0);
+	if (ret == -ENODEV) goto enodev;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_insert_task");
+        ret = starpu_task_wait_for_all();
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
+
+	handles = malloc(dummy_big_cl.nbuffers * sizeof(starpu_data_handle_t));
+	for(i=0 ; i<dummy_big_cl.nbuffers ; i++)
+	{
+		handles[i] = handle;
+	}
+	ret = starpu_insert_task(&dummy_big_cl,
+				 STARPU_VALUE, &dummy_big_cl.nbuffers, sizeof(dummy_big_cl.nbuffers),
+				 STARPU_DATA_ARRAY, handles, dummy_big_cl.nbuffers,
+				 0);
+	if (ret == -ENODEV) goto enodev;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_insert_task");
+        ret = starpu_task_wait_for_all();
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
+	free(handles);
+
+	starpu_data_unregister(handle);
+	free(dummy_big_cl.dyn_modes);
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+
+enodev:
+	starpu_data_unregister(handle);
+	free(dummy_big_cl.dyn_modes);
+	starpu_shutdown();
+	return 77;
+}

+ 21 - 14
examples/cholesky/cholesky.h

@@ -122,6 +122,7 @@ static unsigned check = 0;
 static unsigned bound = 0;
 static unsigned bound = 0;
 static unsigned bound_deps = 0;
 static unsigned bound_deps = 0;
 static unsigned bound_lp = 0;
 static unsigned bound_lp = 0;
+static unsigned bound_mps = 0;
 static unsigned with_ctxs = 0;
 static unsigned with_ctxs = 0;
 static unsigned with_noctxs = 0;
 static unsigned with_noctxs = 0;
 static unsigned chole1 = 0;
 static unsigned chole1 = 0;
@@ -150,77 +151,83 @@ static void __attribute__((unused)) parse_args(int argc, char **argv)
 		{
 		{
 			with_ctxs = 1;
 			with_ctxs = 1;
 			break;
 			break;
-		}
+		} else
 		if (strcmp(argv[i], "-with_noctxs") == 0) 
 		if (strcmp(argv[i], "-with_noctxs") == 0) 
 		{
 		{
 			with_noctxs = 1;
 			with_noctxs = 1;
 			break;
 			break;
-		}
+		} else
 		
 		
 		if (strcmp(argv[i], "-chole1") == 0) 
 		if (strcmp(argv[i], "-chole1") == 0) 
 		{
 		{
 			chole1 = 1;
 			chole1 = 1;
 			break;
 			break;
-		}
+		} else
 
 
 		if (strcmp(argv[i], "-chole2") == 0) 
 		if (strcmp(argv[i], "-chole2") == 0) 
 		{
 		{
 			chole2 = 1;
 			chole2 = 1;
 			break;
 			break;
-		}
+		} else
 
 
 		if (strcmp(argv[i], "-size") == 0)
 		if (strcmp(argv[i], "-size") == 0)
 		{
 		{
 		        char *argptr;
 		        char *argptr;
 			size = strtol(argv[++i], &argptr, 10);
 			size = strtol(argv[++i], &argptr, 10);
-		}
+		} else
 
 
 		if (strcmp(argv[i], "-nblocks") == 0)
 		if (strcmp(argv[i], "-nblocks") == 0)
 		{
 		{
 		        char *argptr;
 		        char *argptr;
 			nblocks = strtol(argv[++i], &argptr, 10);
 			nblocks = strtol(argv[++i], &argptr, 10);
-		}
+		} else
 
 
 		if (strcmp(argv[i], "-nbigblocks") == 0)
 		if (strcmp(argv[i], "-nbigblocks") == 0)
 		{
 		{
 		        char *argptr;
 		        char *argptr;
 			nbigblocks = strtol(argv[++i], &argptr, 10);
 			nbigblocks = strtol(argv[++i], &argptr, 10);
-		}
+		} else
 
 
 		if (strcmp(argv[i], "-no-pin") == 0)
 		if (strcmp(argv[i], "-no-pin") == 0)
 		{
 		{
 			pinned = 0;
 			pinned = 0;
-		}
+		} else
 
 
 		if (strcmp(argv[i], "-no-prio") == 0)
 		if (strcmp(argv[i], "-no-prio") == 0)
 		{
 		{
 			noprio = 1;
 			noprio = 1;
-		}
+		} else
 
 
 		if (strcmp(argv[i], "-bound") == 0)
 		if (strcmp(argv[i], "-bound") == 0)
 		{
 		{
 			bound = 1;
 			bound = 1;
-		}
+		} else
 
 
 		if (strcmp(argv[i], "-bound-lp") == 0)
 		if (strcmp(argv[i], "-bound-lp") == 0)
 		{
 		{
 			bound_lp = 1;
 			bound_lp = 1;
-		}
+		} else
+
+		if (strcmp(argv[i], "-bound-mps") == 0)
+		{
+			bound_mps = 1;
+		} else
 
 
 		if (strcmp(argv[i], "-bound-deps") == 0)
 		if (strcmp(argv[i], "-bound-deps") == 0)
 		{
 		{
 			bound_deps = 1;
 			bound_deps = 1;
-		}
+		} else
 
 
 		if (strcmp(argv[i], "-check") == 0)
 		if (strcmp(argv[i], "-check") == 0)
 		{
 		{
 			check = 1;
 			check = 1;
-		}
+		} else
 
 
-		if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i],"--help") == 0)
+		/* if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i],"--help") == 0) */
 		{
 		{
 			fprintf(stderr,"usage : %s [-size size] [-nblocks nblocks] [-no-pin] [-no-prio] [-bound] [-bound-deps] [-bound-lp] [-check]\n", argv[0]);
 			fprintf(stderr,"usage : %s [-size size] [-nblocks nblocks] [-no-pin] [-no-prio] [-bound] [-bound-deps] [-bound-lp] [-check]\n", argv[0]);
 			fprintf(stderr,"Currently selected: %ux%u and %ux%u blocks\n", size, size, nblocks, nblocks);
 			fprintf(stderr,"Currently selected: %ux%u and %ux%u blocks\n", size, size, nblocks, nblocks);
+			exit(0);
 		}
 		}
 	}
 	}
 }
 }

+ 7 - 2
examples/cholesky/cholesky_implicit.c

@@ -89,7 +89,7 @@ static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 
 
 	start = starpu_timing_now();
 	start = starpu_timing_now();
 
 
-	if (bound)
+	if (bound || bound_lp || bound_mps)
 		starpu_bound_start(bound_deps, 0);
 		starpu_bound_start(bound_deps, 0);
 	/* create all the DAG nodes */
 	/* create all the DAG nodes */
 	for (k = 0; k < nblocks; k++)
 	for (k = 0; k < nblocks; k++)
@@ -140,7 +140,7 @@ static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 	}
 	}
 
 
 	starpu_task_wait_for_all();
 	starpu_task_wait_for_all();
-	if (bound)
+	if (bound || bound_lp || bound_mps)
 		starpu_bound_stop();
 		starpu_bound_stop();
 
 
 	end = starpu_timing_now();
 	end = starpu_timing_now();
@@ -162,6 +162,11 @@ static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 			FILE *f = fopen("cholesky.lp", "w");
 			FILE *f = fopen("cholesky.lp", "w");
 			starpu_bound_print_lp(f);
 			starpu_bound_print_lp(f);
 		}
 		}
+		if (bound_mps)
+		{
+			FILE *f = fopen("cholesky.mps", "w");
+			starpu_bound_print_mps(f);
+		}
 		if (bound)
 		if (bound)
 		{
 		{
 			double res;
 			double res;

+ 7 - 1
examples/openmp/vector_scal.c

@@ -25,7 +25,12 @@
 #include <stdio.h>
 #include <stdio.h>
 #include <limits.h>
 #include <limits.h>
 
 
+#ifdef STARPU_QUICK_CHECK
+#define	NX	2048
+#else
 #define	NX	2048000
 #define	NX	2048000
+#endif
+
 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
 
 
 void scal_cpu_func(void *buffers[], void *_args)
 void scal_cpu_func(void *buffers[], void *_args)
@@ -94,7 +99,8 @@ int main(int argc, char **argv)
 
 
 	float factor = 1.001;
 	float factor = 1.001;
 
 
-	for (i = 0; i < 100; i++) {
+	for (i = 0; i < 100; i++)
+	{
 		struct starpu_task *task = starpu_task_create();
 		struct starpu_task *task = starpu_task_create();
 
 
 		task->cl = &cl;
 		task->cl = &cl;

+ 1 - 1
examples/pi/pi.c

@@ -198,7 +198,7 @@ int main(int argc, char **argv)
 	FPRINTF(stderr, "Total time : %f ms\n", timing/1000.0);
 	FPRINTF(stderr, "Total time : %f ms\n", timing/1000.0);
 	FPRINTF(stderr, "Speed : %f GShot/s\n", total_shot_cnt/(1e3*timing));
 	FPRINTF(stderr, "Speed : %f GShot/s\n", total_shot_cnt/(1e3*timing));
 
 
-	if (!getenv("STARPU_SSILENT")) starpu_display_codelet_stats(&pi_cl);
+	if (!getenv("STARPU_SSILENT")) starpu_codelet_display_stats(&pi_cl);
 
 
 	starpu_shutdown();
 	starpu_shutdown();
 
 

+ 2 - 0
include/starpu.h

@@ -153,6 +153,8 @@ int starpu_asynchronous_opencl_copy_disabled(void);
 void starpu_profiling_init();
 void starpu_profiling_init();
 void starpu_display_stats();
 void starpu_display_stats();
 
 
+void starpu_get_version(int *major, int *minor, int *release);
+
 #ifdef __cplusplus
 #ifdef __cplusplus
 }
 }
 #endif
 #endif

+ 3 - 0
include/starpu_config.h.in

@@ -20,6 +20,7 @@
 
 
 #undef STARPU_MAJOR_VERSION
 #undef STARPU_MAJOR_VERSION
 #undef STARPU_MINOR_VERSION
 #undef STARPU_MINOR_VERSION
+#undef STARPU_RELEASE_VERSION
 
 
 #undef STARPU_USE_CPU
 #undef STARPU_USE_CPU
 #undef STARPU_USE_CUDA
 #undef STARPU_USE_CUDA
@@ -113,4 +114,6 @@ struct timespec
 #undef STARPU_HAVE_RINTF
 #undef STARPU_HAVE_RINTF
 #undef STARPU_USE_TOP
 #undef STARPU_USE_TOP
 
 
+#undef STARPU_HAVE_HWLOC
+
 #endif
 #endif

+ 2 - 0
include/starpu_deprecated_api.h

@@ -88,6 +88,8 @@ typedef enum starpu_access_mode starpu_access_mode;
 #define starpu_depth_block_filter_func_block		starpu_block_filter_depth_block
 #define starpu_depth_block_filter_func_block		starpu_block_filter_depth_block
 #define starpu_depth_block_shadow_filter_func_block	starpu_block_filter_depth_block_shadow
 #define starpu_depth_block_shadow_filter_func_block	starpu_block_filter_depth_block_shadow
 
 
+#define starpu_display_codelet_stats		starpu_codelet_display_stats
+
 #endif /* STARPU_USE_DEPRECATED_ONE_ZERO_API */
 #endif /* STARPU_USE_DEPRECATED_ONE_ZERO_API */
 
 
 #ifdef __cplusplus
 #ifdef __cplusplus

+ 0 - 3
include/starpu_sched_ctx.h

@@ -48,9 +48,6 @@ void starpu_sched_ctx_delete(unsigned sched_ctx_id);
 /* indicate which context whill inherit the resources of this context when he will be deleted */
 /* indicate which context whill inherit the resources of this context when he will be deleted */
 void starpu_sched_ctx_set_inheritor(unsigned sched_ctx_id, unsigned inheritor);
 void starpu_sched_ctx_set_inheritor(unsigned sched_ctx_id, unsigned inheritor);
 
 
-/* mutex synchronising several simultaneous modifications of a context */
-starpu_pthread_mutex_t* starpu_sched_ctx_get_changing_ctx_mutex(unsigned sched_ctx_id);
-
 /* indicate that the current thread is submitting only to the current context */
 /* indicate that the current thread is submitting only to the current context */
 void starpu_sched_ctx_set_context(unsigned *sched_ctx_id);
 void starpu_sched_ctx_set_context(unsigned *sched_ctx_id);
 
 

+ 26 - 4
include/starpu_task.h

@@ -96,6 +96,7 @@ struct starpu_codelet
 	unsigned nbuffers;
 	unsigned nbuffers;
 	/* which are the access modes for these buffers */
 	/* which are the access modes for these buffers */
 	enum starpu_access_mode modes[STARPU_NMAXBUFS];
 	enum starpu_access_mode modes[STARPU_NMAXBUFS];
+	enum starpu_access_mode *dyn_modes;
 
 
 	/* performance model of the codelet */
 	/* performance model of the codelet */
 	struct starpu_perfmodel *model;
 	struct starpu_perfmodel *model;
@@ -104,7 +105,7 @@ struct starpu_codelet
 	struct starpu_perfmodel *power_model;
 	struct starpu_perfmodel *power_model;
 
 
 	/* statistics collected at runtime: this is filled by StarPU and should
 	/* statistics collected at runtime: this is filled by StarPU and should
-	 * not be accessed directly (use the starpu_display_codelet_stats
+	 * not be accessed directly (use the starpu_codelet_display_stats
 	 * function instead for instance). */
 	 * function instead for instance). */
 	unsigned long per_worker_stats[STARPU_NMAXWORKERS];
 	unsigned long per_worker_stats[STARPU_NMAXWORKERS];
 
 
@@ -120,6 +121,9 @@ struct starpu_task
 	starpu_data_handle_t handles[STARPU_NMAXBUFS];
 	starpu_data_handle_t handles[STARPU_NMAXBUFS];
 	void *interfaces[STARPU_NMAXBUFS];
 	void *interfaces[STARPU_NMAXBUFS];
 
 
+	starpu_data_handle_t *dyn_handles;
+	void **dyn_interfaces;
+
 	/* arguments not managed by the DSM are given as a buffer */
 	/* arguments not managed by the DSM are given as a buffer */
 	void *cl_arg;
 	void *cl_arg;
 	/* in case the argument buffer has to be uploaded explicitely */
 	/* in case the argument buffer has to be uploaded explicitely */
@@ -240,9 +244,17 @@ struct starpu_task
 	.sched_ctx = 0,					\
 	.sched_ctx = 0,					\
 	.hypervisor_tag = 0,				\
 	.hypervisor_tag = 0,				\
 	.flops = 0.0,					\
 	.flops = 0.0,					\
-		.scheduled = 0				\
+	.scheduled = 0,					\
+	.dyn_handles = NULL,				\
+	.dyn_interfaces = NULL				\
 }
 }
 
 
+#define STARPU_TASK_GET_HANDLE(task, i) ((task->dyn_handles) ? task->dyn_handles[i] : task->handles[i])
+#define STARPU_TASK_SET_HANDLE(task, handle, i) do { if (task->dyn_handles) task->dyn_handles[i] = handle; else task->handles[i] = handle; } while(0)
+
+#define STARPU_CODELET_GET_MODE(codelet, i) ((codelet->dyn_modes) ? codelet->dyn_modes[i] : codelet->modes[i])
+#define STARPU_CODELET_SET_MODE(codelet, mode, i) do { if (codelet->dyn_modes) codelet->dyn_modes[i] = mode; else codelet->modes[i] = mode; } while(0)
+
 /*
 /*
  * handle task dependencies: it is possible to associate a task with a unique
  * handle task dependencies: it is possible to associate a task with a unique
  * "tag" and to express dependencies between tasks by the means of those tags
  * "tag" and to express dependencies between tasks by the means of those tags
@@ -317,10 +329,13 @@ int starpu_task_submit_to_ctx(struct starpu_task *task, unsigned sched_ctx_id);
  * indicates that the waited task was either synchronous or detached. */
  * indicates that the waited task was either synchronous or detached. */
 int starpu_task_wait(struct starpu_task *task) STARPU_WARN_UNUSED_RESULT;
 int starpu_task_wait(struct starpu_task *task) STARPU_WARN_UNUSED_RESULT;
 
 
-/* This function waits until all the tasks that were already submitted have
+/* This function waits until all the tasks that were already submitted 
+ * (to the current context, or to the global one if there is none) have
  * been executed. */
  * been executed. */
 int starpu_task_wait_for_all(void);
 int starpu_task_wait_for_all(void);
 
 
+/* This function waits until all the tasks that were already submitted to the 
+ * context have been executed */
 int starpu_task_wait_for_all_in_ctx(unsigned sched_ctx_id);
 int starpu_task_wait_for_all_in_ctx(unsigned sched_ctx_id);
 
 
 /* This function waits until there is no more ready task. */
 /* This function waits until there is no more ready task. */
@@ -331,13 +346,20 @@ int starpu_task_nsubmitted(void);
 
 
 void starpu_codelet_init(struct starpu_codelet *cl);
 void starpu_codelet_init(struct starpu_codelet *cl);
 
 
-void starpu_display_codelet_stats(struct starpu_codelet *cl);
+void starpu_codelet_display_stats(struct starpu_codelet *cl);
 
 
 /* Return the task currently executed by the worker, or NULL if this is called
 /* Return the task currently executed by the worker, or NULL if this is called
  * either from a thread that is not a task or simply because there is no task
  * either from a thread that is not a task or simply because there is no task
  * being executed at the moment. */
  * being executed at the moment. */
 struct starpu_task *starpu_task_get_current(void);
 struct starpu_task *starpu_task_get_current(void);
 
 
+/* initialise the barrier for the parallel task, so that all workers start it
+ * at the same time */
+void starpu_parallel_task_barrier_init(struct starpu_task* task, int workerid);
+
+/* duplicate the given task */
+struct starpu_task *starpu_task_dup(struct starpu_task *task);
+
 #ifdef __cplusplus
 #ifdef __cplusplus
 }
 }
 #endif
 #endif

+ 2 - 2
include/starpu_task_util.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010-2013  Université de Bordeaux 1
  * Copyright (C) 2010-2013  Université de Bordeaux 1
- * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -57,7 +57,7 @@ void starpu_codelet_unpack_args(void *cl_arg, ...);
 
 
 /* Pack arguments of type STARPU_VALUE into a buffer which can be
 /* Pack arguments of type STARPU_VALUE into a buffer which can be
  * given to a codelet and later unpacked with starpu_codelet_unpack_args */
  * given to a codelet and later unpacked with starpu_codelet_unpack_args */
-void starpu_codelet_pack_args(char **arg_buffer, size_t *arg_buffer_size, ...);
+void starpu_codelet_pack_args(void **arg_buffer, size_t *arg_buffer_size, ...);
 
 
 #ifdef __cplusplus
 #ifdef __cplusplus
 }
 }

+ 8 - 0
include/starpu_top.h

@@ -195,6 +195,14 @@ void starpu_top_update_data_float(const struct starpu_top_data* data,
 				  double value);
 				  double value);
 
 
 /*
 /*
+ * This function notifies the UI that the task has been planned to
+ * run from start to end, on a computation core
+ */
+void starpu_top_task_prevision(struct starpu_task *task,
+			       int devid, unsigned long long start,
+			       unsigned long long end);
+
+/*
  * This functions are usefull in debug mode. The starpu developper doesn't need
  * This functions are usefull in debug mode. The starpu developper doesn't need
  * to check if the debug mode is active.
  * to check if the debug mode is active.
  * This is checked by starpu_top itsefl.
  * This is checked by starpu_top itsefl.

+ 0 - 1
include/starpu_worker.h

@@ -123,7 +123,6 @@ int starpu_combined_worker_get_id(void);
 int starpu_combined_worker_get_size(void);
 int starpu_combined_worker_get_size(void);
 int starpu_combined_worker_get_rank(void);
 int starpu_combined_worker_get_rank(void);
 
 
-
 /* This function returns the type of worker associated to an identifier (as
 /* This function returns the type of worker associated to an identifier (as
  * returned by the starpu_worker_get_id function). The returned value indicates
  * returned by the starpu_worker_get_id function). The returned value indicates
  * the architecture of the worker: STARPU_CPU_WORKER for a CPU core,
  * the architecture of the worker: STARPU_CPU_WORKER for a CPU core,

+ 11 - 6
mpi/src/starpu_mpi_insert_task.c

@@ -24,6 +24,7 @@
 #include <common/uthash.h>
 #include <common/uthash.h>
 #include <util/starpu_insert_task_utils.h>
 #include <util/starpu_insert_task_utils.h>
 #include <datawizard/coherency.h>
 #include <datawizard/coherency.h>
+#include <core/task.h>
 
 
 #include <starpu_mpi_private.h>
 #include <starpu_mpi_private.h>
 
 
@@ -369,7 +370,7 @@ int starpu_mpi_insert_task(MPI_Comm comm, struct starpu_codelet *codelet, ...)
 	int me, do_execute, xrank, nb_nodes;
 	int me, do_execute, xrank, nb_nodes;
 	size_t *size_on_nodes;
 	size_t *size_on_nodes;
 	size_t arg_buffer_size = 0;
 	size_t arg_buffer_size = 0;
-	char *arg_buffer = NULL;
+	void *arg_buffer = NULL;
 	int dest=0, inconsistent_execute;
 	int dest=0, inconsistent_execute;
 	int current_data = 0;
 	int current_data = 0;
 
 
@@ -420,7 +421,7 @@ int starpu_mpi_insert_task(MPI_Comm comm, struct starpu_codelet *codelet, ...)
 			int i;
 			int i;
 			for(i=0 ; i<nb_handles ; i++)
 			for(i=0 ; i<nb_handles ; i++)
 			{
 			{
-				enum starpu_access_mode mode = codelet->modes[current_data];
+				enum starpu_access_mode mode = STARPU_CODELET_GET_MODE(codelet, current_data);
 				int ret = _starpu_mpi_find_executee_node(datas[i], mode, me, &do_execute, &inconsistent_execute, &dest, size_on_nodes);
 				int ret = _starpu_mpi_find_executee_node(datas[i], mode, me, &do_execute, &inconsistent_execute, &dest, size_on_nodes);
 				if (ret == -EINVAL)
 				if (ret == -EINVAL)
 				{
 				{
@@ -531,7 +532,7 @@ int starpu_mpi_insert_task(MPI_Comm comm, struct starpu_codelet *codelet, ...)
 
 
 			for(i=0 ; i<nb_handles ; i++)
 			for(i=0 ; i<nb_handles ; i++)
 			{
 			{
-				_starpu_mpi_exchange_data_before_execution(datas[i], codelet->modes[current_data], me, dest, do_execute, comm);
+				_starpu_mpi_exchange_data_before_execution(datas[i], STARPU_CODELET_GET_MODE(codelet, current_data), me, dest, do_execute, comm);
 				current_data++;
 				current_data++;
 			}
 			}
 		}
 		}
@@ -590,12 +591,16 @@ int starpu_mpi_insert_task(MPI_Comm comm, struct starpu_codelet *codelet, ...)
 		if (arg_buffer_size)
 		if (arg_buffer_size)
 		{
 		{
 			va_start(varg_list, codelet);
 			va_start(varg_list, codelet);
-			_starpu_codelet_pack_args(arg_buffer_size, &arg_buffer, varg_list);
+			_starpu_codelet_pack_args(&arg_buffer, arg_buffer_size, varg_list);
 		}
 		}
 
 
 		_STARPU_MPI_DEBUG(1, "Execution of the codelet %p (%s)\n", codelet, codelet->name);
 		_STARPU_MPI_DEBUG(1, "Execution of the codelet %p (%s)\n", codelet, codelet->name);
 		va_start(varg_list, codelet);
 		va_start(varg_list, codelet);
 		struct starpu_task *task = starpu_task_create();
 		struct starpu_task *task = starpu_task_create();
+		if (codelet->nbuffers > STARPU_NMAXBUFS)
+		{
+			task->dyn_handles = malloc(codelet->nbuffers * sizeof(starpu_data_handle_t));
+		}
 		int ret = _starpu_insert_task_create_and_submit(arg_buffer, arg_buffer_size, codelet, &task, varg_list);
 		int ret = _starpu_insert_task_create_and_submit(arg_buffer, arg_buffer_size, codelet, &task, varg_list);
 		STARPU_ASSERT_MSG(ret==0, "_starpu_insert_task_create_and_submit failure %d", ret);
 		STARPU_ASSERT_MSG(ret==0, "_starpu_insert_task_create_and_submit failure %d", ret);
 	}
 	}
@@ -622,7 +627,7 @@ int starpu_mpi_insert_task(MPI_Comm comm, struct starpu_codelet *codelet, ...)
 
 
 				for(i=0 ; i<nb_handles ; i++)
 				for(i=0 ; i<nb_handles ; i++)
 				{
 				{
-					_starpu_mpi_exchange_data_after_execution(datas[i], codelet->modes[current_data], me, xrank, dest, do_execute, comm);
+					_starpu_mpi_exchange_data_after_execution(datas[i], STARPU_CODELET_GET_MODE(codelet, current_data), me, xrank, dest, do_execute, comm);
 					current_data++;
 					current_data++;
 				}
 				}
 			}
 			}
@@ -692,7 +697,7 @@ int starpu_mpi_insert_task(MPI_Comm comm, struct starpu_codelet *codelet, ...)
 
 
 			for(i=0 ; i<nb_handles ; i++)
 			for(i=0 ; i<nb_handles ; i++)
 			{
 			{
-				_starpu_mpi_clear_data_after_execution(datas[i], codelet->modes[current_data], me, do_execute, comm);
+				_starpu_mpi_clear_data_after_execution(datas[i], STARPU_CODELET_GET_MODE(codelet, current_data), me, do_execute, comm);
 				current_data++;
 				current_data++;
 			}
 			}
 		}
 		}

+ 0 - 1
src/Makefile.am

@@ -73,7 +73,6 @@ noinst_HEADERS = 						\
 	core/debug.h						\
 	core/debug.h						\
 	core/errorcheck.h					\
 	core/errorcheck.h					\
 	core/combined_workers.h					\
 	core/combined_workers.h					\
-	core/parallel_task.h					\
 	core/simgrid.h						\
 	core/simgrid.h						\
 	core/task_bundle.h					\
 	core/task_bundle.h					\
 	sched_policies/detect_combined_workers.h		\
 	sched_policies/detect_combined_workers.h		\

+ 0 - 10
src/common/thread.h

@@ -70,16 +70,6 @@
 	}                                                                      \
 	}                                                                      \
 } while (0)
 } while (0)
 
 
-#define _STARPU_PTHREAD_MUTEX_TRYLOCK(mutex) do {                              \
-	int p_ret = starpu_pthread_mutex_trylock(mutex);                       \
-	if (STARPU_UNLIKELY(p_ret)) {                                          \
-		fprintf(stderr,                                                \
-			"%s:%d starpu_pthread_mutex_trylock: %s\n",            \
-			__FILE__, __LINE__, strerror(p_ret));                  \
-		STARPU_ABORT();                                                \
-	}                                                                      \
-} while (0)
-
 #define _STARPU_PTHREAD_MUTEX_UNLOCK(mutex) do {                               \
 #define _STARPU_PTHREAD_MUTEX_UNLOCK(mutex) do {                               \
 	int p_ret = starpu_pthread_mutex_unlock(mutex);                        \
 	int p_ret = starpu_pthread_mutex_unlock(mutex);                        \
 	if (STARPU_UNLIKELY(p_ret)) {                                          \
 	if (STARPU_UNLIKELY(p_ret)) {                                          \

+ 1 - 0
src/core/combined_workers.c

@@ -162,3 +162,4 @@ int starpu_combined_worker_get_description(int workerid, int *worker_size, int *
 
 
 	return 0;
 	return 0;
 }
 }
+

+ 18 - 11
src/core/dependencies/data_concurrency.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010-2012  Université de Bordeaux 1
  * Copyright (C) 2010-2012  Université de Bordeaux 1
- * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -192,8 +192,8 @@ static unsigned attempt_to_submit_data_request_from_job(struct _starpu_job *j, u
 {
 {
 	/* Note that we do not access j->task->handles, but j->ordered_buffers
 	/* Note that we do not access j->task->handles, but j->ordered_buffers
 	 * which is a sorted copy of it. */
 	 * which is a sorted copy of it. */
-	starpu_data_handle_t handle = j->ordered_buffers[buffer_index].handle;
-	enum starpu_access_mode mode = j->ordered_buffers[buffer_index].mode;
+	starpu_data_handle_t handle = _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, buffer_index);
+	enum starpu_access_mode mode = _STARPU_JOB_GET_ORDERED_BUFFER_MODE(j, buffer_index);
 
 
 	return _starpu_attempt_to_submit_data_request(1, handle, mode, NULL, NULL, j, buffer_index);
 	return _starpu_attempt_to_submit_data_request(1, handle, mode, NULL, NULL, j, buffer_index);
 }
 }
@@ -205,11 +205,16 @@ static unsigned _submit_job_enforce_data_deps(struct _starpu_job *j, unsigned st
 	unsigned nbuffers = j->task->cl->nbuffers;
 	unsigned nbuffers = j->task->cl->nbuffers;
 	for (buf = start_buffer_index; buf < nbuffers; buf++)
 	for (buf = start_buffer_index; buf < nbuffers; buf++)
 	{
 	{
-		if (buf && j->ordered_buffers[buf-1].handle == j->ordered_buffers[buf].handle)
-			/* We have already requested this data, skip it. This
-			 * depends on ordering putting writes before reads, see
-			 * _starpu_compar_handles.  */
-			continue;
+		if (buf)
+		{
+			starpu_data_handle_t handle_m1 = _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, buf-1);
+			starpu_data_handle_t handle = _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, buf);
+			if (handle_m1 == handle)
+				/* We have already requested this data, skip it. This
+				 * depends on ordering putting writes before reads, see
+				 * _starpu_compar_handles.  */
+				continue;
+		}
 
 
                 j->task->status = STARPU_TASK_BLOCKED_ON_DATA;
                 j->task->status = STARPU_TASK_BLOCKED_ON_DATA;
                 if (attempt_to_submit_data_request_from_job(j, buf))
                 if (attempt_to_submit_data_request_from_job(j, buf))
@@ -238,11 +243,13 @@ unsigned _starpu_submit_job_enforce_data_deps(struct _starpu_job *j)
 	unsigned i;
 	unsigned i;
 	for (i=0 ; i<cl->nbuffers ; i++)
 	for (i=0 ; i<cl->nbuffers ; i++)
 	{
 	{
-		j->ordered_buffers[i].handle = j->task->handles[i];
-		j->ordered_buffers[i].mode = j->task->cl->modes[i];
+		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(j->task, i);
+		_STARPU_JOB_SET_ORDERED_BUFFER_HANDLE(j, handle, i);
+		enum starpu_access_mode mode = STARPU_CODELET_GET_MODE(j->task->cl, i);
+		_STARPU_JOB_SET_ORDERED_BUFFER_MODE(j, mode, i);
 	}
 	}
 
 
-	_starpu_sort_task_handles(j->ordered_buffers, cl->nbuffers);
+	_starpu_sort_task_handles(_STARPU_JOB_GET_ORDERED_BUFFERS(j), cl->nbuffers);
 
 
 	return _submit_job_enforce_data_deps(j, 0);
 	return _submit_job_enforce_data_deps(j, 0);
 }
 }

+ 3 - 3
src/core/dependencies/implicit_data_deps.c

@@ -336,8 +336,8 @@ void _starpu_detect_implicit_data_deps(struct starpu_task *task)
 	unsigned buffer;
 	unsigned buffer;
 	for (buffer = 0; buffer < nbuffers; buffer++)
 	for (buffer = 0; buffer < nbuffers; buffer++)
 	{
 	{
-		starpu_data_handle_t handle = task->handles[buffer];
-		enum starpu_access_mode mode = task->cl->modes[buffer];
+		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, buffer);
+		enum starpu_access_mode mode = STARPU_CODELET_GET_MODE(task->cl, buffer);
 		struct starpu_task *new_task;
 		struct starpu_task *new_task;
 
 
 		/* Scratch memory does not introduce any deps */
 		/* Scratch memory does not introduce any deps */
@@ -457,7 +457,7 @@ void _starpu_release_data_enforce_sequential_consistency(struct starpu_task *tas
 void _starpu_release_task_enforce_sequential_consistency(struct _starpu_job *j)
 void _starpu_release_task_enforce_sequential_consistency(struct _starpu_job *j)
 {
 {
 	struct starpu_task *task = j->task;
 	struct starpu_task *task = j->task;
-        struct starpu_buffer_descr *descrs = j->ordered_buffers;
+        struct starpu_buffer_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j);
 
 
 	if (!task->cl)
 	if (!task->cl)
 		return;
 		return;

+ 14 - 3
src/core/jobs.c

@@ -52,6 +52,9 @@ struct _starpu_job* __attribute__((malloc)) _starpu_job_create(struct starpu_tas
 	 * everywhere */
 	 * everywhere */
 	memset(job, 0, sizeof(*job));
 	memset(job, 0, sizeof(*job));
 
 
+	if (task->dyn_handles)
+	     job->dyn_ordered_buffers = malloc(task->cl->nbuffers * sizeof(struct starpu_buffer_descr));
+
 	job->task = task;
 	job->task = task;
 
 
 #ifndef STARPU_USE_FXT
 #ifndef STARPU_USE_FXT
@@ -104,6 +107,11 @@ void _starpu_job_destroy(struct _starpu_job *j)
 	}
 	}
 
 
 	_starpu_cg_list_deinit(&j->job_successors);
 	_starpu_cg_list_deinit(&j->job_successors);
+	if (j->dyn_ordered_buffers)
+	{
+	     free(j->dyn_ordered_buffers);
+	     j->dyn_ordered_buffers = NULL;
+	}
 
 
 	_starpu_job_delete(j);
 	_starpu_job_delete(j);
 }
 }
@@ -149,8 +157,11 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 	int i;
 	int i;
 	size_t data_size = 0;
 	size_t data_size = 0;
 	for(i = 0; i < STARPU_NMAXBUFS; i++)
 	for(i = 0; i < STARPU_NMAXBUFS; i++)
-		if(task->handles[i] != NULL)
-			data_size += _starpu_data_get_size(task->handles[i]);
+	{
+		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
+		if (handle != NULL)
+			data_size += _starpu_data_get_size(handle);
+	}
 #endif //STARPU_USE_SC_HYPERVISOR
 #endif //STARPU_USE_SC_HYPERVISOR
 
 
 	/* We release handle reference count */
 	/* We release handle reference count */
@@ -159,7 +170,7 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 		unsigned i;
 		unsigned i;
 		for (i=0; i<task->cl->nbuffers; i++)
 		for (i=0; i<task->cl->nbuffers; i++)
 		{
 		{
-			starpu_data_handle_t handle = task->handles[i];
+			starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
 			_starpu_spin_lock(&handle->header_lock);
 			_starpu_spin_lock(&handle->header_lock);
 			handle->busy_count--;
 			handle->busy_count--;
 			if (!_starpu_data_check_not_busy(handle))
 			if (!_starpu_data_check_not_busy(handle))

+ 10 - 0
src/core/jobs.h

@@ -70,6 +70,7 @@ LIST_TYPE(_starpu_job,
 	 * the task so that we always grab the rw-lock associated to the
 	 * the task so that we always grab the rw-lock associated to the
 	 * handles in the same order. */
 	 * handles in the same order. */
 	struct starpu_buffer_descr ordered_buffers[STARPU_NMAXBUFS];
 	struct starpu_buffer_descr ordered_buffers[STARPU_NMAXBUFS];
+	struct starpu_buffer_descr *dyn_ordered_buffers;
 
 
 	/* If a tag is associated to the job, this points to the internal data
 	/* If a tag is associated to the job, this points to the internal data
 	 * structure that describes the tag status. */
 	 * structure that describes the tag status. */
@@ -172,4 +173,13 @@ struct starpu_task *_starpu_pop_local_task(struct _starpu_worker *worker);
  * enforce a FIFO ordering. */
  * enforce a FIFO ordering. */
 int _starpu_push_local_task(struct _starpu_worker *worker, struct starpu_task *task, int back);
 int _starpu_push_local_task(struct _starpu_worker *worker, struct starpu_task *task, int back);
 
 
+#define _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(job, i) ((job->dyn_ordered_buffers) ? job->dyn_ordered_buffers[i].handle : job->ordered_buffers[i].handle)
+#define _STARPU_JOB_GET_ORDERED_BUFFER_MODE(job, i) ((job->dyn_ordered_buffers) ? job->dyn_ordered_buffers[i].mode : job->ordered_buffers[i].mode)
+
+#define _STARPU_JOB_SET_ORDERED_BUFFER_HANDLE(job, handle, i) do { if (job->dyn_ordered_buffers) job->dyn_ordered_buffers[i].handle = (handle); else job->ordered_buffers[i].handle = (handle);} while(0)
+#define _STARPU_JOB_SET_ORDERED_BUFFER_MODE(job, mode, i) do { if (job->dyn_ordered_buffers) job->dyn_ordered_buffers[i].mode = mode; else job->ordered_buffers[i].mode = mode;} while(0)
+
+#define _STARPU_JOB_SET_ORDERED_BUFFER(job, buffer, i) do { if (job->dyn_ordered_buffers) job->dyn_ordered_buffers[i] = buffer; else job->ordered_buffers[i] = buffer;} while(0)
+#define _STARPU_JOB_GET_ORDERED_BUFFERS(job) (job->dyn_ordered_buffers) ? job->dyn_ordered_buffers : job->ordered_buffers
+
 #endif // __JOBS_H__
 #endif // __JOBS_H__

+ 25 - 2
src/core/parallel_task.c

@@ -19,15 +19,38 @@
 #include <core/jobs.h>
 #include <core/jobs.h>
 #include <core/task.h>
 #include <core/task.h>
 #include <common/utils.h>
 #include <common/utils.h>
+#include <core/workers.h>
+#include <common/barrier.h>
 
 
-struct starpu_task *_starpu_create_task_alias(struct starpu_task *task)
+struct starpu_task *starpu_task_dup(struct starpu_task *task)
 {
 {
 	struct starpu_task *task_dup = (struct starpu_task *) malloc(sizeof(struct starpu_task));
 	struct starpu_task *task_dup = (struct starpu_task *) malloc(sizeof(struct starpu_task));
 	STARPU_ASSERT(task_dup);
 	STARPU_ASSERT(task_dup);
 
 
-	/* XXX perhaps this is a bit too much overhead and we should only copy
+	/* TODO perhaps this is a bit too much overhead and we should only copy
 	 * part of the structure ? */
 	 * part of the structure ? */
 	memcpy(task_dup, task, sizeof(struct starpu_task));
 	memcpy(task_dup, task, sizeof(struct starpu_task));
 
 
 	return task_dup;
 	return task_dup;
 }
 }
+
+/* Set up the job associated to TASK for a parallel execution on the combined
+ * worker WORKERID: the job records the id and the size of the combined
+ * worker, its alias counter is reset to zero, and both of its barriers
+ * (before_work_barrier and after_work_barrier) are initialised with one
+ * participant per member of the combined worker. */
+void starpu_parallel_task_barrier_init(struct starpu_task* task, int workerid)
+{
+	/* Size of the combined worker the task is dispatched on */
+	struct _starpu_combined_worker *cw = _starpu_get_combined_worker_struct(workerid);
+	int nparticipants = cw->worker_size;
+
+	struct _starpu_job *job = _starpu_get_job_associated_to_task(task);
+
+	job->task_size = nparticipants;
+	job->combined_workerid = workerid;
+	job->active_task_alias_count = 0;
+
+	_STARPU_PTHREAD_BARRIER_INIT(&job->before_work_barrier, NULL, nparticipants);
+	_STARPU_PTHREAD_BARRIER_INIT(&job->after_work_barrier, NULL, nparticipants);
+}
+

+ 0 - 24
src/core/parallel_task.h

@@ -1,24 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010  Université de Bordeaux 1
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#ifndef __PARALLEL_TASK_H__
-#define __PARALLEL_TASK_H__
-
-#include <starpu.h>
-
-struct starpu_task *_starpu_create_task_alias(struct starpu_task *task);
-
-#endif /* __PARALLEL_TASK_H__ */

+ 5 - 5
src/core/perfmodel/perfmodel.c

@@ -227,7 +227,7 @@ double starpu_task_expected_conversion_time(struct starpu_task *task,
 		starpu_data_handle_t handle;
 		starpu_data_handle_t handle;
 		struct starpu_task *conversion_task;
 		struct starpu_task *conversion_task;
 
 
-		handle = task->handles[i];
+		handle = STARPU_TASK_GET_HANDLE(task, i);
 		if (!_starpu_data_is_multiformat_handle(handle))
 		if (!_starpu_data_is_multiformat_handle(handle))
 			continue;
 			continue;
 
 
@@ -287,8 +287,8 @@ double starpu_task_expected_data_transfer_time(unsigned memory_node, struct star
 
 
 	for (buffer = 0; buffer < nbuffers; buffer++)
 	for (buffer = 0; buffer < nbuffers; buffer++)
 	{
 	{
-		starpu_data_handle_t handle = task->handles[buffer];
-		enum starpu_access_mode mode = task->cl->modes[buffer];
+		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, buffer);
+		enum starpu_access_mode mode = STARPU_CODELET_GET_MODE(task->cl, buffer);
 
 
 		penalty += starpu_data_expected_transfer_time(handle, memory_node, mode);
 		penalty += starpu_data_expected_transfer_time(handle, memory_node, mode);
 	}
 	}
@@ -375,8 +375,8 @@ double starpu_task_bundle_expected_data_transfer_time(starpu_task_bundle_t bundl
 			unsigned b;
 			unsigned b;
 			for (b = 0; b < task->cl->nbuffers; b++)
 			for (b = 0; b < task->cl->nbuffers; b++)
 			{
 			{
-				starpu_data_handle_t handle = task->handles[b];
-				enum starpu_access_mode mode = task->cl->modes[b];
+				starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, b);
+				enum starpu_access_mode mode = STARPU_CODELET_GET_MODE(task->cl, b);
 
 
 				if (!(mode & STARPU_R))
 				if (!(mode & STARPU_R))
 					continue;
 					continue;

+ 2 - 2
src/core/perfmodel/perfmodel_history.c

@@ -72,7 +72,7 @@ size_t _starpu_job_get_data_size(struct starpu_perfmodel *model, enum starpu_per
 		unsigned buffer;
 		unsigned buffer;
 		for (buffer = 0; buffer < nbuffers; buffer++)
 		for (buffer = 0; buffer < nbuffers; buffer++)
 		{
 		{
-			starpu_data_handle_t handle = task->handles[buffer];
+			starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, buffer);
 			size += _starpu_data_get_size(handle);
 			size += _starpu_data_get_size(handle);
 		}
 		}
 		return size;
 		return size;
@@ -1267,7 +1267,7 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 
 
 		for (i = 0; i < task->cl->nbuffers; i++)
 		for (i = 0; i < task->cl->nbuffers; i++)
 		{
 		{
-			starpu_data_handle_t handle = task->handles[i];
+			starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
 
 
 			STARPU_ASSERT(handle->ops);
 			STARPU_ASSERT(handle->ops);
 			STARPU_ASSERT(handle->ops->display);
 			STARPU_ASSERT(handle->ops->display);

+ 1 - 1
src/core/sched_ctx.c

@@ -885,7 +885,7 @@ int starpu_get_workers_of_sched_ctx(unsigned sched_ctx_id, int *pus, enum starpu
 	return npus;
 	return npus;
 }
 }
 
 
-starpu_pthread_mutex_t* starpu_sched_ctx_get_changing_ctx_mutex(unsigned sched_ctx_id)
+starpu_pthread_mutex_t* _starpu_sched_ctx_get_changing_ctx_mutex(unsigned sched_ctx_id)
 {
 {
 	return &changing_ctx_mutex[sched_ctx_id];
 	return &changing_ctx_mutex[sched_ctx_id];
 }
 }

+ 3 - 0
src/core/sched_ctx.h

@@ -144,6 +144,9 @@ void _starpu_worker_gets_out_of_ctx(unsigned sched_ctx_id, struct _starpu_worker
 /* Check if the worker belongs to another sched_ctx */
 /* Check if the worker belongs to another sched_ctx */
 unsigned _starpu_worker_belongs_to_a_sched_ctx(int workerid, unsigned sched_ctx_id);
 unsigned _starpu_worker_belongs_to_a_sched_ctx(int workerid, unsigned sched_ctx_id);
 
 
+/* mutex synchronising several simultaneous modifications of a context */
+starpu_pthread_mutex_t* _starpu_sched_ctx_get_changing_ctx_mutex(unsigned sched_ctx_id);
+
 #ifdef STARPU_USE_SC_HYPERVISOR
 #ifdef STARPU_USE_SC_HYPERVISOR
 /* Notifies the hypervisor that a tasks was poped from the workers' list */
 /* Notifies the hypervisor that a tasks was poped from the workers' list */
 void _starpu_sched_ctx_call_poped_task_cb(int workerid, struct starpu_task *task, size_t data_size, uint32_t footprint);
 void _starpu_sched_ctx_call_poped_task_cb(int workerid, struct starpu_task *task, size_t data_size, uint32_t footprint);

+ 16 - 8
src/core/sched_policy.c

@@ -23,7 +23,6 @@
 #include <profiling/profiling.h>
 #include <profiling/profiling.h>
 #include <common/barrier.h>
 #include <common/barrier.h>
 #include <core/debug.h>
 #include <core/debug.h>
-#include <core/parallel_task.h>
 
 
 static int use_prefetch = 0;
 static int use_prefetch = 0;
 
 
@@ -236,7 +235,7 @@ static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int wo
 				struct starpu_task *conversion_task;
 				struct starpu_task *conversion_task;
 				starpu_data_handle_t handle;
 				starpu_data_handle_t handle;
 
 
-				handle = task->handles[i];
+				handle = STARPU_TASK_GET_HANDLE(task, i);
 				if (!_starpu_handle_needs_conversion_task(handle, node))
 				if (!_starpu_handle_needs_conversion_task(handle, node))
 					continue;
 					continue;
 
 
@@ -249,7 +248,10 @@ static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int wo
 			}
 			}
 
 
 			for (i = 0; i < task->cl->nbuffers; i++)
 			for (i = 0; i < task->cl->nbuffers; i++)
-				task->handles[i]->mf_node = node;
+			{
+				starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
+				handle->mf_node = node;
+			}
 		}
 		}
 //		if(task->sched_ctx != _starpu_get_initial_sched_ctx()->id)
 //		if(task->sched_ctx != _starpu_get_initial_sched_ctx()->id)
 
 
@@ -281,7 +283,7 @@ static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int wo
 		int j;
 		int j;
 		for (j = 0; j < worker_size; j++)
 		for (j = 0; j < worker_size; j++)
 		{
 		{
-			struct starpu_task *alias = _starpu_create_task_alias(task);
+			struct starpu_task *alias = starpu_task_dup(task);
 
 
 			worker = _starpu_get_worker_struct(combined_workerid[j]);
 			worker = _starpu_get_worker_struct(combined_workerid[j]);
 			ret |= _starpu_push_local_task(worker, alias, 0);
 			ret |= _starpu_push_local_task(worker, alias, 0);
@@ -396,7 +398,13 @@ int _starpu_push_task_to_workers(struct starpu_task *task)
 	else
 	else
 	{
 	{
 		STARPU_ASSERT(sched_ctx->sched_policy->push_task);
 		STARPU_ASSERT(sched_ctx->sched_policy->push_task);
-		ret = sched_ctx->sched_policy->push_task(task);
+		/* check out if there are any workers in the context */
+		starpu_pthread_mutex_t *changing_ctx_mutex = _starpu_sched_ctx_get_changing_ctx_mutex(sched_ctx->id);
+		_STARPU_PTHREAD_MUTEX_LOCK(changing_ctx_mutex);
+		nworkers = starpu_sched_ctx_get_nworkers(sched_ctx->id);
+		ret = nworkers == 0 ? -1 : sched_ctx->sched_policy->push_task(task);
+		_STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
+
 		if(ret == -1)
 		if(ret == -1)
 		{
 		{
 			fprintf(stderr, "repush task \n");
 			fprintf(stderr, "repush task \n");
@@ -441,7 +449,7 @@ struct starpu_task *_starpu_create_conversion_task_for_arch(starpu_data_handle_t
 
 
 	conversion_task = starpu_task_create();
 	conversion_task = starpu_task_create();
 	conversion_task->synchronous = 0;
 	conversion_task->synchronous = 0;
-	conversion_task->handles[0] = handle;
+	STARPU_TASK_SET_HANDLE(conversion_task, handle, 0);
 
 
 #if defined(STARPU_USE_OPENCL) || defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
 #if defined(STARPU_USE_OPENCL) || defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
 	/* The node does not really matter here */
 	/* The node does not really matter here */
@@ -504,7 +512,7 @@ struct starpu_task *_starpu_create_conversion_task_for_arch(starpu_data_handle_t
 		STARPU_ABORT();
 		STARPU_ABORT();
 	}
 	}
 
 
-	conversion_task->cl->modes[0] = STARPU_RW;
+	STARPU_CODELET_SET_MODE(conversion_task->cl, STARPU_RW, 0);
 	return conversion_task;
 	return conversion_task;
 }
 }
 
 
@@ -657,7 +665,7 @@ pick:
 		struct starpu_task *conversion_task;
 		struct starpu_task *conversion_task;
 		starpu_data_handle_t handle;
 		starpu_data_handle_t handle;
 
 
-		handle = task->handles[i];
+		handle = STARPU_TASK_GET_HANDLE(task, i);
 		if (!_starpu_handle_needs_conversion_task(handle, node))
 		if (!_starpu_handle_needs_conversion_task(handle, node))
 			continue;
 			continue;
 		conversion_task = _starpu_create_conversion_task(handle, node);
 		conversion_task = _starpu_create_conversion_task(handle, node);

+ 35 - 11
src/core/task.c

@@ -77,6 +77,11 @@ void starpu_task_init(struct starpu_task *task)
 	task->sched_ctx = _starpu_get_initial_sched_ctx()->id;
 	task->sched_ctx = _starpu_get_initial_sched_ctx()->id;
 
 
 	task->flops = 0.0;
 	task->flops = 0.0;
+
+	task->scheduled = 0;
+
+	task->dyn_handles = NULL;
+	task->dyn_interfaces = NULL;
 }
 }
 
 
 /* Free all the ressources allocated for a task, without deallocating the task
 /* Free all the ressources allocated for a task, without deallocating the task
@@ -99,6 +104,14 @@ void starpu_task_clean(struct starpu_task *task)
 	if (bundle)
 	if (bundle)
 		starpu_task_bundle_remove(bundle, task);
 		starpu_task_bundle_remove(bundle, task);
 
 
+	if (task->dyn_handles)
+	{
+		free(task->dyn_handles);
+		task->dyn_handles = NULL;
+		free(task->dyn_interfaces);
+		task->dyn_interfaces = NULL;
+	}
+
 	struct _starpu_job *j = (struct _starpu_job *)task->starpu_private;
 	struct _starpu_job *j = (struct _starpu_job *)task->starpu_private;
 
 
 	if (j)
 	if (j)
@@ -229,7 +242,7 @@ int _starpu_submit_job(struct _starpu_job *j)
 		unsigned i;
 		unsigned i;
 		for (i=0; i<task->cl->nbuffers; i++)
 		for (i=0; i<task->cl->nbuffers; i++)
 		{
 		{
-			starpu_data_handle_t handle = task->handles[i];
+			starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
 			_starpu_spin_lock(&handle->header_lock);
 			_starpu_spin_lock(&handle->header_lock);
 			handle->busy_count++;
 			handle->busy_count++;
 			_starpu_spin_unlock(&handle->header_lock);
 			_starpu_spin_unlock(&handle->header_lock);
@@ -393,16 +406,23 @@ int starpu_task_submit(struct starpu_task *task)
 		unsigned i;
 		unsigned i;
 
 
 		/* Check buffers */
 		/* Check buffers */
-		STARPU_ASSERT_MSG(task->cl->nbuffers <= STARPU_NMAXBUFS, "Codelet %p has too many buffers (%d vs max %d)", task->cl, task->cl->nbuffers, STARPU_NMAXBUFS);
+		if (task->dyn_handles == NULL)
+			STARPU_ASSERT_MSG(task->cl->nbuffers <= STARPU_NMAXBUFS, "Codelet %p has too many buffers (%d vs max %d)", task->cl, task->cl->nbuffers, STARPU_NMAXBUFS);
+
+		if (task->dyn_handles)
+		{
+			task->dyn_interfaces = malloc(task->cl->nbuffers * sizeof(void *));
+		}
+
 		for (i = 0; i < task->cl->nbuffers; i++)
 		for (i = 0; i < task->cl->nbuffers; i++)
 		{
 		{
-			starpu_data_handle_t handle = task->handles[i];
+			starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
 			/* Make sure handles are not partitioned */
 			/* Make sure handles are not partitioned */
 			STARPU_ASSERT_MSG(handle->nchildren == 0, "only unpartitioned data can be used in a task");
 			STARPU_ASSERT_MSG(handle->nchildren == 0, "only unpartitioned data can be used in a task");
 			/* Provide the home interface for now if any,
 			/* Provide the home interface for now if any,
 			 * for can_execute hooks */
 			 * for can_execute hooks */
 			if (handle->home_node != -1)
 			if (handle->home_node != -1)
-				task->interfaces[i] = starpu_data_get_interface_on_node(task->handles[i], handle->home_node);
+				_STARPU_TASK_SET_INTERFACE(task, starpu_data_get_interface_on_node(handle, handle->home_node), i);
 		}
 		}
 
 
 		/* Check the type of worker(s) required by the task exist */
 		/* Check the type of worker(s) required by the task exist */
@@ -526,8 +546,10 @@ int _starpu_task_submit_nodeps(struct starpu_task *task)
 		unsigned i;
 		unsigned i;
 		for (i=0 ; i<task->cl->nbuffers ; i++)
 		for (i=0 ; i<task->cl->nbuffers ; i++)
 		{
 		{
-			j->ordered_buffers[i].handle = j->task->handles[i];
-			j->ordered_buffers[i].mode = j->task->cl->modes[i];
+			starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(j->task, i);
+			_STARPU_JOB_SET_ORDERED_BUFFER_HANDLE(j, handle, i);
+			enum starpu_access_mode mode = STARPU_CODELET_GET_MODE(j->task->cl, i);
+			_STARPU_JOB_SET_ORDERED_BUFFER_MODE(j, mode, i);
 		}
 		}
 	}
 	}
 
 
@@ -559,7 +581,7 @@ int _starpu_task_submit_conversion_task(struct starpu_task *task,
 	unsigned i;
 	unsigned i;
 	for (i=0; i<task->cl->nbuffers; i++)
 	for (i=0; i<task->cl->nbuffers; i++)
 	{
 	{
-		starpu_data_handle_t handle = task->handles[i];
+		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
 		_starpu_spin_lock(&handle->header_lock);
 		_starpu_spin_lock(&handle->header_lock);
 		handle->busy_count++;
 		handle->busy_count++;
 		_starpu_spin_unlock(&handle->header_lock);
 		_starpu_spin_unlock(&handle->header_lock);
@@ -574,8 +596,10 @@ int _starpu_task_submit_conversion_task(struct starpu_task *task,
 
 
 	for (i=0 ; i<task->cl->nbuffers ; i++)
 	for (i=0 ; i<task->cl->nbuffers ; i++)
 	{
 	{
-		j->ordered_buffers[i].handle = j->task->handles[i];
-		j->ordered_buffers[i].mode = j->task->cl->modes[i];
+		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(j->task, i);
+		_STARPU_JOB_SET_ORDERED_BUFFER_HANDLE(j, handle, i);
+		enum starpu_access_mode mode = STARPU_CODELET_GET_MODE(j->task->cl, i);
+		_STARPU_JOB_SET_ORDERED_BUFFER_MODE(j, mode, i);
 	}
 	}
 
 
         _STARPU_LOG_IN();
         _STARPU_LOG_IN();
@@ -604,7 +628,7 @@ void starpu_codelet_init(struct starpu_codelet *cl)
 	memset(cl, 0, sizeof(struct starpu_codelet));
 	memset(cl, 0, sizeof(struct starpu_codelet));
 }
 }
 
 
-void starpu_display_codelet_stats(struct starpu_codelet *cl)
+void starpu_codelet_display_stats(struct starpu_codelet *cl)
 {
 {
 	unsigned worker;
 	unsigned worker;
 	unsigned nworkers = starpu_worker_get_count();
 	unsigned nworkers = starpu_worker_get_count();
@@ -811,7 +835,7 @@ _starpu_task_uses_multiformat_handles(struct starpu_task *task)
 	unsigned i;
 	unsigned i;
 	for (i = 0; i < task->cl->nbuffers; i++)
 	for (i = 0; i < task->cl->nbuffers; i++)
 	{
 	{
-		if (_starpu_data_is_multiformat_handle(task->handles[i]))
+		if (_starpu_data_is_multiformat_handle(STARPU_TASK_GET_HANDLE(task, i)))
 			return 1;
 			return 1;
 	}
 	}
 
 

+ 3 - 0
src/core/task.h

@@ -73,4 +73,7 @@ starpu_cpu_func_t _starpu_task_get_cpu_nth_implementation(struct starpu_codelet
 starpu_cuda_func_t _starpu_task_get_cuda_nth_implementation(struct starpu_codelet *cl, unsigned nimpl);
 starpu_cuda_func_t _starpu_task_get_cuda_nth_implementation(struct starpu_codelet *cl, unsigned nimpl);
 starpu_opencl_func_t _starpu_task_get_opencl_nth_implementation(struct starpu_codelet *cl, unsigned nimpl);
 starpu_opencl_func_t _starpu_task_get_opencl_nth_implementation(struct starpu_codelet *cl, unsigned nimpl);
 
 
+/* Store INTERFACE as the i-th data interface of TASK: in dyn_interfaces when
+ * the task uses dynamic handles (dyn_handles is set), in the interfaces
+ * array otherwise.  Arguments are parenthesized so that arbitrary
+ * expressions can be passed safely. */
+#define _STARPU_TASK_SET_INTERFACE(task, interface, i) do { if ((task)->dyn_handles) (task)->dyn_interfaces[i] = (interface); else (task)->interfaces[i] = (interface);} while(0)
+/* Interface array matching the choice made by _STARPU_TASK_SET_INTERFACE:
+ * dyn_interfaces when dyn_handles is set, interfaces otherwise. */
+#define _STARPU_TASK_GET_INTERFACES(task) (((task)->dyn_handles) ? (task)->dyn_interfaces : (task)->interfaces)
+
 #endif // __CORE_TASK_H__
 #endif // __CORE_TASK_H__

+ 7 - 0
src/core/workers.c

@@ -1437,3 +1437,10 @@ starpu_driver_deinit(struct starpu_driver *d)
 		return -EINVAL;
 		return -EINVAL;
 	}
 	}
 }
 }
+
+/* Store the values of STARPU_MAJOR_VERSION, STARPU_MINOR_VERSION and
+ * STARPU_RELEASE_VERSION into *major, *minor and *release respectively.
+ * The three pointers are dereferenced unconditionally, so none of them may
+ * be NULL. */
+void starpu_get_version(int *major, int *minor, int *release)
+{
+	*major = STARPU_MAJOR_VERSION;
+	*minor = STARPU_MINOR_VERSION;
+	*release = STARPU_RELEASE_VERSION;
+}

+ 8 - 7
src/datawizard/coherency.c

@@ -22,6 +22,7 @@
 #include <core/dependencies/data_concurrency.h>
 #include <core/dependencies/data_concurrency.h>
 #include <profiling/profiling.h>
 #include <profiling/profiling.h>
 #include <math.h>
 #include <math.h>
+#include <core/task.h>
 
 
 static int link_supports_direct_transfers(starpu_data_handle_t handle, unsigned src_node, unsigned dst_node, unsigned *handling_node);
 static int link_supports_direct_transfers(starpu_data_handle_t handle, unsigned src_node, unsigned dst_node, unsigned *handling_node);
 unsigned _starpu_select_src_node(starpu_data_handle_t handle, unsigned destination)
 unsigned _starpu_select_src_node(starpu_data_handle_t handle, unsigned destination)
@@ -591,8 +592,8 @@ int starpu_prefetch_task_input_on_node(struct starpu_task *task, unsigned node)
 
 
 	for (index = 0; index < nbuffers; index++)
 	for (index = 0; index < nbuffers; index++)
 	{
 	{
-		starpu_data_handle_t handle = task->handles[index];
-		enum starpu_access_mode mode = task->cl->modes[index];
+		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, index);
+		enum starpu_access_mode mode = STARPU_CODELET_GET_MODE(task->cl, index);
 
 
 		if (mode & (STARPU_SCRATCH|STARPU_REDUX))
 		if (mode & (STARPU_SCRATCH|STARPU_REDUX))
 			continue;
 			continue;
@@ -624,7 +625,7 @@ int _starpu_fetch_task_input(struct _starpu_job *j, uint32_t mask)
 	if (profiling && task->profiling_info)
 	if (profiling && task->profiling_info)
 		_starpu_clock_gettime(&task->profiling_info->acquire_data_start_time);
 		_starpu_clock_gettime(&task->profiling_info->acquire_data_start_time);
 
 
-	struct starpu_buffer_descr *descrs = j->ordered_buffers;
+	struct starpu_buffer_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j);
 	unsigned nbuffers = task->cl->nbuffers;
 	unsigned nbuffers = task->cl->nbuffers;
 
 
 	unsigned local_memory_node = _starpu_memory_node_get_local_key();
 	unsigned local_memory_node = _starpu_memory_node_get_local_key();
@@ -656,14 +657,14 @@ int _starpu_fetch_task_input(struct _starpu_job *j, uint32_t mask)
 	/* Now that we have taken the data locks in locking order, fill the codelet interfaces in function order.  */
 	/* Now that we have taken the data locks in locking order, fill the codelet interfaces in function order.  */
 	for (index = 0; index < nbuffers; index++)
 	for (index = 0; index < nbuffers; index++)
 	{
 	{
-		starpu_data_handle_t handle = task->handles[index];
-		enum starpu_access_mode mode = task->cl->modes[index];
+		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, index);
+		enum starpu_access_mode mode = STARPU_CODELET_GET_MODE(task->cl, index);
 
 
 		struct _starpu_data_replicate *local_replicate;
 		struct _starpu_data_replicate *local_replicate;
 
 
 		local_replicate = get_replicate(handle, mode, workerid, local_memory_node);
 		local_replicate = get_replicate(handle, mode, workerid, local_memory_node);
 
 
-		task->interfaces[index] = local_replicate->data_interface;
+		_STARPU_TASK_SET_INTERFACE(task , local_replicate->data_interface, index);
 
 
 		if (mode & STARPU_REDUX)
 		if (mode & STARPU_REDUX)
 		{
 		{
@@ -699,7 +700,7 @@ void _starpu_push_task_output(struct _starpu_job *j, uint32_t mask)
 	if (profiling && task->profiling_info)
 	if (profiling && task->profiling_info)
 		_starpu_clock_gettime(&task->profiling_info->release_data_start_time);
 		_starpu_clock_gettime(&task->profiling_info->release_data_start_time);
 
 
-        struct starpu_buffer_descr *descrs = j->ordered_buffers;
+        struct starpu_buffer_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j);
         unsigned nbuffers = task->cl->nbuffers;
         unsigned nbuffers = task->cl->nbuffers;
 
 
 	int workerid = starpu_worker_get_id();
 	int workerid = starpu_worker_get_id();

+ 1 - 1
src/datawizard/filters.c

@@ -305,7 +305,7 @@ void starpu_data_unpartition(starpu_data_handle_t root_handle, unsigned gatherin
 				.nbuffers = 1
 				.nbuffers = 1
 			};
 			};
 			struct starpu_task *task = starpu_task_create();
 			struct starpu_task *task = starpu_task_create();
-			task->handles[0] = child_handle;
+			STARPU_TASK_SET_HANDLE(task, child_handle, 0);
 			task->cl = &cl;
 			task->cl = &cl;
 			task->synchronous = 1;
 			task->synchronous = 1;
 			if (_starpu_task_submit_internally(task) != 0)
 			if (_starpu_task_submit_internally(task) != 0)

+ 1 - 1
src/datawizard/footprint.c

@@ -43,7 +43,7 @@ uint32_t _starpu_compute_buffers_footprint(struct starpu_perfmodel *model, enum
 	{
 	{
 		for (buffer = 0; buffer < task->cl->nbuffers; buffer++)
 		for (buffer = 0; buffer < task->cl->nbuffers; buffer++)
 		{
 		{
-			starpu_data_handle_t handle = task->handles[buffer];
+			starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, buffer);
 
 
 			uint32_t handle_footprint = _starpu_data_get_footprint(handle);
 			uint32_t handle_footprint = _starpu_data_get_footprint(handle);
 
 

+ 16 - 14
src/datawizard/reduction.c

@@ -217,16 +217,16 @@ void _starpu_data_end_reduction_mode(starpu_data_handle_t handle)
 
 
 					redux_task->cl = handle->redux_cl;
 					redux_task->cl = handle->redux_cl;
 					STARPU_ASSERT(redux_task->cl);
 					STARPU_ASSERT(redux_task->cl);
-					if (!redux_task->cl->modes[0])
-						redux_task->cl->modes[0] = STARPU_RW;
-					if (!redux_task->cl->modes[1])
-						redux_task->cl->modes[1] = STARPU_R;
+					if (!(STARPU_CODELET_GET_MODE(redux_task->cl, 0)))
+						STARPU_CODELET_SET_MODE(redux_task->cl, STARPU_RW, 0);
+					if (!(STARPU_CODELET_GET_MODE(redux_task->cl, 1)))
+						STARPU_CODELET_SET_MODE(redux_task->cl, STARPU_R, 1);
 
 
-					STARPU_ASSERT_MSG(redux_task->cl->modes[0] == STARPU_RW, "First parameter of reduction codelet has to be RW");
-					STARPU_ASSERT_MSG(redux_task->cl->modes[1] == STARPU_R, "Second parameter of reduction codelet has to be R");
+					STARPU_ASSERT_MSG(STARPU_CODELET_GET_MODE(redux_task->cl, 0) == STARPU_RW, "First parameter of reduction codelet has to be RW");
+					STARPU_ASSERT_MSG(STARPU_CODELET_GET_MODE(redux_task->cl, 1) == STARPU_R, "Second parameter of reduction codelet has to be R");
 
 
-					redux_task->handles[0] = replicate_array[i];
-					redux_task->handles[1] = replicate_array[i+step];
+					STARPU_TASK_SET_HANDLE(redux_task, replicate_array[i], 0);
+					STARPU_TASK_SET_HANDLE(redux_task, replicate_array[i+step], 1);
 
 
 					int ndeps = 0;
 					int ndeps = 0;
 					struct starpu_task *task_deps[2];
 					struct starpu_task *task_deps[2];
@@ -278,10 +278,12 @@ void _starpu_data_end_reduction_mode(starpu_data_handle_t handle)
 
 
 			redux_task->cl = handle->init_cl;
 			redux_task->cl = handle->init_cl;
 			STARPU_ASSERT(redux_task->cl);
 			STARPU_ASSERT(redux_task->cl);
-			if (!redux_task->cl->modes[0])
-				redux_task->cl->modes[0] = STARPU_W;
-			STARPU_ASSERT_MSG(redux_task->cl->modes[0] == STARPU_W, "Parameter of initialization codelet has to be W");
-			redux_task->handles[0] = handle;
+
+			if (!(STARPU_CODELET_GET_MODE(redux_task->cl, 0)))
+				STARPU_CODELET_SET_MODE(redux_task->cl, STARPU_W, 0);
+			STARPU_ASSERT_MSG(STARPU_CODELET_GET_MODE(redux_task->cl, 0) == STARPU_W, "Parameter of initialization codelet has to be W");
+
+			STARPU_TASK_SET_HANDLE(redux_task, handle, 0);
 
 
 			int ret = _starpu_task_submit_internally(redux_task);
 			int ret = _starpu_task_submit_internally(redux_task);
 			STARPU_ASSERT(!ret);
 			STARPU_ASSERT(!ret);
@@ -311,8 +313,8 @@ void _starpu_data_end_reduction_mode(starpu_data_handle_t handle)
 			STARPU_ASSERT_MSG(redux_task->cl->modes[0] == STARPU_RW, "First parameter of reduction codelet has to be RW");
 			STARPU_ASSERT_MSG(redux_task->cl->modes[0] == STARPU_RW, "First parameter of reduction codelet has to be RW");
 			STARPU_ASSERT_MSG(redux_task->cl->modes[1] == STARPU_R, "Second parameter of reduction codelet has to be R");
 			STARPU_ASSERT_MSG(redux_task->cl->modes[1] == STARPU_R, "Second parameter of reduction codelet has to be R");
 
 
-			redux_task->handles[0] = handle;
-			redux_task->handles[1] = replicate_array[replicate];
+			STARPU_TASK_SET_HANDLE(redux_task, handle, 0);
+			STARPU_TASK_SET_HANDLE(redux_task, replicate_array[replicate], 1);
 
 
 			int ret = _starpu_task_submit_internally(redux_task);
 			int ret = _starpu_task_submit_internally(redux_task);
 			STARPU_ASSERT(!ret);
 			STARPU_ASSERT(!ret);

+ 23 - 11
src/debug/traces/starpu_fxt.c

@@ -197,6 +197,12 @@ static char *memnode_container_alias(char *output, int len, const char *prefix,
 	return output;
 	return output;
 }
 }
 
 
+/* Write the alias "<prefix>mm<memnodeid>" into OUTPUT (a buffer of LEN
+ * bytes, filled through snprintf) and return OUTPUT.
+ *
+ * memnodeid is a long unsigned int, so it is printed with %lu: the PRIu64
+ * conversion only matches this argument on platforms where unsigned long
+ * happens to be 64 bits wide. */
+static char *memmanager_container_alias(char *output, int len, const char *prefix, long unsigned int memnodeid)
+{
+	snprintf(output, len, "%smm%lu", prefix, memnodeid);
+	return output;
+}
+
 static char *thread_container_alias(char *output, int len, const char *prefix, long unsigned int threadid)
 static char *thread_container_alias(char *output, int len, const char *prefix, long unsigned int threadid)
 {
 {
 	snprintf(output, len, "%st%"PRIu64"", prefix, threadid);
 	snprintf(output, len, "%st%"PRIu64"", prefix, threadid);
@@ -232,10 +238,10 @@ static void memnode_set_state(double time, const char *prefix, unsigned int memn
 {
 {
 #ifdef STARPU_HAVE_POTI
 #ifdef STARPU_HAVE_POTI
 	char container[STARPU_POTI_STR_LEN];
 	char container[STARPU_POTI_STR_LEN];
-	memnode_container_alias(container, STARPU_POTI_STR_LEN, prefix, memnodeid);
+	memmanager_container_alias(container, STARPU_POTI_STR_LEN, prefix, memnodeid);
 	poti_SetState(time, container, "MS", name);
 	poti_SetState(time, container, "MS", name);
 #else
 #else
-	fprintf(out_paje_file, "10	%.9f	%smn%u	MS	%s\n", time, prefix, memnodeid, name);
+	fprintf(out_paje_file, "10	%.9f	%smm%u	MS	%s\n", time, prefix, memnodeid, name);
 #endif
 #endif
 }
 }
 
 
@@ -280,15 +286,21 @@ static void handle_new_mem_node(struct fxt_ev_64 *ev, struct starpu_fxt_options
 		/* TODO: ramkind */
 		/* TODO: ramkind */
 		snprintf(new_memnode_container_name, STARPU_POTI_STR_LEN, "%sMEMNODE%"PRIu64"", prefix, ev->param[0]);
 		snprintf(new_memnode_container_name, STARPU_POTI_STR_LEN, "%sMEMNODE%"PRIu64"", prefix, ev->param[0]);
 		poti_CreateContainer(get_event_time_stamp(ev, options), new_memnode_container_alias, "Mn", program_container, new_memnode_container_name);
 		poti_CreateContainer(get_event_time_stamp(ev, options), new_memnode_container_alias, "Mn", program_container, new_memnode_container_name);
+
+		memmanager_container_alias (new_memnode_container_alias, STARPU_POTI_STR_LEN, prefix, ev->param[0]);
+		/* TODO: ramkind */
+		snprintf(new_memnode_container_name, STARPU_POTI_STR_LEN, "%sMEMMANAGER%"PRIu64"", prefix, ev->param[0]);
+		poti_CreateContainer(get_event_time_stamp(ev, options), new_memnode_container_alias, "Mm", program_container, new_memnode_container_name);
 #else
 #else
 		fprintf(out_paje_file, "7	%.9f	%smn%"PRIu64"	Mn	%sp	%sMEMNODE%"PRIu64"\n", get_event_time_stamp(ev, options), prefix, ev->param[0], prefix, options->file_prefix, ev->param[0]);
 		fprintf(out_paje_file, "7	%.9f	%smn%"PRIu64"	Mn	%sp	%sMEMNODE%"PRIu64"\n", get_event_time_stamp(ev, options), prefix, ev->param[0], prefix, options->file_prefix, ev->param[0]);
+		fprintf(out_paje_file, "7	%.9f	%smm%"PRIu64"	Mm	%sp	%sMEMMANAGER%"PRIu64"\n", get_event_time_stamp(ev, options), prefix, ev->param[0], prefix, options->file_prefix, ev->param[0]);
 #endif
 #endif
 
 
 		if (!options->no_bus)
 		if (!options->no_bus)
 #ifdef STARPU_HAVE_POTI
 #ifdef STARPU_HAVE_POTI
 			poti_SetVariable(get_event_time_stamp(ev, options), new_memnode_container_alias, "bw", 0.0);
 			poti_SetVariable(get_event_time_stamp(ev, options), new_memnode_container_alias, "bw", 0.0);
 #else
 #else
-			fprintf(out_paje_file, "13	%.9f	%smn%"PRIu64"	bw	0.0\n", 0.0f, prefix, ev->param[0]);
+			fprintf(out_paje_file, "13	%.9f	%smm%"PRIu64"	bw	0.0\n", 0.0f, prefix, ev->param[0]);
 #endif
 #endif
 	}
 	}
 }
 }
@@ -703,10 +715,10 @@ static void handle_start_driver_copy(struct fxt_ev_64 *ev, struct starpu_fxt_opt
 			snprintf(paje_value, STARPU_POTI_STR_LEN, "%u", size);
 			snprintf(paje_value, STARPU_POTI_STR_LEN, "%u", size);
 			snprintf(paje_key, STARPU_POTI_STR_LEN, "com_%u", comid);
 			snprintf(paje_key, STARPU_POTI_STR_LEN, "com_%u", comid);
 			program_container_alias(program_container, STARPU_POTI_STR_LEN, prefix);
 			program_container_alias(program_container, STARPU_POTI_STR_LEN, prefix);
-			memnode_container_alias(src_memnode_container, STARPU_POTI_STR_LEN, prefix, src);
+			memmanager_container_alias(src_memnode_container, STARPU_POTI_STR_LEN, prefix, src);
 			poti_StartLink(time, program_container, "L", src_memnode_container, paje_value, paje_key);
 			poti_StartLink(time, program_container, "L", src_memnode_container, paje_value, paje_key);
 #else
 #else
-			fprintf(out_paje_file, "18	%.9f	L	%sp	%u	%smn%u	com_%u\n", time, prefix, size, prefix, src, comid);
+			fprintf(out_paje_file, "18	%.9f	L	%sp	%u	%smm%u	com_%u\n", time, prefix, size, prefix, src, comid);
 #endif
 #endif
 		}
 		}
 
 
@@ -743,10 +755,10 @@ static void handle_end_driver_copy(struct fxt_ev_64 *ev, struct starpu_fxt_optio
 			snprintf(paje_value, STARPU_POTI_STR_LEN, "%u", size);
 			snprintf(paje_value, STARPU_POTI_STR_LEN, "%u", size);
 			snprintf(paje_key, STARPU_POTI_STR_LEN, "com_%u", comid);
 			snprintf(paje_key, STARPU_POTI_STR_LEN, "com_%u", comid);
 			program_container_alias(program_container, STARPU_POTI_STR_LEN, prefix);
 			program_container_alias(program_container, STARPU_POTI_STR_LEN, prefix);
-			memnode_container_alias(dst_memnode_container, STARPU_POTI_STR_LEN, prefix, dst);
+			memmanager_container_alias(dst_memnode_container, STARPU_POTI_STR_LEN, prefix, dst);
 			poti_EndLink(time, program_container, "L", dst_memnode_container, paje_value, paje_key);
 			poti_EndLink(time, program_container, "L", dst_memnode_container, paje_value, paje_key);
 #else
 #else
-			fprintf(out_paje_file, "19	%.9f	L	%sp	%u	%smn%u	com_%u\n", time, prefix, size, prefix, dst, comid);
+			fprintf(out_paje_file, "19	%.9f	L	%sp	%u	%smm%u	com_%u\n", time, prefix, size, prefix, dst, comid);
 #endif
 #endif
 		}
 		}
 
 
@@ -1187,10 +1199,10 @@ void _starpu_fxt_display_bandwidth(struct starpu_fxt_options *options)
 		{
 		{
 #ifdef STARPU_HAVE_POTI
 #ifdef STARPU_HAVE_POTI
 			char src_memnode_container[STARPU_POTI_STR_LEN];
 			char src_memnode_container[STARPU_POTI_STR_LEN];
-			memnode_container_alias(src_memnode_container, STARPU_POTI_STR_LEN, prefix, itor->src_node);
+			memmanager_container_alias(src_memnode_container, STARPU_POTI_STR_LEN, prefix, itor->src_node);
 			poti_SetVariable(itor->comm_start, src_memnode_container, "bw", current_bandwidth_per_node[itor->src_node]);
 			poti_SetVariable(itor->comm_start, src_memnode_container, "bw", current_bandwidth_per_node[itor->src_node]);
 #else
 #else
-			fprintf(out_paje_file, "13	%.9f	%smn%u	bw	%f\n",
+			fprintf(out_paje_file, "13	%.9f	%smm%u	bw	%f\n",
 				itor->comm_start, prefix, itor->src_node, current_bandwidth_per_node[itor->src_node]);
 				itor->comm_start, prefix, itor->src_node, current_bandwidth_per_node[itor->src_node]);
 #endif
 #endif
 		}
 		}
@@ -1200,10 +1212,10 @@ void _starpu_fxt_display_bandwidth(struct starpu_fxt_options *options)
 		{
 		{
 #ifdef STARPU_HAVE_POTI
 #ifdef STARPU_HAVE_POTI
 			char dst_memnode_container[STARPU_POTI_STR_LEN];
 			char dst_memnode_container[STARPU_POTI_STR_LEN];
-			memnode_container_alias(dst_memnode_container, STARPU_POTI_STR_LEN, prefix, itor->dst_node);
+			memmanager_container_alias(dst_memnode_container, STARPU_POTI_STR_LEN, prefix, itor->dst_node);
 			poti_SetVariable(itor->comm_start, dst_memnode_container, "bw", current_bandwidth_per_node[itor->dst_node]);
 			poti_SetVariable(itor->comm_start, dst_memnode_container, "bw", current_bandwidth_per_node[itor->dst_node]);
 #else
 #else
-			fprintf(out_paje_file, "13	%.9f	%smn%u	bw	%f\n",
+			fprintf(out_paje_file, "13	%.9f	%smm%u	bw	%f\n",
 				itor->comm_start, prefix, itor->dst_node, current_bandwidth_per_node[itor->dst_node]);
 				itor->comm_start, prefix, itor->dst_node, current_bandwidth_per_node[itor->dst_node]);
 #endif
 #endif
 		}
 		}

+ 8 - 6
src/debug/traces/starpu_paje.c

@@ -137,13 +137,14 @@ void _starpu_fxt_write_paje_header(FILE *file)
 	poti_DefineContainerType("P", "MPIP", "Program");
 	poti_DefineContainerType("P", "MPIP", "Program");
 	poti_DefineContainerType("Mn", "P", "Memory Node");
 	poti_DefineContainerType("Mn", "P", "Memory Node");
 	poti_DefineContainerType("T", "Mn", "Thread");
 	poti_DefineContainerType("T", "Mn", "Thread");
+	poti_DefineContainerType("Mm", "Mn", "Memory Manager");
 	poti_DefineContainerType("W", "T", "Worker");
 	poti_DefineContainerType("W", "T", "Worker");
 	poti_DefineContainerType("MPICt", "T", "MPI Communication Thread");
 	poti_DefineContainerType("MPICt", "T", "MPI Communication Thread");
 	poti_DefineContainerType("Sc", "P", "Scheduler");
 	poti_DefineContainerType("Sc", "P", "Scheduler");
 
 
 	/* Types for the memory node */
 	/* Types for the memory node */
-	poti_DefineVariableType("bw", "Mn", "Bandwidth", "0 0 0");
-	poti_DefineStateType("MS", "Mn", "Memory Node State");
+	poti_DefineVariableType("bw", "Mm", "Bandwidth", "0 0 0");
+	poti_DefineStateType("MS", "Mm", "Memory Node State");
 	poti_DefineEntityValue("A", "MS", "Allocating", ".4 .1 .0");
 	poti_DefineEntityValue("A", "MS", "Allocating", ".4 .1 .0");
 	poti_DefineEntityValue("Ar", "MS", "AllocatingReuse", ".1 .1 .8");
 	poti_DefineEntityValue("Ar", "MS", "AllocatingReuse", ".1 .1 .8");
 	poti_DefineEntityValue("R", "MS", "Reclaiming", ".0 .1 .4");
 	poti_DefineEntityValue("R", "MS", "Reclaiming", ".0 .1 .4");
@@ -196,7 +197,7 @@ void _starpu_fxt_write_paje_header(FILE *file)
 
 
 	/* Link types */
 	/* Link types */
 	poti_DefineLinkType("MPIL", "P", "MPICt", "MPICt", "Links between two MPI Communication Threads");
 	poti_DefineLinkType("MPIL", "P", "MPICt", "MPICt", "Links between two MPI Communication Threads");
-	poti_DefineLinkType("L", "P", "Mn", "Mn", "Links between two Memory Nodes");
+	poti_DefineLinkType("L", "P", "Mm", "Mm", "Links between two Memory Managers");
 
 
 	/* Creating the MPI Program */
 	/* Creating the MPI Program */
 	poti_CreateContainer(0, "MPIroot", "MPIP", "0", "root");
 	poti_CreateContainer(0, "MPIroot", "MPIP", "0", "root");
@@ -206,6 +207,7 @@ void _starpu_fxt_write_paje_header(FILE *file)
 1       P      MPIP       \"Program\"                      	\n\
 1       P      MPIP       \"Program\"                      	\n\
 1       Mn      P       \"Memory Node\"                         \n\
 1       Mn      P       \"Memory Node\"                         \n\
 1       T      Mn       \"Thread\"                               \n\
 1       T      Mn       \"Thread\"                               \n\
+1       Mm      Mn       \"Memory Manager\"                         \n\
 1       W      T       \"Worker\"                               \n\
 1       W      T       \"Worker\"                               \n\
 1       MPICt   T       \"MPI Communication Thread\"              \n\
 1       MPICt   T       \"MPI Communication Thread\"              \n\
 1       Sc       P       \"Scheduler State\"                        \n\
 1       Sc       P       \"Scheduler State\"                        \n\
@@ -216,9 +218,9 @@ void _starpu_fxt_write_paje_header(FILE *file)
 	for (i=1; i<=10; i++)
 	for (i=1; i<=10; i++)
 		fprintf(file, "3       Ctx%u      T     \"InCtx%u\"         		\n", i, i);
 		fprintf(file, "3       Ctx%u      T     \"InCtx%u\"         		\n", i, i);
 	fprintf(file, "\
 	fprintf(file, "\
-3       MS       Mn       \"Memory Node State\"                        \n\
+3       MS       Mm       \"Memory Node State\"                        \n\
 4       ntask    Sc       \"Number of tasks\"                        \n\
 4       ntask    Sc       \"Number of tasks\"                        \n\
-4       bw      Mn       \"Bandwidth\"                        \n\
+4       bw      Mm       \"Bandwidth\"                        \n\
 6       I       S      Initializing       \"0.0 .7 1.0\"            \n\
 6       I       S      Initializing       \"0.0 .7 1.0\"            \n\
 6       D       S      Deinitializing       \"0.0 .1 .7\"            \n\
 6       D       S      Deinitializing       \"0.0 .1 .7\"            \n\
 6       Fi       S      FetchingInput       \"1.0 .1 1.0\"            \n\
 6       Fi       S      FetchingInput       \"1.0 .1 1.0\"            \n\
@@ -255,7 +257,7 @@ void _starpu_fxt_write_paje_header(FILE *file)
 6       CoA      MS     DriverCopyAsync         \".1 .3 .1\"		\n\
 6       CoA      MS     DriverCopyAsync         \".1 .3 .1\"		\n\
 6       No       MS     Nothing         \".0 .0 .0\"		\n\
 6       No       MS     Nothing         \".0 .0 .0\"		\n\
 5       MPIL     P	MPICt	MPICt   MPIL			\n\
 5       MPIL     P	MPICt	MPICt   MPIL			\n\
-5       L       P	Mn	Mn      L\n");
+5       L       P	Mm	Mm      L\n");
 
 
 	fprintf(file, "7      0.0 MPIroot      MPIP      0       root\n");
 	fprintf(file, "7      0.0 MPIroot      MPIP      0       root\n");
 #endif
 #endif

+ 1 - 1
src/drivers/cpu/driver_cpu.c

@@ -158,7 +158,7 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct starpu_task *worker_
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
 		_starpu_simgrid_execute_job(j, perf_arch, NAN);
 		_starpu_simgrid_execute_job(j, perf_arch, NAN);
 #else
 #else
-		func(task->interfaces, task->cl_arg);
+		func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
 #endif
 #endif
 		if (is_parallel_task && cl->type == STARPU_FORKJOIN)
 		if (is_parallel_task && cl->type == STARPU_FORKJOIN)
 			/* rebind to single CPU */
 			/* rebind to single CPU */

+ 1 - 1
src/drivers/cuda/driver_cuda.c

@@ -353,7 +353,7 @@ static int execute_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *arg
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
 	_starpu_simgrid_execute_job(j, args->perf_arch, NAN);
 	_starpu_simgrid_execute_job(j, args->perf_arch, NAN);
 #else
 #else
-	func(task->interfaces, task->cl_arg);
+	func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
 #endif
 #endif
 
 
 	_starpu_driver_end_job(args, j, args->perf_arch, &codelet_end, 0, profiling);
 	_starpu_driver_end_job(args, j, args->perf_arch, &codelet_end, 0, profiling);

+ 3 - 3
src/drivers/gordon/driver_gordon.c

@@ -102,7 +102,7 @@ static void starpu_to_gordon_buffers(struct _starpu_job *j, struct gordon_ppu_jo
 	unsigned nbuffers = cl->nbuffers;
 	unsigned nbuffers = cl->nbuffers;
 	for (buffer = 0; buffer < nbuffers; buffer++)
 	for (buffer = 0; buffer < nbuffers; buffer++)
 	{
 	{
-		enum starpu_access_mode mode = cl->modes[buffer];
+		enum starpu_access_mode mode = STARPU_CODELET_GET_MODE(cl, buffer);
 
 
 		switch (mode)
 		switch (mode)
 		{
 		{
@@ -122,7 +122,7 @@ static void starpu_to_gordon_buffers(struct _starpu_job *j, struct gordon_ppu_jo
 	for (buffer = 0; buffer < nbuffers; buffer++)
 	for (buffer = 0; buffer < nbuffers; buffer++)
 	{
 	{
 		unsigned gordon_buffer;
 		unsigned gordon_buffer;
-		enum starpu_access_mode mode = cl->modes[buffer];
+		enum starpu_access_mode mode = STARPU_CODELET_GET_MODE(cl, buffer);
 
 
 		switch (mode)
 		switch (mode)
 		{
 		{
@@ -138,7 +138,7 @@ static void starpu_to_gordon_buffers(struct _starpu_job *j, struct gordon_ppu_jo
 				break;
 				break;
 		}
 		}
 
 
-		starpu_data_handle_t handle = task->handles[buffer];
+		starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, buffer);
 
 
 		gordon_job->nalloc = 0;
 		gordon_job->nalloc = 0;
 		gordon_job->nin = nin;
 		gordon_job->nin = nin;

+ 2 - 2
src/drivers/opencl/driver_opencl.c

@@ -824,7 +824,7 @@ static int _starpu_opencl_execute_job(struct _starpu_job *j, struct _starpu_work
 #ifdef STARPU_SIMGRID
 #ifdef STARPU_SIMGRID
 	double length = NAN;
 	double length = NAN;
   #ifdef STARPU_OPENCL_SIMULATOR
   #ifdef STARPU_OPENCL_SIMULATOR
-	func(task->interfaces, task->cl_arg);
+	func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
     #ifndef CL_PROFILING_CLOCK_CYCLE_COUNT
     #ifndef CL_PROFILING_CLOCK_CYCLE_COUNT
       #ifdef CL_PROFILING_COMMAND_SHAVE_CYCLE_COUNT
       #ifdef CL_PROFILING_COMMAND_SHAVE_CYCLE_COUNT
         #define CL_PROFILING_CLOCK_CYCLE_COUNT CL_PROFILING_COMMAND_SHAVE_CYCLE_COUNT
         #define CL_PROFILING_CLOCK_CYCLE_COUNT CL_PROFILING_COMMAND_SHAVE_CYCLE_COUNT
@@ -838,7 +838,7 @@ static int _starpu_opencl_execute_job(struct _starpu_job *j, struct _starpu_work
   #endif
   #endif
 	_starpu_simgrid_execute_job(j, args->perf_arch, length);
 	_starpu_simgrid_execute_job(j, args->perf_arch, length);
 #else
 #else
-	func(task->interfaces, task->cl_arg);
+	func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
 #endif
 #endif
 
 
 	_starpu_driver_end_job(args, j, args->perf_arch, &codelet_end, 0, profiling);
 	_starpu_driver_end_job(args, j, args->perf_arch, &codelet_end, 0, profiling);

+ 21 - 15
src/profiling/bound.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
- * Copyright (C) 2010-2012  Université de Bordeaux 1
+ * Copyright (C) 2010-2013  Université de Bordeaux 1
  * Copyright (C) 2011  Télécom-SudParis
  * Copyright (C) 2011  Télécom-SudParis
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -501,10 +501,16 @@ void starpu_bound_print_lp(FILE *output)
 		}
 		}
 		fprintf(output, "/* StarPU upper bound linear programming problem, to be run in lp_solve. */\n\n");
 		fprintf(output, "/* StarPU upper bound linear programming problem, to be run in lp_solve. */\n\n");
 		fprintf(output, "/* !! This is a big system, it will be long to solve !! */\n\n");
 		fprintf(output, "/* !! This is a big system, it will be long to solve !! */\n\n");
+
 		fprintf(output, "/* We want to minimize total execution time (ms) */\n");
 		fprintf(output, "/* We want to minimize total execution time (ms) */\n");
 		fprintf(output, "min: tmax;\n\n");
 		fprintf(output, "min: tmax;\n\n");
 
 
-		fprintf(output, "/* Which is the maximum of all task completion times (ms) */\n");
+		fprintf(output, "/* Number of tasks */\n");
+		fprintf(output, "nt = %d;\n", nt);
+		fprintf(output, "/* Number of workers */\n");
+		fprintf(output, "nw = %d;\n", nw);
+
+		fprintf(output, "/* The total execution time is the maximum of all task completion times (ms) */\n");
 		for (t1 = tasks; t1; t1 = t1->next)
 		for (t1 = tasks; t1; t1 = t1->next)
 			fprintf(output, "c%lu <= tmax;\n", t1->id);
 			fprintf(output, "c%lu <= tmax;\n", t1->id);
 
 
@@ -836,12 +842,12 @@ void starpu_bound_print_mps(FILE *output)
 
 
 		fprintf(output, "NAME           StarPU theoretical bound\n");
 		fprintf(output, "NAME           StarPU theoretical bound\n");
 
 
-		fprintf(output, "\nROWS\n");
+		fprintf(output, "*\nROWS\n");
 
 
 		fprintf(output, "* We want to minimize total execution time (ms)\n");
 		fprintf(output, "* We want to minimize total execution time (ms)\n");
 		fprintf(output, " N  TMAX\n");
 		fprintf(output, " N  TMAX\n");
 
 
-		fprintf(output, "\n* Which is the maximum of all worker execution times (ms)\n");
+		fprintf(output, "* Which is the maximum of all worker execution times (ms)\n");
 		for (w = 0; w < nw; w++)
 		for (w = 0; w < nw; w++)
 		{
 		{
 			char name[32];
 			char name[32];
@@ -850,36 +856,36 @@ void starpu_bound_print_mps(FILE *output)
 			fprintf(output, " L  W%d\n", w);
 			fprintf(output, " L  W%d\n", w);
 		}
 		}
 
 
-		fprintf(output, "\n* And we have to have computed exactly all tasks\n");
+		fprintf(output, "*\n* And we have to have computed exactly all tasks\n*\n");
 		for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
 		for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
 		{
 		{
 			fprintf(output, "* task %s key %x\n", _starpu_codelet_get_model_name(tp->cl), (unsigned) tp->footprint);
 			fprintf(output, "* task %s key %x\n", _starpu_codelet_get_model_name(tp->cl), (unsigned) tp->footprint);
 			fprintf(output, " E  T%d\n", t);
 			fprintf(output, " E  T%d\n", t);
 		}
 		}
 
 
-		fprintf(output, "\nCOLUMNS\n");
+		fprintf(output, "*\nCOLUMNS\n*\n");
 
 
-		fprintf(output, "\n* Execution times and completion of all tasks\n");
+		fprintf(output, "*\n* Execution times and completion of all tasks\n*\n");
 		for (w = 0; w < nw; w++)
 		for (w = 0; w < nw; w++)
 			for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
 			for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
 				if (!isnan(times[w*nt+t]))
 				if (!isnan(times[w*nt+t]))
 				{
 				{
 					char name[9];
 					char name[9];
 					snprintf(name, sizeof(name), "W%dT%d", w, t);
 					snprintf(name, sizeof(name), "W%dT%d", w, t);
-					fprintf(stderr,"    %-8s  W%-7d  %12f\n", name, w, times[w*nt+t]);
-					fprintf(stderr,"    %-8s  T%-7d  %12d\n", name, t, 1);
+					fprintf(output,"    %-8s  W%-7d  %12f\n", name, w, times[w*nt+t]);
+					fprintf(output,"    %-8s  T%-7d  %12d\n", name, t, 1);
 				}
 				}
 
 
-		fprintf(output, "\n* Total execution time\n");
+		fprintf(output, "*\n* Total execution time\n*\n");
 		for (w = 0; w < nw; w++)
 		for (w = 0; w < nw; w++)
-			fprintf(stderr,"    TMAX      W%-2d       %12d\n", w, -1);
-		fprintf(stderr,"    TMAX      TMAX      %12d\n", 1);
+			fprintf(output,"    TMAX      W%-2d       %12d\n", w, -1);
+		fprintf(output,"    TMAX      TMAX      %12d\n", 1);
 
 
-		fprintf(output, "\nRHS\n");
+		fprintf(output, "*\nRHS\n*\n");
 
 
-		fprintf(output, "\n* Total number of tasks\n");
+		fprintf(output, "*\n* Total number of tasks\n*\n");
 		for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
 		for (t = 0, tp = task_pools; tp; t++, tp = tp->next)
-			fprintf(stderr,"    NT%-2d      T%-7d  %12lu\n", t, t, tp->n);
+			fprintf(output,"    NT%-2d      T%-7d  %12lu\n", t, t, tp->n);
 
 
 		fprintf(output, "ENDATA\n");
 		fprintf(output, "ENDATA\n");
 	}
 	}

+ 118 - 108
src/sched_policies/deque_modeling_policy_data_aware.c

@@ -27,11 +27,7 @@
 #include <core/workers.h>
 #include <core/workers.h>
 #include <sched_policies/fifo_queues.h>
 #include <sched_policies/fifo_queues.h>
 #include <core/perfmodel/perfmodel.h>
 #include <core/perfmodel/perfmodel.h>
-#include <starpu_parameters.h>
 #include <core/debug.h>
 #include <core/debug.h>
-#ifdef STARPU_USE_TOP
-#include <top/starpu_top_core.h>
-#endif /* !STARPU_USE_TOP */
 
 
 #ifndef DBL_MIN
 #ifndef DBL_MIN
 #define DBL_MIN __DBL_MIN__
 #define DBL_MIN __DBL_MIN__
@@ -54,12 +50,23 @@ struct _starpu_dmda_data
 	long int ready_task_cnt;
 	long int ready_task_cnt;
 };
 };
 
 
-static double alpha = _STARPU_DEFAULT_ALPHA;
-static double beta = _STARPU_DEFAULT_BETA;
-static double _gamma = _STARPU_DEFAULT_GAMMA;
 static double idle_power = 0.0;
 static double idle_power = 0.0;
 
 
+/* The dmda scheduling policy uses
+ *
+ * alpha * T_computation + beta * T_communication + gamma * Consumption
+ *
+ * Here are the default values of alpha, beta, gamma
+ */
+
+#define _STARPU_SCHED_ALPHA_DEFAULT 1.0
+#define _STARPU_SCHED_BETA_DEFAULT 1.0
+#define _STARPU_SCHED_GAMMA_DEFAULT 1000.0
+
 #ifdef STARPU_USE_TOP
 #ifdef STARPU_USE_TOP
+static double alpha = _STARPU_SCHED_ALPHA_DEFAULT;
+static double beta = _STARPU_SCHED_BETA_DEFAULT;
+static double _gamma = _STARPU_SCHED_GAMMA_DEFAULT;
 static const float alpha_minimum=0;
 static const float alpha_minimum=0;
 static const float alpha_maximum=10.0;
 static const float alpha_maximum=10.0;
 static const float beta_minimum=0;
 static const float beta_minimum=0;
@@ -80,7 +87,7 @@ static int count_non_ready_buffers(struct starpu_task *task, unsigned node)
 	{
 	{
 		starpu_data_handle_t handle;
 		starpu_data_handle_t handle;
 
 
-		handle = task->handles[index];
+		handle = STARPU_TASK_GET_HANDLE(task, index);
 
 
 		int is_valid;
 		int is_valid;
 		starpu_data_query_status(handle, node, NULL, &is_valid, NULL);
 		starpu_data_query_status(handle, node, NULL, &is_valid, NULL);
@@ -281,15 +288,10 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 
 
 	_STARPU_PTHREAD_MUTEX_LOCK(sched_mutex);
 	_STARPU_PTHREAD_MUTEX_LOCK(sched_mutex);
 
 
-/* Sometimes workers didn't take the tasks as early as we expected */
+        /* Sometimes workers didn't take the tasks as early as we expected */
 	fifo->exp_start = STARPU_MAX(fifo->exp_start, starpu_timing_now());
 	fifo->exp_start = STARPU_MAX(fifo->exp_start, starpu_timing_now());
 	fifo->exp_end = fifo->exp_start + fifo->exp_len;
 	fifo->exp_end = fifo->exp_start + fifo->exp_len;
-	if(!isnan(predicted))
-	{
-		fifo->exp_end += predicted;
-		fifo->exp_len += predicted;
-	}
-	
+
 	if (starpu_timing_now() + predicted_transfer < fifo->exp_end)
 	if (starpu_timing_now() + predicted_transfer < fifo->exp_end)
 	{
 	{
 		/* We may hope that the transfer will be finished by
 		/* We may hope that the transfer will be finished by
@@ -309,16 +311,21 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 		fifo->exp_len += predicted_transfer;
 		fifo->exp_len += predicted_transfer;
 	}
 	}
 
 
+	if(!isnan(predicted))
+	{
+		fifo->exp_end += predicted;
+		fifo->exp_len += predicted;
+	}
+
 	_STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
 	_STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
 
 
 	task->predicted = predicted;
 	task->predicted = predicted;
 	task->predicted_transfer = predicted_transfer;
 	task->predicted_transfer = predicted_transfer;
 
 
 #ifdef STARPU_USE_TOP
 #ifdef STARPU_USE_TOP
-	if (_starpu_top_status_get())
-		_starpu_top_task_prevision(task, best_workerid,
-			(unsigned long long)(fifo->exp_end-predicted)/1000,
-			(unsigned long long)fifo->exp_end/1000);
+	starpu_top_task_prevision(task, best_workerid,
+				  (unsigned long long)(fifo->exp_end-predicted)/1000,
+				  (unsigned long long)fifo->exp_end/1000);
 #endif /* !STARPU_USE_TOP */
 #endif /* !STARPU_USE_TOP */
 
 
 	if (starpu_get_prefetch_flag())
 	if (starpu_get_prefetch_flag())
@@ -388,6 +395,17 @@ static int _dm_push_task(struct starpu_task *task, unsigned prio, unsigned sched
 		unsigned memory_node = starpu_worker_get_memory_node(worker);
 		unsigned memory_node = starpu_worker_get_memory_node(worker);
 		enum starpu_perf_archtype perf_arch = starpu_worker_get_perf_archtype(worker);
 		enum starpu_perf_archtype perf_arch = starpu_worker_get_perf_archtype(worker);
 
 
+		/* Sometimes workers didn't take the tasks as early as we expected */
+		starpu_pthread_mutex_t *sched_mutex;
+		starpu_pthread_cond_t *sched_cond;
+		starpu_worker_get_sched_condition(worker, &sched_mutex, &sched_cond);
+
+		_STARPU_PTHREAD_MUTEX_LOCK(sched_mutex);
+		fifo->exp_start = STARPU_MAX(fifo->exp_start, starpu_timing_now());
+		fifo->exp_end = fifo->exp_start + fifo->exp_len;
+		_STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
+
+
 		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
 		for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
 		{
 		{
 			if (!starpu_worker_can_execute_task(worker, task, nimpl))
 			if (!starpu_worker_can_execute_task(worker, task, nimpl))
@@ -398,27 +416,40 @@ static int _dm_push_task(struct starpu_task *task, unsigned prio, unsigned sched
 			}
 			}
 
 
 			double exp_end;
 			double exp_end;
-			starpu_pthread_mutex_t *sched_mutex;
-			starpu_pthread_cond_t *sched_cond;
-			starpu_worker_get_sched_condition(worker, &sched_mutex, &sched_cond);
-
-			/* Sometimes workers didn't take the tasks as early as we expected */
-			_STARPU_PTHREAD_MUTEX_LOCK(sched_mutex);
-			fifo->exp_start = STARPU_MAX(fifo->exp_start, starpu_timing_now());
-			fifo->exp_end = fifo->exp_start + fifo->exp_len;
-			_STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
-
-
 			double local_length = starpu_task_expected_length(task, perf_arch, nimpl);
 			double local_length = starpu_task_expected_length(task, perf_arch, nimpl);
 			double local_penalty = starpu_task_expected_data_transfer_time(memory_node, task);
 			double local_penalty = starpu_task_expected_data_transfer_time(memory_node, task);
 			double ntasks_end = fifo->ntasks / starpu_worker_get_relative_speedup(perf_arch);
 			double ntasks_end = fifo->ntasks / starpu_worker_get_relative_speedup(perf_arch);
 
 
 			//_STARPU_DEBUG("Scheduler dm: task length (%lf) worker (%u) kernel (%u) \n", local_length,worker,nimpl);
 			//_STARPU_DEBUG("Scheduler dm: task length (%lf) worker (%u) kernel (%u) \n", local_length,worker,nimpl);
 
 
+			/*
+			 * This implements a default greedy scheduler for the
+			 * case of tasks which have no performance model, or
+			 * whose performance model is not calibrated yet.
+			 *
+			 * It simply uses the number of tasks already pushed to
+			 * the workers, divided by the relative performance of
+			 * a CPU and of a GPU.
+			 *
+			 * This is always computed, but the ntasks_best
+			 * selection is only really used if the task indeed has
+			 * no performance model, or is not calibrated yet.
+			 */
 			if (ntasks_best == -1
 			if (ntasks_best == -1
-			    || (!calibrating && ntasks_end < ntasks_best_end) /* Not calibrating, take better task */
-			    || (!calibrating && isnan(local_length)) /* Not calibrating but this worker is being calibrated */
-			    || (calibrating && isnan(local_length) && ntasks_end < ntasks_best_end) /* Calibrating, compete this worker with other non-calibrated */
+			
+			    /* Always compute the greedy decision, at least for
+			     * the tasks with no performance model. */
+			    || (!calibrating && ntasks_end < ntasks_best_end)
+
+			    /* The performance model of this task is not
+			     * calibrated on this worker, try to run it there
+			     * to calibrate it there. */
+			    || (!calibrating && isnan(local_length))
+
+			    /* the performance model of this task is not
+			     * calibrated on this worker either, rather run it
+			     * there if this one is low on scheduled tasks. */
+			    || (calibrating && isnan(local_length) && ntasks_end < ntasks_best_end)
 				)
 				)
 			{
 			{
 				ntasks_best_end = ntasks_end;
 				ntasks_best_end = ntasks_end;
@@ -509,6 +540,15 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 		enum starpu_perf_archtype perf_arch = starpu_worker_get_perf_archtype(worker);
 		enum starpu_perf_archtype perf_arch = starpu_worker_get_perf_archtype(worker);
 		unsigned memory_node = starpu_worker_get_memory_node(worker);
 		unsigned memory_node = starpu_worker_get_memory_node(worker);
 
 
+		/* Sometimes workers didn't take the tasks as early as we expected */
+		starpu_pthread_mutex_t *sched_mutex;
+		starpu_pthread_cond_t *sched_cond;
+		starpu_worker_get_sched_condition(worker, &sched_mutex, &sched_cond);
+
+		_STARPU_PTHREAD_MUTEX_LOCK(sched_mutex);
+		fifo->exp_start = STARPU_MAX(fifo->exp_start, starpu_timing_now());
+		_STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
+
 		for(nimpl  = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
 		for(nimpl  = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++)
 	 	{
 	 	{
 			if (!starpu_worker_can_execute_task(worker, task, nimpl))
 			if (!starpu_worker_can_execute_task(worker, task, nimpl))
@@ -517,15 +557,7 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 				continue;
 				continue;
 			}
 			}
 
 
-			/* Sometimes workers didn't take the tasks as early as we expected */
-			starpu_pthread_mutex_t *sched_mutex;
-			starpu_pthread_cond_t *sched_cond;
-			starpu_worker_get_sched_condition(worker, &sched_mutex, &sched_cond);
-
 			STARPU_ASSERT_MSG(fifo != NULL, "worker %d ctx %d\n", worker, sched_ctx_id);
 			STARPU_ASSERT_MSG(fifo != NULL, "worker %d ctx %d\n", worker, sched_ctx_id);
-			_STARPU_PTHREAD_MUTEX_LOCK(sched_mutex);
-			fifo->exp_start = STARPU_MAX(fifo->exp_start, starpu_timing_now());
-			_STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
 			exp_end[worker_ctx][nimpl] = fifo->exp_start + fifo->exp_len;
 			exp_end[worker_ctx][nimpl] = fifo->exp_start + fifo->exp_len;
 			if (exp_end[worker_ctx][nimpl] > max_exp_end)
 			if (exp_end[worker_ctx][nimpl] > max_exp_end)
 				max_exp_end = exp_end[worker_ctx][nimpl];
 				max_exp_end = exp_end[worker_ctx][nimpl];
@@ -551,10 +583,34 @@ static void compute_all_performance_predictions(struct starpu_task *task,
 			
 			
 			double ntasks_end = fifo->ntasks / starpu_worker_get_relative_speedup(perf_arch);
 			double ntasks_end = fifo->ntasks / starpu_worker_get_relative_speedup(perf_arch);
 
 
+			/*
+			 * This implements a default greedy scheduler for the
+			 * case of tasks which have no performance model, or
+			 * whose performance model is not calibrated yet.
+			 *
+			 * It simply uses the number of tasks already pushed to
+			 * the workers, divided by the relative performance of
+			 * a CPU and of a GPU.
+			 *
+			 * This is always computed, but the ntasks_best
+			 * selection is only really used if the task indeed has
+			 * no performance model, or is not calibrated yet.
+			 */
 			if (ntasks_best == -1
 			if (ntasks_best == -1
-			    || (!calibrating && ntasks_end < ntasks_best_end) /* Not calibrating, take better worker */
-			    || (!calibrating && isnan(local_task_length[worker_ctx][nimpl])) /* Not calibrating but this worker is being calibrated */
-			    || (calibrating && isnan(local_task_length[worker_ctx][nimpl]) && ntasks_end < ntasks_best_end) /* Calibrating, compete this worker with other non-calibrated */
+
+			    /* Always compute the greedy decision, at least for
+			     * the tasks with no performance model. */
+			    || (!calibrating && ntasks_end < ntasks_best_end)
+
+			    /* The performance model of this task is not
+			     * calibrated on this worker, try to run it there
+			     * to calibrate it there. */
+			    || (!calibrating && isnan(local_task_length[worker_ctx][nimpl]))
+
+			    /* the performance model of this task is not
+			     * calibrated on this worker either, rather run it
+			     * there if this one is low on scheduled tasks. */
+			    || (calibrating && isnan(local_task_length[worker_ctx][nimpl]) && ntasks_end < ntasks_best_end)
 				)
 				)
 			{
 			{
 				ntasks_best_end = ntasks_end;
 				ntasks_best_end = ntasks_end;
@@ -722,64 +778,18 @@ static int dmda_push_sorted_task(struct starpu_task *task)
 #ifdef STARPU_DEVEL
 #ifdef STARPU_DEVEL
 #warning TODO: after defining a scheduling window, use that instead of empty_ctx_tasks
 #warning TODO: after defining a scheduling window, use that instead of empty_ctx_tasks
 #endif
 #endif
-	unsigned sched_ctx_id = task->sched_ctx;
-	starpu_pthread_mutex_t *changing_ctx_mutex = starpu_sched_ctx_get_changing_ctx_mutex(sched_ctx_id);
-	unsigned nworkers;
-	int ret_val = -1;
-
-	_STARPU_PTHREAD_MUTEX_LOCK(changing_ctx_mutex);
-	nworkers = starpu_sched_ctx_get_nworkers(sched_ctx_id);
-	if(nworkers == 0)
-	{
-		_STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
-		return ret_val;
-	}
-
-	ret_val = _dmda_push_task(task, 1, sched_ctx_id);
-	_STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
-	return ret_val;
-
+	return _dmda_push_task(task, 1, task->sched_ctx);
 }
 }
 
 
 static int dm_push_task(struct starpu_task *task)
 static int dm_push_task(struct starpu_task *task)
 {
 {
-	unsigned sched_ctx_id = task->sched_ctx;
-	starpu_pthread_mutex_t *changing_ctx_mutex = starpu_sched_ctx_get_changing_ctx_mutex(sched_ctx_id);
-	unsigned nworkers;
-	int ret_val = -1;
-
-	_STARPU_PTHREAD_MUTEX_LOCK(changing_ctx_mutex);
-	nworkers = starpu_sched_ctx_get_nworkers(sched_ctx_id);
-	if(nworkers == 0)
-	{
-		_STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
-		return ret_val;
-	}
-
-	ret_val = _dm_push_task(task, 0, sched_ctx_id);
-	_STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
-	return ret_val;
+	return _dm_push_task(task, 0, task->sched_ctx);
 }
 }
 
 
 static int dmda_push_task(struct starpu_task *task)
 static int dmda_push_task(struct starpu_task *task)
 {
 {
-	unsigned sched_ctx_id = task->sched_ctx;
-	starpu_pthread_mutex_t *changing_ctx_mutex = starpu_sched_ctx_get_changing_ctx_mutex(sched_ctx_id);
-	unsigned nworkers;
-	int ret_val = -1;
-
-	_STARPU_PTHREAD_MUTEX_LOCK(changing_ctx_mutex);
-	nworkers = starpu_sched_ctx_get_nworkers(sched_ctx_id);
-	if(nworkers == 0)
-	{
-		_STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
-		return ret_val;
-	}
-
 	STARPU_ASSERT(task);
 	STARPU_ASSERT(task);
-	ret_val = _dmda_push_task(task, 0, sched_ctx_id);
-	_STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
-	return ret_val;
+	return _dmda_push_task(task, 0, task->sched_ctx);
 }
 }
 
 
 static void dmda_add_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers)
 static void dmda_add_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers)
@@ -820,9 +830,9 @@ static void initialize_dmda_policy(unsigned sched_ctx_id)
 	starpu_sched_ctx_create_worker_collection(sched_ctx_id, STARPU_WORKER_LIST);
 	starpu_sched_ctx_create_worker_collection(sched_ctx_id, STARPU_WORKER_LIST);
 
 
 	struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)malloc(sizeof(struct _starpu_dmda_data));
 	struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)malloc(sizeof(struct _starpu_dmda_data));
-	dt->alpha = _STARPU_DEFAULT_ALPHA;
-	dt->beta = _STARPU_DEFAULT_BETA;
-	dt->_gamma = _STARPU_DEFAULT_GAMMA;
+	dt->alpha = _STARPU_SCHED_ALPHA_DEFAULT;
+	dt->beta = _STARPU_SCHED_BETA_DEFAULT;
+	dt->_gamma = _STARPU_SCHED_GAMMA_DEFAULT;
 	dt->idle_power = 0.0;
 	dt->idle_power = 0.0;
 
 
 	starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)dt);
 	starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)dt);
@@ -851,13 +861,13 @@ static void initialize_dmda_policy(unsigned sched_ctx_id)
 
 
 #ifdef STARPU_USE_TOP
 #ifdef STARPU_USE_TOP
 	starpu_top_register_parameter_float("DMDA_ALPHA", &alpha,
 	starpu_top_register_parameter_float("DMDA_ALPHA", &alpha,
-		alpha_minimum, alpha_maximum, param_modified);
+					    alpha_minimum, alpha_maximum, param_modified);
 	starpu_top_register_parameter_float("DMDA_BETA", &beta,
 	starpu_top_register_parameter_float("DMDA_BETA", &beta,
-		beta_minimum, beta_maximum, param_modified);
+					    beta_minimum, beta_maximum, param_modified);
 	starpu_top_register_parameter_float("DMDA_GAMMA", &_gamma,
 	starpu_top_register_parameter_float("DMDA_GAMMA", &_gamma,
-		gamma_minimum, gamma_maximum, param_modified);
+					    gamma_minimum, gamma_maximum, param_modified);
 	starpu_top_register_parameter_float("DMDA_IDLE_POWER", &idle_power,
 	starpu_top_register_parameter_float("DMDA_IDLE_POWER", &idle_power,
-		idle_power_minimum, idle_power_maximum, param_modified);
+					    idle_power_minimum, idle_power_maximum, param_modified);
 #endif /* !STARPU_USE_TOP */
 #endif /* !STARPU_USE_TOP */
 }
 }
 
 
@@ -933,14 +943,6 @@ static void dmda_push_task_notify(struct starpu_task *task, int workerid, unsign
 	fifo->exp_end = fifo->exp_start + fifo->exp_len;
 	fifo->exp_end = fifo->exp_start + fifo->exp_len;
 
 
 	/* If there is no prediction available, we consider the task has a null length */
 	/* If there is no prediction available, we consider the task has a null length */
-	if (!isnan(predicted))
-	{
-		task->predicted = predicted;
-		fifo->exp_end += predicted;
-		fifo->exp_len += predicted;
-	}
-
-	/* If there is no prediction available, we consider the task has a null length */
 	if (!isnan(predicted_transfer))
 	if (!isnan(predicted_transfer))
 	{
 	{
 		if (starpu_timing_now() + predicted_transfer < fifo->exp_end)
 		if (starpu_timing_now() + predicted_transfer < fifo->exp_end)
@@ -960,6 +962,14 @@ static void dmda_push_task_notify(struct starpu_task *task, int workerid, unsign
 		fifo->exp_len += predicted_transfer;
 		fifo->exp_len += predicted_transfer;
 	}
 	}
 
 
+	/* If there is no prediction available, we consider the task has a null length */
+	if (!isnan(predicted))
+	{
+		task->predicted = predicted;
+		fifo->exp_end += predicted;
+		fifo->exp_len += predicted;
+	}
+
 	fifo->ntasks++;
 	fifo->ntasks++;
 
 
 	_STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
 	_STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);

+ 3 - 4
src/sched_policies/deque_queues.c

@@ -19,11 +19,10 @@
 /* Deque queues, ready for use by schedulers */
 /* Deque queues, ready for use by schedulers */
 
 
 #include <starpu.h>
 #include <starpu.h>
-#include <common/config.h>
-#include <core/workers.h>
+#include <starpu_scheduler.h>
 #include <sched_policies/deque_queues.h>
 #include <sched_policies/deque_queues.h>
-#include <errno.h>
-#include <common/utils.h>
+
+#include <core/workers.h>
 
 
 struct _starpu_deque_jobq *_starpu_create_deque(void)
 struct _starpu_deque_jobq *_starpu_create_deque(void)
 {
 {

+ 0 - 1
src/sched_policies/deque_queues.h

@@ -20,7 +20,6 @@
 #define __DEQUE_QUEUES_H__
 #define __DEQUE_QUEUES_H__
 
 
 #include <starpu.h>
 #include <starpu.h>
-#include <common/config.h>
 #include <core/jobs.h>
 #include <core/jobs.h>
 
 
 struct _starpu_deque_jobq
 struct _starpu_deque_jobq

+ 1 - 2
src/sched_policies/detect_combined_workers.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010-2013  Université de Bordeaux 1
  * Copyright (C) 2010-2013  Université de Bordeaux 1
- * Copyright (C) 2011, 2012       Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012, 2013       Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -15,7 +15,6 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
-#include <common/config.h>
 #include <starpu.h>
 #include <starpu.h>
 #include <common/utils.h>
 #include <common/utils.h>
 #include <core/workers.h>
 #include <core/workers.h>

+ 2 - 15
src/sched_policies/eager_central_policy.c

@@ -21,8 +21,9 @@
  *	JOB QUEUE.
  *	JOB QUEUE.
  */
  */
 
 
-#include <core/workers.h>
+#include <starpu_scheduler.h>
 #include <sched_policies/fifo_queues.h>
 #include <sched_policies/fifo_queues.h>
+#include <common/thread.h>
 
 
 struct _starpu_eager_center_policy_data
 struct _starpu_eager_center_policy_data
 {
 {
@@ -63,18 +64,7 @@ static int push_task_eager_policy(struct starpu_task *task)
  {
  {
 	unsigned sched_ctx_id = task->sched_ctx;
 	unsigned sched_ctx_id = task->sched_ctx;
 	struct _starpu_eager_center_policy_data *data = (struct _starpu_eager_center_policy_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
 	struct _starpu_eager_center_policy_data *data = (struct _starpu_eager_center_policy_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
-	starpu_pthread_mutex_t *changing_ctx_mutex = starpu_sched_ctx_get_changing_ctx_mutex(sched_ctx_id);
-	unsigned nworkers;
 	int ret_val = -1;
 	int ret_val = -1;
-
-	_STARPU_PTHREAD_MUTEX_LOCK(changing_ctx_mutex);
-	nworkers = starpu_sched_ctx_get_nworkers(sched_ctx_id);
-	if(nworkers == 0)
-	{
-		_STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
-		return ret_val;
-	}
-
 		
 		
 	_STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex);
 	_STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex);
 	ret_val = _starpu_fifo_push_task(data->fifo, task);
 	ret_val = _starpu_fifo_push_task(data->fifo, task);
@@ -82,7 +72,6 @@ static int push_task_eager_policy(struct starpu_task *task)
 	starpu_push_task_end(task);
 	starpu_push_task_end(task);
 	_STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex);
 	_STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex);
 
 
-
 	/*if there are no tasks block */
 	/*if there are no tasks block */
 	/* wake people waiting for a task */
 	/* wake people waiting for a task */
 	unsigned worker = 0;
 	unsigned worker = 0;
@@ -103,8 +92,6 @@ static int push_task_eager_policy(struct starpu_task *task)
 		_STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
 		_STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
 	}
 	}
 
 
-		
-	_STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
 	return ret_val;
 	return ret_val;
 }
 }
 
 

+ 0 - 13
src/sched_policies/eager_central_priority_policy.c

@@ -109,20 +109,8 @@ static int _starpu_priority_push_task(struct starpu_task *task)
 	struct _starpu_eager_central_prio_data *data = (struct _starpu_eager_central_prio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
 	struct _starpu_eager_central_prio_data *data = (struct _starpu_eager_central_prio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
 
 
 	struct _starpu_priority_taskq *taskq = data->taskq;
 	struct _starpu_priority_taskq *taskq = data->taskq;
-
-	/* if the context has no workers return */
-	starpu_pthread_mutex_t *changing_ctx_mutex = starpu_sched_ctx_get_changing_ctx_mutex(sched_ctx_id);
-	unsigned nworkers;
 	int ret_val = -1;
 	int ret_val = -1;
 	
 	
-	_STARPU_PTHREAD_MUTEX_LOCK(changing_ctx_mutex);
-	nworkers = starpu_sched_ctx_get_nworkers(sched_ctx_id);
-	if(nworkers == 0)
-	{
-		_STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
-		return ret_val;
-	}
-
 
 
 	_STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex);
 	_STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex);
 	unsigned priolevel = task->priority - STARPU_MIN_PRIO;
 	unsigned priolevel = task->priority - STARPU_MIN_PRIO;
@@ -153,7 +141,6 @@ static int _starpu_priority_push_task(struct starpu_task *task)
 		_STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
 		_STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
 	}
 	}
 
 
-	_STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
 	return 0;
 	return 0;
 }
 }
 
 

+ 0 - 2
src/sched_policies/fifo_queues.h

@@ -20,8 +20,6 @@
 #define __FIFO_QUEUES_H__
 #define __FIFO_QUEUES_H__
 
 
 #include <starpu.h>
 #include <starpu.h>
-#include <common/config.h>
-#include <common/utils.h>
 
 
 struct _starpu_fifo_taskq
 struct _starpu_fifo_taskq
 {
 {

+ 40 - 61
src/sched_policies/parallel_eager.c

@@ -15,12 +15,10 @@
  *
  *
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
-
-#include <core/workers.h>
 #include <sched_policies/fifo_queues.h>
 #include <sched_policies/fifo_queues.h>
-#include <common/barrier.h>
 #include <sched_policies/detect_combined_workers.h>
 #include <sched_policies/detect_combined_workers.h>
-#include <core/parallel_task.h>
+#include <starpu_scheduler.h>
+#include <core/workers.h>
 
 
 struct _starpu_peager_data
 struct _starpu_peager_data
 {
 {
@@ -28,12 +26,14 @@ struct _starpu_peager_data
 	struct _starpu_fifo_taskq *local_fifo[STARPU_NMAXWORKERS];
 	struct _starpu_fifo_taskq *local_fifo[STARPU_NMAXWORKERS];
 
 
 	int master_id[STARPU_NMAXWORKERS];
 	int master_id[STARPU_NMAXWORKERS];
+        starpu_pthread_mutex_t policy_mutex;
 };
 };
 
 
+#define STARPU_NMAXCOMBINED_WORKERS 10
 /* XXX instead of 10, we should use some "MAX combination .."*/
 /* XXX instead of 10, we should use some "MAX combination .."*/
 static int possible_combinations_cnt[STARPU_NMAXWORKERS];
 static int possible_combinations_cnt[STARPU_NMAXWORKERS];
-static int possible_combinations[STARPU_NMAXWORKERS][10];
-static int possible_combinations_size[STARPU_NMAXWORKERS][10];
+static int possible_combinations[STARPU_NMAXWORKERS][STARPU_NMAXCOMBINED_WORKERS];
+static int possible_combinations_size[STARPU_NMAXWORKERS][STARPU_NMAXCOMBINED_WORKERS];
 
 
 
 
 /*!!!!!!! It doesn't work with several contexts because the combined workers are constructed
 /*!!!!!!! It doesn't work with several contexts because the combined workers are constructed
@@ -135,6 +135,7 @@ static void initialize_peager_policy(unsigned sched_ctx_id)
 	data->fifo = _starpu_create_fifo();
 	data->fifo = _starpu_create_fifo();
 
 
 	starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)data);
 	starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)data);
+        _STARPU_PTHREAD_MUTEX_INIT(&data->policy_mutex, NULL);
 }
 }
 
 
 static void deinitialize_peager_policy(unsigned sched_ctx_id)
 static void deinitialize_peager_policy(unsigned sched_ctx_id)
@@ -146,6 +147,7 @@ static void deinitialize_peager_policy(unsigned sched_ctx_id)
 	_starpu_destroy_fifo(data->fifo);
 	_starpu_destroy_fifo(data->fifo);
 
 
 	starpu_sched_ctx_delete_worker_collection(sched_ctx_id);
 	starpu_sched_ctx_delete_worker_collection(sched_ctx_id);
+        _STARPU_PTHREAD_MUTEX_DESTROY(&data->policy_mutex);
 
 
 	free(data);
 	free(data);
 }
 }
@@ -153,44 +155,24 @@ static void deinitialize_peager_policy(unsigned sched_ctx_id)
 static int push_task_peager_policy(struct starpu_task *task)
 static int push_task_peager_policy(struct starpu_task *task)
 {
 {
 	unsigned sched_ctx_id = task->sched_ctx;
 	unsigned sched_ctx_id = task->sched_ctx;
-	starpu_pthread_mutex_t *changing_ctx_mutex = starpu_sched_ctx_get_changing_ctx_mutex(sched_ctx_id);
-	unsigned nworkers;
 	int ret_val = -1;
 	int ret_val = -1;
 	
 	
-	/* if the context has no workers return */
-	_STARPU_PTHREAD_MUTEX_LOCK(changing_ctx_mutex);
-	nworkers = starpu_sched_ctx_get_nworkers(sched_ctx_id);
-	
-   	if(nworkers == 0)
-	{
-   		_STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
-		return ret_val;
-	}
 	struct _starpu_peager_data *data = (struct _starpu_peager_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
 	struct _starpu_peager_data *data = (struct _starpu_peager_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);
-	int worker = 0;
-	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
-	
-	struct starpu_sched_ctx_iterator it;
-	if(workers->init_iterator)
-		workers->init_iterator(workers, &it);
-	
-	while(workers->has_next(workers, &it))
-	{
-		worker = workers->get_next(workers, &it);
-		int master = data->master_id[worker];
-		/* If this is not a CPU, then the worker simply grabs tasks from the fifo */
-		if (starpu_worker_get_type(worker) != STARPU_CPU_WORKER  || master == worker)
-		{
-			starpu_pthread_mutex_t *sched_mutex;
-			starpu_pthread_cond_t *sched_cond;
-			starpu_worker_get_sched_condition(worker, &sched_mutex, &sched_cond);
-			_STARPU_PTHREAD_MUTEX_LOCK(sched_mutex);
-		}
-	}
-	
 	
 	
+	_STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex);
 	ret_val = _starpu_fifo_push_task(data->fifo, task);
 	ret_val = _starpu_fifo_push_task(data->fifo, task);
 	starpu_push_task_end(task);
 	starpu_push_task_end(task);
+	_STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex);
+
+        /*if there are no tasks block */
+        /* wake people waiting for a task */
+        int worker = -1;
+        struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
+
+        struct starpu_sched_ctx_iterator it;
+        if(workers->init_iterator)
+                workers->init_iterator(workers, &it);
+
 
 
 	while(workers->has_next(workers, &it))
 	while(workers->has_next(workers, &it))
 	{
 	{
@@ -202,12 +184,11 @@ static int push_task_peager_policy(struct starpu_task *task)
 			starpu_pthread_mutex_t *sched_mutex;
 			starpu_pthread_mutex_t *sched_mutex;
 			starpu_pthread_cond_t *sched_cond;
 			starpu_pthread_cond_t *sched_cond;
 			starpu_worker_get_sched_condition(worker, &sched_mutex, &sched_cond);
 			starpu_worker_get_sched_condition(worker, &sched_mutex, &sched_cond);
+			_STARPU_PTHREAD_MUTEX_LOCK(sched_mutex);
 			_STARPU_PTHREAD_COND_SIGNAL(sched_cond);
 			_STARPU_PTHREAD_COND_SIGNAL(sched_cond);
 			_STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
 			_STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
 		}
 		}
 	}
 	}
-	
-	_STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
 
 
 	return ret_val;
 	return ret_val;
 }
 }
@@ -220,14 +201,24 @@ static struct starpu_task *pop_task_peager_policy(unsigned sched_ctx_id)
 
 
 	/* If this is not a CPU, then the worker simply grabs tasks from the fifo */
 	/* If this is not a CPU, then the worker simply grabs tasks from the fifo */
 	if (starpu_worker_get_type(workerid) != STARPU_CPU_WORKER)
 	if (starpu_worker_get_type(workerid) != STARPU_CPU_WORKER)
-		return _starpu_fifo_pop_task(data->fifo, workerid);
+	{
+		struct starpu_task *task = NULL;
+		_STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex);
+		task = _starpu_fifo_pop_task(data->fifo, workerid);
+		_STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex);
+
+		return task;
+	}
 
 
 	int master = data->master_id[workerid];
 	int master = data->master_id[workerid];
 
 
 	if (master == workerid)
 	if (master == workerid)
 	{
 	{
 		/* The worker is a master */
 		/* The worker is a master */
-		struct starpu_task *task = _starpu_fifo_pop_task(data->fifo, workerid);
+		struct starpu_task *task = NULL;
+		_STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex);
+		task = _starpu_fifo_pop_task(data->fifo, workerid);
+		_STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex);
 
 
 		if (!task)
 		if (!task)
 			return NULL;
 			return NULL;
@@ -266,29 +257,17 @@ static struct starpu_task *pop_task_peager_policy(unsigned sched_ctx_id)
 		}
 		}
 		else
 		else
 		{
 		{
-			/* The master needs to dispatch the task between the
-			 * different combined workers */
-			struct _starpu_combined_worker *combined_worker;
-			combined_worker = _starpu_get_combined_worker_struct(best_workerid);
-			int worker_size = combined_worker->worker_size;
-			int *combined_workerid = combined_worker->combined_workerid;
-
-			struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
-			j->task_size = worker_size;
-			j->combined_workerid = best_workerid;
-			j->active_task_alias_count = 0;
-
-			//fprintf(stderr, "POP -> size %d best_size %d\n", worker_size, best_size);
-
-			_STARPU_PTHREAD_BARRIER_INIT(&j->before_work_barrier, NULL, worker_size);
-			_STARPU_PTHREAD_BARRIER_INIT(&j->after_work_barrier, NULL, worker_size);
+			starpu_parallel_task_barrier_init(task, best_workerid);
+			int worker_size = 0;
+			int *combined_workerid;
+			starpu_combined_worker_get_description(best_workerid, &worker_size, &combined_workerid);
 
 
 			/* Dispatch task aliases to the different slaves */
 			/* Dispatch task aliases to the different slaves */
 			for (i = 1; i < worker_size; i++)
 			for (i = 1; i < worker_size; i++)
 			{
 			{
-				struct starpu_task *alias = _starpu_create_task_alias(task);
+				struct starpu_task *alias = starpu_task_dup(task);
 				int local_worker = combined_workerid[i];
 				int local_worker = combined_workerid[i];
-				
+
 				starpu_pthread_mutex_t *sched_mutex;
 				starpu_pthread_mutex_t *sched_mutex;
 				starpu_pthread_cond_t *sched_cond;
 				starpu_pthread_cond_t *sched_cond;
 				starpu_worker_get_sched_condition(local_worker, &sched_mutex, &sched_cond);
 				starpu_worker_get_sched_condition(local_worker, &sched_mutex, &sched_cond);
@@ -303,7 +282,7 @@ static struct starpu_task *pop_task_peager_policy(unsigned sched_ctx_id)
 			}
 			}
 
 
 			/* The master also manipulated an alias */
 			/* The master also manipulated an alias */
-			struct starpu_task *master_alias = _starpu_create_task_alias(task);
+			struct starpu_task *master_alias = starpu_task_dup(task);
 			return master_alias;
 			return master_alias;
 		}
 		}
 	}
 	}

+ 17 - 40
src/sched_policies/parallel_heft.c

@@ -23,9 +23,7 @@
 #include <core/workers.h>
 #include <core/workers.h>
 #include <core/perfmodel/perfmodel.h>
 #include <core/perfmodel/perfmodel.h>
 #include <starpu_parameters.h>
 #include <starpu_parameters.h>
-#include <common/barrier.h>
 #include <sched_policies/detect_combined_workers.h>
 #include <sched_policies/detect_combined_workers.h>
-#include <core/parallel_task.h>
 
 
 #ifndef DBL_MIN
 #ifndef DBL_MIN
 #define DBL_MIN __DBL_MIN__
 #define DBL_MIN __DBL_MIN__
@@ -39,6 +37,14 @@
 //static enum starpu_perf_archtype applicable_perf_archtypes[STARPU_NARCH_VARIATIONS];
 //static enum starpu_perf_archtype applicable_perf_archtypes[STARPU_NARCH_VARIATIONS];
 //static unsigned napplicable_perf_archtypes = 0;
 //static unsigned napplicable_perf_archtypes = 0;
 
 
+/*
+ * Here are the default values of alpha, beta, gamma
+ */
+
+#define _STARPU_SCHED_ALPHA_DEFAULT 1.0
+#define _STARPU_SCHED_BETA_DEFAULT 1.0
+#define _STARPU_SCHED_GAMMA_DEFAULT 1000.0
+
 struct _starpu_pheft_data
 struct _starpu_pheft_data
 {
 {
 	double alpha;
 	double alpha;
@@ -128,33 +134,25 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid,
 	}
 	}
 	else
 	else
 	{
 	{
-		/* This is a combined worker so we create task aliases */
-		struct _starpu_combined_worker *combined_worker;
-		combined_worker = _starpu_get_combined_worker_struct(best_workerid);
-		int worker_size = combined_worker->worker_size;
-		int *combined_workerid = combined_worker->combined_workerid;
-
-		struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
-		j->task_size = worker_size;
-		j->combined_workerid = best_workerid;
-		j->active_task_alias_count = 0;
-
 		/* This task doesn't belong to an actual worker, it belongs
 		/* This task doesn't belong to an actual worker, it belongs
 		 * to a combined worker and thus the scheduler doesn't care
 		 * to a combined worker and thus the scheduler doesn't care
 		 * of its predicted values which are insignificant */
 		 * of its predicted values which are insignificant */
 		task->predicted = 0;
 		task->predicted = 0;
 		task->predicted_transfer = 0;
 		task->predicted_transfer = 0;
 
 
-		_STARPU_PTHREAD_BARRIER_INIT(&j->before_work_barrier, NULL, worker_size);
-		_STARPU_PTHREAD_BARRIER_INIT(&j->after_work_barrier, NULL, worker_size);
+		starpu_parallel_task_barrier_init(task, best_workerid);
+		int worker_size = 0;
+		int *combined_workerid;
+		starpu_combined_worker_get_description(best_workerid, &worker_size, &combined_workerid);
 
 
 		/* All cpu workers must be locked at once */
 		/* All cpu workers must be locked at once */
 		_STARPU_PTHREAD_MUTEX_LOCK(&hd->global_push_mutex);
 		_STARPU_PTHREAD_MUTEX_LOCK(&hd->global_push_mutex);
 
 
+		/* This is a combined worker so we create task aliases */
 		int i;
 		int i;
 		for (i = 0; i < worker_size; i++)
 		for (i = 0; i < worker_size; i++)
 		{
 		{
-			struct starpu_task *alias = _starpu_create_task_alias(task);
+			struct starpu_task *alias = starpu_task_dup(task);
 			int local_worker = combined_workerid[i];
 			int local_worker = combined_workerid[i];
 
 
 			alias->predicted = exp_end_predicted - worker_exp_end[local_worker];
 			alias->predicted = exp_end_predicted - worker_exp_end[local_worker];
@@ -489,36 +487,15 @@ static int _parallel_heft_push_task(struct starpu_task *task, unsigned prio, uns
 static int parallel_heft_push_task(struct starpu_task *task)
 static int parallel_heft_push_task(struct starpu_task *task)
 {
 {
 	unsigned sched_ctx_id = task->sched_ctx;
 	unsigned sched_ctx_id = task->sched_ctx;
-	starpu_pthread_mutex_t *changing_ctx_mutex = starpu_sched_ctx_get_changing_ctx_mutex(sched_ctx_id);
-	unsigned nworkers;
 	int ret_val = -1;
 	int ret_val = -1;
 
 
 	if (task->priority == STARPU_MAX_PRIO)
 	if (task->priority == STARPU_MAX_PRIO)
 	{
 	{
-		_STARPU_PTHREAD_MUTEX_LOCK(changing_ctx_mutex);
-                nworkers = starpu_sched_ctx_get_nworkers(sched_ctx_id);
-                if(nworkers == 0)
-                {
-                        _STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
-                        return ret_val;
-                }
-
 		ret_val = _parallel_heft_push_task(task, 1, sched_ctx_id);
 		ret_val = _parallel_heft_push_task(task, 1, sched_ctx_id);
-		_STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
-                return ret_val;
-        }
-
-
-	_STARPU_PTHREAD_MUTEX_LOCK(changing_ctx_mutex);
-	nworkers = starpu_sched_ctx_get_nworkers(sched_ctx_id);
-        if(nworkers == 0)
-	{
-		_STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
                 return ret_val;
                 return ret_val;
         }
         }
 
 
         ret_val = _parallel_heft_push_task(task, 0, sched_ctx_id);
         ret_val = _parallel_heft_push_task(task, 0, sched_ctx_id);
-	_STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
 	return ret_val;
 	return ret_val;
 }
 }
 
 
@@ -575,9 +552,9 @@ static void initialize_parallel_heft_policy(unsigned sched_ctx_id)
 {
 {
 	starpu_sched_ctx_create_worker_collection(sched_ctx_id, STARPU_WORKER_LIST);
 	starpu_sched_ctx_create_worker_collection(sched_ctx_id, STARPU_WORKER_LIST);
 	struct _starpu_pheft_data *hd = (struct _starpu_pheft_data*)malloc(sizeof(struct _starpu_pheft_data));
 	struct _starpu_pheft_data *hd = (struct _starpu_pheft_data*)malloc(sizeof(struct _starpu_pheft_data));
-	hd->alpha = _STARPU_DEFAULT_ALPHA;
-	hd->beta = _STARPU_DEFAULT_BETA;
-	hd->_gamma = _STARPU_DEFAULT_GAMMA;
+	hd->alpha = _STARPU_SCHED_ALPHA_DEFAULT;
+	hd->beta = _STARPU_SCHED_BETA_DEFAULT;
+	hd->_gamma = _STARPU_SCHED_GAMMA_DEFAULT;
 	hd->idle_power = 0.0;
 	hd->idle_power = 0.0;
 
 
 	starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)hd);
 	starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)hd);

+ 1 - 16
src/sched_policies/random_policy.c

@@ -83,22 +83,7 @@ static int _random_push_task(struct starpu_task *task, unsigned prio)
 
 
 static int random_push_task(struct starpu_task *task)
 static int random_push_task(struct starpu_task *task)
 {
 {
-	unsigned sched_ctx_id = task->sched_ctx;
-	starpu_pthread_mutex_t *changing_ctx_mutex = starpu_sched_ctx_get_changing_ctx_mutex(sched_ctx_id);
-	unsigned nworkers;
-        int ret_val = -1;
-
-        _STARPU_PTHREAD_MUTEX_LOCK(changing_ctx_mutex);
-	nworkers = starpu_sched_ctx_get_nworkers(sched_ctx_id);
-        if(nworkers == 0)
-        {
-		_STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
-                return ret_val;
-        }
-
-        ret_val = _random_push_task(task, !!task->priority);
-        _STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
-        return ret_val;
+        return _random_push_task(task, !!task->priority);
 }
 }
 
 
 static void initialize_random_policy(unsigned sched_ctx_id)
 static void initialize_random_policy(unsigned sched_ctx_id)

+ 0 - 1
src/sched_policies/stack_queues.h

@@ -20,7 +20,6 @@
 #define __STACK_QUEUES_H__
 #define __STACK_QUEUES_H__
 
 
 #include <starpu.h>
 #include <starpu.h>
-#include <common/config.h>
 #include <core/jobs.h>
 #include <core/jobs.h>
 
 
 struct _starpu_stack_jobq
 struct _starpu_stack_jobq

+ 0 - 15
src/sched_policies/work_stealing_policy.c

@@ -336,19 +336,6 @@ int ws_push_task(struct starpu_task *task)
 	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
 	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
 	int workerid = starpu_worker_get_id();
 	int workerid = starpu_worker_get_id();
 
 
-	starpu_pthread_mutex_t *changing_ctx_mutex = starpu_sched_ctx_get_changing_ctx_mutex(sched_ctx_id);
-        unsigned nworkers;
-        int ret_val = -1;
-
-	/* if the context has no workers return */
-        _STARPU_PTHREAD_MUTEX_LOCK(changing_ctx_mutex);
-        nworkers = starpu_sched_ctx_get_nworkers(sched_ctx_id);
-        if(nworkers == 0)
-        {
-                _STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
-                return ret_val;
-        }
-
 	unsigned worker = 0;
 	unsigned worker = 0;
 	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
 	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
 	struct starpu_sched_ctx_iterator it;
 	struct starpu_sched_ctx_iterator it;
@@ -394,8 +381,6 @@ int ws_push_task(struct starpu_task *task)
 		_STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
 		_STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
 	}
 	}
 		
 		
-        _STARPU_PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
-
 	return 0;
 	return 0;
 }
 }
 
 

+ 0 - 11
src/starpu_parameters.h

@@ -20,17 +20,6 @@
 /* Parameters which are not worth being added to ./configure options, but
 /* Parameters which are not worth being added to ./configure options, but
  * still interesting to easily change */
  * still interesting to easily change */
 
 
-/* The dmda scheduling policy uses
- *
- * alpha * T_computation + beta * T_communication + gamma * Consumption
- *
- * Here are the default values of alpha, beta, gamma
- */
-
-#define _STARPU_DEFAULT_ALPHA 1.0
-#define _STARPU_DEFAULT_BETA 1.0
-#define _STARPU_DEFAULT_GAMMA 1000.0
-
 /* How many executions a codelet will have to be measured before we
 /* How many executions a codelet will have to be measured before we
  * consider that calibration will provide a value good enough for scheduling */
  * consider that calibration will provide a value good enough for scheduling */
 #define _STARPU_CALIBRATION_MINIMUM 10
 #define _STARPU_CALIBRATION_MINIMUM 10

+ 2 - 2
src/top/starpu_top_core.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2011 William Braik, Yann Courtois, Jean-Marie Couteyen, Anthony Roy
  * Copyright (C) 2011 William Braik, Yann Courtois, Jean-Marie Couteyen, Anthony Roy
- * Copyright (C) 2011, 2012 Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012, 2013 Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -48,7 +48,7 @@ void __starpu_top_task_prevision_timespec(struct starpu_task *task,
 					int devid,
 					int devid,
 					const struct timespec* start,
 					const struct timespec* start,
 					const struct timespec* end);
 					const struct timespec* end);
-void _starpu_top_task_prevision(struct starpu_task *task,
+void starpu_top_task_prevision(struct starpu_task *task,
 			       int devid, unsigned long long start,
 			       int devid, unsigned long long start,
 			       unsigned long long end);
 			       unsigned long long end);
 
 

+ 6 - 3
src/top/starpu_top_task.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2011 William Braik, Yann Courtois, Jean-Marie Couteyen, Anthony Roy
  * Copyright (C) 2011 William Braik, Yann Courtois, Jean-Marie Couteyen, Anthony Roy
- * Copyright (C) 2011 Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2013 Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -66,17 +66,20 @@ void __starpu_top_task_prevision_timespec(struct starpu_task *task,
 					const struct timespec* start,
 					const struct timespec* start,
 					const struct timespec* end)
 					const struct timespec* end)
 {
 {
-	_starpu_top_task_prevision(task,
+	starpu_top_task_prevision(task,
 				  devid,
 				  devid,
 				  _starpu_top_timing_timespec_to_ms(start),
 				  _starpu_top_timing_timespec_to_ms(start),
 				  _starpu_top_timing_timespec_to_ms(end));
 				  _starpu_top_timing_timespec_to_ms(end));
 }
 }
 
 
-void _starpu_top_task_prevision(struct starpu_task *task,
+void starpu_top_task_prevision(struct starpu_task *task,
 			       int devid,
 			       int devid,
 			       unsigned long long start,
 			       unsigned long long start,
 			       unsigned long long end)
 			       unsigned long long end)
 {
 {
+	if (!_starpu_top_status_get())
+		return;
+
 	unsigned long long taskid = _starpu_get_job_associated_to_task(task)->job_id;
 	unsigned long long taskid = _starpu_get_job_associated_to_task(task)->job_id;
 	STARPU_ASSERT(_starpu_top_status_get());
 	STARPU_ASSERT(_starpu_top_status_get());
 	struct timespec now;
 	struct timespec now;

+ 2 - 2
src/util/starpu_data_cpy.c

@@ -103,8 +103,8 @@ int _starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_h
 	task->callback_func = callback_func;
 	task->callback_func = callback_func;
 	task->callback_arg = callback_arg;
 	task->callback_arg = callback_arg;
 
 
-	task->handles[0] = dst_handle;
-	task->handles[1] = src_handle;
+	STARPU_TASK_SET_HANDLE(task, dst_handle, 0);
+	STARPU_TASK_SET_HANDLE(task, src_handle, 1);
 
 
 	task->synchronous = !asynchronous;
 	task->synchronous = !asynchronous;
 
 

+ 12 - 6
src/util/starpu_insert_task.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010, 2012  Université de Bordeaux 1
  * Copyright (C) 2010, 2012  Université de Bordeaux 1
- * Copyright (C) 2011, 2012  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012, 2013  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -23,7 +23,7 @@
 #include <stdarg.h>
 #include <stdarg.h>
 #include <util/starpu_insert_task_utils.h>
 #include <util/starpu_insert_task_utils.h>
 
 
-void starpu_codelet_pack_args(char **arg_buffer, size_t *arg_buffer_size, ...)
+void starpu_codelet_pack_args(void **arg_buffer, size_t *arg_buffer_size, ...)
 {
 {
 	va_list varg_list;
 	va_list varg_list;
 
 
@@ -32,7 +32,7 @@ void starpu_codelet_pack_args(char **arg_buffer, size_t *arg_buffer_size, ...)
 	*arg_buffer_size = _starpu_insert_task_get_arg_size(varg_list);
 	*arg_buffer_size = _starpu_insert_task_get_arg_size(varg_list);
 
 
 	va_start(varg_list, arg_buffer_size);
 	va_start(varg_list, arg_buffer_size);
-	_starpu_codelet_pack_args(*arg_buffer_size, arg_buffer, varg_list);
+	_starpu_codelet_pack_args(arg_buffer, *arg_buffer_size, varg_list);
 }
 }
 
 
 void starpu_codelet_unpack_args(void *_cl_arg, ...)
 void starpu_codelet_unpack_args(void *_cl_arg, ...)
@@ -66,7 +66,7 @@ void starpu_codelet_unpack_args(void *_cl_arg, ...)
 int starpu_insert_task(struct starpu_codelet *cl, ...)
 int starpu_insert_task(struct starpu_codelet *cl, ...)
 {
 {
 	va_list varg_list;
 	va_list varg_list;
-	char *arg_buffer = NULL;
+	void *arg_buffer = NULL;
 
 
 	/* Compute the size */
 	/* Compute the size */
 	size_t arg_buffer_size = 0;
 	size_t arg_buffer_size = 0;
@@ -76,11 +76,17 @@ int starpu_insert_task(struct starpu_codelet *cl, ...)
 	if (arg_buffer_size)
 	if (arg_buffer_size)
 	{
 	{
 		va_start(varg_list, cl);
 		va_start(varg_list, cl);
-		_starpu_codelet_pack_args(arg_buffer_size, &arg_buffer, varg_list);
+		_starpu_codelet_pack_args(&arg_buffer, arg_buffer_size, varg_list);
 	}
 	}
 
 
-	va_start(varg_list, cl);
 	struct starpu_task *task = starpu_task_create();
 	struct starpu_task *task = starpu_task_create();
+
+	if (cl && cl->nbuffers > STARPU_NMAXBUFS)
+	{
+		task->dyn_handles = malloc(cl->nbuffers * sizeof(starpu_data_handle_t));
+	}
+
+	va_start(varg_list, cl);
 	int ret = _starpu_insert_task_create_and_submit(arg_buffer, arg_buffer_size, cl, &task, varg_list);
 	int ret = _starpu_insert_task_create_and_submit(arg_buffer, arg_buffer_size, cl, &task, varg_list);
 
 
 	if (ret == -ENODEV)
 	if (ret == -ENODEV)

+ 19 - 14
src/util/starpu_insert_task_utils.c

@@ -18,6 +18,7 @@
 #include <util/starpu_insert_task_utils.h>
 #include <util/starpu_insert_task_utils.h>
 #include <common/config.h>
 #include <common/config.h>
 #include <common/utils.h>
 #include <common/utils.h>
+#include <core/task.h>
 
 
 typedef void (*_starpu_callback_func_t)(void *);
 typedef void (*_starpu_callback_func_t)(void *);
 
 
@@ -120,15 +121,16 @@ size_t _starpu_insert_task_get_arg_size(va_list varg_list)
 	return arg_buffer_size;
 	return arg_buffer_size;
 }
 }
 
 
-int _starpu_codelet_pack_args(size_t arg_buffer_size, char **arg_buffer, va_list varg_list)
+int _starpu_codelet_pack_args(void **arg_buffer, size_t arg_buffer_size, va_list varg_list)
 {
 {
 	int arg_type;
 	int arg_type;
 	unsigned current_arg_offset = 0;
 	unsigned current_arg_offset = 0;
 	unsigned char nargs = 0;
 	unsigned char nargs = 0;
+	char *_arg_buffer;
 
 
 	/* The buffer will contain : nargs, {size, content} (x nargs)*/
 	/* The buffer will contain : nargs, {size, content} (x nargs)*/
 
 
-	*arg_buffer = (char *) malloc(arg_buffer_size);
+	_arg_buffer = malloc(arg_buffer_size);
 
 
 	/* We will begin the buffer with the number of args (which is stored as a char) */
 	/* We will begin the buffer with the number of args (which is stored as a char) */
 	current_arg_offset += sizeof(char);
 	current_arg_offset += sizeof(char);
@@ -150,10 +152,10 @@ int _starpu_codelet_pack_args(size_t arg_buffer_size, char **arg_buffer, va_list
 			void *ptr = va_arg(varg_list, void *);
 			void *ptr = va_arg(varg_list, void *);
 			size_t cst_size = va_arg(varg_list, size_t);
 			size_t cst_size = va_arg(varg_list, size_t);
 
 
-			*(size_t *)(&(*arg_buffer)[current_arg_offset]) = cst_size;
+			*(size_t *)(&(_arg_buffer)[current_arg_offset]) = cst_size;
 			current_arg_offset += sizeof(size_t);
 			current_arg_offset += sizeof(size_t);
 
 
-			memcpy(&(*arg_buffer)[current_arg_offset], ptr, cst_size);
+			memcpy(&_arg_buffer[current_arg_offset], ptr, cst_size);
 			current_arg_offset += cst_size;
 			current_arg_offset += cst_size;
 
 
 			nargs++;
 			nargs++;
@@ -205,19 +207,20 @@ int _starpu_codelet_pack_args(size_t arg_buffer_size, char **arg_buffer, va_list
 
 
 	if (nargs)
 	if (nargs)
 	{
 	{
-		(*arg_buffer)[0] = nargs;
+		_arg_buffer[0] = nargs;
 	}
 	}
 	else
 	else
 	{
 	{
-		free(*arg_buffer);
-		*arg_buffer = NULL;
+		free(_arg_buffer);
+		_arg_buffer = NULL;
 	}
 	}
 
 
+	*arg_buffer = _arg_buffer;
 	va_end(varg_list);
 	va_end(varg_list);
 	return 0;
 	return 0;
 }
 }
 
 
-int _starpu_insert_task_create_and_submit(char *arg_buffer, size_t arg_buffer_size, struct starpu_codelet *cl, struct starpu_task **task, va_list varg_list)
+int _starpu_insert_task_create_and_submit(void *arg_buffer, size_t arg_buffer_size, struct starpu_codelet *cl, struct starpu_task **task, va_list varg_list)
 {
 {
 	int arg_type;
 	int arg_type;
 	unsigned current_buffer = 0;
 	unsigned current_buffer = 0;
@@ -239,18 +242,20 @@ int _starpu_insert_task_create_and_submit(char *arg_buffer, size_t arg_buffer_si
 
 
 			STARPU_ASSERT(cl != NULL);
 			STARPU_ASSERT(cl != NULL);
 
 
-			(*task)->handles[current_buffer] = handle;
-			if (cl->modes[current_buffer])
+			STARPU_TASK_SET_HANDLE((*task), handle, current_buffer);
+			if (STARPU_CODELET_GET_MODE(cl, current_buffer))
 			{
 			{
-				STARPU_ASSERT_MSG(cl->modes[current_buffer] == mode, "The codelet <%s> defines the access mode %d for the buffer %d which is different from the mode %d given to starpu_insert_task\n",
-						  cl->name, cl->modes[current_buffer], current_buffer, mode);
+				STARPU_ASSERT_MSG(STARPU_CODELET_GET_MODE(cl, current_buffer) == mode,
+						   "The codelet <%s> defines the access mode %d for the buffer %d which is different from the mode %d given to starpu_insert_task\n",
+						  cl->name, STARPU_CODELET_GET_MODE(cl, current_buffer),
+						  current_buffer, mode);
 			}
 			}
 			else
 			else
 			{
 			{
 #ifdef STARPU_DEVEL
 #ifdef STARPU_DEVEL
 #  warning shall we print a warning to the user
 #  warning shall we print a warning to the user
 #endif
 #endif
-				cl->modes[current_buffer] = mode;
+				STARPU_CODELET_SET_MODE(cl, mode, current_buffer);
 			}
 			}
 
 
 			current_buffer++;
 			current_buffer++;
@@ -264,7 +269,7 @@ int _starpu_insert_task_create_and_submit(char *arg_buffer, size_t arg_buffer_si
 			int i;
 			int i;
 			for(i=0 ; i<nb_handles ; i++)
 			for(i=0 ; i<nb_handles ; i++)
 			{
 			{
-				(*task)->handles[current_buffer] = handles[i];
+				STARPU_TASK_SET_HANDLE((*task), handles[i], current_buffer);
 				current_buffer++;
 				current_buffer++;
 			}
 			}
 
 

+ 4 - 4
src/util/starpu_insert_task_utils.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2011, 2012  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012, 2013  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -22,9 +22,9 @@
 #include <starpu.h>
 #include <starpu.h>
 
 
 size_t _starpu_insert_task_get_arg_size(va_list varg_list);
 size_t _starpu_insert_task_get_arg_size(va_list varg_list);
-int _starpu_codelet_pack_args(size_t arg_buffer_size, char **arg_buffer, va_list varg_list);
-int _starpu_insert_task_create_and_submit(char *arg_buffer, size_t arg_buffer_size, struct starpu_codelet *cl, struct starpu_task **task, va_list varg_list);
-int _starpu_insert_task_create_and_submit_array(char *arg_buffer, size_t arg_buffer_size, struct starpu_codelet *cl, struct starpu_task **task, starpu_data_handle_t *handles, unsigned nb_handles, va_list varg_list);
+int _starpu_codelet_pack_args(void **arg_buffer, size_t arg_buffer_size, va_list varg_list);
+int _starpu_insert_task_create_and_submit(void *arg_buffer, size_t arg_buffer_size, struct starpu_codelet *cl, struct starpu_task **task, va_list varg_list);
+int _starpu_insert_task_create_and_submit_array(void *arg_buffer, size_t arg_buffer_size, struct starpu_codelet *cl, struct starpu_task **task, starpu_data_handle_t *handles, unsigned nb_handles, va_list varg_list);
 
 
 #endif // __STARPU_INSERT_TASK_UTILS_H__
 #endif // __STARPU_INSERT_TASK_UTILS_H__
 
 

+ 1 - 0
tests/Makefile.am

@@ -208,6 +208,7 @@ noinst_PROGRAMS =				\
 	parallel_tasks/explicit_combined_worker	\
 	parallel_tasks/explicit_combined_worker	\
 	parallel_tasks/parallel_kernels		\
 	parallel_tasks/parallel_kernels		\
 	parallel_tasks/parallel_kernels_spmd	\
 	parallel_tasks/parallel_kernels_spmd	\
+	parallel_tasks/spmd_peager		\
 	perfmodels/regression_based		\
 	perfmodels/regression_based		\
 	perfmodels/non_linear_regression_based	\
 	perfmodels/non_linear_regression_based	\
 	perfmodels/feed				\
 	perfmodels/feed				\

+ 2 - 6
tests/main/insert_task.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2011, 2012  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012, 2013  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -93,14 +93,10 @@ int main(int argc, char **argv)
 	task->cl = &mycodelet;
 	task->cl = &mycodelet;
 	task->handles[0] = data_handles[0];
 	task->handles[0] = data_handles[0];
 	task->handles[1] = data_handles[1];
 	task->handles[1] = data_handles[1];
-	char *arg_buffer;
-	size_t arg_buffer_size;
-	starpu_codelet_pack_args(&arg_buffer, &arg_buffer_size,
+	starpu_codelet_pack_args(&task->cl_arg, &task->cl_arg_size,
 			    STARPU_VALUE, &ifactor, sizeof(ifactor),
 			    STARPU_VALUE, &ifactor, sizeof(ifactor),
 			    STARPU_VALUE, &ffactor, sizeof(ffactor),
 			    STARPU_VALUE, &ffactor, sizeof(ffactor),
 			    0);
 			    0);
-	task->cl_arg = arg_buffer;
-	task->cl_arg_size = arg_buffer_size;
 
 
 	ret = starpu_task_submit(task);
 	ret = starpu_task_submit(task);
 	if (ret == -ENODEV) goto enodev;
 	if (ret == -ENODEV) goto enodev;

+ 5 - 3
tools/Makefile.am

@@ -86,7 +86,8 @@ bin_PROGRAMS += 			\
 	starpu_perfmodel_display	\
 	starpu_perfmodel_display	\
 	starpu_perfmodel_plot 		\
 	starpu_perfmodel_plot 		\
 	starpu_calibrate_bus		\
 	starpu_calibrate_bus		\
-	starpu_machine_display
+	starpu_machine_display		\
+	starpu_lp2paje
 
 
 starpu_perfmodel_plot_CPPFLAGS = $(AM_CFLAGS) $(AM_CPPFLAGS) $(FXT_CFLAGS)
 starpu_perfmodel_plot_CPPFLAGS = $(AM_CFLAGS) $(AM_CPPFLAGS) $(FXT_CFLAGS)
 
 
@@ -104,8 +105,6 @@ STARPU_TOOLS	+=			\
 	starpu_perfmodel_plot
 	starpu_perfmodel_plot
 endif
 endif
 
 
-noinst_PROGRAMS =	cbc2paje lp2paje
-
 dist_bin_SCRIPTS +=			\
 dist_bin_SCRIPTS +=			\
 	starpu_workers_activity		\
 	starpu_workers_activity		\
 	starpu_codelet_histo_profile	\
 	starpu_codelet_histo_profile	\
@@ -129,6 +128,8 @@ starpu_perfmodel_display.1: starpu_perfmodel_display$(EXEEXT)
 	help2man --no-discard-stderr -N --output=$@ ./$<
 	help2man --no-discard-stderr -N --output=$@ ./$<
 starpu_perfmodel_plot.1: starpu_perfmodel_plot$(EXEEXT)
 starpu_perfmodel_plot.1: starpu_perfmodel_plot$(EXEEXT)
 	help2man --no-discard-stderr -N --output=$@ ./$<
 	help2man --no-discard-stderr -N --output=$@ ./$<
+starpu_lp2paje.1: starpu_lp2paje$(EXEEXT)
+	help2man --no-discard-stderr -N --output=$@ ./$<
 starpu_workers_activity.1: starpu_workers_activity$(EXEEXT)
 starpu_workers_activity.1: starpu_workers_activity$(EXEEXT)
 	chmod +x $<
 	chmod +x $<
 	help2man --no-discard-stderr -N --output=$@ ./$<
 	help2man --no-discard-stderr -N --output=$@ ./$<
@@ -153,6 +154,7 @@ dist_man1_MANS =\
 	starpu_machine_display.1 \
 	starpu_machine_display.1 \
 	starpu_perfmodel_display.1 \
 	starpu_perfmodel_display.1 \
 	starpu_perfmodel_plot.1	\
 	starpu_perfmodel_plot.1	\
+	starpu_lp2paje.1	\
 	starpu_workers_activity.1 \
 	starpu_workers_activity.1 \
 	starpu_codelet_profile.1 \
 	starpu_codelet_profile.1 \
 	starpu_codelet_histo_profile.1
 	starpu_codelet_histo_profile.1

+ 0 - 156
tools/cbc2paje.c

@@ -1,156 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010  Université de Bordeaux 1
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-struct task {
-	double start;
-	double stop;
-	int worker;
-};
-
-int main(int argc, char *argv[]) {
-	int nw, nt;
-	double tmax;
-	int i, w, t, t2;
-	int foo;
-	double bar;
-	unsigned long num;
-	int b;
-	unsigned long next = 1;
-
-	if (argc != 3) {
-		fprintf(stderr,"usage: %s nb_workers nb_tasks\n", argv[0]);
-		exit(1);
-	}
-	nw = atoi(argv[1]);
-	nt = atoi(argv[2]);
-	fprintf(stderr,"%d workers, %d tasks\n", nw, nt);
-	assert(scanf("Optimal - objective value       %lf", &tmax) == 1);
-	printf(
-"%%EventDef PajeDefineContainerType 1\n"
-"%%  Alias         string\n"
-"%%  ContainerType string\n"
-"%%  Name          string\n"
-"%%EndEventDef\n"
-"%%EventDef PajeCreateContainer     2\n"
-"%%  Time          date\n"
-"%%  Alias         string\n"
-"%%  Type          string\n"
-"%%  Container     string\n"
-"%%  Name          string\n"
-"%%EndEventDef\n"
-"%%EventDef PajeDefineStateType     3\n"
-"%%  Alias         string\n"
-"%%  ContainerType string\n"
-"%%  Name          string\n"
-"%%EndEventDef\n"
-"%%EventDef PajeDestroyContainer    4\n"
-"%%  Time          date\n"
-"%%  Name          string\n"
-"%%  Type          string\n"
-"%%EndEventDef\n"
-"%%EventDef PajeDefineEntityValue 5\n"
-"%%  Alias         string\n"
-"%%  EntityType    string\n"
-"%%  Name          string\n"
-"%%  Color         color\n"
-"%%EndEventDef\n"
-"%%EventDef PajeSetState 6\n"
-"%%  Time          date\n"
-"%%  Type          string\n"
-"%%  Container     string\n"
-"%%  Value         string\n"
-"%%EndEventDef\n"
-"1 W 0 Worker\n"
-);
-	printf("3 S W \"Worker State\"\n");
-	printf("5 S S Running \"0.0 1.0 0.0\"\n");
-	printf("5 F S Idle \"1.0 0.0 0.0\"\n");
-	for (i = 0; i < nw; i++)
-		printf("2 0 W%d W 0 \"%d\"\n", i, i);
-
-	for (w = 0; w < nw; w++)
-		printf("4 %f W%d W\n", tmax, w);
-
-	assert(scanf("%d C%d %lf %lf", &foo, &foo, &tmax, &bar) == 4);
-	next++;
-	{
-		struct task task[nt];
-		memset(&task, 0, sizeof(task));
-		for (t = 0; t < nt; t++) {
-			assert(scanf("%d C%d %lf %lf", &foo, &foo, &task[t].stop, &bar) == 4);
-			next++;
-		}
-
-		while (1) {
-			assert(scanf("%d C%lu", &foo, &num) == 2);
-			if (num >= next +
-
-				/* FIXME */
-				//nw*nt
-				8*20 + 5*16
-
-				) {
-				next+= 8*20+5*16;
-				break;
-			}
-			/* FIXME */
-			if (num-next < 8*20) {
-				t = (num - next) / nw;
-				w = (num - next) % nw;
-			} else {
-				unsigned long nnum = (num-next)-8*20;
-				t = (nnum / 5) + 20;
-				w = (nnum % 5)+3;
-			}
-
-			assert(scanf("%d %lf", &b, &bar) == 2);
-			if (b) {
-				task[t].worker = w;
-				fprintf(stderr,"%lu: task %d on %d: %f\n", num, t, w, task[t].stop);
-			}
-		}
-		while(1) {
-			t = num - next;
-			if (t > nt)
-				break;
-			assert(scanf("%lf %lf", &task[t].start, &bar) == 2);
-			assert(scanf("%d C%lu", &foo, &num) == 2);
-		}
-
-		for (t = 0; t < nt; t++) {
-			printf("6 %f S W%d S\n", task[t].start, task[t].worker);
-			printf("6 %f S W%d F\n", task[t].stop, task[t].worker);
-		}
-
-		for (t = 0; t < nt; t++) {
-			for (t2 = 0; t2 < nt; t2++) {
-				if (t != t2 && task[t].worker == task[t2].worker) {
-					if (!(task[t].start >= task[t2].stop
-					    || task[t2].start >= task[t].stop)) {
-						fprintf(stderr,"oops, %d and %d sharing worker %d !!\n", t, t2, task[t].worker);
-					}
-				}
-			}
-		}
-	}
-
-	return 0;
-}

+ 42 - 47
tools/lp2paje.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010-2011  Université de Bordeaux 1
+ * Copyright (C) 2010-2011, 2013  Université de Bordeaux 1
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -14,34 +14,48 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
+#include <config.h>
 #include <assert.h>
 #include <assert.h>
 #include <stdio.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdlib.h>
 #include <string.h>
 #include <string.h>
 
 
+#define PROGNAME "starpu_lp2paje"
+
 struct task {
 struct task {
 	double start;
 	double start;
 	double stop;
 	double stop;
+	int num;
 	int worker;
 	int worker;
 };
 };
 
 
 int main(int argc, char *argv[]) {
 int main(int argc, char *argv[]) {
 	int nw, nt;
 	int nw, nt;
 	double tmax;
 	double tmax;
-	int i, w, t, t2;
+	int i, w, ww, t, tt, t2;
 	int foo;
 	int foo;
 	double bar;
 	double bar;
-	unsigned long num;
-	unsigned long next = 1;
 
 
-	if (argc != 3) {
-		fprintf(stderr,"usage: %s nb_workers nb_tasks\n", argv[0]);
-		exit(1);
+	if (argc != 1) {
+		if (strcmp(argv[1], "-v") == 0
+		 || strcmp(argv[1], "--version") == 0)
+		{
+			fprintf(stderr, PROGNAME " (" PACKAGE_NAME ") " PACKAGE_VERSION "\n");
+			exit(EXIT_SUCCESS);
+		}
+		fprintf(stderr, "Convert schedule optimized by lp into the Paje format\n\n");
+		fprintf(stderr, "Usage: lp_solve file.lp | %s > paje.trace\n", PROGNAME);
+		fprintf(stderr, "Reports bugs to <"PACKAGE_BUGREPORT">.");
+		fprintf(stderr, "\n");
+		exit(EXIT_SUCCESS);
 	}
 	}
-	nw = atoi(argv[1]);
-	nt = atoi(argv[2]);
-	fprintf(stderr,"%d workers, %d tasks\n", nw, nt);
+	scanf("Suboptimal solution\n");
 	assert(scanf("\nValue of objective function: %lf\n", &tmax) == 1);
 	assert(scanf("\nValue of objective function: %lf\n", &tmax) == 1);
+
+	assert(scanf("Actual values of the variables:\n") == 0);
+	assert(scanf("tmax %lf\n", &tmax) == 1);
+	assert(scanf("nt %d\n", &nt) == 1);
+	assert(scanf("nw %d\n", &nw) == 1);
 	printf(
 	printf(
 "%%EventDef PajeDefineContainerType 1\n"
 "%%EventDef PajeDefineContainerType 1\n"
 "%%  Alias         string\n"
 "%%  Alias         string\n"
@@ -80,7 +94,8 @@ int main(int argc, char *argv[]) {
 "1 W 0 Worker\n"
 "1 W 0 Worker\n"
 );
 );
 	printf("3 S W \"Worker State\"\n");
 	printf("3 S W \"Worker State\"\n");
-	printf("5 S S Running \"0.0 1.0 0.0\"\n");
+	for (t = 0; t < nt; t++)
+		printf("5 R%d S Running_%d \"0.0 1.0 0.0\"\n", t, t);
 	printf("5 F S Idle \"1.0 0.0 0.0\"\n");
 	printf("5 F S Idle \"1.0 0.0 0.0\"\n");
 	for (i = 0; i < nw; i++)
 	for (i = 0; i < nw; i++)
 		printf("2 0 W%d W 0 \"%d\"\n", i, i);
 		printf("2 0 W%d W 0 \"%d\"\n", i, i);
@@ -88,52 +103,32 @@ int main(int argc, char *argv[]) {
 	for (w = 0; w < nw; w++)
 	for (w = 0; w < nw; w++)
 		printf("4 %f W%d W\n", tmax, w);
 		printf("4 %f W%d W\n", tmax, w);
 
 
-	assert(scanf("Actual values of the variables:\n") == 0);
-	assert(scanf("tmax %lf\n", &tmax) == 1);
-	next++;
+	fprintf(stderr,"%d workers, %d tasks\n", nw, nt);
 	{
 	{
 		struct task task[nt];
 		struct task task[nt];
 		memset(&task, 0, sizeof(task));
 		memset(&task, 0, sizeof(task));
-		for (t = 0; t < nt; t++) {
+		for (t = nt-1; t >= 0; t--) {
 			assert(scanf("c%d %lf\n", &foo, &task[t].stop) == 2);
 			assert(scanf("c%d %lf\n", &foo, &task[t].stop) == 2);
-			next++;
 		}
 		}
 
 
-		num = next;
-		while (1) {
-			if (num >= next +
-
-				/* FIXME */
-				//nw*nt
-				8*84 + 5*49
-
-				) {
-				next+= 8*84+5*49;
-				break;
-			}
-			assert(scanf("t%dw%d %lf\n", &foo, &foo, &bar) == 3);
-			/* FIXME */
-			if (num-next < 8*84) {
-				t = (num - next) / nw;
-				w = (num - next) % nw;
-			} else {
-				unsigned long nnum = (num-next)-8*84;
-				t = (nnum / 5) + 84;
-				w = (nnum % 5)+3;
-			}
+		for (t = nt-1; t >= 0; t--)
+			for (w = 0; w < nw; w++) {
+				assert(scanf("t%dw%d %lf\n", &tt, &ww, &bar) == 3);
+				assert(ww == w);
 
 
-			if (bar > 0.5) {
-				task[t].worker = w;
-				fprintf(stderr,"%lu: task %d on %d: %f\n", num, t, w, task[t].stop);
-			}
-			num++;
+				if (bar > 0.5) {
+					task[t].num = tt;
+					task[t].worker = w;
+				}
 		}
 		}
-		for (t = 0; t < nt; t++) {
-			assert(scanf("s%d %lf\n", &foo, &task[t].start) == 2);
+		for (t = nt-1; t >= 0; t--) {
+			assert(scanf("s%d %lf\n", &tt, &task[t].start) == 2);
+			fprintf(stderr,"%d: task %d on %d: %f - %f\n", nt-1-t, tt, task[t].worker, task[t].start, task[t].stop);
+			assert(tt == task[t].num);
 		}
 		}
 
 
 		for (t = 0; t < nt; t++) {
 		for (t = 0; t < nt; t++) {
-			printf("6 %f S W%d S\n", task[t].start, task[t].worker);
+			printf("6 %f S W%d R%d\n", task[t].start, task[t].worker, t);
 			printf("6 %f S W%d F\n", task[t].stop, task[t].worker);
 			printf("6 %f S W%d F\n", task[t].stop, task[t].worker);
 		}
 		}
 
 
@@ -142,7 +137,7 @@ int main(int argc, char *argv[]) {
 				if (t != t2 && task[t].worker == task[t2].worker) {
 				if (t != t2 && task[t].worker == task[t2].worker) {
 					if (!(task[t].start >= task[t2].stop
 					if (!(task[t].start >= task[t2].stop
 					    || task[t2].start >= task[t].stop)) {
 					    || task[t2].start >= task[t].stop)) {
-						fprintf(stderr,"oops, %d and %d sharing worker %d !!\n", t, t2, task[t].worker);
+						fprintf(stderr,"oops, %d and %d sharing worker %d !!\n", task[t].num, task[t2].num, task[t].worker);
 					}
 					}
 				}
 				}
 			}
 			}