Browse Source

Merge branch 'master' of git+ssh://scm.gforge.inria.fr/gitroot/starpu/starpu

Olivier Aumage 6 years ago
parent
commit
b1e43d0ffd

+ 4 - 1
.gitignore

@@ -30,11 +30,14 @@ starpu.log
 /tests/datawizard/handle_to_pointer
 /tests/datawizard/handle_to_pointer
 /tests/datawizard/data_lookup
 /tests/datawizard/data_lookup
 /doc/stamp-vti
 /doc/stamp-vti
-/doc/chapters/version.texi
 /doc/doxygen/chapters/version.sty
 /doc/doxygen/chapters/version.sty
 /doc/doxygen/chapters/version.html
 /doc/doxygen/chapters/version.html
 /doc/doxygen/doxygen-config.cfg
 /doc/doxygen/doxygen-config.cfg
 /doc/doxygen/doxygen_filter.sh
 /doc/doxygen/doxygen_filter.sh
+/doc/doxygen_dev/chapters/version.sty
+/doc/doxygen_dev/chapters/version.html
+/doc/doxygen_dev/doxygen-config.cfg
+/doc/doxygen_dev/doxygen_filter.sh
 /examples/basic_examples/block
 /examples/basic_examples/block
 /examples/basic_examples/hello_world
 /examples/basic_examples/hello_world
 /examples/basic_examples/mult
 /examples/basic_examples/mult

+ 4 - 1
ChangeLog

@@ -51,7 +51,9 @@ New features:
   * Add possibility to delay the termination of a task with the
   * Add possibility to delay the termination of a task with the
     functions starpu_task_end_dep_add() which specifies the number of
     functions starpu_task_end_dep_add() which specifies the number of
     calls to the function starpu_task_end_dep_release() needed to
     calls to the function starpu_task_end_dep_release() needed to
-    trigger the task termination.
+    trigger the task termination, or with starpu_task_declare_end_deps_array()
+    and starpu_task_declare_end_deps() to just declare termination dependencies
+    between tasks.
   * Add possibility to define the sequential consistency at the task level
   * Add possibility to define the sequential consistency at the task level
     for each handle used by the task.
     for each handle used by the task.
   * Add STARPU_SPECIFIC_NODE_LOCAL, STARPU_SPECIFIC_NODE_CPU, and
   * Add STARPU_SPECIFIC_NODE_LOCAL, STARPU_SPECIFIC_NODE_CPU, and
@@ -116,6 +118,7 @@ Small features:
   * New function starpu_data_partition_not_automatic() to disable the
   * New function starpu_data_partition_not_automatic() to disable the
     automatic partitioning of a data handle for which an asynchronous
     automatic partitioning of a data handle for which an asynchronous
     plan has previously been submitted
     plan has previously been submitted
+  * Add starpu_task_declare_deps()
 
 
 Changes:
 Changes:
   * Vastly improve simgrid simulation time.
   * Vastly improve simgrid simulation time.

+ 4 - 2
contrib/ci.inria.fr/job-1-check.sh

@@ -60,13 +60,15 @@ fi
 
 
 export CC=gcc
 export CC=gcc
 
 
+CONFIGURE_OPTIONS="--enable-debug --enable-verbose --enable-mpi-check --disable-build-doc"
 day=$(date +%u)
 day=$(date +%u)
 if test $day -le 5
 if test $day -le 5
 then
 then
-    ../configure --enable-quick-check --enable-verbose --enable-mpi-check --disable-build-doc $STARPU_CONFIGURE_OPTIONS
+    CONFIGURE_CHECK="--enable-quick-check"
 else
 else
-    ../configure --enable-long-check --enable-verbose --enable-mpi-check --disable-build-doc $STARPU_CONFIGURE_OPTIONS
+    CONFIGURE_CHECK="--enable-long-check"
 fi
 fi
+../configure $CONFIGURE_OPTIONS $CONFIGURE_CHECK  $STARPU_CONFIGURE_OPTIONS
 
 
 make
 make
 #make check
 #make check

+ 6 - 5
doc/doxygen/chapters/301_tasks.doxy

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010-2018                                CNRS
  * Copyright (C) 2010-2018                                CNRS
- * Copyright (C) 2009-2011,2014-2016                      Université de Bordeaux
+ * Copyright (C) 2009-2011,2014-2016, 2018                      Université de Bordeaux
  * Copyright (C) 2011-2012,2018                           Inria
  * Copyright (C) 2011-2012,2018                           Inria
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -105,15 +105,16 @@ if (sequential_consistency(task) == 1)
 \subsection TasksAndTagsDependencies Tasks And Tags Dependencies
 \subsection TasksAndTagsDependencies Tasks And Tags Dependencies
 
 
 One can explicitly set dependencies between tasks using
 One can explicitly set dependencies between tasks using
-starpu_task_declare_deps_array(). Dependencies between tasks can be
+starpu_task_declare_deps() or starpu_task_declare_deps_array(). Dependencies between tasks can be
 expressed through tags associated to a task with the field
 expressed through tags associated to a task with the field
 starpu_task::tag_id and using the function starpu_tag_declare_deps()
 starpu_task::tag_id and using the function starpu_tag_declare_deps()
 or starpu_tag_declare_deps_array().
 or starpu_tag_declare_deps_array().
 
 
 The termination of a task can be delayed through the function
 The termination of a task can be delayed through the function
-starpu_task_end_dep_add() which specifies the number of calls to the
-function starpu_task_end_dep_release() needed to trigger the task
-termination.
+starpu_task_end_dep_add() which specifies the number of calls to the function
+starpu_task_end_dep_release() needed to trigger the task termination. One can
+also use starpu_task_declare_end_deps() or starpu_task_declare_end_deps_array()
+to delay the termination of a task until the termination of other tasks.
 
 
 \section SettingManyDataHandlesForATask Setting Many Data Handles For a Task
 \section SettingManyDataHandlesForATask Setting Many Data Handles For a Task
 
 

+ 31 - 1
doc/doxygen/chapters/api/explicit_dependencies.doxy

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010-2013,2015,2017,2018                      CNRS
  * Copyright (C) 2010-2013,2015,2017,2018                      CNRS
- * Copyright (C) 2009-2011,2014-2016                      Université de Bordeaux
+ * Copyright (C) 2009-2011,2014-2016, 2018                      Université de Bordeaux
  * Copyright (C) 2011-2012                                Inria
  * Copyright (C) 2011-2012                                Inria
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -32,6 +32,36 @@ call starpu_task_declare_deps_array() several times on the same task,
 in this case, the dependencies are added. It is possible to have
 in this case, the dependencies are added. It is possible to have
 redundancy in the task dependencies.
 redundancy in the task dependencies.
 
 
+\fn void starpu_task_declare_deps(struct starpu_task *task, unsigned ndeps, ...)
+\ingroup API_Explicit_Dependencies
+Declare task dependencies between a \p task and a series of \p ndeps tasks,
+similarly to starpu_task_declare_deps_array(), but the tasks are passed after \p
+ndeps, which indicates how many tasks \p task shall be made to depend on.
+If \p ndeps is 0, no dependency is added.
+
+\fn void starpu_task_declare_end_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[])
+\ingroup API_Explicit_Dependencies
+Declare task end dependencies between a \p task and an array of
+tasks of length \p ndeps. \p task will appear as terminated not only when \p
+task has terminated, but also when the tasks of \p task_array have terminated.
+This function must be called prior to the
+termination of the task, but it may be called after the submission or the
+execution of the tasks in the array, provided the tasks are still
+valid (i.e. they were not automatically destroyed). Calling this
+function on a task that was already terminated or with an entry of
+\p task_array that is no longer a valid task results in an undefined
+behaviour. If \p ndeps is 0, no dependency is added. It is possible to
+call starpu_task_declare_end_deps_array() several times on the same task,
+in this case, the dependencies are added. Redundancy in the task end
+dependencies is currently not supported.
+
+\fn void starpu_task_declare_end_deps(struct starpu_task *task, unsigned ndeps, ...)
+\ingroup API_Explicit_Dependencies
+Declare task end dependencies between a \p task and a series of \p ndeps tasks,
+similarly to starpu_task_declare_end_deps_array(), but the tasks are passed after \p
+ndeps, which indicates how many tasks \p task 's termination shall be made to depend on.
+If \p ndeps is 0, no dependency is added.
+
 \fn int starpu_task_get_task_succs(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[])
 \fn int starpu_task_get_task_succs(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[])
 \ingroup API_Explicit_Dependencies
 \ingroup API_Explicit_Dependencies
 Fill \p task_array with the list of tasks which are direct children of \p task.
 Fill \p task_array with the list of tasks which are direct children of \p task.

+ 8 - 1
doc/doxygen/chapters/api/insert_task.doxy

@@ -152,10 +152,17 @@ task to.
 \def STARPU_TASK_DEPS_ARRAY
 \def STARPU_TASK_DEPS_ARRAY
 \ingroup API_Insert_Task
 \ingroup API_Insert_Task
 Used when calling starpu_task_insert(), must
 Used when calling starpu_task_insert(), must
-be followed by a number of tasks, and an array containing these tasks.
+be followed by a number of tasks as int, and an array containing these tasks.
 The function starpu_task_declare_deps_array() will be called with the
 The function starpu_task_declare_deps_array() will be called with the
 given values.
 given values.
 
 
+\def STARPU_TASK_END_DEPS_ARRAY
+\ingroup API_Insert_Task
+Used when calling starpu_task_insert(), must
+be followed by a number of tasks as int, and an array containing these tasks.
+The function starpu_task_declare_end_deps_array() will be called with the
+given values.
+
 \def STARPU_TASK_COLOR
 \def STARPU_TASK_COLOR
 \ingroup API_Insert_Task
 \ingroup API_Insert_Task
 Used when calling starpu_task_insert(), must be followed by an integer
 Used when calling starpu_task_insert(), must be followed by an integer

+ 1 - 0
examples/Makefile.am

@@ -254,6 +254,7 @@ STARPU_EXAMPLES +=				\
 	reductions/dot_product			\
 	reductions/dot_product			\
 	reductions/minmax_reduction		\
 	reductions/minmax_reduction		\
 	dependency/task_end_dep			\
 	dependency/task_end_dep			\
+	dependency/task_end_dep_add		\
 	dependency/sequential_consistency
 	dependency/sequential_consistency
 
 
 endif
 endif

+ 35 - 9
examples/dependency/task_end_dep.c

@@ -14,20 +14,31 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
+/* This shows how to defer termination of a task until the termination of
+ * another task.  */
+
 #include <starpu.h>
 #include <starpu.h>
 
 
 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
 
 
+#define INIT 12
+
 void cpu_codelet2(void *descr[], void *args)
 void cpu_codelet2(void *descr[], void *args)
 {
 {
-	(void)descr;
+	int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);
 	(void)args;
 	(void)args;
+	STARPU_ASSERT(*val == 2*INIT);
+	starpu_sleep(0.1);
+	STARPU_ASSERT(*val == 2*INIT);
+	*val *= 2;
 }
 }
 
 
 struct starpu_codelet cl2 =
 struct starpu_codelet cl2 =
 {
 {
 	.cpu_funcs = {cpu_codelet2},
 	.cpu_funcs = {cpu_codelet2},
 	.cpu_funcs_name = {"cpu_codelet2"},
 	.cpu_funcs_name = {"cpu_codelet2"},
+	.nbuffers = 1,
+	.modes = {STARPU_RW},
 	.name = "codelet2"
 	.name = "codelet2"
 };
 };
 
 
@@ -35,14 +46,15 @@ void cpu_codelet(void *descr[], void *args)
 {
 {
 	(void)args;
 	(void)args;
 	int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);
 	int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);
-	struct starpu_task *task;
-
-	task = starpu_task_get_current();
-	starpu_task_end_dep_add(task, 1);
+	struct starpu_task *task = starpu_task_get_current();
 
 
 	starpu_task_insert(&cl2,
 	starpu_task_insert(&cl2,
-			   STARPU_CALLBACK_WITH_ARG, starpu_task_end_dep_release, task,
+			   STARPU_RW, task->handles[0],
+			   STARPU_TASK_END_DEPS_ARRAY, 1, &task,
 			   0);
 			   0);
+	STARPU_ASSERT(*val == INIT);
+	starpu_sleep(0.1);
+	STARPU_ASSERT(*val == INIT);
 	*val *= 2;
 	*val *= 2;
 }
 }
 
 
@@ -57,10 +69,11 @@ struct starpu_codelet cl =
 
 
 int main(void)
 int main(void)
 {
 {
-        int value=12;
+        int value=INIT;
 	int ret;
 	int ret;
 	starpu_data_handle_t value_handle;
 	starpu_data_handle_t value_handle;
 	struct starpu_conf conf;
 	struct starpu_conf conf;
+	struct starpu_task *task;
 
 
 	starpu_conf_init(&conf);
 	starpu_conf_init(&conf);
 	conf.nmic = 0;
 	conf.nmic = 0;
@@ -83,13 +96,26 @@ int main(void)
 
 
 	starpu_variable_data_register(&value_handle, STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(value));
 	starpu_variable_data_register(&value_handle, STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(value));
 
 
-	ret = starpu_task_insert(&cl,
+	task = starpu_task_build(&cl,
 				 STARPU_RW, value_handle,
 				 STARPU_RW, value_handle,
 				 0);
 				 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
+	STARPU_ASSERT(task);
+	task->detach = 0;
+	ret = starpu_task_submit(task);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	ret = starpu_task_wait(task);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait");
+
+	starpu_data_set_sequential_consistency_flag(value_handle, 0);
+	starpu_data_acquire_on_node(value_handle, STARPU_MAIN_RAM, STARPU_R);
+	/* Waiting for the main task should have also waited for the subtask */
+	STARPU_ASSERT(value == 2*2*INIT);
+	starpu_data_release_on_node(value_handle, STARPU_MAIN_RAM);
 
 
 	starpu_data_unregister(value_handle);
 	starpu_data_unregister(value_handle);
 
 
+	STARPU_ASSERT(value == 2*2*INIT);
+
         starpu_shutdown();
         starpu_shutdown();
 
 
 	FPRINTF(stderr, "Value = %d\n", value);
 	FPRINTF(stderr, "Value = %d\n", value);

+ 106 - 0
examples/dependency/task_end_dep_add.c

@@ -0,0 +1,106 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2018                                     CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/* This shows how to defer termination of a task thanks to
+ * starpu_task_end_dep_add.  */
+
+#include <starpu.h>
+
+#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
+
+#define INIT 12
+
+void cpu_codelet2(void *descr[], void *args)
+{
+	(void)descr;
+	(void)args;
+}
+
+struct starpu_codelet cl2 =
+{
+	.cpu_funcs = {cpu_codelet2},
+	.cpu_funcs_name = {"cpu_codelet2"},
+	.name = "codelet2"
+};
+
+void cpu_codelet(void *descr[], void *args)
+{
+	(void)args;
+	int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);
+	struct starpu_task *task;
+
+	task = starpu_task_get_current();
+	starpu_task_end_dep_add(task, 1);
+
+	starpu_task_insert(&cl2,
+			   STARPU_CALLBACK_WITH_ARG, starpu_task_end_dep_release, task,
+			   0);
+	STARPU_ASSERT(*val == INIT);
+	*val *= 2;
+}
+
+struct starpu_codelet cl =
+{
+	.cpu_funcs = {cpu_codelet},
+	.cpu_funcs_name = {"cpu_codelet"},
+	.nbuffers = 1,
+	.modes = {STARPU_RW},
+	.name = "codelet"
+};
+
+int main(void)
+{
+        int value=INIT;
+	int ret;
+	starpu_data_handle_t value_handle;
+	struct starpu_conf conf;
+
+	starpu_conf_init(&conf);
+	conf.nmic = 0;
+	conf.nscc = 0;
+	conf.nmpi_ms = 0;
+
+        ret = starpu_init(&conf);
+	if (STARPU_UNLIKELY(ret == -ENODEV))
+	{
+		return 77;
+	}
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	if (starpu_cpu_worker_get_count() < 1)
+	{
+		FPRINTF(stderr, "This application requires at least 1 cpu worker\n");
+		starpu_shutdown();
+		return 77;
+	}
+
+	starpu_variable_data_register(&value_handle, STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(value));
+
+	ret = starpu_task_insert(&cl,
+				 STARPU_RW, value_handle,
+				 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
+
+	starpu_data_unregister(value_handle);
+
+	STARPU_ASSERT(value == 2*INIT);
+
+        starpu_shutdown();
+
+	FPRINTF(stderr, "Value = %d\n", value);
+
+	return ret;
+}

+ 10 - 20
examples/sched_ctx/sched_ctx.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2012-2014,2017-2018                      Inria
  * Copyright (C) 2012-2014,2017-2018                      Inria
- * Copyright (C) 2010-2017                                CNRS
+ * Copyright (C) 2010-2018                                CNRS
  * Copyright (C) 2010-2014                                Université de Bordeaux
  * Copyright (C) 2010-2014                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -28,6 +28,8 @@
 int tasks_executed = 0;
 int tasks_executed = 0;
 int ctx1_tasks_executed = 0;
 int ctx1_tasks_executed = 0;
 int ctx2_tasks_executed = 0;
 int ctx2_tasks_executed = 0;
+int cpu_tasks_executed = 0;
+int gpu_tasks_executed = 0;
 
 
 static void sched_ctx_cpu_func(void *descr[], void *arg)
 static void sched_ctx_cpu_func(void *descr[], void *arg)
 {
 {
@@ -35,6 +37,7 @@ static void sched_ctx_cpu_func(void *descr[], void *arg)
 	(void)arg;
 	(void)arg;
 	(void)STARPU_ATOMIC_ADD(&tasks_executed,1);
 	(void)STARPU_ATOMIC_ADD(&tasks_executed,1);
 	(void)STARPU_ATOMIC_ADD(&ctx1_tasks_executed,1);
 	(void)STARPU_ATOMIC_ADD(&ctx1_tasks_executed,1);
+	(void)STARPU_ATOMIC_ADD(&cpu_tasks_executed,1);
 }
 }
 
 
 static void sched_ctx2_cpu_func(void *descr[], void *arg)
 static void sched_ctx2_cpu_func(void *descr[], void *arg)
@@ -43,6 +46,7 @@ static void sched_ctx2_cpu_func(void *descr[], void *arg)
 	(void)arg;
 	(void)arg;
 	(void)STARPU_ATOMIC_ADD(&tasks_executed,1);
 	(void)STARPU_ATOMIC_ADD(&tasks_executed,1);
 	(void)STARPU_ATOMIC_ADD(&ctx2_tasks_executed,1);
 	(void)STARPU_ATOMIC_ADD(&ctx2_tasks_executed,1);
+	(void)STARPU_ATOMIC_ADD(&cpu_tasks_executed,1);
 }
 }
 
 
 static void sched_ctx2_cuda_func(void *descr[], void *arg)
 static void sched_ctx2_cuda_func(void *descr[], void *arg)
@@ -51,6 +55,7 @@ static void sched_ctx2_cuda_func(void *descr[], void *arg)
 	(void)arg;
 	(void)arg;
 	(void)STARPU_ATOMIC_ADD(&tasks_executed,1);
 	(void)STARPU_ATOMIC_ADD(&tasks_executed,1);
 	(void)STARPU_ATOMIC_ADD(&ctx2_tasks_executed,1);
 	(void)STARPU_ATOMIC_ADD(&ctx2_tasks_executed,1);
+	(void)STARPU_ATOMIC_ADD(&gpu_tasks_executed,1);
 }
 }
 
 
 static struct starpu_codelet sched_ctx_codelet1 =
 static struct starpu_codelet sched_ctx_codelet1 =
@@ -127,25 +132,8 @@ int main(void)
 	/* tell starpu when you finished submitting tasks to this context
 	/* tell starpu when you finished submitting tasks to this context
 	   in order to allow moving resources from this context to the inheritor one
 	   in order to allow moving resources from this context to the inheritor one
 	   when its corresponding tasks finished executing */
 	   when its corresponding tasks finished executing */
-
 	starpu_sched_ctx_finished_submit(sched_ctx1);
 	starpu_sched_ctx_finished_submit(sched_ctx1);
 
 
-	/* task with no cuda impl submitted to a ctx with gpus only */
-	struct starpu_task *task2 = starpu_task_create();
-	task2->cl = &sched_ctx_codelet1;
-	task2->cl_arg = NULL;
-
-	/*submit tasks to context*/
-	ret = starpu_task_submit_to_ctx(task2,sched_ctx2);
-	if (ncuda == 0)
-	{
-		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
-	}
-	else
-	{
-		STARPU_ASSERT_MSG(ret == -ENODEV, "submit task should ret enodev when the ctx does not have the PUs needed by the task");
-	}
-
 	for (i = 0; i < ntasks/2; i++)
 	for (i = 0; i < ntasks/2; i++)
 	{
 	{
 		struct starpu_task *task = starpu_task_create();
 		struct starpu_task *task = starpu_task_create();
@@ -164,11 +152,13 @@ int main(void)
 	starpu_task_wait_for_all();
 	starpu_task_wait_for_all();
 
 
 	starpu_sched_ctx_add_workers(procs1, nprocs1, sched_ctx2);
 	starpu_sched_ctx_add_workers(procs1, nprocs1, sched_ctx2);
-	starpu_sched_ctx_delete(sched_ctx1);
 	starpu_sched_ctx_delete(sched_ctx2);
 	starpu_sched_ctx_delete(sched_ctx2);
-	printf("tasks executed %d out of %d\n", tasks_executed, ntasks+1);
+	starpu_sched_ctx_delete(sched_ctx1);
+	printf("tasks executed %d out of %d\n", tasks_executed, ntasks);
 	printf("tasks executed on ctx1: %d\n", ctx1_tasks_executed);
 	printf("tasks executed on ctx1: %d\n", ctx1_tasks_executed);
 	printf("tasks executed on ctx2: %d\n", ctx2_tasks_executed);
 	printf("tasks executed on ctx2: %d\n", ctx2_tasks_executed);
+	printf("tasks executed on CPU: %d\n", cpu_tasks_executed);
+	printf("tasks executed on GPU: %d\n", gpu_tasks_executed);
 
 
 enodev:
 enodev:
 	starpu_shutdown();
 	starpu_shutdown();

+ 2 - 2
examples/sched_ctx/two_cpu_contexts.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2017                                     CNRS
+ * Copyright (C) 2017, 2018                               CNRS
  * Copyright (C) 2016,2018                                Inria
  * Copyright (C) 2016,2018                                Inria
  * Copyright (C) 2016                                     Université de Bordeaux
  * Copyright (C) 2016                                     Université de Bordeaux
  *
  *
@@ -111,8 +111,8 @@ int main(void)
 	starpu_task_wait_for_all();
 	starpu_task_wait_for_all();
 
 
 	starpu_sched_ctx_add_workers(procs1, nprocs1, sched_ctx2);
 	starpu_sched_ctx_add_workers(procs1, nprocs1, sched_ctx2);
-	starpu_sched_ctx_delete(sched_ctx1);
 	starpu_sched_ctx_delete(sched_ctx2);
 	starpu_sched_ctx_delete(sched_ctx2);
+	starpu_sched_ctx_delete(sched_ctx1);
 	starpu_shutdown();
 	starpu_shutdown();
 	free(procs);
 	free(procs);
 	free(procs1);
 	free(procs1);

+ 4 - 0
include/starpu_task.h

@@ -303,8 +303,12 @@ struct starpu_task
 void starpu_tag_declare_deps(starpu_tag_t id, unsigned ndeps, ...);
 void starpu_tag_declare_deps(starpu_tag_t id, unsigned ndeps, ...);
 void starpu_tag_declare_deps_array(starpu_tag_t id, unsigned ndeps, starpu_tag_t *array);
 void starpu_tag_declare_deps_array(starpu_tag_t id, unsigned ndeps, starpu_tag_t *array);
 
 
+void starpu_task_declare_deps(struct starpu_task *task, unsigned ndeps, ...);
 void starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[]);
 void starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[]);
 
 
+void starpu_task_declare_end_deps(struct starpu_task *task, unsigned ndeps, ...);
+void starpu_task_declare_end_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[]);
+
 void starpu_task_end_dep_add(struct starpu_task *t, int nb_deps);
 void starpu_task_end_dep_add(struct starpu_task *t, int nb_deps);
 void starpu_task_end_dep_release(struct starpu_task *t);
 void starpu_task_end_dep_release(struct starpu_task *t);
 
 

+ 3 - 1
include/starpu_task_util.h

@@ -66,7 +66,9 @@ void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t
 #define STARPU_TASK_COLOR       (28<<STARPU_MODE_SHIFT)
 #define STARPU_TASK_COLOR       (28<<STARPU_MODE_SHIFT)
 #define STARPU_HANDLES_SEQUENTIAL_CONSISTENCY (29<<STARPU_MODE_SHIFT)
 #define STARPU_HANDLES_SEQUENTIAL_CONSISTENCY (29<<STARPU_MODE_SHIFT)
 #define STARPU_TASK_SYNCHRONOUS (30<<STARPU_MODE_SHIFT)
 #define STARPU_TASK_SYNCHRONOUS (30<<STARPU_MODE_SHIFT)
-#define STARPU_SHIFTED_MODE_MAX (31<<STARPU_MODE_SHIFT)
+#define STARPU_TASK_END_DEPS_ARRAY	(31<<STARPU_MODE_SHIFT)
+
+#define STARPU_SHIFTED_MODE_MAX (32<<STARPU_MODE_SHIFT)
 
 
 int starpu_task_set(struct starpu_task *task, struct starpu_codelet *cl, ...);
 int starpu_task_set(struct starpu_task *task, struct starpu_codelet *cl, ...);
 struct starpu_task *starpu_task_build(struct starpu_codelet *cl, ...);
 struct starpu_task *starpu_task_build(struct starpu_codelet *cl, ...);

+ 5 - 0
mpi/src/starpu_mpi_task_insert.c

@@ -361,6 +361,11 @@ int _starpu_mpi_task_decode_v(struct starpu_codelet *codelet, int me, int nb_nod
 			(void)va_arg(varg_list_copy, unsigned);
 			(void)va_arg(varg_list_copy, unsigned);
 			(void)va_arg(varg_list_copy, struct starpu_task **);
 			(void)va_arg(varg_list_copy, struct starpu_task **);
 		}
 		}
+		else if (arg_type==STARPU_TASK_END_DEPS_ARRAY)
+		{
+			(void)va_arg(varg_list_copy, unsigned);
+			(void)va_arg(varg_list_copy, struct starpu_task **);
+		}
 		else if (arg_type==STARPU_CALLBACK)
 		else if (arg_type==STARPU_CALLBACK)
 		{
 		{
 			(void)va_arg(varg_list_copy, _starpu_callback_func_t);
 			(void)va_arg(varg_list_copy, _starpu_callback_func_t);

+ 7 - 0
mpi/src/starpu_mpi_task_insert_fortran.c

@@ -191,6 +191,13 @@ int _fstarpu_mpi_task_decode_v(struct starpu_codelet *codelet, int me, int nb_no
 			arg_i++;
 			arg_i++;
 			/* struct starpu_task ** */
 			/* struct starpu_task ** */
 		}
 		}
+		else if (arg_type==STARPU_TASK_END_DEPS_ARRAY)
+		{
+			arg_i++;
+			/* unsigned */
+			arg_i++;
+			/* struct starpu_task ** */
+		}
 		else if (arg_type==STARPU_CALLBACK)
 		else if (arg_type==STARPU_CALLBACK)
 		{
 		{
 			arg_i++;
 			arg_i++;

+ 63 - 1
src/core/dependencies/task_deps.c

@@ -121,7 +121,7 @@ void _starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, s
 		STARPU_PTHREAD_MUTEX_LOCK(&dep_job->sync_mutex);
 		STARPU_PTHREAD_MUTEX_LOCK(&dep_job->sync_mutex);
 		if (check)
 		if (check)
 		{
 		{
-			STARPU_ASSERT_MSG(!dep_job->submitted || !dep_job->task->destroy || dep_job->task->detach, "Unless it is not to be destroyed automatically, a task dependencies have to be set before submission");
+			STARPU_ASSERT_MSG(!dep_job->submitted || !dep_job->task->destroy || !dep_job->task->detach || starpu_task_get_current() == dep_task, "Unless it is not to be destroyed automatically, task dependencies have to be set before submission");
 			STARPU_ASSERT_MSG(dep_job->submitted != 2, "For resubmited tasks, dependencies have to be set before first re-submission");
 			STARPU_ASSERT_MSG(dep_job->submitted != 2, "For resubmited tasks, dependencies have to be set before first re-submission");
 			STARPU_ASSERT_MSG(!dep_job->submitted || !dep_job->task->regenerate, "For regenerated tasks, dependencies have to be set before first submission");
 			STARPU_ASSERT_MSG(!dep_job->submitted || !dep_job->task->regenerate, "For regenerated tasks, dependencies have to be set before first submission");
 		}
 		}
@@ -156,6 +156,68 @@ void starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, st
 	_starpu_task_declare_deps_array(task, ndeps, task_array, 1);
 	_starpu_task_declare_deps_array(task, ndeps, task_array, 1);
 }
 }
 
 
+void starpu_task_declare_deps(struct starpu_task *task, unsigned ndeps, ...)
+{
+	if (ndeps == 0)
+		return;
+	struct starpu_task *tasks[ndeps];
+	unsigned i;
+	va_list pa;
+	va_start(pa, ndeps);
+	for (i = 0; i < ndeps; i++)
+	{
+		tasks[i] = va_arg(pa, struct starpu_task *);
+	}
+	va_end(pa);
+	starpu_task_declare_deps_array(task, ndeps, tasks);
+}
+
+void starpu_task_declare_end_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[])
+{
+	unsigned i;
+
+	starpu_task_end_dep_add(task, ndeps);
+	for (i = 0; i < ndeps; i++)
+	{
+		struct starpu_task *dep_task = task_array[i];
+		struct _starpu_job *dep_job = _starpu_get_job_associated_to_task(dep_task);
+		int done = 0;
+
+		STARPU_ASSERT_MSG(!dep_job->submitted || !dep_job->task->destroy || !dep_job->task->detach || starpu_task_get_current() == dep_task, "Unless it is not to be destroyed automatically, task end dependencies have to be set before submission");
+		STARPU_ASSERT_MSG(dep_job->submitted != 2, "For resubmited tasks, dependencies have to be set before first re-submission");
+		STARPU_ASSERT_MSG(!dep_job->submitted || !dep_job->task->regenerate, "For regenerated tasks, dependencies have to be set before first submission");
+
+		STARPU_ASSERT_MSG(!dep_job->end_rdep, "multiple end dependencies are not supported yet");
+		STARPU_ASSERT_MSG(!dep_job->task->regenerate, "end dependencies are not supported yet for regenerated tasks");
+
+		STARPU_PTHREAD_MUTEX_LOCK(&dep_job->sync_mutex);
+		dep_job->end_rdep = task;
+		if (dep_job->terminated)
+			/* It's actually already over */
+			done = 1;
+		STARPU_PTHREAD_MUTEX_UNLOCK(&dep_job->sync_mutex);
+
+		if (done)
+			starpu_task_end_dep_release(task);
+	}
+}
+
+void starpu_task_declare_end_deps(struct starpu_task *task, unsigned ndeps, ...)
+{
+	if (ndeps == 0)
+		return;
+	struct starpu_task *tasks[ndeps];
+	unsigned i;
+	va_list pa;
+	va_start(pa, ndeps);
+	for (i = 0; i < ndeps; i++)
+	{
+		tasks[i] = va_arg(pa, struct starpu_task *);
+	}
+	va_end(pa);
+	starpu_task_declare_end_deps_array(task, ndeps, tasks);
+}
+
 int starpu_task_get_task_succs(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[])
 int starpu_task_get_task_succs(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[])
 {
 {
 	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
 	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);

+ 7 - 0
src/core/jobs.c

@@ -269,7 +269,10 @@ void starpu_task_end_dep_release(struct starpu_task *t)
 
 
 void starpu_task_end_dep_add(struct starpu_task *t, int nb_deps)
 void starpu_task_end_dep_add(struct starpu_task *t, int nb_deps)
 {
 {
+	struct _starpu_job *j = _starpu_get_job_associated_to_task(t);
+	STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
 	t->nb_termination_call_required += nb_deps;
 	t->nb_termination_call_required += nb_deps;
+	STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
 }
 }
 
 
 void _starpu_handle_job_termination(struct _starpu_job *j)
 void _starpu_handle_job_termination(struct _starpu_job *j)
@@ -284,6 +287,7 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 	}
 	}
 
 
 	struct starpu_task *task = j->task;
 	struct starpu_task *task = j->task;
+	struct starpu_task *end_rdep = NULL;
 	unsigned sched_ctx = task->sched_ctx;
 	unsigned sched_ctx = task->sched_ctx;
 	double flops = task->flops;
 	double flops = task->flops;
 	const unsigned continuation =
 	const unsigned continuation =
@@ -316,6 +320,7 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 		 * function. A value of 1 means that the codelet was executed but that
 		 * function. A value of 1 means that the codelet was executed but that
 		 * the callback is not done yet. */
 		 * the callback is not done yet. */
 		j->terminated = 1;
 		j->terminated = 1;
+		end_rdep = j->end_rdep;
 	}
 	}
 	STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
 	STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
 
 
@@ -411,6 +416,8 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 	if (!continuation)
 	if (!continuation)
 	{
 	{
 		/* in case there are dependencies, wake up the proper tasks */
 		/* in case there are dependencies, wake up the proper tasks */
+		if (end_rdep)
+			starpu_task_end_dep_release(end_rdep);
 		_starpu_notify_dependencies(j);
 		_starpu_notify_dependencies(j);
 	}
 	}
 
 

+ 3 - 0
src/core/jobs.h

@@ -110,6 +110,9 @@ struct _starpu_job
 	 * */
 	 * */
 	struct _starpu_cg_list job_successors;
 	struct _starpu_cg_list job_successors;
 
 
+	/* Task whose termination depends on this task */
+	struct starpu_task *end_rdep;
+
 	/* For tasks with cl==NULL but submitted with explicit data dependency,
 	/* For tasks with cl==NULL but submitted with explicit data dependency,
 	 * the handle for this dependency, so as to remove the task from the
 	 * the handle for this dependency, so as to remove the task from the
 	 * last_writer/readers */
 	 * last_writer/readers */

+ 4 - 0
src/core/sched_ctx.c

@@ -1011,8 +1011,10 @@ void starpu_sched_ctx_delete(unsigned sched_ctx_id)
 #endif //STARPU_USE_SC_HYPERVISOR
 #endif //STARPU_USE_SC_HYPERVISOR
 
 
 	_starpu_sched_ctx_lock_write(sched_ctx_id);
 	_starpu_sched_ctx_lock_write(sched_ctx_id);
+
 	unsigned inheritor_sched_ctx_id = sched_ctx->inheritor;
 	unsigned inheritor_sched_ctx_id = sched_ctx->inheritor;
 	struct _starpu_sched_ctx *inheritor_sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx->inheritor);
 	struct _starpu_sched_ctx *inheritor_sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx->inheritor);
+	_starpu_sched_ctx_lock_write(inheritor_sched_ctx_id);
 
 
 	STARPU_ASSERT(sched_ctx->id != STARPU_NMAX_SCHED_CTXS);
 	STARPU_ASSERT(sched_ctx->id != STARPU_NMAX_SCHED_CTXS);
 
 
@@ -1052,6 +1054,7 @@ void starpu_sched_ctx_delete(unsigned sched_ctx_id)
 		notify_workers_about_changing_ctx_done(nworkers_ctx, backup_workerids);
 		notify_workers_about_changing_ctx_done(nworkers_ctx, backup_workerids);
 		occupied_sms -= sched_ctx->nsms;
 		occupied_sms -= sched_ctx->nsms;
 		_starpu_sched_ctx_unlock_write(sched_ctx_id);
 		_starpu_sched_ctx_unlock_write(sched_ctx_id);
+		_starpu_sched_ctx_unlock_write(inheritor_sched_ctx_id);
 		STARPU_PTHREAD_RWLOCK_DESTROY(&sched_ctx->rwlock);
 		STARPU_PTHREAD_RWLOCK_DESTROY(&sched_ctx->rwlock);
 		_starpu_delete_sched_ctx(sched_ctx);
 		_starpu_delete_sched_ctx(sched_ctx);
 	}
 	}
@@ -1060,6 +1063,7 @@ void starpu_sched_ctx_delete(unsigned sched_ctx_id)
 		notify_workers_about_changing_ctx_done(nworkers_ctx, backup_workerids);
 		notify_workers_about_changing_ctx_done(nworkers_ctx, backup_workerids);
 		occupied_sms -= sched_ctx->nsms;
 		occupied_sms -= sched_ctx->nsms;
 		_starpu_sched_ctx_unlock_write(sched_ctx_id);
 		_starpu_sched_ctx_unlock_write(sched_ctx_id);
+		_starpu_sched_ctx_unlock_write(inheritor_sched_ctx_id);
 	}
 	}
 	/* workerids is malloc-ed in starpu_sched_ctx_get_workers_list, don't forget to free it when
 	/* workerids is malloc-ed in starpu_sched_ctx_get_workers_list, don't forget to free it when
 	   you don't use it anymore */
 	   you don't use it anymore */

+ 1 - 1
src/core/task.c

@@ -237,7 +237,7 @@ int starpu_task_wait(struct starpu_task *task)
 
 
 	STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "starpu_task_wait must not be called from a task or callback");
 	STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "starpu_task_wait must not be called from a task or callback");
 
 
-	struct _starpu_job *j = (struct _starpu_job *)task->starpu_private;
+	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
 
 
 	_STARPU_TRACE_TASK_WAIT_START(j);
 	_STARPU_TRACE_TASK_WAIT_START(j);
 
 

+ 2 - 2
src/datawizard/data_request.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2008-2017                                Université de Bordeaux
  * Copyright (C) 2008-2017                                Université de Bordeaux
- * Copyright (C) 2010-2017                                CNRS
+ * Copyright (C) 2010-2018                                CNRS
  * Copyright (C) 2011,2016-2017                           Inria
  * Copyright (C) 2011,2016-2017                           Inria
  * Copyright (C) 2013                                     Thibaut Lambert
  * Copyright (C) 2013                                     Thibaut Lambert
  *
  *
@@ -133,7 +133,7 @@ struct _starpu_data_request *_starpu_create_data_request(starpu_data_handle_t ha
 
 
 	_starpu_spin_init(&r->lock);
 	_starpu_spin_init(&r->lock);
 
 
-       _STARPU_TRACE_DATA_REQUEST_CREATED(handle, src_replicate->memory_node, dst_replicate->memory_node, prio, is_prefetch);
+	_STARPU_TRACE_DATA_REQUEST_CREATED(handle, src_replicate?src_replicate->memory_node:-1, dst_replicate?dst_replicate->memory_node:-1, prio, is_prefetch);
 
 
 	r->origin = origin;
 	r->origin = origin;
 	r->handle = handle;
 	r->handle = handle;

+ 3 - 1
src/util/fstarpu.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2016-2018                                CNRS
  * Copyright (C) 2016-2018                                CNRS
  * Copyright (C) 2016-2017                                Inria
  * Copyright (C) 2016-2017                                Inria
- * Copyright (C) 2016-2017                                Université de Bordeaux
+ * Copyright (C) 2016-2018                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -38,6 +38,7 @@ static const intptr_t fstarpu_data_mode_array	= STARPU_DATA_MODE_ARRAY;
 static const intptr_t fstarpu_cl_args	= STARPU_CL_ARGS;
 static const intptr_t fstarpu_cl_args	= STARPU_CL_ARGS;
 static const intptr_t fstarpu_cl_args_nfree	= STARPU_CL_ARGS_NFREE;
 static const intptr_t fstarpu_cl_args_nfree	= STARPU_CL_ARGS_NFREE;
 static const intptr_t fstarpu_task_deps_array	= STARPU_TASK_DEPS_ARRAY;
 static const intptr_t fstarpu_task_deps_array	= STARPU_TASK_DEPS_ARRAY;
+static const intptr_t fstarpu_task_end_deps_array	= STARPU_TASK_END_DEPS_ARRAY;
 static const intptr_t fstarpu_callback	= STARPU_CALLBACK;
 static const intptr_t fstarpu_callback	= STARPU_CALLBACK;
 static const intptr_t fstarpu_callback_with_arg	= STARPU_CALLBACK_WITH_ARG;
 static const intptr_t fstarpu_callback_with_arg	= STARPU_CALLBACK_WITH_ARG;
 static const intptr_t fstarpu_callback_arg	= STARPU_CALLBACK_ARG;
 static const intptr_t fstarpu_callback_arg	= STARPU_CALLBACK_ARG;
@@ -114,6 +115,7 @@ intptr_t fstarpu_get_constant(char *s)
 	else if	(!strcmp(s, "FSTARPU_CL_ARGS"))	{ return fstarpu_cl_args; }
 	else if	(!strcmp(s, "FSTARPU_CL_ARGS"))	{ return fstarpu_cl_args; }
 	else if	(!strcmp(s, "FSTARPU_CL_ARGS_NFREE"))	{ return fstarpu_cl_args_nfree; }
 	else if	(!strcmp(s, "FSTARPU_CL_ARGS_NFREE"))	{ return fstarpu_cl_args_nfree; }
 	else if	(!strcmp(s, "FSTARPU_TASK_DEPS_ARRAY"))	{ return fstarpu_task_deps_array; }
 	else if	(!strcmp(s, "FSTARPU_TASK_DEPS_ARRAY"))	{ return fstarpu_task_deps_array; }
+	else if	(!strcmp(s, "FSTARPU_TASK_END_DEPS_ARRAY"))	{ return fstarpu_task_end_deps_array; }
 	else if	(!strcmp(s, "FSTARPU_CALLBACK"))	{ return fstarpu_callback; }
 	else if	(!strcmp(s, "FSTARPU_CALLBACK"))	{ return fstarpu_callback; }
 	else if	(!strcmp(s, "FSTARPU_CALLBACK_WITH_ARG"))	{ return fstarpu_callback_with_arg; }
 	else if	(!strcmp(s, "FSTARPU_CALLBACK_WITH_ARG"))	{ return fstarpu_callback_with_arg; }
 	else if	(!strcmp(s, "FSTARPU_CALLBACK_ARG"))	{ return fstarpu_callback_arg; }
 	else if	(!strcmp(s, "FSTARPU_CALLBACK_ARG"))	{ return fstarpu_callback_arg; }

+ 35 - 2
src/util/starpu_task_insert_utils.c

@@ -111,6 +111,11 @@ int _starpu_codelet_pack_args(void **arg_buffer, size_t *arg_buffer_size, va_lis
 			(void)va_arg(varg_list, unsigned);
 			(void)va_arg(varg_list, unsigned);
 			(void)va_arg(varg_list, struct starpu_task **);
 			(void)va_arg(varg_list, struct starpu_task **);
 		}
 		}
+		else if (arg_type==STARPU_TASK_END_DEPS_ARRAY)
+		{
+			(void)va_arg(varg_list, unsigned);
+			(void)va_arg(varg_list, struct starpu_task **);
+		}
 		else if (arg_type==STARPU_CALLBACK)
 		else if (arg_type==STARPU_CALLBACK)
 		{
 		{
 			(void)va_arg(varg_list, _starpu_callback_func_t);
 			(void)va_arg(varg_list, _starpu_callback_func_t);
@@ -333,7 +338,9 @@ int _starpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task *ta
 	int current_buffer;
 	int current_buffer;
 	int allocated_buffers = 0;
 	int allocated_buffers = 0;
 	unsigned ndeps = 0;
 	unsigned ndeps = 0;
+	unsigned nend_deps = 0;
 	struct starpu_task **task_deps_array = NULL;
 	struct starpu_task **task_deps_array = NULL;
+	struct starpu_task **task_end_deps_array = NULL;
 
 
 	_STARPU_TRACE_TASK_BUILD_START();
 	_STARPU_TRACE_TASK_BUILD_START();
 
 
@@ -385,10 +392,16 @@ int _starpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task *ta
 		}
 		}
 		else if (arg_type==STARPU_TASK_DEPS_ARRAY)
 		else if (arg_type==STARPU_TASK_DEPS_ARRAY)
 		{
 		{
-			STARPU_ASSERT_MSG(task_deps_array == NULL, "Parameter 'STARPU_TASK_DEPS_ARRAY' cannot be set twice");
+			STARPU_ASSERT_MSG(task_deps_array == NULL, "Parameter 'STARPU_TASK_DEPS_ARRAY' passed twice not supported yet");
 			ndeps = va_arg(varg_list, unsigned);
 			ndeps = va_arg(varg_list, unsigned);
 			task_deps_array = va_arg(varg_list, struct starpu_task **);
 			task_deps_array = va_arg(varg_list, struct starpu_task **);
 		}
 		}
+		else if (arg_type==STARPU_TASK_END_DEPS_ARRAY)
+		{
+			STARPU_ASSERT_MSG(task_end_deps_array == NULL, "Parameter 'STARPU_TASK_END_DEPS_ARRAY' passed twice not supported yet");
+			nend_deps = va_arg(varg_list, unsigned);
+			task_end_deps_array = va_arg(varg_list, struct starpu_task **);
+		}
 		else if (arg_type==STARPU_CALLBACK)
 		else if (arg_type==STARPU_CALLBACK)
 		{
 		{
 			task->callback_func = va_arg(varg_list, _starpu_callback_func_t);
 			task->callback_func = va_arg(varg_list, _starpu_callback_func_t);
@@ -540,6 +553,11 @@ int _starpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task *ta
 		starpu_task_declare_deps_array(task, ndeps, task_deps_array);
 		starpu_task_declare_deps_array(task, ndeps, task_deps_array);
 	}
 	}
 
 
+	if (task_end_deps_array)
+	{
+		starpu_task_declare_end_deps_array(task, nend_deps, task_end_deps_array);
+	}
+
 	_STARPU_TRACE_TASK_BUILD_END();
 	_STARPU_TRACE_TASK_BUILD_END();
 	return 0;
 	return 0;
 }
 }
@@ -550,7 +568,9 @@ int _fstarpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task *t
 	int current_buffer = 0;
 	int current_buffer = 0;
 	int allocated_buffers = 0;
 	int allocated_buffers = 0;
 	unsigned ndeps = 0;
 	unsigned ndeps = 0;
+	unsigned nend_deps = 0;
 	struct starpu_task **task_deps_array = NULL;
 	struct starpu_task **task_deps_array = NULL;
+	struct starpu_task **task_end_deps_array = NULL;
 
 
 	_STARPU_TRACE_TASK_BUILD_START();
 	_STARPU_TRACE_TASK_BUILD_START();
 
 
@@ -614,12 +634,20 @@ int _fstarpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task *t
 		}
 		}
 		else if (arg_type==STARPU_TASK_DEPS_ARRAY)
 		else if (arg_type==STARPU_TASK_DEPS_ARRAY)
 		{
 		{
-			STARPU_ASSERT_MSG(task_deps_array == NULL, "Parameter 'STARPU_TASK_DEPS_ARRAY' cannot be set twice");
+			STARPU_ASSERT_MSG(task_deps_array == NULL, "Parameter 'STARPU_TASK_DEPS_ARRAY' passed twice not supported yet");
 			arg_i++;
 			arg_i++;
 			ndeps = *(unsigned *)arglist[arg_i];
 			ndeps = *(unsigned *)arglist[arg_i];
 			arg_i++;
 			arg_i++;
 			task_deps_array = arglist[arg_i];
 			task_deps_array = arglist[arg_i];
 		}
 		}
+		else if (arg_type==STARPU_TASK_END_DEPS_ARRAY)
+		{
+			STARPU_ASSERT_MSG(task_end_deps_array == NULL, "Parameter 'STARPU_TASK_END_DEPS_ARRAY' passed twice not supported yet");
+			arg_i++;
+			nend_deps = *(unsigned *)arglist[arg_i];
+			arg_i++;
+			task_end_deps_array = arglist[arg_i];
+		}
 		else if (arg_type == STARPU_CALLBACK)
 		else if (arg_type == STARPU_CALLBACK)
 		{
 		{
 			arg_i++;
 			arg_i++;
@@ -789,6 +817,11 @@ int _fstarpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task *t
 		starpu_task_declare_deps_array(task, ndeps, task_deps_array);
 		starpu_task_declare_deps_array(task, ndeps, task_deps_array);
 	}
 	}
 
 
+	if (task_end_deps_array)
+	{
+		starpu_task_declare_end_deps_array(task, nend_deps, task_end_deps_array);
+	}
+
 	_STARPU_TRACE_TASK_BUILD_END();
 	_STARPU_TRACE_TASK_BUILD_END();
 
 
 	return 0;
 	return 0;

+ 1 - 0
tests/Makefile.am

@@ -257,6 +257,7 @@ myPROGRAMS +=				\
 	main/pack				\
 	main/pack				\
 	main/get_children_tasks			\
 	main/get_children_tasks			\
 	main/hwloc_cpuset			\
 	main/hwloc_cpuset			\
+	main/task_end_dep			\
 	datawizard/acquire_cb_insert		\
 	datawizard/acquire_cb_insert		\
 	datawizard/acquire_release		\
 	datawizard/acquire_release		\
 	datawizard/acquire_release2		\
 	datawizard/acquire_release2		\

+ 8 - 4
tests/main/empty_task_chain.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2012                                     Inria
  * Copyright (C) 2012                                     Inria
  * Copyright (C) 2010-2012,2015,2017                      CNRS
  * Copyright (C) 2010-2012,2015,2017                      CNRS
- * Copyright (C) 2010,2013-2014,2016                      Université de Bordeaux
+ * Copyright (C) 2010,2013-2014,2016, 2018                      Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -43,15 +43,19 @@ int main(int argc, char **argv)
 		if (i > 0)
 		if (i > 0)
 		{
 		{
 			starpu_task_declare_deps_array(tasks[i], 1, &tasks[i-1]);
 			starpu_task_declare_deps_array(tasks[i], 1, &tasks[i-1]);
-			ret = starpu_task_submit(tasks[i]);
-			if (ret == -ENODEV) goto enodev;
-			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 		}
 		}
 
 
 		if (i == (N-1))
 		if (i == (N-1))
 			tasks[i]->detach = 0;
 			tasks[i]->detach = 0;
 	}
 	}
 
 
+	for (i = 1; i < N; i++)
+	{
+		ret = starpu_task_submit(tasks[i]);
+		if (ret == -ENODEV) goto enodev;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	}
+
 	ret = starpu_task_submit(tasks[0]);
 	ret = starpu_task_submit(tasks[0]);
 	if (ret == -ENODEV) goto enodev;
 	if (ret == -ENODEV) goto enodev;
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");

+ 2 - 3
tests/main/empty_task_sync_point_tasks.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2012                                     Inria
  * Copyright (C) 2012                                     Inria
  * Copyright (C) 2010-2013,2015,2017                      CNRS
  * Copyright (C) 2010-2013,2015,2017                      CNRS
- * Copyright (C) 2010,2013-2014,2016                      Université de Bordeaux
+ * Copyright (C) 2010,2013-2014,2016, 2018                      Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -66,8 +66,7 @@ int main(int argc, char **argv)
 	struct starpu_task *taskF = starpu_task_create();
 	struct starpu_task *taskF = starpu_task_create();
 	taskF->cl = &dummy_codelet;
 	taskF->cl = &dummy_codelet;
 
 
-	struct starpu_task *tasksABC[3] = {taskA, taskB, taskC};
-	starpu_task_declare_deps_array(taskD, 3, tasksABC);
+	starpu_task_declare_deps(taskD, 3, taskA, taskB, taskC);
 	starpu_task_declare_deps_array(taskE, 1, &taskD);
 	starpu_task_declare_deps_array(taskE, 1, &taskD);
 	starpu_task_declare_deps_array(taskF, 1, &taskD);
 	starpu_task_declare_deps_array(taskF, 1, &taskD);
 
 

+ 4 - 6
tests/main/subgraph_repeat.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2012-2013                                Inria
  * Copyright (C) 2012-2013                                Inria
  * Copyright (C) 2010-2015,2017                           CNRS
  * Copyright (C) 2010-2015,2017                           CNRS
- * Copyright (C) 2010,2012-2016                           Université de Bordeaux
+ * Copyright (C) 2010,2012-2016, 2018                           Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -138,12 +138,10 @@ int main(int argc, char **argv)
 	taskD.callback_func = callback_task_D;
 	taskD.callback_func = callback_task_D;
 	taskD.handles[0] = check_data;
 	taskD.handles[0] = check_data;
 
 
-	struct starpu_task *depsBC_array[1] = {&taskA};
-	starpu_task_declare_deps_array(&taskB, 1, depsBC_array);
-	starpu_task_declare_deps_array(&taskC, 1, depsBC_array);
+	starpu_task_declare_deps(&taskB, 1, &taskA);
+	starpu_task_declare_deps(&taskC, 1, &taskA);
 
 
-	struct starpu_task *depsD_array[2] = {&taskB, &taskC};
-	starpu_task_declare_deps_array(&taskD, 2, depsD_array);
+	starpu_task_declare_deps(&taskD, 2, &taskB, &taskC);
 
 
 	ret = starpu_task_submit(&taskA); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	ret = starpu_task_submit(&taskA); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	ret = starpu_task_submit(&taskB); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	ret = starpu_task_submit(&taskB); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");

+ 4 - 6
tests/main/subgraph_repeat_regenerate.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2012-2013                                Inria
  * Copyright (C) 2012-2013                                Inria
  * Copyright (C) 2010-2015,2017                           CNRS
  * Copyright (C) 2010-2015,2017                           CNRS
- * Copyright (C) 2010-2016                                Université de Bordeaux
+ * Copyright (C) 2010-2016, 2018                                Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -160,12 +160,10 @@ int main(int argc, char **argv)
 	taskD.regenerate = 1;
 	taskD.regenerate = 1;
 	taskD.handles[0] = check_data;
 	taskD.handles[0] = check_data;
 
 
-	struct starpu_task *depsBC_array[1] = {&taskA};
-	starpu_task_declare_deps_array(&taskB, 1, depsBC_array);
-	starpu_task_declare_deps_array(&taskC, 1, depsBC_array);
+	starpu_task_declare_deps(&taskB, 1, &taskA);
+	starpu_task_declare_deps(&taskC, 1, &taskA);
 
 
-	struct starpu_task *depsD_array[2] = {&taskB, &taskC};
-	starpu_task_declare_deps_array(&taskD, 2, depsD_array);
+	starpu_task_declare_deps(&taskD, 2, &taskB, &taskC);
 
 
 	ret = starpu_task_submit(&taskA); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	ret = starpu_task_submit(&taskA); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	ret = starpu_task_submit(&taskB); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	ret = starpu_task_submit(&taskB); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");

+ 4 - 6
tests/main/subgraph_repeat_tag.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2012-2013                                Inria
  * Copyright (C) 2012-2013                                Inria
- * Copyright (C) 2010-2016                                Université de Bordeaux
+ * Copyright (C) 2010-2016, 2018                                Université de Bordeaux
  * Copyright (C) 2010-2015,2017                           CNRS
  * Copyright (C) 2010-2015,2017                           CNRS
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
@@ -169,12 +169,10 @@ int main(int argc, char **argv)
 	taskD.regenerate = 1;
 	taskD.regenerate = 1;
 	taskD.handles[0] = check_data;
 	taskD.handles[0] = check_data;
 
 
-	struct starpu_task *depsBC_array[1] = {&taskA};
-	starpu_task_declare_deps_array(&taskB, 1, depsBC_array);
-	starpu_task_declare_deps_array(&taskC, 1, depsBC_array);
+	starpu_task_declare_deps(&taskB, 1, &taskA);
+	starpu_task_declare_deps(&taskC, 1, &taskA);
 
 
-	struct starpu_task *depsD_array[2] = {&taskB, &taskC};
-	starpu_task_declare_deps_array(&taskD, 2, depsD_array);
+	starpu_task_declare_deps(&taskD, 2, &taskB, &taskC);
 
 
 	ret = starpu_task_submit(&taskA); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	ret = starpu_task_submit(&taskA); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	ret = starpu_task_submit(&taskB); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	ret = starpu_task_submit(&taskB); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");

+ 130 - 0
tests/main/task_end_dep.c

@@ -0,0 +1,130 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2018                                     CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/* This checks that declaring an end dependency on a task that has already
+ * terminated works */
+
+#include <starpu.h>
+
+#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
+
+#define INIT 12
+
+void cpu_codelet2(void *descr[], void *args)
+{
+	int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);
+	(void)args;
+	STARPU_ASSERT(*val == INIT);
+	starpu_sleep(0.1);
+	STARPU_ASSERT(*val == INIT);
+	*val *= 2;
+}
+
+struct starpu_codelet cl2 =
+{
+	.cpu_funcs = {cpu_codelet2},
+	.cpu_funcs_name = {"cpu_codelet2"},
+	.nbuffers = 1,
+	.modes = {STARPU_RW},
+	.name = "codelet2"
+};
+
+void cpu_codelet(void *descr[], void *args)
+{
+	(void)args;
+	int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);
+
+	STARPU_ASSERT(*val == 2*INIT);
+	starpu_sleep(0.1);
+	STARPU_ASSERT(*val == 2*INIT);
+	*val *= 2;
+}
+
+struct starpu_codelet cl =
+{
+	.cpu_funcs = {cpu_codelet},
+	.cpu_funcs_name = {"cpu_codelet"},
+	.nbuffers = 1,
+	.modes = {STARPU_RW},
+	.name = "codelet"
+};
+
+int main(void)
+{
+        int value=INIT;
+	int ret;
+	starpu_data_handle_t value_handle;
+	struct starpu_conf conf;
+	struct starpu_task *task, *task2;
+
+	starpu_conf_init(&conf);
+	conf.nmic = 0;
+	conf.nscc = 0;
+	conf.nmpi_ms = 0;
+
+        ret = starpu_init(&conf);
+	if (STARPU_UNLIKELY(ret == -ENODEV))
+	{
+		return 77;
+	}
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	if (starpu_cpu_worker_get_count() < 1)
+	{
+		FPRINTF(stderr, "This application requires at least 1 cpu worker\n");
+		starpu_shutdown();
+		return 77;
+	}
+
+	starpu_variable_data_register(&value_handle, STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(value));
+
+	task = starpu_task_build(&cl,
+				 STARPU_RW, value_handle,
+				 0);
+	STARPU_ASSERT(task);
+	task->detach = 0;
+
+	task2 = starpu_task_build(&cl2,
+				 STARPU_RW, value_handle,
+				 0);
+	STARPU_ASSERT(task2);
+	task2->detach = 0;
+	task2->destroy = 0;
+
+	ret = starpu_task_submit(task2);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	ret = starpu_task_wait(task2);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait");
+
+	starpu_task_declare_end_deps(task, 1, task2);
+	starpu_task_destroy(task2);
+
+	ret = starpu_task_submit(task);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+	ret = starpu_task_wait(task);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait");
+
+	starpu_data_unregister(value_handle);
+
+	STARPU_ASSERT(value == 2*2*INIT);
+
+        starpu_shutdown();
+
+	FPRINTF(stderr, "Value = %d\n", value);
+
+	return ret;
+}

+ 2 - 3
tests/main/task_wait_api.c

@@ -2,7 +2,7 @@
  *
  *
  * Copyright (C) 2012,2015                                Inria
  * Copyright (C) 2012,2015                                Inria
  * Copyright (C) 2010-2013,2015,2017                      CNRS
  * Copyright (C) 2010-2013,2015,2017                      CNRS
- * Copyright (C) 2010,2013-2014,2016                      Université de Bordeaux
+ * Copyright (C) 2010,2013-2014,2016, 2018                      Université de Bordeaux
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -89,8 +89,7 @@ int main(int argc, char **argv)
 	taskF = create_dummy_task();
 	taskF = create_dummy_task();
 	taskG = create_dummy_task();
 	taskG = create_dummy_task();
 
 
-	struct starpu_task *tasksCDEF[4] = {taskC, taskD, taskE, taskF};
-	starpu_task_declare_deps_array(taskG, 4, tasksCDEF);
+	starpu_task_declare_deps(taskG, 4, taskC, taskD, taskE, taskF);
 
 
 	ret = starpu_task_submit(taskC); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	ret = starpu_task_submit(taskC); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	ret = starpu_task_submit(taskD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	ret = starpu_task_submit(taskD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");