Pārlūkot izejas kodu

Add starpu_task_declare_end_deps and starpu_task_declare_end_deps_array

Samuel Thibault 6 gadi atpakaļ
vecāks
revīzija
8e6ac3b45d

+ 4 - 2
ChangeLog

@@ -51,7 +51,9 @@ New features:
   * Add possibility to delay the termination of a task with the
     functions starpu_task_end_dep_add() which specifies the number of
     calls to the function starpu_task_end_dep_release() needed to
-    trigger the task termination.
+    trigger the task termination, or with starpu_task_declare_end_deps_array()
+    and starpu_task_declare_end_deps() to just declare termination dependencies
+    between tasks.
   * Add possibility to define the sequential consistency at the task level
     for each handle used by the task.
   * Add STARPU_SPECIFIC_NODE_LOCAL, STARPU_SPECIFIC_NODE_CPU, and
@@ -116,7 +118,7 @@ Small features:
   * New function starpu_data_partition_not_automatic() to disable the
     automatic partitioning of a data handle for which a asynchronous
     plan has previously been submitted
-  * Add starpu_task_declare_deps.
+  * Add starpu_task_declare_deps()
 
 Changes:
   * Vastly improve simgrid simulation time.

+ 4 - 3
doc/doxygen/chapters/301_tasks.doxy

@@ -111,9 +111,10 @@ starpu_task::tag_id and using the function starpu_tag_declare_deps()
 or starpu_tag_declare_deps_array().
 
 The termination of a task can be delayed through the function
-starpu_task_end_dep_add() which specifies the number of calls to the
-function starpu_task_end_dep_release() needed to trigger the task
-termination.
+starpu_task_end_dep_add() which specifies the number of calls to the function
+starpu_task_end_dep_release() needed to trigger the task termination. One can
+also use starpu_task_declare_end_deps() or starpu_task_declare_end_deps_array()
+to delay the termination of a task until the termination of other tasks.
 
 \section SettingManyDataHandlesForATask Setting Many Data Handles For a Task
 

+ 22 - 6
doc/doxygen/chapters/api/explicit_dependencies.doxy

@@ -39,12 +39,28 @@ similarly to starpu_task_declare_deps_array(), but the tasks are passed after \p
 ndeps, which indicates how many tasks \p task shall be made to depend on.
 If \p ndeps is 0, no dependency is added.
 
-Specify the dependencies of the task identified by tag \p id.
-The first argument specifies the tag which is configured, the second
-argument gives the number of tag(s) on which \p id depends. The
-following arguments are the tags which have to be terminated to unlock
-the task. This function must be called before the associated task is
-submitted to StarPU with starpu_task_submit().
+\fn void starpu_task_declare_end_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[])
+\ingroup API_Explicit_Dependencies
+Declare task end dependencies between a \p task and an array of
+tasks of length \p ndeps. \p task will appear as terminated only once both
+\p task itself and all the tasks of \p task_array have terminated.
+This function must be called prior to the
+termination of the task, but it may be called after the submission or the
+execution of the tasks in the array, provided the tasks are still
+valid (i.e. they were not automatically destroyed). Calling this
+function on a task that was already terminated or with an entry of
+\p task_array that is no longer a valid task results in undefined
+behaviour. If \p ndeps is 0, no dependency is added. It is possible to
+call starpu_task_declare_end_deps_array() several times on the same task;
+in this case, the dependencies are added. Redundant task dependencies
+are currently not supported.
+
+\fn void starpu_task_declare_end_deps(struct starpu_task *task, unsigned ndeps, ...)
+\ingroup API_Explicit_Dependencies
+Declare task end dependencies between a \p task and a series of \p ndeps tasks,
+similarly to starpu_task_declare_end_deps_array(), but the tasks are passed after \p
+ndeps, which indicates how many tasks the termination of \p task shall be made to depend on.
+If \p ndeps is 0, no dependency is added.
 
 \fn int starpu_task_get_task_succs(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[])
 \ingroup API_Explicit_Dependencies

+ 8 - 1
doc/doxygen/chapters/api/insert_task.doxy

@@ -152,10 +152,17 @@ task to.
 \def STARPU_TASK_DEPS_ARRAY
 \ingroup API_Insert_Task
 Used when calling starpu_task_insert(), must
-be followed by a number of tasks, and an array containing these tasks.
+be followed by a number of tasks as int, and an array containing these tasks.
 The function starpu_task_declare_deps_array() will be called with the
 given values.
 
+\def STARPU_TASK_END_DEPS_ARRAY
+\ingroup API_Insert_Task
+Used when calling starpu_task_insert(), must
+be followed by a number of tasks as int, and an array containing these tasks.
+The function starpu_task_declare_end_deps_array() will be called with the
+given values.
+
 \def STARPU_TASK_COLOR
 \ingroup API_Insert_Task
 Used when calling starpu_task_insert(), must be followed by an integer

+ 1 - 0
examples/Makefile.am

@@ -254,6 +254,7 @@ STARPU_EXAMPLES +=				\
 	reductions/dot_product			\
 	reductions/minmax_reduction		\
 	dependency/task_end_dep			\
+	dependency/task_end_dep_add		\
 	dependency/sequential_consistency
 
 endif

+ 35 - 9
examples/dependency/task_end_dep.c

@@ -14,20 +14,31 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+/* This shows how to defer termination of a task until the termination of
+ * another task.  */
+
 #include <starpu.h>
 
 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
 
+#define INIT 12
+
 void cpu_codelet2(void *descr[], void *args)
 {
-	(void)descr;
+	int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);
 	(void)args;
+	STARPU_ASSERT(*val == 2*INIT);
+	starpu_sleep(0.1);
+	STARPU_ASSERT(*val == 2*INIT);
+	*val *= 2;
 }
 
 struct starpu_codelet cl2 = /* Codelet of the sub-task inserted from cpu_codelet() */
 {
 	.cpu_funcs = {cpu_codelet2},
 	.cpu_funcs_name = {"cpu_codelet2"},
+	.nbuffers = 1, /* cpu_codelet2() reads and writes descr[0] */
+	.modes = {STARPU_RW}, /* access mode of that single buffer */
 	.name = "codelet2"
 };
 
@@ -35,14 +46,15 @@ void cpu_codelet(void *descr[], void *args)
 {
 	(void)args;
 	int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);
-	struct starpu_task *task;
-
-	task = starpu_task_get_current();
-	starpu_task_end_dep_add(task, 1);
+	struct starpu_task *task = starpu_task_get_current();
 
 	starpu_task_insert(&cl2,
-			   STARPU_CALLBACK_WITH_ARG, starpu_task_end_dep_release, task,
+			   STARPU_RW, task->handles[0],
+			   STARPU_TASK_END_DEPS_ARRAY, 1, &task,
 			   0);
+	STARPU_ASSERT(*val == INIT);
+	starpu_sleep(0.1);
+	STARPU_ASSERT(*val == INIT);
 	*val *= 2;
 }
 
@@ -57,10 +69,11 @@ struct starpu_codelet cl =
 
 int main(void)
 {
-        int value=12;
+        int value=INIT;
 	int ret;
 	starpu_data_handle_t value_handle;
 	struct starpu_conf conf;
+	struct starpu_task *task;
 
 	starpu_conf_init(&conf);
 	conf.nmic = 0;
@@ -83,13 +96,26 @@ int main(void)
 
 	starpu_variable_data_register(&value_handle, STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(value));
 
-	ret = starpu_task_insert(&cl,
+	task = starpu_task_build(&cl,
 				 STARPU_RW, value_handle,
 				 0);
-	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
+	STARPU_ASSERT(task);
+	task->detach = 0;
+	ret = starpu_task_submit(task);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	ret = starpu_task_wait(task);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait");
+
+	starpu_data_set_sequential_consistency_flag(value_handle, 0);
+	starpu_data_acquire_on_node(value_handle, STARPU_MAIN_RAM, STARPU_R);
+	/* Waiting for the main task should have also waited for the subtask */
+	STARPU_ASSERT(value == 2*2*INIT);
+	starpu_data_release_on_node(value_handle, STARPU_MAIN_RAM);
 
 	starpu_data_unregister(value_handle);
 
+	STARPU_ASSERT(value == 2*2*INIT);
+
         starpu_shutdown();
 
 	FPRINTF(stderr, "Value = %d\n", value);

+ 106 - 0
examples/dependency/task_end_dep_add.c

@@ -0,0 +1,106 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2018                                     CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/* This shows how to defer termination of a task thanks to
+ * starpu_task_end_dep_add.  */
+
+#include <starpu.h>
+
+#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
+
+#define INIT 12
+
+void cpu_codelet2(void *descr[], void *args) /* CPU kernel of the sub-task: intentionally does nothing */
+{
+	(void)descr; /* unused */
+	(void)args; /* unused */
+}
+
+struct starpu_codelet cl2 = /* Codelet of the sub-task inserted from cpu_codelet(); no .nbuffers/.modes are set here */
+{
+	.cpu_funcs = {cpu_codelet2},
+	.cpu_funcs_name = {"cpu_codelet2"},
+	.name = "codelet2"
+};
+
+void cpu_codelet(void *descr[], void *args) /* CPU kernel of the main task: makes its own termination wait for a sub-task, and doubles the value in descr[0] */
+{
+	(void)args; /* unused */
+	int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);
+	struct starpu_task *task;
+
+	task = starpu_task_get_current();
+	starpu_task_end_dep_add(task, 1); /* one starpu_task_end_dep_release() call is now needed before this task terminates */
+
+	starpu_task_insert(&cl2,
+			   STARPU_CALLBACK_WITH_ARG, starpu_task_end_dep_release, task, /* the sub-task's callback performs that release */
+			   0);
+	STARPU_ASSERT(*val == INIT); /* the value must still be the initial one at this point */
+	*val *= 2;
+}
+
+struct starpu_codelet cl = /* Codelet of the main task submitted from main() */
+{
+	.cpu_funcs = {cpu_codelet},
+	.cpu_funcs_name = {"cpu_codelet"},
+	.nbuffers = 1, /* cpu_codelet() accesses descr[0] only */
+	.modes = {STARPU_RW}, /* the value is read, then doubled in place */
+	.name = "codelet"
+};
+
+int main(void) /* Register one int, run the main task on it, and check that it was doubled exactly once */
+{
+        int value=INIT;
+	int ret;
+	starpu_data_handle_t value_handle;
+	struct starpu_conf conf;
+
+	starpu_conf_init(&conf);
+	conf.nmic = 0;
+	conf.nscc = 0;
+	conf.nmpi_ms = 0;
+
+        ret = starpu_init(&conf);
+	if (STARPU_UNLIKELY(ret == -ENODEV))
+	{
+		return 77; /* NOTE(review): 77 is presumably the test harness "skipped" exit code - confirm */
+	}
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	if (starpu_cpu_worker_get_count() < 1)
+	{
+		FPRINTF(stderr, "This application requires at least 1 cpu worker\n");
+		starpu_shutdown();
+		return 77; /* same exit code as when starpu_init() reports -ENODEV */
+	}
+
+	starpu_variable_data_register(&value_handle, STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(value));
+
+	ret = starpu_task_insert(&cl,
+				 STARPU_RW, value_handle,
+				 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
+
+	starpu_data_unregister(value_handle);
+
+	STARPU_ASSERT(value == 2*INIT); /* cpu_codelet() doubles the value once; cpu_codelet2() does not touch it */
+
+        starpu_shutdown();
+
+	FPRINTF(stderr, "Value = %d\n", value);
+
+	return ret; /* ret still holds the checked result of starpu_task_insert() */
+}

+ 3 - 0
include/starpu_task.h

@@ -306,6 +306,9 @@ void starpu_tag_declare_deps_array(starpu_tag_t id, unsigned ndeps, starpu_tag_t
 void starpu_task_declare_deps(struct starpu_task *task, unsigned ndeps, ...);
 void starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[]);
 
+void starpu_task_declare_end_deps(struct starpu_task *task, unsigned ndeps, ...);
+void starpu_task_declare_end_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[]);
+
 void starpu_task_end_dep_add(struct starpu_task *t, int nb_deps);
 void starpu_task_end_dep_release(struct starpu_task *t);
 

+ 1 - 0
include/starpu_task_util.h

@@ -67,6 +67,7 @@ void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t
 #define STARPU_HANDLES_SEQUENTIAL_CONSISTENCY (29<<STARPU_MODE_SHIFT)
 #define STARPU_TASK_SYNCHRONOUS (30<<STARPU_MODE_SHIFT)
 #define STARPU_SHIFTED_MODE_MAX (31<<STARPU_MODE_SHIFT)
+#define STARPU_TASK_END_DEPS_ARRAY	(32<<STARPU_MODE_SHIFT) /* NOTE(review): this value is greater than STARPU_SHIFTED_MODE_MAX (31<<STARPU_MODE_SHIFT) defined just above - confirm whether that maximum should be raised */
 
 int starpu_task_set(struct starpu_task *task, struct starpu_codelet *cl, ...);
 struct starpu_task *starpu_task_build(struct starpu_codelet *cl, ...);

+ 5 - 0
mpi/src/starpu_mpi_task_insert.c

@@ -361,6 +361,11 @@ int _starpu_mpi_task_decode_v(struct starpu_codelet *codelet, int me, int nb_nod
 			(void)va_arg(varg_list_copy, unsigned);
 			(void)va_arg(varg_list_copy, struct starpu_task **);
 		}
+		else if (arg_type==STARPU_TASK_END_DEPS_ARRAY)
+		{
+			(void)va_arg(varg_list_copy, unsigned);
+			(void)va_arg(varg_list_copy, struct starpu_task **);
+		}
 		else if (arg_type==STARPU_CALLBACK)
 		{
 			(void)va_arg(varg_list_copy, _starpu_callback_func_t);

+ 7 - 0
mpi/src/starpu_mpi_task_insert_fortran.c

@@ -191,6 +191,13 @@ int _fstarpu_mpi_task_decode_v(struct starpu_codelet *codelet, int me, int nb_no
 			arg_i++;
 			/* struct starpu_task ** */
 		}
+		else if (arg_type==STARPU_TASK_END_DEPS_ARRAY)
+		{
+			arg_i++;
+			/* unsigned */
+			arg_i++;
+			/* struct starpu_task ** */
+		}
 		else if (arg_type==STARPU_CALLBACK)
 		{
 			arg_i++;

+ 48 - 1
src/core/dependencies/task_deps.c

@@ -164,13 +164,60 @@ void starpu_task_declare_deps(struct starpu_task *task, unsigned ndeps, ...)
 	unsigned i;
 	va_list pa;
 	va_start(pa, ndeps);
-	for (i = 0; i < ndeps; i++) {
+	for (i = 0; i < ndeps; i++)
+	{
 		tasks[i] = va_arg(pa, struct starpu_task *);
 	}
 	va_end(pa);
 	starpu_task_declare_deps_array(task, ndeps, tasks);
 }
 
+void starpu_task_declare_end_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[]) /* Make the termination of task additionally wait for the ndeps tasks of task_array */
+{
+	unsigned i;
+
+	starpu_task_end_dep_add(task, ndeps); /* one starpu_task_end_dep_release() call on task is expected per dependency */
+	for (i = 0; i < ndeps; i++)
+	{
+		struct starpu_task *dep_task = task_array[i];
+		struct _starpu_job *dep_job = _starpu_get_job_associated_to_task(dep_task);
+		int done = 0; /* set when dep_task is found to be already terminated */
+
+		STARPU_ASSERT_MSG(!dep_job->submitted || !dep_job->task->destroy || !dep_job->task->detach || starpu_task_get_current() == dep_task, "Unless it is not to be destroyed automatically, task end dependencies have to be set before submission");
+		STARPU_ASSERT_MSG(dep_job->submitted != 2, "For resubmited tasks, dependencies have to be set before first re-submission");
+		STARPU_ASSERT_MSG(!dep_job->submitted || !dep_job->task->regenerate, "For regenerated tasks, dependencies have to be set before first submission");
+
+		STARPU_ASSERT_MSG(!dep_job->end_rdep, "multiple end dependencies are not supported yet"); /* a job stores a single end_rdep; NOTE(review): this check runs before sync_mutex is taken */
+		STARPU_ASSERT_MSG(!dep_job->task->regenerate, "end dependencies are not supported yet for regenerated tasks");
+
+		STARPU_PTHREAD_MUTEX_LOCK(&dep_job->sync_mutex);
+		dep_job->end_rdep = task; /* recorded so that the termination of dep_job releases task */
+		if (dep_job->terminated)
+			/* It's actually already over */
+			done = 1;
+		STARPU_PTHREAD_MUTEX_UNLOCK(&dep_job->sync_mutex);
+
+		if (done)
+			starpu_task_end_dep_release(task); /* release on behalf of the already-terminated dependency, once the lock is dropped */
+	}
+}
+
+void starpu_task_declare_end_deps(struct starpu_task *task, unsigned ndeps, ...) /* Variadic variant: the ndeps dependency tasks are passed as trailing struct starpu_task * arguments */
+{
+	if (ndeps == 0)
+		return; /* nothing to declare; also avoids a zero-length array below */
+	struct starpu_task *tasks[ndeps]; /* collects the variadic arguments for the array variant */
+	unsigned i;
+	va_list pa;
+	va_start(pa, ndeps);
+	for (i = 0; i < ndeps; i++)
+	{
+		tasks[i] = va_arg(pa, struct starpu_task *);
+	}
+	va_end(pa);
+	starpu_task_declare_end_deps_array(task, ndeps, tasks); /* the array variant does the actual work */
+}
+
 int starpu_task_get_task_succs(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[])
 {
 	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);

+ 7 - 0
src/core/jobs.c

@@ -269,7 +269,10 @@ void starpu_task_end_dep_release(struct starpu_task *t)
 
 void starpu_task_end_dep_add(struct starpu_task *t, int nb_deps) /* Add nb_deps to the number of termination calls required by task t */
 {
+	struct _starpu_job *j = _starpu_get_job_associated_to_task(t);
+	STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex); /* the counter is updated while holding the job's sync_mutex */
 	t->nb_termination_call_required += nb_deps;
+	STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
 }
 
 void _starpu_handle_job_termination(struct _starpu_job *j)
@@ -284,6 +287,7 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 	}
 
 	struct starpu_task *task = j->task;
+	struct starpu_task *end_rdep = NULL;
 	unsigned sched_ctx = task->sched_ctx;
 	double flops = task->flops;
 	const unsigned continuation =
@@ -316,6 +320,7 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 		 * function. A value of 1 means that the codelet was executed but that
 		 * the callback is not done yet. */
 		j->terminated = 1;
+		end_rdep = j->end_rdep;
 	}
 	STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
 
@@ -411,6 +416,8 @@ void _starpu_handle_job_termination(struct _starpu_job *j)
 	if (!continuation)
 	{
 		/* in case there are dependencies, wake up the proper tasks */
+		if (end_rdep)
+			starpu_task_end_dep_release(end_rdep);
 		_starpu_notify_dependencies(j);
 	}
 

+ 3 - 0
src/core/jobs.h

@@ -110,6 +110,9 @@ struct _starpu_job
 	 * */
 	struct _starpu_cg_list job_successors;
 
+	/* Task whose termination depends on this task */
+	struct starpu_task *end_rdep;
+
 	/* For tasks with cl==NULL but submitted with explicit data dependency,
 	 * the handle for this dependency, so as to remove the task from the
 	 * last_writer/readers */

+ 3 - 1
src/util/fstarpu.c

@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2016-2018                                CNRS
  * Copyright (C) 2016-2017                                Inria
- * Copyright (C) 2016-2017                                Université de Bordeaux
+ * Copyright (C) 2016-2018                                Université de Bordeaux
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -38,6 +38,7 @@ static const intptr_t fstarpu_data_mode_array	= STARPU_DATA_MODE_ARRAY;
 static const intptr_t fstarpu_cl_args	= STARPU_CL_ARGS;
 static const intptr_t fstarpu_cl_args_nfree	= STARPU_CL_ARGS_NFREE;
 static const intptr_t fstarpu_task_deps_array	= STARPU_TASK_DEPS_ARRAY;
+static const intptr_t fstarpu_task_end_deps_array	= STARPU_TASK_END_DEPS_ARRAY;
 static const intptr_t fstarpu_callback	= STARPU_CALLBACK;
 static const intptr_t fstarpu_callback_with_arg	= STARPU_CALLBACK_WITH_ARG;
 static const intptr_t fstarpu_callback_arg	= STARPU_CALLBACK_ARG;
@@ -114,6 +115,7 @@ intptr_t fstarpu_get_constant(char *s)
 	else if	(!strcmp(s, "FSTARPU_CL_ARGS"))	{ return fstarpu_cl_args; }
 	else if	(!strcmp(s, "FSTARPU_CL_ARGS_NFREE"))	{ return fstarpu_cl_args_nfree; }
 	else if	(!strcmp(s, "FSTARPU_TASK_DEPS_ARRAY"))	{ return fstarpu_task_deps_array; }
+	else if	(!strcmp(s, "FSTARPU_TASK_END_DEPS_ARRAY"))	{ return fstarpu_task_end_deps_array; }
 	else if	(!strcmp(s, "FSTARPU_CALLBACK"))	{ return fstarpu_callback; }
 	else if	(!strcmp(s, "FSTARPU_CALLBACK_WITH_ARG"))	{ return fstarpu_callback_with_arg; }
 	else if	(!strcmp(s, "FSTARPU_CALLBACK_ARG"))	{ return fstarpu_callback_arg; }

+ 35 - 2
src/util/starpu_task_insert_utils.c

@@ -111,6 +111,11 @@ int _starpu_codelet_pack_args(void **arg_buffer, size_t *arg_buffer_size, va_lis
 			(void)va_arg(varg_list, unsigned);
 			(void)va_arg(varg_list, struct starpu_task **);
 		}
+		else if (arg_type==STARPU_TASK_END_DEPS_ARRAY)
+		{
+			(void)va_arg(varg_list, unsigned);
+			(void)va_arg(varg_list, struct starpu_task **);
+		}
 		else if (arg_type==STARPU_CALLBACK)
 		{
 			(void)va_arg(varg_list, _starpu_callback_func_t);
@@ -333,7 +338,9 @@ int _starpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task *ta
 	int current_buffer;
 	int allocated_buffers = 0;
 	unsigned ndeps = 0;
+	unsigned nend_deps = 0;
 	struct starpu_task **task_deps_array = NULL;
+	struct starpu_task **task_end_deps_array = NULL;
 
 	_STARPU_TRACE_TASK_BUILD_START();
 
@@ -385,10 +392,16 @@ int _starpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task *ta
 		}
 		else if (arg_type==STARPU_TASK_DEPS_ARRAY)
 		{
-			STARPU_ASSERT_MSG(task_deps_array == NULL, "Parameter 'STARPU_TASK_DEPS_ARRAY' cannot be set twice");
+			STARPU_ASSERT_MSG(task_deps_array == NULL, "Parameter 'STARPU_TASK_DEPS_ARRAY' passed twice not supported yet");
 			ndeps = va_arg(varg_list, unsigned);
 			task_deps_array = va_arg(varg_list, struct starpu_task **);
 		}
+		else if (arg_type==STARPU_TASK_END_DEPS_ARRAY)
+		{
+			STARPU_ASSERT_MSG(task_end_deps_array == NULL, "Parameter 'STARPU_TASK_END_DEPS_ARRAY' passed twice not supported yet");
+			nend_deps = va_arg(varg_list, unsigned);
+			task_end_deps_array = va_arg(varg_list, struct starpu_task **);
+		}
 		else if (arg_type==STARPU_CALLBACK)
 		{
 			task->callback_func = va_arg(varg_list, _starpu_callback_func_t);
@@ -540,6 +553,11 @@ int _starpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task *ta
 		starpu_task_declare_deps_array(task, ndeps, task_deps_array);
 	}
 
+	if (task_end_deps_array)
+	{
+		starpu_task_declare_end_deps_array(task, nend_deps, task_end_deps_array);
+	}
+
 	_STARPU_TRACE_TASK_BUILD_END();
 	return 0;
 }
@@ -550,7 +568,9 @@ int _fstarpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task *t
 	int current_buffer = 0;
 	int allocated_buffers = 0;
 	unsigned ndeps = 0;
+	unsigned nend_deps = 0;
 	struct starpu_task **task_deps_array = NULL;
+	struct starpu_task **task_end_deps_array = NULL;
 
 	_STARPU_TRACE_TASK_BUILD_START();
 
@@ -614,12 +634,20 @@ int _fstarpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task *t
 		}
 		else if (arg_type==STARPU_TASK_DEPS_ARRAY)
 		{
-			STARPU_ASSERT_MSG(task_deps_array == NULL, "Parameter 'STARPU_TASK_DEPS_ARRAY' cannot be set twice");
+			STARPU_ASSERT_MSG(task_deps_array == NULL, "Parameter 'STARPU_TASK_DEPS_ARRAY' passed twice not supported yet");
 			arg_i++;
 			ndeps = *(unsigned *)arglist[arg_i];
 			arg_i++;
 			task_deps_array = arglist[arg_i];
 		}
+		else if (arg_type==STARPU_TASK_END_DEPS_ARRAY)
+		{
+			STARPU_ASSERT_MSG(task_end_deps_array == NULL, "Parameter 'STARPU_TASK_END_DEPS_ARRAY' passed twice not supported yet");
+			arg_i++;
+			nend_deps = *(unsigned *)arglist[arg_i];
+			arg_i++;
+			task_end_deps_array = arglist[arg_i];
+		}
 		else if (arg_type == STARPU_CALLBACK)
 		{
 			arg_i++;
@@ -789,6 +817,11 @@ int _fstarpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task *t
 		starpu_task_declare_deps_array(task, ndeps, task_deps_array);
 	}
 
+	if (task_end_deps_array)
+	{
+		starpu_task_declare_end_deps_array(task, nend_deps, task_end_deps_array);
+	}
+
 	_STARPU_TRACE_TASK_BUILD_END();
 
 	return 0;

+ 1 - 0
tests/Makefile.am

@@ -257,6 +257,7 @@ myPROGRAMS +=				\
 	main/pack				\
 	main/get_children_tasks			\
 	main/hwloc_cpuset			\
+	main/task_end_dep			\
 	datawizard/acquire_cb_insert		\
 	datawizard/acquire_release		\
 	datawizard/acquire_release2		\