Przeglądaj źródła

Merge branch 'master' into fpga

Nathalie Furmento 4 lat temu
rodzic
commit
5b3487a939

+ 13 - 0
ChangeLog

@@ -300,6 +300,19 @@ Small changes:
   * STARPU_COMM_STATS also displays the bandwidth
   * Update data interfaces implementations to only use public API
 
+StarPU 1.2.11 (git revision xxx)
+====================================================================
+
+Small features:
+  * Add starpu_tag_notify_restart_from_apps().
+
+StarPU 1.2.10 (git revision beb6ac9cc07dc9ae1c838a38d11ed2dae3775996)
+====================================================================
+
+Small features:
+  * New script starpu_env to set up StarPU environment variables
+  * New configure option --disable-build-doc-pdf
+
 StarPU 1.2.9 (git revision 3aca8da3138a99e93d7f93905d2543bd6f1ea1df)
 ====================================================================
 

+ 3 - 3
contrib/ci.inria.fr/job-1-check.sh

@@ -47,15 +47,15 @@ cd build
 
 STARPU_CONFIGURE_OPTIONS=""
 suname=$(uname)
-if test "$suname" == "Darwin"
+if test "$suname" = "Darwin"
 then
     STARPU_CONFIGURE_OPTIONS="--without-hwloc"
 fi
-if test "$suname" == "OpenBSD"
+if test "$suname" = "OpenBSD"
 then
     STARPU_CONFIGURE_OPTIONS="--without-hwloc --disable-mlr"
 fi
-if test "$suname" == "FreeBSD"
+if test "$suname" = "FreeBSD"
 then
     STARPU_CONFIGURE_OPTIONS="--disable-fortran"
 fi

+ 2 - 1
examples/basic_examples/multiformat.c

@@ -322,7 +322,8 @@ main(void)
 #ifdef STARPU_USE_OPENCL
         ret = starpu_opencl_unload_opencl(&opencl_program);
         STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
-        starpu_opencl_unload_opencl(&opencl_conversion_program);
+        ret = starpu_opencl_unload_opencl(&opencl_conversion_program);
+        STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
 #endif
 	starpu_shutdown();
 

+ 2 - 1
examples/binary/binary.c

@@ -103,7 +103,8 @@ int compute(char *file_name, int load_as_file, int with_malloc)
 	starpu_data_unregister(float_array_handle);
 
 #ifdef STARPU_USE_OPENCL
-	starpu_opencl_unload_opencl(&opencl_program);
+	ret = starpu_opencl_unload_opencl(&opencl_program);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
 #endif
 
 	FPRINTF(stderr, "array -> %f, %f, %f, %f\n", float_array[0], float_array[1], float_array[2], float_array[3]);

+ 2 - 1
examples/incrementer/incrementer.c

@@ -121,7 +121,8 @@ int main(int argc, char **argv)
 	end = starpu_timing_now();
 
 #ifdef STARPU_USE_OPENCL
-	starpu_opencl_unload_opencl(&opencl_program);
+	ret = starpu_opencl_unload_opencl(&opencl_program);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
 #endif
 
 	FPRINTF(stderr, "array -> %f, %f, %f, %f\n", float_array[0],

+ 1 - 1
include/starpu_helper.h

@@ -199,7 +199,7 @@ int starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_ha
    the handle has been copied, and it is given the pointer \p
    callback_arg as argument.
 */
-int starpu_data_dup_ro(starpu_data_handle_t *dst_handle, starpu_data_handle_t src_handle, int asynchronous, void (*callback_func)(void*), void *callback_arg);
+int starpu_data_dup_ro(starpu_data_handle_t *dst_handle, starpu_data_handle_t src_handle, int asynchronous);
 
 /**
    Call hwloc-ps to display binding of each process and thread running on

+ 1 - 1
include/starpu_perfmodel.h

@@ -381,7 +381,7 @@ char* starpu_perfmodel_get_archtype_name(enum starpu_worker_archtype archtype);
 void starpu_perfmodel_get_arch_name(struct starpu_perfmodel_arch *arch, char *archname, size_t maxlen, unsigned nimpl);
 
 /**
-   Return the estimated time of a task with the given model and the given footprint.
+   Return the estimated time in µs of a task with the given model and the given footprint.
 */
 double starpu_perfmodel_history_based_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, uint32_t footprint);
 

+ 7 - 0
include/starpu_task_dep.h

@@ -210,6 +210,13 @@ void starpu_tag_remove(starpu_tag_t id);
 */
 void starpu_tag_notify_from_apps(starpu_tag_t id);
 
+/**
+   Atomically call starpu_tag_notify_from_apps() and starpu_tag_restart() on tag
+   \p id.
+   This is useful with cyclic graphs, when we want to safely trigger its startup.
+*/
+void starpu_tag_notify_restart_from_apps(starpu_tag_t id);
+
 struct starpu_task *starpu_tag_get_task(starpu_tag_t id);
 
 /** @} */

+ 2 - 2
mpi/examples/matrix_decomposition/mpi_cholesky.c

@@ -51,12 +51,12 @@ int main(int argc, char **argv)
 	parse_args(argc, argv, nodes);
 
 	matrix_init(&bmat, rank, nodes, 1);
-	matrix_display(bmat, rank);
+	matrix_display(bmat, rank, nodes);
 
 	dw_cholesky(bmat, size/nblocks, rank, nodes, &timing, &flops);
 
 #ifndef STARPU_SIMGRID
-	matrix_display(bmat, rank);
+	matrix_display(bmat, rank, nodes);
 
 	if (check && rank == 0)
 		dw_cholesky_check_computation(bmat, rank, nodes, &correctness, &flops, 0.001);

+ 25 - 9
mpi/examples/matrix_decomposition/mpi_decomposition_matrix.c

@@ -26,17 +26,27 @@ int my_distrib(int y, int x, int nb_nodes)
 }
 
 
-void matrix_display(float ***bmat, int rank)
+void matrix_display(float ***bmat, int rank, int nodes)
 {
-	if (display)
-	{
-		unsigned y;
-		printf("[%d] Input :\n", rank);
+	int n;
+
+	if (!display)
+		return;
+
+	starpu_mpi_barrier(MPI_COMM_WORLD);
+
+	for (n = 0; n < rank; n++)
+		starpu_mpi_barrier(MPI_COMM_WORLD);
 
-		for(y=0 ; y<nblocks ; y++)
+	unsigned y;
+	printf("[%d] Input :\n", rank);
+
+	for(y=0 ; y<nblocks ; y++)
+	{
+		unsigned x;
+		for(x=0 ; x<=y ; x++)
 		{
-			unsigned x;
-			for(x=0 ; x<nblocks ; x++)
+			if (my_distrib(y, x, nodes) == rank)
 			{
 				unsigned j;
 				printf("Block %u,%u :\n", x, y);
@@ -45,7 +55,7 @@ void matrix_display(float ***bmat, int rank)
 					unsigned i;
 					for (i = 0; i < BLOCKSIZE; i++)
 					{
-						if (i <= j)
+						if (x < y || i <= j)
 						{
 							printf("%2.2f\t", bmat[y][x][j +i*BLOCKSIZE]);
 						}
@@ -59,6 +69,11 @@ void matrix_display(float ***bmat, int rank)
 			}
 		}
 	}
+
+	starpu_mpi_barrier(MPI_COMM_WORLD);
+
+	for (n = rank+1; n < nodes; n++)
+		starpu_mpi_barrier(MPI_COMM_WORLD);
 }
 
 /* Note: bmat is indexed by bmat[m][n][mm+nn*BLOCKSIZE],
@@ -78,6 +93,7 @@ void matrix_init(float ****bmat, int rank, int nodes, int alloc_everywhere)
 			if (alloc_everywhere || (mpi_rank == rank))
 			{
 				starpu_malloc((void **)&(*bmat)[m][n], BLOCKSIZE*BLOCKSIZE*sizeof(float));
+				if (mpi_rank == rank)
 				for (nn = 0; nn < BLOCKSIZE; nn++)
 				{
 					for (mm = 0; mm < BLOCKSIZE; mm++)

+ 1 - 1
mpi/examples/matrix_decomposition/mpi_decomposition_matrix.h

@@ -21,7 +21,7 @@
 /* Returns the MPI node number where data indexes index is */
 int my_distrib(int y, int x, int nb_nodes);
 
-void matrix_display(float ***bmat, int rank);
+void matrix_display(float ***bmat, int rank, int nodes);
 void matrix_init(float ****bmat, int rank, int nodes, int alloc_everywhere);
 void matrix_free(float ****bmat, int rank, int nodes, int alloc_everywhere);
 

+ 6 - 1
src/core/dependencies/implicit_data_deps.c

@@ -232,7 +232,12 @@ struct starpu_task *_starpu_detect_implicit_data_deps_with_handle(struct starpu_
 
 			handle->initialized = 1;
 			/* We will change our value, disconnect from our readonly duplicates */
-			handle->readonly_dup = NULL;
+			if (handle->readonly_dup)
+			{
+				STARPU_ASSERT(handle->readonly_dup->readonly_dup_of == handle);
+				handle->readonly_dup->readonly_dup_of = NULL;
+				handle->readonly_dup = NULL;
+			}
 			if (write_hook)
 				write_hook(handle);
 		}

+ 27 - 0
src/core/dependencies/tags.c

@@ -290,6 +290,33 @@ void starpu_tag_notify_from_apps(starpu_tag_t id)
 	_starpu_notify_tag_dependencies(tag);
 }
 
+/* With the tag lock held: if the tag is not already STARPU_DONE, trace it as
+ * done and notify its successors; in all cases leave it STARPU_BLOCKED. */
+void _starpu_notify_restart_tag_dependencies(struct _starpu_tag *tag)
+{
+	_starpu_spin_lock(&tag->lock);
+
+	if (tag->state != STARPU_DONE)
+	{
+		/* Not notified yet: do it now, the state being reset
+		 * before the successors get notified */
+		_STARPU_TRACE_TAG_DONE(tag);
+		tag->state = STARPU_BLOCKED;
+		_starpu_notify_cg_list(tag, &tag->tag_successors);
+	}
+	else
+		tag->state = STARPU_BLOCKED;
+
+	_starpu_spin_unlock(&tag->lock);
+}
+
+/* Get the tag structure of \p id and run the notify-and-restart on it. */
+void starpu_tag_notify_restart_from_apps(starpu_tag_t id)
+{
+	/* Public entry point: only translates the tag id for the helper */
+	_starpu_notify_restart_tag_dependencies(gettag_struct(id));
+}
+
 void _starpu_tag_declare(starpu_tag_t id, struct _starpu_job *job)
 {
 	_STARPU_TRACE_TAG(id, job);

+ 3 - 0
src/datawizard/coherency.h

@@ -197,6 +197,9 @@ struct _starpu_data_state
 	/** for a non-readonly handle, a readonly-only duplicate, that we can
 	    return from starpu_data_dup_ro */
 	starpu_data_handle_t readonly_dup;
+	/** for a readonly handle, the non-readonly handle that is referencing
+	    it in its readonly_dup field. */
+	starpu_data_handle_t readonly_dup_of;
 
 	/** in some case, the application may explicitly tell StarPU that a
  	 * piece of data is not likely to be used soon again */

+ 6 - 0
src/datawizard/filters.c

@@ -251,6 +251,8 @@ static void _starpu_data_partition(starpu_data_handle_t initial_handle, starpu_d
 		child->wt_mask = initial_handle->wt_mask;
 
 		child->aliases = initial_handle->aliases;
+		//child->readonly_dup = NULL;
+		//child->readonly_dup_of = NULL;
 
 		child->is_not_important = initial_handle->is_not_important;
 
@@ -302,6 +304,10 @@ static void _starpu_data_partition(starpu_data_handle_t initial_handle, starpu_d
 			f->filter_func(initial_interface, child_interface, f, i, nparts);
 		}
 
+		/* We compute the size and the footprint of the child once and
+		 * store it in the handle */
+		child->footprint = _starpu_compute_data_footprint(child);
+
 		for (node = 0; node < STARPU_MAXNODES; node++)
 		{
 			if (starpu_node_get_kind(node) != STARPU_CPU_RAM)

+ 43 - 17
src/datawizard/interfaces/data_interface.c

@@ -271,11 +271,17 @@ static void _starpu_register_new_data(starpu_data_handle_t handle,
 
 	handle->active = 1;
 
+	/* Store some values directly in the handle not to recompute them all
+	 * the time. */
+	handle->footprint = _starpu_compute_data_footprint(handle);
+
 	handle->home_node = home_node;
 
 	handle->wt_mask = wt_mask;
 
 	//handle->aliases = 0;
+	//handle->readonly_dup = NULL;
+	//handle->readonly_dup_of = NULL;
 
 	//handle->is_not_important = 0;
 
@@ -450,9 +456,7 @@ int _starpu_data_handle_init(starpu_data_handle_t handle, struct starpu_data_int
 	//handle->per_worker = NULL;
 	//handle->ops above
 
-	/* Store some values directly in the handle not to recompute them all
-	 * the time. */
-	handle->footprint = _starpu_compute_data_footprint(handle);
+	//handle->footprint
 
 	//handle->home_node
 	//handle->wt_mask
@@ -780,6 +784,39 @@ void _starpu_data_set_unregister_hook(starpu_data_handle_t handle, _starpu_data_
 	handle->unregister_hook = func;
 }
 
+/*
+ * Called when a data handle is about to be unregistered. If aliases of it
+ * remain, one alias is dropped and 0 is returned. Otherwise its links with a
+ * R/O duplicate (in either direction) are cut, and 1 is returned so that
+ * unregistration proceeds.
+ */
+static int _starpu_ro_data_detach(starpu_data_handle_t handle)
+{
+	starpu_data_handle_t other;
+	_starpu_spin_lock(&handle->header_lock);
+	if (handle->aliases)
+	{
+		handle->aliases--;
+		_starpu_spin_unlock(&handle->header_lock);
+		return 0;
+	}
+	other = handle->readonly_dup;
+	if (other)
+	{
+		STARPU_ASSERT(other->readonly_dup_of == handle);
+		other->readonly_dup_of = handle->readonly_dup = NULL;
+	}
+	other = handle->readonly_dup_of;
+	if (other)
+	{
+		STARPU_ASSERT(other->readonly_dup == handle);
+		other->readonly_dup = handle->readonly_dup_of = NULL;
+	}
+	handle->readonly = 0; /* lets unregistration wait through write dependencies */
+	_starpu_spin_unlock(&handle->header_lock);
+	return 1;
+}
+
 /* Unregister the data handle, perhaps we don't need to update the home_node
  * (in that case coherent is set to 0)
  * nowait is for internal use when we already know for sure that we won't have to wait.
@@ -793,14 +830,8 @@ static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned cohere
 	/* TODO: also check that it has the latest coherency */
 	STARPU_ASSERT(!(nowait && handle->busy_count != 0));
 
-	_starpu_spin_lock(&handle->header_lock);
-	if (handle->aliases)
-	{
-		handle->aliases--;
-		_starpu_spin_unlock(&handle->header_lock);
+	if (!_starpu_ro_data_detach(handle))
 		return;
-	}
-        _starpu_spin_unlock(&handle->header_lock);
 
 	int sequential_consistency = handle->sequential_consistency;
 	if (sequential_consistency && !nowait)
@@ -1041,14 +1072,9 @@ void starpu_data_unregister_submit(starpu_data_handle_t handle)
 {
 	STARPU_ASSERT_MSG(handle->magic == 42, "data %p is invalid (was it already registered?)", handle);
 	STARPU_ASSERT_MSG(!handle->lazy_unregister, "data %p can not be unregistered twice", handle);
-	_starpu_spin_lock(&handle->header_lock);
-	if (handle->aliases)
-	{
-		handle->aliases--;
-		_starpu_spin_unlock(&handle->header_lock);
+
+	if (!_starpu_ro_data_detach(handle))
 		return;
-	}
-        _starpu_spin_unlock(&handle->header_lock);
 
 	/* Wait for all task dependencies on this handle before putting it for free */
 	starpu_data_acquire_on_node_cb(handle, STARPU_ACQUIRE_NO_NODE_LOCK_ALL, handle->initialized?STARPU_RW:STARPU_W, _starpu_data_unregister_submit_cb, handle);

+ 3 - 7
src/util/starpu_data_cpy.c

@@ -177,8 +177,7 @@ int starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_ha
 }
 
 /* TODO: implement copy on write, and introduce starpu_data_dup as well */
-int starpu_data_dup_ro(starpu_data_handle_t *dst_handle, starpu_data_handle_t src_handle,
-			int asynchronous, void (*callback_func)(void*), void *callback_arg)
+int starpu_data_dup_ro(starpu_data_handle_t *dst_handle, starpu_data_handle_t src_handle, int asynchronous)
 {
 	_starpu_spin_lock(&src_handle->header_lock);
 	if (src_handle->readonly_dup) {
@@ -188,26 +187,23 @@ int starpu_data_dup_ro(starpu_data_handle_t *dst_handle, starpu_data_handle_t sr
 		_starpu_spin_lock(&(*dst_handle)->header_lock);
 		(*dst_handle)->aliases++;
 		_starpu_spin_unlock(&(*dst_handle)->header_lock);
-		if (callback_func)
-			callback_func(callback_arg);
 		return 0;
 	}
 	if (src_handle->readonly) {
 		src_handle->aliases++;
 		_starpu_spin_unlock(&src_handle->header_lock);
 		*dst_handle = src_handle;
-		if (callback_func)
-			callback_func(callback_arg);
 		return 0;
 	}
 	_starpu_spin_unlock(&src_handle->header_lock);
 
 	starpu_data_register_same(dst_handle, src_handle);
-	_starpu_data_cpy(*dst_handle, src_handle, asynchronous, callback_func, callback_arg, 0, NULL);
+	_starpu_data_cpy(*dst_handle, src_handle, asynchronous, NULL, NULL, 0, NULL);
 	(*dst_handle)->readonly = 1;
 
 	_starpu_spin_lock(&src_handle->header_lock);
 	src_handle->readonly_dup = (*dst_handle);
+	(*dst_handle)->readonly_dup_of = src_handle;
 	_starpu_spin_unlock(&src_handle->header_lock);
 
 	return 0;

+ 10 - 0
tests/Makefile.am

@@ -24,6 +24,7 @@ AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFL
 EXTRA_DIST =					\
 	helper.h				\
 	datawizard/locality.sh			\
+	main/increment_codelet.h		\
 	overlap/overlap.sh			\
 	datawizard/scal.h			\
 	datawizard/mpi_like.h			\
@@ -247,6 +248,7 @@ myPROGRAMS +=				\
 	main/subgraph_repeat_tag		\
 	main/subgraph_repeat_regenerate		\
 	main/subgraph_repeat_regenerate_tag	\
+	main/subgraph_repeat_regenerate_tag_cycle	\
 	main/empty_task_sync_point		\
 	main/empty_task_sync_point_tasks	\
 	main/tag_wait_api			\
@@ -684,6 +686,14 @@ main_subgraph_repeat_regenerate_tag_SOURCES +=		\
 	main/increment.cu
 endif
 
+main_subgraph_repeat_regenerate_tag_cycle_SOURCES =		\
+	main/subgraph_repeat_regenerate_tag_cycle.c		\
+	main/increment_codelet.c
+if STARPU_USE_CUDA
+main_subgraph_repeat_regenerate_tag_cycle_SOURCES +=		\
+	main/increment.cu
+endif
+
 sched_ctx_sched_ctx_list_SOURCES =	\
 	sched_ctx/sched_ctx_list.c
 

+ 28 - 8
tests/helper/starpu_data_dup_ro.c

@@ -37,23 +37,46 @@ int main(int argc, char **argv)
 	starpu_variable_data_register(&var1_handle, STARPU_MAIN_RAM, (uintptr_t)&var1, sizeof(var1));
 
 	/* Make a duplicate of the original data */
-	ret = starpu_data_dup_ro(&var2_handle, var1_handle, 1, NULL, NULL);
+	ret = starpu_data_dup_ro(&var2_handle, var1_handle, 1);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_dup_ro");
+
+	/* Free it */
+	starpu_data_unregister(var2_handle);
+
+	/* Make another duplicate of the original data */
+	ret = starpu_data_dup_ro(&var2_handle, var1_handle, 1);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_dup_ro");
+
+	/* Free it through submit */
+	starpu_data_unregister_submit(var2_handle);
+
+	/* Make another duplicate of the original data */
+	ret = starpu_data_dup_ro(&var2_handle, var1_handle, 1);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_dup_ro");
 
 	/* Make a second duplicate of the original data */
-	ret = starpu_data_dup_ro(&var3_handle, var1_handle, 1, NULL, NULL);
+	ret = starpu_data_dup_ro(&var3_handle, var1_handle, 1);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_dup_ro");
 	STARPU_ASSERT(var3_handle == var2_handle);
 
 	/* Make a duplicate of a duplicate */
-	ret = starpu_data_dup_ro(&var4_handle, var2_handle, 1, NULL, NULL);
+	ret = starpu_data_dup_ro(&var4_handle, var2_handle, 1);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_dup_ro");
 	STARPU_ASSERT(var4_handle == var2_handle);
 
-	starpu_task_insert(&increment_codelet, STARPU_RW, var1_handle, 0);
+	ret = starpu_task_insert(&increment_codelet, STARPU_RW, var1_handle, 0);
+	if (ret == -ENODEV)
+	{
+		starpu_data_unregister(var1_handle);
+		starpu_data_unregister(var2_handle);
+		starpu_data_unregister(var3_handle);
+		starpu_data_unregister(var4_handle);
+		starpu_shutdown();
+		return STARPU_TEST_SKIPPED;
+	}
 
 	/* Make a duplicate of the new value */
-	ret = starpu_data_dup_ro(&var5_handle, var1_handle, 1, NULL, NULL);
+	ret = starpu_data_dup_ro(&var5_handle, var1_handle, 1);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_dup_ro");
 
 	starpu_data_acquire(var2_handle, STARPU_R);
@@ -68,7 +91,6 @@ int main(int argc, char **argv)
 
 	starpu_data_acquire(var3_handle, STARPU_R);
 	var = starpu_data_get_local_ptr(var3_handle);
-	ret = EXIT_SUCCESS;
 	if (*var != 42)
 	{
 	     FPRINTF(stderr, "var3 is %d but it should be %d\n", *var, 42);
@@ -78,7 +100,6 @@ int main(int argc, char **argv)
 
 	starpu_data_acquire(var4_handle, STARPU_R);
 	var = starpu_data_get_local_ptr(var4_handle);
-	ret = EXIT_SUCCESS;
 	if (*var != 42)
 	{
 	     FPRINTF(stderr, "var4 is %d but it should be %d\n", *var, 42);
@@ -88,7 +109,6 @@ int main(int argc, char **argv)
 
 	starpu_data_acquire(var5_handle, STARPU_R);
 	var = starpu_data_get_local_ptr(var5_handle);
-	ret = EXIT_SUCCESS;
 	if (*var != 43)
 	{
 	     FPRINTF(stderr, "var5 is %d but it should be %d\n", *var, 43);

+ 226 - 0
tests/main/subgraph_repeat_regenerate_tag_cycle.c

@@ -0,0 +1,226 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include <common/thread.h>
+
+#include "increment_codelet.h"
+#include "../helper.h"
+
+/*
+ * Test that one can let a whole task graph repeatedly regenerate itself, using
+ * tag dependencies, with a complete cycle.
+ */
+
+#ifdef STARPU_QUICK_CHECK
+static unsigned niter = 64; /* number of iterations of the cycle (quick check build) */
+#else
+static unsigned niter = 16384; /* number of iterations of the cycle */
+#endif
+
+#define TAG_A 1 /* tag of taskA */
+#define TAG_B 2 /* tag of taskB */
+#define TAG_C 3 /* tag of taskC */
+#define TAG_D 4 /* tag of taskD */
+
+/*
+ *
+ *		    /-->B--\
+ *		    |      |
+ *	     -----> A      D---\--->
+ *		^   |      |   |
+ *		|   \-->C--/   |
+ *		|              |
+ *		\--------------/
+ *
+ *	- {B, C} depend on A
+ *	- D depends on {B, C}
+ *	- A, B, C and D are resubmitted at the end of the loop (or not)
+ */
+
+static struct starpu_task taskA, taskB, taskC, taskD; /* the four regenerating tasks of the cycle */
+
+static unsigned loop_cnt = 0; /* iterations counted by callback_task_D, accessed under mutex */
+static unsigned loop_cnt_A = 0; /* iterations counted by callback_task_A */
+static unsigned loop_cnt_B = 0; /* iterations counted by callback_task_B */
+static unsigned loop_cnt_C = 0; /* iterations counted by callback_task_C */
+static unsigned *check_cnt; /* buffer registered as check_data, the handle given to all four tasks */
+static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER; /* signalled by callback_task_D once loop_cnt reaches niter */
+static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; /* held around loop_cnt updates and the wait on cond */
+
+/* Callback of task A: count one more iteration; once niter iterations
+ * have completed, clear the regenerate flag of the task. */
+static void callback_task_A(void *arg)
+{
+	(void)arg;
+
+	if (++loop_cnt_A != niter)
+		return;
+
+	/* We are done */
+	taskA.regenerate = 0;
+}
+
+/* Callback of task B: count one more iteration; once niter iterations
+ * have completed, clear the regenerate flag of the task. */
+static void callback_task_B(void *arg)
+{
+	(void)arg;
+
+	if (++loop_cnt_B != niter)
+		return;
+
+	/* We are done */
+	taskB.regenerate = 0;
+}
+
+/* Callback of task C: count one more iteration; once niter iterations
+ * have completed, clear the regenerate flag of the task. */
+static void callback_task_C(void *arg)
+{
+	(void)arg;
+
+	if (++loop_cnt_C != niter)
+		return;
+
+	/* We are done */
+	taskC.regenerate = 0;
+}
+
+/* Callback of task D: count one more loop iteration under the mutex and,
+ * once niter iterations are done, stop regenerating task D and signal
+ * the condition variable main() waits on. */
+static void callback_task_D(void *arg)
+{
+	int finished;
+
+	(void)arg;
+
+	STARPU_PTHREAD_MUTEX_LOCK(&mutex);
+	finished = (++loop_cnt == niter);
+	if (finished)
+	{
+		/* We are done */
+		taskD.regenerate = 0;
+		STARPU_PTHREAD_COND_SIGNAL(&cond);
+	}
+	/* Otherwise, let's go for another iteration */
+	STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
+}
+
+/* Build the A -> {B, C} -> D -> A tag cycle out of regenerating tasks, start it
+ * through TAG_D, wait for niter iterations and check the resulting counter.
+ * Returns EXIT_SUCCESS, or STARPU_TEST_SKIPPED when StarPU reports -ENODEV. */
+int main(int argc, char **argv)
+{
+	int ret;
+
+	ret = starpu_initialize(NULL, &argc, &argv);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	/* Implicit data dependencies and regeneratable tasks are not compatible */
+	starpu_data_set_default_sequential_consistency_flag(0);
+
+	ret = starpu_malloc((void**)&check_cnt, sizeof(*check_cnt));
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc");
+	*check_cnt = 0;
+
+	starpu_data_handle_t check_data;
+	starpu_variable_data_register(&check_data, STARPU_MAIN_RAM, (uintptr_t)check_cnt, sizeof(*check_cnt));
+
+	starpu_task_init(&taskA);
+	taskA.cl = &increment_codelet;
+	taskA.regenerate = 1; /* this task will be explicitly resubmitted if needed */
+	taskA.use_tag = 1;
+	taskA.tag_id = TAG_A;
+	taskA.callback_func = callback_task_A;
+	taskA.handles[0] = check_data;
+
+	starpu_task_init(&taskB);
+	taskB.cl = &increment_codelet;
+	taskB.regenerate = 1;
+	taskB.use_tag = 1;
+	taskB.tag_id = TAG_B;
+	taskB.callback_func = callback_task_B;
+	taskB.handles[0] = check_data;
+
+	starpu_task_init(&taskC);
+	taskC.cl = &increment_codelet;
+	taskC.regenerate = 1;
+	taskC.use_tag = 1;
+	taskC.tag_id = TAG_C;
+	taskC.callback_func = callback_task_C;
+	taskC.handles[0] = check_data;
+
+	starpu_task_init(&taskD);
+	taskD.cl = &increment_codelet;
+	taskD.callback_func = callback_task_D;
+	taskD.regenerate = 1;
+	taskD.use_tag = 1;
+	taskD.tag_id = TAG_D;
+	taskD.handles[0] = check_data;
+
+	starpu_tag_declare_deps((starpu_tag_t) TAG_B, 1, (starpu_tag_t) TAG_A);
+	starpu_tag_declare_deps((starpu_tag_t) TAG_C, 1, (starpu_tag_t) TAG_A);
+
+	starpu_tag_declare_deps((starpu_tag_t) TAG_D, 2, (starpu_tag_t) TAG_B, (starpu_tag_t) TAG_C);
+
+	starpu_tag_declare_deps((starpu_tag_t) TAG_A, 1, (starpu_tag_t) TAG_D);
+
+	ret = starpu_task_submit(&taskA); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	ret = starpu_task_submit(&taskB); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	ret = starpu_task_submit(&taskC); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	ret = starpu_task_submit(&taskD); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+	/* Break the dependency cycle: A depends on TAG_D, so notify it once from here */
+	starpu_tag_notify_restart_from_apps((starpu_tag_t) TAG_D);
+
+	starpu_do_schedule();
+	/* Wait for the termination of all loops; condition waits may wake up spuriously, hence the loop */
+	STARPU_PTHREAD_MUTEX_LOCK(&mutex);
+	while (loop_cnt < niter)
+		STARPU_PTHREAD_COND_WAIT(&cond, &mutex);
+	STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
+
+	starpu_data_acquire(check_data, STARPU_R);
+	starpu_data_release(check_data);
+
+	STARPU_ASSERT(*check_cnt == (4*loop_cnt));
+
+	/* Unregister before freeing: the handle still references the buffer */
+	starpu_data_unregister(check_data);
+	starpu_free(check_cnt);
+
+	starpu_task_wait_for_all();
+	starpu_task_clean(&taskA);
+	starpu_task_clean(&taskB);
+	starpu_task_clean(&taskC);
+	starpu_task_clean(&taskD);
+
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+
+enodev:
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+ 	 * could perform the kernel, so this is not an error from StarPU */
+	starpu_data_unregister(check_data);
+	starpu_shutdown();
+	return STARPU_TEST_SKIPPED;
+}
+