hai 12 anos · f4a5edf775
--- a/doc/chapters/advanced-examples.texi
+++ b/doc/chapters/advanced-examples.texi
@@ -449,6 +449,17 @@ needs to be called to destroy the dummy task afterwards. See
 
				 @node Theoretical lower bound on execution time
			
 
				 @section Theoretical lower bound on execution time
			
 
				 
			
 
				+This API (@pxref{Theoretical lower bound on execution time API}) makes it possible
			
 
				+to record a trace of what tasks are needed to complete the 
			
 
				+application, and then, by using a linear system, provide a theoretical lower
			
 
				+bound of the execution time (i.e. with an ideal scheduling).
			
 
				+
			
 
				+The computed bound is not really correct when not taking into account
			
 
				+dependencies, but for an application which has enough parallelism, it is very
			
 
				+close to the bound computed with dependencies enabled (which takes a lot
			
 
				+more time to compute), and thus provides a good-enough estimation of the ideal
			
 
				+execution time.
			
 
				+
			
 
				 For kernels with history-based performance models (and provided that they are completely calibrated), StarPU can very easily provide a theoretical lower
			
 
				 bound for the execution time of a whole set of tasks. See for
			
 
				 instance @code{examples/lu/lu_example.c}: before submitting tasks,
			
--- a/doc/chapters/api.texi
+++ b/doc/chapters/api.texi
@@ -21,6 +21,7 @@
 
				 * Implicit Data Dependencies::
			
 
				 * Performance Model API::
			
 
				 * Profiling API::
			
 
				+* Theoretical lower bound on execution time API::
			
 
				 * CUDA extensions::
			
 
				 * OpenCL extensions::
			
 
				 * Miscellaneous helpers::
			
@@ -2813,6 +2814,42 @@ StarPU. StarPU must have been configured with the option
 
				 @code{----enable-memory-stats} (@pxref{Memory feedback}).
			
 
				 @end deftypefun
			
 
				 
			
 
				+@node Theoretical lower bound on execution time API
			
 
				+@section Theoretical lower bound on execution time
			
 
				+
			
 
				+@deftypefun void starpu_bound_start (int @var{deps}, int @var{prio})
			
 
				+Start recording tasks (resets stats).  @var{deps} tells whether
			
 
				+dependencies should be recorded too (this is quite expensive).
			
 
				+@end deftypefun
			
 
				+
			
 
				+@deftypefun void starpu_bound_stop (void)
			
 
				+Stop recording tasks.
			
 
				+@end deftypefun
			
 
				+
			
 
				+@deftypefun void starpu_bound_print_dot ({FILE *}@var{output})
			
 
				+Print the DAG that was recorded.
			
 
				+@end deftypefun
			
 
				+
			
 
				+@deftypefun void starpu_bound_compute ({double *}@var{res}, {double *}@var{integer_res}, int @var{integer})
			
 
				+Get theoretical lower bound (in ms) (needs glpk support detected by the @code{configure} script). It returns 0 if some performance models are not calibrated.
			
 
				+@end deftypefun
			
 
				+
			
 
				+@deftypefun void starpu_bound_print_lp ({FILE *}@var{output})
			
 
				+Emit the Linear Programming system on @var{output} for the recorded tasks, in
			
 
				+the lp format.
			
 
				+@end deftypefun
			
 
				+
			
 
				+@deftypefun void starpu_bound_print_mps ({FILE *}@var{output})
			
 
				+Emit the Linear Programming system on @var{output} for the recorded tasks, in
			
 
				+the mps format.
			
 
				+@end deftypefun
			
 
				+
			
 
				+@deftypefun void starpu_bound_print ({FILE *}@var{output}, int @var{integer})
			
 
				+Emit statistics of actual execution vs theoretical lower bound. @var{integer}
			
 
				+makes it possible to choose between integer solving (which takes a long time but is
			
 
				+correct), and relaxed solving (which provides an approximate solution).
			
 
				+@end deftypefun
			
 
				+
			
 
				 @node CUDA extensions
			
 
				 @section CUDA extensions
			
 
				 
			
--- a/doc/chapters/perf-feedback.texi
+++ b/doc/chapters/perf-feedback.texi
@@ -11,7 +11,6 @@
 
				 * On-line::                     On-line performance feedback
			
 
				 * Off-line::                    Off-line performance feedback
			
 
				 * Codelet performance::         Performance of codelets
			
 
				-* Theoretical lower bound on execution time API::
			
 
				 * Memory feedback::
			
 
				 * Data statistics::
			
 
				 @end menu
			
@@ -478,53 +477,6 @@ $ starpu_codelet_histo_profile distrib.data
 
				 Which will create one pdf file per codelet and per input size, showing a
			
 
				 histogram of the codelet execution time distribution.
			
 
				 
			
 
				-@node Theoretical lower bound on execution time API
			
 
				-@section Theoretical lower bound on execution time
			
 
				-
			
 
				-See @ref{Theoretical lower bound on execution time} for an example on how to use
			
 
				-this API. It permits to record a trace of what tasks are needed to complete the
			
 
				-application, and then, by using a linear system, provide a theoretical lower
			
 
				-bound of the execution time (i.e. with an ideal scheduling).
			
 
				-
			
 
				-The computed bound is not really correct when not taking into account
			
 
				-dependencies, but for an application which have enough parallelism, it is very
			
 
				-near to the bound computed with dependencies enabled (which takes a huge lot
			
 
				-more time to compute), and thus provides a good-enough estimation of the ideal
			
 
				-execution time.
			
 
				-
			
 
				-@deftypefun void starpu_bound_start (int @var{deps}, int @var{prio})
			
 
				-Start recording tasks (resets stats).  @var{deps} tells whether
			
 
				-dependencies should be recorded too (this is quite expensive)
			
 
				-@end deftypefun
			
 
				-
			
 
				-@deftypefun void starpu_bound_stop (void)
			
 
				-Stop recording tasks
			
 
				-@end deftypefun
			
 
				-
			
 
				-@deftypefun void starpu_bound_print_dot ({FILE *}@var{output})
			
 
				-Print the DAG that was recorded
			
 
				-@end deftypefun
			
 
				-
			
 
				-@deftypefun void starpu_bound_compute ({double *}@var{res}, {double *}@var{integer_res}, int @var{integer})
			
 
				-Get theoretical upper bound (in ms) (needs glpk support detected by @code{configure} script). It returns 0 if some performance models are not calibrated.
			
 
				-@end deftypefun
			
 
				-
			
 
				-@deftypefun void starpu_bound_print_lp ({FILE *}@var{output})
			
 
				-Emit the Linear Programming system on @var{output} for the recorded tasks, in
			
 
				-the lp format
			
 
				-@end deftypefun
			
 
				-
			
 
				-@deftypefun void starpu_bound_print_mps ({FILE *}@var{output})
			
 
				-Emit the Linear Programming system on @var{output} for the recorded tasks, in
			
 
				-the mps format
			
 
				-@end deftypefun
			
 
				-
			
 
				-@deftypefun void starpu_bound_print ({FILE *}@var{output}, int @var{integer})
			
 
				-Emit statistics of actual execution vs theoretical upper bound. @var{integer}
			
 
				-permits to choose between integer solving (which takes a long time but is
			
 
				-correct), and relaxed solving (which provides an approximate solution).
			
 
				-@end deftypefun
			
 
				-
			
 
				 @node Memory feedback
			
 
				 @section Memory feedback