/* StarPU --- Runtime system for heterogeneous multicore architectures.
 *
 * Copyright (C) 2011-2013,2016                           Inria
 * Copyright (C) 2010-2017                                CNRS
 * Copyright (C) 2009-2011,2013-2017                      Université de Bordeaux
 *
 * StarPU is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * StarPU is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 */

/*! \defgroup API_Performance_Model Performance Model

\enum starpu_perfmodel_type
\ingroup API_Performance_Model
TODO
\var starpu_perfmodel_type::STARPU_PERFMODEL_INVALID
    todo
\var starpu_perfmodel_type::STARPU_PER_ARCH
    Application-provided per-arch cost model function
\var starpu_perfmodel_type::STARPU_COMMON
    Application-provided common cost model function, with per-arch
    factor
\var starpu_perfmodel_type::STARPU_HISTORY_BASED
    Automatic history-based cost model
\var starpu_perfmodel_type::STARPU_REGRESSION_BASED
    Automatic linear regression-based cost model (alpha * size ^
    beta)
\var starpu_perfmodel_type::STARPU_NL_REGRESSION_BASED
    Automatic non-linear regression-based cost model (a * size ^ b +
    c)
\var starpu_perfmodel_type::STARPU_MULTIPLE_REGRESSION_BASED
    Automatic multiple linear regression-based cost model. Application
    provides parameters, their combinations and exponents.

\struct starpu_perfmodel_device
todo
\ingroup API_Performance_Model
\var enum starpu_worker_archtype starpu_perfmodel_device::type
    type of the device
\var int starpu_perfmodel_device::devid
    identifier of the precise device
\var int starpu_perfmodel_device::ncore
    number of executions in parallel, minus 1

\struct starpu_perfmodel_arch
todo
\ingroup API_Performance_Model
\var int starpu_perfmodel_arch::ndevices
    number of the devices for the given arch
\var struct starpu_perfmodel_device *starpu_perfmodel_arch::devices
    list of the devices for the given arch

\struct starpu_perfmodel
Contain all information about a performance model. At least the
type and symbol fields have to be filled when defining a performance
model for a codelet. For compatibility, make sure to initialize the
whole structure to zero, either by using explicit memset, or by
letting the compiler implicitly do it in e.g. static storage case. If
not provided, other fields have to be zero.
\ingroup API_Performance_Model
\var enum starpu_perfmodel_type starpu_perfmodel::type
    type of performance model
    <ul>
    <li>
    ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED,
    ::STARPU_NL_REGRESSION_BASED: No other field needs to be
    provided, this is purely history-based.
    </li>
    <li>
    ::STARPU_MULTIPLE_REGRESSION_BASED: Need to provide fields
    starpu_perfmodel::nparameters (number of different parameters),
    starpu_perfmodel::ncombinations (number of parameters
    combinations-tuples) and table starpu_perfmodel::combinations
    which defines exponents of the equation. Function cl_perf_func
    also needs to define how to extract parameters from the task. 
    </li>
    <li>
    ::STARPU_PER_ARCH: either field
    starpu_perfmodel::arch_cost_function has to be filled with a
    function that returns the cost in micro-seconds on the arch given
    as parameter, or field starpu_perfmodel::per_arch has to be filled
    with functions which return the cost in micro-seconds.
    </li>
    <li>
    ::STARPU_COMMON: field starpu_perfmodel::cost_function has to be
    filled with a function that returns the cost in micro-seconds on a
    CPU, timing on other archs will be determined by multiplying by an
    arch-specific factor.
    </li>
    </ul>
\var const char *starpu_perfmodel::symbol
    symbol name for the performance model, which will be used as file
    name to store the model. It must be set otherwise the model will
    be ignored.
\var double (*starpu_perfmodel::cost_function)(struct starpu_task *, unsigned nimpl)
    Used by ::STARPU_COMMON. Take a task and implementation number,
    and must return a task duration estimation in micro-seconds.
\var double (*starpu_perfmodel::arch_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch* arch, unsigned nimpl)
    Used by ::STARPU_PER_ARCH. Take a task, an arch and implementation
    number, and must return a task duration estimation in
    micro-seconds on that arch.
\var size_t (*starpu_perfmodel::size_base)(struct starpu_task *, unsigned nimpl)
    Used by ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED and
    ::STARPU_NL_REGRESSION_BASED. If not <c>NULL</c>, take a task and
    implementation number, and return the size to be used as index to
    distinguish histories and as a base for regressions.
\var uint32_t (*starpu_perfmodel::footprint)(struct starpu_task *)
    Used by ::STARPU_HISTORY_BASED. If not <c>NULL</c>, take a task
    and return the footprint to be used as index to distinguish
    histories. The default is to use the starpu_task_data_footprint()
    function.
\var unsigned starpu_perfmodel::is_loaded
\private
    Whether the performance model is already loaded from the disk.
\var unsigned starpu_perfmodel::benchmarking
\private
    todo
\var unsigned starpu_perfmodel::is_init
    todo
\var starpu_perfmodel_state_t starpu_perfmodel::state
\private
    todo
\var void (*starpu_perfmodel::parameters)(struct starpu_task * task, double *parameters);
    todo
\var const char ** starpu_perfmodel::parameters_names
\private
    Names of parameters used for multiple linear regression models (M,
    N, K)
\var unsigned starpu_perfmodel::nparameters
\private
    Number of parameters used for multiple linear regression models
\var unsigned ** starpu_perfmodel::combinations
\private
    Table of combinations of parameters (and the exponents) used for
    multiple linear regression models
\var unsigned starpu_perfmodel::ncombinations
\private
    Number of combinations of parameters used for multiple linear
    regression models

\struct starpu_perfmodel_regression_model
todo
\ingroup API_Performance_Model
\var double starpu_perfmodel_regression_model::sumlny
    sum of ln(measured)
\var double starpu_perfmodel_regression_model::sumlnx
    sum of ln(size)
\var double starpu_perfmodel_regression_model::sumlnx2
    sum of ln(size)^2
\var unsigned long starpu_perfmodel_regression_model::minx
    minimum size
\var unsigned long starpu_perfmodel_regression_model::maxx
    maximum size
\var double starpu_perfmodel_regression_model::sumlnxlny
    sum of ln(size)*ln(measured)
\var double starpu_perfmodel_regression_model::alpha
    estimated = alpha * size ^ beta
\var double starpu_perfmodel_regression_model::beta
    estimated = alpha * size ^ beta
\var unsigned starpu_perfmodel_regression_model::valid
    whether the linear regression model is valid (i.e. enough measures)
\var double starpu_perfmodel_regression_model::a
    estimated = a * size ^ b + c
\var double starpu_perfmodel_regression_model::b
    estimated = a * size ^ b + c
\var double starpu_perfmodel_regression_model::c
    estimated = a * size ^ b + c
\var unsigned starpu_perfmodel_regression_model::nl_valid
    whether the non-linear regression model is valid (i.e. enough measures)
\var unsigned starpu_perfmodel_regression_model::nsample
    number of sample values for non-linear regression
\var double starpu_perfmodel_regression_model::coeff[]
    list of computed coefficients for multiple linear regression model
\var double starpu_perfmodel_regression_model::ncoeff
    number of coefficients for multiple linear regression model
\var double starpu_perfmodel_regression_model::multi_valid
    whether the multiple linear regression model is valid

\struct starpu_perfmodel_per_arch
Contain information about the performance model of a given
arch.
\ingroup API_Performance_Model
\var starpu_perfmodel_per_arch_cost_function starpu_perfmodel_per_arch::cost_function
    Used by ::STARPU_PER_ARCH, must point to functions which take a
    task, the target arch and implementation number (as a mere
    convenience, since the array is already indexed by these), and
    must return a task duration estimation in micro-seconds.
\var starpu_perfmodel_per_arch_size_base starpu_perfmodel_per_arch::size_base
    Same as in structure starpu_perfmodel, but per-arch, in case it
    depends on the architecture-specific implementation.
\var struct starpu_perfmodel_history_table *starpu_perfmodel_per_arch::history
\private
    The history of performance measurements.
\var struct starpu_perfmodel_history_list *starpu_perfmodel_per_arch::list
\private
    Used by ::STARPU_HISTORY_BASED, ::STARPU_NL_REGRESSION_BASED and
    ::STARPU_MULTIPLE_REGRESSION_BASED, records all execution history
    measures.
\var struct starpu_perfmodel_regression_model starpu_perfmodel_per_arch::regression
\private
    Used by ::STARPU_REGRESSION_BASED, ::STARPU_NL_REGRESSION_BASED
    and ::STARPU_MULTIPLE_REGRESSION_BASED, contains the estimated
    factors of the regression.

\struct starpu_perfmodel_history_list
todo
\ingroup API_Performance_Model
\var struct starpu_perfmodel_history_list *starpu_perfmodel_history_list::next
    todo
\var struct starpu_perfmodel_history_entry *starpu_perfmodel_history_list::entry
    todo

\struct starpu_perfmodel_history_entry
todo
\ingroup API_Performance_Model
\var double starpu_perfmodel_history_entry::mean
    mean_n = 1/n sum
\var double starpu_perfmodel_history_entry::deviation
    n dev_n = sum2 - 1/n (sum)^2
\var double starpu_perfmodel_history_entry::sum
    sum of samples (in µs)
\var double starpu_perfmodel_history_entry::sum2
    sum of samples^2
\var unsigned starpu_perfmodel_history_entry::nsample
    number of samples
\var uint32_t starpu_perfmodel_history_entry::footprint
    data footprint
\var size_t starpu_perfmodel_history_entry::size
    in bytes
\var double starpu_perfmodel_history_entry::flops
    Provided by the application

\fn void starpu_perfmodel_init(struct starpu_perfmodel *model)
\ingroup API_Performance_Model
todo

\fn void starpu_perfmodel_free_sampling_directories(void)
\ingroup API_Performance_Model
Free internal memory used for sampling directory
management. It should only be called by an application which is not
calling starpu_shutdown() as this function already calls it. See for
example <c>tools/starpu_perfmodel_display.c</c>.

\fn int starpu_perfmodel_load_file(const char *filename, struct starpu_perfmodel *model)
\ingroup API_Performance_Model
Load the performance model found in the file named \p filename. \p model has to be
completely zero, and will be filled with the information stored in the given file.

\fn int starpu_perfmodel_load_symbol(const char *symbol, struct starpu_perfmodel *model)
\ingroup API_Performance_Model
Load a given performance model. \p model has to be
completely zero, and will be filled with the information stored in
<c>$STARPU_HOME/.starpu</c>. The function is intended to be used by
external tools that want to read the performance model files.

\fn int starpu_perfmodel_unload_model(struct starpu_perfmodel *model)
\ingroup API_Performance_Model
Unload \p model which has been previously loaded
through the function starpu_perfmodel_load_symbol()

\fn void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, char *path, size_t maxlen, unsigned nimpl)
\ingroup API_Performance_Model
Return the path to the debugging information for the performance model.

\fn char* starpu_perfmodel_get_archtype_name(enum starpu_worker_archtype archtype)
\ingroup API_Performance_Model
todo

\fn void starpu_perfmodel_get_arch_name(struct starpu_perfmodel_arch *arch, char *archname, size_t maxlen, unsigned nimpl)
\ingroup API_Performance_Model
Return the architecture name for \p arch

\fn struct starpu_perfmodel_arch *starpu_worker_get_perf_archtype(int workerid, unsigned sched_ctx_id)
\ingroup API_Performance_Model
Return the architecture type of the worker \p workerid.

\fn void starpu_perfmodel_initialize(void)
\ingroup API_Performance_Model
If starpu_init() is not used, starpu_perfmodel_initialize() should be called before calling any starpu_perfmodel_* function.

\fn int starpu_perfmodel_list(FILE *output)
\ingroup API_Performance_Model
Print a list of all performance models on \p output

\fn void starpu_perfmodel_directory(FILE *output)
\ingroup API_Performance_Model
Print the directory name storing performance models on \p output

\fn void starpu_perfmodel_print(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, unsigned nimpl, char *parameter, uint32_t *footprint, FILE *output)
\ingroup API_Performance_Model
todo

\fn int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char *parameter, uint32_t *footprint, FILE *output)
\ingroup API_Performance_Model
todo

\fn int starpu_perfmodel_print_estimations(struct starpu_perfmodel *model, uint32_t footprint, FILE *output)
\ingroup API_Performance_Model
todo

\fn void starpu_bus_print_bandwidth(FILE *f)
\ingroup API_Performance_Model
Print a matrix of bus bandwidths on \p f.

\fn void starpu_bus_print_affinity(FILE *f)
\ingroup API_Performance_Model
Print the affinity devices on \p f.

\fn void starpu_bus_print_filenames(FILE *f)
\ingroup API_Performance_Model
Print on \p f the name of the files containing the matrix of bus bandwidths, the affinity devices and the latency.

\fn void starpu_perfmodel_update_history(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned cpuid, unsigned nimpl, double measured);
\ingroup API_Performance_Model
Feed the performance model \p model with an explicit
measurement \p measured (in µs), in addition to measurements done by StarPU
itself. This can be useful when the application already has an
existing set of measurements done in good conditions, that StarPU
could benefit from instead of doing on-line measurements. An example
of use can be seen in \ref PerformanceModelExample.

\fn double starpu_transfer_bandwidth(unsigned src_node, unsigned dst_node)
\ingroup API_Performance_Model
Return the bandwidth of data transfer between two memory nodes

\fn double starpu_transfer_latency(unsigned src_node, unsigned dst_node)
\ingroup API_Performance_Model
Return the latency of data transfer between two memory nodes

\fn double starpu_transfer_predict(unsigned src_node, unsigned dst_node, size_t size)
\ingroup API_Performance_Model
Return the estimated time to transfer a given size between two memory nodes.

\fn double starpu_perfmodel_history_based_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, uint32_t footprint)
\ingroup API_Performance_Model
Return the estimated time of a task with the given model and the given footprint.

\var starpu_perfmodel_nop
Performance model which always returns 1µs.

*/