/* * This file is part of the StarPU Handbook. * Copyright (C) 2009--2011 Universit@'e de Bordeaux * Copyright (C) 2010, 2011, 2012, 2013, 2014 CNRS * Copyright (C) 2011, 2012 INRIA * See the file version.doxy for copying conditions. */ /*! \defgroup API_Performance_Model Performance Model \enum starpu_perfmodel_archtype \ingroup API_Performance_Model Enumerates the various types of architectures. It is possible that we have multiple versions of the same kind of workers, for instance multiple GPUs or even different CPUs within the same machine, so we do not use the archtype enum type directly for performance models. \var starpu_perfmodel_archtype::STARPU_CPU_DEFAULT \ingroup API_Performance_Model CPU combined workers between 0 and STARPU_MAXCPUS-1 \var starpu_perfmodel_archtype::STARPU_CUDA_DEFAULT \ingroup API_Performance_Model CUDA workers \var starpu_perfmodel_archtype::STARPU_OPENCL_DEFAULT \ingroup API_Performance_Model OpenCL workers \var starpu_perfmodel_archtype::STARPU_MIC_DEFAULT \ingroup API_Performance_Model MIC workers \var starpu_perfmodel_archtype::STARPU_SCC_DEFAULT \ingroup API_Performance_Model SCC workers \enum starpu_perfmodel_type \ingroup API_Performance_Model TODO \var starpu_perfmodel_type::STARPU_PER_ARCH \ingroup API_Performance_Model Application-provided per-arch cost model function \var starpu_perfmodel_type::STARPU_COMMON \ingroup API_Performance_Model Application-provided common cost model function, with per-arch factor \var starpu_perfmodel_type::STARPU_HISTORY_BASED \ingroup API_Performance_Model Automatic history-based cost model \var starpu_perfmodel_type::STARPU_REGRESSION_BASED \ingroup API_Performance_Model Automatic linear regression-based cost model (alpha * size ^ beta) \var starpu_perfmodel_type::STARPU_NL_REGRESSION_BASED \ingroup API_Performance_Model Automatic non-linear regression-based cost model (a * size ^ b + c) \struct starpu_perfmodel Contains all information about a performance model. 
At least the type and symbol fields have to be filled when defining a performance model for a codelet. For compatibility, make sure to initialize the whole structure to zero, either by using explicit memset, or by letting the compiler implicitly do it in e.g. static storage case. If not provided, other fields have to be zero. \ingroup API_Performance_Model \var enum starpu_perfmodel_type starpu_perfmodel::type is the type of performance model \var const char *starpu_perfmodel::symbol is the symbol name for the performance model, which will be used as file name to store the model. It must be set otherwise the model will be ignored. \var double (*starpu_perfmodel::cost_function)(struct starpu_task *, unsigned nimpl) Used by ::STARPU_COMMON: takes a task and implementation number, and must return a task duration estimation in micro-seconds. \var size_t (*starpu_perfmodel::size_base)(struct starpu_task *, unsigned nimpl) Used by ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED and ::STARPU_NL_REGRESSION_BASED. If not NULL, takes a task and implementation number, and returns the size to be used as index to distinguish histories and as a base for regressions. \var uint32_t (*starpu_perfmodel::footprint)(struct starpu_task *) Used by ::STARPU_HISTORY_BASED. If not NULL, takes a task and returns the footprint to be used as index to distinguish histories. The default is to use the starpu_task_data_footprint function. \var unsigned starpu_perfmodel::is_loaded \private Whether the performance model is already loaded from the disk. \var unsigned starpu_perfmodel::benchmarking \private \var unsigned starpu_perfmodel::is_init todo \var starpu_perfmodel_state_t starpu_perfmodel::state \private \struct starpu_perfmodel_regression_model ... 
\ingroup API_Performance_Model \var double starpu_perfmodel_regression_model::sumlny sum of ln(measured) \var double starpu_perfmodel_regression_model::sumlnx sum of ln(size) \var double starpu_perfmodel_regression_model::sumlnx2 sum of ln(size)^2 \var unsigned long starpu_perfmodel_regression_model::minx minimum size \var unsigned long starpu_perfmodel_regression_model::maxx maximum size \var double starpu_perfmodel_regression_model::sumlnxlny sum of ln(size)*ln(measured) \var double starpu_perfmodel_regression_model::alpha estimated = alpha * size ^ beta \var double starpu_perfmodel_regression_model::beta estimated = alpha * size ^ beta \var unsigned starpu_perfmodel_regression_model::valid whether the linear regression model is valid (i.e. enough measures) \var double starpu_perfmodel_regression_model::a estimated = a * size ^ b + c \var double starpu_perfmodel_regression_model::b estimated = a * size ^ b + c \var double starpu_perfmodel_regression_model::c estimated = a * size ^ b + c \var unsigned starpu_perfmodel_regression_model::nl_valid whether the non-linear regression model is valid (i.e. enough measures) \var unsigned starpu_perfmodel_regression_model::nsample number of sample values for non-linear regression \struct starpu_perfmodel_per_arch Contains information about the performance model of a given arch. \ingroup API_Performance_Model \var starpu_perfmodel_per_arch_cost_function starpu_perfmodel_per_arch::cost_function Used by ::STARPU_PER_ARCH, must point to functions which take a task, the target arch and implementation number (as a mere convenience, since the array is already indexed by these), and must return a task duration estimation in micro-seconds. \var starpu_perfmodel_per_arch_size_base starpu_perfmodel_per_arch::size_base Same as in structure starpu_perfmodel, but per-arch, in case it depends on the architecture-specific implementation. 
\var struct starpu_perfmodel_history_table *starpu_perfmodel_per_arch::history \private The history of performance measurements. \var struct starpu_perfmodel_history_list *starpu_perfmodel_per_arch::list \private Used by ::STARPU_HISTORY_BASED and ::STARPU_NL_REGRESSION_BASED, records all execution history measures. \var struct starpu_perfmodel_regression_model starpu_perfmodel_per_arch::regression \private Used by ::STARPU_REGRESSION_BASED and ::STARPU_NL_REGRESSION_BASED, contains the estimated factors of the regression. \struct starpu_perfmodel_history_list todo \ingroup API_Performance_Model \var struct starpu_perfmodel_history_list *starpu_perfmodel_history_list::next todo \var struct starpu_perfmodel_history_entry *starpu_perfmodel_history_list::entry todo \struct starpu_perfmodel_history_entry todo \ingroup API_Performance_Model \var double starpu_perfmodel_history_entry::mean mean_n = 1/n sum \var double starpu_perfmodel_history_entry::deviation n dev_n = sum2 - 1/n (sum)^2 \var double starpu_perfmodel_history_entry::sum sum of samples (in µs) \var double starpu_perfmodel_history_entry::sum2 sum of samples^2 \var unsigned starpu_perfmodel_history_entry::nsample number of samples \var uint32_t starpu_perfmodel_history_entry::footprint data footprint \var size_t starpu_perfmodel_history_entry::size in bytes \var double starpu_perfmodel_history_entry::flops Provided by the application \fn void starpu_perfmodel_init(FILE *f, struct starpu_perfmodel *model) \ingroup API_Performance_Model todo \fn void starpu_perfmodel_free_sampling_directories(void) \ingroup API_Performance_Model This function frees internal memory used for sampling directory management. It should only be called by an application which is not calling starpu_shutdown(), as starpu_shutdown() already calls this function. See for example tools/starpu_perfmodel_display.c. 
\fn int starpu_perfmodel_load_symbol(const char *symbol, struct starpu_perfmodel *model) \ingroup API_Performance_Model loads a given performance model. The model structure has to be completely zero, and will be filled with the information saved in $STARPU_HOME/.starpu. The function is intended to be used by external tools that should read the performance model files. \fn int starpu_perfmodel_unload_model(struct starpu_perfmodel *model) \ingroup API_Performance_Model unloads the given model which has been previously loaded through the function starpu_perfmodel_load_symbol() \fn void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, char *path, size_t maxlen, unsigned nimpl) \ingroup API_Performance_Model returns the path to the debugging information for the performance model. \fn char* starpu_perfmodel_get_archtype_name(enum starpu_worker_archtype archtype) \ingroup API_Performance_Model todo \fn void starpu_perfmodel_get_arch_name(struct starpu_perfmodel_arch *arch, char *archname, size_t maxlen, unsigned nimpl) \ingroup API_Performance_Model returns the architecture name for \p arch \fn struct starpu_perfmodel_arch *starpu_worker_get_perf_archtype(int workerid, unsigned sched_ctx_id) \ingroup API_Performance_Model returns the architecture type of a given worker. 
\fn int starpu_perfmodel_list(FILE *output) \ingroup API_Performance_Model prints a list of all performance models on \p output \fn void starpu_perfmodel_directory(FILE *output) \ingroup API_Performance_Model prints the directory name storing performance models on \p output \fn void starpu_perfmodel_print(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, unsigned nimpl, char *parameter, uint32_t *footprint, FILE *output) \ingroup API_Performance_Model todo \fn int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char *parameter, uint32_t *footprint, FILE *output) \ingroup API_Performance_Model todo \fn void starpu_bus_print_bandwidth(FILE *f) \ingroup API_Performance_Model prints a matrix of bus bandwidths on \p f. \fn void starpu_bus_print_affinity(FILE *f) \ingroup API_Performance_Model prints the affinity devices on \p f. \fn void starpu_perfmodel_update_history(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned cpuid, unsigned nimpl, double measured); \ingroup API_Performance_Model This feeds the performance model \p model with an explicit measurement \p measured (in µs), in addition to measurements done by StarPU itself. This can be useful when the application already has an existing set of measurements done in good conditions, that StarPU could benefit from instead of doing on-line measurements. An example of use can be seen in \ref PerformanceModelExample. 
\fn double starpu_transfer_bandwidth(unsigned src_node, unsigned dst_node) \ingroup API_Performance_Model Return the bandwidth of data transfer between two memory nodes \fn double starpu_transfer_latency(unsigned src_node, unsigned dst_node) \ingroup API_Performance_Model Return the latency of data transfer between two memory nodes \fn double starpu_transfer_predict(unsigned src_node, unsigned dst_node, size_t size) \ingroup API_Performance_Model Return the estimated time to transfer a given size between two memory nodes. \fn double starpu_permodel_history_based_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, uint32_t footprint) \ingroup API_Performance_Model todo */