12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246 |
- // Copyright 2014 Google Inc. All Rights Reserved.
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- package metrics
- import (
- "fmt"
- "regexp"
- "time"
- "github.com/google/cadvisor/container"
- info "github.com/google/cadvisor/info/v1"
- "github.com/prometheus/client_golang/prometheus"
- "k8s.io/klog"
- )
- // infoProvider will usually be manager.Manager, but can be swapped out for testing.
- type infoProvider interface {
- // SubcontainersInfo provides information about all subcontainers of the
- // specified container including itself.
- SubcontainersInfo(containerName string, query *info.ContainerInfoRequest) ([]*info.ContainerInfo, error)
- // GetVersionInfo provides information about the version.
- GetVersionInfo() (*info.VersionInfo, error)
- // GetMachineInfo provides information about the machine.
- GetMachineInfo() (*info.MachineInfo, error)
- }
type (
	// metricValue is a single sample belonging to a containerMetric: the
	// numeric value, the values of that metric's extra labels, and the time
	// the sample refers to.
	metricValue struct {
		value     float64
		labels    []string
		timestamp time.Time
	}

	// metricValues is the list of samples produced for one containerMetric.
	metricValues []metricValue
)
// asFloat64 returns v as a float64. It exists so that a plain conversion can
// be passed wherever a func(uint64) float64 is expected.
func asFloat64(v uint64) float64 {
	converted := float64(v)
	return converted
}
// asNanosecondsToSeconds interprets v as a count of nanoseconds and returns
// the equivalent number of seconds as a float64.
func asNanosecondsToSeconds(v uint64) float64 {
	// Plain float division is used on purpose: it is defined for the full
	// uint64 range, unlike going through time.Duration (a signed type).
	nanos := float64(v)
	return nanos / float64(time.Second)
}
- // fsValues is a helper method for assembling per-filesystem stats.
- func fsValues(fsStats []info.FsStats, valueFn func(*info.FsStats) float64, timestamp time.Time) metricValues {
- values := make(metricValues, 0, len(fsStats))
- for _, stat := range fsStats {
- values = append(values, metricValue{
- value: valueFn(&stat),
- labels: []string{stat.Device},
- timestamp: timestamp,
- })
- }
- return values
- }
- // ioValues is a helper method for assembling per-disk and per-filesystem stats.
- func ioValues(ioStats []info.PerDiskStats, ioType string, ioValueFn func(uint64) float64,
- fsStats []info.FsStats, valueFn func(*info.FsStats) float64, timestamp time.Time) metricValues {
- values := make(metricValues, 0, len(ioStats)+len(fsStats))
- for _, stat := range ioStats {
- values = append(values, metricValue{
- value: ioValueFn(stat.Stats[ioType]),
- labels: []string{stat.Device},
- timestamp: timestamp,
- })
- }
- for _, stat := range fsStats {
- values = append(values, metricValue{
- value: valueFn(&stat),
- labels: []string{stat.Device},
- timestamp: timestamp,
- })
- }
- return values
- }
- // containerMetric describes a multi-dimensional metric used for exposing a
- // certain type of container statistic.
- type containerMetric struct {
- name string
- help string
- valueType prometheus.ValueType
- extraLabels []string
- condition func(s info.ContainerSpec) bool
- getValues func(s *info.ContainerStats) metricValues
- }
- func (cm *containerMetric) desc(baseLabels []string) *prometheus.Desc {
- return prometheus.NewDesc(cm.name, cm.help, append(baseLabels, cm.extraLabels...), nil)
- }
- // ContainerLabelsFunc defines all base labels and their values attached to
- // each metric exported by cAdvisor.
- type ContainerLabelsFunc func(*info.ContainerInfo) map[string]string
- // PrometheusCollector implements prometheus.Collector.
- type PrometheusCollector struct {
- infoProvider infoProvider
- errors prometheus.Gauge
- containerMetrics []containerMetric
- containerLabelsFunc ContainerLabelsFunc
- includedMetrics container.MetricSet
- }
- // NewPrometheusCollector returns a new PrometheusCollector. The passed
- // ContainerLabelsFunc specifies which base labels will be attached to all
- // exported metrics. If left to nil, the DefaultContainerLabels function
- // will be used instead.
- func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetrics container.MetricSet) *PrometheusCollector {
- if f == nil {
- f = DefaultContainerLabels
- }
- c := &PrometheusCollector{
- infoProvider: i,
- containerLabelsFunc: f,
- errors: prometheus.NewGauge(prometheus.GaugeOpts{
- Namespace: "container",
- Name: "scrape_error",
- Help: "1 if there was an error while getting container metrics, 0 otherwise",
- }),
- containerMetrics: []containerMetric{
- {
- name: "container_last_seen",
- help: "Last time a container was seen by the exporter",
- valueType: prometheus.GaugeValue,
- getValues: func(s *info.ContainerStats) metricValues {
- return metricValues{{
- value: float64(time.Now().Unix()),
- timestamp: time.Now(),
- }}
- },
- },
- },
- includedMetrics: includedMetrics,
- }
- if includedMetrics.Has(container.CpuUsageMetrics) {
- c.containerMetrics = append(c.containerMetrics, []containerMetric{
- {
- name: "container_cpu_user_seconds_total",
- help: "Cumulative user cpu time consumed in seconds.",
- valueType: prometheus.CounterValue,
- getValues: func(s *info.ContainerStats) metricValues {
- return metricValues{
- {
- value: float64(s.Cpu.Usage.User) / float64(time.Second),
- timestamp: s.Timestamp,
- },
- }
- },
- }, {
- name: "container_cpu_system_seconds_total",
- help: "Cumulative system cpu time consumed in seconds.",
- valueType: prometheus.CounterValue,
- getValues: func(s *info.ContainerStats) metricValues {
- return metricValues{
- {
- value: float64(s.Cpu.Usage.System) / float64(time.Second),
- timestamp: s.Timestamp,
- },
- }
- },
- }, {
- name: "container_cpu_usage_seconds_total",
- help: "Cumulative cpu time consumed in seconds.",
- valueType: prometheus.CounterValue,
- extraLabels: []string{"cpu"},
- getValues: func(s *info.ContainerStats) metricValues {
- if len(s.Cpu.Usage.PerCpu) == 0 {
- if s.Cpu.Usage.Total > 0 {
- return metricValues{{
- value: float64(s.Cpu.Usage.Total) / float64(time.Second),
- labels: []string{"total"},
- timestamp: s.Timestamp,
- }}
- }
- }
- values := make(metricValues, 0, len(s.Cpu.Usage.PerCpu))
- for i, value := range s.Cpu.Usage.PerCpu {
- if value > 0 {
- values = append(values, metricValue{
- value: float64(value) / float64(time.Second),
- labels: []string{fmt.Sprintf("cpu%02d", i)},
- timestamp: s.Timestamp,
- })
- }
- }
- return values
- },
- }, {
- name: "container_cpu_cfs_periods_total",
- help: "Number of elapsed enforcement period intervals.",
- valueType: prometheus.CounterValue,
- condition: func(s info.ContainerSpec) bool { return s.Cpu.Quota != 0 },
- getValues: func(s *info.ContainerStats) metricValues {
- return metricValues{
- {
- value: float64(s.Cpu.CFS.Periods),
- timestamp: s.Timestamp,
- }}
- },
- }, {
- name: "container_cpu_cfs_throttled_periods_total",
- help: "Number of throttled period intervals.",
- valueType: prometheus.CounterValue,
- condition: func(s info.ContainerSpec) bool { return s.Cpu.Quota != 0 },
- getValues: func(s *info.ContainerStats) metricValues {
- return metricValues{
- {
- value: float64(s.Cpu.CFS.ThrottledPeriods),
- timestamp: s.Timestamp,
- }}
- },
- }, {
- name: "container_cpu_cfs_throttled_seconds_total",
- help: "Total time duration the container has been throttled.",
- valueType: prometheus.CounterValue,
- condition: func(s info.ContainerSpec) bool { return s.Cpu.Quota != 0 },
- getValues: func(s *info.ContainerStats) metricValues {
- return metricValues{
- {
- value: float64(s.Cpu.CFS.ThrottledTime) / float64(time.Second),
- timestamp: s.Timestamp,
- }}
- },
- },
- }...)
- }
- if includedMetrics.Has(container.ProcessSchedulerMetrics) {
- c.containerMetrics = append(c.containerMetrics, []containerMetric{
- {
- name: "container_cpu_schedstat_run_seconds_total",
- help: "Time duration the processes of the container have run on the CPU.",
- valueType: prometheus.CounterValue,
- getValues: func(s *info.ContainerStats) metricValues {
- return metricValues{{
- value: float64(s.Cpu.Schedstat.RunTime) / float64(time.Second),
- timestamp: s.Timestamp,
- }}
- },
- }, {
- name: "container_cpu_schedstat_runqueue_seconds_total",
- help: "Time duration processes of the container have been waiting on a runqueue.",
- valueType: prometheus.CounterValue,
- getValues: func(s *info.ContainerStats) metricValues {
- return metricValues{{
- value: float64(s.Cpu.Schedstat.RunqueueTime) / float64(time.Second),
- timestamp: s.Timestamp,
- }}
- },
- }, {
- name: "container_cpu_schedstat_run_periods_total",
- help: "Number of times processes of the cgroup have run on the cpu",
- valueType: prometheus.CounterValue,
- getValues: func(s *info.ContainerStats) metricValues {
- return metricValues{{
- value: float64(s.Cpu.Schedstat.RunPeriods),
- timestamp: s.Timestamp,
- }}
- },
- },
- }...)
- }
- if includedMetrics.Has(container.CpuLoadMetrics) {
- c.containerMetrics = append(c.containerMetrics, []containerMetric{
- {
- name: "container_cpu_load_average_10s",
- help: "Value of container cpu load average over the last 10 seconds.",
- valueType: prometheus.GaugeValue,
- getValues: func(s *info.ContainerStats) metricValues {
- return metricValues{{value: float64(s.Cpu.LoadAverage), timestamp: s.Timestamp}}
- },
- }, {
- name: "container_tasks_state",
- help: "Number of tasks in given state",
- extraLabels: []string{"state"},
- valueType: prometheus.GaugeValue,
- getValues: func(s *info.ContainerStats) metricValues {
- return metricValues{
- {
- value: float64(s.TaskStats.NrSleeping),
- labels: []string{"sleeping"},
- timestamp: s.Timestamp,
- },
- {
- value: float64(s.TaskStats.NrRunning),
- labels: []string{"running"},
- timestamp: s.Timestamp,
- },
- {
- value: float64(s.TaskStats.NrStopped),
- labels: []string{"stopped"},
- timestamp: s.Timestamp,
- },
- {
- value: float64(s.TaskStats.NrUninterruptible),
- labels: []string{"uninterruptible"},
- timestamp: s.Timestamp,
- },
- {
- value: float64(s.TaskStats.NrIoWait),
- labels: []string{"iowaiting"},
- timestamp: s.Timestamp,
- },
- }
- },
- },
- }...)
- }
- if includedMetrics.Has(container.MemoryUsageMetrics) {
- c.containerMetrics = append(c.containerMetrics, []containerMetric{
- {
- name: "container_memory_cache",
- help: "Number of bytes of page cache memory.",
- valueType: prometheus.GaugeValue,
- getValues: func(s *info.ContainerStats) metricValues {
- return metricValues{{value: float64(s.Memory.Cache), timestamp: s.Timestamp}}
- },
- }, {
- name: "container_memory_rss",
- help: "Size of RSS in bytes.",
- valueType: prometheus.GaugeValue,
- getValues: func(s *info.ContainerStats) metricValues {
- return metricValues{{value: float64(s.Memory.RSS), timestamp: s.Timestamp}}
- },
- }, {
- name: "container_memory_mapped_file",
- help: "Size of memory mapped files in bytes.",
- valueType: prometheus.GaugeValue,
- getValues: func(s *info.ContainerStats) metricValues {
- return metricValues{{value: float64(s.Memory.MappedFile), timestamp: s.Timestamp}}
- },
- }, {
- name: "container_memory_swap",
- help: "Container swap usage in bytes.",
- valueType: prometheus.GaugeValue,
- getValues: func(s *info.ContainerStats) metricValues {
- return metricValues{{value: float64(s.Memory.Swap), timestamp: s.Timestamp}}
- },
- }, {
- name: "container_memory_failcnt",
- help: "Number of memory usage hits limits",
- valueType: prometheus.CounterValue,
- getValues: func(s *info.ContainerStats) metricValues {
- return metricValues{{
- value: float64(s.Memory.Failcnt),
- timestamp: s.Timestamp,
- }}
- },
- }, {
- name: "container_memory_usage_bytes",
- help: "Current memory usage in bytes, including all memory regardless of when it was accessed",
- valueType: prometheus.GaugeValue,
- getValues: func(s *info.ContainerStats) metricValues {
- return metricValues{{value: float64(s.Memory.Usage), timestamp: s.Timestamp}}
- },
- },
- {
- name: "container_memory_max_usage_bytes",
- help: "Maximum memory usage recorded in bytes",
- valueType: prometheus.GaugeValue,
- getValues: func(s *info.ContainerStats) metricValues {
- return metricValues{{value: float64(s.Memory.MaxUsage), timestamp: s.Timestamp}}
- },
- }, {
- name: "container_memory_working_set_bytes",
- help: "Current working set in bytes.",
- valueType: prometheus.GaugeValue,
- getValues: func(s *info.ContainerStats) metricValues {
- return metricValues{{value: float64(s.Memory.WorkingSet), timestamp: s.Timestamp}}
- },
- }, {
- name: "container_memory_failures_total",
- help: "Cumulative count of memory allocation failures.",
- valueType: prometheus.CounterValue,
- extraLabels: []string{"failure_type", "scope"},
- getValues: func(s *info.ContainerStats) metricValues {
- return metricValues{
- {
- value: float64(s.Memory.ContainerData.Pgfault),
- labels: []string{"pgfault", "container"},
- timestamp: s.Timestamp,
- },
- {
- value: float64(s.Memory.ContainerData.Pgmajfault),
- labels: []string{"pgmajfault", "container"},
- timestamp: s.Timestamp,
- },
- {
- value: float64(s.Memory.HierarchicalData.Pgfault),
- labels: []string{"pgfault", "hierarchy"},
- timestamp: s.Timestamp,
- },
- {
- value: float64(s.Memory.HierarchicalData.Pgmajfault),
- labels: []string{"pgmajfault", "hierarchy"},
- timestamp: s.Timestamp,
- },
- }
- },
- },
- }...)
- }
- if includedMetrics.Has(container.AcceleratorUsageMetrics) {
- c.containerMetrics = append(c.containerMetrics, []containerMetric{
- {
- name: "container_accelerator_memory_total_bytes",
- help: "Total accelerator memory.",
- valueType: prometheus.GaugeValue,
- extraLabels: []string{"make", "model", "acc_id"},
- getValues: func(s *info.ContainerStats) metricValues {
- values := make(metricValues, 0, len(s.Accelerators))
- for _, value := range s.Accelerators {
- values = append(values, metricValue{
- value: float64(value.MemoryTotal),
- labels: []string{value.Make, value.Model, value.ID},
- timestamp: s.Timestamp,
- })
- }
- return values
- },
- }, {
- name: "container_accelerator_memory_used_bytes",
- help: "Total accelerator memory allocated.",
- valueType: prometheus.GaugeValue,
- extraLabels: []string{"make", "model", "acc_id"},
- getValues: func(s *info.ContainerStats) metricValues {
- values := make(metricValues, 0, len(s.Accelerators))
- for _, value := range s.Accelerators {
- values = append(values, metricValue{
- value: float64(value.MemoryUsed),
- labels: []string{value.Make, value.Model, value.ID},
- timestamp: s.Timestamp,
- })
- }
- return values
- },
- }, {
- name: "container_accelerator_duty_cycle",
- help: "Percent of time over the past sample period during which the accelerator was actively processing.",
- valueType: prometheus.GaugeValue,
- extraLabels: []string{"make", "model", "acc_id"},
- getValues: func(s *info.ContainerStats) metricValues {
- values := make(metricValues, 0, len(s.Accelerators))
- for _, value := range s.Accelerators {
- values = append(values, metricValue{
- value: float64(value.DutyCycle),
- labels: []string{value.Make, value.Model, value.ID},
- timestamp: s.Timestamp,
- })
- }
- return values
- },
- },
- }...)
- }
- if includedMetrics.Has(container.DiskUsageMetrics) {
- c.containerMetrics = append(c.containerMetrics, []containerMetric{
- {
- name: "container_fs_inodes_free",
- help: "Number of available Inodes",
- valueType: prometheus.GaugeValue,
- extraLabels: []string{"device"},
- getValues: func(s *info.ContainerStats) metricValues {
- return fsValues(s.Filesystem, func(fs *info.FsStats) float64 {
- return float64(fs.InodesFree)
- }, s.Timestamp)
- },
- }, {
- name: "container_fs_inodes_total",
- help: "Number of Inodes",
- valueType: prometheus.GaugeValue,
- extraLabels: []string{"device"},
- getValues: func(s *info.ContainerStats) metricValues {
- return fsValues(s.Filesystem, func(fs *info.FsStats) float64 {
- return float64(fs.Inodes)
- }, s.Timestamp)
- },
- }, {
- name: "container_fs_limit_bytes",
- help: "Number of bytes that can be consumed by the container on this filesystem.",
- valueType: prometheus.GaugeValue,
- extraLabels: []string{"device"},
- getValues: func(s *info.ContainerStats) metricValues {
- return fsValues(s.Filesystem, func(fs *info.FsStats) float64 {
- return float64(fs.Limit)
- }, s.Timestamp)
- },
- }, {
- name: "container_fs_usage_bytes",
- help: "Number of bytes that are consumed by the container on this filesystem.",
- valueType: prometheus.GaugeValue,
- extraLabels: []string{"device"},
- getValues: func(s *info.ContainerStats) metricValues {
- return fsValues(s.Filesystem, func(fs *info.FsStats) float64 {
- return float64(fs.Usage)
- }, s.Timestamp)
- },
- },
- }...)
- }
- if includedMetrics.Has(container.DiskIOMetrics) {
- c.containerMetrics = append(c.containerMetrics, []containerMetric{
- {
- name: "container_fs_reads_bytes_total",
- help: "Cumulative count of bytes read",
- valueType: prometheus.CounterValue,
- extraLabels: []string{"device"},
- getValues: func(s *info.ContainerStats) metricValues {
- return ioValues(
- s.DiskIo.IoServiceBytes, "Read", asFloat64,
- nil, nil,
- s.Timestamp,
- )
- },
- }, {
- name: "container_fs_reads_total",
- help: "Cumulative count of reads completed",
- valueType: prometheus.CounterValue,
- extraLabels: []string{"device"},
- getValues: func(s *info.ContainerStats) metricValues {
- return ioValues(
- s.DiskIo.IoServiced, "Read", asFloat64,
- s.Filesystem, func(fs *info.FsStats) float64 {
- return float64(fs.ReadsCompleted)
- },
- s.Timestamp,
- )
- },
- }, {
- name: "container_fs_sector_reads_total",
- help: "Cumulative count of sector reads completed",
- valueType: prometheus.CounterValue,
- extraLabels: []string{"device"},
- getValues: func(s *info.ContainerStats) metricValues {
- return ioValues(
- s.DiskIo.Sectors, "Read", asFloat64,
- s.Filesystem, func(fs *info.FsStats) float64 {
- return float64(fs.SectorsRead)
- },
- s.Timestamp,
- )
- },
- }, {
- name: "container_fs_reads_merged_total",
- help: "Cumulative count of reads merged",
- valueType: prometheus.CounterValue,
- extraLabels: []string{"device"},
- getValues: func(s *info.ContainerStats) metricValues {
- return ioValues(
- s.DiskIo.IoMerged, "Read", asFloat64,
- s.Filesystem, func(fs *info.FsStats) float64 {
- return float64(fs.ReadsMerged)
- },
- s.Timestamp,
- )
- },
- }, {
- name: "container_fs_read_seconds_total",
- help: "Cumulative count of seconds spent reading",
- valueType: prometheus.CounterValue,
- extraLabels: []string{"device"},
- getValues: func(s *info.ContainerStats) metricValues {
- return ioValues(
- s.DiskIo.IoServiceTime, "Read", asNanosecondsToSeconds,
- s.Filesystem, func(fs *info.FsStats) float64 {
- return float64(fs.ReadTime) / float64(time.Second)
- },
- s.Timestamp,
- )
- },
- }, {
- name: "container_fs_writes_bytes_total",
- help: "Cumulative count of bytes written",
- valueType: prometheus.CounterValue,
- extraLabels: []string{"device"},
- getValues: func(s *info.ContainerStats) metricValues {
- return ioValues(
- s.DiskIo.IoServiceBytes, "Write", asFloat64,
- nil, nil,
- s.Timestamp,
- )
- },
- }, {
- name: "container_fs_writes_total",
- help: "Cumulative count of writes completed",
- valueType: prometheus.CounterValue,
- extraLabels: []string{"device"},
- getValues: func(s *info.ContainerStats) metricValues {
- return ioValues(
- s.DiskIo.IoServiced, "Write", asFloat64,
- s.Filesystem, func(fs *info.FsStats) float64 {
- return float64(fs.WritesCompleted)
- },
- s.Timestamp,
- )
- },
- }, {
- name: "container_fs_sector_writes_total",
- help: "Cumulative count of sector writes completed",
- valueType: prometheus.CounterValue,
- extraLabels: []string{"device"},
- getValues: func(s *info.ContainerStats) metricValues {
- return ioValues(
- s.DiskIo.Sectors, "Write", asFloat64,
- s.Filesystem, func(fs *info.FsStats) float64 {
- return float64(fs.SectorsWritten)
- },
- s.Timestamp,
- )
- },
- }, {
- name: "container_fs_writes_merged_total",
- help: "Cumulative count of writes merged",
- valueType: prometheus.CounterValue,
- extraLabels: []string{"device"},
- getValues: func(s *info.ContainerStats) metricValues {
- return ioValues(
- s.DiskIo.IoMerged, "Write", asFloat64,
- s.Filesystem, func(fs *info.FsStats) float64 {
- return float64(fs.WritesMerged)
- },
- s.Timestamp,
- )
- },
- }, {
- name: "container_fs_write_seconds_total",
- help: "Cumulative count of seconds spent writing",
- valueType: prometheus.CounterValue,
- extraLabels: []string{"device"},
- getValues: func(s *info.ContainerStats) metricValues {
- return ioValues(
- s.DiskIo.IoServiceTime, "Write", asNanosecondsToSeconds,
- s.Filesystem, func(fs *info.FsStats) float64 {
- return float64(fs.WriteTime) / float64(time.Second)
- },
- s.Timestamp,
- )
- },
- }, {
- name: "container_fs_io_current",
- help: "Number of I/Os currently in progress",
- valueType: prometheus.GaugeValue,
- extraLabels: []string{"device"},
- getValues: func(s *info.ContainerStats) metricValues {
- return ioValues(
- s.DiskIo.IoQueued, "Total", asFloat64,
- s.Filesystem, func(fs *info.FsStats) float64 {
- return float64(fs.IoInProgress)
- },
- s.Timestamp,
- )
- },
- }, {
- name: "container_fs_io_time_seconds_total",
- help: "Cumulative count of seconds spent doing I/Os",
- valueType: prometheus.CounterValue,
- extraLabels: []string{"device"},
- getValues: func(s *info.ContainerStats) metricValues {
- return ioValues(
- s.DiskIo.IoServiceTime, "Total", asNanosecondsToSeconds,
- s.Filesystem, func(fs *info.FsStats) float64 {
- return float64(float64(fs.IoTime) / float64(time.Second))
- },
- s.Timestamp,
- )
- },
- }, {
- name: "container_fs_io_time_weighted_seconds_total",
- help: "Cumulative weighted I/O time in seconds",
- valueType: prometheus.CounterValue,
- extraLabels: []string{"device"},
- getValues: func(s *info.ContainerStats) metricValues {
- return fsValues(s.Filesystem, func(fs *info.FsStats) float64 {
- return float64(fs.WeightedIoTime) / float64(time.Second)
- }, s.Timestamp)
- },
- },
- }...)
- }
- if includedMetrics.Has(container.NetworkUsageMetrics) {
- c.containerMetrics = append(c.containerMetrics, []containerMetric{
- {
- name: "container_network_receive_bytes_total",
- help: "Cumulative count of bytes received",
- valueType: prometheus.CounterValue,
- extraLabels: []string{"interface"},
- getValues: func(s *info.ContainerStats) metricValues {
- values := make(metricValues, 0, len(s.Network.Interfaces))
- for _, value := range s.Network.Interfaces {
- values = append(values, metricValue{
- value: float64(value.RxBytes),
- labels: []string{value.Name},
- timestamp: s.Timestamp,
- })
- }
- return values
- },
- }, {
- name: "container_network_receive_packets_total",
- help: "Cumulative count of packets received",
- valueType: prometheus.CounterValue,
- extraLabels: []string{"interface"},
- getValues: func(s *info.ContainerStats) metricValues {
- values := make(metricValues, 0, len(s.Network.Interfaces))
- for _, value := range s.Network.Interfaces {
- values = append(values, metricValue{
- value: float64(value.RxPackets),
- labels: []string{value.Name},
- timestamp: s.Timestamp,
- })
- }
- return values
- },
- }, {
- name: "container_network_receive_packets_dropped_total",
- help: "Cumulative count of packets dropped while receiving",
- valueType: prometheus.CounterValue,
- extraLabels: []string{"interface"},
- getValues: func(s *info.ContainerStats) metricValues {
- values := make(metricValues, 0, len(s.Network.Interfaces))
- for _, value := range s.Network.Interfaces {
- values = append(values, metricValue{
- value: float64(value.RxDropped),
- labels: []string{value.Name},
- timestamp: s.Timestamp,
- })
- }
- return values
- },
- }, {
- name: "container_network_receive_errors_total",
- help: "Cumulative count of errors encountered while receiving",
- valueType: prometheus.CounterValue,
- extraLabels: []string{"interface"},
- getValues: func(s *info.ContainerStats) metricValues {
- values := make(metricValues, 0, len(s.Network.Interfaces))
- for _, value := range s.Network.Interfaces {
- values = append(values, metricValue{
- value: float64(value.RxErrors),
- labels: []string{value.Name},
- timestamp: s.Timestamp,
- })
- }
- return values
- },
- }, {
- name: "container_network_transmit_bytes_total",
- help: "Cumulative count of bytes transmitted",
- valueType: prometheus.CounterValue,
- extraLabels: []string{"interface"},
- getValues: func(s *info.ContainerStats) metricValues {
- values := make(metricValues, 0, len(s.Network.Interfaces))
- for _, value := range s.Network.Interfaces {
- values = append(values, metricValue{
- value: float64(value.TxBytes),
- labels: []string{value.Name},
- timestamp: s.Timestamp,
- })
- }
- return values
- },
- }, {
- name: "container_network_transmit_packets_total",
- help: "Cumulative count of packets transmitted",
- valueType: prometheus.CounterValue,
- extraLabels: []string{"interface"},
- getValues: func(s *info.ContainerStats) metricValues {
- values := make(metricValues, 0, len(s.Network.Interfaces))
- for _, value := range s.Network.Interfaces {
- values = append(values, metricValue{
- value: float64(value.TxPackets),
- labels: []string{value.Name},
- timestamp: s.Timestamp,
- })
- }
- return values
- },
- }, {
- name: "container_network_transmit_packets_dropped_total",
- help: "Cumulative count of packets dropped while transmitting",
- valueType: prometheus.CounterValue,
- extraLabels: []string{"interface"},
- getValues: func(s *info.ContainerStats) metricValues {
- values := make(metricValues, 0, len(s.Network.Interfaces))
- for _, value := range s.Network.Interfaces {
- values = append(values, metricValue{
- value: float64(value.TxDropped),
- labels: []string{value.Name},
- timestamp: s.Timestamp,
- })
- }
- return values
- },
- }, {
- name: "container_network_transmit_errors_total",
- help: "Cumulative count of errors encountered while transmitting",
- valueType: prometheus.CounterValue,
- extraLabels: []string{"interface"},
- getValues: func(s *info.ContainerStats) metricValues {
- values := make(metricValues, 0, len(s.Network.Interfaces))
- for _, value := range s.Network.Interfaces {
- values = append(values, metricValue{
- value: float64(value.TxErrors),
- labels: []string{value.Name},
- timestamp: s.Timestamp,
- })
- }
- return values
- },
- },
- }...)
- }
- if includedMetrics.Has(container.NetworkTcpUsageMetrics) {
- c.containerMetrics = append(c.containerMetrics, []containerMetric{
- {
- name: "container_network_tcp_usage_total",
- help: "tcp connection usage statistic for container",
- valueType: prometheus.GaugeValue,
- extraLabels: []string{"tcp_state"},
- getValues: func(s *info.ContainerStats) metricValues {
- return metricValues{
- {
- value: float64(s.Network.Tcp.Established),
- labels: []string{"established"},
- timestamp: s.Timestamp,
- },
- {
- value: float64(s.Network.Tcp.SynSent),
- labels: []string{"synsent"},
- timestamp: s.Timestamp,
- },
- {
- value: float64(s.Network.Tcp.SynRecv),
- labels: []string{"synrecv"},
- timestamp: s.Timestamp,
- },
- {
- value: float64(s.Network.Tcp.FinWait1),
- labels: []string{"finwait1"},
- timestamp: s.Timestamp,
- },
- {
- value: float64(s.Network.Tcp.FinWait2),
- labels: []string{"finwait2"},
- timestamp: s.Timestamp,
- },
- {
- value: float64(s.Network.Tcp.TimeWait),
- labels: []string{"timewait"},
- timestamp: s.Timestamp,
- },
- {
- value: float64(s.Network.Tcp.Close),
- labels: []string{"close"},
- timestamp: s.Timestamp,
- },
- {
- value: float64(s.Network.Tcp.CloseWait),
- labels: []string{"closewait"},
- timestamp: s.Timestamp,
- },
- {
- value: float64(s.Network.Tcp.LastAck),
- labels: []string{"lastack"},
- timestamp: s.Timestamp,
- },
- {
- value: float64(s.Network.Tcp.Listen),
- labels: []string{"listen"},
- timestamp: s.Timestamp,
- },
- {
- value: float64(s.Network.Tcp.Closing),
- labels: []string{"closing"},
- timestamp: s.Timestamp,
- },
- }
- },
- },
- }...)
- c.containerMetrics = append(c.containerMetrics, []containerMetric{
- {
- name: "container_network_tcp6_usage_total",
- help: "tcp6 connection usage statistic for container",
- valueType: prometheus.GaugeValue,
- extraLabels: []string{"tcp_state"},
- getValues: func(s *info.ContainerStats) metricValues {
- return metricValues{
- {
- value: float64(s.Network.Tcp6.Established),
- labels: []string{"established"},
- timestamp: s.Timestamp,
- },
- {
- value: float64(s.Network.Tcp6.SynSent),
- labels: []string{"synsent"},
- timestamp: s.Timestamp,
- },
- {
- value: float64(s.Network.Tcp6.SynRecv),
- labels: []string{"synrecv"},
- timestamp: s.Timestamp,
- },
- {
- value: float64(s.Network.Tcp6.FinWait1),
- labels: []string{"finwait1"},
- timestamp: s.Timestamp,
- },
- {
- value: float64(s.Network.Tcp6.FinWait2),
- labels: []string{"finwait2"},
- timestamp: s.Timestamp,
- },
- {
- value: float64(s.Network.Tcp6.TimeWait),
- labels: []string{"timewait"},
- timestamp: s.Timestamp,
- },
- {
- value: float64(s.Network.Tcp6.Close),
- labels: []string{"close"},
- timestamp: s.Timestamp,
- },
- {
- value: float64(s.Network.Tcp6.CloseWait),
- labels: []string{"closewait"},
- timestamp: s.Timestamp,
- },
- {
- value: float64(s.Network.Tcp6.LastAck),
- labels: []string{"lastack"},
- timestamp: s.Timestamp,
- },
- {
- value: float64(s.Network.Tcp6.Listen),
- labels: []string{"listen"},
- timestamp: s.Timestamp,
- },
- {
- value: float64(s.Network.Tcp6.Closing),
- labels: []string{"closing"},
- timestamp: s.Timestamp,
- },
- }
- },
- },
- }...)
- }
- if includedMetrics.Has(container.NetworkUdpUsageMetrics) {
- c.containerMetrics = append(c.containerMetrics, []containerMetric{
- {
- name: "container_network_udp6_usage_total",
- help: "udp6 connection usage statistic for container",
- valueType: prometheus.GaugeValue,
- extraLabels: []string{"udp_state"},
- getValues: func(s *info.ContainerStats) metricValues {
- return metricValues{
- {
- value: float64(s.Network.Udp6.Listen),
- labels: []string{"listen"},
- timestamp: s.Timestamp,
- },
- {
- value: float64(s.Network.Udp6.Dropped),
- labels: []string{"dropped"},
- timestamp: s.Timestamp,
- },
- {
- value: float64(s.Network.Udp6.RxQueued),
- labels: []string{"rxqueued"},
- timestamp: s.Timestamp,
- },
- {
- value: float64(s.Network.Udp6.TxQueued),
- labels: []string{"txqueued"},
- timestamp: s.Timestamp,
- },
- }
- },
- },
- }...)
- c.containerMetrics = append(c.containerMetrics, []containerMetric{
- {
- name: "container_network_udp_usage_total",
- help: "udp connection usage statistic for container",
- valueType: prometheus.GaugeValue,
- extraLabels: []string{"udp_state"},
- getValues: func(s *info.ContainerStats) metricValues {
- return metricValues{
- {
- value: float64(s.Network.Udp.Listen),
- labels: []string{"listen"},
- timestamp: s.Timestamp,
- },
- {
- value: float64(s.Network.Udp.Dropped),
- labels: []string{"dropped"},
- timestamp: s.Timestamp,
- },
- {
- value: float64(s.Network.Udp.RxQueued),
- labels: []string{"rxqueued"},
- timestamp: s.Timestamp,
- },
- {
- value: float64(s.Network.Udp.TxQueued),
- labels: []string{"txqueued"},
- timestamp: s.Timestamp,
- },
- }
- },
- },
- }...)
- }
- if includedMetrics.Has(container.ProcessMetrics) {
- c.containerMetrics = append(c.containerMetrics, []containerMetric{
- {
- name: "container_processes",
- help: "Number of processes running inside the container.",
- valueType: prometheus.GaugeValue,
- getValues: func(s *info.ContainerStats) metricValues {
- return metricValues{{value: float64(s.Processes.ProcessCount), timestamp: s.Timestamp}}
- },
- },
- {
- name: "container_file_descriptors",
- help: "Number of open file descriptors for the container.",
- valueType: prometheus.GaugeValue,
- getValues: func(s *info.ContainerStats) metricValues {
- return metricValues{{value: float64(s.Processes.FdCount), timestamp: s.Timestamp}}
- },
- },
- }...)
- }
- return c
- }
- var (
- versionInfoDesc = prometheus.NewDesc("cadvisor_version_info", "A metric with a constant '1' value labeled by kernel version, OS version, docker version, cadvisor version & cadvisor revision.", []string{"kernelVersion", "osVersion", "dockerVersion", "cadvisorVersion", "cadvisorRevision"}, nil)
- machineInfoCoresDesc = prometheus.NewDesc("machine_cpu_cores", "Number of CPU cores on the machine.", nil, nil)
- machineInfoMemoryDesc = prometheus.NewDesc("machine_memory_bytes", "Amount of memory installed on the machine.", nil, nil)
- )
- // Describe describes all the metrics ever exported by cadvisor. It
- // implements prometheus.PrometheusCollector.
- func (c *PrometheusCollector) Describe(ch chan<- *prometheus.Desc) {
- c.errors.Describe(ch)
- for _, cm := range c.containerMetrics {
- ch <- cm.desc([]string{})
- }
- ch <- versionInfoDesc
- ch <- machineInfoCoresDesc
- ch <- machineInfoMemoryDesc
- }
- // Collect fetches the stats from all containers and delivers them as
- // Prometheus metrics. It implements prometheus.PrometheusCollector.
- func (c *PrometheusCollector) Collect(ch chan<- prometheus.Metric) {
- c.errors.Set(0)
- c.collectMachineInfo(ch)
- c.collectVersionInfo(ch)
- c.collectContainersInfo(ch)
- c.errors.Collect(ch)
- }
// Names and prefixes of the labels attached to container metrics.
const (
	// LabelID is the name of the id label.
	LabelID = "id"
	// LabelName is the name of the name label.
	LabelName = "name"
	// LabelImage is the name of the image label.
	LabelImage = "image"

	// ContainerLabelPrefix is prepended to the key of every container label.
	ContainerLabelPrefix = "container_label_"
	// ContainerEnvPrefix is prepended to the key of every env variable label.
	ContainerEnvPrefix = "container_env_"
)
- // DefaultContainerLabels implements ContainerLabelsFunc. It exports the
- // container name, first alias, image name as well as all its env and label
- // values.
- func DefaultContainerLabels(container *info.ContainerInfo) map[string]string {
- set := map[string]string{LabelID: container.Name}
- if len(container.Aliases) > 0 {
- set[LabelName] = container.Aliases[0]
- }
- if image := container.Spec.Image; len(image) > 0 {
- set[LabelImage] = image
- }
- for k, v := range container.Spec.Labels {
- set[ContainerLabelPrefix+k] = v
- }
- for k, v := range container.Spec.Envs {
- set[ContainerEnvPrefix+k] = v
- }
- return set
- }
- // BaseContainerLabels returns a ContainerLabelsFunc that exports the container
- // name, first alias, image name as well as white listed label values.
- func BaseContainerLabels(whiteList []string) func(container *info.ContainerInfo) map[string]string {
- whiteListMap := make(map[string]struct{}, len(whiteList))
- for _, k := range whiteList {
- whiteListMap[k] = struct{}{}
- }
- return func(container *info.ContainerInfo) map[string]string {
- set := map[string]string{LabelID: container.Name}
- if len(container.Aliases) > 0 {
- set[LabelName] = container.Aliases[0]
- }
- if image := container.Spec.Image; len(image) > 0 {
- set[LabelImage] = image
- }
- for k, v := range container.Spec.Labels {
- if _, ok := whiteListMap[k]; ok {
- set[ContainerLabelPrefix+k] = v
- }
- }
- return set
- }
- }
- func (c *PrometheusCollector) collectContainersInfo(ch chan<- prometheus.Metric) {
- containers, err := c.infoProvider.SubcontainersInfo("/", &info.ContainerInfoRequest{NumStats: 1})
- if err != nil {
- c.errors.Set(1)
- klog.Warningf("Couldn't get containers: %s", err)
- return
- }
- rawLabels := map[string]struct{}{}
- for _, container := range containers {
- for l := range c.containerLabelsFunc(container) {
- rawLabels[l] = struct{}{}
- }
- }
- for _, cont := range containers {
- values := make([]string, 0, len(rawLabels))
- labels := make([]string, 0, len(rawLabels))
- containerLabels := c.containerLabelsFunc(cont)
- for l := range rawLabels {
- labels = append(labels, sanitizeLabelName(l))
- values = append(values, containerLabels[l])
- }
- // Container spec
- desc := prometheus.NewDesc("container_start_time_seconds", "Start time of the container since unix epoch in seconds.", labels, nil)
- ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, float64(cont.Spec.CreationTime.Unix()), values...)
- if cont.Spec.HasCpu {
- desc = prometheus.NewDesc("container_spec_cpu_period", "CPU period of the container.", labels, nil)
- ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, float64(cont.Spec.Cpu.Period), values...)
- if cont.Spec.Cpu.Quota != 0 {
- desc = prometheus.NewDesc("container_spec_cpu_quota", "CPU quota of the container.", labels, nil)
- ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, float64(cont.Spec.Cpu.Quota), values...)
- }
- desc := prometheus.NewDesc("container_spec_cpu_shares", "CPU share of the container.", labels, nil)
- ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, float64(cont.Spec.Cpu.Limit), values...)
- }
- if cont.Spec.HasMemory {
- desc := prometheus.NewDesc("container_spec_memory_limit_bytes", "Memory limit for the container.", labels, nil)
- ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, specMemoryValue(cont.Spec.Memory.Limit), values...)
- desc = prometheus.NewDesc("container_spec_memory_swap_limit_bytes", "Memory swap limit for the container.", labels, nil)
- ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, specMemoryValue(cont.Spec.Memory.SwapLimit), values...)
- desc = prometheus.NewDesc("container_spec_memory_reservation_limit_bytes", "Memory reservation limit for the container.", labels, nil)
- ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, specMemoryValue(cont.Spec.Memory.Reservation), values...)
- }
- // Now for the actual metrics
- if len(cont.Stats) == 0 {
- continue
- }
- stats := cont.Stats[0]
- for _, cm := range c.containerMetrics {
- if cm.condition != nil && !cm.condition(cont.Spec) {
- continue
- }
- desc := cm.desc(labels)
- for _, metricValue := range cm.getValues(stats) {
- ch <- prometheus.NewMetricWithTimestamp(
- metricValue.timestamp,
- prometheus.MustNewConstMetric(desc, cm.valueType, float64(metricValue.value), append(values, metricValue.labels...)...),
- )
- }
- }
- }
- }
- func (c *PrometheusCollector) collectVersionInfo(ch chan<- prometheus.Metric) {
- versionInfo, err := c.infoProvider.GetVersionInfo()
- if err != nil {
- c.errors.Set(1)
- klog.Warningf("Couldn't get version info: %s", err)
- return
- }
- ch <- prometheus.MustNewConstMetric(versionInfoDesc, prometheus.GaugeValue, 1, []string{versionInfo.KernelVersion, versionInfo.ContainerOsVersion, versionInfo.DockerVersion, versionInfo.CadvisorVersion, versionInfo.CadvisorRevision}...)
- }
- func (c *PrometheusCollector) collectMachineInfo(ch chan<- prometheus.Metric) {
- machineInfo, err := c.infoProvider.GetMachineInfo()
- if err != nil {
- c.errors.Set(1)
- klog.Warningf("Couldn't get machine info: %s", err)
- return
- }
- ch <- prometheus.MustNewConstMetric(machineInfoCoresDesc, prometheus.GaugeValue, float64(machineInfo.NumCores))
- ch <- prometheus.MustNewConstMetric(machineInfoMemoryDesc, prometheus.GaugeValue, float64(machineInfo.MemoryCapacity))
- }
// maxMemorySize is the threshold above which a memory setting is treated as
// "unlimited". This is not MaxInt64 due to rounding by the kernel.
const maxMemorySize uint64 = 1 << 62

// specMemoryValue converts a memory setting from a container spec into a
// gauge value. Settings above maxMemorySize are reported as 0; everything
// else is reported unchanged.
func specMemoryValue(v uint64) float64 {
	if v <= maxMemorySize {
		return float64(v)
	}
	return 0
}
// invalidLabelCharRE matches every character that is not an ASCII letter,
// an ASCII digit or an underscore.
var invalidLabelCharRE = regexp.MustCompile(`[^a-zA-Z0-9_]`)

// sanitizeLabelName turns name into a valid Prometheus label name.
//
// Every character outside [a-zA-Z0-9_] is replaced with an underscore. A
// Prometheus label name must additionally be non-empty and must not start
// with a digit, so an underscore is prepended in those two cases; without it
// the name would be rejected when the metric descriptor is built.
func sanitizeLabelName(name string) string {
	sanitized := invalidLabelCharRE.ReplaceAllString(name, "_")
	if sanitized == "" || ('0' <= sanitized[0] && sanitized[0] <= '9') {
		return "_" + sanitized
	}
	return sanitized
}
|