prometheus.go 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246
  1. // Copyright 2014 Google Inc. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package metrics
  15. import (
  16. "fmt"
  17. "regexp"
  18. "time"
  19. "github.com/google/cadvisor/container"
  20. info "github.com/google/cadvisor/info/v1"
  21. "github.com/prometheus/client_golang/prometheus"
  22. "k8s.io/klog"
  23. )
// infoProvider is the data source a PrometheusCollector reads from. It will
// usually be manager.Manager, but can be swapped out for testing.
type infoProvider interface {
	// SubcontainersInfo provides information about all subcontainers of the
	// specified container including itself.
	SubcontainersInfo(containerName string, query *info.ContainerInfoRequest) ([]*info.ContainerInfo, error)
	// GetVersionInfo provides information about the version.
	GetVersionInfo() (*info.VersionInfo, error)
	// GetMachineInfo provides information about the machine.
	GetMachineInfo() (*info.MachineInfo, error)
}
// metricValue describes a single metric value for a given set of label values
// within a parent containerMetric.
type metricValue struct {
	// value is the numeric sample.
	value float64
	// labels holds the label values for this sample. In the metric
	// definitions in this file they line up positionally with the parent
	// containerMetric's extraLabels, and are left nil by metrics that declare
	// no extra labels.
	labels []string
	// timestamp is the time associated with the sample. The getValues
	// functions in this file take it from the stats' Timestamp, except
	// container_last_seen, which uses time.Now().
	timestamp time.Time
}
// metricValues is a list of metricValue samples, as returned by a
// containerMetric's getValues function.
type metricValues []metricValue
  42. // asFloat64 converts a uint64 into a float64.
  43. func asFloat64(v uint64) float64 { return float64(v) }
  44. // asNanosecondsToSeconds converts nanoseconds into a float64 representing seconds.
  45. func asNanosecondsToSeconds(v uint64) float64 {
  46. return float64(v) / float64(time.Second)
  47. }
  48. // fsValues is a helper method for assembling per-filesystem stats.
  49. func fsValues(fsStats []info.FsStats, valueFn func(*info.FsStats) float64, timestamp time.Time) metricValues {
  50. values := make(metricValues, 0, len(fsStats))
  51. for _, stat := range fsStats {
  52. values = append(values, metricValue{
  53. value: valueFn(&stat),
  54. labels: []string{stat.Device},
  55. timestamp: timestamp,
  56. })
  57. }
  58. return values
  59. }
  60. // ioValues is a helper method for assembling per-disk and per-filesystem stats.
  61. func ioValues(ioStats []info.PerDiskStats, ioType string, ioValueFn func(uint64) float64,
  62. fsStats []info.FsStats, valueFn func(*info.FsStats) float64, timestamp time.Time) metricValues {
  63. values := make(metricValues, 0, len(ioStats)+len(fsStats))
  64. for _, stat := range ioStats {
  65. values = append(values, metricValue{
  66. value: ioValueFn(stat.Stats[ioType]),
  67. labels: []string{stat.Device},
  68. timestamp: timestamp,
  69. })
  70. }
  71. for _, stat := range fsStats {
  72. values = append(values, metricValue{
  73. value: valueFn(&stat),
  74. labels: []string{stat.Device},
  75. timestamp: timestamp,
  76. })
  77. }
  78. return values
  79. }
// containerMetric describes a multi-dimensional metric used for exposing a
// certain type of container statistic.
type containerMetric struct {
	// name is the metric name passed to prometheus.NewDesc.
	name string
	// help is the help text passed to prometheus.NewDesc.
	help string
	// valueType is the Prometheus value type of the metric; the definitions in
	// this file use GaugeValue or CounterValue.
	valueType prometheus.ValueType
	// extraLabels names the metric-specific labels that desc appends after the
	// base labels.
	extraLabels []string
	// condition optionally inspects the container spec and reports a bool;
	// most definitions in this file leave it nil.
	// NOTE(review): presumably the metric is only exported when condition is
	// nil or returns true — confirm against the collection code.
	condition func(s info.ContainerSpec) bool
	// getValues extracts this metric's samples from one ContainerStats.
	getValues func(s *info.ContainerStats) metricValues
}
  90. func (cm *containerMetric) desc(baseLabels []string) *prometheus.Desc {
  91. return prometheus.NewDesc(cm.name, cm.help, append(baseLabels, cm.extraLabels...), nil)
  92. }
// ContainerLabelsFunc defines all base labels and their values attached to
// each metric exported by cAdvisor. It is called with a container's info and
// returns the labels as a map (presumably label name to label value).
type ContainerLabelsFunc func(*info.ContainerInfo) map[string]string
// PrometheusCollector implements prometheus.Collector.
type PrometheusCollector struct {
	// infoProvider supplies the container, version, and machine information.
	infoProvider infoProvider
	// errors is the container_scrape_error gauge: per its help text, 1 if
	// there was an error while getting container metrics, 0 otherwise.
	errors prometheus.Gauge
	// containerMetrics is the list of metric definitions, assembled by
	// NewPrometheusCollector according to includedMetrics.
	containerMetrics []containerMetric
	// containerLabelsFunc produces the base labels; NewPrometheusCollector
	// substitutes DefaultContainerLabels when it is given nil.
	containerLabelsFunc ContainerLabelsFunc
	// includedMetrics is the set of metric kinds passed to
	// NewPrometheusCollector.
	includedMetrics container.MetricSet
}
  104. // NewPrometheusCollector returns a new PrometheusCollector. The passed
  105. // ContainerLabelsFunc specifies which base labels will be attached to all
  106. // exported metrics. If left to nil, the DefaultContainerLabels function
  107. // will be used instead.
  108. func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetrics container.MetricSet) *PrometheusCollector {
  109. if f == nil {
  110. f = DefaultContainerLabels
  111. }
  112. c := &PrometheusCollector{
  113. infoProvider: i,
  114. containerLabelsFunc: f,
  115. errors: prometheus.NewGauge(prometheus.GaugeOpts{
  116. Namespace: "container",
  117. Name: "scrape_error",
  118. Help: "1 if there was an error while getting container metrics, 0 otherwise",
  119. }),
  120. containerMetrics: []containerMetric{
  121. {
  122. name: "container_last_seen",
  123. help: "Last time a container was seen by the exporter",
  124. valueType: prometheus.GaugeValue,
  125. getValues: func(s *info.ContainerStats) metricValues {
  126. return metricValues{{
  127. value: float64(time.Now().Unix()),
  128. timestamp: time.Now(),
  129. }}
  130. },
  131. },
  132. },
  133. includedMetrics: includedMetrics,
  134. }
  135. if includedMetrics.Has(container.CpuUsageMetrics) {
  136. c.containerMetrics = append(c.containerMetrics, []containerMetric{
  137. {
  138. name: "container_cpu_user_seconds_total",
  139. help: "Cumulative user cpu time consumed in seconds.",
  140. valueType: prometheus.CounterValue,
  141. getValues: func(s *info.ContainerStats) metricValues {
  142. return metricValues{
  143. {
  144. value: float64(s.Cpu.Usage.User) / float64(time.Second),
  145. timestamp: s.Timestamp,
  146. },
  147. }
  148. },
  149. }, {
  150. name: "container_cpu_system_seconds_total",
  151. help: "Cumulative system cpu time consumed in seconds.",
  152. valueType: prometheus.CounterValue,
  153. getValues: func(s *info.ContainerStats) metricValues {
  154. return metricValues{
  155. {
  156. value: float64(s.Cpu.Usage.System) / float64(time.Second),
  157. timestamp: s.Timestamp,
  158. },
  159. }
  160. },
  161. }, {
  162. name: "container_cpu_usage_seconds_total",
  163. help: "Cumulative cpu time consumed in seconds.",
  164. valueType: prometheus.CounterValue,
  165. extraLabels: []string{"cpu"},
  166. getValues: func(s *info.ContainerStats) metricValues {
  167. if len(s.Cpu.Usage.PerCpu) == 0 {
  168. if s.Cpu.Usage.Total > 0 {
  169. return metricValues{{
  170. value: float64(s.Cpu.Usage.Total) / float64(time.Second),
  171. labels: []string{"total"},
  172. timestamp: s.Timestamp,
  173. }}
  174. }
  175. }
  176. values := make(metricValues, 0, len(s.Cpu.Usage.PerCpu))
  177. for i, value := range s.Cpu.Usage.PerCpu {
  178. if value > 0 {
  179. values = append(values, metricValue{
  180. value: float64(value) / float64(time.Second),
  181. labels: []string{fmt.Sprintf("cpu%02d", i)},
  182. timestamp: s.Timestamp,
  183. })
  184. }
  185. }
  186. return values
  187. },
  188. }, {
  189. name: "container_cpu_cfs_periods_total",
  190. help: "Number of elapsed enforcement period intervals.",
  191. valueType: prometheus.CounterValue,
  192. condition: func(s info.ContainerSpec) bool { return s.Cpu.Quota != 0 },
  193. getValues: func(s *info.ContainerStats) metricValues {
  194. return metricValues{
  195. {
  196. value: float64(s.Cpu.CFS.Periods),
  197. timestamp: s.Timestamp,
  198. }}
  199. },
  200. }, {
  201. name: "container_cpu_cfs_throttled_periods_total",
  202. help: "Number of throttled period intervals.",
  203. valueType: prometheus.CounterValue,
  204. condition: func(s info.ContainerSpec) bool { return s.Cpu.Quota != 0 },
  205. getValues: func(s *info.ContainerStats) metricValues {
  206. return metricValues{
  207. {
  208. value: float64(s.Cpu.CFS.ThrottledPeriods),
  209. timestamp: s.Timestamp,
  210. }}
  211. },
  212. }, {
  213. name: "container_cpu_cfs_throttled_seconds_total",
  214. help: "Total time duration the container has been throttled.",
  215. valueType: prometheus.CounterValue,
  216. condition: func(s info.ContainerSpec) bool { return s.Cpu.Quota != 0 },
  217. getValues: func(s *info.ContainerStats) metricValues {
  218. return metricValues{
  219. {
  220. value: float64(s.Cpu.CFS.ThrottledTime) / float64(time.Second),
  221. timestamp: s.Timestamp,
  222. }}
  223. },
  224. },
  225. }...)
  226. }
  227. if includedMetrics.Has(container.ProcessSchedulerMetrics) {
  228. c.containerMetrics = append(c.containerMetrics, []containerMetric{
  229. {
  230. name: "container_cpu_schedstat_run_seconds_total",
  231. help: "Time duration the processes of the container have run on the CPU.",
  232. valueType: prometheus.CounterValue,
  233. getValues: func(s *info.ContainerStats) metricValues {
  234. return metricValues{{
  235. value: float64(s.Cpu.Schedstat.RunTime) / float64(time.Second),
  236. timestamp: s.Timestamp,
  237. }}
  238. },
  239. }, {
  240. name: "container_cpu_schedstat_runqueue_seconds_total",
  241. help: "Time duration processes of the container have been waiting on a runqueue.",
  242. valueType: prometheus.CounterValue,
  243. getValues: func(s *info.ContainerStats) metricValues {
  244. return metricValues{{
  245. value: float64(s.Cpu.Schedstat.RunqueueTime) / float64(time.Second),
  246. timestamp: s.Timestamp,
  247. }}
  248. },
  249. }, {
  250. name: "container_cpu_schedstat_run_periods_total",
  251. help: "Number of times processes of the cgroup have run on the cpu",
  252. valueType: prometheus.CounterValue,
  253. getValues: func(s *info.ContainerStats) metricValues {
  254. return metricValues{{
  255. value: float64(s.Cpu.Schedstat.RunPeriods),
  256. timestamp: s.Timestamp,
  257. }}
  258. },
  259. },
  260. }...)
  261. }
  262. if includedMetrics.Has(container.CpuLoadMetrics) {
  263. c.containerMetrics = append(c.containerMetrics, []containerMetric{
  264. {
  265. name: "container_cpu_load_average_10s",
  266. help: "Value of container cpu load average over the last 10 seconds.",
  267. valueType: prometheus.GaugeValue,
  268. getValues: func(s *info.ContainerStats) metricValues {
  269. return metricValues{{value: float64(s.Cpu.LoadAverage), timestamp: s.Timestamp}}
  270. },
  271. }, {
  272. name: "container_tasks_state",
  273. help: "Number of tasks in given state",
  274. extraLabels: []string{"state"},
  275. valueType: prometheus.GaugeValue,
  276. getValues: func(s *info.ContainerStats) metricValues {
  277. return metricValues{
  278. {
  279. value: float64(s.TaskStats.NrSleeping),
  280. labels: []string{"sleeping"},
  281. timestamp: s.Timestamp,
  282. },
  283. {
  284. value: float64(s.TaskStats.NrRunning),
  285. labels: []string{"running"},
  286. timestamp: s.Timestamp,
  287. },
  288. {
  289. value: float64(s.TaskStats.NrStopped),
  290. labels: []string{"stopped"},
  291. timestamp: s.Timestamp,
  292. },
  293. {
  294. value: float64(s.TaskStats.NrUninterruptible),
  295. labels: []string{"uninterruptible"},
  296. timestamp: s.Timestamp,
  297. },
  298. {
  299. value: float64(s.TaskStats.NrIoWait),
  300. labels: []string{"iowaiting"},
  301. timestamp: s.Timestamp,
  302. },
  303. }
  304. },
  305. },
  306. }...)
  307. }
  308. if includedMetrics.Has(container.MemoryUsageMetrics) {
  309. c.containerMetrics = append(c.containerMetrics, []containerMetric{
  310. {
  311. name: "container_memory_cache",
  312. help: "Number of bytes of page cache memory.",
  313. valueType: prometheus.GaugeValue,
  314. getValues: func(s *info.ContainerStats) metricValues {
  315. return metricValues{{value: float64(s.Memory.Cache), timestamp: s.Timestamp}}
  316. },
  317. }, {
  318. name: "container_memory_rss",
  319. help: "Size of RSS in bytes.",
  320. valueType: prometheus.GaugeValue,
  321. getValues: func(s *info.ContainerStats) metricValues {
  322. return metricValues{{value: float64(s.Memory.RSS), timestamp: s.Timestamp}}
  323. },
  324. }, {
  325. name: "container_memory_mapped_file",
  326. help: "Size of memory mapped files in bytes.",
  327. valueType: prometheus.GaugeValue,
  328. getValues: func(s *info.ContainerStats) metricValues {
  329. return metricValues{{value: float64(s.Memory.MappedFile), timestamp: s.Timestamp}}
  330. },
  331. }, {
  332. name: "container_memory_swap",
  333. help: "Container swap usage in bytes.",
  334. valueType: prometheus.GaugeValue,
  335. getValues: func(s *info.ContainerStats) metricValues {
  336. return metricValues{{value: float64(s.Memory.Swap), timestamp: s.Timestamp}}
  337. },
  338. }, {
  339. name: "container_memory_failcnt",
  340. help: "Number of memory usage hits limits",
  341. valueType: prometheus.CounterValue,
  342. getValues: func(s *info.ContainerStats) metricValues {
  343. return metricValues{{
  344. value: float64(s.Memory.Failcnt),
  345. timestamp: s.Timestamp,
  346. }}
  347. },
  348. }, {
  349. name: "container_memory_usage_bytes",
  350. help: "Current memory usage in bytes, including all memory regardless of when it was accessed",
  351. valueType: prometheus.GaugeValue,
  352. getValues: func(s *info.ContainerStats) metricValues {
  353. return metricValues{{value: float64(s.Memory.Usage), timestamp: s.Timestamp}}
  354. },
  355. },
  356. {
  357. name: "container_memory_max_usage_bytes",
  358. help: "Maximum memory usage recorded in bytes",
  359. valueType: prometheus.GaugeValue,
  360. getValues: func(s *info.ContainerStats) metricValues {
  361. return metricValues{{value: float64(s.Memory.MaxUsage), timestamp: s.Timestamp}}
  362. },
  363. }, {
  364. name: "container_memory_working_set_bytes",
  365. help: "Current working set in bytes.",
  366. valueType: prometheus.GaugeValue,
  367. getValues: func(s *info.ContainerStats) metricValues {
  368. return metricValues{{value: float64(s.Memory.WorkingSet), timestamp: s.Timestamp}}
  369. },
  370. }, {
  371. name: "container_memory_failures_total",
  372. help: "Cumulative count of memory allocation failures.",
  373. valueType: prometheus.CounterValue,
  374. extraLabels: []string{"failure_type", "scope"},
  375. getValues: func(s *info.ContainerStats) metricValues {
  376. return metricValues{
  377. {
  378. value: float64(s.Memory.ContainerData.Pgfault),
  379. labels: []string{"pgfault", "container"},
  380. timestamp: s.Timestamp,
  381. },
  382. {
  383. value: float64(s.Memory.ContainerData.Pgmajfault),
  384. labels: []string{"pgmajfault", "container"},
  385. timestamp: s.Timestamp,
  386. },
  387. {
  388. value: float64(s.Memory.HierarchicalData.Pgfault),
  389. labels: []string{"pgfault", "hierarchy"},
  390. timestamp: s.Timestamp,
  391. },
  392. {
  393. value: float64(s.Memory.HierarchicalData.Pgmajfault),
  394. labels: []string{"pgmajfault", "hierarchy"},
  395. timestamp: s.Timestamp,
  396. },
  397. }
  398. },
  399. },
  400. }...)
  401. }
  402. if includedMetrics.Has(container.AcceleratorUsageMetrics) {
  403. c.containerMetrics = append(c.containerMetrics, []containerMetric{
  404. {
  405. name: "container_accelerator_memory_total_bytes",
  406. help: "Total accelerator memory.",
  407. valueType: prometheus.GaugeValue,
  408. extraLabels: []string{"make", "model", "acc_id"},
  409. getValues: func(s *info.ContainerStats) metricValues {
  410. values := make(metricValues, 0, len(s.Accelerators))
  411. for _, value := range s.Accelerators {
  412. values = append(values, metricValue{
  413. value: float64(value.MemoryTotal),
  414. labels: []string{value.Make, value.Model, value.ID},
  415. timestamp: s.Timestamp,
  416. })
  417. }
  418. return values
  419. },
  420. }, {
  421. name: "container_accelerator_memory_used_bytes",
  422. help: "Total accelerator memory allocated.",
  423. valueType: prometheus.GaugeValue,
  424. extraLabels: []string{"make", "model", "acc_id"},
  425. getValues: func(s *info.ContainerStats) metricValues {
  426. values := make(metricValues, 0, len(s.Accelerators))
  427. for _, value := range s.Accelerators {
  428. values = append(values, metricValue{
  429. value: float64(value.MemoryUsed),
  430. labels: []string{value.Make, value.Model, value.ID},
  431. timestamp: s.Timestamp,
  432. })
  433. }
  434. return values
  435. },
  436. }, {
  437. name: "container_accelerator_duty_cycle",
  438. help: "Percent of time over the past sample period during which the accelerator was actively processing.",
  439. valueType: prometheus.GaugeValue,
  440. extraLabels: []string{"make", "model", "acc_id"},
  441. getValues: func(s *info.ContainerStats) metricValues {
  442. values := make(metricValues, 0, len(s.Accelerators))
  443. for _, value := range s.Accelerators {
  444. values = append(values, metricValue{
  445. value: float64(value.DutyCycle),
  446. labels: []string{value.Make, value.Model, value.ID},
  447. timestamp: s.Timestamp,
  448. })
  449. }
  450. return values
  451. },
  452. },
  453. }...)
  454. }
  455. if includedMetrics.Has(container.DiskUsageMetrics) {
  456. c.containerMetrics = append(c.containerMetrics, []containerMetric{
  457. {
  458. name: "container_fs_inodes_free",
  459. help: "Number of available Inodes",
  460. valueType: prometheus.GaugeValue,
  461. extraLabels: []string{"device"},
  462. getValues: func(s *info.ContainerStats) metricValues {
  463. return fsValues(s.Filesystem, func(fs *info.FsStats) float64 {
  464. return float64(fs.InodesFree)
  465. }, s.Timestamp)
  466. },
  467. }, {
  468. name: "container_fs_inodes_total",
  469. help: "Number of Inodes",
  470. valueType: prometheus.GaugeValue,
  471. extraLabels: []string{"device"},
  472. getValues: func(s *info.ContainerStats) metricValues {
  473. return fsValues(s.Filesystem, func(fs *info.FsStats) float64 {
  474. return float64(fs.Inodes)
  475. }, s.Timestamp)
  476. },
  477. }, {
  478. name: "container_fs_limit_bytes",
  479. help: "Number of bytes that can be consumed by the container on this filesystem.",
  480. valueType: prometheus.GaugeValue,
  481. extraLabels: []string{"device"},
  482. getValues: func(s *info.ContainerStats) metricValues {
  483. return fsValues(s.Filesystem, func(fs *info.FsStats) float64 {
  484. return float64(fs.Limit)
  485. }, s.Timestamp)
  486. },
  487. }, {
  488. name: "container_fs_usage_bytes",
  489. help: "Number of bytes that are consumed by the container on this filesystem.",
  490. valueType: prometheus.GaugeValue,
  491. extraLabels: []string{"device"},
  492. getValues: func(s *info.ContainerStats) metricValues {
  493. return fsValues(s.Filesystem, func(fs *info.FsStats) float64 {
  494. return float64(fs.Usage)
  495. }, s.Timestamp)
  496. },
  497. },
  498. }...)
  499. }
  500. if includedMetrics.Has(container.DiskIOMetrics) {
  501. c.containerMetrics = append(c.containerMetrics, []containerMetric{
  502. {
  503. name: "container_fs_reads_bytes_total",
  504. help: "Cumulative count of bytes read",
  505. valueType: prometheus.CounterValue,
  506. extraLabels: []string{"device"},
  507. getValues: func(s *info.ContainerStats) metricValues {
  508. return ioValues(
  509. s.DiskIo.IoServiceBytes, "Read", asFloat64,
  510. nil, nil,
  511. s.Timestamp,
  512. )
  513. },
  514. }, {
  515. name: "container_fs_reads_total",
  516. help: "Cumulative count of reads completed",
  517. valueType: prometheus.CounterValue,
  518. extraLabels: []string{"device"},
  519. getValues: func(s *info.ContainerStats) metricValues {
  520. return ioValues(
  521. s.DiskIo.IoServiced, "Read", asFloat64,
  522. s.Filesystem, func(fs *info.FsStats) float64 {
  523. return float64(fs.ReadsCompleted)
  524. },
  525. s.Timestamp,
  526. )
  527. },
  528. }, {
  529. name: "container_fs_sector_reads_total",
  530. help: "Cumulative count of sector reads completed",
  531. valueType: prometheus.CounterValue,
  532. extraLabels: []string{"device"},
  533. getValues: func(s *info.ContainerStats) metricValues {
  534. return ioValues(
  535. s.DiskIo.Sectors, "Read", asFloat64,
  536. s.Filesystem, func(fs *info.FsStats) float64 {
  537. return float64(fs.SectorsRead)
  538. },
  539. s.Timestamp,
  540. )
  541. },
  542. }, {
  543. name: "container_fs_reads_merged_total",
  544. help: "Cumulative count of reads merged",
  545. valueType: prometheus.CounterValue,
  546. extraLabels: []string{"device"},
  547. getValues: func(s *info.ContainerStats) metricValues {
  548. return ioValues(
  549. s.DiskIo.IoMerged, "Read", asFloat64,
  550. s.Filesystem, func(fs *info.FsStats) float64 {
  551. return float64(fs.ReadsMerged)
  552. },
  553. s.Timestamp,
  554. )
  555. },
  556. }, {
  557. name: "container_fs_read_seconds_total",
  558. help: "Cumulative count of seconds spent reading",
  559. valueType: prometheus.CounterValue,
  560. extraLabels: []string{"device"},
  561. getValues: func(s *info.ContainerStats) metricValues {
  562. return ioValues(
  563. s.DiskIo.IoServiceTime, "Read", asNanosecondsToSeconds,
  564. s.Filesystem, func(fs *info.FsStats) float64 {
  565. return float64(fs.ReadTime) / float64(time.Second)
  566. },
  567. s.Timestamp,
  568. )
  569. },
  570. }, {
  571. name: "container_fs_writes_bytes_total",
  572. help: "Cumulative count of bytes written",
  573. valueType: prometheus.CounterValue,
  574. extraLabels: []string{"device"},
  575. getValues: func(s *info.ContainerStats) metricValues {
  576. return ioValues(
  577. s.DiskIo.IoServiceBytes, "Write", asFloat64,
  578. nil, nil,
  579. s.Timestamp,
  580. )
  581. },
  582. }, {
  583. name: "container_fs_writes_total",
  584. help: "Cumulative count of writes completed",
  585. valueType: prometheus.CounterValue,
  586. extraLabels: []string{"device"},
  587. getValues: func(s *info.ContainerStats) metricValues {
  588. return ioValues(
  589. s.DiskIo.IoServiced, "Write", asFloat64,
  590. s.Filesystem, func(fs *info.FsStats) float64 {
  591. return float64(fs.WritesCompleted)
  592. },
  593. s.Timestamp,
  594. )
  595. },
  596. }, {
  597. name: "container_fs_sector_writes_total",
  598. help: "Cumulative count of sector writes completed",
  599. valueType: prometheus.CounterValue,
  600. extraLabels: []string{"device"},
  601. getValues: func(s *info.ContainerStats) metricValues {
  602. return ioValues(
  603. s.DiskIo.Sectors, "Write", asFloat64,
  604. s.Filesystem, func(fs *info.FsStats) float64 {
  605. return float64(fs.SectorsWritten)
  606. },
  607. s.Timestamp,
  608. )
  609. },
  610. }, {
  611. name: "container_fs_writes_merged_total",
  612. help: "Cumulative count of writes merged",
  613. valueType: prometheus.CounterValue,
  614. extraLabels: []string{"device"},
  615. getValues: func(s *info.ContainerStats) metricValues {
  616. return ioValues(
  617. s.DiskIo.IoMerged, "Write", asFloat64,
  618. s.Filesystem, func(fs *info.FsStats) float64 {
  619. return float64(fs.WritesMerged)
  620. },
  621. s.Timestamp,
  622. )
  623. },
  624. }, {
  625. name: "container_fs_write_seconds_total",
  626. help: "Cumulative count of seconds spent writing",
  627. valueType: prometheus.CounterValue,
  628. extraLabels: []string{"device"},
  629. getValues: func(s *info.ContainerStats) metricValues {
  630. return ioValues(
  631. s.DiskIo.IoServiceTime, "Write", asNanosecondsToSeconds,
  632. s.Filesystem, func(fs *info.FsStats) float64 {
  633. return float64(fs.WriteTime) / float64(time.Second)
  634. },
  635. s.Timestamp,
  636. )
  637. },
  638. }, {
  639. name: "container_fs_io_current",
  640. help: "Number of I/Os currently in progress",
  641. valueType: prometheus.GaugeValue,
  642. extraLabels: []string{"device"},
  643. getValues: func(s *info.ContainerStats) metricValues {
  644. return ioValues(
  645. s.DiskIo.IoQueued, "Total", asFloat64,
  646. s.Filesystem, func(fs *info.FsStats) float64 {
  647. return float64(fs.IoInProgress)
  648. },
  649. s.Timestamp,
  650. )
  651. },
  652. }, {
  653. name: "container_fs_io_time_seconds_total",
  654. help: "Cumulative count of seconds spent doing I/Os",
  655. valueType: prometheus.CounterValue,
  656. extraLabels: []string{"device"},
  657. getValues: func(s *info.ContainerStats) metricValues {
  658. return ioValues(
  659. s.DiskIo.IoServiceTime, "Total", asNanosecondsToSeconds,
  660. s.Filesystem, func(fs *info.FsStats) float64 {
  661. return float64(float64(fs.IoTime) / float64(time.Second))
  662. },
  663. s.Timestamp,
  664. )
  665. },
  666. }, {
  667. name: "container_fs_io_time_weighted_seconds_total",
  668. help: "Cumulative weighted I/O time in seconds",
  669. valueType: prometheus.CounterValue,
  670. extraLabels: []string{"device"},
  671. getValues: func(s *info.ContainerStats) metricValues {
  672. return fsValues(s.Filesystem, func(fs *info.FsStats) float64 {
  673. return float64(fs.WeightedIoTime) / float64(time.Second)
  674. }, s.Timestamp)
  675. },
  676. },
  677. }...)
  678. }
  679. if includedMetrics.Has(container.NetworkUsageMetrics) {
  680. c.containerMetrics = append(c.containerMetrics, []containerMetric{
  681. {
  682. name: "container_network_receive_bytes_total",
  683. help: "Cumulative count of bytes received",
  684. valueType: prometheus.CounterValue,
  685. extraLabels: []string{"interface"},
  686. getValues: func(s *info.ContainerStats) metricValues {
  687. values := make(metricValues, 0, len(s.Network.Interfaces))
  688. for _, value := range s.Network.Interfaces {
  689. values = append(values, metricValue{
  690. value: float64(value.RxBytes),
  691. labels: []string{value.Name},
  692. timestamp: s.Timestamp,
  693. })
  694. }
  695. return values
  696. },
  697. }, {
  698. name: "container_network_receive_packets_total",
  699. help: "Cumulative count of packets received",
  700. valueType: prometheus.CounterValue,
  701. extraLabels: []string{"interface"},
  702. getValues: func(s *info.ContainerStats) metricValues {
  703. values := make(metricValues, 0, len(s.Network.Interfaces))
  704. for _, value := range s.Network.Interfaces {
  705. values = append(values, metricValue{
  706. value: float64(value.RxPackets),
  707. labels: []string{value.Name},
  708. timestamp: s.Timestamp,
  709. })
  710. }
  711. return values
  712. },
  713. }, {
  714. name: "container_network_receive_packets_dropped_total",
  715. help: "Cumulative count of packets dropped while receiving",
  716. valueType: prometheus.CounterValue,
  717. extraLabels: []string{"interface"},
  718. getValues: func(s *info.ContainerStats) metricValues {
  719. values := make(metricValues, 0, len(s.Network.Interfaces))
  720. for _, value := range s.Network.Interfaces {
  721. values = append(values, metricValue{
  722. value: float64(value.RxDropped),
  723. labels: []string{value.Name},
  724. timestamp: s.Timestamp,
  725. })
  726. }
  727. return values
  728. },
  729. }, {
  730. name: "container_network_receive_errors_total",
  731. help: "Cumulative count of errors encountered while receiving",
  732. valueType: prometheus.CounterValue,
  733. extraLabels: []string{"interface"},
  734. getValues: func(s *info.ContainerStats) metricValues {
  735. values := make(metricValues, 0, len(s.Network.Interfaces))
  736. for _, value := range s.Network.Interfaces {
  737. values = append(values, metricValue{
  738. value: float64(value.RxErrors),
  739. labels: []string{value.Name},
  740. timestamp: s.Timestamp,
  741. })
  742. }
  743. return values
  744. },
  745. }, {
  746. name: "container_network_transmit_bytes_total",
  747. help: "Cumulative count of bytes transmitted",
  748. valueType: prometheus.CounterValue,
  749. extraLabels: []string{"interface"},
  750. getValues: func(s *info.ContainerStats) metricValues {
  751. values := make(metricValues, 0, len(s.Network.Interfaces))
  752. for _, value := range s.Network.Interfaces {
  753. values = append(values, metricValue{
  754. value: float64(value.TxBytes),
  755. labels: []string{value.Name},
  756. timestamp: s.Timestamp,
  757. })
  758. }
  759. return values
  760. },
  761. }, {
  762. name: "container_network_transmit_packets_total",
  763. help: "Cumulative count of packets transmitted",
  764. valueType: prometheus.CounterValue,
  765. extraLabels: []string{"interface"},
  766. getValues: func(s *info.ContainerStats) metricValues {
  767. values := make(metricValues, 0, len(s.Network.Interfaces))
  768. for _, value := range s.Network.Interfaces {
  769. values = append(values, metricValue{
  770. value: float64(value.TxPackets),
  771. labels: []string{value.Name},
  772. timestamp: s.Timestamp,
  773. })
  774. }
  775. return values
  776. },
  777. }, {
  778. name: "container_network_transmit_packets_dropped_total",
  779. help: "Cumulative count of packets dropped while transmitting",
  780. valueType: prometheus.CounterValue,
  781. extraLabels: []string{"interface"},
  782. getValues: func(s *info.ContainerStats) metricValues {
  783. values := make(metricValues, 0, len(s.Network.Interfaces))
  784. for _, value := range s.Network.Interfaces {
  785. values = append(values, metricValue{
  786. value: float64(value.TxDropped),
  787. labels: []string{value.Name},
  788. timestamp: s.Timestamp,
  789. })
  790. }
  791. return values
  792. },
  793. }, {
  794. name: "container_network_transmit_errors_total",
  795. help: "Cumulative count of errors encountered while transmitting",
  796. valueType: prometheus.CounterValue,
  797. extraLabels: []string{"interface"},
  798. getValues: func(s *info.ContainerStats) metricValues {
  799. values := make(metricValues, 0, len(s.Network.Interfaces))
  800. for _, value := range s.Network.Interfaces {
  801. values = append(values, metricValue{
  802. value: float64(value.TxErrors),
  803. labels: []string{value.Name},
  804. timestamp: s.Timestamp,
  805. })
  806. }
  807. return values
  808. },
  809. },
  810. }...)
  811. }
  812. if includedMetrics.Has(container.NetworkTcpUsageMetrics) {
  813. c.containerMetrics = append(c.containerMetrics, []containerMetric{
  814. {
  815. name: "container_network_tcp_usage_total",
  816. help: "tcp connection usage statistic for container",
  817. valueType: prometheus.GaugeValue,
  818. extraLabels: []string{"tcp_state"},
  819. getValues: func(s *info.ContainerStats) metricValues {
  820. return metricValues{
  821. {
  822. value: float64(s.Network.Tcp.Established),
  823. labels: []string{"established"},
  824. timestamp: s.Timestamp,
  825. },
  826. {
  827. value: float64(s.Network.Tcp.SynSent),
  828. labels: []string{"synsent"},
  829. timestamp: s.Timestamp,
  830. },
  831. {
  832. value: float64(s.Network.Tcp.SynRecv),
  833. labels: []string{"synrecv"},
  834. timestamp: s.Timestamp,
  835. },
  836. {
  837. value: float64(s.Network.Tcp.FinWait1),
  838. labels: []string{"finwait1"},
  839. timestamp: s.Timestamp,
  840. },
  841. {
  842. value: float64(s.Network.Tcp.FinWait2),
  843. labels: []string{"finwait2"},
  844. timestamp: s.Timestamp,
  845. },
  846. {
  847. value: float64(s.Network.Tcp.TimeWait),
  848. labels: []string{"timewait"},
  849. timestamp: s.Timestamp,
  850. },
  851. {
  852. value: float64(s.Network.Tcp.Close),
  853. labels: []string{"close"},
  854. timestamp: s.Timestamp,
  855. },
  856. {
  857. value: float64(s.Network.Tcp.CloseWait),
  858. labels: []string{"closewait"},
  859. timestamp: s.Timestamp,
  860. },
  861. {
  862. value: float64(s.Network.Tcp.LastAck),
  863. labels: []string{"lastack"},
  864. timestamp: s.Timestamp,
  865. },
  866. {
  867. value: float64(s.Network.Tcp.Listen),
  868. labels: []string{"listen"},
  869. timestamp: s.Timestamp,
  870. },
  871. {
  872. value: float64(s.Network.Tcp.Closing),
  873. labels: []string{"closing"},
  874. timestamp: s.Timestamp,
  875. },
  876. }
  877. },
  878. },
  879. }...)
  880. c.containerMetrics = append(c.containerMetrics, []containerMetric{
  881. {
  882. name: "container_network_tcp6_usage_total",
  883. help: "tcp6 connection usage statistic for container",
  884. valueType: prometheus.GaugeValue,
  885. extraLabels: []string{"tcp_state"},
  886. getValues: func(s *info.ContainerStats) metricValues {
  887. return metricValues{
  888. {
  889. value: float64(s.Network.Tcp6.Established),
  890. labels: []string{"established"},
  891. timestamp: s.Timestamp,
  892. },
  893. {
  894. value: float64(s.Network.Tcp6.SynSent),
  895. labels: []string{"synsent"},
  896. timestamp: s.Timestamp,
  897. },
  898. {
  899. value: float64(s.Network.Tcp6.SynRecv),
  900. labels: []string{"synrecv"},
  901. timestamp: s.Timestamp,
  902. },
  903. {
  904. value: float64(s.Network.Tcp6.FinWait1),
  905. labels: []string{"finwait1"},
  906. timestamp: s.Timestamp,
  907. },
  908. {
  909. value: float64(s.Network.Tcp6.FinWait2),
  910. labels: []string{"finwait2"},
  911. timestamp: s.Timestamp,
  912. },
  913. {
  914. value: float64(s.Network.Tcp6.TimeWait),
  915. labels: []string{"timewait"},
  916. timestamp: s.Timestamp,
  917. },
  918. {
  919. value: float64(s.Network.Tcp6.Close),
  920. labels: []string{"close"},
  921. timestamp: s.Timestamp,
  922. },
  923. {
  924. value: float64(s.Network.Tcp6.CloseWait),
  925. labels: []string{"closewait"},
  926. timestamp: s.Timestamp,
  927. },
  928. {
  929. value: float64(s.Network.Tcp6.LastAck),
  930. labels: []string{"lastack"},
  931. timestamp: s.Timestamp,
  932. },
  933. {
  934. value: float64(s.Network.Tcp6.Listen),
  935. labels: []string{"listen"},
  936. timestamp: s.Timestamp,
  937. },
  938. {
  939. value: float64(s.Network.Tcp6.Closing),
  940. labels: []string{"closing"},
  941. timestamp: s.Timestamp,
  942. },
  943. }
  944. },
  945. },
  946. }...)
  947. }
  948. if includedMetrics.Has(container.NetworkUdpUsageMetrics) {
  949. c.containerMetrics = append(c.containerMetrics, []containerMetric{
  950. {
  951. name: "container_network_udp6_usage_total",
  952. help: "udp6 connection usage statistic for container",
  953. valueType: prometheus.GaugeValue,
  954. extraLabels: []string{"udp_state"},
  955. getValues: func(s *info.ContainerStats) metricValues {
  956. return metricValues{
  957. {
  958. value: float64(s.Network.Udp6.Listen),
  959. labels: []string{"listen"},
  960. timestamp: s.Timestamp,
  961. },
  962. {
  963. value: float64(s.Network.Udp6.Dropped),
  964. labels: []string{"dropped"},
  965. timestamp: s.Timestamp,
  966. },
  967. {
  968. value: float64(s.Network.Udp6.RxQueued),
  969. labels: []string{"rxqueued"},
  970. timestamp: s.Timestamp,
  971. },
  972. {
  973. value: float64(s.Network.Udp6.TxQueued),
  974. labels: []string{"txqueued"},
  975. timestamp: s.Timestamp,
  976. },
  977. }
  978. },
  979. },
  980. }...)
  981. c.containerMetrics = append(c.containerMetrics, []containerMetric{
  982. {
  983. name: "container_network_udp_usage_total",
  984. help: "udp connection usage statistic for container",
  985. valueType: prometheus.GaugeValue,
  986. extraLabels: []string{"udp_state"},
  987. getValues: func(s *info.ContainerStats) metricValues {
  988. return metricValues{
  989. {
  990. value: float64(s.Network.Udp.Listen),
  991. labels: []string{"listen"},
  992. timestamp: s.Timestamp,
  993. },
  994. {
  995. value: float64(s.Network.Udp.Dropped),
  996. labels: []string{"dropped"},
  997. timestamp: s.Timestamp,
  998. },
  999. {
  1000. value: float64(s.Network.Udp.RxQueued),
  1001. labels: []string{"rxqueued"},
  1002. timestamp: s.Timestamp,
  1003. },
  1004. {
  1005. value: float64(s.Network.Udp.TxQueued),
  1006. labels: []string{"txqueued"},
  1007. timestamp: s.Timestamp,
  1008. },
  1009. }
  1010. },
  1011. },
  1012. }...)
  1013. }
  1014. if includedMetrics.Has(container.ProcessMetrics) {
  1015. c.containerMetrics = append(c.containerMetrics, []containerMetric{
  1016. {
  1017. name: "container_processes",
  1018. help: "Number of processes running inside the container.",
  1019. valueType: prometheus.GaugeValue,
  1020. getValues: func(s *info.ContainerStats) metricValues {
  1021. return metricValues{{value: float64(s.Processes.ProcessCount), timestamp: s.Timestamp}}
  1022. },
  1023. },
  1024. {
  1025. name: "container_file_descriptors",
  1026. help: "Number of open file descriptors for the container.",
  1027. valueType: prometheus.GaugeValue,
  1028. getValues: func(s *info.ContainerStats) metricValues {
  1029. return metricValues{{value: float64(s.Processes.FdCount), timestamp: s.Timestamp}}
  1030. },
  1031. },
  1032. }...)
  1033. }
  1034. return c
  1035. }
  1036. var (
  1037. versionInfoDesc = prometheus.NewDesc("cadvisor_version_info", "A metric with a constant '1' value labeled by kernel version, OS version, docker version, cadvisor version & cadvisor revision.", []string{"kernelVersion", "osVersion", "dockerVersion", "cadvisorVersion", "cadvisorRevision"}, nil)
  1038. machineInfoCoresDesc = prometheus.NewDesc("machine_cpu_cores", "Number of CPU cores on the machine.", nil, nil)
  1039. machineInfoMemoryDesc = prometheus.NewDesc("machine_memory_bytes", "Amount of memory installed on the machine.", nil, nil)
  1040. )
  1041. // Describe describes all the metrics ever exported by cadvisor. It
  1042. // implements prometheus.PrometheusCollector.
  1043. func (c *PrometheusCollector) Describe(ch chan<- *prometheus.Desc) {
  1044. c.errors.Describe(ch)
  1045. for _, cm := range c.containerMetrics {
  1046. ch <- cm.desc([]string{})
  1047. }
  1048. ch <- versionInfoDesc
  1049. ch <- machineInfoCoresDesc
  1050. ch <- machineInfoMemoryDesc
  1051. }
  1052. // Collect fetches the stats from all containers and delivers them as
  1053. // Prometheus metrics. It implements prometheus.PrometheusCollector.
  1054. func (c *PrometheusCollector) Collect(ch chan<- prometheus.Metric) {
  1055. c.errors.Set(0)
  1056. c.collectMachineInfo(ch)
  1057. c.collectVersionInfo(ch)
  1058. c.collectContainersInfo(ch)
  1059. c.errors.Collect(ch)
  1060. }
  1061. const (
  1062. // ContainerLabelPrefix is the prefix added to all container labels.
  1063. ContainerLabelPrefix = "container_label_"
  1064. // ContainerEnvPrefix is the prefix added to all env variable labels.
  1065. ContainerEnvPrefix = "container_env_"
  1066. // LabelID is the name of the id label.
  1067. LabelID = "id"
  1068. // LabelName is the name of the name label.
  1069. LabelName = "name"
  1070. // LabelImage is the name of the image label.
  1071. LabelImage = "image"
  1072. )
  1073. // DefaultContainerLabels implements ContainerLabelsFunc. It exports the
  1074. // container name, first alias, image name as well as all its env and label
  1075. // values.
  1076. func DefaultContainerLabels(container *info.ContainerInfo) map[string]string {
  1077. set := map[string]string{LabelID: container.Name}
  1078. if len(container.Aliases) > 0 {
  1079. set[LabelName] = container.Aliases[0]
  1080. }
  1081. if image := container.Spec.Image; len(image) > 0 {
  1082. set[LabelImage] = image
  1083. }
  1084. for k, v := range container.Spec.Labels {
  1085. set[ContainerLabelPrefix+k] = v
  1086. }
  1087. for k, v := range container.Spec.Envs {
  1088. set[ContainerEnvPrefix+k] = v
  1089. }
  1090. return set
  1091. }
  1092. // BaseContainerLabels returns a ContainerLabelsFunc that exports the container
  1093. // name, first alias, image name as well as white listed label values.
  1094. func BaseContainerLabels(whiteList []string) func(container *info.ContainerInfo) map[string]string {
  1095. whiteListMap := make(map[string]struct{}, len(whiteList))
  1096. for _, k := range whiteList {
  1097. whiteListMap[k] = struct{}{}
  1098. }
  1099. return func(container *info.ContainerInfo) map[string]string {
  1100. set := map[string]string{LabelID: container.Name}
  1101. if len(container.Aliases) > 0 {
  1102. set[LabelName] = container.Aliases[0]
  1103. }
  1104. if image := container.Spec.Image; len(image) > 0 {
  1105. set[LabelImage] = image
  1106. }
  1107. for k, v := range container.Spec.Labels {
  1108. if _, ok := whiteListMap[k]; ok {
  1109. set[ContainerLabelPrefix+k] = v
  1110. }
  1111. }
  1112. return set
  1113. }
  1114. }
  1115. func (c *PrometheusCollector) collectContainersInfo(ch chan<- prometheus.Metric) {
  1116. containers, err := c.infoProvider.SubcontainersInfo("/", &info.ContainerInfoRequest{NumStats: 1})
  1117. if err != nil {
  1118. c.errors.Set(1)
  1119. klog.Warningf("Couldn't get containers: %s", err)
  1120. return
  1121. }
  1122. rawLabels := map[string]struct{}{}
  1123. for _, container := range containers {
  1124. for l := range c.containerLabelsFunc(container) {
  1125. rawLabels[l] = struct{}{}
  1126. }
  1127. }
  1128. for _, cont := range containers {
  1129. values := make([]string, 0, len(rawLabels))
  1130. labels := make([]string, 0, len(rawLabels))
  1131. containerLabels := c.containerLabelsFunc(cont)
  1132. for l := range rawLabels {
  1133. labels = append(labels, sanitizeLabelName(l))
  1134. values = append(values, containerLabels[l])
  1135. }
  1136. // Container spec
  1137. desc := prometheus.NewDesc("container_start_time_seconds", "Start time of the container since unix epoch in seconds.", labels, nil)
  1138. ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, float64(cont.Spec.CreationTime.Unix()), values...)
  1139. if cont.Spec.HasCpu {
  1140. desc = prometheus.NewDesc("container_spec_cpu_period", "CPU period of the container.", labels, nil)
  1141. ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, float64(cont.Spec.Cpu.Period), values...)
  1142. if cont.Spec.Cpu.Quota != 0 {
  1143. desc = prometheus.NewDesc("container_spec_cpu_quota", "CPU quota of the container.", labels, nil)
  1144. ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, float64(cont.Spec.Cpu.Quota), values...)
  1145. }
  1146. desc := prometheus.NewDesc("container_spec_cpu_shares", "CPU share of the container.", labels, nil)
  1147. ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, float64(cont.Spec.Cpu.Limit), values...)
  1148. }
  1149. if cont.Spec.HasMemory {
  1150. desc := prometheus.NewDesc("container_spec_memory_limit_bytes", "Memory limit for the container.", labels, nil)
  1151. ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, specMemoryValue(cont.Spec.Memory.Limit), values...)
  1152. desc = prometheus.NewDesc("container_spec_memory_swap_limit_bytes", "Memory swap limit for the container.", labels, nil)
  1153. ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, specMemoryValue(cont.Spec.Memory.SwapLimit), values...)
  1154. desc = prometheus.NewDesc("container_spec_memory_reservation_limit_bytes", "Memory reservation limit for the container.", labels, nil)
  1155. ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, specMemoryValue(cont.Spec.Memory.Reservation), values...)
  1156. }
  1157. // Now for the actual metrics
  1158. if len(cont.Stats) == 0 {
  1159. continue
  1160. }
  1161. stats := cont.Stats[0]
  1162. for _, cm := range c.containerMetrics {
  1163. if cm.condition != nil && !cm.condition(cont.Spec) {
  1164. continue
  1165. }
  1166. desc := cm.desc(labels)
  1167. for _, metricValue := range cm.getValues(stats) {
  1168. ch <- prometheus.NewMetricWithTimestamp(
  1169. metricValue.timestamp,
  1170. prometheus.MustNewConstMetric(desc, cm.valueType, float64(metricValue.value), append(values, metricValue.labels...)...),
  1171. )
  1172. }
  1173. }
  1174. }
  1175. }
  1176. func (c *PrometheusCollector) collectVersionInfo(ch chan<- prometheus.Metric) {
  1177. versionInfo, err := c.infoProvider.GetVersionInfo()
  1178. if err != nil {
  1179. c.errors.Set(1)
  1180. klog.Warningf("Couldn't get version info: %s", err)
  1181. return
  1182. }
  1183. ch <- prometheus.MustNewConstMetric(versionInfoDesc, prometheus.GaugeValue, 1, []string{versionInfo.KernelVersion, versionInfo.ContainerOsVersion, versionInfo.DockerVersion, versionInfo.CadvisorVersion, versionInfo.CadvisorRevision}...)
  1184. }
  1185. func (c *PrometheusCollector) collectMachineInfo(ch chan<- prometheus.Metric) {
  1186. machineInfo, err := c.infoProvider.GetMachineInfo()
  1187. if err != nil {
  1188. c.errors.Set(1)
  1189. klog.Warningf("Couldn't get machine info: %s", err)
  1190. return
  1191. }
  1192. ch <- prometheus.MustNewConstMetric(machineInfoCoresDesc, prometheus.GaugeValue, float64(machineInfo.NumCores))
  1193. ch <- prometheus.MustNewConstMetric(machineInfoMemoryDesc, prometheus.GaugeValue, float64(machineInfo.MemoryCapacity))
  1194. }
  1195. // Size after which we consider memory to be "unlimited". This is not
  1196. // MaxInt64 due to rounding by the kernel.
  1197. const maxMemorySize = uint64(1 << 62)
  1198. func specMemoryValue(v uint64) float64 {
  1199. if v > maxMemorySize {
  1200. return 0
  1201. }
  1202. return float64(v)
  1203. }
  1204. var invalidLabelCharRE = regexp.MustCompile(`[^a-zA-Z0-9_]`)
  1205. // sanitizeLabelName replaces anything that doesn't match
  1206. // client_label.LabelNameRE with an underscore.
  1207. func sanitizeLabelName(name string) string {
  1208. return invalidLabelCharRE.ReplaceAllString(name, "_")
  1209. }