resource_metrics.go 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  1. /*
  2. Copyright 2019 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package collectors
  14. import (
  15. "time"
  16. "k8s.io/component-base/metrics"
  17. "k8s.io/klog"
  18. summary "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
  19. "k8s.io/kubernetes/pkg/kubelet/server/stats"
  20. )
  21. var (
  22. nodeCPUUsageDesc = metrics.NewDesc("node_cpu_usage_seconds",
  23. "Cumulative cpu time consumed by the node in core-seconds",
  24. nil,
  25. nil,
  26. metrics.ALPHA,
  27. "")
  28. nodeMemoryUsageDesc = metrics.NewDesc("node_memory_working_set_bytes",
  29. "Current working set of the node in bytes",
  30. nil,
  31. nil,
  32. metrics.ALPHA,
  33. "")
  34. containerCPUUsageDesc = metrics.NewDesc("container_cpu_usage_seconds",
  35. "Cumulative cpu time consumed by the container in core-seconds",
  36. []string{"container", "pod", "namespace"},
  37. nil,
  38. metrics.ALPHA,
  39. "")
  40. containerMemoryUsageDesc = metrics.NewDesc("container_memory_working_set_bytes",
  41. "Current working set of the container in bytes",
  42. []string{"container", "pod", "namespace"},
  43. nil,
  44. metrics.ALPHA,
  45. "")
  46. resouceScrapeResultDesc = metrics.NewDesc("scrape_error",
  47. "1 if there was an error while getting container metrics, 0 otherwise",
  48. nil,
  49. nil,
  50. metrics.ALPHA,
  51. "")
  52. )
  53. // NewResourceMetricsCollector returns a metrics.StableCollector which exports resource metrics
  54. func NewResourceMetricsCollector(provider stats.SummaryProvider) metrics.StableCollector {
  55. return &resourceMetricsCollector{
  56. provider: provider,
  57. }
  58. }
  59. type resourceMetricsCollector struct {
  60. metrics.BaseStableCollector
  61. provider stats.SummaryProvider
  62. }
  63. // Check if resourceMetricsCollector implements necessary interface
  64. var _ metrics.StableCollector = &resourceMetricsCollector{}
  65. // DescribeWithStability implements metrics.StableCollector
  66. func (rc *resourceMetricsCollector) DescribeWithStability(ch chan<- *metrics.Desc) {
  67. ch <- nodeCPUUsageDesc
  68. ch <- nodeMemoryUsageDesc
  69. ch <- containerCPUUsageDesc
  70. ch <- containerMemoryUsageDesc
  71. ch <- resouceScrapeResultDesc
  72. }
  73. // CollectWithStability implements metrics.StableCollector
  74. // Since new containers are frequently created and removed, using the Gauge would
  75. // leak metric collectors for containers or pods that no longer exist. Instead, implement
  76. // custom collector in a way that only collects metrics for active containers.
  77. func (rc *resourceMetricsCollector) CollectWithStability(ch chan<- metrics.Metric) {
  78. var errorCount float64
  79. defer func() {
  80. ch <- metrics.NewLazyConstMetric(resouceScrapeResultDesc, metrics.GaugeValue, errorCount)
  81. }()
  82. statsSummary, err := rc.provider.GetCPUAndMemoryStats()
  83. if err != nil {
  84. errorCount = 1
  85. klog.Warningf("Error getting summary for resourceMetric prometheus endpoint: %v", err)
  86. return
  87. }
  88. rc.collectNodeCPUMetrics(ch, statsSummary.Node)
  89. rc.collectNodeMemoryMetrics(ch, statsSummary.Node)
  90. for _, pod := range statsSummary.Pods {
  91. for _, container := range pod.Containers {
  92. rc.collectContainerCPUMetrics(ch, pod, container)
  93. rc.collectContainerMemoryMetrics(ch, pod, container)
  94. }
  95. }
  96. }
  97. func (rc *resourceMetricsCollector) collectNodeCPUMetrics(ch chan<- metrics.Metric, s summary.NodeStats) {
  98. if s.CPU == nil {
  99. return
  100. }
  101. ch <- metrics.NewLazyMetricWithTimestamp(s.CPU.Time.Time,
  102. metrics.NewLazyConstMetric(nodeCPUUsageDesc, metrics.GaugeValue, float64(*s.CPU.UsageCoreNanoSeconds)/float64(time.Second)))
  103. }
  104. func (rc *resourceMetricsCollector) collectNodeMemoryMetrics(ch chan<- metrics.Metric, s summary.NodeStats) {
  105. if s.Memory == nil {
  106. return
  107. }
  108. ch <- metrics.NewLazyMetricWithTimestamp(s.Memory.Time.Time,
  109. metrics.NewLazyConstMetric(nodeMemoryUsageDesc, metrics.GaugeValue, float64(*s.Memory.WorkingSetBytes)))
  110. }
  111. func (rc *resourceMetricsCollector) collectContainerCPUMetrics(ch chan<- metrics.Metric, pod summary.PodStats, s summary.ContainerStats) {
  112. if s.CPU == nil {
  113. return
  114. }
  115. ch <- metrics.NewLazyMetricWithTimestamp(s.CPU.Time.Time,
  116. metrics.NewLazyConstMetric(containerCPUUsageDesc, metrics.GaugeValue,
  117. float64(*s.CPU.UsageCoreNanoSeconds)/float64(time.Second), s.Name, pod.PodRef.Name, pod.PodRef.Namespace))
  118. }
  119. func (rc *resourceMetricsCollector) collectContainerMemoryMetrics(ch chan<- metrics.Metric, pod summary.PodStats, s summary.ContainerStats) {
  120. if s.Memory == nil {
  121. return
  122. }
  123. ch <- metrics.NewLazyMetricWithTimestamp(s.Memory.Time.Time,
  124. metrics.NewLazyConstMetric(containerMemoryUsageDesc, metrics.GaugeValue,
  125. float64(*s.Memory.WorkingSetBytes), s.Name, pod.PodRef.Name, pod.PodRef.Namespace))
  126. }