prometheus_resource_metrics_test.go 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296
  1. /*
  2. Copyright 2019 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package stats
  14. import (
  15. "fmt"
  16. "strings"
  17. "testing"
  18. "time"
  19. "github.com/stretchr/testify/mock"
  20. metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
  21. "k8s.io/component-base/metrics"
  22. "k8s.io/component-base/metrics/testutil"
  23. statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
  24. summary "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
  25. )
  26. // TODO(RainbowMango): The Desc variables and value functions should be shared with source code.
  27. // It can not be shared now because there is a import cycle.
  28. // Consider deprecate endpoint `/resource/v1alpha1` as stability framework could offer guarantee now.
  29. var (
  30. nodeCPUUsageDesc = metrics.NewDesc("node_cpu_usage_seconds_total",
  31. "Cumulative cpu time consumed by the node in core-seconds",
  32. nil,
  33. nil,
  34. metrics.ALPHA,
  35. "")
  36. nodeMemoryUsageDesc = metrics.NewDesc("node_memory_working_set_bytes",
  37. "Current working set of the node in bytes",
  38. nil,
  39. nil,
  40. metrics.ALPHA,
  41. "")
  42. containerCPUUsageDesc = metrics.NewDesc("container_cpu_usage_seconds_total",
  43. "Cumulative cpu time consumed by the container in core-seconds",
  44. []string{"container", "pod", "namespace"},
  45. nil,
  46. metrics.ALPHA,
  47. "")
  48. containerMemoryUsageDesc = metrics.NewDesc("container_memory_working_set_bytes",
  49. "Current working set of the container in bytes",
  50. []string{"container", "pod", "namespace"},
  51. nil,
  52. metrics.ALPHA,
  53. "")
  54. )
  55. // getNodeCPUMetrics returns CPU utilization of a node.
  56. func getNodeCPUMetrics(s summary.NodeStats) (*float64, time.Time) {
  57. if s.CPU == nil {
  58. return nil, time.Time{}
  59. }
  60. v := float64(*s.CPU.UsageCoreNanoSeconds) / float64(time.Second)
  61. return &v, s.CPU.Time.Time
  62. }
  63. // getNodeMemoryMetrics returns memory utilization of a node.
  64. func getNodeMemoryMetrics(s summary.NodeStats) (*float64, time.Time) {
  65. if s.Memory == nil {
  66. return nil, time.Time{}
  67. }
  68. v := float64(*s.Memory.WorkingSetBytes)
  69. return &v, s.Memory.Time.Time
  70. }
  71. // getContainerCPUMetrics returns CPU utilization of a container.
  72. func getContainerCPUMetrics(s summary.ContainerStats) (*float64, time.Time) {
  73. if s.CPU == nil {
  74. return nil, time.Time{}
  75. }
  76. v := float64(*s.CPU.UsageCoreNanoSeconds) / float64(time.Second)
  77. return &v, s.CPU.Time.Time
  78. }
  79. // getContainerMemoryMetrics returns memory utilization of a container.
  80. func getContainerMemoryMetrics(s summary.ContainerStats) (*float64, time.Time) {
  81. if s.Memory == nil {
  82. return nil, time.Time{}
  83. }
  84. v := float64(*s.Memory.WorkingSetBytes)
  85. return &v, s.Memory.Time.Time
  86. }
  87. // Config is the v1alpha1 resource metrics definition
  88. func Config() ResourceMetricsConfig {
  89. return ResourceMetricsConfig{
  90. NodeMetrics: []NodeResourceMetric{
  91. {
  92. Desc: nodeCPUUsageDesc,
  93. ValueFn: getNodeCPUMetrics,
  94. },
  95. {
  96. Desc: nodeMemoryUsageDesc,
  97. ValueFn: getNodeMemoryMetrics,
  98. },
  99. },
  100. ContainerMetrics: []ContainerResourceMetric{
  101. {
  102. Desc: containerCPUUsageDesc,
  103. ValueFn: getContainerCPUMetrics,
  104. },
  105. {
  106. Desc: containerMemoryUsageDesc,
  107. ValueFn: getContainerMemoryMetrics,
  108. },
  109. },
  110. }
  111. }
  112. type mockSummaryProvider struct {
  113. mock.Mock
  114. }
  115. func (m *mockSummaryProvider) Get(updateStats bool) (*statsapi.Summary, error) {
  116. args := m.Called(updateStats)
  117. return args.Get(0).(*statsapi.Summary), args.Error(1)
  118. }
  119. func (m *mockSummaryProvider) GetCPUAndMemoryStats() (*statsapi.Summary, error) {
  120. args := m.Called()
  121. return args.Get(0).(*statsapi.Summary), args.Error(1)
  122. }
  123. func TestCollectResourceMetrics(t *testing.T) {
  124. testTime := metav1.NewTime(time.Unix(2, 0)) // a static timestamp: 2000
  125. tests := []struct {
  126. name string
  127. config ResourceMetricsConfig
  128. summary *statsapi.Summary
  129. summaryErr error
  130. expectedMetricsNames []string
  131. expectedMetrics string
  132. }{
  133. {
  134. name: "error getting summary",
  135. config: Config(),
  136. summary: nil,
  137. summaryErr: fmt.Errorf("failed to get summary"),
  138. expectedMetricsNames: []string{"scrape_error"},
  139. expectedMetrics: `
  140. # HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
  141. # TYPE scrape_error gauge
  142. scrape_error 1
  143. `,
  144. },
  145. {
  146. name: "arbitrary node metrics",
  147. config: Config(),
  148. summary: &statsapi.Summary{
  149. Node: statsapi.NodeStats{
  150. CPU: &statsapi.CPUStats{
  151. Time: testTime,
  152. UsageCoreNanoSeconds: uint64Ptr(10000000000),
  153. },
  154. Memory: &statsapi.MemoryStats{
  155. Time: testTime,
  156. WorkingSetBytes: uint64Ptr(1000),
  157. },
  158. },
  159. },
  160. summaryErr: nil,
  161. expectedMetricsNames: []string{
  162. "node_cpu_usage_seconds_total",
  163. "node_memory_working_set_bytes",
  164. "scrape_error",
  165. },
  166. expectedMetrics: `
  167. # HELP node_cpu_usage_seconds_total [ALPHA] Cumulative cpu time consumed by the node in core-seconds
  168. # TYPE node_cpu_usage_seconds_total gauge
  169. node_cpu_usage_seconds_total 10 2000
  170. # HELP node_memory_working_set_bytes [ALPHA] Current working set of the node in bytes
  171. # TYPE node_memory_working_set_bytes gauge
  172. node_memory_working_set_bytes 1000 2000
  173. # HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
  174. # TYPE scrape_error gauge
  175. scrape_error 0
  176. `,
  177. },
  178. {
  179. name: "arbitrary container metrics for different container, pods and namespaces",
  180. config: Config(),
  181. summary: &statsapi.Summary{
  182. Pods: []statsapi.PodStats{
  183. {
  184. PodRef: statsapi.PodReference{
  185. Name: "pod_a",
  186. Namespace: "namespace_a",
  187. },
  188. Containers: []statsapi.ContainerStats{
  189. {
  190. Name: "container_a",
  191. CPU: &statsapi.CPUStats{
  192. Time: testTime,
  193. UsageCoreNanoSeconds: uint64Ptr(10000000000),
  194. },
  195. Memory: &statsapi.MemoryStats{
  196. Time: testTime,
  197. WorkingSetBytes: uint64Ptr(1000),
  198. },
  199. },
  200. {
  201. Name: "container_b",
  202. CPU: &statsapi.CPUStats{
  203. Time: testTime,
  204. UsageCoreNanoSeconds: uint64Ptr(10000000000),
  205. },
  206. Memory: &statsapi.MemoryStats{
  207. Time: testTime,
  208. WorkingSetBytes: uint64Ptr(1000),
  209. },
  210. },
  211. },
  212. },
  213. {
  214. PodRef: statsapi.PodReference{
  215. Name: "pod_b",
  216. Namespace: "namespace_b",
  217. },
  218. Containers: []statsapi.ContainerStats{
  219. {
  220. Name: "container_a",
  221. CPU: &statsapi.CPUStats{
  222. Time: testTime,
  223. UsageCoreNanoSeconds: uint64Ptr(10000000000),
  224. },
  225. Memory: &statsapi.MemoryStats{
  226. Time: testTime,
  227. WorkingSetBytes: uint64Ptr(1000),
  228. },
  229. },
  230. },
  231. },
  232. },
  233. },
  234. summaryErr: nil,
  235. expectedMetricsNames: []string{
  236. "container_cpu_usage_seconds_total",
  237. "container_memory_working_set_bytes",
  238. "scrape_error",
  239. },
  240. expectedMetrics: `
  241. # HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
  242. # TYPE scrape_error gauge
  243. scrape_error 0
  244. # HELP container_cpu_usage_seconds_total [ALPHA] Cumulative cpu time consumed by the container in core-seconds
  245. # TYPE container_cpu_usage_seconds_total gauge
  246. container_cpu_usage_seconds_total{container="container_a",namespace="namespace_a",pod="pod_a"} 10 2000
  247. container_cpu_usage_seconds_total{container="container_a",namespace="namespace_b",pod="pod_b"} 10 2000
  248. container_cpu_usage_seconds_total{container="container_b",namespace="namespace_a",pod="pod_a"} 10 2000
  249. # HELP container_memory_working_set_bytes [ALPHA] Current working set of the container in bytes
  250. # TYPE container_memory_working_set_bytes gauge
  251. container_memory_working_set_bytes{container="container_a",namespace="namespace_a",pod="pod_a"} 1000 2000
  252. container_memory_working_set_bytes{container="container_a",namespace="namespace_b",pod="pod_b"} 1000 2000
  253. container_memory_working_set_bytes{container="container_b",namespace="namespace_a",pod="pod_a"} 1000 2000
  254. `,
  255. },
  256. }
  257. for _, test := range tests {
  258. tc := test
  259. t.Run(tc.name, func(t *testing.T) {
  260. provider := &mockSummaryProvider{}
  261. provider.On("GetCPUAndMemoryStats").Return(tc.summary, tc.summaryErr)
  262. collector := NewPrometheusResourceMetricCollector(provider, tc.config)
  263. if err := testutil.CustomCollectAndCompare(collector, strings.NewReader(tc.expectedMetrics), tc.expectedMetricsNames...); err != nil {
  264. t.Fatal(err)
  265. }
  266. })
  267. }
  268. }
  269. func uint64Ptr(u uint64) *uint64 {
  270. return &u
  271. }