metrics.go 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555
  1. /*
  2. Copyright 2015 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package metrics
  14. import (
  15. "fmt"
  16. "sync"
  17. "time"
  18. "github.com/prometheus/client_golang/prometheus"
  19. corev1 "k8s.io/api/core/v1"
  20. "k8s.io/apimachinery/pkg/types"
  21. utilfeature "k8s.io/apiserver/pkg/util/feature"
  22. "k8s.io/klog"
  23. "k8s.io/kubernetes/pkg/features"
  24. kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
  25. )
  // Subsystem under which every kubelet metric is published, plus the name and
  // label key of the node-name gauge.
  const (
  	KubeletSubsystem = "kubelet"
  	NodeNameKey      = "node_name"
  	NodeLabelKey     = "node"
  )

  // Names of the pod-worker, cgroup-manager, PLEG and eviction metrics.
  const (
  	PodWorkerDurationKey       = "pod_worker_duration_seconds"
  	PodStartDurationKey        = "pod_start_duration_seconds"
  	CgroupManagerOperationsKey = "cgroup_manager_duration_seconds"
  	PodWorkerStartDurationKey  = "pod_worker_start_duration_seconds"
  	PLEGRelistDurationKey      = "pleg_relist_duration_seconds"
  	PLEGDiscardEventsKey       = "pleg_discard_events"
  	PLEGRelistIntervalKey      = "pleg_relist_interval_seconds"
  	EvictionStatsAgeKey        = "eviction_stats_age_seconds"
  )

  // Names of the deprecated microsecond-based counterparts of the metrics above.
  const (
  	DeprecatedPodWorkerLatencyKey        = "pod_worker_latency_microseconds"
  	DeprecatedPodStartLatencyKey         = "pod_start_latency_microseconds"
  	DeprecatedCgroupManagerOperationsKey = "cgroup_manager_latency_microseconds"
  	DeprecatedPodWorkerStartLatencyKey   = "pod_worker_start_latency_microseconds"
  	DeprecatedPLEGRelistLatencyKey       = "pleg_relist_latency_microseconds"
  	DeprecatedPLEGRelistIntervalKey      = "pleg_relist_interval_microseconds"
  	DeprecatedEvictionStatsAgeKey        = "eviction_stats_age_microseconds"
  )

  // Names of the volume statistics metrics.
  const (
  	VolumeStatsCapacityBytesKey  = "volume_stats_capacity_bytes"
  	VolumeStatsAvailableBytesKey = "volume_stats_available_bytes"
  	VolumeStatsUsedBytesKey      = "volume_stats_used_bytes"
  	VolumeStatsInodesKey         = "volume_stats_inodes"
  	VolumeStatsInodesFreeKey     = "volume_stats_inodes_free"
  	VolumeStatsInodesUsedKey     = "volume_stats_inodes_used"
  )

  // Metrics keys of remote runtime operations.
  const (
  	RuntimeOperationsKey                  = "runtime_operations_total"
  	RuntimeOperationsDurationKey          = "runtime_operations_duration_seconds"
  	RuntimeOperationsErrorsKey            = "runtime_operations_errors_total"
  	DeprecatedRuntimeOperationsKey        = "runtime_operations"
  	DeprecatedRuntimeOperationsLatencyKey = "runtime_operations_latency_microseconds"
  	DeprecatedRuntimeOperationsErrorsKey  = "runtime_operations_errors"
  )

  // Metrics keys of device plugin operations.
  const (
  	DevicePluginRegistrationCountKey           = "device_plugin_registration_total"
  	DevicePluginAllocationDurationKey          = "device_plugin_alloc_duration_seconds"
  	DeprecatedDevicePluginRegistrationCountKey = "device_plugin_registration_count"
  	DeprecatedDevicePluginAllocationLatencyKey = "device_plugin_alloc_latency_microseconds"
  )

  // Metric keys for node config.
  const (
  	AssignedConfigKey      = "node_config_assigned"
  	ActiveConfigKey        = "node_config_active"
  	LastKnownGoodConfigKey = "node_config_last_known_good"
  	ConfigErrorKey         = "node_config_error"
  )

  // Label keys (and the one fixed label value) attached to the node config metrics.
  const (
  	ConfigSourceLabelKey          = "node_config_source"
  	ConfigSourceLabelValueLocal   = "local"
  	ConfigUIDLabelKey             = "node_config_uid"
  	ConfigResourceVersionLabelKey = "node_config_resource_version"
  	KubeletConfigKeyLabelKey      = "node_config_kubelet_key"
  )

  // Metrics keys for RuntimeClass.
  const (
  	RunPodSandboxDurationKey = "run_podsandbox_duration_seconds"
  	RunPodSandboxErrorsKey   = "run_podsandbox_errors_total"
  )
  77. var (
  78. NodeName = prometheus.NewGaugeVec(
  79. prometheus.GaugeOpts{
  80. Subsystem: KubeletSubsystem,
  81. Name: NodeNameKey,
  82. Help: "The node's name. The count is always 1.",
  83. },
  84. []string{NodeLabelKey},
  85. )
  86. ContainersPerPodCount = prometheus.NewHistogram(
  87. prometheus.HistogramOpts{
  88. Subsystem: KubeletSubsystem,
  89. Name: "containers_per_pod_count",
  90. Help: "The number of containers per pod.",
  91. Buckets: prometheus.DefBuckets,
  92. },
  93. )
  94. PodWorkerDuration = prometheus.NewHistogramVec(
  95. prometheus.HistogramOpts{
  96. Subsystem: KubeletSubsystem,
  97. Name: PodWorkerDurationKey,
  98. Help: "Duration in seconds to sync a single pod. Broken down by operation type: create, update, or sync",
  99. Buckets: prometheus.DefBuckets,
  100. },
  101. []string{"operation_type"},
  102. )
  103. PodStartDuration = prometheus.NewHistogram(
  104. prometheus.HistogramOpts{
  105. Subsystem: KubeletSubsystem,
  106. Name: PodStartDurationKey,
  107. Help: "Duration in seconds for a single pod to go from pending to running.",
  108. Buckets: prometheus.DefBuckets,
  109. },
  110. )
  111. CgroupManagerDuration = prometheus.NewHistogramVec(
  112. prometheus.HistogramOpts{
  113. Subsystem: KubeletSubsystem,
  114. Name: CgroupManagerOperationsKey,
  115. Help: "Duration in seconds for cgroup manager operations. Broken down by method.",
  116. Buckets: prometheus.DefBuckets,
  117. },
  118. []string{"operation_type"},
  119. )
  120. PodWorkerStartDuration = prometheus.NewHistogram(
  121. prometheus.HistogramOpts{
  122. Subsystem: KubeletSubsystem,
  123. Name: PodWorkerStartDurationKey,
  124. Help: "Duration in seconds from seeing a pod to starting a worker.",
  125. Buckets: prometheus.DefBuckets,
  126. },
  127. )
  128. PLEGRelistDuration = prometheus.NewHistogram(
  129. prometheus.HistogramOpts{
  130. Subsystem: KubeletSubsystem,
  131. Name: PLEGRelistDurationKey,
  132. Help: "Duration in seconds for relisting pods in PLEG.",
  133. Buckets: prometheus.DefBuckets,
  134. },
  135. )
  136. PLEGDiscardEvents = prometheus.NewCounterVec(
  137. prometheus.CounterOpts{
  138. Subsystem: KubeletSubsystem,
  139. Name: PLEGDiscardEventsKey,
  140. Help: "The number of discard events in PLEG.",
  141. },
  142. []string{},
  143. )
  144. PLEGRelistInterval = prometheus.NewHistogram(
  145. prometheus.HistogramOpts{
  146. Subsystem: KubeletSubsystem,
  147. Name: PLEGRelistIntervalKey,
  148. Help: "Interval in seconds between relisting in PLEG.",
  149. Buckets: prometheus.DefBuckets,
  150. },
  151. )
  152. // Metrics of remote runtime operations.
  153. RuntimeOperations = prometheus.NewCounterVec(
  154. prometheus.CounterOpts{
  155. Subsystem: KubeletSubsystem,
  156. Name: RuntimeOperationsKey,
  157. Help: "Cumulative number of runtime operations by operation type.",
  158. },
  159. []string{"operation_type"},
  160. )
  161. RuntimeOperationsDuration = prometheus.NewHistogramVec(
  162. prometheus.HistogramOpts{
  163. Subsystem: KubeletSubsystem,
  164. Name: RuntimeOperationsDurationKey,
  165. Help: "Duration in seconds of runtime operations. Broken down by operation type.",
  166. Buckets: prometheus.DefBuckets,
  167. },
  168. []string{"operation_type"},
  169. )
  170. RuntimeOperationsErrors = prometheus.NewCounterVec(
  171. prometheus.CounterOpts{
  172. Subsystem: KubeletSubsystem,
  173. Name: RuntimeOperationsErrorsKey,
  174. Help: "Cumulative number of runtime operation errors by operation type.",
  175. },
  176. []string{"operation_type"},
  177. )
  178. EvictionStatsAge = prometheus.NewHistogramVec(
  179. prometheus.HistogramOpts{
  180. Subsystem: KubeletSubsystem,
  181. Name: EvictionStatsAgeKey,
  182. Help: "Time between when stats are collected, and when pod is evicted based on those stats by eviction signal",
  183. Buckets: prometheus.DefBuckets,
  184. },
  185. []string{"eviction_signal"},
  186. )
  187. DevicePluginRegistrationCount = prometheus.NewCounterVec(
  188. prometheus.CounterOpts{
  189. Subsystem: KubeletSubsystem,
  190. Name: DevicePluginRegistrationCountKey,
  191. Help: "Cumulative number of device plugin registrations. Broken down by resource name.",
  192. },
  193. []string{"resource_name"},
  194. )
  195. DevicePluginAllocationDuration = prometheus.NewHistogramVec(
  196. prometheus.HistogramOpts{
  197. Subsystem: KubeletSubsystem,
  198. Name: DevicePluginAllocationDurationKey,
  199. Help: "Duration in seconds to serve a device plugin Allocation request. Broken down by resource name.",
  200. Buckets: prometheus.DefBuckets,
  201. },
  202. []string{"resource_name"},
  203. )
  204. DeprecatedPodWorkerLatency = prometheus.NewSummaryVec(
  205. prometheus.SummaryOpts{
  206. Subsystem: KubeletSubsystem,
  207. Name: DeprecatedPodWorkerLatencyKey,
  208. Help: "(Deprecated) Latency in microseconds to sync a single pod. Broken down by operation type: create, update, or sync",
  209. },
  210. []string{"operation_type"},
  211. )
  212. DeprecatedPodStartLatency = prometheus.NewSummary(
  213. prometheus.SummaryOpts{
  214. Subsystem: KubeletSubsystem,
  215. Name: DeprecatedPodStartLatencyKey,
  216. Help: "(Deprecated) Latency in microseconds for a single pod to go from pending to running.",
  217. },
  218. )
  219. DeprecatedCgroupManagerLatency = prometheus.NewSummaryVec(
  220. prometheus.SummaryOpts{
  221. Subsystem: KubeletSubsystem,
  222. Name: DeprecatedCgroupManagerOperationsKey,
  223. Help: "(Deprecated) Latency in microseconds for cgroup manager operations. Broken down by method.",
  224. },
  225. []string{"operation_type"},
  226. )
  227. DeprecatedPodWorkerStartLatency = prometheus.NewSummary(
  228. prometheus.SummaryOpts{
  229. Subsystem: KubeletSubsystem,
  230. Name: DeprecatedPodWorkerStartLatencyKey,
  231. Help: "(Deprecated) Latency in microseconds from seeing a pod to starting a worker.",
  232. },
  233. )
  234. DeprecatedPLEGRelistLatency = prometheus.NewSummary(
  235. prometheus.SummaryOpts{
  236. Subsystem: KubeletSubsystem,
  237. Name: DeprecatedPLEGRelistLatencyKey,
  238. Help: "(Deprecated) Latency in microseconds for relisting pods in PLEG.",
  239. },
  240. )
  241. DeprecatedPLEGRelistInterval = prometheus.NewSummary(
  242. prometheus.SummaryOpts{
  243. Subsystem: KubeletSubsystem,
  244. Name: DeprecatedPLEGRelistIntervalKey,
  245. Help: "(Deprecated) Interval in microseconds between relisting in PLEG.",
  246. },
  247. )
  248. DeprecatedRuntimeOperations = prometheus.NewCounterVec(
  249. prometheus.CounterOpts{
  250. Subsystem: KubeletSubsystem,
  251. Name: DeprecatedRuntimeOperationsKey,
  252. Help: "(Deprecated) Cumulative number of runtime operations by operation type.",
  253. },
  254. []string{"operation_type"},
  255. )
  256. DeprecatedRuntimeOperationsLatency = prometheus.NewSummaryVec(
  257. prometheus.SummaryOpts{
  258. Subsystem: KubeletSubsystem,
  259. Name: DeprecatedRuntimeOperationsLatencyKey,
  260. Help: "(Deprecated) Latency in microseconds of runtime operations. Broken down by operation type.",
  261. },
  262. []string{"operation_type"},
  263. )
  264. DeprecatedRuntimeOperationsErrors = prometheus.NewCounterVec(
  265. prometheus.CounterOpts{
  266. Subsystem: KubeletSubsystem,
  267. Name: DeprecatedRuntimeOperationsErrorsKey,
  268. Help: "(Deprecated) Cumulative number of runtime operation errors by operation type.",
  269. },
  270. []string{"operation_type"},
  271. )
  272. DeprecatedEvictionStatsAge = prometheus.NewSummaryVec(
  273. prometheus.SummaryOpts{
  274. Subsystem: KubeletSubsystem,
  275. Name: DeprecatedEvictionStatsAgeKey,
  276. Help: "(Deprecated) Time between when stats are collected, and when pod is evicted based on those stats by eviction signal",
  277. },
  278. []string{"eviction_signal"},
  279. )
  280. DeprecatedDevicePluginRegistrationCount = prometheus.NewCounterVec(
  281. prometheus.CounterOpts{
  282. Subsystem: KubeletSubsystem,
  283. Name: DeprecatedDevicePluginRegistrationCountKey,
  284. Help: "(Deprecated) Cumulative number of device plugin registrations. Broken down by resource name.",
  285. },
  286. []string{"resource_name"},
  287. )
  288. DeprecatedDevicePluginAllocationLatency = prometheus.NewSummaryVec(
  289. prometheus.SummaryOpts{
  290. Subsystem: KubeletSubsystem,
  291. Name: DeprecatedDevicePluginAllocationLatencyKey,
  292. Help: "(Deprecated) Latency in microseconds to serve a device plugin Allocation request. Broken down by resource name.",
  293. },
  294. []string{"resource_name"},
  295. )
  296. // Metrics for node config
  297. AssignedConfig = prometheus.NewGaugeVec(
  298. prometheus.GaugeOpts{
  299. Subsystem: KubeletSubsystem,
  300. Name: AssignedConfigKey,
  301. Help: "The node's understanding of intended config. The count is always 1.",
  302. },
  303. []string{ConfigSourceLabelKey, ConfigUIDLabelKey, ConfigResourceVersionLabelKey, KubeletConfigKeyLabelKey},
  304. )
  305. ActiveConfig = prometheus.NewGaugeVec(
  306. prometheus.GaugeOpts{
  307. Subsystem: KubeletSubsystem,
  308. Name: ActiveConfigKey,
  309. Help: "The config source the node is actively using. The count is always 1.",
  310. },
  311. []string{ConfigSourceLabelKey, ConfigUIDLabelKey, ConfigResourceVersionLabelKey, KubeletConfigKeyLabelKey},
  312. )
  313. LastKnownGoodConfig = prometheus.NewGaugeVec(
  314. prometheus.GaugeOpts{
  315. Subsystem: KubeletSubsystem,
  316. Name: LastKnownGoodConfigKey,
  317. Help: "The config source the node will fall back to when it encounters certain errors. The count is always 1.",
  318. },
  319. []string{ConfigSourceLabelKey, ConfigUIDLabelKey, ConfigResourceVersionLabelKey, KubeletConfigKeyLabelKey},
  320. )
  321. ConfigError = prometheus.NewGauge(
  322. prometheus.GaugeOpts{
  323. Subsystem: KubeletSubsystem,
  324. Name: ConfigErrorKey,
  325. Help: "This metric is true (1) if the node is experiencing a configuration-related error, false (0) otherwise.",
  326. },
  327. )
  328. RunPodSandboxDuration = prometheus.NewHistogramVec(
  329. prometheus.HistogramOpts{
  330. Subsystem: KubeletSubsystem,
  331. Name: RunPodSandboxDurationKey,
  332. Help: "Duration in seconds of the run_podsandbox operations. Broken down by RuntimeClass.",
  333. // Use DefBuckets for now, will customize the buckets if necessary.
  334. Buckets: prometheus.DefBuckets,
  335. },
  336. []string{"runtime_handler"},
  337. )
  338. RunPodSandboxErrors = prometheus.NewCounterVec(
  339. prometheus.CounterOpts{
  340. Subsystem: KubeletSubsystem,
  341. Name: RunPodSandboxErrorsKey,
  342. Help: "Cumulative number of the run_podsandbox operation errors by RuntimeClass.",
  343. },
  344. []string{"runtime_handler"},
  345. )
  346. )
  347. var registerMetrics sync.Once
  348. // Register all metrics.
  349. func Register(containerCache kubecontainer.RuntimeCache, collectors ...prometheus.Collector) {
  350. // Register the metrics.
  351. registerMetrics.Do(func() {
  352. prometheus.MustRegister(NodeName)
  353. prometheus.MustRegister(PodWorkerDuration)
  354. prometheus.MustRegister(PodStartDuration)
  355. prometheus.MustRegister(CgroupManagerDuration)
  356. prometheus.MustRegister(PodWorkerStartDuration)
  357. prometheus.MustRegister(ContainersPerPodCount)
  358. prometheus.MustRegister(newPodAndContainerCollector(containerCache))
  359. prometheus.MustRegister(PLEGRelistDuration)
  360. prometheus.MustRegister(PLEGDiscardEvents)
  361. prometheus.MustRegister(PLEGRelistInterval)
  362. prometheus.MustRegister(RuntimeOperations)
  363. prometheus.MustRegister(RuntimeOperationsDuration)
  364. prometheus.MustRegister(RuntimeOperationsErrors)
  365. prometheus.MustRegister(EvictionStatsAge)
  366. prometheus.MustRegister(DevicePluginRegistrationCount)
  367. prometheus.MustRegister(DevicePluginAllocationDuration)
  368. prometheus.MustRegister(DeprecatedPodWorkerLatency)
  369. prometheus.MustRegister(DeprecatedPodStartLatency)
  370. prometheus.MustRegister(DeprecatedCgroupManagerLatency)
  371. prometheus.MustRegister(DeprecatedPodWorkerStartLatency)
  372. prometheus.MustRegister(DeprecatedPLEGRelistLatency)
  373. prometheus.MustRegister(DeprecatedPLEGRelistInterval)
  374. prometheus.MustRegister(DeprecatedRuntimeOperations)
  375. prometheus.MustRegister(DeprecatedRuntimeOperationsLatency)
  376. prometheus.MustRegister(DeprecatedRuntimeOperationsErrors)
  377. prometheus.MustRegister(DeprecatedEvictionStatsAge)
  378. prometheus.MustRegister(DeprecatedDevicePluginRegistrationCount)
  379. prometheus.MustRegister(DeprecatedDevicePluginAllocationLatency)
  380. if utilfeature.DefaultFeatureGate.Enabled(features.DynamicKubeletConfig) {
  381. prometheus.MustRegister(AssignedConfig)
  382. prometheus.MustRegister(ActiveConfig)
  383. prometheus.MustRegister(LastKnownGoodConfig)
  384. prometheus.MustRegister(ConfigError)
  385. }
  386. for _, collector := range collectors {
  387. prometheus.MustRegister(collector)
  388. }
  389. })
  390. }
  // SinceInMicroseconds returns the time elapsed since start as a whole number
  // of microseconds; any sub-microsecond remainder is truncated.
  func SinceInMicroseconds(start time.Time) float64 {
  	// Duration is an integer nanosecond count, so this division truncates.
  	wholeMicros := time.Since(start) / time.Microsecond
  	return float64(wholeMicros)
  }
  // SinceInSeconds returns the time elapsed since start as a floating-point
  // number of seconds.
  func SinceInSeconds(start time.Time) float64 {
  	elapsed := time.Since(start)
  	return elapsed.Seconds()
  }
  399. func newPodAndContainerCollector(containerCache kubecontainer.RuntimeCache) *podAndContainerCollector {
  400. return &podAndContainerCollector{
  401. containerCache: containerCache,
  402. }
  403. }
  404. // Custom collector for current pod and container counts.
  405. type podAndContainerCollector struct {
  406. // Cache for accessing information about running containers.
  407. containerCache kubecontainer.RuntimeCache
  408. }
  409. // TODO(vmarmol): Split by source?
  410. var (
  411. runningPodCountDesc = prometheus.NewDesc(
  412. prometheus.BuildFQName("", KubeletSubsystem, "running_pod_count"),
  413. "Number of pods currently running",
  414. nil, nil)
  415. runningContainerCountDesc = prometheus.NewDesc(
  416. prometheus.BuildFQName("", KubeletSubsystem, "running_container_count"),
  417. "Number of containers currently running",
  418. nil, nil)
  419. )
  420. func (pc *podAndContainerCollector) Describe(ch chan<- *prometheus.Desc) {
  421. ch <- runningPodCountDesc
  422. ch <- runningContainerCountDesc
  423. }
  424. func (pc *podAndContainerCollector) Collect(ch chan<- prometheus.Metric) {
  425. runningPods, err := pc.containerCache.GetPods()
  426. if err != nil {
  427. klog.Warningf("Failed to get running container information while collecting metrics: %v", err)
  428. return
  429. }
  430. runningContainers := 0
  431. for _, p := range runningPods {
  432. runningContainers += len(p.Containers)
  433. }
  434. ch <- prometheus.MustNewConstMetric(
  435. runningPodCountDesc,
  436. prometheus.GaugeValue,
  437. float64(len(runningPods)))
  438. ch <- prometheus.MustNewConstMetric(
  439. runningContainerCountDesc,
  440. prometheus.GaugeValue,
  441. float64(runningContainers))
  442. }
  443. const configMapAPIPathFmt = "/api/v1/namespaces/%s/configmaps/%s"
  444. func configLabels(source *corev1.NodeConfigSource) (map[string]string, error) {
  445. if source == nil {
  446. return map[string]string{
  447. // prometheus requires all of the labels that can be set on the metric
  448. ConfigSourceLabelKey: "local",
  449. ConfigUIDLabelKey: "",
  450. ConfigResourceVersionLabelKey: "",
  451. KubeletConfigKeyLabelKey: "",
  452. }, nil
  453. }
  454. if source.ConfigMap != nil {
  455. return map[string]string{
  456. ConfigSourceLabelKey: fmt.Sprintf(configMapAPIPathFmt, source.ConfigMap.Namespace, source.ConfigMap.Name),
  457. ConfigUIDLabelKey: string(source.ConfigMap.UID),
  458. ConfigResourceVersionLabelKey: source.ConfigMap.ResourceVersion,
  459. KubeletConfigKeyLabelKey: source.ConfigMap.KubeletConfigKey,
  460. }, nil
  461. }
  462. return nil, fmt.Errorf("unrecognized config source type, all source subfields were nil")
  463. }
  464. // track labels across metric updates, so we can delete old label sets and prevent leaks
  465. var assignedConfigLabels map[string]string = map[string]string{}
  466. func SetAssignedConfig(source *corev1.NodeConfigSource) error {
  467. // compute the timeseries labels from the source
  468. labels, err := configLabels(source)
  469. if err != nil {
  470. return err
  471. }
  472. // clean up the old timeseries (WithLabelValues creates a new one for each distinct label set)
  473. AssignedConfig.Delete(assignedConfigLabels)
  474. // record the new timeseries
  475. assignedConfigLabels = labels
  476. // expose the new timeseries with a constant count of 1
  477. AssignedConfig.With(assignedConfigLabels).Set(1)
  478. return nil
  479. }
  480. // track labels across metric updates, so we can delete old label sets and prevent leaks
  481. var activeConfigLabels map[string]string = map[string]string{}
  482. func SetActiveConfig(source *corev1.NodeConfigSource) error {
  483. // compute the timeseries labels from the source
  484. labels, err := configLabels(source)
  485. if err != nil {
  486. return err
  487. }
  488. // clean up the old timeseries (WithLabelValues creates a new one for each distinct label set)
  489. ActiveConfig.Delete(activeConfigLabels)
  490. // record the new timeseries
  491. activeConfigLabels = labels
  492. // expose the new timeseries with a constant count of 1
  493. ActiveConfig.With(activeConfigLabels).Set(1)
  494. return nil
  495. }
  496. // track labels across metric updates, so we can delete old label sets and prevent leaks
  497. var lastKnownGoodConfigLabels map[string]string = map[string]string{}
  498. func SetLastKnownGoodConfig(source *corev1.NodeConfigSource) error {
  499. // compute the timeseries labels from the source
  500. labels, err := configLabels(source)
  501. if err != nil {
  502. return err
  503. }
  504. // clean up the old timeseries (WithLabelValues creates a new one for each distinct label set)
  505. LastKnownGoodConfig.Delete(lastKnownGoodConfigLabels)
  506. // record the new timeseries
  507. lastKnownGoodConfigLabels = labels
  508. // expose the new timeseries with a constant count of 1
  509. LastKnownGoodConfig.With(lastKnownGoodConfigLabels).Set(1)
  510. return nil
  511. }
  512. func SetConfigError(err bool) {
  513. if err {
  514. ConfigError.Set(1)
  515. } else {
  516. ConfigError.Set(0)
  517. }
  518. }
  519. func SetNodeName(name types.NodeName) {
  520. NodeName.WithLabelValues(string(name)).Set(1)
  521. }