metrics.go 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261
  1. /*
  2. Copyright 2017 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package metrics
  14. import (
  15. "sync"
  16. "time"
  17. v1 "k8s.io/api/core/v1"
  18. "k8s.io/component-base/metrics"
  19. "k8s.io/component-base/metrics/legacyregistry"
  20. metricutil "k8s.io/kubernetes/pkg/volume/util"
  21. )
const (
	// Subsystem names.
	// pvControllerSubsystem is the subsystem segment passed to
	// metrics.BuildFQName for every PV/PVC count gauge in this file.
	pvControllerSubsystem = "pv_collector"
	// Metric names.
	// Each key is the name segment of one gauge descriptor: bound and unbound
	// persistent volumes, then bound and unbound persistent volume claims.
	boundPVKey    = "bound_pv_count"
	unboundPVKey  = "unbound_pv_count"
	boundPVCKey   = "bound_pvc_count"
	unboundPVCKey = "unbound_pvc_count"
	// Label names.
	// namespaceLabel is the variable label of the PVC gauges and
	// storageClassLabel is the variable label of the PV gauges.
	namespaceLabel    = "namespace"
	storageClassLabel = "storage_class"
)
// registerMetrics ensures the registrations performed by Register run at most
// once per process.
var registerMetrics sync.Once
// PVLister is used to list persistent volumes.
type PVLister interface {
	// List returns the objects to count. The collector only counts elements
	// whose dynamic type is *v1.PersistentVolume; anything else is skipped.
	List() []interface{}
}
// PVCLister is used to list persistent volume claims.
type PVCLister interface {
	// List returns the objects to count. The collector only counts elements
	// whose dynamic type is *v1.PersistentVolumeClaim; anything else is skipped.
	List() []interface{}
}
  43. // Register all metrics for pv controller.
  44. func Register(pvLister PVLister, pvcLister PVCLister) {
  45. registerMetrics.Do(func() {
  46. legacyregistry.CustomMustRegister(newPVAndPVCCountCollector(pvLister, pvcLister))
  47. legacyregistry.MustRegister(volumeOperationErrorsMetric)
  48. })
  49. }
  50. func newPVAndPVCCountCollector(pvLister PVLister, pvcLister PVCLister) *pvAndPVCCountCollector {
  51. return &pvAndPVCCountCollector{pvLister: pvLister, pvcLister: pvcLister}
  52. }
// pvAndPVCCountCollector is a custom collector for the current counts of bound
// and unbound PersistentVolumes and PersistentVolumeClaims.
type pvAndPVCCountCollector struct {
	metrics.BaseStableCollector
	// Cache for accessing information about PersistentVolumes.
	pvLister PVLister
	// Cache for accessing information about PersistentVolumeClaims.
	pvcLister PVCLister
}
  61. // Check if our collector implements necessary collector interface
  62. var _ metrics.StableCollector = &pvAndPVCCountCollector{}
  63. var (
  64. boundPVCountDesc = metrics.NewDesc(
  65. metrics.BuildFQName("", pvControllerSubsystem, boundPVKey),
  66. "Gauge measuring number of persistent volume currently bound",
  67. []string{storageClassLabel}, nil,
  68. metrics.ALPHA, "")
  69. unboundPVCountDesc = metrics.NewDesc(
  70. metrics.BuildFQName("", pvControllerSubsystem, unboundPVKey),
  71. "Gauge measuring number of persistent volume currently unbound",
  72. []string{storageClassLabel}, nil,
  73. metrics.ALPHA, "")
  74. boundPVCCountDesc = metrics.NewDesc(
  75. metrics.BuildFQName("", pvControllerSubsystem, boundPVCKey),
  76. "Gauge measuring number of persistent volume claim currently bound",
  77. []string{namespaceLabel}, nil,
  78. metrics.ALPHA, "")
  79. unboundPVCCountDesc = metrics.NewDesc(
  80. metrics.BuildFQName("", pvControllerSubsystem, unboundPVCKey),
  81. "Gauge measuring number of persistent volume claim currently unbound",
  82. []string{namespaceLabel}, nil,
  83. metrics.ALPHA, "")
  84. volumeOperationErrorsMetric = metrics.NewCounterVec(
  85. &metrics.CounterOpts{
  86. Name: "volume_operation_total_errors",
  87. Help: "Total volume operation errors",
  88. StabilityLevel: metrics.ALPHA,
  89. },
  90. []string{"plugin_name", "operation_name"})
  91. )
  92. func (collector *pvAndPVCCountCollector) DescribeWithStability(ch chan<- *metrics.Desc) {
  93. ch <- boundPVCountDesc
  94. ch <- unboundPVCountDesc
  95. ch <- boundPVCCountDesc
  96. ch <- unboundPVCCountDesc
  97. }
// CollectWithStability sends the current PV gauges followed by the current PVC
// gauges on ch by delegating to pvCollect and pvcCollect.
func (collector *pvAndPVCCountCollector) CollectWithStability(ch chan<- metrics.Metric) {
	collector.pvCollect(ch)
	collector.pvcCollect(ch)
}
  102. func (collector *pvAndPVCCountCollector) pvCollect(ch chan<- metrics.Metric) {
  103. boundNumberByStorageClass := make(map[string]int)
  104. unboundNumberByStorageClass := make(map[string]int)
  105. for _, pvObj := range collector.pvLister.List() {
  106. pv, ok := pvObj.(*v1.PersistentVolume)
  107. if !ok {
  108. continue
  109. }
  110. if pv.Status.Phase == v1.VolumeBound {
  111. boundNumberByStorageClass[pv.Spec.StorageClassName]++
  112. } else {
  113. unboundNumberByStorageClass[pv.Spec.StorageClassName]++
  114. }
  115. }
  116. for storageClassName, number := range boundNumberByStorageClass {
  117. ch <- metrics.NewLazyConstMetric(
  118. boundPVCountDesc,
  119. metrics.GaugeValue,
  120. float64(number),
  121. storageClassName)
  122. }
  123. for storageClassName, number := range unboundNumberByStorageClass {
  124. ch <- metrics.NewLazyConstMetric(
  125. unboundPVCountDesc,
  126. metrics.GaugeValue,
  127. float64(number),
  128. storageClassName)
  129. }
  130. }
  131. func (collector *pvAndPVCCountCollector) pvcCollect(ch chan<- metrics.Metric) {
  132. boundNumberByNamespace := make(map[string]int)
  133. unboundNumberByNamespace := make(map[string]int)
  134. for _, pvcObj := range collector.pvcLister.List() {
  135. pvc, ok := pvcObj.(*v1.PersistentVolumeClaim)
  136. if !ok {
  137. continue
  138. }
  139. if pvc.Status.Phase == v1.ClaimBound {
  140. boundNumberByNamespace[pvc.Namespace]++
  141. } else {
  142. unboundNumberByNamespace[pvc.Namespace]++
  143. }
  144. }
  145. for namespace, number := range boundNumberByNamespace {
  146. ch <- metrics.NewLazyConstMetric(
  147. boundPVCCountDesc,
  148. metrics.GaugeValue,
  149. float64(number),
  150. namespace)
  151. }
  152. for namespace, number := range unboundNumberByNamespace {
  153. ch <- metrics.NewLazyConstMetric(
  154. unboundPVCCountDesc,
  155. metrics.GaugeValue,
  156. float64(number),
  157. namespace)
  158. }
  159. }
  160. // RecordVolumeOperationErrorMetric records error count into metric
  161. // volume_operation_total_errors for provisioning/deletion operations
  162. func RecordVolumeOperationErrorMetric(pluginName, opName string) {
  163. if pluginName == "" {
  164. pluginName = "N/A"
  165. }
  166. volumeOperationErrorsMetric.WithLabelValues(pluginName, opName).Inc()
  167. }
  168. // operationTimestamp stores the start time of an operation by a plugin
  169. type operationTimestamp struct {
  170. pluginName string
  171. operation string
  172. startTs time.Time
  173. }
  174. func newOperationTimestamp(pluginName, operationName string) *operationTimestamp {
  175. return &operationTimestamp{
  176. pluginName: pluginName,
  177. operation: operationName,
  178. startTs: time.Now(),
  179. }
  180. }
  181. // OperationStartTimeCache concurrent safe cache for operation start timestamps
  182. type OperationStartTimeCache struct {
  183. cache sync.Map // [string]operationTimestamp
  184. }
  185. // NewOperationStartTimeCache creates a operation timestamp cache
  186. func NewOperationStartTimeCache() OperationStartTimeCache {
  187. return OperationStartTimeCache{
  188. cache: sync.Map{}, // [string]operationTimestamp {}
  189. }
  190. }
  191. // AddIfNotExist returns directly if there exists an entry with the key. Otherwise, it
  192. // creates a new operation timestamp using operationName, pluginName, and current timestamp
  193. // and stores the operation timestamp with the key
  194. func (c *OperationStartTimeCache) AddIfNotExist(key, pluginName, operationName string) {
  195. ts := newOperationTimestamp(pluginName, operationName)
  196. c.cache.LoadOrStore(key, ts)
  197. }
  198. // Delete deletes a value for a key.
  199. func (c *OperationStartTimeCache) Delete(key string) {
  200. c.cache.Delete(key)
  201. }
  202. // Has returns a bool value indicates the existence of a key in the cache
  203. func (c *OperationStartTimeCache) Has(key string) bool {
  204. _, exists := c.cache.Load(key)
  205. return exists
  206. }
  207. // RecordMetric records either an error count metric or a latency metric if there
  208. // exists a start timestamp entry in the cache. For a successful operation, i.e.,
  209. // err == nil, the corresponding timestamp entry will be removed from cache
  210. func RecordMetric(key string, c *OperationStartTimeCache, err error) {
  211. obj, exists := c.cache.Load(key)
  212. if !exists {
  213. return
  214. }
  215. ts, ok := obj.(*operationTimestamp)
  216. if !ok {
  217. return
  218. }
  219. if err != nil {
  220. RecordVolumeOperationErrorMetric(ts.pluginName, ts.operation)
  221. } else {
  222. timeTaken := time.Since(ts.startTs).Seconds()
  223. metricutil.RecordOperationLatencyMetric(ts.pluginName, ts.operation, timeTaken)
  224. // end of this operation, remove the timestamp entry from cache
  225. c.Delete(key)
  226. }
  227. }