summary.go 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. // Copyright 2015 Google Inc. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
// Package summary maintains the summary of aggregated minute, hour, and day stats.
// For a container running for more than a day, the amount of tracked data can go up to
// 40 KB when cpu and memory are tracked. We'll start by enabling collection for the
// node, followed by docker, and then all containers as we understand the usage pattern
// better.
// TODO(rjnagal): Optimize the size if we start running it for every container.
  20. package summary
  21. import (
  22. "fmt"
  23. "sync"
  24. "time"
  25. "github.com/google/cadvisor/info/v1"
  26. info "github.com/google/cadvisor/info/v2"
  27. )
  28. // Usage fields we track for generating percentiles.
  29. type secondSample struct {
  30. Timestamp time.Time // time when the sample was recorded.
  31. Cpu uint64 // cpu usage
  32. Memory uint64 // memory usage
  33. }
  34. type availableResources struct {
  35. Cpu bool
  36. Memory bool
  37. }
  38. type StatsSummary struct {
  39. // Resources being tracked for this container.
  40. available availableResources
  41. // list of second samples. The list is cleared when a new minute samples is generated.
  42. secondSamples []*secondSample
  43. // minute percentiles. We track 24 * 60 maximum samples.
  44. minuteSamples *SamplesBuffer
  45. // latest derived instant, minute, hour, and day stats. Instant sample updated every second.
  46. // Others updated every minute.
  47. derivedStats info.DerivedStats // Guarded by dataLock.
  48. dataLock sync.RWMutex
  49. }
  50. // Adds a new seconds sample.
  51. // If enough seconds samples are collected, a minute sample is generated and derived
  52. // stats are updated.
  53. func (s *StatsSummary) AddSample(stat v1.ContainerStats) error {
  54. sample := secondSample{}
  55. sample.Timestamp = stat.Timestamp
  56. if s.available.Cpu {
  57. sample.Cpu = stat.Cpu.Usage.Total
  58. }
  59. if s.available.Memory {
  60. sample.Memory = stat.Memory.WorkingSet
  61. }
  62. s.secondSamples = append(s.secondSamples, &sample)
  63. s.updateLatestUsage()
  64. // TODO(jnagal): Use 'available' to avoid unnecessary computation.
  65. numSamples := len(s.secondSamples)
  66. elapsed := time.Nanosecond
  67. if numSamples > 1 {
  68. start := s.secondSamples[0].Timestamp
  69. end := s.secondSamples[numSamples-1].Timestamp
  70. elapsed = end.Sub(start)
  71. }
  72. if elapsed > 60*time.Second {
  73. // Make a minute sample. This works with dynamic housekeeping as long
  74. // as we keep max dynamic houskeeping period close to a minute.
  75. minuteSample := GetMinutePercentiles(s.secondSamples)
  76. // Clear seconds samples. Keep the latest sample for continuity.
  77. // Copying and resizing helps avoid slice re-allocation.
  78. s.secondSamples[0] = s.secondSamples[numSamples-1]
  79. s.secondSamples = s.secondSamples[:1]
  80. s.minuteSamples.Add(minuteSample)
  81. err := s.updateDerivedStats()
  82. if err != nil {
  83. return err
  84. }
  85. }
  86. return nil
  87. }
  88. func (s *StatsSummary) updateLatestUsage() {
  89. usage := info.InstantUsage{}
  90. numStats := len(s.secondSamples)
  91. if numStats < 1 {
  92. return
  93. }
  94. latest := s.secondSamples[numStats-1]
  95. usage.Memory = latest.Memory
  96. if numStats > 1 {
  97. previous := s.secondSamples[numStats-2]
  98. cpu, err := getCpuRate(*latest, *previous)
  99. if err == nil {
  100. usage.Cpu = cpu
  101. }
  102. }
  103. s.dataLock.Lock()
  104. defer s.dataLock.Unlock()
  105. s.derivedStats.LatestUsage = usage
  106. s.derivedStats.Timestamp = latest.Timestamp
  107. return
  108. }
  109. // Generate new derived stats based on current minute stats samples.
  110. func (s *StatsSummary) updateDerivedStats() error {
  111. derived := info.DerivedStats{}
  112. derived.Timestamp = time.Now()
  113. minuteSamples := s.minuteSamples.RecentStats(1)
  114. if len(minuteSamples) != 1 {
  115. return fmt.Errorf("failed to retrieve minute stats")
  116. }
  117. derived.MinuteUsage = *minuteSamples[0]
  118. hourUsage, err := s.getDerivedUsage(60)
  119. if err != nil {
  120. return fmt.Errorf("failed to compute hour stats: %v", err)
  121. }
  122. dayUsage, err := s.getDerivedUsage(60 * 24)
  123. if err != nil {
  124. return fmt.Errorf("failed to compute day usage: %v", err)
  125. }
  126. derived.HourUsage = hourUsage
  127. derived.DayUsage = dayUsage
  128. s.dataLock.Lock()
  129. defer s.dataLock.Unlock()
  130. derived.LatestUsage = s.derivedStats.LatestUsage
  131. s.derivedStats = derived
  132. return nil
  133. }
  134. // helper method to get hour and daily derived stats
  135. func (s *StatsSummary) getDerivedUsage(n int) (info.Usage, error) {
  136. if n < 1 {
  137. return info.Usage{}, fmt.Errorf("invalid number of samples requested: %d", n)
  138. }
  139. samples := s.minuteSamples.RecentStats(n)
  140. numSamples := len(samples)
  141. if numSamples < 1 {
  142. return info.Usage{}, fmt.Errorf("failed to retrieve any minute stats.")
  143. }
  144. // We generate derived stats even with partial data.
  145. usage := GetDerivedPercentiles(samples)
  146. // Assumes we have equally placed minute samples.
  147. usage.PercentComplete = int32(numSamples * 100 / n)
  148. return usage, nil
  149. }
  150. // Return the latest calculated derived stats.
  151. func (s *StatsSummary) DerivedStats() (info.DerivedStats, error) {
  152. s.dataLock.RLock()
  153. defer s.dataLock.RUnlock()
  154. return s.derivedStats, nil
  155. }
  156. func New(spec v1.ContainerSpec) (*StatsSummary, error) {
  157. summary := StatsSummary{}
  158. if spec.HasCpu {
  159. summary.available.Cpu = true
  160. }
  161. if spec.HasMemory {
  162. summary.available.Memory = true
  163. }
  164. if !summary.available.Cpu && !summary.available.Memory {
  165. return nil, fmt.Errorf("none of the resources are being tracked.")
  166. }
  167. summary.minuteSamples = NewSamplesBuffer(60 /* one hour */)
  168. return &summary, nil
  169. }