container.go 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682
  1. // Copyright 2014 Google Inc. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package manager
  15. import (
  16. "flag"
  17. "fmt"
  18. "io/ioutil"
  19. "math"
  20. "math/rand"
  21. "os/exec"
  22. "path"
  23. "regexp"
  24. "sort"
  25. "strconv"
  26. "strings"
  27. "sync"
  28. "time"
  29. "github.com/google/cadvisor/accelerators"
  30. "github.com/google/cadvisor/cache/memory"
  31. "github.com/google/cadvisor/collector"
  32. "github.com/google/cadvisor/container"
  33. info "github.com/google/cadvisor/info/v1"
  34. "github.com/google/cadvisor/info/v2"
  35. "github.com/google/cadvisor/summary"
  36. "github.com/google/cadvisor/utils/cpuload"
  37. units "github.com/docker/go-units"
  38. "k8s.io/klog"
  39. "k8s.io/utils/clock"
  40. )
  41. // Housekeeping interval.
  42. var enableLoadReader = flag.Bool("enable_load_reader", false, "Whether to enable cpu load reader")
  43. var HousekeepingInterval = flag.Duration("housekeeping_interval", 1*time.Second, "Interval between container housekeepings")
  44. // cgroup type chosen to fetch the cgroup path of a process.
  45. // Memory has been chosen, as it is one of the default cgroups that is enabled for most containers.
  46. var cgroupPathRegExp = regexp.MustCompile(`memory[^:]*:(.*?)[,;$]`)
// containerInfo bundles a container's reference with the references of its
// subcontainers and its spec.
type containerInfo struct {
	info.ContainerReference
	// Subcontainers holds the sorted references last stored by updateSubcontainers.
	Subcontainers []info.ContainerReference
	// Spec holds the spec last stored by updateSpec.
	Spec info.ContainerSpec
}
// containerData holds the per-container state used by the housekeeping loop:
// the handler that produces specs and stats, the cache the stats are pushed
// into, and the bookkeeping needed to schedule and rate-limit that work.
type containerData struct {
	// handler supplies the spec, stats, cgroup paths and subcontainer list.
	handler container.ContainerHandler
	// info is the cached reference/spec/subcontainers; writes to Spec and
	// Subcontainers in this file are made while holding lock.
	info containerInfo
	// memoryCache receives the stats collected on every housekeeping tick.
	memoryCache *memory.InMemoryCache
	lock sync.Mutex
	// loadReader is nil unless the enable_load_reader flag is set and the
	// reader could be created.
	loadReader cpuload.CpuLoadReader
	// summaryReader is nil when the summary reader could not be created.
	summaryReader *summary.StatsSummary
	loadAvg float64 // smoothed load average seen so far.
	// housekeepingInterval is the current (possibly backed-off) interval;
	// maxHousekeepingInterval is the cap applied when backing off.
	housekeepingInterval time.Duration
	maxHousekeepingInterval time.Duration
	// allowDynamicHousekeeping enables interval back-off in nextHousekeepingInterval.
	allowDynamicHousekeeping bool
	// infoLastUpdatedTime is set by GetInfo, statsLastUpdatedTime by
	// housekeepingTick, and lastErrorTime by allowErrorLogging.
	infoLastUpdatedTime time.Time
	statsLastUpdatedTime time.Time
	lastErrorTime time.Time
	// used to track time
	clock clock.Clock
	// Decay value used for load average smoothing. Interval length of 10 seconds is used.
	loadDecay float64
	// Whether to log the usage of this container when it is updated.
	logUsage bool
	// Tells the container to stop.
	stop chan bool
	// Tells the container to immediately collect stats
	onDemandChan chan chan struct{}
	// Runs custom metric collectors.
	collectorManager collector.CollectorManager
	// nvidiaCollector updates stats for Nvidia GPUs attached to the container.
	nvidiaCollector accelerators.AcceleratorCollector
}
  81. // jitter returns a time.Duration between duration and duration + maxFactor * duration,
  82. // to allow clients to avoid converging on periodic behavior. If maxFactor is 0.0, a
  83. // suggested default value will be chosen.
  84. func jitter(duration time.Duration, maxFactor float64) time.Duration {
  85. if maxFactor <= 0.0 {
  86. maxFactor = 1.0
  87. }
  88. wait := duration + time.Duration(rand.Float64()*maxFactor*float64(duration))
  89. return wait
  90. }
// Start launches the housekeeping loop for this container in a new goroutine.
// It always returns nil.
func (c *containerData) Start() error {
	go c.housekeeping()
	return nil
}
  95. func (c *containerData) Stop() error {
  96. err := c.memoryCache.RemoveContainer(c.info.Name)
  97. if err != nil {
  98. return err
  99. }
  100. c.stop <- true
  101. return nil
  102. }
  103. func (c *containerData) allowErrorLogging() bool {
  104. if c.clock.Since(c.lastErrorTime) > time.Minute {
  105. c.lastErrorTime = c.clock.Now()
  106. return true
  107. }
  108. return false
  109. }
  110. // OnDemandHousekeeping performs housekeeping on the container and blocks until it has completed.
  111. // It is designed to be used in conjunction with periodic housekeeping, and will cause the timer for
  112. // periodic housekeeping to reset. This should be used sparingly, as calling OnDemandHousekeeping frequently
  113. // can have serious performance costs.
  114. func (c *containerData) OnDemandHousekeeping(maxAge time.Duration) {
  115. if c.clock.Since(c.statsLastUpdatedTime) > maxAge {
  116. housekeepingFinishedChan := make(chan struct{})
  117. c.onDemandChan <- housekeepingFinishedChan
  118. select {
  119. case <-c.stop:
  120. case <-housekeepingFinishedChan:
  121. }
  122. }
  123. }
  124. // notifyOnDemand notifies all calls to OnDemandHousekeeping that housekeeping is finished
  125. func (c *containerData) notifyOnDemand() {
  126. for {
  127. select {
  128. case finishedChan := <-c.onDemandChan:
  129. close(finishedChan)
  130. default:
  131. return
  132. }
  133. }
  134. }
  135. func (c *containerData) GetInfo(shouldUpdateSubcontainers bool) (*containerInfo, error) {
  136. // Get spec and subcontainers.
  137. if c.clock.Since(c.infoLastUpdatedTime) > 5*time.Second {
  138. err := c.updateSpec()
  139. if err != nil {
  140. return nil, err
  141. }
  142. if shouldUpdateSubcontainers {
  143. err = c.updateSubcontainers()
  144. if err != nil {
  145. return nil, err
  146. }
  147. }
  148. c.infoLastUpdatedTime = c.clock.Now()
  149. }
  150. // Make a copy of the info for the user.
  151. c.lock.Lock()
  152. defer c.lock.Unlock()
  153. return &c.info, nil
  154. }
  155. func (c *containerData) DerivedStats() (v2.DerivedStats, error) {
  156. if c.summaryReader == nil {
  157. return v2.DerivedStats{}, fmt.Errorf("derived stats not enabled for container %q", c.info.Name)
  158. }
  159. return c.summaryReader.DerivedStats()
  160. }
  161. func (c *containerData) getCgroupPath(cgroups string) (string, error) {
  162. if cgroups == "-" {
  163. return "/", nil
  164. }
  165. if strings.HasPrefix(cgroups, "0::") {
  166. return cgroups[3:], nil
  167. }
  168. matches := cgroupPathRegExp.FindSubmatch([]byte(cgroups))
  169. if len(matches) != 2 {
  170. klog.V(3).Infof("failed to get memory cgroup path from %q", cgroups)
  171. // return root in case of failures - memory hierarchy might not be enabled.
  172. return "/", nil
  173. }
  174. return string(matches[1]), nil
  175. }
  176. // Returns contents of a file inside the container root.
  177. // Takes in a path relative to container root.
  178. func (c *containerData) ReadFile(filepath string, inHostNamespace bool) ([]byte, error) {
  179. pids, err := c.getContainerPids(inHostNamespace)
  180. if err != nil {
  181. return nil, err
  182. }
  183. // TODO(rjnagal): Optimize by just reading container's cgroup.proc file when in host namespace.
  184. rootfs := "/"
  185. if !inHostNamespace {
  186. rootfs = "/rootfs"
  187. }
  188. for _, pid := range pids {
  189. filePath := path.Join(rootfs, "/proc", pid, "/root", filepath)
  190. klog.V(3).Infof("Trying path %q", filePath)
  191. data, err := ioutil.ReadFile(filePath)
  192. if err == nil {
  193. return data, err
  194. }
  195. }
  196. // No process paths could be found. Declare config non-existent.
  197. return nil, fmt.Errorf("file %q does not exist.", filepath)
  198. }
  199. // Return output for ps command in host /proc with specified format
  200. func (c *containerData) getPsOutput(inHostNamespace bool, format string) ([]byte, error) {
  201. args := []string{}
  202. command := "ps"
  203. if !inHostNamespace {
  204. command = "/usr/sbin/chroot"
  205. args = append(args, "/rootfs", "ps")
  206. }
  207. args = append(args, "-e", "-o", format)
  208. out, err := exec.Command(command, args...).Output()
  209. if err != nil {
  210. return nil, fmt.Errorf("failed to execute %q command: %v", command, err)
  211. }
  212. return out, err
  213. }
  214. // Get pids of processes in this container.
  215. // A slightly lighterweight call than GetProcessList if other details are not required.
  216. func (c *containerData) getContainerPids(inHostNamespace bool) ([]string, error) {
  217. format := "pid,cgroup"
  218. out, err := c.getPsOutput(inHostNamespace, format)
  219. if err != nil {
  220. return nil, err
  221. }
  222. expectedFields := 2
  223. lines := strings.Split(string(out), "\n")
  224. pids := []string{}
  225. for _, line := range lines[1:] {
  226. if len(line) == 0 {
  227. continue
  228. }
  229. fields := strings.Fields(line)
  230. if len(fields) < expectedFields {
  231. return nil, fmt.Errorf("expected at least %d fields, found %d: output: %q", expectedFields, len(fields), line)
  232. }
  233. pid := fields[0]
  234. cgroup, err := c.getCgroupPath(fields[1])
  235. if err != nil {
  236. return nil, fmt.Errorf("could not parse cgroup path from %q: %v", fields[1], err)
  237. }
  238. if c.info.Name == cgroup {
  239. pids = append(pids, pid)
  240. }
  241. }
  242. return pids, nil
  243. }
  244. func (c *containerData) GetProcessList(cadvisorContainer string, inHostNamespace bool) ([]v2.ProcessInfo, error) {
  245. // report all processes for root.
  246. isRoot := c.info.Name == "/"
  247. rootfs := "/"
  248. if !inHostNamespace {
  249. rootfs = "/rootfs"
  250. }
  251. format := "user,pid,ppid,stime,pcpu,pmem,rss,vsz,stat,time,comm,cgroup"
  252. out, err := c.getPsOutput(inHostNamespace, format)
  253. if err != nil {
  254. return nil, err
  255. }
  256. expectedFields := 12
  257. processes := []v2.ProcessInfo{}
  258. lines := strings.Split(string(out), "\n")
  259. for _, line := range lines[1:] {
  260. if len(line) == 0 {
  261. continue
  262. }
  263. fields := strings.Fields(line)
  264. if len(fields) < expectedFields {
  265. return nil, fmt.Errorf("expected at least %d fields, found %d: output: %q", expectedFields, len(fields), line)
  266. }
  267. pid, err := strconv.Atoi(fields[1])
  268. if err != nil {
  269. return nil, fmt.Errorf("invalid pid %q: %v", fields[1], err)
  270. }
  271. ppid, err := strconv.Atoi(fields[2])
  272. if err != nil {
  273. return nil, fmt.Errorf("invalid ppid %q: %v", fields[2], err)
  274. }
  275. percentCpu, err := strconv.ParseFloat(fields[4], 32)
  276. if err != nil {
  277. return nil, fmt.Errorf("invalid cpu percent %q: %v", fields[4], err)
  278. }
  279. percentMem, err := strconv.ParseFloat(fields[5], 32)
  280. if err != nil {
  281. return nil, fmt.Errorf("invalid memory percent %q: %v", fields[5], err)
  282. }
  283. rss, err := strconv.ParseUint(fields[6], 0, 64)
  284. if err != nil {
  285. return nil, fmt.Errorf("invalid rss %q: %v", fields[6], err)
  286. }
  287. // convert to bytes
  288. rss *= 1024
  289. vs, err := strconv.ParseUint(fields[7], 0, 64)
  290. if err != nil {
  291. return nil, fmt.Errorf("invalid virtual size %q: %v", fields[7], err)
  292. }
  293. // convert to bytes
  294. vs *= 1024
  295. cgroup, err := c.getCgroupPath(fields[11])
  296. if err != nil {
  297. return nil, fmt.Errorf("could not parse cgroup path from %q: %v", fields[11], err)
  298. }
  299. // Remove the ps command we just ran from cadvisor container.
  300. // Not necessary, but makes the cadvisor page look cleaner.
  301. if !inHostNamespace && cadvisorContainer == cgroup && fields[10] == "ps" {
  302. continue
  303. }
  304. var cgroupPath string
  305. if isRoot {
  306. cgroupPath = cgroup
  307. }
  308. var fdCount int
  309. dirPath := path.Join(rootfs, "/proc", strconv.Itoa(pid), "fd")
  310. fds, err := ioutil.ReadDir(dirPath)
  311. if err != nil {
  312. klog.V(4).Infof("error while listing directory %q to measure fd count: %v", dirPath, err)
  313. continue
  314. }
  315. fdCount = len(fds)
  316. if isRoot || c.info.Name == cgroup {
  317. processes = append(processes, v2.ProcessInfo{
  318. User: fields[0],
  319. Pid: pid,
  320. Ppid: ppid,
  321. StartTime: fields[3],
  322. PercentCpu: float32(percentCpu),
  323. PercentMemory: float32(percentMem),
  324. RSS: rss,
  325. VirtualSize: vs,
  326. Status: fields[8],
  327. RunningTime: fields[9],
  328. Cmd: fields[10],
  329. CgroupPath: cgroupPath,
  330. FdCount: fdCount,
  331. })
  332. }
  333. }
  334. return processes, nil
  335. }
  336. func newContainerData(containerName string, memoryCache *memory.InMemoryCache, handler container.ContainerHandler, logUsage bool, collectorManager collector.CollectorManager, maxHousekeepingInterval time.Duration, allowDynamicHousekeeping bool, clock clock.Clock) (*containerData, error) {
  337. if memoryCache == nil {
  338. return nil, fmt.Errorf("nil memory storage")
  339. }
  340. if handler == nil {
  341. return nil, fmt.Errorf("nil container handler")
  342. }
  343. ref, err := handler.ContainerReference()
  344. if err != nil {
  345. return nil, err
  346. }
  347. cont := &containerData{
  348. handler: handler,
  349. memoryCache: memoryCache,
  350. housekeepingInterval: *HousekeepingInterval,
  351. maxHousekeepingInterval: maxHousekeepingInterval,
  352. allowDynamicHousekeeping: allowDynamicHousekeeping,
  353. logUsage: logUsage,
  354. loadAvg: -1.0, // negative value indicates uninitialized.
  355. stop: make(chan bool, 1),
  356. collectorManager: collectorManager,
  357. onDemandChan: make(chan chan struct{}, 100),
  358. clock: clock,
  359. }
  360. cont.info.ContainerReference = ref
  361. cont.loadDecay = math.Exp(float64(-cont.housekeepingInterval.Seconds() / 10))
  362. if *enableLoadReader {
  363. // Create cpu load reader.
  364. loadReader, err := cpuload.New()
  365. if err != nil {
  366. klog.Warningf("Could not initialize cpu load reader for %q: %s", ref.Name, err)
  367. } else {
  368. cont.loadReader = loadReader
  369. }
  370. }
  371. err = cont.updateSpec()
  372. if err != nil {
  373. return nil, err
  374. }
  375. cont.summaryReader, err = summary.New(cont.info.Spec)
  376. if err != nil {
  377. cont.summaryReader = nil
  378. klog.Warningf("Failed to create summary reader for %q: %v", ref.Name, err)
  379. }
  380. return cont, nil
  381. }
  382. // Determine when the next housekeeping should occur.
  383. func (self *containerData) nextHousekeepingInterval() time.Duration {
  384. if self.allowDynamicHousekeeping {
  385. var empty time.Time
  386. stats, err := self.memoryCache.RecentStats(self.info.Name, empty, empty, 2)
  387. if err != nil {
  388. if self.allowErrorLogging() {
  389. klog.Warningf("Failed to get RecentStats(%q) while determining the next housekeeping: %v", self.info.Name, err)
  390. }
  391. } else if len(stats) == 2 {
  392. // TODO(vishnuk): Use no processes as a signal.
  393. // Raise the interval if usage hasn't changed in the last housekeeping.
  394. if stats[0].StatsEq(stats[1]) && (self.housekeepingInterval < self.maxHousekeepingInterval) {
  395. self.housekeepingInterval *= 2
  396. if self.housekeepingInterval > self.maxHousekeepingInterval {
  397. self.housekeepingInterval = self.maxHousekeepingInterval
  398. }
  399. } else if self.housekeepingInterval != *HousekeepingInterval {
  400. // Lower interval back to the baseline.
  401. self.housekeepingInterval = *HousekeepingInterval
  402. }
  403. }
  404. }
  405. return jitter(self.housekeepingInterval, 1.0)
  406. }
  407. // TODO(vmarmol): Implement stats collecting as a custom collector.
  408. func (c *containerData) housekeeping() {
  409. // Start any background goroutines - must be cleaned up in c.handler.Cleanup().
  410. c.handler.Start()
  411. defer c.handler.Cleanup()
  412. // Initialize cpuload reader - must be cleaned up in c.loadReader.Stop()
  413. if c.loadReader != nil {
  414. err := c.loadReader.Start()
  415. if err != nil {
  416. klog.Warningf("Could not start cpu load stat collector for %q: %s", c.info.Name, err)
  417. }
  418. defer c.loadReader.Stop()
  419. }
  420. // Long housekeeping is either 100ms or half of the housekeeping interval.
  421. longHousekeeping := 100 * time.Millisecond
  422. if *HousekeepingInterval/2 < longHousekeeping {
  423. longHousekeeping = *HousekeepingInterval / 2
  424. }
  425. // Housekeep every second.
  426. klog.V(3).Infof("Start housekeeping for container %q\n", c.info.Name)
  427. houseKeepingTimer := c.clock.NewTimer(0 * time.Second)
  428. defer houseKeepingTimer.Stop()
  429. for {
  430. if !c.housekeepingTick(houseKeepingTimer.C(), longHousekeeping) {
  431. return
  432. }
  433. // Stop and drain the timer so that it is safe to reset it
  434. if !houseKeepingTimer.Stop() {
  435. select {
  436. case <-houseKeepingTimer.C():
  437. default:
  438. }
  439. }
  440. // Log usage if asked to do so.
  441. if c.logUsage {
  442. const numSamples = 60
  443. var empty time.Time
  444. stats, err := c.memoryCache.RecentStats(c.info.Name, empty, empty, numSamples)
  445. if err != nil {
  446. if c.allowErrorLogging() {
  447. klog.Warningf("[%s] Failed to get recent stats for logging usage: %v", c.info.Name, err)
  448. }
  449. } else if len(stats) < numSamples {
  450. // Ignore, not enough stats yet.
  451. } else {
  452. usageCpuNs := uint64(0)
  453. for i := range stats {
  454. if i > 0 {
  455. usageCpuNs += (stats[i].Cpu.Usage.Total - stats[i-1].Cpu.Usage.Total)
  456. }
  457. }
  458. usageMemory := stats[numSamples-1].Memory.Usage
  459. instantUsageInCores := float64(stats[numSamples-1].Cpu.Usage.Total-stats[numSamples-2].Cpu.Usage.Total) / float64(stats[numSamples-1].Timestamp.Sub(stats[numSamples-2].Timestamp).Nanoseconds())
  460. usageInCores := float64(usageCpuNs) / float64(stats[numSamples-1].Timestamp.Sub(stats[0].Timestamp).Nanoseconds())
  461. usageInHuman := units.HumanSize(float64(usageMemory))
  462. // Don't set verbosity since this is already protected by the logUsage flag.
  463. klog.Infof("[%s] %.3f cores (average: %.3f cores), %s of memory", c.info.Name, instantUsageInCores, usageInCores, usageInHuman)
  464. }
  465. }
  466. houseKeepingTimer.Reset(c.nextHousekeepingInterval())
  467. }
  468. }
  469. func (c *containerData) housekeepingTick(timer <-chan time.Time, longHousekeeping time.Duration) bool {
  470. select {
  471. case <-c.stop:
  472. // Stop housekeeping when signaled.
  473. return false
  474. case finishedChan := <-c.onDemandChan:
  475. // notify the calling function once housekeeping has completed
  476. defer close(finishedChan)
  477. case <-timer:
  478. }
  479. start := c.clock.Now()
  480. err := c.updateStats()
  481. if err != nil {
  482. if c.allowErrorLogging() {
  483. klog.Warningf("Failed to update stats for container \"%s\": %s", c.info.Name, err)
  484. }
  485. }
  486. // Log if housekeeping took too long.
  487. duration := c.clock.Since(start)
  488. if duration >= longHousekeeping {
  489. klog.V(3).Infof("[%s] Housekeeping took %s", c.info.Name, duration)
  490. }
  491. c.notifyOnDemand()
  492. c.statsLastUpdatedTime = c.clock.Now()
  493. return true
  494. }
  495. func (c *containerData) updateSpec() error {
  496. spec, err := c.handler.GetSpec()
  497. if err != nil {
  498. // Ignore errors if the container is dead.
  499. if !c.handler.Exists() {
  500. return nil
  501. }
  502. return err
  503. }
  504. customMetrics, err := c.collectorManager.GetSpec()
  505. if err != nil {
  506. return err
  507. }
  508. if len(customMetrics) > 0 {
  509. spec.HasCustomMetrics = true
  510. spec.CustomMetrics = customMetrics
  511. }
  512. c.lock.Lock()
  513. defer c.lock.Unlock()
  514. c.info.Spec = spec
  515. return nil
  516. }
  517. // Calculate new smoothed load average using the new sample of runnable threads.
  518. // The decay used ensures that the load will stabilize on a new constant value within
  519. // 10 seconds.
  520. func (c *containerData) updateLoad(newLoad uint64) {
  521. if c.loadAvg < 0 {
  522. c.loadAvg = float64(newLoad) // initialize to the first seen sample for faster stabilization.
  523. } else {
  524. c.loadAvg = c.loadAvg*c.loadDecay + float64(newLoad)*(1.0-c.loadDecay)
  525. }
  526. }
  527. func (c *containerData) updateStats() error {
  528. stats, statsErr := c.handler.GetStats()
  529. if statsErr != nil {
  530. // Ignore errors if the container is dead.
  531. if !c.handler.Exists() {
  532. return nil
  533. }
  534. // Stats may be partially populated, push those before we return an error.
  535. statsErr = fmt.Errorf("%v, continuing to push stats", statsErr)
  536. }
  537. if stats == nil {
  538. return statsErr
  539. }
  540. if c.loadReader != nil {
  541. // TODO(vmarmol): Cache this path.
  542. path, err := c.handler.GetCgroupPath("cpu")
  543. if err == nil {
  544. loadStats, err := c.loadReader.GetCpuLoad(c.info.Name, path)
  545. if err != nil {
  546. return fmt.Errorf("failed to get load stat for %q - path %q, error %s", c.info.Name, path, err)
  547. }
  548. stats.TaskStats = loadStats
  549. c.updateLoad(loadStats.NrRunning)
  550. // convert to 'milliLoad' to avoid floats and preserve precision.
  551. stats.Cpu.LoadAverage = int32(c.loadAvg * 1000)
  552. }
  553. }
  554. if c.summaryReader != nil {
  555. err := c.summaryReader.AddSample(*stats)
  556. if err != nil {
  557. // Ignore summary errors for now.
  558. klog.V(2).Infof("Failed to add summary stats for %q: %v", c.info.Name, err)
  559. }
  560. }
  561. var customStatsErr error
  562. cm := c.collectorManager.(*collector.GenericCollectorManager)
  563. if len(cm.Collectors) > 0 {
  564. if cm.NextCollectionTime.Before(c.clock.Now()) {
  565. customStats, err := c.updateCustomStats()
  566. if customStats != nil {
  567. stats.CustomMetrics = customStats
  568. }
  569. if err != nil {
  570. customStatsErr = err
  571. }
  572. }
  573. }
  574. var nvidiaStatsErr error
  575. if c.nvidiaCollector != nil {
  576. // This updates the Accelerators field of the stats struct
  577. nvidiaStatsErr = c.nvidiaCollector.UpdateStats(stats)
  578. }
  579. ref, err := c.handler.ContainerReference()
  580. if err != nil {
  581. // Ignore errors if the container is dead.
  582. if !c.handler.Exists() {
  583. return nil
  584. }
  585. return err
  586. }
  587. cInfo := info.ContainerInfo{
  588. ContainerReference: ref,
  589. }
  590. err = c.memoryCache.AddStats(&cInfo, stats)
  591. if err != nil {
  592. return err
  593. }
  594. if statsErr != nil {
  595. return statsErr
  596. }
  597. if nvidiaStatsErr != nil {
  598. return nvidiaStatsErr
  599. }
  600. return customStatsErr
  601. }
  602. func (c *containerData) updateCustomStats() (map[string][]info.MetricVal, error) {
  603. _, customStats, customStatsErr := c.collectorManager.Collect()
  604. if customStatsErr != nil {
  605. if !c.handler.Exists() {
  606. return customStats, nil
  607. }
  608. customStatsErr = fmt.Errorf("%v, continuing to push custom stats", customStatsErr)
  609. }
  610. return customStats, customStatsErr
  611. }
  612. func (c *containerData) updateSubcontainers() error {
  613. var subcontainers info.ContainerReferenceSlice
  614. subcontainers, err := c.handler.ListContainers(container.ListSelf)
  615. if err != nil {
  616. // Ignore errors if the container is dead.
  617. if !c.handler.Exists() {
  618. return nil
  619. }
  620. return err
  621. }
  622. sort.Sort(subcontainers)
  623. c.lock.Lock()
  624. defer c.lock.Unlock()
  625. c.info.Subcontainers = subcontainers
  626. return nil
  627. }