// Copyright 2014 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Manager of cAdvisor-monitored containers.
package manager

import (
	"flag"
	"fmt"
	"net/http"
	"os"
	"path"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/google/cadvisor/accelerators"
	"github.com/google/cadvisor/cache/memory"
	"github.com/google/cadvisor/collector"
	"github.com/google/cadvisor/container"
	"github.com/google/cadvisor/container/docker"
	"github.com/google/cadvisor/container/raw"
	"github.com/google/cadvisor/events"
	"github.com/google/cadvisor/fs"
	info "github.com/google/cadvisor/info/v1"
	"github.com/google/cadvisor/info/v2"
	"github.com/google/cadvisor/machine"
	"github.com/google/cadvisor/utils/oomparser"
	"github.com/google/cadvisor/utils/sysfs"
	"github.com/google/cadvisor/version"
	"github.com/google/cadvisor/watcher"

	"github.com/opencontainers/runc/libcontainer/cgroups"
	"k8s.io/klog"
	"k8s.io/utils/clock"
)

var globalHousekeepingInterval = flag.Duration("global_housekeeping_interval", 1*time.Minute, "Interval between global housekeepings")
var updateMachineInfoInterval = flag.Duration("update_machine_info_interval", 5*time.Minute, "Interval between machine info updates.")
var logCadvisorUsage = flag.Bool("log_cadvisor_usage", false, "Whether to log the usage of the cAdvisor container")
var eventStorageAgeLimit = flag.String("event_storage_age_limit", "default=24h", "Max length of time for which to store events (per type). Value is a comma separated list of key values, where the keys are event types (e.g.: creation, oom) or \"default\" and the value is a duration. Default is applied to all non-specified event types")
var eventStorageEventLimit = flag.String("event_storage_event_limit", "default=100000", "Max number of events to store (per type). Value is a comma separated list of key values, where the keys are event types (e.g.: creation, oom) or \"default\" and the value is an integer. Default is applied to all non-specified event types")
var applicationMetricsCountLimit = flag.Int("application_metrics_count_limit", 100, "Max number of application metrics to store (per container)")

// The Manager interface defines operations for starting a manager and getting
// container and machine information.
type Manager interface {
	// Start the manager. Calling other manager methods before this returns
	// may produce undefined behavior.
	Start() error
	// Stops the manager.
	Stop() error
	// Get information about a container.
	GetContainerInfo(containerName string, query *info.ContainerInfoRequest) (*info.ContainerInfo, error)
	// Get V2 information about a container.
	// Recursive (subcontainer) requests are best-effort, and may return a partial result alongside an
	// error in the partial failure case.
	GetContainerInfoV2(containerName string, options v2.RequestOptions) (map[string]v2.ContainerInfo, error)
	// Get information about all subcontainers of the specified container (includes self).
	SubcontainersInfo(containerName string, query *info.ContainerInfoRequest) ([]*info.ContainerInfo, error)
	// Gets all the Docker containers. Return is a map from full container name to ContainerInfo.
	AllDockerContainers(query *info.ContainerInfoRequest) (map[string]info.ContainerInfo, error)
	// Gets information about a specific Docker container. The specified name is within the Docker namespace.
	DockerContainer(dockerName string, query *info.ContainerInfoRequest) (info.ContainerInfo, error)
	// Gets spec for all containers based on request options.
	GetContainerSpec(containerName string, options v2.RequestOptions) (map[string]v2.ContainerSpec, error)
	// Gets summary stats for all containers based on request options.
	GetDerivedStats(containerName string, options v2.RequestOptions) (map[string]v2.DerivedStats, error)
	// Get info for all requested containers based on the request options.
	GetRequestedContainersInfo(containerName string, options v2.RequestOptions) (map[string]*info.ContainerInfo, error)
	// Returns true if the named container exists.
	Exists(containerName string) bool
	// Get information about the machine.
	GetMachineInfo() (*info.MachineInfo, error)
	// Get version information about different components we depend on.
	GetVersionInfo() (*info.VersionInfo, error)
	// GetFsInfoByFsUUID returns the information of the device having the
	// specified filesystem uuid. If no such device with the UUID exists, this
	// function will return the fs.ErrNoSuchDevice error.
	GetFsInfoByFsUUID(uuid string) (v2.FsInfo, error)
	// Get filesystem information for the filesystem that contains the given directory
	GetDirFsInfo(dir string) (v2.FsInfo, error)
	// Get filesystem information for a given label.
	// Returns information for all global filesystems if label is empty.
	GetFsInfo(label string) ([]v2.FsInfo, error)
	// Get ps output for a container.
	GetProcessList(containerName string, options v2.RequestOptions) ([]v2.ProcessInfo, error)
	// Get events streamed through passedChannel that fit the request.
	WatchForEvents(request *events.Request) (*events.EventChannel, error)
	// Get past events that have been detected and that fit the request.
	GetPastEvents(request *events.Request) ([]*info.Event, error)
	CloseEventChannel(watch_id int)
	// Get status information about docker.
	DockerInfo() (info.DockerStatus, error)
	// Get details about interesting docker images.
	DockerImages() ([]info.DockerImage, error)
	// Returns debugging information. Map of lines per category.
	DebugInfo() map[string][]string
}

// New takes a memory storage and returns a new manager.
func New(memoryCache *memory.InMemoryCache, sysfs sysfs.SysFs, maxHousekeepingInterval time.Duration, allowDynamicHousekeeping bool, includedMetricsSet container.MetricSet, collectorHttpClient *http.Client, rawContainerCgroupPathPrefixWhiteList []string) (Manager, error) {
	if memoryCache == nil {
		return nil, fmt.Errorf("manager requires memory storage")
	}
	// Detect the container we are running on.
	selfContainer, err := cgroups.GetOwnCgroupPath("cpu")
	if err != nil {
		return nil, err
	}
	klog.V(2).Infof("cAdvisor running in container: %q", selfContainer)
	context := fs.Context{}
	if err := container.InitializeFSContext(&context); err != nil {
		return nil, err
	}
	fsInfo, err := fs.NewFsInfo(context)
	if err != nil {
		return nil, err
	}
	// If cAdvisor was started with the host's rootfs mounted, assume that it is
	// running in its own namespaces.
	inHostNamespace := false
	if _, err := os.Stat("/rootfs/proc"); os.IsNotExist(err) {
		inHostNamespace = true
	}
	// Register for new subcontainers.
	eventsChannel := make(chan watcher.ContainerEvent, 16)
	newManager := &manager{
		containers: make(map[namespacedContainerName]*containerData),
		quitChannels: make([]chan error, 0, 2),
		memoryCache: memoryCache,
		fsInfo: fsInfo,
		sysFs: sysfs,
		cadvisorContainer: selfContainer,
		inHostNamespace: inHostNamespace,
		startupTime: time.Now(),
		maxHousekeepingInterval: maxHousekeepingInterval,
		allowDynamicHousekeeping: allowDynamicHousekeeping,
		includedMetrics: includedMetricsSet,
		containerWatchers: []watcher.ContainerWatcher{},
		eventsChannel: eventsChannel,
		collectorHttpClient: collectorHttpClient,
		nvidiaManager: &accelerators.NvidiaManager{},
		rawContainerCgroupPathPrefixWhiteList: rawContainerCgroupPathPrefixWhiteList,
	}
	machineInfo, err := machine.Info(sysfs, fsInfo, inHostNamespace)
	if err != nil {
		return nil, err
	}
	newManager.machineInfo = *machineInfo
	klog.V(1).Infof("Machine: %+v", newManager.machineInfo)
	versionInfo, err := getVersionInfo()
	if err != nil {
		return nil, err
	}
	klog.V(1).Infof("Version: %+v", *versionInfo)
	newManager.eventHandler = events.NewEventManager(parseEventsStoragePolicy())
	return newManager, nil
}

// A namespaced container name.
type namespacedContainerName struct {
	// The namespace of the container. Can be empty for the root namespace.
	Namespace string
	// The name of the container in this namespace.
	Name string
}

type manager struct {
	containers map[namespacedContainerName]*containerData
	containersLock sync.RWMutex
	memoryCache *memory.InMemoryCache
	fsInfo fs.FsInfo
	sysFs sysfs.SysFs
	machineMu sync.RWMutex // protects machineInfo
	machineInfo info.MachineInfo
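	// quitChannels holds one channel per long-running background goroutine
	// (global housekeeping, machine-info updates, the container watch loop).
	// Stop sends a value on each channel and then waits for the goroutine to
	// reply with its exit error.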
	quitChannels []chan error
	cadvisorContainer string
	inHostNamespace bool
	eventHandler events.EventManager
	startupTime time.Time
	maxHousekeepingInterval time.Duration
	allowDynamicHousekeeping bool
	includedMetrics container.MetricSet
	containerWatchers []watcher.ContainerWatcher
	eventsChannel chan watcher.ContainerEvent
	collectorHttpClient *http.Client
	nvidiaManager accelerators.AcceleratorManager
	// List of raw container cgroup path prefix whitelist.
	rawContainerCgroupPathPrefixWhiteList []string
}

// Start the container manager.
func (self *manager) Start() error {
	self.containerWatchers = container.InitializePlugins(self, self.fsInfo, self.includedMetrics)
	err := raw.Register(self, self.fsInfo, self.includedMetrics, self.rawContainerCgroupPathPrefixWhiteList)
	if err != nil {
		klog.Errorf("Registration of the raw container factory failed: %v", err)
	}
	rawWatcher, err := raw.NewRawContainerWatcher()
	if err != nil {
		return err
	}
	self.containerWatchers = append(self.containerWatchers, rawWatcher)
	// Watch for OOMs.
	err = self.watchForNewOoms()
	if err != nil {
		klog.Warningf("Could not configure a source for OOM detection, disabling OOM events: %v", err)
	}
	// If there are no factories, don't start any housekeeping and serve the information we do have.
	if !container.HasFactories() {
		return nil
	}
	// Setup collection of nvidia GPU metrics if any of them are attached to the machine.
	self.nvidiaManager.Setup()
	// Create root and then recover all containers.
	err = self.createContainer("/", watcher.Raw)
	if err != nil {
		return err
	}
	klog.V(2).Infof("Starting recovery of all containers")
	err = self.detectSubcontainers("/")
	if err != nil {
		return err
	}
	klog.V(2).Infof("Recovery completed")
	// Watch for new containers.
	quitWatcher := make(chan error)
	err = self.watchForNewContainers(quitWatcher)
	if err != nil {
		return err
	}
	self.quitChannels = append(self.quitChannels, quitWatcher)
	// Look for new containers in the main housekeeping thread.
	quitGlobalHousekeeping := make(chan error)
	self.quitChannels = append(self.quitChannels, quitGlobalHousekeeping)
	go self.globalHousekeeping(quitGlobalHousekeeping)
	quitUpdateMachineInfo := make(chan error)
	self.quitChannels = append(self.quitChannels, quitUpdateMachineInfo)
	go self.updateMachineInfo(quitUpdateMachineInfo)
	return nil
}

func (self *manager) Stop() error {
	defer self.nvidiaManager.Destroy()
	// Stop and wait on all quit channels.
	for i, c := range self.quitChannels {
		// Send the exit signal and wait on the thread to exit (by closing the channel).
		c <- nil
		err := <-c
		if err != nil {
			// Remove the channels that quit successfully.
			self.quitChannels = self.quitChannels[i:]
			return err
		}
	}
	self.quitChannels = make([]chan error, 0, 2)
	return nil
}

func (self *manager) updateMachineInfo(quit chan error) {
	ticker := time.NewTicker(*updateMachineInfoInterval)
	for {
		select {
		case <-ticker.C:
			info, err := machine.Info(self.sysFs, self.fsInfo, self.inHostNamespace)
			if err != nil {
				klog.Errorf("Could not get machine info: %v", err)
				break
			}
			self.machineMu.Lock()
			self.machineInfo = *info
			self.machineMu.Unlock()
			klog.V(5).Infof("Update machine info: %+v", *info)
		case <-quit:
			ticker.Stop()
			quit <- nil
			return
		}
	}
}

func (self *manager) globalHousekeeping(quit chan error) {
	// Long housekeeping is either 100ms or half of the housekeeping interval.
	longHousekeeping := 100 * time.Millisecond
	if *globalHousekeepingInterval/2 < longHousekeeping {
		longHousekeeping = *globalHousekeepingInterval / 2
	}
	ticker := time.Tick(*globalHousekeepingInterval)
	for {
		select {
		case t := <-ticker:
			start := time.Now()
			// Check for new containers.
			err := self.detectSubcontainers("/")
			if err != nil {
				klog.Errorf("Failed to detect containers: %s", err)
			}
			// Log if housekeeping took too long.
			duration := time.Since(start)
			if duration >= longHousekeeping {
				klog.V(3).Infof("Global Housekeeping(%d) took %s", t.Unix(), duration)
			}
		case <-quit:
			// Quit if asked to do so.
			quit <- nil
			klog.Infof("Exiting global housekeeping thread")
			return
		}
	}
}

func (self *manager) getContainerData(containerName string) (*containerData, error) {
	var cont *containerData
	var ok bool
	func() {
		self.containersLock.RLock()
		defer self.containersLock.RUnlock()
		// Ensure we have the container.
		cont, ok = self.containers[namespacedContainerName{
			Name: containerName,
		}]
	}()
	if !ok {
		return nil, fmt.Errorf("unknown container %q", containerName)
	}
	return cont, nil
}

func (self *manager) GetDerivedStats(containerName string, options v2.RequestOptions) (map[string]v2.DerivedStats, error) {
	conts, err := self.getRequestedContainers(containerName, options)
	if err != nil {
		return nil, err
	}
	var errs partialFailure
	stats := make(map[string]v2.DerivedStats)
	for name, cont := range conts {
		d, err := cont.DerivedStats()
		if err != nil {
			errs.append(name, "DerivedStats", err)
		}
		stats[name] = d
	}
	return stats, errs.OrNil()
}

func (self *manager) GetContainerSpec(containerName string, options v2.RequestOptions) (map[string]v2.ContainerSpec, error) {
	conts, err := self.getRequestedContainers(containerName, options)
	if err != nil {
		return nil, err
	}
	var errs partialFailure
	specs := make(map[string]v2.ContainerSpec)
	for name, cont := range conts {
		cinfo, err := cont.GetInfo(false)
		if err != nil {
			errs.append(name, "GetInfo", err)
		}
		spec := self.getV2Spec(cinfo)
		specs[name] = spec
	}
	return specs, errs.OrNil()
}

// Get V2 container spec from v1 container info.
func (self *manager) getV2Spec(cinfo *containerInfo) v2.ContainerSpec {
	spec := self.getAdjustedSpec(cinfo)
	return v2.ContainerSpecFromV1(&spec, cinfo.Aliases, cinfo.Namespace)
}

func (self *manager) getAdjustedSpec(cinfo *containerInfo) info.ContainerSpec {
	spec := cinfo.Spec
	// Set default value to an actual value
	if spec.HasMemory {
		// Memory.Limit is 0 means there's no limit
		if spec.Memory.Limit == 0 {
			self.machineMu.RLock()
			spec.Memory.Limit = uint64(self.machineInfo.MemoryCapacity)
			self.machineMu.RUnlock()
		}
	}
	return spec
}

func (self *manager) GetContainerInfo(containerName string, query *info.ContainerInfoRequest) (*info.ContainerInfo, error) {
	cont, err := self.getContainerData(containerName)
	if err != nil {
		return nil, err
	}
	return self.containerDataToContainerInfo(cont, query)
}

func (self *manager) GetContainerInfoV2(containerName string, options v2.RequestOptions) (map[string]v2.ContainerInfo, error) {
	containers, err := self.getRequestedContainers(containerName, options)
	if err != nil {
		return nil, err
	}
	var errs partialFailure
	var nilTime time.Time // Ignored.
	infos := make(map[string]v2.ContainerInfo, len(containers))
	for name, container := range containers {
		result := v2.ContainerInfo{}
		cinfo, err := container.GetInfo(false)
		if err != nil {
			errs.append(name, "GetInfo", err)
			infos[name] = result
			continue
		}
		result.Spec = self.getV2Spec(cinfo)
		stats, err := self.memoryCache.RecentStats(name, nilTime, nilTime, options.Count)
		if err != nil {
			errs.append(name, "RecentStats", err)
			infos[name] = result
			continue
		}
		result.Stats = v2.ContainerStatsFromV1(containerName, &cinfo.Spec, stats)
		infos[name] = result
	}
	return infos, errs.OrNil()
}

func (self *manager) containerDataToContainerInfo(cont *containerData, query *info.ContainerInfoRequest) (*info.ContainerInfo, error) {
	// Get the info from the container.
	cinfo, err := cont.GetInfo(true)
	if err != nil {
		return nil, err
	}
	stats, err := self.memoryCache.RecentStats(cinfo.Name, query.Start, query.End, query.NumStats)
	if err != nil {
		return nil, err
	}
	// Make a copy of the info for the user.
	ret := &info.ContainerInfo{
		ContainerReference: cinfo.ContainerReference,
		Subcontainers: cinfo.Subcontainers,
		Spec: self.getAdjustedSpec(cinfo),
		Stats: stats,
	}
	return ret, nil
}

func (self *manager) getContainer(containerName string) (*containerData, error) {
	self.containersLock.RLock()
	defer self.containersLock.RUnlock()
	cont, ok := self.containers[namespacedContainerName{Name: containerName}]
	if !ok {
		return nil, fmt.Errorf("unknown container %q", containerName)
	}
	return cont, nil
}

func (self *manager) getSubcontainers(containerName string) map[string]*containerData {
	self.containersLock.RLock()
	defer self.containersLock.RUnlock()
	containersMap := make(map[string]*containerData, len(self.containers))
	// Get all the unique subcontainers of the specified container
	matchedName := path.Join(containerName, "/")
	for i := range self.containers {
		name := self.containers[i].info.Name
		if name == containerName || strings.HasPrefix(name, matchedName) {
			containersMap[self.containers[i].info.Name] = self.containers[i]
		}
	}
	return containersMap
}

func (self *manager) SubcontainersInfo(containerName string, query *info.ContainerInfoRequest) ([]*info.ContainerInfo, error) {
	containersMap := self.getSubcontainers(containerName)
	containers := make([]*containerData, 0, len(containersMap))
	for _, cont := range containersMap {
		containers = append(containers, cont)
	}
	return self.containerDataSliceToContainerInfoSlice(containers, query)
}

func (self *manager) getAllDockerContainers() map[string]*containerData {
	self.containersLock.RLock()
	defer self.containersLock.RUnlock()
	containers := make(map[string]*containerData, len(self.containers))
	// Get containers in the Docker namespace.
	for name, cont := range self.containers {
		if name.Namespace == docker.DockerNamespace {
			containers[cont.info.Name] = cont
		}
	}
	return containers
}

func (self *manager) AllDockerContainers(query *info.ContainerInfoRequest) (map[string]info.ContainerInfo, error) {
	containers := self.getAllDockerContainers()
	output := make(map[string]info.ContainerInfo, len(containers))
	for name, cont := range containers {
		inf, err := self.containerDataToContainerInfo(cont, query)
		if err != nil {
			// Ignore the error because of race condition and return best-effort result.
			if err == memory.ErrDataNotFound {
				klog.Warningf("Error getting data for container %s because of race condition", name)
				continue
			}
			return nil, err
		}
		output[name] = *inf
	}
	return output, nil
}

func (self *manager) getDockerContainer(containerName string) (*containerData, error) {
	self.containersLock.RLock()
	defer self.containersLock.RUnlock()
	// Check for the container in the Docker container namespace.
	cont, ok := self.containers[namespacedContainerName{
		Namespace: docker.DockerNamespace,
		Name: containerName,
	}]
	// Look for container by short prefix name if no exact match found.
	if !ok {
		for contName, c := range self.containers {
			if contName.Namespace == docker.DockerNamespace && strings.HasPrefix(contName.Name, containerName) {
				if cont == nil {
					cont = c
				} else {
					return nil, fmt.Errorf("unable to find container. Container %q is not unique", containerName)
				}
			}
		}
		if cont == nil {
			return nil, fmt.Errorf("unable to find Docker container %q", containerName)
		}
	}
	return cont, nil
}

func (self *manager) DockerContainer(containerName string, query *info.ContainerInfoRequest) (info.ContainerInfo, error) {
	container, err := self.getDockerContainer(containerName)
	if err != nil {
		return info.ContainerInfo{}, err
	}
	inf, err := self.containerDataToContainerInfo(container, query)
	if err != nil {
		return info.ContainerInfo{}, err
	}
	return *inf, nil
}

func (self *manager) containerDataSliceToContainerInfoSlice(containers []*containerData, query *info.ContainerInfoRequest) ([]*info.ContainerInfo, error) {
	if len(containers) == 0 {
		return nil, fmt.Errorf("no containers found")
	}
	// Get the info for each container.
	output := make([]*info.ContainerInfo, 0, len(containers))
	for i := range containers {
		cinfo, err := self.containerDataToContainerInfo(containers[i], query)
		if err != nil {
			// Skip containers with errors, we try to degrade gracefully.
			continue
		}
		output = append(output, cinfo)
	}
	return output, nil
}

func (self *manager) GetRequestedContainersInfo(containerName string, options v2.RequestOptions) (map[string]*info.ContainerInfo, error) {
	containers, err := self.getRequestedContainers(containerName, options)
	if err != nil {
		return nil, err
	}
	var errs partialFailure
	containersMap := make(map[string]*info.ContainerInfo)
	query := info.ContainerInfoRequest{
		NumStats: options.Count,
	}
	for name, data := range containers {
		info, err := self.containerDataToContainerInfo(data, &query)
		if err != nil {
			errs.append(name, "containerDataToContainerInfo", err)
		}
		containersMap[name] = info
	}
	return containersMap, errs.OrNil()
}

func (self *manager) getRequestedContainers(containerName string, options v2.RequestOptions) (map[string]*containerData, error) {
	containersMap := make(map[string]*containerData)
	switch options.IdType {
	case v2.TypeName:
		if options.Recursive == false {
			cont, err := self.getContainer(containerName)
			if err != nil {
				return containersMap, err
			}
			containersMap[cont.info.Name] = cont
		} else {
			containersMap = self.getSubcontainers(containerName)
			if len(containersMap) == 0 {
				return containersMap, fmt.Errorf("unknown container: %q", containerName)
			}
		}
	case v2.TypeDocker:
		if options.Recursive == false {
			containerName = strings.TrimPrefix(containerName, "/")
			cont, err := self.getDockerContainer(containerName)
			if err != nil {
				return containersMap, err
			}
			containersMap[cont.info.Name] = cont
		} else {
			if containerName != "/" {
				return containersMap, fmt.Errorf("invalid request for docker container %q with subcontainers", containerName)
			}
			containersMap = self.getAllDockerContainers()
		}
	default:
		return containersMap, fmt.Errorf("invalid request type %q", options.IdType)
	}
	if options.MaxAge != nil {
		// update stats for all containers in containersMap
		var waitGroup sync.WaitGroup
		waitGroup.Add(len(containersMap))
		for _, container := range containersMap {
			go func(cont *containerData) {
				cont.OnDemandHousekeeping(*options.MaxAge)
				waitGroup.Done()
			}(container)
		}
		waitGroup.Wait()
	}
	return containersMap, nil
}

func (self *manager) GetDirFsInfo(dir string) (v2.FsInfo, error) {
	device, err := self.fsInfo.GetDirFsDevice(dir)
	if err != nil {
		return v2.FsInfo{}, fmt.Errorf("failed to get device for dir %q: %v", dir, err)
	}
	return self.getFsInfoByDeviceName(device.Device)
}

func (self *manager) GetFsInfoByFsUUID(uuid string) (v2.FsInfo, error) {
	device, err := self.fsInfo.GetDeviceInfoByFsUUID(uuid)
	if err != nil {
		return v2.FsInfo{}, err
	}
	return self.getFsInfoByDeviceName(device.Device)
}

func (self *manager) GetFsInfo(label string) ([]v2.FsInfo, error) {
	var empty time.Time
	// Get latest data from filesystems hanging off root container.
	stats, err := self.memoryCache.RecentStats("/", empty, empty, 1)
	if err != nil {
		return nil, err
	}
	dev := ""
	if len(label) != 0 {
		dev, err = self.fsInfo.GetDeviceForLabel(label)
		if err != nil {
			return nil, err
		}
	}
	fsInfo := []v2.FsInfo{}
	for i := range stats[0].Filesystem {
		fs := stats[0].Filesystem[i]
		if len(label) != 0 && fs.Device != dev {
			continue
		}
		mountpoint, err := self.fsInfo.GetMountpointForDevice(fs.Device)
		if err != nil {
			return nil, err
		}
		labels, err := self.fsInfo.GetLabelsForDevice(fs.Device)
		if err != nil {
			return nil, err
		}
		fi := v2.FsInfo{
			Timestamp: stats[0].Timestamp,
			Device: fs.Device,
			Mountpoint: mountpoint,
			Capacity: fs.Limit,
			Usage: fs.Usage,
			Available: fs.Available,
			Labels: labels,
		}
		if fs.HasInodes {
			fi.Inodes = &fs.Inodes
			fi.InodesFree = &fs.InodesFree
		}
		fsInfo = append(fsInfo, fi)
	}
	return fsInfo, nil
}

func (m *manager) GetMachineInfo() (*info.MachineInfo, error) {
	m.machineMu.RLock()
	defer m.machineMu.RUnlock()
	// Copy and return the MachineInfo.
	return &m.machineInfo, nil
}

func (m *manager) GetVersionInfo() (*info.VersionInfo, error) {
	// TODO: Consider caching this and periodically updating. The VersionInfo may change if
	// the docker daemon is started after the cAdvisor client is created. Caching the value
	// would be helpful so we would be able to return the last known docker version if
	// docker was down at the time of a query.
	return getVersionInfo()
}

func (m *manager) Exists(containerName string) bool {
	m.containersLock.Lock()
	defer m.containersLock.Unlock()
	namespacedName := namespacedContainerName{
		Name: containerName,
	}
	_, ok := m.containers[namespacedName]
	if ok {
		return true
	}
	return false
}

func (m *manager) GetProcessList(containerName string, options v2.RequestOptions) ([]v2.ProcessInfo, error) {
	// override recursive. Only support single container listing.
	options.Recursive = false
	// override MaxAge. ProcessList does not require updated stats.
	options.MaxAge = nil
	conts, err := m.getRequestedContainers(containerName, options)
	if err != nil {
		return nil, err
	}
	if len(conts) != 1 {
		return nil, fmt.Errorf("Expected the request to match only one container")
	}
	// TODO(rjnagal): handle count? Only if we can do count by type (eg. top 5 cpu users)
	ps := []v2.ProcessInfo{}
	for _, cont := range conts {
		ps, err = cont.GetProcessList(m.cadvisorContainer, m.inHostNamespace)
		if err != nil {
			return nil, err
		}
	}
	return ps, nil
}

func (m *manager) registerCollectors(collectorConfigs map[string]string, cont *containerData) error {
	for k, v := range collectorConfigs {
		configFile, err := cont.ReadFile(v, m.inHostNamespace)
		if err != nil {
			return fmt.Errorf("failed to read config file %q for config %q, container %q: %v", k, v, cont.info.Name, err)
		}
		klog.V(4).Infof("Got config from %q: %q", v, configFile)
		if strings.HasPrefix(k, "prometheus") || strings.HasPrefix(k, "Prometheus") {
			newCollector, err := collector.NewPrometheusCollector(k, configFile, *applicationMetricsCountLimit, cont.handler, m.collectorHttpClient)
			if err != nil {
				return fmt.Errorf("failed to create collector for container %q, config %q: %v", cont.info.Name, k, err)
			}
			err = cont.collectorManager.RegisterCollector(newCollector)
			if err != nil {
				return fmt.Errorf("failed to register collector for container %q, config %q: %v", cont.info.Name, k, err)
			}
		} else {
			newCollector, err := collector.NewCollector(k, configFile, *applicationMetricsCountLimit, cont.handler, m.collectorHttpClient)
			if err != nil {
				return fmt.Errorf("failed to create collector for container %q, config %q: %v", cont.info.Name, k, err)
			}
			err = cont.collectorManager.RegisterCollector(newCollector)
			if err != nil {
				return fmt.Errorf("failed to register collector for container %q, config %q: %v", cont.info.Name, k, err)
			}
		}
	}
	return nil
}

// Enables overwriting an existing containerData/Handler object for a given containerName.
// Can't use createContainer as it just returns if a given containerName has a handler already.
// Ex: rkt handler will want to take priority over the raw handler, but the raw handler might be created first.
// Only allow raw handler to be overridden.
func (m *manager) overrideContainer(containerName string, watchSource watcher.ContainerWatchSource) error {
	m.containersLock.Lock()
	defer m.containersLock.Unlock()
	namespacedName := namespacedContainerName{
		Name: containerName,
	}
	if _, ok := m.containers[namespacedName]; ok {
		containerData := m.containers[namespacedName]
		if containerData.handler.Type() != container.ContainerTypeRaw {
			return nil
		}
		err := m.destroyContainerLocked(containerName)
		if err != nil {
			return fmt.Errorf("overrideContainer: failed to destroy containerData/handler for %v: %v", containerName, err)
		}
	}
	return m.createContainerLocked(containerName, watchSource)
}

// Create a container.
func (m *manager) createContainer(containerName string, watchSource watcher.ContainerWatchSource) error {
	m.containersLock.Lock()
	defer m.containersLock.Unlock()
	return m.createContainerLocked(containerName, watchSource)
}

func (m *manager) createContainerLocked(containerName string, watchSource watcher.ContainerWatchSource) error {
	namespacedName := namespacedContainerName{
		Name: containerName,
	}
	// Check that the container didn't already exist.
	if _, ok := m.containers[namespacedName]; ok {
		return nil
	}
	handler, accept, err := container.NewContainerHandler(containerName, watchSource, m.inHostNamespace)
	if err != nil {
		return err
	}
	if !accept {
		// ignoring this container.
		klog.V(4).Infof("ignoring container %q", containerName)
		return nil
	}
	collectorManager, err := collector.NewCollectorManager()
	if err != nil {
		return err
	}
	logUsage := *logCadvisorUsage && containerName == m.cadvisorContainer
	cont, err := newContainerData(containerName, m.memoryCache, handler, logUsage, collectorManager, m.maxHousekeepingInterval, m.allowDynamicHousekeeping, clock.RealClock{})
	if err != nil {
		return err
	}
	if !cgroups.IsCgroup2UnifiedMode() {
		devicesCgroupPath, err := handler.GetCgroupPath("devices")
		if err != nil {
			klog.Warningf("Error getting devices cgroup path: %v", err)
		} else {
			cont.nvidiaCollector, err = m.nvidiaManager.GetCollector(devicesCgroupPath)
			if err != nil {
				klog.V(4).Infof("GPU metrics may be unavailable/incomplete for container %q: %v", cont.info.Name, err)
			}
		}
	}
	// Add collectors
	labels := handler.GetContainerLabels()
	collectorConfigs := collector.GetCollectorConfigs(labels)
	err = m.registerCollectors(collectorConfigs, cont)
	if err != nil {
		klog.Warningf("Failed to register collectors for %q: %v", containerName, err)
	}
	// Add the container name and all its aliases. The aliases must be within the namespace of the factory.
	m.containers[namespacedName] = cont
	for _, alias := range cont.info.Aliases {
		m.containers[namespacedContainerName{
			Namespace: cont.info.Namespace,
			Name: alias,
		}] = cont
	}
	klog.V(3).Infof("Added container: %q (aliases: %v, namespace: %q)", containerName, cont.info.Aliases, cont.info.Namespace)
	contSpec, err := cont.handler.GetSpec()
	if err != nil {
		return err
	}
	contRef, err := cont.handler.ContainerReference()
	if err != nil {
		return err
	}
	newEvent := &info.Event{
		ContainerName: contRef.Name,
		Timestamp: contSpec.CreationTime,
		EventType: info.EventContainerCreation,
	}
	err = m.eventHandler.AddEvent(newEvent)
	if err != nil {
		return err
	}
	// Start the container's housekeeping.
	return cont.Start()
}

func (m *manager) destroyContainer(containerName string) error {
	m.containersLock.Lock()
	defer m.containersLock.Unlock()
	return m.destroyContainerLocked(containerName)
}

func (m *manager) destroyContainerLocked(containerName string) error {
	namespacedName := namespacedContainerName{
		Name: containerName,
	}
	cont, ok := m.containers[namespacedName]
	if !ok {
		// Already destroyed, done.
		return nil
	}
	// Tell the container to stop.
	err := cont.Stop()
	if err != nil {
		return err
	}
	// Remove the container from our records (and all its aliases).
	delete(m.containers, namespacedName)
	for _, alias := range cont.info.Aliases {
		delete(m.containers, namespacedContainerName{
			Namespace: cont.info.Namespace,
			Name: alias,
		})
	}
	klog.V(3).Infof("Destroyed container: %q (aliases: %v, namespace: %q)", containerName, cont.info.Aliases, cont.info.Namespace)
	contRef, err := cont.handler.ContainerReference()
	if err != nil {
		return err
	}
	newEvent := &info.Event{
		ContainerName: contRef.Name,
		Timestamp: time.Now(),
		EventType: info.EventContainerDeletion,
	}
	err = m.eventHandler.AddEvent(newEvent)
	if err != nil {
		return err
	}
	return nil
}

// Detect all containers that have been added or deleted from the specified container.
func (m *manager) getContainersDiff(containerName string) (added []info.ContainerReference, removed []info.ContainerReference, err error) {
	// Get all subcontainers recursively.
	m.containersLock.RLock()
	cont, ok := m.containers[namespacedContainerName{
		Name: containerName,
	}]
	m.containersLock.RUnlock()
	if !ok {
		return nil, nil, fmt.Errorf("failed to find container %q while checking for new containers", containerName)
	}
	allContainers, err := cont.handler.ListContainers(container.ListRecursive)
	if err != nil {
		return nil, nil, err
	}
	allContainers = append(allContainers, info.ContainerReference{Name: containerName})
	m.containersLock.RLock()
	defer m.containersLock.RUnlock()
	// Determine which were added and which were removed.
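	// Start from the set of containers currently tracked (canonical names only).
	// Every name that also appears in the fresh listing is dropped from that set,
	// so whatever remains afterwards was removed from the system, while listed
	// names we are not yet tracking are reported as added.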
	allContainersSet := make(map[string]*containerData)
	for name, d := range m.containers {
		// Only add the canonical name.
		if d.info.Name == name.Name {
			allContainersSet[name.Name] = d
		}
	}
	// Added containers
	for _, c := range allContainers {
		delete(allContainersSet, c.Name)
		_, ok := m.containers[namespacedContainerName{
			Name: c.Name,
		}]
		if !ok {
			added = append(added, c)
		}
	}
	// Removed ones are no longer in the container listing.
	for _, d := range allContainersSet {
		removed = append(removed, d.info.ContainerReference)
	}
	return
}

// Detect the existing subcontainers and reflect the setup here.
func (m *manager) detectSubcontainers(containerName string) error {
	added, removed, err := m.getContainersDiff(containerName)
	if err != nil {
		return err
	}
	// Add the new containers.
	for _, cont := range added {
		err = m.createContainer(cont.Name, watcher.Raw)
		if err != nil {
			klog.Errorf("Failed to create existing container: %s: %s", cont.Name, err)
		}
	}
	// Remove the old containers.
	for _, cont := range removed {
		err = m.destroyContainer(cont.Name)
		if err != nil {
			klog.Errorf("Failed to destroy existing container: %s: %s", cont.Name, err)
		}
	}
	return nil
}

// Watches for new containers started in the system. Runs forever unless there is a setup error.
func (self *manager) watchForNewContainers(quit chan error) error {
	for _, watcher := range self.containerWatchers {
		err := watcher.Start(self.eventsChannel)
		if err != nil {
			return err
		}
	}
	// There is a race between starting the watch and new container creation so we do a detection before we read new containers.
	err := self.detectSubcontainers("/")
	if err != nil {
		return err
	}
	// Listen to events from the container handler.
	go func() {
		for {
			select {
			case event := <-self.eventsChannel:
				switch {
				case event.EventType == watcher.ContainerAdd:
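					// Every watch source is currently handled the same way:
					// create the container, recording the source it was reported from.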
					switch event.WatchSource {
					default:
						err = self.createContainer(event.Name, event.WatchSource)
					}
				case event.EventType == watcher.ContainerDelete:
					err = self.destroyContainer(event.Name)
				}
				if err != nil {
					klog.Warningf("Failed to process watch event %+v: %v", event, err)
				}
			case <-quit:
				var errs partialFailure
				// Stop processing events if asked to quit.
				for i, watcher := range self.containerWatchers {
					err := watcher.Stop()
					if err != nil {
						errs.append(fmt.Sprintf("watcher %d", i), "Stop", err)
					}
				}
				if len(errs) > 0 {
					quit <- errs
				} else {
					quit <- nil
					klog.Infof("Exiting thread watching subcontainers")
					return
				}
			}
		}
	}()
	return nil
}

func (self *manager) watchForNewOoms() error {
	klog.V(2).Infof("Started watching for new ooms in manager")
	outStream := make(chan *oomparser.OomInstance, 10)
	oomLog, err := oomparser.New()
	if err != nil {
		return err
	}
	go oomLog.StreamOoms(outStream)
	go func() {
		for oomInstance := range outStream {
			// Surface OOM and OOM kill events.
			newEvent := &info.Event{
				ContainerName: oomInstance.ContainerName,
				Timestamp: oomInstance.TimeOfDeath,
				EventType: info.EventOom,
			}
			err := self.eventHandler.AddEvent(newEvent)
			if err != nil {
				klog.Errorf("failed to add OOM event for %q: %v", oomInstance.ContainerName, err)
			}
			klog.V(3).Infof("Created an OOM event in container %q at %v", oomInstance.ContainerName, oomInstance.TimeOfDeath)
			newEvent = &info.Event{
				ContainerName: oomInstance.VictimContainerName,
				Timestamp: oomInstance.TimeOfDeath,
				EventType: info.EventOomKill,
				EventData: info.EventData{
					OomKill: &info.OomKillEventData{
						Pid: oomInstance.Pid,
						ProcessName: oomInstance.ProcessName,
					},
				},
			}
			err = self.eventHandler.AddEvent(newEvent)
			if err != nil {
				klog.Errorf("failed to add OOM kill event for %q: %v", oomInstance.ContainerName, err)
			}
		}
	}()
	return nil
}

// WatchForEvents can be called by the API; events matching the request are
// delivered on the returned channel.
func (self *manager) WatchForEvents(request *events.Request) (*events.EventChannel, error) {
	return self.eventHandler.WatchEvents(request)
}

// GetPastEvents can be called by the API; it returns all past events that
// satisfy the request.
func (self *manager) GetPastEvents(request *events.Request) ([]*info.Event, error) {
	return self.eventHandler.GetEvents(request)
}

// CloseEventChannel is called by the API when a client is no longer listening
// on the channel.
func (self *manager) CloseEventChannel(watch_id int) {
	self.eventHandler.StopWatch(watch_id)
}

// Parses the events StoragePolicy from the flags.
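// Flag values are comma-separated key=value pairs where the key is an event
// type (e.g. creation, oom) or "default", for example:
//   --event_storage_age_limit=creation=1h,oom=30m,default=24h
//   --event_storage_event_limit=creation=500,default=100000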
func parseEventsStoragePolicy() events.StoragePolicy {
	policy := events.DefaultStoragePolicy()
	// Parse max age.
	parts := strings.Split(*eventStorageAgeLimit, ",")
	for _, part := range parts {
		items := strings.Split(part, "=")
		if len(items) != 2 {
			klog.Warningf("Unknown event storage policy %q when parsing max age", part)
			continue
		}
		dur, err := time.ParseDuration(items[1])
		if err != nil {
			klog.Warningf("Unable to parse event max age duration %q: %v", items[1], err)
			continue
		}
		if items[0] == "default" {
			policy.DefaultMaxAge = dur
			continue
		}
		policy.PerTypeMaxAge[info.EventType(items[0])] = dur
	}
	// Parse max number.
	parts = strings.Split(*eventStorageEventLimit, ",")
	for _, part := range parts {
		items := strings.Split(part, "=")
		if len(items) != 2 {
			klog.Warningf("Unknown event storage policy %q when parsing max event limit", part)
			continue
		}
		val, err := strconv.Atoi(items[1])
		if err != nil {
			klog.Warningf("Unable to parse integer from %q: %v", items[1], err)
			continue
		}
		if items[0] == "default" {
			policy.DefaultMaxNumEvents = val
			continue
		}
		policy.PerTypeMaxNumEvents[info.EventType(items[0])] = val
	}
	return policy
}

func (m *manager) DockerImages() ([]info.DockerImage, error) {
	return docker.Images()
}

func (m *manager) DockerInfo() (info.DockerStatus, error) {
	return docker.Status()
}

func (m *manager) DebugInfo() map[string][]string {
	debugInfo := container.DebugInfo()
	// Get unique containers.
	var conts map[*containerData]struct{}
	func() {
		m.containersLock.RLock()
		defer m.containersLock.RUnlock()
		conts = make(map[*containerData]struct{}, len(m.containers))
		for _, c := range m.containers {
			conts[c] = struct{}{}
		}
	}()
	// List containers.
	lines := make([]string, 0, len(conts))
	for cont := range conts {
		lines = append(lines, cont.info.Name)
		if cont.info.Namespace != "" {
			lines = append(lines, fmt.Sprintf("\tNamespace: %s", cont.info.Namespace))
		}
		if len(cont.info.Aliases) != 0 {
			lines = append(lines, "\tAliases:")
			for _, alias := range cont.info.Aliases {
				lines = append(lines, fmt.Sprintf("\t\t%s", alias))
			}
		}
	}
	debugInfo["Managed containers"] = lines
	return debugInfo
}

func (self *manager) getFsInfoByDeviceName(deviceName string) (v2.FsInfo, error) {
	mountPoint, err := self.fsInfo.GetMountpointForDevice(deviceName)
	if err != nil {
		return v2.FsInfo{}, fmt.Errorf("failed to get mount point for device %q: %v", deviceName, err)
	}
	infos, err := self.GetFsInfo("")
	if err != nil {
		return v2.FsInfo{}, err
	}
	for _, info := range infos {
		if info.Mountpoint == mountPoint {
			return info, nil
		}
	}
	return v2.FsInfo{}, fmt.Errorf("cannot find filesystem info for device %q", deviceName)
}

func getVersionInfo() (*info.VersionInfo, error) {
	kernel_version := machine.KernelVersion()
	container_os := machine.ContainerOsVersion()
	docker_version, err := docker.VersionString()
	if err != nil {
		return nil, err
	}
	docker_api_version, err := docker.APIVersionString()
	if err != nil {
		return nil, err
	}
	return &info.VersionInfo{
		KernelVersion: kernel_version,
		ContainerOsVersion: container_os,
		DockerVersion: docker_version,
		DockerAPIVersion: docker_api_version,
		CadvisorVersion: version.Info["version"],
		CadvisorRevision: version.Info["revision"],
	}, nil
}

// Helper for accumulating partial failures.
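// Each call to append records one failed operation; OrNil turns the collected
// failures into an error, or returns nil when nothing failed, so callers can
// return it directly alongside whatever partial results they gathered.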
type partialFailure []string

func (f *partialFailure) append(id, operation string, err error) {
	*f = append(*f, fmt.Sprintf("[%q: %s: %s]", id, operation, err))
}

func (f partialFailure) Error() string {
	return fmt.Sprintf("partial failures: %s", strings.Join(f, ", "))
}

func (f partialFailure) OrNil() error {
	if len(f) == 0 {
		return nil
	}
	return f
}