// Copyright 2014 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Manager of cAdvisor-monitored containers.
package manager

import (
	"flag"
	"fmt"
	"net/http"
	"os"
	"path"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/google/cadvisor/accelerators"
	"github.com/google/cadvisor/cache/memory"
	"github.com/google/cadvisor/collector"
	"github.com/google/cadvisor/container"
	"github.com/google/cadvisor/container/docker"
	"github.com/google/cadvisor/container/raw"
	"github.com/google/cadvisor/events"
	"github.com/google/cadvisor/fs"
	info "github.com/google/cadvisor/info/v1"
	"github.com/google/cadvisor/info/v2"
	"github.com/google/cadvisor/machine"
	"github.com/google/cadvisor/utils/oomparser"
	"github.com/google/cadvisor/utils/sysfs"
	"github.com/google/cadvisor/version"
	"github.com/google/cadvisor/watcher"

	"github.com/opencontainers/runc/libcontainer/cgroups"
	"k8s.io/klog"
	"k8s.io/utils/clock"
)

var globalHousekeepingInterval = flag.Duration("global_housekeeping_interval", 1*time.Minute, "Interval between global housekeepings")
var updateMachineInfoInterval = flag.Duration("update_machine_info_interval", 5*time.Minute, "Interval between machine info updates.")
var logCadvisorUsage = flag.Bool("log_cadvisor_usage", false, "Whether to log the usage of the cAdvisor container")
var eventStorageAgeLimit = flag.String("event_storage_age_limit", "default=24h", "Max length of time for which to store events (per type). Value is a comma separated list of key values, where the keys are event types (e.g.: creation, oom) or \"default\" and the value is a duration. Default is applied to all non-specified event types")
var eventStorageEventLimit = flag.String("event_storage_event_limit", "default=100000", "Max number of events to store (per type). Value is a comma separated list of key values, where the keys are event types (e.g.: creation, oom) or \"default\" and the value is an integer. Default is applied to all non-specified event types")
var applicationMetricsCountLimit = flag.Int("application_metrics_count_limit", 100, "Max number of application metrics to store (per container)")

// The Manager interface defines operations for starting a manager and getting
// container and machine information.
type Manager interface {
	// Start the manager. Calling other manager methods before this returns
	// may produce undefined behavior.
	Start() error

	// Stops the manager.
	Stop() error

	// Get information about a container.
	GetContainerInfo(containerName string, query *info.ContainerInfoRequest) (*info.ContainerInfo, error)

	// Get V2 information about a container.
	// Recursive (subcontainer) requests are best-effort, and may return a partial result alongside an
	// error in the partial failure case.
	GetContainerInfoV2(containerName string, options v2.RequestOptions) (map[string]v2.ContainerInfo, error)

	// Get information about all subcontainers of the specified container (includes self).
	SubcontainersInfo(containerName string, query *info.ContainerInfoRequest) ([]*info.ContainerInfo, error)

	// Gets all the Docker containers. Return is a map from full container name to ContainerInfo.
	AllDockerContainers(query *info.ContainerInfoRequest) (map[string]info.ContainerInfo, error)

	// Gets information about a specific Docker container. The specified name is within the Docker namespace.
	DockerContainer(dockerName string, query *info.ContainerInfoRequest) (info.ContainerInfo, error)

	// Gets spec for all containers based on request options.
	GetContainerSpec(containerName string, options v2.RequestOptions) (map[string]v2.ContainerSpec, error)

	// Gets summary stats for all containers based on request options.
	GetDerivedStats(containerName string, options v2.RequestOptions) (map[string]v2.DerivedStats, error)

	// Get info for all requested containers based on the request options.
	GetRequestedContainersInfo(containerName string, options v2.RequestOptions) (map[string]*info.ContainerInfo, error)

	// Returns true if the named container exists.
	Exists(containerName string) bool

	// Get information about the machine.
	GetMachineInfo() (*info.MachineInfo, error)

	// Get version information about different components we depend on.
	GetVersionInfo() (*info.VersionInfo, error)

	// GetFsInfoByFsUUID returns the information of the device having the
	// specified filesystem uuid. If no such device with the UUID exists, this
	// function will return the fs.ErrNoSuchDevice error.
	GetFsInfoByFsUUID(uuid string) (v2.FsInfo, error)

	// Get filesystem information for the filesystem that contains the given directory.
	GetDirFsInfo(dir string) (v2.FsInfo, error)

	// Get filesystem information for a given label.
	// Returns information for all global filesystems if label is empty.
	GetFsInfo(label string) ([]v2.FsInfo, error)

	// Get ps output for a container.
	GetProcessList(containerName string, options v2.RequestOptions) ([]v2.ProcessInfo, error)

	// Get events streamed through passedChannel that fit the request.
	WatchForEvents(request *events.Request) (*events.EventChannel, error)

	// Get past events that have been detected and that fit the request.
	GetPastEvents(request *events.Request) ([]*info.Event, error)

	CloseEventChannel(watch_id int)

	// Get status information about docker.
	DockerInfo() (info.DockerStatus, error)

	// Get details about interesting docker images.
	DockerImages() ([]info.DockerImage, error)

	// Returns debugging information. Map of lines per category.
	DebugInfo() map[string][]string
}

// New takes a memory storage and returns a new manager.
func New(memoryCache *memory.InMemoryCache, sysfs sysfs.SysFs, maxHousekeepingInterval time.Duration, allowDynamicHousekeeping bool, includedMetricsSet container.MetricSet, collectorHttpClient *http.Client, rawContainerCgroupPathPrefixWhiteList []string) (Manager, error) {
	if memoryCache == nil {
		return nil, fmt.Errorf("manager requires memory storage")
	}

	// Detect the container we are running on.
	selfContainer, err := cgroups.GetOwnCgroupPath("cpu")
	if err != nil {
		return nil, err
	}
	klog.V(2).Infof("cAdvisor running in container: %q", selfContainer)

	context := fs.Context{}
	if err := container.InitializeFSContext(&context); err != nil {
		return nil, err
	}

	fsInfo, err := fs.NewFsInfo(context)
	if err != nil {
		return nil, err
	}

	// If cAdvisor was started with the host's rootfs mounted, assume that it is
	// running in its own namespaces.
	inHostNamespace := false
	if _, err := os.Stat("/rootfs/proc"); os.IsNotExist(err) {
		inHostNamespace = true
	}

	// Register for new subcontainers.
	eventsChannel := make(chan watcher.ContainerEvent, 16)

	newManager := &manager{
		containers:                            make(map[namespacedContainerName]*containerData),
		quitChannels:                          make([]chan error, 0, 2),
		memoryCache:                           memoryCache,
		fsInfo:                                fsInfo,
		sysFs:                                 sysfs,
		cadvisorContainer:                     selfContainer,
		inHostNamespace:                       inHostNamespace,
		startupTime:                           time.Now(),
		maxHousekeepingInterval:               maxHousekeepingInterval,
		allowDynamicHousekeeping:              allowDynamicHousekeeping,
		includedMetrics:                       includedMetricsSet,
		containerWatchers:                     []watcher.ContainerWatcher{},
		eventsChannel:                         eventsChannel,
		collectorHttpClient:                   collectorHttpClient,
		nvidiaManager:                         &accelerators.NvidiaManager{},
		rawContainerCgroupPathPrefixWhiteList: rawContainerCgroupPathPrefixWhiteList,
	}

	machineInfo, err := machine.Info(sysfs, fsInfo, inHostNamespace)
	if err != nil {
		return nil, err
	}
	newManager.machineInfo = *machineInfo
	klog.V(1).Infof("Machine: %+v", newManager.machineInfo)

	versionInfo, err := getVersionInfo()
	if err != nil {
		return nil, err
	}
	klog.V(1).Infof("Version: %+v", *versionInfo)

	newManager.eventHandler = events.NewEventManager(parseEventsStoragePolicy())
	return newManager, nil
}

// A namespaced container name.
type namespacedContainerName struct {
	// The namespace of the container. Can be empty for the root namespace.
	Namespace string

	// The name of the container in this namespace.
	Name string
}

type manager struct {
	containers               map[namespacedContainerName]*containerData
	containersLock           sync.RWMutex
	memoryCache              *memory.InMemoryCache
	fsInfo                   fs.FsInfo
	sysFs                    sysfs.SysFs
	machineMu                sync.RWMutex // protects machineInfo
	machineInfo              info.MachineInfo
	quitChannels             []chan error
	cadvisorContainer        string
	inHostNamespace          bool
	eventHandler             events.EventManager
	startupTime              time.Time
	maxHousekeepingInterval  time.Duration
	allowDynamicHousekeeping bool
	includedMetrics          container.MetricSet
	containerWatchers        []watcher.ContainerWatcher
	eventsChannel            chan watcher.ContainerEvent
	collectorHttpClient      *http.Client
	nvidiaManager            accelerators.AcceleratorManager

	// Whitelist of cgroup path prefixes for raw containers.
	rawContainerCgroupPathPrefixWhiteList []string
}

// Start the container manager.
func (self *manager) Start() error {
	self.containerWatchers = container.InitializePlugins(self, self.fsInfo, self.includedMetrics)

	err := raw.Register(self, self.fsInfo, self.includedMetrics, self.rawContainerCgroupPathPrefixWhiteList)
	if err != nil {
		klog.Errorf("Registration of the raw container factory failed: %v", err)
	}

	rawWatcher, err := raw.NewRawContainerWatcher()
	if err != nil {
		return err
	}
	self.containerWatchers = append(self.containerWatchers, rawWatcher)

	// Watch for OOMs.
	err = self.watchForNewOoms()
	if err != nil {
		klog.Warningf("Could not configure a source for OOM detection, disabling OOM events: %v", err)
	}

	// If there are no factories, don't start any housekeeping and serve the information we do have.
	if !container.HasFactories() {
		return nil
	}

	// Set up collection of NVIDIA GPU metrics if any GPUs are attached to the machine.
	self.nvidiaManager.Setup()

	// Create root and then recover all containers.
	err = self.createContainer("/", watcher.Raw)
	if err != nil {
		return err
	}
	klog.V(2).Infof("Starting recovery of all containers")
	err = self.detectSubcontainers("/")
	if err != nil {
		return err
	}
	klog.V(2).Infof("Recovery completed")

	// Watch for new containers.
	quitWatcher := make(chan error)
	err = self.watchForNewContainers(quitWatcher)
	if err != nil {
		return err
	}
	self.quitChannels = append(self.quitChannels, quitWatcher)

	// Look for new containers in the main housekeeping thread.
	quitGlobalHousekeeping := make(chan error)
	self.quitChannels = append(self.quitChannels, quitGlobalHousekeeping)
	go self.globalHousekeeping(quitGlobalHousekeeping)

	quitUpdateMachineInfo := make(chan error)
	self.quitChannels = append(self.quitChannels, quitUpdateMachineInfo)
	go self.updateMachineInfo(quitUpdateMachineInfo)

	return nil
}
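
// Stop signals each background goroutine over its quit channel and waits for
// it to exit; the NVIDIA accelerator manager is destroyed on the way out.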
func (self *manager) Stop() error {
	defer self.nvidiaManager.Destroy()
	// Stop and wait on all quit channels.
	for i, c := range self.quitChannels {
		// Send the exit signal and wait on the thread to exit (by closing the channel).
		c <- nil
		err := <-c
		if err != nil {
			// Remove the channels that quit successfully.
			self.quitChannels = self.quitChannels[i:]
			return err
		}
	}
	self.quitChannels = make([]chan error, 0, 2)
	return nil
}
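
// updateMachineInfo periodically refreshes the cached machine info until it is
// told to quit.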
func (self *manager) updateMachineInfo(quit chan error) {
	ticker := time.NewTicker(*updateMachineInfoInterval)
	for {
		select {
		case <-ticker.C:
			info, err := machine.Info(self.sysFs, self.fsInfo, self.inHostNamespace)
			if err != nil {
				klog.Errorf("Could not get machine info: %v", err)
				break
			}
			self.machineMu.Lock()
			self.machineInfo = *info
			self.machineMu.Unlock()
			klog.V(5).Infof("Update machine info: %+v", *info)
		case <-quit:
			ticker.Stop()
			quit <- nil
			return
		}
	}
}
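
// globalHousekeeping periodically detects added and removed subcontainers and
// logs housekeeping passes that take longer than longHousekeeping.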
func (self *manager) globalHousekeeping(quit chan error) {
	// Long housekeeping is either 100ms or half of the housekeeping interval.
	longHousekeeping := 100 * time.Millisecond
	if *globalHousekeepingInterval/2 < longHousekeeping {
		longHousekeeping = *globalHousekeepingInterval / 2
	}

	ticker := time.Tick(*globalHousekeepingInterval)
	for {
		select {
		case t := <-ticker:
			start := time.Now()

			// Check for new containers.
			err := self.detectSubcontainers("/")
			if err != nil {
				klog.Errorf("Failed to detect containers: %s", err)
			}

			// Log if housekeeping took too long.
			duration := time.Since(start)
			if duration >= longHousekeeping {
				klog.V(3).Infof("Global Housekeeping(%d) took %s", t.Unix(), duration)
			}
		case <-quit:
			// Quit if asked to do so.
			quit <- nil
			klog.Infof("Exiting global housekeeping thread")
			return
		}
	}
}
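
// getContainerData returns the containerData tracked for the named container
// in the root namespace, or an error if it is unknown.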
func (self *manager) getContainerData(containerName string) (*containerData, error) {
	var cont *containerData
	var ok bool
	func() {
		self.containersLock.RLock()
		defer self.containersLock.RUnlock()

		// Ensure we have the container.
		cont, ok = self.containers[namespacedContainerName{
			Name: containerName,
		}]
	}()
	if !ok {
		return nil, fmt.Errorf("unknown container %q", containerName)
	}
	return cont, nil
}
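
// GetDerivedStats returns derived stats for each requested container,
// collecting per-container failures into a partial error.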
func (self *manager) GetDerivedStats(containerName string, options v2.RequestOptions) (map[string]v2.DerivedStats, error) {
	conts, err := self.getRequestedContainers(containerName, options)
	if err != nil {
		return nil, err
	}
	var errs partialFailure
	stats := make(map[string]v2.DerivedStats)
	for name, cont := range conts {
		d, err := cont.DerivedStats()
		if err != nil {
			errs.append(name, "DerivedStats", err)
		}
		stats[name] = d
	}
	return stats, errs.OrNil()
}

func (self *manager) GetContainerSpec(containerName string, options v2.RequestOptions) (map[string]v2.ContainerSpec, error) {
	conts, err := self.getRequestedContainers(containerName, options)
	if err != nil {
		return nil, err
	}
	var errs partialFailure
	specs := make(map[string]v2.ContainerSpec)
	for name, cont := range conts {
		cinfo, err := cont.GetInfo(false)
		if err != nil {
			errs.append(name, "GetInfo", err)
		}
		spec := self.getV2Spec(cinfo)
		specs[name] = spec
	}
	return specs, errs.OrNil()
}

// Get V2 container spec from v1 container info.
func (self *manager) getV2Spec(cinfo *containerInfo) v2.ContainerSpec {
	spec := self.getAdjustedSpec(cinfo)
	return v2.ContainerSpecFromV1(&spec, cinfo.Aliases, cinfo.Namespace)
}

func (self *manager) getAdjustedSpec(cinfo *containerInfo) info.ContainerSpec {
	spec := cinfo.Spec

	// Set default value to an actual value.
	if spec.HasMemory {
		// A memory limit of 0 means there is no limit.
		if spec.Memory.Limit == 0 {
			self.machineMu.RLock()
			spec.Memory.Limit = uint64(self.machineInfo.MemoryCapacity)
			self.machineMu.RUnlock()
		}
	}
	return spec
}

func (self *manager) GetContainerInfo(containerName string, query *info.ContainerInfoRequest) (*info.ContainerInfo, error) {
	cont, err := self.getContainerData(containerName)
	if err != nil {
		return nil, err
	}
	return self.containerDataToContainerInfo(cont, query)
}

func (self *manager) GetContainerInfoV2(containerName string, options v2.RequestOptions) (map[string]v2.ContainerInfo, error) {
	containers, err := self.getRequestedContainers(containerName, options)
	if err != nil {
		return nil, err
	}

	var errs partialFailure
	var nilTime time.Time // Ignored.

	infos := make(map[string]v2.ContainerInfo, len(containers))
	for name, container := range containers {
		result := v2.ContainerInfo{}
		cinfo, err := container.GetInfo(false)
		if err != nil {
			errs.append(name, "GetInfo", err)
			infos[name] = result
			continue
		}
		result.Spec = self.getV2Spec(cinfo)

		stats, err := self.memoryCache.RecentStats(name, nilTime, nilTime, options.Count)
		if err != nil {
			errs.append(name, "RecentStats", err)
			infos[name] = result
			continue
		}

		result.Stats = v2.ContainerStatsFromV1(containerName, &cinfo.Spec, stats)
		infos[name] = result
	}

	return infos, errs.OrNil()
}

func (self *manager) containerDataToContainerInfo(cont *containerData, query *info.ContainerInfoRequest) (*info.ContainerInfo, error) {
	// Get the info from the container.
	cinfo, err := cont.GetInfo(true)
	if err != nil {
		return nil, err
	}

	stats, err := self.memoryCache.RecentStats(cinfo.Name, query.Start, query.End, query.NumStats)
	if err != nil {
		return nil, err
	}

	// Make a copy of the info for the user.
	ret := &info.ContainerInfo{
		ContainerReference: cinfo.ContainerReference,
		Subcontainers:      cinfo.Subcontainers,
		Spec:               self.getAdjustedSpec(cinfo),
		Stats:              stats,
	}
	return ret, nil
}

func (self *manager) getContainer(containerName string) (*containerData, error) {
	self.containersLock.RLock()
	defer self.containersLock.RUnlock()
	cont, ok := self.containers[namespacedContainerName{Name: containerName}]
	if !ok {
		return nil, fmt.Errorf("unknown container %q", containerName)
	}
	return cont, nil
}
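
// getSubcontainers returns the named container and every container nested
// under it, keyed by container name.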
func (self *manager) getSubcontainers(containerName string) map[string]*containerData {
	self.containersLock.RLock()
	defer self.containersLock.RUnlock()
	containersMap := make(map[string]*containerData, len(self.containers))

	// Get all the unique subcontainers of the specified container.
	matchedName := path.Join(containerName, "/")
	for i := range self.containers {
		name := self.containers[i].info.Name
		if name == containerName || strings.HasPrefix(name, matchedName) {
			containersMap[self.containers[i].info.Name] = self.containers[i]
		}
	}
	return containersMap
}

func (self *manager) SubcontainersInfo(containerName string, query *info.ContainerInfoRequest) ([]*info.ContainerInfo, error) {
	containersMap := self.getSubcontainers(containerName)

	containers := make([]*containerData, 0, len(containersMap))
	for _, cont := range containersMap {
		containers = append(containers, cont)
	}
	return self.containerDataSliceToContainerInfoSlice(containers, query)
}

func (self *manager) getAllDockerContainers() map[string]*containerData {
	self.containersLock.RLock()
	defer self.containersLock.RUnlock()
	containers := make(map[string]*containerData, len(self.containers))

	// Get containers in the Docker namespace.
	for name, cont := range self.containers {
		if name.Namespace == docker.DockerNamespace {
			containers[cont.info.Name] = cont
		}
	}
	return containers
}

func (self *manager) AllDockerContainers(query *info.ContainerInfoRequest) (map[string]info.ContainerInfo, error) {
	containers := self.getAllDockerContainers()

	output := make(map[string]info.ContainerInfo, len(containers))
	for name, cont := range containers {
		inf, err := self.containerDataToContainerInfo(cont, query)
		if err != nil {
			// Ignore the error caused by a race condition and return a best-effort result.
			if err == memory.ErrDataNotFound {
				klog.Warningf("Error getting data for container %s because of race condition", name)
				continue
			}
			return nil, err
		}
		output[name] = *inf
	}
	return output, nil
}

func (self *manager) getDockerContainer(containerName string) (*containerData, error) {
	self.containersLock.RLock()
	defer self.containersLock.RUnlock()

	// Check for the container in the Docker container namespace.
	cont, ok := self.containers[namespacedContainerName{
		Namespace: docker.DockerNamespace,
		Name:      containerName,
	}]

	// Look for the container by short prefix name if no exact match was found.
	if !ok {
		for contName, c := range self.containers {
			if contName.Namespace == docker.DockerNamespace && strings.HasPrefix(contName.Name, containerName) {
				if cont == nil {
					cont = c
				} else {
					return nil, fmt.Errorf("unable to find container. Container %q is not unique", containerName)
				}
			}
		}

		if cont == nil {
			return nil, fmt.Errorf("unable to find Docker container %q", containerName)
		}
	}

	return cont, nil
}

func (self *manager) DockerContainer(containerName string, query *info.ContainerInfoRequest) (info.ContainerInfo, error) {
	container, err := self.getDockerContainer(containerName)
	if err != nil {
		return info.ContainerInfo{}, err
	}

	inf, err := self.containerDataToContainerInfo(container, query)
	if err != nil {
		return info.ContainerInfo{}, err
	}
	return *inf, nil
}

func (self *manager) containerDataSliceToContainerInfoSlice(containers []*containerData, query *info.ContainerInfoRequest) ([]*info.ContainerInfo, error) {
	if len(containers) == 0 {
		return nil, fmt.Errorf("no containers found")
	}

	// Get the info for each container.
	output := make([]*info.ContainerInfo, 0, len(containers))
	for i := range containers {
		cinfo, err := self.containerDataToContainerInfo(containers[i], query)
		if err != nil {
			// Skip containers with errors; we try to degrade gracefully.
			continue
		}
		output = append(output, cinfo)
	}

	return output, nil
}

func (self *manager) GetRequestedContainersInfo(containerName string, options v2.RequestOptions) (map[string]*info.ContainerInfo, error) {
	containers, err := self.getRequestedContainers(containerName, options)
	if err != nil {
		return nil, err
	}
	var errs partialFailure
	containersMap := make(map[string]*info.ContainerInfo)
	query := info.ContainerInfoRequest{
		NumStats: options.Count,
	}
	for name, data := range containers {
		info, err := self.containerDataToContainerInfo(data, &query)
		if err != nil {
			errs.append(name, "containerDataToContainerInfo", err)
		}
		containersMap[name] = info
	}
	return containersMap, errs.OrNil()
}
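
// getRequestedContainers resolves the request options (by name or Docker id,
// recursive or not) into a map of containers and, when MaxAge is set,
// triggers on-demand housekeeping for each of them.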
func (self *manager) getRequestedContainers(containerName string, options v2.RequestOptions) (map[string]*containerData, error) {
	containersMap := make(map[string]*containerData)
	switch options.IdType {
	case v2.TypeName:
		if options.Recursive == false {
			cont, err := self.getContainer(containerName)
			if err != nil {
				return containersMap, err
			}
			containersMap[cont.info.Name] = cont
		} else {
			containersMap = self.getSubcontainers(containerName)
			if len(containersMap) == 0 {
				return containersMap, fmt.Errorf("unknown container: %q", containerName)
			}
		}
	case v2.TypeDocker:
		if options.Recursive == false {
			containerName = strings.TrimPrefix(containerName, "/")
			cont, err := self.getDockerContainer(containerName)
			if err != nil {
				return containersMap, err
			}
			containersMap[cont.info.Name] = cont
		} else {
			if containerName != "/" {
				return containersMap, fmt.Errorf("invalid request for docker container %q with subcontainers", containerName)
			}
			containersMap = self.getAllDockerContainers()
		}
	default:
		return containersMap, fmt.Errorf("invalid request type %q", options.IdType)
	}
	if options.MaxAge != nil {
		// Update stats for all containers in containersMap.
		var waitGroup sync.WaitGroup
		waitGroup.Add(len(containersMap))
		for _, container := range containersMap {
			go func(cont *containerData) {
				cont.OnDemandHousekeeping(*options.MaxAge)
				waitGroup.Done()
			}(container)
		}
		waitGroup.Wait()
	}
	return containersMap, nil
}
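
// GetDirFsInfo returns filesystem information for the device that backs the
// given directory.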
func (self *manager) GetDirFsInfo(dir string) (v2.FsInfo, error) {
	device, err := self.fsInfo.GetDirFsDevice(dir)
	if err != nil {
		return v2.FsInfo{}, fmt.Errorf("failed to get device for dir %q: %v", dir, err)
	}
	return self.getFsInfoByDeviceName(device.Device)
}

func (self *manager) GetFsInfoByFsUUID(uuid string) (v2.FsInfo, error) {
	device, err := self.fsInfo.GetDeviceInfoByFsUUID(uuid)
	if err != nil {
		return v2.FsInfo{}, err
	}
	return self.getFsInfoByDeviceName(device.Device)
}

func (self *manager) GetFsInfo(label string) ([]v2.FsInfo, error) {
	var empty time.Time
	// Get latest data from filesystems hanging off the root container.
	stats, err := self.memoryCache.RecentStats("/", empty, empty, 1)
	if err != nil {
		return nil, err
	}
	dev := ""
	if len(label) != 0 {
		dev, err = self.fsInfo.GetDeviceForLabel(label)
		if err != nil {
			return nil, err
		}
	}
	fsInfo := []v2.FsInfo{}
	for i := range stats[0].Filesystem {
		fs := stats[0].Filesystem[i]
		if len(label) != 0 && fs.Device != dev {
			continue
		}
		mountpoint, err := self.fsInfo.GetMountpointForDevice(fs.Device)
		if err != nil {
			return nil, err
		}
		labels, err := self.fsInfo.GetLabelsForDevice(fs.Device)
		if err != nil {
			return nil, err
		}
		fi := v2.FsInfo{
			Timestamp:  stats[0].Timestamp,
			Device:     fs.Device,
			Mountpoint: mountpoint,
			Capacity:   fs.Limit,
			Usage:      fs.Usage,
			Available:  fs.Available,
			Labels:     labels,
		}
		if fs.HasInodes {
			fi.Inodes = &fs.Inodes
			fi.InodesFree = &fs.InodesFree
		}
		fsInfo = append(fsInfo, fi)
	}
	return fsInfo, nil
}

func (m *manager) GetMachineInfo() (*info.MachineInfo, error) {
	m.machineMu.RLock()
	defer m.machineMu.RUnlock()
	// Copy and return the MachineInfo.
	return &m.machineInfo, nil
}

func (m *manager) GetVersionInfo() (*info.VersionInfo, error) {
	// TODO: Consider caching this and periodically updating. The VersionInfo may change if
	// the docker daemon is started after the cAdvisor client is created. Caching the value
	// would be helpful so we would be able to return the last known docker version if
	// docker was down at the time of a query.
	return getVersionInfo()
}

func (m *manager) Exists(containerName string) bool {
	m.containersLock.Lock()
	defer m.containersLock.Unlock()
	namespacedName := namespacedContainerName{
		Name: containerName,
	}
	_, ok := m.containers[namespacedName]
	if ok {
		return true
	}
	return false
}

func (m *manager) GetProcessList(containerName string, options v2.RequestOptions) ([]v2.ProcessInfo, error) {
	// Override recursive: only a single container listing is supported.
	options.Recursive = false
	// Override MaxAge: ProcessList does not require updated stats.
	options.MaxAge = nil
	conts, err := m.getRequestedContainers(containerName, options)
	if err != nil {
		return nil, err
	}
	if len(conts) != 1 {
		return nil, fmt.Errorf("Expected the request to match only one container")
	}
	// TODO(rjnagal): handle count? Only if we can do count by type (e.g. top 5 cpu users).
	ps := []v2.ProcessInfo{}
	for _, cont := range conts {
		ps, err = cont.GetProcessList(m.cadvisorContainer, m.inHostNamespace)
		if err != nil {
			return nil, err
		}
	}
	return ps, nil
}
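
// registerCollectors creates an application-metrics collector (Prometheus or
// generic) for each collector config attached to the container and registers
// it with the container's collector manager.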
func (m *manager) registerCollectors(collectorConfigs map[string]string, cont *containerData) error {
	for k, v := range collectorConfigs {
		configFile, err := cont.ReadFile(v, m.inHostNamespace)
		if err != nil {
			return fmt.Errorf("failed to read config file %q for config %q, container %q: %v", k, v, cont.info.Name, err)
		}
		klog.V(4).Infof("Got config from %q: %q", v, configFile)

		if strings.HasPrefix(k, "prometheus") || strings.HasPrefix(k, "Prometheus") {
			newCollector, err := collector.NewPrometheusCollector(k, configFile, *applicationMetricsCountLimit, cont.handler, m.collectorHttpClient)
			if err != nil {
				return fmt.Errorf("failed to create collector for container %q, config %q: %v", cont.info.Name, k, err)
			}
			err = cont.collectorManager.RegisterCollector(newCollector)
			if err != nil {
				return fmt.Errorf("failed to register collector for container %q, config %q: %v", cont.info.Name, k, err)
			}
		} else {
			newCollector, err := collector.NewCollector(k, configFile, *applicationMetricsCountLimit, cont.handler, m.collectorHttpClient)
			if err != nil {
				return fmt.Errorf("failed to create collector for container %q, config %q: %v", cont.info.Name, k, err)
			}
			err = cont.collectorManager.RegisterCollector(newCollector)
			if err != nil {
				return fmt.Errorf("failed to register collector for container %q, config %q: %v", cont.info.Name, k, err)
			}
		}
	}
	return nil
}

// overrideContainer overwrites an existing containerData/handler object for a given containerName.
// createContainer cannot be used here because it simply returns if the containerName already has a handler.
// Example: the rkt handler should take priority over the raw handler, but the raw handler might be created first.
// Only the raw handler is allowed to be overridden.
func (m *manager) overrideContainer(containerName string, watchSource watcher.ContainerWatchSource) error {
	m.containersLock.Lock()
	defer m.containersLock.Unlock()

	namespacedName := namespacedContainerName{
		Name: containerName,
	}

	if _, ok := m.containers[namespacedName]; ok {
		containerData := m.containers[namespacedName]

		if containerData.handler.Type() != container.ContainerTypeRaw {
			return nil
		}

		err := m.destroyContainerLocked(containerName)
		if err != nil {
			return fmt.Errorf("overrideContainer: failed to destroy containerData/handler for %v: %v", containerName, err)
		}
	}

	return m.createContainerLocked(containerName, watchSource)
}

// Create a container.
func (m *manager) createContainer(containerName string, watchSource watcher.ContainerWatchSource) error {
	m.containersLock.Lock()
	defer m.containersLock.Unlock()

	return m.createContainerLocked(containerName, watchSource)
}

func (m *manager) createContainerLocked(containerName string, watchSource watcher.ContainerWatchSource) error {
	namespacedName := namespacedContainerName{
		Name: containerName,
	}

	// Check that the container didn't already exist.
	if _, ok := m.containers[namespacedName]; ok {
		return nil
	}

	handler, accept, err := container.NewContainerHandler(containerName, watchSource, m.inHostNamespace)
	if err != nil {
		return err
	}
	if !accept {
		// Ignoring this container.
		klog.V(4).Infof("ignoring container %q", containerName)
		return nil
	}
	collectorManager, err := collector.NewCollectorManager()
	if err != nil {
		return err
	}

	logUsage := *logCadvisorUsage && containerName == m.cadvisorContainer
	cont, err := newContainerData(containerName, m.memoryCache, handler, logUsage, collectorManager, m.maxHousekeepingInterval, m.allowDynamicHousekeeping, clock.RealClock{})
	if err != nil {
		return err
	}

	devicesCgroupPath, err := handler.GetCgroupPath("devices")
	if err != nil {
		klog.Warningf("Error getting devices cgroup path: %v", err)
	} else {
		cont.nvidiaCollector, err = m.nvidiaManager.GetCollector(devicesCgroupPath)
		if err != nil {
			klog.V(4).Infof("GPU metrics may be unavailable/incomplete for container %q: %v", cont.info.Name, err)
		}
	}

	// Add collectors.
	labels := handler.GetContainerLabels()
	collectorConfigs := collector.GetCollectorConfigs(labels)
	err = m.registerCollectors(collectorConfigs, cont)
	if err != nil {
		klog.Warningf("Failed to register collectors for %q: %v", containerName, err)
	}

	// Add the container name and all its aliases. The aliases must be within the namespace of the factory.
	m.containers[namespacedName] = cont
	for _, alias := range cont.info.Aliases {
		m.containers[namespacedContainerName{
			Namespace: cont.info.Namespace,
			Name:      alias,
		}] = cont
	}

	klog.V(3).Infof("Added container: %q (aliases: %v, namespace: %q)", containerName, cont.info.Aliases, cont.info.Namespace)

	contSpec, err := cont.handler.GetSpec()
	if err != nil {
		return err
	}

	contRef, err := cont.handler.ContainerReference()
	if err != nil {
		return err
	}

	newEvent := &info.Event{
		ContainerName: contRef.Name,
		Timestamp:     contSpec.CreationTime,
		EventType:     info.EventContainerCreation,
	}
	err = m.eventHandler.AddEvent(newEvent)
	if err != nil {
		return err
	}

	// Start the container's housekeeping.
	return cont.Start()
}
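
// destroyContainer stops a container, removes it and its aliases from the
// manager, and records a deletion event.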
func (m *manager) destroyContainer(containerName string) error {
	m.containersLock.Lock()
	defer m.containersLock.Unlock()

	return m.destroyContainerLocked(containerName)
}

func (m *manager) destroyContainerLocked(containerName string) error {
	namespacedName := namespacedContainerName{
		Name: containerName,
	}
	cont, ok := m.containers[namespacedName]
	if !ok {
		// Already destroyed, done.
		return nil
	}

	// Tell the container to stop.
	err := cont.Stop()
	if err != nil {
		return err
	}

	// Remove the container from our records (and all its aliases).
	delete(m.containers, namespacedName)
	for _, alias := range cont.info.Aliases {
		delete(m.containers, namespacedContainerName{
			Namespace: cont.info.Namespace,
			Name:      alias,
		})
	}
	klog.V(3).Infof("Destroyed container: %q (aliases: %v, namespace: %q)", containerName, cont.info.Aliases, cont.info.Namespace)

	contRef, err := cont.handler.ContainerReference()
	if err != nil {
		return err
	}

	newEvent := &info.Event{
		ContainerName: contRef.Name,
		Timestamp:     time.Now(),
		EventType:     info.EventContainerDeletion,
	}
	err = m.eventHandler.AddEvent(newEvent)
	if err != nil {
		return err
	}
	return nil
}

// Detect all containers that have been added to or removed from the specified container.
func (m *manager) getContainersDiff(containerName string) (added []info.ContainerReference, removed []info.ContainerReference, err error) {
	// Get all subcontainers recursively.
	m.containersLock.RLock()
	cont, ok := m.containers[namespacedContainerName{
		Name: containerName,
	}]
	m.containersLock.RUnlock()
	if !ok {
		return nil, nil, fmt.Errorf("failed to find container %q while checking for new containers", containerName)
	}
	allContainers, err := cont.handler.ListContainers(container.ListRecursive)
	if err != nil {
		return nil, nil, err
	}
	allContainers = append(allContainers, info.ContainerReference{Name: containerName})

	m.containersLock.RLock()
	defer m.containersLock.RUnlock()

	// Determine which were added and which were removed.
	allContainersSet := make(map[string]*containerData)
	for name, d := range m.containers {
		// Only add the canonical name.
		if d.info.Name == name.Name {
			allContainersSet[name.Name] = d
		}
	}

	// Added containers.
	for _, c := range allContainers {
		delete(allContainersSet, c.Name)
		_, ok := m.containers[namespacedContainerName{
			Name: c.Name,
		}]
		if !ok {
			added = append(added, c)
		}
	}

	// Removed ones are no longer in the container listing.
	for _, d := range allContainersSet {
		removed = append(removed, d.info.ContainerReference)
	}

	return
}

// Detect the existing subcontainers and reflect the setup here.
func (m *manager) detectSubcontainers(containerName string) error {
	added, removed, err := m.getContainersDiff(containerName)
	if err != nil {
		return err
	}

	// Add the new containers.
	for _, cont := range added {
		err = m.createContainer(cont.Name, watcher.Raw)
		if err != nil {
			klog.Errorf("Failed to create existing container: %s: %s", cont.Name, err)
		}
	}

	// Remove the old containers.
	for _, cont := range removed {
		err = m.destroyContainer(cont.Name)
		if err != nil {
			klog.Errorf("Failed to destroy existing container: %s: %s", cont.Name, err)
		}
	}

	return nil
}

// Watches for new containers started in the system. Runs forever unless there is a setup error.
func (self *manager) watchForNewContainers(quit chan error) error {
	for _, watcher := range self.containerWatchers {
		err := watcher.Start(self.eventsChannel)
		if err != nil {
			return err
		}
	}

	// There is a race between starting the watch and new container creation so we do a detection before we read new containers.
	err := self.detectSubcontainers("/")
	if err != nil {
		return err
	}

	// Listen to events from the container handler.
	go func() {
		for {
			select {
			case event := <-self.eventsChannel:
				switch {
				case event.EventType == watcher.ContainerAdd:
					switch event.WatchSource {
					// The Rkt and Raw watchers can race, and if Raw wins, we want Rkt to override and create a new handler for Rkt containers.
					case watcher.Rkt:
						err = self.overrideContainer(event.Name, event.WatchSource)
					default:
						err = self.createContainer(event.Name, event.WatchSource)
					}
				case event.EventType == watcher.ContainerDelete:
					err = self.destroyContainer(event.Name)
				}
				if err != nil {
					klog.Warningf("Failed to process watch event %+v: %v", event, err)
				}
			case <-quit:
				var errs partialFailure

				// Stop processing events if asked to quit.
				for i, watcher := range self.containerWatchers {
					err := watcher.Stop()
					if err != nil {
						errs.append(fmt.Sprintf("watcher %d", i), "Stop", err)
					}
				}

				if len(errs) > 0 {
					quit <- errs
				} else {
					quit <- nil
					klog.Infof("Exiting thread watching subcontainers")
					return
				}
			}
		}
	}()
	return nil
}
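
// watchForNewOoms streams OOM instances from the oomparser and records them as
// OOM and OOM-kill events.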
func (self *manager) watchForNewOoms() error {
	klog.V(2).Infof("Started watching for new ooms in manager")
	outStream := make(chan *oomparser.OomInstance, 10)
	oomLog, err := oomparser.New()
	if err != nil {
		return err
	}
	go oomLog.StreamOoms(outStream)

	go func() {
		for oomInstance := range outStream {
			// Surface OOM and OOM kill events.
			newEvent := &info.Event{
				ContainerName: oomInstance.ContainerName,
				Timestamp:     oomInstance.TimeOfDeath,
				EventType:     info.EventOom,
			}
			err := self.eventHandler.AddEvent(newEvent)
			if err != nil {
				klog.Errorf("failed to add OOM event for %q: %v", oomInstance.ContainerName, err)
			}
			klog.V(3).Infof("Created an OOM event in container %q at %v", oomInstance.ContainerName, oomInstance.TimeOfDeath)

			newEvent = &info.Event{
				ContainerName: oomInstance.VictimContainerName,
				Timestamp:     oomInstance.TimeOfDeath,
				EventType:     info.EventOomKill,
				EventData: info.EventData{
					OomKill: &info.OomKillEventData{
						Pid:         oomInstance.Pid,
						ProcessName: oomInstance.ProcessName,
					},
				},
			}
			err = self.eventHandler.AddEvent(newEvent)
			if err != nil {
				klog.Errorf("failed to add OOM kill event for %q: %v", oomInstance.ContainerName, err)
			}
		}
	}()
	return nil
}

// WatchForEvents can be called by the API; events that fit the request are returned on the channel.
func (self *manager) WatchForEvents(request *events.Request) (*events.EventChannel, error) {
	return self.eventHandler.WatchEvents(request)
}

// GetPastEvents can be called by the API; it returns all past events that satisfy the request.
func (self *manager) GetPastEvents(request *events.Request) ([]*info.Event, error) {
	return self.eventHandler.GetEvents(request)
}

// CloseEventChannel is called by the API when a client is no longer listening to the channel.
func (self *manager) CloseEventChannel(watch_id int) {
	self.eventHandler.StopWatch(watch_id)
}

// Parses the events StoragePolicy from the flags.
func parseEventsStoragePolicy() events.StoragePolicy {
	policy := events.DefaultStoragePolicy()

	// Parse max age.
	parts := strings.Split(*eventStorageAgeLimit, ",")
	for _, part := range parts {
		items := strings.Split(part, "=")
		if len(items) != 2 {
			klog.Warningf("Unknown event storage policy %q when parsing max age", part)
			continue
		}
		dur, err := time.ParseDuration(items[1])
		if err != nil {
			klog.Warningf("Unable to parse event max age duration %q: %v", items[1], err)
			continue
		}
		if items[0] == "default" {
			policy.DefaultMaxAge = dur
			continue
		}
		policy.PerTypeMaxAge[info.EventType(items[0])] = dur
	}

	// Parse max number.
	parts = strings.Split(*eventStorageEventLimit, ",")
	for _, part := range parts {
		items := strings.Split(part, "=")
		if len(items) != 2 {
			klog.Warningf("Unknown event storage policy %q when parsing max event limit", part)
			continue
		}
		val, err := strconv.Atoi(items[1])
		if err != nil {
			klog.Warningf("Unable to parse integer from %q: %v", items[1], err)
			continue
		}
		if items[0] == "default" {
			policy.DefaultMaxNumEvents = val
			continue
		}
		policy.PerTypeMaxNumEvents[info.EventType(items[0])] = val
	}
	return policy
}

func (m *manager) DockerImages() ([]info.DockerImage, error) {
	return docker.Images()
}

func (m *manager) DockerInfo() (info.DockerStatus, error) {
	return docker.Status()
}

func (m *manager) DebugInfo() map[string][]string {
	debugInfo := container.DebugInfo()

	// Get unique containers.
	var conts map[*containerData]struct{}
	func() {
		m.containersLock.RLock()
		defer m.containersLock.RUnlock()

		conts = make(map[*containerData]struct{}, len(m.containers))
		for _, c := range m.containers {
			conts[c] = struct{}{}
		}
	}()

	// List containers.
	lines := make([]string, 0, len(conts))
	for cont := range conts {
		lines = append(lines, cont.info.Name)
		if cont.info.Namespace != "" {
			lines = append(lines, fmt.Sprintf("\tNamespace: %s", cont.info.Namespace))
		}
		if len(cont.info.Aliases) != 0 {
			lines = append(lines, "\tAliases:")
			for _, alias := range cont.info.Aliases {
				lines = append(lines, fmt.Sprintf("\t\t%s", alias))
			}
		}
	}

	debugInfo["Managed containers"] = lines
	return debugInfo
}
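
// getFsInfoByDeviceName returns filesystem information for the given device,
// matched by its mount point.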
func (self *manager) getFsInfoByDeviceName(deviceName string) (v2.FsInfo, error) {
	mountPoint, err := self.fsInfo.GetMountpointForDevice(deviceName)
	if err != nil {
		return v2.FsInfo{}, fmt.Errorf("failed to get mount point for device %q: %v", deviceName, err)
	}
	infos, err := self.GetFsInfo("")
	if err != nil {
		return v2.FsInfo{}, err
	}
	for _, info := range infos {
		if info.Mountpoint == mountPoint {
			return info, nil
		}
	}
	return v2.FsInfo{}, fmt.Errorf("cannot find filesystem info for device %q", deviceName)
}

func getVersionInfo() (*info.VersionInfo, error) {
	kernel_version := machine.KernelVersion()
	container_os := machine.ContainerOsVersion()
	docker_version, err := docker.VersionString()
	if err != nil {
		return nil, err
	}
	docker_api_version, err := docker.APIVersionString()
	if err != nil {
		return nil, err
	}

	return &info.VersionInfo{
		KernelVersion:      kernel_version,
		ContainerOsVersion: container_os,
		DockerVersion:      docker_version,
		DockerAPIVersion:   docker_api_version,
		CadvisorVersion:    version.Info["version"],
		CadvisorRevision:   version.Info["revision"],
	}, nil
}

// Helper for accumulating partial failures.
type partialFailure []string

func (f *partialFailure) append(id, operation string, err error) {
	*f = append(*f, fmt.Sprintf("[%q: %s: %s]", id, operation, err))
}

func (f partialFailure) Error() string {
	return fmt.Sprintf("partial failures: %s", strings.Join(f, ", "))
}

func (f partialFailure) OrNil() error {
	if len(f) == 0 {
		return nil
	}
	return f
}