summary_test.go 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453
  1. /*
  2. Copyright 2016 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package e2enode
  14. import (
  15. "fmt"
  16. "io/ioutil"
  17. "os/exec"
  18. "strings"
  19. "time"
  20. "k8s.io/api/core/v1"
  21. "k8s.io/apimachinery/pkg/api/resource"
  22. metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
  23. kubeletstatsv1alpha1 "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
  24. "k8s.io/kubernetes/test/e2e/framework"
  25. e2ekubectl "k8s.io/kubernetes/test/e2e/framework/kubectl"
  26. "k8s.io/kubernetes/test/e2e/framework/volume"
  27. systemdutil "github.com/coreos/go-systemd/util"
  28. "github.com/onsi/ginkgo"
  29. "github.com/onsi/gomega"
  30. "github.com/onsi/gomega/gstruct"
  31. "github.com/onsi/gomega/types"
  32. )
// Summary API node-conformance suite. It starts two restarting busybox pods and
// then checks that the summary returned by getNodeSummary has every expected
// field present and every numeric value inside a (deliberately loose) range.
var _ = framework.KubeDescribe("Summary API [NodeConformance]", func() {
	f := framework.NewDefaultFramework("summary-test")
	ginkgo.Context("when querying /stats/summary", func() {
		// Failure-only diagnostics: optionally dump logs of failed containers,
		// then log the processes living in the kubelet/system cgroups.
		ginkgo.AfterEach(func() {
			if !ginkgo.CurrentGinkgoTestDescription().Failed {
				return
			}
			if framework.TestContext.DumpLogsOnFailure {
				e2ekubectl.LogFailedContainers(f.ClientSet, f.Namespace.Name, framework.Logf)
			}
			ginkgo.By("Recording processes in system cgroups")
			recordSystemCgroupProcesses()
		})
		ginkgo.It("should report resource usage through the stats api", func() {
			const pod0 = "stats-busybox-0"
			const pod1 = "stats-busybox-1"
			ginkgo.By("Creating test pods")
			numRestarts := int32(1)
			pods := getSummaryTestPods(f, numRestarts, pod0, pod1)
			f.PodClient().CreateBatch(pods)
			// Poll every 5s for up to a minute until verifyPodRestartCount
			// returns no error for every test pod.
			gomega.Eventually(func() error {
				for _, pod := range pods {
					err := verifyPodRestartCount(f, pod.Name, len(pod.Spec.Containers), numRestarts)
					if err != nil {
						return err
					}
				}
				return nil
			}, time.Minute, 5*time.Second).Should(gomega.BeNil())
			// Wait for cAdvisor to collect 2 stats points
			time.Sleep(15 * time.Second)
			// Setup expectations.
			const (
				maxStartAge = time.Hour * 24 * 365 // 1 year
				maxStatsAge = time.Minute
			)
			// fetch node so we can know proper node memory bounds for unconstrained cgroups
			node := getLocalNode(f)
			memoryCapacity := node.Status.Capacity["memory"]
			memoryLimit := memoryCapacity.Value()
			fsCapacityBounds := bounded(100*volume.Mb, 10*volume.Tb)
			// Expectations for system containers.
			// This is a function (not a value) so that every call yields a fresh
			// matcher; callers below overwrite entries of its Fields map without
			// affecting the other system containers.
			sysContExpectations := func() types.GomegaMatcher {
				return gstruct.MatchAllFields(gstruct.Fields{
					"Name": gstruct.Ignore(),
					"StartTime": recent(maxStartAge),
					"CPU": ptrMatchAllFields(gstruct.Fields{
						"Time": recent(maxStatsAge),
						"UsageNanoCores": bounded(10000, 2e9),
						"UsageCoreNanoSeconds": bounded(10000000, 1e15),
					}),
					"Memory": ptrMatchAllFields(gstruct.Fields{
						"Time": recent(maxStatsAge),
						// We don't limit system container memory.
						"AvailableBytes": gomega.BeNil(),
						"UsageBytes": bounded(1*volume.Mb, memoryLimit),
						"WorkingSetBytes": bounded(1*volume.Mb, memoryLimit),
						// this now returns /sys/fs/cgroup/memory.stat total_rss
						"RSSBytes": bounded(1*volume.Mb, memoryLimit),
						"PageFaults": bounded(1000, 1e9),
						"MajorPageFaults": bounded(0, 100000),
					}),
					"Accelerators": gomega.BeEmpty(),
					"Rootfs": gomega.BeNil(),
					"Logs": gomega.BeNil(),
					"UserDefinedMetrics": gomega.BeEmpty(),
				})
			}
			// The "pods" system container reuses the system expectations but
			// replaces the Memory matcher: AvailableBytes must be set and the
			// lower bounds are smaller.
			podsContExpectations := sysContExpectations().(*gstruct.FieldsMatcher)
			podsContExpectations.Fields["Memory"] = ptrMatchAllFields(gstruct.Fields{
				"Time": recent(maxStatsAge),
				// Pods are limited by Node Allocatable
				"AvailableBytes": bounded(1*volume.Kb, memoryLimit),
				"UsageBytes": bounded(10*volume.Kb, memoryLimit),
				"WorkingSetBytes": bounded(10*volume.Kb, memoryLimit),
				"RSSBytes": bounded(1*volume.Kb, memoryLimit),
				"PageFaults": bounded(0, 1000000),
				"MajorPageFaults": bounded(0, 10),
			})
			runtimeContExpectations := sysContExpectations().(*gstruct.FieldsMatcher)
			if systemdutil.IsRunningSystemd() && framework.TestContext.ContainerRuntime == "docker" {
				// Some Linux distributions still ship a docker.service that is missing
				// a `Delegate=yes` setting (or equivalent CPUAccounting= and MemoryAccounting=)
				// that allows us to monitor the container runtime resource usage through
				// the "cpu" and "memory" cgroups.
				//
				// Make an exception here for those distros, only for Docker, so that they
				// can pass the full node e2e tests even in that case.
				//
				// For newer container runtimes (using CRI) and even distros that still
				// ship Docker, we should encourage them to always set `Delegate=yes` in
				// order to make monitoring of the runtime possible.
				stdout, err := exec.Command("systemctl", "show", "-p", "Delegate", "docker.service").CombinedOutput()
				if err == nil && strings.TrimSpace(string(stdout)) == "Delegate=no" {
					// Only make these optional if we can successfully confirm that
					// Delegate is set to "no" (in other words, unset.) If we fail
					// to check that, default to requiring it, which might cause
					// false positives, but that should be the safer approach.
					ginkgo.By("Making runtime container expectations optional, since systemd was not configured to Delegate=yes the cgroups")
					runtimeContExpectations.Fields["Memory"] = gomega.Or(gomega.BeNil(), runtimeContExpectations.Fields["Memory"])
					runtimeContExpectations.Fields["CPU"] = gomega.Or(gomega.BeNil(), runtimeContExpectations.Fields["CPU"])
				}
			}
			// Keyed by container name (see summaryObjectID); matched with
			// MatchAllElements below, so no unexpected system container is allowed.
			systemContainers := gstruct.Elements{
				"kubelet": sysContExpectations(),
				"runtime": runtimeContExpectations,
				"pods": podsContExpectations,
			}
			// The Kubelet only manages the 'misc' system container if the host is not running systemd.
			if !systemdutil.IsRunningSystemd() {
				framework.Logf("Host not running systemd; expecting 'misc' system container.")
				miscContExpectations := sysContExpectations().(*gstruct.FieldsMatcher)
				// Misc processes are system-dependent, so relax the memory constraints.
				miscContExpectations.Fields["Memory"] = ptrMatchAllFields(gstruct.Fields{
					"Time": recent(maxStatsAge),
					// We don't limit system container memory.
					"AvailableBytes": gomega.BeNil(),
					"UsageBytes": bounded(100*volume.Kb, memoryLimit),
					"WorkingSetBytes": bounded(100*volume.Kb, memoryLimit),
					"RSSBytes": bounded(100*volume.Kb, memoryLimit),
					"PageFaults": bounded(1000, 1e9),
					"MajorPageFaults": bounded(0, 100000),
				})
				systemContainers["misc"] = miscContExpectations
			}
			// Expectations for pods.
			// The same matcher is applied to both test pods. The 80 MB upper
			// memory bounds correspond to the "80M" container limit set in
			// getSummaryTestPods.
			podExpectations := gstruct.MatchAllFields(gstruct.Fields{
				"PodRef": gstruct.Ignore(),
				"StartTime": recent(maxStartAge),
				"Containers": gstruct.MatchAllElements(summaryObjectID, gstruct.Elements{
					"busybox-container": gstruct.MatchAllFields(gstruct.Fields{
						"Name": gomega.Equal("busybox-container"),
						"StartTime": recent(maxStartAge),
						"CPU": ptrMatchAllFields(gstruct.Fields{
							"Time": recent(maxStatsAge),
							"UsageNanoCores": bounded(10000, 1e9),
							"UsageCoreNanoSeconds": bounded(10000000, 1e11),
						}),
						"Memory": ptrMatchAllFields(gstruct.Fields{
							"Time": recent(maxStatsAge),
							"AvailableBytes": bounded(1*volume.Kb, 80*volume.Mb),
							"UsageBytes": bounded(10*volume.Kb, 80*volume.Mb),
							"WorkingSetBytes": bounded(10*volume.Kb, 80*volume.Mb),
							"RSSBytes": bounded(1*volume.Kb, 80*volume.Mb),
							"PageFaults": bounded(100, 1000000),
							"MajorPageFaults": bounded(0, 10),
						}),
						"Accelerators": gomega.BeEmpty(),
						"Rootfs": ptrMatchAllFields(gstruct.Fields{
							"Time": recent(maxStatsAge),
							"AvailableBytes": fsCapacityBounds,
							"CapacityBytes": fsCapacityBounds,
							"UsedBytes": bounded(volume.Kb, 10*volume.Mb),
							"InodesFree": bounded(1e4, 1e8),
							"Inodes": bounded(1e4, 1e8),
							"InodesUsed": bounded(0, 1e8),
						}),
						"Logs": ptrMatchAllFields(gstruct.Fields{
							"Time": recent(maxStatsAge),
							"AvailableBytes": fsCapacityBounds,
							"CapacityBytes": fsCapacityBounds,
							"UsedBytes": bounded(volume.Kb, 10*volume.Mb),
							"InodesFree": bounded(1e4, 1e8),
							"Inodes": bounded(1e4, 1e8),
							"InodesUsed": bounded(0, 1e8),
						}),
						"UserDefinedMetrics": gomega.BeEmpty(),
					}),
				}),
				"Network": ptrMatchAllFields(gstruct.Fields{
					"Time": recent(maxStatsAge),
					"InterfaceStats": gstruct.MatchAllFields(gstruct.Fields{
						"Name": gomega.Equal("eth0"),
						"RxBytes": bounded(10, 10*volume.Mb),
						"RxErrors": bounded(0, 1000),
						"TxBytes": bounded(10, 10*volume.Mb),
						"TxErrors": bounded(0, 1000),
					}),
					"Interfaces": gomega.Not(gomega.BeNil()),
				}),
				"CPU": ptrMatchAllFields(gstruct.Fields{
					"Time": recent(maxStatsAge),
					"UsageNanoCores": bounded(10000, 1e9),
					"UsageCoreNanoSeconds": bounded(10000000, 1e11),
				}),
				"Memory": ptrMatchAllFields(gstruct.Fields{
					"Time": recent(maxStatsAge),
					"AvailableBytes": bounded(1*volume.Kb, 80*volume.Mb),
					"UsageBytes": bounded(10*volume.Kb, 80*volume.Mb),
					"WorkingSetBytes": bounded(10*volume.Kb, 80*volume.Mb),
					"RSSBytes": bounded(1*volume.Kb, 80*volume.Mb),
					"PageFaults": bounded(0, 1000000),
					"MajorPageFaults": bounded(0, 10),
				}),
				"VolumeStats": gstruct.MatchAllElements(summaryObjectID, gstruct.Elements{
					"test-empty-dir": gstruct.MatchAllFields(gstruct.Fields{
						"Name": gomega.Equal("test-empty-dir"),
						"PVCRef": gomega.BeNil(),
						"FsStats": gstruct.MatchAllFields(gstruct.Fields{
							"Time": recent(maxStatsAge),
							"AvailableBytes": fsCapacityBounds,
							"CapacityBytes": fsCapacityBounds,
							"UsedBytes": bounded(volume.Kb, 1*volume.Mb),
							"InodesFree": bounded(1e4, 1e8),
							"Inodes": bounded(1e4, 1e8),
							"InodesUsed": bounded(0, 1e8),
						}),
					}),
				}),
				"EphemeralStorage": ptrMatchAllFields(gstruct.Fields{
					"Time": recent(maxStatsAge),
					"AvailableBytes": fsCapacityBounds,
					"CapacityBytes": fsCapacityBounds,
					"UsedBytes": bounded(volume.Kb, 21*volume.Mb),
					"InodesFree": bounded(1e4, 1e8),
					"Inodes": bounded(1e4, 1e8),
					"InodesUsed": bounded(0, 1e8),
				}),
			})
			// Top-level matcher for the whole summary: node-level stats plus the
			// two test pods.
			matchExpectations := ptrMatchAllFields(gstruct.Fields{
				"Node": gstruct.MatchAllFields(gstruct.Fields{
					"NodeName": gomega.Equal(framework.TestContext.NodeName),
					"StartTime": recent(maxStartAge),
					"SystemContainers": gstruct.MatchAllElements(summaryObjectID, systemContainers),
					"CPU": ptrMatchAllFields(gstruct.Fields{
						"Time": recent(maxStatsAge),
						"UsageNanoCores": bounded(100e3, 2e9),
						"UsageCoreNanoSeconds": bounded(1e9, 1e15),
					}),
					"Memory": ptrMatchAllFields(gstruct.Fields{
						"Time": recent(maxStatsAge),
						"AvailableBytes": bounded(100*volume.Mb, memoryLimit),
						"UsageBytes": bounded(10*volume.Mb, memoryLimit),
						"WorkingSetBytes": bounded(10*volume.Mb, memoryLimit),
						// this now returns /sys/fs/cgroup/memory.stat total_rss
						"RSSBytes": bounded(1*volume.Kb, memoryLimit),
						"PageFaults": bounded(1000, 1e9),
						"MajorPageFaults": bounded(0, 100000),
					}),
					// TODO(#28407): Handle non-eth0 network interface names.
					"Network": ptrMatchAllFields(gstruct.Fields{
						"Time": recent(maxStatsAge),
						"InterfaceStats": gstruct.MatchAllFields(gstruct.Fields{
							"Name": gomega.Or(gomega.BeEmpty(), gomega.Equal("eth0")),
							"RxBytes": gomega.Or(gomega.BeNil(), bounded(1*volume.Mb, 100*volume.Gb)),
							"RxErrors": gomega.Or(gomega.BeNil(), bounded(0, 100000)),
							"TxBytes": gomega.Or(gomega.BeNil(), bounded(10*volume.Kb, 10*volume.Gb)),
							"TxErrors": gomega.Or(gomega.BeNil(), bounded(0, 100000)),
						}),
						"Interfaces": gomega.Not(gomega.BeNil()),
					}),
					"Fs": ptrMatchAllFields(gstruct.Fields{
						"Time": recent(maxStatsAge),
						"AvailableBytes": fsCapacityBounds,
						"CapacityBytes": fsCapacityBounds,
						// we assume tests do not run on machines with more than 10tb of disk
						"UsedBytes": bounded(volume.Kb, 10*volume.Tb),
						"InodesFree": bounded(1e4, 1e8),
						"Inodes": bounded(1e4, 1e8),
						"InodesUsed": bounded(0, 1e8),
					}),
					"Runtime": ptrMatchAllFields(gstruct.Fields{
						"ImageFs": ptrMatchAllFields(gstruct.Fields{
							"Time": recent(maxStatsAge),
							"AvailableBytes": fsCapacityBounds,
							"CapacityBytes": fsCapacityBounds,
							// we assume tests do not run on machines with more than 10tb of disk
							"UsedBytes": bounded(volume.Kb, 10*volume.Tb),
							"InodesFree": bounded(1e4, 1e8),
							"Inodes": bounded(1e4, 1e8),
							"InodesUsed": bounded(0, 1e8),
						}),
					}),
					"Rlimit": ptrMatchAllFields(gstruct.Fields{
						"Time": recent(maxStatsAge),
						"MaxPID": bounded(0, 1e8),
						"NumOfRunningProcesses": bounded(0, 1e8),
					}),
				}),
				// Ignore extra pods since the tests run in parallel.
				"Pods": gstruct.MatchElements(summaryObjectID, gstruct.IgnoreExtras, gstruct.Elements{
					fmt.Sprintf("%s::%s", f.Namespace.Name, pod0): podExpectations,
					fmt.Sprintf("%s::%s", f.Namespace.Name, pod1): podExpectations,
				}),
			})
			ginkgo.By("Validating /stats/summary")
			// Give pods a minute to actually start up.
			gomega.Eventually(getNodeSummary, 1*time.Minute, 15*time.Second).Should(matchExpectations)
			// Then the summary should match the expectations a few more times.
			gomega.Consistently(getNodeSummary, 30*time.Second, 15*time.Second).Should(matchExpectations)
		})
	})
})
  326. func getSummaryTestPods(f *framework.Framework, numRestarts int32, names ...string) []*v1.Pod {
  327. pods := make([]*v1.Pod, 0, len(names))
  328. for _, name := range names {
  329. pods = append(pods, &v1.Pod{
  330. ObjectMeta: metav1.ObjectMeta{
  331. Name: name,
  332. },
  333. Spec: v1.PodSpec{
  334. RestartPolicy: v1.RestartPolicyAlways,
  335. Containers: []v1.Container{
  336. {
  337. Name: "busybox-container",
  338. Image: busyboxImage,
  339. Command: getRestartingContainerCommand("/test-empty-dir-mnt", 0, numRestarts, "echo 'some bytes' >/outside_the_volume.txt; ping -c 1 google.com; echo 'hello world' >> /test-empty-dir-mnt/file;"),
  340. Resources: v1.ResourceRequirements{
  341. Limits: v1.ResourceList{
  342. // Must set memory limit to get MemoryStats.AvailableBytes
  343. v1.ResourceMemory: resource.MustParse("80M"),
  344. },
  345. },
  346. VolumeMounts: []v1.VolumeMount{
  347. {MountPath: "/test-empty-dir-mnt", Name: "test-empty-dir"},
  348. },
  349. },
  350. },
  351. SecurityContext: &v1.PodSecurityContext{
  352. SELinuxOptions: &v1.SELinuxOptions{
  353. Level: "s0",
  354. },
  355. },
  356. Volumes: []v1.Volume{
  357. // TODO(#28393): Test secret volumes
  358. // TODO(#28394): Test hostpath volumes
  359. {Name: "test-empty-dir", VolumeSource: v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}}},
  360. },
  361. },
  362. })
  363. }
  364. return pods
  365. }
  366. // Mapping function for gstruct.MatchAllElements
  367. func summaryObjectID(element interface{}) string {
  368. switch el := element.(type) {
  369. case kubeletstatsv1alpha1.PodStats:
  370. return fmt.Sprintf("%s::%s", el.PodRef.Namespace, el.PodRef.Name)
  371. case kubeletstatsv1alpha1.ContainerStats:
  372. return el.Name
  373. case kubeletstatsv1alpha1.VolumeStats:
  374. return el.Name
  375. case kubeletstatsv1alpha1.UserDefinedMetric:
  376. return el.Name
  377. default:
  378. framework.Failf("Unknown type: %T", el)
  379. return "???"
  380. }
  381. }
  382. // Convenience functions for common matcher combinations.
  383. func ptrMatchAllFields(fields gstruct.Fields) types.GomegaMatcher {
  384. return gstruct.PointTo(gstruct.MatchAllFields(fields))
  385. }
  386. func bounded(lower, upper interface{}) types.GomegaMatcher {
  387. return gstruct.PointTo(gomega.And(
  388. gomega.BeNumerically(">=", lower),
  389. gomega.BeNumerically("<=", upper)))
  390. }
  391. func recent(d time.Duration) types.GomegaMatcher {
  392. return gomega.WithTransform(func(t metav1.Time) time.Time {
  393. return t.Time
  394. }, gomega.And(
  395. gomega.BeTemporally(">=", time.Now().Add(-d)),
  396. // Now() is the test start time, not the match time, so permit a few extra minutes.
  397. gomega.BeTemporally("<", time.Now().Add(2*time.Minute))))
  398. }
  399. func recordSystemCgroupProcesses() {
  400. cfg, err := getCurrentKubeletConfig()
  401. if err != nil {
  402. framework.Logf("Failed to read kubelet config: %v", err)
  403. return
  404. }
  405. cgroups := map[string]string{
  406. "kubelet": cfg.KubeletCgroups,
  407. "misc": cfg.SystemCgroups,
  408. }
  409. for name, cgroup := range cgroups {
  410. if cgroup == "" {
  411. framework.Logf("Skipping unconfigured cgroup %s", name)
  412. continue
  413. }
  414. pids, err := ioutil.ReadFile(fmt.Sprintf("/sys/fs/cgroup/cpu/%s/cgroup.procs", cgroup))
  415. if err != nil {
  416. framework.Logf("Failed to read processes in cgroup %s: %v", name, err)
  417. continue
  418. }
  419. framework.Logf("Processes in %s cgroup (%s):", name, cgroup)
  420. for _, pid := range strings.Fields(string(pids)) {
  421. path := fmt.Sprintf("/proc/%s/cmdline", pid)
  422. cmd, err := ioutil.ReadFile(path)
  423. if err != nil {
  424. framework.Logf(" ginkgo.Failed to read %s: %v", path, err)
  425. } else {
  426. framework.Logf(" %s", cmd)
  427. }
  428. }
  429. }
  430. }