eviction_test.go

/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2e_node
import (
	"fmt"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	"k8s.io/api/core/v1"
	schedulerapi "k8s.io/api/scheduling/v1"
	"k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/fields"
	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
	stats "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
	"k8s.io/kubernetes/pkg/kubelet/eviction"
	evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
	kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
	kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
	"k8s.io/kubernetes/test/e2e/framework"
	e2elog "k8s.io/kubernetes/test/e2e/framework/log"
	testutils "k8s.io/kubernetes/test/utils"
	imageutils "k8s.io/kubernetes/test/utils/image"

	. "github.com/onsi/ginkgo"
	. "github.com/onsi/gomega"
)
// Eviction Policy is described here:
// https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node/kubelet-eviction.md
const (
	postTestConditionMonitoringPeriod = 1 * time.Minute
	evictionPollInterval              = 2 * time.Second
	pressureDisappearTimeout          = 1 * time.Minute
	// Pressure conditions often surface after evictions because the kubelet only updates
	// node conditions periodically.
	// We wait this period after evictions to make sure that delay has passed.
	pressureDelay     = 20 * time.Second
	testContextFmt    = "when we run containers that should cause %s"
	noPressure        = v1.NodeConditionType("NoPressure")
	lotsOfDisk        = 10240      // 10 Gb in Mb
	lotsOfFiles       = 1000000000 // 1 billion
	resourceInodes    = v1.ResourceName("inodes")
	noStarvedResource = v1.ResourceName("none")
)
// InodeEviction tests that the node responds to node disk pressure by evicting only responsible pods.
// Node disk pressure is induced by consuming all inodes on the node.
var _ = framework.KubeDescribe("InodeEviction [Slow] [Serial] [Disruptive][NodeFeature:Eviction]", func() {
	f := framework.NewDefaultFramework("inode-eviction-test")
	expectedNodeCondition := v1.NodeDiskPressure
	expectedStarvedResource := resourceInodes
	pressureTimeout := 15 * time.Minute
	inodesConsumed := uint64(200000)
	Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
			// Set the eviction threshold to inodesFree - inodesConsumed, so that using inodesConsumed causes an eviction.
			summary := eventuallyGetSummary()
			inodesFree := *summary.Node.Fs.InodesFree
			if inodesFree <= inodesConsumed {
				framework.Skipf("Too few inodes free on the host for the InodeEviction test to run")
			}
			initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalNodeFsInodesFree): fmt.Sprintf("%d", inodesFree-inodesConsumed)}
			initialConfig.EvictionMinimumReclaim = map[string]string{}
		})
		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logInodeMetrics, []podEvictSpec{
			{
				evictionPriority: 1,
				pod:              inodeConsumingPod("container-inode-hog", lotsOfFiles, nil),
			},
			{
				evictionPriority: 1,
				pod:              inodeConsumingPod("volume-inode-hog", lotsOfFiles, &v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}}),
			},
			{
				evictionPriority: 0,
				pod:              innocentPod(),
			},
		})
	})
})
// ImageGCNoEviction tests that the node does not evict pods when inodes are consumed by images.
// Disk pressure is induced by pulling large images.
var _ = framework.KubeDescribe("ImageGCNoEviction [Slow] [Serial] [Disruptive][NodeFeature:Eviction]", func() {
	f := framework.NewDefaultFramework("image-gc-eviction-test")
	pressureTimeout := 10 * time.Minute
	expectedNodeCondition := v1.NodeDiskPressure
	expectedStarvedResource := resourceInodes
	inodesConsumed := uint64(100000)
	Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
			// Set the eviction threshold to inodesFree - inodesConsumed, so that using inodesConsumed causes an eviction.
			summary := eventuallyGetSummary()
			inodesFree := *summary.Node.Fs.InodesFree
			if inodesFree <= inodesConsumed {
				framework.Skipf("Too few inodes free on the host for the ImageGCNoEviction test to run")
			}
			initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalNodeFsInodesFree): fmt.Sprintf("%d", inodesFree-inodesConsumed)}
			initialConfig.EvictionMinimumReclaim = map[string]string{}
		})
		// Consume enough inodes to induce disk pressure,
		// but expect that image garbage collection can reduce it enough to avoid an eviction.
		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logDiskMetrics, []podEvictSpec{
			{
				evictionPriority: 0,
				pod:              inodeConsumingPod("container-inode", 110000, nil),
			},
		})
	})
})
// MemoryAllocatableEviction tests that the node responds to node memory pressure by evicting only responsible pods.
// Node memory pressure is only encountered because we reserve the majority of the node's capacity via kube-reserved.
var _ = framework.KubeDescribe("MemoryAllocatableEviction [Slow] [Serial] [Disruptive][NodeFeature:Eviction]", func() {
	f := framework.NewDefaultFramework("memory-allocatable-eviction-test")
	expectedNodeCondition := v1.NodeMemoryPressure
	expectedStarvedResource := v1.ResourceMemory
	pressureTimeout := 10 * time.Minute
	Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
			// Set large system and kube reserved values to trigger allocatable thresholds far before hard eviction thresholds.
			kubeReserved := getNodeCPUAndMemoryCapacity(f)[v1.ResourceMemory]
			// The default hard eviction threshold is 250Mb, so Allocatable = Capacity - Reserved - 250Mb.
			// We want Allocatable = 50Mb, so set Reserved = Capacity - Allocatable - 250Mb = Capacity - 300Mb.
			kubeReserved.Sub(resource.MustParse("300Mi"))
			initialConfig.KubeReserved = map[string]string{
				string(v1.ResourceMemory): kubeReserved.String(),
			}
			initialConfig.EnforceNodeAllocatable = []string{kubetypes.NodeAllocatableEnforcementKey}
			initialConfig.CgroupsPerQOS = true
		})
		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logMemoryMetrics, []podEvictSpec{
			{
				evictionPriority: 1,
				pod:              getMemhogPod("memory-hog-pod", "memory-hog", v1.ResourceRequirements{}),
			},
			{
				evictionPriority: 0,
				pod:              innocentPod(),
			},
		})
	})
})
// LocalStorageEviction tests that the node responds to node disk pressure by evicting only responsible pods.
// Disk pressure is induced by running pods which consume disk space.
var _ = framework.KubeDescribe("LocalStorageEviction [Slow] [Serial] [Disruptive][NodeFeature:Eviction]", func() {
	f := framework.NewDefaultFramework("localstorage-eviction-test")
	pressureTimeout := 10 * time.Minute
	expectedNodeCondition := v1.NodeDiskPressure
	expectedStarvedResource := v1.ResourceEphemeralStorage
	Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
			diskConsumed := resource.MustParse("200Mi")
			summary := eventuallyGetSummary()
			availableBytes := *(summary.Node.Fs.AvailableBytes)
			initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalNodeFsAvailable): fmt.Sprintf("%d", availableBytes-uint64(diskConsumed.Value()))}
			initialConfig.EvictionMinimumReclaim = map[string]string{}
		})
		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logDiskMetrics, []podEvictSpec{
			{
				evictionPriority: 1,
				pod:              diskConsumingPod("container-disk-hog", lotsOfDisk, nil, v1.ResourceRequirements{}),
			},
			{
				evictionPriority: 0,
				pod:              innocentPod(),
			},
		})
	})
})
// LocalStorageSoftEviction tests that the node responds to node disk pressure by evicting only responsible pods.
// Disk pressure is induced by running pods which consume disk space in excess of the soft eviction threshold.
// Note: This test's purpose is to test Soft Evictions. Local storage was chosen since it is the least costly to run.
var _ = framework.KubeDescribe("LocalStorageSoftEviction [Slow] [Serial] [Disruptive][NodeFeature:Eviction]", func() {
	f := framework.NewDefaultFramework("localstorage-eviction-test")
	pressureTimeout := 10 * time.Minute
	expectedNodeCondition := v1.NodeDiskPressure
	expectedStarvedResource := v1.ResourceEphemeralStorage
	Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
			diskConsumed := resource.MustParse("200Mi")
			summary := eventuallyGetSummary()
			availableBytes := *(summary.Node.Fs.AvailableBytes)
			if availableBytes <= uint64(diskConsumed.Value()) {
				framework.Skipf("Too little disk free on the host for the LocalStorageSoftEviction test to run")
			}
			initialConfig.EvictionSoft = map[string]string{string(evictionapi.SignalNodeFsAvailable): fmt.Sprintf("%d", availableBytes-uint64(diskConsumed.Value()))}
			initialConfig.EvictionSoftGracePeriod = map[string]string{string(evictionapi.SignalNodeFsAvailable): "1m"}
			// Defer to the pod default grace period.
			initialConfig.EvictionMaxPodGracePeriod = 30
			initialConfig.EvictionMinimumReclaim = map[string]string{}
			// Ensure that pods are not evicted because of the eviction-hard threshold.
			// Setting a threshold to 0% disables it; a non-empty map overrides the default value (necessary due to omitempty).
			initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalMemoryAvailable): "0%"}
		})
		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logDiskMetrics, []podEvictSpec{
			{
				evictionPriority: 1,
				pod:              diskConsumingPod("container-disk-hog", lotsOfDisk, nil, v1.ResourceRequirements{}),
			},
			{
				evictionPriority: 0,
				pod:              innocentPod(),
			},
		})
	})
})
// LocalStorageCapacityIsolationEviction tests that container and volume local storage limits are enforced through evictions.
var _ = framework.KubeDescribe("LocalStorageCapacityIsolationEviction [Slow] [Serial] [Disruptive] [Feature:LocalStorageCapacityIsolation][NodeFeature:Eviction]", func() {
	f := framework.NewDefaultFramework("localstorage-eviction-test")
	evictionTestTimeout := 10 * time.Minute
	Context(fmt.Sprintf(testContextFmt, "evictions due to pod local storage violations"), func() {
		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
			// Setting a threshold to 0% disables it; a non-empty map overrides the default value (necessary due to omitempty).
			initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalMemoryAvailable): "0%"}
		})
		sizeLimit := resource.MustParse("100Mi")
		useOverLimit := 101 /* Mb */
		useUnderLimit := 99 /* Mb */
		containerLimit := v1.ResourceList{v1.ResourceEphemeralStorage: sizeLimit}
		runEvictionTest(f, evictionTestTimeout, noPressure, noStarvedResource, logDiskMetrics, []podEvictSpec{
			{
				evictionPriority: 1, // This pod should be evicted because of an emptyDir (default storage type) usage violation.
				pod: diskConsumingPod("emptydir-disk-sizelimit", useOverLimit, &v1.VolumeSource{
					EmptyDir: &v1.EmptyDirVolumeSource{SizeLimit: &sizeLimit},
				}, v1.ResourceRequirements{}),
			},
			{
				evictionPriority: 1, // This pod should be evicted because of a memory-backed emptyDir usage violation.
				pod: diskConsumingPod("emptydir-memory-sizelimit", useOverLimit, &v1.VolumeSource{
					EmptyDir: &v1.EmptyDirVolumeSource{Medium: "Memory", SizeLimit: &sizeLimit},
				}, v1.ResourceRequirements{}),
			},
			{
				evictionPriority: 1, // This pod should cross the container limit by writing to its writable layer.
				pod:              diskConsumingPod("container-disk-limit", useOverLimit, nil, v1.ResourceRequirements{Limits: containerLimit}),
			},
			{
				evictionPriority: 1, // This pod should hit the container limit by writing to an emptyDir.
				pod: diskConsumingPod("container-emptydir-disk-limit", useOverLimit, &v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}},
					v1.ResourceRequirements{Limits: containerLimit}),
			},
			{
				evictionPriority: 0, // This pod should not be evicted because it uses less than its limit.
				pod: diskConsumingPod("emptydir-disk-below-sizelimit", useUnderLimit, &v1.VolumeSource{
					EmptyDir: &v1.EmptyDirVolumeSource{SizeLimit: &sizeLimit},
				}, v1.ResourceRequirements{}),
			},
			{
				evictionPriority: 0, // This pod should not be evicted because it uses less than its limit.
				pod:              diskConsumingPod("container-disk-below-sizelimit", useUnderLimit, nil, v1.ResourceRequirements{Limits: containerLimit}),
			},
		})
	})
})
// PriorityMemoryEvictionOrdering tests that the node responds to node memory pressure by evicting pods.
// It verifies that the guaranteed pod is never evicted, and that the lower-priority pod is evicted before
// the higher-priority pod.
var _ = framework.KubeDescribe("PriorityMemoryEvictionOrdering [Slow] [Serial] [Disruptive][NodeFeature:Eviction]", func() {
	f := framework.NewDefaultFramework("priority-memory-eviction-ordering-test")
	expectedNodeCondition := v1.NodeMemoryPressure
	expectedStarvedResource := v1.ResourceMemory
	pressureTimeout := 10 * time.Minute
	highPriorityClassName := f.BaseName + "-high-priority"
	highPriority := int32(999999999)
	Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
			memoryConsumed := resource.MustParse("600Mi")
			summary := eventuallyGetSummary()
			availableBytes := *(summary.Node.Memory.AvailableBytes)
			if availableBytes <= uint64(memoryConsumed.Value()) {
				framework.Skipf("Too little memory free on the host for the PriorityMemoryEvictionOrdering test to run")
			}
			initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalMemoryAvailable): fmt.Sprintf("%d", availableBytes-uint64(memoryConsumed.Value()))}
			initialConfig.EvictionMinimumReclaim = map[string]string{}
		})
		BeforeEach(func() {
			_, err := f.ClientSet.SchedulingV1().PriorityClasses().Create(&schedulerapi.PriorityClass{ObjectMeta: metav1.ObjectMeta{Name: highPriorityClassName}, Value: highPriority})
			Expect(err == nil || errors.IsAlreadyExists(err)).To(BeTrue())
		})
		AfterEach(func() {
			err := f.ClientSet.SchedulingV1().PriorityClasses().Delete(highPriorityClassName, &metav1.DeleteOptions{})
			Expect(err).NotTo(HaveOccurred())
		})
		specs := []podEvictSpec{
			{
				evictionPriority: 2,
				pod:              getMemhogPod("memory-hog-pod", "memory-hog", v1.ResourceRequirements{}),
			},
			{
				evictionPriority: 1,
				pod:              getMemhogPod("high-priority-memory-hog-pod", "high-priority-memory-hog", v1.ResourceRequirements{}),
			},
			{
				evictionPriority: 0,
				pod: getMemhogPod("guaranteed-pod", "guaranteed-pod", v1.ResourceRequirements{
					Requests: v1.ResourceList{
						v1.ResourceMemory: resource.MustParse("300Mi"),
					},
					Limits: v1.ResourceList{
						v1.ResourceMemory: resource.MustParse("300Mi"),
					},
				}),
			},
		}
		specs[1].pod.Spec.PriorityClassName = highPriorityClassName
		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logMemoryMetrics, specs)
	})
})
// PriorityLocalStorageEvictionOrdering tests that the node responds to node disk pressure by evicting pods.
// It verifies that the guaranteed pod is never evicted, and that the lower-priority pod is evicted before
// the higher-priority pod.
var _ = framework.KubeDescribe("PriorityLocalStorageEvictionOrdering [Slow] [Serial] [Disruptive][NodeFeature:Eviction]", func() {
	f := framework.NewDefaultFramework("priority-disk-eviction-ordering-test")
	expectedNodeCondition := v1.NodeDiskPressure
	expectedStarvedResource := v1.ResourceEphemeralStorage
	pressureTimeout := 10 * time.Minute
	highPriorityClassName := f.BaseName + "-high-priority"
	highPriority := int32(999999999)
	Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
			diskConsumed := resource.MustParse("350Mi")
			summary := eventuallyGetSummary()
			availableBytes := *(summary.Node.Fs.AvailableBytes)
			if availableBytes <= uint64(diskConsumed.Value()) {
				framework.Skipf("Too little disk free on the host for the PriorityLocalStorageEvictionOrdering test to run")
			}
			initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalNodeFsAvailable): fmt.Sprintf("%d", availableBytes-uint64(diskConsumed.Value()))}
			initialConfig.EvictionMinimumReclaim = map[string]string{}
		})
		BeforeEach(func() {
			_, err := f.ClientSet.SchedulingV1().PriorityClasses().Create(&schedulerapi.PriorityClass{ObjectMeta: metav1.ObjectMeta{Name: highPriorityClassName}, Value: highPriority})
			Expect(err == nil || errors.IsAlreadyExists(err)).To(BeTrue())
		})
		AfterEach(func() {
			err := f.ClientSet.SchedulingV1().PriorityClasses().Delete(highPriorityClassName, &metav1.DeleteOptions{})
			Expect(err).NotTo(HaveOccurred())
		})
		specs := []podEvictSpec{
			{
				evictionPriority: 2,
				pod:              diskConsumingPod("best-effort-disk", lotsOfDisk, nil, v1.ResourceRequirements{}),
			},
			{
				evictionPriority: 1,
				pod:              diskConsumingPod("high-priority-disk", lotsOfDisk, nil, v1.ResourceRequirements{}),
			},
			{
				evictionPriority: 0,
				// Only require 99% accuracy (297/300 Mb) because on some OS distributions the file itself (excluding contents) consumes disk space.
				pod: diskConsumingPod("guaranteed-disk", 297 /* Mb */, nil, v1.ResourceRequirements{
					Requests: v1.ResourceList{
						v1.ResourceEphemeralStorage: resource.MustParse("300Mi"),
					},
					Limits: v1.ResourceList{
						v1.ResourceEphemeralStorage: resource.MustParse("300Mi"),
					},
				}),
			},
		}
		specs[1].pod.Spec.PriorityClassName = highPriorityClassName
		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logDiskMetrics, specs)
	})
})
// PriorityPidEvictionOrdering tests that the node emits pid pressure in response to a fork bomb, and evicts pods by priority.
var _ = framework.KubeDescribe("PriorityPidEvictionOrdering [Slow] [Serial] [Disruptive][NodeFeature:Eviction]", func() {
	f := framework.NewDefaultFramework("pidpressure-eviction-test")
	pressureTimeout := 2 * time.Minute
	expectedNodeCondition := v1.NodePIDPressure
	expectedStarvedResource := noStarvedResource
	highPriorityClassName := f.BaseName + "-high-priority"
	highPriority := int32(999999999)
	Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
			pidsConsumed := int64(10000)
			summary := eventuallyGetSummary()
			availablePids := *(summary.Node.Rlimit.MaxPID) - *(summary.Node.Rlimit.NumOfRunningProcesses)
			initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalPIDAvailable): fmt.Sprintf("%d", availablePids-pidsConsumed)}
			initialConfig.EvictionMinimumReclaim = map[string]string{}
		})
		BeforeEach(func() {
			_, err := f.ClientSet.SchedulingV1().PriorityClasses().Create(&schedulerapi.PriorityClass{ObjectMeta: metav1.ObjectMeta{Name: highPriorityClassName}, Value: highPriority})
			Expect(err == nil || errors.IsAlreadyExists(err)).To(BeTrue())
		})
		AfterEach(func() {
			err := f.ClientSet.SchedulingV1().PriorityClasses().Delete(highPriorityClassName, &metav1.DeleteOptions{})
			Expect(err).NotTo(HaveOccurred())
		})
		specs := []podEvictSpec{
			{
				evictionPriority: 1,
				pod:              pidConsumingPod("fork-bomb-container", 12000),
			},
			{
				evictionPriority: 0,
				pod:              innocentPod(),
			},
		}
		specs[1].pod.Spec.PriorityClassName = highPriorityClassName
		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logPidMetrics, specs)
	})
})
// podEvictSpec is used by runEvictionTest to specify a pod, and when that pod should be evicted, relative to other pods.
type podEvictSpec struct {
	// P0 should never be evicted, P1 shouldn't evict before P2, etc.
	// If two are ranked at P1, either is permitted to fail before the other.
	// The test ends when all pods other than p0 have been evicted.
	evictionPriority int
	pod              *v1.Pod
}
// runEvictionTest sets up a testing environment given the provided pods, and checks a few things:
//	It ensures that the desired expectedNodeCondition is actually triggered.
//	It ensures that evictionPriority 0 pods are not evicted.
//	It ensures that lower evictionPriority pods are always evicted before higher evictionPriority pods (2 evicted before 1, etc.).
//	It ensures that all pods with non-zero evictionPriority are eventually evicted.
// runEvictionTest then cleans up the testing environment by deleting provided pods, and ensures that expectedNodeCondition no longer exists.
func runEvictionTest(f *framework.Framework, pressureTimeout time.Duration, expectedNodeCondition v1.NodeConditionType, expectedStarvedResource v1.ResourceName, logFunc func(), testSpecs []podEvictSpec) {
	// Place the remainder of the test within a context so that the kubelet config is set before and after the test.
	Context("", func() {
		BeforeEach(func() {
			// Reduce memory usage in the allocatable cgroup to ensure we do not have MemoryPressure.
			reduceAllocatableMemoryUsage()
			// Nodes do not immediately report local storage capacity.
			// Sleep so that pods requesting local storage do not fail to schedule.
			time.Sleep(30 * time.Second)
			By("setting up pods to be used by tests")
			pods := []*v1.Pod{}
			for _, spec := range testSpecs {
				pods = append(pods, spec.pod)
			}
			f.PodClient().CreateBatch(pods)
		})
		It("should eventually evict all of the correct pods", func() {
			By(fmt.Sprintf("Waiting for node to have NodeCondition: %s", expectedNodeCondition))
			Eventually(func() error {
				logFunc()
				if expectedNodeCondition == noPressure || hasNodeCondition(f, expectedNodeCondition) {
					return nil
				}
				return fmt.Errorf("NodeCondition: %s not encountered", expectedNodeCondition)
			}, pressureTimeout, evictionPollInterval).Should(BeNil())
			By("Waiting for evictions to occur")
			Eventually(func() error {
				if expectedNodeCondition != noPressure {
					if hasNodeCondition(f, expectedNodeCondition) {
						e2elog.Logf("Node has %s", expectedNodeCondition)
					} else {
						e2elog.Logf("Node does NOT have %s", expectedNodeCondition)
					}
				}
				logKubeletLatencyMetrics(kubeletmetrics.EvictionStatsAgeKey)
				logFunc()
				return verifyEvictionOrdering(f, testSpecs)
			}, pressureTimeout, evictionPollInterval).Should(BeNil())
			// We observe pressure from the API server. The eviction manager observes pressure from the kubelet's internal stats.
			// This means the eviction manager will observe pressure before we will, creating a delay between when the eviction manager
			// evicts a pod and when we observe the pressure by querying the API server. Add a delay here to account for this.
			By("making sure pressure from test has surfaced before continuing")
			time.Sleep(pressureDelay)
			By(fmt.Sprintf("Waiting for NodeCondition: %s to no longer exist on the node", expectedNodeCondition))
			Eventually(func() error {
				logFunc()
				logKubeletLatencyMetrics(kubeletmetrics.EvictionStatsAgeKey)
				if expectedNodeCondition != noPressure && hasNodeCondition(f, expectedNodeCondition) {
					return fmt.Errorf("conditions haven't returned to normal, node still has %s", expectedNodeCondition)
				}
				return nil
			}, pressureDisappearTimeout, evictionPollInterval).Should(BeNil())
			By("checking for stable, pressure-free condition without unexpected pod failures")
			Consistently(func() error {
				if expectedNodeCondition != noPressure && hasNodeCondition(f, expectedNodeCondition) {
					return fmt.Errorf("%s disappeared and then reappeared", expectedNodeCondition)
				}
				logFunc()
				logKubeletLatencyMetrics(kubeletmetrics.EvictionStatsAgeKey)
				return verifyEvictionOrdering(f, testSpecs)
			}, postTestConditionMonitoringPeriod, evictionPollInterval).Should(BeNil())
			By("checking for correctly formatted eviction events")
			verifyEvictionEvents(f, testSpecs, expectedStarvedResource)
		})
		AfterEach(func() {
			By("deleting pods")
			for _, spec := range testSpecs {
				By(fmt.Sprintf("deleting pod: %s", spec.pod.Name))
				f.PodClient().DeleteSync(spec.pod.Name, &metav1.DeleteOptions{}, 10*time.Minute)
			}
			reduceAllocatableMemoryUsage()
			if expectedNodeCondition == v1.NodeDiskPressure && framework.TestContext.PrepullImages {
				// The disk eviction test may cause the prepulled images to be evicted;
				// prepull those images again to ensure this test does not affect following tests.
				PrePullAllImages()
			}
			By("making sure we can start a new pod after the test")
			podName := "test-admit-pod"
			f.PodClient().CreateSync(&v1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name: podName,
				},
				Spec: v1.PodSpec{
					RestartPolicy: v1.RestartPolicyNever,
					Containers: []v1.Container{
						{
							Image: imageutils.GetPauseImageName(),
							Name:  podName,
						},
					},
				},
			})
			if CurrentGinkgoTestDescription().Failed {
				if framework.TestContext.DumpLogsOnFailure {
					logPodEvents(f)
					logNodeEvents(f)
				}
			}
		})
	})
}
// verifyEvictionOrdering returns an error if all non-zero priority pods have not been evicted, nil otherwise.
// This function panics (via Expect) if eviction ordering is violated, or if a priority-zero pod fails.
func verifyEvictionOrdering(f *framework.Framework, testSpecs []podEvictSpec) error {
	// Gather current information
	updatedPodList, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).List(metav1.ListOptions{})
	if err != nil {
		return err
	}
	updatedPods := updatedPodList.Items
	for _, p := range updatedPods {
		e2elog.Logf("fetching pod %s; phase= %v", p.Name, p.Status.Phase)
	}
	By("checking eviction ordering and ensuring important pods don't fail")
	done := true
	for _, priorityPodSpec := range testSpecs {
		var priorityPod v1.Pod
		for _, p := range updatedPods {
			if p.Name == priorityPodSpec.pod.Name {
				priorityPod = p
			}
		}
		Expect(priorityPod).NotTo(BeNil())
		Expect(priorityPod.Status.Phase).NotTo(Equal(v1.PodSucceeded),
			fmt.Sprintf("pod: %s succeeded unexpectedly", priorityPod.Name))
		// Check eviction ordering.
		// Note: it is alright for a priority 1 and priority 2 pod (for example) to fail in the same round,
		// but never alright for a priority 1 pod to fail while the priority 2 pod is still running.
		for _, lowPriorityPodSpec := range testSpecs {
			var lowPriorityPod v1.Pod
			for _, p := range updatedPods {
				if p.Name == lowPriorityPodSpec.pod.Name {
					lowPriorityPod = p
				}
			}
			Expect(lowPriorityPod).NotTo(BeNil())
			if priorityPodSpec.evictionPriority < lowPriorityPodSpec.evictionPriority && lowPriorityPod.Status.Phase == v1.PodRunning {
				Expect(priorityPod.Status.Phase).NotTo(Equal(v1.PodFailed),
					fmt.Sprintf("priority %d pod: %s failed before priority %d pod: %s",
						priorityPodSpec.evictionPriority, priorityPodSpec.pod.Name, lowPriorityPodSpec.evictionPriority, lowPriorityPodSpec.pod.Name))
			}
		}
		if priorityPod.Status.Phase == v1.PodFailed {
			Expect(priorityPod.Status.Reason).To(Equal(eviction.Reason), "pod %s failed; expected Status.Reason to be %s, but got %s",
				priorityPod.Name, eviction.Reason, priorityPod.Status.Reason)
		}
		// EvictionPriority 0 pods should not fail
		if priorityPodSpec.evictionPriority == 0 {
			Expect(priorityPod.Status.Phase).NotTo(Equal(v1.PodFailed),
				fmt.Sprintf("priority 0 pod: %s failed", priorityPod.Name))
		}
		// If a pod that is not evictionPriority 0 has not been evicted, we are not done
		if priorityPodSpec.evictionPriority != 0 && priorityPod.Status.Phase != v1.PodFailed {
			done = false
		}
	}
	if done {
		return nil
	}
	return fmt.Errorf("pods that should be evicted are still running")
}
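// verifyEvictionEvents checks that every pod expected to be evicted has exactly one eviction event, and that the
// event's annotations name the expected starved resource and, for memory evictions, the offending container and its usage.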
func verifyEvictionEvents(f *framework.Framework, testSpecs []podEvictSpec, expectedStarvedResource v1.ResourceName) {
	for _, spec := range testSpecs {
		pod := spec.pod
		if spec.evictionPriority != 0 {
			selector := fields.Set{
				"involvedObject.kind":      "Pod",
				"involvedObject.name":      pod.Name,
				"involvedObject.namespace": f.Namespace.Name,
				"reason":                   eviction.Reason,
			}.AsSelector().String()
			podEvictEvents, err := f.ClientSet.CoreV1().Events(f.Namespace.Name).List(metav1.ListOptions{FieldSelector: selector})
			Expect(err).To(BeNil(), "Unexpected error getting events during eviction test: %v", err)
			Expect(len(podEvictEvents.Items)).To(Equal(1), "Expected to find 1 eviction event for pod %s, got %d", pod.Name, len(podEvictEvents.Items))
			event := podEvictEvents.Items[0]
			if expectedStarvedResource != noStarvedResource {
				// Check the eviction.StarvedResourceKey
				starved, found := event.Annotations[eviction.StarvedResourceKey]
				Expect(found).To(BeTrue(), "Expected to find an annotation on the eviction event for pod %s containing the starved resource %s, but it was not found",
					pod.Name, expectedStarvedResource)
				starvedResource := v1.ResourceName(starved)
				Expect(starvedResource).To(Equal(expectedStarvedResource), "Expected the starved resource annotation on pod %s to contain %s, but got %s instead",
					pod.Name, expectedStarvedResource, starvedResource)
				// We only check these keys for memory, because ephemeral storage evictions may be due to volume usage, in which case these values are not present.
				if expectedStarvedResource == v1.ResourceMemory {
					// Check the eviction.OffendingContainersKey
					offendersString, found := event.Annotations[eviction.OffendingContainersKey]
					Expect(found).To(BeTrue(), "Expected to find an annotation on the eviction event for pod %s containing the offending containers, but it was not found",
						pod.Name)
					offendingContainers := strings.Split(offendersString, ",")
					Expect(len(offendingContainers)).To(Equal(1), "Expected to find exactly one offending container in the %s annotation, but no container was found",
						eviction.OffendingContainersKey)
					Expect(offendingContainers[0]).To(Equal(pod.Spec.Containers[0].Name), "Expected to find the offending container %s in the %s annotation, but found %s instead",
						pod.Spec.Containers[0].Name, eviction.OffendingContainersKey, offendingContainers[0])
					// Check the eviction.OffendingContainersUsageKey
					offendingUsageString, found := event.Annotations[eviction.OffendingContainersUsageKey]
					Expect(found).To(BeTrue(), "Expected to find an annotation on the eviction event for pod %s containing the offending containers' usage, but it was not found",
						pod.Name)
					offendingContainersUsage := strings.Split(offendingUsageString, ",")
					Expect(len(offendingContainersUsage)).To(Equal(1), "Expected to find the offending container's usage in the %s annotation, but found %+v",
						eviction.OffendingContainersUsageKey, offendingContainersUsage)
					usageQuantity, err := resource.ParseQuantity(offendingContainersUsage[0])
					Expect(err).To(BeNil(), "Expected to be able to parse pod %s's %s annotation as a quantity, but got err: %v", pod.Name, eviction.OffendingContainersUsageKey, err)
					request := pod.Spec.Containers[0].Resources.Requests[starvedResource]
					Expect(usageQuantity.Cmp(request)).To(Equal(1), "Expected usage (%s) of the offending container in pod %s to exceed its request (%s)",
						usageQuantity.String(), pod.Name, request.String())
				}
			}
		}
	}
}
// hasNodeCondition returns true if the node has the given node condition, false otherwise.
func hasNodeCondition(f *framework.Framework, expectedNodeCondition v1.NodeConditionType) bool {
	localNodeStatus := getLocalNode(f).Status
	_, actualNodeCondition := testutils.GetNodeCondition(&localNodeStatus, expectedNodeCondition)
	Expect(actualNodeCondition).NotTo(BeNil())
	return actualNodeCondition.Status == v1.ConditionTrue
}
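// logInodeMetrics logs inode totals and free counts from the node summary API for the image filesystem,
// the root filesystem, and each pod's containers and volumes.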
func logInodeMetrics() {
	summary, err := getNodeSummary()
	if err != nil {
		e2elog.Logf("Error getting summary: %v", err)
		return
	}
	if summary.Node.Runtime != nil && summary.Node.Runtime.ImageFs != nil && summary.Node.Runtime.ImageFs.Inodes != nil && summary.Node.Runtime.ImageFs.InodesFree != nil {
		e2elog.Logf("imageFsInfo.Inodes: %d, imageFsInfo.InodesFree: %d", *summary.Node.Runtime.ImageFs.Inodes, *summary.Node.Runtime.ImageFs.InodesFree)
	}
	if summary.Node.Fs != nil && summary.Node.Fs.Inodes != nil && summary.Node.Fs.InodesFree != nil {
		e2elog.Logf("rootFsInfo.Inodes: %d, rootFsInfo.InodesFree: %d", *summary.Node.Fs.Inodes, *summary.Node.Fs.InodesFree)
	}
	for _, pod := range summary.Pods {
		e2elog.Logf("Pod: %s", pod.PodRef.Name)
		for _, container := range pod.Containers {
			if container.Rootfs != nil && container.Rootfs.InodesUsed != nil {
				e2elog.Logf("--- summary Container: %s inodeUsage: %d", container.Name, *container.Rootfs.InodesUsed)
			}
		}
		for _, volume := range pod.VolumeStats {
			if volume.FsStats.InodesUsed != nil {
				e2elog.Logf("--- summary Volume: %s inodeUsage: %d", volume.Name, *volume.FsStats.InodesUsed)
			}
		}
	}
}
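// logDiskMetrics logs disk capacity and usage from the node summary API for the image filesystem,
// the root filesystem, and each pod's containers and volumes.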
func logDiskMetrics() {
	summary, err := getNodeSummary()
	if err != nil {
		e2elog.Logf("Error getting summary: %v", err)
		return
	}
	if summary.Node.Runtime != nil && summary.Node.Runtime.ImageFs != nil && summary.Node.Runtime.ImageFs.CapacityBytes != nil && summary.Node.Runtime.ImageFs.AvailableBytes != nil {
		e2elog.Logf("imageFsInfo.CapacityBytes: %d, imageFsInfo.AvailableBytes: %d", *summary.Node.Runtime.ImageFs.CapacityBytes, *summary.Node.Runtime.ImageFs.AvailableBytes)
	}
	if summary.Node.Fs != nil && summary.Node.Fs.CapacityBytes != nil && summary.Node.Fs.AvailableBytes != nil {
		e2elog.Logf("rootFsInfo.CapacityBytes: %d, rootFsInfo.AvailableBytes: %d", *summary.Node.Fs.CapacityBytes, *summary.Node.Fs.AvailableBytes)
	}
	for _, pod := range summary.Pods {
		e2elog.Logf("Pod: %s", pod.PodRef.Name)
		for _, container := range pod.Containers {
			if container.Rootfs != nil && container.Rootfs.UsedBytes != nil {
				e2elog.Logf("--- summary Container: %s UsedBytes: %d", container.Name, *container.Rootfs.UsedBytes)
			}
		}
		for _, volume := range pod.VolumeStats {
			// Guard on UsedBytes (not InodesUsed), since UsedBytes is the field dereferenced below.
			if volume.FsStats.UsedBytes != nil {
				e2elog.Logf("--- summary Volume: %s UsedBytes: %d", volume.Name, *volume.FsStats.UsedBytes)
			}
		}
	}
}
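// logMemoryMetrics logs working-set and available memory from the node summary API for the node,
// the pods allocatable cgroup, and each pod's containers.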
func logMemoryMetrics() {
	summary, err := getNodeSummary()
	if err != nil {
		e2elog.Logf("Error getting summary: %v", err)
		return
	}
	if summary.Node.Memory != nil && summary.Node.Memory.WorkingSetBytes != nil && summary.Node.Memory.AvailableBytes != nil {
		e2elog.Logf("Node.Memory.WorkingSetBytes: %d, Node.Memory.AvailableBytes: %d", *summary.Node.Memory.WorkingSetBytes, *summary.Node.Memory.AvailableBytes)
	}
	for _, sysContainer := range summary.Node.SystemContainers {
		if sysContainer.Name == stats.SystemContainerPods && sysContainer.Memory != nil && sysContainer.Memory.WorkingSetBytes != nil && sysContainer.Memory.AvailableBytes != nil {
			e2elog.Logf("Allocatable.Memory.WorkingSetBytes: %d, Allocatable.Memory.AvailableBytes: %d", *sysContainer.Memory.WorkingSetBytes, *sysContainer.Memory.AvailableBytes)
		}
	}
	for _, pod := range summary.Pods {
		e2elog.Logf("Pod: %s", pod.PodRef.Name)
		for _, container := range pod.Containers {
			if container.Memory != nil && container.Memory.WorkingSetBytes != nil {
				e2elog.Logf("--- summary Container: %s WorkingSetBytes: %d", container.Name, *container.Memory.WorkingSetBytes)
			}
		}
	}
}
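// logPidMetrics logs the node's PID limit and the number of running processes from the node summary API.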
func logPidMetrics() {
	summary, err := getNodeSummary()
	if err != nil {
		e2elog.Logf("Error getting summary: %v", err)
		return
	}
	if summary.Node.Rlimit != nil && summary.Node.Rlimit.MaxPID != nil && summary.Node.Rlimit.NumOfRunningProcesses != nil {
		e2elog.Logf("Node.Rlimit.MaxPID: %d, Node.Rlimit.RunningProcesses: %d", *summary.Node.Rlimit.MaxPID, *summary.Node.Rlimit.NumOfRunningProcesses)
	}
}
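// eventuallyGetSummary polls the node summary API until a summary containing the filesystem stats
// needed by these tests (inodes free and available bytes) is returned.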
func eventuallyGetSummary() (s *stats.Summary) {
	Eventually(func() error {
		summary, err := getNodeSummary()
		if err != nil {
			return err
		}
		if summary == nil || summary.Node.Fs == nil || summary.Node.Fs.InodesFree == nil || summary.Node.Fs.AvailableBytes == nil {
			return fmt.Errorf("some part of data is nil")
		}
		s = summary
		return nil
	}, time.Minute, evictionPollInterval).Should(BeNil())
	return
}
// innocentPod returns a pod that does not use any resources.
func innocentPod() *v1.Pod {
	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: "innocent-pod"},
		Spec: v1.PodSpec{
			RestartPolicy: v1.RestartPolicyNever,
			Containers: []v1.Container{
				{
					Image: busyboxImage,
					Name:  "innocent-container",
					Command: []string{
						"sh",
						"-c",
						"while true; do sleep 5; done",
					},
				},
			},
		},
	}
}
const (
	volumeMountPath = "/test-mnt"
	volumeName      = "test-volume"
)
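// inodeConsumingPod returns a pod that consumes inodes by creating numFiles empty files,
// either in the container's writable layer or on the provided volume.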
func inodeConsumingPod(name string, numFiles int, volumeSource *v1.VolumeSource) *v1.Pod {
	path := ""
	if volumeSource != nil {
		path = volumeMountPath
	}
	// Each iteration creates an empty file.
	return podWithCommand(volumeSource, v1.ResourceRequirements{}, numFiles, name, fmt.Sprintf("touch %s${i}.txt; sleep 0.001;", filepath.Join(path, "file")))
}
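// diskConsumingPod returns a pod that writes approximately diskConsumedMB megabytes,
// either to the container's writable layer or to the provided volume.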
func diskConsumingPod(name string, diskConsumedMB int, volumeSource *v1.VolumeSource, resources v1.ResourceRequirements) *v1.Pod {
	path := ""
	if volumeSource != nil {
		path = volumeMountPath
	}
	// Each iteration writes 1 Mb, so do diskConsumedMB iterations.
	return podWithCommand(volumeSource, resources, diskConsumedMB, name, fmt.Sprintf("dd if=/dev/urandom of=%s${i} bs=1048576 count=1 2>/dev/null; sleep .1;", filepath.Join(path, "file")))
}
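// pidConsumingPod returns a pod that consumes PIDs by spawning roughly numProcesses background processes.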
func pidConsumingPod(name string, numProcesses int) *v1.Pod {
	// Each iteration forks once, but creates two processes.
	return podWithCommand(nil, v1.ResourceRequirements{}, numProcesses/2, name, "(while true; do sleep 5; done)&")
}
// podWithCommand returns a pod with the provided volumeSource and resourceRequirements
// whose container runs the given command for the specified number of iterations.
func podWithCommand(volumeSource *v1.VolumeSource, resources v1.ResourceRequirements, iterations int, name, command string) *v1.Pod {
	volumeMounts := []v1.VolumeMount{}
	volumes := []v1.Volume{}
	if volumeSource != nil {
		volumeMounts = []v1.VolumeMount{{MountPath: volumeMountPath, Name: volumeName}}
		volumes = []v1.Volume{{Name: volumeName, VolumeSource: *volumeSource}}
	}
	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: fmt.Sprintf("%s-pod", name)},
		Spec: v1.PodSpec{
			RestartPolicy: v1.RestartPolicyNever,
			Containers: []v1.Container{
				{
					Image: busyboxImage,
					Name:  fmt.Sprintf("%s-container", name),
					Command: []string{
						"sh",
						"-c",
						fmt.Sprintf("i=0; while [ $i -lt %d ]; do %s i=$(($i+1)); done; while true; do sleep 5; done", iterations, command),
					},
					Resources:    resources,
					VolumeMounts: volumeMounts,
				},
			},
			Volumes: volumes,
		},
	}
}
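// getMemhogPod returns a pod that runs the stress image to continuously allocate memory,
// up to the -mem-total value derived from the container's memory limit.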
func getMemhogPod(podName string, ctnName string, res v1.ResourceRequirements) *v1.Pod {
	env := []v1.EnvVar{
		{
			Name: "MEMORY_LIMIT",
			ValueFrom: &v1.EnvVarSource{
				ResourceFieldRef: &v1.ResourceFieldSelector{
					Resource: "limits.memory",
				},
			},
		},
	}
	// If there is a limit specified, pass 80% of it for -mem-total, otherwise use the downward API
	// to pass limits.memory, which will be the total memory available.
	// This helps prevent a guaranteed pod from triggering an OOM kill due to its low memory limit,
	// which would cause the test to fail inappropriately.
	var memLimit string
	if limit, ok := res.Limits[v1.ResourceMemory]; ok {
		memLimit = strconv.Itoa(int(
			float64(limit.Value()) * 0.8))
	} else {
		memLimit = "$(MEMORY_LIMIT)"
	}
	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name: podName,
		},
		Spec: v1.PodSpec{
			RestartPolicy: v1.RestartPolicyNever,
			Containers: []v1.Container{
				{
					Name:            ctnName,
					Image:           "k8s.gcr.io/stress:v1",
					ImagePullPolicy: "Always",
					Env:             env,
					// 60 min timeout * 60 s/min / 10 s per tick = 360 ticks before timeout,
					// so ~11.11Mi/tick is needed to fill ~4Gi of memory; start with a ballpark of 12Mi/tick.
					// We might see flakes due to timeout if the total memory on the nodes increases.
					Args:      []string{"-mem-alloc-size", "12Mi", "-mem-alloc-sleep", "10s", "-mem-total", memLimit},
					Resources: res,
				},
			},
		},
	}
}