eviction_manager_test.go

  1. /*
  2. Copyright 2016 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package eviction
  14. import (
  15. "fmt"
  16. "testing"
  17. "time"
  18. "k8s.io/api/core/v1"
  19. "k8s.io/apimachinery/pkg/api/resource"
  20. "k8s.io/apimachinery/pkg/types"
  21. "k8s.io/apimachinery/pkg/util/clock"
  22. utilfeature "k8s.io/apiserver/pkg/util/feature"
  23. "k8s.io/client-go/tools/record"
  24. featuregatetesting "k8s.io/component-base/featuregate/testing"
  25. kubeapi "k8s.io/kubernetes/pkg/apis/core"
  26. "k8s.io/kubernetes/pkg/apis/scheduling"
  27. "k8s.io/kubernetes/pkg/features"
  28. statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
  29. evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
  30. "k8s.io/kubernetes/pkg/kubelet/lifecycle"
  31. kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
  32. )
  33. const (
  34. lowPriority = -1
  35. defaultPriority = 0
  36. highPriority = 1
  37. )
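// These priorities are fed to newPod via the podToMake fixtures below, e.g.
//
//	pod, stats := makePodWithMemoryStats("victim", lowPriority,
//		newResourceList("", "", ""), newResourceList("", "", ""), "100Mi")
//
// The eviction tests expect victims to be chosen among lower-priority pods
// using the most resources relative to their requests.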
  38. // mockPodKiller is used to test which pod is killed
  39. type mockPodKiller struct {
  40. pod *v1.Pod
  41. status v1.PodStatus
  42. gracePeriodOverride *int64
  43. }
  44. // killPodNow records the pod that was killed
  45. func (m *mockPodKiller) killPodNow(pod *v1.Pod, status v1.PodStatus, gracePeriodOverride *int64) error {
  46. m.pod = pod
  47. m.status = status
  48. m.gracePeriodOverride = gracePeriodOverride
  49. return nil
  50. }
  51. // mockDiskInfoProvider is used to stub disk info in tests.
  52. type mockDiskInfoProvider struct {
  53. dedicatedImageFs bool
  54. }
  55. // HasDedicatedImageFs returns the mocked value
  56. func (m *mockDiskInfoProvider) HasDedicatedImageFs() (bool, error) {
  57. return m.dedicatedImageFs, nil
  58. }
  59. // mockDiskGC is used to simulate invoking image and container garbage collection.
  60. type mockDiskGC struct {
  61. err error
  62. imageGCInvoked bool
  63. containerGCInvoked bool
  64. fakeSummaryProvider *fakeSummaryProvider
  65. summaryAfterGC *statsapi.Summary
  66. }
  67. // DeleteUnusedImages records that image GC was invoked and returns the mocked error.
  68. func (m *mockDiskGC) DeleteUnusedImages() error {
  69. m.imageGCInvoked = true
  70. if m.summaryAfterGC != nil && m.fakeSummaryProvider != nil {
  71. m.fakeSummaryProvider.result = m.summaryAfterGC
  72. }
  73. return m.err
  74. }
  75. // DeleteAllUnusedContainers records that container GC was invoked and returns the mocked error.
  76. func (m *mockDiskGC) DeleteAllUnusedContainers() error {
  77. m.containerGCInvoked = true
  78. if m.summaryAfterGC != nil && m.fakeSummaryProvider != nil {
  79. m.fakeSummaryProvider.result = m.summaryAfterGC
  80. }
  81. return m.err
  82. }
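// makePodWithMemoryStats returns a pod with the given priority, requests, and
// limits, plus pod stats reporting the given memory working set.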
  83. func makePodWithMemoryStats(name string, priority int32, requests v1.ResourceList, limits v1.ResourceList, memoryWorkingSet string) (*v1.Pod, statsapi.PodStats) {
  84. pod := newPod(name, priority, []v1.Container{
  85. newContainer(name, requests, limits),
  86. }, nil)
  87. podStats := newPodMemoryStats(pod, resource.MustParse(memoryWorkingSet))
  88. return pod, podStats
  89. }
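// makePodWithDiskStats returns a pod with the given priority, requests, and
// limits, plus pod stats reporting the given root fs, logs, and per-local-volume usage.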
  90. func makePodWithDiskStats(name string, priority int32, requests v1.ResourceList, limits v1.ResourceList, rootFsUsed, logsUsed, perLocalVolumeUsed string) (*v1.Pod, statsapi.PodStats) {
  91. pod := newPod(name, priority, []v1.Container{
  92. newContainer(name, requests, limits),
  93. }, nil)
  94. podStats := newPodDiskStats(pod, parseQuantity(rootFsUsed), parseQuantity(logsUsed), parseQuantity(perLocalVolumeUsed))
  95. return pod, podStats
  96. }
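// makeMemoryStats returns a stats summary in which the node and its pods
// system container both report the given bytes as available and as working
// set, with the supplied per-pod stats attached.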
  97. func makeMemoryStats(nodeAvailableBytes string, podStats map[*v1.Pod]statsapi.PodStats) *statsapi.Summary {
  98. val := resource.MustParse(nodeAvailableBytes)
  99. availableBytes := uint64(val.Value())
  100. WorkingSetBytes := uint64(val.Value())
  101. result := &statsapi.Summary{
  102. Node: statsapi.NodeStats{
  103. Memory: &statsapi.MemoryStats{
  104. AvailableBytes: &availableBytes,
  105. WorkingSetBytes: &WorkingSetBytes,
  106. },
  107. SystemContainers: []statsapi.ContainerStats{
  108. {
  109. Name: statsapi.SystemContainerPods,
  110. Memory: &statsapi.MemoryStats{
  111. AvailableBytes: &availableBytes,
  112. WorkingSetBytes: &WorkingSetBytes,
  113. },
  114. },
  115. },
  116. },
  117. Pods: []statsapi.PodStats{},
  118. }
  119. for _, podStat := range podStats {
  120. result.Pods = append(result.Pods, podStat)
  121. }
  122. return result
  123. }
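// makeDiskStats returns a stats summary where the node fs and image fs report
// the given available bytes, with capacity fixed at twice the available bytes,
// plus the supplied per-pod stats.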
  124. func makeDiskStats(rootFsAvailableBytes, imageFsAvailableBytes string, podStats map[*v1.Pod]statsapi.PodStats) *statsapi.Summary {
  125. rootFsVal := resource.MustParse(rootFsAvailableBytes)
  126. rootFsBytes := uint64(rootFsVal.Value())
  127. rootFsCapacityBytes := uint64(rootFsVal.Value() * 2)
  128. imageFsVal := resource.MustParse(imageFsAvailableBytes)
  129. imageFsBytes := uint64(imageFsVal.Value())
  130. imageFsCapacityBytes := uint64(imageFsVal.Value() * 2)
  131. result := &statsapi.Summary{
  132. Node: statsapi.NodeStats{
  133. Fs: &statsapi.FsStats{
  134. AvailableBytes: &rootFsBytes,
  135. CapacityBytes: &rootFsCapacityBytes,
  136. },
  137. Runtime: &statsapi.RuntimeStats{
  138. ImageFs: &statsapi.FsStats{
  139. AvailableBytes: &imageFsBytes,
  140. CapacityBytes: &imageFsCapacityBytes,
  141. },
  142. },
  143. },
  144. Pods: []statsapi.PodStats{},
  145. }
  146. for _, podStat := range podStats {
  147. result.Pods = append(result.Pods, podStat)
  148. }
  149. return result
  150. }
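// podToMake describes a test pod: its priority, resource requests and limits,
// and the memory, disk, and inode usage its stats should report.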
  151. type podToMake struct {
  152. name string
  153. priority int32
  154. requests v1.ResourceList
  155. limits v1.ResourceList
  156. memoryWorkingSet string
  157. rootFsUsed string
  158. logsFsUsed string
  159. logsFsInodesUsed string
  160. rootFsInodesUsed string
  161. perLocalVolumeUsed string
  162. perLocalVolumeInodesUsed string
  163. }
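// Each test below wires these fixtures into a managerImpl with a fake clock, a
// mock pod killer, mock disk GC, and a fake summary provider, roughly:
//
//	manager := &managerImpl{
//		clock:                        fakeClock,
//		killPodFunc:                  podKiller.killPodNow,
//		imageGC:                      diskGC,
//		containerGC:                  diskGC,
//		config:                       config,
//		recorder:                     &record.FakeRecorder{},
//		summaryProvider:              summaryProvider,
//		nodeRef:                      nodeRef,
//		nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
//		thresholdsFirstObservedAt:    thresholdsObservedAt{},
//	}
//
// Pressure is then induced or relieved by swapping summaryProvider.result and
// stepping the fake clock before each call to manager.synchronize.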
  164. // TestMemoryPressure verifies the memory-pressure lifecycle: soft threshold with grace period, hard threshold eviction, pod admission behavior under pressure, and the pressure transition period.
  165. func TestMemoryPressure(t *testing.T) {
  166. podMaker := makePodWithMemoryStats
  167. summaryStatsMaker := makeMemoryStats
  168. podsToMake := []podToMake{
  169. {name: "guaranteed-low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), memoryWorkingSet: "900Mi"},
  170. {name: "burstable-below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), memoryWorkingSet: "50Mi"},
  171. {name: "burstable-above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), memoryWorkingSet: "400Mi"},
  172. {name: "best-effort-high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), memoryWorkingSet: "400Mi"},
  173. {name: "best-effort-low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), memoryWorkingSet: "100Mi"},
  174. }
  175. pods := []*v1.Pod{}
  176. podStats := map[*v1.Pod]statsapi.PodStats{}
  177. for _, podToMake := range podsToMake {
  178. pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.memoryWorkingSet)
  179. pods = append(pods, pod)
  180. podStats[pod] = podStat
  181. }
  182. podToEvict := pods[4]
  183. activePodsFunc := func() []*v1.Pod {
  184. return pods
  185. }
  186. fakeClock := clock.NewFakeClock(time.Now())
  187. podKiller := &mockPodKiller{}
  188. diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
  189. diskGC := &mockDiskGC{err: nil}
  190. nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
  191. config := Config{
  192. MaxPodGracePeriodSeconds: 5,
  193. PressureTransitionPeriod: time.Minute * 5,
  194. Thresholds: []evictionapi.Threshold{
  195. {
  196. Signal: evictionapi.SignalMemoryAvailable,
  197. Operator: evictionapi.OpLessThan,
  198. Value: evictionapi.ThresholdValue{
  199. Quantity: quantityMustParse("1Gi"),
  200. },
  201. },
  202. {
  203. Signal: evictionapi.SignalMemoryAvailable,
  204. Operator: evictionapi.OpLessThan,
  205. Value: evictionapi.ThresholdValue{
  206. Quantity: quantityMustParse("2Gi"),
  207. },
  208. GracePeriod: time.Minute * 2,
  209. },
  210. },
  211. }
  212. summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("2Gi", podStats)}
  213. manager := &managerImpl{
  214. clock: fakeClock,
  215. killPodFunc: podKiller.killPodNow,
  216. imageGC: diskGC,
  217. containerGC: diskGC,
  218. config: config,
  219. recorder: &record.FakeRecorder{},
  220. summaryProvider: summaryProvider,
  221. nodeRef: nodeRef,
  222. nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
  223. thresholdsFirstObservedAt: thresholdsObservedAt{},
  224. }
  225. // create a best-effort and a burstable pod to test admission
  226. bestEffortPodToAdmit, _ := podMaker("best-admit", defaultPriority, newResourceList("", "", ""), newResourceList("", "", ""), "0Gi")
  227. burstablePodToAdmit, _ := podMaker("burst-admit", defaultPriority, newResourceList("100m", "100Mi", ""), newResourceList("200m", "200Mi", ""), "0Gi")
  228. // synchronize
  229. manager.synchronize(diskInfoProvider, activePodsFunc)
  230. // we should not have memory pressure
  231. if manager.IsUnderMemoryPressure() {
  232. t.Errorf("Manager should not report memory pressure")
  233. }
  234. // try to admit our pods (they should succeed)
  235. expected := []bool{true, true}
  236. for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
  237. if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
  238. t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
  239. }
  240. }
  241. // induce soft threshold
  242. fakeClock.Step(1 * time.Minute)
  243. summaryProvider.result = summaryStatsMaker("1500Mi", podStats)
  244. manager.synchronize(diskInfoProvider, activePodsFunc)
  245. // we should have memory pressure
  246. if !manager.IsUnderMemoryPressure() {
  247. t.Errorf("Manager should report memory pressure since soft threshold was met")
  248. }
  249. // verify no pod was killed yet because the grace period has not elapsed.
  250. if podKiller.pod != nil {
  251. t.Errorf("Manager should not have killed a pod yet, but killed: %v", podKiller.pod.Name)
  252. }
  253. // step forward in time past the grace period
  254. fakeClock.Step(3 * time.Minute)
  255. summaryProvider.result = summaryStatsMaker("1500Mi", podStats)
  256. manager.synchronize(diskInfoProvider, activePodsFunc)
  257. // we should have memory pressure
  258. if !manager.IsUnderMemoryPressure() {
  259. t.Errorf("Manager should report memory pressure since soft threshold was met")
  260. }
  261. // verify the right pod was killed with the right grace period.
  262. if podKiller.pod != podToEvict {
  263. t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
  264. }
  265. if podKiller.gracePeriodOverride == nil {
  266. t.Errorf("Manager chose to kill pod but should have had a grace period override.")
  267. }
  268. observedGracePeriod := *podKiller.gracePeriodOverride
  269. if observedGracePeriod != manager.config.MaxPodGracePeriodSeconds {
  270. t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", manager.config.MaxPodGracePeriodSeconds, observedGracePeriod)
  271. }
  272. // reset state
  273. podKiller.pod = nil
  274. podKiller.gracePeriodOverride = nil
  275. // remove memory pressure
  276. fakeClock.Step(20 * time.Minute)
  277. summaryProvider.result = summaryStatsMaker("3Gi", podStats)
  278. manager.synchronize(diskInfoProvider, activePodsFunc)
  279. // we should not have memory pressure
  280. if manager.IsUnderMemoryPressure() {
  281. t.Errorf("Manager should not report memory pressure")
  282. }
  283. // induce memory pressure!
  284. fakeClock.Step(1 * time.Minute)
  285. summaryProvider.result = summaryStatsMaker("500Mi", podStats)
  286. manager.synchronize(diskInfoProvider, activePodsFunc)
  287. // we should have memory pressure
  288. if !manager.IsUnderMemoryPressure() {
  289. t.Errorf("Manager should report memory pressure")
  290. }
  291. // check the right pod was killed
  292. if podKiller.pod != podToEvict {
  293. t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
  294. }
  295. observedGracePeriod = *podKiller.gracePeriodOverride
  296. if observedGracePeriod != int64(0) {
  297. t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
  298. }
  299. // the best-effort pod should not be admitted; the burstable pod should
  300. expected = []bool{false, true}
  301. for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
  302. if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
  303. t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
  304. }
  305. }
  306. // reduce memory pressure
  307. fakeClock.Step(1 * time.Minute)
  308. summaryProvider.result = summaryStatsMaker("2Gi", podStats)
  309. podKiller.pod = nil // reset state
  310. manager.synchronize(diskInfoProvider, activePodsFunc)
  311. // we should have memory pressure (because transition period not yet met)
  312. if !manager.IsUnderMemoryPressure() {
  313. t.Errorf("Manager should report memory pressure")
  314. }
  315. // no pod should have been killed
  316. if podKiller.pod != nil {
  317. t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
  318. }
  319. // the best-effort pod should not be admitted; the burstable pod should
  320. expected = []bool{false, true}
  321. for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
  322. if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
  323. t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
  324. }
  325. }
  326. // move the clock past the transition period to ensure that we stop reporting pressure
  327. fakeClock.Step(5 * time.Minute)
  328. summaryProvider.result = summaryStatsMaker("2Gi", podStats)
  329. podKiller.pod = nil // reset state
  330. manager.synchronize(diskInfoProvider, activePodsFunc)
  331. // we should not have memory pressure (because transition period met)
  332. if manager.IsUnderMemoryPressure() {
  333. t.Errorf("Manager should not report memory pressure")
  334. }
  335. // no pod should have been killed
  336. if podKiller.pod != nil {
  337. t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
  338. }
  339. // all pods should be admitted now
  340. expected = []bool{true, true}
  341. for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
  342. if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
  343. t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
  344. }
  345. }
  346. }
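// makeContainersByQOS returns a single-container slice whose requests and
// limits yield the requested QoS class.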
  347. func makeContainersByQOS(class v1.PodQOSClass) []v1.Container {
  348. resource := newResourceList("100m", "1Gi", "")
  349. switch class {
  350. case v1.PodQOSGuaranteed:
  351. return []v1.Container{newContainer("guaranteed-container", resource, resource)}
  352. case v1.PodQOSBurstable:
  353. return []v1.Container{newContainer("burstable-container", resource, nil)}
  354. case v1.PodQOSBestEffort:
  355. fallthrough
  356. default:
  357. return []v1.Container{newContainer("best-effort-container", nil, nil)}
  358. }
  359. }
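// TestAdmitUnderNodeConditions verifies admission behavior: with no node
// conditions all pods are admitted, under memory pressure only best-effort
// pods are rejected, and under memory plus disk pressure all pods are rejected.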
  360. func TestAdmitUnderNodeConditions(t *testing.T) {
  361. manager := &managerImpl{}
  362. pods := []*v1.Pod{
  363. newPod("guaranteed-pod", scheduling.DefaultPriorityWhenNoDefaultClassExists, makeContainersByQOS(v1.PodQOSGuaranteed), nil),
  364. newPod("burstable-pod", scheduling.DefaultPriorityWhenNoDefaultClassExists, makeContainersByQOS(v1.PodQOSBurstable), nil),
  365. newPod("best-effort-pod", scheduling.DefaultPriorityWhenNoDefaultClassExists, makeContainersByQOS(v1.PodQOSBestEffort), nil),
  366. }
  367. expected := []bool{true, true, true}
  368. for i, pod := range pods {
  369. if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
  370. t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
  371. }
  372. }
  373. manager.nodeConditions = []v1.NodeConditionType{v1.NodeMemoryPressure}
  374. expected = []bool{true, true, false}
  375. for i, pod := range pods {
  376. if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
  377. t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
  378. }
  379. }
  380. manager.nodeConditions = []v1.NodeConditionType{v1.NodeMemoryPressure, v1.NodeDiskPressure}
  381. expected = []bool{false, false, false}
  382. for i, pod := range pods {
  383. if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
  384. t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
  385. }
  386. }
  387. }
  388. // parseQuantity parses the specified value if provided, otherwise it returns a zero quantity
  389. func parseQuantity(value string) resource.Quantity {
  390. if len(value) == 0 {
  391. return resource.MustParse("0")
  392. }
  393. return resource.MustParse(value)
  394. }
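// TestDiskPressureNodeFs verifies the disk-pressure lifecycle against the node
// fs signal: soft threshold with grace period, hard threshold eviction,
// admission rejection while under pressure, and the transition period.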
  395. func TestDiskPressureNodeFs(t *testing.T) {
  396. defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.LocalStorageCapacityIsolation, true)()
  397. podMaker := makePodWithDiskStats
  398. summaryStatsMaker := makeDiskStats
  399. podsToMake := []podToMake{
  400. {name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), rootFsUsed: "900Mi"},
  401. {name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), logsFsUsed: "50Mi"},
  402. {name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsUsed: "400Mi"},
  403. {name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), perLocalVolumeUsed: "400Mi"},
  404. {name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsUsed: "100Mi"},
  405. }
  406. pods := []*v1.Pod{}
  407. podStats := map[*v1.Pod]statsapi.PodStats{}
  408. for _, podToMake := range podsToMake {
  409. pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.rootFsUsed, podToMake.logsFsUsed, podToMake.perLocalVolumeUsed)
  410. pods = append(pods, pod)
  411. podStats[pod] = podStat
  412. }
  413. podToEvict := pods[0]
  414. activePodsFunc := func() []*v1.Pod {
  415. return pods
  416. }
  417. fakeClock := clock.NewFakeClock(time.Now())
  418. podKiller := &mockPodKiller{}
  419. diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
  420. diskGC := &mockDiskGC{err: nil}
  421. nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
  422. config := Config{
  423. MaxPodGracePeriodSeconds: 5,
  424. PressureTransitionPeriod: time.Minute * 5,
  425. Thresholds: []evictionapi.Threshold{
  426. {
  427. Signal: evictionapi.SignalNodeFsAvailable,
  428. Operator: evictionapi.OpLessThan,
  429. Value: evictionapi.ThresholdValue{
  430. Quantity: quantityMustParse("1Gi"),
  431. },
  432. },
  433. {
  434. Signal: evictionapi.SignalNodeFsAvailable,
  435. Operator: evictionapi.OpLessThan,
  436. Value: evictionapi.ThresholdValue{
  437. Quantity: quantityMustParse("2Gi"),
  438. },
  439. GracePeriod: time.Minute * 2,
  440. },
  441. },
  442. }
  443. summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("16Gi", "200Gi", podStats)}
  444. manager := &managerImpl{
  445. clock: fakeClock,
  446. killPodFunc: podKiller.killPodNow,
  447. imageGC: diskGC,
  448. containerGC: diskGC,
  449. config: config,
  450. recorder: &record.FakeRecorder{},
  451. summaryProvider: summaryProvider,
  452. nodeRef: nodeRef,
  453. nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
  454. thresholdsFirstObservedAt: thresholdsObservedAt{},
  455. }
  456. // create a best effort pod to test admission
  457. podToAdmit, _ := podMaker("pod-to-admit", defaultPriority, newResourceList("", "", ""), newResourceList("", "", ""), "0Gi", "0Gi", "0Gi")
  458. // synchronize
  459. manager.synchronize(diskInfoProvider, activePodsFunc)
  460. // we should not have disk pressure
  461. if manager.IsUnderDiskPressure() {
  462. t.Errorf("Manager should not report disk pressure")
  463. }
  464. // try to admit our pod (should succeed)
  465. if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); !result.Admit {
  466. t.Errorf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, true, result.Admit)
  467. }
  468. // induce soft threshold
  469. fakeClock.Step(1 * time.Minute)
  470. summaryProvider.result = summaryStatsMaker("1.5Gi", "200Gi", podStats)
  471. manager.synchronize(diskInfoProvider, activePodsFunc)
  472. // we should have disk pressure
  473. if !manager.IsUnderDiskPressure() {
  474. t.Errorf("Manager should report disk pressure since soft threshold was met")
  475. }
  476. // verify no pod was killed yet because the grace period has not elapsed.
  477. if podKiller.pod != nil {
  478. t.Errorf("Manager should not have killed a pod yet, but killed: %v", podKiller.pod.Name)
  479. }
  480. // step forward in time past the grace period
  481. fakeClock.Step(3 * time.Minute)
  482. summaryProvider.result = summaryStatsMaker("1.5Gi", "200Gi", podStats)
  483. manager.synchronize(diskInfoProvider, activePodsFunc)
  484. // we should have disk pressure
  485. if !manager.IsUnderDiskPressure() {
  486. t.Errorf("Manager should report disk pressure since soft threshold was met")
  487. }
  488. // verify the right pod was killed with the right grace period.
  489. if podKiller.pod != podToEvict {
  490. t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
  491. }
  492. if podKiller.gracePeriodOverride == nil {
  493. t.Errorf("Manager chose to kill pod but should have had a grace period override.")
  494. }
  495. observedGracePeriod := *podKiller.gracePeriodOverride
  496. if observedGracePeriod != manager.config.MaxPodGracePeriodSeconds {
  497. t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", manager.config.MaxPodGracePeriodSeconds, observedGracePeriod)
  498. }
  499. // reset state
  500. podKiller.pod = nil
  501. podKiller.gracePeriodOverride = nil
  502. // remove disk pressure
  503. fakeClock.Step(20 * time.Minute)
  504. summaryProvider.result = summaryStatsMaker("16Gi", "200Gi", podStats)
  505. manager.synchronize(diskInfoProvider, activePodsFunc)
  506. // we should not have disk pressure
  507. if manager.IsUnderDiskPressure() {
  508. t.Errorf("Manager should not report disk pressure")
  509. }
  510. // induce disk pressure!
  511. fakeClock.Step(1 * time.Minute)
  512. summaryProvider.result = summaryStatsMaker("500Mi", "200Gi", podStats)
  513. manager.synchronize(diskInfoProvider, activePodsFunc)
  514. // we should have disk pressure
  515. if !manager.IsUnderDiskPressure() {
  516. t.Errorf("Manager should report disk pressure")
  517. }
  518. // check the right pod was killed
  519. if podKiller.pod != podToEvict {
  520. t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
  521. }
  522. observedGracePeriod = *podKiller.gracePeriodOverride
  523. if observedGracePeriod != int64(0) {
  524. t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
  525. }
  526. // try to admit our pod (should fail)
  527. if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); result.Admit {
  528. t.Errorf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, false, result.Admit)
  529. }
  530. // reduce disk pressure
  531. fakeClock.Step(1 * time.Minute)
  532. summaryProvider.result = summaryStatsMaker("16Gi", "200Gi", podStats)
  533. podKiller.pod = nil // reset state
  534. manager.synchronize(diskInfoProvider, activePodsFunc)
  535. // we should have disk pressure (because transition period not yet met)
  536. if !manager.IsUnderDiskPressure() {
  537. t.Errorf("Manager should report disk pressure")
  538. }
  539. // no pod should have been killed
  540. if podKiller.pod != nil {
  541. t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
  542. }
  543. // try to admit our pod (should fail)
  544. if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); result.Admit {
  545. t.Errorf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, false, result.Admit)
  546. }
  547. // move the clock past the transition period to ensure that we stop reporting pressure
  548. fakeClock.Step(5 * time.Minute)
  549. summaryProvider.result = summaryStatsMaker("16Gi", "200Gi", podStats)
  550. podKiller.pod = nil // reset state
  551. manager.synchronize(diskInfoProvider, activePodsFunc)
  552. // we should not have disk pressure (because transition period met)
  553. if manager.IsUnderDiskPressure() {
  554. t.Errorf("Manager should not report disk pressure")
  555. }
  556. // no pod should have been killed
  557. if podKiller.pod != nil {
  558. t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
  559. }
  560. // try to admit our pod (should succeed)
  561. if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); !result.Admit {
  562. t.Errorf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, true, result.Admit)
  563. }
  564. }
  565. // TestMinReclaim verifies that min-reclaim works as desired.
  566. func TestMinReclaim(t *testing.T) {
  567. podMaker := makePodWithMemoryStats
  568. summaryStatsMaker := makeMemoryStats
  569. podsToMake := []podToMake{
  570. {name: "guaranteed-low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), memoryWorkingSet: "900Mi"},
  571. {name: "burstable-below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), memoryWorkingSet: "50Mi"},
  572. {name: "burstable-above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), memoryWorkingSet: "400Mi"},
  573. {name: "best-effort-high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), memoryWorkingSet: "400Mi"},
  574. {name: "best-effort-low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), memoryWorkingSet: "100Mi"},
  575. }
  576. pods := []*v1.Pod{}
  577. podStats := map[*v1.Pod]statsapi.PodStats{}
  578. for _, podToMake := range podsToMake {
  579. pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.memoryWorkingSet)
  580. pods = append(pods, pod)
  581. podStats[pod] = podStat
  582. }
  583. podToEvict := pods[4]
  584. activePodsFunc := func() []*v1.Pod {
  585. return pods
  586. }
  587. fakeClock := clock.NewFakeClock(time.Now())
  588. podKiller := &mockPodKiller{}
  589. diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
  590. diskGC := &mockDiskGC{err: nil}
  591. nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
  592. config := Config{
  593. MaxPodGracePeriodSeconds: 5,
  594. PressureTransitionPeriod: time.Minute * 5,
  595. Thresholds: []evictionapi.Threshold{
  596. {
  597. Signal: evictionapi.SignalMemoryAvailable,
  598. Operator: evictionapi.OpLessThan,
  599. Value: evictionapi.ThresholdValue{
  600. Quantity: quantityMustParse("1Gi"),
  601. },
  602. MinReclaim: &evictionapi.ThresholdValue{
  603. Quantity: quantityMustParse("500Mi"),
  604. },
  605. },
  606. },
  607. }
  608. summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("2Gi", podStats)}
  609. manager := &managerImpl{
  610. clock: fakeClock,
  611. killPodFunc: podKiller.killPodNow,
  612. imageGC: diskGC,
  613. containerGC: diskGC,
  614. config: config,
  615. recorder: &record.FakeRecorder{},
  616. summaryProvider: summaryProvider,
  617. nodeRef: nodeRef,
  618. nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
  619. thresholdsFirstObservedAt: thresholdsObservedAt{},
  620. }
  621. // synchronize
  622. manager.synchronize(diskInfoProvider, activePodsFunc)
  623. // we should not have memory pressure
  624. if manager.IsUnderMemoryPressure() {
  625. t.Errorf("Manager should not report memory pressure")
  626. }
  627. // induce memory pressure!
  628. fakeClock.Step(1 * time.Minute)
  629. summaryProvider.result = summaryStatsMaker("500Mi", podStats)
  630. manager.synchronize(diskInfoProvider, activePodsFunc)
  631. // we should have memory pressure
  632. if !manager.IsUnderMemoryPressure() {
  633. t.Errorf("Manager should report memory pressure")
  634. }
  635. // check the right pod was killed
  636. if podKiller.pod != podToEvict {
  637. t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
  638. }
  639. observedGracePeriod := *podKiller.gracePeriodOverride
  640. if observedGracePeriod != int64(0) {
  641. t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
  642. }
  643. // reduce memory pressure, but not enough to satisfy the min-reclaim amount
  644. fakeClock.Step(1 * time.Minute)
  645. summaryProvider.result = summaryStatsMaker("1.2Gi", podStats)
  646. podKiller.pod = nil // reset state
  647. manager.synchronize(diskInfoProvider, activePodsFunc)
  648. // we should have memory pressure (because transition period not yet met)
  649. if !manager.IsUnderMemoryPressure() {
  650. t.Errorf("Manager should report memory pressure")
  651. }
  652. // check the right pod was killed
  653. if podKiller.pod != podToEvict {
  654. t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
  655. }
  656. observedGracePeriod = *podKiller.gracePeriodOverride
  657. if observedGracePeriod != int64(0) {
  658. t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
  659. }
  660. // reduce memory pressure enough to satisfy the min-reclaim amount
  661. fakeClock.Step(1 * time.Minute)
  662. summaryProvider.result = summaryStatsMaker("2Gi", podStats)
  663. podKiller.pod = nil // reset state
  664. manager.synchronize(diskInfoProvider, activePodsFunc)
  665. // we should have memory pressure (because transition period not yet met)
  666. if !manager.IsUnderMemoryPressure() {
  667. t.Errorf("Manager should report memory pressure")
  668. }
  669. // no pod should have been killed
  670. if podKiller.pod != nil {
  671. t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
  672. }
  673. // move the clock past the transition period to ensure that we stop reporting pressure
  674. fakeClock.Step(5 * time.Minute)
  675. summaryProvider.result = summaryStatsMaker("2Gi", podStats)
  676. podKiller.pod = nil // reset state
  677. manager.synchronize(diskInfoProvider, activePodsFunc)
  678. // we should not have memory pressure (because transition period met)
  679. if manager.IsUnderMemoryPressure() {
  680. t.Errorf("Manager should not report memory pressure")
  681. }
  682. // no pod should have been killed
  683. if podKiller.pod != nil {
  684. t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
  685. }
  686. }
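// TestNodeReclaimFuncs verifies that image and container garbage collection
// are attempted first when disk pressure is detected, and that a pod is
// evicted only when GC fails to reclaim enough space.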
  687. func TestNodeReclaimFuncs(t *testing.T) {
  688. defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.LocalStorageCapacityIsolation, true)()
  689. podMaker := makePodWithDiskStats
  690. summaryStatsMaker := makeDiskStats
  691. podsToMake := []podToMake{
  692. {name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), rootFsUsed: "900Mi"},
  693. {name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), logsFsUsed: "50Mi"},
  694. {name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsUsed: "400Mi"},
  695. {name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), perLocalVolumeUsed: "400Mi"},
  696. {name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsUsed: "100Mi"},
  697. }
  698. pods := []*v1.Pod{}
  699. podStats := map[*v1.Pod]statsapi.PodStats{}
  700. for _, podToMake := range podsToMake {
  701. pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.rootFsUsed, podToMake.logsFsUsed, podToMake.perLocalVolumeUsed)
  702. pods = append(pods, pod)
  703. podStats[pod] = podStat
  704. }
  705. podToEvict := pods[0]
  706. activePodsFunc := func() []*v1.Pod {
  707. return pods
  708. }
  709. fakeClock := clock.NewFakeClock(time.Now())
  710. podKiller := &mockPodKiller{}
  711. diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
  712. nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
  713. config := Config{
  714. MaxPodGracePeriodSeconds: 5,
  715. PressureTransitionPeriod: time.Minute * 5,
  716. Thresholds: []evictionapi.Threshold{
  717. {
  718. Signal: evictionapi.SignalNodeFsAvailable,
  719. Operator: evictionapi.OpLessThan,
  720. Value: evictionapi.ThresholdValue{
  721. Quantity: quantityMustParse("1Gi"),
  722. },
  723. MinReclaim: &evictionapi.ThresholdValue{
  724. Quantity: quantityMustParse("500Mi"),
  725. },
  726. },
  727. },
  728. }
  729. summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("16Gi", "200Gi", podStats)}
  730. diskGC := &mockDiskGC{fakeSummaryProvider: summaryProvider, err: nil}
  731. manager := &managerImpl{
  732. clock: fakeClock,
  733. killPodFunc: podKiller.killPodNow,
  734. imageGC: diskGC,
  735. containerGC: diskGC,
  736. config: config,
  737. recorder: &record.FakeRecorder{},
  738. summaryProvider: summaryProvider,
  739. nodeRef: nodeRef,
  740. nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
  741. thresholdsFirstObservedAt: thresholdsObservedAt{},
  742. }
  743. // synchronize
  744. manager.synchronize(diskInfoProvider, activePodsFunc)
  745. // we should not have disk pressure
  746. if manager.IsUnderDiskPressure() {
  747. t.Errorf("Manager should not report disk pressure")
  748. }
  749. // induce hard threshold
  750. fakeClock.Step(1 * time.Minute)
  751. summaryProvider.result = summaryStatsMaker(".9Gi", "200Gi", podStats)
  752. // make GC successfully return disk usage to previous levels
  753. diskGC.summaryAfterGC = summaryStatsMaker("16Gi", "200Gi", podStats)
  754. manager.synchronize(diskInfoProvider, activePodsFunc)
  755. // we should have disk pressure
  756. if !manager.IsUnderDiskPressure() {
  757. t.Errorf("Manager should report disk pressure since hard threshold was met")
  758. }
  759. // verify image and container gc were invoked
  760. if !diskGC.imageGCInvoked || !diskGC.containerGCInvoked {
  761. t.Errorf("Manager should have invoked image gc")
  762. }
  763. // verify no pod was killed because image gc was sufficient
  764. if podKiller.pod != nil {
  765. t.Errorf("Manager should not have killed a pod, but killed: %v", podKiller.pod.Name)
  766. }
  767. // reset state
  768. diskGC.imageGCInvoked = false
  769. diskGC.containerGCInvoked = false
  770. // remove disk pressure
  771. fakeClock.Step(20 * time.Minute)
  772. summaryProvider.result = summaryStatsMaker("16Gi", "200Gi", podStats)
  773. manager.synchronize(diskInfoProvider, activePodsFunc)
  774. // we should not have disk pressure
  775. if manager.IsUnderDiskPressure() {
  776. t.Errorf("Manager should not report disk pressure")
  777. }
  778. // induce disk pressure!
  779. fakeClock.Step(1 * time.Minute)
  780. summaryProvider.result = summaryStatsMaker("400Mi", "200Gi", podStats)
  781. // Don't reclaim any disk
  782. diskGC.summaryAfterGC = summaryStatsMaker("400Mi", "200Gi", podStats)
  783. manager.synchronize(diskInfoProvider, activePodsFunc)
  784. // we should have disk pressure
  785. if !manager.IsUnderDiskPressure() {
  786. t.Errorf("Manager should report disk pressure")
  787. }
  788. // ensure disk gc was invoked
  789. if !diskGC.imageGCInvoked || !diskGC.containerGCInvoked {
  790. t.Errorf("Manager should have invoked image gc")
  791. }
  792. // check the right pod was killed
  793. if podKiller.pod != podToEvict {
  794. t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
  795. }
  796. observedGracePeriod := *podKiller.gracePeriodOverride
  797. if observedGracePeriod != int64(0) {
  798. t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
  799. }
  800. // reduce disk pressure
  801. fakeClock.Step(1 * time.Minute)
  802. summaryProvider.result = summaryStatsMaker("16Gi", "200Gi", podStats)
  803. diskGC.imageGCInvoked = false // reset state
  804. diskGC.containerGCInvoked = false // reset state
  805. podKiller.pod = nil // reset state
  806. manager.synchronize(diskInfoProvider, activePodsFunc)
  807. // we should have disk pressure (because transition period not yet met)
  808. if !manager.IsUnderDiskPressure() {
  809. t.Errorf("Manager should report disk pressure")
  810. }
  811. // no image or container gc should have occurred
  812. if diskGC.imageGCInvoked || diskGC.containerGCInvoked {
  813. t.Errorf("Manager chose to perform image gc when it was not needed")
  814. }
  815. // no pod should have been killed
  816. if podKiller.pod != nil {
  817. t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
  818. }
  819. // move the clock past the transition period to ensure that we stop reporting pressure
  820. fakeClock.Step(5 * time.Minute)
  821. summaryProvider.result = summaryStatsMaker("16Gi", "200Gi", podStats)
  822. diskGC.imageGCInvoked = false // reset state
  823. diskGC.containerGCInvoked = false // reset state
  824. podKiller.pod = nil // reset state
  825. manager.synchronize(diskInfoProvider, activePodsFunc)
  826. // we should not have disk pressure (because transition period met)
  827. if manager.IsUnderDiskPressure() {
  828. t.Errorf("Manager should not report disk pressure")
  829. }
  830. // no image or container gc should have occurred
  831. if diskGC.imageGCInvoked || diskGC.containerGCInvoked {
  832. t.Errorf("Manager chose to perform image gc when it was not needed")
  833. }
  834. // no pod should have been killed
  835. if podKiller.pod != nil {
  836. t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
  837. }
  838. }
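// TestInodePressureNodeFsInodes mirrors the disk-pressure test using the node
// fs inodes-free signal: soft threshold with grace period, hard threshold
// eviction, admission rejection, and the transition period.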
  839. func TestInodePressureNodeFsInodes(t *testing.T) {
  840. podMaker := func(name string, priority int32, requests v1.ResourceList, limits v1.ResourceList, rootInodes, logInodes, volumeInodes string) (*v1.Pod, statsapi.PodStats) {
  841. pod := newPod(name, priority, []v1.Container{
  842. newContainer(name, requests, limits),
  843. }, nil)
  844. podStats := newPodInodeStats(pod, parseQuantity(rootInodes), parseQuantity(logInodes), parseQuantity(volumeInodes))
  845. return pod, podStats
  846. }
  847. summaryStatsMaker := func(rootFsInodesFree, rootFsInodes string, podStats map[*v1.Pod]statsapi.PodStats) *statsapi.Summary {
  848. rootFsInodesFreeVal := resource.MustParse(rootFsInodesFree)
  849. internalRootFsInodesFree := uint64(rootFsInodesFreeVal.Value())
  850. rootFsInodesVal := resource.MustParse(rootFsInodes)
  851. internalRootFsInodes := uint64(rootFsInodesVal.Value())
  852. result := &statsapi.Summary{
  853. Node: statsapi.NodeStats{
  854. Fs: &statsapi.FsStats{
  855. InodesFree: &internalRootFsInodesFree,
  856. Inodes: &internalRootFsInodes,
  857. },
  858. },
  859. Pods: []statsapi.PodStats{},
  860. }
  861. for _, podStat := range podStats {
  862. result.Pods = append(result.Pods, podStat)
  863. }
  864. return result
  865. }
  866. podsToMake := []podToMake{
  867. {name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), rootFsInodesUsed: "900Mi"},
  868. {name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsInodesUsed: "50Mi"},
  869. {name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsInodesUsed: "400Mi"},
  870. {name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsInodesUsed: "400Mi"},
  871. {name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsInodesUsed: "100Mi"},
  872. }
  873. pods := []*v1.Pod{}
  874. podStats := map[*v1.Pod]statsapi.PodStats{}
  875. for _, podToMake := range podsToMake {
  876. pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.rootFsInodesUsed, podToMake.logsFsInodesUsed, podToMake.perLocalVolumeInodesUsed)
  877. pods = append(pods, pod)
  878. podStats[pod] = podStat
  879. }
  880. podToEvict := pods[0]
  881. activePodsFunc := func() []*v1.Pod {
  882. return pods
  883. }
  884. fakeClock := clock.NewFakeClock(time.Now())
  885. podKiller := &mockPodKiller{}
  886. diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
  887. diskGC := &mockDiskGC{err: nil}
  888. nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
  889. config := Config{
  890. MaxPodGracePeriodSeconds: 5,
  891. PressureTransitionPeriod: time.Minute * 5,
  892. Thresholds: []evictionapi.Threshold{
  893. {
  894. Signal: evictionapi.SignalNodeFsInodesFree,
  895. Operator: evictionapi.OpLessThan,
  896. Value: evictionapi.ThresholdValue{
  897. Quantity: quantityMustParse("1Mi"),
  898. },
  899. },
  900. {
  901. Signal: evictionapi.SignalNodeFsInodesFree,
  902. Operator: evictionapi.OpLessThan,
  903. Value: evictionapi.ThresholdValue{
  904. Quantity: quantityMustParse("2Mi"),
  905. },
  906. GracePeriod: time.Minute * 2,
  907. },
  908. },
  909. }
  910. summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("3Mi", "4Mi", podStats)}
  911. manager := &managerImpl{
  912. clock: fakeClock,
  913. killPodFunc: podKiller.killPodNow,
  914. imageGC: diskGC,
  915. containerGC: diskGC,
  916. config: config,
  917. recorder: &record.FakeRecorder{},
  918. summaryProvider: summaryProvider,
  919. nodeRef: nodeRef,
  920. nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
  921. thresholdsFirstObservedAt: thresholdsObservedAt{},
  922. }
  923. // create a best effort pod to test admission
  924. podToAdmit, _ := podMaker("pod-to-admit", defaultPriority, newResourceList("", "", ""), newResourceList("", "", ""), "0", "0", "0")
  925. // synchronize
  926. manager.synchronize(diskInfoProvider, activePodsFunc)
  927. // we should not have disk pressure
  928. if manager.IsUnderDiskPressure() {
  929. t.Errorf("Manager should not report inode pressure")
  930. }
  931. // try to admit our pod (should succeed)
  932. if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); !result.Admit {
  933. t.Errorf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, true, result.Admit)
  934. }
  935. // induce soft threshold
  936. fakeClock.Step(1 * time.Minute)
  937. summaryProvider.result = summaryStatsMaker("1.5Mi", "4Mi", podStats)
  938. manager.synchronize(diskInfoProvider, activePodsFunc)
  939. // we should have disk pressure
  940. if !manager.IsUnderDiskPressure() {
  941. t.Errorf("Manager should report inode pressure since soft threshold was met")
  942. }
  943. // verify no pod was killed yet because the grace period has not elapsed.
  944. if podKiller.pod != nil {
  945. t.Errorf("Manager should not have killed a pod yet, but killed: %v", podKiller.pod.Name)
  946. }
  947. // step forward in time past the grace period
  948. fakeClock.Step(3 * time.Minute)
  949. summaryProvider.result = summaryStatsMaker("1.5Mi", "4Mi", podStats)
  950. manager.synchronize(diskInfoProvider, activePodsFunc)
  951. // we should have disk pressure
  952. if !manager.IsUnderDiskPressure() {
  953. t.Errorf("Manager should report inode pressure since soft threshold was met")
  954. }
  955. // verify the right pod was killed with the right grace period.
  956. if podKiller.pod != podToEvict {
  957. t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
  958. }
  959. if podKiller.gracePeriodOverride == nil {
  960. t.Errorf("Manager chose to kill pod but should have had a grace period override.")
  961. }
  962. observedGracePeriod := *podKiller.gracePeriodOverride
  963. if observedGracePeriod != manager.config.MaxPodGracePeriodSeconds {
  964. t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", manager.config.MaxPodGracePeriodSeconds, observedGracePeriod)
  965. }
  966. // reset state
  967. podKiller.pod = nil
  968. podKiller.gracePeriodOverride = nil
  969. // remove inode pressure
  970. fakeClock.Step(20 * time.Minute)
  971. summaryProvider.result = summaryStatsMaker("3Mi", "4Mi", podStats)
  972. manager.synchronize(diskInfoProvider, activePodsFunc)
  973. // we should not have disk pressure
  974. if manager.IsUnderDiskPressure() {
  975. t.Errorf("Manager should not report inode pressure")
  976. }
  977. // induce inode pressure!
  978. fakeClock.Step(1 * time.Minute)
  979. summaryProvider.result = summaryStatsMaker("0.5Mi", "4Mi", podStats)
  980. manager.synchronize(diskInfoProvider, activePodsFunc)
  981. // we should have disk pressure
  982. if !manager.IsUnderDiskPressure() {
  983. t.Errorf("Manager should report inode pressure")
  984. }
  985. // check the right pod was killed
  986. if podKiller.pod != podToEvict {
  987. t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
  988. }
  989. observedGracePeriod = *podKiller.gracePeriodOverride
  990. if observedGracePeriod != int64(0) {
  991. t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
  992. }
  993. // try to admit our pod (should fail)
  994. if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); result.Admit {
  995. t.Errorf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, false, result.Admit)
  996. }
  997. // reduce inode pressure
  998. fakeClock.Step(1 * time.Minute)
  999. summaryProvider.result = summaryStatsMaker("3Mi", "4Mi", podStats)
  1000. podKiller.pod = nil // reset state
  1001. manager.synchronize(diskInfoProvider, activePodsFunc)
  1002. // we should have disk pressure (because transition period not yet met)
  1003. if !manager.IsUnderDiskPressure() {
  1004. t.Errorf("Manager should report inode pressure")
  1005. }
  1006. // no pod should have been killed
  1007. if podKiller.pod != nil {
  1008. t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
  1009. }
  1010. // try to admit our pod (should fail)
  1011. if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); result.Admit {
  1012. t.Errorf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, false, result.Admit)
  1013. }
  1014. // move the clock past the transition period to ensure that we stop reporting pressure
  1015. fakeClock.Step(5 * time.Minute)
  1016. summaryProvider.result = summaryStatsMaker("3Mi", "4Mi", podStats)
  1017. podKiller.pod = nil // reset state
  1018. manager.synchronize(diskInfoProvider, activePodsFunc)
  1019. // we should not have disk pressure (because transition period met)
  1020. if manager.IsUnderDiskPressure() {
  1021. t.Errorf("Manager should not report inode pressure")
  1022. }
  1023. // no pod should have been killed
  1024. if podKiller.pod != nil {
  1025. t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
  1026. }
  1027. // try to admit our pod (should succeed)
  1028. if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); !result.Admit {
  1029. t.Errorf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, true, result.Admit)
  1030. }
  1031. }
  1032. // TestStaticCriticalPodsAreNotEvicted verifies that a static, system-critical pod is not selected for eviction under memory pressure.
  1033. func TestStaticCriticalPodsAreNotEvicted(t *testing.T) {
  1034. podMaker := makePodWithMemoryStats
  1035. summaryStatsMaker := makeMemoryStats
  1036. podsToMake := []podToMake{
  1037. {name: "critical", priority: scheduling.SystemCriticalPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), memoryWorkingSet: "800Mi"},
  1038. }
  1039. pods := []*v1.Pod{}
  1040. podStats := map[*v1.Pod]statsapi.PodStats{}
  1041. for _, podToMake := range podsToMake {
  1042. pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.memoryWorkingSet)
  1043. pods = append(pods, pod)
  1044. podStats[pod] = podStat
  1045. }
  1046. pods[0].Annotations = map[string]string{
  1047. kubelettypes.ConfigSourceAnnotationKey: kubelettypes.FileSource,
  1048. }
  1049. // Mark the pod as critical
  1050. podPriority := scheduling.SystemCriticalPriority
  1051. pods[0].Spec.Priority = &podPriority
  1052. pods[0].Namespace = kubeapi.NamespaceSystem
  1053. podToEvict := pods[0]
  1054. activePodsFunc := func() []*v1.Pod {
  1055. return pods
  1056. }
	mirrorPodFunc := func(staticPod *v1.Pod) (*v1.Pod, bool) {
		mirrorPod := staticPod.DeepCopy()
		mirrorPod.Annotations[kubelettypes.ConfigSourceAnnotationKey] = kubelettypes.ApiserverSource
		return mirrorPod, true
	}
	fakeClock := clock.NewFakeClock(time.Now())
	podKiller := &mockPodKiller{}
	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
	diskGC := &mockDiskGC{err: nil}
	nodeRef := &v1.ObjectReference{
		Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: "",
	}
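	// Two memory.available thresholds: a hard threshold at 1Gi and a soft
	// threshold at 2Gi with a 2 minute grace period.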
	config := Config{
		MaxPodGracePeriodSeconds: 5,
		PressureTransitionPeriod: time.Minute * 5,
		Thresholds: []evictionapi.Threshold{
			{
				Signal: evictionapi.SignalMemoryAvailable,
				Operator: evictionapi.OpLessThan,
				Value: evictionapi.ThresholdValue{
					Quantity: quantityMustParse("1Gi"),
				},
			},
			{
				Signal: evictionapi.SignalMemoryAvailable,
				Operator: evictionapi.OpLessThan,
				Value: evictionapi.ThresholdValue{
					Quantity: quantityMustParse("2Gi"),
				},
				GracePeriod: time.Minute * 2,
			},
		},
	}
	summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("2Gi", podStats)}
	manager := &managerImpl{
		clock: fakeClock,
		killPodFunc: podKiller.killPodNow,
		mirrorPodFunc: mirrorPodFunc,
		imageGC: diskGC,
		containerGC: diskGC,
		config: config,
		recorder: &record.FakeRecorder{},
		summaryProvider: summaryProvider,
		nodeRef: nodeRef,
		nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
		thresholdsFirstObservedAt: thresholdsObservedAt{},
	}
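	// Report 1500Mi available: below the 2Gi soft threshold but above the 1Gi hard
	// threshold, so pressure is observed while the soft grace period has not yet elapsed.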
	fakeClock.Step(1 * time.Minute)
	summaryProvider.result = summaryStatsMaker("1500Mi", podStats)
	manager.synchronize(diskInfoProvider, activePodsFunc)
	// we should have memory pressure
	if !manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should report memory pressure since soft threshold was met")
	}
	// verify no pod was killed yet because the soft threshold grace period has not elapsed.
	if podKiller.pod != nil {
		t.Errorf("Manager should not have killed a pod yet, but killed: %v", podKiller.pod.Name)
	}
	// step forward in time past the grace period
	fakeClock.Step(3 * time.Minute)
	summaryProvider.result = summaryStatsMaker("1500Mi", podStats)
	manager.synchronize(diskInfoProvider, activePodsFunc)
	// we should have memory pressure
	if !manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should report memory pressure since soft threshold was met")
	}
	// verify the critical static pod was not chosen for eviction.
	if podKiller.pod == podToEvict {
		t.Errorf("Manager chose to kill critical pod: %v, but should have ignored it", podKiller.pod.Name)
	}
	// reset state
	podKiller.pod = nil
	podKiller.gracePeriodOverride = nil
	// remove memory pressure
	fakeClock.Step(20 * time.Minute)
	summaryProvider.result = summaryStatsMaker("3Gi", podStats)
	manager.synchronize(diskInfoProvider, activePodsFunc)
	// we should not have memory pressure
	if manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should not report memory pressure")
	}
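	// Keep the pod static (file source annotation) but clear its priority so it is
	// no longer treated as critical.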
	pods[0].Annotations = map[string]string{
		kubelettypes.ConfigSourceAnnotationKey: kubelettypes.FileSource,
	}
	pods[0].Spec.Priority = nil
	pods[0].Namespace = kubeapi.NamespaceSystem
	// induce memory pressure!
	fakeClock.Step(1 * time.Minute)
	summaryProvider.result = summaryStatsMaker("500Mi", podStats)
	manager.synchronize(diskInfoProvider, activePodsFunc)
	// we should have memory pressure
	if !manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should report memory pressure")
	}
}
// TestAllocatableMemoryPressure verifies eviction and admission behavior when the
// allocatable memory available signal crosses its threshold.
func TestAllocatableMemoryPressure(t *testing.T) {
	podMaker := makePodWithMemoryStats
	summaryStatsMaker := makeMemoryStats
	podsToMake := []podToMake{
		{name: "guaranteed-low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), memoryWorkingSet: "900Mi"},
		{name: "burstable-below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), memoryWorkingSet: "50Mi"},
		{name: "burstable-above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), memoryWorkingSet: "400Mi"},
		{name: "best-effort-high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), memoryWorkingSet: "400Mi"},
		{name: "best-effort-low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), memoryWorkingSet: "100Mi"},
	}
	pods := []*v1.Pod{}
	podStats := map[*v1.Pod]statsapi.PodStats{}
	for _, podToMake := range podsToMake {
		pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.memoryWorkingSet)
		pods = append(pods, pod)
		podStats[pod] = podStat
	}
	podToEvict := pods[4]
	activePodsFunc := func() []*v1.Pod {
		return pods
	}
	fakeClock := clock.NewFakeClock(time.Now())
	podKiller := &mockPodKiller{}
	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
	diskGC := &mockDiskGC{err: nil}
	nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
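	// A single hard threshold on allocatable memory available (< 1Gi); hard
	// evictions use a 0s grace period override, which is asserted below.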
	config := Config{
		MaxPodGracePeriodSeconds: 5,
		PressureTransitionPeriod: time.Minute * 5,
		Thresholds: []evictionapi.Threshold{
			{
				Signal: evictionapi.SignalAllocatableMemoryAvailable,
				Operator: evictionapi.OpLessThan,
				Value: evictionapi.ThresholdValue{
					Quantity: quantityMustParse("1Gi"),
				},
			},
		},
	}
	summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("4Gi", podStats)}
	manager := &managerImpl{
		clock: fakeClock,
		killPodFunc: podKiller.killPodNow,
		imageGC: diskGC,
		containerGC: diskGC,
		config: config,
		recorder: &record.FakeRecorder{},
		summaryProvider: summaryProvider,
		nodeRef: nodeRef,
		nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
		thresholdsFirstObservedAt: thresholdsObservedAt{},
	}
	// create a best-effort pod and a burstable pod to test admission
	bestEffortPodToAdmit, _ := podMaker("best-admit", defaultPriority, newResourceList("", "", ""), newResourceList("", "", ""), "0Gi")
	burstablePodToAdmit, _ := podMaker("burst-admit", defaultPriority, newResourceList("100m", "100Mi", ""), newResourceList("200m", "200Mi", ""), "0Gi")
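	// Under memory pressure the manager should reject new best-effort pods at
	// admission while still admitting burstable pods; both cases are checked below.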
	// synchronize
	manager.synchronize(diskInfoProvider, activePodsFunc)
	// we should not have memory pressure
	if manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should not report memory pressure")
	}
	// try to admit our pods (they should succeed)
	expected := []bool{true, true}
	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
		}
	}
	// induce memory pressure!
	fakeClock.Step(1 * time.Minute)
	pod, podStat := podMaker("guaranteed-high-2", defaultPriority, newResourceList("100m", "1Gi", ""), newResourceList("100m", "1Gi", ""), "1Gi")
	podStats[pod] = podStat
	summaryProvider.result = summaryStatsMaker("500Mi", podStats)
	manager.synchronize(diskInfoProvider, activePodsFunc)
	// we should have memory pressure
	if !manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should report memory pressure")
	}
	// check the right pod was killed
	if podKiller.pod != podToEvict {
		t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
	}
	observedGracePeriod := *podKiller.gracePeriodOverride
	if observedGracePeriod != int64(0) {
		t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
	}
	// reset state
	podKiller.pod = nil
	podKiller.gracePeriodOverride = nil
	// the best-effort pod should not be admitted; the burstable pod should
	expected = []bool{false, true}
	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
		}
	}
	// reduce memory pressure
	fakeClock.Step(1 * time.Minute)
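	// Remove the stats for the transient heavy pod and report 2Gi available so the
	// allocatable threshold is no longer crossed.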
	for pod := range podStats {
		if pod.Name == "guaranteed-high-2" {
			delete(podStats, pod)
		}
	}
	summaryProvider.result = summaryStatsMaker("2Gi", podStats)
	podKiller.pod = nil // reset state
	manager.synchronize(diskInfoProvider, activePodsFunc)
	// we should have memory pressure (because transition period not yet met)
	if !manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should report memory pressure")
	}
	// no pod should have been killed
	if podKiller.pod != nil {
		t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
	}
	// the best-effort pod should still not be admitted; the burstable pod should
	expected = []bool{false, true}
	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
		}
	}
	// move the clock past transition period to ensure that we stop reporting pressure
	fakeClock.Step(5 * time.Minute)
	summaryProvider.result = summaryStatsMaker("2Gi", podStats)
	podKiller.pod = nil // reset state
	manager.synchronize(diskInfoProvider, activePodsFunc)
	// we should not have memory pressure (because transition period met)
	if manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should not report memory pressure")
	}
	// no pod should have been killed
	if podKiller.pod != nil {
		t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
	}
	// both pods should be admitted now
	expected = []bool{true, true}
	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
		}
	}
}
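// TestUpdateMemcgThreshold verifies that memcg threshold notifiers are refreshed at
// most once per notifierRefreshInterval, and that an error returned from UpdateThreshold
// causes the notifier's Description to be queried (presumably for error reporting).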
func TestUpdateMemcgThreshold(t *testing.T) {
	activePodsFunc := func() []*v1.Pod {
		return []*v1.Pod{}
	}
	fakeClock := clock.NewFakeClock(time.Now())
	podKiller := &mockPodKiller{}
	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
	diskGC := &mockDiskGC{err: nil}
	nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
	config := Config{
		MaxPodGracePeriodSeconds: 5,
		PressureTransitionPeriod: time.Minute * 5,
		Thresholds: []evictionapi.Threshold{
			{
				Signal: evictionapi.SignalMemoryAvailable,
				Operator: evictionapi.OpLessThan,
				Value: evictionapi.ThresholdValue{
					Quantity: quantityMustParse("1Gi"),
				},
			},
		},
		PodCgroupRoot: "kubepods",
	}
	summaryProvider := &fakeSummaryProvider{result: makeMemoryStats("2Gi", map[*v1.Pod]statsapi.PodStats{})}
	thresholdNotifier := &MockThresholdNotifier{}
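	// Expect exactly two successful UpdateThreshold calls over the course of this test.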
	thresholdNotifier.On("UpdateThreshold", summaryProvider.result).Return(nil).Twice()
	manager := &managerImpl{
		clock: fakeClock,
		killPodFunc: podKiller.killPodNow,
		imageGC: diskGC,
		containerGC: diskGC,
		config: config,
		recorder: &record.FakeRecorder{},
		summaryProvider: summaryProvider,
		nodeRef: nodeRef,
		nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
		thresholdsFirstObservedAt: thresholdsObservedAt{},
		thresholdNotifiers: []ThresholdNotifier{thresholdNotifier},
	}
	manager.synchronize(diskInfoProvider, activePodsFunc)
	// The UpdateThreshold method should have been called once, since this is the first run.
	thresholdNotifier.AssertNumberOfCalls(t, "UpdateThreshold", 1)
	manager.synchronize(diskInfoProvider, activePodsFunc)
	// The UpdateThreshold method should not have been called again, since not enough time has passed
	thresholdNotifier.AssertNumberOfCalls(t, "UpdateThreshold", 1)
	fakeClock.Step(2 * notifierRefreshInterval)
	manager.synchronize(diskInfoProvider, activePodsFunc)
	// The UpdateThreshold method should be called again since enough time has passed
	thresholdNotifier.AssertNumberOfCalls(t, "UpdateThreshold", 2)
	// new memory threshold notifier that returns an error
	thresholdNotifier = &MockThresholdNotifier{}
	thresholdNotifier.On("UpdateThreshold", summaryProvider.result).Return(fmt.Errorf("error updating threshold"))
	thresholdNotifier.On("Description").Return("mock thresholdNotifier").Once()
	manager.thresholdNotifiers = []ThresholdNotifier{thresholdNotifier}
	fakeClock.Step(2 * notifierRefreshInterval)
	manager.synchronize(diskInfoProvider, activePodsFunc)
	// The UpdateThreshold method should be called because at least notifierRefreshInterval time has passed.
	// The Description method should be called because UpdateThreshold returned an error
	thresholdNotifier.AssertNumberOfCalls(t, "UpdateThreshold", 1)
	thresholdNotifier.AssertNumberOfCalls(t, "Description", 1)
}