// taint_test.go — Kubernetes scheduler integration tests for the Taint feature.
  1. /*
  2. Copyright 2017 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package scheduler
  14. // This file tests the Taint feature.
  15. import (
  16. "context"
  17. "errors"
  18. "fmt"
  19. "testing"
  20. "time"
  21. v1 "k8s.io/api/core/v1"
  22. apierrors "k8s.io/apimachinery/pkg/api/errors"
  23. "k8s.io/apimachinery/pkg/api/resource"
  24. metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
  25. "k8s.io/apimachinery/pkg/runtime/schema"
  26. "k8s.io/apiserver/pkg/admission"
  27. utilfeature "k8s.io/apiserver/pkg/util/feature"
  28. "k8s.io/client-go/informers"
  29. "k8s.io/client-go/kubernetes"
  30. restclient "k8s.io/client-go/rest"
  31. featuregatetesting "k8s.io/component-base/featuregate/testing"
  32. "k8s.io/kubernetes/pkg/controller/nodelifecycle"
  33. "k8s.io/kubernetes/pkg/features"
  34. "k8s.io/kubernetes/plugin/pkg/admission/defaulttolerationseconds"
  35. "k8s.io/kubernetes/plugin/pkg/admission/podtolerationrestriction"
  36. pluginapi "k8s.io/kubernetes/plugin/pkg/admission/podtolerationrestriction/apis/podtolerationrestriction"
  37. "k8s.io/kubernetes/test/e2e/framework/pod"
  38. imageutils "k8s.io/kubernetes/test/utils/image"
  39. )
  40. func newPod(nsName, name string, req, limit v1.ResourceList) *v1.Pod {
  41. return &v1.Pod{
  42. ObjectMeta: metav1.ObjectMeta{
  43. Name: name,
  44. Namespace: nsName,
  45. },
  46. Spec: v1.PodSpec{
  47. Containers: []v1.Container{
  48. {
  49. Name: "busybox",
  50. Image: "busybox",
  51. Resources: v1.ResourceRequirements{
  52. Requests: req,
  53. Limits: limit,
  54. },
  55. },
  56. },
  57. },
  58. }
  59. }
// TestTaintNodeByCondition tests related cases for TaintNodeByCondition feature.
// For each case it creates a node with the given conditions, waits for the
// NodeLifecycleController to apply the expected condition-derived taints, then
// verifies that pods with/without matching tolerations do or do not schedule.
func TestTaintNodeByCondition(t *testing.T) {
	// Build PodToleration Admission.
	admission := podtolerationrestriction.NewPodTolerationsPlugin(&pluginapi.Configuration{})
	testCtx := initTestMaster(t, "default", admission)
	// Build clientset and informers for controllers.
	externalClientset := kubernetes.NewForConfigOrDie(&restclient.Config{
		QPS:           -1,
		Host:          testCtx.httpServer.URL,
		ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
	externalInformers := informers.NewSharedInformerFactory(externalClientset, time.Second)
	admission.SetExternalKubeClientSet(externalClientset)
	admission.SetExternalKubeInformerFactory(externalInformers)
	testCtx = initTestScheduler(t, testCtx, false, nil)
	defer cleanupTest(t, testCtx)
	cs := testCtx.clientSet
	informers := testCtx.informerFactory
	nsName := testCtx.ns.Name
	// Start NodeLifecycleController for taint.
	nc, err := nodelifecycle.NewNodeLifecycleController(
		informers.Coordination().V1().Leases(),
		informers.Core().V1().Pods(),
		informers.Core().V1().Nodes(),
		informers.Apps().V1().DaemonSets(),
		cs,
		time.Hour,   // Node monitor grace period
		time.Second, // Node startup grace period
		time.Second, // Node monitor period
		time.Second, // Pod eviction timeout
		100,         // Eviction limiter QPS
		100,         // Secondary eviction limiter QPS
		100,         // Large cluster threshold
		100,         // Unhealthy zone threshold
		true,        // Run taint manager
		true,        // Use taint based evictions
	)
	if err != nil {
		t.Errorf("Failed to create node controller: %v", err)
		return
	}
	go nc.Run(testCtx.ctx.Done())
	// Waiting for all controller sync.
	externalInformers.Start(testCtx.ctx.Done())
	externalInformers.WaitForCacheSync(testCtx.ctx.Done())
	informers.Start(testCtx.ctx.Done())
	informers.WaitForCacheSync(testCtx.ctx.Done())
	// -------------------------------------------
	// Test TaintNodeByCondition feature.
	// -------------------------------------------
	nodeRes := v1.ResourceList{
		v1.ResourceCPU:    resource.MustParse("4000m"),
		v1.ResourceMemory: resource.MustParse("16Gi"),
		v1.ResourcePods:   resource.MustParse("110"),
	}
	podRes := v1.ResourceList{
		v1.ResourceCPU:    resource.MustParse("100m"),
		v1.ResourceMemory: resource.MustParse("100Mi"),
	}
	// One NoSchedule toleration per condition-derived taint key, used to build
	// the "fits" pod cases below.
	notReadyToleration := v1.Toleration{
		Key:      v1.TaintNodeNotReady,
		Operator: v1.TolerationOpExists,
		Effect:   v1.TaintEffectNoSchedule,
	}
	unschedulableToleration := v1.Toleration{
		Key:      v1.TaintNodeUnschedulable,
		Operator: v1.TolerationOpExists,
		Effect:   v1.TaintEffectNoSchedule,
	}
	memoryPressureToleration := v1.Toleration{
		Key:      v1.TaintNodeMemoryPressure,
		Operator: v1.TolerationOpExists,
		Effect:   v1.TaintEffectNoSchedule,
	}
	diskPressureToleration := v1.Toleration{
		Key:      v1.TaintNodeDiskPressure,
		Operator: v1.TolerationOpExists,
		Effect:   v1.TaintEffectNoSchedule,
	}
	networkUnavailableToleration := v1.Toleration{
		Key:      v1.TaintNodeNetworkUnavailable,
		Operator: v1.TolerationOpExists,
		Effect:   v1.TaintEffectNoSchedule,
	}
	pidPressureToleration := v1.Toleration{
		Key:      v1.TaintNodePIDPressure,
		Operator: v1.TolerationOpExists,
		Effect:   v1.TaintEffectNoSchedule,
	}
	// One pod per QoS class: BestEffort (no resources), Burstable (requests
	// only), Guaranteed (requests == limits).
	bestEffortPod := newPod(nsName, "besteffort-pod", nil, nil)
	burstablePod := newPod(nsName, "burstable-pod", podRes, nil)
	guaranteePod := newPod(nsName, "guarantee-pod", podRes, podRes)
	type podCase struct {
		pod         *v1.Pod
		tolerations []v1.Toleration
		fits        bool // whether the pod is expected to schedule onto the tainted node
	}
	// switch to table driven testings
	tests := []struct {
		name           string
		existingTaints []v1.Taint
		nodeConditions []v1.NodeCondition
		unschedulable  bool
		expectedTaints []v1.Taint
		pods           []podCase
	}{
		{
			name: "not-ready node",
			nodeConditions: []v1.NodeCondition{
				{
					Type:   v1.NodeReady,
					Status: v1.ConditionFalse,
				},
			},
			expectedTaints: []v1.Taint{
				{
					Key:    v1.TaintNodeNotReady,
					Effect: v1.TaintEffectNoSchedule,
				},
			},
			pods: []podCase{
				{
					pod:  bestEffortPod,
					fits: false,
				},
				{
					pod:  burstablePod,
					fits: false,
				},
				{
					pod:  guaranteePod,
					fits: false,
				},
				{
					pod:         bestEffortPod,
					tolerations: []v1.Toleration{notReadyToleration},
					fits:        true,
				},
			},
		},
		{
			name:          "unschedulable node",
			unschedulable: true, // node.spec.unschedulable = true
			nodeConditions: []v1.NodeCondition{
				{
					Type:   v1.NodeReady,
					Status: v1.ConditionTrue,
				},
			},
			expectedTaints: []v1.Taint{
				{
					Key:    v1.TaintNodeUnschedulable,
					Effect: v1.TaintEffectNoSchedule,
				},
			},
			pods: []podCase{
				{
					pod:  bestEffortPod,
					fits: false,
				},
				{
					pod:  burstablePod,
					fits: false,
				},
				{
					pod:  guaranteePod,
					fits: false,
				},
				{
					pod:         bestEffortPod,
					tolerations: []v1.Toleration{unschedulableToleration},
					fits:        true,
				},
			},
		},
		{
			name: "memory pressure node",
			nodeConditions: []v1.NodeCondition{
				{
					Type:   v1.NodeMemoryPressure,
					Status: v1.ConditionTrue,
				},
				{
					Type:   v1.NodeReady,
					Status: v1.ConditionTrue,
				},
			},
			expectedTaints: []v1.Taint{
				{
					Key:    v1.TaintNodeMemoryPressure,
					Effect: v1.TaintEffectNoSchedule,
				},
			},
			// In MemoryPressure condition, both Burstable and Guarantee pods are scheduled;
			// BestEffort pod with toleration are also scheduled.
			pods: []podCase{
				{
					pod:  bestEffortPod,
					fits: false,
				},
				{
					pod:         bestEffortPod,
					tolerations: []v1.Toleration{memoryPressureToleration},
					fits:        true,
				},
				{
					pod:         bestEffortPod,
					tolerations: []v1.Toleration{diskPressureToleration},
					fits:        false,
				},
				{
					pod:  burstablePod,
					fits: true,
				},
				{
					pod:  guaranteePod,
					fits: true,
				},
			},
		},
		{
			name: "disk pressure node",
			nodeConditions: []v1.NodeCondition{
				{
					Type:   v1.NodeDiskPressure,
					Status: v1.ConditionTrue,
				},
				{
					Type:   v1.NodeReady,
					Status: v1.ConditionTrue,
				},
			},
			expectedTaints: []v1.Taint{
				{
					Key:    v1.TaintNodeDiskPressure,
					Effect: v1.TaintEffectNoSchedule,
				},
			},
			// In DiskPressure condition, only pods with toleration can be scheduled.
			pods: []podCase{
				{
					pod:  bestEffortPod,
					fits: false,
				},
				{
					pod:  burstablePod,
					fits: false,
				},
				{
					pod:  guaranteePod,
					fits: false,
				},
				{
					pod:         bestEffortPod,
					tolerations: []v1.Toleration{diskPressureToleration},
					fits:        true,
				},
				{
					pod:         bestEffortPod,
					tolerations: []v1.Toleration{memoryPressureToleration},
					fits:        false,
				},
			},
		},
		{
			name: "network unavailable and node is ready",
			nodeConditions: []v1.NodeCondition{
				{
					Type:   v1.NodeNetworkUnavailable,
					Status: v1.ConditionTrue,
				},
				{
					Type:   v1.NodeReady,
					Status: v1.ConditionTrue,
				},
			},
			expectedTaints: []v1.Taint{
				{
					Key:    v1.TaintNodeNetworkUnavailable,
					Effect: v1.TaintEffectNoSchedule,
				},
			},
			pods: []podCase{
				{
					pod:  bestEffortPod,
					fits: false,
				},
				{
					pod:  burstablePod,
					fits: false,
				},
				{
					pod:  guaranteePod,
					fits: false,
				},
				{
					pod: burstablePod,
					tolerations: []v1.Toleration{
						networkUnavailableToleration,
					},
					fits: true,
				},
			},
		},
		{
			name: "network unavailable and node is not ready",
			nodeConditions: []v1.NodeCondition{
				{
					Type:   v1.NodeNetworkUnavailable,
					Status: v1.ConditionTrue,
				},
				{
					Type:   v1.NodeReady,
					Status: v1.ConditionFalse,
				},
			},
			// Both taints are expected; a pod must tolerate both to fit.
			expectedTaints: []v1.Taint{
				{
					Key:    v1.TaintNodeNetworkUnavailable,
					Effect: v1.TaintEffectNoSchedule,
				},
				{
					Key:    v1.TaintNodeNotReady,
					Effect: v1.TaintEffectNoSchedule,
				},
			},
			pods: []podCase{
				{
					pod:  bestEffortPod,
					fits: false,
				},
				{
					pod:  burstablePod,
					fits: false,
				},
				{
					pod:  guaranteePod,
					fits: false,
				},
				{
					pod: burstablePod,
					tolerations: []v1.Toleration{
						networkUnavailableToleration,
					},
					fits: false,
				},
				{
					pod: burstablePod,
					tolerations: []v1.Toleration{
						networkUnavailableToleration,
						notReadyToleration,
					},
					fits: true,
				},
			},
		},
		{
			name: "pid pressure node",
			nodeConditions: []v1.NodeCondition{
				{
					Type:   v1.NodePIDPressure,
					Status: v1.ConditionTrue,
				},
				{
					Type:   v1.NodeReady,
					Status: v1.ConditionTrue,
				},
			},
			expectedTaints: []v1.Taint{
				{
					Key:    v1.TaintNodePIDPressure,
					Effect: v1.TaintEffectNoSchedule,
				},
			},
			pods: []podCase{
				{
					pod:  bestEffortPod,
					fits: false,
				},
				{
					pod:  burstablePod,
					fits: false,
				},
				{
					pod:  guaranteePod,
					fits: false,
				},
				{
					pod:         bestEffortPod,
					tolerations: []v1.Toleration{pidPressureToleration},
					fits:        true,
				},
			},
		},
		{
			name: "multi taints on node",
			nodeConditions: []v1.NodeCondition{
				{
					Type:   v1.NodePIDPressure,
					Status: v1.ConditionTrue,
				},
				{
					Type:   v1.NodeMemoryPressure,
					Status: v1.ConditionTrue,
				},
				{
					Type:   v1.NodeDiskPressure,
					Status: v1.ConditionTrue,
				},
				{
					Type:   v1.NodeReady,
					Status: v1.ConditionTrue,
				},
			},
			expectedTaints: []v1.Taint{
				{
					Key:    v1.TaintNodeDiskPressure,
					Effect: v1.TaintEffectNoSchedule,
				},
				{
					Key:    v1.TaintNodeMemoryPressure,
					Effect: v1.TaintEffectNoSchedule,
				},
				{
					Key:    v1.TaintNodePIDPressure,
					Effect: v1.TaintEffectNoSchedule,
				},
			},
		},
	}
	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			node := &v1.Node{
				ObjectMeta: metav1.ObjectMeta{
					Name: "node-1",
				},
				Spec: v1.NodeSpec{
					Unschedulable: test.unschedulable,
					Taints:        test.existingTaints,
				},
				Status: v1.NodeStatus{
					Capacity:    nodeRes,
					Allocatable: nodeRes,
					Conditions:  test.nodeConditions,
				},
			}
			if _, err := cs.CoreV1().Nodes().Create(context.TODO(), node, metav1.CreateOptions{}); err != nil {
				t.Errorf("Failed to create node, err: %v", err)
			}
			// Wait for the lifecycle controller to translate the node
			// conditions into the expected taints.
			if err := waitForNodeTaints(cs, node, test.expectedTaints); err != nil {
				node, err = cs.CoreV1().Nodes().Get(context.TODO(), node.Name, metav1.GetOptions{})
				if err != nil {
					t.Errorf("Failed to get node <%s>", node.Name)
				}
				t.Errorf("Failed to taint node <%s>, expected: %v, got: %v, err: %v", node.Name, test.expectedTaints, node.Spec.Taints, err)
			}
			var pods []*v1.Pod
			for i, p := range test.pods {
				// Deep-copy so the shared template pods are not mutated, and
				// suffix the name with the case index to keep names unique.
				pod := p.pod.DeepCopy()
				pod.Name = fmt.Sprintf("%s-%d", pod.Name, i)
				pod.Spec.Tolerations = p.tolerations
				createdPod, err := cs.CoreV1().Pods(pod.Namespace).Create(context.TODO(), pod, metav1.CreateOptions{})
				if err != nil {
					t.Fatalf("Failed to create pod %s/%s, error: %v",
						pod.Namespace, pod.Name, err)
				}
				pods = append(pods, createdPod)
				if p.fits {
					if err := waitForPodToSchedule(cs, createdPod); err != nil {
						t.Errorf("Failed to schedule pod %s/%s on the node, err: %v",
							pod.Namespace, pod.Name, err)
					}
				} else {
					if err := waitForPodUnschedulable(cs, createdPod); err != nil {
						t.Errorf("Unschedulable pod %s/%s gets scheduled on the node, err: %v",
							pod.Namespace, pod.Name, err)
					}
				}
			}
			cleanupPods(cs, t, pods)
			cleanupNodes(cs, t)
			waitForSchedulerCacheCleanup(testCtx.scheduler, t)
		})
	}
}
  544. // TestTaintBasedEvictions tests related cases for the TaintBasedEvictions feature
  545. func TestTaintBasedEvictions(t *testing.T) {
  546. // we need at least 2 nodes to prevent lifecycle manager from entering "fully-disrupted" mode
  547. nodeCount := 3
  548. zero := int64(0)
  549. gracePeriod := int64(1)
  550. heartbeatInternal := time.Second * 2
  551. testPod := &v1.Pod{
  552. ObjectMeta: metav1.ObjectMeta{Name: "testpod1", DeletionGracePeriodSeconds: &zero},
  553. Spec: v1.PodSpec{
  554. Containers: []v1.Container{
  555. {Name: "container", Image: imageutils.GetPauseImageName()},
  556. },
  557. Tolerations: []v1.Toleration{
  558. {
  559. Key: v1.TaintNodeNotReady,
  560. Operator: v1.TolerationOpExists,
  561. Effect: v1.TaintEffectNoExecute,
  562. },
  563. },
  564. TerminationGracePeriodSeconds: &gracePeriod,
  565. },
  566. }
  567. tolerationSeconds := []int64{200, 300, 0}
  568. tests := []struct {
  569. name string
  570. nodeTaints []v1.Taint
  571. nodeConditions []v1.NodeCondition
  572. pod *v1.Pod
  573. waitForPodCondition string
  574. }{
  575. {
  576. name: "Taint based evictions for NodeNotReady and 200 tolerationseconds",
  577. nodeTaints: []v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoExecute}},
  578. nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}},
  579. pod: testPod,
  580. waitForPodCondition: "updated with tolerationSeconds of 200",
  581. },
  582. {
  583. name: "Taint based evictions for NodeNotReady with no pod tolerations",
  584. nodeTaints: []v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoExecute}},
  585. nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}},
  586. pod: &v1.Pod{
  587. ObjectMeta: metav1.ObjectMeta{Name: "testpod1"},
  588. Spec: v1.PodSpec{
  589. Containers: []v1.Container{
  590. {Name: "container", Image: imageutils.GetPauseImageName()},
  591. },
  592. },
  593. },
  594. waitForPodCondition: "updated with tolerationSeconds=300",
  595. },
  596. {
  597. name: "Taint based evictions for NodeNotReady and 0 tolerationseconds",
  598. nodeTaints: []v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoExecute}},
  599. nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}},
  600. pod: testPod,
  601. waitForPodCondition: "terminating",
  602. },
  603. {
  604. name: "Taint based evictions for NodeUnreachable",
  605. nodeTaints: []v1.Taint{{Key: v1.TaintNodeUnreachable, Effect: v1.TaintEffectNoExecute}},
  606. nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionUnknown}},
  607. },
  608. }
  609. // Enable TaintBasedEvictions
  610. defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.TaintBasedEvictions, true)()
  611. // Build admission chain handler.
  612. podTolerations := podtolerationrestriction.NewPodTolerationsPlugin(&pluginapi.Configuration{})
  613. admission := admission.NewChainHandler(
  614. podTolerations,
  615. defaulttolerationseconds.NewDefaultTolerationSeconds(),
  616. )
  617. for i, test := range tests {
  618. t.Run(test.name, func(t *testing.T) {
  619. testCtx := initTestMaster(t, "taint-based-evictions", admission)
  620. // Build clientset and informers for controllers.
  621. externalClientset := kubernetes.NewForConfigOrDie(&restclient.Config{
  622. QPS: -1,
  623. Host: testCtx.httpServer.URL,
  624. ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
  625. externalInformers := informers.NewSharedInformerFactory(externalClientset, time.Second)
  626. podTolerations.SetExternalKubeClientSet(externalClientset)
  627. podTolerations.SetExternalKubeInformerFactory(externalInformers)
  628. testCtx = initTestScheduler(t, testCtx, true, nil)
  629. defer cleanupTest(t, testCtx)
  630. cs := testCtx.clientSet
  631. informers := testCtx.informerFactory
  632. _, err := cs.CoreV1().Namespaces().Create(context.TODO(), testCtx.ns, metav1.CreateOptions{})
  633. if err != nil {
  634. t.Errorf("Failed to create namespace %+v", err)
  635. }
  636. // Start NodeLifecycleController for taint.
  637. nc, err := nodelifecycle.NewNodeLifecycleController(
  638. informers.Coordination().V1().Leases(),
  639. informers.Core().V1().Pods(),
  640. informers.Core().V1().Nodes(),
  641. informers.Apps().V1().DaemonSets(),
  642. cs,
  643. 5*time.Second, // Node monitor grace period
  644. time.Minute, // Node startup grace period
  645. time.Millisecond, // Node monitor period
  646. time.Second, // Pod eviction timeout
  647. 100, // Eviction limiter QPS
  648. 100, // Secondary eviction limiter QPS
  649. 50, // Large cluster threshold
  650. 0.55, // Unhealthy zone threshold
  651. true, // Run taint manager
  652. true, // Use taint based evictions
  653. )
  654. if err != nil {
  655. t.Errorf("Failed to create node controller: %v", err)
  656. return
  657. }
  658. go nc.Run(testCtx.ctx.Done())
  659. // Waiting for all controller sync.
  660. externalInformers.Start(testCtx.ctx.Done())
  661. externalInformers.WaitForCacheSync(testCtx.ctx.Done())
  662. informers.Start(testCtx.ctx.Done())
  663. informers.WaitForCacheSync(testCtx.ctx.Done())
  664. nodeRes := v1.ResourceList{
  665. v1.ResourceCPU: resource.MustParse("4000m"),
  666. v1.ResourceMemory: resource.MustParse("16Gi"),
  667. v1.ResourcePods: resource.MustParse("110"),
  668. }
  669. var nodes []*v1.Node
  670. for i := 0; i < nodeCount; i++ {
  671. nodes = append(nodes, &v1.Node{
  672. ObjectMeta: metav1.ObjectMeta{
  673. Name: fmt.Sprintf("node-%d", i),
  674. Labels: map[string]string{v1.LabelZoneRegion: "region1", v1.LabelZoneFailureDomain: "zone1"},
  675. },
  676. Spec: v1.NodeSpec{},
  677. Status: v1.NodeStatus{
  678. Capacity: nodeRes,
  679. Allocatable: nodeRes,
  680. Conditions: []v1.NodeCondition{
  681. {
  682. Type: v1.NodeReady,
  683. Status: v1.ConditionTrue,
  684. LastHeartbeatTime: metav1.Now(),
  685. },
  686. },
  687. },
  688. })
  689. if _, err := cs.CoreV1().Nodes().Create(context.TODO(), nodes[i], metav1.CreateOptions{}); err != nil {
  690. t.Errorf("Failed to create node, err: %v", err)
  691. }
  692. }
  693. neededNode := nodes[1]
  694. if test.pod != nil {
  695. test.pod.Name = fmt.Sprintf("testpod-%d", i)
  696. if len(test.pod.Spec.Tolerations) > 0 {
  697. test.pod.Spec.Tolerations[0].TolerationSeconds = &tolerationSeconds[i]
  698. }
  699. test.pod, err = cs.CoreV1().Pods(testCtx.ns.Name).Create(context.TODO(), test.pod, metav1.CreateOptions{})
  700. if err != nil {
  701. t.Fatalf("Test Failed: error: %v, while creating pod", err)
  702. }
  703. if err := waitForPodToSchedule(cs, test.pod); err != nil {
  704. t.Errorf("Failed to schedule pod %s/%s on the node, err: %v",
  705. test.pod.Namespace, test.pod.Name, err)
  706. }
  707. test.pod, err = cs.CoreV1().Pods(testCtx.ns.Name).Get(context.TODO(), test.pod.Name, metav1.GetOptions{})
  708. if err != nil {
  709. t.Fatalf("Test Failed: error: %v, while creating pod", err)
  710. }
  711. neededNode, err = cs.CoreV1().Nodes().Get(context.TODO(), test.pod.Spec.NodeName, metav1.GetOptions{})
  712. if err != nil {
  713. t.Fatalf("Error while getting node associated with pod %v with err %v", test.pod.Name, err)
  714. }
  715. }
  716. // Regularly send heartbeat event to APIServer so that the cluster doesn't enter fullyDisruption mode.
  717. // TODO(Huang-Wei): use "NodeDisruptionExclusion" feature to simply the below logic when it's beta.
  718. for i := 0; i < nodeCount; i++ {
  719. var conditions []v1.NodeCondition
  720. // If current node is not <neededNode>
  721. if neededNode.Name != nodes[i].Name {
  722. conditions = []v1.NodeCondition{
  723. {
  724. Type: v1.NodeReady,
  725. Status: v1.ConditionTrue,
  726. },
  727. }
  728. } else {
  729. c, err := nodeReadyStatus(test.nodeConditions)
  730. if err != nil {
  731. t.Error(err)
  732. }
  733. // Need to distinguish NodeReady/False and NodeReady/Unknown.
  734. // If we try to update the node with condition NotReady/False, i.e. expect a NotReady:NoExecute taint
  735. // we need to keep sending the update event to keep it alive, rather than just sending once.
  736. if c == v1.ConditionFalse {
  737. conditions = test.nodeConditions
  738. } else if c == v1.ConditionUnknown {
  739. // If it's expected to update the node with condition NotReady/Unknown,
  740. // i.e. expect a Unreachable:NoExecute taint,
  741. // we need to only send the update event once to simulate the network unreachable scenario.
  742. nodeCopy := nodeCopyWithConditions(nodes[i], test.nodeConditions)
  743. if err := updateNodeStatus(cs, nodeCopy); err != nil && !apierrors.IsNotFound(err) {
  744. t.Errorf("Cannot update node: %v", err)
  745. }
  746. continue
  747. }
  748. }
  749. // Keeping sending NodeReady/True or NodeReady/False events.
  750. go func(i int) {
  751. for {
  752. select {
  753. case <-testCtx.ctx.Done():
  754. return
  755. case <-time.Tick(heartbeatInternal):
  756. nodeCopy := nodeCopyWithConditions(nodes[i], conditions)
  757. if err := updateNodeStatus(cs, nodeCopy); err != nil && !apierrors.IsNotFound(err) {
  758. t.Errorf("Cannot update node: %v", err)
  759. }
  760. }
  761. }
  762. }(i)
  763. }
  764. if err := waitForNodeTaints(cs, neededNode, test.nodeTaints); err != nil {
  765. t.Errorf("Failed to taint node in test %d <%s>, err: %v", i, neededNode.Name, err)
  766. }
  767. if test.pod != nil {
  768. err = pod.WaitForPodCondition(cs, testCtx.ns.Name, test.pod.Name, test.waitForPodCondition, time.Second*15, func(pod *v1.Pod) (bool, error) {
  769. // as node is unreachable, pod0 is expected to be in Terminating status
  770. // rather than getting deleted
  771. if tolerationSeconds[i] == 0 {
  772. return pod.DeletionTimestamp != nil, nil
  773. }
  774. if seconds, err := getTolerationSeconds(pod.Spec.Tolerations); err == nil {
  775. return seconds == tolerationSeconds[i], nil
  776. }
  777. return false, nil
  778. })
  779. if err != nil {
  780. pod, _ := cs.CoreV1().Pods(testCtx.ns.Name).Get(context.TODO(), test.pod.Name, metav1.GetOptions{})
  781. t.Fatalf("Error: %v, Expected test pod to be %s but it's %v", err, test.waitForPodCondition, pod)
  782. }
  783. cleanupPods(cs, t, []*v1.Pod{test.pod})
  784. }
  785. cleanupNodes(cs, t)
  786. waitForSchedulerCacheCleanup(testCtx.scheduler, t)
  787. })
  788. }
  789. }
  790. func getTolerationSeconds(tolerations []v1.Toleration) (int64, error) {
  791. for _, t := range tolerations {
  792. if t.Key == v1.TaintNodeNotReady && t.Effect == v1.TaintEffectNoExecute && t.Operator == v1.TolerationOpExists {
  793. return *t.TolerationSeconds, nil
  794. }
  795. }
  796. return 0, fmt.Errorf("cannot find toleration")
  797. }
  798. // nodeReadyStatus returns the status of first condition with type NodeReady.
  799. // If none of the condition is of type NodeReady, returns an error.
  800. func nodeReadyStatus(conditions []v1.NodeCondition) (v1.ConditionStatus, error) {
  801. for _, c := range conditions {
  802. if c.Type != v1.NodeReady {
  803. continue
  804. }
  805. // Just return the first condition with type NodeReady
  806. return c.Status, nil
  807. }
  808. return v1.ConditionFalse, errors.New("None of the conditions is of type NodeReady")
  809. }
  810. func nodeCopyWithConditions(node *v1.Node, conditions []v1.NodeCondition) *v1.Node {
  811. copy := node.DeepCopy()
  812. copy.ResourceVersion = "0"
  813. copy.Status.Conditions = conditions
  814. for i := range copy.Status.Conditions {
  815. copy.Status.Conditions[i].LastHeartbeatTime = metav1.Now()
  816. }
  817. return copy
  818. }