123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873 |
- /*
- Copyright 2017 The Kubernetes Authors.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- */
- package scheduler
- // This file tests the Taint feature.
- import (
- "context"
- "errors"
- "fmt"
- "testing"
- "time"
- v1 "k8s.io/api/core/v1"
- apierrors "k8s.io/apimachinery/pkg/api/errors"
- "k8s.io/apimachinery/pkg/api/resource"
- metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
- "k8s.io/apimachinery/pkg/runtime/schema"
- "k8s.io/apiserver/pkg/admission"
- utilfeature "k8s.io/apiserver/pkg/util/feature"
- "k8s.io/client-go/informers"
- "k8s.io/client-go/kubernetes"
- restclient "k8s.io/client-go/rest"
- featuregatetesting "k8s.io/component-base/featuregate/testing"
- "k8s.io/kubernetes/pkg/controller/nodelifecycle"
- "k8s.io/kubernetes/pkg/features"
- "k8s.io/kubernetes/plugin/pkg/admission/defaulttolerationseconds"
- "k8s.io/kubernetes/plugin/pkg/admission/podtolerationrestriction"
- pluginapi "k8s.io/kubernetes/plugin/pkg/admission/podtolerationrestriction/apis/podtolerationrestriction"
- "k8s.io/kubernetes/test/e2e/framework/pod"
- imageutils "k8s.io/kubernetes/test/utils/image"
- )
- func newPod(nsName, name string, req, limit v1.ResourceList) *v1.Pod {
- return &v1.Pod{
- ObjectMeta: metav1.ObjectMeta{
- Name: name,
- Namespace: nsName,
- },
- Spec: v1.PodSpec{
- Containers: []v1.Container{
- {
- Name: "busybox",
- Image: "busybox",
- Resources: v1.ResourceRequirements{
- Requests: req,
- Limits: limit,
- },
- },
- },
- },
- }
- }
- // TestTaintNodeByCondition tests related cases for TaintNodeByCondition feature.
- func TestTaintNodeByCondition(t *testing.T) {
- // Build PodToleration Admission.
- admission := podtolerationrestriction.NewPodTolerationsPlugin(&pluginapi.Configuration{})
- testCtx := initTestMaster(t, "default", admission)
- // Build clientset and informers for controllers.
- externalClientset := kubernetes.NewForConfigOrDie(&restclient.Config{
- QPS: -1,
- Host: testCtx.httpServer.URL,
- ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
- externalInformers := informers.NewSharedInformerFactory(externalClientset, time.Second)
- admission.SetExternalKubeClientSet(externalClientset)
- admission.SetExternalKubeInformerFactory(externalInformers)
- testCtx = initTestScheduler(t, testCtx, false, nil)
- defer cleanupTest(t, testCtx)
- cs := testCtx.clientSet
- informers := testCtx.informerFactory
- nsName := testCtx.ns.Name
- // Start NodeLifecycleController for taint.
- nc, err := nodelifecycle.NewNodeLifecycleController(
- informers.Coordination().V1().Leases(),
- informers.Core().V1().Pods(),
- informers.Core().V1().Nodes(),
- informers.Apps().V1().DaemonSets(),
- cs,
- time.Hour, // Node monitor grace period
- time.Second, // Node startup grace period
- time.Second, // Node monitor period
- time.Second, // Pod eviction timeout
- 100, // Eviction limiter QPS
- 100, // Secondary eviction limiter QPS
- 100, // Large cluster threshold
- 100, // Unhealthy zone threshold
- true, // Run taint manager
- true, // Use taint based evictions
- )
- if err != nil {
- t.Errorf("Failed to create node controller: %v", err)
- return
- }
- go nc.Run(testCtx.ctx.Done())
- // Waiting for all controller sync.
- externalInformers.Start(testCtx.ctx.Done())
- externalInformers.WaitForCacheSync(testCtx.ctx.Done())
- informers.Start(testCtx.ctx.Done())
- informers.WaitForCacheSync(testCtx.ctx.Done())
- // -------------------------------------------
- // Test TaintNodeByCondition feature.
- // -------------------------------------------
- nodeRes := v1.ResourceList{
- v1.ResourceCPU: resource.MustParse("4000m"),
- v1.ResourceMemory: resource.MustParse("16Gi"),
- v1.ResourcePods: resource.MustParse("110"),
- }
- podRes := v1.ResourceList{
- v1.ResourceCPU: resource.MustParse("100m"),
- v1.ResourceMemory: resource.MustParse("100Mi"),
- }
- notReadyToleration := v1.Toleration{
- Key: v1.TaintNodeNotReady,
- Operator: v1.TolerationOpExists,
- Effect: v1.TaintEffectNoSchedule,
- }
- unschedulableToleration := v1.Toleration{
- Key: v1.TaintNodeUnschedulable,
- Operator: v1.TolerationOpExists,
- Effect: v1.TaintEffectNoSchedule,
- }
- memoryPressureToleration := v1.Toleration{
- Key: v1.TaintNodeMemoryPressure,
- Operator: v1.TolerationOpExists,
- Effect: v1.TaintEffectNoSchedule,
- }
- diskPressureToleration := v1.Toleration{
- Key: v1.TaintNodeDiskPressure,
- Operator: v1.TolerationOpExists,
- Effect: v1.TaintEffectNoSchedule,
- }
- networkUnavailableToleration := v1.Toleration{
- Key: v1.TaintNodeNetworkUnavailable,
- Operator: v1.TolerationOpExists,
- Effect: v1.TaintEffectNoSchedule,
- }
- pidPressureToleration := v1.Toleration{
- Key: v1.TaintNodePIDPressure,
- Operator: v1.TolerationOpExists,
- Effect: v1.TaintEffectNoSchedule,
- }
- bestEffortPod := newPod(nsName, "besteffort-pod", nil, nil)
- burstablePod := newPod(nsName, "burstable-pod", podRes, nil)
- guaranteePod := newPod(nsName, "guarantee-pod", podRes, podRes)
- type podCase struct {
- pod *v1.Pod
- tolerations []v1.Toleration
- fits bool
- }
- // switch to table driven testings
- tests := []struct {
- name string
- existingTaints []v1.Taint
- nodeConditions []v1.NodeCondition
- unschedulable bool
- expectedTaints []v1.Taint
- pods []podCase
- }{
- {
- name: "not-ready node",
- nodeConditions: []v1.NodeCondition{
- {
- Type: v1.NodeReady,
- Status: v1.ConditionFalse,
- },
- },
- expectedTaints: []v1.Taint{
- {
- Key: v1.TaintNodeNotReady,
- Effect: v1.TaintEffectNoSchedule,
- },
- },
- pods: []podCase{
- {
- pod: bestEffortPod,
- fits: false,
- },
- {
- pod: burstablePod,
- fits: false,
- },
- {
- pod: guaranteePod,
- fits: false,
- },
- {
- pod: bestEffortPod,
- tolerations: []v1.Toleration{notReadyToleration},
- fits: true,
- },
- },
- },
- {
- name: "unschedulable node",
- unschedulable: true, // node.spec.unschedulable = true
- nodeConditions: []v1.NodeCondition{
- {
- Type: v1.NodeReady,
- Status: v1.ConditionTrue,
- },
- },
- expectedTaints: []v1.Taint{
- {
- Key: v1.TaintNodeUnschedulable,
- Effect: v1.TaintEffectNoSchedule,
- },
- },
- pods: []podCase{
- {
- pod: bestEffortPod,
- fits: false,
- },
- {
- pod: burstablePod,
- fits: false,
- },
- {
- pod: guaranteePod,
- fits: false,
- },
- {
- pod: bestEffortPod,
- tolerations: []v1.Toleration{unschedulableToleration},
- fits: true,
- },
- },
- },
- {
- name: "memory pressure node",
- nodeConditions: []v1.NodeCondition{
- {
- Type: v1.NodeMemoryPressure,
- Status: v1.ConditionTrue,
- },
- {
- Type: v1.NodeReady,
- Status: v1.ConditionTrue,
- },
- },
- expectedTaints: []v1.Taint{
- {
- Key: v1.TaintNodeMemoryPressure,
- Effect: v1.TaintEffectNoSchedule,
- },
- },
- // In MemoryPressure condition, both Burstable and Guarantee pods are scheduled;
- // BestEffort pod with toleration are also scheduled.
- pods: []podCase{
- {
- pod: bestEffortPod,
- fits: false,
- },
- {
- pod: bestEffortPod,
- tolerations: []v1.Toleration{memoryPressureToleration},
- fits: true,
- },
- {
- pod: bestEffortPod,
- tolerations: []v1.Toleration{diskPressureToleration},
- fits: false,
- },
- {
- pod: burstablePod,
- fits: true,
- },
- {
- pod: guaranteePod,
- fits: true,
- },
- },
- },
- {
- name: "disk pressure node",
- nodeConditions: []v1.NodeCondition{
- {
- Type: v1.NodeDiskPressure,
- Status: v1.ConditionTrue,
- },
- {
- Type: v1.NodeReady,
- Status: v1.ConditionTrue,
- },
- },
- expectedTaints: []v1.Taint{
- {
- Key: v1.TaintNodeDiskPressure,
- Effect: v1.TaintEffectNoSchedule,
- },
- },
- // In DiskPressure condition, only pods with toleration can be scheduled.
- pods: []podCase{
- {
- pod: bestEffortPod,
- fits: false,
- },
- {
- pod: burstablePod,
- fits: false,
- },
- {
- pod: guaranteePod,
- fits: false,
- },
- {
- pod: bestEffortPod,
- tolerations: []v1.Toleration{diskPressureToleration},
- fits: true,
- },
- {
- pod: bestEffortPod,
- tolerations: []v1.Toleration{memoryPressureToleration},
- fits: false,
- },
- },
- },
- {
- name: "network unavailable and node is ready",
- nodeConditions: []v1.NodeCondition{
- {
- Type: v1.NodeNetworkUnavailable,
- Status: v1.ConditionTrue,
- },
- {
- Type: v1.NodeReady,
- Status: v1.ConditionTrue,
- },
- },
- expectedTaints: []v1.Taint{
- {
- Key: v1.TaintNodeNetworkUnavailable,
- Effect: v1.TaintEffectNoSchedule,
- },
- },
- pods: []podCase{
- {
- pod: bestEffortPod,
- fits: false,
- },
- {
- pod: burstablePod,
- fits: false,
- },
- {
- pod: guaranteePod,
- fits: false,
- },
- {
- pod: burstablePod,
- tolerations: []v1.Toleration{
- networkUnavailableToleration,
- },
- fits: true,
- },
- },
- },
- {
- name: "network unavailable and node is not ready",
- nodeConditions: []v1.NodeCondition{
- {
- Type: v1.NodeNetworkUnavailable,
- Status: v1.ConditionTrue,
- },
- {
- Type: v1.NodeReady,
- Status: v1.ConditionFalse,
- },
- },
- expectedTaints: []v1.Taint{
- {
- Key: v1.TaintNodeNetworkUnavailable,
- Effect: v1.TaintEffectNoSchedule,
- },
- {
- Key: v1.TaintNodeNotReady,
- Effect: v1.TaintEffectNoSchedule,
- },
- },
- pods: []podCase{
- {
- pod: bestEffortPod,
- fits: false,
- },
- {
- pod: burstablePod,
- fits: false,
- },
- {
- pod: guaranteePod,
- fits: false,
- },
- {
- pod: burstablePod,
- tolerations: []v1.Toleration{
- networkUnavailableToleration,
- },
- fits: false,
- },
- {
- pod: burstablePod,
- tolerations: []v1.Toleration{
- networkUnavailableToleration,
- notReadyToleration,
- },
- fits: true,
- },
- },
- },
- {
- name: "pid pressure node",
- nodeConditions: []v1.NodeCondition{
- {
- Type: v1.NodePIDPressure,
- Status: v1.ConditionTrue,
- },
- {
- Type: v1.NodeReady,
- Status: v1.ConditionTrue,
- },
- },
- expectedTaints: []v1.Taint{
- {
- Key: v1.TaintNodePIDPressure,
- Effect: v1.TaintEffectNoSchedule,
- },
- },
- pods: []podCase{
- {
- pod: bestEffortPod,
- fits: false,
- },
- {
- pod: burstablePod,
- fits: false,
- },
- {
- pod: guaranteePod,
- fits: false,
- },
- {
- pod: bestEffortPod,
- tolerations: []v1.Toleration{pidPressureToleration},
- fits: true,
- },
- },
- },
- {
- name: "multi taints on node",
- nodeConditions: []v1.NodeCondition{
- {
- Type: v1.NodePIDPressure,
- Status: v1.ConditionTrue,
- },
- {
- Type: v1.NodeMemoryPressure,
- Status: v1.ConditionTrue,
- },
- {
- Type: v1.NodeDiskPressure,
- Status: v1.ConditionTrue,
- },
- {
- Type: v1.NodeReady,
- Status: v1.ConditionTrue,
- },
- },
- expectedTaints: []v1.Taint{
- {
- Key: v1.TaintNodeDiskPressure,
- Effect: v1.TaintEffectNoSchedule,
- },
- {
- Key: v1.TaintNodeMemoryPressure,
- Effect: v1.TaintEffectNoSchedule,
- },
- {
- Key: v1.TaintNodePIDPressure,
- Effect: v1.TaintEffectNoSchedule,
- },
- },
- },
- }
- for _, test := range tests {
- t.Run(test.name, func(t *testing.T) {
- node := &v1.Node{
- ObjectMeta: metav1.ObjectMeta{
- Name: "node-1",
- },
- Spec: v1.NodeSpec{
- Unschedulable: test.unschedulable,
- Taints: test.existingTaints,
- },
- Status: v1.NodeStatus{
- Capacity: nodeRes,
- Allocatable: nodeRes,
- Conditions: test.nodeConditions,
- },
- }
- if _, err := cs.CoreV1().Nodes().Create(context.TODO(), node, metav1.CreateOptions{}); err != nil {
- t.Errorf("Failed to create node, err: %v", err)
- }
- if err := waitForNodeTaints(cs, node, test.expectedTaints); err != nil {
- node, err = cs.CoreV1().Nodes().Get(context.TODO(), node.Name, metav1.GetOptions{})
- if err != nil {
- t.Errorf("Failed to get node <%s>", node.Name)
- }
- t.Errorf("Failed to taint node <%s>, expected: %v, got: %v, err: %v", node.Name, test.expectedTaints, node.Spec.Taints, err)
- }
- var pods []*v1.Pod
- for i, p := range test.pods {
- pod := p.pod.DeepCopy()
- pod.Name = fmt.Sprintf("%s-%d", pod.Name, i)
- pod.Spec.Tolerations = p.tolerations
- createdPod, err := cs.CoreV1().Pods(pod.Namespace).Create(context.TODO(), pod, metav1.CreateOptions{})
- if err != nil {
- t.Fatalf("Failed to create pod %s/%s, error: %v",
- pod.Namespace, pod.Name, err)
- }
- pods = append(pods, createdPod)
- if p.fits {
- if err := waitForPodToSchedule(cs, createdPod); err != nil {
- t.Errorf("Failed to schedule pod %s/%s on the node, err: %v",
- pod.Namespace, pod.Name, err)
- }
- } else {
- if err := waitForPodUnschedulable(cs, createdPod); err != nil {
- t.Errorf("Unschedulable pod %s/%s gets scheduled on the node, err: %v",
- pod.Namespace, pod.Name, err)
- }
- }
- }
- cleanupPods(cs, t, pods)
- cleanupNodes(cs, t)
- waitForSchedulerCacheCleanup(testCtx.scheduler, t)
- })
- }
- }
- // TestTaintBasedEvictions tests related cases for the TaintBasedEvictions feature
- func TestTaintBasedEvictions(t *testing.T) {
- // we need at least 2 nodes to prevent lifecycle manager from entering "fully-disrupted" mode
- nodeCount := 3
- zero := int64(0)
- gracePeriod := int64(1)
- heartbeatInternal := time.Second * 2
- testPod := &v1.Pod{
- ObjectMeta: metav1.ObjectMeta{Name: "testpod1", DeletionGracePeriodSeconds: &zero},
- Spec: v1.PodSpec{
- Containers: []v1.Container{
- {Name: "container", Image: imageutils.GetPauseImageName()},
- },
- Tolerations: []v1.Toleration{
- {
- Key: v1.TaintNodeNotReady,
- Operator: v1.TolerationOpExists,
- Effect: v1.TaintEffectNoExecute,
- },
- },
- TerminationGracePeriodSeconds: &gracePeriod,
- },
- }
- tolerationSeconds := []int64{200, 300, 0}
- tests := []struct {
- name string
- nodeTaints []v1.Taint
- nodeConditions []v1.NodeCondition
- pod *v1.Pod
- waitForPodCondition string
- }{
- {
- name: "Taint based evictions for NodeNotReady and 200 tolerationseconds",
- nodeTaints: []v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoExecute}},
- nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}},
- pod: testPod,
- waitForPodCondition: "updated with tolerationSeconds of 200",
- },
- {
- name: "Taint based evictions for NodeNotReady with no pod tolerations",
- nodeTaints: []v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoExecute}},
- nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}},
- pod: &v1.Pod{
- ObjectMeta: metav1.ObjectMeta{Name: "testpod1"},
- Spec: v1.PodSpec{
- Containers: []v1.Container{
- {Name: "container", Image: imageutils.GetPauseImageName()},
- },
- },
- },
- waitForPodCondition: "updated with tolerationSeconds=300",
- },
- {
- name: "Taint based evictions for NodeNotReady and 0 tolerationseconds",
- nodeTaints: []v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoExecute}},
- nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}},
- pod: testPod,
- waitForPodCondition: "terminating",
- },
- {
- name: "Taint based evictions for NodeUnreachable",
- nodeTaints: []v1.Taint{{Key: v1.TaintNodeUnreachable, Effect: v1.TaintEffectNoExecute}},
- nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionUnknown}},
- },
- }
- // Enable TaintBasedEvictions
- defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.TaintBasedEvictions, true)()
- // Build admission chain handler.
- podTolerations := podtolerationrestriction.NewPodTolerationsPlugin(&pluginapi.Configuration{})
- admission := admission.NewChainHandler(
- podTolerations,
- defaulttolerationseconds.NewDefaultTolerationSeconds(),
- )
- for i, test := range tests {
- t.Run(test.name, func(t *testing.T) {
- testCtx := initTestMaster(t, "taint-based-evictions", admission)
- // Build clientset and informers for controllers.
- externalClientset := kubernetes.NewForConfigOrDie(&restclient.Config{
- QPS: -1,
- Host: testCtx.httpServer.URL,
- ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
- externalInformers := informers.NewSharedInformerFactory(externalClientset, time.Second)
- podTolerations.SetExternalKubeClientSet(externalClientset)
- podTolerations.SetExternalKubeInformerFactory(externalInformers)
- testCtx = initTestScheduler(t, testCtx, true, nil)
- defer cleanupTest(t, testCtx)
- cs := testCtx.clientSet
- informers := testCtx.informerFactory
- _, err := cs.CoreV1().Namespaces().Create(context.TODO(), testCtx.ns, metav1.CreateOptions{})
- if err != nil {
- t.Errorf("Failed to create namespace %+v", err)
- }
- // Start NodeLifecycleController for taint.
- nc, err := nodelifecycle.NewNodeLifecycleController(
- informers.Coordination().V1().Leases(),
- informers.Core().V1().Pods(),
- informers.Core().V1().Nodes(),
- informers.Apps().V1().DaemonSets(),
- cs,
- 5*time.Second, // Node monitor grace period
- time.Minute, // Node startup grace period
- time.Millisecond, // Node monitor period
- time.Second, // Pod eviction timeout
- 100, // Eviction limiter QPS
- 100, // Secondary eviction limiter QPS
- 50, // Large cluster threshold
- 0.55, // Unhealthy zone threshold
- true, // Run taint manager
- true, // Use taint based evictions
- )
- if err != nil {
- t.Errorf("Failed to create node controller: %v", err)
- return
- }
- go nc.Run(testCtx.ctx.Done())
- // Waiting for all controller sync.
- externalInformers.Start(testCtx.ctx.Done())
- externalInformers.WaitForCacheSync(testCtx.ctx.Done())
- informers.Start(testCtx.ctx.Done())
- informers.WaitForCacheSync(testCtx.ctx.Done())
- nodeRes := v1.ResourceList{
- v1.ResourceCPU: resource.MustParse("4000m"),
- v1.ResourceMemory: resource.MustParse("16Gi"),
- v1.ResourcePods: resource.MustParse("110"),
- }
- var nodes []*v1.Node
- for i := 0; i < nodeCount; i++ {
- nodes = append(nodes, &v1.Node{
- ObjectMeta: metav1.ObjectMeta{
- Name: fmt.Sprintf("node-%d", i),
- Labels: map[string]string{v1.LabelZoneRegion: "region1", v1.LabelZoneFailureDomain: "zone1"},
- },
- Spec: v1.NodeSpec{},
- Status: v1.NodeStatus{
- Capacity: nodeRes,
- Allocatable: nodeRes,
- Conditions: []v1.NodeCondition{
- {
- Type: v1.NodeReady,
- Status: v1.ConditionTrue,
- LastHeartbeatTime: metav1.Now(),
- },
- },
- },
- })
- if _, err := cs.CoreV1().Nodes().Create(context.TODO(), nodes[i], metav1.CreateOptions{}); err != nil {
- t.Errorf("Failed to create node, err: %v", err)
- }
- }
- neededNode := nodes[1]
- if test.pod != nil {
- test.pod.Name = fmt.Sprintf("testpod-%d", i)
- if len(test.pod.Spec.Tolerations) > 0 {
- test.pod.Spec.Tolerations[0].TolerationSeconds = &tolerationSeconds[i]
- }
- test.pod, err = cs.CoreV1().Pods(testCtx.ns.Name).Create(context.TODO(), test.pod, metav1.CreateOptions{})
- if err != nil {
- t.Fatalf("Test Failed: error: %v, while creating pod", err)
- }
- if err := waitForPodToSchedule(cs, test.pod); err != nil {
- t.Errorf("Failed to schedule pod %s/%s on the node, err: %v",
- test.pod.Namespace, test.pod.Name, err)
- }
- test.pod, err = cs.CoreV1().Pods(testCtx.ns.Name).Get(context.TODO(), test.pod.Name, metav1.GetOptions{})
- if err != nil {
- t.Fatalf("Test Failed: error: %v, while creating pod", err)
- }
- neededNode, err = cs.CoreV1().Nodes().Get(context.TODO(), test.pod.Spec.NodeName, metav1.GetOptions{})
- if err != nil {
- t.Fatalf("Error while getting node associated with pod %v with err %v", test.pod.Name, err)
- }
- }
- // Regularly send heartbeat event to APIServer so that the cluster doesn't enter fullyDisruption mode.
- // TODO(Huang-Wei): use "NodeDisruptionExclusion" feature to simply the below logic when it's beta.
- for i := 0; i < nodeCount; i++ {
- var conditions []v1.NodeCondition
- // If current node is not <neededNode>
- if neededNode.Name != nodes[i].Name {
- conditions = []v1.NodeCondition{
- {
- Type: v1.NodeReady,
- Status: v1.ConditionTrue,
- },
- }
- } else {
- c, err := nodeReadyStatus(test.nodeConditions)
- if err != nil {
- t.Error(err)
- }
- // Need to distinguish NodeReady/False and NodeReady/Unknown.
- // If we try to update the node with condition NotReady/False, i.e. expect a NotReady:NoExecute taint
- // we need to keep sending the update event to keep it alive, rather than just sending once.
- if c == v1.ConditionFalse {
- conditions = test.nodeConditions
- } else if c == v1.ConditionUnknown {
- // If it's expected to update the node with condition NotReady/Unknown,
- // i.e. expect a Unreachable:NoExecute taint,
- // we need to only send the update event once to simulate the network unreachable scenario.
- nodeCopy := nodeCopyWithConditions(nodes[i], test.nodeConditions)
- if err := updateNodeStatus(cs, nodeCopy); err != nil && !apierrors.IsNotFound(err) {
- t.Errorf("Cannot update node: %v", err)
- }
- continue
- }
- }
- // Keeping sending NodeReady/True or NodeReady/False events.
- go func(i int) {
- for {
- select {
- case <-testCtx.ctx.Done():
- return
- case <-time.Tick(heartbeatInternal):
- nodeCopy := nodeCopyWithConditions(nodes[i], conditions)
- if err := updateNodeStatus(cs, nodeCopy); err != nil && !apierrors.IsNotFound(err) {
- t.Errorf("Cannot update node: %v", err)
- }
- }
- }
- }(i)
- }
- if err := waitForNodeTaints(cs, neededNode, test.nodeTaints); err != nil {
- t.Errorf("Failed to taint node in test %d <%s>, err: %v", i, neededNode.Name, err)
- }
- if test.pod != nil {
- err = pod.WaitForPodCondition(cs, testCtx.ns.Name, test.pod.Name, test.waitForPodCondition, time.Second*15, func(pod *v1.Pod) (bool, error) {
- // as node is unreachable, pod0 is expected to be in Terminating status
- // rather than getting deleted
- if tolerationSeconds[i] == 0 {
- return pod.DeletionTimestamp != nil, nil
- }
- if seconds, err := getTolerationSeconds(pod.Spec.Tolerations); err == nil {
- return seconds == tolerationSeconds[i], nil
- }
- return false, nil
- })
- if err != nil {
- pod, _ := cs.CoreV1().Pods(testCtx.ns.Name).Get(context.TODO(), test.pod.Name, metav1.GetOptions{})
- t.Fatalf("Error: %v, Expected test pod to be %s but it's %v", err, test.waitForPodCondition, pod)
- }
- cleanupPods(cs, t, []*v1.Pod{test.pod})
- }
- cleanupNodes(cs, t)
- waitForSchedulerCacheCleanup(testCtx.scheduler, t)
- })
- }
- }
- func getTolerationSeconds(tolerations []v1.Toleration) (int64, error) {
- for _, t := range tolerations {
- if t.Key == v1.TaintNodeNotReady && t.Effect == v1.TaintEffectNoExecute && t.Operator == v1.TolerationOpExists {
- return *t.TolerationSeconds, nil
- }
- }
- return 0, fmt.Errorf("cannot find toleration")
- }
- // nodeReadyStatus returns the status of first condition with type NodeReady.
- // If none of the condition is of type NodeReady, returns an error.
- func nodeReadyStatus(conditions []v1.NodeCondition) (v1.ConditionStatus, error) {
- for _, c := range conditions {
- if c.Type != v1.NodeReady {
- continue
- }
- // Just return the first condition with type NodeReady
- return c.Status, nil
- }
- return v1.ConditionFalse, errors.New("None of the conditions is of type NodeReady")
- }
- func nodeCopyWithConditions(node *v1.Node, conditions []v1.NodeCondition) *v1.Node {
- copy := node.DeepCopy()
- copy.ResourceVersion = "0"
- copy.Status.Conditions = conditions
- for i := range copy.Status.Conditions {
- copy.Status.Conditions[i].LastHeartbeatTime = metav1.Now()
- }
- return copy
- }
|