- /*
- Copyright 2016 The Kubernetes Authors.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- */
- package eviction
- import (
- "fmt"
- "sort"
- "sync"
- "time"
- "k8s.io/klog"
- v1 "k8s.io/api/core/v1"
- "k8s.io/apimachinery/pkg/api/resource"
- "k8s.io/apimachinery/pkg/util/clock"
- utilfeature "k8s.io/apiserver/pkg/util/feature"
- "k8s.io/client-go/tools/record"
- apiv1resource "k8s.io/kubernetes/pkg/api/v1/resource"
- v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
- v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
- "k8s.io/kubernetes/pkg/features"
- statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
- evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
- "k8s.io/kubernetes/pkg/kubelet/lifecycle"
- "k8s.io/kubernetes/pkg/kubelet/metrics"
- "k8s.io/kubernetes/pkg/kubelet/server/stats"
- kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
- "k8s.io/kubernetes/pkg/kubelet/util/format"
- )
- const (
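- // podCleanupTimeout bounds how long waitForPodsCleanup waits for evicted pods to be
- // cleaned up before giving up; podCleanupPollFreq is its polling interval.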
- podCleanupTimeout = 30 * time.Second
- podCleanupPollFreq = time.Second
- )
- const (
- // signalEphemeralContainerFsLimit is the amount of storage available on the filesystem requested by the container
- signalEphemeralContainerFsLimit string = "ephemeralcontainerfs.limit"
- // signalEphemeralPodFsLimit is the amount of storage available on the filesystem requested by the pod
- signalEphemeralPodFsLimit string = "ephemeralpodfs.limit"
- // signalEmptyDirFsLimit is the amount of storage available on the filesystem requested by an emptyDir
- signalEmptyDirFsLimit string = "emptydirfs.limit"
- )
- // managerImpl implements Manager
- type managerImpl struct {
- // used to track time
- clock clock.Clock
- // config is how the manager is configured
- config Config
- // the function to invoke to kill a pod
- killPodFunc KillPodFunc
- // the function to get the mirror pod for a given static pod
- mirrorPodFunc MirrorPodFunc
- // the interface that knows how to do image gc
- imageGC ImageGC
- // the interface that knows how to do container gc
- containerGC ContainerGC
- // protects access to internal state
- sync.RWMutex
- // node conditions are the set of conditions present
- nodeConditions []v1.NodeConditionType
- // captures when a node condition was last observed based on a threshold being met
- nodeConditionsLastObservedAt nodeConditionsObservedAt
- // nodeRef is a reference to the node
- nodeRef *v1.ObjectReference
- // used to record events about the node
- recorder record.EventRecorder
- // used to measure usage stats on system
- summaryProvider stats.SummaryProvider
- // records when a threshold was first observed
- thresholdsFirstObservedAt thresholdsObservedAt
- // records the set of thresholds that have been met (including grace period) but not yet resolved
- thresholdsMet []evictionapi.Threshold
- // signalToRankFunc maps a resource to ranking function for that resource.
- signalToRankFunc map[evictionapi.Signal]rankFunc
- // signalToNodeReclaimFuncs maps a resource to an ordered list of functions that know how to reclaim that resource.
- signalToNodeReclaimFuncs map[evictionapi.Signal]nodeReclaimFuncs
- // last observations from synchronize
- lastObservations signalObservations
- // dedicatedImageFs indicates if imagefs is on a separate device from the rootfs
- dedicatedImageFs *bool
- // thresholdNotifiers is a list of memory threshold notifiers which each notify for a memory eviction threshold
- thresholdNotifiers []ThresholdNotifier
- // thresholdsLastUpdated is the last time the thresholdNotifiers were updated.
- thresholdsLastUpdated time.Time
- }
- // ensure it implements the required interface
- var _ Manager = &managerImpl{}
- // NewManager returns a configured Manager and an associated admission handler to enforce eviction configuration.
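- //
- // A minimal usage sketch (names such as summaryProvider, killPodNow, and monitoringInterval
- // are placeholders for values the caller wires up elsewhere in the kubelet):
- //
- //	evictionManager, evictionAdmitHandler := NewManager(summaryProvider, evictionConfig,
- //		killPodNow, mirrorPodFunc, imageGC, containerGC, recorder, nodeRef, clock.RealClock{})
- //	// register evictionAdmitHandler with the kubelet's pod admission handlers, then:
- //	evictionManager.Start(diskInfoProvider, activePodsFunc, podCleanedUpFunc, monitoringInterval)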
- func NewManager(
- summaryProvider stats.SummaryProvider,
- config Config,
- killPodFunc KillPodFunc,
- mirrorPodFunc MirrorPodFunc,
- imageGC ImageGC,
- containerGC ContainerGC,
- recorder record.EventRecorder,
- nodeRef *v1.ObjectReference,
- clock clock.Clock,
- ) (Manager, lifecycle.PodAdmitHandler) {
- manager := &managerImpl{
- clock: clock,
- killPodFunc: killPodFunc,
- mirrorPodFunc: mirrorPodFunc,
- imageGC: imageGC,
- containerGC: containerGC,
- config: config,
- recorder: recorder,
- summaryProvider: summaryProvider,
- nodeRef: nodeRef,
- nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
- thresholdsFirstObservedAt: thresholdsObservedAt{},
- dedicatedImageFs: nil,
- thresholdNotifiers: []ThresholdNotifier{},
- }
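- // the same managerImpl instance is returned both as the eviction Manager and as the
- // lifecycle.PodAdmitHandler, since it implements both interfaces (see Admit below).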
- return manager, manager
- }
- // Admit rejects a pod if it is not safe to admit for node stability.
- func (m *managerImpl) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitResult {
- m.RLock()
- defer m.RUnlock()
- if len(m.nodeConditions) == 0 {
- return lifecycle.PodAdmitResult{Admit: true}
- }
- // Admit Critical pods even under resource pressure since they are required for system stability.
- // https://github.com/kubernetes/kubernetes/issues/40573 has more details.
- if kubelettypes.IsCriticalPod(attrs.Pod) {
- return lifecycle.PodAdmitResult{Admit: true}
- }
- // Conditions other than memory pressure reject all pods
- nodeOnlyHasMemoryPressureCondition := hasNodeCondition(m.nodeConditions, v1.NodeMemoryPressure) && len(m.nodeConditions) == 1
- if nodeOnlyHasMemoryPressureCondition {
- notBestEffort := v1.PodQOSBestEffort != v1qos.GetPodQOS(attrs.Pod)
- if notBestEffort {
- return lifecycle.PodAdmitResult{Admit: true}
- }
- // When node has memory pressure, check BestEffort Pod's toleration:
- // admit it if tolerates memory pressure taint, fail for other tolerations, e.g. DiskPressure.
- if v1helper.TolerationsTolerateTaint(attrs.Pod.Spec.Tolerations, &v1.Taint{
- Key: v1.TaintNodeMemoryPressure,
- Effect: v1.TaintEffectNoSchedule,
- }) {
- return lifecycle.PodAdmitResult{Admit: true}
- }
- }
- // reject pods when under memory pressure (if pod is best effort), or if under disk pressure.
- klog.Warningf("Failed to admit pod %s - node has conditions: %v", format.Pod(attrs.Pod), m.nodeConditions)
- return lifecycle.PodAdmitResult{
- Admit: false,
- Reason: Reason,
- Message: fmt.Sprintf(nodeConditionMessageFmt, m.nodeConditions),
- }
- }
- // Start starts the control loop to observe and respond to low compute resources.
- func (m *managerImpl) Start(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc, podCleanedUpFunc PodCleanedUpFunc, monitoringInterval time.Duration) {
- thresholdHandler := func(message string) {
- klog.Infof(message)
- m.synchronize(diskInfoProvider, podFunc)
- }
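- // when kernel memcg notification is enabled, register a notifier for each configured memory
- // threshold so that crossing it triggers an immediate synchronize rather than waiting for
- // the next monitoring interval.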
- if m.config.KernelMemcgNotification {
- for _, threshold := range m.config.Thresholds {
- if threshold.Signal == evictionapi.SignalMemoryAvailable || threshold.Signal == evictionapi.SignalAllocatableMemoryAvailable {
- notifier, err := NewMemoryThresholdNotifier(threshold, m.config.PodCgroupRoot, &CgroupNotifierFactory{}, thresholdHandler)
- if err != nil {
- klog.Warningf("eviction manager: failed to create memory threshold notifier: %v", err)
- } else {
- go notifier.Start()
- m.thresholdNotifiers = append(m.thresholdNotifiers, notifier)
- }
- }
- }
- }
- // start the eviction manager monitoring
- go func() {
- for {
- if evictedPods := m.synchronize(diskInfoProvider, podFunc); evictedPods != nil {
- klog.Infof("eviction manager: pods %s evicted, waiting for pod to be cleaned up", format.Pods(evictedPods))
- m.waitForPodsCleanup(podCleanedUpFunc, evictedPods)
- } else {
- time.Sleep(monitoringInterval)
- }
- }
- }()
- }
- // IsUnderMemoryPressure returns true if the node is under memory pressure.
- func (m *managerImpl) IsUnderMemoryPressure() bool {
- m.RLock()
- defer m.RUnlock()
- return hasNodeCondition(m.nodeConditions, v1.NodeMemoryPressure)
- }
- // IsUnderDiskPressure returns true if the node is under disk pressure.
- func (m *managerImpl) IsUnderDiskPressure() bool {
- m.RLock()
- defer m.RUnlock()
- return hasNodeCondition(m.nodeConditions, v1.NodeDiskPressure)
- }
- // IsUnderPIDPressure returns true if the node is under PID pressure.
- func (m *managerImpl) IsUnderPIDPressure() bool {
- m.RLock()
- defer m.RUnlock()
- return hasNodeCondition(m.nodeConditions, v1.NodePIDPressure)
- }
- // synchronize is the main control loop that enforces eviction thresholds.
- // Returns the pods that were evicted, or nil if no pods were evicted.
- func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc) []*v1.Pod {
- // if we have nothing to do, just return
- thresholds := m.config.Thresholds
- if len(thresholds) == 0 && !utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
- return nil
- }
- klog.V(3).Infof("eviction manager: synchronize housekeeping")
- // build the ranking functions (if not yet known)
- // TODO: have a function in cadvisor that lets us know if global housekeeping has completed
- if m.dedicatedImageFs == nil {
- hasImageFs, err := diskInfoProvider.HasDedicatedImageFs()
- if err != nil {
- return nil
- }
- m.dedicatedImageFs = &hasImageFs
- m.signalToRankFunc = buildSignalToRankFunc(hasImageFs)
- m.signalToNodeReclaimFuncs = buildSignalToNodeReclaimFuncs(m.imageGC, m.containerGC, hasImageFs)
- }
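- // capture the active pods and a fresh stats summary; all threshold evaluation in this
- // pass is based on this single snapshot.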
- activePods := podFunc()
- updateStats := true
- summary, err := m.summaryProvider.Get(updateStats)
- if err != nil {
- klog.Errorf("eviction manager: failed to get summary stats: %v", err)
- return nil
- }
- if m.clock.Since(m.thresholdsLastUpdated) > notifierRefreshInterval {
- m.thresholdsLastUpdated = m.clock.Now()
- for _, notifier := range m.thresholdNotifiers {
- if err := notifier.UpdateThreshold(summary); err != nil {
- klog.Warningf("eviction manager: failed to update %s: %v", notifier.Description(), err)
- }
- }
- }
- // make observations and get a function to derive pod usage stats relative to those observations.
- observations, statsFunc := makeSignalObservations(summary)
- debugLogObservations("observations", observations)
- // determine the set of thresholds met independent of grace period
- thresholds = thresholdsMet(thresholds, observations, false)
- debugLogThresholdsWithObservation("thresholds - ignoring grace period", thresholds, observations)
- // determine the set of thresholds previously met that have not yet satisfied the associated min-reclaim
- if len(m.thresholdsMet) > 0 {
- thresholdsNotYetResolved := thresholdsMet(m.thresholdsMet, observations, true)
- thresholds = mergeThresholds(thresholds, thresholdsNotYetResolved)
- }
- debugLogThresholdsWithObservation("thresholds - reclaim not satisfied", thresholds, observations)
- // track when a threshold was first observed
- now := m.clock.Now()
- thresholdsFirstObservedAt := thresholdsFirstObservedAt(thresholds, m.thresholdsFirstObservedAt, now)
- // the set of node conditions that are triggered by currently observed thresholds
- nodeConditions := nodeConditions(thresholds)
- if len(nodeConditions) > 0 {
- klog.V(3).Infof("eviction manager: node conditions - observed: %v", nodeConditions)
- }
- // track when a node condition was last observed
- nodeConditionsLastObservedAt := nodeConditionsLastObservedAt(nodeConditions, m.nodeConditionsLastObservedAt, now)
- // node conditions report true if they have been observed within the transition period window
- nodeConditions = nodeConditionsObservedSince(nodeConditionsLastObservedAt, m.config.PressureTransitionPeriod, now)
- if len(nodeConditions) > 0 {
- klog.V(3).Infof("eviction manager: node conditions - transition period not met: %v", nodeConditions)
- }
- // determine the set of thresholds we need to drive eviction behavior (i.e. all grace periods are met)
- thresholds = thresholdsMetGracePeriod(thresholdsFirstObservedAt, now)
- debugLogThresholdsWithObservation("thresholds - grace periods satisfied", thresholds, observations)
- // update internal state
- m.Lock()
- m.nodeConditions = nodeConditions
- m.thresholdsFirstObservedAt = thresholdsFirstObservedAt
- m.nodeConditionsLastObservedAt = nodeConditionsLastObservedAt
- m.thresholdsMet = thresholds
- // determine the set of thresholds whose stats have been updated since the last sync
- thresholds = thresholdsUpdatedStats(thresholds, observations, m.lastObservations)
- debugLogThresholdsWithObservation("thresholds - updated stats", thresholds, observations)
- m.lastObservations = observations
- m.Unlock()
- // evict pods if there is a resource usage violation from local volume temporary storage
- // If eviction happens in localStorageEviction function, skip the rest of eviction action
- if utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
- if evictedPods := m.localStorageEviction(summary, activePods); len(evictedPods) > 0 {
- return evictedPods
- }
- }
- if len(thresholds) == 0 {
- klog.V(3).Infof("eviction manager: no resources are starved")
- return nil
- }
- // rank the thresholds by eviction priority
- sort.Sort(byEvictionPriority(thresholds))
- thresholdToReclaim, resourceToReclaim, foundAny := getReclaimableThreshold(thresholds)
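- // if none of the met thresholds maps to a reclaimable resource, there is nothing further
- // to do in this pass.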
- if !foundAny {
- return nil
- }
- klog.Warningf("eviction manager: attempting to reclaim %v", resourceToReclaim)
- // record an event about the resources we are now attempting to reclaim via eviction
- m.recorder.Eventf(m.nodeRef, v1.EventTypeWarning, "EvictionThresholdMet", "Attempting to reclaim %s", resourceToReclaim)
- // check if there are node-level resources we can reclaim to reduce pressure before evicting end-user pods.
- if m.reclaimNodeLevelResources(thresholdToReclaim.Signal, resourceToReclaim) {
- klog.Infof("eviction manager: able to reduce %v pressure without evicting pods.", resourceToReclaim)
- return nil
- }
- klog.Infof("eviction manager: must evict pod(s) to reclaim %v", resourceToReclaim)
- // rank the pods for eviction
- rank, ok := m.signalToRankFunc[thresholdToReclaim.Signal]
- if !ok {
- klog.Errorf("eviction manager: no ranking function for signal %s", thresholdToReclaim.Signal)
- return nil
- }
- // the only viable candidates for eviction are the currently active pods.
- if len(activePods) == 0 {
- klog.Errorf("eviction manager: eviction thresholds have been met, but no pods are active to evict")
- return nil
- }
- // rank the running pods for eviction for the specified resource
- rank(activePods, statsFunc)
- klog.Infof("eviction manager: pods ranked for eviction: %s", format.Pods(activePods))
- // record the age of the metrics for the met thresholds that we are using for evictions.
- for _, t := range thresholds {
- timeObserved := observations[t.Signal].time
- if !timeObserved.IsZero() {
- metrics.EvictionStatsAge.WithLabelValues(string(t.Signal)).Observe(metrics.SinceInSeconds(timeObserved.Time))
- }
- }
- // we kill at most a single pod during each eviction interval
- for i := range activePods {
- pod := activePods[i]
- gracePeriodOverride := int64(0)
- if !isHardEvictionThreshold(thresholdToReclaim) {
- gracePeriodOverride = m.config.MaxPodGracePeriodSeconds
- }
- message, annotations := evictionMessage(resourceToReclaim, pod, statsFunc)
- if m.evictPod(pod, gracePeriodOverride, message, annotations) {
- metrics.Evictions.WithLabelValues(string(thresholdToReclaim.Signal)).Inc()
- return []*v1.Pod{pod}
- }
- }
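- // reaching this point means every ranked candidate was refused (for example, all remaining
- // pods are critical and cannot be evicted).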
- klog.Infof("eviction manager: unable to evict any pods from the node")
- return nil
- }
- func (m *managerImpl) waitForPodsCleanup(podCleanedUpFunc PodCleanedUpFunc, pods []*v1.Pod) {
- timeout := m.clock.NewTimer(podCleanupTimeout)
- defer timeout.Stop()
- ticker := m.clock.NewTicker(podCleanupPollFreq)
- defer ticker.Stop()
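- // poll at podCleanupPollFreq until every evicted pod reports as cleaned up, or give up
- // once podCleanupTimeout elapses.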
- for {
- select {
- case <-timeout.C():
- klog.Warningf("eviction manager: timed out waiting for pods %s to be cleaned up", format.Pods(pods))
- return
- case <-ticker.C():
- for i, pod := range pods {
- if !podCleanedUpFunc(pod) {
- break
- }
- if i == len(pods)-1 {
- klog.Infof("eviction manager: pods %s successfully cleaned up", format.Pods(pods))
- return
- }
- }
- }
- }
- }
- // reclaimNodeLevelResources attempts to reclaim node-level resources. It returns true if thresholds were satisfied and no pod eviction is required.
- func (m *managerImpl) reclaimNodeLevelResources(signalToReclaim evictionapi.Signal, resourceToReclaim v1.ResourceName) bool {
- nodeReclaimFuncs := m.signalToNodeReclaimFuncs[signalToReclaim]
- for _, nodeReclaimFunc := range nodeReclaimFuncs {
- // attempt to reclaim the pressured resource.
- if err := nodeReclaimFunc(); err != nil {
- klog.Warningf("eviction manager: unexpected error when attempting to reduce %v pressure: %v", resourceToReclaim, err)
- }
- }
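- // if any node-level reclaim functions ran, re-measure and check whether the configured
- // thresholds are now satisfied without evicting pods.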
- if len(nodeReclaimFuncs) > 0 {
- summary, err := m.summaryProvider.Get(true)
- if err != nil {
- klog.Errorf("eviction manager: failed to get summary stats after resource reclaim: %v", err)
- return false
- }
- // make observations and get a function to derive pod usage stats relative to those observations.
- observations, _ := makeSignalObservations(summary)
- debugLogObservations("observations after resource reclaim", observations)
- // determine the set of thresholds met independent of grace period
- thresholds := thresholdsMet(m.config.Thresholds, observations, false)
- debugLogThresholdsWithObservation("thresholds after resource reclaim - ignoring grace period", thresholds, observations)
- if len(thresholds) == 0 {
- return true
- }
- }
- return false
- }
- // localStorageEviction checks the emptyDir volume usage for each pod and determines whether it exceeds the specified limit; if so, the pod needs
- // to be evicted. It also checks every container in the pod: if a container's overlay usage exceeds its limit, the pod is evicted as well.
- func (m *managerImpl) localStorageEviction(summary *statsapi.Summary, pods []*v1.Pod) []*v1.Pod {
- statsFunc := cachedStatsFunc(summary.Pods)
- evicted := []*v1.Pod{}
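- // check each pod's emptyDir volumes, pod-level ephemeral storage usage, and per-container
- // ephemeral storage usage, in that order; the first violated limit evicts the pod.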
- for _, pod := range pods {
- podStats, ok := statsFunc(pod)
- if !ok {
- continue
- }
- if m.emptyDirLimitEviction(podStats, pod) {
- evicted = append(evicted, pod)
- continue
- }
- if m.podEphemeralStorageLimitEviction(podStats, pod) {
- evicted = append(evicted, pod)
- continue
- }
- if m.containerEphemeralStorageLimitEviction(podStats, pod) {
- evicted = append(evicted, pod)
- }
- }
- return evicted
- }
- func (m *managerImpl) emptyDirLimitEviction(podStats statsapi.PodStats, pod *v1.Pod) bool {
- podVolumeUsed := make(map[string]*resource.Quantity)
- for _, volume := range podStats.VolumeStats {
- podVolumeUsed[volume.Name] = resource.NewQuantity(int64(*volume.UsedBytes), resource.BinarySI)
- }
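- // compare each emptyDir volume's observed usage against its declared sizeLimit, if one is set.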
- for i := range pod.Spec.Volumes {
- source := &pod.Spec.Volumes[i].VolumeSource
- if source.EmptyDir != nil {
- size := source.EmptyDir.SizeLimit
- used := podVolumeUsed[pod.Spec.Volumes[i].Name]
- if used != nil && size != nil && size.Sign() == 1 && used.Cmp(*size) > 0 {
- // the emptyDir usage exceeds the size limit, evict the pod
- if m.evictPod(pod, 0, fmt.Sprintf(emptyDirMessageFmt, pod.Spec.Volumes[i].Name, size.String()), nil) {
- metrics.Evictions.WithLabelValues(signalEmptyDirFsLimit).Inc()
- return true
- }
- return false
- }
- }
- }
- return false
- }
- func (m *managerImpl) podEphemeralStorageLimitEviction(podStats statsapi.PodStats, pod *v1.Pod) bool {
- _, podLimits := apiv1resource.PodRequestsAndLimits(pod)
- _, found := podLimits[v1.ResourceEphemeralStorage]
- if !found {
- return false
- }
- podEphemeralStorageTotalUsage := &resource.Quantity{}
- var fsStatsSet []fsStatsType
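- // when the node has a dedicated imagefs, only log and local-volume usage is counted against
- // the pod's ephemeral storage limit here; otherwise the container writable layers (rootfs)
- // are included as well.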
- if *m.dedicatedImageFs {
- fsStatsSet = []fsStatsType{fsStatsLogs, fsStatsLocalVolumeSource}
- } else {
- fsStatsSet = []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}
- }
- podEphemeralUsage, err := podLocalEphemeralStorageUsage(podStats, pod, fsStatsSet)
- if err != nil {
- klog.Errorf("eviction manager: error getting pod disk usage %v", err)
- return false
- }
- podEphemeralStorageTotalUsage.Add(podEphemeralUsage[v1.ResourceEphemeralStorage])
- podEphemeralStorageLimit := podLimits[v1.ResourceEphemeralStorage]
- if podEphemeralStorageTotalUsage.Cmp(podEphemeralStorageLimit) > 0 {
- // the pod's total ephemeral storage usage exceeds the pod's ephemeral storage limit (the sum of its containers' limits), evict the pod
- if m.evictPod(pod, 0, fmt.Sprintf(podEphemeralStorageMessageFmt, podEphemeralStorageLimit.String()), nil) {
- metrics.Evictions.WithLabelValues(signalEphemeralPodFsLimit).Inc()
- return true
- }
- return false
- }
- return false
- }
- func (m *managerImpl) containerEphemeralStorageLimitEviction(podStats statsapi.PodStats, pod *v1.Pod) bool {
- thresholdsMap := make(map[string]*resource.Quantity)
- for _, container := range pod.Spec.Containers {
- ephemeralLimit := container.Resources.Limits.StorageEphemeral()
- if ephemeralLimit != nil && ephemeralLimit.Value() != 0 {
- thresholdsMap[container.Name] = ephemeralLimit
- }
- }
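- // a container's ephemeral usage is its log usage plus, when there is no dedicated imagefs,
- // its writable layer (rootfs) usage.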
- for _, containerStat := range podStats.Containers {
- containerUsed := diskUsage(containerStat.Logs)
- if !*m.dedicatedImageFs {
- containerUsed.Add(*diskUsage(containerStat.Rootfs))
- }
- if ephemeralStorageThreshold, ok := thresholdsMap[containerStat.Name]; ok {
- if ephemeralStorageThreshold.Cmp(*containerUsed) < 0 {
- if m.evictPod(pod, 0, fmt.Sprintf(containerEphemeralStorageMessageFmt, containerStat.Name, ephemeralStorageThreshold.String()), nil) {
- metrics.Evictions.WithLabelValues(signalEphemeralContainerFsLimit).Inc()
- return true
- }
- return false
- }
- }
- }
- return false
- }
- func (m *managerImpl) evictPod(pod *v1.Pod, gracePeriodOverride int64, evictMsg string, annotations map[string]string) bool {
- // If the pod is marked as critical and static, and support for critical pod annotations is enabled,
- // do not evict such pods. Static pods are not re-admitted after evictions.
- // https://github.com/kubernetes/kubernetes/issues/40573 has more details.
- if kubelettypes.IsCriticalPod(pod) {
- klog.Errorf("eviction manager: cannot evict a critical pod %s", format.Pod(pod))
- return false
- }
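- // mark the pod Failed with the eviction Reason and message so that the outcome is visible
- // in the pod's status once it is killed.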
- status := v1.PodStatus{
- Phase: v1.PodFailed,
- Message: evictMsg,
- Reason: Reason,
- }
- // record that we are evicting the pod
- m.recorder.AnnotatedEventf(pod, annotations, v1.EventTypeWarning, Reason, evictMsg)
- // this is a blocking call and should only return when the pod and its containers are killed.
- err := m.killPodFunc(pod, status, &gracePeriodOverride)
- if err != nil {
- klog.Errorf("eviction manager: pod %s failed to evict %v", format.Pod(pod), err)
- } else {
- klog.Infof("eviction manager: pod %s is evicted successfully", format.Pod(pod))
- }
- return true
- }