helpers.go 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045
  1. /*
  2. Copyright 2016 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package eviction
  14. import (
  15. "fmt"
  16. "sort"
  17. "strconv"
  18. "strings"
  19. "time"
  20. v1 "k8s.io/api/core/v1"
  21. "k8s.io/apimachinery/pkg/api/resource"
  22. "k8s.io/klog"
  23. "k8s.io/kubernetes/pkg/api/v1/pod"
  24. v1resource "k8s.io/kubernetes/pkg/api/v1/resource"
  25. statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
  26. evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
  27. kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
  28. volumeutils "k8s.io/kubernetes/pkg/volume/util"
  29. )
  30. const (
  31. unsupportedEvictionSignal = "unsupported eviction signal %v"
  32. // Reason is the reason reported back in status.
  33. Reason = "Evicted"
  34. // nodeLowMessageFmt is the message for evictions due to resource pressure.
  35. nodeLowMessageFmt = "The node was low on resource: %v. "
  36. // nodeConditionMessageFmt is the message for evictions due to resource pressure.
  37. nodeConditionMessageFmt = "The node had condition: %v. "
  38. // containerMessageFmt provides additional information for containers exceeding requests
  39. containerMessageFmt = "Container %s was using %s, which exceeds its request of %s. "
  40. // containerEphemeralStorageMessageFmt provides additional information for containers which have exceeded their ES limit
  41. containerEphemeralStorageMessageFmt = "Container %s exceeded its local ephemeral storage limit %q. "
  42. // podEphemeralStorageMessageFmt provides additional information for pods which have exceeded their ES limit
  43. podEphemeralStorageMessageFmt = "Pod ephemeral local storage usage exceeds the total limit of containers %s. "
  44. // emptyDirMessageFmt provides additional information for empty-dir volumes which have exceeded their size limit
  45. emptyDirMessageFmt = "Usage of EmptyDir volume %q exceeds the limit %q. "
  46. // inodes, number. internal to this module, used to account for local disk inode consumption.
  47. resourceInodes v1.ResourceName = "inodes"
  48. // resourcePids, number. internal to this module, used to account for local pid consumption.
  49. resourcePids v1.ResourceName = "pids"
  50. // OffendingContainersKey is the key in eviction event annotations for the list of container names which exceeded their requests
  51. OffendingContainersKey = "offending_containers"
  52. // OffendingContainersUsageKey is the key in eviction event annotations for the list of usage of containers which exceeded their requests
  53. OffendingContainersUsageKey = "offending_containers_usage"
  54. // StarvedResourceKey is the key for the starved resource in eviction event annotations
  55. StarvedResourceKey = "starved_resource"
  56. )
  57. var (
  58. // signalToNodeCondition maps a signal to the node condition to report if threshold is met.
  59. signalToNodeCondition map[evictionapi.Signal]v1.NodeConditionType
  60. // signalToResource maps a Signal to its associated Resource.
  61. signalToResource map[evictionapi.Signal]v1.ResourceName
  62. )
  63. func init() {
  64. // map eviction signals to node conditions
  65. signalToNodeCondition = map[evictionapi.Signal]v1.NodeConditionType{}
  66. signalToNodeCondition[evictionapi.SignalMemoryAvailable] = v1.NodeMemoryPressure
  67. signalToNodeCondition[evictionapi.SignalAllocatableMemoryAvailable] = v1.NodeMemoryPressure
  68. signalToNodeCondition[evictionapi.SignalImageFsAvailable] = v1.NodeDiskPressure
  69. signalToNodeCondition[evictionapi.SignalNodeFsAvailable] = v1.NodeDiskPressure
  70. signalToNodeCondition[evictionapi.SignalImageFsInodesFree] = v1.NodeDiskPressure
  71. signalToNodeCondition[evictionapi.SignalNodeFsInodesFree] = v1.NodeDiskPressure
  72. signalToNodeCondition[evictionapi.SignalPIDAvailable] = v1.NodePIDPressure
  73. // map signals to resources (and vice-versa)
  74. signalToResource = map[evictionapi.Signal]v1.ResourceName{}
  75. signalToResource[evictionapi.SignalMemoryAvailable] = v1.ResourceMemory
  76. signalToResource[evictionapi.SignalAllocatableMemoryAvailable] = v1.ResourceMemory
  77. signalToResource[evictionapi.SignalImageFsAvailable] = v1.ResourceEphemeralStorage
  78. signalToResource[evictionapi.SignalImageFsInodesFree] = resourceInodes
  79. signalToResource[evictionapi.SignalNodeFsAvailable] = v1.ResourceEphemeralStorage
  80. signalToResource[evictionapi.SignalNodeFsInodesFree] = resourceInodes
  81. signalToResource[evictionapi.SignalPIDAvailable] = resourcePids
  82. }
  83. // validSignal returns true if the signal is supported.
  84. func validSignal(signal evictionapi.Signal) bool {
  85. _, found := signalToResource[signal]
  86. return found
  87. }
  88. // getReclaimableThreshold finds the threshold and resource to reclaim
  89. func getReclaimableThreshold(thresholds []evictionapi.Threshold) (evictionapi.Threshold, v1.ResourceName, bool) {
  90. for _, thresholdToReclaim := range thresholds {
  91. if resourceToReclaim, ok := signalToResource[thresholdToReclaim.Signal]; ok {
  92. return thresholdToReclaim, resourceToReclaim, true
  93. }
  94. klog.V(3).Infof("eviction manager: threshold %s was crossed, but reclaim is not implemented for this threshold.", thresholdToReclaim.Signal)
  95. }
  96. return evictionapi.Threshold{}, "", false
  97. }
  98. // ParseThresholdConfig parses the flags for thresholds.
  99. func ParseThresholdConfig(allocatableConfig []string, evictionHard, evictionSoft, evictionSoftGracePeriod, evictionMinimumReclaim map[string]string) ([]evictionapi.Threshold, error) {
  100. results := []evictionapi.Threshold{}
  101. hardThresholds, err := parseThresholdStatements(evictionHard)
  102. if err != nil {
  103. return nil, err
  104. }
  105. results = append(results, hardThresholds...)
  106. softThresholds, err := parseThresholdStatements(evictionSoft)
  107. if err != nil {
  108. return nil, err
  109. }
  110. gracePeriods, err := parseGracePeriods(evictionSoftGracePeriod)
  111. if err != nil {
  112. return nil, err
  113. }
  114. minReclaims, err := parseMinimumReclaims(evictionMinimumReclaim)
  115. if err != nil {
  116. return nil, err
  117. }
  118. for i := range softThresholds {
  119. signal := softThresholds[i].Signal
  120. period, found := gracePeriods[signal]
  121. if !found {
  122. return nil, fmt.Errorf("grace period must be specified for the soft eviction threshold %v", signal)
  123. }
  124. softThresholds[i].GracePeriod = period
  125. }
  126. results = append(results, softThresholds...)
  127. for i := range results {
  128. if minReclaim, ok := minReclaims[results[i].Signal]; ok {
  129. results[i].MinReclaim = &minReclaim
  130. }
  131. }
  132. for _, key := range allocatableConfig {
  133. if key == kubetypes.NodeAllocatableEnforcementKey {
  134. results = addAllocatableThresholds(results)
  135. break
  136. }
  137. }
  138. return results, nil
  139. }
  140. func addAllocatableThresholds(thresholds []evictionapi.Threshold) []evictionapi.Threshold {
  141. additionalThresholds := []evictionapi.Threshold{}
  142. for _, threshold := range thresholds {
  143. if threshold.Signal == evictionapi.SignalMemoryAvailable && isHardEvictionThreshold(threshold) {
  144. // Copy the SignalMemoryAvailable to SignalAllocatableMemoryAvailable
  145. additionalThresholds = append(additionalThresholds, evictionapi.Threshold{
  146. Signal: evictionapi.SignalAllocatableMemoryAvailable,
  147. Operator: threshold.Operator,
  148. Value: threshold.Value,
  149. MinReclaim: threshold.MinReclaim,
  150. })
  151. }
  152. }
  153. return append(thresholds, additionalThresholds...)
  154. }
  155. // parseThresholdStatements parses the input statements into a list of Threshold objects.
  156. func parseThresholdStatements(statements map[string]string) ([]evictionapi.Threshold, error) {
  157. if len(statements) == 0 {
  158. return nil, nil
  159. }
  160. results := []evictionapi.Threshold{}
  161. for signal, val := range statements {
  162. result, err := parseThresholdStatement(evictionapi.Signal(signal), val)
  163. if err != nil {
  164. return nil, err
  165. }
  166. if result != nil {
  167. results = append(results, *result)
  168. }
  169. }
  170. return results, nil
  171. }
  172. // parseThresholdStatement parses a threshold statement and returns a threshold,
  173. // or nil if the threshold should be ignored.
  174. func parseThresholdStatement(signal evictionapi.Signal, val string) (*evictionapi.Threshold, error) {
  175. if !validSignal(signal) {
  176. return nil, fmt.Errorf(unsupportedEvictionSignal, signal)
  177. }
  178. operator := evictionapi.OpForSignal[signal]
  179. if strings.HasSuffix(val, "%") {
  180. // ignore 0% and 100%
  181. if val == "0%" || val == "100%" {
  182. return nil, nil
  183. }
  184. percentage, err := parsePercentage(val)
  185. if err != nil {
  186. return nil, err
  187. }
  188. if percentage < 0 {
  189. return nil, fmt.Errorf("eviction percentage threshold %v must be >= 0%%: %s", signal, val)
  190. }
  191. if percentage > 100 {
  192. return nil, fmt.Errorf("eviction percentage threshold %v must be <= 100%%: %s", signal, val)
  193. }
  194. return &evictionapi.Threshold{
  195. Signal: signal,
  196. Operator: operator,
  197. Value: evictionapi.ThresholdValue{
  198. Percentage: percentage,
  199. },
  200. }, nil
  201. }
  202. quantity, err := resource.ParseQuantity(val)
  203. if err != nil {
  204. return nil, err
  205. }
  206. if quantity.Sign() < 0 || quantity.IsZero() {
  207. return nil, fmt.Errorf("eviction threshold %v must be positive: %s", signal, &quantity)
  208. }
  209. return &evictionapi.Threshold{
  210. Signal: signal,
  211. Operator: operator,
  212. Value: evictionapi.ThresholdValue{
  213. Quantity: &quantity,
  214. },
  215. }, nil
  216. }
  // parsePercentage parses a string representing a percentage value and
  // returns it as a fraction, e.g. "50%" yields 0.5.
  //
  // At most one trailing '%' is removed before the number is parsed, so
  // malformed input such as "10%%" is rejected with the strconv error instead
  // of being silently accepted.
  func parsePercentage(input string) (float32, error) {
  	value, err := strconv.ParseFloat(strings.TrimSuffix(input, "%"), 32)
  	if err != nil {
  		return 0, err
  	}
  	return float32(value) / 100, nil
  }
  225. // parseGracePeriods parses the grace period statements
  226. func parseGracePeriods(statements map[string]string) (map[evictionapi.Signal]time.Duration, error) {
  227. if len(statements) == 0 {
  228. return nil, nil
  229. }
  230. results := map[evictionapi.Signal]time.Duration{}
  231. for signal, val := range statements {
  232. signal := evictionapi.Signal(signal)
  233. if !validSignal(signal) {
  234. return nil, fmt.Errorf(unsupportedEvictionSignal, signal)
  235. }
  236. gracePeriod, err := time.ParseDuration(val)
  237. if err != nil {
  238. return nil, err
  239. }
  240. if gracePeriod < 0 {
  241. return nil, fmt.Errorf("invalid eviction grace period specified: %v, must be a positive value", val)
  242. }
  243. results[signal] = gracePeriod
  244. }
  245. return results, nil
  246. }
  247. // parseMinimumReclaims parses the minimum reclaim statements
  248. func parseMinimumReclaims(statements map[string]string) (map[evictionapi.Signal]evictionapi.ThresholdValue, error) {
  249. if len(statements) == 0 {
  250. return nil, nil
  251. }
  252. results := map[evictionapi.Signal]evictionapi.ThresholdValue{}
  253. for signal, val := range statements {
  254. signal := evictionapi.Signal(signal)
  255. if !validSignal(signal) {
  256. return nil, fmt.Errorf(unsupportedEvictionSignal, signal)
  257. }
  258. if strings.HasSuffix(val, "%") {
  259. percentage, err := parsePercentage(val)
  260. if err != nil {
  261. return nil, err
  262. }
  263. if percentage <= 0 {
  264. return nil, fmt.Errorf("eviction percentage minimum reclaim %v must be positive: %s", signal, val)
  265. }
  266. results[signal] = evictionapi.ThresholdValue{
  267. Percentage: percentage,
  268. }
  269. continue
  270. }
  271. quantity, err := resource.ParseQuantity(val)
  272. if err != nil {
  273. return nil, err
  274. }
  275. if quantity.Sign() < 0 {
  276. return nil, fmt.Errorf("negative eviction minimum reclaim specified for %v", signal)
  277. }
  278. results[signal] = evictionapi.ThresholdValue{
  279. Quantity: &quantity,
  280. }
  281. }
  282. return results, nil
  283. }
  284. // diskUsage converts used bytes into a resource quantity.
  285. func diskUsage(fsStats *statsapi.FsStats) *resource.Quantity {
  286. if fsStats == nil || fsStats.UsedBytes == nil {
  287. return &resource.Quantity{Format: resource.BinarySI}
  288. }
  289. usage := int64(*fsStats.UsedBytes)
  290. return resource.NewQuantity(usage, resource.BinarySI)
  291. }
  292. // inodeUsage converts inodes consumed into a resource quantity.
  293. func inodeUsage(fsStats *statsapi.FsStats) *resource.Quantity {
  294. if fsStats == nil || fsStats.InodesUsed == nil {
  295. return &resource.Quantity{Format: resource.DecimalSI}
  296. }
  297. usage := int64(*fsStats.InodesUsed)
  298. return resource.NewQuantity(usage, resource.DecimalSI)
  299. }
  300. // memoryUsage converts working set into a resource quantity.
  301. func memoryUsage(memStats *statsapi.MemoryStats) *resource.Quantity {
  302. if memStats == nil || memStats.WorkingSetBytes == nil {
  303. return &resource.Quantity{Format: resource.BinarySI}
  304. }
  305. usage := int64(*memStats.WorkingSetBytes)
  306. return resource.NewQuantity(usage, resource.BinarySI)
  307. }
  308. // localVolumeNames returns the set of volumes for the pod that are local
  309. // TODO: summary API should report what volumes consume local storage rather than hard-code here.
  310. func localVolumeNames(pod *v1.Pod) []string {
  311. result := []string{}
  312. for _, volume := range pod.Spec.Volumes {
  313. if volume.HostPath != nil ||
  314. (volume.EmptyDir != nil && volume.EmptyDir.Medium != v1.StorageMediumMemory) ||
  315. volume.ConfigMap != nil ||
  316. volume.GitRepo != nil {
  317. result = append(result, volume.Name)
  318. }
  319. }
  320. return result
  321. }
  322. // containerUsage aggregates container disk usage and inode consumption for the specified stats to measure.
  323. func containerUsage(podStats statsapi.PodStats, statsToMeasure []fsStatsType) v1.ResourceList {
  324. disk := resource.Quantity{Format: resource.BinarySI}
  325. inodes := resource.Quantity{Format: resource.DecimalSI}
  326. for _, container := range podStats.Containers {
  327. if hasFsStatsType(statsToMeasure, fsStatsRoot) {
  328. disk.Add(*diskUsage(container.Rootfs))
  329. inodes.Add(*inodeUsage(container.Rootfs))
  330. }
  331. if hasFsStatsType(statsToMeasure, fsStatsLogs) {
  332. disk.Add(*diskUsage(container.Logs))
  333. inodes.Add(*inodeUsage(container.Logs))
  334. }
  335. }
  336. return v1.ResourceList{
  337. v1.ResourceEphemeralStorage: disk,
  338. resourceInodes: inodes,
  339. }
  340. }
  341. // podLocalVolumeUsage aggregates pod local volumes disk usage and inode consumption for the specified stats to measure.
  342. func podLocalVolumeUsage(volumeNames []string, podStats statsapi.PodStats) v1.ResourceList {
  343. disk := resource.Quantity{Format: resource.BinarySI}
  344. inodes := resource.Quantity{Format: resource.DecimalSI}
  345. for _, volumeName := range volumeNames {
  346. for _, volumeStats := range podStats.VolumeStats {
  347. if volumeStats.Name == volumeName {
  348. disk.Add(*diskUsage(&volumeStats.FsStats))
  349. inodes.Add(*inodeUsage(&volumeStats.FsStats))
  350. break
  351. }
  352. }
  353. }
  354. return v1.ResourceList{
  355. v1.ResourceEphemeralStorage: disk,
  356. resourceInodes: inodes,
  357. }
  358. }
  359. // podDiskUsage aggregates pod disk usage and inode consumption for the specified stats to measure.
  360. func podDiskUsage(podStats statsapi.PodStats, pod *v1.Pod, statsToMeasure []fsStatsType) (v1.ResourceList, error) {
  361. disk := resource.Quantity{Format: resource.BinarySI}
  362. inodes := resource.Quantity{Format: resource.DecimalSI}
  363. containerUsageList := containerUsage(podStats, statsToMeasure)
  364. disk.Add(containerUsageList[v1.ResourceEphemeralStorage])
  365. inodes.Add(containerUsageList[resourceInodes])
  366. if hasFsStatsType(statsToMeasure, fsStatsLocalVolumeSource) {
  367. volumeNames := localVolumeNames(pod)
  368. podLocalVolumeUsageList := podLocalVolumeUsage(volumeNames, podStats)
  369. disk.Add(podLocalVolumeUsageList[v1.ResourceEphemeralStorage])
  370. inodes.Add(podLocalVolumeUsageList[resourceInodes])
  371. }
  372. return v1.ResourceList{
  373. v1.ResourceEphemeralStorage: disk,
  374. resourceInodes: inodes,
  375. }, nil
  376. }
  377. // localEphemeralVolumeNames returns the set of ephemeral volumes for the pod that are local
  378. func localEphemeralVolumeNames(pod *v1.Pod) []string {
  379. result := []string{}
  380. for _, volume := range pod.Spec.Volumes {
  381. if volumeutils.IsLocalEphemeralVolume(volume) {
  382. result = append(result, volume.Name)
  383. }
  384. }
  385. return result
  386. }
  387. // podLocalEphemeralStorageUsage aggregates pod local ephemeral storage usage and inode consumption for the specified stats to measure.
  388. func podLocalEphemeralStorageUsage(podStats statsapi.PodStats, pod *v1.Pod, statsToMeasure []fsStatsType) (v1.ResourceList, error) {
  389. disk := resource.Quantity{Format: resource.BinarySI}
  390. inodes := resource.Quantity{Format: resource.DecimalSI}
  391. containerUsageList := containerUsage(podStats, statsToMeasure)
  392. disk.Add(containerUsageList[v1.ResourceEphemeralStorage])
  393. inodes.Add(containerUsageList[resourceInodes])
  394. if hasFsStatsType(statsToMeasure, fsStatsLocalVolumeSource) {
  395. volumeNames := localEphemeralVolumeNames(pod)
  396. podLocalVolumeUsageList := podLocalVolumeUsage(volumeNames, podStats)
  397. disk.Add(podLocalVolumeUsageList[v1.ResourceEphemeralStorage])
  398. inodes.Add(podLocalVolumeUsageList[resourceInodes])
  399. }
  400. return v1.ResourceList{
  401. v1.ResourceEphemeralStorage: disk,
  402. resourceInodes: inodes,
  403. }, nil
  404. }
  405. // formatThreshold formats a threshold for logging.
  406. func formatThreshold(threshold evictionapi.Threshold) string {
  407. return fmt.Sprintf("threshold(signal=%v, operator=%v, value=%v, gracePeriod=%v)", threshold.Signal, threshold.Operator, evictionapi.ThresholdValue(threshold.Value), threshold.GracePeriod)
  408. }
  409. // cachedStatsFunc returns a statsFunc based on the provided pod stats.
  410. func cachedStatsFunc(podStats []statsapi.PodStats) statsFunc {
  411. uid2PodStats := map[string]statsapi.PodStats{}
  412. for i := range podStats {
  413. uid2PodStats[podStats[i].PodRef.UID] = podStats[i]
  414. }
  415. return func(pod *v1.Pod) (statsapi.PodStats, bool) {
  416. stats, found := uid2PodStats[string(pod.UID)]
  417. return stats, found
  418. }
  419. }
  420. // Cmp compares p1 and p2 and returns:
  421. //
  422. // -1 if p1 < p2
  423. // 0 if p1 == p2
  424. // +1 if p1 > p2
  425. //
  426. type cmpFunc func(p1, p2 *v1.Pod) int
  427. // multiSorter implements the Sort interface, sorting changes within.
  428. type multiSorter struct {
  429. pods []*v1.Pod
  430. cmp []cmpFunc
  431. }
  432. // Sort sorts the argument slice according to the less functions passed to OrderedBy.
  433. func (ms *multiSorter) Sort(pods []*v1.Pod) {
  434. ms.pods = pods
  435. sort.Sort(ms)
  436. }
  437. // OrderedBy returns a Sorter that sorts using the cmp functions, in order.
  438. // Call its Sort method to sort the data.
  439. func orderedBy(cmp ...cmpFunc) *multiSorter {
  440. return &multiSorter{
  441. cmp: cmp,
  442. }
  443. }
  444. // Len is part of sort.Interface.
  445. func (ms *multiSorter) Len() int {
  446. return len(ms.pods)
  447. }
  448. // Swap is part of sort.Interface.
  449. func (ms *multiSorter) Swap(i, j int) {
  450. ms.pods[i], ms.pods[j] = ms.pods[j], ms.pods[i]
  451. }
  452. // Less is part of sort.Interface.
  453. func (ms *multiSorter) Less(i, j int) bool {
  454. p1, p2 := ms.pods[i], ms.pods[j]
  455. var k int
  456. for k = 0; k < len(ms.cmp)-1; k++ {
  457. cmpResult := ms.cmp[k](p1, p2)
  458. // p1 is less than p2
  459. if cmpResult < 0 {
  460. return true
  461. }
  462. // p1 is greater than p2
  463. if cmpResult > 0 {
  464. return false
  465. }
  466. // we don't know yet
  467. }
  468. // the last cmp func is the final decider
  469. return ms.cmp[k](p1, p2) < 0
  470. }
  471. // priority compares pods by Priority, if priority is enabled.
  472. func priority(p1, p2 *v1.Pod) int {
  473. priority1 := pod.GetPodPriority(p1)
  474. priority2 := pod.GetPodPriority(p2)
  475. if priority1 == priority2 {
  476. return 0
  477. }
  478. if priority1 > priority2 {
  479. return 1
  480. }
  481. return -1
  482. }
  483. // exceedMemoryRequests compares whether or not pods' memory usage exceeds their requests
  484. func exceedMemoryRequests(stats statsFunc) cmpFunc {
  485. return func(p1, p2 *v1.Pod) int {
  486. p1Stats, p1Found := stats(p1)
  487. p2Stats, p2Found := stats(p2)
  488. if !p1Found || !p2Found {
  489. // prioritize evicting the pod for which no stats were found
  490. return cmpBool(!p1Found, !p2Found)
  491. }
  492. p1Memory := memoryUsage(p1Stats.Memory)
  493. p2Memory := memoryUsage(p2Stats.Memory)
  494. p1ExceedsRequests := p1Memory.Cmp(v1resource.GetResourceRequestQuantity(p1, v1.ResourceMemory)) == 1
  495. p2ExceedsRequests := p2Memory.Cmp(v1resource.GetResourceRequestQuantity(p2, v1.ResourceMemory)) == 1
  496. // prioritize evicting the pod which exceeds its requests
  497. return cmpBool(p1ExceedsRequests, p2ExceedsRequests)
  498. }
  499. }
  500. // memory compares pods by largest consumer of memory relative to request.
  501. func memory(stats statsFunc) cmpFunc {
  502. return func(p1, p2 *v1.Pod) int {
  503. p1Stats, p1Found := stats(p1)
  504. p2Stats, p2Found := stats(p2)
  505. if !p1Found || !p2Found {
  506. // prioritize evicting the pod for which no stats were found
  507. return cmpBool(!p1Found, !p2Found)
  508. }
  509. // adjust p1, p2 usage relative to the request (if any)
  510. p1Memory := memoryUsage(p1Stats.Memory)
  511. p1Request := v1resource.GetResourceRequestQuantity(p1, v1.ResourceMemory)
  512. p1Memory.Sub(p1Request)
  513. p2Memory := memoryUsage(p2Stats.Memory)
  514. p2Request := v1resource.GetResourceRequestQuantity(p2, v1.ResourceMemory)
  515. p2Memory.Sub(p2Request)
  516. // prioritize evicting the pod which has the larger consumption of memory
  517. return p2Memory.Cmp(*p1Memory)
  518. }
  519. }
  520. // exceedDiskRequests compares whether or not pods' disk usage exceeds their requests
  521. func exceedDiskRequests(stats statsFunc, fsStatsToMeasure []fsStatsType, diskResource v1.ResourceName) cmpFunc {
  522. return func(p1, p2 *v1.Pod) int {
  523. p1Stats, p1Found := stats(p1)
  524. p2Stats, p2Found := stats(p2)
  525. if !p1Found || !p2Found {
  526. // prioritize evicting the pod for which no stats were found
  527. return cmpBool(!p1Found, !p2Found)
  528. }
  529. p1Usage, p1Err := podDiskUsage(p1Stats, p1, fsStatsToMeasure)
  530. p2Usage, p2Err := podDiskUsage(p2Stats, p2, fsStatsToMeasure)
  531. if p1Err != nil || p2Err != nil {
  532. // prioritize evicting the pod which had an error getting stats
  533. return cmpBool(p1Err != nil, p2Err != nil)
  534. }
  535. p1Disk := p1Usage[diskResource]
  536. p2Disk := p2Usage[diskResource]
  537. p1ExceedsRequests := p1Disk.Cmp(v1resource.GetResourceRequestQuantity(p1, diskResource)) == 1
  538. p2ExceedsRequests := p2Disk.Cmp(v1resource.GetResourceRequestQuantity(p2, diskResource)) == 1
  539. // prioritize evicting the pod which exceeds its requests
  540. return cmpBool(p1ExceedsRequests, p2ExceedsRequests)
  541. }
  542. }
  543. // disk compares pods by largest consumer of disk relative to request for the specified disk resource.
  544. func disk(stats statsFunc, fsStatsToMeasure []fsStatsType, diskResource v1.ResourceName) cmpFunc {
  545. return func(p1, p2 *v1.Pod) int {
  546. p1Stats, p1Found := stats(p1)
  547. p2Stats, p2Found := stats(p2)
  548. if !p1Found || !p2Found {
  549. // prioritize evicting the pod for which no stats were found
  550. return cmpBool(!p1Found, !p2Found)
  551. }
  552. p1Usage, p1Err := podDiskUsage(p1Stats, p1, fsStatsToMeasure)
  553. p2Usage, p2Err := podDiskUsage(p2Stats, p2, fsStatsToMeasure)
  554. if p1Err != nil || p2Err != nil {
  555. // prioritize evicting the pod which had an error getting stats
  556. return cmpBool(p1Err != nil, p2Err != nil)
  557. }
  558. // adjust p1, p2 usage relative to the request (if any)
  559. p1Disk := p1Usage[diskResource]
  560. p2Disk := p2Usage[diskResource]
  561. p1Request := v1resource.GetResourceRequestQuantity(p1, v1.ResourceEphemeralStorage)
  562. p1Disk.Sub(p1Request)
  563. p2Request := v1resource.GetResourceRequestQuantity(p2, v1.ResourceEphemeralStorage)
  564. p2Disk.Sub(p2Request)
  565. // prioritize evicting the pod which has the larger consumption of disk
  566. return p2Disk.Cmp(p1Disk)
  567. }
  568. }
  // cmpBool compares booleans, placing true before false: it returns -1 when
  // only a is true, +1 when only b is true, and 0 when they are equal.
  func cmpBool(a, b bool) int {
  	switch {
  	case a == b:
  		return 0
  	case a:
  		// a is true and b is false
  		return -1
  	default:
  		return 1
  	}
  }
  579. // rankMemoryPressure orders the input pods for eviction in response to memory pressure.
  580. // It ranks by whether or not the pod's usage exceeds its requests, then by priority, and
  581. // finally by memory usage above requests.
  582. func rankMemoryPressure(pods []*v1.Pod, stats statsFunc) {
  583. orderedBy(exceedMemoryRequests(stats), priority, memory(stats)).Sort(pods)
  584. }
  585. // rankPIDPressure orders the input pods by priority in response to PID pressure.
  586. func rankPIDPressure(pods []*v1.Pod, stats statsFunc) {
  587. orderedBy(priority).Sort(pods)
  588. }
  589. // rankDiskPressureFunc returns a rankFunc that measures the specified fs stats.
  590. func rankDiskPressureFunc(fsStatsToMeasure []fsStatsType, diskResource v1.ResourceName) rankFunc {
  591. return func(pods []*v1.Pod, stats statsFunc) {
  592. orderedBy(exceedDiskRequests(stats, fsStatsToMeasure, diskResource), priority, disk(stats, fsStatsToMeasure, diskResource)).Sort(pods)
  593. }
  594. }
  595. // byEvictionPriority implements sort.Interface for []v1.ResourceName.
  596. type byEvictionPriority []evictionapi.Threshold
  597. func (a byEvictionPriority) Len() int { return len(a) }
  598. func (a byEvictionPriority) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
  599. // Less ranks memory before all other resources, and ranks thresholds with no resource to reclaim last
  600. func (a byEvictionPriority) Less(i, j int) bool {
  601. _, jSignalHasResource := signalToResource[a[j].Signal]
  602. return a[i].Signal == evictionapi.SignalMemoryAvailable || a[i].Signal == evictionapi.SignalAllocatableMemoryAvailable || !jSignalHasResource
  603. }
  604. // makeSignalObservations derives observations using the specified summary provider.
  605. func makeSignalObservations(summary *statsapi.Summary) (signalObservations, statsFunc) {
  606. // build the function to work against for pod stats
  607. statsFunc := cachedStatsFunc(summary.Pods)
  608. // build an evaluation context for current eviction signals
  609. result := signalObservations{}
  610. if memory := summary.Node.Memory; memory != nil && memory.AvailableBytes != nil && memory.WorkingSetBytes != nil {
  611. result[evictionapi.SignalMemoryAvailable] = signalObservation{
  612. available: resource.NewQuantity(int64(*memory.AvailableBytes), resource.BinarySI),
  613. capacity: resource.NewQuantity(int64(*memory.AvailableBytes+*memory.WorkingSetBytes), resource.BinarySI),
  614. time: memory.Time,
  615. }
  616. }
  617. if allocatableContainer, err := getSysContainer(summary.Node.SystemContainers, statsapi.SystemContainerPods); err != nil {
  618. klog.Errorf("eviction manager: failed to construct signal: %q error: %v", evictionapi.SignalAllocatableMemoryAvailable, err)
  619. } else {
  620. if memory := allocatableContainer.Memory; memory != nil && memory.AvailableBytes != nil && memory.WorkingSetBytes != nil {
  621. result[evictionapi.SignalAllocatableMemoryAvailable] = signalObservation{
  622. available: resource.NewQuantity(int64(*memory.AvailableBytes), resource.BinarySI),
  623. capacity: resource.NewQuantity(int64(*memory.AvailableBytes+*memory.WorkingSetBytes), resource.BinarySI),
  624. time: memory.Time,
  625. }
  626. }
  627. }
  628. if nodeFs := summary.Node.Fs; nodeFs != nil {
  629. if nodeFs.AvailableBytes != nil && nodeFs.CapacityBytes != nil {
  630. result[evictionapi.SignalNodeFsAvailable] = signalObservation{
  631. available: resource.NewQuantity(int64(*nodeFs.AvailableBytes), resource.BinarySI),
  632. capacity: resource.NewQuantity(int64(*nodeFs.CapacityBytes), resource.BinarySI),
  633. time: nodeFs.Time,
  634. }
  635. }
  636. if nodeFs.InodesFree != nil && nodeFs.Inodes != nil {
  637. result[evictionapi.SignalNodeFsInodesFree] = signalObservation{
  638. available: resource.NewQuantity(int64(*nodeFs.InodesFree), resource.DecimalSI),
  639. capacity: resource.NewQuantity(int64(*nodeFs.Inodes), resource.DecimalSI),
  640. time: nodeFs.Time,
  641. }
  642. }
  643. }
  644. if summary.Node.Runtime != nil {
  645. if imageFs := summary.Node.Runtime.ImageFs; imageFs != nil {
  646. if imageFs.AvailableBytes != nil && imageFs.CapacityBytes != nil {
  647. result[evictionapi.SignalImageFsAvailable] = signalObservation{
  648. available: resource.NewQuantity(int64(*imageFs.AvailableBytes), resource.BinarySI),
  649. capacity: resource.NewQuantity(int64(*imageFs.CapacityBytes), resource.BinarySI),
  650. time: imageFs.Time,
  651. }
  652. if imageFs.InodesFree != nil && imageFs.Inodes != nil {
  653. result[evictionapi.SignalImageFsInodesFree] = signalObservation{
  654. available: resource.NewQuantity(int64(*imageFs.InodesFree), resource.DecimalSI),
  655. capacity: resource.NewQuantity(int64(*imageFs.Inodes), resource.DecimalSI),
  656. time: imageFs.Time,
  657. }
  658. }
  659. }
  660. }
  661. }
  662. if rlimit := summary.Node.Rlimit; rlimit != nil {
  663. if rlimit.NumOfRunningProcesses != nil && rlimit.MaxPID != nil {
  664. available := int64(*rlimit.MaxPID) - int64(*rlimit.NumOfRunningProcesses)
  665. result[evictionapi.SignalPIDAvailable] = signalObservation{
  666. available: resource.NewQuantity(available, resource.BinarySI),
  667. capacity: resource.NewQuantity(int64(*rlimit.MaxPID), resource.BinarySI),
  668. time: rlimit.Time,
  669. }
  670. }
  671. }
  672. return result, statsFunc
  673. }
  674. func getSysContainer(sysContainers []statsapi.ContainerStats, name string) (*statsapi.ContainerStats, error) {
  675. for _, cont := range sysContainers {
  676. if cont.Name == name {
  677. return &cont, nil
  678. }
  679. }
  680. return nil, fmt.Errorf("system container %q not found in metrics", name)
  681. }
  682. // thresholdsMet returns the set of thresholds that were met independent of grace period
  683. func thresholdsMet(thresholds []evictionapi.Threshold, observations signalObservations, enforceMinReclaim bool) []evictionapi.Threshold {
  684. results := []evictionapi.Threshold{}
  685. for i := range thresholds {
  686. threshold := thresholds[i]
  687. observed, found := observations[threshold.Signal]
  688. if !found {
  689. klog.Warningf("eviction manager: no observation found for eviction signal %v", threshold.Signal)
  690. continue
  691. }
  692. // determine if we have met the specified threshold
  693. thresholdMet := false
  694. quantity := evictionapi.GetThresholdQuantity(threshold.Value, observed.capacity)
  695. // if enforceMinReclaim is specified, we compare relative to value - minreclaim
  696. if enforceMinReclaim && threshold.MinReclaim != nil {
  697. quantity.Add(*evictionapi.GetThresholdQuantity(*threshold.MinReclaim, observed.capacity))
  698. }
  699. thresholdResult := quantity.Cmp(*observed.available)
  700. switch threshold.Operator {
  701. case evictionapi.OpLessThan:
  702. thresholdMet = thresholdResult > 0
  703. }
  704. if thresholdMet {
  705. results = append(results, threshold)
  706. }
  707. }
  708. return results
  709. }
  710. func debugLogObservations(logPrefix string, observations signalObservations) {
  711. if !klog.V(3) {
  712. return
  713. }
  714. for k, v := range observations {
  715. if !v.time.IsZero() {
  716. klog.Infof("eviction manager: %v: signal=%v, available: %v, capacity: %v, time: %v", logPrefix, k, v.available, v.capacity, v.time)
  717. } else {
  718. klog.Infof("eviction manager: %v: signal=%v, available: %v, capacity: %v", logPrefix, k, v.available, v.capacity)
  719. }
  720. }
  721. }
  722. func debugLogThresholdsWithObservation(logPrefix string, thresholds []evictionapi.Threshold, observations signalObservations) {
  723. if !klog.V(3) {
  724. return
  725. }
  726. for i := range thresholds {
  727. threshold := thresholds[i]
  728. observed, found := observations[threshold.Signal]
  729. if found {
  730. quantity := evictionapi.GetThresholdQuantity(threshold.Value, observed.capacity)
  731. klog.Infof("eviction manager: %v: threshold [signal=%v, quantity=%v] observed %v", logPrefix, threshold.Signal, quantity, observed.available)
  732. } else {
  733. klog.Infof("eviction manager: %v: threshold [signal=%v] had no observation", logPrefix, threshold.Signal)
  734. }
  735. }
  736. }
  737. func thresholdsUpdatedStats(thresholds []evictionapi.Threshold, observations, lastObservations signalObservations) []evictionapi.Threshold {
  738. results := []evictionapi.Threshold{}
  739. for i := range thresholds {
  740. threshold := thresholds[i]
  741. observed, found := observations[threshold.Signal]
  742. if !found {
  743. klog.Warningf("eviction manager: no observation found for eviction signal %v", threshold.Signal)
  744. continue
  745. }
  746. last, found := lastObservations[threshold.Signal]
  747. if !found || observed.time.IsZero() || observed.time.After(last.time.Time) {
  748. results = append(results, threshold)
  749. }
  750. }
  751. return results
  752. }
  753. // thresholdsFirstObservedAt merges the input set of thresholds with the previous observation to determine when active set of thresholds were initially met.
  754. func thresholdsFirstObservedAt(thresholds []evictionapi.Threshold, lastObservedAt thresholdsObservedAt, now time.Time) thresholdsObservedAt {
  755. results := thresholdsObservedAt{}
  756. for i := range thresholds {
  757. observedAt, found := lastObservedAt[thresholds[i]]
  758. if !found {
  759. observedAt = now
  760. }
  761. results[thresholds[i]] = observedAt
  762. }
  763. return results
  764. }
  765. // thresholdsMetGracePeriod returns the set of thresholds that have satisfied associated grace period
  766. func thresholdsMetGracePeriod(observedAt thresholdsObservedAt, now time.Time) []evictionapi.Threshold {
  767. results := []evictionapi.Threshold{}
  768. for threshold, at := range observedAt {
  769. duration := now.Sub(at)
  770. if duration < threshold.GracePeriod {
  771. klog.V(2).Infof("eviction manager: eviction criteria not yet met for %v, duration: %v", formatThreshold(threshold), duration)
  772. continue
  773. }
  774. results = append(results, threshold)
  775. }
  776. return results
  777. }
  778. // nodeConditions returns the set of node conditions associated with a threshold
  779. func nodeConditions(thresholds []evictionapi.Threshold) []v1.NodeConditionType {
  780. results := []v1.NodeConditionType{}
  781. for _, threshold := range thresholds {
  782. if nodeCondition, found := signalToNodeCondition[threshold.Signal]; found {
  783. if !hasNodeCondition(results, nodeCondition) {
  784. results = append(results, nodeCondition)
  785. }
  786. }
  787. }
  788. return results
  789. }
  790. // nodeConditionsLastObservedAt merges the input with the previous observation to determine when a condition was most recently met.
  791. func nodeConditionsLastObservedAt(nodeConditions []v1.NodeConditionType, lastObservedAt nodeConditionsObservedAt, now time.Time) nodeConditionsObservedAt {
  792. results := nodeConditionsObservedAt{}
  793. // the input conditions were observed "now"
  794. for i := range nodeConditions {
  795. results[nodeConditions[i]] = now
  796. }
  797. // the conditions that were not observed now are merged in with their old time
  798. for key, value := range lastObservedAt {
  799. _, found := results[key]
  800. if !found {
  801. results[key] = value
  802. }
  803. }
  804. return results
  805. }
  806. // nodeConditionsObservedSince returns the set of conditions that have been observed within the specified period
  807. func nodeConditionsObservedSince(observedAt nodeConditionsObservedAt, period time.Duration, now time.Time) []v1.NodeConditionType {
  808. results := []v1.NodeConditionType{}
  809. for nodeCondition, at := range observedAt {
  810. duration := now.Sub(at)
  811. if duration < period {
  812. results = append(results, nodeCondition)
  813. }
  814. }
  815. return results
  816. }
  817. // hasFsStatsType returns true if the fsStat is in the input list
  818. func hasFsStatsType(inputs []fsStatsType, item fsStatsType) bool {
  819. for _, input := range inputs {
  820. if input == item {
  821. return true
  822. }
  823. }
  824. return false
  825. }
  826. // hasNodeCondition returns true if the node condition is in the input list
  827. func hasNodeCondition(inputs []v1.NodeConditionType, item v1.NodeConditionType) bool {
  828. for _, input := range inputs {
  829. if input == item {
  830. return true
  831. }
  832. }
  833. return false
  834. }
  835. // mergeThresholds will merge both threshold lists eliminating duplicates.
  836. func mergeThresholds(inputsA []evictionapi.Threshold, inputsB []evictionapi.Threshold) []evictionapi.Threshold {
  837. results := inputsA
  838. for _, threshold := range inputsB {
  839. if !hasThreshold(results, threshold) {
  840. results = append(results, threshold)
  841. }
  842. }
  843. return results
  844. }
  845. // hasThreshold returns true if the threshold is in the input list
  846. func hasThreshold(inputs []evictionapi.Threshold, item evictionapi.Threshold) bool {
  847. for _, input := range inputs {
  848. if input.GracePeriod == item.GracePeriod && input.Operator == item.Operator && input.Signal == item.Signal && compareThresholdValue(input.Value, item.Value) {
  849. return true
  850. }
  851. }
  852. return false
  853. }
  854. // compareThresholdValue returns true if the two thresholdValue objects are logically the same
  855. func compareThresholdValue(a evictionapi.ThresholdValue, b evictionapi.ThresholdValue) bool {
  856. if a.Quantity != nil {
  857. if b.Quantity == nil {
  858. return false
  859. }
  860. return a.Quantity.Cmp(*b.Quantity) == 0
  861. }
  862. if b.Quantity != nil {
  863. return false
  864. }
  865. return a.Percentage == b.Percentage
  866. }
  867. // isHardEvictionThreshold returns true if eviction should immediately occur
  868. func isHardEvictionThreshold(threshold evictionapi.Threshold) bool {
  869. return threshold.GracePeriod == time.Duration(0)
  870. }
  871. func isAllocatableEvictionThreshold(threshold evictionapi.Threshold) bool {
  872. return threshold.Signal == evictionapi.SignalAllocatableMemoryAvailable
  873. }
  874. // buildSignalToRankFunc returns ranking functions associated with resources
  875. func buildSignalToRankFunc(withImageFs bool) map[evictionapi.Signal]rankFunc {
  876. signalToRankFunc := map[evictionapi.Signal]rankFunc{
  877. evictionapi.SignalMemoryAvailable: rankMemoryPressure,
  878. evictionapi.SignalAllocatableMemoryAvailable: rankMemoryPressure,
  879. evictionapi.SignalPIDAvailable: rankPIDPressure,
  880. }
  881. // usage of an imagefs is optional
  882. if withImageFs {
  883. // with an imagefs, nodefs pod rank func for eviction only includes logs and local volumes
  884. signalToRankFunc[evictionapi.SignalNodeFsAvailable] = rankDiskPressureFunc([]fsStatsType{fsStatsLogs, fsStatsLocalVolumeSource}, v1.ResourceEphemeralStorage)
  885. signalToRankFunc[evictionapi.SignalNodeFsInodesFree] = rankDiskPressureFunc([]fsStatsType{fsStatsLogs, fsStatsLocalVolumeSource}, resourceInodes)
  886. // with an imagefs, imagefs pod rank func for eviction only includes rootfs
  887. signalToRankFunc[evictionapi.SignalImageFsAvailable] = rankDiskPressureFunc([]fsStatsType{fsStatsRoot}, v1.ResourceEphemeralStorage)
  888. signalToRankFunc[evictionapi.SignalImageFsInodesFree] = rankDiskPressureFunc([]fsStatsType{fsStatsRoot}, resourceInodes)
  889. } else {
  890. // without an imagefs, nodefs pod rank func for eviction looks at all fs stats.
  891. // since imagefs and nodefs share a common device, they share common ranking functions.
  892. signalToRankFunc[evictionapi.SignalNodeFsAvailable] = rankDiskPressureFunc([]fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, v1.ResourceEphemeralStorage)
  893. signalToRankFunc[evictionapi.SignalNodeFsInodesFree] = rankDiskPressureFunc([]fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, resourceInodes)
  894. signalToRankFunc[evictionapi.SignalImageFsAvailable] = rankDiskPressureFunc([]fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, v1.ResourceEphemeralStorage)
  895. signalToRankFunc[evictionapi.SignalImageFsInodesFree] = rankDiskPressureFunc([]fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, resourceInodes)
  896. }
  897. return signalToRankFunc
  898. }
  899. // PodIsEvicted returns true if the reported pod status is due to an eviction.
  900. func PodIsEvicted(podStatus v1.PodStatus) bool {
  901. return podStatus.Phase == v1.PodFailed && podStatus.Reason == Reason
  902. }
  903. // buildSignalToNodeReclaimFuncs returns reclaim functions associated with resources.
  904. func buildSignalToNodeReclaimFuncs(imageGC ImageGC, containerGC ContainerGC, withImageFs bool) map[evictionapi.Signal]nodeReclaimFuncs {
  905. signalToReclaimFunc := map[evictionapi.Signal]nodeReclaimFuncs{}
  906. // usage of an imagefs is optional
  907. if withImageFs {
  908. // with an imagefs, nodefs pressure should just delete logs
  909. signalToReclaimFunc[evictionapi.SignalNodeFsAvailable] = nodeReclaimFuncs{}
  910. signalToReclaimFunc[evictionapi.SignalNodeFsInodesFree] = nodeReclaimFuncs{}
  911. // with an imagefs, imagefs pressure should delete unused images
  912. signalToReclaimFunc[evictionapi.SignalImageFsAvailable] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
  913. signalToReclaimFunc[evictionapi.SignalImageFsInodesFree] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
  914. } else {
  915. // without an imagefs, nodefs pressure should delete logs, and unused images
  916. // since imagefs and nodefs share a common device, they share common reclaim functions
  917. signalToReclaimFunc[evictionapi.SignalNodeFsAvailable] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
  918. signalToReclaimFunc[evictionapi.SignalNodeFsInodesFree] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
  919. signalToReclaimFunc[evictionapi.SignalImageFsAvailable] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
  920. signalToReclaimFunc[evictionapi.SignalImageFsInodesFree] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
  921. }
  922. return signalToReclaimFunc
  923. }
  924. // evictionMessage constructs a useful message about why an eviction occurred, and annotations to provide metadata about the eviction
  925. func evictionMessage(resourceToReclaim v1.ResourceName, pod *v1.Pod, stats statsFunc) (message string, annotations map[string]string) {
  926. annotations = make(map[string]string)
  927. message = fmt.Sprintf(nodeLowMessageFmt, resourceToReclaim)
  928. containers := []string{}
  929. containerUsage := []string{}
  930. podStats, ok := stats(pod)
  931. if !ok {
  932. return
  933. }
  934. for _, containerStats := range podStats.Containers {
  935. for _, container := range pod.Spec.Containers {
  936. if container.Name == containerStats.Name {
  937. requests := container.Resources.Requests[resourceToReclaim]
  938. var usage *resource.Quantity
  939. switch resourceToReclaim {
  940. case v1.ResourceEphemeralStorage:
  941. if containerStats.Rootfs != nil && containerStats.Rootfs.UsedBytes != nil && containerStats.Logs != nil && containerStats.Logs.UsedBytes != nil {
  942. usage = resource.NewQuantity(int64(*containerStats.Rootfs.UsedBytes+*containerStats.Logs.UsedBytes), resource.BinarySI)
  943. }
  944. case v1.ResourceMemory:
  945. if containerStats.Memory != nil && containerStats.Memory.WorkingSetBytes != nil {
  946. usage = resource.NewQuantity(int64(*containerStats.Memory.WorkingSetBytes), resource.BinarySI)
  947. }
  948. }
  949. if usage != nil && usage.Cmp(requests) > 0 {
  950. message += fmt.Sprintf(containerMessageFmt, container.Name, usage.String(), requests.String())
  951. containers = append(containers, container.Name)
  952. containerUsage = append(containerUsage, usage.String())
  953. }
  954. }
  955. }
  956. }
  957. annotations[OffendingContainersKey] = strings.Join(containers, ",")
  958. annotations[OffendingContainersUsageKey] = strings.Join(containerUsage, ",")
  959. annotations[StarvedResourceKey] = string(resourceToReclaim)
  960. return
  961. }