daemonset_util.go 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237
  1. /*
  2. Copyright 2017 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package util
  14. import (
  15. "fmt"
  16. "strconv"
  17. apps "k8s.io/api/apps/v1"
  18. "k8s.io/api/core/v1"
  19. extensions "k8s.io/api/extensions/v1beta1"
  20. metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
  21. podutil "k8s.io/kubernetes/pkg/api/v1/pod"
  22. v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
  23. schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
  24. )
  25. // GetTemplateGeneration gets the template generation associated with a v1.DaemonSet by extracting it from the
  26. // deprecated annotation. If no annotation is found nil is returned. If the annotation is found and fails to parse
  27. // nil is returned with an error. If the generation can be parsed from the annotation, a pointer to the parsed int64
  28. // value is returned.
  29. func GetTemplateGeneration(ds *apps.DaemonSet) (*int64, error) {
  30. annotation, found := ds.Annotations[apps.DeprecatedTemplateGeneration]
  31. if !found {
  32. return nil, nil
  33. }
  34. generation, err := strconv.ParseInt(annotation, 10, 64)
  35. if err != nil {
  36. return nil, err
  37. }
  38. return &generation, nil
  39. }
  40. // AddOrUpdateDaemonPodTolerations apply necessary tolerations to DaemonSet Pods, e.g. node.kubernetes.io/not-ready:NoExecute.
  41. func AddOrUpdateDaemonPodTolerations(spec *v1.PodSpec) {
  42. // DaemonSet pods shouldn't be deleted by NodeController in case of node problems.
  43. // Add infinite toleration for taint notReady:NoExecute here
  44. // to survive taint-based eviction enforced by NodeController
  45. // when node turns not ready.
  46. v1helper.AddOrUpdateTolerationInPodSpec(spec, &v1.Toleration{
  47. Key: schedulerapi.TaintNodeNotReady,
  48. Operator: v1.TolerationOpExists,
  49. Effect: v1.TaintEffectNoExecute,
  50. })
  51. // DaemonSet pods shouldn't be deleted by NodeController in case of node problems.
  52. // Add infinite toleration for taint unreachable:NoExecute here
  53. // to survive taint-based eviction enforced by NodeController
  54. // when node turns unreachable.
  55. v1helper.AddOrUpdateTolerationInPodSpec(spec, &v1.Toleration{
  56. Key: schedulerapi.TaintNodeUnreachable,
  57. Operator: v1.TolerationOpExists,
  58. Effect: v1.TaintEffectNoExecute,
  59. })
  60. // According to TaintNodesByCondition feature, all DaemonSet pods should tolerate
  61. // MemoryPressure, DiskPressure, PIDPressure, Unschedulable and NetworkUnavailable taints.
  62. v1helper.AddOrUpdateTolerationInPodSpec(spec, &v1.Toleration{
  63. Key: schedulerapi.TaintNodeDiskPressure,
  64. Operator: v1.TolerationOpExists,
  65. Effect: v1.TaintEffectNoSchedule,
  66. })
  67. v1helper.AddOrUpdateTolerationInPodSpec(spec, &v1.Toleration{
  68. Key: schedulerapi.TaintNodeMemoryPressure,
  69. Operator: v1.TolerationOpExists,
  70. Effect: v1.TaintEffectNoSchedule,
  71. })
  72. v1helper.AddOrUpdateTolerationInPodSpec(spec, &v1.Toleration{
  73. Key: schedulerapi.TaintNodePIDPressure,
  74. Operator: v1.TolerationOpExists,
  75. Effect: v1.TaintEffectNoSchedule,
  76. })
  77. v1helper.AddOrUpdateTolerationInPodSpec(spec, &v1.Toleration{
  78. Key: schedulerapi.TaintNodeUnschedulable,
  79. Operator: v1.TolerationOpExists,
  80. Effect: v1.TaintEffectNoSchedule,
  81. })
  82. if spec.HostNetwork {
  83. v1helper.AddOrUpdateTolerationInPodSpec(spec, &v1.Toleration{
  84. Key: schedulerapi.TaintNodeNetworkUnavailable,
  85. Operator: v1.TolerationOpExists,
  86. Effect: v1.TaintEffectNoSchedule,
  87. })
  88. }
  89. }
  90. // CreatePodTemplate returns copy of provided template with additional
  91. // label which contains templateGeneration (for backward compatibility),
  92. // hash of provided template and sets default daemon tolerations.
  93. func CreatePodTemplate(template v1.PodTemplateSpec, generation *int64, hash string) v1.PodTemplateSpec {
  94. newTemplate := *template.DeepCopy()
  95. AddOrUpdateDaemonPodTolerations(&newTemplate.Spec)
  96. if newTemplate.ObjectMeta.Labels == nil {
  97. newTemplate.ObjectMeta.Labels = make(map[string]string)
  98. }
  99. if generation != nil {
  100. newTemplate.ObjectMeta.Labels[extensions.DaemonSetTemplateGenerationKey] = fmt.Sprint(*generation)
  101. }
  102. // TODO: do we need to validate if the DaemonSet is RollingUpdate or not?
  103. if len(hash) > 0 {
  104. newTemplate.ObjectMeta.Labels[extensions.DefaultDaemonSetUniqueLabelKey] = hash
  105. }
  106. return newTemplate
  107. }
  108. // IsPodUpdated checks if pod contains label value that either matches templateGeneration or hash
  109. func IsPodUpdated(pod *v1.Pod, hash string, dsTemplateGeneration *int64) bool {
  110. // Compare with hash to see if the pod is updated, need to maintain backward compatibility of templateGeneration
  111. templateMatches := dsTemplateGeneration != nil &&
  112. pod.Labels[extensions.DaemonSetTemplateGenerationKey] == fmt.Sprint(dsTemplateGeneration)
  113. hashMatches := len(hash) > 0 && pod.Labels[extensions.DefaultDaemonSetUniqueLabelKey] == hash
  114. return hashMatches || templateMatches
  115. }
  116. // SplitByAvailablePods splits provided daemon set pods by availability
  117. func SplitByAvailablePods(minReadySeconds int32, pods []*v1.Pod) ([]*v1.Pod, []*v1.Pod) {
  118. unavailablePods := []*v1.Pod{}
  119. availablePods := []*v1.Pod{}
  120. for _, pod := range pods {
  121. if podutil.IsPodAvailable(pod, minReadySeconds, metav1.Now()) {
  122. availablePods = append(availablePods, pod)
  123. } else {
  124. unavailablePods = append(unavailablePods, pod)
  125. }
  126. }
  127. return availablePods, unavailablePods
  128. }
  129. // ReplaceDaemonSetPodNodeNameNodeAffinity replaces the RequiredDuringSchedulingIgnoredDuringExecution
  130. // NodeAffinity of the given affinity with a new NodeAffinity that selects the given nodeName.
  131. // Note that this function assumes that no NodeAffinity conflicts with the selected nodeName.
  132. func ReplaceDaemonSetPodNodeNameNodeAffinity(affinity *v1.Affinity, nodename string) *v1.Affinity {
  133. nodeSelReq := v1.NodeSelectorRequirement{
  134. Key: schedulerapi.NodeFieldSelectorKeyNodeName,
  135. Operator: v1.NodeSelectorOpIn,
  136. Values: []string{nodename},
  137. }
  138. nodeSelector := &v1.NodeSelector{
  139. NodeSelectorTerms: []v1.NodeSelectorTerm{
  140. {
  141. MatchFields: []v1.NodeSelectorRequirement{nodeSelReq},
  142. },
  143. },
  144. }
  145. if affinity == nil {
  146. return &v1.Affinity{
  147. NodeAffinity: &v1.NodeAffinity{
  148. RequiredDuringSchedulingIgnoredDuringExecution: nodeSelector,
  149. },
  150. }
  151. }
  152. if affinity.NodeAffinity == nil {
  153. affinity.NodeAffinity = &v1.NodeAffinity{
  154. RequiredDuringSchedulingIgnoredDuringExecution: nodeSelector,
  155. }
  156. return affinity
  157. }
  158. nodeAffinity := affinity.NodeAffinity
  159. if nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution == nil {
  160. nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution = nodeSelector
  161. return affinity
  162. }
  163. // Replace node selector with the new one.
  164. nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms = []v1.NodeSelectorTerm{
  165. {
  166. MatchFields: []v1.NodeSelectorRequirement{nodeSelReq},
  167. },
  168. }
  169. return affinity
  170. }
  171. // GetTargetNodeName get the target node name of DaemonSet pods. If `.spec.NodeName` is not empty (nil),
  172. // return `.spec.NodeName`; otherwise, retrieve node name of pending pods from NodeAffinity. Return error
  173. // if failed to retrieve node name from `.spec.NodeName` and NodeAffinity.
  174. func GetTargetNodeName(pod *v1.Pod) (string, error) {
  175. if len(pod.Spec.NodeName) != 0 {
  176. return pod.Spec.NodeName, nil
  177. }
  178. // If ScheduleDaemonSetPods was enabled before, retrieve node name of unscheduled pods from NodeAffinity
  179. if pod.Spec.Affinity == nil ||
  180. pod.Spec.Affinity.NodeAffinity == nil ||
  181. pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution == nil {
  182. return "", fmt.Errorf("no spec.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution for pod %s/%s",
  183. pod.Namespace, pod.Name)
  184. }
  185. terms := pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms
  186. if len(terms) < 1 {
  187. return "", fmt.Errorf("no nodeSelectorTerms in requiredDuringSchedulingIgnoredDuringExecution of pod %s/%s",
  188. pod.Namespace, pod.Name)
  189. }
  190. for _, term := range terms {
  191. for _, exp := range term.MatchFields {
  192. if exp.Key == schedulerapi.NodeFieldSelectorKeyNodeName &&
  193. exp.Operator == v1.NodeSelectorOpIn {
  194. if len(exp.Values) != 1 {
  195. return "", fmt.Errorf("the matchFields value of '%s' is not unique for pod %s/%s",
  196. schedulerapi.NodeFieldSelectorKeyNodeName, pod.Namespace, pod.Name)
  197. }
  198. return exp.Values[0], nil
  199. }
  200. }
  201. }
  202. return "", fmt.Errorf("no node name found for pod %s/%s", pod.Namespace, pod.Name)
  203. }