stateful_set_utils.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407
  1. /*
  2. Copyright 2016 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package statefulset
  14. import (
  15. "bytes"
  16. "encoding/json"
  17. "fmt"
  18. "regexp"
  19. "strconv"
  20. apps "k8s.io/api/apps/v1"
  21. "k8s.io/api/core/v1"
  22. metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
  23. "k8s.io/apimachinery/pkg/runtime"
  24. "k8s.io/apimachinery/pkg/util/strategicpatch"
  25. "k8s.io/client-go/kubernetes/scheme"
  26. podutil "k8s.io/kubernetes/pkg/api/v1/pod"
  27. "k8s.io/kubernetes/pkg/controller"
  28. "k8s.io/kubernetes/pkg/controller/history"
  29. )
  30. // maxUpdateRetries is the maximum number of retries used for update conflict resolution prior to failure
  31. const maxUpdateRetries = 10
  32. // updateConflictError is the error used to indicate that the maximum number of retries against the API server have
  33. // been attempted and we need to back off
  34. var updateConflictError = fmt.Errorf("aborting update after %d attempts", maxUpdateRetries)
  35. var patchCodec = scheme.Codecs.LegacyCodec(apps.SchemeGroupVersion)
  36. // overlappingStatefulSets sorts a list of StatefulSets by creation timestamp, using their names as a tie breaker.
  37. // Generally used to tie break between StatefulSets that have overlapping selectors.
  38. type overlappingStatefulSets []*apps.StatefulSet
  39. func (o overlappingStatefulSets) Len() int { return len(o) }
  40. func (o overlappingStatefulSets) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
  41. func (o overlappingStatefulSets) Less(i, j int) bool {
  42. if o[i].CreationTimestamp.Equal(&o[j].CreationTimestamp) {
  43. return o[i].Name < o[j].Name
  44. }
  45. return o[i].CreationTimestamp.Before(&o[j].CreationTimestamp)
  46. }
  47. // statefulPodRegex is a regular expression that extracts the parent StatefulSet and ordinal from the Name of a Pod
  48. var statefulPodRegex = regexp.MustCompile("(.*)-([0-9]+)$")
  49. // getParentNameAndOrdinal gets the name of pod's parent StatefulSet and pod's ordinal as extracted from its Name. If
  50. // the Pod was not created by a StatefulSet, its parent is considered to be empty string, and its ordinal is considered
  51. // to be -1.
  52. func getParentNameAndOrdinal(pod *v1.Pod) (string, int) {
  53. parent := ""
  54. ordinal := -1
  55. subMatches := statefulPodRegex.FindStringSubmatch(pod.Name)
  56. if len(subMatches) < 3 {
  57. return parent, ordinal
  58. }
  59. parent = subMatches[1]
  60. if i, err := strconv.ParseInt(subMatches[2], 10, 32); err == nil {
  61. ordinal = int(i)
  62. }
  63. return parent, ordinal
  64. }
  65. // getParentName gets the name of pod's parent StatefulSet. If pod has not parent, the empty string is returned.
  66. func getParentName(pod *v1.Pod) string {
  67. parent, _ := getParentNameAndOrdinal(pod)
  68. return parent
  69. }
  70. // getOrdinal gets pod's ordinal. If pod has no ordinal, -1 is returned.
  71. func getOrdinal(pod *v1.Pod) int {
  72. _, ordinal := getParentNameAndOrdinal(pod)
  73. return ordinal
  74. }
  75. // getPodName gets the name of set's child Pod with an ordinal index of ordinal
  76. func getPodName(set *apps.StatefulSet, ordinal int) string {
  77. return fmt.Sprintf("%s-%d", set.Name, ordinal)
  78. }
  79. // getPersistentVolumeClaimName gets the name of PersistentVolumeClaim for a Pod with an ordinal index of ordinal. claim
  80. // must be a PersistentVolumeClaim from set's VolumeClaims template.
  81. func getPersistentVolumeClaimName(set *apps.StatefulSet, claim *v1.PersistentVolumeClaim, ordinal int) string {
  82. // NOTE: This name format is used by the heuristics for zone spreading in ChooseZoneForVolume
  83. return fmt.Sprintf("%s-%s-%d", claim.Name, set.Name, ordinal)
  84. }
  85. // isMemberOf tests if pod is a member of set.
  86. func isMemberOf(set *apps.StatefulSet, pod *v1.Pod) bool {
  87. return getParentName(pod) == set.Name
  88. }
  89. // identityMatches returns true if pod has a valid identity and network identity for a member of set.
  90. func identityMatches(set *apps.StatefulSet, pod *v1.Pod) bool {
  91. parent, ordinal := getParentNameAndOrdinal(pod)
  92. return ordinal >= 0 &&
  93. set.Name == parent &&
  94. pod.Name == getPodName(set, ordinal) &&
  95. pod.Namespace == set.Namespace &&
  96. pod.Labels[apps.StatefulSetPodNameLabel] == pod.Name
  97. }
  98. // storageMatches returns true if pod's Volumes cover the set of PersistentVolumeClaims
  99. func storageMatches(set *apps.StatefulSet, pod *v1.Pod) bool {
  100. ordinal := getOrdinal(pod)
  101. if ordinal < 0 {
  102. return false
  103. }
  104. volumes := make(map[string]v1.Volume, len(pod.Spec.Volumes))
  105. for _, volume := range pod.Spec.Volumes {
  106. volumes[volume.Name] = volume
  107. }
  108. for _, claim := range set.Spec.VolumeClaimTemplates {
  109. volume, found := volumes[claim.Name]
  110. if !found ||
  111. volume.VolumeSource.PersistentVolumeClaim == nil ||
  112. volume.VolumeSource.PersistentVolumeClaim.ClaimName !=
  113. getPersistentVolumeClaimName(set, &claim, ordinal) {
  114. return false
  115. }
  116. }
  117. return true
  118. }
  119. // getPersistentVolumeClaims gets a map of PersistentVolumeClaims to their template names, as defined in set. The
  120. // returned PersistentVolumeClaims are each constructed with a the name specific to the Pod. This name is determined
  121. // by getPersistentVolumeClaimName.
  122. func getPersistentVolumeClaims(set *apps.StatefulSet, pod *v1.Pod) map[string]v1.PersistentVolumeClaim {
  123. ordinal := getOrdinal(pod)
  124. templates := set.Spec.VolumeClaimTemplates
  125. claims := make(map[string]v1.PersistentVolumeClaim, len(templates))
  126. for i := range templates {
  127. claim := templates[i]
  128. claim.Name = getPersistentVolumeClaimName(set, &claim, ordinal)
  129. claim.Namespace = set.Namespace
  130. if claim.Labels != nil {
  131. for key, value := range set.Spec.Selector.MatchLabels {
  132. claim.Labels[key] = value
  133. }
  134. } else {
  135. claim.Labels = set.Spec.Selector.MatchLabels
  136. }
  137. claims[templates[i].Name] = claim
  138. }
  139. return claims
  140. }
  141. // updateStorage updates pod's Volumes to conform with the PersistentVolumeClaim of set's templates. If pod has
  142. // conflicting local Volumes these are replaced with Volumes that conform to the set's templates.
  143. func updateStorage(set *apps.StatefulSet, pod *v1.Pod) {
  144. currentVolumes := pod.Spec.Volumes
  145. claims := getPersistentVolumeClaims(set, pod)
  146. newVolumes := make([]v1.Volume, 0, len(claims))
  147. for name, claim := range claims {
  148. newVolumes = append(newVolumes, v1.Volume{
  149. Name: name,
  150. VolumeSource: v1.VolumeSource{
  151. PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
  152. ClaimName: claim.Name,
  153. // TODO: Use source definition to set this value when we have one.
  154. ReadOnly: false,
  155. },
  156. },
  157. })
  158. }
  159. for i := range currentVolumes {
  160. if _, ok := claims[currentVolumes[i].Name]; !ok {
  161. newVolumes = append(newVolumes, currentVolumes[i])
  162. }
  163. }
  164. pod.Spec.Volumes = newVolumes
  165. }
  166. func initIdentity(set *apps.StatefulSet, pod *v1.Pod) {
  167. updateIdentity(set, pod)
  168. // Set these immutable fields only on initial Pod creation, not updates.
  169. pod.Spec.Hostname = pod.Name
  170. pod.Spec.Subdomain = set.Spec.ServiceName
  171. }
  172. // updateIdentity updates pod's name, hostname, and subdomain, and StatefulSetPodNameLabel to conform to set's name
  173. // and headless service.
  174. func updateIdentity(set *apps.StatefulSet, pod *v1.Pod) {
  175. pod.Name = getPodName(set, getOrdinal(pod))
  176. pod.Namespace = set.Namespace
  177. if pod.Labels == nil {
  178. pod.Labels = make(map[string]string)
  179. }
  180. pod.Labels[apps.StatefulSetPodNameLabel] = pod.Name
  181. }
  182. // isRunningAndReady returns true if pod is in the PodRunning Phase, if it has a condition of PodReady.
  183. func isRunningAndReady(pod *v1.Pod) bool {
  184. return pod.Status.Phase == v1.PodRunning && podutil.IsPodReady(pod)
  185. }
  186. // isCreated returns true if pod has been created and is maintained by the API server
  187. func isCreated(pod *v1.Pod) bool {
  188. return pod.Status.Phase != ""
  189. }
  190. // isFailed returns true if pod has a Phase of PodFailed
  191. func isFailed(pod *v1.Pod) bool {
  192. return pod.Status.Phase == v1.PodFailed
  193. }
  194. // isTerminating returns true if pod's DeletionTimestamp has been set
  195. func isTerminating(pod *v1.Pod) bool {
  196. return pod.DeletionTimestamp != nil
  197. }
  198. // isHealthy returns true if pod is running and ready and has not been terminated
  199. func isHealthy(pod *v1.Pod) bool {
  200. return isRunningAndReady(pod) && !isTerminating(pod)
  201. }
  202. // allowsBurst is true if the alpha burst annotation is set.
  203. func allowsBurst(set *apps.StatefulSet) bool {
  204. return set.Spec.PodManagementPolicy == apps.ParallelPodManagement
  205. }
  206. // setPodRevision sets the revision of Pod to revision by adding the StatefulSetRevisionLabel
  207. func setPodRevision(pod *v1.Pod, revision string) {
  208. if pod.Labels == nil {
  209. pod.Labels = make(map[string]string)
  210. }
  211. pod.Labels[apps.StatefulSetRevisionLabel] = revision
  212. }
  213. // getPodRevision gets the revision of Pod by inspecting the StatefulSetRevisionLabel. If pod has no revision the empty
  214. // string is returned.
  215. func getPodRevision(pod *v1.Pod) string {
  216. if pod.Labels == nil {
  217. return ""
  218. }
  219. return pod.Labels[apps.StatefulSetRevisionLabel]
  220. }
  221. // newStatefulSetPod returns a new Pod conforming to the set's Spec with an identity generated from ordinal.
  222. func newStatefulSetPod(set *apps.StatefulSet, ordinal int) *v1.Pod {
  223. pod, _ := controller.GetPodFromTemplate(&set.Spec.Template, set, metav1.NewControllerRef(set, controllerKind))
  224. pod.Name = getPodName(set, ordinal)
  225. initIdentity(set, pod)
  226. updateStorage(set, pod)
  227. return pod
  228. }
  229. // newVersionedStatefulSetPod creates a new Pod for a StatefulSet. currentSet is the representation of the set at the
  230. // current revision. updateSet is the representation of the set at the updateRevision. currentRevision is the name of
  231. // the current revision. updateRevision is the name of the update revision. ordinal is the ordinal of the Pod. If the
  232. // returned error is nil, the returned Pod is valid.
  233. func newVersionedStatefulSetPod(currentSet, updateSet *apps.StatefulSet, currentRevision, updateRevision string, ordinal int) *v1.Pod {
  234. if currentSet.Spec.UpdateStrategy.Type == apps.RollingUpdateStatefulSetStrategyType &&
  235. (currentSet.Spec.UpdateStrategy.RollingUpdate == nil && ordinal < int(currentSet.Status.CurrentReplicas)) ||
  236. (currentSet.Spec.UpdateStrategy.RollingUpdate != nil && ordinal < int(*currentSet.Spec.UpdateStrategy.RollingUpdate.Partition)) {
  237. pod := newStatefulSetPod(currentSet, ordinal)
  238. setPodRevision(pod, currentRevision)
  239. return pod
  240. }
  241. pod := newStatefulSetPod(updateSet, ordinal)
  242. setPodRevision(pod, updateRevision)
  243. return pod
  244. }
  245. // Match check if the given StatefulSet's template matches the template stored in the given history.
  246. func Match(ss *apps.StatefulSet, history *apps.ControllerRevision) (bool, error) {
  247. patch, err := getPatch(ss)
  248. if err != nil {
  249. return false, err
  250. }
  251. return bytes.Equal(patch, history.Data.Raw), nil
  252. }
  253. // getPatch returns a strategic merge patch that can be applied to restore a StatefulSet to a
  254. // previous version. If the returned error is nil the patch is valid. The current state that we save is just the
  255. // PodSpecTemplate. We can modify this later to encompass more state (or less) and remain compatible with previously
  256. // recorded patches.
  257. func getPatch(set *apps.StatefulSet) ([]byte, error) {
  258. str, err := runtime.Encode(patchCodec, set)
  259. if err != nil {
  260. return nil, err
  261. }
  262. var raw map[string]interface{}
  263. json.Unmarshal([]byte(str), &raw)
  264. objCopy := make(map[string]interface{})
  265. specCopy := make(map[string]interface{})
  266. spec := raw["spec"].(map[string]interface{})
  267. template := spec["template"].(map[string]interface{})
  268. specCopy["template"] = template
  269. template["$patch"] = "replace"
  270. objCopy["spec"] = specCopy
  271. patch, err := json.Marshal(objCopy)
  272. return patch, err
  273. }
  274. // newRevision creates a new ControllerRevision containing a patch that reapplies the target state of set.
  275. // The Revision of the returned ControllerRevision is set to revision. If the returned error is nil, the returned
  276. // ControllerRevision is valid. StatefulSet revisions are stored as patches that re-apply the current state of set
  277. // to a new StatefulSet using a strategic merge patch to replace the saved state of the new StatefulSet.
  278. func newRevision(set *apps.StatefulSet, revision int64, collisionCount *int32) (*apps.ControllerRevision, error) {
  279. patch, err := getPatch(set)
  280. if err != nil {
  281. return nil, err
  282. }
  283. cr, err := history.NewControllerRevision(set,
  284. controllerKind,
  285. set.Spec.Template.Labels,
  286. runtime.RawExtension{Raw: patch},
  287. revision,
  288. collisionCount)
  289. if err != nil {
  290. return nil, err
  291. }
  292. if cr.ObjectMeta.Annotations == nil {
  293. cr.ObjectMeta.Annotations = make(map[string]string)
  294. }
  295. for key, value := range set.Annotations {
  296. cr.ObjectMeta.Annotations[key] = value
  297. }
  298. return cr, nil
  299. }
  300. // ApplyRevision returns a new StatefulSet constructed by restoring the state in revision to set. If the returned error
  301. // is nil, the returned StatefulSet is valid.
  302. func ApplyRevision(set *apps.StatefulSet, revision *apps.ControllerRevision) (*apps.StatefulSet, error) {
  303. clone := set.DeepCopy()
  304. patched, err := strategicpatch.StrategicMergePatch([]byte(runtime.EncodeOrDie(patchCodec, clone)), revision.Data.Raw, clone)
  305. if err != nil {
  306. return nil, err
  307. }
  308. restoredSet := &apps.StatefulSet{}
  309. err = json.Unmarshal(patched, restoredSet)
  310. if err != nil {
  311. return nil, err
  312. }
  313. return restoredSet, nil
  314. }
  315. // nextRevision finds the next valid revision number based on revisions. If the length of revisions
  316. // is 0 this is 1. Otherwise, it is 1 greater than the largest revision's Revision. This method
  317. // assumes that revisions has been sorted by Revision.
  318. func nextRevision(revisions []*apps.ControllerRevision) int64 {
  319. count := len(revisions)
  320. if count <= 0 {
  321. return 1
  322. }
  323. return revisions[count-1].Revision + 1
  324. }
  325. // inconsistentStatus returns true if the ObservedGeneration of status is greater than set's
  326. // Generation or if any of the status's fields do not match those of set's status.
  327. func inconsistentStatus(set *apps.StatefulSet, status *apps.StatefulSetStatus) bool {
  328. return status.ObservedGeneration > set.Status.ObservedGeneration ||
  329. status.Replicas != set.Status.Replicas ||
  330. status.CurrentReplicas != set.Status.CurrentReplicas ||
  331. status.ReadyReplicas != set.Status.ReadyReplicas ||
  332. status.UpdatedReplicas != set.Status.UpdatedReplicas ||
  333. status.CurrentRevision != set.Status.CurrentRevision ||
  334. status.UpdateRevision != set.Status.UpdateRevision
  335. }
  336. // completeRollingUpdate completes a rolling update when all of set's replica Pods have been updated
  337. // to the updateRevision. status's currentRevision is set to updateRevision and its' updateRevision
  338. // is set to the empty string. status's currentReplicas is set to updateReplicas and its updateReplicas
  339. // are set to 0.
  340. func completeRollingUpdate(set *apps.StatefulSet, status *apps.StatefulSetStatus) {
  341. if set.Spec.UpdateStrategy.Type == apps.RollingUpdateStatefulSetStrategyType &&
  342. status.UpdatedReplicas == status.Replicas &&
  343. status.ReadyReplicas == status.Replicas {
  344. status.CurrentReplicas = status.UpdatedReplicas
  345. status.CurrentRevision = status.UpdateRevision
  346. }
  347. }
  348. // ascendingOrdinal is a sort.Interface that Sorts a list of Pods based on the ordinals extracted
  349. // from the Pod. Pod's that have not been constructed by StatefulSet's have an ordinal of -1, and are therefore pushed
  350. // to the front of the list.
  351. type ascendingOrdinal []*v1.Pod
  352. func (ao ascendingOrdinal) Len() int {
  353. return len(ao)
  354. }
  355. func (ao ascendingOrdinal) Swap(i, j int) {
  356. ao[i], ao[j] = ao[j], ao[i]
  357. }
  358. func (ao ascendingOrdinal) Less(i, j int) bool {
  359. return getOrdinal(ao[i]) < getOrdinal(ao[j])
  360. }