stateful_set_utils.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401
  1. /*
  2. Copyright 2016 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package statefulset
  14. import (
  15. "bytes"
  16. "encoding/json"
  17. "fmt"
  18. "regexp"
  19. "strconv"
  20. apps "k8s.io/api/apps/v1"
  21. "k8s.io/api/core/v1"
  22. metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
  23. "k8s.io/apimachinery/pkg/runtime"
  24. "k8s.io/apimachinery/pkg/util/strategicpatch"
  25. "k8s.io/client-go/kubernetes/scheme"
  26. podutil "k8s.io/kubernetes/pkg/api/v1/pod"
  27. "k8s.io/kubernetes/pkg/controller"
  28. "k8s.io/kubernetes/pkg/controller/history"
  29. )
// patchCodec is the legacy codec for the apps API group version. Within this file it is used to encode StatefulSets
// when building revision patches (getPatch) and when applying a stored revision (ApplyRevision).
var patchCodec = scheme.Codecs.LegacyCodec(apps.SchemeGroupVersion)
  31. // overlappingStatefulSets sorts a list of StatefulSets by creation timestamp, using their names as a tie breaker.
  32. // Generally used to tie break between StatefulSets that have overlapping selectors.
  33. type overlappingStatefulSets []*apps.StatefulSet
  34. func (o overlappingStatefulSets) Len() int { return len(o) }
  35. func (o overlappingStatefulSets) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
  36. func (o overlappingStatefulSets) Less(i, j int) bool {
  37. if o[i].CreationTimestamp.Equal(&o[j].CreationTimestamp) {
  38. return o[i].Name < o[j].Name
  39. }
  40. return o[i].CreationTimestamp.Before(&o[j].CreationTimestamp)
  41. }
  42. // statefulPodRegex is a regular expression that extracts the parent StatefulSet and ordinal from the Name of a Pod
  43. var statefulPodRegex = regexp.MustCompile("(.*)-([0-9]+)$")
  44. // getParentNameAndOrdinal gets the name of pod's parent StatefulSet and pod's ordinal as extracted from its Name. If
  45. // the Pod was not created by a StatefulSet, its parent is considered to be empty string, and its ordinal is considered
  46. // to be -1.
  47. func getParentNameAndOrdinal(pod *v1.Pod) (string, int) {
  48. parent := ""
  49. ordinal := -1
  50. subMatches := statefulPodRegex.FindStringSubmatch(pod.Name)
  51. if len(subMatches) < 3 {
  52. return parent, ordinal
  53. }
  54. parent = subMatches[1]
  55. if i, err := strconv.ParseInt(subMatches[2], 10, 32); err == nil {
  56. ordinal = int(i)
  57. }
  58. return parent, ordinal
  59. }
  60. // getParentName gets the name of pod's parent StatefulSet. If pod has not parent, the empty string is returned.
  61. func getParentName(pod *v1.Pod) string {
  62. parent, _ := getParentNameAndOrdinal(pod)
  63. return parent
  64. }
  65. // getOrdinal gets pod's ordinal. If pod has no ordinal, -1 is returned.
  66. func getOrdinal(pod *v1.Pod) int {
  67. _, ordinal := getParentNameAndOrdinal(pod)
  68. return ordinal
  69. }
  70. // getPodName gets the name of set's child Pod with an ordinal index of ordinal
  71. func getPodName(set *apps.StatefulSet, ordinal int) string {
  72. return fmt.Sprintf("%s-%d", set.Name, ordinal)
  73. }
  74. // getPersistentVolumeClaimName gets the name of PersistentVolumeClaim for a Pod with an ordinal index of ordinal. claim
  75. // must be a PersistentVolumeClaim from set's VolumeClaims template.
  76. func getPersistentVolumeClaimName(set *apps.StatefulSet, claim *v1.PersistentVolumeClaim, ordinal int) string {
  77. // NOTE: This name format is used by the heuristics for zone spreading in ChooseZoneForVolume
  78. return fmt.Sprintf("%s-%s-%d", claim.Name, set.Name, ordinal)
  79. }
  80. // isMemberOf tests if pod is a member of set.
  81. func isMemberOf(set *apps.StatefulSet, pod *v1.Pod) bool {
  82. return getParentName(pod) == set.Name
  83. }
  84. // identityMatches returns true if pod has a valid identity and network identity for a member of set.
  85. func identityMatches(set *apps.StatefulSet, pod *v1.Pod) bool {
  86. parent, ordinal := getParentNameAndOrdinal(pod)
  87. return ordinal >= 0 &&
  88. set.Name == parent &&
  89. pod.Name == getPodName(set, ordinal) &&
  90. pod.Namespace == set.Namespace &&
  91. pod.Labels[apps.StatefulSetPodNameLabel] == pod.Name
  92. }
  93. // storageMatches returns true if pod's Volumes cover the set of PersistentVolumeClaims
  94. func storageMatches(set *apps.StatefulSet, pod *v1.Pod) bool {
  95. ordinal := getOrdinal(pod)
  96. if ordinal < 0 {
  97. return false
  98. }
  99. volumes := make(map[string]v1.Volume, len(pod.Spec.Volumes))
  100. for _, volume := range pod.Spec.Volumes {
  101. volumes[volume.Name] = volume
  102. }
  103. for _, claim := range set.Spec.VolumeClaimTemplates {
  104. volume, found := volumes[claim.Name]
  105. if !found ||
  106. volume.VolumeSource.PersistentVolumeClaim == nil ||
  107. volume.VolumeSource.PersistentVolumeClaim.ClaimName !=
  108. getPersistentVolumeClaimName(set, &claim, ordinal) {
  109. return false
  110. }
  111. }
  112. return true
  113. }
  114. // getPersistentVolumeClaims gets a map of PersistentVolumeClaims to their template names, as defined in set. The
  115. // returned PersistentVolumeClaims are each constructed with a the name specific to the Pod. This name is determined
  116. // by getPersistentVolumeClaimName.
  117. func getPersistentVolumeClaims(set *apps.StatefulSet, pod *v1.Pod) map[string]v1.PersistentVolumeClaim {
  118. ordinal := getOrdinal(pod)
  119. templates := set.Spec.VolumeClaimTemplates
  120. claims := make(map[string]v1.PersistentVolumeClaim, len(templates))
  121. for i := range templates {
  122. claim := templates[i]
  123. claim.Name = getPersistentVolumeClaimName(set, &claim, ordinal)
  124. claim.Namespace = set.Namespace
  125. if claim.Labels != nil {
  126. for key, value := range set.Spec.Selector.MatchLabels {
  127. claim.Labels[key] = value
  128. }
  129. } else {
  130. claim.Labels = set.Spec.Selector.MatchLabels
  131. }
  132. claims[templates[i].Name] = claim
  133. }
  134. return claims
  135. }
  136. // updateStorage updates pod's Volumes to conform with the PersistentVolumeClaim of set's templates. If pod has
  137. // conflicting local Volumes these are replaced with Volumes that conform to the set's templates.
  138. func updateStorage(set *apps.StatefulSet, pod *v1.Pod) {
  139. currentVolumes := pod.Spec.Volumes
  140. claims := getPersistentVolumeClaims(set, pod)
  141. newVolumes := make([]v1.Volume, 0, len(claims))
  142. for name, claim := range claims {
  143. newVolumes = append(newVolumes, v1.Volume{
  144. Name: name,
  145. VolumeSource: v1.VolumeSource{
  146. PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
  147. ClaimName: claim.Name,
  148. // TODO: Use source definition to set this value when we have one.
  149. ReadOnly: false,
  150. },
  151. },
  152. })
  153. }
  154. for i := range currentVolumes {
  155. if _, ok := claims[currentVolumes[i].Name]; !ok {
  156. newVolumes = append(newVolumes, currentVolumes[i])
  157. }
  158. }
  159. pod.Spec.Volumes = newVolumes
  160. }
  161. func initIdentity(set *apps.StatefulSet, pod *v1.Pod) {
  162. updateIdentity(set, pod)
  163. // Set these immutable fields only on initial Pod creation, not updates.
  164. pod.Spec.Hostname = pod.Name
  165. pod.Spec.Subdomain = set.Spec.ServiceName
  166. }
  167. // updateIdentity updates pod's name, hostname, and subdomain, and StatefulSetPodNameLabel to conform to set's name
  168. // and headless service.
  169. func updateIdentity(set *apps.StatefulSet, pod *v1.Pod) {
  170. pod.Name = getPodName(set, getOrdinal(pod))
  171. pod.Namespace = set.Namespace
  172. if pod.Labels == nil {
  173. pod.Labels = make(map[string]string)
  174. }
  175. pod.Labels[apps.StatefulSetPodNameLabel] = pod.Name
  176. }
  177. // isRunningAndReady returns true if pod is in the PodRunning Phase, if it has a condition of PodReady.
  178. func isRunningAndReady(pod *v1.Pod) bool {
  179. return pod.Status.Phase == v1.PodRunning && podutil.IsPodReady(pod)
  180. }
  181. // isCreated returns true if pod has been created and is maintained by the API server
  182. func isCreated(pod *v1.Pod) bool {
  183. return pod.Status.Phase != ""
  184. }
  185. // isFailed returns true if pod has a Phase of PodFailed
  186. func isFailed(pod *v1.Pod) bool {
  187. return pod.Status.Phase == v1.PodFailed
  188. }
  189. // isTerminating returns true if pod's DeletionTimestamp has been set
  190. func isTerminating(pod *v1.Pod) bool {
  191. return pod.DeletionTimestamp != nil
  192. }
  193. // isHealthy returns true if pod is running and ready and has not been terminated
  194. func isHealthy(pod *v1.Pod) bool {
  195. return isRunningAndReady(pod) && !isTerminating(pod)
  196. }
  197. // allowsBurst is true if the alpha burst annotation is set.
  198. func allowsBurst(set *apps.StatefulSet) bool {
  199. return set.Spec.PodManagementPolicy == apps.ParallelPodManagement
  200. }
  201. // setPodRevision sets the revision of Pod to revision by adding the StatefulSetRevisionLabel
  202. func setPodRevision(pod *v1.Pod, revision string) {
  203. if pod.Labels == nil {
  204. pod.Labels = make(map[string]string)
  205. }
  206. pod.Labels[apps.StatefulSetRevisionLabel] = revision
  207. }
  208. // getPodRevision gets the revision of Pod by inspecting the StatefulSetRevisionLabel. If pod has no revision the empty
  209. // string is returned.
  210. func getPodRevision(pod *v1.Pod) string {
  211. if pod.Labels == nil {
  212. return ""
  213. }
  214. return pod.Labels[apps.StatefulSetRevisionLabel]
  215. }
  216. // newStatefulSetPod returns a new Pod conforming to the set's Spec with an identity generated from ordinal.
  217. func newStatefulSetPod(set *apps.StatefulSet, ordinal int) *v1.Pod {
  218. pod, _ := controller.GetPodFromTemplate(&set.Spec.Template, set, metav1.NewControllerRef(set, controllerKind))
  219. pod.Name = getPodName(set, ordinal)
  220. initIdentity(set, pod)
  221. updateStorage(set, pod)
  222. return pod
  223. }
  224. // newVersionedStatefulSetPod creates a new Pod for a StatefulSet. currentSet is the representation of the set at the
  225. // current revision. updateSet is the representation of the set at the updateRevision. currentRevision is the name of
  226. // the current revision. updateRevision is the name of the update revision. ordinal is the ordinal of the Pod. If the
  227. // returned error is nil, the returned Pod is valid.
  228. func newVersionedStatefulSetPod(currentSet, updateSet *apps.StatefulSet, currentRevision, updateRevision string, ordinal int) *v1.Pod {
  229. if currentSet.Spec.UpdateStrategy.Type == apps.RollingUpdateStatefulSetStrategyType &&
  230. (currentSet.Spec.UpdateStrategy.RollingUpdate == nil && ordinal < int(currentSet.Status.CurrentReplicas)) ||
  231. (currentSet.Spec.UpdateStrategy.RollingUpdate != nil && ordinal < int(*currentSet.Spec.UpdateStrategy.RollingUpdate.Partition)) {
  232. pod := newStatefulSetPod(currentSet, ordinal)
  233. setPodRevision(pod, currentRevision)
  234. return pod
  235. }
  236. pod := newStatefulSetPod(updateSet, ordinal)
  237. setPodRevision(pod, updateRevision)
  238. return pod
  239. }
  240. // Match check if the given StatefulSet's template matches the template stored in the given history.
  241. func Match(ss *apps.StatefulSet, history *apps.ControllerRevision) (bool, error) {
  242. patch, err := getPatch(ss)
  243. if err != nil {
  244. return false, err
  245. }
  246. return bytes.Equal(patch, history.Data.Raw), nil
  247. }
  248. // getPatch returns a strategic merge patch that can be applied to restore a StatefulSet to a
  249. // previous version. If the returned error is nil the patch is valid. The current state that we save is just the
  250. // PodSpecTemplate. We can modify this later to encompass more state (or less) and remain compatible with previously
  251. // recorded patches.
  252. func getPatch(set *apps.StatefulSet) ([]byte, error) {
  253. str, err := runtime.Encode(patchCodec, set)
  254. if err != nil {
  255. return nil, err
  256. }
  257. var raw map[string]interface{}
  258. json.Unmarshal([]byte(str), &raw)
  259. objCopy := make(map[string]interface{})
  260. specCopy := make(map[string]interface{})
  261. spec := raw["spec"].(map[string]interface{})
  262. template := spec["template"].(map[string]interface{})
  263. specCopy["template"] = template
  264. template["$patch"] = "replace"
  265. objCopy["spec"] = specCopy
  266. patch, err := json.Marshal(objCopy)
  267. return patch, err
  268. }
  269. // newRevision creates a new ControllerRevision containing a patch that reapplies the target state of set.
  270. // The Revision of the returned ControllerRevision is set to revision. If the returned error is nil, the returned
  271. // ControllerRevision is valid. StatefulSet revisions are stored as patches that re-apply the current state of set
  272. // to a new StatefulSet using a strategic merge patch to replace the saved state of the new StatefulSet.
  273. func newRevision(set *apps.StatefulSet, revision int64, collisionCount *int32) (*apps.ControllerRevision, error) {
  274. patch, err := getPatch(set)
  275. if err != nil {
  276. return nil, err
  277. }
  278. cr, err := history.NewControllerRevision(set,
  279. controllerKind,
  280. set.Spec.Template.Labels,
  281. runtime.RawExtension{Raw: patch},
  282. revision,
  283. collisionCount)
  284. if err != nil {
  285. return nil, err
  286. }
  287. if cr.ObjectMeta.Annotations == nil {
  288. cr.ObjectMeta.Annotations = make(map[string]string)
  289. }
  290. for key, value := range set.Annotations {
  291. cr.ObjectMeta.Annotations[key] = value
  292. }
  293. return cr, nil
  294. }
  295. // ApplyRevision returns a new StatefulSet constructed by restoring the state in revision to set. If the returned error
  296. // is nil, the returned StatefulSet is valid.
  297. func ApplyRevision(set *apps.StatefulSet, revision *apps.ControllerRevision) (*apps.StatefulSet, error) {
  298. clone := set.DeepCopy()
  299. patched, err := strategicpatch.StrategicMergePatch([]byte(runtime.EncodeOrDie(patchCodec, clone)), revision.Data.Raw, clone)
  300. if err != nil {
  301. return nil, err
  302. }
  303. restoredSet := &apps.StatefulSet{}
  304. err = json.Unmarshal(patched, restoredSet)
  305. if err != nil {
  306. return nil, err
  307. }
  308. return restoredSet, nil
  309. }
  310. // nextRevision finds the next valid revision number based on revisions. If the length of revisions
  311. // is 0 this is 1. Otherwise, it is 1 greater than the largest revision's Revision. This method
  312. // assumes that revisions has been sorted by Revision.
  313. func nextRevision(revisions []*apps.ControllerRevision) int64 {
  314. count := len(revisions)
  315. if count <= 0 {
  316. return 1
  317. }
  318. return revisions[count-1].Revision + 1
  319. }
  320. // inconsistentStatus returns true if the ObservedGeneration of status is greater than set's
  321. // Generation or if any of the status's fields do not match those of set's status.
  322. func inconsistentStatus(set *apps.StatefulSet, status *apps.StatefulSetStatus) bool {
  323. return status.ObservedGeneration > set.Status.ObservedGeneration ||
  324. status.Replicas != set.Status.Replicas ||
  325. status.CurrentReplicas != set.Status.CurrentReplicas ||
  326. status.ReadyReplicas != set.Status.ReadyReplicas ||
  327. status.UpdatedReplicas != set.Status.UpdatedReplicas ||
  328. status.CurrentRevision != set.Status.CurrentRevision ||
  329. status.UpdateRevision != set.Status.UpdateRevision
  330. }
  331. // completeRollingUpdate completes a rolling update when all of set's replica Pods have been updated
  332. // to the updateRevision. status's currentRevision is set to updateRevision and its' updateRevision
  333. // is set to the empty string. status's currentReplicas is set to updateReplicas and its updateReplicas
  334. // are set to 0.
  335. func completeRollingUpdate(set *apps.StatefulSet, status *apps.StatefulSetStatus) {
  336. if set.Spec.UpdateStrategy.Type == apps.RollingUpdateStatefulSetStrategyType &&
  337. status.UpdatedReplicas == status.Replicas &&
  338. status.ReadyReplicas == status.Replicas {
  339. status.CurrentReplicas = status.UpdatedReplicas
  340. status.CurrentRevision = status.UpdateRevision
  341. }
  342. }
  343. // ascendingOrdinal is a sort.Interface that Sorts a list of Pods based on the ordinals extracted
  344. // from the Pod. Pod's that have not been constructed by StatefulSet's have an ordinal of -1, and are therefore pushed
  345. // to the front of the list.
  346. type ascendingOrdinal []*v1.Pod
  347. func (ao ascendingOrdinal) Len() int {
  348. return len(ao)
  349. }
  350. func (ao ascendingOrdinal) Swap(i, j int) {
  351. ao[i], ao[j] = ao[j], ao[i]
  352. }
  353. func (ao ascendingOrdinal) Less(i, j int) bool {
  354. return getOrdinal(ao[i]) < getOrdinal(ao[j])
  355. }