rolling.go

/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package deployment

import (
	"fmt"
	"sort"

	apps "k8s.io/api/apps/v1"
	"k8s.io/klog"

	"k8s.io/kubernetes/pkg/controller"
	deploymentutil "k8s.io/kubernetes/pkg/controller/deployment/util"
	"k8s.io/utils/integer"
)

// rolloutRolling implements the logic for rolling a new replica set.
func (dc *DeploymentController) rolloutRolling(d *apps.Deployment, rsList []*apps.ReplicaSet) error {
	newRS, oldRSs, err := dc.getAllReplicaSetsAndSyncRevision(d, rsList, true)
	if err != nil {
		return err
	}
	allRSs := append(oldRSs, newRS)

	// Scale up, if we can.
	scaledUp, err := dc.reconcileNewReplicaSet(allRSs, newRS, d)
	if err != nil {
		return err
	}
	if scaledUp {
		// Update DeploymentStatus
		return dc.syncRolloutStatus(allRSs, newRS, d)
	}

	// Scale down, if we can.
	scaledDown, err := dc.reconcileOldReplicaSets(allRSs, controller.FilterActiveReplicaSets(oldRSs), newRS, d)
	if err != nil {
		return err
	}
	if scaledDown {
		// Update DeploymentStatus
		return dc.syncRolloutStatus(allRSs, newRS, d)
	}

	if deploymentutil.DeploymentComplete(d, &d.Status) {
		if err := dc.cleanupDeployment(oldRSs, d); err != nil {
			return err
		}
	}

	// Sync deployment status
	return dc.syncRolloutStatus(allRSs, newRS, d)
}
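
// reconcileNewReplicaSet scales the new replica set towards the deployment's desired
// replica count: it scales down if the new replica set is above the desired count, and
// otherwise scales up to the count computed by deploymentutil.NewRSNewReplicas, which
// takes maxSurge into account. It returns whether a scaling operation was performed.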
func (dc *DeploymentController) reconcileNewReplicaSet(allRSs []*apps.ReplicaSet, newRS *apps.ReplicaSet, deployment *apps.Deployment) (bool, error) {
	if *(newRS.Spec.Replicas) == *(deployment.Spec.Replicas) {
		// Scaling not required.
		return false, nil
	}
	if *(newRS.Spec.Replicas) > *(deployment.Spec.Replicas) {
		// Scale down.
		scaled, _, err := dc.scaleReplicaSetAndRecordEvent(newRS, *(deployment.Spec.Replicas), deployment)
		return scaled, err
	}
	newReplicasCount, err := deploymentutil.NewRSNewReplicas(deployment, allRSs, newRS)
	if err != nil {
		return false, err
	}
	scaled, _, err := dc.scaleReplicaSetAndRecordEvent(newRS, newReplicasCount, deployment)
	return scaled, err
}
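
// reconcileOldReplicaSets scales down the old replica sets as far as maxUnavailable
// allows, cleaning up unhealthy replicas first. It returns whether any replicas were
// scaled down.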
func (dc *DeploymentController) reconcileOldReplicaSets(allRSs []*apps.ReplicaSet, oldRSs []*apps.ReplicaSet, newRS *apps.ReplicaSet, deployment *apps.Deployment) (bool, error) {
	oldPodsCount := deploymentutil.GetReplicaCountForReplicaSets(oldRSs)
	if oldPodsCount == 0 {
		// Can't scale down further
		return false, nil
	}

	allPodsCount := deploymentutil.GetReplicaCountForReplicaSets(allRSs)
	klog.V(4).Infof("New replica set %s/%s has %d available pods.", newRS.Namespace, newRS.Name, newRS.Status.AvailableReplicas)
	maxUnavailable := deploymentutil.MaxUnavailable(*deployment)
	// Check if we can scale down. We can scale down in the following 2 cases:
	// * Some old replica sets have unhealthy replicas; we can safely scale down those unhealthy
	//   replicas since that won't further increase unavailability.
	// * The new replica set has scaled up and its replicas have become ready; then we can scale
	//   down old replica sets in a further step.
	//
	// maxScaledDown := allPodsCount - minAvailable - newReplicaSetPodsUnavailable
	// This takes into account not only maxUnavailable and any surge pods that have been created,
	// but also unavailable pods from the newRS, so that unavailable pods from the newRS don't
	// make us scale down old replica sets in a further step (which would increase unavailability).
	//
	// Concrete example:
	//
	// * 10 replicas
	// * 2 maxUnavailable (absolute number, not percent)
	// * 3 maxSurge (absolute number, not percent)
	//
	// case 1:
	// * Deployment is updated, newRS is created with 3 replicas, oldRS is scaled down to 8, and newRS is scaled up to 5.
	// * The new replica set pods crashloop and never become available.
	// * allPodsCount is 13. minAvailable is 8. newRSPodsUnavailable is 5.
	// * A node fails and causes one of the oldRS pods to become unavailable. However, 13 - 8 - 5 = 0, so the oldRS won't be scaled down.
	// * The user notices the crashloop and does kubectl rollout undo to rollback.
	// * newRSPodsUnavailable is 1, since we rolled back to the good replica set, so maxScaledDown = 13 - 8 - 1 = 4. 4 of the crashlooping pods will be scaled down.
	// * The total number of pods will then be 9 and the newRS can be scaled up to 10.
	//
	// case 2:
	// Same example, but pushing a new pod template instead of rolling back (aka "roll over"):
	// * The new replica set created must start with 0 replicas because allPodsCount is already at 13.
	// * However, newRSPodsUnavailable would also be 0, so the 2 old replica sets could be scaled down by 5 (13 - 8 - 0), which would then
	//   allow the new replica set to be scaled up by 5.
	minAvailable := *(deployment.Spec.Replicas) - maxUnavailable
	newRSUnavailablePodCount := *(newRS.Spec.Replicas) - newRS.Status.AvailableReplicas
	maxScaledDown := allPodsCount - minAvailable - newRSUnavailablePodCount
	if maxScaledDown <= 0 {
		return false, nil
	}

	// Clean up unhealthy replicas first, otherwise unhealthy replicas will block deployment
	// and cause timeout. See https://github.com/kubernetes/kubernetes/issues/16737
	oldRSs, cleanupCount, err := dc.cleanupUnhealthyReplicas(oldRSs, deployment, maxScaledDown)
	if err != nil {
		return false, nil
	}
	klog.V(4).Infof("Cleaned up unhealthy replicas from old RSes by %d", cleanupCount)

	// Scale down old replica sets; we need to check maxUnavailable to ensure we can scale down.
	allRSs = append(oldRSs, newRS)
	scaledDownCount, err := dc.scaleDownOldReplicaSetsForRollingUpdate(allRSs, oldRSs, deployment)
	if err != nil {
		return false, nil
	}
	klog.V(4).Infof("Scaled down old RSes of deployment %s by %d", deployment.Name, scaledDownCount)

	totalScaledDown := cleanupCount + scaledDownCount
	return totalScaledDown > 0, nil
}
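
// The sketch below is illustrative only and is not part of the upstream controller; the
// function name and parameters are hypothetical. It restates the scale-down budget that
// reconcileOldReplicaSets computes, so the concrete example in the comment above
// (10 replicas, maxUnavailable=2, 13 total pods, 5 unavailable new-RS pods) can be
// checked by hand: 13 - (10 - 2) - 5 = 0.
func scaleDownBudgetSketch(allPodsCount, desiredReplicas, maxUnavailable, newRSUnavailable int32) int32 {
	// minAvailable is the availability floor the rolling update must not go below.
	minAvailable := desiredReplicas - maxUnavailable
	// Whatever exists above minAvailable, minus new-RS pods that are not yet available,
	// is the budget for scaling down old replica sets.
	return allPodsCount - minAvailable - newRSUnavailable
}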

// cleanupUnhealthyReplicas will scale down old replica sets with unhealthy replicas, so that all unhealthy replicas will be deleted.
func (dc *DeploymentController) cleanupUnhealthyReplicas(oldRSs []*apps.ReplicaSet, deployment *apps.Deployment, maxCleanupCount int32) ([]*apps.ReplicaSet, int32, error) {
	sort.Sort(controller.ReplicaSetsByCreationTimestamp(oldRSs))
	// Safely scale down all old replica sets with unhealthy replicas. Replica set will sort the pods in the order
	// such that not-ready < ready, unscheduled < scheduled, and pending < running. This ensures that unhealthy replicas will
	// be deleted first and won't increase unavailability.
	totalScaledDown := int32(0)
	for i, targetRS := range oldRSs {
		if totalScaledDown >= maxCleanupCount {
			break
		}
		if *(targetRS.Spec.Replicas) == 0 {
			// cannot scale down this replica set.
			continue
		}
		klog.V(4).Infof("Found %d available pods in old RS %s/%s", targetRS.Status.AvailableReplicas, targetRS.Namespace, targetRS.Name)
		if *(targetRS.Spec.Replicas) == targetRS.Status.AvailableReplicas {
			// no unhealthy replicas found, no scaling required.
			continue
		}
		scaledDownCount := int32(integer.IntMin(int(maxCleanupCount-totalScaledDown), int(*(targetRS.Spec.Replicas)-targetRS.Status.AvailableReplicas)))
		newReplicasCount := *(targetRS.Spec.Replicas) - scaledDownCount
		if newReplicasCount > *(targetRS.Spec.Replicas) {
			return nil, 0, fmt.Errorf("when cleaning up unhealthy replicas, got invalid request to scale down %s/%s %d -> %d", targetRS.Namespace, targetRS.Name, *(targetRS.Spec.Replicas), newReplicasCount)
		}
		_, updatedOldRS, err := dc.scaleReplicaSetAndRecordEvent(targetRS, newReplicasCount, deployment)
		if err != nil {
			return nil, totalScaledDown, err
		}
		totalScaledDown += scaledDownCount
		oldRSs[i] = updatedOldRS
	}
	return oldRSs, totalScaledDown, nil
}

// scaleDownOldReplicaSetsForRollingUpdate scales down old replica sets when deployment strategy is "RollingUpdate".
// We need to check maxUnavailable to ensure availability.
func (dc *DeploymentController) scaleDownOldReplicaSetsForRollingUpdate(allRSs []*apps.ReplicaSet, oldRSs []*apps.ReplicaSet, deployment *apps.Deployment) (int32, error) {
	maxUnavailable := deploymentutil.MaxUnavailable(*deployment)

	// Check if we can scale down.
	minAvailable := *(deployment.Spec.Replicas) - maxUnavailable
	// Find the number of available pods.
	availablePodCount := deploymentutil.GetAvailableReplicaCountForReplicaSets(allRSs)
	if availablePodCount <= minAvailable {
		// Cannot scale down.
		return 0, nil
	}
	klog.V(4).Infof("Found %d available pods in deployment %s, scaling down old RSes", availablePodCount, deployment.Name)

	sort.Sort(controller.ReplicaSetsByCreationTimestamp(oldRSs))

	totalScaledDown := int32(0)
	totalScaleDownCount := availablePodCount - minAvailable
	for _, targetRS := range oldRSs {
		if totalScaledDown >= totalScaleDownCount {
			// No further scaling required.
			break
		}
		if *(targetRS.Spec.Replicas) == 0 {
			// cannot scale down this ReplicaSet.
			continue
		}
		// Scale down.
		scaleDownCount := int32(integer.IntMin(int(*(targetRS.Spec.Replicas)), int(totalScaleDownCount-totalScaledDown)))
		newReplicasCount := *(targetRS.Spec.Replicas) - scaleDownCount
		if newReplicasCount > *(targetRS.Spec.Replicas) {
			return 0, fmt.Errorf("when scaling down old RS, got invalid request to scale down %s/%s %d -> %d", targetRS.Namespace, targetRS.Name, *(targetRS.Spec.Replicas), newReplicasCount)
		}
		_, _, err := dc.scaleReplicaSetAndRecordEvent(targetRS, newReplicasCount, deployment)
		if err != nil {
			return totalScaledDown, err
		}
		totalScaledDown += scaleDownCount
	}

	return totalScaledDown, nil
}
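
// Another illustrative sketch, not part of the upstream controller; the name and
// parameters are hypothetical. It mirrors how scaleDownOldReplicaSetsForRollingUpdate
// walks the old replica sets oldest-first, taking from each one at most its current
// replica count until the total scale-down budget is spent.
func distributeScaleDownSketch(oldRSReplicas []int32, budget int32) []int32 {
	scaledDown := make([]int32, len(oldRSReplicas))
	remaining := budget
	for i, replicas := range oldRSReplicas {
		if remaining <= 0 {
			// Budget exhausted; later (newer) replica sets keep their replicas.
			break
		}
		// Take at most what this replica set has and at most what remains of the budget.
		take := replicas
		if remaining < take {
			take = remaining
		}
		scaledDown[i] = take
		remaining -= take
	}
	return scaledDown
}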