/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package apps

import (
	"context"
	"fmt"
	"time"

	"github.com/onsi/ginkgo"
	"github.com/onsi/gomega"

	appsv1 "k8s.io/api/apps/v1"
	v1 "k8s.io/api/core/v1"
	policyv1beta1 "k8s.io/api/policy/v1beta1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/intstr"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/util/retry"
	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
	"k8s.io/kubernetes/test/e2e/framework"
	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
	imageutils "k8s.io/kubernetes/test/utils/image"
)
// schedulingTimeout is deliberately long because sometimes we need to wait
// a while for something ordinary to happen: a pod to get scheduled and move
// into Ready.
const (
	bigClusterSize    = 7
	schedulingTimeout = 10 * time.Minute
	timeout           = 60 * time.Second
)
var _ = SIGDescribe("DisruptionController", func() {
	f := framework.NewDefaultFramework("disruption")
	var ns string
	var cs kubernetes.Interface

	ginkgo.BeforeEach(func() {
		cs = f.ClientSet
		ns = f.Namespace.Name
	})

	ginkgo.It("should create a PodDisruptionBudget", func() {
		createPDBMinAvailableOrDie(cs, ns, intstr.FromString("1%"))
	})

	ginkgo.It("should update PodDisruptionBudget status", func() {
		createPDBMinAvailableOrDie(cs, ns, intstr.FromInt(2))

		createPodsOrDie(cs, ns, 3)
		waitForPodsOrDie(cs, ns, 3)

		// Since DisruptionsAllowed starts out 0, if we see it ever become positive,
		// that means the controller is working.
		err := wait.PollImmediate(framework.Poll, timeout, func() (bool, error) {
			pdb, err := cs.PolicyV1beta1().PodDisruptionBudgets(ns).Get(context.TODO(), "foo", metav1.GetOptions{})
			if err != nil {
				return false, err
			}
			return pdb.Status.DisruptionsAllowed > 0, nil
		})
		framework.ExpectNoError(err)
	})
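
	// evictionCases is a table of eviction scenarios: each case creates bare
	// pods and/or a ReplicaSet, optionally guards them with a minAvailable or
	// maxUnavailable PDB, and states whether the eviction subresource should
	// then allow or deny an eviction.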
	evictionCases := []struct {
		description        string
		minAvailable       intstr.IntOrString
		maxUnavailable     intstr.IntOrString
		podCount           int
		replicaSetSize     int32
		shouldDeny         bool
		exclusive          bool
		skipForBigClusters bool
	}{
		{
			description:    "no PDB",
			minAvailable:   intstr.FromString(""),
			maxUnavailable: intstr.FromString(""),
			podCount:       1,
			shouldDeny:     false,
		}, {
			description:    "too few pods, absolute",
			minAvailable:   intstr.FromInt(2),
			maxUnavailable: intstr.FromString(""),
			podCount:       2,
			shouldDeny:     true,
		}, {
			description:    "enough pods, absolute",
			minAvailable:   intstr.FromInt(2),
			maxUnavailable: intstr.FromString(""),
			podCount:       3,
			shouldDeny:     false,
		}, {
			description:    "enough pods, replicaSet, percentage",
			minAvailable:   intstr.FromString("90%"),
			maxUnavailable: intstr.FromString(""),
			replicaSetSize: 10,
			exclusive:      false,
			shouldDeny:     false,
		}, {
			description:    "too few pods, replicaSet, percentage",
			minAvailable:   intstr.FromString("90%"),
			maxUnavailable: intstr.FromString(""),
			replicaSetSize: 10,
			exclusive:      true,
			shouldDeny:     true,
			// This test assumes that there are fewer than replicaSetSize nodes in the cluster.
			skipForBigClusters: true,
		},
		{
			description:    "maxUnavailable allow single eviction, percentage",
			minAvailable:   intstr.FromString(""),
			maxUnavailable: intstr.FromString("10%"),
			replicaSetSize: 10,
			exclusive:      false,
			shouldDeny:     false,
		},
		{
			description:    "maxUnavailable deny evictions, integer",
			minAvailable:   intstr.FromString(""),
			maxUnavailable: intstr.FromInt(1),
			replicaSetSize: 10,
			exclusive:      true,
			shouldDeny:     true,
			// This test assumes that there are fewer than replicaSetSize nodes in the cluster.
			skipForBigClusters: true,
		},
	}

	for i := range evictionCases {
		c := evictionCases[i]
		expectation := "should allow an eviction"
		if c.shouldDeny {
			expectation = "should not allow an eviction"
		}
		ginkgo.It(fmt.Sprintf("evictions: %s => %s", c.description, expectation), func() {
			if c.skipForBigClusters {
				e2eskipper.SkipUnlessNodeCountIsAtMost(bigClusterSize - 1)
			}
			createPodsOrDie(cs, ns, c.podCount)
			if c.replicaSetSize > 0 {
				createReplicaSetOrDie(cs, ns, c.replicaSetSize, c.exclusive)
			}

			if c.minAvailable.String() != "" {
				createPDBMinAvailableOrDie(cs, ns, c.minAvailable)
			}

			if c.maxUnavailable.String() != "" {
				createPDBMaxUnavailableOrDie(cs, ns, c.maxUnavailable)
			}

			// Locate a running pod.
			pod, err := locateRunningPod(cs, ns)
			framework.ExpectNoError(err)

			e := &policyv1beta1.Eviction{
				ObjectMeta: metav1.ObjectMeta{
					Name:      pod.Name,
					Namespace: ns,
				},
			}

			if c.shouldDeny {
				err = cs.CoreV1().Pods(ns).Evict(context.TODO(), e)
				gomega.Expect(err).Should(gomega.MatchError("Cannot evict pod as it would violate the pod's disruption budget."))
			} else {
				// Only wait for running pods in the "allow" case
				// because one of the shouldDeny cases relies on the
				// replicaSet not fitting on the cluster.
				waitForPodsOrDie(cs, ns, c.podCount+int(c.replicaSetSize))

				// Since DisruptionsAllowed starts out 0, if an eviction is ever allowed,
				// that means the controller is working.
				err = wait.PollImmediate(framework.Poll, timeout, func() (bool, error) {
					err = cs.CoreV1().Pods(ns).Evict(context.TODO(), e)
					if err != nil {
						return false, nil
					}
					return true, nil
				})
				framework.ExpectNoError(err)
			}
		})
	}

	ginkgo.It("should block an eviction until the PDB is updated to allow it", func() {
		ginkgo.By("Creating a pdb that targets all three pods in a test replica set")
		createPDBMinAvailableOrDie(cs, ns, intstr.FromInt(3))
		createReplicaSetOrDie(cs, ns, 3, false)

		ginkgo.By("First trying to evict a pod which shouldn't be evictable")
		pod, err := locateRunningPod(cs, ns)
		framework.ExpectNoError(err)

		waitForPodsOrDie(cs, ns, 3) // make sure that they are running and so would be evictable with a different pdb
		e := &policyv1beta1.Eviction{
			ObjectMeta: metav1.ObjectMeta{
				Name:      pod.Name,
				Namespace: ns,
			},
		}
		err = cs.CoreV1().Pods(ns).Evict(context.TODO(), e)
		gomega.Expect(err).Should(gomega.MatchError("Cannot evict pod as it would violate the pod's disruption budget."))

		ginkgo.By("Updating the pdb to allow a pod to be evicted")
		updatePDBMinAvailableOrDie(cs, ns, intstr.FromInt(2))

		ginkgo.By("Trying to evict the same pod we tried earlier which should now be evictable")
		waitForPodsOrDie(cs, ns, 3)
		waitForPdbToObserveHealthyPods(cs, ns, 3)
		err = cs.CoreV1().Pods(ns).Evict(context.TODO(), e)
		framework.ExpectNoError(err) // the eviction is now allowed
	})
})
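
// createPDBMinAvailableOrDie creates a PodDisruptionBudget named "foo" with
// the given minAvailable, selecting pods labeled foo=bar, and waits for the
// disruption controller to process it.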
func createPDBMinAvailableOrDie(cs kubernetes.Interface, ns string, minAvailable intstr.IntOrString) {
	pdb := policyv1beta1.PodDisruptionBudget{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "foo",
			Namespace: ns,
		},
		Spec: policyv1beta1.PodDisruptionBudgetSpec{
			Selector:     &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}},
			MinAvailable: &minAvailable,
		},
	}
	_, err := cs.PolicyV1beta1().PodDisruptionBudgets(ns).Create(context.TODO(), &pdb, metav1.CreateOptions{})
	framework.ExpectNoError(err, "Waiting for the pdb to be created with minAvailable %d in namespace %s", minAvailable.IntVal, ns)
	waitForPdbToBeProcessed(cs, ns)
}
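
// createPDBMaxUnavailableOrDie is the maxUnavailable counterpart of
// createPDBMinAvailableOrDie: it creates the "foo" PDB with the given
// maxUnavailable and waits for it to be processed.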
func createPDBMaxUnavailableOrDie(cs kubernetes.Interface, ns string, maxUnavailable intstr.IntOrString) {
	pdb := policyv1beta1.PodDisruptionBudget{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "foo",
			Namespace: ns,
		},
		Spec: policyv1beta1.PodDisruptionBudgetSpec{
			Selector:       &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}},
			MaxUnavailable: &maxUnavailable,
		},
	}
	_, err := cs.PolicyV1beta1().PodDisruptionBudgets(ns).Create(context.TODO(), &pdb, metav1.CreateOptions{})
	framework.ExpectNoError(err, "Waiting for the pdb to be created with maxUnavailable %d in namespace %s", maxUnavailable.IntVal, ns)
	waitForPdbToBeProcessed(cs, ns)
}
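
// updatePDBMinAvailableOrDie updates the existing "foo" PDB to a new
// minAvailable, retrying on update conflicts, and waits for the change to be
// processed by the controller.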
func updatePDBMinAvailableOrDie(cs kubernetes.Interface, ns string, minAvailable intstr.IntOrString) {
	err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
		old, err := cs.PolicyV1beta1().PodDisruptionBudgets(ns).Get(context.TODO(), "foo", metav1.GetOptions{})
		if err != nil {
			return err
		}
		old.Spec.MinAvailable = &minAvailable
		if _, err := cs.PolicyV1beta1().PodDisruptionBudgets(ns).Update(context.TODO(), old, metav1.UpdateOptions{}); err != nil {
			return err
		}
		return nil
	})
	framework.ExpectNoError(err, "Waiting for the pdb update to be processed in namespace %s", ns)
	waitForPdbToBeProcessed(cs, ns)
}
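
// createPodsOrDie creates n bare pods labeled foo=bar, so that the test PDBs
// and waiters above can select them.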
func createPodsOrDie(cs kubernetes.Interface, ns string, n int) {
	for i := 0; i < n; i++ {
		pod := &v1.Pod{
			ObjectMeta: metav1.ObjectMeta{
				Name:      fmt.Sprintf("pod-%d", i),
				Namespace: ns,
				Labels:    map[string]string{"foo": "bar"},
			},
			Spec: v1.PodSpec{
				Containers: []v1.Container{
					{
						Name:  "busybox",
						Image: imageutils.GetE2EImage(imageutils.EchoServer),
					},
				},
				RestartPolicy: v1.RestartPolicyAlways,
			},
		}

		_, err := cs.CoreV1().Pods(ns).Create(context.TODO(), pod, metav1.CreateOptions{})
		framework.ExpectNoError(err, "Creating pod %q in namespace %q", pod.Name, ns)
	}
}
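
// waitForPodsOrDie polls until at least n foo=bar pods in the namespace
// report Ready, failing the test on timeout.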
func waitForPodsOrDie(cs kubernetes.Interface, ns string, n int) {
	ginkgo.By("Waiting for all pods to be running")
	err := wait.PollImmediate(framework.Poll, schedulingTimeout, func() (bool, error) {
		pods, err := cs.CoreV1().Pods(ns).List(context.TODO(), metav1.ListOptions{LabelSelector: "foo=bar"})
		if err != nil {
			return false, err
		}
		if pods == nil {
			return false, fmt.Errorf("pods is nil")
		}
		if len(pods.Items) < n {
			framework.Logf("pods: %v < %v", len(pods.Items), n)
			return false, nil
		}
		ready := 0
		for i := range pods.Items {
			pod := pods.Items[i]
			if podutil.IsPodReady(&pod) {
				ready++
			}
		}
		if ready < n {
			framework.Logf("ready pods: %v < %v", ready, n)
			return false, nil
		}
		return true, nil
	})
	framework.ExpectNoError(err, "Waiting for pods in namespace %q to be ready", ns)
}
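
// createReplicaSetOrDie creates a ReplicaSet of the given size whose pods are
// labeled foo=bar. With exclusive set, each pod requests HostPort 5555, so at
// most one replica can schedule per node; on a small cluster this lets tests
// force a ReplicaSet that can never become fully ready.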
func createReplicaSetOrDie(cs kubernetes.Interface, ns string, size int32, exclusive bool) {
	container := v1.Container{
		Name:  "busybox",
		Image: imageutils.GetE2EImage(imageutils.EchoServer),
	}
	if exclusive {
		container.Ports = []v1.ContainerPort{
			{HostPort: 5555, ContainerPort: 5555},
		}
	}

	rs := &appsv1.ReplicaSet{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "rs",
			Namespace: ns,
		},
		Spec: appsv1.ReplicaSetSpec{
			Replicas: &size,
			Selector: &metav1.LabelSelector{
				MatchLabels: map[string]string{"foo": "bar"},
			},
			Template: v1.PodTemplateSpec{
				ObjectMeta: metav1.ObjectMeta{
					Labels: map[string]string{"foo": "bar"},
				},
				Spec: v1.PodSpec{
					Containers: []v1.Container{container},
				},
			},
		},
	}

	_, err := cs.AppsV1().ReplicaSets(ns).Create(context.TODO(), rs, metav1.CreateOptions{})
	framework.ExpectNoError(err, "Creating replica set %q in namespace %q", rs.Name, ns)
}
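
// locateRunningPod polls the namespace until it finds a Ready pod and returns
// it; the tests above use the result as an eviction target.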
func locateRunningPod(cs kubernetes.Interface, ns string) (pod *v1.Pod, err error) {
	ginkgo.By("locating a running pod")
	err = wait.PollImmediate(framework.Poll, schedulingTimeout, func() (bool, error) {
		podList, err := cs.CoreV1().Pods(ns).List(context.TODO(), metav1.ListOptions{})
		if err != nil {
			return false, err
		}

		for i := range podList.Items {
			p := podList.Items[i]
			if podutil.IsPodReady(&p) {
				pod = &p
				return true, nil
			}
		}

		return false, nil
	})
	return pod, err
}
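
// waitForPdbToBeProcessed waits until the "foo" PDB's status reflects its
// current generation, i.e. the disruption controller has observed the latest
// spec.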
func waitForPdbToBeProcessed(cs kubernetes.Interface, ns string) {
	ginkgo.By("Waiting for the pdb to be processed")
	err := wait.PollImmediate(framework.Poll, schedulingTimeout, func() (bool, error) {
		pdb, err := cs.PolicyV1beta1().PodDisruptionBudgets(ns).Get(context.TODO(), "foo", metav1.GetOptions{})
		if err != nil {
			return false, err
		}
		if pdb.Status.ObservedGeneration < pdb.Generation {
			return false, nil
		}
		return true, nil
	})
	framework.ExpectNoError(err, "Waiting for the pdb to be processed in namespace %s", ns)
}
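
// waitForPdbToObserveHealthyPods waits until the "foo" PDB reports exactly
// healthyCount currently-healthy pods in its status.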
func waitForPdbToObserveHealthyPods(cs kubernetes.Interface, ns string, healthyCount int32) {
	ginkgo.By("Waiting for the pdb to observe all healthy pods")
	err := wait.PollImmediate(framework.Poll, wait.ForeverTestTimeout, func() (bool, error) {
		pdb, err := cs.PolicyV1beta1().PodDisruptionBudgets(ns).Get(context.TODO(), "foo", metav1.GetOptions{})
		if err != nil {
			return false, err
		}
		if pdb.Status.CurrentHealthy != healthyCount {
			return false, nil
		}
		return true, nil
	})
	framework.ExpectNoError(err, "Waiting for the pdb in namespace %s to observe %d healthy pods", ns, healthyCount)
}