preemption.go

/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduling

import (
	"fmt"
	"strings"
	"time"

	"github.com/onsi/ginkgo"
	"github.com/onsi/gomega"
	// ensure libs have a chance to initialize
	_ "github.com/stretchr/testify/assert"

	appsv1 "k8s.io/api/apps/v1"
	corev1 "k8s.io/api/core/v1"
	schedulerapi "k8s.io/api/scheduling/v1"
	"k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/watch"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/cache"
	"k8s.io/kubernetes/pkg/apis/scheduling"
	"k8s.io/kubernetes/test/e2e/framework"
	e2elog "k8s.io/kubernetes/test/e2e/framework/log"
	"k8s.io/kubernetes/test/e2e/framework/replicaset"
)

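// priorityPair couples the name of a PriorityClass with its priority value,
// so tests can create the classes they need and clean them up afterwards.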
type priorityPair struct {
	name  string
	value int32
}

var _ = SIGDescribe("SchedulerPreemption [Serial]", func() {
	var cs clientset.Interface
	var nodeList *corev1.NodeList
	var ns string
	f := framework.NewDefaultFramework("sched-preemption")

	lowPriority, mediumPriority, highPriority := int32(1), int32(100), int32(1000)
	lowPriorityClassName := f.BaseName + "-low-priority"
	mediumPriorityClassName := f.BaseName + "-medium-priority"
	highPriorityClassName := f.BaseName + "-high-priority"
	priorityPairs := []priorityPair{
		{name: lowPriorityClassName, value: lowPriority},
		{name: mediumPriorityClassName, value: mediumPriority},
		{name: highPriorityClassName, value: highPriority},
	}

	ginkgo.AfterEach(func() {
		for _, pair := range priorityPairs {
			cs.SchedulingV1().PriorityClasses().Delete(pair.name, metav1.NewDeleteOptions(0))
		}
	})

	ginkgo.BeforeEach(func() {
		cs = f.ClientSet
		ns = f.Namespace.Name
		nodeList = &corev1.NodeList{}
		for _, pair := range priorityPairs {
			_, err := f.ClientSet.SchedulingV1().PriorityClasses().Create(&schedulerapi.PriorityClass{ObjectMeta: metav1.ObjectMeta{Name: pair.name}, Value: pair.value})
			gomega.Expect(err == nil || errors.IsAlreadyExists(err)).To(gomega.Equal(true))
		}

		framework.WaitForAllNodesHealthy(cs, time.Minute)
		masterNodes, nodeList = framework.GetMasterAndWorkerNodesOrDie(cs)

		err := framework.CheckTestingNSDeletedExcept(cs, ns)
		framework.ExpectNoError(err)
	})

	// This test verifies that when a higher priority pod is created and no node with
	// enough resources is found, the scheduler preempts a lower priority pod to schedule
	// the high priority pod.
	ginkgo.It("validates basic preemption works", func() {
		var podRes corev1.ResourceList
		// Create one pod per node that uses a lot of the node's resources.
		ginkgo.By("Create pods that use 60% of node memory and 40% of node CPU.")
		pods := make([]*corev1.Pod, len(nodeList.Items))
		for i, node := range nodeList.Items {
			cpuAllocatable, found := node.Status.Allocatable["cpu"]
			gomega.Expect(found).To(gomega.Equal(true))
			milliCPU := cpuAllocatable.MilliValue() * 40 / 100
			memAllocatable, found := node.Status.Allocatable["memory"]
			gomega.Expect(found).To(gomega.Equal(true))
			memory := memAllocatable.Value() * 60 / 100
			podRes = corev1.ResourceList{}
			podRes[corev1.ResourceCPU] = *resource.NewMilliQuantity(int64(milliCPU), resource.DecimalSI)
			podRes[corev1.ResourceMemory] = *resource.NewQuantity(int64(memory), resource.BinarySI)

			// make the first pod low priority and the rest medium priority.
			priorityName := mediumPriorityClassName
			if i == 0 {
				priorityName = lowPriorityClassName
			}
			pods[i] = createPausePod(f, pausePodConfig{
				Name:              fmt.Sprintf("pod%d-%v", i, priorityName),
				PriorityClassName: priorityName,
				Resources: &corev1.ResourceRequirements{
					Requests: podRes,
				},
			})
			e2elog.Logf("Created pod: %v", pods[i].Name)
		}
		ginkgo.By("Wait for pods to be scheduled.")
		for _, pod := range pods {
			framework.ExpectNoError(framework.WaitForPodRunningInNamespace(cs, pod))
		}

		ginkgo.By("Run a high priority pod that requests the same amount of resources.")
		// Create a high priority pod and make sure it is scheduled.
		runPausePod(f, pausePodConfig{
			Name:              "preemptor-pod",
			PriorityClassName: highPriorityClassName,
			Resources: &corev1.ResourceRequirements{
				Requests: podRes,
			},
		})
		// Make sure that the lowest priority pod is deleted.
		preemptedPod, err := cs.CoreV1().Pods(pods[0].Namespace).Get(pods[0].Name, metav1.GetOptions{})
		podDeleted := (err != nil && errors.IsNotFound(err)) ||
			(err == nil && preemptedPod.DeletionTimestamp != nil)
		gomega.Expect(podDeleted).To(gomega.BeTrue())
		// Other pods (mid priority ones) should be present.
		for i := 1; i < len(pods); i++ {
			livePod, err := cs.CoreV1().Pods(pods[i].Namespace).Get(pods[i].Name, metav1.GetOptions{})
			framework.ExpectNoError(err)
			gomega.Expect(livePod.DeletionTimestamp).To(gomega.BeNil())
		}
	})

	// This test verifies that when a critical pod is created and no node with
	// enough resources is found, the scheduler preempts a lower priority pod to schedule
	// this critical pod.
	ginkgo.It("validates lower priority pod preemption by critical pod", func() {
		var podRes corev1.ResourceList
		// Create one pod per node that uses a lot of the node's resources.
		ginkgo.By("Create pods that use 60% of node memory and 40% of node CPU.")
		pods := make([]*corev1.Pod, len(nodeList.Items))
		for i, node := range nodeList.Items {
			cpuAllocatable, found := node.Status.Allocatable["cpu"]
			gomega.Expect(found).To(gomega.Equal(true))
			milliCPU := cpuAllocatable.MilliValue() * 40 / 100
			memAllocatable, found := node.Status.Allocatable["memory"]
			gomega.Expect(found).To(gomega.Equal(true))
			memory := memAllocatable.Value() * 60 / 100
			podRes = corev1.ResourceList{}
			podRes[corev1.ResourceCPU] = *resource.NewMilliQuantity(int64(milliCPU), resource.DecimalSI)
			podRes[corev1.ResourceMemory] = *resource.NewQuantity(int64(memory), resource.BinarySI)

			// make the first pod low priority and the rest medium priority.
			priorityName := mediumPriorityClassName
			if i == 0 {
				priorityName = lowPriorityClassName
			}
			pods[i] = createPausePod(f, pausePodConfig{
				Name:              fmt.Sprintf("pod%d-%v", i, priorityName),
				PriorityClassName: priorityName,
				Resources: &corev1.ResourceRequirements{
					Requests: podRes,
				},
			})
			e2elog.Logf("Created pod: %v", pods[i].Name)
		}
		ginkgo.By("Wait for pods to be scheduled.")
		for _, pod := range pods {
			framework.ExpectNoError(framework.WaitForPodRunningInNamespace(cs, pod))
		}

		ginkgo.By("Run a critical pod that requests the same amount of resources.")
		// Create a critical pod and make sure it is scheduled.
		runPausePod(f, pausePodConfig{
			Name:              "critical-pod",
			Namespace:         metav1.NamespaceSystem,
			PriorityClassName: scheduling.SystemClusterCritical,
			Resources: &corev1.ResourceRequirements{
				Requests: podRes,
			},
		})
		// Make sure that the lowest priority pod is deleted.
		preemptedPod, err := cs.CoreV1().Pods(pods[0].Namespace).Get(pods[0].Name, metav1.GetOptions{})
		defer func() {
			// Clean up the critical pod.
			err := f.ClientSet.CoreV1().Pods(metav1.NamespaceSystem).Delete("critical-pod", metav1.NewDeleteOptions(0))
			framework.ExpectNoError(err)
		}()
		podDeleted := (err != nil && errors.IsNotFound(err)) ||
			(err == nil && preemptedPod.DeletionTimestamp != nil)
		gomega.Expect(podDeleted).To(gomega.BeTrue())
		// Other pods (mid priority ones) should be present.
		for i := 1; i < len(pods); i++ {
			livePod, err := cs.CoreV1().Pods(pods[i].Namespace).Get(pods[i].Name, metav1.GetOptions{})
			framework.ExpectNoError(err)
			gomega.Expect(livePod.DeletionTimestamp).To(gomega.BeNil())
		}
	})

	// This test verifies that when a high priority pod is pending and its
	// scheduling violates a medium priority pod's anti-affinity, the medium priority
	// pod is preempted to allow the higher priority pod to be scheduled.
	// It also verifies that existing low priority pods are not preempted, as their
	// preemption wouldn't help.
	ginkgo.It("validates pod anti-affinity works in preemption", func() {
		var podRes corev1.ResourceList
		// Create a few pods that use a small amount of resources.
		ginkgo.By("Create pods that use 10% of node resources.")
		numPods := 4
		if len(nodeList.Items) < numPods {
			numPods = len(nodeList.Items)
		}
		pods := make([]*corev1.Pod, numPods)
		for i := 0; i < numPods; i++ {
			node := nodeList.Items[i]
			cpuAllocatable, found := node.Status.Allocatable["cpu"]
			gomega.Expect(found).To(gomega.BeTrue())
			milliCPU := cpuAllocatable.MilliValue() * 10 / 100
			memAllocatable, found := node.Status.Allocatable["memory"]
			gomega.Expect(found).To(gomega.BeTrue())
			memory := memAllocatable.Value() * 10 / 100
			podRes = corev1.ResourceList{}
			podRes[corev1.ResourceCPU] = *resource.NewMilliQuantity(int64(milliCPU), resource.DecimalSI)
			podRes[corev1.ResourceMemory] = *resource.NewQuantity(int64(memory), resource.BinarySI)

			// Apply a node label to each node.
			framework.AddOrUpdateLabelOnNode(cs, node.Name, "node", node.Name)
			framework.ExpectNodeHasLabel(cs, node.Name, "node", node.Name)

			// make the first pod medium priority and the rest low priority.
			priorityName := lowPriorityClassName
			if i == 0 {
				priorityName = mediumPriorityClassName
			}
			pods[i] = createPausePod(f, pausePodConfig{
				Name:              fmt.Sprintf("pod%d-%v", i, priorityName),
				PriorityClassName: priorityName,
				Resources: &corev1.ResourceRequirements{
					Requests: podRes,
				},
				Affinity: &corev1.Affinity{
					PodAntiAffinity: &corev1.PodAntiAffinity{
						RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{
							{
								LabelSelector: &metav1.LabelSelector{
									MatchExpressions: []metav1.LabelSelectorRequirement{
										{
											Key:      "service",
											Operator: metav1.LabelSelectorOpIn,
											Values:   []string{"blah", "foo"},
										},
									},
								},
								TopologyKey: "node",
							},
						},
					},
					NodeAffinity: &corev1.NodeAffinity{
						RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{
							NodeSelectorTerms: []corev1.NodeSelectorTerm{
								{
									MatchExpressions: []corev1.NodeSelectorRequirement{
										{
											Key:      "node",
											Operator: corev1.NodeSelectorOpIn,
											Values:   []string{node.Name},
										},
									},
								},
							},
						},
					},
				},
			})
			e2elog.Logf("Created pod: %v", pods[i].Name)
		}
		defer func() { // Remove the added labels.
			for i := 0; i < numPods; i++ {
				framework.RemoveLabelOffNode(cs, nodeList.Items[i].Name, "node")
			}
		}()

		ginkgo.By("Wait for pods to be scheduled.")
		for _, pod := range pods {
			framework.ExpectNoError(framework.WaitForPodRunningInNamespace(cs, pod))
		}

		ginkgo.By("Run a high priority pod with node affinity to the first node.")
		// Create a high priority pod and make sure it is scheduled.
		runPausePod(f, pausePodConfig{
			Name:              "preemptor-pod",
			PriorityClassName: highPriorityClassName,
			Labels:            map[string]string{"service": "blah"},
			Affinity: &corev1.Affinity{
				NodeAffinity: &corev1.NodeAffinity{
					RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{
						NodeSelectorTerms: []corev1.NodeSelectorTerm{
							{
								MatchExpressions: []corev1.NodeSelectorRequirement{
									{
										Key:      "node",
										Operator: corev1.NodeSelectorOpIn,
										Values:   []string{nodeList.Items[0].Name},
									},
								},
							},
						},
					},
				},
			},
		})
		// Make sure that the medium priority pod on the first node is preempted.
		preemptedPod, err := cs.CoreV1().Pods(pods[0].Namespace).Get(pods[0].Name, metav1.GetOptions{})
		podDeleted := (err != nil && errors.IsNotFound(err)) ||
			(err == nil && preemptedPod.DeletionTimestamp != nil)
		gomega.Expect(podDeleted).To(gomega.BeTrue())
		// Other pods (low priority ones) should be present.
		for i := 1; i < len(pods); i++ {
			livePod, err := cs.CoreV1().Pods(pods[i].Namespace).Get(pods[i].Name, metav1.GetOptions{})
			framework.ExpectNoError(err)
			gomega.Expect(livePod.DeletionTimestamp).To(gomega.BeNil())
		}
	})
})

var _ = SIGDescribe("PodPriorityResolution [Serial]", func() {
	var cs clientset.Interface
	var ns string
	f := framework.NewDefaultFramework("sched-pod-priority")

	ginkgo.BeforeEach(func() {
		cs = f.ClientSet
		ns = f.Namespace.Name
		err := framework.CheckTestingNSDeletedExcept(cs, ns)
		framework.ExpectNoError(err)
	})

	// This test verifies that system critical priorities are created automatically and resolved properly.
	ginkgo.It("validates critical system priorities are created and resolved", func() {
		// Create pods that use the system critical priorities and verify that
		// the priority values are resolved on the created pods.
		ginkgo.By("Create pods that use critical system priorities.")
		systemPriorityClasses := []string{
			scheduling.SystemNodeCritical, scheduling.SystemClusterCritical,
		}
		for i, spc := range systemPriorityClasses {
			pod := createPausePod(f, pausePodConfig{
				Name:              fmt.Sprintf("pod%d-%v", i, spc),
				Namespace:         metav1.NamespaceSystem,
				PriorityClassName: spc,
			})
			defer func() {
				// Clean up the pod.
				err := f.ClientSet.CoreV1().Pods(pod.Namespace).Delete(pod.Name, metav1.NewDeleteOptions(0))
				framework.ExpectNoError(err)
			}()
			gomega.Expect(pod.Spec.Priority).NotTo(gomega.BeNil())
			e2elog.Logf("Created pod: %v", pod.Name)
		}
	})
})

// fakecpu is a fake extended resource that is patched into the Node status below.
// We cannot tweak real resources such as CPU or memory, because the kubelet would
// correct those values back to the node's actual capacity.
var fakecpu corev1.ResourceName = "example.com/fakecpu"

var _ = SIGDescribe("PreemptionExecutionPath", func() {
	var cs clientset.Interface
	var node *corev1.Node
	var ns, nodeHostNameLabel string
	f := framework.NewDefaultFramework("sched-preemption-path")

	priorityPairs := make([]priorityPair, 0)

	ginkgo.AfterEach(func() {
		// print out additional info if the test failed
		if ginkgo.CurrentGinkgoTestDescription().Failed {
			// list existing priorities
			priorityList, err := cs.SchedulingV1().PriorityClasses().List(metav1.ListOptions{})
			if err != nil {
				e2elog.Logf("Unable to list priorities: %v", err)
			} else {
				e2elog.Logf("List existing priorities:")
				for _, p := range priorityList.Items {
					e2elog.Logf("%v/%v created at %v", p.Name, p.Value, p.CreationTimestamp)
				}
			}
		}

		if node != nil {
			nodeCopy := node.DeepCopy()
			// force an update by resetting the resource version
			nodeCopy.ResourceVersion = "0"
			delete(nodeCopy.Status.Capacity, fakecpu)
			_, err := cs.CoreV1().Nodes().UpdateStatus(nodeCopy)
			framework.ExpectNoError(err)
		}
		for _, pair := range priorityPairs {
			cs.SchedulingV1().PriorityClasses().Delete(pair.name, metav1.NewDeleteOptions(0))
		}
	})

	ginkgo.BeforeEach(func() {
		cs = f.ClientSet
		ns = f.Namespace.Name

		// find an available node
		ginkgo.By("Finding an available node")
		nodeName := GetNodeThatCanRunPod(f)
		e2elog.Logf("found a healthy node: %s", nodeName)

		// get the node API object
		var err error
		node, err = cs.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
		if err != nil {
			framework.Failf("error getting node %q: %v", nodeName, err)
		}
		var ok bool
		nodeHostNameLabel, ok = node.GetObjectMeta().GetLabels()["kubernetes.io/hostname"]
		if !ok {
			framework.Failf("error getting kubernetes.io/hostname label on node %s", nodeName)
		}

		// update the Node API object with a fake resource
		nodeCopy := node.DeepCopy()
		// force an update by resetting the resource version
		nodeCopy.ResourceVersion = "0"
		nodeCopy.Status.Capacity[fakecpu] = resource.MustParse("800")
		node, err = cs.CoreV1().Nodes().UpdateStatus(nodeCopy)
		framework.ExpectNoError(err)

		// create four PriorityClasses: p1, p2, p3, p4
		for i := 1; i <= 4; i++ {
			priorityName := fmt.Sprintf("p%d", i)
			priorityVal := int32(i)
			priorityPairs = append(priorityPairs, priorityPair{name: priorityName, value: priorityVal})
			_, err := cs.SchedulingV1().PriorityClasses().Create(&schedulerapi.PriorityClass{ObjectMeta: metav1.ObjectMeta{Name: priorityName}, Value: priorityVal})
			if err != nil {
				e2elog.Logf("Failed to create priority '%v/%v': %v", priorityName, priorityVal, err)
				e2elog.Logf("Reason: %v. Msg: %v", errors.ReasonForError(err), err)
			}
			gomega.Expect(err == nil || errors.IsAlreadyExists(err)).To(gomega.Equal(true))
		}
	})

	ginkgo.It("runs ReplicaSets to verify preemption running path", func() {
		podNamesSeen := make(map[string]struct{})
		stopCh := make(chan struct{})

		// create a pod controller to list/watch pod events from the test framework namespace
		_, podController := cache.NewInformer(
			&cache.ListWatch{
				ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
					obj, err := f.ClientSet.CoreV1().Pods(ns).List(options)
					return runtime.Object(obj), err
				},
				WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
					return f.ClientSet.CoreV1().Pods(ns).Watch(options)
				},
			},
			&corev1.Pod{},
			0,
			cache.ResourceEventHandlerFuncs{
				AddFunc: func(obj interface{}) {
					if pod, ok := obj.(*corev1.Pod); ok {
						podNamesSeen[pod.Name] = struct{}{}
					}
				},
			},
		)
		go podController.Run(stopCh)
		defer close(stopCh)

		// prepare four ReplicaSets
		rsConfs := []pauseRSConfig{
			{
				Replicas: int32(5),
				PodConfig: pausePodConfig{
					Name:              "pod1",
					Namespace:         ns,
					Labels:            map[string]string{"name": "pod1"},
					PriorityClassName: "p1",
					NodeSelector:      map[string]string{"kubernetes.io/hostname": nodeHostNameLabel},
					Resources: &corev1.ResourceRequirements{
						Requests: corev1.ResourceList{fakecpu: resource.MustParse("40")},
						Limits:   corev1.ResourceList{fakecpu: resource.MustParse("40")},
					},
				},
			},
			{
				Replicas: int32(4),
				PodConfig: pausePodConfig{
					Name:              "pod2",
					Namespace:         ns,
					Labels:            map[string]string{"name": "pod2"},
					PriorityClassName: "p2",
					NodeSelector:      map[string]string{"kubernetes.io/hostname": nodeHostNameLabel},
					Resources: &corev1.ResourceRequirements{
						Requests: corev1.ResourceList{fakecpu: resource.MustParse("50")},
						Limits:   corev1.ResourceList{fakecpu: resource.MustParse("50")},
					},
				},
			},
			{
				Replicas: int32(4),
				PodConfig: pausePodConfig{
					Name:              "pod3",
					Namespace:         ns,
					Labels:            map[string]string{"name": "pod3"},
					PriorityClassName: "p3",
					NodeSelector:      map[string]string{"kubernetes.io/hostname": nodeHostNameLabel},
					Resources: &corev1.ResourceRequirements{
						Requests: corev1.ResourceList{fakecpu: resource.MustParse("95")},
						Limits:   corev1.ResourceList{fakecpu: resource.MustParse("95")},
					},
				},
			},
			{
				Replicas: int32(1),
				PodConfig: pausePodConfig{
					Name:              "pod4",
					Namespace:         ns,
					Labels:            map[string]string{"name": "pod4"},
					PriorityClassName: "p4",
					NodeSelector:      map[string]string{"kubernetes.io/hostname": nodeHostNameLabel},
					Resources: &corev1.ResourceRequirements{
						Requests: corev1.ResourceList{fakecpu: resource.MustParse("400")},
						Limits:   corev1.ResourceList{fakecpu: resource.MustParse("400")},
					},
				},
			},
		}
		// Create ReplicaSet{1,2,3} to occupy 780 of the 800 units of fakecpu
		// (5*40 + 4*50 + 4*95 = 780).
		rsNum := len(rsConfs)
		for i := 0; i < rsNum-1; i++ {
			runPauseRS(f, rsConfs[i])
		}
		e2elog.Logf("pods created so far: %v", podNamesSeen)
		e2elog.Logf("length of pods created so far: %v", len(podNamesSeen))

		// Create ReplicaSet4; if runPauseRS fails, ReplicaSet4 could not be scheduled
		// even after 1 minute, which is unacceptable.
		runPauseRS(f, rsConfs[rsNum-1])
		e2elog.Logf("pods created so far: %v", podNamesSeen)
		e2elog.Logf("length of pods created so far: %v", len(podNamesSeen))

		// Count the pod names seen for ReplicaSet{1,2,3}; if a ReplicaSet produced more
		// names than expected, its pods have been over-preempted.
		// The "*2" factor means each pod of ReplicaSet{1,2} is expected to be preempted
		// (and therefore recreated under a new name) at most once.
		maxRSPodsSeen := []int{5 * 2, 4 * 2, 4}
		rsPodsSeen := []int{0, 0, 0}
		for podName := range podNamesSeen {
			if strings.HasPrefix(podName, "rs-pod1") {
				rsPodsSeen[0]++
			} else if strings.HasPrefix(podName, "rs-pod2") {
				rsPodsSeen[1]++
			} else if strings.HasPrefix(podName, "rs-pod3") {
				rsPodsSeen[2]++
			}
		}
		for i, got := range rsPodsSeen {
			expected := maxRSPodsSeen[i]
			if got > expected {
				framework.Failf("pods of ReplicaSet%d have been over-preempted: expect %v pod names, but got %d", i+1, expected, got)
			}
		}
	})
})

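// pauseRSConfig describes a ReplicaSet of pause pods: the desired replica
// count plus the per-pod configuration used for its pod template.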
type pauseRSConfig struct {
	Replicas  int32
	PodConfig pausePodConfig
}

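// initPauseRS builds (but does not create) a ReplicaSet object whose pod
// template is derived from the given pausePodConfig.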
func initPauseRS(f *framework.Framework, conf pauseRSConfig) *appsv1.ReplicaSet {
	pausePod := initPausePod(f, conf.PodConfig)
	pauseRS := &appsv1.ReplicaSet{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "rs-" + pausePod.Name,
			Namespace: pausePod.Namespace,
		},
		Spec: appsv1.ReplicaSetSpec{
			Replicas: &conf.Replicas,
			Selector: &metav1.LabelSelector{
				MatchLabels: pausePod.Labels,
			},
			Template: corev1.PodTemplateSpec{
				ObjectMeta: metav1.ObjectMeta{Labels: pausePod.ObjectMeta.Labels},
				Spec:       pausePod.Spec,
			},
		},
	}
	return pauseRS
}

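// createPauseRS creates the ReplicaSet built by initPauseRS in the configured
// namespace (falling back to the framework namespace) and fails the test on error.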
func createPauseRS(f *framework.Framework, conf pauseRSConfig) *appsv1.ReplicaSet {
	namespace := conf.PodConfig.Namespace
	if len(namespace) == 0 {
		namespace = f.Namespace.Name
	}
	rs, err := f.ClientSet.AppsV1().ReplicaSets(namespace).Create(initPauseRS(f, conf))
	framework.ExpectNoError(err)
	return rs
}

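// runPauseRS creates the ReplicaSet and waits until its target number of
// replicas are available.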
func runPauseRS(f *framework.Framework, conf pauseRSConfig) *appsv1.ReplicaSet {
	rs := createPauseRS(f, conf)
	framework.ExpectNoError(replicaset.WaitForReplicaSetTargetAvailableReplicas(f.ClientSet, rs, conf.Replicas))
	return rs
}