priorities.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499
  1. /*
  2. Copyright 2017 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package scheduling
  14. import (
  15. "context"
  16. "encoding/json"
  17. "fmt"
  18. "math"
  19. "time"
  20. "github.com/onsi/ginkgo"
  21. // ensure libs have a chance to initialize
  22. _ "github.com/stretchr/testify/assert"
  23. v1 "k8s.io/api/core/v1"
  24. apierrors "k8s.io/apimachinery/pkg/api/errors"
  25. "k8s.io/apimachinery/pkg/api/resource"
  26. metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
  27. "k8s.io/apimachinery/pkg/util/uuid"
  28. "k8s.io/apimachinery/pkg/util/wait"
  29. clientset "k8s.io/client-go/kubernetes"
  30. v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
  31. schedutil "k8s.io/kubernetes/pkg/scheduler/util"
  32. "k8s.io/kubernetes/test/e2e/framework"
  33. e2eevents "k8s.io/kubernetes/test/e2e/framework/events"
  34. e2enode "k8s.io/kubernetes/test/e2e/framework/node"
  35. e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
  36. e2erc "k8s.io/kubernetes/test/e2e/framework/rc"
  37. e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
  38. testutils "k8s.io/kubernetes/test/utils"
  39. imageutils "k8s.io/kubernetes/test/utils/image"
  40. )
  // Resource is the pair of compute resource amounts (CPU and memory) that the
  // helpers in this file sum up per pod.
  type Resource struct {
  	// MilliCPU is a CPU amount in millicores; it is added to Quantity.MilliValue() totals.
  	MilliCPU int64
  	// Memory is a memory amount; it is added to Quantity.Value() totals.
  	Memory int64
  }
  46. var balancePodLabel = map[string]string{"name": "priority-balanced-memory"}
  47. var podRequestedResource = &v1.ResourceRequirements{
  48. Limits: v1.ResourceList{
  49. v1.ResourceMemory: resource.MustParse("100Mi"),
  50. v1.ResourceCPU: resource.MustParse("100m"),
  51. },
  52. Requests: v1.ResourceList{
  53. v1.ResourceMemory: resource.MustParse("100Mi"),
  54. v1.ResourceCPU: resource.MustParse("100m"),
  55. },
  56. }
  57. // addOrUpdateAvoidPodOnNode adds avoidPods annotations to node, will override if it exists
  58. func addOrUpdateAvoidPodOnNode(c clientset.Interface, nodeName string, avoidPods v1.AvoidPods) {
  59. err := wait.PollImmediate(framework.Poll, framework.SingleCallTimeout, func() (bool, error) {
  60. node, err := c.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
  61. if err != nil {
  62. if testutils.IsRetryableAPIError(err) {
  63. return false, nil
  64. }
  65. return false, err
  66. }
  67. taintsData, err := json.Marshal(avoidPods)
  68. framework.ExpectNoError(err)
  69. if node.Annotations == nil {
  70. node.Annotations = make(map[string]string)
  71. }
  72. node.Annotations[v1.PreferAvoidPodsAnnotationKey] = string(taintsData)
  73. _, err = c.CoreV1().Nodes().Update(context.TODO(), node, metav1.UpdateOptions{})
  74. if err != nil {
  75. if !apierrors.IsConflict(err) {
  76. framework.ExpectNoError(err)
  77. } else {
  78. framework.Logf("Conflict when trying to add/update avoidPods %v to %v with error %v", avoidPods, nodeName, err)
  79. return false, nil
  80. }
  81. }
  82. return true, nil
  83. })
  84. framework.ExpectNoError(err)
  85. }
  86. // removeAvoidPodsOffNode removes AvoidPods annotations from the node. It does not fail if no such annotation exists.
  87. func removeAvoidPodsOffNode(c clientset.Interface, nodeName string) {
  88. err := wait.PollImmediate(framework.Poll, framework.SingleCallTimeout, func() (bool, error) {
  89. node, err := c.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
  90. if err != nil {
  91. if testutils.IsRetryableAPIError(err) {
  92. return false, nil
  93. }
  94. return false, err
  95. }
  96. if node.Annotations == nil {
  97. return true, nil
  98. }
  99. delete(node.Annotations, v1.PreferAvoidPodsAnnotationKey)
  100. _, err = c.CoreV1().Nodes().Update(context.TODO(), node, metav1.UpdateOptions{})
  101. if err != nil {
  102. if !apierrors.IsConflict(err) {
  103. framework.ExpectNoError(err)
  104. } else {
  105. framework.Logf("Conflict when trying to remove avoidPods to %v", nodeName)
  106. return false, nil
  107. }
  108. }
  109. return true, nil
  110. })
  111. framework.ExpectNoError(err)
  112. }
  113. // This test suite is used to verifies scheduler priority functions based on the default provider
  114. var _ = SIGDescribe("SchedulerPriorities [Serial]", func() {
  115. var cs clientset.Interface
  116. var nodeList *v1.NodeList
  117. var systemPodsNo int
  118. var ns string
  119. f := framework.NewDefaultFramework("sched-priority")
  120. ginkgo.AfterEach(func() {
  121. })
  122. ginkgo.BeforeEach(func() {
  123. cs = f.ClientSet
  124. ns = f.Namespace.Name
  125. nodeList = &v1.NodeList{}
  126. var err error
  127. e2enode.WaitForTotalHealthy(cs, time.Minute)
  128. _, nodeList, err = e2enode.GetMasterAndWorkerNodes(cs)
  129. if err != nil {
  130. framework.Logf("Unexpected error occurred: %v", err)
  131. }
  132. // TODO: write a wrapper for ExpectNoErrorWithOffset()
  133. framework.ExpectNoErrorWithOffset(0, err)
  134. err = framework.CheckTestingNSDeletedExcept(cs, ns)
  135. framework.ExpectNoError(err)
  136. err = e2epod.WaitForPodsRunningReady(cs, metav1.NamespaceSystem, int32(systemPodsNo), 0, framework.PodReadyBeforeTimeout, map[string]string{})
  137. framework.ExpectNoError(err)
  138. })
  139. ginkgo.It("Pod should be scheduled to node that don't match the PodAntiAffinity terms", func() {
  140. e2eskipper.SkipUnlessNodeCountIsAtLeast(2)
  141. ginkgo.By("Trying to launch a pod with a label to get a node which can launch it.")
  142. pod := runPausePod(f, pausePodConfig{
  143. Name: "pod-with-label-security-s1",
  144. Labels: map[string]string{"security": "S1"},
  145. })
  146. nodeName := pod.Spec.NodeName
  147. k := v1.LabelHostname
  148. ginkgo.By("Verifying the node has a label " + k)
  149. node, err := cs.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
  150. framework.ExpectNoError(err)
  151. if _, hasLabel := node.Labels[k]; !hasLabel {
  152. // If the label is not exists, label all nodes for testing.
  153. ginkgo.By("Trying to apply a label on the found node.")
  154. k = "kubernetes.io/e2e-node-topologyKey"
  155. v := "topologyvalue1"
  156. framework.AddOrUpdateLabelOnNode(cs, nodeName, k, v)
  157. framework.ExpectNodeHasLabel(cs, nodeName, k, v)
  158. defer framework.RemoveLabelOffNode(cs, nodeName, k)
  159. ginkgo.By("Trying to apply a label on other nodes.")
  160. v = "topologyvalue2"
  161. for _, node := range nodeList.Items {
  162. if node.Name != nodeName {
  163. framework.AddOrUpdateLabelOnNode(cs, node.Name, k, v)
  164. framework.ExpectNodeHasLabel(cs, node.Name, k, v)
  165. defer framework.RemoveLabelOffNode(cs, node.Name, k)
  166. }
  167. }
  168. }
  169. // make the nodes have balanced cpu,mem usage
  170. err = createBalancedPodForNodes(f, cs, ns, nodeList.Items, podRequestedResource, 0.6)
  171. framework.ExpectNoError(err)
  172. ginkgo.By("Trying to launch the pod with podAntiAffinity.")
  173. labelPodName := "pod-with-pod-antiaffinity"
  174. pod = createPausePod(f, pausePodConfig{
  175. Resources: podRequestedResource,
  176. Name: labelPodName,
  177. Affinity: &v1.Affinity{
  178. PodAntiAffinity: &v1.PodAntiAffinity{
  179. PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{
  180. {
  181. PodAffinityTerm: v1.PodAffinityTerm{
  182. LabelSelector: &metav1.LabelSelector{
  183. MatchExpressions: []metav1.LabelSelectorRequirement{
  184. {
  185. Key: "security",
  186. Operator: metav1.LabelSelectorOpIn,
  187. Values: []string{"S1", "value2"},
  188. },
  189. {
  190. Key: "security",
  191. Operator: metav1.LabelSelectorOpNotIn,
  192. Values: []string{"S2"},
  193. }, {
  194. Key: "security",
  195. Operator: metav1.LabelSelectorOpExists,
  196. },
  197. },
  198. },
  199. TopologyKey: k,
  200. Namespaces: []string{ns},
  201. },
  202. Weight: 10,
  203. },
  204. },
  205. },
  206. },
  207. })
  208. ginkgo.By("Wait the pod becomes running")
  209. framework.ExpectNoError(f.WaitForPodRunning(pod.Name))
  210. labelPod, err := cs.CoreV1().Pods(ns).Get(context.TODO(), labelPodName, metav1.GetOptions{})
  211. framework.ExpectNoError(err)
  212. ginkgo.By("Verify the pod was scheduled to the expected node.")
  213. framework.ExpectNotEqual(labelPod.Spec.NodeName, nodeName)
  214. })
  215. ginkgo.It("Pod should avoid nodes that have avoidPod annotation", func() {
  216. nodeName := nodeList.Items[0].Name
  217. // make the nodes have balanced cpu,mem usage
  218. err := createBalancedPodForNodes(f, cs, ns, nodeList.Items, podRequestedResource, 0.5)
  219. framework.ExpectNoError(err)
  220. ginkgo.By("Create a RC, with 0 replicas")
  221. rc := createRC(ns, "scheduler-priority-avoid-pod", int32(0), map[string]string{"name": "scheduler-priority-avoid-pod"}, f, podRequestedResource)
  222. // Cleanup the replication controller when we are done.
  223. defer func() {
  224. // Resize the replication controller to zero to get rid of pods.
  225. if err := e2erc.DeleteRCAndWaitForGC(f.ClientSet, f.Namespace.Name, rc.Name); err != nil {
  226. framework.Logf("Failed to cleanup replication controller %v: %v.", rc.Name, err)
  227. }
  228. }()
  229. ginkgo.By("Trying to apply avoidPod annotations on the first node.")
  230. avoidPod := v1.AvoidPods{
  231. PreferAvoidPods: []v1.PreferAvoidPodsEntry{
  232. {
  233. PodSignature: v1.PodSignature{
  234. PodController: &metav1.OwnerReference{
  235. APIVersion: "v1",
  236. Kind: "ReplicationController",
  237. Name: rc.Name,
  238. UID: rc.UID,
  239. Controller: func() *bool { b := true; return &b }(),
  240. },
  241. },
  242. Reason: "some reson",
  243. Message: "some message",
  244. },
  245. },
  246. }
  247. action := func() error {
  248. addOrUpdateAvoidPodOnNode(cs, nodeName, avoidPod)
  249. return nil
  250. }
  251. predicate := func(node *v1.Node) bool {
  252. val, err := json.Marshal(avoidPod)
  253. if err != nil {
  254. return false
  255. }
  256. return node.Annotations[v1.PreferAvoidPodsAnnotationKey] == string(val)
  257. }
  258. success, err := e2eevents.ObserveNodeUpdateAfterAction(f.ClientSet, nodeName, predicate, action)
  259. framework.ExpectNoError(err)
  260. framework.ExpectEqual(success, true)
  261. defer removeAvoidPodsOffNode(cs, nodeName)
  262. ginkgo.By(fmt.Sprintf("Scale the RC: %s to len(nodeList.Item)-1 : %v.", rc.Name, len(nodeList.Items)-1))
  263. e2erc.ScaleRC(f.ClientSet, f.ScalesGetter, ns, rc.Name, uint(len(nodeList.Items)-1), true)
  264. testPods, err := cs.CoreV1().Pods(ns).List(context.TODO(), metav1.ListOptions{
  265. LabelSelector: "name=scheduler-priority-avoid-pod",
  266. })
  267. framework.ExpectNoError(err)
  268. ginkgo.By(fmt.Sprintf("Verify the pods should not scheduled to the node: %s", nodeName))
  269. for _, pod := range testPods.Items {
  270. framework.ExpectNotEqual(pod.Spec.NodeName, nodeName)
  271. }
  272. })
  273. ginkgo.It("Pod should be preferably scheduled to nodes pod can tolerate", func() {
  274. // make the nodes have balanced cpu,mem usage ratio
  275. err := createBalancedPodForNodes(f, cs, ns, nodeList.Items, podRequestedResource, 0.5)
  276. framework.ExpectNoError(err)
  277. // Apply 10 taints to first node
  278. nodeName := nodeList.Items[0].Name
  279. ginkgo.By("Trying to apply 10 (tolerable) taints on the first node.")
  280. var tolerations []v1.Toleration
  281. for i := 0; i < 10; i++ {
  282. testTaint := addRandomTaintToNode(cs, nodeName)
  283. tolerations = append(tolerations, v1.Toleration{Key: testTaint.Key, Value: testTaint.Value, Effect: testTaint.Effect})
  284. defer framework.RemoveTaintOffNode(cs, nodeName, *testTaint)
  285. }
  286. ginkgo.By("Adding 10 intolerable taints to all other nodes")
  287. for i := 1; i < len(nodeList.Items); i++ {
  288. node := nodeList.Items[i]
  289. for i := 0; i < 10; i++ {
  290. testTaint := addRandomTaintToNode(cs, node.Name)
  291. defer framework.RemoveTaintOffNode(cs, node.Name, *testTaint)
  292. }
  293. }
  294. tolerationPodName := "with-tolerations"
  295. ginkgo.By("Create a pod that tolerates all the taints of the first node.")
  296. pod := createPausePod(f, pausePodConfig{
  297. Name: tolerationPodName,
  298. Tolerations: tolerations,
  299. })
  300. framework.ExpectNoError(f.WaitForPodRunning(pod.Name))
  301. ginkgo.By("Pod should prefer scheduled to the node that pod can tolerate.")
  302. tolePod, err := cs.CoreV1().Pods(ns).Get(context.TODO(), tolerationPodName, metav1.GetOptions{})
  303. framework.ExpectNoError(err)
  304. framework.ExpectEqual(tolePod.Spec.NodeName, nodeName)
  305. })
  306. })
  307. // createBalancedPodForNodes creates a pod per node that asks for enough resources to make all nodes have the same mem/cpu usage ratio.
  308. func createBalancedPodForNodes(f *framework.Framework, cs clientset.Interface, ns string, nodes []v1.Node, requestedResource *v1.ResourceRequirements, ratio float64) error {
  309. // find the max, if the node has the max,use the one, if not,use the ratio parameter
  310. var maxCPUFraction, maxMemFraction float64 = ratio, ratio
  311. var cpuFractionMap = make(map[string]float64)
  312. var memFractionMap = make(map[string]float64)
  313. for _, node := range nodes {
  314. cpuFraction, memFraction := computeCPUMemFraction(cs, node, requestedResource)
  315. cpuFractionMap[node.Name] = cpuFraction
  316. memFractionMap[node.Name] = memFraction
  317. if cpuFraction > maxCPUFraction {
  318. maxCPUFraction = cpuFraction
  319. }
  320. if memFraction > maxMemFraction {
  321. maxMemFraction = memFraction
  322. }
  323. }
  324. // we need the max one to keep the same cpu/mem use rate
  325. ratio = math.Max(maxCPUFraction, maxMemFraction)
  326. for _, node := range nodes {
  327. memAllocatable, found := node.Status.Allocatable[v1.ResourceMemory]
  328. framework.ExpectEqual(found, true)
  329. memAllocatableVal := memAllocatable.Value()
  330. cpuAllocatable, found := node.Status.Allocatable[v1.ResourceCPU]
  331. framework.ExpectEqual(found, true)
  332. cpuAllocatableMil := cpuAllocatable.MilliValue()
  333. needCreateResource := v1.ResourceList{}
  334. cpuFraction := cpuFractionMap[node.Name]
  335. memFraction := memFractionMap[node.Name]
  336. needCreateResource[v1.ResourceCPU] = *resource.NewMilliQuantity(int64((ratio-cpuFraction)*float64(cpuAllocatableMil)), resource.DecimalSI)
  337. needCreateResource[v1.ResourceMemory] = *resource.NewQuantity(int64((ratio-memFraction)*float64(memAllocatableVal)), resource.BinarySI)
  338. err := testutils.StartPods(cs, 1, ns, string(uuid.NewUUID()),
  339. *initPausePod(f, pausePodConfig{
  340. Name: "",
  341. Labels: balancePodLabel,
  342. Resources: &v1.ResourceRequirements{
  343. Limits: needCreateResource,
  344. Requests: needCreateResource,
  345. },
  346. NodeName: node.Name,
  347. }), true, framework.Logf)
  348. if err != nil {
  349. return err
  350. }
  351. }
  352. for _, node := range nodes {
  353. ginkgo.By("Compute Cpu, Mem Fraction after create balanced pods.")
  354. computeCPUMemFraction(cs, node, requestedResource)
  355. }
  356. return nil
  357. }
  358. func computeCPUMemFraction(cs clientset.Interface, node v1.Node, resource *v1.ResourceRequirements) (float64, float64) {
  359. framework.Logf("ComputeCPUMemFraction for node: %v", node.Name)
  360. totalRequestedCPUResource := resource.Requests.Cpu().MilliValue()
  361. totalRequestedMemResource := resource.Requests.Memory().Value()
  362. allpods, err := cs.CoreV1().Pods(metav1.NamespaceAll).List(context.TODO(), metav1.ListOptions{})
  363. if err != nil {
  364. framework.Failf("Expect error of invalid, got : %v", err)
  365. }
  366. for _, pod := range allpods.Items {
  367. if pod.Spec.NodeName == node.Name {
  368. framework.Logf("Pod for on the node: %v, Cpu: %v, Mem: %v", pod.Name, getNonZeroRequests(&pod).MilliCPU, getNonZeroRequests(&pod).Memory)
  369. // Ignore best effort pods while computing fractions as they won't be taken in account by scheduler.
  370. if v1qos.GetPodQOS(&pod) == v1.PodQOSBestEffort {
  371. continue
  372. }
  373. totalRequestedCPUResource += getNonZeroRequests(&pod).MilliCPU
  374. totalRequestedMemResource += getNonZeroRequests(&pod).Memory
  375. }
  376. }
  377. cpuAllocatable, found := node.Status.Allocatable[v1.ResourceCPU]
  378. framework.ExpectEqual(found, true)
  379. cpuAllocatableMil := cpuAllocatable.MilliValue()
  380. floatOne := float64(1)
  381. cpuFraction := float64(totalRequestedCPUResource) / float64(cpuAllocatableMil)
  382. if cpuFraction > floatOne {
  383. cpuFraction = floatOne
  384. }
  385. memAllocatable, found := node.Status.Allocatable[v1.ResourceMemory]
  386. framework.ExpectEqual(found, true)
  387. memAllocatableVal := memAllocatable.Value()
  388. memFraction := float64(totalRequestedMemResource) / float64(memAllocatableVal)
  389. if memFraction > floatOne {
  390. memFraction = floatOne
  391. }
  392. framework.Logf("Node: %v, totalRequestedCPUResource: %v, cpuAllocatableMil: %v, cpuFraction: %v", node.Name, totalRequestedCPUResource, cpuAllocatableMil, cpuFraction)
  393. framework.Logf("Node: %v, totalRequestedMemResource: %v, memAllocatableVal: %v, memFraction: %v", node.Name, totalRequestedMemResource, memAllocatableVal, memFraction)
  394. return cpuFraction, memFraction
  395. }
  396. func getNonZeroRequests(pod *v1.Pod) Resource {
  397. result := Resource{}
  398. for i := range pod.Spec.Containers {
  399. container := &pod.Spec.Containers[i]
  400. cpu, memory := schedutil.GetNonzeroRequests(&container.Resources.Requests)
  401. result.MilliCPU += cpu
  402. result.Memory += memory
  403. }
  404. return result
  405. }
  406. func createRC(ns, rsName string, replicas int32, rcPodLabels map[string]string, f *framework.Framework, resource *v1.ResourceRequirements) *v1.ReplicationController {
  407. rc := &v1.ReplicationController{
  408. TypeMeta: metav1.TypeMeta{
  409. Kind: "ReplicationController",
  410. APIVersion: "v1",
  411. },
  412. ObjectMeta: metav1.ObjectMeta{
  413. Name: rsName,
  414. },
  415. Spec: v1.ReplicationControllerSpec{
  416. Replicas: &replicas,
  417. Template: &v1.PodTemplateSpec{
  418. ObjectMeta: metav1.ObjectMeta{
  419. Labels: rcPodLabels,
  420. },
  421. Spec: v1.PodSpec{
  422. Containers: []v1.Container{
  423. {
  424. Name: rsName,
  425. Image: imageutils.GetPauseImageName(),
  426. Resources: *resource,
  427. },
  428. },
  429. },
  430. },
  431. },
  432. }
  433. rc, err := f.ClientSet.CoreV1().ReplicationControllers(ns).Create(context.TODO(), rc, metav1.CreateOptions{})
  434. framework.ExpectNoError(err)
  435. return rc
  436. }
  437. func addRandomTaintToNode(cs clientset.Interface, nodeName string) *v1.Taint {
  438. testTaint := v1.Taint{
  439. Key: fmt.Sprintf("kubernetes.io/e2e-taint-key-%s", string(uuid.NewUUID())),
  440. Value: fmt.Sprintf("testing-taint-value-%s", string(uuid.NewUUID())),
  441. Effect: v1.TaintEffectPreferNoSchedule,
  442. }
  443. framework.AddOrUpdateTaintOnNode(cs, nodeName, testTaint)
  444. framework.ExpectNodeHasTaint(cs, nodeName, &testTaint)
  445. return &testTaint
  446. }