- /*
- Copyright 2015 The Kubernetes Authors.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- */
- package benchmark
- import (
- "fmt"
- "k8s.io/api/core/v1"
- "k8s.io/apimachinery/pkg/api/resource"
- metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
- "k8s.io/apimachinery/pkg/labels"
- "k8s.io/klog"
- "k8s.io/kubernetes/pkg/scheduler/factory"
- testutils "k8s.io/kubernetes/test/utils"
- "math"
- "strconv"
- "testing"
- "time"
- )
- const (
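- // warning3K is the pods-per-second rate below which the 3K-pod test prints a
- // warning; the threshold* values are the minimum rates below which the
- // corresponding tests fail.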
- warning3K = 100
- threshold3K = 30
- threshold30K = 30
- threshold60K = 30
- )
- var (
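- // basePodTemplate and baseNodeTemplate are the unmutated templates from which
- // all test pods and nodes are generated; mutations such as node affinity are
- // applied to deep copies of them.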
- basePodTemplate = &v1.Pod{
- ObjectMeta: metav1.ObjectMeta{
- GenerateName: "sched-perf-pod-",
- },
- // TODO: this needs to be configurable.
- Spec: testutils.MakePodSpec(),
- }
- baseNodeTemplate = &v1.Node{
- ObjectMeta: metav1.ObjectMeta{
- GenerateName: "sample-node-",
- },
- Status: v1.NodeStatus{
- Capacity: v1.ResourceList{
- v1.ResourcePods: *resource.NewQuantity(110, resource.DecimalSI),
- v1.ResourceCPU: resource.MustParse("4"),
- v1.ResourceMemory: resource.MustParse("32Gi"),
- },
- Phase: v1.NodeRunning,
- Conditions: []v1.NodeCondition{
- {Type: v1.NodeReady, Status: v1.ConditionTrue},
- },
- },
- }
- )
- // TestSchedule100Node3KPods schedules 3k pods on 100 nodes.
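- // To run only this test, something like the following typically works,
- // though the exact package path may differ between releases:
- //   go test -timeout 30m -run TestSchedule100Node3KPods ./test/integration/scheduler_perf/...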
- func TestSchedule100Node3KPods(t *testing.T) {
- if testing.Short() {
- t.Skip("Skipping because we want to run short tests")
- }
- config := getBaseConfig(100, 3000)
- err := writePodAndNodeTopologyToConfig(config)
- if err != nil {
- t.Errorf("Misconfiguration happened for nodes/pods chosen to have predicates and priorities")
- }
- min := schedulePods(config)
- if min < threshold3K {
- t.Errorf("Failing: Scheduling rate was too low for an interval, we saw rate of %v, which is the allowed minimum of %v ! ", min, threshold3K)
- } else if min < warning3K {
- fmt.Printf("Warning: pod scheduling throughput for 3k pods was slow for an interval... Saw an interval with very low (%v) scheduling rate!", min)
- } else {
- fmt.Printf("Minimal observed throughput for 3k pod test: %v\n", min)
- }
- }
- // TestSchedule2000Node60KPods schedules 60k pods on 2000 nodes.
- // This test won't fit in the normal 10-minute time window.
- // func TestSchedule2000Node60KPods(t *testing.T) {
- // if testing.Short() {
- // t.Skip("Skipping because we want to run short tests")
- // }
- // config := defaultSchedulerBenchmarkConfig(2000, 60000)
- // if min := schedulePods(config); min < threshold60K {
- // t.Errorf("Too small pod scheduling throughput for 60k pods. Expected %v got %v", threshold60K, min)
- // } else {
- // fmt.Printf("Minimal observed throughput for 60k pod test: %v\n", min)
- // }
- // }
- // testConfig contains the input parameters needed for running the test suite.
- type testConfig struct {
- numPods int
- numNodes int
- mutatedNodeTemplate *v1.Node
- mutatedPodTemplate *v1.Pod
- schedulerSupportFunctions factory.Configurator
- destroyFunc func()
- }
- // getBaseConfig sets up the scheduler and returns a testConfig initialized with the given numbers of nodes and pods.
- func getBaseConfig(nodes int, pods int) *testConfig {
- schedulerConfigFactory, destroyFunc := mustSetupScheduler()
- return &testConfig{
- schedulerSupportFunctions: schedulerConfigFactory,
- destroyFunc: destroyFunc,
- numNodes: nodes,
- numPods: pods,
- }
- }
- // schedulePods schedules a specific number of pods on a specific number of nodes.
- // It is used to measure scheduling throughput on clusters of various sizes,
- // and how the rate changes as more and more pods are scheduled.
- // It does not stop until all pods are scheduled,
- // and it returns the minimum throughput observed over the whole run.
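- // For example, if 3000 pods finish in 60 seconds and the slowest one-second
- // interval saw only 10 pods scheduled, the average is 50 pods/second and the
- // returned minimum is 10.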
- func schedulePods(config *testConfig) int32 {
- defer config.destroyFunc()
- prev := 0
- // On startup there may be a latent period where NO scheduling occurs (qps = 0).
- // We are interested in low but non-zero scheduling rates (e.g. qps = 2),
- // so ignore that initial quiet period when tracking the minimum.
- minQPS := int32(math.MaxInt32)
- start := time.Now()
- // Bake in time for the first pod scheduling event.
- for {
- time.Sleep(50 * time.Millisecond)
- scheduled, err := config.schedulerSupportFunctions.GetScheduledPodLister().List(labels.Everything())
- if err != nil {
- klog.Fatalf("%v", err)
- }
- // For example, at 30,000 pods, wait until at least 300 are scheduled before starting to measure.
- // TODO Find out why sometimes there may be scheduling blips in the beginning.
- if len(scheduled) > config.numPods/100 {
- break
- }
- }
- // Count how often each QPS value is observed; useful for debugging tests.
- qpsStats := map[int]int{}
- // Now that scheduling has started, start measuring how many pods are scheduled per second.
- for {
- // This can potentially affect the performance of the scheduler, since List() is done under a mutex,
- // and listing 10000 pods is an expensive operation; running it frequently may therefore slow the scheduler.
- // TODO: Setup watch on apiserver and wait until all pods scheduled.
- scheduled, err := config.schedulerSupportFunctions.GetScheduledPodLister().List(labels.Everything())
- if err != nil {
- klog.Fatalf("%v", err)
- }
- // We are done once all pods have been scheduled;
- // return the worst-case (minimum) interval rate seen during the run.
- // Note this should never be skewed low by cold start, since scheduling time was baked in above.
- if len(scheduled) >= config.numPods {
- consumed := int(time.Since(start) / time.Second)
- if consumed <= 0 {
- consumed = 1
- }
- fmt.Printf("Scheduled %v Pods in %v seconds (%v per second on average). min QPS was %v\n",
- config.numPods, consumed, config.numPods/consumed, minQPS)
- return minQPS
- }
- // There's no point in printing the rate for the last, partial interval, as its value is effectively random; hence the return above.
- qps := len(scheduled) - prev
- qpsStats[qps]++
- if int32(qps) < minQPS {
- minQPS = int32(qps)
- }
- fmt.Printf("%ds\trate: %d\ttotal: %d (qps frequency: %v)\n", time.Since(start)/time.Second, qps, len(scheduled), qpsStats)
- prev = len(scheduled)
- time.Sleep(1 * time.Second)
- }
- }
- // mutateNodeTemplate applies the generated node-affinity labels to the given node template in place.
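- // For example, with LabelCount=2 and nodeAffinityKey "foo-" (illustrative values),
- // the node ends up with the labels {"foo-0": "0", "foo-1": "1"}.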
- func (na nodeAffinity) mutateNodeTemplate(node *v1.Node) {
- labels := make(map[string]string)
- for i := 0; i < na.LabelCount; i++ {
- value := strconv.Itoa(i)
- key := na.nodeAffinityKey + value
- labels[key] = value
- }
- node.ObjectMeta.Labels = labels
- }
- // mutatePodTemplate adds the matching required node-affinity terms to the given pod template in place.
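- // Continuing the illustrative example above, the pod would require a node matching
- // both "foo-0" In ["0"] and "foo-1" In ["1"] (RequiredDuringSchedulingIgnoredDuringExecution).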
- func (na nodeAffinity) mutatePodTemplate(pod *v1.Pod) {
- var nodeSelectorRequirements []v1.NodeSelectorRequirement
- for i := 0; i < na.LabelCount; i++ {
- value := strconv.Itoa(i)
- key := na.nodeAffinityKey + value
- nodeSelector := v1.NodeSelectorRequirement{Key: key, Values: []string{value}, Operator: v1.NodeSelectorOpIn}
- nodeSelectorRequirements = append(nodeSelectorRequirements, nodeSelector)
- }
- pod.Spec.Affinity = &v1.Affinity{
- NodeAffinity: &v1.NodeAffinity{
- RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
- NodeSelectorTerms: []v1.NodeSelectorTerm{
- {
- MatchExpressions: nodeSelectorRequirements,
- },
- },
- },
- },
- }
- }
- // generateNodes generates nodes to be used for scheduling.
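- // The first inputConfig.NodeCount nodes are created from the mutated template;
- // the remaining config.numNodes-inputConfig.NodeCount nodes use the base template.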
- func (inputConfig *schedulerPerfConfig) generateNodes(config *testConfig) {
- for i := 0; i < inputConfig.NodeCount; i++ {
- config.schedulerSupportFunctions.GetClient().CoreV1().Nodes().Create(config.mutatedNodeTemplate)
- }
- for i := 0; i < config.numNodes-inputConfig.NodeCount; i++ {
- config.schedulerSupportFunctions.GetClient().CoreV1().Nodes().Create(baseNodeTemplate)
- }
- }
- // generatePods generates pods to be used for scheduling.
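- // As with generateNodes, inputConfig.PodCount pods are created from the mutated
- // template and the remaining config.numPods-inputConfig.PodCount pods from the base one.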
- func (inputConfig *schedulerPerfConfig) generatePods(config *testConfig) {
- if err := testutils.CreatePod(config.schedulerSupportFunctions.GetClient(), "sample", inputConfig.PodCount, config.mutatedPodTemplate); err != nil {
- klog.Fatalf("Error creating pods: %v", err)
- }
- if err := testutils.CreatePod(config.schedulerSupportFunctions.GetClient(), "sample", config.numPods-inputConfig.PodCount, basePodTemplate); err != nil {
- klog.Fatalf("Error creating pods: %v", err)
- }
- }
- // generatePodAndNodeTopology is the wrapper function for modifying both pods and node objects.
- func (inputConfig *schedulerPerfConfig) generatePodAndNodeTopology(config *testConfig) error {
- if config.numNodes < inputConfig.NodeCount || config.numPods < inputConfig.PodCount {
- return fmt.Errorf("NodeCount cannot be greater than numNodes")
- }
- nodeAffinity := inputConfig.NodeAffinity
- // Deep-copy the base templates before mutating them so the shared globals stay pristine.
- mutatedNodeTemplate := baseNodeTemplate.DeepCopy()
- mutatedPodTemplate := basePodTemplate.DeepCopy()
- if nodeAffinity != nil {
- nodeAffinity.mutateNodeTemplate(mutatedNodeTemplate)
- nodeAffinity.mutatePodTemplate(mutatedPodTemplate)
- } // TODO: other predicates/priorities will be processed in subsequent if statements or a switch.
- config.mutatedPodTemplate = mutatedPodTemplate
- config.mutatedNodeTemplate = mutatedNodeTemplate
- inputConfig.generateNodes(config)
- inputConfig.generatePods(config)
- return nil
- }
- // writePodAndNodeTopologyToConfig builds an input configuration and applies it to the test configuration.
- // TODO: As of now, this function does nothing except feed hard-coded input values into the predicate/priority structs.
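- // With the hard-coded values below, all 100 nodes and all 3000 pods of the 3K test
- // receive the node-affinity mutation, so every pod must match ten generated node labels.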
- func writePodAndNodeTopologyToConfig(config *testConfig) error {
- // High Level structure that should be filled for every predicate or priority.
- inputConfig := &schedulerPerfConfig{
- NodeCount: 100,
- PodCount: 3000,
- NodeAffinity: &nodeAffinity{
- nodeAffinityKey: "kubernetes.io/sched-perf-node-affinity-",
- LabelCount: 10,
- },
- }
- return inputConfig.generatePodAndNodeTopology(config)
- }