scheduler_perf_test.go 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304
  1. /*
  2. Copyright 2019 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package benchmark
  14. import (
  15. "fmt"
  16. "io/ioutil"
  17. "testing"
  18. "time"
  19. v1 "k8s.io/api/core/v1"
  20. utilfeature "k8s.io/apiserver/pkg/util/feature"
  21. coreinformers "k8s.io/client-go/informers/core/v1"
  22. clientset "k8s.io/client-go/kubernetes"
  23. "k8s.io/component-base/featuregate"
  24. featuregatetesting "k8s.io/component-base/featuregate/testing"
  25. "k8s.io/klog"
  26. "k8s.io/kubernetes/test/integration/framework"
  27. testutils "k8s.io/kubernetes/test/utils"
  28. "sigs.k8s.io/yaml"
  29. )
  // configFile is the path of the YAML file from which BenchmarkPerfScheduling
  // loads its test cases.
  const configFile = "config/performance-config.yaml"
  33. var (
  34. defaultMetricsCollectorConfig = metricsCollectorConfig{
  35. Metrics: []string{
  36. "scheduler_scheduling_algorithm_predicate_evaluation_seconds",
  37. "scheduler_scheduling_algorithm_priority_evaluation_seconds",
  38. "scheduler_binding_duration_seconds",
  39. "scheduler_e2e_scheduling_duration_seconds",
  40. },
  41. }
  42. )
  43. // testCase configures a test case to run the scheduler performance test. Users should be able to
  44. // provide this via a YAML file.
  45. //
  46. // It specifies nodes and pods in the cluster before running the test. It also specifies the pods to
  47. // schedule during the test. The config can be as simple as just specify number of nodes/pods, where
  48. // default spec will be applied. It also allows the user to specify a pod spec template for more
  49. // complicated test cases.
  50. //
  51. // It also specifies the metrics to be collected after the test. If nothing is specified, default metrics
  52. // such as scheduling throughput and latencies will be collected.
  53. type testCase struct {
  54. // description of the test case
  55. Desc string
  56. // configures nodes in the cluster
  57. Nodes nodeCase
  58. // configures pods in the cluster before running the tests
  59. InitPods podCase
  60. // pods to be scheduled during the test.
  61. PodsToSchedule podCase
  62. // optional, feature gates to set before running the test
  63. FeatureGates map[featuregate.Feature]bool
  64. // optional, replaces default defaultMetricsCollectorConfig if supplied.
  65. MetricsCollectorConfig *metricsCollectorConfig
  66. }
  67. type nodeCase struct {
  68. Num int
  69. NodeTemplatePath *string
  70. // At most one of the following strategies can be defined. If not specified, default to TrivialNodePrepareStrategy.
  71. NodeAllocatableStrategy *testutils.NodeAllocatableStrategy
  72. LabelNodePrepareStrategy *testutils.LabelNodePrepareStrategy
  73. UniqueNodeLabelStrategy *testutils.UniqueNodeLabelStrategy
  74. }
  // podCase configures a group of pods created for a test case.
  type podCase struct {
  	// Num is the number of pods to create.
  	Num int

  	// PodTemplatePath is optional; when set, the pod spec is read from this
  	// file instead of using the base pod.
  	PodTemplatePath *string

  	// PersistentVolumeTemplatePath is the file holding the persistent volume
  	// template. It is only read when PersistentVolumeClaimTemplatePath is set.
  	PersistentVolumeTemplatePath *string

  	// PersistentVolumeClaimTemplatePath is optional; when set, pods are created
  	// together with a persistent volume and claim built from the templates.
  	PersistentVolumeClaimTemplatePath *string
  }
  81. // simpleTestCases defines a set of test cases that share the same template (node spec, pod spec, etc)
  82. // with testParams(e.g., NumNodes) being overridden. This provides a convenient way to define multiple tests
  83. // with various sizes.
  84. type simpleTestCases struct {
  85. Template testCase
  86. Params []testParams
  87. }
  // testParams carries the sizes that override the corresponding Num fields of
  // a test case template.
  type testParams struct {
  	// NumNodes overrides Nodes.Num.
  	NumNodes int

  	// NumInitPods overrides InitPods.Num.
  	NumInitPods int

  	// NumPodsToSchedule overrides PodsToSchedule.Num.
  	NumPodsToSchedule int
  }
  93. type testDataCollector interface {
  94. run(stopCh chan struct{})
  95. collect() []DataItem
  96. }
  97. func BenchmarkPerfScheduling(b *testing.B) {
  98. dataItems := DataItems{Version: "v1"}
  99. tests := getSimpleTestCases(configFile)
  100. for _, test := range tests {
  101. name := fmt.Sprintf("%v/%vNodes/%vInitPods/%vPodsToSchedule", test.Desc, test.Nodes.Num, test.InitPods.Num, test.PodsToSchedule.Num)
  102. b.Run(name, func(b *testing.B) {
  103. for feature, flag := range test.FeatureGates {
  104. defer featuregatetesting.SetFeatureGateDuringTest(b, utilfeature.DefaultFeatureGate, feature, flag)()
  105. }
  106. dataItems.DataItems = append(dataItems.DataItems, perfScheduling(test, b)...)
  107. })
  108. }
  109. if err := dataItems2JSONFile(dataItems, b.Name()); err != nil {
  110. klog.Fatalf("%v: unable to write measured data: %v", b.Name(), err)
  111. }
  112. }
  113. func perfScheduling(test testCase, b *testing.B) []DataItem {
  114. finalFunc, podInformer, clientset := mustSetupScheduler()
  115. defer finalFunc()
  116. nodePreparer := getNodePreparer(test.Nodes, clientset)
  117. if err := nodePreparer.PrepareNodes(); err != nil {
  118. klog.Fatalf("%v", err)
  119. }
  120. defer nodePreparer.CleanupNodes()
  121. createPods(setupNamespace, test.InitPods, clientset)
  122. waitNumPodsScheduled(test.InitPods.Num, podInformer)
  123. // start benchmark
  124. b.ResetTimer()
  125. // Start test data collectors.
  126. stopCh := make(chan struct{})
  127. collectors := getTestDataCollectors(test, podInformer, b)
  128. for _, collector := range collectors {
  129. go collector.run(stopCh)
  130. }
  131. // Schedule the main workload
  132. createPods(testNamespace, test.PodsToSchedule, clientset)
  133. waitNumPodsScheduled(test.InitPods.Num+test.PodsToSchedule.Num, podInformer)
  134. close(stopCh)
  135. // Note: without this line we're taking the overhead of defer() into account.
  136. b.StopTimer()
  137. var dataItems []DataItem
  138. for _, collector := range collectors {
  139. dataItems = append(dataItems, collector.collect()...)
  140. }
  141. return dataItems
  142. }
  143. func waitNumPodsScheduled(num int, podInformer coreinformers.PodInformer) {
  144. for {
  145. scheduled, err := getScheduledPods(podInformer)
  146. if err != nil {
  147. klog.Fatalf("%v", err)
  148. }
  149. if len(scheduled) >= num {
  150. break
  151. }
  152. klog.Infof("got %d existing pods, required: %d", len(scheduled), num)
  153. time.Sleep(1 * time.Second)
  154. }
  155. }
  156. func getTestDataCollectors(tc testCase, podInformer coreinformers.PodInformer, b *testing.B) []testDataCollector {
  157. collectors := []testDataCollector{newThroughputCollector(podInformer, map[string]string{"Name": b.Name()})}
  158. metricsCollectorConfig := defaultMetricsCollectorConfig
  159. if tc.MetricsCollectorConfig != nil {
  160. metricsCollectorConfig = *tc.MetricsCollectorConfig
  161. }
  162. collectors = append(collectors, newMetricsCollector(metricsCollectorConfig, map[string]string{"Name": b.Name()}))
  163. return collectors
  164. }
  165. func getNodePreparer(nc nodeCase, clientset clientset.Interface) testutils.TestNodePreparer {
  166. var nodeStrategy testutils.PrepareNodeStrategy = &testutils.TrivialNodePrepareStrategy{}
  167. if nc.NodeAllocatableStrategy != nil {
  168. nodeStrategy = nc.NodeAllocatableStrategy
  169. } else if nc.LabelNodePrepareStrategy != nil {
  170. nodeStrategy = nc.LabelNodePrepareStrategy
  171. } else if nc.UniqueNodeLabelStrategy != nil {
  172. nodeStrategy = nc.UniqueNodeLabelStrategy
  173. }
  174. if nc.NodeTemplatePath != nil {
  175. return framework.NewIntegrationTestNodePreparerWithNodeSpec(
  176. clientset,
  177. []testutils.CountToStrategy{{Count: nc.Num, Strategy: nodeStrategy}},
  178. getNodeSpecFromFile(nc.NodeTemplatePath),
  179. )
  180. }
  181. return framework.NewIntegrationTestNodePreparer(
  182. clientset,
  183. []testutils.CountToStrategy{{Count: nc.Num, Strategy: nodeStrategy}},
  184. "scheduler-perf-",
  185. )
  186. }
  187. func createPods(ns string, pc podCase, clientset clientset.Interface) {
  188. strategy := getPodStrategy(pc)
  189. config := testutils.NewTestPodCreatorConfig()
  190. config.AddStrategy(ns, pc.Num, strategy)
  191. podCreator := testutils.NewTestPodCreator(clientset, config)
  192. podCreator.CreatePods()
  193. }
  194. func getPodStrategy(pc podCase) testutils.TestPodCreateStrategy {
  195. basePod := makeBasePod()
  196. if pc.PodTemplatePath != nil {
  197. basePod = getPodSpecFromFile(pc.PodTemplatePath)
  198. }
  199. if pc.PersistentVolumeClaimTemplatePath == nil {
  200. return testutils.NewCustomCreatePodStrategy(basePod)
  201. }
  202. pvTemplate := getPersistentVolumeSpecFromFile(pc.PersistentVolumeTemplatePath)
  203. pvcTemplate := getPersistentVolumeClaimSpecFromFile(pc.PersistentVolumeClaimTemplatePath)
  204. return testutils.NewCreatePodWithPersistentVolumeStrategy(pvcTemplate, getCustomVolumeFactory(pvTemplate), basePod)
  205. }
  206. func getSimpleTestCases(path string) []testCase {
  207. var simpleTests []simpleTestCases
  208. getSpecFromFile(&path, &simpleTests)
  209. testCases := make([]testCase, 0)
  210. for _, s := range simpleTests {
  211. testCase := s.Template
  212. for _, p := range s.Params {
  213. testCase.Nodes.Num = p.NumNodes
  214. testCase.InitPods.Num = p.NumInitPods
  215. testCase.PodsToSchedule.Num = p.NumPodsToSchedule
  216. testCases = append(testCases, testCase)
  217. }
  218. }
  219. return testCases
  220. }
  221. func getNodeSpecFromFile(path *string) *v1.Node {
  222. nodeSpec := &v1.Node{}
  223. getSpecFromFile(path, nodeSpec)
  224. return nodeSpec
  225. }
  226. func getPodSpecFromFile(path *string) *v1.Pod {
  227. podSpec := &v1.Pod{}
  228. getSpecFromFile(path, podSpec)
  229. return podSpec
  230. }
  231. func getPersistentVolumeSpecFromFile(path *string) *v1.PersistentVolume {
  232. persistentVolumeSpec := &v1.PersistentVolume{}
  233. getSpecFromFile(path, persistentVolumeSpec)
  234. return persistentVolumeSpec
  235. }
  236. func getPersistentVolumeClaimSpecFromFile(path *string) *v1.PersistentVolumeClaim {
  237. persistentVolumeClaimSpec := &v1.PersistentVolumeClaim{}
  238. getSpecFromFile(path, persistentVolumeClaimSpec)
  239. return persistentVolumeClaimSpec
  240. }
  241. func getSpecFromFile(path *string, spec interface{}) {
  242. bytes, err := ioutil.ReadFile(*path)
  243. if err != nil {
  244. klog.Fatalf("%v", err)
  245. }
  246. if err := yaml.Unmarshal(bytes, spec); err != nil {
  247. klog.Fatalf("%v", err)
  248. }
  249. }
  250. func getCustomVolumeFactory(pvTemplate *v1.PersistentVolume) func(id int) *v1.PersistentVolume {
  251. return func(id int) *v1.PersistentVolume {
  252. pv := pvTemplate.DeepCopy()
  253. volumeID := fmt.Sprintf("vol-%d", id)
  254. pv.ObjectMeta.Name = volumeID
  255. pvs := pv.Spec.PersistentVolumeSource
  256. if pvs.CSI != nil {
  257. pvs.CSI.VolumeHandle = volumeID
  258. } else if pvs.AWSElasticBlockStore != nil {
  259. pvs.AWSElasticBlockStore.VolumeID = volumeID
  260. }
  261. return pv
  262. }
  263. }