soak.go 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102
  1. /*
  2. Copyright 2017 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package stackdriver
  14. import (
  15. "fmt"
  16. "math"
  17. "time"
  18. "k8s.io/kubernetes/test/e2e/framework"
  19. e2enode "k8s.io/kubernetes/test/e2e/framework/node"
  20. instrumentation "k8s.io/kubernetes/test/e2e/instrumentation/common"
  21. "k8s.io/kubernetes/test/e2e/instrumentation/logging/utils"
  22. "github.com/onsi/ginkgo"
  23. )
  24. const (
  25. // maxAllowedLostFraction is the fraction of lost logs considered acceptable.
  26. maxAllowedLostFraction = 0.01
  27. // maxAllowedRestartsPerHour is the number of fluentd container restarts per
  28. // hour considered acceptable. Once per hour is fine for now, as long as it
  29. // doesn't lose too many logs.
  30. maxAllowedRestartsPerHour = 1.0
  31. )
  32. var _ = instrumentation.SIGDescribe("Cluster level logging implemented by Stackdriver [Feature:StackdriverLogging] [Soak]", func() {
  33. f := framework.NewDefaultFramework("sd-logging-load")
  34. ginkgo.It("should ingest logs from applications running for a prolonged amount of time", func() {
  35. withLogProviderForScope(f, podsScope, func(p *sdLogProvider) {
  36. nodes, err := e2enode.GetReadySchedulableNodes(f.ClientSet)
  37. framework.ExpectNoError(err)
  38. maxPodCount := 10
  39. jobDuration := 30 * time.Minute
  40. linesPerPodPerSecond := 100
  41. // TODO(instrumentation): Increase to 21 hrs
  42. testDuration := 3 * time.Hour
  43. ingestionInterval := 1 * time.Minute
  44. ingestionTimeout := testDuration + 30*time.Minute
  45. allowedRestarts := int(math.Ceil(float64(testDuration) /
  46. float64(time.Hour) * maxAllowedRestartsPerHour))
  47. podRunDelay := time.Duration(int64(jobDuration) / int64(maxPodCount))
  48. podRunCount := maxPodCount*(int(testDuration/jobDuration)-1) + 1
  49. linesPerPod := linesPerPodPerSecond * int(jobDuration.Seconds())
  50. // pods is a flat array of all pods to be run and to expect in Stackdriver.
  51. pods := []utils.FiniteLoggingPod{}
  52. // podsByRun is a two-dimensional array of pods, first dimension is the run
  53. // index, the second dimension is the node index. Since we want to create
  54. // an equal load on all nodes, for the same run we have one pod per node.
  55. podsByRun := [][]utils.FiniteLoggingPod{}
  56. for runIdx := 0; runIdx < podRunCount; runIdx++ {
  57. podsInRun := []utils.FiniteLoggingPod{}
  58. for nodeIdx, node := range nodes.Items {
  59. podName := fmt.Sprintf("job-logs-generator-%d-%d-%d-%d", maxPodCount, linesPerPod, runIdx, nodeIdx)
  60. pod := utils.NewLoadLoggingPod(podName, node.Name, linesPerPod, jobDuration)
  61. pods = append(pods, pod)
  62. podsInRun = append(podsInRun, pod)
  63. }
  64. podsByRun = append(podsByRun, podsInRun)
  65. }
  66. ginkgo.By("Running short-living pods")
  67. go func() {
  68. t := time.NewTicker(podRunDelay)
  69. defer t.Stop()
  70. for runIdx := 0; runIdx < podRunCount; runIdx++ {
  71. // Starting one pod on each node.
  72. for _, pod := range podsByRun[runIdx] {
  73. if err := pod.Start(f); err != nil {
  74. framework.Logf("Failed to start pod: %v", err)
  75. }
  76. }
  77. <-t.C
  78. }
  79. }()
  80. checker := utils.NewFullIngestionPodLogChecker(p, maxAllowedLostFraction, pods...)
  81. err = utils.WaitForLogs(checker, ingestionInterval, ingestionTimeout)
  82. framework.ExpectNoError(err)
  83. utils.EnsureLoggingAgentRestartsCount(f, p.LoggingAgentName(), allowedRestarts)
  84. })
  85. })
  86. })