soak.go 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
  1. /*
  2. Copyright 2017 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package stackdriver
  14. import (
  15. "fmt"
  16. "math"
  17. "time"
  18. "k8s.io/kubernetes/test/e2e/framework"
  19. e2elog "k8s.io/kubernetes/test/e2e/framework/log"
  20. instrumentation "k8s.io/kubernetes/test/e2e/instrumentation/common"
  21. "k8s.io/kubernetes/test/e2e/instrumentation/logging/utils"
  22. "github.com/onsi/ginkgo"
  23. )
  const (
  	// maxAllowedLostFraction is the acceptable fraction of lost log entries.
  	// It is handed to the full-ingestion log checker as its loss threshold.
  	maxAllowedLostFraction = 0.01

  	// maxAllowedRestartsPerHour is how many fluentd container restarts per
  	// hour of test time are tolerated. One restart per hour is fine for now,
  	// as long as it doesn't lose too many logs. The soak test multiplies this
  	// by the test duration in hours and rounds up to get its restart budget.
  	maxAllowedRestartsPerHour = 1.0

  	// lastPodIngestionSlack is the amount of time to wait for the logs of the
  	// last pod to be ingested by the logging agent.
  	// NOTE(review): not referenced by the soak test in this file; presumably
  	// used elsewhere in the package — confirm before removing.
  	lastPodIngestionSlack = 5 * time.Minute
  )
  35. var _ = instrumentation.SIGDescribe("Cluster level logging implemented by Stackdriver [Feature:StackdriverLogging] [Soak]", func() {
  36. f := framework.NewDefaultFramework("sd-logging-load")
  37. ginkgo.It("should ingest logs from applications running for a prolonged amount of time", func() {
  38. withLogProviderForScope(f, podsScope, func(p *sdLogProvider) {
  39. nodes := framework.GetReadySchedulableNodesOrDie(f.ClientSet).Items
  40. maxPodCount := 10
  41. jobDuration := 30 * time.Minute
  42. linesPerPodPerSecond := 100
  43. // TODO(instrumentation): Increase to 21 hrs
  44. testDuration := 3 * time.Hour
  45. ingestionInterval := 1 * time.Minute
  46. ingestionTimeout := testDuration + 30*time.Minute
  47. allowedRestarts := int(math.Ceil(float64(testDuration) /
  48. float64(time.Hour) * maxAllowedRestartsPerHour))
  49. podRunDelay := time.Duration(int64(jobDuration) / int64(maxPodCount))
  50. podRunCount := maxPodCount*(int(testDuration/jobDuration)-1) + 1
  51. linesPerPod := linesPerPodPerSecond * int(jobDuration.Seconds())
  52. // pods is a flat array of all pods to be run and to expect in Stackdriver.
  53. pods := []utils.FiniteLoggingPod{}
  54. // podsByRun is a two-dimensional array of pods, first dimension is the run
  55. // index, the second dimension is the node index. Since we want to create
  56. // an equal load on all nodes, for the same run we have one pod per node.
  57. podsByRun := [][]utils.FiniteLoggingPod{}
  58. for runIdx := 0; runIdx < podRunCount; runIdx++ {
  59. podsInRun := []utils.FiniteLoggingPod{}
  60. for nodeIdx, node := range nodes {
  61. podName := fmt.Sprintf("job-logs-generator-%d-%d-%d-%d", maxPodCount, linesPerPod, runIdx, nodeIdx)
  62. pod := utils.NewLoadLoggingPod(podName, node.Name, linesPerPod, jobDuration)
  63. pods = append(pods, pod)
  64. podsInRun = append(podsInRun, pod)
  65. }
  66. podsByRun = append(podsByRun, podsInRun)
  67. }
  68. ginkgo.By("Running short-living pods")
  69. go func() {
  70. t := time.NewTicker(podRunDelay)
  71. defer t.Stop()
  72. for runIdx := 0; runIdx < podRunCount; runIdx++ {
  73. // Starting one pod on each node.
  74. for _, pod := range podsByRun[runIdx] {
  75. if err := pod.Start(f); err != nil {
  76. e2elog.Logf("Failed to start pod: %v", err)
  77. }
  78. }
  79. <-t.C
  80. }
  81. }()
  82. checker := utils.NewFullIngestionPodLogChecker(p, maxAllowedLostFraction, pods...)
  83. err := utils.WaitForLogs(checker, ingestionInterval, ingestionTimeout)
  84. framework.ExpectNoError(err)
  85. utils.EnsureLoggingAgentRestartsCount(f, p.LoggingAgentName(), allowedRestarts)
  86. })
  87. })
  88. })