/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package stackdriver

import (
	"fmt"
	"math"
	"time"

	"k8s.io/kubernetes/test/e2e/framework"
	e2elog "k8s.io/kubernetes/test/e2e/framework/log"
	instrumentation "k8s.io/kubernetes/test/e2e/instrumentation/common"
	"k8s.io/kubernetes/test/e2e/instrumentation/logging/utils"

	"github.com/onsi/ginkgo"
)
const (
	// maxAllowedLostFraction is the fraction of lost logs considered acceptable.
	maxAllowedLostFraction = 0.01
	// maxAllowedRestartsPerHour is the number of fluentd container restarts
	// considered acceptable. Once per hour is fine for now, as long as it
	// doesn't lose too many logs.
	maxAllowedRestartsPerHour = 1.0
	// lastPodIngestionSlack is the amount of time to wait for the last pod's
	// logs to be ingested by the logging agent.
	lastPodIngestionSlack = 5 * time.Minute
)
var _ = instrumentation.SIGDescribe("Cluster level logging implemented by Stackdriver [Feature:StackdriverLogging] [Soak]", func() {
	f := framework.NewDefaultFramework("sd-logging-load")

	ginkgo.It("should ingest logs from applications running for a prolonged amount of time", func() {
		withLogProviderForScope(f, podsScope, func(p *sdLogProvider) {
			nodes := framework.GetReadySchedulableNodesOrDie(f.ClientSet).Items
			maxPodCount := 10
			jobDuration := 30 * time.Minute
			linesPerPodPerSecond := 100
			// TODO(instrumentation): Increase to 21 hrs
			testDuration := 3 * time.Hour
			ingestionInterval := 1 * time.Minute
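			// Leave the logging agent roughly half an hour after the last pod
			// finishes to ship the remaining log entries.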
			ingestionTimeout := testDuration + 30*time.Minute
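			// One agent restart is allowed per hour of test duration; for the
			// current 3-hour run this is ceil(3 * 1.0) = 3 restarts.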
			allowedRestarts := int(math.Ceil(float64(testDuration) /
				float64(time.Hour) * maxAllowedRestartsPerHour))
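			// Stagger run starts by jobDuration/maxPodCount = 30min/10 = 3min,
			// so at steady state each node runs at most maxPodCount pods at once.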
			podRunDelay := time.Duration(int64(jobDuration) / int64(maxPodCount))
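			// podRunCount is chosen so the last staggered run finishes exactly at
			// testDuration: (podRunCount-1)*podRunDelay + jobDuration == testDuration,
			// i.e. 10*(6-1)+1 = 51 runs here.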
			podRunCount := maxPodCount*(int(testDuration/jobDuration)-1) + 1
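			// Each pod logs at linesPerPodPerSecond for the whole job:
			// 100 lines/s * 1800s = 180,000 lines per pod.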
			linesPerPod := linesPerPodPerSecond * int(jobDuration.Seconds())
			// pods is a flat list of all pods to be run and whose logs are
			// expected to show up in Stackdriver.
			pods := []utils.FiniteLoggingPod{}
			// podsByRun groups the same pods by run: the first index is the run,
			// the second is the node. To put an equal load on all nodes, each run
			// starts exactly one pod per node.
			podsByRun := [][]utils.FiniteLoggingPod{}
			for runIdx := 0; runIdx < podRunCount; runIdx++ {
				podsInRun := []utils.FiniteLoggingPod{}
				for nodeIdx, node := range nodes {
					podName := fmt.Sprintf("job-logs-generator-%d-%d-%d-%d", maxPodCount, linesPerPod, runIdx, nodeIdx)
					pod := utils.NewLoadLoggingPod(podName, node.Name, linesPerPod, jobDuration)
					pods = append(pods, pod)
					podsInRun = append(podsInRun, pod)
				}
				podsByRun = append(podsByRun, podsInRun)
			}
			ginkgo.By("Running short-lived pods")
			go func() {
				t := time.NewTicker(podRunDelay)
				defer t.Stop()
				for runIdx := 0; runIdx < podRunCount; runIdx++ {
					// Start one pod on each node.
					for _, pod := range podsByRun[runIdx] {
						if err := pod.Start(f); err != nil {
							e2elog.Logf("Failed to start pod: %v", err)
						}
					}
					<-t.C
				}
			}()
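			// Poll Stackdriver every ingestionInterval until all expected log
			// lines have been ingested, tolerating up to maxAllowedLostFraction
			// (1%) of lost entries, as documented on the constant above.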
			checker := utils.NewFullIngestionPodLogChecker(p, maxAllowedLostFraction, pods...)
			err := utils.WaitForLogs(checker, ingestionInterval, ingestionTimeout)
			framework.ExpectNoError(err)
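			// Finally, fail the test if the logging agent restarted more often
			// than the hourly budget allows.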
			utils.EnsureLoggingAgentRestartsCount(f, p.LoggingAgentName(), allowedRestarts)
		})
	})
})