garbage_collector_test.go

/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2enode

import (
	"context"
	"fmt"
	"strconv"
	"time"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	internalapi "k8s.io/cri-api/pkg/apis"
	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
	"k8s.io/kubernetes/pkg/kubelet/types"
	"k8s.io/kubernetes/test/e2e/framework"

	"github.com/onsi/ginkgo"
	"github.com/onsi/gomega"
)

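// Garbage collection policy knobs exercised by this test:
// maxPerPodContainer is the maximum number of dead containers the kubelet keeps per container,
// and maxTotalContainers is the node-wide cap on dead containers (-1 means no limit).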
const (
	// TODO(dashpole): Once dynamic config is possible, test different values for maxPerPodContainer and maxTotalContainers.
	// Currently using default values for maxPerPodContainer and maxTotalContainers.
	maxPerPodContainer = 1
	maxTotalContainers = -1

	garbageCollectDuration = 3 * time.Minute
	setupDuration          = 10 * time.Minute
	runtimePollInterval    = 10 * time.Second
)

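// testPodSpec describes one pod used by the garbage collection tests and how its
// containers are expected to behave.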
type testPodSpec struct {
	podName string
	// containerPrefix must be unique for each pod, and cannot end in a number.
	// containerPrefix is used to identify which containers belong to which pod in the test.
	containerPrefix string
	// the number of times each container should restart
	restartCount int32
	// the number of containers in the test pod
	numContainers int
	// a function that returns the names of the pod's containers currently on the node (including dead containers)
	getContainerNames func() ([]string, error)
}

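// getContainerName returns the name of the container with the given index, formed by
// appending the index to the pod's containerPrefix.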
func (pod *testPodSpec) getContainerName(containerNumber int) string {
	return fmt.Sprintf("%s%d", pod.containerPrefix, containerNumber)
}

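// testRun groups the pod specs that make up a single garbage collection scenario.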
type testRun struct {
	// Name for logging purposes
	testName string
	// Pod specs for the test
	testPods []*testPodSpec
}

// GarbageCollect tests that the Kubelet conforms to the Kubelet Garbage Collection Policy, found here:
// http://kubernetes.io/docs/admin/garbage-collection/
var _ = framework.KubeDescribe("GarbageCollect [Serial][NodeFeature:GarbageCollect]", func() {
	f := framework.NewDefaultFramework("garbage-collect-test")
	containerNamePrefix := "gc-test-container-"
	podNamePrefix := "gc-test-pod-"
	// These suffixes are appended to pod and container names.
	// They differentiate pods from one another, and allow filtering
	// by names to identify which containers belong to which pods.
	// They must be unique, and must not end in a number.
	firstSuffix := "one-container-no-restarts"
	secondSuffix := "many-containers-many-restarts-one-pod"
	thirdSuffix := "many-containers-many-restarts-"
	tests := []testRun{
		{
			testName: "One Non-restarting Container",
			testPods: []*testPodSpec{
				{
					podName:         podNamePrefix + firstSuffix,
					containerPrefix: containerNamePrefix + firstSuffix,
					restartCount:    0,
					numContainers:   1,
				},
			},
		},
		{
			testName: "Many Restarting Containers",
			testPods: []*testPodSpec{
				{
					podName:         podNamePrefix + secondSuffix,
					containerPrefix: containerNamePrefix + secondSuffix,
					restartCount:    4,
					numContainers:   4,
				},
			},
		},
		{
			testName: "Many Pods with Many Restarting Containers",
			testPods: []*testPodSpec{
				{
					podName:         podNamePrefix + thirdSuffix + "one",
					containerPrefix: containerNamePrefix + thirdSuffix + "one",
					restartCount:    3,
					numContainers:   4,
				},
				{
					podName:         podNamePrefix + thirdSuffix + "two",
					containerPrefix: containerNamePrefix + thirdSuffix + "two",
					restartCount:    2,
					numContainers:   6,
				},
				{
					podName:         podNamePrefix + thirdSuffix + "three",
					containerPrefix: containerNamePrefix + thirdSuffix + "three",
					restartCount:    3,
					numContainers:   5,
				},
			},
		},
	}
	for _, test := range tests {
		containerGCTest(f, test)
	}
})

// Tests the following:
// pods are created, and all containers restart the specified number of times
// while containers are running, the number of copies of a single container does not exceed maxPerPodContainer
// while containers are running, the total number of containers does not exceed maxTotalContainers
// while containers are running, if not constrained by maxPerPodContainer or maxTotalContainers, keep an extra copy of each container
// once pods are killed, all containers are eventually cleaned up
func containerGCTest(f *framework.Framework, test testRun) {
	var runtime internalapi.RuntimeService
	ginkgo.BeforeEach(func() {
		var err error
		runtime, _, err = getCRIClient()
		framework.ExpectNoError(err)
	})
	for _, pod := range test.testPods {
		pod := pod // shadow the loop variable so each closure captures its own pod
		// Initialize the getContainerNames function to use the CRI runtime client.
		pod.getContainerNames = func() ([]string, error) {
			relevantContainers := []string{}
			containers, err := runtime.ListContainers(&runtimeapi.ContainerFilter{
				LabelSelector: map[string]string{
					types.KubernetesPodNameLabel:      pod.podName,
					types.KubernetesPodNamespaceLabel: f.Namespace.Name,
				},
			})
			if err != nil {
				return relevantContainers, err
			}
			for _, container := range containers {
				relevantContainers = append(relevantContainers, container.Labels[types.KubernetesContainerNameLabel])
			}
			return relevantContainers, nil
		}
	}
	ginkgo.Context(fmt.Sprintf("Garbage Collection Test: %s", test.testName), func() {
		ginkgo.BeforeEach(func() {
			realPods := getPods(test.testPods)
			f.PodClient().CreateBatch(realPods)
			ginkgo.By("Making sure all containers restart the specified number of times")
			gomega.Eventually(func() error {
				for _, podSpec := range test.testPods {
					err := verifyPodRestartCount(f, podSpec.podName, podSpec.numContainers, podSpec.restartCount)
					if err != nil {
						return err
					}
				}
				return nil
			}, setupDuration, runtimePollInterval).Should(gomega.BeNil())
		})
		ginkgo.It("Should eventually garbage collect containers when we exceed the number of dead containers per container", func() {
			totalContainers := 0
			for _, pod := range test.testPods {
				totalContainers += pod.numContainers*2 + 1
			}
			gomega.Eventually(func() error {
				total := 0
				for _, pod := range test.testPods {
					containerNames, err := pod.getContainerNames()
					if err != nil {
						return err
					}
					total += len(containerNames)
					// Check maxPerPodContainer for each container in the pod.
					// The limit allows maxPerPodContainer dead copies plus the one running copy.
					for i := 0; i < pod.numContainers; i++ {
						containerCount := 0
						for _, containerName := range containerNames {
							if containerName == pod.getContainerName(i) {
								containerCount++
							}
						}
						if containerCount > maxPerPodContainer+1 {
							return fmt.Errorf("expected number of copies of container: %s, to be <= maxPerPodContainer+1: %d; list of containers: %v",
								pod.getContainerName(i), maxPerPodContainer+1, containerNames)
						}
					}
				}
				// Check maxTotalContainers. Currently, the default is -1, so this will never happen until we can configure maxTotalContainers.
				if maxTotalContainers > 0 && totalContainers <= maxTotalContainers && total > maxTotalContainers {
					return fmt.Errorf("expected total number of containers: %v, to be <= maxTotalContainers: %v", total, maxTotalContainers)
				}
				return nil
			}, garbageCollectDuration, runtimePollInterval).Should(gomega.BeNil())
			if maxPerPodContainer >= 2 && maxTotalContainers < 0 { // make sure constraints wouldn't make us gc old containers
				ginkgo.By("Making sure the kubelet consistently keeps around an extra copy of each container.")
				gomega.Consistently(func() error {
					for _, pod := range test.testPods {
						containerNames, err := pod.getContainerNames()
						if err != nil {
							return err
						}
						for i := 0; i < pod.numContainers; i++ {
							containerCount := 0
							for _, containerName := range containerNames {
								if containerName == pod.getContainerName(i) {
									containerCount++
								}
							}
							if pod.restartCount > 0 && containerCount < maxPerPodContainer+1 {
								return fmt.Errorf("expected pod %v to have extra copies of old containers", pod.podName)
							}
						}
					}
					return nil
				}, garbageCollectDuration, runtimePollInterval).Should(gomega.BeNil())
			}
		})
		ginkgo.AfterEach(func() {
			for _, pod := range test.testPods {
				ginkgo.By(fmt.Sprintf("Deleting Pod %v", pod.podName))
				f.PodClient().DeleteSync(pod.podName, &metav1.DeleteOptions{}, framework.DefaultPodDeletionTimeout)
			}

			ginkgo.By("Making sure all containers get cleaned up")
			gomega.Eventually(func() error {
				for _, pod := range test.testPods {
					containerNames, err := pod.getContainerNames()
					if err != nil {
						return err
					}
					if len(containerNames) > 0 {
						return fmt.Errorf("%v containers still remain", containerNames)
					}
				}
				return nil
			}, garbageCollectDuration, runtimePollInterval).Should(gomega.BeNil())

			if ginkgo.CurrentGinkgoTestDescription().Failed && framework.TestContext.DumpLogsOnFailure {
				logNodeEvents(f)
				logPodEvents(f)
			}
		})
	})
}

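// getPods builds the pods described by specs. Each pod mounts a shared emptyDir
// volume that its containers use to count how many times they have started.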
func getPods(specs []*testPodSpec) (pods []*v1.Pod) {
	for _, spec := range specs {
		ginkgo.By(fmt.Sprintf("Creating %v containers with restartCount: %v", spec.numContainers, spec.restartCount))
		containers := []v1.Container{}
		for i := 0; i < spec.numContainers; i++ {
			containers = append(containers, v1.Container{
				Image:   busyboxImage,
				Name:    spec.getContainerName(i),
				Command: getRestartingContainerCommand("/test-empty-dir-mnt", i, spec.restartCount, ""),
				VolumeMounts: []v1.VolumeMount{
					{MountPath: "/test-empty-dir-mnt", Name: "test-empty-dir"},
				},
			})
		}
		pods = append(pods, &v1.Pod{
			ObjectMeta: metav1.ObjectMeta{Name: spec.podName},
			Spec: v1.PodSpec{
				RestartPolicy: v1.RestartPolicyAlways,
				Containers:    containers,
				Volumes: []v1.Volume{
					{Name: "test-empty-dir", VolumeSource: v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}}},
				},
			},
		})
	}
	return
}

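// getRestartingContainerCommand returns a shell command that appends a line to a
// per-container count file on the shared volume every time the container starts.
// The container exits (so the kubelet restarts it) until it has started restarts+1
// times, after which it runs loopingCommand and sleeps forever to stay Running.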
func getRestartingContainerCommand(path string, containerNum int, restarts int32, loopingCommand string) []string {
	return []string{
		"sh",
		"-c",
		fmt.Sprintf(`
f=%s/countfile%s
count=$(echo 'hello' >> $f ; wc -l $f | awk {'print $1'})
if [ $count -lt %d ]; then
	exit 0
fi
while true; do %s sleep 1; done`,
			path, strconv.Itoa(containerNum), restarts+1, loopingCommand),
	}
}

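// verifyPodRestartCount returns an error unless the pod reports exactly
// expectedNumContainers container statuses, each with a restart count equal to
// expectedRestartCount.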
func verifyPodRestartCount(f *framework.Framework, podName string, expectedNumContainers int, expectedRestartCount int32) error {
	updatedPod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(context.TODO(), podName, metav1.GetOptions{})
	if err != nil {
		return err
	}
	if len(updatedPod.Status.ContainerStatuses) != expectedNumContainers {
		return fmt.Errorf("expected pod %s to have %d containers, actual: %d",
			updatedPod.Name, expectedNumContainers, len(updatedPod.Status.ContainerStatuses))
	}
	for _, containerStatus := range updatedPod.Status.ContainerStatuses {
		if containerStatus.RestartCount != expectedRestartCount {
			return fmt.Errorf("pod %s had container with restart count %d, expected %d",
				updatedPod.Name, containerStatus.RestartCount, expectedRestartCount)
		}
	}
	return nil
}