// gc_controller_test.go — tests for the pod garbage-collection controller.
  1. /*
  2. Copyright 2015 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package podgc
  14. import (
  15. "context"
  16. "sync"
  17. "testing"
  18. "time"
  19. v1 "k8s.io/api/core/v1"
  20. metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
  21. "k8s.io/apimachinery/pkg/labels"
  22. "k8s.io/apimachinery/pkg/util/clock"
  23. "k8s.io/apimachinery/pkg/util/sets"
  24. "k8s.io/apimachinery/pkg/util/wait"
  25. "k8s.io/client-go/informers"
  26. coreinformers "k8s.io/client-go/informers/core/v1"
  27. clientset "k8s.io/client-go/kubernetes"
  28. "k8s.io/client-go/kubernetes/fake"
  29. "k8s.io/client-go/util/workqueue"
  30. "k8s.io/kubernetes/pkg/controller"
  31. "k8s.io/kubernetes/pkg/controller/testutil"
  32. )
  33. func alwaysReady() bool { return true }
  34. func NewFromClient(kubeClient clientset.Interface, terminatedPodThreshold int) (*PodGCController, coreinformers.PodInformer, coreinformers.NodeInformer) {
  35. informerFactory := informers.NewSharedInformerFactory(kubeClient, controller.NoResyncPeriodFunc())
  36. podInformer := informerFactory.Core().V1().Pods()
  37. nodeInformer := informerFactory.Core().V1().Nodes()
  38. controller := NewPodGC(kubeClient, podInformer, nodeInformer, terminatedPodThreshold)
  39. controller.podListerSynced = alwaysReady
  40. return controller, podInformer, nodeInformer
  41. }
  42. func compareStringSetToList(set sets.String, list []string) bool {
  43. for _, item := range list {
  44. if !set.Has(item) {
  45. return false
  46. }
  47. }
  48. if len(list) != len(set) {
  49. return false
  50. }
  51. return true
  52. }
// TestGCTerminated verifies that gc() deletes terminated (Failed/Succeeded)
// pods down to the configured threshold and never touches running pods. The
// expectations suggest the earliest-created terminated pods are deleted
// first — confirm against the controller's ordering if this changes.
func TestGCTerminated(t *testing.T) {
	type nameToPhase struct {
		name  string
		phase v1.PodPhase
	}
	testCases := []struct {
		pods            []nameToPhase
		threshold       int
		deletedPodNames sets.String
	}{
		{
			pods: []nameToPhase{
				{name: "a", phase: v1.PodFailed},
				{name: "b", phase: v1.PodSucceeded},
			},
			threshold: 0,
			// threshold = 0 disables terminated pod deletion
			deletedPodNames: sets.NewString(),
		},
		{
			// Three terminated pods over a threshold of 1: the two
			// earliest-created ("a", "b") are expected to be deleted.
			pods: []nameToPhase{
				{name: "a", phase: v1.PodFailed},
				{name: "b", phase: v1.PodSucceeded},
				{name: "c", phase: v1.PodFailed},
			},
			threshold:       1,
			deletedPodNames: sets.NewString("a", "b"),
		},
		{
			// Running pod "a" is not a GC candidate; only one of the two
			// terminated pods needs to go to reach the threshold.
			pods: []nameToPhase{
				{name: "a", phase: v1.PodRunning},
				{name: "b", phase: v1.PodSucceeded},
				{name: "c", phase: v1.PodFailed},
			},
			threshold:       1,
			deletedPodNames: sets.NewString("b"),
		},
		{
			pods: []nameToPhase{
				{name: "a", phase: v1.PodFailed},
				{name: "b", phase: v1.PodSucceeded},
			},
			threshold:       1,
			deletedPodNames: sets.NewString("a"),
		},
		{
			// Threshold above the terminated-pod count: nothing is deleted.
			pods: []nameToPhase{
				{name: "a", phase: v1.PodFailed},
				{name: "b", phase: v1.PodSucceeded},
			},
			threshold:       5,
			deletedPodNames: sets.NewString(),
		},
	}
	for i, test := range testCases {
		client := fake.NewSimpleClientset(&v1.NodeList{Items: []v1.Node{*testutil.NewNode("node")}})
		gcc, podInformer, _ := NewFromClient(client, test.threshold)
		deletedPodNames := make([]string, 0)
		var lock sync.Mutex
		// Intercept deletions: record names instead of calling the API.
		// The mutex guards the slice in case gc() deletes concurrently.
		gcc.deletePod = func(_, name string) error {
			lock.Lock()
			defer lock.Unlock()
			deletedPodNames = append(deletedPodNames, name)
			return nil
		}
		// Stagger creation times one hour apart so any age-based ordering
		// inside the controller is deterministic.
		creationTime := time.Unix(0, 0)
		for _, pod := range test.pods {
			creationTime = creationTime.Add(1 * time.Hour)
			podInformer.Informer().GetStore().Add(&v1.Pod{
				ObjectMeta: metav1.ObjectMeta{Name: pod.name, CreationTimestamp: metav1.Time{Time: creationTime}},
				Status:     v1.PodStatus{Phase: pod.phase},
				Spec:       v1.PodSpec{NodeName: "node"},
			})
		}
		gcc.gc()
		if pass := compareStringSetToList(test.deletedPodNames, deletedPodNames); !pass {
			t.Errorf("[%v]pod's deleted expected and actual did not match.\n\texpected: %v\n\tactual: %v",
				i, test.deletedPodNames.List(), deletedPodNames)
		}
	}
}
  134. func makePod(name string, nodeName string, phase v1.PodPhase) *v1.Pod {
  135. return &v1.Pod{
  136. ObjectMeta: metav1.ObjectMeta{
  137. Name: name,
  138. },
  139. Spec: v1.PodSpec{NodeName: nodeName},
  140. Status: v1.PodStatus{Phase: phase},
  141. }
  142. }
  143. func waitForAdded(q workqueue.DelayingInterface, depth int) error {
  144. return wait.Poll(1*time.Millisecond, 10*time.Second, func() (done bool, err error) {
  145. if q.Len() == depth {
  146. return true, nil
  147. }
  148. return false, nil
  149. })
  150. }
// TestGCOrphaned exercises garbage collection of orphaned pods — pods whose
// node is gone. The flow in each case: seed nodes/pods, run gc() once (no
// deletions expected yet), advance a fake clock by delay to let quarantined
// node checks become due, optionally mutate the client and informer node
// views, then run gc() again and compare deletions. The distinct
// client-vs-informer knobs exist because the controller evidently consults
// both views before deleting (see the "nodes present in client" case, which
// verifies via the queue without deleting).
func TestGCOrphaned(t *testing.T) {
	testCases := []struct {
		name                 string
		initialClientNodes   []*v1.Node    // nodes pre-loaded into the fake clientset
		initialInformerNodes []*v1.Node    // nodes pre-loaded into the node informer cache
		delay                time.Duration // fake-clock advance between the two gc() calls
		addedClientNodes     []*v1.Node    // created via the client after the delay
		deletedClientNodes   []*v1.Node    // deleted via the client after the delay
		addedInformerNodes   []*v1.Node    // added to the informer cache after the delay
		deletedInformerNodes []*v1.Node    // removed from the informer cache after the delay
		pods                 []*v1.Pod
		itemsInQueue         int // expected node-queue depth once delay has elapsed
		deletedPodNames      sets.String
	}{
		{
			name: "nodes present in lister",
			initialInformerNodes: []*v1.Node{
				testutil.NewNode("existing1"),
				testutil.NewNode("existing2"),
			},
			delay: 2 * quarantineTime,
			pods: []*v1.Pod{
				makePod("a", "existing1", v1.PodRunning),
				makePod("b", "existing2", v1.PodFailed),
				makePod("c", "existing2", v1.PodSucceeded),
			},
			itemsInQueue:    0,
			deletedPodNames: sets.NewString(),
		},
		{
			name: "nodes present in client",
			initialClientNodes: []*v1.Node{
				testutil.NewNode("existing1"),
				testutil.NewNode("existing2"),
			},
			delay: 2 * quarantineTime,
			pods: []*v1.Pod{
				makePod("a", "existing1", v1.PodRunning),
				makePod("b", "existing2", v1.PodFailed),
				makePod("c", "existing2", v1.PodSucceeded),
			},
			// Nodes are absent from the informer so they get queued for a
			// client check, but the client confirms them: no deletions.
			itemsInQueue:    2,
			deletedPodNames: sets.NewString(),
		},
		{
			name:  "no nodes",
			delay: 2 * quarantineTime,
			pods: []*v1.Pod{
				makePod("a", "deleted", v1.PodFailed),
				makePod("b", "deleted", v1.PodSucceeded),
			},
			itemsInQueue:    1,
			deletedPodNames: sets.NewString("a", "b"),
		},
		{
			name: "quarantine not finished",
			// Half the quarantine period: the missing node has not been
			// re-verified yet, so its pod must survive.
			delay: quarantineTime / 2,
			pods: []*v1.Pod{
				makePod("a", "deleted", v1.PodFailed),
			},
			itemsInQueue:    0,
			deletedPodNames: sets.NewString(),
		},
		{
			name:                 "wrong nodes",
			initialInformerNodes: []*v1.Node{testutil.NewNode("existing")},
			delay:                2 * quarantineTime,
			pods: []*v1.Pod{
				makePod("a", "deleted", v1.PodRunning),
			},
			// Even a Running pod is deleted once its node is confirmed gone.
			itemsInQueue:    1,
			deletedPodNames: sets.NewString("a"),
		},
		{
			name:                 "some nodes missing",
			initialInformerNodes: []*v1.Node{testutil.NewNode("existing")},
			delay:                2 * quarantineTime,
			pods: []*v1.Pod{
				makePod("a", "deleted", v1.PodFailed),
				makePod("b", "existing", v1.PodFailed),
				makePod("c", "deleted", v1.PodSucceeded),
				makePod("d", "deleted", v1.PodRunning),
			},
			itemsInQueue:    1,
			deletedPodNames: sets.NewString("a", "c", "d"),
		},
		{
			name:             "node added to client after quarantine",
			delay:            2 * quarantineTime,
			addedClientNodes: []*v1.Node{testutil.NewNode("node")},
			pods: []*v1.Pod{
				makePod("a", "node", v1.PodRunning),
			},
			itemsInQueue:    1,
			deletedPodNames: sets.NewString(),
		},
		{
			name:               "node added to informer after quarantine",
			delay:              2 * quarantineTime,
			addedInformerNodes: []*v1.Node{testutil.NewNode("node")},
			pods: []*v1.Pod{
				makePod("a", "node", v1.PodFailed),
			},
			itemsInQueue:    1,
			deletedPodNames: sets.NewString(),
		},
		{
			// It shouldn't happen that client will be lagging behind informer.
			// This test case is more a sanity check.
			name:               "node deleted from client after quarantine",
			initialClientNodes: []*v1.Node{testutil.NewNode("node")},
			delay:              2 * quarantineTime,
			deletedClientNodes: []*v1.Node{testutil.NewNode("node")},
			pods: []*v1.Pod{
				makePod("a", "node", v1.PodFailed),
			},
			itemsInQueue:    1,
			deletedPodNames: sets.NewString("a"),
		},
		{
			name:                 "node deleted from informer after quarantine",
			initialInformerNodes: []*v1.Node{testutil.NewNode("node")},
			delay:                2 * quarantineTime,
			deletedInformerNodes: []*v1.Node{testutil.NewNode("node")},
			pods: []*v1.Pod{
				makePod("a", "node", v1.PodSucceeded),
			},
			// Deleted from the informer only after the first gc() pass, so
			// it was never queued for a client check: no deletion.
			itemsInQueue:    0,
			deletedPodNames: sets.NewString(),
		},
	}
	for _, test := range testCases {
		t.Run(test.name, func(t *testing.T) {
			nodeList := &v1.NodeList{}
			for _, node := range test.initialClientNodes {
				nodeList.Items = append(nodeList.Items, *node)
			}
			client := fake.NewSimpleClientset(nodeList)
			// threshold -1: presumably disables terminated-pod GC (threshold
			// 0 does in TestGCTerminated), keeping this test orphan-only —
			// confirm against gc() if behavior changes.
			gcc, podInformer, nodeInformer := NewFromClient(client, -1)
			for _, node := range test.initialInformerNodes {
				nodeInformer.Informer().GetStore().Add(node)
			}
			for _, pod := range test.pods {
				podInformer.Informer().GetStore().Add(pod)
			}
			// Overwrite queue
			// Swap in a queue driven by a fake clock so the test controls
			// when quarantined items become ready.
			fakeClock := clock.NewFakeClock(time.Now())
			gcc.nodeQueue.ShutDown()
			gcc.nodeQueue = workqueue.NewDelayingQueueWithCustomClock(fakeClock, "podgc_test_queue")
			deletedPodNames := make([]string, 0)
			var lock sync.Mutex
			// Record deletions instead of performing them; mutex guards the
			// slice against concurrent deletes from gc().
			gcc.deletePod = func(_, name string) error {
				lock.Lock()
				defer lock.Unlock()
				deletedPodNames = append(deletedPodNames, name)
				return nil
			}
			// First GC of orphaned pods
			gcc.gc()
			if len(deletedPodNames) > 0 {
				t.Errorf("no pods should be deleted at this point.\n\tactual: %v", deletedPodNames)
			}
			// Move clock forward
			fakeClock.Step(test.delay)
			// Wait for queue goroutine to process items
			if test.itemsInQueue > 0 {
				err := waitForAdded(gcc.nodeQueue, test.itemsInQueue)
				if err != nil {
					t.Errorf("wrong number of items in the node queue.\n\texpected: %v\n\tactual: %v",
						test.itemsInQueue, gcc.nodeQueue.Len())
				}
			}
			// Execute planned nodes changes
			for _, node := range test.addedClientNodes {
				client.CoreV1().Nodes().Create(context.TODO(), node, metav1.CreateOptions{})
			}
			for _, node := range test.deletedClientNodes {
				client.CoreV1().Nodes().Delete(context.TODO(), node.Name, &metav1.DeleteOptions{})
			}
			for _, node := range test.addedInformerNodes {
				nodeInformer.Informer().GetStore().Add(node)
			}
			for _, node := range test.deletedInformerNodes {
				nodeInformer.Informer().GetStore().Delete(node)
			}
			// Actual pod deletion
			gcc.gc()
			if pass := compareStringSetToList(test.deletedPodNames, deletedPodNames); !pass {
				t.Errorf("pod's deleted expected and actual did not match.\n\texpected: %v\n\tactual: %v",
					test.deletedPodNames.List(), deletedPodNames)
			}
		})
	}
}
// TestGCUnscheduledTerminating verifies gcUnscheduledTerminating: a pod that
// is terminating (non-nil DeletionTimestamp) but was never scheduled (empty
// NodeName) is deleted regardless of phase; any pod that is scheduled or not
// terminating is left alone.
func TestGCUnscheduledTerminating(t *testing.T) {
	type nameToPhase struct {
		name              string
		phase             v1.PodPhase
		deletionTimeStamp *metav1.Time
		nodeName          string
	}
	testCases := []struct {
		name            string
		pods            []nameToPhase
		deletedPodNames sets.String
	}{
		{
			name: "Unscheduled pod in any phase must be deleted",
			pods: []nameToPhase{
				{name: "a", phase: v1.PodFailed, deletionTimeStamp: &metav1.Time{}, nodeName: ""},
				{name: "b", phase: v1.PodSucceeded, deletionTimeStamp: &metav1.Time{}, nodeName: ""},
				{name: "c", phase: v1.PodRunning, deletionTimeStamp: &metav1.Time{}, nodeName: ""},
			},
			deletedPodNames: sets.NewString("a", "b", "c"),
		},
		{
			// NOTE(review): the case name mentions only scheduled pods, but
			// "a" is unscheduled with a nil DeletionTimestamp — it survives
			// because it is not terminating, not because it is scheduled.
			name: "Scheduled pod in any phase must not be deleted",
			pods: []nameToPhase{
				{name: "a", phase: v1.PodFailed, deletionTimeStamp: nil, nodeName: ""},
				{name: "b", phase: v1.PodSucceeded, deletionTimeStamp: nil, nodeName: "node"},
				{name: "c", phase: v1.PodRunning, deletionTimeStamp: &metav1.Time{}, nodeName: "node"},
			},
			deletedPodNames: sets.NewString(),
		},
	}
	for i, test := range testCases {
		client := fake.NewSimpleClientset()
		gcc, podInformer, _ := NewFromClient(client, -1)
		deletedPodNames := make([]string, 0)
		var lock sync.Mutex
		// Record deletions instead of performing them; mutex guards the
		// slice in case deletes happen concurrently.
		gcc.deletePod = func(_, name string) error {
			lock.Lock()
			defer lock.Unlock()
			deletedPodNames = append(deletedPodNames, name)
			return nil
		}
		// Stagger creation times one hour apart for deterministic ordering.
		creationTime := time.Unix(0, 0)
		for _, pod := range test.pods {
			creationTime = creationTime.Add(1 * time.Hour)
			podInformer.Informer().GetStore().Add(&v1.Pod{
				ObjectMeta: metav1.ObjectMeta{Name: pod.name, CreationTimestamp: metav1.Time{Time: creationTime},
					DeletionTimestamp: pod.deletionTimeStamp},
				Status: v1.PodStatus{Phase: pod.phase},
				Spec:   v1.PodSpec{NodeName: pod.nodeName},
			})
		}
		pods, err := podInformer.Lister().List(labels.Everything())
		if err != nil {
			t.Errorf("Error while listing all Pods: %v", err)
			return
		}
		gcc.gcUnscheduledTerminating(pods)
		if pass := compareStringSetToList(test.deletedPodNames, deletedPodNames); !pass {
			t.Errorf("[%v]pod's deleted expected and actual did not match.\n\texpected: %v\n\tactual: %v, test: %v",
				i, test.deletedPodNames.List(), deletedPodNames, test.name)
		}
	}
}