resource.go

/*
Copyright 2019 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package pod

import (
	"context"
	"fmt"
	"strconv"
	"strings"
	"time"

	"github.com/onsi/ginkgo"
	"github.com/onsi/gomega"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/wait"
	clientset "k8s.io/client-go/kubernetes"
	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
	"k8s.io/kubernetes/pkg/client/conditions"
	kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
	e2elog "k8s.io/kubernetes/test/e2e/framework/log"
	testutils "k8s.io/kubernetes/test/utils"
	imageutils "k8s.io/kubernetes/test/utils/image"
)

// TODO: Move to its own subpkg.
// expectNoError checks if "err" is set, and if so, fails the assertion while logging the error.
func expectNoError(err error, explain ...interface{}) {
	expectNoErrorWithOffset(1, err, explain...)
}

// TODO: Move to its own subpkg.
// expectNoErrorWithOffset checks if "err" is set, and if so, fails the assertion while logging the error at "offset" levels above its caller
// (for example, for call chain f -> g -> expectNoErrorWithOffset(1, ...) error would be logged for "f").
func expectNoErrorWithOffset(offset int, err error, explain ...interface{}) {
	if err != nil {
		e2elog.Logf("Unexpected error occurred: %v", err)
	}
	gomega.ExpectWithOffset(1+offset, err).NotTo(gomega.HaveOccurred(), explain...)
}

// isElementOf returns true if the given pod UID appears in the given pod list.
func isElementOf(podUID types.UID, pods *v1.PodList) bool {
	for _, pod := range pods.Items {
		if pod.UID == podUID {
			return true
		}
	}
	return false
}

// ProxyResponseChecker is a context for checking pod responses by issuing GETs to them (via the API
// proxy) and verifying that they answer with their own pod name.
type ProxyResponseChecker struct {
	c              clientset.Interface
	ns             string
	label          labels.Selector
	controllerName string
	respondName    bool // Whether the pod should respond with its own name.
	pods           *v1.PodList
}

// NewProxyResponseChecker returns a context for checking pod responses.
func NewProxyResponseChecker(c clientset.Interface, ns string, label labels.Selector, controllerName string, respondName bool, pods *v1.PodList) ProxyResponseChecker {
	return ProxyResponseChecker{c, ns, label, controllerName, respondName, pods}
}

// CheckAllResponses issues GETs to all pods in the context and verifies they
// reply with their own pod name.
func (r ProxyResponseChecker) CheckAllResponses() (done bool, err error) {
	successes := 0
	options := metav1.ListOptions{LabelSelector: r.label.String()}
	currentPods, err := r.c.CoreV1().Pods(r.ns).List(context.TODO(), options)
	expectNoError(err, "Failed to get list of currentPods in namespace: %s", r.ns)
	for i, pod := range r.pods.Items {
		// Check that the replica list remains unchanged, otherwise we have problems.
		if !isElementOf(pod.UID, currentPods) {
			return false, fmt.Errorf("pod with UID %s is no longer a member of the replica set. Must have been restarted for some reason. Current replica set: %v", pod.UID, currentPods)
		}

		ctx, cancel := context.WithTimeout(context.Background(), singleCallTimeout)
		defer cancel()

		body, err := r.c.CoreV1().RESTClient().Get().
			Namespace(r.ns).
			Resource("pods").
			SubResource("proxy").
			Name(string(pod.Name)).
			Do(ctx).
			Raw()

		if err != nil {
			if ctx.Err() != nil {
				// We may encounter errors here because of a race between the pod readiness and apiserver
				// proxy. So, we log the error and retry if this occurs.
				e2elog.Logf("Controller %s: Failed to GET from replica %d [%s]: %v\n pod status: %#v", r.controllerName, i+1, pod.Name, err, pod.Status)
				return false, nil
			}
			e2elog.Logf("Controller %s: Failed to GET from replica %d [%s]: %v\npod status: %#v", r.controllerName, i+1, pod.Name, err, pod.Status)
			continue
		}
		// The response checker expects the pod's name unless !respondName, in
		// which case it just checks for a non-empty response.
		got := string(body)
		what := ""
		if r.respondName {
			what = "expected"
			want := pod.Name
			if got != want {
				e2elog.Logf("Controller %s: Replica %d [%s] expected response %q but got %q",
					r.controllerName, i+1, pod.Name, want, got)
				continue
			}
		} else {
			what = "non-empty"
			if len(got) == 0 {
				e2elog.Logf("Controller %s: Replica %d [%s] expected non-empty response",
					r.controllerName, i+1, pod.Name)
				continue
			}
		}
		successes++
		e2elog.Logf("Controller %s: Got %s result from replica %d [%s]: %q, %d of %d required successes so far",
			r.controllerName, what, i+1, pod.Name, got, successes, len(r.pods.Items))
	}
	if successes < len(r.pods.Items) {
		return false, nil
	}
	return true, nil
}
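
// CheckAllResponses has the wait.ConditionFunc shape, so it composes directly with the
// polling helpers in k8s.io/apimachinery/pkg/util/wait. A minimal usage sketch (the
// interval and timeout values here are illustrative, not this package's canonical constants):
//
//	checker := NewProxyResponseChecker(c, ns, label, "my-rc", true, pods)
//	err := wait.PollImmediate(5*time.Second, 2*time.Minute, checker.CheckAllResponses)
//	// err is nil once every replica has answered the proxied GET with its own name.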

// podRunning returns a wait.ConditionFunc that checks whether the pod has reached the Running
// phase. It returns conditions.ErrPodCompleted if the pod has already failed or succeeded.
func podRunning(c clientset.Interface, podName, namespace string) wait.ConditionFunc {
	return func() (bool, error) {
		pod, err := c.CoreV1().Pods(namespace).Get(context.TODO(), podName, metav1.GetOptions{})
		if err != nil {
			return false, err
		}
		switch pod.Status.Phase {
		case v1.PodRunning:
			return true, nil
		case v1.PodFailed, v1.PodSucceeded:
			return false, conditions.ErrPodCompleted
		}
		return false, nil
	}
}

// podCompleted returns a wait.ConditionFunc that checks whether the pod has terminated
// (Failed or Succeeded).
func podCompleted(c clientset.Interface, podName, namespace string) wait.ConditionFunc {
	return func() (bool, error) {
		pod, err := c.CoreV1().Pods(namespace).Get(context.TODO(), podName, metav1.GetOptions{})
		if err != nil {
			return false, err
		}
		switch pod.Status.Phase {
		case v1.PodFailed, v1.PodSucceeded:
			return true, nil
		}
		return false, nil
	}
}

// podRunningAndReady returns a wait.ConditionFunc that checks whether the pod is Running
// and has the Ready condition set.
func podRunningAndReady(c clientset.Interface, podName, namespace string) wait.ConditionFunc {
	return func() (bool, error) {
		pod, err := c.CoreV1().Pods(namespace).Get(context.TODO(), podName, metav1.GetOptions{})
		if err != nil {
			return false, err
		}
		switch pod.Status.Phase {
		case v1.PodFailed, v1.PodSucceeded:
			e2elog.Logf("The status of Pod %s is %s which is unexpected", podName, pod.Status.Phase)
			return false, conditions.ErrPodCompleted
		case v1.PodRunning:
			e2elog.Logf("The status of Pod %s is %s (Ready = %v)", podName, pod.Status.Phase, podutil.IsPodReady(pod))
			return podutil.IsPodReady(pod), nil
		}
		e2elog.Logf("The status of Pod %s is %s, waiting for it to be Running (with Ready = true)", podName, pod.Status.Phase)
		return false, nil
	}
}

// podNotPending returns a wait.ConditionFunc that checks whether the pod has left the Pending phase.
func podNotPending(c clientset.Interface, podName, namespace string) wait.ConditionFunc {
	return func() (bool, error) {
		pod, err := c.CoreV1().Pods(namespace).Get(context.TODO(), podName, metav1.GetOptions{})
		if err != nil {
			return false, err
		}
		switch pod.Status.Phase {
		case v1.PodPending:
			return false, nil
		default:
			return true, nil
		}
	}
}
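
// These factories produce wait.ConditionFunc values, so callers can poll them with the
// helpers from k8s.io/apimachinery/pkg/util/wait. A minimal sketch (variable names,
// interval, and timeout are illustrative):
//
//	// Block until the pod is Running and Ready, or the timeout expires.
//	err := wait.PollImmediate(2*time.Second, 5*time.Minute, podRunningAndReady(c, podName, ns))
//	expectNoError(err, "pod %s never became Running and Ready", podName)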

// PodsCreated returns the pod list matched by the given name, waiting until the expected
// number of non-terminating replicas is observed.
func PodsCreated(c clientset.Interface, ns, name string, replicas int32) (*v1.PodList, error) {
	label := labels.SelectorFromSet(labels.Set(map[string]string{"name": name}))
	return PodsCreatedByLabel(c, ns, name, replicas, label)
}

// PodsCreatedByLabel returns the pod list matched by the given label, waiting until the
// expected number of non-terminating replicas is observed.
func PodsCreatedByLabel(c clientset.Interface, ns, name string, replicas int32, label labels.Selector) (*v1.PodList, error) {
	timeout := 2 * time.Minute
	for start := time.Now(); time.Since(start) < timeout; time.Sleep(5 * time.Second) {
		options := metav1.ListOptions{LabelSelector: label.String()}

		// List the pods, making sure we observe all the replicas.
		pods, err := c.CoreV1().Pods(ns).List(context.TODO(), options)
		if err != nil {
			return nil, err
		}

		created := []v1.Pod{}
		for _, pod := range pods.Items {
			if pod.DeletionTimestamp != nil {
				continue
			}
			created = append(created, pod)
		}
		e2elog.Logf("Pod name %s: Found %d pods out of %d", name, len(created), replicas)

		if int32(len(created)) == replicas {
			pods.Items = created
			return pods, nil
		}
	}
	return nil, fmt.Errorf("Pod name %s: Gave up waiting %v for %d pods to come up", name, timeout, replicas)
}
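
// A minimal usage sketch (the controller name and replica count are illustrative): wait
// until a controller named "nginx" has brought up three pods, then inspect them.
//
//	pods, err := PodsCreated(c, ns, "nginx", 3)
//	expectNoError(err, "expected 3 pods for %q", "nginx")
//	for _, p := range pods.Items {
//		e2elog.Logf("observed pod %s on node %s", p.Name, p.Spec.NodeName)
//	}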

// VerifyPods checks that the pods matching the given name are running and responding.
func VerifyPods(c clientset.Interface, ns, name string, wantName bool, replicas int32) error {
	return podRunningMaybeResponding(c, ns, name, wantName, replicas, true)
}

// VerifyPodsRunning checks that the pods matching the given name are running.
func VerifyPodsRunning(c clientset.Interface, ns, name string, wantName bool, replicas int32) error {
	return podRunningMaybeResponding(c, ns, name, wantName, replicas, false)
}

func podRunningMaybeResponding(c clientset.Interface, ns, name string, wantName bool, replicas int32, checkResponding bool) error {
	pods, err := PodsCreated(c, ns, name, replicas)
	if err != nil {
		return err
	}
	e := podsRunning(c, pods)
	if len(e) > 0 {
		return fmt.Errorf("failed to wait for pods running: %v", e)
	}
	if checkResponding {
		err = PodsResponding(c, ns, name, wantName, pods)
		if err != nil {
			return fmt.Errorf("failed to wait for pods responding: %v", err)
		}
	}
	return nil
}

// podsRunning waits in parallel for every pod in the list to reach the Running phase and
// returns any errors for pods that did not.
func podsRunning(c clientset.Interface, pods *v1.PodList) []error {
	// Wait for the pods to enter the running state. Waiting loops until the pods
	// are running so non-running pods cause a timeout for this test.
	ginkgo.By("ensuring each pod is running")
	e := []error{}
	errorChan := make(chan error)

	for _, pod := range pods.Items {
		go func(p v1.Pod) {
			errorChan <- WaitForPodRunningInNamespace(c, &p)
		}(pod)
	}

	for range pods.Items {
		err := <-errorChan
		if err != nil {
			e = append(e, err)
		}
	}

	return e
}

// LogPodStates logs basic info of provided pods for debugging.
func LogPodStates(pods []v1.Pod) {
	// Find maximum widths for pod, node, and phase strings for column printing.
	maxPodW, maxNodeW, maxPhaseW, maxGraceW := len("POD"), len("NODE"), len("PHASE"), len("GRACE")
	for i := range pods {
		pod := &pods[i]
		if len(pod.ObjectMeta.Name) > maxPodW {
			maxPodW = len(pod.ObjectMeta.Name)
		}
		if len(pod.Spec.NodeName) > maxNodeW {
			maxNodeW = len(pod.Spec.NodeName)
		}
		if len(pod.Status.Phase) > maxPhaseW {
			maxPhaseW = len(pod.Status.Phase)
		}
	}
	// Increase widths by one to separate by a single space.
	maxPodW++
	maxNodeW++
	maxPhaseW++
	maxGraceW++

	// Log pod info. * does space padding, - makes them left-aligned.
	e2elog.Logf("%-[1]*[2]s %-[3]*[4]s %-[5]*[6]s %-[7]*[8]s %[9]s",
		maxPodW, "POD", maxNodeW, "NODE", maxPhaseW, "PHASE", maxGraceW, "GRACE", "CONDITIONS")
	for _, pod := range pods {
		grace := ""
		if pod.DeletionGracePeriodSeconds != nil {
			grace = fmt.Sprintf("%ds", *pod.DeletionGracePeriodSeconds)
		}
		e2elog.Logf("%-[1]*[2]s %-[3]*[4]s %-[5]*[6]s %-[7]*[8]s %[9]s",
			maxPodW, pod.ObjectMeta.Name, maxNodeW, pod.Spec.NodeName, maxPhaseW, pod.Status.Phase, maxGraceW, grace, pod.Status.Conditions)
	}
	e2elog.Logf("") // Final empty line helps for readability.
}
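
// The explicit-index format verbs above left-align each column to the measured width, so
// the output lines up roughly like this (illustrative rows, CONDITIONS column abbreviated):
//
//	POD          NODE    PHASE    GRACE CONDITIONS
//	nginx-7abcd  node-1  Running        [{Ready True ...}]
//	nginx-9wxyz  node-2  Pending        []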

// logPodTerminationMessages logs termination messages for failing pods. These are short snippets
// (much smaller than full logs), but they often show why pods crashed, and since they are stored
// in the API they are fast to retrieve.
func logPodTerminationMessages(pods []v1.Pod) {
	for _, pod := range pods {
		for _, status := range pod.Status.InitContainerStatuses {
			if status.LastTerminationState.Terminated != nil && len(status.LastTerminationState.Terminated.Message) > 0 {
				e2elog.Logf("%s[%s].initContainer[%s]=%s", pod.Name, pod.Namespace, status.Name, status.LastTerminationState.Terminated.Message)
			}
		}
		for _, status := range pod.Status.ContainerStatuses {
			if status.LastTerminationState.Terminated != nil && len(status.LastTerminationState.Terminated.Message) > 0 {
				e2elog.Logf("%s[%s].container[%s]=%s", pod.Name, pod.Namespace, status.Name, status.LastTerminationState.Terminated.Message)
			}
		}
	}
}

// DumpAllPodInfoForNamespace logs all pod information for a given namespace.
func DumpAllPodInfoForNamespace(c clientset.Interface, namespace string) {
	pods, err := c.CoreV1().Pods(namespace).List(context.TODO(), metav1.ListOptions{})
	if err != nil {
		e2elog.Logf("unable to fetch pod debug info: %v", err)
	}
	LogPodStates(pods.Items)
	logPodTerminationMessages(pods.Items)
}

// FilterNonRestartablePods filters out pods that will never get recreated if
// deleted after termination.
func FilterNonRestartablePods(pods []*v1.Pod) []*v1.Pod {
	var results []*v1.Pod
	for _, p := range pods {
		if isNotRestartAlwaysMirrorPod(p) {
			// Mirror pods with restart policy == Never will not get
			// recreated if they are deleted after the pods have
			// terminated. For now, we discount such pods.
			// https://github.com/kubernetes/kubernetes/issues/34003
			continue
		}
		results = append(results, p)
	}
	return results
}

// isNotRestartAlwaysMirrorPod returns true if the pod is a mirror pod whose restart
// policy is not Always.
func isNotRestartAlwaysMirrorPod(p *v1.Pod) bool {
	if !kubetypes.IsMirrorPod(p) {
		return false
	}
	return p.Spec.RestartPolicy != v1.RestartPolicyAlways
}
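
// A minimal usage sketch: keep only the pods that would be recreated after deletion,
// starting from the []*v1.Pod slice returned by GetPodsInNamespace further below
// (passing nil for ignoreLabels is illustrative and means "ignore nothing"):
//
//	allPods, err := GetPodsInNamespace(c, ns, nil)
//	expectNoError(err)
//	restartable := FilterNonRestartablePods(allPods)
//	e2elog.Logf("%d of %d pods are restartable", len(restartable), len(allPods))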

// NewExecPodSpec returns the pod spec of a hostexec pod.
func NewExecPodSpec(ns, name string, hostNetwork bool) *v1.Pod {
	immediate := int64(0)
	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name:      name,
			Namespace: ns,
		},
		Spec: v1.PodSpec{
			Containers: []v1.Container{
				{
					Name:            "agnhost",
					Image:           imageutils.GetE2EImage(imageutils.Agnhost),
					ImagePullPolicy: v1.PullIfNotPresent,
				},
			},
			HostNetwork:                   hostNetwork,
			SecurityContext:               &v1.PodSecurityContext{},
			TerminationGracePeriodSeconds: &immediate,
		},
	}
	return pod
}

// newExecPodSpec returns the pod spec of an exec pod.
func newExecPodSpec(ns, generateName string) *v1.Pod {
	immediate := int64(0)
	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			GenerateName: generateName,
			Namespace:    ns,
		},
		Spec: v1.PodSpec{
			TerminationGracePeriodSeconds: &immediate,
			Containers: []v1.Container{
				{
					Name:  "agnhost-pause",
					Image: imageutils.GetE2EImage(imageutils.Agnhost),
					Args:  []string{"pause"},
				},
			},
		},
	}
	return pod
}

// CreateExecPodOrFail creates an agnhost pause pod used as a vessel for kubectl exec commands.
// Pod name is uniquely generated.
func CreateExecPodOrFail(client clientset.Interface, ns, generateName string, tweak func(*v1.Pod)) *v1.Pod {
	e2elog.Logf("Creating new exec pod")
	pod := newExecPodSpec(ns, generateName)
	if tweak != nil {
		tweak(pod)
	}
	execPod, err := client.CoreV1().Pods(ns).Create(context.TODO(), pod, metav1.CreateOptions{})
	expectNoError(err, "failed to create new exec pod in namespace: %s", ns)
	err = wait.PollImmediate(poll, 5*time.Minute, func() (bool, error) {
		retrievedPod, err := client.CoreV1().Pods(execPod.Namespace).Get(context.TODO(), execPod.Name, metav1.GetOptions{})
		if err != nil {
			if testutils.IsRetryableAPIError(err) {
				return false, nil
			}
			return false, err
		}
		return retrievedPod.Status.Phase == v1.PodRunning, nil
	})
	expectNoError(err)
	return execPod
}
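
// A minimal usage sketch (the tweak callback and the label applied here are illustrative):
//
//	execPod := CreateExecPodOrFail(c, ns, "execpod-", func(p *v1.Pod) {
//		p.ObjectMeta.Labels = map[string]string{"role": "exec"}
//	})
//	// execPod is now Running and can be used as the target of "kubectl exec" style commands.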

// CheckPodsRunningReady returns whether all pods whose names are listed in
// podNames in namespace ns are running and ready, using c and waiting at most
// timeout.
func CheckPodsRunningReady(c clientset.Interface, ns string, podNames []string, timeout time.Duration) bool {
	return checkPodsCondition(c, ns, podNames, timeout, testutils.PodRunningReady, "running and ready")
}

// CheckPodsRunningReadyOrSucceeded returns whether all pods whose names are
// listed in podNames in namespace ns are running and ready, or succeeded, using
// c and waiting at most timeout.
func CheckPodsRunningReadyOrSucceeded(c clientset.Interface, ns string, podNames []string, timeout time.Duration) bool {
	return checkPodsCondition(c, ns, podNames, timeout, testutils.PodRunningReadyOrSucceeded, "running and ready, or succeeded")
}

// checkPodsCondition returns whether all pods whose names are listed in podNames
// in namespace ns are in the condition, using c and waiting at most timeout.
func checkPodsCondition(c clientset.Interface, ns string, podNames []string, timeout time.Duration, condition podCondition, desc string) bool {
	np := len(podNames)
	e2elog.Logf("Waiting up to %v for %d pods to be %s: %s", timeout, np, desc, podNames)
	type waitPodResult struct {
		success bool
		podName string
	}
	result := make(chan waitPodResult, len(podNames))
	for _, podName := range podNames {
		// Launch off pod readiness checkers.
		go func(name string) {
			err := WaitForPodCondition(c, ns, name, desc, timeout, condition)
			result <- waitPodResult{err == nil, name}
		}(podName)
	}
	// Wait for them all to finish.
	success := true
	for range podNames {
		res := <-result
		if !res.success {
			e2elog.Logf("Pod %[1]s failed to be %[2]s.", res.podName, desc)
			success = false
		}
	}
	e2elog.Logf("Wanted all %d pods to be %s. Result: %t. Pods: %v", np, desc, success, podNames)
	return success
}
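
// A minimal usage sketch (the pod names and timeout are illustrative):
//
//	names := []string{"kube-dns-abc12", "kube-proxy-node-1"}
//	if !CheckPodsRunningReady(c, metav1.NamespaceSystem, names, 5*time.Minute) {
//		e2elog.Logf("not all listed system pods became running and ready")
//	}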

// GetPodLogs returns the logs of the specified container (namespace/pod/container).
func GetPodLogs(c clientset.Interface, namespace, podName, containerName string) (string, error) {
	return getPodLogsInternal(c, namespace, podName, containerName, false)
}

// GetPreviousPodLogs returns the logs of the previous instance of the
// specified container (namespace/pod/container).
func GetPreviousPodLogs(c clientset.Interface, namespace, podName, containerName string) (string, error) {
	return getPodLogsInternal(c, namespace, podName, containerName, true)
}

// getPodLogsInternal is a utility function for gomega Eventually.
func getPodLogsInternal(c clientset.Interface, namespace, podName, containerName string, previous bool) (string, error) {
	logs, err := c.CoreV1().RESTClient().Get().
		Resource("pods").
		Namespace(namespace).
		Name(podName).SubResource("log").
		Param("container", containerName).
		Param("previous", strconv.FormatBool(previous)).
		Do(context.TODO()).
		Raw()
	if err != nil {
		return "", err
	}
	if strings.Contains(string(logs), "Internal Error") {
		return "", fmt.Errorf("Fetched log contains \"Internal Error\": %q", string(logs))
	}
	return string(logs), err
}
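
// Because GetPodLogs returns (string, error), it composes naturally with gomega.Eventually.
// A minimal sketch (the expected substring, interval, and timeout are illustrative):
//
//	gomega.Eventually(func() (string, error) {
//		return GetPodLogs(c, ns, podName, containerName)
//	}, 2*time.Minute, 5*time.Second).Should(gomega.ContainSubstring("server started"))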

// GetPodsInNamespace returns the pods in the given namespace, omitting pods whose labels
// match ignoreLabels.
func GetPodsInNamespace(c clientset.Interface, ns string, ignoreLabels map[string]string) ([]*v1.Pod, error) {
	pods, err := c.CoreV1().Pods(ns).List(context.TODO(), metav1.ListOptions{})
	if err != nil {
		return []*v1.Pod{}, err
	}
	ignoreSelector := labels.SelectorFromSet(ignoreLabels)
	var filtered []*v1.Pod
	for i := range pods.Items {
		p := pods.Items[i]
		if len(ignoreLabels) != 0 && ignoreSelector.Matches(labels.Set(p.Labels)) {
			continue
		}
		filtered = append(filtered, &p)
	}
	return filtered, nil
}
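
// A minimal usage sketch (the ignore label is illustrative): list everything in the
// namespace except pods carrying a test-infrastructure label.
//
//	pods, err := GetPodsInNamespace(c, ns, map[string]string{"e2e-run": "ignore"})
//	expectNoError(err)
//	e2elog.Logf("found %d pods of interest in %s", len(pods), ns)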