pd.go 26 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655
  1. /*
  2. Copyright 2015 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package storage
  14. import (
  15. "context"
  16. "fmt"
  17. "math/rand"
  18. "strings"
  19. "time"
  20. "google.golang.org/api/googleapi"
  21. "github.com/aws/aws-sdk-go/aws"
  22. "github.com/aws/aws-sdk-go/aws/session"
  23. "github.com/aws/aws-sdk-go/service/ec2"
  24. "github.com/onsi/ginkgo"
  25. "github.com/onsi/gomega"
  26. v1 "k8s.io/api/core/v1"
  27. policyv1beta1 "k8s.io/api/policy/v1beta1"
  28. "k8s.io/apimachinery/pkg/api/resource"
  29. metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
  30. "k8s.io/apimachinery/pkg/labels"
  31. "k8s.io/apimachinery/pkg/types"
  32. "k8s.io/apimachinery/pkg/util/uuid"
  33. "k8s.io/apimachinery/pkg/util/wait"
  34. clientset "k8s.io/client-go/kubernetes"
  35. v1core "k8s.io/client-go/kubernetes/typed/core/v1"
  36. "k8s.io/kubernetes/test/e2e/framework"
  37. e2ekubectl "k8s.io/kubernetes/test/e2e/framework/kubectl"
  38. e2enode "k8s.io/kubernetes/test/e2e/framework/node"
  39. e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
  40. "k8s.io/kubernetes/test/e2e/framework/providers/gce"
  41. e2epv "k8s.io/kubernetes/test/e2e/framework/pv"
  42. e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
  43. "k8s.io/kubernetes/test/e2e/storage/utils"
  44. imageutils "k8s.io/kubernetes/test/utils/image"
  45. )
const (
	// gcePDDetachTimeout bounds how long waitForPDDetach keeps polling for a GCE PD to detach.
	gcePDDetachTimeout = 10 * time.Minute
	// gcePDDetachPollTime is the sleep between two detach checks in waitForPDDetach.
	gcePDDetachPollTime = 10 * time.Second
	// nodeStatusTimeout is the timeout handed to the node readiness waits and to
	// waitForPDInVolumesInUse by the tests in this file.
	nodeStatusTimeout = 10 * time.Minute
	// nodeStatusPollTime is the sleep between two node fetches in waitForPDInVolumesInUse.
	nodeStatusPollTime = 1 * time.Second
	// podEvictTimeout bounds how long the eviction of host0Pod is retried.
	podEvictTimeout = 2 * time.Minute
	// minNodes is the number of ready schedulable nodes the tests require (host0 and host1).
	minNodes = 2
)
  54. var _ = utils.SIGDescribe("Pod Disks", func() {
	// State shared by every spec of this suite; all of it is (re)assigned in BeforeEach.
	var (
		ns         string               // f.Namespace.Name
		cs         clientset.Interface  // f.ClientSet
		podClient  v1core.PodInterface  // pod client scoped to ns
		nodeClient v1core.NodeInterface // node client
		host0Name  types.NodeName       // name of nodes.Items[0]
		host1Name  types.NodeName       // name of nodes.Items[1]
		nodes      *v1.NodeList         // result of e2enode.GetReadySchedulableNodes
	)
	f := framework.NewDefaultFramework("pod-disks")

	ginkgo.BeforeEach(func() {
		// The skip checks run before any client is used.
		e2eskipper.SkipUnlessNodeCountIsAtLeast(minNodes)
		cs = f.ClientSet
		ns = f.Namespace.Name

		e2eskipper.SkipIfMultizone(cs)

		podClient = cs.CoreV1().Pods(ns)
		nodeClient = cs.CoreV1().Nodes()

		var err error
		nodes, err = e2enode.GetReadySchedulableNodes(cs)
		framework.ExpectNoError(err)
		// Items[0] and Items[1] are indexed below, so at least minNodes entries are required.
		gomega.Expect(len(nodes.Items)).To(gomega.BeNumerically(">=", minNodes), fmt.Sprintf("Requires at least %d nodes", minNodes))
		host0Name = types.NodeName(nodes.Items[0].ObjectMeta.Name)
		host1Name = types.NodeName(nodes.Items[1].ObjectMeta.Name)
	})
  79. ginkgo.Context("schedule pods each with a PD, delete pod and verify detach [Slow]", func() {
  80. const (
  81. podDefaultGrace = "default (30s)"
  82. podImmediateGrace = "immediate (0s)"
  83. )
  84. var readOnlyMap = map[bool]string{
  85. true: "read-only",
  86. false: "RW",
  87. }
  88. type testT struct {
  89. descr string // It description
  90. readOnly bool // true means pd is read-only
  91. deleteOpt *metav1.DeleteOptions // pod delete option
  92. }
  93. tests := []testT{
  94. {
  95. descr: podImmediateGrace,
  96. readOnly: false,
  97. deleteOpt: metav1.NewDeleteOptions(0),
  98. },
  99. {
  100. descr: podDefaultGrace,
  101. readOnly: false,
  102. deleteOpt: &metav1.DeleteOptions{},
  103. },
  104. {
  105. descr: podImmediateGrace,
  106. readOnly: true,
  107. deleteOpt: metav1.NewDeleteOptions(0),
  108. },
  109. {
  110. descr: podDefaultGrace,
  111. readOnly: true,
  112. deleteOpt: &metav1.DeleteOptions{},
  113. },
  114. }
  115. for _, t := range tests {
  116. podDelOpt := t.deleteOpt
  117. readOnly := t.readOnly
  118. readOnlyTxt := readOnlyMap[readOnly]
  119. ginkgo.It(fmt.Sprintf("for %s PD with pod delete grace period of %q", readOnlyTxt, t.descr), func() {
  120. e2eskipper.SkipUnlessProviderIs("gce", "gke", "aws")
  121. if readOnly {
  122. e2eskipper.SkipIfProviderIs("aws")
  123. }
  124. ginkgo.By("creating PD")
  125. diskName, err := e2epv.CreatePDWithRetry()
  126. framework.ExpectNoError(err, "Error creating PD")
  127. var fmtPod *v1.Pod
  128. if readOnly {
  129. // if all test pods are RO then need a RW pod to format pd
  130. ginkgo.By("creating RW fmt Pod to ensure PD is formatted")
  131. fmtPod = testPDPod([]string{diskName}, host0Name, false, 1)
  132. _, err = podClient.Create(context.TODO(), fmtPod, metav1.CreateOptions{})
  133. framework.ExpectNoError(err, "Failed to create fmtPod")
  134. framework.ExpectNoError(f.WaitForPodRunningSlow(fmtPod.Name))
  135. ginkgo.By("deleting the fmtPod")
  136. framework.ExpectNoError(podClient.Delete(context.TODO(), fmtPod.Name, metav1.NewDeleteOptions(0)), "Failed to delete fmtPod")
  137. framework.Logf("deleted fmtPod %q", fmtPod.Name)
  138. ginkgo.By("waiting for PD to detach")
  139. framework.ExpectNoError(waitForPDDetach(diskName, host0Name))
  140. }
  141. // prepare to create two test pods on separate nodes
  142. host0Pod := testPDPod([]string{diskName}, host0Name, readOnly, 1)
  143. host1Pod := testPDPod([]string{diskName}, host1Name, readOnly, 1)
  144. defer func() {
  145. // Teardown should do nothing unless test failed
  146. ginkgo.By("defer: cleaning up PD-RW test environment")
  147. framework.Logf("defer cleanup errors can usually be ignored")
  148. if fmtPod != nil {
  149. podClient.Delete(context.TODO(), fmtPod.Name, podDelOpt)
  150. }
  151. podClient.Delete(context.TODO(), host0Pod.Name, podDelOpt)
  152. podClient.Delete(context.TODO(), host1Pod.Name, podDelOpt)
  153. detachAndDeletePDs(diskName, []types.NodeName{host0Name, host1Name})
  154. }()
  155. ginkgo.By("creating host0Pod on node0")
  156. _, err = podClient.Create(context.TODO(), host0Pod, metav1.CreateOptions{})
  157. framework.ExpectNoError(err, fmt.Sprintf("Failed to create host0Pod: %v", err))
  158. framework.ExpectNoError(f.WaitForPodRunningSlow(host0Pod.Name))
  159. framework.Logf("host0Pod: %q, node0: %q", host0Pod.Name, host0Name)
  160. var containerName, testFile, testFileContents string
  161. if !readOnly {
  162. ginkgo.By("writing content to host0Pod on node0")
  163. containerName = "mycontainer"
  164. testFile = "/testpd1/tracker"
  165. testFileContents = fmt.Sprintf("%v", rand.Int())
  166. tk := e2ekubectl.NewTestKubeconfig(framework.TestContext.CertDir, framework.TestContext.Host, framework.TestContext.KubeConfig, framework.TestContext.KubeContext, framework.TestContext.KubectlPath, ns)
  167. framework.ExpectNoError(tk.WriteFileViaContainer(host0Pod.Name, containerName, testFile, testFileContents))
  168. framework.Logf("wrote %q to file %q in pod %q on node %q", testFileContents, testFile, host0Pod.Name, host0Name)
  169. ginkgo.By("verifying PD is present in node0's VolumeInUse list")
  170. framework.ExpectNoError(waitForPDInVolumesInUse(nodeClient, diskName, host0Name, nodeStatusTimeout, true /* shouldExist */))
  171. ginkgo.By("deleting host0Pod") // delete this pod before creating next pod
  172. framework.ExpectNoError(podClient.Delete(context.TODO(), host0Pod.Name, podDelOpt), "Failed to delete host0Pod")
  173. framework.Logf("deleted host0Pod %q", host0Pod.Name)
  174. e2epod.WaitForPodToDisappear(cs, host0Pod.Namespace, host0Pod.Name, labels.Everything(), framework.Poll, framework.PodDeleteTimeout)
  175. framework.Logf("deleted host0Pod %q disappeared", host0Pod.Name)
  176. }
  177. ginkgo.By("creating host1Pod on node1")
  178. _, err = podClient.Create(context.TODO(), host1Pod, metav1.CreateOptions{})
  179. framework.ExpectNoError(err, "Failed to create host1Pod")
  180. framework.ExpectNoError(f.WaitForPodRunningSlow(host1Pod.Name))
  181. framework.Logf("host1Pod: %q, node1: %q", host1Pod.Name, host1Name)
  182. if readOnly {
  183. ginkgo.By("deleting host0Pod")
  184. framework.ExpectNoError(podClient.Delete(context.TODO(), host0Pod.Name, podDelOpt), "Failed to delete host0Pod")
  185. framework.Logf("deleted host0Pod %q", host0Pod.Name)
  186. } else {
  187. ginkgo.By("verifying PD contents in host1Pod")
  188. verifyPDContentsViaContainer(ns, f, host1Pod.Name, containerName, map[string]string{testFile: testFileContents})
  189. framework.Logf("verified PD contents in pod %q", host1Pod.Name)
  190. ginkgo.By("verifying PD is removed from node0")
  191. framework.ExpectNoError(waitForPDInVolumesInUse(nodeClient, diskName, host0Name, nodeStatusTimeout, false /* shouldExist */))
  192. framework.Logf("PD %q removed from node %q's VolumeInUse list", diskName, host1Pod.Name)
  193. }
  194. ginkgo.By("deleting host1Pod")
  195. framework.ExpectNoError(podClient.Delete(context.TODO(), host1Pod.Name, podDelOpt), "Failed to delete host1Pod")
  196. framework.Logf("deleted host1Pod %q", host1Pod.Name)
  197. ginkgo.By("Test completed successfully, waiting for PD to detach from both nodes")
  198. waitForPDDetach(diskName, host0Name)
  199. waitForPDDetach(diskName, host1Name)
  200. })
  201. }
  202. })
  203. ginkgo.Context("schedule a pod w/ RW PD(s) mounted to 1 or more containers, write to PD, verify content, delete pod, and repeat in rapid succession [Slow]", func() {
  204. type testT struct {
  205. numContainers int
  206. numPDs int
  207. repeatCnt int
  208. }
  209. tests := []testT{
  210. {
  211. numContainers: 4,
  212. numPDs: 1,
  213. repeatCnt: 3,
  214. },
  215. {
  216. numContainers: 1,
  217. numPDs: 2,
  218. repeatCnt: 3,
  219. },
  220. }
  221. for _, t := range tests {
  222. numPDs := t.numPDs
  223. numContainers := t.numContainers
  224. ginkgo.It(fmt.Sprintf("using %d containers and %d PDs", numContainers, numPDs), func() {
  225. e2eskipper.SkipUnlessProviderIs("gce", "gke", "aws")
  226. var host0Pod *v1.Pod
  227. var err error
  228. fileAndContentToVerify := make(map[string]string)
  229. diskNames := make([]string, 0, numPDs)
  230. ginkgo.By(fmt.Sprintf("creating %d PD(s)", numPDs))
  231. for i := 0; i < numPDs; i++ {
  232. name, err := e2epv.CreatePDWithRetry()
  233. framework.ExpectNoError(err, fmt.Sprintf("Error creating PD %d", i))
  234. diskNames = append(diskNames, name)
  235. }
  236. defer func() {
  237. // Teardown should do nothing unless test failed.
  238. ginkgo.By("defer: cleaning up PD-RW test environment")
  239. framework.Logf("defer cleanup errors can usually be ignored")
  240. if host0Pod != nil {
  241. podClient.Delete(context.TODO(), host0Pod.Name, metav1.NewDeleteOptions(0))
  242. }
  243. for _, diskName := range diskNames {
  244. detachAndDeletePDs(diskName, []types.NodeName{host0Name})
  245. }
  246. }()
  247. for i := 0; i < t.repeatCnt; i++ { // "rapid" repeat loop
  248. framework.Logf("PD Read/Writer Iteration #%v", i)
  249. ginkgo.By(fmt.Sprintf("creating host0Pod with %d containers on node0", numContainers))
  250. host0Pod = testPDPod(diskNames, host0Name, false /* readOnly */, numContainers)
  251. _, err = podClient.Create(context.TODO(), host0Pod, metav1.CreateOptions{})
  252. framework.ExpectNoError(err, fmt.Sprintf("Failed to create host0Pod: %v", err))
  253. framework.ExpectNoError(f.WaitForPodRunningSlow(host0Pod.Name))
  254. ginkgo.By(fmt.Sprintf("writing %d file(s) via a container", numPDs))
  255. containerName := "mycontainer"
  256. if numContainers > 1 {
  257. containerName = fmt.Sprintf("mycontainer%v", rand.Intn(numContainers)+1)
  258. }
  259. for x := 1; x <= numPDs; x++ {
  260. testFile := fmt.Sprintf("/testpd%d/tracker%d", x, i)
  261. testFileContents := fmt.Sprintf("%v", rand.Int())
  262. fileAndContentToVerify[testFile] = testFileContents
  263. tk := e2ekubectl.NewTestKubeconfig(framework.TestContext.CertDir, framework.TestContext.Host, framework.TestContext.KubeConfig, framework.TestContext.KubeContext, framework.TestContext.KubectlPath, ns)
  264. framework.ExpectNoError(tk.WriteFileViaContainer(host0Pod.Name, containerName, testFile, testFileContents))
  265. framework.Logf("wrote %q to file %q in pod %q (container %q) on node %q", testFileContents, testFile, host0Pod.Name, containerName, host0Name)
  266. }
  267. ginkgo.By("verifying PD contents via a container")
  268. if numContainers > 1 {
  269. containerName = fmt.Sprintf("mycontainer%v", rand.Intn(numContainers)+1)
  270. }
  271. verifyPDContentsViaContainer(ns, f, host0Pod.Name, containerName, fileAndContentToVerify)
  272. ginkgo.By("deleting host0Pod")
  273. framework.ExpectNoError(podClient.Delete(context.TODO(), host0Pod.Name, metav1.NewDeleteOptions(0)), "Failed to delete host0Pod")
  274. }
  275. ginkgo.By(fmt.Sprintf("Test completed successfully, waiting for %d PD(s) to detach from node0", numPDs))
  276. for _, diskName := range diskNames {
  277. waitForPDDetach(diskName, host0Name)
  278. }
  279. })
  280. }
  281. })
  282. ginkgo.Context("detach in a disrupted environment [Slow] [Disruptive]", func() {
  283. const (
  284. deleteNode = 1 // delete physical node
  285. deleteNodeObj = 2 // delete node's api object only
  286. evictPod = 3 // evict host0Pod on node0
  287. )
  288. type testT struct {
  289. descr string // It description
  290. disruptOp int // disruptive operation performed on target node
  291. }
  292. tests := []testT{
  293. // https://github.com/kubernetes/kubernetes/issues/85972
  294. // This test case is flawed. Disabling for now.
  295. // {
  296. // descr: "node is deleted",
  297. // disruptOp: deleteNode,
  298. // },
  299. {
  300. descr: "node's API object is deleted",
  301. disruptOp: deleteNodeObj,
  302. },
  303. {
  304. descr: "pod is evicted",
  305. disruptOp: evictPod,
  306. },
  307. }
  308. for _, t := range tests {
  309. disruptOp := t.disruptOp
  310. ginkgo.It(fmt.Sprintf("when %s", t.descr), func() {
  311. e2eskipper.SkipUnlessProviderIs("gce")
  312. origNodeCnt := len(nodes.Items) // healhy nodes running kubelet
  313. ginkgo.By("creating a pd")
  314. diskName, err := e2epv.CreatePDWithRetry()
  315. framework.ExpectNoError(err, "Error creating a pd")
  316. targetNode := &nodes.Items[0] // for node delete ops
  317. host0Pod := testPDPod([]string{diskName}, host0Name, false, 1)
  318. containerName := "mycontainer"
  319. defer func() {
  320. ginkgo.By("defer: cleaning up PD-RW test env")
  321. framework.Logf("defer cleanup errors can usually be ignored")
  322. ginkgo.By("defer: delete host0Pod")
  323. podClient.Delete(context.TODO(), host0Pod.Name, metav1.NewDeleteOptions(0))
  324. ginkgo.By("defer: detach and delete PDs")
  325. detachAndDeletePDs(diskName, []types.NodeName{host0Name})
  326. if disruptOp == deleteNode || disruptOp == deleteNodeObj {
  327. if disruptOp == deleteNodeObj {
  328. targetNode.ObjectMeta.SetResourceVersion("0")
  329. // need to set the resource version or else the Create() fails
  330. ginkgo.By("defer: re-create host0 node object")
  331. _, err := nodeClient.Create(context.TODO(), targetNode, metav1.CreateOptions{})
  332. framework.ExpectNoError(err, fmt.Sprintf("defer: Unable to re-create the deleted node object %q", targetNode.Name))
  333. }
  334. ginkgo.By("defer: verify the number of ready nodes")
  335. numNodes := countReadyNodes(cs, host0Name)
  336. // if this defer is reached due to an Expect then nested
  337. // Expects are lost, so use Failf here
  338. if numNodes != origNodeCnt {
  339. framework.Failf("defer: Requires current node count (%d) to return to original node count (%d)", numNodes, origNodeCnt)
  340. }
  341. }
  342. }()
  343. ginkgo.By("creating host0Pod on node0")
  344. _, err = podClient.Create(context.TODO(), host0Pod, metav1.CreateOptions{})
  345. framework.ExpectNoError(err, fmt.Sprintf("Failed to create host0Pod: %v", err))
  346. ginkgo.By("waiting for host0Pod to be running")
  347. framework.ExpectNoError(f.WaitForPodRunningSlow(host0Pod.Name))
  348. ginkgo.By("writing content to host0Pod")
  349. testFile := "/testpd1/tracker"
  350. testFileContents := fmt.Sprintf("%v", rand.Int())
  351. tk := e2ekubectl.NewTestKubeconfig(framework.TestContext.CertDir, framework.TestContext.Host, framework.TestContext.KubeConfig, framework.TestContext.KubeContext, framework.TestContext.KubectlPath, ns)
  352. framework.ExpectNoError(tk.WriteFileViaContainer(host0Pod.Name, containerName, testFile, testFileContents))
  353. framework.Logf("wrote %q to file %q in pod %q on node %q", testFileContents, testFile, host0Pod.Name, host0Name)
  354. ginkgo.By("verifying PD is present in node0's VolumeInUse list")
  355. framework.ExpectNoError(waitForPDInVolumesInUse(nodeClient, diskName, host0Name, nodeStatusTimeout, true /* should exist*/))
  356. if disruptOp == deleteNode {
  357. ginkgo.By("getting gce instances")
  358. gceCloud, err := gce.GetGCECloud()
  359. framework.ExpectNoError(err, fmt.Sprintf("Unable to create gcloud client err=%v", err))
  360. output, err := gceCloud.ListInstanceNames(framework.TestContext.CloudConfig.ProjectID, framework.TestContext.CloudConfig.Zone)
  361. framework.ExpectNoError(err, fmt.Sprintf("Unable to get list of node instances err=%v output=%s", err, output))
  362. framework.ExpectEqual(true, strings.Contains(string(output), string(host0Name)))
  363. ginkgo.By("deleting host0")
  364. err = gceCloud.DeleteInstance(framework.TestContext.CloudConfig.ProjectID, framework.TestContext.CloudConfig.Zone, string(host0Name))
  365. framework.ExpectNoError(err, fmt.Sprintf("Failed to delete host0Pod: err=%v", err))
  366. ginkgo.By("expecting host0 node to be re-created")
  367. numNodes := countReadyNodes(cs, host0Name)
  368. framework.ExpectEqual(numNodes, origNodeCnt, fmt.Sprintf("Requires current node count (%d) to return to original node count (%d)", numNodes, origNodeCnt))
  369. output, err = gceCloud.ListInstanceNames(framework.TestContext.CloudConfig.ProjectID, framework.TestContext.CloudConfig.Zone)
  370. framework.ExpectNoError(err, fmt.Sprintf("Unable to get list of node instances err=%v output=%s", err, output))
  371. framework.ExpectEqual(true, strings.Contains(string(output), string(host0Name)))
  372. } else if disruptOp == deleteNodeObj {
  373. ginkgo.By("deleting host0's node api object")
  374. framework.ExpectNoError(nodeClient.Delete(context.TODO(), string(host0Name), metav1.NewDeleteOptions(0)), "Unable to delete host0's node object")
  375. ginkgo.By("deleting host0Pod")
  376. framework.ExpectNoError(podClient.Delete(context.TODO(), host0Pod.Name, metav1.NewDeleteOptions(0)), "Unable to delete host0Pod")
  377. } else if disruptOp == evictPod {
  378. evictTarget := &policyv1beta1.Eviction{
  379. ObjectMeta: metav1.ObjectMeta{
  380. Name: host0Pod.Name,
  381. Namespace: ns,
  382. },
  383. }
  384. ginkgo.By("evicting host0Pod")
  385. err = wait.PollImmediate(framework.Poll, podEvictTimeout, func() (bool, error) {
  386. if err := cs.CoreV1().Pods(ns).Evict(evictTarget); err != nil {
  387. framework.Logf("Failed to evict host0Pod, ignoring error: %v", err)
  388. return false, nil
  389. }
  390. return true, nil
  391. })
  392. framework.ExpectNoError(err, "failed to evict host0Pod after %v", podEvictTimeout)
  393. }
  394. ginkgo.By("waiting for pd to detach from host0")
  395. waitForPDDetach(diskName, host0Name)
  396. })
  397. }
  398. })
  399. ginkgo.It("should be able to delete a non-existent PD without error", func() {
  400. e2eskipper.SkipUnlessProviderIs("gce")
  401. ginkgo.By("delete a PD")
  402. framework.ExpectNoError(e2epv.DeletePDWithRetry("non-exist"))
  403. })
  404. })
  405. func countReadyNodes(c clientset.Interface, hostName types.NodeName) int {
  406. e2enode.WaitForNodeToBeReady(c, string(hostName), nodeStatusTimeout)
  407. framework.WaitForAllNodesSchedulable(c, nodeStatusTimeout)
  408. nodes, err := e2enode.GetReadySchedulableNodes(c)
  409. framework.ExpectNoError(err)
  410. return len(nodes.Items)
  411. }
  412. func verifyPDContentsViaContainer(namespace string, f *framework.Framework, podName, containerName string, fileAndContentToVerify map[string]string) {
  413. for filePath, expectedContents := range fileAndContentToVerify {
  414. // No retry loop as there should not be temporal based failures
  415. tk := e2ekubectl.NewTestKubeconfig(framework.TestContext.CertDir, framework.TestContext.Host, framework.TestContext.KubeConfig, framework.TestContext.KubeContext, framework.TestContext.KubectlPath, namespace)
  416. v, err := tk.ReadFileViaContainer(podName, containerName, filePath)
  417. framework.ExpectNoError(err, "Error reading file %s via container %s", filePath, containerName)
  418. framework.Logf("Read file %q with content: %v", filePath, v)
  419. if strings.TrimSpace(v) != strings.TrimSpace(expectedContents) {
  420. framework.Failf("Read content <%q> does not match execpted content <%q>.", v, expectedContents)
  421. }
  422. }
  423. }
  424. func detachPD(nodeName types.NodeName, pdName string) error {
  425. if framework.TestContext.Provider == "gce" || framework.TestContext.Provider == "gke" {
  426. gceCloud, err := gce.GetGCECloud()
  427. if err != nil {
  428. return err
  429. }
  430. err = gceCloud.DetachDisk(pdName, nodeName)
  431. if err != nil {
  432. if gerr, ok := err.(*googleapi.Error); ok && strings.Contains(gerr.Message, "Invalid value for field 'disk'") {
  433. // PD already detached, ignore error.
  434. return nil
  435. }
  436. framework.Logf("Error detaching PD %q: %v", pdName, err)
  437. }
  438. return err
  439. } else if framework.TestContext.Provider == "aws" {
  440. awsSession, err := session.NewSession()
  441. if err != nil {
  442. return fmt.Errorf("error creating session: %v", err)
  443. }
  444. client := ec2.New(awsSession)
  445. tokens := strings.Split(pdName, "/")
  446. awsVolumeID := tokens[len(tokens)-1]
  447. request := ec2.DetachVolumeInput{
  448. VolumeId: aws.String(awsVolumeID),
  449. }
  450. _, err = client.DetachVolume(&request)
  451. if err != nil {
  452. return fmt.Errorf("error detaching EBS volume: %v", err)
  453. }
  454. return nil
  455. } else {
  456. return fmt.Errorf("Provider does not support volume detaching")
  457. }
  458. }
  459. // Returns pod spec suitable for api Create call. Handles gce, gke and aws providers only and
  460. // escapes if a different provider is supplied.
  461. // The first container name is hard-coded to "mycontainer". Subsequent containers are named:
  462. // "mycontainer<number> where <number> is 1..numContainers. Note if there is only one container it's
  463. // name has no number.
  464. // Container's volumeMounts are hard-coded to "/testpd<number>" where <number> is 1..len(diskNames).
  465. func testPDPod(diskNames []string, targetNode types.NodeName, readOnly bool, numContainers int) *v1.Pod {
  466. // escape if not a supported provider
  467. if !(framework.TestContext.Provider == "gce" || framework.TestContext.Provider == "gke" ||
  468. framework.TestContext.Provider == "aws") {
  469. framework.Failf(fmt.Sprintf("func `testPDPod` only supports gce, gke, and aws providers, not %v", framework.TestContext.Provider))
  470. }
  471. containers := make([]v1.Container, numContainers)
  472. for i := range containers {
  473. containers[i].Name = "mycontainer"
  474. if numContainers > 1 {
  475. containers[i].Name = fmt.Sprintf("mycontainer%v", i+1)
  476. }
  477. containers[i].Image = imageutils.GetE2EImage(imageutils.BusyBox)
  478. containers[i].Command = []string{"sleep", "6000"}
  479. containers[i].VolumeMounts = make([]v1.VolumeMount, len(diskNames))
  480. for k := range diskNames {
  481. containers[i].VolumeMounts[k].Name = fmt.Sprintf("testpd%v", k+1)
  482. containers[i].VolumeMounts[k].MountPath = fmt.Sprintf("/testpd%v", k+1)
  483. }
  484. containers[i].Resources.Limits = v1.ResourceList{}
  485. containers[i].Resources.Limits[v1.ResourceCPU] = *resource.NewQuantity(int64(0), resource.DecimalSI)
  486. }
  487. pod := &v1.Pod{
  488. TypeMeta: metav1.TypeMeta{
  489. Kind: "Pod",
  490. APIVersion: "v1",
  491. },
  492. ObjectMeta: metav1.ObjectMeta{
  493. Name: "pd-test-" + string(uuid.NewUUID()),
  494. },
  495. Spec: v1.PodSpec{
  496. Containers: containers,
  497. NodeName: string(targetNode),
  498. },
  499. }
  500. pod.Spec.Volumes = make([]v1.Volume, len(diskNames))
  501. for k, diskName := range diskNames {
  502. pod.Spec.Volumes[k].Name = fmt.Sprintf("testpd%v", k+1)
  503. if framework.TestContext.Provider == "aws" {
  504. pod.Spec.Volumes[k].VolumeSource = v1.VolumeSource{
  505. AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{
  506. VolumeID: diskName,
  507. FSType: "ext4",
  508. ReadOnly: readOnly,
  509. },
  510. }
  511. } else { // "gce" or "gke"
  512. pod.Spec.Volumes[k].VolumeSource = v1.VolumeSource{
  513. GCEPersistentDisk: &v1.GCEPersistentDiskVolumeSource{
  514. PDName: diskName,
  515. FSType: "ext4",
  516. ReadOnly: readOnly,
  517. },
  518. }
  519. }
  520. }
  521. return pod
  522. }
  523. // Waits for specified PD to detach from specified hostName
  524. func waitForPDDetach(diskName string, nodeName types.NodeName) error {
  525. if framework.TestContext.Provider == "gce" || framework.TestContext.Provider == "gke" {
  526. framework.Logf("Waiting for GCE PD %q to detach from node %q.", diskName, nodeName)
  527. gceCloud, err := gce.GetGCECloud()
  528. if err != nil {
  529. return err
  530. }
  531. for start := time.Now(); time.Since(start) < gcePDDetachTimeout; time.Sleep(gcePDDetachPollTime) {
  532. diskAttached, err := gceCloud.DiskIsAttached(diskName, nodeName)
  533. if err != nil {
  534. framework.Logf("Error waiting for PD %q to detach from node %q. 'DiskIsAttached(...)' failed with %v", diskName, nodeName, err)
  535. return err
  536. }
  537. if !diskAttached {
  538. // Specified disk does not appear to be attached to specified node
  539. framework.Logf("GCE PD %q appears to have successfully detached from %q.", diskName, nodeName)
  540. return nil
  541. }
  542. framework.Logf("Waiting for GCE PD %q to detach from %q.", diskName, nodeName)
  543. }
  544. return fmt.Errorf("Gave up waiting for GCE PD %q to detach from %q after %v", diskName, nodeName, gcePDDetachTimeout)
  545. }
  546. return nil
  547. }
  548. func detachAndDeletePDs(diskName string, hosts []types.NodeName) {
  549. for _, host := range hosts {
  550. framework.Logf("Detaching GCE PD %q from node %q.", diskName, host)
  551. detachPD(host, diskName)
  552. ginkgo.By(fmt.Sprintf("Waiting for PD %q to detach from %q", diskName, host))
  553. waitForPDDetach(diskName, host)
  554. }
  555. ginkgo.By(fmt.Sprintf("Deleting PD %q", diskName))
  556. framework.ExpectNoError(e2epv.DeletePDWithRetry(diskName))
  557. }
  558. func waitForPDInVolumesInUse(
  559. nodeClient v1core.NodeInterface,
  560. diskName string,
  561. nodeName types.NodeName,
  562. timeout time.Duration,
  563. shouldExist bool) error {
  564. logStr := "to contain"
  565. if !shouldExist {
  566. logStr = "to NOT contain"
  567. }
  568. framework.Logf("Waiting for node %s's VolumesInUse Status %s PD %q", nodeName, logStr, diskName)
  569. for start := time.Now(); time.Since(start) < timeout; time.Sleep(nodeStatusPollTime) {
  570. nodeObj, err := nodeClient.Get(context.TODO(), string(nodeName), metav1.GetOptions{})
  571. if err != nil || nodeObj == nil {
  572. framework.Logf("Failed to fetch node object %q from API server. err=%v", nodeName, err)
  573. continue
  574. }
  575. exists := false
  576. for _, volumeInUse := range nodeObj.Status.VolumesInUse {
  577. volumeInUseStr := string(volumeInUse)
  578. if strings.Contains(volumeInUseStr, diskName) {
  579. if shouldExist {
  580. framework.Logf("Found PD %q in node %q's VolumesInUse Status: %q", diskName, nodeName, volumeInUseStr)
  581. return nil
  582. }
  583. exists = true
  584. }
  585. }
  586. if !shouldExist && !exists {
  587. framework.Logf("Verified PD %q does not exist in node %q's VolumesInUse Status.", diskName, nodeName)
  588. return nil
  589. }
  590. }
  591. return fmt.Errorf("Timed out waiting for node %s VolumesInUse Status %s diskName %q", nodeName, logStr, diskName)
  592. }