kube_proxy.go 9.1 KB


  1. /*
  2. Copyright 2016 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package network
  14. import (
  15. "encoding/json"
  16. "fmt"
  17. "math"
  18. "strconv"
  19. "strings"
  20. "time"
  21. v1 "k8s.io/api/core/v1"
  22. metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
  23. "k8s.io/kubernetes/test/e2e/framework"
  24. e2elog "k8s.io/kubernetes/test/e2e/framework/log"
  25. e2essh "k8s.io/kubernetes/test/e2e/framework/ssh"
  26. "k8s.io/kubernetes/test/images/net/nat"
  27. imageutils "k8s.io/kubernetes/test/utils/image"
  28. "github.com/onsi/ginkgo"
  29. "github.com/onsi/gomega"
  30. )
  // kubeProxyE2eImage is the image returned by imageutils.GetE2EImage for
  // imageutils.Net. Both the client and the server pod of the CLOSE_WAIT
  // timeout test run this image and invoke its /net binary.
  31. var kubeProxyE2eImage = imageutils.GetE2EImage(imageutils.Net)
  32. var _ = SIGDescribe("Network", func() {
  33. const (
  34. testDaemonHTTPPort = 11301
  35. testDaemonTCPPort = 11302
  36. timeoutSeconds = 10
  37. postFinTimeoutSeconds = 5
  38. )
  39. fr := framework.NewDefaultFramework("network")
  40. ginkgo.It("should set TCP CLOSE_WAIT timeout", func() {
  41. nodes := framework.GetReadySchedulableNodesOrDie(fr.ClientSet)
  42. ips := framework.CollectAddresses(nodes, v1.NodeInternalIP)
  43. if len(nodes.Items) < 2 {
  44. framework.Skipf(
  45. "Test requires >= 2 Ready nodes, but there are only %v nodes",
  46. len(nodes.Items))
  47. }
  48. type NodeInfo struct {
  49. node *v1.Node
  50. name string
  51. nodeIP string
  52. }
  53. clientNodeInfo := NodeInfo{
  54. node: &nodes.Items[0],
  55. name: nodes.Items[0].Name,
  56. nodeIP: ips[0],
  57. }
  58. serverNodeInfo := NodeInfo{
  59. node: &nodes.Items[1],
  60. name: nodes.Items[1].Name,
  61. nodeIP: ips[1],
  62. }
  63. zero := int64(0)
  64. // Some distributions (Ubuntu 16.04 etc.) don't support the proc file.
  65. _, err := e2essh.IssueSSHCommandWithResult(
  66. "ls /proc/net/nf_conntrack",
  67. framework.TestContext.Provider,
  68. clientNodeInfo.node)
  69. if err != nil && strings.Contains(err.Error(), "No such file or directory") {
  70. framework.Skipf("The node %s does not support /proc/net/nf_conntrack", clientNodeInfo.name)
  71. }
  72. framework.ExpectNoError(err)
  73. clientPodSpec := &v1.Pod{
  74. ObjectMeta: metav1.ObjectMeta{
  75. Name: "e2e-net-client",
  76. Namespace: fr.Namespace.Name,
  77. Labels: map[string]string{"app": "e2e-net-client"},
  78. },
  79. Spec: v1.PodSpec{
  80. NodeName: clientNodeInfo.name,
  81. Containers: []v1.Container{
  82. {
  83. Name: "e2e-net-client",
  84. Image: kubeProxyE2eImage,
  85. ImagePullPolicy: "Always",
  86. Command: []string{
  87. "/net", "-serve", fmt.Sprintf("0.0.0.0:%d", testDaemonHTTPPort),
  88. },
  89. },
  90. },
  91. TerminationGracePeriodSeconds: &zero,
  92. },
  93. }
  94. serverPodSpec := &v1.Pod{
  95. ObjectMeta: metav1.ObjectMeta{
  96. Name: "e2e-net-server",
  97. Namespace: fr.Namespace.Name,
  98. Labels: map[string]string{"app": "e2e-net-server"},
  99. },
  100. Spec: v1.PodSpec{
  101. NodeName: serverNodeInfo.name,
  102. Containers: []v1.Container{
  103. {
  104. Name: "e2e-net-server",
  105. Image: kubeProxyE2eImage,
  106. ImagePullPolicy: "Always",
  107. Command: []string{
  108. "/net",
  109. "-runner", "nat-closewait-server",
  110. "-options",
  111. fmt.Sprintf(`{"LocalAddr":"0.0.0.0:%v", "PostFindTimeoutSeconds":%v}`,
  112. testDaemonTCPPort,
  113. postFinTimeoutSeconds),
  114. },
  115. Ports: []v1.ContainerPort{
  116. {
  117. Name: "tcp",
  118. ContainerPort: testDaemonTCPPort,
  119. HostPort: testDaemonTCPPort,
  120. },
  121. },
  122. },
  123. },
  124. TerminationGracePeriodSeconds: &zero,
  125. },
  126. }
  127. ginkgo.By(fmt.Sprintf(
  128. "Launching a server daemon on node %v (node ip: %v, image: %v)",
  129. serverNodeInfo.name,
  130. serverNodeInfo.nodeIP,
  131. kubeProxyE2eImage))
  132. fr.PodClient().CreateSync(serverPodSpec)
  133. ginkgo.By(fmt.Sprintf(
  134. "Launching a client daemon on node %v (node ip: %v, image: %v)",
  135. clientNodeInfo.name,
  136. clientNodeInfo.nodeIP,
  137. kubeProxyE2eImage))
  138. fr.PodClient().CreateSync(clientPodSpec)
  139. ginkgo.By("Make client connect")
  140. options := nat.CloseWaitClientOptions{
  141. RemoteAddr: fmt.Sprintf("%v:%v",
  142. serverNodeInfo.nodeIP, testDaemonTCPPort),
  143. TimeoutSeconds: timeoutSeconds,
  144. PostFinTimeoutSeconds: 0,
  145. LeakConnection: true,
  146. }
  147. jsonBytes, err := json.Marshal(options)
  148. cmd := fmt.Sprintf(
  149. `curl -X POST http://localhost:%v/run/nat-closewait-client -d `+
  150. `'%v' 2>/dev/null`,
  151. testDaemonHTTPPort,
  152. string(jsonBytes))
  153. framework.RunHostCmdOrDie(fr.Namespace.Name, "e2e-net-client", cmd)
  154. <-time.After(time.Duration(1) * time.Second)
  155. ginkgo.By("Checking /proc/net/nf_conntrack for the timeout")
  156. // If test flakes occur here, then this check should be performed
  157. // in a loop as there may be a race with the client connecting.
  158. e2essh.IssueSSHCommandWithResult(
  159. fmt.Sprintf("sudo cat /proc/net/nf_conntrack | grep 'dport=%v'",
  160. testDaemonTCPPort),
  161. framework.TestContext.Provider,
  162. clientNodeInfo.node)
  163. // Timeout in seconds is available as the fifth column from
  164. // /proc/net/nf_conntrack.
  165. result, err := e2essh.IssueSSHCommandWithResult(
  166. fmt.Sprintf(
  167. "sudo cat /proc/net/nf_conntrack "+
  168. "| grep 'CLOSE_WAIT.*dst=%v.*dport=%v' "+
  169. "| tail -n 1"+
  170. "| awk '{print $5}' ",
  171. serverNodeInfo.nodeIP,
  172. testDaemonTCPPort),
  173. framework.TestContext.Provider,
  174. clientNodeInfo.node)
  175. framework.ExpectNoError(err)
  176. timeoutSeconds, err := strconv.Atoi(strings.TrimSpace(result.Stdout))
  177. framework.ExpectNoError(err)
  178. // These must be synchronized from the default values set in
  179. // pkg/apis/../defaults.go ConntrackTCPCloseWaitTimeout. The
  180. // current defaults are hidden in the initialization code.
  181. const epsilonSeconds = 60
  182. const expectedTimeoutSeconds = 60 * 60
  183. e2elog.Logf("conntrack entry timeout was: %v, expected: %v",
  184. timeoutSeconds, expectedTimeoutSeconds)
  185. gomega.Expect(math.Abs(float64(timeoutSeconds - expectedTimeoutSeconds))).Should(
  186. gomega.BeNumerically("<", (epsilonSeconds)))
  187. })
  188. // Regression test for #74839, where:
  189. // Packets considered INVALID by conntrack are now dropped. In particular, this fixes
  190. // a problem where spurious retransmits in a long-running TCP connection to a service
  191. // IP could result in the connection being closed with the error "Connection reset by
  192. // peer"
  193. ginkgo.It("should resolve connrection reset issue #74839 [Slow]", func() {
  194. serverLabel := map[string]string{
  195. "app": "boom-server",
  196. }
  197. clientLabel := map[string]string{
  198. "app": "client",
  199. }
  200. serverPod := &v1.Pod{
  201. ObjectMeta: metav1.ObjectMeta{
  202. Name: "boom-server",
  203. Labels: serverLabel,
  204. },
  205. Spec: v1.PodSpec{
  206. Containers: []v1.Container{
  207. {
  208. Name: "boom-server",
  209. Image: "gcr.io/kubernetes-e2e-test-images/regression-issue-74839-amd64:1.0",
  210. Ports: []v1.ContainerPort{
  211. {
  212. ContainerPort: 9000, // Default port exposed by boom-server
  213. },
  214. },
  215. },
  216. },
  217. Affinity: &v1.Affinity{
  218. PodAntiAffinity: &v1.PodAntiAffinity{
  219. RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
  220. {
  221. LabelSelector: &metav1.LabelSelector{
  222. MatchLabels: clientLabel,
  223. },
  224. TopologyKey: "kubernetes.io/hostname",
  225. },
  226. },
  227. },
  228. },
  229. },
  230. }
  231. _, err := fr.ClientSet.CoreV1().Pods(fr.Namespace.Name).Create(serverPod)
  232. framework.ExpectNoError(err)
  233. ginkgo.By("Server pod created")
  234. svc := &v1.Service{
  235. ObjectMeta: metav1.ObjectMeta{
  236. Name: "boom-server",
  237. },
  238. Spec: v1.ServiceSpec{
  239. Selector: serverLabel,
  240. Ports: []v1.ServicePort{
  241. {
  242. Protocol: v1.ProtocolTCP,
  243. Port: 9000,
  244. },
  245. },
  246. },
  247. }
  248. _, err = fr.ClientSet.CoreV1().Services(fr.Namespace.Name).Create(svc)
  249. framework.ExpectNoError(err)
  250. ginkgo.By("Server service created")
  251. pod := &v1.Pod{
  252. ObjectMeta: metav1.ObjectMeta{
  253. Name: "startup-script",
  254. Labels: clientLabel,
  255. },
  256. Spec: v1.PodSpec{
  257. Containers: []v1.Container{
  258. {
  259. Name: "startup-script",
  260. Image: "gcr.io/google-containers/startup-script:v1",
  261. Command: []string{
  262. "bash", "-c", "while true; do sleep 2; nc boom-server 9000& done",
  263. },
  264. },
  265. },
  266. Affinity: &v1.Affinity{
  267. PodAntiAffinity: &v1.PodAntiAffinity{
  268. RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
  269. {
  270. LabelSelector: &metav1.LabelSelector{
  271. MatchLabels: serverLabel,
  272. },
  273. TopologyKey: "kubernetes.io/hostname",
  274. },
  275. },
  276. },
  277. },
  278. RestartPolicy: v1.RestartPolicyNever,
  279. },
  280. }
  281. _, err = fr.ClientSet.CoreV1().Pods(fr.Namespace.Name).Create(pod)
  282. framework.ExpectNoError(err)
  283. ginkgo.By("Client pod created")
  284. for i := 0; i < 20; i++ {
  285. time.Sleep(3 * time.Second)
  286. resultPod, err := fr.ClientSet.CoreV1().Pods(fr.Namespace.Name).Get(serverPod.Name, metav1.GetOptions{})
  287. framework.ExpectNoError(err)
  288. gomega.Expect(resultPod.Status.ContainerStatuses[0].LastTerminationState.Terminated).Should(gomega.BeNil())
  289. }
  290. })
  291. })