serve_hostnames.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349
  1. /*
  2. Copyright 2015 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. /*
  14. This soak test places a specified number of pods on each node and then
  15. repeatedly sends queries to a service running on these pods via
  16. a service
  17. */
  18. package main
  19. import (
  20. "context"
  21. "flag"
  22. "fmt"
  23. "os"
  24. "path/filepath"
  25. "time"
  26. v1 "k8s.io/api/core/v1"
  27. apierrors "k8s.io/apimachinery/pkg/api/errors"
  28. metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
  29. "k8s.io/apimachinery/pkg/runtime"
  30. "k8s.io/apimachinery/pkg/util/intstr"
  31. clientset "k8s.io/client-go/kubernetes"
  32. restclient "k8s.io/client-go/rest"
  33. "k8s.io/client-go/tools/clientcmd"
  34. "k8s.io/kubernetes/pkg/api/legacyscheme"
  35. e2e "k8s.io/kubernetes/test/e2e/framework"
  36. "k8s.io/kubernetes/test/e2e/framework/service"
  37. "k8s.io/klog"
  38. )
  39. var (
  40. queriesAverage = flag.Int("queries", 100, "Number of hostname queries to make in each iteration per pod on average")
  41. podsPerNode = flag.Int("pods_per_node", 1, "Number of serve_hostname pods per node")
  42. upTo = flag.Int("up_to", 1, "Number of iterations or -1 for no limit")
  43. maxPar = flag.Int("max_par", 500, "Maximum number of queries in flight")
  44. gke = flag.String("gke_context", "", "Target GKE cluster with context gke_{project}_{zone}_{cluster-name}")
  45. )
  46. const (
  47. deleteTimeout = 2 * time.Minute
  48. endpointTimeout = 5 * time.Minute
  49. nodeListTimeout = 2 * time.Minute
  50. podCreateTimeout = 2 * time.Minute
  51. podStartTimeout = 30 * time.Minute
  52. serviceCreateTimeout = 2 * time.Minute
  53. namespaceDeleteTimeout = 5 * time.Minute
  54. )
  55. func main() {
  56. flag.Parse()
  57. klog.Infof("Starting serve_hostnames soak test with queries=%d and podsPerNode=%d upTo=%d",
  58. *queriesAverage, *podsPerNode, *upTo)
  59. var spec string
  60. if *gke != "" {
  61. spec = filepath.Join(os.Getenv("HOME"), ".config", "gcloud", "kubernetes", "kubeconfig")
  62. } else {
  63. spec = filepath.Join(os.Getenv("HOME"), ".kube", "config")
  64. }
  65. settings, err := clientcmd.LoadFromFile(spec)
  66. if err != nil {
  67. klog.Fatalf("Error loading configuration: %v", err.Error())
  68. }
  69. if *gke != "" {
  70. settings.CurrentContext = *gke
  71. }
  72. config, err := clientcmd.NewDefaultClientConfig(*settings, &clientcmd.ConfigOverrides{}).ClientConfig()
  73. if err != nil {
  74. klog.Fatalf("Failed to construct config: %v", err)
  75. }
  76. client, err := clientset.NewForConfig(config)
  77. if err != nil {
  78. klog.Fatalf("Failed to make client: %v", err)
  79. }
  80. var nodes *v1.NodeList
  81. for start := time.Now(); time.Since(start) < nodeListTimeout; time.Sleep(2 * time.Second) {
  82. nodes, err = client.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
  83. if err == nil {
  84. break
  85. }
  86. klog.Warningf("Failed to list nodes: %v", err)
  87. }
  88. if err != nil {
  89. klog.Fatalf("Giving up trying to list nodes: %v", err)
  90. }
  91. if len(nodes.Items) == 0 {
  92. klog.Fatalf("Failed to find any nodes.")
  93. }
  94. klog.Infof("Found %d nodes on this cluster:", len(nodes.Items))
  95. for i, node := range nodes.Items {
  96. klog.Infof("%d: %s", i, node.Name)
  97. }
  98. queries := *queriesAverage * len(nodes.Items) * *podsPerNode
  99. // Create the namespace
  100. got, err := client.CoreV1().Namespaces().Create(context.TODO(), &v1.Namespace{ObjectMeta: metav1.ObjectMeta{GenerateName: "serve-hostnames-"}}, metav1.CreateOptions{})
  101. if err != nil {
  102. klog.Fatalf("Failed to create namespace: %v", err)
  103. }
  104. ns := got.Name
  105. defer func(ns string) {
  106. if err := client.CoreV1().Namespaces().Delete(context.TODO(), ns, nil); err != nil {
  107. klog.Warningf("Failed to delete namespace %s: %v", ns, err)
  108. } else {
  109. // wait until the namespace disappears
  110. for i := 0; i < int(namespaceDeleteTimeout/time.Second); i++ {
  111. if _, err := client.CoreV1().Namespaces().Get(context.TODO(), ns, metav1.GetOptions{}); err != nil {
  112. if apierrors.IsNotFound(err) {
  113. return
  114. }
  115. }
  116. time.Sleep(time.Second)
  117. }
  118. }
  119. }(ns)
  120. klog.Infof("Created namespace %s", ns)
  121. // Create a service for these pods.
  122. klog.Infof("Creating service %s/serve-hostnames", ns)
  123. // Make several attempts to create a service.
  124. var svc *v1.Service
  125. for start := time.Now(); time.Since(start) < serviceCreateTimeout; time.Sleep(2 * time.Second) {
  126. t := time.Now()
  127. svc, err = client.CoreV1().Services(ns).Create(context.TODO(), &v1.Service{
  128. ObjectMeta: metav1.ObjectMeta{
  129. Name: "serve-hostnames",
  130. Labels: map[string]string{
  131. "name": "serve-hostname",
  132. },
  133. },
  134. Spec: v1.ServiceSpec{
  135. Ports: []v1.ServicePort{{
  136. Protocol: "TCP",
  137. Port: 9376,
  138. TargetPort: intstr.FromInt(9376),
  139. }},
  140. Selector: map[string]string{
  141. "name": "serve-hostname",
  142. },
  143. },
  144. }, metav1.CreateOptions{})
  145. klog.V(4).Infof("Service create %s/server-hostnames took %v", ns, time.Since(t))
  146. if err == nil {
  147. break
  148. }
  149. klog.Warningf("After %v failed to create service %s/serve-hostnames: %v", time.Since(start), ns, err)
  150. }
  151. if err != nil {
  152. klog.Warningf("Unable to create service %s/%s: %v", ns, svc.Name, err)
  153. return
  154. }
  155. // Clean up service
  156. defer func() {
  157. klog.Infof("Cleaning up service %s/serve-hostnames", ns)
  158. // Make several attempts to delete the service.
  159. for start := time.Now(); time.Since(start) < deleteTimeout; time.Sleep(1 * time.Second) {
  160. if err := client.CoreV1().Services(ns).Delete(context.TODO(), svc.Name, nil); err == nil {
  161. return
  162. }
  163. klog.Warningf("After %v unable to delete service %s/%s: %v", time.Since(start), ns, svc.Name, err)
  164. }
  165. }()
  166. // Put serve-hostname pods on each node.
  167. podNames := []string{}
  168. for i, node := range nodes.Items {
  169. for j := 0; j < *podsPerNode; j++ {
  170. podName := fmt.Sprintf("serve-hostname-%d-%d", i, j)
  171. podNames = append(podNames, podName)
  172. // Make several attempts
  173. for start := time.Now(); time.Since(start) < podCreateTimeout; time.Sleep(2 * time.Second) {
  174. klog.Infof("Creating pod %s/%s on node %s", ns, podName, node.Name)
  175. t := time.Now()
  176. _, err = client.CoreV1().Pods(ns).Create(context.TODO(), &v1.Pod{
  177. ObjectMeta: metav1.ObjectMeta{
  178. Name: podName,
  179. Labels: map[string]string{
  180. "name": "serve-hostname",
  181. },
  182. },
  183. Spec: v1.PodSpec{
  184. Containers: []v1.Container{
  185. {
  186. Name: "serve-hostname",
  187. Image: e2e.ServeHostnameImage,
  188. Ports: []v1.ContainerPort{{ContainerPort: 9376}},
  189. },
  190. },
  191. NodeName: node.Name,
  192. },
  193. }, metav1.CreateOptions{})
  194. klog.V(4).Infof("Pod create %s/%s request took %v", ns, podName, time.Since(t))
  195. if err == nil {
  196. break
  197. }
  198. klog.Warningf("After %s failed to create pod %s/%s: %v", time.Since(start), ns, podName, err)
  199. }
  200. if err != nil {
  201. klog.Warningf("Failed to create pod %s/%s: %v", ns, podName, err)
  202. return
  203. }
  204. }
  205. }
  206. // Clean up the pods
  207. defer func() {
  208. klog.Info("Cleaning up pods")
  209. // Make several attempts to delete the pods.
  210. for _, podName := range podNames {
  211. for start := time.Now(); time.Since(start) < deleteTimeout; time.Sleep(1 * time.Second) {
  212. if err = client.CoreV1().Pods(ns).Delete(context.TODO(), podName, nil); err == nil {
  213. break
  214. }
  215. klog.Warningf("After %v failed to delete pod %s/%s: %v", time.Since(start), ns, podName, err)
  216. }
  217. }
  218. }()
  219. klog.Info("Waiting for the serve-hostname pods to be ready")
  220. for _, podName := range podNames {
  221. var pod *v1.Pod
  222. for start := time.Now(); time.Since(start) < podStartTimeout; time.Sleep(5 * time.Second) {
  223. pod, err = client.CoreV1().Pods(ns).Get(context.TODO(), podName, metav1.GetOptions{})
  224. if err != nil {
  225. klog.Warningf("Get pod %s/%s failed, ignoring for %v: %v", ns, podName, err, podStartTimeout)
  226. continue
  227. }
  228. if pod.Status.Phase == v1.PodRunning {
  229. break
  230. }
  231. }
  232. if pod.Status.Phase != v1.PodRunning {
  233. klog.Warningf("Gave up waiting on pod %s/%s to be running (saw %v)", ns, podName, pod.Status.Phase)
  234. } else {
  235. klog.Infof("%s/%s is running", ns, podName)
  236. }
  237. }
  238. rclient, err := restclient.RESTClientFor(config)
  239. if err != nil {
  240. klog.Warningf("Failed to build restclient: %v", err)
  241. return
  242. }
  243. proxyRequest, errProxy := service.GetServicesProxyRequest(client, rclient.Get())
  244. if errProxy != nil {
  245. klog.Warningf("Get services proxy request failed: %v", errProxy)
  246. return
  247. }
  248. // Wait for the endpoints to propagate.
  249. for start := time.Now(); time.Since(start) < endpointTimeout; time.Sleep(10 * time.Second) {
  250. hostname, err := proxyRequest.
  251. Namespace(ns).
  252. Name("serve-hostnames").
  253. DoRaw(context.TODO())
  254. if err != nil {
  255. klog.Infof("After %v while making a proxy call got error %v", time.Since(start), err)
  256. continue
  257. }
  258. var r metav1.Status
  259. if err := runtime.DecodeInto(legacyscheme.Codecs.UniversalDecoder(), hostname, &r); err != nil {
  260. break
  261. }
  262. if r.Status == metav1.StatusFailure {
  263. klog.Infof("After %v got status %v", time.Since(start), string(hostname))
  264. continue
  265. }
  266. break
  267. }
  268. // Repeatedly make requests.
  269. for iteration := 0; iteration != *upTo; iteration++ {
  270. responseChan := make(chan string, queries)
  271. // Use a channel of size *maxPar to throttle the number
  272. // of in-flight requests to avoid overloading the service.
  273. inFlight := make(chan struct{}, *maxPar)
  274. start := time.Now()
  275. for q := 0; q < queries; q++ {
  276. go func(i int, query int) {
  277. inFlight <- struct{}{}
  278. t := time.Now()
  279. hostname, err := proxyRequest.
  280. Namespace(ns).
  281. Name("serve-hostnames").
  282. DoRaw(context.TODO())
  283. klog.V(4).Infof("Proxy call in namespace %s took %v", ns, time.Since(t))
  284. if err != nil {
  285. klog.Warningf("Call failed during iteration %d query %d : %v", i, query, err)
  286. // If the query failed return a string which starts with a character
  287. // that can't be part of a hostname.
  288. responseChan <- fmt.Sprintf("!failed in iteration %d to issue query %d: %v", i, query, err)
  289. } else {
  290. responseChan <- string(hostname)
  291. }
  292. <-inFlight
  293. }(iteration, q)
  294. }
  295. responses := make(map[string]int, *podsPerNode*len(nodes.Items))
  296. missing := 0
  297. for q := 0; q < queries; q++ {
  298. r := <-responseChan
  299. klog.V(4).Infof("Got response from %s", r)
  300. responses[r]++
  301. // If the returned hostname starts with '!' then it indicates
  302. // an error response.
  303. if len(r) > 0 && r[0] == '!' {
  304. klog.V(3).Infof("Got response %s", r)
  305. missing++
  306. }
  307. }
  308. if missing > 0 {
  309. klog.Warningf("Missing %d responses out of %d", missing, queries)
  310. }
  311. // Report any nodes that did not respond.
  312. for n, node := range nodes.Items {
  313. for i := 0; i < *podsPerNode; i++ {
  314. name := fmt.Sprintf("serve-hostname-%d-%d", n, i)
  315. if _, ok := responses[name]; !ok {
  316. klog.Warningf("No response from pod %s on node %s at iteration %d", name, node.Name, iteration)
  317. }
  318. }
  319. }
  320. klog.Infof("Iteration %d took %v for %d queries (%.2f QPS) with %d missing",
  321. iteration, time.Since(start), queries-missing, float64(queries-missing)/time.Since(start).Seconds(), missing)
  322. }
  323. }