util.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461
  1. /*
  2. Copyright 2016 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package e2e_node
  14. import (
  15. "encoding/json"
  16. "flag"
  17. "fmt"
  18. "io/ioutil"
  19. "net/http"
  20. "os/exec"
  21. "regexp"
  22. "strings"
  23. "time"
  24. "golang.org/x/net/context"
  25. "k8s.io/klog"
  26. apiv1 "k8s.io/api/core/v1"
  27. apiequality "k8s.io/apimachinery/pkg/api/equality"
  28. metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
  29. "k8s.io/apimachinery/pkg/util/sets"
  30. utilfeature "k8s.io/apiserver/pkg/util/feature"
  31. "k8s.io/client-go/kubernetes/scheme"
  32. "k8s.io/component-base/featuregate"
  33. internalapi "k8s.io/cri-api/pkg/apis"
  34. kubeletconfigv1beta1 "k8s.io/kubelet/config/v1beta1"
  35. "k8s.io/kubernetes/pkg/features"
  36. kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
  37. "k8s.io/kubernetes/pkg/kubelet/apis/podresources"
  38. podresourcesapi "k8s.io/kubernetes/pkg/kubelet/apis/podresources/v1alpha1"
  39. stats "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
  40. "k8s.io/kubernetes/pkg/kubelet/cm"
  41. kubeletconfigcodec "k8s.io/kubernetes/pkg/kubelet/kubeletconfig/util/codec"
  42. kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
  43. "k8s.io/kubernetes/pkg/kubelet/remote"
  44. "k8s.io/kubernetes/pkg/kubelet/util"
  45. "k8s.io/kubernetes/test/e2e/framework"
  46. e2elog "k8s.io/kubernetes/test/e2e/framework/log"
  47. frameworkmetrics "k8s.io/kubernetes/test/e2e/framework/metrics"
  48. imageutils "k8s.io/kubernetes/test/utils/image"
  49. . "github.com/onsi/ginkgo"
  50. . "github.com/onsi/gomega"
  51. )
  52. // TODO(random-liu): Get this automatically from kubelet flag.
  53. var kubeletAddress = flag.String("kubelet-address", "http://127.0.0.1:10255", "Host and port of the kubelet")
  54. var startServices = flag.Bool("start-services", true, "If true, start local node services")
  55. var stopServices = flag.Bool("stop-services", true, "If true, stop local node services after running tests")
  56. var busyboxImage = imageutils.GetE2EImage(imageutils.BusyBox)
  57. var perlImage = imageutils.GetE2EImage(imageutils.Perl)
  58. const (
  59. // Kubelet internal cgroup name for node allocatable cgroup.
  60. defaultNodeAllocatableCgroup = "kubepods"
  61. // defaultPodResourcesPath is the path to the local endpoint serving the podresources GRPC service.
  62. defaultPodResourcesPath = "/var/lib/kubelet/pod-resources"
  63. defaultPodResourcesTimeout = 10 * time.Second
  64. defaultPodResourcesMaxSize = 1024 * 1024 * 16 // 16 Mb
  65. )
  66. func getNodeSummary() (*stats.Summary, error) {
  67. req, err := http.NewRequest("GET", *kubeletAddress+"/stats/summary", nil)
  68. if err != nil {
  69. return nil, fmt.Errorf("failed to build http request: %v", err)
  70. }
  71. req.Header.Add("Accept", "application/json")
  72. client := &http.Client{}
  73. resp, err := client.Do(req)
  74. if err != nil {
  75. return nil, fmt.Errorf("failed to get /stats/summary: %v", err)
  76. }
  77. defer resp.Body.Close()
  78. contentsBytes, err := ioutil.ReadAll(resp.Body)
  79. if err != nil {
  80. return nil, fmt.Errorf("failed to read /stats/summary: %+v", resp)
  81. }
  82. decoder := json.NewDecoder(strings.NewReader(string(contentsBytes)))
  83. summary := stats.Summary{}
  84. err = decoder.Decode(&summary)
  85. if err != nil {
  86. return nil, fmt.Errorf("failed to parse /stats/summary to go struct: %+v", resp)
  87. }
  88. return &summary, nil
  89. }
  90. func getNodeDevices() (*podresourcesapi.ListPodResourcesResponse, error) {
  91. endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket)
  92. if err != nil {
  93. return nil, fmt.Errorf("Error getting local endpoint: %v", err)
  94. }
  95. client, conn, err := podresources.GetClient(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
  96. if err != nil {
  97. return nil, fmt.Errorf("Error getting grpc client: %v", err)
  98. }
  99. defer conn.Close()
  100. ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
  101. defer cancel()
  102. resp, err := client.List(ctx, &podresourcesapi.ListPodResourcesRequest{})
  103. if err != nil {
  104. return nil, fmt.Errorf("%v.Get(_) = _, %v", client, err)
  105. }
  106. return resp, nil
  107. }
  108. // Returns the current KubeletConfiguration
  109. func getCurrentKubeletConfig() (*kubeletconfig.KubeletConfiguration, error) {
  110. resp := pollConfigz(5*time.Minute, 5*time.Second)
  111. kubeCfg, err := decodeConfigz(resp)
  112. if err != nil {
  113. return nil, err
  114. }
  115. return kubeCfg, nil
  116. }
  117. // Must be called within a Context. Allows the function to modify the KubeletConfiguration during the BeforeEach of the context.
  118. // The change is reverted in the AfterEach of the context.
  119. // Returns true on success.
  120. func tempSetCurrentKubeletConfig(f *framework.Framework, updateFunction func(initialConfig *kubeletconfig.KubeletConfiguration)) {
  121. var oldCfg *kubeletconfig.KubeletConfiguration
  122. BeforeEach(func() {
  123. configEnabled, err := isKubeletConfigEnabled(f)
  124. framework.ExpectNoError(err)
  125. Expect(configEnabled).To(BeTrue(), "The Dynamic Kubelet Configuration feature is not enabled.\n"+
  126. "Pass --feature-gates=DynamicKubeletConfig=true to the Kubelet to enable this feature.\n"+
  127. "For `make test-e2e-node`, you can set `TEST_ARGS='--feature-gates=DynamicKubeletConfig=true'`.")
  128. oldCfg, err = getCurrentKubeletConfig()
  129. framework.ExpectNoError(err)
  130. newCfg := oldCfg.DeepCopy()
  131. updateFunction(newCfg)
  132. if apiequality.Semantic.DeepEqual(*newCfg, *oldCfg) {
  133. return
  134. }
  135. framework.ExpectNoError(setKubeletConfiguration(f, newCfg))
  136. })
  137. AfterEach(func() {
  138. if oldCfg != nil {
  139. err := setKubeletConfiguration(f, oldCfg)
  140. framework.ExpectNoError(err)
  141. }
  142. })
  143. }
  144. // Returns true if kubeletConfig is enabled, false otherwise or if we cannot determine if it is.
  145. func isKubeletConfigEnabled(f *framework.Framework) (bool, error) {
  146. cfgz, err := getCurrentKubeletConfig()
  147. if err != nil {
  148. return false, fmt.Errorf("could not determine whether 'DynamicKubeletConfig' feature is enabled, err: %v", err)
  149. }
  150. v, ok := cfgz.FeatureGates[string(features.DynamicKubeletConfig)]
  151. if !ok {
  152. return true, nil
  153. }
  154. return v, nil
  155. }
  156. // Creates or updates the configmap for KubeletConfiguration, waits for the Kubelet to restart
  157. // with the new configuration. Returns an error if the configuration after waiting for restartGap
  158. // doesn't match what you attempted to set, or if the dynamic configuration feature is disabled.
  159. // You should only call this from serial tests.
  160. func setKubeletConfiguration(f *framework.Framework, kubeCfg *kubeletconfig.KubeletConfiguration) error {
  161. const (
  162. restartGap = 40 * time.Second
  163. pollInterval = 5 * time.Second
  164. )
  165. // make sure Dynamic Kubelet Configuration feature is enabled on the Kubelet we are about to reconfigure
  166. if configEnabled, err := isKubeletConfigEnabled(f); err != nil {
  167. return err
  168. } else if !configEnabled {
  169. return fmt.Errorf("The Dynamic Kubelet Configuration feature is not enabled.\n" +
  170. "Pass --feature-gates=DynamicKubeletConfig=true to the Kubelet to enable this feature.\n" +
  171. "For `make test-e2e-node`, you can set `TEST_ARGS='--feature-gates=DynamicKubeletConfig=true'`.")
  172. }
  173. // create the ConfigMap with the new configuration
  174. cm, err := createConfigMap(f, kubeCfg)
  175. if err != nil {
  176. return err
  177. }
  178. // create the reference and set Node.Spec.ConfigSource
  179. src := &apiv1.NodeConfigSource{
  180. ConfigMap: &apiv1.ConfigMapNodeConfigSource{
  181. Namespace: "kube-system",
  182. Name: cm.Name,
  183. KubeletConfigKey: "kubelet",
  184. },
  185. }
  186. // set the source, retry a few times in case we are competing with other writers
  187. Eventually(func() error {
  188. if err := setNodeConfigSource(f, src); err != nil {
  189. return err
  190. }
  191. return nil
  192. }, time.Minute, time.Second).Should(BeNil())
  193. // poll for new config, for a maximum wait of restartGap
  194. Eventually(func() error {
  195. newKubeCfg, err := getCurrentKubeletConfig()
  196. if err != nil {
  197. return fmt.Errorf("failed trying to get current Kubelet config, will retry, error: %v", err)
  198. }
  199. if !apiequality.Semantic.DeepEqual(*kubeCfg, *newKubeCfg) {
  200. return fmt.Errorf("still waiting for new configuration to take effect, will continue to watch /configz")
  201. }
  202. klog.Infof("new configuration has taken effect")
  203. return nil
  204. }, restartGap, pollInterval).Should(BeNil())
  205. return nil
  206. }
  207. // sets the current node's configSource, this should only be called from Serial tests
  208. func setNodeConfigSource(f *framework.Framework, source *apiv1.NodeConfigSource) error {
  209. // since this is a serial test, we just get the node, change the source, and then update it
  210. // this prevents any issues with the patch API from affecting the test results
  211. nodeclient := f.ClientSet.CoreV1().Nodes()
  212. // get the node
  213. node, err := nodeclient.Get(framework.TestContext.NodeName, metav1.GetOptions{})
  214. if err != nil {
  215. return err
  216. }
  217. // set new source
  218. node.Spec.ConfigSource = source
  219. // update to the new source
  220. _, err = nodeclient.Update(node)
  221. if err != nil {
  222. return err
  223. }
  224. return nil
  225. }
  226. // Causes the test to fail, or returns a status 200 response from the /configz endpoint
  227. func pollConfigz(timeout time.Duration, pollInterval time.Duration) *http.Response {
  228. endpoint := fmt.Sprintf("http://127.0.0.1:8080/api/v1/nodes/%s/proxy/configz", framework.TestContext.NodeName)
  229. client := &http.Client{}
  230. req, err := http.NewRequest("GET", endpoint, nil)
  231. framework.ExpectNoError(err)
  232. req.Header.Add("Accept", "application/json")
  233. var resp *http.Response
  234. Eventually(func() bool {
  235. resp, err = client.Do(req)
  236. if err != nil {
  237. klog.Errorf("Failed to get /configz, retrying. Error: %v", err)
  238. return false
  239. }
  240. if resp.StatusCode != 200 {
  241. klog.Errorf("/configz response status not 200, retrying. Response was: %+v", resp)
  242. return false
  243. }
  244. return true
  245. }, timeout, pollInterval).Should(Equal(true))
  246. return resp
  247. }
  248. // Decodes the http response from /configz and returns a kubeletconfig.KubeletConfiguration (internal type).
  249. func decodeConfigz(resp *http.Response) (*kubeletconfig.KubeletConfiguration, error) {
  250. // This hack because /configz reports the following structure:
  251. // {"kubeletconfig": {the JSON representation of kubeletconfigv1beta1.KubeletConfiguration}}
  252. type configzWrapper struct {
  253. ComponentConfig kubeletconfigv1beta1.KubeletConfiguration `json:"kubeletconfig"`
  254. }
  255. configz := configzWrapper{}
  256. kubeCfg := kubeletconfig.KubeletConfiguration{}
  257. contentsBytes, err := ioutil.ReadAll(resp.Body)
  258. if err != nil {
  259. return nil, err
  260. }
  261. err = json.Unmarshal(contentsBytes, &configz)
  262. if err != nil {
  263. return nil, err
  264. }
  265. err = scheme.Scheme.Convert(&configz.ComponentConfig, &kubeCfg, nil)
  266. if err != nil {
  267. return nil, err
  268. }
  269. return &kubeCfg, nil
  270. }
  271. // creates a configmap containing kubeCfg in kube-system namespace
  272. func createConfigMap(f *framework.Framework, internalKC *kubeletconfig.KubeletConfiguration) (*apiv1.ConfigMap, error) {
  273. cmap := newKubeletConfigMap("testcfg", internalKC)
  274. cmap, err := f.ClientSet.CoreV1().ConfigMaps("kube-system").Create(cmap)
  275. if err != nil {
  276. return nil, err
  277. }
  278. return cmap, nil
  279. }
  280. // constructs a ConfigMap, populating one of its keys with the KubeletConfiguration. Always uses GenerateName to generate a suffix.
  281. func newKubeletConfigMap(name string, internalKC *kubeletconfig.KubeletConfiguration) *apiv1.ConfigMap {
  282. data, err := kubeletconfigcodec.EncodeKubeletConfig(internalKC, kubeletconfigv1beta1.SchemeGroupVersion)
  283. framework.ExpectNoError(err)
  284. cmap := &apiv1.ConfigMap{
  285. ObjectMeta: metav1.ObjectMeta{GenerateName: name + "-"},
  286. Data: map[string]string{
  287. "kubelet": string(data),
  288. },
  289. }
  290. return cmap
  291. }
  292. func logPodEvents(f *framework.Framework) {
  293. e2elog.Logf("Summary of pod events during the test:")
  294. err := framework.ListNamespaceEvents(f.ClientSet, f.Namespace.Name)
  295. framework.ExpectNoError(err)
  296. }
  297. func logNodeEvents(f *framework.Framework) {
  298. e2elog.Logf("Summary of node events during the test:")
  299. err := framework.ListNamespaceEvents(f.ClientSet, "")
  300. framework.ExpectNoError(err)
  301. }
  302. func getLocalNode(f *framework.Framework) *apiv1.Node {
  303. nodeList := framework.GetReadySchedulableNodesOrDie(f.ClientSet)
  304. Expect(len(nodeList.Items)).To(Equal(1), "Unexpected number of node objects for node e2e. Expects only one node.")
  305. return &nodeList.Items[0]
  306. }
  307. // logKubeletLatencyMetrics logs KubeletLatencyMetrics computed from the Prometheus
  308. // metrics exposed on the current node and identified by the metricNames.
  309. // The Kubelet subsystem prefix is automatically prepended to these metric names.
  310. func logKubeletLatencyMetrics(metricNames ...string) {
  311. metricSet := sets.NewString()
  312. for _, key := range metricNames {
  313. metricSet.Insert(kubeletmetrics.KubeletSubsystem + "_" + key)
  314. }
  315. metric, err := frameworkmetrics.GrabKubeletMetricsWithoutProxy(framework.TestContext.NodeName+":10255", "/metrics")
  316. if err != nil {
  317. e2elog.Logf("Error getting kubelet metrics: %v", err)
  318. } else {
  319. e2elog.Logf("Kubelet Metrics: %+v", framework.GetKubeletLatencyMetrics(metric, metricSet))
  320. }
  321. }
  322. // returns config related metrics from the local kubelet, filtered to the filterMetricNames passed in
  323. func getKubeletMetrics(filterMetricNames sets.String) (frameworkmetrics.KubeletMetrics, error) {
  324. // grab Kubelet metrics
  325. ms, err := frameworkmetrics.GrabKubeletMetricsWithoutProxy(framework.TestContext.NodeName+":10255", "/metrics")
  326. if err != nil {
  327. return nil, err
  328. }
  329. filtered := frameworkmetrics.NewKubeletMetrics()
  330. for name := range ms {
  331. if !filterMetricNames.Has(name) {
  332. continue
  333. }
  334. filtered[name] = ms[name]
  335. }
  336. return filtered, nil
  337. }
  338. // runCommand runs the cmd and returns the combined stdout and stderr, or an
  339. // error if the command failed.
  340. func runCommand(cmd ...string) (string, error) {
  341. output, err := exec.Command(cmd[0], cmd[1:]...).CombinedOutput()
  342. if err != nil {
  343. return "", fmt.Errorf("failed to run %q: %s (%s)", strings.Join(cmd, " "), err, output)
  344. }
  345. return string(output), nil
  346. }
  347. // getCRIClient connects CRI and returns CRI runtime service clients and image service client.
  348. func getCRIClient() (internalapi.RuntimeService, internalapi.ImageManagerService, error) {
  349. // connection timeout for CRI service connection
  350. const connectionTimeout = 2 * time.Minute
  351. runtimeEndpoint := framework.TestContext.ContainerRuntimeEndpoint
  352. r, err := remote.NewRemoteRuntimeService(runtimeEndpoint, connectionTimeout)
  353. if err != nil {
  354. return nil, nil, err
  355. }
  356. imageManagerEndpoint := runtimeEndpoint
  357. if framework.TestContext.ImageServiceEndpoint != "" {
  358. //ImageServiceEndpoint is the same as ContainerRuntimeEndpoint if not
  359. //explicitly specified
  360. imageManagerEndpoint = framework.TestContext.ImageServiceEndpoint
  361. }
  362. i, err := remote.NewRemoteImageService(imageManagerEndpoint, connectionTimeout)
  363. if err != nil {
  364. return nil, nil, err
  365. }
  366. return r, i, nil
  367. }
  368. // TODO: Find a uniform way to deal with systemctl/initctl/service operations. #34494
  369. func restartKubelet() {
  370. stdout, err := exec.Command("sudo", "systemctl", "list-units", "kubelet*", "--state=running").CombinedOutput()
  371. framework.ExpectNoError(err)
  372. regex := regexp.MustCompile("(kubelet-\\w+)")
  373. matches := regex.FindStringSubmatch(string(stdout))
  374. Expect(len(matches)).NotTo(BeZero())
  375. kube := matches[0]
  376. e2elog.Logf("Get running kubelet with systemctl: %v, %v", string(stdout), kube)
  377. stdout, err = exec.Command("sudo", "systemctl", "restart", kube).CombinedOutput()
  378. framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %v", err, stdout)
  379. }
  380. func toCgroupFsName(cgroupName cm.CgroupName) string {
  381. if framework.TestContext.KubeletConfig.CgroupDriver == "systemd" {
  382. return cgroupName.ToSystemd()
  383. } else {
  384. return cgroupName.ToCgroupfs()
  385. }
  386. }
  387. // reduceAllocatableMemoryUsage uses memory.force_empty (https://lwn.net/Articles/432224/)
  388. // to make the kernel reclaim memory in the allocatable cgroup
  389. // the time to reduce pressure may be unbounded, but usually finishes within a second
  390. func reduceAllocatableMemoryUsage() {
  391. cmd := fmt.Sprintf("echo 0 > /sys/fs/cgroup/memory/%s/memory.force_empty", toCgroupFsName(cm.NewCgroupName(cm.RootCgroupName, defaultNodeAllocatableCgroup)))
  392. _, err := exec.Command("sudo", "sh", "-c", cmd).CombinedOutput()
  393. framework.ExpectNoError(err)
  394. }
  395. // Equivalent of featuregatetesting.SetFeatureGateDuringTest
  396. // which can't be used here because we're not in a Testing context.
  397. // This must be in a non-"_test" file to pass
  398. // make verify WHAT=test-featuregates
  399. func withFeatureGate(feature featuregate.Feature, desired bool) func() {
  400. current := utilfeature.DefaultFeatureGate.Enabled(feature)
  401. utilfeature.DefaultMutableFeatureGate.Set(fmt.Sprintf("%s=%v", string(feature), desired))
  402. return func() {
  403. utilfeature.DefaultMutableFeatureGate.Set(fmt.Sprintf("%s=%v", string(feature), current))
  404. }
  405. }