util.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415
  1. /*
  2. Copyright 2016 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package e2enode
  14. import (
  15. "context"
  16. "encoding/json"
  17. "flag"
  18. "fmt"
  19. "io/ioutil"
  20. "net/http"
  21. "os/exec"
  22. "regexp"
  23. "strings"
  24. "time"
  25. v1 "k8s.io/api/core/v1"
  26. apiequality "k8s.io/apimachinery/pkg/api/equality"
  27. metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
  28. "k8s.io/apimachinery/pkg/util/sets"
  29. utilfeature "k8s.io/apiserver/pkg/util/feature"
  30. clientset "k8s.io/client-go/kubernetes"
  31. "k8s.io/component-base/featuregate"
  32. internalapi "k8s.io/cri-api/pkg/apis"
  33. "k8s.io/klog"
  34. kubeletconfigv1beta1 "k8s.io/kubelet/config/v1beta1"
  35. "k8s.io/kubernetes/pkg/features"
  36. kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
  37. "k8s.io/kubernetes/pkg/kubelet/apis/podresources"
  38. kubeletpodresourcesv1alpha1 "k8s.io/kubernetes/pkg/kubelet/apis/podresources/v1alpha1"
  39. kubeletstatsv1alpha1 "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
  40. "k8s.io/kubernetes/pkg/kubelet/cm"
  41. kubeletconfigcodec "k8s.io/kubernetes/pkg/kubelet/kubeletconfig/util/codec"
  42. kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
  43. "k8s.io/kubernetes/pkg/kubelet/remote"
  44. "k8s.io/kubernetes/pkg/kubelet/util"
  45. "k8s.io/kubernetes/test/e2e/framework"
  46. e2ekubelet "k8s.io/kubernetes/test/e2e/framework/kubelet"
  47. e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics"
  48. e2enode "k8s.io/kubernetes/test/e2e/framework/node"
  49. imageutils "k8s.io/kubernetes/test/utils/image"
  50. "github.com/onsi/ginkgo"
  51. "github.com/onsi/gomega"
  52. )
  53. // TODO(random-liu): Get this automatically from kubelet flag.
  54. var kubeletAddress = flag.String("kubelet-address", "http://127.0.0.1:10255", "Host and port of the kubelet")
  55. var startServices = flag.Bool("start-services", true, "If true, start local node services")
  56. var stopServices = flag.Bool("stop-services", true, "If true, stop local node services after running tests")
  57. var busyboxImage = imageutils.GetE2EImage(imageutils.BusyBox)
  58. const (
  59. // Kubelet internal cgroup name for node allocatable cgroup.
  60. defaultNodeAllocatableCgroup = "kubepods"
  61. // defaultPodResourcesPath is the path to the local endpoint serving the podresources GRPC service.
  62. defaultPodResourcesPath = "/var/lib/kubelet/pod-resources"
  63. defaultPodResourcesTimeout = 10 * time.Second
  64. defaultPodResourcesMaxSize = 1024 * 1024 * 16 // 16 Mb
  65. )
  66. func getNodeSummary() (*kubeletstatsv1alpha1.Summary, error) {
  67. req, err := http.NewRequest("GET", *kubeletAddress+"/stats/summary", nil)
  68. if err != nil {
  69. return nil, fmt.Errorf("failed to build http request: %v", err)
  70. }
  71. req.Header.Add("Accept", "application/json")
  72. client := &http.Client{}
  73. resp, err := client.Do(req)
  74. if err != nil {
  75. return nil, fmt.Errorf("failed to get /stats/summary: %v", err)
  76. }
  77. defer resp.Body.Close()
  78. contentsBytes, err := ioutil.ReadAll(resp.Body)
  79. if err != nil {
  80. return nil, fmt.Errorf("failed to read /stats/summary: %+v", resp)
  81. }
  82. decoder := json.NewDecoder(strings.NewReader(string(contentsBytes)))
  83. summary := kubeletstatsv1alpha1.Summary{}
  84. err = decoder.Decode(&summary)
  85. if err != nil {
  86. return nil, fmt.Errorf("failed to parse /stats/summary to go struct: %+v", resp)
  87. }
  88. return &summary, nil
  89. }
  90. func getNodeDevices() (*kubeletpodresourcesv1alpha1.ListPodResourcesResponse, error) {
  91. endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket)
  92. if err != nil {
  93. return nil, fmt.Errorf("Error getting local endpoint: %v", err)
  94. }
  95. client, conn, err := podresources.GetClient(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
  96. if err != nil {
  97. return nil, fmt.Errorf("Error getting grpc client: %v", err)
  98. }
  99. defer conn.Close()
  100. ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
  101. defer cancel()
  102. resp, err := client.List(ctx, &kubeletpodresourcesv1alpha1.ListPodResourcesRequest{})
  103. if err != nil {
  104. return nil, fmt.Errorf("%v.Get(_) = _, %v", client, err)
  105. }
  106. return resp, nil
  107. }
  108. // Returns the current KubeletConfiguration
  109. func getCurrentKubeletConfig() (*kubeletconfig.KubeletConfiguration, error) {
  110. // namespace only relevant if useProxy==true, so we don't bother
  111. return e2ekubelet.GetCurrentKubeletConfig(framework.TestContext.NodeName, "", false)
  112. }
  113. // Must be called within a Context. Allows the function to modify the KubeletConfiguration during the BeforeEach of the context.
  114. // The change is reverted in the AfterEach of the context.
  115. // Returns true on success.
  116. func tempSetCurrentKubeletConfig(f *framework.Framework, updateFunction func(initialConfig *kubeletconfig.KubeletConfiguration)) {
  117. var oldCfg *kubeletconfig.KubeletConfiguration
  118. ginkgo.BeforeEach(func() {
  119. configEnabled, err := isKubeletConfigEnabled(f)
  120. framework.ExpectNoError(err)
  121. framework.ExpectEqual(configEnabled, true, "The Dynamic Kubelet Configuration feature is not enabled.\n"+
  122. "Pass --feature-gates=DynamicKubeletConfig=true to the Kubelet to enable this feature.\n"+
  123. "For `make test-e2e-node`, you can set `TEST_ARGS='--feature-gates=DynamicKubeletConfig=true'`.")
  124. oldCfg, err = getCurrentKubeletConfig()
  125. framework.ExpectNoError(err)
  126. newCfg := oldCfg.DeepCopy()
  127. updateFunction(newCfg)
  128. if apiequality.Semantic.DeepEqual(*newCfg, *oldCfg) {
  129. return
  130. }
  131. framework.ExpectNoError(setKubeletConfiguration(f, newCfg))
  132. })
  133. ginkgo.AfterEach(func() {
  134. if oldCfg != nil {
  135. err := setKubeletConfiguration(f, oldCfg)
  136. framework.ExpectNoError(err)
  137. }
  138. })
  139. }
  140. // Returns true if kubeletConfig is enabled, false otherwise or if we cannot determine if it is.
  141. func isKubeletConfigEnabled(f *framework.Framework) (bool, error) {
  142. cfgz, err := getCurrentKubeletConfig()
  143. if err != nil {
  144. return false, fmt.Errorf("could not determine whether 'DynamicKubeletConfig' feature is enabled, err: %v", err)
  145. }
  146. v, ok := cfgz.FeatureGates[string(features.DynamicKubeletConfig)]
  147. if !ok {
  148. return true, nil
  149. }
  150. return v, nil
  151. }
  152. // Creates or updates the configmap for KubeletConfiguration, waits for the Kubelet to restart
  153. // with the new configuration. Returns an error if the configuration after waiting for restartGap
  154. // doesn't match what you attempted to set, or if the dynamic configuration feature is disabled.
  155. // You should only call this from serial tests.
  156. func setKubeletConfiguration(f *framework.Framework, kubeCfg *kubeletconfig.KubeletConfiguration) error {
  157. const (
  158. restartGap = 40 * time.Second
  159. pollInterval = 5 * time.Second
  160. )
  161. // make sure Dynamic Kubelet Configuration feature is enabled on the Kubelet we are about to reconfigure
  162. if configEnabled, err := isKubeletConfigEnabled(f); err != nil {
  163. return err
  164. } else if !configEnabled {
  165. return fmt.Errorf("The Dynamic Kubelet Configuration feature is not enabled.\n" +
  166. "Pass --feature-gates=DynamicKubeletConfig=true to the Kubelet to enable this feature.\n" +
  167. "For `make test-e2e-node`, you can set `TEST_ARGS='--feature-gates=DynamicKubeletConfig=true'`.")
  168. }
  169. // create the ConfigMap with the new configuration
  170. cm, err := createConfigMap(f, kubeCfg)
  171. if err != nil {
  172. return err
  173. }
  174. // create the reference and set Node.Spec.ConfigSource
  175. src := &v1.NodeConfigSource{
  176. ConfigMap: &v1.ConfigMapNodeConfigSource{
  177. Namespace: "kube-system",
  178. Name: cm.Name,
  179. KubeletConfigKey: "kubelet",
  180. },
  181. }
  182. // set the source, retry a few times in case we are competing with other writers
  183. gomega.Eventually(func() error {
  184. if err := setNodeConfigSource(f, src); err != nil {
  185. return err
  186. }
  187. return nil
  188. }, time.Minute, time.Second).Should(gomega.BeNil())
  189. // poll for new config, for a maximum wait of restartGap
  190. gomega.Eventually(func() error {
  191. newKubeCfg, err := getCurrentKubeletConfig()
  192. if err != nil {
  193. return fmt.Errorf("failed trying to get current Kubelet config, will retry, error: %v", err)
  194. }
  195. if !apiequality.Semantic.DeepEqual(*kubeCfg, *newKubeCfg) {
  196. return fmt.Errorf("still waiting for new configuration to take effect, will continue to watch /configz")
  197. }
  198. klog.Infof("new configuration has taken effect")
  199. return nil
  200. }, restartGap, pollInterval).Should(gomega.BeNil())
  201. return nil
  202. }
  203. // sets the current node's configSource, this should only be called from Serial tests
  204. func setNodeConfigSource(f *framework.Framework, source *v1.NodeConfigSource) error {
  205. // since this is a serial test, we just get the node, change the source, and then update it
  206. // this prevents any issues with the patch API from affecting the test results
  207. nodeclient := f.ClientSet.CoreV1().Nodes()
  208. // get the node
  209. node, err := nodeclient.Get(context.TODO(), framework.TestContext.NodeName, metav1.GetOptions{})
  210. if err != nil {
  211. return err
  212. }
  213. // set new source
  214. node.Spec.ConfigSource = source
  215. // update to the new source
  216. _, err = nodeclient.Update(context.TODO(), node, metav1.UpdateOptions{})
  217. if err != nil {
  218. return err
  219. }
  220. return nil
  221. }
  222. // creates a configmap containing kubeCfg in kube-system namespace
  223. func createConfigMap(f *framework.Framework, internalKC *kubeletconfig.KubeletConfiguration) (*v1.ConfigMap, error) {
  224. cmap := newKubeletConfigMap("testcfg", internalKC)
  225. cmap, err := f.ClientSet.CoreV1().ConfigMaps("kube-system").Create(context.TODO(), cmap, metav1.CreateOptions{})
  226. if err != nil {
  227. return nil, err
  228. }
  229. return cmap, nil
  230. }
  231. // constructs a ConfigMap, populating one of its keys with the KubeletConfiguration. Always uses GenerateName to generate a suffix.
  232. func newKubeletConfigMap(name string, internalKC *kubeletconfig.KubeletConfiguration) *v1.ConfigMap {
  233. data, err := kubeletconfigcodec.EncodeKubeletConfig(internalKC, kubeletconfigv1beta1.SchemeGroupVersion)
  234. framework.ExpectNoError(err)
  235. cmap := &v1.ConfigMap{
  236. ObjectMeta: metav1.ObjectMeta{GenerateName: name + "-"},
  237. Data: map[string]string{
  238. "kubelet": string(data),
  239. },
  240. }
  241. return cmap
  242. }
  243. // listNamespaceEvents lists the events in the given namespace.
  244. func listNamespaceEvents(c clientset.Interface, ns string) error {
  245. ls, err := c.CoreV1().Events(ns).List(context.TODO(), metav1.ListOptions{})
  246. if err != nil {
  247. return err
  248. }
  249. for _, event := range ls.Items {
  250. klog.Infof("Event(%#v): type: '%v' reason: '%v' %v", event.InvolvedObject, event.Type, event.Reason, event.Message)
  251. }
  252. return nil
  253. }
  254. func logPodEvents(f *framework.Framework) {
  255. framework.Logf("Summary of pod events during the test:")
  256. err := listNamespaceEvents(f.ClientSet, f.Namespace.Name)
  257. framework.ExpectNoError(err)
  258. }
  259. func logNodeEvents(f *framework.Framework) {
  260. framework.Logf("Summary of node events during the test:")
  261. err := listNamespaceEvents(f.ClientSet, "")
  262. framework.ExpectNoError(err)
  263. }
  264. func getLocalNode(f *framework.Framework) *v1.Node {
  265. nodeList, err := e2enode.GetReadySchedulableNodes(f.ClientSet)
  266. framework.ExpectNoError(err)
  267. framework.ExpectEqual(len(nodeList.Items), 1, "Unexpected number of node objects for node e2e. Expects only one node.")
  268. return &nodeList.Items[0]
  269. }
  270. // logKubeletLatencyMetrics logs KubeletLatencyMetrics computed from the Prometheus
  271. // metrics exposed on the current node and identified by the metricNames.
  272. // The Kubelet subsystem prefix is automatically prepended to these metric names.
  273. func logKubeletLatencyMetrics(metricNames ...string) {
  274. metricSet := sets.NewString()
  275. for _, key := range metricNames {
  276. metricSet.Insert(kubeletmetrics.KubeletSubsystem + "_" + key)
  277. }
  278. metric, err := e2emetrics.GrabKubeletMetricsWithoutProxy(framework.TestContext.NodeName+":10255", "/metrics")
  279. if err != nil {
  280. framework.Logf("Error getting kubelet metrics: %v", err)
  281. } else {
  282. framework.Logf("Kubelet Metrics: %+v", e2emetrics.GetKubeletLatencyMetrics(metric, metricSet))
  283. }
  284. }
  285. // returns config related metrics from the local kubelet, filtered to the filterMetricNames passed in
  286. func getKubeletMetrics(filterMetricNames sets.String) (e2emetrics.KubeletMetrics, error) {
  287. // grab Kubelet metrics
  288. ms, err := e2emetrics.GrabKubeletMetricsWithoutProxy(framework.TestContext.NodeName+":10255", "/metrics")
  289. if err != nil {
  290. return nil, err
  291. }
  292. filtered := e2emetrics.NewKubeletMetrics()
  293. for name := range ms {
  294. if !filterMetricNames.Has(name) {
  295. continue
  296. }
  297. filtered[name] = ms[name]
  298. }
  299. return filtered, nil
  300. }
  301. // runCommand runs the cmd and returns the combined stdout and stderr, or an
  302. // error if the command failed.
  303. func runCommand(cmd ...string) (string, error) {
  304. output, err := exec.Command(cmd[0], cmd[1:]...).CombinedOutput()
  305. if err != nil {
  306. return "", fmt.Errorf("failed to run %q: %s (%s)", strings.Join(cmd, " "), err, output)
  307. }
  308. return string(output), nil
  309. }
  310. // getCRIClient connects CRI and returns CRI runtime service clients and image service client.
  311. func getCRIClient() (internalapi.RuntimeService, internalapi.ImageManagerService, error) {
  312. // connection timeout for CRI service connection
  313. const connectionTimeout = 2 * time.Minute
  314. runtimeEndpoint := framework.TestContext.ContainerRuntimeEndpoint
  315. r, err := remote.NewRemoteRuntimeService(runtimeEndpoint, connectionTimeout)
  316. if err != nil {
  317. return nil, nil, err
  318. }
  319. imageManagerEndpoint := runtimeEndpoint
  320. if framework.TestContext.ImageServiceEndpoint != "" {
  321. //ImageServiceEndpoint is the same as ContainerRuntimeEndpoint if not
  322. //explicitly specified
  323. imageManagerEndpoint = framework.TestContext.ImageServiceEndpoint
  324. }
  325. i, err := remote.NewRemoteImageService(imageManagerEndpoint, connectionTimeout)
  326. if err != nil {
  327. return nil, nil, err
  328. }
  329. return r, i, nil
  330. }
  331. // TODO: Find a uniform way to deal with systemctl/initctl/service operations. #34494
  332. func restartKubelet() {
  333. stdout, err := exec.Command("sudo", "systemctl", "list-units", "kubelet*", "--state=running").CombinedOutput()
  334. framework.ExpectNoError(err)
  335. regex := regexp.MustCompile("(kubelet-\\w+)")
  336. matches := regex.FindStringSubmatch(string(stdout))
  337. framework.ExpectNotEqual(len(matches), 0)
  338. kube := matches[0]
  339. framework.Logf("Get running kubelet with systemctl: %v, %v", string(stdout), kube)
  340. stdout, err = exec.Command("sudo", "systemctl", "restart", kube).CombinedOutput()
  341. framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %v", err, stdout)
  342. }
  343. func toCgroupFsName(cgroupName cm.CgroupName) string {
  344. if framework.TestContext.KubeletConfig.CgroupDriver == "systemd" {
  345. return cgroupName.ToSystemd()
  346. }
  347. return cgroupName.ToCgroupfs()
  348. }
  349. // reduceAllocatableMemoryUsage uses memory.force_empty (https://lwn.net/Articles/432224/)
  350. // to make the kernel reclaim memory in the allocatable cgroup
  351. // the time to reduce pressure may be unbounded, but usually finishes within a second
  352. func reduceAllocatableMemoryUsage() {
  353. cmd := fmt.Sprintf("echo 0 > /sys/fs/cgroup/memory/%s/memory.force_empty", toCgroupFsName(cm.NewCgroupName(cm.RootCgroupName, defaultNodeAllocatableCgroup)))
  354. _, err := exec.Command("sudo", "sh", "-c", cmd).CombinedOutput()
  355. framework.ExpectNoError(err)
  356. }
  357. // Equivalent of featuregatetesting.SetFeatureGateDuringTest
  358. // which can't be used here because we're not in a Testing context.
  359. // This must be in a non-"_test" file to pass
  360. // make verify WHAT=test-featuregates
  361. func withFeatureGate(feature featuregate.Feature, desired bool) func() {
  362. current := utilfeature.DefaultFeatureGate.Enabled(feature)
  363. utilfeature.DefaultMutableFeatureGate.Set(fmt.Sprintf("%s=%v", string(feature), desired))
  364. return func() {
  365. utilfeature.DefaultMutableFeatureGate.Set(fmt.Sprintf("%s=%v", string(feature), current))
  366. }
  367. }