topology_manager_test.go
/*
Copyright 2019 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2enode

import (
	"context"
	"fmt"
	"io/ioutil"
	"os/exec"
	"regexp"
	"strconv"
	"strings"
	"time"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
	"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
	"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
	"k8s.io/kubernetes/pkg/kubelet/types"
	"k8s.io/kubernetes/test/e2e/framework"
	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
	"k8s.io/kubernetes/test/e2e/framework/testfiles"

	"github.com/onsi/ginkgo"
	"github.com/onsi/gomega"
)

const (
	numalignCmd = `export CPULIST_ALLOWED=$( awk -F":\t*" '/Cpus_allowed_list/ { print $2 }' /proc/self/status); env; sleep 1d`

	minNumaNodes = 2
	minCoreCount = 4
)

// Helper for makeTopologyManagerPod().
type tmCtnAttribute struct {
	ctnName       string
	cpuRequest    string
	cpuLimit      string
	deviceName    string
	deviceRequest string
	deviceLimit   string
}

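// detectNUMANodes returns the number of NUMA nodes reported by lscpu on the local node.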
func detectNUMANodes() int {
	outData, err := exec.Command("/bin/sh", "-c", "lscpu | grep \"NUMA node(s):\" | cut -d \":\" -f 2").Output()
	framework.ExpectNoError(err)

	numaNodes, err := strconv.Atoi(strings.TrimSpace(string(outData)))
	framework.ExpectNoError(err)

	return numaNodes
}

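// detectCoresPerSocket returns the number of physical cores per socket reported by lscpu on the local node.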
func detectCoresPerSocket() int {
	outData, err := exec.Command("/bin/sh", "-c", "lscpu | grep \"Core(s) per socket:\" | cut -d \":\" -f 2").Output()
	framework.ExpectNoError(err)

	coreCount, err := strconv.Atoi(strings.TrimSpace(string(outData)))
	framework.ExpectNoError(err)

	return coreCount
}

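// detectSRIOVDevices counts the SRIOV virtual functions exposed under /sys/bus/pci/devices.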
func detectSRIOVDevices() int {
	outData, err := exec.Command("/bin/sh", "-c", "ls /sys/bus/pci/devices/*/physfn | wc -w").Output()
	framework.ExpectNoError(err)

	devCount, err := strconv.Atoi(strings.TrimSpace(string(outData)))
	framework.ExpectNoError(err)

	return devCount
}

// makeTopologyManagerPod returns a pod with the provided tmCtnAttributes.
func makeTopologyManagerPod(podName string, tmCtnAttributes []tmCtnAttribute) *v1.Pod {
	cpusetCmd := "grep Cpus_allowed_list /proc/self/status | cut -f2 && sleep 1d"
	return makeTopologyManagerTestPod(podName, cpusetCmd, tmCtnAttributes)
}

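// makeTopologyManagerTestPod returns a single pod whose containers all run podCmd
// with the CPU, memory and device resources described by tmCtnAttributes.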
func makeTopologyManagerTestPod(podName, podCmd string, tmCtnAttributes []tmCtnAttribute) *v1.Pod {
	var containers []v1.Container
	for _, ctnAttr := range tmCtnAttributes {
		ctn := v1.Container{
			Name:  ctnAttr.ctnName,
			Image: busyboxImage,
			Resources: v1.ResourceRequirements{
				Requests: v1.ResourceList{
					v1.ResourceName(v1.ResourceCPU):    resource.MustParse(ctnAttr.cpuRequest),
					v1.ResourceName(v1.ResourceMemory): resource.MustParse("100Mi"),
				},
				Limits: v1.ResourceList{
					v1.ResourceName(v1.ResourceCPU):    resource.MustParse(ctnAttr.cpuLimit),
					v1.ResourceName(v1.ResourceMemory): resource.MustParse("100Mi"),
				},
			},
			Command: []string{"sh", "-c", podCmd},
		}
		if ctnAttr.deviceName != "" {
			ctn.Resources.Requests[v1.ResourceName(ctnAttr.deviceName)] = resource.MustParse(ctnAttr.deviceRequest)
			ctn.Resources.Limits[v1.ResourceName(ctnAttr.deviceName)] = resource.MustParse(ctnAttr.deviceLimit)
		}
		containers = append(containers, ctn)
	}

	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name: podName,
		},
		Spec: v1.PodSpec{
			RestartPolicy: v1.RestartPolicyNever,
			Containers:    containers,
		},
	}
}

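// findNUMANodeWithoutSRIOVDevicesFromConfigMap looks for "pcidevice_node<N>" annotations on the
// ConfigMap and returns the first NUMA node reported to have zero SRIOV devices attached.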
func findNUMANodeWithoutSRIOVDevicesFromConfigMap(configMap *v1.ConfigMap, numaNodes int) (int, bool) {
	for nodeNum := 0; nodeNum < numaNodes; nodeNum++ {
		value, ok := configMap.Annotations[fmt.Sprintf("pcidevice_node%d", nodeNum)]
		if !ok {
			framework.Logf("missing pcidevice annotation for NUMA node %d", nodeNum)
			return -1, false
		}
		v, err := strconv.Atoi(value)
		if err != nil {
			framework.Failf("error getting the PCI device count on NUMA node %d: %v", nodeNum, err)
		}
		if v == 0 {
			framework.Logf("NUMA node %d has no SRIOV devices attached", nodeNum)
			return nodeNum, true
		}
		framework.Logf("NUMA node %d has %d SRIOV devices attached", nodeNum, v)
	}
	return -1, false
}

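// findNUMANodeWithoutSRIOVDevicesFromSysfs inspects /sys/bus/pci/devices and returns the
// first NUMA node that has no SRIOV virtual functions attached.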
func findNUMANodeWithoutSRIOVDevicesFromSysfs(numaNodes int) (int, bool) {
	pciDevs, err := getPCIDeviceInfo("/sys/bus/pci/devices")
	if err != nil {
		framework.Failf("error detecting the PCI device NUMA node: %v", err)
	}

	pciPerNuma := make(map[int]int)
	for _, pciDev := range pciDevs {
		if pciDev.IsVFn {
			pciPerNuma[pciDev.NUMANode]++
		}
	}

	if len(pciPerNuma) == 0 {
		// if we got this far we already passed a rough check that SRIOV devices
		// are available in the box, so something is seriously wrong
		framework.Failf("failed to find any VF devices from %v", pciDevs)
	}

	for nodeNum := 0; nodeNum < numaNodes; nodeNum++ {
		v := pciPerNuma[nodeNum]
		if v == 0 {
			framework.Logf("NUMA node %d has no SRIOV devices attached", nodeNum)
			return nodeNum, true
		}
		framework.Logf("NUMA node %d has %d SRIOV devices attached", nodeNum, v)
	}
	return -1, false
}

func findNUMANodeWithoutSRIOVDevices(configMap *v1.ConfigMap, numaNodes int) (int, bool) {
	// if someone annotated the configMap, let's use this information
	if nodeNum, found := findNUMANodeWithoutSRIOVDevicesFromConfigMap(configMap, numaNodes); found {
		return nodeNum, found
	}
	// no annotations, try to autodetect
	// NOTE: this assumes all the VFs in the box can be used for the tests.
	return findNUMANodeWithoutSRIOVDevicesFromSysfs(numaNodes)
}

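// configureTopologyManagerInKubelet enables the CPUManager and TopologyManager feature gates,
// applies the requested Topology Manager policy alongside the static CPU Manager policy,
// restarts the Kubelet with the new configuration and returns the reserved system CPUs (if any).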
func configureTopologyManagerInKubelet(f *framework.Framework, oldCfg *kubeletconfig.KubeletConfiguration, policy string, configMap *v1.ConfigMap, numaNodes int) string {
	// Configure Topology Manager in Kubelet with policy.
	newCfg := oldCfg.DeepCopy()
	if newCfg.FeatureGates == nil {
		newCfg.FeatureGates = make(map[string]bool)
	}

	newCfg.FeatureGates["CPUManager"] = true
	newCfg.FeatureGates["TopologyManager"] = true

	deleteStateFile()

	// Set the Topology Manager policy
	newCfg.TopologyManagerPolicy = policy

	// Set the CPU Manager policy to static.
	newCfg.CPUManagerPolicy = string(cpumanager.PolicyStatic)

	// Set the CPU Manager reconcile period to 1 second.
	newCfg.CPUManagerReconcilePeriod = metav1.Duration{Duration: 1 * time.Second}

	if nodeNum, ok := findNUMANodeWithoutSRIOVDevices(configMap, numaNodes); ok {
		cpus, err := getCPUsPerNUMANode(nodeNum)
		framework.Logf("NUMA Node %d doesn't seem to have attached SRIOV devices and has cpus=%v", nodeNum, cpus)
		framework.ExpectNoError(err)
		newCfg.ReservedSystemCPUs = fmt.Sprintf("%d", cpus[len(cpus)-1])
	} else {
		// The Kubelet panics if either kube-reserved or system-reserved is not set
		// when CPU Manager is enabled. Set cpu in kube-reserved > 0 so that
		// kubelet doesn't panic.
		if newCfg.KubeReserved == nil {
			newCfg.KubeReserved = map[string]string{}
		}

		if _, ok := newCfg.KubeReserved["cpu"]; !ok {
			newCfg.KubeReserved["cpu"] = "200m"
		}
	}
	// Dump the config -- debug
	framework.Logf("New kubelet config is %s", *newCfg)

	// Update the Kubelet configuration.
	framework.ExpectNoError(setKubeletConfiguration(f, newCfg))

	// Wait for the Kubelet to be ready.
	gomega.Eventually(func() bool {
		nodes, err := e2enode.TotalReady(f.ClientSet)
		framework.ExpectNoError(err)
		return nodes == 1
	}, time.Minute, time.Second).Should(gomega.BeTrue())

	return newCfg.ReservedSystemCPUs
}

// getSRIOVDevicePluginPod returns the Device Plugin pod for sriov resources in e2e tests.
func getSRIOVDevicePluginPod() *v1.Pod {
	ds := readDaemonSetV1OrDie(testfiles.ReadOrDie(SRIOVDevicePluginDSYAML))
	p := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name:      SRIOVDevicePluginName,
			Namespace: metav1.NamespaceSystem,
		},
		Spec: ds.Spec.Template.Spec,
	}

	return p
}

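// readConfigMapV1OrDie decodes objBytes into a v1.ConfigMap, panicking on failure.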
func readConfigMapV1OrDie(objBytes []byte) *v1.ConfigMap {
	v1.AddToScheme(appsScheme)
	requiredObj, err := runtime.Decode(appsCodecs.UniversalDecoder(v1.SchemeGroupVersion), objBytes)
	if err != nil {
		panic(err)
	}
	return requiredObj.(*v1.ConfigMap)
}

// readServiceAccountV1OrDie decodes objBytes into a v1.ServiceAccount, panicking on failure.
func readServiceAccountV1OrDie(objBytes []byte) *v1.ServiceAccount {
	v1.AddToScheme(appsScheme)
	requiredObj, err := runtime.Decode(appsCodecs.UniversalDecoder(v1.SchemeGroupVersion), objBytes)
	if err != nil {
		panic(err)
	}
	return requiredObj.(*v1.ServiceAccount)
}

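// findSRIOVResource returns the name and capacity of the first intel.com/...sriov... resource
// advertised in the node status, or ("", 0) if none is found.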
func findSRIOVResource(node *v1.Node) (string, int64) {
	re := regexp.MustCompile(`^intel.com/.*sriov.*`)
	for key, val := range node.Status.Capacity {
		resource := string(key)
		if re.MatchString(resource) {
			v := val.Value()
			if v > 0 {
				return resource, v
			}
		}
	}
	return "", 0
}

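// validatePodAlignment checks, from the container logs, that every container of the given Gu pod
// got NUMA-aligned resources.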
func validatePodAlignment(f *framework.Framework, pod *v1.Pod, envInfo *testEnvInfo) {
	for _, cnt := range pod.Spec.Containers {
		ginkgo.By(fmt.Sprintf("validating the container %s on Gu pod %s", cnt.Name, pod.Name))

		logs, err := e2epod.GetPodLogs(f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name)
		framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", cnt.Name, pod.Name)

		framework.Logf("got pod logs: %v", logs)
		numaRes, err := checkNUMAAlignment(f, pod, &cnt, logs, envInfo)
		framework.ExpectNoError(err, "NUMA Alignment check failed for [%s] of pod [%s]", cnt.Name, pod.Name)
		if numaRes != nil {
			framework.Logf("NUMA resources for %s/%s: %s", pod.Name, cnt.Name, numaRes.String())
		}
	}
}

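// runTopologyManagerPolicySuiteTests runs the cpuset assignment checks for non-Gu and Gu pods
// under the currently configured Topology Manager policy.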
func runTopologyManagerPolicySuiteTests(f *framework.Framework) {
	var cpuCap, cpuAlloc int64
	var cpuListString, expAllowedCPUsListRegex string
	var cpuList []int
	var cpu1, cpu2 int
	var cset cpuset.CPUSet
	var err error
	var ctnAttrs []tmCtnAttribute
	var pod, pod1, pod2 *v1.Pod

	cpuCap, cpuAlloc, _ = getLocalNodeCPUDetails(f)

	ginkgo.By("running a non-Gu pod")
	ctnAttrs = []tmCtnAttribute{
		{
			ctnName: "non-gu-container",
			cpuRequest: "100m",
			cpuLimit: "200m",
		},
	}
	pod = makeTopologyManagerPod("non-gu-pod", ctnAttrs)
	pod = f.PodClient().CreateSync(pod)

	ginkgo.By("checking if the expected cpuset was assigned")
	expAllowedCPUsListRegex = fmt.Sprintf("^0-%d\n$", cpuCap-1)
	err = f.PodClient().MatchContainerOutput(pod.Name, pod.Spec.Containers[0].Name, expAllowedCPUsListRegex)
	framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
		pod.Spec.Containers[0].Name, pod.Name)

	ginkgo.By("by deleting the pods and waiting for container removal")
	deletePods(f, []string{pod.Name})
	waitForContainerRemoval(pod.Spec.Containers[0].Name, pod.Name, pod.Namespace)

	ginkgo.By("running a Gu pod")
	ctnAttrs = []tmCtnAttribute{
		{
			ctnName: "gu-container",
			cpuRequest: "1000m",
			cpuLimit: "1000m",
		},
	}
	pod = makeTopologyManagerPod("gu-pod", ctnAttrs)
	pod = f.PodClient().CreateSync(pod)

	ginkgo.By("checking if the expected cpuset was assigned")
	cpu1 = 1
	if isHTEnabled() {
		cpuList = cpuset.MustParse(getCPUSiblingList(0)).ToSlice()
		cpu1 = cpuList[1]
	}
	expAllowedCPUsListRegex = fmt.Sprintf("^%d\n$", cpu1)
	err = f.PodClient().MatchContainerOutput(pod.Name, pod.Spec.Containers[0].Name, expAllowedCPUsListRegex)
	framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
		pod.Spec.Containers[0].Name, pod.Name)

	ginkgo.By("by deleting the pods and waiting for container removal")
	deletePods(f, []string{pod.Name})
	waitForContainerRemoval(pod.Spec.Containers[0].Name, pod.Name, pod.Namespace)

	ginkgo.By("running multiple Gu and non-Gu pods")
	ctnAttrs = []tmCtnAttribute{
		{
			ctnName: "gu-container",
			cpuRequest: "1000m",
			cpuLimit: "1000m",
		},
	}
	pod1 = makeTopologyManagerPod("gu-pod", ctnAttrs)
	pod1 = f.PodClient().CreateSync(pod1)

	ctnAttrs = []tmCtnAttribute{
		{
			ctnName: "non-gu-container",
			cpuRequest: "200m",
			cpuLimit: "300m",
		},
	}
	pod2 = makeTopologyManagerPod("non-gu-pod", ctnAttrs)
	pod2 = f.PodClient().CreateSync(pod2)

	ginkgo.By("checking if the expected cpuset was assigned")
	cpu1 = 1
	if isHTEnabled() {
		cpuList = cpuset.MustParse(getCPUSiblingList(0)).ToSlice()
		cpu1 = cpuList[1]
	}
	expAllowedCPUsListRegex = fmt.Sprintf("^%d\n$", cpu1)
	err = f.PodClient().MatchContainerOutput(pod1.Name, pod1.Spec.Containers[0].Name, expAllowedCPUsListRegex)
	framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
		pod1.Spec.Containers[0].Name, pod1.Name)

	cpuListString = "0"
	if cpuAlloc > 2 {
		cset = cpuset.MustParse(fmt.Sprintf("0-%d", cpuCap-1))
		cpuListString = fmt.Sprintf("%s", cset.Difference(cpuset.NewCPUSet(cpu1)))
	}
	expAllowedCPUsListRegex = fmt.Sprintf("^%s\n$", cpuListString)
	err = f.PodClient().MatchContainerOutput(pod2.Name, pod2.Spec.Containers[0].Name, expAllowedCPUsListRegex)
	framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
		pod2.Spec.Containers[0].Name, pod2.Name)

	ginkgo.By("by deleting the pods and waiting for container removal")
	deletePods(f, []string{pod1.Name, pod2.Name})
	waitForContainerRemoval(pod1.Spec.Containers[0].Name, pod1.Name, pod1.Namespace)
	waitForContainerRemoval(pod2.Spec.Containers[0].Name, pod2.Name, pod2.Namespace)

	// Skip rest of the tests if CPU capacity < 3.
	if cpuCap < 3 {
		e2eskipper.Skipf("Skipping rest of the CPU Manager tests since CPU capacity < 3")
	}

	ginkgo.By("running a Gu pod requesting multiple CPUs")
	ctnAttrs = []tmCtnAttribute{
		{
			ctnName: "gu-container",
			cpuRequest: "2000m",
			cpuLimit: "2000m",
		},
	}
	pod = makeTopologyManagerPod("gu-pod", ctnAttrs)
	pod = f.PodClient().CreateSync(pod)

	ginkgo.By("checking if the expected cpuset was assigned")
	cpuListString = "1-2"
	if isHTEnabled() {
		cpuListString = "2-3"
		cpuList = cpuset.MustParse(getCPUSiblingList(0)).ToSlice()
		if cpuList[1] != 1 {
			cset = cpuset.MustParse(getCPUSiblingList(1))
			cpuListString = fmt.Sprintf("%s", cset)
		}
	}
	expAllowedCPUsListRegex = fmt.Sprintf("^%s\n$", cpuListString)
	err = f.PodClient().MatchContainerOutput(pod.Name, pod.Spec.Containers[0].Name, expAllowedCPUsListRegex)
	framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
		pod.Spec.Containers[0].Name, pod.Name)

	ginkgo.By("by deleting the pods and waiting for container removal")
	deletePods(f, []string{pod.Name})
	waitForContainerRemoval(pod.Spec.Containers[0].Name, pod.Name, pod.Namespace)

	ginkgo.By("running a Gu pod with multiple containers requesting integer CPUs")
	ctnAttrs = []tmCtnAttribute{
		{
			ctnName: "gu-container1",
			cpuRequest: "1000m",
			cpuLimit: "1000m",
		},
		{
			ctnName: "gu-container2",
			cpuRequest: "1000m",
			cpuLimit: "1000m",
		},
	}
	pod = makeTopologyManagerPod("gu-pod", ctnAttrs)
	pod = f.PodClient().CreateSync(pod)

	ginkgo.By("checking if the expected cpuset was assigned")
	cpu1, cpu2 = 1, 2
	if isHTEnabled() {
		cpuList = cpuset.MustParse(getCPUSiblingList(0)).ToSlice()
		if cpuList[1] != 1 {
			cpu1, cpu2 = cpuList[1], 1
		}
	}
	expAllowedCPUsListRegex = fmt.Sprintf("^%d|%d\n$", cpu1, cpu2)
	err = f.PodClient().MatchContainerOutput(pod.Name, pod.Spec.Containers[0].Name, expAllowedCPUsListRegex)
	framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
		pod.Spec.Containers[0].Name, pod.Name)

	err = f.PodClient().MatchContainerOutput(pod.Name, pod.Spec.Containers[1].Name, expAllowedCPUsListRegex)
	framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
		pod.Spec.Containers[1].Name, pod.Name)

	ginkgo.By("by deleting the pods and waiting for container removal")
	deletePods(f, []string{pod.Name})
	waitForContainerRemoval(pod.Spec.Containers[0].Name, pod.Name, pod.Namespace)
	waitForContainerRemoval(pod.Spec.Containers[1].Name, pod.Name, pod.Namespace)

	ginkgo.By("running multiple Gu pods")
	ctnAttrs = []tmCtnAttribute{
		{
			ctnName: "gu-container1",
			cpuRequest: "1000m",
			cpuLimit: "1000m",
		},
	}
	pod1 = makeTopologyManagerPod("gu-pod1", ctnAttrs)
	pod1 = f.PodClient().CreateSync(pod1)

	ctnAttrs = []tmCtnAttribute{
		{
			ctnName: "gu-container2",
			cpuRequest: "1000m",
			cpuLimit: "1000m",
		},
	}
	pod2 = makeTopologyManagerPod("gu-pod2", ctnAttrs)
	pod2 = f.PodClient().CreateSync(pod2)

	ginkgo.By("checking if the expected cpuset was assigned")
	cpu1, cpu2 = 1, 2
	if isHTEnabled() {
		cpuList = cpuset.MustParse(getCPUSiblingList(0)).ToSlice()
		if cpuList[1] != 1 {
			cpu1, cpu2 = cpuList[1], 1
		}
	}
	expAllowedCPUsListRegex = fmt.Sprintf("^%d\n$", cpu1)
	err = f.PodClient().MatchContainerOutput(pod1.Name, pod1.Spec.Containers[0].Name, expAllowedCPUsListRegex)
	framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
		pod1.Spec.Containers[0].Name, pod1.Name)

	expAllowedCPUsListRegex = fmt.Sprintf("^%d\n$", cpu2)
	err = f.PodClient().MatchContainerOutput(pod2.Name, pod2.Spec.Containers[0].Name, expAllowedCPUsListRegex)
	framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
		pod2.Spec.Containers[0].Name, pod2.Name)

	ginkgo.By("by deleting the pods and waiting for container removal")
	deletePods(f, []string{pod1.Name, pod2.Name})
	waitForContainerRemoval(pod1.Spec.Containers[0].Name, pod1.Name, pod1.Namespace)
	waitForContainerRemoval(pod2.Spec.Containers[0].Name, pod2.Name, pod2.Namespace)
}

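// waitForAllContainerRemoval waits until the CRI runtime reports no containers left for the given pod.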
func waitForAllContainerRemoval(podName, podNS string) {
	rs, _, err := getCRIClient()
	framework.ExpectNoError(err)
	gomega.Eventually(func() bool {
		containers, err := rs.ListContainers(&runtimeapi.ContainerFilter{
			LabelSelector: map[string]string{
				types.KubernetesPodNameLabel:      podName,
				types.KubernetesPodNamespaceLabel: podNS,
			},
		})
		if err != nil {
			return false
		}
		return len(containers) == 0
	}, 2*time.Minute, 1*time.Second).Should(gomega.BeTrue())
}

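// runTopologyManagerPositiveTest creates numPods pods with the given container attributes,
// expects them all to be admitted, validates NUMA alignment where meaningful, then cleans up.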
func runTopologyManagerPositiveTest(f *framework.Framework, numPods int, ctnAttrs []tmCtnAttribute, envInfo *testEnvInfo) {
	var pods []*v1.Pod

	for podID := 0; podID < numPods; podID++ {
		podName := fmt.Sprintf("gu-pod-%d", podID)
		framework.Logf("creating pod %s attrs %v", podName, ctnAttrs)
		pod := makeTopologyManagerTestPod(podName, numalignCmd, ctnAttrs)
		pod = f.PodClient().CreateSync(pod)
		framework.Logf("created pod %s", podName)
		pods = append(pods, pod)
	}

	// per https://github.com/kubernetes/enhancements/blob/master/keps/sig-node/0035-20190130-topology-manager.md#multi-numa-systems-tests
	// we can do a meaningful validation only when using the single-numa node policy
	if envInfo.policy == topologymanager.PolicySingleNumaNode {
		for podID := 0; podID < numPods; podID++ {
			validatePodAlignment(f, pods[podID], envInfo)
		}
	}

	for podID := 0; podID < numPods; podID++ {
		pod := pods[podID]
		framework.Logf("deleting the pod %s/%s and waiting for container removal",
			pod.Namespace, pod.Name)
		deletePods(f, []string{pod.Name})
		waitForAllContainerRemoval(pod.Name, pod.Namespace)
	}
}

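// runTopologyManagerNegativeTest creates a pod with the given container attributes and expects
// it to be rejected with a topology affinity error.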
func runTopologyManagerNegativeTest(f *framework.Framework, numPods int, ctnAttrs []tmCtnAttribute, envInfo *testEnvInfo) {
	podName := "gu-pod"
	framework.Logf("creating pod %s attrs %v", podName, ctnAttrs)
	pod := makeTopologyManagerTestPod(podName, numalignCmd, ctnAttrs)

	pod = f.PodClient().Create(pod)
	err := e2epod.WaitForPodCondition(f.ClientSet, f.Namespace.Name, pod.Name, "Failed", 30*time.Second, func(pod *v1.Pod) (bool, error) {
		if pod.Status.Phase != v1.PodPending {
			return true, nil
		}
		return false, nil
	})
	framework.ExpectNoError(err)
	pod, err = f.PodClient().Get(context.TODO(), pod.Name, metav1.GetOptions{})
	framework.ExpectNoError(err)

	if pod.Status.Phase != v1.PodFailed {
		framework.Failf("pod %s not failed: %v", pod.Name, pod.Status)
	}
	if !isTopologyAffinityError(pod) {
		framework.Failf("pod %s failed for wrong reason: %q", pod.Name, pod.Status.Reason)
	}

	deletePods(f, []string{pod.Name})
}

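// isTopologyAffinityError reports whether the pod was rejected with a Topology Affinity Error.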
func isTopologyAffinityError(pod *v1.Pod) bool {
	re := regexp.MustCompile(`Topology.*Affinity.*Error`)
	return re.MatchString(pod.Status.Reason)
}

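// getSRIOVDevicePluginConfigMap returns the SRIOV device plugin ConfigMap, loading it from cmFile
// when provided and falling back to the built-in manifest otherwise.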
func getSRIOVDevicePluginConfigMap(cmFile string) *v1.ConfigMap {
	cmData := testfiles.ReadOrDie(SRIOVDevicePluginCMYAML)
	var err error

	// the SRIOVDP configuration is hw-dependent, so we allow per-test-host customization.
	framework.Logf("host-local SRIOV Device Plugin Config Map %q", cmFile)
	if cmFile != "" {
		cmData, err = ioutil.ReadFile(cmFile)
		if err != nil {
			framework.Failf("unable to load the SRIOV Device Plugin ConfigMap: %v", err)
		}
	} else {
		framework.Logf("Using built-in SRIOV Device Plugin Config Map")
	}

	return readConfigMapV1OrDie(cmData)
}

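// sriovData holds the objects created for the SRIOV device plugin and the resource it advertises.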
type sriovData struct {
	configMap      *v1.ConfigMap
	serviceAccount *v1.ServiceAccount
	pod            *v1.Pod

	resourceName   string
	resourceAmount int64
}

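// setupSRIOVConfigOrFail deploys the SRIOV device plugin (ConfigMap, ServiceAccount and pod) and
// waits until the local node advertises an SRIOV resource.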
func setupSRIOVConfigOrFail(f *framework.Framework, configMap *v1.ConfigMap) *sriovData {
	var err error

	ginkgo.By(fmt.Sprintf("Creating configMap %v/%v", metav1.NamespaceSystem, configMap.Name))
	if _, err = f.ClientSet.CoreV1().ConfigMaps(metav1.NamespaceSystem).Create(context.TODO(), configMap, metav1.CreateOptions{}); err != nil {
		framework.Failf("unable to create test configMap %s: %v", configMap.Name, err)
	}

	serviceAccount := readServiceAccountV1OrDie(testfiles.ReadOrDie(SRIOVDevicePluginSAYAML))
	ginkgo.By(fmt.Sprintf("Creating serviceAccount %v/%v", metav1.NamespaceSystem, serviceAccount.Name))
	if _, err = f.ClientSet.CoreV1().ServiceAccounts(metav1.NamespaceSystem).Create(context.TODO(), serviceAccount, metav1.CreateOptions{}); err != nil {
		framework.Failf("unable to create test serviceAccount %s: %v", serviceAccount.Name, err)
	}

	e2enode.WaitForNodeToBeReady(f.ClientSet, framework.TestContext.NodeName, 5*time.Minute)

	dp := getSRIOVDevicePluginPod()
	dp.Spec.NodeName = framework.TestContext.NodeName

	ginkgo.By("Create SRIOV device plugin pod")
	dpPod, err := f.ClientSet.CoreV1().Pods(metav1.NamespaceSystem).Create(context.TODO(), dp, metav1.CreateOptions{})
	framework.ExpectNoError(err)

	sriovResourceName := ""
	var sriovResourceAmount int64
	ginkgo.By("Waiting for devices to become available on the local node")
	gomega.Eventually(func() bool {
		node := getLocalNode(f)
		framework.Logf("Node status: %v", node.Status.Capacity)
		sriovResourceName, sriovResourceAmount = findSRIOVResource(node)
		return sriovResourceAmount > 0
	}, 2*time.Minute, framework.Poll).Should(gomega.BeTrue())
	framework.Logf("Successfully created device plugin pod, detected %d SRIOV device %q", sriovResourceAmount, sriovResourceName)

	return &sriovData{
		configMap:      configMap,
		serviceAccount: serviceAccount,
		pod:            dpPod,
		resourceName:   sriovResourceName,
		resourceAmount: sriovResourceAmount,
	}
}

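// teardownSRIOVConfigOrFail deletes the objects created by setupSRIOVConfigOrFail.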
func teardownSRIOVConfigOrFail(f *framework.Framework, sd *sriovData) {
	var err error
	gp := int64(0)
	deleteOptions := metav1.DeleteOptions{
		GracePeriodSeconds: &gp,
	}

	ginkgo.By(fmt.Sprintf("Delete SRIOV device plugin pod %s/%s", sd.pod.Namespace, sd.pod.Name))
	err = f.ClientSet.CoreV1().Pods(sd.pod.Namespace).Delete(context.TODO(), sd.pod.Name, &deleteOptions)
	framework.ExpectNoError(err)
	waitForContainerRemoval(sd.pod.Spec.Containers[0].Name, sd.pod.Name, sd.pod.Namespace)

	ginkgo.By(fmt.Sprintf("Deleting configMap %v/%v", metav1.NamespaceSystem, sd.configMap.Name))
	err = f.ClientSet.CoreV1().ConfigMaps(metav1.NamespaceSystem).Delete(context.TODO(), sd.configMap.Name, &deleteOptions)
	framework.ExpectNoError(err)

	ginkgo.By(fmt.Sprintf("Deleting serviceAccount %v/%v", metav1.NamespaceSystem, sd.serviceAccount.Name))
	err = f.ClientSet.CoreV1().ServiceAccounts(metav1.NamespaceSystem).Delete(context.TODO(), sd.serviceAccount.Name, &deleteOptions)
	framework.ExpectNoError(err)
}

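// runTopologyManagerNodeAlignmentSuiteTests exercises Gu pods requesting CPUs and SRIOV devices,
// checking NUMA alignment of the allocations (and rejection, for the single-numa-node policy).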
func runTopologyManagerNodeAlignmentSuiteTests(f *framework.Framework, configMap *v1.ConfigMap, reservedSystemCPUs string, numaNodes, coreCount int, policy string) {
	threadsPerCore := 1
	if isHTEnabled() {
		threadsPerCore = 2
	}

	sd := setupSRIOVConfigOrFail(f, configMap)

	envInfo := &testEnvInfo{
		numaNodes:         numaNodes,
		sriovResourceName: sd.resourceName,
		policy:            policy,
	}

	// could have been a loop, we unroll it to explain the testcases
	var ctnAttrs []tmCtnAttribute

	// simplest case
	ginkgo.By(fmt.Sprintf("Successfully admit one guaranteed pod with 1 core, 1 %s device", sd.resourceName))
	ctnAttrs = []tmCtnAttribute{
		{
			ctnName: "gu-container",
			cpuRequest: "1000m",
			cpuLimit: "1000m",
			deviceName: sd.resourceName,
			deviceRequest: "1",
			deviceLimit: "1",
		},
	}
	runTopologyManagerPositiveTest(f, 1, ctnAttrs, envInfo)

	ginkgo.By(fmt.Sprintf("Successfully admit one guaranteed pod with 2 cores, 1 %s device", sd.resourceName))
	ctnAttrs = []tmCtnAttribute{
		{
			ctnName: "gu-container",
			cpuRequest: "2000m",
			cpuLimit: "2000m",
			deviceName: sd.resourceName,
			deviceRequest: "1",
			deviceLimit: "1",
		},
	}
	runTopologyManagerPositiveTest(f, 1, ctnAttrs, envInfo)

	if reservedSystemCPUs != "" {
		// to avoid false negatives, we have placed the reserved CPUs in such a way that there is
		// at least one NUMA node with 1+ SRIOV devices and no reserved CPUs.
		numCores := threadsPerCore * coreCount
		allCoresReq := fmt.Sprintf("%dm", numCores*1000)
		ginkgo.By(fmt.Sprintf("Successfully admit an entire socket (%d cores), 1 %s device", numCores, sd.resourceName))
		ctnAttrs = []tmCtnAttribute{
			{
				ctnName: "gu-container",
				cpuRequest: allCoresReq,
				cpuLimit: allCoresReq,
				deviceName: sd.resourceName,
				deviceRequest: "1",
				deviceLimit: "1",
			},
		}
		runTopologyManagerPositiveTest(f, 1, ctnAttrs, envInfo)
	}

	if sd.resourceAmount > 1 {
		// no matter how buses are connected to NUMA nodes and how SRIOV devices are installed,
		// the preconditions of this function must ensure the following can be fulfilled
		ginkgo.By(fmt.Sprintf("Successfully admit two guaranteed pods, each with 1 core, 1 %s device", sd.resourceName))
		ctnAttrs = []tmCtnAttribute{
			{
				ctnName: "gu-container",
				cpuRequest: "1000m",
				cpuLimit: "1000m",
				deviceName: sd.resourceName,
				deviceRequest: "1",
				deviceLimit: "1",
			},
		}
		runTopologyManagerPositiveTest(f, 2, ctnAttrs, envInfo)

		ginkgo.By(fmt.Sprintf("Successfully admit two guaranteed pods, each with 2 cores, 1 %s device", sd.resourceName))
		ctnAttrs = []tmCtnAttribute{
			{
				ctnName: "gu-container",
				cpuRequest: "2000m",
				cpuLimit: "2000m",
				deviceName: sd.resourceName,
				deviceRequest: "1",
				deviceLimit: "1",
			},
		}
		runTopologyManagerPositiveTest(f, 2, ctnAttrs, envInfo)

		// testing more complex conditions requires knowledge about the system cpu+bus topology
	}

	// multi-container tests
	if sd.resourceAmount >= 4 {
		ginkgo.By(fmt.Sprintf("Successfully admit one guaranteed pod with two containers, each with 2 cores, 1 %s device", sd.resourceName))
		ctnAttrs = []tmCtnAttribute{
			{
				ctnName: "gu-container-0",
				cpuRequest: "2000m",
				cpuLimit: "2000m",
				deviceName: sd.resourceName,
				deviceRequest: "1",
				deviceLimit: "1",
			},
			{
				ctnName: "gu-container-1",
				cpuRequest: "2000m",
				cpuLimit: "2000m",
				deviceName: sd.resourceName,
				deviceRequest: "1",
				deviceLimit: "1",
			},
		}
		runTopologyManagerPositiveTest(f, 1, ctnAttrs, envInfo)

		ginkgo.By(fmt.Sprintf("Successfully admit two guaranteed pods, each with two containers, each with 1 core, 1 %s device", sd.resourceName))
		ctnAttrs = []tmCtnAttribute{
			{
				ctnName: "gu-container-0",
				cpuRequest: "1000m",
				cpuLimit: "1000m",
				deviceName: sd.resourceName,
				deviceRequest: "1",
				deviceLimit: "1",
			},
			{
				ctnName: "gu-container-1",
				cpuRequest: "1000m",
				cpuLimit: "1000m",
				deviceName: sd.resourceName,
				deviceRequest: "1",
				deviceLimit: "1",
			},
		}
		runTopologyManagerPositiveTest(f, 2, ctnAttrs, envInfo)

		ginkgo.By(fmt.Sprintf("Successfully admit two guaranteed pods, each with two containers, both with 2 cores, one with 1 %s device", sd.resourceName))
		ctnAttrs = []tmCtnAttribute{
			{
				ctnName: "gu-container-dev",
				cpuRequest: "2000m",
				cpuLimit: "2000m",
				deviceName: sd.resourceName,
				deviceRequest: "1",
				deviceLimit: "1",
			},
			{
				ctnName: "gu-container-nodev",
				cpuRequest: "2000m",
				cpuLimit: "2000m",
			},
		}
		runTopologyManagerPositiveTest(f, 2, ctnAttrs, envInfo)
	}

	// this is the only policy that can guarantee reliable rejects
	if policy == topologymanager.PolicySingleNumaNode {
		// overflow NUMA node capacity: cores
		numCores := 1 + (threadsPerCore * coreCount)
		excessCoresReq := fmt.Sprintf("%dm", numCores*1000)
		ginkgo.By(fmt.Sprintf("Trying to admit a guaranteed pod with %d cores, 1 %s device - and it should be rejected", numCores, sd.resourceName))
		ctnAttrs = []tmCtnAttribute{
			{
				ctnName: "gu-container",
				cpuRequest: excessCoresReq,
				cpuLimit: excessCoresReq,
				deviceName: sd.resourceName,
				deviceRequest: "1",
				deviceLimit: "1",
			},
		}
		runTopologyManagerNegativeTest(f, 1, ctnAttrs, envInfo)
	}

	teardownSRIOVConfigOrFail(f, sd)
}

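// runTopologyManagerTests registers the policy and node-alignment suites, configuring the Kubelet
// for each Topology Manager policy and restoring the original configuration afterwards.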
func runTopologyManagerTests(f *framework.Framework) {
	var oldCfg *kubeletconfig.KubeletConfiguration
	var err error

	ginkgo.It("run Topology Manager policy test suite", func() {
		oldCfg, err = getCurrentKubeletConfig()
		framework.ExpectNoError(err)

		var policies = []string{topologymanager.PolicySingleNumaNode, topologymanager.PolicyRestricted,
			topologymanager.PolicyBestEffort, topologymanager.PolicyNone}

		for _, policy := range policies {
			// Configure Topology Manager
			ginkgo.By(fmt.Sprintf("by configuring Topology Manager policy to %s", policy))
			framework.Logf("Configuring topology Manager policy to %s", policy)

			configureTopologyManagerInKubelet(f, oldCfg, policy, nil, 0)
			// Run the tests
			runTopologyManagerPolicySuiteTests(f)
		}
		// restore kubelet config
		setOldKubeletConfig(f, oldCfg)

		// Delete state file to allow repeated runs
		deleteStateFile()
	})

	ginkgo.It("run Topology Manager node alignment test suite", func() {
		// this is a very rough check. We just want to rule out systems that do NOT have
		// any SRIOV device. A more proper check will be done in runTopologyManagerPositiveTest
		sriovdevCount := detectSRIOVDevices()
		numaNodes := detectNUMANodes()
		coreCount := detectCoresPerSocket()

		if numaNodes < minNumaNodes {
			e2eskipper.Skipf("this test is meant to run on a multi-node NUMA system")
		}
		if coreCount < minCoreCount {
			e2eskipper.Skipf("this test is meant to run on a system with at least 4 cores per socket")
		}
		if sriovdevCount == 0 {
			e2eskipper.Skipf("this test is meant to run on a system with at least one configured VF from SRIOV device")
		}

		configMap := getSRIOVDevicePluginConfigMap(framework.TestContext.SriovdpConfigMapFile)

		oldCfg, err = getCurrentKubeletConfig()
		framework.ExpectNoError(err)

		var policies = []string{topologymanager.PolicySingleNumaNode, topologymanager.PolicyRestricted,
			topologymanager.PolicyBestEffort, topologymanager.PolicyNone}

		for _, policy := range policies {
			// Configure Topology Manager
			ginkgo.By(fmt.Sprintf("by configuring Topology Manager policy to %s", policy))
			framework.Logf("Configuring topology Manager policy to %s", policy)

			reservedSystemCPUs := configureTopologyManagerInKubelet(f, oldCfg, policy, configMap, numaNodes)

			runTopologyManagerNodeAlignmentSuiteTests(f, configMap, reservedSystemCPUs, numaNodes, coreCount, policy)
		}

		// restore kubelet config
		setOldKubeletConfig(f, oldCfg)

		// Delete state file to allow repeated runs
		deleteStateFile()
	})
}

// Serial because the test updates kubelet configuration.
var _ = SIGDescribe("Topology Manager [Serial] [Feature:TopologyManager][NodeAlphaFeature:TopologyManager]", func() {
	f := framework.NewDefaultFramework("topology-manager-test")

	ginkgo.Context("With kubeconfig updated to static CPU Manager policy run the Topology Manager tests", func() {
		runTopologyManagerTests(f)
	})
})