123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274 |
- /*
- Copyright 2020 The Kubernetes Authors.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- */
- package e2enode
- import (
- "fmt"
- "io/ioutil"
- "os"
- "path/filepath"
- "sort"
- "strconv"
- "strings"
- v1 "k8s.io/api/core/v1"
- "k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
- "k8s.io/kubernetes/test/e2e/framework"
- )
// numaPodResources records, for one container, the NUMA node hosting each of
// its CPUs and each of its PCI devices.
type numaPodResources struct {
	// CPUToNUMANode maps a CPU ID to the ID of the NUMA node it belongs to.
	CPUToNUMANode map[int]int
	// PCIDevsToNUMANode maps a PCI device identifier to the ID of the NUMA
	// node it belongs to.
	PCIDevsToNUMANode map[string]int
}

// CheckAlignment reports whether every CPU and every PCI device recorded in r
// sits on one and the same NUMA node. A set without any resource is aligned.
func (r *numaPodResources) CheckAlignment() bool {
	// The reference node is the node of the first resource seen. An explicit
	// flag is used rather than a sentinel node number, so that a set holding
	// only PCI devices is compared against itself instead of against "not
	// set", and so that a node number of -1 is never mistaken for "not set".
	var (
		refNode int
		haveRef bool
	)
	onRefNode := func(node int) bool {
		if !haveRef {
			refNode, haveRef = node, true
			return true
		}
		return node == refNode
	}

	for _, cpuNode := range r.CPUToNUMANode {
		if !onRefNode(cpuNode) {
			return false
		}
	}
	for _, devNode := range r.PCIDevsToNUMANode {
		if !onRefNode(devNode) {
			return false
		}
	}
	return true
}

// String renders one "CPU ..." line per CPU followed by one "PCI ..." line
// per PCI device. Both groups are sorted by key, because map iteration order
// is random and the output should be stable.
func (r *numaPodResources) String() string {
	var b strings.Builder

	cpuKeys := make([]int, 0, len(r.CPUToNUMANode))
	for cpuID := range r.CPUToNUMANode {
		cpuKeys = append(cpuKeys, cpuID)
	}
	sort.Ints(cpuKeys)
	for _, cpuID := range cpuKeys {
		fmt.Fprintf(&b, "CPU cpu#%03d=%02d\n", cpuID, r.CPUToNUMANode[cpuID])
	}

	pciKeys := make([]string, 0, len(r.PCIDevsToNUMANode))
	for pciDev := range r.PCIDevsToNUMANode {
		pciKeys = append(pciKeys, pciDev)
	}
	sort.Strings(pciKeys)
	for _, pciDev := range pciKeys {
		fmt.Fprintf(&b, "PCI %s=%02d\n", pciDev, r.PCIDevsToNUMANode[pciDev])
	}

	return b.String()
}
- func getCPUsPerNUMANode(nodeNum int) ([]int, error) {
- nodeCPUList, err := ioutil.ReadFile(fmt.Sprintf("/sys/devices/system/node/node%d/cpulist", nodeNum))
- if err != nil {
- return nil, err
- }
- cpus, err := cpuset.Parse(strings.TrimSpace(string(nodeCPUList)))
- if err != nil {
- return nil, err
- }
- return cpus.ToSlice(), nil
- }
- func getCPUToNUMANodeMapFromEnv(f *framework.Framework, pod *v1.Pod, cnt *v1.Container, environ map[string]string, numaNodes int) (map[int]int, error) {
- var cpuIDs []int
- cpuListAllowedEnvVar := "CPULIST_ALLOWED"
- for name, value := range environ {
- if name == cpuListAllowedEnvVar {
- cpus, err := cpuset.Parse(value)
- if err != nil {
- return nil, err
- }
- cpuIDs = cpus.ToSlice()
- }
- }
- if len(cpuIDs) == 0 {
- return nil, fmt.Errorf("variable %q not found in environ", cpuListAllowedEnvVar)
- }
- cpusPerNUMA := make(map[int][]int)
- for numaNode := 0; numaNode < numaNodes; numaNode++ {
- nodeCPUList := f.ExecCommandInContainer(pod.Name, cnt.Name,
- "/bin/cat", fmt.Sprintf("/sys/devices/system/node/node%d/cpulist", numaNode))
- cpus, err := cpuset.Parse(nodeCPUList)
- if err != nil {
- return nil, err
- }
- cpusPerNUMA[numaNode] = cpus.ToSlice()
- }
- // CPU IDs -> NUMA Node ID
- CPUToNUMANode := make(map[int]int)
- for nodeNum, cpus := range cpusPerNUMA {
- for _, cpu := range cpus {
- CPUToNUMANode[cpu] = nodeNum
- }
- }
- // filter out only the allowed CPUs
- CPUMap := make(map[int]int)
- for _, cpuID := range cpuIDs {
- _, ok := CPUToNUMANode[cpuID]
- if !ok {
- return nil, fmt.Errorf("CPU %d not found on NUMA map: %v", cpuID, CPUToNUMANode)
- }
- CPUMap[cpuID] = CPUToNUMANode[cpuID]
- }
- return CPUMap, nil
- }
- func getPCIDeviceToNumaNodeMapFromEnv(f *framework.Framework, pod *v1.Pod, cnt *v1.Container, environ map[string]string) (map[string]int, error) {
- pciDevPrefix := "PCIDEVICE_"
- // at this point we don't care which plugin selected the device,
- // we only need to know which devices were assigned to the POD.
- // Hence, do prefix search for the variable and fetch the device(s).
- NUMAPerDev := make(map[string]int)
- for name, value := range environ {
- if !strings.HasPrefix(name, pciDevPrefix) {
- continue
- }
- // a single plugin can allocate more than a single device
- pciDevs := strings.Split(value, ",")
- for _, pciDev := range pciDevs {
- pciDevNUMANode := f.ExecCommandInContainer(pod.Name, cnt.Name,
- "/bin/cat", fmt.Sprintf("/sys/bus/pci/devices/%s/numa_node", pciDev))
- NUMAPerDev[pciDev] = numaNodeFromSysFsEntry(pciDevNUMANode)
- }
- }
- return NUMAPerDev, nil
- }
// makeEnvMap turns logs, read as one NAME=VALUE pair per line, into a map
// from variable name to value. Empty lines are skipped, and only the first
// "=" on a line separates the name from the value. A non-empty line without
// any "=" makes the function fail.
func makeEnvMap(logs string) (map[string]string, error) {
	env := make(map[string]string)
	for _, line := range strings.Split(logs, "\n") {
		if line == "" {
			continue
		}
		sep := strings.Index(line, "=")
		if sep < 0 {
			return nil, fmt.Errorf("unable to split %q", line)
		}
		env[line[:sep]] = line[sep+1:]
	}
	return env, nil
}
// testEnvInfo bundles the environment settings the NUMA alignment helpers
// are parameterized with.
type testEnvInfo struct {
	// numaNodes is the number of NUMA nodes whose CPU lists get inspected.
	numaNodes int
	// sriovResourceName is the resource name looked up in a container's
	// requests to tell whether the container asked for devices.
	// policy is not read by the helpers in this file; presumably it names
	// the topology manager policy under test — confirm against callers.
	sriovResourceName, policy string
}
- func containerWantsDevices(cnt *v1.Container, envInfo *testEnvInfo) bool {
- _, found := cnt.Resources.Requests[v1.ResourceName(envInfo.sriovResourceName)]
- return found
- }
- func checkNUMAAlignment(f *framework.Framework, pod *v1.Pod, cnt *v1.Container, logs string, envInfo *testEnvInfo) (*numaPodResources, error) {
- var err error
- podEnv, err := makeEnvMap(logs)
- if err != nil {
- return nil, err
- }
- CPUToNUMANode, err := getCPUToNUMANodeMapFromEnv(f, pod, cnt, podEnv, envInfo.numaNodes)
- if err != nil {
- return nil, err
- }
- PCIDevsToNUMANode, err := getPCIDeviceToNumaNodeMapFromEnv(f, pod, cnt, podEnv)
- if err != nil {
- return nil, err
- }
- if containerWantsDevices(cnt, envInfo) && len(PCIDevsToNUMANode) == 0 {
- return nil, fmt.Errorf("no PCI devices found in environ")
- }
- numaRes := numaPodResources{
- CPUToNUMANode: CPUToNUMANode,
- PCIDevsToNUMANode: PCIDevsToNUMANode,
- }
- aligned := numaRes.CheckAlignment()
- if !aligned {
- err = fmt.Errorf("NUMA resources not aligned")
- }
- return &numaRes, err
- }
// pciDeviceInfo describes one entry of a sysfs PCI devices directory.
type pciDeviceInfo struct {
	// Address is the name of the device's directory entry.
	Address string
	// NUMANode is the integer read from the device's numa_node file.
	NUMANode int
	// IsPhysFn is set when the device directory holds a sriov_numvfs entry;
	// IsVFn is set when it holds a physfn entry.
	IsPhysFn, IsVFn bool
}
- func getPCIDeviceInfo(sysPCIDir string) ([]pciDeviceInfo, error) {
- var pciDevs []pciDeviceInfo
- entries, err := ioutil.ReadDir(sysPCIDir)
- if err != nil {
- return nil, err
- }
- for _, entry := range entries {
- isPhysFn := false
- isVFn := false
- if _, err := os.Stat(filepath.Join(sysPCIDir, entry.Name(), "sriov_numvfs")); err == nil {
- isPhysFn = true
- } else if !os.IsNotExist(err) {
- // unexpected error. Bail out
- return nil, err
- }
- if _, err := os.Stat(filepath.Join(sysPCIDir, entry.Name(), "physfn")); err == nil {
- isVFn = true
- } else if !os.IsNotExist(err) {
- // unexpected error. Bail out
- return nil, err
- }
- content, err := ioutil.ReadFile(filepath.Join(sysPCIDir, entry.Name(), "numa_node"))
- if err != nil {
- return nil, err
- }
- pciDevs = append(pciDevs, pciDeviceInfo{
- Address: entry.Name(),
- NUMANode: numaNodeFromSysFsEntry(string(content)),
- IsPhysFn: isPhysFn,
- IsVFn: isVFn,
- })
- }
- return pciDevs, nil
- }
- func numaNodeFromSysFsEntry(content string) int {
- nodeNum, err := strconv.Atoi(strings.TrimSpace(content))
- framework.ExpectNoError(err, "error detecting the device numa_node from sysfs: %v", err)
- return nodeNum
- }
|