123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238 |
- /*
- Copyright 2019 The Kubernetes Authors.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- */
- package topologymanager
- import (
- "fmt"
- "k8s.io/api/core/v1"
- "k8s.io/klog"
- cputopology "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
- "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
- "k8s.io/kubernetes/pkg/kubelet/lifecycle"
- )
- const (
- // maxAllowableNUMANodes specifies the maximum number of NUMA Nodes that
- // the TopologyManager supports on the underlying machine.
- //
- // At present, having more than this number of NUMA Nodes will result in a
- // state explosion when trying to enumerate possible NUMAAffinity masks and
- // generate hints for them. As such, if more NUMA Nodes than this are
- // present on a machine and the TopologyManager is enabled, an error will
- // be returned and the TopologyManager will not be loaded.
- maxAllowableNUMANodes = 8
- )
- //Manager interface provides methods for Kubelet to manage pod topology hints
- type Manager interface {
- //Manager implements pod admit handler interface
- lifecycle.PodAdmitHandler
- //Adds a hint provider to manager to indicate the hint provider
- //wants to be consoluted when making topology hints
- AddHintProvider(HintProvider)
- //Adds pod to Manager for tracking
- AddContainer(pod *v1.Pod, containerID string) error
- //Removes pod from Manager tracking
- RemoveContainer(containerID string) error
- //Interface for storing pod topology hints
- Store
- }
- type manager struct {
- //The list of components registered with the Manager
- hintProviders []HintProvider
- //Mapping of a Pods mapping of Containers and their TopologyHints
- //Indexed by PodUID to ContainerName
- podTopologyHints map[string]map[string]TopologyHint
- //Mapping of PodUID to ContainerID for Adding/Removing Pods from PodTopologyHints mapping
- podMap map[string]string
- //Topology Manager Policy
- policy Policy
- }
- // HintProvider is an interface for components that want to collaborate to
- // achieve globally optimal concrete resource alignment with respect to
- // NUMA locality.
- type HintProvider interface {
- // GetTopologyHints returns a map of resource names to a list of possible
- // concrete resource allocations in terms of NUMA locality hints. Each hint
- // is optionally marked "preferred" and indicates the set of NUMA nodes
- // involved in the hypothetical allocation. The topology manager calls
- // this function for each hint provider, and merges the hints to produce
- // a consensus "best" hint. The hint providers may subsequently query the
- // topology manager to influence actual resource assignment.
- GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]TopologyHint
- }
- //Store interface is to allow Hint Providers to retrieve pod affinity
- type Store interface {
- GetAffinity(podUID string, containerName string) TopologyHint
- }
- //TopologyHint is a struct containing the NUMANodeAffinity for a Container
- type TopologyHint struct {
- NUMANodeAffinity bitmask.BitMask
- // Preferred is set to true when the NUMANodeAffinity encodes a preferred
- // allocation for the Container. It is set to false otherwise.
- Preferred bool
- }
- // IsEqual checks if TopologyHint are equal
- func (th *TopologyHint) IsEqual(topologyHint TopologyHint) bool {
- if th.Preferred == topologyHint.Preferred {
- if th.NUMANodeAffinity == nil || topologyHint.NUMANodeAffinity == nil {
- return th.NUMANodeAffinity == topologyHint.NUMANodeAffinity
- }
- return th.NUMANodeAffinity.IsEqual(topologyHint.NUMANodeAffinity)
- }
- return false
- }
- // LessThan checks if TopologyHint `a` is less than TopologyHint `b`
- // this means that either `a` is a preferred hint and `b` is not
- // or `a` NUMANodeAffinity attribute is narrower than `b` NUMANodeAffinity attribute.
- func (th *TopologyHint) LessThan(other TopologyHint) bool {
- if th.Preferred != other.Preferred {
- return th.Preferred == true
- }
- return th.NUMANodeAffinity.IsNarrowerThan(other.NUMANodeAffinity)
- }
- var _ Manager = &manager{}
- //NewManager creates a new TopologyManager based on provided policy
- func NewManager(numaNodeInfo cputopology.NUMANodeInfo, topologyPolicyName string) (Manager, error) {
- klog.Infof("[topologymanager] Creating topology manager with %s policy", topologyPolicyName)
- var numaNodes []int
- for node := range numaNodeInfo {
- numaNodes = append(numaNodes, node)
- }
- if topologyPolicyName != PolicyNone && len(numaNodes) > maxAllowableNUMANodes {
- return nil, fmt.Errorf("unsupported on machines with more than %v NUMA Nodes", maxAllowableNUMANodes)
- }
- var policy Policy
- switch topologyPolicyName {
- case PolicyNone:
- policy = NewNonePolicy()
- case PolicyBestEffort:
- policy = NewBestEffortPolicy(numaNodes)
- case PolicyRestricted:
- policy = NewRestrictedPolicy(numaNodes)
- case PolicySingleNumaNode:
- policy = NewSingleNumaNodePolicy(numaNodes)
- default:
- return nil, fmt.Errorf("unknown policy: \"%s\"", topologyPolicyName)
- }
- var hp []HintProvider
- pth := make(map[string]map[string]TopologyHint)
- pm := make(map[string]string)
- manager := &manager{
- hintProviders: hp,
- podTopologyHints: pth,
- podMap: pm,
- policy: policy,
- }
- return manager, nil
- }
- func (m *manager) GetAffinity(podUID string, containerName string) TopologyHint {
- return m.podTopologyHints[podUID][containerName]
- }
- func (m *manager) accumulateProvidersHints(pod *v1.Pod, container *v1.Container) (providersHints []map[string][]TopologyHint) {
- // Loop through all hint providers and save an accumulated list of the
- // hints returned by each hint provider.
- for _, provider := range m.hintProviders {
- // Get the TopologyHints from a provider.
- hints := provider.GetTopologyHints(pod, container)
- providersHints = append(providersHints, hints)
- klog.Infof("[topologymanager] TopologyHints for pod '%v', container '%v': %v", pod.Name, container.Name, hints)
- }
- return providersHints
- }
- // Collect Hints from hint providers and pass to policy to retrieve the best one.
- func (m *manager) calculateAffinity(pod *v1.Pod, container *v1.Container) (TopologyHint, bool) {
- providersHints := m.accumulateProvidersHints(pod, container)
- bestHint, admit := m.policy.Merge(providersHints)
- klog.Infof("[topologymanager] ContainerTopologyHint: %v", bestHint)
- return bestHint, admit
- }
- func (m *manager) AddHintProvider(h HintProvider) {
- m.hintProviders = append(m.hintProviders, h)
- }
- func (m *manager) AddContainer(pod *v1.Pod, containerID string) error {
- m.podMap[containerID] = string(pod.UID)
- return nil
- }
- func (m *manager) RemoveContainer(containerID string) error {
- klog.Infof("[topologymanager] RemoveContainer - Container ID: %v", containerID)
- podUIDString := m.podMap[containerID]
- delete(m.podMap, containerID)
- if _, exists := m.podTopologyHints[podUIDString]; exists {
- delete(m.podTopologyHints[podUIDString], containerID)
- if len(m.podTopologyHints[podUIDString]) == 0 {
- delete(m.podTopologyHints, podUIDString)
- }
- }
- return nil
- }
- func (m *manager) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitResult {
- // Unconditionally admit the pod if we are running with the 'none' policy.
- if m.policy.Name() == PolicyNone {
- return lifecycle.PodAdmitResult{Admit: true}
- }
- klog.Infof("[topologymanager] Topology Admit Handler")
- pod := attrs.Pod
- hints := make(map[string]TopologyHint)
- for _, container := range append(pod.Spec.InitContainers, pod.Spec.Containers...) {
- result, admit := m.calculateAffinity(pod, &container)
- if !admit {
- return lifecycle.PodAdmitResult{
- Message: "Resources cannot be allocated with Topology locality",
- Reason: "TopologyAffinityError",
- Admit: false,
- }
- }
- hints[container.Name] = result
- }
- m.podTopologyHints[string(pod.UID)] = hints
- klog.Infof("[topologymanager] Topology Affinity for Pod: %v are %v", pod.UID, m.podTopologyHints[string(pod.UID)])
- return lifecycle.PodAdmitResult{Admit: true}
- }
|