policy_static.go 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232
  1. /*
  2. Copyright 2017 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package cpumanager
  14. import (
  15. "fmt"
  16. "k8s.io/api/core/v1"
  17. "k8s.io/klog"
  18. v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
  19. "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state"
  20. "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
  21. "k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
  22. )
// PolicyStatic is the name of the static policy. It is the value reported
// by staticPolicy.Name.
const PolicyStatic policyName = "static"
// staticPolicy is a CPU manager policy that does not change CPU
// assignments for exclusively pinned guaranteed containers after the main
// container process starts.
//
// This policy allocates CPUs exclusively for a container if all the following
// conditions are met:
//
// - The pod QoS class is Guaranteed.
// - The CPU request is a positive integer.
//
// The static policy maintains the following sets of logical CPUs:
//
//   - SHARED: Burstable, BestEffort, and non-integral Guaranteed containers
//     run here. Initially this contains all CPU IDs on the system. As
//     exclusive allocations are created and destroyed, this CPU set shrinks
//     and grows, accordingly. This is stored in the state as the default
//     CPU set.
//
//   - RESERVED: A subset of the shared pool which is not exclusively
//     allocatable. The membership of this pool is static for the lifetime of
//     the Kubelet. The size of the reserved pool is
//     ceil(systemreserved.cpu + kubereserved.cpu).
//     Reserved CPUs are taken topologically starting with lowest-indexed
//     physical core, as reported by cAdvisor.
//
//   - ASSIGNABLE: Equal to SHARED - RESERVED. Exclusive CPUs are allocated
//     from this pool.
//
//   - EXCLUSIVE ALLOCATIONS: CPU sets assigned exclusively to one container.
//     These are stored as explicit assignments in the state.
//
// When an exclusive allocation is made, the static policy also updates the
// default cpuset in the state abstraction. The CPU manager's periodic
// reconcile loop takes care of rewriting the cpuset in cgroupfs for any
// containers that may be running in the shared pool. For this reason,
// applications running within exclusively-allocated containers must tolerate
// potentially sharing their allocated CPUs for up to the CPU manager
// reconcile period.
type staticPolicy struct {
	// cpu socket topology; passed to takeByTopology when CPUs are selected
	// and used by validateState as the reference set of existing CPUs.
	topology *topology.CPUTopology
	// set of CPUs that is not available for exclusive assignment; it is
	// subtracted from the default cpuset when computing assignable CPUs.
	reserved cpuset.CPUSet
}
  69. // Ensure staticPolicy implements Policy interface
  70. var _ Policy = &staticPolicy{}
  71. // NewStaticPolicy returns a CPU manager policy that does not change CPU
  72. // assignments for exclusively pinned guaranteed containers after the main
  73. // container process starts.
  74. func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int) Policy {
  75. allCPUs := topology.CPUDetails.CPUs()
  76. // takeByTopology allocates CPUs associated with low-numbered cores from
  77. // allCPUs.
  78. //
  79. // For example: Given a system with 8 CPUs available and HT enabled,
  80. // if numReservedCPUs=2, then reserved={0,4}
  81. reserved, _ := takeByTopology(topology, allCPUs, numReservedCPUs)
  82. if reserved.Size() != numReservedCPUs {
  83. panic(fmt.Sprintf("[cpumanager] unable to reserve the required amount of CPUs (size of %s did not equal %d)", reserved, numReservedCPUs))
  84. }
  85. klog.Infof("[cpumanager] reserved %d CPUs (\"%s\") not available for exclusive assignment", reserved.Size(), reserved)
  86. return &staticPolicy{
  87. topology: topology,
  88. reserved: reserved,
  89. }
  90. }
  91. func (p *staticPolicy) Name() string {
  92. return string(PolicyStatic)
  93. }
  94. func (p *staticPolicy) Start(s state.State) {
  95. if err := p.validateState(s); err != nil {
  96. klog.Errorf("[cpumanager] static policy invalid state: %s\n", err.Error())
  97. panic("[cpumanager] - please drain node and remove policy state file")
  98. }
  99. }
  100. func (p *staticPolicy) validateState(s state.State) error {
  101. tmpAssignments := s.GetCPUAssignments()
  102. tmpDefaultCPUset := s.GetDefaultCPUSet()
  103. // Default cpuset cannot be empty when assignments exist
  104. if tmpDefaultCPUset.IsEmpty() {
  105. if len(tmpAssignments) != 0 {
  106. return fmt.Errorf("default cpuset cannot be empty")
  107. }
  108. // state is empty initialize
  109. allCPUs := p.topology.CPUDetails.CPUs()
  110. s.SetDefaultCPUSet(allCPUs)
  111. return nil
  112. }
  113. // State has already been initialized from file (is not empty)
  114. // 1. Check if the reserved cpuset is not part of default cpuset because:
  115. // - kube/system reserved have changed (increased) - may lead to some containers not being able to start
  116. // - user tampered with file
  117. if !p.reserved.Intersection(tmpDefaultCPUset).Equals(p.reserved) {
  118. return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
  119. p.reserved.String(), tmpDefaultCPUset.String())
  120. }
  121. // 2. Check if state for static policy is consistent
  122. for cID, cset := range tmpAssignments {
  123. // None of the cpu in DEFAULT cset should be in s.assignments
  124. if !tmpDefaultCPUset.Intersection(cset).IsEmpty() {
  125. return fmt.Errorf("container id: %s cpuset: \"%s\" overlaps with default cpuset \"%s\"",
  126. cID, cset.String(), tmpDefaultCPUset.String())
  127. }
  128. }
  129. // 3. It's possible that the set of available CPUs has changed since
  130. // the state was written. This can be due to for example
  131. // offlining a CPU when kubelet is not running. If this happens,
  132. // CPU manager will run into trouble when later it tries to
  133. // assign non-existent CPUs to containers. Validate that the
  134. // topology that was received during CPU manager startup matches with
  135. // the set of CPUs stored in the state.
  136. totalKnownCPUs := tmpDefaultCPUset.Clone()
  137. tmpCPUSets := []cpuset.CPUSet{}
  138. for _, cset := range tmpAssignments {
  139. tmpCPUSets = append(tmpCPUSets, cset)
  140. }
  141. totalKnownCPUs = totalKnownCPUs.UnionAll(tmpCPUSets)
  142. if !totalKnownCPUs.Equals(p.topology.CPUDetails.CPUs()) {
  143. return fmt.Errorf("current set of available CPUs \"%s\" doesn't match with CPUs in state \"%s\"",
  144. p.topology.CPUDetails.CPUs().String(), totalKnownCPUs.String())
  145. }
  146. return nil
  147. }
  148. // assignableCPUs returns the set of unassigned CPUs minus the reserved set.
  149. func (p *staticPolicy) assignableCPUs(s state.State) cpuset.CPUSet {
  150. return s.GetDefaultCPUSet().Difference(p.reserved)
  151. }
  152. func (p *staticPolicy) AddContainer(s state.State, pod *v1.Pod, container *v1.Container, containerID string) error {
  153. if numCPUs := guaranteedCPUs(pod, container); numCPUs != 0 {
  154. klog.Infof("[cpumanager] static policy: AddContainer (pod: %s, container: %s, container id: %s)", pod.Name, container.Name, containerID)
  155. // container belongs in an exclusively allocated pool
  156. if _, ok := s.GetCPUSet(containerID); ok {
  157. klog.Infof("[cpumanager] static policy: container already present in state, skipping (container: %s, container id: %s)", container.Name, containerID)
  158. return nil
  159. }
  160. cpuset, err := p.allocateCPUs(s, numCPUs)
  161. if err != nil {
  162. klog.Errorf("[cpumanager] unable to allocate %d CPUs (container id: %s, error: %v)", numCPUs, containerID, err)
  163. return err
  164. }
  165. s.SetCPUSet(containerID, cpuset)
  166. }
  167. // container belongs in the shared pool (nothing to do; use default cpuset)
  168. return nil
  169. }
  170. func (p *staticPolicy) RemoveContainer(s state.State, containerID string) error {
  171. klog.Infof("[cpumanager] static policy: RemoveContainer (container id: %s)", containerID)
  172. if toRelease, ok := s.GetCPUSet(containerID); ok {
  173. s.Delete(containerID)
  174. // Mutate the shared pool, adding released cpus.
  175. s.SetDefaultCPUSet(s.GetDefaultCPUSet().Union(toRelease))
  176. }
  177. return nil
  178. }
  179. func (p *staticPolicy) allocateCPUs(s state.State, numCPUs int) (cpuset.CPUSet, error) {
  180. klog.Infof("[cpumanager] allocateCpus: (numCPUs: %d)", numCPUs)
  181. result, err := takeByTopology(p.topology, p.assignableCPUs(s), numCPUs)
  182. if err != nil {
  183. return cpuset.NewCPUSet(), err
  184. }
  185. // Remove allocated CPUs from the shared CPUSet.
  186. s.SetDefaultCPUSet(s.GetDefaultCPUSet().Difference(result))
  187. klog.Infof("[cpumanager] allocateCPUs: returning \"%v\"", result)
  188. return result, nil
  189. }
  190. func guaranteedCPUs(pod *v1.Pod, container *v1.Container) int {
  191. if v1qos.GetPodQOS(pod) != v1.PodQOSGuaranteed {
  192. return 0
  193. }
  194. cpuQuantity := container.Resources.Requests[v1.ResourceCPU]
  195. if cpuQuantity.Value()*1000 != cpuQuantity.MilliValue() {
  196. return 0
  197. }
  198. // Safe downcast to do for all systems with < 2.1 billion CPUs.
  199. // Per the language spec, `int` is guaranteed to be at least 32 bits wide.
  200. // https://golang.org/ref/spec#Numeric_types
  201. return int(cpuQuantity.Value())
  202. }