container_manager.go

/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cm

import (
	"fmt"
	"strconv"
	"strings"
	"time"

	"k8s.io/apimachinery/pkg/util/sets"
	// TODO: Migrate kubelet to either use its own internal objects or client library.
	v1 "k8s.io/api/core/v1"
	internalapi "k8s.io/cri-api/pkg/apis"
	podresourcesapi "k8s.io/kubernetes/pkg/kubelet/apis/podresources/v1alpha1"
	"k8s.io/kubernetes/pkg/kubelet/config"
	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
	evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
	"k8s.io/kubernetes/pkg/kubelet/lifecycle"
	"k8s.io/kubernetes/pkg/kubelet/pluginmanager/cache"
	"k8s.io/kubernetes/pkg/kubelet/status"
	schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
)

// ActivePodsFunc returns the pods that are still considered active on the node.
type ActivePodsFunc func() []*v1.Pod

// ContainerManager manages the containers running on a machine.
type ContainerManager interface {
	// Start runs the container manager's housekeeping.
	// - Ensures that the Docker daemon is in a container.
	// - Creates the system container where all non-containerized processes run.
	Start(*v1.Node, ActivePodsFunc, config.SourcesReady, status.PodStatusProvider, internalapi.RuntimeService) error

	// SystemCgroupsLimit returns the resources allocated to system cgroups on the machine.
	// These cgroups include the system and Kubernetes services.
	SystemCgroupsLimit() v1.ResourceList

	// GetNodeConfig returns the NodeConfig that is being used by the container manager.
	GetNodeConfig() NodeConfig

	// Status returns internal Status.
	Status() Status

	// NewPodContainerManager is a factory method that returns a podContainerManager object.
	// It returns a noop implementation if the QoS cgroup hierarchy is not enabled.
	NewPodContainerManager() PodContainerManager

	// GetMountedSubsystems returns the mounted cgroup subsystems on the node.
	GetMountedSubsystems() *CgroupSubsystems

	// GetQOSContainersInfo returns the names of the top-level QoS containers.
	GetQOSContainersInfo() QOSContainersInfo

	// GetNodeAllocatableReservation returns the amount of compute resources that have to be reserved from scheduling.
	GetNodeAllocatableReservation() v1.ResourceList

	// GetCapacity returns the amount of compute resources tracked by the container manager that is available on the node.
	GetCapacity() v1.ResourceList

	// GetDevicePluginResourceCapacity returns the node capacity (amount of total device plugin resources),
	// node allocatable (amount of total healthy resources reported by device plugins),
	// and inactive device plugin resources previously registered on the node.
	GetDevicePluginResourceCapacity() (v1.ResourceList, v1.ResourceList, []string)

	// UpdateQOSCgroups performs housekeeping updates to ensure that the top-level
	// QoS containers have their desired state, in a thread-safe way.
	UpdateQOSCgroups() error

	// GetResources returns RunContainerOptions with the devices, mounts, and env fields
	// populated for extended resources required by the container.
	GetResources(pod *v1.Pod, container *v1.Container) (*kubecontainer.RunContainerOptions, error)

	// UpdatePluginResources calls the device plugin handler's Allocate for potential
	// requests for device plugin resources, and returns an error if that fails.
	// Otherwise, it updates allocatableResource in nodeInfo if necessary, to make sure
	// it is at least equal to the pod's requested capacity for any registered device
	// plugin resource.
	UpdatePluginResources(*schedulernodeinfo.NodeInfo, *lifecycle.PodAdmitAttributes) error

	InternalContainerLifecycle() InternalContainerLifecycle

	// GetPodCgroupRoot returns the cgroup that contains all pods.
	GetPodCgroupRoot() string

	// GetPluginRegistrationHandler returns a plugin registration handler.
	// The pluginwatcher's Handlers make it possible to have a single module
	// handling registration.
	GetPluginRegistrationHandler() cache.PluginHandler

	// GetDevices returns information about the devices assigned to pods and containers.
	GetDevices(podUID, containerName string) []*podresourcesapi.ContainerDevices

	// ShouldResetExtendedResourceCapacity returns whether or not the extended
	// resource capacity should be zeroed, due to node recreation.
	ShouldResetExtendedResourceCapacity() bool
}
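
// The helper below is an illustrative sketch added for this write-up, not part
// of the upstream file. It shows one way a caller might combine GetCapacity and
// GetNodeAllocatableReservation, assuming the usual
// "allocatable = capacity - reservation" relationship; the function name is
// hypothetical.
func exampleNodeAllocatable(mgr ContainerManager) v1.ResourceList {
	capacity := mgr.GetCapacity()
	reservation := mgr.GetNodeAllocatableReservation()
	allocatable := v1.ResourceList{}
	for name, quantity := range capacity {
		value := quantity.DeepCopy()
		if reserved, ok := reservation[name]; ok {
			// Subtract the reserved amount from the raw capacity.
			value.Sub(reserved)
		}
		allocatable[name] = value
	}
	return allocatable
}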

// NodeConfig holds the node-level configuration used by the container manager.
type NodeConfig struct {
	RuntimeCgroupsName    string
	SystemCgroupsName     string
	KubeletCgroupsName    string
	ContainerRuntime      string
	CgroupsPerQOS         bool
	CgroupRoot            string
	CgroupDriver          string
	KubeletRootDir        string
	ProtectKernelDefaults bool
	NodeAllocatableConfig
	QOSReserved                           map[v1.ResourceName]int64
	ExperimentalCPUManagerPolicy          string
	ExperimentalCPUManagerReconcilePeriod time.Duration
	ExperimentalPodPidsLimit              int64
	EnforceCPULimits                      bool
	CPUCFSQuotaPeriod                     time.Duration
}

// NodeAllocatableConfig describes the reservations and enforcement settings
// used when computing node allocatable.
type NodeAllocatableConfig struct {
	KubeReservedCgroupName   string
	SystemReservedCgroupName string
	EnforceNodeAllocatable   sets.String
	KubeReserved             v1.ResourceList
	SystemReserved           v1.ResourceList
	HardEvictionThresholds   []evictionapi.Threshold
}
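
// The helper below is an illustrative sketch, not part of the upstream file.
// It assembles a hypothetical NodeConfig using only packages this file already
// imports; the field values are examples, not kubelet defaults.
func exampleNodeConfig() NodeConfig {
	return NodeConfig{
		RuntimeCgroupsName: "/runtime",
		SystemCgroupsName:  "/system",
		KubeletCgroupsName: "/kubelet",
		ContainerRuntime:   "remote",
		CgroupsPerQOS:      true,
		CgroupRoot:         "/",
		CgroupDriver:       "cgroupfs",
		KubeletRootDir:     "/var/lib/kubelet",
		NodeAllocatableConfig: NodeAllocatableConfig{
			KubeReservedCgroupName:   "/kube-reserved",
			SystemReservedCgroupName: "/system-reserved",
			EnforceNodeAllocatable:   sets.NewString("pods"),
		},
		// Reserve 50% of pod memory requests at the QoS level
		// (see ParseQOSReserved below).
		QOSReserved:                           map[v1.ResourceName]int64{v1.ResourceMemory: 50},
		ExperimentalCPUManagerPolicy:          "none",
		ExperimentalCPUManagerReconcilePeriod: 10 * time.Second,
		EnforceCPULimits:                      true,
		CPUCFSQuotaPeriod:                     100 * time.Millisecond,
	}
}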

// Status holds the container manager's internal status.
type Status struct {
	// Any soft requirements that were unsatisfied.
	SoftRequirements error
}

// parsePercentage parses a percentage string (e.g. "50%") into its numeric value.
func parsePercentage(v string) (int64, error) {
	if !strings.HasSuffix(v, "%") {
		return 0, fmt.Errorf("percentage expected, got '%s'", v)
	}
	percentage, err := strconv.ParseInt(strings.TrimRight(v, "%"), 10, 0)
	if err != nil {
		return 0, fmt.Errorf("invalid number in percentage '%s'", v)
	}
	if percentage < 0 || percentage > 100 {
		return 0, fmt.Errorf("percentage must be between 0 and 100")
	}
	return percentage, nil
}
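
// Illustrative sketch, not part of the upstream file: a quick demonstration of
// which inputs parsePercentage accepts and rejects. The function name is
// hypothetical.
func exampleParsePercentage() {
	p, err := parsePercentage("50%") // p == 50, err == nil
	_ = p
	_ = err

	_, err = parsePercentage("150%") // rejected: outside [0, 100]
	_ = err

	_, err = parsePercentage("50") // rejected: missing trailing '%'
	_ = err
}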

// ParseQOSReserved parses the --qos-reserved option into per-resource percentages.
func ParseQOSReserved(m map[string]string) (*map[v1.ResourceName]int64, error) {
	reservations := make(map[v1.ResourceName]int64)
	for k, v := range m {
		switch v1.ResourceName(k) {
		// Only memory resources are supported.
		case v1.ResourceMemory:
			q, err := parsePercentage(v)
			if err != nil {
				return nil, err
			}
			reservations[v1.ResourceName(k)] = q
		default:
			return nil, fmt.Errorf("cannot reserve %q resource", k)
		}
	}
	return &reservations, nil
}
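
// Illustrative sketch, not part of the upstream file: typical use of
// ParseQOSReserved on the raw flag map; the helper name is hypothetical.
func exampleParseQOSReserved() (map[v1.ResourceName]int64, error) {
	reserved, err := ParseQOSReserved(map[string]string{"memory": "50%"})
	if err != nil {
		return nil, err
	}
	// (*reserved)[v1.ResourceMemory] == 50; any key other than "memory" or a
	// malformed percentage would have produced an error instead.
	return *reserved, nil
}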