helpers_linux.go 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273
  1. /*
  2. Copyright 2016 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package cm
  14. import (
  15. "bufio"
  16. "fmt"
  17. "os"
  18. "path/filepath"
  19. "strconv"
  20. libcontainercgroups "github.com/opencontainers/runc/libcontainer/cgroups"
  21. "k8s.io/api/core/v1"
  22. "k8s.io/apimachinery/pkg/types"
  23. utilfeature "k8s.io/apiserver/pkg/util/feature"
  24. "k8s.io/kubernetes/pkg/api/v1/resource"
  25. v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
  26. v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
  27. kubefeatures "k8s.io/kubernetes/pkg/features"
  28. )
  29. const (
  30. // Taken from lmctfy https://github.com/google/lmctfy/blob/master/lmctfy/controllers/cpu_controller.cc
  31. MinShares = 2
  32. SharesPerCPU = 1024
  33. MilliCPUToCPU = 1000
  34. // 100000 is equivalent to 100ms
  35. QuotaPeriod = 100000
  36. MinQuotaPeriod = 1000
  37. )
  38. // MilliCPUToQuota converts milliCPU to CFS quota and period values.
  39. func MilliCPUToQuota(milliCPU int64, period int64) (quota int64) {
  40. // CFS quota is measured in two values:
  41. // - cfs_period_us=100ms (the amount of time to measure usage across given by period)
  42. // - cfs_quota=20ms (the amount of cpu time allowed to be used across a period)
  43. // so in the above example, you are limited to 20% of a single CPU
  44. // for multi-cpu environments, you just scale equivalent amounts
  45. // see https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt for details
  46. if milliCPU == 0 {
  47. return
  48. }
  49. if !utilfeature.DefaultFeatureGate.Enabled(kubefeatures.CPUCFSQuotaPeriod) {
  50. period = QuotaPeriod
  51. }
  52. // we then convert your milliCPU to a value normalized over a period
  53. quota = (milliCPU * period) / MilliCPUToCPU
  54. // quota needs to be a minimum of 1ms.
  55. if quota < MinQuotaPeriod {
  56. quota = MinQuotaPeriod
  57. }
  58. return
  59. }
  60. // MilliCPUToShares converts the milliCPU to CFS shares.
  61. func MilliCPUToShares(milliCPU int64) uint64 {
  62. if milliCPU == 0 {
  63. // Docker converts zero milliCPU to unset, which maps to kernel default
  64. // for unset: 1024. Return 2 here to really match kernel default for
  65. // zero milliCPU.
  66. return MinShares
  67. }
  68. // Conceptually (milliCPU / milliCPUToCPU) * sharesPerCPU, but factored to improve rounding.
  69. shares := (milliCPU * SharesPerCPU) / MilliCPUToCPU
  70. if shares < MinShares {
  71. return MinShares
  72. }
  73. return uint64(shares)
  74. }
  75. // HugePageLimits converts the API representation to a map
  76. // from huge page size (in bytes) to huge page limit (in bytes).
  77. func HugePageLimits(resourceList v1.ResourceList) map[int64]int64 {
  78. hugePageLimits := map[int64]int64{}
  79. for k, v := range resourceList {
  80. if v1helper.IsHugePageResourceName(k) {
  81. pageSize, _ := v1helper.HugePageSizeFromResourceName(k)
  82. if value, exists := hugePageLimits[pageSize.Value()]; exists {
  83. hugePageLimits[pageSize.Value()] = value + v.Value()
  84. } else {
  85. hugePageLimits[pageSize.Value()] = v.Value()
  86. }
  87. }
  88. }
  89. return hugePageLimits
  90. }
  91. // ResourceConfigForPod takes the input pod and outputs the cgroup resource config.
  92. func ResourceConfigForPod(pod *v1.Pod, enforceCPULimits bool, cpuPeriod uint64) *ResourceConfig {
  93. // sum requests and limits.
  94. reqs, limits := resource.PodRequestsAndLimits(pod)
  95. cpuRequests := int64(0)
  96. cpuLimits := int64(0)
  97. memoryLimits := int64(0)
  98. if request, found := reqs[v1.ResourceCPU]; found {
  99. cpuRequests = request.MilliValue()
  100. }
  101. if limit, found := limits[v1.ResourceCPU]; found {
  102. cpuLimits = limit.MilliValue()
  103. }
  104. if limit, found := limits[v1.ResourceMemory]; found {
  105. memoryLimits = limit.Value()
  106. }
  107. // convert to CFS values
  108. cpuShares := MilliCPUToShares(cpuRequests)
  109. cpuQuota := MilliCPUToQuota(cpuLimits, int64(cpuPeriod))
  110. // track if limits were applied for each resource.
  111. memoryLimitsDeclared := true
  112. cpuLimitsDeclared := true
  113. // map hugepage pagesize (bytes) to limits (bytes)
  114. hugePageLimits := map[int64]int64{}
  115. for _, container := range pod.Spec.Containers {
  116. if container.Resources.Limits.Cpu().IsZero() {
  117. cpuLimitsDeclared = false
  118. }
  119. if container.Resources.Limits.Memory().IsZero() {
  120. memoryLimitsDeclared = false
  121. }
  122. containerHugePageLimits := HugePageLimits(container.Resources.Requests)
  123. for k, v := range containerHugePageLimits {
  124. if value, exists := hugePageLimits[k]; exists {
  125. hugePageLimits[k] = value + v
  126. } else {
  127. hugePageLimits[k] = v
  128. }
  129. }
  130. }
  131. // quota is not capped when cfs quota is disabled
  132. if !enforceCPULimits {
  133. cpuQuota = int64(-1)
  134. }
  135. // determine the qos class
  136. qosClass := v1qos.GetPodQOS(pod)
  137. // build the result
  138. result := &ResourceConfig{}
  139. if qosClass == v1.PodQOSGuaranteed {
  140. result.CpuShares = &cpuShares
  141. result.CpuQuota = &cpuQuota
  142. result.CpuPeriod = &cpuPeriod
  143. result.Memory = &memoryLimits
  144. } else if qosClass == v1.PodQOSBurstable {
  145. result.CpuShares = &cpuShares
  146. if cpuLimitsDeclared {
  147. result.CpuQuota = &cpuQuota
  148. result.CpuPeriod = &cpuPeriod
  149. }
  150. if memoryLimitsDeclared {
  151. result.Memory = &memoryLimits
  152. }
  153. } else {
  154. shares := uint64(MinShares)
  155. result.CpuShares = &shares
  156. }
  157. result.HugePageLimit = hugePageLimits
  158. return result
  159. }
  160. // GetCgroupSubsystems returns information about the mounted cgroup subsystems
  161. func GetCgroupSubsystems() (*CgroupSubsystems, error) {
  162. // get all cgroup mounts.
  163. allCgroups, err := libcontainercgroups.GetCgroupMounts(true)
  164. if err != nil {
  165. return &CgroupSubsystems{}, err
  166. }
  167. if len(allCgroups) == 0 {
  168. return &CgroupSubsystems{}, fmt.Errorf("failed to find cgroup mounts")
  169. }
  170. mountPoints := make(map[string]string, len(allCgroups))
  171. for _, mount := range allCgroups {
  172. for _, subsystem := range mount.Subsystems {
  173. mountPoints[subsystem] = mount.Mountpoint
  174. }
  175. }
  176. return &CgroupSubsystems{
  177. Mounts: allCgroups,
  178. MountPoints: mountPoints,
  179. }, nil
  180. }
  181. // getCgroupProcs takes a cgroup directory name as an argument
  182. // reads through the cgroup's procs file and returns a list of tgid's.
  183. // It returns an empty list if a procs file doesn't exists
  184. func getCgroupProcs(dir string) ([]int, error) {
  185. procsFile := filepath.Join(dir, "cgroup.procs")
  186. f, err := os.Open(procsFile)
  187. if err != nil {
  188. if os.IsNotExist(err) {
  189. // The procsFile does not exist, So no pids attached to this directory
  190. return []int{}, nil
  191. }
  192. return nil, err
  193. }
  194. defer f.Close()
  195. s := bufio.NewScanner(f)
  196. out := []int{}
  197. for s.Scan() {
  198. if t := s.Text(); t != "" {
  199. pid, err := strconv.Atoi(t)
  200. if err != nil {
  201. return nil, fmt.Errorf("unexpected line in %v; could not convert to pid: %v", procsFile, err)
  202. }
  203. out = append(out, pid)
  204. }
  205. }
  206. return out, nil
  207. }
  208. // GetPodCgroupNameSuffix returns the last element of the pod CgroupName identifier
  209. func GetPodCgroupNameSuffix(podUID types.UID) string {
  210. return podCgroupNamePrefix + string(podUID)
  211. }
  212. // NodeAllocatableRoot returns the literal cgroup path for the node allocatable cgroup
  213. func NodeAllocatableRoot(cgroupRoot, cgroupDriver string) string {
  214. root := ParseCgroupfsToCgroupName(cgroupRoot)
  215. nodeAllocatableRoot := NewCgroupName(root, defaultNodeAllocatableCgroupName)
  216. if libcontainerCgroupManagerType(cgroupDriver) == libcontainerSystemd {
  217. return nodeAllocatableRoot.ToSystemd()
  218. }
  219. return nodeAllocatableRoot.ToCgroupfs()
  220. }
  221. // GetKubeletContainer returns the cgroup the kubelet will use
  222. func GetKubeletContainer(kubeletCgroups string) (string, error) {
  223. if kubeletCgroups == "" {
  224. cont, err := getContainer(os.Getpid())
  225. if err != nil {
  226. return "", err
  227. }
  228. return cont, nil
  229. }
  230. return kubeletCgroups, nil
  231. }
  232. // GetRuntimeContainer returns the cgroup used by the container runtime
  233. func GetRuntimeContainer(containerRuntime, runtimeCgroups string) (string, error) {
  234. if containerRuntime == "docker" {
  235. cont, err := getContainerNameForProcess(dockerProcessName, dockerPidFile)
  236. if err != nil {
  237. return "", fmt.Errorf("failed to get container name for docker process: %v", err)
  238. }
  239. return cont, nil
  240. }
  241. return runtimeCgroups, nil
  242. }