kuberuntime_gc.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396
  1. /*
  2. Copyright 2016 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package kuberuntime
  14. import (
  15. "fmt"
  16. "os"
  17. "path/filepath"
  18. "sort"
  19. "time"
  20. "k8s.io/apimachinery/pkg/types"
  21. utilerrors "k8s.io/apimachinery/pkg/util/errors"
  22. "k8s.io/apimachinery/pkg/util/sets"
  23. internalapi "k8s.io/cri-api/pkg/apis"
  24. runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
  25. "k8s.io/klog"
  26. kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
  27. )
  28. // containerGC is the manager of garbage collection.
  29. type containerGC struct {
  30. client internalapi.RuntimeService
  31. manager *kubeGenericRuntimeManager
  32. podStateProvider podStateProvider
  33. }
  34. // NewContainerGC creates a new containerGC.
  35. func newContainerGC(client internalapi.RuntimeService, podStateProvider podStateProvider, manager *kubeGenericRuntimeManager) *containerGC {
  36. return &containerGC{
  37. client: client,
  38. manager: manager,
  39. podStateProvider: podStateProvider,
  40. }
  41. }
  42. // containerGCInfo is the internal information kept for containers being considered for GC.
  43. type containerGCInfo struct {
  44. // The ID of the container.
  45. id string
  46. // The name of the container.
  47. name string
  48. // Creation time for the container.
  49. createTime time.Time
  50. // If true, the container is in unknown state. Garbage collector should try
  51. // to stop containers before removal.
  52. unknown bool
  53. }
  54. // sandboxGCInfo is the internal information kept for sandboxes being considered for GC.
  55. type sandboxGCInfo struct {
  56. // The ID of the sandbox.
  57. id string
  58. // Creation time for the sandbox.
  59. createTime time.Time
  60. // If true, the sandbox is ready or still has containers.
  61. active bool
  62. }
  63. // evictUnit is considered for eviction as units of (UID, container name) pair.
  64. type evictUnit struct {
  65. // UID of the pod.
  66. uid types.UID
  67. // Name of the container in the pod.
  68. name string
  69. }
  70. type containersByEvictUnit map[evictUnit][]containerGCInfo
  71. type sandboxesByPodUID map[types.UID][]sandboxGCInfo
  72. // NumContainers returns the number of containers in this map.
  73. func (cu containersByEvictUnit) NumContainers() int {
  74. num := 0
  75. for key := range cu {
  76. num += len(cu[key])
  77. }
  78. return num
  79. }
  80. // NumEvictUnits returns the number of pod in this map.
  81. func (cu containersByEvictUnit) NumEvictUnits() int {
  82. return len(cu)
  83. }
  84. // Newest first.
  85. type byCreated []containerGCInfo
  86. func (a byCreated) Len() int { return len(a) }
  87. func (a byCreated) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
  88. func (a byCreated) Less(i, j int) bool { return a[i].createTime.After(a[j].createTime) }
  89. // Newest first.
  90. type sandboxByCreated []sandboxGCInfo
  91. func (a sandboxByCreated) Len() int { return len(a) }
  92. func (a sandboxByCreated) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
  93. func (a sandboxByCreated) Less(i, j int) bool { return a[i].createTime.After(a[j].createTime) }
  94. // enforceMaxContainersPerEvictUnit enforces MaxPerPodContainer for each evictUnit.
  95. func (cgc *containerGC) enforceMaxContainersPerEvictUnit(evictUnits containersByEvictUnit, MaxContainers int) {
  96. for key := range evictUnits {
  97. toRemove := len(evictUnits[key]) - MaxContainers
  98. if toRemove > 0 {
  99. evictUnits[key] = cgc.removeOldestN(evictUnits[key], toRemove)
  100. }
  101. }
  102. }
  103. // removeOldestN removes the oldest toRemove containers and returns the resulting slice.
  104. func (cgc *containerGC) removeOldestN(containers []containerGCInfo, toRemove int) []containerGCInfo {
  105. // Remove from oldest to newest (last to first).
  106. numToKeep := len(containers) - toRemove
  107. for i := len(containers) - 1; i >= numToKeep; i-- {
  108. if containers[i].unknown {
  109. // Containers in known state could be running, we should try
  110. // to stop it before removal.
  111. id := kubecontainer.ContainerID{
  112. Type: cgc.manager.runtimeName,
  113. ID: containers[i].id,
  114. }
  115. message := "Container is in unknown state, try killing it before removal"
  116. if err := cgc.manager.killContainer(nil, id, containers[i].name, message, nil); err != nil {
  117. klog.Errorf("Failed to stop container %q: %v", containers[i].id, err)
  118. continue
  119. }
  120. }
  121. if err := cgc.manager.removeContainer(containers[i].id); err != nil {
  122. klog.Errorf("Failed to remove container %q: %v", containers[i].id, err)
  123. }
  124. }
  125. // Assume we removed the containers so that we're not too aggressive.
  126. return containers[:numToKeep]
  127. }
  128. // removeOldestNSandboxes removes the oldest inactive toRemove sandboxes and
  129. // returns the resulting slice.
  130. func (cgc *containerGC) removeOldestNSandboxes(sandboxes []sandboxGCInfo, toRemove int) {
  131. // Remove from oldest to newest (last to first).
  132. numToKeep := len(sandboxes) - toRemove
  133. for i := len(sandboxes) - 1; i >= numToKeep; i-- {
  134. if !sandboxes[i].active {
  135. cgc.removeSandbox(sandboxes[i].id)
  136. }
  137. }
  138. }
  139. // removeSandbox removes the sandbox by sandboxID.
  140. func (cgc *containerGC) removeSandbox(sandboxID string) {
  141. klog.V(4).Infof("Removing sandbox %q", sandboxID)
  142. // In normal cases, kubelet should've already called StopPodSandbox before
  143. // GC kicks in. To guard against the rare cases where this is not true, try
  144. // stopping the sandbox before removing it.
  145. if err := cgc.client.StopPodSandbox(sandboxID); err != nil {
  146. klog.Errorf("Failed to stop sandbox %q before removing: %v", sandboxID, err)
  147. return
  148. }
  149. if err := cgc.client.RemovePodSandbox(sandboxID); err != nil {
  150. klog.Errorf("Failed to remove sandbox %q: %v", sandboxID, err)
  151. }
  152. }
  153. // evictableContainers gets all containers that are evictable. Evictable containers are: not running
  154. // and created more than MinAge ago.
  155. func (cgc *containerGC) evictableContainers(minAge time.Duration) (containersByEvictUnit, error) {
  156. containers, err := cgc.manager.getKubeletContainers(true)
  157. if err != nil {
  158. return containersByEvictUnit{}, err
  159. }
  160. evictUnits := make(containersByEvictUnit)
  161. newestGCTime := time.Now().Add(-minAge)
  162. for _, container := range containers {
  163. // Prune out running containers.
  164. if container.State == runtimeapi.ContainerState_CONTAINER_RUNNING {
  165. continue
  166. }
  167. createdAt := time.Unix(0, container.CreatedAt)
  168. if newestGCTime.Before(createdAt) {
  169. continue
  170. }
  171. labeledInfo := getContainerInfoFromLabels(container.Labels)
  172. containerInfo := containerGCInfo{
  173. id: container.Id,
  174. name: container.Metadata.Name,
  175. createTime: createdAt,
  176. unknown: container.State == runtimeapi.ContainerState_CONTAINER_UNKNOWN,
  177. }
  178. key := evictUnit{
  179. uid: labeledInfo.PodUID,
  180. name: containerInfo.name,
  181. }
  182. evictUnits[key] = append(evictUnits[key], containerInfo)
  183. }
  184. // Sort the containers by age.
  185. for uid := range evictUnits {
  186. sort.Sort(byCreated(evictUnits[uid]))
  187. }
  188. return evictUnits, nil
  189. }
  190. // evict all containers that are evictable
  191. func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool, evictTerminatedPods bool) error {
  192. // Separate containers by evict units.
  193. evictUnits, err := cgc.evictableContainers(gcPolicy.MinAge)
  194. if err != nil {
  195. return err
  196. }
  197. // Remove deleted pod containers if all sources are ready.
  198. if allSourcesReady {
  199. for key, unit := range evictUnits {
  200. if cgc.podStateProvider.IsPodDeleted(key.uid) || (cgc.podStateProvider.IsPodTerminated(key.uid) && evictTerminatedPods) {
  201. cgc.removeOldestN(unit, len(unit)) // Remove all.
  202. delete(evictUnits, key)
  203. }
  204. }
  205. }
  206. // Enforce max containers per evict unit.
  207. if gcPolicy.MaxPerPodContainer >= 0 {
  208. cgc.enforceMaxContainersPerEvictUnit(evictUnits, gcPolicy.MaxPerPodContainer)
  209. }
  210. // Enforce max total number of containers.
  211. if gcPolicy.MaxContainers >= 0 && evictUnits.NumContainers() > gcPolicy.MaxContainers {
  212. // Leave an equal number of containers per evict unit (min: 1).
  213. numContainersPerEvictUnit := gcPolicy.MaxContainers / evictUnits.NumEvictUnits()
  214. if numContainersPerEvictUnit < 1 {
  215. numContainersPerEvictUnit = 1
  216. }
  217. cgc.enforceMaxContainersPerEvictUnit(evictUnits, numContainersPerEvictUnit)
  218. // If we still need to evict, evict oldest first.
  219. numContainers := evictUnits.NumContainers()
  220. if numContainers > gcPolicy.MaxContainers {
  221. flattened := make([]containerGCInfo, 0, numContainers)
  222. for key := range evictUnits {
  223. flattened = append(flattened, evictUnits[key]...)
  224. }
  225. sort.Sort(byCreated(flattened))
  226. cgc.removeOldestN(flattened, numContainers-gcPolicy.MaxContainers)
  227. }
  228. }
  229. return nil
  230. }
  231. // evictSandboxes remove all evictable sandboxes. An evictable sandbox must
  232. // meet the following requirements:
  233. // 1. not in ready state
  234. // 2. contains no containers.
  235. // 3. belong to a non-existent (i.e., already removed) pod, or is not the
  236. // most recently created sandbox for the pod.
  237. func (cgc *containerGC) evictSandboxes(evictTerminatedPods bool) error {
  238. containers, err := cgc.manager.getKubeletContainers(true)
  239. if err != nil {
  240. return err
  241. }
  242. // collect all the PodSandboxId of container
  243. sandboxIDs := sets.NewString()
  244. for _, container := range containers {
  245. sandboxIDs.Insert(container.PodSandboxId)
  246. }
  247. sandboxes, err := cgc.manager.getKubeletSandboxes(true)
  248. if err != nil {
  249. return err
  250. }
  251. sandboxesByPod := make(sandboxesByPodUID)
  252. for _, sandbox := range sandboxes {
  253. podUID := types.UID(sandbox.Metadata.Uid)
  254. sandboxInfo := sandboxGCInfo{
  255. id: sandbox.Id,
  256. createTime: time.Unix(0, sandbox.CreatedAt),
  257. }
  258. // Set ready sandboxes to be active.
  259. if sandbox.State == runtimeapi.PodSandboxState_SANDBOX_READY {
  260. sandboxInfo.active = true
  261. }
  262. // Set sandboxes that still have containers to be active.
  263. if sandboxIDs.Has(sandbox.Id) {
  264. sandboxInfo.active = true
  265. }
  266. sandboxesByPod[podUID] = append(sandboxesByPod[podUID], sandboxInfo)
  267. }
  268. // Sort the sandboxes by age.
  269. for uid := range sandboxesByPod {
  270. sort.Sort(sandboxByCreated(sandboxesByPod[uid]))
  271. }
  272. for podUID, sandboxes := range sandboxesByPod {
  273. if cgc.podStateProvider.IsPodDeleted(podUID) || (cgc.podStateProvider.IsPodTerminated(podUID) && evictTerminatedPods) {
  274. // Remove all evictable sandboxes if the pod has been removed.
  275. // Note that the latest dead sandbox is also removed if there is
  276. // already an active one.
  277. cgc.removeOldestNSandboxes(sandboxes, len(sandboxes))
  278. } else {
  279. // Keep latest one if the pod still exists.
  280. cgc.removeOldestNSandboxes(sandboxes, len(sandboxes)-1)
  281. }
  282. }
  283. return nil
  284. }
  285. // evictPodLogsDirectories evicts all evictable pod logs directories. Pod logs directories
  286. // are evictable if there are no corresponding pods.
  287. func (cgc *containerGC) evictPodLogsDirectories(allSourcesReady bool) error {
  288. osInterface := cgc.manager.osInterface
  289. if allSourcesReady {
  290. // Only remove pod logs directories when all sources are ready.
  291. dirs, err := osInterface.ReadDir(podLogsRootDirectory)
  292. if err != nil {
  293. return fmt.Errorf("failed to read podLogsRootDirectory %q: %v", podLogsRootDirectory, err)
  294. }
  295. for _, dir := range dirs {
  296. name := dir.Name()
  297. podUID := parsePodUIDFromLogsDirectory(name)
  298. if !cgc.podStateProvider.IsPodDeleted(podUID) {
  299. continue
  300. }
  301. err := osInterface.RemoveAll(filepath.Join(podLogsRootDirectory, name))
  302. if err != nil {
  303. klog.Errorf("Failed to remove pod logs directory %q: %v", name, err)
  304. }
  305. }
  306. }
  307. // Remove dead container log symlinks.
  308. // TODO(random-liu): Remove this after cluster logging supports CRI container log path.
  309. logSymlinks, _ := osInterface.Glob(filepath.Join(legacyContainerLogsDir, fmt.Sprintf("*.%s", legacyLogSuffix)))
  310. for _, logSymlink := range logSymlinks {
  311. if _, err := osInterface.Stat(logSymlink); os.IsNotExist(err) {
  312. err := osInterface.Remove(logSymlink)
  313. if err != nil {
  314. klog.Errorf("Failed to remove container log dead symlink %q: %v", logSymlink, err)
  315. }
  316. }
  317. }
  318. return nil
  319. }
  320. // GarbageCollect removes dead containers using the specified container gc policy.
  321. // Note that gc policy is not applied to sandboxes. Sandboxes are only removed when they are
  322. // not ready and containing no containers.
  323. //
  324. // GarbageCollect consists of the following steps:
  325. // * gets evictable containers which are not active and created more than gcPolicy.MinAge ago.
  326. // * removes oldest dead containers for each pod by enforcing gcPolicy.MaxPerPodContainer.
  327. // * removes oldest dead containers by enforcing gcPolicy.MaxContainers.
  328. // * gets evictable sandboxes which are not ready and contains no containers.
  329. // * removes evictable sandboxes.
  330. func (cgc *containerGC) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool, evictTerminatedPods bool) error {
  331. errors := []error{}
  332. // Remove evictable containers
  333. if err := cgc.evictContainers(gcPolicy, allSourcesReady, evictTerminatedPods); err != nil {
  334. errors = append(errors, err)
  335. }
  336. // Remove sandboxes with zero containers
  337. if err := cgc.evictSandboxes(evictTerminatedPods); err != nil {
  338. errors = append(errors, err)
  339. }
  340. // Remove pod sandbox log directory
  341. if err := cgc.evictPodLogsDirectories(allSourcesReady); err != nil {
  342. errors = append(errors, err)
  343. }
  344. return utilerrors.NewAggregate(errors)
  345. }