123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396 |
- /*
- Copyright 2016 The Kubernetes Authors.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- */
- package kuberuntime
- import (
- "fmt"
- "os"
- "path/filepath"
- "sort"
- "time"
- "k8s.io/apimachinery/pkg/types"
- utilerrors "k8s.io/apimachinery/pkg/util/errors"
- "k8s.io/apimachinery/pkg/util/sets"
- internalapi "k8s.io/cri-api/pkg/apis"
- runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
- "k8s.io/klog"
- kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
- )
- // containerGC is the manager of garbage collection.
- type containerGC struct {
- client internalapi.RuntimeService
- manager *kubeGenericRuntimeManager
- podStateProvider podStateProvider
- }
- // NewContainerGC creates a new containerGC.
- func newContainerGC(client internalapi.RuntimeService, podStateProvider podStateProvider, manager *kubeGenericRuntimeManager) *containerGC {
- return &containerGC{
- client: client,
- manager: manager,
- podStateProvider: podStateProvider,
- }
- }
// containerGCInfo is the per-container record the garbage collector keeps
// while deciding which containers to remove.
type containerGCInfo struct {
	// id is the runtime ID of the container.
	id string
	// name is the container's name.
	name string
	// createTime is when the container was created.
	createTime time.Time
	// unknown is true when the container is in unknown state; the garbage
	// collector should try to stop such a container before removing it.
	unknown bool
}
// sandboxGCInfo is the per-sandbox record the garbage collector keeps while
// deciding which sandboxes to remove.
type sandboxGCInfo struct {
	// id is the runtime ID of the sandbox.
	id string
	// createTime is when the sandbox was created.
	createTime time.Time
	// active is true when the sandbox is ready or still has containers.
	active bool
}
- // evictUnit is considered for eviction as units of (UID, container name) pair.
- type evictUnit struct {
- // UID of the pod.
- uid types.UID
- // Name of the container in the pod.
- name string
- }
- type containersByEvictUnit map[evictUnit][]containerGCInfo
- type sandboxesByPodUID map[types.UID][]sandboxGCInfo
- // NumContainers returns the number of containers in this map.
- func (cu containersByEvictUnit) NumContainers() int {
- num := 0
- for key := range cu {
- num += len(cu[key])
- }
- return num
- }
- // NumEvictUnits returns the number of pod in this map.
- func (cu containersByEvictUnit) NumEvictUnits() int {
- return len(cu)
- }
- // Newest first.
- type byCreated []containerGCInfo
- func (a byCreated) Len() int { return len(a) }
- func (a byCreated) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
- func (a byCreated) Less(i, j int) bool { return a[i].createTime.After(a[j].createTime) }
- // Newest first.
- type sandboxByCreated []sandboxGCInfo
- func (a sandboxByCreated) Len() int { return len(a) }
- func (a sandboxByCreated) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
- func (a sandboxByCreated) Less(i, j int) bool { return a[i].createTime.After(a[j].createTime) }
- // enforceMaxContainersPerEvictUnit enforces MaxPerPodContainer for each evictUnit.
- func (cgc *containerGC) enforceMaxContainersPerEvictUnit(evictUnits containersByEvictUnit, MaxContainers int) {
- for key := range evictUnits {
- toRemove := len(evictUnits[key]) - MaxContainers
- if toRemove > 0 {
- evictUnits[key] = cgc.removeOldestN(evictUnits[key], toRemove)
- }
- }
- }
- // removeOldestN removes the oldest toRemove containers and returns the resulting slice.
- func (cgc *containerGC) removeOldestN(containers []containerGCInfo, toRemove int) []containerGCInfo {
- // Remove from oldest to newest (last to first).
- numToKeep := len(containers) - toRemove
- for i := len(containers) - 1; i >= numToKeep; i-- {
- if containers[i].unknown {
- // Containers in known state could be running, we should try
- // to stop it before removal.
- id := kubecontainer.ContainerID{
- Type: cgc.manager.runtimeName,
- ID: containers[i].id,
- }
- message := "Container is in unknown state, try killing it before removal"
- if err := cgc.manager.killContainer(nil, id, containers[i].name, message, nil); err != nil {
- klog.Errorf("Failed to stop container %q: %v", containers[i].id, err)
- continue
- }
- }
- if err := cgc.manager.removeContainer(containers[i].id); err != nil {
- klog.Errorf("Failed to remove container %q: %v", containers[i].id, err)
- }
- }
- // Assume we removed the containers so that we're not too aggressive.
- return containers[:numToKeep]
- }
- // removeOldestNSandboxes removes the oldest inactive toRemove sandboxes and
- // returns the resulting slice.
- func (cgc *containerGC) removeOldestNSandboxes(sandboxes []sandboxGCInfo, toRemove int) {
- // Remove from oldest to newest (last to first).
- numToKeep := len(sandboxes) - toRemove
- for i := len(sandboxes) - 1; i >= numToKeep; i-- {
- if !sandboxes[i].active {
- cgc.removeSandbox(sandboxes[i].id)
- }
- }
- }
- // removeSandbox removes the sandbox by sandboxID.
- func (cgc *containerGC) removeSandbox(sandboxID string) {
- klog.V(4).Infof("Removing sandbox %q", sandboxID)
- // In normal cases, kubelet should've already called StopPodSandbox before
- // GC kicks in. To guard against the rare cases where this is not true, try
- // stopping the sandbox before removing it.
- if err := cgc.client.StopPodSandbox(sandboxID); err != nil {
- klog.Errorf("Failed to stop sandbox %q before removing: %v", sandboxID, err)
- return
- }
- if err := cgc.client.RemovePodSandbox(sandboxID); err != nil {
- klog.Errorf("Failed to remove sandbox %q: %v", sandboxID, err)
- }
- }
- // evictableContainers gets all containers that are evictable. Evictable containers are: not running
- // and created more than MinAge ago.
- func (cgc *containerGC) evictableContainers(minAge time.Duration) (containersByEvictUnit, error) {
- containers, err := cgc.manager.getKubeletContainers(true)
- if err != nil {
- return containersByEvictUnit{}, err
- }
- evictUnits := make(containersByEvictUnit)
- newestGCTime := time.Now().Add(-minAge)
- for _, container := range containers {
- // Prune out running containers.
- if container.State == runtimeapi.ContainerState_CONTAINER_RUNNING {
- continue
- }
- createdAt := time.Unix(0, container.CreatedAt)
- if newestGCTime.Before(createdAt) {
- continue
- }
- labeledInfo := getContainerInfoFromLabels(container.Labels)
- containerInfo := containerGCInfo{
- id: container.Id,
- name: container.Metadata.Name,
- createTime: createdAt,
- unknown: container.State == runtimeapi.ContainerState_CONTAINER_UNKNOWN,
- }
- key := evictUnit{
- uid: labeledInfo.PodUID,
- name: containerInfo.name,
- }
- evictUnits[key] = append(evictUnits[key], containerInfo)
- }
- // Sort the containers by age.
- for uid := range evictUnits {
- sort.Sort(byCreated(evictUnits[uid]))
- }
- return evictUnits, nil
- }
- // evict all containers that are evictable
- func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool, evictTerminatedPods bool) error {
- // Separate containers by evict units.
- evictUnits, err := cgc.evictableContainers(gcPolicy.MinAge)
- if err != nil {
- return err
- }
- // Remove deleted pod containers if all sources are ready.
- if allSourcesReady {
- for key, unit := range evictUnits {
- if cgc.podStateProvider.IsPodDeleted(key.uid) || (cgc.podStateProvider.IsPodTerminated(key.uid) && evictTerminatedPods) {
- cgc.removeOldestN(unit, len(unit)) // Remove all.
- delete(evictUnits, key)
- }
- }
- }
- // Enforce max containers per evict unit.
- if gcPolicy.MaxPerPodContainer >= 0 {
- cgc.enforceMaxContainersPerEvictUnit(evictUnits, gcPolicy.MaxPerPodContainer)
- }
- // Enforce max total number of containers.
- if gcPolicy.MaxContainers >= 0 && evictUnits.NumContainers() > gcPolicy.MaxContainers {
- // Leave an equal number of containers per evict unit (min: 1).
- numContainersPerEvictUnit := gcPolicy.MaxContainers / evictUnits.NumEvictUnits()
- if numContainersPerEvictUnit < 1 {
- numContainersPerEvictUnit = 1
- }
- cgc.enforceMaxContainersPerEvictUnit(evictUnits, numContainersPerEvictUnit)
- // If we still need to evict, evict oldest first.
- numContainers := evictUnits.NumContainers()
- if numContainers > gcPolicy.MaxContainers {
- flattened := make([]containerGCInfo, 0, numContainers)
- for key := range evictUnits {
- flattened = append(flattened, evictUnits[key]...)
- }
- sort.Sort(byCreated(flattened))
- cgc.removeOldestN(flattened, numContainers-gcPolicy.MaxContainers)
- }
- }
- return nil
- }
- // evictSandboxes remove all evictable sandboxes. An evictable sandbox must
- // meet the following requirements:
- // 1. not in ready state
- // 2. contains no containers.
- // 3. belong to a non-existent (i.e., already removed) pod, or is not the
- // most recently created sandbox for the pod.
- func (cgc *containerGC) evictSandboxes(evictTerminatedPods bool) error {
- containers, err := cgc.manager.getKubeletContainers(true)
- if err != nil {
- return err
- }
- // collect all the PodSandboxId of container
- sandboxIDs := sets.NewString()
- for _, container := range containers {
- sandboxIDs.Insert(container.PodSandboxId)
- }
- sandboxes, err := cgc.manager.getKubeletSandboxes(true)
- if err != nil {
- return err
- }
- sandboxesByPod := make(sandboxesByPodUID)
- for _, sandbox := range sandboxes {
- podUID := types.UID(sandbox.Metadata.Uid)
- sandboxInfo := sandboxGCInfo{
- id: sandbox.Id,
- createTime: time.Unix(0, sandbox.CreatedAt),
- }
- // Set ready sandboxes to be active.
- if sandbox.State == runtimeapi.PodSandboxState_SANDBOX_READY {
- sandboxInfo.active = true
- }
- // Set sandboxes that still have containers to be active.
- if sandboxIDs.Has(sandbox.Id) {
- sandboxInfo.active = true
- }
- sandboxesByPod[podUID] = append(sandboxesByPod[podUID], sandboxInfo)
- }
- // Sort the sandboxes by age.
- for uid := range sandboxesByPod {
- sort.Sort(sandboxByCreated(sandboxesByPod[uid]))
- }
- for podUID, sandboxes := range sandboxesByPod {
- if cgc.podStateProvider.IsPodDeleted(podUID) || (cgc.podStateProvider.IsPodTerminated(podUID) && evictTerminatedPods) {
- // Remove all evictable sandboxes if the pod has been removed.
- // Note that the latest dead sandbox is also removed if there is
- // already an active one.
- cgc.removeOldestNSandboxes(sandboxes, len(sandboxes))
- } else {
- // Keep latest one if the pod still exists.
- cgc.removeOldestNSandboxes(sandboxes, len(sandboxes)-1)
- }
- }
- return nil
- }
- // evictPodLogsDirectories evicts all evictable pod logs directories. Pod logs directories
- // are evictable if there are no corresponding pods.
- func (cgc *containerGC) evictPodLogsDirectories(allSourcesReady bool) error {
- osInterface := cgc.manager.osInterface
- if allSourcesReady {
- // Only remove pod logs directories when all sources are ready.
- dirs, err := osInterface.ReadDir(podLogsRootDirectory)
- if err != nil {
- return fmt.Errorf("failed to read podLogsRootDirectory %q: %v", podLogsRootDirectory, err)
- }
- for _, dir := range dirs {
- name := dir.Name()
- podUID := parsePodUIDFromLogsDirectory(name)
- if !cgc.podStateProvider.IsPodDeleted(podUID) {
- continue
- }
- err := osInterface.RemoveAll(filepath.Join(podLogsRootDirectory, name))
- if err != nil {
- klog.Errorf("Failed to remove pod logs directory %q: %v", name, err)
- }
- }
- }
- // Remove dead container log symlinks.
- // TODO(random-liu): Remove this after cluster logging supports CRI container log path.
- logSymlinks, _ := osInterface.Glob(filepath.Join(legacyContainerLogsDir, fmt.Sprintf("*.%s", legacyLogSuffix)))
- for _, logSymlink := range logSymlinks {
- if _, err := osInterface.Stat(logSymlink); os.IsNotExist(err) {
- err := osInterface.Remove(logSymlink)
- if err != nil {
- klog.Errorf("Failed to remove container log dead symlink %q: %v", logSymlink, err)
- }
- }
- }
- return nil
- }
- // GarbageCollect removes dead containers using the specified container gc policy.
- // Note that gc policy is not applied to sandboxes. Sandboxes are only removed when they are
- // not ready and containing no containers.
- //
- // GarbageCollect consists of the following steps:
- // * gets evictable containers which are not active and created more than gcPolicy.MinAge ago.
- // * removes oldest dead containers for each pod by enforcing gcPolicy.MaxPerPodContainer.
- // * removes oldest dead containers by enforcing gcPolicy.MaxContainers.
- // * gets evictable sandboxes which are not ready and contains no containers.
- // * removes evictable sandboxes.
- func (cgc *containerGC) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool, evictTerminatedPods bool) error {
- errors := []error{}
- // Remove evictable containers
- if err := cgc.evictContainers(gcPolicy, allSourcesReady, evictTerminatedPods); err != nil {
- errors = append(errors, err)
- }
- // Remove sandboxes with zero containers
- if err := cgc.evictSandboxes(evictTerminatedPods); err != nil {
- errors = append(errors, err)
- }
- // Remove pod sandbox log directory
- if err := cgc.evictPodLogsDirectories(allSourcesReady); err != nil {
- errors = append(errors, err)
- }
- return utilerrors.NewAggregate(errors)
- }
|