123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415 |
- /*
- Copyright 2015 The Kubernetes Authors.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- */
- package images
- import (
- goerrors "errors"
- "fmt"
- "math"
- "sort"
- "sync"
- "time"
- "k8s.io/klog"
- "k8s.io/api/core/v1"
- "k8s.io/apimachinery/pkg/util/errors"
- "k8s.io/apimachinery/pkg/util/sets"
- "k8s.io/apimachinery/pkg/util/wait"
- "k8s.io/client-go/tools/record"
- statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
- "k8s.io/kubernetes/pkg/kubelet/container"
- "k8s.io/kubernetes/pkg/kubelet/events"
- "k8s.io/kubernetes/pkg/kubelet/util/sliceutils"
- )
- // StatsProvider is an interface for fetching stats used during image garbage
- // collection.
- type StatsProvider interface {
- // ImageFsStats returns the stats of the image filesystem.
- ImageFsStats() (*statsapi.FsStats, error)
- }
- // ImageGCManager is an interface for managing lifecycle of all images.
- // Implementation is thread-safe.
- type ImageGCManager interface {
- // Applies the garbage collection policy. Errors include being unable to free
- // enough space as per the garbage collection policy.
- GarbageCollect() error
- // Start async garbage collection of images.
- Start()
- GetImageList() ([]container.Image, error)
- // Delete all unused images.
- DeleteUnusedImages() error
- }
- // ImageGCPolicy is a policy for garbage collecting images. Policy defines an allowed band in
- // which garbage collection will be run.
- type ImageGCPolicy struct {
- // Any usage above this threshold will always trigger garbage collection.
- // This is the highest usage we will allow.
- HighThresholdPercent int
- // Any usage below this threshold will never trigger garbage collection.
- // This is the lowest threshold we will try to garbage collect to.
- LowThresholdPercent int
- // Minimum age at which an image can be garbage collected.
- MinAge time.Duration
- }
- type realImageGCManager struct {
- // Container runtime
- runtime container.Runtime
- // Records of images and their use.
- imageRecords map[string]*imageRecord
- imageRecordsLock sync.Mutex
- // The image garbage collection policy in use.
- policy ImageGCPolicy
- // statsProvider provides stats used during image garbage collection.
- statsProvider StatsProvider
- // Recorder for Kubernetes events.
- recorder record.EventRecorder
- // Reference to this node.
- nodeRef *v1.ObjectReference
- // Track initialization
- initialized bool
- // imageCache is the cache of latest image list.
- imageCache imageCache
- // sandbox image exempted from GC
- sandboxImage string
- }
- // imageCache caches latest result of ListImages.
- type imageCache struct {
- // sync.Mutex is the mutex protects the image cache.
- sync.Mutex
- // images is the image cache.
- images []container.Image
- }
- // set updates image cache.
- func (i *imageCache) set(images []container.Image) {
- i.Lock()
- defer i.Unlock()
- i.images = images
- }
- // get gets a sorted (by image size) image list from image cache.
- // There is a potentical data race in this function. See PR #60448
- // Because there is deepcopy function available currently, move sort
- // function inside this function
- func (i *imageCache) get() []container.Image {
- i.Lock()
- defer i.Unlock()
- sort.Sort(sliceutils.ByImageSize(i.images))
- return i.images
- }
- // Information about the images we track.
- type imageRecord struct {
- // Time when this image was first detected.
- firstDetected time.Time
- // Time when we last saw this image being used.
- lastUsed time.Time
- // Size of the image in bytes.
- size int64
- }
- // NewImageGCManager instantiates a new ImageGCManager object.
- func NewImageGCManager(runtime container.Runtime, statsProvider StatsProvider, recorder record.EventRecorder, nodeRef *v1.ObjectReference, policy ImageGCPolicy, sandboxImage string) (ImageGCManager, error) {
- // Validate policy.
- if policy.HighThresholdPercent < 0 || policy.HighThresholdPercent > 100 {
- return nil, fmt.Errorf("invalid HighThresholdPercent %d, must be in range [0-100]", policy.HighThresholdPercent)
- }
- if policy.LowThresholdPercent < 0 || policy.LowThresholdPercent > 100 {
- return nil, fmt.Errorf("invalid LowThresholdPercent %d, must be in range [0-100]", policy.LowThresholdPercent)
- }
- if policy.LowThresholdPercent > policy.HighThresholdPercent {
- return nil, fmt.Errorf("LowThresholdPercent %d can not be higher than HighThresholdPercent %d", policy.LowThresholdPercent, policy.HighThresholdPercent)
- }
- im := &realImageGCManager{
- runtime: runtime,
- policy: policy,
- imageRecords: make(map[string]*imageRecord),
- statsProvider: statsProvider,
- recorder: recorder,
- nodeRef: nodeRef,
- initialized: false,
- sandboxImage: sandboxImage,
- }
- return im, nil
- }
- func (im *realImageGCManager) Start() {
- go wait.Until(func() {
- // Initial detection make detected time "unknown" in the past.
- var ts time.Time
- if im.initialized {
- ts = time.Now()
- }
- _, err := im.detectImages(ts)
- if err != nil {
- klog.Warningf("[imageGCManager] Failed to monitor images: %v", err)
- } else {
- im.initialized = true
- }
- }, 5*time.Minute, wait.NeverStop)
- // Start a goroutine periodically updates image cache.
- // TODO(random-liu): Merge this with the previous loop.
- go wait.Until(func() {
- images, err := im.runtime.ListImages()
- if err != nil {
- klog.Warningf("[imageGCManager] Failed to update image list: %v", err)
- } else {
- im.imageCache.set(images)
- }
- }, 30*time.Second, wait.NeverStop)
- }
- // Get a list of images on this node
- func (im *realImageGCManager) GetImageList() ([]container.Image, error) {
- return im.imageCache.get(), nil
- }
- func (im *realImageGCManager) detectImages(detectTime time.Time) (sets.String, error) {
- imagesInUse := sets.NewString()
- // Always consider the container runtime pod sandbox image in use
- imageRef, err := im.runtime.GetImageRef(container.ImageSpec{Image: im.sandboxImage})
- if err == nil && imageRef != "" {
- imagesInUse.Insert(imageRef)
- }
- images, err := im.runtime.ListImages()
- if err != nil {
- return imagesInUse, err
- }
- pods, err := im.runtime.GetPods(true)
- if err != nil {
- return imagesInUse, err
- }
- // Make a set of images in use by containers.
- for _, pod := range pods {
- for _, container := range pod.Containers {
- klog.V(5).Infof("Pod %s/%s, container %s uses image %s(%s)", pod.Namespace, pod.Name, container.Name, container.Image, container.ImageID)
- imagesInUse.Insert(container.ImageID)
- }
- }
- // Add new images and record those being used.
- now := time.Now()
- currentImages := sets.NewString()
- im.imageRecordsLock.Lock()
- defer im.imageRecordsLock.Unlock()
- for _, image := range images {
- klog.V(5).Infof("Adding image ID %s to currentImages", image.ID)
- currentImages.Insert(image.ID)
- // New image, set it as detected now.
- if _, ok := im.imageRecords[image.ID]; !ok {
- klog.V(5).Infof("Image ID %s is new", image.ID)
- im.imageRecords[image.ID] = &imageRecord{
- firstDetected: detectTime,
- }
- }
- // Set last used time to now if the image is being used.
- if isImageUsed(image.ID, imagesInUse) {
- klog.V(5).Infof("Setting Image ID %s lastUsed to %v", image.ID, now)
- im.imageRecords[image.ID].lastUsed = now
- }
- klog.V(5).Infof("Image ID %s has size %d", image.ID, image.Size)
- im.imageRecords[image.ID].size = image.Size
- }
- // Remove old images from our records.
- for image := range im.imageRecords {
- if !currentImages.Has(image) {
- klog.V(5).Infof("Image ID %s is no longer present; removing from imageRecords", image)
- delete(im.imageRecords, image)
- }
- }
- return imagesInUse, nil
- }
- func (im *realImageGCManager) GarbageCollect() error {
- // Get disk usage on disk holding images.
- fsStats, err := im.statsProvider.ImageFsStats()
- if err != nil {
- return err
- }
- var capacity, available int64
- if fsStats.CapacityBytes != nil {
- capacity = int64(*fsStats.CapacityBytes)
- }
- if fsStats.AvailableBytes != nil {
- available = int64(*fsStats.AvailableBytes)
- }
- if available > capacity {
- klog.Warningf("available %d is larger than capacity %d", available, capacity)
- available = capacity
- }
- // Check valid capacity.
- if capacity == 0 {
- err := goerrors.New("invalid capacity 0 on image filesystem")
- im.recorder.Eventf(im.nodeRef, v1.EventTypeWarning, events.InvalidDiskCapacity, err.Error())
- return err
- }
- // If over the max threshold, free enough to place us at the lower threshold.
- usagePercent := 100 - int(available*100/capacity)
- if usagePercent >= im.policy.HighThresholdPercent {
- amountToFree := capacity*int64(100-im.policy.LowThresholdPercent)/100 - available
- klog.Infof("[imageGCManager]: Disk usage on image filesystem is at %d%% which is over the high threshold (%d%%). Trying to free %d bytes down to the low threshold (%d%%).", usagePercent, im.policy.HighThresholdPercent, amountToFree, im.policy.LowThresholdPercent)
- freed, err := im.freeSpace(amountToFree, time.Now())
- if err != nil {
- return err
- }
- if freed < amountToFree {
- err := fmt.Errorf("failed to garbage collect required amount of images. Wanted to free %d bytes, but freed %d bytes", amountToFree, freed)
- im.recorder.Eventf(im.nodeRef, v1.EventTypeWarning, events.FreeDiskSpaceFailed, err.Error())
- return err
- }
- }
- return nil
- }
- func (im *realImageGCManager) DeleteUnusedImages() error {
- klog.Infof("attempting to delete unused images")
- _, err := im.freeSpace(math.MaxInt64, time.Now())
- return err
- }
- // Tries to free bytesToFree worth of images on the disk.
- //
- // Returns the number of bytes free and an error if any occurred. The number of
- // bytes freed is always returned.
- // Note that error may be nil and the number of bytes free may be less
- // than bytesToFree.
- func (im *realImageGCManager) freeSpace(bytesToFree int64, freeTime time.Time) (int64, error) {
- imagesInUse, err := im.detectImages(freeTime)
- if err != nil {
- return 0, err
- }
- im.imageRecordsLock.Lock()
- defer im.imageRecordsLock.Unlock()
- // Get all images in eviction order.
- images := make([]evictionInfo, 0, len(im.imageRecords))
- for image, record := range im.imageRecords {
- if isImageUsed(image, imagesInUse) {
- klog.V(5).Infof("Image ID %s is being used", image)
- continue
- }
- images = append(images, evictionInfo{
- id: image,
- imageRecord: *record,
- })
- }
- sort.Sort(byLastUsedAndDetected(images))
- // Delete unused images until we've freed up enough space.
- var deletionErrors []error
- spaceFreed := int64(0)
- for _, image := range images {
- klog.V(5).Infof("Evaluating image ID %s for possible garbage collection", image.id)
- // Images that are currently in used were given a newer lastUsed.
- if image.lastUsed.Equal(freeTime) || image.lastUsed.After(freeTime) {
- klog.V(5).Infof("Image ID %s has lastUsed=%v which is >= freeTime=%v, not eligible for garbage collection", image.id, image.lastUsed, freeTime)
- continue
- }
- // Avoid garbage collect the image if the image is not old enough.
- // In such a case, the image may have just been pulled down, and will be used by a container right away.
- if freeTime.Sub(image.firstDetected) < im.policy.MinAge {
- klog.V(5).Infof("Image ID %s has age %v which is less than the policy's minAge of %v, not eligible for garbage collection", image.id, freeTime.Sub(image.firstDetected), im.policy.MinAge)
- continue
- }
- // Remove image. Continue despite errors.
- klog.Infof("[imageGCManager]: Removing image %q to free %d bytes", image.id, image.size)
- err := im.runtime.RemoveImage(container.ImageSpec{Image: image.id})
- if err != nil {
- deletionErrors = append(deletionErrors, err)
- continue
- }
- delete(im.imageRecords, image.id)
- spaceFreed += image.size
- if spaceFreed >= bytesToFree {
- break
- }
- }
- if len(deletionErrors) > 0 {
- return spaceFreed, fmt.Errorf("wanted to free %d bytes, but freed %d bytes space with errors in image deletion: %v", bytesToFree, spaceFreed, errors.NewAggregate(deletionErrors))
- }
- return spaceFreed, nil
- }
- type evictionInfo struct {
- id string
- imageRecord
- }
- type byLastUsedAndDetected []evictionInfo
- func (ev byLastUsedAndDetected) Len() int { return len(ev) }
- func (ev byLastUsedAndDetected) Swap(i, j int) { ev[i], ev[j] = ev[j], ev[i] }
- func (ev byLastUsedAndDetected) Less(i, j int) bool {
- // Sort by last used, break ties by detected.
- if ev[i].lastUsed.Equal(ev[j].lastUsed) {
- return ev[i].firstDetected.Before(ev[j].firstDetected)
- }
- return ev[i].lastUsed.Before(ev[j].lastUsed)
- }
- func isImageUsed(imageID string, imagesInUse sets.String) bool {
- // Check the image ID.
- if _, ok := imagesInUse[imageID]; ok {
- return true
- }
- return false
- }
|