- /*
- Copyright 2016 The Kubernetes Authors.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- */
- package kuberuntime
- import (
- "errors"
- "fmt"
- "os"
- "time"
- cadvisorapi "github.com/google/cadvisor/info/v1"
- "k8s.io/klog"
- v1 "k8s.io/api/core/v1"
- metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
- kubetypes "k8s.io/apimachinery/pkg/types"
- utilruntime "k8s.io/apimachinery/pkg/util/runtime"
- utilversion "k8s.io/apimachinery/pkg/util/version"
- "k8s.io/client-go/tools/record"
- ref "k8s.io/client-go/tools/reference"
- "k8s.io/client-go/util/flowcontrol"
- internalapi "k8s.io/cri-api/pkg/apis"
- runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
- "k8s.io/kubernetes/pkg/api/legacyscheme"
- "k8s.io/kubernetes/pkg/credentialprovider"
- "k8s.io/kubernetes/pkg/kubelet/cm"
- kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
- "k8s.io/kubernetes/pkg/kubelet/events"
- "k8s.io/kubernetes/pkg/kubelet/images"
- "k8s.io/kubernetes/pkg/kubelet/lifecycle"
- proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results"
- "k8s.io/kubernetes/pkg/kubelet/runtimeclass"
- "k8s.io/kubernetes/pkg/kubelet/types"
- "k8s.io/kubernetes/pkg/kubelet/util/cache"
- "k8s.io/kubernetes/pkg/kubelet/util/format"
- "k8s.io/kubernetes/pkg/kubelet/util/logreduction"
- )
- const (
- // The API version of the kubelet runtime API
- kubeRuntimeAPIVersion = "0.1.0"
- // The root directory for pod logs
- podLogsRootDirectory = "/var/log/pods"
- // A minimal shutdown window for avoiding unnecessary SIGKILLs
- minimumGracePeriodInSeconds = 2
- // The expiration time of version cache.
- versionCacheTTL = 60 * time.Second
- // How frequently to report identical errors
- identicalErrorDelay = 1 * time.Minute
- )
- var (
- // ErrVersionNotSupported is returned when the API version of the runtime interface is not supported
- ErrVersionNotSupported = errors.New("Runtime api version is not supported")
- )
- // podStateProvider can determine if a pod is deleted or terminated
- type podStateProvider interface {
- IsPodDeleted(kubetypes.UID) bool
- IsPodTerminated(kubetypes.UID) bool
- }
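- // kubeGenericRuntimeManager manages pods and containers through a
- // CRI-compatible container runtime, talking to the runtime and image
- // services over the CRI gRPC interfaces.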
- type kubeGenericRuntimeManager struct {
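- // The name of the container runtime (e.g. "docker" or "containerd"),
- // as reported by the runtime's Version response.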
- runtimeName string
- recorder record.EventRecorder
- osInterface kubecontainer.OSInterface
- containerRefManager *kubecontainer.RefManager
- // machineInfo contains the machine information.
- machineInfo *cadvisorapi.MachineInfo
- // Container GC manager
- containerGC *containerGC
- // Keyring for pulling images
- keyring credentialprovider.DockerKeyring
- // Runner of lifecycle events.
- runner kubecontainer.HandlerRunner
- // RuntimeHelper that wraps kubelet to generate runtime container options.
- runtimeHelper kubecontainer.RuntimeHelper
- // Health check results.
- livenessManager proberesults.Manager
- // If true, enforce container cpu limits with CFS quota support
- cpuCFSQuota bool
- // CPUCFSQuotaPeriod sets the CPU CFS quota period value, cpu.cfs_period_us, defaults to 100ms
- cpuCFSQuotaPeriod metav1.Duration
- // wrapped image puller.
- imagePuller images.ImageManager
- // gRPC service clients
- runtimeService internalapi.RuntimeService
- imageService internalapi.ImageManagerService
- // The version cache of runtime daemon.
- versionCache *cache.ObjectCache
- // The directory path for seccomp profiles.
- seccompProfileRoot string
- // Internal lifecycle event handlers for container resource management.
- internalLifecycle cm.InternalContainerLifecycle
- // A shim to legacy functions for backward compatibility.
- legacyLogProvider LegacyLogProvider
- // Manage RuntimeClass resources.
- runtimeClassManager *runtimeclass.Manager
- // Cache last per-container error message to reduce log spam
- logReduction *logreduction.LogReduction
- }
- // KubeGenericRuntime is an interface that embeds the container runtime, streaming runtime, and command runner interfaces.
- type KubeGenericRuntime interface {
- kubecontainer.Runtime
- kubecontainer.StreamingRuntime
- kubecontainer.ContainerCommandRunner
- }
- // LegacyLogProvider gives the ability to use unsupported docker log drivers (e.g. journald)
- type LegacyLogProvider interface {
- // Get the last few lines of the logs for a specific container.
- GetContainerLogTail(uid kubetypes.UID, name, namespace string, containerID kubecontainer.ContainerID) (string, error)
- }
- // NewKubeGenericRuntimeManager creates a new kubeGenericRuntimeManager
- func NewKubeGenericRuntimeManager(
- recorder record.EventRecorder,
- livenessManager proberesults.Manager,
- seccompProfileRoot string,
- containerRefManager *kubecontainer.RefManager,
- machineInfo *cadvisorapi.MachineInfo,
- podStateProvider podStateProvider,
- osInterface kubecontainer.OSInterface,
- runtimeHelper kubecontainer.RuntimeHelper,
- httpClient types.HttpGetter,
- imageBackOff *flowcontrol.Backoff,
- serializeImagePulls bool,
- imagePullQPS float32,
- imagePullBurst int,
- cpuCFSQuota bool,
- cpuCFSQuotaPeriod metav1.Duration,
- runtimeService internalapi.RuntimeService,
- imageService internalapi.ImageManagerService,
- internalLifecycle cm.InternalContainerLifecycle,
- legacyLogProvider LegacyLogProvider,
- runtimeClassManager *runtimeclass.Manager,
- ) (KubeGenericRuntime, error) {
- kubeRuntimeManager := &kubeGenericRuntimeManager{
- recorder: recorder,
- cpuCFSQuota: cpuCFSQuota,
- cpuCFSQuotaPeriod: cpuCFSQuotaPeriod,
- seccompProfileRoot: seccompProfileRoot,
- livenessManager: livenessManager,
- containerRefManager: containerRefManager,
- machineInfo: machineInfo,
- osInterface: osInterface,
- runtimeHelper: runtimeHelper,
- runtimeService: newInstrumentedRuntimeService(runtimeService),
- imageService: newInstrumentedImageManagerService(imageService),
- keyring: credentialprovider.NewDockerKeyring(),
- internalLifecycle: internalLifecycle,
- legacyLogProvider: legacyLogProvider,
- runtimeClassManager: runtimeClassManager,
- logReduction: logreduction.NewLogReduction(identicalErrorDelay),
- }
- typedVersion, err := kubeRuntimeManager.runtimeService.Version(kubeRuntimeAPIVersion)
- if err != nil {
- klog.Errorf("Get runtime version failed: %v", err)
- return nil, err
- }
- // Only matching kubeRuntimeAPIVersion is supported now
- // TODO: Runtime API machinery is under discussion at https://github.com/kubernetes/kubernetes/issues/28642
- if typedVersion.Version != kubeRuntimeAPIVersion {
- klog.Errorf("Runtime api version %s is not supported, only %s is supported now",
- typedVersion.Version,
- kubeRuntimeAPIVersion)
- return nil, ErrVersionNotSupported
- }
- kubeRuntimeManager.runtimeName = typedVersion.RuntimeName
- klog.Infof("Container runtime %s initialized, version: %s, apiVersion: %s",
- typedVersion.RuntimeName,
- typedVersion.RuntimeVersion,
- typedVersion.RuntimeApiVersion)
- // If the container logs directory does not exist, create it.
- // TODO: create podLogsRootDirectory at kubelet.go when kubelet is refactored to
- // new runtime interface
- if _, err := osInterface.Stat(podLogsRootDirectory); os.IsNotExist(err) {
- if err := osInterface.MkdirAll(podLogsRootDirectory, 0755); err != nil {
- klog.Errorf("Failed to create directory %q: %v", podLogsRootDirectory, err)
- }
- }
- kubeRuntimeManager.imagePuller = images.NewImageManager(
- kubecontainer.FilterEventRecorder(recorder),
- kubeRuntimeManager,
- imageBackOff,
- serializeImagePulls,
- imagePullQPS,
- imagePullBurst)
- kubeRuntimeManager.runner = lifecycle.NewHandlerRunner(httpClient, kubeRuntimeManager, kubeRuntimeManager)
- kubeRuntimeManager.containerGC = newContainerGC(runtimeService, podStateProvider, kubeRuntimeManager)
- kubeRuntimeManager.versionCache = cache.NewObjectCache(
- func() (interface{}, error) {
- return kubeRuntimeManager.getTypedVersion()
- },
- versionCacheTTL,
- )
- return kubeRuntimeManager, nil
- }
- // Type returns the type of the container runtime.
- func (m *kubeGenericRuntimeManager) Type() string {
- return m.runtimeName
- }
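- // newRuntimeVersion parses a runtime version string, preferring a strict
- // semantic-version parse and falling back to a generic parse for
- // non-semver strings such as "1.2".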
- func newRuntimeVersion(version string) (*utilversion.Version, error) {
- if ver, err := utilversion.ParseSemantic(version); err == nil {
- return ver, nil
- }
- return utilversion.ParseGeneric(version)
- }
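- // getTypedVersion retrieves the typed version information from the remote
- // runtime service. It backs the version cache consulted by APIVersion.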
- func (m *kubeGenericRuntimeManager) getTypedVersion() (*runtimeapi.VersionResponse, error) {
- typedVersion, err := m.runtimeService.Version(kubeRuntimeAPIVersion)
- if err != nil {
- klog.Errorf("Get remote runtime typed version failed: %v", err)
- return nil, err
- }
- return typedVersion, nil
- }
- // Version returns the version information of the container runtime.
- func (m *kubeGenericRuntimeManager) Version() (kubecontainer.Version, error) {
- typedVersion, err := m.runtimeService.Version(kubeRuntimeAPIVersion)
- if err != nil {
- klog.Errorf("Get remote runtime version failed: %v", err)
- return nil, err
- }
- return newRuntimeVersion(typedVersion.RuntimeVersion)
- }
- // APIVersion returns the cached API version information of the container
- // runtime. Implementation is expected to update this cache periodically.
- // This may be different from the runtime engine's version.
- func (m *kubeGenericRuntimeManager) APIVersion() (kubecontainer.Version, error) {
- versionObject, err := m.versionCache.Get(m.machineInfo.MachineID)
- if err != nil {
- return nil, err
- }
- typedVersion := versionObject.(*runtimeapi.VersionResponse)
- return newRuntimeVersion(typedVersion.RuntimeApiVersion)
- }
- // Status returns the status of the runtime. An error is returned if the Status
- // function itself fails, nil otherwise.
- func (m *kubeGenericRuntimeManager) Status() (*kubecontainer.RuntimeStatus, error) {
- status, err := m.runtimeService.Status()
- if err != nil {
- return nil, err
- }
- return toKubeRuntimeStatus(status), nil
- }
- // GetPods returns a list of containers grouped by pods. The boolean parameter
- // specifies whether the runtime returns all containers including those already
- // exited and dead containers (used for garbage collection).
- func (m *kubeGenericRuntimeManager) GetPods(all bool) ([]*kubecontainer.Pod, error) {
- pods := make(map[kubetypes.UID]*kubecontainer.Pod)
- sandboxes, err := m.getKubeletSandboxes(all)
- if err != nil {
- return nil, err
- }
- for i := range sandboxes {
- s := sandboxes[i]
- if s.Metadata == nil {
- klog.V(4).Infof("Sandbox does not have metadata: %+v", s)
- continue
- }
- podUID := kubetypes.UID(s.Metadata.Uid)
- if _, ok := pods[podUID]; !ok {
- pods[podUID] = &kubecontainer.Pod{
- ID: podUID,
- Name: s.Metadata.Name,
- Namespace: s.Metadata.Namespace,
- }
- }
- p := pods[podUID]
- converted, err := m.sandboxToKubeContainer(s)
- if err != nil {
- klog.V(4).Infof("Convert %q sandbox %v of pod %q failed: %v", m.runtimeName, s, podUID, err)
- continue
- }
- p.Sandboxes = append(p.Sandboxes, converted)
- }
- containers, err := m.getKubeletContainers(all)
- if err != nil {
- return nil, err
- }
- for i := range containers {
- c := containers[i]
- if c.Metadata == nil {
- klog.V(4).Infof("Container does not have metadata: %+v", c)
- continue
- }
- labelledInfo := getContainerInfoFromLabels(c.Labels)
- pod, found := pods[labelledInfo.PodUID]
- if !found {
- pod = &kubecontainer.Pod{
- ID: labelledInfo.PodUID,
- Name: labelledInfo.PodName,
- Namespace: labelledInfo.PodNamespace,
- }
- pods[labelledInfo.PodUID] = pod
- }
- converted, err := m.toKubeContainer(c)
- if err != nil {
- klog.V(4).Infof("Convert %s container %v of pod %q failed: %v", m.runtimeName, c, labelledInfo.PodUID, err)
- continue
- }
- pod.Containers = append(pod.Containers, converted)
- }
- // Convert map to list.
- var result []*kubecontainer.Pod
- for _, pod := range pods {
- result = append(result, pod)
- }
- return result, nil
- }
- // containerToKillInfo contains necessary information to kill a container.
- type containerToKillInfo struct {
- // The spec of the container.
- container *v1.Container
- // The name of the container.
- name string
- // The message indicates why the container will be killed.
- message string
- }
- // podActions keeps information about what to do for a pod.
- type podActions struct {
- // Stop all running (regular and init) containers and the sandbox for the pod.
- KillPod bool
- // Whether we need to create a new sandbox. If we need to kill the pod and
- // create a new sandbox, all init containers need to be purged (i.e., removed).
- CreateSandbox bool
- // The ID of the existing sandbox. It is used for starting containers in ContainersToStart.
- SandboxID string
- // The attempt number of creating sandboxes for the pod.
- Attempt uint32
- // The next init container to start.
- NextInitContainerToStart *v1.Container
- // ContainersToStart keeps a list of indexes for the containers to start,
- // where the index is the index of the specific container in the pod spec
- // (pod.Spec.Containers).
- ContainersToStart []int
- // ContainersToKill keeps a map of containers that need to be killed. The key
- // is the container ID, and the value contains the information necessary to
- // kill the container.
- ContainersToKill map[kubecontainer.ContainerID]containerToKillInfo
- }
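- // For illustration, computePodActions for a brand-new pod with no existing
- // sandbox and no init containers yields roughly:
- //   podActions{
- //       KillPod:           true,
- //       CreateSandbox:     true,
- //       Attempt:           0,
- //       ContainersToStart: []int{0, 1, ...}, // every index in pod.Spec.Containers
- //       ContainersToKill:  map[kubecontainer.ContainerID]containerToKillInfo{},
- //   }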
- // podSandboxChanged checks whether the spec of the pod has changed and returns
- // (changed, new attempt, original sandboxID if it exists).
- func (m *kubeGenericRuntimeManager) podSandboxChanged(pod *v1.Pod, podStatus *kubecontainer.PodStatus) (bool, uint32, string) {
- if len(podStatus.SandboxStatuses) == 0 {
- klog.V(2).Infof("No sandbox for pod %q can be found. Need to start a new one", format.Pod(pod))
- return true, 0, ""
- }
- readySandboxCount := 0
- for _, s := range podStatus.SandboxStatuses {
- if s.State == runtimeapi.PodSandboxState_SANDBOX_READY {
- readySandboxCount++
- }
- }
- // Needs to create a new sandbox when readySandboxCount > 1 or the ready sandbox is not the latest one.
- sandboxStatus := podStatus.SandboxStatuses[0]
- if readySandboxCount > 1 {
- klog.V(2).Infof("More than 1 sandboxes for pod %q are ready. Need to reconcile them", format.Pod(pod))
- return true, sandboxStatus.Metadata.Attempt + 1, sandboxStatus.Id
- }
- if sandboxStatus.State != runtimeapi.PodSandboxState_SANDBOX_READY {
- klog.V(2).Infof("No ready sandbox for pod %q can be found. Need to start a new one", format.Pod(pod))
- return true, sandboxStatus.Metadata.Attempt + 1, sandboxStatus.Id
- }
- // Needs to create a new sandbox when network namespace changed.
- if sandboxStatus.GetLinux().GetNamespaces().GetOptions().GetNetwork() != networkNamespaceForPod(pod) {
- klog.V(2).Infof("Sandbox for pod %q has changed. Need to start a new one", format.Pod(pod))
- return true, sandboxStatus.Metadata.Attempt + 1, ""
- }
- // Needs to create a new sandbox when the sandbox does not have an IP address.
- if !kubecontainer.IsHostNetworkPod(pod) && sandboxStatus.Network.Ip == "" {
- klog.V(2).Infof("Sandbox for pod %q has no IP address. Need to start a new one", format.Pod(pod))
- return true, sandboxStatus.Metadata.Attempt + 1, sandboxStatus.Id
- }
- return false, sandboxStatus.Metadata.Attempt, sandboxStatus.Id
- }
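- // containerChanged returns the expected hash of the container spec, the hash
- // recorded in the container status, and whether the two differ.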
- func containerChanged(container *v1.Container, containerStatus *kubecontainer.ContainerStatus) (uint64, uint64, bool) {
- expectedHash := kubecontainer.HashContainer(container)
- return expectedHash, containerStatus.Hash, containerStatus.Hash != expectedHash
- }
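- // shouldRestartOnFailure returns true unless the pod's restart policy is Never.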
- func shouldRestartOnFailure(pod *v1.Pod) bool {
- return pod.Spec.RestartPolicy != v1.RestartPolicyNever
- }
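- // containerSucceeded returns true if the named container has exited with
- // code 0; a container that is still running, or has no recorded status,
- // does not count as succeeded.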
- func containerSucceeded(c *v1.Container, podStatus *kubecontainer.PodStatus) bool {
- cStatus := podStatus.FindContainerStatusByName(c.Name)
- if cStatus == nil || cStatus.State == kubecontainer.ContainerStateRunning {
- return false
- }
- return cStatus.ExitCode == 0
- }
- // computePodActions checks whether the pod spec has changed and returns the changes if true.
- func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *kubecontainer.PodStatus) podActions {
- klog.V(5).Infof("Syncing Pod %q: %+v", format.Pod(pod), pod)
- createPodSandbox, attempt, sandboxID := m.podSandboxChanged(pod, podStatus)
- changes := podActions{
- KillPod: createPodSandbox,
- CreateSandbox: createPodSandbox,
- SandboxID: sandboxID,
- Attempt: attempt,
- ContainersToStart: []int{},
- ContainersToKill: make(map[kubecontainer.ContainerID]containerToKillInfo),
- }
- // If we need to (re-)create the pod sandbox, everything will need to be
- // killed and recreated, and init containers should be purged.
- if createPodSandbox {
- if !shouldRestartOnFailure(pod) && attempt != 0 && len(podStatus.ContainerStatuses) != 0 {
- // Should not restart the pod; just return.
- // We should not create a sandbox for a pod if it is already done;
- // if all containers are done and should not be started, there is no need to create a new sandbox.
- // This avoids confusing logs on pods whose containers all have exit codes, where we would
- // otherwise recreate a sandbox only to terminate it again.
- //
- // If ContainerStatuses is empty, we assume that we've never
- // successfully created any containers. In this case, we should
- // retry creating the sandbox.
- changes.CreateSandbox = false
- return changes
- }
- if len(pod.Spec.InitContainers) != 0 {
- // Pod has init containers, return the first one.
- changes.NextInitContainerToStart = &pod.Spec.InitContainers[0]
- return changes
- }
- // Start all containers by default but exclude the ones that succeeded if
- // RestartPolicy is OnFailure.
- for idx, c := range pod.Spec.Containers {
- if containerSucceeded(&c, podStatus) && pod.Spec.RestartPolicy == v1.RestartPolicyOnFailure {
- continue
- }
- changes.ContainersToStart = append(changes.ContainersToStart, idx)
- }
- return changes
- }
- // Check initialization progress.
- initLastStatus, next, done := findNextInitContainerToRun(pod, podStatus)
- if !done {
- if next != nil {
- initFailed := initLastStatus != nil && isInitContainerFailed(initLastStatus)
- if initFailed && !shouldRestartOnFailure(pod) {
- changes.KillPod = true
- } else {
- // Always try to stop containers in unknown state first.
- if initLastStatus != nil && initLastStatus.State == kubecontainer.ContainerStateUnknown {
- changes.ContainersToKill[initLastStatus.ID] = containerToKillInfo{
- name: next.Name,
- container: next,
- message: fmt.Sprintf("Init container is in %q state, try killing it before restart",
- initLastStatus.State),
- }
- }
- changes.NextInitContainerToStart = next
- }
- }
- // Initialization failed or is still in progress. Skip inspecting non-init
- // containers.
- return changes
- }
- // Number of running containers to keep.
- keepCount := 0
- // Check the status of containers.
- for idx, container := range pod.Spec.Containers {
- containerStatus := podStatus.FindContainerStatusByName(container.Name)
- // Call internal container post-stop lifecycle hook for any non-running container so that any
- // allocated cpus are released immediately. If the container is restarted, cpus will be re-allocated
- // to it.
- if containerStatus != nil && containerStatus.State != kubecontainer.ContainerStateRunning {
- if err := m.internalLifecycle.PostStopContainer(containerStatus.ID.ID); err != nil {
- klog.Errorf("internal container post-stop lifecycle hook failed for container %v in pod %v with error %v",
- container.Name, pod.Name, err)
- }
- }
- // If container does not exist, or is not running, check whether we
- // need to restart it.
- if containerStatus == nil || containerStatus.State != kubecontainer.ContainerStateRunning {
- if kubecontainer.ShouldContainerBeRestarted(&container, pod, podStatus) {
- message := fmt.Sprintf("Container %+v is dead, but RestartPolicy says that we should restart it.", container)
- klog.V(3).Infof(message)
- changes.ContainersToStart = append(changes.ContainersToStart, idx)
- if containerStatus != nil && containerStatus.State == kubecontainer.ContainerStateUnknown {
- // If container is in unknown state, we don't know whether it
- // is actually running or not, always try killing it before
- // restart to avoid having 2 running instances of the same container.
- changes.ContainersToKill[containerStatus.ID] = containerToKillInfo{
- name: containerStatus.Name,
- container: &pod.Spec.Containers[idx],
- message: fmt.Sprintf("Container is in %q state, try killing it before restart",
- containerStatus.State),
- }
- }
- }
- continue
- }
- // The container is running, but kill the container if any of the following conditions is met.
- var message string
- restart := shouldRestartOnFailure(pod)
- if _, _, changed := containerChanged(&container, containerStatus); changed {
- message = fmt.Sprintf("Container %s definition changed", container.Name)
- // Restart regardless of the restart policy because the container
- // spec changed.
- restart = true
- } else if liveness, found := m.livenessManager.Get(containerStatus.ID); found && liveness == proberesults.Failure {
- // If the container failed the liveness probe, we should kill it.
- message = fmt.Sprintf("Container %s failed liveness probe", container.Name)
- } else {
- // Keep the container.
- keepCount++
- continue
- }
- // We need to kill the container, but if we also want to restart the
- // container afterwards, make the intent clear in the message. Also do
- // not kill the entire pod since we expect the container to be running again eventually.
- if restart {
- message = fmt.Sprintf("%s, will be restarted", message)
- changes.ContainersToStart = append(changes.ContainersToStart, idx)
- }
- changes.ContainersToKill[containerStatus.ID] = containerToKillInfo{
- name: containerStatus.Name,
- container: &pod.Spec.Containers[idx],
- message: message,
- }
- klog.V(2).Infof("Container %q (%q) of pod %s: %s", container.Name, containerStatus.ID, format.Pod(pod), message)
- }
- if keepCount == 0 && len(changes.ContainersToStart) == 0 {
- changes.KillPod = true
- }
- return changes
- }
- // SyncPod syncs the running pod into the desired pod by executing the following steps:
- //
- // 1. Compute sandbox and container changes.
- // 2. Kill pod sandbox if necessary.
- // 3. Kill any containers that should not be running.
- // 4. Create sandbox if necessary.
- // 5. Create init containers.
- // 6. Create normal containers.
- func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, podStatus *kubecontainer.PodStatus, pullSecrets []v1.Secret, backOff *flowcontrol.Backoff) (result kubecontainer.PodSyncResult) {
- // Step 1: Compute sandbox and container changes.
- podContainerChanges := m.computePodActions(pod, podStatus)
- klog.V(3).Infof("computePodActions got %+v for pod %q", podContainerChanges, format.Pod(pod))
- if podContainerChanges.CreateSandbox {
- ref, err := ref.GetReference(legacyscheme.Scheme, pod)
- if err != nil {
- klog.Errorf("Couldn't make a ref to pod %q: '%v'", format.Pod(pod), err)
- }
- if podContainerChanges.SandboxID != "" {
- m.recorder.Eventf(ref, v1.EventTypeNormal, events.SandboxChanged, "Pod sandbox changed, it will be killed and re-created.")
- } else {
- klog.V(4).Infof("SyncPod received new pod %q, will create a sandbox for it", format.Pod(pod))
- }
- }
- // Step 2: Kill the pod if the sandbox has changed.
- if podContainerChanges.KillPod {
- if podContainerChanges.CreateSandbox {
- klog.V(4).Infof("Stopping PodSandbox for %q, will start new one", format.Pod(pod))
- } else {
- klog.V(4).Infof("Stopping PodSandbox for %q because all other containers are dead.", format.Pod(pod))
- }
- killResult := m.killPodWithSyncResult(pod, kubecontainer.ConvertPodStatusToRunningPod(m.runtimeName, podStatus), nil)
- result.AddPodSyncResult(killResult)
- if killResult.Error() != nil {
- klog.Errorf("killPodWithSyncResult failed: %v", killResult.Error())
- return
- }
- if podContainerChanges.CreateSandbox {
- m.purgeInitContainers(pod, podStatus)
- }
- } else {
- // Step 3: kill any running containers in this pod which are not to be kept.
- for containerID, containerInfo := range podContainerChanges.ContainersToKill {
- klog.V(3).Infof("Killing unwanted container %q(id=%q) for pod %q", containerInfo.name, containerID, format.Pod(pod))
- killContainerResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, containerInfo.name)
- result.AddSyncResult(killContainerResult)
- if err := m.killContainer(pod, containerID, containerInfo.name, containerInfo.message, nil); err != nil {
- killContainerResult.Fail(kubecontainer.ErrKillContainer, err.Error())
- klog.Errorf("killContainer %q(id=%q) for pod %q failed: %v", containerInfo.name, containerID, format.Pod(pod), err)
- return
- }
- }
- }
- // Keep terminated init containers fairly aggressively controlled.
- // This is an optimization because container removals are typically handled
- // by the container garbage collector.
- m.pruneInitContainersBeforeStart(pod, podStatus)
- // We pass the value of the podIP down to generatePodSandboxConfig and
- // generateContainerConfig, which in turn passes it to various other
- // functions, in order to facilitate functionality that requires this
- // value (hosts file and downward API) and avoid races determining
- // the pod IP in cases where a container requires restart but the
- // podIP isn't in the status manager yet.
- //
- // We default to the IP in the passed-in pod status, and overwrite it if the
- // sandbox needs to be (re)started.
- podIP := ""
- if podStatus != nil {
- podIP = podStatus.IP
- }
- // Step 4: Create a sandbox for the pod if necessary.
- podSandboxID := podContainerChanges.SandboxID
- if podContainerChanges.CreateSandbox {
- var msg string
- var err error
- klog.V(4).Infof("Creating sandbox for pod %q", format.Pod(pod))
- createSandboxResult := kubecontainer.NewSyncResult(kubecontainer.CreatePodSandbox, format.Pod(pod))
- result.AddSyncResult(createSandboxResult)
- podSandboxID, msg, err = m.createPodSandbox(pod, podContainerChanges.Attempt)
- if err != nil {
- createSandboxResult.Fail(kubecontainer.ErrCreatePodSandbox, msg)
- klog.Errorf("createPodSandbox for pod %q failed: %v", format.Pod(pod), err)
- ref, referr := ref.GetReference(legacyscheme.Scheme, pod)
- if referr != nil {
- klog.Errorf("Couldn't make a ref to pod %q: '%v'", format.Pod(pod), referr)
- }
- m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedCreatePodSandBox, "Failed create pod sandbox: %v", err)
- return
- }
- klog.V(4).Infof("Created PodSandbox %q for pod %q", podSandboxID, format.Pod(pod))
- podSandboxStatus, err := m.runtimeService.PodSandboxStatus(podSandboxID)
- if err != nil {
- ref, referr := ref.GetReference(legacyscheme.Scheme, pod)
- if referr != nil {
- klog.Errorf("Couldn't make a ref to pod %q: '%v'", format.Pod(pod), referr)
- }
- m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedStatusPodSandBox, "Unable to get pod sandbox status: %v", err)
- klog.Errorf("Failed to get pod sandbox status: %v; Skipping pod %q", err, format.Pod(pod))
- result.Fail(err)
- return
- }
- // If we ever allow updating a pod from non-host-network to
- // host-network, we may use a stale IP.
- if !kubecontainer.IsHostNetworkPod(pod) {
- // Overwrite the podIP passed in the pod status, since we just started the pod sandbox.
- podIP = m.determinePodSandboxIP(pod.Namespace, pod.Name, podSandboxStatus)
- klog.V(4).Infof("Determined the ip %q for pod %q after sandbox changed", podIP, format.Pod(pod))
- }
- }
- // Get podSandboxConfig for containers to start.
- configPodSandboxResult := kubecontainer.NewSyncResult(kubecontainer.ConfigPodSandbox, podSandboxID)
- result.AddSyncResult(configPodSandboxResult)
- podSandboxConfig, err := m.generatePodSandboxConfig(pod, podContainerChanges.Attempt)
- if err != nil {
- message := fmt.Sprintf("GeneratePodSandboxConfig for pod %q failed: %v", format.Pod(pod), err)
- klog.Error(message)
- configPodSandboxResult.Fail(kubecontainer.ErrConfigPodSandbox, message)
- return
- }
- // Step 5: start the init container.
- if container := podContainerChanges.NextInitContainerToStart; container != nil {
- // Start the next init container.
- startContainerResult := kubecontainer.NewSyncResult(kubecontainer.StartContainer, container.Name)
- result.AddSyncResult(startContainerResult)
- isInBackOff, msg, err := m.doBackOff(pod, container, podStatus, backOff)
- if isInBackOff {
- startContainerResult.Fail(err, msg)
- klog.V(4).Infof("Backing Off restarting init container %+v in pod %v", container, format.Pod(pod))
- return
- }
- klog.V(4).Infof("Creating init container %+v in pod %v", container, format.Pod(pod))
- if msg, err := m.startContainer(podSandboxID, podSandboxConfig, container, pod, podStatus, pullSecrets, podIP); err != nil {
- startContainerResult.Fail(err, msg)
- utilruntime.HandleError(fmt.Errorf("init container start failed: %v: %s", err, msg))
- return
- }
- // Successfully started the init container.
- klog.V(4).Infof("Completed init container %q for pod %q", container.Name, format.Pod(pod))
- }
- // Step 6: start containers in podContainerChanges.ContainersToStart.
- for _, idx := range podContainerChanges.ContainersToStart {
- container := &pod.Spec.Containers[idx]
- startContainerResult := kubecontainer.NewSyncResult(kubecontainer.StartContainer, container.Name)
- result.AddSyncResult(startContainerResult)
- isInBackOff, msg, err := m.doBackOff(pod, container, podStatus, backOff)
- if isInBackOff {
- startContainerResult.Fail(err, msg)
- klog.V(4).Infof("Backing Off restarting container %+v in pod %v", container, format.Pod(pod))
- continue
- }
- klog.V(4).Infof("Creating container %+v in pod %v", container, format.Pod(pod))
- if msg, err := m.startContainer(podSandboxID, podSandboxConfig, container, pod, podStatus, pullSecrets, podIP); err != nil {
- startContainerResult.Fail(err, msg)
- // known errors that are logged in other places are logged at higher levels here to avoid
- // repetitive log spam
- switch {
- case err == images.ErrImagePullBackOff:
- klog.V(3).Infof("container start failed: %v: %s", err, msg)
- default:
- utilruntime.HandleError(fmt.Errorf("container start failed: %v: %s", err, msg))
- }
- continue
- }
- }
- return
- }
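- // A minimal sketch of how a caller consumes SyncPod's result (the kubelet's
- // pod workers do roughly this):
- //
- //   result := m.SyncPod(pod, podStatus, pullSecrets, backOff)
- //   if err := result.Error(); err != nil {
- //       // Record the failure and let the pod worker retry on the next sync.
- //   }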
- // doBackOff reports whether the container is still in its restart backoff window.
- // If so, it returns true together with a detailed error message and a brief backoff error.
- func (m *kubeGenericRuntimeManager) doBackOff(pod *v1.Pod, container *v1.Container, podStatus *kubecontainer.PodStatus, backOff *flowcontrol.Backoff) (bool, string, error) {
- var cStatus *kubecontainer.ContainerStatus
- for _, c := range podStatus.ContainerStatuses {
- if c.Name == container.Name && c.State == kubecontainer.ContainerStateExited {
- cStatus = c
- break
- }
- }
- if cStatus == nil {
- return false, "", nil
- }
- klog.V(3).Infof("checking backoff for container %q in pod %q", container.Name, format.Pod(pod))
- // Use the finished time of the latest exited container as the start point to calculate whether to do back-off.
- ts := cStatus.FinishedAt
- // backOff requires a unique key to identify the container.
- key := getStableKey(pod, container)
- if backOff.IsInBackOffSince(key, ts) {
- if ref, err := kubecontainer.GenerateContainerRef(pod, container); err == nil {
- m.recorder.Eventf(ref, v1.EventTypeWarning, events.BackOffStartContainer, "Back-off restarting failed container")
- }
- err := fmt.Errorf("Back-off %s restarting failed container=%s pod=%s", backOff.Get(key), container.Name, format.Pod(pod))
- klog.V(3).Infof("%s", err.Error())
- return true, err.Error(), kubecontainer.ErrCrashLoopBackOff
- }
- backOff.Next(key, ts)
- return false, "", nil
- }
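- // For example, assuming the kubelet's usual defaults (a 10s initial backoff
- // that doubles up to a 5m cap), a container that keeps crashing is restarted
- // after roughly 10s, 20s, 40s, and so on; while in backoff, a
- // BackOffStartContainer event is recorded and the error surfaces as the
- // CrashLoopBackOff reason in the pod status.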
- // KillPod kills all the containers of a pod. Pod may be nil; running pod must not be.
- // If specified, gracePeriodOverride allows the caller to override the pod's default grace period.
- // Only hard kill paths are allowed to specify a gracePeriodOverride in the kubelet, in order to not corrupt user data.
- // It is useful when doing a SIGKILL for hard eviction scenarios, or applying the max grace period during soft eviction.
- func (m *kubeGenericRuntimeManager) KillPod(pod *v1.Pod, runningPod kubecontainer.Pod, gracePeriodOverride *int64) error {
- err := m.killPodWithSyncResult(pod, runningPod, gracePeriodOverride)
- return err.Error()
- }
- // killPodWithSyncResult kills a runningPod and returns SyncResult.
- // Note: The pod passed in could be *nil* when kubelet restarted.
- func (m *kubeGenericRuntimeManager) killPodWithSyncResult(pod *v1.Pod, runningPod kubecontainer.Pod, gracePeriodOverride *int64) (result kubecontainer.PodSyncResult) {
- killContainerResults := m.killContainersWithSyncResult(pod, runningPod, gracePeriodOverride)
- for _, containerResult := range killContainerResults {
- result.AddSyncResult(containerResult)
- }
- // Stop the sandbox; it will be removed in GarbageCollect.
- killSandboxResult := kubecontainer.NewSyncResult(kubecontainer.KillPodSandbox, runningPod.ID)
- result.AddSyncResult(killSandboxResult)
- // Stop all sandboxes belonging to the same pod.
- for _, podSandbox := range runningPod.Sandboxes {
- if err := m.runtimeService.StopPodSandbox(podSandbox.ID.ID); err != nil {
- killSandboxResult.Fail(kubecontainer.ErrKillPodSandbox, err.Error())
- klog.Errorf("Failed to stop sandbox %q", podSandbox.ID)
- }
- }
- return
- }
- // GetPodStatus retrieves the status of the pod, including the
- // statuses of all containers in the pod that are visible to the runtime.
- func (m *kubeGenericRuntimeManager) GetPodStatus(uid kubetypes.UID, name, namespace string) (*kubecontainer.PodStatus, error) {
- // We currently retain the restart count of a container as a container label.
- // Each time a container restarts, the kubelet reads the restart count from
- // the registered dead container, increments it to get the new restart count,
- // and then adds a label with the new restart count to the newly started container.
- // However, this method has some limitations:
- // 1. When all dead containers have been garbage collected, the container
- //    status could not recover the historical value and would be *inaccurate*.
- //    Fortunately, the chance of this is really slim.
- // 2. When working with old-version containers that have no restart count label,
- //    we can only assume their restart count is 0.
- // In any case, we only promised "best-effort" restart count reporting; we can
- // ignore these limitations for now.
- // TODO: move this comment to SyncPod.
- podSandboxIDs, err := m.getSandboxIDByPodUID(uid, nil)
- if err != nil {
- return nil, err
- }
- podFullName := format.Pod(&v1.Pod{
- ObjectMeta: metav1.ObjectMeta{
- Name: name,
- Namespace: namespace,
- UID: uid,
- },
- })
- klog.V(4).Infof("getSandboxIDByPodUID got sandbox IDs %q for pod %q", podSandboxIDs, podFullName)
- sandboxStatuses := make([]*runtimeapi.PodSandboxStatus, len(podSandboxIDs))
- podIP := ""
- for idx, podSandboxID := range podSandboxIDs {
- podSandboxStatus, err := m.runtimeService.PodSandboxStatus(podSandboxID)
- if err != nil {
- klog.Errorf("PodSandboxStatus of sandbox %q for pod %q error: %v", podSandboxID, podFullName, err)
- return nil, err
- }
- sandboxStatuses[idx] = podSandboxStatus
- // Only get pod IP from latest sandbox
- if idx == 0 && podSandboxStatus.State == runtimeapi.PodSandboxState_SANDBOX_READY {
- podIP = m.determinePodSandboxIP(namespace, name, podSandboxStatus)
- }
- }
- // Get statuses of all containers visible in the pod.
- containerStatuses, err := m.getPodContainerStatuses(uid, name, namespace)
- if err != nil {
- if m.logReduction.ShouldMessageBePrinted(err.Error(), podFullName) {
- klog.Errorf("getPodContainerStatuses for pod %q failed: %v", podFullName, err)
- }
- return nil, err
- }
- m.logReduction.ClearID(podFullName)
- return &kubecontainer.PodStatus{
- ID: uid,
- Name: name,
- Namespace: namespace,
- IP: podIP,
- SandboxStatuses: sandboxStatuses,
- ContainerStatuses: containerStatuses,
- }, nil
- }
- // GarbageCollect removes dead containers using the specified container gc policy.
- func (m *kubeGenericRuntimeManager) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool, evictNonDeletedPods bool) error {
- return m.containerGC.GarbageCollect(gcPolicy, allSourcesReady, evictNonDeletedPods)
- }
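- // For illustration, a gcPolicy mirroring the kubelet's default flags (values
- // assumed here) would look like:
- //   kubecontainer.ContainerGCPolicy{MinAge: 0, MaxPerPodContainer: 1, MaxContainers: -1}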
- // UpdatePodCIDR is just a passthrough method to update the runtimeConfig of the shim
- // with the podCIDR supplied by the kubelet.
- func (m *kubeGenericRuntimeManager) UpdatePodCIDR(podCIDR string) error {
- // TODO(#35531): do we really want to write a method on this manager for each
- // field of the config?
- klog.Infof("updating runtime config through cri with podcidr %v", podCIDR)
- return m.runtimeService.UpdateRuntimeConfig(
- &runtimeapi.RuntimeConfig{
- NetworkConfig: &runtimeapi.NetworkConfig{
- PodCidr: podCIDR,
- },
- })
- }
|