kuberuntime_container.go 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912
  1. /*
  2. Copyright 2016 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package kuberuntime
  14. import (
  15. "context"
  16. "errors"
  17. "fmt"
  18. "io"
  19. "math/rand"
  20. "net/url"
  21. "os"
  22. "path/filepath"
  23. goruntime "runtime"
  24. "sort"
  25. "strings"
  26. "sync"
  27. "time"
  28. grpcstatus "google.golang.org/grpc/status"
  29. "github.com/armon/circbuf"
  30. "k8s.io/klog"
  31. v1 "k8s.io/api/core/v1"
  32. metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
  33. kubetypes "k8s.io/apimachinery/pkg/types"
  34. utilruntime "k8s.io/apimachinery/pkg/util/runtime"
  35. "k8s.io/apimachinery/pkg/util/sets"
  36. utilfeature "k8s.io/apiserver/pkg/util/feature"
  37. runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
  38. "k8s.io/kubernetes/pkg/features"
  39. kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
  40. "k8s.io/kubernetes/pkg/kubelet/events"
  41. "k8s.io/kubernetes/pkg/kubelet/types"
  42. "k8s.io/kubernetes/pkg/kubelet/util/format"
  43. "k8s.io/kubernetes/pkg/util/selinux"
  44. "k8s.io/kubernetes/pkg/util/tail"
  45. volumeutil "k8s.io/kubernetes/pkg/volume/util"
  46. )
  // Sentinel errors used by startContainer to classify the phase in which
  // bringing up a container failed.
  var (
  	// ErrCreateContainerConfig is returned when the container configuration
  	// (including the ephemeral-container namespace target) cannot be produced.
  	ErrCreateContainerConfig = errors.New("CreateContainerConfigError")

  	// ErrCreateContainer is returned when the runtime service rejects the
  	// create-container request.
  	ErrCreateContainer = errors.New("CreateContainerError")

  	// ErrPreStartHook is returned when the internal pre-start lifecycle hook fails.
  	ErrPreStartHook = errors.New("PreStartHookError")

  	// ErrPostStartHook identifies a failed post-start lifecycle handler; its
  	// text is used as the prefix of the error startContainer returns.
  	ErrPostStartHook = errors.New("PostStartHookError")
  )
  57. // recordContainerEvent should be used by the runtime manager for all container related events.
  58. // it has sanity checks to ensure that we do not write events that can abuse our masters.
  59. // in particular, it ensures that a containerID never appears in an event message as that
  60. // is prone to causing a lot of distinct events that do not count well.
  61. // it replaces any reference to a containerID with the containerName which is stable, and is what users know.
  62. func (m *kubeGenericRuntimeManager) recordContainerEvent(pod *v1.Pod, container *v1.Container, containerID, eventType, reason, message string, args ...interface{}) {
  63. ref, err := kubecontainer.GenerateContainerRef(pod, container)
  64. if err != nil {
  65. klog.Errorf("Can't make a ref to pod %q, container %v: %v", format.Pod(pod), container.Name, err)
  66. return
  67. }
  68. eventMessage := message
  69. if len(args) > 0 {
  70. eventMessage = fmt.Sprintf(message, args...)
  71. }
  72. // this is a hack, but often the error from the runtime includes the containerID
  73. // which kills our ability to deduplicate events. this protection makes a huge
  74. // difference in the number of unique events
  75. if containerID != "" {
  76. eventMessage = strings.Replace(eventMessage, containerID, container.Name, -1)
  77. }
  78. m.recorder.Event(ref, eventType, reason, eventMessage)
  79. }
  80. // startSpec wraps the spec required to start a container, either a regular/init container
  81. // or an ephemeral container. Ephemeral containers contain all the fields of regular/init
  82. // containers, plus some additional fields. In both cases startSpec.container will be set.
  83. type startSpec struct {
  84. container *v1.Container
  85. ephemeralContainer *v1.EphemeralContainer
  86. }
  87. func containerStartSpec(c *v1.Container) *startSpec {
  88. return &startSpec{container: c}
  89. }
  90. func ephemeralContainerStartSpec(ec *v1.EphemeralContainer) *startSpec {
  91. return &startSpec{
  92. container: (*v1.Container)(&ec.EphemeralContainerCommon),
  93. ephemeralContainer: ec,
  94. }
  95. }
  96. // getTargetID returns the kubecontainer.ContainerID for ephemeral container namespace
  97. // targeting. The target is stored as EphemeralContainer.TargetContainerName, which must be
  98. // resolved to a ContainerID using podStatus. The target container must already exist, which
  99. // usually isn't a problem since ephemeral containers aren't allowed at pod creation time.
  100. // This always returns nil when the EphemeralContainers feature is disabled.
  101. func (s *startSpec) getTargetID(podStatus *kubecontainer.PodStatus) (*kubecontainer.ContainerID, error) {
  102. if s.ephemeralContainer == nil || s.ephemeralContainer.TargetContainerName == "" || !utilfeature.DefaultFeatureGate.Enabled(features.EphemeralContainers) {
  103. return nil, nil
  104. }
  105. targetStatus := podStatus.FindContainerStatusByName(s.ephemeralContainer.TargetContainerName)
  106. if targetStatus == nil {
  107. return nil, fmt.Errorf("unable to find target container %v", s.ephemeralContainer.TargetContainerName)
  108. }
  109. return &targetStatus.ID, nil
  110. }
  111. // startContainer starts a container and returns a message indicates why it is failed on error.
  112. // It starts the container through the following steps:
  113. // * pull the image
  114. // * create the container
  115. // * start the container
  116. // * run the post start lifecycle hooks (if applicable)
  117. func (m *kubeGenericRuntimeManager) startContainer(podSandboxID string, podSandboxConfig *runtimeapi.PodSandboxConfig, spec *startSpec, pod *v1.Pod, podStatus *kubecontainer.PodStatus, pullSecrets []v1.Secret, podIP string, podIPs []string) (string, error) {
  118. container := spec.container
  119. // Step 1: pull the image.
  120. imageRef, msg, err := m.imagePuller.EnsureImageExists(pod, container, pullSecrets, podSandboxConfig)
  121. if err != nil {
  122. s, _ := grpcstatus.FromError(err)
  123. m.recordContainerEvent(pod, container, "", v1.EventTypeWarning, events.FailedToCreateContainer, "Error: %v", s.Message())
  124. return msg, err
  125. }
  126. // Step 2: create the container.
  127. ref, err := kubecontainer.GenerateContainerRef(pod, container)
  128. if err != nil {
  129. klog.Errorf("Can't make a ref to pod %q, container %v: %v", format.Pod(pod), container.Name, err)
  130. }
  131. klog.V(4).Infof("Generating ref for container %s: %#v", container.Name, ref)
  132. // For a new container, the RestartCount should be 0
  133. restartCount := 0
  134. containerStatus := podStatus.FindContainerStatusByName(container.Name)
  135. if containerStatus != nil {
  136. restartCount = containerStatus.RestartCount + 1
  137. }
  138. target, err := spec.getTargetID(podStatus)
  139. if err != nil {
  140. s, _ := grpcstatus.FromError(err)
  141. m.recordContainerEvent(pod, container, "", v1.EventTypeWarning, events.FailedToCreateContainer, "Error: %v", s.Message())
  142. return s.Message(), ErrCreateContainerConfig
  143. }
  144. containerConfig, cleanupAction, err := m.generateContainerConfig(container, pod, restartCount, podIP, imageRef, podIPs, target)
  145. if cleanupAction != nil {
  146. defer cleanupAction()
  147. }
  148. if err != nil {
  149. s, _ := grpcstatus.FromError(err)
  150. m.recordContainerEvent(pod, container, "", v1.EventTypeWarning, events.FailedToCreateContainer, "Error: %v", s.Message())
  151. return s.Message(), ErrCreateContainerConfig
  152. }
  153. containerID, err := m.runtimeService.CreateContainer(podSandboxID, containerConfig, podSandboxConfig)
  154. if err != nil {
  155. s, _ := grpcstatus.FromError(err)
  156. m.recordContainerEvent(pod, container, containerID, v1.EventTypeWarning, events.FailedToCreateContainer, "Error: %v", s.Message())
  157. return s.Message(), ErrCreateContainer
  158. }
  159. err = m.internalLifecycle.PreStartContainer(pod, container, containerID)
  160. if err != nil {
  161. s, _ := grpcstatus.FromError(err)
  162. m.recordContainerEvent(pod, container, containerID, v1.EventTypeWarning, events.FailedToStartContainer, "Internal PreStartContainer hook failed: %v", s.Message())
  163. return s.Message(), ErrPreStartHook
  164. }
  165. m.recordContainerEvent(pod, container, containerID, v1.EventTypeNormal, events.CreatedContainer, fmt.Sprintf("Created container %s", container.Name))
  166. if ref != nil {
  167. m.containerRefManager.SetRef(kubecontainer.ContainerID{
  168. Type: m.runtimeName,
  169. ID: containerID,
  170. }, ref)
  171. }
  172. // Step 3: start the container.
  173. err = m.runtimeService.StartContainer(containerID)
  174. if err != nil {
  175. s, _ := grpcstatus.FromError(err)
  176. m.recordContainerEvent(pod, container, containerID, v1.EventTypeWarning, events.FailedToStartContainer, "Error: %v", s.Message())
  177. return s.Message(), kubecontainer.ErrRunContainer
  178. }
  179. m.recordContainerEvent(pod, container, containerID, v1.EventTypeNormal, events.StartedContainer, fmt.Sprintf("Started container %s", container.Name))
  180. // Symlink container logs to the legacy container log location for cluster logging
  181. // support.
  182. // TODO(random-liu): Remove this after cluster logging supports CRI container log path.
  183. containerMeta := containerConfig.GetMetadata()
  184. sandboxMeta := podSandboxConfig.GetMetadata()
  185. legacySymlink := legacyLogSymlink(containerID, containerMeta.Name, sandboxMeta.Name,
  186. sandboxMeta.Namespace)
  187. containerLog := filepath.Join(podSandboxConfig.LogDirectory, containerConfig.LogPath)
  188. // only create legacy symlink if containerLog path exists (or the error is not IsNotExist).
  189. // Because if containerLog path does not exist, only dandling legacySymlink is created.
  190. // This dangling legacySymlink is later removed by container gc, so it does not make sense
  191. // to create it in the first place. it happens when journald logging driver is used with docker.
  192. if _, err := m.osInterface.Stat(containerLog); !os.IsNotExist(err) {
  193. if err := m.osInterface.Symlink(containerLog, legacySymlink); err != nil {
  194. klog.Errorf("Failed to create legacy symbolic link %q to container %q log %q: %v",
  195. legacySymlink, containerID, containerLog, err)
  196. }
  197. }
  198. // Step 4: execute the post start hook.
  199. if container.Lifecycle != nil && container.Lifecycle.PostStart != nil {
  200. kubeContainerID := kubecontainer.ContainerID{
  201. Type: m.runtimeName,
  202. ID: containerID,
  203. }
  204. msg, handlerErr := m.runner.Run(kubeContainerID, pod, container, container.Lifecycle.PostStart)
  205. if handlerErr != nil {
  206. m.recordContainerEvent(pod, container, kubeContainerID.ID, v1.EventTypeWarning, events.FailedPostStartHook, msg)
  207. if err := m.killContainer(pod, kubeContainerID, container.Name, "FailedPostStartHook", nil); err != nil {
  208. klog.Errorf("Failed to kill container %q(id=%q) in pod %q: %v, %v",
  209. container.Name, kubeContainerID.String(), format.Pod(pod), ErrPostStartHook, err)
  210. }
  211. return msg, fmt.Errorf("%s: %v", ErrPostStartHook, handlerErr)
  212. }
  213. }
  214. return "", nil
  215. }
  216. // generateContainerConfig generates container config for kubelet runtime v1.
  217. func (m *kubeGenericRuntimeManager) generateContainerConfig(container *v1.Container, pod *v1.Pod, restartCount int, podIP, imageRef string, podIPs []string, nsTarget *kubecontainer.ContainerID) (*runtimeapi.ContainerConfig, func(), error) {
  218. opts, cleanupAction, err := m.runtimeHelper.GenerateRunContainerOptions(pod, container, podIP, podIPs)
  219. if err != nil {
  220. return nil, nil, err
  221. }
  222. uid, username, err := m.getImageUser(container.Image)
  223. if err != nil {
  224. return nil, cleanupAction, err
  225. }
  226. // Verify RunAsNonRoot. Non-root verification only supports numeric user.
  227. if err := verifyRunAsNonRoot(pod, container, uid, username); err != nil {
  228. return nil, cleanupAction, err
  229. }
  230. command, args := kubecontainer.ExpandContainerCommandAndArgs(container, opts.Envs)
  231. logDir := BuildContainerLogsDirectory(pod.Namespace, pod.Name, pod.UID, container.Name)
  232. err = m.osInterface.MkdirAll(logDir, 0755)
  233. if err != nil {
  234. return nil, cleanupAction, fmt.Errorf("create container log directory for container %s failed: %v", container.Name, err)
  235. }
  236. containerLogsPath := buildContainerLogsPath(container.Name, restartCount)
  237. restartCountUint32 := uint32(restartCount)
  238. config := &runtimeapi.ContainerConfig{
  239. Metadata: &runtimeapi.ContainerMetadata{
  240. Name: container.Name,
  241. Attempt: restartCountUint32,
  242. },
  243. Image: &runtimeapi.ImageSpec{Image: imageRef},
  244. Command: command,
  245. Args: args,
  246. WorkingDir: container.WorkingDir,
  247. Labels: newContainerLabels(container, pod),
  248. Annotations: newContainerAnnotations(container, pod, restartCount, opts),
  249. Devices: makeDevices(opts),
  250. Mounts: m.makeMounts(opts, container),
  251. LogPath: containerLogsPath,
  252. Stdin: container.Stdin,
  253. StdinOnce: container.StdinOnce,
  254. Tty: container.TTY,
  255. }
  256. // set platform specific configurations.
  257. if err := m.applyPlatformSpecificContainerConfig(config, container, pod, uid, username, nsTarget); err != nil {
  258. return nil, cleanupAction, err
  259. }
  260. // set environment variables
  261. envs := make([]*runtimeapi.KeyValue, len(opts.Envs))
  262. for idx := range opts.Envs {
  263. e := opts.Envs[idx]
  264. envs[idx] = &runtimeapi.KeyValue{
  265. Key: e.Name,
  266. Value: e.Value,
  267. }
  268. }
  269. config.Envs = envs
  270. return config, cleanupAction, nil
  271. }
  272. // makeDevices generates container devices for kubelet runtime v1.
  273. func makeDevices(opts *kubecontainer.RunContainerOptions) []*runtimeapi.Device {
  274. devices := make([]*runtimeapi.Device, len(opts.Devices))
  275. for idx := range opts.Devices {
  276. device := opts.Devices[idx]
  277. devices[idx] = &runtimeapi.Device{
  278. HostPath: device.PathOnHost,
  279. ContainerPath: device.PathInContainer,
  280. Permissions: device.Permissions,
  281. }
  282. }
  283. return devices
  284. }
  285. // makeMounts generates container volume mounts for kubelet runtime v1.
  286. func (m *kubeGenericRuntimeManager) makeMounts(opts *kubecontainer.RunContainerOptions, container *v1.Container) []*runtimeapi.Mount {
  287. volumeMounts := []*runtimeapi.Mount{}
  288. for idx := range opts.Mounts {
  289. v := opts.Mounts[idx]
  290. selinuxRelabel := v.SELinuxRelabel && selinux.SELinuxEnabled()
  291. mount := &runtimeapi.Mount{
  292. HostPath: v.HostPath,
  293. ContainerPath: v.ContainerPath,
  294. Readonly: v.ReadOnly,
  295. SelinuxRelabel: selinuxRelabel,
  296. Propagation: v.Propagation,
  297. }
  298. volumeMounts = append(volumeMounts, mount)
  299. }
  300. // The reason we create and mount the log file in here (not in kubelet) is because
  301. // the file's location depends on the ID of the container, and we need to create and
  302. // mount the file before actually starting the container.
  303. // we can only mount individual files (e.g.: /etc/hosts, termination-log files) on Windows only if we're using Containerd.
  304. supportsSingleFileMapping := m.SupportsSingleFileMapping()
  305. if opts.PodContainerDir != "" && len(container.TerminationMessagePath) != 0 && supportsSingleFileMapping {
  306. // Because the PodContainerDir contains pod uid and container name which is unique enough,
  307. // here we just add a random id to make the path unique for different instances
  308. // of the same container.
  309. cid := makeUID()
  310. containerLogPath := filepath.Join(opts.PodContainerDir, cid)
  311. fs, err := m.osInterface.Create(containerLogPath)
  312. if err != nil {
  313. utilruntime.HandleError(fmt.Errorf("error on creating termination-log file %q: %v", containerLogPath, err))
  314. } else {
  315. fs.Close()
  316. // Chmod is needed because ioutil.WriteFile() ends up calling
  317. // open(2) to create the file, so the final mode used is "mode &
  318. // ~umask". But we want to make sure the specified mode is used
  319. // in the file no matter what the umask is.
  320. if err := m.osInterface.Chmod(containerLogPath, 0666); err != nil {
  321. utilruntime.HandleError(fmt.Errorf("unable to set termination-log file permissions %q: %v", containerLogPath, err))
  322. }
  323. // Volume Mounts fail on Windows if it is not of the form C:/
  324. containerLogPath = volumeutil.MakeAbsolutePath(goruntime.GOOS, containerLogPath)
  325. terminationMessagePath := volumeutil.MakeAbsolutePath(goruntime.GOOS, container.TerminationMessagePath)
  326. selinuxRelabel := selinux.SELinuxEnabled()
  327. volumeMounts = append(volumeMounts, &runtimeapi.Mount{
  328. HostPath: containerLogPath,
  329. ContainerPath: terminationMessagePath,
  330. SelinuxRelabel: selinuxRelabel,
  331. })
  332. }
  333. }
  334. return volumeMounts
  335. }
  336. // getKubeletContainers lists containers managed by kubelet.
  337. // The boolean parameter specifies whether returns all containers including
  338. // those already exited and dead containers (used for garbage collection).
  339. func (m *kubeGenericRuntimeManager) getKubeletContainers(allContainers bool) ([]*runtimeapi.Container, error) {
  340. filter := &runtimeapi.ContainerFilter{}
  341. if !allContainers {
  342. filter.State = &runtimeapi.ContainerStateValue{
  343. State: runtimeapi.ContainerState_CONTAINER_RUNNING,
  344. }
  345. }
  346. containers, err := m.runtimeService.ListContainers(filter)
  347. if err != nil {
  348. klog.Errorf("getKubeletContainers failed: %v", err)
  349. return nil, err
  350. }
  351. return containers, nil
  352. }
  // makeUID returns a random 32-bit value rendered as exactly eight lowercase
  // hexadecimal digits (zero padded).
  func makeUID() string {
  	id := rand.Uint32()
  	return fmt.Sprintf("%08x", id)
  }
  357. // getTerminationMessage looks on the filesystem for the provided termination message path, returning a limited
  358. // amount of those bytes, or returns true if the logs should be checked.
  359. func getTerminationMessage(status *runtimeapi.ContainerStatus, terminationMessagePath string, fallbackToLogs bool) (string, bool) {
  360. if len(terminationMessagePath) == 0 {
  361. return "", fallbackToLogs
  362. }
  363. // Volume Mounts fail on Windows if it is not of the form C:/
  364. terminationMessagePath = volumeutil.MakeAbsolutePath(goruntime.GOOS, terminationMessagePath)
  365. for _, mount := range status.Mounts {
  366. if mount.ContainerPath != terminationMessagePath {
  367. continue
  368. }
  369. path := mount.HostPath
  370. data, _, err := tail.ReadAtMost(path, kubecontainer.MaxContainerTerminationMessageLength)
  371. if err != nil {
  372. if os.IsNotExist(err) {
  373. return "", fallbackToLogs
  374. }
  375. return fmt.Sprintf("Error on reading termination log %s: %v", path, err), false
  376. }
  377. return string(data), (fallbackToLogs && len(data) == 0)
  378. }
  379. return "", fallbackToLogs
  380. }
  381. // readLastStringFromContainerLogs attempts to read up to the max log length from the end of the CRI log represented
  382. // by path. It reads up to max log lines.
  383. func (m *kubeGenericRuntimeManager) readLastStringFromContainerLogs(path string) string {
  384. value := int64(kubecontainer.MaxContainerTerminationMessageLogLines)
  385. buf, _ := circbuf.NewBuffer(kubecontainer.MaxContainerTerminationMessageLogLength)
  386. if err := m.ReadLogs(context.Background(), path, "", &v1.PodLogOptions{TailLines: &value}, buf, buf); err != nil {
  387. return fmt.Sprintf("Error on reading termination message from logs: %v", err)
  388. }
  389. return buf.String()
  390. }
  391. // getPodContainerStatuses gets all containers' statuses for the pod.
  392. func (m *kubeGenericRuntimeManager) getPodContainerStatuses(uid kubetypes.UID, name, namespace string) ([]*kubecontainer.ContainerStatus, error) {
  393. // Select all containers of the given pod.
  394. containers, err := m.runtimeService.ListContainers(&runtimeapi.ContainerFilter{
  395. LabelSelector: map[string]string{types.KubernetesPodUIDLabel: string(uid)},
  396. })
  397. if err != nil {
  398. klog.Errorf("ListContainers error: %v", err)
  399. return nil, err
  400. }
  401. statuses := make([]*kubecontainer.ContainerStatus, len(containers))
  402. // TODO: optimization: set maximum number of containers per container name to examine.
  403. for i, c := range containers {
  404. status, err := m.runtimeService.ContainerStatus(c.Id)
  405. if err != nil {
  406. // Merely log this here; GetPodStatus will actually report the error out.
  407. klog.V(4).Infof("ContainerStatus for %s error: %v", c.Id, err)
  408. return nil, err
  409. }
  410. cStatus := toKubeContainerStatus(status, m.runtimeName)
  411. if status.State == runtimeapi.ContainerState_CONTAINER_EXITED {
  412. // Populate the termination message if needed.
  413. annotatedInfo := getContainerInfoFromAnnotations(status.Annotations)
  414. // If a container cannot even be started, it certainly does not have logs, so no need to fallbackToLogs.
  415. fallbackToLogs := annotatedInfo.TerminationMessagePolicy == v1.TerminationMessageFallbackToLogsOnError &&
  416. cStatus.ExitCode != 0 && cStatus.Reason != "ContainerCannotRun"
  417. tMessage, checkLogs := getTerminationMessage(status, annotatedInfo.TerminationMessagePath, fallbackToLogs)
  418. if checkLogs {
  419. // if dockerLegacyService is populated, we're supposed to use it to fetch logs
  420. if m.legacyLogProvider != nil {
  421. tMessage, err = m.legacyLogProvider.GetContainerLogTail(uid, name, namespace, kubecontainer.ContainerID{Type: m.runtimeName, ID: c.Id})
  422. if err != nil {
  423. tMessage = fmt.Sprintf("Error reading termination message from logs: %v", err)
  424. }
  425. } else {
  426. tMessage = m.readLastStringFromContainerLogs(status.GetLogPath())
  427. }
  428. }
  429. // Enrich the termination message written by the application is not empty
  430. if len(tMessage) != 0 {
  431. if len(cStatus.Message) != 0 {
  432. cStatus.Message += ": "
  433. }
  434. cStatus.Message += tMessage
  435. }
  436. }
  437. statuses[i] = cStatus
  438. }
  439. sort.Sort(containerStatusByCreated(statuses))
  440. return statuses, nil
  441. }
  442. func toKubeContainerStatus(status *runtimeapi.ContainerStatus, runtimeName string) *kubecontainer.ContainerStatus {
  443. annotatedInfo := getContainerInfoFromAnnotations(status.Annotations)
  444. labeledInfo := getContainerInfoFromLabels(status.Labels)
  445. cStatus := &kubecontainer.ContainerStatus{
  446. ID: kubecontainer.ContainerID{
  447. Type: runtimeName,
  448. ID: status.Id,
  449. },
  450. Name: labeledInfo.ContainerName,
  451. Image: status.Image.Image,
  452. ImageID: status.ImageRef,
  453. Hash: annotatedInfo.Hash,
  454. RestartCount: annotatedInfo.RestartCount,
  455. State: toKubeContainerState(status.State),
  456. CreatedAt: time.Unix(0, status.CreatedAt),
  457. }
  458. if status.State != runtimeapi.ContainerState_CONTAINER_CREATED {
  459. // If container is not in the created state, we have tried and
  460. // started the container. Set the StartedAt time.
  461. cStatus.StartedAt = time.Unix(0, status.StartedAt)
  462. }
  463. if status.State == runtimeapi.ContainerState_CONTAINER_EXITED {
  464. cStatus.Reason = status.Reason
  465. cStatus.Message = status.Message
  466. cStatus.ExitCode = int(status.ExitCode)
  467. cStatus.FinishedAt = time.Unix(0, status.FinishedAt)
  468. }
  469. return cStatus
  470. }
  471. // executePreStopHook runs the pre-stop lifecycle hooks if applicable and returns the duration it takes.
  472. func (m *kubeGenericRuntimeManager) executePreStopHook(pod *v1.Pod, containerID kubecontainer.ContainerID, containerSpec *v1.Container, gracePeriod int64) int64 {
  473. klog.V(3).Infof("Running preStop hook for container %q", containerID.String())
  474. start := metav1.Now()
  475. done := make(chan struct{})
  476. go func() {
  477. defer close(done)
  478. defer utilruntime.HandleCrash()
  479. if msg, err := m.runner.Run(containerID, pod, containerSpec, containerSpec.Lifecycle.PreStop); err != nil {
  480. klog.Errorf("preStop hook for container %q failed: %v", containerSpec.Name, err)
  481. m.recordContainerEvent(pod, containerSpec, containerID.ID, v1.EventTypeWarning, events.FailedPreStopHook, msg)
  482. }
  483. }()
  484. select {
  485. case <-time.After(time.Duration(gracePeriod) * time.Second):
  486. klog.V(2).Infof("preStop hook for container %q did not complete in %d seconds", containerID, gracePeriod)
  487. case <-done:
  488. klog.V(3).Infof("preStop hook for container %q completed", containerID)
  489. }
  490. return int64(metav1.Now().Sub(start.Time).Seconds())
  491. }
  492. // restoreSpecsFromContainerLabels restores all information needed for killing a container. In some
  493. // case we may not have pod and container spec when killing a container, e.g. pod is deleted during
  494. // kubelet restart.
  495. // To solve this problem, we've already written necessary information into container labels. Here we
  496. // just need to retrieve them from container labels and restore the specs.
  497. // TODO(random-liu): Add a node e2e test to test this behaviour.
  498. // TODO(random-liu): Change the lifecycle handler to just accept information needed, so that we can
  499. // just pass the needed function not create the fake object.
  500. func (m *kubeGenericRuntimeManager) restoreSpecsFromContainerLabels(containerID kubecontainer.ContainerID) (*v1.Pod, *v1.Container, error) {
  501. var pod *v1.Pod
  502. var container *v1.Container
  503. s, err := m.runtimeService.ContainerStatus(containerID.ID)
  504. if err != nil {
  505. return nil, nil, err
  506. }
  507. l := getContainerInfoFromLabels(s.Labels)
  508. a := getContainerInfoFromAnnotations(s.Annotations)
  509. // Notice that the followings are not full spec. The container killing code should not use
  510. // un-restored fields.
  511. pod = &v1.Pod{
  512. ObjectMeta: metav1.ObjectMeta{
  513. UID: l.PodUID,
  514. Name: l.PodName,
  515. Namespace: l.PodNamespace,
  516. DeletionGracePeriodSeconds: a.PodDeletionGracePeriod,
  517. },
  518. Spec: v1.PodSpec{
  519. TerminationGracePeriodSeconds: a.PodTerminationGracePeriod,
  520. },
  521. }
  522. container = &v1.Container{
  523. Name: l.ContainerName,
  524. Ports: a.ContainerPorts,
  525. TerminationMessagePath: a.TerminationMessagePath,
  526. }
  527. if a.PreStopHandler != nil {
  528. container.Lifecycle = &v1.Lifecycle{
  529. PreStop: a.PreStopHandler,
  530. }
  531. }
  532. return pod, container, nil
  533. }
  534. // killContainer kills a container through the following steps:
  535. // * Run the pre-stop lifecycle hooks (if applicable).
  536. // * Stop the container.
  537. func (m *kubeGenericRuntimeManager) killContainer(pod *v1.Pod, containerID kubecontainer.ContainerID, containerName string, message string, gracePeriodOverride *int64) error {
  538. var containerSpec *v1.Container
  539. if pod != nil {
  540. if containerSpec = kubecontainer.GetContainerSpec(pod, containerName); containerSpec == nil {
  541. return fmt.Errorf("failed to get containerSpec %q(id=%q) in pod %q when killing container for reason %q",
  542. containerName, containerID.String(), format.Pod(pod), message)
  543. }
  544. } else {
  545. // Restore necessary information if one of the specs is nil.
  546. restoredPod, restoredContainer, err := m.restoreSpecsFromContainerLabels(containerID)
  547. if err != nil {
  548. return err
  549. }
  550. pod, containerSpec = restoredPod, restoredContainer
  551. }
  552. // From this point, pod and container must be non-nil.
  553. gracePeriod := int64(minimumGracePeriodInSeconds)
  554. switch {
  555. case pod.DeletionGracePeriodSeconds != nil:
  556. gracePeriod = *pod.DeletionGracePeriodSeconds
  557. case pod.Spec.TerminationGracePeriodSeconds != nil:
  558. gracePeriod = *pod.Spec.TerminationGracePeriodSeconds
  559. }
  560. if len(message) == 0 {
  561. message = fmt.Sprintf("Stopping container %s", containerSpec.Name)
  562. }
  563. m.recordContainerEvent(pod, containerSpec, containerID.ID, v1.EventTypeNormal, events.KillingContainer, message)
  564. // Run internal pre-stop lifecycle hook
  565. if err := m.internalLifecycle.PreStopContainer(containerID.ID); err != nil {
  566. return err
  567. }
  568. // Run the pre-stop lifecycle hooks if applicable and if there is enough time to run it
  569. if containerSpec.Lifecycle != nil && containerSpec.Lifecycle.PreStop != nil && gracePeriod > 0 {
  570. gracePeriod = gracePeriod - m.executePreStopHook(pod, containerID, containerSpec, gracePeriod)
  571. }
  572. // always give containers a minimal shutdown window to avoid unnecessary SIGKILLs
  573. if gracePeriod < minimumGracePeriodInSeconds {
  574. gracePeriod = minimumGracePeriodInSeconds
  575. }
  576. if gracePeriodOverride != nil {
  577. gracePeriod = *gracePeriodOverride
  578. klog.V(3).Infof("Killing container %q, but using %d second grace period override", containerID, gracePeriod)
  579. }
  580. klog.V(2).Infof("Killing container %q with %d second grace period", containerID.String(), gracePeriod)
  581. err := m.runtimeService.StopContainer(containerID.ID, gracePeriod)
  582. if err != nil {
  583. klog.Errorf("Container %q termination failed with gracePeriod %d: %v", containerID.String(), gracePeriod, err)
  584. } else {
  585. klog.V(3).Infof("Container %q exited normally", containerID.String())
  586. }
  587. m.containerRefManager.ClearRef(containerID)
  588. return err
  589. }
  590. // killContainersWithSyncResult kills all pod's containers with sync results.
  591. func (m *kubeGenericRuntimeManager) killContainersWithSyncResult(pod *v1.Pod, runningPod kubecontainer.Pod, gracePeriodOverride *int64) (syncResults []*kubecontainer.SyncResult) {
  592. containerResults := make(chan *kubecontainer.SyncResult, len(runningPod.Containers))
  593. wg := sync.WaitGroup{}
  594. wg.Add(len(runningPod.Containers))
  595. for _, container := range runningPod.Containers {
  596. go func(container *kubecontainer.Container) {
  597. defer utilruntime.HandleCrash()
  598. defer wg.Done()
  599. killContainerResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, container.Name)
  600. if err := m.killContainer(pod, container.ID, container.Name, "", gracePeriodOverride); err != nil {
  601. killContainerResult.Fail(kubecontainer.ErrKillContainer, err.Error())
  602. }
  603. containerResults <- killContainerResult
  604. }(container)
  605. }
  606. wg.Wait()
  607. close(containerResults)
  608. for containerResult := range containerResults {
  609. syncResults = append(syncResults, containerResult)
  610. }
  611. return
  612. }
  613. // pruneInitContainersBeforeStart ensures that before we begin creating init
  614. // containers, we have reduced the number of outstanding init containers still
  615. // present. This reduces load on the container garbage collector by only
  616. // preserving the most recent terminated init container.
  617. func (m *kubeGenericRuntimeManager) pruneInitContainersBeforeStart(pod *v1.Pod, podStatus *kubecontainer.PodStatus) {
  618. // only the last execution of each init container should be preserved, and only preserve it if it is in the
  619. // list of init containers to keep.
  620. initContainerNames := sets.NewString()
  621. for _, container := range pod.Spec.InitContainers {
  622. initContainerNames.Insert(container.Name)
  623. }
  624. for name := range initContainerNames {
  625. count := 0
  626. for _, status := range podStatus.ContainerStatuses {
  627. if status.Name != name ||
  628. (status.State != kubecontainer.ContainerStateExited &&
  629. status.State != kubecontainer.ContainerStateUnknown) {
  630. continue
  631. }
  632. // Remove init containers in unknown state. It should have
  633. // been stopped before pruneInitContainersBeforeStart is
  634. // called.
  635. count++
  636. // keep the first init container for this name
  637. if count == 1 {
  638. continue
  639. }
  640. // prune all other init containers that match this container name
  641. klog.V(4).Infof("Removing init container %q instance %q %d", status.Name, status.ID.ID, count)
  642. if err := m.removeContainer(status.ID.ID); err != nil {
  643. utilruntime.HandleError(fmt.Errorf("failed to remove pod init container %q: %v; Skipping pod %q", status.Name, err, format.Pod(pod)))
  644. continue
  645. }
  646. // remove any references to this container
  647. if _, ok := m.containerRefManager.GetRef(status.ID); ok {
  648. m.containerRefManager.ClearRef(status.ID)
  649. } else {
  650. klog.Warningf("No ref for container %q", status.ID)
  651. }
  652. }
  653. }
  654. }
  655. // Remove all init containres. Note that this function does not check the state
  656. // of the container because it assumes all init containers have been stopped
  657. // before the call happens.
  658. func (m *kubeGenericRuntimeManager) purgeInitContainers(pod *v1.Pod, podStatus *kubecontainer.PodStatus) {
  659. initContainerNames := sets.NewString()
  660. for _, container := range pod.Spec.InitContainers {
  661. initContainerNames.Insert(container.Name)
  662. }
  663. for name := range initContainerNames {
  664. count := 0
  665. for _, status := range podStatus.ContainerStatuses {
  666. if status.Name != name {
  667. continue
  668. }
  669. count++
  670. // Purge all init containers that match this container name
  671. klog.V(4).Infof("Removing init container %q instance %q %d", status.Name, status.ID.ID, count)
  672. if err := m.removeContainer(status.ID.ID); err != nil {
  673. utilruntime.HandleError(fmt.Errorf("failed to remove pod init container %q: %v; Skipping pod %q", status.Name, err, format.Pod(pod)))
  674. continue
  675. }
  676. // Remove any references to this container
  677. if _, ok := m.containerRefManager.GetRef(status.ID); ok {
  678. m.containerRefManager.ClearRef(status.ID)
  679. } else {
  680. klog.Warningf("No ref for container %q", status.ID)
  681. }
  682. }
  683. }
  684. }
  685. // findNextInitContainerToRun returns the status of the last failed container, the
  686. // index of next init container to start, or done if there are no further init containers.
  687. // Status is only returned if an init container is failed, in which case next will
  688. // point to the current container.
  689. func findNextInitContainerToRun(pod *v1.Pod, podStatus *kubecontainer.PodStatus) (status *kubecontainer.ContainerStatus, next *v1.Container, done bool) {
  690. if len(pod.Spec.InitContainers) == 0 {
  691. return nil, nil, true
  692. }
  693. // If there are failed containers, return the status of the last failed one.
  694. for i := len(pod.Spec.InitContainers) - 1; i >= 0; i-- {
  695. container := &pod.Spec.InitContainers[i]
  696. status := podStatus.FindContainerStatusByName(container.Name)
  697. if status != nil && isInitContainerFailed(status) {
  698. return status, container, false
  699. }
  700. }
  701. // There are no failed containers now.
  702. for i := len(pod.Spec.InitContainers) - 1; i >= 0; i-- {
  703. container := &pod.Spec.InitContainers[i]
  704. status := podStatus.FindContainerStatusByName(container.Name)
  705. if status == nil {
  706. continue
  707. }
  708. // container is still running, return not done.
  709. if status.State == kubecontainer.ContainerStateRunning {
  710. return nil, nil, false
  711. }
  712. if status.State == kubecontainer.ContainerStateExited {
  713. // all init containers successful
  714. if i == (len(pod.Spec.InitContainers) - 1) {
  715. return nil, nil, true
  716. }
  717. // all containers up to i successful, go to i+1
  718. return nil, &pod.Spec.InitContainers[i+1], false
  719. }
  720. }
  721. return nil, &pod.Spec.InitContainers[0], false
  722. }
  723. // GetContainerLogs returns logs of a specific container.
  724. func (m *kubeGenericRuntimeManager) GetContainerLogs(ctx context.Context, pod *v1.Pod, containerID kubecontainer.ContainerID, logOptions *v1.PodLogOptions, stdout, stderr io.Writer) (err error) {
  725. status, err := m.runtimeService.ContainerStatus(containerID.ID)
  726. if err != nil {
  727. klog.V(4).Infof("failed to get container status for %v: %v", containerID.String(), err)
  728. return fmt.Errorf("unable to retrieve container logs for %v", containerID.String())
  729. }
  730. return m.ReadLogs(ctx, status.GetLogPath(), containerID.ID, logOptions, stdout, stderr)
  731. }
  732. // GetExec gets the endpoint the runtime will serve the exec request from.
  733. func (m *kubeGenericRuntimeManager) GetExec(id kubecontainer.ContainerID, cmd []string, stdin, stdout, stderr, tty bool) (*url.URL, error) {
  734. req := &runtimeapi.ExecRequest{
  735. ContainerId: id.ID,
  736. Cmd: cmd,
  737. Tty: tty,
  738. Stdin: stdin,
  739. Stdout: stdout,
  740. Stderr: stderr,
  741. }
  742. resp, err := m.runtimeService.Exec(req)
  743. if err != nil {
  744. return nil, err
  745. }
  746. return url.Parse(resp.Url)
  747. }
  748. // GetAttach gets the endpoint the runtime will serve the attach request from.
  749. func (m *kubeGenericRuntimeManager) GetAttach(id kubecontainer.ContainerID, stdin, stdout, stderr, tty bool) (*url.URL, error) {
  750. req := &runtimeapi.AttachRequest{
  751. ContainerId: id.ID,
  752. Stdin: stdin,
  753. Stdout: stdout,
  754. Stderr: stderr,
  755. Tty: tty,
  756. }
  757. resp, err := m.runtimeService.Attach(req)
  758. if err != nil {
  759. return nil, err
  760. }
  761. return url.Parse(resp.Url)
  762. }
  763. // RunInContainer synchronously executes the command in the container, and returns the output.
  764. func (m *kubeGenericRuntimeManager) RunInContainer(id kubecontainer.ContainerID, cmd []string, timeout time.Duration) ([]byte, error) {
  765. stdout, stderr, err := m.runtimeService.ExecSync(id.ID, cmd, timeout)
  766. // NOTE(tallclair): This does not correctly interleave stdout & stderr, but should be sufficient
  767. // for logging purposes. A combined output option will need to be added to the ExecSyncRequest
  768. // if more precise output ordering is ever required.
  769. return append(stdout, stderr...), err
  770. }
  771. // removeContainer removes the container and the container logs.
  772. // Notice that we remove the container logs first, so that container will not be removed if
  773. // container logs are failed to be removed, and kubelet will retry this later. This guarantees
  774. // that container logs to be removed with the container.
  775. // Notice that we assume that the container should only be removed in non-running state, and
  776. // it will not write container logs anymore in that state.
  777. func (m *kubeGenericRuntimeManager) removeContainer(containerID string) error {
  778. klog.V(4).Infof("Removing container %q", containerID)
  779. // Call internal container post-stop lifecycle hook.
  780. if err := m.internalLifecycle.PostStopContainer(containerID); err != nil {
  781. return err
  782. }
  783. // Remove the container log.
  784. // TODO: Separate log and container lifecycle management.
  785. if err := m.removeContainerLog(containerID); err != nil {
  786. return err
  787. }
  788. // Remove the container.
  789. return m.runtimeService.RemoveContainer(containerID)
  790. }
  791. // removeContainerLog removes the container log.
  792. func (m *kubeGenericRuntimeManager) removeContainerLog(containerID string) error {
  793. // Remove the container log.
  794. status, err := m.runtimeService.ContainerStatus(containerID)
  795. if err != nil {
  796. return fmt.Errorf("failed to get container status %q: %v", containerID, err)
  797. }
  798. labeledInfo := getContainerInfoFromLabels(status.Labels)
  799. path := status.GetLogPath()
  800. if err := m.osInterface.Remove(path); err != nil && !os.IsNotExist(err) {
  801. return fmt.Errorf("failed to remove container %q log %q: %v", containerID, path, err)
  802. }
  803. // Remove the legacy container log symlink.
  804. // TODO(random-liu): Remove this after cluster logging supports CRI container log path.
  805. legacySymlink := legacyLogSymlink(containerID, labeledInfo.ContainerName, labeledInfo.PodName,
  806. labeledInfo.PodNamespace)
  807. if err := m.osInterface.Remove(legacySymlink); err != nil && !os.IsNotExist(err) {
  808. return fmt.Errorf("failed to remove container %q log legacy symbolic link %q: %v",
  809. containerID, legacySymlink, err)
  810. }
  811. return nil
  812. }
  813. // DeleteContainer removes a container.
  814. func (m *kubeGenericRuntimeManager) DeleteContainer(containerID kubecontainer.ContainerID) error {
  815. return m.removeContainer(containerID.ID)
  816. }