kubelet_pods.go
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package kubelet

import (
    "bytes"
    "context"
    "fmt"
    "io"
    "io/ioutil"
    "net/http"
    "net/url"
    "os"
    "path"
    "path/filepath"
    "runtime"
    "sort"
    "strings"
    "sync"

    "k8s.io/api/core/v1"
    "k8s.io/apimachinery/pkg/api/errors"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/labels"
    "k8s.io/apimachinery/pkg/types"
    "k8s.io/apimachinery/pkg/util/sets"
    utilvalidation "k8s.io/apimachinery/pkg/util/validation"
    utilfeature "k8s.io/apiserver/pkg/util/feature"
    runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
    "k8s.io/klog"
    podutil "k8s.io/kubernetes/pkg/api/v1/pod"
    "k8s.io/kubernetes/pkg/api/v1/resource"
    podshelper "k8s.io/kubernetes/pkg/apis/core/pods"
    v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
    v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
    "k8s.io/kubernetes/pkg/features"
    "k8s.io/kubernetes/pkg/fieldpath"
    "k8s.io/kubernetes/pkg/kubelet/cm"
    kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
    "k8s.io/kubernetes/pkg/kubelet/envvars"
    "k8s.io/kubernetes/pkg/kubelet/eviction"
    "k8s.io/kubernetes/pkg/kubelet/images"
    "k8s.io/kubernetes/pkg/kubelet/server/portforward"
    remotecommandserver "k8s.io/kubernetes/pkg/kubelet/server/remotecommand"
    "k8s.io/kubernetes/pkg/kubelet/status"
    kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
    "k8s.io/kubernetes/pkg/kubelet/util/format"
    mountutil "k8s.io/kubernetes/pkg/util/mount"
    volumeutil "k8s.io/kubernetes/pkg/volume/util"
    "k8s.io/kubernetes/pkg/volume/util/subpath"
    "k8s.io/kubernetes/pkg/volume/util/volumepathhandler"
    volumevalidation "k8s.io/kubernetes/pkg/volume/validation"
    "k8s.io/kubernetes/third_party/forked/golang/expansion"
)
const (
    managedHostsHeader                = "# Kubernetes-managed hosts file.\n"
    managedHostsHeaderWithHostNetwork = "# Kubernetes-managed hosts file (host network).\n"
)

// Get a list of pods that have data directories.
func (kl *Kubelet) listPodsFromDisk() ([]types.UID, error) {
    podInfos, err := ioutil.ReadDir(kl.getPodsDir())
    if err != nil {
        return nil, err
    }
    pods := []types.UID{}
    for i := range podInfos {
        if podInfos[i].IsDir() {
            pods = append(pods, types.UID(podInfos[i].Name()))
        }
    }
    return pods, nil
}

// GetActivePods returns non-terminal pods.
func (kl *Kubelet) GetActivePods() []*v1.Pod {
    allPods := kl.podManager.GetPods()
    activePods := kl.filterOutTerminatedPods(allPods)
    return activePods
}
// makeBlockVolumes maps the raw block devices specified in the path of the container.
// Experimental.
func (kl *Kubelet) makeBlockVolumes(pod *v1.Pod, container *v1.Container, podVolumes kubecontainer.VolumeMap, blkutil volumepathhandler.BlockVolumePathHandler) ([]kubecontainer.DeviceInfo, error) {
    var devices []kubecontainer.DeviceInfo
    for _, device := range container.VolumeDevices {
        // check path is absolute
        if !filepath.IsAbs(device.DevicePath) {
            return nil, fmt.Errorf("error DevicePath `%s` must be an absolute path", device.DevicePath)
        }
        vol, ok := podVolumes[device.Name]
        if !ok || vol.BlockVolumeMapper == nil {
            klog.Errorf("Block volume cannot be satisfied for container %q, because the volume is missing or the volume mapper is nil: %+v", container.Name, device)
            return nil, fmt.Errorf("cannot find volume %q to pass into container %q", device.Name, container.Name)
        }
        // Get a symbolic link associated to a block device under pod device path
        dirPath, volName := vol.BlockVolumeMapper.GetPodDeviceMapPath()
        symlinkPath := path.Join(dirPath, volName)
        if islinkExist, checkErr := blkutil.IsSymlinkExist(symlinkPath); checkErr != nil {
            return nil, checkErr
        } else if islinkExist {
            // Check readOnly in PVCVolumeSource and set read only permission if it's true.
            permission := "mrw"
            if vol.ReadOnly {
                permission = "r"
            }
            klog.V(4).Infof("Device will be attached to container %q. Path on host: %v", container.Name, symlinkPath)
            devices = append(devices, kubecontainer.DeviceInfo{PathOnHost: symlinkPath, PathInContainer: device.DevicePath, Permissions: permission})
        }
    }
    return devices, nil
}
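// For illustration (hypothetical names and paths): a PVC-backed raw block
// volume named "data", mapped by the container at DevicePath "/dev/xvda",
// produces a DeviceInfo roughly like
//
//	kubecontainer.DeviceInfo{
//		PathOnHost:      "/var/lib/kubelet/pods/<pod-uid>/volumeDevices/.../data",
//		PathInContainer: "/dev/xvda",
//		Permissions:     "mrw", // "r" if the volume is read-only
//	}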
// makeMounts determines the mount points for the given container.
func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, hostDomain, podIP string, podVolumes kubecontainer.VolumeMap, mounter mountutil.Interface, subpather subpath.Interface, expandEnvs []kubecontainer.EnvVar) ([]kubecontainer.Mount, func(), error) {
    // Kubernetes only mounts on /etc/hosts if:
    // - container is not an infrastructure (pause) container
    // - container is not already mounting on /etc/hosts
    // - OS is not Windows
    // Kubernetes will not mount /etc/hosts if:
    // - the Pod sandbox is still being created and its IP is unknown; hence, PodIP will not have been set.
    mountEtcHostsFile := len(podIP) > 0 && runtime.GOOS != "windows"
    klog.V(3).Infof("container: %v/%v/%v podIP: %q creating hosts mount: %v", pod.Namespace, pod.Name, container.Name, podIP, mountEtcHostsFile)
    mounts := []kubecontainer.Mount{}
    var cleanupAction func()
    for i, mount := range container.VolumeMounts {
        // do not mount /etc/hosts if container is already mounting on the path
        mountEtcHostsFile = mountEtcHostsFile && (mount.MountPath != etcHostsPath)
        vol, ok := podVolumes[mount.Name]
        if !ok || vol.Mounter == nil {
            klog.Errorf("Mount cannot be satisfied for container %q, because the volume is missing or the volume mounter is nil: %+v", container.Name, mount)
            return nil, cleanupAction, fmt.Errorf("cannot find volume %q to mount into container %q", mount.Name, container.Name)
        }
        relabelVolume := false
        // If the volume supports SELinux and it has not been
        // relabeled already and it is not a read-only volume,
        // relabel it and mark it as labeled.
        if vol.Mounter.GetAttributes().Managed && vol.Mounter.GetAttributes().SupportsSELinux && !vol.SELinuxLabeled {
            vol.SELinuxLabeled = true
            relabelVolume = true
        }
        hostPath, err := volumeutil.GetPath(vol.Mounter)
        if err != nil {
            return nil, cleanupAction, err
        }
        subPath := mount.SubPath
        if mount.SubPathExpr != "" {
            if !utilfeature.DefaultFeatureGate.Enabled(features.VolumeSubpath) {
                return nil, cleanupAction, fmt.Errorf("volume subpaths are disabled")
            }
            if !utilfeature.DefaultFeatureGate.Enabled(features.VolumeSubpathEnvExpansion) {
                return nil, cleanupAction, fmt.Errorf("volume subpath expansion is disabled")
            }
            subPath, err = kubecontainer.ExpandContainerVolumeMounts(mount, expandEnvs)
            if err != nil {
                return nil, cleanupAction, err
            }
        }
        if subPath != "" {
            if !utilfeature.DefaultFeatureGate.Enabled(features.VolumeSubpath) {
                return nil, cleanupAction, fmt.Errorf("volume subpaths are disabled")
            }
            if filepath.IsAbs(subPath) {
                return nil, cleanupAction, fmt.Errorf("error SubPath `%s` must not be an absolute path", subPath)
            }
            err = volumevalidation.ValidatePathNoBacksteps(subPath)
            if err != nil {
                return nil, cleanupAction, fmt.Errorf("unable to provision SubPath `%s`: %v", subPath, err)
            }
            volumePath := hostPath
            hostPath = filepath.Join(volumePath, subPath)
            if subPathExists, err := mounter.ExistsPath(hostPath); err != nil {
                klog.Errorf("Could not determine if subPath %s exists; will not attempt to change its permissions", hostPath)
            } else if !subPathExists {
                // Create the sub path now because if it's auto-created later when referenced, it may have an
                // incorrect ownership and mode. For example, the sub path directory must have at least g+rwx
                // when the pod specifies an fsGroup, and if the directory is not created here, Docker will
                // later auto-create it with the incorrect mode 0750.
                // Take extra care not to escape the volume!
                perm, err := mounter.GetMode(volumePath)
                if err != nil {
                    return nil, cleanupAction, err
                }
                if err := subpather.SafeMakeDir(subPath, volumePath, perm); err != nil {
                    // Don't pass the detailed error back to the user, because it could leak information about the host filesystem.
                    klog.Errorf("failed to create subPath directory for volumeMount %q of container %q: %v", mount.Name, container.Name, err)
                    return nil, cleanupAction, fmt.Errorf("failed to create subPath directory for volumeMount %q of container %q", mount.Name, container.Name)
                }
            }
            hostPath, cleanupAction, err = subpather.PrepareSafeSubpath(subpath.Subpath{
                VolumeMountIndex: i,
                Path:             hostPath,
                VolumeName:       vol.InnerVolumeSpecName,
                VolumePath:       volumePath,
                PodDir:           podDir,
                ContainerName:    container.Name,
            })
            if err != nil {
                // Don't pass the detailed error back to the user, because it could leak information about the host filesystem.
                klog.Errorf("failed to prepare subPath for volumeMount %q of container %q: %v", mount.Name, container.Name, err)
                return nil, cleanupAction, fmt.Errorf("failed to prepare subPath for volumeMount %q of container %q", mount.Name, container.Name)
            }
        }
        // Docker volume mounts fail on Windows if the host path is not of the form C:/...
        if volumeutil.IsWindowsLocalPath(runtime.GOOS, hostPath) {
            hostPath = volumeutil.MakeAbsolutePath(runtime.GOOS, hostPath)
        }
        containerPath := mount.MountPath
        // IsAbs returns false for UNC paths/SMB shares/named pipes on Windows, so check for those specifically and skip MakeAbsolutePath.
        if !volumeutil.IsWindowsUNCPath(runtime.GOOS, containerPath) && !filepath.IsAbs(containerPath) {
            containerPath = volumeutil.MakeAbsolutePath(runtime.GOOS, containerPath)
        }
        propagation, err := translateMountPropagation(mount.MountPropagation)
        if err != nil {
            return nil, cleanupAction, err
        }
        klog.V(5).Infof("Pod %q container %q mount %q has propagation %q", format.Pod(pod), container.Name, mount.Name, propagation)
        mustMountRO := vol.Mounter.GetAttributes().ReadOnly
        mounts = append(mounts, kubecontainer.Mount{
            Name:           mount.Name,
            ContainerPath:  containerPath,
            HostPath:       hostPath,
            ReadOnly:       mount.ReadOnly || mustMountRO,
            SELinuxRelabel: relabelVolume,
            Propagation:    propagation,
        })
    }
    if mountEtcHostsFile {
        hostAliases := pod.Spec.HostAliases
        hostsMount, err := makeHostsMount(podDir, podIP, hostName, hostDomain, hostAliases, pod.Spec.HostNetwork)
        if err != nil {
            return nil, cleanupAction, err
        }
        mounts = append(mounts, *hostsMount)
    }
    return mounts, cleanupAction, nil
}
// translateMountPropagation transforms v1.MountPropagationMode to
// runtimeapi.MountPropagation.
func translateMountPropagation(mountMode *v1.MountPropagationMode) (runtimeapi.MountPropagation, error) {
    if runtime.GOOS == "windows" {
        // Windows containers don't support mount propagation; use private for them.
        // See https://docs.docker.com/storage/bind-mounts/#configure-bind-propagation.
        return runtimeapi.MountPropagation_PROPAGATION_PRIVATE, nil
    }
    switch {
    case mountMode == nil:
        // PRIVATE is the default
        return runtimeapi.MountPropagation_PROPAGATION_PRIVATE, nil
    case *mountMode == v1.MountPropagationHostToContainer:
        return runtimeapi.MountPropagation_PROPAGATION_HOST_TO_CONTAINER, nil
    case *mountMode == v1.MountPropagationBidirectional:
        return runtimeapi.MountPropagation_PROPAGATION_BIDIRECTIONAL, nil
    case *mountMode == v1.MountPropagationNone:
        return runtimeapi.MountPropagation_PROPAGATION_PRIVATE, nil
    default:
        return 0, fmt.Errorf("invalid MountPropagation mode: %q", *mountMode)
    }
}
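// As a rough reference (see the CRI API for the authoritative definitions),
// the resulting CRI modes correspond to these Linux mount semantics as a
// runtime typically applies them:
//
//	MountPropagationNone / unset    -> PROPAGATION_PRIVATE           (private)
//	MountPropagationHostToContainer -> PROPAGATION_HOST_TO_CONTAINER (rslave)
//	MountPropagationBidirectional   -> PROPAGATION_BIDIRECTIONAL     (rshared)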
// makeHostsMount makes the mountpoint for the hosts file that the containers
// in a pod are injected with.
func makeHostsMount(podDir, podIP, hostName, hostDomainName string, hostAliases []v1.HostAlias, useHostNetwork bool) (*kubecontainer.Mount, error) {
    hostsFilePath := path.Join(podDir, "etc-hosts")
    if err := ensureHostsFile(hostsFilePath, podIP, hostName, hostDomainName, hostAliases, useHostNetwork); err != nil {
        return nil, err
    }
    return &kubecontainer.Mount{
        Name:           "k8s-managed-etc-hosts",
        ContainerPath:  etcHostsPath,
        HostPath:       hostsFilePath,
        ReadOnly:       false,
        SELinuxRelabel: true,
    }, nil
}

// ensureHostsFile ensures that the given hosts file has an up-to-date IP, host
// name, and domain name.
func ensureHostsFile(fileName, hostIP, hostName, hostDomainName string, hostAliases []v1.HostAlias, useHostNetwork bool) error {
    var hostsFileContent []byte
    var err error
    if useHostNetwork {
        // if Pod is using host network, read hosts file from the node's filesystem.
        // `etcHostsPath` references the location of the hosts file on the node.
        // `/etc/hosts` for *nix systems.
        hostsFileContent, err = nodeHostsFileContent(etcHostsPath, hostAliases)
        if err != nil {
            return err
        }
    } else {
        // if Pod is not using host network, create a managed hosts file with Pod IP and other information.
        hostsFileContent = managedHostsFileContent(hostIP, hostName, hostDomainName, hostAliases)
    }
    return ioutil.WriteFile(fileName, hostsFileContent, 0644)
}

// nodeHostsFileContent reads the content of the node's hosts file.
func nodeHostsFileContent(hostsFilePath string, hostAliases []v1.HostAlias) ([]byte, error) {
    hostsFileContent, err := ioutil.ReadFile(hostsFilePath)
    if err != nil {
        return nil, err
    }
    var buffer bytes.Buffer
    buffer.WriteString(managedHostsHeaderWithHostNetwork)
    buffer.Write(hostsFileContent)
    buffer.Write(hostsEntriesFromHostAliases(hostAliases))
    return buffer.Bytes(), nil
}

// managedHostsFileContent generates the content of the managed etc-hosts file based on Pod IP and other
// information.
func managedHostsFileContent(hostIP, hostName, hostDomainName string, hostAliases []v1.HostAlias) []byte {
    var buffer bytes.Buffer
    buffer.WriteString(managedHostsHeader)
    buffer.WriteString("127.0.0.1\tlocalhost\n")                      // ipv4 localhost
    buffer.WriteString("::1\tlocalhost ip6-localhost ip6-loopback\n") // ipv6 localhost
    buffer.WriteString("fe00::0\tip6-localnet\n")
    buffer.WriteString("fe00::0\tip6-mcastprefix\n")
    buffer.WriteString("fe00::1\tip6-allnodes\n")
    buffer.WriteString("fe00::2\tip6-allrouters\n")
    if len(hostDomainName) > 0 {
        buffer.WriteString(fmt.Sprintf("%s\t%s.%s\t%s\n", hostIP, hostName, hostDomainName, hostName))
    } else {
        buffer.WriteString(fmt.Sprintf("%s\t%s\n", hostIP, hostName))
    }
    buffer.Write(hostsEntriesFromHostAliases(hostAliases))
    return buffer.Bytes()
}
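// For illustration (hypothetical values): for a pod with IP 10.244.1.6,
// hostname "busybox-1", and host domain "sub.default.svc.cluster.local",
// the managed file looks like:
//
//	# Kubernetes-managed hosts file.
//	127.0.0.1	localhost
//	::1	localhost ip6-localhost ip6-loopback
//	fe00::0	ip6-localnet
//	fe00::0	ip6-mcastprefix
//	fe00::1	ip6-allnodes
//	fe00::2	ip6-allrouters
//	10.244.1.6	busybox-1.sub.default.svc.cluster.local	busybox-1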
func hostsEntriesFromHostAliases(hostAliases []v1.HostAlias) []byte {
    if len(hostAliases) == 0 {
        return []byte{}
    }
    var buffer bytes.Buffer
    buffer.WriteString("\n")
    buffer.WriteString("# Entries added by HostAliases.\n")
    // for each IP, write all aliases onto a single line in the hosts file
    for _, hostAlias := range hostAliases {
        buffer.WriteString(fmt.Sprintf("%s\t%s\n", hostAlias.IP, strings.Join(hostAlias.Hostnames, "\t")))
    }
    return buffer.Bytes()
}
// truncatePodHostnameIfNeeded truncates the pod hostname if it's longer than 63 chars.
func truncatePodHostnameIfNeeded(podName, hostname string) (string, error) {
    // Cap hostname at 63 chars (the specification is 64 bytes, which is 63 chars plus the null terminator).
    const hostnameMaxLen = 63
    if len(hostname) <= hostnameMaxLen {
        return hostname, nil
    }
    truncated := hostname[:hostnameMaxLen]
    klog.Errorf("hostname for pod:%q was longer than %d. Truncated hostname to :%q", podName, hostnameMaxLen, truncated)
    // hostname should not end with '-' or '.'
    truncated = strings.TrimRight(truncated, "-.")
    if len(truncated) == 0 {
        // This should never happen.
        return "", fmt.Errorf("hostname for pod %q was invalid: %q", podName, hostname)
    }
    return truncated, nil
}

// GeneratePodHostNameAndDomain creates a hostname and domain name for a pod,
// given that pod's spec and annotations, or returns an error.
func (kl *Kubelet) GeneratePodHostNameAndDomain(pod *v1.Pod) (string, string, error) {
    clusterDomain := kl.dnsConfigurer.ClusterDomain
    hostname := pod.Name
    if len(pod.Spec.Hostname) > 0 {
        if msgs := utilvalidation.IsDNS1123Label(pod.Spec.Hostname); len(msgs) != 0 {
            return "", "", fmt.Errorf("Pod Hostname %q is not a valid DNS label: %s", pod.Spec.Hostname, strings.Join(msgs, ";"))
        }
        hostname = pod.Spec.Hostname
    }
    hostname, err := truncatePodHostnameIfNeeded(pod.Name, hostname)
    if err != nil {
        return "", "", err
    }
    hostDomain := ""
    if len(pod.Spec.Subdomain) > 0 {
        if msgs := utilvalidation.IsDNS1123Label(pod.Spec.Subdomain); len(msgs) != 0 {
            return "", "", fmt.Errorf("Pod Subdomain %q is not a valid DNS label: %s", pod.Spec.Subdomain, strings.Join(msgs, ";"))
        }
        hostDomain = fmt.Sprintf("%s.%s.svc.%s", pod.Spec.Subdomain, pod.Namespace, clusterDomain)
    }
    return hostname, hostDomain, nil
}
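// For illustration (hypothetical values): a pod with spec.hostname "web-0"
// and spec.subdomain "nginx" in namespace "default", on a cluster whose
// domain is "cluster.local", yields
//
//	hostname   = "web-0"
//	hostDomain = "nginx.default.svc.cluster.local"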
// GetPodCgroupParent gets the pod cgroup parent from the container manager.
func (kl *Kubelet) GetPodCgroupParent(pod *v1.Pod) string {
    pcm := kl.containerManager.NewPodContainerManager()
    _, cgroupParent := pcm.GetPodContainerName(pod)
    return cgroupParent
}
// GenerateRunContainerOptions generates the RunContainerOptions, which can be used by
// the container runtime to set parameters for launching a container.
func (kl *Kubelet) GenerateRunContainerOptions(pod *v1.Pod, container *v1.Container, podIP string) (*kubecontainer.RunContainerOptions, func(), error) {
    opts, err := kl.containerManager.GetResources(pod, container)
    if err != nil {
        return nil, nil, err
    }
    hostname, hostDomainName, err := kl.GeneratePodHostNameAndDomain(pod)
    if err != nil {
        return nil, nil, err
    }
    opts.Hostname = hostname
    podName := volumeutil.GetUniquePodName(pod)
    volumes := kl.volumeManager.GetMountedVolumesForPod(podName)
    opts.PortMappings = kubecontainer.MakePortMappings(container)
    // TODO: remove feature gate check after no longer needed
    if utilfeature.DefaultFeatureGate.Enabled(features.BlockVolume) {
        blkutil := volumepathhandler.NewBlockVolumePathHandler()
        blkVolumes, err := kl.makeBlockVolumes(pod, container, volumes, blkutil)
        if err != nil {
            return nil, nil, err
        }
        opts.Devices = append(opts.Devices, blkVolumes...)
    }
    envs, err := kl.makeEnvironmentVariables(pod, container, podIP)
    if err != nil {
        return nil, nil, err
    }
    opts.Envs = append(opts.Envs, envs...)
    mounts, cleanupAction, err := makeMounts(pod, kl.getPodDir(pod.UID), container, hostname, hostDomainName, podIP, volumes, kl.mounter, kl.subpather, opts.Envs)
    if err != nil {
        return nil, cleanupAction, err
    }
    opts.Mounts = append(opts.Mounts, mounts...)
    // Disable adding TerminationMessagePath on Windows, because these files would be mounted as a docker volume, and
    // Docker for Windows has a bug where only directories can be mounted.
    if len(container.TerminationMessagePath) != 0 && runtime.GOOS != "windows" {
        p := kl.getPodContainerDir(pod.UID, container.Name)
        if err := os.MkdirAll(p, 0750); err != nil {
            klog.Errorf("Error on creating %q: %v", p, err)
        } else {
            opts.PodContainerDir = p
        }
    }
    // only do this check if the experimental behavior is enabled, otherwise allow it to default to false
    if kl.experimentalHostUserNamespaceDefaulting {
        opts.EnableHostUserNamespace = kl.enableHostUserNamespace(pod)
    }
    return opts, cleanupAction, nil
}
var masterServices = sets.NewString("kubernetes")

// getServiceEnvVarMap makes a map[string]string of env vars for services a
// pod in namespace ns should see.
func (kl *Kubelet) getServiceEnvVarMap(ns string, enableServiceLinks bool) (map[string]string, error) {
    var (
        serviceMap = make(map[string]*v1.Service)
        m          = make(map[string]string)
    )
    // Get all service resources from the master (via a cache),
    // and populate them into service environment variables.
    if kl.serviceLister == nil {
        // Kubelets without masters (e.g. plain GCE ContainerVM) don't set env vars.
        return m, nil
    }
    services, err := kl.serviceLister.List(labels.Everything())
    if err != nil {
        return m, fmt.Errorf("failed to list services when setting up env vars")
    }
    // project the services in namespace ns onto the master services
    for i := range services {
        service := services[i]
        // ignore services where ClusterIP is "None" or empty
        if !v1helper.IsServiceIPSet(service) {
            continue
        }
        serviceName := service.Name
        // We always want to add environment variables for master services
        // from the master service namespace, even if enableServiceLinks is false.
        // We also add environment variables for other services in the same
        // namespace, if enableServiceLinks is true.
        if service.Namespace == kl.masterServiceNamespace && masterServices.Has(serviceName) {
            if _, exists := serviceMap[serviceName]; !exists {
                serviceMap[serviceName] = service
            }
        } else if service.Namespace == ns && enableServiceLinks {
            serviceMap[serviceName] = service
        }
    }
    mappedServices := []*v1.Service{}
    for key := range serviceMap {
        mappedServices = append(mappedServices, serviceMap[key])
    }
    for _, e := range envvars.FromServices(mappedServices) {
        m[e.Name] = e.Value
    }
    return m, nil
}
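// For illustration (hypothetical values): a service "redis-master" with
// ClusterIP 10.0.0.11 exposing TCP port 6379 produces env vars such as
//
//	REDIS_MASTER_SERVICE_HOST=10.0.0.11
//	REDIS_MASTER_SERVICE_PORT=6379
//	REDIS_MASTER_PORT=tcp://10.0.0.11:6379
//	REDIS_MASTER_PORT_6379_TCP=tcp://10.0.0.11:6379
//	REDIS_MASTER_PORT_6379_TCP_PROTO=tcp
//	REDIS_MASTER_PORT_6379_TCP_PORT=6379
//	REDIS_MASTER_PORT_6379_TCP_ADDR=10.0.0.11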
// Make the environment variables for a pod in the given namespace.
func (kl *Kubelet) makeEnvironmentVariables(pod *v1.Pod, container *v1.Container, podIP string) ([]kubecontainer.EnvVar, error) {
    if pod.Spec.EnableServiceLinks == nil {
        return nil, fmt.Errorf("nil pod.spec.enableServiceLinks encountered, cannot construct envvars")
    }
    var result []kubecontainer.EnvVar
    // Note: These are added to the docker Config, but are not included in the checksum computed
    // by kubecontainer.HashContainer(...). That way, we can still determine whether a
    // v1.Container is already running by its hash. (We don't want to restart a container just
    // because some service changed.)
    //
    // Note that there is a race between Kubelet seeing the pod and kubelet seeing the service.
    // To avoid this users can: (1) wait between starting a service and starting pods that use it; or (2) detect
    // missing service env vars and exit to be restarted; or (3) use DNS instead of env vars
    // and keep trying to resolve the DNS name of the service (recommended).
    serviceEnv, err := kl.getServiceEnvVarMap(pod.Namespace, *pod.Spec.EnableServiceLinks)
    if err != nil {
        return result, err
    }
    var (
        configMaps = make(map[string]*v1.ConfigMap)
        secrets    = make(map[string]*v1.Secret)
        tmpEnv     = make(map[string]string)
    )
    // Env will override EnvFrom variables.
    // Process EnvFrom first, then allow Env to replace existing values.
    for _, envFrom := range container.EnvFrom {
        switch {
        case envFrom.ConfigMapRef != nil:
            cm := envFrom.ConfigMapRef
            name := cm.Name
            configMap, ok := configMaps[name]
            if !ok {
                if kl.kubeClient == nil {
                    return result, fmt.Errorf("Couldn't get configMap %v/%v, no kubeClient defined", pod.Namespace, name)
                }
                optional := cm.Optional != nil && *cm.Optional
                configMap, err = kl.configMapManager.GetConfigMap(pod.Namespace, name)
                if err != nil {
                    if errors.IsNotFound(err) && optional {
                        // ignore error when marked optional
                        continue
                    }
                    return result, err
                }
                configMaps[name] = configMap
            }
            invalidKeys := []string{}
            for k, v := range configMap.Data {
                if len(envFrom.Prefix) > 0 {
                    k = envFrom.Prefix + k
                }
                if errMsgs := utilvalidation.IsEnvVarName(k); len(errMsgs) != 0 {
                    invalidKeys = append(invalidKeys, k)
                    continue
                }
                tmpEnv[k] = v
            }
            if len(invalidKeys) > 0 {
                sort.Strings(invalidKeys)
                kl.recorder.Eventf(pod, v1.EventTypeWarning, "InvalidEnvironmentVariableNames", "Keys [%s] from the EnvFrom configMap %s/%s were skipped since they are considered invalid environment variable names.", strings.Join(invalidKeys, ", "), pod.Namespace, name)
            }
        case envFrom.SecretRef != nil:
            s := envFrom.SecretRef
            name := s.Name
            secret, ok := secrets[name]
            if !ok {
                if kl.kubeClient == nil {
                    return result, fmt.Errorf("Couldn't get secret %v/%v, no kubeClient defined", pod.Namespace, name)
                }
                optional := s.Optional != nil && *s.Optional
                secret, err = kl.secretManager.GetSecret(pod.Namespace, name)
                if err != nil {
                    if errors.IsNotFound(err) && optional {
                        // ignore error when marked optional
                        continue
                    }
                    return result, err
                }
                secrets[name] = secret
            }
            invalidKeys := []string{}
            for k, v := range secret.Data {
                if len(envFrom.Prefix) > 0 {
                    k = envFrom.Prefix + k
                }
                if errMsgs := utilvalidation.IsEnvVarName(k); len(errMsgs) != 0 {
                    invalidKeys = append(invalidKeys, k)
                    continue
                }
                tmpEnv[k] = string(v)
            }
            if len(invalidKeys) > 0 {
                sort.Strings(invalidKeys)
                kl.recorder.Eventf(pod, v1.EventTypeWarning, "InvalidEnvironmentVariableNames", "Keys [%s] from the EnvFrom secret %s/%s were skipped since they are considered invalid environment variable names.", strings.Join(invalidKeys, ", "), pod.Namespace, name)
            }
        }
    }
    // Determine the final values of variables:
    //
    // 1. Determine the final value of each variable:
    //    a. If the variable's Value is set, expand the `$(var)` references to other
    //       variables in the .Value field; the sources of variables are the declared
    //       variables of the container and the service environment variables.
    //    b. If a source is defined for an environment variable, resolve the source.
    // 2. Create the container's environment in the order variables are declared.
    // 3. Add remaining service environment vars.
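    // For illustration (hypothetical values): with
    //
    //	env:
    //	- name: GREETING
    //	  value: hello
    //	- name: MESSAGE
    //	  value: $(GREETING) world
    //
    // MESSAGE resolves to "hello world", because GREETING is declared earlier
    // in the same container and is therefore visible to the mapping function
    // constructed below.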
    var (
        mappingFunc = expansion.MappingFuncFor(tmpEnv, serviceEnv)
    )
    for _, envVar := range container.Env {
        runtimeVal := envVar.Value
        if runtimeVal != "" {
            // Step 1a: expand variable references
            runtimeVal = expansion.Expand(runtimeVal, mappingFunc)
        } else if envVar.ValueFrom != nil {
            // Step 1b: resolve alternate env var sources
            switch {
            case envVar.ValueFrom.FieldRef != nil:
                runtimeVal, err = kl.podFieldSelectorRuntimeValue(envVar.ValueFrom.FieldRef, pod, podIP)
                if err != nil {
                    return result, err
                }
            case envVar.ValueFrom.ResourceFieldRef != nil:
                defaultedPod, defaultedContainer, err := kl.defaultPodLimitsForDownwardAPI(pod, container)
                if err != nil {
                    return result, err
                }
                runtimeVal, err = containerResourceRuntimeValue(envVar.ValueFrom.ResourceFieldRef, defaultedPod, defaultedContainer)
                if err != nil {
                    return result, err
                }
            case envVar.ValueFrom.ConfigMapKeyRef != nil:
                cm := envVar.ValueFrom.ConfigMapKeyRef
                name := cm.Name
                key := cm.Key
                optional := cm.Optional != nil && *cm.Optional
                configMap, ok := configMaps[name]
                if !ok {
                    if kl.kubeClient == nil {
                        return result, fmt.Errorf("Couldn't get configMap %v/%v, no kubeClient defined", pod.Namespace, name)
                    }
                    configMap, err = kl.configMapManager.GetConfigMap(pod.Namespace, name)
                    if err != nil {
                        if errors.IsNotFound(err) && optional {
                            // ignore error when marked optional
                            continue
                        }
                        return result, err
                    }
                    configMaps[name] = configMap
                }
                runtimeVal, ok = configMap.Data[key]
                if !ok {
                    if optional {
                        continue
                    }
                    return result, fmt.Errorf("Couldn't find key %v in ConfigMap %v/%v", key, pod.Namespace, name)
                }
            case envVar.ValueFrom.SecretKeyRef != nil:
                s := envVar.ValueFrom.SecretKeyRef
                name := s.Name
                key := s.Key
                optional := s.Optional != nil && *s.Optional
                secret, ok := secrets[name]
                if !ok {
                    if kl.kubeClient == nil {
                        return result, fmt.Errorf("Couldn't get secret %v/%v, no kubeClient defined", pod.Namespace, name)
                    }
                    secret, err = kl.secretManager.GetSecret(pod.Namespace, name)
                    if err != nil {
                        if errors.IsNotFound(err) && optional {
                            // ignore error when marked optional
                            continue
                        }
                        return result, err
                    }
                    secrets[name] = secret
                }
                runtimeValBytes, ok := secret.Data[key]
                if !ok {
                    if optional {
                        continue
                    }
                    return result, fmt.Errorf("Couldn't find key %v in Secret %v/%v", key, pod.Namespace, name)
                }
                runtimeVal = string(runtimeValBytes)
            }
        }
        // The apiserver (via apiserver+Pods) may set service env vars as well as the
        // kubelet. In case both are doing it, delete the key from the
        // kubelet-generated ones so we don't have duplicate env vars.
        // TODO: remove this next line once all platforms use apiserver+Pods.
        delete(serviceEnv, envVar.Name)
        tmpEnv[envVar.Name] = runtimeVal
    }
    // Append the env vars
    for k, v := range tmpEnv {
        result = append(result, kubecontainer.EnvVar{Name: k, Value: v})
    }
    // Append remaining service env vars.
    for k, v := range serviceEnv {
        // The apiserver may set service env vars as well as the kubelet. In case
        // both are doing it, skip keys the kubelet already generated so we don't
        // have duplicate env vars.
        // TODO: remove this next line once all platforms use apiserver+Pods.
        if _, present := tmpEnv[k]; !present {
            result = append(result, kubecontainer.EnvVar{Name: k, Value: v})
        }
    }
    return result, nil
}
// podFieldSelectorRuntimeValue returns the runtime value of the given
// selector for a pod.
func (kl *Kubelet) podFieldSelectorRuntimeValue(fs *v1.ObjectFieldSelector, pod *v1.Pod, podIP string) (string, error) {
    internalFieldPath, _, err := podshelper.ConvertDownwardAPIFieldLabel(fs.APIVersion, fs.FieldPath, "")
    if err != nil {
        return "", err
    }
    switch internalFieldPath {
    case "spec.nodeName":
        return pod.Spec.NodeName, nil
    case "spec.serviceAccountName":
        return pod.Spec.ServiceAccountName, nil
    case "status.hostIP":
        hostIP, err := kl.getHostIPAnyWay()
        if err != nil {
            return "", err
        }
        return hostIP.String(), nil
    case "status.podIP":
        return podIP, nil
    }
    return fieldpath.ExtractFieldPathAsString(pod, internalFieldPath)
}

// containerResourceRuntimeValue returns the value of the provided container resource.
func containerResourceRuntimeValue(fs *v1.ResourceFieldSelector, pod *v1.Pod, container *v1.Container) (string, error) {
    containerName := fs.ContainerName
    if len(containerName) == 0 {
        return resource.ExtractContainerResourceValue(fs, container)
    }
    return resource.ExtractResourceValueByContainerName(fs, pod, containerName)
}
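// For illustration (hypothetical values): a downward-API env var with
//
//	resourceFieldRef:
//	  resource: limits.cpu
//	  divisor: 1m
//
// on a container whose CPU limit is 500m resolves to "500"; with the default
// divisor of "1", CPU quantities are rounded up to whole cores, so the same
// limit would resolve to "1".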
// One of the following arguments must be non-nil: runningPod, status.
// TODO: Modify containerRuntime.KillPod() to accept the right arguments.
func (kl *Kubelet) killPod(pod *v1.Pod, runningPod *kubecontainer.Pod, status *kubecontainer.PodStatus, gracePeriodOverride *int64) error {
    var p kubecontainer.Pod
    if runningPod != nil {
        p = *runningPod
    } else if status != nil {
        p = kubecontainer.ConvertPodStatusToRunningPod(kl.getRuntime().Type(), status)
    } else {
        return fmt.Errorf("one of the two arguments must be non-nil: runningPod, status")
    }
    // Call the container runtime KillPod method, which stops all running containers of the pod.
    if err := kl.containerRuntime.KillPod(pod, p, gracePeriodOverride); err != nil {
        return err
    }
    if err := kl.containerManager.UpdateQOSCgroups(); err != nil {
        klog.V(2).Infof("Failed to update QoS cgroups while killing pod: %v", err)
    }
    return nil
}

// makePodDataDirs creates the directories for the pod's data.
func (kl *Kubelet) makePodDataDirs(pod *v1.Pod) error {
    uid := pod.UID
    if err := os.MkdirAll(kl.getPodDir(uid), 0750); err != nil && !os.IsExist(err) {
        return err
    }
    if err := os.MkdirAll(kl.getPodVolumesDir(uid), 0750); err != nil && !os.IsExist(err) {
        return err
    }
    if err := os.MkdirAll(kl.getPodPluginsDir(uid), 0750); err != nil && !os.IsExist(err) {
        return err
    }
    return nil
}

// getPullSecretsForPod inspects the Pod and retrieves the referenced pull
// secrets.
func (kl *Kubelet) getPullSecretsForPod(pod *v1.Pod) []v1.Secret {
    pullSecrets := []v1.Secret{}
    for _, secretRef := range pod.Spec.ImagePullSecrets {
        secret, err := kl.secretManager.GetSecret(pod.Namespace, secretRef.Name)
        if err != nil {
            klog.Warningf("Unable to retrieve pull secret %s/%s for %s/%s due to %v. The image pull may not succeed.", pod.Namespace, secretRef.Name, pod.Namespace, pod.Name, err)
            continue
        }
        pullSecrets = append(pullSecrets, *secret)
    }
    return pullSecrets
}
// podIsTerminated returns true if the pod is in a terminal state ("Failed" or "Succeeded").
func (kl *Kubelet) podIsTerminated(pod *v1.Pod) bool {
    // Check the cached pod status, which was set after the last sync.
    status, ok := kl.statusManager.GetPodStatus(pod.UID)
    if !ok {
        // If there is no cached status, use the status from the
        // apiserver. This is useful if kubelet has recently been
        // restarted.
        status = pod.Status
    }
    return status.Phase == v1.PodFailed || status.Phase == v1.PodSucceeded || (pod.DeletionTimestamp != nil && notRunning(status.ContainerStatuses))
}

// IsPodTerminated returns true if the pod with the provided UID is in a terminated state ("Failed" or "Succeeded"),
// or if the pod has been deleted or removed.
func (kl *Kubelet) IsPodTerminated(uid types.UID) bool {
    pod, podFound := kl.podManager.GetPodByUID(uid)
    if !podFound {
        return true
    }
    return kl.podIsTerminated(pod)
}

// IsPodDeleted returns true if the pod is deleted. For the pod to be deleted, either:
// 1. The pod object is deleted
// 2. The pod's status is evicted
// 3. The pod's deletion timestamp is set, and containers are not running
func (kl *Kubelet) IsPodDeleted(uid types.UID) bool {
    pod, podFound := kl.podManager.GetPodByUID(uid)
    if !podFound {
        return true
    }
    status, statusFound := kl.statusManager.GetPodStatus(pod.UID)
    if !statusFound {
        status = pod.Status
    }
    return eviction.PodIsEvicted(status) || (pod.DeletionTimestamp != nil && notRunning(status.ContainerStatuses))
}

// PodResourcesAreReclaimed returns true if all required node-level resources that a pod was consuming have
// been reclaimed by the kubelet. Reclaiming resources is a prerequisite to deleting a pod from the API server.
func (kl *Kubelet) PodResourcesAreReclaimed(pod *v1.Pod, status v1.PodStatus) bool {
    if !notRunning(status.ContainerStatuses) {
        // We shouldn't delete pods that still have running containers.
        klog.V(3).Infof("Pod %q is terminated, but some containers are still running", format.Pod(pod))
        return false
    }
    // pod's containers should be deleted
    runtimeStatus, err := kl.podCache.Get(pod.UID)
    if err != nil {
        klog.V(3).Infof("Pod %q is terminated, Error getting runtimeStatus from the podCache: %s", format.Pod(pod), err)
        return false
    }
    if len(runtimeStatus.ContainerStatuses) > 0 {
        var statusStr string
        for _, status := range runtimeStatus.ContainerStatuses {
            statusStr += fmt.Sprintf("%+v ", *status)
        }
        klog.V(3).Infof("Pod %q is terminated, but some containers have not been cleaned up: %s", format.Pod(pod), statusStr)
        return false
    }
    if kl.podVolumesExist(pod.UID) && !kl.keepTerminatedPodVolumes {
        // We shouldn't delete pods whose volumes have not been cleaned up if we are not keeping terminated pod volumes.
        klog.V(3).Infof("Pod %q is terminated, but some volumes have not been cleaned up", format.Pod(pod))
        return false
    }
    if kl.kubeletConfiguration.CgroupsPerQOS {
        pcm := kl.containerManager.NewPodContainerManager()
        if pcm.Exists(pod) {
            klog.V(3).Infof("Pod %q is terminated, but pod cgroup sandbox has not been cleaned up", format.Pod(pod))
            return false
        }
    }
    return true
}

// podResourcesAreReclaimed simply calls PodResourcesAreReclaimed with the most up-to-date status.
func (kl *Kubelet) podResourcesAreReclaimed(pod *v1.Pod) bool {
    status, ok := kl.statusManager.GetPodStatus(pod.UID)
    if !ok {
        status = pod.Status
    }
    return kl.PodResourcesAreReclaimed(pod, status)
}

// notRunning returns true if every status is terminated or waiting, or the status list
// is empty.
func notRunning(statuses []v1.ContainerStatus) bool {
    for _, status := range statuses {
        if status.State.Terminated == nil && status.State.Waiting == nil {
            return false
        }
    }
    return true
}

// filterOutTerminatedPods returns the given pods which the status manager
// does not consider failed or succeeded.
func (kl *Kubelet) filterOutTerminatedPods(pods []*v1.Pod) []*v1.Pod {
    var filteredPods []*v1.Pod
    for _, p := range pods {
        if kl.podIsTerminated(p) {
            continue
        }
        filteredPods = append(filteredPods, p)
    }
    return filteredPods
}

// removeOrphanedPodStatuses removes obsolete entries in podStatus where
// the pod is no longer considered bound to this node.
func (kl *Kubelet) removeOrphanedPodStatuses(pods []*v1.Pod, mirrorPods []*v1.Pod) {
    podUIDs := make(map[types.UID]bool)
    for _, pod := range pods {
        podUIDs[pod.UID] = true
    }
    for _, pod := range mirrorPods {
        podUIDs[pod.UID] = true
    }
    kl.statusManager.RemoveOrphanedStatuses(podUIDs)
}
// HandlePodCleanups performs a series of cleanup work, including terminating
// pod workers, killing unwanted pods, and removing orphaned volumes/pod
// directories.
// NOTE: This function is executed by the main sync loop, so it
// should not contain any blocking calls.
func (kl *Kubelet) HandlePodCleanups() error {
    // The kubelet lacks checkpointing, so we need to introspect the set of pods
    // in the cgroup tree prior to inspecting the set of pods in our pod manager.
    // This ensures our view of the cgroup tree does not mistakenly observe pods
    // that are added after the fact...
    var (
        cgroupPods map[types.UID]cm.CgroupName
        err        error
    )
    if kl.cgroupsPerQOS {
        pcm := kl.containerManager.NewPodContainerManager()
        cgroupPods, err = pcm.GetAllPodsFromCgroups()
        if err != nil {
            return fmt.Errorf("failed to get list of pods that still exist on cgroup mounts: %v", err)
        }
    }
    allPods, mirrorPods := kl.podManager.GetPodsAndMirrorPods()
    // Pod phase progresses monotonically. Once a pod has reached a final state,
    // it should never leave it regardless of the restart policy. The statuses
    // of such pods should not be changed, and there is no need to sync them.
    // TODO: the logic here does not handle two cases:
    //  1. If the containers were removed immediately after they died, kubelet
    //     may fail to generate correct statuses, let alone filter correctly.
    //  2. If kubelet restarted before writing the terminated status for a pod
    //     to the apiserver, it could still restart the terminated pod (even
    //     though the pod was not considered terminated by the apiserver).
    // These two conditions could be alleviated by checkpointing kubelet.
    activePods := kl.filterOutTerminatedPods(allPods)
    desiredPods := make(map[types.UID]empty)
    for _, pod := range activePods {
        desiredPods[pod.UID] = empty{}
    }
    // Stop the workers for no-longer existing pods.
    // TODO: is here the best place to forget pod workers?
    kl.podWorkers.ForgetNonExistingPodWorkers(desiredPods)
    kl.probeManager.CleanupPods(activePods)
    runningPods, err := kl.runtimeCache.GetPods()
    if err != nil {
        klog.Errorf("Error listing containers: %#v", err)
        return err
    }
    for _, pod := range runningPods {
        if _, found := desiredPods[pod.ID]; !found {
            kl.podKillingCh <- &kubecontainer.PodPair{APIPod: nil, RunningPod: pod}
        }
    }
    kl.removeOrphanedPodStatuses(allPods, mirrorPods)
    // Note that we just killed the unwanted pods. This may not have been reflected
    // in the cache yet. We need to bypass the cache to get the latest set of
    // running pods to clean up the volumes.
    // TODO: Evaluate the performance impact of bypassing the runtime cache.
    runningPods, err = kl.containerRuntime.GetPods(false)
    if err != nil {
        klog.Errorf("Error listing containers: %#v", err)
        return err
    }
    // Remove any orphaned volumes.
    // Note that we pass all pods (including terminated pods) to the function,
    // so that we don't remove volumes associated with terminated but not yet
    // deleted pods.
    err = kl.cleanupOrphanedPodDirs(allPods, runningPods)
    if err != nil {
        // We want all cleanup tasks to be run even if one of them failed, so
        // we just log an error here and continue with the other cleanup tasks.
        // This also applies to the other cleanup tasks.
        klog.Errorf("Failed cleaning up orphaned pod directories: %v", err)
    }
    // Remove any orphaned mirror pods.
    kl.podManager.DeleteOrphanedMirrorPods()
    // Remove any cgroups in the hierarchy for pods that are no longer running.
    if kl.cgroupsPerQOS {
        kl.cleanupOrphanedPodCgroups(cgroupPods, activePods)
    }
    kl.backOff.GC()
    return nil
}
// podKiller launches a goroutine to kill a pod received from the channel if
// another goroutine isn't already in action.
func (kl *Kubelet) podKiller() {
    killing := sets.NewString()
    // guard for the killing set
    lock := sync.Mutex{}
    for podPair := range kl.podKillingCh {
        runningPod := podPair.RunningPod
        apiPod := podPair.APIPod
        lock.Lock()
        exists := killing.Has(string(runningPod.ID))
        if !exists {
            killing.Insert(string(runningPod.ID))
        }
        lock.Unlock()
        if !exists {
            go func(apiPod *v1.Pod, runningPod *kubecontainer.Pod) {
                klog.V(2).Infof("Killing unwanted pod %q", runningPod.Name)
                err := kl.killPod(apiPod, runningPod, nil, nil)
                if err != nil {
                    klog.Errorf("Failed killing the pod %q: %v", runningPod.Name, err)
                }
                lock.Lock()
                killing.Delete(string(runningPod.ID))
                lock.Unlock()
            }(apiPod, runningPod)
        }
    }
}
// validateContainerLogStatus returns the container ID for the desired container to retrieve logs for, based on the state
// of the container. The previous flag will only return the logs for the last terminated container; otherwise, the current
// running container is preferred over a previous termination. If info about the container is not available then a specific
// error is returned to the end user.
func (kl *Kubelet) validateContainerLogStatus(podName string, podStatus *v1.PodStatus, containerName string, previous bool) (containerID kubecontainer.ContainerID, err error) {
    var cID string
    cStatus, found := podutil.GetContainerStatus(podStatus.ContainerStatuses, containerName)
    // if not found, check the init containers
    if !found {
        cStatus, found = podutil.GetContainerStatus(podStatus.InitContainerStatuses, containerName)
    }
    if !found {
        return kubecontainer.ContainerID{}, fmt.Errorf("container %q in pod %q is not available", containerName, podName)
    }
    lastState := cStatus.LastTerminationState
    waiting, running, terminated := cStatus.State.Waiting, cStatus.State.Running, cStatus.State.Terminated
    switch {
    case previous:
        if lastState.Terminated == nil || lastState.Terminated.ContainerID == "" {
            return kubecontainer.ContainerID{}, fmt.Errorf("previous terminated container %q in pod %q not found", containerName, podName)
        }
        cID = lastState.Terminated.ContainerID
    case running != nil:
        cID = cStatus.ContainerID
    case terminated != nil:
        // in cases where the next container didn't start, terminated.ContainerID will be empty, so get logs from the lastState.Terminated.
        if terminated.ContainerID == "" {
            if lastState.Terminated != nil && lastState.Terminated.ContainerID != "" {
                cID = lastState.Terminated.ContainerID
            } else {
                return kubecontainer.ContainerID{}, fmt.Errorf("container %q in pod %q is terminated", containerName, podName)
            }
        } else {
            cID = terminated.ContainerID
        }
    case lastState.Terminated != nil:
        if lastState.Terminated.ContainerID == "" {
            return kubecontainer.ContainerID{}, fmt.Errorf("container %q in pod %q is terminated", containerName, podName)
        }
        cID = lastState.Terminated.ContainerID
    case waiting != nil:
        // output some info for the most common pending failures
        switch reason := waiting.Reason; reason {
        case images.ErrImagePull.Error():
            return kubecontainer.ContainerID{}, fmt.Errorf("container %q in pod %q is waiting to start: image can't be pulled", containerName, podName)
        case images.ErrImagePullBackOff.Error():
            return kubecontainer.ContainerID{}, fmt.Errorf("container %q in pod %q is waiting to start: trying and failing to pull image", containerName, podName)
        default:
            return kubecontainer.ContainerID{}, fmt.Errorf("container %q in pod %q is waiting to start: %v", containerName, podName, reason)
        }
    default:
        // unrecognized state
        return kubecontainer.ContainerID{}, fmt.Errorf("container %q in pod %q is waiting to start - no logs yet", containerName, podName)
    }
    return kubecontainer.ParseContainerID(cID), nil
}
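// For illustration: `kubectl logs --previous` reaches this function with
// previous=true, so it succeeds only when LastTerminationState carries a
// terminated container ID; without --previous, a running container wins over
// any earlier termination, per the switch above.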
// GetKubeletContainerLogs returns logs from the container.
// TODO: this method is returning logs of random container attempts, when it should be returning the most recent attempt
// or all of them.
func (kl *Kubelet) GetKubeletContainerLogs(ctx context.Context, podFullName, containerName string, logOptions *v1.PodLogOptions, stdout, stderr io.Writer) error {
	// Pod workers periodically write status to statusManager. If status is not
	// cached there, something is wrong (or kubelet just restarted and hasn't
	// caught up yet). Just assume the pod is not ready yet.
	name, namespace, err := kubecontainer.ParsePodFullName(podFullName)
	if err != nil {
		return fmt.Errorf("unable to parse pod full name %q: %v", podFullName, err)
	}
	pod, ok := kl.GetPodByName(namespace, name)
	if !ok {
		return fmt.Errorf("pod %q cannot be found - no logs available", name)
	}

	podUID := pod.UID
	if mirrorPod, ok := kl.podManager.GetMirrorPodByPod(pod); ok {
		podUID = mirrorPod.UID
	}
	podStatus, found := kl.statusManager.GetPodStatus(podUID)
	if !found {
		// If there is no cached status, use the status from the
		// apiserver. This is useful if kubelet has recently been
		// restarted.
		podStatus = pod.Status
	}

	// TODO: Consolidate the logic here with kuberuntime.GetContainerLogs, here we convert container name to containerID,
	// but inside kuberuntime we convert container id back to container name and restart count.
	// TODO: After separate container log lifecycle management, we should get log based on the existing log files
	// instead of container status.
	containerID, err := kl.validateContainerLogStatus(pod.Name, &podStatus, containerName, logOptions.Previous)
	if err != nil {
		return err
	}

	// Do a zero-byte write to stdout before handing off to the container runtime.
	// This ensures at least one Write call is made to the writer when copying starts,
	// even if we then block waiting for log output from the container.
	if _, err := stdout.Write([]byte{}); err != nil {
		return err
	}

	if kl.dockerLegacyService != nil {
		// dockerLegacyService should only be non-nil when we actually need it, so
		// inject it into the runtimeService.
		// TODO(random-liu): Remove this hack after deprecating unsupported log driver.
		return kl.dockerLegacyService.GetContainerLogs(ctx, pod, containerID, logOptions, stdout, stderr)
	}
	return kl.containerRuntime.GetContainerLogs(ctx, pod, containerID, logOptions, stdout, stderr)
}
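
// A minimal sketch of the zero-byte-write pattern used above, assuming a
// hypothetical streamLogs helper (illustrative only, not part of kubelet):
// surfacing writer errors and forcing an early Write call matters when the
// writer is an http.ResponseWriter and the log fetch may block for a while.
//
//	func streamLogs(w io.Writer, fetch func(io.Writer) error) error {
//		if _, err := w.Write([]byte{}); err != nil {
//			return err // fail fast instead of blocking on a dead writer
//		}
//		return fetch(w) // may block until the container produces output
//	}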
// getPhase returns the phase of a pod given its container info.
func getPhase(spec *v1.PodSpec, info []v1.ContainerStatus) v1.PodPhase {
	pendingInitialization := 0
	failedInitialization := 0
	for _, container := range spec.InitContainers {
		containerStatus, ok := podutil.GetContainerStatus(info, container.Name)
		if !ok {
			pendingInitialization++
			continue
		}
		switch {
		case containerStatus.State.Running != nil:
			pendingInitialization++
		case containerStatus.State.Terminated != nil:
			if containerStatus.State.Terminated.ExitCode != 0 {
				failedInitialization++
			}
		case containerStatus.State.Waiting != nil:
			if containerStatus.LastTerminationState.Terminated != nil {
				if containerStatus.LastTerminationState.Terminated.ExitCode != 0 {
					failedInitialization++
				}
			} else {
				pendingInitialization++
			}
		default:
			pendingInitialization++
		}
	}

	unknown := 0
	running := 0
	waiting := 0
	stopped := 0
	succeeded := 0
	for _, container := range spec.Containers {
		containerStatus, ok := podutil.GetContainerStatus(info, container.Name)
		if !ok {
			unknown++
			continue
		}
		switch {
		case containerStatus.State.Running != nil:
			running++
		case containerStatus.State.Terminated != nil:
			stopped++
			if containerStatus.State.Terminated.ExitCode == 0 {
				succeeded++
			}
		case containerStatus.State.Waiting != nil:
			if containerStatus.LastTerminationState.Terminated != nil {
				stopped++
			} else {
				waiting++
			}
		default:
			unknown++
		}
	}

	if failedInitialization > 0 && spec.RestartPolicy == v1.RestartPolicyNever {
		return v1.PodFailed
	}

	switch {
	case pendingInitialization > 0:
		fallthrough
	case waiting > 0:
		klog.V(5).Infof("pod waiting > 0, pending")
		// One or more containers have not been started.
		return v1.PodPending
	case running > 0 && unknown == 0:
		// All containers have been started, and at least
		// one container is running.
		return v1.PodRunning
	case running == 0 && stopped > 0 && unknown == 0:
		// All containers are terminated.
		if spec.RestartPolicy == v1.RestartPolicyAlways {
			// All containers are in the process of restarting.
			return v1.PodRunning
		}
		if stopped == succeeded {
			// RestartPolicy is not Always, and all
			// containers terminated successfully.
			return v1.PodSucceeded
		}
		if spec.RestartPolicy == v1.RestartPolicyNever {
			// RestartPolicy is Never, and all containers are
			// terminated with at least one in failure.
			return v1.PodFailed
		}
		// RestartPolicy is OnFailure, and at least one container failed
		// and is in the process of restarting.
		return v1.PodRunning
	default:
		klog.V(5).Infof("pod default case, pending")
		return v1.PodPending
	}
}
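
// Worked example (added commentary, not upstream code): with
// RestartPolicyNever, a single init container that exited non-zero is enough
// for getPhase to report PodFailed, regardless of the app containers:
//
//	spec := &v1.PodSpec{
//		RestartPolicy:  v1.RestartPolicyNever,
//		InitContainers: []v1.Container{{Name: "init"}},
//		Containers:     []v1.Container{{Name: "app"}},
//	}
//	info := []v1.ContainerStatus{{
//		Name:  "init",
//		State: v1.ContainerState{Terminated: &v1.ContainerStateTerminated{ExitCode: 1}},
//	}}
//	phase := getPhase(spec, info) // v1.PodFailed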
// generateAPIPodStatus creates the final API pod status for a pod, given the
// internal pod status.
func (kl *Kubelet) generateAPIPodStatus(pod *v1.Pod, podStatus *kubecontainer.PodStatus) v1.PodStatus {
	klog.V(3).Infof("Generating status for %q", format.Pod(pod))

	s := kl.convertStatusToAPIStatus(pod, podStatus)

	// Check if an internal module has requested that the pod be evicted.
	for _, podSyncHandler := range kl.PodSyncHandlers {
		if result := podSyncHandler.ShouldEvict(pod); result.Evict {
			s.Phase = v1.PodFailed
			s.Reason = result.Reason
			s.Message = result.Message
			return *s
		}
	}

	// Assume info is ready to process.
	spec := &pod.Spec
	allStatus := append(append([]v1.ContainerStatus{}, s.ContainerStatuses...), s.InitContainerStatuses...)
	s.Phase = getPhase(spec, allStatus)

	// Check for an illegal phase transition.
	if pod.Status.Phase == v1.PodFailed || pod.Status.Phase == v1.PodSucceeded {
		// The API server shows a terminal phase; transitions are not allowed.
		if s.Phase != pod.Status.Phase {
			klog.Errorf("Pod attempted illegal phase transition from %s to %s: %v", pod.Status.Phase, s.Phase, s)
			// Force back to the phase from the API server.
			s.Phase = pod.Status.Phase
		}
	}

	kl.probeManager.UpdatePodStatus(pod.UID, s)
	s.Conditions = append(s.Conditions, status.GeneratePodInitializedCondition(spec, s.InitContainerStatuses, s.Phase))
	s.Conditions = append(s.Conditions, status.GeneratePodReadyCondition(spec, s.Conditions, s.ContainerStatuses, s.Phase))
	s.Conditions = append(s.Conditions, status.GenerateContainersReadyCondition(spec, s.ContainerStatuses, s.Phase))
	// Status manager will take care of the LastTransitionTimestamp, either preserve
	// the timestamp from apiserver, or set a new one. When kubelet sees the pod,
	// the `PodScheduled` condition must be true.
	s.Conditions = append(s.Conditions, v1.PodCondition{
		Type:   v1.PodScheduled,
		Status: v1.ConditionTrue,
	})

	if kl.kubeClient != nil {
		hostIP, err := kl.getHostIPAnyWay()
		if err != nil {
			klog.V(4).Infof("Cannot get host IP: %v", err)
		} else {
			s.HostIP = hostIP.String()
			if kubecontainer.IsHostNetworkPod(pod) && s.PodIP == "" {
				s.PodIP = hostIP.String()
			}
		}
	}

	return *s
}
// convertStatusToAPIStatus creates an api PodStatus for the given pod from
// the given internal pod status. It is purely transformative and does not
// alter the kubelet state at all.
func (kl *Kubelet) convertStatusToAPIStatus(pod *v1.Pod, podStatus *kubecontainer.PodStatus) *v1.PodStatus {
	var apiPodStatus v1.PodStatus
	apiPodStatus.PodIP = podStatus.IP
	// Set the QOS class; this also covers pods created on versions of kube older than 1.6.
	apiPodStatus.QOSClass = v1qos.GetPodQOS(pod)

	oldPodStatus, found := kl.statusManager.GetPodStatus(pod.UID)
	if !found {
		oldPodStatus = pod.Status
	}

	apiPodStatus.ContainerStatuses = kl.convertToAPIContainerStatuses(
		pod, podStatus,
		oldPodStatus.ContainerStatuses,
		pod.Spec.Containers,
		len(pod.Spec.InitContainers) > 0,
		false,
	)
	apiPodStatus.InitContainerStatuses = kl.convertToAPIContainerStatuses(
		pod, podStatus,
		oldPodStatus.InitContainerStatuses,
		pod.Spec.InitContainers,
		len(pod.Spec.InitContainers) > 0,
		true,
	)

	// Preserve conditions not controlled by kubelet.
	for _, c := range pod.Status.Conditions {
		if !kubetypes.PodConditionByKubelet(c.Type) {
			apiPodStatus.Conditions = append(apiPodStatus.Conditions, c)
		}
	}
	return &apiPodStatus
}
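
// Added commentary (illustrative only, not upstream code): because only
// kubelet-owned condition types are regenerated, a condition set by another
// component survives the conversion unchanged. Assuming a hypothetical
// custom condition type:
//
//	pod.Status.Conditions = append(pod.Status.Conditions, v1.PodCondition{
//		Type:   "example.com/approved", // hypothetical, not owned by kubelet
//		Status: v1.ConditionTrue,
//	})
//	apiStatus := kl.convertStatusToAPIStatus(pod, podStatus)
//	// apiStatus.Conditions still contains "example.com/approved".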
// convertToAPIContainerStatuses converts the given internal container
// statuses into API container statuses.
func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecontainer.PodStatus, previousStatus []v1.ContainerStatus, containers []v1.Container, hasInitContainers, isInitContainer bool) []v1.ContainerStatus {
	convertContainerStatus := func(cs *kubecontainer.ContainerStatus) *v1.ContainerStatus {
		cid := cs.ID.String()
		status := &v1.ContainerStatus{
			Name:         cs.Name,
			RestartCount: int32(cs.RestartCount),
			Image:        cs.Image,
			ImageID:      cs.ImageID,
			ContainerID:  cid,
		}
		switch cs.State {
		case kubecontainer.ContainerStateRunning:
			status.State.Running = &v1.ContainerStateRunning{StartedAt: metav1.NewTime(cs.StartedAt)}
		case kubecontainer.ContainerStateCreated:
			// Treat containers in the "created" state as if they are exited.
			// The pod worker is supposed to start all containers it creates in
			// one sync (syncPod) iteration. There should not be any normal
			// "created" containers when the pod worker generates the status at
			// the beginning of a sync iteration.
			fallthrough
		case kubecontainer.ContainerStateExited:
			status.State.Terminated = &v1.ContainerStateTerminated{
				ExitCode:    int32(cs.ExitCode),
				Reason:      cs.Reason,
				Message:     cs.Message,
				StartedAt:   metav1.NewTime(cs.StartedAt),
				FinishedAt:  metav1.NewTime(cs.FinishedAt),
				ContainerID: cid,
			}
		default:
			status.State.Waiting = &v1.ContainerStateWaiting{}
		}
		return status
	}

	// Fetch old container statuses from the old pod status.
	oldStatuses := make(map[string]v1.ContainerStatus, len(containers))
	for _, status := range previousStatus {
		oldStatuses[status.Name] = status
	}

	// Set all container statuses to the default waiting state.
	statuses := make(map[string]*v1.ContainerStatus, len(containers))
	defaultWaitingState := v1.ContainerState{Waiting: &v1.ContainerStateWaiting{Reason: "ContainerCreating"}}
	if hasInitContainers {
		defaultWaitingState = v1.ContainerState{Waiting: &v1.ContainerStateWaiting{Reason: "PodInitializing"}}
	}
	for _, container := range containers {
		status := &v1.ContainerStatus{
			Name:  container.Name,
			Image: container.Image,
			State: defaultWaitingState,
		}
		oldStatus, found := oldStatuses[container.Name]
		if found {
			if oldStatus.State.Terminated != nil {
				// Do not update the status of terminated init containers, as
				// they may be removed at any time.
				status = &oldStatus
			} else {
				// Apply some values from the old statuses as the default values.
				status.RestartCount = oldStatus.RestartCount
				status.LastTerminationState = oldStatus.LastTerminationState
			}
		}
		statuses[container.Name] = status
	}

	// Make the latest container status come first.
	sort.Sort(sort.Reverse(kubecontainer.SortContainerStatusesByCreationTime(podStatus.ContainerStatuses)))
	// Set container statuses according to the statuses seen in the pod status.
	containerSeen := map[string]int{}
	for _, cStatus := range podStatus.ContainerStatuses {
		cName := cStatus.Name
		if _, ok := statuses[cName]; !ok {
			// This would also ignore the infra container.
			continue
		}
		if containerSeen[cName] >= 2 {
			continue
		}
		status := convertContainerStatus(cStatus)
		if containerSeen[cName] == 0 {
			statuses[cName] = status
		} else {
			statuses[cName].LastTerminationState = status.State
		}
		containerSeen[cName] = containerSeen[cName] + 1
	}

	// Handle containers that failed to start; they should be in the Waiting state.
	for _, container := range containers {
		if isInitContainer {
			// If the init container terminated with exit code 0, it won't be restarted.
			// TODO(random-liu): Handle this in a cleaner way.
			s := podStatus.FindContainerStatusByName(container.Name)
			if s != nil && s.State == kubecontainer.ContainerStateExited && s.ExitCode == 0 {
				continue
			}
		}
		// If a container should be restarted in the next syncPod, it is *Waiting*.
		if !kubecontainer.ShouldContainerBeRestarted(&container, pod, podStatus) {
			continue
		}
		status := statuses[container.Name]
		reason, ok := kl.reasonCache.Get(pod.UID, container.Name)
		if !ok {
			// In fact, we could also apply the Waiting state here, but it is less informative,
			// and the container will be restarted soon, so we prefer the original state here.
			// Note that with the current implementation of ShouldContainerBeRestarted the original state here
			// could be:
			// * Waiting: there is no associated historical container and no start failure reason record.
			// * Terminated: the container is terminated.
			continue
		}
		if status.State.Terminated != nil {
			status.LastTerminationState = status.State
		}
		status.State = v1.ContainerState{
			Waiting: &v1.ContainerStateWaiting{
				Reason:  reason.Err.Error(),
				Message: reason.Message,
			},
		}
		statuses[container.Name] = status
	}

	var containerStatuses []v1.ContainerStatus
	for _, status := range statuses {
		containerStatuses = append(containerStatuses, *status)
	}

	// Sort the container statuses, since clients of this interface expect the list
	// of containers in a pod to have a deterministic order.
	if isInitContainer {
		kubetypes.SortInitContainerStatuses(pod, containerStatuses)
	} else {
		sort.Sort(kubetypes.SortedContainerStatuses(containerStatuses))
	}
	return containerStatuses
}
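
// Added commentary (not upstream code): since runtime statuses are sorted
// newest-first and at most two attempts per container name are consumed, a
// container that crashed once and was restarted ends up with:
//
//	// newest attempt   -> statuses["app"].State                 (e.g. Running)
//	// previous attempt -> statuses["app"].LastTerminationState  (e.g. Terminated, ExitCode 1)
//	// older attempts   -> ignored once containerSeen["app"] >= 2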
// ServeLogs returns logs of the current machine.
func (kl *Kubelet) ServeLogs(w http.ResponseWriter, req *http.Request) {
	// TODO: whitelist logs we are willing to serve
	kl.logServer.ServeHTTP(w, req)
}

// findContainer finds and returns the container with the given pod ID, full name, and container name.
// It returns nil if not found.
func (kl *Kubelet) findContainer(podFullName string, podUID types.UID, containerName string) (*kubecontainer.Container, error) {
	pods, err := kl.containerRuntime.GetPods(false)
	if err != nil {
		return nil, err
	}
	// Resolve and type convert back again.
	// We need the static pod UID but the kubecontainer API works with types.UID.
	podUID = types.UID(kl.podManager.TranslatePodUID(podUID))
	pod := kubecontainer.Pods(pods).FindPod(podFullName, podUID)
	return pod.FindContainerByName(containerName), nil
}
// RunInContainer runs a command in a container and returns the combined stdout and stderr as a byte slice.
func (kl *Kubelet) RunInContainer(podFullName string, podUID types.UID, containerName string, cmd []string) ([]byte, error) {
	container, err := kl.findContainer(podFullName, podUID, containerName)
	if err != nil {
		return nil, err
	}
	if container == nil {
		return nil, fmt.Errorf("container not found (%q)", containerName)
	}
	// TODO(tallclair): Pass a proper timeout value.
	return kl.runner.RunInContainer(container.ID, cmd, 0)
}

// GetExec gets the URL the exec will be served from, or nil if the Kubelet will serve it.
func (kl *Kubelet) GetExec(podFullName string, podUID types.UID, containerName string, cmd []string, streamOpts remotecommandserver.Options) (*url.URL, error) {
	container, err := kl.findContainer(podFullName, podUID, containerName)
	if err != nil {
		return nil, err
	}
	if container == nil {
		return nil, fmt.Errorf("container not found (%q)", containerName)
	}
	return kl.streamingRuntime.GetExec(container.ID, cmd, streamOpts.Stdin, streamOpts.Stdout, streamOpts.Stderr, streamOpts.TTY)
}

// GetAttach gets the URL the attach will be served from, or nil if the Kubelet will serve it.
func (kl *Kubelet) GetAttach(podFullName string, podUID types.UID, containerName string, streamOpts remotecommandserver.Options) (*url.URL, error) {
	container, err := kl.findContainer(podFullName, podUID, containerName)
	if err != nil {
		return nil, err
	}
	if container == nil {
		return nil, fmt.Errorf("container %s not found in pod %s", containerName, podFullName)
	}

	// The TTY setting for attach must match the TTY setting in the initial container configuration,
	// since whether the process is running in a TTY cannot be changed after it has started. We
	// need the api.Pod to get the TTY status.
	pod, found := kl.GetPodByFullName(podFullName)
	if !found || (string(podUID) != "" && pod.UID != podUID) {
		return nil, fmt.Errorf("pod %s not found", podFullName)
	}
	containerSpec := kubecontainer.GetContainerSpec(pod, containerName)
	if containerSpec == nil {
		return nil, fmt.Errorf("container %s not found in pod %s", containerName, podFullName)
	}
	tty := containerSpec.TTY
	return kl.streamingRuntime.GetAttach(container.ID, streamOpts.Stdin, streamOpts.Stdout, streamOpts.Stderr, tty)
}
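
// Hedged usage sketch (names are hypothetical, not upstream code): attach
// reuses the TTY setting recorded in the container spec, so the stream gets a
// TTY only if the container was started with one:
//
//	opts := remotecommandserver.Options{Stdin: true, Stdout: true, Stderr: true}
//	u, err := kl.GetAttach("web_default", "", "app", opts)
//	// u is non-nil when the streaming runtime serves the attach itself.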
// GetPortForward gets the URL the port-forward will be served from, or nil if the Kubelet will serve it.
func (kl *Kubelet) GetPortForward(podName, podNamespace string, podUID types.UID, portForwardOpts portforward.V4Options) (*url.URL, error) {
	pods, err := kl.containerRuntime.GetPods(false)
	if err != nil {
		return nil, err
	}
	// Resolve and type convert back again.
	// We need the static pod UID but the kubecontainer API works with types.UID.
	podUID = types.UID(kl.podManager.TranslatePodUID(podUID))
	podFullName := kubecontainer.BuildPodFullName(podName, podNamespace)
	pod := kubecontainer.Pods(pods).FindPod(podFullName, podUID)
	if pod.IsEmpty() {
		return nil, fmt.Errorf("pod not found (%q)", podFullName)
	}
	return kl.streamingRuntime.GetPortForward(podName, podNamespace, podUID, portForwardOpts.Ports)
}
// cleanupOrphanedPodCgroups removes cgroups that should no longer exist.
// It reconciles the cached state of cgroupPods with the specified list of running pods.
func (kl *Kubelet) cleanupOrphanedPodCgroups(cgroupPods map[types.UID]cm.CgroupName, activePods []*v1.Pod) {
	// Add all running pods to the set that we want to preserve.
	podSet := sets.NewString()
	for _, pod := range activePods {
		podSet.Insert(string(pod.UID))
	}
	pcm := kl.containerManager.NewPodContainerManager()

	// Iterate over all the found pods to verify if they should be running.
	for uid, val := range cgroupPods {
		// If the pod is in the running set, it's not a candidate for cleanup.
		if podSet.Has(string(uid)) {
			continue
		}

		// If volumes have not been unmounted/detached, do not delete the cgroup,
		// so any memory-backed volumes don't have their charges propagated to the
		// parent cgroup. If the volumes still exist, reduce the cpu shares for any
		// process in the cgroup to the minimum value while we wait. If the kubelet
		// is configured to keep terminated volumes, we will delete the cgroup and not block.
		if podVolumesExist := kl.podVolumesExist(uid); podVolumesExist && !kl.keepTerminatedPodVolumes {
			klog.V(3).Infof("Orphaned pod %q found, but volumes not yet removed. Reducing cpu to minimum", uid)
			if err := pcm.ReduceCPULimits(val); err != nil {
				klog.Warningf("Failed to reduce cpu time for pod %q pending volume cleanup due to %v", uid, err)
			}
			continue
		}

		klog.V(3).Infof("Orphaned pod %q found, removing pod cgroups", uid)
		// Destroy all cgroups of pods that should not be running, by first
		// killing all the processes attached to these cgroups. We ignore
		// errors returned by the method, as the housekeeping loop will
		// try again to delete these unwanted pod cgroups.
		go pcm.Destroy(val)
	}
}
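
// Added commentary (not upstream code): the per-cgroup decision above reduces to
//
//	volumes still present && !keepTerminatedPodVolumes -> pcm.ReduceCPULimits, retry on the next housekeeping pass
//	otherwise                                          -> go pcm.Destroy (errors retried by housekeeping)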
// enableHostUserNamespace determines if the host user namespace should be used by the container runtime.
// Returns true if the pod is using a host PID, IPC, or network namespace, the pod is using a non-namespaced
// capability, the pod contains a privileged container, or the pod has a host path volume.
//
// NOTE: if a container shares any namespace with another container it must also share the user namespace
// or it will not have the correct capabilities in the namespace. This means that the host user namespace
// is enabled per pod, not per container.
func (kl *Kubelet) enableHostUserNamespace(pod *v1.Pod) bool {
	if kubecontainer.HasPrivilegedContainer(pod) || hasHostNamespace(pod) ||
		hasHostVolume(pod) || hasNonNamespacedCapability(pod) || kl.hasHostMountPVC(pod) {
		return true
	}
	return false
}
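
// Worked example (added commentary, not upstream code): a pod with a HostPath
// volume enables the host user namespace even with no privileged containers:
//
//	pod := &v1.Pod{Spec: v1.PodSpec{
//		Volumes: []v1.Volume{{Name: "host", VolumeSource: v1.VolumeSource{
//			HostPath: &v1.HostPathVolumeSource{Path: "/var/log"},
//		}}},
//	}}
//	_ = kl.enableHostUserNamespace(pod) // true, via hasHostVolume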
// hasNonNamespacedCapability returns true if MKNOD, SYS_TIME, or SYS_MODULE is requested for any container.
func hasNonNamespacedCapability(pod *v1.Pod) bool {
	for _, c := range pod.Spec.Containers {
		if c.SecurityContext != nil && c.SecurityContext.Capabilities != nil {
			for _, cap := range c.SecurityContext.Capabilities.Add {
				if cap == "MKNOD" || cap == "SYS_TIME" || cap == "SYS_MODULE" {
					return true
				}
			}
		}
	}
	return false
}

// hasHostVolume returns true if the pod spec has a HostPath volume.
func hasHostVolume(pod *v1.Pod) bool {
	for _, v := range pod.Spec.Volumes {
		if v.HostPath != nil {
			return true
		}
	}
	return false
}

// hasHostNamespace returns true if hostIPC, hostNetwork, or hostPID is set to true.
func hasHostNamespace(pod *v1.Pod) bool {
	if pod.Spec.SecurityContext == nil {
		return false
	}
	return pod.Spec.HostIPC || pod.Spec.HostNetwork || pod.Spec.HostPID
}
// hasHostMountPVC returns true if a PVC is referencing a HostPath volume.
func (kl *Kubelet) hasHostMountPVC(pod *v1.Pod) bool {
	for _, volume := range pod.Spec.Volumes {
		if volume.PersistentVolumeClaim != nil {
			pvc, err := kl.kubeClient.CoreV1().PersistentVolumeClaims(pod.Namespace).Get(volume.PersistentVolumeClaim.ClaimName, metav1.GetOptions{})
			if err != nil {
				klog.Warningf("unable to retrieve pvc %s:%s - %v", pod.Namespace, volume.PersistentVolumeClaim.ClaimName, err)
				continue
			}
			if pvc != nil {
				referencedVolume, err := kl.kubeClient.CoreV1().PersistentVolumes().Get(pvc.Spec.VolumeName, metav1.GetOptions{})
				if err != nil {
					klog.Warningf("unable to retrieve pv %s - %v", pvc.Spec.VolumeName, err)
					continue
				}
				if referencedVolume != nil && referencedVolume.Spec.HostPath != nil {
					return true
				}
			}
		}
	}
	return false
}
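
// Added commentary (names hypothetical, not upstream code): the check walks
// claim -> bound volume, so a pod is flagged when, e.g., PVC "data" in the
// pod's namespace is bound to a PV whose spec sets HostPath:
//
//	// pod volume "data" -> PVC "data" -> pvc.Spec.VolumeName "pv-host"
//	// -> PV "pv-host" has Spec.HostPath != nil -> hasHostMountPVC returns true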