docker_sandbox.go 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776
  1. /*
  2. Copyright 2016 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package dockershim
  14. import (
  15. "context"
  16. "encoding/json"
  17. "fmt"
  18. "os"
  19. "strings"
  20. "time"
  21. dockertypes "github.com/docker/docker/api/types"
  22. dockercontainer "github.com/docker/docker/api/types/container"
  23. dockerfilters "github.com/docker/docker/api/types/filters"
  24. utilerrors "k8s.io/apimachinery/pkg/util/errors"
  25. runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
  26. "k8s.io/klog"
  27. "k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
  28. "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
  29. kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
  30. "k8s.io/kubernetes/pkg/kubelet/dockershim/libdocker"
  31. "k8s.io/kubernetes/pkg/kubelet/types"
  32. )
  33. const (
  34. defaultSandboxImage = "k8s.gcr.io/pause:3.2"
  35. // Various default sandbox resources requests/limits.
  36. defaultSandboxCPUshares int64 = 2
  37. // defaultSandboxOOMAdj is the oom score adjustment for the docker
  38. // sandbox container. Using this OOM adj makes it very unlikely, but not
  39. // impossible, that the defaultSandox will experience an oom kill. -998
  40. // is chosen to signify sandbox should be OOM killed before other more
  41. // vital processes like the docker daemon, the kubelet, etc...
  42. defaultSandboxOOMAdj int = -998
  43. // Name of the underlying container runtime
  44. runtimeName = "docker"
  45. )
  46. var (
  47. // Termination grace period
  48. defaultSandboxGracePeriod = time.Duration(10) * time.Second
  49. )
  50. // Returns whether the sandbox network is ready, and whether the sandbox is known
  51. func (ds *dockerService) getNetworkReady(podSandboxID string) (bool, bool) {
  52. ds.networkReadyLock.Lock()
  53. defer ds.networkReadyLock.Unlock()
  54. ready, ok := ds.networkReady[podSandboxID]
  55. return ready, ok
  56. }
  57. func (ds *dockerService) setNetworkReady(podSandboxID string, ready bool) {
  58. ds.networkReadyLock.Lock()
  59. defer ds.networkReadyLock.Unlock()
  60. ds.networkReady[podSandboxID] = ready
  61. }
  62. func (ds *dockerService) clearNetworkReady(podSandboxID string) {
  63. ds.networkReadyLock.Lock()
  64. defer ds.networkReadyLock.Unlock()
  65. delete(ds.networkReady, podSandboxID)
  66. }
  67. // RunPodSandbox creates and starts a pod-level sandbox. Runtimes should ensure
  68. // the sandbox is in ready state.
  69. // For docker, PodSandbox is implemented by a container holding the network
  70. // namespace for the pod.
  71. // Note: docker doesn't use LogDirectory (yet).
  72. func (ds *dockerService) RunPodSandbox(ctx context.Context, r *runtimeapi.RunPodSandboxRequest) (*runtimeapi.RunPodSandboxResponse, error) {
  73. config := r.GetConfig()
  74. // Step 1: Pull the image for the sandbox.
  75. image := defaultSandboxImage
  76. podSandboxImage := ds.podSandboxImage
  77. if len(podSandboxImage) != 0 {
  78. image = podSandboxImage
  79. }
  80. // NOTE: To use a custom sandbox image in a private repository, users need to configure the nodes with credentials properly.
  81. // see: http://kubernetes.io/docs/user-guide/images/#configuring-nodes-to-authenticate-to-a-private-repository
  82. // Only pull sandbox image when it's not present - v1.PullIfNotPresent.
  83. if err := ensureSandboxImageExists(ds.client, image); err != nil {
  84. return nil, err
  85. }
  86. // Step 2: Create the sandbox container.
  87. if r.GetRuntimeHandler() != "" && r.GetRuntimeHandler() != runtimeName {
  88. return nil, fmt.Errorf("RuntimeHandler %q not supported", r.GetRuntimeHandler())
  89. }
  90. createConfig, err := ds.makeSandboxDockerConfig(config, image)
  91. if err != nil {
  92. return nil, fmt.Errorf("failed to make sandbox docker config for pod %q: %v", config.Metadata.Name, err)
  93. }
  94. createResp, err := ds.client.CreateContainer(*createConfig)
  95. if err != nil {
  96. createResp, err = recoverFromCreationConflictIfNeeded(ds.client, *createConfig, err)
  97. }
  98. if err != nil || createResp == nil {
  99. return nil, fmt.Errorf("failed to create a sandbox for pod %q: %v", config.Metadata.Name, err)
  100. }
  101. resp := &runtimeapi.RunPodSandboxResponse{PodSandboxId: createResp.ID}
  102. ds.setNetworkReady(createResp.ID, false)
  103. defer func(e *error) {
  104. // Set networking ready depending on the error return of
  105. // the parent function
  106. if *e == nil {
  107. ds.setNetworkReady(createResp.ID, true)
  108. }
  109. }(&err)
  110. // Step 3: Create Sandbox Checkpoint.
  111. if err = ds.checkpointManager.CreateCheckpoint(createResp.ID, constructPodSandboxCheckpoint(config)); err != nil {
  112. return nil, err
  113. }
  114. // Step 4: Start the sandbox container.
  115. // Assume kubelet's garbage collector would remove the sandbox later, if
  116. // startContainer failed.
  117. err = ds.client.StartContainer(createResp.ID)
  118. if err != nil {
  119. return nil, fmt.Errorf("failed to start sandbox container for pod %q: %v", config.Metadata.Name, err)
  120. }
  121. // Rewrite resolv.conf file generated by docker.
  122. // NOTE: cluster dns settings aren't passed anymore to docker api in all cases,
  123. // not only for pods with host network: the resolver conf will be overwritten
  124. // after sandbox creation to override docker's behaviour. This resolv.conf
  125. // file is shared by all containers of the same pod, and needs to be modified
  126. // only once per pod.
  127. if dnsConfig := config.GetDnsConfig(); dnsConfig != nil {
  128. containerInfo, err := ds.client.InspectContainer(createResp.ID)
  129. if err != nil {
  130. return nil, fmt.Errorf("failed to inspect sandbox container for pod %q: %v", config.Metadata.Name, err)
  131. }
  132. if err := rewriteResolvFile(containerInfo.ResolvConfPath, dnsConfig.Servers, dnsConfig.Searches, dnsConfig.Options); err != nil {
  133. return nil, fmt.Errorf("rewrite resolv.conf failed for pod %q: %v", config.Metadata.Name, err)
  134. }
  135. }
  136. // Do not invoke network plugins if in hostNetwork mode.
  137. if config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtimeapi.NamespaceMode_NODE {
  138. return resp, nil
  139. }
  140. // Step 5: Setup networking for the sandbox.
  141. // All pod networking is setup by a CNI plugin discovered at startup time.
  142. // This plugin assigns the pod ip, sets up routes inside the sandbox,
  143. // creates interfaces etc. In theory, its jurisdiction ends with pod
  144. // sandbox networking, but it might insert iptables rules or open ports
  145. // on the host as well, to satisfy parts of the pod spec that aren't
  146. // recognized by the CNI standard yet.
  147. cID := kubecontainer.BuildContainerID(runtimeName, createResp.ID)
  148. networkOptions := make(map[string]string)
  149. if dnsConfig := config.GetDnsConfig(); dnsConfig != nil {
  150. // Build DNS options.
  151. dnsOption, err := json.Marshal(dnsConfig)
  152. if err != nil {
  153. return nil, fmt.Errorf("failed to marshal dns config for pod %q: %v", config.Metadata.Name, err)
  154. }
  155. networkOptions["dns"] = string(dnsOption)
  156. }
  157. err = ds.network.SetUpPod(config.GetMetadata().Namespace, config.GetMetadata().Name, cID, config.Annotations, networkOptions)
  158. if err != nil {
  159. errList := []error{fmt.Errorf("failed to set up sandbox container %q network for pod %q: %v", createResp.ID, config.Metadata.Name, err)}
  160. // Ensure network resources are cleaned up even if the plugin
  161. // succeeded but an error happened between that success and here.
  162. err = ds.network.TearDownPod(config.GetMetadata().Namespace, config.GetMetadata().Name, cID)
  163. if err != nil {
  164. errList = append(errList, fmt.Errorf("failed to clean up sandbox container %q network for pod %q: %v", createResp.ID, config.Metadata.Name, err))
  165. }
  166. err = ds.client.StopContainer(createResp.ID, defaultSandboxGracePeriod)
  167. if err != nil {
  168. errList = append(errList, fmt.Errorf("failed to stop sandbox container %q for pod %q: %v", createResp.ID, config.Metadata.Name, err))
  169. }
  170. return resp, utilerrors.NewAggregate(errList)
  171. }
  172. return resp, nil
  173. }
  174. // StopPodSandbox stops the sandbox. If there are any running containers in the
  175. // sandbox, they should be force terminated.
  176. // TODO: This function blocks sandbox teardown on networking teardown. Is it
  177. // better to cut our losses assuming an out of band GC routine will cleanup
  178. // after us?
  179. func (ds *dockerService) StopPodSandbox(ctx context.Context, r *runtimeapi.StopPodSandboxRequest) (*runtimeapi.StopPodSandboxResponse, error) {
  180. var namespace, name string
  181. var hostNetwork bool
  182. podSandboxID := r.PodSandboxId
  183. resp := &runtimeapi.StopPodSandboxResponse{}
  184. // Try to retrieve minimal sandbox information from docker daemon or sandbox checkpoint.
  185. inspectResult, metadata, statusErr := ds.getPodSandboxDetails(podSandboxID)
  186. if statusErr == nil {
  187. namespace = metadata.Namespace
  188. name = metadata.Name
  189. hostNetwork = (networkNamespaceMode(inspectResult) == runtimeapi.NamespaceMode_NODE)
  190. } else {
  191. checkpoint := NewPodSandboxCheckpoint("", "", &CheckpointData{})
  192. checkpointErr := ds.checkpointManager.GetCheckpoint(podSandboxID, checkpoint)
  193. // Proceed if both sandbox container and checkpoint could not be found. This means that following
  194. // actions will only have sandbox ID and not have pod namespace and name information.
  195. // Return error if encounter any unexpected error.
  196. if checkpointErr != nil {
  197. if checkpointErr != errors.ErrCheckpointNotFound {
  198. err := ds.checkpointManager.RemoveCheckpoint(podSandboxID)
  199. if err != nil {
  200. klog.Errorf("Failed to delete corrupt checkpoint for sandbox %q: %v", podSandboxID, err)
  201. }
  202. }
  203. if libdocker.IsContainerNotFoundError(statusErr) {
  204. klog.Warningf("Both sandbox container and checkpoint for id %q could not be found. "+
  205. "Proceed without further sandbox information.", podSandboxID)
  206. } else {
  207. return nil, utilerrors.NewAggregate([]error{
  208. fmt.Errorf("failed to get checkpoint for sandbox %q: %v", podSandboxID, checkpointErr),
  209. fmt.Errorf("failed to get sandbox status: %v", statusErr)})
  210. }
  211. } else {
  212. _, name, namespace, _, hostNetwork = checkpoint.GetData()
  213. }
  214. }
  215. // WARNING: The following operations made the following assumption:
  216. // 1. kubelet will retry on any error returned by StopPodSandbox.
  217. // 2. tearing down network and stopping sandbox container can succeed in any sequence.
  218. // This depends on the implementation detail of network plugin and proper error handling.
  219. // For kubenet, if tearing down network failed and sandbox container is stopped, kubelet
  220. // will retry. On retry, kubenet will not be able to retrieve network namespace of the sandbox
  221. // since it is stopped. With empty network namespace, CNI bridge plugin will conduct best
  222. // effort clean up and will not return error.
  223. errList := []error{}
  224. ready, ok := ds.getNetworkReady(podSandboxID)
  225. if !hostNetwork && (ready || !ok) {
  226. // Only tear down the pod network if we haven't done so already
  227. cID := kubecontainer.BuildContainerID(runtimeName, podSandboxID)
  228. err := ds.network.TearDownPod(namespace, name, cID)
  229. if err == nil {
  230. ds.setNetworkReady(podSandboxID, false)
  231. } else {
  232. errList = append(errList, err)
  233. }
  234. }
  235. if err := ds.client.StopContainer(podSandboxID, defaultSandboxGracePeriod); err != nil {
  236. // Do not return error if the container does not exist
  237. if !libdocker.IsContainerNotFoundError(err) {
  238. klog.Errorf("Failed to stop sandbox %q: %v", podSandboxID, err)
  239. errList = append(errList, err)
  240. } else {
  241. // remove the checkpoint for any sandbox that is not found in the runtime
  242. ds.checkpointManager.RemoveCheckpoint(podSandboxID)
  243. }
  244. }
  245. if len(errList) == 0 {
  246. return resp, nil
  247. }
  248. // TODO: Stop all running containers in the sandbox.
  249. return nil, utilerrors.NewAggregate(errList)
  250. }
  251. // RemovePodSandbox removes the sandbox. If there are running containers in the
  252. // sandbox, they should be forcibly removed.
  253. func (ds *dockerService) RemovePodSandbox(ctx context.Context, r *runtimeapi.RemovePodSandboxRequest) (*runtimeapi.RemovePodSandboxResponse, error) {
  254. podSandboxID := r.PodSandboxId
  255. var errs []error
  256. opts := dockertypes.ContainerListOptions{All: true}
  257. opts.Filters = dockerfilters.NewArgs()
  258. f := newDockerFilter(&opts.Filters)
  259. f.AddLabel(sandboxIDLabelKey, podSandboxID)
  260. containers, err := ds.client.ListContainers(opts)
  261. if err != nil {
  262. errs = append(errs, err)
  263. }
  264. // Remove all containers in the sandbox.
  265. for i := range containers {
  266. if _, err := ds.RemoveContainer(ctx, &runtimeapi.RemoveContainerRequest{ContainerId: containers[i].ID}); err != nil && !libdocker.IsContainerNotFoundError(err) {
  267. errs = append(errs, err)
  268. }
  269. }
  270. // Remove the sandbox container.
  271. err = ds.client.RemoveContainer(podSandboxID, dockertypes.ContainerRemoveOptions{RemoveVolumes: true, Force: true})
  272. if err == nil || libdocker.IsContainerNotFoundError(err) {
  273. // Only clear network ready when the sandbox has actually been
  274. // removed from docker or doesn't exist
  275. ds.clearNetworkReady(podSandboxID)
  276. } else {
  277. errs = append(errs, err)
  278. }
  279. // Remove the checkpoint of the sandbox.
  280. if err := ds.checkpointManager.RemoveCheckpoint(podSandboxID); err != nil {
  281. errs = append(errs, err)
  282. }
  283. if len(errs) == 0 {
  284. return &runtimeapi.RemovePodSandboxResponse{}, nil
  285. }
  286. return nil, utilerrors.NewAggregate(errs)
  287. }
  288. // getIPsFromPlugin interrogates the network plugin for sandbox IPs.
  289. func (ds *dockerService) getIPsFromPlugin(sandbox *dockertypes.ContainerJSON) ([]string, error) {
  290. metadata, err := parseSandboxName(sandbox.Name)
  291. if err != nil {
  292. return nil, err
  293. }
  294. msg := fmt.Sprintf("Couldn't find network status for %s/%s through plugin", metadata.Namespace, metadata.Name)
  295. cID := kubecontainer.BuildContainerID(runtimeName, sandbox.ID)
  296. networkStatus, err := ds.network.GetPodNetworkStatus(metadata.Namespace, metadata.Name, cID)
  297. if err != nil {
  298. return nil, err
  299. }
  300. if networkStatus == nil {
  301. return nil, fmt.Errorf("%v: invalid network status for", msg)
  302. }
  303. ips := make([]string, 0)
  304. for _, ip := range networkStatus.IPs {
  305. ips = append(ips, ip.String())
  306. }
  307. // if we don't have any ip in our list then cni is using classic primary IP only
  308. if len(ips) == 0 {
  309. ips = append(ips, networkStatus.IP.String())
  310. }
  311. return ips, nil
  312. }
  313. // getIPs returns the ip given the output of `docker inspect` on a pod sandbox,
  314. // first interrogating any registered plugins, then simply trusting the ip
  315. // in the sandbox itself. We look for an ipv4 address before ipv6.
  316. func (ds *dockerService) getIPs(podSandboxID string, sandbox *dockertypes.ContainerJSON) []string {
  317. if sandbox.NetworkSettings == nil {
  318. return nil
  319. }
  320. if networkNamespaceMode(sandbox) == runtimeapi.NamespaceMode_NODE {
  321. // For sandboxes using host network, the shim is not responsible for
  322. // reporting the IP.
  323. return nil
  324. }
  325. // Don't bother getting IP if the pod is known and networking isn't ready
  326. ready, ok := ds.getNetworkReady(podSandboxID)
  327. if ok && !ready {
  328. return nil
  329. }
  330. ips, err := ds.getIPsFromPlugin(sandbox)
  331. if err == nil {
  332. return ips
  333. }
  334. ips = make([]string, 0)
  335. // TODO: trusting the docker ip is not a great idea. However docker uses
  336. // eth0 by default and so does CNI, so if we find a docker IP here, we
  337. // conclude that the plugin must have failed setup, or forgotten its ip.
  338. // This is not a sensible assumption for plugins across the board, but if
  339. // a plugin doesn't want this behavior, it can throw an error.
  340. if sandbox.NetworkSettings.IPAddress != "" {
  341. ips = append(ips, sandbox.NetworkSettings.IPAddress)
  342. }
  343. if sandbox.NetworkSettings.GlobalIPv6Address != "" {
  344. ips = append(ips, sandbox.NetworkSettings.GlobalIPv6Address)
  345. }
  346. // If all else fails, warn but don't return an error, as pod status
  347. // should generally not return anything except fatal errors
  348. // FIXME: handle network errors by restarting the pod somehow?
  349. klog.Warningf("failed to read pod IP from plugin/docker: %v", err)
  350. return ips
  351. }
  352. // Returns the inspect container response, the sandbox metadata, and network namespace mode
  353. func (ds *dockerService) getPodSandboxDetails(podSandboxID string) (*dockertypes.ContainerJSON, *runtimeapi.PodSandboxMetadata, error) {
  354. resp, err := ds.client.InspectContainer(podSandboxID)
  355. if err != nil {
  356. return nil, nil, err
  357. }
  358. metadata, err := parseSandboxName(resp.Name)
  359. if err != nil {
  360. return nil, nil, err
  361. }
  362. return resp, metadata, nil
  363. }
  364. // PodSandboxStatus returns the status of the PodSandbox.
  365. func (ds *dockerService) PodSandboxStatus(ctx context.Context, req *runtimeapi.PodSandboxStatusRequest) (*runtimeapi.PodSandboxStatusResponse, error) {
  366. podSandboxID := req.PodSandboxId
  367. r, metadata, err := ds.getPodSandboxDetails(podSandboxID)
  368. if err != nil {
  369. return nil, err
  370. }
  371. // Parse the timestamps.
  372. createdAt, _, _, err := getContainerTimestamps(r)
  373. if err != nil {
  374. return nil, fmt.Errorf("failed to parse timestamp for container %q: %v", podSandboxID, err)
  375. }
  376. ct := createdAt.UnixNano()
  377. // Translate container to sandbox state.
  378. state := runtimeapi.PodSandboxState_SANDBOX_NOTREADY
  379. if r.State.Running {
  380. state = runtimeapi.PodSandboxState_SANDBOX_READY
  381. }
  382. var ips []string
  383. // TODO: Remove this when sandbox is available on windows
  384. // This is a workaround for windows, where sandbox is not in use, and pod IP is determined through containers belonging to the Pod.
  385. if ips = ds.determinePodIPBySandboxID(podSandboxID); len(ips) == 0 {
  386. ips = ds.getIPs(podSandboxID, r)
  387. }
  388. // ip is primary ips
  389. // ips is all other ips
  390. ip := ""
  391. if len(ips) != 0 {
  392. ip = ips[0]
  393. ips = ips[1:]
  394. }
  395. labels, annotations := extractLabels(r.Config.Labels)
  396. status := &runtimeapi.PodSandboxStatus{
  397. Id: r.ID,
  398. State: state,
  399. CreatedAt: ct,
  400. Metadata: metadata,
  401. Labels: labels,
  402. Annotations: annotations,
  403. Network: &runtimeapi.PodSandboxNetworkStatus{
  404. Ip: ip,
  405. },
  406. Linux: &runtimeapi.LinuxPodSandboxStatus{
  407. Namespaces: &runtimeapi.Namespace{
  408. Options: &runtimeapi.NamespaceOption{
  409. Network: networkNamespaceMode(r),
  410. Pid: pidNamespaceMode(r),
  411. Ipc: ipcNamespaceMode(r),
  412. },
  413. },
  414. },
  415. }
  416. // add additional IPs
  417. additionalPodIPs := make([]*runtimeapi.PodIP, 0, len(ips))
  418. for _, ip := range ips {
  419. additionalPodIPs = append(additionalPodIPs, &runtimeapi.PodIP{
  420. Ip: ip,
  421. })
  422. }
  423. status.Network.AdditionalIps = additionalPodIPs
  424. return &runtimeapi.PodSandboxStatusResponse{Status: status}, nil
  425. }
  426. // ListPodSandbox returns a list of Sandbox.
  427. func (ds *dockerService) ListPodSandbox(_ context.Context, r *runtimeapi.ListPodSandboxRequest) (*runtimeapi.ListPodSandboxResponse, error) {
  428. filter := r.GetFilter()
  429. // By default, list all containers whether they are running or not.
  430. opts := dockertypes.ContainerListOptions{All: true}
  431. filterOutReadySandboxes := false
  432. opts.Filters = dockerfilters.NewArgs()
  433. f := newDockerFilter(&opts.Filters)
  434. // Add filter to select only sandbox containers.
  435. f.AddLabel(containerTypeLabelKey, containerTypeLabelSandbox)
  436. if filter != nil {
  437. if filter.Id != "" {
  438. f.Add("id", filter.Id)
  439. }
  440. if filter.State != nil {
  441. if filter.GetState().State == runtimeapi.PodSandboxState_SANDBOX_READY {
  442. // Only list running containers.
  443. opts.All = false
  444. } else {
  445. // runtimeapi.PodSandboxState_SANDBOX_NOTREADY can mean the
  446. // container is in any of the non-running state (e.g., created,
  447. // exited). We can't tell docker to filter out running
  448. // containers directly, so we'll need to filter them out
  449. // ourselves after getting the results.
  450. filterOutReadySandboxes = true
  451. }
  452. }
  453. if filter.LabelSelector != nil {
  454. for k, v := range filter.LabelSelector {
  455. f.AddLabel(k, v)
  456. }
  457. }
  458. }
  459. // Make sure we get the list of checkpoints first so that we don't include
  460. // new PodSandboxes that are being created right now.
  461. var err error
  462. checkpoints := []string{}
  463. if filter == nil {
  464. checkpoints, err = ds.checkpointManager.ListCheckpoints()
  465. if err != nil {
  466. klog.Errorf("Failed to list checkpoints: %v", err)
  467. }
  468. }
  469. containers, err := ds.client.ListContainers(opts)
  470. if err != nil {
  471. return nil, err
  472. }
  473. // Convert docker containers to runtime api sandboxes.
  474. result := []*runtimeapi.PodSandbox{}
  475. // using map as set
  476. sandboxIDs := make(map[string]bool)
  477. for i := range containers {
  478. c := containers[i]
  479. converted, err := containerToRuntimeAPISandbox(&c)
  480. if err != nil {
  481. klog.V(4).Infof("Unable to convert docker to runtime API sandbox %+v: %v", c, err)
  482. continue
  483. }
  484. if filterOutReadySandboxes && converted.State == runtimeapi.PodSandboxState_SANDBOX_READY {
  485. continue
  486. }
  487. sandboxIDs[converted.Id] = true
  488. result = append(result, converted)
  489. }
  490. // Include sandbox that could only be found with its checkpoint if no filter is applied
  491. // These PodSandbox will only include PodSandboxID, Name, Namespace.
  492. // These PodSandbox will be in PodSandboxState_SANDBOX_NOTREADY state.
  493. for _, id := range checkpoints {
  494. if _, ok := sandboxIDs[id]; ok {
  495. continue
  496. }
  497. checkpoint := NewPodSandboxCheckpoint("", "", &CheckpointData{})
  498. err := ds.checkpointManager.GetCheckpoint(id, checkpoint)
  499. if err != nil {
  500. klog.Errorf("Failed to retrieve checkpoint for sandbox %q: %v", id, err)
  501. if err == errors.ErrCorruptCheckpoint {
  502. err = ds.checkpointManager.RemoveCheckpoint(id)
  503. if err != nil {
  504. klog.Errorf("Failed to delete corrupt checkpoint for sandbox %q: %v", id, err)
  505. }
  506. }
  507. continue
  508. }
  509. result = append(result, checkpointToRuntimeAPISandbox(id, checkpoint))
  510. }
  511. return &runtimeapi.ListPodSandboxResponse{Items: result}, nil
  512. }
  513. // applySandboxLinuxOptions applies LinuxPodSandboxConfig to dockercontainer.HostConfig and dockercontainer.ContainerCreateConfig.
  514. func (ds *dockerService) applySandboxLinuxOptions(hc *dockercontainer.HostConfig, lc *runtimeapi.LinuxPodSandboxConfig, createConfig *dockertypes.ContainerCreateConfig, image string, separator rune) error {
  515. if lc == nil {
  516. return nil
  517. }
  518. // Apply security context.
  519. if err := applySandboxSecurityContext(lc, createConfig.Config, hc, ds.network, separator); err != nil {
  520. return err
  521. }
  522. // Set sysctls.
  523. hc.Sysctls = lc.Sysctls
  524. return nil
  525. }
  526. func (ds *dockerService) applySandboxResources(hc *dockercontainer.HostConfig, lc *runtimeapi.LinuxPodSandboxConfig) error {
  527. hc.Resources = dockercontainer.Resources{
  528. MemorySwap: DefaultMemorySwap(),
  529. CPUShares: defaultSandboxCPUshares,
  530. // Use docker's default cpu quota/period.
  531. }
  532. if lc != nil {
  533. // Apply Cgroup options.
  534. cgroupParent, err := ds.GenerateExpectedCgroupParent(lc.CgroupParent)
  535. if err != nil {
  536. return err
  537. }
  538. hc.CgroupParent = cgroupParent
  539. }
  540. return nil
  541. }
  542. // makeSandboxDockerConfig returns dockertypes.ContainerCreateConfig based on runtimeapi.PodSandboxConfig.
  543. func (ds *dockerService) makeSandboxDockerConfig(c *runtimeapi.PodSandboxConfig, image string) (*dockertypes.ContainerCreateConfig, error) {
  544. // Merge annotations and labels because docker supports only labels.
  545. labels := makeLabels(c.GetLabels(), c.GetAnnotations())
  546. // Apply a label to distinguish sandboxes from regular containers.
  547. labels[containerTypeLabelKey] = containerTypeLabelSandbox
  548. // Apply a container name label for infra container. This is used in summary v1.
  549. // TODO(random-liu): Deprecate this label once container metrics is directly got from CRI.
  550. labels[types.KubernetesContainerNameLabel] = sandboxContainerName
  551. hc := &dockercontainer.HostConfig{
  552. IpcMode: dockercontainer.IpcMode("shareable"),
  553. }
  554. createConfig := &dockertypes.ContainerCreateConfig{
  555. Name: makeSandboxName(c),
  556. Config: &dockercontainer.Config{
  557. Hostname: c.Hostname,
  558. // TODO: Handle environment variables.
  559. Image: image,
  560. Labels: labels,
  561. },
  562. HostConfig: hc,
  563. }
  564. // Apply linux-specific options.
  565. if err := ds.applySandboxLinuxOptions(hc, c.GetLinux(), createConfig, image, securityOptSeparator); err != nil {
  566. return nil, err
  567. }
  568. // Set port mappings.
  569. exposedPorts, portBindings := makePortsAndBindings(c.GetPortMappings())
  570. createConfig.Config.ExposedPorts = exposedPorts
  571. hc.PortBindings = portBindings
  572. hc.OomScoreAdj = defaultSandboxOOMAdj
  573. // Apply resource options.
  574. if err := ds.applySandboxResources(hc, c.GetLinux()); err != nil {
  575. return nil, err
  576. }
  577. // Set security options.
  578. securityOpts, err := ds.getSecurityOpts(c.GetLinux().GetSecurityContext().GetSeccompProfilePath(), securityOptSeparator)
  579. if err != nil {
  580. return nil, fmt.Errorf("failed to generate sandbox security options for sandbox %q: %v", c.Metadata.Name, err)
  581. }
  582. hc.SecurityOpt = append(hc.SecurityOpt, securityOpts...)
  583. applyExperimentalCreateConfig(createConfig, c.Annotations)
  584. return createConfig, nil
  585. }
  586. // networkNamespaceMode returns the network runtimeapi.NamespaceMode for this container.
  587. // Supports: POD, NODE
  588. func networkNamespaceMode(container *dockertypes.ContainerJSON) runtimeapi.NamespaceMode {
  589. if container != nil && container.HostConfig != nil && string(container.HostConfig.NetworkMode) == namespaceModeHost {
  590. return runtimeapi.NamespaceMode_NODE
  591. }
  592. return runtimeapi.NamespaceMode_POD
  593. }
  594. // pidNamespaceMode returns the PID runtimeapi.NamespaceMode for this container.
  595. // Supports: CONTAINER, NODE
  596. // TODO(verb): add support for POD PID namespace sharing
  597. func pidNamespaceMode(container *dockertypes.ContainerJSON) runtimeapi.NamespaceMode {
  598. if container != nil && container.HostConfig != nil && string(container.HostConfig.PidMode) == namespaceModeHost {
  599. return runtimeapi.NamespaceMode_NODE
  600. }
  601. return runtimeapi.NamespaceMode_CONTAINER
  602. }
  603. // ipcNamespaceMode returns the IPC runtimeapi.NamespaceMode for this container.
  604. // Supports: POD, NODE
  605. func ipcNamespaceMode(container *dockertypes.ContainerJSON) runtimeapi.NamespaceMode {
  606. if container != nil && container.HostConfig != nil && string(container.HostConfig.IpcMode) == namespaceModeHost {
  607. return runtimeapi.NamespaceMode_NODE
  608. }
  609. return runtimeapi.NamespaceMode_POD
  610. }
  611. func constructPodSandboxCheckpoint(config *runtimeapi.PodSandboxConfig) checkpointmanager.Checkpoint {
  612. data := CheckpointData{}
  613. for _, pm := range config.GetPortMappings() {
  614. proto := toCheckpointProtocol(pm.Protocol)
  615. data.PortMappings = append(data.PortMappings, &PortMapping{
  616. HostPort: &pm.HostPort,
  617. ContainerPort: &pm.ContainerPort,
  618. Protocol: &proto,
  619. HostIP: pm.HostIp,
  620. })
  621. }
  622. if config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtimeapi.NamespaceMode_NODE {
  623. data.HostNetwork = true
  624. }
  625. return NewPodSandboxCheckpoint(config.Metadata.Namespace, config.Metadata.Name, &data)
  626. }
  627. func toCheckpointProtocol(protocol runtimeapi.Protocol) Protocol {
  628. switch protocol {
  629. case runtimeapi.Protocol_TCP:
  630. return protocolTCP
  631. case runtimeapi.Protocol_UDP:
  632. return protocolUDP
  633. case runtimeapi.Protocol_SCTP:
  634. return protocolSCTP
  635. }
  636. klog.Warningf("Unknown protocol %q: defaulting to TCP", protocol)
  637. return protocolTCP
  638. }
  639. // rewriteResolvFile rewrites resolv.conf file generated by docker.
  640. func rewriteResolvFile(resolvFilePath string, dns []string, dnsSearch []string, dnsOptions []string) error {
  641. if len(resolvFilePath) == 0 {
  642. klog.Errorf("ResolvConfPath is empty.")
  643. return nil
  644. }
  645. if _, err := os.Stat(resolvFilePath); os.IsNotExist(err) {
  646. return fmt.Errorf("ResolvConfPath %q does not exist", resolvFilePath)
  647. }
  648. var resolvFileContent []string
  649. for _, srv := range dns {
  650. resolvFileContent = append(resolvFileContent, "nameserver "+srv)
  651. }
  652. if len(dnsSearch) > 0 {
  653. resolvFileContent = append(resolvFileContent, "search "+strings.Join(dnsSearch, " "))
  654. }
  655. if len(dnsOptions) > 0 {
  656. resolvFileContent = append(resolvFileContent, "options "+strings.Join(dnsOptions, " "))
  657. }
  658. if len(resolvFileContent) > 0 {
  659. resolvFileContentStr := strings.Join(resolvFileContent, "\n")
  660. resolvFileContentStr += "\n"
  661. klog.V(4).Infof("Will attempt to re-write config file %s with: \n%s", resolvFilePath, resolvFileContent)
  662. if err := rewriteFile(resolvFilePath, resolvFileContentStr); err != nil {
  663. klog.Errorf("resolv.conf could not be updated: %v", err)
  664. return err
  665. }
  666. }
  667. return nil
  668. }
  669. func rewriteFile(filePath, stringToWrite string) error {
  670. f, err := os.OpenFile(filePath, os.O_TRUNC|os.O_WRONLY, 0644)
  671. if err != nil {
  672. return err
  673. }
  674. defer f.Close()
  675. _, err = f.WriteString(stringToWrite)
  676. return err
  677. }