container_linux.go 52 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898
  1. // +build linux
  2. package libcontainer
  3. import (
  4. "bytes"
  5. "encoding/json"
  6. "errors"
  7. "fmt"
  8. "io"
  9. "io/ioutil"
  10. "net"
  11. "os"
  12. "os/exec"
  13. "path/filepath"
  14. "reflect"
  15. "strings"
  16. "sync"
  17. "syscall" // only for SysProcAttr and Signal
  18. "time"
  19. "github.com/opencontainers/runc/libcontainer/cgroups"
  20. "github.com/opencontainers/runc/libcontainer/configs"
  21. "github.com/opencontainers/runc/libcontainer/criurpc"
  22. "github.com/opencontainers/runc/libcontainer/intelrdt"
  23. "github.com/opencontainers/runc/libcontainer/system"
  24. "github.com/opencontainers/runc/libcontainer/utils"
  25. "github.com/golang/protobuf/proto"
  26. "github.com/sirupsen/logrus"
  27. "github.com/vishvananda/netlink/nl"
  28. "golang.org/x/sys/unix"
  29. )
  30. const stdioFdCount = 3
  31. type linuxContainer struct {
  32. id string
  33. root string
  34. config *configs.Config
  35. cgroupManager cgroups.Manager
  36. intelRdtManager intelrdt.Manager
  37. initPath string
  38. initArgs []string
  39. initProcess parentProcess
  40. initProcessStartTime uint64
  41. criuPath string
  42. newuidmapPath string
  43. newgidmapPath string
  44. m sync.Mutex
  45. criuVersion int
  46. state containerState
  47. created time.Time
  48. }
  49. // State represents a running container's state
  50. type State struct {
  51. BaseState
  52. // Platform specific fields below here
  53. // Specified if the container was started under the rootless mode.
  54. // Set to true if BaseState.Config.RootlessEUID && BaseState.Config.RootlessCgroups
  55. Rootless bool `json:"rootless"`
  56. // Path to all the cgroups setup for a container. Key is cgroup subsystem name
  57. // with the value as the path.
  58. CgroupPaths map[string]string `json:"cgroup_paths"`
  59. // NamespacePaths are filepaths to the container's namespaces. Key is the namespace type
  60. // with the value as the path.
  61. NamespacePaths map[configs.NamespaceType]string `json:"namespace_paths"`
  62. // Container's standard descriptors (std{in,out,err}), needed for checkpoint and restore
  63. ExternalDescriptors []string `json:"external_descriptors,omitempty"`
  64. // Intel RDT "resource control" filesystem path
  65. IntelRdtPath string `json:"intel_rdt_path"`
  66. }
  67. // Container is a libcontainer container object.
  68. //
  69. // Each container is thread-safe within the same process. Since a container can
  70. // be destroyed by a separate process, any function may return that the container
  71. // was not found.
  72. type Container interface {
  73. BaseContainer
  74. // Methods below here are platform specific
  75. // Checkpoint checkpoints the running container's state to disk using the criu(8) utility.
  76. //
  77. // errors:
  78. // Systemerror - System error.
  79. Checkpoint(criuOpts *CriuOpts) error
  80. // Restore restores the checkpointed container to a running state using the criu(8) utility.
  81. //
  82. // errors:
  83. // Systemerror - System error.
  84. Restore(process *Process, criuOpts *CriuOpts) error
  85. // If the Container state is RUNNING or CREATED, sets the Container state to PAUSING and pauses
  86. // the execution of any user processes. Asynchronously, when the container finished being paused the
  87. // state is changed to PAUSED.
  88. // If the Container state is PAUSED, do nothing.
  89. //
  90. // errors:
  91. // ContainerNotExists - Container no longer exists,
  92. // ContainerNotRunning - Container not running or created,
  93. // Systemerror - System error.
  94. Pause() error
  95. // If the Container state is PAUSED, resumes the execution of any user processes in the
  96. // Container before setting the Container state to RUNNING.
  97. // If the Container state is RUNNING, do nothing.
  98. //
  99. // errors:
  100. // ContainerNotExists - Container no longer exists,
  101. // ContainerNotPaused - Container is not paused,
  102. // Systemerror - System error.
  103. Resume() error
  104. // NotifyOOM returns a read-only channel signaling when the container receives an OOM notification.
  105. //
  106. // errors:
  107. // Systemerror - System error.
  108. NotifyOOM() (<-chan struct{}, error)
  109. // NotifyMemoryPressure returns a read-only channel signaling when the container reaches a given pressure level
  110. //
  111. // errors:
  112. // Systemerror - System error.
  113. NotifyMemoryPressure(level PressureLevel) (<-chan struct{}, error)
  114. }
  115. // ID returns the container's unique ID
  116. func (c *linuxContainer) ID() string {
  117. return c.id
  118. }
  119. // Config returns the container's configuration
  120. func (c *linuxContainer) Config() configs.Config {
  121. return *c.config
  122. }
  123. func (c *linuxContainer) Status() (Status, error) {
  124. c.m.Lock()
  125. defer c.m.Unlock()
  126. return c.currentStatus()
  127. }
  128. func (c *linuxContainer) State() (*State, error) {
  129. c.m.Lock()
  130. defer c.m.Unlock()
  131. return c.currentState()
  132. }
  133. func (c *linuxContainer) Processes() ([]int, error) {
  134. pids, err := c.cgroupManager.GetAllPids()
  135. if err != nil {
  136. return nil, newSystemErrorWithCause(err, "getting all container pids from cgroups")
  137. }
  138. return pids, nil
  139. }
  140. func (c *linuxContainer) Stats() (*Stats, error) {
  141. var (
  142. err error
  143. stats = &Stats{}
  144. )
  145. if stats.CgroupStats, err = c.cgroupManager.GetStats(); err != nil {
  146. return stats, newSystemErrorWithCause(err, "getting container stats from cgroups")
  147. }
  148. if c.intelRdtManager != nil {
  149. if stats.IntelRdtStats, err = c.intelRdtManager.GetStats(); err != nil {
  150. return stats, newSystemErrorWithCause(err, "getting container's Intel RDT stats")
  151. }
  152. }
  153. for _, iface := range c.config.Networks {
  154. switch iface.Type {
  155. case "veth":
  156. istats, err := getNetworkInterfaceStats(iface.HostInterfaceName)
  157. if err != nil {
  158. return stats, newSystemErrorWithCausef(err, "getting network stats for interface %q", iface.HostInterfaceName)
  159. }
  160. stats.Interfaces = append(stats.Interfaces, istats)
  161. }
  162. }
  163. return stats, nil
  164. }
  165. func (c *linuxContainer) Set(config configs.Config) error {
  166. c.m.Lock()
  167. defer c.m.Unlock()
  168. status, err := c.currentStatus()
  169. if err != nil {
  170. return err
  171. }
  172. if status == Stopped {
  173. return newGenericError(fmt.Errorf("container not running"), ContainerNotRunning)
  174. }
  175. if err := c.cgroupManager.Set(&config); err != nil {
  176. // Set configs back
  177. if err2 := c.cgroupManager.Set(c.config); err2 != nil {
  178. logrus.Warnf("Setting back cgroup configs failed due to error: %v, your state.json and actual configs might be inconsistent.", err2)
  179. }
  180. return err
  181. }
  182. if c.intelRdtManager != nil {
  183. if err := c.intelRdtManager.Set(&config); err != nil {
  184. // Set configs back
  185. if err2 := c.intelRdtManager.Set(c.config); err2 != nil {
  186. logrus.Warnf("Setting back intelrdt configs failed due to error: %v, your state.json and actual configs might be inconsistent.", err2)
  187. }
  188. return err
  189. }
  190. }
  191. // After config setting succeed, update config and states
  192. c.config = &config
  193. _, err = c.updateState(nil)
  194. return err
  195. }
  196. func (c *linuxContainer) Start(process *Process) error {
  197. c.m.Lock()
  198. defer c.m.Unlock()
  199. if process.Init {
  200. if err := c.createExecFifo(); err != nil {
  201. return err
  202. }
  203. }
  204. if err := c.start(process); err != nil {
  205. if process.Init {
  206. c.deleteExecFifo()
  207. }
  208. return err
  209. }
  210. return nil
  211. }
  212. func (c *linuxContainer) Run(process *Process) error {
  213. if err := c.Start(process); err != nil {
  214. return err
  215. }
  216. if process.Init {
  217. return c.exec()
  218. }
  219. return nil
  220. }
  221. func (c *linuxContainer) Exec() error {
  222. c.m.Lock()
  223. defer c.m.Unlock()
  224. return c.exec()
  225. }
  226. func (c *linuxContainer) exec() error {
  227. path := filepath.Join(c.root, execFifoFilename)
  228. fifoOpen := make(chan struct{})
  229. select {
  230. case <-awaitProcessExit(c.initProcess.pid(), fifoOpen):
  231. return errors.New("container process is already dead")
  232. case result := <-awaitFifoOpen(path):
  233. close(fifoOpen)
  234. if result.err != nil {
  235. return result.err
  236. }
  237. f := result.file
  238. defer f.Close()
  239. if err := readFromExecFifo(f); err != nil {
  240. return err
  241. }
  242. return os.Remove(path)
  243. }
  244. }
  245. func readFromExecFifo(execFifo io.Reader) error {
  246. data, err := ioutil.ReadAll(execFifo)
  247. if err != nil {
  248. return err
  249. }
  250. if len(data) <= 0 {
  251. return fmt.Errorf("cannot start an already running container")
  252. }
  253. return nil
  254. }
  255. func awaitProcessExit(pid int, exit <-chan struct{}) <-chan struct{} {
  256. isDead := make(chan struct{})
  257. go func() {
  258. for {
  259. select {
  260. case <-exit:
  261. return
  262. case <-time.After(time.Millisecond * 100):
  263. stat, err := system.Stat(pid)
  264. if err != nil || stat.State == system.Zombie {
  265. close(isDead)
  266. return
  267. }
  268. }
  269. }
  270. }()
  271. return isDead
  272. }
  273. func awaitFifoOpen(path string) <-chan openResult {
  274. fifoOpened := make(chan openResult)
  275. go func() {
  276. f, err := os.OpenFile(path, os.O_RDONLY, 0)
  277. if err != nil {
  278. fifoOpened <- openResult{err: newSystemErrorWithCause(err, "open exec fifo for reading")}
  279. return
  280. }
  281. fifoOpened <- openResult{file: f}
  282. }()
  283. return fifoOpened
  284. }
  285. type openResult struct {
  286. file *os.File
  287. err error
  288. }
  289. func (c *linuxContainer) start(process *Process) error {
  290. parent, err := c.newParentProcess(process)
  291. if err != nil {
  292. return newSystemErrorWithCause(err, "creating new parent process")
  293. }
  294. if err := parent.start(); err != nil {
  295. // terminate the process to ensure that it properly is reaped.
  296. if err := ignoreTerminateErrors(parent.terminate()); err != nil {
  297. logrus.Warn(err)
  298. }
  299. return newSystemErrorWithCause(err, "starting container process")
  300. }
  301. // generate a timestamp indicating when the container was started
  302. c.created = time.Now().UTC()
  303. if process.Init {
  304. c.state = &createdState{
  305. c: c,
  306. }
  307. state, err := c.updateState(parent)
  308. if err != nil {
  309. return err
  310. }
  311. c.initProcessStartTime = state.InitProcessStartTime
  312. if c.config.Hooks != nil {
  313. bundle, annotations := utils.Annotations(c.config.Labels)
  314. s := configs.HookState{
  315. Version: c.config.Version,
  316. ID: c.id,
  317. Pid: parent.pid(),
  318. Bundle: bundle,
  319. Annotations: annotations,
  320. }
  321. for i, hook := range c.config.Hooks.Poststart {
  322. if err := hook.Run(s); err != nil {
  323. if err := ignoreTerminateErrors(parent.terminate()); err != nil {
  324. logrus.Warn(err)
  325. }
  326. return newSystemErrorWithCausef(err, "running poststart hook %d", i)
  327. }
  328. }
  329. }
  330. }
  331. return nil
  332. }
  333. func (c *linuxContainer) Signal(s os.Signal, all bool) error {
  334. if all {
  335. return signalAllProcesses(c.cgroupManager, s)
  336. }
  337. if err := c.initProcess.signal(s); err != nil {
  338. return newSystemErrorWithCause(err, "signaling init process")
  339. }
  340. return nil
  341. }
  342. func (c *linuxContainer) createExecFifo() error {
  343. rootuid, err := c.Config().HostRootUID()
  344. if err != nil {
  345. return err
  346. }
  347. rootgid, err := c.Config().HostRootGID()
  348. if err != nil {
  349. return err
  350. }
  351. fifoName := filepath.Join(c.root, execFifoFilename)
  352. if _, err := os.Stat(fifoName); err == nil {
  353. return fmt.Errorf("exec fifo %s already exists", fifoName)
  354. }
  355. oldMask := unix.Umask(0000)
  356. if err := unix.Mkfifo(fifoName, 0622); err != nil {
  357. unix.Umask(oldMask)
  358. return err
  359. }
  360. unix.Umask(oldMask)
  361. return os.Chown(fifoName, rootuid, rootgid)
  362. }
  363. func (c *linuxContainer) deleteExecFifo() {
  364. fifoName := filepath.Join(c.root, execFifoFilename)
  365. os.Remove(fifoName)
  366. }
  367. // includeExecFifo opens the container's execfifo as a pathfd, so that the
  368. // container cannot access the statedir (and the FIFO itself remains
  369. // un-opened). It then adds the FifoFd to the given exec.Cmd as an inherited
  370. // fd, with _LIBCONTAINER_FIFOFD set to its fd number.
  371. func (c *linuxContainer) includeExecFifo(cmd *exec.Cmd) error {
  372. fifoName := filepath.Join(c.root, execFifoFilename)
  373. fifoFd, err := unix.Open(fifoName, unix.O_PATH|unix.O_CLOEXEC, 0)
  374. if err != nil {
  375. return err
  376. }
  377. cmd.ExtraFiles = append(cmd.ExtraFiles, os.NewFile(uintptr(fifoFd), fifoName))
  378. cmd.Env = append(cmd.Env,
  379. fmt.Sprintf("_LIBCONTAINER_FIFOFD=%d", stdioFdCount+len(cmd.ExtraFiles)-1))
  380. return nil
  381. }
  382. func (c *linuxContainer) newParentProcess(p *Process) (parentProcess, error) {
  383. parentPipe, childPipe, err := utils.NewSockPair("init")
  384. if err != nil {
  385. return nil, newSystemErrorWithCause(err, "creating new init pipe")
  386. }
  387. cmd, err := c.commandTemplate(p, childPipe)
  388. if err != nil {
  389. return nil, newSystemErrorWithCause(err, "creating new command template")
  390. }
  391. if !p.Init {
  392. return c.newSetnsProcess(p, cmd, parentPipe, childPipe)
  393. }
  394. // We only set up fifoFd if we're not doing a `runc exec`. The historic
  395. // reason for this is that previously we would pass a dirfd that allowed
  396. // for container rootfs escape (and not doing it in `runc exec` avoided
  397. // that problem), but we no longer do that. However, there's no need to do
  398. // this for `runc exec` so we just keep it this way to be safe.
  399. if err := c.includeExecFifo(cmd); err != nil {
  400. return nil, newSystemErrorWithCause(err, "including execfifo in cmd.Exec setup")
  401. }
  402. return c.newInitProcess(p, cmd, parentPipe, childPipe)
  403. }
  404. func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.Cmd, error) {
  405. cmd := exec.Command(c.initPath, c.initArgs[1:]...)
  406. cmd.Args[0] = c.initArgs[0]
  407. cmd.Stdin = p.Stdin
  408. cmd.Stdout = p.Stdout
  409. cmd.Stderr = p.Stderr
  410. cmd.Dir = c.config.Rootfs
  411. if cmd.SysProcAttr == nil {
  412. cmd.SysProcAttr = &syscall.SysProcAttr{}
  413. }
  414. cmd.Env = append(cmd.Env, fmt.Sprintf("GOMAXPROCS=%s", os.Getenv("GOMAXPROCS")))
  415. cmd.ExtraFiles = append(cmd.ExtraFiles, p.ExtraFiles...)
  416. if p.ConsoleSocket != nil {
  417. cmd.ExtraFiles = append(cmd.ExtraFiles, p.ConsoleSocket)
  418. cmd.Env = append(cmd.Env,
  419. fmt.Sprintf("_LIBCONTAINER_CONSOLE=%d", stdioFdCount+len(cmd.ExtraFiles)-1),
  420. )
  421. }
  422. cmd.ExtraFiles = append(cmd.ExtraFiles, childPipe)
  423. cmd.Env = append(cmd.Env,
  424. fmt.Sprintf("_LIBCONTAINER_INITPIPE=%d", stdioFdCount+len(cmd.ExtraFiles)-1),
  425. )
  426. // NOTE: when running a container with no PID namespace and the parent process spawning the container is
  427. // PID1 the pdeathsig is being delivered to the container's init process by the kernel for some reason
  428. // even with the parent still running.
  429. if c.config.ParentDeathSignal > 0 {
  430. cmd.SysProcAttr.Pdeathsig = syscall.Signal(c.config.ParentDeathSignal)
  431. }
  432. return cmd, nil
  433. }
  434. func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*initProcess, error) {
  435. cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard))
  436. nsMaps := make(map[configs.NamespaceType]string)
  437. for _, ns := range c.config.Namespaces {
  438. if ns.Path != "" {
  439. nsMaps[ns.Type] = ns.Path
  440. }
  441. }
  442. _, sharePidns := nsMaps[configs.NEWPID]
  443. data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), nsMaps)
  444. if err != nil {
  445. return nil, err
  446. }
  447. return &initProcess{
  448. cmd: cmd,
  449. childPipe: childPipe,
  450. parentPipe: parentPipe,
  451. manager: c.cgroupManager,
  452. intelRdtManager: c.intelRdtManager,
  453. config: c.newInitConfig(p),
  454. container: c,
  455. process: p,
  456. bootstrapData: data,
  457. sharePidns: sharePidns,
  458. }, nil
  459. }
  460. func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*setnsProcess, error) {
  461. cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initSetns))
  462. state, err := c.currentState()
  463. if err != nil {
  464. return nil, newSystemErrorWithCause(err, "getting container's current state")
  465. }
  466. // for setns process, we don't have to set cloneflags as the process namespaces
  467. // will only be set via setns syscall
  468. data, err := c.bootstrapData(0, state.NamespacePaths)
  469. if err != nil {
  470. return nil, err
  471. }
  472. return &setnsProcess{
  473. cmd: cmd,
  474. cgroupPaths: c.cgroupManager.GetPaths(),
  475. rootlessCgroups: c.config.RootlessCgroups,
  476. intelRdtPath: state.IntelRdtPath,
  477. childPipe: childPipe,
  478. parentPipe: parentPipe,
  479. config: c.newInitConfig(p),
  480. process: p,
  481. bootstrapData: data,
  482. }, nil
  483. }
  484. func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
  485. cfg := &initConfig{
  486. Config: c.config,
  487. Args: process.Args,
  488. Env: process.Env,
  489. User: process.User,
  490. AdditionalGroups: process.AdditionalGroups,
  491. Cwd: process.Cwd,
  492. Capabilities: process.Capabilities,
  493. PassedFilesCount: len(process.ExtraFiles),
  494. ContainerId: c.ID(),
  495. NoNewPrivileges: c.config.NoNewPrivileges,
  496. RootlessEUID: c.config.RootlessEUID,
  497. RootlessCgroups: c.config.RootlessCgroups,
  498. AppArmorProfile: c.config.AppArmorProfile,
  499. ProcessLabel: c.config.ProcessLabel,
  500. Rlimits: c.config.Rlimits,
  501. }
  502. if process.NoNewPrivileges != nil {
  503. cfg.NoNewPrivileges = *process.NoNewPrivileges
  504. }
  505. if process.AppArmorProfile != "" {
  506. cfg.AppArmorProfile = process.AppArmorProfile
  507. }
  508. if process.Label != "" {
  509. cfg.ProcessLabel = process.Label
  510. }
  511. if len(process.Rlimits) > 0 {
  512. cfg.Rlimits = process.Rlimits
  513. }
  514. cfg.CreateConsole = process.ConsoleSocket != nil
  515. cfg.ConsoleWidth = process.ConsoleWidth
  516. cfg.ConsoleHeight = process.ConsoleHeight
  517. return cfg
  518. }
  519. func (c *linuxContainer) Destroy() error {
  520. c.m.Lock()
  521. defer c.m.Unlock()
  522. return c.state.destroy()
  523. }
  524. func (c *linuxContainer) Pause() error {
  525. c.m.Lock()
  526. defer c.m.Unlock()
  527. status, err := c.currentStatus()
  528. if err != nil {
  529. return err
  530. }
  531. switch status {
  532. case Running, Created:
  533. if err := c.cgroupManager.Freeze(configs.Frozen); err != nil {
  534. return err
  535. }
  536. return c.state.transition(&pausedState{
  537. c: c,
  538. })
  539. }
  540. return newGenericError(fmt.Errorf("container not running or created: %s", status), ContainerNotRunning)
  541. }
  542. func (c *linuxContainer) Resume() error {
  543. c.m.Lock()
  544. defer c.m.Unlock()
  545. status, err := c.currentStatus()
  546. if err != nil {
  547. return err
  548. }
  549. if status != Paused {
  550. return newGenericError(fmt.Errorf("container not paused"), ContainerNotPaused)
  551. }
  552. if err := c.cgroupManager.Freeze(configs.Thawed); err != nil {
  553. return err
  554. }
  555. return c.state.transition(&runningState{
  556. c: c,
  557. })
  558. }
  559. func (c *linuxContainer) NotifyOOM() (<-chan struct{}, error) {
  560. // XXX(cyphar): This requires cgroups.
  561. if c.config.RootlessCgroups {
  562. logrus.Warn("getting OOM notifications may fail if you don't have the full access to cgroups")
  563. }
  564. return notifyOnOOM(c.cgroupManager.GetPaths())
  565. }
  566. func (c *linuxContainer) NotifyMemoryPressure(level PressureLevel) (<-chan struct{}, error) {
  567. // XXX(cyphar): This requires cgroups.
  568. if c.config.RootlessCgroups {
  569. logrus.Warn("getting memory pressure notifications may fail if you don't have the full access to cgroups")
  570. }
  571. return notifyMemoryPressure(c.cgroupManager.GetPaths(), level)
  572. }
  573. var criuFeatures *criurpc.CriuFeatures
  574. func (c *linuxContainer) checkCriuFeatures(criuOpts *CriuOpts, rpcOpts *criurpc.CriuOpts, criuFeat *criurpc.CriuFeatures) error {
  575. var t criurpc.CriuReqType
  576. t = criurpc.CriuReqType_FEATURE_CHECK
  577. // criu 1.8 => 10800
  578. if err := c.checkCriuVersion(10800); err != nil {
  579. // Feature checking was introduced with CRIU 1.8.
  580. // Ignore the feature check if an older CRIU version is used
  581. // and just act as before.
  582. // As all automated PR testing is done using CRIU 1.7 this
  583. // code will not be tested by automated PR testing.
  584. return nil
  585. }
  586. // make sure the features we are looking for are really not from
  587. // some previous check
  588. criuFeatures = nil
  589. req := &criurpc.CriuReq{
  590. Type: &t,
  591. // Theoretically this should not be necessary but CRIU
  592. // segfaults if Opts is empty.
  593. // Fixed in CRIU 2.12
  594. Opts: rpcOpts,
  595. Features: criuFeat,
  596. }
  597. err := c.criuSwrk(nil, req, criuOpts, false, nil)
  598. if err != nil {
  599. logrus.Debugf("%s", err)
  600. return fmt.Errorf("CRIU feature check failed")
  601. }
  602. logrus.Debugf("Feature check says: %s", criuFeatures)
  603. missingFeatures := false
  604. // The outer if checks if the fields actually exist
  605. if (criuFeat.MemTrack != nil) &&
  606. (criuFeatures.MemTrack != nil) {
  607. // The inner if checks if they are set to true
  608. if *criuFeat.MemTrack && !*criuFeatures.MemTrack {
  609. missingFeatures = true
  610. logrus.Debugf("CRIU does not support MemTrack")
  611. }
  612. }
  613. // This needs to be repeated for every new feature check.
  614. // Is there a way to put this in a function. Reflection?
  615. if (criuFeat.LazyPages != nil) &&
  616. (criuFeatures.LazyPages != nil) {
  617. if *criuFeat.LazyPages && !*criuFeatures.LazyPages {
  618. missingFeatures = true
  619. logrus.Debugf("CRIU does not support LazyPages")
  620. }
  621. }
  622. if missingFeatures {
  623. return fmt.Errorf("CRIU is missing features")
  624. }
  625. return nil
  626. }
  627. func parseCriuVersion(path string) (int, error) {
  628. var x, y, z int
  629. out, err := exec.Command(path, "-V").Output()
  630. if err != nil {
  631. return 0, fmt.Errorf("Unable to execute CRIU command: %s", path)
  632. }
  633. x = 0
  634. y = 0
  635. z = 0
  636. if ep := strings.Index(string(out), "-"); ep >= 0 {
  637. // criu Git version format
  638. var version string
  639. if sp := strings.Index(string(out), "GitID"); sp > 0 {
  640. version = string(out)[sp:ep]
  641. } else {
  642. return 0, fmt.Errorf("Unable to parse the CRIU version: %s", path)
  643. }
  644. n, err := fmt.Sscanf(version, "GitID: v%d.%d.%d", &x, &y, &z) // 1.5.2
  645. if err != nil {
  646. n, err = fmt.Sscanf(version, "GitID: v%d.%d", &x, &y) // 1.6
  647. y++
  648. } else {
  649. z++
  650. }
  651. if n < 2 || err != nil {
  652. return 0, fmt.Errorf("Unable to parse the CRIU version: %s %d %s", version, n, err)
  653. }
  654. } else {
  655. // criu release version format
  656. n, err := fmt.Sscanf(string(out), "Version: %d.%d.%d\n", &x, &y, &z) // 1.5.2
  657. if err != nil {
  658. n, err = fmt.Sscanf(string(out), "Version: %d.%d\n", &x, &y) // 1.6
  659. }
  660. if n < 2 || err != nil {
  661. return 0, fmt.Errorf("Unable to parse the CRIU version: %s %d %s", out, n, err)
  662. }
  663. }
  664. return x*10000 + y*100 + z, nil
  665. }
  666. func compareCriuVersion(criuVersion int, minVersion int) error {
  667. // simple function to perform the actual version compare
  668. if criuVersion < minVersion {
  669. return fmt.Errorf("CRIU version %d must be %d or higher", criuVersion, minVersion)
  670. }
  671. return nil
  672. }
  673. // This is used to store the result of criu version RPC
  674. var criuVersionRPC *criurpc.CriuVersion
  675. // checkCriuVersion checks Criu version greater than or equal to minVersion
  676. func (c *linuxContainer) checkCriuVersion(minVersion int) error {
  677. // If the version of criu has already been determined there is no need
  678. // to ask criu for the version again. Use the value from c.criuVersion.
  679. if c.criuVersion != 0 {
  680. return compareCriuVersion(c.criuVersion, minVersion)
  681. }
  682. // First try if this version of CRIU support the version RPC.
  683. // The CRIU version RPC was introduced with CRIU 3.0.
  684. // First, reset the variable for the RPC answer to nil
  685. criuVersionRPC = nil
  686. var t criurpc.CriuReqType
  687. t = criurpc.CriuReqType_VERSION
  688. req := &criurpc.CriuReq{
  689. Type: &t,
  690. }
  691. err := c.criuSwrk(nil, req, nil, false, nil)
  692. if err != nil {
  693. return fmt.Errorf("CRIU version check failed: %s", err)
  694. }
  695. if criuVersionRPC != nil {
  696. logrus.Debugf("CRIU version: %s", criuVersionRPC)
  697. // major and minor are always set
  698. c.criuVersion = int(*criuVersionRPC.Major) * 10000
  699. c.criuVersion += int(*criuVersionRPC.Minor) * 100
  700. if criuVersionRPC.Sublevel != nil {
  701. c.criuVersion += int(*criuVersionRPC.Sublevel)
  702. }
  703. if criuVersionRPC.Gitid != nil {
  704. // runc's convention is that a CRIU git release is
  705. // always the same as increasing the minor by 1
  706. c.criuVersion -= (c.criuVersion % 100)
  707. c.criuVersion += 100
  708. }
  709. return compareCriuVersion(c.criuVersion, minVersion)
  710. }
  711. // This is CRIU without the version RPC and therefore
  712. // older than 3.0. Parsing the output is required.
  713. // This can be remove once runc does not work with criu older than 3.0
  714. c.criuVersion, err = parseCriuVersion(c.criuPath)
  715. if err != nil {
  716. return err
  717. }
  718. return compareCriuVersion(c.criuVersion, minVersion)
  719. }
// descriptorsFilename is the name of the file inside the checkpoint images
// directory that holds the JSON-encoded external descriptors of the init
// process. Checkpoint writes it and Restore reads it back.
const descriptorsFilename = "descriptors.json"
  721. func (c *linuxContainer) addCriuDumpMount(req *criurpc.CriuReq, m *configs.Mount) {
  722. mountDest := m.Destination
  723. if strings.HasPrefix(mountDest, c.config.Rootfs) {
  724. mountDest = mountDest[len(c.config.Rootfs):]
  725. }
  726. extMnt := &criurpc.ExtMountMap{
  727. Key: proto.String(mountDest),
  728. Val: proto.String(mountDest),
  729. }
  730. req.Opts.ExtMnt = append(req.Opts.ExtMnt, extMnt)
  731. }
  732. func (c *linuxContainer) addMaskPaths(req *criurpc.CriuReq) error {
  733. for _, path := range c.config.MaskPaths {
  734. fi, err := os.Stat(fmt.Sprintf("/proc/%d/root/%s", c.initProcess.pid(), path))
  735. if err != nil {
  736. if os.IsNotExist(err) {
  737. continue
  738. }
  739. return err
  740. }
  741. if fi.IsDir() {
  742. continue
  743. }
  744. extMnt := &criurpc.ExtMountMap{
  745. Key: proto.String(path),
  746. Val: proto.String("/dev/null"),
  747. }
  748. req.Opts.ExtMnt = append(req.Opts.ExtMnt, extMnt)
  749. }
  750. return nil
  751. }
  752. func waitForCriuLazyServer(r *os.File, status string) error {
  753. data := make([]byte, 1)
  754. _, err := r.Read(data)
  755. if err != nil {
  756. return err
  757. }
  758. fd, err := os.OpenFile(status, os.O_TRUNC|os.O_WRONLY, os.ModeAppend)
  759. if err != nil {
  760. return err
  761. }
  762. _, err = fd.Write(data)
  763. if err != nil {
  764. return err
  765. }
  766. fd.Close()
  767. return nil
  768. }
  769. func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
  770. c.m.Lock()
  771. defer c.m.Unlock()
  772. // Checkpoint is unlikely to work if os.Geteuid() != 0 || system.RunningInUserNS().
  773. // (CLI prints a warning)
  774. // TODO(avagin): Figure out how to make this work nicely. CRIU 2.0 has
  775. // support for doing unprivileged dumps, but the setup of
  776. // rootless containers might make this complicated.
  777. // criu 1.5.2 => 10502
  778. if err := c.checkCriuVersion(10502); err != nil {
  779. return err
  780. }
  781. if criuOpts.ImagesDirectory == "" {
  782. return fmt.Errorf("invalid directory to save checkpoint")
  783. }
  784. // Since a container can be C/R'ed multiple times,
  785. // the checkpoint directory may already exist.
  786. if err := os.Mkdir(criuOpts.ImagesDirectory, 0755); err != nil && !os.IsExist(err) {
  787. return err
  788. }
  789. if criuOpts.WorkDirectory == "" {
  790. criuOpts.WorkDirectory = filepath.Join(c.root, "criu.work")
  791. }
  792. if err := os.Mkdir(criuOpts.WorkDirectory, 0755); err != nil && !os.IsExist(err) {
  793. return err
  794. }
  795. workDir, err := os.Open(criuOpts.WorkDirectory)
  796. if err != nil {
  797. return err
  798. }
  799. defer workDir.Close()
  800. imageDir, err := os.Open(criuOpts.ImagesDirectory)
  801. if err != nil {
  802. return err
  803. }
  804. defer imageDir.Close()
  805. rpcOpts := criurpc.CriuOpts{
  806. ImagesDirFd: proto.Int32(int32(imageDir.Fd())),
  807. WorkDirFd: proto.Int32(int32(workDir.Fd())),
  808. LogLevel: proto.Int32(4),
  809. LogFile: proto.String("dump.log"),
  810. Root: proto.String(c.config.Rootfs),
  811. ManageCgroups: proto.Bool(true),
  812. NotifyScripts: proto.Bool(true),
  813. Pid: proto.Int32(int32(c.initProcess.pid())),
  814. ShellJob: proto.Bool(criuOpts.ShellJob),
  815. LeaveRunning: proto.Bool(criuOpts.LeaveRunning),
  816. TcpEstablished: proto.Bool(criuOpts.TcpEstablished),
  817. ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections),
  818. FileLocks: proto.Bool(criuOpts.FileLocks),
  819. EmptyNs: proto.Uint32(criuOpts.EmptyNs),
  820. OrphanPtsMaster: proto.Bool(true),
  821. AutoDedup: proto.Bool(criuOpts.AutoDedup),
  822. LazyPages: proto.Bool(criuOpts.LazyPages),
  823. }
  824. // If the container is running in a network namespace and has
  825. // a path to the network namespace configured, we will dump
  826. // that network namespace as an external namespace and we
  827. // will expect that the namespace exists during restore.
  828. // This basically means that CRIU will ignore the namespace
  829. // and expect to be setup correctly.
  830. nsPath := c.config.Namespaces.PathOf(configs.NEWNET)
  831. if nsPath != "" {
  832. // For this to work we need at least criu 3.11.0 => 31100.
  833. // As there was already a successful version check we will
  834. // not error out if it fails. runc will just behave as it used
  835. // to do and ignore external network namespaces.
  836. err := c.checkCriuVersion(31100)
  837. if err == nil {
  838. // CRIU expects the information about an external namespace
  839. // like this: --external net[<inode>]:<key>
  840. // This <key> is always 'extRootNetNS'.
  841. var netns syscall.Stat_t
  842. err = syscall.Stat(nsPath, &netns)
  843. if err != nil {
  844. return err
  845. }
  846. criuExternal := fmt.Sprintf("net[%d]:extRootNetNS", netns.Ino)
  847. rpcOpts.External = append(rpcOpts.External, criuExternal)
  848. }
  849. }
  850. fcg := c.cgroupManager.GetPaths()["freezer"]
  851. if fcg != "" {
  852. rpcOpts.FreezeCgroup = proto.String(fcg)
  853. }
  854. // append optional criu opts, e.g., page-server and port
  855. if criuOpts.PageServer.Address != "" && criuOpts.PageServer.Port != 0 {
  856. rpcOpts.Ps = &criurpc.CriuPageServerInfo{
  857. Address: proto.String(criuOpts.PageServer.Address),
  858. Port: proto.Int32(criuOpts.PageServer.Port),
  859. }
  860. }
  861. //pre-dump may need parentImage param to complete iterative migration
  862. if criuOpts.ParentImage != "" {
  863. rpcOpts.ParentImg = proto.String(criuOpts.ParentImage)
  864. rpcOpts.TrackMem = proto.Bool(true)
  865. }
  866. // append optional manage cgroups mode
  867. if criuOpts.ManageCgroupsMode != 0 {
  868. // criu 1.7 => 10700
  869. if err := c.checkCriuVersion(10700); err != nil {
  870. return err
  871. }
  872. mode := criurpc.CriuCgMode(criuOpts.ManageCgroupsMode)
  873. rpcOpts.ManageCgroupsMode = &mode
  874. }
  875. var t criurpc.CriuReqType
  876. if criuOpts.PreDump {
  877. feat := criurpc.CriuFeatures{
  878. MemTrack: proto.Bool(true),
  879. }
  880. if err := c.checkCriuFeatures(criuOpts, &rpcOpts, &feat); err != nil {
  881. return err
  882. }
  883. t = criurpc.CriuReqType_PRE_DUMP
  884. } else {
  885. t = criurpc.CriuReqType_DUMP
  886. }
  887. req := &criurpc.CriuReq{
  888. Type: &t,
  889. Opts: &rpcOpts,
  890. }
  891. if criuOpts.LazyPages {
  892. // lazy migration requested; check if criu supports it
  893. feat := criurpc.CriuFeatures{
  894. LazyPages: proto.Bool(true),
  895. }
  896. if err := c.checkCriuFeatures(criuOpts, &rpcOpts, &feat); err != nil {
  897. return err
  898. }
  899. statusRead, statusWrite, err := os.Pipe()
  900. if err != nil {
  901. return err
  902. }
  903. rpcOpts.StatusFd = proto.Int32(int32(statusWrite.Fd()))
  904. go waitForCriuLazyServer(statusRead, criuOpts.StatusFd)
  905. }
  906. //no need to dump these information in pre-dump
  907. if !criuOpts.PreDump {
  908. for _, m := range c.config.Mounts {
  909. switch m.Device {
  910. case "bind":
  911. c.addCriuDumpMount(req, m)
  912. case "cgroup":
  913. binds, err := getCgroupMounts(m)
  914. if err != nil {
  915. return err
  916. }
  917. for _, b := range binds {
  918. c.addCriuDumpMount(req, b)
  919. }
  920. }
  921. }
  922. if err := c.addMaskPaths(req); err != nil {
  923. return err
  924. }
  925. for _, node := range c.config.Devices {
  926. m := &configs.Mount{Destination: node.Path, Source: node.Path}
  927. c.addCriuDumpMount(req, m)
  928. }
  929. // Write the FD info to a file in the image directory
  930. fdsJSON, err := json.Marshal(c.initProcess.externalDescriptors())
  931. if err != nil {
  932. return err
  933. }
  934. err = ioutil.WriteFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename), fdsJSON, 0655)
  935. if err != nil {
  936. return err
  937. }
  938. }
  939. err = c.criuSwrk(nil, req, criuOpts, false, nil)
  940. if err != nil {
  941. return err
  942. }
  943. return nil
  944. }
  945. func (c *linuxContainer) addCriuRestoreMount(req *criurpc.CriuReq, m *configs.Mount) {
  946. mountDest := m.Destination
  947. if strings.HasPrefix(mountDest, c.config.Rootfs) {
  948. mountDest = mountDest[len(c.config.Rootfs):]
  949. }
  950. extMnt := &criurpc.ExtMountMap{
  951. Key: proto.String(mountDest),
  952. Val: proto.String(m.Source),
  953. }
  954. req.Opts.ExtMnt = append(req.Opts.ExtMnt, extMnt)
  955. }
  956. func (c *linuxContainer) restoreNetwork(req *criurpc.CriuReq, criuOpts *CriuOpts) {
  957. for _, iface := range c.config.Networks {
  958. switch iface.Type {
  959. case "veth":
  960. veth := new(criurpc.CriuVethPair)
  961. veth.IfOut = proto.String(iface.HostInterfaceName)
  962. veth.IfIn = proto.String(iface.Name)
  963. req.Opts.Veths = append(req.Opts.Veths, veth)
  964. case "loopback":
  965. // Do nothing
  966. }
  967. }
  968. for _, i := range criuOpts.VethPairs {
  969. veth := new(criurpc.CriuVethPair)
  970. veth.IfOut = proto.String(i.HostInterfaceName)
  971. veth.IfIn = proto.String(i.ContainerInterfaceName)
  972. req.Opts.Veths = append(req.Opts.Veths, veth)
  973. }
  974. }
  975. func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
  976. c.m.Lock()
  977. defer c.m.Unlock()
  978. var extraFiles []*os.File
  979. // Restore is unlikely to work if os.Geteuid() != 0 || system.RunningInUserNS().
  980. // (CLI prints a warning)
  981. // TODO(avagin): Figure out how to make this work nicely. CRIU doesn't have
  982. // support for unprivileged restore at the moment.
  983. // criu 1.5.2 => 10502
  984. if err := c.checkCriuVersion(10502); err != nil {
  985. return err
  986. }
  987. if criuOpts.WorkDirectory == "" {
  988. criuOpts.WorkDirectory = filepath.Join(c.root, "criu.work")
  989. }
  990. // Since a container can be C/R'ed multiple times,
  991. // the work directory may already exist.
  992. if err := os.Mkdir(criuOpts.WorkDirectory, 0655); err != nil && !os.IsExist(err) {
  993. return err
  994. }
  995. workDir, err := os.Open(criuOpts.WorkDirectory)
  996. if err != nil {
  997. return err
  998. }
  999. defer workDir.Close()
  1000. if criuOpts.ImagesDirectory == "" {
  1001. return fmt.Errorf("invalid directory to restore checkpoint")
  1002. }
  1003. imageDir, err := os.Open(criuOpts.ImagesDirectory)
  1004. if err != nil {
  1005. return err
  1006. }
  1007. defer imageDir.Close()
  1008. // CRIU has a few requirements for a root directory:
  1009. // * it must be a mount point
  1010. // * its parent must not be overmounted
  1011. // c.config.Rootfs is bind-mounted to a temporary directory
  1012. // to satisfy these requirements.
  1013. root := filepath.Join(c.root, "criu-root")
  1014. if err := os.Mkdir(root, 0755); err != nil {
  1015. return err
  1016. }
  1017. defer os.Remove(root)
  1018. root, err = filepath.EvalSymlinks(root)
  1019. if err != nil {
  1020. return err
  1021. }
  1022. err = unix.Mount(c.config.Rootfs, root, "", unix.MS_BIND|unix.MS_REC, "")
  1023. if err != nil {
  1024. return err
  1025. }
  1026. defer unix.Unmount(root, unix.MNT_DETACH)
  1027. t := criurpc.CriuReqType_RESTORE
  1028. req := &criurpc.CriuReq{
  1029. Type: &t,
  1030. Opts: &criurpc.CriuOpts{
  1031. ImagesDirFd: proto.Int32(int32(imageDir.Fd())),
  1032. WorkDirFd: proto.Int32(int32(workDir.Fd())),
  1033. EvasiveDevices: proto.Bool(true),
  1034. LogLevel: proto.Int32(4),
  1035. LogFile: proto.String("restore.log"),
  1036. RstSibling: proto.Bool(true),
  1037. Root: proto.String(root),
  1038. ManageCgroups: proto.Bool(true),
  1039. NotifyScripts: proto.Bool(true),
  1040. ShellJob: proto.Bool(criuOpts.ShellJob),
  1041. ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections),
  1042. TcpEstablished: proto.Bool(criuOpts.TcpEstablished),
  1043. FileLocks: proto.Bool(criuOpts.FileLocks),
  1044. EmptyNs: proto.Uint32(criuOpts.EmptyNs),
  1045. OrphanPtsMaster: proto.Bool(true),
  1046. AutoDedup: proto.Bool(criuOpts.AutoDedup),
  1047. LazyPages: proto.Bool(criuOpts.LazyPages),
  1048. },
  1049. }
  1050. // Same as during checkpointing. If the container has a specific network namespace
  1051. // assigned to it, this now expects that the checkpoint will be restored in a
  1052. // already created network namespace.
  1053. nsPath := c.config.Namespaces.PathOf(configs.NEWNET)
  1054. if nsPath != "" {
  1055. // For this to work we need at least criu 3.11.0 => 31100.
  1056. // As there was already a successful version check we will
  1057. // not error out if it fails. runc will just behave as it used
  1058. // to do and ignore external network namespaces.
  1059. err := c.checkCriuVersion(31100)
  1060. if err == nil {
  1061. // CRIU wants the information about an existing network namespace
  1062. // like this: --inherit-fd fd[<fd>]:<key>
  1063. // The <key> needs to be the same as during checkpointing.
  1064. // We are always using 'extRootNetNS' as the key in this.
  1065. netns, err := os.Open(nsPath)
  1066. defer netns.Close()
  1067. if err != nil {
  1068. logrus.Errorf("If a specific network namespace is defined it must exist: %s", err)
  1069. return fmt.Errorf("Requested network namespace %v does not exist", nsPath)
  1070. }
  1071. inheritFd := new(criurpc.InheritFd)
  1072. inheritFd.Key = proto.String("extRootNetNS")
  1073. // The offset of four is necessary because 0, 1, 2 and 3 is already
  1074. // used by stdin, stdout, stderr, 'criu swrk' socket.
  1075. inheritFd.Fd = proto.Int32(int32(4 + len(extraFiles)))
  1076. req.Opts.InheritFd = append(req.Opts.InheritFd, inheritFd)
  1077. // All open FDs need to be transferred to CRIU via extraFiles
  1078. extraFiles = append(extraFiles, netns)
  1079. }
  1080. }
  1081. for _, m := range c.config.Mounts {
  1082. switch m.Device {
  1083. case "bind":
  1084. c.addCriuRestoreMount(req, m)
  1085. case "cgroup":
  1086. binds, err := getCgroupMounts(m)
  1087. if err != nil {
  1088. return err
  1089. }
  1090. for _, b := range binds {
  1091. c.addCriuRestoreMount(req, b)
  1092. }
  1093. }
  1094. }
  1095. if len(c.config.MaskPaths) > 0 {
  1096. m := &configs.Mount{Destination: "/dev/null", Source: "/dev/null"}
  1097. c.addCriuRestoreMount(req, m)
  1098. }
  1099. for _, node := range c.config.Devices {
  1100. m := &configs.Mount{Destination: node.Path, Source: node.Path}
  1101. c.addCriuRestoreMount(req, m)
  1102. }
  1103. if criuOpts.EmptyNs&unix.CLONE_NEWNET == 0 {
  1104. c.restoreNetwork(req, criuOpts)
  1105. }
  1106. // append optional manage cgroups mode
  1107. if criuOpts.ManageCgroupsMode != 0 {
  1108. // criu 1.7 => 10700
  1109. if err := c.checkCriuVersion(10700); err != nil {
  1110. return err
  1111. }
  1112. mode := criurpc.CriuCgMode(criuOpts.ManageCgroupsMode)
  1113. req.Opts.ManageCgroupsMode = &mode
  1114. }
  1115. var (
  1116. fds []string
  1117. fdJSON []byte
  1118. )
  1119. if fdJSON, err = ioutil.ReadFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename)); err != nil {
  1120. return err
  1121. }
  1122. if err := json.Unmarshal(fdJSON, &fds); err != nil {
  1123. return err
  1124. }
  1125. for i := range fds {
  1126. if s := fds[i]; strings.Contains(s, "pipe:") {
  1127. inheritFd := new(criurpc.InheritFd)
  1128. inheritFd.Key = proto.String(s)
  1129. inheritFd.Fd = proto.Int32(int32(i))
  1130. req.Opts.InheritFd = append(req.Opts.InheritFd, inheritFd)
  1131. }
  1132. }
  1133. return c.criuSwrk(process, req, criuOpts, true, extraFiles)
  1134. }
  1135. func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
  1136. // XXX: Do we need to deal with this case? AFAIK criu still requires root.
  1137. if err := c.cgroupManager.Apply(pid); err != nil {
  1138. return err
  1139. }
  1140. if err := c.cgroupManager.Set(c.config); err != nil {
  1141. return newSystemError(err)
  1142. }
  1143. path := fmt.Sprintf("/proc/%d/cgroup", pid)
  1144. cgroupsPaths, err := cgroups.ParseCgroupFile(path)
  1145. if err != nil {
  1146. return err
  1147. }
  1148. for c, p := range cgroupsPaths {
  1149. cgroupRoot := &criurpc.CgroupRoot{
  1150. Ctrl: proto.String(c),
  1151. Path: proto.String(p),
  1152. }
  1153. req.Opts.CgRoot = append(req.Opts.CgRoot, cgroupRoot)
  1154. }
  1155. return nil
  1156. }
// criuSwrk starts "criu swrk 3", sends req to it over a SOCK_SEQPACKET socket
// pair and processes the responses until the final answer for the request
// arrives, then waits for the CRIU process to exit.
//
// process, when non-nil, supplies stdin/stdout/stderr for CRIU and is passed
// on to criuNotifications. opts may be nil (as for the VERSION RPC); it is
// only used to build the log path shown in error messages and is forwarded to
// criuNotifications. applyCgroups makes the CRIU process join the container's
// cgroups before the request is sent. extraFiles are handed to CRIU after the
// RPC socket, i.e. starting at descriptor 4.
//
// A failed VERSION request is not an error: nil is returned and
// criuVersionRPC is left untouched. VERSION and FEATURE_CHECK answers are
// stored in the package-level criuVersionRPC and criuFeatures variables.
func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, applyCgroups bool, extraFiles []*os.File) error {
	fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_SEQPACKET|unix.SOCK_CLOEXEC, 0)
	if err != nil {
		return err
	}

	var logPath string
	if opts != nil {
		logPath = filepath.Join(opts.WorkDirectory, req.GetOpts().GetLogFile())
	} else {
		// For the VERSION RPC 'opts' is set to 'nil' and therefore
		// opts.WorkDirectory does not exist. Set logPath to "".
		logPath = ""
	}
	// fds[0] is our end of the socket pair. net.FileConn works on its own
	// copy, which is why the temporary *os.File is closed right away.
	criuClient := os.NewFile(uintptr(fds[0]), "criu-transport-client")
	criuClientFileCon, err := net.FileConn(criuClient)
	criuClient.Close()
	if err != nil {
		// NOTE(review): fds[1] has not been wrapped in an *os.File yet at
		// this point, so it looks like it stays open on this path — confirm.
		return err
	}

	criuClientCon := criuClientFileCon.(*net.UnixConn)
	defer criuClientCon.Close()

	// fds[1] is CRIU's end; as the first ExtraFiles entry it becomes
	// descriptor 3 in the child, matching the "3" in args.
	criuServer := os.NewFile(uintptr(fds[1]), "criu-transport-server")
	defer criuServer.Close()

	args := []string{"swrk", "3"}
	if c.criuVersion != 0 {
		// If the CRIU Version is still '0' then this is probably
		// the initial CRIU run to detect the version. Skip it.
		logrus.Debugf("Using CRIU %d at: %s", c.criuVersion, c.criuPath)
	}
	logrus.Debugf("Using CRIU with following args: %s", args)
	cmd := exec.Command(c.criuPath, args...)
	if process != nil {
		cmd.Stdin = process.Stdin
		cmd.Stdout = process.Stdout
		cmd.Stderr = process.Stderr
	}
	cmd.ExtraFiles = append(cmd.ExtraFiles, criuServer)
	if extraFiles != nil {
		cmd.ExtraFiles = append(cmd.ExtraFiles, extraFiles...)
	}

	if err := cmd.Start(); err != nil {
		return err
	}
	// The child has its own copy of the server end now; close ours.
	criuServer.Close()

	// On every return path: close our end of the socket and reap the CRIU
	// process. The result of Wait is deliberately not reported here.
	defer func() {
		criuClientCon.Close()
		_, err := cmd.Process.Wait()
		if err != nil {
			return
		}
	}()

	if applyCgroups {
		err := c.criuApplyCgroups(cmd.Process.Pid, req)
		if err != nil {
			return err
		}
	}

	// extFds is only collected when a process was supplied; it is handed to
	// criuNotifications below.
	var extFds []string
	if process != nil {
		extFds, err = getPipeFds(cmd.Process.Pid)
		if err != nil {
			return err
		}
	}

	logrus.Debugf("Using CRIU in %s mode", req.GetType().String())
	// In the case of criurpc.CriuReqType_FEATURE_CHECK req.GetOpts()
	// should be empty. For older CRIU versions it still will be
	// available but empty. criurpc.CriuReqType_VERSION actually
	// has no req.GetOpts().
	if !(req.GetType() == criurpc.CriuReqType_FEATURE_CHECK ||
		req.GetType() == criurpc.CriuReqType_VERSION) {

		// Debug-log every option through its generated Get<Field>
		// accessor, skipping the protobuf-internal XXX_ fields.
		val := reflect.ValueOf(req.GetOpts())
		v := reflect.Indirect(val)
		for i := 0; i < v.NumField(); i++ {
			st := v.Type()
			name := st.Field(i).Name
			if strings.HasPrefix(name, "XXX_") {
				continue
			}
			value := val.MethodByName("Get" + name).Call([]reflect.Value{})
			logrus.Debugf("CRIU option %s with value %v", name, value[0])
		}
	}
	data, err := proto.Marshal(req)
	if err != nil {
		return err
	}
	_, err = criuClientCon.Write(data)
	if err != nil {
		return err
	}

	// buf receives the protobuf message, oob the control data that comes
	// with it (forwarded to criuNotifications).
	buf := make([]byte, 10*4096)
	oob := make([]byte, 4096)
	// Response loop: NOTIFY messages are acknowledged and the loop
	// continues; any other accepted response type ends it.
	for true {
		n, oobn, _, _, err := criuClientCon.ReadMsgUnix(buf, oob)
		if err != nil {
			return err
		}
		if n == 0 {
			return fmt.Errorf("unexpected EOF")
		}
		// A completely filled buffer is treated as a message that did not
		// fit.
		if n == len(buf) {
			return fmt.Errorf("buffer is too small")
		}

		resp := new(criurpc.CriuResp)
		err = proto.Unmarshal(buf[:n], resp)
		if err != nil {
			return err
		}
		if !resp.GetSuccess() {
			// NOTE(review): req is replaced by the NOTIFY acknowledgement
			// further down, so after a notification this reports the type
			// as NOTIFY rather than the original request type — confirm
			// this is intended.
			typeString := req.GetType().String()
			if typeString == "VERSION" {
				// If the VERSION RPC fails this probably means that the CRIU
				// version is too old for this RPC. Just return 'nil'.
				return nil
			}
			return fmt.Errorf("criu failed: type %s errno %d\nlog file: %s", typeString, resp.GetCrErrno(), logPath)
		}

		t := resp.GetType()
		switch {
		case t == criurpc.CriuReqType_VERSION:
			logrus.Debugf("CRIU version: %s", resp)
			criuVersionRPC = resp.GetVersion()
			break
		case t == criurpc.CriuReqType_FEATURE_CHECK:
			logrus.Debugf("Feature check says: %s", resp)
			criuFeatures = resp.GetFeatures()
		case t == criurpc.CriuReqType_NOTIFY:
			if err := c.criuNotifications(resp, process, opts, extFds, oob[:oobn]); err != nil {
				return err
			}
			// Acknowledge the notification and wait for the next message.
			t = criurpc.CriuReqType_NOTIFY
			req = &criurpc.CriuReq{
				Type:          &t,
				NotifySuccess: proto.Bool(true),
			}
			data, err = proto.Marshal(req)
			if err != nil {
				return err
			}
			_, err = criuClientCon.Write(data)
			if err != nil {
				return err
			}
			continue
		case t == criurpc.CriuReqType_RESTORE:
		case t == criurpc.CriuReqType_DUMP:
		case t == criurpc.CriuReqType_PRE_DUMP:
		default:
			return fmt.Errorf("unable to parse the response %s", resp.String())
		}

		// Final answer received: leave the response loop.
		break
	}

	criuClientCon.CloseWrite()
	// cmd.Wait() waits cmd.goroutines which are used for proxying file descriptors.
	// Here we want to wait only the CRIU process.
	st, err := cmd.Process.Wait()
	if err != nil {
		return err
	}

	// In pre-dump mode CRIU is in a loop and waits for
	// the final DUMP command.
	// The current runc pre-dump approach, however, is
	// start criu in PRE_DUMP once for a single pre-dump
	// and not the whole series of pre-dump, pre-dump, ...m, dump
	// If we got the message CriuReqType_PRE_DUMP it means
	// CRIU was successful and we need to forcefully stop CRIU
	// NOTE(review): req may have been replaced by a NOTIFY acknowledgement
	// in the loop above, in which case *req.Type is NOTIFY here — confirm
	// the PRE_DUMP exemption still applies as intended.
	if !st.Success() && *req.Type != criurpc.CriuReqType_PRE_DUMP {
		return fmt.Errorf("criu failed: %s\nlog file: %s", st.String(), logPath)
	}
	return nil
}
  1329. // block any external network activity
  1330. func lockNetwork(config *configs.Config) error {
  1331. for _, config := range config.Networks {
  1332. strategy, err := getStrategy(config.Type)
  1333. if err != nil {
  1334. return err
  1335. }
  1336. if err := strategy.detach(config); err != nil {
  1337. return err
  1338. }
  1339. }
  1340. return nil
  1341. }
  1342. func unlockNetwork(config *configs.Config) error {
  1343. for _, config := range config.Networks {
  1344. strategy, err := getStrategy(config.Type)
  1345. if err != nil {
  1346. return err
  1347. }
  1348. if err = strategy.attach(config); err != nil {
  1349. return err
  1350. }
  1351. }
  1352. return nil
  1353. }
  1354. func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Process, opts *CriuOpts, fds []string, oob []byte) error {
  1355. notify := resp.GetNotify()
  1356. if notify == nil {
  1357. return fmt.Errorf("invalid response: %s", resp.String())
  1358. }
  1359. logrus.Debugf("notify: %s\n", notify.GetScript())
  1360. switch {
  1361. case notify.GetScript() == "post-dump":
  1362. f, err := os.Create(filepath.Join(c.root, "checkpoint"))
  1363. if err != nil {
  1364. return err
  1365. }
  1366. f.Close()
  1367. case notify.GetScript() == "network-unlock":
  1368. if err := unlockNetwork(c.config); err != nil {
  1369. return err
  1370. }
  1371. case notify.GetScript() == "network-lock":
  1372. if err := lockNetwork(c.config); err != nil {
  1373. return err
  1374. }
  1375. case notify.GetScript() == "setup-namespaces":
  1376. if c.config.Hooks != nil {
  1377. bundle, annotations := utils.Annotations(c.config.Labels)
  1378. s := configs.HookState{
  1379. Version: c.config.Version,
  1380. ID: c.id,
  1381. Pid: int(notify.GetPid()),
  1382. Bundle: bundle,
  1383. Annotations: annotations,
  1384. }
  1385. for i, hook := range c.config.Hooks.Prestart {
  1386. if err := hook.Run(s); err != nil {
  1387. return newSystemErrorWithCausef(err, "running prestart hook %d", i)
  1388. }
  1389. }
  1390. }
  1391. case notify.GetScript() == "post-restore":
  1392. pid := notify.GetPid()
  1393. r, err := newRestoredProcess(int(pid), fds)
  1394. if err != nil {
  1395. return err
  1396. }
  1397. process.ops = r
  1398. if err := c.state.transition(&restoredState{
  1399. imageDir: opts.ImagesDirectory,
  1400. c: c,
  1401. }); err != nil {
  1402. return err
  1403. }
  1404. // create a timestamp indicating when the restored checkpoint was started
  1405. c.created = time.Now().UTC()
  1406. if _, err := c.updateState(r); err != nil {
  1407. return err
  1408. }
  1409. if err := os.Remove(filepath.Join(c.root, "checkpoint")); err != nil {
  1410. if !os.IsNotExist(err) {
  1411. logrus.Error(err)
  1412. }
  1413. }
  1414. case notify.GetScript() == "orphan-pts-master":
  1415. scm, err := unix.ParseSocketControlMessage(oob)
  1416. if err != nil {
  1417. return err
  1418. }
  1419. fds, err := unix.ParseUnixRights(&scm[0])
  1420. if err != nil {
  1421. return err
  1422. }
  1423. master := os.NewFile(uintptr(fds[0]), "orphan-pts-master")
  1424. defer master.Close()
  1425. // While we can access console.master, using the API is a good idea.
  1426. if err := utils.SendFd(process.ConsoleSocket, master.Name(), master.Fd()); err != nil {
  1427. return err
  1428. }
  1429. }
  1430. return nil
  1431. }
  1432. func (c *linuxContainer) updateState(process parentProcess) (*State, error) {
  1433. if process != nil {
  1434. c.initProcess = process
  1435. }
  1436. state, err := c.currentState()
  1437. if err != nil {
  1438. return nil, err
  1439. }
  1440. err = c.saveState(state)
  1441. if err != nil {
  1442. return nil, err
  1443. }
  1444. return state, nil
  1445. }
  1446. func (c *linuxContainer) saveState(s *State) error {
  1447. f, err := os.Create(filepath.Join(c.root, stateFilename))
  1448. if err != nil {
  1449. return err
  1450. }
  1451. defer f.Close()
  1452. return utils.WriteJSON(f, s)
  1453. }
  1454. func (c *linuxContainer) deleteState() error {
  1455. return os.Remove(filepath.Join(c.root, stateFilename))
  1456. }
  1457. func (c *linuxContainer) currentStatus() (Status, error) {
  1458. if err := c.refreshState(); err != nil {
  1459. return -1, err
  1460. }
  1461. return c.state.status(), nil
  1462. }
  1463. // refreshState needs to be called to verify that the current state on the
  1464. // container is what is true. Because consumers of libcontainer can use it
  1465. // out of process we need to verify the container's status based on runtime
  1466. // information and not rely on our in process info.
  1467. func (c *linuxContainer) refreshState() error {
  1468. paused, err := c.isPaused()
  1469. if err != nil {
  1470. return err
  1471. }
  1472. if paused {
  1473. return c.state.transition(&pausedState{c: c})
  1474. }
  1475. t, err := c.runType()
  1476. if err != nil {
  1477. return err
  1478. }
  1479. switch t {
  1480. case Created:
  1481. return c.state.transition(&createdState{c: c})
  1482. case Running:
  1483. return c.state.transition(&runningState{c: c})
  1484. }
  1485. return c.state.transition(&stoppedState{c: c})
  1486. }
  1487. func (c *linuxContainer) runType() (Status, error) {
  1488. if c.initProcess == nil {
  1489. return Stopped, nil
  1490. }
  1491. pid := c.initProcess.pid()
  1492. stat, err := system.Stat(pid)
  1493. if err != nil {
  1494. return Stopped, nil
  1495. }
  1496. if stat.StartTime != c.initProcessStartTime || stat.State == system.Zombie || stat.State == system.Dead {
  1497. return Stopped, nil
  1498. }
  1499. // We'll create exec fifo and blocking on it after container is created,
  1500. // and delete it after start container.
  1501. if _, err := os.Stat(filepath.Join(c.root, execFifoFilename)); err == nil {
  1502. return Created, nil
  1503. }
  1504. return Running, nil
  1505. }
  1506. func (c *linuxContainer) isPaused() (bool, error) {
  1507. fcg := c.cgroupManager.GetPaths()["freezer"]
  1508. if fcg == "" {
  1509. // A container doesn't have a freezer cgroup
  1510. return false, nil
  1511. }
  1512. data, err := ioutil.ReadFile(filepath.Join(fcg, "freezer.state"))
  1513. if err != nil {
  1514. // If freezer cgroup is not mounted, the container would just be not paused.
  1515. if os.IsNotExist(err) {
  1516. return false, nil
  1517. }
  1518. return false, newSystemErrorWithCause(err, "checking if container is paused")
  1519. }
  1520. return bytes.Equal(bytes.TrimSpace(data), []byte("FROZEN")), nil
  1521. }
  1522. func (c *linuxContainer) currentState() (*State, error) {
  1523. var (
  1524. startTime uint64
  1525. externalDescriptors []string
  1526. pid = -1
  1527. )
  1528. if c.initProcess != nil {
  1529. pid = c.initProcess.pid()
  1530. startTime, _ = c.initProcess.startTime()
  1531. externalDescriptors = c.initProcess.externalDescriptors()
  1532. }
  1533. intelRdtPath, err := intelrdt.GetIntelRdtPath(c.ID())
  1534. if err != nil {
  1535. intelRdtPath = ""
  1536. }
  1537. state := &State{
  1538. BaseState: BaseState{
  1539. ID: c.ID(),
  1540. Config: *c.config,
  1541. InitProcessPid: pid,
  1542. InitProcessStartTime: startTime,
  1543. Created: c.created,
  1544. },
  1545. Rootless: c.config.RootlessEUID && c.config.RootlessCgroups,
  1546. CgroupPaths: c.cgroupManager.GetPaths(),
  1547. IntelRdtPath: intelRdtPath,
  1548. NamespacePaths: make(map[configs.NamespaceType]string),
  1549. ExternalDescriptors: externalDescriptors,
  1550. }
  1551. if pid > 0 {
  1552. for _, ns := range c.config.Namespaces {
  1553. state.NamespacePaths[ns.Type] = ns.GetPath(pid)
  1554. }
  1555. for _, nsType := range configs.NamespaceTypes() {
  1556. if !configs.IsNamespaceSupported(nsType) {
  1557. continue
  1558. }
  1559. if _, ok := state.NamespacePaths[nsType]; !ok {
  1560. ns := configs.Namespace{Type: nsType}
  1561. state.NamespacePaths[ns.Type] = ns.GetPath(pid)
  1562. }
  1563. }
  1564. }
  1565. return state, nil
  1566. }
  1567. // orderNamespacePaths sorts namespace paths into a list of paths that we
  1568. // can setns in order.
  1569. func (c *linuxContainer) orderNamespacePaths(namespaces map[configs.NamespaceType]string) ([]string, error) {
  1570. paths := []string{}
  1571. for _, ns := range configs.NamespaceTypes() {
  1572. // Remove namespaces that we don't need to join.
  1573. if !c.config.Namespaces.Contains(ns) {
  1574. continue
  1575. }
  1576. if p, ok := namespaces[ns]; ok && p != "" {
  1577. // check if the requested namespace is supported
  1578. if !configs.IsNamespaceSupported(ns) {
  1579. return nil, newSystemError(fmt.Errorf("namespace %s is not supported", ns))
  1580. }
  1581. // only set to join this namespace if it exists
  1582. if _, err := os.Lstat(p); err != nil {
  1583. return nil, newSystemErrorWithCausef(err, "running lstat on namespace path %q", p)
  1584. }
  1585. // do not allow namespace path with comma as we use it to separate
  1586. // the namespace paths
  1587. if strings.ContainsRune(p, ',') {
  1588. return nil, newSystemError(fmt.Errorf("invalid path %s", p))
  1589. }
  1590. paths = append(paths, fmt.Sprintf("%s:%s", configs.NsName(ns), p))
  1591. }
  1592. }
  1593. return paths, nil
  1594. }
  1595. func encodeIDMapping(idMap []configs.IDMap) ([]byte, error) {
  1596. data := bytes.NewBuffer(nil)
  1597. for _, im := range idMap {
  1598. line := fmt.Sprintf("%d %d %d\n", im.ContainerID, im.HostID, im.Size)
  1599. if _, err := data.WriteString(line); err != nil {
  1600. return nil, err
  1601. }
  1602. }
  1603. return data.Bytes(), nil
  1604. }
  1605. // bootstrapData encodes the necessary data in netlink binary format
  1606. // as a io.Reader.
  1607. // Consumer can write the data to a bootstrap program
  1608. // such as one that uses nsenter package to bootstrap the container's
  1609. // init process correctly, i.e. with correct namespaces, uid/gid
  1610. // mapping etc.
  1611. func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.NamespaceType]string) (io.Reader, error) {
  1612. // create the netlink message
  1613. r := nl.NewNetlinkRequest(int(InitMsg), 0)
  1614. // write cloneFlags
  1615. r.AddData(&Int32msg{
  1616. Type: CloneFlagsAttr,
  1617. Value: uint32(cloneFlags),
  1618. })
  1619. // write custom namespace paths
  1620. if len(nsMaps) > 0 {
  1621. nsPaths, err := c.orderNamespacePaths(nsMaps)
  1622. if err != nil {
  1623. return nil, err
  1624. }
  1625. r.AddData(&Bytemsg{
  1626. Type: NsPathsAttr,
  1627. Value: []byte(strings.Join(nsPaths, ",")),
  1628. })
  1629. }
  1630. // write namespace paths only when we are not joining an existing user ns
  1631. _, joinExistingUser := nsMaps[configs.NEWUSER]
  1632. if !joinExistingUser {
  1633. // write uid mappings
  1634. if len(c.config.UidMappings) > 0 {
  1635. if c.config.RootlessEUID && c.newuidmapPath != "" {
  1636. r.AddData(&Bytemsg{
  1637. Type: UidmapPathAttr,
  1638. Value: []byte(c.newuidmapPath),
  1639. })
  1640. }
  1641. b, err := encodeIDMapping(c.config.UidMappings)
  1642. if err != nil {
  1643. return nil, err
  1644. }
  1645. r.AddData(&Bytemsg{
  1646. Type: UidmapAttr,
  1647. Value: b,
  1648. })
  1649. }
  1650. // write gid mappings
  1651. if len(c.config.GidMappings) > 0 {
  1652. b, err := encodeIDMapping(c.config.GidMappings)
  1653. if err != nil {
  1654. return nil, err
  1655. }
  1656. r.AddData(&Bytemsg{
  1657. Type: GidmapAttr,
  1658. Value: b,
  1659. })
  1660. if c.config.RootlessEUID && c.newgidmapPath != "" {
  1661. r.AddData(&Bytemsg{
  1662. Type: GidmapPathAttr,
  1663. Value: []byte(c.newgidmapPath),
  1664. })
  1665. }
  1666. if requiresRootOrMappingTool(c.config) {
  1667. r.AddData(&Boolmsg{
  1668. Type: SetgroupAttr,
  1669. Value: true,
  1670. })
  1671. }
  1672. }
  1673. }
  1674. if c.config.OomScoreAdj != nil {
  1675. // write oom_score_adj
  1676. r.AddData(&Bytemsg{
  1677. Type: OomScoreAdjAttr,
  1678. Value: []byte(fmt.Sprintf("%d", *c.config.OomScoreAdj)),
  1679. })
  1680. }
  1681. // write rootless
  1682. r.AddData(&Boolmsg{
  1683. Type: RootlessEUIDAttr,
  1684. Value: c.config.RootlessEUID,
  1685. })
  1686. return bytes.NewReader(r.Serialize()), nil
  1687. }
  1688. // ignoreTerminateErrors returns nil if the given err matches an error known
  1689. // to indicate that the terminate occurred successfully or err was nil, otherwise
  1690. // err is returned unaltered.
  1691. func ignoreTerminateErrors(err error) error {
  1692. if err == nil {
  1693. return nil
  1694. }
  1695. s := err.Error()
  1696. switch {
  1697. case strings.Contains(s, "process already finished"), strings.Contains(s, "Wait was already called"):
  1698. return nil
  1699. }
  1700. return err
  1701. }
  1702. func requiresRootOrMappingTool(c *configs.Config) bool {
  1703. gidMap := []configs.IDMap{
  1704. {ContainerID: 0, HostID: os.Getegid(), Size: 1},
  1705. }
  1706. return !reflect.DeepEqual(c.GidMappings, gidMap)
  1707. }