factory_linux.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428
  1. // +build linux
  2. package libcontainer
  3. import (
  4. "encoding/json"
  5. "fmt"
  6. "os"
  7. "path/filepath"
  8. "regexp"
  9. "runtime/debug"
  10. "strconv"
  11. "github.com/cyphar/filepath-securejoin"
  12. "github.com/opencontainers/runc/libcontainer/cgroups"
  13. "github.com/opencontainers/runc/libcontainer/cgroups/fs"
  14. "github.com/opencontainers/runc/libcontainer/cgroups/fs2"
  15. "github.com/opencontainers/runc/libcontainer/cgroups/systemd"
  16. "github.com/opencontainers/runc/libcontainer/configs"
  17. "github.com/opencontainers/runc/libcontainer/configs/validate"
  18. "github.com/opencontainers/runc/libcontainer/intelrdt"
  19. "github.com/opencontainers/runc/libcontainer/mount"
  20. "github.com/opencontainers/runc/libcontainer/utils"
  21. "github.com/pkg/errors"
  22. "golang.org/x/sys/unix"
  23. )
  const (
  	// stateFilename is the name of the JSON file, located directly inside a
  	// container's root directory, that loadState opens and decodes.
  	stateFilename = "state.json"
  	// execFifoFilename is presumably the name of the exec FIFO created in a
  	// container's root directory; it is not referenced in this part of the
  	// file, so confirm against its users.
  	execFifoFilename = "exec.fifo"
  )
  // idRegex is the pattern a container ID must match in validateID.
  //
  // NOTE(review): inside the character class, `+-\.` is parsed as the range
  // from '+' to '.', so besides word characters, '+', '-' and '.', the pattern
  // also admits ','. Confirm whether that is intended before tightening it,
  // since doing so would reject IDs that are accepted today.
  var idRegex = regexp.MustCompile(`^[\w+-\.]+$`)
  29. // InitArgs returns an options func to configure a LinuxFactory with the
  30. // provided init binary path and arguments.
  31. func InitArgs(args ...string) func(*LinuxFactory) error {
  32. return func(l *LinuxFactory) (err error) {
  33. if len(args) > 0 {
  34. // Resolve relative paths to ensure that its available
  35. // after directory changes.
  36. if args[0], err = filepath.Abs(args[0]); err != nil {
  37. return newGenericError(err, ConfigInvalid)
  38. }
  39. }
  40. l.InitArgs = args
  41. return nil
  42. }
  43. }
  44. // SystemdCgroups is an options func to configure a LinuxFactory to return
  45. // containers that use systemd to create and manage cgroups.
  46. func SystemdCgroups(l *LinuxFactory) error {
  47. systemdCgroupsManager, err := systemd.NewSystemdCgroupsManager()
  48. if err != nil {
  49. return err
  50. }
  51. l.NewCgroupsManager = systemdCgroupsManager
  52. return nil
  53. }
  54. func getUnifiedPath(paths map[string]string) string {
  55. unifiedPath := ""
  56. for k, v := range paths {
  57. if unifiedPath == "" {
  58. unifiedPath = v
  59. } else if v != unifiedPath {
  60. panic(errors.Errorf("expected %q path to be unified path %q, got %q", k, unifiedPath, v))
  61. }
  62. }
  63. // can be empty
  64. return unifiedPath
  65. }
  66. func cgroupfs2(l *LinuxFactory, rootless bool) error {
  67. l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
  68. m, err := fs2.NewManager(config, getUnifiedPath(paths), rootless)
  69. if err != nil {
  70. panic(err)
  71. }
  72. return m
  73. }
  74. return nil
  75. }
  76. // Cgroupfs is an options func to configure a LinuxFactory to return containers
  77. // that use the native cgroups filesystem implementation to create and manage
  78. // cgroups.
  79. func Cgroupfs(l *LinuxFactory) error {
  80. if cgroups.IsCgroup2UnifiedMode() {
  81. return cgroupfs2(l, false)
  82. }
  83. l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
  84. return &fs.Manager{
  85. Cgroups: config,
  86. Paths: paths,
  87. }
  88. }
  89. return nil
  90. }
  91. // RootlessCgroupfs is an options func to configure a LinuxFactory to return
  92. // containers that use the native cgroups filesystem implementation to create
  93. // and manage cgroups. The difference between RootlessCgroupfs and Cgroupfs is
  94. // that RootlessCgroupfs can transparently handle permission errors that occur
  95. // during rootless container (including euid=0 in userns) setup (while still allowing cgroup usage if
  96. // they've been set up properly).
  97. func RootlessCgroupfs(l *LinuxFactory) error {
  98. if cgroups.IsCgroup2UnifiedMode() {
  99. return cgroupfs2(l, true)
  100. }
  101. l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
  102. return &fs.Manager{
  103. Cgroups: config,
  104. Rootless: true,
  105. Paths: paths,
  106. }
  107. }
  108. return nil
  109. }
  110. // IntelRdtfs is an options func to configure a LinuxFactory to return
  111. // containers that use the Intel RDT "resource control" filesystem to
  112. // create and manage Intel RDT resources (e.g., L3 cache, memory bandwidth).
  113. func IntelRdtFs(l *LinuxFactory) error {
  114. l.NewIntelRdtManager = func(config *configs.Config, id string, path string) intelrdt.Manager {
  115. return &intelrdt.IntelRdtManager{
  116. Config: config,
  117. Id: id,
  118. Path: path,
  119. }
  120. }
  121. return nil
  122. }
  123. // TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs.
  124. func TmpfsRoot(l *LinuxFactory) error {
  125. mounted, err := mount.Mounted(l.Root)
  126. if err != nil {
  127. return err
  128. }
  129. if !mounted {
  130. if err := unix.Mount("tmpfs", l.Root, "tmpfs", 0, ""); err != nil {
  131. return err
  132. }
  133. }
  134. return nil
  135. }
  136. // CriuPath returns an option func to configure a LinuxFactory with the
  137. // provided criupath
  138. func CriuPath(criupath string) func(*LinuxFactory) error {
  139. return func(l *LinuxFactory) error {
  140. l.CriuPath = criupath
  141. return nil
  142. }
  143. }
  144. // New returns a linux based container factory based in the root directory and
  145. // configures the factory with the provided option funcs.
  146. func New(root string, options ...func(*LinuxFactory) error) (Factory, error) {
  147. if root != "" {
  148. if err := os.MkdirAll(root, 0700); err != nil {
  149. return nil, newGenericError(err, SystemError)
  150. }
  151. }
  152. l := &LinuxFactory{
  153. Root: root,
  154. InitPath: "/proc/self/exe",
  155. InitArgs: []string{os.Args[0], "init"},
  156. Validator: validate.New(),
  157. CriuPath: "criu",
  158. }
  159. Cgroupfs(l)
  160. for _, opt := range options {
  161. if opt == nil {
  162. continue
  163. }
  164. if err := opt(l); err != nil {
  165. return nil, err
  166. }
  167. }
  168. return l, nil
  169. }
  // LinuxFactory implements the default factory interface for linux based systems.
  type LinuxFactory struct {
  	// Root directory for the factory to store state. Create and Load reject
  	// an empty Root; each container gets a sub-directory named after its id.
  	Root string
  	// InitPath is the path for calling the init responsibilities for spawning
  	// a container. New sets it to "/proc/self/exe".
  	InitPath string
  	// InitArgs are arguments for calling the init responsibilities for spawning
  	// a container. New sets them to {os.Args[0], "init"}; the InitArgs option
  	// overrides them.
  	InitArgs []string
  	// CriuPath is the path to the criu binary used for checkpoint and restore of
  	// containers. New sets it to "criu"; the CriuPath option overrides it.
  	CriuPath string
  	// New{u,g}uidmapPath is the path to the binaries used for mapping with
  	// rootless containers. They are empty unless set through the
  	// NewuidmapPath / NewgidmapPath options.
  	NewuidmapPath string
  	NewgidmapPath string
  	// Validator provides validation to container configurations. Create runs
  	// it on every config before touching the filesystem.
  	Validator validate.Validator
  	// NewCgroupsManager returns an initialized cgroups manager for a single container.
  	// Create passes nil paths; Load passes the paths recorded in the saved state.
  	NewCgroupsManager func(config *configs.Cgroup, paths map[string]string) cgroups.Manager
  	// NewIntelRdtManager returns an initialized Intel RDT manager for a single container.
  	// New does not set it; it is only populated by the IntelRdtFs option.
  	NewIntelRdtManager func(config *configs.Config, id string, path string) intelrdt.Manager
  }
  194. func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) {
  195. if l.Root == "" {
  196. return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid)
  197. }
  198. if err := l.validateID(id); err != nil {
  199. return nil, err
  200. }
  201. if err := l.Validator.Validate(config); err != nil {
  202. return nil, newGenericError(err, ConfigInvalid)
  203. }
  204. containerRoot, err := securejoin.SecureJoin(l.Root, id)
  205. if err != nil {
  206. return nil, err
  207. }
  208. if _, err := os.Stat(containerRoot); err == nil {
  209. return nil, newGenericError(fmt.Errorf("container with id exists: %v", id), IdInUse)
  210. } else if !os.IsNotExist(err) {
  211. return nil, newGenericError(err, SystemError)
  212. }
  213. if err := os.MkdirAll(containerRoot, 0711); err != nil {
  214. return nil, newGenericError(err, SystemError)
  215. }
  216. if err := os.Chown(containerRoot, unix.Geteuid(), unix.Getegid()); err != nil {
  217. return nil, newGenericError(err, SystemError)
  218. }
  219. c := &linuxContainer{
  220. id: id,
  221. root: containerRoot,
  222. config: config,
  223. initPath: l.InitPath,
  224. initArgs: l.InitArgs,
  225. criuPath: l.CriuPath,
  226. newuidmapPath: l.NewuidmapPath,
  227. newgidmapPath: l.NewgidmapPath,
  228. cgroupManager: l.NewCgroupsManager(config.Cgroups, nil),
  229. }
  230. if intelrdt.IsCatEnabled() || intelrdt.IsMbaEnabled() {
  231. c.intelRdtManager = l.NewIntelRdtManager(config, id, "")
  232. }
  233. c.state = &stoppedState{c: c}
  234. return c, nil
  235. }
  236. func (l *LinuxFactory) Load(id string) (Container, error) {
  237. if l.Root == "" {
  238. return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid)
  239. }
  240. //when load, we need to check id is valid or not.
  241. if err := l.validateID(id); err != nil {
  242. return nil, err
  243. }
  244. containerRoot, err := securejoin.SecureJoin(l.Root, id)
  245. if err != nil {
  246. return nil, err
  247. }
  248. state, err := l.loadState(containerRoot, id)
  249. if err != nil {
  250. return nil, err
  251. }
  252. r := &nonChildProcess{
  253. processPid: state.InitProcessPid,
  254. processStartTime: state.InitProcessStartTime,
  255. fds: state.ExternalDescriptors,
  256. }
  257. c := &linuxContainer{
  258. initProcess: r,
  259. initProcessStartTime: state.InitProcessStartTime,
  260. id: id,
  261. config: &state.Config,
  262. initPath: l.InitPath,
  263. initArgs: l.InitArgs,
  264. criuPath: l.CriuPath,
  265. newuidmapPath: l.NewuidmapPath,
  266. newgidmapPath: l.NewgidmapPath,
  267. cgroupManager: l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths),
  268. root: containerRoot,
  269. created: state.Created,
  270. }
  271. c.state = &loadedState{c: c}
  272. if err := c.refreshState(); err != nil {
  273. return nil, err
  274. }
  275. if intelrdt.IsCatEnabled() || intelrdt.IsMbaEnabled() {
  276. c.intelRdtManager = l.NewIntelRdtManager(&state.Config, id, state.IntelRdtPath)
  277. }
  278. return c, nil
  279. }
  280. func (l *LinuxFactory) Type() string {
  281. return "libcontainer"
  282. }
  // StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state.
  // This is a low level implementation detail of the reexec and should not be consumed externally.
  //
  // The file descriptors are taken from the environment:
  // _LIBCONTAINER_INITPIPE (required), _LIBCONTAINER_FIFOFD (parsed only when
  // _LIBCONTAINER_INITTYPE selects the standard init) and
  // _LIBCONTAINER_CONSOLE (optional). Errors from parsing these variables are
  // returned directly; they occur before the reporting defer is registered, so
  // they are not written to the init pipe.
  func (l *LinuxFactory) StartInitialization() (err error) {
  	var (
  		pipefd, fifofd int
  		consoleSocket  *os.File
  		envInitPipe    = os.Getenv("_LIBCONTAINER_INITPIPE")
  		envFifoFd      = os.Getenv("_LIBCONTAINER_FIFOFD")
  		envConsole     = os.Getenv("_LIBCONTAINER_CONSOLE")
  	)

  	// Get the INITPIPE.
  	pipefd, err = strconv.Atoi(envInitPipe)
  	if err != nil {
  		return fmt.Errorf("unable to convert _LIBCONTAINER_INITPIPE=%s to int: %s", envInitPipe, err)
  	}

  	var (
  		pipe = os.NewFile(uintptr(pipefd), "pipe")
  		it   = initType(os.Getenv("_LIBCONTAINER_INITTYPE"))
  	)
  	defer pipe.Close()

  	// Only init processes have FIFOFD.
  	// -1 is handed to newContainerInit for every other init type.
  	fifofd = -1
  	if it == initStandard {
  		if fifofd, err = strconv.Atoi(envFifoFd); err != nil {
  			return fmt.Errorf("unable to convert _LIBCONTAINER_FIFOFD=%s to int: %s", envFifoFd, err)
  		}
  	}

  	// The console socket is optional; consoleSocket stays nil when the
  	// variable is unset.
  	if envConsole != "" {
  		console, err := strconv.Atoi(envConsole)
  		if err != nil {
  			return fmt.Errorf("unable to convert _LIBCONTAINER_CONSOLE=%s to int: %s", envConsole, err)
  		}
  		consoleSocket = os.NewFile(uintptr(console), "console-socket")
  		defer consoleSocket.Close()
  	}

  	// clear the current process's environment to clean any libcontainer
  	// specific env vars.
  	os.Clearenv()

  	// Deferred functions run in reverse order of registration: the recover
  	// handler below runs first and may overwrite the named result err, then
  	// this function reports err to the parent, then the files are closed.
  	defer func() {
  		// We have an error during the initialization of the container's init,
  		// send it back to the parent process in the form of an initError.
  		// NOTE(review): when writing to the pipe fails, the initialization
  		// error (err) is printed to stderr and the write error (werr) is
  		// dropped; confirm that this is intended.
  		if werr := utils.WriteJSON(pipe, syncT{procError}); werr != nil {
  			fmt.Fprintln(os.Stderr, err)
  			return
  		}
  		if werr := utils.WriteJSON(pipe, newSystemError(err)); werr != nil {
  			fmt.Fprintln(os.Stderr, err)
  			return
  		}
  	}()
  	defer func() {
  		// Convert a panic raised during initialization into the returned
  		// error, including the stack trace, so it reaches the defer above.
  		if e := recover(); e != nil {
  			err = fmt.Errorf("panic from initialization: %v, %v", e, string(debug.Stack()))
  		}
  	}()

  	// err is the named result (same scope), so this assigns to it rather
  	// than shadowing it; only i is newly declared.
  	i, err := newContainerInit(it, pipe, consoleSocket, fifofd)
  	if err != nil {
  		return err
  	}

  	// If Init succeeds, syscall.Exec will not return, hence none of the defers will be called.
  	return i.Init()
  }
  345. func (l *LinuxFactory) loadState(root, id string) (*State, error) {
  346. stateFilePath, err := securejoin.SecureJoin(root, stateFilename)
  347. if err != nil {
  348. return nil, err
  349. }
  350. f, err := os.Open(stateFilePath)
  351. if err != nil {
  352. if os.IsNotExist(err) {
  353. return nil, newGenericError(fmt.Errorf("container %q does not exist", id), ContainerNotExists)
  354. }
  355. return nil, newGenericError(err, SystemError)
  356. }
  357. defer f.Close()
  358. var state *State
  359. if err := json.NewDecoder(f).Decode(&state); err != nil {
  360. return nil, newGenericError(err, SystemError)
  361. }
  362. return state, nil
  363. }
  364. func (l *LinuxFactory) validateID(id string) error {
  365. if !idRegex.MatchString(id) || string(os.PathSeparator)+id != utils.CleanPath(string(os.PathSeparator)+id) {
  366. return newGenericError(fmt.Errorf("invalid id format: %v", id), InvalidIdFormat)
  367. }
  368. return nil
  369. }
  370. // NewuidmapPath returns an option func to configure a LinuxFactory with the
  371. // provided ..
  372. func NewuidmapPath(newuidmapPath string) func(*LinuxFactory) error {
  373. return func(l *LinuxFactory) error {
  374. l.NewuidmapPath = newuidmapPath
  375. return nil
  376. }
  377. }
  378. // NewgidmapPath returns an option func to configure a LinuxFactory with the
  379. // provided ..
  380. func NewgidmapPath(newgidmapPath string) func(*LinuxFactory) error {
  381. return func(l *LinuxFactory) error {
  382. l.NewgidmapPath = newgidmapPath
  383. return nil
  384. }
  385. }