factory_linux.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397
  1. // +build linux
  2. package libcontainer
  3. import (
  4. "encoding/json"
  5. "fmt"
  6. "os"
  7. "path/filepath"
  8. "regexp"
  9. "runtime/debug"
  10. "strconv"
  11. "github.com/cyphar/filepath-securejoin"
  12. "github.com/opencontainers/runc/libcontainer/cgroups"
  13. "github.com/opencontainers/runc/libcontainer/cgroups/fs"
  14. "github.com/opencontainers/runc/libcontainer/cgroups/systemd"
  15. "github.com/opencontainers/runc/libcontainer/configs"
  16. "github.com/opencontainers/runc/libcontainer/configs/validate"
  17. "github.com/opencontainers/runc/libcontainer/intelrdt"
  18. "github.com/opencontainers/runc/libcontainer/mount"
  19. "github.com/opencontainers/runc/libcontainer/utils"
  20. "golang.org/x/sys/unix"
  21. )
  const (
  	// stateFilename is the name of the file, inside a container's state
  	// directory, that loadState reads the serialized container state from.
  	stateFilename = "state.json"
  	// execFifoFilename is the file name used for the exec FIFO. It is not
  	// referenced in this part of the file.
  	execFifoFilename = "exec.fifo"
  )

  // idRegex matches the container ids accepted by validateID: one or more of
  // the word characters (0-9 A-Z a-z _) and the punctuation '+', '.' and '-'.
  //
  // The '-' is deliberately placed last in the character class so that it is a
  // literal. Written as `+-\.` it would form a range from '+' to '.', which
  // also admits ','.
  var idRegex = regexp.MustCompile(`^[\w+.-]+$`)
  27. // InitArgs returns an options func to configure a LinuxFactory with the
  28. // provided init binary path and arguments.
  29. func InitArgs(args ...string) func(*LinuxFactory) error {
  30. return func(l *LinuxFactory) (err error) {
  31. if len(args) > 0 {
  32. // Resolve relative paths to ensure that its available
  33. // after directory changes.
  34. if args[0], err = filepath.Abs(args[0]); err != nil {
  35. return newGenericError(err, ConfigInvalid)
  36. }
  37. }
  38. l.InitArgs = args
  39. return nil
  40. }
  41. }
  42. // SystemdCgroups is an options func to configure a LinuxFactory to return
  43. // containers that use systemd to create and manage cgroups.
  44. func SystemdCgroups(l *LinuxFactory) error {
  45. l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
  46. return &systemd.Manager{
  47. Cgroups: config,
  48. Paths: paths,
  49. }
  50. }
  51. return nil
  52. }
  53. // Cgroupfs is an options func to configure a LinuxFactory to return containers
  54. // that use the native cgroups filesystem implementation to create and manage
  55. // cgroups.
  56. func Cgroupfs(l *LinuxFactory) error {
  57. l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
  58. return &fs.Manager{
  59. Cgroups: config,
  60. Paths: paths,
  61. }
  62. }
  63. return nil
  64. }
  65. // RootlessCgroupfs is an options func to configure a LinuxFactory to return
  66. // containers that use the native cgroups filesystem implementation to create
  67. // and manage cgroups. The difference between RootlessCgroupfs and Cgroupfs is
  68. // that RootlessCgroupfs can transparently handle permission errors that occur
  69. // during rootless container (including euid=0 in userns) setup (while still allowing cgroup usage if
  70. // they've been set up properly).
  71. func RootlessCgroupfs(l *LinuxFactory) error {
  72. l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
  73. return &fs.Manager{
  74. Cgroups: config,
  75. Rootless: true,
  76. Paths: paths,
  77. }
  78. }
  79. return nil
  80. }
  81. // IntelRdtfs is an options func to configure a LinuxFactory to return
  82. // containers that use the Intel RDT "resource control" filesystem to
  83. // create and manage Intel RDT resources (e.g., L3 cache, memory bandwidth).
  84. func IntelRdtFs(l *LinuxFactory) error {
  85. l.NewIntelRdtManager = func(config *configs.Config, id string, path string) intelrdt.Manager {
  86. return &intelrdt.IntelRdtManager{
  87. Config: config,
  88. Id: id,
  89. Path: path,
  90. }
  91. }
  92. return nil
  93. }
  94. // TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs.
  95. func TmpfsRoot(l *LinuxFactory) error {
  96. mounted, err := mount.Mounted(l.Root)
  97. if err != nil {
  98. return err
  99. }
  100. if !mounted {
  101. if err := unix.Mount("tmpfs", l.Root, "tmpfs", 0, ""); err != nil {
  102. return err
  103. }
  104. }
  105. return nil
  106. }
  107. // CriuPath returns an option func to configure a LinuxFactory with the
  108. // provided criupath
  109. func CriuPath(criupath string) func(*LinuxFactory) error {
  110. return func(l *LinuxFactory) error {
  111. l.CriuPath = criupath
  112. return nil
  113. }
  114. }
  115. // New returns a linux based container factory based in the root directory and
  116. // configures the factory with the provided option funcs.
  117. func New(root string, options ...func(*LinuxFactory) error) (Factory, error) {
  118. if root != "" {
  119. if err := os.MkdirAll(root, 0700); err != nil {
  120. return nil, newGenericError(err, SystemError)
  121. }
  122. }
  123. l := &LinuxFactory{
  124. Root: root,
  125. InitPath: "/proc/self/exe",
  126. InitArgs: []string{os.Args[0], "init"},
  127. Validator: validate.New(),
  128. CriuPath: "criu",
  129. }
  130. Cgroupfs(l)
  131. for _, opt := range options {
  132. if opt == nil {
  133. continue
  134. }
  135. if err := opt(l); err != nil {
  136. return nil, err
  137. }
  138. }
  139. return l, nil
  140. }
  141. // LinuxFactory implements the default factory interface for linux based systems.
  142. type LinuxFactory struct {
  143. // Root directory for the factory to store state.
  144. Root string
  145. // InitPath is the path for calling the init responsibilities for spawning
  146. // a container.
  147. InitPath string
  148. // InitArgs are arguments for calling the init responsibilities for spawning
  149. // a container.
  150. InitArgs []string
  151. // CriuPath is the path to the criu binary used for checkpoint and restore of
  152. // containers.
  153. CriuPath string
  154. // New{u,g}uidmapPath is the path to the binaries used for mapping with
  155. // rootless containers.
  156. NewuidmapPath string
  157. NewgidmapPath string
  158. // Validator provides validation to container configurations.
  159. Validator validate.Validator
  160. // NewCgroupsManager returns an initialized cgroups manager for a single container.
  161. NewCgroupsManager func(config *configs.Cgroup, paths map[string]string) cgroups.Manager
  162. // NewIntelRdtManager returns an initialized Intel RDT manager for a single container.
  163. NewIntelRdtManager func(config *configs.Config, id string, path string) intelrdt.Manager
  164. }
  165. func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) {
  166. if l.Root == "" {
  167. return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid)
  168. }
  169. if err := l.validateID(id); err != nil {
  170. return nil, err
  171. }
  172. if err := l.Validator.Validate(config); err != nil {
  173. return nil, newGenericError(err, ConfigInvalid)
  174. }
  175. containerRoot, err := securejoin.SecureJoin(l.Root, id)
  176. if err != nil {
  177. return nil, err
  178. }
  179. if _, err := os.Stat(containerRoot); err == nil {
  180. return nil, newGenericError(fmt.Errorf("container with id exists: %v", id), IdInUse)
  181. } else if !os.IsNotExist(err) {
  182. return nil, newGenericError(err, SystemError)
  183. }
  184. if err := os.MkdirAll(containerRoot, 0711); err != nil {
  185. return nil, newGenericError(err, SystemError)
  186. }
  187. if err := os.Chown(containerRoot, unix.Geteuid(), unix.Getegid()); err != nil {
  188. return nil, newGenericError(err, SystemError)
  189. }
  190. c := &linuxContainer{
  191. id: id,
  192. root: containerRoot,
  193. config: config,
  194. initPath: l.InitPath,
  195. initArgs: l.InitArgs,
  196. criuPath: l.CriuPath,
  197. newuidmapPath: l.NewuidmapPath,
  198. newgidmapPath: l.NewgidmapPath,
  199. cgroupManager: l.NewCgroupsManager(config.Cgroups, nil),
  200. }
  201. if intelrdt.IsCatEnabled() || intelrdt.IsMbaEnabled() {
  202. c.intelRdtManager = l.NewIntelRdtManager(config, id, "")
  203. }
  204. c.state = &stoppedState{c: c}
  205. return c, nil
  206. }
  207. func (l *LinuxFactory) Load(id string) (Container, error) {
  208. if l.Root == "" {
  209. return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid)
  210. }
  211. //when load, we need to check id is valid or not.
  212. if err := l.validateID(id); err != nil {
  213. return nil, err
  214. }
  215. containerRoot, err := securejoin.SecureJoin(l.Root, id)
  216. if err != nil {
  217. return nil, err
  218. }
  219. state, err := l.loadState(containerRoot, id)
  220. if err != nil {
  221. return nil, err
  222. }
  223. r := &nonChildProcess{
  224. processPid: state.InitProcessPid,
  225. processStartTime: state.InitProcessStartTime,
  226. fds: state.ExternalDescriptors,
  227. }
  228. c := &linuxContainer{
  229. initProcess: r,
  230. initProcessStartTime: state.InitProcessStartTime,
  231. id: id,
  232. config: &state.Config,
  233. initPath: l.InitPath,
  234. initArgs: l.InitArgs,
  235. criuPath: l.CriuPath,
  236. newuidmapPath: l.NewuidmapPath,
  237. newgidmapPath: l.NewgidmapPath,
  238. cgroupManager: l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths),
  239. root: containerRoot,
  240. created: state.Created,
  241. }
  242. c.state = &loadedState{c: c}
  243. if err := c.refreshState(); err != nil {
  244. return nil, err
  245. }
  246. if intelrdt.IsCatEnabled() || intelrdt.IsMbaEnabled() {
  247. c.intelRdtManager = l.NewIntelRdtManager(&state.Config, id, state.IntelRdtPath)
  248. }
  249. return c, nil
  250. }
  251. func (l *LinuxFactory) Type() string {
  252. return "libcontainer"
  253. }
  254. // StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state
  255. // This is a low level implementation detail of the reexec and should not be consumed externally
  256. func (l *LinuxFactory) StartInitialization() (err error) {
  257. var (
  258. pipefd, fifofd int
  259. consoleSocket *os.File
  260. envInitPipe = os.Getenv("_LIBCONTAINER_INITPIPE")
  261. envFifoFd = os.Getenv("_LIBCONTAINER_FIFOFD")
  262. envConsole = os.Getenv("_LIBCONTAINER_CONSOLE")
  263. )
  264. // Get the INITPIPE.
  265. pipefd, err = strconv.Atoi(envInitPipe)
  266. if err != nil {
  267. return fmt.Errorf("unable to convert _LIBCONTAINER_INITPIPE=%s to int: %s", envInitPipe, err)
  268. }
  269. var (
  270. pipe = os.NewFile(uintptr(pipefd), "pipe")
  271. it = initType(os.Getenv("_LIBCONTAINER_INITTYPE"))
  272. )
  273. defer pipe.Close()
  274. // Only init processes have FIFOFD.
  275. fifofd = -1
  276. if it == initStandard {
  277. if fifofd, err = strconv.Atoi(envFifoFd); err != nil {
  278. return fmt.Errorf("unable to convert _LIBCONTAINER_FIFOFD=%s to int: %s", envFifoFd, err)
  279. }
  280. }
  281. if envConsole != "" {
  282. console, err := strconv.Atoi(envConsole)
  283. if err != nil {
  284. return fmt.Errorf("unable to convert _LIBCONTAINER_CONSOLE=%s to int: %s", envConsole, err)
  285. }
  286. consoleSocket = os.NewFile(uintptr(console), "console-socket")
  287. defer consoleSocket.Close()
  288. }
  289. // clear the current process's environment to clean any libcontainer
  290. // specific env vars.
  291. os.Clearenv()
  292. defer func() {
  293. // We have an error during the initialization of the container's init,
  294. // send it back to the parent process in the form of an initError.
  295. if werr := utils.WriteJSON(pipe, syncT{procError}); werr != nil {
  296. fmt.Fprintln(os.Stderr, err)
  297. return
  298. }
  299. if werr := utils.WriteJSON(pipe, newSystemError(err)); werr != nil {
  300. fmt.Fprintln(os.Stderr, err)
  301. return
  302. }
  303. }()
  304. defer func() {
  305. if e := recover(); e != nil {
  306. err = fmt.Errorf("panic from initialization: %v, %v", e, string(debug.Stack()))
  307. }
  308. }()
  309. i, err := newContainerInit(it, pipe, consoleSocket, fifofd)
  310. if err != nil {
  311. return err
  312. }
  313. // If Init succeeds, syscall.Exec will not return, hence none of the defers will be called.
  314. return i.Init()
  315. }
  316. func (l *LinuxFactory) loadState(root, id string) (*State, error) {
  317. stateFilePath, err := securejoin.SecureJoin(root, stateFilename)
  318. if err != nil {
  319. return nil, err
  320. }
  321. f, err := os.Open(stateFilePath)
  322. if err != nil {
  323. if os.IsNotExist(err) {
  324. return nil, newGenericError(fmt.Errorf("container %q does not exist", id), ContainerNotExists)
  325. }
  326. return nil, newGenericError(err, SystemError)
  327. }
  328. defer f.Close()
  329. var state *State
  330. if err := json.NewDecoder(f).Decode(&state); err != nil {
  331. return nil, newGenericError(err, SystemError)
  332. }
  333. return state, nil
  334. }
  335. func (l *LinuxFactory) validateID(id string) error {
  336. if !idRegex.MatchString(id) || string(os.PathSeparator)+id != utils.CleanPath(string(os.PathSeparator)+id) {
  337. return newGenericError(fmt.Errorf("invalid id format: %v", id), InvalidIdFormat)
  338. }
  339. return nil
  340. }
  341. // NewuidmapPath returns an option func to configure a LinuxFactory with the
  342. // provided ..
  343. func NewuidmapPath(newuidmapPath string) func(*LinuxFactory) error {
  344. return func(l *LinuxFactory) error {
  345. l.NewuidmapPath = newuidmapPath
  346. return nil
  347. }
  348. }
  349. // NewgidmapPath returns an option func to configure a LinuxFactory with the
  350. // provided ..
  351. func NewgidmapPath(newgidmapPath string) func(*LinuxFactory) error {
  352. return func(l *LinuxFactory) error {
  353. l.NewgidmapPath = newgidmapPath
  354. return nil
  355. }
  356. }