Skip to content

Commit 7ef1044

Browse files
committed
Pre-open container root directory
A lot of filesystem-related stuff happens inside the container root directory, and we have used its name before. It makes sense to pre-open it and use a *os.File handle instead. Function names in internal/pathrs are kept as is for simplicity (and it is an internal package), but they now accept root as *os.File. Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
1 parent 9d22942 commit 7ef1044

File tree

5 files changed

+52
-38
lines changed

5 files changed

+52
-38
lines changed

internal/pathrs/mkdirall.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,12 @@ import (
3131
// Callers need to be very careful operating on the trailing path, as trivial
3232
// mistakes like following symlinks can cause security bugs. Most people
3333
// should probably just use [MkdirAllInRoot] or [CreateInRoot].
34-
func MkdirAllParentInRoot(root, unsafePath string, mode os.FileMode) (*os.File, string, error) {
34+
func MkdirAllParentInRoot(root *os.File, unsafePath string, mode os.FileMode) (*os.File, string, error) {
3535
// MkdirAllInRoot also does hallucinateUnsafePath, but we need to do it
3636
// here first because when we split unsafePath into (dir, file) components
3737
// we want to be doing so with the hallucinated path (so that trailing
3838
// dangling symlinks are treated correctly).
39-
unsafePath, err := hallucinateUnsafePath(root, unsafePath)
39+
unsafePath, err := hallucinateUnsafePath(root.Name(), unsafePath)
4040
if err != nil {
4141
return nil, "", fmt.Errorf("failed to construct hallucinated target path: %w", err)
4242
}

internal/pathrs/mkdirall_pathrslite.go

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,11 @@ import (
2424

2525
"github.com/cyphar/filepath-securejoin/pathrs-lite"
2626
"github.com/sirupsen/logrus"
27-
"golang.org/x/sys/unix"
2827
)
2928

3029
// MkdirAllInRoot attempts to make
3130
//
32-
// path, _ := securejoin.SecureJoin(root, unsafePath)
31+
// path, _ := securejoin.SecureJoin(root.Name(), unsafePath)
3332
// os.MkdirAll(path, mode)
3433
// os.Open(path)
3534
//
@@ -48,8 +47,8 @@ import (
4847
// handling if unsafePath has already been scoped within the rootfs (this is
4948
// needed for a lot of runc callers and fixing this would require reworking a
5049
// lot of path logic).
51-
func MkdirAllInRoot(root, unsafePath string, mode os.FileMode) (*os.File, error) {
52-
unsafePath, err := hallucinateUnsafePath(root, unsafePath)
50+
func MkdirAllInRoot(root *os.File, unsafePath string, mode os.FileMode) (*os.File, error) {
51+
unsafePath, err := hallucinateUnsafePath(root.Name(), unsafePath)
5352
if err != nil {
5453
return nil, fmt.Errorf("failed to construct hallucinated target path: %w", err)
5554
}
@@ -67,13 +66,7 @@ func MkdirAllInRoot(root, unsafePath string, mode os.FileMode) (*os.File, error)
6766
mode &= 0o1777
6867
}
6968

70-
rootDir, err := os.OpenFile(root, unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
71-
if err != nil {
72-
return nil, fmt.Errorf("open root handle: %w", err)
73-
}
74-
defer rootDir.Close()
75-
7669
return retryEAGAIN(func() (*os.File, error) {
77-
return pathrs.MkdirAllHandle(rootDir, unsafePath, mode)
70+
return pathrs.MkdirAllHandle(root, unsafePath, mode)
7871
})
7972
}

internal/pathrs/root_pathrslite.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,11 @@ import (
2828
)
2929

3030
// OpenInRoot opens the given path inside the root with the provided flags. It
31-
// is effectively shorthand for [securejoin.OpenInRoot] followed by
31+
// is effectively shorthand for [securejoin.OpenatInRoot] followed by
3232
// [securejoin.Reopen].
33-
func OpenInRoot(root, subpath string, flags int) (*os.File, error) {
33+
func OpenInRoot(root *os.File, subpath string, flags int) (*os.File, error) {
3434
handle, err := retryEAGAIN(func() (*os.File, error) {
35-
return pathrs.OpenInRoot(root, subpath)
35+
return pathrs.OpenatInRoot(root, subpath)
3636
})
3737
if err != nil {
3838
return nil, err
@@ -47,7 +47,7 @@ func OpenInRoot(root, subpath string, flags int) (*os.File, error) {
4747
// open(O_CREAT|O_NOFOLLOW) semantics. If you want the creation to use O_EXCL,
4848
// include it in the passed flags. The fileMode argument uses unix.* mode bits,
4949
// *not* os.FileMode.
50-
func CreateInRoot(root, subpath string, flags int, fileMode uint32) (*os.File, error) {
50+
func CreateInRoot(root *os.File, subpath string, flags int, fileMode uint32) (*os.File, error) {
5151
dirFd, filename, err := MkdirAllParentInRoot(root, subpath, 0o755)
5252
if err != nil {
5353
return nil, err
@@ -63,5 +63,5 @@ func CreateInRoot(root, subpath string, flags int, fileMode uint32) (*os.File, e
6363
if err != nil {
6464
return nil, err
6565
}
66-
return os.NewFile(uintptr(fd), root+"/"+subpath), nil
66+
return os.NewFile(uintptr(fd), root.Name()+"/"+subpath), nil
6767
}

libcontainer/criu_linux.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -553,10 +553,16 @@ func isOnTmpfs(path string, mounts []*configs.Mount) bool {
553553
// This function also creates missing mountpoints as long as they
554554
// are not on top of a tmpfs, as CRIU will restore tmpfs content anyway.
555555
func (c *Container) prepareCriuRestoreMounts(mounts []*configs.Mount) error {
556+
rootFd, err := os.OpenFile(c.config.Rootfs, unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
557+
if err != nil {
558+
return fmt.Errorf("open rootfs handle: %w", err)
559+
}
560+
defer rootFd.Close()
561+
556562
umounts := []string{}
557563
defer func() {
558564
for _, u := range umounts {
559-
mntFile, err := pathrs.OpenInRoot(c.config.Rootfs, u, unix.O_PATH)
565+
mntFile, err := pathrs.OpenInRoot(rootFd, u, unix.O_PATH)
560566
if err != nil {
561567
logrus.Warnf("Error during cleanup unmounting %s: open handle: %v", u, err)
562568
continue
@@ -590,7 +596,7 @@ func (c *Container) prepareCriuRestoreMounts(mounts []*configs.Mount) error {
590596
continue
591597
}
592598
me := mountEntry{Mount: m}
593-
if err := me.createOpenMountpoint(c.config.Rootfs); err != nil {
599+
if err := me.createOpenMountpoint(rootFd); err != nil {
594600
return fmt.Errorf("create criu restore mountpoint for %s mount: %w", me.Destination, err)
595601
}
596602
if me.dstFile != nil {

libcontainer/rootfs_linux.go

Lines changed: 33 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ const defaultMountFlags = unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
3535

3636
// mountConfig contains mount data not specific to a mount point.
3737
type mountConfig struct {
38-
root string
38+
root *os.File
3939
label string
4040
cgroup2Path string
4141
rootlessCgroups bool
@@ -106,8 +106,14 @@ func prepareRootfs(pipe *syncSocket, iConfig *initConfig) (err error) {
106106
return fmt.Errorf("error preparing rootfs: %w", err)
107107
}
108108

109+
rootFd, err := os.OpenFile(config.Rootfs, unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
110+
if err != nil {
111+
return fmt.Errorf("open rootfs handle: %w", err)
112+
}
113+
defer rootFd.Close()
114+
109115
mountConfig := &mountConfig{
110-
root: config.Rootfs,
116+
root: rootFd,
111117
label: config.MountLabel,
112118
cgroup2Path: iConfig.Cgroup2Path,
113119
rootlessCgroups: config.RootlessCgroups,
@@ -167,7 +173,7 @@ func prepareRootfs(pipe *syncSocket, iConfig *initConfig) (err error) {
167173

168174
setupDev := needsSetupDev(config)
169175
if setupDev {
170-
if err := createDevices(config); err != nil {
176+
if err := createDevices(config, rootFd); err != nil {
171177
return fmt.Errorf("error creating device nodes: %w", err)
172178
}
173179
if err := setupPtmx(config); err != nil {
@@ -362,7 +368,7 @@ func mountCgroupV1(m mountEntry, c *mountConfig) error {
362368
// symlink(2) is very dumb, it will just shove the path into
363369
// the link and doesn't do any checks or relative path
364370
// conversion. Also, don't error out if the cgroup already exists.
365-
if err := os.Symlink(mc, filepath.Join(c.root, m.Destination, ss)); err != nil && !errors.Is(err, os.ErrExist) {
371+
if err := os.Symlink(mc, filepath.Join(c.root.Name(), m.Destination, ss)); err != nil && !errors.Is(err, os.ErrExist) {
366372
return err
367373
}
368374
}
@@ -426,15 +432,22 @@ func doTmpfsCopyUp(m mountEntry, mountLabel string) (Err error) {
426432
}
427433
defer tmpDirFile.Close()
428434

435+
hostRootFd, err := os.OpenFile("/", unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
436+
if err != nil {
437+
return fmt.Errorf("tmpcopyup: open host root: %w", err)
438+
}
439+
defer hostRootFd.Close()
440+
429441
// Configure the *host* tmpdir as if it's the container mount. We change
430442
// m.dstFile since we are going to mount *on the host*.
431443
hostMount := mountEntry{
432444
Mount: m.Mount,
433445
dstFile: tmpDirFile,
434446
}
435-
if err := hostMount.mountPropagate("/", mountLabel); err != nil {
447+
if err := hostMount.mountPropagate(hostRootFd, mountLabel); err != nil {
436448
return err
437449
}
450+
hostRootFd.Close()
438451
defer func() {
439452
if Err != nil {
440453
if err := unmount(tmpDir, unix.MNT_DETACH); err != nil {
@@ -503,9 +516,10 @@ func statfsToMountFlags(st unix.Statfs_t) int {
503516
return flags
504517
}
505518

506-
func (m *mountEntry) createOpenMountpoint(rootfs string) (Err error) {
519+
func (m *mountEntry) createOpenMountpoint(root *os.File) (Err error) {
520+
rootfs := root.Name()
507521
unsafePath := pathrs.LexicallyStripRoot(rootfs, m.Destination)
508-
dstFile, err := pathrs.OpenInRoot(rootfs, unsafePath, unix.O_PATH)
522+
dstFile, err := pathrs.OpenInRoot(root, unsafePath, unix.O_PATH)
509523
defer func() {
510524
if dstFile != nil && Err != nil {
511525
_ = dstFile.Close()
@@ -542,9 +556,9 @@ func (m *mountEntry) createOpenMountpoint(rootfs string) (Err error) {
542556
dstIsFile = !fi.IsDir()
543557
}
544558
if dstIsFile {
545-
dstFile, err = pathrs.CreateInRoot(rootfs, unsafePath, unix.O_CREAT|unix.O_EXCL|unix.O_NOFOLLOW, 0o644)
559+
dstFile, err = pathrs.CreateInRoot(root, unsafePath, unix.O_CREAT|unix.O_EXCL|unix.O_NOFOLLOW, 0o644)
546560
} else {
547-
dstFile, err = pathrs.MkdirAllInRoot(rootfs, unsafePath, 0o755)
561+
dstFile, err = pathrs.MkdirAllInRoot(root, unsafePath, 0o755)
548562
}
549563
if err != nil {
550564
return fmt.Errorf("make mountpoint %q: %w", m.Destination, err)
@@ -590,7 +604,7 @@ func mountToRootfs(c *mountConfig, m mountEntry) error {
590604
// has been a "fun" attack scenario in the past.
591605
// TODO: This won't be necessary once we switch to libpathrs and we can
592606
// stop all of these symlink-exchange attacks.
593-
rootfs := c.root
607+
rootfs := c.root.Name()
594608
dest := filepath.Clean(m.Destination)
595609
if !pathrs.IsLexicallyInRoot(rootfs, dest) {
596610
// Do not use securejoin as it resolves symlinks.
@@ -923,7 +937,7 @@ func reOpenDevNull() error {
923937
}
924938

925939
// Create the device nodes in the container.
926-
func createDevices(config *configs.Config) error {
940+
func createDevices(config *configs.Config, rootFd *os.File) error {
927941
useBindMount := userns.RunningInUserNS() || config.Namespaces.Contains(configs.NEWUSER)
928942
for _, node := range config.Devices {
929943

@@ -934,7 +948,7 @@ func createDevices(config *configs.Config) error {
934948

935949
// containers running in a user namespace are not allowed to mknod
936950
// devices so we can just bind mount it from the host.
937-
if err := createDeviceNode(config.Rootfs, node, useBindMount); err != nil {
951+
if err := createDeviceNode(rootFd, node, useBindMount); err != nil {
938952
return err
939953
}
940954
}
@@ -954,12 +968,12 @@ func bindMountDeviceNode(destDir *os.File, destName string, node *devices.Device
954968
}
955969

956970
// Creates the device node in the rootfs of the container.
957-
func createDeviceNode(rootfs string, node *devices.Device, bind bool) error {
971+
func createDeviceNode(rootFd *os.File, node *devices.Device, bind bool) error {
958972
if node.Path == "" {
959973
// The node only exists for cgroup reasons, ignore it here.
960974
return nil
961975
}
962-
destDir, destName, err := pathrs.MkdirAllParentInRoot(rootfs, node.Path, 0o755)
976+
destDir, destName, err := pathrs.MkdirAllParentInRoot(rootFd, node.Path, 0o755)
963977
if err != nil {
964978
return fmt.Errorf("mkdir parent of device inode %q: %w", node.Path, err)
965979
}
@@ -1352,16 +1366,17 @@ func maskPaths(paths []string, mountLabel string) error {
13521366
return nil
13531367
}
13541368

1355-
func reopenAfterMount(rootfs string, f *os.File, flags int) (_ *os.File, Err error) {
1369+
func reopenAfterMount(rootFd, f *os.File, flags int) (_ *os.File, Err error) {
13561370
fullPath, err := procfs.ProcSelfFdReadlink(f)
13571371
if err != nil {
13581372
return nil, fmt.Errorf("get full path: %w", err)
13591373
}
1374+
rootfs := rootFd.Name()
13601375
if !pathrs.IsLexicallyInRoot(rootfs, fullPath) {
13611376
return nil, fmt.Errorf("mountpoint %q is outside of rootfs %q", fullPath, rootfs)
13621377
}
13631378
unsafePath := pathrs.LexicallyStripRoot(rootfs, fullPath)
1364-
reopened, err := pathrs.OpenInRoot(rootfs, unsafePath, flags)
1379+
reopened, err := pathrs.OpenInRoot(rootFd, unsafePath, flags)
13651380
if err != nil {
13661381
return nil, fmt.Errorf("re-open mountpoint %q: %w", unsafePath, err)
13671382
}
@@ -1391,7 +1406,7 @@ func reopenAfterMount(rootfs string, f *os.File, flags int) (_ *os.File, Err err
13911406

13921407
// Do the mount operation followed by additional mounts required to take care
13931408
// of propagation flags. This will always be scoped inside the container rootfs.
1394-
func (m *mountEntry) mountPropagate(rootfs, mountLabel string) error {
1409+
func (m *mountEntry) mountPropagate(rootFd *os.File, mountLabel string) error {
13951410
var (
13961411
data = label.FormatMountLabel(m.Data, mountLabel)
13971412
flags = m.Flags
@@ -1417,7 +1432,7 @@ func (m *mountEntry) mountPropagate(rootfs, mountLabel string) error {
14171432
//
14181433
// TODO: Use move_mount(2) on newer kernels so that this is no longer
14191434
// necessary on modern systems.
1420-
newDstFile, err := reopenAfterMount(rootfs, m.dstFile, unix.O_PATH)
1435+
newDstFile, err := reopenAfterMount(rootFd, m.dstFile, unix.O_PATH)
14211436
if err != nil {
14221437
return fmt.Errorf("reopen mountpoint after mount: %w", err)
14231438
}

0 commit comments

Comments
 (0)