Skip to content

Commit f047c6b

Browse files
authored
Merge pull request #5101 from kolyshkin/fix-exec
libct: prepareCgroupFD: fall back to container init cgroup
2 parents fffd7cf + 1fdbab8 commit f047c6b

File tree

2 files changed

+119
-26
lines changed

2 files changed

+119
-26
lines changed

libcontainer/process_linux.go

Lines changed: 84 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,11 @@ import (
1818
"syscall"
1919
"time"
2020

21-
"github.com/opencontainers/runtime-spec/specs-go"
2221
"github.com/sirupsen/logrus"
2322
"golang.org/x/sys/unix"
2423

24+
"github.com/opencontainers/runtime-spec/specs-go"
25+
2526
"github.com/opencontainers/cgroups"
2627
"github.com/opencontainers/cgroups/fs2"
2728
"github.com/opencontainers/runc/libcontainer/configs"
@@ -287,32 +288,66 @@ func (p *setnsProcess) addIntoCgroupV1() error {
287288
return nil
288289
}
289290

291+
// initProcessCgroupPath returns container init's cgroup path,
292+
// as read from /proc/PID/cgroup. Only works for cgroup v2.
293+
// Returns empty string if the path can not be obtained.
294+
//
295+
// This is used by runc exec in these cases:
296+
//
297+
// 1. On cgroup v2 + nesting + domain controllers, adding to initial cgroup
298+
// may fail with EBUSY (https://github.com/opencontainers/runc/issues/2356);
299+
//
300+
// 2. A container init process with no cgroupns and /sys/fs/cgroup rw access
301+
// may move itself to any other cgroup, and the original cgroup will disappear.
302+
func (p *setnsProcess) initProcessCgroupPath() string {
303+
if p.initProcessPid == 0 || !cgroups.IsCgroup2UnifiedMode() {
304+
return ""
305+
}
306+
307+
cg, err := cgroups.ParseCgroupFile("/proc/" + strconv.Itoa(p.initProcessPid) + "/cgroup")
308+
if err != nil {
309+
return ""
310+
}
311+
cgroup, ok := cg[""]
312+
if !ok {
313+
return ""
314+
}
315+
316+
return fs2.UnifiedMountpoint + cgroup
317+
}
318+
290319
func (p *setnsProcess) addIntoCgroupV2() error {
291320
sub := p.process.SubCgroupPaths[""]
292321
err := p.manager.AddPid(sub, p.pid())
293-
if err != nil && !p.rootlessCgroups {
294-
// On cgroup v2 + nesting + domain controllers, adding to initial cgroup may fail with EBUSY.
295-
// https://github.com/opencontainers/runc/issues/2356#issuecomment-621277643
296-
// Try to join the cgroup of InitProcessPid, unless sub-cgroup is explicitly set.
297-
if p.initProcessPid != 0 && sub == "" {
298-
initProcCgroupFile := fmt.Sprintf("/proc/%d/cgroup", p.initProcessPid)
299-
initCg, initCgErr := cgroups.ParseCgroupFile(initProcCgroupFile)
300-
if initCgErr == nil {
301-
if initCgPath, ok := initCg[""]; ok {
302-
initCgDirpath := filepath.Join(fs2.UnifiedMountpoint, initCgPath)
303-
logrus.Debugf("adding pid %d to cgroup failed (%v), attempting to join %s",
304-
p.pid(), err, initCgDirpath)
305-
// NOTE: initCgDirPath is not guaranteed to exist because we didn't pause the container.
306-
err = cgroups.WriteCgroupProc(initCgDirpath, p.pid())
307-
}
308-
}
309-
}
310-
if err != nil {
311-
return fmt.Errorf("error adding pid %d to cgroups: %w", p.pid(), err)
312-
}
322+
if err == nil {
323+
return nil
313324
}
314325

326+
// Failed to join the configured cgroup. Fall back to container init's cgroup
327+
// unless sub-cgroup is explicitly requested.
328+
var path string
329+
if sub != "" {
330+
goto fail
331+
}
332+
path = p.initProcessCgroupPath()
333+
if path == "" {
334+
goto fail
335+
}
336+
logrus.Debugf("adding pid %d to configured cgroup failed (%v), will join container init cgroup %q", p.pid(), err, path)
337+
// NOTE: path is not guaranteed to exist because we didn't pause the container.
338+
err = cgroups.WriteCgroupProc(path, p.pid())
339+
if err != nil {
340+
goto fail
341+
}
315342
return nil
343+
344+
fail:
345+
if p.rootlessCgroups {
346+
// Ignore cgroup join errors when rootless.
347+
return nil
348+
}
349+
350+
return fmt.Errorf("error adding pid %d to cgroups: %w", p.pid(), err)
316351
}
317352

318353
func (p *setnsProcess) addIntoCgroup() error {
@@ -331,6 +366,8 @@ func (p *setnsProcess) addIntoCgroup() error {
331366
// to join cgroup early, in p.cmd.Start. Returns an *os.File which
332367
// must be closed by the caller after p.Cmd.Start return.
333368
func (p *setnsProcess) prepareCgroupFD() (*os.File, error) {
369+
const openFlags = unix.O_PATH | unix.O_DIRECTORY | unix.O_CLOEXEC
370+
334371
if !cgroups.IsCgroup2UnifiedMode() {
335372
return nil, nil
336373
}
@@ -348,14 +385,28 @@ func (p *setnsProcess) prepareCgroupFD() (*os.File, error) {
348385
return nil, fmt.Errorf("bad sub cgroup path: %s", sub)
349386
}
350387

351-
fd, err := cgroups.OpenFile(base, sub, unix.O_PATH|unix.O_DIRECTORY|unix.O_CLOEXEC)
388+
fd, err := cgroups.OpenFile(base, sub, openFlags)
389+
if err == nil {
390+
goto success
391+
}
392+
// Failed to open the configured cgroup. Fall back to container init's cgroup
393+
// unless sub-cgroup is explicitly requested. The fallback logic should be
394+
// the same as in addIntoCgroupV2.
395+
if sub != "" {
396+
goto fail
397+
}
398+
cgroup = p.initProcessCgroupPath()
399+
if cgroup == "" {
400+
goto fail
401+
}
402+
logrus.Debugf("failed to open configured cgroup (%v), will open container init cgroup %q", err, cgroup)
403+
// NOTE: path is not guaranteed to exist because we didn't pause the container.
404+
fd, err = cgroups.OpenFile(cgroup, "", openFlags)
352405
if err != nil {
353-
if p.rootlessCgroups {
354-
return nil, nil
355-
}
356-
return nil, fmt.Errorf("can't open cgroup: %w", err)
406+
goto fail
357407
}
358408

409+
success:
359410
logrus.Debugf("using CLONE_INTO_CGROUP %q", cgroup)
360411
if p.cmd.SysProcAttr == nil {
361412
p.cmd.SysProcAttr = &syscall.SysProcAttr{}
@@ -364,6 +415,13 @@ func (p *setnsProcess) prepareCgroupFD() (*os.File, error) {
364415
p.cmd.SysProcAttr.CgroupFD = int(fd.Fd())
365416

366417
return fd, nil
418+
419+
fail:
420+
// Ignore cgroup join error for rootless.
421+
if p.rootlessCgroups {
422+
return nil, nil
423+
}
424+
return nil, fmt.Errorf("can't open cgroup: %w", err)
367425
}
368426

369427
// startWithCgroupFD starts a process via clone3 with CLONE_INTO_CGROUP,

tests/integration/exec.bats

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,41 @@ function check_exec_debug() {
321321
[ "$status" -eq 0 ]
322322
}
323323

324+
# https://github.com/opencontainers/runc/issues/5089
325+
@test "runc exec [init changes cgroup]" {
326+
requires root cgroups_v2
327+
328+
NEW_CGROUP_REL=/runc-tst-$$
329+
NEW_CGROUP=/sys/fs/cgroup$NEW_CGROUP_REL
330+
mkdir $NEW_CGROUP
331+
332+
# The container is placed into a $CGROUP_V2_PATH cgroup.
333+
set_cgroups_path
334+
# And upon the start it moves itself into $NEW_CGROUP.
335+
set_cgroup_mount_writable
336+
update_config ' .linux.namespaces -= [{"type": "cgroup"}]
337+
| .process.args = ["sh", "-c", "echo 1 > '$NEW_CGROUP'/cgroup.procs && exec sleep 1h"]'
338+
339+
runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
340+
[ $status -eq 0 ]
341+
testcontainer test_busybox running
342+
sleep 1
343+
# Remove the original container cgroup. If systemd cgroup manager is used by runc,
344+
# the cgroup might have already be deleted by systemd, so we ignore rmdir errors.
345+
rmdir "$CGROUP_V2_PATH" || true
346+
test -d "$CGROUP_V2_PATH" && false
347+
348+
# Test that runc exec is able to fallback to container's init cgroup
349+
# even if the original cgroup is gone.
350+
runc exec test_busybox cat /proc/self/cgroup
351+
[ $status -eq 0 ]
352+
[ "$output" = "0::$NEW_CGROUP_REL" ]
353+
354+
# Cleanup.
355+
runc delete -f test_busybox
356+
rmdir "$NEW_CGROUP"
357+
}
358+
324359
@test "runc exec [execve error]" {
325360
cat <<EOF >rootfs/run.sh
326361
#!/mmnnttbb foo bar

0 commit comments

Comments
 (0)