@@ -18,10 +18,11 @@ import (
1818 "syscall"
1919 "time"
2020
21- "github.com/opencontainers/runtime-spec/specs-go"
2221 "github.com/sirupsen/logrus"
2322 "golang.org/x/sys/unix"
2423
24+ "github.com/opencontainers/runtime-spec/specs-go"
25+
2526 "github.com/opencontainers/cgroups"
2627 "github.com/opencontainers/cgroups/fs2"
2728 "github.com/opencontainers/runc/libcontainer/configs"
@@ -287,32 +288,66 @@ func (p *setnsProcess) addIntoCgroupV1() error {
287288 return nil
288289}
289290
291+ // initProcessCgroupPath returns container init's cgroup path,
292+ // as read from /proc/PID/cgroup. Only works for cgroup v2.
293+ // Returns empty string if the path can not be obtained.
294+ //
295+ // This is used by runc exec in these cases:
296+ //
297+ // 1. On cgroup v2 + nesting + domain controllers, adding to initial cgroup
298+ // may fail with EBUSY (https://github.com/opencontainers/runc/issues/2356);
299+ //
300+ // 2. A container init process with no cgroupns and /sys/fs/cgroup rw access
301+ // may move itself to any other cgroup, and the original cgroup will disappear.
302+ func (p * setnsProcess ) initProcessCgroupPath () string {
303+ if p .initProcessPid == 0 || ! cgroups .IsCgroup2UnifiedMode () {
304+ return ""
305+ }
306+
307+ cg , err := cgroups .ParseCgroupFile ("/proc/" + strconv .Itoa (p .initProcessPid ) + "/cgroup" )
308+ if err != nil {
309+ return ""
310+ }
311+ cgroup , ok := cg ["" ]
312+ if ! ok {
313+ return ""
314+ }
315+
316+ return fs2 .UnifiedMountpoint + cgroup
317+ }
318+
290319func (p * setnsProcess ) addIntoCgroupV2 () error {
291320 sub := p .process .SubCgroupPaths ["" ]
292321 err := p .manager .AddPid (sub , p .pid ())
293- if err != nil && ! p .rootlessCgroups {
294- // On cgroup v2 + nesting + domain controllers, adding to initial cgroup may fail with EBUSY.
295- // https://github.com/opencontainers/runc/issues/2356#issuecomment-621277643
296- // Try to join the cgroup of InitProcessPid, unless sub-cgroup is explicitly set.
297- if p .initProcessPid != 0 && sub == "" {
298- initProcCgroupFile := fmt .Sprintf ("/proc/%d/cgroup" , p .initProcessPid )
299- initCg , initCgErr := cgroups .ParseCgroupFile (initProcCgroupFile )
300- if initCgErr == nil {
301- if initCgPath , ok := initCg ["" ]; ok {
302- initCgDirpath := filepath .Join (fs2 .UnifiedMountpoint , initCgPath )
303- logrus .Debugf ("adding pid %d to cgroup failed (%v), attempting to join %s" ,
304- p .pid (), err , initCgDirpath )
305- // NOTE: initCgDirPath is not guaranteed to exist because we didn't pause the container.
306- err = cgroups .WriteCgroupProc (initCgDirpath , p .pid ())
307- }
308- }
309- }
310- if err != nil {
311- return fmt .Errorf ("error adding pid %d to cgroups: %w" , p .pid (), err )
312- }
322+ if err == nil {
323+ return nil
313324 }
314325
326+ // Failed to join the configured cgroup. Fall back to container init's cgroup
327+ // unless sub-cgroup is explicitly requested.
328+ var path string
329+ if sub != "" {
330+ goto fail
331+ }
332+ path = p .initProcessCgroupPath ()
333+ if path == "" {
334+ goto fail
335+ }
336+ logrus .Debugf ("adding pid %d to configured cgroup failed (%v), will join container init cgroup %q" , p .pid (), err , path )
337+ // NOTE: path is not guaranteed to exist because we didn't pause the container.
338+ err = cgroups .WriteCgroupProc (path , p .pid ())
339+ if err != nil {
340+ goto fail
341+ }
315342 return nil
343+
344+ fail:
345+ if p .rootlessCgroups {
346+ // Ignore cgroup join errors when rootless.
347+ return nil
348+ }
349+
350+ return fmt .Errorf ("error adding pid %d to cgroups: %w" , p .pid (), err )
316351}
317352
318353func (p * setnsProcess ) addIntoCgroup () error {
@@ -331,6 +366,8 @@ func (p *setnsProcess) addIntoCgroup() error {
331366// to join cgroup early, in p.cmd.Start. Returns an *os.File which
332367// must be closed by the caller after p.Cmd.Start return.
333368func (p * setnsProcess ) prepareCgroupFD () (* os.File , error ) {
369+ const openFlags = unix .O_PATH | unix .O_DIRECTORY | unix .O_CLOEXEC
370+
334371 if ! cgroups .IsCgroup2UnifiedMode () {
335372 return nil , nil
336373 }
@@ -348,14 +385,28 @@ func (p *setnsProcess) prepareCgroupFD() (*os.File, error) {
348385 return nil , fmt .Errorf ("bad sub cgroup path: %s" , sub )
349386 }
350387
351- fd , err := cgroups .OpenFile (base , sub , unix .O_PATH | unix .O_DIRECTORY | unix .O_CLOEXEC )
388+ fd , err := cgroups .OpenFile (base , sub , openFlags )
389+ if err == nil {
390+ goto success
391+ }
392+ // Failed to open the configured cgroup. Fall back to container init's cgroup
393+ // unless sub-cgroup is explicitly requested. The fallback logic should be
394+ // the same as in addIntoCgroupV2.
395+ if sub != "" {
396+ goto fail
397+ }
398+ cgroup = p .initProcessCgroupPath ()
399+ if cgroup == "" {
400+ goto fail
401+ }
402+ logrus .Debugf ("failed to open configured cgroup (%v), will open container init cgroup %q" , err , cgroup )
403+ // NOTE: path is not guaranteed to exist because we didn't pause the container.
404+ fd , err = cgroups .OpenFile (cgroup , "" , openFlags )
352405 if err != nil {
353- if p .rootlessCgroups {
354- return nil , nil
355- }
356- return nil , fmt .Errorf ("can't open cgroup: %w" , err )
406+ goto fail
357407 }
358408
409+ success:
359410 logrus .Debugf ("using CLONE_INTO_CGROUP %q" , cgroup )
360411 if p .cmd .SysProcAttr == nil {
361412 p .cmd .SysProcAttr = & syscall.SysProcAttr {}
@@ -364,6 +415,13 @@ func (p *setnsProcess) prepareCgroupFD() (*os.File, error) {
364415 p .cmd .SysProcAttr .CgroupFD = int (fd .Fd ())
365416
366417 return fd , nil
418+
419+ fail:
420+ // Ignore cgroup join error for rootless.
421+ if p .rootlessCgroups {
422+ return nil , nil
423+ }
424+ return nil , fmt .Errorf ("can't open cgroup: %w" , err )
367425}
368426
369427// startWithCgroupFD starts a process via clone3 with CLONE_INTO_CGROUP,
0 commit comments