Skip to content
This repository was archived by the owner on Mar 9, 2022. It is now read-only.

Commit 9658159

Browse files
authored
Merge pull request #77 from Random-Liu/unmount-dev-shm
Unmount dev shm and cleanup container when stop/remove sandbox
2 parents cc43c86 + 57b8b43 commit 9658159

25 files changed

+1131
-23
lines changed

Godeps/Godeps.json

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/os/os.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"os"
2323

2424
"github.com/containerd/fifo"
25+
"github.com/docker/docker/pkg/mount"
2526
"golang.org/x/net/context"
2627
"golang.org/x/sys/unix"
2728
)
@@ -90,7 +91,12 @@ func (RealOS) Mount(source string, target string, fstype string, flags uintptr,
9091
return unix.Mount(source, target, fstype, flags, data)
9192
}
9293

93-
// Unmount will call unix.Unmount to unmount the file.
94+
// Unmount will call unix.Unmount to unmount the file. The function doesn't
95+
// return error if target is not mounted.
9496
func (RealOS) Unmount(target string, flags int) error {
97+
// TODO(random-liu): Follow symlink to make sure the result is correct.
98+
if mounted, err := mount.Mounted(target); err != nil || !mounted {
99+
return err
100+
}
95101
return unix.Unmount(target, flags)
96102
}

pkg/server/container_stop.go

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -55,59 +55,68 @@ func (c *criContainerdService) StopContainer(ctx context.Context, r *runtime.Sto
5555
if err != nil {
5656
return nil, fmt.Errorf("an error occurred when try to find container %q: %v", r.GetContainerId(), err)
5757
}
58-
id := r.GetContainerId()
58+
59+
if err := c.stopContainer(ctx, meta, time.Duration(r.GetTimeout())*time.Second); err != nil {
60+
return nil, err
61+
}
62+
63+
return &runtime.StopContainerResponse{}, nil
64+
}
65+
66+
// stopContainer stops a container based on the container metadata.
67+
func (c *criContainerdService) stopContainer(ctx context.Context, meta *metadata.ContainerMetadata, timeout time.Duration) error {
68+
id := meta.ID
5969

6070
// Return without error if container is not running. This makes sure that
6171
// stop only takes real action after the container is started.
6272
if meta.State() != runtime.ContainerState_CONTAINER_RUNNING {
6373
glog.V(2).Infof("Container to stop %q is not running, current state %q",
6474
id, criContainerStateToString(meta.State()))
65-
return &runtime.StopContainerResponse{}, nil
75+
return nil
6676
}
6777

68-
if r.GetTimeout() > 0 {
78+
if timeout > 0 {
6979
// TODO(random-liu): [P1] Get stop signal from image config.
7080
stopSignal := unix.SIGTERM
7181
glog.V(2).Infof("Stop container %q with signal %v", id, stopSignal)
72-
_, err = c.taskService.Kill(ctx, &execution.KillRequest{
82+
_, err := c.taskService.Kill(ctx, &execution.KillRequest{
7383
ContainerID: id,
7484
Signal: uint32(stopSignal),
7585
PidOrAll: &execution.KillRequest_All{All: true},
7686
})
7787
if err != nil {
7888
if !isContainerdGRPCNotFoundError(err) && !isRuncProcessAlreadyFinishedError(err) {
79-
return nil, fmt.Errorf("failed to stop container %q: %v", id, err)
89+
return fmt.Errorf("failed to stop container %q: %v", id, err)
8090
}
8191
// Move on to make sure container status is updated.
8292
}
8393

84-
err = c.waitContainerStop(ctx, id, time.Duration(r.GetTimeout())*time.Second)
94+
err = c.waitContainerStop(ctx, id, timeout)
8595
if err == nil {
86-
return &runtime.StopContainerResponse{}, nil
96+
return nil
8797
}
8898
glog.Errorf("Stop container %q timed out: %v", id, err)
8999
}
90100

91101
// Event handler will Delete the container from containerd after it handles the Exited event.
92102
glog.V(2).Infof("Kill container %q", id)
93-
_, err = c.taskService.Kill(ctx, &execution.KillRequest{
103+
_, err := c.taskService.Kill(ctx, &execution.KillRequest{
94104
ContainerID: id,
95105
Signal: uint32(unix.SIGKILL),
96106
PidOrAll: &execution.KillRequest_All{All: true},
97107
})
98108
if err != nil {
99109
if !isContainerdGRPCNotFoundError(err) && !isRuncProcessAlreadyFinishedError(err) {
100-
return nil, fmt.Errorf("failed to kill container %q: %v", id, err)
110+
return fmt.Errorf("failed to kill container %q: %v", id, err)
101111
}
102112
// Move on to make sure container status is updated.
103113
}
104114

105115
// Wait for a fixed timeout until container stop is observed by event monitor.
106116
if err := c.waitContainerStop(ctx, id, killContainerTimeout); err != nil {
107-
return nil, fmt.Errorf("an error occurs during waiting for container %q to stop: %v",
108-
id, err)
117+
return fmt.Errorf("an error occurs during waiting for container %q to stop: %v", id, err)
109118
}
110-
return &runtime.StopContainerResponse{}, nil
119+
return nil
111120
}
112121

113122
// waitContainerStop polls container state until timeout exceeds or container is stopped.

pkg/server/sandbox_remove.go

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,6 @@ func (c *criContainerdService) RemovePodSandbox(ctx context.Context, r *runtime.
5353
// Use the full sandbox id.
5454
id := sandbox.ID
5555

56-
// TODO(random-liu): [P2] Remove all containers in the sandbox.
57-
5856
// Return error if sandbox container is not fully stopped.
5957
// TODO(random-liu): [P0] Make sure network is torn down, may need to introduce a state.
6058
_, err = c.taskService.Info(ctx, &execution.InfoRequest{ContainerID: id})
@@ -73,7 +71,25 @@ func (c *criContainerdService) RemovePodSandbox(ctx context.Context, r *runtime.
7371
glog.V(5).Infof("Remove called for snapshot %q that does not exist", id)
7472
}
7573

76-
// TODO(random-liu): [P0] Cleanup shm created in RunPodSandbox.
74+
// Remove all containers inside the sandbox.
75+
// NOTE(random-liu): container could still be created after this point, Kubelet should
76+
// not rely on this behavior.
77+
// TODO(random-liu): Introduce an intermediate state to avoid container creation after
78+
// this point.
79+
cntrs, err := c.containerStore.List()
80+
if err != nil {
81+
return nil, fmt.Errorf("failed to list all containers: %v", err)
82+
}
83+
for _, cntr := range cntrs {
84+
if cntr.SandboxID != id {
85+
continue
86+
}
87+
_, err = c.RemoveContainer(ctx, &runtime.RemoveContainerRequest{ContainerId: cntr.ID})
88+
if err != nil {
89+
return nil, fmt.Errorf("failed to remove container %q: %v", cntr.ID, err)
90+
}
91+
}
92+
7793
// TODO(random-liu): [P1] Remove permanent namespace once used.
7894

7995
// Cleanup the sandbox root directory.

pkg/server/sandbox_remove_test.go

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ package server
1717
import (
1818
"fmt"
1919
"testing"
20+
"time"
2021

2122
"github.com/containerd/containerd/api/services/containers"
2223
snapshotapi "github.com/containerd/containerd/api/services/snapshot"
@@ -176,3 +177,57 @@ func TestRemovePodSandbox(t *testing.T) {
176177
assert.NotNil(t, res, "remove should be idempotent")
177178
}
178179
}
180+
181+
func TestRemoveContainersInSandbox(t *testing.T) {
182+
testID := "test-id"
183+
testName := "test-name"
184+
testMetadata := metadata.SandboxMetadata{
185+
ID: testID,
186+
Name: testName,
187+
}
188+
testContainersMetadata := []*metadata.ContainerMetadata{
189+
{
190+
ID: "test-cid-1",
191+
Name: "test-cname-1",
192+
SandboxID: testID,
193+
FinishedAt: time.Now().UnixNano(),
194+
},
195+
{
196+
ID: "test-cid-2",
197+
Name: "test-cname-2",
198+
SandboxID: testID,
199+
FinishedAt: time.Now().UnixNano(),
200+
},
201+
{
202+
ID: "test-cid-3",
203+
Name: "test-cname-3",
204+
SandboxID: "other-sandbox-id",
205+
FinishedAt: time.Now().UnixNano(),
206+
},
207+
}
208+
209+
c := newTestCRIContainerdService()
210+
WithFakeSnapshotClient(c)
211+
assert.NoError(t, c.sandboxNameIndex.Reserve(testName, testID))
212+
assert.NoError(t, c.sandboxIDIndex.Add(testID))
213+
assert.NoError(t, c.sandboxStore.Create(testMetadata))
214+
for _, cntr := range testContainersMetadata {
215+
assert.NoError(t, c.containerNameIndex.Reserve(cntr.Name, cntr.ID))
216+
assert.NoError(t, c.containerStore.Create(*cntr))
217+
}
218+
219+
res, err := c.RemovePodSandbox(context.Background(), &runtime.RemovePodSandboxRequest{
220+
PodSandboxId: testID,
221+
})
222+
assert.NoError(t, err)
223+
assert.NotNil(t, res)
224+
225+
meta, err := c.sandboxStore.Get(testID)
226+
assert.Error(t, err)
227+
assert.True(t, metadata.IsNotExistError(err))
228+
assert.Nil(t, meta, "sandbox metadata should be removed")
229+
230+
cntrs, err := c.containerStore.List()
231+
assert.NoError(t, err)
232+
assert.Equal(t, testContainersMetadata[2:], cntrs, "container metadata should be removed")
233+
}

pkg/server/sandbox_run.go

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,10 @@ func (c *criContainerdService) RunPodSandbox(ctx context.Context, r *runtime.Run
183183
}
184184
defer func() {
185185
if retErr != nil {
186-
c.cleanupSandboxFiles(sandboxRootDir, config)
186+
if err = c.unmountSandboxFiles(sandboxRootDir, config); err != nil {
187+
glog.Errorf("Failed to unmount sandbox files in %q: %v",
188+
sandboxRootDir, err)
189+
}
187190
}
188191
}()
189192

@@ -403,12 +406,15 @@ func parseDNSOptions(servers, searches, options []string) (string, error) {
403406
return resolvContent, nil
404407
}
405408

406-
// cleanupSandboxFiles only unmount files, we rely on the removal of sandbox root directory to remove files.
407-
// Each cleanup task should log error instead of returning, so as to keep on cleanup on error.
408-
func (c *criContainerdService) cleanupSandboxFiles(rootDir string, config *runtime.PodSandboxConfig) {
409+
// unmountSandboxFiles unmount some sandbox files, we rely on the removal of sandbox root directory to
410+
// remove these files. Unmount should *NOT* return error when:
411+
// 1) The mount point is already unmounted.
412+
// 2) The mount point doesn't exist.
413+
func (c *criContainerdService) unmountSandboxFiles(rootDir string, config *runtime.PodSandboxConfig) error {
409414
if !config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetHostIpc() {
410-
if err := c.os.Unmount(getSandboxDevShm(rootDir), unix.MNT_DETACH); err != nil && os.IsNotExist(err) {
411-
glog.Errorf("failed to unmount sandbox shm: %v", err)
415+
if err := c.os.Unmount(getSandboxDevShm(rootDir), unix.MNT_DETACH); err != nil && !os.IsNotExist(err) {
416+
return err
412417
}
413418
}
419+
return nil
414420
}

pkg/server/sandbox_stop.go

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,25 @@ func (c *criContainerdService) StopPodSandbox(ctx context.Context, r *runtime.St
4646
// Use the full sandbox id.
4747
id := sandbox.ID
4848

49+
// Stop all containers inside the sandbox. This terminates the container forcibly,
50+
// and container may still be so production should not rely on this behavior.
51+
// TODO(random-liu): Delete the sandbox container before this after permanent network namespace
52+
// is introduced, so that no container will be started after that.
53+
containers, err := c.containerStore.List()
54+
if err != nil {
55+
return nil, fmt.Errorf("failed to list all containers: %v", err)
56+
}
57+
for _, container := range containers {
58+
if container.SandboxID != id {
59+
continue
60+
}
61+
// Forcibly stop the container. Do not use `StopContainer`, because it introduces a race
62+
// if a container is removed after list.
63+
if err = c.stopContainer(ctx, container, 0); err != nil {
64+
return nil, fmt.Errorf("failed to stop container %q: %v", container.ID, err)
65+
}
66+
}
67+
4968
// Teardown network for sandbox.
5069
_, err = c.os.Stat(sandbox.NetNS)
5170
if err == nil {
@@ -58,13 +77,17 @@ func (c *criContainerdService) StopPodSandbox(ctx context.Context, r *runtime.St
5877
}
5978
glog.V(2).Infof("TearDown network for sandbox %q successfully", id)
6079

80+
sandboxRoot := getSandboxRootDir(c.rootDir, id)
81+
if err = c.unmountSandboxFiles(sandboxRoot, sandbox.Config); err != nil {
82+
return nil, fmt.Errorf("failed to unmount sandbox files in %q: %v", sandboxRoot, err)
83+
}
84+
6185
// TODO(random-liu): [P1] Handle sandbox container graceful deletion.
6286
// Delete the sandbox container from containerd.
6387
_, err = c.taskService.Delete(ctx, &execution.DeleteRequest{ContainerID: id})
6488
if err != nil && !isContainerdGRPCNotFoundError(err) {
6589
return nil, fmt.Errorf("failed to delete sandbox container %q: %v", id, err)
6690
}
6791

68-
// TODO(random-liu): [P2] Stop all containers inside the sandbox.
6992
return &runtime.StopPodSandboxResponse{}, nil
7093
}

0 commit comments

Comments
 (0)