Skip to content
This repository was archived by the owner on Mar 9, 2022. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Godeps/Godeps.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 7 additions & 1 deletion pkg/os/os.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"os"

"github.com/containerd/fifo"
"github.com/docker/docker/pkg/mount"
"golang.org/x/net/context"
"golang.org/x/sys/unix"
)
Expand Down Expand Up @@ -90,7 +91,12 @@ func (RealOS) Mount(source string, target string, fstype string, flags uintptr,
return unix.Mount(source, target, fstype, flags, data)
}

// Unmount will call unix.Unmount to unmount the file.
// Unmount will call unix.Unmount to unmount the file. The function doesn't
// return error if target is not mounted.
func (RealOS) Unmount(target string, flags int) error {
// TODO(random-liu): Follow symlink to make sure the result is correct.
if mounted, err := mount.Mounted(target); err != nil || !mounted {
return err
}
return unix.Unmount(target, flags)
}
33 changes: 21 additions & 12 deletions pkg/server/container_stop.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,59 +55,68 @@ func (c *criContainerdService) StopContainer(ctx context.Context, r *runtime.Sto
if err != nil {
return nil, fmt.Errorf("an error occurred when try to find container %q: %v", r.GetContainerId(), err)
}
id := r.GetContainerId()

if err := c.stopContainer(ctx, meta, time.Duration(r.GetTimeout())*time.Second); err != nil {
return nil, err
}

return &runtime.StopContainerResponse{}, nil
}

// stopContainer stops a container based on the container metadata.
func (c *criContainerdService) stopContainer(ctx context.Context, meta *metadata.ContainerMetadata, timeout time.Duration) error {
id := meta.ID

// Return without error if container is not running. This makes sure that
// stop only takes real action after the container is started.
if meta.State() != runtime.ContainerState_CONTAINER_RUNNING {
glog.V(2).Infof("Container to stop %q is not running, current state %q",
id, criContainerStateToString(meta.State()))
return &runtime.StopContainerResponse{}, nil
return nil
}

if r.GetTimeout() > 0 {
if timeout > 0 {
// TODO(random-liu): [P1] Get stop signal from image config.
stopSignal := unix.SIGTERM
glog.V(2).Infof("Stop container %q with signal %v", id, stopSignal)
_, err = c.taskService.Kill(ctx, &execution.KillRequest{
_, err := c.taskService.Kill(ctx, &execution.KillRequest{
ContainerID: id,
Signal: uint32(stopSignal),
PidOrAll: &execution.KillRequest_All{All: true},
})
if err != nil {
if !isContainerdGRPCNotFoundError(err) && !isRuncProcessAlreadyFinishedError(err) {
return nil, fmt.Errorf("failed to stop container %q: %v", id, err)
return fmt.Errorf("failed to stop container %q: %v", id, err)
}
// Move on to make sure container status is updated.
}

err = c.waitContainerStop(ctx, id, time.Duration(r.GetTimeout())*time.Second)
err = c.waitContainerStop(ctx, id, timeout)
if err == nil {
return &runtime.StopContainerResponse{}, nil
return nil
}
glog.Errorf("Stop container %q timed out: %v", id, err)
}

// Event handler will Delete the container from containerd after it handles the Exited event.
glog.V(2).Infof("Kill container %q", id)
_, err = c.taskService.Kill(ctx, &execution.KillRequest{
_, err := c.taskService.Kill(ctx, &execution.KillRequest{
ContainerID: id,
Signal: uint32(unix.SIGKILL),
PidOrAll: &execution.KillRequest_All{All: true},
})
if err != nil {
if !isContainerdGRPCNotFoundError(err) && !isRuncProcessAlreadyFinishedError(err) {
return nil, fmt.Errorf("failed to kill container %q: %v", id, err)
return fmt.Errorf("failed to kill container %q: %v", id, err)
}
// Move on to make sure container status is updated.
}

// Wait for a fixed timeout until container stop is observed by event monitor.
if err := c.waitContainerStop(ctx, id, killContainerTimeout); err != nil {
return nil, fmt.Errorf("an error occurs during waiting for container %q to stop: %v",
id, err)
return fmt.Errorf("an error occurs during waiting for container %q to stop: %v", id, err)
}
return &runtime.StopContainerResponse{}, nil
return nil
}

// waitContainerStop polls container state until timeout exceeds or container is stopped.
Expand Down
22 changes: 19 additions & 3 deletions pkg/server/sandbox_remove.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,6 @@ func (c *criContainerdService) RemovePodSandbox(ctx context.Context, r *runtime.
// Use the full sandbox id.
id := sandbox.ID

// TODO(random-liu): [P2] Remove all containers in the sandbox.

// Return error if sandbox container is not fully stopped.
// TODO(random-liu): [P0] Make sure network is torn down, may need to introduce a state.
_, err = c.taskService.Info(ctx, &execution.InfoRequest{ContainerID: id})
Expand All @@ -73,7 +71,25 @@ func (c *criContainerdService) RemovePodSandbox(ctx context.Context, r *runtime.
glog.V(5).Infof("Remove called for snapshot %q that does not exist", id)
}

// TODO(random-liu): [P0] Cleanup shm created in RunPodSandbox.
// Remove all containers inside the sandbox.
// NOTE(random-liu): container could still be created after this point, Kubelet should
// not rely on this behavior.
// TODO(random-liu): Introduce an intermediate state to avoid container creation after
// this point.
cntrs, err := c.containerStore.List()
if err != nil {
return nil, fmt.Errorf("failed to list all containers: %v", err)
}
for _, cntr := range cntrs {
if cntr.SandboxID != id {
continue
}
_, err = c.RemoveContainer(ctx, &runtime.RemoveContainerRequest{ContainerId: cntr.ID})
if err != nil {
return nil, fmt.Errorf("failed to remove container %q: %v", cntr.ID, err)
}
}

// TODO(random-liu): [P1] Remove permanent namespace once used.

// Cleanup the sandbox root directory.
Expand Down
55 changes: 55 additions & 0 deletions pkg/server/sandbox_remove_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package server
import (
"fmt"
"testing"
"time"

"github.com/containerd/containerd/api/services/containers"
snapshotapi "github.com/containerd/containerd/api/services/snapshot"
Expand Down Expand Up @@ -176,3 +177,57 @@ func TestRemovePodSandbox(t *testing.T) {
assert.NotNil(t, res, "remove should be idempotent")
}
}

func TestRemoveContainersInSandbox(t *testing.T) {
testID := "test-id"
testName := "test-name"
testMetadata := metadata.SandboxMetadata{
ID: testID,
Name: testName,
}
testContainersMetadata := []*metadata.ContainerMetadata{
{
ID: "test-cid-1",
Name: "test-cname-1",
SandboxID: testID,
FinishedAt: time.Now().UnixNano(),
},
{
ID: "test-cid-2",
Name: "test-cname-2",
SandboxID: testID,
FinishedAt: time.Now().UnixNano(),
},
{
ID: "test-cid-3",
Name: "test-cname-3",
SandboxID: "other-sandbox-id",
FinishedAt: time.Now().UnixNano(),
},
}

c := newTestCRIContainerdService()
WithFakeSnapshotClient(c)
assert.NoError(t, c.sandboxNameIndex.Reserve(testName, testID))
assert.NoError(t, c.sandboxIDIndex.Add(testID))
assert.NoError(t, c.sandboxStore.Create(testMetadata))
for _, cntr := range testContainersMetadata {
assert.NoError(t, c.containerNameIndex.Reserve(cntr.Name, cntr.ID))
assert.NoError(t, c.containerStore.Create(*cntr))
}

res, err := c.RemovePodSandbox(context.Background(), &runtime.RemovePodSandboxRequest{
PodSandboxId: testID,
})
assert.NoError(t, err)
assert.NotNil(t, res)

meta, err := c.sandboxStore.Get(testID)
assert.Error(t, err)
assert.True(t, metadata.IsNotExistError(err))
assert.Nil(t, meta, "sandbox metadata should be removed")

cntrs, err := c.containerStore.List()
assert.NoError(t, err)
assert.Equal(t, testContainersMetadata[2:], cntrs, "container metadata should be removed")
}
18 changes: 12 additions & 6 deletions pkg/server/sandbox_run.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,10 @@ func (c *criContainerdService) RunPodSandbox(ctx context.Context, r *runtime.Run
}
defer func() {
if retErr != nil {
c.cleanupSandboxFiles(sandboxRootDir, config)
if err = c.unmountSandboxFiles(sandboxRootDir, config); err != nil {
glog.Errorf("Failed to unmount sandbox files in %q: %v",
sandboxRootDir, err)
}
}
}()

Expand Down Expand Up @@ -403,12 +406,15 @@ func parseDNSOptions(servers, searches, options []string) (string, error) {
return resolvContent, nil
}

// cleanupSandboxFiles only unmount files, we rely on the removal of sandbox root directory to remove files.
// Each cleanup task should log error instead of returning, so as to keep on cleanup on error.
func (c *criContainerdService) cleanupSandboxFiles(rootDir string, config *runtime.PodSandboxConfig) {
// unmountSandboxFiles unmount some sandbox files, we rely on the removal of sandbox root directory to
// remove these files. Unmount should *NOT* return error when:
// 1) The mount point is already unmounted.
// 2) The mount point doesn't exist.
func (c *criContainerdService) unmountSandboxFiles(rootDir string, config *runtime.PodSandboxConfig) error {
if !config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetHostIpc() {
if err := c.os.Unmount(getSandboxDevShm(rootDir), unix.MNT_DETACH); err != nil && os.IsNotExist(err) {
glog.Errorf("failed to unmount sandbox shm: %v", err)
if err := c.os.Unmount(getSandboxDevShm(rootDir), unix.MNT_DETACH); err != nil && !os.IsNotExist(err) {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Random-Liu any reason why this needs to be umounted (if any) at sandbox stop? we do this at sandbox stop and we didn't run into any issues yet

Copy link
Member Author

@Random-Liu Random-Liu Jun 14, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@runcom I feel like either stop or remove is ok.

The reason why we unmount in sandbox stop now is:

  1. I tried docker, after docker stop, shm is unmounted.
  2. Kubelet may garbage collect (remove) sandbox long after sandbox is stopped, we may not want to keep the shm during that. :)

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

makes sense, thanks @Random-Liu !!!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@runcom Np. :)

return err
}
}
return nil
}
25 changes: 24 additions & 1 deletion pkg/server/sandbox_stop.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,25 @@ func (c *criContainerdService) StopPodSandbox(ctx context.Context, r *runtime.St
// Use the full sandbox id.
id := sandbox.ID

// Stop all containers inside the sandbox. This terminates the container forcibly,
// and container may still be so production should not rely on this behavior.
// TODO(random-liu): Delete the sandbox container before this after permanent network namespace
// is introduced, so that no container will be started after that.
containers, err := c.containerStore.List()
if err != nil {
return nil, fmt.Errorf("failed to list all containers: %v", err)
}
for _, container := range containers {
if container.SandboxID != id {
continue
}
// Forcibly stop the container. Do not use `StopContainer`, because it introduces a race
// if a container is removed after list.
if err = c.stopContainer(ctx, container, 0); err != nil {
return nil, fmt.Errorf("failed to stop container %q: %v", container.ID, err)
}
}

// Teardown network for sandbox.
_, err = c.os.Stat(sandbox.NetNS)
if err == nil {
Expand All @@ -58,13 +77,17 @@ func (c *criContainerdService) StopPodSandbox(ctx context.Context, r *runtime.St
}
glog.V(2).Infof("TearDown network for sandbox %q successfully", id)

sandboxRoot := getSandboxRootDir(c.rootDir, id)
if err = c.unmountSandboxFiles(sandboxRoot, sandbox.Config); err != nil {
return nil, fmt.Errorf("failed to unmount sandbox files in %q: %v", sandboxRoot, err)
}

// TODO(random-liu): [P1] Handle sandbox container graceful deletion.
// Delete the sandbox container from containerd.
_, err = c.taskService.Delete(ctx, &execution.DeleteRequest{ContainerID: id})
if err != nil && !isContainerdGRPCNotFoundError(err) {
return nil, fmt.Errorf("failed to delete sandbox container %q: %v", id, err)
}

// TODO(random-liu): [P2] Stop all containers inside the sandbox.
return &runtime.StopPodSandboxResponse{}, nil
}
Loading