Skip to content
This repository was archived by the owner on Mar 9, 2022. It is now read-only.

Commit c44f798

Browse files
authored
Merge pull request #371 from Random-Liu/fix-removing
Fix removing state recover.
2 parents 95067d7 + 4eaaee3 commit c44f798

File tree

6 files changed

+63
-20
lines changed

6 files changed

+63
-20
lines changed

pkg/server/container_remove.go

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ import (
3131
)
3232

3333
// RemoveContainer removes the container.
34+
// TODO(random-liu): Forcibly stop container if it's running.
3435
func (c *criContainerdService) RemoveContainer(ctx context.Context, r *runtime.RemoveContainerRequest) (_ *runtime.RemoveContainerResponse, retErr error) {
3536
container, err := c.containerStore.Get(r.GetContainerId())
3637
if err != nil {
@@ -52,7 +53,6 @@ func (c *criContainerdService) RemoveContainer(ctx context.Context, r *runtime.R
5253
if retErr != nil {
5354
// Reset removing if remove failed.
5455
if err := resetContainerRemoving(container); err != nil {
55-
// TODO(random-liu): Do not checkpoint `Removing` state.
5656
glog.Errorf("failed to reset removing state for container %q: %v", id, err)
5757
}
5858
}
@@ -63,23 +63,23 @@ func (c *criContainerdService) RemoveContainer(ctx context.Context, r *runtime.R
6363
// kubelet implementation, we'll never start a container once we decide to remove it,
6464
// so we don't need the "Dead" state for now.
6565

66-
containerRootDir := getContainerRootDir(c.config.RootDir, id)
67-
if err := system.EnsureRemoveAll(containerRootDir); err != nil {
68-
return nil, fmt.Errorf("failed to remove container root directory %q: %v",
69-
containerRootDir, err)
66+
// Delete containerd container.
67+
if err := container.Container.Delete(ctx, containerd.WithSnapshotCleanup); err != nil {
68+
if !errdefs.IsNotFound(err) {
69+
return nil, fmt.Errorf("failed to delete containerd container %q: %v", id, err)
70+
}
71+
glog.V(5).Infof("Remove called for containerd container %q that does not exist", id, err)
7072
}
7173

7274
// Delete container checkpoint.
7375
if err := container.Delete(); err != nil {
7476
return nil, fmt.Errorf("failed to delete container checkpoint for %q: %v", id, err)
7577
}
7678

77-
// Delete containerd container.
78-
if err := container.Container.Delete(ctx, containerd.WithSnapshotCleanup); err != nil {
79-
if !errdefs.IsNotFound(err) {
80-
return nil, fmt.Errorf("failed to delete containerd container %q: %v", id, err)
81-
}
82-
glog.V(5).Infof("Remove called for containerd container %q that does not exist", id, err)
79+
containerRootDir := getContainerRootDir(c.config.RootDir, id)
80+
if err := system.EnsureRemoveAll(containerRootDir); err != nil {
81+
return nil, fmt.Errorf("failed to remove container root directory %q: %v",
82+
containerRootDir, err)
8383
}
8484

8585
c.containerStore.Delete(id)

pkg/server/container_start.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ func (c *criContainerdService) StartContainer(ctx context.Context, r *runtime.St
3939

4040
var startErr error
4141
// update container status in one transaction to avoid race with event monitor.
42-
if err := container.Status.Update(func(status containerstore.Status) (containerstore.Status, error) {
42+
if err := container.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) {
4343
// Always apply status change no matter startContainer fails or not. Because startContainer
4444
// may change container state no matter it fails or succeeds.
4545
startErr = c.startContainer(ctx, container, &status)

pkg/server/events.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ func (em *eventMonitor) handleEvent(evt *events.Envelope) {
125125
// Move on to make sure container status is updated.
126126
}
127127
}
128-
err = cntr.Status.Update(func(status containerstore.Status) (containerstore.Status, error) {
128+
err = cntr.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) {
129129
// If FinishedAt has been set (e.g. with start failure), keep as
130130
// it is.
131131
if status.FinishedAt != 0 {
@@ -151,7 +151,7 @@ func (em *eventMonitor) handleEvent(evt *events.Envelope) {
151151
}
152152
glog.Errorf("Failed to get container %q: %v", e.ContainerID, err)
153153
}
154-
err = cntr.Status.Update(func(status containerstore.Status) (containerstore.Status, error) {
154+
err = cntr.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) {
155155
status.Reason = oomExitReason
156156
return status, nil
157157
})

pkg/store/container/fake_status.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,10 @@ func (f *fakeStatusStorage) Get() Status {
3838
return f.status
3939
}
4040

41+
func (f *fakeStatusStorage) UpdateSync(u UpdateFunc) error {
42+
return f.Update(u)
43+
}
44+
4145
func (f *fakeStatusStorage) Update(u UpdateFunc) error {
4246
f.Lock()
4347
defer f.Unlock()

pkg/store/container/status.go

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -106,11 +106,11 @@ type UpdateFunc func(Status) (Status, error)
106106
type StatusStorage interface {
107107
// Get a container status.
108108
Get() Status
109+
// UpdateSync updates the container status and the on disk checkpoint.
110+
// Note that the update MUST be applied in one transaction.
111+
UpdateSync(UpdateFunc) error
109112
// Update the container status. Note that the update MUST be applied
110113
// in one transaction.
111-
// TODO(random-liu): Distinguish `UpdateSync` and `Update`, only
112-
// `UpdateSync` should sync data onto disk, so that disk operation
113-
// for non-critical status change could be avoided.
114114
Update(UpdateFunc) error
115115
// Delete the container status.
116116
// Note:
@@ -167,8 +167,8 @@ func (s *statusStorage) Get() Status {
167167
return s.status
168168
}
169169

170-
// Update the container status.
171-
func (s *statusStorage) Update(u UpdateFunc) error {
170+
// UpdateSync updates the container status and the on disk checkpoint.
171+
func (s *statusStorage) UpdateSync(u UpdateFunc) error {
172172
s.Lock()
173173
defer s.Unlock()
174174
newStatus, err := u(s.status)
@@ -186,6 +186,18 @@ func (s *statusStorage) Update(u UpdateFunc) error {
186186
return nil
187187
}
188188

189+
// Update the container status.
190+
func (s *statusStorage) Update(u UpdateFunc) error {
191+
s.Lock()
192+
defer s.Unlock()
193+
newStatus, err := u(s.status)
194+
if err != nil {
195+
return err
196+
}
197+
s.status = newStatus
198+
return nil
199+
}
200+
189201
// Delete deletes the container status from disk atomically.
190202
func (s *statusStorage) Delete() error {
191203
temp := filepath.Dir(s.path) + ".del-" + filepath.Base(s.path)

pkg/store/container/status_test.go

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ func TestStatus(t *testing.T) {
132132
require.NoError(err)
133133
assert.Equal(testStatus, loaded)
134134

135-
t.Logf("successful update should take effect")
135+
t.Logf("successful update should take effect but not checkpoint")
136136
err = s.Update(func(o Status) (Status, error) {
137137
o = updateStatus
138138
return o, nil
@@ -141,6 +141,33 @@ func TestStatus(t *testing.T) {
141141
assert.Equal(updateStatus, s.Get())
142142
loaded, err = LoadStatus(tempDir, testID)
143143
require.NoError(err)
144+
assert.Equal(testStatus, loaded)
145+
// Recover status.
146+
assert.NoError(s.Update(func(o Status) (Status, error) {
147+
o = testStatus
148+
return o, nil
149+
}))
150+
151+
t.Logf("failed update sync should not take effect")
152+
err = s.UpdateSync(func(o Status) (Status, error) {
153+
o = updateStatus
154+
return o, updateErr
155+
})
156+
assert.Equal(updateErr, err)
157+
assert.Equal(testStatus, s.Get())
158+
loaded, err = LoadStatus(tempDir, testID)
159+
require.NoError(err)
160+
assert.Equal(testStatus, loaded)
161+
162+
t.Logf("successful update sync should take effect and checkpoint")
163+
err = s.UpdateSync(func(o Status) (Status, error) {
164+
o = updateStatus
165+
return o, nil
166+
})
167+
assert.NoError(err)
168+
assert.Equal(updateStatus, s.Get())
169+
loaded, err = LoadStatus(tempDir, testID)
170+
require.NoError(err)
144171
assert.Equal(updateStatus, loaded)
145172

146173
t.Logf("successful update should not affect existing snapshot")

0 commit comments

Comments
 (0)