Skip to content

Commit b10ef5f

Browse files
committed
Add support for remote Firecracker snapshots
- When remote snapshots are enabled, after committing the snapshot, it is uploaded to a MinIO instance. When loading from a snapshot, if it is not available locally, it checks if it is available in MinIO and fetches it. - Remote Firecracker snapshots are currently only supported using the Stargz snapshotter (there are some container corruption issues when using devmapper). Signed-off-by: André Jesus <[email protected]>
1 parent 5b9627f commit b10ef5f

File tree

11 files changed

+822
-71
lines changed

11 files changed

+822
-71
lines changed

cri/firecracker/coordinator.go

Lines changed: 54 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@ import (
3535

3636
log "github.com/sirupsen/logrus"
3737
"github.com/vhive-serverless/vhive/ctriface"
38+
39+
"github.com/minio/minio-go/v7"
40+
"github.com/minio/minio-go/v7/pkg/credentials"
3841
)
3942

4043
type coordinator struct {
@@ -67,10 +70,29 @@ func newFirecrackerCoordinator(orch *ctriface.Orchestrator, opts ...coordinatorO
6770
}
6871

6972
snapshotsDir := "/fccd/test/snapshots"
73+
74+
var minioClient *minio.Client
75+
snapshotsBucket := "snapshots"
76+
minioAddr := "localhost:50052"
77+
minioAccessKey := "minio"
78+
minioSecretKey := "minio123"
79+
7080
if !c.withoutOrchestrator {
7181
snapshotsDir = orch.GetSnapshotsDir()
82+
snapshotsBucket = orch.GetSnapshotsBucket()
83+
minioAddr = orch.GetMinioAddr()
84+
minioAccessKey = orch.GetMinioAccessKey()
85+
minioSecretKey = orch.GetMinioSecretKey()
7286
}
73-
c.snapshotManager = snapshotting.NewSnapshotManager(snapshotsDir)
87+
88+
if c.orch.GetSnapshotMode() == "remote" {
89+
minioClient, _ = minio.New(minioAddr, &minio.Options{
90+
Creds: credentials.NewStaticV4(minioAccessKey, minioSecretKey, ""),
91+
Secure: false,
92+
})
93+
}
94+
95+
c.snapshotManager = snapshotting.NewSnapshotManager(snapshotsDir, snapshotsBucket, minioClient)
7496

7597
return c
7698
}
@@ -80,13 +102,27 @@ func (c *coordinator) startVM(ctx context.Context, image, revision string) (*fun
80102
}
81103

82104
func (c *coordinator) startVMWithEnvironment(ctx context.Context, image, revision string, environment []string) (*funcInstance, error) {
83-
if c.orch != nil && c.orch.GetSnapshotsEnabled() {
84-
// Check if snapshot is available
85-
if snap, err := c.snapshotManager.AcquireSnapshot(revision); err == nil {
105+
if c.orch != nil && c.orch.GetSnapshotMode() != "disabled" {
106+
if snap, err := c.snapshotManager.AcquireSnapshot(revision); snap == nil {
107+
log.Printf("failed to acquire snapshot: %w", err)
108+
if c.orch.GetSnapshotMode() == "remote" {
109+
log.Printf("downloading snapshot from remote storage")
110+
if _, err := c.snapshotManager.DownloadSnapshot(revision); err != nil {
111+
log.Printf("failed to download snapshot from remote storage: %w", err)
112+
c.snapshotManager.DeleteSnapshot(revision)
113+
} else {
114+
log.Printf("downloaded snapshot from remote storage")
115+
}
116+
}
117+
}
118+
119+
if snap, _ := c.snapshotManager.AcquireSnapshot(revision); snap != nil {
120+
log.Printf("loading snapshot %s", snap.GetId())
86121
return c.orchLoadInstance(ctx, snap)
87122
}
88123
}
89124

125+
log.Printf("creating fresh instance")
90126
return c.orchStartVM(ctx, image, revision, environment)
91127
}
92128

@@ -102,7 +138,7 @@ func (c *coordinator) stopVM(ctx context.Context, containerID string) error {
102138
return nil
103139
}
104140

105-
if c.orch != nil && c.orch.GetSnapshotsEnabled() && !fi.SnapBooted {
141+
if c.orch != nil && c.orch.GetSnapshotMode() != "disabled" && !fi.SnapBooted {
106142
err := c.orchCreateSnapshot(ctx, fi)
107143
if err != nil {
108144
log.Printf("Err creating snapshot %s\n", err)
@@ -199,6 +235,7 @@ func (c *coordinator) orchLoadInstance(ctx context.Context, snap *snapshotting.S
199235

200236
func (c *coordinator) orchCreateSnapshot(ctx context.Context, fi *funcInstance) error {
201237
var err error
238+
log.Printf("creating snapshot for %s\n", fi.Revision)
202239

203240
snap, err := c.snapshotManager.InitSnapshot(fi.Revision, fi.Image)
204241
if err != nil {
@@ -230,11 +267,23 @@ func (c *coordinator) orchCreateSnapshot(ctx context.Context, fi *funcInstance)
230267
}
231268
}
232269

270+
if err := snap.SerializeSnapInfo(); err != nil {
271+
fi.Logger.WithError(err).Error("failed to serialize snapshot info")
272+
return err
273+
}
274+
233275
if err := c.snapshotManager.CommitSnapshot(fi.Revision); err != nil {
234276
fi.Logger.WithError(err).Error("failed to commit snapshot")
235277
return err
236278
}
237279

280+
if c.orch.GetSnapshotMode() == "remote" {
281+
fi.Logger.Debug("uploading snapshot to remote storage")
282+
if err := c.snapshotManager.UploadSnapshot(fi.Revision); err != nil {
283+
fi.Logger.WithError(err).Error("failed to upload snapshot")
284+
}
285+
}
286+
238287
return nil
239288
}
240289

ctriface/orch.go

Lines changed: 39 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -98,16 +98,21 @@ type Orchestrator struct {
9898
imageManager *image.ImageManager
9999
dockerCredentials DockerCredentials
100100
// store *skv.KVStore
101-
snapshotsEnabled bool
102-
isUPFEnabled bool
103-
isLazyMode bool
104-
snapshotsDir string
105-
isMetricsMode bool
106-
netPoolSize int
101+
snapshotMode string
102+
isUPFEnabled bool
103+
isLazyMode bool
104+
snapshotsDir string
105+
snapshotsBucket string
106+
isMetricsMode bool
107+
netPoolSize int
107108

108109
vethPrefix string
109110
clonePrefix string
110111

112+
minioAddr string
113+
minioAccessKey string
114+
minioSecretKey string
115+
111116
memoryManager *manager.MemoryManager
112117
}
113118

@@ -119,9 +124,13 @@ func NewOrchestrator(snapshotter, hostIface string, opts ...OrchestratorOption)
119124
o.cachedImages = make(map[string]containerd.Image)
120125
o.snapshotter = snapshotter
121126
o.snapshotsDir = "/fccd/snapshots"
127+
o.snapshotsBucket = "snapshots"
122128
o.netPoolSize = 10
123129
o.vethPrefix = "172.17"
124130
o.clonePrefix = "172.18"
131+
o.minioAddr = "10.96.0.46:9000"
132+
o.minioAccessKey = "minio"
133+
o.minioSecretKey = "minio123"
125134

126135
for _, opt := range opts {
127136
opt(o)
@@ -187,9 +196,9 @@ func (o *Orchestrator) Cleanup() {
187196
}
188197
}
189198

190-
// GetSnapshotsEnabled Returns the snapshots mode of the orchestrator
191-
func (o *Orchestrator) GetSnapshotsEnabled() bool {
192-
return o.snapshotsEnabled
199+
// GetSnapshotMode Returns the snapshot mode of the orchestrator
200+
func (o *Orchestrator) GetSnapshotMode() string {
201+
return o.snapshotMode
193202
}
194203

195204
// GetUPFEnabled Returns the UPF mode of the orchestrator
@@ -252,6 +261,27 @@ func (o *Orchestrator) GetDockerCredentials() string {
252261
return string(data)
253262
}
254263

264+
// GetSnapshotsBucket returns the S3 bucket name used by the orchestrator for storing remote snapshots.
265+
func (o *Orchestrator) GetSnapshotsBucket() string {
266+
return o.snapshotsBucket
267+
}
268+
269+
// GetMinioAddr returns the address (endpoint) of the MinIO server used by the orchestrator.
270+
func (o *Orchestrator) GetMinioAddr() string {
271+
return o.minioAddr
272+
}
273+
274+
// GetMinioAccessKey returns the access key used to authenticate with the MinIO server.
275+
func (o *Orchestrator) GetMinioAccessKey() string {
276+
return o.minioAccessKey
277+
}
278+
279+
// GetMinioSecretKey returns the secret key used to authenticate with the MinIO server.
280+
// This should be handled securely and never exposed in logs or error messages.
281+
func (o *Orchestrator) GetMinioSecretKey() string {
282+
return o.minioSecretKey
283+
}
284+
255285
func (o *Orchestrator) setupHeartbeat() {
256286
heartbeat := time.NewTicker(60 * time.Second)
257287

ctriface/orch_options.go

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,10 @@ func WithTestModeOn(testModeOn bool) OrchestratorOption {
4040
}
4141
}
4242

43-
// WithSnapshots Sets the snapshot mode on or off
44-
func WithSnapshots(snapshotsEnabled bool) OrchestratorOption {
43+
// WithSnapshotMode Sets the snapshot mode
44+
func WithSnapshotMode(snapshotMode string) OrchestratorOption {
4545
return func(o *Orchestrator) {
46-
o.snapshotsEnabled = snapshotsEnabled
46+
o.snapshotMode = snapshotMode
4747
}
4848
}
4949

@@ -111,3 +111,26 @@ func WithDockerCredentials(dockerCredentials string) OrchestratorOption {
111111
o.dockerCredentials = creds
112112
}
113113
}
114+
115+
// WithMinioAddr Sets the MinIO server address (endpoint)
116+
func WithMinioAddr(minioAddr string) OrchestratorOption {
117+
return func(o *Orchestrator) {
118+
o.minioAddr = minioAddr
119+
}
120+
}
121+
122+
// WithMinioAccessKey Sets the MinIO access key
123+
// Used in conjunction with the secret key for authentication with the MinIO server
124+
func WithMinioAccessKey(minioAccessKey string) OrchestratorOption {
125+
return func(o *Orchestrator) {
126+
o.minioAccessKey = minioAccessKey
127+
}
128+
}
129+
130+
// WithMinioSecretKey Sets the MinIO secret key
131+
// Used in conjunction with the access key for authentication with the MinIO server
132+
func WithMinioSecretKey(minioSecretKey string) OrchestratorOption {
133+
return func(o *Orchestrator) {
134+
o.minioSecretKey = minioSecretKey
135+
}
136+
}

docs/quickstart_guide.md

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -172,11 +172,11 @@ Another option is to install using official instructions: [https://golang.org/do
172172
# EITHER
173173
sudo screen -dmS vhive bash -c "./vhive > >(tee -a /tmp/vhive-logs/vhive.stdout) 2> >(tee -a /tmp/vhive-logs/vhive.stderr >&2)"
174174
# OR
175-
sudo screen -dmS vhive bash -c "./vhive -snapshots > >(tee -a /tmp/vhive-logs/vhive.stdout) 2> >(tee -a /tmp/vhive-logs/vhive.stderr >&2)"
175+
sudo screen -dmS vhive bash -c "./vhive -snapshots 'local' > >(tee -a /tmp/vhive-logs/vhive.stdout) 2> >(tee -a /tmp/vhive-logs/vhive.stderr >&2)"
176176
```
177177
> **Note:**
178178
>
179-
> By default, the microVMs are booted, `-snapshots` enables snapshots after the 2nd invocation of each function.
179+
> By default, the microVMs are booted; passing `-snapshots <local|remote>` enables snapshots after the 2nd invocation of each function.
180180
>
181181
> If `-snapshots` and `-upf` are specified, the snapshots are accelerated with the Record-and-Prefetch (REAP)
182182
technique that we described in our ASPLOS'21
@@ -186,6 +186,8 @@ Another option is to install using official instructions: [https://golang.org/do
186186
>
187187
> If you are using `stargz` with `firecracker`, you also need to set the `-dockerCredentials` flag to be able to [pull the images
188188
from inside the microVMs](https://github.com/firecracker-microvm/firecracker-containerd/blob/main/docker-credential-mmds/README.md#docker-credential-helper-mmds).
189+
>
190+
> Remote snapshots are only supported in `firecracker` mode with the `stargz` snapshotter. Check the [snapshot guide](../docs/snapshots.md) for more details on how to set up remote snapshots.
189191
190192
### 3. Configure Master Node
191193
**On the master node**, execute the following instructions below **as a non-root user with sudo rights** using **bash**:
@@ -297,7 +299,7 @@ Execute the following below **as a non-root user with sudo rights** using **bash
297299
298300
> **Note:**
299301
>
300-
> By default, the microVMs are booted, `-snapshots` enables snapshots after the 2nd invocation of each function.
302+
> By default, the microVMs are booted; passing `-snapshots <local|remote>` enables snapshots after the 2nd invocation of each function.
301303
>
302304
> If `-snapshots` and `-upf` are specified, the snapshots are accelerated with the Record-and-Prefetch (REAP)
303305
technique that we described in our ASPLOS'21
@@ -307,6 +309,8 @@ Execute the following below **as a non-root user with sudo rights** using **bash
307309
>
308310
> If you are using `stargz` with `firecracker`, you also need to set the `-dockerCredentials` flag to be able to [pull the images
309311
from inside the microVMs](https://github.com/firecracker-microvm/firecracker-containerd/blob/main/docker-credential-mmds/README.md#docker-credential-helper-mmds).
312+
>
313+
> Remote snapshots are only supported in `firecracker` mode with the `stargz` snapshotter. Check the [snapshot guide](../docs/snapshots.md) for more details on how to set up remote snapshots.
310314
311315
6. Run the single node cluster setup script:
312316
```bash

0 commit comments

Comments
 (0)