
Commit 1d70ca1

estargz: support lossless compression
Signed-off-by: Kohei Tokunaga <[email protected]>
1 parent 1d81483 commit 1d70ca1

16 files changed (+463 −273 lines)

cmd/ctr-remote/commands/get-toc-digest.go

Lines changed: 1 addition & 1 deletion

@@ -84,7 +84,7 @@ var GetTOCDigestCommand = cli.Command{
 			decompressor = new(zstdchunked.Decompressor)
 		}

-		tocOff, tocSize, err := decompressor.ParseFooter(footer)
+		_, tocOff, tocSize, err := decompressor.ParseFooter(footer)
 		if err != nil {
 			return errors.Wrapf(err, "error parsing footer")
 		}

estargz/build_test.go

Lines changed: 2 additions & 2 deletions

@@ -371,7 +371,7 @@ func TestSort(t *testing.T) {
 			if tt.allowMissedFiles != nil {
 				opts = append(opts, WithAllowPrioritizeNotFound(&missedFiles))
 			}
-			rc, err := Build(compressBlob(t, buildTarStatic(t, tt.in, tarprefix), srcCompression),
+			rc, err := Build(compressBlob(t, buildTar(t, tt.in, tarprefix), srcCompression),
 				append(opts, WithPrioritizedFiles(pfiles))...)
 			if tt.wantFail {
 				if err != nil {
@@ -406,7 +406,7 @@ func TestSort(t *testing.T) {
 			gotTar := tar.NewReader(zr)

 			// Compare all
-			wantTar := tar.NewReader(buildTarStatic(t, tt.want, tarprefix))
+			wantTar := tar.NewReader(buildTar(t, tt.want, tarprefix))
 			for {
 				// Fetch and parse next header.
 				gotH, wantH, err := next(t, gotTar, wantTar)

estargz/estargz.go

Lines changed: 103 additions & 19 deletions

@@ -23,7 +23,6 @@
 package estargz

 import (
-	"archive/tar"
 	"bufio"
 	"bytes"
 	"compress/gzip"
@@ -42,6 +41,7 @@ import (
 	"github.com/containerd/stargz-snapshotter/estargz/errorutil"
 	digest "github.com/opencontainers/go-digest"
 	"github.com/pkg/errors"
+	"github.com/vbatts/tar-split/archive/tar"
 )

 // A Reader permits random access reads from a stargz file.
@@ -95,10 +95,10 @@ func WithTelemetry(telemetry *Telemetry) OpenOption {
 	}
 }

-// A func which takes start time and records the diff
+// MeasureLatencyHook is a func which takes start time and records the diff
 type MeasureLatencyHook func(time.Time)

-// A struct which defines telemetry hooks. By implementing these hooks you should be able to record
+// Telemetry is a struct which defines telemetry hooks. By implementing these hooks you should be able to record
 // the latency metrics of the respective steps of estargz open operation. To be used with estargz.OpenWithTelemetry(...)
 type Telemetry struct {
 	GetFooterLatency MeasureLatencyHook // measure time to get stargz footer (in milliseconds)
@@ -146,7 +146,7 @@ func Open(sr *io.SectionReader, opt ...OpenOption) (*Reader, error) {
 		fSize := d.FooterSize()
 		fOffset := positive(int64(len(footer)) - fSize)
 		maybeTocBytes := footer[:fOffset]
-		tocOffset, tocSize, err := d.ParseFooter(footer[fOffset:])
+		_, tocOffset, tocSize, err := d.ParseFooter(footer[fOffset:])
 		if err != nil {
 			allErr = append(allErr, err)
 			continue
@@ -187,7 +187,7 @@ func OpenFooter(sr *io.SectionReader) (tocOffset int64, footerSize int64, rErr error) {
 	for _, d := range []Decompressor{new(GzipDecompressor), new(legacyGzipDecompressor)} {
 		fSize := d.FooterSize()
 		fOffset := positive(int64(len(footer)) - fSize)
-		tocOffset, _, err := d.ParseFooter(footer[fOffset:])
+		_, tocOffset, _, err := d.ParseFooter(footer[fOffset:])
 		if err == nil {
 			return tocOffset, fSize, err
 		}
@@ -591,6 +591,11 @@ type currentCompressionWriter struct{ w *Writer }

 func (ccw currentCompressionWriter) Write(p []byte) (int, error) {
 	ccw.w.diffHash.Write(p)
+	if ccw.w.gz == nil {
+		if err := ccw.w.condOpenGz(); err != nil {
+			return 0, err
+		}
+	}
 	return ccw.w.gz.Write(p)
 }

@@ -601,6 +606,25 @@ func (w *Writer) chunkSize() int {
 	return w.ChunkSize
 }

+// Unpack decompresses the given estargz blob and returns a ReadCloser of the tar blob.
+// TOC JSON and footer are removed.
+func Unpack(sr *io.SectionReader, c Decompressor) (io.ReadCloser, error) {
+	footerSize := c.FooterSize()
+	if sr.Size() < footerSize {
+		return nil, fmt.Errorf("blob is too small; %d < %d", sr.Size(), footerSize)
+	}
+	footerOffset := sr.Size() - footerSize
+	footer := make([]byte, footerSize)
+	if _, err := sr.ReadAt(footer, footerOffset); err != nil {
+		return nil, err
+	}
+	blobPayloadSize, _, _, err := c.ParseFooter(footer)
+	if err != nil {
+		return nil, errors.Wrapf(err, "failed to parse footer")
+	}
+	return c.Reader(io.LimitReader(sr, blobPayloadSize))
+}
+
 // NewWriter returns a new stargz writer (gzip-based) writing to w.
 //
 // The writer must be closed to write its trailing table of contents.
@@ -616,7 +640,7 @@ func NewWriterLevel(w io.Writer, compressionLevel int) *Writer {
 	return NewWriterWithCompressor(w, NewGzipCompressorWithLevel(compressionLevel))
 }

-// NewWriterLevel returns a new stargz writer writing to w.
+// NewWriterWithCompressor returns a new stargz writer writing to w.
 // The compression method is configurable.
 //
 // The writer must be closed to write its trailing table of contents.
@@ -696,29 +720,71 @@ func (w *Writer) condOpenGz() (err error) {
 // each of its contents to w.
 //
 // The input r can optionally be gzip compressed but the output will
-// always be gzip compressed.
+// always be compressed by the specified compressor.
 func (w *Writer) AppendTar(r io.Reader) error {
+	return w.appendTar(r, false)
+}
+
+// AppendTarLossLess reads the tar or tar.gz file from r and appends
+// each of its contents to w.
+//
+// The input r can optionally be gzip compressed but the output will
+// always be compressed by the specified compressor.
+//
+// The difference of this func with AppendTar is that this writes
+// the input tar stream into w without any modification (e.g. to header bytes).
+//
+// Note that if the input tar stream already contains TOC JSON, this returns
+// error because w cannot overwrite the TOC JSON to the one generated by w without
+// lossy modification. To avoid this error, if the input stream is known to be stargz/estargz,
+// you shoud decompress it and remove TOC JSON in advance.
+func (w *Writer) AppendTarLossLess(r io.Reader) error {
+	return w.appendTar(r, true)
+}
+
+func (w *Writer) appendTar(r io.Reader, lossless bool) error {
+	var src io.Reader
 	br := bufio.NewReader(r)
-	var tr *tar.Reader
 	if isGzip(br) {
-		// NewReader can't fail if isGzip returned true.
 		zr, _ := gzip.NewReader(br)
-		tr = tar.NewReader(zr)
+		src = zr
 	} else {
-		tr = tar.NewReader(br)
+		src = io.Reader(br)
+	}
+	dst := currentCompressionWriter{w}
+	var tw *tar.Writer
+	if !lossless {
+		tw = tar.NewWriter(dst) // use tar writer only when this isn't lossless mode.
+	}
+	tr := tar.NewReader(src)
+	if lossless {
+		tr.RawAccounting = true
 	}
 	for {
 		h, err := tr.Next()
 		if err == io.EOF {
+			if lossless {
+				if remain := tr.RawBytes(); len(remain) > 0 {
+					// Collect the remaining null bytes.
+					// https://github.com/vbatts/tar-split/blob/80a436fd6164c557b131f7c59ed69bd81af69761/concept/main.go#L49-L53
+					if _, err := dst.Write(remain); err != nil {
+						return err
+					}
+				}
+			}
 			break
 		}
 		if err != nil {
 			return fmt.Errorf("error reading from source tar: tar.Reader.Next: %v", err)
 		}
-		if h.Name == TOCTarName {
+		if cleanEntryName(h.Name) == TOCTarName {
 			// It is possible for a layer to be "stargzified" twice during the
 			// distribution lifecycle. So we reserve "TOCTarName" here to avoid
 			// duplicated entries in the resulting layer.
+			if lossless {
+				// We cannot handle this in lossless way.
+				return fmt.Errorf("existing TOC JSON is not allowed; decompress layer before append")
+			}
 			continue
 		}

@@ -744,9 +810,14 @@ func (w *Writer) AppendTar(r io.Reader) error {
 		if err := w.condOpenGz(); err != nil {
 			return err
 		}
-		tw := tar.NewWriter(currentCompressionWriter{w})
-		if err := tw.WriteHeader(h); err != nil {
-			return err
+		if tw != nil {
+			if err := tw.WriteHeader(h); err != nil {
+				return err
+			}
+		} else {
+			if _, err := dst.Write(tr.RawBytes()); err != nil {
+				return err
+			}
 		}
 		switch h.Typeflag {
 		case tar.TypeLink:
@@ -808,7 +879,13 @@ func (w *Writer) AppendTar(r io.Reader) error {
 			}

 			teeChunk := io.TeeReader(tee, chunkDigest.Hash())
-			if _, err := io.CopyN(tw, teeChunk, chunkSize); err != nil {
+			var out io.Writer
+			if tw != nil {
+				out = tw
+			} else {
+				out = dst
+			}
+			if _, err := io.CopyN(out, teeChunk, chunkSize); err != nil {
 				return fmt.Errorf("error copying %q: %v", h.Name, err)
 			}
 			ent.ChunkDigest = chunkDigest.Digest().String()
@@ -825,11 +902,18 @@ func (w *Writer) AppendTar(r io.Reader) error {
 		if payloadDigest != nil {
 			regFileEntry.Digest = payloadDigest.Digest().String()
 		}
-		if err := tw.Flush(); err != nil {
-			return err
+		if tw != nil {
+			if err := tw.Flush(); err != nil {
+				return err
+			}
 		}
 	}
-	return nil
+	remainDest := ioutil.Discard
+	if lossless {
+		remainDest = dst // Preserve the remaining bytes in lossless mode
+	}
+	_, err := io.Copy(remainDest, src)
+	return err
 }

 // DiffID returns the SHA-256 of the uncompressed tar bytes.
estargz/go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,6 @@ require (
66
github.com/klauspost/compress v1.13.5
77
github.com/opencontainers/go-digest v1.0.0
88
github.com/pkg/errors v0.9.1
9+
github.com/vbatts/tar-split v0.11.2
910
golang.org/x/sync v0.0.0-20201207232520-09787c993a3a
1011
)

estargz/go.sum

Lines changed: 14 additions & 0 deletions

@@ -1,8 +1,22 @@
+github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
+github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/klauspost/compress v1.13.5 h1:9O69jUPDcsT9fEm74W92rZL9FQY7rCdaXVneq+yyzl4=
 github.com/klauspost/compress v1.13.5/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
 github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
 github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
 github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
 github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
+github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
+github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
+github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
+github.com/urfave/cli v1.22.4/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
+github.com/vbatts/tar-split v0.11.2 h1:Via6XqJr0hceW4wff3QRzD5gAk/tatMw/4ZA7cTlIME=
+github.com/vbatts/tar-split v0.11.2/go.mod h1:vV3ZuO2yWSVsz+pfFzDG/upWH1JhjOiEaWq6kXyQ3VI=
 golang.org/x/sync v0.0.0-20201207232520-09787c993a3a h1:DcqTD9SDLc+1P/r1EmRBwnVsrOwW+kk2vWf9n+1sGhs=
 golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=

estargz/gzip.go

Lines changed: 15 additions & 15 deletions

@@ -124,31 +124,31 @@ func (gz *GzipDecompressor) ParseTOC(r io.Reader) (toc *JTOC, tocDgst digest.Digest, err error) {
 	return parseTOCEStargz(r)
 }

-func (gz *GzipDecompressor) ParseFooter(p []byte) (tocOffset, tocSize int64, err error) {
+func (gz *GzipDecompressor) ParseFooter(p []byte) (blobPayloadSize, tocOffset, tocSize int64, err error) {
 	if len(p) != FooterSize {
-		return 0, 0, fmt.Errorf("invalid length %d cannot be parsed", len(p))
+		return 0, 0, 0, fmt.Errorf("invalid length %d cannot be parsed", len(p))
 	}
 	zr, err := gzip.NewReader(bytes.NewReader(p))
 	if err != nil {
-		return 0, 0, err
+		return 0, 0, 0, err
 	}
 	defer zr.Close()
 	extra := zr.Header.Extra
 	si1, si2, subfieldlen, subfield := extra[0], extra[1], extra[2:4], extra[4:]
 	if si1 != 'S' || si2 != 'G' {
-		return 0, 0, fmt.Errorf("invalid subfield IDs: %q, %q; want E, S", si1, si2)
+		return 0, 0, 0, fmt.Errorf("invalid subfield IDs: %q, %q; want E, S", si1, si2)
 	}
 	if slen := binary.LittleEndian.Uint16(subfieldlen); slen != uint16(16+len("STARGZ")) {
-		return 0, 0, fmt.Errorf("invalid length of subfield %d; want %d", slen, 16+len("STARGZ"))
+		return 0, 0, 0, fmt.Errorf("invalid length of subfield %d; want %d", slen, 16+len("STARGZ"))
 	}
 	if string(subfield[16:]) != "STARGZ" {
-		return 0, 0, fmt.Errorf("STARGZ magic string must be included in the footer subfield")
+		return 0, 0, 0, fmt.Errorf("STARGZ magic string must be included in the footer subfield")
 	}
 	tocOffset, err = strconv.ParseInt(string(subfield[:16]), 16, 64)
 	if err != nil {
-		return 0, 0, errors.Wrapf(err, "legacy: failed to parse toc offset")
+		return 0, 0, 0, errors.Wrapf(err, "legacy: failed to parse toc offset")
 	}
-	return tocOffset, 0, nil
+	return tocOffset, tocOffset, 0, nil
 }

 func (gz *GzipDecompressor) FooterSize() int64 {
@@ -165,27 +165,27 @@ func (gz *legacyGzipDecompressor) ParseTOC(r io.Reader) (toc *JTOC, tocDgst digest.Digest, err error) {
 	return parseTOCEStargz(r)
 }

-func (gz *legacyGzipDecompressor) ParseFooter(p []byte) (tocOffset, tocSize int64, err error) {
+func (gz *legacyGzipDecompressor) ParseFooter(p []byte) (blobPayloadSize, tocOffset, tocSize int64, err error) {
 	if len(p) != legacyFooterSize {
-		return 0, 0, fmt.Errorf("legacy: invalid length %d cannot be parsed", len(p))
+		return 0, 0, 0, fmt.Errorf("legacy: invalid length %d cannot be parsed", len(p))
 	}
 	zr, err := gzip.NewReader(bytes.NewReader(p))
 	if err != nil {
-		return 0, 0, errors.Wrapf(err, "legacy: failed to get footer gzip reader")
+		return 0, 0, 0, errors.Wrapf(err, "legacy: failed to get footer gzip reader")
 	}
 	defer zr.Close()
 	extra := zr.Header.Extra
 	if len(extra) != 16+len("STARGZ") {
-		return 0, 0, fmt.Errorf("legacy: invalid stargz's extra field size")
+		return 0, 0, 0, fmt.Errorf("legacy: invalid stargz's extra field size")
 	}
 	if string(extra[16:]) != "STARGZ" {
-		return 0, 0, fmt.Errorf("legacy: magic string STARGZ not found")
+		return 0, 0, 0, fmt.Errorf("legacy: magic string STARGZ not found")
 	}
 	tocOffset, err = strconv.ParseInt(string(extra[:16]), 16, 64)
 	if err != nil {
-		return 0, 0, errors.Wrapf(err, "legacy: failed to parse toc offset")
+		return 0, 0, 0, errors.Wrapf(err, "legacy: failed to parse toc offset")
 	}
-	return tocOffset, 0, nil
+	return tocOffset, tocOffset, 0, nil
 }

 func (gz *legacyGzipDecompressor) FooterSize() int64 {
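Both gzip footer parsers now return blobPayloadSize as an additional first value alongside the TOC offset and size; for these gzip-based footers it is simply the TOC offset, since the compressed tar payload runs right up to the TOC gzip member, and this is the value Unpack in estargz.go relies on. Below is a minimal sketch of consuming the new return value from outside the package; the splitBlob helper and its package are hypothetical and not part of this commit.

package example

import (
	"fmt"

	"github.com/containerd/stargz-snapshotter/estargz"
)

// splitBlob separates an in-memory gzip eStargz blob into its compressed tar
// payload and the trailing TOC + footer region using the new blobPayloadSize
// return value. The helper and its name are illustrative only.
func splitBlob(blob []byte) (payload, tocAndFooter []byte, err error) {
	d := new(estargz.GzipDecompressor)
	fSize := d.FooterSize()
	if int64(len(blob)) < fSize {
		return nil, nil, fmt.Errorf("blob is too small; %d < %d", len(blob), fSize)
	}
	// ParseFooter expects exactly FooterSize bytes taken from the end of the blob.
	footer := blob[int64(len(blob))-fSize:]
	blobPayloadSize, tocOffset, _, err := d.ParseFooter(footer)
	if err != nil {
		return nil, nil, err
	}
	// For the gzip decompressors in this file, blobPayloadSize equals tocOffset.
	_ = tocOffset
	return blob[:blobPayloadSize], blob[blobPayloadSize:], nil
}

Callers that only need the TOC location, such as get-toc-digest.go above, simply discard the first return value with _.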

estargz/gzip_test.go

Lines changed: 2 additions & 2 deletions

@@ -110,7 +110,7 @@ func checkFooter(t *testing.T, off int64) {
 	if len(footer) != FooterSize {
 		t.Fatalf("for offset %v, footer length was %d, not expected %d. got bytes: %q", off, len(footer), FooterSize, footer)
 	}
-	got, _, err := (&GzipDecompressor{}).ParseFooter(footer)
+	_, got, _, err := (&GzipDecompressor{}).ParseFooter(footer)
 	if err != nil {
 		t.Fatalf("failed to parse footer for offset %d, footer: %x: err: %v",
 			off, footer, err)
@@ -125,7 +125,7 @@ func checkLegacyFooter(t *testing.T, off int64) {
 	if len(footer) != legacyFooterSize {
 		t.Fatalf("for offset %v, footer length was %d, not expected %d. got bytes: %q", off, len(footer), legacyFooterSize, footer)
 	}
-	got, _, err := (&legacyGzipDecompressor{}).ParseFooter(footer)
+	_, got, _, err := (&legacyGzipDecompressor{}).ParseFooter(footer)
 	if err != nil {
 		t.Fatalf("failed to parse legacy footer for offset %d, footer: %x: err: %v",
 			off, footer, err)
