2323package estargz
2424
2525import (
26- "archive/tar"
2726 "bufio"
2827 "bytes"
2928 "compress/gzip"
@@ -42,6 +41,7 @@ import (
4241 "github.com/containerd/stargz-snapshotter/estargz/errorutil"
4342 digest "github.com/opencontainers/go-digest"
4443 "github.com/pkg/errors"
44+ "github.com/vbatts/tar-split/archive/tar"
4545)
4646
4747// A Reader permits random access reads from a stargz file.
@@ -95,10 +95,10 @@ func WithTelemetry(telemetry *Telemetry) OpenOption {
9595 }
9696}
9797
98- // A func which takes start time and records the diff
98+ // MeasureLatencyHook is a func which takes the start time of an operation and records the elapsed time (latency).
9999type MeasureLatencyHook func (time.Time )
100100
101- // A struct which defines telemetry hooks. By implementing these hooks you should be able to record
101+ // Telemetry is a struct which defines telemetry hooks. By implementing these hooks you should be able to record
102102// the latency metrics of the respective steps of estargz open operation. To be used with estargz.OpenWithTelemetry(...)
103103type Telemetry struct {
104104 GetFooterLatency MeasureLatencyHook // measure time to get stargz footer (in milliseconds)
@@ -146,7 +146,7 @@ func Open(sr *io.SectionReader, opt ...OpenOption) (*Reader, error) {
146146 fSize := d .FooterSize ()
147147 fOffset := positive (int64 (len (footer )) - fSize )
148148 maybeTocBytes := footer [:fOffset ]
149- tocOffset , tocSize , err := d .ParseFooter (footer [fOffset :])
149+ _ , tocOffset , tocSize , err := d .ParseFooter (footer [fOffset :])
150150 if err != nil {
151151 allErr = append (allErr , err )
152152 continue
@@ -187,7 +187,7 @@ func OpenFooter(sr *io.SectionReader) (tocOffset int64, footerSize int64, rErr e
187187 for _ , d := range []Decompressor {new (GzipDecompressor ), new (legacyGzipDecompressor )} {
188188 fSize := d .FooterSize ()
189189 fOffset := positive (int64 (len (footer )) - fSize )
190- tocOffset , _ , err := d .ParseFooter (footer [fOffset :])
190+ _ , tocOffset , _ , err := d .ParseFooter (footer [fOffset :])
191191 if err == nil {
192192 return tocOffset , fSize , err
193193 }
@@ -591,6 +591,11 @@ type currentCompressionWriter struct{ w *Writer }
591591
592592func (ccw currentCompressionWriter ) Write (p []byte ) (int , error ) {
593593 ccw .w .diffHash .Write (p )
594+ if ccw .w .gz == nil {
595+ if err := ccw .w .condOpenGz (); err != nil {
596+ return 0 , err
597+ }
598+ }
594599 return ccw .w .gz .Write (p )
595600}
596601
@@ -601,6 +606,25 @@ func (w *Writer) chunkSize() int {
601606 return w .ChunkSize
602607}
603608
609+ // Unpack decompresses the given estargz blob and returns a ReadCloser of the tar blob.
610+ // TOC JSON and footer are removed.
611+ func Unpack (sr * io.SectionReader , c Decompressor ) (io.ReadCloser , error ) {
612+ footerSize := c .FooterSize ()
613+ if sr .Size () < footerSize {
614+ return nil , fmt .Errorf ("blob is too small; %d < %d" , sr .Size (), footerSize )
615+ }
616+ footerOffset := sr .Size () - footerSize
617+ footer := make ([]byte , footerSize )
618+ if _ , err := sr .ReadAt (footer , footerOffset ); err != nil {
619+ return nil , err
620+ }
621+ blobPayloadSize , _ , _ , err := c .ParseFooter (footer )
622+ if err != nil {
623+ return nil , errors .Wrapf (err , "failed to parse footer" )
624+ }
625+ return c .Reader (io .LimitReader (sr , blobPayloadSize ))
626+ }
627+
604628// NewWriter returns a new stargz writer (gzip-based) writing to w.
605629//
606630// The writer must be closed to write its trailing table of contents.
@@ -616,7 +640,7 @@ func NewWriterLevel(w io.Writer, compressionLevel int) *Writer {
616640 return NewWriterWithCompressor (w , NewGzipCompressorWithLevel (compressionLevel ))
617641}
618642
619- // NewWriterLevel returns a new stargz writer writing to w.
643+ // NewWriterWithCompressor returns a new stargz writer writing to w.
620644// The compression method is configurable.
621645//
622646// The writer must be closed to write its trailing table of contents.
@@ -696,29 +720,71 @@ func (w *Writer) condOpenGz() (err error) {
696720// each of its contents to w.
697721//
698722// The input r can optionally be gzip compressed but the output will
699- // always be gzip compressed.
723+ // always be compressed by the specified compressor .
700724func (w * Writer ) AppendTar (r io.Reader ) error {
725+ return w .appendTar (r , false )
726+ }
727+
728+ // AppendTarLossLess reads the tar or tar.gz file from r and appends
729+ // each of its contents to w.
730+ //
731+ // The input r can optionally be gzip compressed but the output will
732+ // always be compressed by the specified compressor.
733+ //
734+ // The difference of this func with AppendTar is that this writes
735+ // the input tar stream into w without any modification (e.g. to header bytes).
736+ //
737+ // Note that if the input tar stream already contains TOC JSON, this returns
738+ // error because w cannot overwrite the TOC JSON to the one generated by w without
739+ // lossy modification. To avoid this error, if the input stream is known to be stargz/estargz,
740+ // you shoud decompress it and remove TOC JSON in advance.
741+ func (w * Writer ) AppendTarLossLess (r io.Reader ) error {
742+ return w .appendTar (r , true )
743+ }
744+
745+ func (w * Writer ) appendTar (r io.Reader , lossless bool ) error {
746+ var src io.Reader
701747 br := bufio .NewReader (r )
702- var tr * tar.Reader
703748 if isGzip (br ) {
704- // NewReader can't fail if isGzip returned true.
705749 zr , _ := gzip .NewReader (br )
706- tr = tar . NewReader ( zr )
750+ src = zr
707751 } else {
708- tr = tar .NewReader (br )
752+ src = io .Reader (br )
753+ }
754+ dst := currentCompressionWriter {w }
755+ var tw * tar.Writer
756+ if ! lossless {
757+ tw = tar .NewWriter (dst ) // use tar writer only when this isn't lossless mode.
758+ }
759+ tr := tar .NewReader (src )
760+ if lossless {
761+ tr .RawAccounting = true
709762 }
710763 for {
711764 h , err := tr .Next ()
712765 if err == io .EOF {
766+ if lossless {
767+ if remain := tr .RawBytes (); len (remain ) > 0 {
768+ // Collect the remaining null bytes.
769+ // https://github.com/vbatts/tar-split/blob/80a436fd6164c557b131f7c59ed69bd81af69761/concept/main.go#L49-L53
770+ if _ , err := dst .Write (remain ); err != nil {
771+ return err
772+ }
773+ }
774+ }
713775 break
714776 }
715777 if err != nil {
716778 return fmt .Errorf ("error reading from source tar: tar.Reader.Next: %v" , err )
717779 }
718- if h .Name == TOCTarName {
780+ if cleanEntryName ( h .Name ) == TOCTarName {
719781 // It is possible for a layer to be "stargzified" twice during the
720782 // distribution lifecycle. So we reserve "TOCTarName" here to avoid
721783 // duplicated entries in the resulting layer.
784+ if lossless {
785+ // We cannot handle this in lossless way.
786+ return fmt .Errorf ("existing TOC JSON is not allowed; decompress layer before append" )
787+ }
722788 continue
723789 }
724790
@@ -744,9 +810,14 @@ func (w *Writer) AppendTar(r io.Reader) error {
744810 if err := w .condOpenGz (); err != nil {
745811 return err
746812 }
747- tw := tar .NewWriter (currentCompressionWriter {w })
748- if err := tw .WriteHeader (h ); err != nil {
749- return err
813+ if tw != nil {
814+ if err := tw .WriteHeader (h ); err != nil {
815+ return err
816+ }
817+ } else {
818+ if _ , err := dst .Write (tr .RawBytes ()); err != nil {
819+ return err
820+ }
750821 }
751822 switch h .Typeflag {
752823 case tar .TypeLink :
@@ -808,7 +879,13 @@ func (w *Writer) AppendTar(r io.Reader) error {
808879 }
809880
810881 teeChunk := io .TeeReader (tee , chunkDigest .Hash ())
811- if _ , err := io .CopyN (tw , teeChunk , chunkSize ); err != nil {
882+ var out io.Writer
883+ if tw != nil {
884+ out = tw
885+ } else {
886+ out = dst
887+ }
888+ if _ , err := io .CopyN (out , teeChunk , chunkSize ); err != nil {
812889 return fmt .Errorf ("error copying %q: %v" , h .Name , err )
813890 }
814891 ent .ChunkDigest = chunkDigest .Digest ().String ()
@@ -825,11 +902,18 @@ func (w *Writer) AppendTar(r io.Reader) error {
825902 if payloadDigest != nil {
826903 regFileEntry .Digest = payloadDigest .Digest ().String ()
827904 }
828- if err := tw .Flush (); err != nil {
829- return err
905+ if tw != nil {
906+ if err := tw .Flush (); err != nil {
907+ return err
908+ }
830909 }
831910 }
832- return nil
911+ remainDest := ioutil .Discard
912+ if lossless {
913+ remainDest = dst // Preserve the remaining bytes in lossless mode
914+ }
915+ _ , err := io .Copy (remainDest , src )
916+ return err
833917}
834918
835919// DiffID returns the SHA-256 of the uncompressed tar bytes.
0 commit comments