Skip to content
This repository was archived by the owner on Sep 11, 2020. It is now read-only.

plumbing: packafile, improve delta reutilization #697

Merged
merged 1 commit into from
Dec 20, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions plumbing/format/packfile/delta_selector.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,11 +174,6 @@ func (dw *deltaSelector) fixAndBreakChainsOne(objectsToPack map[plumbing.Hash]*O
return dw.undeltify(otp)
}

if base.Size() <= otp.Size() {
// Bases should be bigger
return dw.undeltify(otp)
}

if err := dw.fixAndBreakChainsOne(objectsToPack, base); err != nil {
return err
}
Expand Down
57 changes: 40 additions & 17 deletions plumbing/format/packfile/encoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,7 @@ type Encoder struct {
w *offsetWriter
zw *zlib.Writer
hasher plumbing.Hasher
// offsets is a map of object hashes to corresponding offsets in the packfile.
// It is used to determine offset of the base of a delta when a OFS_DELTA is
// used.
offsets map[plumbing.Hash]int64

useRefDeltas bool
}

Expand All @@ -40,7 +37,6 @@ func NewEncoder(w io.Writer, s storer.EncodedObjectStorer, useRefDeltas bool) *E
w: ow,
zw: zw,
hasher: h,
offsets: make(map[plumbing.Hash]int64),
useRefDeltas: useRefDeltas,
}
}
Expand Down Expand Up @@ -85,11 +81,34 @@ func (e *Encoder) head(numEntries int) error {
}

func (e *Encoder) entry(o *ObjectToPack) error {
offset := e.w.Offset()
e.offsets[o.Hash()] = offset
if o.WantWrite() {
// A cycle exists in this delta chain. This should only occur if a
// selected object representation disappeared during writing
// (for example due to a concurrent repack) and a different base
// was chosen, forcing a cycle. Select something other than a
// delta, and write this object.
o.BackToOriginal()
}

if o.IsWritten() {
return nil
}

o.MarkWantWrite()

if err := e.writeBaseIfDelta(o); err != nil {
return err
}

// We need to check if we already write that object due a cyclic delta chain
if o.IsWritten() {
return nil
}

o.Offset = e.w.Offset()

if o.IsDelta() {
if err := e.writeDeltaHeader(o, offset); err != nil {
if err := e.writeDeltaHeader(o); err != nil {
return err
}
} else {
Expand All @@ -112,7 +131,16 @@ func (e *Encoder) entry(o *ObjectToPack) error {
return e.zw.Close()
}

func (e *Encoder) writeDeltaHeader(o *ObjectToPack, offset int64) error {
func (e *Encoder) writeBaseIfDelta(o *ObjectToPack) error {
if o.IsDelta() && !o.Base.IsWritten() {
// We must write base first
return e.entry(o.Base)
}

return nil
}

func (e *Encoder) writeDeltaHeader(o *ObjectToPack) error {
// Write offset deltas by default
t := plumbing.OFSDeltaObject
if e.useRefDeltas {
Expand All @@ -126,23 +154,18 @@ func (e *Encoder) writeDeltaHeader(o *ObjectToPack, offset int64) error {
if e.useRefDeltas {
return e.writeRefDeltaHeader(o.Base.Hash())
} else {
return e.writeOfsDeltaHeader(offset, o.Base.Hash())
return e.writeOfsDeltaHeader(o)
}
}

func (e *Encoder) writeRefDeltaHeader(base plumbing.Hash) error {
return binary.Write(e.w, base)
}

func (e *Encoder) writeOfsDeltaHeader(deltaOffset int64, base plumbing.Hash) error {
baseOffset, ok := e.offsets[base]
if !ok {
return fmt.Errorf("base for delta not found, base hash: %v", base)
}

func (e *Encoder) writeOfsDeltaHeader(o *ObjectToPack) error {
// for OFS_DELTA, offset of the base is interpreted as negative offset
// relative to the type-byte of the header of the ofs-delta entry.
relativeOffset := deltaOffset - baseOffset
relativeOffset := o.Offset - o.Base.Offset
if relativeOffset <= 0 {
return fmt.Errorf("bad offset for OFS_DELTA entry: %d", relativeOffset)
}
Expand Down
6 changes: 4 additions & 2 deletions plumbing/format/packfile/encoder_advanced_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,16 +68,18 @@ func (s *EncoderAdvancedSuite) testEncodeDecode(c *C, storage storer.Storer, pac

buf := bytes.NewBuffer(nil)
enc := NewEncoder(buf, storage, false)
_, err = enc.Encode(hashes, packWindow)
encodeHash, err := enc.Encode(hashes, packWindow)
c.Assert(err, IsNil)

scanner := NewScanner(buf)
storage = memory.NewStorage()
d, err := NewDecoder(scanner, storage)
c.Assert(err, IsNil)
_, err = d.Decode()
decodeHash, err := d.Decode()
c.Assert(err, IsNil)

c.Assert(encodeHash, Equals, decodeHash)

objIter, err = storage.IterEncodedObjects(plumbing.AnyObject)
c.Assert(err, IsNil)
obtainedObjects := map[plumbing.Hash]bool{}
Expand Down
81 changes: 77 additions & 4 deletions plumbing/format/packfile/encoder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,16 @@ func (s *EncoderSuite) TestDecodeEncodeWithDeltasDecodeOFS(c *C) {
s.deltaOverDeltaTest(c)
}

func (s *EncoderSuite) TestDecodeEncodeWithCycleREF(c *C) {
s.enc = NewEncoder(s.buf, s.store, true)
s.deltaOverDeltaCyclicTest(c)
}

func (s *EncoderSuite) TestDecodeEncodeWithCycleOFS(c *C) {
s.enc = NewEncoder(s.buf, s.store, false)
s.deltaOverDeltaCyclicTest(c)
}

func (s *EncoderSuite) simpleDeltaTest(c *C) {
srcObject := newObject(plumbing.BlobObject, []byte("0"))
targetObject := newObject(plumbing.BlobObject, []byte("01"))
Expand All @@ -114,7 +124,7 @@ func (s *EncoderSuite) simpleDeltaTest(c *C) {
c.Assert(err, IsNil)

srcToPack := newObjectToPack(srcObject)
_, err = s.enc.encode([]*ObjectToPack{
encHash, err := s.enc.encode([]*ObjectToPack{
srcToPack,
newDeltaObjectToPack(srcToPack, targetObject, deltaObject),
})
Expand All @@ -126,9 +136,11 @@ func (s *EncoderSuite) simpleDeltaTest(c *C) {
d, err := NewDecoder(scanner, storage)
c.Assert(err, IsNil)

_, err = d.Decode()
decHash, err := d.Decode()
c.Assert(err, IsNil)

c.Assert(encHash, Equals, decHash)

decSrc, err := storage.EncodedObject(srcObject.Type(), srcObject.Hash())
c.Assert(err, IsNil)
c.Assert(decSrc, DeepEquals, srcObject)
Expand All @@ -153,7 +165,8 @@ func (s *EncoderSuite) deltaOverDeltaTest(c *C) {

srcToPack := newObjectToPack(srcObject)
targetToPack := newObjectToPack(targetObject)
_, err = s.enc.encode([]*ObjectToPack{
encHash, err := s.enc.encode([]*ObjectToPack{
targetToPack,
srcToPack,
newDeltaObjectToPack(srcToPack, targetObject, deltaObject),
newDeltaObjectToPack(targetToPack, otherTargetObject, otherDeltaObject),
Expand All @@ -165,9 +178,11 @@ func (s *EncoderSuite) deltaOverDeltaTest(c *C) {
d, err := NewDecoder(scanner, storage)
c.Assert(err, IsNil)

_, err = d.Decode()
decHash, err := d.Decode()
c.Assert(err, IsNil)

c.Assert(encHash, Equals, decHash)

decSrc, err := storage.EncodedObject(srcObject.Type(), srcObject.Hash())
c.Assert(err, IsNil)
c.Assert(decSrc, DeepEquals, srcObject)
Expand All @@ -180,3 +195,61 @@ func (s *EncoderSuite) deltaOverDeltaTest(c *C) {
c.Assert(err, IsNil)
c.Assert(decOtherTarget, DeepEquals, otherTargetObject)
}

func (s *EncoderSuite) deltaOverDeltaCyclicTest(c *C) {
o1 := newObject(plumbing.BlobObject, []byte("0"))
o2 := newObject(plumbing.BlobObject, []byte("01"))
o3 := newObject(plumbing.BlobObject, []byte("011111"))
o4 := newObject(plumbing.BlobObject, []byte("01111100000"))

d2, err := GetDelta(o1, o2)
c.Assert(err, IsNil)

d3, err := GetDelta(o4, o3)
c.Assert(err, IsNil)

d4, err := GetDelta(o3, o4)
c.Assert(err, IsNil)

po1 := newObjectToPack(o1)
pd2 := newDeltaObjectToPack(po1, o2, d2)
pd3 := newObjectToPack(o3)
pd4 := newObjectToPack(o4)

pd3.SetDelta(pd4, d3)
pd4.SetDelta(pd3, d4)

encHash, err := s.enc.encode([]*ObjectToPack{
po1,
pd2,
pd3,
pd4,
})
c.Assert(err, IsNil)

scanner := NewScanner(s.buf)
storage := memory.NewStorage()
d, err := NewDecoder(scanner, storage)
c.Assert(err, IsNil)

decHash, err := d.Decode()
c.Assert(err, IsNil)

c.Assert(encHash, Equals, decHash)

decSrc, err := storage.EncodedObject(o1.Type(), o1.Hash())
c.Assert(err, IsNil)
c.Assert(decSrc, DeepEquals, o1)

decTarget, err := storage.EncodedObject(o2.Type(), o2.Hash())
c.Assert(err, IsNil)
c.Assert(decTarget, DeepEquals, o2)

decOtherTarget, err := storage.EncodedObject(o3.Type(), o3.Hash())
c.Assert(err, IsNil)
c.Assert(decOtherTarget, DeepEquals, o3)

decAnotherTarget, err := storage.EncodedObject(o4.Type(), o4.Hash())
c.Assert(err, IsNil)
c.Assert(decAnotherTarget, DeepEquals, o4)
}
32 changes: 31 additions & 1 deletion plumbing/format/packfile/object_pack.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,16 @@ type ObjectToPack struct {
// If the main object is not a delta, Base will be null
Base *ObjectToPack
// Original is the object that we can generate applying the delta to
// Base, or the same object as EncodedObject in the case of a non-delta
// Base, or the same object as Object in the case of a non-delta
// object.
Original plumbing.EncodedObject
// Depth is the amount of deltas needed to resolve to obtain Original
// (delta based on delta based on ...)
Depth int

// offset in pack when object has been already written, or 0 if it
// has not been written yet
Offset int64
}

// newObjectToPack creates a correct ObjectToPack based on a non-delta object
Expand All @@ -41,6 +45,32 @@ func newDeltaObjectToPack(base *ObjectToPack, original, delta plumbing.EncodedOb
}
}

// BackToOriginal converts that ObjectToPack to a non-deltified object if it was one
func (o *ObjectToPack) BackToOriginal() {
if o.IsDelta() {
o.Object = o.Original
o.Base = nil
o.Depth = 0
}
}

// IsWritten returns if that ObjectToPack was
// already written into the packfile or not
func (o *ObjectToPack) IsWritten() bool {
return o.Offset > 1
}

// MarkWantWrite marks this ObjectToPack as WantWrite
// to avoid delta chain loops
func (o *ObjectToPack) MarkWantWrite() {
o.Offset = 1
}

// WantWrite checks if this ObjectToPack was marked as WantWrite before
func (o *ObjectToPack) WantWrite() bool {
return o.Offset == 1
}

func (o *ObjectToPack) Type() plumbing.ObjectType {
if o.Original != nil {
return o.Original.Type()
Expand Down