Skip to content
This repository was archived by the owner on Sep 11, 2020. It is now read-only.

format/packfile: implement delta encoding #172

Merged
merged 3 commits into from
Dec 14, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 33 additions & 7 deletions plumbing/format/packfile/diff_delta.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package packfile

import (
"fmt"
"io/ioutil"

"gopkg.in/src-d/go-git.v4/plumbing"
Expand All @@ -14,28 +15,53 @@ const (
maxCopyLen = 0xffff
)

// GetDelta returns the way of how to transform base object to target object
func GetDelta(base, target plumbing.Object) ([]byte, error) {
baseReader, err := base.Reader()
// GetOFSDelta builds the delta that transforms base into target and returns
// it as an object of type plumbing.OFSDeltaObject. It delegates to getDelta
// and passes that function's result and error through unchanged.
func GetOFSDelta(base, target plumbing.Object) (plumbing.Object, error) {
	obj, err := getDelta(base, target, plumbing.OFSDeltaObject)
	return obj, err
}

// GetRefDelta builds the delta that transforms base into target and returns
// it as an object of type plumbing.REFDeltaObject. It delegates to getDelta
// and passes that function's result and error through unchanged.
func GetRefDelta(base, target plumbing.Object) (plumbing.Object, error) {
	obj, err := getDelta(base, target, plumbing.REFDeltaObject)
	return obj, err
}

// getDelta computes the delta between base and target with DiffDelta and
// returns it wrapped in a plumbing.MemoryObject whose type is t and whose size
// is the length of the delta bytes. Only plumbing.OFSDeltaObject and
// plumbing.REFDeltaObject are accepted for t; any other value yields an error.
// Errors from opening or reading either object, or from writing the delta
// into the memory object, are returned as-is with a nil object.
//
// NOTE(review): this span comes from a rendered diff, so removed lines (the
// targetReader, baseBuf and targetBuf statements and the
// `return DiffDelta(baseBuf, targetBuf), nil` line) appear right next to
// their replacements.
// NOTE(review): br and tr are never closed in this function. If Reader()
// returns an io.ReadCloser (not visible here, confirm against
// plumbing.Object), both should be closed once they have been read.
func getDelta(base, target plumbing.Object, t plumbing.ObjectType) (plumbing.Object, error) {
if t != plumbing.OFSDeltaObject && t != plumbing.REFDeltaObject {
return nil, fmt.Errorf("Type not supported: %v", t)
}

br, err := base.Reader()
if err != nil {
return nil, err
}
targetReader, err := target.Reader()
tr, err := target.Reader()
if err != nil {
return nil, err
}

// Both objects are read fully into memory before being diffed.
baseBuf, err := ioutil.ReadAll(baseReader)
bb, err := ioutil.ReadAll(br)
if err != nil {
return nil, err
}

targetBuf, err := ioutil.ReadAll(targetReader)
tb, err := ioutil.ReadAll(tr)
if err != nil {
return nil, err
}

return DiffDelta(baseBuf, targetBuf), nil
db := DiffDelta(bb, tb)
delta := &plumbing.MemoryObject{}
_, err = delta.Write(db)
if err != nil {
return nil, err
}

// Size and type are set explicitly once the delta bytes are written.
delta.SetSize(int64(len(db)))
delta.SetType(t)

return delta, nil
}

// DiffDelta returns the delta that transforms baseBuf into targetBuf
Expand Down
92 changes: 78 additions & 14 deletions plumbing/format/packfile/encoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@ import (
// format
type Encoder struct {
// storage is where Encode looks up the objects to pack.
storage storer.ObjectStorer
// NOTE(review): the `w io.Writer` line below is the removed side of the
// rendered diff; `w *offsetWriter` is its replacement.
w io.Writer
// w receives the packfile output and reports how many bytes have been
// written so far through Offset.
w *offsetWriter
// zw is the zlib writer used for entry contents; entry resets it onto w.
zw *zlib.Writer
// hasher is fed the same bytes as the destination writer (NewEncoder joins
// them with io.MultiWriter); footer writes its sum.
hasher plumbing.Hasher
// offsets maps an object's original hash to the offset at which its entry
// starts, so that offset deltas can locate their base.
offsets map[plumbing.Hash]int64
}

// NewEncoder creates a new packfile encoder using a specific Writer and
Expand All @@ -27,36 +28,45 @@ func NewEncoder(w io.Writer, s storer.ObjectStorer) *Encoder {
Hash: sha1.New(),
}
mw := io.MultiWriter(w, h)
ow := newOffsetWriter(mw)
zw := zlib.NewWriter(mw)
return &Encoder{
storage: s,
w: mw,
w: ow,
zw: zw,
hasher: h,
offsets: make(map[plumbing.Hash]int64),
}
}

// Encode creates a packfile containing all the objects referenced in hashes
// and writes it to the writer in the Encoder. Every hash is looked up in the
// Encoder's storage with plumbing.AnyObject; the first lookup error aborts
// the call and is returned together with plumbing.ZeroHash. On success the
// result is whatever the unexported encode returns for the collected objects.
//
// NOTE(review): this span comes from a rendered diff. The
// `e.head(len(hashes))` guard right below the signature looks like the
// removed side of the change, since encode writes the header itself.
func (e *Encoder) Encode(hashes []plumbing.Hash) (plumbing.Hash, error) {
if err := e.head(len(hashes)); err != nil {
return plumbing.ZeroHash, err
}

var objects []*ObjectToPack
for _, h := range hashes {
o, err := e.storage.Object(plumbing.AnyObject, h)
if err != nil {
return plumbing.ZeroHash, err
}
// TODO delta selection logic
// Until then every object is wrapped with newObjectToPack as-is.
objects = append(objects, newObjectToPack(o))
}

return e.encode(objects)
}
// encode writes a whole packfile for objects: the header carrying the number
// of entries, one entry per object in slice order, and finally the footer.
// The first error from the header or from any entry stops the encoding and
// is returned with plumbing.ZeroHash; otherwise the footer's result is
// returned.
func (e *Encoder) encode(objects []*ObjectToPack) (plumbing.Hash, error) {
	err := e.head(len(objects))
	if err != nil {
		return plumbing.ZeroHash, err
	}

	for i := range objects {
		if err = e.entry(objects[i]); err != nil {
			return plumbing.ZeroHash, err
		}
	}

	return e.footer()
}

func (e *Encoder) head(numEntries int) error {
return binary.Write(
e.w,
Expand All @@ -66,19 +76,22 @@ func (e *Encoder) head(numEntries int) error {
)
}

func (e *Encoder) entry(o plumbing.Object) error {
t := o.Type()
if t == plumbing.OFSDeltaObject || t == plumbing.REFDeltaObject {
// TODO implements delta objects
return fmt.Errorf("delta object not supported: %v", t)
func (e *Encoder) entry(o *ObjectToPack) error {
offset := e.w.Offset()

if err := e.entryHead(o.Object.Type(), o.Object.Size()); err != nil {
return err
}

if err := e.entryHead(t, o.Size()); err != nil {
// Save the position using the original hash, maybe a delta will need it
e.offsets[o.Original.Hash()] = offset

if err := e.writeDeltaHeaderIfAny(o, offset); err != nil {
return err
}

e.zw.Reset(e.w)
or, err := o.Reader()
or, err := o.Object.Reader()
if err != nil {
return err
}
Expand All @@ -90,6 +103,38 @@ func (e *Encoder) entry(o plumbing.Object) error {
return e.zw.Close()
}

// writeDeltaHeaderIfAny writes the delta-specific header for o when o is a
// delta, and does nothing otherwise. offset is the position at which o's
// entry starts; it is only used for offset deltas. Delta objects whose type
// is neither plumbing.OFSDeltaObject nor plumbing.REFDeltaObject are left
// untouched and nil is returned.
func (e *Encoder) writeDeltaHeaderIfAny(o *ObjectToPack, offset int64) error {
	if !o.IsDelta() {
		return nil
	}

	switch o.Object.Type() {
	case plumbing.OFSDeltaObject:
		return e.writeOfsDeltaHeader(offset, o.Base.Original.Hash())
	case plumbing.REFDeltaObject:
		return e.writeRefDeltaHeader(o.Base.Original.Hash())
	default:
		return nil
	}
}

// writeRefDeltaHeader writes source, the hash identifying the delta's base
// object, to the encoder's output using binary.Write.
func (e *Encoder) writeRefDeltaHeader(source plumbing.Hash) error {
	err := binary.Write(e.w, source)
	return err
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

go fmt?


// writeOfsDeltaHeader writes the header of an offset delta: the distance
// between deltaOffset (where the delta entry starts) and the recorded offset
// of the entry for source, encoded with binary.WriteVariableWidthInt. It
// fails when no offset has been recorded for source.
func (e *Encoder) writeOfsDeltaHeader(deltaOffset int64, source plumbing.Hash) error {
	// An offset delta refers to its base by position, so the base entry must
	// already have an offset registered.
	if baseOffset, found := e.offsets[source]; found {
		return binary.WriteVariableWidthInt(e.w, deltaOffset-baseOffset)
	}

	return fmt.Errorf("delta source not found. Hash: %v", source)
}

func (e *Encoder) entryHead(typeNum plumbing.ObjectType, size int64) error {
t := int64(typeNum)
header := []byte{}
Expand All @@ -114,3 +159,22 @@ func (e *Encoder) footer() (plumbing.Hash, error) {
h := e.hasher.Sum()
return h, binary.Write(e.w, h)
}

// offsetWriter wraps an io.Writer and keeps a running total of the bytes the
// wrapped writer reported as written.
type offsetWriter struct {
	w      io.Writer
	offset int64
}

// newOffsetWriter returns an offsetWriter around w with its offset at zero.
func newOffsetWriter(w io.Writer) *offsetWriter {
	ow := new(offsetWriter)
	ow.w = w
	return ow
}

// Write forwards p to the wrapped writer and advances the offset by the
// number of bytes that writer reports, including on a partial write that
// also returns an error.
func (ow *offsetWriter) Write(p []byte) (n int, err error) {
	written, werr := ow.w.Write(p)
	ow.offset += int64(written)
	return written, werr
}

// Offset returns the total number of bytes written so far.
func (ow *offsetWriter) Offset() int64 {
	return ow.offset
}
128 changes: 124 additions & 4 deletions plumbing/format/packfile/encoder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,13 @@ func (s *EncoderSuite) TestMaxObjectSize(c *C) {
c.Assert(hash.IsZero(), Not(Equals), true)
}

// TestHashNotFound checks that encoding the hash built from "BAD" fails with
// plumbing.ErrObjectNotFound and yields plumbing.ZeroHash.
func (s *EncoderSuite) TestHashNotFound(c *C) {
	missing := []plumbing.Hash{plumbing.NewHash("BAD")}

	got, err := s.enc.Encode(missing)
	c.Assert(got, Equals, plumbing.ZeroHash)
	c.Assert(err, NotNil)
	c.Assert(err, Equals, plumbing.ErrObjectNotFound)
}

func (s *EncoderSuite) TestDecodeEncodeDecode(c *C) {
fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) {
scanner := NewScanner(f.Packfile())
Expand All @@ -91,14 +98,16 @@ func (s *EncoderSuite) TestDecodeEncodeDecode(c *C) {
c.Assert(err, IsNil)
c.Assert(ch, Equals, f.PackfileHash)

commitIter, err := d.o.IterObjects(plumbing.AnyObject)
objIter, err := d.o.IterObjects(plumbing.AnyObject)
c.Assert(err, IsNil)

objects := []plumbing.Object{}
hashes := []plumbing.Hash{}
err = commitIter.ForEach(func(o plumbing.Object) error {
err = objIter.ForEach(func(o plumbing.Object) error {
objects = append(objects, o)
hash, err := s.store.SetObject(o)
c.Assert(err, IsNil)

hashes = append(hashes, hash)

return err
Expand All @@ -115,10 +124,10 @@ func (s *EncoderSuite) TestDecodeEncodeDecode(c *C) {
_, err = d.Decode()
c.Assert(err, IsNil)

commitIter, err = d.o.IterObjects(plumbing.AnyObject)
objIter, err = d.o.IterObjects(plumbing.AnyObject)
c.Assert(err, IsNil)
obtainedObjects := []plumbing.Object{}
err = commitIter.ForEach(func(o plumbing.Object) error {
err = objIter.ForEach(func(o plumbing.Object) error {
obtainedObjects = append(obtainedObjects, o)

return nil
Expand All @@ -138,3 +147,114 @@ func (s *EncoderSuite) TestDecodeEncodeDecode(c *C) {
c.Assert(len(obtainedObjects), Equals, equals)
})
}

// TestDecodeEncodeWithDeltaDecodeREF runs the single-delta round trip with
// reference deltas.
func (s *EncoderSuite) TestDecodeEncodeWithDeltaDecodeREF(c *C) {
	var deltaType plumbing.ObjectType = plumbing.REFDeltaObject
	s.simpleDeltaTest(c, deltaType)
}

// TestDecodeEncodeWithDeltaDecodeOFS runs the single-delta round trip with
// offset deltas.
func (s *EncoderSuite) TestDecodeEncodeWithDeltaDecodeOFS(c *C) {
	var deltaType plumbing.ObjectType = plumbing.OFSDeltaObject
	s.simpleDeltaTest(c, deltaType)
}

// TestDecodeEncodeWithDeltasDecodeREF runs the delta-over-delta round trip
// with reference deltas.
func (s *EncoderSuite) TestDecodeEncodeWithDeltasDecodeREF(c *C) {
	var deltaType plumbing.ObjectType = plumbing.REFDeltaObject
	s.deltaOverDeltaTest(c, deltaType)
}

// TestDecodeEncodeWithDeltasDecodeOFS runs the delta-over-delta round trip
// with offset deltas.
func (s *EncoderSuite) TestDecodeEncodeWithDeltasDecodeOFS(c *C) {
	var deltaType plumbing.ObjectType = plumbing.OFSDeltaObject
	s.deltaOverDeltaTest(c, deltaType)
}

// simpleDeltaTest encodes a base blob followed by a single delta of type t
// built against it, decodes the resulting packfile from s.buf into a fresh
// memory storage, and asserts that both the base blob and the target blob
// can be fetched from that storage and deep-equal the originals.
func (s *EncoderSuite) simpleDeltaTest(c *C, t plumbing.ObjectType) {
	base := newObject(plumbing.BlobObject, []byte("0"))
	target := newObject(plumbing.BlobObject, []byte("01"))

	deltaObj, err := delta(base, target, t)
	c.Assert(err, IsNil)

	// The delta entry must point at the very same ObjectToPack as the base
	// entry that precedes it in the pack.
	baseToPack := newObjectToPack(base)
	toEncode := []*ObjectToPack{
		baseToPack,
		newDeltaObjectToPack(baseToPack, target, deltaObj),
	}
	_, err = s.enc.encode(toEncode)
	c.Assert(err, IsNil)

	packScanner := NewScanner(s.buf)

	decoded := memory.NewStorage()
	dec, err := NewDecoder(packScanner, decoded)
	c.Assert(err, IsNil)

	_, err = dec.Decode()
	c.Assert(err, IsNil)

	gotBase, err := decoded.Object(base.Type(), base.Hash())
	c.Assert(err, IsNil)
	c.Assert(gotBase, DeepEquals, base)

	gotTarget, err := decoded.Object(target.Type(), target.Hash())
	c.Assert(err, IsNil)
	c.Assert(gotTarget, DeepEquals, target)
}

// deltaOverDeltaTest encodes a base blob, a delta of type t against that
// base, and a second delta of type t whose base is the first delta's target.
// It then decodes the packfile from s.buf into a fresh memory storage and
// asserts that all three blobs can be fetched and deep-equal the originals.
func (s *EncoderSuite) deltaOverDeltaTest(c *C, t plumbing.ObjectType) {
	base := newObject(plumbing.BlobObject, []byte("0"))
	middle := newObject(plumbing.BlobObject, []byte("01"))
	last := newObject(plumbing.BlobObject, []byte("011111"))

	firstDelta, err := delta(base, middle, t)
	c.Assert(err, IsNil)
	c.Assert(firstDelta.Hash(), Not(Equals), plumbing.ZeroHash)

	secondDelta, err := delta(middle, last, t)
	c.Assert(err, IsNil)
	c.Assert(secondDelta.Hash(), Not(Equals), plumbing.ZeroHash)

	baseToPack := newObjectToPack(base)
	middleToPack := newObjectToPack(middle)
	toEncode := []*ObjectToPack{
		baseToPack,
		newDeltaObjectToPack(baseToPack, middle, firstDelta),
		newDeltaObjectToPack(middleToPack, last, secondDelta),
	}
	_, err = s.enc.encode(toEncode)
	c.Assert(err, IsNil)

	packScanner := NewScanner(s.buf)
	decoded := memory.NewStorage()
	dec, err := NewDecoder(packScanner, decoded)
	c.Assert(err, IsNil)

	_, err = dec.Decode()
	c.Assert(err, IsNil)

	gotBase, err := decoded.Object(base.Type(), base.Hash())
	c.Assert(err, IsNil)
	c.Assert(gotBase, DeepEquals, base)

	gotMiddle, err := decoded.Object(middle.Type(), middle.Hash())
	c.Assert(err, IsNil)
	c.Assert(gotMiddle, DeepEquals, middle)

	gotLast, err := decoded.Object(last.Type(), last.Hash())
	c.Assert(err, IsNil)
	c.Assert(gotLast, DeepEquals, last)
}

// delta is a test helper that dispatches on t: offset deltas go through
// GetOFSDelta and reference deltas through GetRefDelta. Any other type
// panics.
func delta(base, target plumbing.Object, t plumbing.ObjectType) (plumbing.Object, error) {
	if t == plumbing.OFSDeltaObject {
		return GetOFSDelta(base, target)
	}

	if t == plumbing.REFDeltaObject {
		return GetRefDelta(base, target)
	}

	panic("delta type not found")
}

// newObject is a test helper that returns a plumbing.MemoryObject with type
// t, size len(cont) and cont written as its content.
func newObject(t plumbing.ObjectType, cont []byte) plumbing.Object {
	obj := &plumbing.MemoryObject{}
	obj.SetType(t)
	obj.SetSize(int64(len(cont)))
	// The result of Write is not checked here.
	obj.Write(cont)

	return obj
}
Loading