Skip to content
This repository was archived by the owner on Sep 11, 2020. It is now read-only.

plumbing/packfile: PACK encoder #131

Merged
merged 4 commits into from
Nov 24, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions plumbing/format/packfile/common.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package packfile

// signature is the four-byte magic that opens every packfile.
var signature = []byte{'P', 'A', 'C', 'K'}

const (
// VersionSupported is the packfile version supported by this package
VersionSupported uint32 = 2

// Object entry headers are variable-length: the first byte carries the
// object type together with the lowest bits of the length, and every
// following byte carries further length bits.
firstLengthBits = uint8(4) // the first byte of an object header has 4 bits to store the length
lengthBits = uint8(7) // subsequent bytes have 7 bits to store the length
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this comment makes no sense now that it is the first in the const block. Move it or change the comment.

// maskFirstLength and maskContinue are left untyped so that they can be
// combined directly with the int64 values used by the encoder.
maskFirstLength = 15 // 0000 1111: length bits stored in the first header byte
maskContinue = 0x80 // 1000 0000: set on a header byte when another byte follows
maskLength = uint8(127) // 0111 1111: length bits stored in each subsequent byte
maskType = uint8(112) // 0111 0000: object type bits in the first header byte
)
116 changes: 116 additions & 0 deletions plumbing/format/packfile/encoder.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
package packfile

import (
"compress/zlib"
"crypto/sha1"
"fmt"
"io"

"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/storer"
"gopkg.in/src-d/go-git.v4/utils/binary"
)

// Encoder reads objects from an object storage and writes them to a writer
// in PACK format.
type Encoder struct {
	// storage is where the objects to be encoded are looked up.
	storage storer.ObjectStorer
	// w is the destination of the packfile bytes. NewEncoder sets it to a
	// writer that also feeds hasher.
	w io.Writer
	// zw compresses object contents; it is reset onto w for every entry.
	zw *zlib.Writer
	// hasher accumulates the checksum that is written as the pack trailer.
	hasher plumbing.Hasher
}

// NewEncoder returns a packfile Encoder that looks objects up in s and writes
// the encoded packfile to w.
func NewEncoder(w io.Writer, s storer.ObjectStorer) *Encoder {
	hasher := plumbing.Hasher{Hash: sha1.New()}

	// Every byte sent to the caller's writer is also fed to the hasher, so
	// the checksum can be emitted once all entries have been written.
	out := io.MultiWriter(w, hasher)

	return &Encoder{
		storage: s,
		w:       out,
		zw:      zlib.NewWriter(out),
		hasher:  hasher,
	}
}

// Encode creates a packfile containing all the objects referenced in hashes
// and writes it to the writer in the Encoder.
//
// The pack header is written first, then one entry per hash in the order
// given, and finally the trailer. If writing the header, looking an object up
// in the storage, or writing an entry fails, plumbing.ZeroHash and the error
// are returned; otherwise the result is whatever footer returns.
func (e *Encoder) Encode(hashes []plumbing.Hash) (plumbing.Hash, error) {
Copy link
Contributor

@alcortesm alcortesm Nov 23, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From the signature of the method and the comments it is not clear if you can create a single packfile by calling several times in a row to Encode or not.

I would change the comment here to clarify that every time you call Encode it creates a new packfile and write it to the writer. Something like this:

// Encode creates a packfile containing all the objects referenced in hashes and writes it to the writer in the Encoder.

// The header records how many entries follow.
if err := e.head(len(hashes)); err != nil {
return plumbing.ZeroHash, err
}

for _, h := range hashes {
// Objects are fetched by hash regardless of their type.
o, err := e.storage.Object(plumbing.AnyObject, h)
if err != nil {
return plumbing.ZeroHash, err
}

if err := e.entry(o); err != nil {
return plumbing.ZeroHash, err
}
}

// The trailer carries the checksum accumulated by the hasher.
return e.footer()
}

// head starts a new packfile: it clears the running checksum and then writes
// the pack header, made of the "PACK" signature, the format version and the
// number of entries that will follow.
func (e *Encoder) head(numEntries int) error {
	// The hasher lives as long as the Encoder and receives every byte written
	// through e.w, including the trailer of any packfile encoded earlier.
	// Reset it so the checksum covers only the packfile that begins here;
	// otherwise reusing the Encoder would produce an invalid trailer.
	e.hasher.Hash.Reset()

	return binary.Write(
		e.w,
		signature,
		int32(VersionSupported),
		int32(numEntries),
	)
}

// entry writes a single object to the packfile: its type/size header followed
// by its zlib-compressed content.
//
// Delta objects (OFS delta and REF delta) are not supported yet and are
// rejected with an error. The object's reader is closed before returning; a
// failure to close it is reported only when no earlier error occurred.
func (e *Encoder) entry(o plumbing.Object) (err error) {
	t := o.Type()
	if t == plumbing.OFSDeltaObject || t == plumbing.REFDeltaObject {
		// TODO implements delta objects
		return fmt.Errorf("delta object not supported: %v", t)
	}

	if err := e.entryHead(t, o.Size()); err != nil {
		return err
	}

	// Each object is an independent zlib stream, so the compressor is
	// restarted for every entry.
	e.zw.Reset(e.w)

	or, err := o.Reader()
	if err != nil {
		return err
	}
	// Release the reader on every path; previously it was never closed.
	defer func() {
		if cerr := or.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	if _, err = io.Copy(e.zw, or); err != nil {
		return err
	}

	// Closing flushes the compressed data and terminates the zlib stream.
	return e.zw.Close()
}

// entryHead writes the variable-length header that precedes an object's
// content. The first byte holds the object type and the lowest
// firstLengthBits bits of size; each following byte holds the next lengthBits
// bits of size. Every byte except the last has maskContinue set.
//
// It returns an error if size is negative or if writing to the underlying
// writer fails.
func (e *Encoder) entryHead(typeNum plumbing.ObjectType, size int64) error {
	if size < 0 {
		// An arithmetic right shift never brings a negative value down to
		// zero, so the loop below would never terminate.
		return fmt.Errorf("invalid negative object size: %d", size)
	}

	t := int64(typeNum)

	// A non-negative int64 needs at most 10 header bytes: 4 bits in the
	// first byte plus 7 bits in each of up to 9 continuation bytes.
	header := make([]byte, 0, 10)

	c := (t << firstLengthBits) | (size & maskFirstLength)
	size >>= firstLengthBits
	for size != 0 {
		// More length bits remain: flag the pending byte as continued and
		// prepare the next one.
		header = append(header, byte(c|maskContinue))
		c = size & int64(maskLength)
		size >>= lengthBits
	}
	header = append(header, byte(c))

	_, err := e.w.Write(header)
	return err
}

// footer finishes the packfile by writing the checksum accumulated by the
// hasher and returns that checksum. If the write fails it returns
// plumbing.ZeroHash and the error, matching the other error paths of Encode.
func (e *Encoder) footer() (plumbing.Hash, error) {
	sum := e.hasher.Sum()
	if err := binary.Write(e.w, sum); err != nil {
		return plumbing.ZeroHash, err
	}

	return sum, nil
}
140 changes: 140 additions & 0 deletions plumbing/format/packfile/encoder_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
package packfile

import (
"bytes"

"gopkg.in/src-d/go-git.v4/fixtures"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/storage/memory"

. "gopkg.in/check.v1"
)

// EncoderSuite groups the packfile Encoder tests. SetUpTest gives every test
// a fresh buffer, storage and encoder.
type EncoderSuite struct {
	fixtures.Suite

	// buf receives everything the encoder writes.
	buf *bytes.Buffer
	// store is the object storage the encoder reads from.
	store *memory.Storage
	// enc is the encoder under test.
	enc *Encoder
}

// Hand the suite to gocheck.
var _ = Suite(&EncoderSuite{})

// SetUpTest rebuilds the output buffer, the in-memory storage and the encoder
// that writes into that buffer.
func (s *EncoderSuite) SetUpTest(c *C) {
	buf := bytes.NewBuffer(nil)
	store := memory.NewStorage()

	s.buf, s.store = buf, store
	s.enc = NewEncoder(buf, store)
}

// TestCorrectPackHeader encodes an empty list of hashes and checks that the
// output is exactly the pack header followed by the returned checksum.
func (s *EncoderSuite) TestCorrectPackHeader(c *C) {
	hash, err := s.enc.Encode([]plumbing.Hash{})
	c.Assert(err, IsNil)

	expected := []byte{
		'P', 'A', 'C', 'K', // signature
		0, 0, 0, 2, // version
		0, 0, 0, 0, // number of objects
	}
	// The trailer is the hash returned by Encode.
	expected = append(expected, hash[:]...)

	c.Assert(s.buf.Bytes(), DeepEquals, expected)
}

// TestCorrectPackWithOneEmptyObject stores a single empty commit object,
// encodes it and compares the output byte for byte with the expected packfile.
func (s *EncoderSuite) TestCorrectPackWithOneEmptyObject(c *C) {
	obj := &plumbing.MemoryObject{}
	obj.SetType(plumbing.CommitObject)
	obj.SetSize(0)

	_, err := s.store.SetObject(obj)
	c.Assert(err, IsNil)

	hash, err := s.enc.Encode([]plumbing.Hash{obj.Hash()})
	c.Assert(err, IsNil)

	expected := []byte{
		// PACK + VERSION(2) + OBJECT NUMBER(1)
		'P', 'A', 'C', 'K', 0, 0, 0, 2, 0, 0, 0, 1,
		// OBJECT HEADER(TYPE + SIZE) = 0001 0000
		16,
		// zlib output for the empty object content
		120, 156, 1, 0, 0, 255, 255, 0, 0, 0, 1,
	}
	// + HASH
	expected = append(expected, hash[:]...)

	c.Assert(s.buf.Bytes(), DeepEquals, expected)
}

// TestMaxObjectSize checks that an object declaring the largest int64 size
// can be encoded without error and yields a non-zero pack hash.
func (s *EncoderSuite) TestMaxObjectSize(c *C) {
	obj := s.store.NewObject()
	obj.SetSize(9223372036854775807) // largest value an int64 can hold
	obj.SetType(plumbing.CommitObject)

	_, err := s.store.SetObject(obj)
	c.Assert(err, IsNil)

	packHash, err := s.enc.Encode([]plumbing.Hash{obj.Hash()})
	c.Assert(err, IsNil)
	c.Assert(packHash.IsZero(), Equals, false)
}

// TestDecodeEncodeDecode round-trips every packfile fixture: it decodes the
// fixture, re-encodes all of its objects with the Encoder, decodes the result
// again and checks that the same set of object hashes comes out.
func (s *EncoderSuite) TestDecodeEncodeDecode(c *C) {
	fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) {
		// Decode the fixture packfile into a scratch storage.
		scanner := NewScanner(f.Packfile())
		storage := memory.NewStorage()

		d, err := NewDecoder(scanner, storage)
		c.Assert(err, IsNil)

		ch, err := d.Decode()
		c.Assert(err, IsNil)
		c.Assert(ch, Equals, f.PackfileHash)

		iter, err := d.o.IterObjects(plumbing.AnyObject)
		c.Assert(err, IsNil)

		// Copy every decoded object into the storage the encoder reads from,
		// remembering the hashes so the result can be verified afterwards.
		hashes := []plumbing.Hash{}
		original := make(map[plumbing.Hash]struct{})
		numObjects := 0
		err = iter.ForEach(func(o plumbing.Object) error {
			hash, err := s.store.SetObject(o)
			hashes = append(hashes, hash)

			numObjects++
			original[o.Hash()] = struct{}{}

			return err
		})
		c.Assert(err, IsNil)

		_, err = s.enc.Encode(hashes)
		c.Assert(err, IsNil)

		// Decode what the encoder produced.
		scanner = NewScanner(s.buf)
		storage = memory.NewStorage()
		d, err = NewDecoder(scanner, storage)
		c.Assert(err, IsNil)
		_, err = d.Decode()
		c.Assert(err, IsNil)

		iter, err = d.o.IterObjects(plumbing.AnyObject)
		c.Assert(err, IsNil)

		obtained := []plumbing.Hash{}
		err = iter.ForEach(func(o plumbing.Object) error {
			obtained = append(obtained, o.Hash())

			return nil
		})
		c.Assert(err, IsNil)
		c.Assert(len(obtained), Equals, numObjects)

		// Every re-decoded object must be one of the originals. A set lookup
		// keeps this linear and checks each object on its own instead of
		// comparing a count of matching pairs.
		for _, h := range obtained {
			_, found := original[h]
			c.Assert(found, Equals, true)
		}
	})
}
16 changes: 1 addition & 15 deletions plumbing/format/packfile/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,6 @@ var (
ErrSeekNotSupported = NewError("not seek support")
)

const (
// VersionSupported is the packfile version supported by this parser.
VersionSupported uint32 = 2
)

// ObjectHeader contains the information related to the object, this information
// is collected from the previous bytes to the content of the object.
type ObjectHeader struct {
Expand Down Expand Up @@ -124,7 +119,7 @@ func (s *Scanner) readSignature() ([]byte, error) {

// isValidSignature returns if sig is a valid packfile signature.
func (s *Scanner) isValidSignature(sig []byte) bool {
return bytes.Equal(sig, []byte{'P', 'A', 'C', 'K'})
return bytes.Equal(sig, signature)
}

// readVersion reads and returns the version field of a packfile.
Expand Down Expand Up @@ -230,15 +225,6 @@ func (s *Scanner) readObjectTypeAndLength() (plumbing.ObjectType, int64, error)
return t, l, err
}

const (
maskType = uint8(112) // 0111 0000
maskFirstLength = uint8(15) // 0000 1111
maskContinue = uint8(128) // 1000 000
firstLengthBits = uint8(4) // the first byte has 4 bits to store the length
maskLength = uint8(127) // 0111 1111
lengthBits = uint8(7) // subsequent bytes has 7 bits to store the length
)

func (s *Scanner) readType() (plumbing.ObjectType, byte, error) {
var c byte
var err error
Expand Down