Skip to content
This repository was archived by the owner on Sep 11, 2020. It is now read-only.

packfile/decoder: speed up packfile iterator when specific type #200

Merged
merged 3 commits into from
Jan 12, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 88 additions & 1 deletion plumbing/format/packfile/decoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ type Decoder struct {
offsetToHash map[int64]plumbing.Hash
hashToOffset map[plumbing.Hash]int64
crcs map[plumbing.Hash]uint32

offsetToType map[int64]plumbing.ObjectType
decoderType plumbing.ObjectType
}

// NewDecoder returns a new Decoder that decodes a Packfile using the given
Expand All @@ -72,6 +75,22 @@ type Decoder struct {
// If the ObjectStorer implements storer.Transactioner, a transaction is created
// during the Decode execution, if something fails the Rollback is called
func NewDecoder(s *Scanner, o storer.EncodedObjectStorer) (*Decoder, error) {
return NewDecoderForType(s, o, plumbing.AnyObject)
}

// NewDecoderForType returns a new Decoder but in this case for a specific object type.
// When an object is read using this Decoder instance and it is not of the same type of
// the specified one, nil will be returned. This is intended to avoid the content
// deserialization of all the objects
func NewDecoderForType(s *Scanner, o storer.EncodedObjectStorer,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add documentation.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

t plumbing.ObjectType) (*Decoder, error) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

first thing to do is probably returning an error if t is plumbing.InvalidObject.

In case t is plumbing.OFSDeltaObject or plumbing.REFDeltaObject I would recommend returning also an error.


if t == plumbing.OFSDeltaObject ||
t == plumbing.REFDeltaObject ||
t == plumbing.InvalidObject {
return nil, plumbing.ErrInvalidType
}

if !canResolveDeltas(s, o) {
return nil, ErrResolveDeltasNotSupported
}
Expand All @@ -83,6 +102,9 @@ func NewDecoder(s *Scanner, o storer.EncodedObjectStorer) (*Decoder, error) {
offsetToHash: make(map[int64]plumbing.Hash, 0),
hashToOffset: make(map[plumbing.Hash]int64, 0),
crcs: make(map[plumbing.Hash]uint32, 0),

offsetToType: make(map[int64]plumbing.ObjectType, 0),
decoderType: t,
}, nil
}

Expand Down Expand Up @@ -174,17 +196,82 @@ func (d *Decoder) decodeObjectsWithObjectStorerTx(count int) error {

// DecodeObject reads the next object from the scanner and returns it. This
// method can be used in replacement of the Decode method, to work in a
// interative way
// interactive way. If you created a new decoder instance using NewDecoderForType
// constructor, if the object decoded is not equals to the specified one, nil will
// be returned
func (d *Decoder) DecodeObject() (plumbing.EncodedObject, error) {
h, err := d.s.NextObjectHeader()
if err != nil {
return nil, err
}

if d.decoderType == plumbing.AnyObject {
return d.decodeByHeader(h)
}

return d.decodeIfSpecificType(h)
}

func (d *Decoder) decodeIfSpecificType(h *ObjectHeader) (plumbing.EncodedObject, error) {
var realType plumbing.ObjectType
var err error
switch h.Type {
case plumbing.OFSDeltaObject:
realType, err = d.ofsDeltaType(h.OffsetReference)
case plumbing.REFDeltaObject:
realType, err = d.refDeltaType(h.Reference)

// If a reference delta is not found, it means that it isn't of
// the type we are looking for, because we don't have any reference
// and it is not present into the object storer
if err == plumbing.ErrObjectNotFound {
return nil, nil
}
default:
realType = h.Type
}

if err != nil {
return nil, err
}

d.offsetToType[h.Offset] = realType

if d.decoderType == realType {
return d.decodeByHeader(h)
}

return nil, nil
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

return nil?
Nope, return the next object with a relevant type.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh, I see that logic is in the PackfileIter, ok.

}

func (d *Decoder) ofsDeltaType(offset int64) (plumbing.ObjectType, error) {
t, ok := d.offsetToType[offset]
if !ok {
return plumbing.InvalidObject, plumbing.ErrObjectNotFound
}

return t, nil
}

func (d *Decoder) refDeltaType(ref plumbing.Hash) (plumbing.ObjectType, error) {
if o, ok := d.hashToOffset[ref]; ok {
return d.ofsDeltaType(o)
}

obj, err := d.o.EncodedObject(plumbing.AnyObject, ref)
if err != nil {
return plumbing.InvalidObject, err
}

return obj.Type(), nil
}

func (d *Decoder) decodeByHeader(h *ObjectHeader) (plumbing.EncodedObject, error) {
obj := d.newObject()
obj.SetSize(h.Length)
obj.SetType(h.Type)
var crc uint32
var err error
switch h.Type {
case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject:
crc, err = d.fillRegularObjectContent(obj)
Expand Down
46 changes: 46 additions & 0 deletions plumbing/format/packfile/decoder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,52 @@ func (s *ReaderSuite) TestDecode(c *C) {
})
}

func (s *ReaderSuite) TestDecodeByType(c *C) {
ts := []plumbing.ObjectType{
plumbing.CommitObject,
plumbing.TagObject,
plumbing.TreeObject,
plumbing.BlobObject,
}

fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) {
for _, t := range ts {
storage := memory.NewStorage()
scanner := packfile.NewScanner(f.Packfile())
d, err := packfile.NewDecoderForType(scanner, storage, t)
c.Assert(err, IsNil)
defer d.Close()

_, count, err := scanner.Header()
c.Assert(err, IsNil)

var i uint32
for i = 0; i < count; i++ {
obj, err := d.DecodeObject()
c.Assert(err, IsNil)

if obj != nil {
c.Assert(obj.Type(), Equals, t)
}
}
}
})
}
func (s *ReaderSuite) TestDecodeByTypeConstructor(c *C) {
f := fixtures.Basic().ByTag("packfile").One()
storage := memory.NewStorage()
scanner := packfile.NewScanner(f.Packfile())

_, err := packfile.NewDecoderForType(scanner, storage, plumbing.OFSDeltaObject)
c.Assert(err, Equals, plumbing.ErrInvalidType)

_, err = packfile.NewDecoderForType(scanner, storage, plumbing.REFDeltaObject)
c.Assert(err, Equals, plumbing.ErrInvalidType)

_, err = packfile.NewDecoderForType(scanner, storage, plumbing.InvalidObject)
c.Assert(err, Equals, plumbing.ErrInvalidType)
}

func (s *ReaderSuite) TestDecodeMultipleTimes(c *C) {
f := fixtures.Basic().ByTag("packfile").One()
scanner := packfile.NewScanner(f.Packfile())
Expand Down
38 changes: 22 additions & 16 deletions storage/filesystem/object.go
Original file line number Diff line number Diff line change
Expand Up @@ -271,14 +271,18 @@ type packfileIter struct {
total uint32
}

func NewPackfileIter(f billy.File, t plumbing.ObjectType) (storer.EncodedObjectIter, error) {
return newPackfileIter(f, t, make(map[plumbing.Hash]bool))
}

func newPackfileIter(f billy.File, t plumbing.ObjectType, seen map[plumbing.Hash]bool) (storer.EncodedObjectIter, error) {
s := packfile.NewScanner(f)
_, total, err := s.Header()
if err != nil {
return nil, err
}

d, err := packfile.NewDecoder(s, memory.NewStorage())
d, err := packfile.NewDecoderForType(s, memory.NewStorage(), t)
if err != nil {
return nil, err
}
Expand All @@ -294,25 +298,27 @@ func newPackfileIter(f billy.File, t plumbing.ObjectType, seen map[plumbing.Hash
}

func (iter *packfileIter) Next() (plumbing.EncodedObject, error) {
if iter.position >= iter.total {
return nil, io.EOF
}
for {
if iter.position >= iter.total {
return nil, io.EOF
}

obj, err := iter.d.DecodeObject()
if err != nil {
return nil, err
}
obj, err := iter.d.DecodeObject()
if err != nil {
return nil, err
}

iter.position++
if iter.seen[obj.Hash()] {
return iter.Next()
}
iter.position++
if obj == nil {
continue
}

if iter.t != plumbing.AnyObject && iter.t != obj.Type() {
return iter.Next()
}
if iter.seen[obj.Hash()] {
return iter.Next()
}

return obj, nil
return obj, nil
}
}

// ForEach is never called since is used inside of a MultiObjectIterator
Expand Down
59 changes: 48 additions & 11 deletions storage/filesystem/object_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,17 @@ import (

type FsSuite struct {
fixtures.Suite
Types []plumbing.ObjectType
}

var _ = Suite(&FsSuite{})
var _ = Suite(&FsSuite{
Types: []plumbing.ObjectType{
plumbing.CommitObject,
plumbing.TagObject,
plumbing.TreeObject,
plumbing.BlobObject,
},
})

func (s *FsSuite) TestGetFromObjectFile(c *C) {
fs := fixtures.ByTag(".git").ByTag("unpacked").One().DotGit()
Expand Down Expand Up @@ -76,18 +84,47 @@ func (s *FsSuite) TestIter(c *C) {

func (s *FsSuite) TestIterWithType(c *C) {
fixtures.ByTag(".git").Test(c, func(f *fixtures.Fixture) {
fs := f.DotGit()
o, err := newObjectStorage(dotgit.New(fs))
c.Assert(err, IsNil)
for _, t := range s.Types {
fs := f.DotGit()
o, err := newObjectStorage(dotgit.New(fs))
c.Assert(err, IsNil)

iter, err := o.IterEncodedObjects(plumbing.CommitObject)
c.Assert(err, IsNil)
iter, err := o.IterEncodedObjects(t)
c.Assert(err, IsNil)

err = iter.ForEach(func(o plumbing.EncodedObject) error {
c.Assert(o.Type(), Equals, plumbing.CommitObject)
return nil
})
err = iter.ForEach(func(o plumbing.EncodedObject) error {
c.Assert(o.Type(), Equals, t)
return nil
})

c.Assert(err, IsNil)
}

c.Assert(err, IsNil)
})
}

func (s *FsSuite) TestPackfileIter(c *C) {
fixtures.ByTag(".git").Test(c, func(f *fixtures.Fixture) {
fs := f.DotGit()
dg := dotgit.New(fs)

for _, t := range s.Types {
ph, err := dg.ObjectPacks()
c.Assert(err, IsNil)

for _, h := range ph {
f, err := dg.ObjectPack(h)
c.Assert(err, IsNil)
iter, err := NewPackfileIter(f, t)
c.Assert(err, IsNil)
err = iter.ForEach(func(o plumbing.EncodedObject) error {
c.Assert(o.Type(), Equals, t)
return nil
})

c.Assert(err, IsNil)
}
}
})

}