Skip to content
This repository was archived by the owner on Sep 11, 2020. It is now read-only.

packfile: create packfile.Index and reuse it #510

Merged
merged 1 commit into from
Jul 26, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion plumbing/format/idxfile/decoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ func readObjectNames(idx *Idxfile, r io.Reader) error {
return err
}

idx.Entries = append(idx.Entries, Entry{Hash: ref})
idx.Entries = append(idx.Entries, &Entry{Hash: ref})
}

return nil
Expand Down
11 changes: 4 additions & 7 deletions plumbing/format/idxfile/decoder_test.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
package idxfile
package idxfile_test

import (
"bytes"
"fmt"
"testing"

"github.com/src-d/go-git-fixtures"
. "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
"gopkg.in/src-d/go-git.v4/plumbing/format/packfile"
"gopkg.in/src-d/go-git.v4/storage/memory"

Expand Down Expand Up @@ -48,12 +49,8 @@ func (s *IdxfileSuite) TestDecodeCRCs(c *C) {
_, err = pd.Decode()
c.Assert(err, IsNil)

i := &Idxfile{Version: VersionSupported}

offsets := pd.Offsets()
for h, crc := range pd.CRCs() {
i.Add(h, uint64(offsets[h]), crc)
}
i := pd.Index().ToIdxFile()
i.Version = VersionSupported

buf := bytes.NewBuffer(nil)
e := NewEncoder(buf)
Expand Down
2 changes: 1 addition & 1 deletion plumbing/format/idxfile/encoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ func (e *Encoder) encodeChecksums(idx *Idxfile) (int, error) {
}

// EntryList implements sort.Interface allowing sorting in increasing order.
type EntryList []Entry
type EntryList []*Entry

func (p EntryList) Len() int { return len(p) }
func (p EntryList) Less(i, j int) bool { return p[i].Hash.String() < p[j].Hash.String() }
Expand Down
3 changes: 2 additions & 1 deletion plumbing/format/idxfile/encoder_test.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
package idxfile
package idxfile_test

import (
"bytes"
"io/ioutil"

"github.com/src-d/go-git-fixtures"
"gopkg.in/src-d/go-git.v4/plumbing"
. "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"

. "gopkg.in/check.v1"
)
Expand Down
6 changes: 5 additions & 1 deletion plumbing/format/idxfile/idxfile.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ type Idxfile struct {
IdxChecksum [20]byte
}

// NewIdxfile returns a pointer to a new, zero-valued Idxfile.
func NewIdxfile() *Idxfile {
return &Idxfile{}
}

// Entry is the in memory representation of an object entry in the idx file.
type Entry struct {
Hash plumbing.Hash
Expand All @@ -30,7 +34,7 @@ type Entry struct {

// Add adds a new Entry with the given values to the Idxfile.
func (idx *Idxfile) Add(h plumbing.Hash, offset uint64, crc32 uint32) {
idx.Entries = append(idx.Entries, Entry{
idx.Entries = append(idx.Entries, &Entry{
Hash: h,
Offset: offset,
CRC32: crc32,
Expand Down
104 changes: 53 additions & 51 deletions plumbing/format/packfile/decoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,12 @@ type Decoder struct {
o storer.EncodedObjectStorer
tx storer.Transaction

isDecoded bool
offsetToHash map[int64]plumbing.Hash
hashToOffset map[plumbing.Hash]int64
crcs map[plumbing.Hash]uint32
isDecoded bool

// hasBuiltIndex indicates whether the index is fully built. If it is not,
// the index will be built incrementally while decoding.
hasBuiltIndex bool
idx *Index

offsetToType map[int64]plumbing.ObjectType
decoderType plumbing.ObjectType
Expand Down Expand Up @@ -102,10 +104,7 @@ func NewDecoderForType(s *Scanner, o storer.EncodedObjectStorer,
s: s,
o: o,

offsetToHash: make(map[int64]plumbing.Hash, 0),
hashToOffset: make(map[plumbing.Hash]int64, 0),
crcs: make(map[plumbing.Hash]uint32, 0),

idx: NewIndex(0),
offsetToType: make(map[int64]plumbing.ObjectType, 0),
decoderType: t,

Expand Down Expand Up @@ -139,6 +138,11 @@ func (d *Decoder) doDecode() error {
return err
}

if !d.hasBuiltIndex {
d.idx = NewIndex(int(count))
}
defer func() { d.hasBuiltIndex = true }()

_, isTxStorer := d.o.(storer.Transactioner)
switch {
case d.o == nil:
Expand Down Expand Up @@ -218,13 +222,22 @@ func (d *Decoder) DecodeObject() (plumbing.EncodedObject, error) {
}

func (d *Decoder) decodeIfSpecificType(h *ObjectHeader) (plumbing.EncodedObject, error) {
var realType plumbing.ObjectType
var err error
var (
obj plumbing.EncodedObject
realType plumbing.ObjectType
err error
)
switch h.Type {
case plumbing.OFSDeltaObject:
realType, err = d.ofsDeltaType(h.OffsetReference)
case plumbing.REFDeltaObject:
realType, err = d.refDeltaType(h.Reference)
if err == plumbing.ErrObjectNotFound {
obj, err = d.decodeByHeader(h)
if err != nil {
realType = obj.Type()
}
}
default:
realType = h.Type
}
Expand All @@ -236,6 +249,10 @@ func (d *Decoder) decodeIfSpecificType(h *ObjectHeader) (plumbing.EncodedObject,
d.offsetToType[h.Offset] = realType

if d.decoderType == realType {
if obj != nil {
return obj, nil
}

return d.decodeByHeader(h)
}

Expand All @@ -252,16 +269,12 @@ func (d *Decoder) ofsDeltaType(offset int64) (plumbing.ObjectType, error) {
}

func (d *Decoder) refDeltaType(ref plumbing.Hash) (plumbing.ObjectType, error) {
if o, ok := d.hashToOffset[ref]; ok {
return d.ofsDeltaType(o)
}

obj, err := d.o.EncodedObject(plumbing.AnyObject, ref)
if err != nil {
return plumbing.InvalidObject, err
e, ok := d.idx.LookupHash(ref)
if !ok {
return plumbing.InvalidObject, plumbing.ErrObjectNotFound
}

return obj.Type(), nil
return d.ofsDeltaType(int64(e.Offset))
}

func (d *Decoder) decodeByHeader(h *ObjectHeader) (plumbing.EncodedObject, error) {
Expand All @@ -285,9 +298,9 @@ func (d *Decoder) decodeByHeader(h *ObjectHeader) (plumbing.EncodedObject, error
return obj, err
}

hash := obj.Hash()
d.setOffset(hash, h.Offset)
d.setCRC(hash, crc)
if !d.hasBuiltIndex {
d.idx.Add(obj.Hash(), uint64(h.Offset), crc)
}

return obj, nil
}
Expand Down Expand Up @@ -365,10 +378,10 @@ func (d *Decoder) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset i
return 0, err
}

h := d.offsetToHash[offset]
e, ok := d.idx.LookupOffset(uint64(offset))
var base plumbing.EncodedObject
if h != plumbing.ZeroHash {
base = d.cache.Get(h)
if ok {
base = d.cache.Get(e.Hash)
}

if base == nil {
Expand All @@ -385,31 +398,22 @@ func (d *Decoder) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset i
return crc, err
}

func (d *Decoder) setOffset(h plumbing.Hash, offset int64) {
d.offsetToHash[offset] = h
d.hashToOffset[h] = offset
}

func (d *Decoder) setCRC(h plumbing.Hash, crc uint32) {
d.crcs[h] = crc
}

func (d *Decoder) recallByOffset(o int64) (plumbing.EncodedObject, error) {
if d.s.IsSeekable {
return d.DecodeObjectAt(o)
}

if h, ok := d.offsetToHash[o]; ok {
return d.recallByHashNonSeekable(h)
if e, ok := d.idx.LookupOffset(uint64(o)); ok {
return d.recallByHashNonSeekable(e.Hash)
}

return nil, plumbing.ErrObjectNotFound
}

func (d *Decoder) recallByHash(h plumbing.Hash) (plumbing.EncodedObject, error) {
if d.s.IsSeekable {
if o, ok := d.hashToOffset[h]; ok {
return d.DecodeObjectAt(o)
if e, ok := d.idx.LookupHash(h); ok {
return d.DecodeObjectAt(int64(e.Offset))
}
}

Expand All @@ -432,22 +436,20 @@ func (d *Decoder) recallByHashNonSeekable(h plumbing.Hash) (obj plumbing.Encoded
return nil, plumbing.ErrObjectNotFound
}

// SetOffsets sets the offsets, required when using the method DecodeObjectAt,
// without decoding the full packfile
func (d *Decoder) SetOffsets(offsets map[plumbing.Hash]int64) {
d.hashToOffset = offsets
}

// Offsets returns the objects read offset, Decode method should be called
// before to calculate the Offsets
func (d *Decoder) Offsets() map[plumbing.Hash]int64 {
return d.hashToOffset
// SetIndex sets an index for the packfile. It is recommended to set this.
// The index might be read from a file or reused from a previous Decoder usage
// (see Index function).
func (d *Decoder) SetIndex(idx *Index) {
d.hasBuiltIndex = true
d.idx = idx
}

// CRCs returns the CRC-32 for each read object. Decode method should be called
// before to calculate the CRCs
func (d *Decoder) CRCs() map[plumbing.Hash]uint32 {
return d.crcs
// Index returns the index for the packfile. If index was set with SetIndex,
// Index will return it. Otherwise, it will return an index that is built while
// decoding. If neither SetIndex was called with a full index nor Decode was
// called for the whole packfile, then the returned index will be incomplete.
func (d *Decoder) Index() *Index {
return d.idx
}

// Close closes the Scanner. Usually this means that the whole reader is read and
Expand Down
53 changes: 22 additions & 31 deletions plumbing/format/packfile/decoder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,8 @@ func (s *ReaderSuite) TestDecodeByTypeRefDelta(c *C) {
d, err := packfile.NewDecoderForType(scanner, storage, plumbing.CommitObject)
c.Assert(err, IsNil)

// Specific offset elements needed to decode correctly the ref-delta
offsets := map[plumbing.Hash]int64{
plumbing.NewHash("a8d315b2b1c615d43042c3a62402b8a54288cf5c"): 84880,
plumbing.NewHash("fb72698cab7617ac416264415f13224dfd7a165e"): 85141,
plumbing.NewHash("eba74343e2f15d62adedfd8c883ee0262b5c8021"): 85300,
}

d.SetOffsets(offsets)
// Index required to decode by ref-delta.
d.SetIndex(getIndexFromIdxFile(f.Idx()))

defer d.Close()

Expand Down Expand Up @@ -123,7 +117,7 @@ func (s *ReaderSuite) TestDecodeByType(c *C) {

// when the packfile is ref-delta based, the offsets are required
if f.Is("ref-delta") {
d.SetOffsets(getOffsetsFromIdx(f.Idx()))
d.SetIndex(getIndexFromIdxFile(f.Idx()))
}

defer d.Close()
Expand Down Expand Up @@ -291,8 +285,9 @@ func (s *ReaderSuite) TestDecodeCRCs(c *C) {
c.Assert(err, IsNil)

var sum uint64
for _, crc := range d.CRCs() {
sum += uint64(crc)
idx := d.Index().ToIdxFile()
for _, e := range idx.Entries {
sum += uint64(e.CRC32)
}

c.Assert(int(sum), Equals, 78022211966)
Expand All @@ -306,8 +301,7 @@ func (s *ReaderSuite) TestReadObjectAt(c *C) {

// when the packfile is ref-delta based, the offsets are required
if f.Is("ref-delta") {
offsets := getOffsetsFromIdx(f.Idx())
d.SetOffsets(offsets)
d.SetIndex(getIndexFromIdxFile(f.Idx()))
}

// the objects at reference 186, is a delta, so should be recall,
Expand All @@ -317,32 +311,34 @@ func (s *ReaderSuite) TestReadObjectAt(c *C) {
c.Assert(obj.Hash().String(), Equals, "6ecf0ef2c2dffb796033e5a02219af86ec6584e5")
}

func (s *ReaderSuite) TestOffsets(c *C) {
func (s *ReaderSuite) TestIndex(c *C) {
f := fixtures.Basic().One()
scanner := packfile.NewScanner(f.Packfile())
d, err := packfile.NewDecoder(scanner, nil)
c.Assert(err, IsNil)

c.Assert(d.Offsets(), HasLen, 0)
c.Assert(d.Index().ToIdxFile().Entries, HasLen, 0)

_, err = d.Decode()
c.Assert(err, IsNil)

c.Assert(d.Offsets(), HasLen, 31)
c.Assert(len(d.Index().ToIdxFile().Entries), Equals, 31)
}

func (s *ReaderSuite) TestSetOffsets(c *C) {
func (s *ReaderSuite) TestSetIndex(c *C) {
f := fixtures.Basic().One()
scanner := packfile.NewScanner(f.Packfile())
d, err := packfile.NewDecoder(scanner, nil)
c.Assert(err, IsNil)

idx := packfile.NewIndex(1)
h := plumbing.NewHash("6ecf0ef2c2dffb796033e5a02219af86ec6584e5")
d.SetOffsets(map[plumbing.Hash]int64{h: 42})
idx.Add(h, uint64(42), 0)
d.SetIndex(idx)

o := d.Offsets()
c.Assert(o, HasLen, 1)
c.Assert(o[h], Equals, int64(42))
idxf := d.Index().ToIdxFile()
c.Assert(idxf.Entries, HasLen, 1)
c.Assert(idxf.Entries[0].Offset, Equals, uint64(42))
}

func assertObjects(c *C, s storer.EncodedObjectStorer, expects []string) {
Expand All @@ -362,17 +358,12 @@ func assertObjects(c *C, s storer.EncodedObjectStorer, expects []string) {
}
}

func getOffsetsFromIdx(r io.Reader) map[plumbing.Hash]int64 {
idx := &idxfile.Idxfile{}
err := idxfile.NewDecoder(r).Decode(idx)
if err != nil {
func getIndexFromIdxFile(r io.Reader) *packfile.Index {
idxf := idxfile.NewIdxfile()
d := idxfile.NewDecoder(r)
if err := d.Decode(idxf); err != nil {
panic(err)
}

offsets := make(map[plumbing.Hash]int64)
for _, e := range idx.Entries {
offsets[e.Hash] = int64(e.Offset)
}

return offsets
return packfile.NewIndexFromIdxFile(idxf)
}
Loading