Skip to content
This repository was archived by the owner on Sep 11, 2020. It is now read-only.

Commit d7b898e

Browse files
authored
Merge pull request #510 from smola/refactor-idxfile
packfile: create packfile.Index and reuse it
2 parents fbf2a4a + c64eb81 commit d7b898e

File tree

11 files changed

+318
-138
lines changed

11 files changed

+318
-138
lines changed

plumbing/format/idxfile/decoder.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ func readObjectNames(idx *Idxfile, r io.Reader) error {
104104
return err
105105
}
106106

107-
idx.Entries = append(idx.Entries, Entry{Hash: ref})
107+
idx.Entries = append(idx.Entries, &Entry{Hash: ref})
108108
}
109109

110110
return nil

plumbing/format/idxfile/decoder_test.go

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1-
package idxfile
1+
package idxfile_test
22

33
import (
44
"bytes"
55
"fmt"
66
"testing"
77

88
"github.com/src-d/go-git-fixtures"
9+
. "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
910
"gopkg.in/src-d/go-git.v4/plumbing/format/packfile"
1011
"gopkg.in/src-d/go-git.v4/storage/memory"
1112

@@ -48,12 +49,8 @@ func (s *IdxfileSuite) TestDecodeCRCs(c *C) {
4849
_, err = pd.Decode()
4950
c.Assert(err, IsNil)
5051

51-
i := &Idxfile{Version: VersionSupported}
52-
53-
offsets := pd.Offsets()
54-
for h, crc := range pd.CRCs() {
55-
i.Add(h, uint64(offsets[h]), crc)
56-
}
52+
i := pd.Index().ToIdxFile()
53+
i.Version = VersionSupported
5754

5855
buf := bytes.NewBuffer(nil)
5956
e := NewEncoder(buf)

plumbing/format/idxfile/encoder.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ func (e *Encoder) encodeChecksums(idx *Idxfile) (int, error) {
124124
}
125125

126126
// EntryList implements sort.Interface allowing sorting in increasing order.
127-
type EntryList []Entry
127+
type EntryList []*Entry
128128

129129
func (p EntryList) Len() int { return len(p) }
130130
func (p EntryList) Less(i, j int) bool { return p[i].Hash.String() < p[j].Hash.String() }

plumbing/format/idxfile/encoder_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1-
package idxfile
1+
package idxfile_test
22

33
import (
44
"bytes"
55
"io/ioutil"
66

77
"github.com/src-d/go-git-fixtures"
88
"gopkg.in/src-d/go-git.v4/plumbing"
9+
. "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
910

1011
. "gopkg.in/check.v1"
1112
)

plumbing/format/idxfile/idxfile.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@ type Idxfile struct {
2121
IdxChecksum [20]byte
2222
}
2323

24+
func NewIdxfile() *Idxfile {
25+
return &Idxfile{}
26+
}
27+
2428
// Entry is the in memory representation of an object entry in the idx file.
2529
type Entry struct {
2630
Hash plumbing.Hash
@@ -30,7 +34,7 @@ type Entry struct {
3034

3135
// Add adds a new Entry with the given values to the Idxfile.
3236
func (idx *Idxfile) Add(h plumbing.Hash, offset uint64, crc32 uint32) {
33-
idx.Entries = append(idx.Entries, Entry{
37+
idx.Entries = append(idx.Entries, &Entry{
3438
Hash: h,
3539
Offset: offset,
3640
CRC32: crc32,

plumbing/format/packfile/decoder.go

Lines changed: 53 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -56,10 +56,12 @@ type Decoder struct {
5656
o storer.EncodedObjectStorer
5757
tx storer.Transaction
5858

59-
isDecoded bool
60-
offsetToHash map[int64]plumbing.Hash
61-
hashToOffset map[plumbing.Hash]int64
62-
crcs map[plumbing.Hash]uint32
59+
isDecoded bool
60+
61+
// hasBuiltIndex indicates if the index is fully built or not. If it is not,
62+
// it will be built incrementally while decoding.
63+
hasBuiltIndex bool
64+
idx *Index
6365

6466
offsetToType map[int64]plumbing.ObjectType
6567
decoderType plumbing.ObjectType
@@ -102,10 +104,7 @@ func NewDecoderForType(s *Scanner, o storer.EncodedObjectStorer,
102104
s: s,
103105
o: o,
104106

105-
offsetToHash: make(map[int64]plumbing.Hash, 0),
106-
hashToOffset: make(map[plumbing.Hash]int64, 0),
107-
crcs: make(map[plumbing.Hash]uint32, 0),
108-
107+
idx: NewIndex(0),
109108
offsetToType: make(map[int64]plumbing.ObjectType, 0),
110109
decoderType: t,
111110

@@ -139,6 +138,11 @@ func (d *Decoder) doDecode() error {
139138
return err
140139
}
141140

141+
if !d.hasBuiltIndex {
142+
d.idx = NewIndex(int(count))
143+
}
144+
defer func() { d.hasBuiltIndex = true }()
145+
142146
_, isTxStorer := d.o.(storer.Transactioner)
143147
switch {
144148
case d.o == nil:
@@ -218,13 +222,22 @@ func (d *Decoder) DecodeObject() (plumbing.EncodedObject, error) {
218222
}
219223

220224
func (d *Decoder) decodeIfSpecificType(h *ObjectHeader) (plumbing.EncodedObject, error) {
221-
var realType plumbing.ObjectType
222-
var err error
225+
var (
226+
obj plumbing.EncodedObject
227+
realType plumbing.ObjectType
228+
err error
229+
)
223230
switch h.Type {
224231
case plumbing.OFSDeltaObject:
225232
realType, err = d.ofsDeltaType(h.OffsetReference)
226233
case plumbing.REFDeltaObject:
227234
realType, err = d.refDeltaType(h.Reference)
235+
if err == plumbing.ErrObjectNotFound {
236+
obj, err = d.decodeByHeader(h)
237+
if err != nil {
238+
realType = obj.Type()
239+
}
240+
}
228241
default:
229242
realType = h.Type
230243
}
@@ -236,6 +249,10 @@ func (d *Decoder) decodeIfSpecificType(h *ObjectHeader) (plumbing.EncodedObject,
236249
d.offsetToType[h.Offset] = realType
237250

238251
if d.decoderType == realType {
252+
if obj != nil {
253+
return obj, nil
254+
}
255+
239256
return d.decodeByHeader(h)
240257
}
241258

@@ -252,16 +269,12 @@ func (d *Decoder) ofsDeltaType(offset int64) (plumbing.ObjectType, error) {
252269
}
253270

254271
func (d *Decoder) refDeltaType(ref plumbing.Hash) (plumbing.ObjectType, error) {
255-
if o, ok := d.hashToOffset[ref]; ok {
256-
return d.ofsDeltaType(o)
257-
}
258-
259-
obj, err := d.o.EncodedObject(plumbing.AnyObject, ref)
260-
if err != nil {
261-
return plumbing.InvalidObject, err
272+
e, ok := d.idx.LookupHash(ref)
273+
if !ok {
274+
return plumbing.InvalidObject, plumbing.ErrObjectNotFound
262275
}
263276

264-
return obj.Type(), nil
277+
return d.ofsDeltaType(int64(e.Offset))
265278
}
266279

267280
func (d *Decoder) decodeByHeader(h *ObjectHeader) (plumbing.EncodedObject, error) {
@@ -285,9 +298,9 @@ func (d *Decoder) decodeByHeader(h *ObjectHeader) (plumbing.EncodedObject, error
285298
return obj, err
286299
}
287300

288-
hash := obj.Hash()
289-
d.setOffset(hash, h.Offset)
290-
d.setCRC(hash, crc)
301+
if !d.hasBuiltIndex {
302+
d.idx.Add(obj.Hash(), uint64(h.Offset), crc)
303+
}
291304

292305
return obj, nil
293306
}
@@ -365,10 +378,10 @@ func (d *Decoder) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset i
365378
return 0, err
366379
}
367380

368-
h := d.offsetToHash[offset]
381+
e, ok := d.idx.LookupOffset(uint64(offset))
369382
var base plumbing.EncodedObject
370-
if h != plumbing.ZeroHash {
371-
base = d.cache.Get(h)
383+
if ok {
384+
base = d.cache.Get(e.Hash)
372385
}
373386

374387
if base == nil {
@@ -385,31 +398,22 @@ func (d *Decoder) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset i
385398
return crc, err
386399
}
387400

388-
func (d *Decoder) setOffset(h plumbing.Hash, offset int64) {
389-
d.offsetToHash[offset] = h
390-
d.hashToOffset[h] = offset
391-
}
392-
393-
func (d *Decoder) setCRC(h plumbing.Hash, crc uint32) {
394-
d.crcs[h] = crc
395-
}
396-
397401
func (d *Decoder) recallByOffset(o int64) (plumbing.EncodedObject, error) {
398402
if d.s.IsSeekable {
399403
return d.DecodeObjectAt(o)
400404
}
401405

402-
if h, ok := d.offsetToHash[o]; ok {
403-
return d.recallByHashNonSeekable(h)
406+
if e, ok := d.idx.LookupOffset(uint64(o)); ok {
407+
return d.recallByHashNonSeekable(e.Hash)
404408
}
405409

406410
return nil, plumbing.ErrObjectNotFound
407411
}
408412

409413
func (d *Decoder) recallByHash(h plumbing.Hash) (plumbing.EncodedObject, error) {
410414
if d.s.IsSeekable {
411-
if o, ok := d.hashToOffset[h]; ok {
412-
return d.DecodeObjectAt(o)
415+
if e, ok := d.idx.LookupHash(h); ok {
416+
return d.DecodeObjectAt(int64(e.Offset))
413417
}
414418
}
415419

@@ -432,22 +436,20 @@ func (d *Decoder) recallByHashNonSeekable(h plumbing.Hash) (obj plumbing.Encoded
432436
return nil, plumbing.ErrObjectNotFound
433437
}
434438

435-
// SetOffsets sets the offsets, required when using the method DecodeObjectAt,
436-
// without decoding the full packfile
437-
func (d *Decoder) SetOffsets(offsets map[plumbing.Hash]int64) {
438-
d.hashToOffset = offsets
439-
}
440-
441-
// Offsets returns the objects read offset, Decode method should be called
442-
// before to calculate the Offsets
443-
func (d *Decoder) Offsets() map[plumbing.Hash]int64 {
444-
return d.hashToOffset
439+
// SetIndex sets an index for the packfile. It is recommended to set this.
440+
// The index might be read from a file or reused from a previous Decoder usage
441+
// (see Index function).
442+
func (d *Decoder) SetIndex(idx *Index) {
443+
d.hasBuiltIndex = true
444+
d.idx = idx
445445
}
446446

447-
// CRCs returns the CRC-32 for each read object. Decode method should be called
448-
// before to calculate the CRCs
449-
func (d *Decoder) CRCs() map[plumbing.Hash]uint32 {
450-
return d.crcs
447+
// Index returns the index for the packfile. If index was set with SetIndex,
448+
// Index will return it. Otherwise, it will return an index that is built while
449+
// decoding. If neither SetIndex was called with a full index nor Decode called
450+
// for the whole packfile, then the returned index will be incomplete.
451+
func (d *Decoder) Index() *Index {
452+
return d.idx
451453
}
452454

453455
// Close closes the Scanner. Usually this means that the whole reader is read and

plumbing/format/packfile/decoder_test.go

Lines changed: 22 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -55,14 +55,8 @@ func (s *ReaderSuite) TestDecodeByTypeRefDelta(c *C) {
5555
d, err := packfile.NewDecoderForType(scanner, storage, plumbing.CommitObject)
5656
c.Assert(err, IsNil)
5757

58-
// Specific offset elements needed to decode correctly the ref-delta
59-
offsets := map[plumbing.Hash]int64{
60-
plumbing.NewHash("a8d315b2b1c615d43042c3a62402b8a54288cf5c"): 84880,
61-
plumbing.NewHash("fb72698cab7617ac416264415f13224dfd7a165e"): 85141,
62-
plumbing.NewHash("eba74343e2f15d62adedfd8c883ee0262b5c8021"): 85300,
63-
}
64-
65-
d.SetOffsets(offsets)
58+
// Index required to decode by ref-delta.
59+
d.SetIndex(getIndexFromIdxFile(f.Idx()))
6660

6761
defer d.Close()
6862

@@ -123,7 +117,7 @@ func (s *ReaderSuite) TestDecodeByType(c *C) {
123117

124118
// when the packfile is ref-delta based, the offsets are required
125119
if f.Is("ref-delta") {
126-
d.SetOffsets(getOffsetsFromIdx(f.Idx()))
120+
d.SetIndex(getIndexFromIdxFile(f.Idx()))
127121
}
128122

129123
defer d.Close()
@@ -291,8 +285,9 @@ func (s *ReaderSuite) TestDecodeCRCs(c *C) {
291285
c.Assert(err, IsNil)
292286

293287
var sum uint64
294-
for _, crc := range d.CRCs() {
295-
sum += uint64(crc)
288+
idx := d.Index().ToIdxFile()
289+
for _, e := range idx.Entries {
290+
sum += uint64(e.CRC32)
296291
}
297292

298293
c.Assert(int(sum), Equals, 78022211966)
@@ -306,8 +301,7 @@ func (s *ReaderSuite) TestReadObjectAt(c *C) {
306301

307302
// when the packfile is ref-delta based, the offsets are required
308303
if f.Is("ref-delta") {
309-
offsets := getOffsetsFromIdx(f.Idx())
310-
d.SetOffsets(offsets)
304+
d.SetIndex(getIndexFromIdxFile(f.Idx()))
311305
}
312306

313307
// the objects at reference 186, is a delta, so should be recall,
@@ -317,32 +311,34 @@ func (s *ReaderSuite) TestReadObjectAt(c *C) {
317311
c.Assert(obj.Hash().String(), Equals, "6ecf0ef2c2dffb796033e5a02219af86ec6584e5")
318312
}
319313

320-
func (s *ReaderSuite) TestOffsets(c *C) {
314+
func (s *ReaderSuite) TestIndex(c *C) {
321315
f := fixtures.Basic().One()
322316
scanner := packfile.NewScanner(f.Packfile())
323317
d, err := packfile.NewDecoder(scanner, nil)
324318
c.Assert(err, IsNil)
325319

326-
c.Assert(d.Offsets(), HasLen, 0)
320+
c.Assert(d.Index().ToIdxFile().Entries, HasLen, 0)
327321

328322
_, err = d.Decode()
329323
c.Assert(err, IsNil)
330324

331-
c.Assert(d.Offsets(), HasLen, 31)
325+
c.Assert(len(d.Index().ToIdxFile().Entries), Equals, 31)
332326
}
333327

334-
func (s *ReaderSuite) TestSetOffsets(c *C) {
328+
func (s *ReaderSuite) TestSetIndex(c *C) {
335329
f := fixtures.Basic().One()
336330
scanner := packfile.NewScanner(f.Packfile())
337331
d, err := packfile.NewDecoder(scanner, nil)
338332
c.Assert(err, IsNil)
339333

334+
idx := packfile.NewIndex(1)
340335
h := plumbing.NewHash("6ecf0ef2c2dffb796033e5a02219af86ec6584e5")
341-
d.SetOffsets(map[plumbing.Hash]int64{h: 42})
336+
idx.Add(h, uint64(42), 0)
337+
d.SetIndex(idx)
342338

343-
o := d.Offsets()
344-
c.Assert(o, HasLen, 1)
345-
c.Assert(o[h], Equals, int64(42))
339+
idxf := d.Index().ToIdxFile()
340+
c.Assert(idxf.Entries, HasLen, 1)
341+
c.Assert(idxf.Entries[0].Offset, Equals, uint64(42))
346342
}
347343

348344
func assertObjects(c *C, s storer.EncodedObjectStorer, expects []string) {
@@ -362,17 +358,12 @@ func assertObjects(c *C, s storer.EncodedObjectStorer, expects []string) {
362358
}
363359
}
364360

365-
func getOffsetsFromIdx(r io.Reader) map[plumbing.Hash]int64 {
366-
idx := &idxfile.Idxfile{}
367-
err := idxfile.NewDecoder(r).Decode(idx)
368-
if err != nil {
361+
func getIndexFromIdxFile(r io.Reader) *packfile.Index {
362+
idxf := idxfile.NewIdxfile()
363+
d := idxfile.NewDecoder(r)
364+
if err := d.Decode(idxf); err != nil {
369365
panic(err)
370366
}
371367

372-
offsets := make(map[plumbing.Hash]int64)
373-
for _, e := range idx.Entries {
374-
offsets[e.Hash] = int64(e.Offset)
375-
}
376-
377-
return offsets
368+
return packfile.NewIndexFromIdxFile(idxf)
378369
}

0 commit comments

Comments
 (0)