Skip to content
This repository was archived by the owner on Sep 11, 2020. It is now read-only.

packfile: improve Index memory representation to be more compact #846

Merged
merged 1 commit into from
Jun 6, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 43 additions & 10 deletions plumbing/format/packfile/index.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package packfile

import (
"sort"

"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
)
Expand All @@ -10,7 +12,7 @@ import (
// or to store them.
type Index struct {
// byHash maps each entry's hash to the entry.
byHash map[plumbing.Hash]*idxfile.Entry
// NOTE(review): the two byOffset lines below look like the removed (map) and
// added (slice) sides of this diff shown without +/- markers; the other
// visible code appends to and binary-searches byOffset, i.e. uses the slice.
byOffset map[uint64]*idxfile.Entry
byOffset []*idxfile.Entry // sorted by their offset
}

// NewIndex creates a new empty index with the given size. Size is a hint and
Expand All @@ -19,36 +21,62 @@ type Index struct {
func NewIndex(size int) *Index {
return &Index{
// size pre-sizes the hash map.
byHash: make(map[plumbing.Hash]*idxfile.Entry, size),
byOffset: make(map[uint64]*idxfile.Entry, size),
// Length 0 with capacity size: later appends do not reallocate until more
// than size entries have been added.
byOffset: make([]*idxfile.Entry, 0, size),
}
}

// NewIndexFromIdxFile creates a new Index from an idxfile.Idxfile.
// Every entry in idxf.Entries is registered by hash and appended to the
// offset list, which is sorted by offset once after the loop.
func NewIndexFromIdxFile(idxf *idxfile.Idxfile) *Index {
idx := &Index{
byHash: make(map[plumbing.Hash]*idxfile.Entry, idxf.ObjectCount),
byOffset: make(map[uint64]*idxfile.Entry, idxf.ObjectCount),
byOffset: make([]*idxfile.Entry, 0, idxf.ObjectCount),
}
for _, e := range idxf.Entries {
idx.add(e)
idx.addUnsorted(e)
}
// Sort a single time here instead of keeping the slice ordered on every insert.
sort.Sort(orderByOffset(idx.byOffset))

return idx
}

// orderByOffset implements sort.Interface over a slice of index entries,
// ordering them by ascending Offset.
type orderByOffset []*idxfile.Entry

// Len reports how many entries the slice holds.
func (s orderByOffset) Len() int {
	return len(s)
}

// Less reports whether the entry at position a has a smaller offset than the
// entry at position b.
func (s orderByOffset) Less(a, b int) bool {
	lhs, rhs := s[a], s[b]
	return lhs.Offset < rhs.Offset
}

// Swap exchanges the entries at positions a and b.
func (s orderByOffset) Swap(a, b int) {
	tmp := s[a]
	s[a] = s[b]
	s[b] = tmp
}

// Add adds a new Entry with the given values to the index.
// The entry is stored under h in byHash and inserted into byOffset at the
// position that keeps the slice ordered by offset.
// NOTE(review): a repeated h replaces the byHash entry but still inserts
// another element into byOffset — confirm callers never add a hash twice.
func (idx *Index) Add(h plumbing.Hash, offset uint64, crc32 uint32) {
e := idxfile.Entry{
e := &idxfile.Entry{
Hash: h,
Offset: offset,
CRC32: crc32,
}
idx.add(&e)
idx.byHash[e.Hash] = e

// Find the right position in byOffset.
// Look for the first position whose offset is *greater* than e.Offset.
// An entry whose offset equals an existing one therefore lands after it.
i := sort.Search(len(idx.byOffset), func(i int) bool {
return idx.byOffset[i].Offset > offset
})
if i == len(idx.byOffset) {
// Simple case: add it to the end.
idx.byOffset = append(idx.byOffset, e)
return
}
// Harder case: shift existing entries down by one to make room.
// Append a nil entry first so we can use existing capacity in case
// the index was carefully preallocated.
idx.byOffset = append(idx.byOffset, nil)
// Source and destination overlap; copy handles that. The source stops one
// short of the end so the freshly appended nil slot is the one overwritten.
copy(idx.byOffset[i+1:], idx.byOffset[i:len(idx.byOffset)-1])
idx.byOffset[i] = e
}

// addUnsorted stores e under its hash and appends it to byOffset without
// restoring offset order; NewIndexFromIdxFile sorts byOffset after its loop.
func (idx *Index) add(e *idxfile.Entry) {
func (idx *Index) addUnsorted(e *idxfile.Entry) {
idx.byHash[e.Hash] = e
idx.byOffset[e.Offset] = e
idx.byOffset = append(idx.byOffset, e)
}

// LookupHash looks an entry up by its hash. An idxfile.Entry is returned and
Expand All @@ -61,8 +89,13 @@ func (idx *Index) LookupHash(h plumbing.Hash) (*idxfile.Entry, bool) {
// LookupOffset looks an entry up by its offset in the packfile. An idxfile.Entry
// is returned and a bool, which is true if it was found or false if it wasn't.
func (idx *Index) LookupOffset(offset uint64) (*idxfile.Entry, bool) {
e, ok := idx.byOffset[offset]
return e, ok
// Binary search for the first entry whose offset is >= the requested one;
// this relies on byOffset being sorted by offset.
i := sort.Search(len(idx.byOffset), func(i int) bool {
return idx.byOffset[i].Offset >= offset
})
// sort.Search returns len(idx.byOffset) when no element satisfies the
// predicate, so check the bound before comparing for an exact match.
if i >= len(idx.byOffset) || idx.byOffset[i].Offset != offset {
return nil, false // not present
}
return idx.byOffset[i], true
}

// Size returns the number of entries in the index.
Expand Down
37 changes: 24 additions & 13 deletions plumbing/format/packfile/index_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package packfile
import (
"strconv"
"strings"
"testing"

"gopkg.in/src-d/go-git.v4/plumbing"

Expand All @@ -26,12 +27,12 @@ func (s *IndexSuite) TestLookupOffset(c *C) {
e, ok := idx.LookupOffset(uint64(o2))
c.Assert(ok, Equals, true)
c.Assert(e, NotNil)
c.Assert(e.Hash, Equals, s.toHash(o2))
c.Assert(e.Hash, Equals, toHash(o2))
c.Assert(e.Offset, Equals, uint64(o2))
}
}

h1 := s.toHash(o1)
h1 := toHash(o1)
idx.Add(h1, uint64(o1), 0)

for o2 := 0; o2 < 10000; o2 += 100 {
Expand All @@ -43,7 +44,7 @@ func (s *IndexSuite) TestLookupOffset(c *C) {
e, ok := idx.LookupOffset(uint64(o2))
c.Assert(ok, Equals, true)
c.Assert(e, NotNil)
c.Assert(e.Hash, Equals, s.toHash(o2))
c.Assert(e.Hash, Equals, toHash(o2))
c.Assert(e.Offset, Equals, uint64(o2))
}
}
Expand All @@ -56,31 +57,31 @@ func (s *IndexSuite) TestLookupHash(c *C) {
for o1 := 0; o1 < 10000; o1 += 100 {
for o2 := 0; o2 < 10000; o2 += 100 {
if o2 >= o1 {
e, ok := idx.LookupHash(s.toHash(o2))
e, ok := idx.LookupHash(toHash(o2))
c.Assert(ok, Equals, false)
c.Assert(e, IsNil)
} else {
e, ok := idx.LookupHash(s.toHash(o2))
e, ok := idx.LookupHash(toHash(o2))
c.Assert(ok, Equals, true)
c.Assert(e, NotNil)
c.Assert(e.Hash, Equals, s.toHash(o2))
c.Assert(e.Hash, Equals, toHash(o2))
c.Assert(e.Offset, Equals, uint64(o2))
}
}

h1 := s.toHash(o1)
h1 := toHash(o1)
idx.Add(h1, uint64(o1), 0)

for o2 := 0; o2 < 10000; o2 += 100 {
if o2 > o1 {
e, ok := idx.LookupHash(s.toHash(o2))
e, ok := idx.LookupHash(toHash(o2))
c.Assert(ok, Equals, false)
c.Assert(e, IsNil)
} else {
e, ok := idx.LookupHash(s.toHash(o2))
e, ok := idx.LookupHash(toHash(o2))
c.Assert(ok, Equals, true)
c.Assert(e, NotNil)
c.Assert(e.Hash, Equals, s.toHash(o2))
c.Assert(e.Hash, Equals, toHash(o2))
c.Assert(e.Offset, Equals, uint64(o2))
}
}
Expand All @@ -92,7 +93,7 @@ func (s *IndexSuite) TestSize(c *C) {

for o1 := 0; o1 < 1000; o1++ {
c.Assert(idx.Size(), Equals, o1)
h1 := s.toHash(o1)
h1 := toHash(o1)
idx.Add(h1, uint64(o1), 0)
}
}
Expand All @@ -107,16 +108,26 @@ func (s *IndexSuite) TestIdxFileEmpty(c *C) {
// TestIdxFile checks that an Index built with Add and an Index rebuilt from
// its ToIdxFile output compare as deeply equal.
func (s *IndexSuite) TestIdxFile(c *C) {
idx := NewIndex(0)
for o1 := 0; o1 < 1000; o1++ {
h1 := s.toHash(o1)
h1 := toHash(o1)
idx.Add(h1, uint64(o1), 0)
}

idx2 := NewIndexFromIdxFile(idx.ToIdxFile())
c.Assert(idx, DeepEquals, idx2)
}

// toHash derives a hash for test use from i: the decimal digits of i are
// left-padded with 'a' to 40 characters and passed to plumbing.NewHash.
func (s *IndexSuite) toHash(i int) plumbing.Hash {
func toHash(i int) plumbing.Hash {
is := strconv.Itoa(i)
padding := strings.Repeat("a", 40-len(is))
return plumbing.NewHash(padding + is)
}

// BenchmarkIndexConstruction measures filling a single Index through Add with
// ascending offsets spaced 100 apart; the number of entries scales with b.N.
func BenchmarkIndexConstruction(b *testing.B) {
	const (
		stride    = 100 // gap between consecutive offsets
		spanPerOp = 1e6 // offset range covered per unit of b.N
	)
	b.ReportAllocs()

	index := NewIndex(0)
	limit := spanPerOp * b.N
	for offset := 0; offset < limit; offset += stride {
		index.Add(toHash(offset), uint64(offset), 0)
	}
}