Skip to content
This repository was archived by the owner on Sep 11, 2020. It is now read-only.

Improve packfile reading performance #906

Merged
merged 37 commits into from
Aug 14, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
009f106
plumbing/format/idxfile: add new Index and MemoryIndex
erizocosmico Jul 19, 2018
a8ff3e5
Merge pull request #896 from erizocosmico/feature/new-index-decoder
erizocosmico Jul 26, 2018
da5677f
plumbing/packfile: add new packfile parser
jfontan Jul 19, 2018
ce91d71
plumbing/packfile: disable lookup by offset
jfontan Jul 19, 2018
355cfc3
plumbing: idxfile, add idxfile.Writer with Observer interface
jfontan Jul 19, 2018
4e3765a
plumbing/idxfile: use Entry to hold object data
jfontan Jul 20, 2018
65e8359
plumbing/idxfile: support offset64 generating indexes
jfontan Jul 20, 2018
a716126
plumbing/packfile: preallocate memory in PatchDelta
jfontan Jul 24, 2018
7418b41
plumbing/idxfile: fix bug searching in MemoryIndex
jfontan Jul 26, 2018
4ddd678
plumbing/idxfile: add offset/hash mapping to index
jfontan Jul 26, 2018
74f56f3
plumbing/idxfile: index is created only once and retrieved with Index
jfontan Jul 26, 2018
79f2494
plumbing, storage: integrate new index
jfontan Jul 26, 2018
a08061a
Merge pull request #898 from jfontan/feature/new-packfile-parser
jfontan Jul 26, 2018
ffdfb7d
plumbing: packfile, new Packfile representation
erizocosmico Jul 20, 2018
bc565c1
plumbing, packfile: delete index_test as is no longer used
jfontan Jul 26, 2018
4b366ac
plumbing: fix two errors in idxfile and packfile decoder
jfontan Jul 26, 2018
3657a32
storage/filesystem: add back IndexStorage
jfontan Jul 27, 2018
b713101
Merge pull request #902 from jfontan/feature/new-packfile-parser
jfontan Jul 27, 2018
ccd0fa0
plumbing: packfile, lazy object reads with DiskObjects
erizocosmico Jul 27, 2018
0ade8fb
Merge pull request #899 from erizocosmico/feature/new-packfile
erizocosmico Jul 27, 2018
823abfe
plumbing/idxfile: test FindHash and writer with 64 bit offsets
jfontan Jul 27, 2018
6f8f2ed
storage/filesystem: remove duplicated IndexStorage
jfontan Jul 27, 2018
b4cd089
plumbing/packfile: add index generation to decoder
jfontan Jul 27, 2018
d314e86
Merge pull request #904 from jfontan/feature/new-packfile-parser
jfontan Jul 27, 2018
6f7fc05
plumbing: packfile, fix package tests
erizocosmico Jul 30, 2018
b173cc0
Merge pull request #907 from erizocosmico/feature/fix-tests
erizocosmico Aug 1, 2018
6a24b4c
*: use parser to populate non writable storages and bug fixes
erizocosmico Aug 7, 2018
5889a3b
plumbing: packfile, allow non-seekable sources on Parser
erizocosmico Aug 8, 2018
b3d995f
plumbing: packfile, add Parse benchmark
erizocosmico Aug 9, 2018
71a3c91
plumbing: packfile, read object content only once
erizocosmico Aug 9, 2018
34cc506
storage: filesystem, benchmark PackfileIter
erizocosmico Aug 9, 2018
65dc4f9
plumbing: packfile, rename DiskObject to FSObject
erizocosmico Aug 9, 2018
038cf23
storage: filesystem, close Packfile after iterating objects
erizocosmico Aug 9, 2018
d93b386
storage: filesystem, add PackfileIter benchmark reading object content
erizocosmico Aug 9, 2018
56c5e91
plumbing: packfile, open and close packfile on FSObject reads
erizocosmico Aug 9, 2018
b944bc4
git: add benchmark for iterating repository objects
erizocosmico Aug 10, 2018
8d75d23
plumbing: idxfile, Crc32 to CRC32 and return ok from findHashIndex
erizocosmico Aug 10, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 1 addition & 8 deletions common_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,14 +113,7 @@ func (s *BaseSuite) NewRepositoryFromPackfile(f *fixtures.Fixture) *Repository {
p := f.Packfile()
defer p.Close()

n := packfile.NewScanner(p)
d, err := packfile.NewDecoder(n, storer)
if err != nil {
panic(err)
}

_, err = d.Decode()
if err != nil {
if err := packfile.UpdateObjectStorage(storer, p); err != nil {
panic(err)
}

Expand Down
109 changes: 64 additions & 45 deletions plumbing/format/idxfile/decoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ var (
ErrMalformedIdxFile = errors.New("Malformed IDX file")
)

const (
fanout = 256
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The previous fanout was 255; was that an error?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

objectIDLength = 20
)

// Decoder reads and decodes idx files from an input stream.
type Decoder struct {
*bufio.Reader
Expand All @@ -27,13 +32,13 @@ func NewDecoder(r io.Reader) *Decoder {
return &Decoder{bufio.NewReader(r)}
}

// Decode reads from the stream and decode the content into the Idxfile struct.
func (d *Decoder) Decode(idx *Idxfile) error {
// Decode reads from the stream and decodes the content into the MemoryIndex struct.
func (d *Decoder) Decode(idx *MemoryIndex) error {
if err := validateHeader(d); err != nil {
return err
}

flow := []func(*Idxfile, io.Reader) error{
flow := []func(*MemoryIndex, io.Reader) error{
readVersion,
readFanout,
readObjectNames,
Expand All @@ -48,10 +53,6 @@ func (d *Decoder) Decode(idx *Idxfile) error {
}
}

if !idx.isValid() {
return ErrMalformedIdxFile
}

return nil
}

Expand All @@ -68,7 +69,7 @@ func validateHeader(r io.Reader) error {
return nil
}

func readVersion(idx *Idxfile, r io.Reader) error {
func readVersion(idx *MemoryIndex, r io.Reader) error {
v, err := binary.ReadUint32(r)
if err != nil {
return err
Expand All @@ -82,74 +83,92 @@ func readVersion(idx *Idxfile, r io.Reader) error {
return nil
}

func readFanout(idx *Idxfile, r io.Reader) error {
var err error
for i := 0; i < 255; i++ {
idx.Fanout[i], err = binary.ReadUint32(r)
func readFanout(idx *MemoryIndex, r io.Reader) error {
for k := 0; k < fanout; k++ {
n, err := binary.ReadUint32(r)
if err != nil {
return err
}

idx.Fanout[k] = n
idx.FanoutMapping[k] = noMapping
}

idx.ObjectCount, err = binary.ReadUint32(r)
return err
return nil
}

func readObjectNames(idx *Idxfile, r io.Reader) error {
c := int(idx.ObjectCount)
new := make([]Entry, c)
for i := 0; i < c; i++ {
e := &new[i]
if _, err := io.ReadFull(r, e.Hash[:]); err != nil {
func readObjectNames(idx *MemoryIndex, r io.Reader) error {
for k := 0; k < fanout; k++ {
var buckets uint32
if k == 0 {
buckets = idx.Fanout[k]
} else {
buckets = idx.Fanout[k] - idx.Fanout[k-1]
}

if buckets == 0 {
continue
}

if buckets < 0 {
return ErrMalformedIdxFile
}

idx.FanoutMapping[k] = len(idx.Names)

nameLen := int(buckets * objectIDLength)
bin := make([]byte, nameLen)
if _, err := io.ReadFull(r, bin); err != nil {
return err
}

idx.Entries = append(idx.Entries, e)
idx.Names = append(idx.Names, bin)
idx.Offset32 = append(idx.Offset32, make([]byte, buckets*4))
idx.CRC32 = append(idx.CRC32, make([]byte, buckets*4))
}

return nil
}

func readCRC32(idx *Idxfile, r io.Reader) error {
c := int(idx.ObjectCount)
for i := 0; i < c; i++ {
if err := binary.Read(r, &idx.Entries[i].CRC32); err != nil {
return err
func readCRC32(idx *MemoryIndex, r io.Reader) error {
for k := 0; k < fanout; k++ {
if pos := idx.FanoutMapping[k]; pos != noMapping {
if _, err := io.ReadFull(r, idx.CRC32[pos]); err != nil {
return err
}
}
}

return nil
}

func readOffsets(idx *Idxfile, r io.Reader) error {
c := int(idx.ObjectCount)

for i := 0; i < c; i++ {
o, err := binary.ReadUint32(r)
if err != nil {
return err
func readOffsets(idx *MemoryIndex, r io.Reader) error {
var o64cnt int
for k := 0; k < fanout; k++ {
if pos := idx.FanoutMapping[k]; pos != noMapping {
if _, err := io.ReadFull(r, idx.Offset32[pos]); err != nil {
return err
}

for p := 0; p < len(idx.Offset32[pos]); p += 4 {
if idx.Offset32[pos][p]&(byte(1)<<7) > 0 {
o64cnt++
}
}
}

idx.Entries[i].Offset = uint64(o)
}

for i := 0; i < c; i++ {
if idx.Entries[i].Offset <= offsetLimit {
continue
}

o, err := binary.ReadUint64(r)
if err != nil {
if o64cnt > 0 {
idx.Offset64 = make([]byte, o64cnt*8)
if _, err := io.ReadFull(r, idx.Offset64); err != nil {
return err
}

idx.Entries[i].Offset = o
}

return nil
}

func readChecksums(idx *Idxfile, r io.Reader) error {
func readChecksums(idx *MemoryIndex, r io.Reader) error {
if _, err := io.ReadFull(r, idx.PackfileChecksum[:]); err != nil {
return err
}
Expand Down
106 changes: 55 additions & 51 deletions plumbing/format/idxfile/decoder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@ import (
"bytes"
"encoding/base64"
"fmt"
"io"
"io/ioutil"
"testing"

"gopkg.in/src-d/go-git.v4/plumbing"
. "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
"gopkg.in/src-d/go-git.v4/plumbing/format/packfile"
"gopkg.in/src-d/go-git.v4/storage/memory"

. "gopkg.in/check.v1"
"gopkg.in/src-d/go-git-fixtures.v3"
Expand All @@ -26,51 +27,34 @@ func (s *IdxfileSuite) TestDecode(c *C) {
f := fixtures.Basic().One()

d := NewDecoder(f.Idx())
idx := &Idxfile{}
idx := new(MemoryIndex)
err := d.Decode(idx)
c.Assert(err, IsNil)

c.Assert(idx.Entries, HasLen, 31)
c.Assert(idx.Entries[0].Hash.String(), Equals, "1669dce138d9b841a518c64b10914d88f5e488ea")
c.Assert(idx.Entries[0].Offset, Equals, uint64(615))
c.Assert(idx.Entries[0].CRC32, Equals, uint32(3645019190))
count, _ := idx.Count()
c.Assert(count, Equals, int64(31))

c.Assert(fmt.Sprintf("%x", idx.IdxChecksum), Equals, "fb794f1ec720b9bc8e43257451bd99c4be6fa1c9")
c.Assert(fmt.Sprintf("%x", idx.PackfileChecksum), Equals, f.PackfileHash.String())
}

func (s *IdxfileSuite) TestDecodeCRCs(c *C) {
f := fixtures.Basic().ByTag("ofs-delta").One()

scanner := packfile.NewScanner(f.Packfile())
storage := memory.NewStorage()

pd, err := packfile.NewDecoder(scanner, storage)
hash := plumbing.NewHash("1669dce138d9b841a518c64b10914d88f5e488ea")
ok, err := idx.Contains(hash)
c.Assert(err, IsNil)
_, err = pd.Decode()
c.Assert(err, IsNil)

i := pd.Index().ToIdxFile()
i.Version = VersionSupported
c.Assert(ok, Equals, true)

buf := bytes.NewBuffer(nil)
e := NewEncoder(buf)
_, err = e.Encode(i)
offset, err := idx.FindOffset(hash)
c.Assert(err, IsNil)
c.Assert(offset, Equals, int64(615))

idx := &Idxfile{}

d := NewDecoder(buf)
err = d.Decode(idx)
crc32, err := idx.FindCRC32(hash)
c.Assert(err, IsNil)
c.Assert(crc32, Equals, uint32(3645019190))

c.Assert(idx.Entries, DeepEquals, i.Entries)
c.Assert(fmt.Sprintf("%x", idx.IdxChecksum), Equals, "fb794f1ec720b9bc8e43257451bd99c4be6fa1c9")
c.Assert(fmt.Sprintf("%x", idx.PackfileChecksum), Equals, f.PackfileHash.String())
}

func (s *IdxfileSuite) TestDecode64bitsOffsets(c *C) {
f := bytes.NewBufferString(fixtureLarge4GB)

idx := &Idxfile{}
idx := new(MemoryIndex)

d := NewDecoder(base64.NewDecoder(base64.StdEncoding, f))
err := d.Decode(idx)
Expand All @@ -88,29 +72,22 @@ func (s *IdxfileSuite) TestDecode64bitsOffsets(c *C) {
"35858be9c6f5914cbe6768489c41eb6809a2bceb": 5924278919,
}

for _, e := range idx.Entries {
c.Assert(expected[e.Hash.String()], Equals, e.Offset)
}
}

func (s *IdxfileSuite) TestDecode64bitsOffsetsIdempotent(c *C) {
f := bytes.NewBufferString(fixtureLarge4GB)

expected := &Idxfile{}

d := NewDecoder(base64.NewDecoder(base64.StdEncoding, f))
err := d.Decode(expected)
iter, err := idx.Entries()
c.Assert(err, IsNil)

buf := bytes.NewBuffer(nil)
_, err = NewEncoder(buf).Encode(expected)
c.Assert(err, IsNil)
var entries int
for {
e, err := iter.Next()
if err == io.EOF {
break
}
c.Assert(err, IsNil)
entries++

idx := &Idxfile{}
err = NewDecoder(buf).Decode(idx)
c.Assert(err, IsNil)
c.Assert(expected[e.Hash.String()], Equals, e.Offset)
}

c.Assert(idx.Entries, DeepEquals, expected.Entries)
c.Assert(entries, Equals, len(expected))
}

const fixtureLarge4GB = `/3RPYwAAAAIAAAAAAAAAAAAAAAAAAAABAAAAAQAAAAEAAAABAAAAAQAAAAEAAAABAAAAAQAAAAEA
Expand Down Expand Up @@ -139,3 +116,30 @@ AAAAAAAMgAAAAQAAAI6AAAACgAAAA4AAAASAAAAFAAAAAV9Qam8AAAABYR1ShwAAAACdxfYxAAAA
ANz1Di4AAAABPUnxJAAAAADNxzlGr6vCJpIFz4XaG/fi/f9C9zgQ8ptKSQpfQ1NMJBGTDTxxYGGp
ch2xUA==
`

// BenchmarkDecode measures decoding of the basic fixture's idx file into a
// fresh MemoryIndex.
//
// The idx contents are read into memory once, before the timer is reset, so
// each iteration only pays for decoding from an in-memory buffer. Any setup or
// decode failure aborts the benchmark instead of letting it continue with
// unusable input.
func BenchmarkDecode(b *testing.B) {
	if err := fixtures.Init(); err != nil {
		b.Fatalf("unexpected error initializing fixtures: %s", err)
	}

	// Registered right after Init so the fixtures are cleaned up even when a
	// later setup step aborts the benchmark.
	defer func() {
		if err := fixtures.Clean(); err != nil {
			b.Errorf("unexpected error cleaning fixtures: %s", err)
		}
	}()

	f := fixtures.Basic().One()
	fixture, err := ioutil.ReadAll(f.Idx())
	if err != nil {
		b.Fatalf("unexpected error reading idx file: %s", err)
	}

	// Keep fixture setup out of the measured region.
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		idx := new(MemoryIndex)
		d := NewDecoder(bytes.NewBuffer(fixture))
		if err := d.Decode(idx); err != nil {
			// A failing decode would fail identically on every iteration;
			// stop at the first one.
			b.Fatalf("unexpected error decoding: %s", err)
		}
	}
}
Loading