Skip to content
This repository was archived by the owner on Sep 11, 2020. It is now read-only.

plumbing/format/idxfile: add new Index and MemoryIndex #896

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 64 additions & 45 deletions plumbing/format/idxfile/decoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ var (
ErrMalformedIdxFile = errors.New("Malformed IDX file")
)

const (
// fanout is the number of entries in the fanout table: readFanout reads
// exactly this many uint32 values, and the other readers loop once per entry.
fanout = 256
// objectIDLength is the size in bytes of one object name; readObjectNames
// sizes each bucket's name buffer as count * objectIDLength.
objectIDLength = 20
)

// Decoder reads and decodes idx files from an input stream.
type Decoder struct {
*bufio.Reader
Expand All @@ -27,13 +32,13 @@ func NewDecoder(r io.Reader) *Decoder {
return &Decoder{bufio.NewReader(r)}
}

// Decode reads from the stream and decode the content into the Idxfile struct.
func (d *Decoder) Decode(idx *Idxfile) error {
// Decode reads from the stream and decodes the content into the MemoryIndex struct.
func (d *Decoder) Decode(idx *MemoryIndex) error {
if err := validateHeader(d); err != nil {
return err
}

flow := []func(*Idxfile, io.Reader) error{
flow := []func(*MemoryIndex, io.Reader) error{
readVersion,
readFanout,
readObjectNames,
Expand All @@ -48,10 +53,6 @@ func (d *Decoder) Decode(idx *Idxfile) error {
}
}

if !idx.isValid() {
return ErrMalformedIdxFile
}

return nil
}

Expand All @@ -68,7 +69,7 @@ func validateHeader(r io.Reader) error {
return nil
}

func readVersion(idx *Idxfile, r io.Reader) error {
func readVersion(idx *MemoryIndex, r io.Reader) error {
v, err := binary.ReadUint32(r)
if err != nil {
return err
Expand All @@ -82,74 +83,92 @@ func readVersion(idx *Idxfile, r io.Reader) error {
return nil
}

// NOTE(review): this span is an unmarked diff hunk. The Idxfile-based lines
// (the 255-iteration loop and the trailing ObjectCount read) look like the
// removed side and the MemoryIndex-based lines like the added side — confirm
// against the repository before reading it as a single function.
func readFanout(idx *Idxfile, r io.Reader) error {
var err error
for i := 0; i < 255; i++ {
idx.Fanout[i], err = binary.ReadUint32(r)
// readFanout (MemoryIndex version) reads `fanout` uint32 values from r into
// idx.Fanout and resets the matching idx.FanoutMapping slot to noMapping.
// It returns the first read error, or nil once every entry has been read.
func readFanout(idx *MemoryIndex, r io.Reader) error {
for k := 0; k < fanout; k++ {
n, err := binary.ReadUint32(r)
if err != nil {
return err
}

idx.Fanout[k] = n
// No bucket data has been read yet; readObjectNames later overwrites this
// for the buckets that actually contain objects.
idx.FanoutMapping[k] = noMapping
}

idx.ObjectCount, err = binary.ReadUint32(r)
return err
return nil
}

func readObjectNames(idx *Idxfile, r io.Reader) error {
c := int(idx.ObjectCount)
new := make([]Entry, c)
for i := 0; i < c; i++ {
e := &new[i]
if _, err := io.ReadFull(r, e.Hash[:]); err != nil {
func readObjectNames(idx *MemoryIndex, r io.Reader) error {
for k := 0; k < fanout; k++ {
var buckets uint32
if k == 0 {
buckets = idx.Fanout[k]
} else {
buckets = idx.Fanout[k] - idx.Fanout[k-1]
}

if buckets == 0 {
continue
}

if buckets < 0 {
return ErrMalformedIdxFile
}

idx.FanoutMapping[k] = len(idx.Names)

nameLen := int(buckets * objectIDLength)
bin := make([]byte, nameLen)
if _, err := io.ReadFull(r, bin); err != nil {
return err
}

idx.Entries = append(idx.Entries, e)
idx.Names = append(idx.Names, bin)
idx.Offset32 = append(idx.Offset32, make([]byte, buckets*4))
idx.Crc32 = append(idx.Crc32, make([]byte, buckets*4))
}

return nil
}

// NOTE(review): unmarked diff hunk — the Idxfile-based lines below appear to
// be the removed side, the MemoryIndex-based function the added side; confirm
// against the repository.
func readCRC32(idx *Idxfile, r io.Reader) error {
c := int(idx.ObjectCount)
for i := 0; i < c; i++ {
if err := binary.Read(r, &idx.Entries[i].CRC32); err != nil {
return err
// readCRC32 (MemoryIndex version) walks the fanout entries in order and, for
// every entry whose FanoutMapping is not noMapping, fills the already
// allocated idx.Crc32[pos] buffer from r. It returns the first io.ReadFull
// error, or nil.
func readCRC32(idx *MemoryIndex, r io.Reader) error {
for k := 0; k < fanout; k++ {
if pos := idx.FanoutMapping[k]; pos != noMapping {
// The buffer length decides how many bytes are consumed for this bucket.
if _, err := io.ReadFull(r, idx.Crc32[pos]); err != nil {
return err
}
}
}

return nil
}

// NOTE(review): unmarked diff hunk — the Idxfile-based lines (ObjectCount
// loops, idx.Entries[i].Offset, offsetLimit) appear to be the removed side and
// are interleaved with the added MemoryIndex-based function; confirm against
// the repository.
func readOffsets(idx *Idxfile, r io.Reader) error {
c := int(idx.ObjectCount)

for i := 0; i < c; i++ {
o, err := binary.ReadUint32(r)
if err != nil {
return err
// readOffsets (MemoryIndex version) fills each mapped idx.Offset32[pos] buffer
// from r, counts the 4-byte entries whose first byte has its top bit set, and
// then, if any were counted, reads 8 bytes per counted entry into
// idx.Offset64. It returns the first io.ReadFull error, or nil.
func readOffsets(idx *MemoryIndex, r io.Reader) error {
var o64cnt int
for k := 0; k < fanout; k++ {
if pos := idx.FanoutMapping[k]; pos != noMapping {
if _, err := io.ReadFull(r, idx.Offset32[pos]); err != nil {
return err
}

// Step through the buffer one 4-byte entry at a time and test the high bit
// of the entry's first byte — presumably the idx v2 "large offset" flag;
// verify against the pack index format.
for p := 0; p < len(idx.Offset32[pos]); p += 4 {
if idx.Offset32[pos][p]&(byte(1)<<7) > 0 {
o64cnt++
}
}
}

idx.Entries[i].Offset = uint64(o)
}

for i := 0; i < c; i++ {
if idx.Entries[i].Offset <= offsetLimit {
continue
}

o, err := binary.ReadUint64(r)
if err != nil {
// Offset64 is only allocated and read when at least one entry was flagged.
if o64cnt > 0 {
idx.Offset64 = make([]byte, o64cnt*8)
if _, err := io.ReadFull(r, idx.Offset64); err != nil {
return err
}

idx.Entries[i].Offset = o
}

return nil
}

func readChecksums(idx *Idxfile, r io.Reader) error {
func readChecksums(idx *MemoryIndex, r io.Reader) error {
if _, err := io.ReadFull(r, idx.PackfileChecksum[:]); err != nil {
return err
}
Expand Down
106 changes: 55 additions & 51 deletions plumbing/format/idxfile/decoder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@ import (
"bytes"
"encoding/base64"
"fmt"
"io"
"io/ioutil"
"testing"

"gopkg.in/src-d/go-git.v4/plumbing"
. "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
"gopkg.in/src-d/go-git.v4/plumbing/format/packfile"
"gopkg.in/src-d/go-git.v4/storage/memory"

. "gopkg.in/check.v1"
"gopkg.in/src-d/go-git-fixtures.v3"
Expand All @@ -26,51 +27,34 @@ func (s *IdxfileSuite) TestDecode(c *C) {
f := fixtures.Basic().One()

d := NewDecoder(f.Idx())
idx := &Idxfile{}
idx := new(MemoryIndex)
err := d.Decode(idx)
c.Assert(err, IsNil)

c.Assert(idx.Entries, HasLen, 31)
c.Assert(idx.Entries[0].Hash.String(), Equals, "1669dce138d9b841a518c64b10914d88f5e488ea")
c.Assert(idx.Entries[0].Offset, Equals, uint64(615))
c.Assert(idx.Entries[0].CRC32, Equals, uint32(3645019190))
count, _ := idx.Count()
c.Assert(count, Equals, int64(31))

c.Assert(fmt.Sprintf("%x", idx.IdxChecksum), Equals, "fb794f1ec720b9bc8e43257451bd99c4be6fa1c9")
c.Assert(fmt.Sprintf("%x", idx.PackfileChecksum), Equals, f.PackfileHash.String())
}

func (s *IdxfileSuite) TestDecodeCRCs(c *C) {
f := fixtures.Basic().ByTag("ofs-delta").One()

scanner := packfile.NewScanner(f.Packfile())
storage := memory.NewStorage()

pd, err := packfile.NewDecoder(scanner, storage)
hash := plumbing.NewHash("1669dce138d9b841a518c64b10914d88f5e488ea")
ok, err := idx.Contains(hash)
c.Assert(err, IsNil)
_, err = pd.Decode()
c.Assert(err, IsNil)

i := pd.Index().ToIdxFile()
i.Version = VersionSupported
c.Assert(ok, Equals, true)

buf := bytes.NewBuffer(nil)
e := NewEncoder(buf)
_, err = e.Encode(i)
offset, err := idx.FindOffset(hash)
c.Assert(err, IsNil)
c.Assert(offset, Equals, int64(615))

idx := &Idxfile{}

d := NewDecoder(buf)
err = d.Decode(idx)
crc32, err := idx.FindCRC32(hash)
c.Assert(err, IsNil)
c.Assert(crc32, Equals, uint32(3645019190))

c.Assert(idx.Entries, DeepEquals, i.Entries)
c.Assert(fmt.Sprintf("%x", idx.IdxChecksum), Equals, "fb794f1ec720b9bc8e43257451bd99c4be6fa1c9")
c.Assert(fmt.Sprintf("%x", idx.PackfileChecksum), Equals, f.PackfileHash.String())
}

func (s *IdxfileSuite) TestDecode64bitsOffsets(c *C) {
f := bytes.NewBufferString(fixtureLarge4GB)

idx := &Idxfile{}
idx := new(MemoryIndex)

d := NewDecoder(base64.NewDecoder(base64.StdEncoding, f))
err := d.Decode(idx)
Expand All @@ -88,29 +72,22 @@ func (s *IdxfileSuite) TestDecode64bitsOffsets(c *C) {
"35858be9c6f5914cbe6768489c41eb6809a2bceb": 5924278919,
}

for _, e := range idx.Entries {
c.Assert(expected[e.Hash.String()], Equals, e.Offset)
}
}

func (s *IdxfileSuite) TestDecode64bitsOffsetsIdempotent(c *C) {
f := bytes.NewBufferString(fixtureLarge4GB)

expected := &Idxfile{}

d := NewDecoder(base64.NewDecoder(base64.StdEncoding, f))
err := d.Decode(expected)
iter, err := idx.Entries()
c.Assert(err, IsNil)

buf := bytes.NewBuffer(nil)
_, err = NewEncoder(buf).Encode(expected)
c.Assert(err, IsNil)
var entries int
for {
e, err := iter.Next()
if err == io.EOF {
break
}
c.Assert(err, IsNil)
entries++

idx := &Idxfile{}
err = NewDecoder(buf).Decode(idx)
c.Assert(err, IsNil)
c.Assert(expected[e.Hash.String()], Equals, e.Offset)
}

c.Assert(idx.Entries, DeepEquals, expected.Entries)
c.Assert(entries, Equals, len(expected))
}

const fixtureLarge4GB = `/3RPYwAAAAIAAAAAAAAAAAAAAAAAAAABAAAAAQAAAAEAAAABAAAAAQAAAAEAAAABAAAAAQAAAAEA
Expand Down Expand Up @@ -139,3 +116,30 @@ AAAAAAAMgAAAAQAAAI6AAAACgAAAA4AAAASAAAAFAAAAAV9Qam8AAAABYR1ShwAAAACdxfYxAAAA
ANz1Di4AAAABPUnxJAAAAADNxzlGr6vCJpIFz4XaG/fi/f9C9zgQ8ptKSQpfQ1NMJBGTDTxxYGGp
ch2xUA==
`

// BenchmarkDecode measures decoding the basic fixture's idx file into a fresh
// MemoryIndex on every iteration.
//
// The idx file is read into memory once, before the timed loop, so each
// iteration decodes from an in-memory buffer. Any setup or decode failure
// aborts the benchmark immediately instead of letting it continue with
// unusable state.
func BenchmarkDecode(b *testing.B) {
	if err := fixtures.Init(); err != nil {
		b.Fatalf("unexpected error initializing fixtures: %s", err)
	}

	// Registered right after a successful Init so the fixtures are cleaned up
	// even when a later step aborts the benchmark (Fatalf still runs defers).
	defer func() {
		if err := fixtures.Clean(); err != nil {
			b.Errorf("unexpected error cleaning fixtures: %s", err)
		}
	}()

	f := fixtures.Basic().One()
	fixture, err := ioutil.ReadAll(f.Idx())
	if err != nil {
		b.Fatalf("unexpected error reading idx file: %s", err)
	}

	// Keep fixture initialization and the file read out of the measurement.
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		idx := new(MemoryIndex)
		if err := NewDecoder(bytes.NewBuffer(fixture)).Decode(idx); err != nil {
			b.Fatalf("unexpected error decoding: %s", err)
		}
	}
}
Loading