Skip to content

Commit 1afef7e

Browse files
authored
format: index encoder and index decoder improvements (src-d#105)
1 parent 15fef29 commit 1afef7e

File tree

6 files changed

+362
-78
lines changed

6 files changed

+362
-78
lines changed

formats/index/decoder.go

Lines changed: 103 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@ package index
22

33
import (
44
"bytes"
5+
"crypto/sha1"
56
"errors"
7+
"hash"
68
"io"
79
"io/ioutil"
810
"strconv"
@@ -13,38 +15,42 @@ import (
1315
)
1416

1517
var (
16-
// IndexVersionSupported is the range of supported index versions
17-
IndexVersionSupported = struct{ Min, Max uint32 }{Min: 2, Max: 4}
18+
// DecodeVersionSupported is the range of supported index versions
19+
DecodeVersionSupported = struct{ Min, Max uint32 }{Min: 2, Max: 4}
1820

19-
// ErrUnsupportedVersion is returned by Decode when the idxindex file
20-
// version is not supported.
21-
ErrUnsupportedVersion = errors.New("Unsuported version")
2221
// ErrMalformedSignature is returned by Decode when the index header file is
2322
// malformed
24-
ErrMalformedSignature = errors.New("Malformed index signature file")
23+
ErrMalformedSignature = errors.New("malformed index signature file")
24+
// ErrInvalidChecksum is returned by Decode if the SHA1 hash does not match
25+
// the read content
26+
ErrInvalidChecksum = errors.New("invalid checksum")
2527

26-
indexSignature = []byte{'D', 'I', 'R', 'C'}
27-
treeExtSignature = []byte{'T', 'R', 'E', 'E'}
28-
resolveUndoExtSignature = []byte{'R', 'E', 'U', 'C'}
28+
errUnknownExtension = errors.New("unknown extension")
2929
)
3030

3131
const (
32-
EntryExtended = 0x4000
33-
EntryValid = 0x8000
34-
35-
nameMask = 0xfff
36-
intentToAddMask = 1 << 13
37-
skipWorkTreeMask = 1 << 14
32+
entryHeaderLength = 62
33+
entryExtended = 0x4000
34+
entryValid = 0x8000
35+
nameMask = 0xfff
36+
intentToAddMask = 1 << 13
37+
skipWorkTreeMask = 1 << 14
3838
)
3939

40+
// A Decoder reads and decodes index files from an input stream.
4041
type Decoder struct {
4142
r io.Reader
43+
hash hash.Hash
4244
lastEntry *Entry
4345
}
4446

4547
// NewDecoder returns a new decoder that reads from r.
4648
func NewDecoder(r io.Reader) *Decoder {
47-
return &Decoder{r: r}
49+
h := sha1.New()
50+
return &Decoder{
51+
r: io.TeeReader(r, h),
52+
hash: h,
53+
}
4854
}
4955

5056
// Decode reads the whole index object from its input and stores it in the
@@ -56,20 +62,20 @@ func (d *Decoder) Decode(idx *Index) error {
5662
return err
5763
}
5864

59-
idx.EntryCount, err = binary.ReadUint32(d.r)
65+
entryCount, err := binary.ReadUint32(d.r)
6066
if err != nil {
6167
return err
6268
}
6369

64-
if err := d.readEntries(idx); err != nil {
70+
if err := d.readEntries(idx, int(entryCount)); err != nil {
6571
return err
6672
}
6773

6874
return d.readExtensions(idx)
6975
}
7076

71-
func (d *Decoder) readEntries(idx *Index) error {
72-
for i := 0; i < int(idx.EntryCount); i++ {
77+
func (d *Decoder) readEntries(idx *Index, count int) error {
78+
for i := 0; i < count; i++ {
7379
e, err := d.readEntry(idx)
7480
if err != nil {
7581
return err
@@ -86,31 +92,31 @@ func (d *Decoder) readEntry(idx *Index) (*Entry, error) {
8692
e := &Entry{}
8793

8894
var msec, mnsec, sec, nsec uint32
95+
var flags uint16
8996

90-
flowSize := 62
9197
flow := []interface{}{
92-
&msec, &mnsec,
9398
&sec, &nsec,
99+
&msec, &mnsec,
94100
&e.Dev,
95101
&e.Inode,
96102
&e.Mode,
97103
&e.UID,
98104
&e.GID,
99105
&e.Size,
100106
&e.Hash,
101-
&e.Flags,
107+
&flags,
102108
}
103109

104110
if err := binary.Read(d.r, flow...); err != nil {
105111
return nil, err
106112
}
107113

108-
read := flowSize
109-
e.CreatedAt = time.Unix(int64(msec), int64(mnsec))
110-
e.ModifiedAt = time.Unix(int64(sec), int64(nsec))
111-
e.Stage = Stage(e.Flags>>12) & 0x3
114+
read := entryHeaderLength
115+
e.CreatedAt = time.Unix(int64(sec), int64(nsec))
116+
e.ModifiedAt = time.Unix(int64(msec), int64(mnsec))
117+
e.Stage = Stage(flags>>12) & 0x3
112118

113-
if e.Flags&EntryExtended != 0 {
119+
if flags&entryExtended != 0 {
114120
extended, err := binary.ReadUint16(d.r)
115121
if err != nil {
116122
return nil, err
@@ -121,20 +127,21 @@ func (d *Decoder) readEntry(idx *Index) (*Entry, error) {
121127
e.SkipWorktree = extended&skipWorkTreeMask != 0
122128
}
123129

124-
if err := d.readEntryName(idx, e); err != nil {
130+
if err := d.readEntryName(idx, e, flags); err != nil {
125131
return nil, err
126132
}
127133

128134
return e, d.padEntry(idx, e, read)
129135
}
130136

131-
func (d *Decoder) readEntryName(idx *Index, e *Entry) error {
137+
func (d *Decoder) readEntryName(idx *Index, e *Entry, flags uint16) error {
132138
var name string
133139
var err error
134140

135141
switch idx.Version {
136142
case 2, 3:
137-
name, err = d.doReadEntryName(e)
143+
len := flags & nameMask
144+
name, err = d.doReadEntryName(len)
138145
case 4:
139146
name, err = d.doReadEntryNameV4()
140147
default:
@@ -168,10 +175,8 @@ func (d *Decoder) doReadEntryNameV4() (string, error) {
168175
return base + string(name), nil
169176
}
170177

171-
func (d *Decoder) doReadEntryName(e *Entry) (string, error) {
172-
pLen := e.Flags & nameMask
173-
174-
name := make([]byte, int64(pLen))
178+
func (d *Decoder) doReadEntryName(len uint16) (string, error) {
179+
name := make([]byte, len)
175180
if err := binary.Read(d.r, &name); err != nil {
176181
return "", err
177182
}
@@ -195,50 +200,88 @@ func (d *Decoder) padEntry(idx *Index, e *Entry, read int) error {
195200
return nil
196201
}
197202

203+
// TODO: support 'Split index' and 'Untracked cache' extensions; take into account
204+
// that they are not supported by jgit or libgit
198205
func (d *Decoder) readExtensions(idx *Index) error {
206+
var expected []byte
199207
var err error
208+
209+
var header [4]byte
200210
for {
201-
err = d.readExtension(idx)
211+
expected = d.hash.Sum(nil)
212+
213+
var n int
214+
if n, err = io.ReadFull(d.r, header[:]); err != nil {
215+
if n == 0 {
216+
err = io.EOF
217+
}
218+
219+
break
220+
}
221+
222+
err = d.readExtension(idx, header[:])
202223
if err != nil {
203224
break
204225
}
205226
}
206227

207-
if err == io.EOF {
208-
return nil
228+
if err != errUnknownExtension {
229+
return err
209230
}
210231

211-
return err
232+
return d.readChecksum(expected, header)
212233
}
213234

214-
func (d *Decoder) readExtension(idx *Index) error {
215-
var s = make([]byte, 4)
216-
if _, err := io.ReadFull(d.r, s); err != nil {
217-
return err
235+
func (d *Decoder) readExtension(idx *Index, header []byte) error {
236+
switch {
237+
case bytes.Equal(header, treeExtSignature):
238+
r, err := d.getExtensionReader()
239+
if err != nil {
240+
return err
241+
}
242+
243+
idx.Cache = &Tree{}
244+
d := &treeExtensionDecoder{r}
245+
if err := d.Decode(idx.Cache); err != nil {
246+
return err
247+
}
248+
case bytes.Equal(header, resolveUndoExtSignature):
249+
r, err := d.getExtensionReader()
250+
if err != nil {
251+
return err
252+
}
253+
254+
idx.ResolveUndo = &ResolveUndo{}
255+
d := &resolveUndoDecoder{r}
256+
if err := d.Decode(idx.ResolveUndo); err != nil {
257+
return err
258+
}
259+
default:
260+
return errUnknownExtension
218261
}
219262

263+
return nil
264+
}
265+
266+
func (d *Decoder) getExtensionReader() (io.Reader, error) {
220267
len, err := binary.ReadUint32(d.r)
221268
if err != nil {
222-
return err
269+
return nil, err
223270
}
224271

225-
switch {
226-
case bytes.Equal(s, treeExtSignature):
227-
t := &Tree{}
228-
td := &treeExtensionDecoder{&io.LimitedReader{R: d.r, N: int64(len)}}
229-
if err := td.Decode(t); err != nil {
230-
return err
231-
}
272+
return &io.LimitedReader{R: d.r, N: int64(len)}, nil
273+
}
232274

233-
idx.Cache = t
234-
case bytes.Equal(s, resolveUndoExtSignature):
235-
ru := &ResolveUndo{}
236-
rud := &resolveUndoDecoder{&io.LimitedReader{R: d.r, N: int64(len)}}
237-
if err := rud.Decode(ru); err != nil {
238-
return err
239-
}
275+
func (d *Decoder) readChecksum(expected []byte, alreadyRead [4]byte) error {
276+
var h core.Hash
277+
copy(h[:4], alreadyRead[:])
278+
279+
if err := binary.Read(d.r, h[4:]); err != nil {
280+
return err
281+
}
240282

241-
idx.ResolveUndo = ru
283+
if bytes.Compare(h[:], expected) != 0 {
284+
return ErrInvalidChecksum
242285
}
243286

244287
return nil
@@ -259,7 +302,7 @@ func validateHeader(r io.Reader) (version uint32, err error) {
259302
return 0, err
260303
}
261304

262-
if version < IndexVersionSupported.Min || version > IndexVersionSupported.Max {
305+
if version < DecodeVersionSupported.Min || version > DecodeVersionSupported.Max {
263306
return 0, ErrUnsupportedVersion
264307
}
265308

formats/index/decoder_test.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ func (s *IdxfileSuite) TestDecode(c *C) {
2626
c.Assert(err, IsNil)
2727

2828
c.Assert(idx.Version, Equals, uint32(2))
29-
c.Assert(idx.EntryCount, Equals, uint32(9))
29+
c.Assert(idx.Entries, HasLen, 9)
3030
}
3131

3232
func (s *IdxfileSuite) TestDecodeEntries(c *C) {
@@ -97,7 +97,7 @@ func (s *IdxfileSuite) TestDecodeMergeConflict(c *C) {
9797
c.Assert(err, IsNil)
9898

9999
c.Assert(idx.Version, Equals, uint32(2))
100-
c.Assert(idx.EntryCount, Equals, uint32(13))
100+
c.Assert(idx.Entries, HasLen, 13)
101101

102102
expected := []struct {
103103
Stage Stage
@@ -136,7 +136,7 @@ func (s *IdxfileSuite) TestDecodeExtendedV3(c *C) {
136136
c.Assert(err, IsNil)
137137

138138
c.Assert(idx.Version, Equals, uint32(3))
139-
c.Assert(idx.EntryCount, Equals, uint32(11))
139+
c.Assert(idx.Entries, HasLen, 11)
140140

141141
c.Assert(idx.Entries[6].Name, Equals, "intent-to-add")
142142
c.Assert(idx.Entries[6].IntentToAdd, Equals, true)
@@ -153,7 +153,7 @@ func (s *IdxfileSuite) TestDecodeResolveUndo(c *C) {
153153
c.Assert(err, IsNil)
154154

155155
c.Assert(idx.Version, Equals, uint32(2))
156-
c.Assert(idx.EntryCount, Equals, uint32(8))
156+
c.Assert(idx.Entries, HasLen, 8)
157157

158158
ru := idx.ResolveUndo
159159
c.Assert(ru.Entries, HasLen, 2)
@@ -178,7 +178,7 @@ func (s *IdxfileSuite) TestDecodeV4(c *C) {
178178
c.Assert(err, IsNil)
179179

180180
c.Assert(idx.Version, Equals, uint32(4))
181-
c.Assert(idx.EntryCount, Equals, uint32(11))
181+
c.Assert(idx.Entries, HasLen, 11)
182182

183183
names := []string{
184184
".gitignore", "CHANGELOG", "LICENSE", "binary.jpg", "go/example.go",

formats/index/doc.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Package idxfile implements a encoder/decoder of index format files
1+
// Package index implements an encoder/decoder of index format files
22
package index
33

44
/*

0 commit comments

Comments
 (0)