Skip to content
This repository was archived by the owner on Sep 11, 2020. It is now read-only.

Commit ccf5bb1

Browse files
authored
difftree: merkletrie internal package with iterator (#133)
1 parent 0737406 commit ccf5bb1

File tree

9 files changed

+1004
-0
lines changed

9 files changed

+1004
-0
lines changed

difftree/internal/radixmerkle/doc.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
package merkletrie
2+
3+
/*
4+
Package merkletrie gives support for n-ary trees that are at the same
5+
time Merkle trees and Radix trees, and provides an efficient tree
6+
comparison algorithm for them.
7+
8+
Git trees are Radix n-ary trees in virtue of the names of their
9+
tree entries. At the same time, git trees are Merkle trees thanks to
10+
their hashes.
11+
12+
When comparing git trees, the simple approach of alphabetically sorting
13+
their elements and comparing the resulting lists is not enough as it
14+
depends linearly on the number of files in the trees: When a directory
15+
has lots of files but none of them has been modified, this approach is
16+
very expensive. We can do better by pruning whole directories that
17+
have not changed, just by looking at their hashes. This package
18+
provides the tools to do exactly that.
19+
20+
This package defines Radix-Merkle trees as nodes that should have:
21+
- a hash: the Merkle part of the Radix-Merkle tree
22+
- a key: the Radix part of the Radix-Merkle tree
23+
24+
The Merkle hash condition is not enforced by this package though. This
25+
means that node hashes don't have to take into account the hashes of
26+
their children, which is good for testing purposes.
27+
28+
Nodes in the Radix-Merkle tree are abstracted by the Noder interface.
29+
The intended use is that git.Tree implements this interface.
30+
*/
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
package merkletrie
2+
3+
import (
4+
"bytes"
5+
"fmt"
6+
)
7+
8+
const sep = "/"
9+
10+
// A frame represents siblings in a trie, along with the path to get to
11+
// them. For example the frame for the node with key `b` in this trie:
12+
//
13+
// a
14+
// / \
15+
// / \
16+
// / \
17+
// b c
18+
// /|\ / \
19+
// y z x d e
20+
// |
21+
// g
22+
//
23+
// would be:
24+
//
25+
// f := frame{
26+
// base: "a/b", // path to the siblings
27+
// stack: []Node{z, y, x} // in reverse alphabetical order
28+
// }
29+
type frame struct {
30+
base string // absolute key of their parents
31+
stack []Noder // siblings, sorted in reverse alphabetical order by key
32+
}
33+
34+
// newFrame returns a frame for the children of a node n.
35+
func newFrame(parentAbsoluteKey string, n Noder) *frame {
36+
return &frame{
37+
base: parentAbsoluteKey + sep + n.Key(),
38+
stack: n.Children(),
39+
}
40+
}
41+
42+
func (f *frame) String() string {
43+
var buf bytes.Buffer
44+
_, _ = buf.WriteString(fmt.Sprintf("base=%q, stack=[", f.base))
45+
46+
sep := ""
47+
for _, e := range f.stack {
48+
_, _ = buf.WriteString(sep)
49+
sep = ", "
50+
_, _ = buf.WriteString(fmt.Sprintf("%q", e.Key()))
51+
}
52+
53+
_ = buf.WriteByte(']')
54+
55+
return buf.String()
56+
}
57+
58+
func (f *frame) top() (Noder, bool) {
59+
if len(f.stack) == 0 {
60+
return nil, false
61+
}
62+
63+
top := len(f.stack) - 1
64+
65+
return f.stack[top], true
66+
}
67+
68+
func (f *frame) pop() (Noder, bool) {
69+
if len(f.stack) == 0 {
70+
return nil, false
71+
}
72+
73+
top := len(f.stack) - 1
74+
ret := f.stack[top]
75+
f.stack[top] = nil
76+
f.stack = f.stack[:top]
77+
78+
return ret, true
79+
}
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
package merkletrie
2+
3+
import . "gopkg.in/check.v1"
4+
5+
type FrameSuite struct{}
6+
7+
var _ = Suite(&FrameSuite{})
8+
9+
func (s *FrameSuite) TestNewFrameFromLeaf(c *C) {
10+
n := newNode(
11+
[]byte("hash"),
12+
"key",
13+
[]*node{},
14+
)
15+
16+
frame := newFrame("foo", n)
17+
18+
expectedString := `base="foo/key", stack=[]`
19+
c.Assert(frame.String(), Equals, expectedString)
20+
21+
obtainedTopNode, obtainedTopOK := frame.top()
22+
c.Assert(obtainedTopNode, IsNil)
23+
c.Assert(obtainedTopOK, Equals, false)
24+
25+
obtainedPopNode, obtainedPopOK := frame.top()
26+
c.Assert(obtainedPopNode, IsNil)
27+
c.Assert(obtainedPopOK, Equals, false)
28+
}
29+
30+
func (s *FrameSuite) TestNewFrameFromParent(c *C) {
31+
leaf0 := newNode([]byte("leaf0 hash"), "leaf0 key", []*node{})
32+
leaf1 := newNode([]byte("leaf1 hash"), "leaf1 key", []*node{})
33+
leaf2 := newNode([]byte("leaf2 hash"), "leaf2 key", []*node{})
34+
leaf3 := newNode([]byte("leaf3 hash"), "leaf3 key", []*node{})
35+
parent := newNode(
36+
[]byte("parent hash"),
37+
"parent key",
38+
[]*node{leaf3, leaf0, leaf2, leaf1}, // not alphabetically sorted
39+
)
40+
41+
frame := newFrame("foo", parent)
42+
43+
expectedString := `base="foo/parent key", stack=["leaf3 key", "leaf2 key", "leaf1 key", "leaf0 key"]`
44+
c.Assert(frame.String(), Equals, expectedString)
45+
46+
checkTopAndPop(c, frame, leaf0, true)
47+
checkTopAndPop(c, frame, leaf1, true)
48+
checkTopAndPop(c, frame, leaf2, true)
49+
checkTopAndPop(c, frame, leaf3, true)
50+
checkTopAndPop(c, frame, nil, false)
51+
}
52+
53+
func checkTopAndPop(c *C, f *frame, expectedNode *node, expectedOK bool) {
54+
n, ok := f.top()
55+
if expectedNode == nil {
56+
c.Assert(n, IsNil)
57+
} else {
58+
c.Assert(n, DeepEquals, expectedNode)
59+
}
60+
c.Assert(ok, Equals, expectedOK)
61+
62+
n, ok = f.pop()
63+
if expectedNode == nil {
64+
c.Assert(n, IsNil)
65+
} else {
66+
c.Assert(n, DeepEquals, expectedNode)
67+
}
68+
c.Assert(ok, Equals, expectedOK)
69+
}

difftree/internal/radixmerkle/iter.go

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
package merkletrie
2+
3+
// Iter is a radix tree iterator that will traverse the trie in
4+
// depth-first pre-order. Entries are traversed in (case-sensitive)
5+
// alphabetical order for each level.
6+
//
7+
// This is the kind of traversal you will expect when listing
8+
// ordinary files and directories recursively, for example:
9+
//
10+
// Trie Traversal order
11+
// ---- ---------------
12+
// .
13+
// / | \ a
14+
// / | \ b
15+
// b a z ===> b/a
16+
// / \ b/c
17+
// c a z
18+
//
19+
//
20+
// The Step method will return the next item, the Next method will do
21+
// the same but without descending deeper into the tree (i.e. skipping
22+
// the contents of "directories").
23+
//
24+
// The name of the type and its methods are based on the well known "next"
25+
// and "step" operations, quite common in debuggers, like gdb.
26+
type Iter struct {
27+
// tells if the iteration has started.
28+
hasStarted bool
29+
// Each level of the tree is represented as a frame, this stack
30+
// keeps track of the frames wrapping the current iterator position.
31+
// The iterator will "step" into a node by adding its frame to the
32+
// stack, or go to the next element at the same level by poping the
33+
// current frame.
34+
frameStack []*frame
35+
}
36+
37+
// NewIter returns a new iterator for the trie with its root at n.
38+
func NewIter(n Noder) *Iter {
39+
ret := &Iter{}
40+
ret.push(newFrame("", n))
41+
42+
return ret
43+
}
44+
45+
func (iter *Iter) top() (*frame, bool) {
46+
if len(iter.frameStack) == 0 {
47+
return nil, false
48+
}
49+
50+
top := len(iter.frameStack) - 1
51+
52+
return iter.frameStack[top], true
53+
}
54+
55+
func (iter *Iter) pop() (*frame, bool) {
56+
if len(iter.frameStack) == 0 {
57+
return nil, false
58+
}
59+
60+
top := len(iter.frameStack) - 1
61+
ret := iter.frameStack[top]
62+
iter.frameStack[top] = nil
63+
iter.frameStack = iter.frameStack[:top]
64+
65+
return ret, true
66+
}
67+
68+
func (iter *Iter) push(f *frame) {
69+
iter.frameStack = append(iter.frameStack, f)
70+
}
71+
72+
// Readable names for the mustDescend argument of Iter.advance.
const (
	// dontDescend makes the iterator skip the children of the current node.
	dontDescend = false
	// descend makes the iterator go into the children of the current node.
	descend = true
)
76+
77+
// Next returns the next node without descending deeper into the tree
78+
// and true. If there are no more entries it returns nil and false.
79+
func (iter *Iter) Next() (Noder, bool) {
80+
return iter.advance(dontDescend)
81+
}
82+
83+
// Step returns the next node in the tree, descending deeper into it if
84+
// needed. If there are no more nodes in the tree, it returns nil and
85+
// false.
86+
func (iter *Iter) Step() (Noder, bool) {
87+
return iter.advance(descend)
88+
}
89+
90+
// advances the iterator in whatever direction you want: descend or
91+
// dontDescend.
92+
func (iter *Iter) advance(mustDescend bool) (Noder, bool) {
93+
node, ok := iter.current()
94+
if !ok {
95+
return nil, false
96+
}
97+
98+
// The first time we just return the current node.
99+
if !iter.hasStarted {
100+
iter.hasStarted = true
101+
return node, ok
102+
}
103+
// following advances will involve dropping already seen nodes
104+
// or getting into their children
105+
106+
ignoreChildren := node.NumChildren() == 0 || !mustDescend
107+
if ignoreChildren {
108+
// if we must ignore the current node children, just drop
109+
// it and find the next one in the existing frames.
110+
_ = iter.drop()
111+
node, ok = iter.current()
112+
return node, ok
113+
}
114+
115+
// if we must descend into the current's node children, drop the
116+
// parent and add a new frame with its children.
117+
_ = iter.drop()
118+
iter.push(newFrame(node.Key(), node))
119+
node, _ = iter.current()
120+
121+
return node, true
122+
}
123+
124+
// returns the current frame and the current node (i.e. the ones at the
125+
// top of their respective stacks.
126+
func (iter *Iter) current() (Noder, bool) {
127+
f, ok := iter.top()
128+
if !ok {
129+
return nil, false
130+
}
131+
132+
n, ok := f.top()
133+
if !ok {
134+
return nil, false
135+
}
136+
137+
return n, true
138+
}
139+
140+
// removes the current node and all the frames that become empty as a
141+
// consecuence of this action. It returns true if something was dropped,
142+
// and false if there were no more nodes in the iterator.
143+
func (iter *Iter) drop() bool {
144+
frame, ok := iter.top()
145+
if !ok {
146+
return false
147+
}
148+
149+
_, ok = frame.pop()
150+
if !ok {
151+
return false
152+
}
153+
154+
for { // remove empty frames
155+
if len(frame.stack) != 0 {
156+
break
157+
}
158+
159+
_, _ = iter.pop()
160+
frame, ok = iter.top()
161+
if !ok {
162+
break
163+
}
164+
}
165+
166+
return true
167+
}

0 commit comments

Comments
 (0)