Skip to content
This repository was archived by the owner on Sep 11, 2020. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions difftree/internal/radixmerkle/doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package merkletrie

/*
Package merkletrie gives support for n-ary trees that are at the same
time Merkle trees and Radix trees, and provides an efficient tree
comparison algorithm for them.

Git trees are Radix n-ary trees by virtue of the names of their
tree entries. At the same time, git trees are Merkle trees thanks to
their hashes.

When comparing git trees, the simple approach of alphabetically sorting
their elements and comparing the resulting lists is not enough as it
depends linearly on the number of files in the trees: When a directory
has lots of files but none of them has been modified, this approach is
very expensive. We can do better by pruning whole directories that
have not changed, just by looking at their hashes. This package
provides the tools to do exactly that.

This package defines Radix-Merkle trees as nodes that should have:
- a hash: the Merkle part of the Radix-Merkle tree
- a key: the Radix part of the Radix-Merkle tree

The Merkle hash condition is not enforced by this package though. This
means that node hashes don't have to take into account the hashes of
their children, which is good for testing purposes.

Nodes in the Radix-Merkle tree are abstracted by the Noder interface.
The intended use is that git.Tree implements this interface.
*/
79 changes: 79 additions & 0 deletions difftree/internal/radixmerkle/frame.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
package merkletrie

import (
"bytes"
"fmt"
)

// sep is the separator placed between a parent's absolute key and a
// node's key when newFrame builds the base of a frame.
const sep = "/"

// A frame represents siblings in a trie, along with the path to get to
// them. For example the frame for the node with key `b` in this trie:
//
//	       a
//	      / \
//	     /   \
//	    /     \
//	   b       c
//	  /|\     / \
//	 y z x   d   e
//	 |
//	 g
//
// would be:
//
//	f := frame{
//		base:  "a/b",            // path to the siblings
//		stack: []Noder{z, y, x}, // in reverse alphabetical order
//	}
//
// The last element of stack is the one returned by top and pop, so,
// given the ordering above, siblings are handed out in alphabetical
// order.
type frame struct {
	base  string  // absolute key of the siblings' parent
	stack []Noder // siblings, sorted in reverse alphabetical order by key
}

// newFrame builds the frame that holds the children of n.
//
// The base of the frame is parentAbsoluteKey and the key of n joined by
// sep. The stack is exactly what n.Children() returns, in that order;
// no sorting is performed here.
func newFrame(parentAbsoluteKey string, n Noder) *frame {
	f := new(frame)
	f.base = parentAbsoluteKey + sep + n.Key()
	f.stack = n.Children()

	return f
}

// String returns a human readable description of the frame: its base
// followed by the keys of the nodes in its stack, from bottom to top,
// every one of them quoted.
func (f *frame) String() string {
	out := new(bytes.Buffer)
	_, _ = fmt.Fprintf(out, "base=%q, stack=[", f.base)

	for i, sibling := range f.stack {
		// every key but the first is preceded by a separator
		if i > 0 {
			_, _ = out.WriteString(", ")
		}
		_, _ = fmt.Fprintf(out, "%q", sibling.Key())
	}

	_ = out.WriteByte(']')

	return out.String()
}

// top returns the last node in the stack and true, leaving the stack
// untouched. If the stack is empty it returns nil and false.
func (f *frame) top() (Noder, bool) {
	if n := len(f.stack); n > 0 {
		return f.stack[n-1], true
	}

	return nil, false
}

// pop removes the last node from the stack and returns it along with
// true. If the stack is empty it returns nil and false.
func (f *frame) pop() (Noder, bool) {
	last := len(f.stack) - 1
	if last < 0 {
		return nil, false
	}

	popped := f.stack[last]
	// clear the vacated slot so the backing array stops referencing
	// the node
	f.stack[last] = nil
	f.stack = f.stack[:last]

	return popped, true
}
69 changes: 69 additions & 0 deletions difftree/internal/radixmerkle/frame_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
package merkletrie

import . "gopkg.in/check.v1"

// FrameSuite groups the tests for the frame type.
type FrameSuite struct{}

// Hand the suite to the check package through its Suite function.
var _ = Suite(&FrameSuite{})

// TestNewFrameFromLeaf checks the frame built from a node without
// children: its string representation must show an empty stack, and
// both top and pop must report that there is nothing to return.
func (s *FrameSuite) TestNewFrameFromLeaf(c *C) {
	n := newNode(
		[]byte("hash"),
		"key",
		[]*node{},
	)

	f := newFrame("foo", n)

	expectedString := `base="foo/key", stack=[]`
	c.Assert(f.String(), Equals, expectedString)

	obtainedTopNode, obtainedTopOK := f.top()
	c.Assert(obtainedTopNode, IsNil)
	c.Assert(obtainedTopOK, Equals, false)

	// This must exercise pop, not top again: popping an empty frame
	// has to fail in the same way.
	obtainedPopNode, obtainedPopOK := f.pop()
	c.Assert(obtainedPopNode, IsNil)
	c.Assert(obtainedPopOK, Equals, false)
}

// TestNewFrameFromParent checks the frame built from a node with four
// children that are handed to newNode out of order: the string
// representation must list them in reverse order by key, and top and
// pop must hand them out from leaf0 to leaf3 before reporting that the
// frame is empty.
func (s *FrameSuite) TestNewFrameFromParent(c *C) {
	mkLeaf := func(hash, key string) *node {
		return newNode([]byte(hash), key, []*node{})
	}

	l0 := mkLeaf("leaf0 hash", "leaf0 key")
	l1 := mkLeaf("leaf1 hash", "leaf1 key")
	l2 := mkLeaf("leaf2 hash", "leaf2 key")
	l3 := mkLeaf("leaf3 hash", "leaf3 key")

	// the children are deliberately not alphabetically sorted
	unsorted := []*node{l3, l0, l2, l1}
	root := newNode([]byte("parent hash"), "parent key", unsorted)

	f := newFrame("foo", root)

	c.Assert(
		f.String(),
		Equals,
		`base="foo/parent key", stack=["leaf3 key", "leaf2 key", "leaf1 key", "leaf0 key"]`,
	)

	for _, want := range []*node{l0, l1, l2, l3} {
		checkTopAndPop(c, f, want, true)
	}
	checkTopAndPop(c, f, nil, false)
}

// checkTopAndPop calls top and then pop on f, asserting that each of
// them returns expectedNode and expectedOK. A nil expectedNode means
// the returned node must be nil.
func checkTopAndPop(c *C, f *frame, expectedNode *node, expectedOK bool) {
	// top goes first so it sees the node that pop then removes
	for _, get := range []func() (Noder, bool){f.top, f.pop} {
		got, ok := get()
		if expectedNode != nil {
			c.Assert(got, DeepEquals, expectedNode)
		} else {
			c.Assert(got, IsNil)
		}
		c.Assert(ok, Equals, expectedOK)
	}
}
167 changes: 167 additions & 0 deletions difftree/internal/radixmerkle/iter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
package merkletrie

// Iter is a radix tree iterator that will traverse the trie in
// depth-first pre-order. Entries are traversed in (case-sensitive)
// alphabetical order for each level.
//
// This is the kind of traversal you will expect when listing
// ordinary files and directories recursively, for example:
//
//	     Trie           Traversal order
//	     ----           ---------------
//	      .
//	    / | \           a
//	   /  |  \          b
//	  b   a   z   ===>  b/a
//	 / \                b/c
//	c   a               z
//
// The Step method will return the next item, the Next method will do
// the same but without descending deeper into the tree (i.e. skipping
// the contents of "directories").
//
// The name of the type and its methods are based on the well known "next"
// and "step" operations, quite common in debuggers, like gdb.
type Iter struct {
	// tells if the iteration has started; until then, advancing only
	// reports the current node instead of moving past it.
	hasStarted bool
	// Each level of the tree is represented as a frame, this stack
	// keeps track of the frames wrapping the current iterator position.
	// The iterator will "step" into a node by adding the frame of its
	// children to the stack, or go to the next element at the same
	// level by popping the current node from the frame at the top.
	frameStack []*frame
}

// NewIter creates an iterator over the trie rooted at n. Its frame
// stack starts with a single frame, the one holding the children of n,
// built with an empty parent key.
func NewIter(n Noder) *Iter {
	iter := new(Iter)
	iter.push(newFrame("", n))

	return iter
}

// top returns the frame at the top of the frame stack and true, without
// removing it. If there are no frames it returns nil and false.
func (iter *Iter) top() (*frame, bool) {
	if n := len(iter.frameStack); n > 0 {
		return iter.frameStack[n-1], true
	}

	return nil, false
}

// pop removes the frame at the top of the frame stack and returns it
// along with true. If there are no frames it returns nil and false.
func (iter *Iter) pop() (*frame, bool) {
	last := len(iter.frameStack) - 1
	if last < 0 {
		return nil, false
	}

	popped := iter.frameStack[last]
	// clear the vacated slot so the backing array stops referencing
	// the frame
	iter.frameStack[last] = nil
	iter.frameStack = iter.frameStack[:last]

	return popped, true
}

// push places f at the top of the frame stack.
func (iter *Iter) push(f *frame) {
	frames := append(iter.frameStack, f)
	iter.frameStack = frames
}

// Named values for the mustDescend argument of Iter.advance.
const (
	descend     = true  // used by Step: get into the children of the current node
	dontDescend = false // used by Next: skip the children of the current node
)

// Next moves past the current node without visiting its children and
// returns the node that follows, together with true. When nothing is
// left to iterate it returns nil and false.
func (iter *Iter) Next() (Noder, bool) {
	n, ok := iter.advance(dontDescend)

	return n, ok
}

// Step moves to the next node of the tree, getting into the children
// of the current node when it has any, and returns that node together
// with true. When nothing is left to iterate it returns nil and false.
func (iter *Iter) Step() (Noder, bool) {
	n, ok := iter.advance(descend)

	return n, ok
}

// advance moves the iterator one position forward and returns the node
// it lands on together with true, or nil and false if there are no
// more nodes.
//
// The very first call does not move: it only reports the node the
// iterator starts on. Every later call discards the node returned by
// the previous one and then either:
//
//   - continues with whatever node is now on top of the existing
//     frames, when mustDescend is dontDescend or the discarded node has
//     no children, or
//   - pushes a frame with the children of the discarded node and
//     continues with the first of them, when mustDescend is descend.
func (iter *Iter) advance(mustDescend bool) (Noder, bool) {
	node, ok := iter.current()
	if !ok {
		return nil, false
	}

	// The first time we just return the current node.
	if !iter.hasStarted {
		iter.hasStarted = true
		return node, true
	}

	// Remember the base of the frame holding the node before dropping
	// it: drop removes that frame if the node was its last element, and
	// the base is needed to build the absolute key of the children.
	// top cannot fail here because current has just succeeded.
	parent, _ := iter.top()
	parentBase := parent.base

	ignoreChildren := node.NumChildren() == 0 || !mustDescend

	// following advances always involve dropping the already seen node
	_ = iter.drop()
	if ignoreChildren {
		// find the next node in the existing frames.
		return iter.current()
	}

	// The frame of the children hangs from the frame of their parent,
	// not from the bare key of the parent.
	children := newFrame(parentBase, node)
	if len(children.stack) == 0 {
		// The node announced children but returned none: treat it as
		// a leaf rather than pushing an empty frame, which would hide
		// every remaining node behind it.
		return iter.current()
	}
	iter.push(children)

	return iter.current()
}

// current returns the node at the top of the frame that is at the top
// of the frame stack, and true. It returns nil and false if there are
// no frames or if the top frame has no nodes; frames below the top one
// are never inspected.
func (iter *Iter) current() (Noder, bool) {
	if f, hasFrame := iter.top(); hasFrame {
		if n, hasNode := f.top(); hasNode {
			return n, true
		}
	}

	return nil, false
}

// drop removes the current node and then every frame, from the top
// down, that is left without nodes as a consequence. It returns true
// if a node was removed, and false if there was no frame or the top
// frame had no nodes, in which case nothing is modified.
func (iter *Iter) drop() bool {
	f, found := iter.top()
	if !found {
		return false
	}

	if _, popped := f.pop(); !popped {
		return false
	}

	// discard exhausted frames until one with nodes shows up or the
	// frame stack runs out
	for found && len(f.stack) == 0 {
		_, _ = iter.pop()
		f, found = iter.top()
	}

	return true
}
Loading