From ec65d90feaf3172a8bd1bf51bb85e7bdaaf28a54 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Tue, 23 Apr 2019 19:11:43 +0200 Subject: [PATCH 01/17] plumbing: object, add APIs for traversing over commit graphs Signed-off-by: Filip Navara --- plumbing/object/commitnode.go | 310 +++++++++++++++++++++ plumbing/object/commitnode_test.go | 81 ++++++ plumbing/object/commitnode_walker_ctime.go | 108 +++++++ 3 files changed, 499 insertions(+) create mode 100644 plumbing/object/commitnode.go create mode 100644 plumbing/object/commitnode_test.go create mode 100644 plumbing/object/commitnode_walker_ctime.go diff --git a/plumbing/object/commitnode.go b/plumbing/object/commitnode.go new file mode 100644 index 000000000..a613eb464 --- /dev/null +++ b/plumbing/object/commitnode.go @@ -0,0 +1,310 @@ +package object + +import ( + "fmt" + "io" + "time" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph" + "gopkg.in/src-d/go-git.v4/plumbing/storer" +) + +// CommitNode is generic interface encapsulating either Commit object or +// graphCommitNode object +type CommitNode interface { + ID() plumbing.Hash + Tree() (*Tree, error) + CommitTime() time.Time +} + +// CommitNodeIndex is generic interface encapsulating an index of CommitNode objects +// and accessor methods for walking it as a directed graph +type CommitNodeIndex interface { + NumParents(node CommitNode) int + ParentNodes(node CommitNode) CommitNodeIter + ParentNode(node CommitNode, i int) (CommitNode, error) + ParentHashes(node CommitNode) []plumbing.Hash + + Get(hash plumbing.Hash) (CommitNode, error) + + // Commit returns the full commit object from the node + Commit(node CommitNode) (*Commit, error) +} + +// CommitNodeIter is a generic closable interface for iterating over commit nodes. 
+type CommitNodeIter interface { + Next() (CommitNode, error) + ForEach(func(CommitNode) error) error + Close() +} + +// graphCommitNode is a reduced representation of Commit as presented in the commit +// graph file (commitgraph.Node). It is merely useful as an optimization for walking +// the commit graphs. +// +// graphCommitNode implements the CommitNode interface. +type graphCommitNode struct { + // Hash for the Commit object + hash plumbing.Hash + // Index of the node in the commit graph file + index int + + node *commitgraph.Node + gci *graphCommitNodeIndex +} + +// graphCommitNodeIndex is an index that can load CommitNode objects from both the commit +// graph files and the object store. +// +// graphCommitNodeIndex implements the CommitNodeIndex interface +type graphCommitNodeIndex struct { + commitGraph commitgraph.Index + s storer.EncodedObjectStorer +} + +// objectCommitNode is a representation of Commit as presented in the GIT object format. +// +// objectCommitNode implements the CommitNode interface. +type objectCommitNode struct { + commit *Commit +} + +// objectCommitNodeIndex is an index that can load CommitNode objects only from the +// object store. +// +// objectCommitNodeIndex implements the CommitNodeIndex interface +type objectCommitNodeIndex struct { + s storer.EncodedObjectStorer +} + +// ID returns the Commit object id referenced by the commit graph node. +func (c *graphCommitNode) ID() plumbing.Hash { + return c.hash +} + +// Tree returns the Tree referenced by the commit graph node. +func (c *graphCommitNode) Tree() (*Tree, error) { + return GetTree(c.gci.s, c.node.TreeHash) +} + +// CommitTime returns the Commiter.When time of the Commit referenced by the commit graph node. 
+func (c *graphCommitNode) CommitTime() time.Time { + return c.node.When +} + +func (c *graphCommitNode) String() string { + return fmt.Sprintf( + "%s %s\nDate: %s", + plumbing.CommitObject, c.ID(), + c.CommitTime().Format(DateFormat), + ) +} + +func NewGraphCommitNodeIndex(commitGraph commitgraph.Index, s storer.EncodedObjectStorer) CommitNodeIndex { + return &graphCommitNodeIndex{commitGraph, s} +} + +// NumParents returns the number of parents in a commit. +func (gci *graphCommitNodeIndex) NumParents(node CommitNode) int { + if cgn, ok := node.(*graphCommitNode); ok { + return len(cgn.node.ParentIndexes) + } + co := node.(*objectCommitNode) + return co.commit.NumParents() +} + +// ParentNodes return a CommitNodeIter for parents of specified node. +func (gci *graphCommitNodeIndex) ParentNodes(node CommitNode) CommitNodeIter { + return newParentgraphCommitNodeIter(gci, node) +} + +// ParentNode returns the ith parent of a commit. +func (gci *graphCommitNodeIndex) ParentNode(node CommitNode, i int) (CommitNode, error) { + if cgn, ok := node.(*graphCommitNode); ok { + if len(cgn.node.ParentIndexes) == 0 || i >= len(cgn.node.ParentIndexes) { + return nil, ErrParentNotFound + } + + parent, err := gci.commitGraph.GetNodeByIndex(cgn.node.ParentIndexes[i]) + if err != nil { + return nil, err + } + + return &graphCommitNode{ + hash: cgn.node.ParentHashes[i], + index: cgn.node.ParentIndexes[i], + node: parent, + gci: gci, + }, nil + } + + co := node.(*objectCommitNode) + if len(co.commit.ParentHashes) == 0 || i >= len(co.commit.ParentHashes) { + return nil, ErrParentNotFound + } + + parentHash := co.commit.ParentHashes[i] + return gci.Get(parentHash) +} + +// ParentHashes returns hashes of the parent commits for a specified node +func (gci *graphCommitNodeIndex) ParentHashes(node CommitNode) []plumbing.Hash { + if cgn, ok := node.(*graphCommitNode); ok { + return cgn.node.ParentHashes + } + co := node.(*objectCommitNode) + return co.commit.ParentHashes +} + +// 
NodeFromHash looks up a commit node by it's object hash +func (gci *graphCommitNodeIndex) Get(hash plumbing.Hash) (CommitNode, error) { + // Check the commit graph first + parentIndex, err := gci.commitGraph.GetIndexByHash(hash) + if err == nil { + parent, err := gci.commitGraph.GetNodeByIndex(parentIndex) + if err != nil { + return nil, err + } + + return &graphCommitNode{ + hash: hash, + index: parentIndex, + node: parent, + gci: gci, + }, nil + } + + // Fallback to loading full commit object + commit, err := GetCommit(gci.s, hash) + if err != nil { + return nil, err + } + + return &objectCommitNode{commit: commit}, nil +} + +// Commit returns the full Commit object representing the commit graph node. +func (gci *graphCommitNodeIndex) Commit(node CommitNode) (*Commit, error) { + if cgn, ok := node.(*graphCommitNode); ok { + return GetCommit(gci.s, cgn.ID()) + } + co := node.(*objectCommitNode) + return co.commit, nil +} + +// CommitTime returns the time when the commit was performed. +// +// CommitTime is present to fulfill the CommitNode interface. +func (c *objectCommitNode) CommitTime() time.Time { + return c.commit.Committer.When +} + +// ID returns the Commit object id referenced by the node. +func (c *objectCommitNode) ID() plumbing.Hash { + return c.commit.ID() +} + +// Tree returns the Tree referenced by the node. +func (c *objectCommitNode) Tree() (*Tree, error) { + return c.commit.Tree() +} + +func NewObjectCommitNodeIndex(s storer.EncodedObjectStorer) CommitNodeIndex { + return &objectCommitNodeIndex{s} +} + +// NumParents returns the number of parents in a commit. +func (oci *objectCommitNodeIndex) NumParents(node CommitNode) int { + co := node.(*objectCommitNode) + return co.commit.NumParents() +} + +// ParentNodes return a CommitNodeIter for parents of specified node. 
+func (oci *objectCommitNodeIndex) ParentNodes(node CommitNode) CommitNodeIter { + return newParentgraphCommitNodeIter(oci, node) +} + +// ParentNode returns the ith parent of a commit. +func (oci *objectCommitNodeIndex) ParentNode(node CommitNode, i int) (CommitNode, error) { + co := node.(*objectCommitNode) + parent, err := co.commit.Parent(i) + if err != nil { + return nil, err + } + return &objectCommitNode{commit: parent}, nil +} + +// ParentHashes returns hashes of the parent commits for a specified node +func (oci *objectCommitNodeIndex) ParentHashes(node CommitNode) []plumbing.Hash { + co := node.(*objectCommitNode) + return co.commit.ParentHashes +} + +// NodeFromHash looks up a commit node by it's object hash +func (oci *objectCommitNodeIndex) Get(hash plumbing.Hash) (CommitNode, error) { + commit, err := GetCommit(oci.s, hash) + if err != nil { + return nil, err + } + + return &objectCommitNode{commit: commit}, nil +} + +// Commit returns the full Commit object representing the commit graph node. +func (oci *objectCommitNodeIndex) Commit(node CommitNode) (*Commit, error) { + co := node.(*objectCommitNode) + return co.commit, nil +} + +// parentCommitNodeIter provides an iterator for parent commits from associated CommitNodeIndex. +type parentCommitNodeIter struct { + gci CommitNodeIndex + node CommitNode + i int +} + +func newParentgraphCommitNodeIter(gci CommitNodeIndex, node CommitNode) CommitNodeIter { + return &parentCommitNodeIter{gci, node, 0} +} + +// Next moves the iterator to the next commit and returns a pointer to it. If +// there are no more commits, it returns io.EOF. +func (iter *parentCommitNodeIter) Next() (CommitNode, error) { + obj, err := iter.gci.ParentNode(iter.node, iter.i) + if err == ErrParentNotFound { + return nil, io.EOF + } + if err == nil { + iter.i++ + } + + return obj, err +} + +// ForEach calls the cb function for each commit contained on this iter until +// an error occurs or the end of the iter is reached. 
If ErrStop is sent +// the iteration is stopped but no error is returned. The iterator is closed. +func (iter *parentCommitNodeIter) ForEach(cb func(CommitNode) error) error { + for { + obj, err := iter.Next() + if err != nil { + if err == io.EOF { + return nil + } + + return err + } + + if err := cb(obj); err != nil { + if err == storer.ErrStop { + return nil + } + + return err + } + } +} + +func (iter *parentCommitNodeIter) Close() { +} diff --git a/plumbing/object/commitnode_test.go b/plumbing/object/commitnode_test.go new file mode 100644 index 000000000..8f5966568 --- /dev/null +++ b/plumbing/object/commitnode_test.go @@ -0,0 +1,81 @@ +package object + +import ( + "path" + + "golang.org/x/exp/mmap" + . "gopkg.in/check.v1" + "gopkg.in/src-d/go-git-fixtures.v3" + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/cache" + "gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph" + "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" + "gopkg.in/src-d/go-git.v4/storage/filesystem" +) + +type CommitNodeSuite struct { + fixtures.Suite +} + +var _ = Suite(&CommitNodeSuite{}) + +func testWalker(c *C, nodeIndex CommitNodeIndex) { + head, err := nodeIndex.Get(plumbing.NewHash("b9d69064b190e7aedccf84731ca1d917871f8a1c")) + c.Assert(err, IsNil) + + iter := NewCommitNodeIterCTime( + head, + nodeIndex, + nil, + nil, + ) + + var commits []CommitNode + iter.ForEach(func(c CommitNode) error { + commits = append(commits, c) + return nil + }) + + c.Assert(commits, HasLen, 9) + + expected := []string{ + "b9d69064b190e7aedccf84731ca1d917871f8a1c", + "6f6c5d2be7852c782be1dd13e36496dd7ad39560", + "a45273fe2d63300e1962a9e26a6b15c276cd7082", + "c0edf780dd0da6a65a7a49a86032fcf8a0c2d467", + "bb13916df33ed23004c3ce9ed3b8487528e655c1", + "03d2c021ff68954cf3ef0a36825e194a4b98f981", + "ce275064ad67d51e99f026084e20827901a8361c", + "e713b52d7e13807e87a002e812041f248db3f643", + "347c91919944a68e9413581a1bc15519550a3afe", + } + for i, commit := range commits { + 
c.Assert(commit.ID().String(), Equals, expected[i]) + } +} + +func (s *CommitNodeSuite) TestWalkObject(c *C) { + f := fixtures.ByTag("commit-graph").One() + storer := filesystem.NewStorage(f.DotGit(), cache.NewObjectLRUDefault()) + p := f.Packfile() + defer p.Close() + err := packfile.UpdateObjectStorage(storer, p) + c.Assert(err, IsNil) + + nodeIndex := NewObjectCommitNodeIndex(storer) + testWalker(c, nodeIndex) +} + +func (s *CommitNodeSuite) TestWalkCommitGraph(c *C) { + f := fixtures.ByTag("commit-graph").One() + dotgit := f.DotGit() + storer := filesystem.NewStorage(dotgit, cache.NewObjectLRUDefault()) + reader, err := mmap.Open(path.Join(dotgit.Root(), "objects", "info", "commit-graph")) + c.Assert(err, IsNil) + defer reader.Close() + index, err := commitgraph.OpenFileIndex(reader) + c.Assert(err, IsNil) + + nodeIndex := NewGraphCommitNodeIndex(index, storer) + testWalker(c, nodeIndex) +} diff --git a/plumbing/object/commitnode_walker_ctime.go b/plumbing/object/commitnode_walker_ctime.go new file mode 100644 index 000000000..86b6c5765 --- /dev/null +++ b/plumbing/object/commitnode_walker_ctime.go @@ -0,0 +1,108 @@ +package object + +import ( + "io" + + "github.com/emirpasic/gods/trees/binaryheap" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/storer" +) + +type commitNodeIteratorByCTime struct { + heap *binaryheap.Heap + seenExternal map[plumbing.Hash]bool + seen map[plumbing.Hash]bool + nodeIndex CommitNodeIndex +} + +// NewCommitNodeIterCTime returns a CommitNodeIter that walks the commit history, +// starting at the given commit and visiting its parents while preserving Committer Time order. +// this appears to be the closest order to `git log` +// The given callback will be called for each visited commit. Each commit will +// be visited only once. If the callback returns an error, walking will stop +// and will return the error. Other errors might be returned if the history +// cannot be traversed (e.g. missing objects). 
Ignore allows to skip some +// commits from being iterated. +func NewCommitNodeIterCTime( + c CommitNode, + nodeIndex CommitNodeIndex, + seenExternal map[plumbing.Hash]bool, + ignore []plumbing.Hash, +) CommitNodeIter { + seen := make(map[plumbing.Hash]bool) + for _, h := range ignore { + seen[h] = true + } + + heap := binaryheap.NewWith(func(a, b interface{}) int { + if a.(CommitNode).CommitTime().Before(b.(CommitNode).CommitTime()) { + return 1 + } + return -1 + }) + + heap.Push(c) + + return &commitNodeIteratorByCTime{ + heap: heap, + seenExternal: seenExternal, + seen: seen, + nodeIndex: nodeIndex, + } +} + +func (w *commitNodeIteratorByCTime) Next() (CommitNode, error) { + var c CommitNode + for { + cIn, ok := w.heap.Pop() + if !ok { + return nil, io.EOF + } + c = cIn.(CommitNode) + cID := c.ID() + + if w.seen[cID] || w.seenExternal[cID] { + continue + } + + w.seen[cID] = true + + for i, h := range w.nodeIndex.ParentHashes(c) { + if w.seen[h] || w.seenExternal[h] { + continue + } + pc, err := w.nodeIndex.ParentNode(c, i) + if err != nil { + return nil, err + } + w.heap.Push(pc) + } + + return c, nil + } +} + +func (w *commitNodeIteratorByCTime) ForEach(cb func(CommitNode) error) error { + for { + c, err := w.Next() + if err == io.EOF { + break + } + if err != nil { + return err + } + + err = cb(c) + if err == storer.ErrStop { + break + } + if err != nil { + return err + } + } + + return nil +} + +func (w *commitNodeIteratorByCTime) Close() {} From 9eb627fb7b86b2941fb96020f152cb5fd2df2bd3 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Wed, 24 Apr 2019 11:13:53 +0200 Subject: [PATCH 02/17] Simplify the CommitNode API, make it look more like Commit Signed-off-by: Filip Navara --- plumbing/object/commitnode.go | 144 +++++++++------------ plumbing/object/commitnode_test.go | 1 - plumbing/object/commitnode_walker_ctime.go | 7 +- 3 files changed, 65 insertions(+), 87 deletions(-) diff --git a/plumbing/object/commitnode.go b/plumbing/object/commitnode.go index 
a613eb464..a7af4a975 100644 --- a/plumbing/object/commitnode.go +++ b/plumbing/object/commitnode.go @@ -16,18 +16,17 @@ type CommitNode interface { ID() plumbing.Hash Tree() (*Tree, error) CommitTime() time.Time + NumParents() int + ParentNodes() CommitNodeIter + ParentNode(i int) (CommitNode, error) + ParentHashes() []plumbing.Hash } // CommitNodeIndex is generic interface encapsulating an index of CommitNode objects // and accessor methods for walking it as a directed graph type CommitNodeIndex interface { - NumParents(node CommitNode) int - ParentNodes(node CommitNode) CommitNodeIter - ParentNode(node CommitNode, i int) (CommitNode, error) - ParentHashes(node CommitNode) []plumbing.Hash - + // Get returns a commit node from a commit hash Get(hash plumbing.Hash) (CommitNode, error) - // Commit returns the full commit object from the node Commit(node CommitNode) (*Commit, error) } @@ -67,7 +66,8 @@ type graphCommitNodeIndex struct { // // objectCommitNode implements the CommitNode interface. type objectCommitNode struct { - commit *Commit + nodeIndex CommitNodeIndex + commit *Commit } // objectCommitNodeIndex is an index that can load CommitNode objects only from the @@ -93,68 +93,50 @@ func (c *graphCommitNode) CommitTime() time.Time { return c.node.When } -func (c *graphCommitNode) String() string { - return fmt.Sprintf( - "%s %s\nDate: %s", - plumbing.CommitObject, c.ID(), - c.CommitTime().Format(DateFormat), - ) -} - -func NewGraphCommitNodeIndex(commitGraph commitgraph.Index, s storer.EncodedObjectStorer) CommitNodeIndex { - return &graphCommitNodeIndex{commitGraph, s} -} - // NumParents returns the number of parents in a commit. 
-func (gci *graphCommitNodeIndex) NumParents(node CommitNode) int { - if cgn, ok := node.(*graphCommitNode); ok { - return len(cgn.node.ParentIndexes) - } - co := node.(*objectCommitNode) - return co.commit.NumParents() +func (c *graphCommitNode) NumParents() int { + return len(c.node.ParentIndexes) } // ParentNodes return a CommitNodeIter for parents of specified node. -func (gci *graphCommitNodeIndex) ParentNodes(node CommitNode) CommitNodeIter { - return newParentgraphCommitNodeIter(gci, node) +func (c *graphCommitNode) ParentNodes() CommitNodeIter { + return newParentgraphCommitNodeIter(c) } // ParentNode returns the ith parent of a commit. -func (gci *graphCommitNodeIndex) ParentNode(node CommitNode, i int) (CommitNode, error) { - if cgn, ok := node.(*graphCommitNode); ok { - if len(cgn.node.ParentIndexes) == 0 || i >= len(cgn.node.ParentIndexes) { - return nil, ErrParentNotFound - } - - parent, err := gci.commitGraph.GetNodeByIndex(cgn.node.ParentIndexes[i]) - if err != nil { - return nil, err - } - - return &graphCommitNode{ - hash: cgn.node.ParentHashes[i], - index: cgn.node.ParentIndexes[i], - node: parent, - gci: gci, - }, nil +func (c *graphCommitNode) ParentNode(i int) (CommitNode, error) { + if i < 0 || i >= len(c.node.ParentIndexes) { + return nil, ErrParentNotFound } - co := node.(*objectCommitNode) - if len(co.commit.ParentHashes) == 0 || i >= len(co.commit.ParentHashes) { - return nil, ErrParentNotFound + parent, err := c.gci.commitGraph.GetNodeByIndex(c.node.ParentIndexes[i]) + if err != nil { + return nil, err } - parentHash := co.commit.ParentHashes[i] - return gci.Get(parentHash) + return &graphCommitNode{ + hash: c.node.ParentHashes[i], + index: c.node.ParentIndexes[i], + node: parent, + gci: c.gci, + }, nil } // ParentHashes returns hashes of the parent commits for a specified node -func (gci *graphCommitNodeIndex) ParentHashes(node CommitNode) []plumbing.Hash { - if cgn, ok := node.(*graphCommitNode); ok { - return cgn.node.ParentHashes - } 
- co := node.(*objectCommitNode) - return co.commit.ParentHashes +func (c *graphCommitNode) ParentHashes() []plumbing.Hash { + return c.node.ParentHashes +} + +func (c *graphCommitNode) String() string { + return fmt.Sprintf( + "%s %s\nDate: %s", + plumbing.CommitObject, c.ID(), + c.CommitTime().Format(DateFormat), + ) +} + +func NewGraphCommitNodeIndex(commitGraph commitgraph.Index, s storer.EncodedObjectStorer) CommitNodeIndex { + return &graphCommitNodeIndex{commitGraph, s} } // NodeFromHash looks up a commit node by it's object hash @@ -181,7 +163,10 @@ func (gci *graphCommitNodeIndex) Get(hash plumbing.Hash) (CommitNode, error) { return nil, err } - return &objectCommitNode{commit: commit}, nil + return &objectCommitNode{ + nodeIndex: gci, + commit: commit, + }, nil } // Commit returns the full Commit object representing the commit graph node. @@ -194,8 +179,6 @@ func (gci *graphCommitNodeIndex) Commit(node CommitNode) (*Commit, error) { } // CommitTime returns the time when the commit was performed. -// -// CommitTime is present to fulfill the CommitNode interface. func (c *objectCommitNode) CommitTime() time.Time { return c.commit.Committer.When } @@ -210,35 +193,32 @@ func (c *objectCommitNode) Tree() (*Tree, error) { return c.commit.Tree() } -func NewObjectCommitNodeIndex(s storer.EncodedObjectStorer) CommitNodeIndex { - return &objectCommitNodeIndex{s} -} - // NumParents returns the number of parents in a commit. -func (oci *objectCommitNodeIndex) NumParents(node CommitNode) int { - co := node.(*objectCommitNode) - return co.commit.NumParents() +func (c *objectCommitNode) NumParents() int { + return c.commit.NumParents() } // ParentNodes return a CommitNodeIter for parents of specified node. 
-func (oci *objectCommitNodeIndex) ParentNodes(node CommitNode) CommitNodeIter { - return newParentgraphCommitNodeIter(oci, node) +func (c *objectCommitNode) ParentNodes() CommitNodeIter { + return newParentgraphCommitNodeIter(c) } // ParentNode returns the ith parent of a commit. -func (oci *objectCommitNodeIndex) ParentNode(node CommitNode, i int) (CommitNode, error) { - co := node.(*objectCommitNode) - parent, err := co.commit.Parent(i) - if err != nil { - return nil, err +func (c *objectCommitNode) ParentNode(i int) (CommitNode, error) { + if i < 0 || i >= len(c.commit.ParentHashes) { + return nil, ErrParentNotFound } - return &objectCommitNode{commit: parent}, nil + + return c.nodeIndex.Get(c.commit.ParentHashes[i]) } // ParentHashes returns hashes of the parent commits for a specified node -func (oci *objectCommitNodeIndex) ParentHashes(node CommitNode) []plumbing.Hash { - co := node.(*objectCommitNode) - return co.commit.ParentHashes +func (c *objectCommitNode) ParentHashes() []plumbing.Hash { + return c.commit.ParentHashes +} + +func NewObjectCommitNodeIndex(s storer.EncodedObjectStorer) CommitNodeIndex { + return &objectCommitNodeIndex{s} } // NodeFromHash looks up a commit node by it's object hash @@ -248,7 +228,10 @@ func (oci *objectCommitNodeIndex) Get(hash plumbing.Hash) (CommitNode, error) { return nil, err } - return &objectCommitNode{commit: commit}, nil + return &objectCommitNode{ + nodeIndex: oci, + commit: commit, + }, nil } // Commit returns the full Commit object representing the commit graph node. @@ -259,19 +242,18 @@ func (oci *objectCommitNodeIndex) Commit(node CommitNode) (*Commit, error) { // parentCommitNodeIter provides an iterator for parent commits from associated CommitNodeIndex. 
type parentCommitNodeIter struct { - gci CommitNodeIndex node CommitNode i int } -func newParentgraphCommitNodeIter(gci CommitNodeIndex, node CommitNode) CommitNodeIter { - return &parentCommitNodeIter{gci, node, 0} +func newParentgraphCommitNodeIter(node CommitNode) CommitNodeIter { + return &parentCommitNodeIter{node, 0} } // Next moves the iterator to the next commit and returns a pointer to it. If // there are no more commits, it returns io.EOF. func (iter *parentCommitNodeIter) Next() (CommitNode, error) { - obj, err := iter.gci.ParentNode(iter.node, iter.i) + obj, err := iter.node.ParentNode(iter.i) if err == ErrParentNotFound { return nil, io.EOF } diff --git a/plumbing/object/commitnode_test.go b/plumbing/object/commitnode_test.go index 8f5966568..883befc8c 100644 --- a/plumbing/object/commitnode_test.go +++ b/plumbing/object/commitnode_test.go @@ -25,7 +25,6 @@ func testWalker(c *C, nodeIndex CommitNodeIndex) { iter := NewCommitNodeIterCTime( head, - nodeIndex, nil, nil, ) diff --git a/plumbing/object/commitnode_walker_ctime.go b/plumbing/object/commitnode_walker_ctime.go index 86b6c5765..e55b4adbc 100644 --- a/plumbing/object/commitnode_walker_ctime.go +++ b/plumbing/object/commitnode_walker_ctime.go @@ -13,7 +13,6 @@ type commitNodeIteratorByCTime struct { heap *binaryheap.Heap seenExternal map[plumbing.Hash]bool seen map[plumbing.Hash]bool - nodeIndex CommitNodeIndex } // NewCommitNodeIterCTime returns a CommitNodeIter that walks the commit history, @@ -26,7 +25,6 @@ type commitNodeIteratorByCTime struct { // commits from being iterated. 
func NewCommitNodeIterCTime( c CommitNode, - nodeIndex CommitNodeIndex, seenExternal map[plumbing.Hash]bool, ignore []plumbing.Hash, ) CommitNodeIter { @@ -48,7 +46,6 @@ func NewCommitNodeIterCTime( heap: heap, seenExternal: seenExternal, seen: seen, - nodeIndex: nodeIndex, } } @@ -68,11 +65,11 @@ func (w *commitNodeIteratorByCTime) Next() (CommitNode, error) { w.seen[cID] = true - for i, h := range w.nodeIndex.ParentHashes(c) { + for i, h := range c.ParentHashes() { if w.seen[h] || w.seenExternal[h] { continue } - pc, err := w.nodeIndex.ParentNode(c, i) + pc, err := c.ParentNode(i) if err != nil { return nil, err } From 58c731411944090126f86208bcf0419d6ba84122 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Wed, 24 Apr 2019 11:31:59 +0200 Subject: [PATCH 03/17] Move Commit() to CommitNode API Signed-off-by: Filip Navara --- plumbing/object/commitnode.go | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/plumbing/object/commitnode.go b/plumbing/object/commitnode.go index a7af4a975..0717a653d 100644 --- a/plumbing/object/commitnode.go +++ b/plumbing/object/commitnode.go @@ -13,13 +13,22 @@ import ( // CommitNode is generic interface encapsulating either Commit object or // graphCommitNode object type CommitNode interface { + // ID returns the Commit object id referenced by the commit graph node. ID() plumbing.Hash + // Tree returns the Tree referenced by the commit graph node. Tree() (*Tree, error) + // CommitTime returns the Commiter.When time of the Commit referenced by the commit graph node. CommitTime() time.Time + // NumParents returns the number of parents in a commit. NumParents() int + // ParentNodes return a CommitNodeIter for parents of specified node. ParentNodes() CommitNodeIter + // ParentNode returns the ith parent of a commit. 
ParentNode(i int) (CommitNode, error) + // ParentHashes returns hashes of the parent commits for a specified node ParentHashes() []plumbing.Hash + // Commit returns the full commit object from the node + Commit() (*Commit, error) } // CommitNodeIndex is generic interface encapsulating an index of CommitNode objects @@ -27,8 +36,6 @@ type CommitNode interface { type CommitNodeIndex interface { // Get returns a commit node from a commit hash Get(hash plumbing.Hash) (CommitNode, error) - // Commit returns the full commit object from the node - Commit(node CommitNode) (*Commit, error) } // CommitNodeIter is a generic closable interface for iterating over commit nodes. @@ -127,6 +134,11 @@ func (c *graphCommitNode) ParentHashes() []plumbing.Hash { return c.node.ParentHashes } +// Commit returns the full Commit object representing the commit graph node. +func (c *graphCommitNode) Commit() (*Commit, error) { + return GetCommit(c.gci.s, c.hash) +} + func (c *graphCommitNode) String() string { return fmt.Sprintf( "%s %s\nDate: %s", @@ -169,15 +181,6 @@ func (gci *graphCommitNodeIndex) Get(hash plumbing.Hash) (CommitNode, error) { }, nil } -// Commit returns the full Commit object representing the commit graph node. -func (gci *graphCommitNodeIndex) Commit(node CommitNode) (*Commit, error) { - if cgn, ok := node.(*graphCommitNode); ok { - return GetCommit(gci.s, cgn.ID()) - } - co := node.(*objectCommitNode) - return co.commit, nil -} - // CommitTime returns the time when the commit was performed. func (c *objectCommitNode) CommitTime() time.Time { return c.commit.Committer.When @@ -217,6 +220,11 @@ func (c *objectCommitNode) ParentHashes() []plumbing.Hash { return c.commit.ParentHashes } +// Commit returns the full Commit object representing the commit graph node. 
+func (c *objectCommitNode) Commit() (*Commit, error) { + return c.commit, nil +} + func NewObjectCommitNodeIndex(s storer.EncodedObjectStorer) CommitNodeIndex { return &objectCommitNodeIndex{s} } From f4c1a9140f8b2700d9910d35cbab62b2de1fc857 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Wed, 24 Apr 2019 11:52:12 +0200 Subject: [PATCH 04/17] Code cleanup, split into more files for clarity Signed-off-by: Filip Navara --- plumbing/object/commitnode.go | 205 --------------------------- plumbing/object/commitnode_graph.go | 121 ++++++++++++++++ plumbing/object/commitnode_object.go | 79 +++++++++++ 3 files changed, 200 insertions(+), 205 deletions(-) create mode 100644 plumbing/object/commitnode_graph.go create mode 100644 plumbing/object/commitnode_object.go diff --git a/plumbing/object/commitnode.go b/plumbing/object/commitnode.go index 0717a653d..62c0aefcd 100644 --- a/plumbing/object/commitnode.go +++ b/plumbing/object/commitnode.go @@ -1,12 +1,10 @@ package object import ( - "fmt" "io" "time" "gopkg.in/src-d/go-git.v4/plumbing" - "gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph" "gopkg.in/src-d/go-git.v4/plumbing/storer" ) @@ -45,209 +43,6 @@ type CommitNodeIter interface { Close() } -// graphCommitNode is a reduced representation of Commit as presented in the commit -// graph file (commitgraph.Node). It is merely useful as an optimization for walking -// the commit graphs. -// -// graphCommitNode implements the CommitNode interface. -type graphCommitNode struct { - // Hash for the Commit object - hash plumbing.Hash - // Index of the node in the commit graph file - index int - - node *commitgraph.Node - gci *graphCommitNodeIndex -} - -// graphCommitNodeIndex is an index that can load CommitNode objects from both the commit -// graph files and the object store. 
-// -// graphCommitNodeIndex implements the CommitNodeIndex interface -type graphCommitNodeIndex struct { - commitGraph commitgraph.Index - s storer.EncodedObjectStorer -} - -// objectCommitNode is a representation of Commit as presented in the GIT object format. -// -// objectCommitNode implements the CommitNode interface. -type objectCommitNode struct { - nodeIndex CommitNodeIndex - commit *Commit -} - -// objectCommitNodeIndex is an index that can load CommitNode objects only from the -// object store. -// -// objectCommitNodeIndex implements the CommitNodeIndex interface -type objectCommitNodeIndex struct { - s storer.EncodedObjectStorer -} - -// ID returns the Commit object id referenced by the commit graph node. -func (c *graphCommitNode) ID() plumbing.Hash { - return c.hash -} - -// Tree returns the Tree referenced by the commit graph node. -func (c *graphCommitNode) Tree() (*Tree, error) { - return GetTree(c.gci.s, c.node.TreeHash) -} - -// CommitTime returns the Commiter.When time of the Commit referenced by the commit graph node. -func (c *graphCommitNode) CommitTime() time.Time { - return c.node.When -} - -// NumParents returns the number of parents in a commit. -func (c *graphCommitNode) NumParents() int { - return len(c.node.ParentIndexes) -} - -// ParentNodes return a CommitNodeIter for parents of specified node. -func (c *graphCommitNode) ParentNodes() CommitNodeIter { - return newParentgraphCommitNodeIter(c) -} - -// ParentNode returns the ith parent of a commit. 
-func (c *graphCommitNode) ParentNode(i int) (CommitNode, error) { - if i < 0 || i >= len(c.node.ParentIndexes) { - return nil, ErrParentNotFound - } - - parent, err := c.gci.commitGraph.GetNodeByIndex(c.node.ParentIndexes[i]) - if err != nil { - return nil, err - } - - return &graphCommitNode{ - hash: c.node.ParentHashes[i], - index: c.node.ParentIndexes[i], - node: parent, - gci: c.gci, - }, nil -} - -// ParentHashes returns hashes of the parent commits for a specified node -func (c *graphCommitNode) ParentHashes() []plumbing.Hash { - return c.node.ParentHashes -} - -// Commit returns the full Commit object representing the commit graph node. -func (c *graphCommitNode) Commit() (*Commit, error) { - return GetCommit(c.gci.s, c.hash) -} - -func (c *graphCommitNode) String() string { - return fmt.Sprintf( - "%s %s\nDate: %s", - plumbing.CommitObject, c.ID(), - c.CommitTime().Format(DateFormat), - ) -} - -func NewGraphCommitNodeIndex(commitGraph commitgraph.Index, s storer.EncodedObjectStorer) CommitNodeIndex { - return &graphCommitNodeIndex{commitGraph, s} -} - -// NodeFromHash looks up a commit node by it's object hash -func (gci *graphCommitNodeIndex) Get(hash plumbing.Hash) (CommitNode, error) { - // Check the commit graph first - parentIndex, err := gci.commitGraph.GetIndexByHash(hash) - if err == nil { - parent, err := gci.commitGraph.GetNodeByIndex(parentIndex) - if err != nil { - return nil, err - } - - return &graphCommitNode{ - hash: hash, - index: parentIndex, - node: parent, - gci: gci, - }, nil - } - - // Fallback to loading full commit object - commit, err := GetCommit(gci.s, hash) - if err != nil { - return nil, err - } - - return &objectCommitNode{ - nodeIndex: gci, - commit: commit, - }, nil -} - -// CommitTime returns the time when the commit was performed. -func (c *objectCommitNode) CommitTime() time.Time { - return c.commit.Committer.When -} - -// ID returns the Commit object id referenced by the node. 
-func (c *objectCommitNode) ID() plumbing.Hash { - return c.commit.ID() -} - -// Tree returns the Tree referenced by the node. -func (c *objectCommitNode) Tree() (*Tree, error) { - return c.commit.Tree() -} - -// NumParents returns the number of parents in a commit. -func (c *objectCommitNode) NumParents() int { - return c.commit.NumParents() -} - -// ParentNodes return a CommitNodeIter for parents of specified node. -func (c *objectCommitNode) ParentNodes() CommitNodeIter { - return newParentgraphCommitNodeIter(c) -} - -// ParentNode returns the ith parent of a commit. -func (c *objectCommitNode) ParentNode(i int) (CommitNode, error) { - if i < 0 || i >= len(c.commit.ParentHashes) { - return nil, ErrParentNotFound - } - - return c.nodeIndex.Get(c.commit.ParentHashes[i]) -} - -// ParentHashes returns hashes of the parent commits for a specified node -func (c *objectCommitNode) ParentHashes() []plumbing.Hash { - return c.commit.ParentHashes -} - -// Commit returns the full Commit object representing the commit graph node. -func (c *objectCommitNode) Commit() (*Commit, error) { - return c.commit, nil -} - -func NewObjectCommitNodeIndex(s storer.EncodedObjectStorer) CommitNodeIndex { - return &objectCommitNodeIndex{s} -} - -// NodeFromHash looks up a commit node by it's object hash -func (oci *objectCommitNodeIndex) Get(hash plumbing.Hash) (CommitNode, error) { - commit, err := GetCommit(oci.s, hash) - if err != nil { - return nil, err - } - - return &objectCommitNode{ - nodeIndex: oci, - commit: commit, - }, nil -} - -// Commit returns the full Commit object representing the commit graph node. -func (oci *objectCommitNodeIndex) Commit(node CommitNode) (*Commit, error) { - co := node.(*objectCommitNode) - return co.commit, nil -} - // parentCommitNodeIter provides an iterator for parent commits from associated CommitNodeIndex. 
type parentCommitNodeIter struct { node CommitNode diff --git a/plumbing/object/commitnode_graph.go b/plumbing/object/commitnode_graph.go new file mode 100644 index 000000000..c57a258cc --- /dev/null +++ b/plumbing/object/commitnode_graph.go @@ -0,0 +1,121 @@ +package object + +import ( + "fmt" + "time" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph" + "gopkg.in/src-d/go-git.v4/plumbing/storer" +) + +// graphCommitNode is a reduced representation of Commit as presented in the commit +// graph file (commitgraph.Node). It is merely useful as an optimization for walking +// the commit graphs. +// +// graphCommitNode implements the CommitNode interface. +type graphCommitNode struct { + // Hash for the Commit object + hash plumbing.Hash + // Index of the node in the commit graph file + index int + + node *commitgraph.Node + gci *graphCommitNodeIndex +} + +// graphCommitNodeIndex is an index that can load CommitNode objects from both the commit +// graph files and the object store. 
+// +// graphCommitNodeIndex implements the CommitNodeIndex interface +type graphCommitNodeIndex struct { + commitGraph commitgraph.Index + s storer.EncodedObjectStorer +} + +func NewGraphCommitNodeIndex(commitGraph commitgraph.Index, s storer.EncodedObjectStorer) CommitNodeIndex { + return &graphCommitNodeIndex{commitGraph, s} +} + +func (gci *graphCommitNodeIndex) Get(hash plumbing.Hash) (CommitNode, error) { + // Check the commit graph first + parentIndex, err := gci.commitGraph.GetIndexByHash(hash) + if err == nil { + parent, err := gci.commitGraph.GetNodeByIndex(parentIndex) + if err != nil { + return nil, err + } + + return &graphCommitNode{ + hash: hash, + index: parentIndex, + node: parent, + gci: gci, + }, nil + } + + // Fallback to loading full commit object + commit, err := GetCommit(gci.s, hash) + if err != nil { + return nil, err + } + + return &objectCommitNode{ + nodeIndex: gci, + commit: commit, + }, nil +} + +func (c *graphCommitNode) ID() plumbing.Hash { + return c.hash +} + +func (c *graphCommitNode) Tree() (*Tree, error) { + return GetTree(c.gci.s, c.node.TreeHash) +} + +func (c *graphCommitNode) CommitTime() time.Time { + return c.node.When +} + +func (c *graphCommitNode) NumParents() int { + return len(c.node.ParentIndexes) +} + +func (c *graphCommitNode) ParentNodes() CommitNodeIter { + return newParentgraphCommitNodeIter(c) +} + +func (c *graphCommitNode) ParentNode(i int) (CommitNode, error) { + if i < 0 || i >= len(c.node.ParentIndexes) { + return nil, ErrParentNotFound + } + + parent, err := c.gci.commitGraph.GetNodeByIndex(c.node.ParentIndexes[i]) + if err != nil { + return nil, err + } + + return &graphCommitNode{ + hash: c.node.ParentHashes[i], + index: c.node.ParentIndexes[i], + node: parent, + gci: c.gci, + }, nil +} + +func (c *graphCommitNode) ParentHashes() []plumbing.Hash { + return c.node.ParentHashes +} + +func (c *graphCommitNode) Commit() (*Commit, error) { + return GetCommit(c.gci.s, c.hash) +} + +func (c *graphCommitNode) 
String() string { + return fmt.Sprintf( + "%s %s\nDate: %s", + plumbing.CommitObject, c.ID(), + c.CommitTime().Format(DateFormat), + ) +} diff --git a/plumbing/object/commitnode_object.go b/plumbing/object/commitnode_object.go new file mode 100644 index 000000000..08d8c0f3f --- /dev/null +++ b/plumbing/object/commitnode_object.go @@ -0,0 +1,79 @@ +package object + +import ( + "time" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/storer" +) + +// objectCommitNode is a representation of Commit as presented in the GIT object format. +// +// objectCommitNode implements the CommitNode interface. +type objectCommitNode struct { + nodeIndex CommitNodeIndex + commit *Commit +} + +func NewObjectCommitNodeIndex(s storer.EncodedObjectStorer) CommitNodeIndex { + return &objectCommitNodeIndex{s} +} + +func (oci *objectCommitNodeIndex) Get(hash plumbing.Hash) (CommitNode, error) { + commit, err := GetCommit(oci.s, hash) + if err != nil { + return nil, err + } + + return &objectCommitNode{ + nodeIndex: oci, + commit: commit, + }, nil +} + +// objectCommitNodeIndex is an index that can load CommitNode objects only from the +// object store. 
+// +// objectCommitNodeIndex implements the CommitNodeIndex interface +type objectCommitNodeIndex struct { + s storer.EncodedObjectStorer +} + +func (c *objectCommitNode) CommitTime() time.Time { + return c.commit.Committer.When +} + +func (c *objectCommitNode) ID() plumbing.Hash { + return c.commit.ID() +} + +func (c *objectCommitNode) Tree() (*Tree, error) { + return c.commit.Tree() +} + +func (c *objectCommitNode) NumParents() int { + return c.commit.NumParents() +} + +func (c *objectCommitNode) ParentNodes() CommitNodeIter { + return newParentgraphCommitNodeIter(c) +} + +func (c *objectCommitNode) ParentNode(i int) (CommitNode, error) { + if i < 0 || i >= len(c.commit.ParentHashes) { + return nil, ErrParentNotFound + } + + // Note: It's necessary to go through CommitNodeIndex here to ensure + // that if the commit-graph file covers only part of the history we + // start using it when that part is reached. + return c.nodeIndex.Get(c.commit.ParentHashes[i]) +} + +func (c *objectCommitNode) ParentHashes() []plumbing.Hash { + return c.commit.ParentHashes +} + +func (c *objectCommitNode) Commit() (*Commit, error) { + return c.commit, nil +} From 55dd4be4dfe8af030b5652782af2d4c37f51197f Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Wed, 24 Apr 2019 11:58:17 +0200 Subject: [PATCH 05/17] Add test for CommitNode.ParentNodes() Signed-off-by: Filip Navara --- plumbing/object/commitnode_test.go | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/plumbing/object/commitnode_test.go b/plumbing/object/commitnode_test.go index 883befc8c..1e5f2d935 100644 --- a/plumbing/object/commitnode_test.go +++ b/plumbing/object/commitnode_test.go @@ -53,7 +53,29 @@ func testWalker(c *C, nodeIndex CommitNodeIndex) { } } -func (s *CommitNodeSuite) TestWalkObject(c *C) { +func testParents(c *C, nodeIndex CommitNodeIndex) { + merge3, err := nodeIndex.Get(plumbing.NewHash("6f6c5d2be7852c782be1dd13e36496dd7ad39560")) + c.Assert(err, IsNil) + + var 
parents []CommitNode + merge3.ParentNodes().ForEach(func(c CommitNode) error { + parents = append(parents, c) + return nil + }) + + c.Assert(parents, HasLen, 3) + + expected := []string{ + "ce275064ad67d51e99f026084e20827901a8361c", + "bb13916df33ed23004c3ce9ed3b8487528e655c1", + "a45273fe2d63300e1962a9e26a6b15c276cd7082", + } + for i, parent := range parents { + c.Assert(parent.ID().String(), Equals, expected[i]) + } +} + +func (s *CommitNodeSuite) TestObjectGraph(c *C) { f := fixtures.ByTag("commit-graph").One() storer := filesystem.NewStorage(f.DotGit(), cache.NewObjectLRUDefault()) p := f.Packfile() @@ -63,9 +85,10 @@ func (s *CommitNodeSuite) TestWalkObject(c *C) { nodeIndex := NewObjectCommitNodeIndex(storer) testWalker(c, nodeIndex) + testParents(c, nodeIndex) } -func (s *CommitNodeSuite) TestWalkCommitGraph(c *C) { +func (s *CommitNodeSuite) TestCommitGraph(c *C) { f := fixtures.ByTag("commit-graph").One() dotgit := f.DotGit() storer := filesystem.NewStorage(dotgit, cache.NewObjectLRUDefault()) @@ -77,4 +100,5 @@ func (s *CommitNodeSuite) TestWalkCommitGraph(c *C) { nodeIndex := NewGraphCommitNodeIndex(index, storer) testWalker(c, nodeIndex) + testParents(c, nodeIndex) } From b48e4867d1aba5235132533006c8ed8be40344d8 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Wed, 24 Apr 2019 12:14:53 +0200 Subject: [PATCH 06/17] Update comments Signed-off-by: Filip Navara --- plumbing/object/commitnode.go | 5 ++--- plumbing/object/commitnode_graph.go | 2 ++ plumbing/object/commitnode_object.go | 2 ++ 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/plumbing/object/commitnode.go b/plumbing/object/commitnode.go index 62c0aefcd..22927f4c9 100644 --- a/plumbing/object/commitnode.go +++ b/plumbing/object/commitnode.go @@ -8,8 +8,8 @@ import ( "gopkg.in/src-d/go-git.v4/plumbing/storer" ) -// CommitNode is generic interface encapsulating either Commit object or -// graphCommitNode object +// CommitNode is generic interface encapsulating a lightweight commit 
object retrieved +// from CommitNodeIndex type CommitNode interface { // ID returns the Commit object id referenced by the commit graph node. ID() plumbing.Hash @@ -30,7 +30,6 @@ type CommitNode interface { } // CommitNodeIndex is generic interface encapsulating an index of CommitNode objects -// and accessor methods for walking it as a directed graph type CommitNodeIndex interface { // Get returns a commit node from a commit hash Get(hash plumbing.Hash) (CommitNode, error) diff --git a/plumbing/object/commitnode_graph.go b/plumbing/object/commitnode_graph.go index c57a258cc..9fc28a27a 100644 --- a/plumbing/object/commitnode_graph.go +++ b/plumbing/object/commitnode_graph.go @@ -33,6 +33,8 @@ type graphCommitNodeIndex struct { s storer.EncodedObjectStorer } +// NewGraphCommitNodeIndex returns CommitNodeIndex implementation that uses commit-graph +// files as backing storage and falls back to object storage when necessary func NewGraphCommitNodeIndex(commitGraph commitgraph.Index, s storer.EncodedObjectStorer) CommitNodeIndex { return &graphCommitNodeIndex{commitGraph, s} } diff --git a/plumbing/object/commitnode_object.go b/plumbing/object/commitnode_object.go index 08d8c0f3f..52316f869 100644 --- a/plumbing/object/commitnode_object.go +++ b/plumbing/object/commitnode_object.go @@ -15,6 +15,8 @@ type objectCommitNode struct { commit *Commit } +// NewObjectCommitNodeIndex returns CommitNodeIndex implementation that uses +// only object storage to load the nodes func NewObjectCommitNodeIndex(s storer.EncodedObjectStorer) CommitNodeIndex { return &objectCommitNodeIndex{s} } From cc48439674365fc8f216dd7df361872046f52f04 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Wed, 24 Apr 2019 12:48:30 +0200 Subject: [PATCH 07/17] Add test for traversal on mixed object and commit-graph Signed-off-by: Filip Navara --- plumbing/object/commitnode_test.go | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/plumbing/object/commitnode_test.go 
b/plumbing/object/commitnode_test.go index 1e5f2d935..df307e314 100644 --- a/plumbing/object/commitnode_test.go +++ b/plumbing/object/commitnode_test.go @@ -102,3 +102,33 @@ func (s *CommitNodeSuite) TestCommitGraph(c *C) { testWalker(c, nodeIndex) testParents(c, nodeIndex) } + +func (s *CommitNodeSuite) TestMixedGraph(c *C) { + // Unpack the original repository with pack file + f := fixtures.ByTag("commit-graph").One() + dotgit := f.DotGit() + storer := filesystem.NewStorage(dotgit, cache.NewObjectLRUDefault()) + p := f.Packfile() + defer p.Close() + err := packfile.UpdateObjectStorage(storer, p) + c.Assert(err, IsNil) + + // Take the commit-graph file and copy it to memory index without the last commit + reader, err := mmap.Open(path.Join(dotgit.Root(), "objects", "info", "commit-graph")) + c.Assert(err, IsNil) + defer reader.Close() + fileIndex, err := commitgraph.OpenFileIndex(reader) + c.Assert(err, IsNil) + memoryIndex := commitgraph.NewMemoryIndex() + for i, hash := range fileIndex.Hashes() { + if hash.String() != "b9d69064b190e7aedccf84731ca1d917871f8a1c" { + node, err := fileIndex.GetNodeByIndex(i) + c.Assert(err, IsNil) + memoryIndex.Add(hash, node) + } + } + + nodeIndex := NewGraphCommitNodeIndex(memoryIndex, storer) + testWalker(c, nodeIndex) + testParents(c, nodeIndex) +} From 34cb7a3aededd002881504dd729527628e923fc6 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Wed, 24 Apr 2019 13:14:17 +0200 Subject: [PATCH 08/17] Add test for CommitNode.Commit() and CommitNode.Tree() Signed-off-by: Filip Navara --- plumbing/object/commitnode_test.go | 44 ++++++++++++++++++------------ 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/plumbing/object/commitnode_test.go b/plumbing/object/commitnode_test.go index df307e314..3a70db3c0 100644 --- a/plumbing/object/commitnode_test.go +++ b/plumbing/object/commitnode_test.go @@ -3,7 +3,6 @@ package object import ( "path" - "golang.org/x/exp/mmap" . 
"gopkg.in/check.v1" "gopkg.in/src-d/go-git-fixtures.v3" "gopkg.in/src-d/go-git.v4/plumbing" @@ -19,6 +18,14 @@ type CommitNodeSuite struct { var _ = Suite(&CommitNodeSuite{}) +func unpackRepositry(f *fixtures.Fixture) *filesystem.Storage { + storer := filesystem.NewStorage(f.DotGit(), cache.NewObjectLRUDefault()) + p := f.Packfile() + defer p.Close() + packfile.UpdateObjectStorage(storer, p) + return storer +} + func testWalker(c *C, nodeIndex CommitNodeIndex) { head, err := nodeIndex.Get(plumbing.NewHash("b9d69064b190e7aedccf84731ca1d917871f8a1c")) c.Assert(err, IsNil) @@ -75,24 +82,31 @@ func testParents(c *C, nodeIndex CommitNodeIndex) { } } +func testCommitAndTree(c *C, nodeIndex CommitNodeIndex) { + merge3node, err := nodeIndex.Get(plumbing.NewHash("6f6c5d2be7852c782be1dd13e36496dd7ad39560")) + c.Assert(err, IsNil) + merge3commit, err := merge3node.Commit() + c.Assert(err, IsNil) + c.Assert(merge3node.ID().String(), Equals, merge3commit.ID().String()) + tree, err := merge3node.Tree() + c.Assert(err, IsNil) + c.Assert(tree.ID().String(), Equals, merge3commit.TreeHash.String()) +} + func (s *CommitNodeSuite) TestObjectGraph(c *C) { f := fixtures.ByTag("commit-graph").One() - storer := filesystem.NewStorage(f.DotGit(), cache.NewObjectLRUDefault()) - p := f.Packfile() - defer p.Close() - err := packfile.UpdateObjectStorage(storer, p) - c.Assert(err, IsNil) + storer := unpackRepositry(f) nodeIndex := NewObjectCommitNodeIndex(storer) testWalker(c, nodeIndex) testParents(c, nodeIndex) + testCommitAndTree(c, nodeIndex) } func (s *CommitNodeSuite) TestCommitGraph(c *C) { f := fixtures.ByTag("commit-graph").One() - dotgit := f.DotGit() - storer := filesystem.NewStorage(dotgit, cache.NewObjectLRUDefault()) - reader, err := mmap.Open(path.Join(dotgit.Root(), "objects", "info", "commit-graph")) + storer := unpackRepositry(f) + reader, err := storer.Filesystem().Open(path.Join("objects", "info", "commit-graph")) c.Assert(err, IsNil) defer reader.Close() index, err := 
commitgraph.OpenFileIndex(reader) @@ -101,20 +115,15 @@ func (s *CommitNodeSuite) TestCommitGraph(c *C) { nodeIndex := NewGraphCommitNodeIndex(index, storer) testWalker(c, nodeIndex) testParents(c, nodeIndex) + testCommitAndTree(c, nodeIndex) } func (s *CommitNodeSuite) TestMixedGraph(c *C) { - // Unpack the original repository with pack file f := fixtures.ByTag("commit-graph").One() - dotgit := f.DotGit() - storer := filesystem.NewStorage(dotgit, cache.NewObjectLRUDefault()) - p := f.Packfile() - defer p.Close() - err := packfile.UpdateObjectStorage(storer, p) - c.Assert(err, IsNil) + storer := unpackRepositry(f) // Take the commit-graph file and copy it to memory index without the last commit - reader, err := mmap.Open(path.Join(dotgit.Root(), "objects", "info", "commit-graph")) + reader, err := storer.Filesystem().Open(path.Join("objects", "info", "commit-graph")) c.Assert(err, IsNil) defer reader.Close() fileIndex, err := commitgraph.OpenFileIndex(reader) @@ -131,4 +140,5 @@ func (s *CommitNodeSuite) TestMixedGraph(c *C) { nodeIndex := NewGraphCommitNodeIndex(memoryIndex, storer) testWalker(c, nodeIndex) testParents(c, nodeIndex) + testCommitAndTree(c, nodeIndex) } From a47126b1ae5020dbdd268b304fef45a59d63d99b Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Fri, 26 Apr 2019 10:11:07 +0200 Subject: [PATCH 09/17] pluming: object, adjust to new API names in format/commitgraph Signed-off-by: Filip Navara --- plumbing/object/commitnode_graph.go | 34 ++++++++++++++--------------- plumbing/object/commitnode_test.go | 4 ++-- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/plumbing/object/commitnode_graph.go b/plumbing/object/commitnode_graph.go index 9fc28a27a..b2a6f5754 100644 --- a/plumbing/object/commitnode_graph.go +++ b/plumbing/object/commitnode_graph.go @@ -20,8 +20,8 @@ type graphCommitNode struct { // Index of the node in the commit graph file index int - node *commitgraph.Node - gci *graphCommitNodeIndex + commitData *commitgraph.CommitData 
+ gci *graphCommitNodeIndex } // graphCommitNodeIndex is an index that can load CommitNode objects from both the commit @@ -43,16 +43,16 @@ func (gci *graphCommitNodeIndex) Get(hash plumbing.Hash) (CommitNode, error) { // Check the commit graph first parentIndex, err := gci.commitGraph.GetIndexByHash(hash) if err == nil { - parent, err := gci.commitGraph.GetNodeByIndex(parentIndex) + parent, err := gci.commitGraph.GetCommitDataByIndex(parentIndex) if err != nil { return nil, err } return &graphCommitNode{ - hash: hash, - index: parentIndex, - node: parent, - gci: gci, + hash: hash, + index: parentIndex, + commitData: parent, + gci: gci, }, nil } @@ -73,15 +73,15 @@ func (c *graphCommitNode) ID() plumbing.Hash { } func (c *graphCommitNode) Tree() (*Tree, error) { - return GetTree(c.gci.s, c.node.TreeHash) + return GetTree(c.gci.s, c.commitData.TreeHash) } func (c *graphCommitNode) CommitTime() time.Time { - return c.node.When + return c.commitData.When } func (c *graphCommitNode) NumParents() int { - return len(c.node.ParentIndexes) + return len(c.commitData.ParentIndexes) } func (c *graphCommitNode) ParentNodes() CommitNodeIter { @@ -89,25 +89,25 @@ func (c *graphCommitNode) ParentNodes() CommitNodeIter { } func (c *graphCommitNode) ParentNode(i int) (CommitNode, error) { - if i < 0 || i >= len(c.node.ParentIndexes) { + if i < 0 || i >= len(c.commitData.ParentIndexes) { return nil, ErrParentNotFound } - parent, err := c.gci.commitGraph.GetNodeByIndex(c.node.ParentIndexes[i]) + parent, err := c.gci.commitGraph.GetCommitDataByIndex(c.commitData.ParentIndexes[i]) if err != nil { return nil, err } return &graphCommitNode{ - hash: c.node.ParentHashes[i], - index: c.node.ParentIndexes[i], - node: parent, - gci: c.gci, + hash: c.commitData.ParentHashes[i], + index: c.commitData.ParentIndexes[i], + commitData: parent, + gci: c.gci, }, nil } func (c *graphCommitNode) ParentHashes() []plumbing.Hash { - return c.node.ParentHashes + return c.commitData.ParentHashes } func (c 
*graphCommitNode) Commit() (*Commit, error) { diff --git a/plumbing/object/commitnode_test.go b/plumbing/object/commitnode_test.go index 3a70db3c0..a295b8b49 100644 --- a/plumbing/object/commitnode_test.go +++ b/plumbing/object/commitnode_test.go @@ -4,7 +4,7 @@ import ( "path" . "gopkg.in/check.v1" - "gopkg.in/src-d/go-git-fixtures.v3" + fixtures "gopkg.in/src-d/go-git-fixtures.v3" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/cache" "gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph" @@ -131,7 +131,7 @@ func (s *CommitNodeSuite) TestMixedGraph(c *C) { memoryIndex := commitgraph.NewMemoryIndex() for i, hash := range fileIndex.Hashes() { if hash.String() != "b9d69064b190e7aedccf84731ca1d917871f8a1c" { - node, err := fileIndex.GetNodeByIndex(i) + node, err := fileIndex.GetCommitDataByIndex(i) c.Assert(err, IsNil) memoryIndex.Add(hash, node) } From 5f53b23103a04f97220f325772646b603c4dc25f Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Mon, 29 Apr 2019 13:23:06 +0200 Subject: [PATCH 10/17] Expose Generation property on CommitNode Signed-off-by: Filip Navara --- plumbing/object/commitnode.go | 3 +++ plumbing/object/commitnode_graph.go | 7 +++++++ plumbing/object/commitnode_object.go | 8 ++++++++ 3 files changed, 18 insertions(+) diff --git a/plumbing/object/commitnode.go b/plumbing/object/commitnode.go index 22927f4c9..ce2548794 100644 --- a/plumbing/object/commitnode.go +++ b/plumbing/object/commitnode.go @@ -25,6 +25,9 @@ type CommitNode interface { ParentNode(i int) (CommitNode, error) // ParentHashes returns hashes of the parent commits for a specified node ParentHashes() []plumbing.Hash + // Generation returns the generation of the commit for reachability analysis. + // Objects with newer generation are not reachable from objects of older generation. 
+ Generation() uint64 // Commit returns the full commit object from the node Commit() (*Commit, error) } diff --git a/plumbing/object/commitnode_graph.go b/plumbing/object/commitnode_graph.go index b2a6f5754..ac790abdc 100644 --- a/plumbing/object/commitnode_graph.go +++ b/plumbing/object/commitnode_graph.go @@ -110,6 +110,13 @@ func (c *graphCommitNode) ParentHashes() []plumbing.Hash { return c.commitData.ParentHashes } +func (c *graphCommitNode) Generation() uint64 { + // If the commit-graph file was generated with older Git version that + // set the generation to zero for every commit the generation assumption + // is still valid. It is just less useful. + return uint64(c.commitData.Generation) +} + func (c *graphCommitNode) Commit() (*Commit, error) { return GetCommit(c.gci.s, c.hash) } diff --git a/plumbing/object/commitnode_object.go b/plumbing/object/commitnode_object.go index 52316f869..9ac42d236 100644 --- a/plumbing/object/commitnode_object.go +++ b/plumbing/object/commitnode_object.go @@ -1,6 +1,7 @@ package object import ( + "math" "time" "gopkg.in/src-d/go-git.v4/plumbing" @@ -76,6 +77,13 @@ func (c *objectCommitNode) ParentHashes() []plumbing.Hash { return c.commit.ParentHashes } +func (c *objectCommitNode) Generation() uint64 { + // Commit nodes representing objects outside of the commit graph can never + // be reached by objects from the commit-graph thus we return the highest + // possible value. 
+ return math.MaxUint64 +} + func (c *objectCommitNode) Commit() (*Commit, error) { return c.commit, nil } From a661bca784d305e9df581302b725dc20fad8b995 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Fri, 3 May 2019 11:52:53 +0200 Subject: [PATCH 11/17] Move CommitNode/CommitNodeIndex into separate object/commitgraph package Signed-off-by: Filip Navara --- plumbing/object/{ => commitgraph}/commitnode.go | 9 +++++---- .../{ => commitgraph}/commitnode_graph.go | 17 +++++++++-------- .../{ => commitgraph}/commitnode_object.go | 13 +++++++------ .../object/{ => commitgraph}/commitnode_test.go | 2 +- .../commitnode_walker_ctime.go | 2 +- 5 files changed, 23 insertions(+), 20 deletions(-) rename plumbing/object/{ => commitgraph}/commitnode.go (91%) rename plumbing/object/{ => commitgraph}/commitnode_graph.go (85%) rename plumbing/object/{ => commitgraph}/commitnode_object.go (84%) rename plumbing/object/{ => commitgraph}/commitnode_test.go (96%) rename plumbing/object/{ => commitgraph}/commitnode_walker_ctime.go (94%) diff --git a/plumbing/object/commitnode.go b/plumbing/object/commitgraph/commitnode.go similarity index 91% rename from plumbing/object/commitnode.go rename to plumbing/object/commitgraph/commitnode.go index ce2548794..e218d3210 100644 --- a/plumbing/object/commitnode.go +++ b/plumbing/object/commitgraph/commitnode.go @@ -1,10 +1,11 @@ -package object +package commitgraph import ( "io" "time" "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/object" "gopkg.in/src-d/go-git.v4/plumbing/storer" ) @@ -14,7 +15,7 @@ type CommitNode interface { // ID returns the Commit object id referenced by the commit graph node. ID() plumbing.Hash // Tree returns the Tree referenced by the commit graph node. - Tree() (*Tree, error) + Tree() (*object.Tree, error) // CommitTime returns the Commiter.When time of the Commit referenced by the commit graph node. CommitTime() time.Time // NumParents returns the number of parents in a commit. 
@@ -29,7 +30,7 @@ type CommitNode interface { // Objects with newer generation are not reachable from objects of older generation. Generation() uint64 // Commit returns the full commit object from the node - Commit() (*Commit, error) + Commit() (*object.Commit, error) } // CommitNodeIndex is generic interface encapsulating an index of CommitNode objects @@ -59,7 +60,7 @@ func newParentgraphCommitNodeIter(node CommitNode) CommitNodeIter { // there are no more commits, it returns io.EOF. func (iter *parentCommitNodeIter) Next() (CommitNode, error) { obj, err := iter.node.ParentNode(iter.i) - if err == ErrParentNotFound { + if err == object.ErrParentNotFound { return nil, io.EOF } if err == nil { diff --git a/plumbing/object/commitnode_graph.go b/plumbing/object/commitgraph/commitnode_graph.go similarity index 85% rename from plumbing/object/commitnode_graph.go rename to plumbing/object/commitgraph/commitnode_graph.go index ac790abdc..bd54e1888 100644 --- a/plumbing/object/commitnode_graph.go +++ b/plumbing/object/commitgraph/commitnode_graph.go @@ -1,4 +1,4 @@ -package object +package commitgraph import ( "fmt" @@ -6,6 +6,7 @@ import ( "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph" + "gopkg.in/src-d/go-git.v4/plumbing/object" "gopkg.in/src-d/go-git.v4/plumbing/storer" ) @@ -57,7 +58,7 @@ func (gci *graphCommitNodeIndex) Get(hash plumbing.Hash) (CommitNode, error) { } // Fallback to loading full commit object - commit, err := GetCommit(gci.s, hash) + commit, err := object.GetCommit(gci.s, hash) if err != nil { return nil, err } @@ -72,8 +73,8 @@ func (c *graphCommitNode) ID() plumbing.Hash { return c.hash } -func (c *graphCommitNode) Tree() (*Tree, error) { - return GetTree(c.gci.s, c.commitData.TreeHash) +func (c *graphCommitNode) Tree() (*object.Tree, error) { + return object.GetTree(c.gci.s, c.commitData.TreeHash) } func (c *graphCommitNode) CommitTime() time.Time { @@ -90,7 +91,7 @@ func (c *graphCommitNode) 
ParentNodes() CommitNodeIter { func (c *graphCommitNode) ParentNode(i int) (CommitNode, error) { if i < 0 || i >= len(c.commitData.ParentIndexes) { - return nil, ErrParentNotFound + return nil, object.ErrParentNotFound } parent, err := c.gci.commitGraph.GetCommitDataByIndex(c.commitData.ParentIndexes[i]) @@ -117,14 +118,14 @@ func (c *graphCommitNode) Generation() uint64 { return uint64(c.commitData.Generation) } -func (c *graphCommitNode) Commit() (*Commit, error) { - return GetCommit(c.gci.s, c.hash) +func (c *graphCommitNode) Commit() (*object.Commit, error) { + return object.GetCommit(c.gci.s, c.hash) } func (c *graphCommitNode) String() string { return fmt.Sprintf( "%s %s\nDate: %s", plumbing.CommitObject, c.ID(), - c.CommitTime().Format(DateFormat), + c.CommitTime().Format(object.DateFormat), ) } diff --git a/plumbing/object/commitnode_object.go b/plumbing/object/commitgraph/commitnode_object.go similarity index 84% rename from plumbing/object/commitnode_object.go rename to plumbing/object/commitgraph/commitnode_object.go index 9ac42d236..2779a54bc 100644 --- a/plumbing/object/commitnode_object.go +++ b/plumbing/object/commitgraph/commitnode_object.go @@ -1,10 +1,11 @@ -package object +package commitgraph import ( "math" "time" "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/object" "gopkg.in/src-d/go-git.v4/plumbing/storer" ) @@ -13,7 +14,7 @@ import ( // objectCommitNode implements the CommitNode interface. 
type objectCommitNode struct { nodeIndex CommitNodeIndex - commit *Commit + commit *object.Commit } // NewObjectCommitNodeIndex returns CommitNodeIndex implementation that uses @@ -23,7 +24,7 @@ func NewObjectCommitNodeIndex(s storer.EncodedObjectStorer) CommitNodeIndex { } func (oci *objectCommitNodeIndex) Get(hash plumbing.Hash) (CommitNode, error) { - commit, err := GetCommit(oci.s, hash) + commit, err := object.GetCommit(oci.s, hash) if err != nil { return nil, err } @@ -50,7 +51,7 @@ func (c *objectCommitNode) ID() plumbing.Hash { return c.commit.ID() } -func (c *objectCommitNode) Tree() (*Tree, error) { +func (c *objectCommitNode) Tree() (*object.Tree, error) { return c.commit.Tree() } @@ -64,7 +65,7 @@ func (c *objectCommitNode) ParentNodes() CommitNodeIter { func (c *objectCommitNode) ParentNode(i int) (CommitNode, error) { if i < 0 || i >= len(c.commit.ParentHashes) { - return nil, ErrParentNotFound + return nil, object.ErrParentNotFound } // Note: It's necessary to go through CommitNodeIndex here to ensure @@ -84,6 +85,6 @@ func (c *objectCommitNode) Generation() uint64 { return math.MaxUint64 } -func (c *objectCommitNode) Commit() (*Commit, error) { +func (c *objectCommitNode) Commit() (*object.Commit, error) { return c.commit, nil } diff --git a/plumbing/object/commitnode_test.go b/plumbing/object/commitgraph/commitnode_test.go similarity index 96% rename from plumbing/object/commitnode_test.go rename to plumbing/object/commitgraph/commitnode_test.go index a295b8b49..2a339c85a 100644 --- a/plumbing/object/commitnode_test.go +++ b/plumbing/object/commitgraph/commitnode_test.go @@ -1,4 +1,4 @@ -package object +package commitgraph import ( "path" diff --git a/plumbing/object/commitnode_walker_ctime.go b/plumbing/object/commitgraph/commitnode_walker_ctime.go similarity index 94% rename from plumbing/object/commitnode_walker_ctime.go rename to plumbing/object/commitgraph/commitnode_walker_ctime.go index e55b4adbc..f6a1b6a4e 100644 --- 
a/plumbing/object/commitnode_walker_ctime.go +++ b/plumbing/object/commitgraph/commitnode_walker_ctime.go @@ -1,4 +1,4 @@ -package object +package commitgraph import ( "io" From 940460f5422b02f01351396af703e9b63e8596ae Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Fri, 3 May 2019 12:13:15 +0200 Subject: [PATCH 12/17] Fix object/commitgraph tests Signed-off-by: Filip Navara --- plumbing/object/commitgraph/commitnode_test.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/plumbing/object/commitgraph/commitnode_test.go b/plumbing/object/commitgraph/commitnode_test.go index 2a339c85a..954f873ab 100644 --- a/plumbing/object/commitgraph/commitnode_test.go +++ b/plumbing/object/commitgraph/commitnode_test.go @@ -2,6 +2,7 @@ package commitgraph import ( "path" + "testing" . "gopkg.in/check.v1" fixtures "gopkg.in/src-d/go-git-fixtures.v3" @@ -12,6 +13,8 @@ import ( "gopkg.in/src-d/go-git.v4/storage/filesystem" ) +func Test(t *testing.T) { TestingT(t) } + type CommitNodeSuite struct { fixtures.Suite } From 7d2695741f4d1f572a36f7225b6bbb2f569d59d7 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Tue, 7 May 2019 10:49:39 +0200 Subject: [PATCH 13/17] Add doc.go for commitgraph packages Signed-off-by: Filip Navara --- plumbing/format/commitgraph/doc.go | 103 +++++++++++++++++++++++++ plumbing/format/commitgraph/encoder.go | 1 + plumbing/format/commitgraph/memory.go | 2 + plumbing/object/commitgraph/doc.go | 7 ++ 4 files changed, 113 insertions(+) create mode 100644 plumbing/format/commitgraph/doc.go create mode 100644 plumbing/object/commitgraph/doc.go diff --git a/plumbing/format/commitgraph/doc.go b/plumbing/format/commitgraph/doc.go new file mode 100644 index 000000000..41cd8b1e3 --- /dev/null +++ b/plumbing/format/commitgraph/doc.go @@ -0,0 +1,103 @@ +// Package commitgraph implements encoding and decoding of commit-graph files. 
+// +// Git commit graph format +// ======================= +// +// The Git commit graph stores a list of commit OIDs and some associated +// metadata, including: +// +// - The generation number of the commit. Commits with no parents have +// generation number 1; commits with parents have generation number +// one more than the maximum generation number of their parents. We +// reserve zero as special; it can be used to mark a generation +// number invalid or as "not computed". +// +// - The root tree OID. +// +// - The commit date. +// +// - The parents of the commit, stored using positional references within +// the graph file. +// +// These positional references are stored as unsigned 32-bit integers +// corresponding to the array position within the list of commit OIDs. Due +// to some special constants we use to track parents, we can store at most +// (1 << 30) + (1 << 29) + (1 << 28) - 1 (around 1.8 billion) commits. +// +// Commit graph files have the following format: +// +// In order to allow extensions that add extra data to the graph, we organize +// the body into "chunks" and provide a binary lookup table at the beginning +// of the body. The header includes certain values, such as number of chunks +// and hash type. +// +// All 4-byte numbers are in network order. +// +// HEADER: +// +// 4-byte signature: +// The signature is: {'C', 'G', 'P', 'H'} +// +// 1-byte version number: +// Currently, the only valid version is 1. +// +// 1-byte Hash Version (1 = SHA-1) +// We infer the hash length (H) from this value. +// +// 1-byte number (C) of "chunks" +// +// 1-byte (reserved for later use) +// Current clients should ignore this value. +// +// CHUNK LOOKUP: +// +// (C + 1) * 12 bytes listing the table of contents for the chunks: +// First 4 bytes describe the chunk id. Value 0 is a terminating label. +// Other 8 bytes provide the byte-offset in current file for chunk to +// start. 
(Chunks are ordered contiguously in the file, so you can infer +// the length using the next chunk position if necessary.) Each chunk +// ID appears at most once. +// +// The remaining data in the body is described one chunk at a time, and +// these chunks may be given in any order. Chunks are required unless +// otherwise specified. +// +// CHUNK DATA: +// +// OID Fanout (ID: {'O', 'I', 'D', 'F'}) (256 * 4 bytes) +// The ith entry, F[i], stores the number of OIDs with first +// byte at most i. Thus F[255] stores the total +// number of commits (N). +// +// OID Lookup (ID: {'O', 'I', 'D', 'L'}) (N * H bytes) +// The OIDs for all commits in the graph, sorted in ascending order. +// +// Commit Data (ID: {'C', 'D', 'A', 'T' }) (N * (H + 16) bytes) +// * The first H bytes are for the OID of the root tree. +// * The next 8 bytes are for the positions of the first two parents +// of the ith commit. Stores value 0x70000000 if no parent in that +// position. If there are more than two parents, the second value +// has its most-significant bit on and the other bits store an array +// position into the Extra Edge List chunk. +// * The next 8 bytes store the generation number of the commit and +// the commit time in seconds since EPOCH. The generation number +// uses the higher 30 bits of the first 4 bytes, while the commit +// time uses the 32 bits of the second 4 bytes, along with the lowest +// 2 bits of the lowest byte, storing the 33rd and 34th bit of the +// commit time. +// +// Extra Edge List (ID: {'E', 'D', 'G', 'E'}) [Optional] +// This list of 4-byte values stores the second through nth parents for +// all octopus merges. The second parent value in the commit data stores +// an array position within this list along with the most-significant bit +// on. Starting at that array position, iterate through this list of commit +// positions for the parents until reaching a value with the most-significant +// bit on. 
The other bits correspond to the position of the last parent. +// +// TRAILER: +// +// H-byte HASH-checksum of all of the above. +// +// Source: +// https://raw.githubusercontent.com/git/git/master/Documentation/technical/commit-graph-format.txt +package commitgraph diff --git a/plumbing/format/commitgraph/encoder.go b/plumbing/format/commitgraph/encoder.go index 648153f05..a06871cb7 100644 --- a/plumbing/format/commitgraph/encoder.go +++ b/plumbing/format/commitgraph/encoder.go @@ -22,6 +22,7 @@ func NewEncoder(w io.Writer) *Encoder { return &Encoder{mw, h} } +// Encode writes an index into the commit-graph file func (e *Encoder) Encode(idx Index) error { var err error diff --git a/plumbing/format/commitgraph/memory.go b/plumbing/format/commitgraph/memory.go index f084b85cb..a4a96e961 100644 --- a/plumbing/format/commitgraph/memory.go +++ b/plumbing/format/commitgraph/memory.go @@ -4,6 +4,8 @@ import ( "gopkg.in/src-d/go-git.v4/plumbing" ) +// MemoryIndex provides a way to build the commit-graph in memory +// for later encoding to file. type MemoryIndex struct { commitData []*CommitData indexMap map[plumbing.Hash]int diff --git a/plumbing/object/commitgraph/doc.go b/plumbing/object/commitgraph/doc.go new file mode 100644 index 000000000..0a55ad5b0 --- /dev/null +++ b/plumbing/object/commitgraph/doc.go @@ -0,0 +1,7 @@ +// Package commitgraph provides an interface for efficient traversal over Git +// commit graph either through the regular object storage, or optionally with +// the index stored in commit-graph file (Git 2.18+). +// +// The API and functionality of this package are considered EXPERIMENTAL and are +// not considered stable or production ready. 
+package commitgraph From 944861a8dfe85938e84007f399e8ed94acbf5d68 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Tue, 7 May 2019 11:33:25 +0200 Subject: [PATCH 14/17] Add example for commit-graph traversal Signed-off-by: Filip Navara --- _examples/ls/main.go | 268 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 268 insertions(+) create mode 100644 _examples/ls/main.go diff --git a/_examples/ls/main.go b/_examples/ls/main.go new file mode 100644 index 000000000..bbd40d524 --- /dev/null +++ b/_examples/ls/main.go @@ -0,0 +1,268 @@ +package main + +import ( + "fmt" + "io" + "os" + "path" + "strings" + + "github.com/emirpasic/gods/trees/binaryheap" + "gopkg.in/src-d/go-git.v4" + . "gopkg.in/src-d/go-git.v4/_examples" + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/cache" + commitgraph_fmt "gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph" + "gopkg.in/src-d/go-git.v4/plumbing/object" + "gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph" + "gopkg.in/src-d/go-git.v4/storage/filesystem" + + "gopkg.in/src-d/go-billy.v4" + "gopkg.in/src-d/go-billy.v4/osfs" +) + +// Example of how to list the entries of a tree together with the last commit that changed each entry +func main() { + CheckArgs("", "", "") + + path := os.Args[1] + revision := os.Args[2] + treePath := os.Args[3] + + // We instantiate a new repository targeting the given path (the .git folder) + fs := osfs.New(path) + s := filesystem.NewStorageWithOptions(fs, cache.NewObjectLRUDefault(), filesystem.Options{KeepDescriptors: true}) + r, err := git.Open(s, fs) + CheckIfError(err) + + // Resolve revision into a sha1 commit, only some revisions are resolved + // look at the doc to get more details + Info("git rev-parse %s", revision) + + h, err := r.ResolveRevision(plumbing.Revision(revision)) + CheckIfError(err) + + commit, err := r.CommitObject(*h) + CheckIfError(err) + + tree, err := commit.Tree() + CheckIfError(err) + if treePath != "" { + tree, err = tree.Tree(treePath) + CheckIfError(err) + } + + var 
paths []string + for _, entry := range tree.Entries { + paths = append(paths, entry.Name) + } + + commitNodeIndex, file := getCommitNodeIndex(r, fs) + if file != nil { + defer file.Close() + } + + commitNode, err := commitNodeIndex.Get(*h) + CheckIfError(err) + + revs, err := getLastCommitForPaths(commitNode, treePath, paths) + CheckIfError(err) + for path, rev := range revs { + // Print one line per file (name hash message) + hash := rev.Hash.String() + line := strings.Split(rev.Message, "\n") + fmt.Println(path, hash[:7], line[0]) + } + + s.Close() +} + +func getCommitNodeIndex(r *git.Repository, fs billy.Filesystem) (commitgraph.CommitNodeIndex, io.ReadCloser) { + file, err := fs.Open(path.Join("objects", "info", "commit-graph")) + if err == nil { + index, err := commitgraph_fmt.OpenFileIndex(file) + if err == nil { + return commitgraph.NewGraphCommitNodeIndex(index, r.Storer), file + } + file.Close() + } + + return commitgraph.NewObjectCommitNodeIndex(r.Storer), nil +} + +type commitAndPaths struct { + commit commitgraph.CommitNode + // Paths that are still on the branch represented by commit + paths []string + // Set of hashes for the paths + hashes map[string]plumbing.Hash +} + +func getCommitTree(c commitgraph.CommitNode, treePath string) (*object.Tree, error) { + tree, err := c.Tree() + if err != nil { + return nil, err + } + + // Optimize deep traversals by focusing only on the specific tree + if treePath != "" { + tree, err = tree.Tree(treePath) + if err != nil { + return nil, err + } + } + + return tree, nil +} + +func getFullPath(treePath, path string) string { + if treePath != "" { + if path != "" { + return treePath + "/" + path + } + return treePath + } + return path +} + +func getFileHashes(c commitgraph.CommitNode, treePath string, paths []string) (map[string]plumbing.Hash, error) { + tree, err := getCommitTree(c, treePath) + if err == object.ErrDirectoryNotFound { + // The whole tree didn't exist, so return empty map + return 
make(map[string]plumbing.Hash), nil + } + if err != nil { + return nil, err + } + + hashes := make(map[string]plumbing.Hash) + for _, path := range paths { + if path != "" { + entry, err := tree.FindEntry(path) + if err == nil { + hashes[path] = entry.Hash + } + } else { + hashes[path] = tree.Hash + } + } + + return hashes, nil +} + +func getLastCommitForPaths(c commitgraph.CommitNode, treePath string, paths []string) (map[string]*object.Commit, error) { + // We do a tree traversal with nodes sorted by commit time + heap := binaryheap.NewWith(func(a, b interface{}) int { + if a.(*commitAndPaths).commit.CommitTime().Before(b.(*commitAndPaths).commit.CommitTime()) { + return 1 + } + return -1 + }) + + resultNodes := make(map[string]commitgraph.CommitNode) + initialHashes, err := getFileHashes(c, treePath, paths) + if err != nil { + return nil, err + } + + // Start search from the root commit and with full set of paths + heap.Push(&commitAndPaths{c, paths, initialHashes}) + + for { + cIn, ok := heap.Pop() + if !ok { + break + } + current := cIn.(*commitAndPaths) + + // Load the parent commits for the one we are currently examining + numParents := current.commit.NumParents() + var parents []commitgraph.CommitNode + for i := 0; i < numParents; i++ { + parent, err := current.commit.ParentNode(i) + if err != nil { + break + } + parents = append(parents, parent) + } + + // Examine the current commit and set of interesting paths + pathUnchanged := make([]bool, len(current.paths)) + parentHashes := make([]map[string]plumbing.Hash, len(parents)) + for j, parent := range parents { + parentHashes[j], err = getFileHashes(parent, treePath, current.paths) + if err != nil { + break + } + + for i, path := range current.paths { + if parentHashes[j][path] == current.hashes[path] { + pathUnchanged[i] = true + } + } + } + + var remainingPaths []string + for i, path := range current.paths { + // The results could already contain some newer change for the same path, + // so don't override 
that and bail out on the file early. + if resultNodes[path] == nil { + if pathUnchanged[i] { + // The path existed with the same hash in at least one parent so it could + // not have been changed in this commit directly. + remainingPaths = append(remainingPaths, path) + } else { + // There are few possible cases how can we get here: + // - The path didn't exist in any parent, so it must have been created by + // this commit. + // - The path did exist in the parent commit, but the hash of the file has + // changed. + // - We are looking at a merge commit and the hash of the file doesn't + // match any of the hashes being merged. This is more common for directories, + // but it can also happen if a file is changed through conflict resolution. + resultNodes[path] = current.commit + } + } + } + + if len(remainingPaths) > 0 { + // Add the parent nodes along with remaining paths to the heap for further + // processing. + for j, parent := range parents { + // Combine remainingPath with paths available on the parent branch + // and make union of them + remainingPathsForParent := make([]string, 0, len(remainingPaths)) + newRemainingPaths := make([]string, 0, len(remainingPaths)) + for _, path := range remainingPaths { + if parentHashes[j][path] == current.hashes[path] { + remainingPathsForParent = append(remainingPathsForParent, path) + } else { + newRemainingPaths = append(newRemainingPaths, path) + } + } + + if remainingPathsForParent != nil { + heap.Push(&commitAndPaths{parent, remainingPathsForParent, parentHashes[j]}) + } + + if len(newRemainingPaths) == 0 { + break + } else { + remainingPaths = newRemainingPaths + } + } + } + } + + // Post-processing + result := make(map[string]*object.Commit) + for path, commitNode := range resultNodes { + var err error + result[path], err = commitNode.Commit() + if err != nil { + return nil, err + } + } + + return result, nil +} From d8471a32b6c3ab4521ee47db4c7f7b78f0a8363b Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Tue, 7 
May 2019 11:48:52 +0200 Subject: [PATCH 15/17] Add test parameters for ls example Signed-off-by: Filip Navara --- _examples/common_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/_examples/common_test.go b/_examples/common_test.go index aa7c9b446..47463a1cd 100644 --- a/_examples/common_test.go +++ b/_examples/common_test.go @@ -28,6 +28,7 @@ var args = map[string][]string{ "showcase": {defaultURL, tempFolder()}, "tag": {cloneRepository(defaultURL, tempFolder())}, "pull": {createRepositoryWithRemote(tempFolder(), defaultURL)}, + "ls": {cloneRepository(defaultURL, tempFolder()), "HEAD", "vendor"}, } var ignored = map[string]bool{} From 0073a49b9575c2e64676485ca5126641510c6db5 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Tue, 7 May 2019 12:26:55 +0200 Subject: [PATCH 16/17] Allow non-.git path for ls example Signed-off-by: Filip Navara --- _examples/ls/main.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/_examples/ls/main.go b/_examples/ls/main.go index bbd40d524..bb686f1e6 100644 --- a/_examples/ls/main.go +++ b/_examples/ls/main.go @@ -31,9 +31,15 @@ func main() { // We instantiate a new repository targeting the given path (the .git folder) fs := osfs.New(path) + if _, err := fs.Stat(git.GitDirName); err == nil { + fs, err = fs.Chroot(git.GitDirName) + CheckIfError(err) + } + s := filesystem.NewStorageWithOptions(fs, cache.NewObjectLRUDefault(), filesystem.Options{KeepDescriptors: true}) r, err := git.Open(s, fs) CheckIfError(err) + defer s.Close() // Resolve revision into a sha1 commit, only some revisions are resolved // look at the doc to get more details @@ -73,8 +79,6 @@ func main() { line := strings.Split(rev.Message, "\n") fmt.Println(path, hash[:7], line[0]) } - - s.Close() } func getCommitNodeIndex(r *git.Repository, fs billy.Filesystem) (commitgraph.CommitNodeIndex, io.ReadCloser) { From d2596b8d7fe07aecf83b5377c527f5d8999f7d16 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Tue, 7 May 2019 12:31:24 +0200 
Subject: [PATCH 17/17] Remove unnecessary mmap usage from tests Signed-off-by: Filip Navara --- plumbing/format/commitgraph/commitgraph_test.go | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/plumbing/format/commitgraph/commitgraph_test.go b/plumbing/format/commitgraph/commitgraph_test.go index 0e3870726..0214f49fd 100644 --- a/plumbing/format/commitgraph/commitgraph_test.go +++ b/plumbing/format/commitgraph/commitgraph_test.go @@ -6,10 +6,8 @@ import ( "path" "testing" - "golang.org/x/exp/mmap" - . "gopkg.in/check.v1" - "gopkg.in/src-d/go-git-fixtures.v3" + fixtures "gopkg.in/src-d/go-git-fixtures.v3" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph" ) @@ -23,7 +21,7 @@ type CommitgraphSuite struct { var _ = Suite(&CommitgraphSuite{}) func testDecodeHelper(c *C, path string) { - reader, err := mmap.Open(path) + reader, err := os.Open(path) c.Assert(err, IsNil) defer reader.Close() index, err := commitgraph.OpenFileIndex(reader) @@ -85,7 +83,7 @@ func (s *CommitgraphSuite) TestReencode(c *C) { fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) { dotgit := f.DotGit() - reader, err := mmap.Open(path.Join(dotgit.Root(), "objects", "info", "commit-graph")) + reader, err := os.Open(path.Join(dotgit.Root(), "objects", "info", "commit-graph")) c.Assert(err, IsNil) defer reader.Close() index, err := commitgraph.OpenFileIndex(reader) @@ -108,7 +106,7 @@ func (s *CommitgraphSuite) TestReencodeInMemory(c *C) { fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) { dotgit := f.DotGit() - reader, err := mmap.Open(path.Join(dotgit.Root(), "objects", "info", "commit-graph")) + reader, err := os.Open(path.Join(dotgit.Root(), "objects", "info", "commit-graph")) c.Assert(err, IsNil) index, err := commitgraph.OpenFileIndex(reader) c.Assert(err, IsNil)