This repository was archived by the owner on Sep 11, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 534
storage/repository: add new functions for garbage collection #669
Merged
Merged
Changes from all commits
Commits
Show all changes
19 commits
Select commit
Hold shift + click to select a range
026d7c4
filesystem: implement PackRefs()
strib ae2168c
dotgit: fix up PackRefs comment for upstreaming
strib 3447303
filesystem: todo comment about "all" param
strib d501611
dotgit: during rewriting, re-open packed-refs after locking
strib a6202ca
dotgit: use bufio for PackRefs
strib ac1914e
First pass of prune design
taruti 3f0b1ff
Address CI and move code around
taruti fae4389
Support for repacking objects
taruti d96582a
Make object repacking more configurable
taruti 9dcb096
Use Storer.Config pack window when repacking objects
taruti f28e447
Make prune object walker generic
taruti 2de4f03
Use object walker in repacking code
taruti aa092f5
plumbing: add `HasEncodedObject` method to Storer
strib b18457d
storage: some minor code cleanup
strib 4c15695
storer: separate loose and packed object mgmt into optional ifaces
strib 88acc31
repository: add tests for pruning and object re-packing
strib c2e6b5d
repository: oops, fix the prune test
strib 5a6cc4e
dotgit: open+lock packed-refs file until it doesn't change
strib d532648
dotgit: rewrite packed-refs while holding lock
strib File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
package git | ||
|
||
import ( | ||
"fmt" | ||
|
||
"gopkg.in/src-d/go-git.v4/plumbing" | ||
"gopkg.in/src-d/go-git.v4/plumbing/filemode" | ||
"gopkg.in/src-d/go-git.v4/plumbing/object" | ||
"gopkg.in/src-d/go-git.v4/storage" | ||
) | ||
|
||
type objectWalker struct { | ||
Storer storage.Storer | ||
// seen is the set of objects seen in the repo. | ||
// seen map can become huge if walking over large | ||
// repos. Thus using struct{} as the value type. | ||
seen map[plumbing.Hash]struct{} | ||
} | ||
|
||
func newObjectWalker(s storage.Storer) *objectWalker { | ||
return &objectWalker{s, map[plumbing.Hash]struct{}{}} | ||
} | ||
|
||
// walkAllRefs walks all (hash) refererences from the repo. | ||
func (p *objectWalker) walkAllRefs() error { | ||
// Walk over all the references in the repo. | ||
it, err := p.Storer.IterReferences() | ||
if err != nil { | ||
return err | ||
} | ||
defer it.Close() | ||
err = it.ForEach(func(ref *plumbing.Reference) error { | ||
// Exit this iteration early for non-hash references. | ||
if ref.Type() != plumbing.HashReference { | ||
return nil | ||
} | ||
return p.walkObjectTree(ref.Hash()) | ||
}) | ||
if err != nil { | ||
return err | ||
} | ||
return nil | ||
} | ||
|
||
func (p *objectWalker) isSeen(hash plumbing.Hash) bool { | ||
_, seen := p.seen[hash] | ||
return seen | ||
} | ||
|
||
func (p *objectWalker) add(hash plumbing.Hash) { | ||
p.seen[hash] = struct{}{} | ||
} | ||
|
||
// walkObjectTree walks over all objects and remembers references | ||
// to them in the objectWalker. This is used instead of the revlist | ||
// walks because memory usage is tight with huge repos. | ||
func (p *objectWalker) walkObjectTree(hash plumbing.Hash) error { | ||
// Check if we have already seen, and mark this object | ||
if p.isSeen(hash) { | ||
return nil | ||
} | ||
p.add(hash) | ||
// Fetch the object. | ||
obj, err := object.GetObject(p.Storer, hash) | ||
if err != nil { | ||
return fmt.Errorf("Getting object %s failed: %v", hash, err) | ||
} | ||
// Walk all children depending on object type. | ||
switch obj := obj.(type) { | ||
case *object.Commit: | ||
err = p.walkObjectTree(obj.TreeHash) | ||
if err != nil { | ||
return err | ||
} | ||
for _, h := range obj.ParentHashes { | ||
err = p.walkObjectTree(h) | ||
if err != nil { | ||
return err | ||
} | ||
} | ||
case *object.Tree: | ||
for i := range obj.Entries { | ||
// Shortcut for blob objects: | ||
// 'or' the lower bits of a mode and check that it | ||
// it matches a filemode.Executable. The type information | ||
// is in the higher bits, but this is the cleanest way | ||
// to handle plain files with different modes. | ||
// Other non-tree objects are somewhat rare, so they | ||
// are not special-cased. | ||
if obj.Entries[i].Mode|0755 == filemode.Executable { | ||
p.add(obj.Entries[i].Hash) | ||
continue | ||
} | ||
// Normal walk for sub-trees (and symlinks etc). | ||
err = p.walkObjectTree(obj.Entries[i].Hash) | ||
if err != nil { | ||
return err | ||
} | ||
} | ||
default: | ||
// Error out on unhandled object types. | ||
return fmt.Errorf("Unknown object %X %s %T\n", obj.ID(), obj.Type(), obj) | ||
} | ||
return nil | ||
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
package git | ||
|
||
import ( | ||
"errors" | ||
"time" | ||
|
||
"gopkg.in/src-d/go-git.v4/plumbing" | ||
"gopkg.in/src-d/go-git.v4/plumbing/storer" | ||
) | ||
|
||
type PruneHandler func(unreferencedObjectHash plumbing.Hash) error | ||
type PruneOptions struct { | ||
// OnlyObjectsOlderThan if set to non-zero value | ||
// selects only objects older than the time provided. | ||
OnlyObjectsOlderThan time.Time | ||
// Handler is called on matching objects | ||
Handler PruneHandler | ||
} | ||
|
||
// ErrLooseObjectsNotSupported is returned when the repository's storer
// does not implement storer.LooseObjectStorer.
var ErrLooseObjectsNotSupported = errors.New("Loose objects not supported")
|
||
// DeleteObject deletes an object from a repository. | ||
// The type conveniently matches PruneHandler. | ||
func (r *Repository) DeleteObject(hash plumbing.Hash) error { | ||
los, ok := r.Storer.(storer.LooseObjectStorer) | ||
if !ok { | ||
return ErrLooseObjectsNotSupported | ||
} | ||
|
||
return los.DeleteLooseObject(hash) | ||
} | ||
|
||
func (r *Repository) Prune(opt PruneOptions) error { | ||
los, ok := r.Storer.(storer.LooseObjectStorer) | ||
if !ok { | ||
return ErrLooseObjectsNotSupported | ||
} | ||
|
||
pw := newObjectWalker(r.Storer) | ||
err := pw.walkAllRefs() | ||
if err != nil { | ||
return err | ||
} | ||
// Now walk all (loose) objects in storage. | ||
return los.ForEachObjectHash(func(hash plumbing.Hash) error { | ||
// Get out if we have seen this object. | ||
if pw.isSeen(hash) { | ||
return nil | ||
} | ||
// Otherwise it is a candidate for pruning. | ||
// Check out for too new objects next. | ||
if opt.OnlyObjectsOlderThan != (time.Time{}) { | ||
// Errors here are non-fatal. The object may be e.g. packed. | ||
// Or concurrently deleted. Skip such objects. | ||
t, err := los.LooseObjectTime(hash) | ||
if err != nil { | ||
return nil | ||
} | ||
// Skip too new objects. | ||
if !t.Before(opt.OnlyObjectsOlderThan) { | ||
return nil | ||
} | ||
} | ||
return opt.Handler(hash) | ||
}) | ||
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
package git | ||
|
||
import ( | ||
"time" | ||
|
||
"gopkg.in/src-d/go-git.v4/plumbing" | ||
"gopkg.in/src-d/go-git.v4/plumbing/storer" | ||
"gopkg.in/src-d/go-git.v4/storage" | ||
"gopkg.in/src-d/go-git.v4/storage/filesystem" | ||
|
||
. "gopkg.in/check.v1" | ||
"gopkg.in/src-d/go-git-fixtures.v3" | ||
) | ||
|
||
type PruneSuite struct { | ||
BaseSuite | ||
} | ||
|
||
var _ = Suite(&PruneSuite{}) | ||
|
||
func (s *PruneSuite) testPrune(c *C, deleteTime time.Time) { | ||
srcFs := fixtures.ByTag("unpacked").One().DotGit() | ||
var sto storage.Storer | ||
var err error | ||
sto, err = filesystem.NewStorage(srcFs) | ||
c.Assert(err, IsNil) | ||
|
||
los := sto.(storer.LooseObjectStorer) | ||
c.Assert(los, NotNil) | ||
|
||
count := 0 | ||
err = los.ForEachObjectHash(func(_ plumbing.Hash) error { | ||
count++ | ||
return nil | ||
}) | ||
c.Assert(err, IsNil) | ||
|
||
r, err := Open(sto, srcFs) | ||
c.Assert(err, IsNil) | ||
c.Assert(r, NotNil) | ||
|
||
// Remove a branch so we can prune some objects. | ||
err = sto.RemoveReference(plumbing.ReferenceName("refs/heads/v4")) | ||
c.Assert(err, IsNil) | ||
err = sto.RemoveReference(plumbing.ReferenceName("refs/remotes/origin/v4")) | ||
c.Assert(err, IsNil) | ||
|
||
err = r.Prune(PruneOptions{ | ||
OnlyObjectsOlderThan: deleteTime, | ||
Handler: r.DeleteObject, | ||
}) | ||
c.Assert(err, IsNil) | ||
|
||
newCount := 0 | ||
err = los.ForEachObjectHash(func(_ plumbing.Hash) error { | ||
newCount++ | ||
return nil | ||
}) | ||
if deleteTime.IsZero() { | ||
c.Assert(newCount < count, Equals, true) | ||
} else { | ||
// Assume a delete time older than any of the objects was passed in. | ||
c.Assert(newCount, Equals, count) | ||
} | ||
} | ||
|
||
func (s *PruneSuite) TestPrune(c *C) { | ||
s.testPrune(c, time.Time{}) | ||
} | ||
|
||
func (s *PruneSuite) TestPruneWithNoDelete(c *C) { | ||
s.testPrune(c, time.Unix(0, 1)) | ||
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To avoid forcing every storer to implement these complex methods, I would rather add them to a new interface, such as
LooseObjectStorer
or similar, which includes all the related methods. Then check whether the current storer implements it and, if not, return a not-supported error. This is similar to the
Transactioner
interface.