Skip to content
This repository was archived by the owner on Sep 11, 2020. It is now read-only.

storage/repository: add new functions for garbage collection #669

Merged
merged 19 commits into from
Nov 30, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 105 additions & 0 deletions object_walker.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
package git

import (
"fmt"

"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/filemode"
"gopkg.in/src-d/go-git.v4/plumbing/object"
"gopkg.in/src-d/go-git.v4/storage"
)

// objectWalker walks the object graph of a repository and records the
// hash of every object it visits.
type objectWalker struct {
// Storer is the storage that references and objects are read from.
Storer storage.Storer
// seen is the set of objects seen in the repo.
// seen map can become huge if walking over large
// repos. Thus using struct{} as the value type.
seen map[plumbing.Hash]struct{}
}

// newObjectWalker builds an objectWalker that reads from s and starts out
// with an empty set of seen hashes.
func newObjectWalker(s storage.Storer) *objectWalker {
	walker := &objectWalker{
		Storer: s,
		seen:   make(map[plumbing.Hash]struct{}),
	}
	return walker
}

// walkAllRefs visits every hash reference in the repository and walks the
// objects reachable from it. References of any other type are skipped.
func (p *objectWalker) walkAllRefs() error {
	refs, err := p.Storer.IterReferences()
	if err != nil {
		return err
	}
	defer refs.Close()

	visit := func(ref *plumbing.Reference) error {
		// Only hash references carry an object hash to start a walk from.
		if ref.Type() == plumbing.HashReference {
			return p.walkObjectTree(ref.Hash())
		}
		return nil
	}
	return refs.ForEach(visit)
}

// isSeen reports whether hash has already been recorded by the walker.
func (p *objectWalker) isSeen(hash plumbing.Hash) bool {
	_, found := p.seen[hash]
	return found
}

// add records hash in the set of seen objects.
func (p *objectWalker) add(hash plumbing.Hash) {
	var marker struct{}
	p.seen[hash] = marker
}

// walkObjectTree marks hash, and every object reachable from it, as seen
// in the objectWalker. This is used instead of the revlist walks because
// memory usage is tight with huge repos.
//
// Commits lead to their tree and their parents, trees to their entries and
// annotated tags to their target; blobs have no children. An error is
// returned if an object cannot be loaded from the storer or has a type
// that is not handled here.
func (p *objectWalker) walkObjectTree(hash plumbing.Hash) error {
	// Mark the object before descending so that shared history and
	// repeated sub-trees are only walked once.
	if p.isSeen(hash) {
		return nil
	}
	p.add(hash)

	// Fetch the object.
	obj, err := object.GetObject(p.Storer, hash)
	if err != nil {
		return fmt.Errorf("Getting object %s failed: %v", hash, err)
	}

	// Walk all children depending on object type.
	switch obj := obj.(type) {
	case *object.Commit:
		if err := p.walkObjectTree(obj.TreeHash); err != nil {
			return err
		}
		for _, parent := range obj.ParentHashes {
			if err := p.walkObjectTree(parent); err != nil {
				return err
			}
		}
	case *object.Tree:
		for i := range obj.Entries {
			entry := &obj.Entries[i]
			// A submodule entry names a commit of another repository,
			// which is not expected to exist in this storer. Trying to
			// load it would abort the whole walk.
			if entry.Mode == filemode.Submodule {
				continue
			}
			// Shortcut for blob objects: 'or' the lower bits of the mode
			// and check that it matches filemode.Executable. The type
			// information is in the higher bits, so this covers plain
			// files regardless of their permission bits without having
			// to load the blob.
			if entry.Mode|0755 == filemode.Executable {
				p.add(entry.Hash)
				continue
			}
			// Normal walk for sub-trees (and symlinks etc).
			if err := p.walkObjectTree(entry.Hash); err != nil {
				return err
			}
		}
	case *object.Tag:
		// Annotated tags point at another object (usually a commit).
		return p.walkObjectTree(obj.Target)
	case *object.Blob:
		// Blobs (e.g. symlink targets, or blobs referenced directly)
		// have no children; being marked as seen is enough.
	default:
		// Error out on unhandled object types.
		return fmt.Errorf("Unknown object %X %s %T", obj.ID(), obj.Type(), obj)
	}
	return nil
}
32 changes: 32 additions & 0 deletions plumbing/storer/object.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package storer
import (
"errors"
"io"
"time"

"gopkg.in/src-d/go-git.v4/plumbing"
)
Expand Down Expand Up @@ -36,6 +37,9 @@ type EncodedObjectStorer interface {
//
// Valid plumbing.ObjectType values are CommitObject, BlobObject, TagObject,
IterEncodedObjects(plumbing.ObjectType) (EncodedObjectIter, error)
// HasEncodedObject returns plumbing.ErrObjectNotFound if the object
// doesn't exist. If the object does exist, it returns nil.
HasEncodedObject(plumbing.Hash) error
}

// DeltaObjectStorer is an EncodedObjectStorer that can return delta
Expand All @@ -53,6 +57,34 @@ type Transactioner interface {
Begin() Transaction
}

// LooseObjectStorer is an optional interface for managing "loose"
// objects, i.e. those not in packfiles.
type LooseObjectStorer interface {
// ForEachObjectHash iterates over all the (loose) object hashes
// in the repository without necessarily having to read those objects.
// Objects only inside pack files may be omitted.
// If ErrStop is sent, the iteration is stopped but no error is returned.
ForEachObjectHash(func(plumbing.Hash) error) error
// LooseObjectTime looks up the (m)time associated with the
// loose object (that is not in a pack file). Some
// implementations (e.g. without loose objects)
// always return an error.
LooseObjectTime(plumbing.Hash) (time.Time, error)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To avoid forcing complex storers to implement these, I would rather add these methods to a new interface,
such as LooseObjectStorer or similar, that includes all the related methods, then check whether the current storer implements it and, if not, return a "not supported" error.

Similar to Transactioner interface.

// DeleteLooseObject deletes a loose object if it exists.
DeleteLooseObject(plumbing.Hash) error
}

// PackedObjectStorer is an optional interface for managing objects in
// packfiles.
type PackedObjectStorer interface {
// ObjectPacks returns hashes of object packs if the underlying
// implementation has pack files.
ObjectPacks() ([]plumbing.Hash, error)
// DeleteOldObjectPackAndIndex deletes an object pack and the
// corresponding index file if they exist. The pack is identified by
// the given hash. Deletion is only performed if the pack is older than
// the supplied time (or the time is zero).
DeleteOldObjectPackAndIndex(plumbing.Hash, time.Time) error
}

// PackfileWriter is a optional method for ObjectStorer, it enable direct write
// of packfile to the storage
type PackfileWriter interface {
Expand Down
9 changes: 9 additions & 0 deletions plumbing/storer/object_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,15 @@ func (o *MockObjectStorage) SetEncodedObject(obj plumbing.EncodedObject) (plumbi
return plumbing.ZeroHash, nil
}

// HasEncodedObject returns nil when the mock storage holds an object whose
// hash equals h, and plumbing.ErrObjectNotFound otherwise.
func (o *MockObjectStorage) HasEncodedObject(h plumbing.Hash) error {
	for _, stored := range o.db {
		if stored.Hash() != h {
			continue
		}
		return nil
	}
	return plumbing.ErrObjectNotFound
}

func (o *MockObjectStorage) EncodedObject(t plumbing.ObjectType, h plumbing.Hash) (plumbing.EncodedObject, error) {
for _, o := range o.db {
if o.Hash() == h {
Expand Down
2 changes: 2 additions & 0 deletions plumbing/storer/reference.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ type ReferenceStorer interface {
Reference(plumbing.ReferenceName) (*plumbing.Reference, error)
IterReferences() (ReferenceIter, error)
RemoveReference(plumbing.ReferenceName) error
CountLooseRefs() (int, error)
PackRefs() error
}

// ReferenceIter is a generic closable interface for iterating over references.
Expand Down
66 changes: 66 additions & 0 deletions prune.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package git

import (
"errors"
"time"

"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/storer"
)

// PruneHandler is the callback type that Prune invokes with the hash of
// each unreferenced object selected for pruning.
type PruneHandler func(unreferencedObjectHash plumbing.Hash) error
// PruneOptions describes which unreferenced objects Prune selects and what
// is done with them.
type PruneOptions struct {
// OnlyObjectsOlderThan if set to non-zero value
// selects only objects older than the time provided.
OnlyObjectsOlderThan time.Time
// Handler is called on matching objects
Handler PruneHandler
}

// ErrLooseObjectsNotSupported is returned by DeleteObject and Prune when
// the repository's storer does not implement storer.LooseObjectStorer.
var ErrLooseObjectsNotSupported = errors.New("Loose objects not supported")

// DeleteObject removes the loose object identified by hash from the
// repository's storage. Its signature matches PruneHandler, so it can be
// used directly as PruneOptions.Handler.
//
// ErrLooseObjectsNotSupported is returned when the storer does not
// implement storer.LooseObjectStorer.
func (r *Repository) DeleteObject(hash plumbing.Hash) error {
	if looseStorer, supported := r.Storer.(storer.LooseObjectStorer); supported {
		return looseStorer.DeleteLooseObject(hash)
	}
	return ErrLooseObjectsNotSupported
}

// Prune calls opt.Handler for every loose object in the repository that was
// not reached while walking the repository's hash references.
//
// When opt.OnlyObjectsOlderThan is non-zero, only objects whose loose
// object time is before it are handed to the handler; objects whose time
// cannot be looked up are skipped. opt.Handler is invoked unconditionally
// for each selected object, so it must not be nil.
//
// ErrLooseObjectsNotSupported is returned when the repository's storer
// does not implement storer.LooseObjectStorer.
func (r *Repository) Prune(opt PruneOptions) error {
	los, ok := r.Storer.(storer.LooseObjectStorer)
	if !ok {
		return ErrLooseObjectsNotSupported
	}

	pw := newObjectWalker(r.Storer)
	if err := pw.walkAllRefs(); err != nil {
		return err
	}

	// Now walk all (loose) objects in storage.
	return los.ForEachObjectHash(func(hash plumbing.Hash) error {
		// Get out if we have seen this object.
		if pw.isSeen(hash) {
			return nil
		}
		// Otherwise it is a candidate for pruning.
		// Check out for too new objects next.
		//
		// IsZero is used instead of comparing against time.Time{} with
		// != because that comparison also takes the location into
		// account: a zero instant carrying a location would count as
		// "set" and every object would then be skipped as too new.
		if !opt.OnlyObjectsOlderThan.IsZero() {
			// Errors here are non-fatal. The object may be e.g. packed.
			// Or concurrently deleted. Skip such objects.
			t, err := los.LooseObjectTime(hash)
			if err != nil {
				return nil
			}
			// Skip too new objects.
			if !t.Before(opt.OnlyObjectsOlderThan) {
				return nil
			}
		}
		return opt.Handler(hash)
	})
}
73 changes: 73 additions & 0 deletions prune_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
package git

import (
"time"

"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/storer"
"gopkg.in/src-d/go-git.v4/storage"
"gopkg.in/src-d/go-git.v4/storage/filesystem"

. "gopkg.in/check.v1"
"gopkg.in/src-d/go-git-fixtures.v3"
)

// PruneSuite groups the tests for Repository.Prune.
type PruneSuite struct {
BaseSuite
}

// Register the suite with the check.v1 test runner.
var _ = Suite(&PruneSuite{})

// testPrune runs Repository.Prune with DeleteObject as handler on the
// "unpacked" fixture after removing the v4 branch references, and compares
// the number of loose objects before and after.
//
// With a zero deleteTime some objects are expected to be removed. With a
// non-zero deleteTime the caller is assumed to pass a time older than every
// object, so the count must stay the same.
func (s *PruneSuite) testPrune(c *C, deleteTime time.Time) {
	srcFs := fixtures.ByTag("unpacked").One().DotGit()
	// sto is declared with the interface type so that it can be
	// type-asserted to storer.LooseObjectStorer below.
	var sto storage.Storer
	var err error
	sto, err = filesystem.NewStorage(srcFs)
	c.Assert(err, IsNil)

	// Fail the test, rather than panic, if the storer lacks loose object
	// support.
	los, ok := sto.(storer.LooseObjectStorer)
	c.Assert(ok, Equals, true)

	count := 0
	err = los.ForEachObjectHash(func(_ plumbing.Hash) error {
		count++
		return nil
	})
	c.Assert(err, IsNil)

	r, err := Open(sto, srcFs)
	c.Assert(err, IsNil)
	c.Assert(r, NotNil)

	// Remove a branch so we can prune some objects.
	err = sto.RemoveReference(plumbing.ReferenceName("refs/heads/v4"))
	c.Assert(err, IsNil)
	err = sto.RemoveReference(plumbing.ReferenceName("refs/remotes/origin/v4"))
	c.Assert(err, IsNil)

	err = r.Prune(PruneOptions{
		OnlyObjectsOlderThan: deleteTime,
		Handler:              r.DeleteObject,
	})
	c.Assert(err, IsNil)

	newCount := 0
	err = los.ForEachObjectHash(func(_ plumbing.Hash) error {
		newCount++
		return nil
	})
	// The recount must succeed, otherwise newCount is not meaningful.
	c.Assert(err, IsNil)

	if deleteTime.IsZero() {
		c.Assert(newCount < count, Equals, true)
	} else {
		// Assume a delete time older than any of the objects was passed in.
		c.Assert(newCount, Equals, count)
	}
}

// TestPrune runs the prune scenario without an age limit, i.e. with the
// zero time as OnlyObjectsOlderThan.
func (s *PruneSuite) TestPrune(c *C) {
	var noAgeLimit time.Time
	s.testPrune(c, noAgeLimit)
}

// TestPruneWithNoDelete runs the prune scenario with a cutoff of one
// nanosecond after the Unix epoch, for which testPrune expects the object
// count to stay unchanged.
func (s *PruneSuite) TestPruneWithNoDelete(c *C) {
	cutoff := time.Unix(0, 1)
	s.testPrune(c, cutoff)
}
Loading