From 026d7c48163a9d246820c84693673a13f42f9145 Mon Sep 17 00:00:00 2001 From: Jeremy Stribling Date: Tue, 31 Oct 2017 15:15:58 -0700 Subject: [PATCH 01/19] filesystem: implement PackRefs() Currently this implementation is only valid for kbfsgit, since it assumes some things about the filesystem not being updated during the packing, and about conflict resolution rules. In the future, it would be nice to replace this with a more general one, and move this kbfsgit-optimized implementation into kbfsgit. Issue: KBFS-2517 --- plumbing/storer/reference.go | 2 + storage/filesystem/internal/dotgit/dotgit.go | 115 ++++++++++++++++++ .../filesystem/internal/dotgit/dotgit_test.go | 72 +++++++++++ storage/filesystem/reference.go | 8 ++ storage/memory/storage.go | 8 ++ 5 files changed, 205 insertions(+) diff --git a/plumbing/storer/reference.go b/plumbing/storer/reference.go index ae80a39d9..5e85a3be4 100644 --- a/plumbing/storer/reference.go +++ b/plumbing/storer/reference.go @@ -24,6 +24,8 @@ type ReferenceStorer interface { Reference(plumbing.ReferenceName) (*plumbing.Reference, error) IterReferences() (ReferenceIter, error) RemoveReference(plumbing.ReferenceName) error + CountLooseRefs() (int, error) + PackRefs() error } // ReferenceIter is a generic closable interface for iterating over references. diff --git a/storage/filesystem/internal/dotgit/dotgit.go b/storage/filesystem/internal/dotgit/dotgit.go index 1cb97bd50..29e2525fc 100644 --- a/storage/filesystem/internal/dotgit/dotgit.go +++ b/storage/filesystem/internal/dotgit/dotgit.go @@ -576,6 +576,121 @@ func (d *DotGit) readReferenceFile(path, name string) (ref *plumbing.Reference, return d.readReferenceFrom(f, name) } +func (d *DotGit) CountLooseRefs() (int, error) { + var refs []*plumbing.Reference + var seen = make(map[plumbing.ReferenceName]bool) + if err := d.addRefsFromRefDir(&refs, seen); err != nil { + return 0, err + } + + return len(refs), nil +} + +// PackRefs packs all loose refs into the packed-refs file. +// +// This implementation only works under the assumption that the view +// of the file system won't be updated during this operation, which is +// true for kbfsgit after the Lock() operation is complete (and before +// the Unlock()/Close() of the locked file). If another process +// concurrently updates one of the loose refs we delete, then KBFS +// conflict resolution would just end up ignoring our delete. Also +// note that deleting a ref requires locking packed-refs, so a ref +// deleted by the user shouldn't be revived by ref-packing. +// +// The strategy would not work on a general file system though, +// without locking each loose reference and checking it again before +// deleting the file, because otherwise an updated reference could +// sneak in and then be deleted by the packed-refs process. +// Alternatively, every ref update could also lock packed-refs, so +// only one lock is required during ref-packing. But that would +// worsen performance in the common case. +// +// TODO: before trying to get this merged upstream, move it into a +// custom kbfsgit Storer implementation, and rewrite this function to +// work correctly on a general filesystem. +func (d *DotGit) PackRefs() (err error) { + // Lock packed-refs, and create it if it doesn't exist yet. + f, err := d.fs.Open(packedRefsPath) + if err != nil { + if os.IsNotExist(err) { + f, err = d.fs.Create(packedRefsPath) + if err != nil { + return err + } + } else { + return err + } + } + defer ioutil.CheckClose(f, &err) + + err = f.Lock() + if err != nil { + return err + } + + // Gather all refs using addRefsFromRefDir and addRefsFromPackedRefs. + var refs []*plumbing.Reference + var seen = make(map[plumbing.ReferenceName]bool) + if err := d.addRefsFromRefDir(&refs, seen); err != nil { + return err + } + if len(refs) == 0 { + // Nothing to do! + return nil + } + numLooseRefs := len(refs) + if err := d.addRefsFromPackedRefs(&refs, seen); err != nil { + return err + } + + // Write them all to a new temp packed-refs file. + tmp, err := d.fs.TempFile("", tmpPackedRefsPrefix) + if err != nil { + return err + } + doCloseTmp := true + defer func() { + if doCloseTmp { + ioutil.CheckClose(tmp, &err) + } + }() + for _, ref := range refs { + _, err := tmp.Write([]byte(ref.String() + "\n")) + if err != nil { + return err + } + } + + // Rename the temp packed-refs file. + doCloseTmp = false + if err := tmp.Close(); err != nil { + return err + } + err = d.fs.Rename(tmp.Name(), packedRefsPath) + if err != nil { + return err + } + + // Delete all the loose refs, while still holding the packed-refs + // lock. + for _, ref := range refs[:numLooseRefs] { + path := d.fs.Join(".", ref.Name().String()) + err = d.fs.Remove(path) + if err != nil && !os.IsNotExist(err) { + return err + } + } + + // Update packed-refs cache. + d.cachedPackedRefs = make(refCache) + for _, ref := range refs { + d.cachedPackedRefs[ref.Name()] = ref + } + d.packedRefsLastMod = time.Now() + + return nil +} + // Module return a billy.Filesystem poiting to the module folder func (d *DotGit) Module(name string) (billy.Filesystem, error) { return d.fs.Chroot(d.fs.Join(modulePath, name)) diff --git a/storage/filesystem/internal/dotgit/dotgit_test.go b/storage/filesystem/internal/dotgit/dotgit_test.go index 446a2049e..0ed9ec574 100644 --- a/storage/filesystem/internal/dotgit/dotgit_test.go +++ b/storage/filesystem/internal/dotgit/dotgit_test.go @@ -543,3 +543,75 @@ func (s *SuiteDotGit) TestSubmodules(c *C) { c.Assert(err, IsNil) c.Assert(strings.HasSuffix(m.Root(), m.Join(".git", "modules", "basic")), Equals, true) } + +func (s *SuiteDotGit) TestPackRefs(c *C) { + tmp, err := ioutil.TempDir("", "dot-git") + c.Assert(err, IsNil) + defer os.RemoveAll(tmp) + + fs := osfs.New(tmp) + dir := New(fs) + + err = dir.SetRef(plumbing.NewReferenceFromStrings( + "refs/heads/foo", + "e8d3ffab552895c19b9fcf7aa264d277cde33881", + ), nil) + c.Assert(err, IsNil) + err = dir.SetRef(plumbing.NewReferenceFromStrings( + "refs/heads/bar", + "a8d3ffab552895c19b9fcf7aa264d277cde33881", + ), nil) + c.Assert(err, IsNil) + + refs, err := dir.Refs() + c.Assert(err, IsNil) + c.Assert(refs, HasLen, 2) + looseCount, err := dir.CountLooseRefs() + c.Assert(err, IsNil) + c.Assert(looseCount, Equals, 2) + + err = dir.PackRefs() + c.Assert(err, IsNil) + + // Make sure the refs are still there, but no longer loose. + refs, err = dir.Refs() + c.Assert(err, IsNil) + c.Assert(refs, HasLen, 2) + looseCount, err = dir.CountLooseRefs() + c.Assert(err, IsNil) + c.Assert(looseCount, Equals, 0) + + ref, err := dir.Ref("refs/heads/foo") + c.Assert(err, IsNil) + c.Assert(ref, NotNil) + c.Assert(ref.Hash().String(), Equals, "e8d3ffab552895c19b9fcf7aa264d277cde33881") + ref, err = dir.Ref("refs/heads/bar") + c.Assert(err, IsNil) + c.Assert(ref, NotNil) + c.Assert(ref.Hash().String(), Equals, "a8d3ffab552895c19b9fcf7aa264d277cde33881") + + // Now update one of them, re-pack, and check again. + err = dir.SetRef(plumbing.NewReferenceFromStrings( + "refs/heads/foo", + "b8d3ffab552895c19b9fcf7aa264d277cde33881", + ), nil) + c.Assert(err, IsNil) + looseCount, err = dir.CountLooseRefs() + c.Assert(err, IsNil) + c.Assert(looseCount, Equals, 1) + err = dir.PackRefs() + c.Assert(err, IsNil) + + // Make sure the refs are still there, but no longer loose. + refs, err = dir.Refs() + c.Assert(err, IsNil) + c.Assert(refs, HasLen, 2) + looseCount, err = dir.CountLooseRefs() + c.Assert(err, IsNil) + c.Assert(looseCount, Equals, 0) + + ref, err = dir.Ref("refs/heads/foo") + c.Assert(err, IsNil) + c.Assert(ref, NotNil) + c.Assert(ref.Hash().String(), Equals, "b8d3ffab552895c19b9fcf7aa264d277cde33881") +} diff --git a/storage/filesystem/reference.go b/storage/filesystem/reference.go index 54cdf560d..7313f05e8 100644 --- a/storage/filesystem/reference.go +++ b/storage/filesystem/reference.go @@ -34,3 +34,11 @@ func (r *ReferenceStorage) IterReferences() (storer.ReferenceIter, error) { func (r *ReferenceStorage) RemoveReference(n plumbing.ReferenceName) error { return r.dir.RemoveRef(n) } + +func (r *ReferenceStorage) CountLooseRefs() (int, error) { + return r.dir.CountLooseRefs() +} + +func (r *ReferenceStorage) PackRefs() error { + return r.dir.PackRefs() +} diff --git a/storage/memory/storage.go b/storage/memory/storage.go index 927ec41c3..3d4e84a1c 100644 --- a/storage/memory/storage.go +++ b/storage/memory/storage.go @@ -236,6 +236,14 @@ func (r ReferenceStorage) IterReferences() (storer.ReferenceIter, error) { return storer.NewReferenceSliceIter(refs), nil } +func (r ReferenceStorage) CountLooseRefs() (int, error) { + return len(r), nil +} + +func (r ReferenceStorage) PackRefs() error { + return nil +} + func (r ReferenceStorage) RemoveReference(n plumbing.ReferenceName) error { delete(r, n) return nil From ae2168c749f04f61926bd40a375ead49e63bbf05 Mon Sep 17 00:00:00 2001 From: Jeremy Stribling Date: Wed, 29 Nov 2017 10:35:27 -0800 Subject: [PATCH 02/19] dotgit: fix up PackRefs comment for upstreaming --- storage/filesystem/internal/dotgit/dotgit.go | 34 +++++--------------- 1 file changed, 8 insertions(+), 26 deletions(-) diff --git a/storage/filesystem/internal/dotgit/dotgit.go b/storage/filesystem/internal/dotgit/dotgit.go index 29e2525fc..6d56318df 100644 --- a/storage/filesystem/internal/dotgit/dotgit.go +++ b/storage/filesystem/internal/dotgit/dotgit.go @@ -589,25 +589,14 @@ func (d *DotGit) CountLooseRefs() (int, error) { // PackRefs packs all loose refs into the packed-refs file. // // This implementation only works under the assumption that the view -// of the file system won't be updated during this operation, which is -// true for kbfsgit after the Lock() operation is complete (and before -// the Unlock()/Close() of the locked file). If another process -// concurrently updates one of the loose refs we delete, then KBFS -// conflict resolution would just end up ignoring our delete. Also -// note that deleting a ref requires locking packed-refs, so a ref -// deleted by the user shouldn't be revived by ref-packing. -// -// The strategy would not work on a general file system though, -// without locking each loose reference and checking it again before -// deleting the file, because otherwise an updated reference could -// sneak in and then be deleted by the packed-refs process. -// Alternatively, every ref update could also lock packed-refs, so -// only one lock is required during ref-packing. But that would -// worsen performance in the common case. -// -// TODO: before trying to get this merged upstream, move it into a -// custom kbfsgit Storer implementation, and rewrite this function to -// work correctly on a general filesystem. +// of the file system won't be updated during this operation. This +// strategy would not work on a general file system though, without +// locking each loose reference and checking it again before deleting +// the file, because otherwise an updated reference could sneak in and +// then be deleted by the packed-refs process. Alternatively, every +// ref update could also lock packed-refs, so only one lock is +// required during ref-packing. But that would worsen performance in +// the common case. func (d *DotGit) PackRefs() (err error) { // Lock packed-refs, and create it if it doesn't exist yet. f, err := d.fs.Open(packedRefsPath) @@ -681,13 +670,6 @@ func (d *DotGit) PackRefs() (err error) { } } - // Update packed-refs cache. - d.cachedPackedRefs = make(refCache) - for _, ref := range refs { - d.cachedPackedRefs[ref.Name()] = ref - } - d.packedRefsLastMod = time.Now() - return nil } From 3447303ce76caa3a22e9f510fa00e73f5143267e Mon Sep 17 00:00:00 2001 From: Jeremy Stribling Date: Wed, 1 Nov 2017 12:01:36 -0700 Subject: [PATCH 03/19] filesystem: todo comment about "all" param Issue: KBFS-2517 --- storage/filesystem/internal/dotgit/dotgit.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/storage/filesystem/internal/dotgit/dotgit.go b/storage/filesystem/internal/dotgit/dotgit.go index 6d56318df..330f9d55c 100644 --- a/storage/filesystem/internal/dotgit/dotgit.go +++ b/storage/filesystem/internal/dotgit/dotgit.go @@ -597,6 +597,10 @@ func (d *DotGit) CountLooseRefs() (int, error) { // ref update could also lock packed-refs, so only one lock is // required during ref-packing. But that would worsen performance in // the common case. +// +// TODO: add an "all" boolean like the `git pack-refs --all` flag. +// When `all` is false, it would only pack refs that have already been +// packed, plus all tags. func (d *DotGit) PackRefs() (err error) { // Lock packed-refs, and create it if it doesn't exist yet. f, err := d.fs.Open(packedRefsPath) From d50161181583f7767038de58eff8ee204de7646d Mon Sep 17 00:00:00 2001 From: Jeremy Stribling Date: Thu, 2 Nov 2017 16:30:51 -0700 Subject: [PATCH 04/19] dotgit: during rewriting, re-open packed-refs after locking The file could have been completely replaced while waiting for the lock, so we need to re-open, otherwise we might be reading a stale file that has already been deleted/overwritten. --- storage/filesystem/internal/dotgit/dotgit.go | 30 ++++++++++++++------ 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/storage/filesystem/internal/dotgit/dotgit.go b/storage/filesystem/internal/dotgit/dotgit.go index 330f9d55c..8b1df3d04 100644 --- a/storage/filesystem/internal/dotgit/dotgit.go +++ b/storage/filesystem/internal/dotgit/dotgit.go @@ -421,18 +421,30 @@ func (d *DotGit) rewritePackedRefsWithoutRef(name plumbing.ReferenceName) (err e return err } - doCloseF := true - defer func() { - if doCloseF { - ioutil.CheckClose(f, &err) - } - }() + defer ioutil.CheckClose(f, &err) err = f.Lock() if err != nil { return err } + // Re-open the file after locking, since it could have been + // renamed over by a new file during the Lock process. + pr, err := d.fs.Open(packedRefsPath) + if err != nil { + if os.IsNotExist(err) { + return nil + } + + return err + } + doClosePR := true + defer func() { + if doClosePR { + ioutil.CheckClose(pr, &err) + } + }() + // Creating the temp file in the same directory as the target file // improves our chances for rename operation to be atomic. tmp, err := d.fs.TempFile("", tmpPackedRefsPrefix) @@ -446,7 +458,7 @@ func (d *DotGit) rewritePackedRefsWithoutRef(name plumbing.ReferenceName) (err e } }() - s := bufio.NewScanner(f) + s := bufio.NewScanner(pr) found := false for s.Scan() { line := s.Text() @@ -479,8 +491,8 @@ func (d *DotGit) rewritePackedRefsWithoutRef(name plumbing.ReferenceName) (err e return d.fs.Remove(tmp.Name()) } - doCloseF = false - if err := f.Close(); err != nil { + doClosePR = false + if err := pr.Close(); err != nil { return err } From a6202cacd12aa5ee20c54aff799b0e91330526c5 Mon Sep 17 00:00:00 2001 From: Jeremy Stribling Date: Fri, 3 Nov 2017 10:47:46 -0700 Subject: [PATCH 05/19] dotgit: use bufio for PackRefs Suggested by taruti. Issue: #13 --- storage/filesystem/internal/dotgit/dotgit.go | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/storage/filesystem/internal/dotgit/dotgit.go b/storage/filesystem/internal/dotgit/dotgit.go index 8b1df3d04..9d49945de 100644 --- a/storage/filesystem/internal/dotgit/dotgit.go +++ b/storage/filesystem/internal/dotgit/dotgit.go @@ -615,16 +615,9 @@ func (d *DotGit) CountLooseRefs() (int, error) { // packed, plus all tags. func (d *DotGit) PackRefs() (err error) { // Lock packed-refs, and create it if it doesn't exist yet. - f, err := d.fs.Open(packedRefsPath) + f, err := d.fs.OpenFile(packedRefsPath, os.O_RDWR|os.O_CREATE, 0600) if err != nil { - if os.IsNotExist(err) { - f, err = d.fs.Create(packedRefsPath) - if err != nil { - return err - } - } else { - return err - } + return err } defer ioutil.CheckClose(f, &err) @@ -659,12 +652,17 @@ func (d *DotGit) PackRefs() (err error) { ioutil.CheckClose(tmp, &err) } }() + w := bufio.NewWriter(tmp) for _, ref := range refs { - _, err := tmp.Write([]byte(ref.String() + "\n")) + _, err := w.WriteString(ref.String() + "\n") if err != nil { return err } } + err = w.Flush() + if err != nil { + return err + } // Rename the temp packed-refs file. doCloseTmp = false From ac1914eac3c20efa63de8809229994364ad9639b Mon Sep 17 00:00:00 2001 From: Taru Karttunen Date: Wed, 1 Nov 2017 21:06:37 +0200 Subject: [PATCH 06/19] First pass of prune design --- plumbing/storer/object.go | 14 +++ plumbing/storer/object_test.go | 13 ++ repository.go | 124 +++++++++++++++++++ storage/filesystem/internal/dotgit/dotgit.go | 48 +++++-- storage/filesystem/object.go | 24 ++++ storage/memory/storage.go | 23 ++++ 6 files changed, 236 insertions(+), 10 deletions(-) diff --git a/plumbing/storer/object.go b/plumbing/storer/object.go index e7932114a..5d043cbe6 100644 --- a/plumbing/storer/object.go +++ b/plumbing/storer/object.go @@ -3,6 +3,7 @@ package storer import ( "errors" "io" + "time" "gopkg.in/src-d/go-git.v4/plumbing" ) @@ -36,6 +37,19 @@ type EncodedObjectStorer interface { // // Valid plumbing.ObjectType values are CommitObject, BlobObject, TagObject, IterEncodedObjects(plumbing.ObjectType) (EncodedObjectIter, error) + // HasEncodedObject returns ErrObjNotFound if the object doesn't + // exist. If the object does exist, it returns nil. + HasEncodedObject(plumbing.Hash) error + // ForEachObjectHash iterates over all the (loose) object hashes + // in the repository without necessarily having to read those objects. + // Objects only inside pack files may be omitted. + ForEachObjectHash(func(plumbing.Hash) error) error + // LooseObjectTime looks up the (m)time associated with the + // loose object (that is not in a pack file). Implementations + // may + LooseObjectTime(plumbing.Hash) (time.Time, error) + // DeleteLooseObject deletes a loose object if it exists. + DeleteLooseObject(plumbing.Hash) error } // DeltaObjectStorer is an EncodedObjectStorer that can return delta diff --git a/plumbing/storer/object_test.go b/plumbing/storer/object_test.go index 6bdd25c68..f8b1f73dd 100644 --- a/plumbing/storer/object_test.go +++ b/plumbing/storer/object_test.go @@ -3,6 +3,7 @@ package storer import ( "fmt" "testing" + "time" . "gopkg.in/check.v1" "gopkg.in/src-d/go-git.v4/plumbing" @@ -148,3 +149,15 @@ func (o *MockObjectStorage) IterEncodedObjects(t plumbing.ObjectType) (EncodedOb func (o *MockObjectStorage) Begin() Transaction { return nil } + +func (o *MockObjectStorage) ForEachObjectHash(fun func(plumbing.Hash) error) error { + return nil +} + +func (o *MockObjectStorage) LooseObjectTime(plumbing.Hash) (time.Time, error) { + return time.Time{}, plumbing.ErrObjectNotFound +} + +func (o *MockObjectStorage) DeleteLooseObject(plumbing.Hash) error { + return plumbing.ErrObjectNotFound +} diff --git a/repository.go b/repository.go index b159ff0da..7079fd193 100644 --- a/repository.go +++ b/repository.go @@ -8,10 +8,12 @@ import ( "os" "path/filepath" "strings" + "time" "gopkg.in/src-d/go-git.v4/config" "gopkg.in/src-d/go-git.v4/internal/revision" "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/filemode" "gopkg.in/src-d/go-git.v4/plumbing/object" "gopkg.in/src-d/go-git.v4/plumbing/storer" "gopkg.in/src-d/go-git.v4/storage" @@ -1011,3 +1013,125 @@ func (r *Repository) ResolveRevision(rev plumbing.Revision) (*plumbing.Hash, err return &commit.Hash, nil } + +type PruneHandler func(unreferencedObjectHash plumbing.Hash) error +type PruneOptions struct { + // OnlyObjectsOlderThan if set to non-zero value + // selects only objects older than the time provided. + OnlyObjectsOlderThan time.Time + // Handler is called on matching objects + Handler PruneHandler +} + +// DeleteObject deletes an object from a repository. +// The type conveniently matches PruneHandler. +func (r *Repository) DeleteObject(hash plumbing.Hash) error { + return r.Storer.DeleteLooseObject(hash) +} + +func (r *Repository) Prune(opt PruneOptions) error { + pw := &pruneWalker{ + r: r, + seen: map[plumbing.Hash]struct{}{}, + } + // Walk over all the references in the repo. + it, err := r.Storer.IterReferences() + if err != nil { + return nil + } + defer it.Close() + err = it.ForEach(func(ref *plumbing.Reference) error { + // Exit this iteration early for non-hash references. + if ref.Type() != plumbing.HashReference { + return nil + } + return pw.walkObjectTree(ref.Hash()) + }) + if err != nil { + return err + } + // Now walk all (loose) objects in storage. + err = r.Storer.ForEachObjectHash(func(hash plumbing.Hash) error { + // Get out if we have seen this object. + if pw.isSeen(hash) { + return nil + } + // Otherwise it is a candidate for pruning. + // Check out for too new objects next. + if opt.OnlyObjectsOlderThan != (time.Time{}) { + // Errors here are non-fatal. The object may be e.g. packed. + // Or concurrently deleted. Skip such objects. + t, err := r.Storer.LooseObjectTime(hash) + if err != nil { + return nil + } + // Skip too new objects. + if !t.Before(opt.OnlyObjectsOlderThan) { + return nil + } + } + return opt.Handler(hash) + }) + if err != nil { + return err + } + return nil +} + +type pruneWalker struct { + r *Repository + seen map[plumbing.Hash]struct{} +} + +func (p *pruneWalker) isSeen(hash plumbing.Hash) bool { + _, seen := p.seen[hash] + return seen +} + +func (p *pruneWalker) add(hash plumbing.Hash) { + p.seen[hash] = struct{}{} +} + +func (p *pruneWalker) walkObjectTree(hash plumbing.Hash) error { + // Check if we have already seen, and mark this object + if p.isSeen(hash) { + return nil + } + p.add(hash) + // Fetch the object. + obj, err := object.GetObject(p.r.Storer, hash) + if err != nil { + return fmt.Errorf("Getting object %s failed: %v", hash, err) + } + // Walk all children depending on object type. + switch obj := obj.(type) { + case *object.Commit: + err = p.walkObjectTree(obj.TreeHash) + if err != nil { + return err + } + for _, h := range obj.ParentHashes { + err = p.walkObjectTree(h) + if err != nil { + return err + } + } + case *object.Tree: + for i := range obj.Entries { + // Shortcut for blob objects: + if obj.Entries[i].Mode|0755 == filemode.Executable { + p.add(obj.Entries[i].Hash) + continue + } + // Normal walk for sub-trees (and symlinks etc). + err = p.walkObjectTree(obj.Entries[i].Hash) + if err != nil { + return err + } + } + default: + // Error out on unhandled object types. + return fmt.Errorf("Unknown object %X %s %T\n", obj.ID(), obj.Type(), obj) + } + return nil +} diff --git a/storage/filesystem/internal/dotgit/dotgit.go b/storage/filesystem/internal/dotgit/dotgit.go index 9d49945de..83fe159e6 100644 --- a/storage/filesystem/internal/dotgit/dotgit.go +++ b/storage/filesystem/internal/dotgit/dotgit.go @@ -205,39 +205,67 @@ func (d *DotGit) NewObject() (*ObjectWriter, error) { // Objects returns a slice with the hashes of objects found under the // .git/objects/ directory. func (d *DotGit) Objects() ([]plumbing.Hash, error) { + var objects []plumbing.Hash + err := d.ForEachObjectHash(func(hash plumbing.Hash) error { + objects = append(objects, hash) + return nil + }) + if err != nil { + return nil, err + } + return objects, nil +} + +// Objects returns a slice with the hashes of objects found under the +// .git/objects/ directory. +func (d *DotGit) ForEachObjectHash(fun func(plumbing.Hash) error) error { files, err := d.fs.ReadDir(objectsPath) if err != nil { if os.IsNotExist(err) { - return nil, nil + return nil } - return nil, err + return err } - var objects []plumbing.Hash for _, f := range files { if f.IsDir() && len(f.Name()) == 2 && isHex(f.Name()) { base := f.Name() d, err := d.fs.ReadDir(d.fs.Join(objectsPath, base)) if err != nil { - return nil, err + return err } for _, o := range d { - objects = append(objects, plumbing.NewHash(base+o.Name())) + err = fun(plumbing.NewHash(base + o.Name())) + if err != nil { + return err + } } } } - return objects, nil + return nil } -// Object return a fs.File pointing the object file, if exists -func (d *DotGit) Object(h plumbing.Hash) (billy.File, error) { +func (d *DotGit) objectPath(h plumbing.Hash) string { hash := h.String() - file := d.fs.Join(objectsPath, hash[0:2], hash[2:40]) + return d.fs.Join(objectsPath, hash[0:2], hash[2:40]) +} + +// Object returns a fs.File pointing the object file, if exists +func (d *DotGit) Object(h plumbing.Hash) (billy.File, error) { + return d.fs.Open(d.objectPath(h)) +} + +// ObjectStat returns a os.FileInfo pointing the object file, if exists +func (d *DotGit) ObjectStat(h plumbing.Hash) (os.FileInfo, error) { + return d.fs.Stat(d.objectPath(h)) +} - return d.fs.Open(file) +// ObjectDelete removes the object file, if exists +func (d *DotGit) ObjectDelete(h plumbing.Hash) error { + return d.fs.Remove(d.objectPath(h)) } func (d *DotGit) readReferenceFrom(rd io.Reader, name string) (ref *plumbing.Reference, err error) { diff --git a/storage/filesystem/object.go b/storage/filesystem/object.go index 9690c0eb0..9b2790fc3 100644 --- a/storage/filesystem/object.go +++ b/storage/filesystem/object.go @@ -3,6 +3,7 @@ package filesystem import ( "io" "os" + "time" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/cache" @@ -479,3 +480,26 @@ func hashListAsMap(l []plumbing.Hash) map[plumbing.Hash]bool { return m } + +func (s *ObjectStorage) ForEachObjectHash(fun func(plumbing.Hash) error) error { + err := s.dir.ForEachObjectHash(fun) + if err != nil { + if err == storer.ErrStop { + return nil + } + return err + } + return nil +} + +func (s *ObjectStorage) LooseObjectTime(hash plumbing.Hash) (time.Time, error) { + fi, err := s.dir.ObjectStat(hash) + if err != nil { + return time.Time{}, err + } + return fi.ModTime(), nil +} + +func (s *ObjectStorage) DeleteLooseObject(hash plumbing.Hash) error { + return s.dir.ObjectDelete(hash) +} diff --git a/storage/memory/storage.go b/storage/memory/storage.go index 3d4e84a1c..bfcad34c2 100644 --- a/storage/memory/storage.go +++ b/storage/memory/storage.go @@ -3,6 +3,7 @@ package memory import ( "fmt" + "time" "gopkg.in/src-d/go-git.v4/config" "gopkg.in/src-d/go-git.v4/plumbing" @@ -156,6 +157,28 @@ func (o *ObjectStorage) Begin() storer.Transaction { } } +func (o *ObjectStorage) ForEachObjectHash(fun func(plumbing.Hash) error) error { + for h, _ := range o.Objects { + err := fun(h) + if err != nil { + if err == storer.ErrStop { + return nil + } + return err + } + } + return nil +} + +var errNotSupported = fmt.Errorf("Not supported") + +func (s *ObjectStorage) LooseObjectTime(hash plumbing.Hash) (time.Time, error) { + return time.Time{}, errNotSupported +} +func (s *ObjectStorage) DeleteLooseObject(plumbing.Hash) error { + return errNotSupported +} + type TxObjectStorage struct { Storage *ObjectStorage Objects map[plumbing.Hash]plumbing.EncodedObject From 3f0b1ff37b64108cfed1b57ea4ae1f1566592905 Mon Sep 17 00:00:00 2001 From: Taru Karttunen Date: Mon, 6 Nov 2017 13:30:42 +0200 Subject: [PATCH 07/19] Address CI and move code around --- plumbing/storer/object.go | 6 +- prune.go | 145 ++++++++++++++++++++++++++++++++++++++ repository.go | 124 -------------------------------- 3 files changed, 149 insertions(+), 126 deletions(-) create mode 100644 prune.go diff --git a/plumbing/storer/object.go b/plumbing/storer/object.go index 5d043cbe6..bd34be899 100644 --- a/plumbing/storer/object.go +++ b/plumbing/storer/object.go @@ -43,10 +43,12 @@ type EncodedObjectStorer interface { // ForEachObjectHash iterates over all the (loose) object hashes // in the repository without necessarily having to read those objects. // Objects only inside pack files may be omitted. + // If ErrStop is sent the iteration is stop but no error is returned. ForEachObjectHash(func(plumbing.Hash) error) error // LooseObjectTime looks up the (m)time associated with the - // loose object (that is not in a pack file). Implementations - // may + // loose object (that is not in a pack file). Some + // implementations (e.g. without loose objects) + // always return an error. LooseObjectTime(plumbing.Hash) (time.Time, error) // DeleteLooseObject deletes a loose object if it exists. DeleteLooseObject(plumbing.Hash) error diff --git a/prune.go b/prune.go new file mode 100644 index 000000000..9c3c4ff40 --- /dev/null +++ b/prune.go @@ -0,0 +1,145 @@ +package git + +import ( + "fmt" + "time" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/filemode" + "gopkg.in/src-d/go-git.v4/plumbing/object" + "gopkg.in/src-d/go-git.v4/storage" +) + +type PruneHandler func(unreferencedObjectHash plumbing.Hash) error +type PruneOptions struct { + // OnlyObjectsOlderThan if set to non-zero value + // selects only objects older than the time provided. + OnlyObjectsOlderThan time.Time + // Handler is called on matching objects + Handler PruneHandler +} + +// DeleteObject deletes an object from a repository. +// The type conveniently matches PruneHandler. +func (r *Repository) DeleteObject(hash plumbing.Hash) error { + return r.Storer.DeleteLooseObject(hash) +} + +func (r *Repository) Prune(opt PruneOptions) error { + pw := &pruneWalker{ + Storer: r.Storer, + seen: map[plumbing.Hash]struct{}{}, + } + // Walk over all the references in the repo. + it, err := r.Storer.IterReferences() + if err != nil { + return nil + } + defer it.Close() + err = it.ForEach(func(ref *plumbing.Reference) error { + // Exit this iteration early for non-hash references. + if ref.Type() != plumbing.HashReference { + return nil + } + return pw.walkObjectTree(ref.Hash()) + }) + if err != nil { + return err + } + // Now walk all (loose) objects in storage. + err = r.Storer.ForEachObjectHash(func(hash plumbing.Hash) error { + // Get out if we have seen this object. + if pw.isSeen(hash) { + return nil + } + // Otherwise it is a candidate for pruning. + // Check out for too new objects next. + if opt.OnlyObjectsOlderThan != (time.Time{}) { + // Errors here are non-fatal. The object may be e.g. packed. + // Or concurrently deleted. Skip such objects. + t, err := r.Storer.LooseObjectTime(hash) + if err != nil { + return nil + } + // Skip too new objects. + if !t.Before(opt.OnlyObjectsOlderThan) { + return nil + } + } + return opt.Handler(hash) + }) + if err != nil { + return err + } + return nil +} + +type pruneWalker struct { + Storer storage.Storer + // seen is the set of objects seen in the repo. + // seen map can become huge if walking over large + // repos. Thus using struct{} as the value type. + seen map[plumbing.Hash]struct{} +} + +func (p *pruneWalker) isSeen(hash plumbing.Hash) bool { + _, seen := p.seen[hash] + return seen +} + +func (p *pruneWalker) add(hash plumbing.Hash) { + p.seen[hash] = struct{}{} +} + +// walkObjectTree walks over all objects and remembers references +// to them in the pruneWalker. This is used instead of the revlist +// walks because memory usage is tight with huge repos. +func (p *pruneWalker) walkObjectTree(hash plumbing.Hash) error { + // Check if we have already seen, and mark this object + if p.isSeen(hash) { + return nil + } + p.add(hash) + // Fetch the object. + obj, err := object.GetObject(p.Storer, hash) + if err != nil { + return fmt.Errorf("Getting object %s failed: %v", hash, err) + } + // Walk all children depending on object type. + switch obj := obj.(type) { + case *object.Commit: + err = p.walkObjectTree(obj.TreeHash) + if err != nil { + return err + } + for _, h := range obj.ParentHashes { + err = p.walkObjectTree(h) + if err != nil { + return err + } + } + case *object.Tree: + for i := range obj.Entries { + // Shortcut for blob objects: + // 'or' the lower bits of a mode and check that it + // it matches a filemode.Executable. The type information + // is in the higher bits, but this is the cleanest way + // to handle plain files with different modes. + // Other non-tree objects are somewhat rare, so they + // are not special-cased. + if obj.Entries[i].Mode|0755 == filemode.Executable { + p.add(obj.Entries[i].Hash) + continue + } + // Normal walk for sub-trees (and symlinks etc). + err = p.walkObjectTree(obj.Entries[i].Hash) + if err != nil { + return err + } + } + default: + // Error out on unhandled object types. + return fmt.Errorf("Unknown object %X %s %T\n", obj.ID(), obj.Type(), obj) + } + return nil +} diff --git a/repository.go b/repository.go index 7079fd193..b159ff0da 100644 --- a/repository.go +++ b/repository.go @@ -8,12 +8,10 @@ import ( "os" "path/filepath" "strings" - "time" "gopkg.in/src-d/go-git.v4/config" "gopkg.in/src-d/go-git.v4/internal/revision" "gopkg.in/src-d/go-git.v4/plumbing" - "gopkg.in/src-d/go-git.v4/plumbing/filemode" "gopkg.in/src-d/go-git.v4/plumbing/object" "gopkg.in/src-d/go-git.v4/plumbing/storer" "gopkg.in/src-d/go-git.v4/storage" @@ -1013,125 +1011,3 @@ func (r *Repository) ResolveRevision(rev plumbing.Revision) (*plumbing.Hash, err return &commit.Hash, nil } - -type PruneHandler func(unreferencedObjectHash plumbing.Hash) error -type PruneOptions struct { - // OnlyObjectsOlderThan if set to non-zero value - // selects only objects older than the time provided. - OnlyObjectsOlderThan time.Time - // Handler is called on matching objects - Handler PruneHandler -} - -// DeleteObject deletes an object from a repository. -// The type conveniently matches PruneHandler. -func (r *Repository) DeleteObject(hash plumbing.Hash) error { - return r.Storer.DeleteLooseObject(hash) -} - -func (r *Repository) Prune(opt PruneOptions) error { - pw := &pruneWalker{ - r: r, - seen: map[plumbing.Hash]struct{}{}, - } - // Walk over all the references in the repo. - it, err := r.Storer.IterReferences() - if err != nil { - return nil - } - defer it.Close() - err = it.ForEach(func(ref *plumbing.Reference) error { - // Exit this iteration early for non-hash references. - if ref.Type() != plumbing.HashReference { - return nil - } - return pw.walkObjectTree(ref.Hash()) - }) - if err != nil { - return err - } - // Now walk all (loose) objects in storage. - err = r.Storer.ForEachObjectHash(func(hash plumbing.Hash) error { - // Get out if we have seen this object. - if pw.isSeen(hash) { - return nil - } - // Otherwise it is a candidate for pruning. - // Check out for too new objects next. - if opt.OnlyObjectsOlderThan != (time.Time{}) { - // Errors here are non-fatal. The object may be e.g. packed. - // Or concurrently deleted. Skip such objects. - t, err := r.Storer.LooseObjectTime(hash) - if err != nil { - return nil - } - // Skip too new objects. - if !t.Before(opt.OnlyObjectsOlderThan) { - return nil - } - } - return opt.Handler(hash) - }) - if err != nil { - return err - } - return nil -} - -type pruneWalker struct { - r *Repository - seen map[plumbing.Hash]struct{} -} - -func (p *pruneWalker) isSeen(hash plumbing.Hash) bool { - _, seen := p.seen[hash] - return seen -} - -func (p *pruneWalker) add(hash plumbing.Hash) { - p.seen[hash] = struct{}{} -} - -func (p *pruneWalker) walkObjectTree(hash plumbing.Hash) error { - // Check if we have already seen, and mark this object - if p.isSeen(hash) { - return nil - } - p.add(hash) - // Fetch the object. - obj, err := object.GetObject(p.r.Storer, hash) - if err != nil { - return fmt.Errorf("Getting object %s failed: %v", hash, err) - } - // Walk all children depending on object type. - switch obj := obj.(type) { - case *object.Commit: - err = p.walkObjectTree(obj.TreeHash) - if err != nil { - return err - } - for _, h := range obj.ParentHashes { - err = p.walkObjectTree(h) - if err != nil { - return err - } - } - case *object.Tree: - for i := range obj.Entries { - // Shortcut for blob objects: - if obj.Entries[i].Mode|0755 == filemode.Executable { - p.add(obj.Entries[i].Hash) - continue - } - // Normal walk for sub-trees (and symlinks etc). - err = p.walkObjectTree(obj.Entries[i].Hash) - if err != nil { - return err - } - } - default: - // Error out on unhandled object types. - return fmt.Errorf("Unknown object %X %s %T\n", obj.ID(), obj.Type(), obj) - } - return nil -} From fae438980c3e17cb04f84ce92b99cd3c835e3e18 Mon Sep 17 00:00:00 2001 From: Taru Karttunen Date: Wed, 15 Nov 2017 18:33:41 +0200 Subject: [PATCH 08/19] Support for repacking objects --- plumbing/storer/object.go | 5 ++ plumbing/storer/object_test.go | 4 ++ repository.go | 62 ++++++++++++++++++++ storage/filesystem/internal/dotgit/dotgit.go | 34 +++++++---- storage/filesystem/object.go | 8 +++ storage/memory/storage.go | 7 +++ 6 files changed, 107 insertions(+), 13 deletions(-) diff --git a/plumbing/storer/object.go b/plumbing/storer/object.go index bd34be899..29e0090f8 100644 --- a/plumbing/storer/object.go +++ b/plumbing/storer/object.go @@ -52,6 +52,11 @@ type EncodedObjectStorer interface { LooseObjectTime(plumbing.Hash) (time.Time, error) // DeleteLooseObject deletes a loose object if it exists. DeleteLooseObject(plumbing.Hash) error + // ObjectPacks returns hashes of object packs if the underlying + // implementation has pack files. + ObjectPacks() ([]plumbing.Hash, error) + // DeleteObjectPackAndIndex deletes an object pack and the corresponding index file if they exist. + DeleteObjectPackAndIndex(plumbing.Hash) error } // DeltaObjectStorer is an EncodedObjectStorer that can return delta diff --git a/plumbing/storer/object_test.go b/plumbing/storer/object_test.go index f8b1f73dd..68e8e1033 100644 --- a/plumbing/storer/object_test.go +++ b/plumbing/storer/object_test.go @@ -161,3 +161,7 @@ func (o *MockObjectStorage) LooseObjectTime(plumbing.Hash) (time.Time, error) { func (o *MockObjectStorage) DeleteLooseObject(plumbing.Hash) error { return plumbing.ErrObjectNotFound } + +func (o *MockObjectStorage) ObjectPacks() ([]plumbing.Hash, error) { + return nil, nil +} diff --git a/repository.go b/repository.go index b159ff0da..51cd07750 100644 --- a/repository.go +++ b/repository.go @@ -12,6 +12,7 @@ import ( "gopkg.in/src-d/go-git.v4/config" "gopkg.in/src-d/go-git.v4/internal/revision" "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" "gopkg.in/src-d/go-git.v4/plumbing/object" "gopkg.in/src-d/go-git.v4/plumbing/storer" "gopkg.in/src-d/go-git.v4/storage" @@ -1011,3 +1012,64 @@ func (r *Repository) ResolveRevision(rev plumbing.Revision) (*plumbing.Hash, err return &commit.Hash, nil } + +func (r *Repository) RepackObjects() (err error) { + // Get the existing object packs. + hs, err := r.Storer.ObjectPacks() + if err != nil { + return err + } + + // Create a new pack. + nh, err := r.createNewObjectPack() + if err != nil { + return err + } + + // Delete old packs. + for _, h := range hs { + // Skip if new hash is the same as an old one. + if h == nh { + continue + } + err = r.Storer.DeleteObjectPackAndIndex(h) + if err != nil { + return err + } + } + + return nil +} + +// createNewObjectPack is a helper for RepackObjects taking care +// of creating a new pack. It is used so the the PackfileWriter +// deferred close has the right scope. +func (r *Repository) createNewObjectPack() (h plumbing.Hash, err error) { + pfw, ok := r.Storer.(storer.PackfileWriter) + if !ok { + return h, fmt.Errorf("Repository storer is not a storer.PackfileWriter") + } + wc, err := pfw.PackfileWriter(nil) + if err != nil { + return h, err + } + defer ioutil.CheckClose(wc, &err) + var objs []plumbing.Hash + iter, err := r.Storer.IterEncodedObjects(plumbing.AnyObject) + if err != nil { + return h, err + } + err = iter.ForEach(func(obj plumbing.EncodedObject) error { + objs = append(objs, obj.Hash()) + return nil + }) + if err != nil { + return h, err + } + enc := packfile.NewEncoder(wc, r.Storer, false) + h, err = enc.Encode(objs, 10, nil) + if err != nil { + return h, err + } + return h, err +} diff --git a/storage/filesystem/internal/dotgit/dotgit.go b/storage/filesystem/internal/dotgit/dotgit.go index 83fe159e6..09688c184 100644 --- a/storage/filesystem/internal/dotgit/dotgit.go +++ b/storage/filesystem/internal/dotgit/dotgit.go @@ -166,11 +166,12 @@ func (d *DotGit) ObjectPacks() ([]plumbing.Hash, error) { return packs, nil } -// ObjectPack returns a fs.File of the given packfile -func (d *DotGit) ObjectPack(hash plumbing.Hash) (billy.File, error) { - file := d.fs.Join(objectsPath, packPath, fmt.Sprintf("pack-%s.pack", hash.String())) +func (d *DotGit) objectPackPath(hash plumbing.Hash, extension string) string { + return d.fs.Join(objectsPath, packPath, fmt.Sprintf("pack-%s.%s", hash.String(), extension)) +} - pack, err := d.fs.Open(file) +func (d *DotGit) objectPackOpen(hash plumbing.Hash, extension string) (billy.File, error) { + pack, err := d.fs.Open(d.objectPackPath(hash, extension)) if err != nil { if os.IsNotExist(err) { return nil, ErrPackfileNotFound @@ -182,19 +183,26 @@ func (d *DotGit) ObjectPack(hash plumbing.Hash) (billy.File, error) { return pack, nil } +// ObjectPack returns a fs.File of the given packfile +func (d *DotGit) ObjectPack(hash plumbing.Hash) (billy.File, error) { + return d.objectPackOpen(hash, `pack`) +} + // ObjectPackIdx returns a fs.File of the index file for a given packfile func (d *DotGit) ObjectPackIdx(hash plumbing.Hash) (billy.File, error) { - file := d.fs.Join(objectsPath, packPath, fmt.Sprintf("pack-%s.idx", hash.String())) - idx, err := d.fs.Open(file) - if err != nil { - if os.IsNotExist(err) { - return nil, ErrPackfileNotFound - } + return d.objectPackOpen(hash, `idx`) +} - return nil, err +func (d *DotGit) DeleteObjectPackAndIndex(hash plumbing.Hash) error { + err := d.fs.Remove(d.objectPackPath(hash, `pack`)) + if err != nil { + return err } - - return idx, nil + err = d.fs.Remove(d.objectPackPath(hash, `idx`)) + if err != nil { + return err + } + return nil } // NewObject return a writer for a new object file. diff --git a/storage/filesystem/object.go b/storage/filesystem/object.go index 9b2790fc3..f23520417 100644 --- a/storage/filesystem/object.go +++ b/storage/filesystem/object.go @@ -503,3 +503,11 @@ func (s *ObjectStorage) LooseObjectTime(hash plumbing.Hash) (time.Time, error) { func (s *ObjectStorage) DeleteLooseObject(hash plumbing.Hash) error { return s.dir.ObjectDelete(hash) } + +func (s *ObjectStorage) ObjectPacks() ([]plumbing.Hash, error) { + return s.dir.ObjectPacks() +} + +func (s *ObjectStorage) DeleteObjectPackAndIndex(h plumbing.Hash) error { + return s.dir.DeleteObjectPackAndIndex(h) +} diff --git a/storage/memory/storage.go b/storage/memory/storage.go index bfcad34c2..5049036e9 100644 --- a/storage/memory/storage.go +++ b/storage/memory/storage.go @@ -170,6 +170,13 @@ func (o *ObjectStorage) ForEachObjectHash(fun func(plumbing.Hash) error) error { return nil } +func (o *ObjectStorage) ObjectPacks() ([]plumbing.Hash, error) { + return nil, nil +} +func (o *ObjectStorage) DeleteObjectPackAndIndex(plumbing.Hash) error { + return errNotSupported +} + var errNotSupported = fmt.Errorf("Not supported") func (s *ObjectStorage) LooseObjectTime(hash plumbing.Hash) (time.Time, error) { From d96582a6fb7df092c2856f56decd33034fe0ade3 Mon Sep 17 00:00:00 2001 From: Taru Karttunen Date: Thu, 16 Nov 2017 21:00:51 +0200 Subject: [PATCH 09/19] Make object repacking more configurable --- plumbing/storer/object.go | 5 ++-- plumbing/storer/object_test.go | 4 +++ repository.go | 26 ++++++++++++++------ storage/filesystem/internal/dotgit/dotgit.go | 16 ++++++++++-- storage/filesystem/object.go | 4 +-- storage/memory/storage.go | 4 +-- 6 files changed, 44 insertions(+), 15 deletions(-) diff --git a/plumbing/storer/object.go b/plumbing/storer/object.go index 29e0090f8..e5f98d78d 100644 --- a/plumbing/storer/object.go +++ b/plumbing/storer/object.go @@ -55,8 +55,9 @@ type EncodedObjectStorer interface { // ObjectPacks returns hashes of object packs if the underlying // implementation has pack files. ObjectPacks() ([]plumbing.Hash, error) - // DeleteObjectPackAndIndex deletes an object pack and the corresponding index file if they exist. - DeleteObjectPackAndIndex(plumbing.Hash) error + // DeleteOldObjectPackAndIndex deletes an object pack and the corresponding index file if they exist. + // Deletion is only performed if the pack is older than the supplied time (or the time is zero). + DeleteOldObjectPackAndIndex(plumbing.Hash, time.Time) error } // DeltaObjectStorer is an EncodedObjectStorer that can return delta diff --git a/plumbing/storer/object_test.go b/plumbing/storer/object_test.go index 68e8e1033..da0db8182 100644 --- a/plumbing/storer/object_test.go +++ b/plumbing/storer/object_test.go @@ -165,3 +165,7 @@ func (o *MockObjectStorage) DeleteLooseObject(plumbing.Hash) error { func (o *MockObjectStorage) ObjectPacks() ([]plumbing.Hash, error) { return nil, nil } + +func (o *MockObjectStorage) DeleteOldObjectPackAndIndex(plumbing.Hash, time.Time) error { + return plumbing.ErrObjectNotFound +} diff --git a/repository.go b/repository.go index 51cd07750..999cc6dd5 100644 --- a/repository.go +++ b/repository.go @@ -8,6 +8,7 @@ import ( "os" "path/filepath" "strings" + "time" "gopkg.in/src-d/go-git.v4/config" "gopkg.in/src-d/go-git.v4/internal/revision" @@ -1013,7 +1014,18 @@ func (r *Repository) ResolveRevision(rev plumbing.Revision) (*plumbing.Hash, err return &commit.Hash, nil } -func (r *Repository) RepackObjects() (err error) { +type RepackConfig struct { + // UseRefDeltas configures whether packfile encoder will use reference deltas. + // By default OFSDeltaObject is used. + UseRefDeltas bool + // PackWindow for packing objects. + PackWindow uint + // OnlyDeletePacksOlderThan if set to non-zero value + // selects only objects older than the time provided. + OnlyDeletePacksOlderThan time.Time +} + +func (r *Repository) RepackObjects(cfg *RepackConfig) (err error) { // Get the existing object packs. hs, err := r.Storer.ObjectPacks() if err != nil { @@ -1021,7 +1033,7 @@ func (r *Repository) RepackObjects() (err error) { } // Create a new pack. - nh, err := r.createNewObjectPack() + nh, err := r.createNewObjectPack(cfg) if err != nil { return err } @@ -1032,7 +1044,7 @@ func (r *Repository) RepackObjects() (err error) { if h == nh { continue } - err = r.Storer.DeleteObjectPackAndIndex(h) + err = r.Storer.DeleteOldObjectPackAndIndex(h, cfg.OnlyDeletePacksOlderThan) if err != nil { return err } @@ -1044,12 +1056,12 @@ func (r *Repository) RepackObjects() (err error) { // createNewObjectPack is a helper for RepackObjects taking care // of creating a new pack. It is used so the the PackfileWriter // deferred close has the right scope. -func (r *Repository) createNewObjectPack() (h plumbing.Hash, err error) { +func (r *Repository) createNewObjectPack(cfg *RepackConfig) (h plumbing.Hash, err error) { pfw, ok := r.Storer.(storer.PackfileWriter) if !ok { return h, fmt.Errorf("Repository storer is not a storer.PackfileWriter") } - wc, err := pfw.PackfileWriter(nil) + wc, err := pfw.PackfileWriter() if err != nil { return h, err } @@ -1066,8 +1078,8 @@ func (r *Repository) createNewObjectPack() (h plumbing.Hash, err error) { if err != nil { return h, err } - enc := packfile.NewEncoder(wc, r.Storer, false) - h, err = enc.Encode(objs, 10, nil) + enc := packfile.NewEncoder(wc, r.Storer, cfg.UseRefDeltas) + h, err = enc.Encode(objs, cfg.PackWindow) if err != nil { return h, err } diff --git a/storage/filesystem/internal/dotgit/dotgit.go b/storage/filesystem/internal/dotgit/dotgit.go index 09688c184..cd4f51728 100644 --- a/storage/filesystem/internal/dotgit/dotgit.go +++ b/storage/filesystem/internal/dotgit/dotgit.go @@ -9,6 +9,7 @@ import ( stdioutil "io/ioutil" "os" "strings" + "time" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/utils/ioutil" @@ -193,8 +194,19 @@ func (d *DotGit) ObjectPackIdx(hash plumbing.Hash) (billy.File, error) { return d.objectPackOpen(hash, `idx`) } -func (d *DotGit) DeleteObjectPackAndIndex(hash plumbing.Hash) error { - err := d.fs.Remove(d.objectPackPath(hash, `pack`)) +func (d *DotGit) DeleteOldObjectPackAndIndex(hash plumbing.Hash, t time.Time) error { + path := d.objectPackPath(hash, `pack`) + if !t.IsZero() { + fi, err := d.fs.Stat(path) + if err != nil { + return err + } + // too new, skip deletion. + if !fi.ModTime().Before(t) { + return nil + } + } + err := d.fs.Remove(path) if err != nil { return err } diff --git a/storage/filesystem/object.go b/storage/filesystem/object.go index f23520417..13b8ddb15 100644 --- a/storage/filesystem/object.go +++ b/storage/filesystem/object.go @@ -508,6 +508,6 @@ func (s *ObjectStorage) ObjectPacks() ([]plumbing.Hash, error) { return s.dir.ObjectPacks() } -func (s *ObjectStorage) DeleteObjectPackAndIndex(h plumbing.Hash) error { - return s.dir.DeleteObjectPackAndIndex(h) +func (s *ObjectStorage) DeleteOldObjectPackAndIndex(h plumbing.Hash, t time.Time) error { + return s.dir.DeleteOldObjectPackAndIndex(h, t) } diff --git a/storage/memory/storage.go b/storage/memory/storage.go index 5049036e9..2d1a4b65c 100644 --- a/storage/memory/storage.go +++ b/storage/memory/storage.go @@ -173,8 +173,8 @@ func (o *ObjectStorage) ForEachObjectHash(fun func(plumbing.Hash) error) error { func (o *ObjectStorage) ObjectPacks() ([]plumbing.Hash, error) { return nil, nil } -func (o *ObjectStorage) DeleteObjectPackAndIndex(plumbing.Hash) error { - return errNotSupported +func (o *ObjectStorage) DeleteOldObjectPackAndIndex(plumbing.Hash, time.Time) error { + return nil } var errNotSupported = fmt.Errorf("Not supported") From 9dcb096416b6ad16994763fcbc029bcfa95730e8 Mon Sep 17 00:00:00 2001 From: Taru Karttunen Date: Tue, 21 Nov 2017 16:03:30 +0200 Subject: [PATCH 10/19] Use Storer.Config pack window when repacking objects --- repository.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/repository.go b/repository.go index 999cc6dd5..1191b96df 100644 --- a/repository.go +++ b/repository.go @@ -1018,8 +1018,6 @@ type RepackConfig struct { // UseRefDeltas configures whether packfile encoder will use reference deltas. // By default OFSDeltaObject is used. UseRefDeltas bool - // PackWindow for packing objects. - PackWindow uint // OnlyDeletePacksOlderThan if set to non-zero value // selects only objects older than the time provided. OnlyDeletePacksOlderThan time.Time @@ -1078,8 +1076,12 @@ func (r *Repository) createNewObjectPack(cfg *RepackConfig) (h plumbing.Hash, er if err != nil { return h, err } + scfg, err := r.Storer.Config() + if err != nil { + return h, err + } enc := packfile.NewEncoder(wc, r.Storer, cfg.UseRefDeltas) - h, err = enc.Encode(objs, cfg.PackWindow) + h, err = enc.Encode(objs, scfg.Pack.Window) if err != nil { return h, err } From f28e4477dfe49a36dbd55027f8d1133c324bdac5 Mon Sep 17 00:00:00 2001 From: Taru Karttunen Date: Tue, 21 Nov 2017 17:33:15 +0200 Subject: [PATCH 11/19] Make prune object walker generic --- object_walker.go | 105 +++++++++++++++++++++++++++++++++++++++++++++++ prune.go | 93 +---------------------------------------- 2 files changed, 107 insertions(+), 91 deletions(-) create mode 100644 object_walker.go diff --git a/object_walker.go b/object_walker.go new file mode 100644 index 000000000..8bae1faea --- /dev/null +++ b/object_walker.go @@ -0,0 +1,105 @@ +package git + +import ( + "fmt" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/filemode" + "gopkg.in/src-d/go-git.v4/plumbing/object" + "gopkg.in/src-d/go-git.v4/storage" +) + +type objectWalker struct { + Storer storage.Storer + // seen is the set of objects seen in the repo. + // seen map can become huge if walking over large + // repos. Thus using struct{} as the value type. + seen map[plumbing.Hash]struct{} +} + +func newObjectWalker(s storage.Storer) *objectWalker { + return &objectWalker{s, map[plumbing.Hash]struct{}{}} +} + +// walkAllRefs walks all (hash) refererences from the repo. +func (p *objectWalker) walkAllRefs() error { + // Walk over all the references in the repo. + it, err := p.Storer.IterReferences() + if err != nil { + return err + } + defer it.Close() + err = it.ForEach(func(ref *plumbing.Reference) error { + // Exit this iteration early for non-hash references. + if ref.Type() != plumbing.HashReference { + return nil + } + return p.walkObjectTree(ref.Hash()) + }) + if err != nil { + return err + } + return nil +} + +func (p *objectWalker) isSeen(hash plumbing.Hash) bool { + _, seen := p.seen[hash] + return seen +} + +func (p *objectWalker) add(hash plumbing.Hash) { + p.seen[hash] = struct{}{} +} + +// walkObjectTree walks over all objects and remembers references +// to them in the objectWalker. This is used instead of the revlist +// walks because memory usage is tight with huge repos. +func (p *objectWalker) walkObjectTree(hash plumbing.Hash) error { + // Check if we have already seen, and mark this object + if p.isSeen(hash) { + return nil + } + p.add(hash) + // Fetch the object. + obj, err := object.GetObject(p.Storer, hash) + if err != nil { + return fmt.Errorf("Getting object %s failed: %v", hash, err) + } + // Walk all children depending on object type. + switch obj := obj.(type) { + case *object.Commit: + err = p.walkObjectTree(obj.TreeHash) + if err != nil { + return err + } + for _, h := range obj.ParentHashes { + err = p.walkObjectTree(h) + if err != nil { + return err + } + } + case *object.Tree: + for i := range obj.Entries { + // Shortcut for blob objects: + // 'or' the lower bits of a mode and check that it + // it matches a filemode.Executable. The type information + // is in the higher bits, but this is the cleanest way + // to handle plain files with different modes. + // Other non-tree objects are somewhat rare, so they + // are not special-cased. + if obj.Entries[i].Mode|0755 == filemode.Executable { + p.add(obj.Entries[i].Hash) + continue + } + // Normal walk for sub-trees (and symlinks etc). + err = p.walkObjectTree(obj.Entries[i].Hash) + if err != nil { + return err + } + } + default: + // Error out on unhandled object types. + return fmt.Errorf("Unknown object %X %s %T\n", obj.ID(), obj.Type(), obj) + } + return nil +} diff --git a/prune.go b/prune.go index 9c3c4ff40..fce3bfdfc 100644 --- a/prune.go +++ b/prune.go @@ -1,13 +1,9 @@ package git import ( - "fmt" "time" "gopkg.in/src-d/go-git.v4/plumbing" - "gopkg.in/src-d/go-git.v4/plumbing/filemode" - "gopkg.in/src-d/go-git.v4/plumbing/object" - "gopkg.in/src-d/go-git.v4/storage" ) type PruneHandler func(unreferencedObjectHash plumbing.Hash) error @@ -26,23 +22,8 @@ func (r *Repository) DeleteObject(hash plumbing.Hash) error { } func (r *Repository) Prune(opt PruneOptions) error { - pw := &pruneWalker{ - Storer: r.Storer, - seen: map[plumbing.Hash]struct{}{}, - } - // Walk over all the references in the repo. - it, err := r.Storer.IterReferences() - if err != nil { - return nil - } - defer it.Close() - err = it.ForEach(func(ref *plumbing.Reference) error { - // Exit this iteration early for non-hash references. - if ref.Type() != plumbing.HashReference { - return nil - } - return pw.walkObjectTree(ref.Hash()) - }) + pw := newObjectWalker(r.Storer) + err := pw.walkAllRefs() if err != nil { return err } @@ -73,73 +54,3 @@ func (r *Repository) Prune(opt PruneOptions) error { } return nil } - -type pruneWalker struct { - Storer storage.Storer - // seen is the set of objects seen in the repo. - // seen map can become huge if walking over large - // repos. Thus using struct{} as the value type. - seen map[plumbing.Hash]struct{} -} - -func (p *pruneWalker) isSeen(hash plumbing.Hash) bool { - _, seen := p.seen[hash] - return seen -} - -func (p *pruneWalker) add(hash plumbing.Hash) { - p.seen[hash] = struct{}{} -} - -// walkObjectTree walks over all objects and remembers references -// to them in the pruneWalker. This is used instead of the revlist -// walks because memory usage is tight with huge repos. -func (p *pruneWalker) walkObjectTree(hash plumbing.Hash) error { - // Check if we have already seen, and mark this object - if p.isSeen(hash) { - return nil - } - p.add(hash) - // Fetch the object. - obj, err := object.GetObject(p.Storer, hash) - if err != nil { - return fmt.Errorf("Getting object %s failed: %v", hash, err) - } - // Walk all children depending on object type. - switch obj := obj.(type) { - case *object.Commit: - err = p.walkObjectTree(obj.TreeHash) - if err != nil { - return err - } - for _, h := range obj.ParentHashes { - err = p.walkObjectTree(h) - if err != nil { - return err - } - } - case *object.Tree: - for i := range obj.Entries { - // Shortcut for blob objects: - // 'or' the lower bits of a mode and check that it - // it matches a filemode.Executable. The type information - // is in the higher bits, but this is the cleanest way - // to handle plain files with different modes. - // Other non-tree objects are somewhat rare, so they - // are not special-cased. - if obj.Entries[i].Mode|0755 == filemode.Executable { - p.add(obj.Entries[i].Hash) - continue - } - // Normal walk for sub-trees (and symlinks etc). - err = p.walkObjectTree(obj.Entries[i].Hash) - if err != nil { - return err - } - } - default: - // Error out on unhandled object types. - return fmt.Errorf("Unknown object %X %s %T\n", obj.ID(), obj.Type(), obj) - } - return nil -} From 2de4f034288bac895b0a87dcfb5cc3a2f9026f43 Mon Sep 17 00:00:00 2001 From: Taru Karttunen Date: Tue, 21 Nov 2017 17:39:24 +0200 Subject: [PATCH 12/19] Use object walker in repacking code --- repository.go | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/repository.go b/repository.go index 1191b96df..d96399b30 100644 --- a/repository.go +++ b/repository.go @@ -1055,6 +1055,15 @@ func (r *Repository) RepackObjects(cfg *RepackConfig) (err error) { // of creating a new pack. It is used so the the PackfileWriter // deferred close has the right scope. func (r *Repository) createNewObjectPack(cfg *RepackConfig) (h plumbing.Hash, err error) { + ow := newObjectWalker(r.Storer) + err = ow.walkAllRefs() + if err != nil { + return h, err + } + objs := make([]plumbing.Hash, 0, len(ow.seen)) + for h := range ow.seen { + objs = append(objs, h) + } pfw, ok := r.Storer.(storer.PackfileWriter) if !ok { return h, fmt.Errorf("Repository storer is not a storer.PackfileWriter") @@ -1064,18 +1073,6 @@ func (r *Repository) createNewObjectPack(cfg *RepackConfig) (h plumbing.Hash, er return h, err } defer ioutil.CheckClose(wc, &err) - var objs []plumbing.Hash - iter, err := r.Storer.IterEncodedObjects(plumbing.AnyObject) - if err != nil { - return h, err - } - err = iter.ForEach(func(obj plumbing.EncodedObject) error { - objs = append(objs, obj.Hash()) - return nil - }) - if err != nil { - return h, err - } scfg, err := r.Storer.Config() if err != nil { return h, err From aa092f5474da3e9c3ff4f40b88849726b645f39f Mon Sep 17 00:00:00 2001 From: Jeremy Stribling Date: Fri, 22 Sep 2017 21:14:17 -0700 Subject: [PATCH 13/19] plumbing: add `HasEncodedObject` method to Storer This allows the user to check whether an object exists, without reading all the object data from storage. Issue: KBFS-2445 --- plumbing/storer/object_test.go | 9 +++++++++ storage/filesystem/object.go | 26 ++++++++++++++++++++++++++ storage/memory/storage.go | 8 ++++++++ 3 files changed, 43 insertions(+) diff --git a/plumbing/storer/object_test.go b/plumbing/storer/object_test.go index da0db8182..9a6959db6 100644 --- a/plumbing/storer/object_test.go +++ b/plumbing/storer/object_test.go @@ -133,6 +133,15 @@ func (o *MockObjectStorage) SetEncodedObject(obj plumbing.EncodedObject) (plumbi return plumbing.ZeroHash, nil } +func (o *MockObjectStorage) HasEncodedObject(h plumbing.Hash) error { + for _, o := range o.db { + if o.Hash() == h { + return nil + } + } + return plumbing.ErrObjectNotFound +} + func (o *MockObjectStorage) EncodedObject(t plumbing.ObjectType, h plumbing.Hash) (plumbing.EncodedObject, error) { for _, o := range o.db { if o.Hash() == h { diff --git a/storage/filesystem/object.go b/storage/filesystem/object.go index 13b8ddb15..34501ec83 100644 --- a/storage/filesystem/object.go +++ b/storage/filesystem/object.go @@ -126,6 +126,32 @@ func (s *ObjectStorage) SetEncodedObject(o plumbing.EncodedObject) (plumbing.Has return o.Hash(), err } +// HasEncodedObject returns nil if the object exists, without actually +// reading the object data from storage. +func (s *ObjectStorage) HasEncodedObject(h plumbing.Hash) (err error) { + // Check unpacked objects + f, err := s.dir.Object(h) + if err != nil { + if !os.IsNotExist(err) { + return err + } + // Fall through to check packed objects. + } else { + defer ioutil.CheckClose(f, &err) + return nil + } + + // Check packed objects. + if err := s.requireIndex(); err != nil { + return err + } + _, _, offset := s.findObjectInPackfile(h) + if offset == -1 { + return plumbing.ErrObjectNotFound + } + return nil +} + // EncodedObject returns the object with the given hash, by searching for it in // the packfile and the git object directories. func (s *ObjectStorage) EncodedObject(t plumbing.ObjectType, h plumbing.Hash) (plumbing.EncodedObject, error) { diff --git a/storage/memory/storage.go b/storage/memory/storage.go index 2d1a4b65c..c76d163ab 100644 --- a/storage/memory/storage.go +++ b/storage/memory/storage.go @@ -115,6 +115,14 @@ func (o *ObjectStorage) SetEncodedObject(obj plumbing.EncodedObject) (plumbing.H return h, nil } +func (o *ObjectStorage) HasEncodedObject(h plumbing.Hash) (err error) { + _, ok := o.Objects[h] + if !ok { + return plumbing.ErrObjectNotFound + } + return nil +} + func (o *ObjectStorage) EncodedObject(t plumbing.ObjectType, h plumbing.Hash) (plumbing.EncodedObject, error) { obj, ok := o.Objects[h] if !ok || (plumbing.AnyObject != t && obj.Type() != t) { From b18457df6a1f75283d95999fde5c162ba1a19651 Mon Sep 17 00:00:00 2001 From: Jeremy Stribling Date: Wed, 29 Nov 2017 13:57:36 -0800 Subject: [PATCH 14/19] storage: some minor code cleanup Suggested by mcuadros. Issue: #669 --- prune.go | 6 +----- storage/filesystem/object.go | 9 +++------ storage/memory/storage.go | 3 +-- 3 files changed, 5 insertions(+), 13 deletions(-) diff --git a/prune.go b/prune.go index fce3bfdfc..81f2582ee 100644 --- a/prune.go +++ b/prune.go @@ -28,7 +28,7 @@ func (r *Repository) Prune(opt PruneOptions) error { return err } // Now walk all (loose) objects in storage. - err = r.Storer.ForEachObjectHash(func(hash plumbing.Hash) error { + return r.Storer.ForEachObjectHash(func(hash plumbing.Hash) error { // Get out if we have seen this object. if pw.isSeen(hash) { return nil @@ -49,8 +49,4 @@ func (r *Repository) Prune(opt PruneOptions) error { } return opt.Handler(hash) }) - if err != nil { - return err - } - return nil } diff --git a/storage/filesystem/object.go b/storage/filesystem/object.go index 34501ec83..6ca67cc94 100644 --- a/storage/filesystem/object.go +++ b/storage/filesystem/object.go @@ -509,13 +509,10 @@ func hashListAsMap(l []plumbing.Hash) map[plumbing.Hash]bool { func (s *ObjectStorage) ForEachObjectHash(fun func(plumbing.Hash) error) error { err := s.dir.ForEachObjectHash(fun) - if err != nil { - if err == storer.ErrStop { - return nil - } - return err + if err == storer.ErrStop { + return nil } - return nil + return err } func (s *ObjectStorage) LooseObjectTime(hash plumbing.Hash) (time.Time, error) { diff --git a/storage/memory/storage.go b/storage/memory/storage.go index c76d163ab..0f66f1ede 100644 --- a/storage/memory/storage.go +++ b/storage/memory/storage.go @@ -116,8 +116,7 @@ func (o *ObjectStorage) SetEncodedObject(obj plumbing.EncodedObject) (plumbing.H } func (o *ObjectStorage) HasEncodedObject(h plumbing.Hash) (err error) { - _, ok := o.Objects[h] - if !ok { + if _, ok := o.Objects[h]; !ok { return plumbing.ErrObjectNotFound } return nil From 4c1569511db5e1d26e42e9cd8dadb9e65ccafb20 Mon Sep 17 00:00:00 2001 From: Jeremy Stribling Date: Wed, 29 Nov 2017 14:15:32 -0800 Subject: [PATCH 15/19] storer: separate loose and packed object mgmt into optional ifaces Suggested by mcuadros. --- plumbing/storer/object.go | 40 +++++++++++++++++++++------------- plumbing/storer/object_test.go | 21 ------------------ prune.go | 20 ++++++++++++++--- repository.go | 26 +++++++++++++--------- 4 files changed, 58 insertions(+), 49 deletions(-) diff --git a/plumbing/storer/object.go b/plumbing/storer/object.go index e5f98d78d..f1d19ef24 100644 --- a/plumbing/storer/object.go +++ b/plumbing/storer/object.go @@ -40,6 +40,26 @@ type EncodedObjectStorer interface { // HasEncodedObject returns ErrObjNotFound if the object doesn't // exist. If the object does exist, it returns nil. HasEncodedObject(plumbing.Hash) error +} + +// DeltaObjectStorer is an EncodedObjectStorer that can return delta +// objects. +type DeltaObjectStorer interface { + // DeltaObject is the same as EncodedObject but without resolving deltas. + // Deltas will be returned as plumbing.DeltaObject instances. + DeltaObject(plumbing.ObjectType, plumbing.Hash) (plumbing.EncodedObject, error) +} + +// Transactioner is a optional method for ObjectStorer, it enable transaction +// base write and read operations in the storage +type Transactioner interface { + // Begin starts a transaction. + Begin() Transaction +} + +// LooseObjectStorer is an optional interface for managing "loose" +// objects, i.e. those not in packfiles. +type LooseObjectStorer interface { // ForEachObjectHash iterates over all the (loose) object hashes // in the repository without necessarily having to read those objects. // Objects only inside pack files may be omitted. @@ -52,6 +72,11 @@ type EncodedObjectStorer interface { LooseObjectTime(plumbing.Hash) (time.Time, error) // DeleteLooseObject deletes a loose object if it exists. DeleteLooseObject(plumbing.Hash) error +} + +// PackedObjectStorer is an optional interface for managing objects in +// packfiles. +type PackedObjectStorer interface { // ObjectPacks returns hashes of object packs if the underlying // implementation has pack files. ObjectPacks() ([]plumbing.Hash, error) @@ -60,21 +85,6 @@ type EncodedObjectStorer interface { DeleteOldObjectPackAndIndex(plumbing.Hash, time.Time) error } -// DeltaObjectStorer is an EncodedObjectStorer that can return delta -// objects. -type DeltaObjectStorer interface { - // DeltaObject is the same as EncodedObject but without resolving deltas. - // Deltas will be returned as plumbing.DeltaObject instances. - DeltaObject(plumbing.ObjectType, plumbing.Hash) (plumbing.EncodedObject, error) -} - -// Transactioner is a optional method for ObjectStorer, it enable transaction -// base write and read operations in the storage -type Transactioner interface { - // Begin starts a transaction. - Begin() Transaction -} - // PackfileWriter is a optional method for ObjectStorer, it enable direct write // of packfile to the storage type PackfileWriter interface { diff --git a/plumbing/storer/object_test.go b/plumbing/storer/object_test.go index 9a6959db6..6b4fe0fb6 100644 --- a/plumbing/storer/object_test.go +++ b/plumbing/storer/object_test.go @@ -3,7 +3,6 @@ package storer import ( "fmt" "testing" - "time" . "gopkg.in/check.v1" "gopkg.in/src-d/go-git.v4/plumbing" @@ -158,23 +157,3 @@ func (o *MockObjectStorage) IterEncodedObjects(t plumbing.ObjectType) (EncodedOb func (o *MockObjectStorage) Begin() Transaction { return nil } - -func (o *MockObjectStorage) ForEachObjectHash(fun func(plumbing.Hash) error) error { - return nil -} - -func (o *MockObjectStorage) LooseObjectTime(plumbing.Hash) (time.Time, error) { - return time.Time{}, plumbing.ErrObjectNotFound -} - -func (o *MockObjectStorage) DeleteLooseObject(plumbing.Hash) error { - return plumbing.ErrObjectNotFound -} - -func (o *MockObjectStorage) ObjectPacks() ([]plumbing.Hash, error) { - return nil, nil -} - -func (o *MockObjectStorage) DeleteOldObjectPackAndIndex(plumbing.Hash, time.Time) error { - return plumbing.ErrObjectNotFound -} diff --git a/prune.go b/prune.go index 81f2582ee..04913d656 100644 --- a/prune.go +++ b/prune.go @@ -1,9 +1,11 @@ package git import ( + "errors" "time" "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/storer" ) type PruneHandler func(unreferencedObjectHash plumbing.Hash) error @@ -15,20 +17,32 @@ type PruneOptions struct { Handler PruneHandler } +var ErrLooseObjectsNotSupported = errors.New("Loose objects not supported") + // DeleteObject deletes an object from a repository. // The type conveniently matches PruneHandler. func (r *Repository) DeleteObject(hash plumbing.Hash) error { - return r.Storer.DeleteLooseObject(hash) + los, ok := r.Storer.(storer.LooseObjectStorer) + if !ok { + return ErrLooseObjectsNotSupported + } + + return los.DeleteLooseObject(hash) } func (r *Repository) Prune(opt PruneOptions) error { + los, ok := r.Storer.(storer.LooseObjectStorer) + if !ok { + return ErrLooseObjectsNotSupported + } + pw := newObjectWalker(r.Storer) err := pw.walkAllRefs() if err != nil { return err } // Now walk all (loose) objects in storage. - return r.Storer.ForEachObjectHash(func(hash plumbing.Hash) error { + return los.ForEachObjectHash(func(hash plumbing.Hash) error { // Get out if we have seen this object. if pw.isSeen(hash) { return nil @@ -38,7 +52,7 @@ func (r *Repository) Prune(opt PruneOptions) error { if opt.OnlyObjectsOlderThan != (time.Time{}) { // Errors here are non-fatal. The object may be e.g. packed. // Or concurrently deleted. Skip such objects. - t, err := r.Storer.LooseObjectTime(hash) + t, err := los.LooseObjectTime(hash) if err != nil { return nil } diff --git a/repository.go b/repository.go index d96399b30..d7cca891b 100644 --- a/repository.go +++ b/repository.go @@ -25,14 +25,15 @@ import ( ) var ( - ErrInvalidReference = errors.New("invalid reference, should be a tag or a branch") - ErrRepositoryNotExists = errors.New("repository does not exist") - ErrRepositoryAlreadyExists = errors.New("repository already exists") - ErrRemoteNotFound = errors.New("remote not found") - ErrRemoteExists = errors.New("remote already exists ") - ErrWorktreeNotProvided = errors.New("worktree should be provided") - ErrIsBareRepository = errors.New("worktree not available in a bare repository") - ErrUnableToResolveCommit = errors.New("unable to resolve commit") + ErrInvalidReference = errors.New("invalid reference, should be a tag or a branch") + ErrRepositoryNotExists = errors.New("repository does not exist") + ErrRepositoryAlreadyExists = errors.New("repository already exists") + ErrRemoteNotFound = errors.New("remote not found") + ErrRemoteExists = errors.New("remote already exists ") + ErrWorktreeNotProvided = errors.New("worktree should be provided") + ErrIsBareRepository = errors.New("worktree not available in a bare repository") + ErrUnableToResolveCommit = errors.New("unable to resolve commit") + ErrPackedObjectsNotSupported = errors.New("Packed objects not supported") ) // Repository represents a git repository @@ -1024,8 +1025,13 @@ type RepackConfig struct { } func (r *Repository) RepackObjects(cfg *RepackConfig) (err error) { + pos, ok := r.Storer.(storer.PackedObjectStorer) + if !ok { + return ErrPackedObjectsNotSupported + } + // Get the existing object packs. - hs, err := r.Storer.ObjectPacks() + hs, err := pos.ObjectPacks() if err != nil { return err } @@ -1042,7 +1048,7 @@ func (r *Repository) RepackObjects(cfg *RepackConfig) (err error) { if h == nh { continue } - err = r.Storer.DeleteOldObjectPackAndIndex(h, cfg.OnlyDeletePacksOlderThan) + err = pos.DeleteOldObjectPackAndIndex(h, cfg.OnlyDeletePacksOlderThan) if err != nil { return err } From 88acc31c76a3033a4d02e0d6cc751c74b9aeeea5 Mon Sep 17 00:00:00 2001 From: Jeremy Stribling Date: Wed, 29 Nov 2017 14:52:01 -0800 Subject: [PATCH 16/19] repository: add tests for pruning and object re-packing Also, object re-packing should clean up any loose objects that were packed. --- prune_test.go | 72 ++++++++++++++++++++++++++++++++++++++++++++++ repository.go | 17 +++++++++++ repository_test.go | 61 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 150 insertions(+) create mode 100644 prune_test.go diff --git a/prune_test.go b/prune_test.go new file mode 100644 index 000000000..613fb0f6c --- /dev/null +++ b/prune_test.go @@ -0,0 +1,72 @@ +package git + +import ( + "time" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/storer" + "gopkg.in/src-d/go-git.v4/storage" + "gopkg.in/src-d/go-git.v4/storage/filesystem" + + . "gopkg.in/check.v1" + "gopkg.in/src-d/go-git-fixtures.v3" +) + +type PruneSuite struct { + BaseSuite +} + +var _ = Suite(&PruneSuite{}) + +func (s *PruneSuite) TestPrune(c *C, deleteTime time.Time) { + srcFs := fixtures.ByTag("unpacked").One().DotGit() + var sto storage.Storer + var err error + sto, err = filesystem.NewStorage(srcFs) + c.Assert(err, IsNil) + + los := sto.(storer.LooseObjectStorer) + c.Assert(los, NotNil) + + count := 0 + err = los.ForEachObjectHash(func(_ plumbing.Hash) error { + count++ + return nil + }) + c.Assert(err, IsNil) + + r, err := Open(sto, srcFs) + c.Assert(err, IsNil) + c.Assert(r, NotNil) + + // Remove a branch so we can prune some objects. + err = sto.RemoveReference(plumbing.ReferenceName("refs/heads/v4")) + c.Assert(err, IsNil) + err = sto.RemoveReference(plumbing.ReferenceName("refs/remotes/origin/v4")) + c.Assert(err, IsNil) + + err = r.Prune(PruneOptions{ + Handler: r.DeleteObject, + }) + c.Assert(err, IsNil) + + newCount := 0 + err = los.ForEachObjectHash(func(_ plumbing.Hash) error { + newCount++ + return nil + }) + if deleteTime.IsZero() { + c.Assert(newCount < count, Equals, true) + } else { + // Assume a delete time older than any of the objects was passed in. + c.Assert(newCount, Equals, count) + } +} + +func (s *PruneSuite) TestPrune(c *C) { + s.testPrune(c, time.Time{}) +} + +func (s *PruneSuite) TestPruneWithNoDelete(c *C) { + s.testPrune(c, time.Unix(0, 1)) +} diff --git a/repository.go b/repository.go index d7cca891b..7cdc0d52c 100644 --- a/repository.go +++ b/repository.go @@ -1088,5 +1088,22 @@ func (r *Repository) createNewObjectPack(cfg *RepackConfig) (h plumbing.Hash, er if err != nil { return h, err } + + // Delete the packed, loose objects. + if los, ok := r.Storer.(storer.LooseObjectStorer); ok { + err = los.ForEachObjectHash(func(hash plumbing.Hash) error { + if ow.isSeen(hash) { + err := los.DeleteLooseObject(hash) + if err != nil { + return err + } + } + return nil + }) + if err != nil { + return h, err + } + } + return h, err } diff --git a/repository_test.go b/repository_test.go index 9d82651ec..2ebc5977d 100644 --- a/repository_test.go +++ b/repository_test.go @@ -10,10 +10,13 @@ import ( "os/exec" "path/filepath" "strings" + "time" "gopkg.in/src-d/go-git.v4/config" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/object" + "gopkg.in/src-d/go-git.v4/plumbing/storer" + "gopkg.in/src-d/go-git.v4/storage" "gopkg.in/src-d/go-git.v4/storage/filesystem" "gopkg.in/src-d/go-git.v4/storage/memory" @@ -1313,6 +1316,64 @@ func (s *RepositorySuite) TestResolveRevisionWithErrors(c *C) { } } +func (s *RepositorySuite) testRepackObjects( + c *C, deleteTime time.Time, expectedPacks int) { + srcFs := fixtures.ByTag("unpacked").One().DotGit() + var sto storage.Storer + var err error + sto, err = filesystem.NewStorage(srcFs) + c.Assert(err, IsNil) + + los := sto.(storer.LooseObjectStorer) + c.Assert(los, NotNil) + + numLooseStart := 0 + err = los.ForEachObjectHash(func(_ plumbing.Hash) error { + numLooseStart++ + return nil + }) + c.Assert(err, IsNil) + c.Assert(numLooseStart > 0, Equals, true) + + pos := sto.(storer.PackedObjectStorer) + c.Assert(los, NotNil) + + packs, err := pos.ObjectPacks() + c.Assert(err, IsNil) + numPacksStart := len(packs) + c.Assert(numPacksStart > 1, Equals, true) + + r, err := Open(sto, srcFs) + c.Assert(err, IsNil) + c.Assert(r, NotNil) + + err = r.RepackObjects(&RepackConfig{ + OnlyDeletePacksOlderThan: deleteTime, + }) + c.Assert(err, IsNil) + + numLooseEnd := 0 + err = los.ForEachObjectHash(func(_ plumbing.Hash) error { + numLooseEnd++ + return nil + }) + c.Assert(err, IsNil) + c.Assert(numLooseEnd, Equals, 0) + + packs, err = pos.ObjectPacks() + c.Assert(err, IsNil) + numPacksEnd := len(packs) + c.Assert(numPacksEnd, Equals, expectedPacks) +} + +func (s *RepositorySuite) TestRepackObjects(c *C) { + s.testRepackObjects(c, time.Time{}, 1) +} + +func (s *RepositorySuite) TestRepackObjectsWithNoDelete(c *C) { + s.testRepackObjects(c, time.Unix(0, 1), 3) +} + func ExecuteOnPath(c *C, path string, cmds ...string) error { for _, cmd := range cmds { err := executeOnPath(path, cmd) From c2e6b5dcf8ff1cb6b61f4a165e526af209c6c185 Mon Sep 17 00:00:00 2001 From: Jeremy Stribling Date: Wed, 29 Nov 2017 16:12:07 -0800 Subject: [PATCH 17/19] repository: oops, fix the prune test --- prune_test.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/prune_test.go b/prune_test.go index 613fb0f6c..60652ec9f 100644 --- a/prune_test.go +++ b/prune_test.go @@ -18,7 +18,7 @@ type PruneSuite struct { var _ = Suite(&PruneSuite{}) -func (s *PruneSuite) TestPrune(c *C, deleteTime time.Time) { +func (s *PruneSuite) testPrune(c *C, deleteTime time.Time) { srcFs := fixtures.ByTag("unpacked").One().DotGit() var sto storage.Storer var err error @@ -46,7 +46,8 @@ func (s *PruneSuite) TestPrune(c *C, deleteTime time.Time) { c.Assert(err, IsNil) err = r.Prune(PruneOptions{ - Handler: r.DeleteObject, + OnlyObjectsOlderThan: deleteTime, + Handler: r.DeleteObject, }) c.Assert(err, IsNil) From 5a6cc4e0996d1dd6c66d46ced92dee26c3121628 Mon Sep 17 00:00:00 2001 From: Jeremy Stribling Date: Wed, 29 Nov 2017 17:10:43 -0800 Subject: [PATCH 18/19] dotgit: open+lock packed-refs file until it doesn't change Windows doesn't like it when we re-open a file we already have locked. --- storage/filesystem/internal/dotgit/dotgit.go | 64 ++++++++++++++------ 1 file changed, 46 insertions(+), 18 deletions(-) diff --git a/storage/filesystem/internal/dotgit/dotgit.go b/storage/filesystem/internal/dotgit/dotgit.go index cd4f51728..3a1238547 100644 --- a/storage/filesystem/internal/dotgit/dotgit.go +++ b/storage/filesystem/internal/dotgit/dotgit.go @@ -460,32 +460,60 @@ func (d *DotGit) addRefsFromPackedRefs(refs *[]*plumbing.Reference, seen map[plu return nil } -func (d *DotGit) rewritePackedRefsWithoutRef(name plumbing.ReferenceName) (err error) { - f, err := d.fs.Open(packedRefsPath) - if err != nil { - if os.IsNotExist(err) { - return nil +func (d *DotGit) openAndLockPackedRefs() (pr billy.File, err error) { + var f billy.File + defer func() { + if err != nil && f != nil { + ioutil.CheckClose(f, &err) } + }() - return err - } - defer ioutil.CheckClose(f, &err) + // Keep trying to open and lock the file until we're sure the file + // didn't change between the open and the lock. + for { + f, err = d.fs.Open(packedRefsPath) + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } - err = f.Lock() - if err != nil { - return err - } + return nil, err + } + fi, err := d.fs.Stat(packedRefsPath) + if err != nil { + return nil, err + } + mtime := fi.ModTime() - // Re-open the file after locking, since it could have been - // renamed over by a new file during the Lock process. - pr, err := d.fs.Open(packedRefsPath) - if err != nil { - if os.IsNotExist(err) { - return nil + err = f.Lock() + if err != nil { + return nil, err + } + + fi, err = d.fs.Stat(packedRefsPath) + if err != nil { + return nil, err + } + if mtime == fi.ModTime() { + break } + // The file has changed since we opened it. Close and retry. + err = f.Close() + if err != nil { + return nil, err + } + } + return f, nil +} +func (d *DotGit) rewritePackedRefsWithoutRef(name plumbing.ReferenceName) (err error) { + pr, err := d.openAndLockPackedRefs() + if err != nil { return err } + if pr == nil { + return nil + } doClosePR := true defer func() { if doClosePR { From d53264806f0d5ddef259f45f4490a19398a102ba Mon Sep 17 00:00:00 2001 From: Jeremy Stribling Date: Thu, 30 Nov 2017 11:36:50 -0800 Subject: [PATCH 19/19] dotgit: rewrite packed-refs while holding lock Windows file system doesn't let us rename over a file while holding that file's lock, so use rewrite as a last resort. It could result in a partially-written file, if there's a failure at the wrong time. --- storage/filesystem/internal/dotgit/dotgit.go | 113 ++++++++---------- .../dotgit/dotgit_rewrite_packed_refs_nix.go | 11 ++ .../dotgit_rewrite_packed_refs_windows.go | 39 ++++++ 3 files changed, 103 insertions(+), 60 deletions(-) create mode 100644 storage/filesystem/internal/dotgit/dotgit_rewrite_packed_refs_nix.go create mode 100644 storage/filesystem/internal/dotgit/dotgit_rewrite_packed_refs_windows.go diff --git a/storage/filesystem/internal/dotgit/dotgit.go b/storage/filesystem/internal/dotgit/dotgit.go index 3a1238547..5e23e66b0 100644 --- a/storage/filesystem/internal/dotgit/dotgit.go +++ b/storage/filesystem/internal/dotgit/dotgit.go @@ -387,17 +387,7 @@ func (d *DotGit) Ref(name plumbing.ReferenceName) (*plumbing.Reference, error) { return d.packedRef(name) } -func (d *DotGit) findPackedRefs() ([]*plumbing.Reference, error) { - f, err := d.fs.Open(packedRefsPath) - if err != nil { - if os.IsNotExist(err) { - return nil, nil - } - return nil, err - } - - defer ioutil.CheckClose(f, &err) - +func (d *DotGit) findPackedRefsInFile(f billy.File) ([]*plumbing.Reference, error) { s := bufio.NewScanner(f) var refs []*plumbing.Reference for s.Scan() { @@ -414,6 +404,19 @@ func (d *DotGit) findPackedRefs() ([]*plumbing.Reference, error) { return refs, s.Err() } +func (d *DotGit) findPackedRefs() ([]*plumbing.Reference, error) { + f, err := d.fs.Open(packedRefsPath) + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + return nil, err + } + + defer ioutil.CheckClose(f, &err) + return d.findPackedRefsInFile(f) +} + func (d *DotGit) packedRef(name plumbing.ReferenceName) (*plumbing.Reference, error) { refs, err := d.findPackedRefs() if err != nil { @@ -460,7 +463,23 @@ func (d *DotGit) addRefsFromPackedRefs(refs *[]*plumbing.Reference, seen map[plu return nil } -func (d *DotGit) openAndLockPackedRefs() (pr billy.File, err error) { +func (d *DotGit) addRefsFromPackedRefsFile(refs *[]*plumbing.Reference, f billy.File, seen map[plumbing.ReferenceName]bool) (err error) { + packedRefs, err := d.findPackedRefsInFile(f) + if err != nil { + return err + } + + for _, ref := range packedRefs { + if !seen[ref.Name()] { + *refs = append(*refs, ref) + seen[ref.Name()] = true + } + } + return nil +} + +func (d *DotGit) openAndLockPackedRefs(doCreate bool) ( + pr billy.File, err error) { var f billy.File defer func() { if err != nil && f != nil { @@ -468,12 +487,17 @@ func (d *DotGit) openAndLockPackedRefs() (pr billy.File, err error) { } }() + openFlags := os.O_RDWR + if doCreate { + openFlags |= os.O_CREATE + } + // Keep trying to open and lock the file until we're sure the file // didn't change between the open and the lock. for { - f, err = d.fs.Open(packedRefsPath) + f, err = d.fs.OpenFile(packedRefsPath, openFlags, 0600) if err != nil { - if os.IsNotExist(err) { + if os.IsNotExist(err) && !doCreate { return nil, nil } @@ -507,19 +531,14 @@ func (d *DotGit) openAndLockPackedRefs() (pr billy.File, err error) { } func (d *DotGit) rewritePackedRefsWithoutRef(name plumbing.ReferenceName) (err error) { - pr, err := d.openAndLockPackedRefs() + pr, err := d.openAndLockPackedRefs(false) if err != nil { return err } if pr == nil { return nil } - doClosePR := true - defer func() { - if doClosePR { - ioutil.CheckClose(pr, &err) - } - }() + defer ioutil.CheckClose(pr, &err) // Creating the temp file in the same directory as the target file // improves our chances for rename operation to be atomic. @@ -527,11 +546,10 @@ func (d *DotGit) rewritePackedRefsWithoutRef(name plumbing.ReferenceName) (err e if err != nil { return err } - doCloseTmp := true + tmpName := tmp.Name() defer func() { - if doCloseTmp { - ioutil.CheckClose(tmp, &err) - } + ioutil.CheckClose(tmp, &err) + _ = d.fs.Remove(tmpName) // don't check err, we might have renamed it }() s := bufio.NewScanner(pr) @@ -558,26 +576,10 @@ func (d *DotGit) rewritePackedRefsWithoutRef(name plumbing.ReferenceName) (err e } if !found { - doCloseTmp = false - ioutil.CheckClose(tmp, &err) - if err != nil { - return err - } - // Delete the temp file if nothing needed to be removed. - return d.fs.Remove(tmp.Name()) - } - - doClosePR = false - if err := pr.Close(); err != nil { - return err - } - - doCloseTmp = false - if err := tmp.Close(); err != nil { - return err + return nil } - return d.fs.Rename(tmp.Name(), packedRefsPath) + return d.rewritePackedRefsWhileLocked(tmp, pr) } // process lines from a packed-refs file @@ -691,20 +693,15 @@ func (d *DotGit) CountLooseRefs() (int, error) { // packed, plus all tags. func (d *DotGit) PackRefs() (err error) { // Lock packed-refs, and create it if it doesn't exist yet. - f, err := d.fs.OpenFile(packedRefsPath, os.O_RDWR|os.O_CREATE, 0600) + f, err := d.openAndLockPackedRefs(true) if err != nil { return err } defer ioutil.CheckClose(f, &err) - err = f.Lock() - if err != nil { - return err - } - // Gather all refs using addRefsFromRefDir and addRefsFromPackedRefs. var refs []*plumbing.Reference - var seen = make(map[plumbing.ReferenceName]bool) + seen := make(map[plumbing.ReferenceName]bool) if err := d.addRefsFromRefDir(&refs, seen); err != nil { return err } @@ -713,7 +710,7 @@ func (d *DotGit) PackRefs() (err error) { return nil } numLooseRefs := len(refs) - if err := d.addRefsFromPackedRefs(&refs, seen); err != nil { + if err := d.addRefsFromPackedRefsFile(&refs, f, seen); err != nil { return err } @@ -722,12 +719,12 @@ func (d *DotGit) PackRefs() (err error) { if err != nil { return err } - doCloseTmp := true + tmpName := tmp.Name() defer func() { - if doCloseTmp { - ioutil.CheckClose(tmp, &err) - } + ioutil.CheckClose(tmp, &err) + _ = d.fs.Remove(tmpName) // don't check err, we might have renamed it }() + w := bufio.NewWriter(tmp) for _, ref := range refs { _, err := w.WriteString(ref.String() + "\n") @@ -741,11 +738,7 @@ func (d *DotGit) PackRefs() (err error) { } // Rename the temp packed-refs file. - doCloseTmp = false - if err := tmp.Close(); err != nil { - return err - } - err = d.fs.Rename(tmp.Name(), packedRefsPath) + err = d.rewritePackedRefsWhileLocked(tmp, f) if err != nil { return err } diff --git a/storage/filesystem/internal/dotgit/dotgit_rewrite_packed_refs_nix.go b/storage/filesystem/internal/dotgit/dotgit_rewrite_packed_refs_nix.go new file mode 100644 index 000000000..af9619654 --- /dev/null +++ b/storage/filesystem/internal/dotgit/dotgit_rewrite_packed_refs_nix.go @@ -0,0 +1,11 @@ +// +build !windows + +package dotgit + +import "gopkg.in/src-d/go-billy.v4" + +func (d *DotGit) rewritePackedRefsWhileLocked( + tmp billy.File, pr billy.File) error { + // On non-Windows platforms, we can have atomic rename. + return d.fs.Rename(tmp.Name(), pr.Name()) +} diff --git a/storage/filesystem/internal/dotgit/dotgit_rewrite_packed_refs_windows.go b/storage/filesystem/internal/dotgit/dotgit_rewrite_packed_refs_windows.go new file mode 100644 index 000000000..bcdb93e36 --- /dev/null +++ b/storage/filesystem/internal/dotgit/dotgit_rewrite_packed_refs_windows.go @@ -0,0 +1,39 @@ +// +build windows + +package dotgit + +import ( + "io" + + "gopkg.in/src-d/go-billy.v4" +) + +func (d *DotGit) rewritePackedRefsWhileLocked( + tmp billy.File, pr billy.File) error { + // If we aren't using the bare Windows filesystem as the storage + // layer, we might be able to get away with a rename over a locked + // file. + err := d.fs.Rename(tmp.Name(), pr.Name()) + if err == nil { + return nil + } + + // Otherwise, Windows doesn't let us rename over a locked file, so + // we have to do a straight copy. Unfortunately this could result + // in a partially-written file if the process fails before the + // copy completes. + _, err = pr.Seek(0, io.SeekStart) + if err != nil { + return err + } + err = pr.Truncate(0) + if err != nil { + return err + } + _, err = tmp.Seek(0, io.SeekStart) + if err != nil { + return err + } + _, err = io.Copy(pr, tmp) + return err +}