Skip to content
This repository was archived by the owner on Sep 11, 2020. It is now read-only.

Commit db279de

Browse files
committed
config: support a configurable, and turn-off-able, pack.window
One use of go-git is to transfer git data from a non-standard git repo (not stored in a file system, for example) to a "remote" backed by a standard, local .git repo. In this scenario, delta compression is not needed to reduce transfer time over the "network", because there is no network. The underlying storage layer has already taken care of the data transfer, and sending the objects to local .git storage doesn't require compression. So this PR gives the user the option to turn off compression when it isn't needed. Of course, this results in a larger, uncompressed local .git repo, but the user can then run git gc or git repack on that repo if they care about the storage costs. Setting the pack window to 0 reduces the total push time of a 36K repo by 50 seconds (out of a pre-PR total of 3m26s).
1 parent 0d74736 commit db279de

File tree

10 files changed

+137
-38
lines changed

10 files changed

+137
-38
lines changed

config/config.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"errors"
77
"fmt"
88
"sort"
9+
"strconv"
910

1011
format "gopkg.in/src-d/go-git.v4/plumbing/format/config"
1112
)
@@ -40,6 +41,14 @@ type Config struct {
4041
// Worktree is the path to the root of the working tree.
4142
Worktree string
4243
}
44+
45+
Pack struct {
46+
// Window controls the size of the sliding window for delta
47+
// compression. The default is 10. A value of 0 turns off
48+
// delta compression entirely.
49+
Window uint
50+
}
51+
4352
// Remotes list of repository remotes, the key of the map is the name
4453
// of the remote, should equal to RemoteConfig.Name.
4554
Remotes map[string]*RemoteConfig
@@ -81,10 +90,14 @@ const (
8190
remoteSection = "remote"
8291
submoduleSection = "submodule"
8392
coreSection = "core"
93+
packSection = "pack"
8494
fetchKey = "fetch"
8595
urlKey = "url"
8696
bareKey = "bare"
8797
worktreeKey = "worktree"
98+
windowKey = "window"
99+
100+
defaultPackWindow = uint(10)
88101
)
89102

90103
// Unmarshal parses a git-config file and stores it.
@@ -98,6 +111,9 @@ func (c *Config) Unmarshal(b []byte) error {
98111
}
99112

100113
c.unmarshalCore()
114+
if err := c.unmarshalPack(); err != nil {
115+
return err
116+
}
101117
c.unmarshalSubmodules()
102118
return c.unmarshalRemotes()
103119
}
@@ -111,6 +127,21 @@ func (c *Config) unmarshalCore() {
111127
c.Core.Worktree = s.Options.Get(worktreeKey)
112128
}
113129

130+
func (c *Config) unmarshalPack() error {
131+
s := c.Raw.Section(packSection)
132+
window := s.Options.Get(windowKey)
133+
if window == "" {
134+
c.Pack.Window = defaultPackWindow
135+
} else {
136+
winUint, err := strconv.ParseUint(window, 10, 32)
137+
if err != nil {
138+
return err
139+
}
140+
c.Pack.Window = uint(winUint)
141+
}
142+
return nil
143+
}
144+
114145
func (c *Config) unmarshalRemotes() error {
115146
s := c.Raw.Section(remoteSection)
116147
for _, sub := range s.Subsections {
@@ -138,6 +169,7 @@ func (c *Config) unmarshalSubmodules() {
138169
// Marshal returns Config encoded as a git-config file.
139170
func (c *Config) Marshal() ([]byte, error) {
140171
c.marshalCore()
172+
c.marshalPack()
141173
c.marshalRemotes()
142174
c.marshalSubmodules()
143175

@@ -158,6 +190,13 @@ func (c *Config) marshalCore() {
158190
}
159191
}
160192

193+
func (c *Config) marshalPack() {
194+
s := c.Raw.Section(packSection)
195+
if c.Pack.Window != defaultPackWindow {
196+
s.SetOption(windowKey, fmt.Sprintf("%d", c.Pack.Window))
197+
}
198+
}
199+
161200
func (c *Config) marshalRemotes() {
162201
s := c.Raw.Section(remoteSection)
163202
newSubsections := make(format.Subsections, 0, len(c.Remotes))

config/config_test.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ func (s *ConfigSuite) TestUnmarshall(c *C) {
1010
input := []byte(`[core]
1111
bare = true
1212
worktree = foo
13+
[pack]
14+
window = 20
1315
[remote "origin"]
1416
url = [email protected]:mcuadros/go-git.git
1517
fetch = +refs/heads/*:refs/remotes/origin/*
@@ -33,6 +35,7 @@ func (s *ConfigSuite) TestUnmarshall(c *C) {
3335

3436
c.Assert(cfg.Core.IsBare, Equals, true)
3537
c.Assert(cfg.Core.Worktree, Equals, "foo")
38+
c.Assert(cfg.Pack.Window, Equals, uint(20))
3639
c.Assert(cfg.Remotes, HasLen, 2)
3740
c.Assert(cfg.Remotes["origin"].Name, Equals, "origin")
3841
c.Assert(cfg.Remotes["origin"].URLs, DeepEquals, []string{"[email protected]:mcuadros/go-git.git"})
@@ -51,6 +54,8 @@ func (s *ConfigSuite) TestMarshall(c *C) {
5154
output := []byte(`[core]
5255
bare = true
5356
worktree = bar
57+
[pack]
58+
window = 20
5459
[remote "alt"]
5560
url = [email protected]:mcuadros/go-git.git
5661
url = [email protected]:src-d/go-git.git
@@ -65,6 +70,7 @@ func (s *ConfigSuite) TestMarshall(c *C) {
6570
cfg := NewConfig()
6671
cfg.Core.IsBare = true
6772
cfg.Core.Worktree = "bar"
73+
cfg.Pack.Window = 20
6874
cfg.Remotes["origin"] = &RemoteConfig{
6975
Name: "origin",
7076
URLs: []string{"[email protected]:mcuadros/go-git.git"},
@@ -92,6 +98,8 @@ func (s *ConfigSuite) TestUnmarshallMarshall(c *C) {
9298
bare = true
9399
worktree = foo
94100
custom = ignored
101+
[pack]
102+
window = 20
95103
[remote "origin"]
96104
url = [email protected]:mcuadros/go-git.git
97105
fetch = +refs/heads/*:refs/remotes/origin/*

plumbing/format/packfile/delta_selector.go

Lines changed: 30 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,6 @@ import (
99
)
1010

1111
const (
12-
// How far back in the sorted list to search for deltas. 10 is
13-
// the default in command line git.
14-
deltaWindowSize = 10
1512
// deltas based on deltas, how many steps we can do.
1613
// 50 is the default value used in JGit
1714
maxDepth = int64(50)
@@ -33,12 +30,19 @@ func newDeltaSelector(s storer.EncodedObjectStorer) *deltaSelector {
3330

3431
// ObjectsToPack creates a list of ObjectToPack from the hashes provided,
3532
// creating deltas if it's suitable, using a specific internal logic
36-
func (dw *deltaSelector) ObjectsToPack(hashes []plumbing.Hash) ([]*ObjectToPack, error) {
37-
otp, err := dw.objectsToPack(hashes)
33+
func (dw *deltaSelector) ObjectsToPack(
34+
hashes []plumbing.Hash,
35+
packWindow uint,
36+
) ([]*ObjectToPack, error) {
37+
otp, err := dw.objectsToPack(hashes, packWindow)
3838
if err != nil {
3939
return nil, err
4040
}
4141

42+
if packWindow == 0 {
43+
return otp, nil
44+
}
45+
4246
dw.sort(otp)
4347

4448
var objectGroups [][]*ObjectToPack
@@ -60,7 +64,7 @@ func (dw *deltaSelector) ObjectsToPack(hashes []plumbing.Hash) ([]*ObjectToPack,
6064
objs := objs
6165
wg.Add(1)
6266
go func() {
63-
if walkErr := dw.walk(objs); walkErr != nil {
67+
if walkErr := dw.walk(objs, packWindow); walkErr != nil {
6468
once.Do(func() {
6569
err = walkErr
6670
})
@@ -77,10 +81,19 @@ func (dw *deltaSelector) ObjectsToPack(hashes []plumbing.Hash) ([]*ObjectToPack,
7781
return otp, nil
7882
}
7983

80-
func (dw *deltaSelector) objectsToPack(hashes []plumbing.Hash) ([]*ObjectToPack, error) {
84+
func (dw *deltaSelector) objectsToPack(
85+
hashes []plumbing.Hash,
86+
packWindow uint,
87+
) ([]*ObjectToPack, error) {
8188
var objectsToPack []*ObjectToPack
8289
for _, h := range hashes {
83-
o, err := dw.encodedDeltaObject(h)
90+
var o plumbing.EncodedObject
91+
var err error
92+
if packWindow == 0 {
93+
o, err = dw.encodedObject(h)
94+
} else {
95+
o, err = dw.encodedDeltaObject(h)
96+
}
8497
if err != nil {
8598
return nil, err
8699
}
@@ -93,6 +106,10 @@ func (dw *deltaSelector) objectsToPack(hashes []plumbing.Hash) ([]*ObjectToPack,
93106
objectsToPack = append(objectsToPack, otp)
94107
}
95108

109+
if packWindow == 0 {
110+
return objectsToPack, nil
111+
}
112+
96113
if err := dw.fixAndBreakChains(objectsToPack); err != nil {
97114
return nil, err
98115
}
@@ -201,7 +218,10 @@ func (dw *deltaSelector) sort(objectsToPack []*ObjectToPack) {
201218
sort.Sort(byTypeAndSize(objectsToPack))
202219
}
203220

204-
func (dw *deltaSelector) walk(objectsToPack []*ObjectToPack) error {
221+
func (dw *deltaSelector) walk(
222+
objectsToPack []*ObjectToPack,
223+
packWindow uint,
224+
) error {
205225
indexMap := make(map[plumbing.Hash]*deltaIndex)
206226
for i := 0; i < len(objectsToPack); i++ {
207227
target := objectsToPack[i]
@@ -218,7 +238,7 @@ func (dw *deltaSelector) walk(objectsToPack []*ObjectToPack) error {
218238
continue
219239
}
220240

221-
for j := i - 1; j >= 0 && i-j < deltaWindowSize; j-- {
241+
for j := i - 1; j >= 0 && i-j < int(packWindow); j-- {
222242
base := objectsToPack[j]
223243
// Objects must use only the same type as their delta base.
224244
// Since objectsToPack is sorted by type and size, once we find

plumbing/format/packfile/delta_selector_test.go

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -146,31 +146,32 @@ func (s *DeltaSelectorSuite) createTestObjects() {
146146
func (s *DeltaSelectorSuite) TestObjectsToPack(c *C) {
147147
// Different type
148148
hashes := []plumbing.Hash{s.hashes["base"], s.hashes["treeType"]}
149-
otp, err := s.ds.ObjectsToPack(hashes)
149+
deltaWindowSize := uint(10)
150+
otp, err := s.ds.ObjectsToPack(hashes, deltaWindowSize)
150151
c.Assert(err, IsNil)
151152
c.Assert(len(otp), Equals, 2)
152153
c.Assert(otp[0].Object, Equals, s.store.Objects[s.hashes["base"]])
153154
c.Assert(otp[1].Object, Equals, s.store.Objects[s.hashes["treeType"]])
154155

155156
// Size radically different
156157
hashes = []plumbing.Hash{s.hashes["bigBase"], s.hashes["target"]}
157-
otp, err = s.ds.ObjectsToPack(hashes)
158+
otp, err = s.ds.ObjectsToPack(hashes, deltaWindowSize)
158159
c.Assert(err, IsNil)
159160
c.Assert(len(otp), Equals, 2)
160161
c.Assert(otp[0].Object, Equals, s.store.Objects[s.hashes["bigBase"]])
161162
c.Assert(otp[1].Object, Equals, s.store.Objects[s.hashes["target"]])
162163

163164
// Delta Size Limit with no best delta yet
164165
hashes = []plumbing.Hash{s.hashes["smallBase"], s.hashes["smallTarget"]}
165-
otp, err = s.ds.ObjectsToPack(hashes)
166+
otp, err = s.ds.ObjectsToPack(hashes, deltaWindowSize)
166167
c.Assert(err, IsNil)
167168
c.Assert(len(otp), Equals, 2)
168169
c.Assert(otp[0].Object, Equals, s.store.Objects[s.hashes["smallBase"]])
169170
c.Assert(otp[1].Object, Equals, s.store.Objects[s.hashes["smallTarget"]])
170171

171172
// It will create the delta
172173
hashes = []plumbing.Hash{s.hashes["base"], s.hashes["target"]}
173-
otp, err = s.ds.ObjectsToPack(hashes)
174+
otp, err = s.ds.ObjectsToPack(hashes, deltaWindowSize)
174175
c.Assert(err, IsNil)
175176
c.Assert(len(otp), Equals, 2)
176177
c.Assert(otp[0].Object, Equals, s.store.Objects[s.hashes["target"]])
@@ -185,7 +186,7 @@ func (s *DeltaSelectorSuite) TestObjectsToPack(c *C) {
185186
s.hashes["o2"],
186187
s.hashes["o3"],
187188
}
188-
otp, err = s.ds.ObjectsToPack(hashes)
189+
otp, err = s.ds.ObjectsToPack(hashes, deltaWindowSize)
189190
c.Assert(err, IsNil)
190191
c.Assert(len(otp), Equals, 3)
191192
c.Assert(otp[0].Object, Equals, s.store.Objects[s.hashes["o1"]])
@@ -201,20 +202,32 @@ func (s *DeltaSelectorSuite) TestObjectsToPack(c *C) {
201202
// a delta.
202203
hashes = make([]plumbing.Hash, 0, deltaWindowSize+2)
203204
hashes = append(hashes, s.hashes["base"])
204-
for i := 0; i < deltaWindowSize; i++ {
205+
for i := uint(0); i < deltaWindowSize; i++ {
205206
hashes = append(hashes, s.hashes["smallTarget"])
206207
}
207208
hashes = append(hashes, s.hashes["target"])
208209

209210
// Don't sort so we can easily check the sliding window without
210211
// creating a bunch of new objects.
211-
otp, err = s.ds.objectsToPack(hashes)
212+
otp, err = s.ds.objectsToPack(hashes, deltaWindowSize)
212213
c.Assert(err, IsNil)
213-
err = s.ds.walk(otp)
214+
err = s.ds.walk(otp, deltaWindowSize)
214215
c.Assert(err, IsNil)
215-
c.Assert(len(otp), Equals, deltaWindowSize+2)
216+
c.Assert(len(otp), Equals, int(deltaWindowSize)+2)
216217
targetIdx := len(otp) - 1
217218
c.Assert(otp[targetIdx].IsDelta(), Equals, false)
219+
220+
// Check that no deltas are created, and the objects are unsorted,
221+
// if compression is off.
222+
hashes = []plumbing.Hash{s.hashes["base"], s.hashes["target"]}
223+
otp, err = s.ds.ObjectsToPack(hashes, 0)
224+
c.Assert(err, IsNil)
225+
c.Assert(len(otp), Equals, 2)
226+
c.Assert(otp[0].Object, Equals, s.store.Objects[s.hashes["base"]])
227+
c.Assert(otp[0].IsDelta(), Equals, false)
228+
c.Assert(otp[1].Original, Equals, s.store.Objects[s.hashes["target"]])
229+
c.Assert(otp[1].IsDelta(), Equals, false)
230+
c.Assert(otp[1].Depth, Equals, 0)
218231
}
219232

220233
func (s *DeltaSelectorSuite) TestMaxDepth(c *C) {

plumbing/format/packfile/encoder.go

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,10 @@ import (
1414
// Encoder gets the data from the storage and writes it into the writer in PACK
1515
// format
1616
type Encoder struct {
17-
selector *deltaSelector
18-
w *offsetWriter
19-
zw *zlib.Writer
20-
hasher plumbing.Hasher
17+
selector *deltaSelector
18+
w *offsetWriter
19+
zw *zlib.Writer
20+
hasher plumbing.Hasher
2121
// offsets is a map of object hashes to corresponding offsets in the packfile.
2222
// It is used to determine offset of the base of a delta when a OFS_DELTA is
2323
// used.
@@ -47,8 +47,11 @@ func NewEncoder(w io.Writer, s storer.EncodedObjectStorer, useRefDeltas bool) *E
4747

4848
// Encode creates a packfile containing all the objects referenced in hashes
4949
// and writes it to the writer in the Encoder.
50-
func (e *Encoder) Encode(hashes []plumbing.Hash) (plumbing.Hash, error) {
51-
objects, err := e.selector.ObjectsToPack(hashes)
50+
func (e *Encoder) Encode(
51+
hashes []plumbing.Hash,
52+
packWindow uint,
53+
) (plumbing.Hash, error) {
54+
objects, err := e.selector.ObjectsToPack(hashes, packWindow)
5255
if err != nil {
5356
return plumbing.ZeroHash, err
5457
}
@@ -137,7 +140,7 @@ func (e *Encoder) writeOfsDeltaHeader(deltaOffset int64, base plumbing.Hash) err
137140

138141
// for OFS_DELTA, offset of the base is interpreted as negative offset
139142
// relative to the type-byte of the header of the ofs-delta entry.
140-
relativeOffset := deltaOffset-baseOffset
143+
relativeOffset := deltaOffset - baseOffset
141144
if relativeOffset <= 0 {
142145
return fmt.Errorf("bad offset for OFS_DELTA entry: %d", relativeOffset)
143146
}

plumbing/format/packfile/encoder_advanced_test.go

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,23 @@ func (s *EncoderAdvancedSuite) TestEncodeDecode(c *C) {
2727
fixs.Test(c, func(f *fixtures.Fixture) {
2828
storage, err := filesystem.NewStorage(f.DotGit())
2929
c.Assert(err, IsNil)
30-
s.testEncodeDecode(c, storage)
30+
s.testEncodeDecode(c, storage, 10)
3131
})
3232

3333
}
3434

35-
func (s *EncoderAdvancedSuite) testEncodeDecode(c *C, storage storer.Storer) {
35+
func (s *EncoderAdvancedSuite) TestEncodeDecodeNoDeltaCompression(c *C) {
36+
fixs := fixtures.Basic().ByTag("packfile").ByTag(".git")
37+
fixs = append(fixs, fixtures.ByURL("https://github.com/src-d/go-git.git").
38+
ByTag("packfile").ByTag(".git").One())
39+
fixs.Test(c, func(f *fixtures.Fixture) {
40+
storage, err := filesystem.NewStorage(f.DotGit())
41+
c.Assert(err, IsNil)
42+
s.testEncodeDecode(c, storage, 0)
43+
})
44+
}
45+
46+
func (s *EncoderAdvancedSuite) testEncodeDecode(c *C, storage storer.Storer, packWindow uint) {
3647

3748
objIter, err := storage.IterEncodedObjects(plumbing.AnyObject)
3849
c.Assert(err, IsNil)
@@ -57,7 +68,7 @@ func (s *EncoderAdvancedSuite) testEncodeDecode(c *C, storage storer.Storer) {
5768

5869
buf := bytes.NewBuffer(nil)
5970
enc := NewEncoder(buf, storage, false)
60-
_, err = enc.Encode(hashes)
71+
_, err = enc.Encode(hashes, packWindow)
6172
c.Assert(err, IsNil)
6273

6374
scanner := NewScanner(buf)

0 commit comments

Comments
 (0)