Skip to content

Commit b2eb836

Browse files
authored
s2: Allow commandline input to be http/https (#348)
Download input.
1 parent 460ec9e commit b2eb836

File tree

3 files changed

+138
-56
lines changed

3 files changed

+138
-56
lines changed

s2/README.md

Lines changed: 25 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -157,29 +157,32 @@ Use - as the only file name to read from stdin and write to stdout.
157157
Wildcards are accepted: testdir/*.txt will compress all files in testdir ending with .txt
158158
Directories can be wildcards as well. testdir/*/*.txt will match testdir/subdir/b.txt
159159
160+
File names beginning with 'http://' and 'https://' will be downloaded and compressed.
161+
Only http response code 200 is accepted.
162+
160163
Options:
161164
-bench int
162-
Run benchmark n times. No output will be written
165+
Run benchmark n times. No output will be written
163166
-blocksize string
164-
Max block size. Examples: 64K, 256K, 1M, 4M. Must be power of two and <= 4MB (default "4M")
165-
-c Write all output to stdout. Multiple input files will be concatenated
167+
Max block size. Examples: 64K, 256K, 1M, 4M. Must be power of two and <= 4MB (default "4M")
168+
-c Write all output to stdout. Multiple input files will be concatenated
166169
-cpu int
167-
Compress using this amount of threads (default 32)
170+
Compress using this amount of threads (default 32)
168171
-faster
169-
Compress faster, but with a minor compression loss
172+
Compress faster, but with a minor compression loss
170173
-help
171-
Display help
174+
Display help
172175
-pad string
173-
Pad size to a multiple of this value, Examples: 500, 64K, 256K, 1M, 4M, etc (default "1")
174-
-q Don't write any output to terminal, except errors
176+
Pad size to a multiple of this value, Examples: 500, 64K, 256K, 1M, 4M, etc (default "1")
177+
-q Don't write any output to terminal, except errors
175178
-rm
176-
Delete source file(s) after successful compression
179+
Delete source file(s) after successful compression
177180
-safe
178-
Do not overwrite output files
181+
Do not overwrite output files
179182
-slower
180-
Compress more, but a lot slower
183+
Compress more, but a lot slower
181184
-verify
182-
Verify written files
185+
Verify written files
183186
184187
```
185188

@@ -195,19 +198,22 @@ Use - as the only file name to read from stdin and write to stdout.
195198
Wildcards are accepted: testdir/*.txt will decompress all files in testdir ending with .txt
196199
Directories can be wildcards as well. testdir/*/*.txt will match testdir/subdir/b.txt
197200
201+
File names beginning with 'http://' and 'https://' will be downloaded and decompressed.
202+
Extensions on downloaded files are ignored. Only http response code 200 is accepted.
203+
198204
Options:
199205
-bench int
200-
Run benchmark n times. No output will be written
201-
-c Write all output to stdout. Multiple input files will be concatenated
206+
Run benchmark n times. No output will be written
207+
-c Write all output to stdout. Multiple input files will be concatenated
202208
-help
203-
Display help
204-
-q Don't write any output to terminal, except errors
209+
Display help
210+
-q Don't write any output to terminal, except errors
205211
-rm
206-
Delete source file(s) after successful decompression
212+
Delete source file(s) after successful decompression
207213
-safe
208-
Do not overwrite output files
214+
Do not overwrite output files
209215
-verify
210-
Verify files, but do not write output
216+
Verify files, but do not write output
211217
```
212218

213219
## s2sx: self-extracting archives

s2/cmd/s2c/main.go

Lines changed: 50 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"io"
1010
"io/ioutil"
1111
"log"
12+
"net/http"
1213
"os"
1314
"os/signal"
1415
"path/filepath"
@@ -73,6 +74,9 @@ Use - as the only file name to read from stdin and write to stdout.
7374
Wildcards are accepted: testdir/*.txt will compress all files in testdir ending with .txt
7475
Directories can be wildcards as well. testdir/*/*.txt will match testdir/subdir/b.txt
7576
77+
File names beginning with 'http://' and 'https://' will be downloaded and compressed.
78+
Only http response code 200 is accepted.
79+
7680
Options:`)
7781
flag.PrintDefaults()
7882
os.Exit(0)
@@ -100,6 +104,10 @@ Options:`)
100104
var files []string
101105

102106
for _, pattern := range args {
107+
if isHTTP(pattern) {
108+
files = append(files, pattern)
109+
continue
110+
}
103111
found, err := filepath.Glob(pattern)
104112
exitErr(err)
105113
if len(found) == 0 {
@@ -146,11 +154,8 @@ Options:`)
146154
fmt.Print("Reading ", filename, "...")
147155
}
148156
// Input file.
149-
file, err := os.Open(filename)
150-
exitErr(err)
151-
finfo, err := file.Stat()
152-
exitErr(err)
153-
b := make([]byte, finfo.Size())
157+
file, size, _ := openFile(filename)
158+
b := make([]byte, size)
154159
_, err = io.ReadFull(file, b)
155160
exitErr(err)
156161
file.Close()
@@ -215,30 +220,22 @@ Options:`)
215220
for _, filename := range files {
216221
func() {
217222
var closeOnce sync.Once
218-
dstFilename := fmt.Sprintf("%s%s", filename, ".s2")
219-
if *bench > 0 {
220-
dstFilename = "(discarded)"
221-
}
223+
dstFilename := cleanFileName(fmt.Sprintf("%s%s", filename, ".s2"))
222224
if !*quiet {
223225
fmt.Print("Compressing ", filename, " -> ", dstFilename)
224226
}
225227
// Input file.
226-
file, err := os.Open(filename)
228+
file, _, mode := openFile(filename)
227229
exitErr(err)
228230
defer closeOnce.Do(func() { file.Close() })
229231
src, err := readahead.NewReaderSize(file, *cpu+1, 1<<20)
230232
exitErr(err)
231233
defer src.Close()
232-
finfo, err := file.Stat()
233-
exitErr(err)
234234
var out io.Writer
235235
switch {
236-
case *bench > 0:
237-
out = ioutil.Discard
238236
case *stdout:
239237
out = os.Stdout
240238
default:
241-
mode := finfo.Mode() // use the same mode for the output file
242239
if *safe {
243240
_, err := os.Stat(dstFilename)
244241
if !os.IsNotExist(err) {
@@ -282,6 +279,44 @@ Options:`)
282279
}
283280
}
284281

282+
// isHTTP reports whether name starts with an "http://" or "https://" scheme
// prefix, i.e. whether it should be treated as a URL to download.
//
// The prefix is matched case-insensitively: URI schemes are case-insensitive
// (RFC 3986, section 3.1), so "HTTP://host/file" is recognized the same way
// as "http://host/file".
func isHTTP(name string) bool {
	for _, prefix := range [...]string{"http://", "https://"} {
		// Slice by bytes first so EqualFold only ever sees a candidate of the
		// same byte length as the (pure ASCII) prefix.
		if len(name) >= len(prefix) && strings.EqualFold(name[:len(prefix)], prefix) {
			return true
		}
	}
	return false
}
285+
286+
func openFile(name string) (rc io.ReadCloser, size int64, mode os.FileMode) {
287+
if isHTTP(name) {
288+
resp, err := http.Get(name)
289+
exitErr(err)
290+
if resp.StatusCode != http.StatusOK {
291+
exitErr(fmt.Errorf("unexpected response status code %v, want OK", resp.Status))
292+
}
293+
return resp.Body, resp.ContentLength, os.ModePerm
294+
}
295+
file, err := os.Open(name)
296+
exitErr(err)
297+
st, err := file.Stat()
298+
exitErr(err)
299+
return file, st.Size(), st.Mode()
300+
}
301+
302+
func cleanFileName(s string) string {
303+
if isHTTP(s) {
304+
s = strings.TrimPrefix(s, "http://")
305+
s = strings.TrimPrefix(s, "https://")
306+
s = strings.Map(func(r rune) rune {
307+
switch r {
308+
case '\\', '/', '*', '?', ':', '|', '<', '>', '~':
309+
return '_'
310+
}
311+
if r < 20 {
312+
return '_'
313+
}
314+
return r
315+
}, s)
316+
}
317+
return s
318+
}
319+
285320
func verifyTo(w io.Writer) (io.Writer, func() error) {
286321
if !*verify {
287322
return w, func() error {

s2/cmd/s2d/main.go

Lines changed: 63 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"fmt"
99
"io"
1010
"io/ioutil"
11+
"net/http"
1112
"os"
1213
"path/filepath"
1314
"runtime/debug"
@@ -51,6 +52,9 @@ Use - as the only file name to read from stdin and write to stdout.
5152
Wildcards are accepted: testdir/*.txt will compress all files in testdir ending with .txt
5253
Directories can be wildcards as well. testdir/*/*.txt will match testdir/subdir/b.txt
5354
55+
File names beginning with 'http://' and 'https://' will be downloaded and decompressed.
56+
Extensions on downloaded files are ignored. Only http response code 200 is accepted.
57+
5458
Options:`)
5559
flag.PrintDefaults()
5660
os.Exit(0)
@@ -69,6 +73,11 @@ Options:`)
6973
var files []string
7074

7175
for _, pattern := range args {
76+
if isHTTP(pattern) {
77+
files = append(files, pattern)
78+
continue
79+
}
80+
7281
found, err := filepath.Glob(pattern)
7382
exitErr(err)
7483
if len(found) == 0 {
@@ -86,21 +95,20 @@ Options:`)
8695
case strings.HasSuffix(filename, ".s2"):
8796
case strings.HasSuffix(filename, ".snappy"):
8897
default:
89-
fmt.Println("Skipping", filename)
90-
continue
98+
if !isHTTP(filename) {
99+
fmt.Println("Skipping", filename)
100+
continue
101+
}
91102
}
92103

93104
func() {
94105
if !*quiet {
95106
fmt.Print("Reading ", filename, "...")
96107
}
97108
// Input file.
98-
file, err := os.Open(filename)
99-
exitErr(err)
100-
finfo, err := file.Stat()
101-
exitErr(err)
102-
b := make([]byte, finfo.Size())
103-
_, err = io.ReadFull(file, b)
109+
file, size, _ := openFile(filename)
110+
b := make([]byte, size)
111+
_, err := io.ReadFull(file, b)
104112
exitErr(err)
105113
file.Close()
106114

@@ -127,18 +135,17 @@ Options:`)
127135
}
128136

129137
for _, filename := range files {
130-
dstFilename := filename
138+
dstFilename := cleanFileName(filename)
131139
switch {
132140
case strings.HasSuffix(filename, ".s2"):
133-
dstFilename = strings.TrimSuffix(filename, ".s2")
141+
dstFilename = strings.TrimSuffix(dstFilename, ".s2")
134142
case strings.HasSuffix(filename, ".snappy"):
135-
dstFilename = strings.TrimSuffix(filename, ".snappy")
143+
dstFilename = strings.TrimSuffix(dstFilename, ".snappy")
136144
default:
137-
fmt.Println("Skipping", filename)
138-
continue
139-
}
140-
if *bench > 0 {
141-
dstFilename = "(discarded)"
145+
if !isHTTP(filename) {
146+
fmt.Println("Skipping", filename)
147+
continue
148+
}
142149
}
143150
if *verify {
144151
dstFilename = "(verify)"
@@ -150,16 +157,12 @@ Options:`)
150157
fmt.Print("Decompressing ", filename, " -> ", dstFilename)
151158
}
152159
// Input file.
153-
file, err := os.Open(filename)
154-
exitErr(err)
160+
file, _, mode := openFile(filename)
155161
defer closeOnce.Do(func() { file.Close() })
156162
rc := rCounter{in: file}
157163
src, err := readahead.NewReaderSize(&rc, 2, 4<<20)
158164
exitErr(err)
159165
defer src.Close()
160-
finfo, err := file.Stat()
161-
exitErr(err)
162-
mode := finfo.Mode() // use the same mode for the output file
163166
if *safe {
164167
_, err := os.Stat(dstFilename)
165168
if !os.IsNotExist(err) {
@@ -168,7 +171,7 @@ Options:`)
168171
}
169172
var out io.Writer
170173
switch {
171-
case *bench > 0 || *verify:
174+
case *verify:
172175
out = ioutil.Discard
173176
case *stdout:
174177
out = os.Stdout
@@ -204,6 +207,44 @@ Options:`)
204207
}
205208
}
206209

210+
func openFile(name string) (rc io.ReadCloser, size int64, mode os.FileMode) {
211+
if isHTTP(name) {
212+
resp, err := http.Get(name)
213+
exitErr(err)
214+
if resp.StatusCode != http.StatusOK {
215+
exitErr(fmt.Errorf("unexpected response status code %v, want 200 OK", resp.Status))
216+
}
217+
return resp.Body, resp.ContentLength, os.ModePerm
218+
}
219+
file, err := os.Open(name)
220+
exitErr(err)
221+
st, err := file.Stat()
222+
exitErr(err)
223+
return file, st.Size(), st.Mode()
224+
}
225+
226+
func cleanFileName(s string) string {
227+
if isHTTP(s) {
228+
s = strings.TrimPrefix(s, "http://")
229+
s = strings.TrimPrefix(s, "https://")
230+
s = strings.Map(func(r rune) rune {
231+
switch r {
232+
case '\\', '/', '*', '?', ':', '|', '<', '>', '~':
233+
return '_'
234+
}
235+
if r < 20 {
236+
return '_'
237+
}
238+
return r
239+
}, s)
240+
}
241+
return s
242+
}
243+
244+
// isHTTP reports whether name starts with an "http://" or "https://" scheme
// prefix, i.e. whether it should be treated as a URL to download.
//
// The prefix is matched case-insensitively: URI schemes are case-insensitive
// (RFC 3986, section 3.1), so "HTTP://host/file" is recognized the same way
// as "http://host/file".
func isHTTP(name string) bool {
	for _, prefix := range [...]string{"http://", "https://"} {
		// Slice by bytes first so EqualFold only ever sees a candidate of the
		// same byte length as the (pure ASCII) prefix.
		if len(name) >= len(prefix) && strings.EqualFold(name[:len(prefix)], prefix) {
			return true
		}
	}
	return false
}
247+
207248
func exitErr(err error) {
208249
if err != nil {
209250
fmt.Fprintln(os.Stderr, "\nERROR:", err.Error())

0 commit comments

Comments
 (0)