From f77266f25478469583512b12358941f3c9ab0319 Mon Sep 17 00:00:00 2001
From: Klaus Post
Date: Tue, 30 Mar 2021 17:09:37 +0200
Subject: [PATCH] s2: Allow commandline input to be http/https

Download input.
---
 s2/README.md       | 44 +++++++++++++-----------
 s2/cmd/s2c/main.go | 65 +++++++++++++++++++++++++++--------
 s2/cmd/s2d/main.go | 85 ++++++++++++++++++++++++++++++++++------------
 3 files changed, 138 insertions(+), 56 deletions(-)

diff --git a/s2/README.md b/s2/README.md
index fc74706be6..5eea18e3ce 100644
--- a/s2/README.md
+++ b/s2/README.md
@@ -157,29 +157,32 @@ Use - as the only file name to read from stdin and write to stdout.
 Wildcards are accepted: testdir/*.txt will compress all files in testdir ending with .txt
 Directories can be wildcards as well. testdir/*/*.txt will match testdir/subdir/b.txt
 
+File names beginning with 'http://' and 'https://' will be downloaded and compressed.
+Only http response code 200 is accepted.
+
 Options:
   -bench int
-	Run benchmark n times. No output will be written
+    	Run benchmark n times. No output will be written
   -blocksize string
-	Max block size. Examples: 64K, 256K, 1M, 4M. Must be power of two and <= 4MB (default "4M")
-  -c Write all output to stdout. Multiple input files will be concatenated
+    	Max block size. Examples: 64K, 256K, 1M, 4M. Must be power of two and <= 4MB (default "4M")
+  -c	Write all output to stdout. Multiple input files will be concatenated
   -cpu int
-	Compress using this amount of threads (default 32)
+    	Compress using this amount of threads (default 32)
   -faster
-	Compress faster, but with a minor compression loss
+    	Compress faster, but with a minor compression loss
   -help
-	Display help
+    	Display help
   -pad string
-	Pad size to a multiple of this value, Examples: 500, 64K, 256K, 1M, 4M, etc (default "1")
-  -q Don't write any output to terminal, except errors
+    	Pad size to a multiple of this value, Examples: 500, 64K, 256K, 1M, 4M, etc (default "1")
+  -q	Don't write any output to terminal, except errors
   -rm
-	Delete source file(s) after successful compression
+    	Delete source file(s) after successful compression
   -safe
-	Do not overwrite output files
+    	Do not overwrite output files
   -slower
-	Compress more, but a lot slower
+    	Compress more, but a lot slower
   -verify
-	Verify written files
+    	Verify written files
 ```
 
@@ -195,19 +198,22 @@ Use - as the only file name to read from stdin and write to stdout.
 Wildcards are accepted: testdir/*.txt will compress all files in testdir ending with .txt
 Directories can be wildcards as well. testdir/*/*.txt will match testdir/subdir/b.txt
 
+File names beginning with 'http://' and 'https://' will be downloaded and decompressed.
+Extensions on downloaded files are ignored. Only http response code 200 is accepted.
+
 Options:
   -bench int
-	Run benchmark n times. No output will be written
-  -c Write all output to stdout. Multiple input files will be concatenated
+    	Run benchmark n times. No output will be written
+  -c	Write all output to stdout. Multiple input files will be concatenated
   -help
-	Display help
-  -q Don't write any output to terminal, except errors
+    	Display help
+  -q	Don't write any output to terminal, except errors
   -rm
-	Delete source file(s) after successful decompression
+    	Delete source file(s) after successful decompression
   -safe
-	Do not overwrite output files
+    	Do not overwrite output files
   -verify
-	Verify files, but do not write output
+    	Verify files, but do not write output
 ```
 
 ## s2sx: self-extracting archives
diff --git a/s2/cmd/s2c/main.go b/s2/cmd/s2c/main.go
index 86f4c6989e..15586bf8a5 100644
--- a/s2/cmd/s2c/main.go
+++ b/s2/cmd/s2c/main.go
@@ -9,6 +9,7 @@ import (
 	"io"
 	"io/ioutil"
 	"log"
+	"net/http"
 	"os"
 	"os/signal"
 	"path/filepath"
@@ -73,6 +74,9 @@ Use - as the only file name to read from stdin and write to stdout.
 Wildcards are accepted: testdir/*.txt will compress all files in testdir ending with .txt
 Directories can be wildcards as well. testdir/*/*.txt will match testdir/subdir/b.txt
 
+File names beginning with 'http://' and 'https://' will be downloaded and compressed.
+Only http response code 200 is accepted.
+
 Options:`)
 		flag.PrintDefaults()
 		os.Exit(0)
@@ -100,6 +104,10 @@ Options:`)
 
 	var files []string
 	for _, pattern := range args {
+		if isHTTP(pattern) {
+			files = append(files, pattern)
+			continue
+		}
 		found, err := filepath.Glob(pattern)
 		exitErr(err)
 		if len(found) == 0 {
@@ -146,11 +154,8 @@ Options:`)
 				fmt.Print("Reading ", filename, "...")
 			}
 			// Input file.
-			file, err := os.Open(filename)
-			exitErr(err)
-			finfo, err := file.Stat()
-			exitErr(err)
-			b := make([]byte, finfo.Size())
+			file, size, _ := openFile(filename)
+			b := make([]byte, size)
 			_, err = io.ReadFull(file, b)
 			exitErr(err)
 			file.Close()
@@ -215,30 +220,22 @@ Options:`)
 	for _, filename := range files {
 		func() {
 			var closeOnce sync.Once
-			dstFilename := fmt.Sprintf("%s%s", filename, ".s2")
-			if *bench > 0 {
-				dstFilename = "(discarded)"
-			}
+			dstFilename := cleanFileName(fmt.Sprintf("%s%s", filename, ".s2"))
 			if !*quiet {
 				fmt.Print("Compressing ", filename, " -> ", dstFilename)
 			}
 			// Input file.
-			file, err := os.Open(filename)
+			file, _, mode := openFile(filename)
 			exitErr(err)
 			defer closeOnce.Do(func() { file.Close() })
 			src, err := readahead.NewReaderSize(file, *cpu+1, 1<<20)
 			exitErr(err)
 			defer src.Close()
-			finfo, err := file.Stat()
-			exitErr(err)
 			var out io.Writer
 			switch {
-			case *bench > 0:
-				out = ioutil.Discard
 			case *stdout:
 				out = os.Stdout
 			default:
-				mode := finfo.Mode() // use the same mode for the output file
 				if *safe {
 					_, err := os.Stat(dstFilename)
 					if !os.IsNotExist(err) {
@@ -282,6 +279,44 @@ Options:`)
 	}
 }
 
+func isHTTP(name string) bool {
+	return strings.HasPrefix(name, "http://") || strings.HasPrefix(name, "https://")
+}
+
+func openFile(name string) (rc io.ReadCloser, size int64, mode os.FileMode) {
+	if isHTTP(name) {
+		resp, err := http.Get(name)
+		exitErr(err)
+		if resp.StatusCode != http.StatusOK {
+			exitErr(fmt.Errorf("unexpected response status code %v, want OK", resp.Status))
+		}
+		return resp.Body, resp.ContentLength, os.ModePerm
+	}
+	file, err := os.Open(name)
+	exitErr(err)
+	st, err := file.Stat()
+	exitErr(err)
+	return file, st.Size(), st.Mode()
+}
+
+func cleanFileName(s string) string {
+	if isHTTP(s) {
+		s = strings.TrimPrefix(s, "http://")
+		s = strings.TrimPrefix(s, "https://")
+		s = strings.Map(func(r rune) rune {
+			switch r {
+			case '\\', '/', '*', '?', ':', '|', '<', '>', '~':
+				return '_'
+			}
+			if r < 20 {
+				return '_'
+			}
+			return r
+		}, s)
+	}
+	return s
+}
+
 func verifyTo(w io.Writer) (io.Writer, func() error) {
 	if !*verify {
 		return w, func() error {
diff --git a/s2/cmd/s2d/main.go b/s2/cmd/s2d/main.go
index cd36799664..124b51fa07 100644
--- a/s2/cmd/s2d/main.go
+++ b/s2/cmd/s2d/main.go
@@ -8,6 +8,7 @@ import (
 	"fmt"
 	"io"
 	"io/ioutil"
+	"net/http"
 	"os"
 	"path/filepath"
 	"runtime/debug"
@@ -51,6 +52,9 @@ Use - as the only file name to read from stdin and write to stdout.
 Wildcards are accepted: testdir/*.txt will compress all files in testdir ending with .txt
 Directories can be wildcards as well. testdir/*/*.txt will match testdir/subdir/b.txt
 
+File names beginning with 'http://' and 'https://' will be downloaded and decompressed.
+Extensions on downloaded files are ignored. Only http response code 200 is accepted.
+
 Options:`)
 		flag.PrintDefaults()
 		os.Exit(0)
@@ -69,6 +73,11 @@ Options:`)
 
 	var files []string
 	for _, pattern := range args {
+		if isHTTP(pattern) {
+			files = append(files, pattern)
+			continue
+		}
+
 		found, err := filepath.Glob(pattern)
 		exitErr(err)
 		if len(found) == 0 {
@@ -86,8 +95,10 @@ Options:`)
 		case strings.HasSuffix(filename, ".s2"):
 		case strings.HasSuffix(filename, ".snappy"):
 		default:
-			fmt.Println("Skipping", filename)
-			continue
+			if !isHTTP(filename) {
+				fmt.Println("Skipping", filename)
+				continue
+			}
 		}
 
 		func() {
@@ -95,12 +106,9 @@ Options:`)
 				fmt.Print("Reading ", filename, "...")
 			}
 			// Input file.
-			file, err := os.Open(filename)
-			exitErr(err)
-			finfo, err := file.Stat()
-			exitErr(err)
-			b := make([]byte, finfo.Size())
-			_, err = io.ReadFull(file, b)
+			file, size, _ := openFile(filename)
+			b := make([]byte, size)
+			_, err := io.ReadFull(file, b)
 			exitErr(err)
 			file.Close()
 
@@ -127,18 +135,17 @@ Options:`)
 	}
 
 	for _, filename := range files {
-		dstFilename := filename
+		dstFilename := cleanFileName(filename)
 		switch {
 		case strings.HasSuffix(filename, ".s2"):
-			dstFilename = strings.TrimSuffix(filename, ".s2")
+			dstFilename = strings.TrimSuffix(dstFilename, ".s2")
 		case strings.HasSuffix(filename, ".snappy"):
-			dstFilename = strings.TrimSuffix(filename, ".snappy")
+			dstFilename = strings.TrimSuffix(dstFilename, ".snappy")
 		default:
-			fmt.Println("Skipping", filename)
-			continue
-		}
-		if *bench > 0 {
-			dstFilename = "(discarded)"
+			if !isHTTP(filename) {
+				fmt.Println("Skipping", filename)
+				continue
+			}
 		}
 		if *verify {
 			dstFilename = "(verify)"
@@ -150,16 +157,12 @@ Options:`)
 			fmt.Print("Decompressing ", filename, " -> ", dstFilename)
 		}
 		// Input file.
-		file, err := os.Open(filename)
-		exitErr(err)
+		file, _, mode := openFile(filename)
 		defer closeOnce.Do(func() { file.Close() })
 		rc := rCounter{in: file}
 		src, err := readahead.NewReaderSize(&rc, 2, 4<<20)
 		exitErr(err)
 		defer src.Close()
-		finfo, err := file.Stat()
-		exitErr(err)
-		mode := finfo.Mode() // use the same mode for the output file
 		if *safe {
 			_, err := os.Stat(dstFilename)
 			if !os.IsNotExist(err) {
@@ -168,7 +171,7 @@ Options:`)
 		}
 		var out io.Writer
 		switch {
-		case *bench > 0 || *verify:
+		case *verify:
 			out = ioutil.Discard
 		case *stdout:
 			out = os.Stdout
@@ -204,6 +207,44 @@ Options:`)
 	}
 }
 
+func openFile(name string) (rc io.ReadCloser, size int64, mode os.FileMode) {
+	if isHTTP(name) {
+		resp, err := http.Get(name)
+		exitErr(err)
+		if resp.StatusCode != http.StatusOK {
+			exitErr(fmt.Errorf("unexpected response status code %v, want 200 OK", resp.Status))
+		}
+		return resp.Body, resp.ContentLength, os.ModePerm
+	}
+	file, err := os.Open(name)
+	exitErr(err)
+	st, err := file.Stat()
+	exitErr(err)
+	return file, st.Size(), st.Mode()
+}
+
+func cleanFileName(s string) string {
+	if isHTTP(s) {
+		s = strings.TrimPrefix(s, "http://")
+		s = strings.TrimPrefix(s, "https://")
+		s = strings.Map(func(r rune) rune {
+			switch r {
+			case '\\', '/', '*', '?', ':', '|', '<', '>', '~':
+				return '_'
+			}
+			if r < 20 {
+				return '_'
+			}
+			return r
+		}, s)
+	}
+	return s
+}
+
+func isHTTP(name string) bool {
+	return strings.HasPrefix(name, "http://") || strings.HasPrefix(name, "https://")
+}
+
 func exitErr(err error) {
 	if err != nil {
 		fmt.Fprintln(os.Stderr, "\nERROR:", err.Error())
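Usage sketch (illustrative only, not part of the commit above; the URL and file names are placeholders): with this change both tools accept http/https URLs directly on the command line, and the local output name follows the cleanFileName mapping added in the patch, which strips the scheme and replaces '/', ':' and other special characters with '_'.

```
# Compress a remote file; the response body is written to <cleaned name>.s2
s2c https://example.com/data/file.txt
# -> example.com_data_file.txt.s2

# Decompress a remote .s2 file; the .s2 suffix is trimmed from the cleaned name
s2d https://example.com/data/file.txt.s2
# -> example.com_data_file.txt
```

Only a 200 response is accepted; any other status code aborts with an error, as implemented in openFile.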