Skip to content

Commit d803790

Browse files
authored
Read STDIN in catp, optimize PrepareLine (#23)
1 parent ef213c4 commit d803790

7 files changed

Lines changed: 119 additions & 53 deletions

File tree

.github/workflows/release-assets.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ env:
1414
jobs:
1515
build:
1616
name: Upload Release Assets
17-
runs-on: ubuntu-20.04
17+
runs-on: ubuntu-22.04
1818
steps:
1919
- name: Install Go
2020
uses: actions/setup-go@v5

.github/workflows/test-unit.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,9 @@ jobs:
8080
curl -sLO https://github.com/vearutop/gocovdiff/releases/download/v1.4.2/linux_amd64.tar.gz && tar xf linux_amd64.tar.gz && rm linux_amd64.tar.gz
8181
gocovdiff_hash=$(git hash-object ./gocovdiff)
8282
[ "$gocovdiff_hash" == "c37862c73a677e5a9c069470287823ab5bbf0244" ] || (echo "::error::unexpected hash for gocovdiff, possible tampering: $gocovdiff_hash" && exit 1)
83-
git fetch origin master ${{ github.event.pull_request.base.sha }}
84-
REP=$(./gocovdiff -mod github.com/$GITHUB_REPOSITORY -cov unit.coverprofile -gha-annotations gha-unit.txt -delta-cov-file delta-cov-unit.txt -target-delta-cov ${TARGET_DELTA_COV})
83+
# Fetch PR diff from GitHub API.
84+
curl -s -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -H "Accept: application/vnd.github.v3.diff" https://api.github.com/repos/${{ github.repository }}/pulls/${{ github.event.pull_request.number }} > pull_request.diff
85+
REP=$(./gocovdiff -diff pull_request.diff -mod github.com/$GITHUB_REPOSITORY -cov unit.coverprofile -gha-annotations gha-unit.txt -delta-cov-file delta-cov-unit.txt -target-delta-cov ${TARGET_DELTA_COV})
8586
echo "${REP}"
8687
cat gha-unit.txt
8788
DIFF=$(test -e unit-base.txt && ./gocovdiff -mod github.com/$GITHUB_REPOSITORY -func-cov unit.txt -func-base-cov unit-base.txt || echo "Missing base coverage file")

cmd/catp/README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ catp dev, go1.22rc1 CGO_ZSTD
2323
catp prints contents of files to STDOUT or dir/file output,
2424
while printing current progress status to STDERR.
2525
It can decompress data from .gz and .zst files.
26+
Use dash (-) as PATH to read STDIN.
2627
2728
Usage of catp:
2829
catp [OPTIONS] PATH ...
@@ -37,7 +38,7 @@ catp [OPTIONS] PATH ...
3738
files will be written to out dir with original base names
3839
disables output flag
3940
-output string
40-
output to file instead of STDOUT
41+
output to file (can have .gz or .zst ext for compression) instead of STDOUT
4142
-parallel int
4243
number of parallel readers if multiple files are provided
4344
lines from different files will go to output simultaneously (out of order of files, but in order of lines in each file)

cmd/catp/catp/app.go

Lines changed: 107 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,32 @@ type runner struct {
6161

6262
hasOptions bool
6363
options Options
64+
65+
hasCompression bool
66+
}
67+
68+
// humanReadableBytes converts bytes to a human-readable string (TB, GB, MB, KB, or bytes).
69+
func humanReadableBytes(bytes int64) string {
70+
const (
71+
Byte = 1
72+
KByte = Byte * 1024
73+
MByte = KByte * 1024
74+
GByte = MByte * 1024
75+
TByte = GByte * 1024
76+
)
77+
78+
switch {
79+
case bytes >= TByte:
80+
return fmt.Sprintf("%.2f TB", float64(bytes)/float64(TByte))
81+
case bytes >= GByte:
82+
return fmt.Sprintf("%.2f GB", float64(bytes)/float64(GByte))
83+
case bytes >= MByte:
84+
return fmt.Sprintf("%.2f MB", float64(bytes)/float64(MByte))
85+
case bytes >= KByte:
86+
return fmt.Sprintf("%.2f KB", float64(bytes)/float64(KByte))
87+
default:
88+
return fmt.Sprintf("%d B", bytes)
89+
}
6490
}
6591

6692
// st renders Status as a string.
@@ -98,17 +124,29 @@ func (r *runner) st(s progress.Status) string {
98124
}
99125
} else {
100126
if s.LinesCompleted != 0 {
101-
res = fmt.Sprintf("%s: %.1f%% bytes read, %d lines processed, %.1f l/s, %.1f MB/s, elapsed %s, remaining %s",
102-
s.Task, s.DonePercent, s.LinesCompleted, s.SpeedLPS, s.SpeedMBPS,
103-
s.Elapsed.Round(10*time.Millisecond).String(), s.Remaining.String())
127+
if r.totalBytes == -1 { // STDIN
128+
res = fmt.Sprintf("%s read, %d lines processed, %.1f l/s, %.1f MB/s, elapsed %s",
129+
humanReadableBytes(s.BytesCompleted), s.LinesCompleted, s.SpeedLPS, s.SpeedMBPS,
130+
s.Elapsed.Round(10*time.Millisecond).String())
131+
} else {
132+
res = fmt.Sprintf("%s: %.1f%% bytes read, %d lines processed, %.1f l/s, %.1f MB/s, elapsed %s, remaining %s",
133+
s.Task, s.DonePercent, s.LinesCompleted, s.SpeedLPS, s.SpeedMBPS,
134+
s.Elapsed.Round(10*time.Millisecond).String(), s.Remaining.String())
135+
}
104136
} else {
105-
res = fmt.Sprintf("%s: %.1f%% bytes read, %.1f MB/s, elapsed %s, remaining %s",
106-
s.Task, s.DonePercent, s.SpeedMBPS,
107-
s.Elapsed.Round(10*time.Millisecond).String(), s.Remaining.String())
137+
if r.totalBytes == -1 {
138+
res = fmt.Sprintf("%s read, %.1f MB/s, elapsed %s",
139+
humanReadableBytes(s.BytesCompleted), s.SpeedMBPS,
140+
s.Elapsed.Round(10*time.Millisecond).String())
141+
} else {
142+
res = fmt.Sprintf("%s: %.1f%% bytes read, %.1f MB/s, elapsed %s, remaining %s",
143+
s.Task, s.DonePercent, s.SpeedMBPS,
144+
s.Elapsed.Round(10*time.Millisecond).String(), s.Remaining.String())
145+
}
108146
}
109147
}
110148

111-
if currentBytesUncompressed > currentBytes {
149+
if currentBytesUncompressed > currentBytes && r.hasCompression {
112150
lastBytesUncompressed := atomic.LoadInt64(&r.lastBytesUncompressed)
113151
lastStatusTime := atomic.LoadInt64(&r.lastStatusTime)
114152
now := time.Now().Unix()
@@ -158,6 +196,7 @@ func (r *runner) scanFile(filename string, rd io.Reader, out io.Writer) {
158196
s.Buffer(make([]byte, 64*1024), 10*1024*1024)
159197

160198
lines := 0
199+
buf := make([]byte, 64*1024)
161200

162201
for s.Scan() {
163202
lines++
@@ -175,7 +214,8 @@ func (r *runner) scanFile(filename string, rd io.Reader, out io.Writer) {
175214

176215
if r.hasOptions {
177216
if r.options.PrepareLine != nil {
178-
line = r.options.PrepareLine(filename, lines, line)
217+
buf = buf[:0]
218+
line = r.options.PrepareLine(filename, lines, line, &buf)
179219
}
180220

181221
if line == nil {
@@ -265,26 +305,32 @@ func (r *runner) shouldWrite(line []byte) bool {
265305
}
266306

267307
func (r *runner) cat(filename string) (err error) { //nolint:gocyclo
268-
file, err := os.Open(filename) //nolint:gosec
269-
if err != nil {
270-
return err
271-
}
308+
var rd io.Reader
272309

273-
defer func() {
274-
if clErr := file.Close(); clErr != nil && err == nil {
275-
err = clErr
310+
if filename == "-" {
311+
rd = os.Stdin
312+
} else {
313+
file, err := os.Open(filename) //nolint:gosec
314+
if err != nil {
315+
return err
276316
}
277-
}()
278317

279-
rd := io.Reader(file)
318+
defer func() {
319+
if clErr := file.Close(); clErr != nil && err == nil {
320+
err = clErr
321+
}
322+
}()
323+
324+
rd = io.Reader(file)
325+
}
280326

281327
if !r.noProgress {
282-
cr := progress.NewCountingReader(file)
328+
cr := progress.NewCountingReader(rd)
283329
cr.SetBytes(&r.currentBytes)
284330
cr.SetLines(nil)
285331

286332
if r.parallel <= 1 {
287-
cr = progress.NewCountingReader(file)
333+
cr = progress.NewCountingReader(rd)
288334
cr.SetLines(nil)
289335
r.currentFile = cr
290336
r.currentTotal = r.sizes[filename]
@@ -425,7 +471,8 @@ func (i *stringFlags) Set(value string) error {
425471
// Options allows behavior customisations.
426472
type Options struct {
427473
// PrepareLine is invoked for every line, if result is nil, line is skipped.
428-
PrepareLine func(filename string, lineNr int, line []byte) []byte
474+
// You can use buf to avoid allocations for a result, and change its capacity if needed.
475+
PrepareLine func(filename string, lineNr int, line []byte, buf *[]byte) []byte
429476
}
430477

431478
// Main is the entry point for catp CLI tool.
@@ -437,14 +484,6 @@ func Main(options ...func(o *Options)) error { //nolint:funlen,cyclop,gocognit,g
437484

438485
r := &runner{}
439486

440-
if len(options) > 0 {
441-
r.hasOptions = true
442-
443-
for _, opt := range options {
444-
opt(&r.options)
445-
}
446-
}
447-
448487
flag.Var(&pass, "pass", "filter matching, may contain multiple AND patterns separated by ^,\n"+
449488
"if filter matches, line is passed to the output (unless filtered out by -skip)\n"+
450489
"each -pass value is added with OR logic,\n"+
@@ -476,7 +515,8 @@ func Main(options ...func(o *Options)) error { //nolint:funlen,cyclop,gocognit,g
476515
fmt.Println()
477516
fmt.Println("catp prints contents of files to STDOUT or dir/file output, \n" +
478517
"while printing current progress status to STDERR. \n" +
479-
"It can decompress data from .gz and .zst files.")
518+
"It can decompress data from .gz and .zst files.\n" +
519+
"Use dash (-) as PATH to read STDIN.")
480520
fmt.Println()
481521
fmt.Println("Usage of catp:")
482522
fmt.Println("catp [OPTIONS] PATH ...")
@@ -502,6 +542,14 @@ func Main(options ...func(o *Options)) error { //nolint:funlen,cyclop,gocognit,g
502542
defer pprof.StopCPUProfile()
503543
}
504544

545+
if len(options) > 0 {
546+
r.hasOptions = true
547+
548+
for _, opt := range options {
549+
opt(&r.options)
550+
}
551+
}
552+
505553
if *output != "" && r.outDir == "" { //nolint:nestif
506554
fn := *output
507555

@@ -597,35 +645,51 @@ func Main(options ...func(o *Options)) error { //nolint:funlen,cyclop,gocognit,g
597645

598646
var files []string
599647

600-
for _, f := range flag.Args() {
601-
glob, err := filepath.Glob(f)
602-
if err != nil {
603-
return err
604-
}
648+
args := flag.Args()
605649

606-
for _, f := range glob {
607-
alreadyThere := false
650+
if len(args) == 1 && args[0] == "-" {
651+
files = append(files, "-") // STDIN
652+
} else {
653+
for _, f := range args {
654+
glob, err := filepath.Glob(f)
655+
if err != nil {
656+
return err
657+
}
608658

609-
for _, e := range files {
610-
if e == f {
611-
alreadyThere = true
659+
for _, f := range glob {
660+
alreadyThere := false
612661

613-
break
662+
for _, e := range files {
663+
if e == f {
664+
alreadyThere = true
665+
666+
break
667+
}
614668
}
615-
}
616669

617-
if !alreadyThere {
618-
files = append(files, f)
670+
if !alreadyThere {
671+
files = append(files, f)
672+
}
619673
}
620674
}
621675
}
622676

623677
for _, fn := range files {
678+
if fn == "-" {
679+
r.totalBytes = -1
680+
681+
continue
682+
}
683+
624684
st, err := os.Stat(fn)
625685
if err != nil {
626686
return fmt.Errorf("failed to read file stats %s: %w", fn, err)
627687
}
628688

689+
if strings.HasSuffix(fn, ".zst") || strings.HasSuffix(fn, ".gz") {
690+
r.hasCompression = true
691+
}
692+
629693
r.totalBytes += st.Size()
630694
r.sizes[fn] = st.Size()
631695
}

cmd/catp/default.pgo

-6.18 KB
Binary file not shown.

go.mod

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@ module github.com/bool64/progress
33
go 1.22
44

55
require (
6-
github.com/DataDog/zstd v1.5.6
7-
github.com/bool64/dev v0.2.39
6+
github.com/DataDog/zstd v1.5.7
7+
github.com/bool64/dev v0.2.40
88
github.com/klauspost/compress v1.18.0
99
github.com/klauspost/pgzip v1.2.6
1010
)

go.sum

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
github.com/DataDog/zstd v1.5.6 h1:LbEglqepa/ipmmQJUDnSsfvA8e8IStVcGaFWDuxvGOY=
2-
github.com/DataDog/zstd v1.5.6/go.mod h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwSAmyw=
3-
github.com/bool64/dev v0.2.39 h1:kP8DnMGlWXhGYJEZE/J0l/gVBdbuhoPGL+MJG4QbofE=
4-
github.com/bool64/dev v0.2.39/go.mod h1:iJbh1y/HkunEPhgebWRNcs8wfGq7sjvJ6W5iabL8ACg=
1+
github.com/DataDog/zstd v1.5.7 h1:ybO8RBeh29qrxIhCA9E8gKY6xfONU9T6G6aP9DTKfLE=
2+
github.com/DataDog/zstd v1.5.7/go.mod h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwSAmyw=
3+
github.com/bool64/dev v0.2.40 h1:LUSD+Aq+WB3KwVntqXstevJ0wB12ig1bEgoG8ZafsZU=
4+
github.com/bool64/dev v0.2.40/go.mod h1:iJbh1y/HkunEPhgebWRNcs8wfGq7sjvJ6W5iabL8ACg=
55
github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
66
github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
77
github.com/klauspost/pgzip v1.2.6 h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU=

0 commit comments

Comments
 (0)