Skip to content

Commit 179af51

Browse files
committed
Add unsafe little endian loaders
Benchmarks pending
1 parent 6ad807b commit 179af51

File tree

15 files changed

+166
-87
lines changed

15 files changed

+166
-87
lines changed

.github/workflows/go.yml

Lines changed: 27 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -35,15 +35,21 @@ jobs:
3535
- name: Test Noasm
3636
run: go test -tags=noasm ./...
3737

38+
- name: Test Nounsafe
39+
run: go test -tags=nounsafe ./...
40+
41+
- name: Test Nounsafe, noasm
42+
run: go test -tags=nounsafe,noasm ./...
43+
3844
- name: Test Race 1 CPU
3945
env:
4046
CGO_ENABLED: 1
41-
run: go test -cpu=1 -short -race -v ./...
47+
run: go test -cpu=1 -short -race -tags=nounsafe -v ./...
4248

4349
- name: Test Race 4 CPU
4450
env:
4551
CGO_ENABLED: 1
46-
run: go test -cpu=4 -short -race -v ./...
52+
run: go test -cpu=4 -short -race -tags=nounsafe -v ./...
4753

4854
generate:
4955
strategy:
@@ -122,22 +128,22 @@ jobs:
122128
uses: actions/checkout@v4
123129

124130
- name: S2/FuzzDictBlocks
125-
run: go test -run=none -fuzz=FuzzDictBlocks -fuzztime=100000x -test.fuzzminimizetime=10ms ./s2/.
131+
run: go test -tags=nounsafe -run=none -fuzz=FuzzDictBlocks -fuzztime=100000x -test.fuzzminimizetime=10ms ./s2/.
126132

127133
- name: S2/FuzzEncodingBlocks
128-
run: go test -run=none -fuzz=FuzzEncodingBlocks -fuzztime=500000x -test.fuzzminimizetime=10ms ./s2/.
134+
run: go test -tags=nounsafe -run=none -fuzz=FuzzEncodingBlocks -fuzztime=500000x -test.fuzzminimizetime=10ms ./s2/.
129135

130136
- name: S2/FuzzLZ4Block
131-
run: go test -run=none -fuzz=FuzzLZ4Block -fuzztime=500000x -test.fuzzminimizetime=10ms ./s2/.
137+
run: go test -tags=nounsafe -run=none -fuzz=FuzzLZ4Block -fuzztime=500000x -test.fuzzminimizetime=10ms ./s2/.
132138

133139
- name: S2/FuzzDictBlocks/noasm
134-
run: go test -tags=noasm -run=none -fuzz=FuzzDictBlocks -fuzztime=100000x -test.fuzzminimizetime=10ms ./s2/.
140+
run: go test -tags=noasm,nounsafe -run=none -fuzz=FuzzDictBlocks -fuzztime=100000x -test.fuzzminimizetime=10ms ./s2/.
135141

136142
- name: S2/FuzzEncodingBlocks/noasm
137-
run: go test -tags=noasm -run=none -fuzz=FuzzEncodingBlocks -fuzztime=500000x -test.fuzzminimizetime=10ms ./s2/.
143+
run: go test -tags=noasm,nounsafe -run=none -fuzz=FuzzEncodingBlocks -fuzztime=500000x -test.fuzzminimizetime=10ms ./s2/.
138144

139145
- name: S2/FuzzLZ4Block/noasm
140-
run: go test -tags=noasm -run=none -fuzz=FuzzLZ4Block -fuzztime=500000x -test.fuzzminimizetime=10ms ./s2/.
146+
run: go test -tags=noasm,nounsafe -run=none -fuzz=FuzzLZ4Block -fuzztime=500000x -test.fuzzminimizetime=10ms ./s2/.
141147

142148
fuzz-zstd:
143149
env:
@@ -153,28 +159,28 @@ jobs:
153159
uses: actions/checkout@v4
154160

155161
- name: zstd/FuzzDecodeAll
156-
run: go test -run=none -fuzz=FuzzDecodeAll -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/.
162+
run: go test -tags=nounsafe -run=none -fuzz=FuzzDecodeAll -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/.
157163

158164
- name: zstd/FuzzDecAllNoBMI2
159-
run: go test -run=none -fuzz=FuzzDecAllNoBMI2 -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/.
165+
run: go test -tags=nounsafe -run=none -fuzz=FuzzDecAllNoBMI2 -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/.
160166

161167
- name: zstd/FuzzDecoder
162-
run: go test -run=none -fuzz=FuzzDecoder -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/.
168+
run: go test -tags=nounsafe -run=none -fuzz=FuzzDecoder -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/.
163169

164170
- name: zstd/FuzzNoBMI2Dec
165-
run: go test -run=none -fuzz=FuzzNoBMI2Dec -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/.
171+
run: go test -tags=nounsafe -run=none -fuzz=FuzzNoBMI2Dec -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/.
166172

167173
- name: zstd/FuzzEncoding
168-
run: cd zstd&&go test -run=none -fuzz=FuzzEncoding -fuzztime=250000x -test.fuzzminimizetime=10ms -fuzz-end=3&&cd ..
174+
run: cd zstd&&go test -tags=nounsafe -run=none -fuzz=FuzzEncoding -fuzztime=250000x -test.fuzzminimizetime=10ms -fuzz-end=3&&cd ..
169175

170176
- name: zstd/FuzzDecodeAll/noasm
171-
run: go test -tags=noasm -run=none -fuzz=FuzzDecodeAll -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/.
177+
run: go test -tags=noasm,nounsafe -run=none -fuzz=FuzzDecodeAll -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/.
172178

173179
- name: zstd/FuzzDecoder/noasm
174-
run: go test -tags=noasm -run=none -fuzz=FuzzDecoder -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/.
180+
run: go test -tags=noasm,nounsafe -run=none -fuzz=FuzzDecoder -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/.
175181

176182
- name: zstd/FuzzEncoding/noasm
177-
run: cd zstd&&go test -tags=noasm -run=none -fuzz=FuzzEncoding -fuzztime=250000x -test.fuzzminimizetime=10ms -fuzz-end=3&&cd ..
183+
run: cd zstd&&go test -tags=noasm,nounsafe -run=none -fuzz=FuzzEncoding -fuzztime=250000x -test.fuzzminimizetime=10ms -fuzz-end=3&&cd ..
178184

179185
- name: zstd/FuzzEncodingBest
180186
run: cd zstd&&go test -run=none -fuzz=FuzzEncoding -fuzztime=25000x -test.fuzzminimizetime=10ms -fuzz-start=4&&cd ..
@@ -193,16 +199,16 @@ jobs:
193199
uses: actions/checkout@v4
194200

195201
- name: flate/FuzzEncoding
196-
run: go test -run=none -fuzz=FuzzEncoding -fuzztime=100000x -test.fuzzminimizetime=10ms ./flate/.
202+
run: go test -tags=nounsafe -run=none -fuzz=FuzzEncoding -fuzztime=100000x -test.fuzzminimizetime=10ms ./flate/.
197203

198204
- name: flate/FuzzEncoding/noasm
199-
run: go test -run=none -tags=noasm -fuzz=FuzzEncoding -fuzztime=100000x -test.fuzzminimizetime=10ms ./flate/.
205+
run: go test -run=none -tags=noasm,nounsafe -fuzz=FuzzEncoding -fuzztime=100000x -test.fuzzminimizetime=10ms ./flate/.
200206

201207
- name: zip/FuzzReader
202-
run: go test -run=none -fuzz=FuzzReader -fuzztime=500000x -test.fuzzminimizetime=10ms ./zip/.
208+
run: go test -tags=nounsafe -run=none -fuzz=FuzzReader -fuzztime=500000x -test.fuzzminimizetime=10ms ./zip/.
203209

204210
- name: fse/FuzzCompress
205-
run: go test -run=none -fuzz=FuzzCompress -fuzztime=1000000x -test.fuzzminimizetime=10ms ./fse/.
211+
run: go test -tags=nounsafe -run=none -fuzz=FuzzCompress -fuzztime=1000000x -test.fuzzminimizetime=10ms ./fse/.
206212

207213
- name: fse/FuzzDecompress
208-
run: go test -run=none -fuzz=FuzzDecompress -fuzztime=1000000x -test.fuzzminimizetime=10ms ./fse/.
214+
run: go test -tags=nounsafe -run=none -fuzz=FuzzDecompress -fuzztime=1000000x -test.fuzzminimizetime=10ms ./fse/.

flate/fast_encoder.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,9 @@
66
package flate
77

88
import (
9-
"encoding/binary"
109
"fmt"
10+
11+
"github.com/klauspost/compress/internal/le"
1112
)
1213

1314
type fastEnc interface {
@@ -58,11 +59,11 @@ const (
5859
)
5960

6061
func load3232(b []byte, i int32) uint32 {
61-
return binary.LittleEndian.Uint32(b[i:])
62+
return le.Load32(b, i)
6263
}
6364

6465
func load6432(b []byte, i int32) uint64 {
65-
return binary.LittleEndian.Uint64(b[i:])
66+
return le.Load64(b, i)
6667
}
6768

6869
type tableEntry struct {

flate/fuzz_test.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
//go:build go1.18
2-
// +build go1.18
32

43
package flate
54

internal/le/le.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
package le
2+
3+
type Indexer interface {
4+
int | int8 | int16 | int32 | int64 | uint | uint8 | uint16 | uint32 | uint64
5+
}

internal/le/unsafe_disabled.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
//go:build !(amd64 || arm64 || ppc64le || riscv64) || nounsafe || purego || appengine
2+
3+
package le
4+
5+
import (
6+
"encoding/binary"
7+
)
8+
9+
func Load16[I Indexer](b []byte, i I) uint16 {
10+
return binary.LittleEndian.Uint16(b[i:])
11+
}
12+
13+
func Load32[I Indexer](b []byte, i I) uint32 {
14+
return binary.LittleEndian.Uint32(b[i:])
15+
}
16+
17+
func Load64[I Indexer](b []byte, i I) uint64 {
18+
return binary.LittleEndian.Uint64(b[i:])
19+
}
20+
21+
func Store16(b []byte, v uint16) {
22+
binary.LittleEndian.PutUint16(b, v)
23+
}
24+
25+
func Store32(b []byte, v uint32) {
26+
binary.LittleEndian.PutUint32(b, v)
27+
}

internal/le/unsafe_enabled.go

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
// Unsafe loads and stores are enabled only on 64-bit little-endian platforms:
2+
3+
//go:build (amd64 || arm64 || ppc64le || riscv64) && !nounsafe && !purego && !appengine
4+
5+
package le
6+
7+
import (
8+
"unsafe"
9+
)
10+
11+
func Load16[I Indexer](b []byte, i I) uint16 {
12+
//return binary.LittleEndian.Uint16(b[i:])
13+
//return *(*uint16)(unsafe.Pointer(&b[i]))
14+
return *(*uint16)(unsafe.Pointer(uintptr(unsafe.Pointer(&b[0])) + uintptr(i)*unsafe.Sizeof(b[0])))
15+
}
16+
17+
func Load32[I Indexer](b []byte, i I) uint32 {
18+
//return binary.LittleEndian.Uint32(b[i:])
19+
//return *(*uint32)(unsafe.Pointer(&b[i]))
20+
return *(*uint32)(unsafe.Pointer(uintptr(unsafe.Pointer(&b[0])) + uintptr(i)*unsafe.Sizeof(b[0])))
21+
}
22+
23+
func Load64[I Indexer](b []byte, i I) uint64 {
24+
//return binary.LittleEndian.Uint64(b[i:])
25+
//return *(*uint64)(unsafe.Pointer(&b[i]))
26+
return *(*uint64)(unsafe.Pointer(uintptr(unsafe.Pointer(&b[0])) + uintptr(i)*unsafe.Sizeof(b[0])))
27+
}
28+
29+
func Store16(b []byte, v uint16) {
30+
//binary.LittleEndian.PutUint16(b, v)
31+
*(*uint16)(unsafe.Pointer(&b[0])) = v
32+
}
33+
34+
func Store32(b []byte, v uint32) {
35+
//binary.LittleEndian.PutUint32(b, v)
36+
*(*uint32)(unsafe.Pointer(&b[0])) = v
37+
}

s2/encode_all.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,16 @@ import (
1010
"encoding/binary"
1111
"fmt"
1212
"math/bits"
13+
14+
"github.com/klauspost/compress/internal/le"
1315
)
1416

1517
func load32(b []byte, i int) uint32 {
16-
return binary.LittleEndian.Uint32(b[i:])
18+
return le.Load32(b, i)
1719
}
1820

1921
func load64(b []byte, i int) uint64 {
20-
return binary.LittleEndian.Uint64(b[i:])
22+
return le.Load64(b, i)
2123
}
2224

2325
// hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits.

zstd/_generate/gen.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ func (o options) generateBody(name string, executeSingleTriple func(ctx *execute
157157
Load(br.Field("value"), brValue)
158158
Load(br.Field("bitsRead"), brBitsRead)
159159
Load(br.Field("in").Base(), brPointer)
160-
Load(br.Field("in").Len(), brOffset)
160+
Load(br.Field("off"), brOffset)
161161
ADDQ(brOffset, brPointer) // Add current offset to read pointer.
162162
MOVQ(brPointer, brPointerStash)
163163
}
@@ -438,7 +438,7 @@ func (o options) generateBody(name string, executeSingleTriple func(ctx *execute
438438
br := Dereference(Param("br"))
439439
Store(brValue, br.Field("value"))
440440
Store(brBitsRead.As8(), br.Field("bitsRead"))
441-
Store(brOffset, br.Field("in").Len())
441+
Store(brOffset, br.Field("off"))
442442

443443
if !o.useSeqs {
444444
Comment("Update the context")

zstd/bitreader.go

Lines changed: 18 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,12 @@
55
package zstd
66

77
import (
8-
"encoding/binary"
98
"errors"
109
"fmt"
1110
"io"
1211
"math/bits"
12+
13+
"github.com/klauspost/compress/internal/le"
1314
)
1415

1516
// bitReader reads a bitstream in reverse.
@@ -18,6 +19,7 @@ import (
1819
type bitReader struct {
1920
in []byte
2021
value uint64 // Maybe use [16]byte, but shifting is awkward.
22+
off int // offset where next read should end
2123
bitsRead uint8
2224
}
2325

@@ -32,6 +34,7 @@ func (b *bitReader) init(in []byte) error {
3234
if v == 0 {
3335
return errors.New("corrupt stream, did not find end of stream")
3436
}
37+
b.off = len(in)
3538
b.bitsRead = 64
3639
b.value = 0
3740
if len(in) >= 8 {
@@ -67,18 +70,15 @@ func (b *bitReader) fillFast() {
6770
if b.bitsRead < 32 {
6871
return
6972
}
70-
v := b.in[len(b.in)-4:]
71-
b.in = b.in[:len(b.in)-4]
72-
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
73-
b.value = (b.value << 32) | uint64(low)
73+
b.off -= 4
74+
b.value = (b.value << 32) | uint64(le.Load32(b.in, b.off))
7475
b.bitsRead -= 32
7576
}
7677

7778
// fillFastStart() assumes the bitreader is empty and there are at least 8 bytes to read.
7879
func (b *bitReader) fillFastStart() {
79-
v := b.in[len(b.in)-8:]
80-
b.in = b.in[:len(b.in)-8]
81-
b.value = binary.LittleEndian.Uint64(v)
80+
b.off -= 8
81+
b.value = le.Load64(b.in, b.off)
8282
b.bitsRead = 0
8383
}
8484

@@ -87,25 +87,23 @@ func (b *bitReader) fill() {
8787
if b.bitsRead < 32 {
8888
return
8989
}
90-
if len(b.in) >= 4 {
91-
v := b.in[len(b.in)-4:]
92-
b.in = b.in[:len(b.in)-4]
93-
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
94-
b.value = (b.value << 32) | uint64(low)
90+
if b.off >= 4 {
91+
b.off -= 4
92+
b.value = (b.value << 32) | uint64(le.Load32(b.in, b.off))
9593
b.bitsRead -= 32
9694
return
9795
}
9896

99-
b.bitsRead -= uint8(8 * len(b.in))
100-
for len(b.in) > 0 {
101-
b.value = (b.value << 8) | uint64(b.in[len(b.in)-1])
102-
b.in = b.in[:len(b.in)-1]
97+
b.bitsRead -= uint8(8 * b.off)
98+
for b.off > 0 {
99+
b.off -= 1
100+
b.value = (b.value << 8) | uint64(b.in[b.off])
103101
}
104102
}
105103

106104
// finished returns true if all bits have been read from the bit stream.
107105
func (b *bitReader) finished() bool {
108-
return len(b.in) == 0 && b.bitsRead >= 64
106+
return b.off == 0 && b.bitsRead >= 64
109107
}
110108

111109
// overread returns true if more bits have been requested than is on the stream.
@@ -115,13 +113,14 @@ func (b *bitReader) overread() bool {
115113

116114
// remain returns the number of bits remaining.
117115
func (b *bitReader) remain() uint {
118-
return 8*uint(len(b.in)) + 64 - uint(b.bitsRead)
116+
return 8*uint(b.off) + 64 - uint(b.bitsRead)
119117
}
120118

121119
// close closes the bitstream and returns an error if out-of-buffer reads occurred.
122120
func (b *bitReader) close() error {
123121
// Release reference.
124122
b.in = nil
123+
b.off = 0
125124
if !b.finished() {
126125
return fmt.Errorf("%d extra bits on block, should be 0", b.remain())
127126
}

zstd/decoder.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
323323
frame.bBuf = nil
324324
if frame.history.decoders.br != nil {
325325
frame.history.decoders.br.in = nil
326+
frame.history.decoders.br.off = 0
326327
}
327328
d.decoders <- block
328329
}()

0 commit comments

Comments
 (0)