Skip to content

Commit 6a9df69

Browse files
authored
Test asm slice reads/writes in race tests (#286)
* Test asm slice reads/writes in race tests
* Update CI and go.mod

When calling asm functions in race mode, check inputs and outputs for races.
1 parent 67157af commit 6a9df69

15 files changed

+329
-63
lines changed

.github/workflows/go.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ jobs:
1111
build:
1212
strategy:
1313
matrix:
14-
go-version: [1.20.x, 1.21.x, 1.22.x]
14+
go-version: [1.21.x, 1.22.x, 1.23.x]
1515
os: [ubuntu-latest, macos-latest, windows-latest]
1616
env:
1717
CGO_ENABLED: 0

_gen/gen.go

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -178,8 +178,14 @@ func (r *reedSolomon) canGFNI(byteCount int, inputs, outputs int) (_, _ *func(ma
178178
if pshufb {
179179
w.WriteString(`
180180
181-
func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int {
182-
n := stop-start
181+
func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) (n int) {
182+
n = stop - start
183+
if raceEnabled {
184+
defer func() {
185+
raceReadSlices(in, start, n)
186+
raceWriteSlices(out, start, n)
187+
}()
188+
}
183189
184190
`)
185191

@@ -197,8 +203,14 @@ func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int {
197203
panic(fmt.Sprintf("unhandled size: %dx%d", len(in), len(out)))
198204
}
199205
200-
func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) int {
201-
n := (stop-start)
206+
func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) (n int) {
207+
n = stop - start
208+
if raceEnabled {
209+
defer func() {
210+
raceReadSlices(in, start, n)
211+
raceWriteSlices(out, start, n)
212+
}()
213+
}
202214
203215
`)
204216

@@ -223,6 +235,11 @@ func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) int {
223235
func galMulSlicesGFNI(matrix []uint64, in, out [][]byte, start, stop int) int {
224236
n := (stop-start) & (maxInt - (64 - 1))
225237
238+
if raceEnabled {
239+
raceReadSlices(in, start, n)
240+
raceWriteSlices(out, start, n)
241+
}
242+
226243
`)
227244

228245
w.WriteString(`switch len(in) {
@@ -242,6 +259,11 @@ func galMulSlicesGFNI(matrix []uint64, in, out [][]byte, start, stop int) int {
242259
func galMulSlicesGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int {
243260
n := (stop-start) & (maxInt - (64 - 1))
244261
262+
if raceEnabled {
263+
raceReadSlices(in, start, n)
264+
raceWriteSlices(out, start, n)
265+
}
266+
245267
`)
246268

247269
w.WriteString(`switch len(in) {
@@ -264,6 +286,11 @@ func galMulSlicesGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int
264286
func galMulSlicesAvxGFNI(matrix []uint64, in, out [][]byte, start, stop int) int {
265287
n := (stop-start) & (maxInt - (32 - 1))
266288
289+
if raceEnabled {
290+
raceReadSlices(in, start, n)
291+
raceWriteSlices(out, start, n)
292+
}
293+
267294
`)
268295

269296
w.WriteString(`switch len(in) {
@@ -283,6 +310,11 @@ func galMulSlicesAvxGFNI(matrix []uint64, in, out [][]byte, start, stop int) int
283310
func galMulSlicesAvxGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int {
284311
n := (stop-start) & (maxInt - (32 - 1))
285312
313+
if raceEnabled {
314+
raceReadSlices(in, start, n)
315+
raceWriteSlices(out, start, n)
316+
}
317+
286318
`)
287319

288320
w.WriteString(`switch len(in) {

galois_amd64.go

Lines changed: 102 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -53,20 +53,32 @@ func galMulSlice(c byte, in, out []byte, o *options) {
5353
}
5454
if o.useAVX2 {
5555
if len(in) >= bigSwitchover {
56-
galMulAVX2_64(mulTableLow[c][:], mulTableHigh[c][:], in, out)
5756
done := (len(in) >> 6) << 6
57+
if raceEnabled {
58+
raceReadSlice(in[:done])
59+
raceWriteSlice(out[:done])
60+
}
61+
galMulAVX2_64(mulTableLow[c][:], mulTableHigh[c][:], in, out)
5862
in = in[done:]
5963
out = out[done:]
6064
}
6165
if len(in) > 32 {
62-
galMulAVX2(mulTableLow[c][:], mulTableHigh[c][:], in, out)
6366
done := (len(in) >> 5) << 5
67+
if raceEnabled {
68+
raceReadSlice(in[:done])
69+
raceWriteSlice(out[:done])
70+
}
71+
galMulAVX2(mulTableLow[c][:], mulTableHigh[c][:], in, out)
6472
in = in[done:]
6573
out = out[done:]
6674
}
6775
} else if o.useSSSE3 {
68-
galMulSSSE3(mulTableLow[c][:], mulTableHigh[c][:], in, out)
6976
done := (len(in) >> 4) << 4
77+
if raceEnabled {
78+
raceReadSlice(in[:done])
79+
raceWriteSlice(out[:done])
80+
}
81+
galMulSSSE3(mulTableLow[c][:], mulTableHigh[c][:], in, out)
7082
in = in[done:]
7183
out = out[done:]
7284
}
@@ -85,20 +97,32 @@ func galMulSliceXor(c byte, in, out []byte, o *options) {
8597

8698
if o.useAVX2 {
8799
if len(in) >= bigSwitchover {
88-
galMulAVX2Xor_64(mulTableLow[c][:], mulTableHigh[c][:], in, out)
89100
done := (len(in) >> 6) << 6
101+
if raceEnabled {
102+
raceReadSlice(in[:done])
103+
raceWriteSlice(out[:done])
104+
}
105+
galMulAVX2Xor_64(mulTableLow[c][:], mulTableHigh[c][:], in, out)
90106
in = in[done:]
91107
out = out[done:]
92108
}
93109
if len(in) >= 32 {
94-
galMulAVX2Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
95110
done := (len(in) >> 5) << 5
111+
if raceEnabled {
112+
raceReadSlice(in[:done])
113+
raceWriteSlice(out[:done])
114+
}
115+
galMulAVX2Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
96116
in = in[done:]
97117
out = out[done:]
98118
}
99119
} else if o.useSSSE3 {
100-
galMulSSSE3Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
101120
done := (len(in) >> 4) << 4
121+
if raceEnabled {
122+
raceReadSlice(in[:done])
123+
raceWriteSlice(out[:done])
124+
}
125+
galMulSSSE3Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
102126
in = in[done:]
103127
out = out[done:]
104128
}
@@ -117,20 +141,32 @@ func sliceXor(in, out []byte, o *options) {
117141
if o.useSSE2 {
118142
if len(in) >= bigSwitchover {
119143
if o.useAVX2 {
120-
avx2XorSlice_64(in, out)
121144
done := (len(in) >> 6) << 6
145+
if raceEnabled {
146+
raceReadSlice(in[:done])
147+
raceWriteSlice(out[:done])
148+
}
149+
avx2XorSlice_64(in, out)
122150
in = in[done:]
123151
out = out[done:]
124152
} else {
125-
sSE2XorSlice_64(in, out)
126153
done := (len(in) >> 6) << 6
154+
if raceEnabled {
155+
raceReadSlice(in[:done])
156+
raceWriteSlice(out[:done])
157+
}
158+
sSE2XorSlice_64(in, out)
127159
in = in[done:]
128160
out = out[done:]
129161
}
130162
}
131163
if len(in) >= 16 {
132-
sSE2XorSlice(in, out)
133164
done := (len(in) >> 4) << 4
165+
if raceEnabled {
166+
raceReadSlice(in[:done])
167+
raceWriteSlice(out[:done])
168+
}
169+
sSE2XorSlice(in, out)
134170
in = in[done:]
135171
out = out[done:]
136172
}
@@ -462,9 +498,17 @@ func fftDIT2(x, y []byte, log_m ffe, o *options) {
462498
}
463499
if o.useAVX2 {
464500
tmp := &multiply256LUT[log_m]
501+
if raceEnabled {
502+
raceReadSlice(y)
503+
raceWriteSlice(x)
504+
}
465505
fftDIT2_avx2(x, y, tmp)
466506
} else if o.useSSSE3 {
467507
tmp := &multiply256LUT[log_m]
508+
if raceEnabled {
509+
raceReadSlice(y)
510+
raceWriteSlice(x)
511+
}
468512
fftDIT2_ssse3(x, y, tmp)
469513
} else {
470514
// Reference version:
@@ -480,11 +524,15 @@ func fftDIT28(x, y []byte, log_m ffe8, o *options) {
480524
}
481525

482526
if o.useAVX2 {
527+
done := (len(y) >> 6) << 6
528+
if raceEnabled {
529+
raceReadSlice(y[:done])
530+
raceWriteSlice(x[:done])
531+
}
483532
fftDIT28_avx2(x, y, &multiply256LUT8[log_m])
484533
if len(x)&63 == 0 {
485534
return
486535
}
487-
done := (len(y) >> 6) << 6
488536
y = y[done:]
489537
x = x[done:]
490538
}
@@ -499,11 +547,15 @@ func ifftDIT28(x, y []byte, log_m ffe8, o *options) {
499547
}
500548

501549
if o.useAVX2 {
550+
done := (len(y) >> 6) << 6
551+
if raceEnabled {
552+
raceReadSlice(y[:done])
553+
raceWriteSlice(x[:done])
554+
}
502555
ifftDIT28_avx2(x, y, &multiply256LUT8[log_m])
503556
if len(x)&63 == 0 {
504557
return
505558
}
506-
done := (len(y) >> 6) << 6
507559
y = y[done:]
508560
x = x[done:]
509561
}
@@ -514,14 +566,22 @@ func ifftDIT28(x, y []byte, log_m ffe8, o *options) {
514566
func mulAdd8(x, y []byte, log_m ffe8, o *options) {
515567
if o.useAVX2 {
516568
t := &multiply256LUT8[log_m]
517-
galMulAVX2Xor_64(t[:16], t[16:32], y, x)
518569
done := (len(y) >> 6) << 6
570+
if raceEnabled {
571+
raceReadSlice(y[:done])
572+
raceWriteSlice(x[:done])
573+
}
574+
galMulAVX2Xor_64(t[:16], t[16:32], y, x)
519575
y = y[done:]
520576
x = x[done:]
521577
} else if o.useSSSE3 {
522578
t := &multiply256LUT8[log_m]
523-
galMulSSSE3Xor(t[:16], t[16:32], y, x)
524579
done := (len(y) >> 4) << 4
580+
if raceEnabled {
581+
raceReadSlice(y[:done])
582+
raceWriteSlice(x[:done])
583+
}
584+
galMulSSSE3Xor(t[:16], t[16:32], y, x)
525585
y = y[done:]
526586
x = x[done:]
527587
}
@@ -535,9 +595,19 @@ func ifftDIT2(x, y []byte, log_m ffe, o *options) {
535595
}
536596
if o.useAVX2 {
537597
tmp := &multiply256LUT[log_m]
598+
if raceEnabled {
599+
raceReadSlice(y)
600+
raceWriteSlice(x)
601+
}
602+
538603
ifftDIT2_avx2(x, y, tmp)
539604
} else if o.useSSSE3 {
540605
tmp := &multiply256LUT[log_m]
606+
if raceEnabled {
607+
raceReadSlice(y)
608+
raceWriteSlice(x)
609+
}
610+
541611
ifftDIT2_ssse3(x, y, tmp)
542612
} else {
543613
// Reference version:
@@ -552,9 +622,17 @@ func mulgf16(x, y []byte, log_m ffe, o *options) {
552622
}
553623
if o.useAVX2 {
554624
tmp := &multiply256LUT[log_m]
625+
if raceEnabled {
626+
raceReadSlice(y)
627+
raceWriteSlice(x)
628+
}
555629
mulgf16_avx2(x, y, tmp)
556630
} else if o.useSSSE3 {
557631
tmp := &multiply256LUT[log_m]
632+
if raceEnabled {
633+
raceReadSlice(y)
634+
raceWriteSlice(x)
635+
}
558636
mulgf16_ssse3(x, y, tmp)
559637
} else {
560638
refMul(x, y, log_m)
@@ -564,14 +642,23 @@ func mulgf16(x, y []byte, log_m ffe, o *options) {
564642
func mulgf8(out, in []byte, log_m ffe8, o *options) {
565643
if o.useAVX2 {
566644
t := &multiply256LUT8[log_m]
567-
galMulAVX2_64(t[:16], t[16:32], in, out)
568645
done := (len(in) >> 6) << 6
646+
if raceEnabled {
647+
raceReadSlice(in[:done])
648+
raceWriteSlice(out[:done])
649+
}
650+
651+
galMulAVX2_64(t[:16], t[16:32], in, out)
569652
in = in[done:]
570653
out = out[done:]
571654
} else if o.useSSSE3 {
572655
t := &multiply256LUT8[log_m]
573-
galMulSSSE3(t[:16], t[16:32], in, out)
574656
done := (len(in) >> 4) << 4
657+
if raceEnabled {
658+
raceReadSlice(in[:done])
659+
raceWriteSlice(out[:done])
660+
}
661+
galMulSSSE3(t[:16], t[16:32], in, out)
575662
in = in[done:]
576663
out = out[done:]
577664
}

galois_arm64.go

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,12 @@ func galMulSlice(c byte, in, out []byte, o *options) {
3333
return
3434
}
3535
var done int
36-
galMulNEON(mulTableLow[c][:], mulTableHigh[c][:], in, out)
3736
done = (len(in) >> 5) << 5
37+
if raceEnabled {
38+
raceReadSlice(in[:done])
39+
raceWriteSlice(out[:done])
40+
}
41+
galMulNEON(mulTableLow[c][:], mulTableHigh[c][:], in, out)
3842

3943
remain := len(in) - done
4044
if remain > 0 {
@@ -50,9 +54,12 @@ func galMulSliceXor(c byte, in, out []byte, o *options) {
5054
sliceXor(in, out, o)
5155
return
5256
}
53-
var done int
57+
done := (len(in) >> 5) << 5
58+
if raceEnabled {
59+
raceReadSlice(in[:done])
60+
raceWriteSlice(out[:done])
61+
}
5462
galMulXorNEON(mulTableLow[c][:], mulTableHigh[c][:], in, out)
55-
done = (len(in) >> 5) << 5
5663

5764
remain := len(in) - done
5865
if remain > 0 {

0 commit comments

Comments
 (0)