@@ -53,20 +53,32 @@ func galMulSlice(c byte, in, out []byte, o *options) {
5353 }
5454 if o .useAVX2 {
5555 if len (in ) >= bigSwitchover {
56- galMulAVX2_64 (mulTableLow [c ][:], mulTableHigh [c ][:], in , out )
5756 done := (len (in ) >> 6 ) << 6
57+ if raceEnabled {
58+ raceReadSlice (in [:done ])
59+ raceWriteSlice (out [:done ])
60+ }
61+ galMulAVX2_64 (mulTableLow [c ][:], mulTableHigh [c ][:], in , out )
5862 in = in [done :]
5963 out = out [done :]
6064 }
6165 if len (in ) > 32 {
62- galMulAVX2 (mulTableLow [c ][:], mulTableHigh [c ][:], in , out )
6366 done := (len (in ) >> 5 ) << 5
67+ if raceEnabled {
68+ raceReadSlice (in [:done ])
69+ raceWriteSlice (out [:done ])
70+ }
71+ galMulAVX2 (mulTableLow [c ][:], mulTableHigh [c ][:], in , out )
6472 in = in [done :]
6573 out = out [done :]
6674 }
6775 } else if o .useSSSE3 {
68- galMulSSSE3 (mulTableLow [c ][:], mulTableHigh [c ][:], in , out )
6976 done := (len (in ) >> 4 ) << 4
77+ if raceEnabled {
78+ raceReadSlice (in [:done ])
79+ raceWriteSlice (out [:done ])
80+ }
81+ galMulSSSE3 (mulTableLow [c ][:], mulTableHigh [c ][:], in , out )
7082 in = in [done :]
7183 out = out [done :]
7284 }
@@ -85,20 +97,32 @@ func galMulSliceXor(c byte, in, out []byte, o *options) {
8597
8698 if o .useAVX2 {
8799 if len (in ) >= bigSwitchover {
88- galMulAVX2Xor_64 (mulTableLow [c ][:], mulTableHigh [c ][:], in , out )
89100 done := (len (in ) >> 6 ) << 6
101+ if raceEnabled {
102+ raceReadSlice (in [:done ])
103+ raceWriteSlice (out [:done ])
104+ }
105+ galMulAVX2Xor_64 (mulTableLow [c ][:], mulTableHigh [c ][:], in , out )
90106 in = in [done :]
91107 out = out [done :]
92108 }
93109 if len (in ) >= 32 {
94- galMulAVX2Xor (mulTableLow [c ][:], mulTableHigh [c ][:], in , out )
95110 done := (len (in ) >> 5 ) << 5
111+ if raceEnabled {
112+ raceReadSlice (in [:done ])
113+ raceWriteSlice (out [:done ])
114+ }
115+ galMulAVX2Xor (mulTableLow [c ][:], mulTableHigh [c ][:], in , out )
96116 in = in [done :]
97117 out = out [done :]
98118 }
99119 } else if o .useSSSE3 {
100- galMulSSSE3Xor (mulTableLow [c ][:], mulTableHigh [c ][:], in , out )
101120 done := (len (in ) >> 4 ) << 4
121+ if raceEnabled {
122+ raceReadSlice (in [:done ])
123+ raceWriteSlice (out [:done ])
124+ }
125+ galMulSSSE3Xor (mulTableLow [c ][:], mulTableHigh [c ][:], in , out )
102126 in = in [done :]
103127 out = out [done :]
104128 }
@@ -117,20 +141,32 @@ func sliceXor(in, out []byte, o *options) {
117141 if o .useSSE2 {
118142 if len (in ) >= bigSwitchover {
119143 if o .useAVX2 {
120- avx2XorSlice_64 (in , out )
121144 done := (len (in ) >> 6 ) << 6
145+ if raceEnabled {
146+ raceReadSlice (in [:done ])
147+ raceWriteSlice (out [:done ])
148+ }
149+ avx2XorSlice_64 (in , out )
122150 in = in [done :]
123151 out = out [done :]
124152 } else {
125- sSE2XorSlice_64 (in , out )
126153 done := (len (in ) >> 6 ) << 6
154+ if raceEnabled {
155+ raceReadSlice (in [:done ])
156+ raceWriteSlice (out [:done ])
157+ }
158+ sSE2XorSlice_64 (in , out )
127159 in = in [done :]
128160 out = out [done :]
129161 }
130162 }
131163 if len (in ) >= 16 {
132- sSE2XorSlice (in , out )
133164 done := (len (in ) >> 4 ) << 4
165+ if raceEnabled {
166+ raceReadSlice (in [:done ])
167+ raceWriteSlice (out [:done ])
168+ }
169+ sSE2XorSlice (in , out )
134170 in = in [done :]
135171 out = out [done :]
136172 }
@@ -462,9 +498,17 @@ func fftDIT2(x, y []byte, log_m ffe, o *options) {
462498 }
463499 if o .useAVX2 {
464500 tmp := & multiply256LUT [log_m ]
501+ if raceEnabled {
502+ raceReadSlice (y )
503+ raceWriteSlice (x )
504+ }
465505 fftDIT2_avx2 (x , y , tmp )
466506 } else if o .useSSSE3 {
467507 tmp := & multiply256LUT [log_m ]
508+ if raceEnabled {
509+ raceReadSlice (y )
510+ raceWriteSlice (x )
511+ }
468512 fftDIT2_ssse3 (x , y , tmp )
469513 } else {
470514 // Reference version:
@@ -480,11 +524,15 @@ func fftDIT28(x, y []byte, log_m ffe8, o *options) {
480524 }
481525
482526 if o .useAVX2 {
527+ done := (len (y ) >> 6 ) << 6
528+ if raceEnabled {
529+ raceReadSlice (y [:done ])
530+ raceWriteSlice (x [:done ])
531+ }
483532 fftDIT28_avx2 (x , y , & multiply256LUT8 [log_m ])
484533 if len (x )& 63 == 0 {
485534 return
486535 }
487- done := (len (y ) >> 6 ) << 6
488536 y = y [done :]
489537 x = x [done :]
490538 }
@@ -499,11 +547,15 @@ func ifftDIT28(x, y []byte, log_m ffe8, o *options) {
499547 }
500548
501549 if o .useAVX2 {
550+ done := (len (y ) >> 6 ) << 6
551+ if raceEnabled {
552+ raceReadSlice (y [:done ])
553+ raceWriteSlice (x [:done ])
554+ }
502555 ifftDIT28_avx2 (x , y , & multiply256LUT8 [log_m ])
503556 if len (x )& 63 == 0 {
504557 return
505558 }
506- done := (len (y ) >> 6 ) << 6
507559 y = y [done :]
508560 x = x [done :]
509561 }
@@ -514,14 +566,22 @@ func ifftDIT28(x, y []byte, log_m ffe8, o *options) {
514566func mulAdd8 (x , y []byte , log_m ffe8 , o * options ) {
515567 if o .useAVX2 {
516568 t := & multiply256LUT8 [log_m ]
517- galMulAVX2Xor_64 (t [:16 ], t [16 :32 ], y , x )
518569 done := (len (y ) >> 6 ) << 6
570+ if raceEnabled {
571+ raceReadSlice (y [:done ])
572+ raceWriteSlice (x [:done ])
573+ }
574+ galMulAVX2Xor_64 (t [:16 ], t [16 :32 ], y , x )
519575 y = y [done :]
520576 x = x [done :]
521577 } else if o .useSSSE3 {
522578 t := & multiply256LUT8 [log_m ]
523- galMulSSSE3Xor (t [:16 ], t [16 :32 ], y , x )
524579 done := (len (y ) >> 4 ) << 4
580+ if raceEnabled {
581+ raceReadSlice (y [:done ])
582+ raceWriteSlice (x [:done ])
583+ }
584+ galMulSSSE3Xor (t [:16 ], t [16 :32 ], y , x )
525585 y = y [done :]
526586 x = x [done :]
527587 }
@@ -535,9 +595,19 @@ func ifftDIT2(x, y []byte, log_m ffe, o *options) {
535595 }
536596 if o .useAVX2 {
537597 tmp := & multiply256LUT [log_m ]
598+ if raceEnabled {
599+ raceReadSlice (y )
600+ raceWriteSlice (x )
601+ }
602+
538603 ifftDIT2_avx2 (x , y , tmp )
539604 } else if o .useSSSE3 {
540605 tmp := & multiply256LUT [log_m ]
606+ if raceEnabled {
607+ raceReadSlice (y )
608+ raceWriteSlice (x )
609+ }
610+
541611 ifftDIT2_ssse3 (x , y , tmp )
542612 } else {
543613 // Reference version:
@@ -552,9 +622,17 @@ func mulgf16(x, y []byte, log_m ffe, o *options) {
552622 }
553623 if o .useAVX2 {
554624 tmp := & multiply256LUT [log_m ]
625+ if raceEnabled {
626+ raceReadSlice (y )
627+ raceWriteSlice (x )
628+ }
555629 mulgf16_avx2 (x , y , tmp )
556630 } else if o .useSSSE3 {
557631 tmp := & multiply256LUT [log_m ]
632+ if raceEnabled {
633+ raceReadSlice (y )
634+ raceWriteSlice (x )
635+ }
558636 mulgf16_ssse3 (x , y , tmp )
559637 } else {
560638 refMul (x , y , log_m )
@@ -564,14 +642,23 @@ func mulgf16(x, y []byte, log_m ffe, o *options) {
564642func mulgf8 (out , in []byte , log_m ffe8 , o * options ) {
565643 if o .useAVX2 {
566644 t := & multiply256LUT8 [log_m ]
567- galMulAVX2_64 (t [:16 ], t [16 :32 ], in , out )
568645 done := (len (in ) >> 6 ) << 6
646+ if raceEnabled {
647+ raceReadSlice (in [:done ])
648+ raceWriteSlice (out [:done ])
649+ }
650+
651+ galMulAVX2_64 (t [:16 ], t [16 :32 ], in , out )
569652 in = in [done :]
570653 out = out [done :]
571654 } else if o .useSSSE3 {
572655 t := & multiply256LUT8 [log_m ]
573- galMulSSSE3 (t [:16 ], t [16 :32 ], in , out )
574656 done := (len (in ) >> 4 ) << 4
657+ if raceEnabled {
658+ raceReadSlice (in [:done ])
659+ raceWriteSlice (out [:done ])
660+ }
661+ galMulSSSE3 (t [:16 ], t [16 :32 ], in , out )
575662 in = in [done :]
576663 out = out [done :]
577664 }
0 commit comments