Skip to content

Commit 0e54620

Browse files
authored
zstd decode: Use compound decSymbol (#144)
* zstd decode: Use compound decSymbol The compiler is not generating very nice code when decSymbol is separate values. Use a compound value instead of a struct. This will allow all values to be transferred in a single register. ``` name old time/op new time/op delta Decoder_DecodeAll/kppkn.gtb.zst-12 610µs ± 1% 599µs ± 1% -1.84% (p=0.000 n=10+10) Decoder_DecodeAll/geo.protodata.zst-12 138µs ± 2% 136µs ± 0% -1.72% (p=0.000 n=10+9) Decoder_DecodeAll/plrabn12.txt.zst-12 1.95ms ± 1% 1.90ms ± 0% -2.19% (p=0.000 n=10+7) Decoder_DecodeAll/lcet10.txt.zst-12 1.46ms ± 2% 1.42ms ± 1% -2.48% (p=0.000 n=10+10) Decoder_DecodeAll/asyoulik.txt.zst-12 506µs ± 1% 497µs ± 1% -1.83% (p=0.000 n=10+9) Decoder_DecodeAll/alice29.txt.zst-12 655µs ± 1% 636µs ± 1% -2.97% (p=0.000 n=10+10) Decoder_DecodeAll/html_x_4.zst-12 267µs ± 1% 261µs ± 1% -2.43% (p=0.000 n=10+9) Decoder_DecodeAll/paper-100k.pdf.zst-12 25.0µs ± 1% 24.3µs ± 1% -2.61% (p=0.000 n=10+10) Decoder_DecodeAll/fireworks.jpeg.zst-12 9.65µs ± 1% 9.61µs ± 0% ~ (p=0.250 n=10+9) Decoder_DecodeAll/urls.10K.zst-12 1.67ms ± 1% 1.63ms ± 2% -2.29% (p=0.000 n=10+10) Decoder_DecodeAll/html.zst-12 156µs ± 1% 155µs ± 1% -0.85% (p=0.014 n=10+10) name old speed new speed delta Decoder_DecodeAll/kppkn.gtb.zst-12 302MB/s ± 1% 308MB/s ± 1% +1.88% (p=0.000 n=10+10) Decoder_DecodeAll/geo.protodata.zst-12 860MB/s ± 2% 875MB/s ± 0% +1.75% (p=0.000 n=10+9) Decoder_DecodeAll/plrabn12.txt.zst-12 248MB/s ± 1% 253MB/s ± 0% +2.24% (p=0.000 n=10+7) Decoder_DecodeAll/lcet10.txt.zst-12 293MB/s ± 2% 300MB/s ± 1% +2.54% (p=0.000 n=10+10) Decoder_DecodeAll/asyoulik.txt.zst-12 247MB/s ± 1% 252MB/s ± 1% +1.86% (p=0.000 n=10+9) Decoder_DecodeAll/alice29.txt.zst-12 232MB/s ± 1% 239MB/s ± 1% +3.06% (p=0.000 n=10+10) Decoder_DecodeAll/html_x_4.zst-12 1.53GB/s ± 1% 1.57GB/s ± 1% +2.49% (p=0.000 n=10+9) Decoder_DecodeAll/paper-100k.pdf.zst-12 4.10GB/s ± 1% 4.21GB/s ± 1% +2.68% (p=0.000 n=10+10) Decoder_DecodeAll/fireworks.jpeg.zst-12 12.8GB/s ± 1% 12.8GB/s ± 0% ~ (p=0.286 
n=10+9) Decoder_DecodeAll/urls.10K.zst-12 420MB/s ± 1% 430MB/s ± 2% +2.35% (p=0.000 n=10+10) Decoder_DecodeAll/html.zst-12 655MB/s ± 1% 661MB/s ± 1% +0.86% (p=0.015 n=10+10) ```
1 parent 1a36bca commit 0e54620

File tree

3 files changed

+215
-168
lines changed

3 files changed

+215
-168
lines changed

zstd/decoder_test.go

Lines changed: 87 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -966,103 +966,110 @@ func testDecoderDecodeAllError(t *testing.T, fn string, dec *Decoder) {
966966
// We don't predefine them, since this also tests our transformations.
967967
// Reference from here: https://github.com/facebook/zstd/blob/ededcfca57366461021c922720878c81a5854a0a/lib/decompress/zstd_decompress_block.c#L234
968968
func TestPredefTables(t *testing.T) {
969+
x := func(nextState uint16, nbAddBits, nbBits uint8, baseVal uint32) decSymbol {
970+
return newDecSymbol(nbBits, nbAddBits, nextState, baseVal)
971+
}
969972
for i := range fsePredef[:] {
970973
var want []decSymbol
971974
switch tableIndex(i) {
972975
case tableLiteralLengths:
973976
want = []decSymbol{
974977
/* nextState, nbAddBits, nbBits, baseVal */
975-
{0, 0, 4, 0}, {16, 0, 4, 0},
976-
{32, 0, 5, 1}, {0, 0, 5, 3},
977-
{0, 0, 5, 4}, {0, 0, 5, 6},
978-
{0, 0, 5, 7}, {0, 0, 5, 9},
979-
{0, 0, 5, 10}, {0, 0, 5, 12},
980-
{0, 0, 6, 14}, {0, 1, 5, 16},
981-
{0, 1, 5, 20}, {0, 1, 5, 22},
982-
{0, 2, 5, 28}, {0, 3, 5, 32},
983-
{0, 4, 5, 48}, {32, 6, 5, 64},
984-
{0, 7, 5, 128}, {0, 8, 6, 256},
985-
{0, 10, 6, 1024}, {0, 12, 6, 4096},
986-
{32, 0, 4, 0}, {0, 0, 4, 1},
987-
{0, 0, 5, 2}, {32, 0, 5, 4},
988-
{0, 0, 5, 5}, {32, 0, 5, 7},
989-
{0, 0, 5, 8}, {32, 0, 5, 10},
990-
{0, 0, 5, 11}, {0, 0, 6, 13},
991-
{32, 1, 5, 16}, {0, 1, 5, 18},
992-
{32, 1, 5, 22}, {0, 2, 5, 24},
993-
{32, 3, 5, 32}, {0, 3, 5, 40},
994-
{0, 6, 4, 64}, {16, 6, 4, 64},
995-
{32, 7, 5, 128}, {0, 9, 6, 512},
996-
{0, 11, 6, 2048}, {48, 0, 4, 0},
997-
{16, 0, 4, 1}, {32, 0, 5, 2},
998-
{32, 0, 5, 3}, {32, 0, 5, 5},
999-
{32, 0, 5, 6}, {32, 0, 5, 8},
1000-
{32, 0, 5, 9}, {32, 0, 5, 11},
1001-
{32, 0, 5, 12}, {0, 0, 6, 15},
1002-
{32, 1, 5, 18}, {32, 1, 5, 20},
1003-
{32, 2, 5, 24}, {32, 2, 5, 28},
1004-
{32, 3, 5, 40}, {32, 4, 5, 48},
1005-
{0, 16, 6, 65536}, {0, 15, 6, 32768},
1006-
{0, 14, 6, 16384}, {0, 13, 6, 8192}}
978+
x(0, 0, 4, 0), x(16, 0, 4, 0),
979+
x(32, 0, 5, 1), x(0, 0, 5, 3),
980+
x(0, 0, 5, 4), x(0, 0, 5, 6),
981+
x(0, 0, 5, 7), x(0, 0, 5, 9),
982+
x(0, 0, 5, 10), x(0, 0, 5, 12),
983+
x(0, 0, 6, 14), x(0, 1, 5, 16),
984+
x(0, 1, 5, 20), x(0, 1, 5, 22),
985+
x(0, 2, 5, 28), x(0, 3, 5, 32),
986+
x(0, 4, 5, 48), x(32, 6, 5, 64),
987+
x(0, 7, 5, 128), x(0, 8, 6, 256),
988+
x(0, 10, 6, 1024), x(0, 12, 6, 4096),
989+
x(32, 0, 4, 0), x(0, 0, 4, 1),
990+
x(0, 0, 5, 2), x(32, 0, 5, 4),
991+
x(0, 0, 5, 5), x(32, 0, 5, 7),
992+
x(0, 0, 5, 8), x(32, 0, 5, 10),
993+
x(0, 0, 5, 11), x(0, 0, 6, 13),
994+
x(32, 1, 5, 16), x(0, 1, 5, 18),
995+
x(32, 1, 5, 22), x(0, 2, 5, 24),
996+
x(32, 3, 5, 32), x(0, 3, 5, 40),
997+
x(0, 6, 4, 64), x(16, 6, 4, 64),
998+
x(32, 7, 5, 128), x(0, 9, 6, 512),
999+
x(0, 11, 6, 2048), x(48, 0, 4, 0),
1000+
x(16, 0, 4, 1), x(32, 0, 5, 2),
1001+
x(32, 0, 5, 3), x(32, 0, 5, 5),
1002+
x(32, 0, 5, 6), x(32, 0, 5, 8),
1003+
x(32, 0, 5, 9), x(32, 0, 5, 11),
1004+
x(32, 0, 5, 12), x(0, 0, 6, 15),
1005+
x(32, 1, 5, 18), x(32, 1, 5, 20),
1006+
x(32, 2, 5, 24), x(32, 2, 5, 28),
1007+
x(32, 3, 5, 40), x(32, 4, 5, 48),
1008+
x(0, 16, 6, 65536), x(0, 15, 6, 32768),
1009+
x(0, 14, 6, 16384), x(0, 13, 6, 8192),
1010+
}
10071011
case tableOffsets:
10081012
want = []decSymbol{
10091013
/* nextState, nbAddBits, nbBits, baseVal */
1010-
{0, 0, 5, 0}, {0, 6, 4, 61},
1011-
{0, 9, 5, 509}, {0, 15, 5, 32765},
1012-
{0, 21, 5, 2097149}, {0, 3, 5, 5},
1013-
{0, 7, 4, 125}, {0, 12, 5, 4093},
1014-
{0, 18, 5, 262141}, {0, 23, 5, 8388605},
1015-
{0, 5, 5, 29}, {0, 8, 4, 253},
1016-
{0, 14, 5, 16381}, {0, 20, 5, 1048573},
1017-
{0, 2, 5, 1}, {16, 7, 4, 125},
1018-
{0, 11, 5, 2045}, {0, 17, 5, 131069},
1019-
{0, 22, 5, 4194301}, {0, 4, 5, 13},
1020-
{16, 8, 4, 253}, {0, 13, 5, 8189},
1021-
{0, 19, 5, 524285}, {0, 1, 5, 1},
1022-
{16, 6, 4, 61}, {0, 10, 5, 1021},
1023-
{0, 16, 5, 65533}, {0, 28, 5, 268435453},
1024-
{0, 27, 5, 134217725}, {0, 26, 5, 67108861},
1025-
{0, 25, 5, 33554429}, {0, 24, 5, 16777213}}
1014+
x(0, 0, 5, 0), x(0, 6, 4, 61),
1015+
x(0, 9, 5, 509), x(0, 15, 5, 32765),
1016+
x(0, 21, 5, 2097149), x(0, 3, 5, 5),
1017+
x(0, 7, 4, 125), x(0, 12, 5, 4093),
1018+
x(0, 18, 5, 262141), x(0, 23, 5, 8388605),
1019+
x(0, 5, 5, 29), x(0, 8, 4, 253),
1020+
x(0, 14, 5, 16381), x(0, 20, 5, 1048573),
1021+
x(0, 2, 5, 1), x(16, 7, 4, 125),
1022+
x(0, 11, 5, 2045), x(0, 17, 5, 131069),
1023+
x(0, 22, 5, 4194301), x(0, 4, 5, 13),
1024+
x(16, 8, 4, 253), x(0, 13, 5, 8189),
1025+
x(0, 19, 5, 524285), x(0, 1, 5, 1),
1026+
x(16, 6, 4, 61), x(0, 10, 5, 1021),
1027+
x(0, 16, 5, 65533), x(0, 28, 5, 268435453),
1028+
x(0, 27, 5, 134217725), x(0, 26, 5, 67108861),
1029+
x(0, 25, 5, 33554429), x(0, 24, 5, 16777213),
1030+
}
10261031
case tableMatchLengths:
10271032
want = []decSymbol{
10281033
/* nextState, nbAddBits, nbBits, baseVal */
1029-
{0, 0, 6, 3}, {0, 0, 4, 4},
1030-
{32, 0, 5, 5}, {0, 0, 5, 6},
1031-
{0, 0, 5, 8}, {0, 0, 5, 9},
1032-
{0, 0, 5, 11}, {0, 0, 6, 13},
1033-
{0, 0, 6, 16}, {0, 0, 6, 19},
1034-
{0, 0, 6, 22}, {0, 0, 6, 25},
1035-
{0, 0, 6, 28}, {0, 0, 6, 31},
1036-
{0, 0, 6, 34}, {0, 1, 6, 37},
1037-
{0, 1, 6, 41}, {0, 2, 6, 47},
1038-
{0, 3, 6, 59}, {0, 4, 6, 83},
1039-
{0, 7, 6, 131}, {0, 9, 6, 515},
1040-
{16, 0, 4, 4}, {0, 0, 4, 5},
1041-
{32, 0, 5, 6}, {0, 0, 5, 7},
1042-
{32, 0, 5, 9}, {0, 0, 5, 10},
1043-
{0, 0, 6, 12}, {0, 0, 6, 15},
1044-
{0, 0, 6, 18}, {0, 0, 6, 21},
1045-
{0, 0, 6, 24}, {0, 0, 6, 27},
1046-
{0, 0, 6, 30}, {0, 0, 6, 33},
1047-
{0, 1, 6, 35}, {0, 1, 6, 39},
1048-
{0, 2, 6, 43}, {0, 3, 6, 51},
1049-
{0, 4, 6, 67}, {0, 5, 6, 99},
1050-
{0, 8, 6, 259}, {32, 0, 4, 4},
1051-
{48, 0, 4, 4}, {16, 0, 4, 5},
1052-
{32, 0, 5, 7}, {32, 0, 5, 8},
1053-
{32, 0, 5, 10}, {32, 0, 5, 11},
1054-
{0, 0, 6, 14}, {0, 0, 6, 17},
1055-
{0, 0, 6, 20}, {0, 0, 6, 23},
1056-
{0, 0, 6, 26}, {0, 0, 6, 29},
1057-
{0, 0, 6, 32}, {0, 16, 6, 65539},
1058-
{0, 15, 6, 32771}, {0, 14, 6, 16387},
1059-
{0, 13, 6, 8195}, {0, 12, 6, 4099},
1060-
{0, 11, 6, 2051}, {0, 10, 6, 1027},
1034+
x(0, 0, 6, 3), x(0, 0, 4, 4),
1035+
x(32, 0, 5, 5), x(0, 0, 5, 6),
1036+
x(0, 0, 5, 8), x(0, 0, 5, 9),
1037+
x(0, 0, 5, 11), x(0, 0, 6, 13),
1038+
x(0, 0, 6, 16), x(0, 0, 6, 19),
1039+
x(0, 0, 6, 22), x(0, 0, 6, 25),
1040+
x(0, 0, 6, 28), x(0, 0, 6, 31),
1041+
x(0, 0, 6, 34), x(0, 1, 6, 37),
1042+
x(0, 1, 6, 41), x(0, 2, 6, 47),
1043+
x(0, 3, 6, 59), x(0, 4, 6, 83),
1044+
x(0, 7, 6, 131), x(0, 9, 6, 515),
1045+
x(16, 0, 4, 4), x(0, 0, 4, 5),
1046+
x(32, 0, 5, 6), x(0, 0, 5, 7),
1047+
x(32, 0, 5, 9), x(0, 0, 5, 10),
1048+
x(0, 0, 6, 12), x(0, 0, 6, 15),
1049+
x(0, 0, 6, 18), x(0, 0, 6, 21),
1050+
x(0, 0, 6, 24), x(0, 0, 6, 27),
1051+
x(0, 0, 6, 30), x(0, 0, 6, 33),
1052+
x(0, 1, 6, 35), x(0, 1, 6, 39),
1053+
x(0, 2, 6, 43), x(0, 3, 6, 51),
1054+
x(0, 4, 6, 67), x(0, 5, 6, 99),
1055+
x(0, 8, 6, 259), x(32, 0, 4, 4),
1056+
x(48, 0, 4, 4), x(16, 0, 4, 5),
1057+
x(32, 0, 5, 7), x(32, 0, 5, 8),
1058+
x(32, 0, 5, 10), x(32, 0, 5, 11),
1059+
x(0, 0, 6, 14), x(0, 0, 6, 17),
1060+
x(0, 0, 6, 20), x(0, 0, 6, 23),
1061+
x(0, 0, 6, 26), x(0, 0, 6, 29),
1062+
x(0, 0, 6, 32), x(0, 16, 6, 65539),
1063+
x(0, 15, 6, 32771), x(0, 14, 6, 16387),
1064+
x(0, 13, 6, 8195), x(0, 12, 6, 4099),
1065+
x(0, 11, 6, 2051), x(0, 10, 6, 1027),
10611066
}
10621067
}
10631068
pre := fsePredef[i]
10641069
got := pre.dt[:1<<pre.actualTableLog]
10651070
if !reflect.DeepEqual(got, want) {
1071+
t.Logf("want: %v", want)
1072+
t.Logf("got : %v", got)
10661073
t.Errorf("Predefined table %d incorrect, len(got) = %d, len(want) = %d", i, len(got), len(want))
10671074
}
10681075
}

zstd/fse_decoder.go

Lines changed: 80 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -184,29 +184,75 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error {
184184
// decSymbol contains information about a state entry,
// including the state offset base, the output symbol and
// the number of bits to read for the low part of the destination state.
// Using a composite uint64 is faster than a struct with separate members.
//
// Bit layout, least significant bit first:
//
//	bits  0-7   nbBits
//	bits  8-15  addBits
//	bits 16-31  newState
//	bits 32-63  baseline
type decSymbol uint64

// Position of every field inside a decSymbol. All packing and unpacking
// below is expressed through these so the layout is defined in one place.
const (
	decSymbolAddBitsShift  = 8
	decSymbolNewStateShift = 16
	decSymbolBaselineShift = 32

	decSymbolNBitsMask    decSymbol = 0xff
	decSymbolAddBitsMask  decSymbol = 0xff << decSymbolAddBitsShift
	decSymbolNewStateMask decSymbol = 0xffff << decSymbolNewStateShift
	decSymbolBaselineMask decSymbol = 0xffffffff << decSymbolBaselineShift
)

// newDecSymbol packs the four fields into a single decSymbol.
func newDecSymbol(nbits, addBits uint8, newState uint16, baseline uint32) decSymbol {
	return decSymbol(nbits) |
		decSymbol(addBits)<<decSymbolAddBitsShift |
		decSymbol(newState)<<decSymbolNewStateShift |
		decSymbol(baseline)<<decSymbolBaselineShift
}

// nbBits returns the nbBits field (bits 0-7).
func (d decSymbol) nbBits() uint8 {
	return uint8(d)
}

// addBits returns the addBits field (bits 8-15).
func (d decSymbol) addBits() uint8 {
	return uint8(d >> decSymbolAddBitsShift)
}

// newState returns the newState field (bits 16-31).
func (d decSymbol) newState() uint16 {
	return uint16(d >> decSymbolNewStateShift)
}

// baseline returns the baseline field (bits 32-63).
func (d decSymbol) baseline() uint32 {
	return uint32(d >> decSymbolBaselineShift)
}

// baselineInt returns the baseline field converted to int.
func (d decSymbol) baselineInt() int {
	return int(d >> decSymbolBaselineShift)
}

// set overwrites all four fields of d.
func (d *decSymbol) set(nbits, addBits uint8, newState uint16, baseline uint32) {
	// Delegate so the packing expression exists in exactly one place.
	*d = newDecSymbol(nbits, addBits, newState, baseline)
}

// setNBits replaces the nbBits field and leaves the other fields untouched.
func (d *decSymbol) setNBits(nBits uint8) {
	*d = (*d &^ decSymbolNBitsMask) | decSymbol(nBits)
}

// setAddBits replaces the addBits field and leaves the other fields untouched.
func (d *decSymbol) setAddBits(addBits uint8) {
	*d = (*d &^ decSymbolAddBitsMask) | decSymbol(addBits)<<decSymbolAddBitsShift
}

// setNewState replaces the newState field and leaves the other fields untouched.
func (d *decSymbol) setNewState(state uint16) {
	*d = (*d &^ decSymbolNewStateMask) | decSymbol(state)<<decSymbolNewStateShift
}

// setBaseline replaces the baseline field and leaves the other fields untouched.
func (d *decSymbol) setBaseline(baseline uint32) {
	*d = (*d &^ decSymbolBaselineMask) | decSymbol(baseline)<<decSymbolBaselineShift
}

// setExt replaces addBits and baseline in one step, keeping nbBits and
// newState as they are.
func (d *decSymbol) setExt(addBits uint8, baseline uint32) {
	const replaced = decSymbolAddBitsMask | decSymbolBaselineMask
	*d = (*d &^ replaced) |
		decSymbol(addBits)<<decSymbolAddBitsShift |
		decSymbol(baseline)<<decSymbolBaselineShift
}
193242

194243
// decSymbolValue returns the transformed decSymbol for the given symbol.
195244
func decSymbolValue(symb uint8, t []baseOffset) (decSymbol, error) {
196245
if int(symb) >= len(t) {
197-
return decSymbol{}, fmt.Errorf("rle symbol %d >= max %d", symb, len(t))
246+
return 0, fmt.Errorf("rle symbol %d >= max %d", symb, len(t))
198247
}
199248
lu := t[symb]
200-
return decSymbol{
201-
addBits: lu.addBits,
202-
baseline: lu.baseLine,
203-
}, nil
249+
return newDecSymbol(0, lu.addBits, 0, lu.baseLine), nil
204250
}
205251

206252
// setRLE will set the decoder til RLE mode.
207253
func (s *fseDecoder) setRLE(symbol decSymbol) {
208254
s.actualTableLog = 0
209-
s.maxBits = symbol.addBits
255+
s.maxBits = symbol.addBits()
210256
s.dt[0] = symbol
211257
}
212258

@@ -220,7 +266,7 @@ func (s *fseDecoder) buildDtable() error {
220266
{
221267
for i, v := range s.norm[:s.symbolLen] {
222268
if v == -1 {
223-
s.dt[highThreshold].addBits = uint8(i)
269+
s.dt[highThreshold].setAddBits(uint8(i))
224270
highThreshold--
225271
symbolNext[i] = 1
226272
} else {
@@ -235,7 +281,7 @@ func (s *fseDecoder) buildDtable() error {
235281
position := uint32(0)
236282
for ss, v := range s.norm[:s.symbolLen] {
237283
for i := 0; i < int(v); i++ {
238-
s.dt[position].addBits = uint8(ss)
284+
s.dt[position].setAddBits(uint8(ss))
239285
position = (position + step) & tableMask
240286
for position > highThreshold {
241287
// lowprob area
@@ -253,11 +299,11 @@ func (s *fseDecoder) buildDtable() error {
253299
{
254300
tableSize := uint16(1 << s.actualTableLog)
255301
for u, v := range s.dt[:tableSize] {
256-
symbol := v.addBits
302+
symbol := v.addBits()
257303
nextState := symbolNext[symbol]
258304
symbolNext[symbol] = nextState + 1
259305
nBits := s.actualTableLog - byte(highBits(uint32(nextState)))
260-
s.dt[u&maxTableMask].nbBits = nBits
306+
s.dt[u&maxTableMask].setNBits(nBits)
261307
newState := (nextState << nBits) - tableSize
262308
if newState > tableSize {
263309
return fmt.Errorf("newState (%d) outside table size (%d)", newState, tableSize)
@@ -266,7 +312,7 @@ func (s *fseDecoder) buildDtable() error {
266312
// Seems weird that this is possible with nbits > 0.
267313
return fmt.Errorf("newState (%d) == oldState (%d) and no bits", newState, u)
268314
}
269-
s.dt[u&maxTableMask].newState = newState
315+
s.dt[u&maxTableMask].setNewState(newState)
270316
}
271317
}
272318
return nil
@@ -279,25 +325,21 @@ func (s *fseDecoder) transform(t []baseOffset) error {
279325
tableSize := uint16(1 << s.actualTableLog)
280326
s.maxBits = 0
281327
for i, v := range s.dt[:tableSize] {
282-
if int(v.addBits) >= len(t) {
283-
return fmt.Errorf("invalid decoding table entry %d, symbol %d >= max (%d)", i, v.addBits, len(t))
328+
add := v.addBits()
329+
if int(add) >= len(t) {
330+
return fmt.Errorf("invalid decoding table entry %d, symbol %d >= max (%d)", i, v.addBits(), len(t))
284331
}
285-
lu := t[v.addBits]
332+
lu := t[add]
286333
if lu.addBits > s.maxBits {
287334
s.maxBits = lu.addBits
288335
}
289-
s.dt[i&maxTableMask] = decSymbol{
290-
newState: v.newState,
291-
nbBits: v.nbBits,
292-
addBits: lu.addBits,
293-
baseline: lu.baseLine,
294-
}
336+
v.setExt(lu.addBits, lu.baseLine)
337+
s.dt[i] = v
295338
}
296339
return nil
297340
}
298341

299342
type fseState struct {
300-
// TODO: Check if *[1 << maxTablelog]decSymbol is faster.
301343
dt []decSymbol
302344
state decSymbol
303345
}
@@ -312,26 +354,31 @@ func (s *fseState) init(br *bitReader, tableLog uint8, dt []decSymbol) {
312354
// next returns the current symbol and sets the next state.
313355
// At least tablelog bits must be available in the bit reader.
314356
func (s *fseState) next(br *bitReader) {
315-
lowBits := uint16(br.getBits(s.state.nbBits))
316-
s.state = s.dt[s.state.newState+lowBits]
357+
lowBits := uint16(br.getBits(s.state.nbBits()))
358+
s.state = s.dt[s.state.newState()+lowBits]
317359
}
318360

319361
// finished returns true if all bits have been read from the bitstream
320362
// and the next state would require reading bits from the input.
321363
func (s *fseState) finished(br *bitReader) bool {
322-
return br.finished() && s.state.nbBits > 0
364+
return br.finished() && s.state.nbBits() > 0
323365
}
324366

325367
// final returns the current state symbol without decoding the next.
326368
func (s *fseState) final() (int, uint8) {
327-
return int(s.state.baseline), s.state.addBits
369+
return s.state.baselineInt(), s.state.addBits()
370+
}
371+
372+
// final returns the current state symbol without decoding the next.
373+
func (s decSymbol) final() (int, uint8) {
374+
return s.baselineInt(), s.addBits()
328375
}
329376

330377
// nextFast returns the next symbol and sets the next state.
331378
// This can only be used if no symbols are 0 bits.
332379
// At least tablelog bits must be available in the bit reader.
333380
func (s *fseState) nextFast(br *bitReader) (uint32, uint8) {
334-
lowBits := uint16(br.getBitsFast(s.state.nbBits))
335-
s.state = s.dt[s.state.newState+lowBits]
336-
return s.state.baseline, s.state.addBits
381+
lowBits := uint16(br.getBitsFast(s.state.nbBits()))
382+
s.state = s.dt[s.state.newState()+lowBits]
383+
return s.state.baseline(), s.state.addBits()
337384
}

0 commit comments

Comments
 (0)