Skip to content

Commit 444d5d9

Browse files
authored
Fix invalid encoding on level 9 with single value input (#1115)
* Fix invalid encoding on level 9 with single value input

With single-value input and a full block write (>= 64K), the indexing function would overflow a uint16 counter, wrapping it to 0. This made it impossible to generate a valid Huffman table for the literal size prediction. In turn, the entire block would be output as literals, since the estimated cost of the value would be 0 bits. That in turn meant that EOB could not be encoded by the bit writer, since there were no matches. This was previously being satisfied with "filling".

Fixes:

1. Never index more than `maxFlateBlockTokens` (32K) bytes for the literal estimate table.
2. Always include EOB explicitly, in case a literal-only block should somehow slip through.
3. Add a regression test that writes a big single-value input in a single write. The other tests were using copy, which does smaller writes.

Fixes #1114

* Retract v1.18.1
1 parent 503c028 commit 444d5d9

File tree

6 files changed

+46
-10
lines changed

6 files changed

+46
-10
lines changed

flate/deflate.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -421,7 +421,9 @@ func (d *compressor) deflateLazy() {
421421
d.h = newHuffmanEncoder(maxFlateBlockTokens)
422422
}
423423
var tmp [256]uint16
424-
for _, v := range d.window[s.index:d.windowEnd] {
424+
toIndex := d.window[s.index:d.windowEnd]
425+
toIndex = toIndex[:min(len(toIndex), maxFlateBlockTokens)]
426+
for _, v := range toIndex {
425427
tmp[v]++
426428
}
427429
d.h.generate(tmp[:], 15)

flate/deflate_test.go

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,39 @@ func TestVeryLongSparseChunk(t *testing.T) {
170170
t.Log("Length:", buf.Len())
171171
}
172172

173+
func TestOneMByte(t *testing.T) {
174+
var input [1024 * 1024]byte
175+
176+
var compressedOutput bytes.Buffer
177+
for level := HuffmanOnly; level <= BestCompression; level++ {
178+
compressedOutput.Reset()
179+
compressor, err := NewWriter(&compressedOutput, level)
180+
if err != nil {
181+
t.Fatalf("create: %s", err)
182+
}
183+
// Use single write...
184+
if _, err := compressor.Write(input[:]); err != nil {
185+
t.Fatalf("compress: %s", err)
186+
}
187+
188+
if err := compressor.Close(); err != nil {
189+
t.Fatalf("close: %s", err)
190+
}
191+
192+
var decompressedOutput bytes.Buffer
193+
194+
decompresser := NewReader(&compressedOutput)
195+
t.Log("level:", level, "compressed:", compressedOutput.Len())
196+
if _, err := io.Copy(&decompressedOutput, decompresser); err != nil {
197+
t.Fatalf("decompress: %s", err)
198+
}
199+
200+
if !bytes.Equal(input[:], decompressedOutput.Bytes()) {
201+
t.Fatal("input and output do not match")
202+
}
203+
}
204+
}
205+
173206
type syncBuffer struct {
174207
buf bytes.Buffer
175208
mu sync.RWMutex

flate/fuzz_test.go

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -75,14 +75,11 @@ func FuzzEncoding(f *testing.F) {
7575
if !bytes.Equal(data, data2) {
7676
t.Fatal(msg + "not equal")
7777
}
78-
// Do it again...
78+
// Do it again... (also uses copy)
7979
msg = "level " + strconv.Itoa(level) + " (reset):"
8080
buf.Reset()
8181
fw.Reset(buf)
82-
n, err = fw.Write(data)
83-
if n != len(data) {
84-
t.Fatal(msg + "short write")
85-
}
82+
_, err = io.Copy(fw, bytes.NewReader(data))
8683
if err != nil {
8784
t.Fatal(msg + err.Error())
8885
}

flate/huffman_bit_writer.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -646,7 +646,7 @@ func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []b
646646
w.lastHeader = 0
647647
}
648648

649-
numLiterals, numOffsets := w.indexTokens(tokens, fillReuse && !sync)
649+
numLiterals, numOffsets := w.indexTokens(tokens, true)
650650
extraBits := 0
651651
ssize, storable := w.storedSize(input)
652652

@@ -781,7 +781,7 @@ func (w *huffmanBitWriter) fillTokens() {
781781
// literalFreq and offsetFreq, and generates literalEncoding
782782
// and offsetEncoding.
783783
// The number of literal and offset tokens is returned.
784-
func (w *huffmanBitWriter) indexTokens(t *tokens, filled bool) (numLiterals, numOffsets int) {
784+
func (w *huffmanBitWriter) indexTokens(t *tokens, alwaysEOB bool) (numLiterals, numOffsets int) {
785785
//copy(w.literalFreq[:], t.litHist[:])
786786
*(*[256]uint16)(w.literalFreq[:]) = t.litHist
787787
//copy(w.literalFreq[256:], t.extraHist[:])
@@ -791,9 +791,10 @@ func (w *huffmanBitWriter) indexTokens(t *tokens, filled bool) (numLiterals, num
791791
if t.n == 0 {
792792
return
793793
}
794-
if filled {
795-
return maxNumLit, maxNumDist
794+
if alwaysEOB {
795+
w.literalFreq[endBlockMarker] = 1
796796
}
797+
797798
// get the number of literals
798799
numLiterals = len(w.literalFreq)
799800
for w.literalFreq[numLiterals-1] == 0 {
0 Bytes
Binary file not shown.

go.mod

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@ module github.com/klauspost/compress
33
go 1.23
44

55
retract (
6+
// https://github.com/klauspost/compress/issues/1114
7+
v1.18.1
8+
69
// https://github.com/klauspost/compress/pull/503
710
v1.14.3
811
v1.14.2

0 commit comments

Comments
 (0)