Skip to content

Commit c1e79a0

Browse files
authored
zstd: Improve "best" end search (#693)
Check "best" end-of-match offset further forward. Missed bytes are picked up later. Results: nyc-taxi-data: 474949772 -> 469436962; silesia: 60073508 -> 59993313.
1 parent 6f95269 commit c1e79a0

File tree

1 file changed

+7
-4
lines changed

1 file changed

+7
-4
lines changed

zstd/enc_best.go

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -278,10 +278,13 @@ encodeLoop:
278278
if sAt := best.s + best.length; sAt < sLimit {
279279
nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen)
280280
candidateEnd := e.longTable[nextHashL]
281-
if pos := candidateEnd.offset - e.cur - best.length; pos >= 0 {
282-
bestEnd := bestOf(best, matchAt(pos, best.s, load3232(src, best.s), -1))
283-
if pos := candidateEnd.prev - e.cur - best.length; pos >= 0 {
284-
bestEnd = bestOf(bestEnd, matchAt(pos, best.s, load3232(src, best.s), -1))
281+
// Start check at a fixed offset to allow for a few mismatches.
282+
// For this compression level, a skipBeginning of 2 yields the best results.
283+
const skipBeginning = 2
284+
if pos := candidateEnd.offset - e.cur - best.length + skipBeginning; pos >= 0 {
285+
bestEnd := bestOf(best, matchAt(pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1))
286+
if pos := candidateEnd.prev - e.cur - best.length + skipBeginning; pos >= 0 {
287+
bestEnd = bestOf(bestEnd, matchAt(pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1))
285288
}
286289
best = bestEnd
287290
}

0 commit comments

Comments
 (0)