Skip to content

Commit 5e801bf

Browse files
authored
Fix up ShannonEntropyBits (#127)
Breaking, but the typo is too embarrassing to keep ;)
1 parent 4dbb2ac commit 5e801bf

File tree

2 files changed

+26
-23
lines changed

2 files changed

+26
-23
lines changed

compressible.go

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -62,20 +62,23 @@ func Estimate(b []byte) float64 {
6262
return math.Pow((prediction+entropy)/2, 0.9)
6363
}
6464

65-
// SnannonEntropyBits returns the number of bits minimum required to represent
65+
// ShannonEntropyBits returns the minimum number of bits required to represent
6666
// an entropy encoding of the input bytes.
6767
// https://en.wiktionary.org/wiki/Shannon_entropy
68-
func SnannonEntropyBits(b []byte) int {
68+
func ShannonEntropyBits(b []byte) int {
69+
if len(b) == 0 {
70+
return 0
71+
}
6972
var hist [256]int
7073
for _, c := range b {
7174
hist[c]++
7275
}
7376
shannon := float64(0)
74-
total := float64(len(b))
75-
for i := range hist[:] {
76-
n := float64(hist[i])
77-
if n > 0 {
78-
shannon += math.Log2(total/n) * n
77+
invTotal := 1.0 / float64(len(b))
78+
for _, v := range hist[:] {
79+
if v > 0 {
80+
n := float64(v)
81+
shannon += math.Ceil(-math.Log2(n*invTotal) * n)
7982
}
8083
}
8184
return int(math.Ceil(shannon))

compressible_test.go

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -134,9 +134,9 @@ func BenchmarkSnannonEntropyBits(b *testing.B) {
134134
b.SetBytes(int64(len(testData)))
135135
b.ResetTimer()
136136
for i := 0; i < b.N; i++ {
137-
SnannonEntropyBits(testData)
137+
ShannonEntropyBits(testData)
138138
}
139-
b.Log(SnannonEntropyBits(testData))
139+
b.Log(ShannonEntropyBits(testData))
140140
})
141141

142142
// (predictable, high entropy distribution)
@@ -148,9 +148,9 @@ func BenchmarkSnannonEntropyBits(b *testing.B) {
148148
b.SetBytes(int64(len(testData)))
149149
b.ResetTimer()
150150
for i := 0; i < b.N; i++ {
151-
SnannonEntropyBits(testData)
151+
ShannonEntropyBits(testData)
152152
}
153-
b.Log(SnannonEntropyBits(testData))
153+
b.Log(ShannonEntropyBits(testData))
154154
})
155155

156156
// (not predictable, high entropy distribution)
@@ -160,9 +160,9 @@ func BenchmarkSnannonEntropyBits(b *testing.B) {
160160
b.SetBytes(int64(len(testData)))
161161
b.ResetTimer()
162162
for i := 0; i < b.N; i++ {
163-
SnannonEntropyBits(testData)
163+
ShannonEntropyBits(testData)
164164
}
165-
b.Log(SnannonEntropyBits(testData))
165+
b.Log(ShannonEntropyBits(testData))
166166
})
167167

168168
// (not predictable, high entropy distribution)
@@ -172,9 +172,9 @@ func BenchmarkSnannonEntropyBits(b *testing.B) {
172172
b.SetBytes(int64(len(testData)))
173173
b.ResetTimer()
174174
for i := 0; i < b.N; i++ {
175-
SnannonEntropyBits(testData)
175+
ShannonEntropyBits(testData)
176176
}
177-
b.Log(SnannonEntropyBits(testData))
177+
b.Log(ShannonEntropyBits(testData))
178178
})
179179

180180
// (not predictable, high entropy distribution)
@@ -184,9 +184,9 @@ func BenchmarkSnannonEntropyBits(b *testing.B) {
184184
b.SetBytes(int64(len(testData)))
185185
b.ResetTimer()
186186
for i := 0; i < b.N; i++ {
187-
SnannonEntropyBits(testData)
187+
ShannonEntropyBits(testData)
188188
}
189-
b.Log(SnannonEntropyBits(testData))
189+
b.Log(ShannonEntropyBits(testData))
190190
})
191191

192192
// (not predictable, high entropy distribution)
@@ -196,9 +196,9 @@ func BenchmarkSnannonEntropyBits(b *testing.B) {
196196
b.SetBytes(int64(len(testData)))
197197
b.ResetTimer()
198198
for i := 0; i < b.N; i++ {
199-
SnannonEntropyBits(testData)
199+
ShannonEntropyBits(testData)
200200
}
201-
b.Log(SnannonEntropyBits(testData))
201+
b.Log(ShannonEntropyBits(testData))
202202
})
203203

204204
// (not predictable, medium entropy distribution)
@@ -211,9 +211,9 @@ func BenchmarkSnannonEntropyBits(b *testing.B) {
211211
b.SetBytes(int64(len(testData)))
212212
b.ResetTimer()
213213
for i := 0; i < b.N; i++ {
214-
SnannonEntropyBits(testData)
214+
ShannonEntropyBits(testData)
215215
}
216-
b.Log(SnannonEntropyBits(testData))
216+
b.Log(ShannonEntropyBits(testData))
217217
})
218218
// (medium predictable, medium entropy distribution)
219219
b.Run("text", func(b *testing.B) {
@@ -234,9 +234,9 @@ Thoughts?`)
234234
b.SetBytes(int64(len(testData)))
235235
b.ResetTimer()
236236
for i := 0; i < b.N; i++ {
237-
SnannonEntropyBits(testData)
237+
ShannonEntropyBits(testData)
238238
}
239-
b.Log(SnannonEntropyBits(testData))
239+
b.Log(ShannonEntropyBits(testData))
240240
})
241241
}
242242

0 commit comments

Comments
 (0)