18
18
19
19
import com .google .common .math .LongMath ;
20
20
import com .google .common .primitives .Ints ;
21
+ import com .google .common .primitives .Longs ;
21
22
22
23
import java .math .RoundingMode ;
23
24
import java .util .Arrays ;
32
33
* on their ordinal for BloomFilter serialization.
33
34
*
34
35
* @author Dimitris Andreou
36
+ * @author Kurt Alfred Kluever
35
37
*/
36
38
enum BloomFilterStrategies implements BloomFilter .Strategy {
37
39
/**
@@ -42,40 +44,100 @@ enum BloomFilterStrategies implements BloomFilter.Strategy {
42
44
MURMUR128_MITZ_32 () {
43
45
@ Override public <T > boolean put (T object , Funnel <? super T > funnel ,
44
46
int numHashFunctions , BitArray bits ) {
47
+ long bitSize = bits .bitSize ();
45
48
long hash64 = Hashing .murmur3_128 ().hashObject (object , funnel ).asLong ();
46
49
int hash1 = (int ) hash64 ;
47
50
int hash2 = (int ) (hash64 >>> 32 );
51
+
48
52
boolean bitsChanged = false ;
49
53
for (int i = 1 ; i <= numHashFunctions ; i ++) {
50
- int nextHash = hash1 + i * hash2 ;
51
- if (nextHash < 0 ) {
52
- nextHash = ~nextHash ;
54
+ int combinedHash = hash1 + (i * hash2 );
55
+ // Flip all the bits if it's negative (guaranteed positive number)
56
+ if (combinedHash < 0 ) {
57
+ combinedHash = ~combinedHash ;
53
58
}
54
- bitsChanged |= bits .set (nextHash % bits . bitSize () );
59
+ bitsChanged |= bits .set (combinedHash % bitSize );
55
60
}
56
61
return bitsChanged ;
57
62
}
58
63
59
64
@ Override public <T > boolean mightContain (T object , Funnel <? super T > funnel ,
60
65
int numHashFunctions , BitArray bits ) {
66
+ long bitSize = bits .bitSize ();
61
67
long hash64 = Hashing .murmur3_128 ().hashObject (object , funnel ).asLong ();
62
68
int hash1 = (int ) hash64 ;
63
69
int hash2 = (int ) (hash64 >>> 32 );
70
+
64
71
for (int i = 1 ; i <= numHashFunctions ; i ++) {
65
- int nextHash = hash1 + i * hash2 ;
66
- if (nextHash < 0 ) {
67
- nextHash = ~nextHash ;
72
+ int combinedHash = hash1 + (i * hash2 );
73
+ // Flip all the bits if it's negative (guaranteed positive number)
74
+ if (combinedHash < 0 ) {
75
+ combinedHash = ~combinedHash ;
68
76
}
69
- if (!bits .get (nextHash % bits . bitSize () )) {
77
+ if (!bits .get (combinedHash % bitSize )) {
70
78
return false ;
71
79
}
72
80
}
73
81
return true ;
74
82
}
83
+ },
84
+ /**
85
+ * This strategy uses all 128 bits of {@link Hashing#murmur3_128} when hashing. It looks
86
+ * different than the implementation in MURMUR128_MITZ_32 because we're avoiding the
87
+ * multiplication in the loop and doing a (much simpler) += hash2. We're also changing the
88
+ * index to a positive number by AND'ing with Long.MAX_VALUE instead of flipping the bits.
89
+ */
90
+ MURMUR128_MITZ_64 () {
91
+ @ Override
92
+ public <T > boolean put (T object , Funnel <? super T > funnel ,
93
+ int numHashFunctions , BitArray bits ) {
94
+ long bitSize = bits .bitSize ();
95
+ byte [] bytes = Hashing .murmur3_128 ().hashObject (object , funnel ).getBytesInternal ();
96
+ long hash1 = lowerEight (bytes );
97
+ long hash2 = upperEight (bytes );
98
+
99
+ boolean bitsChanged = false ;
100
+ long combinedHash = hash1 + hash2 ;
101
+ for (int i = 0 ; i < numHashFunctions ; i ++) {
102
+ // Make the combined hash positive and indexable
103
+ bitsChanged |= bits .set ((combinedHash & Long .MAX_VALUE ) % bitSize );
104
+ combinedHash += hash2 ;
105
+ }
106
+ return bitsChanged ;
107
+ }
108
+
109
+ @ Override
110
+ public <T > boolean mightContain (T object , Funnel <? super T > funnel ,
111
+ int numHashFunctions , BitArray bits ) {
112
+ long bitSize = bits .bitSize ();
113
+ byte [] bytes = Hashing .murmur3_128 ().hashObject (object , funnel ).getBytesInternal ();
114
+ long hash1 = lowerEight (bytes );
115
+ long hash2 = upperEight (bytes );
116
+
117
+ long combinedHash = hash1 + hash2 ;
118
+ for (int i = 0 ; i < numHashFunctions ; i ++) {
119
+ // Make the combined hash positive and indexable
120
+ if (!bits .get ((combinedHash & Long .MAX_VALUE ) % bitSize )) {
121
+ return false ;
122
+ }
123
+ combinedHash += hash2 ;
124
+ }
125
+ return true ;
126
+ }
127
+
128
+ private /* static */ long lowerEight (byte [] bytes ) {
129
+ return Longs .fromBytes (
130
+ bytes [7 ], bytes [6 ], bytes [5 ], bytes [4 ], bytes [3 ], bytes [2 ], bytes [1 ], bytes [0 ]);
131
+ }
132
+
133
+ private /* static */ long upperEight (byte [] bytes ) {
134
+ return Longs .fromBytes (
135
+ bytes [15 ], bytes [14 ], bytes [13 ], bytes [12 ], bytes [11 ], bytes [10 ], bytes [9 ], bytes [8 ]);
136
+ }
75
137
};
76
138
77
139
// Note: We use this instead of java.util.BitSet because we need access to the long[] data field
78
- static class BitArray {
140
+ static final class BitArray {
79
141
final long [] data ;
80
142
long bitCount ;
81
143
0 commit comments