Skip to content

Commit 510d503

Browse files
committed
Simplify
1 parent 4c50aad commit 510d503

File tree

5 files changed

+13
-63
lines changed

5 files changed

+13
-63
lines changed

src/control/group/generic.rs

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -119,17 +119,6 @@ impl Group {
119119
BitMask((self.0 & (self.0 << 1) & repeat(Tag::DELETED)).to_le())
120120
}
121121

122-
/// Loads the group and checks for empty tags. On the generic backend
123-
/// this just delegates to `Group::load` + `match_empty`.
124-
///
125-
/// # Safety
126-
///
127-
/// `ptr` must be valid to read `Group::WIDTH` bytes from.
128-
#[inline]
129-
pub(crate) unsafe fn load_and_match_empty(ptr: *const Tag) -> BitMask {
130-
unsafe { Group::load(ptr).match_empty() }
131-
}
132-
133122
/// Returns a `BitMask` indicating all tags in the group which are
134123
/// `EMPTY` or `DELETED`.
135124
#[inline]

src/control/group/lsx.rs

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -84,17 +84,6 @@ impl Group {
8484
}
8585
}
8686

87-
/// Loads the group and checks for empty tags. On LSX this just
88-
/// delegates to `Group::load` + `match_empty`.
89-
///
90-
/// # Safety
91-
///
92-
/// `ptr` must be valid to read `Group::WIDTH` bytes from.
93-
#[inline]
94-
pub(crate) unsafe fn load_and_match_empty(ptr: *const Tag) -> BitMask {
95-
unsafe { Group::load(ptr).match_empty() }
96-
}
97-
9887
/// Returns a `BitMask` indicating all tags in the group which are
9988
/// `EMPTY` or `DELETED`.
10089
#[inline]

src/control/group/neon.rs

Lines changed: 9 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -74,26 +74,18 @@ impl Group {
7474

7575
/// Returns a `BitMask` indicating all tags in the group which are
7676
/// `EMPTY`.
77-
#[inline]
78-
pub(crate) fn match_empty(self) -> BitMask {
79-
self.match_tag(Tag::EMPTY)
80-
}
81-
82-
/// Loads the group as a scalar `u64` and checks for empty tags using
83-
/// SWAR bit operations. This is faster than the NEON `match_empty`
84-
/// followed by `any_bit_set` because it avoids the slow `umaxv`
85-
/// horizontal vector reduction that LLVM emits for that pattern.
8677
///
87-
/// EMPTY (0xFF) is the only tag with both top bits set, so
78+
/// This uses SWAR bit operations on the scalar representation of the
79+
/// group rather than a NEON comparison. Converting the `uint8x8_t`
80+
/// directly to a `u64` allows LLVM to eliminate the vector load
81+
/// entirely. EMPTY (0xFF) is the only tag with both top bits set, so
8882
/// `(x & (x << 1) & 0x8080..80) != 0` detects empty bytes.
89-
///
90-
/// # Safety
91-
///
92-
/// `ptr` must be valid to read `Group::WIDTH` bytes from.
9383
#[inline]
94-
pub(crate) unsafe fn load_and_match_empty(ptr: *const Tag) -> BitMask {
95-
let ctrl = unsafe { ptr.cast::<u64>().read_unaligned() };
96-
BitMask(ctrl & (ctrl << 1) & BITMASK_ITER_MASK)
84+
pub(crate) fn match_empty(self) -> BitMask {
85+
unsafe {
86+
let ctrl = neon::vget_lane_u64(neon::vreinterpret_u64_u8(self.0), 0);
87+
BitMask(ctrl & (ctrl << 1) & BITMASK_ITER_MASK)
88+
}
9789
}
9890

9991
/// Returns a `BitMask` indicating all tags in the group which are

src/control/group/sse2.rs

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -92,18 +92,6 @@ impl Group {
9292
self.match_tag(Tag::EMPTY)
9393
}
9494

95-
/// Loads the group and checks for empty tags. On SSE2 this just
96-
/// delegates to `Group::load` + `match_empty` since the movemask
97-
/// codegen is already optimal.
98-
///
99-
/// # Safety
100-
///
101-
/// `ptr` must be valid to read `Group::WIDTH` bytes from.
102-
#[inline]
103-
pub(crate) unsafe fn load_and_match_empty(ptr: *const Tag) -> BitMask {
104-
unsafe { Group::load(ptr).match_empty() }
105-
}
106-
10795
/// Returns a `BitMask` indicating all tags in the group which are
10896
/// `EMPTY` or `DELETED`.
10997
#[inline]

src/raw.rs

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1818,9 +1818,7 @@ impl RawTableInner {
18181818
// * Also, even if `RawTableInner` is not already allocated, `ProbeSeq.pos` will
18191819
// always return "0" (zero), so Group::load will read unaligned `Group::static_empty()`
18201820
// bytes, which is safe (see RawTableInner::new).
1821-
let ctrl = unsafe { self.ctrl(probe_seq.pos) };
1822-
let group = unsafe { Group::load(ctrl) };
1823-
let empty = unsafe { Group::load_and_match_empty(ctrl) };
1821+
let group = unsafe { Group::load(self.ctrl(probe_seq.pos)) };
18241822

18251823
for bit in group.match_tag(tag_hash) {
18261824
let index = (probe_seq.pos + bit) & self.bucket_mask;
@@ -1839,7 +1837,7 @@ impl RawTableInner {
18391837
if let Some(insert_index) = insert_index {
18401838
// Only stop the search if the group contains at least one empty element.
18411839
// Otherwise, the element that we are looking for might be in a following group.
1842-
if likely(empty.any_bit_set()) {
1840+
if likely(group.match_empty().any_bit_set()) {
18431841
// We must have found an insert slot by now, since the current group contains at
18441842
// least one. For tables smaller than the group width, there will still be an
18451843
// empty element in the current (and only) group due to the load factor.
@@ -2026,13 +2024,7 @@ impl RawTableInner {
20262024
// * Also, even if `RawTableInner` is not already allocated, `ProbeSeq.pos` will
20272025
// always return "0" (zero), so Group::load will read unaligned `Group::static_empty()`
20282026
// bytes, which is safe (see RawTableInner::new_in).
2029-
let ctrl = unsafe { self.ctrl(probe_seq.pos) };
2030-
let group = unsafe { Group::load(ctrl) };
2031-
2032-
// Compute the empty check early so it can execute in parallel
2033-
// with the tag comparison. On some backends this uses a
2034-
// separate code path optimized for the empty check.
2035-
let empty = unsafe { Group::load_and_match_empty(ctrl) };
2027+
let group = unsafe { Group::load(self.ctrl(probe_seq.pos)) };
20362028

20372029
for bit in group.match_tag(tag_hash) {
20382030
// This is the same as `(probe_seq.pos + bit) % self.num_buckets()` because the number
@@ -2044,7 +2036,7 @@ impl RawTableInner {
20442036
}
20452037
}
20462038

2047-
if likely(empty.any_bit_set()) {
2039+
if likely(group.match_empty().any_bit_set()) {
20482040
return None;
20492041
}
20502042

0 commit comments

Comments
 (0)