Skip to content

Mark atom! allocated Atoms as having a 'static' lifetime and stop refcounting them #10948

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/itchy-cycles-compare.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
hstr: patch
---

Mark `atom!`-allocated `Atom`s as having a `'static` lifetime and stop refcounting them
188 changes: 182 additions & 6 deletions crates/hstr/src/dynamic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ use std::{
ptr::{self, NonNull},
};

use rustc_hash::FxHasher;
use triomphe::ThinArc;

use crate::{
Expand Down Expand Up @@ -204,10 +203,137 @@ impl Storage for &'_ mut AtomStore {
}

#[inline(never)]
fn calc_hash(text: &str) -> u64 {
let mut hasher = FxHasher::default();
text.hash(&mut hasher);
hasher.finish()
/// Computes the 64-bit hash of `text` by running `hash_bytes` over its UTF-8
/// bytes.
pub(crate) const fn calc_hash(text: &str) -> u64 {
    let utf8 = text.as_bytes();
    hash_bytes(utf8)
}

// Constants used by `hash_bytes`.
//
// Nothing special, digits of pi.

// Initial value of the `s0` accumulator in `hash_bytes`.
const SEED1: u64 = 0x243f6a8885a308d3;
// Initial value of the `s1` accumulator in `hash_bytes`.
const SEED2: u64 = 0x13198a2e03707344;
// XOR'ed with the second word of every 16-byte chunk in the bulk loop of
// `hash_bytes`, so that all-zero input does not trivially collapse the hash.
const PREVENT_TRIVIAL_ZERO_COLLAPSE: u64 = 0xa4093822299f31d0;

/// Multiplies `x` by `y` and folds the result back down to 64 bits.
///
/// On 64-bit targets the full 128-bit product is computed and its two halves
/// are XOR'ed together. On 32-bit targets a cheaper approximation built from
/// two 32 x 32 -> 64 bit products is used instead, so the returned value
/// differs between the two pointer widths.
#[inline]
const fn multiply_mix(x: u64, y: u64) -> u64 {
    #[cfg(target_pointer_width = "64")]
    {
        // The full u64 x u64 -> u128 product is a single mul instruction on
        // x86-64, and one mul plus one mulhi on ARM64.
        let product = (x as u128) * (y as u128);
        let low_half = product as u64;
        let high_half = (product >> 64) as u64;

        // The middle bits of the full product fluctuate the most with small
        // changes in the input: those are the top bits of the low half and the
        // bottom bits of the high half. XOR'ing the halves therefore makes the
        // entire output fluctuate with small changes to the input.
        //
        // Unfortunately both 2^64 + 1 and 2^64 - 1 have small prime factors,
        // otherwise combining with + or - could result in a really strong
        // hash, as:
        //     x * y = 2^64 * hi + lo = (-1) * hi + lo = lo - hi,  (mod 2^64 + 1)
        //     x * y = 2^64 * hi + lo =    1 * hi + lo = lo + hi,  (mod 2^64 - 1)
        // Multiplicative hashing is universal in a field (like mod p).
        low_half ^ high_half
    }

    #[cfg(target_pointer_width = "32")]
    {
        // A u64 x u64 -> u128 product is prohibitively expensive on 32-bit, so
        // split both operands into their 32-bit halves.
        let x_low = x as u32 as u64;
        let x_high = (x >> 32) as u32 as u64;
        let y_low = y as u32 as u64;
        let y_high = (y >> 32) as u32 as u64;

        // u32 x u32 -> u64: pair the low half of one operand with the high
        // half of the other.
        let low_by_high = x_low * y_high;
        let high_by_low = x_high * y_low;

        // Swap the halves of one product before combining, so the upper bits
        // of one product meet the lower bits of the other.
        low_by_high ^ high_by_low.rotate_right(32)
    }
}

/// Const-compatible helper that reads eight bytes of `bytes`, starting at
/// `offset`, as a little-endian `u64`.
///
/// # Panics
///
/// Panics if `bytes` does not contain eight bytes starting at `offset`.
#[inline(always)]
const fn read_u64_le(bytes: &[u8], offset: usize) -> u64 {
    // This is a safe function, so it must not rely on callers for memory
    // safety. The check is written as a subtraction so that a huge `offset`
    // cannot overflow. Callers already branch on the slice length, so the
    // check is expected to be optimized away once this function is inlined.
    assert!(
        offset <= bytes.len() && bytes.len() - offset >= 8,
        "read_u64_le: out-of-bounds read"
    );

    // SAFETY: the assertion above guarantees that `offset..offset + 8` lies
    // inside `bytes`, so the offset pointer stays within the slice.
    let array = unsafe { bytes.as_ptr().add(offset) } as *const [u8; 8];
    // SAFETY: `array` points at eight initialized bytes inside `bytes`, and
    // `[u8; 8]` has an alignment of 1, so the read is in bounds and aligned.
    u64::from_le_bytes(unsafe { *array })
}

/// Const-compatible helper that reads four bytes of `bytes`, starting at
/// `offset`, as a little-endian `u32`.
///
/// # Panics
///
/// Panics if `bytes` does not contain four bytes starting at `offset`.
#[inline(always)]
const fn read_u32_le(bytes: &[u8], offset: usize) -> u32 {
    // This is a safe function, so it must not rely on callers for memory
    // safety. The check is written as a subtraction so that a huge `offset`
    // cannot overflow. Callers already branch on the slice length, so the
    // check is expected to be optimized away once this function is inlined.
    assert!(
        offset <= bytes.len() && bytes.len() - offset >= 4,
        "read_u32_le: out-of-bounds read"
    );

    // SAFETY: the assertion above guarantees that `offset..offset + 4` lies
    // inside `bytes`, so the offset pointer stays within the slice.
    let array = unsafe { bytes.as_ptr().add(offset) } as *const [u8; 4];
    // SAFETY: `array` points at four initialized bytes inside `bytes`, and
    // `[u8; 4]` has an alignment of 1, so the read is in bounds and aligned.
    u32::from_le_bytes(unsafe { *array })
}

/// `const` port of `hash_bytes` from `rustc-hash`.
///
/// See: <https://github.com/rust-lang/rustc-hash/blob/dc5c33f1283de2da64d8d7a06401d91aded03ad4/src/lib.rs#L252-L297>
///
/// ---
///
/// Upstream description: a wyhash-inspired, non-collision-resistant hash for
/// strings/slices designed by Orson Peters, with a focus on small strings and
/// small codesize.
///
/// The 64-bit version of this hash passes the SMHasher3 test suite on the full
/// 64-bit output, that is, f(hash_bytes(b) ^ f(seed)) for some good avalanching
/// permutation f() passed all tests with zero failures. With the 32-bit
/// version of `multiply_mix` there are a few non-catastrophic failures, where
/// a handful more collisions occur than an optimal hash would give.
///
/// No avalanching is done here: upstream feeds this hash into a multiplication
/// and then takes the high bits, which avalanches for it.
#[inline]
#[doc(hidden)]
const fn hash_bytes(bytes: &[u8]) -> u64 {
    let len = bytes.len();
    let mut s0 = SEED1;
    let mut s1 = SEED2;

    if len > 16 {
        // Bulk path: consume 16 bytes per round. The final 16 bytes are always
        // handled after the loop and may partially overlap the last round.
        let tail_start = len - 16;
        let mut cursor = 0;
        while cursor < tail_start {
            let first_word = read_u64_le(bytes, cursor);
            let second_word = read_u64_le(bytes, cursor + 8);

            // s1 becomes a mix of s0 and both words, while s0 takes over the
            // previous s1. This lets the compiler unroll the loop into two
            // independent streams, one per accumulator.
            //
            // Zeroes are a common input, so a constant is XOR'ed into the
            // second word to prevent an immediate trivial collapse of the
            // hash function.
            let mixed = multiply_mix(
                s0 ^ first_word,
                PREVENT_TRIVIAL_ZERO_COLLAPSE ^ second_word,
            );
            s0 = s1;
            s1 = mixed;
            cursor += 16;
        }

        s0 ^= read_u64_le(bytes, tail_start);
        s1 ^= read_u64_le(bytes, len - 8);
    } else if len >= 8 {
        // 8..=16 bytes: the first and last eight bytes (possibly overlapping).
        s0 ^= read_u64_le(bytes, 0);
        s1 ^= read_u64_le(bytes, len - 8);
    } else if len >= 4 {
        // 4..=7 bytes: the first and last four bytes (possibly overlapping).
        s0 ^= read_u32_le(bytes, 0) as u64;
        s1 ^= read_u32_le(bytes, len - 4) as u64;
    } else if len > 0 {
        // 1..=3 bytes: the first, middle and last byte.
        let first = bytes[0] as u64;
        let middle = bytes[len / 2] as u64;
        let last = bytes[len - 1] as u64;
        s0 ^= first;
        s1 ^= (last << 8) | middle;
    }

    multiply_mix(s0, s1) ^ (len as u64)
}

type BuildEntryHasher = BuildHasherDefault<EntryHasher>;
Expand Down Expand Up @@ -253,7 +379,11 @@ impl Hasher for EntryHasher {

#[cfg(test)]
mod tests {
use crate::{dynamic::GLOBAL_DATA, global_atom_store_gc, Atom};
use std::hash::{Hash, Hasher};

use rustc_hash::FxHasher;

use crate::{atom, dynamic::GLOBAL_DATA, global_atom_store_gc, Atom};

fn expect_size(expected: usize) {
// This is a helper function to count the number of bytes in the global store.
Expand Down Expand Up @@ -327,4 +457,50 @@ mod tests {
global_atom_store_gc();
expect_size(0);
}

// Ensure that hashing an `Atom` produces the same value as hashing the string
// it holds, both on its own and when nested inside a tuple.
//
// This is important for `Borrow<str>` implementation to be correct.
// Note that if we enable `nightly` feature of `rustc-hash`, we need to remove
// `state.write_u8(0xff);` from the hash implementation of `RcStr`.
// NOTE(review): `RcStr` is not defined in the visible part of this file;
// presumably `Atom` is meant here — confirm.
#[test]
fn test_hash() {
    const LONG_STRING: &str = "A very long long long string that would not be inlined";

    let dynamic = Atom::from(LONG_STRING);
    // An atom built at runtime hashes like the plain `&str`.
    assert_eq!(fxhash(dynamic.clone()), fxhash(LONG_STRING));
    // An atom built through `atom!` hashes like the one built at runtime.
    assert_eq!(fxhash(dynamic.clone()), fxhash(atom!(LONG_STRING)));
    // The equality still holds when the value is hashed as part of a tuple.
    assert_eq!(fxhash((1, dynamic, 1)), fxhash((1, LONG_STRING, 1)));
}

/// Feeds `value` into a fresh `FxHasher` and returns the finished hash.
fn fxhash<T: Hash>(value: T) -> u64 {
    let mut state = FxHasher::default();
    Hash::hash(&value, &mut state);
    Hasher::finish(&state)
}

#[test]
fn static_items_are_not_in_the_store() {
    const TEXT: &str = "hello a long string that cannot be inline";

    // The store is expected to start out empty.
    expect_size(0);

    // Creating an atom through `atom!` must not change the store size.
    let static_atom = atom!(TEXT);
    expect_size(0);

    // Creating the same text through `Atom::new` is expected to add one entry.
    let interned = Atom::new(TEXT);
    expect_size(1);

    // Dropping the atom alone does not shrink the store ...
    drop(interned);
    expect_size(1);

    // ... the entry only goes away once the store is garbage collected, even
    // though the `atom!` value is still alive at that point.
    global_atom_store_gc();
    drop(static_atom);
    expect_size(0);
}
}
Loading
Loading