Skip to content

[turbopack] Optimize PartialEq for RcStr #80551

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,10 @@ opt-level = 3
[profile.release.package.serde]
opt-level = 3

[profile.profiling]
inherits = "release"
debug = true

[workspace.dependencies]
# Workspace crates
next-api = { path = "crates/next-api" }
Expand Down
42 changes: 15 additions & 27 deletions turbopack/crates/turbo-rcstr/src/dynamic.rs
Original file line number Diff line number Diff line change
@@ -1,57 +1,46 @@
use std::{num::NonZeroU8, ptr::NonNull};
use std::{mem::ManuallyDrop, num::NonZeroU8, ptr::NonNull};

use triomphe::Arc;
use triomphe::ThinArc;

use crate::{
INLINE_TAG, INLINE_TAG_INIT, LEN_OFFSET, RcStr, TAG_MASK,
tagged_value::{MAX_INLINE_LEN, TaggedValue},
};

pub(crate) struct PrehashedString {
pub value: String,
/// This is not the actual `fxhash`, but rather it's a value that passed to
/// `write_u64` of [rustc_hash::FxHasher].
pub hash: u64,
}

pub unsafe fn cast(ptr: TaggedValue) -> *const PrehashedString {
ptr.get_ptr().cast()
}

pub(crate) unsafe fn deref_from<'i>(ptr: TaggedValue) -> &'i PrehashedString {
unsafe { &*cast(ptr) }
}

/// Caller should call `forget` (or `clone`) on the returned `Arc`
pub unsafe fn restore_arc(v: TaggedValue) -> Arc<PrehashedString> {
let ptr = v.get_ptr() as *const PrehashedString;
unsafe { Arc::from_raw(ptr) }
/// Caller should call `drop` (or `clone`) on the returned `Arc` as this does manipulate the
/// refcount
pub(crate) unsafe fn restore_arc(v: TaggedValue) -> ManuallyDrop<ThinArc<PrehashedString, u8>> {
let ptr = v.get_ptr();
ManuallyDrop::new(unsafe { ThinArc::from_raw(ptr) })
}

/// This can create any kind of [Atom], although this lives in the `dynamic`
/// module.
pub(crate) fn new_atom<T: AsRef<str> + Into<String>>(text: T) -> RcStr {
let len = text.as_ref().len();
pub(crate) fn new_atom(text: &str) -> RcStr {
let bytes = text.as_bytes();
let len = bytes.len();

if len < MAX_INLINE_LEN {
// INLINE_TAG ensures this is never zero
let tag = INLINE_TAG_INIT | ((len as u8) << LEN_OFFSET);
let mut unsafe_data = TaggedValue::new_tag(tag);
unsafe {
unsafe_data.data_mut()[..len].copy_from_slice(text.as_ref().as_bytes());
unsafe_data.data_mut()[..len].copy_from_slice(bytes);
}
return RcStr { unsafe_data };
}

let hash = hash_bytes(text.as_ref().as_bytes());
let hash = hash_bytes(bytes);

let entry: Arc<PrehashedString> = Arc::new(PrehashedString {
value: text.into(),
hash,
});
let entry = Arc::into_raw(entry);
let entry = ThinArc::from_header_and_slice(PrehashedString { hash }, bytes);
let entry = ThinArc::into_raw(entry);

let ptr: NonNull<PrehashedString> = unsafe {
let ptr: NonNull<*const ()> = unsafe {
// Safety: Arc::into_raw returns a non-null pointer
NonNull::new_unchecked(entry as *mut _)
};
Expand All @@ -63,7 +52,6 @@ pub(crate) fn new_atom<T: AsRef<str> + Into<String>>(text: T) -> RcStr {

/// Attempts to construct an RcStr but only if it can be constructed inline.
/// This is primarily useful in constant contexts.
#[doc(hidden)]
pub(crate) const fn inline_atom(text: &str) -> Option<RcStr> {
let len = text.len();
if len < MAX_INLINE_LEN {
Expand Down
115 changes: 68 additions & 47 deletions turbopack/crates/turbo-rcstr/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::{
ffi::OsStr,
fmt::{Debug, Display},
hash::{Hash, Hasher},
mem::{ManuallyDrop, forget},
mem::ManuallyDrop,
num::NonZeroU8,
ops::Deref,
path::{Path, PathBuf},
Expand All @@ -17,7 +17,7 @@ use triomphe::Arc;
use turbo_tasks_hash::{DeterministicHash, DeterministicHasher};

use crate::{
dynamic::{deref_from, new_atom},
dynamic::{new_atom, restore_arc},
tagged_value::TaggedValue,
};

Expand Down Expand Up @@ -71,6 +71,11 @@ pub struct RcStr {
unsafe_data: TaggedValue,
}

const _: () = {
// Enforce that RcStr triggers the non-zero size optimization.
assert!(std::mem::size_of::<RcStr>() == std::mem::size_of::<Option<RcStr>>());
};

unsafe impl Send for RcStr {}
unsafe impl Sync for RcStr {}

Expand All @@ -90,16 +95,35 @@ impl RcStr {
#[inline(never)]
pub fn as_str(&self) -> &str {
match self.tag() {
DYNAMIC_TAG => unsafe { dynamic::deref_from(self.unsafe_data).value.as_str() },
INLINE_TAG => {
let len = (self.unsafe_data.tag() & LEN_MASK) >> LEN_OFFSET;
let src = self.unsafe_data.data();
unsafe { std::str::from_utf8_unchecked(&src[..(len as usize)]) }
}
DYNAMIC_TAG => self.dynamic_rcstr_as_str(),
INLINE_TAG => self.inline_rcstr_as_str(),
_ => unsafe { debug_unreachable!() },
}
}

/// [Self::as_str] for the dynamic case, useful if you have already checked the tag
#[inline(always)]
fn dynamic_rcstr_as_str(&self) -> &str {
debug_assert!(self.tag() == DYNAMIC_TAG);
let arc = unsafe { restore_arc(self.unsafe_data) };

let ptr = &arc.slice as *const [u8];
// SAFETY: the data is valid utf8 because it is always constructed from a string. See
// `dynamic` The ptr cast allows us to declare that the lifetime of the returned
// `str` is the same as `self`. This is true because `restore_arc` is ManuallyDrop and thus
// the arc outlives this function
unsafe { std::str::from_utf8_unchecked(&*ptr) }
}

/// [Self::as_str] for the inline case, useful if you have already checked the tag
#[inline(always)]
fn inline_rcstr_as_str(&self) -> &str {
debug_assert!(self.tag() == INLINE_TAG);
let len = (self.unsafe_data.tag() & LEN_MASK) >> LEN_OFFSET;
let src = self.unsafe_data.data();
unsafe { std::str::from_utf8_unchecked(&src[..(len as usize)]) }
}

/// Returns an owned mutable [`String`].
///
/// This implementation is more efficient than [`ToString::to_string`]:
Expand All @@ -108,18 +132,7 @@ impl RcStr {
/// underlying string without cloning in `O(1)` time.
/// - This avoids some of the potential overhead of the `Display` trait.
pub fn into_owned(self) -> String {
match self.tag() {
DYNAMIC_TAG => {
// convert `self` into `arc`
let arc = unsafe { dynamic::restore_arc(ManuallyDrop::new(self).unsafe_data) };
match Arc::try_unwrap(arc) {
Ok(v) => v.value,
Err(arc) => arc.value.to_string(),
}
}
INLINE_TAG => self.as_str().to_string(),
_ => unsafe { debug_unreachable!() },
}
self.as_str().to_string()
}

pub fn map(self, f: impl FnOnce(String) -> String) -> Self {
Expand All @@ -129,10 +142,9 @@ impl RcStr {
#[inline]
pub(crate) fn from_alias(alias: TaggedValue) -> Self {
if alias.tag() & TAG_MASK == DYNAMIC_TAG {
// Increment the ref-count
unsafe {
let arc = dynamic::restore_arc(alias);
forget(arc.clone());
forget(arc);
let _ = dynamic::restore_arc(alias).clone();
}
}

Expand Down Expand Up @@ -173,28 +185,25 @@ impl From<BytesStr> for RcStr {

impl From<Arc<String>> for RcStr {
fn from(s: Arc<String>) -> Self {
match Arc::try_unwrap(s) {
Ok(v) => new_atom(Cow::Owned(v)),
Err(arc) => new_atom(Cow::Borrowed(&**arc)),
}
new_atom(s.as_str())
}
}

impl From<String> for RcStr {
fn from(s: String) -> Self {
new_atom(Cow::Owned(s))
new_atom(s.as_str())
}
}

impl From<&'_ str> for RcStr {
fn from(s: &str) -> Self {
new_atom(Cow::Borrowed(s))
new_atom(s)
}
}

impl From<Cow<'_, str>> for RcStr {
fn from(s: Cow<str>) -> Self {
new_atom(s)
new_atom(s.as_ref())
}
}

Expand Down Expand Up @@ -276,15 +285,23 @@ impl Default for RcStr {

impl PartialEq for RcStr {
fn eq(&self, other: &Self) -> bool {
match (self.tag(), other.tag()) {
(DYNAMIC_TAG, DYNAMIC_TAG) => {
let l = unsafe { deref_from(self.unsafe_data) };
let r = unsafe { deref_from(other.unsafe_data) };
l.hash == r.hash && l.value == r.value
}
(INLINE_TAG, INLINE_TAG) => self.unsafe_data == other.unsafe_data,
_ => false,
// There are 3 cases
// 1. different tags? false
// 2. both inline? compare unsafe_data
// 3. both dynamic? compare contents
// However, it isn't unusual to compare identical RcStr instances so compare the raw data
// first.
if self.unsafe_data == other.unsafe_data {
return true;
}
// Now we can only possibly still be true if the tags are both dynamic
if (self.tag() | other.tag()) != DYNAMIC_TAG {
return false;
}
// They are both dynamic so we need to query memory, compare hashes and then values
let l = unsafe { restore_arc(self.unsafe_data) };
let r = unsafe { restore_arc(other.unsafe_data) };
l.header.header.hash == r.header.header.hash && l.slice == r.slice
}
}

Expand All @@ -306,12 +323,12 @@ impl Hash for RcStr {
fn hash<H: Hasher>(&self, state: &mut H) {
match self.tag() {
DYNAMIC_TAG => {
let l = unsafe { deref_from(self.unsafe_data) };
state.write_u64(l.hash);
let arc = unsafe { restore_arc(self.unsafe_data) };
state.write_u64(arc.header.header.hash);
state.write_u8(0xff);
}
INLINE_TAG => {
self.as_str().hash(state);
self.inline_rcstr_as_str().hash(state);
}
_ => unsafe { debug_unreachable!() },
}
Expand All @@ -334,7 +351,11 @@ impl<'de> Deserialize<'de> for RcStr {
impl Drop for RcStr {
fn drop(&mut self) {
if self.tag() == DYNAMIC_TAG {
unsafe { drop(dynamic::restore_arc(self.unsafe_data)) }
unsafe {
drop(ManuallyDrop::into_inner(dynamic::restore_arc(
self.unsafe_data,
)))
}
}
}
}
Expand All @@ -349,15 +370,15 @@ pub const fn inline_atom(s: &str) -> Option<RcStr> {
#[macro_export]
macro_rules! rcstr {
($s:expr) => {{
const INLINE: core::option::Option<$crate::RcStr> = $crate::inline_atom($s);
// this condition should be able to be compile time evaluated and inlined.
const INLINE: ::core::option::Option<$crate::RcStr> = $crate::inline_atom($s);
// This condition can be evaluated at compile time to enable inlining as a simple constant
if INLINE.is_some() {
INLINE.unwrap()
} else {
#[inline(never)]
fn get_rcstr() -> $crate::RcStr {
static CACHE: std::sync::LazyLock<$crate::RcStr> =
std::sync::LazyLock::new(|| $crate::RcStr::from($s));
static CACHE: ::std::sync::LazyLock<$crate::RcStr> =
::std::sync::LazyLock::new(|| $crate::RcStr::from($s));

(*CACHE).clone()
}
Expand Down Expand Up @@ -426,7 +447,7 @@ mod tests {
fn refcount(str: &RcStr) -> usize {
assert!(str.tag() == DYNAMIC_TAG);
let arc = ManuallyDrop::new(unsafe { dynamic::restore_arc(str.unsafe_data) });
triomphe::Arc::count(&arc)
triomphe::ThinArc::strong_count(&arc)
}

let str = RcStr::from("this is a long string that won't be inlined");
Expand Down
Loading