From 676eed3ad948557fd797bf20839752671d37805c Mon Sep 17 00:00:00 2001
From: Mark Rousskov
Date: Sun, 29 Jun 2025 12:45:42 -0400
Subject: [PATCH] Remove no-op cleanups as post-mono MIR opt

On cargo this cuts ~5% of the LLVM IR lines we generate (measured with
-Cno-prepopulate-passes).
---
 compiler/rustc_codegen_ssa/src/mir/analyze.rs |  12 +-
 compiler/rustc_codegen_ssa/src/mir/block.rs   |  16 +-
 compiler/rustc_codegen_ssa/src/mir/mod.rs     | 155 +++++++++++++++++-
 tests/codegen/mem-replace-big-type.rs         |   2 +-
 4 files changed, 172 insertions(+), 13 deletions(-)

diff --git a/compiler/rustc_codegen_ssa/src/mir/analyze.rs b/compiler/rustc_codegen_ssa/src/mir/analyze.rs
index 99f35b7920864..1f1c40ab2e058 100644
--- a/compiler/rustc_codegen_ssa/src/mir/analyze.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/analyze.rs
@@ -279,10 +279,14 @@ impl CleanupKind {
 /// MSVC requires unwinding code to be split to a tree of *funclets*, where each funclet can only
 /// branch to itself or to its parent. Luckily, the code we generates matches this pattern.
 /// Recover that structure in an analyze pass.
-pub(crate) fn cleanup_kinds(mir: &mir::Body<'_>) -> IndexVec<mir::BasicBlock, CleanupKind> {
+pub(crate) fn cleanup_kinds(
+    mir: &mir::Body<'_>,
+    live_blocks: &DenseBitSet<mir::BasicBlock>,
+) -> IndexVec<mir::BasicBlock, CleanupKind> {
     fn discover_masters<'tcx>(
         result: &mut IndexSlice<mir::BasicBlock, CleanupKind>,
         mir: &mir::Body<'tcx>,
+        live_blocks: &DenseBitSet<mir::BasicBlock>,
     ) {
         for (bb, data) in mir.basic_blocks.iter_enumerated() {
             match data.terminator().kind {
@@ -301,7 +305,9 @@ pub(crate) fn cleanup_kinds(mir: &mir::Body<'_>) -> IndexVec<mir::BasicBlock, CleanupKind> {
-                if let mir::UnwindAction::Cleanup(unwind) = unwind {
+                if let mir::UnwindAction::Cleanup(unwind) = unwind
+                    && live_blocks.contains(unwind)
+                {
                     debug!(
                         "cleanup_kinds: {:?}/{:?} registering {:?} as funclet",
                         bb, data, unwind
@@ -382,7 +388,7 @@ pub(crate) fn cleanup_kinds(mir: &mir::Body<'_>) -> IndexVec<mir::BasicBlock, CleanupKind> {
-    discover_masters(&mut result, mir);
+    discover_masters(&mut result, mir, live_blocks);
diff --git a/compiler/rustc_codegen_ssa/src/mir/block.rs b/compiler/rustc_codegen_ssa/src/mir/block.rs
--- a/compiler/rustc_codegen_ssa/src/mir/block.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/block.rs
@@ ... @@ impl<'a, 'tcx> TerminatorCodegenHelper<'tcx> {
         }
 
         let unwind_block = match unwind {
-            mir::UnwindAction::Cleanup(cleanup) => Some(self.llbb_with_cleanup(fx, cleanup)),
+            mir::UnwindAction::Cleanup(cleanup) => {
+                if fx.live_blocks.contains(cleanup) {
+                    Some(self.llbb_with_cleanup(fx, cleanup))
+                } else {
+                    None
+                }
+            }
             mir::UnwindAction::Continue => None,
             mir::UnwindAction::Unreachable => None,
             mir::UnwindAction::Terminate(reason) => {
@@ -286,7 +292,13 @@ impl<'a, 'tcx> TerminatorCodegenHelper<'tcx> {
         mergeable_succ: bool,
     ) -> MergingSucc {
         let unwind_target = match unwind {
-            mir::UnwindAction::Cleanup(cleanup) => Some(self.llbb_with_cleanup(fx, cleanup)),
+            mir::UnwindAction::Cleanup(cleanup) => {
+                if fx.live_blocks.contains(cleanup) {
+                    Some(self.llbb_with_cleanup(fx, cleanup))
+                } else {
+                    None
+                }
+            }
             mir::UnwindAction::Terminate(reason) => Some(fx.terminate_block(reason)),
             mir::UnwindAction::Continue => None,
             mir::UnwindAction::Unreachable => None,
diff --git a/compiler/rustc_codegen_ssa/src/mir/mod.rs b/compiler/rustc_codegen_ssa/src/mir/mod.rs
index 66c4af4c935b5..9d20cf89c5d3c 100644
--- a/compiler/rustc_codegen_ssa/src/mir/mod.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/mod.rs
@@ -97,6 +97,12 @@ pub struct FunctionCx<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> {
     /// A cold block is a block that is unlikely to be executed at runtime.
     cold_blocks: IndexVec<mir::BasicBlock, bool>,
 
+    /// A set of blocks which are live.
+    ///
+    /// This is a subset of the reachable block set; currently it specifically targets replacing
+    /// UnwindAction::Cleanup(target) with UnwindAction::Continue when target is dead.
+    live_blocks: DenseBitSet<mir::BasicBlock>,
+
     /// The location where each MIR arg/var/tmp/ret is stored. This is
     /// usually an `PlaceRef` representing an alloca, but not always:
     /// sometimes we can skip the alloca and just store the value
@@ -176,8 +182,7 @@ pub fn codegen_mir<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
 
     let mut mir = tcx.instance_mir(instance.def);
 
-    let fn_abi = cx.fn_abi_of_instance(instance, ty::List::empty());
-    debug!("fn_abi: {:?}", fn_abi);
+    let live_blocks = find_noop_cleanup::<Bx>(cx, &mir, instance);
 
     if tcx.features().ergonomic_clones() {
         let monomorphized_mir = instance.instantiate_mir_and_normalize_erasing_regions(
@@ -188,19 +193,24 @@ pub fn codegen_mir<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
         mir = tcx.arena.alloc(optimize_use_clone::<Bx>(cx, monomorphized_mir));
     }
 
+    let fn_abi = cx.fn_abi_of_instance(instance, ty::List::empty());
+    debug!("fn_abi: {:?}", fn_abi);
+
     let debug_context = cx.create_function_debug_context(instance, fn_abi, llfn, &mir);
 
     let start_llbb = Bx::append_block(cx, llfn, "start");
     let mut start_bx = Bx::build(cx, start_llbb);
 
-    if mir.basic_blocks.iter().any(|bb| {
-        bb.is_cleanup || matches!(bb.terminator().unwind(), Some(mir::UnwindAction::Terminate(_)))
+    if mir::traversal::mono_reachable(&mir, tcx, instance).any(|(bb, block)| {
+        live_blocks.contains(bb)
+            && (block.is_cleanup
+                || matches!(block.terminator().unwind(), Some(mir::UnwindAction::Terminate(_))))
     }) {
         start_bx.set_personality_fn(cx.eh_personality());
     }
 
-    let cleanup_kinds =
-        base::wants_new_eh_instructions(tcx.sess).then(|| analyze::cleanup_kinds(&mir));
+    let cleanup_kinds = base::wants_new_eh_instructions(tcx.sess)
+        .then(|| analyze::cleanup_kinds(&mir, &live_blocks));
 
     let cached_llbbs: IndexVec<mir::BasicBlock, CachedLlbb<Bx::BasicBlock>> =
         mir.basic_blocks
@@ -228,6 +238,7 @@ pub fn codegen_mir<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
         debug_context,
         per_local_var_debug_info: None,
         caller_location: None,
+        live_blocks,
     };
 
     // It may seem like we should iterate over `required_consts` to ensure they all successfully
@@ -239,7 +250,8 @@ pub fn codegen_mir<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
         fx.compute_per_local_var_debug_info(&mut start_bx).unzip();
     fx.per_local_var_debug_info = per_local_var_debug_info;
 
-    let traversal_order = traversal::mono_reachable_reverse_postorder(mir, tcx, instance);
+    let mut traversal_order = traversal::mono_reachable_reverse_postorder(mir, tcx, instance);
+    traversal_order.retain(|bb| fx.live_blocks.contains(*bb));
     let memory_locals = analyze::non_ssa_locals(&fx, &traversal_order);
 
     // Allocate variable and temp allocas
@@ -371,6 +383,135 @@ fn optimize_use_clone<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
     mir
 }
 
+/// Detect cases where monomorphized MIR has a cleanup block (or series of blocks) that never does
+/// anything, just resumes unwinding.
+///
+/// This usually results from pre-mono MIR having a no-op drop(...) for a specific type.
+///
+/// Returns the set of basic blocks that should be codegen'd; blocks not in the set should be
+/// treated as dead code (i.e., not codegen'd, and any Cleanup branch to them should instead be
+/// UnwindAction::Continue). This doesn't mutate the MIR because that can be quite expensive.
+fn find_noop_cleanup<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
+    cx: &'a Bx::CodegenCx,
+    mir: &Body<'tcx>,
+    instance: Instance<'tcx>,
+) -> DenseBitSet<mir::BasicBlock> {
+    let tcx = cx.tcx();
+
+    // First we construct a bitset containing basic blocks that do something (`any_action`). These
+    // are live code that can't be skipped at codegen time.
+    let mut any_action = DenseBitSet::new_empty(mir.basic_blocks.len());
+    for (bb, block) in mir.basic_blocks.iter_enumerated() {
+        if !block.is_cleanup {
+            // We don't care about non-cleanup blocks.
+            any_action.insert(bb);
+            continue;
+        }
+
+        let mut has_actions = false;
+        for stmt in &block.statements {
+            match stmt.kind {
+                mir::StatementKind::SetDiscriminant { .. }
+                | mir::StatementKind::Deinit(..)
+                | mir::StatementKind::StorageLive(..)
+                | mir::StatementKind::StorageDead(..)
+                | mir::StatementKind::Retag(..)
+                | mir::StatementKind::Coverage(..)
+                | mir::StatementKind::Intrinsic(..)
+                | mir::StatementKind::Assign(..) => {
+                    has_actions = true;
+                    break;
+                }
+                mir::StatementKind::FakeRead(..)
+                | mir::StatementKind::PlaceMention(..)
+                | mir::StatementKind::AscribeUserType(..)
+                | mir::StatementKind::ConstEvalCounter
+                | mir::StatementKind::Nop
+                | mir::StatementKind::BackwardIncompatibleDropHint { .. } => {}
+            }
+        }
+        match block.terminator().kind {
+            mir::TerminatorKind::Goto { .. }
+            | mir::TerminatorKind::SwitchInt { .. }
+            | mir::TerminatorKind::UnwindResume
+            | mir::TerminatorKind::Unreachable => {}
+
+            mir::TerminatorKind::Call { .. }
+            | mir::TerminatorKind::Assert { .. }
+            | mir::TerminatorKind::Yield { .. }
+            | mir::TerminatorKind::InlineAsm { .. }
+            | mir::TerminatorKind::CoroutineDrop
+            | mir::TerminatorKind::TailCall { .. }
+            | mir::TerminatorKind::UnwindTerminate(..)
+            | mir::TerminatorKind::Return => has_actions = true,
+
+            mir::TerminatorKind::Drop { place, .. } => {
+                let ty = place.ty(mir, tcx).ty;
+                debug!("monomorphize: instance={:?}", instance);
+                let ty = instance.instantiate_mir_and_normalize_erasing_regions(
+                    tcx,
+                    cx.typing_env(),
+                    ty::EarlyBinder::bind(ty),
+                );
+                let drop_fn = Instance::resolve_drop_in_place(tcx, ty);
+                if let ty::InstanceKind::DropGlue(_, None) = drop_fn.def {
+                    // no need to drop anything
+                } else {
+                    has_actions = true;
+                }
+            }
+
+            mir::TerminatorKind::FalseEdge { .. } | mir::TerminatorKind::FalseUnwind { .. } => {
+                bug!("not present in optimized mir")
+            }
+        }
+
+        if has_actions {
+            any_action.insert(bb);
+        }
+    }
+
+    let mut visited = DenseBitSet::new_empty(mir.basic_blocks.len());
+    let mut stack = vec![mir::START_BLOCK];
+    while let Some(next) = stack.pop() {
+        // Mark blocks live as we go.
+        if !visited.insert(next) {
+            continue;
+        }
+
+        // To find successors, we consider whether to skip the target of the
+        // UnwindAction::Cleanup(...) edge. If it hits blocks which do something (are in
+        // any_action), then the edge must be kept and those blocks visited. Otherwise the
+        // blocks can be considered dead.
+        if let Some(mir::UnwindAction::Cleanup(cleanup_target)) =
+            mir.basic_blocks[next].terminator().unwind()
+        {
+            let found_action = mir::traversal::Postorder::new(
+                &mir.basic_blocks,
+                *cleanup_target,
+                Some((tcx, instance)),
+            )
+            .any(|bb| any_action.contains(bb));
+            if !found_action {
+                // Do not traverse into the cleanup target if no action is found.
+                stack.extend(
+                    mir.basic_blocks[next]
+                        .mono_successors(tcx, instance)
+                        .filter(|v| v != cleanup_target),
+                );
+
+                // Avoid hitting the extend below.
+                continue;
+            }
+        }
+
+        stack.extend(mir.basic_blocks[next].mono_successors(tcx, instance));
+    }
+
+    visited
+}
+
 /// Produces, for each argument, a `Value` pointing at the
 /// argument's value. As arguments are places, these are always
 /// indirect.
diff --git a/tests/codegen/mem-replace-big-type.rs b/tests/codegen/mem-replace-big-type.rs
index 0b2229ba7d104..b28b505b4e5c9 100644
--- a/tests/codegen/mem-replace-big-type.rs
+++ b/tests/codegen/mem-replace-big-type.rs
@@ -25,7 +25,7 @@ pub fn replace_big(dst: &mut Big, src: Big) -> Big {
 // CHECK-NOT: call void @llvm.memcpy
 
 // For a large type, we expect exactly three `memcpy`s
-// CHECK-LABEL: define internal void @{{.+}}mem{{.+}}replace{{.+}}(ptr
+// CHECK-LABEL: define void @{{.+}}mem{{.+}}replace{{.+}}(ptr
 // CHECK-SAME: sret([56 x i8]){{.+}}[[RESULT:%.+]], ptr{{.+}}%dest, ptr{{.+}}%src)
 // CHECK-NOT: call void @llvm.memcpy
 // CHECK: call void @llvm.memcpy.{{.+}}(ptr align 8 [[RESULT]], ptr align 8 %dest, i{{.*}} 56, i1 false)
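As an illustration of the pattern find_noop_cleanup targets, here is a minimal sketch (hypothetical Rust code, not part of the patch; the function names are made up). A generic function keeps an unwind cleanup edge so its argument is dropped if a callee panics; for a monomorphization whose argument type has no drop glue, that cleanup block does nothing but resume unwinding. With this patch such a block is treated as dead code and the unwind edge is emitted as if it were UnwindAction::Continue.

// Hypothetical illustration, not from the patch.
pub fn consume<T>(x: T, may_unwind: impl FnOnce()) {
    // If `may_unwind` panics, pre-mono MIR drops `x` in a cleanup block.
    may_unwind();
    drop(x);
}

pub fn caller(f: impl FnOnce()) {
    // For `T = u32` the drop resolves to `DropGlue(_, None)`, so the cleanup
    // block for this monomorphization only resumes unwinding.
    consume(0u32, f);
}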