diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs index 5e62ce285dd8c..55b1e728b70db 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs @@ -53,13 +53,6 @@ pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) { None => return, }; - // The order of entries in this global file table needs to be deterministic, - // and ideally should also be independent of the details of stable-hashing, - // because coverage tests snapshots (`.cov-map`) can observe the order and - // would need to be re-blessed if it changes. As long as those requirements - // are satisfied, the order can be arbitrary. - let mut global_file_table = GlobalFileTable::new(); - let mut covfun_records = instances_used .iter() .copied() @@ -67,17 +60,13 @@ pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) { // order that doesn't depend on the stable-hash-based order in which // instances were visited during codegen. .sorted_by_cached_key(|&instance| tcx.symbol_name(instance).name) - .filter_map(|instance| prepare_covfun_record(tcx, &mut global_file_table, instance, true)) + .filter_map(|instance| prepare_covfun_record(tcx, instance, true)) .collect::>(); // In a single designated CGU, also prepare covfun records for functions // in this crate that were instrumented for coverage, but are unused. if cx.codegen_unit.is_code_coverage_dead_code_cgu() { - unused::prepare_covfun_records_for_unused_functions( - cx, - &mut global_file_table, - &mut covfun_records, - ); + unused::prepare_covfun_records_for_unused_functions(cx, &mut covfun_records); } // If there are no covfun records for this CGU, don't generate a covmap record. @@ -89,68 +78,88 @@ pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) { return; } - // Encode all filenames referenced by coverage mappings in this CGU. - let filenames_buffer = global_file_table.make_filenames_buffer(tcx); - // The `llvm-cov` tool uses this hash to associate each covfun record with - // its corresponding filenames table, since the final binary will typically - // contain multiple covmap records from different compilation units. - let filenames_hash = llvm_cov::hash_bytes(&filenames_buffer); + // Prepare the global file table for this CGU, containing all paths needed + // by one or more covfun records. + let global_file_table = + GlobalFileTable::build(tcx, covfun_records.iter().flat_map(|c| c.all_source_files())); for covfun in &covfun_records { - covfun::generate_covfun_record(cx, filenames_hash, covfun) + covfun::generate_covfun_record(cx, &global_file_table, covfun) } // Generate the coverage map header, which contains the filenames used by // this CGU's coverage mappings, and store it in a well-known global. // (This is skipped if we returned early due to having no covfun records.) - generate_covmap_record(cx, covmap_version, &filenames_buffer); + generate_covmap_record(cx, covmap_version, &global_file_table.filenames_buffer); } -/// Maps "global" (per-CGU) file ID numbers to their underlying source files. +/// Maps "global" (per-CGU) file ID numbers to their underlying source file paths. +#[derive(Debug)] struct GlobalFileTable { /// This "raw" table doesn't include the working dir, so a file's /// global ID is its index in this set **plus one**. - raw_file_table: FxIndexMap>, + raw_file_table: FxIndexMap, + + /// The file table in encoded form (possibly compressed), which can be + /// included directly in this CGU's `__llvm_covmap` record. + filenames_buffer: Vec, + + /// Truncated hash of the bytes in `filenames_buffer`. + /// + /// The `llvm-cov` tool uses this hash to associate each covfun record with + /// its corresponding filenames table, since the final binary will typically + /// contain multiple covmap records from different compilation units. + filenames_hash: u64, } impl GlobalFileTable { - fn new() -> Self { - Self { raw_file_table: FxIndexMap::default() } - } + /// Builds a "global file table" for this CGU, mapping numeric IDs to + /// path strings. + fn build<'a>(tcx: TyCtxt<'_>, all_files: impl Iterator) -> Self { + let mut raw_file_table = FxIndexMap::default(); + + for file in all_files { + raw_file_table.entry(file.stable_id).or_insert_with(|| { + file.name + .for_scope(tcx.sess, RemapPathScopeComponents::MACRO) + .to_string_lossy() + .into_owned() + }); + } + + // FIXME(Zalathar): Consider sorting the file table here, but maybe + // only after adding filename support to coverage-dump, so that the + // table order isn't directly visible in `.coverage-map` snapshots. + + let mut table = Vec::with_capacity(raw_file_table.len() + 1); + + // Since version 6 of the LLVM coverage mapping format, the first entry + // in the global file table is treated as a base directory, used to + // resolve any other entries that are stored as relative paths. + let base_dir = tcx + .sess + .opts + .working_dir + .for_scope(tcx.sess, RemapPathScopeComponents::MACRO) + .to_string_lossy(); + table.push(base_dir.as_ref()); - fn global_file_id_for_file(&mut self, file: &Arc) -> GlobalFileId { - // Ensure the given file has a table entry, and get its index. - let entry = self.raw_file_table.entry(file.stable_id); - let raw_id = entry.index(); - entry.or_insert_with(|| Arc::clone(file)); - - // The raw file table doesn't include an entry for the working dir - // (which has ID 0), so add 1 to get the correct ID. - GlobalFileId::from_usize(raw_id + 1) - } + // Add the regular entries after the base directory. + table.extend(raw_file_table.values().map(|name| name.as_str())); - fn make_filenames_buffer(&self, tcx: TyCtxt<'_>) -> Vec { - let mut table = Vec::with_capacity(self.raw_file_table.len() + 1); - - // LLVM Coverage Mapping Format version 6 (zero-based encoded as 5) - // requires setting the first filename to the compilation directory. - // Since rustc generates coverage maps with relative paths, the - // compilation directory can be combined with the relative paths - // to get absolute paths, if needed. - table.push( - tcx.sess - .opts - .working_dir - .for_scope(tcx.sess, RemapPathScopeComponents::MACRO) - .to_string_lossy(), - ); + // Encode the file table into a buffer, and get the hash of its encoded + // bytes, so that we can embed that hash in `__llvm_covfun` records. + let filenames_buffer = llvm_cov::write_filenames_to_buffer(&table); + let filenames_hash = llvm_cov::hash_bytes(&filenames_buffer); - // Add the regular entries after the base directory. - table.extend(self.raw_file_table.values().map(|file| { - file.name.for_scope(tcx.sess, RemapPathScopeComponents::MACRO).to_string_lossy() - })); + Self { raw_file_table, filenames_buffer, filenames_hash } + } - llvm_cov::write_filenames_to_buffer(&table) + fn get_existing_id(&self, file: &SourceFile) -> Option { + let raw_id = self.raw_file_table.get_index_of(&file.stable_id)?; + // The raw file table doesn't include an entry for the base dir + // (which has ID 0), so add 1 to get the correct ID. + Some(GlobalFileId::from_usize(raw_id + 1)) } } @@ -166,26 +175,31 @@ rustc_index::newtype_index! { struct LocalFileId {} } -/// Holds a mapping from "local" (per-function) file IDs to "global" (per-CGU) -/// file IDs. +/// Holds a mapping from "local" (per-function) file IDs to their corresponding +/// source files. #[derive(Debug, Default)] struct VirtualFileMapping { - local_to_global: IndexVec, - global_to_local: FxIndexMap, + local_file_table: IndexVec>, } impl VirtualFileMapping { - fn local_id_for_global(&mut self, global_file_id: GlobalFileId) -> LocalFileId { - *self - .global_to_local - .entry(global_file_id) - .or_insert_with(|| self.local_to_global.push(global_file_id)) + fn push_file(&mut self, source_file: &Arc) -> LocalFileId { + self.local_file_table.push(Arc::clone(source_file)) } - fn to_vec(&self) -> Vec { - // This clone could be avoided by transmuting `&[GlobalFileId]` to `&[u32]`, - // but it isn't hot or expensive enough to justify the extra unsafety. - self.local_to_global.iter().map(|&global| GlobalFileId::as_u32(global)).collect() + /// Resolves all of the filenames in this local file mapping to a list of + /// global file IDs in its CGU, for inclusion in this function's + /// `__llvm_covfun` record. + /// + /// The global file IDs are returned as `u32` to make FFI easier. + fn resolve_all(&self, global_file_table: &GlobalFileTable) -> Option> { + self.local_file_table + .iter() + .map(|file| try { + let id = global_file_table.get_existing_id(file)?; + GlobalFileId::as_u32(id) + }) + .collect::>>() } } diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs index 93419c2caad25..7bdbc68595290 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs @@ -5,6 +5,7 @@ //! [^win]: On Windows the section name is `.lcovfun`. use std::ffi::CString; +use std::sync::Arc; use rustc_abi::Align; use rustc_codegen_ssa::traits::{ @@ -15,7 +16,7 @@ use rustc_middle::mir::coverage::{ MappingKind, Op, }; use rustc_middle::ty::{Instance, TyCtxt}; -use rustc_span::Span; +use rustc_span::{SourceFile, Span}; use rustc_target::spec::HasTargetSpec; use tracing::debug; @@ -37,9 +38,16 @@ pub(crate) struct CovfunRecord<'tcx> { regions: ffi::Regions, } +impl<'tcx> CovfunRecord<'tcx> { + /// Iterator that yields all source files referred to by this function's + /// coverage mappings. Used to build the global file table for the CGU. + pub(crate) fn all_source_files(&self) -> impl Iterator { + self.virtual_file_mapping.local_file_table.iter().map(Arc::as_ref) + } +} + pub(crate) fn prepare_covfun_record<'tcx>( tcx: TyCtxt<'tcx>, - global_file_table: &mut GlobalFileTable, instance: Instance<'tcx>, is_used: bool, ) -> Option> { @@ -57,7 +65,7 @@ pub(crate) fn prepare_covfun_record<'tcx>( regions: ffi::Regions::default(), }; - fill_region_tables(tcx, global_file_table, fn_cov_info, ids_info, &mut covfun); + fill_region_tables(tcx, fn_cov_info, ids_info, &mut covfun); if covfun.regions.has_no_regions() { debug!(?covfun, "function has no mappings to embed; skipping"); @@ -92,7 +100,6 @@ fn prepare_expressions(ids_info: &CoverageIdsInfo) -> Vec( tcx: TyCtxt<'tcx>, - global_file_table: &mut GlobalFileTable, fn_cov_info: &'tcx FunctionCoverageInfo, ids_info: &'tcx CoverageIdsInfo, covfun: &mut CovfunRecord<'tcx>, @@ -106,11 +113,7 @@ fn fill_region_tables<'tcx>( }; let source_file = source_map.lookup_source_file(first_span.lo()); - // Look up the global file ID for that file. - let global_file_id = global_file_table.global_file_id_for_file(&source_file); - - // Associate that global file ID with a local file ID for this function. - let local_file_id = covfun.virtual_file_mapping.local_id_for_global(global_file_id); + let local_file_id = covfun.virtual_file_mapping.push_file(&source_file); // In rare cases, _all_ of a function's spans are discarded, and coverage // codegen needs to handle that gracefully to avoid #133606. @@ -179,7 +182,7 @@ fn fill_region_tables<'tcx>( /// as a global variable in the `__llvm_covfun` section. pub(crate) fn generate_covfun_record<'tcx>( cx: &CodegenCx<'_, 'tcx>, - filenames_hash: u64, + global_file_table: &GlobalFileTable, covfun: &CovfunRecord<'tcx>, ) { let &CovfunRecord { @@ -191,12 +194,19 @@ pub(crate) fn generate_covfun_record<'tcx>( ref regions, } = covfun; + let Some(local_file_table) = virtual_file_mapping.resolve_all(global_file_table) else { + debug_assert!( + false, + "all local files should be present in the global file table: \ + global_file_table = {global_file_table:?}, \ + virtual_file_mapping = {virtual_file_mapping:?}" + ); + return; + }; + // Encode the function's coverage mappings into a buffer. - let coverage_mapping_buffer = llvm_cov::write_function_mappings_to_buffer( - &virtual_file_mapping.to_vec(), - expressions, - regions, - ); + let coverage_mapping_buffer = + llvm_cov::write_function_mappings_to_buffer(&local_file_table, expressions, regions); // A covfun record consists of four target-endian integers, followed by the // encoded mapping data in bytes. Note that the length field is 32 bits. @@ -209,7 +219,7 @@ pub(crate) fn generate_covfun_record<'tcx>( cx.const_u64(func_name_hash), cx.const_u32(coverage_mapping_buffer.len() as u32), cx.const_u64(source_hash), - cx.const_u64(filenames_hash), + cx.const_u64(global_file_table.filenames_hash), cx.const_bytes(&coverage_mapping_buffer), ], // This struct needs to be packed, so that the 32-bit length field diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/unused.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/unused.rs index ab030f5b6152a..68f60f169b5b5 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/unused.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/unused.rs @@ -7,7 +7,6 @@ use rustc_middle::ty::{self, TyCtxt}; use rustc_span::def_id::DefIdSet; use crate::common::CodegenCx; -use crate::coverageinfo::mapgen::GlobalFileTable; use crate::coverageinfo::mapgen::covfun::{CovfunRecord, prepare_covfun_record}; use crate::llvm; @@ -21,7 +20,6 @@ use crate::llvm; /// its embedded coverage data. pub(crate) fn prepare_covfun_records_for_unused_functions<'tcx>( cx: &CodegenCx<'_, 'tcx>, - global_file_table: &mut GlobalFileTable, covfun_records: &mut Vec>, ) { assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu()); @@ -33,7 +31,7 @@ pub(crate) fn prepare_covfun_records_for_unused_functions<'tcx>( // Try to create a covfun record for each unused function. let mut name_globals = Vec::with_capacity(unused_instances.len()); covfun_records.extend(unused_instances.into_iter().filter_map(|unused| try { - let record = prepare_covfun_record(cx.tcx, global_file_table, unused.instance, false)?; + let record = prepare_covfun_record(cx.tcx, unused.instance, false)?; // If successful, also store its symbol name in a global constant. name_globals.push(cx.const_str(unused.symbol_name.name).0); record