Skip to content

Commit

Permalink
Auto merge of #96867 - michaelwoerister:path-prefix-fixes-2, r=davidtwco
Browse files Browse the repository at this point in the history
--remap-path-prefix: Fix duplicated path components in debuginfo

This PR fixes an issue with `--remap-path-prefix` where path components could appear twice in the remapped version of the path (e.g. #78479). The underlying problem was that `--remap-path-prefix` is often used to map an absolute path to something that looks like a relative path, e.g.:

```
--remap-path-prefix=/home/calvin/.cargo/registry/src/github.com-1ecc6299db9ec823=crates.io
```

and relative paths in debuginfo are interpreted as being relative to the compilation directory. So if Cargo invokes the compiler with `/home/calvin/.cargo/registry/src/github.com-1ecc6299db9ec823/some_crate-0.1.0/src/lib.rs` as input and `/home/calvin/.cargo/registry/src/github.com-1ecc6299db9ec823/some_crate-0.1.0` as the compiler's working directory, then debuginfo will state that the working directory was `crates.io/some_crate-0.1.0` and the file in question was `crates.io/some_crate-0.1.0/src/lib.rs`, which combined gives the path:

```
crates.io/some_crate-0.1.0/crates.io/some_crate-0.1.0/src/lib.rs
```

With this PR the compiler will detect this situation and set up debuginfo in LLVM in a way that makes it strip the duplicated path components when emitting DWARF.

The PR also extracts the logic for making remapped paths absolute into a common helper function that is now used by debuginfo too (instead of just during crate metadata generation).
  • Loading branch information
bors committed May 18, 2022
2 parents e5732a2 + 6411fef commit 936eba3
Show file tree
Hide file tree
Showing 10 changed files with 463 additions and 197 deletions.
162 changes: 95 additions & 67 deletions compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,20 +36,21 @@ use rustc_middle::ty::subst::GenericArgKind;
use rustc_middle::ty::{self, AdtKind, Instance, ParamEnv, Ty, TyCtxt, COMMON_VTABLE_ENTRIES};
use rustc_session::config::{self, DebugInfo};
use rustc_span::symbol::Symbol;
use rustc_span::FileName;
use rustc_span::FileNameDisplayPreference;
use rustc_span::{self, SourceFile, SourceFileHash};
use rustc_span::{self, SourceFile};
use rustc_target::abi::{Align, Size};
use smallvec::smallvec;
use tracing::debug;

use libc::{c_longlong, c_uint};
use std::borrow::Cow;
use std::collections::hash_map::Entry;
use std::fmt::{self, Write};
use std::hash::{Hash, Hasher};
use std::iter;
use std::path::{Path, PathBuf};
use std::ptr;
use tracing::instrument;

impl PartialEq for llvm::Metadata {
fn eq(&self, other: &Self) -> bool {
Expand Down Expand Up @@ -527,78 +528,105 @@ fn hex_encode(data: &[u8]) -> String {
}

pub fn file_metadata<'ll>(cx: &CodegenCx<'ll, '_>, source_file: &SourceFile) -> &'ll DIFile {
debug!("file_metadata: file_name: {:?}", source_file.name);

let hash = Some(&source_file.src_hash);
let file_name = Some(source_file.name.prefer_remapped().to_string());
let directory = if source_file.is_real_file() && !source_file.is_imported() {
Some(
cx.sess()
.opts
.working_dir
.to_string_lossy(FileNameDisplayPreference::Remapped)
.to_string(),
)
} else {
// If the path comes from an upstream crate we assume it has been made
// independent of the compiler's working directory one way or another.
None
};
file_metadata_raw(cx, file_name, directory, hash)
}

pub fn unknown_file_metadata<'ll>(cx: &CodegenCx<'ll, '_>) -> &'ll DIFile {
file_metadata_raw(cx, None, None, None)
}

fn file_metadata_raw<'ll>(
cx: &CodegenCx<'ll, '_>,
file_name: Option<String>,
directory: Option<String>,
hash: Option<&SourceFileHash>,
) -> &'ll DIFile {
let key = (file_name, directory);

match debug_context(cx).created_files.borrow_mut().entry(key) {
Entry::Occupied(o) => o.get(),
Entry::Vacant(v) => {
let (file_name, directory) = v.key();
debug!("file_metadata: file_name: {:?}, directory: {:?}", file_name, directory);

let file_name = file_name.as_deref().unwrap_or("<unknown>");
let directory = directory.as_deref().unwrap_or("");

let (hash_kind, hash_value) = match hash {
Some(hash) => {
let kind = match hash.kind {
rustc_span::SourceFileHashAlgorithm::Md5 => llvm::ChecksumKind::MD5,
rustc_span::SourceFileHashAlgorithm::Sha1 => llvm::ChecksumKind::SHA1,
rustc_span::SourceFileHashAlgorithm::Sha256 => llvm::ChecksumKind::SHA256,
};
(kind, hex_encode(hash.hash_bytes()))
let cache_key = Some((source_file.name_hash, source_file.src_hash));
return debug_context(cx)
.created_files
.borrow_mut()
.entry(cache_key)
.or_insert_with(|| alloc_new_file_metadata(cx, source_file));

#[instrument(skip(cx, source_file), level = "debug")]
fn alloc_new_file_metadata<'ll>(
cx: &CodegenCx<'ll, '_>,
source_file: &SourceFile,
) -> &'ll DIFile {
debug!(?source_file.name);

let (directory, file_name) = match &source_file.name {
FileName::Real(filename) => {
let working_directory = &cx.sess().opts.working_dir;
debug!(?working_directory);

let filename = cx
.sess()
.source_map()
.path_mapping()
.to_embeddable_absolute_path(filename.clone(), working_directory);

// Construct the absolute path of the file
let abs_path = filename.remapped_path_if_available();
debug!(?abs_path);

if let Ok(rel_path) =
abs_path.strip_prefix(working_directory.remapped_path_if_available())
{
// If the compiler's working directory (which also is the DW_AT_comp_dir of
// the compilation unit) is a prefix of the path we are about to emit, then
// only emit the part relative to the working directory.
// Because of path remapping we sometimes see strange things here: `abs_path`
// might actually look like a relative path
// (e.g. `<crate-name-and-version>/src/lib.rs`), so if we emit it without
// taking the working directory into account, downstream tooling will
// interpret it as `<working-directory>/<crate-name-and-version>/src/lib.rs`,
// which makes no sense. Usually in such cases the working directory will also
// be remapped to `<crate-name-and-version>` or some other prefix of the path
// we are remapping, so we end up with
// `<crate-name-and-version>/<crate-name-and-version>/src/lib.rs`.
// By moving the working directory portion into the `directory` part of the
// DIFile, we allow LLVM to emit just the relative path for DWARF, while
// still emitting the correct absolute path for CodeView.
(
working_directory.to_string_lossy(FileNameDisplayPreference::Remapped),
rel_path.to_string_lossy().into_owned(),
)
} else {
("".into(), abs_path.to_string_lossy().into_owned())
}
None => (llvm::ChecksumKind::None, String::new()),
};
}
other => ("".into(), other.prefer_remapped().to_string_lossy().into_owned()),
};

let file_metadata = unsafe {
llvm::LLVMRustDIBuilderCreateFile(
DIB(cx),
file_name.as_ptr().cast(),
file_name.len(),
directory.as_ptr().cast(),
directory.len(),
hash_kind,
hash_value.as_ptr().cast(),
hash_value.len(),
)
};
let hash_kind = match source_file.src_hash.kind {
rustc_span::SourceFileHashAlgorithm::Md5 => llvm::ChecksumKind::MD5,
rustc_span::SourceFileHashAlgorithm::Sha1 => llvm::ChecksumKind::SHA1,
rustc_span::SourceFileHashAlgorithm::Sha256 => llvm::ChecksumKind::SHA256,
};
let hash_value = hex_encode(source_file.src_hash.hash_bytes());

v.insert(file_metadata);
file_metadata
unsafe {
llvm::LLVMRustDIBuilderCreateFile(
DIB(cx),
file_name.as_ptr().cast(),
file_name.len(),
directory.as_ptr().cast(),
directory.len(),
hash_kind,
hash_value.as_ptr().cast(),
hash_value.len(),
)
}
}
}

/// Returns the `DIFile` that stands in for "no known source file".
///
/// The node is cached in `created_files` under the `None` key; the LLVM file
/// node (name `<unknown>`, empty directory, no checksum) is only created when
/// the cache has no entry for that key yet.
pub fn unknown_file_metadata<'ll>(cx: &CodegenCx<'ll, '_>) -> &'ll DIFile {
    let create_unknown_file = || {
        let name = "<unknown>";
        let dir = "";
        let checksum = "";

        // SAFETY: every pointer/length pair passed below is taken from a string
        // literal that stays alive for the whole call.
        // NOTE(review): assumes the callee reads at most `len` bytes per pointer
        // and does not retain the pointers -- confirm against the FFI wrapper.
        unsafe {
            llvm::LLVMRustDIBuilderCreateFile(
                DIB(cx),
                name.as_ptr().cast(),
                name.len(),
                dir.as_ptr().cast(),
                dir.len(),
                llvm::ChecksumKind::None,
                checksum.as_ptr().cast(),
                checksum.len(),
            )
        }
    };

    debug_context(cx).created_files.borrow_mut().entry(None).or_insert_with(create_unknown_file)
}

/// Maps a value to a fixed, statically allocated name string.
///
/// NOTE(review): judging by the trait name, this is presumably the spelling of a
/// basic type that MSVC-style debuginfo expects; the implementations are outside
/// this view -- confirm before relying on it.
trait MsvcBasicName {
/// Consumes `self` and returns the corresponding `'static` name.
fn msvc_basic_name(self) -> &'static str;
}
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ use rustc_middle::ty::{self, Instance, ParamEnv, Ty, TypeFoldable};
use rustc_session::config::{self, DebugInfo};
use rustc_session::Session;
use rustc_span::symbol::Symbol;
use rustc_span::{self, BytePos, Pos, SourceFile, SourceFileAndLine, Span};
use rustc_span::{self, BytePos, Pos, SourceFile, SourceFileAndLine, SourceFileHash, Span};
use rustc_target::abi::Size;

use libc::c_uint;
Expand Down Expand Up @@ -61,7 +61,7 @@ pub struct CodegenUnitDebugContext<'ll, 'tcx> {
llcontext: &'ll llvm::Context,
llmod: &'ll llvm::Module,
builder: &'ll mut DIBuilder<'ll>,
created_files: RefCell<FxHashMap<(Option<String>, Option<String>), &'ll DIFile>>,
created_files: RefCell<FxHashMap<Option<(u128, SourceFileHash)>, &'ll DIFile>>,

type_map: metadata::TypeMap<'ll, 'tcx>,
namespace_map: RefCell<DefIdMap<&'ll DIScope>>,
Expand Down
100 changes: 36 additions & 64 deletions compiler/rustc_metadata/src/rmeta/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,14 @@ use rustc_middle::ty::{self, SymbolName, Ty, TyCtxt};
use rustc_serialize::{opaque, Encodable, Encoder};
use rustc_session::config::CrateType;
use rustc_session::cstore::{ForeignModule, LinkagePreference, NativeLib};
use rustc_span::hygiene::{ExpnIndex, HygieneEncodeContext, MacroKind};
use rustc_span::symbol::{sym, Ident, Symbol};
use rustc_span::{
self, DebuggerVisualizerFile, ExternalSource, FileName, SourceFile, Span, SyntaxContext,
};
use rustc_span::{
hygiene::{ExpnIndex, HygieneEncodeContext, MacroKind},
RealFileName,
};
use rustc_target::abi::VariantIdx;
use std::hash::Hash;
use std::num::NonZeroUsize;
use std::path::Path;
use tracing::{debug, trace};

pub(super) struct EncodeContext<'a, 'tcx> {
Expand Down Expand Up @@ -490,6 +486,8 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
// is done.
let required_source_files = self.required_source_files.take().unwrap();

let working_directory = &self.tcx.sess.opts.working_dir;

let adapted = all_source_files
.iter()
.enumerate()
Expand All @@ -502,76 +500,50 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
(!source_file.is_imported() || self.is_proc_macro)
})
.map(|(_, source_file)| {
let mut adapted = match source_file.name {
FileName::Real(ref realname) => {
let mut adapted = (**source_file).clone();
adapted.name = FileName::Real(match realname {
RealFileName::LocalPath(path_to_file) => {
// Prepend path of working directory onto potentially
// relative paths, because they could become relative
// to a wrong directory.
// We include `working_dir` as part of the crate hash,
// so it's okay for us to use it as part of the encoded
// metadata.
let working_dir = &self.tcx.sess.opts.working_dir;
match working_dir {
RealFileName::LocalPath(absolute) => {
// Although neither working_dir nor the file name were subject
// to path remapping, the concatenation between the two may
// be. Hence we need to do a remapping here.
let joined = Path::new(absolute).join(path_to_file);
let (joined, remapped) =
source_map.path_mapping().map_prefix(joined);
if remapped {
RealFileName::Remapped {
local_path: None,
virtual_name: joined,
}
} else {
RealFileName::LocalPath(joined)
}
}
RealFileName::Remapped { local_path: _, virtual_name } => {
// If working_dir has been remapped, then we emit
// Remapped variant as the expanded path won't be valid
RealFileName::Remapped {
local_path: None,
virtual_name: Path::new(virtual_name)
.join(path_to_file),
}
}
}
}
RealFileName::Remapped { local_path: _, virtual_name } => {
RealFileName::Remapped {
// We do not want any local path to be exported into metadata
local_path: None,
virtual_name: virtual_name.clone(),
}
}
});
adapted.name_hash = {
let mut hasher: StableHasher = StableHasher::new();
adapted.name.hash(&mut hasher);
hasher.finish::<u128>()
};
Lrc::new(adapted)
// At export time we expand all source file paths to absolute paths because
// downstream compilation sessions can have a different compiler working
// directory, so relative paths from this or any other upstream crate
// won't be valid anymore.
//
// At this point we also erase the actual on-disk path and only keep
// the remapped version -- as is necessary for reproducible builds.
match source_file.name {
FileName::Real(ref original_file_name) => {
let adapted_file_name =
source_map.path_mapping().to_embeddable_absolute_path(
original_file_name.clone(),
working_directory,
);

if adapted_file_name != *original_file_name {
let mut adapted: SourceFile = (**source_file).clone();
adapted.name = FileName::Real(adapted_file_name);
adapted.name_hash = {
let mut hasher: StableHasher = StableHasher::new();
adapted.name.hash(&mut hasher);
hasher.finish::<u128>()
};
Lrc::new(adapted)
} else {
// Nothing to adapt
source_file.clone()
}
}

// expanded code, not from a file
_ => source_file.clone(),
};

}
})
.map(|mut source_file| {
// We're serializing this `SourceFile` into our crate metadata,
// so mark it as coming from this crate.
// This also ensures that we don't try to deserialize the
// `CrateNum` for a proc-macro dependency - since proc macro
// dependencies aren't loaded when we deserialize a proc-macro,
// trying to remap the `CrateNum` would fail.
if self.is_proc_macro {
Lrc::make_mut(&mut adapted).cnum = LOCAL_CRATE;
Lrc::make_mut(&mut source_file).cnum = LOCAL_CRATE;
}
adapted
source_file
})
.collect::<Vec<_>>();

Expand Down
6 changes: 3 additions & 3 deletions compiler/rustc_span/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -335,8 +335,8 @@ impl fmt::Display for FileNameDisplay<'_> {
}
}

impl FileNameDisplay<'_> {
pub fn to_string_lossy(&self) -> Cow<'_, str> {
impl<'a> FileNameDisplay<'a> {
pub fn to_string_lossy(&self) -> Cow<'a, str> {
match self.inner {
FileName::Real(ref inner) => inner.to_string_lossy(self.display_pref),
_ => Cow::from(format!("{}", self)),
Expand Down Expand Up @@ -1153,7 +1153,7 @@ impl FromStr for SourceFileHashAlgorithm {
}

/// The hash of the on-disk source file used for debug info.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
#[derive(HashStable_Generic, Encodable, Decodable)]
pub struct SourceFileHash {
pub kind: SourceFileHashAlgorithm,
Expand Down
Loading

0 comments on commit 936eba3

Please sign in to comment.