Skip to content

Commit

Permalink
bootstrap: use internment instead of hand-rolled interning
Browse files Browse the repository at this point in the history
  • Loading branch information
GrigorenkoPV committed Nov 10, 2024
1 parent 6689597 commit 6d676d0
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 203 deletions.
39 changes: 39 additions & 0 deletions src/bootstrap/Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ dependencies = [
"memchr",
]

[[package]]
name = "allocator-api2"
version = "0.2.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "611cc2ae7d2e242c457e4be7f97036b8ad9ca152b499f53faf99b1ed8fc2553f"

[[package]]
name = "anstyle"
version = "1.0.8"
Expand Down Expand Up @@ -44,6 +50,7 @@ dependencies = [
"fd-lock",
"home",
"ignore",
"internment",
"junction",
"libc",
"object",
Expand Down Expand Up @@ -219,6 +226,12 @@ dependencies = [
"crypto-common",
]

[[package]]
name = "equivalent"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"

[[package]]
name = "errno"
version = "0.3.9"
Expand Down Expand Up @@ -252,6 +265,12 @@ dependencies = [
"windows-sys 0.59.0",
]

[[package]]
name = "foldhash"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f81ec6369c545a7d40e4589b5597581fa1c441fe1cce96dd1de43159910a36a2"

[[package]]
name = "generic-array"
version = "0.14.7"
Expand All @@ -275,6 +294,17 @@ dependencies = [
"regex-syntax",
]

[[package]]
name = "hashbrown"
version = "0.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a9bfc1af68b1726ea47d3d5109de126281def866b33970e10fbab11b5dafab3"
dependencies = [
"allocator-api2",
"equivalent",
"foldhash",
]

[[package]]
name = "heck"
version = "0.5.0"
Expand Down Expand Up @@ -306,6 +336,15 @@ dependencies = [
"winapi-util",
]

[[package]]
name = "internment"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "636d4b0f6a39fd684effe2a73f5310df16a3fa7954c26d36833e98f44d1977a2"
dependencies = [
"hashbrown",
]

[[package]]
name = "itoa"
version = "1.0.11"
Expand Down
1 change: 1 addition & 0 deletions src/bootstrap/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ clap = { version = "4.4", default-features = false, features = ["std", "usage",
clap_complete = "4.4"
fd-lock = "4.0"
home = "0.5"
internment = "0.8.5"
ignore = "0.4"
libc = "0.2"
object = { version = "0.36.3", default-features = false, features = ["archive", "coff", "read_core", "unaligned"] }
Expand Down
30 changes: 16 additions & 14 deletions src/bootstrap/src/core/config/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use crate::core::build_steps::llvm;
pub use crate::core::config::flags::Subcommand;
use crate::core::config::flags::{Color, Flags, Warnings};
use crate::core::download::is_download_ci_available;
use crate::utils::cache::{INTERNER, Interned};
use crate::utils::cache::Interned;
use crate::utils::channel::{self, GitInfo};
use crate::utils::helpers::{self, exe, output, t};

Expand Down Expand Up @@ -465,15 +465,21 @@ impl std::str::FromStr for RustcLto {
}
}

#[derive(Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
// N.B.: This type is used everywhere, and the entire codebase relies on it being Copy.
// Making !Copy is highly nontrivial!
pub struct TargetSelection {
pub triple: Interned<String>,
file: Option<Interned<String>>,
pub triple: Interned<str>,
file: Option<Interned<str>>,
synthetic: bool,
}

impl Default for TargetSelection {
fn default() -> Self {
Self { triple: "".into(), file: Default::default(), synthetic: Default::default() }
}
}

/// Newtype over `Vec<TargetSelection>` so we can implement custom parsing logic
#[derive(Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
pub struct TargetSelectionList(Vec<TargetSelection>);
Expand All @@ -500,18 +506,14 @@ impl TargetSelection {
(selection, None)
};

let triple = INTERNER.intern_str(triple);
let file = file.map(|f| INTERNER.intern_str(f));
let triple: Interned<str> = triple.into();
let file: Option<Interned<str>> = file.map(|f| f.into());

Self { triple, file, synthetic: false }
}

pub fn create_synthetic(triple: &str, file: &str) -> Self {
Self {
triple: INTERNER.intern_str(triple),
file: Some(INTERNER.intern_str(file)),
synthetic: true,
}
Self { triple: triple.into(), file: Some(file.into()), synthetic: true }
}

pub fn rustc_target_arg(&self) -> &str {
Expand Down Expand Up @@ -571,15 +573,15 @@ impl fmt::Debug for TargetSelection {

impl PartialEq<&str> for TargetSelection {
fn eq(&self, other: &&str) -> bool {
self.triple == *other
&*self.triple == *other
}
}

// Targets are often used as directory names throughout bootstrap.
// This impl makes it more ergonomic to use them as such.
impl AsRef<Path> for TargetSelection {
fn as_ref(&self) -> &Path {
self.triple.as_ref()
(*self.triple).as_ref()
}
}

Expand Down Expand Up @@ -2119,7 +2121,7 @@ impl Config {
// thus, disabled
// - similarly, lld will not be built nor used by default when explicitly asked not to, e.g.
// when the config sets `rust.lld = false`
if config.build.triple == "x86_64-unknown-linux-gnu"
if &*config.build.triple == "x86_64-unknown-linux-gnu"
&& config.hosts == [config.build]
&& (config.channel == "dev" || config.channel == "nightly")
{
Expand Down
190 changes: 1 addition & 189 deletions src/bootstrap/src/utils/cache.rs
Original file line number Diff line number Diff line change
@@ -1,198 +1,10 @@
use std::any::{Any, TypeId};
use std::borrow::Borrow;
use std::cell::RefCell;
use std::cmp::Ordering;
use std::collections::HashMap;
use std::hash::{Hash, Hasher};
use std::marker::PhantomData;
use std::ops::Deref;
use std::path::PathBuf;
use std::sync::{LazyLock, Mutex};
use std::{fmt, mem};

use crate::core::builder::Step;

pub struct Interned<T>(usize, PhantomData<*const T>);

impl<T: Internable + Default> Default for Interned<T> {
fn default() -> Self {
T::default().intern()
}
}

impl<T> Copy for Interned<T> {}
impl<T> Clone for Interned<T> {
fn clone(&self) -> Interned<T> {
*self
}
}

impl<T> PartialEq for Interned<T> {
fn eq(&self, other: &Self) -> bool {
self.0 == other.0
}
}
impl<T> Eq for Interned<T> {}

impl PartialEq<str> for Interned<String> {
fn eq(&self, other: &str) -> bool {
*self == other
}
}
impl PartialEq<&str> for Interned<String> {
fn eq(&self, other: &&str) -> bool {
**self == **other
}
}
impl<T> PartialEq<&Interned<T>> for Interned<T> {
fn eq(&self, other: &&Self) -> bool {
self.0 == other.0
}
}
impl<T> PartialEq<Interned<T>> for &Interned<T> {
fn eq(&self, other: &Interned<T>) -> bool {
self.0 == other.0
}
}

unsafe impl<T> Send for Interned<T> {}
unsafe impl<T> Sync for Interned<T> {}

impl fmt::Display for Interned<String> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let s: &str = self;
f.write_str(s)
}
}

impl<T, U: ?Sized + fmt::Debug> fmt::Debug for Interned<T>
where
Self: Deref<Target = U>,
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let s: &U = self;
f.write_fmt(format_args!("{s:?}"))
}
}

impl<T: Internable + Hash> Hash for Interned<T> {
fn hash<H: Hasher>(&self, state: &mut H) {
let l = T::intern_cache().lock().unwrap();
l.get(*self).hash(state)
}
}

impl<T: Internable + Deref> Deref for Interned<T> {
type Target = T::Target;
fn deref(&self) -> &Self::Target {
let l = T::intern_cache().lock().unwrap();
unsafe { mem::transmute::<&Self::Target, &Self::Target>(l.get(*self)) }
}
}

impl<T: Internable + AsRef<U>, U: ?Sized> AsRef<U> for Interned<T> {
fn as_ref(&self) -> &U {
let l = T::intern_cache().lock().unwrap();
unsafe { mem::transmute::<&U, &U>(l.get(*self).as_ref()) }
}
}

impl<T: Internable + PartialOrd> PartialOrd for Interned<T> {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
let l = T::intern_cache().lock().unwrap();
l.get(*self).partial_cmp(l.get(*other))
}
}

impl<T: Internable + Ord> Ord for Interned<T> {
fn cmp(&self, other: &Self) -> Ordering {
let l = T::intern_cache().lock().unwrap();
l.get(*self).cmp(l.get(*other))
}
}

struct TyIntern<T: Clone + Eq> {
items: Vec<T>,
set: HashMap<T, Interned<T>>,
}

impl<T: Hash + Clone + Eq> Default for TyIntern<T> {
fn default() -> Self {
TyIntern { items: Vec::new(), set: Default::default() }
}
}

impl<T: Hash + Clone + Eq> TyIntern<T> {
fn intern_borrow<B>(&mut self, item: &B) -> Interned<T>
where
B: Eq + Hash + ToOwned<Owned = T> + ?Sized,
T: Borrow<B>,
{
if let Some(i) = self.set.get(item) {
return *i;
}
let item = item.to_owned();
let interned = Interned(self.items.len(), PhantomData::<*const T>);
self.set.insert(item.clone(), interned);
self.items.push(item);
interned
}

fn intern(&mut self, item: T) -> Interned<T> {
if let Some(i) = self.set.get(&item) {
return *i;
}
let interned = Interned(self.items.len(), PhantomData::<*const T>);
self.set.insert(item.clone(), interned);
self.items.push(item);
interned
}

fn get(&self, i: Interned<T>) -> &T {
&self.items[i.0]
}
}

#[derive(Default)]
pub struct Interner {
strs: Mutex<TyIntern<String>>,
paths: Mutex<TyIntern<PathBuf>>,
lists: Mutex<TyIntern<Vec<String>>>,
}

trait Internable: Clone + Eq + Hash + 'static {
fn intern_cache() -> &'static Mutex<TyIntern<Self>>;

fn intern(self) -> Interned<Self> {
Self::intern_cache().lock().unwrap().intern(self)
}
}

impl Internable for String {
fn intern_cache() -> &'static Mutex<TyIntern<Self>> {
&INTERNER.strs
}
}

impl Internable for PathBuf {
fn intern_cache() -> &'static Mutex<TyIntern<Self>> {
&INTERNER.paths
}
}

impl Internable for Vec<String> {
fn intern_cache() -> &'static Mutex<TyIntern<Self>> {
&INTERNER.lists
}
}

impl Interner {
pub fn intern_str(&self, s: &str) -> Interned<String> {
self.strs.lock().unwrap().intern_borrow(s)
}
}

pub static INTERNER: LazyLock<Interner> = LazyLock::new(Interner::default);
pub type Interned<T> = internment::Intern<T>;

/// This is essentially a `HashMap` which allows storing any type in its input and
/// any type in its output. It is a write-once cache; values are never evicted,
Expand Down

0 comments on commit 6d676d0

Please sign in to comment.