Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Turn quadratic time on number of impl blocks into linear time #78317

Merged
merged 3 commits into from
Dec 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
175 changes: 159 additions & 16 deletions compiler/rustc_typeck/src/coherence/inherent_impls_overlap.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
use rustc_errors::struct_span_err;
use rustc_hir as hir;
use rustc_hir::def_id::{CrateNum, DefId, LOCAL_CRATE};
use rustc_hir::itemlikevisit::ItemLikeVisitor;
use rustc_middle::ty::{self, TyCtxt};
use rustc_span::Symbol;
use rustc_trait_selection::traits::{self, SkipLeakCheck};
use smallvec::SmallVec;
use std::collections::hash_map::Entry;

pub fn crate_inherent_impls_overlap_check(tcx: TyCtxt<'_>, crate_num: CrateNum) {
assert_eq!(crate_num, LOCAL_CRATE);
Expand Down Expand Up @@ -33,12 +36,9 @@ impl InherentOverlapChecker<'tcx> {
}

for item1 in impl_items1.in_definition_order() {
let collision = impl_items2.filter_by_name_unhygienic(item1.ident.name).any(|item2| {
// Symbols and namespace match, compare hygienically.
item1.kind.namespace() == item2.kind.namespace()
&& item1.ident.normalize_to_macros_2_0()
== item2.ident.normalize_to_macros_2_0()
});
let collision = impl_items2
.filter_by_name_unhygienic(item1.ident.name)
.any(|item2| self.compare_hygienically(item1, item2));

if collision {
return true;
Expand All @@ -48,6 +48,12 @@ impl InherentOverlapChecker<'tcx> {
false
}

fn compare_hygienically(&self, item1: &ty::AssocItem, item2: &ty::AssocItem) -> bool {
// Symbols and namespace match, compare hygienically.
item1.kind.namespace() == item2.kind.namespace()
&& item1.ident.normalize_to_macros_2_0() == item2.ident.normalize_to_macros_2_0()
}

fn check_for_common_items_in_impls(
&self,
impl1: DefId,
Expand All @@ -58,12 +64,9 @@ impl InherentOverlapChecker<'tcx> {
let impl_items2 = self.tcx.associated_items(impl2);

for item1 in impl_items1.in_definition_order() {
let collision = impl_items2.filter_by_name_unhygienic(item1.ident.name).find(|item2| {
// Symbols and namespace match, compare hygienically.
item1.kind.namespace() == item2.kind.namespace()
&& item1.ident.normalize_to_macros_2_0()
== item2.ident.normalize_to_macros_2_0()
});
let collision = impl_items2
.filter_by_name_unhygienic(item1.ident.name)
.find(|item2| self.compare_hygienically(item1, item2));

if let Some(item2) = collision {
let name = item1.ident.normalize_to_macros_2_0();
Expand Down Expand Up @@ -134,10 +137,150 @@ impl ItemLikeVisitor<'v> for InherentOverlapChecker<'tcx> {
.map(|impl_def_id| (impl_def_id, self.tcx.associated_items(*impl_def_id)))
.collect::<SmallVec<[_; 8]>>();

for (i, &(&impl1_def_id, impl_items1)) in impls_items.iter().enumerate() {
for &(&impl2_def_id, impl_items2) in &impls_items[(i + 1)..] {
if self.impls_have_common_items(impl_items1, impl_items2) {
self.check_for_overlapping_inherent_impls(impl1_def_id, impl2_def_id);
// Perform a O(n^2) algorithm for small n,
// otherwise switch to an allocating algorithm with
// faster asymptotic runtime.
const ALLOCATING_ALGO_THRESHOLD: usize = 500;
if impls.len() < ALLOCATING_ALGO_THRESHOLD {
for (i, &(&impl1_def_id, impl_items1)) in impls_items.iter().enumerate() {
for &(&impl2_def_id, impl_items2) in &impls_items[(i + 1)..] {
if self.impls_have_common_items(impl_items1, impl_items2) {
self.check_for_overlapping_inherent_impls(
impl1_def_id,
impl2_def_id,
);
}
}
}
} else {
// Build a set of connected regions of impl blocks.
// Two impl blocks are regarded as connected if they share
// an item with the same unhygienic identifier.
// After we have assembled the connected regions,
// run the O(n^2) algorithm on each connected region.
// This is advantageous to running the algorithm over the
// entire graph when there are many connected regions.

struct ConnectedRegion {
idents: SmallVec<[Symbol; 8]>,
impl_blocks: FxHashSet<usize>,
}
// Highest connected region id
let mut highest_region_id = 0;
let mut connected_region_ids = FxHashMap::default();
let mut connected_regions = FxHashMap::default();

for (i, &(&_impl_def_id, impl_items)) in impls_items.iter().enumerate() {
if impl_items.len() == 0 {
continue;
}
// First obtain a list of existing connected region ids
let mut idents_to_add = SmallVec::<[Symbol; 8]>::new();
let ids = impl_items
.in_definition_order()
.filter_map(|item| {
let entry = connected_region_ids.entry(item.ident.name);
if let Entry::Occupied(e) = &entry {
Some(*e.get())
} else {
idents_to_add.push(item.ident.name);
None
}
})
.collect::<FxHashSet<usize>>();
match ids.len() {
0 | 1 => {
let id_to_set = if ids.len() == 0 {
// Create a new connected region
let region = ConnectedRegion {
idents: idents_to_add,
impl_blocks: std::iter::once(i).collect(),
};
connected_regions.insert(highest_region_id, region);
(highest_region_id, highest_region_id += 1).0
} else {
// Take the only id inside the list
let id_to_set = *ids.iter().next().unwrap();
let region = connected_regions.get_mut(&id_to_set).unwrap();
region.impl_blocks.insert(i);
region.idents.extend_from_slice(&idents_to_add);
id_to_set
};
let (_id, region) = connected_regions.iter().next().unwrap();
// Update the connected region ids
for ident in region.idents.iter() {
connected_region_ids.insert(*ident, id_to_set);
}
}
_ => {
// We have multiple connected regions to merge.
// In the worst case this might add impl blocks
// one by one and can thus be O(n^2) in the size
// of the resulting final connected region, but
// this is no issue as the final step to check
// for overlaps runs in O(n^2) as well.

// Take the smallest id from the list
let id_to_set = *ids.iter().min().unwrap();

// Sort the id list so that the algorithm is deterministic
let mut ids = ids.into_iter().collect::<SmallVec<[_; 8]>>();
ids.sort();

let mut region = connected_regions.remove(&id_to_set).unwrap();
region.idents.extend_from_slice(&idents_to_add);
region.impl_blocks.insert(i);

for &id in ids.iter() {
if id == id_to_set {
continue;
}
let r = connected_regions.remove(&id).unwrap();
// Update the connected region ids
for ident in r.idents.iter() {
connected_region_ids.insert(*ident, id_to_set);
}
region.idents.extend_from_slice(&r.idents);
region.impl_blocks.extend(r.impl_blocks);
}
connected_regions.insert(id_to_set, region);
}
}
}

debug!(
"churning through {} components (sum={}, avg={}, var={}, max={})",
connected_regions.len(),
impls.len(),
impls.len() / connected_regions.len(),
{
let avg = impls.len() / connected_regions.len();
let s = connected_regions
.iter()
.map(|r| r.1.impl_blocks.len() as isize - avg as isize)
.map(|v| v.abs() as usize)
.sum::<usize>();
s / connected_regions.len()
},
connected_regions.iter().map(|r| r.1.impl_blocks.len()).max().unwrap()
);
// List of connected regions is built. Now, run the overlap check
// for each pair of impl blocks in the same connected region.
for (_id, region) in connected_regions.into_iter() {
let mut impl_blocks =
region.impl_blocks.into_iter().collect::<SmallVec<[_; 8]>>();
impl_blocks.sort();
for (i, &impl1_items_idx) in impl_blocks.iter().enumerate() {
let &(&impl1_def_id, impl_items1) = &impls_items[impl1_items_idx];
for &impl2_items_idx in impl_blocks[(i + 1)..].iter() {
let &(&impl2_def_id, impl_items2) = &impls_items[impl2_items_idx];
if self.impls_have_common_items(impl_items1, impl_items2) {
self.check_for_overlapping_inherent_impls(
impl1_def_id,
impl2_def_id,
);
}
}
}
}
}
Expand Down
54 changes: 54 additions & 0 deletions src/test/ui/inherent-impls-overlap-check/auxiliary/repeat.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// force-host
// no-prefer-dynamic

#![crate_type = "proc-macro"]

extern crate proc_macro;
use proc_macro::{Ident, Group, TokenStream, TokenTree as Tt};

// This constant has to be above the ALLOCATING_ALGO_THRESHOLD
// constant in inherent_impls_overlap.rs
const REPEAT_COUNT: u32 = 501;

#[proc_macro]
/// Repeats the input many times, while replacing idents
/// named "IDENT" with "id_$v", where v is a counter.
pub fn repeat_with_idents(input: TokenStream) -> TokenStream {
let mut res = Vec::new();
fn visit_stream(res: &mut Vec<Tt>, stream :TokenStream, v: u32) {
let mut stream_iter = stream.into_iter();
while let Some(tt) = stream_iter.next() {
match tt {
Tt::Group(group) => {
let tt = Tt::Group(visit_group(group, v));
res.push(tt);
},
Tt::Ident(id) => {
let id = if &id.to_string() == "IDENT" {
Ident::new(&format!("id_{}", v), id.span())
} else {
id
};
res.push(Tt::Ident(id));
},
Tt::Punct(p) => {
res.push(Tt::Punct(p));
},
Tt::Literal(lit) => {
res.push(Tt::Literal(lit));
},
}
}
}
fn visit_group(group :Group, v: u32) -> Group {
let mut res = Vec::new();
visit_stream(&mut res, group.stream(), v);
let stream = res.into_iter().collect();
let delim = group.delimiter();
Group::new(delim, stream)
}
for v in 0 .. REPEAT_COUNT {
visit_stream(&mut res, input.clone(), v)
}
res.into_iter().collect()
}
34 changes: 34 additions & 0 deletions src/test/ui/inherent-impls-overlap-check/no-overlap.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// run-pass
// aux-build:repeat.rs

// This tests the allocating algo branch of the
// inherent impls overlap checker.
// This branch was added by PR:
// https://github.com/rust-lang/rust/pull/78317
// In this test, we repeat many impl blocks
// to trigger the allocating branch.

#![allow(unused)]

extern crate repeat;

// Simple case where each impl block is distinct

struct Foo {}

repeat::repeat_with_idents!(impl Foo { fn IDENT() {} });

// There are overlapping impl blocks but due to generics,
// they may overlap.

struct Bar<T>(T);

struct A;
struct B;

repeat::repeat_with_idents!(impl Bar<A> { fn IDENT() {} });

impl Bar<A> { fn foo() {} }
impl Bar<B> { fn foo() {} }

fn main() {}
71 changes: 71 additions & 0 deletions src/test/ui/inherent-impls-overlap-check/overlap.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// aux-build:repeat.rs

#![allow(unused)]

// This tests the allocating algo branch of the
// inherent impls overlap checker.
// This branch was added by PR:
// https://github.com/rust-lang/rust/pull/78317
// In this test, we repeat many impl blocks
// to trigger the allocating branch.

// Simple overlap

extern crate repeat;

struct Foo {}

repeat::repeat_with_idents!(impl Foo { fn IDENT() {} });

impl Foo { fn hello() {} } //~ERROR duplicate definitions with name `hello`
impl Foo { fn hello() {} }

// Transitive overlap

struct Foo2 {}

repeat::repeat_with_idents!(impl Foo2 { fn IDENT() {} });

impl Foo2 {
fn bar() {}
fn hello2() {} //~ERROR duplicate definitions with name `hello2`
}

impl Foo2 {
fn baz() {}
fn hello2() {}
}

// Slightly stronger transitive overlap

struct Foo3 {}

repeat::repeat_with_idents!(impl Foo3 { fn IDENT() {} });

impl Foo3 {
fn bar() {} //~ERROR duplicate definitions with name `bar`
fn hello3() {} //~ERROR duplicate definitions with name `hello3`
}

impl Foo3 {
fn bar() {}
fn hello3() {}
}

// Generic overlap

struct Bar<T>(T);

struct A;
struct B;

repeat::repeat_with_idents!(impl Bar<A> { fn IDENT() {} });

impl Bar<A> { fn foo() {} fn bar2() {} }
impl Bar<B> {
fn foo() {}
fn bar2() {} //~ERROR duplicate definitions with name `bar2`
}
impl Bar<B> { fn bar2() {} }

fn main() {}
Loading